依赖:
pip install requests
pip install retrying
Python 项目中:
###################### httpUtils start ##########################
def _result(result):
    """Retry predicate for ``@retry``: return True when *result* is None."""
    return result is None


def _format_headers(headers):
    """Render a header mapping as newline-prefixed ``Name:value`` entries."""
    return "".join("\n" + key + ":" + str(value) for key, value in headers.items())


# retry settings: at most 3 attempts, random wait between 1000 and 5000
# (the retrying library documents these wait values as milliseconds).
@retry(stop_max_attempt_number=3, wait_random_min=1000, wait_random_max=5000, retry_on_result=_result)
def http_get(url, params=None, headers=None):
    """Send a GET request and return the response body decoded as UTF-8.

    Args:
        url: Target URL.
        params: Payload passed to ``requests.get`` as ``data`` (request body).
        headers: Extra request headers.

    Returns:
        The response body as ``str``.

    Raises:
        requests.RequestException: On a non-200 status or on any other
            failure; the original exception is chained as ``__cause__``.
    """
    # None defaults avoid sharing one dict between calls.
    params = {} if params is None else params
    headers = {} if headers is None else headers
    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        # NOTE(review): the payload goes out as the request body (data=), not
        # as a query string (params=) -- confirm this is what callers expect.
        res = requests.get(url, data=params, headers=headers, verify=False)
        if res.status_code != 200:
            # Report the actual status; this runs on every attempt, so the
            # message must not claim that all retries are already used up.
            raise requests.RequestException(
                ' get data from ' + res.request.url
                + ' fail, status code ' + str(res.status_code))
        body = res.content.decode("utf-8")  # decode once, reuse below
        log_info("request path is{}", res.request.url)
        log_info("request header:{}", _format_headers(res.request.headers))
        log_info("request parameter:\n {}", str(res.request.body))
        log_info("response header:{}", _format_headers(res.headers))
        log_info("response result:\n {}", body)
        return body
    except Exception as e:
        # Normalise every failure to RequestException, keeping the cause.
        raise requests.RequestException(
            ' get request to ' + url + ' exception and the exception is ' + str(e)) from e


@retry(stop_max_attempt_number=3, wait_random_min=1000, wait_random_max=5000, retry_on_result=_result)
def http_post(url, params_dict=None, headers=None):
    """POST *params_dict* as JSON and return the response body decoded as UTF-8.

    Args:
        url: Target URL.
        params_dict: Object serialised with ``json.dumps`` as the request body.
        headers: Extra request headers; ``Content-Type`` is always set to
            ``application/json`` on a copy, so the caller's dict is untouched.

    Returns:
        The response body as ``str``.

    Raises:
        requests.RequestException: On a non-200 status or on any other
            failure; the original exception is chained as ``__cause__``.
    """
    params_dict = {} if params_dict is None else params_dict
    # Copy so neither a shared default nor the caller's mapping is mutated.
    headers = dict(headers) if headers else {}
    headers["Content-Type"] = "application/json"
    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        res = requests.post(url, data=json.dumps(params_dict), headers=headers, verify=False)
        if res.status_code != 200:
            raise requests.RequestException(
                ' post data to ' + res.request.url
                + ' fail, status code ' + str(res.status_code))
        body = res.content.decode("utf-8")  # decode once, reuse below
        log_info("request path: {}", res.request.url)
        log_info("request header:{}", _format_headers(res.request.headers))
        log_info("request parameter:\n {}", str(res.request.body))
        log_info("response header: {}", _format_headers(res.headers))
        log_info("response result:\n {}", body)
        return body
    except Exception as e:
        # Normalise every failure to RequestException, keeping the cause.
        raise requests.RequestException(
            ' post data to ' + url + ' exception and the exception is ' + str(e)) from e
爬虫:
import random
import requests
import json
from retrying import retry

# Dependencies:
#   pip install requests
#   pip install retrying

# Proxy to use per protocol scheme (all entries are disabled by default).
proxies = {
    # "http": "http://192.168.3.224:7080",
    # "https": "http://12.34.56.79:9527",
    # "http": "user:passd@61.158.163.130:16816",
}


def setAgent(headers):
    """Set ``headers["User-Agent"]`` in place to a random USER_AGENTS entry."""
    headers["User-Agent"] = random.choice(USER_AGENTS)


def _result(result):
    """Retry predicate for ``@retry``: return True when *result* is None."""
    return result is None


def _format_headers(headers):
    """Render a header mapping as newline-prefixed ``Name:value`` entries."""
    return "".join("\n" + key + ":" + str(value) for key, value in headers.items())


def _dump_exchange(res):
    """Print the request path, headers and body and the response headers."""
    # The placeholders are filled with str.format; passing the value as a
    # second print() argument would leave the literal "{}" in the output.
    print("request path: {}".format(res.request.url))
    print("request header:{}".format(_format_headers(res.request.headers)))
    print("request parameter:\n {}".format(str(res.request.body)))
    print("response header:{}".format(_format_headers(res.headers)))
    # The response body is not printed (that print was commented out).


# retry settings: at most 3 attempts, random wait between 1000 and 5000
# (the retrying library documents these wait values as milliseconds).
@retry(stop_max_attempt_number=3, wait_random_min=1000, wait_random_max=5000, retry_on_result=_result)
def get(url, params=None, headers=None, proxies=proxies):
    """Send a GET request with a random User-Agent; return the body as UTF-8 text.

    Args:
        url: Target URL.
        params: Payload passed to ``requests.get`` as ``data`` (request body).
        headers: Extra request headers; a copy is used, so the caller's dict
            is not modified when the User-Agent is set.
        proxies: Proxy mapping handed to requests; defaults to the
            module-level ``proxies`` dict.

    Returns:
        The response body as ``str``.

    Raises:
        requests.RequestException: On a non-200 status or on any other
            failure; the original exception is chained as ``__cause__``.
    """
    params = {} if params is None else params
    headers = dict(headers) if headers else {}
    setAgent(headers)
    try:
        # verify=False (was the invalid expression `verify-False`).
        # NOTE(review): this disables TLS certificate verification.
        res = requests.get(url, data=params, headers=headers, proxies=proxies, verify=False)
        if res.status_code != 200:
            # Runs on every attempt, so report the status instead of claiming
            # that all retries are already used up.
            raise requests.RequestException(
                ' get data from ' + res.request.url
                + ' fail, status code ' + str(res.status_code))
        _dump_exchange(res)
        return res.content.decode("utf-8")
    except Exception as e:
        # Normalise every failure to RequestException, keeping the cause.
        raise requests.RequestException(
            ' get request to ' + url + ' exception and the exception is ' + str(e)) from e


@retry(stop_max_attempt_number=3, wait_random_min=1000, wait_random_max=5000, retry_on_result=_result)
def post(url, params_dict=None, headers=None, proxies=proxies):
    """POST *params_dict* as JSON with a random User-Agent; return the body as text.

    Args:
        url: Target URL.
        params_dict: Object serialised with ``json.dumps`` as the request body.
        headers: Extra request headers; a copy is used and ``Content-Type``
            is always set to ``application/json``.
        proxies: Proxy mapping handed to requests; defaults to the
            module-level ``proxies`` dict.

    Returns:
        The response body as ``str``.

    Raises:
        requests.RequestException: On a non-200 status or on any other
            failure; the original exception is chained as ``__cause__``.
    """
    params_dict = {} if params_dict is None else params_dict
    headers = dict(headers) if headers else {}
    setAgent(headers)
    headers["Content-Type"] = "application/json"
    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        res = requests.post(url, data=json.dumps(params_dict), headers=headers,
                            proxies=proxies, verify=False)
        if res.status_code != 200:
            raise requests.RequestException(
                ' post data to ' + res.request.url
                + ' fail, status code ' + str(res.status_code))
        _dump_exchange(res)
        return res.content.decode("utf-8")
    except Exception as e:
        # Normalise every failure to RequestException, keeping the cause.
        raise requests.RequestException(
            ' post data to ' + url + ' exception and the exception is ' + str(e)) from e


# Pool of browser User-Agent strings that setAgent() picks from.
USER_AGENTS = [
    # Opera
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
    "Opera/8.0 (Windows NT 5.1; U; en)",
    "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
    # Firefox
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
    "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
    # Safari
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
    # Chrome
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
    # 360
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
    # Taobao Browser
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
    # Liebao (Cheetah) Browser
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
    # QQ Browser
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
    # Sogou Browser
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
    # Maxthon Browser
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
    # UC Browser
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
]
#coding=utf8
import requests

# from common.logger import Logger
# import logging


class httpUtils:
    """Thin wrapper around ``requests`` that prints each request/response exchange."""

    # logger = Logger("../logs/http.log",logging.INFO,logging.INFO)

    @staticmethod
    def _format_headers(headers):
        """Render a header mapping as newline-prefixed ``Name:value`` entries."""
        return "".join("\n" + key + ":" + str(value) for key, value in headers.items())

    @classmethod
    def _dump(cls, res):
        """Print the request path, headers and body and the response headers and body."""
        print("请求路径:" + res.request.url)
        print("请求头信息:" + cls._format_headers(res.request.headers))
        # The prepared body is None for requests without a payload and may be
        # bytes; str() keeps the concatenation from raising TypeError.
        print("请求参数:\n" + str(res.request.body))
        print("响应头信息:" + cls._format_headers(res.headers))
        print("返回结果:\n" + res.content.decode("utf-8"))

    def get(self, url, params=None, headers=None):
        """Send a GET request, print the exchange and return the response.

        Args:
            url: Target URL.
            params: Payload passed to ``requests.get`` as ``data``.
            headers: Extra request headers.

        Returns:
            The ``requests`` response object.
        """
        # None defaults avoid sharing one dict between calls.
        params = {} if params is None else params
        headers = {} if headers is None else headers
        res = requests.get(url, data=params, headers=headers)
        self._dump(res)
        return res

    def post(self, url, params=None, headers=None):
        """Send a POST request, print the exchange and return the response.

        Args:
            url: Target URL.
            params: Payload passed to ``requests.post`` as ``data``.
            headers: Extra request headers.

        Returns:
            The ``requests`` response object.
        """
        params = {} if params is None else params
        headers = {} if headers is None else headers
        res = requests.post(url, data=params, headers=headers)
        self._dump(res)
        return res
使用:
#encoding=utf-8
'''
Usage demo for the httputils helpers: one GET call and one POST call.

Created on 2019-10-09

@author: sea
'''
# from com.sea.hhtp.MyHttp import get, post
from com.sea.hhtp import httputils

_RULE = "###################################################"


def _banner(title_line):
    """Print *title_line* framed by two rule lines above and two below."""
    for row in (_RULE, _RULE, title_line, _RULE, _RULE):
        print(row)


_banner("################ GET ####################")

# Sample header sets; they are defined here but not passed to the calls below.
headers = {
    "user-agent": "ad",
    "Appstore-clientType": "android",
    "Appstore-IMEI": "123456789000000",
}
getheader = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
}

getresult = httputils.get("http://192.168.18.129:7016/worktable?page=1&size=2")
print(str(getresult))
for _ in range(2):
    print("13213")

_banner("################ POST ###################")

postresult = httputils.post(
    "http://192.168.18.129:7016/worktable/dynamicQueryWithPage",
    '{"status":"CCD","page":1,"size":2}',
    {"Content-Type": "application/json"},
)
print(str(postresult))
使用: