爬虫的请求头(headers)和代理IP(proxies)设置
# Pool of HTTP proxies; make_session() picks one at random.
PROXIES = [
    'http://183.136.177.77:3128',
    'http://54.229.233.101:80',
    # NOTE: the original listing elided further entries at this position with
    # a "..........." placeholder, which is not valid Python. Add the real
    # proxy URLs here.
    'http://194.167.44.91:80',
]

# Pool of browser User-Agent strings; make_session() picks one at random.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; Tablet PC 2.0; .NET4.0E)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; InfoPath.3)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.48 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
]

# Browser-like header set from the original listing. The original assigned it
# to the session and then immediately overwrote it with a User-Agent-only
# dict, so it never took effect. It is kept here as an opt-in constant
# (apply with ``s.headers.update(BROWSER_HEADERS)``) rather than applied by
# default, because:
#   * 'Host' is pinned to one site and would be sent to every other host too;
#   * 'Accept-Encoding' advertises 'br', which the client must be able to
#     decode;
#   * SECURITY: 'Cookie' is a captured browser session committed to source,
#     and its values reference www.xinli001.com while 'Host' is m.haodf.com.
#     Move it to configuration or drop it before using this in earnest.
BROWSER_HEADERS = {
    'Host': 'm.haodf.com',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'Cookie': (
        'laravel_session=eyJpdiI6ImVaTHFHaklZKzcxVlo2Mmp5ZURHVlE9PSIsInZhbHVlIjoiK2pTRWw2b2lTMkNhNXB6bnhtUmprYVdLS2lkVG45RWF3MU03eVMwTXV0YSszeVArTzJJNlVET05QSFFWOGtaU3VGWml6U2VuRnQ1UVZBVjB1SEUxdHc9PSIsIm1hYyI6IjVhMTQ0NTE2OTgxMDY4ZGYyYzA2ODgyZWUwNDNjZmVkNWU2OTgzNTQ5ZGY4MDNlMjFiZmYzNzNlNGNjMTNmODEifQ%3D%3D; '
        'log_session_id=eyJpdiI6ImlMaWJKSVdyOVwvaE5wR2ZXYzhTcnFBPT0iLCJ2YWx1ZSI6IlZETlpVd1wvUzRqWW5iWDJhc2p0Tkx3a1JYU29rRCtITitrQmx4dzF3anFVPSIsIm1hYyI6IjFjZTFkYzM1YWVhM2FjNzg2MGNhMWFhN2NkMjZjMzc2YTY1ZDE1OWRlZTBmMTI5NmI0ZDc2NjgzODE4ZGRmNWEifQ%3D%3D; '
        '_uab_collina=156801769775990313627848; '
        'Hm_lvt_d64469e9d7bdbf03af6f074dffe7f9b5=1568017698,1568079685,1568082796; '
        'zg_did=%7B%22did%22%3A%20%2216d1523522f1-0677de0f76cacd-14367940-100200-16d152352303ad%22%7D; '
        'zg_18f5038ab49c4ae4918641ae36d67496=%7B%22sid%22%3A%201568084816056%2C%22updated%22%3A%201568085876944%2C%22info%22%3A%201568017699384%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22%22%2C%22zs%22%3A%200%2C%22sc%22%3A%200%2C%22firstScreen%22%3A%201568084816056%7D; '
        '_aihecong_chat_visibility=false; '
        '_aihecong_chat_source=https%3A%2F%2Fwww.xinli001.com%2Fceshi%3Fsource%3Dpc-home; '
        '_aihecong_chat_entrance=%7B%22url%22%3A%22https%3A%2F%2Fwww.xinli001.com%2Fzx%3Fsource%3Dpc-home%22%2C%22title%22%3A%22%E5%BF%83%E7%90%86%E5%92%A8%E8%AF%A2_%E5%BF%83%E7%90%86%E5%92%A8%E8%AF%A2%E5%B8%88_%E5%BF%83%E7%90%86%E5%8C%BB%E7%94%9F-%E5%A3%B9%E5%BF%83%E7%90%86%22%7D; '
        '_aihecong_chat_visitorId=5d7611ad455e6b256c3105a3; '
        '_aihecong_chat_routeId=5d7611ad455e6b256c3105a6; '
        'requestAsk=true; '
        'Hm_lpvt_d64469e9d7bdbf03af6f074dffe7f9b5=1568085877'
    ),
}

# Number of connection-level retries per request.
MAX_RETRIES = 10


def make_session():
    """Return a ``requests.Session`` with a random User-Agent and HTTP proxy.

    The User-Agent and proxy are drawn once per call, so a session keeps the
    same identity for its whole lifetime; call this again to rotate.

    Returns:
        A session that retries failed connections up to ``MAX_RETRIES``
        times, asks servers to close the connection after each response,
        and routes ``http://`` traffic through one entry of ``PROXIES``.

    Raises:
        IndexError: if ``USER_AGENTS`` or ``PROXIES`` is empty.
    """
    session = requests.Session()

    # Retry count. The original `s.adapters.DEFAULT_RETRIES = 10` only set an
    # unused attribute on the session's adapter mapping; retries have to be
    # configured on the transport adapters themselves.
    for prefix in ('http://', 'https://'):
        session.mount(prefix, requests.adapters.HTTPAdapter(max_retries=MAX_RETRIES))

    # As in the original, requests' default headers are dropped and only our
    # own are sent. clear() + update() keeps the session's case-insensitive
    # header mapping instead of replacing it with a plain dict.
    session.headers.clear()
    session.headers.update({
        'User-Agent': random.choice(USER_AGENTS),
        # Disable keep-alive to avoid piling up open connections. The
        # original `s.keep_alive = False` has no effect on a Session; the
        # Connection header is what actually turns persistence off.
        'Connection': 'close',
    })

    # NOTE: only the "http" scheme is proxied, as in the original; https://
    # URLs go out directly unless an "https" entry is added.
    session.proxies = {"http": random.choice(PROXIES)}
    return session


# Shared module-level session, kept under its original name.
s = make_session()