Spider -- User-Agent 个人整理小模块
自己整理一个User-Agent模块,方便日后拿来就能用:
1、利用 fake_useragent
from fake_useragent import UserAgent


def get_headers():
    """Build request headers carrying one randomly generated User-Agent.

    Returns:
        dict: A single-entry mapping whose ``'User-Agent'`` value is read
        from the ``random`` attribute of a fresh ``UserAgent`` instance.
    """
    return {'User-Agent': UserAgent().random}


if __name__ == '__main__':
    print(get_headers())
2、手写 User-Agent
import random
from urllib import request


def test_useAgent(headers):
    """Send a GET request to the httpbin test site and print the response body.

    The request is built with the supplied headers so that the printed
    response can be inspected to confirm which request headers were sent.

    Args:
        headers: Mapping of request header names to values, for example
            ``{'User-Agent': 'Opera/9.80 (Windows NT 6.1; U; zh-cn) '
            'Presto/2.9.168 Version/11.50'}``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    url = 'http://httpbin.org/get'
    # 1. Create the request object; this only wraps the URL and headers,
    #    nothing is sent yet.
    req = request.Request(url=url, headers=headers)
    # 2. Send the request. The with-block closes the response (and its
    #    connection) even if reading or decoding raises.
    with request.urlopen(req) as res:
        # 3. Extract the response content.
        html = res.read().decode('utf-8')
    print(html)


# The ``test_`` prefix would make pytest collect this helper as a test case
# and then fail looking for a ``headers`` fixture; opt it out of collection.
test_useAgent.__test__ = False


def get_headers():
    """Return request headers with a randomly chosen User-Agent.

    Returns:
        dict: ``{'User-Agent': ua}`` where ``ua`` is one of the search-engine
        crawler User-Agent strings listed in the function body, each chosen
        with equal probability.
    """
    ua_list = [
        'Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)',
        'Mozilla/5.0 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)',
        'Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;) AppleWebKit/534.46 (KHTML,like Gecko) Version/5.1 Mobile Safari/10600.6.3 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)',
        'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)',
        'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
        'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
        'Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)',
    ]
    # random.choice is uniform over the list. Indexing with
    # ``random.randint(0, len(ua_list)) - 1`` is not: randint includes both
    # endpoints, so the index ranges over -1..len-1 and the last entry
    # (reachable as both -1 and len-1) is picked twice as often as the rest.
    useragent = random.choice(ua_list)
    return {'User-Agent': useragent}


if __name__ == '__main__':
    headers = get_headers()
    print(headers)
    test_useAgent(headers)