Python爬虫-Proxy代理的使用
'''
Build a pool of proxies; on each request, randomly pick one proxy
and fetch through it (selection via random.choice).

Steps:
1. Build the proxy pool
2. For each request, pick a proxy at random and use it
'''

import random

from urllib import request, error


# 1. Proxy addresses: each entry maps scheme -> "host:port"
proxy_list = [
    {"http": "101.50.1.2:80"},
    {"http": "58.240.172.110:3128"},
    {"http": "124.193.51.249:3128"},
    {"http": "120.199.64.163:8081"},
]

# 2. One ProxyHandler per proxy
proxy_handler_list = [request.ProxyHandler(proxy) for proxy in proxy_list]

# 3. One opener per handler
opener_list = [request.build_opener(handler) for handler in proxy_handler_list]

url = "http://www.baidu.com"

# 4. Install a randomly chosen opener globally so urlopen() routes
#    through that proxy for this request.
opener = random.choice(opener_list)
request.install_opener(opener)

try:
    # Keep the try body minimal: only the network call and decode can raise.
    # `with` ensures the HTTP response is closed even on error.
    with request.urlopen(url) as rsp:
        html = rsp.read().decode()
    print(html)
except error.URLError as e:
    # Proxy unreachable / DNS failure / HTTP-level error
    print(e)
except Exception as e:
    # NOTE(review): broad catch kept from the original as a last-resort
    # guard; it still surfaces the error by printing it.
    print(e)