以下是通过代理发起 GET 请求的方法,所使用的代理 IP 来源网站是 http://www.xicidaili.com/nn/
# -*- coding: utf-8 -*-
"""Fetch a web page through free HTTP proxies scraped from a proxy-list site.

Targets Python 2 (uses ``urllib2``).  Proxies are scraped once, cached in the
module-level ``ip_list`` and tried in order; a proxy that fails is dropped
from the cache and the next one is tried.
"""
from contextlib import closing
import urllib2

from bs4 import BeautifulSoup
import chardet
import requests

# Page that lists the free proxies, and the browser-like headers sent to it.
_PROXY_LIST_URL = 'http://www.xicidaili.com/nn/'
_PROXY_LIST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
}
# Seconds to wait for the proxy-list page before giving up.
_PROXY_LIST_TIMEOUT = 10

# Cache of proxy URLs ('http://host:port'); the head is the one currently used.
ip_list = []


def get_ip_list(url, headers):
    """Scrape proxy addresses from the HTML table at *url*.

    Args:
        url: Address of the proxy-list page.
        headers: HTTP headers to send with the request.

    Returns:
        A list of strings of the form ``'http://<host>:<port>'``, built from
        the second and third ``<td>`` cell of each table row.

    Raises:
        requests.RequestException: If the page cannot be fetched (including
            when the request times out).
    """
    # A timeout keeps a dead proxy-list site from hanging the caller forever.
    web_data = requests.get(url, headers=headers, timeout=_PROXY_LIST_TIMEOUT)
    soup = BeautifulSoup(web_data.text, 'lxml')
    proxies = []
    # The first <tr> is skipped (presumably the table header -- confirm
    # against the page layout).
    for row in soup.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) < 3:
            # Not a data row; ignore it instead of aborting the whole scrape.
            continue
        proxies.append(
            'http://' + cells[1].text.strip() + ':' + cells[2].text.strip())
    return proxies


def get_random_ip(ip_list):
    """Return a ``urllib2.ProxyHandler``-style dict for the head of *ip_list*.

    Despite the name, the choice is not random: index 0 is always used, which
    is the same entry that ``deleteip()`` removes.

    Raises:
        IndexError: If *ip_list* is empty.
    """
    return {'http': ip_list[0]}


def getip():
    """Return the proxies dict for the current proxy, scraping the list first
    if the module-level cache is empty.

    Raises:
        IndexError: If no proxy is available even after scraping.
    """
    global ip_list
    if not ip_list:
        ip_list = get_ip_list(_PROXY_LIST_URL, headers=_PROXY_LIST_HEADERS)
        print(ip_list)
    return get_random_ip(ip_list)


def deleteip():
    """Drop the current (head) proxy from the cache; no-op if it is empty."""
    # The cache is empty when the proxy list could not be fetched at all;
    # popping then would raise from inside the caller's error handler.
    if ip_list:
        ip_list.pop(0)


def urllink(link, max_attempts=12, timeout=10):
    """Fetch *link* through a proxy and return its body re-encoded as UTF-8.

    Args:
        link: URL to download.
        max_attempts: How many proxies to try before giving up.
        timeout: Per-request timeout in seconds.

    Returns:
        The page body as a UTF-8 encoded byte string, or ``''`` if every
        attempt failed.
    """
    # Fetch the page HTML and convert its encoding.
    html_1 = None
    for attempt in range(max_attempts):
        try:
            ip = getip()
            print(ip)
            opener = urllib2.build_opener(urllib2.ProxyHandler(ip))
            # NOTE: installs the proxy as the process-wide urllib2 default.
            urllib2.install_opener(opener)
            # closing() releases the connection even if read() fails.
            with closing(urllib2.urlopen(link, timeout=timeout)) as response:
                html_1 = response.read()
            break
        except Exception as e:
            # Deliberately broad: whatever went wrong, discard this proxy and
            # move on to the next one.
            deleteip()
            # Same text as printing the three values separated by spaces.
            print('错误 %s %s' % (attempt, e))
    else:
        # No attempt succeeded (or max_attempts was 0).
        return ''

    web_encoding = chardet.detect(html_1)['encoding']
    # The detected name is compared case-insensitively; BOM-prefixed UTF-8 is
    # reported by chardet as 'UTF-8-SIG' and must not be decoded as GBK.
    if web_encoding and web_encoding.lower() in ('utf-8', 'utf-8-sig'):
        return html_1
    # Anything else (including an undetected encoding) is treated as GBK.
    return html_1.decode('gbk', 'ignore').encode('utf-8')


# Demo request; runs at import time as well as when executed as a script.
print(urllink("http://ccdas.ipmph.com/pc/clinicalExam/getClinicalExamDetail?articleId=8165"))