python3.6爬取高匿代理IP地址
python3.6简单爬取高匿代理IP地址
import re
from urllib.request import Request
from urllib.request import urlopen

from bs4 import BeautifulSoup
from lxml import etree

# Fetch the first page of the high-anonymity proxy listing and print the text
# of every table cell found in rows marked class="odd".

# Send a browser-like User-Agent header with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
url = "http://www.xicidaili.com/nn/1"
req_timeout = 5  # passed to urlopen as its timeout (seconds)

req = Request(url=url, headers=headers)

# The context manager closes the HTTP response even if reading or decoding
# raises, so the connection is not leaked.
with urlopen(req, None, req_timeout) as f:
    s = f.read().decode('utf-8')

# ==================== Extraction with lxml ====================
selector = etree.HTML(s)

# NOTE(review): the second alternative selects a <td> nested directly inside
# another <td>, which looks like it can never match ordinary table markup.
# Possibly rows with class="" were intended -- confirm against the page before
# changing the expression. It is kept as-is here.
links = selector.xpath(
    '//tr[@class="odd"]/td/text()'
    '|//tr[@class="odd"]/td[@class=""]/td/text()'
)

for link in links:
    print(link)