python3.6爬取高匿代理IP地址

python3.6简单爬取高匿代理IP地址

import re
from urllib.request import urlopen
from urllib.request import Request
from bs4 import BeautifulSoup
from lxml import etree

# Send a browser-like User-Agent header with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
url = "http://www.xicidaili.com/nn/1"
req_timeout = 5  # handed to urlopen as its timeout, in seconds
req = Request(url=url, headers=headers)

# Use the response as a context manager so the connection is closed as soon
# as the body has been read, even if read() or decode() raises.
with urlopen(req, timeout=req_timeout) as resp:
    html = resp.read().decode('utf-8')

# ==================== lxml extraction ====================
selector = etree.HTML(html)
# Collect the text nodes of every <td> directly under <tr class="odd">.
# NOTE(review): the second alternative looks for a <td> nested inside a
# <td class="">, which presumably never matches; it may have been meant to
# select rows whose class is empty (//tr[@class=""]/td/text()) -- confirm
# against the page markup before changing it.
links = selector.xpath('//tr[@class="odd"]/td/text()|//tr[@class="odd"]/td[@class=""]/td/text()')
for link in links:
    print(link)

  

posted on 2017-11-07 20:14  似不是傻  阅读(226)  评论(0)  编辑  收藏  举报

导航