onlyou13

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理
 1 # coding: utf-8
 2 
 3 import urllib2
 4 import re
 5 import time
 6 
 7 def getDL(page):
 8     url = 'http://www.xicidaili.com/nt/{}'.format(page)
 9     header = {
10         'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
11     }
12 
13     req=urllib2.Request(url, headers=header)
14     res=urllib2.urlopen(req)
15     html=res.read()
16 
17     srclist=re.findall(r'<tr class=(.|\n)*?<td>(\d+\.\d+\.\d+\.\d+)</td>(.|\n)*?<td>(\d+)</td>(.|\n)*?<td>(HTTP|HTTPS)</td>', html)
18     xlist = []
19     for item in srclist:
20         xlist.append((item[5],item[1],item[3]))
21     return xlist
22 
def testDL(ipstr, test_url='http://2017.ip138.com/ic.asp', timeout=10):
    """Probe a single proxy and append it to ``ok.txt`` when it works.

    Args:
        ipstr: A ``(protocol, ip, port)`` tuple; only ``ip`` and ``port``
            are used to build the proxy address.
        test_url: HTTP URL fetched through the proxy to check that it works.
        timeout: Seconds to wait for the proxied request, so a dead proxy
            fails quickly instead of blocking the run.

    Returns:
        None. The outcome is printed, and working proxies are appended to
        ``ok.txt`` as ``"<protocol> <ip> <port>"`` lines.
    """
    proxy = urllib2.ProxyHandler({'http': "{}:{}".format(ipstr[1], ipstr[2])})
    # Keep the opener private: installing it globally would route every
    # later urllib2.urlopen() call (including the listing-page fetches)
    # through whichever proxy was tested last.
    opener = urllib2.build_opener(proxy)

    try:
        res = opener.open(urllib2.Request(test_url), timeout=timeout)
        try:
            body = res.read()
        finally:
            res.close()
        print("********************* √ {}    -- {}".format(ipstr, body))

        with open("ok.txt", "a") as f:
            f.write("{} {} {}\n".format(ipstr[0], ipstr[1], ipstr[2]))
    except Exception as e:
        # Best-effort probe: any failure just marks this proxy as bad.
        print("******** ×, {} -- {}".format(ipstr, e))
    # Pause between probes to avoid hammering the test endpoint.
    time.sleep(1)
41 
def startTask(pages=5):
    """Scrape the first ``pages`` listing pages and test every proxy found.

    Args:
        pages: Number of listing pages to process, starting from page 1.
    """
    for page in range(1, pages + 1):
        for proxy in getDL(page):
            testDL(proxy)
47 
# Run the scrape-and-test task only when executed as a script, not on import.
if __name__ == "__main__":
    startTask()

 

posted on 2017-11-09 17:01  onlyou13  阅读(3823)  评论(0)  编辑  收藏  举报