Python爬虫获取迅雷会员帐号

代码如下:

 1 import re
 2 import urllib.request
 3 import urllib
 4 import time
 5  
 6 from collections import deque
 7 
 8 head = {
 9     'Connection': 'Keep-Alive',
10     'Accept': 'text/html, application/xhtml+xml, */*',
11     'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
12     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
13 }
14 visited = set()
15  
16 url = 'http://xlfans.com'  # 入口页面, 可以换成别的
17 data = None
18 full_url=urllib.request.Request(url,data,head)
19 urlop = urllib.request.urlopen(full_url)
20 data = urlop.read().decode('utf-8')
21 temp = re.search(r'href=\"http://xlfans.com/archives/(.{4})\" class=\"thumbnail\">(.*) alt=\"迅雷粉 (.*) 迅雷会员账号分享 共享中', data, re.M|re.I)
22 result = re.search(r'href=\"http://xlfans.com/archives/(.{4})', temp.group(), re.M|re.I)
23 url = url + "/archives/" + temp.group(1)
24 data = None
25 full_url=urllib.request.Request(url,data,head)
26 urlop = urllib.request.urlopen(full_url)
27 data = urlop.read().decode('utf-8')
28 save_path = 'D:\\Program Files\\python\\test.txt'
29 f_obj = open(save_path, 'w')
30 #获取系统时间,来判断是否为周末
31 cur_day = time.strftime("%w",time.localtime(time.time()))
32 if(cur_day == '5'):
33     string = "迅雷粉周末迅雷会员账号"
34 elif cur_day == '6':
35     string = "迅雷粉周末迅雷会员账号"
36     print(cur_day)
37 else:
38     string = "迅雷粉专享迅雷会员账号"
39 #娘的,是你逼我的
40 start = data.find(string)
41 data = data[start:]
42 data_que = data.split("</p>")
43 count = 0
44 for i in range(3):
45     data_temp = data_que[i]
46     num = -1
47     acc_que = data_temp.split("<br />")
48     for result in acc_que:
49         num = num + 1
50         if(count != 0):
51             if(num == 0):
52                 continue
53         f_obj.write(result)
54         f_obj.write("\n")
55     count = count + 1
56 f_obj.close()
57         

 

posted @ 2015-09-12 11:10  king@nuaa  阅读(1101)  评论(0)  编辑  收藏  举报