python刷取CSDN博文访问量之三

python刷取CSDN博文访问量之三
 
作者:vpoet
注:这个系列我只贴代码,代码不注释.有兴趣的自己读读就懂了,纯属娱乐,望管理员抬手
若有转载一定不要注明来源
 1 #coding=utf-8
 2 import webbrowser
 3 import time
 4 import urllib2
 5 import re
 6 import os
 7 import thread
 8 import threading
 9 mylock = threading.RLock()  
10 
11 tabcount=1
12 
def BlogFun(n, url, MaxVisitor):
    """Repeatedly visit one article until its read counter exceeds a limit.

    Each iteration opens ``url`` in the system browser, fetches the same page
    over HTTP, extracts the read counter from the HTML and sleeps ``n``
    seconds before the next round.

    Args:
        n: Seconds to sleep between two consecutive visits.
        url: Absolute URL of the article page.
        MaxVisitor: The loop stops once the counter parsed from the page is
            strictly greater than this value.

    Notes:
        * Once the shared ``tabcount`` exceeds 10, every ``chrome.exe``
          process is force-killed through ``taskkill`` (Windows only).
          NOTE(review): this presumes the default browser is Chrome - confirm.
        * Network errors raised by ``urllib2`` are not caught here and will
          end the calling thread.
    """
    visitcount = r'<span class="link_view" title="阅读次数">(\d+)人阅读</span>'
    global tabcount
    while True:
        # ``with`` releases the lock even if the body raises, unlike a bare
        # acquire()/release() pair.
        with mylock:
            if tabcount > 10:
                os.system('taskkill /F /IM chrome.exe')
                tabcount = 1
            else:
                tabcount = tabcount + 1
        webbrowser.open(url, new=1)
        request = urllib2.Request(url)
        request.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
        opener = urllib2.build_opener()
        fblog = opener.open(request)
        try:
            htm = fblog.read()
        finally:
            # Close the response so a long-running loop does not leak sockets.
            fblog.close()
        Ref = re.findall(visitcount, htm)
        # A page without the counter markup (error page, changed layout)
        # yields an empty list; skip the comparison instead of crashing on
        # Ref[0] and simply try again after the delay.
        if Ref and int(Ref[0]) > MaxVisitor:
            break
        time.sleep(n)
35 
36 
if __name__ == "__main__":
    # Script entry point: scrape the blog index page for article links and
    # start one BlogFun worker thread per article.
    Domain = "http://blog.csdn.net"
    main_url = "http://blog.csdn.net/u013018721"
    patt_article = r'<span class="link_title"><a href="(.+)">'

    Mainrequest = urllib2.Request(main_url)
    Mainrequest.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
    opener = urllib2.build_opener()
    fMainblog = opener.open(Mainrequest)
    try:
        Mainhtml = fMainblog.read()
    finally:
        fMainblog.close()
    # The captured hrefs are joined onto Domain below, so they are expected
    # to be site-relative paths.
    article_urls = re.findall(patt_article, Mainhtml)

    MaxVisitor = 300  # stop visiting an article once its counter exceeds this
    timedelay = 3     # seconds between two visits of the same article

    workers = []
    for item in article_urls:
        Realurl = Domain + item
        worker = threading.Thread(target=BlogFun,
                                  args=(timedelay, Realurl, MaxVisitor))
        # Daemon threads do not keep the process alive after Ctrl-C.
        worker.daemon = True
        worker.start()
        workers.append(worker)

    # The original started the threads and then fell off the end of the
    # script, so the process could exit before any worker did its job.
    # Wait for them here; the short join timeout keeps the main thread
    # responsive to KeyboardInterrupt.
    for worker in workers:
        while worker.is_alive():
            worker.join(1)

 

posted @ 2015-06-28 21:41  vpoet  阅读(279)  评论(0)  编辑  收藏  举报