简单小爬虫
"""Simple little crawler (Python 2, uses ``urllib2``).

Fetches pages 0-9 of one problem-status listing, scans each page body for
a fixed marker string, and records an 11-character slice starting at every
hit: one slice per line in the output file, echoed to stdout as well.
"""
import urllib2
import time

# Listing to scan; the page index is appended as "&page=<n>".
BASE_URL = "http://121.42.142.123/JudgeOnline/problemstatus.php?id=1154"
# NOTE(review): raw string with a doubled backslash, so the path really
# contains two backslashes; kept exactly as in the original script.
OUT_PATH = r'f:\\aa.txt'
# Marker searched for in each page body.
# NOTE(review): presumably the common prefix of the IDs being collected --
# confirm against the actual page markup.
HEAD = "156310"
RECORD_LEN = 11   # number of characters copied starting at each hit
SEARCH_STEP = 20  # each search resumes this many characters past the last hit
PAGE_COUNT = 10   # pages 0 .. PAGE_COUNT - 1 are fetched

# `with` guarantees the output file is closed even if a fetch raises.
with open(OUT_PATH, 'w') as f:
    for page in range(PAGE_COUNT):
        up = urllib2.urlopen(BASE_URL + "&page=" + str(page))
        try:
            const = up.read()
        finally:
            # Python 2 responses are not context managers; close explicitly.
            up.close()

        # The first search starts at offset SEARCH_STEP (beg is 0), exactly
        # as the original did; later searches start SEARCH_STEP past the
        # previous hit, which also guarantees forward progress.
        beg = 0
        while True:
            ph = const.find(HEAD, beg + SEARCH_STEP)
            if ph == -1:
                break
            beg = ph
            record = const[ph:ph + RECORD_LEN]
            f.write(record + '\n')
            # Single-argument parenthesised form prints the same in Python 2.
            print(record)

# NOTE(review): long pause (2002 seconds) after all work is done; presumably
# to keep a console window open -- confirm it is still wanted.
time.sleep(2002)