"""Poll a web page and report changes in its ``td.text3`` cells.

Each cycle downloads URL, saves the matching cells to NEW_PATH, compares
them with the previous snapshot in OLD_PATH (when one exists) and rewrites
DIFF_PATH from scratch with "new---"/"old---" line pairs.  The loop then
sleeps for POLL_SECONDS and starts over; it never terminates on its own.
"""
import difflib
import os
import time
import urllib

from bs4 import BeautifulSoup

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    urlopen = urllib.urlopen  # Python 2

URL = 'http://www.zhenxin520.com/trends.asp?id=95'
NEW_PATH = 'd:/new.txt'
OLD_PATH = 'd:/old.txt'
DIFF_PATH = 'd:/diff.txt'
POLL_SECONDS = 600
# Markup fragments removed from a line before it is written to the report.
MARKUP_TO_STRIP = ('<span>', '</span>', '<br/>', '</p>')


def _rotate_snapshots():
    """Turn the snapshot of the previous cycle into the "old" snapshot."""
    if not os.path.exists(NEW_PATH):
        return
    # os.rename raises on Windows when the destination already exists, so
    # the stale old snapshot has to be removed first.
    if os.path.exists(OLD_PATH):
        os.remove(OLD_PATH)
    os.rename(NEW_PATH, OLD_PATH)


def _fetch_snapshot():
    """Download URL and save the text of its ``td.text3`` cells to NEW_PATH."""
    response = urlopen(URL)
    try:
        soup = BeautifulSoup(response)
    finally:
        # Release the connection even if parsing fails.
        response.close()
    cells = soup.find_all("td", attrs={"class": "text3"})
    with open(NEW_PATH, 'w') as snapshot:
        snapshot.write(str(cells))


def _strip_markup(text):
    """Return *text* with every fragment in MARKUP_TO_STRIP removed."""
    for fragment in MARKUP_TO_STRIP:
        text = text.replace(fragment, '')
    return text


def _changed_pairs(new_lines, old_lines):
    """Return ``(new_text, old_text)`` pairs for lines that changed.

    The snapshots are diffed in both directions and the two results are
    walked in lockstep: where the new->old diff has a '-' entry, the entry
    at the same position of the old->new diff is taken as its counterpart.
    Both texts start at their first 'D' and have MARKUP_TO_STRIP removed;
    entries without a 'D' on either side are skipped.
    """
    differ = difflib.Differ()
    forward = list(differ.compare(new_lines, old_lines))
    backward = list(differ.compare(old_lines, new_lines))
    pairs = []
    # zip() stops at the shorter diff, so a length mismatch between the two
    # directions can no longer raise IndexError.
    # NOTE(review): pairing by position assumes both diffs line up entry
    # for entry -- confirm this holds for the pages being monitored.
    for new_entry, old_entry in zip(forward, backward):
        if not new_entry.startswith('-'):
            continue
        try:
            # NOTE(review): presumably item codes start with 'D' (e.g.
            # "D012"); everything before the first 'D' is dropped.
            new_text = new_entry[new_entry.index('D'):]
            old_text = old_entry[old_entry.index('D'):]
        except ValueError:
            # No 'D' on one of the two sides: nothing to report.
            continue
        pairs.append((_strip_markup(new_text), _strip_markup(old_text)))
    return pairs


def _write_diff():
    """Compare NEW_PATH with OLD_PATH and rewrite DIFF_PATH with the changes."""
    with open(NEW_PATH, 'r') as new_file:
        new_lines = new_file.readlines()
    with open(OLD_PATH, 'r') as old_file:
        old_lines = old_file.readlines()
    with open(DIFF_PATH, 'w') as report:
        for new_text, old_text in _changed_pairs(new_lines, old_lines):
            report.write('new---' + new_text + '\nold---' + old_text + '\n')


while True:
    _rotate_snapshots()
    _fetch_snapshot()
    # On the very first cycle there is no old snapshot to compare against.
    # Still fall through to the sleep so the page is not fetched twice in a
    # row without any delay.
    if os.path.exists(OLD_PATH):
        _write_diff()
    time.sleep(POLL_SECONDS)
diff.txt 输出结果:
new---D012红色:男L*7 XL*3 XXL*22 女M*2 童4*7
old---D012红色:男L*7 XL*4 XXL*22 女 S M L 童4*9