爬虫代码:
# Download the Baidu Baike page at URL, take the first <table> on it, and
# write the text of every <td> cell into an .xls workbook, one spreadsheet
# row per <tr> and one column per <td>.
import os
import re
import urllib.request

from bs4 import BeautifulSoup
import xlwt

URL = "https://baike.baidu.com/item/%E7%99%BE%E5%90%8D%E7%BA%A2%E9%80%9A%E4%BA%BA%E5%91%98/23252458?fr=aladdin"
OUTPUT_PATH = "F://py//百名红通人员.xls"

# The context manager closes the HTTP response even if parsing raises.
with urllib.request.urlopen(URL) as page:
    # Name the parser explicitly: without it bs4 emits a warning and picks
    # whichever parser happens to be installed, which can differ by machine.
    soup = BeautifulSoup(page, "html.parser")

# Only the first table on the page is exported.
table = soup.find("table")
if table is None:
    raise SystemExit("No <table> element found at " + URL)

workbook = xlwt.Workbook()
sheet = workbook.add_sheet("Sheet1", cell_overwrite_ok=True)

# The row index advances for every <tr>, including rows that contain no
# <td> cells, so such rows are left blank in the sheet.
for row_idx, row in enumerate(table.find_all("tr")):
    for col_idx, cell in enumerate(row.find_all("td")):
        # Strip newlines and plain spaces from the cell text.
        text = cell.text.replace("\n", "").replace(" ", "")
        sheet.write(row_idx, col_idx, text)

workbook.save(OUTPUT_PATH)