阿山的约定

博客园 首页 新随笔 联系 订阅 管理
爬虫代码:
 1 import urllib.request
 2 import os, re
 3 from bs4 import BeautifulSoup
 4 import xlwt
 5 
 6 URL = "https://baike.baidu.com/item/%E7%99%BE%E5%90%8D%E7%BA%A2%E9%80%9A%E4%BA%BA%E5%91%98/23252458?fr=aladdin"
 7 page = urllib.request.urlopen(URL)
 8 soup = BeautifulSoup(page)
 9 page.close()
10 
11 tables = soup.findAll('table')
12 tab = tables[0]
13 tr=tab.findAll('tr')
14 j=0
15 aa=[None]*100
16 pm = xlwt.Workbook()
17 sheet = pm.add_sheet('Sheet1', cell_overwrite_ok=True)
18 for th in tr:
19     i = 0
20     for td in th.findAll('td'):
21         sheet.write(j,i,str(td.text.replace("\n", "").replace(' ', '')))
22         i=i+1
24     j=j+1
25 pm.save("F://py//百名红通人员.xls")

 

posted on 2019-01-29 09:09  会飞的石头  阅读(248)  评论(0)  编辑  收藏  举报