Python 爬虫 -- 豆瓣出版社 -- 写入 Excel

import urllib.request
import re
import xlwt
# Page listing every publisher (provider) on Douban Read.
PROVIDER_URL = "https://read.douban.com/provider/all"

# Raw string: "\d" is not a valid escape sequence, so a plain literal
# triggers a warning on current Python versions. The runtime value is unchanged.
OUTPUT_PATH = r"d:\data.xls"

# Captures the text of every <div class="name"> whose content starts with a
# CJK ideograph (U+4E00..U+9FA5); divs starting with other characters are skipped.
NAME_PATTERN = re.compile(r'<div class="name">([\u4e00-\u9fa5].*?)</div>')


def extract_publisher_names(html):
    """Return the publisher names found in *html*, in document order.

    Args:
        html: Decoded HTML text of the provider listing page.

    Returns:
        A list with the inner text of each ``<div class="name">`` element
        whose content begins with a Chinese character. Empty when nothing
        matches.
    """
    return NAME_PATTERN.findall(html)


def main():
    """Download the provider page and write the publisher names to an .xls file.

    Column 0 receives a 1-based sequence number, column 1 the publisher name.
    """
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(PROVIDER_URL) as response:
        html = response.read().decode("utf-8")

    book = xlwt.Workbook(encoding="utf-8")
    sheet = book.add_sheet("publisher", cell_overwrite_ok=True)
    for row, name in enumerate(extract_publisher_names(html)):
        sheet.write(row, 0, row + 1)
        sheet.write(row, 1, name)
    book.save(OUTPUT_PATH)


if __name__ == "__main__":
    main()


posted @ 2017-11-17 08:26  沧海一粒水  阅读(187)  评论(0)  编辑  收藏  举报