Python 爬虫：爬取豆瓣电影 Top250 并写入 Excel

import requests
import re
import openpyxl
# Douban Top 250 scraper: download every listing page, extract the fields of
# each movie and write one row per movie into an Excel workbook.

HEADERS = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.0.10191 SLBChan/109'
}
TOP250_URL = "https://movie.douban.com/top250"
TOTAL_MOVIES = 250
PAGE_SIZE = 25
REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever
OUTPUT_FILE = '豆瓣电影Top250.xlsx'

# Patterns are compiled once at import time instead of once per page.
# Each movie is first isolated as one <li>...</li> chunk so that a field
# missing from one movie can never be filled in from the following movie.
ITEM_RE = re.compile(r'<li>(.*?)</li>', re.S)
TITLE_RE = re.compile(r'<span class="title">(.*?)</span>', re.S)
CREDITS_RE = re.compile(
    r'<p class="">\s*(?P<director>.*?)'
    r'(?:&nbsp;&nbsp;&nbsp;(?P<actor>.*?))?<br\s*/?>',
    re.S,
)
RATING_RE = re.compile(
    r'<span class="rating_num" property="v:average">(.*?)</span>', re.S
)
QUOTE_RE = re.compile(r'<span class="inq">(.*?)</span>', re.S)


def parse_movies(html: str) -> list:
    """Extract one row per movie from the HTML of a Top 250 listing page.

    Args:
        html: Full HTML text of one listing page.

    Returns:
        A list of ``[name, director, actor, rate, quote]`` rows (all strings)
        in page order. A field that is absent for a movie is returned as an
        empty string. ``<li>`` elements that contain no title span (for
        example navigation entries) are skipped.
    """
    rows = []
    for item in ITEM_RE.finditer(html):
        chunk = item.group(1)
        title = TITLE_RE.search(chunk)
        if title is None:
            # Not a movie entry.
            continue
        credits = CREDITS_RE.search(chunk)
        rating = RATING_RE.search(chunk)
        quote = QUOTE_RE.search(chunk)
        rows.append([
            title.group(1).strip(),
            (credits.group('director') or '').strip() if credits else '',
            (credits.group('actor') or '').strip() if credits else '',
            rating.group(1).strip() if rating else '',
            quote.group(1).strip() if quote else '',
        ])
    return rows


def main() -> None:
    """Scrape all listing pages and save the rows to ``OUTPUT_FILE``.

    Raises:
        requests.RequestException: If a page cannot be fetched or the server
            answers with an HTTP error status.
    """
    # Create the workbook and use its default sheet.
    workbook = openpyxl.Workbook()
    sheet = workbook.active

    # Widen the director and actor columns, which hold long text.
    for letter in ['B', 'C']:
        sheet.column_dimensions[letter].width = 66

    # Fetch the listing page by page.
    for start in range(0, TOTAL_MOVIES, PAGE_SIZE):
        response = requests.get(
            f"{TOP250_URL}?start={start}",
            headers=HEADERS,
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on an HTTP error instead of silently parsing an error
        # page and writing an empty sheet.
        response.raise_for_status()

        # Parse the page and append one worksheet row per movie.
        for row in parse_movies(response.text):
            sheet.append(row)

    # Write the file once, after every page has been collected.
    workbook.save(OUTPUT_FILE)


if __name__ == "__main__":
    main()

posted on 2024-02-06 10:39  leiyanyy  阅读(21)  评论(0)  编辑  收藏  举报