Pandas类
用pandas导出豆瓣评论
import requests
import pandas as pd
from bs4 import BeautifulSoup
# Scrape the short comments of one Douban movie page and export them to
# out.csv, one column per star rating (allstar10 .. allstar50). Each row holds
# a single comment, placed in the column that matches its rating.
link = 'https://movie.douban.com/subject/36208094/comments'
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'}

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() fails loudly instead of parsing an HTTP error page.
res = requests.get(url=link, headers=header, timeout=10)
res.raise_for_status()
soup = BeautifulSoup(res.text, 'lxml')
list_divs = soup.find_all(class_="comment")

# The keys are the CSS class names used for the star rating and double as the
# CSV column names.
rate_dict = {'allstar10': [],
             'allstar20': [],
             'allstar30': [],
             'allstar40': [],
             'allstar50': []}

rows = []
for pdiv in list_divs:
    short = pdiv.find(class_="short")
    info = pdiv.find(class_="comment-info")
    if short is None or info is None:
        # Not a regular comment entry; nothing to export.
        continue
    # Look the rating up by class name rather than by span position, so a
    # missing or reordered span cannot raise IndexError/TypeError.
    # NOTE(review): assumes the rating span carries an "allstarNN" class
    # token, as the original positional lookup implied -- confirm against the
    # live page markup.
    rate_count = next(
        (cls
         for span in info.find_all('span')
         for cls in (span.get('class') or [])
         if cls in rate_dict),
        None,
    )
    if rate_count is None:
        # Comment without a star rating: skip it instead of writing a row
        # that has no value in any rating column.
        continue
    rows.append({rate_count: short.text})

# Build the frame once from all rows (instead of growing it row by row); the
# explicit column list keeps the header stable even when no rows were found.
RateDataFrame = pd.DataFrame(rows, columns=list(rate_dict))

# index=False leaves the DataFrame index column out of the file. Plain utf-8
# output still showed up garbled, so utf-8-sig is used instead.
RateDataFrame.to_csv('out.csv', index=False, encoding='utf-8-sig', errors='replace')