微博二级评论

import json
import time
import requests
import pandas as pd

# Script: download second-level (reply) comments for one Weibo post and save
# them to an Excel file.

# Values substituted into the `max_id` query parameter; one request is made
# per entry, in this order.
max_ids = [283033662846177, 170058843019812, 149030683477660, 142433613341546, 4777752970398295]

# NOTE(review): a session cookie is hard-coded below. Consider loading it from
# an environment variable or a local config file instead of keeping it in
# source control.
headers = {
    'referer': 'https://weibo.com/2803301701/LwAYl0Nfk?refer_flag=1001030103_',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 Edg/102.0.1245.33',
    'cookie': 'SINAGLOBAL=5840860407191.644.1644806743109; SUB=_2A25PfbuqDeRhGeNH61oR9i3Iyz-IHXVsgcXirDV8PUJbkNAKLUz9kW1NSvWJkWxYTclvArPVGdYD02lTJQ6b50Os; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFrm3zGJwUUhLB_Zq91EPT85NHD95Qf1K5Rehq0Sh50Ws4Dqcj7i--fiKyFi-2Xi--fi-2fiKyFi--NiKLWiKnXi--fiKyFiKnNi--ci-z7iK.X1KeN; PC_TOKEN=5aaf39bdbf; XSRF-TOKEN=s1ccxyvWtW6jGO9Ta1mwHTYh; _s_tentry=weibo.com; Apache=6827154524906.017.1654573459770; ULV=1654573459798:3:1:1:6827154524906.017.1654573459770:1652149266885; WBPSESS=Uy53Ou98LAPqZalnDXIg61qthXUKfA-GCTESWZylPml0kXw8FQHmrz09QCk9ktYt9cuFqGhZKnA5zBbPDj_9JV0AgnSRmc5bp6bSsjpP_iQTamqLYFE_mRYi0O4MX4ycN27Tq6GpilXI6qFMapzeiw==',
}

# Parallel column lists: position i in each list belongs to the same comment.
created_ats = []
users = []
text_raws = []

# enumerate() supplies the 1-based page number directly instead of searching
# the list for the current id on every iteration.
for page, max_id in enumerate(max_ids, start=1):
    # Pause before every request (including the first) to throttle the crawl.
    time.sleep(2)
    url = f'https://weibo.com/ajax/statuses/buildComments?flow=0&is_reload=1&id=4777686976959243&is_show_bulletin=2&is_mix=1&fetch_level=1&max_id={max_id}&count=20&uid=2803301701'
    # Pass `headers` explicitly: without it the referer, user-agent and cookie
    # defined above are never part of the request. The timeout keeps a stalled
    # connection from blocking the script indefinitely.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail with a clear HTTP error rather than a confusing JSON/KeyError later.
    response.raise_for_status()
    print(f'********************第{page}页***************')
    for item in response.json()['data']:
        created_ats.append(item['created_at'])
        users.append(item['user']['name'])
        text_raws.append(item['text_raw'])

# Column name -> values; the dict order fixes the column order in the sheet.
data = {'作者': users, '评论时间': created_ats, '评论内容': text_raws}
print('*****************')
print(data)
print('*****************')
s = pd.DataFrame(data)
# index=False keeps the DataFrame's row index out of the spreadsheet.
s.to_excel('微博高考二级评论.xlsx', index=False)
print(f'评论采集完成,共采集到{len(users)}条')

posted @ 2022-06-07 16:18  布都御魂  阅读(79)  评论(0)  编辑  收藏  举报