三体数据爬取

《三体》动漫点评区数据爬取

预备

## 准备的库
import pandas as pd # 数据分析库
import requests # 用于发送 HTTP 请求
import json # json 格式
import numpy as np

短评数据爬取

## Request headers
# requests expects headers as a mapping passed through the `headers=` keyword.
# A bare string in the second positional slot of requests.get() is treated as
# `params` (query string), so the User-Agent would never actually be sent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/',
}

## Endpoint
url0 = 'https://api.bilibili.com/pgc/review/short/list?media_id=4315402&ps=20&sort=0'
score_dp = []  # scores of the short reviews

# Walk the cursor-paginated listing: each response carries a `next` cursor
# that is appended to the base URL to fetch the following page; a falsy
# cursor ends the loop.
url = url0
while True:
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    r = requests.get(url, headers=headers, timeout=10)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError later.
    r.raise_for_status()
    data_json = json.loads(r.text)
    page = data_json['data']
    # Iterate over whatever the page actually holds rather than assuming
    # exactly 20 entries (a short first page would raise IndexError).
    score_dp.extend(review['score'] for review in page['list'])
    cursor = page['next']  # named `cursor` to avoid shadowing builtin next()
    if not cursor:
        break
    url = url0 + '&cursor=' + str(cursor)

# Average short-review score (displayed as the cell result in a notebook).
np.mean(score_dp)

长评数据爬取

# requests expects headers as a mapping passed through the `headers=` keyword.
# A bare string in the second positional slot of requests.get() is treated as
# `params` (query string), so the User-Agent would never actually be sent.
headers_cp = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/',
}
url_cp = 'https://api.bilibili.com/pgc/review/long/list?media_id=4315402&ps=20&sort=0'
score_cp = []  # scores of the long reviews

# Walk the cursor-paginated listing: each response carries a `next` cursor
# that is appended to the base URL to fetch the following page; a falsy
# cursor ends the loop.
url = url_cp
while True:
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    r = requests.get(url, headers=headers_cp, timeout=10)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError later.
    r.raise_for_status()
    data_json = json.loads(r.text)
    page = data_json['data']
    score_cp.extend(review['score'] for review in page['list'])
    cursor = page['next']  # named `cursor` to avoid shadowing builtin next()
    if not cursor:
        break
    url = url_cp + '&cursor=' + str(cursor)

# Average long-review score (displayed as the cell result in a notebook).
np.mean(score_cp)

综合评分

# Pool the short- and long-review score lists and average them together.
np.mean([*score_dp, *score_cp])

结论:

  • 短评平均评分 75
  • 长评平均评分 41
  • 综合平均评分 63
posted @ 2022-12-18 19:05  只会加减乘除  阅读(62)  评论(0)  编辑  收藏  举报