爬取B站周杰伦新歌MV弹幕并绘制词云

1. 爬虫代码

# -*- coding: utf-8 -*-
"""
Created on Sat Jun 13 20:15:03 2020

@author: Administrator
"""

import requests
import json
import chardet
import re
from pprint import pprint
# 1.根据bvid请求得到cid
def get_cid(bvid="BV1PK4y1b7dt"):
    """Return the cid of the first page of a Bilibili video.

    Args:
        bvid: BV identifier of the video to look up. Defaults to the video
            this script was originally written for.

    Returns:
        The ``cid`` field of the first entry in the ``data`` list of the
        page-list response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no answer arrives within the timeout.
        KeyError, IndexError, TypeError: If the JSON payload does not
            contain a non-empty ``data`` list with a ``cid`` entry.
    """
    url = 'https://api.bilibili.com/x/player/pagelist'
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(
        url,
        params={"bvid": bvid, "jsonp": "jsonp"},
        timeout=10,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    pages = response.json()["data"]
    return pages[0]["cid"]

# 2.根据cid请求弹幕,解析弹幕得到最终的数据
# 接口
def get_data(cid):
    """Download the danmaku list for ``cid`` and return the comment texts.

    Args:
        cid: Identifier passed as the ``oid`` query parameter of the
            danmaku endpoint (as returned by ``get_cid``).

    Returns:
        A list of strings, one per ``<d ...>...</d>`` element found in the
        response, with XML character entities decoded.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no answer arrives within the timeout.
    """
    # Local import so this function stays self-contained in the file.
    from html import unescape

    response = requests.get(
        "https://api.bilibili.com/x/v1/dm/list.so",
        params={"oid": cid},
        # Without a timeout a stalled connection would block the script forever.
        timeout=10,
    )
    response.raise_for_status()
    # Choose the charset from the raw bytes rather than the response headers.
    response.encoding = chardet.detect(response.content)['encoding']
    pattern = re.compile(r'<d.*?>(.*?)</d>')
    # The payload is XML, so the captured text still carries entities such as
    # "&amp;"; decode them so the saved danmaku match what viewers typed.
    return [unescape(text) for text in pattern.findall(response.text)]

# 3.保存弹幕列表
def save_to_file(data):
    """Write the danmaku texts to ``dan_mu.txt``, one per line.

    The file is created in the current working directory and overwritten
    if it already exists.

    Args:
        data: Iterable of danmaku strings.
    """
    with open("dan_mu.txt", mode="w", encoding="utf-8") as f:
        # One comment per line keeps the comments separated when the
        # word-cloud step reads the file back.
        f.writelines(line + "\n" for line in data)

# Resolve the video's cid, download its danmaku, and persist them: the
# word-cloud step reads the comments back from dan_mu.txt.
cid = get_cid()
data = get_data(cid)
save_to_file(data)

2. 绘制词云

#!/usr/bin/env python
"""
Image-colored wordcloud

You can color a word-cloud by using an image-based coloring strategy
implemented in ImageColorGenerator. It uses the average color of the region
occupied by the word in a source image. You can combine this with masking -
pure-white will be interpreted as 'don't occupy' by the WordCloud object when
passed as mask.
If you want white as a legal color, you can just pass a different image to
"mask", but make sure the image shapes line up.
"""
from os import path
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Read the whole danmaku text that the crawler step saved.
with open(r'./dan_mu.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Read the image used both as the mask (cloud shape) and as the color source.
# NOTE(review): the original comment credited
# http://jirkavinse.deviantart.com/art/quot-Real-Life-quot-Alice-282261010,
# but the file loaded here is princess.jpg - confirm the attribution.
alice_coloring = np.array(Image.open(r"./wordcloud/princess.jpg"))

# Stop words to leave out of the cloud.
stopwords = set(STOPWORDS)

# The mask argument sets the shape of the word cloud.
# font_path points at a Windows system font; presumably chosen so Chinese
# text renders - adjust the path on other systems.
font = r'C:\Windows\Fonts\simfang.ttf'
wc = WordCloud(font_path=font, background_color="black", max_words=2000, mask=alice_coloring,
               stopwords=stopwords, max_font_size=40, random_state=42)
# Generate the word cloud; it must be computed before it can be drawn.
wc.generate(text)

# Create a coloring strategy from the image.
image_colors = ImageColorGenerator(alice_coloring)

# Show the result.
# With only the mask set, the cloud takes the shape of the image.
plt.figure(figsize=(8, 9))
plt.imshow(wc, interpolation="bilinear")

# plt.figure()
# Recolor the word cloud and show it.
# color_func=image_colors could also be passed directly to the constructor;
# the words are then colored according to the colors of the given image.
# plt.imshow(wc.recolor(color_func=image_colors), interpolation="bilinear")
# plt.axis("off")
# plt.figure()

# plt.imshow(alice_coloring, cmap=plt.cm.gray, interpolation="bilinear")
# plt.axis("off")

# Display the figure when this is run as a plain script.
plt.show()



