from collections import Counter

import jieba
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
from wordcloud import WordCloud
def _read_stopwords(path):
    """Return every line of the stop-word file at *path*, stripped, in order.

    Blank lines are kept as empty strings, exactly as they appear in the
    file. The file handle is closed before the function returns.
    """
    # NOTE(review): no encoding is passed, so the platform default applies,
    # whereas the main text below is read as UTF-8 -- confirm the encoding
    # of the stop-word files and pass it explicitly if it is known.
    with open(path) as stopword_file:
        return [line.strip() for line in stopword_file]


# Full source text that will be segmented and counted further down.
with open("红楼梦.txt", "r", encoding='utf-8') as text_file:
    txt = text_file.read()

# Two stop-word lists, one entry per line in each file.
ls1 = _read_stopwords("stopwords_cn.txt")
ls2 = _read_stopwords("stopwords_cn(more).txt")

# Combined stop-word list (duplicates are not removed).
ls = ls1 + ls2
# Segment the whole text into a list of tokens.
words = jieba.lcut(txt)

# Build the lookup set once: membership tests are O(1) per token, and a
# stop word is now actually skipped (a `continue` inside an inner loop over
# the stop words only advances that inner loop, so nothing was filtered).
stopword_set = set(ls)

# Frequency of every token that is longer than one character and is not a
# stop word.
counts = Counter(
    word for word in words
    if len(word) > 1 and word not in stopword_set
)

# All (word, count) pairs, most frequent first; ties keep first-seen order.
items = counts.most_common()

# Print the 15 most frequent words; slicing copes with fewer than 15 entries.
for word, count in items[:15]:
    print("{0:<10}{1:>5}".format(word, count))
# Space-separated token stream handed to WordCloud as its input text.
string = ' '.join(words)
print(len(string))

# Load the mask image and convert it to an array; the context manager
# releases the underlying file handle once the pixel data has been copied.
with Image.open('22.png') as img:
    img_array = np.array(img)

# Extra stop words, i.e. words that should not be displayed in the cloud.
# These are words the earlier preprocessing did not filter out; remove
# entries from this list to see the effect they have on the result.
stopword = ['什么', '一个', '我们', '那里', '你们', '如今', '起来', '知道', '这里', '众人', '他们', '出来', '自己', '说道', '听见', '两个', '姑娘', '不好',
            '不知', '只见', '东西', '告诉']
stopword = stopword + ls
print(stopword)

# NOTE(review): per the WordCloud documentation, width/height are ignored
# when a mask is supplied and the mask's shape is used instead -- confirm
# whether these two arguments are still wanted.
wc = WordCloud(
    background_color='white',
    width=1000,
    height=800,
    mask=img_array,
    font_path='./fonts/simhei.ttf',
    stopwords=stopword
)
wc.generate_from_text(string)  # build the word cloud from the text

# Save before showing: plt.show() may block until the window is closed, and
# the output file should not depend on the display step succeeding.
wc.to_file('new.png')

# Create the figure first, then draw into it. Calling plt.figure() after
# plt.imshow() opens a second, empty figure alongside the image.
plt.figure()
plt.imshow(wc)
plt.axis('off')
plt.show()  # display the image