用Python分析下王小波与李银河写情书最爱用哪些词
作家王小波其实也是我国最早的一批程序员之一。突发奇想:王小波写情书最喜欢用哪些词呢?用 Python 词云分析一下!
直接上代码吧,有注释,很好理解。输出的图片尺寸(代码中的 size 参数)设置得比较大,所以运行得比较慢,可以适当把图片尺寸改小一点。
import jieba
from stylecloud import gen_stylecloud

# Maps the icon selector accepted by jieba_cloud() to the Font Awesome
# class name passed to stylecloud as the mask shape.
_ICON_NAMES = {
    "1": "fas fa-thumbs-up",
    "2": "fas fa-heartbeat",
    "3": "fas fa-dog",
    "4": "fas fa-cat",
    "5": "fas fa-bug",
    "6": "fab fa-qq",
}


def jieba_cloud(file_name, icon):
    """Render a Chinese word cloud for a text file and return the image name.

    The text is segmented with jieba, filtered through the stop words listed
    in ``stopwords.txt`` (one per line, UTF-8, read from the current working
    directory) and drawn with stylecloud using the ``simsun.ttc`` font.

    Args:
        file_name: Path of the UTF-8 text file to analyse.
        icon: Selector ``"1"``-``"6"`` choosing the mask icon (see
            ``_ICON_NAMES``). Any other value falls back to stylecloud's
            default icon and default size settings.

    Returns:
        The output file name, ``"<icon>.png"``.

    Raises:
        OSError: If ``file_name`` or ``stopwords.txt`` cannot be opened.
    """
    with open(file_name, 'r', encoding='utf8') as f:
        text = f.read()
    # Join the jieba tokens with spaces so the word-cloud generator sees
    # separate words instead of one unbroken run of Chinese characters.
    result = " ".join(jieba.cut(text))

    # Load the stop-word list; the context manager closes the file handle,
    # which the previous version leaked.
    with open('stopwords.txt', 'r', encoding='utf-8') as stopwords_file:
        stopwords = [line.strip() for line in stopwords_file]

    # Build the word cloud.
    # Unknown selectors yield None so the fallback branch is actually taken
    # (the old " " default was non-empty and made that branch unreachable).
    icon_name = _ICON_NAMES.get(icon)
    pic = str(icon) + '.png'
    if icon_name:
        gen_stylecloud(
            text=result,
            size=2048,                   # width and height of the image
            icon_name=icon_name,
            font_path='simsun.ttc',
            max_font_size=400,           # largest font size in the cloud
            max_words=3000,              # maximum number of words drawn
            custom_stopwords=stopwords,  # custom stop-word list
            output_name=pic,
        )
    else:
        # No recognised icon: use stylecloud's defaults, but still apply the
        # stop words so the fallback output is filtered the same way.
        gen_stylecloud(
            text=result,
            font_path='simsun.ttc',
            custom_stopwords=stopwords,
            output_name=pic,
        )
    return pic


# Script entry point: render one cloud per available icon.
if __name__ == '__main__':
    for icon_id in ("1", "2", "3", "4", "5", "6"):
        jieba_cloud("王小波与李银河书信集.txt", icon_id)
炫酷的词云来了: