中文词频统计及词云制作

import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Read the whole corpus; the context manager closes the file handle promptly.
with open('t.txt', 'r', encoding='utf-8') as f:
    fr = f.read()

# Segment the text into a list of words.
words = jieba.lcut(fr)

# Words to drop from the statistics.
# NOTE(review): '.....' looks like a placeholder -- replace with real stop words.
excludes = {'.....'}

# Tally each segmented word; single-character tokens are skipped.
counts = {}
for word in words:
    if len(word) == 1:
        continue
    counts[word] = counts.get(word, 0) + 1

# pop() with a default so an excluded word that never occurred is not an error.
for word in excludes:
    counts.pop(word, None)

# (word, count) pairs ordered from most to least frequent.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Print the top 20; slicing copes with texts that have fewer distinct words.
for word, count in items[:20]:
    print("{0:<10}{1:>5}".format(word, count))

# Path to a font file used to draw the cloud. None keeps WordCloud's default.
# NOTE(review): the default font may not contain Chinese glyphs -- point this
# at a CJK-capable .ttf/.otf if the words render as empty boxes.
font_path = None

# Build the cloud from the filtered counts so it agrees with the printed
# table. generate() expects a text string (not a (word, count) tuple), and
# generate_from_frequencies() rejects an empty mapping, hence the guard.
if counts:
    mywc = WordCloud(font_path=font_path).generate_from_frequencies(counts)
    plt.imshow(mywc)  # without imshow, plt.show() has nothing to display
    plt.axis("off")
    plt.show()

 

posted @ 2017-09-25 21:37  36-林秋雁  阅读(211)  评论(0)  编辑  收藏  举报