中文词频统计

from collections import Counter

import jieba
# Chinese word-frequency statistics: segment the text of loe.txt with jieba,
# count tokens longer than one character, and print the 20 most frequent.

# Read the whole corpus; the context manager guarantees the file is closed.
with open("loe.txt", 'r', encoding='utf-8') as f:
    txt = f.read()

# Segment the text into a list of tokens.
news = list(jieba.cut(txt))

# Tokens that must never appear in the statistics.
exp = {','}

# Count every qualifying token in a single pass over the token list.
# (Calling news.count() once per distinct token rescans the list each time.)
# Single-character tokens are skipped, as are the explicitly excluded ones.
dic = Counter(w for w in news if len(w) > 1 and w not in exp)

# (token, count) pairs ordered by count, highest first.
word = sorted(dic.items(), key=lambda x: x[1], reverse=True)

# Slicing instead of indexing 0..19 avoids an IndexError when the text
# contains fewer than 20 distinct qualifying tokens.
for item in word[:20]:
    print(item)

posted @ 2017-09-29 16:25  017黄乐仪  阅读(98)  评论(0)  编辑  收藏  举报