中文词频统计
import jieba news = open('bignews.txt','r').read() news_cut = jieba.lcut(news) dict = {} for i in set(news_cut): dict[i]=news_cut.count(i) delete={'的','和','了','在','为','是','为','我', ' ','-','\n',',','。','?','!','“','”',':',';','、','.','‘','’'} for i in delete: if i in dict: del dict[i] nesw_print = sorted(dict.items(), key = lambda d:d[1], reverse = True) for i in range(20): print(nesw_print[i])