中文词频统计
import jieba txt = open('小说.txt','r',encoding='utf-8').read() for i in ',。、:;“” \u3000': txt = txt.replace(i,'') words = list(jieba.cut(txt)) dic = {} keys = set(words) for o in keys: if len(o)==1: continue else: dic[o] = words.count(o)+1 wc =list(dic.items()) wc.sort(key=lambda x:x[1],reverse=True) for i in range(20): print(wc[i])