# 聊斋文本统计 (word-frequency statistics for the Liaozhai text)
import jieba
import os,codecs
from collections import Counter
# Word-frequency statistics for 'liaozhai.txt': tokenize the text with jieba,
# count every token that is not punctuation or a line break, then print the
# full tally followed by the 20 most frequent tokens.

# Tokens that must not be counted as words (full-width comma, full stop,
# carriage return, newline).
_SKIPPED_TOKENS = {',', '。', '\r', '\n'}

# The context manager guarantees the file handle is closed even if the read
# fails; previously the handle was left open.
with open('liaozhai.txt', 'r', encoding='utf-8') as file:
    a = file.read()

# jieba.lcut returns the segmented text as a list of string tokens.
b = jieba.lcut(a)

# Filter on each token `i`. The earlier version compared the whole list `b`
# against the punctuation strings, which is always unequal, so nothing was
# ever filtered out.
d = Counter(i for i in b if i not in _SKIPPED_TOKENS)

print(d)
for i in d.most_common(20):
    print(i)

# Trailing identifier line emitted verbatim
# (presumably a student/submission ID -- confirm).
print('2020310143139')