聊斋文本统计

import jieba
import os,codecs
from collections import Counter
# Tokens that are punctuation or line breaks rather than words; they are kept
# out of the frequency table. Both the ASCII comma and the full-width comma
# are listed because Chinese prose normally uses the full-width form.
EXCLUDED_TOKENS = frozenset({',', ',', '。', '\r', '\n'})


def count_words(tokens, excluded=EXCLUDED_TOKENS):
    """Count how often each token occurs, ignoring the excluded tokens.

    Args:
        tokens: Iterable of string tokens, e.g. the list returned by
            ``jieba.lcut``.
        excluded: Collection of tokens that must not be counted.

    Returns:
        A ``collections.Counter`` mapping each remaining token to its
        number of occurrences.
    """
    # The membership test must be applied to each individual token; testing
    # the token list as a whole would never match and filter nothing.
    return Counter(token for token in tokens if token not in excluded)


def main():
    """Segment liaozhai.txt with jieba and print its word frequencies.

    Prints the full frequency table, then the 20 most common
    (word, count) pairs one per line, then the fixed ID line.
    """
    # The context manager closes the file even if reading fails.
    with open('liaozhai.txt', 'r', encoding='utf-8') as file:
        text = file.read()

    counts = count_words(jieba.lcut(text))

    print(counts)
    for entry in counts.most_common(20):
        print(entry)

    print('2020310143139')


if __name__ == '__main__':
    main()


posted @ 2021-11-14 10:46  doublemiracle  阅读(22)  评论(0)  编辑  收藏  举报