中文词频练习

# -*- coding:UTF-8 -*-
# -*- author: Hiro -*-


import unicodedata
from collections import Counter

import jieba


# Word-frequency exercise: tokenize a.txt with jieba, drop noise tokens
# (punctuation / whitespace), and print the 20 most frequent tokens.

# Read the whole corpus. The context manager closes the file handle, which
# the original script left open.
with open('a.txt', 'r', encoding='utf-8') as txt:
    text1 = txt.read()

# jieba.lcut already returns a list, so no extra list() copy is needed.
text2 = jieba.lcut(text1)

# Tokens that must never be counted.
# NOTE(review): the original set literal was garbled -- its punctuation
# entries were lost and a stray quote opened an unterminated ''' string, so
# the script did not even parse. Only '', ' ' and '?' could be recovered; the
# punctuation/whitespace check in _is_noise stands in for the lost entries.
# Confirm against the author's intended stop list.
delete = {'', ' ', '?'}


def _is_noise(token):
    """Return True if *token* is in ``delete`` or has no word characters.

    A token counts as noise when every character is punctuation (Unicode
    category P*), a separator (Z*), or a control/format character (C*),
    which covers newlines and full-width punctuation.
    """
    if token in delete:
        return True
    return all(unicodedata.category(ch)[0] in 'PZC' for ch in token)


# Count every remaining token; Counter is a dict subclass, so text3 can still
# be used like the plain dict the original built.
text3 = Counter(a for a in text2 if not _is_noise(a))

# most_common() orders by count, highest first; ties keep first-seen order,
# matching the stable sorted(..., reverse=True) the original used.
text4 = text3.most_common()

# Slice instead of indexing range(20) so a short text with fewer than
# 20 distinct tokens does not raise IndexError.
for c in text4[:20]:
    print(c)

 

 

posted @ 2018-03-28 19:59  Hiro-D  阅读(77)  评论(0)  编辑  收藏  举报