# 综合练习:词频统计

import operator
import string
from collections import Counter

# Characters treated as word separators: ASCII punctuation and digits.
SEPARATORS = string.punctuation + string.digits

# Function words (prepositions, articles, ...) excluded from the ranking.
STOP_WORDS = frozenset({'to', 'for', 'and', 'of', 'is', 'a', 'an', 'the'})


def strip_separators(text: str) -> str:
    """Return *text* with every punctuation mark and digit replaced by a space."""
    # One translate() pass instead of one str.replace() call per separator.
    return text.translate(str.maketrans(SEPARATORS, " " * len(SEPARATORS)))


def tokenize(text: str) -> list:
    """Split *text* into lowercase words, dropping punctuation and digits."""
    return strip_separators(text).lower().split()


def count_words(text: str) -> Counter:
    """Count how often each lowercase word occurs in *text*.

    Keys appear in order of first occurrence, as with a plain dict.
    """
    return Counter(tokenize(text))


def rank_words(counts, stop_words=STOP_WORDS) -> list:
    """Return ``(word, count)`` pairs sorted by descending count.

    Words in *stop_words* are left out.  The sort is stable, so words with
    equal counts keep the order in which they appear in *counts*.
    """
    kept = [(word, n) for word, n in counts.items() if word not in stop_words]
    return sorted(kept, key=operator.itemgetter(1), reverse=True)


def main(path: str = "a.txt", top_n: int = 20) -> None:
    """Read *path* and print each step of the word-frequency analysis.

    Prints, in order: the separator characters, the text with separators
    blanked out, the lowercased text, the word list, every word with its
    count, the vocabulary without stop words, the ranked list, and finally
    the *top_n* most frequent words.
    """
    # The context manager closes the file even if reading raises.
    with open(path, "r") as source:
        text = source.read()

    # Replace punctuation and digits with spaces.
    stripped = strip_separators(text)
    print(SEPARATORS)
    print(stripped)
    print()

    # Lowercase the text and keep the result, so that "The" and "the" are
    # counted as the same word.
    lowered = stripped.lower()
    print(lowered)

    words = lowered.split()
    print(words)

    # Single pass over the word list instead of rescanning it for every word.
    counts = Counter(words)
    for word, n in counts.items():
        print(word, ':', n)

    print()
    print()
    print()

    # Drop prepositions, articles and similar function words.
    content_words = set(counts) - STOP_WORDS
    print(content_words)
    for word in content_words:
        print(word, ":", counts[word])

    # Rank the remaining words; stop words are excluded here as well so they
    # do not crowd out the top of the list.
    ranked = rank_words(counts)
    print(ranked)
    for word, n in ranked:
        print(word, ':', n)

    # Print the most frequent words.
    print("词频最大TOP20:")
    for word, n in ranked[:top_n]:
        print(word, ':', n)


if __name__ == "__main__":
    main()

 

# posted @ 2018-03-26 11:30  087林金龙  阅读(96)  评论(0)  编辑  收藏  举报