综合练习:词频统计

from collections import Counter

# Input file analysed when the script is run directly.
SOURCE_PATH = '/Users/Administrator/Desktop/we.txt'
# Punctuation characters deleted from the text before it is split into words.
PUNCTUATION = ''',.!:?'''
# Common words that are left out of the frequency table.
EXCLUDE = frozenset({'a', 'the', 'for', 'in', 'i', 'my', 'that', 'of',
                     'have', 'and', 'had', 'it', 'are'})
# Maximum number of entries shown in the final ranking.
TOP_N = 10


def tokenize(text, punctuation=PUNCTUATION):
    """Return the lower-cased, whitespace-separated words of *text*.

    Every character listed in *punctuation* is deleted first.  All of them
    are removed in a single pass; the earlier loop restarted from the raw
    text on each iteration, so only the last character was actually removed.
    """
    cleaned = text.translate(str.maketrans('', '', punctuation))
    return cleaned.lower().split()


def count_words(words, exclude=EXCLUDE):
    """Return a Counter of how often each word in *words* occurs.

    Words contained in *exclude* are skipped while counting, so nothing has
    to be popped afterwards (popping failed with KeyError whenever an
    excluded word did not occur in the text).  Each word is counted exactly
    once per occurrence; the earlier version counted every word twice.
    """
    return Counter(word for word in words if word not in exclude)


def main(path=SOURCE_PATH):
    """Print the text at *path*, its words, and the most frequent words."""
    # The context manager closes the file even if reading fails.
    # The encoding is left at the platform default, as before.
    with open(path, 'r') as source:
        text = source.read()
    print(text)

    words = tokenize(text)
    for word in words:
        print(word)

    # most_common() returns at most TOP_N entries, so texts with fewer
    # distinct words no longer raise IndexError.
    for entry in count_words(words).most_common(TOP_N):
        print(entry)


if __name__ == '__main__':
    main()

 

 

 

posted @ 2018-03-26 11:25  097黄大贞  阅读(124)  评论(0)  编辑  收藏  举报