综合练习:词频统计

from collections import Counter

# Input corpus, read as UTF-8.
INPUT_PATH = 'news.txt'
# NOTE(review): the results are appended to the same file that is read as
# input, so every run adds result lines to the corpus that the next run will
# count. This mirrors the original script's target; confirm whether a separate
# output file was intended.
OUTPUT_PATH = 'news.txt'
# Punctuation characters deleted from the text before splitting into words.
PUNCTUATION = ',.!?'
# Common words excluded from the ranking.
STOP_WORDS = frozenset({'the', 'and', 'in', 'to', 'with', 'it'})
# Maximum number of ranked words written to the output file.
TOP_N = 20


def count_words(text, punctuation=PUNCTUATION):
    """Return a Counter mapping each lower-cased word in *text* to its count.

    Every character in *punctuation* is deleted (not replaced by a space)
    before the text is lower-cased and split on whitespace.
    """
    # One pass over the text instead of one str.replace() per character.
    cleaned = text.translate(str.maketrans('', '', punctuation))
    return Counter(cleaned.lower().split())


def top_words(counts, stop_words=STOP_WORDS, limit=TOP_N):
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    Words listed in *stop_words* are skipped. Words with equal counts keep
    the order in which they first appear in *counts* (the sort is stable).
    Fewer than *limit* pairs are returned when fewer words are available.
    """
    ranked = [(word, n) for word, n in counts.items() if word not in stop_words]
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    # Slicing never raises, unlike indexing range(limit) on a short list.
    return ranked[:limit]


def main():
    """Count the words in INPUT_PATH, print them, and save the top TOP_N."""
    with open(INPUT_PATH, 'r', encoding='utf-8') as source:
        news = source.read()

    counts = count_words(news)

    # Print every distinct word once, stop words included.
    for word, n in counts.items():
        print(word, n)

    lines = ['{} {}\n'.format(word, n) for word, n in top_words(counts)]
    # Write with the same encoding the file was read with.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as target:
        target.writelines(lines)


if __name__ == '__main__':
    main()

  

  

posted on 2018-03-27 17:43  hasbb  阅读(115)  评论(0)  编辑  收藏  举报

导航