词频统计
f=open('ghost.txt','r') news=f.read() f.close() sep=''':;""'',.?!-''' exclude={'the','me','you','my','of','to','in','will','for','and','from'} for c in sep: news=news.replace(c,'') wordList=news.lower().split() wordDict={} ''' for i in wordList: wordDict[i]=wordDict.get(i,0)+1 ''' wordSet=set(wordList) - exclude for i in wordSet: wordDict[i]=wordList.count(i) dictList=list(wordDict.items()) dictList.sort(key=lambda x:x[1],reverse=True) f=open('result.txt','a') for i in range(20): f.write(dictList[i][0]+' '+str(dictList[i][1])+'\n') f.close()
PS.这是经过分析后的结果