# 期末综合大作业:词频统计 (Final comprehensive assignment: word-frequency statistics)
"""Word-frequency statistics for a UTF-8 text file.

Steps (numbered as in the original assignment script):
  1. read the text,
  2. replace separator characters with spaces,
  3. split the text into words,
  4. count how often each word occurs,
  5. sort the (word, count) pairs by descending count,
  6. print the top 20 pairs,
  7. write one "<count> <word>" line per word to the result file.
"""
from collections import Counter

INPUT_PATH = 'Hah..txt'
OUTPUT_PATH = 'HahCount.txt'
# Characters that are turned into spaces before splitting.  Extend this
# tuple if more punctuation should be treated as a word boundary.
SEPARATORS = ("'", ',', '\n')
TOP_N = 20


def clean_text(text, separators=SEPARATORS):
    """Return *text* with every separator replaced by a single space.

    Empty separators are skipped: ``text.replace('', ' ')`` would insert a
    space between every character and turn each character into a "word".
    """
    for separator in separators:
        if separator:
            text = text.replace(separator, ' ')
    return text


def split_words(text):
    """Split *text* on runs of whitespace and return the list of words.

    ``str.split()`` without an argument collapses consecutive whitespace, so
    no empty-string "words" are produced (``split(' ')`` would yield them).
    """
    return text.split()


def count_words(words):
    """Return a ``Counter`` mapping each word in *words* to its frequency."""
    # A single pass; calling list.count() per distinct word is quadratic.
    return Counter(words)


def rank_words(counts):
    """Return ``(word, count)`` pairs sorted by descending count.

    Words with equal counts keep the order in which they were first seen,
    so the result is deterministic for a given input.
    """
    return counts.most_common()


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH, top_n=TOP_N):
    """Run the whole pipeline, printing each intermediate result.

    Args:
        input_path: UTF-8 text file to analyse.
        output_path: file that receives one "<count> <word>" line per word.
        top_n: how many of the most frequent words to print in step 6.

    Raises:
        OSError: if the input file cannot be read or the output file
            cannot be written.
    """
    # 1. Read the source text; ``with`` closes the file even on error.
    with open(input_path, mode='r', encoding='utf-8') as source:
        text = source.read()
    print(text)

    # 2. Turn separator characters into spaces.
    text = clean_text(text)
    print(text)

    # 3. Split into words.
    words = split_words(text)
    print(words)

    # 4. Count every distinct word.
    counts = count_words(words)
    print(set(counts))
    print(dict(counts))
    for word, count in counts.items():
        print(word, count)

    # 5. Sort by frequency, most frequent first.
    print(list(counts.items()))
    ranked = rank_words(counts)
    print(ranked)

    # 6. Top N; slicing is safe when there are fewer than N distinct words.
    for entry in ranked[:top_n]:
        print(entry)

    # 7. Save the result.  Mode 'w' (not 'a') so that re-running the script
    #    replaces the previous result instead of appending a duplicate copy.
    with open(output_path, mode='w', encoding='utf-8') as result:
        result.writelines(
            '{} {}\n'.format(count, word) for word, count in ranked
        )


if __name__ == '__main__':
    main()