# 词频统计 (Word frequency count)

# 1. Read the whole text of love.txt (UTF-8) into loveText.
# The context manager closes the file even if read() raises.
with open('love.txt', mode='r', encoding='utf-8') as loveFile:
    loveText = loveFile.read()
print(loveText)

# 2. Replace punctuation and newlines with spaces so the text can be
#    split into words.
replaceList = [',', '.', "'", '\n']
for c in replaceList:
    # str.replace returns a new string, so rebind loveText on every pass
    # to make the replacements accumulate.
    loveText = loveText.replace(c, ' ')
print(loveText)

# 3. Split the text into a list of words.
# split() with no argument splits on any run of whitespace and never
# yields empty strings, so consecutive spaces do not become "words".
loveList = loveText.split()
print(loveList)

# 4. Count how often each word occurs and rank the words by frequency.
loveSet = set(loveList)  # the distinct words
print(loveSet)

# Tally every word in a single pass over the word list.
loveDict = {}
for word in loveList:
    loveDict[word] = loveDict.get(word, 0) + 1

# Report the finished tally once, after counting is complete.
print(loveDict)
for d in loveDict:
    print(d, loveDict[d])

# (word, count) pairs, sorted so the most frequent word comes first.
wordCountList = list(loveDict.items())
print(wordCountList)
wordCountList.sort(key=lambda x: x[1], reverse=True)
print(wordCountList)

# Show the 20 most frequent words. Slicing (rather than indexing 0..19)
# also works when there are fewer than 20 distinct words.
for item in wordCountList[:20]:
    print(item)

# Write every "count word" pair, one per line, to loveCount.txt.
# The context manager closes the file even if a write fails.
# NOTE(review): mode='a' appends, so each run adds another full listing
# to the file -- confirm whether 'w' (overwrite) was intended.
with open('loveCount.txt', mode='a', encoding='utf-8') as loveCountFile:
    for word, count in wordCountList:
        loveCountFile.write(str(count) + ' ' + word + '\n')

 

# posted @ 2018-06-20 21:19  王明哲0  阅读(120)  评论(0)  编辑  收藏  举报