[整理] Python 统计词频

简单的数据词频统计

import string
from collections import Counter

# Sample input: whitespace-separated tokens ("requset" occurs twice).
text = "http requset highclient springboot requset"
# Lower-case first so that differently-cased spellings count as one word.
data = text.lower().split()
# Counter tallies every token in a single pass, replacing the manual
# "if word not in words ... else ..." bookkeeping.
words = Counter(data)
# (word, count) tuples compare on the word first, so this orders the pairs
# by word in reverse alphabetical order, not by count.  Use
# words.most_common() instead if a by-frequency ordering is wanted.
result = sorted(words.items(), reverse=True)
print(result)
输出
[('springboot', 1), ('requset', 2), ('http', 1), ('highclient', 1)]

英文书词频统计(瓦尔登湖)

import string
from collections import Counter

# Location of the plain-text book to analyse.
path = 'D:/python3/Walden.txt'
with open(path, 'r', encoding='utf-8') as text:
    # Split on whitespace, then trim leading/trailing punctuation and
    # lower-case each token so "Pond," and "pond" count as the same word.
    stripped = (raw_word.strip(string.punctuation).lower()
                for raw_word in text.read().split())
    # Tokens made only of punctuation (e.g. "--") strip down to the empty
    # string; drop them so they are not reported as a word.
    words = [word for word in stripped if word]

# Counter tallies all words in one pass.  The previous
# {w: words.count(w) for w in set(words)} rescanned the whole list once per
# distinct word, which is quadratic on a book-sized input.
counts_dict = Counter(words)

# most_common() yields (word, count) pairs from most to least frequent; words
# with equal counts keep first-seen order, so the output is the same on
# every run.
for word, count in counts_dict.most_common():
    print('{} -- {} times'.format(word, count))
posted @ 2020-12-12 10:23  哆啦梦乐园  阅读(156)  评论(0)  编辑  收藏  举报