统计一篇英文文章内每个单词出现频率,并返回出现频率最高的前10个单词及其出现次数
统计一篇英文文章内每个单词出现频率,并返回出现频率最高的前10个单词及其出现次数
import re
from collections import Counter


def count_words(text):
    """Count how often each word occurs in *text*.

    A word is a maximal run of word characters (letters, digits and
    underscore). Matching is case-sensitive, so "The" and "the" are
    counted separately.

    Args:
        text: The text to analyse.

    Returns:
        A Counter mapping each word to its number of occurrences. Empty
        or separator-only text yields an empty Counter.
    """
    # Collect runs of word characters instead of splitting on single non-word
    # characters: splitting emits an empty string between every pair of
    # adjacent separators (e.g. ", " or a blank line), and that "" would then
    # be counted as the most frequent "word".
    return Counter(re.findall(r"\w+", text))


def main():
    """Print the word counts of a.txt, then its 10 most frequent words."""
    with open("a.txt", "r", encoding="utf-8") as f:
        txt = f.read()

    counts = count_words(txt)
    print(counts)

    # List of (word, count) pairs, most frequent first.
    ret = counts.most_common(10)
    print(ret)


if __name__ == "__main__":
    main()
改进版本.
import re
from collections import Counter


def count_words(text):
    """Count how often each word occurs in *text*.

    A word is a maximal run of word characters (letters, digits and
    underscore). Matching is case-sensitive.

    Args:
        text: The text to analyse.

    Returns:
        A Counter mapping each word to its number of occurrences. Empty
        or separator-only text yields an empty Counter.
    """
    # Collect runs of word characters instead of splitting on single non-word
    # characters: splitting emits an empty string between every pair of
    # adjacent separators, and that "" would end up ranked as the most
    # frequent "word".
    return Counter(re.findall(r"\w+", text))


def main():
    """Print word statistics for models.py.

    Prints, in order: the full word -> count mapping, the 10 most frequent
    (word, count) pairs, and the list of just those words.
    """
    with open("models.py", encoding="utf-8") as f:
        txt = f.read()

    # 1. Mapping of every word to the number of times it occurs.
    counts = count_words(txt)
    print(counts)

    # 2. The 10 most frequent words together with their counts.
    ret = counts.most_common(10)
    print(ret)

    # 3. Only the words of those top-10 entries, in the same order.
    lt = [word for word, _ in ret]
    print(lt)


if __name__ == "__main__":
    main()
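打印结果(这是用 re.split("\W", txt) 切分时得到的输出: 相邻的两个非单词字符之间会被切出空字符串 '', 所以 '' 被当成"单词"统计并排在了第一位; 过滤掉空字符串, 或改用 re.findall(r"\w+", txt) 提取单词, 即可避免这一问题):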
C:\Python36\python.exe C:/Users/NorthK_PC/PycharmProjects/pageproject/app01/111.py Counter({'': 62, 'models': 9, 'title': 2, 'IntegerField': 2, 'self': 2, 'from': 1, 'django': 1, 'db': 1, 'import': 1, 'Create': 1, 'your': 1, 'here': 1, 'class': 1, 'Book': 1, 'Model': 1, 'nid': 1, 'AutoField': 1, 'primary_key': 1, 'True': 1, 'CharField': 1, 'max_length': 1, '32': 1, 'publishDate': 1, 'DateField': 1, 'price': 1, 'DecimalField': 1, 'max_digits': 1, '5': 1, 'decimal_places': 1, '2': 1, 'keepNum': 1, 'commNum': 1, 'def': 1, '__str__': 1, 'return': 1}) [('', 62), ('models', 9), ('title', 2), ('IntegerField', 2), ('self', 2), ('from', 1), ('django', 1), ('db', 1), ('import', 1), ('Create', 1)] ['', 'models', 'title', 'IntegerField', 'self', 'from', 'django', 'db', 'import', 'Create']