10行代码使用python统计词频
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Print the most frequent words of a text file, one "word count" pair per line."""
import re
from collections import Counter

# File analysed when this module is run as a script.
TEXT_PATH = "C:\\Users\\陶敏\\Documents\\Pyscript\\test.txt"

# A word ends at a comma, space, semicolon, full stop, newline or tab.
_DELIMITERS = re.compile(r'[, ;.\n\t]')


def count_words(text):
    """Return a Counter mapping every word in *text* to its occurrence count.

    Words are the non-empty pieces left after splitting on the delimiter
    characters. Matching is case-sensitive ("The" and "the" are distinct).
    """
    # Adjacent delimiters (", " or ".\n") make re.split yield empty strings;
    # filtering while building avoids removing items from a list that is
    # being iterated, which silently skips elements.
    return Counter(word for word in _DELIMITERS.split(text) if word)


def top_words(text, n=3):
    """Return the *n* most frequent words in *text* as (word, count) pairs.

    Pairs are ordered by descending count; words with equal counts keep the
    order in which they first appear. Fewer than *n* pairs are returned when
    the text holds fewer distinct words.
    """
    return count_words(text).most_common(n)


def main(path=TEXT_PATH, n=3):
    """Read the file at *path* and print its *n* most frequent words."""
    # The encoding is deliberately left at the platform default so the script
    # keeps reading the same files it could read before.
    with open(path) as source:
        text = source.read()
    for word, count in top_words(text, n):
        print(word, count)


if __name__ == "__main__":
    main()
使用哈利波特文档作为测试文件,下载地址:https://pan.baidu.com/share/link?shareid=424773&uk=3744444146
测试结果如下
C:\python\python36\python3.exe C:/Users/陶敏/PycharmProjects/day1/.idea/cipin.py
the 3305
to 1841
and 1797