中英文词频统计
第一种方法:
# Method 1: word-frequency count for sunstr.txt, treating "," and "." as
# word separators.
from collections import Counter

# Read the whole file and lower-case it so counting is case-insensitive.
# The context manager closes the handle even if reading raises.
with open("sunstr.txt", "r", encoding="utf-8") as fo:
    sunstr = fo.read().lower()

# str.replace returns a new string; the result has to be assigned back,
# otherwise the punctuation stays glued to the neighbouring words.
for ch in ",.":
    sunstr = sunstr.replace(ch, " ")

# From here on `sunstr` is the list of whitespace-separated words.
sunstr = sunstr.split()
sunstrset = set(sunstr)

# Count every word in a single pass instead of rescanning the whole list
# once per unique word.
counts = Counter(sunstr)
dic = {i: counts[i] for i in sunstrset}
print(dic)

for key in dic:
    print(key, dic[key])

wcList = list(dic.items())


def tskeSecond(elem):
    """Return the second item of a (word, count) pair; used as the sort key."""
    return elem[1]


# Most frequent words first.
wcList.sort(key=tskeSecond, reverse=True)
print(wcList)
第二种方法:
# Method 2: same word-frequency count, with the text clean-up factored into
# getTxt() and a wider set of punctuation treated as separators.
from collections import Counter


def getTxt():
    """Return the text of sunstr.txt, lower-cased, with punctuation blanked.

    Every character of the punctuation string below is replaced by a single
    space so that a later str.split() yields bare words.

    Returns:
        str: the normalised file contents.

    Raises:
        OSError: if sunstr.txt cannot be opened or read.
    """
    # Read as UTF-8, matching how method 1 reads the same file; the context
    # manager closes the handle.
    with open("sunstr.txt", encoding="utf-8") as f:
        txt = f.read()
    txt = txt.lower()
    for ch in '!"@#$%^&*()+,-./:;<=>?@[\\]_`~{|}':
        # str.replace returns a new string, so the result must be kept.
        txt = txt.replace(ch, " ")
    return txt


sunstr = getTxt().split()
sunstrset = set(sunstr)

# Count every word in a single pass instead of rescanning the whole list
# once per unique word.
counts = Counter(sunstr)
dic = {i: counts[i] for i in sunstrset}
print(dic)

for key in dic:
    print(key, dic[key])

wcList = list(dic.items())


def tskeSecond(elem):
    """Return the second item of a (word, count) pair; used as the sort key."""
    return elem[1]


# Most frequent words first.
wcList.sort(key=tskeSecond, reverse=True)
print(wcList)
中文词频统计:
# Chinese word-frequency count for 123.txt, using jieba for segmentation.
from collections import Counter

import jieba

# Read the whole file; the context manager closes the handle instead of
# leaving it to the garbage collector.
with open("123.txt", "r", encoding="utf-8") as f:
    word = f.read()

# Drop the listed punctuation marks before segmentation so they are not
# counted as tokens.
for ch in ",。“”":
    word = word.replace(ch, "")

# Materialise the tokens produced by jieba.cut so they can be printed,
# de-duplicated and counted.
word = list(jieba.cut(word))
print(word)

wordset = set(word)

# Count every token in a single pass instead of rescanning the whole list
# once per unique token.
counts = Counter(word)
worddic = {i: counts[i] for i in wordset}
print(worddic)

# Most frequent tokens first.
wcList = sorted(worddic.items(), key=lambda x: x[1], reverse=True)
print(wcList)