# 完整的中英文词频统计 (complete Chinese and English word-frequency count)
def getTxt(path="zmy.txt"):
    """Read a text file and normalise it for word counting.

    The text is lower-cased and every character in the punctuation string
    below is replaced by a space, so a later ``str.split()`` yields bare
    words.

    Args:
        path: File to read. Defaults to ``"zmy.txt"``.

    Returns:
        The normalised text as one string.
    """
    # No encoding is passed, so the platform default is used.
    # The context manager closes the handle even if read() raises.
    with open(path) as f:
        txt = f.read()
    txt = txt.lower()
    for ch in '!"@#$%^&*()+,-./:;<=>?@[\\]_`~{|}':
        # str.replace returns a new string; without rebinding the result
        # the punctuation would never actually be removed.
        txt = txt.replace(ch, " ")
    return txt


def tskeSecond(elem):
    """Sort key: return the item at index 1 (the count) of a pair."""
    return elem[1]


def countWords(words):
    """Return a dict mapping each word to how often it occurs in *words*.

    Single pass over the input; keys appear in first-seen order.
    """
    counts = {}
    for w in words:
        counts[w] = counts.get(w, 0) + 1
    return counts


if __name__ == "__main__":
    # Split the normalised text on whitespace and count each word.
    zmy = getTxt().split()
    dic = countWords(zmy)
    wcList = list(dic.items())
    # Most frequent words first; the sort is stable, so ties keep
    # first-seen order.
    wcList.sort(key=tskeSecond, reverse=True)
    print(wcList)
def countTokens(tokens):
    """Return a dict mapping each token to how often it occurs in *tokens*.

    Single pass over the input; keys appear in first-seen order.
    """
    counts = {}
    for tok in tokens:
        counts[tok] = counts.get(tok, 0) + 1
    return counts


if __name__ == "__main__":
    # Imported here so countTokens stays importable on machines where
    # jieba is not installed; the script itself still requires it.
    import jieba

    # The context manager closes the handle even if read() raises.
    with open("zhong.txt", "r", encoding="utf-8") as f:
        text = f.read()
    # Delete the listed punctuation marks before segmentation.
    for ch in ",。“”":
        text = text.replace(ch, "")
    # Materialise the result of jieba.cut so it can be printed and counted.
    word = list(jieba.cut(text))
    print(word)
    worddic = countTokens(word)
    print(worddic)
    wcList = list(worddic.items())
    # Most frequent tokens first; the sort is stable, so ties keep
    # first-seen order.
    wcList.sort(key=lambda x: x[1], reverse=True)
    print(wcList)