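# Combined exercise with jieba: strip punctuation, segment and save, count word frequencies, delete excluded words, print the result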
# Word-frequency exercise built on jieba:
#   1. segment the input text and save one token per line;
#   2. strip punctuation, segment again, count the tokens whose length is
#      not 1, drop the excluded words and print the 15 most frequent ones.
from collections import Counter

import jieba

fp1 = r'D:/python/a.txt'
outph = r'D:/python/out.txt'

# Part 1: read the text, segment it and write one token per line.
# `with` closes each file even when reading or writing raises.
with open(fp1, 'r', encoding='utf-8') as f:
    txt = f.read().strip()

words = jieba.lcut(txt)
with open(outph, 'w', encoding='utf-8') as f:
    f.writelines(word + '\n' for word in words)

# Part 2: strip punctuation and count word frequencies.
# Every character of `bd` (including the trailing space) is removed.
bd = '[’!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~]+,。!?“”《》:、. '
exlutes = {'作者', '之后'}

# One translate() pass deletes the same characters that a chain of
# per-character replace() calls would.
txt = txt.translate(str.maketrans('', '', bd))
words = jieba.lcut(txt)

# Count every token except the single-character ones.
counts = Counter(word for word in words if len(word) != 1)

# pop() with a default: an excluded word that never occurs in the text
# must not abort the script with KeyError.
for word in exlutes:
    counts.pop(word, None)

# Highest count first; the sort is stable, so ties keep first-seen order.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)

# Slicing copes with texts that contain fewer than 15 distinct words.
for word, count in items[:15]:
    print("{0:>10}---{1:<5}".format(word, count))