# 作业 (Homework): word-frequency count for 红楼梦
import jieba txt=open('D:/红楼梦/红楼梦.txt',"r",encoding='utf-8').read() excludes = {"什么","一个","我们","你们","如今","说道","知道","姑娘",\ "起来","这里","出来","众人","那里","自己",\ "太太","一面","只见","两个","没有","怎么","不是","不知","这个","听见",\ "这样","进来","咱们","就是","东西","告诉","回来","回来","只是","大家",\ "老爷","只得","这些","他们","丫头","不敢","出去","所以","薛姨妈","不过",\ "不好","姐姐","的话","一时","鸳鸯","过来","不能","心里","二爷","过来",\ "如此","银子","今日","二人","答应","她们","那么","几个","还有","只管","说话",\ "那边","一回","这么"} words =jieba.lcut(txt) counts = {} for word in words: if len(word) == 1: continue elif word == "凤姐" or word == "王夫人" or word =="凤姐儿": rword = "王熙凤" elif word == "老太太" or word =="贾母" or word =="奶奶": rword = "贾母" else: rword = word counts[rword] = counts.get(rword,0) + 1 for word in excludes: del counts[word] items = list(counts.items()) items.sort(key=lambda x:x[1], reverse = True) for i in range(20): word,count=items[i] print("{0:<10}{1:>5}".format(word,count))