eno_xyn

导航

作业

import jieba
# Word-frequency ranking for the novel text at SOURCE_PATH: segment the text
# with jieba, merge aliases of the same character under one name, drop the
# words listed in `excludes`, and print the most frequent remaining words.

SOURCE_PATH = 'D:/红楼梦/红楼梦.txt'

# How many entries of the ranking are printed.
TOP_N = 20

# Words removed from the ranking after counting.
excludes = {
    "什么", "一个", "我们", "你们", "如今", "说道", "知道", "姑娘",
    "起来", "这里", "出来", "众人", "那里", "自己",
    "太太", "一面", "只见", "两个", "没有", "怎么", "不是", "不知", "这个", "听见",
    "这样", "进来", "咱们", "就是", "东西", "告诉", "回来", "只是", "大家",
    "老爷", "只得", "这些", "他们", "丫头", "不敢", "出去", "所以", "薛姨妈", "不过",
    "不好", "姐姐", "的话", "一时", "鸳鸯", "过来", "不能", "心里", "二爷",
    "如此", "银子", "今日", "二人", "答应", "她们", "那么", "几个", "还有", "只管", "说话",
    "那边", "一回", "这么",
}

# Alias -> canonical name under which the occurrence is counted.
# "王夫人" is intentionally NOT listed: she is a different character from
# "王熙凤", and counting her under that name inflated 王熙凤's total.
ALIASES = {
    "凤姐": "王熙凤",
    "凤姐儿": "王熙凤",
    "老太太": "贾母",
    # NOTE(review): "奶奶" is a general form of address in the novel and may
    # not always mean 贾母 -- kept as in the original, confirm before relying
    # on 贾母's count.
    "奶奶": "贾母",
}


def count_words(words, excluded=excludes):
    """Count multi-character words, merging aliases and dropping exclusions.

    Args:
        words: iterable of segmented words (e.g. the result of jieba.lcut).
        excluded: words to remove from the result; defaults to `excludes`.

    Returns:
        dict mapping each remaining (canonical) word to its occurrence count.
    """
    counts = {}
    for word in words:
        # Single characters are punctuation/particles rather than names.
        if len(word) == 1:
            continue
        name = ALIASES.get(word, word)
        counts[name] = counts.get(name, 0) + 1

    for word in excluded:
        # pop() with a default: an excluded word that never occurred in the
        # text must not abort the run with KeyError.
        counts.pop(word, None)
    return counts


def top_items(counts, n=TOP_N):
    """Return up to `n` (word, count) pairs, highest count first.

    Ties keep the order in which the words were first counted (stable sort).
    Slicing instead of indexing means fewer than `n` distinct words is fine.
    """
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)[:n]


def main():
    """Read the novel, count the words and print the ranking."""
    # Context manager so the file handle is closed once the text is read.
    with open(SOURCE_PATH, "r", encoding='utf-8') as source:
        txt = source.read()

    counts = count_words(jieba.lcut(txt))
    for word, count in top_items(counts):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()

 

posted on 2021-11-08 22:53  eno_xyn  阅读(23)  评论(0)  编辑  收藏  举报