Python 词频统计


def word_frequency(
    stopwords_path='E:\\PythonFile\\tingyongci.txt',
    comments_path='E:\\PythonFile\\jd\\phone\\3133927.txt',
    output_path='E:\\PythonFile\\jd\\phone\\test.txt',
    encoding=None,
):
    """Count word frequencies in a pre-segmented text file, skipping stop words.

    The comments file is expected to already be tokenised into
    whitespace-separated words. Every token that is not listed in the
    stop-word file is counted; the counts are then printed and appended to
    the output file, one ``"<word> <count>"`` pair per line, ordered by
    ascending count (ties keep first-seen order).

    Args:
        stopwords_path: Text file holding whitespace-separated stop words.
        comments_path: Text file holding the whitespace-separated tokens.
        output_path: Text file the results are appended to (mode ``'a'``),
            so repeated runs accumulate results instead of overwriting.
        encoding: Encoding used for all three files. ``None`` keeps the
            platform default of ``open``.

    Raises:
        OSError: If any of the files cannot be opened.
    """
    from collections import Counter

    # Split the stop-word file into whole words. Calling list() on the raw
    # text would yield single characters, so multi-character stop words would
    # never match. A set also makes each membership test O(1).
    with open(stopwords_path, encoding=encoding) as stop_file:
        stop_words = set(stop_file.read().split())

    with open(comments_path, encoding=encoding) as comment_file:
        word_counts = Counter(
            word for word in comment_file.read().split()
            if word not in stop_words
        )

    # sorted() returns a new list; it must be kept and iterated, otherwise
    # the output stays in insertion order.
    ranked = sorted(word_counts.items(), key=lambda item: item[1])

    # The context manager guarantees the file is closed even if a write fails.
    with open(output_path, mode='a', encoding=encoding) as out_file:
        for word, count in ranked:
            print(word, count)
            out_file.write(word + ' ' + str(count) + '\n')
 

 


用jieba分词处理字符串,将分词结果存到TXT文件中
去停用词



posted @ 2018-03-07 16:51  倚楼灬风细  阅读(350)  评论(0)  编辑  收藏  举报