词频写入 Excel

#!/usr/bin/python    
# -*- coding:utf-8 -*-   
 
import sys
# Python 2-only idiom: `reload(sys)` is called before `sys.setdefaultencoding`,
# presumably because the interpreter hides that function after start-up and
# reloading the module makes it reachable again.
# NOTE(review): `reload` is not a builtin on Python 3, so this line raises
# NameError there — the script as written only runs on Python 2.
reload(sys)

# Switch the implicit str <-> unicode conversion codec to UTF-8 so that mixing
# byte strings and unicode keywords later in the script does not fail on
# non-ASCII (Chinese) text.
sys.setdefaultencoding('utf-8')

import jieba
import jieba.analyse
import xlwt  # library used to write the Excel (.xls) workbook
 
def extract_keywords(path):
    """Collect the keywords jieba extracts from the first column of each line.

    Each line of the file at *path* is split on tab characters and only the
    first field is analysed with ``jieba.analyse.extract_tags``.  Keywords
    from all lines are returned as one flat list, duplicates included, so the
    caller can count them.

    Args:
        path: Path of the UTF-8 text file to analyse.

    Returns:
        A list of keyword strings in the order they were extracted.
    """
    import io  # function-scope import keeps this block self-contained

    words = []
    # `with` guarantees the handle is closed; "utf-8-sig" decodes plain UTF-8
    # and also drops a leading byte-order mark if the file has one.
    with io.open(path, encoding='utf-8-sig') as source:
        for line in source:
            first_column = line.strip('\n\r').split('\t')[0]
            # extract_tags returns keywords, not a full segmentation;
            # jieba.cut(text, cut_all=True/False) would give full/accurate
            # mode word segmentation instead.
            # NOTE(review): extract_tags only returns the top keywords per
            # call (its topK parameter) — confirm that is the intended input
            # for a "word frequency" count.
            words.extend(jieba.analyse.extract_tags(first_column))
    return words


def rank_words(words):
    """Count *words* and rank them by descending frequency.

    Args:
        words: Iterable of hashable items (keyword strings).

    Returns:
        A list of ``(word, count)`` tuples, most frequent first.  Words with
        equal counts keep the counter's iteration order (first-seen order on
        Python 3.7+).  An empty input yields an empty list.
    """
    from collections import Counter  # function-scope import, see above

    # One O(n log n) sort instead of rescanning the whole dict for every
    # count value.
    return Counter(words).most_common()


def write_txt(ranked, path):
    """Write one ``"<word> <count>"`` line per ranked entry to *path* (UTF-8)."""
    import io

    with io.open(path, 'w', encoding='utf-8') as out:
        out.writelines(
            u'{0} {1}\n'.format(word, count) for word, count in ranked
        )


def write_xls(ranked, path, sheet_name="wordCount"):
    """Save the ranking as an .xls workbook: column 0 = word, column 1 = count.

    Args:
        ranked: Sequence of ``(word, count)`` tuples, one per output row.
        path: Destination file name of the workbook.
        sheet_name: Name of the single worksheet that is created.
    """
    # The cells hold Chinese keywords, so the workbook must not be limited to
    # ASCII when it has to decode byte strings.
    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet(sheet_name)
    for row, (word, count) in enumerate(ranked):
        sheet.write(row, 0, label=word)
        sheet.write(row, 1, label=count)
    workbook.save(path)


def main(source_path='ceshi.txt', txt_path='wordCount.txt',
         xls_path='wordCount.xls'):
    """Run the pipeline: extract keywords, rank them, write .txt and .xls.

    Args:
        source_path: Text file to analyse (first tab-separated column only).
        txt_path: Output text file with one "word count" pair per line.
        xls_path: Output Excel workbook with the same ranking.
    """
    ranked = rank_words(extract_keywords(source_path))
    write_txt(ranked, txt_path)
    write_xls(ranked, xls_path)


if __name__ == "__main__":
    main()
posted @ 2017-10-31 08:18  lh459384111  阅读(437)  评论(0)  编辑  收藏  举报