51、tf-idf值提取关键词

import testWord2vec2 as tw
import tensorflow_util as tu
import numpy as np
# Load the model and the list of names through the project helper module.
model = tw.load_model()
namelist = tw.loadNameList()

import jieba

# Segment every name with jieba and glue its tokens back together with single
# spaces, so each entry becomes one whitespace-delimited string.
namelist1 = [" ".join(jieba.cut(raw_name)) for raw_name in namelist]

from sklearn import feature_extraction
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
# Build the TF-IDF matrix from the space-separated, pre-segmented names.
# NOTE(review): CountVectorizer's default token_pattern only keeps tokens of
# two or more word characters, so single-character words are dropped here --
# confirm that this is intended for the corpus.
vectorizer = CountVectorizer()
transformer = TfidfTransformer()
tfidf = transformer.fit_transform(vectorizer.fit_transform(namelist1))

# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old method on older installations.
if hasattr(vectorizer, "get_feature_names_out"):
    word = vectorizer.get_feature_names_out()
else:
    word = vectorizer.get_feature_names()

# Dense weights: one row per document (name), one column per vocabulary term.
weight = tfidf.toarray()
keyword = []  # kept from the original script; nothing is appended to it here

# Print the highest-weighted term of every document.
# The rows of `weight` are documents, so iterate over the rows themselves; the
# original looped over len(word) (the vocabulary size), which raises
# IndexError when there are more terms than documents and silently skips
# documents when there are fewer.
for wei in weight:
    # argmax returns the first index of the maximum, matching the original
    # np.where(wei == np.max(wei))[0][0] selection.
    best_idx = int(np.argmax(wei))
    print(word[best_idx], ":", wei[best_idx])

 

posted @ 2017-01-19 08:33  香港胖仔  阅读(329)  评论(0)  编辑  收藏  举报