词向量可视化 -- [tensorflow, python]

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
----------------------------------
Version    : ??
File Name :     visual_vec.py
Description :   
Author  :       xijun1
Email   :
Date    :       2018/12/25
-----------------------------------
Change Activity :   2018/12/25
-----------------------------------

"""
__author__ = 'xijun1'
import codecs
import io
import os

import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
from tqdm import tqdm

# Export word vectors for the TensorBoard embedding projector.
#
# Step 1 reads a text vector file whose first line is
# "<vocab_size> <vector_size>" and whose following lines are
# "<word> <v1> ... <vN>" separated by single spaces.
# Step 2 writes, into `log_path`, a metadata file with one word per line,
# the projector configuration and a checkpoint holding the embedding matrix.
words, embeddings = [], []
log_path = 'model'

# io.open with an explicit encoding keeps non-ASCII words intact regardless
# of the platform's default locale, and only treats real newlines as line
# ends (the codecs stream reader also breaks on other Unicode separators).
with io.open('/Users/xxx/github/python_demo/vec.txt', 'r',
             encoding='utf-8') as f:
    header = f.readline()
    vocab_size, vector_size = map(int, header.split())
    for _ in tqdm(range(vocab_size)):
        # Drop the line ending and an optional trailing separator up front,
        # so files with and without a trailing space both keep every
        # component of the vector.
        word_list = f.readline().rstrip(' \r\n').split(' ')
        word = word_list[0]
        if word == "":
            # Blank line, premature end of file, or a line starting with
            # the separator: nothing usable, skip it.
            continue
        vector = word_list[1:]
        if len(vector) != vector_size:
            raise ValueError(
                'expected {} values for word {!r}, got {}'.format(
                    vector_size, word, len(vector)))
        words.append(word)
        # Parse to floats here so a malformed number fails at its own line.
        embeddings.append(np.array(vector, dtype=np.float32))
assert len(words) == len(embeddings)
print(len(words))

# The metadata file and the checkpoint are both written into log_path, so
# make sure the directory exists first.
if not os.path.isdir(log_path):
    os.makedirs(log_path)

# One word per line, in the same order as the rows of the embedding matrix.
with io.open(os.path.join(log_path, 'metadata.tsv'), 'w',
             encoding='utf-8') as f:
    for word in tqdm(words):
        f.write(word + '\n')

# The reshape keeps the matrix two-dimensional even when no word was loaded.
embedding_matrix = np.array(embeddings, dtype=np.float32).reshape(
    len(words), vector_size)

with tf.Session() as sess:
    # Declare the variable with its real shape and fill it through a
    # placeholder, so the (possibly large) matrix is not stored as a
    # constant inside the graph definition.
    X = tf.Variable(tf.zeros([len(words), vector_size]), name='embedding')
    place = tf.placeholder(tf.float32, shape=[len(words), vector_size])
    set_x = tf.assign(X, place)
    sess.run(tf.global_variables_initializer())
    sess.run(set_x, feed_dict={place: embedding_matrix})

    # with summary
    summary_writer = tf.summary.FileWriter(log_path, sess.graph)
    config = projector.ProjectorConfig()
    embedding_conf = config.embeddings.add()
    embedding_conf.tensor_name = X.name
    # File name of the metadata written above, which sits in log_path.
    embedding_conf.metadata_path = 'metadata.tsv'
    projector.visualize_embeddings(summary_writer, config)
    # Flush the graph event to disk and release the writer.
    summary_writer.close()

    # save
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(log_path, "model.ckpt"))

结果:

posted @ 2018-12-25 19:21  龚细军  阅读(2452)  评论(0)  编辑  收藏  举报