词向量可视化--[tensorflow , python]

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
----------------------------------
Version    : ??
File Name :     visual_vec.py
Description :   
Author  :       xijun1
Email   :
Date    :       2018/12/25
-----------------------------------
Change Activiy  :   2018/12/25
-----------------------------------

"""
__author__ = 'xijun1'
from tqdm import tqdm
import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
import os
import codecs

# Labels and their vectors, collected in matching order (row i of
# `embeddings` belongs to `words[i]`).
words, embeddings = [], []
# Directory that receives the projector metadata and the checkpoint.
log_path = 'model'

# Layout read below: a "<vocab_size> <vector_size>" header line followed by
# one "<word> <v1> ... <vN>" line per entry (looks like the word2vec text
# format -- confirm against whatever produced vec.txt).
# The encoding is pinned so decoding does not depend on the platform locale.
with open('/Users/xxx/github/python_demo/vec.txt', 'r', encoding='utf-8') as f:
    header = f.readline()
    vocab_size, vector_size = map(int, header.split())
    for row in tqdm(range(vocab_size)):
        # Split on plain spaces only, so a word that contains other Unicode
        # whitespace is still treated as a single token.
        word_list = f.readline().rstrip('\r\n').split(' ')
        word = word_list[0]
        if word == "":
            # Blank line, premature end of file, or an empty token: skip it.
            continue
        # Dropping empty fields copes with lines that end in a trailing space
        # as well as lines that do not; the previous `[1:-1]` slice silently
        # discarded the last real dimension in the latter case.
        vector = [float(tok) for tok in word_list[1:] if tok]
        if len(vector) != vector_size:
            # `row` is zero-based and the header occupies line 1.
            raise ValueError(
                'line %d: expected %d values for %r, found %d'
                % (row + 2, vector_size, word, len(vector)))
        words.append(word)
        embeddings.append(np.array(vector, dtype=np.float32))
assert len(words) == len(embeddings)
print(len(words))

# metadata.tsv, the projector config and the checkpoint are all written into
# log_path, so make sure the directory exists before the first write.
os.makedirs(log_path, exist_ok=True)

with tf.Session() as sess:
    # The variable starts as a one-element stub and is then overwritten with
    # the full matrix through a placeholder; validate_shape=False is what
    # allows the assigned value to differ in shape from the initial one.
    X = tf.Variable([0.0], name='embedding')
    place = tf.placeholder(tf.float32, shape=[len(words), vector_size])
    set_x = tf.assign(X, place, validate_shape=False)
    sess.run(tf.global_variables_initializer())
    sess.run(set_x, feed_dict={place: embeddings})

    # One label per line, in the same order as the rows fed above.
    # The encoding is pinned so non-ASCII words do not depend on the locale.
    with open(os.path.join(log_path, 'metadata.tsv'), 'w',
              encoding='utf-8') as f:
        for word in tqdm(words):
            f.write(word + '\n')

    # Projector configuration: bind the saved tensor to its label file.
    summary_writer = tf.summary.FileWriter(log_path, sess.graph)
    try:
        config = projector.ProjectorConfig()
        embedding_conf = config.embeddings.add()
        embedding_conf.tensor_name = 'embedding:0'
        # Bare file name: the metadata file sits in log_path next to the
        # projector config.
        embedding_conf.metadata_path = 'metadata.tsv'
        projector.visualize_embeddings(summary_writer, config)
    finally:
        # Close the writer so the graph event is flushed to disk.
        summary_writer.close()

    # Save the checkpoint that holds the 'embedding' variable.
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(log_path, "model.ckpt"))

结果:

posted @ 龚细军  阅读(2457)  评论(0)  编辑  收藏  举报
编辑推荐:
· [.NET]调用本地 Deepseek 模型
· 一个费力不讨好的项目,让我损失了近一半的绩效!
· .NET Core 托管堆内存泄露/CPU异常的常见思路
· PostgreSQL 和 SQL Server 在统计信息维护中的关键差异
· C++代码改造为UTF-8编码问题的总结
阅读排行:
· 一个费力不讨好的项目,让我损失了近一半的绩效!
· 清华大学推出第四讲使用 DeepSeek + DeepResearch 让科研像聊天一样简单!
· 实操Deepseek接入个人知识库
· CSnakes vs Python.NET:高效嵌入与灵活互通的跨语言方案对比
· Plotly.NET 一个为 .NET 打造的强大开源交互式图表库
历史上的今天:
2017-12-25 makefile在编译的过程中出现“except class name”
点击右上角即可分享
微信分享提示