词向量可视化--[tensorflow , python]
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
----------------------------------
Version : ??
File Name : visual_vec.py
Description :
Author : xijun1
Email :
Date : 2018/12/25
-----------------------------------
Change Activiy : 2018/12/25
-----------------------------------
"""
__author__ = 'xijun1'
from tqdm import tqdm
import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
import os
import codecs
words, embeddings = [], []
log_path = 'model'
with codecs.open('/Users/xxx/github/python_demo/vec.txt', 'r') as f:
header = f.readline()
vocab_size, vector_size = map(int, header.split())
for line in tqdm(range(vocab_size)):
word_list = f.readline().split(' ')
word = word_list[0]
vector = word_list[1:-1]
if word == "":
continue
words.append(word)
embeddings.append(np.array(vector))
assert len(words) == len(embeddings)
print(len(words))
with tf.Session() as sess:
X = tf.Variable([0.0], name='embedding')
place = tf.placeholder(tf.float32, shape=[len(words), vector_size])
set_x = tf.assign(X, place, validate_shape=False)
sess.run(tf.global_variables_initializer())
sess.run(set_x, feed_dict={place: embeddings})
with codecs.open(log_path + '/metadata.tsv', 'w') as f:
for word in tqdm(words):
f.write(word + '\n')
# with summary
summary_writer = tf.summary.FileWriter(log_path, sess.graph)
config = projector.ProjectorConfig()
embedding_conf = config.embeddings.add()
embedding_conf.tensor_name = 'embedding:0'
embedding_conf.metadata_path = os.path.join('metadata.tsv')
projector.visualize_embeddings(summary_writer, config)
# save
saver = tf.train.Saver()
saver.save(sess, os.path.join(log_path, "model.ckpt"))
结果:
编程是一种快乐,享受代码带给我的乐趣!!!
分类:
tensorflow
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· [.NET]调用本地 Deepseek 模型
· 一个费力不讨好的项目,让我损失了近一半的绩效!
· .NET Core 托管堆内存泄露/CPU异常的常见思路
· PostgreSQL 和 SQL Server 在统计信息维护中的关键差异
· C++代码改造为UTF-8编码问题的总结
· 一个费力不讨好的项目,让我损失了近一半的绩效!
· 清华大学推出第四讲使用 DeepSeek + DeepResearch 让科研像聊天一样简单!
· 实操Deepseek接入个人知识库
· CSnakes vs Python.NET:高效嵌入与灵活互通的跨语言方案对比
· Plotly.NET 一个为 .NET 打造的强大开源交互式图表库
2017-12-25 makefile在编译的过程中出现“except class name”