TSNE的可视化特征结果
一、可视化特征或embeddings
1.1 二维的值
对 BERT 输出层的可视化(输出是二维的:batch, hidden_state)
codeantenna
1.2 三维的值
对 BERT 的 last_hidden_state 进行可视化(输出是三维的:batch, seq_length, hidden_state)
def tsne_plot_similar_words_png(title, embedding_clusters, a, filename=""):
    """Scatter-plot 2-D embedding clusters, one colour and one label per cluster.

    Each cluster is drawn in its own rainbow colour and annotated at its
    centroid with its 1-based position in ``embedding_clusters``.

    Args:
        title: Text used as the figure title.
        embedding_clusters: Sized sequence of 2-D arrays. Column 0 of each
            array is plotted on the x axis and column 1 on the y axis.
        a: Alpha (opacity) applied to the scatter points.
        filename: Optional output path. When non-empty, the figure is also
            written there as a PNG before being displayed.
    """
    plt.figure(figsize=(16, 9))
    colors = cm.rainbow(np.linspace(0, 1, len(embedding_clusters)))
    for label, (embeddings, color) in enumerate(zip(embedding_clusters, colors), start=1):
        x = embeddings[:, 0]
        y = embeddings[:, 1]
        # Wrap the RGBA row in a list so it is always read as one colour and
        # never as per-point values when a cluster happens to have 4 points.
        plt.scatter(x, y, c=[color], alpha=a)
        # plt.text requires the string to draw as its third argument; the
        # cluster's running number is used as that label.
        plt.text(
            x.mean(),
            y.mean(),
            str(label),
            color='white',
            weight='bold',
            fontsize=13,
            path_effects=[
                PathEffects.withStroke(linewidth=3, foreground="black", alpha=0.7)
            ],
        )
    plt.title(title)
    plt.grid(True)
    plt.xlim(-200, 200)
    plt.ylim(-200, 200)
    if filename:
        # Save before show(): some backends release the figure once the
        # window is closed, which would leave an empty image on disk.
        plt.savefig(filename, format='png', dpi=150, bbox_inches='tight')
    plt.show()
def batch_size_length_state():
    """Reduce ``outputs_train.last_hidden_state`` to 2-D with t-SNE and plot it.

    Reads the module-level ``outputs_train``, flattens its three-dimensional
    last hidden state into rows, runs t-SNE over all rows at once, restores
    the two leading dimensions and hands the result to
    ``tsne_plot_similar_words_png``, which then draws one cluster per entry
    along the first axis.
    """
    # .cpu() is a no-op for tensors already in host memory and makes the
    # NumPy conversion work when the tensor lives on another device.
    embed_cluster = outputs_train.last_hidden_state.detach().cpu().numpy()
    # The surrounding article describes these axes as
    # (batch, seq_length, hidden_state) - confirm against the model in use.
    n, m, k = embed_cluster.shape
    print(n, m, k)

    perplexity = 30
    tsne = TSNE(perplexity=perplexity, n_components=2, init='pca', n_iter=3500, random_state=32)

    # t-SNE takes a 2-D matrix, so merge the first two axes into rows and
    # split them apart again once every row has been projected to 2-D.
    flattened = embed_cluster.reshape(n * m, k)
    embeddings_en_2d_gif = np.array(tsne.fit_transform(flattened)).reshape(n, m, 2)

    plot_title = 'Vizualizing similar words from Google News using t-SNE (perplexity={})'.format(perplexity)
    tsne_plot_similar_words_png(plot_title, embeddings_en_2d_gif, 0.6)
# batch_size_length_state()
参考
上述代码稍作调整即可使用
github的开源代码
github上复杂的生成方法