TSNE/分析两个数据的分布
使用 sklearn.manifold 中的 TSNE 类
# coding=utf-8
"""Compare the distributions of two data sets with 2-D t-SNE scatter plots."""
import numpy as np
import pickle
from sklearn.manifold import TSNE
import matplotlib

# The backend must be selected before pyplot is imported; 'Agg' renders to
# files only, so no display is needed.
matplotlib.use('Agg')
import matplotlib.pyplot as plt


def load_data(fname):
    """Return the object unpickled from the file at *fname*.

    ``main`` expects the pickled object to unpack into a ``(data, label)``
    pair.

    NOTE: ``pickle.load`` can execute arbitrary code while deserializing;
    only call this on files from a trusted source.
    """
    with open(fname, 'rb') as fr:
        return pickle.load(fr)


def _flatten_samples(data):
    """Reshape *data* to 2-D: first axis kept, all remaining axes merged."""
    # -1 lets NumPy derive the feature count from this array's own shape, so
    # one data set's dimensions can never leak into the other's reshape.
    return data.reshape((data.shape[0], -1))


def plot(data1, label1, data2, label2):
    """Embed both data sets with t-SNE and save side-by-side scatter plots.

    Each data set is embedded independently with a default-configured
    ``TSNE`` and drawn as a scatter plot coloured by its labels.

    Two files are written to the current working directory:

    * ``a1.png`` -- saved right after the left panel is drawn, i.e. before
      the right panel exists.
    * ``b1.png`` -- saved after both panels are drawn.

    Args:
        data1: 2-D array-like passed to ``TSNE().fit_transform``.
        label1: Per-sample values used as scatter colours for *data1*.
        data2: 2-D array-like passed to ``TSNE().fit_transform``.
        label2: Per-sample values used as scatter colours for *data2*.
    """
    embedded1 = TSNE().fit_transform(data1)
    embedded2 = TSNE().fit_transform(data2)

    plt.figure(figsize=(10, 5))

    ax1 = plt.subplot(121)
    ax1.scatter(embedded1[:, 0], embedded1[:, 1], c=label1)
    ax1.set_title("data1 train data")
    plt.savefig('a1.png')

    ax2 = plt.subplot(122)
    ax2.scatter(embedded2[:, 0], embedded2[:, 1], c=label2)
    ax2.set_title("data2 train data")
    plt.savefig('b1.png')


def main():
    """Load both pickled data sets, flatten each sample and plot them."""
    # NOTE(review): the files are named test_data.pkl although the variables
    # and plot titles say "train" -- confirm which split is intended.
    train_data1, train_label1 = load_data(
        '/home/hd_1T/haiou/class/machinelearning/data/data1/test_data.pkl')
    train_data2, train_label2 = load_data(
        '/home/hd_1T/haiou/class/machinelearning/data/data2/test_data.pkl')

    # Each array is flattened using its OWN shape. The previous code sized
    # data set 2 with train_data1.shape[2], which raised ValueError whenever
    # the two sets differed in their last dimension.
    plot(_flatten_samples(train_data1), train_label1,
         _flatten_samples(train_data2), train_label2)