VAE demo
First, the official tflearn example:
from __future__ import division, print_function, absolute_import

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
import tensorflow as tf
import tflearn

# Data loading and preprocessing
import tflearn.datasets.mnist as mnist
X, Y, testX, testY = mnist.load_data(one_hot=True)

# Params
original_dim = 784  # MNIST images are 28x28 pixels
hidden_dim = 256
latent_dim = 2

# Building the encoder
encoder = tflearn.input_data(shape=[None, 784], name='input_images')
encoder = tflearn.fully_connected(encoder, hidden_dim, activation='relu')
z_mean = tflearn.fully_connected(encoder, latent_dim)
z_std = tflearn.fully_connected(encoder, latent_dim)

# Sampler: Normal (gaussian) random distribution
eps = tf.random_normal(tf.shape(z_std), dtype=tf.float32, mean=0.,
                       stddev=1.0, name='epsilon')
z = z_mean + tf.exp(z_std / 2) * eps

# Building the decoder (with scope to re-use these layers later)
decoder = tflearn.fully_connected(z, hidden_dim, activation='relu',
                                  scope='decoder_h')
decoder = tflearn.fully_connected(decoder, original_dim, activation='sigmoid',
                                  scope='decoder_out')

# Define VAE Loss
def vae_loss(x_reconstructed, x_true):
    # Reconstruction loss
    encode_decode_loss = x_true * tf.log(1e-10 + x_reconstructed) \
                         + (1 - x_true) * tf.log(1e-10 + 1 - x_reconstructed)
    encode_decode_loss = -tf.reduce_sum(encode_decode_loss, 1)
    # KL Divergence loss
    kl_div_loss = 1 + z_std - tf.square(z_mean) - tf.exp(z_std)
    kl_div_loss = -0.5 * tf.reduce_sum(kl_div_loss, 1)
    return tf.reduce_mean(encode_decode_loss + kl_div_loss)

net = tflearn.regression(decoder, optimizer='rmsprop', learning_rate=0.001,
                         loss=vae_loss, metric=None, name='target_images')

# We will need 2 models, one for training that will learn the latent
# representation, and one that can take random normal noise as input and
# use the decoder part of the network to generate an image

# Train the VAE
training_model = tflearn.DNN(net, tensorboard_verbose=0)
training_model.fit({'input_images': X}, {'target_images': X}, n_epoch=100,
                   validation_set=(testX, testX), batch_size=256, run_id="vae")

# Build an image generator (re-using the decoding layers)
# Input data is a normal (gaussian) random distribution (with dim = latent_dim)
input_noise = tflearn.input_data(shape=[None, latent_dim], name='input_noise')
decoder = tflearn.fully_connected(input_noise, hidden_dim, activation='relu',
                                  scope='decoder_h', reuse=True)
decoder = tflearn.fully_connected(decoder, original_dim, activation='sigmoid',
                                  scope='decoder_out', reuse=True)
generator_model = tflearn.DNN(decoder, session=training_model.session)

# Building a manifold of generated digits
n = 25  # Figure row size
figure = np.zeros((28 * n, 28 * n))
# Random normal distributions to feed network with
x_axis = norm.ppf(np.linspace(0., 1., n))
y_axis = norm.ppf(np.linspace(0., 1., n))

for i, x in enumerate(x_axis):
    for j, y in enumerate(y_axis):
        samples = np.array([[x, y]])
        x_reconstructed = generator_model.predict({'input_noise': samples})
        digit = np.array(x_reconstructed[0]).reshape(28, 28)
        figure[i * 28: (i + 1) * 28, j * 28: (j + 1) * 28] = digit

plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap='Greys_r')
plt.show()
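Two details worth noticing in this example. First, despite its name, z_std actually holds the log of the variance: the sampler recovers the standard deviation as tf.exp(z_std / 2), and the KL term uses tf.exp(z_std) as the variance. Second, norm.ppf(np.linspace(0., 1., n)) evaluates to -inf and +inf at the endpoints, so the first and last rows and columns of the generated manifold are degenerate; narrowing the linspace to something like (0.05, 0.95) avoids this.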
Next, look at: https://github.com/kiyomaro927/tflearn-vae/blob/master/source/test/one_dimension/vae.py
import tensorflow as tf
import tflearn

from dataset import Dataset, Datasets

import pickle
import sys

# loading data
try:
    h_and_w = pickle.load(open('h_and_w.pkl', 'rb'))
    trainX, trainY, testX, testY = h_and_w.load_data()
except:
    print("No dataset was found.")
    sys.exit(1)

# network parameters
input_dim = 1  # height data input
encoder_hidden_dim = 16
decoder_hidden_dim = 16
latent_dim = 2

# paths
TENSORBOARD_DIR = 'experiment/'
CHECKPOINT_PATH = 'out_models/'

# training parameters
n_epoch = 200
batch_size = 50

# encoder
def encode(input_x):
    encoder = tflearn.fully_connected(input_x, encoder_hidden_dim, activation='relu')
    mu_encoder = tflearn.fully_connected(encoder, latent_dim, activation='linear')
    logvar_encoder = tflearn.fully_connected(encoder, latent_dim, activation='linear')
    return mu_encoder, logvar_encoder

# decoder
def decode(z):
    decoder = tflearn.fully_connected(z, decoder_hidden_dim, activation='relu')
    x_hat = tflearn.fully_connected(decoder, input_dim, activation='linear')
    return x_hat

# sampler
def sample(mu, logvar):
    epsilon = tf.random_normal(tf.shape(logvar), dtype=tf.float32, name='epsilon')
    std_encoder = tf.exp(tf.mul(0.5, logvar))
    z = tf.add(mu, tf.mul(std_encoder, epsilon))
    return z

# loss function (regularization)
def calculate_regularization_loss(mu, logvar):
    kl_divergence = -0.5 * tf.reduce_sum(1 + logvar - tf.square(mu) - tf.exp(logvar),
                                         reduction_indices=1)
    return kl_divergence

# loss function (reconstruction)
def calculate_reconstruction_loss(x_hat, input_x):
    mse = tflearn.objectives.mean_square(x_hat, input_x)
    return mse

# trainer
def define_trainer(target, optimizer):
    trainop = tflearn.TrainOp(loss=target,
                              optimizer=optimizer,
                              batch_size=batch_size,
                              metric=None,
                              name='vae_trainer')
    trainer = tflearn.Trainer(train_ops=trainop,
                              tensorboard_dir=TENSORBOARD_DIR,
                              tensorboard_verbose=3,
                              checkpoint_path=CHECKPOINT_PATH,
                              max_checkpoints=1)
    return trainer

# flow of VAE training
def main():
    input_x = tflearn.input_data(shape=(None, input_dim), name='input_x')
    mu, logvar = encode(input_x)
    z = sample(mu, logvar)
    x_hat = decode(z)

    regularization_loss = calculate_regularization_loss(mu, logvar)
    reconstruction_loss = calculate_reconstruction_loss(x_hat, input_x)
    target = tf.reduce_mean(tf.add(regularization_loss, reconstruction_loss))

    optimizer = tflearn.optimizers.Adam()
    optimizer = optimizer.get_tensor()

    trainer = define_trainer(target, optimizer)

    trainer.fit(feed_dicts={input_x: trainX},
                val_feed_dicts={input_x: testX},
                n_epoch=n_epoch,
                show_metric=False,
                snapshot_epoch=True,
                shuffle_all=True,
                run_id='VAE')

    return 0

if __name__ == '__main__':
    sys.exit(main())
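Note that this script targets the pre-1.0 TensorFlow API: tf.mul was removed in TensorFlow 1.0 in favor of tf.multiply, and reduction_indices has since been renamed to axis, so running it on a newer install requires those substitutions.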
A Keras one: https://gist.github.com/philipperemy/b8a7b7be344e447e7ee6625fe2fdd765
from __future__ import print_function

import os

import numpy as np
from keras.layers import RepeatVector
from keras.layers.core import Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.models import load_model

np.random.seed(123)

def prepare_sequences(x_train, window_length, random_indices):
    full_sequence = x_train.flatten()
    windows = []
    outliers = []
    for window_start in range(0, len(full_sequence) - window_length + 1):
        window_end = window_start + window_length
        window_range = range(window_start, window_end)
        window = list(full_sequence[window_range])
        contain_outlier = len(set(window_range).intersection(set(random_indices))) > 0
        outliers.append(contain_outlier)
        windows.append(window)
    return np.expand_dims(np.array(windows), axis=2), outliers

def get_signal(size, outliers_size=0.01):
    sig = np.expand_dims(np.random.normal(loc=0, scale=1, size=(size, 1)), axis=1)
    if outliers_size < 1:  # percentage.
        outliers_size = int(size * outliers_size)
    random_indices = np.random.choice(range(size), size=outliers_size, replace=False)
    sig[random_indices] = np.random.randint(6, 9, 1)[0]
    return sig, random_indices

def tp_fn_fp_tn(total, expected, actual):
    tp = len(set(expected).intersection(set(actual)))
    fn = len(set(expected) - set(actual))
    fp = len(set(actual) - set(expected))
    tn = len((total - set(expected)).intersection(total - set(actual)))
    return tp, fn, fp, tn

def main():
    window_length = 10
    select_only_last_state = False
    model_file = 'model.h5'
    hidden_dim = 16

    # no outliers.
    signal_train, _ = get_signal(100000, outliers_size=0)
    x_train, _ = prepare_sequences(signal_train, window_length, [])

    # 1 percent are outliers.
    signal_test, random_indices = get_signal(100000, outliers_size=0.01)
    x_test, contain_outliers = prepare_sequences(signal_test, window_length, random_indices)
    outlier_indices = np.where(contain_outliers)[0]

    if os.path.isfile(model_file):
        m = load_model(model_file)
    else:
        m = Sequential()
        if select_only_last_state:
            m.add(LSTM(hidden_dim, input_shape=(window_length, 1), return_sequences=False))
            m.add(RepeatVector(window_length))
        else:
            m.add(LSTM(hidden_dim, input_shape=(window_length, 1), return_sequences=True))
        m.add(Dropout(p=0.1))
        m.add(LSTM(1, return_sequences=True, activation='linear'))
        m.compile(loss='mse', optimizer='adam')
        m.fit(x_train, x_train, batch_size=64, nb_epoch=5, validation_data=(x_test, x_test))
        m.save(model_file)

    pred_x_test = m.predict(x_test)
    mae_of_predictions = np.squeeze(np.max(np.square(pred_x_test - x_test), axis=1))
    mae_threshold = np.mean(mae_of_predictions) + np.std(mae_of_predictions)  # can use a running mean instead.
    actual = np.where(mae_of_predictions > mae_threshold)[0]

    tp, fn, fp, tn = tp_fn_fp_tn(set(range(len(pred_x_test))), outlier_indices, actual)

    precision = float(tp) / (tp + fp)
    hit_rate = float(tp) / (tp + fn)
    accuracy = float(tp + tn) / (tp + tn + fp + fn)

    print('precision = {}, hit_rate = {}, accuracy = {}'.format(precision, hit_rate, accuracy))

if __name__ == '__main__':
    main()
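Strictly speaking this gist is an LSTM autoencoder rather than a VAE: it trains on outlier-free windows and flags a test window as anomalous when its reconstruction error exceeds the mean plus one standard deviation. It also uses Keras 1 argument names (Dropout(p=0.1), nb_epoch=5), which in Keras 2 became Dropout(0.1) and epochs=5.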
Now let's look at the official Keras example:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from keras.layers import Lambda, Input, Dense
from keras.models import Model
from keras.datasets import mnist
from keras.losses import mse, binary_crossentropy
from keras.utils import plot_model
from keras import backend as K

import numpy as np
import matplotlib.pyplot as plt
import argparse
import os

# reparameterization trick
# instead of sampling from Q(z|X), sample eps = N(0,I)
# z = z_mean + sqrt(var)*eps
def sampling(args):
    """Reparameterization trick by sampling from an isotropic unit Gaussian.

    # Arguments:
        args (tensor): mean and log of variance of Q(z|X)

    # Returns:
        z (tensor): sampled latent vector
    """
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon

def plot_results(models,
                 data,
                 batch_size=128,
                 model_name="vae_mnist"):
    """Plots labels and MNIST digits as a function of the 2-dim latent vector

    # Arguments:
        models (tuple): encoder and decoder models
        data (tuple): test data and label
        batch_size (int): prediction batch size
        model_name (string): which model is using this function
    """
    encoder, decoder = models
    x_test, y_test = data
    os.makedirs(model_name, exist_ok=True)

    filename = os.path.join(model_name, "vae_mean.png")
    # display a 2D plot of the digit classes in the latent space
    z_mean, _, _ = encoder.predict(x_test, batch_size=batch_size)
    plt.figure(figsize=(12, 10))
    plt.scatter(z_mean[:, 0], z_mean[:, 1], c=y_test)
    plt.colorbar()
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.savefig(filename)
    plt.show()

    filename = os.path.join(model_name, "digits_over_latent.png")
    # display a 30x30 2D manifold of digits
    n = 30
    digit_size = 28
    figure = np.zeros((digit_size * n, digit_size * n))
    # linearly spaced coordinates corresponding to the 2D plot
    # of digit classes in the latent space
    grid_x = np.linspace(-4, 4, n)
    grid_y = np.linspace(-4, 4, n)[::-1]

    for i, yi in enumerate(grid_y):
        for j, xi in enumerate(grid_x):
            z_sample = np.array([[xi, yi]])
            x_decoded = decoder.predict(z_sample)
            digit = x_decoded[0].reshape(digit_size, digit_size)
            figure[i * digit_size: (i + 1) * digit_size,
                   j * digit_size: (j + 1) * digit_size] = digit

    plt.figure(figsize=(10, 10))
    start_range = digit_size // 2
    end_range = n * digit_size + start_range + 1
    pixel_range = np.arange(start_range, end_range, digit_size)
    sample_range_x = np.round(grid_x, 1)
    sample_range_y = np.round(grid_y, 1)
    plt.xticks(pixel_range, sample_range_x)
    plt.yticks(pixel_range, sample_range_y)
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.imshow(figure, cmap='Greys_r')
    plt.savefig(filename)
    plt.show()

# MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

image_size = x_train.shape[1]
original_dim = image_size * image_size
x_train = np.reshape(x_train, [-1, original_dim])
x_test = np.reshape(x_test, [-1, original_dim])
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# network parameters
input_shape = (original_dim, )
intermediate_dim = 512
batch_size = 128
latent_dim = 2
epochs = 50

# VAE model = encoder + decoder
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = Dense(intermediate_dim, activation='relu')(inputs)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

# instantiate encoder model
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()
plot_model(encoder, to_file='vae_mlp_encoder.png', show_shapes=True)

# build decoder model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
x = Dense(intermediate_dim, activation='relu')(latent_inputs)
outputs = Dense(original_dim, activation='sigmoid')(x)

# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()
plot_model(decoder, to_file='vae_mlp_decoder.png', show_shapes=True)

# instantiate VAE model
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae_mlp')

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    help_ = "Load h5 model trained weights"
    parser.add_argument("-w", "--weights", help=help_)
    help_ = "Use mse loss instead of binary cross entropy (default)"
    parser.add_argument("-m", "--mse", help=help_, action='store_true')
    args = parser.parse_args()
    models = (encoder, decoder)
    data = (x_test, y_test)

    # VAE loss = mse_loss or xent_loss + kl_loss
    if args.mse:
        reconstruction_loss = mse(inputs, outputs)
    else:
        reconstruction_loss = binary_crossentropy(inputs, outputs)

    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    vae.summary()
    plot_model(vae, to_file='vae_mlp.png', show_shapes=True)

    if args.weights:
        vae.load_weights(args.weights)
    else:
        # train the autoencoder
        vae.fit(x_train,
                epochs=epochs,
                batch_size=batch_size,
                validation_data=(x_test, None))
        vae.save_weights('vae_mlp_mnist.h5')

    plot_results(models,
                 data,
                 batch_size=batch_size,
                 model_name="vae_mlp")
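Note how the official example attaches the loss with vae.add_loss(vae_loss) and compiles without a loss argument, which is why fit is called with only x_train and no target array.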
The VAE theory introduced above looks complicated, but in the end a VAE does much the same job as an AutoEncoder: it takes an input sequence, produces a latent variable (sampled from the learned latent distribution), and reconstructs the original input from that latent variable. The difference is that a VAE learns the distribution of the latent variable, allowing some noise and randomness in it, which acts like a regularizer and helps prevent overfitting.
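To make the two ingredients concrete, here is a minimal NumPy sketch of the reparameterization trick and the closed-form KL term that all the examples above share; the values of mu and log_var are made up for illustration:

import numpy as np

# pretend the encoder produced these statistics of q(z|x) for one sample
mu = np.array([[0.1, -0.3]])        # mean
log_var = np.array([[-1.2, -0.8]])  # log-variance

# reparameterization trick: z = mu + sigma * eps with eps ~ N(0, I),
# so gradients can flow through mu and log_var
eps = np.random.normal(size=mu.shape)
z = mu + np.exp(0.5 * log_var) * eps

# closed-form KL(q(z|x) || N(0, I)); the reconstruction term (e.g. MSE or
# cross entropy between the input and the decoder output) is added to this
kl = -0.5 * np.sum(1 + log_var - mu ** 2 - np.exp(log_var), axis=-1)
print(z, kl)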
The Keras code below builds a VAE model. It is adapted from the official Keras example above, with the parameters chosen following the Donut paper (imports are the same as in the official example, plus regularizers and mean_squared_error):
def sampling(args):
    """Reparameterization trick by sampling from an isotropic unit Gaussian.

    Unlike the official example, the second tensor here is the standard
    deviation itself (a softplus output, as in the Donut paper), not the
    log of the variance.

    # Arguments:
        args (tensor): mean and std of Q(z|X)

    # Returns:
        z (tensor): sampled latent vector
    """
    z_mean, z_std = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    std_epsilon = 1e-4  # small floor on the std for numerical stability
    return z_mean + (z_std + std_epsilon) * epsilon
# seq_len (the sliding-window length) and latent_dim are assumed to be
# defined earlier in the script
input_shape = (seq_len,)
intermediate_dim = 100
# VAE model = encoder + decoder
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = Dense(intermediate_dim, activation='relu', kernel_regularizer=regularizers.l2(0.001))(inputs)
x = Dense(intermediate_dim, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
# softplus keeps the predicted standard deviation positive (Donut-style)
z_std = Dense(latent_dim, name='z_std', activation='softplus')(x)
# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_std])
# build decoder model
x = Dense(intermediate_dim, activation='relu', kernel_regularizer=regularizers.l2(0.001))(z)
x = Dense(intermediate_dim, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)
x_mean = Dense(seq_len, name='x_mean')(x)
x_std = Dense(seq_len, name='x_std', activation='softplus')(x)
outputs = Lambda(sampling, output_shape=(seq_len,), name='x')([x_mean, x_std])
vae = Model(inputs, outputs, name='vae_mlp')
# add loss
reconstruction_loss = mean_squared_error(inputs, outputs)
reconstruction_loss *= seq_len
# KL divergence of N(z_mean, z_std^2) from N(0, I); since the network
# predicts the std directly, the log-variance is 2 * log(z_std)
kl_loss = 1 + 2 * K.log(z_std + 1e-10) - K.square(z_mean) - K.square(z_std)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
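Since the loss is attached with add_loss, training needs no separate target. A minimal usage sketch, assuming windows is a float array of shape (num_windows, seq_len) obtained by sliding a window over the standardized KPI series (the name windows and the training hyperparameters here are placeholders, not values from the Donut paper):

vae.fit(windows, epochs=100, batch_size=256, validation_split=0.1)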
The VAE-based anomaly detection method for periodic KPIs is essentially the same as the AutoEncoder one: anomalies can be judged by the reconstruction error. The results are shown below; the upper plot is the original input and the lower plot is the reconstruction. We can see that the VAE's reconstruction is somewhat better than the AutoEncoder's.
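A sketch of that reconstruction-error check, under the same windows assumption as above (a simple mean-plus-3-sigma threshold is used here for illustration; the Donut paper itself scores points by reconstruction probability instead):

import numpy as np

reconstructed = vae.predict(windows)
# per-window mean squared reconstruction error
errors = np.mean(np.square(windows - reconstructed), axis=-1)
# flag windows whose error is far above the typical level
threshold = errors.mean() + 3 * errors.std()
anomalies = np.where(errors > threshold)[0]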