Source code for deepobs.fmnist.fmnist_vae

# -*- coding: utf-8 -*-
"""
Variational Autoencoder (VAE) on Fashion-MNIST. Adapted from https://towardsdatascience.com/teaching-a-variational-autoencoder-vae-to-draw-mnist-characters-978675c95776 and from the MNIST VAE test problem.
"""

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import fmnist_input


class set_up:
    """Variational Autoencoder (VAE) test problem on `Fashion-MNIST`.

    The model is adapted from `here`_.

    Args:
        batch_size (int): Batch size of the data points. Defaults to ``64``.
        n_latent (int): Size of the latent space of the encoder. Defaults to
            ``8``.
        weight_decay (float): Weight decay factor. No weight decay is
            implemented for this model; passing anything other than ``None``
            only prints a warning. Defaults to ``None``.

    Attributes:
        data_loading (deepobs.data_loading): Data loading class for
            `Fashion-MNIST`, :class:`.fmnist_input.data_loading`.
        n_latent (int): Latent size used by the most recent call to
            :meth:`set_up`. Used by :meth:`generate` to sample latent codes.
        losses (tf.Tensor): Tensor containing the individual losses per data
            point (reconstruction loss plus latent loss).
        accuracy (tf.Tensor): Constant zero tensor of shape ``[1, 1]``. A VAE
            has no accuracy; the attribute exists so that code written for
            other test problems can be reused.
        train_init_op (tf.Operation): A TensorFlow operation to be performed
            before starting every training epoch.
        train_eval_init_op (tf.Operation): A TensorFlow operation to be
            performed before starting every training eval epoch.
        test_init_op (tf.Operation): A TensorFlow operation to be performed
            before starting every test evaluation phase.

    .. _here: https://towardsdatascience.com/teaching-a-variational-autoencoder-vae-to-draw-mnist-characters-978675c95776
    """

    def __init__(self, batch_size=64, n_latent=8, weight_decay=None):
        self.data_loading = fmnist_input.data_loading(batch_size=batch_size)
        self.losses, self.accuracy = self.set_up(
            weight_decay=weight_decay, n_latent=n_latent)

        # Operations to run when switching the phase. The ones defined in
        # data_loading initialize the iterator and assign the phase variable;
        # further operations can be added to these groups.
        self.train_init_op = tf.group([self.data_loading.train_init_op])
        self.train_eval_init_op = tf.group(
            [self.data_loading.train_eval_init_op])
        self.test_init_op = tf.group([self.data_loading.test_init_op])

    def get(self):
        """Returns the losses and the accuracy of the model.

        Returns:
            tuple: Tuple consisting of the losses and the accuracy.
        """
        return self.losses, self.accuracy

    def set_up(self, weight_decay=None, n_latent=8):
        """Sets up the test problem.

        Builds the encoder and the decoder and defines the per-example loss
        as the sum of the reconstruction loss and the latent loss.

        Args:
            weight_decay (float): Weight decay factor. No weight decay is
                implemented for this model; a warning is printed if it is not
                ``None``. Defaults to ``None``.
            n_latent (int): Size of the latent space of the encoder. Defaults
                to ``8``.

        Returns:
            tuple: Tuple consisting of the losses and the accuracy.
        """
        if weight_decay is not None:
            print(
                "WARNING: Weight decay is non-zero but no weight decay is used for this model.")

        # Remember the latent size so generate() samples codes of the right
        # length instead of assuming the default.
        self.n_latent = n_latent

        # The labels are not needed for an autoencoder.
        X, _, phase = self.data_loading.load()
        print("X", X.get_shape())
        X_flat = tf.reshape(X, shape=[-1, 28 * 28])

        sampled_z, mean, std = self.encoder(X, phase, n_latent=n_latent)
        img = self.decoder(sampled_z, phase, n_latent=n_latent)

        # Reconstruction loss: summed squared pixel error per image.
        flatten_img = tf.reshape(img, [-1, 28 * 28])
        img_loss = tf.reduce_sum(
            tf.squared_difference(flatten_img, X_flat), 1)

        # Latent loss: KL divergence to a standard normal. ``std`` is the log
        # of the standard deviation (see encoder), hence the factors of two.
        latent_loss = -0.5 * tf.reduce_sum(
            1.0 + 2.0 * std - tf.square(mean) - tf.exp(2.0 * std), 1)
        losses = img_loss + latent_loss

        # There is no accuracy here, but keep it so code can be reused.
        accuracy = tf.zeros([1, 1], tf.float32)

        return losses, accuracy

    def lrelu(self, x, alpha=0.3):
        """Leaky ReLU activation function.

        Args:
            x (tf.Variable): Input to the activation function.
            alpha (float): Factor of the leaky ReLU. Defines how `leaky` it
                is. Defaults to ``0.3``.

        Returns:
            tf.Variable: Output after the activation function.
        """
        return tf.maximum(x, tf.multiply(x, alpha))

    def _dropout_keep_prob(self, phase):
        """Returns a keep probability of ``0.8`` in the ``"train"`` phase and ``1.0`` otherwise."""
        return tf.cond(tf.equal(phase, tf.constant("train")),
                       lambda: tf.constant(0.8),
                       lambda: tf.constant(1.0))

    def encoder(self, X, phase, n_latent):
        """Encoder of the VAE.

        It consists of three convolutional layers with leaky ReLU activation,
        each followed by dropout (keep probability ``0.8`` during training,
        ``1.0`` otherwise), and two dense layers on the flattened features
        that produce the mean and the log standard deviation of the latent
        distribution.

        Args:
            X (tf.Variable): Input to the encoder. It is reshaped to
                ``[-1, 28, 28, 1]``.
            phase (tf.Variable): Phase variable, determining if we are in
                training or evaluation mode.
            n_latent (int): Size of the latent space of the encoder.

        Returns:
            tuple: The sampled latent code ``z`` (graph tensor
            ``encoder/z:0``), the mean, and the log standard deviation
            (``z = mean + epsilon * exp(sd)``).
        """
        keep_prob = self._dropout_keep_prob(phase)
        activation = self.lrelu

        with tf.variable_scope("encoder", reuse=None):
            X = tf.reshape(X, shape=[-1, 28, 28, 1])
            x = tf.layers.conv2d(X, filters=64, kernel_size=4, strides=2,
                                 padding='same', activation=activation)
            x = tf.nn.dropout(x, keep_prob)
            x = tf.layers.conv2d(x, filters=64, kernel_size=4, strides=2,
                                 padding='same', activation=activation)
            x = tf.nn.dropout(x, keep_prob)
            x = tf.layers.conv2d(x, filters=64, kernel_size=4, strides=1,
                                 padding='same', activation=activation)
            x = tf.nn.dropout(x, keep_prob)
            x = tf.contrib.layers.flatten(x)

            mn = tf.layers.dense(x, units=n_latent)
            # Treated as a log standard deviation: it is exponentiated below.
            sd = 0.5 * tf.layers.dense(x, units=n_latent)

            # Reparameterization: one standard-normal draw per example and
            # latent dimension, scaled by exp(sd) and shifted by the mean.
            epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_latent]))
            z = tf.add(mn, tf.multiply(epsilon, tf.exp(sd)), name="z")

            return z, mn, sd

    def decoder(self, sampled_z, phase, n_latent):
        """The decoder for the VAE.

        It uses two dense layers with leaky ReLU activation, followed by
        three deconvolutional layers with regular ReLU activation and a final
        dense layer with sigmoid activation. Dropout (keep probability
        ``0.8`` during training, ``1.0`` otherwise) is applied after the
        first two deconvolutional layers.

        Args:
            sampled_z (tf.Variable): Sampled ``z`` from the encoder of the
                size ``n_latent``.
            phase (tf.Variable): Phase variable, determining if we are in
                training or evaluation mode.
            n_latent (int): Size of the latent space of the encoder. Not used
                by the decoder itself; the input size is taken from
                ``sampled_z``.

        Returns:
            tf.Variable: A tensor of the same size as the original images
            (``28`` by ``28``), available in the graph as
            ``decoder/decoder_op:0``.
        """
        keep_prob = self._dropout_keep_prob(phase)

        with tf.variable_scope("decoder", reuse=None):
            x = tf.layers.dense(sampled_z, units=24, activation=self.lrelu)
            # 24 * 2 + 1 = 49 units, reshaped into a 7 x 7 single-channel map.
            x = tf.layers.dense(x, units=24 * 2 + 1, activation=self.lrelu)
            x = tf.reshape(x, [-1, 7, 7, 1])

            x = tf.layers.conv2d_transpose(
                x, filters=64, kernel_size=4, strides=2, padding='same',
                activation=tf.nn.relu)
            x = tf.nn.dropout(x, keep_prob)
            x = tf.layers.conv2d_transpose(
                x, filters=64, kernel_size=4, strides=1, padding='same',
                activation=tf.nn.relu)
            x = tf.nn.dropout(x, keep_prob)
            x = tf.layers.conv2d_transpose(
                x, filters=64, kernel_size=4, strides=1, padding='same',
                activation=tf.nn.relu)

            x = tf.contrib.layers.flatten(x)
            x = tf.layers.dense(x, units=28 * 28, activation=tf.nn.sigmoid)
            img = tf.reshape(x, shape=[-1, 28, 28], name="decoder_op")

            return img

    def _sample_latent(self, num_samples=5):
        """Draws ``num_samples`` standard-normal latent vectors of the model's latent size."""
        # Fall back to the historical default of 8 if set_up() was bypassed.
        n_latent = getattr(self, "n_latent", 8)
        return [np.random.normal(0, 1, n_latent) for _ in range(num_samples)]

    def generate(self, sess, sampled_z=None):
        """Function to generate images using the decoder.

        The latent codes are fed in place of the encoder output
        ``encoder/z:0`` and the decoded images are plotted directly.

        Args:
            sess (tf.Session): A TensorFlow session.
            sampled_z (tf.Variable): Latent codes to decode, one per example,
                each of length ``n_latent``. Defaults to ``None``, which uses
                five randomly sampled ``z`` from a normal with stddev =
                ``1.0``.
        """
        if sampled_z is None:
            sampled_z = self._sample_latent()

        graph = tf.get_default_graph()
        z = graph.get_tensor_by_name("encoder/z:0")
        dec = graph.get_tensor_by_name("decoder/decoder_op:0")
        imgs = sess.run(dec, feed_dict={z: sampled_z})

        for img in imgs:
            plt.figure(figsize=(1, 1))
            plt.axis('off')
            plt.imshow(np.reshape(img, [28, 28]), cmap='gray')
        # NOTE(review): assumes a single show() after all figures are created;
        # the original indentation of this call could not be recovered.
        plt.show()