# Source code for deepobs.fmnist.fmnist_vae

# -*- coding: utf-8 -*-
"""
Variational Autoencoder (VAE) on Fashion-MNIST. Adapted from https://towardsdatascience.com/teaching-a-variational-autoencoder-vae-to-draw-mnist-characters-978675c95776 and the mnist one.
"""

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import fmnist_input


class set_up:
    """Class providing the functionality for a Variational Autoencoder (VAE) adapted from `here`_ on `Fashion-MNIST`.

    Args:
        batch_size (int): Batch size of the data points. Defaults to ``64``.
        n_latent (int): Size of the latent space of the encoder. Defaults to ``8``.
        weight_decay (float): Weight decay factor. In this model there is no weight decay implemented. Defaults to ``None``.

    Attributes:
        data_loading (deepobs.data_loading): Data loading class for `Fashion-MNIST`, :class:`.fmnist_input.data_loading`.
        n_latent (int): Size of the latent space the model was built with.
        losses (tf.Tensor): Tensor of size ``batch_size`` containing the individual losses per data point.
        accuracy (tf.Tensor): Tensor containing the accuracy of the model. As there is no accuracy when the loss function is given directly, we set it to ``0`` (a ``[1, 1]`` tensor of zeros).
        train_init_op (tf.Operation): A TensorFlow operation to be performed before starting every training epoch.
        train_eval_init_op (tf.Operation): A TensorFlow operation to be performed before starting every training eval epoch.
        test_init_op (tf.Operation): A TensorFlow operation to be performed before starting every test evaluation phase.

    .. _here: https://towardsdatascience.com/teaching-a-variational-autoencoder-vae-to-draw-mnist-characters-978675c95776
    """

    def __init__(self, batch_size=64, n_latent=8, weight_decay=None):
        self.data_loading = fmnist_input.data_loading(batch_size=batch_size)
        # Remembered so that ``generate`` can draw latent samples of the
        # correct size when the model is built with a non-default ``n_latent``.
        self.n_latent = n_latent
        self.losses, self.accuracy = self.set_up(
            weight_decay=weight_decay, n_latent=n_latent)

        # Operations to do when switching the phase (the one defined in
        # data_loading initializes the iterator and assigns the phase
        # variable, here you can add more operations).
        self.train_init_op = tf.group([self.data_loading.train_init_op])
        self.train_eval_init_op = tf.group(
            [self.data_loading.train_eval_init_op])
        self.test_init_op = tf.group([self.data_loading.test_init_op])

    def get(self):
        """Returns the losses and the accuracy of the model.

        Returns:
            tuple: Tuple consisting of the losses and the accuracy.

        """
        return self.losses, self.accuracy

    def set_up(self, weight_decay=None, n_latent=8):
        """Sets up the test problem.

        Builds the encoder and the decoder and combines the reconstruction
        loss with the latent (KL) loss.

        Args:
            weight_decay (float): Weight decay factor. In this model there is no weight decay implemented. Defaults to ``None``.
            n_latent (int): Size of the latent space of the encoder. Defaults to ``8``.

        Returns:
            tuple: Tuple consisting of the losses and the accuracy.

        """
        if weight_decay is not None:
            print(
                "WARNING: Weight decay is non-zero but no weight decay is used for this model.")
        # The labels are not needed for the (unsupervised) VAE objective.
        X, _, phase = self.data_loading.load()
        print("X", X.get_shape())
        X_flat = tf.reshape(X, shape=[-1, 28 * 28])

        sampled_z, mean, log_std = self.encoder(X, phase, n_latent=n_latent)
        img = self.decoder(sampled_z, phase, n_latent=n_latent)

        # Reconstruction loss: summed squared error per data point.
        flatten_img = tf.reshape(img, [-1, 28 * 28])
        img_loss = tf.reduce_sum(tf.squared_difference(flatten_img, X_flat), 1)
        # Latent loss: KL divergence between N(mean, exp(log_std)^2) and the
        # standard normal, -0.5 * sum(1 + log(var) - mean^2 - var) with
        # log(var) = 2 * log_std.
        latent_loss = -0.5 * tf.reduce_sum(
            1.0 + 2.0 * log_std - tf.square(mean) - tf.exp(2.0 * log_std), 1)
        losses = img_loss + latent_loss

        # There is no accuracy here but keep it, so code can be reused
        accuracy = tf.zeros([1, 1], tf.float32)

        return losses, accuracy

    def lrelu(self, x, alpha=0.3):
        """Leaky ReLU activation function.

        Args:
            x (tf.Variable): Input to the activation function.
            alpha (float): Factor of the leaky ReLU. Defines how `leaky` it is. Defaults to ``0.3``.

        Returns:
            tf.Variable: Output after the activation function.

        """
        return tf.maximum(x, tf.multiply(x, alpha))

    def _dropout_keep_prob(self, phase):
        """Returns the dropout keep probability for the current phase.

        Args:
            phase (tf.Variable): Phase variable, determining if we are in training or evaluation mode.

        Returns:
            tf.Tensor: Scalar that is ``0.8`` when ``phase`` equals ``"train"`` and ``1.0`` (no dropout) otherwise.

        """
        return tf.cond(tf.equal(phase, tf.constant("train")),
                       lambda: tf.constant(0.8),
                       lambda: tf.constant(1.0))

    def encoder(self, X, phase, n_latent):
        """Encoder of the VAE. It consists of three convolutional layers followed by two parallel dense layers (one for the mean, one for the log standard deviation). The convolutional layers use the leaky ReLU activation function. After each convolutional layer dropout is applied with a keep probability of ``0.8`` (only in the training phase).

        Args:
            X (tf.Variable): Input to the encoder.
            phase (tf.Variable): Phase variable, determining if we are in training or evaluation mode.
            n_latent (int): Size of the latent space of the encoder.

        Returns:
            tuple: Output of the encoder, ``z``, the mean and the log standard deviation.

        """
        keep_prob = self._dropout_keep_prob(phase)
        with tf.variable_scope("encoder", reuse=None):
            x = tf.reshape(X, shape=[-1, 28, 28, 1])
            # Two strided convolutions followed by one with stride 1.
            for strides in (2, 2, 1):
                x = tf.layers.conv2d(x, filters=64, kernel_size=4,
                                     strides=strides, padding='same',
                                     activation=self.lrelu)
                x = tf.nn.dropout(x, keep_prob)
            x = tf.contrib.layers.flatten(x)
            mn = tf.layers.dense(x, units=n_latent)
            # The dense output is halved and later exponentiated, i.e. ``sd``
            # is used as the log of the standard deviation.
            sd = 0.5 * tf.layers.dense(x, units=n_latent)
            # Reparameterization: z = mean + eps * std with eps ~ N(0, 1).
            epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_latent]))
            z = tf.add(mn, tf.multiply(epsilon, tf.exp(sd)), name="z")

            return z, mn, sd

    def decoder(self, sampled_z, phase, n_latent):
        """The decoder for the VAE. It uses two dense layers, followed by three deconvolutional layers (dropout with a keep probability of ``0.8`` is applied after the first two, only in the training phase) and a final dense layer. The first two dense layers use the leaky ReLU activation, the deconvolutional ones use regular ReLU and the final dense layer uses a sigmoid.

        Args:
            sampled_z (tf.Variable): Sampled ``z`` from the encoder of the size ``n_latent``.
            phase (tf.Variable): Phase variable, determining if we are in training or evaluation mode.
            n_latent (int): Size of the latent space of the encoder. Not used inside the decoder; kept for a signature parallel to the encoder.

        Returns:
            tf.Variable: A tensor of the same size as the original images (``28`` by ``28``).

        """
        keep_prob = self._dropout_keep_prob(phase)
        with tf.variable_scope("decoder", reuse=None):
            x = tf.layers.dense(sampled_z, units=24, activation=self.lrelu)
            # 24 * 2 + 1 = 49 units, so the output can be reshaped to 7 x 7.
            x = tf.layers.dense(x, units=24 * 2 + 1, activation=self.lrelu)
            x = tf.reshape(x, [-1, 7, 7, 1])
            x = tf.layers.conv2d_transpose(
                x, filters=64, kernel_size=4, strides=2, padding='same', activation=tf.nn.relu)
            x = tf.nn.dropout(x, keep_prob)
            x = tf.layers.conv2d_transpose(
                x, filters=64, kernel_size=4, strides=1, padding='same', activation=tf.nn.relu)
            x = tf.nn.dropout(x, keep_prob)
            x = tf.layers.conv2d_transpose(
                x, filters=64, kernel_size=4, strides=1, padding='same', activation=tf.nn.relu)

            x = tf.contrib.layers.flatten(x)
            x = tf.layers.dense(x, units=28 * 28, activation=tf.nn.sigmoid)
            img = tf.reshape(x, shape=[-1, 28, 28], name="decoder_op")
            return img

    def generate(self, sess, sampled_z=None):
        """Function to generate images using the decoder. Images are plotted directly.

        The tensors are looked up by the names ``encoder/z:0`` and
        ``decoder/decoder_op:0`` in the default graph, so the model must have
        been built in that graph.

        Args:
            sess (tf.Session): A TensorFlow session.
            sampled_z (tf.Variable): Sampled ``z`` with dimensions ``number of examples`` times ``latent size``. Defaults to ``None`` which uses five randomly sampled ``z`` from a normal with stddev = ``1.0``.

        """
        if sampled_z is None:
            # Use the latent size the model was built with instead of a
            # hard-coded 8, so non-default ``n_latent`` values work as well.
            sampled_z = [np.random.normal(0, 1, self.n_latent)
                         for _ in range(5)]
        graph = tf.get_default_graph()
        z = graph.get_tensor_by_name("encoder/z:0")
        dec = graph.get_tensor_by_name("decoder/decoder_op:0")

        # Feeding ``z`` directly bypasses the encoder.
        imgs = sess.run(dec, feed_dict={z: sampled_z})

        for img in imgs:
            plt.figure(figsize=(1, 1))
            plt.axis('off')
            plt.imshow(np.reshape(img, [28, 28]), cmap='gray')
            plt.show()