Source code for deepobs.cifar100.cifar100_wrn

# -*- coding: utf-8 -*-
"""
This module implements the wide residual network (WRN) [1] architectures on the
CIFAR-100 data set. This is not a stand-alone deepobs test problem, but is
instantiated by the test problems cifar100_wrn404, et cetera.

The TensorFlow code is adapted from [2].


[1]: https://arxiv.org/abs/1605.07146
[2]: https://github.com/dalgu90/wrn-tensorflow
"""

import numpy as np
import tensorflow as tf
import cifar100_input


[docs]class set_up: """Class providing the functionality for `Wide Residual Networks`_ on `CIFAR-100`. The details of the architectures are described in the paper. This test problem is instantiated by the test problems cifar100_wrn404, et cetera. TensorFlow code is adapted from `here`_. Args: batch_size (int): Batch size of the data points. No default value specified. num_residual_units (int): Number of residual units in the network. No default value specified. k (int): Network width. No default value specified. weight_decay (float): Weight decay factor. In this model weight decay is applied to the weights, but not the biases. No default value specified. bn_decay (float): Decay factor for the moving average in the batch norm layer. No default value specified. Attributes: data_loading (deepobs.data_loading): Data loading class for `CIFAR-100`, :class:`.cifar100_input.data_loading`. losses (tf.Tensor): Tensor of size ``batch_size`` containing the individual losses per data point. accuracy (tf.Tensor): Tensor containing the accuracy of the model. train_init_op (tf.Operation): A TensorFlow operation to be performed before starting every training epoch. train_eval_init_op (tf.Operation): A TensorFlow operation to be performed before starting every training eval epoch. test_init_op (tf.Operation): A TensorFlow operation to be performed before starting every test evaluation phase. .. _Wide Residual Networks: https://arxiv.org/abs/1605.07146 .. _here: https://github.com/dalgu90/wrn-tensorflow """ def __init__(self, batch_size, num_residual_units, k, weight_decay, bn_decay): """Initializes the problem set_up class. Args: batch_size (int): Batch size of the data points. No default value specified. num_residual_units (int): Number of residual units in the network. No default value specified. k (int): Network width. No default value specified. weight_decay (float): Weight decay factor. In this model weight decay is applied to the weights, but not the biases. No default value specified. bn_decay (float): Decay factor for the moving average in the batch norm layer. No default value specified. """ self.data_loading = cifar100_input.data_loading(batch_size=batch_size) self.losses, self.accuracy = self.set_up( num_residual_units, k, weight_decay, bn_decay) # Operations to do when switching the phase (the one defined in data_loading initializes the iterator and assigns the phase variable, here you can add more operations) self.train_init_op = tf.group([self.data_loading.train_init_op]) self.train_eval_init_op = tf.group( [self.data_loading.train_eval_init_op]) self.test_init_op = tf.group([self.data_loading.test_init_op])
[docs] def get(self): """Returns the losses and the accuray of the model. Returns: tupel: Tupel consisting of the losses and the accuracy. """ return self.losses, self.accuracy
[docs] def set_up(self, num_residual_units, k, weight_decay, bn_decay): """Sets up the test problem. Args: num_residual_units (int): Number of residual units in the network. No default value specified. k (int): Network width. No default value specified. weight_decay (float): Weight decay factor. In this model weight decay is applied to the weights, but not the biases. No default value specified. bn_decay (float): Decay factor for the moving average in the batch norm layer. No default value specified. Returns: tupel: Tupel consisting of the losses and the accuracy. """ # Number of filter channels and stride for the blocks filters = [16, 16 * k, 32 * k, 64 * k] strides = [1, 2, 2] # num_residual_units = 4 # k = 1 # bn_averaging = 0.9 X, y, phase = self.data_loading.load() # Initial convolution layer x = self.conv(X, filter_size=3, out_channels=16, stride=1, name='conv_0') # Loop over three residual blocks for i in xrange(1, 4, 1): # First residual unit with tf.variable_scope('unit_%d_0' % i): x = self.batch_norm( x, phase=phase, decay=bn_decay, name="bn_1") x = tf.nn.relu(x, name='relu_1') # Shortcut if filters[i - 1] == filters[i]: if strides[i - 1] == 1: shortcut = tf.identity(x) else: shortcut = tf.nn.max_pool(x, [1, strides[i - 1], strides[i - 1], 1], [1, strides[i - 1], strides[i - 1], 1], 'VALID') else: shortcut = self.conv(x, filter_size=1, out_channels=filters[i], stride=strides[i - 1], name='shortcut') # Residual x = self.conv(x, filter_size=3, out_channels=filters[i], stride=strides[i - 1], name='conv_1') x = self.batch_norm( x, phase=phase, decay=bn_decay, name="bn_2") x = tf.nn.relu(x, name='relu_2') x = self.conv(x, filter_size=3, out_channels=filters[i], stride=1, name='conv_2') # Merge x = x + shortcut # further residual units for j in xrange(1, num_residual_units, 1): with tf.variable_scope('unit_%d_%d' % (i, j)): # Shortcut shortcut = x # Residual x = self.batch_norm( x, phase=phase, decay=bn_decay, name="bn_1") x = tf.nn.relu(x, name='relu_1') x = self.conv(x, filter_size=3, out_channels=filters[i], stride=1, name='conv_1') x = self.batch_norm( x, phase=phase, decay=bn_decay, name="bn_2") x = tf.nn.relu(x, name='relu_2') x = self.conv(x, filter_size=3, out_channels=filters[i], stride=1, name='conv_2') # Merge x = x + shortcut # Last unit with tf.variable_scope('unit_last'): x = self.batch_norm(x, phase=phase, decay=bn_decay) x = tf.nn.relu(x, name="relu") x = tf.reduce_mean(x, [1, 2]) # Reshaping and final fully-connected layer with tf.variable_scope('fully-connected'): x_shape = x.get_shape().as_list() x = tf.reshape(x, [-1, x_shape[1]]) linear_outputs = self.fc(x, 100) # Softmax and loss losses = tf.nn.softmax_cross_entropy_with_logits_v2( labels=y, logits=linear_outputs) # Add weight decay to the weight variables, but not to the biases for W in tf.get_collection("regularizable_variables"): tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, weight_decay * tf.nn.l2_loss(W)) # Compute mean accuracy y_pred = tf.argmax(linear_outputs, 1) y_correct = tf.argmax(y, 1) correct_prediction = tf.equal(y_pred, y_correct) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) return losses, accuracy
[docs] def batch_norm(self, x, phase, decay=0.9, name="batch_norm"): """Apply batch normalization to tensor x. Args: x (tf.Tensor): Input tensor to the batch norm layer. phase (tf.Variable): Phase variable switching between train and evaluation mode of the batch norm layer depending on its value ("train", "train_eval", "test"). decay (float): Decay factor for the moving average in the batch norm layer. Defaults to ``0.9``. name (str): Name for the layer. Defaults to ``batch_norm``. Returns: tf.Variable: Output after the batch norm layer. """ with tf.variable_scope(name): # Compute the mean and variance of x across the axes 0, 1 and 2 # TODO: with this axis reduction, this is GLOBAL normalization, is this what we want? mean_batch, variance_batch = tf.nn.moments(x, [0, 1, 2]) # Allocate variables to maintain a moving average of the batch mean/variance mean_avg = tf.get_variable('mean_avg', mean_batch.get_shape(), tf.float32, initializer=tf.zeros_initializer, trainable=False) variance_avg = tf.get_variable('std_avg', variance_batch.get_shape(), tf.float32, initializer=tf.ones_initializer, trainable=False) # Allocate variables for the beta and gamma in batch norm # TODO: Do we want those to be trainable? beta = tf.get_variable('beta', mean_batch.get_shape(), tf.float32, initializer=tf.zeros_initializer, trainable=True) gamma = tf.get_variable('gamma', variance_batch.get_shape(), tf.float32, initializer=tf.ones_initializer, trainable=True) # Add operations updating the moving averages of mean and variance # These ops are added to the UPDATE_OPS graph collection and must be added # as a dependency for the train step in order to be executed update_mean = mean_avg.assign( decay * mean_avg + (1.0 - decay) * mean_batch) update_variance = variance_avg.assign( decay * variance_avg + (1.0 - decay) * variance_batch) tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_mean) tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_variance) # Select batch mean/variance when phase=="train", otherwise select the # moving averages mean, variance = tf.cond(tf.equal(phase, "train"), lambda: (mean_batch, variance_batch), lambda: (mean_avg, variance_avg)) # Return batch-normalized tensor return tf.nn.batch_normalization(x, mean, variance, beta, gamma, 1e-5)
[docs] def conv(self, x, filter_size, out_channels, stride, padding="SAME", name="conv"): """Apply a convolution to tensor ``x`` with a convolution kernel of shape ``filter_size * filter_size * out_channels``, as well as stride and padding as specified. The kernel is created/retrieved via tf.get_variable. No bias is added and no non-linearity is applied. Args: x (tf.Tensor): Input tensor to the convolutional layer. filter_size (int): Size of the convolution. No default value specified. out_channels (int): Number of output channels after the conv layer. stride (int): Stride of the convolution. No default value specified. padding (int): Padding of the convolution. Can be ``SAME`` or ``VALID``. Defaults to ``SAME``. name (str): Name of the layer. Defaults to ``conv``. Returns: tf.Variable: Output after the convolutional layer. """ in_shape = x.get_shape() with tf.variable_scope(name): init = tf.random_normal_initializer( stddev=np.sqrt(1.0 / filter_size / filter_size / out_channels)) W = tf.get_variable("W", [filter_size, filter_size, in_shape[3], out_channels], tf.float32, initializer=init) if W not in tf.get_collection("regularizable_variables"): tf.add_to_collection("regularizable_variables", W) return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding=padding, name="output")
[docs] def fc(self, x, out_dim, name='fc'): """Apply a affine transformation (fully-connected layer) to tensor ``x`` with output dimension ``out_dim``. Weight matrix and bias vector are created/retrieved via tf.get_variable. No non-linearity is applied. Args: x (tf.Tensor): Input tensor to the convolutional layer. out_dim (int): Number of output dimensions after the fully-connected layer. name (str): Name of the layer. Defaults to ``fc``. Returns: tf.Variable: Output after the fully-connected layer. """ with tf.variable_scope(name): initializer = tf.random_normal_initializer( stddev=np.sqrt(1.0 / out_dim)) W = tf.get_variable("W", [x.get_shape()[1], out_dim], tf.float32, initializer=initializer) if W not in tf.get_collection("regularizable_variables"): tf.add_to_collection("regularizable_variables", W) b = tf.get_variable("b", [out_dim], tf.float32, initializer=tf.constant_initializer(0.0)) return tf.matmul(x, W) + b