# -*- coding: utf-8 -*-
"""
This module implements the wide residual network (WRN) [1] architectures on the
SVHN data set. This is not a stand-alone deepobs test problem, but is
instantiated by the test problems svhn_wrn164, et cetera.
The tensorflow code is adapted from [2].
[1]: https://arxiv.org/abs/1605.07146
[2]: https://github.com/dalgu90/wrn-tensorflow
"""
import numpy as np
import tensorflow as tf
import svhn_input
class set_up:
    """Class providing the functionality for `Wide Residual Networks`_ on `SVHN`.

    The details of the architectures are described in the paper. This class is
    not a stand-alone test problem; it is instantiated by the test problems
    svhn_wrn164, et cetera. TensorFlow code is adapted from `here`_.

    Args:
      batch_size (int): Batch size of the data points. No default value specified.
      num_residual_units (int): Number of residual units in the network. No default value specified.
      k (int): Network width. No default value specified.
      weight_decay (float): Weight decay factor. In this model weight decay is applied to the weights, but not the biases. No default value specified.
      bn_decay (float): Decay factor for the moving average in the batch norm layer. No default value specified.

    Attributes:
      data_loading (deepobs.data_loading): Data loading class for `SVHN`, :class:`.svhn_input.data_loading`.
      losses (tf.Tensor): Tensor of size ``batch_size`` containing the individual losses per data point.
      accuracy (tf.Tensor): Tensor containing the accuracy of the model.
      train_init_op (tf.Operation): A TensorFlow operation to be performed before starting every training epoch.
      train_eval_init_op (tf.Operation): A TensorFlow operation to be performed before starting every training eval epoch.
      test_init_op (tf.Operation): A TensorFlow operation to be performed before starting every test evaluation phase.

    .. _Wide Residual Networks: https://arxiv.org/abs/1605.07146
    .. _here: https://github.com/dalgu90/wrn-tensorflow
    """

    def __init__(self, batch_size, num_residual_units, k, weight_decay, bn_decay):
        """Initializes the problem set_up class.

        Creates the data loading object, builds the network graph and defines
        the operations that switch between the train, train-eval and test
        phases.

        Args:
          batch_size (int): Batch size of the data points. No default value specified.
          num_residual_units (int): Number of residual units in the network. No default value specified.
          k (int): Network width. No default value specified.
          weight_decay (float): Weight decay factor. In this model weight decay is applied to the weights, but not the biases. No default value specified.
          bn_decay (float): Decay factor for the moving average in the batch norm layer. No default value specified.
        """
        self.data_loading = svhn_input.data_loading(batch_size=batch_size)
        self.losses, self.accuracy = self.set_up(
            num_residual_units, k, weight_decay, bn_decay)

        # Operations to run when switching the phase. The ones defined in
        # data_loading initialize the iterator and assign the phase variable;
        # further operations can be added to the groups below.
        self.train_init_op = tf.group([self.data_loading.train_init_op])
        self.train_eval_init_op = tf.group(
            [self.data_loading.train_eval_init_op])
        self.test_init_op = tf.group([self.data_loading.test_init_op])

    def get(self):
        """Returns the losses and the accuracy of the model.

        Returns:
          tuple: Tuple consisting of the losses and the accuracy.
        """
        return self.losses, self.accuracy

    def set_up(self, num_residual_units, k, weight_decay, bn_decay):
        """Sets up the test problem.

        Builds the wide residual network on top of the tensors returned by
        ``self.data_loading.load()``. The weight decay terms are added to the
        ``tf.GraphKeys.REGULARIZATION_LOSSES`` collection; they are not
        included in the returned losses.

        Args:
          num_residual_units (int): Number of residual units in the network. No default value specified.
          k (int): Network width. No default value specified.
          weight_decay (float): Weight decay factor. In this model weight decay is applied to the weights, but not the biases. No default value specified.
          bn_decay (float): Decay factor for the moving average in the batch norm layer. No default value specified.

        Returns:
          tuple: Tuple consisting of the losses and the accuracy.
        """
        # Number of filter channels and stride for the blocks
        filters = [16, 16 * k, 32 * k, 64 * k]
        strides = [1, 2, 2]

        X, y, phase = self.data_loading.load()

        # Initial convolution layer
        x = self.conv(X, filter_size=3, out_channels=16,
                      stride=1, name='conv_0')

        # Loop over three residual blocks. ``range`` (rather than the
        # Python 2-only ``xrange``) keeps this working on Python 2 and 3.
        for i in range(1, 4):
            in_channels = filters[i - 1]
            out_channels = filters[i]
            stride = strides[i - 1]

            # First residual unit: may change the number of channels and the
            # spatial resolution, so the shortcut has to be adapted as well.
            with tf.variable_scope('unit_%d_0' % i):
                x = self.batch_norm(
                    x, phase=phase, decay=bn_decay, name="bn_1")
                x = tf.nn.relu(x, name='relu_1')

                # Shortcut
                if in_channels == out_channels:
                    if stride == 1:
                        shortcut = tf.identity(x)
                    else:
                        shortcut = tf.nn.max_pool(x, [1, stride, stride, 1],
                                                  [1, stride, stride, 1], 'VALID')
                else:
                    shortcut = self.conv(x, filter_size=1, out_channels=out_channels,
                                         stride=stride, name='shortcut')

                # Residual
                x = self.conv(x, filter_size=3, out_channels=out_channels,
                              stride=stride, name='conv_1')
                x = self.batch_norm(
                    x, phase=phase, decay=bn_decay, name="bn_2")
                x = tf.nn.relu(x, name='relu_2')
                x = self.conv(x, filter_size=3,
                              out_channels=out_channels, stride=1, name='conv_2')

                # Merge
                x = x + shortcut

            # Further residual units: shapes are preserved, so the shortcut
            # is simply the input of the unit.
            for j in range(1, num_residual_units):
                with tf.variable_scope('unit_%d_%d' % (i, j)):
                    # Shortcut
                    shortcut = x

                    # Residual
                    x = self.batch_norm(
                        x, phase=phase, decay=bn_decay, name="bn_1")
                    x = tf.nn.relu(x, name='relu_1')
                    x = self.conv(x, filter_size=3,
                                  out_channels=out_channels, stride=1, name='conv_1')
                    x = self.batch_norm(
                        x, phase=phase, decay=bn_decay, name="bn_2")
                    x = tf.nn.relu(x, name='relu_2')
                    x = self.conv(x, filter_size=3,
                                  out_channels=out_channels, stride=1, name='conv_2')

                    # Merge
                    x = x + shortcut

        # Last unit: batch norm, ReLU and averaging over the axes 1 and 2
        with tf.variable_scope('unit_last'):
            x = self.batch_norm(x, phase=phase, decay=bn_decay)
            x = tf.nn.relu(x, name="relu")
            x = tf.reduce_mean(x, [1, 2])

        # Reshaping and final fully-connected layer
        with tf.variable_scope('fully-connected'):
            x_shape = x.get_shape().as_list()
            x = tf.reshape(x, [-1, x_shape[1]])
            linear_outputs = self.fc(x, 10)

        # Softmax and loss
        losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=y, logits=linear_outputs)

        # Add weight decay to the weight variables, but not to the biases.
        # The terms only go into the REGULARIZATION_LOSSES collection.
        for W in tf.get_collection("regularizable_variables"):
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                 weight_decay * tf.nn.l2_loss(W))

        # Compute mean accuracy
        y_pred = tf.argmax(linear_outputs, 1)
        y_correct = tf.argmax(y, 1)
        correct_prediction = tf.equal(y_pred, y_correct)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        return losses, accuracy

    def batch_norm(self, x, phase, decay=0.9, name="batch_norm"):
        """Apply batch normalization to tensor x.

        Args:
          x (tf.Tensor): Input tensor to the batch norm layer.
          phase (tf.Variable): Phase variable switching between train and evaluation mode of the batch norm layer depending on its value ("train", "train_eval", "test").
          decay (float): Decay factor for the moving average in the batch norm layer. Defaults to ``0.9``.
          name (str): Name for the layer. Defaults to ``batch_norm``.

        Returns:
          tf.Tensor: Output after the batch norm layer.
        """
        with tf.variable_scope(name):
            # Compute the mean and variance of x across the axes 0, 1 and 2
            # TODO: with this axis reduction, this is GLOBAL normalization, is this what we want?
            mean_batch, variance_batch = tf.nn.moments(x, [0, 1, 2])

            # Allocate variables to maintain a moving average of the batch
            # mean/variance. NOTE: the variance variable is named 'std_avg'
            # although it stores a variance; the name is left unchanged so
            # that the variable names in the graph stay the same.
            mean_avg = tf.get_variable('mean_avg', mean_batch.get_shape(), tf.float32,
                                       initializer=tf.zeros_initializer, trainable=False)
            variance_avg = tf.get_variable('std_avg', variance_batch.get_shape(), tf.float32,
                                           initializer=tf.ones_initializer, trainable=False)

            # Allocate variables for the beta and gamma in batch norm
            # TODO: Do we want those to be trainable?
            beta = tf.get_variable('beta', mean_batch.get_shape(), tf.float32,
                                   initializer=tf.zeros_initializer, trainable=True)
            gamma = tf.get_variable('gamma', variance_batch.get_shape(), tf.float32,
                                    initializer=tf.ones_initializer, trainable=True)

            # Add operations updating the moving averages of mean and variance
            # These ops are added to the UPDATE_OPS graph collection and must be added
            # as a dependency for the train step in order to be executed
            update_mean = mean_avg.assign(
                decay * mean_avg + (1.0 - decay) * mean_batch)
            update_variance = variance_avg.assign(
                decay * variance_avg + (1.0 - decay) * variance_batch)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_mean)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_variance)

            # Select batch mean/variance when phase=="train", otherwise select the
            # moving averages
            mean, variance = tf.cond(tf.equal(phase, "train"),
                                     lambda: (mean_batch, variance_batch),
                                     lambda: (mean_avg, variance_avg))

            # Return batch-normalized tensor
            return tf.nn.batch_normalization(x, mean, variance, beta, gamma, 1e-5)

    def conv(self, x, filter_size, out_channels, stride, padding="SAME", name="conv"):
        """Apply a convolution to tensor ``x``.

        The convolution kernel has shape
        ``[filter_size, filter_size, in_channels, out_channels]``, where
        ``in_channels`` is taken from dimension 3 of the static shape of ``x``.
        The kernel is created/retrieved via tf.get_variable and registered in
        the ``"regularizable_variables"`` collection. No bias is added and no
        non-linearity is applied.

        Args:
          x (tf.Tensor): Input tensor to the convolutional layer.
          filter_size (int): Size of the convolution. No default value specified.
          out_channels (int): Number of output channels after the conv layer.
          stride (int): Stride of the convolution. No default value specified.
          padding (str): Padding of the convolution. Can be ``SAME`` or ``VALID``. Defaults to ``SAME``.
          name (str): Name of the layer. Defaults to ``conv``.

        Returns:
          tf.Tensor: Output after the convolutional layer.
        """
        in_shape = x.get_shape()
        with tf.variable_scope(name):
            init = tf.random_normal_initializer(
                stddev=np.sqrt(1.0 / filter_size / filter_size / out_channels))
            W = tf.get_variable("W",
                                [filter_size, filter_size,
                                 in_shape[3], out_channels],
                                tf.float32,
                                initializer=init)
            # Register the kernel for weight decay only once, even if the
            # variable is retrieved again.
            if W not in tf.get_collection("regularizable_variables"):
                tf.add_to_collection("regularizable_variables", W)
            return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding=padding, name="output")

    def fc(self, x, out_dim, name='fc'):
        """Apply an affine transformation (fully-connected layer) to tensor ``x``.

        Weight matrix and bias vector are created/retrieved via
        tf.get_variable. Only the weight matrix is registered in the
        ``"regularizable_variables"`` collection. No non-linearity is applied.

        Args:
          x (tf.Tensor): Input tensor to the fully-connected layer.
          out_dim (int): Number of output dimensions after the fully-connected layer.
          name (str): Name of the layer. Defaults to ``fc``.

        Returns:
          tf.Tensor: Output after the fully-connected layer.
        """
        with tf.variable_scope(name):
            initializer = tf.random_normal_initializer(
                stddev=np.sqrt(1.0 / out_dim))
            W = tf.get_variable("W",
                                [x.get_shape()[1], out_dim],
                                tf.float32,
                                initializer=initializer)
            # Register the weight matrix for weight decay only once; the bias
            # is deliberately not registered.
            if W not in tf.get_collection("regularizable_variables"):
                tf.add_to_collection("regularizable_variables", W)
            b = tf.get_variable("b", [out_dim], tf.float32,
                                initializer=tf.constant_initializer(0.0))
            return tf.matmul(x, W) + b