"""
This module contains TensorFlow data loading functionality for ImageNet.
"""
import os
import tensorflow as tf
from .. import dataset_utils
[docs]class data_loading:
"""Class providing the data loading functionality for the ImageNet data set.
Args:
batch_size (int): Batch size of the input-output pairs. No default value is given.
Attributes:
batch_size (int): Batch size of the input-output pairs.
train_eval_size (int): Number of data points to evaluate during the `train eval` phase. Currently set to ``50000`` the size of the test set.
D_train (tf.data.Dataset): The training data set.
D_train_eval (tf.data.Dataset): The training evaluation data set. It is the same data as `D_train` but we go through it separately.
D_test (tf.data.Dataset): The test data set.
phase (tf.Variable): Variable to describe which phase we are currently in. Can be "train", "train_eval" or "test". The phase variable can determine the behaviour of the network, for example deactivate dropout during evaluation.
iterator (tf.data.Iterator): A single iterator for all three data sets. We us the initialization operators (see below) to switch this iterator to the data sets.
X (tf.Tensor): Tensor holding the ImageNet images. It has dimension `batch_size` x ``224`` (image size) x ``224`` (image size) x ``3`` (rgb).
y (tf.Tensor): Label of the ImageNet images. It has dimension `batch_size` x ``10`` (number of classes).
train_init_op (tf.Operation): A TensorFlow operation to be performed before starting every training epoch. It sets the `phase` variable to "train" and initializes the iterator to the training data set.
train_eval_init_op (tf.Operation): A TensorFlow operation to be performed before starting every training eval phase. It sets the `phase` variable to "train_eval" and initializes the iterator to the training eval data set.
test_init_op (tf.Operation): A TensorFlow operation to be performed before starting every test evaluation phase. It sets the `phase` variable to "test" and initializes the iterator to the test data set.
"""
def __init__(self, batch_size):
"""Initializes the data loading class.
Args:
batch_size (int): Batch size of the input-output pairs. No default value is given.
"""
self.train_eval_size = 50000 # The size of the test set
self.batch_size = batch_size
self.D_train = self.train_dataset(batch_size)
self.D_train_eval = self.train_eval_dataset(batch_size)
self.D_test = self.test_dataset(batch_size)
self.phase = tf.Variable("train", name="phase", trainable=False)
# Reinitializable iterator given types and shapes of the outputs (needs to be the same for train and test of course)
self.iterator = tf.data.Iterator.from_structure(
self.D_train.output_types, self.D_train.output_shapes)
self.X, self.y = self.iterator.get_next()
# Operations to do when switching the phase (initialize iterator and assign phase to phase variable)
self.train_init_op = tf.group([self.iterator.make_initializer(
self.D_train), tf.assign(self.phase, "train")], name="train_init_op")
self.train_eval_init_op = tf.group([self.iterator.make_initializer(
self.D_train_eval), tf.assign(self.phase, "train_eval")], name="train_eval_init_op")
self.test_init_op = tf.group([self.iterator.make_initializer(
self.D_test), tf.assign(self.phase, "test")], name="test_init_op")
[docs] def load(self):
"""Returns the data (`X` (images) and `y` (labels)) and the phase variable.
Returns:
tupel: Tupel consisting of the images (`X`), the label (`y`) and the phase variable (`phase`).
"""
return self.X, self.y, self.phase
[docs] def train_dataset(self, batch_size, data_augmentation=True):
"""Creates the training data set.
Args:
batch_size (int): Batch size of the input-output pairs.
data_augmentation (bool): Switch to turn basic data augmentation on or off while training. Defaults to ``true``.
Returns:
tf.data.Dataset: The training data set.
"""
filenames = [os.path.join(dataset_utils.get_data_dir(),
"imagenet", "train-" + str(i).zfill(5) + "-of-01024")
for i in range(1024)]
if data_augmentation:
D = self.make_dataset(
filenames,
batch_size,
per_image_standardization=True,
crop_size=224,
random_crop=True,
random_flip_left_right=True,
distort_color=False,
shuffle=True,
shuffle_buffer_size=15000,
num_prefetched_batches=8,
num_preprocessing_threads=16)
else:
D = self.make_dataset(
filenames,
batch_size,
per_image_standardization=True,
crop_size=224,
random_crop=False,
random_flip_left_right=False,
distort_color=False,
shuffle=True,
shuffle_buffer_size=15000,
num_prefetched_batches=8,
num_preprocessing_threads=16)
return D
[docs] def train_eval_dataset(self, batch_size, data_augmentation=True):
"""Creates the train eval data set.
Args:
batch_size (int): Batch size of the input-output pairs.
data_augmentation (bool): Switch to turn basic data augmentation on or off while evaluating the training data set. Defaults to ``true``.
Returns:
tf.data.Dataset: The train eval data set.
"""
filenames = [os.path.join(dataset_utils.get_data_dir(),
"imagenet", "train-" + str(i).zfill(5) + "-of-01024")
for i in range(1024)]
if data_augmentation:
D = self.make_dataset(
filenames,
batch_size,
per_image_standardization=True,
crop_size=224,
random_crop=True,
random_flip_left_right=True,
distort_color=True,
shuffle=False,
shuffle_buffer_size=-1,
num_prefetched_batches=4,
num_preprocessing_threads=8, data_set_size=self.train_eval_size)
else:
D = self.make_dataset(
filenames,
batch_size,
per_image_standardization=True,
crop_size=224,
random_crop=False,
random_flip_left_right=False,
distort_color=False,
shuffle=False,
shuffle_buffer_size=-1,
num_prefetched_batches=4,
num_preprocessing_threads=8, data_set_size=self.train_eval_size)
return D
[docs] def test_dataset(self, batch_size):
"""Creates the test data set.
Args:
batch_size (int): Batch size of the input-output pairs.
Returns:
tf.data.Dataset: The test data set.
"""
filenames = [os.path.join(dataset_utils.get_data_dir(),
"imagenet", "validation-" + str(i).zfill(5) + "-of-00128")
for i in range(128)]
return self.make_dataset(
filenames,
batch_size,
per_image_standardization=True,
crop_size=224,
random_crop=False,
random_flip_left_right=False,
distort_color=False,
shuffle=False,
shuffle_buffer_size=-1,
num_prefetched_batches=4,
num_preprocessing_threads=8)
[docs] def make_dataset(self, filenames, batch_size, per_image_standardization=True, crop_size=224, random_crop=False, random_flip_left_right=False, distort_color=False, shuffle=True, shuffle_buffer_size=15000, one_hot=True, num_prefetched_batches=8, num_preprocessing_threads=16, data_set_size=-1):
"""Creates a data set from filenames of the images and label files.
Args:
filenames (str): (List of) paths to the ``.bin`` files containing the images and labels.
batch_size (int): Batch size of the input-output pairs.
crop_size (int): Crop size of each image. Defaults to ``224``.
per_image_standardization (bool): Switch to standardize each image to have zero mean and unit norm. Defaults to ``True``.
random_crop (bool): Switch if random crops should be used. Defaults to ``False``.
random_flip_left_right (bool): Switch to randomly flip the images horizontally. Defaults to ``False``.
distort_color (bool): Switch to use random brightness, saturation, hue and contrast on each image. Defaults to ``False``.
shuffle (bool): Switch to turn on or off shuffling of the data set. Defaults to ``True``.
shuffle_buffer_size (int): Size of the shuffle buffer. Defaults to ``15000``.
one_hot (bool): Switch to turn on or off one-hot encoding of the labels. Defaults to ``True``.
num_prefetched_batches (int): Number of prefeteched batches, defaults to ``8``.
num_preprocessing_threads (int): The number of elements to process in parallel while applying the image transformations. Defaults to ``16``.
data_set_size (int): Size of the data set to extract from the images and label files. Defaults to ``-1`` meaning that the full data set is used.
Returns:
tf.data.Dataset: Data set object created from the images and label files.
"""
num_classes = 1000
# Define parse function depending on the above arguments and map the dataset
# through it
def _parse_func(example_serialized):
# Parse example proto, decode image and resize while preserving aspect
image_buffer, label, _ = self.parse_example_proto(
example_serialized)
image = self.decode_jpeg(image_buffer)
image = self.aspect_preserving_resize(
image, target_smaller_side=256)
# Crop to 224x224, either randomly or centered according to arguments
if random_crop:
image = tf.image.resize_image_with_crop_or_pad(image, 256, 256)
image = tf.random_crop(image, [224, 224, 3])
else:
image = tf.image.resize_image_with_crop_or_pad(image, 224, 224)
# Optionally perform random flip
if random_flip_left_right:
image = tf.image.random_flip_left_right(image)
# Optionally distort color
if distort_color:
image = self.color_distortion(image)
# Normalize
if per_image_standardization:
image = tf.image.per_image_standardization(image)
# Convert label to shape [] (instead of) [1,] such that the label vector for
# a mini-batch will later be of shape [batch_size,]
label = tf.reshape(label, [])
# Label to one-hot vector
if one_hot:
label = tf.squeeze(tf.one_hot(label, depth=num_classes))
return image, label
with tf.name_scope("imagenet_input"):
with tf.device('/cpu:0'):
# TODO: buffer_size, compression_type?
filenames = tf.random_shuffle(filenames)
D = tf.data.TFRecordDataset(filenames)
D = D.map(_parse_func, num_preprocessing_threads)
if shuffle:
D = D.shuffle(buffer_size=shuffle_buffer_size)
D = D.take(data_set_size)
D = D.batch(batch_size)
D = D.prefetch(buffer_size=num_prefetched_batches)
return D
[docs] def parse_example_proto(self, example_serialized):
"""Parses an Example proto containing a training example of an image. The output of the build_image_data.py image preprocessing script is a dataset containing serialized Example protocol buffers. Each Example proto contains the following fields:
image/height: 462
image/width: 581
image/colorspace: 'RGB'
image/channels: 3
image/class/label: 615
image/class/synset: 'n03623198'
image/class/text: 'knee pad'
image/format: 'JPEG'
image/filename: 'ILSVRC2012_val_00041207.JPEG'
image/encoded: <JPEG encoded string>
Args:
example_serialized (tf.string): Scalar Tensor tf.string containing a serialized Example protocol buffer.
Returns:
tupel: Tupel of image_buffer (tf.string) containing the contents of a JPEG file, the label (tf.int32) containing the label and text (tf.string) containing the human-readable label.
"""
# Dense features in Example proto.
feature_map = {
'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
'image/class/label': tf.FixedLenFeature([1], dtype=tf.int64,
default_value=-1),
'image/class/text': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
}
features = tf.parse_single_example(example_serialized, feature_map)
label = tf.cast(features['image/class/label'], dtype=tf.int32)
return features['image/encoded'], label, features['image/class/text']
[docs] def decode_jpeg(self, image_buffer, scope=None):
"""Decode a JPEG string into one 3-D float image Tensor.
Args:
image_buffer (tf.string): scalar string Tensor.
scope (str): Optional scope for name_scope.
Returns:
tf.Tensor: 3-D float Tensor with values ranging from [0, 1).
"""
with tf.name_scope(values=[image_buffer], name=scope,
default_name='decode_jpeg'):
# Decode the string as an RGB JPEG.
# Note that the resulting image contains an unknown height and width
# that is set dynamically by decode_jpeg. In other words, the height
# and width of image is unknown at compile-time.
image = tf.image.decode_jpeg(image_buffer, channels=3)
# After this point, all image pixels reside in [0,1)
# until the very end, when they're rescaled to (-1, 1). The various
# adjust_* ops all require this range for dtype float.
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
return image
[docs] def aspect_preserving_resize(self, image, target_smaller_side):
""""Resize image such that the smaller size has size ``target_smaller_sider`` while preserving the aspect ratio.
Args:
image (tf.Tensor): Tensor containing the image to resize.
target_smaller_side (int): Target size for the smaller side in pixel.
Returns:
tf.Tensor: The resized image, with the same aspect ratio as the input.
"""
shape = tf.shape(image)
height = tf.to_float(shape[0])
width = tf.to_float(shape[1])
smaller_side = tf.reduce_min(shape[0:2])
scale = tf.divide(target_smaller_side, tf.to_float(smaller_side))
new_height = tf.to_int32(tf.round(scale * height))
new_width = tf.to_int32(tf.round(scale * width))
# TODO: resize method?
resized_image = tf.image.resize_images(image, [new_height, new_width])
return resized_image
[docs] def color_distortion(self, image, scope=None):
"""Distort the color of the image.
Args:
image (tf.Tensor): Tensor containing single image.
scope (str): Optional scope for name_scope.
Returns:
tf.Tensor: The color-distorted image.
"""
with tf.name_scope(values=[image], name=scope, default_name='distort_color'):
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
# The random_* ops do not necessarily clamp.
image = tf.clip_by_value(image, 0.0, 1.0)
return image