# Source code for deepobs.two_d.two_d_input

# -*- coding: utf-8 -*-
"""
This module contains TensorFlow data loading functionality for a simple 2D loss function.
"""

import tensorflow as tf
import numpy as np


class data_loading:
    """Data loading functionality for simple 2D stochastic loss functions.

    Args:
        batch_size (int): Batch size. No default value is given.
        train_size (int): Size of the training set. Defaults to ``1000``.
        noise_level (float): Noise level of the training set. All training
            points are sampled from a Gaussian distribution with the noise
            level as the standard deviation. Defaults to ``6``.

    Attributes:
        batch_size (int): Batch size.
        train_size (int): Size of the training set.
        noise_level (float): Standard deviation of the Gaussian the training
            points are sampled from.
        D_train (tf.data.Dataset): The training data set.
        D_train_eval (tf.data.Dataset): The training evaluation data set. It
            holds the same data as `D_train` but is iterated separately.
        D_test (tf.data.Dataset): The test data set. It consists of a single
            data point located at the mean of the training distribution.
        phase (tf.Variable): Variable describing the current phase. Can be
            "train", "train_eval" or "test".
        iterator (tf.data.Iterator): A single reinitializable iterator shared
            by all three data sets. The initialization operations below
            switch it between the data sets.
        X (tf.Tensor): Tensor holding the ``X`` values of the current batch.
        y (tf.Tensor): Tensor holding the ``y`` values of the current batch.
        train_init_op (tf.Operation): Operation to run before every training
            epoch. It sets `phase` to "train" and points the iterator at the
            training data set.
        train_eval_init_op (tf.Operation): Operation to run before every
            training evaluation phase. It sets `phase` to "train_eval" and
            points the iterator at the training evaluation data set.
        test_init_op (tf.Operation): Operation to run before every test
            evaluation phase. It sets `phase` to "test" and points the
            iterator at the test data set.
    """

    def __init__(self, batch_size, train_size=1000, noise_level=6):
        """Initializes the data loading class.

        Args:
            batch_size (int): Batch size. No default value is given.
            train_size (int): Size of the training set. Defaults to ``1000``.
            noise_level (float): Noise level of the training set. All
                training points are sampled from a Gaussian distribution with
                the noise level as the standard deviation. Defaults to ``6``.
        """
        self.batch_size = batch_size
        self.train_size = train_size  # Number of points in the training set.
        self.noise_level = noise_level

        self.D_train = self.train_dataset(
            batch_size, size=train_size, noise_level=noise_level)
        self.D_train_eval = self.train_eval_dataset(
            batch_size, size=train_size, noise_level=noise_level)
        self.D_test = self.test_dataset(batch_size)

        self.phase = tf.Variable("train", name="phase", trainable=False)

        # Reinitializable iterator built from the output types and shapes of
        # the training set (they must be identical for all three data sets).
        self.iterator = tf.data.Iterator.from_structure(
            self.D_train.output_types, self.D_train.output_shapes)
        self.X, self.y = self.iterator.get_next()

        # Operations to run when switching the phase: re-point the iterator
        # and record the new phase in the phase variable.
        self.train_init_op = self._make_phase_init_op(self.D_train, "train")
        self.train_eval_init_op = self._make_phase_init_op(
            self.D_train_eval, "train_eval")
        self.test_init_op = self._make_phase_init_op(self.D_test, "test")

    def _make_phase_init_op(self, dataset, phase_name):
        """Groups the iterator initializer for `dataset` with the phase update."""
        return tf.group(
            [self.iterator.make_initializer(dataset),
             tf.assign(self.phase, phase_name)],
            name=phase_name + "_init_op")

    @staticmethod
    def _sample_points(size, noise_level):
        """Draws the fixed-seed noisy ``(x, y)`` samples as flat float32 arrays."""
        # NOTE: this reseeds NumPy's *global* random number generator, so the
        # same points are produced on every call. Kept as-is because other
        # code may rely on the global state being reset here.
        np.random.seed(42)
        # Draw x before y so the values match the original sampling order.
        data_x = np.random.normal(1.0, noise_level, size)
        data_y = np.random.normal(1.0, noise_level, size)
        # Flatten both arrays so they always share the same leading dimension.
        return np.float32(data_x.flatten()), np.float32(data_y.flatten())

    def load(self):
        """Returns the data (`X` and `y`) and the phase variable.

        Returns:
            tuple: Tuple consisting of the data points (`X`), (`y`) and the
            phase variable (`phase`).
        """
        return self.X, self.y, self.phase

    def train_dataset(self, batch_size, size, noise_level):
        """Creates the training data set.

        Args:
            batch_size (int): Batch size of the data points.
            size (int): Size of the training data set, i.e. the number of
                data points in the train set.
            noise_level (float): Standard deviation of the data points around
                the mean. The data points are drawn from a Gaussian
                distribution.

        Returns:
            tf.data.Dataset: The training data set.
        """
        data_x, data_y = self._sample_points(size, noise_level)
        # Size the shuffle buffer from the requested `size` (not from
        # instance state) so the whole data set is shuffled.
        return self.make_dataset(
            data_x, data_y, batch_size, one_hot=True, shuffle=True,
            shuffle_buffer_size=size, num_prefetched_batches=10)

    def train_eval_dataset(self, batch_size, size, noise_level):
        """Creates the train eval data set.

        The points are drawn with the same fixed seed as in `train_dataset`,
        so both data sets contain the same data.

        Args:
            batch_size (int): Batch size of the data points.
            size (int): Size of the train eval data set, i.e. the number of
                data points in the train eval set.
            noise_level (float): Standard deviation of the data points around
                the mean. The data points are drawn from a Gaussian
                distribution.

        Returns:
            tf.data.Dataset: The train eval data set.
        """
        data_x, data_y = self._sample_points(size, noise_level)
        # Size the shuffle buffer from the requested `size` (not from
        # instance state) so the whole data set is shuffled.
        return self.make_dataset(
            data_x, data_y, batch_size, one_hot=True, shuffle=True,
            shuffle_buffer_size=size, num_prefetched_batches=10)

    def test_dataset(self, batch_size):
        """Creates the test data set.

        Just a single data point is created, located at the mean value of
        the Gaussian distributions of the training data set.

        Args:
            batch_size (int): Batch size.

        Returns:
            tf.data.Dataset: The test data set.
        """
        # A single point at the mean recovers the deterministic function.
        data_x = np.float32([1.0])
        data_y = np.float32([1.0])
        return self.make_dataset(
            data_x, data_y, batch_size, one_hot=True, shuffle=False,
            num_prefetched_batches=1)

    def make_dataset(self, data_x, data_y, batch_size, one_hot=True,
                     shuffle=True, shuffle_buffer_size=10000,
                     num_prefetched_batches=10):
        """Creates a data set from given data points.

        Args:
            data_x (np.array): Numpy array containing the ``X`` values of the
                data points.
            data_y (np.array): Numpy array containing the ``y`` values of the
                data points.
            batch_size (int): Batch size of the input-output pairs.
            one_hot (bool): Currently unused by this method; kept so existing
                callers that pass it keep working. Defaults to ``True``.
            shuffle (bool): Switch to turn on or off shuffling of the data
                set. Defaults to ``True``.
            shuffle_buffer_size (int): Size of the shuffle buffer. Defaults
                to ``10000``.
            num_prefetched_batches (int): Number of prefetched batches.
                Defaults to ``10``.

        Returns:
            tf.data.Dataset: Data set yielding batches of ``(X, y)`` pairs.
        """
        with tf.name_scope("two_d"):
            with tf.device('/cpu:0'):
                D = tf.data.Dataset.from_tensor_slices((data_x, data_y))
                if shuffle:
                    D = D.shuffle(buffer_size=shuffle_buffer_size)
                D = D.batch(batch_size)
                D = D.prefetch(buffer_size=num_prefetched_batches)
                return D