Calculate gradient of validation error w.r.t inputs using Keras/Tensorflow or autograd - python-3.x

I need to calculate the gradient of the validation error w.r.t inputs x. I'm trying to see how much the validation error changes when I perturb one of the training samples.
The validation error (E) explicitly depends on the model weights (W).
The model weights explicitly depend on the inputs (x and y).
Therefore, the validation error implicitly depends on the inputs.
I'm trying to calculate the gradient of E w.r.t x directly.
An alternative approach would be to calculate the gradient of E w.r.t W (can easily be calculated) and the gradient of W w.r.t x (cannot do at the moment), which would allow the gradient of E w.r.t x to be calculated.
I have attached a toy example. Thanks in advance!
import numpy as np
import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from autograd import grad
train_images = mnist.train_images()
train_labels = mnist.train_labels()
test_images = mnist.test_images()
test_labels = mnist.test_labels()
# Normalize the images.
train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5
# Flatten the images.
train_images = train_images.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))
# Build the model.
model = Sequential([
Dense(64, activation='relu', input_shape=(784,)),
Dense(64, activation='relu'),
Dense(10, activation='softmax'),
])
# Compile the model.
model.compile(
optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'],
)
# Train the model.
model.fit(
train_images,
to_categorical(train_labels),
epochs=5,
batch_size=32,
)
model.save_weights('model.h5')
# Load the model's saved weights.
# model.load_weights('model.h5')
calculate_mse = tf.keras.losses.MeanSquaredError()
test_x = test_images[:5]
test_y = to_categorical(test_labels)[:5]
train_x = train_images[:1]
train_y = to_categorical(train_labels)[:1]
train_y = tf.convert_to_tensor(train_y, np.float32)
train_x = tf.convert_to_tensor(train_x, np.float64)
with tf.GradientTape() as tape:
tape.watch(train_x)
model.fit(train_x, train_y, epochs=1, verbose=0)
valid_y_hat = model(test_x, training=False)
mse = calculate_mse(test_y, valid_y_hat)
de_dx = tape.gradient(mse, train_x)
print(de_dx)
# approach 2 - does not run
def calculate_validation_mse(x):
model.fit(x, train_y, epochs=1, verbose=0)
valid_y_hat = model(test_x, training=False)
mse = calculate_mse(test_y, valid_y_hat)
return mse
train_x = train_images[:1]
train_y = to_categorical(train_labels)[:1]
validation_gradient = grad(calculate_validation_mse)
de_dx = validation_gradient(train_x)
print(de_dx)

Here's how you can do this. Derivation is as below.
Few things to note,
I have reduced the feature size from 784 to 256 as I was running out of memory in colab (line marked in the code) . Might have to do some mem profiling to find out why
Only computed grads for the first layer. Easily extendable to other layers
Disclaimer: this derivation is correct to best of my knowledge. Please do some research and verify that it is the case. You will run into memory issues for larger inputs and layer sizes.
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
f = 256
model = Sequential([
Dense(64, activation='relu', input_shape=(f,)),
Dense(64, activation='relu'),
Dense(10, activation='softmax'),
])
# Compile the model.
model.compile(
optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'],
)
w = model.weights[0]
# Inputs and labels
x_tr = tf.Variable(np.random.normal(size=(1,f)), shape=(1, f), dtype='float32')
y_tr = np.random.choice([0,1,2,3,4,5,6,7,8,9], size=(1,1))
y_tr_onehot = tf.keras.utils.to_categorical(y_tr, num_classes=10).astype('float32')
x_v = tf.Variable(np.random.normal(size=(1,f)), shape=(1, f), dtype='float32')
y_v = np.random.choice([0,1,2,3,4,5,6,7,8,9], size=(1,1))
y_v_onehot = tf.keras.utils.to_categorical(y_v, num_classes=10).astype('float32')
# In the context of GradientTape
with tf.GradientTape() as tape1:
with tf.GradientTape() as tape2:
y_tr_pred = model(x_tr)
tr_loss = tf.keras.losses.MeanSquaredError()(y_tr_onehot, y_tr_pred)
tmp_g = tape2.gradient(tr_loss, w)
print(tmp_g.shape)
# d(dE_tr/d(theta))/dx
# Warning this step consumes lot of memory for large layers
lr = 0.001
grads_1 = -lr * tape1.jacobian(tmp_g, x_tr)
with tf.GradientTape() as tape3:
y_v_pred = model(x_v)
v_loss = tf.keras.losses.MeanSquaredError()(y_v_onehot, y_v_pred)
# dE_val/d(theta)
grads_2 = tape3.gradient(v_loss, w)[tf.newaxis, :]
# Just crunching the dimension to get the final desired shape of (1,256)
grad = tf.matmul(tf.reshape(grads_2,[1, -1]), tf.reshape(tf.transpose(grads_1,[2,1,0,3]),[1, -1, 256]))

Related

Load model from Keras and investigate the details

I'm using Keras to fit a function, and I'm new to Keras.
With a very simple network, the Keras can fit my function very well, I just want to know what the function is and try to understand why it works very well. But the "predict" function hide the details.
Here is the code I create the network:
import numpy as np
import tensorflow as tf
from tensorflow import keras
LABEL_COLUMN = "shat"
BATCH_SIZE = 16
EPOCHS = 20
trainfilePath = "F:\\PyworkingFolder\\WWSHat\\_Data\\alpha0train.csv"
testfilePath = "F:\\PyworkingFolder\\WWSHat\\_Data\\alpha0test.csv"
with open(trainfilePath, encoding='utf-8') as txtContent:
trainArray = np.loadtxt(txtContent, delimiter=",")
with open(testfilePath, encoding='utf-8') as txtContent:
testArray = np.loadtxt(txtContent, delimiter=",")
trainSample = trainArray[:, 0:14]
trainLable = trainArray[:, 14]
testSample = testArray[:, 0:14]
testLable = testArray[:, 14]
model = keras.Sequential([
keras.layers.Dense(14, activation='relu', input_shape=[14]),
keras.layers.Dense(15, activation='relu'),
keras.layers.Dense(1)
])
optimizer = tf.keras.optimizers.RMSprop(0.001)
# optimizer = keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
model.compile(loss='mse',
optimizer=optimizer,
metrics=['mae', 'mse'])
model.summary()
history = model.fit(trainSample, trainLable, epochs=EPOCHS, batch_size=BATCH_SIZE)
model.evaluate(testSample, testLable, verbose=1)
model.save("F:\\PyworkingFolder\\WWSHat\\_Data\\alpha0.h5")
What I understand is:
the layers are weight matrices and basis matrices, it works as
out=max(0, weight * input + basis)
After some search, I find I can read the .h5 file using
import h5py
import numpy as np
FILENAME = "F:\\PyworkingFolder\\WWSHat\\_Data\\alpha0.h5"
with h5py.File(FILENAME, 'r') as f:
dense_1 = f['/model_weights/dense_1/dense_1']
dense_1_bias = dense_1['bias:0'][:]
dense_1_kernel = dense_1['kernel:0'][:]
dense_2 = f['/model_weights/dense_2/dense_2']
dense_2_bias = dense_2['bias:0'][:]
dense_2_kernel = dense_2['kernel:0'][:]
# print("Weight matrix 1:\n")
# print(dense_1_kernel)
# print("Basis matrix 1:\n")
# print(dense_1_bias)
# print("Weight matrix 2:\n")
# print(dense_2_kernel)
# print("Basis matrix 2:\n")
# print(dense_2_bias)
def layer_output(v, kernel, bias):
return np.dot(v, kernel) + bias
reluFunction = np.vectorize(lambda x: x if x >= 0.0 else 0.0)
testV = np.array([[-0.004090321213057993,
0.009615388501909157,
-0.24223693596921558,
0.015504079563927319,
-0.02659541428995062,
0.018512968977547152,
0.00836788544720289,
-0.10874776132746002,
-0.045863474556415526,
-0.010195799916571194,
0.09474219315939948,
0.03606698737846194,
-0.004560110004741025,
0.028042417959738858]])
output_1 = layer_output(testV, dense_1_kernel, dense_1_bias)
output_2 = reluFunction(output_1)
output_3 = layer_output(output_2, dense_2_kernel, dense_2_bias)
output_4 = reluFunction(output_3)
however, the result of output_4 is very different from what I get using
loaded_model = keras.models.load_model("F:\\PyworkingFolder\\WWSHat\\_Data\\alpha0.h5")
predicted = loaded_model(testV)
The "predicted" is very close to the ground truth while "output_4" is far away from the ground truth.
I get stuck here and don't know why and failed to find information about how to extract the function I want from the Keras model, I need your help!
Thanks!
model = keras.Sequential([
keras.layers.Dense(14, activation='relu', input_shape=[14]),
keras.layers.Dense(15, activation='relu'),
keras.layers.Dense(1)
])
In your model, there are 3 layers, the last dense layer has weight and biases too, you didn't consider them in your calculation.

why is different between model.evaluate() and computed loss by myself based on model.predict()?

I am running neural network by keras. There is my code:
import numpy as np
from keras import Model
from keras.models import Sequential
from keras.layers import Dense
from keras import backend as K
def mean_squared_error(y_true, y_pred):
return K.mean(K.square(y_pred - y_true),axis=-1)
np.random.seed(1)
Train_X = np.random.randint(low=0,high=100,size = (50,5))
Train_Y = np.matmul(Train_X,np.arange(10).reshape(5,2))+np.random.randint(low=0,high=10,size=(50,2))
Test_X = np.random.randint(low=0,high=100,size = (10,5))
Test_Y = np.matmul(Test_X,np.arange(10).reshape(5,2))+np.random.randint(low=0,high=10,size=(10,2))
model = Sequential()
model.add(Dense(4,activation = 'relu'))
model.add(Dense(2,activation='relu'))
model.add(Dense(2,activation='relu'))
model.add(Dense(2))
model.compile(loss=mean_squared_error, optimizer='adam', metrics=['mae'])
history = model.fit(Train_X, Train_Y, epochs=100, batch_size=5,validation_data = (Test_X, Test_Y))
loss1 = model.evaluate(Test_X,Test_Y)
loss2 = history.history['val_loss'][99]
y_pred = model.predict(Test_X)
y_true = Test_Y
loss3 = np.mean(np.square(y_pred-y_true))
I find that loss1 is the same as loss2 but is different with loss3. So i feel so confused. Could someone tell me why?
This is possibly due to different dtypes for Test_Y and y_pred. Keras tries to automatically take care of dtype mismatches for you, so it is possible that Test_Y is a float64 and y_pred is a float32. If that is indeed the case, try converting one of their dtypes for the loss3 calculation and see if the values match.
y_pred = model.predict(Test_X)
y_true = Test_Y.astype(np.float32)
loss3 = np.mean(np.square(y_pred-y_true))

Writing my own FGSM Adversarial attack always gives zero perturbation

I'm trying to write a simple FGSM attack on MNIST. I tried the foolbox library and it seems to work but the FGSM is very slow (perhaps because it searches for a minimum eps/perturbation which gives a different target label). I started writing my own and my code always gives me zero perturbation. I.e., if I plot the x_adversarial it is the same as x_input. I checked that the gradient computation leads all zero matrix. The computed loss function is small, but I imagine there is some gradient to this loss function. Can somebody please help me? I have been racking my head for a week now without any progress. Thanks again!
import tensorflow as tf
import numpy as np
import foolbox
import tensorflow.keras.backend as K
import matplotlib.pyplot as plt
# Importing the required Keras modules containing model and layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
num_digits_to_classify = 10
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)
# Making sure that the values are float so that we can get decimal points after division
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Normalizing the RGB codes by dividing it to the max RGB value.
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print('Number of images in x_train', x_train.shape[0])
print('Number of images in x_test', x_test.shape[0])
def create_model_deep():
model = Sequential()
model.add(Conv2D(32, kernel_size=(5,5), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Conv2D(64,kernel_size=(5,5),activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten()) # Flattening the 2D arrays for fully connected layers
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_digits_to_classify,activation='softmax'))
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
return model
model = create_model_deep()
model.summary()
model.fit(x=x_train,y=y_train, epochs=10)
model.evaluate(x_test, y_test)
########################## foolbox FGSM attack ###############################################
from keras.backend import set_learning_phase
set_learning_phase(0)
from foolbox.criteria import Misclassification
fmodel = foolbox.models.TensorFlowModel.from_keras(model, bounds=(0,1))
attack = foolbox.attacks.FGSM(fmodel, criterion=Misclassification())
fgsm_error = 0.0
for i in range(x_test.shape[0]):
if i%1000 == 0:
print(i)
adversarial = attack(x_test[i],y_test[i])
if adversarial is not None:
adversarial = adversarial.reshape(1,28,28,1)
model_predictions = model.predict(adversarial)
label = np.argmax(model_predictions)
if label != y_test[i]:
fgsm_error = fgsm_error + 1.0
fgsm_error = fgsm_error/x_test.shape[0]
########################## My own FGSM attack ###############################################
sess = K.get_session()
eps = 0.3
x_adv = tf.placeholder(tf.float32,shape=(1,28,28,1),name="adv_example")
x_noise = tf.placeholder(tf.float32,shape=(1,28,28,1),name="adv_noise")
x_input = x_test[0].reshape(1,28,28,1)
y_input = y_test[0]
def loss_fn(y_true, y_pred):
return K.sparse_categorical_crossentropy(y_true, y_pred)
grad = K.gradients(loss_fn(y_input,model.output), model.input)
delta = K.sign(grad[0])
x_noise = x_noise + delta
x_adv = x_adv + eps*delta
x_adv = K.clip(x_adv,0.0,1.0)
x_adv, x_noise, grad = sess.run([x_adv, x_noise, grad], feed_dict={model.input:x_input, x_adv:x_input, x_noise:np.zeros_like(x_input)})
pred = model.predict(x_adv)
The following code seems to work now. Please look at the comment I made below.
sess = K.get_session()
eps = 0.3
i = 100
x_input = x_test[i].reshape(1,28,28,1)
y_input = y_test[i]
x_adv = x_input
# Added noise
x_noise = np.zeros_like(x_input)
def loss_fn(y_true, y_pred):
target = K.one_hot(y_true,10)
loss = K.categorical_crossentropy(target, y_pred)
return loss
#loss = K.print_tensor(loss,message='loss = ')
#return K.sparse_categorical_crossentropy(y_true, y_pred)
def loss_fn_sparse(y_true, y_pred):
loss = K.sparse_categorical_crossentropy(y_true, y_pred)
return loss
image = K.cast(x_input,dtype='float32')
y_pred = model(image)
loss = loss_fn_sparse(y_input, y_pred)
grad = K.gradients(loss, image)
delta = K.sign(grad[0])
x_noise = x_noise + delta
x_adv = x_adv + eps*delta
x_adv = K.clip(x_adv,0.0,1.0)
x_adv, x_noise = sess.run([x_adv, x_noise], feed_dict={model.input:x_input})
pred = model.predict(x_adv)

MNIST and transfer learning with VGG16 in Keras- low validation accuracy

I recently started taking advantage of Keras's flow_from_dataframe() feature for a project, and decided to test it with the MNIST dataset. I have a directory full of the MNIST samples in png format, and a dataframe with the absolute directory for each in one column and the label in the other.
I'm also using transfer learning, importing VGG16 as a base, and adding my own 512 node relu dense layer and 0.5 drop-out before a softmax layer of 10. (For digits 0-9). I'm using rmsprop (lr=1e-4) as the optimizer.
When I launch my environment, it calls the latest version of keras_preprocessing from Git, which has support for absolute directories and capitalized file extensions.
My problem is that I have a very high training accuracy, and a terribly low validation accuracy. By my final epoch (10), I had a training accuracy of 0.94 and a validation accuracy of 0.01.
I'm wondering if there's something fundamentally wrong with my script? With another dataset, I'm even getting NaNs for both my training and validation loss values after epoch 4. (I checked the relevant columns, there aren't any null values!)
Here's my code. I'd be deeply appreciative is someone could glance through it and see if anything jumped out at them.
import pandas as pd
import numpy as np
import keras
from keras_preprocessing.image import ImageDataGenerator
from keras import applications
from keras import optimizers
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras.applications.vgg16 import VGG16, preprocess_input
# INITIALIZE MODEL
img_width, img_height = 32, 32
model = VGG16(weights = 'imagenet', include_top=False, input_shape = (img_width, img_height, 3))
# freeze all layers
for layer in model.layers:
layer.trainable = False
# Adding custom Layers
x = model.output
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(10, activation="softmax")(x)
# creating the final model
model_final = Model(input = model.input, output = predictions)
# compile the model
rms = optimizers.RMSprop(lr=1e-4)
#adadelta = optimizers.Adadelta(lr=0.001, rho=0.5, epsilon=None, decay=0.0)
model_final.compile(loss = "categorical_crossentropy", optimizer = rms, metrics=["accuracy"])
# LOAD AND DEFINE SOURCE DATA
train = pd.read_csv('MNIST_train.csv', index_col=0)
val = pd.read_csv('MNIST_test.csv', index_col=0)
nb_train_samples = 60000
nb_validation_samples = 10000
batch_size = 60
epochs = 10
# Initiate the train and test generators
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()
train_generator = train_datagen.flow_from_dataframe(dataframe=train,
directory=None,
x_col='train_samples',
y_col='train_labels',
has_ext=True,
target_size = (img_height,
img_width),
batch_size = batch_size,
class_mode = 'categorical',
color_mode = 'rgb')
validation_generator = test_datagen.flow_from_dataframe(dataframe=val,
directory=None,
x_col='test_samples',
y_col='test_labels',
has_ext=True,
target_size = (img_height,
img_width),
batch_size = batch_size,
class_mode = 'categorical',
color_mode = 'rgb')
# GET CLASS INDICES
print('****************')
for cls, idx in train_generator.class_indices.items():
print('Class #{} = {}'.format(idx, cls))
print('****************')
# DEFINE CALLBACKS
path = './chk/epoch_{epoch:02d}-valLoss_{val_loss:.2f}-valAcc_{val_acc:.2f}.hdf5'
chk = ModelCheckpoint(path, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')
logger = CSVLogger('./chk/training_log.csv', separator = ',', append=False)
nPlus = 1
samples_per_epoch = nb_train_samples * nPlus
# Train the model
model_final.fit_generator(train_generator,
steps_per_epoch = int(samples_per_epoch/batch_size),
epochs = epochs,
validation_data = validation_generator,
validation_steps = int(nb_validation_samples/batch_size),
callbacks = [chk, logger])
Have you tried explicitly defining the classes of the images? as such:
train_generator=image.ImageDataGenerator().flow_from_dataframe(classes=[0,1,2,3,4,5,6,7,8,9])
in both the train and validation generators.
I have found that sometimes the train and validation generators create different correspondence dictionaries.

Using tf.data.Dataset as training input to Keras model NOT working

I have a simple code, which DOES work, for training a Keras model in Tensorflow using numpy arrays as features and labels. If I then wrap these numpy arrays using tf.data.Dataset.from_tensor_slices in order to train the same Keras model using a tensorflow dataset, I get an error. I haven't been able to figure out why (it may be a tensorflow or keras bug, but I may also be missing something). I'm on python 3, tensorflow is 1.10.0, numpy is 1.14.5, no GPU involved.
OBS1: The possibility of using tf.data.Dataset as a Keras input is showed in https://www.tensorflow.org/guide/keras, under "Input tf.data datasets".
OBS2: In the code below, the code under "#Train with numpy arrays" is being executed, using numpy arrays. If this code is commented and the code under "#Train with tf.data datasets" is used instead, the error will be reproduced.
OBS3: In line 13, which is commented and starts with "###WORKAROUND 1###", if the comment is removed and the line is used for tf.data.Dataset inputs, the error changes, even though I can't completely understand why.
The complete code is:
import tensorflow as tf
import numpy as np
np.random.seed(1)
tf.set_random_seed(1)
print(tf.__version__)
print(np.__version__)
#Import mnist dataset as numpy arrays
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()#Import
x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
###WORKAROUND 1###y_train, y_test = (y_train.astype(dtype='float32'), y_test.astype(dtype='float32'))
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1]*x_train.shape[2])) #reshaping 28 x 28 images to 1D vectors, similar to Flatten layer in Keras
batch_size = 32
#Create a tf.data.Dataset object equivalent to this data
tfdata_dataset_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
tfdata_dataset_train = tfdata_dataset_train.batch(batch_size).repeat()
#Creates model
keras_model = tf.keras.models.Sequential([
tf.keras.layers.Dense(512, activation=tf.nn.relu),
tf.keras.layers.Dropout(0.2, seed=1),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
#Compile the model
keras_model.compile(optimizer='adam',
loss=tf.keras.losses.sparse_categorical_crossentropy,
metrics=['accuracy'])
#Train with numpy arrays
keras_training_history = keras_model.fit(x_train,
y_train,
initial_epoch=0,
epochs=1,
batch_size=batch_size
)
#Train with tf.data datasets
#keras_training_history = keras_model.fit(tfdata_dataset_train,
# initial_epoch=0,
# epochs=1,
# steps_per_epoch=60000//batch_size
# )
print(keras_training_history.history)
The error observed when using tf.data.Dataset as input is:
(...)
ValueError: Tensor conversion requested dtype uint8 for Tensor with dtype float32: 'Tensor("metrics/acc/Cast:0", shape=(?,), dtype=float32)'
During handling of the above exception, another exception occurred:
(...)
TypeError: Input 'y' of 'Equal' Op has type float32 that does not match type uint8 of argument 'x'.
The error when removing the comment from line 13, as commented above in OBS3, is:
(...)
tensorflow.python.framework.errors_impl.InvalidArgumentError: In[0] is not a matrix
[[Node: dense/MatMul = MatMul[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/dense/MatMul_grad/MatMul_1"], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_sequential_input_0_0, dense/MatMul/ReadVariableOp)]]
Any help would be appreciated, including comments that you were able to reproduce the errors, so I can report the bug if it is the case.
I just upgraded to Tensorflow 1.10 to execute this code. I think that is the answer which is also discussed in the other Stackoverflow thread
This code executes but only if I remove the normalization as that line seems to use too much CPU memory. I see messages indicating that. I also reduced the cores.
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Input
np.random.seed(1)
tf.set_random_seed(1)
batch_size = 128
NUM_CLASSES = 10
print(tf.__version__)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
#x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
def tfdata_generator(images, labels, is_training, batch_size=128):
'''Construct a data generator using tf.Dataset'''
def preprocess_fn(image, label):
'''A transformation function to preprocess raw data
into trainable input. '''
x = tf.reshape(tf.cast(image, tf.float32), (28, 28, 1))
y = tf.one_hot(tf.cast(label, tf.uint8), NUM_CLASSES)
return x, y
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
if is_training:
dataset = dataset.shuffle(1000) # depends on sample size
# Transform and batch data at the same time
dataset = dataset.apply(tf.contrib.data.map_and_batch(
preprocess_fn, batch_size,
num_parallel_batches=2, # cpu cores
drop_remainder=True if is_training else False))
dataset = dataset.repeat()
dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
return dataset
training_set = tfdata_generator(x_train, y_train,is_training=True, batch_size=batch_size)
testing_set = tfdata_generator(x_test, y_test, is_training=False, batch_size=batch_size)
inputs = Input(shape=(28, 28, 1))
x = Conv2D(32, (3, 3), activation='relu', padding='valid')(inputs)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)
keras_model = tf.keras.Model(inputs, outputs)
#Compile the model
keras_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
#Train with tf.data datasets
keras_training_history = keras_model.fit(
training_set.make_one_shot_iterator(),
steps_per_epoch=len(x_train) // batch_size,
epochs=5,
validation_data=testing_set.make_one_shot_iterator(),
validation_steps=len(x_test) // batch_size,
verbose=1)
print(keras_training_history.history)
Installing the tf-nightly build, together with changing dtypes of some tensors (the error changes after installing tf-nightly), solved the problem, so it is an issue which (hopefully) will be solved in 1.11.
Related material: https://github.com/tensorflow/tensorflow/issues/21894
I am wondering how Keras is able to do 5 epochs when the
make_one_shot_iterator() which only supports iterating once through a
dataset?
could be given smth like iterations = len(y_train) * epochs - here shown for tf.v1
the code from Mohan Radhakrishnan still works in tf.v2 with little corrections in objects' belongings to new classes (in tf.v2) fixings - to make the code up-to-date... No more make_one_shot_iterator() needed
# >> author: Mohan Radhakrishnan
import tensorflow as tf
import tensorflow.keras
import numpy as np
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Input
np.random.seed(1)
tf.random.set_seed(1)
batch_size = 128
NUM_CLASSES = 10
print(tf.__version__)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
#x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
def tfdata_generator(images, labels, is_training, batch_size=128):
'''Construct a data generator using tf.Dataset'''
def preprocess_fn(image, label):
'''A transformation function to preprocess raw data
into trainable input. '''
x = tf.reshape(tf.cast(image, tf.float32), (28, 28, 1))
y = tf.one_hot(tf.cast(label, tf.uint8), NUM_CLASSES)
return x, y
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
if is_training:
dataset = dataset.shuffle(1000) # depends on sample size
# Transform and batch data at the same time
dataset = dataset.apply( tf.data.experimental.map_and_batch(
preprocess_fn, batch_size,
num_parallel_batches=2, # cpu cores
drop_remainder=True if is_training else False))
dataset = dataset.repeat()
dataset = dataset.prefetch( tf.data.experimental.AUTOTUNE)
return dataset
training_set = tfdata_generator(x_train, y_train,is_training=True, batch_size=batch_size)
testing_set = tfdata_generator(x_test, y_test, is_training=False, batch_size=batch_size)
inputs = Input(shape=(28, 28, 1))
x = Conv2D(32, (3, 3), activation='relu', padding='valid')(inputs)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)
keras_model = tf.keras.Model(inputs, outputs)
#Compile the model
keras_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
#Train with tf.data datasets
# training_set.make_one_shot_iterator() - 'PrefetchDataset' object has no attribute 'make_one_shot_iterator'
keras_training_history = keras_model.fit(
training_set,
steps_per_epoch=len(x_train) // batch_size,
epochs=5,
validation_data=testing_set,
validation_steps=len(x_test) // batch_size,
verbose=1)
print(keras_training_history.history)
not loading data locally, just easy DataFlow - that is very convinient - Thanks a lot - hope my corrections are proper

Resources