About 'Building Autoencoders in Keras'? - keras

I read the Building Autoencoders in Keras, the url is https://blog.keras.io/building-autoencoders-in-keras.html
In the section of Adding a sparsity constraint on the encoded representations, I have tried according to his description, but the loss can't down to 0.11, instead around 0.26.
So the result is fuzzy:
Can anyone who has done this experiment tell me what's wrong with it?
It's my code:
from keras.layers import Input, Dense
from keras.models import Model
from keras import regularizers
encoding_dim = 32 # 压缩后维度
input_img = Input(shape = (784,))
# 编码
encoded = Dense(encoding_dim, activation = 'relu',
activity_regularizer = regularizers.l1(1e-4)
)(input_img)
# 解码
decoded = Dense(784, activation = 'sigmoid')(encoded)
# 创建自动编码器
autoencoder = Model(input_img, decoded)
# 编码器
encoder = Model(input_img, encoded)
encoded_input = Input(shape = (encoding_dim,))
# 最后一层全连接层作为解码器
decoder_layer = autoencoder.layers[-1]
# 解码器
decoder = Model(encoded_input, decoder_layer(encoded_input))
# 编译模块
autoencoder.compile(optimizer = 'adadelta', loss = 'binary_crossentropy')
from keras.datasets import mnist
import numpy as np
(x_train, _), (x_test, _) = mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape(x_train.shape[0], np.prod(x_train.shape[1:]))
x_test = x_test.reshape(x_test.shape[0], np.prod(x_test.shape[1:]))
autoencoder.fit(x_train, x_train,
epochs = 100,
batch_size = 256,
shuffle = True,
validation_data = (x_test, x_test))
encoded_imgs = encoder.predict(x_test)
decoded_imgs = decoder.predict(encoded_imgs)
import matplotlib.pyplot as plt
n = 10
plt.figure(figsize = (20, 4))
for i in range(10):
# 原图
ax = plt.subplot(2, n, i + 1)
plt.imshow(x_test[i].reshape(28, 28))
plt.gray()
ax.set_axis_off()
# 解码后的图
ax = plt.subplot(2, n, n + i + 1)
plt.imshow(decoded_imgs[i].reshape(28, 28))
plt.gray()
ax.set_axis_off()
plt.savefig('simpleSparse.png')
from keras import backend as K
K.clear_session()

I copied your code verbatim, and reproduced the error you got.
Solution: reduce the batch size from 256, to 16. You'll noice a huge difference in output even after 10 epochs of training.
Explanation: What is probably going on is that even though there is a reduction in your training loss, you are averaging the gradient over too many examples, to a point that the step you are taking in the direction of the gradient is cancelling itself out in some higher dimensional space and your learning algorithm is being tricked into thinking its converged to a local minima, when in reality it can't decided where to go. This last part explains why it seems that all the outputs you get look blurry and exactly the same.
Update: Reduce batch size to 4, and you'll get near perfect reconstruction even after 10 epochs.

You need to change the batch size in your code.
autoencoder.fit(x_train, x_train,epochs = 10,batch_size = 16,shuffle = True,validation_data = (x_test, x_test))

known bug. need to set regularizer to 10e-7
activity_regularizer=regularizers.activity_l1(10e-7))(input_img)
after 50 epochs, val_loss: 0.1424
https://github.com/keras-team/keras/issues/5414
If you drop your batch size it takes much longer

Related

Calculate gradient of validation error w.r.t inputs using Keras/Tensorflow or autograd

I need to calculate the gradient of the validation error w.r.t inputs x. I'm trying to see how much the validation error changes when I perturb one of the training samples.
The validation error (E) explicitly depends on the model weights (W).
The model weights explicitly depend on the inputs (x and y).
Therefore, the validation error implicitly depends on the inputs.
I'm trying to calculate the gradient of E w.r.t x directly.
An alternative approach would be to calculate the gradient of E w.r.t W (can easily be calculated) and the gradient of W w.r.t x (cannot do at the moment), which would allow the gradient of E w.r.t x to be calculated.
I have attached a toy example. Thanks in advance!
import numpy as np
import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from autograd import grad
train_images = mnist.train_images()
train_labels = mnist.train_labels()
test_images = mnist.test_images()
test_labels = mnist.test_labels()
# Normalize the images.
train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5
# Flatten the images.
train_images = train_images.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))
# Build the model.
model = Sequential([
Dense(64, activation='relu', input_shape=(784,)),
Dense(64, activation='relu'),
Dense(10, activation='softmax'),
])
# Compile the model.
model.compile(
optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'],
)
# Train the model.
model.fit(
train_images,
to_categorical(train_labels),
epochs=5,
batch_size=32,
)
model.save_weights('model.h5')
# Load the model's saved weights.
# model.load_weights('model.h5')
calculate_mse = tf.keras.losses.MeanSquaredError()
test_x = test_images[:5]
test_y = to_categorical(test_labels)[:5]
train_x = train_images[:1]
train_y = to_categorical(train_labels)[:1]
train_y = tf.convert_to_tensor(train_y, np.float32)
train_x = tf.convert_to_tensor(train_x, np.float64)
with tf.GradientTape() as tape:
tape.watch(train_x)
model.fit(train_x, train_y, epochs=1, verbose=0)
valid_y_hat = model(test_x, training=False)
mse = calculate_mse(test_y, valid_y_hat)
de_dx = tape.gradient(mse, train_x)
print(de_dx)
# approach 2 - does not run
def calculate_validation_mse(x):
model.fit(x, train_y, epochs=1, verbose=0)
valid_y_hat = model(test_x, training=False)
mse = calculate_mse(test_y, valid_y_hat)
return mse
train_x = train_images[:1]
train_y = to_categorical(train_labels)[:1]
validation_gradient = grad(calculate_validation_mse)
de_dx = validation_gradient(train_x)
print(de_dx)
Here's how you can do this. Derivation is as below.
Few things to note,
I have reduced the feature size from 784 to 256 as I was running out of memory in colab (line marked in the code) . Might have to do some mem profiling to find out why
Only computed grads for the first layer. Easily extendable to other layers
Disclaimer: this derivation is correct to best of my knowledge. Please do some research and verify that it is the case. You will run into memory issues for larger inputs and layer sizes.
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
f = 256
model = Sequential([
Dense(64, activation='relu', input_shape=(f,)),
Dense(64, activation='relu'),
Dense(10, activation='softmax'),
])
# Compile the model.
model.compile(
optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'],
)
w = model.weights[0]
# Inputs and labels
x_tr = tf.Variable(np.random.normal(size=(1,f)), shape=(1, f), dtype='float32')
y_tr = np.random.choice([0,1,2,3,4,5,6,7,8,9], size=(1,1))
y_tr_onehot = tf.keras.utils.to_categorical(y_tr, num_classes=10).astype('float32')
x_v = tf.Variable(np.random.normal(size=(1,f)), shape=(1, f), dtype='float32')
y_v = np.random.choice([0,1,2,3,4,5,6,7,8,9], size=(1,1))
y_v_onehot = tf.keras.utils.to_categorical(y_v, num_classes=10).astype('float32')
# In the context of GradientTape
with tf.GradientTape() as tape1:
with tf.GradientTape() as tape2:
y_tr_pred = model(x_tr)
tr_loss = tf.keras.losses.MeanSquaredError()(y_tr_onehot, y_tr_pred)
tmp_g = tape2.gradient(tr_loss, w)
print(tmp_g.shape)
# d(dE_tr/d(theta))/dx
# Warning this step consumes lot of memory for large layers
lr = 0.001
grads_1 = -lr * tape1.jacobian(tmp_g, x_tr)
with tf.GradientTape() as tape3:
y_v_pred = model(x_v)
v_loss = tf.keras.losses.MeanSquaredError()(y_v_onehot, y_v_pred)
# dE_val/d(theta)
grads_2 = tape3.gradient(v_loss, w)[tf.newaxis, :]
# Just crunching the dimension to get the final desired shape of (1,256)
grad = tf.matmul(tf.reshape(grads_2,[1, -1]), tf.reshape(tf.transpose(grads_1,[2,1,0,3]),[1, -1, 256]))

Implementing and tuning a simple CNN for 3D data using Keras Conv3D

I'm trying to implement a 3D CNN using Keras. However, I am having some difficulties understanding some details in the results obtained and further enhancing the accuracy.
The data that I am trying to analyzing have the shape {64(d1)x64(d2)x38(d3)}, where d1 and d2 are the length and width of the image (64x64 pixels) and d3 is the time dimension. In other words, I have 38 images. The channel parameter is set to 1 as my data are actually raw data and not really colorful images.
My data consist of 219 samples, hence 219x64x64x38. They are divided into training and validation sets with 20% for validation. In addition, I have a fixed 147 additional data for testing.
Below is my code that works fine. It creates a txt file that saves the results for the different combination of parameters in my network (grid search). Here in this code, I only consider tuning 2 parameters: the number of filters and lambda for L2 regularizer. I fixed the dropout and the kernel size for the filters. However, later I considered their variations.
I also tried to set the seed value so that I have some sort of reproducibility (I don't think that I have achieved this task).
My question is that:
Given the below architecture and code, I always reach for all the given combinations of parameters a convergence for the training accuracy towards 1 (which is good). However, for the validation accuracy it is most of the time around 80% +/- 4% (rarely below 70%) despite the hyper-parameters combination. Similar behavior for the test accuracy. How can I enhance this accuracy to above 90% ?
As far as I know, having a gap between the train and validation/test accuracy is a result from overfitting. However, in my model I am adding dropouts and L2 regularizers and also changing the size of my network which should somehow reduce this gap (but it is not).
Is there anything else I can do besides modifying my input data? Does adding more layers help? Or is there maybe a pre-trained 3D CNN like in the case of 2D CNN (e.g., AlexNet)? Should I try ConvLSTM? Is this the limit of this architecture?
Thank you :)
import numpy as np
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Conv3D, MaxPooling3D, Dense, Flatten, Activation
from keras.utils import to_categorical
from keras.regularizers import l2
from keras.layers import Dropout
from keras.utils import multi_gpu_model
import scipy.io as sio
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from keras.callbacks import ReduceLROnPlateau
tf.set_random_seed(1234)
def normalize_minmax(X_train):
"""
Normalize to [0,1]
"""
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
X_minmax_train = min_max_scaler.fit_transform(X_train)
return X_minmax_train
# generate and prepare the dataset
def get_data():
# Load and prepare the data
X_data = sio.loadmat('./X_train')['X_train']
Y_data = sio.loadmat('./Y_train')['targets_train']
X_test = sio.loadmat('./X_test')['X_test']
Y_test = sio.loadmat('./Y_test')['targets_test']
return X_data, Y_data, X_test, Y_test
def get_model(X_train, Y_train, X_validation, Y_validation, F1_nb, F2_nb, F3_nb, kernel_size_1, kernel_size_2, kernel_size_3, l2_lambda, learning_rate, reduce_lr, dropout_conv1, dropout_conv2, dropout_conv3, dropout_dense, no_epochs):
no_classes = 5
sample_shape = (64, 64, 38, 1)
batch_size = 32
dropout_seed = 30
conv_seed = 20
# Create the model
model = Sequential()
model.add(Conv3D(F1_nb, kernel_size=kernel_size_1, kernel_regularizer=l2(l2_lambda), padding='same', kernel_initializer='glorot_uniform', input_shape=sample_shape))
model.add(Activation('selu'))
model.add(MaxPooling3D(pool_size=(2,2,2)))
model.add(Dropout(dropout_conv1, seed=conv_seed))
model.add(Conv3D(F2_nb, kernel_size=kernel_size_2, kernel_regularizer=l2(l2_lambda), padding='same', kernel_initializer='glorot_uniform'))
model.add(Activation('selu'))
model.add(MaxPooling3D(pool_size=(2,2,2)))
model.add(Dropout(dropout_conv2, seed=conv_seed))
model.add(Conv3D(F3_nb, kernel_size=kernel_size_3, kernel_regularizer=l2(l2_lambda), padding='same', kernel_initializer='glorot_uniform'))
model.add(Activation('selu'))
model.add(MaxPooling3D(pool_size=(2,2,2)))
model.add(Dropout(dropout_conv3, seed=conv_seed))
model.add(Flatten())
model.add(Dense(512, kernel_regularizer=l2(l2_lambda), kernel_initializer='glorot_uniform'))
model.add(Activation('selu'))
model.add(Dropout(dropout_dense, seed=dropout_seed))
model.add(Dense(no_classes, activation='softmax'))
model = multi_gpu_model(model, gpus = 2)
# Compile the model
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adam(lr=learning_rate),
metrics=['accuracy'])
# Train the model.
history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=no_epochs, validation_data=(X_validation, Y_validation),callbacks=[reduce_lr])
return model, history
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
learning_rate = 0.001
no_epochs = 100
X_data, Y_data, X_test, Y_test = get_data()
# Normalize the train/val data
for i in range(X_data.shape[0]):
for j in range(X_data.shape[3]):
X_data[i,:,:,j] = normalize_minmax(X_data[i,:,:,j])
X_data = np.expand_dims(X_data, axis=4)
# Normalize the test data
for i in range(X_test.shape[0]):
for j in range(X_test.shape[3]):
X_test[i,:,:,j] = normalize_minmax(X_test[i,:,:,j])
X_test = np.expand_dims(X_test, axis=4)
# Shuffle the training data
# fix random seed for reproducibility
seedValue = 40
permutation = np.random.RandomState(seed=seedValue).permutation(len(X_data))
X_data = X_data[permutation]
Y_data = Y_data[permutation]
Y_data = np.squeeze(Y_data)
Y_test = np.squeeze(Y_test)
#Split between train and validation (20%). Here I did not use the classical validation_split=0.2 just to make sure that the data is the same for the different architectures I am using.
X_train = X_data[0:175,:,:,:,:]
Y_train = Y_data[0:175]
X_validation = X_data[176:,:,:,:]
Y_validation = Y_data[176:]
Y_train = to_categorical(Y_train,num_classes=5).astype(np.integer)
Y_validation = to_categorical(Y_validation,num_classes=5).astype(np.integer)
Y_test = to_categorical(Y_test,num_classes=5).astype(np.integer)
l2_lambda_list = [(1*pow(10,-4)),(2*pow(10,-4)),
(3*pow(10,-4)),
(4*pow(10,-4)),
(5*pow(10,-4)),(6*pow(10,-4)),
(7*pow(10,-4)),
(8*pow(10,-4)),(9*pow(10,-4)),(10*pow(10,-4))
]
filters_nb = [(128,64,64),(128,64,32),(128,64,16),(128,64,8),(128,32,32),(128,32,16),(128,32,8),(128,16,8),(128,8,8),
(64,64,32),(64,64,16),(64,64,8),(64,32,32),(64,32,16),(64,32,8),(64,16,16),(64,16,8),(64,8,8),
(32,32,16),(32,32,8),(32,16,16),(32,16,8),(32,8,8),
(16,16,16),(16,16,8),(16,8,8)
]
DropOut = [(0.25,0.25,0.25,0.5),
(0,0,0,0.1),(0,0,0,0.2),(0,0,0,0.3),(0,0,0,0.4),(0,0,0,0.5),
(0.1,0.1,0.1,0),(0.2,0.2,0.2,0),(0.3,0.3,0.3,0),(0.4,0.4,0.4,0),(0.5,0.5,0.5,0),
(0.1,0.1,0.1,0.1),(0.1,0.1,0.1,0.2),(0.1,0.1,0.1,0.3),(0.1,0.1,0.1,0.4),(0.1,0.1,0.1,0.5),
(0.15,0.15,0.15,0.1),(0.15,0.15,0.15,0.2),(0.15,0.15,0.15,0.3),(0.15,0.15,0.15,0.4),(0.15,0.15,0.15,0.5),
(0.2,0.2,0.2,0.1),(0.2,0.2,0.2,0.2),(0.2,0.2,0.2,0.3),(0.2,0.2,0.2,0.4),(0.2,0.2,0.2,0.5),
(0.25,0.25,0.25,0.1),(0.25,0.25,0.25,0.2),(0.25,0.25,0.25,0.3),(0.25,0.25,0.25,0.4),(0.25,0.25,0.25,0.5),
(0.3,0.3,0.3,0.1),(0.3,0.3,0.3,0.2),(0.3,0.3,0.3,0.3),(0.3,0.3,0.3,0.4),(0.3,0.3,0.3,0.5),
(0.35,0.35,0.35,0.1),(0.35,0.35,0.35,0.2),(0.35,0.35,0.35,0.3),(0.35,0.35,0.35,0.4),(0.35,0.35,0.35,0.5)
]
kernel_size = [(3,3,3),
(2,3,3),(2,3,4),(2,3,5),(2,3,6),(2,3,7),(2,3,8),(2,3,9),(2,3,10),(2,3,11),(2,3,12),(2,3,13),(2,3,14),(2,3,15),
(3,3,3),(3,3,4),(3,3,5),(3,3,6),(3,3,7),(3,3,8),(3,3,9),(3,3,10),(3,3,11),(3,3,12),(3,3,13),(3,3,14),(3,3,15),
(3,4,3),(3,4,4),(3,4,5),(3,4,6),(3,4,7),(3,4,8),(3,4,9),(3,4,10),(3,4,11),(3,4,12),(3,4,13),(3,4,14),(3,4,15),
]
for l in range(len(l2_lambda_list)):
l2_lambda = l2_lambda_list[l]
f = open("My Results.txt", "a")
lambda_Str = str(l2_lambda)
f.write("---------------------------------------\n")
f.write("lambda = "+f"{lambda_Str}\n")
f.write("---------------------------------------\n")
for i in range(len(filters_nb)):
F1_nb = filters_nb[i][0]
F2_nb = filters_nb[i][1]
F3_nb = filters_nb[i][2]
kernel_size_1 = kernel_size[0]
kernel_size_2 = kernel_size_1
kernel_size_3 = kernel_size_1
dropout_conv1 = DropOut[0][0]
dropout_conv2 = DropOut[0][1]
dropout_conv3 = DropOut[0][2]
dropout_dense = DropOut[0][3]
# fit model
model, history = get_model(X_train, Y_train, X_validation, Y_validation, F1_nb, F2_nb, F3_nb, kernel_size_1, kernel_size_2, kernel_size_3, l2_lambda, learning_rate, reduce_lr, dropout_conv1, dropout_conv2, dropout_conv3, dropout_dense, no_epochs)
# Evaluate metrics
predictions = model.predict(X_test)
out = np.argmax(predictions, axis=1)
Y_test = sio.loadmat('./Y_test')['targets_test']
Y_test = np.squeeze(Y_test)
loss = history.history['loss'][no_epochs-1]
acc = history.history['acc'][no_epochs-1]
val_loss = history.history['val_loss'][no_epochs-1]
val_acc = history.history['val_acc'][no_epochs-1]
# accuracy: (tp + tn) / (p + n)
accuracy = accuracy_score(Y_test, out)
# f1: 2 tp / (2 tp + fp + fn)
f1 = f1_score(Y_test, out,average='macro')
a = str(filters_nb[i][0]) + ',' + str(filters_nb[i][1]) + ',' + str(filters_nb[i][2]) + ': ' + str('f1-metric: ') + str('%f' % f1) + str(' | loss: ') + str('%f' % loss) + str(' | acc: ') + str('%f' % acc) + str(' | val_loss: ') + str('%f' % val_loss) + str(' | val_acc: ') + str('%f' % val_acc) + str(' | test_acc: ') + str('%f' % accuracy)
f.write(f"{a}\n")
f.close()

MNIST and transfer learning with VGG16 in Keras- low validation accuracy

I recently started taking advantage of Keras's flow_from_dataframe() feature for a project, and decided to test it with the MNIST dataset. I have a directory full of the MNIST samples in png format, and a dataframe with the absolute directory for each in one column and the label in the other.
I'm also using transfer learning, importing VGG16 as a base, and adding my own 512 node relu dense layer and 0.5 drop-out before a softmax layer of 10. (For digits 0-9). I'm using rmsprop (lr=1e-4) as the optimizer.
When I launch my environment, it calls the latest version of keras_preprocessing from Git, which has support for absolute directories and capitalized file extensions.
My problem is that I have a very high training accuracy, and a terribly low validation accuracy. By my final epoch (10), I had a training accuracy of 0.94 and a validation accuracy of 0.01.
I'm wondering if there's something fundamentally wrong with my script? With another dataset, I'm even getting NaNs for both my training and validation loss values after epoch 4. (I checked the relevant columns, there aren't any null values!)
Here's my code. I'd be deeply appreciative is someone could glance through it and see if anything jumped out at them.
import pandas as pd
import numpy as np
import keras
from keras_preprocessing.image import ImageDataGenerator
from keras import applications
from keras import optimizers
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, CSVLogger
from keras.applications.vgg16 import VGG16, preprocess_input
# INITIALIZE MODEL
img_width, img_height = 32, 32
model = VGG16(weights = 'imagenet', include_top=False, input_shape = (img_width, img_height, 3))
# freeze all layers
for layer in model.layers:
layer.trainable = False
# Adding custom Layers
x = model.output
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(10, activation="softmax")(x)
# creating the final model
model_final = Model(input = model.input, output = predictions)
# compile the model
rms = optimizers.RMSprop(lr=1e-4)
#adadelta = optimizers.Adadelta(lr=0.001, rho=0.5, epsilon=None, decay=0.0)
model_final.compile(loss = "categorical_crossentropy", optimizer = rms, metrics=["accuracy"])
# LOAD AND DEFINE SOURCE DATA
train = pd.read_csv('MNIST_train.csv', index_col=0)
val = pd.read_csv('MNIST_test.csv', index_col=0)
nb_train_samples = 60000
nb_validation_samples = 10000
batch_size = 60
epochs = 10
# Initiate the train and test generators
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()
train_generator = train_datagen.flow_from_dataframe(dataframe=train,
directory=None,
x_col='train_samples',
y_col='train_labels',
has_ext=True,
target_size = (img_height,
img_width),
batch_size = batch_size,
class_mode = 'categorical',
color_mode = 'rgb')
validation_generator = test_datagen.flow_from_dataframe(dataframe=val,
directory=None,
x_col='test_samples',
y_col='test_labels',
has_ext=True,
target_size = (img_height,
img_width),
batch_size = batch_size,
class_mode = 'categorical',
color_mode = 'rgb')
# GET CLASS INDICES
print('****************')
for cls, idx in train_generator.class_indices.items():
print('Class #{} = {}'.format(idx, cls))
print('****************')
# DEFINE CALLBACKS
path = './chk/epoch_{epoch:02d}-valLoss_{val_loss:.2f}-valAcc_{val_acc:.2f}.hdf5'
chk = ModelCheckpoint(path, monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')
logger = CSVLogger('./chk/training_log.csv', separator = ',', append=False)
nPlus = 1
samples_per_epoch = nb_train_samples * nPlus
# Train the model
model_final.fit_generator(train_generator,
steps_per_epoch = int(samples_per_epoch/batch_size),
epochs = epochs,
validation_data = validation_generator,
validation_steps = int(nb_validation_samples/batch_size),
callbacks = [chk, logger])
Have you tried explicitly defining the classes of the images? as such:
train_generator=image.ImageDataGenerator().flow_from_dataframe(classes=[0,1,2,3,4,5,6,7,8,9])
in both the train and validation generators.
I have found that sometimes the train and validation generators create different correspondence dictionaries.

Changing batch_size parameter in keras leads to broadcast error

I am running a simple encoder-decoder setup to train a representation for a one dimensional image. In this sample the input are lines with varying slopes and in the encoded layer we would expect something that resembles the slope. My setup is keras with a tensorflow backend. I am very new to this as well.
It all works fine, at least until I move away from steps_per_epoch to batch_size in the model.fit() method. Certain values of the batch_size, such as 1,2,3, 8 and 16 do work, for others I get a value error. My initial guess was 2^n, but that did not work.
The error I get for batch_size = 5
ValueError: operands could not be broadcast together with shapes (5,50) (3,50) (5,50)
I am trying to understand which relation between batch_size and training data is valid such that it always passes. I assumed that the training set would be simply divided into floor(N/batch_size) batches and the remainder would be processed as such.
My questions are:
What is the relation between size of data set and batch_size that are allowed.
What exactly is the keras/tensorflow trying to do such that the batch_size is important?
Thank you very much for the help.
The code to reproduce this is
import numpy as np
from keras.models import Model
from keras.layers import Input, Dense, Conv1D, Concatenate
from keras.losses import mse
from keras.optimizers import Adam
INPUT_DIM = 50
INTER_DIM = 15
LATENT_DIM = 1
# Prepare Sample Data
one_line = np.linspace(1, 30, INPUT_DIM).reshape(1, INPUT_DIM)
test_array = np.repeat(one_line, 1000, axis=0)
slopes = np.linspace(0, 1, 1000).reshape(1000, 1)
data = test_array * slopes
# Train test split
train_mask = np.where(np.random.sample(1000) < 0.8, 1, 0).astype('bool')
x_train = data[train_mask].reshape(-1, INPUT_DIM, 1)
x_test = data[~train_mask].reshape(-1, INPUT_DIM, 1)
# Define Model
input = Input(shape=(INPUT_DIM, 1), name='input')
conv_layer_small = Conv1D(filters=1, kernel_size=[3], padding='same')(input)
conv_layer_medium = Conv1D(filters=1, kernel_size=[5], padding='same')(input)
merged_convs = Concatenate()(
[conv_layer_small, conv_layer_medium])
latent = Dense(LATENT_DIM, name='latent_layer',
activation='relu')(merged_convs)
encoder = Model(input, latent)
decoder_int = Dense(INTER_DIM, name='dec_int_layer', activation='relu')(latent)
output = Dense(INPUT_DIM, name='output', activation='linear')(decoder_int)
encoder_decoder = Model(input, output, name='encoder_decoder')
# Add Loss
reconstruction_loss = mse(input, output)
encoder_decoder.add_loss(reconstruction_loss)
encoder_decoder.compile(optimizer='adam')
if __name__ == '__main__':
epochs = 100
encoder_decoder.fit(
x_train,
epochs=epochs,
batch_size=4,
verbose=2
)

Using tf.data.Dataset as training input to Keras model NOT working

I have a simple code, which DOES work, for training a Keras model in Tensorflow using numpy arrays as features and labels. If I then wrap these numpy arrays using tf.data.Dataset.from_tensor_slices in order to train the same Keras model using a tensorflow dataset, I get an error. I haven't been able to figure out why (it may be a tensorflow or keras bug, but I may also be missing something). I'm on python 3, tensorflow is 1.10.0, numpy is 1.14.5, no GPU involved.
OBS1: The possibility of using tf.data.Dataset as a Keras input is showed in https://www.tensorflow.org/guide/keras, under "Input tf.data datasets".
OBS2: In the code below, the code under "#Train with numpy arrays" is being executed, using numpy arrays. If this code is commented and the code under "#Train with tf.data datasets" is used instead, the error will be reproduced.
OBS3: In line 13, which is commented and starts with "###WORKAROUND 1###", if the comment is removed and the line is used for tf.data.Dataset inputs, the error changes, even though I can't completely understand why.
The complete code is:
import tensorflow as tf
import numpy as np
np.random.seed(1)
tf.set_random_seed(1)
print(tf.__version__)
print(np.__version__)
#Import mnist dataset as numpy arrays
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()#Import
x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
###WORKAROUND 1###y_train, y_test = (y_train.astype(dtype='float32'), y_test.astype(dtype='float32'))
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1]*x_train.shape[2])) #reshaping 28 x 28 images to 1D vectors, similar to Flatten layer in Keras
batch_size = 32
#Create a tf.data.Dataset object equivalent to this data
tfdata_dataset_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
tfdata_dataset_train = tfdata_dataset_train.batch(batch_size).repeat()
#Creates model
keras_model = tf.keras.models.Sequential([
tf.keras.layers.Dense(512, activation=tf.nn.relu),
tf.keras.layers.Dropout(0.2, seed=1),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
#Compile the model
keras_model.compile(optimizer='adam',
loss=tf.keras.losses.sparse_categorical_crossentropy,
metrics=['accuracy'])
#Train with numpy arrays
keras_training_history = keras_model.fit(x_train,
y_train,
initial_epoch=0,
epochs=1,
batch_size=batch_size
)
#Train with tf.data datasets
#keras_training_history = keras_model.fit(tfdata_dataset_train,
# initial_epoch=0,
# epochs=1,
# steps_per_epoch=60000//batch_size
# )
print(keras_training_history.history)
The error observed when using tf.data.Dataset as input is:
(...)
ValueError: Tensor conversion requested dtype uint8 for Tensor with dtype float32: 'Tensor("metrics/acc/Cast:0", shape=(?,), dtype=float32)'
During handling of the above exception, another exception occurred:
(...)
TypeError: Input 'y' of 'Equal' Op has type float32 that does not match type uint8 of argument 'x'.
The error when removing the comment from line 13, as commented above in OBS3, is:
(...)
tensorflow.python.framework.errors_impl.InvalidArgumentError: In[0] is not a matrix
[[Node: dense/MatMul = MatMul[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/dense/MatMul_grad/MatMul_1"], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_sequential_input_0_0, dense/MatMul/ReadVariableOp)]]
Any help would be appreciated, including comments that you were able to reproduce the errors, so I can report the bug if it is the case.
I just upgraded to Tensorflow 1.10 to execute this code. I think that is the answer which is also discussed in the other Stackoverflow thread
This code executes but only if I remove the normalization as that line seems to use too much CPU memory. I see messages indicating that. I also reduced the cores.
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Input
np.random.seed(1)
tf.set_random_seed(1)
batch_size = 128
NUM_CLASSES = 10
print(tf.__version__)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
#x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
def tfdata_generator(images, labels, is_training, batch_size=128):
'''Construct a data generator using tf.Dataset'''
def preprocess_fn(image, label):
'''A transformation function to preprocess raw data
into trainable input. '''
x = tf.reshape(tf.cast(image, tf.float32), (28, 28, 1))
y = tf.one_hot(tf.cast(label, tf.uint8), NUM_CLASSES)
return x, y
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
if is_training:
dataset = dataset.shuffle(1000) # depends on sample size
# Transform and batch data at the same time
dataset = dataset.apply(tf.contrib.data.map_and_batch(
preprocess_fn, batch_size,
num_parallel_batches=2, # cpu cores
drop_remainder=True if is_training else False))
dataset = dataset.repeat()
dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
return dataset
training_set = tfdata_generator(x_train, y_train,is_training=True, batch_size=batch_size)
testing_set = tfdata_generator(x_test, y_test, is_training=False, batch_size=batch_size)
inputs = Input(shape=(28, 28, 1))
x = Conv2D(32, (3, 3), activation='relu', padding='valid')(inputs)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)
keras_model = tf.keras.Model(inputs, outputs)
#Compile the model
keras_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
#Train with tf.data datasets
keras_training_history = keras_model.fit(
training_set.make_one_shot_iterator(),
steps_per_epoch=len(x_train) // batch_size,
epochs=5,
validation_data=testing_set.make_one_shot_iterator(),
validation_steps=len(x_test) // batch_size,
verbose=1)
print(keras_training_history.history)
Installing the tf-nightly build, together with changing dtypes of some tensors (the error changes after installing tf-nightly), solved the problem, so it is an issue which (hopefully) will be solved in 1.11.
Related material: https://github.com/tensorflow/tensorflow/issues/21894
I am wondering how Keras is able to do 5 epochs when the
make_one_shot_iterator() which only supports iterating once through a
dataset?
could be given smth like iterations = len(y_train) * epochs - here shown for tf.v1
the code from Mohan Radhakrishnan still works in tf.v2 with little corrections in objects' belongings to new classes (in tf.v2) fixings - to make the code up-to-date... No more make_one_shot_iterator() needed
# >> author: Mohan Radhakrishnan
import tensorflow as tf
import tensorflow.keras
import numpy as np
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout, Input
np.random.seed(1)
tf.random.set_seed(1)
batch_size = 128
NUM_CLASSES = 10
print(tf.__version__)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
#x_train, x_test = x_train / 255.0, x_test / 255.0 #normalizing
def tfdata_generator(images, labels, is_training, batch_size=128):
'''Construct a data generator using tf.Dataset'''
def preprocess_fn(image, label):
'''A transformation function to preprocess raw data
into trainable input. '''
x = tf.reshape(tf.cast(image, tf.float32), (28, 28, 1))
y = tf.one_hot(tf.cast(label, tf.uint8), NUM_CLASSES)
return x, y
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
if is_training:
dataset = dataset.shuffle(1000) # depends on sample size
# Transform and batch data at the same time
dataset = dataset.apply( tf.data.experimental.map_and_batch(
preprocess_fn, batch_size,
num_parallel_batches=2, # cpu cores
drop_remainder=True if is_training else False))
dataset = dataset.repeat()
dataset = dataset.prefetch( tf.data.experimental.AUTOTUNE)
return dataset
training_set = tfdata_generator(x_train, y_train,is_training=True, batch_size=batch_size)
testing_set = tfdata_generator(x_test, y_test, is_training=False, batch_size=batch_size)
inputs = Input(shape=(28, 28, 1))
x = Conv2D(32, (3, 3), activation='relu', padding='valid')(inputs)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)
keras_model = tf.keras.Model(inputs, outputs)
#Compile the model
keras_model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
#Train with tf.data datasets
# training_set.make_one_shot_iterator() - 'PrefetchDataset' object has no attribute 'make_one_shot_iterator'
keras_training_history = keras_model.fit(
training_set,
steps_per_epoch=len(x_train) // batch_size,
epochs=5,
validation_data=testing_set,
validation_steps=len(x_test) // batch_size,
verbose=1)
print(keras_training_history.history)
not loading data locally, just easy DataFlow - that is very convinient - Thanks a lot - hope my corrections are proper

Resources