Loss is always nan - keras

I have been working with a CNN model recently, but the loss is always nan for this model. How do I solve this?
My model:
def CNN_Model(inputshape):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(96, kernel_size=(7, 7), strides=2, activation='relu',
                               kernel_initializer='glorot_uniform', input_shape=inputshape),
        tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2)),
        tf.keras.layers.ZeroPadding2D((2, 2), data_format="channels_last"),
        # tf.keras.layers.Lambda(lambda x: tf.image.per_image_standardization(x)),
        tf.keras.layers.Conv2D(256, kernel_size=(5, 5), strides=1, activation='relu'),
        tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2)),
        # tf.keras.layers.Lambda(lambda x: tf.image.per_image_standardization(x)),
        tf.keras.layers.Conv2D(384, kernel_size=(3, 3), activation='relu', strides=1),
        tf.keras.layers.Conv2D(256, kernel_size=(3, 3), activation='relu', strides=1),
        tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1024, kernel_regularizer=l2(0.0005), activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1024),
        tf.keras.layers.Dense(40, activation='softmax')
    ])
    return model
My loss function:
def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
I tried to change the layer config, but nothing worked.
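One likely source of the nan (an assumption, since the training setup isn't shown): contrastive loss expects y_pred to be a non-negative distance between a pair of embeddings, while this model ends in a 40-way softmax; also, a Euclidean distance computed with a bare K.sqrt has an infinite gradient at zero, which surfaces as nan. A minimal sketch of a numerically safer pairing, assuming a Siamese-style setup where euclidean_distance is applied to two embedding branches:

import tensorflow as tf
from tensorflow.keras import backend as K

def euclidean_distance(vects):
    # Clamp the squared distance away from zero before the sqrt:
    # the gradient of sqrt at 0 is infinite and shows up as nan in training.
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    return K.sqrt(K.maximum(sum_square, K.epsilon()))

def contrastive_loss(y_true, y_pred, margin=1.0):
    # y_pred is a distance (>= 0); y_true is 1 for similar pairs, 0 otherwise.
    y_true = K.cast(y_true, y_pred.dtype)
    return K.mean(y_true * K.square(y_pred) +
                  (1.0 - y_true) * K.square(K.maximum(margin - y_pred, 0.0)))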

Related

Image data augmentation and training

I am using the Cats vs. Dogs dataset, which contains 2000 images in 2 categories, divided into train and validation directories; it can be downloaded here.
I am trying to use real-time image augmentation to feed a CNN model via train and validation generators. I am using Python 3.8 and TF 2.5. The code is as follows:
path_to_imgs = "cats_and_dogs_filtered\\"
# Define the train and validation directory-
train_dir = os.path.join(path_to_imgs, 'train')
val_dir = os.path.join(path_to_imgs, 'validation')
batch_size = 64
IMG_HEIGHT, IMG_WIDTH = 150, 150
def plotImages(images_arr):
    # Function to plot 5 images together-
    fig, axes = plt.subplots(1, 5, figsize=(20, 20))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()
    return None
# Use image augmentation for the training dataset-
image_generator = ImageDataGenerator(
    rescale=1./255, rotation_range=135)

train_data_gen = image_generator.flow_from_directory(
    directory=train_dir, batch_size=batch_size,
    shuffle=True, target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='sparse'
)
# Found 2000 images belonging to 2 classes.

# Validation images need no augmentation-
val_data_gen = tf.keras.preprocessing.image_dataset_from_directory(
    val_dir, image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size)
# Found 1000 files belonging to 2 classes.

# Configure the dataset for performance-
# AUTOTUNE = tf.data.AUTOTUNE
# val_data_gen = val_data_gen.cache().prefetch(buffer_size = AUTOTUNE)
val_data_gen = val_data_gen.take(batch_size).cache().repeat()

augmented_images = [train_data_gen[0][0][0] for i in range(5)]
plotImages(augmented_images)
# Get a batch of training images and labels-
x, y = next(iter(train_data_gen))
# Get a batch of validation images and labels-
x_t, y_t = next(iter(val_data_gen))
x.shape, y.shape
# ((64, 150, 150, 3), (64,))
x_t.shape, y_t.shape
# (TensorShape([64, 150, 150, 3]), TensorShape([64]))
weight_decay = 0.0005

model = Sequential()

model.add(
    Conv2D(
        filters=64, kernel_size=(3, 3),
        activation='relu', kernel_initializer=tf.initializers.he_normal(),
        strides=(1, 1), padding='same', kernel_regularizer=regularizers.l2(weight_decay),
        input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)
    )
)
model.add(
    Conv2D(
        filters=64, kernel_size=(3, 3),
        activation='relu', kernel_initializer=tf.initializers.he_normal(),
        strides=(1, 1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)
    )
)
model.add(
    # AveragePooling2D(
    MaxPooling2D(
        pool_size=(2, 2), strides=(2, 2)
    )
)
model.add(
    Conv2D(
        filters=128, kernel_size=(3, 3),
        activation='relu', kernel_initializer=tf.initializers.he_normal(),
        strides=(1, 1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)
    )
)
model.add(
    Conv2D(
        filters=128, kernel_size=(3, 3),
        activation='relu', kernel_initializer=tf.initializers.he_normal(),
        strides=(1, 1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)
    )
)
model.add(
    # AveragePooling2D(
    MaxPooling2D(
        pool_size=(2, 2), strides=(2, 2)
    )
)
model.add(
    Conv2D(
        filters=256, kernel_size=(3, 3),
        activation='relu', kernel_initializer=tf.initializers.he_normal(),
        strides=(1, 1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)
    )
)
model.add(
    Conv2D(
        filters=256, kernel_size=(3, 3),
        activation='relu', kernel_initializer=tf.initializers.he_normal(),
        strides=(1, 1), padding='same', kernel_regularizer=regularizers.l2(weight_decay)
    )
)
model.add(
    AveragePooling2D(
    # MaxPooling2D(
        pool_size=(2, 2), strides=(2, 2)
    )
)
model.add(Flatten())
model.add(
    Dense(
        units=2, activation='sigmoid'
    )
)
# Compile the defined model-
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    # loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
    # loss=tf.losses.SparseCategoricalCrossentropy(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model(x).shape
# TensorShape([64, 2])

model.predict(x).shape
# (64, 2)

'''
# This is deprecated in favor of model.fit()-
model.fit_generator(
    generator=train_data_gen, steps_per_epoch=len(train_data_gen),
    epochs=5
)
'''

model.fit(train_data_gen, val_data_gen, batch_size=batch_size, epochs=5)
Using "model.fit()" gives the error:
ValueError: `y` argument is not supported when using keras.utils.Sequence as input.
What am I doing wrong?
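A likely cause, judging from the error: Model.fit() treats its second positional argument as y, and an explicit y is not allowed when the input already yields (x, y) batches, so the validation set has to go through the validation_data keyword instead. A sketch of the corrected call under that assumption (batch_size is dropped because the generator already fixes it, and validation_steps is needed because the validation dataset repeats):

model.fit(
    train_data_gen,                       # a Sequence that already yields (x, y) batches
    steps_per_epoch=len(train_data_gen),  # one full pass over the training generator
    validation_data=val_data_gen,
    validation_steps=16,                  # ceil(1000 / 64) validation batches per epoch
    epochs=5
)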

Keras custom generator: Error when checking input: expected conv2d_1_input to have 4 dimensions, but got array with shape (256, 1)

I have a large dataset (30,000 images, 80 kB each), and I'm trying to load it into my model in batches so as not to run out of memory on my GPU. Each image is a 200x200 numpy array with values of either 1 or 0, stored in a csv file, so I don't need to resize it or use any image reader, since it's already an array. I'm using a custom generator to achieve this:
class My_Custom_Generator(keras.utils.Sequence):
    def __init__(self, image_filenames, labels, batch_size):
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self):
        return (np.ceil(len(self.image_filenames) / float(self.batch_size))).astype(np.int)

    def __getitem__(self, idx):
        batch_x = self.image_filenames[idx * self.batch_size: (idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size: (idx + 1) * self.batch_size]
        return np.array(batch_x), np.array(batch_y)
My model looks like this:
X_train_filenames = np.load('X_train_filenames.npy')
y_train = np.load('y_train.npy')
X_val_filenames = np.load('X_val_filenames.npy')
y_val = np.load('y_val.npy')
batch_size = 256
my_training_batch_generator = My_Custom_Generator(X_train_filenames, y_train, batch_size)
my_validation_batch_generator = My_Custom_Generator(X_val_filenames, y_val, batch_size)
model = Sequential()
model.add(Conv2D(filters = 32, kernel_size = (3,3),input_shape=(200,200,1)))
#model.add(BatchNormalization(axis=3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.15))
model.add(Conv2D(256, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(optimizer="adam", loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
model.fit_generator(generator=my_training_batch_generator,
                    steps_per_epoch=int(y_train.shape[0] // batch_size),
                    epochs=10,
                    verbose=1,
                    validation_data=my_validation_batch_generator,
                    validation_steps=int(y_val.shape[0] // batch_size))
I get an error:
ValueError: Error when checking input: expected conv2d_1_input to have 4 dimensions, but got array with shape (256, 1)
As I understand it, the input should be a 4-dimensional array of shape (None, 200, 200, 1), but I don't know how to achieve that, since I've only just started learning how to load a dataset in parts rather than all at once.
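A likely explanation (hedged, since the loading code isn't shown): __getitem__ returns the slice of filenames itself, so Keras receives a (256, 1)-shaped array of strings rather than images. The generator needs to load each CSV into a (200, 200, 1) array. A sketch, where np.loadtxt and the comma delimiter are assumptions about how the files were written:

def __getitem__(self, idx):
    batch_x = self.image_filenames[idx * self.batch_size: (idx + 1) * self.batch_size]
    batch_y = self.labels[idx * self.batch_size: (idx + 1) * self.batch_size]
    # Load each CSV into a 200x200 array and add a trailing channel axis,
    # so the batch has the 4D shape (batch, 200, 200, 1) that Conv2D expects.
    images = np.array([
        np.loadtxt(file_name, delimiter=',').reshape(200, 200, 1)
        for file_name in batch_x
    ])
    return images, np.array(batch_y)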

The name "Generator" is used 2 times in the model. All layer names should be unique

I am trying to make a CycleGAN for unpaired image-to-image translation, following this reference. When compiling the combined model, the following error occurs. I don't know why, as I have used the same configuration as the reference. Attached is my code; please have a look if any of you can solve my problem. Thanks in advance, and sorry for my bad English.
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras_contrib.layers.normalization.instancenormalization import InstanceNormalization
img_rows, img_columns, channels = 256, 256, 1
img_shape = (img_rows, img_columns, channels)
def Generator():
    inputs = Input(img_shape)

    conv1 = Conv2D(64, (4, 4), strides=2, padding='same')(inputs)  # 128
    conv1 = Activation(LeakyReLU(alpha=0.2))(conv1)
    conv1 = InstanceNormalization()(conv1)

    conv2 = Conv2D(128, (4, 4), strides=2, padding='same')(conv1)  # 64
    conv2 = Activation(LeakyReLU(alpha=0.2))(conv2)
    conv2 = InstanceNormalization()(conv2)

    conv3 = Conv2D(256, (4, 4), strides=2, padding='same')(conv2)  # 32
    conv3 = Activation(LeakyReLU(alpha=0.2))(conv3)
    conv3 = InstanceNormalization()(conv3)

    Deconv3 = concatenate([Conv2DTranspose(256, (4, 4), strides=2, padding='same')(conv3), conv2], axis=-1)  # 64
    Deconv3 = InstanceNormalization()(Deconv3)
    Deconv3 = Dropout(0.2)(Deconv3)
    Deconv3 = Activation('relu')(Deconv3)

    Deconv2 = concatenate([Conv2DTranspose(128, (4, 4), strides=2, padding='same')(Deconv3), conv1], axis=-1)  # 128
    Deconv2 = InstanceNormalization()(Deconv2)
    Deconv2 = Dropout(0.2)(Deconv2)
    Deconv2 = Activation('relu')(Deconv2)

    Deconv1 = UpSampling2D(size=(2, 2))(Deconv2)  # 256
    Deconv1 = Conv2D(1, (4, 4), strides=1, padding='same')(Deconv1)
    outputs = Activation('tanh')(Deconv1)

    return Model(inputs=inputs, outputs=outputs, name='Generator')
def Discriminator():
    inputs = Input(img_shape)

    conv1 = Conv2D(64, (4, 4), strides=2, padding='same')(inputs)  # 128
    conv1 = Activation(LeakyReLU(alpha=0.2))(conv1)
    conv1 = InstanceNormalization()(conv1)

    conv2 = Conv2D(128, (4, 4), strides=2, padding='same')(conv1)  # 64
    conv2 = Activation(LeakyReLU(alpha=0.2))(conv2)
    conv2 = InstanceNormalization()(conv2)

    conv3 = Conv2D(256, (4, 4), strides=2, padding='same')(conv2)  # 32
    conv3 = Activation(LeakyReLU(alpha=0.2))(conv3)
    conv3 = InstanceNormalization()(conv3)

    conv4 = Conv2D(256, (4, 4), strides=2, padding='same')(conv3)  # 16
    conv4 = Activation(LeakyReLU(alpha=0.2))(conv4)
    conv4 = InstanceNormalization()(conv4)

    conv5 = Conv2D(512, (4, 4), strides=2, padding='same')(conv4)  # 8
    conv5 = Activation(LeakyReLU(alpha=0.2))(conv5)
    conv5 = InstanceNormalization()(conv5)

    conv6 = Conv2D(512, (4, 4), strides=2, padding='same')(conv5)  # 4
    conv6 = Activation(LeakyReLU(alpha=0.2))(conv6)
    conv6 = InstanceNormalization()(conv6)

    outputs = Conv2D(1, (4, 4), strides=1, padding='same')(conv6)  # 4

    return Model(inputs=inputs, outputs=outputs, name='Discriminator')
# Calculate output shape of D (PatchGAN)
patch = int(img_rows / 2**6)
disc_patch = (patch, patch, 1)
# Loss weights
lambda_cycle = 10.0 # Cycle-consistency loss
lambda_id = 0.1 * lambda_cycle # Identity loss
optimizer = Adam(0.0002, 0.5)
# Build and compile the discriminators
d_A = Discriminator()
d_B = Discriminator()
d_A.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
d_B.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
# Build the generators
g_AB = Generator()
g_BA = Generator()
# Input images from both domains
img_A = Input(shape=img_shape)
img_B = Input(shape=img_shape)
# Translate images to the other domain
fake_B = g_AB(img_A)
fake_A = g_BA(img_B)
# Translate images back to original domain
reconstr_A = g_BA(fake_B)
reconstr_B = g_AB(fake_A)
# Identity mapping of images
img_A_id = g_BA(img_A)
img_B_id = g_AB(img_B)
# For the combined model we will only train the generators
d_A.trainable = False
d_B.trainable = False
# Discriminators determines validity of translated images
valid_A = d_A(fake_A)
valid_B = d_B(fake_B)
# Combined model trains generators to fool discriminators
combined = Model(inputs=[img_A, img_B],
                 outputs=[valid_A, valid_B, reconstr_A, reconstr_B, img_A_id, img_B_id])
combined.compile(loss=['mse', 'mse', 'mae', 'mae', 'mae', 'mae'],
                 loss_weights=[1, 1, lambda_cycle, lambda_cycle, lambda_id, lambda_id],
                 optimizer=optimizer)
and the error is:
The name "Generator" is used 2 times in the model. All layer names should be unique.
These lines in the Generator and Discriminator functions are the cause of the problem: each function is invoked twice, so the fixed name produces a duplicate. Generate a unique name on every invocation, or don't provide the name argument at all.
return Model(inputs=inputs, outputs=outputs, name='Generator')
return Model(inputs=inputs, outputs=outputs, name='Discriminator')
One possible solution:
return Model(inputs=inputs, outputs=outputs)
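Alternatively, to keep meaningful names, a sketch of the unique-name option: thread a name argument through each factory so every invocation yields a distinctly named model (only the changed lines are shown; the function bodies stay as above):

def Generator(name=None):
    ...
    return Model(inputs=inputs, outputs=outputs, name=name)

g_AB = Generator(name='Generator_A_to_B')
g_BA = Generator(name='Generator_B_to_A')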

Make a custom loss function for mean intersection over union for regression on bounding boxes

I am trying to iterate over the batch one by one to calculate the mean intersection over union, but the fit function shows this error:
Error: An operation has None for the gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
Please help, as I am new to Keras.
# y_true shape: (None, 4)
import keras.backend as K

def iou(y_true, y_pred):
    # Determine the (x, y)-coordinates of the intersection rectangle
    iou = 0
    for i in range(K.int_shape(y_pred)[0]):
        boxA = y_pred[i]
        boxB = y_true[i]
        xA = K.max(boxA[0], boxB[0])
        yA = K.max(boxA[2], boxB[2])
        xB = K.min(boxA[1], boxB[1])
        yB = K.min(boxA[3], boxB[3])
        interArea = K.max(0, xB - xA + 1) * K.max(0, yB - yA + 1)
        boxAArea = (boxA[1] - boxA[0] + 1) * (boxA[3] - boxA[2] + 1)
        boxBArea = (boxB[1] - boxB[0] + 1) * (boxB[3] - boxB[2] + 1)
        iou += interArea / float(boxAArea + boxBArea - interArea)
    # MEAN
    mean = iou / K.int_shape(y_pred)[0]
    return 1 - mean

model.compile(optimizer='adam', loss=iou, metrics=['accuracy'])
model.fit(x_train, y_train, epochs=20, batch_size=50)
My model works fine with mean squared error as the loss function. The model:
input_shape = (180, 240, 3)

model = Sequential([
    Conv2D(32, (3, 3), input_shape=input_shape, padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    BatchNormalization(),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    Conv2D(256, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    BatchNormalization(),
    Flatten(),
    Dense(4096, activation='relu'),
    Dense(4096, activation='relu'),
    Dense(4, activation='relu')
])
The error means that all operations inside your custom loss function must be differentiable, since otherwise the optimization procedure cannot be executed. To fix it, check one by one which operation in your code is the culprit and substitute it with a differentiable Keras backend analogue, or find some other alternative.
Considering the provided code snippet, several suggestions may help make it work (a vectorized sketch follows this list):
- the for-loop should be vectorized;
- since you use max(0, ...) to obtain the intersection area, it may be a constant 0 with no gradient available, so check that the optimization is not stuck there;
- for the mean calculation there is a ready-to-use Keras backend function, K.mean;
- it is good practice to bound the values in order to improve your optimization (e.g., to the (0, 1) range).
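Putting these suggestions together, here is a vectorized sketch of the loss, assuming boxes are laid out as (x1, x2, y1, y2) to match the indexing in the loop above:

import keras.backend as K

def iou_loss(y_true, y_pred):
    # Intersection rectangle, computed elementwise over the whole batch
    # with K.maximum/K.minimum (differentiable, unlike Python max/min).
    xA = K.maximum(y_true[:, 0], y_pred[:, 0])
    xB = K.minimum(y_true[:, 1], y_pred[:, 1])
    yA = K.maximum(y_true[:, 2], y_pred[:, 2])
    yB = K.minimum(y_true[:, 3], y_pred[:, 3])

    # Clamp widths and heights at zero so disjoint boxes contribute zero area.
    inter = K.maximum(xB - xA, 0.0) * K.maximum(yB - yA, 0.0)
    area_true = (y_true[:, 1] - y_true[:, 0]) * (y_true[:, 3] - y_true[:, 2])
    area_pred = (y_pred[:, 1] - y_pred[:, 0]) * (y_pred[:, 3] - y_pred[:, 2])

    # K.epsilon() keeps the division bounded; IoU lands in (0, 1).
    iou = inter / (area_true + area_pred - inter + K.epsilon())
    return 1.0 - K.mean(iou)

Note that where the boxes do not overlap at all, inter is a constant 0 and the gradient vanishes (the second point above); in practice IoU losses are therefore often combined with an L1/L2 term.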

Input_shape with Conv2D error

I want to use the Keras Conv2D layer but got an error:
model.add(Conv2D(64, (2, 2), padding='valid', data_format='channels_last', input_shape=(1, 4, 4, 1)))
The Keras doc says the input should be a 4D tensor, but it throws this error:
ValueError: Input 0 is incompatible with layer conv2d_1: expected ndim=4, found ndim=5.
I did some debugging and found there's a parameter check in topology.py:
if spec.ndim is not None:
    if K.ndim(x) != spec.ndim:
        raise ValueError('Input ' + str(input_index) +
                         ' is incompatible with layer ' +
                         self.name + ': expected ndim=' +
                         str(spec.ndim) + ', found ndim=' +
                         str(K.ndim(x)))
I found that x = Tensor("conv2d_1_input:0", shape=(?, 1, 4, 4, 1), dtype=float32) is a tensor with ndim=5, while spec is an instance of InputSpec with ndim=4, so they are never equal. How can I solve this problem?
The code:
def _build_model(self):
    # Neural net for Deep-Q learning model
    model = Sequential()
    model.add(Conv2D(64, (2, 2), padding='valid', data_format='channels_last', input_shape=(1, 4, 4, 1)))
    model.add(Conv2D(128, 3, strides=(1, 1), padding='valid'))
    model.add(Flatten())
    model.add(Dense(16, activation='relu'))
    model.add(Dense(self.action_size, activation='linear'))
    model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
    return model
Try this:
model.add(Conv2D(64, (2, 2), padding='valid', data_format='channels_last', input_shape=(4, 4, 1)))
The Conv2D layer expects input of shape (samples, height, width, channels), and input_shape excludes the samples dimension; the number of samples is inferred in model.fit(), where you feed in your data.
If you look at MNIST as the simplest example, this works:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(28, 28, 1)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
...
history = model.fit(X_train, y_train, batch_size=32, epochs=1)