I have been working with a CNN model recently, and the loss is always NaN when I train it. How do I solve this?
My Model..
def CNN_Model(inputshape):
    """Build the sequential convolutional network used in this experiment.

    Args:
        inputshape: passed as ``input_shape`` to the first Conv2D layer.

    Returns:
        An uncompiled ``tf.keras.models.Sequential`` whose last layer is a
        40-unit softmax Dense layer.
    """
    layers = tf.keras.layers

    # Convolutional feature extractor (per-image standardization after the
    # pooling stages is currently disabled).
    feature_layers = [
        layers.Conv2D(96, kernel_size=(7, 7), strides=2, activation='relu',
                      kernel_initializer='glorot_uniform', input_shape=inputshape),
        layers.MaxPooling2D((3, 3), strides=(2, 2)),
        layers.ZeroPadding2D((2, 2), data_format="channels_last"),
        layers.Conv2D(256, kernel_size=(5, 5), strides=1, activation='relu'),
        layers.MaxPooling2D((3, 3), strides=(2, 2)),
        layers.Conv2D(384, kernel_size=(3, 3), activation='relu', strides=1),
        layers.Conv2D(256, kernel_size=(3, 3), activation='relu', strides=1),
        layers.MaxPooling2D((3, 3), strides=(2, 2)),
    ]

    # Fully connected part; only the first Dense layer is L2-regularized and
    # the second 1024-unit Dense layer has no activation.
    dense_layers = [
        layers.Flatten(),
        layers.Dense(1024, kernel_regularizer=l2(0.0005), activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1024),
        layers.Dense(40, activation='softmax'),
    ]

    return tf.keras.models.Sequential(feature_layers + dense_layers)
My loss function
def contrastive_loss(y_true, y_pred):
    """Contrastive loss from Hadsell-et-al.'06.

    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    Args:
        y_true: tensor of pair labels; weights the squared-prediction term,
            while ``1 - y_true`` weights the margin term.
        y_pred: tensor of predictions (presumably pair distances - confirm
            against the model that feeds this loss).

    Returns:
        The mean of the combined terms as a backend tensor.
    """
    margin = 1
    # Term active where y_true is 1: penalize large predictions.
    squared_pred = K.square(y_pred)
    # Term active where y_true is 0: penalize predictions below the margin.
    squared_hinge = K.square(K.maximum(margin - y_pred, 0))
    combined = y_true * squared_pred + (1 - y_true) * squared_hinge
    return K.mean(combined)
I tried changing the layer configuration, but nothing worked.
I have a ResNet based CNN for classifying images of cats and dogs. I'm loading prelearned weights in order to speed up training. Only the last few fully connected layers are trained by me, the others are frozen. Training is quite fast, achieving 97% accuracy on the testing set after a minute or so. The data set in this case is broken up into 80% for training (20,000 images), 10% validation (2,500 images) and 10% testing (2,500 images).
The problem arises when I try to implement cross validation on the training set. The accuracy on the training set is improving but not on the validation set. I'm new to ML but have spent a few hours over the past few days trying to get this sorted and haven't got anywhere. I'd be very appreciative of any input you'd have to offer.
Below is the code. The first section is the code that works fine, with what I'm calling standard validation. The second section contains the troublesome components.
1. Code that works fine:
Initialize data generators
# Training images get random zoom, width/height shift and shear augmentation.
train_datagen = ImageDataGenerator(zoom_range=0.15,width_shift_range=0.2,height_shift_range=0.2,shear_range=0.15)
# Test and validation generators are created with default arguments only
# (no augmentation options are set).
test_datagen = ImageDataGenerator()
val_datagen = ImageDataGenerator()
Flow from directory
# All three iterators resize to 224x224, use batches of 32 and binary labels.
# Only the training iterator shuffles; test and validation are not shuffled.
train_generator = train_datagen.flow_from_directory(train_path,target_size=(224, 224),batch_size=32,shuffle=True,class_mode='binary')
test_generator = test_datagen.flow_from_directory(test_path,target_size=(224,224),batch_size=32,shuffle=False,class_mode='binary')
val_generator = val_datagen.flow_from_directory(val_path,target_size=(224,224),batch_size=32,shuffle=False,class_mode='binary')
Define Identity block
def identity_block(X, f, filters, stage, block):
    """Residual block whose shortcut is the unmodified input tensor.

    Args:
        X: input tensor (BatchNormalization uses axis=3, so a channels-last
            4-D tensor is assumed - confirm against the caller).
        f: side length of the square kernel of the middle convolution.
        filters: sequence of three filter counts for the three convolutions.
        stage: value used (via ``str``) in the layer names.
        block: string used in the layer names.

    Returns:
        The output tensor after the shortcut addition and final ReLU.
    """
    F1, F2, F3 = filters
    name_tail = str(stage) + block + '_branch'
    conv_prefix = 'res' + name_tail
    bn_prefix = 'bn' + name_tail
    shortcut = X

    # (filter count, kernel size, padding, name suffix, ReLU after BN?)
    main_path = (
        (F1, (1, 1), 'valid', '2a', True),
        (F2, (f, f), 'same', '2b', True),
        (F3, (1, 1), 'valid', '2c', False),  # ReLU is applied after the add
    )
    for n_filters, kernel, pad, suffix, relu_after in main_path:
        X = Conv2D(filters=n_filters, kernel_size=kernel, strides=(1, 1), padding=pad,
                   name=conv_prefix + suffix, kernel_initializer=glorot_uniform(seed=0))(X)
        X = BatchNormalization(axis=3, name=bn_prefix + suffix)(X)
        if relu_after:
            X = Activation('relu')(X)

    # Skip connection: add the untouched input back onto the main path.
    X = Add()([X, shortcut])
    return Activation('relu')(X)
Define convolutional block
def convolutional_block(X, f, filters, stage, block, s=2):
    """Residual block whose shortcut passes through its own 1x1 conv + BN.

    Args:
        X: input tensor (BatchNormalization uses axis=3, so a channels-last
            4-D tensor is assumed - confirm against the caller).
        f: side length of the square kernel of the middle convolution.
        filters: sequence of three filter counts for the main-path convolutions;
            the third is also used for the shortcut convolution.
        stage: value used (via ``str``) in the layer names.
        block: string used in the layer names.
        s: stride applied by the first main-path conv and the shortcut conv.

    Returns:
        The output tensor after the shortcut addition and final ReLU.
    """
    F1, F2, F3 = filters
    name_tail = str(stage) + block + '_branch'
    conv_prefix = 'res' + name_tail
    bn_prefix = 'bn' + name_tail

    def conv_bn(tensor, n_filters, kernel, stride, pad, suffix):
        # One convolution followed by batch normalization, named by suffix.
        tensor = Conv2D(filters=n_filters, kernel_size=kernel, strides=stride, padding=pad,
                        name=conv_prefix + suffix, kernel_initializer=glorot_uniform(seed=0))(tensor)
        return BatchNormalization(axis=3, name=bn_prefix + suffix)(tensor)

    block_input = X

    # Main path: three conv+BN steps, ReLU after the first two.
    main = conv_bn(block_input, F1, (1, 1), (s, s), 'valid', '2a')
    main = Activation('relu')(main)
    main = conv_bn(main, F2, (f, f), (1, 1), 'same', '2b')
    main = Activation('relu')(main)
    main = conv_bn(main, F3, (1, 1), (1, 1), 'valid', '2c')

    # Shortcut path: strided 1x1 conv + BN producing F3 channels.
    shortcut = conv_bn(block_input, F3, (1, 1), (s, s), 'valid', '1')

    merged = Add()([main, shortcut])
    return Activation('relu')(merged)
Define ResNet50
def ResNet50(input_shape=(224, 224, 3)):
    """Assemble the ResNet50 convolutional backbone (no classification head).

    Args:
        input_shape: shape passed to ``Input``; defaults to (224, 224, 3).

    Returns:
        An uncompiled ``Model`` named 'ResNet50' mapping the input tensor to
        the average-pooled output of stage 5.
    """
    X_input = Input(input_shape)

    # Stage 1: padded 7x7 stride-2 convolution, BN, ReLU and max pooling.
    X = ZeroPadding2D((3, 3))(X_input)
    X = Conv2D(64, (7, 7), strides=(2, 2), name='conv1', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)

    # Stages 2-5: one convolutional block ('a') followed by identity blocks.
    # (stage number, filter counts, stride of block 'a', identity block names)
    stages = (
        (2, [64, 64, 256], 1, 'bc'),
        (3, [128, 128, 512], 2, 'bcd'),
        (4, [256, 256, 1024], 2, 'bcdef'),
        (5, [512, 512, 2048], 2, 'bc'),
    )
    for stage, filters, stride, identity_names in stages:
        X = convolutional_block(X, f=3, filters=filters, stage=stage, block='a', s=stride)
        for block_name in identity_names:
            X = identity_block(X, 3, filters, stage=stage, block=block_name)

    X = AveragePooling2D(pool_size=(2, 2), padding='same')(X)
    model = Model(inputs=X_input, outputs=X, name='ResNet50')
    return model
Define base model - Prelearned weights are loaded to this model
# Backbone built by the ResNet50() function defined above, for inputs of shape (224, 224, 3).
base_model = ResNet50(input_shape=(224, 224, 3))
Define head model - prelearned weights are not loaded to this model
# Classification head stacked on the backbone output: flatten, two ReLU
# dense layers, then one sigmoid unit (the generators use binary labels).
headModel = base_model.output
headModel = Flatten()(headModel)
headModel = Dense(256, activation='relu', name='fc1', kernel_initializer=glorot_uniform(seed=0))(headModel)
headModel = Dense(128, activation='relu', name='fc2', kernel_initializer=glorot_uniform(seed=0))(headModel)
# The Dense(...) constructor call must be closed before the layer is applied
# to the tensor; a missing ')' here is a syntax error.
headModel = Dense(1, activation='sigmoid', name='fc3', kernel_initializer=glorot_uniform(seed=0))(headModel)
Create ResNet50 model
# Full network: from the backbone's input through to the head's output tensor.
model = Model(inputs=base_model.input, outputs=headModel)
Load prelearned weights to base model
# Weights are loaded into base_model only; the head layers (fc1-fc3) belong
# to `model` and keep their initial values.
base_model.load_weights("/content/drive/MyDrive/dogs-vs-cats.zip (Unzipped Files)/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")
Make sure prelearned weights are not trainable
# Freeze every backbone layer so that only the head layers are updated.
for layer in base_model.layers:
    layer.trainable = False
Compile
from keras import losses
from keras import optimizers
from keras import metrics
# Binary cross-entropy loss, the 'sgd' optimizer with its default settings,
# and accuracy reported as a metric.
model.compile(loss = 'binary_crossentropy', optimizer = 'sgd', metrics = ['accuracy'])
Train model
# One epoch over the training iterator, validated on val_generator.
# NOTE(review): mc and es are defined outside this excerpt (presumably
# ModelCheckpoint / EarlyStopping callbacks - confirm).
training_history = model.fit(train_generator,validation_data=val_generator,epochs=1,verbose=1,callbacks=[mc,es])
This works fine. val_accuracy = 0.9716
625/625 [==============================] - ETA: 0s - loss: 0.1199 - accuracy: 0.9547
Epoch 1:
val_accuracy improved from -inf to 0.97160, saving model to /content/drive/My Drive/best_model.h5
625/625 [==============================] - 275s 422ms/step - loss: 0.1199 - accuracy: 0.9547 - val_loss: 0.0729 - val_accuracy: 0.9716
Test Accuracy = 98.199
# evaluate() returns the loss followed by the compiled metrics; only the
# accuracy is kept and printed as a percentage.
_, acc = model.evaluate(test_generator, verbose = 1)
print('Accuracy: %.3f' % (acc * 100.0))
79/79 [==============================] - 10s 120ms/step - loss: 0.0445 - accuracy: 0.9820
Accuracy: 98.199`
2. Cross validation, training and testing does not work:
Put training images and labels into an array
# Draw every batch exactly ONCE so each image stays paired with its label.
# Pulling images in one pass over the generator and labels in a second pass
# pairs images with labels from differently shuffled batches (the generator
# was created with shuffle=True), which makes the labels effectively random.
batches = [train_generator.next() for _ in range(len(train_generator))]
x = np.concatenate([images for images, _ in batches])
y = np.concatenate([labels for _, labels in batches])
# The per-batch arrays have been copied into x and y; drop the extra references.
del batches
# NOTE(review): train_generator applies random augmentation, so x holds
# augmented images - confirm this is intended for cross validation.
Use scikit-learn for cross validation
from sklearn.model_selection import StratifiedKFold
# Three stratified folds; shuffle=True with random_state=None means the
# split is different on every run.
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=None)
Split into train and test sets & train model
for train, test in kfold.split(x, y):
    # Build a fresh network for every fold (same architecture as the
    # single-split model) so that no trained weights carry over between folds.
    base_model_copy = ResNet50(input_shape=(224, 224, 3))
    head_model_copy = base_model_copy.output
    head_model_copy = Flatten()(head_model_copy)
    head_model_copy = Dense(256, activation='relu', name='fc1', kernel_initializer=glorot_uniform(seed=0))(head_model_copy)
    head_model_copy = Dense(128, activation='relu', name='fc2', kernel_initializer=glorot_uniform(seed=0))(head_model_copy)
    head_model_copy = Dense(1, activation='sigmoid', name='fc3', kernel_initializer=glorot_uniform(seed=0))(head_model_copy)
    model_copy = Model(inputs=base_model_copy.input, outputs=head_model_copy)

    # Load the pretrained weights into the backbone again and freeze it, so
    # only the head layers are trained.
    base_model_copy.load_weights("/content/drive/MyDrive/dogs-vs-cats.zip (Unzipped Files)/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")
    for layer in base_model_copy.layers:
        layer.trainable = False

    model_copy.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

    # Augment only the training part of the fold; the held-out part is fed as is.
    trainImgGen = ImageDataGenerator(zoom_range=0.15, width_shift_range=0.2, height_shift_range=0.2, shear_range=0.15)
    testImgGen = ImageDataGenerator()
    trainGen = trainImgGen.flow(x[train], y[train], batch_size=32, shuffle=True)
    testGen = testImgGen.flow(x[test], y[test], batch_size=32, shuffle=True)

    # Train on this fold, validating on its held-out part.
    model_copy.fit(trainGen, validation_data=testGen, epochs=500, verbose=1)

    # Evaluate the model trained in THIS fold; evaluating the earlier `model`
    # object would report the score of a different, already-trained network.
    _, acc = model_copy.evaluate(testGen, verbose=1)
    print(acc)
Below are the results. I'd expect it to be closer to what I got using the standard validation technique prior. Any feedback would be appreciated.
Epoch 1/500
417/417 [==============================] - 139s 325ms/step - loss: 0.7239 - accuracy: 0.5001 - val_loss: 0.7060 - val_accuracy: 0.5025
Epoch 2/500
417/417 [==============================] - 134s 322ms/step - loss: 0.6985 - accuracy: 0.5166 - val_loss: 0.7049 - val_accuracy: 0.4936
Epoch 3/500
417/417 [==============================] - 135s 323ms/step - loss: 0.6923 - accuracy: 0.5251 - val_loss: 0.7034 - val_accuracy: 0.4971
Epoch 4/500
417/417 [==============================] - 134s 322ms/step - loss: 0.6898 - accuracy: 0.5376 - val_loss: 0.7043 - val_accuracy: 0.4867
Epoch 5/500
417/417 [==============================] - 134s 322ms/step - loss: 0.6861 - accuracy: 0.5431 - val_loss: 0.7110 - val_accuracy: 0.4980
Epoch 6/500
417/417 [==============================] - 135s 323ms/step - loss: 0.6813 - accuracy: 0.5572 - val_loss: 0.7078 - val_accuracy: 0.4980
Epoch 7/500
417/417 [==============================] - 135s 325ms/step - loss: 0.6769 - accuracy: 0.5680 - val_loss: 0.7222 - val_accuracy: 0.4908
Epoch 8/500
273/417 [==================>...........] - ETA: 44s - loss: 0.6716 - accuracy: 0.5768
I am trying to code a deep auto encoder in keras. My image shape is (4575,32,32,3) and targets are (4575,1)
Here's the function
def build_deep_autoencoder(img_shape, code_size):
    """Build the encoder and decoder halves of the autoencoder.

    Args:
        img_shape: shape of one input image; it must unpack into exactly
            three values (height, width, channels).
        code_size: number of units of the encoder's final Dense layer, which
            is also the width of the decoder's input.

    Returns:
        A tuple ``(encoder, decoder)`` of uncompiled ``Sequential`` models.
    """
    # The values are not used below; the unpack rejects shapes that do not
    # have exactly three entries.
    H, W, C = img_shape

    # encoder: ResNet50 features -> two regularized dense layers -> code
    encoder = Sequential()
    encoder.add(L.InputLayer(img_shape))
    encoder.add(ResNet50(include_top=False, pooling='avg'))
    encoder.add(Flatten())
    encoder.add(Dense(512, activation='relu'))
    encoder.add(Dropout(0.5))
    encoder.add(BatchNormalization())
    encoder.add(Dense(256, activation='relu'))
    encoder.add(Dropout(0.5))
    encoder.add(BatchNormalization())
    encoder.add(Dense(code_size))

    # decoder: dense projection reshaped to (2, 2, 256), then four stride-2
    # transposed convolutions (each doubles the spatial size: 2 -> 32) ending
    # in 3 output channels with no activation.
    # No layer is added to the encoder here: a Flatten() appended to the
    # encoder after its final Dense layer does nothing to the already-flat
    # code and only adds a stray layer to the returned encoder.
    decoder = Sequential()
    decoder.add(L.InputLayer((code_size,)))
    decoder.add(Dense(2 * 2 * 256))
    decoder.add(Reshape((2, 2, 256)))
    for n_filters in (128, 64, 32):
        decoder.add(Conv2DTranspose(filters=n_filters, kernel_size=(3, 3), strides=2, activation='elu', padding='same'))
    decoder.add(Conv2DTranspose(filters=3, kernel_size=(3, 3), strides=2, activation=None, padding='same'))

    return encoder, decoder
# Chain encoder and decoder into one trainable model: image -> code -> image.
encoder, decoder = build_deep_autoencoder(img_shape, code_size=2)
inp = L.Input(img_shape)
code = encoder(inp)
reconstruction = decoder(code)
autoencoder = tensorflow.keras.models.Model(inp, reconstruction)
encoder.summary()
autoencoder.compile('nadam', 'mse')
# An autoencoder learns to reproduce its input, so the training target is X
# itself. Using the (N, 1) label array as target makes the MSE loss compare a
# (batch, 32, 32, 3) reconstruction with a (batch, 1) array, which fails with
# "Incompatible shapes".
autoencoder.fit(x=X, y=X, epochs=10)
I am getting an error:
InvalidArgumentError: Incompatible shapes: [31,32,32,3] vs. [31,1]
[[{{node training_18/Nadam/gradients/loss_12/sequential_28_loss/MeanSquaredError/sub_grad/BroadcastGradientArgs}}]]
I am using tensorflow.python.keras
Any help would be appreciated.
I'm training a PyTorch neural network on Google Colab to classify sign language alphabets of 29 classes in total.
We've been fixing the code by changing various params but it won't work anyway.
# Preprocessing applied to every image, in this order.
transform = transforms.Compose([
# convert to grayscale
transforms.Grayscale(),
# resize to 128x128
transforms.Resize((128,128)),
# convert the image to a tensor
transforms.ToTensor(),
# normalize the single channel with mean 0.1307 and std 0.3081
# NOTE(review): these look like the commonly quoted MNIST statistics -
# confirm they are appropriate for this dataset.
transforms.Normalize( (0.1307,), (0.3081,)),
])
data_dir = 'data/train/asl_alphabet_train'
# dataset: images read from data_dir, each passed through `transform`
full_dataset = datasets.ImageFolder(root=data_dir, transform=transform)
# 80% of the samples for training, the remainder for testing
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
# random, non-overlapping split into the two subsets
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])
# batches of 4 samples; only the training loader shuffles
trainloader = torch.utils.data.DataLoader(train_dataset , batch_size = 4, shuffle = True )
testloader = torch.utils.data.DataLoader(test_dataset , batch_size = 4, shuffle = False )
#neural net architecture
Net(
(conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(fc1): Linear(in_features=32768, out_features=128, bias=True)
(fc2): Linear(in_features=128, out_features=29, bias=True)
(dropout): Dropout(p=0.5)
)
# cross-entropy loss for the multi-class classification task
loss_fn = nn.CrossEntropyLoss()
# optimizer: SGD over all model parameters with learning rate 0.01
opt = optim.SGD(model.parameters(), lr=0.01)
def train(model, train_loader, optimizer, loss_fn, epoch, device):
    """Train ``model`` for ``epoch`` passes over ``train_loader`` and plot the loss.

    Args:
        model: network to optimize; switched to training mode first.
        train_loader: iterable of ``(data, target)`` batches that also
            provides ``len()`` and a ``dataset`` attribute (both are used by
            the progress message).
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        loss_fn: callable ``loss_fn(output, target)`` returning a scalar loss
            tensor.
        epoch: total number of epochs to run.
        device: accepted for interface compatibility; batches are NOT moved
            to it, so model and data must already live on the same device.

    Returns:
        None. Progress is printed and the recorded losses are plotted.
    """
    # Training mode (affects layers such as dropout).
    model.train()
    loss_epoch_arr = []

    for e in range(epoch):
        loss = None
        for batch_idx, (data, target) in enumerate(train_loader):
            # Clear gradients left over from the previous step.
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, target)
            loss.backward()
            optimizer.step()

            # Report progress on every 10th batch, labelled with the CURRENT
            # epoch index (not the total number of epochs).
            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    e, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))

            # Release the batch tensors before the next iteration.
            del data, target, output

        # Record the loss of the last batch of this epoch.
        # NOTE(review): the source's indentation was lost; per-epoch recording
        # is inferred from the name loss_epoch_arr - confirm.
        # An empty loader produces no loss, so nothing is recorded.
        if loss is not None:
            loss_epoch_arr.append(loss.item())

    # Plot the recorded losses.
    plt.plot(loss_epoch_arr)
    plt.show()
# Run 10 epochs; `model` and `device` are defined outside this excerpt.
train(model, trainloader , opt, loss_fn, 10, device)
ValueError: Expected input batch_size (1) to match target batch_size
(4).
We're beginners in pytorch and trying to figure out what the problem is.
The most likely cause of this error relates to the value of in_features within the nn.Linear function
You haven't provided your full code for this.
One way to check for this is to add the following line to your forward function (before x.view):
print('x_shape:',x.shape)
The result will be of the form [a,b,c,d]. in_features value should be equal to b*c*d
I'm using places365-standard datasets to train my Keras CNN VGG16 model. We change the output layer from 1000 categories to 10, which are 1.botanical_garden 2.cliff 3.creek 4.forest-broadleaf 5.islet 6.mountain 7.ocean 8.pier 9.skyscraper 10.temple-asia
Every category has 5,000 training images and the total of 50,000 training images.
The problem is that after 10 epochs of training on the 50,000 images, we still can't get our accuracy above 20%.
We will really appreciate if anyone can give us advice about why our model's accuracy is so low, thank you very much.
The model is as follow:
# VGG16-style stack: five convolutional stages followed by dense layers and a
# 10-way softmax output.
model = Sequential()

# (filters per convolution, number of convolutions) for each stage. Every
# convolution is 3x3, preceded by one pixel of zero padding and followed by
# ReLU; every stage ends with 2x2 max pooling of stride 2.
conv_stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
first_layer = True
for n_filters, n_convs in conv_stages:
    for _ in range(n_convs):
        if first_layer:
            # Only the very first layer declares the input shape.
            model.add(ZeroPadding2D((1, 1), input_shape=(224,224,3)))
            first_layer = False
        else:
            model.add(ZeroPadding2D((1, 1)))
        model.add(Conv2D(n_filters, (3, 3)))
        model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Dense part: three ReLU + dropout layers, then the 10-class softmax output.
model.add(Flatten())
for n_units in (4096, 4096, 128):
    model.add(Dense(n_units))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
model.add(Dense(10))
model.add(Activation('softmax'))
# SGD with learning rate 0.1 and a 1e-6 decay.
# NOTE(review): no momentum is passed, so nesterov=True likely has no effect
# (Nesterov is a momentum variant) - confirm whether momentum was intended.
# NOTE(review): the logged loss stays near 2.30 (about ln 10, chance level for
# 10 classes); lr=0.1 may be too high for this network - verify with a
# smaller rate.
sgd = SGD(lr=0.1, decay=1e-6, nesterov=True)
model.summary()
# One-hot labels are expected by categorical cross-entropy; accuracy is tracked.
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
# Ten passes over the data; each pass loads the 50 saved chunks one at a time
# and fits on each chunk for a single epoch, holding out 20% of the chunk for
# validation.
for i in range(10):
    for j in range(50):
        # Chunk files are labelled 1000, 2000, ..., 50000.
        chunk_label = (j+1)*1000
        X = np.load("C:/Users/firzen41616316/Desktop/numpydataKeras_1000x50/imgonehot_" + str(chunk_label) + ".npy")
        Y = np.load("C:/Users/firzen41616316/Desktop/numpydataKeras_1000x50/labelonehot_" + str(chunk_label) + ".npy")
        model.fit(x=X, y=Y, validation_split=0.2, epochs=1, verbose=1)
        print('Done training ', chunk_label, ' images')
    print('Done training 50000 images, Epoch ', i, ' -------------')
Here is some parts of the print out:
Done training 47000 images
Train on 800 samples, validate on 200 samples
Epoch 1/1
800/800 [==============================] - 29s 36ms/step - loss: 2.3021 - acc: 0.1187 - val_loss: 2.3036 - val_acc: 0.1050
Done training 48000 images
Train on 800 samples, validate on 200 samples
Epoch 1/1
800/800 [==============================] - 29s 36ms/step - loss: 2.3036 - acc: 0.1037 - val_loss: 2.3056 - val_acc: 0.1100
Done training 49000 images
Train on 800 samples, validate on 200 samples
Epoch 1/1
800/800 [==============================] - 29s 36ms/step - loss: 2.3028 - acc: 0.1187 - val_loss: 2.3042 - val_acc: 0.1050
Done training 50000 images
Done training 50000 images, Epoch 9 -------------