I have a ResNet-based CNN for classifying images of cats and dogs. I'm loading pre-trained weights in order to speed up training. Only the last few fully connected layers are trained by me; the others are frozen. Training is quite fast, achieving 97% accuracy on the test set after a minute or so. The data set is split into 80% training (20,000 images), 10% validation (2,500 images), and 10% testing (2,500 images).
The problem arises when I try to implement cross-validation on the training set. The accuracy on the training set improves, but the validation accuracy does not. I'm new to ML but have spent a few hours over the past few days trying to get this sorted and haven't gotten anywhere. I'd be very appreciative of any input you have to offer.
Below is the code. The first section is the code that works fine, with what I'm calling standard validation. The second section contains the troublesome components.
1. Code that works fine:
Imports needed for the code below

import numpy as np
from keras.models import Model
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, MaxPooling2D, AveragePooling2D
from keras.initializers import glorot_uniform
from keras.preprocessing.image import ImageDataGenerator

Initialize data generators
train_datagen = ImageDataGenerator(zoom_range=0.15,width_shift_range=0.2,height_shift_range=0.2,shear_range=0.15)
test_datagen = ImageDataGenerator()
val_datagen = ImageDataGenerator()
Flow from directory
train_generator = train_datagen.flow_from_directory(train_path,target_size=(224, 224),batch_size=32,shuffle=True,class_mode='binary')
test_generator = test_datagen.flow_from_directory(test_path,target_size=(224,224),batch_size=32,shuffle=False,class_mode='binary')
val_generator = val_datagen.flow_from_directory(val_path,target_size=(224,224),batch_size=32,shuffle=False,class_mode='binary')
Define Identity block
def identity_block(X, f, filters, stage, block):
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    F1, F2, F3 = filters
    X_shortcut = X
    X = Conv2D(filters=F1, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2a', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
    X = Activation('relu')(X)
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding='same', name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)
    X = Add()([X, X_shortcut])  # skip connection
    X = Activation('relu')(X)
    return X
Define convolutional block
def convolutional_block(X, f, filters, stage, block, s=2):
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    F1, F2, F3 = filters
    X_shortcut = X
    X = Conv2D(filters=F1, kernel_size=(1, 1), strides=(s, s), padding='valid', name=conv_name_base + '2a', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)
    X = Activation('relu')(X)
    X = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding='same', name=conv_name_base + '2b', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2b')(X)
    X = Activation('relu')(X)
    X = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding='valid', name=conv_name_base + '2c', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name=bn_name_base + '2c')(X)
    X_shortcut = Conv2D(filters=F3, kernel_size=(1, 1), strides=(s, s), padding='valid', name=conv_name_base + '1', kernel_initializer=glorot_uniform(seed=0))(X_shortcut)
    X_shortcut = BatchNormalization(axis=3, name=bn_name_base + '1')(X_shortcut)
    X = Add()([X, X_shortcut])
    X = Activation('relu')(X)
    return X
Define ResNet50
def ResNet50(input_shape=(224, 224, 3)):
    X_input = Input(input_shape)
    X = ZeroPadding2D((3, 3))(X_input)
    X = Conv2D(64, (7, 7), strides=(2, 2), name='conv1', kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(axis=3, name='bn_conv1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)
    X = convolutional_block(X, f=3, filters=[64, 64, 256], stage=2, block='a', s=1)
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='b')
    X = identity_block(X, 3, [64, 64, 256], stage=2, block='c')
    X = convolutional_block(X, f=3, filters=[128, 128, 512], stage=3, block='a', s=2)
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='b')
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='c')
    X = identity_block(X, 3, [128, 128, 512], stage=3, block='d')
    X = convolutional_block(X, f=3, filters=[256, 256, 1024], stage=4, block='a', s=2)
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='b')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='c')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='d')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='e')
    X = identity_block(X, 3, [256, 256, 1024], stage=4, block='f')
    X = convolutional_block(X, f=3, filters=[512, 512, 2048], stage=5, block='a', s=2)
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='b')
    X = identity_block(X, 3, [512, 512, 2048], stage=5, block='c')
    X = AveragePooling2D(pool_size=(2, 2), padding='same')(X)
    model = Model(inputs=X_input, outputs=X, name='ResNet50')
    return model
Define base model - pre-trained weights will be loaded into this model
base_model = ResNet50(input_shape=(224, 224, 3))
Define head model - pre-trained weights are not loaded into this model
headModel = base_model.output
headModel = Flatten()(headModel)
headModel = Dense(256, activation='relu', name='fc1', kernel_initializer=glorot_uniform(seed=0))(headModel)
headModel = Dense(128, activation='relu', name='fc2', kernel_initializer=glorot_uniform(seed=0))(headModel)
headModel = Dense(1, activation='sigmoid', name='fc3', kernel_initializer=glorot_uniform(seed=0))(headModel)
Create ResNet50 model
model = Model(inputs=base_model.input, outputs=headModel)
Load pre-trained weights into the base model
base_model.load_weights("/content/drive/MyDrive/dogs-vs-cats.zip (Unzipped Files)/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")
Make sure the pre-trained layers are not trainable
for layer in base_model.layers:
    layer.trainable = False
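To double-check that only the head will train, you can compare trainable vs. frozen parameter counts before compiling. A minimal sketch, assuming the standard Keras backend utilities:

from keras import backend as K
# The fc1/fc2/fc3 head should account for all trainable parameters;
# the frozen ResNet base should account for the rest.
trainable_count = sum(K.count_params(w) for w in model.trainable_weights)
frozen_count = sum(K.count_params(w) for w in model.non_trainable_weights)
print('Trainable: {:,} | Frozen: {:,}'.format(trainable_count, frozen_count))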
Compile
from keras import losses
from keras import optimizers
from keras import metrics
model.compile(loss = 'binary_crossentropy', optimizer = 'sgd', metrics = ['accuracy'])
Train model
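(The mc and es callbacks used in the fit call below are not defined in the snippet. Assumed definitions, consistent with the checkpoint log that follows, would be something like:)

from keras.callbacks import ModelCheckpoint, EarlyStopping
# Assumed: checkpoint on validation accuracy, matching the log below
mc = ModelCheckpoint('/content/drive/My Drive/best_model.h5', monitor='val_accuracy',
                     mode='max', save_best_only=True, verbose=1)
# Assumed: early stopping on validation accuracy; the patience value is a guess
es = EarlyStopping(monitor='val_accuracy', mode='max', patience=3, verbose=1)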
training_history = model.fit(train_generator,validation_data=val_generator,epochs=1,verbose=1,callbacks=[mc,es])
This works fine. val_accuracy = 0.9716
625/625 [==============================] - ETA: 0s - loss: 0.1199 - accuracy: 0.9547
Epoch 1:
val_accuracy improved from -inf to 0.97160, saving model to /content/drive/My Drive/best_model.h5
625/625 [==============================] - 275s 422ms/step - loss: 0.1199 - accuracy: 0.9547 - val_loss: 0.0729 - val_accuracy: 0.9716
Test Accuracy = 98.199
_, acc = model.evaluate(test_generator, verbose = 1)
print('Accuracy: %.3f' % (acc * 100.0))
79/79 [==============================] - 10s 120ms/step - loss: 0.0445 - accuracy: 0.9820
Accuracy: 98.199
2. Cross-validation: training and testing do not work:
Put training images and labels into an array
x = np.concatenate([train_generator.next()[0] for i in range(len(train_generator))])
y = np.concatenate([train_generator.next()[1] for i in range(len(train_generator))])
Use scikit-learn for cross-validation
from sklearn.model_selection import StratifiedKFold
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=None)
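For intuition, StratifiedKFold only produces pairs of index arrays whose class balance mirrors the full label set. A tiny self-contained illustration (not part of the original code):

import numpy as np
from sklearn.model_selection import StratifiedKFold

y_demo = np.array([0, 0, 0, 1, 1, 1])
# X is only used for its length here, so zeros suffice
for tr_idx, te_idx in StratifiedKFold(n_splits=3).split(np.zeros_like(y_demo), y_demo):
    print(tr_idx, te_idx)  # each test fold contains one sample of each class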
Split into train and test sets & train model
for train, test in kfold.split(x, y):
    # Create model copy - same as the model that works (as far as I'm aware...)
    base_model_copy = ResNet50(input_shape=(224, 224, 3))
    head_model_copy = base_model_copy.output
    head_model_copy = Flatten()(head_model_copy)
    head_model_copy = Dense(256, activation='relu', name='fc1', kernel_initializer=glorot_uniform(seed=0))(head_model_copy)
    head_model_copy = Dense(128, activation='relu', name='fc2', kernel_initializer=glorot_uniform(seed=0))(head_model_copy)
    head_model_copy = Dense(1, activation='sigmoid', name='fc3', kernel_initializer=glorot_uniform(seed=0))(head_model_copy)
    model_copy = Model(inputs=base_model_copy.input, outputs=head_model_copy)
    # load pre-trained weights again
    base_model_copy.load_weights("/content/drive/MyDrive/dogs-vs-cats.zip (Unzipped Files)/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")
    # make sure the pre-trained layers are not trainable
    for layer in base_model_copy.layers:
        layer.trainable = False
    # compile
    model_copy.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
    trainImgGen = ImageDataGenerator(zoom_range=0.15, width_shift_range=0.2, height_shift_range=0.2, shear_range=0.15)
    testImgGen = ImageDataGenerator()
    trainGen = trainImgGen.flow(x[train], y[train], batch_size=32, shuffle=True)
    testGen = testImgGen.flow(x[test], y[test], batch_size=32, shuffle=True)
    # training for cross-validation
    model_copy.fit(trainGen, validation_data=testGen, epochs=500, verbose=1)
    # evaluate results on the held-out fold (was model.evaluate, which pointed at the first model)
    _, acc = model_copy.evaluate(testGen, verbose=1)
    print(acc)
Below are the results. I'd expect them to be much closer to what I got using the standard validation technique above. Any feedback would be appreciated.
Epoch 1/500
417/417 [==============================] - 139s 325ms/step - loss: 0.7239 - accuracy: 0.5001 - val_loss: 0.7060 - val_accuracy: 0.5025
Epoch 2/500
417/417 [==============================] - 134s 322ms/step - loss: 0.6985 - accuracy: 0.5166 - val_loss: 0.7049 - val_accuracy: 0.4936
Epoch 3/500
417/417 [==============================] - 135s 323ms/step - loss: 0.6923 - accuracy: 0.5251 - val_loss: 0.7034 - val_accuracy: 0.4971
Epoch 4/500
417/417 [==============================] - 134s 322ms/step - loss: 0.6898 - accuracy: 0.5376 - val_loss: 0.7043 - val_accuracy: 0.4867
Epoch 5/500
417/417 [==============================] - 134s 322ms/step - loss: 0.6861 - accuracy: 0.5431 - val_loss: 0.7110 - val_accuracy: 0.4980
Epoch 6/500
417/417 [==============================] - 135s 323ms/step - loss: 0.6813 - accuracy: 0.5572 - val_loss: 0.7078 - val_accuracy: 0.4980
Epoch 7/500
417/417 [==============================] - 135s 325ms/step - loss: 0.6769 - accuracy: 0.5680 - val_loss: 0.7222 - val_accuracy: 0.4908
Epoch 8/500
273/417 [==================>...........] - ETA: 44s - loss: 0.6716 - accuracy: 0.5768
My code is available at
https://www.kaggle.com/aman0807/vgg-cifar10/execution
As you can see, the validation loss diverges from the start of training.
I have tried the Adam optimizer as well as the SGD optimizer. I cannot figure out what I am doing incorrectly. Please point me in the right direction.
Edit: included the entire code here.
# Importing Dependencies
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from datetime import datetime
# Defining model
arch = [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']  # ints are conv output channels; 'M' is a 2x2 max-pool
class VGGNet(nn.Module):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(arch)
        self.fcs = nn.Sequential(
            nn.Linear(in_features=512*1*1, out_features=4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        # print(x.shape)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

    def create_conv_layers(self, arch):
        layers = []
        in_channels = self.in_channels
        for x in arch:
            if type(x) == int:
                out_channels = x
                layers += [nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                           nn.BatchNorm2d(x),
                           nn.ReLU(),
                           ]
                in_channels = x
            elif x == 'M':
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]
        return nn.Sequential(*layers)
# Hyperparameters and settings
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
TRAIN_BATCH_SIZE = 64
VAL_BATCH_SIZE = 16
EPOCHS = 50
train_data = CIFAR10(root=".", train=True,
transform=transforms.Compose([transforms.ToTensor()]), download=True)
# print(len(train_data))
val_data = CIFAR10(root=".", train=False,
transform=transforms.Compose([transforms.ToTensor()]), download=True)
# print(len(val_data))
train_loader = DataLoader(train_data, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=8)
val_loader = DataLoader(val_data, batch_size=VAL_BATCH_SIZE, shuffle=True, num_workers=8)
# print(len(train_loader))
# print(len(val_loader))
num_train_batches = int(len(train_data)/TRAIN_BATCH_SIZE)
num_val_batches = int(len(val_data)/VAL_BATCH_SIZE)
# Training and Val Loop
model = VGGNet(3, 10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# optim = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, verbose=True)
# save_path = os.path.join(r"trained_models", f'{datetime.now().strftime("%m%d_%H%M%S")}.pth')
def train_val():
    for epoch in range(1, EPOCHS+1):
        print(f"Epoch: {epoch}/20")  # hard-coded "/20" although EPOCHS = 50, hence "21/20" etc. in the log below
        model.train()
        total_loss = 0
        for data in train_loader:
            image, target = data[0], data[1]
            image, target = image.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(image)
            loss = criterion(output, target)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        print(f"Loss : {total_loss / num_train_batches}")
        save_path = os.path.join(r"trained_models", f'{datetime.now().strftime("%m%d_%H%M%S")}_{epoch}.pth')
        if epoch % 5 == 0:
            torch.save(model.state_dict(), save_path)
        with torch.no_grad():
            model.eval()
            total_val_loss = 0
            for data in val_loader:
                image, target = data[0], data[1]
                image, target = image.to(device), target.to(device)
                output = model(image)
                val_loss = criterion(output, target)
                total_val_loss += val_loss
            total_val_loss = total_val_loss/num_val_batches
            print(f"Val Loss: {total_val_loss}")
            scheduler.step(total_val_loss)
The output is:
> Epoch: 1/20 Loss : 1.3286100650795292 Val Loss: 1.3787670135498047
> Epoch: 2/20 Loss : 0.822020811685832 Val Loss: 0.948610246181488
> Epoch: 3/20 Loss : 0.6018326392113476 Val Loss: 0.9581698775291443
> Epoch: 4/20 Loss : 0.47134833609764004 Val Loss: 1.2446043491363525
> Epoch: 5/20 Loss : 0.35625831704114524 Val Loss: 0.8038020730018616
> Epoch: 6/20 Loss : 0.27602518926566605 Val Loss: 0.6090452075004578
> Epoch: 7/20 Loss : 0.21279048924686128 Val Loss: 0.6626076102256775
> Epoch: 8/20 Loss : 0.16782210255280214 Val Loss: 0.6386368870735168
> Epoch: 9/20 Loss : 0.12904227719518205 Val Loss: 0.8135524988174438
> Epoch: 10/20 Loss : 0.10961572862077902 Val Loss: 0.727300226688385
> Epoch: 11/20 Loss : 0.08377284912137456 Val Loss: 0.7346469163894653
> Epoch: 12/20 Loss : 0.07044737199237916 Val Loss: 0.8241418600082397
> Epoch: 13/20 Loss : 0.06040401630707726 Val Loss: 0.8411757349967957
> Epoch: 14/20 Loss : 0.05157513573171604 Val Loss: 0.9980310201644897
> Epoch: 15/20 Loss : 0.04703645325243019 Val Loss: 0.7441162467002869
> Epoch: 16/20 Loss : 0.039386494244257594 Val Loss: 0.7185537219047546
> Epoch: 17/20 Loss : 0.0361507039006692 Val Loss: 0.7251362800598145
> Epoch 17: reducing learning rate of group 0 to 1.0000e-03.
> Epoch: 18/20 Loss : 0.010131187833331622 Val Loss: 0.6911444067955017
> Epoch: 19/20 Loss : 0.004273188020082817 Val Loss: 0.6758599877357483
> Epoch: 20/20 Loss : 0.0023282255553611917 Val Loss: 0.6790934801101685
> Epoch: 21/20 Loss : 0.002249847758697408 Val Loss: 0.6877240538597107
> Epoch: 22/20 Loss : 0.0017385115527510854 Val Loss: 0.6955451369285583
> Epoch: 23/20 Loss : 0.0015288436127294692 Val Loss: 0.6895047426223755
> Epoch: 24/20 Loss : 0.0013521527944272392 Val Loss: 0.6986600160598755
> Epoch: 25/20 Loss : 0.001302041847793007 Val Loss: 0.7062056660652161
> Epoch: 26/20 Loss : 0.0009422088254753626 Val Loss: 0.721610426902771
> Epoch: 27/20 Loss : 0.00092220353266205 Val Loss: 0.7185065150260925
> Epoch: 28/20 Loss : 0.000855279816558849 Val Loss: 0.7262870669364929
> Epoch 28: reducing learning rate of group 0 to 1.0000e-04.
> Epoch: 29/20 Loss : 0.0009319903464029908 Val Loss: 0.7129291892051697
> Epoch: 30/20 Loss : 0.0009213638452758586 Val Loss: 0.7219230532646179
> Epoch: 31/20 Loss : 0.000790543920696094 Val Loss: 0.7180697321891785
> Epoch: 32/20 Loss : 0.0007001494628562334 Val Loss: 0.7132201194763184
> Epoch: 33/20 Loss : 0.000848956296528779 Val Loss: 0.720444917678833
> Epoch: 34/20 Loss : 0.0007309111221651813 Val Loss: 0.7152464389801025
> Epoch: 35/20 Loss : 0.0008760697004345624 Val Loss: 0.7213227152824402
> Epoch: 36/20 Loss : 0.0007820251893755433 Val Loss: 0.7167501449584961
> Epoch: 37/20 Loss : 0.0008780398232813891 Val Loss: 0.7176992297172546
> Epoch: 38/20 Loss : 0.0006976223276931253 Val Loss: 0.7317324280738831
> Epoch: 39/20 Loss : 0.0007082251550411605 Val Loss: 0.7190949320793152
> Epoch 39: reducing learning rate of group 0 to 1.0000e-05.
> Epoch: 40/20 Loss : 0.0007200245124846 Val Loss: 0.7223398089408875
> Epoch: 41/20 Loss : 0.0007270275670335591 Val Loss: 0.721064031124115
> Epoch: 42/20 Loss : 0.0008191512905424633 Val Loss: 0.7175146341323853
> Epoch: 43/20 Loss : 0.0007468872463225763 Val Loss: 0.7313894629478455
> Epoch: 44/20 Loss : 0.0007858102934361841 Val Loss: 0.7234065532684326
> Epoch: 45/20 Loss : 0.0008550570492202036 Val Loss: 0.7210699319839478
> Epoch: 46/20 Loss : 0.0007921795028490795 Val Loss: 0.7209402322769165
> Epoch: 47/20 Loss : 0.0007907233434224173 Val Loss: 0.7207257747650146
> Epoch: 48/20 Loss : 0.0008106642895507929 Val Loss: 0.7174465656280518
> Epoch: 49/20 Loss : 0.0008104693277185383 Val Loss: 0.7192382216453552
> Epoch: 50/20 Loss : 0.0007664988370204754 Val Loss: 0.722599446773529
> Epoch 50: reducing learning rate of group 0 to 1.0000e-06.
This is my code for this program. It was working correctly, but suddenly it does not work any more; can anyone solve this problem?
from keras.models import Sequential
from keras.layers import Convolution3D, MaxPooling3D, Dropout, Flatten, Dense, Activation

model = Sequential()
print(nb_filters[0], 'filters')
print('input shape', img_rows, 'rows', img_cols, 'cols', patch_size, 'patchsize')
model.add(Convolution3D(
    nb_filters[0],
    kernel_dim1=1,           # depth
    kernel_dim2=nb_conv[0],  # rows
    kernel_dim3=nb_conv[1],  # cols
    input_shape=(1, img_rows, img_cols, patch_size),
    activation='relu'
))
model.add(MaxPooling3D(pool_size=(1, nb_pool[0], nb_pool[0])))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, init='normal', activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(nb_classes,init='normal'))
model.add(Activation('softmax'))
#optimizer adam,sgd,RMSprop,Adagrad,Adadelta,Nadam,
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
This error occurs in my program. I don't understand what the problem is or how to solve it; I have searched many times but could not fix it.
--------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-112-671e85975992> in <module>
13 x = Dense(nb_classes, activation='softmax')(x)
14
---> 15 custom_model = Model(input=resnet_model.input, output=x)
16
17 for layer in custom_model.layers[:7]:
/usr/local/lib/python3.8/dist-packages/tensorflow/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training.py in __init__(self, *args, **kwargs)
259 # self.trainable_weights
260 # self.non_trainable_weights
--> 261 generic_utils.validate_kwargs(kwargs, {'trainable', 'dtype', 'dynamic',
262 'name', 'autocast'})
263 super(Model, self).__init__(**kwargs)
/usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/utils/generic_utils.py in validate_kwargs(kwargs, allowed_kwargs, error_message)
776 for kwarg in kwargs:
777 if kwarg not in allowed_kwargs:
--> 778 raise TypeError(error_message, kwarg)
779
780
TypeError: ('Keyword argument not understood:', 'input')
As suggested by Dr. Snoopy, the arguments to tf.keras.Model are inputs and outputs, but you are passing them as input and output in custom_model = Model(input=resnet_model.input, output=x).
Code to reproduce the error -
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
X1 = tf.constant([2, 3, 4, 5, 6, 7])
X2 = tf.constant([2, 3, 4, 5, 6, 7])
yTrain = tf.constant([4, 6, 8, 10, 12, 14])
input1 = keras.Input(shape=(1,))
input2 = keras.Input(shape=(1,))
x = layers.concatenate([input1, input2])
x = layers.Dense(8, activation='relu')(x)
outputs = layers.Dense(2)(x)
mlp = keras.Model(input = [input1, input2], output = outputs)
mlp.summary()
mlp.compile(loss='mean_squared_error',
optimizer='adam', metrics=['accuracy'])
mlp.fit([X1, X2], yTrain, batch_size=1, epochs=10, validation_split=0.2,
shuffle=True)
mlp.evaluate([X1, X2], yTrain)
Output -
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-bec9ebbd1faf> in <module>()
14 x = layers.Dense(8, activation='relu')(x)
15 outputs = layers.Dense(2)(x)
---> 16 mlp = keras.Model(input = [input1, input2], output = outputs)
17
18 mlp.summary()
2 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py in validate_kwargs(kwargs, allowed_kwargs, error_message)
776 for kwarg in kwargs:
777 if kwarg not in allowed_kwargs:
--> 778 raise TypeError(error_message, kwarg)
779
780
TypeError: ('Keyword argument not understood:', 'input')
To fix the error, change the arguments to inputs and outputs.
Fixed Code -
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
X1 = tf.constant([2, 3, 4, 5, 6, 7])
X2 = tf.constant([2, 3, 4, 5, 6, 7])
yTrain = tf.constant([4, 6, 8, 10, 12, 14])
input1 = keras.Input(shape=(1,))
input2 = keras.Input(shape=(1,))
x = layers.concatenate([input1, input2])
x = layers.Dense(8, activation='relu')(x)
outputs = layers.Dense(2)(x)
mlp = keras.Model(inputs = [input1, input2], outputs = outputs)
mlp.summary()
mlp.compile(loss='mean_squared_error',
optimizer='adam', metrics=['accuracy'])
mlp.fit([X1, X2], yTrain, batch_size=1, epochs=10, validation_split=0.2,
shuffle=True)
mlp.evaluate([X1, X2], yTrain)
Output -
Model: "functional_5"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_6 (InputLayer) [(None, 1)] 0
__________________________________________________________________________________________________
input_7 (InputLayer) [(None, 1)] 0
__________________________________________________________________________________________________
concatenate_34 (Concatenate) (None, 2) 0 input_6[0][0]
input_7[0][0]
__________________________________________________________________________________________________
dense_4 (Dense) (None, 8) 24 concatenate_34[0][0]
__________________________________________________________________________________________________
dense_5 (Dense) (None, 2) 18 dense_4[0][0]
==================================================================================================
Total params: 42
Trainable params: 42
Non-trainable params: 0
__________________________________________________________________________________________________
Epoch 1/10
4/4 [==============================] - 0s 32ms/step - loss: 54.3236 - accuracy: 0.0000e+00 - val_loss: 169.3114 - val_accuracy: 0.0000e+00
Epoch 2/10
4/4 [==============================] - 0s 6ms/step - loss: 53.4965 - accuracy: 0.0000e+00 - val_loss: 167.0008 - val_accuracy: 0.0000e+00
Epoch 3/10
4/4 [==============================] - 0s 6ms/step - loss: 52.7413 - accuracy: 0.0000e+00 - val_loss: 164.6473 - val_accuracy: 0.0000e+00
Epoch 4/10
4/4 [==============================] - 0s 6ms/step - loss: 51.8159 - accuracy: 0.0000e+00 - val_loss: 162.4427 - val_accuracy: 0.0000e+00
Epoch 5/10
4/4 [==============================] - 0s 6ms/step - loss: 51.0917 - accuracy: 0.0000e+00 - val_loss: 160.1798 - val_accuracy: 0.0000e+00
Epoch 6/10
4/4 [==============================] - 0s 6ms/step - loss: 50.4425 - accuracy: 0.0000e+00 - val_loss: 157.8355 - val_accuracy: 0.0000e+00
Epoch 7/10
4/4 [==============================] - 0s 6ms/step - loss: 49.5709 - accuracy: 0.0000e+00 - val_loss: 155.6147 - val_accuracy: 0.0000e+00
Epoch 8/10
4/4 [==============================] - 0s 6ms/step - loss: 48.7816 - accuracy: 0.0000e+00 - val_loss: 153.4298 - val_accuracy: 0.0000e+00
Epoch 9/10
4/4 [==============================] - 0s 6ms/step - loss: 47.9975 - accuracy: 0.0000e+00 - val_loss: 151.2858 - val_accuracy: 0.0000e+00
Epoch 10/10
4/4 [==============================] - 0s 6ms/step - loss: 47.3943 - accuracy: 0.0000e+00 - val_loss: 149.0254 - val_accuracy: 0.0000e+00
1/1 [==============================] - 0s 2ms/step - loss: 80.9333 - accuracy: 0.0000e+00
[80.93333435058594, 0.0]
I am training the UNET image segmentation network on the brain tumor dataset from figshare. It trains well: training loss and training dice score move in step with validation loss and validation dice score, so overfitting doesn't appear to be the issue. But after approximately 40 epochs there is no further improvement in the performance measures; the loss oscillates around 0.58 and the dice score around 0.47. How can I solve this? Please advise.
Below is my UNET network:
from keras.models import Model
from keras.layers import Input, Convolution2D, MaxPooling2D, UpSampling2D, BatchNormalization, Dropout, concatenate

def unet(pretrained_weights=None, input_size=(512, 512, 3)):
    inputs = Input(input_size)
    conv1 = Convolution2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
    conv1 = BatchNormalization()(conv1)
    #conv1 = Dropout(0.2)(conv1)
    conv1 = Convolution2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    conv1 = BatchNormalization()(conv1)
    #conv1 = Dropout(0.2)(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Convolution2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
    conv2 = BatchNormalization()(conv2)
    #conv2 = Dropout(0.1)(conv2)
    conv2 = Convolution2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    conv2 = BatchNormalization()(conv2)
    #conv2 = Dropout(0.1)(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = Convolution2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
    conv3 = BatchNormalization()(conv3)
    #conv3 = Dropout(0.1)(conv3)
    conv3 = Convolution2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    conv3 = BatchNormalization()(conv3)
    #conv3 = Dropout(0.1)(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = Convolution2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
    conv4 = BatchNormalization()(conv4)
    #conv4 = Dropout(0.1)(conv4)
    conv4 = Convolution2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    conv4 = BatchNormalization()(conv4)
    #conv4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
    conv5 = Convolution2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
    conv5 = BatchNormalization()(conv5)
    #conv5 = Dropout(0.1)(conv5)
    conv5 = Convolution2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    conv5 = BatchNormalization()(conv5)
    #conv5 = Dropout(0.5)(conv5)
    up6 = Convolution2D(512, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv5))
    merge6 = concatenate([conv4, up6], axis=3)
    conv6 = Convolution2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge6)
    conv6 = BatchNormalization()(conv6)
    #conv6 = Dropout(0.1)(conv6)
    conv6 = Convolution2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    conv6 = BatchNormalization()(conv6)
    #conv6 = Dropout(0.1)(conv6)
    up7 = Convolution2D(256, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv6))
    merge7 = concatenate([conv3, up7], axis=3)
    conv7 = Convolution2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
    conv7 = BatchNormalization()(conv7)
    #conv7 = Dropout(0.1)(conv7)
    conv7 = Convolution2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    conv7 = BatchNormalization()(conv7)
    #conv7 = Dropout(0.1)(conv7)
    up8 = Convolution2D(128, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv7))
    merge8 = concatenate([conv2, up8], axis=3)
    conv8 = Convolution2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge8)
    conv8 = BatchNormalization()(conv8)
    #conv8 = Dropout(0.1)(conv8)
    conv8 = Convolution2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    conv8 = BatchNormalization()(conv8)
    #conv8 = Dropout(0.1)(conv8)
    up9 = Convolution2D(64, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv8))
    merge9 = concatenate([conv1, up9], axis=3)
    conv9 = Convolution2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge9)
    conv9 = BatchNormalization()(conv9)
    #conv9 = Dropout(0.2)(conv9)
    conv9 = Convolution2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv9 = BatchNormalization()(conv9)
    #conv9 = Dropout(0.2)(conv9)
    conv9 = Convolution2D(2, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv9 = BatchNormalization()(conv9)
    #conv9 = Dropout(0.2)(conv9)
    conv10 = Convolution2D(1, 1, activation='sigmoid')(conv9)
    model = Model(input=inputs, output=conv10)
    #model.summary()
    if pretrained_weights:
        model.load_weights(pretrained_weights)
    return model
Callback details are initialized below. Starting LR = 1e-4.
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, CSVLogger

callbacks = [EarlyStopping(monitor='val_loss', mode="min", patience=30, verbose=1, min_delta=1e-4),
             ReduceLROnPlateau(monitor='val_loss', mode="min", factor=0.1, patience=8, verbose=1),
             ModelCheckpoint(monitor='val_loss', mode="min",
                             filepath='weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-{epoch:03d}-{dice_coef:.6f}--{val_loss:.6f}.hdf5',
                             save_weights_only=True, verbose=1),
             CSVLogger('weights/anmol/1/UNET_mixed_loss_monitor_DC_new.csv')]
My user-defined Dice score and loss functions are below; I have used dice_coef_loss as the training loss.
from keras import backend as K
from keras.losses import binary_crossentropy

def dice_coef(y_true, y_pred, smooth=1):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def dice_loss(y_true, y_pred):
    loss = 1 - dice_coef(y_true, y_pred)
    return loss

def dice_coef_loss(y_true, y_pred):
    return binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
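For reference, dice_coef above implements the smoothed Dice coefficient, and dice_coef_loss adds binary cross-entropy to the Dice loss:

$$\mathrm{Dice}(y,\hat{y}) = \frac{2\sum_i y_i \hat{y}_i + s}{\sum_i y_i + \sum_i \hat{y}_i + s}, \qquad \mathcal{L}(y,\hat{y}) = \mathrm{BCE}(y,\hat{y}) + \bigl(1 - \mathrm{Dice}(y,\hat{y})\bigr), \quad s = 1.$$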
Used 2605 images for training, 306 images for validation.
[Figure: sample training image and its corresponding mask]
img_size = 512
image_args = dict(seed=seed,
                  batch_size=2,
                  shuffle=True,
                  class_mode=None,
                  target_size=(img_size, img_size),
                  color_mode='rgb')
mask_args = dict(seed=seed,
                 batch_size=2,
                 class_mode=None,
                 shuffle=True,
                 target_size=(img_size, img_size),
                 color_mode='grayscale')
DIR = 'raw/brain/'
image = 'images'
masks = 'masks'
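(The image_datagen and mask_datagen generators used below are not defined in the snippet. A minimal assumed definition would be:)

from keras.preprocessing.image import ImageDataGenerator
# Assumed: plain generators; any augmentation would need the same seed for
# images and masks so that the pairs stay aligned.
image_datagen = ImageDataGenerator()
mask_datagen = ImageDataGenerator()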
# combine generators into one which yields image and masks
train_generator = zip(image_datagen.flow_from_directory(**image_args, directory=DIR+'train_'+image),
mask_datagen.flow_from_directory(**mask_args, directory=DIR+'train_'+masks))
validation_generator = zip(image_datagen.flow_from_directory(**image_args, directory=DIR+'validation_'+image),
mask_datagen.flow_from_directory(**mask_args, directory=DIR+'validation_'+masks))
model.fit_generator(train_generator, steps_per_epoch=1302, epochs=100, validation_data=validation_generator,validation_steps=153, callbacks=callbacks)
Some of the training log is shown below:
Epoch 00041: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-041-0.466533--0.511900.hdf5
Epoch 42/100
1302/1302 [==============================] - 1063s 817ms/step - loss: 0.5939 - dice_coef: 0.4658 - val_loss: 0.5076 - val_dice_coef: 0.5430
Epoch 00042: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-042-0.465990--0.507603.hdf5
Epoch 43/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5928 - dice_coef: 0.4678 - val_loss: 0.5191 - val_dice_coef: 0.5270
Epoch 00043: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-043-0.467685--0.519115.hdf5
Epoch 44/100
1302/1302 [==============================] - 1063s 817ms/step - loss: 0.5966 - dice_coef: 0.4632 - val_loss: 0.5158 - val_dice_coef: 0.5364
Epoch 00044: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-044-0.463308--0.515760.hdf5
Epoch 45/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5892 - dice_coef: 0.4702 - val_loss: 0.4993 - val_dice_coef: 0.5507
Epoch 00045: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-045-0.470134--0.499294.hdf5
Epoch 46/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5960 - dice_coef: 0.4636 - val_loss: 0.5166 - val_dice_coef: 0.5329
Epoch 00046: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-046-0.463810--0.516552.hdf5
Epoch 47/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5920 - dice_coef: 0.4672 - val_loss: 0.5062 - val_dice_coef: 0.5427
Epoch 00047: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-047-0.467146--0.506242.hdf5
Epoch 48/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5938 - dice_coef: 0.4657 - val_loss: 0.5239 - val_dice_coef: 0.5277
Epoch 00048: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-048-0.465866--0.523923.hdf5
Epoch 49/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5962 - dice_coef: 0.4639 - val_loss: 0.5035 - val_dice_coef: 0.5434
Epoch 00049: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-049-0.463924--0.503518.hdf5
Epoch 50/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5854 - dice_coef: 0.4743 - val_loss: 0.5463 - val_dice_coef: 0.5066
Epoch 00050: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-050-0.474530--0.546343.hdf5
Epoch 51/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5840 - dice_coef: 0.4749 - val_loss: 0.5146 - val_dice_coef: 0.5360
Epoch 00051: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-051-0.475072--0.514581.hdf5
Epoch 52/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5852 - dice_coef: 0.4742 - val_loss: 0.5257 - val_dice_coef: 0.5256
Epoch 00052: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-052-0.474234--0.525729.hdf5
Epoch 53/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5857 - dice_coef: 0.4736 - val_loss: 0.5157 - val_dice_coef: 0.5315
Epoch 00053: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.
Epoch 00053: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-053-0.473557--0.515651.hdf5
Epoch 54/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5852 - dice_coef: 0.4737 - val_loss: 0.5067 - val_dice_coef: 0.5421
Epoch 00054: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-054-0.473682--0.506671.hdf5
Epoch 55/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5903 - dice_coef: 0.4696 - val_loss: 0.4910 - val_dice_coef: 0.5571
Epoch 00055: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-055-0.469478--0.491024.hdf5
Epoch 56/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5876 - dice_coef: 0.4711 - val_loss: 0.5154 - val_dice_coef: 0.5340
Epoch 00056: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-056-0.471110--0.515441.hdf5
Epoch 57/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5897 - dice_coef: 0.4703 - val_loss: 0.5263 - val_dice_coef: 0.5258
Epoch 00057: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-057-0.470255--0.526310.hdf5
Epoch 58/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5849 - dice_coef: 0.4741 - val_loss: 0.5067 - val_dice_coef: 0.5451
Epoch 00058: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-058-0.474262--0.506664.hdf5
Epoch 59/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5816 - dice_coef: 0.4769 - val_loss: 0.5160 - val_dice_coef: 0.5348
Epoch 00059: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-059-0.476830--0.516005.hdf5
Epoch 60/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5891 - dice_coef: 0.4709 - val_loss: 0.5179 - val_dice_coef: 0.5318
Epoch 00060: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-060-0.470746--0.517893.hdf5
Epoch 61/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5873 - dice_coef: 0.4727 - val_loss: 0.5064 - val_dice_coef: 0.5431
Epoch 00061: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-061-0.472722--0.506373.hdf5
Epoch 62/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5803 - dice_coef: 0.4793 - val_loss: 0.5187 - val_dice_coef: 0.5319
Epoch 00062: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-062-0.479199--0.518674.hdf5
Epoch 63/100
1302/1302 [==============================] - 1066s 819ms/step - loss: 0.5843 - dice_coef: 0.4738 - val_loss: 0.5052 - val_dice_coef: 0.5459
Epoch 00063: ReduceLROnPlateau reducing learning rate to 9.999999974752428e-08.
Epoch 00063: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-063-0.473731--0.505171.hdf5
Epoch 64/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5859 - dice_coef: 0.4731 - val_loss: 0.5064 - val_dice_coef: 0.5419
Epoch 00064: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-064-0.473008--0.506380.hdf5
Epoch 65/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5836 - dice_coef: 0.4752 - val_loss: 0.4997 - val_dice_coef: 0.5508
Epoch 00065: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-065-0.475424--0.499673.hdf5
Epoch 66/100
1302/1302 [==============================] - 1063s 817ms/step - loss: 0.5932 - dice_coef: 0.4660 - val_loss: 0.5168 - val_dice_coef: 0.5338
Epoch 00066: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-066-0.465829--0.516758.hdf5
Epoch 67/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5820 - dice_coef: 0.4765 - val_loss: 0.5179 - val_dice_coef: 0.5323
Epoch 00067: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-067-0.476715--0.517926.hdf5
Epoch 68/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5912 - dice_coef: 0.4689 - val_loss: 0.5125 - val_dice_coef: 0.5375
Epoch 00068: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-068-0.468950--0.512456.hdf5
Epoch 69/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5820 - dice_coef: 0.4769 - val_loss: 0.5282 - val_dice_coef: 0.5237
Epoch 00069: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-069-0.476976--0.528154.hdf5
Epoch 70/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5845 - dice_coef: 0.4743 - val_loss: 0.5204 - val_dice_coef: 0.5303
Epoch 00070: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-070-0.474195--0.520356.hdf5
Epoch 71/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5886 - dice_coef: 0.4708 - val_loss: 0.5230 - val_dice_coef: 0.5270
Epoch 00071: ReduceLROnPlateau reducing learning rate to 1.0000000116860975e-08.
Epoch 00071: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-071-0.470715--0.523011.hdf5
Epoch 72/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5837 - dice_coef: 0.4759 - val_loss: 0.5216 - val_dice_coef: 0.5303
Epoch 00072: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-072-0.475787--0.521631.hdf5
Epoch 73/100
1302/1302 [==============================] - 1062s 815ms/step - loss: 0.5804 - dice_coef: 0.4780 - val_loss: 0.5333 - val_dice_coef: 0.5171
Epoch 00073: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-073-0.478063--0.533321.hdf5
Epoch 74/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5842 - dice_coef: 0.4747 - val_loss: 0.5126 - val_dice_coef: 0.5393
Epoch 00074: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-074-0.474628--0.512649.hdf5
Epoch 75/100
1302/1302 [==============================] - 1069s 821ms/step - loss: 0.5836 - dice_coef: 0.4755 - val_loss: 0.5103 - val_dice_coef: 0.5386
Epoch 00075: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-075-0.475690--0.510267.hdf5
Epoch 76/100
160/1302 [==>...........................] - ETA: 15:02 - loss: 0.6069 - dice_coef: 0.4548
I face two problems when implementing a 1D convnet for multi-channel sequential data
(224 samples × 300 time steps × 19 channels).
1) I set batch_size to 7, but the progress counter jumps by much more than that:
not 7, 14, 21, 28, but 7, 56, 105, 147... What is wrong with my code?
2) When I look at the accuracy records, the model seems to learn NOTHING.
Is it impossible to implement a classifier for multi-channel sequential data with Conv1D?
If possible, can you give me some advice based on my code?
#result
x_train shape: (224, 300, 19)
224 train samples
28 test samples
Train on 224 samples, validate on 28 samples
Epoch 1/50
7/224 [..............................] - ETA: 68s - loss: 0.6945 - acc: 0.5714
56/224 [======>.......................] - ETA: 6s - loss: 0.6993 - acc: 0.4464
105/224 [=============>................] - ETA: 2s - loss: 0.6979 - acc: 0.4381
147/224 [==================>...........] - ETA: 1s - loss: 0.6968 - acc: 0.4422
189/224 [========================>.....] - ETA: 0s - loss: 0.6953 - acc: 0.4444
224/224 [==============================] - 2s - loss: 0.6953 - acc: 0.4420 - val_loss: 0.6956 - val_acc: 0.5000
Epoch 2/50
7/224 [..............................] - ETA: 0s - loss: 0.6759 - acc: 0.5714
63/224 [=======>......................] - ETA: 0s - loss: 0.6924 - acc: 0.5556
133/224 [================>.............] - ETA: 0s - loss: 0.6905 - acc: 0.5338
203/224 [==========================>...] - ETA: 0s - loss: 0.6903 - acc: 0.5567
224/224 [==============================] - 0s - loss: 0.6923 - acc: 0.5357 - val_loss: 0.6968 - val_acc: 0.5000
# code
from __future__ import print_function
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
import numpy as np
batch_size = 7
num_classes = 2
epochs = 50
# input data dimensions : 300 sequential x 19 channels
eeg_rows, num_ch = 300, 19
x_train = np.load('eeg_train.npy')
y_train = np.load('label_train.npy')
x_test = np.load('eeg_test.npy')
y_test = np.load('label_test.npy')
x_valid = np.load('eeg_valid.npy')
y_valid = np.load('label_valid.npy')
x_train = x_train.reshape(x_train.shape[0], eeg_rows, num_ch)
x_test = x_test.reshape(x_test.shape[0], eeg_rows,num_ch)
x_valid = x_valid.reshape(x_valid.shape[0], eeg_rows, num_ch)
input_shape = (eeg_rows, num_ch)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_valid = x_test.astype('float32')
x_train /= 100
x_test /= 100
x_valid /= 100
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# model
conv = Sequential()
conv.add(Conv1D(32, 3, input_shape=input_shape, activation='relu', padding='same'))
conv.add(Conv1D(32, 3, activation='relu', padding='same'))
conv.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))
conv.add(Dropout(0.2))
conv.add(Flatten())
conv.add(Dense(16, activation='relu'))
conv.add(Dropout(0.5))
conv.add(Dense(2, activation='softmax'))
conv.compile(loss='categorical_crossentropy',
optimizer=keras.optimizers.Adam(),
metrics=['accuracy'])
# train
conv.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_valid, y_valid))
score = conv.evaluate(x_valid, y_valid, verbose=0)
print(conv.summary())
print(conv.input_shape)
print(conv.output_shape)
print('Test loss:', score[0])
print('Test accuracy:', score[1])