I'm trying to create a model on some data with 2 classes, but I keep getting an error saying:
ValueError: Error when checking target: expected softmax to have shape (1100,) but got array with shape (2,)
I know it's a fairly common error, but I can't seem to fix mine. I believe the error suggests that the model has an output shape of (1100,) but the outputs have dimension (2,). Anyone know how it can be fixed?
Here's my model:
def TestModel(nb_classes=2, inputs=(3, 224, 224)):
input_img = Input(shape=inputs)
conv1 = Convolution2D(
96, 7, 7, activation='relu', init='glorot_uniform',
subsample=(2, 2), border_mode='same', name='conv1')(input_img)
maxpool1 = MaxPooling2D(
pool_size=(3, 3), strides=(2, 2), name='maxpool1', dim_ordering="th")(conv1)
fire2_squeeze = Convolution2D(
16, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire2_squeeze')(maxpool1)
fire2_expand1 = Convolution2D(
64, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire2_expand1')(fire2_squeeze)
fire2_expand2 = Convolution2D(
64, 3, 3, activation='relu', init='glorot_uniform',
border_mode='same', name='fire2_expand2')(fire2_squeeze)
merge2 = merge(
[fire2_expand1, fire2_expand2], mode='concat', concat_axis=1)
fire3_squeeze = Convolution2D(
16, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire3_squeeze')(merge2)
fire3_expand1 = Convolution2D(
64, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire3_expand1')(fire3_squeeze)
fire3_expand2 = Convolution2D(
64, 3, 3, activation='relu', init='glorot_uniform',
border_mode='same', name='fire3_expand2')(fire3_squeeze)
merge3 = merge(
[fire3_expand1, fire3_expand2], mode='concat', concat_axis=1)
fire4_squeeze = Convolution2D(
32, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire4_squeeze')(merge3)
fire4_expand1 = Convolution2D(
128, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire4_expand1')(fire4_squeeze)
fire4_expand2 = Convolution2D(
128, 3, 3, activation='relu', init='glorot_uniform',
border_mode='same', name='fire4_expand2')(fire4_squeeze)
merge4 = merge(
[fire4_expand1, fire4_expand2], mode='concat', concat_axis=1)
maxpool4 = MaxPooling2D(
pool_size=(3, 3), strides=(2, 2), name='maxpool4')(merge4)
fire5_squeeze = Convolution2D(
32, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire5_squeeze')(maxpool4)
fire5_expand1 = Convolution2D(
128, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire5_expand1')(fire5_squeeze)
fire5_expand2 = Convolution2D(
128, 3, 3, activation='relu', init='glorot_uniform',
border_mode='same', name='fire5_expand2')(fire5_squeeze)
merge5 = merge(
[fire5_expand1, fire5_expand2], mode='concat', concat_axis=1)
fire6_squeeze = Convolution2D(
48, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire6_squeeze')(merge5)
fire6_expand1 = Convolution2D(
192, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire6_expand1')(fire6_squeeze)
fire6_expand2 = Convolution2D(
192, 3, 3, activation='relu', init='glorot_uniform',
border_mode='same', name='fire6_expand2')(fire6_squeeze)
merge6 = merge(
[fire6_expand1, fire6_expand2], mode='concat', concat_axis=1)
fire7_squeeze = Convolution2D(
48, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire7_squeeze')(merge6)
fire7_expand1 = Convolution2D(
192, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire7_expand1')(fire7_squeeze)
fire7_expand2 = Convolution2D(
192, 3, 3, activation='relu', init='glorot_uniform',
border_mode='same', name='fire7_expand2')(fire7_squeeze)
merge7 = merge(
[fire7_expand1, fire7_expand2], mode='concat', concat_axis=1)
fire8_squeeze = Convolution2D(
64, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire8_squeeze')(merge7)
fire8_expand1 = Convolution2D(
256, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire8_expand1')(fire8_squeeze)
fire8_expand2 = Convolution2D(
256, 3, 3, activation='relu', init='glorot_uniform',
border_mode='same', name='fire8_expand2')(fire8_squeeze)
merge8 = merge(
[fire8_expand1, fire8_expand2], mode='concat', concat_axis=1)
maxpool8 = MaxPooling2D(
pool_size=(3, 3), strides=(2, 2), name='maxpool8')(merge8)
fire9_squeeze = Convolution2D(
64, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire9_squeeze')(maxpool8)
fire9_expand1 = Convolution2D(
256, 1, 1, activation='relu', init='glorot_uniform',
border_mode='same', name='fire9_expand1')(fire9_squeeze)
fire9_expand2 = Convolution2D(
256, 3, 3, activation='relu', init='glorot_uniform',
border_mode='same', name='fire9_expand2')(fire9_squeeze)
merge9 = merge(
[fire9_expand1, fire9_expand2], mode='concat', concat_axis=1)
fire9_dropout = Dropout(0.5, name='fire9_dropout')(merge9)
conv10 = Convolution2D(
nb_classes, 1, 1, init='glorot_uniform',
border_mode='valid', name='conv10')(fire9_dropout)
# The size should match the output of conv10
avgpool10 = AveragePooling2D((13, 13), name='avgpool10')(conv10)
flatten = Flatten(name='flatten')(avgpool10)
softmax = Activation("softmax", name='softmax')(flatten)
return Model(input=input_img, output=softmax)
Here's the code creating the model:
def main():
nb_class = 2
width, height = 224, 224
sn = model.TestModel(nb_classes=nb_class, inputs=(height, width, 3))
print('Build model')
sgd = SGD(lr=0.001, decay=0.0002, momentum=0.9, nesterov=True)
optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
# Training
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
nb_epoch = 500
# Generator
train_datagen = ImageDataGenerator(
#train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
target_size=(width, height),
validation_generator = test_datagen.flow_from_directory(
target_size=(width, height),
# Instantiate AccLossPlotter to visualise training
plotter = AccLossPlotter(graphs=['acc', 'loss'], save_graph=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0)
checkpoint = ModelCheckpoint(
callbacks=[plotter, checkpoint])
Here's summary():
Layer (type) Output Shape Param # Connected to
input_1 (InputLayer) (None, 224, 224, 3) 0
conv1 (Convolution2D) (None, 112, 112, 96) 14208 input_1[0][0]
maxpool1 (MaxPooling2D) (None, 112, 55, 47) 0 conv1[0][0]
fire2_squeeze (Convolution2D) (None, 112, 55, 16) 768 maxpool1[0][0]
fire2_expand1 (Convolution2D) (None, 112, 55, 64) 1088 fire2_squeeze[0][0]
fire2_expand2 (Convolution2D) (None, 112, 55, 64) 9280 fire2_squeeze[0][0]
merge_1 (Merge) (None, 224, 55, 64) 0 fire2_expand1[0][0]
fire3_squeeze (Convolution2D) (None, 224, 55, 16) 1040 merge_1[0][0]
fire3_expand1 (Convolution2D) (None, 224, 55, 64) 1088 fire3_squeeze[0][0]
fire3_expand2 (Convolution2D) (None, 224, 55, 64) 9280 fire3_squeeze[0][0]
merge_2 (Merge) (None, 448, 55, 64) 0 fire3_expand1[0][0]
fire4_squeeze (Convolution2D) (None, 448, 55, 32) 2080 merge_2[0][0]
fire4_expand1 (Convolution2D) (None, 448, 55, 128) 4224 fire4_squeeze[0][0]
fire4_expand2 (Convolution2D) (None, 448, 55, 128) 36992 fire4_squeeze[0][0]
merge_3 (Merge) (None, 896, 55, 128) 0 fire4_expand1[0][0]
maxpool4 (MaxPooling2D) (None, 447, 27, 128) 0 merge_3[0][0]
fire5_squeeze (Convolution2D) (None, 447, 27, 32) 4128 maxpool4[0][0]
fire5_expand1 (Convolution2D) (None, 447, 27, 128) 4224 fire5_squeeze[0][0]
fire5_expand2 (Convolution2D) (None, 447, 27, 128) 36992 fire5_squeeze[0][0]
merge_4 (Merge) (None, 894, 27, 128) 0 fire5_expand1[0][0]
fire6_squeeze (Convolution2D) (None, 894, 27, 48) 6192 merge_4[0][0]
fire6_expand1 (Convolution2D) (None, 894, 27, 192) 9408 fire6_squeeze[0][0]
fire6_expand2 (Convolution2D) (None, 894, 27, 192) 83136 fire6_squeeze[0][0]
merge_5 (Merge) (None, 1788, 27, 192) 0 fire6_expand1[0][0]
fire7_squeeze (Convolution2D) (None, 1788, 27, 48) 9264 merge_5[0][0]
fire7_expand1 (Convolution2D) (None, 1788, 27, 192) 9408 fire7_squeeze[0][0]
fire7_expand2 (Convolution2D) (None, 1788, 27, 192) 83136 fire7_squeeze[0][0]
merge_6 (Merge) (None, 3576, 27, 192) 0 fire7_expand1[0][0]
fire8_squeeze (Convolution2D) (None, 3576, 27, 64) 12352 merge_6[0][0]
fire8_expand1 (Convolution2D) (None, 3576, 27, 256) 16640 fire8_squeeze[0][0]
fire8_expand2 (Convolution2D) (None, 3576, 27, 256) 147712 fire8_squeeze[0][0]
merge_7 (Merge) (None, 7152, 27, 256) 0 fire8_expand1[0][0]
maxpool8 (MaxPooling2D) (None, 3575, 13, 256) 0 merge_7[0][0]
fire9_squeeze (Convolution2D) (None, 3575, 13, 64) 16448 maxpool8[0][0]
fire9_expand1 (Convolution2D) (None, 3575, 13, 256) 16640 fire9_squeeze[0][0]
fire9_expand2 (Convolution2D) (None, 3575, 13, 256) 147712 fire9_squeeze[0][0]
merge_8 (Merge) (None, 7150, 13, 256) 0 fire9_expand1[0][0]
fire9_dropout (Dropout) (None, 7150, 13, 256) 0 merge_8[0][0]
conv10 (Convolution2D) (None, 7150, 13, 2) 514 fire9_dropout[0][0]
avgpool10 (AveragePooling2D) (None, 550, 1, 2) 0 conv10[0][0]
flatten (Flatten) (None, 1100) 0 avgpool10[0][0]
softmax (Activation) (None, 1100) 0 flatten[0][0]
Total params: 683,954
Trainable params: 683,954
Non-trainable params: 0
Found 22778 images belonging to 2 classes.
Found 2222 images belonging to 2 classes.
Epoch 1/500
Any thought appreciated.
You shouldn't be using AveragePooling2D, but GlobalAveragePooling2D, that will reduce the spatial dimensions to 1, making the Flatten work and produce an output of (None, 2).
i'm trying to build a AutoEncoder with the following configurations
x = Input(shape=(36,1))
# Encoder
conv1_1 = Conv1D(16, 3, activation='relu', padding='same')(x)
pool1 = MaxPooling1D(2)(conv1_1)
conv1_2 = Conv1D(8, 3, activation='relu', padding='same')(pool1)
pool2 = MaxPooling1D(2)(conv1_2)
conv1_3 = Conv1D(8, 3, activation='relu', padding='same')(pool2)
h = MaxPooling1D(3)(conv1_3)
# Decoder
conv2_1 = Conv1D(8,3, activation='relu', padding='same')(h)
up1 = UpSampling1D(3)(conv2_1)
conv2_2 = Conv1D(8,3, activation='relu', padding='same')(up1)
up2 = UpSampling1D(2)(conv2_2)
conv2_3 = Conv1D(16,3, activation='relu')(up2)
up3 = UpSampling1D(2)(conv2_3)
r = Conv1D(1,3, activation='sigmoid', padding='same')(up3)
the summary is
Model: "model"
Layer (type) Output Shape Param #
input_1 (InputLayer) [(None, 36, 1)] 0
conv1d (Conv1D) (None, 36, 16) 64
max_pooling1d (MaxPooling1D (None, 18, 16) 0
conv1d_1 (Conv1D) (None, 18, 8) 392
max_pooling1d_1 (MaxPooling (None, 9, 8) 0
conv1d_2 (Conv1D) (None, 9, 8) 200
max_pooling1d_2 (MaxPooling (None, 3, 8) 0
conv1d_3 (Conv1D) (None, 3, 8) 200
up_sampling1d (UpSampling1D (None, 9, 8) 0
conv1d_4 (Conv1D) (None, 9, 8) 200
up_sampling1d_1 (UpSampling (None, 18, 8) 0
conv1d_5 (Conv1D) (None, 16, 16) 400 <-------
up_sampling1d_2 (UpSampling (None, 32, 16) 0
conv1d_6 (Conv1D) (None, 32, 1) 49
as u can see i put the arrow where the output changes and i cannot understand why, it causes to have different output from input
what can i do? there is a way to understand how to put the best parameter gived the input shape?
I have recently started learning about Image Segmentation and UNet. I am trying to do a multi class Image Segmentation where I have 7 classes and input is a (256, 256, 3) rgb image and output is (256, 256, 1) grayscale image where each intensity value corresponds to one class. I am doing pixel wise softmax. I am using sparse categorical cross entropy so as to avoid doing One Hot Encoding.
def soft1(x):
return keras.activations.softmax(x, axis = -1)
def conv2d_block(input_tensor, n_filters, kernel_size = 3, batchnorm = True):
x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),\
kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
if batchnorm:
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),\
kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
if batchnorm:
x = BatchNormalization()(x)
x = Activation('relu')(x)
return x
def get_unet(input_img, n_classes, n_filters = 16, dropout = 0.1, batchnorm = True):
# Contracting Path
c1 = conv2d_block(input_img, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
p1 = MaxPooling2D((2, 2))(c1)
p1 = Dropout(dropout)(p1)
c2 = conv2d_block(p1, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
p2 = MaxPooling2D((2, 2))(c2)
p2 = Dropout(dropout)(p2)
c3 = conv2d_block(p2, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
p3 = MaxPooling2D((2, 2))(c3)
p3 = Dropout(dropout)(p3)
c4 = conv2d_block(p3, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
p4 = MaxPooling2D((2, 2))(c4)
p4 = Dropout(dropout)(p4)
c5 = conv2d_block(p4, n_filters = n_filters * 16, kernel_size = 3, batchnorm = batchnorm)
# Expansive Path
u6 = Conv2DTranspose(n_filters * 8, (3, 3), strides = (2, 2), padding = 'same')(c5)
u6 = concatenate([u6, c4])
u6 = Dropout(dropout)(u6)
c6 = conv2d_block(u6, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
u7 = Conv2DTranspose(n_filters * 4, (3, 3), strides = (2, 2), padding = 'same')(c6)
u7 = concatenate([u7, c3])
u7 = Dropout(dropout)(u7)
c7 = conv2d_block(u7, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
u8 = Conv2DTranspose(n_filters * 2, (3, 3), strides = (2, 2), padding = 'same')(c7)
u8 = concatenate([u8, c2])
u8 = Dropout(dropout)(u8)
c8 = conv2d_block(u8, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
u9 = Conv2DTranspose(n_filters * 1, (3, 3), strides = (2, 2), padding = 'same')(c8)
u9 = concatenate([u9, c1])
u9 = Dropout(dropout)(u9)
c9 = conv2d_block(u9, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
outputs = Conv2D(n_classes, (1, 1))(c9)
outputs = Reshape((image_height*image_width, 1, n_classes), input_shape = (image_height, image_width, n_classes))(outputs)
outputs = Activation(soft1)(outputs)
model = Model(inputs=[input_img], outputs=[outputs])
return model
My Model Summary is:
Model: "model_2"
Layer (type) Output Shape Param # Connected to
input_12 (InputLayer) (None, 256, 256, 3) 0
conv2d_211 (Conv2D) (None, 256, 256, 16) 448 input_12[0][0]
batch_normalization_200 (BatchN (None, 256, 256, 16) 64 conv2d_211[0][0]
activation_204 (Activation) (None, 256, 256, 16) 0 batch_normalization_200[0][0]
max_pooling2d_45 (MaxPooling2D) (None, 128, 128, 16) 0 activation_204[0][0]
dropout_89 (Dropout) (None, 128, 128, 16) 0 max_pooling2d_45[0][0]
conv2d_213 (Conv2D) (None, 128, 128, 32) 4640 dropout_89[0][0]
batch_normalization_202 (BatchN (None, 128, 128, 32) 128 conv2d_213[0][0]
activation_206 (Activation) (None, 128, 128, 32) 0 batch_normalization_202[0][0]
max_pooling2d_46 (MaxPooling2D) (None, 64, 64, 32) 0 activation_206[0][0]
dropout_90 (Dropout) (None, 64, 64, 32) 0 max_pooling2d_46[0][0]
conv2d_215 (Conv2D) (None, 64, 64, 64) 18496 dropout_90[0][0]
batch_normalization_204 (BatchN (None, 64, 64, 64) 256 conv2d_215[0][0]
activation_208 (Activation) (None, 64, 64, 64) 0 batch_normalization_204[0][0]
max_pooling2d_47 (MaxPooling2D) (None, 32, 32, 64) 0 activation_208[0][0]
dropout_91 (Dropout) (None, 32, 32, 64) 0 max_pooling2d_47[0][0]
conv2d_217 (Conv2D) (None, 32, 32, 128) 73856 dropout_91[0][0]
batch_normalization_206 (BatchN (None, 32, 32, 128) 512 conv2d_217[0][0]
activation_210 (Activation) (None, 32, 32, 128) 0 batch_normalization_206[0][0]
max_pooling2d_48 (MaxPooling2D) (None, 16, 16, 128) 0 activation_210[0][0]
dropout_92 (Dropout) (None, 16, 16, 128) 0 max_pooling2d_48[0][0]
conv2d_219 (Conv2D) (None, 16, 16, 256) 295168 dropout_92[0][0]
batch_normalization_208 (BatchN (None, 16, 16, 256) 1024 conv2d_219[0][0]
activation_212 (Activation) (None, 16, 16, 256) 0 batch_normalization_208[0][0]
conv2d_transpose_45 (Conv2DTran (None, 32, 32, 128) 295040 activation_212[0][0]
concatenate_45 (Concatenate) (None, 32, 32, 256) 0 conv2d_transpose_45[0][0]
dropout_93 (Dropout) (None, 32, 32, 256) 0 concatenate_45[0][0]
conv2d_221 (Conv2D) (None, 32, 32, 128) 295040 dropout_93[0][0]
batch_normalization_210 (BatchN (None, 32, 32, 128) 512 conv2d_221[0][0]
activation_214 (Activation) (None, 32, 32, 128) 0 batch_normalization_210[0][0]
conv2d_transpose_46 (Conv2DTran (None, 64, 64, 64) 73792 activation_214[0][0]
concatenate_46 (Concatenate) (None, 64, 64, 128) 0 conv2d_transpose_46[0][0]
dropout_94 (Dropout) (None, 64, 64, 128) 0 concatenate_46[0][0]
conv2d_223 (Conv2D) (None, 64, 64, 64) 73792 dropout_94[0][0]
batch_normalization_212 (BatchN (None, 64, 64, 64) 256 conv2d_223[0][0]
activation_216 (Activation) (None, 64, 64, 64) 0 batch_normalization_212[0][0]
conv2d_transpose_47 (Conv2DTran (None, 128, 128, 32) 18464 activation_216[0][0]
concatenate_47 (Concatenate) (None, 128, 128, 64) 0 conv2d_transpose_47[0][0]
dropout_95 (Dropout) (None, 128, 128, 64) 0 concatenate_47[0][0]
conv2d_225 (Conv2D) (None, 128, 128, 32) 18464 dropout_95[0][0]
batch_normalization_214 (BatchN (None, 128, 128, 32) 128 conv2d_225[0][0]
activation_218 (Activation) (None, 128, 128, 32) 0 batch_normalization_214[0][0]
conv2d_transpose_48 (Conv2DTran (None, 256, 256, 16) 4624 activation_218[0][0]
concatenate_48 (Concatenate) (None, 256, 256, 32) 0 conv2d_transpose_48[0][0]
dropout_96 (Dropout) (None, 256, 256, 32) 0 concatenate_48[0][0]
conv2d_227 (Conv2D) (None, 256, 256, 16) 4624 dropout_96[0][0]
batch_normalization_216 (BatchN (None, 256, 256, 16) 64 conv2d_227[0][0]
activation_220 (Activation) (None, 256, 256, 16) 0 batch_normalization_216[0][0]
conv2d_228 (Conv2D) (None, 256, 256, 7) 119 activation_220[0][0]
reshape_12 (Reshape) (None, 65536, 1, 7) 0 conv2d_228[0][0]
activation_221 (Activation) (None, 65536, 1, 7) 0 reshape_12[0][0]
Total params: 1,179,511
Trainable params: 1,178,039
Non-trainable params: 1,472
Is my model right? Shouldn't the final output be (65536, 1, 1) as I am using softmax?
The code is compiling but dice coefficient is very low.
Your model should end in (256,256,7).
That is 7 classes per pixel, and the shape should agree with your output images that are (256,256,1). This will work only for 'sparse_categorical_crossentropy' or a custom loss.
So, up to conv_228 the model seems fine (didn't look in detail, though).
There is no need for anything that comes after this convolution.
You can place the softmax directly in the conv_228 or directly after.
y_train should be (256,256,1) for this.
Your output in fact represents its pixel of your image. For its pixel, you have as an output of 1x7. Since it is sigmoid the values that this representation takes are between 0-1. Therefore the output fires when you have the desired class and therefore segmentation. If it was (65536, 1, 1) you should have not categorical but dense representation.
I am trying to train a segmentation model, But loss saturates at 0.3370 , i am really not sure what to do, can someone please help
This is the model
def unet(input_shape=(128, 128, 128), optimizer=Adam, initial_learning_rate=5e-4,
inputs = Input(shape=input_shape)
conv1 = UnetConv3D(inputs, 32, is_batchnorm=False, name='conv1')
pool1 = MaxPooling3D(pool_size=(2, 2,2 ))(conv1)
conv2 = UnetConv3D(pool1, 64, is_batchnorm=False, name='conv2')
pool2 = MaxPooling3D(pool_size=(2, 2,2 ))(conv2)
conv3 = UnetConv3D(pool2, 128, is_batchnorm=False, name='conv3')
pool3 = MaxPooling3D(pool_size=(2, 2,2 ))(conv3)
conv4 = UnetConv3D(pool3, 256, is_batchnorm=False, name='conv4')
pool4 = MaxPooling3D(pool_size=(2, 2,2 ))(conv4)
conv5 = Conv3D(512, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(pool4)
conv5 = Conv3D(512, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv5)
up6 = concatenate([Conv3DTranspose(256, (2, 2,2 ), strides=(2, 2,2 ), kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv5), conv4], axis=1)
conv6 = Conv3D(256, (3, 3, 3), activation='relu', padding='same', data_format = 'channels_first')(up6)
conv6 = Conv3D(256, (3, 3, 3), activation='relu', padding='same', data_format = 'channels_first')(conv6)
up7 = concatenate([Conv3DTranspose(128, (2, 2,2 ), strides=(2, 2,2 ), padding='same', data_format = 'channels_first')(conv6), conv3], axis=1)
conv7 = Conv3D(128, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(up7)
conv7 = Conv3D(128, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv7)
up8 = concatenate([Conv3DTranspose(64, (2, 2,2 ), strides=(2,2,2 ), kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv7), conv2], axis=1)
conv8 = Conv3D(64, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(up8)
up9 = concatenate([Conv3DTranspose(32, (2, 2,2 ), strides=(2, 2,2 ), kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv8), conv1], axis=1)
conv9 = Conv3D(32, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(up9)
conv9 = Conv3D(32, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv9)
conv10 = Conv3D(3, (1, 1, 1), activation='relu', kernel_initializer=kinit,padding = 'same', name='final', data_format = 'channels_first')(conv9)
activation_name = 'sigmoid'
activation_block = Activation(activation_name)(conv10)
model = Model(inputs=[inputs], outputs=[activation_block])
model.compile(optimizer=optimizer(), loss=loss_function)
return model
This is the helper function
def UnetConv3D(input, outdim, is_batchnorm, name):
x = Conv3D(outdim, (3, 3, 3), strides=(1, 1, 1), kernel_initializer=kinit, padding="same", name=name+'_1', data_format = 'channels_first')(input)
if is_batchnorm:
x =BatchNormalization(name=name + '_1_bn')(x)
x = Activation('relu',name=name + '_1_act')(x)
x = Conv3D(outdim, (3, 3, 3), strides=(1, 1, 1), kernel_initializer=kinit, padding="same", name=name+'_2', data_format = 'channels_first')(x)
if is_batchnorm:
x = BatchNormalization(name=name + '_2_bn')(x)
x = Activation('relu', name=name + '_2_act')(x)
return x
And this is the loss function --
def weighted_dice_coefficient(y_true, y_pred, axis=(-3, -2, -1), smooth=0.00001):
Weighted dice coefficient. Default axis assumes a "channels first" data structure
:param smooth:
:param y_true:
:param y_pred:
:param axis:
return K.mean(2. * (K.sum(y_true * y_pred,
axis=axis) + smooth/2)/(K.sum(y_true,
axis=axis) + K.sum(y_pred,
axis=axis) + smooth))
My input is (128,128,128), am i doing an obvious mistake? Please let me know if more info needed.
Model summary
Layer (type) Output Shape Param # Connected to
input_1 (InputLayer) (None, 1, 128, 128, 0
conv1_1 (Conv3D) (None, 32, 128, 128, 896 input_1[0][0]
conv1_1_act (Activation) (None, 32, 128, 128, 0 conv1_1[0][0]
conv1_2 (Conv3D) (None, 32, 128, 128, 27680 conv1_1_act[0][0]
conv1_2_act (Activation) (None, 32, 128, 128, 0 conv1_2[0][0]
max_pooling3d_1 (MaxPooling3D) (None, 32, 64, 64, 6 0 conv1_2_act[0][0]
conv2_1 (Conv3D) (None, 64, 64, 64, 6 55360 max_pooling3d_1[0][0]
conv2_1_act (Activation) (None, 64, 64, 64, 6 0 conv2_1[0][0]
conv2_2 (Conv3D) (None, 64, 64, 64, 6 110656 conv2_1_act[0][0]
conv2_2_act (Activation) (None, 64, 64, 64, 6 0 conv2_2[0][0]
max_pooling3d_2 (MaxPooling3D) (None, 64, 32, 32, 3 0 conv2_2_act[0][0]
conv3_1 (Conv3D) (None, 128, 32, 32, 221312 max_pooling3d_2[0][0]
conv3_1_act (Activation) (None, 128, 32, 32, 0 conv3_1[0][0]
conv3_2 (Conv3D) (None, 128, 32, 32, 442496 conv3_1_act[0][0]
conv3_2_act (Activation) (None, 128, 32, 32, 0 conv3_2[0][0]
max_pooling3d_3 (MaxPooling3D) (None, 128, 16, 16, 0 conv3_2_act[0][0]
conv4_1 (Conv3D) (None, 256, 16, 16, 884992 max_pooling3d_3[0][0]
conv4_1_act (Activation) (None, 256, 16, 16, 0 conv4_1[0][0]
conv4_2 (Conv3D) (None, 256, 16, 16, 1769728 conv4_1_act[0][0]
conv4_2_act (Activation) (None, 256, 16, 16, 0 conv4_2[0][0]
max_pooling3d_4 (MaxPooling3D) (None, 256, 8, 8, 8) 0 conv4_2_act[0][0]
conv3d_1 (Conv3D) (None, 512, 8, 8, 8) 3539456 max_pooling3d_4[0][0]
conv3d_2 (Conv3D) (None, 512, 8, 8, 8) 7078400 conv3d_1[0][0]
conv3d_transpose_1 (Conv3DTrans (None, 256, 16, 16, 1048832 conv3d_2[0][0]
concatenate_1 (Concatenate) (None, 512, 16, 16, 0 conv3d_transpose_1[0][0]
conv3d_3 (Conv3D) (None, 256, 16, 16, 3539200 concatenate_1[0][0]
conv3d_4 (Conv3D) (None, 256, 16, 16, 1769728 conv3d_3[0][0]
conv3d_transpose_2 (Conv3DTrans (None, 128, 32, 32, 262272 conv3d_4[0][0]
concatenate_2 (Concatenate) (None, 256, 32, 32, 0 conv3d_transpose_2[0][0]
conv3d_5 (Conv3D) (None, 128, 32, 32, 884864 concatenate_2[0][0]
conv3d_6 (Conv3D) (None, 128, 32, 32, 442496 conv3d_5[0][0]
conv3d_transpose_3 (Conv3DTrans (None, 64, 64, 64, 6 65600 conv3d_6[0][0]
concatenate_3 (Concatenate) (None, 128, 64, 64, 0 conv3d_transpose_3[0][0]
conv3d_7 (Conv3D) (None, 64, 64, 64, 6 221248 concatenate_3[0][0]
conv3d_transpose_4 (Conv3DTrans (None, 32, 128, 128, 16416 conv3d_7[0][0]
concatenate_4 (Concatenate) (None, 64, 128, 128, 0 conv3d_transpose_4[0][0]
conv3d_8 (Conv3D) (None, 32, 128, 128, 55328 concatenate_4[0][0]
conv3d_9 (Conv3D) (None, 32, 128, 128, 27680 conv3d_8[0][0]
final (Conv3D) (None, 3, 128, 128, 99 conv3d_9[0][0]
activation_1 (Activation) (None, 3, 128, 128, 0 final[0][0]
Thanks in advance
I'm trying to build u-net in keras for multi-class semantic segmentation. The model I have below does not learn anything. It always just predicts the background (first) class.
Is my use of the final 'softmax' layer correct? The documentation shows a axis parameter, but I'm not sure how to set that or what it should be.
def unet(input_shape=(572, 572, 1), classes=2):
input_image = KL.Input(shape=input_shape)
contracting_1, pooled_1 = blocks.contracting(input_image, filters=64, block_name="block1")
contracting_2, pooled_2 = blocks.contracting(pooled_1, filters=128, block_name="block2")
contracting_3, pooled_3 = blocks.contracting(pooled_2, filters=256, block_name="block3")
contracting_4, pooled_4 = blocks.contracting(pooled_3, filters=512, block_name="block4")
contracting_5, _ = blocks.contracting(pooled_4, filters=1024, block_name="block5")
dropout = KL.Dropout(rate=0.5)(contracting_5)
expanding_1 = blocks.expanding(dropout, merge_layer=contracting_4, filters=512, block_name="block6")
expanding_2 = blocks.expanding(expanding_1, merge_layer=contracting_3, filters=256, block_name="block7")
expanding_3 = blocks.expanding(expanding_2, merge_layer=contracting_2, filters=128, block_name="block8")
expanding_4 = blocks.expanding(expanding_3, merge_layer=contracting_1, filters=64, block_name="block9")
class_output = KL.Conv2D(classes, kernel_size=(1, 1), activation='softmax', name='class_output')(expanding_4)
model = KM.Model(inputs=[input_image], outputs=[class_output])
return model
def contracting(input_layer, filters, kernel_size=(3, 3), padding='same',
conv_a = KL.Conv2D(filters, kernel_size, activation='relu', padding=padding,
conv_b = KL.Conv2D(filters, kernel_size, activation='relu', padding=padding,
pool = KL.MaxPooling2D(pool_size=(2, 2), padding=padding,
batch_normalization = KL.BatchNormalization()(pool)
return conv_b, batch_normalization
def expanding(input_layer, merge_layer, filters, kernel_size=(3, 3), padding='same',
input_layer = KL.UpSampling2D(size=(2, 2))(input_layer)
conv_up = KL.Conv2D(filters, kernel_size=(2, 2), activation='relu',
padding='same', name='{}_expanding_conv_up'.format(block_name))(input_layer)
conv_up_height, conv_up_width = int(conv_up.shape[1]), int(conv_up.shape[2])
merge_height, merge_width = int(merge_layer.shape[1]), int(merge_layer.shape[2])
crop_top = (merge_height - conv_up_height) // 2
crop_bottom = (merge_height - conv_up_height) - crop_top
crop_left = (merge_width - conv_up_width) // 2
crop_right = (merge_width - conv_up_width) - crop_left
cropping = ((crop_top, crop_bottom), (crop_left, crop_right))
merge_layer = KL.Cropping2D(cropping)(merge_layer)
merged = KL.concatenate([merge_layer, conv_up])
conv_a = KL.Conv2D(filters, kernel_size, activation='relu', padding=padding,
conv_b = KL.Conv2D(filters, kernel_size, activation='relu', padding=padding,
batch_normalization = KL.BatchNormalization()(conv_b)
return batch_normalization
optimizer = keras.optimizers.SGD(lr=0.0001, momentum=0.9)
loss = keras.losses.categorical_crossentropy
metrics = [keras.metrics.categorical_accuracy]
model.compile(optimizer, loss, metrics)
Model Summary:
Layer (type) Output Shape Param # Connected to
input_2 (InputLayer) (None, 96, 96, 3) 0
block1_contracting_conv_a (Conv (None, 96, 96, 64) 1792 input_2[0][0]
block1_contracting_conv_b (Conv (None, 96, 96, 64) 36928 block1_contracting_conv_a[0][0]
block1_contracting_pool (MaxPoo (None, 48, 48, 64) 0 block1_contracting_conv_b[0][0]
batch_normalization_10 (BatchNo (None, 48, 48, 64) 256 block1_contracting_pool[0][0]
block2_contracting_conv_a (Conv (None, 48, 48, 128) 73856 batch_normalization_10[0][0]
block2_contracting_conv_b (Conv (None, 48, 48, 128) 147584 block2_contracting_conv_a[0][0]
block2_contracting_pool (MaxPoo (None, 24, 24, 128) 0 block2_contracting_conv_b[0][0]
batch_normalization_11 (BatchNo (None, 24, 24, 128) 512 block2_contracting_pool[0][0]
block3_contracting_conv_a (Conv (None, 24, 24, 256) 295168 batch_normalization_11[0][0]
block3_contracting_conv_b (Conv (None, 24, 24, 256) 590080 block3_contracting_conv_a[0][0]
block3_contracting_pool (MaxPoo (None, 12, 12, 256) 0 block3_contracting_conv_b[0][0]
batch_normalization_12 (BatchNo (None, 12, 12, 256) 1024 block3_contracting_pool[0][0]
block4_contracting_conv_a (Conv (None, 12, 12, 512) 1180160 batch_normalization_12[0][0]
block4_contracting_conv_b (Conv (None, 12, 12, 512) 2359808 block4_contracting_conv_a[0][0]
block4_contracting_pool (MaxPoo (None, 6, 6, 512) 0 block4_contracting_conv_b[0][0]
batch_normalization_13 (BatchNo (None, 6, 6, 512) 2048 block4_contracting_pool[0][0]
block5_contracting_conv_a (Conv (None, 6, 6, 1024) 4719616 batch_normalization_13[0][0]
block5_contracting_conv_b (Conv (None, 6, 6, 1024) 9438208 block5_contracting_conv_a[0][0]
dropout_2 (Dropout) (None, 6, 6, 1024) 0 block5_contracting_conv_b[0][0]
up_sampling2d_5 (UpSampling2D) (None, 12, 12, 1024) 0 dropout_2[0][0]
cropping2d_5 (Cropping2D) (None, 12, 12, 512) 0 block4_contracting_conv_b[0][0]
block6_expanding_conv_up (Conv2 (None, 12, 12, 512) 2097664 up_sampling2d_5[0][0]
concatenate_5 (Concatenate) (None, 12, 12, 1024) 0 cropping2d_5[0][0]
block6_expanding_conv_a (Conv2D (None, 12, 12, 512) 4719104 concatenate_5[0][0]
block6_expanding_conv_b (Conv2D (None, 12, 12, 512) 2359808 block6_expanding_conv_a[0][0]
batch_normalization_15 (BatchNo (None, 12, 12, 512) 2048 block6_expanding_conv_b[0][0]
up_sampling2d_6 (UpSampling2D) (None, 24, 24, 512) 0 batch_normalization_15[0][0]
cropping2d_6 (Cropping2D) (None, 24, 24, 256) 0 block3_contracting_conv_b[0][0]
block7_expanding_conv_up (Conv2 (None, 24, 24, 256) 524544 up_sampling2d_6[0][0]
concatenate_6 (Concatenate) (None, 24, 24, 512) 0 cropping2d_6[0][0]
block7_expanding_conv_a (Conv2D (None, 24, 24, 256) 1179904 concatenate_6[0][0]
block7_expanding_conv_b (Conv2D (None, 24, 24, 256) 590080 block7_expanding_conv_a[0][0]
batch_normalization_16 (BatchNo (None, 24, 24, 256) 1024 block7_expanding_conv_b[0][0]
up_sampling2d_7 (UpSampling2D) (None, 48, 48, 256) 0 batch_normalization_16[0][0]
cropping2d_7 (Cropping2D) (None, 48, 48, 128) 0 block2_contracting_conv_b[0][0]
block8_expanding_conv_up (Conv2 (None, 48, 48, 128) 131200 up_sampling2d_7[0][0]
concatenate_7 (Concatenate) (None, 48, 48, 256) 0 cropping2d_7[0][0]
block8_expanding_conv_a (Conv2D (None, 48, 48, 128) 295040 concatenate_7[0][0]
block8_expanding_conv_b (Conv2D (None, 48, 48, 128) 147584 block8_expanding_conv_a[0][0]
batch_normalization_17 (BatchNo (None, 48, 48, 128) 512 block8_expanding_conv_b[0][0]
up_sampling2d_8 (UpSampling2D) (None, 96, 96, 128) 0 batch_normalization_17[0][0]
cropping2d_8 (Cropping2D) (None, 96, 96, 64) 0 block1_contracting_conv_b[0][0]
block9_expanding_conv_up (Conv2 (None, 96, 96, 64) 32832 up_sampling2d_8[0][0]
concatenate_8 (Concatenate) (None, 96, 96, 128) 0 cropping2d_8[0][0]
block9_expanding_conv_a (Conv2D (None, 96, 96, 64) 73792 concatenate_8[0][0]
block9_expanding_conv_b (Conv2D (None, 96, 96, 64) 36928 block9_expanding_conv_a[0][0]
batch_normalization_18 (BatchNo (None, 96, 96, 64) 256 block9_expanding_conv_b[0][0]
class_output (Conv2D) (None, 96, 96, 4) 260 batch_normalization_18[0][0]
Total params: 31,039,620
Trainable params: 31,035,780
Non-trainable params: 3,840
Total params: 31,031,940
Trainable params: 31,031,940
Non-trainable params: 0
class percentages in dataset:
{0: 0.6245757457188198,
1: 0.16082110268729075,
2: 0.1188858904157366,
3: 0.09571726117815291}
class 0 is the background
shape of image from generator (rgb): (1, 96, 96, 3)
shape of labels from generator: (1, 96, 96, 4)
There doesn't seem to be anything that wrong in your model.
Softmax is ok, as it defaults to the last axis, and you're clearly using 'channels_last' as config. So it's ok.
Suggestions are:
Add a few BatchNormalization() layers and decrease your learning rate (this prevents relu from going too fast to "all zeroes").
Check that your output data range is correct, with np.unique(y_train) containing only 0 and 1
Check that every pixel is classified with only one class: (np.sum(y_train, axis=-1) == 1).all() == True.
Check if your images aren't too biased towards the first class. np.sum(y_train[:,:,:,0]) should not be too bigger than np.sum(y_train[:,:,:,1:]).
If it is, consider fitting with the class_weight parameter, passing weights to balance the loss for each class (check keras documentation on fit for how to use it)
This model works just fine for me with most of the segmentation projects, i use crossentropy for multiclass segmentation and smooth dice for binary classes
def conv_block(tensor, nfilters, size=3, padding='same', initializer="he_normal"):
x = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(tensor)
x = BatchNormalization()(x)
x = Activation("relu")(x)
x = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(x)
x = BatchNormalization()(x)
x = Activation("relu")(x)
return x
def deconv_block(tensor, residual, nfilters, size=3, padding='same', strides=(2, 2)):
y = Conv2DTranspose(nfilters, kernel_size=(size, size), strides=strides, padding=padding)(tensor)
y = concatenate([y, residual], axis=3)
y = conv_block(y, nfilters)
return y
def Unet(img_height, img_width, nclasses=3, filters=64):
# down
input_layer = Input(shape=(img_height, img_width, 3), name='image_input')
conv1 = conv_block(input_layer, nfilters=filters)
conv1_out = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = conv_block(conv1_out, nfilters=filters*2)
conv2_out = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = conv_block(conv2_out, nfilters=filters*4)
conv3_out = MaxPooling2D(pool_size=(2, 2))(conv3)
conv4 = conv_block(conv3_out, nfilters=filters*8)
conv4_out = MaxPooling2D(pool_size=(2, 2))(conv4)
conv4_out = Dropout(0.5)(conv4_out)
conv5 = conv_block(conv4_out, nfilters=filters*16)
conv5 = Dropout(0.5)(conv5)
# up
deconv6 = deconv_block(conv5, residual=conv4, nfilters=filters*8)
deconv6 = Dropout(0.5)(deconv6)
deconv7 = deconv_block(deconv6, residual=conv3, nfilters=filters*4)
deconv7 = Dropout(0.5)(deconv7)
deconv8 = deconv_block(deconv7, residual=conv2, nfilters=filters*2)
deconv9 = deconv_block(deconv8, residual=conv1, nfilters=filters)
# output
output_layer = Conv2D(filters=nclasses, kernel_size=(1, 1))(deconv9)
output_layer = BatchNormalization()(output_layer)
output_layer = Activation('softmax')(output_layer)
model = Model(inputs=input_layer, outputs=output_layer, name='Unet')
return model
Sometimes, the problem is related to model architecture. When you are dealing with a complicated dataset for segmentation, you need to enhance the model architecture. I encountered the same problem with a new dataset while the model could work well on another dataset. So, I used Res-Unet instead of Unet as the model architecture and the problem solved.
hope this will help
I am getting error while running the following code in keras
Traceback (most recent call last):
File "my_conv_ae.py", line 74, in <module>
validation_steps = nb_validation_samples // batch_size)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\keras\legacy\interfaces.py", line 88, in wrapper
return func(*args, **kwargs)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\keras\engine\training.py", line 1890, in fit_generator
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\keras\engine\training.py", line 1627, in train_on_batch
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\keras\engine\training.py", line 1309, in _standardize_user_data
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\keras\engine\training.py", line 127, in _standardize_input_data
ValueError: Error when checking target: expected conv2d_transpose_8 to have 4 dimensions, but got array with shape (20, 1)
The code is:
import keras
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Conv2DTranspose
from keras.models import Model
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
input_img = Input(shape=(512, 512, 1))
nb_train_samples = 1700
nb_validation_samples = 420
epochs = 10
batch_size = 20
x = Conv2D(64, (11, 11), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(input_img)
x = Conv2D(64, (11, 11), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(128, (7, 7), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2D(128, (5, 5), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(256, (5, 5), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2D(256, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(512, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2D(512, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
encoded = MaxPooling2D((2, 2))(x)
print (K.int_shape(encoded))
at this point the representation is (26, 26, 512)
x = UpSampling2D((2, 2))(encoded)
x = Conv2DTranspose(512, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2DTranspose(512, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2DTranspose(256, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2DTranspose(256, (5, 5), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2DTranspose(128, (5, 5), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2DTranspose(128, (7, 7), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2DTranspose(64, (11, 11), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
decoded = Conv2DTranspose(1, (11, 11), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
print (K.int_shape(decoded))
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer = 'adadelta', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
train_datagen = ImageDataGenerator(
test_datagen = ImageDataGenerator(rescale=1./255)
x_train = train_datagen.flow_from_directory(
target_size = (512, 512), color_mode = 'grayscale',
batch_size = batch_size,
class_mode = 'binary')
x_test = test_datagen.flow_from_directory(
target_size = (512, 512), color_mode = 'grayscale',
batch_size = batch_size,
class_mode = 'binary')
steps_per_epoch = nb_train_samples // batch_size,
epochs = epochs,
validation_data = x_test,
validation_steps = nb_validation_samples // batch_size)
decoded_imgs = autoencoder.predict(x_test)
Summary of model is as follows:
Layer (type) Output Shape Param #
input_1 (InputLayer) (None, 512, 512, 1) 0
conv2d_1 (Conv2D) (None, 502, 502, 64) 7808
conv2d_2 (Conv2D) (None, 492, 492, 64) 495680
max_pooling2d_1 (MaxPooling2 (None, 246, 246, 64) 0
conv2d_3 (Conv2D) (None, 240, 240, 128) 401536
conv2d_4 (Conv2D) (None, 236, 236, 128) 409728
max_pooling2d_2 (MaxPooling2 (None, 118, 118, 128) 0
conv2d_5 (Conv2D) (None, 114, 114, 256) 819456
conv2d_6 (Conv2D) (None, 112, 112, 256) 590080
max_pooling2d_3 (MaxPooling2 (None, 56, 56, 256) 0
conv2d_7 (Conv2D) (None, 54, 54, 512) 1180160
conv2d_8 (Conv2D) (None, 52, 52, 512) 2359808
max_pooling2d_4 (MaxPooling2 (None, 26, 26, 512) 0
up_sampling2d_1 (UpSampling2 (None, 52, 52, 512) 0
conv2d_transpose_1 (Conv2DTr (None, 54, 54, 512) 2359808
conv2d_transpose_2 (Conv2DTr (None, 56, 56, 512) 2359808
up_sampling2d_2 (UpSampling2 (None, 112, 112, 512) 0
conv2d_transpose_3 (Conv2DTr (None, 114, 114, 256) 1179904
conv2d_transpose_4 (Conv2DTr (None, 118, 118, 256) 1638656
up_sampling2d_3 (UpSampling2 (None, 236, 236, 256) 0
conv2d_transpose_5 (Conv2DTr (None, 240, 240, 128) 819328
conv2d_transpose_6 (Conv2DTr (None, 246, 246, 128) 802944
up_sampling2d_4 (UpSampling2 (None, 492, 492, 128) 0
conv2d_transpose_7 (Conv2DTr (None, 502, 502, 64) 991296
conv2d_transpose_8 (Conv2DTr (None, 512, 512, 1) 7745
Total params: 16,423,745
Trainable params: 16,423,745
Non-trainable params: 0
Please help me. Is this because of Conv2DTranspose() which I have used for decoding?
It's definitly not a problem with model architecture itself (because it working on my side). Seems like problem with your ground truth data. It must have same dimensions as your input image, but flow_from_directory don't provide such ground truth data. I guess you need use your own custom data generator.