how to fit the dimension in the autoencoder of Keras - keras

I am using a convolutional autoencoder for the MNIST image data (28×28 images). Here is my code:
# Encoder: every 'same'-padded 2x2 pooling halves the size, rounding up
# (28 -> 14 -> 7 -> 4).
input_img = Input(shape=(28, 28, 1))
x = Convolution2D(16, (5, 5), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Convolution2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Convolution2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
# Decoder: every UpSampling2D doubles the size (4 -> 8 -> 16 -> 32), and the
# 'same'-padded convolutions keep it, so the output is 32x32 rather than 28x28.
x = Convolution2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Convolution2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Convolution2D(16, (5, 5), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Convolution2D(1, (3, 3), activation='sigmoid', padding='same')(x)
I get the following error message (with padding='same' at every layer):
ValueError: Error when checking target: expected conv2d_148 to have shape (32, 32, 1) but got array
with shape (28, 28, 1)
Here is my model summary
Layer (type) Output Shape Param #
input_20 (InputLayer) (None, 28, 28, 1) 0
conv2d_142 (Conv2D) (None, 28, 28, 16) 416
max_pooling2d_64 (MaxPooling (None, 14, 14, 16) 0
conv2d_143 (Conv2D) (None, 14, 14, 8) 1160
max_pooling2d_65 (MaxPooling (None, 7, 7, 8) 0
conv2d_144 (Conv2D) (None, 7, 7, 8) 584
max_pooling2d_66 (MaxPooling (None, 4, 4, 8) 0
conv2d_145 (Conv2D) (None, 4, 4, 8) 584
up_sampling2d_64 (UpSampling (None, 8, 8, 8) 0
conv2d_146 (Conv2D) (None, 8, 8, 8) 584
up_sampling2d_65 (UpSampling (None, 16, 16, 8) 0
conv2d_147 (Conv2D) (None, 16, 16, 16) 3216
up_sampling2d_66 (UpSampling (None, 32, 32, 16) 0
conv2d_148 (Conv2D) (None, 32, 32, 1) 145
Total params: 6,689
Trainable params: 6,689
Non-trainable params: 0
I know if I change the first layer to
x = Convolution2D(16, (3, 3), activation='relu', padding='same')(input_img)
it works, but I want to use a 5×5 convolution.
Why does this happen?

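The encoder shrinks the image 28 → 14 → 7 → 4 (pooling with padding='same' rounds 7/2 up to 4), while the decoder doubles it back 4 → 8 → 16 → 32, so the output is 32×32 instead of 28×28. One fix is to give the last convolution a (5, 5) kernel with padding='valid', which trims 4 pixels from each spatial dimension (32 − 5 + 1 = 28):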
# Explicit imports instead of a wildcard, so every layer name is traceable.
import numpy as np
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D

# Encoder: 28 -> 14 -> 7 -> 4 ('same'-padded pooling rounds odd sizes up).
input_img = Input(shape=(28, 28, 1))
x = Conv2D(16, (5, 5), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

# Decoder: 4 -> 8 -> 16 -> 32 after the three upsampling steps.
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (5, 5), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
# A 5x5 'valid' convolution removes 4 pixels per dimension: 32 - 5 + 1 = 28.
decoded = Conv2D(1, (5, 5), activation='sigmoid', padding='valid')(x)

# A functional Model is already built once constructed; no build() call needed.
auto = Model(input_img, decoded)
auto(np.random.rand(1, 28, 28, 1)).shape
TensorShape([1, 28, 28, 1])
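Or use tf.keras.layers.Conv2DTranspose for the upsampling steps; its kernel size, strides and padding let you control the output size directly.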

Related

Bidirectional LSTM with attention layer tf.keras

I am trying to add an attention mechanism to the model below.
Is there really a need for CTC loss in an attention model?
How could I implement a BLSTM with an attention mechanism for an image OCR problem?
def ctc_lambda_func(args):
    """Compute the batched CTC cost from a packed sequence of tensors.

    ``args`` unpacks, in order, to the predictions, the labels, the input
    lengths and the label lengths. The first two time steps of the
    predictions are dropped before the cost is computed.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    # Skipping two steps matters: the first couple of RNN outputs tend to be
    # garbage.
    trimmed = predictions[:, 2:, :]
    return tf.keras.backend.ctc_batch_cost(
        targets, trimmed, prediction_lengths, target_lengths
    )
def get_Model(training):
    """Build the CNN + bidirectional-LSTM text-recognition network.

    When ``training`` is truthy the returned model takes the image, the
    labels and both length tensors and outputs the CTC loss; otherwise it maps
    the image alone to the per-time-step softmax predictions.

    Shape notes in the comments assume a 128x64 input image -- TODO confirm
    against ``img_w`` / ``img_h``.
    """
    input_shape = (img_w, img_h, 1)

    # Auxiliary inputs that are consumed only by the CTC loss layer.
    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Image input.
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')

    # --- Convolutional feature extractor (VGG style) ---
    x = Conv2D(64, (3, 3), padding='same', name='conv1',
               kernel_initializer='he_normal')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='max1')(x)  # halves both axes

    x = Conv2D(128, (3, 3), padding='same', name='conv2',
               kernel_initializer='he_normal')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), name='max2')(x)  # halves both axes

    x = Conv2D(256, (3, 3), padding='same', name='conv3',
               kernel_initializer='he_normal')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(256, (3, 3), padding='same', name='conv4',
               kernel_initializer='he_normal')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # From here on only the second spatial axis is pooled, so the first one
    # keeps its length and can serve as the time axis for the RNN.
    x = MaxPooling2D(pool_size=(1, 2), name='max3')(x)

    x = Conv2D(512, (3, 3), padding='same', name='conv5',
               kernel_initializer='he_normal')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(512, (3, 3), padding='same', name='conv6')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(1, 2), name='max4')(x)

    x = Conv2D(512, (2, 2), padding='same', kernel_initializer='he_normal',
               name='con7')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # --- CNN to RNN: flatten each time step into one feature vector ---
    x = Reshape(target_shape=((32, 2048)), name='reshape')(x)
    x = Dense(64, activation='relu', kernel_initializer='he_normal',
              name='dense1')(x)

    # --- Two stacked bidirectional LSTM layers ---
    lstm1 = Bidirectional(
        LSTM(512, return_sequences=True, kernel_initializer='he_normal'),
        name='biLSTM1')(x)
    lstm1_norm = BatchNormalization()(lstm1)
    lstm2 = Bidirectional(
        LSTM(512, return_sequences=True, kernel_initializer='he_normal'),
        name='biLSTM2')(lstm1_norm)
    lstm2_norm = BatchNormalization()(lstm2)

    # Project every time step onto the character classes.
    x = Dense(num_classes, kernel_initializer='he_normal',
              name='dense2')(lstm2_norm)
    y_pred = Activation('softmax', name='softmax')(x)

    # The CTC loss needs extra tensors besides (y_true, y_pred), so it is
    # computed inside a Lambda layer rather than passed as a Keras loss.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    if training:
        return Model(inputs=[inputs, labels, input_length, label_length],
                     outputs=loss_out)
    return Model(inputs=[inputs], outputs=y_pred)
Is lstm1 the encoder and lstm2 the decoder?
I didn't find any attention implementation using the Keras functional API, and there also seems to be no Keras attention layer.

Training a unet model , but model is not learning

I am trying to train a segmentation model, but the loss saturates at 0.3370. I am really not sure what to do; can someone please help?
This is the model
def unet(input_shape=(128, 128, 128), optimizer=Adam, initial_learning_rate=5e-4,
         loss_function=weighted_dice_coefficient_loss):
    """Build and compile a channels-first 3D U-Net with a 3-channel sigmoid output.

    :param input_shape: shape of one sample, passed to ``Input`` unchanged.
        NOTE(review): every convolution uses ``data_format='channels_first'``,
        so this presumably needs a leading channel axis, e.g.
        ``(1, 128, 128, 128)`` -- confirm against the caller.
    :param optimizer: optimizer class (or factory); it is called with
        ``initial_learning_rate`` as its first positional argument.
    :param initial_learning_rate: learning rate handed to ``optimizer``.
    :param loss_function: loss passed to ``model.compile``.
    :return: the compiled Keras ``Model``.
    """
    inputs = Input(shape=input_shape)

    # Contracting path: two convolutions per level, then 2x2x2 max pooling.
    conv1 = UnetConv3D(inputs, 32, is_batchnorm=False, name='conv1')
    pool1 = MaxPooling3D(pool_size=(2, 2, 2))(conv1)
    conv2 = UnetConv3D(pool1, 64, is_batchnorm=False, name='conv2')
    pool2 = MaxPooling3D(pool_size=(2, 2, 2))(conv2)
    conv3 = UnetConv3D(pool2, 128, is_batchnorm=False, name='conv3')
    pool3 = MaxPooling3D(pool_size=(2, 2, 2))(conv3)
    conv4 = UnetConv3D(pool3, 256, is_batchnorm=False, name='conv4')
    pool4 = MaxPooling3D(pool_size=(2, 2, 2))(conv4)

    # Bottleneck.
    conv5 = Conv3D(512, (3, 3, 3), activation='relu', kernel_initializer=kinit,
                   padding='same', data_format='channels_first')(pool4)
    conv5 = Conv3D(512, (3, 3, 3), activation='relu', kernel_initializer=kinit,
                   padding='same', data_format='channels_first')(conv5)

    # Expanding path: transposed convolution doubles the resolution, then the
    # matching encoder features are concatenated along the channel axis (1).
    up6 = concatenate(
        [Conv3DTranspose(256, (2, 2, 2), strides=(2, 2, 2), kernel_initializer=kinit,
                         padding='same', data_format='channels_first')(conv5),
         conv4],
        axis=1)
    conv6 = Conv3D(256, (3, 3, 3), activation='relu', padding='same',
                   data_format='channels_first')(up6)
    conv6 = Conv3D(256, (3, 3, 3), activation='relu', padding='same',
                   data_format='channels_first')(conv6)

    up7 = concatenate(
        [Conv3DTranspose(128, (2, 2, 2), strides=(2, 2, 2), padding='same',
                         data_format='channels_first')(conv6),
         conv3],
        axis=1)
    conv7 = Conv3D(128, (3, 3, 3), activation='relu', kernel_initializer=kinit,
                   padding='same', data_format='channels_first')(up7)
    conv7 = Conv3D(128, (3, 3, 3), activation='relu', kernel_initializer=kinit,
                   padding='same', data_format='channels_first')(conv7)

    up8 = concatenate(
        [Conv3DTranspose(64, (2, 2, 2), strides=(2, 2, 2), kernel_initializer=kinit,
                         padding='same', data_format='channels_first')(conv7),
         conv2],
        axis=1)
    conv8 = Conv3D(64, (3, 3, 3), activation='relu', kernel_initializer=kinit,
                   padding='same', data_format='channels_first')(up8)

    up9 = concatenate(
        [Conv3DTranspose(32, (2, 2, 2), strides=(2, 2, 2), kernel_initializer=kinit,
                         padding='same', data_format='channels_first')(conv8),
         conv1],
        axis=1)
    conv9 = Conv3D(32, (3, 3, 3), activation='relu', kernel_initializer=kinit,
                   padding='same', data_format='channels_first')(up9)
    conv9 = Conv3D(32, (3, 3, 3), activation='relu', kernel_initializer=kinit,
                   padding='same', data_format='channels_first')(conv9)

    # Fix: the final 1x1x1 convolution must be linear. With activation='relu'
    # its output is >= 0, so the sigmoid below could never drop under 0.5 and
    # the network was unable to predict background voxels.
    conv10 = Conv3D(3, (1, 1, 1), kernel_initializer=kinit, padding='same',
                    name='final', data_format='channels_first')(conv9)
    activation_block = Activation('sigmoid')(conv10)

    model = Model(inputs=[inputs], outputs=[activation_block])
    # Fix: initial_learning_rate used to be ignored because optimizer() was
    # called without arguments. The learning rate is the first positional
    # argument of the built-in Keras optimizers.
    model.compile(optimizer=optimizer(initial_learning_rate), loss=loss_function)
    return model
This is the helper function
def UnetConv3D(input, outdim, is_batchnorm, name):
    """Apply two channels-first 3x3x3 convolutions, each followed by ReLU.

    :param input: tensor fed to the first convolution.
    :param outdim: number of filters of both convolutions.
    :param is_batchnorm: when truthy, insert BatchNormalization between each
        convolution and its ReLU.
    :param name: prefix for the layer names (``<name>_1``, ``<name>_1_bn``,
        ``<name>_1_act``, ``<name>_2``, ...).
    :return: output tensor of the second ReLU.
    """
    tensor = input
    for stage in ('_1', '_2'):
        prefix = name + stage
        tensor = Conv3D(outdim, (3, 3, 3), strides=(1, 1, 1),
                        kernel_initializer=kinit, padding="same", name=prefix,
                        data_format='channels_first')(tensor)
        if is_batchnorm:
            # Fix: the tensors are channels-first, so the feature axis to
            # normalise is 1, not the BatchNormalization default of -1.
            tensor = BatchNormalization(axis=1, name=prefix + '_bn')(tensor)
        tensor = Activation('relu', name=prefix + '_act')(tensor)
    return tensor
And this is the loss function --
def weighted_dice_coefficient(y_true, y_pred, axis=(-3, -2, -1), smooth=0.00001):
    """
    Mean smoothed Dice coefficient between ``y_true`` and ``y_pred``.

    Sums are taken over ``axis`` (the default reduces the last three axes,
    which assumes a "channels first" data structure); the scores that remain
    are then averaged.
    :param y_true: ground-truth tensor
    :param y_pred: predicted tensor
    :param axis: axes that are summed over
    :param smooth: smoothing term added to numerator and denominator
    :return: the averaged Dice score
    """
    overlap = K.sum(y_true * y_pred, axis=axis)
    total = K.sum(y_true, axis=axis) + K.sum(y_pred, axis=axis)
    return K.mean(2. * (overlap + smooth/2)/(total + smooth))
My input is (128, 128, 128). Am I making an obvious mistake? Please let me know if more information is needed.
Model summary
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) (None, 1, 128, 128, 0
__________________________________________________________________________________________________
conv1_1 (Conv3D) (None, 32, 128, 128, 896 input_1[0][0]
__________________________________________________________________________________________________
conv1_1_act (Activation) (None, 32, 128, 128, 0 conv1_1[0][0]
__________________________________________________________________________________________________
conv1_2 (Conv3D) (None, 32, 128, 128, 27680 conv1_1_act[0][0]
__________________________________________________________________________________________________
conv1_2_act (Activation) (None, 32, 128, 128, 0 conv1_2[0][0]
__________________________________________________________________________________________________
max_pooling3d_1 (MaxPooling3D) (None, 32, 64, 64, 6 0 conv1_2_act[0][0]
__________________________________________________________________________________________________
conv2_1 (Conv3D) (None, 64, 64, 64, 6 55360 max_pooling3d_1[0][0]
__________________________________________________________________________________________________
conv2_1_act (Activation) (None, 64, 64, 64, 6 0 conv2_1[0][0]
__________________________________________________________________________________________________
conv2_2 (Conv3D) (None, 64, 64, 64, 6 110656 conv2_1_act[0][0]
__________________________________________________________________________________________________
conv2_2_act (Activation) (None, 64, 64, 64, 6 0 conv2_2[0][0]
__________________________________________________________________________________________________
max_pooling3d_2 (MaxPooling3D) (None, 64, 32, 32, 3 0 conv2_2_act[0][0]
__________________________________________________________________________________________________
conv3_1 (Conv3D) (None, 128, 32, 32, 221312 max_pooling3d_2[0][0]
__________________________________________________________________________________________________
conv3_1_act (Activation) (None, 128, 32, 32, 0 conv3_1[0][0]
__________________________________________________________________________________________________
conv3_2 (Conv3D) (None, 128, 32, 32, 442496 conv3_1_act[0][0]
__________________________________________________________________________________________________
conv3_2_act (Activation) (None, 128, 32, 32, 0 conv3_2[0][0]
__________________________________________________________________________________________________
max_pooling3d_3 (MaxPooling3D) (None, 128, 16, 16, 0 conv3_2_act[0][0]
__________________________________________________________________________________________________
conv4_1 (Conv3D) (None, 256, 16, 16, 884992 max_pooling3d_3[0][0]
__________________________________________________________________________________________________
conv4_1_act (Activation) (None, 256, 16, 16, 0 conv4_1[0][0]
__________________________________________________________________________________________________
conv4_2 (Conv3D) (None, 256, 16, 16, 1769728 conv4_1_act[0][0]
__________________________________________________________________________________________________
conv4_2_act (Activation) (None, 256, 16, 16, 0 conv4_2[0][0]
__________________________________________________________________________________________________
max_pooling3d_4 (MaxPooling3D) (None, 256, 8, 8, 8) 0 conv4_2_act[0][0]
__________________________________________________________________________________________________
conv3d_1 (Conv3D) (None, 512, 8, 8, 8) 3539456 max_pooling3d_4[0][0]
__________________________________________________________________________________________________
conv3d_2 (Conv3D) (None, 512, 8, 8, 8) 7078400 conv3d_1[0][0]
__________________________________________________________________________________________________
conv3d_transpose_1 (Conv3DTrans (None, 256, 16, 16, 1048832 conv3d_2[0][0]
__________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 512, 16, 16, 0 conv3d_transpose_1[0][0]
conv4_2_act[0][0]
__________________________________________________________________________________________________
conv3d_3 (Conv3D) (None, 256, 16, 16, 3539200 concatenate_1[0][0]
__________________________________________________________________________________________________
conv3d_4 (Conv3D) (None, 256, 16, 16, 1769728 conv3d_3[0][0]
__________________________________________________________________________________________________
conv3d_transpose_2 (Conv3DTrans (None, 128, 32, 32, 262272 conv3d_4[0][0]
__________________________________________________________________________________________________
concatenate_2 (Concatenate) (None, 256, 32, 32, 0 conv3d_transpose_2[0][0]
conv3_2_act[0][0]
__________________________________________________________________________________________________
conv3d_5 (Conv3D) (None, 128, 32, 32, 884864 concatenate_2[0][0]
__________________________________________________________________________________________________
conv3d_6 (Conv3D) (None, 128, 32, 32, 442496 conv3d_5[0][0]
__________________________________________________________________________________________________
conv3d_transpose_3 (Conv3DTrans (None, 64, 64, 64, 6 65600 conv3d_6[0][0]
__________________________________________________________________________________________________
concatenate_3 (Concatenate) (None, 128, 64, 64, 0 conv3d_transpose_3[0][0]
conv2_2_act[0][0]
__________________________________________________________________________________________________
conv3d_7 (Conv3D) (None, 64, 64, 64, 6 221248 concatenate_3[0][0]
__________________________________________________________________________________________________
conv3d_transpose_4 (Conv3DTrans (None, 32, 128, 128, 16416 conv3d_7[0][0]
__________________________________________________________________________________________________
concatenate_4 (Concatenate) (None, 64, 128, 128, 0 conv3d_transpose_4[0][0]
conv1_2_act[0][0]
__________________________________________________________________________________________________
conv3d_8 (Conv3D) (None, 32, 128, 128, 55328 concatenate_4[0][0]
__________________________________________________________________________________________________
conv3d_9 (Conv3D) (None, 32, 128, 128, 27680 conv3d_8[0][0]
__________________________________________________________________________________________________
final (Conv3D) (None, 3, 128, 128, 99 conv3d_9[0][0]
__________________________________________________________________________________________________
activation_1 (Activation) (None, 3, 128, 128, 0 final[0][0]
==================================================================================================
Thanks in advance

Trying to perform transposed convolution but missing a pixel

def get_unet(input_img, n_filters=16, dropout=0.5, batchnorm=True):
    """Assemble a 2D U-Net on top of ``input_img`` and return it as a Model.

    Four pooling levels lead down to a bottleneck; four transposed
    convolutions lead back up, each concatenated with the encoder output of
    the same level. The output is a single-channel sigmoid map.
    """
    # Contracting path: conv block -> 2x2 max pooling -> dropout.
    c1 = conv2d_block(input_img, n_filters=n_filters * 1, kernel_size=3,
                      batchnorm=batchnorm)
    p1 = MaxPooling2D((2, 2))(c1)
    p1 = Dropout(dropout * 0.5)(p1)

    c2 = conv2d_block(p1, n_filters=n_filters * 2, kernel_size=3,
                      batchnorm=batchnorm)
    p2 = MaxPooling2D((2, 2))(c2)
    p2 = Dropout(dropout)(p2)

    c3 = conv2d_block(p2, n_filters=n_filters * 4, kernel_size=3,
                      batchnorm=batchnorm)
    p3 = MaxPooling2D((2, 2))(c3)
    p3 = Dropout(dropout)(p3)

    c4 = conv2d_block(p3, n_filters=n_filters * 8, kernel_size=3,
                      batchnorm=batchnorm)
    p4 = MaxPooling2D(pool_size=(2, 2))(c4)
    p4 = Dropout(dropout)(p4)

    # Bottleneck.
    c5 = conv2d_block(p4, n_filters=n_filters * 16, kernel_size=3,
                      batchnorm=batchnorm)

    # Expansive path: stride-2 transposed conv -> skip concat -> dropout ->
    # conv block. The concat needs both tensors to have equal spatial sizes.
    u6 = Conv2DTranspose(n_filters * 8, (3, 3), strides=(2, 2),
                         padding='same')(c5)
    u6 = concatenate([u6, c4])
    u6 = Dropout(dropout)(u6)
    c6 = conv2d_block(u6, n_filters=n_filters * 8, kernel_size=3,
                      batchnorm=batchnorm)

    u7 = Conv2DTranspose(n_filters * 4, (3, 3), strides=(2, 2),
                         padding='same')(c6)
    u7 = concatenate([u7, c3])
    u7 = Dropout(dropout)(u7)
    c7 = conv2d_block(u7, n_filters=n_filters * 4, kernel_size=3,
                      batchnorm=batchnorm)

    u8 = Conv2DTranspose(n_filters * 2, (3, 3), strides=(2, 2),
                         padding='same')(c7)
    u8 = concatenate([u8, c2])
    u8 = Dropout(dropout)(u8)
    c8 = conv2d_block(u8, n_filters=n_filters * 2, kernel_size=3,
                      batchnorm=batchnorm)

    u9 = Conv2DTranspose(n_filters * 1, (3, 3), strides=(2, 2),
                         padding='same')(c8)
    u9 = concatenate([u9, c1], axis=3)
    u9 = Dropout(dropout)(u9)
    c9 = conv2d_block(u9, n_filters=n_filters * 1, kernel_size=3,
                      batchnorm=batchnorm)

    # 1x1 convolution to a single sigmoid channel.
    outputs = Conv2D(1, (1, 1), activation='sigmoid')(c9)
    model = Model(inputs=[input_img], outputs=[outputs])
    return model
I got this model for Keras from here. I seem to be getting the error:
File "train.py", line 87, in get_unet
u8 = concatenate([u8, c2])
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 256, 184, 32), (None, 256, 185, 32)]
So I printed the values of each of these Tensors, and I got:
c1: Tensor("activation_2/Relu:0", shape=(?, 512, 370, 16), dtype=float32)
c2: Tensor("activation_4/Relu:0", shape=(?, 256, 185, 32), dtype=float32)
c3: Tensor("activation_6/Relu:0", shape=(?, 128, 92, 64), dtype=float32)
c4: Tensor("activation_8/Relu:0", shape=(?, 64, 46, 128), dtype=float32)
c5: Tensor("activation_10/Relu:0", shape=(?, 32, 23, 256), dtype=float32)
u6: Tensor("dropout_5/cond/Merge:0", shape=(?, 64, 46, 256), dtype=float32)
u7: Tensor("dropout_6/cond/Merge:0", shape=(?, 128, 92, 128), dtype=float32)
u8: Tensor("conv2d_transpose_3/BiasAdd:0", shape=(?, ?, ?, 32), dtype=float32)
What happened at c2? Why is the second spatial dimension of u8 184, while that of c2 is 185? Furthermore, c3's second spatial dimension (92) looks as if it had been max-pooled by a factor of 2 from 184 rather than 185 (probably due to a floor function).
How would I combat this? Do I have to change the size of the images that are being inputted, or do I have to engineer something while doing the transpose convolution? Do I need to perform interpolation for the one extra pixel?
That's happening because the second spatial dimension of c2 (185) is odd, so it cannot be halved exactly.
Max-pooling 185 by a factor of 2 gives 92.5, which is floored to 92.
Going the other way, upsampling 92 by a factor of 2 gives 184, not 185.
To avoid this you can simply zeropad U8 to be compatible with C2, like this :
# The transposed convolution yields 184 columns; pad one zero column on the
# right so that u8 matches the 185 columns of c2 before concatenating.
u8 = Conv2DTranspose(n_filters * 2, (3, 3), strides=(2, 2), padding='same')(c7)
u8 = ZeroPadding2D(padding=((0, 0), (0, 1)))(u8)
u8 = concatenate([u8, c2])
If you don't want to zero-pad, you can resize your input images so that each dimension is a power of 2, or at least can be divided by two several times without giving an odd number, like 224 (which can be divided by two 5 times before giving 7). With four pooling layers, as here, each spatial dimension must be a multiple of 2^4 = 16; 370 is not, but 368 or 384 would work.
Hope that will help you !

Cropping in the very last layer in autoencoder in keras

I have images of shape 391 x 400. I attempted to use the autoencoder as described here.
Specifically, I have used the following code:
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import backend as K
input_img = Input(shape=(391, 400, 1)) # adapt this if using `channels_first` image data format
# Encoder: 'same'-padded pooling rounds odd sizes up (391 -> 196 -> 98 -> 49, 400 -> 200 -> 100 -> 50).
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
# at this point the representation is (49, 50, 8) for a 391 x 400 input
# Decoder: each upsampling doubles the size (49 -> 98 -> 196 -> 392, 50 -> 100 -> 200 -> 400),
# so the output has 392 rows, one more than the input.
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
I am getting the following:
ValueError: Error when checking target: expected conv2d_37 to have shape (None, 392, 400, 1) but got array with shape (500, 391, 400, 1)
What I need: a layer that would drop/crop/reshape the last layer from 392 x 400 to 391 x 400.
Thank you for any help.
There's a layer called Cropping2D. To crop the last layer from 392 x 400 to 391 x 400, you can use it by:
# Cropping2D must be imported from keras.layers together with the other layers.
# ((1, 0), (0, 0)) removes one row at the top and nothing from the other sides: 392 x 400 -> 391 x 400.
cropped = Cropping2D(cropping=((1, 0), (0, 0)))(decoded)
autoencoder = Model(input_img, cropped)
The tuple ((1, 0), (0, 0)) means to crop 1 row from the top. If you want to crop from bottom, use ((0, 1), (0, 0)) instead. You can see the documentation for more detailed description about the cropping argument.

Getting error while running convolutional autoencoder in keras

I am getting the following error while running the code below in Keras:
Traceback (most recent call last):
File "my_conv_ae.py", line 74, in <module>
validation_steps = nb_validation_samples // batch_size)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\keras\legacy\interfaces.py", line 88, in wrapper
return func(*args, **kwargs)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\keras\engine\training.py", line 1890, in fit_generator
class_weight=class_weight)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\keras\engine\training.py", line 1627, in train_on_batch
check_batch_axis=True)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\keras\engine\training.py", line 1309, in _standardize_user_data
exception_prefix='target')
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35\lib\site-packages\keras\engine\training.py", line 127, in _standardize_input_data
str(array.shape))
ValueError: Error when checking target: expected conv2d_transpose_8 to have 4 dimensions, but got array with shape (20, 1)
The code is:
import keras
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Conv2DTranspose
from keras.models import Model
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
input_img = Input(shape=(512, 512, 1))
# Dataset sizes and training schedule used for the generator calls further down.
nb_train_samples = 1700
nb_validation_samples = 420
epochs = 10
batch_size = 20
# Encoder: every 'valid' convolution shrinks the map by (kernel size - 1) and
# every 2x2 pooling halves it, e.g. 512 -> 502 -> 492 -> 246 for the first group.
x = Conv2D(64, (11, 11), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(input_img)
x = Conv2D(64, (11, 11), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(128, (7, 7), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2D(128, (5, 5), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(256, (5, 5), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2D(256, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(512, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2D(512, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
encoded = MaxPooling2D((2, 2))(x)
print (K.int_shape(encoded))
# at this point the representation is (26, 26, 512)
# Decoder: mirrors the encoder. Every 'valid' transposed convolution grows the
# map by (kernel size - 1) and every upsampling doubles it, ending at 512 x 512 x 1.
x = UpSampling2D((2, 2))(encoded)
x = Conv2DTranspose(512, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2DTranspose(512, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2DTranspose(256, (3, 3), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2DTranspose(256, (5, 5), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2DTranspose(128, (5, 5), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = Conv2DTranspose(128, (7, 7), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2DTranspose(64, (11, 11), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
decoded = Conv2DTranspose(1, (11, 11), activation='relu', strides= 1, padding='valid', kernel_initializer='glorot_uniform')(x)
print (K.int_shape(decoded))
autoencoder = Model(input_img, decoded)
# NOTE(review): sparse_categorical_crossentropy expects integer class labels;
# an image-reconstruction target would normally use a pixel-wise loss such as
# 'mse' or 'binary_crossentropy' -- confirm intent.
autoencoder.compile(optimizer = 'adadelta', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
# Training images are rescaled to [0, 1] and randomly sheared, zoomed and flipped.
train_datagen = ImageDataGenerator(
rescale=1./255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
# Validation images are only rescaled.
test_datagen = ImageDataGenerator(rescale=1./255)
# NOTE(review): class_mode='binary' makes the generator yield (images, labels)
# with labels of shape (batch_size, 1); that is the (20, 1) target in the
# reported error. class_mode='input' yields the images themselves as targets.
x_train = train_datagen.flow_from_directory(
'data/train',
target_size = (512, 512), color_mode = 'grayscale',
batch_size = batch_size,
class_mode = 'binary')
x_test = test_datagen.flow_from_directory(
'data/validation',
target_size = (512, 512), color_mode = 'grayscale',
batch_size = batch_size,
class_mode = 'binary')
# Train from the generators; the step counts cover each dataset once per epoch.
autoencoder.fit_generator(
x_train,
steps_per_epoch = nb_train_samples // batch_size,
epochs = epochs,
validation_data = x_test,
validation_steps = nb_validation_samples // batch_size)
decoded_imgs = autoencoder.predict(x_test)
Summary of model is as follows:
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 512, 512, 1) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 502, 502, 64) 7808
_________________________________________________________________
conv2d_2 (Conv2D) (None, 492, 492, 64) 495680
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 246, 246, 64) 0
_________________________________________________________________
conv2d_3 (Conv2D) (None, 240, 240, 128) 401536
_________________________________________________________________
conv2d_4 (Conv2D) (None, 236, 236, 128) 409728
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 118, 118, 128) 0
_________________________________________________________________
conv2d_5 (Conv2D) (None, 114, 114, 256) 819456
_________________________________________________________________
conv2d_6 (Conv2D) (None, 112, 112, 256) 590080
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 56, 56, 256) 0
_________________________________________________________________
conv2d_7 (Conv2D) (None, 54, 54, 512) 1180160
_________________________________________________________________
conv2d_8 (Conv2D) (None, 52, 52, 512) 2359808
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 26, 26, 512) 0
_________________________________________________________________
up_sampling2d_1 (UpSampling2 (None, 52, 52, 512) 0
_________________________________________________________________
conv2d_transpose_1 (Conv2DTr (None, 54, 54, 512) 2359808
_________________________________________________________________
conv2d_transpose_2 (Conv2DTr (None, 56, 56, 512) 2359808
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 112, 112, 512) 0
_________________________________________________________________
conv2d_transpose_3 (Conv2DTr (None, 114, 114, 256) 1179904
_________________________________________________________________
conv2d_transpose_4 (Conv2DTr (None, 118, 118, 256) 1638656
_________________________________________________________________
up_sampling2d_3 (UpSampling2 (None, 236, 236, 256) 0
_________________________________________________________________
conv2d_transpose_5 (Conv2DTr (None, 240, 240, 128) 819328
_________________________________________________________________
conv2d_transpose_6 (Conv2DTr (None, 246, 246, 128) 802944
_________________________________________________________________
up_sampling2d_4 (UpSampling2 (None, 492, 492, 128) 0
_________________________________________________________________
conv2d_transpose_7 (Conv2DTr (None, 502, 502, 64) 991296
_________________________________________________________________
conv2d_transpose_8 (Conv2DTr (None, 512, 512, 1) 7745
=================================================================
Total params: 16,423,745
Trainable params: 16,423,745
Non-trainable params: 0
_________________________________________________________________
Please help me. Is this because of Conv2DTranspose() which I have used for decoding?
It's definitely not a problem with the model architecture itself (it works on my side). The problem is with your ground-truth data: the target must have the same dimensions as the input image, but flow_from_directory with class_mode='binary' yields class labels of shape (batch_size, 1) instead. Either pass class_mode='input', which makes the generator return the input images as targets, or write your own custom data generator.

Resources