I have recently started learning about Image Segmentation and UNet. I am trying to do a multi class Image Segmentation where I have 7 classes and input is a (256, 256, 3) rgb image and output is (256, 256, 1) grayscale image where each intensity value corresponds to one class. I am doing pixel wise softmax. I am using sparse categorical cross entropy so as to avoid doing One Hot Encoding.
def soft1(x):
return keras.activations.softmax(x, axis = -1)
def conv2d_block(input_tensor, n_filters, kernel_size = 3, batchnorm = True):
x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),\
kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
if batchnorm:
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),\
kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
if batchnorm:
x = BatchNormalization()(x)
x = Activation('relu')(x)
return x
def get_unet(input_img, n_classes, n_filters = 16, dropout = 0.1, batchnorm = True):
# Contracting Path
c1 = conv2d_block(input_img, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
p1 = MaxPooling2D((2, 2))(c1)
p1 = Dropout(dropout)(p1)
c2 = conv2d_block(p1, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
p2 = MaxPooling2D((2, 2))(c2)
p2 = Dropout(dropout)(p2)
c3 = conv2d_block(p2, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
p3 = MaxPooling2D((2, 2))(c3)
p3 = Dropout(dropout)(p3)
c4 = conv2d_block(p3, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
p4 = MaxPooling2D((2, 2))(c4)
p4 = Dropout(dropout)(p4)
c5 = conv2d_block(p4, n_filters = n_filters * 16, kernel_size = 3, batchnorm = batchnorm)
# Expansive Path
u6 = Conv2DTranspose(n_filters * 8, (3, 3), strides = (2, 2), padding = 'same')(c5)
u6 = concatenate([u6, c4])
u6 = Dropout(dropout)(u6)
c6 = conv2d_block(u6, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
u7 = Conv2DTranspose(n_filters * 4, (3, 3), strides = (2, 2), padding = 'same')(c6)
u7 = concatenate([u7, c3])
u7 = Dropout(dropout)(u7)
c7 = conv2d_block(u7, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
u8 = Conv2DTranspose(n_filters * 2, (3, 3), strides = (2, 2), padding = 'same')(c7)
u8 = concatenate([u8, c2])
u8 = Dropout(dropout)(u8)
c8 = conv2d_block(u8, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
u9 = Conv2DTranspose(n_filters * 1, (3, 3), strides = (2, 2), padding = 'same')(c8)
u9 = concatenate([u9, c1])
u9 = Dropout(dropout)(u9)
c9 = conv2d_block(u9, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
outputs = Conv2D(n_classes, (1, 1))(c9)
outputs = Reshape((image_height*image_width, 1, n_classes), input_shape = (image_height, image_width, n_classes))(outputs)
outputs = Activation(soft1)(outputs)
model = Model(inputs=[input_img], outputs=[outputs])
print(outputs.shape)
return model
My Model Summary is:
Model: "model_2"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_12 (InputLayer) (None, 256, 256, 3) 0
__________________________________________________________________________________________________
conv2d_211 (Conv2D) (None, 256, 256, 16) 448 input_12[0][0]
__________________________________________________________________________________________________
batch_normalization_200 (BatchN (None, 256, 256, 16) 64 conv2d_211[0][0]
__________________________________________________________________________________________________
activation_204 (Activation) (None, 256, 256, 16) 0 batch_normalization_200[0][0]
__________________________________________________________________________________________________
max_pooling2d_45 (MaxPooling2D) (None, 128, 128, 16) 0 activation_204[0][0]
__________________________________________________________________________________________________
dropout_89 (Dropout) (None, 128, 128, 16) 0 max_pooling2d_45[0][0]
__________________________________________________________________________________________________
conv2d_213 (Conv2D) (None, 128, 128, 32) 4640 dropout_89[0][0]
__________________________________________________________________________________________________
batch_normalization_202 (BatchN (None, 128, 128, 32) 128 conv2d_213[0][0]
__________________________________________________________________________________________________
activation_206 (Activation) (None, 128, 128, 32) 0 batch_normalization_202[0][0]
__________________________________________________________________________________________________
max_pooling2d_46 (MaxPooling2D) (None, 64, 64, 32) 0 activation_206[0][0]
__________________________________________________________________________________________________
dropout_90 (Dropout) (None, 64, 64, 32) 0 max_pooling2d_46[0][0]
__________________________________________________________________________________________________
conv2d_215 (Conv2D) (None, 64, 64, 64) 18496 dropout_90[0][0]
__________________________________________________________________________________________________
batch_normalization_204 (BatchN (None, 64, 64, 64) 256 conv2d_215[0][0]
__________________________________________________________________________________________________
activation_208 (Activation) (None, 64, 64, 64) 0 batch_normalization_204[0][0]
__________________________________________________________________________________________________
max_pooling2d_47 (MaxPooling2D) (None, 32, 32, 64) 0 activation_208[0][0]
__________________________________________________________________________________________________
dropout_91 (Dropout) (None, 32, 32, 64) 0 max_pooling2d_47[0][0]
__________________________________________________________________________________________________
conv2d_217 (Conv2D) (None, 32, 32, 128) 73856 dropout_91[0][0]
__________________________________________________________________________________________________
batch_normalization_206 (BatchN (None, 32, 32, 128) 512 conv2d_217[0][0]
__________________________________________________________________________________________________
activation_210 (Activation) (None, 32, 32, 128) 0 batch_normalization_206[0][0]
__________________________________________________________________________________________________
max_pooling2d_48 (MaxPooling2D) (None, 16, 16, 128) 0 activation_210[0][0]
__________________________________________________________________________________________________
dropout_92 (Dropout) (None, 16, 16, 128) 0 max_pooling2d_48[0][0]
__________________________________________________________________________________________________
conv2d_219 (Conv2D) (None, 16, 16, 256) 295168 dropout_92[0][0]
__________________________________________________________________________________________________
batch_normalization_208 (BatchN (None, 16, 16, 256) 1024 conv2d_219[0][0]
__________________________________________________________________________________________________
activation_212 (Activation) (None, 16, 16, 256) 0 batch_normalization_208[0][0]
__________________________________________________________________________________________________
conv2d_transpose_45 (Conv2DTran (None, 32, 32, 128) 295040 activation_212[0][0]
__________________________________________________________________________________________________
concatenate_45 (Concatenate) (None, 32, 32, 256) 0 conv2d_transpose_45[0][0]
activation_210[0][0]
__________________________________________________________________________________________________
dropout_93 (Dropout) (None, 32, 32, 256) 0 concatenate_45[0][0]
__________________________________________________________________________________________________
conv2d_221 (Conv2D) (None, 32, 32, 128) 295040 dropout_93[0][0]
__________________________________________________________________________________________________
batch_normalization_210 (BatchN (None, 32, 32, 128) 512 conv2d_221[0][0]
__________________________________________________________________________________________________
activation_214 (Activation) (None, 32, 32, 128) 0 batch_normalization_210[0][0]
__________________________________________________________________________________________________
conv2d_transpose_46 (Conv2DTran (None, 64, 64, 64) 73792 activation_214[0][0]
__________________________________________________________________________________________________
concatenate_46 (Concatenate) (None, 64, 64, 128) 0 conv2d_transpose_46[0][0]
activation_208[0][0]
__________________________________________________________________________________________________
dropout_94 (Dropout) (None, 64, 64, 128) 0 concatenate_46[0][0]
__________________________________________________________________________________________________
conv2d_223 (Conv2D) (None, 64, 64, 64) 73792 dropout_94[0][0]
__________________________________________________________________________________________________
batch_normalization_212 (BatchN (None, 64, 64, 64) 256 conv2d_223[0][0]
__________________________________________________________________________________________________
activation_216 (Activation) (None, 64, 64, 64) 0 batch_normalization_212[0][0]
__________________________________________________________________________________________________
conv2d_transpose_47 (Conv2DTran (None, 128, 128, 32) 18464 activation_216[0][0]
__________________________________________________________________________________________________
concatenate_47 (Concatenate) (None, 128, 128, 64) 0 conv2d_transpose_47[0][0]
activation_206[0][0]
__________________________________________________________________________________________________
dropout_95 (Dropout) (None, 128, 128, 64) 0 concatenate_47[0][0]
__________________________________________________________________________________________________
conv2d_225 (Conv2D) (None, 128, 128, 32) 18464 dropout_95[0][0]
__________________________________________________________________________________________________
batch_normalization_214 (BatchN (None, 128, 128, 32) 128 conv2d_225[0][0]
__________________________________________________________________________________________________
activation_218 (Activation) (None, 128, 128, 32) 0 batch_normalization_214[0][0]
__________________________________________________________________________________________________
conv2d_transpose_48 (Conv2DTran (None, 256, 256, 16) 4624 activation_218[0][0]
__________________________________________________________________________________________________
concatenate_48 (Concatenate) (None, 256, 256, 32) 0 conv2d_transpose_48[0][0]
activation_204[0][0]
__________________________________________________________________________________________________
dropout_96 (Dropout) (None, 256, 256, 32) 0 concatenate_48[0][0]
__________________________________________________________________________________________________
conv2d_227 (Conv2D) (None, 256, 256, 16) 4624 dropout_96[0][0]
__________________________________________________________________________________________________
batch_normalization_216 (BatchN (None, 256, 256, 16) 64 conv2d_227[0][0]
__________________________________________________________________________________________________
activation_220 (Activation) (None, 256, 256, 16) 0 batch_normalization_216[0][0]
__________________________________________________________________________________________________
conv2d_228 (Conv2D) (None, 256, 256, 7) 119 activation_220[0][0]
__________________________________________________________________________________________________
reshape_12 (Reshape) (None, 65536, 1, 7) 0 conv2d_228[0][0]
__________________________________________________________________________________________________
activation_221 (Activation) (None, 65536, 1, 7) 0 reshape_12[0][0]
==================================================================================================
Total params: 1,179,511
Trainable params: 1,178,039
Non-trainable params: 1,472
__________________________________________________________________________________________________
Is my model right? Shouldn't the final output be (65536, 1, 1) as I am using softmax?
The code is compiling but dice coefficient is very low.
Your model should end in (256,256,7).
That is 7 classes per pixel, and the shape should agree with your output images that are (256,256,1). This will work only for 'sparse_categorical_crossentropy' or a custom loss.
So, up to conv_228 the model seems fine (didn't look in detail, though).
There is no need for anything that comes after this convolution.
You can place the softmax directly in the conv_228 or directly after.
y_train should be (256,256,1) for this.
Your output in fact represents its pixel of your image. For its pixel, you have as an output of 1x7. Since it is sigmoid the values that this representation takes are between 0-1. Therefore the output fires when you have the desired class and therefore segmentation. If it was (65536, 1, 1) you should have not categorical but dense representation.
Related
I am trying to build 3D Resnet for small 3D patches of size [32,32,44] with one channel. In 2D Resnet, after each residual block, the size of images should reduce to half and the number of feature maps doubles as shown below
# function for creating an identity or projection residual module
def residual_module(layer_in, n_filters):
merge_input = layer_in
# check if the number of filters needs to be increase, assumes channels last format
if layer_in.shape[-1] != n_filters:
merge_input = Conv2D(n_filters, (1,1), padding='same', activation='relu', kernel_initializer='he_normal')(layer_in)
# conv1
conv1 = Conv2D(n_filters, (3,3), padding='same', activation='relu', kernel_initializer='he_normal')(layer_in)
# conv2
conv2 = Conv2D(n_filters, (3,3), padding='same', activation='linear', kernel_initializer='he_normal')(conv1)
# add filters, assumes filters/channels last
layer_out = add([conv2, merge_input])
# activation function
layer_out = Activation('relu')(layer_out)
return layer_out
# define model input
visible = Input(shape=(256, 256, 1))
layer = residual_module(visible,64)
layer_1 = residual_module(layer,128)
# create model
model = Model(inputs=visible, outputs=layer_1)
# summarize model
model.summary()
Result:
Model: "model_44"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_68 (InputLayer) [(None, 256, 256, 1) 0
__________________________________________________________________________________________________
conv2d_40 (Conv2D) (None, 256, 256, 64) 640 input_68[0][0]
__________________________________________________________________________________________________
conv2d_41 (Conv2D) (None, 256, 256, 64) 36928 conv2d_40[0][0]
__________________________________________________________________________________________________
conv2d_39 (Conv2D) (None, 256, 256, 64) 128 input_68[0][0]
__________________________________________________________________________________________________
add_207 (Add) (None, 256, 256, 64) 0 conv2d_41[0][0]
conv2d_39[0][0]
__________________________________________________________________________________________________
activation_52 (Activation) (None, 256, 256, 64) 0 add_207[0][0]
__________________________________________________________________________________________________
conv2d_43 (Conv2D) (None, 256, 256, 128 73856 activation_52[0][0]
__________________________________________________________________________________________________
conv2d_44 (Conv2D) (None, 256, 256, 128 147584 conv2d_43[0][0]
__________________________________________________________________________________________________
conv2d_42 (Conv2D) (None, 256, 256, 128 8320 activation_52[0][0]
__________________________________________________________________________________________________
add_208 (Add) (None, 256, 256, 128 0 conv2d_44[0][0]
conv2d_42[0][0]
__________________________________________________________________________________________________
activation_53 (Activation) (None, 256, 256, 128 0 add_208[0][0]
==================================================================================================
Total params: 267,456
Trainable params: 267,456
Non-trainable params: 0
However, adapting this code for 3D Resnet does not double the number of feature maps. As it can be seen in the below example that after first residual block channel dimension is still 1 and changes to 3 in the second block
def residual_module(layer_in, n_filters):
merge_input = layer_in
# check if the number of filters needs to be increase, assumes channels last format
if layer_in.shape[-1] != n_filters:
merge_input = Conv3D(n_filters, (1,1,1), padding='same', activation='relu', kernel_initializer='he_normal')(layer_in)
# conv1
conv1 = Conv3D(n_filters, (3,3,3), padding='same', activation='relu', kernel_initializer='he_normal')(layer_in)
# conv2
conv2 = Conv3D(n_filters, (3,3,3), padding='same', activation='linear', kernel_initializer='he_normal')(conv1)
# add filters, assumes filters/channels last
layer_out = add([conv2, merge_input])
# activation function
layer_out = Activation('relu')(layer_out)
return layer_out
# define model input
visible = Input(shape=(32,32,32,1))
layer = residual_module(visible,16)
layer_1 = residual_module(layer,32)
# create model
model = Model(inputs=visible, outputs=layer_1)
# summarize model
model.summary()
Result:
Model: "model_45"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_69 (InputLayer) [(None, 32, 32, 32, 0
__________________________________________________________________________________________________
conv3d_519 (Conv3D) (None, 32, 32, 32, 1 448 input_69[0][0]
__________________________________________________________________________________________________
conv3d_520 (Conv3D) (None, 32, 32, 32, 1 6928 conv3d_519[0][0]
__________________________________________________________________________________________________
conv3d_518 (Conv3D) (None, 32, 32, 32, 1 32 input_69[0][0]
__________________________________________________________________________________________________
add_209 (Add) (None, 32, 32, 32, 1 0 conv3d_520[0][0]
conv3d_518[0][0]
__________________________________________________________________________________________________
activation_54 (Activation) (None, 32, 32, 32, 1 0 add_209[0][0]
__________________________________________________________________________________________________
conv3d_522 (Conv3D) (None, 32, 32, 32, 3 13856 activation_54[0][0]
__________________________________________________________________________________________________
conv3d_523 (Conv3D) (None, 32, 32, 32, 3 27680 conv3d_522[0][0]
__________________________________________________________________________________________________
conv3d_521 (Conv3D) (None, 32, 32, 32, 3 544 activation_54[0][0]
__________________________________________________________________________________________________
add_210 (Add) (None, 32, 32, 32, 3 0 conv3d_523[0][0]
conv3d_521[0][0]
__________________________________________________________________________________________________
activation_55 (Activation) (None, 32, 32, 32, 3 0 add_210[0][0]
==================================================================================================
Total params: 49,488
Trainable params: 49,488
Non-trainable params: 0
_______________________________________________________________________________________________
What am I missing here?
Well, I figured out that the code is okay, except that the lines in model.summary() were truncated, so in fact feature maps in the first residual block are 16 and in the second they are 32. Increasing line_width was the catch
model.summary(line_length=110)
Results:
Model: "model_8"
______________________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==============================================================================================================
input_9 (InputLayer) [(None, 32, 32, 32, 1)] 0
______________________________________________________________________________________________________________
conv3d_136 (Conv3D) (None, 32, 32, 32, 16) 448 input_9[0][0]
______________________________________________________________________________________________________________
conv3d_137 (Conv3D) (None, 32, 32, 32, 16) 6928 conv3d_136[0][0]
______________________________________________________________________________________________________________
conv3d_135 (Conv3D) (None, 32, 32, 32, 16) 32 input_9[0][0]
______________________________________________________________________________________________________________
add_57 (Add) (None, 32, 32, 32, 16) 0 conv3d_137[0][0]
conv3d_135[0][0]
______________________________________________________________________________________________________________
activation_113 (Activation) (None, 32, 32, 32, 16) 0 add_57[0][0]
______________________________________________________________________________________________________________
conv3d_139 (Conv3D) (None, 32, 32, 32, 32) 13856 activation_113[0][0]
______________________________________________________________________________________________________________
conv3d_140 (Conv3D) (None, 32, 32, 32, 32) 27680 conv3d_139[0][0]
______________________________________________________________________________________________________________
conv3d_138 (Conv3D) (None, 32, 32, 32, 32) 544 activation_113[0][0]
______________________________________________________________________________________________________________
add_58 (Add) (None, 32, 32, 32, 32) 0 conv3d_140[0][0]
conv3d_138[0][0]
______________________________________________________________________________________________________________
activation_114 (Activation) (None, 32, 32, 32, 32) 0 add_58[0][0]
==============================================================================================================
Total params: 49,488
Trainable params: 49,488
Non-trainable params: 0
I am using keras' pretrained resnet 101 v2 CNN model. I wanted to know what the size of the filter was. I tried checking my model's summary but it doesn't really tell me the size directly. is it a 2x2x2 matrix or a 3x3x3 or something else?
The snippet of the model summary is:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_3 (InputLayer) [(None, 255, 255, 3) 0
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D) (None, 261, 261, 3) 0 input_3[0][0]
__________________________________________________________________________________________________
conv1_conv (Conv2D) (None, 128, 128, 64) 9472 conv1_pad[0][0]
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D) (None, 130, 130, 64) 0 conv1_conv[0][0]
__________________________________________________________________________________________________
pool1_pool (MaxPooling2D) (None, 64, 64, 64) 0 pool1_pad[0][0]
__________________________________________________________________________________________________
conv2_block1_preact_bn (BatchNo (None, 64, 64, 64) 256 pool1_pool[0][0]
__________________________________________________________________________________________________
conv2_block1_preact_relu (Activ (None, 64, 64, 64) 0 conv2_block1_preact_bn[0][0]
__________________________________________________________________________________________________
conv2_block1_1_conv (Conv2D) (None, 64, 64, 64) 4096 conv2_block1_preact_relu[0][0]
__________________________________________________________________________________________________
conv2_block1_1_bn (BatchNormali (None, 64, 64, 64) 256 conv2_block1_1_conv[0][0]
__________________________________________________________________________________________________
conv2_block1_1_relu (Activation (None, 64, 64, 64) 0 conv2_block1_1_bn[0][0]
__________________________________________________________________________________________________
conv2_block1_2_pad (ZeroPadding (None, 66, 66, 64) 0 conv2_block1_1_relu[0][0]
__________________________________________________________________________________________________
conv2_block1_2_conv (Conv2D) (None, 64, 64, 64) 36864 conv2_block1_2_pad[0][0]
__________________________________________________________________________________________________
conv2_block1_2_bn (BatchNormali (None, 64, 64, 64) 256 conv2_block1_2_conv[0][0]
__________________________________________________________________________________________________
conv2_block1_2_relu (Activation (None, 64, 64, 64) 0 conv2_block1_2_bn[0][0]
__________________________________________________________________________________________________
conv2_block1_0_conv (Conv2D) (None, 64, 64, 256) 16640 conv2_block1_preact_relu[0][0]
__________________________________________________________________________________________________
conv2_block1_3_conv (Conv2D) (None, 64, 64, 256) 16640 conv2_block1_2_relu[0][0]
I am not sure if there is a predefined method to get this. It should be possible to get filter shape, count for a layer this way for a certain layer,
print(model.layers[2].name)
print(model.layers[2].weights[0].shape)
This gives output,
conv1_conv
(7, 7, 3, 64)
Printing print(model.layers[2].weights) gives something like,
conv1_conv
[<tf.Variable 'conv1_conv/kernel:0' shape=(7, 7, 3, 64) dtype=float32, numpy=
array([[[[ 2.04881709e-02, 1.74432080e-02, -1.19661177e-02, ...,
...
To get details for all the layers,
for i, layer in enumerate(model.layers):
print(layer.name)
if layer.weights:
print(layer.weights[0].shape)
print(layer.weights[1].shape)
print('-' * 30)
Partial output,
input_3
------------------------------
conv1_pad
------------------------------
conv1_conv
(7, 7, 3, 64)
(64,)
------------------------------
conv1_bn
(64,)
(64,)
------------------------------
conv1_relu
------------------------------
pool1_pad
------------------------------
pool1_pool
------------------------------
conv2_block1_1_conv
(1, 1, 64, 64)
(64,)
------------------------------
conv2_block1_1_bn
(64,)
(64,)
------------------------------
conv2_block1_1_relu
------------------------------
conv2_block1_2_conv
(3, 3, 64, 64)
(64,)
------------------------------
I am try to train a model which detect 128d vector to recognize face. Input of model is an image and output is 128d vector (regression) which get from "face_recognition" library.
When I put 128 output to train I got this error:
ValueError: Error when checking target: expected dense_24 to have shape (1,) but got array with shape (128,)
But when I try only one output, fit function works.
The strange part of that prediction shape is (1, 128) but I can't give 128 output to train.
Here is my model:
from keras.applications.vgg16 import VGG16
from keras.layers import Flatten, Dense
import keras
def build_facereg_disc():
# load model
model = VGG16(include_top=False, input_shape=(64, 64, 3))
# add new classifier layers
flat1 = Flatten()(model.outputs)
class1 = Dense(2048, activation='relu')(flat1)
output = Dense(128, activation='relu')(class1)
# define new model
model = models.Model(inputs=model.inputs, outputs=output)
# summarize
return model
facereg_disc = build_facereg_disc()
facereg_disc.compile(optimizer=keras.optimizers.Adam(), # Optimizer
# Loss function to minimize
loss=keras.losses.SparseCategoricalCrossentropy(),
# List of metrics to monitor
metrics=['binary_crossentropy'])
And summary:
Model: "model_27"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_20 (InputLayer) (None, 64, 64, 3) 0
_________________________________________________________________
block1_conv1 (Conv2D) (None, 64, 64, 64) 1792
_________________________________________________________________
block1_conv2 (Conv2D) (None, 64, 64, 64) 36928
_________________________________________________________________
block1_pool (MaxPooling2D) (None, 32, 32, 64) 0
_________________________________________________________________
block2_conv1 (Conv2D) (None, 32, 32, 128) 73856
_________________________________________________________________
block2_conv2 (Conv2D) (None, 32, 32, 128) 147584
_________________________________________________________________
block2_pool (MaxPooling2D) (None, 16, 16, 128) 0
_________________________________________________________________
block3_conv1 (Conv2D) (None, 16, 16, 256) 295168
_________________________________________________________________
block3_conv2 (Conv2D) (None, 16, 16, 256) 590080
_________________________________________________________________
block3_conv3 (Conv2D) (None, 16, 16, 256) 590080
_________________________________________________________________
block3_pool (MaxPooling2D) (None, 8, 8, 256) 0
_________________________________________________________________
block4_conv1 (Conv2D) (None, 8, 8, 512) 1180160
_________________________________________________________________
block4_conv2 (Conv2D) (None, 8, 8, 512) 2359808
_________________________________________________________________
block4_conv3 (Conv2D) (None, 8, 8, 512) 2359808
_________________________________________________________________
block4_pool (MaxPooling2D) (None, 4, 4, 512) 0
_________________________________________________________________
block5_conv1 (Conv2D) (None, 4, 4, 512) 2359808
_________________________________________________________________
block5_conv2 (Conv2D) (None, 4, 4, 512) 2359808
_________________________________________________________________
block5_conv3 (Conv2D) (None, 4, 4, 512) 2359808
_________________________________________________________________
block5_pool (MaxPooling2D) (None, 2, 2, 512) 0
_________________________________________________________________
flatten_10 (Flatten) (None, 2048) 0
_________________________________________________________________
dense_23 (Dense) (None, 2048) 4196352
_________________________________________________________________
dense_24 (Dense) (None, 128) 262272
=================================================================
Total params: 19,173,312
Trainable params: 19,173,312
Non-trainable params: 0
Here is preprocessing:
dir_data = "data_faces/img_align_celeba/"
Ntrain = 2000
Ntest = 100
nm_imgs = np.sort(os.listdir(dir_data))
## name of the jpg files for training set
nm_imgs_train = nm_imgs[:Ntrain]
## name of the jpg files for the testing data
nm_imgs_test = nm_imgs[Ntrain:Ntrain + Ntest]
img_shape = (64, 64, 3)
def get_npdata(nm_imgs_train):
X_train = []
for i, myid in enumerate(nm_imgs_train):
image = load_img(dir_data + "/" + myid,
target_size=img_shape[:2])
image = img_to_array(image)/255.0
X_train.append(image)
X_train = np.array(X_train)
return(X_train)
X_train = get_npdata(nm_imgs_train)
X_train.shape = (2000, 64, 64, 3)
y_train.shape = (2000, 128)
I use batch size like:
idx = np.random.randint(0, X_train.shape[0], half_batch)
imgs = X_train[idx]
labels = y_train[idx]
reg_d_loss_real = facereg_disc.train_on_batch(imgs, labels)
Your issue comes from your loss function. As explained in the doc, SparseCategoricalCrossentropy expects each sample in y_true to be an integer encoding the class, whereas CategoricalCrossentropy expects a one-hot encoded representation (which is your case).
So, switch to CategoricalCrossentropy and you should be fine.
However, to reproduce, I had to change:
flat1 = Flatten()(model.outputs)
To:
flat1 = Flatten()(model.outputs[0])
I am trying to train a segmentation model, But loss saturates at 0.3370 , i am really not sure what to do, can someone please help
This is the model
def unet(input_shape=(128, 128, 128), optimizer=Adam, initial_learning_rate=5e-4,
loss_function=weighted_dice_coefficient_loss):
inputs = Input(shape=input_shape)
conv1 = UnetConv3D(inputs, 32, is_batchnorm=False, name='conv1')
pool1 = MaxPooling3D(pool_size=(2, 2,2 ))(conv1)
conv2 = UnetConv3D(pool1, 64, is_batchnorm=False, name='conv2')
pool2 = MaxPooling3D(pool_size=(2, 2,2 ))(conv2)
conv3 = UnetConv3D(pool2, 128, is_batchnorm=False, name='conv3')
pool3 = MaxPooling3D(pool_size=(2, 2,2 ))(conv3)
conv4 = UnetConv3D(pool3, 256, is_batchnorm=False, name='conv4')
pool4 = MaxPooling3D(pool_size=(2, 2,2 ))(conv4)
conv5 = Conv3D(512, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(pool4)
conv5 = Conv3D(512, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv5)
up6 = concatenate([Conv3DTranspose(256, (2, 2,2 ), strides=(2, 2,2 ), kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv5), conv4], axis=1)
conv6 = Conv3D(256, (3, 3, 3), activation='relu', padding='same', data_format = 'channels_first')(up6)
conv6 = Conv3D(256, (3, 3, 3), activation='relu', padding='same', data_format = 'channels_first')(conv6)
up7 = concatenate([Conv3DTranspose(128, (2, 2,2 ), strides=(2, 2,2 ), padding='same', data_format = 'channels_first')(conv6), conv3], axis=1)
conv7 = Conv3D(128, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(up7)
conv7 = Conv3D(128, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv7)
up8 = concatenate([Conv3DTranspose(64, (2, 2,2 ), strides=(2,2,2 ), kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv7), conv2], axis=1)
conv8 = Conv3D(64, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(up8)
up9 = concatenate([Conv3DTranspose(32, (2, 2,2 ), strides=(2, 2,2 ), kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv8), conv1], axis=1)
conv9 = Conv3D(32, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(up9)
conv9 = Conv3D(32, (3, 3, 3), activation='relu', kernel_initializer=kinit, padding='same', data_format = 'channels_first')(conv9)
conv10 = Conv3D(3, (1, 1, 1), activation='relu', kernel_initializer=kinit,padding = 'same', name='final', data_format = 'channels_first')(conv9)
activation_name = 'sigmoid'
activation_block = Activation(activation_name)(conv10)
model = Model(inputs=[inputs], outputs=[activation_block])
model.compile(optimizer=optimizer(), loss=loss_function)
return model
This is the helper function
def UnetConv3D(input, outdim, is_batchnorm, name):
x = Conv3D(outdim, (3, 3, 3), strides=(1, 1, 1), kernel_initializer=kinit, padding="same", name=name+'_1', data_format = 'channels_first')(input)
if is_batchnorm:
x =BatchNormalization(name=name + '_1_bn')(x)
x = Activation('relu',name=name + '_1_act')(x)
x = Conv3D(outdim, (3, 3, 3), strides=(1, 1, 1), kernel_initializer=kinit, padding="same", name=name+'_2', data_format = 'channels_first')(x)
if is_batchnorm:
x = BatchNormalization(name=name + '_2_bn')(x)
x = Activation('relu', name=name + '_2_act')(x)
return x
And this is the loss function --
def weighted_dice_coefficient(y_true, y_pred, axis=(-3, -2, -1), smooth=0.00001):
"""
Weighted dice coefficient. Default axis assumes a "channels first" data structure
:param smooth:
:param y_true:
:param y_pred:
:param axis:
:return:
"""
return K.mean(2. * (K.sum(y_true * y_pred,
axis=axis) + smooth/2)/(K.sum(y_true,
axis=axis) + K.sum(y_pred,
axis=axis) + smooth))
My input is (128,128,128), am i doing an obvious mistake? Please let me know if more info needed.
Model summary
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) (None, 1, 128, 128, 0
__________________________________________________________________________________________________
conv1_1 (Conv3D) (None, 32, 128, 128, 896 input_1[0][0]
__________________________________________________________________________________________________
conv1_1_act (Activation) (None, 32, 128, 128, 0 conv1_1[0][0]
__________________________________________________________________________________________________
conv1_2 (Conv3D) (None, 32, 128, 128, 27680 conv1_1_act[0][0]
__________________________________________________________________________________________________
conv1_2_act (Activation) (None, 32, 128, 128, 0 conv1_2[0][0]
__________________________________________________________________________________________________
max_pooling3d_1 (MaxPooling3D) (None, 32, 64, 64, 6 0 conv1_2_act[0][0]
__________________________________________________________________________________________________
conv2_1 (Conv3D) (None, 64, 64, 64, 6 55360 max_pooling3d_1[0][0]
__________________________________________________________________________________________________
conv2_1_act (Activation) (None, 64, 64, 64, 6 0 conv2_1[0][0]
__________________________________________________________________________________________________
conv2_2 (Conv3D) (None, 64, 64, 64, 6 110656 conv2_1_act[0][0]
__________________________________________________________________________________________________
conv2_2_act (Activation) (None, 64, 64, 64, 6 0 conv2_2[0][0]
__________________________________________________________________________________________________
max_pooling3d_2 (MaxPooling3D) (None, 64, 32, 32, 3 0 conv2_2_act[0][0]
__________________________________________________________________________________________________
conv3_1 (Conv3D) (None, 128, 32, 32, 221312 max_pooling3d_2[0][0]
__________________________________________________________________________________________________
conv3_1_act (Activation) (None, 128, 32, 32, 0 conv3_1[0][0]
__________________________________________________________________________________________________
conv3_2 (Conv3D) (None, 128, 32, 32, 442496 conv3_1_act[0][0]
__________________________________________________________________________________________________
conv3_2_act (Activation) (None, 128, 32, 32, 0 conv3_2[0][0]
__________________________________________________________________________________________________
max_pooling3d_3 (MaxPooling3D) (None, 128, 16, 16, 0 conv3_2_act[0][0]
__________________________________________________________________________________________________
conv4_1 (Conv3D) (None, 256, 16, 16, 884992 max_pooling3d_3[0][0]
__________________________________________________________________________________________________
conv4_1_act (Activation) (None, 256, 16, 16, 0 conv4_1[0][0]
__________________________________________________________________________________________________
conv4_2 (Conv3D) (None, 256, 16, 16, 1769728 conv4_1_act[0][0]
__________________________________________________________________________________________________
conv4_2_act (Activation) (None, 256, 16, 16, 0 conv4_2[0][0]
__________________________________________________________________________________________________
max_pooling3d_4 (MaxPooling3D) (None, 256, 8, 8, 8) 0 conv4_2_act[0][0]
__________________________________________________________________________________________________
conv3d_1 (Conv3D) (None, 512, 8, 8, 8) 3539456 max_pooling3d_4[0][0]
__________________________________________________________________________________________________
conv3d_2 (Conv3D) (None, 512, 8, 8, 8) 7078400 conv3d_1[0][0]
__________________________________________________________________________________________________
conv3d_transpose_1 (Conv3DTrans (None, 256, 16, 16, 1048832 conv3d_2[0][0]
__________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 512, 16, 16, 0 conv3d_transpose_1[0][0]
conv4_2_act[0][0]
__________________________________________________________________________________________________
conv3d_3 (Conv3D) (None, 256, 16, 16, 3539200 concatenate_1[0][0]
__________________________________________________________________________________________________
conv3d_4 (Conv3D) (None, 256, 16, 16, 1769728 conv3d_3[0][0]
__________________________________________________________________________________________________
conv3d_transpose_2 (Conv3DTrans (None, 128, 32, 32, 262272 conv3d_4[0][0]
__________________________________________________________________________________________________
concatenate_2 (Concatenate) (None, 256, 32, 32, 0 conv3d_transpose_2[0][0]
conv3_2_act[0][0]
__________________________________________________________________________________________________
conv3d_5 (Conv3D) (None, 128, 32, 32, 884864 concatenate_2[0][0]
__________________________________________________________________________________________________
conv3d_6 (Conv3D) (None, 128, 32, 32, 442496 conv3d_5[0][0]
__________________________________________________________________________________________________
conv3d_transpose_3 (Conv3DTrans (None, 64, 64, 64, 6 65600 conv3d_6[0][0]
__________________________________________________________________________________________________
concatenate_3 (Concatenate) (None, 128, 64, 64, 0 conv3d_transpose_3[0][0]
conv2_2_act[0][0]
__________________________________________________________________________________________________
conv3d_7 (Conv3D) (None, 64, 64, 64, 6 221248 concatenate_3[0][0]
__________________________________________________________________________________________________
conv3d_transpose_4 (Conv3DTrans (None, 32, 128, 128, 16416 conv3d_7[0][0]
__________________________________________________________________________________________________
concatenate_4 (Concatenate) (None, 64, 128, 128, 0 conv3d_transpose_4[0][0]
conv1_2_act[0][0]
__________________________________________________________________________________________________
conv3d_8 (Conv3D) (None, 32, 128, 128, 55328 concatenate_4[0][0]
__________________________________________________________________________________________________
conv3d_9 (Conv3D) (None, 32, 128, 128, 27680 conv3d_8[0][0]
__________________________________________________________________________________________________
final (Conv3D) (None, 3, 128, 128, 99 conv3d_9[0][0]
__________________________________________________________________________________________________
activation_1 (Activation) (None, 3, 128, 128, 0 final[0][0]
==================================================================================================
Thanks in advance
I'm trying to build u-net in keras for multi-class semantic segmentation. The model I have below does not learn anything. It always just predicts the background (first) class.
Is my use of the final 'softmax' layer correct? The documentation shows a axis parameter, but I'm not sure how to set that or what it should be.
def unet(input_shape=(572, 572, 1), classes=2):
input_image = KL.Input(shape=input_shape)
contracting_1, pooled_1 = blocks.contracting(input_image, filters=64, block_name="block1")
contracting_2, pooled_2 = blocks.contracting(pooled_1, filters=128, block_name="block2")
contracting_3, pooled_3 = blocks.contracting(pooled_2, filters=256, block_name="block3")
contracting_4, pooled_4 = blocks.contracting(pooled_3, filters=512, block_name="block4")
contracting_5, _ = blocks.contracting(pooled_4, filters=1024, block_name="block5")
dropout = KL.Dropout(rate=0.5)(contracting_5)
expanding_1 = blocks.expanding(dropout, merge_layer=contracting_4, filters=512, block_name="block6")
expanding_2 = blocks.expanding(expanding_1, merge_layer=contracting_3, filters=256, block_name="block7")
expanding_3 = blocks.expanding(expanding_2, merge_layer=contracting_2, filters=128, block_name="block8")
expanding_4 = blocks.expanding(expanding_3, merge_layer=contracting_1, filters=64, block_name="block9")
class_output = KL.Conv2D(classes, kernel_size=(1, 1), activation='softmax', name='class_output')(expanding_4)
model = KM.Model(inputs=[input_image], outputs=[class_output])
return model
blocks:
def contracting(input_layer, filters, kernel_size=(3, 3), padding='same',
block_name=""):
conv_a = KL.Conv2D(filters, kernel_size, activation='relu', padding=padding,
name='{}_contracting_conv_a'.format(block_name))(input_layer)
conv_b = KL.Conv2D(filters, kernel_size, activation='relu', padding=padding,
name='{}_contracting_conv_b'.format(block_name))(conv_a)
pool = KL.MaxPooling2D(pool_size=(2, 2), padding=padding,
name='{}_contracting_pool'.format(block_name))(conv_b)
batch_normalization = KL.BatchNormalization()(pool)
return conv_b, batch_normalization
def expanding(input_layer, merge_layer, filters, kernel_size=(3, 3), padding='same',
block_name=""):
input_layer = KL.UpSampling2D(size=(2, 2))(input_layer)
conv_up = KL.Conv2D(filters, kernel_size=(2, 2), activation='relu',
padding='same', name='{}_expanding_conv_up'.format(block_name))(input_layer)
conv_up_height, conv_up_width = int(conv_up.shape[1]), int(conv_up.shape[2])
merge_height, merge_width = int(merge_layer.shape[1]), int(merge_layer.shape[2])
crop_top = (merge_height - conv_up_height) // 2
crop_bottom = (merge_height - conv_up_height) - crop_top
crop_left = (merge_width - conv_up_width) // 2
crop_right = (merge_width - conv_up_width) - crop_left
cropping = ((crop_top, crop_bottom), (crop_left, crop_right))
merge_layer = KL.Cropping2D(cropping)(merge_layer)
merged = KL.concatenate([merge_layer, conv_up])
conv_a = KL.Conv2D(filters, kernel_size, activation='relu', padding=padding,
name='{}_expanding_conv_a'.format(block_name))(merged)
conv_b = KL.Conv2D(filters, kernel_size, activation='relu', padding=padding,
name='{}_expanding_conv_b'.format(block_name))(conv_a)
batch_normalization = KL.BatchNormalization()(conv_b)
return batch_normalization
compile:
optimizer = keras.optimizers.SGD(lr=0.0001, momentum=0.9)
loss = keras.losses.categorical_crossentropy
metrics = [keras.metrics.categorical_accuracy]
model.compile(optimizer, loss, metrics)
Model Summary:
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_2 (InputLayer) (None, 96, 96, 3) 0
__________________________________________________________________________________________________
block1_contracting_conv_a (Conv (None, 96, 96, 64) 1792 input_2[0][0]
__________________________________________________________________________________________________
block1_contracting_conv_b (Conv (None, 96, 96, 64) 36928 block1_contracting_conv_a[0][0]
__________________________________________________________________________________________________
block1_contracting_pool (MaxPoo (None, 48, 48, 64) 0 block1_contracting_conv_b[0][0]
__________________________________________________________________________________________________
batch_normalization_10 (BatchNo (None, 48, 48, 64) 256 block1_contracting_pool[0][0]
__________________________________________________________________________________________________
block2_contracting_conv_a (Conv (None, 48, 48, 128) 73856 batch_normalization_10[0][0]
__________________________________________________________________________________________________
block2_contracting_conv_b (Conv (None, 48, 48, 128) 147584 block2_contracting_conv_a[0][0]
__________________________________________________________________________________________________
block2_contracting_pool (MaxPoo (None, 24, 24, 128) 0 block2_contracting_conv_b[0][0]
__________________________________________________________________________________________________
batch_normalization_11 (BatchNo (None, 24, 24, 128) 512 block2_contracting_pool[0][0]
__________________________________________________________________________________________________
block3_contracting_conv_a (Conv (None, 24, 24, 256) 295168 batch_normalization_11[0][0]
__________________________________________________________________________________________________
block3_contracting_conv_b (Conv (None, 24, 24, 256) 590080 block3_contracting_conv_a[0][0]
__________________________________________________________________________________________________
block3_contracting_pool (MaxPoo (None, 12, 12, 256) 0 block3_contracting_conv_b[0][0]
__________________________________________________________________________________________________
batch_normalization_12 (BatchNo (None, 12, 12, 256) 1024 block3_contracting_pool[0][0]
__________________________________________________________________________________________________
block4_contracting_conv_a (Conv (None, 12, 12, 512) 1180160 batch_normalization_12[0][0]
__________________________________________________________________________________________________
block4_contracting_conv_b (Conv (None, 12, 12, 512) 2359808 block4_contracting_conv_a[0][0]
__________________________________________________________________________________________________
block4_contracting_pool (MaxPoo (None, 6, 6, 512) 0 block4_contracting_conv_b[0][0]
__________________________________________________________________________________________________
batch_normalization_13 (BatchNo (None, 6, 6, 512) 2048 block4_contracting_pool[0][0]
__________________________________________________________________________________________________
block5_contracting_conv_a (Conv (None, 6, 6, 1024) 4719616 batch_normalization_13[0][0]
__________________________________________________________________________________________________
block5_contracting_conv_b (Conv (None, 6, 6, 1024) 9438208 block5_contracting_conv_a[0][0]
__________________________________________________________________________________________________
dropout_2 (Dropout) (None, 6, 6, 1024) 0 block5_contracting_conv_b[0][0]
__________________________________________________________________________________________________
up_sampling2d_5 (UpSampling2D) (None, 12, 12, 1024) 0 dropout_2[0][0]
__________________________________________________________________________________________________
cropping2d_5 (Cropping2D) (None, 12, 12, 512) 0 block4_contracting_conv_b[0][0]
__________________________________________________________________________________________________
block6_expanding_conv_up (Conv2 (None, 12, 12, 512) 2097664 up_sampling2d_5[0][0]
__________________________________________________________________________________________________
concatenate_5 (Concatenate) (None, 12, 12, 1024) 0 cropping2d_5[0][0]
block6_expanding_conv_up[0][0]
__________________________________________________________________________________________________
block6_expanding_conv_a (Conv2D (None, 12, 12, 512) 4719104 concatenate_5[0][0]
__________________________________________________________________________________________________
block6_expanding_conv_b (Conv2D (None, 12, 12, 512) 2359808 block6_expanding_conv_a[0][0]
__________________________________________________________________________________________________
batch_normalization_15 (BatchNo (None, 12, 12, 512) 2048 block6_expanding_conv_b[0][0]
__________________________________________________________________________________________________
up_sampling2d_6 (UpSampling2D) (None, 24, 24, 512) 0 batch_normalization_15[0][0]
__________________________________________________________________________________________________
cropping2d_6 (Cropping2D) (None, 24, 24, 256) 0 block3_contracting_conv_b[0][0]
__________________________________________________________________________________________________
block7_expanding_conv_up (Conv2 (None, 24, 24, 256) 524544 up_sampling2d_6[0][0]
__________________________________________________________________________________________________
concatenate_6 (Concatenate) (None, 24, 24, 512) 0 cropping2d_6[0][0]
block7_expanding_conv_up[0][0]
__________________________________________________________________________________________________
block7_expanding_conv_a (Conv2D (None, 24, 24, 256) 1179904 concatenate_6[0][0]
__________________________________________________________________________________________________
block7_expanding_conv_b (Conv2D (None, 24, 24, 256) 590080 block7_expanding_conv_a[0][0]
__________________________________________________________________________________________________
batch_normalization_16 (BatchNo (None, 24, 24, 256) 1024 block7_expanding_conv_b[0][0]
__________________________________________________________________________________________________
up_sampling2d_7 (UpSampling2D) (None, 48, 48, 256) 0 batch_normalization_16[0][0]
__________________________________________________________________________________________________
cropping2d_7 (Cropping2D) (None, 48, 48, 128) 0 block2_contracting_conv_b[0][0]
__________________________________________________________________________________________________
block8_expanding_conv_up (Conv2 (None, 48, 48, 128) 131200 up_sampling2d_7[0][0]
__________________________________________________________________________________________________
concatenate_7 (Concatenate) (None, 48, 48, 256) 0 cropping2d_7[0][0]
block8_expanding_conv_up[0][0]
__________________________________________________________________________________________________
block8_expanding_conv_a (Conv2D (None, 48, 48, 128) 295040 concatenate_7[0][0]
__________________________________________________________________________________________________
block8_expanding_conv_b (Conv2D (None, 48, 48, 128) 147584 block8_expanding_conv_a[0][0]
__________________________________________________________________________________________________
batch_normalization_17 (BatchNo (None, 48, 48, 128) 512 block8_expanding_conv_b[0][0]
__________________________________________________________________________________________________
up_sampling2d_8 (UpSampling2D) (None, 96, 96, 128) 0 batch_normalization_17[0][0]
__________________________________________________________________________________________________
cropping2d_8 (Cropping2D) (None, 96, 96, 64) 0 block1_contracting_conv_b[0][0]
__________________________________________________________________________________________________
block9_expanding_conv_up (Conv2 (None, 96, 96, 64) 32832 up_sampling2d_8[0][0]
__________________________________________________________________________________________________
concatenate_8 (Concatenate) (None, 96, 96, 128) 0 cropping2d_8[0][0]
block9_expanding_conv_up[0][0]
__________________________________________________________________________________________________
block9_expanding_conv_a (Conv2D (None, 96, 96, 64) 73792 concatenate_8[0][0]
__________________________________________________________________________________________________
block9_expanding_conv_b (Conv2D (None, 96, 96, 64) 36928 block9_expanding_conv_a[0][0]
__________________________________________________________________________________________________
batch_normalization_18 (BatchNo (None, 96, 96, 64) 256 block9_expanding_conv_b[0][0]
__________________________________________________________________________________________________
class_output (Conv2D) (None, 96, 96, 4) 260 batch_normalization_18[0][0]
==================================================================================================
Total params: 31,039,620
Trainable params: 31,035,780
Non-trainable params: 3,840
__________________________________________________________________________________________________
Total params: 31,031,940
Trainable params: 31,031,940
Non-trainable params: 0
class percentages in dataset:
{0: 0.6245757457188198,
1: 0.16082110268729075,
2: 0.1188858904157366,
3: 0.09571726117815291}
class 0 is the background
shape of image from generator (rgb): (1, 96, 96, 3)
shape of labels from generator: (1, 96, 96, 4)
There doesn't seem to be anything that wrong in your model.
Softmax is ok, as it defaults to the last axis, and you're clearly using 'channels_last' as config. So it's ok.
Suggestions are:
Add a few BatchNormalization() layers and decrease your learning rate (this prevents relu from going too fast to "all zeroes").
Check that your output data range is correct, with np.unique(y_train) containing only 0 and 1
Check that every pixel is classified with only one class: (np.sum(y_train, axis=-1) == 1).all() == True.
Check if your images aren't too biased towards the first class. np.sum(y_train[:,:,:,0]) should not be too bigger than np.sum(y_train[:,:,:,1:]).
If it is, consider fitting with the class_weight parameter, passing weights to balance the loss for each class (check keras documentation on fit for how to use it)
This model works just fine for me with most of the segmentation projects, i use crossentropy for multiclass segmentation and smooth dice for binary classes
def conv_block(tensor, nfilters, size=3, padding='same', initializer="he_normal"):
x = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(tensor)
x = BatchNormalization()(x)
x = Activation("relu")(x)
x = Conv2D(filters=nfilters, kernel_size=(size, size), padding=padding, kernel_initializer=initializer)(x)
x = BatchNormalization()(x)
x = Activation("relu")(x)
return x
def deconv_block(tensor, residual, nfilters, size=3, padding='same', strides=(2, 2)):
y = Conv2DTranspose(nfilters, kernel_size=(size, size), strides=strides, padding=padding)(tensor)
y = concatenate([y, residual], axis=3)
y = conv_block(y, nfilters)
return y
def Unet(img_height, img_width, nclasses=3, filters=64):
# down
input_layer = Input(shape=(img_height, img_width, 3), name='image_input')
conv1 = conv_block(input_layer, nfilters=filters)
conv1_out = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = conv_block(conv1_out, nfilters=filters*2)
conv2_out = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = conv_block(conv2_out, nfilters=filters*4)
conv3_out = MaxPooling2D(pool_size=(2, 2))(conv3)
conv4 = conv_block(conv3_out, nfilters=filters*8)
conv4_out = MaxPooling2D(pool_size=(2, 2))(conv4)
conv4_out = Dropout(0.5)(conv4_out)
conv5 = conv_block(conv4_out, nfilters=filters*16)
conv5 = Dropout(0.5)(conv5)
# up
deconv6 = deconv_block(conv5, residual=conv4, nfilters=filters*8)
deconv6 = Dropout(0.5)(deconv6)
deconv7 = deconv_block(deconv6, residual=conv3, nfilters=filters*4)
deconv7 = Dropout(0.5)(deconv7)
deconv8 = deconv_block(deconv7, residual=conv2, nfilters=filters*2)
deconv9 = deconv_block(deconv8, residual=conv1, nfilters=filters)
# output
output_layer = Conv2D(filters=nclasses, kernel_size=(1, 1))(deconv9)
output_layer = BatchNormalization()(output_layer)
output_layer = Activation('softmax')(output_layer)
model = Model(inputs=input_layer, outputs=output_layer, name='Unet')
return model
Sometimes, the problem is related to model architecture. When you are dealing with a complicated dataset for segmentation, you need to enhance the model architecture. I encountered the same problem with a new dataset while the model could work well on another dataset. So, I used Res-Unet instead of Unet as the model architecture and the problem solved.
hope this will help