I am trying to build a simple GAN to generate digits from the MNIST dataset. However, when I get to training (which is custom) I get an annoying warning that I suspect is the cause of it not training the way I'm used to.
Keep in mind this is all in TensorFlow 2.0, using its default eager execution.
GET THE DATA (not that important)
(train_images,train_labels),(test_images,test_labels) = tf.keras.datasets.mnist.load_data()
train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32')
train_images = (train_images - 127.5) / 127.5 # Normalize the images to [-1, 1]
BUFFER_SIZE = 60000
BATCH_SIZE = 256
train_dataset = tf.data.Dataset.from_tensor_slices((train_images,train_labels)).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
GENERATOR MODEL (this is where the batch normalization is)
def make_generator_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(7*7*256, use_bias=False, input_shape=(100,)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.LeakyReLU())

    model.add(tf.keras.layers.Reshape((7, 7, 256)))
    assert model.output_shape == (None, 7, 7, 256)  # Note: None is the batch size

    model.add(tf.keras.layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False))
    assert model.output_shape == (None, 7, 7, 128)
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.LeakyReLU())

    model.add(tf.keras.layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
    assert model.output_shape == (None, 14, 14, 64)
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.LeakyReLU())

    model.add(tf.keras.layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None, 28, 28, 1)

    return model
DISCRIMINATOR MODEL (likely not that important)
def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dropout(0.3))

    model.add(tf.keras.layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Dropout(0.3))

    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(1))
    return model
INSTANTIATE THE MODELS (likely not that important)
generator = make_generator_model()
discriminator = make_discriminator_model()
DEFINE THE LOSSES (maybe the generator loss is important, since that is where the gradient comes from)
def generator_loss(generated_output):
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(generated_output), logits=generated_output)

def discriminator_loss(real_output, generated_output):
    # [1,1,...,1] with real output since it is true and we want our generated examples to look like it
    real_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(real_output), logits=real_output)
    # [0,0,...,0] with generated images since they are fake
    generated_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(generated_output), logits=generated_output)
    total_loss = real_loss + generated_loss
    return total_loss
MAKE THE OPTIMIZERS (likely not important)
generator_optimizer = tf.optimizers.Adam(1e-4)
discriminator_optimizer = tf.optimizers.Adam(1e-4)
RANDOM NOISE FOR THE GENERATOR (likely not important)
EPOCHS = 50
noise_dim = 100
num_examples_to_generate = 16
# We'll re-use this random vector used to seed the generator so
# it will be easier to see the improvement over time.
random_vector_for_generation = tf.random.normal([num_examples_to_generate, noise_dim])
A SINGLE TRAIN STEP (this is where I get the warning)
def train_step(images):
    # generating noise from a normal distribution
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        real_output = discriminator(images[0], training=True)
        generated_output = discriminator(generated_images, training=True)

        gen_loss = generator_loss(generated_output)
        disc_loss = discriminator_loss(real_output, generated_output)

This line >>>>>
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.variables)
<<<<< This line
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.variables))
THE FULL TRAIN (not important except that it calls train_step)
def train(dataset, epochs):
    for epoch in range(epochs):
        start = time.time()
        for images in dataset:
            train_step(images)
        display.clear_output(wait=True)
        generate_and_save_images(generator,
                                 epoch + 1,
                                 random_vector_for_generation)
        # saving (checkpoint) the model every 15 epochs
        if (epoch + 1) % 15 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)
        print('Time taken for epoch {} is {} sec'.format(epoch + 1,
                                                         time.time() - start))
    # generating after the final epoch
    display.clear_output(wait=True)
    generate_and_save_images(generator,
                             epochs,
                             random_vector_for_generation)
BEGIN TRAINING
train(train_dataset, EPOCHS)
The warning I get is as follows:
W0330 19:42:57.366302 4738405824 optimizer_v2.py:928] Gradients does
not exist for variables ['batch_normalization_v2_54/moving_mean:0',
'batch_normalization_v2_54/moving_variance:0',
'batch_normalization_v2_55/moving_mean:0',
'batch_normalization_v2_55/moving_variance:0',
'batch_normalization_v2_56/moving_mean:0',
'batch_normalization_v2_56/moving_variance:0'] when minimizing the
loss.
And I get an image from the generator which looks like this:
which is kind of what I would expect without the normalization: everything clumps toward one corner because there are extreme values.
The problem is here:
gradients_of_generator = gen_tape.gradient(gen_loss, generator.variables)
You should only be getting gradients for the trainable variables. So you should change it to
gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
The same goes for the three similar lines that follow. The variables field includes things like the running averages that batch norm uses during inference. Because they are not used during training, there are no sensible gradients defined for them, and trying to compute them produces the warning you see.
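Spelled out, the four lines become:

gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

# apply_gradients must zip over the same trainable-variable lists
generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))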
I'm stuck on a function that is supposed to predict the label of a single image. I need to do this on a single image because I want to build a web app where the user can upload an image and get its prediction.
My CNN is the following, starting with the base class for the model:
class ImageClassificationBase(nn.Module):
    def training_step(self, batch):
        images, labels = batch
        out = self(images)                   # Generate predictions
        loss = F.cross_entropy(out, labels)  # Calculate loss
        return loss

    def validation_step(self, batch):
        images, labels = batch
        out = self(images)                   # Generate predictions
        loss = F.cross_entropy(out, labels)  # Calculate loss
        acc = accuracy(out, labels)          # Calculate accuracy
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()  # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()     # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))
and the model itself:
class BrainTumorClassification(ImageClassificationBase):
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(82944, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 6))

    def forward(self, xb):
        return self.network(xb)
The function I'm trying to implement for testing a single image is the following:
from torch.autograd import Variable

transformer = transforms.Compose([
    transforms.Resize((150,150)), transforms.ToTensor()])

def classify(image_path, image_transforms, classes):
    image = Image.open(image_path)
    image_tensor = image_transforms(image).float()
    image_tensor = image_tensor.unsqueeze_(0)
    input = Variable(image_tensor)
    output = model(input)
    index = output.data.numpy().argmax()
    pred = classes[index]
    return pred
I'm getting an error on the line `pred = classes[index]`: index out of range.
I should mention that classes has 4 elements : ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor'].
A few points to note:
Don't forget to load your trained weights into your initialized model.
Variable has been deprecated; you should not use it. Gradients are tracked on tensors that have the requires_grad flag set. Here you are only inferring, so you can use the torch.no_grad context to avoid retaining parameter activations. This will increase inference speed.
With torch.Tensor.unsqueeze_ you don't have to reassign the result, as the input itself is modified by the function. As a general note, all torch.Tensor functions with a _ suffix are in-place operations.
Most of all, you mentioned having only 4 classes, yet your last fully-connected layer outputs 6 logits. You need to change that 6 to 4.
Here is a possible modification:
transformer = transforms.Compose([transforms.Resize((150,150)),
                                  transforms.ToTensor()])

@torch.no_grad()
def classify(image_path, image_transforms, classes):
    image = Image.open(image_path)
    image_tensor = image_transforms(image)
    image_tensor.unsqueeze_(0)
    output = model(image_tensor)
    index = output.numpy().argmax()  # .data is unnecessary: no gradients are tracked here
    pred = classes[index]
    return pred
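A hypothetical usage sketch tying these points together (the checkpoint and image paths are placeholders, not from the original post):

import torch

# Hypothetical: load the trained weights into a fresh model before inferring.
model = BrainTumorClassification()
model.load_state_dict(torch.load('brain_tumor_model.pth'))  # placeholder checkpoint path
model.eval()  # good practice: puts layers like dropout/batch norm (if any) in eval mode

classes = ['glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']
print(classify('uploaded_image.jpg', transformer, classes))  # placeholder image path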
I have been scratching my head over this OOM error for days, and I am new to Keras. I have tried sampling down my data, lowering the batch size, and removing layers from the 3D U-Net, but nothing is working for me. I am using the LIDC-IDRI dataset of CT scans of 1010 patients. After pre-processing, I save volumes of shape 64x64x64 to disk, extracted from resampled 256x256x256 whole CT scans (at first I tried to train on whole CT scans, but after getting OOM I decided to go with 64-cube shapes). Each patient has 64 volumes of 64x64x64, and in total that makes 64,640 samples on which I have to train my 3D U-Net.
Here’s my Keras code for the model:
im_width = 64
im_height = 64
im_depth = 64
path_train = 'D:/LIDC-IDRI-Dataset/'
def npz_volume_generator(inputPath, bs, mode="train", aug=None):
    batch_start_index = 0
    patients = os.listdir(inputPath + "images")
    # loop indefinitely
    while True:
        # initialize our batches of scans and masks
        scan_pixels = []
        mask_pixels = []
        # keep looping until we reach our batch size
        for id_ in range(batch_start_index, batch_start_index + bs):
            # attempt to read the next sample from path
            scan_pixel = np.zeros((im_depth, im_width, im_height))
            scan_pixel = np.load(inputPath + 'images/' + patients[id_])['arr_0']
            mask_pixel = np.zeros((im_depth, im_width, im_height))
            mask_pixel = np.load(inputPath + 'masks/' + patients[id_])['arr_0']
            # check to see if we have reached the end of our samples
            if(batch_start_index >= len(patients)):
                # reset the batch start index to the beginning of our samples
                batch_start_index -= len(patients)
                # if we are evaluating we should now break from our
                # loop to ensure we don't continue to fill up the
                # batch from samples from the beginning
                if mode == "eval":
                    break
            # update our corresponding batch lists
            scan_pixels.append(scan_pixel)
            mask_pixels.append(mask_pixel)
        batch_start_index += bs
        if(batch_start_index >= len(patients)):
            batch_start_index -= len(patients)
        # if the data augmentation object is not None, apply it
        if aug is not None:
            (scan_pixels, mask_pixels) = next(aug.flow(np.array(scan_pixels), np.array(mask_pixels), batch_size=bs))
        # Re-shaping and adding a channel dimension (5D tensor)
        # batch_size, length, breadth, height, channel [None, im_width, im_height, im_depth, 1]
        # yield the batch to the calling function
        yield (np.array(expand_dims(scan_pixels, axis=4)), np.array(expand_dims(mask_pixels, axis=4)))
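As a quick sanity check of the generator above (assuming the 64-cube volumes on disk described earlier), one yielded batch should be a pair of 5D tensors:

scans, masks = next(npz_volume_generator(path_train, 8))  # default mode='train', aug=None
print(scans.shape)  # expected: (8, 64, 64, 64, 1)
print(masks.shape)  # expected: (8, 64, 64, 64, 1)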
def conv3d_block(input_tensor, n_filters, kernel_size=3, batchnorm=True):
    # first layer
    x = Conv3D(filters=n_filters, kernel_size=(kernel_size, kernel_size, kernel_size), kernel_initializer="he_normal",
               padding="same")(input_tensor)
    if batchnorm:
        x = BatchNormalization()(x)
    x = Activation("relu")(x)
    # second layer
    x = Conv3D(filters=n_filters, kernel_size=(kernel_size, kernel_size, kernel_size), kernel_initializer="he_normal",
               padding="same")(x)
    if batchnorm:
        x = BatchNormalization()(x)
    x = Activation("relu")(x)
    return x
def get_unet(input_img, n_filters=16, dropout=0.5, batchnorm=True):
    # contracting path
    c1 = conv3d_block(input_img, n_filters=n_filters*1, kernel_size=3, batchnorm=batchnorm)
    p1 = MaxPooling3D((2, 2, 2))(c1)
    p1 = Dropout(dropout*0.5)(p1)

    c2 = conv3d_block(p1, n_filters=n_filters*2, kernel_size=3, batchnorm=batchnorm)
    p2 = MaxPooling3D((2, 2, 2))(c2)
    p2 = Dropout(dropout)(p2)

    c3 = conv3d_block(p2, n_filters=n_filters*4, kernel_size=3, batchnorm=batchnorm)
    p3 = MaxPooling3D((2, 2, 2))(c3)
    p3 = Dropout(dropout)(p3)

    c4 = conv3d_block(p3, n_filters=n_filters*16, kernel_size=3, batchnorm=batchnorm)

    # expansive path
    u5 = Conv3DTranspose(n_filters*8, (3, 3, 3), strides=(2, 2, 2), padding='same')(c4)
    u5 = concatenate([u5, c3])
    u5 = Dropout(dropout)(u5)
    c5 = conv3d_block(u5, n_filters=n_filters*8, kernel_size=3, batchnorm=batchnorm)

    u6 = Conv3DTranspose(n_filters*4, (3, 3, 3), strides=(2, 2, 2), padding='same')(c5)
    u6 = concatenate([u6, c2])
    u6 = Dropout(dropout)(u6)
    c6 = conv3d_block(u6, n_filters=n_filters*4, kernel_size=3, batchnorm=batchnorm)

    u7 = Conv3DTranspose(n_filters*2, (3, 3, 3), strides=(2, 2, 2), padding='same')(c6)
    u7 = concatenate([u7, c1])
    u7 = Dropout(dropout)(u7)
    c7 = conv3d_block(u7, n_filters=n_filters*2, kernel_size=3, batchnorm=batchnorm)

    outputs = Conv3D(1, (1, 1, 1), activation='sigmoid')(c7)
    model = Model(inputs=[input_img], outputs=[outputs])
    return model
# initialize the number of epochs to train for and batch size
NUM_EPOCHS = 50
BS = 8
# initialize the total number of training and testing images
NUM_TRAIN_IMAGES = len(os.listdir(path_train+ 'images/'))
NUM_TEST_IMAGES = len(os.listdir(path_train+ 'test/'))
# construct the training image generator for data augmentation
aug = ImageDataGenerator(rotation_range=20, zoom_range=0.15,
                         width_shift_range=0.2, height_shift_range=0.2, shear_range=0.15,
                         horizontal_flip=True, fill_mode="nearest")
# initialize both the training and testing image generators
trainGen = npz_volume_generator(path_train, BS, mode="train", aug=aug)
testGen = npz_volume_generator(path_train, BS, mode="train", aug=None)
# initialize our Keras model and compile it
model = get_unet(Input((im_depth, im_width, im_height, 1)), n_filters=16, dropout=0.05, batchnorm=True)
print(model.summary())
model.compile(optimizer=Adam(), loss="binary_crossentropy", metrics=["accuracy"])
# train the network
print("[INFO] training w/ generator...")
H = model.fit_generator(trainGen, steps_per_epoch=NUM_TRAIN_IMAGES // BS,
                        validation_data=testGen, validation_steps=NUM_TEST_IMAGES // BS,
                        epochs=NUM_EPOCHS)
There are two issues with the output I get. The first is this warning:
\Anaconda3\lib\site-packages\keras_preprocessing\image\numpy_array_iterator.py:127: UserWarning: NumpyArrayIterator is set to use the data format convention "channels_last" (channels on axis 3), i.e. expected either 1, 3, or 4 channels on axis 3. However, it was passed an array with shape (8, 64, 64, 64) (64 channels). str(self.x.shape[channels_axis]) + ' channels).')
It states that the array passed to Keras had shape (8, 64, 64, 64), i.e. 64 channels. However, the input shape I declared in Keras's Input() function is (64, 64, 64, 1), with 1 being the channel on the last axis (you don't declare the batch size there, which is 8 in my case). Yet Keras says the array passed to it has 64 channels, ignoring the last dimension I gave it.
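For reference, a quick trace of the array shapes inside my generator (the channel axis is only added at yield time):

import numpy as np

scan_pixels = [np.zeros((64, 64, 64)) for _ in range(8)]  # one batch of volumes
print(np.array(scan_pixels).shape)                # (8, 64, 64, 64) -- what aug.flow() receives
print(np.expand_dims(scan_pixels, axis=4).shape)  # (8, 64, 64, 64, 1) -- what is yielded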
The second error that I get is as following:
ResourceExhaustedError: OOM when allocating tensor with shape[8,32,64,64,64] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[{{node conv3d_transpose_3/conv3d_transpose}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
[[{{node loss/mul}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
Again I have a problem with a shape here, the tensor shape, though a different one. My shape should be (8, 64, 64, 64, 1), but what it reports is (8, 32, 64, 64, 64). Not only is the number of channels huge here, but I also have no idea where that 32 came from. Is there a different interpretation of the tensor shape? I think something is wrong with my input shapes (which are unknowingly being set very large), and that is causing the OOM error.
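For scale, a back-of-the-envelope calculation of the single tensor named in the OOM message:

# shape [8, 32, 64, 64, 64] in float32 (4 bytes per element)
elements = 8 * 32 * 64 * 64 * 64      # 67,108,864 elements
print(elements * 4 / 1024**2, 'MB')   # 256.0 MB for this one activation alone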
I am trying to train a 1-D ConvNet for time series classification, as shown in this paper (refer to FCN in Fig. 1b): https://arxiv.org/pdf/1611.06455.pdf
The Keras implementation is giving me vastly superior performance. Could someone explain why that is?
The code for Pytorch is as follow:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv1d(x_train.shape[1], 128, 8)
        self.bnorm1 = nn.BatchNorm1d(128)
        self.conv2 = nn.Conv1d(128, 256, 5)
        self.bnorm2 = nn.BatchNorm1d(256)
        self.conv3 = nn.Conv1d(256, 128, 3)
        self.bnorm3 = nn.BatchNorm1d(128)
        self.dense = nn.Linear(128, nb_classes)

    def forward(self, x):
        c1 = self.conv1(x)
        b1 = F.relu(self.bnorm1(c1))
        c2 = self.conv2(b1)
        b2 = F.relu(self.bnorm2(c2))
        c3 = self.conv3(b2)
        b3 = F.relu(self.bnorm3(c3))
        output = torch.mean(b3, 2)
        dense1 = self.dense(output)
        return F.softmax(dense1)
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.5, momentum=0.99)
losses = []
for t in range(1000):
    y_pred_1 = model(x_train.float())
    loss_1 = criterion(y_pred_1, y_train.long())
    print(t, loss_1.item())
    optimizer.zero_grad()
    loss_1.backward()
    optimizer.step()
For comparison, I use the following code for Keras:
x = keras.layers.Input(x_train.shape[1:])
conv1 = keras.layers.Conv1D(128, 8, padding='valid')(x)
conv1 = keras.layers.BatchNormalization()(conv1)
conv1 = keras.layers.Activation('relu')(conv1)
conv2 = keras.layers.Conv1D(256, 5, padding='valid')(conv1)
conv2 = keras.layers.BatchNormalization()(conv2)
conv2 = keras.layers.Activation('relu')(conv2)
conv3 = keras.layers.Conv1D(128, 3, padding='valid')(conv2)
conv3 = keras.layers.BatchNormalization()(conv3)
conv3 = keras.layers.Activation('relu')(conv3)
full = keras.layers.GlobalAveragePooling1D()(conv3)
out = keras.layers.Dense(nb_classes, activation='softmax')(full)
model = keras.models.Model(inputs=x, outputs=out)
optimizer = keras.optimizers.SGD(lr=0.5, decay=0.0, momentum=0.99)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
hist = model.fit(x_train, Y_train, batch_size=x_train.shape[0], nb_epoch=2000)
The only difference I see between the two is the initialization, yet the results are vastly different. For reference, I use the same preprocessing for both datasets, with a subtle difference in input shapes: for PyTorch (Batch_Size, Channels, Length) and for Keras (Batch_Size, Length, Channels).
The different results are due to different default parameters in the layers and the optimizer. For example, in PyTorch the decay rate of batch norm is effectively 0.9, whereas in Keras it is 0.99. There may be other variations in default parameters like that.
If you use the same parameters and a fixed random seed for initialization, there won't be much difference in the results between the two libraries.
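As one concrete illustration, a sketch of matching just that one default (PyTorch's momentum argument is the weight of the new observation, so a Keras decay of 0.99 corresponds to momentum = 0.01; the epsilon defaults differ too):

import torch.nn as nn

# Keras BatchNormalization defaults: momentum=0.99 (decay of moving stats), epsilon=1e-3.
# PyTorch BatchNorm1d defaults: momentum=0.1 (i.e. a decay of 0.9), eps=1e-5.
# To mimic the Keras behaviour in the PyTorch model above:
bnorm1 = nn.BatchNorm1d(128, momentum=0.01, eps=1e-3)  # decay 0.99, Keras-style epsilon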
I'm trying to build a model that combines a CNN and an LSTM.
I want to run multiple inputs through the CNN and feed the outputs sequentially into the LSTM. However, there is a problem merging the CNN outputs. If I use concatenate, everything is stretched along axis=-1, as shown. But since I will feed the result into the LSTM, I'd like the outputs stacked sequentially instead, and I didn't find any merge function other than concatenate. The shape I want is (None, 6, 1904), as in the image below. What can I do?
Below is my build code.
def build_model():
    in_layers, out_layers = [], []
    for i in range(in_len):
        inputs = Input(shape=(row, col, channel))
        conv1 = Conv2D(4, (12, 12), activation='relu')(inputs)
        pool1 = pooling.MaxPooling2D(pool_size=(4,4))(conv1)
        conv2 = Conv2D(4, (7, 7), activation='relu')(pool1)
        pool2 = pooling.MaxPooling2D(pool_size=(3,3))(conv2)
        conv3 = Conv2D(8, (5, 5), activation='relu')(pool2)
        pool3 = pooling.MaxPooling2D(pool_size=(2,2))(conv3)
        flat = Flatten()(pool3)
        # store layers
        in_layers.append(inputs)
        out_layers.append(flat)
        print(type(flat))
    merged = concatenate(out_layers)
    model = Model(inputs=in_layers, outputs=merged)
    plot_model(model, show_shapes=True, to_file='cnn_lstm_real.png')
    return model
What you want is still concatenation, but along a different, new axis. The concatenation layer and function let you specify the axis, so you can do it like this:
def build_model():
    in_layers, out_layers = [], []
    for i in range(in_len):
        inputs = Input(shape=(row, col, channel))
        conv1 = Conv2D(4, (12, 12), activation='relu')(inputs)
        pool1 = pooling.MaxPooling2D(pool_size=(4,4))(conv1)
        conv2 = Conv2D(4, (7, 7), activation='relu')(pool1)
        pool2 = pooling.MaxPooling2D(pool_size=(3,3))(conv2)
        conv3 = Conv2D(8, (5, 5), activation='relu')(pool2)
        pool3 = pooling.MaxPooling2D(pool_size=(2,2))(conv3)
        flat = Flatten()(pool3)
        flat = Reshape((1, -1))(flat)
        # store layers
        in_layers.append(inputs)
        out_layers.append(flat)
    merged = concatenate(out_layers, axis=1)
    model = Model(inputs=in_layers, outputs=merged)
    plot_model(model, show_shapes=True, to_file='cnn_lstm_real.png')
    return model
The only big difference is that you need to add the new axis explicitly in the output of each branch (hence the Reshape layer), in order to allow for concatenation to happen along that axis.
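A quick shape check using the question's numbers (in_len = 6 branches, 1904 flattened features per branch): each Reshape output is (None, 1, 1904), and concatenating six of them along axis=1 gives the desired sequence shape, which an LSTM can consume directly.

model = build_model()
print(model.output_shape)  # expected: (None, 6, 1904), assuming in_len=6 and 1904 features per branch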
I'm writing a U-Net CNN in Keras and trying to use fit_generator for training. For this to work, I use a generator script that feeds the images and labels to my network (the simple fit function works, but I want to train on a big dataset that cannot fit into memory).
My problem is that in the model summary, it correctly says that the output layer has shape (None, 288, 512, 4):
https://i.imgur.com/69xG8pO.jpg
but when I try actual training I get this error:
https://i.imgur.com/j7H6sHX.jpg
I don't get why Keras wants (288, 512, 1) when the summary expects (288, 512, 4).
I tried it with my own U-Net code, and also copied working code from GitHub, but both have the exact same problem, which leads me to believe my generator script is the weak link. Below is the code I used (the image and label array functions used here already worked when I used them with fit in a previous CNN):
def generator(img_path, label_path, batch_size, height, width, num_classes):
    input_pairs = get_pairs(img_path, label_path)  # rewrite if param name changes
    random.shuffle(input_pairs)
    iterate_pairs = itertools.cycle(input_pairs)
    while True:
        X = []
        Y = []
        for _ in range(batch_size):
            im, lab = next(iterate_pairs)
            appended_im = next(iter(im))
            appended_lab = next(iter(lab))
            X.append(input_image_array(appended_im, width, height))
            Y.append(input_label_array(appended_lab, width, height, num_classes, palette))
        yield (np.array(X), np.array(Y))
I tried the generator out, and the batches it provides have the following shapes (for a batch size of 15):
(15, 288, 512, 3)
(15, 288, 512, 4)
So I really do not know what could be the problem here.
EDIT: Here is the model code I used:
def conv_block(input_tensor, n_filter, kernel=(3, 3), padding='same', initializer="he_normal"):
    x = Conv2D(n_filter, kernel, padding=padding, kernel_initializer=initializer)(input_tensor)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(n_filter, kernel, padding=padding, kernel_initializer=initializer)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    return x

def deconv_block(input_tensor, residual, n_filter, kernel=(3, 3), strides=(2, 2), padding='same'):
    y = Conv2DTranspose(n_filter, kernel, strides, padding)(input_tensor)
    y = concatenate([y, residual], axis=3)
    y = conv_block(y, n_filter)
    return y

# NETWORK - n_classes is the desired number of classes, filters are fixed
def Unet(input_height, input_width, n_classes=4, filters=64):
    # Downsampling
    input_layer = Input(shape=(input_height, input_width, 3), name='input')
    conv_1 = conv_block(input_layer, filters)
    conv_1_out = MaxPooling2D(pool_size=(2, 2))(conv_1)
    conv_2 = conv_block(conv_1_out, filters*2)
    conv_2_out = MaxPooling2D(pool_size=(2, 2))(conv_2)
    conv_3 = conv_block(conv_2_out, filters*4)
    conv_3_out = MaxPooling2D(pool_size=(2, 2))(conv_3)
    conv_4 = conv_block(conv_3_out, filters*8)
    conv_4_out = MaxPooling2D(pool_size=(2, 2))(conv_4)
    conv_4_drop = Dropout(0.5)(conv_4_out)
    conv_5 = conv_block(conv_4_drop, filters*16)
    conv_5_drop = Dropout(0.5)(conv_5)
    # Upsampling
    deconv_1 = deconv_block(conv_5_drop, conv_4, filters*8)
    deconv_1_drop = Dropout(0.5)(deconv_1)
    deconv_2 = deconv_block(deconv_1_drop, conv_3, filters*4)
    deconv_2_drop = Dropout(0.5)(deconv_2)
    deconv_3 = deconv_block(deconv_2_drop, conv_2, filters*2)
    deconv_3 = deconv_block(deconv_3, conv_1, filters)
    # Output - mapping each 64-component feature vector to the number of classes
    output = Conv2D(n_classes, (1, 1))(deconv_3)
    output = BatchNormalization()(output)
    output = Activation("softmax")(output)
    # embed into functional API
    model = Model(inputs=input_layer, outputs=output, name="Unet")
    return model
Change your loss to categorical_crossentropy.
When using the sparse_categorical_crossentropy loss, your targets should be integer targets.
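A minimal sketch of the corresponding compile call (the optimizer is a placeholder, since the original compile line isn't shown in the question):

model = Unet(288, 512, n_classes=4)
model.compile(optimizer='adam',                 # placeholder optimizer
              loss='categorical_crossentropy',  # matches the one-hot (288, 512, 4) targets
              metrics=['accuracy'])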