Shapes are incompatible for multi class classifier - python-3.x

I have a multi-class classifier, that takes inputs from a generator:
def generate_train_data(path, x_shape):
genres = {"hip-hop":0, "r&b":1, "pop":2, "jazz":3}
genre_labels = to_categorical(list(genres.values()), num_classes=len(genres))
# some processing to create variables x and genre...
# (mock values)
x = np.zeros(x_shape)
x = x[None, :, :, :]
genre = "hip-hop"
yield (x, genre_labels[genres[genre]])
The classifier is defined below:
input_shape = (96, 84, 5)
i = Input(shape=input_shape, name='encoder_input')
cx = Conv2D(filters=8, kernel_size=3, strides=2, padding='same', activation='relu')(i)
cx = BatchNormalization()(cx)
cx = Conv2D(filters=16, kernel_size=3, strides=2, padding='same', activation='relu')(cx)
cx = BatchNormalization()(cx)
x = Flatten()(cx)
x = Dense(20, activation='relu')(x)
x = BatchNormalization()(x)
x = Dense(4, activation='softmax')(x)
classifier = Model(i, x, name='genre_classifier')
classifier.summary()
classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
However, when I try to fit the classifier:
classifier.fit(generate_train_data(path, input_shape), epochs=30, validation_data=generate_test_data(path, input_shape), verbose=verbosity)
I get the following error:
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:806 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:796 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:789 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:749 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:149 __call__
losses = ag_call(y_true, y_pred)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:253 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py:1535 categorical_crossentropy
return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py:4687 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 1) and (None, 4) are incompatible
The class label value returned by the generators is an array of length 4, so why is Keras suggesting it is of size 1?
NOTE: This code is being run on Colab, tensorflow version 2.3. A mock version that reproduces this error can be found on this Colab link: https://colab.research.google.com/drive/1SQZFspj3UOwP2ApIiaI2lvB2Z59bdVOk?usp=sharing
EDIT: added mock values in generate_train_data so that code can be reproducible

You need to add a dimension for batch_size for both x and y. In your generator, add a None-dimension by changing: genre_labels[genres[genre]] to genre_labels[genres[genre]][None, :].

The output of the one-hot encoding needs to be packed into a batch of size 1 to fit the expected shape [None, 4]; this is done with np.asarray([]).
Use
yield (x, np.asarray([genre_labels[genres[genre], :]]))
instead of:
yield (x, genre_labels[genres[genre]])

Related

Problem using tf.keras.utils.timeseries_dataset_from_array in Functional Keras API

I am working on building an LSTM model on the M5 Forecasting Challenge (a Kaggle dataset).
I am using the functional Keras API to build my model. I have attached a picture of my model. Input is generated using 'tf.keras.utils.timeseries_dataset_from_array' and the error I receive is
ValueError: Layer "model_4" expects 18 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, None, 18) dtype=float32>]
This is the code I am using to generate a time series dataset.
dataset = tf.keras.utils.timeseries_dataset_from_array(data=array, targets=None,
sequence_length=window, sequence_stride=1, batch_size=32)
My NN model
input_tensors = {}
for col in train_sel.columns:
if col in cat_cols:
input_tensors[col] = layers.Input(name = col, shape=(1,),dtype=tf.string)
else:
input_tensors[col]=layers.Input(name = col, shape=(1,), dtype = tf.float16
embedding = []
for feature in input_tensors:
if feature in cat_cols:
embed = layers.Embedding(input_dim = train_sel[feature].nunique(), output_dim = int(math.sqrt(train_sel[feature].nunique())))
embed = embed(input_tensors[feature])
else:
embed = layers.BatchNormalization()
embed = embed(tf.expand_dims(input_tensors[feature], -1))
embedding.append(embed)
temp = embedding
embedding = layers.concatenate(inputs = embedding)
nn_model = layers.LSTM(128)(embedding)
nn_model = layers.Dropout(0.1)(nn_model)
output = layers.Dense(1, activation = 'tanh')(nn_model)
model = tf.keras.Model(inputs=split_input,outputs = output)
Presently, I am fitting the model using
model.compile(
optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
loss=tf.keras.losses.MeanSquaredError(),
metrics=[tf.keras.losses.MeanSquaredError()])
model.fit(dataset,epochs = 5)
I am receiving a value error
ValueError: in user code:
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1051, in train_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1040, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1030, in run_step **
outputs = model.train_step(data)
File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 889, in train_step
y_pred = self(x, training=True)
File "/usr/local/lib/python3.8/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/usr/local/lib/python3.8/dist-packages/keras/engine/input_spec.py", line 200, in assert_input_compatibility
raise ValueError(f'Layer "{layer_name}" expects {len(input_spec)} input(s),'
ValueError: Layer "model_4" expects 18 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, None, 18) dtype=float32>]

ValueError: Shapes (None, 200, 3) and (1, 3) are incompatible

This is the model that I am trying to train for identifying the possible tag (out of three tags) for each word. I have also added a layer from another model whose output shape is [1, 100] tensors, and then concatenated it with the BiLSTM output -
input1_entity = Input(shape = (200,))
last_hidden_layer_output = last_hidden_layer(tensorflow.reshape(input1_entity, [1, 200]))
embedding_entity = Embedding((4817), 200, input_length = 200, weights = [embedding_matrix], trainable = False)(input1_entity)
bilstm1_entity = Bidirectional(LSTM(100, return_sequences = True, recurrent_dropout = 0.2), merge_mode = 'concat')(embedding_entity)
lstm1_entity = Bidirectional(LSTM(100, return_sequences = True, dropout = 0.5, recurrent_dropout = 0.2))(bilstm1_entity)
lstm2_entity = Bidirectional(LSTM(50))(lstm1_entity)
merge_layer = concatenate([lstm2_entity, last_hidden_layer_output])
dense1_entity = Dense(128, activation = 'relu')(merge_layer)
dense2_entity = Dense(128, activation = 'relu')(dense1_entity)
dropout1_entity = Dropout(0.5)(dense2_entity)
dense3_entity = Dense(64, activation = 'tanh')(dropout1_entity)
output1_entity = Dense(3, activation = 'softmax')(dense3_entity)
model_entity = Model(inputs = input1_entity, outputs = output1_entity)
model_entity.compile(
loss = 'categorical_crossentropy',
optimizer = 'adam',
metrics = [tensorflow.keras.metrics.CategoricalAccuracy()],
sample_weight_mode = 'temporal'
)
And this is how I am training the model -
history = model_entity.fit(pad_tokens_train,
np.array(pad_tags_train),
batch_size=250,
verbose=1,
epochs=50,
sample_weight = sample_weight,
validation_split=0.2)
But I keep on getting this error -
ValueError: in user code:
File "/Users/kawaii/miniforge3/envs/tensor_no_gpu/lib/python3.8/site-packages/keras/engine/training.py", line 878, in train_function *
return step_function(self, iterator)
File "/Users/kawaii/miniforge3/envs/tensor_no_gpu/lib/python3.8/site-packages/keras/engine/training.py", line 867, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/Users/kawaii/miniforge3/envs/tensor_no_gpu/lib/python3.8/site-packages/keras/engine/training.py", line 860, in run_step **
outputs = model.train_step(data)
File "/Users/kawaii/miniforge3/envs/tensor_no_gpu/lib/python3.8/site-packages/keras/engine/training.py", line 809, in train_step
loss = self.compiled_loss(
File "/Users/kawaii/miniforge3/envs/tensor_no_gpu/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "/Users/kawaii/miniforge3/envs/tensor_no_gpu/lib/python3.8/site-packages/keras/losses.py", line 141, in __call__
losses = call_fn(y_true, y_pred)
File "/Users/kawaii/miniforge3/envs/tensor_no_gpu/lib/python3.8/site-packages/keras/losses.py", line 245, in call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "/Users/kawaii/miniforge3/envs/tensor_no_gpu/lib/python3.8/site-packages/keras/losses.py", line 1664, in categorical_crossentropy
return backend.categorical_crossentropy(
File "/Users/kawaii/miniforge3/envs/tensor_no_gpu/lib/python3.8/site-packages/keras/backend.py", line 4994, in categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
ValueError: Shapes (None, 200, 3) and (1, 3) are incompatible

Pytorch GAN model doesn't train: matrix multiplication error

I'm trying to build a basic GAN to familiarise myself with Pytorch. I have some (limited) experience with Keras, but since I'm bound to do a larger project in Pytorch, I wanted to explore first using 'basic' networks.
I'm using Pytorch Lightning. I think I've added all necessary components. I tried passing some noise through the generator and the discriminator separately, and I think the output has the expected shape. Nonetheless, I get a runtime error when I try to train the GAN (full traceback below):
RuntimeError: mat1 and mat2 shapes cannot be multiplied (7x9 and 25x1)
I noticed that 7 is the size of the batch (by printing out the batch dimensions), even though I specified batch_size to be 64. Other than that, quite honestly, I don't know where to begin: the error traceback doesn't help me.
Chances are, I made multiple mistakes. However, I'm hoping some of you will be able to spot the current error from the code, since the multiplication error seems to point towards a dimensionality problem somewhere. Here's the code.
import os
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from skimage import io
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.utils import make_grid
from torchvision.transforms import Resize, ToTensor, ToPILImage, Normalize
class DoppelDataset(Dataset):
"""
Dataset class for face data
"""
def __init__(self, face_dir: str, transform=None):
self.face_dir = face_dir
self.face_paths = os.listdir(face_dir)
self.transform = transform
def __len__(self):
return len(self.face_paths)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
face_path = os.path.join(self.face_dir, self.face_paths[idx])
face = io.imread(face_path)
sample = {'image': face}
if self.transform:
sample = self.transform(sample['image'])
return sample
class DoppelDataModule(pl.LightningDataModule):
def __init__(self, data_dir='../data/faces', batch_size: int = 64, num_workers: int = 0):
super().__init__()
self.data_dir = data_dir
self.batch_size = batch_size
self.num_workers = num_workers
self.transforms = transforms.Compose([
ToTensor(),
Resize(100),
Normalize(mean=(123.26290927634774, 95.90498110733365, 86.03763122875182),
std=(63.20679012922922, 54.86211954409834, 52.31266645797249))
])
def setup(self, stage=None):
# Initialize dataset
doppel_data = DoppelDataset(face_dir=self.data_dir, transform=self.transforms)
# Train/val/test split
n = len(doppel_data)
train_size = int(.8 * n)
val_size = int(.1 * n)
test_size = n - (train_size + val_size)
self.train_data, self.val_data, self.test_data = random_split(dataset=doppel_data,
lengths=[train_size, val_size, test_size])
def train_dataloader(self) -> DataLoader:
return DataLoader(dataset=self.test_data, batch_size=self.batch_size, num_workers=self.num_workers)
def val_dataloader(self) -> DataLoader:
return DataLoader(dataset=self.val_data, batch_size=self.batch_size, num_workers=self.num_workers)
def test_dataloader(self) -> DataLoader:
return DataLoader(dataset=self.test_data, batch_size=self.batch_size, num_workers=self.num_workers)
class DoppelGenerator(nn.Sequential):
"""
Generator network that produces images based on latent vector
"""
def __init__(self, latent_dim: int):
super().__init__()
def block(in_channels: int, out_channels: int, padding: int = 1, stride: int = 2, bias=False):
return nn.Sequential(
nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels, kernel_size=4, stride=stride,
padding=padding, bias=bias),
nn.BatchNorm2d(num_features=out_channels),
nn.ReLU(True)
)
self.model = nn.Sequential(
block(latent_dim, 512, padding=0, stride=1),
block(512, 256),
block(256, 128),
block(128, 64),
block(64, 32),
nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1, bias=False),
nn.Tanh()
)
def forward(self, input):
return self.model(input)
class DoppelDiscriminator(nn.Sequential):
"""
Discriminator network that classifies images in two categories
"""
def __init__(self):
super().__init__()
def block(in_channels: int, out_channels: int):
return nn.Sequential(
nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=4, stride=2, padding=1,
bias=False),
nn.BatchNorm2d(num_features=out_channels),
nn.LeakyReLU(0.2, inplace=True),
)
self.model = nn.Sequential(
block(3, 64),
block(64, 128),
block(128, 256),
block(256, 512),
nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False),
nn.Flatten(),
nn.Linear(25, 1),
nn.Sigmoid()
)
def forward(self, input):
return self.model(input)
class DoppelGAN(pl.LightningModule):
def __init__(self,
channels: int,
width: int,
height: int,
lr: float = 0.0002,
b1: float = 0.5,
b2: float = 0.999,
batch_size: int = 64,
**kwargs):
super().__init__()
# Save all keyword arguments as hyperparameters, accessible through self.hparams.X)
self.save_hyperparameters()
# Initialize networks
# data_shape = (channels, width, height)
self.generator = DoppelGenerator(latent_dim=self.hparams.latent_dim, )
self.discriminator = DoppelDiscriminator()
self.validation_z = torch.randn(8, self.hparams.latent_dim,1,1)
def forward(self, input):
return self.generator(input)
def adversarial_loss(self, y_hat, y):
return F.binary_cross_entropy(y_hat, y)
def training_step(self, batch, batch_idx, optimizer_idx):
images = batch
# Sample noise (batch_size, latent_dim,1,1)
z = torch.randn(images.size(0), self.hparams.latent_dim,1,1)
# Train generator
if optimizer_idx == 0:
# Generate images (call generator -- see forward -- on latent vector)
self.generated_images = self(z)
# Log sampled images (visualize what the generator comes up with)
sample_images = self.generated_images[:6]
grid = make_grid(sample_images)
self.logger.experiment.add_image('generated_images', grid, 0)
# Ground truth result (ie: all fake)
valid = torch.ones(images.size(0), 1)
# Adversarial loss is binary cross-entropy
generator_loss = self.adversarial_loss(self.discriminator(self(z)), valid)
tqdm_dict = {'gen_loss': generator_loss}
output = {
'loss': generator_loss,
'progress_bar': tqdm_dict,
'log': tqdm_dict
}
return output
# Train discriminator: classify real from generated samples
if optimizer_idx == 1:
# How well can it label as real?
valid = torch.ones(images.size(0), 1)
real_loss = self.adversarial_loss(self.discriminator(images), valid)
# How well can it label as fake?
fake = torch.zeros(images.size(0), 1)
fake_loss = self.adversarial_loss(
self.discriminator(self(z).detach()), fake)
# Discriminator loss is the average of these
discriminator_loss = (real_loss + fake_loss) / 2
tqdm_dict = {'d_loss': discriminator_loss}
output = {
'loss': discriminator_loss,
'progress_bar': tqdm_dict,
'log': tqdm_dict
}
return output
def configure_optimizers(self):
lr = self.hparams.lr
b1 = self.hparams.b1
b2 = self.hparams.b2
# Optimizers
opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2))
opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2))
# Return optimizers/schedulers (currently no scheduler)
return [opt_g, opt_d], []
def on_epoch_end(self):
# Log sampled images
sample_images = self(self.validation_z)
grid = make_grid(sample_images)
self.logger.experiment.add_image('generated_images', grid, self.current_epoch)
if __name__ == '__main__':
# Global parameter
image_dim = 128
latent_dim = 100
batch_size = 64
# Initialize dataset
tfs = transforms.Compose([
ToPILImage(),
Resize(image_dim),
ToTensor()
])
doppel_dataset = DoppelDataset(face_dir='../data/faces', transform=tfs)
# Initialize data module
doppel_data_module = DoppelDataModule(batch_size=batch_size)
# Build models
generator = DoppelGenerator(latent_dim=latent_dim)
discriminator = DoppelDiscriminator()
# Test generator
x = torch.rand(batch_size, latent_dim, 1, 1)
y = generator(x)
print(f'Generator: x {x.size()} --> y {y.size()}')
# Test discriminator
x = torch.rand(batch_size, 3, 128, 128)
y = discriminator(x)
print(f'Discriminator: x {x.size()} --> y {y.size()}')
# Build GAN
doppelgan = DoppelGAN(batch_size=batch_size, channels=3, width=image_dim, height=image_dim, latent_dim=latent_dim)
# Fit GAN
trainer = pl.Trainer(gpus=0, max_epochs=5, progress_bar_refresh_rate=1)
trainer.fit(model=doppelgan, datamodule=doppel_data_module)
Full traceback:
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-28805d67d74b>", line 1, in <module>
runfile('/Users/wouter/Documents/OneDrive/Hardnose/Projects/Coding/0002_DoppelGANger/doppelganger/gan.py', wdir='/Users/wouter/Documents/OneDrive/Hardnose/Projects/Coding/0002_DoppelGANger/doppelganger')
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/Users/wouter/Documents/OneDrive/Hardnose/Projects/Coding/0002_DoppelGANger/doppelganger/gan.py", line 298, in <module>
trainer.fit(model=doppelgan, datamodule=doppel_data_module)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 510, in fit
results = self.accelerator_backend.train()
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/accelerators/accelerator.py", line 57, in train
return self.train_or_test()
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/accelerators/accelerator.py", line 74, in train_or_test
results = self.trainer.train()
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 561, in train
self.train_loop.run_training_epoch()
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 550, in run_training_epoch
batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 718, in run_training_batch
self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 485, in optimizer_step
model_ref.optimizer_step(
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/core/lightning.py", line 1298, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/core/optimizer.py", line 286, in step
self.__optimizer_step(*args, closure=closure, profiler_name=profiler_name, **kwargs)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/core/optimizer.py", line 144, in __optimizer_step
optimizer.step(closure=closure, *args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 26, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/torch/optim/adam.py", line 66, in step
loss = closure()
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 708, in train_step_and_backward_closure
result = self.training_step_and_backward(
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 806, in training_step_and_backward
result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 319, in training_step
training_step_output = self.trainer.accelerator_backend.training_step(args)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/accelerators/cpu_accelerator.py", line 62, in training_step
return self._step(self.trainer.model.training_step, args)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/accelerators/cpu_accelerator.py", line 58, in _step
output = model_step(*args)
File "/Users/wouter/Documents/OneDrive/Hardnose/Projects/Coding/0002_DoppelGANger/doppelganger/gan.py", line 223, in training_step
real_loss = self.adversarial_loss(self.discriminator(images), valid)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/Users/wouter/Documents/OneDrive/Hardnose/Projects/Coding/0002_DoppelGANger/doppelganger/gan.py", line 154, in forward
return self.model(input)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/container.py", line 117, in forward
input = module(input)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 93, in forward
return F.linear(input, self.weight, self.bias)
File "/usr/local/lib/python3.9/site-packages/torch/nn/functional.py", line 1690, in linear
ret = torch.addmm(bias, input, weight.t())
RuntimeError: mat1 and mat2 shapes cannot be multiplied (7x9 and 25x1)
This multiplication problem comes from the DoppelDiscriminator. There is a linear layer
nn.Linear(25, 1),
that should be
nn.Linear(9, 1),
based on the error message.

Dimensional error in the keras fit function (Conditional variational autoencoder in keras)

I am trying to implement a conditional autoencoder, which is really very straightforward, and getting errors while making the fit function work. Here is the full code snippet
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class Sampling(layers.Layer):
"""Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
def call(self, inputs):
z_mean, z_log_var = inputs
batch = tf.shape(z_mean)[0]
dim = tf.shape(z_mean)[1]
epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
return z_mean + tf.exp(0.5 * z_log_var) * epsilon
class cVAE(keras.Model):
def __init__(self,
original_dim,
label_dim,
latent_dim,
beta=1,
batch_size=1,
**kwargs):
super(cVAE, self).__init__(**kwargs)
self.original_dim = original_dim
self.latent_dim = latent_dim
self.label_dim = label_dim
self.beta = beta
self.batch_size = batch_size
# Build the encoder
print("building encoder")
rnaseq_inputs = keras.Input(shape=(self.original_dim, ),batch_size=self.batch_size)
label_inputs = keras.Input(shape=(self.label_dim, ),batch_size=self.batch_size)
encoder_inputs = layers.concatenate([rnaseq_inputs, label_inputs], name='concat_1')
z_mean = layers.Dense(self.latent_dim,
kernel_initializer = 'glorot_uniform')(encoder_inputs)
z_mean = layers.BatchNormalization()(z_mean)
z_mean = layers.Activation('relu')(z_mean)
z_log_var = layers.Dense(self.latent_dim,
kernel_initializer = 'glorot_uniform')(encoder_inputs)
z_log_var = layers.BatchNormalization()(z_log_var)
z_log_var = layers.Activation('relu')(z_log_var)
z = Sampling()([z_mean, z_log_var])
zc = layers.concatenate([z, label_inputs],name='concat_2')
self.encoder = keras.Model([rnaseq_inputs, label_inputs], [z_mean, z_log_var, z, zc])
print("building decoder")
# Build the decoder
decoder_input_dim = self.latent_dim + self.label_dim
decoder_output_dim = self.original_dim + self.label_dim
decoder_inputs = keras.Input(shape=(decoder_input_dim, ))
decoder_outputs = keras.layers.Dense(decoder_output_dim,
activation='sigmoid')(decoder_inputs)
self.decoder = keras.Model(decoder_inputs, decoder_outputs)
self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
self.reconstruction_loss_tracker = keras.metrics.Mean(
name="reconstruction_loss"
)
self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
#property
def metrics(self):
return [
self.total_loss_tracker,
self.reconstruction_loss_tracker,
self.kl_loss_tracker,
]
def train_step(self, data):
with tf.GradientTape() as tape:
# exp_data, label_data = data
z_mean, z_log_var, z, zc = self.encoder(data)
reconstruction = self.decoder(zc)
reconstruction_loss = tf.reduce_mean(
tf.reduce_sum(
keras.losses.mean_squared_error(data, reconstruction)
)
)
kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
total_loss = reconstruction_loss + kl_loss
grads = tape.gradient(total_loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
self.total_loss_tracker.update_state(total_loss)
self.reconstruction_loss_tracker.update_state(reconstruction_loss)
self.kl_loss_tracker.update_state(kl_loss)
return {
"loss": self.total_loss_tracker.result(),
"reconstruction_loss": self.reconstruction_loss_tracker.result(),
"kl_loss": self.kl_loss_tracker.result(),
}
toy_data = np.random.random((100,100)).astype('float32')
label = np.random.randint(0,high=2,size=100).reshape(100,1).astype('float32')
cvae_model = cVAE(original_dim=100,batch_size=2,label_dim=1,latent_dim=1)
cvae_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.003))
# fitting
cvae_model.fit([toy_data,label])
Up until fit function everything worked. To my surprise the fit function gives the following error,
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
<ipython-input-232-1cc639e2055c>:182 train_step
keras.losses.mean_squared_error(data, reconstruction)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:1197 mean_squared_error
y_true = math_ops.cast(y_true, y_pred.dtype)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/math_ops.py:964 cast
x = ops.convert_to_tensor(x, name="x")
/usr/local/lib/python3.7/dist-packages/tensorflow/python/profiler/trace.py:163 wrapped
return func(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:1540 convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py:1525 _autopacking_conversion_function
return _autopacking_helper(v, dtype, name or "packed")
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py:1444 _autopacking_helper
converted_elem = _autopacking_helper(elem, dtype, str(i))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/array_ops.py:1461 _autopacking_helper
return gen_array_ops.pack(elems_as_tensors, name=scope)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_array_ops.py:6398 pack
"Pack", values=values, axis=axis, name=name)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py:750 _apply_op_helper
attrs=attr_protos, op_def=op_def)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py:592 _create_op_internal
compute_device)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:3536 _create_op_internal
op_def=op_def)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:2016 __init__
control_input_ops, op_def)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:1856 _create_c_op
raise ValueError(str(e))
ValueError: Dimension 1 in both shapes must be equal, but are 100 and 1. Shapes are [2,100] and [2,1].
From merging shape 0 with other shapes. for '{{node Cast/x/0}} = Pack[N=2, T=DT_FLOAT, axis=0](IteratorGetNext, IteratorGetNext:1)' with input shapes: [2,100], [2,1].
I don't understand why it can't merge [2,100] and [2,1] on axis 1 it should produce [2,101], am I getting it wrong?
Here is what plot_model yields for encoder
PS: I tried to play with the axis of concatenation and none of the values worked.
The problem was in the reconstruction; it was solved by the following concatenation in the train step:
def train_step(self, data):
    """Custom VAE training step: reconstruction loss + KL divergence.

    NOTE(review): `data` appears to arrive as a nested structure where
    `data[0][0]` and `data[0][1]` are the two input tensors — confirm
    against the data generator feeding `fit()`.
    """
    with tf.GradientTape() as tape:
        # exp_data, label_data = data
        # Encoder returns the latent distribution parameters plus two
        # latent samples/codes; only `zc` is fed to the decoder.
        z_mean, z_log_var, z, zc = self.encoder(data)
        #form_data = np.concatenate(data)
        reconstruction = self.decoder(zc)
        # Concatenate the two input parts along the feature axis so the
        # target matches the decoder's (merged) output shape — this is
        # the fix for the "Shapes are [2,100] and [2,1]" merge error.
        data_cat = layers.concatenate([data[0][0],data[0][1]], axis=1)
        # Sum per-element MSE, then average — a scalar reconstruction loss.
        reconstruction_loss = tf.reduce_mean(
            tf.reduce_sum(
                keras.losses.mean_squared_error(data_cat, reconstruction)
            )
        )
        # Analytic KL divergence between N(z_mean, exp(z_log_var)) and N(0, I),
        # summed over latent dims, averaged over the batch.
        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
        total_loss = reconstruction_loss + kl_loss
    # Gradients are taken outside the tape context (standard Keras VAE pattern).
    grads = tape.gradient(total_loss, self.trainable_weights)
    self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
    # Update the running metric trackers reported by fit()/evaluate().
    self.total_loss_tracker.update_state(total_loss)
    self.reconstruction_loss_tracker.update_state(reconstruction_loss)
    self.kl_loss_tracker.update_state(kl_loss)
    return {
        "loss": self.total_loss_tracker.result(),
        "reconstruction_loss": self.reconstruction_loss_tracker.result(),
        "kl_loss": self.kl_loss_tracker.result(),
    }

regularizer causes "ValueError: Shapes must be equal rank"

When trying to run
import numpy as np
import keras

# Toy data: one shared input, two regression targets.
X = np.ones((100, 20))
Y1 = np.ones((100, 5))
Y2 = np.ones((100, 4))

Input_1 = keras.layers.Input(shape=X.shape[1])
x = keras.layers.Dense(100)(Input_1)
x = keras.layers.Dense(100)(x)
out1 = keras.layers.Dense(5, kernel_regularizer='l1')(x)
out2 = keras.layers.Dense(4)(x)
model = keras.models.Model(inputs=Input_1, outputs=[out1, out2])
# FIX: loss_weights must be a plain Python list (or dict), not a NumPy
# array. With an ndarray, Keras multiplies the stacked per-output losses
# elementwise (shape [2]) and then AddN fails merging them with the
# scalar regularizer term ("Shapes must be equal rank, but are 1 and 0").
model.compile(loss='mse', loss_weights=list(np.arange(2)))
model.fit(X, [Y1, Y2], epochs=2)
I get
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:805
train_function *
return step_function(self, iterator)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:795
step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259
run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730
call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417
_call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:788
run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:756
train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/compile_utils.py:236
call
total_loss_metric_value = math_ops.add_n(loss_metric_values)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201
wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:3572
add_n
return gen_math_ops.add_n(inputs, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py:419
add_n
"AddN", inputs=inputs, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:750
_apply_op_helper
attrs=attr_protos, op_def=op_def)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py:592
_create_op_internal
compute_device)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:3536
_create_op_internal
op_def=op_def)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:2016
__init__
control_input_ops, op_def)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:1856
_create_c_op
raise ValueError(str(e))
ValueError: Shapes must be equal rank, but are 1 and 0
From merging shape 1 with other shapes. for '{{node AddN}} = AddN[N=3, T=DT_FLOAT](mul_2, mul_5, dense_199/kernel/Regularizer/mul)' with input shapes: [2], [2], [].
The error disappears if I omit the regularizer.
I found that loss_weights has to be a list, not an array.
import numpy as np
import keras

# Same model as in the question; only the loss_weights argument differs.
X = np.ones((100,20))
Y1 = np.ones((100,5))
Y2 = np.ones((100,4))
Input_1= keras.layers.Input(shape=X.shape[1])
x = keras.layers.Dense(100)(Input_1)
x = keras.layers.Dense(100)(x)
out1 = keras.layers.Dense(5, kernel_regularizer='l1')(x)
out2 = keras.layers.Dense(4)(x)
model = keras.models.Model(inputs=Input_1, outputs=[out1,out2])
# Wrapping the array in list() is the fix: each output's loss weight is
# then a scalar, so the weighted losses and the regularizer term all
# reduce to rank-0 tensors and AddN succeeds.
model.compile(loss = 'mse', loss_weights=list(np.arange(2)))
model.fit(X, [Y1, Y2], epochs=2)
I was facing the same issue.
Changing the loss function's reduction from NONE to AUTO worked like a charm:
tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.AUTO)

Resources