How to customize a loss function with a trainable parameter? - keras

I want to customize the following loss function with a training parameter, where λ is the regularization term and w is a trainable parameter. I customize the loss layer and add the loss into my model. But, it occurs the following errors. I don't know where the wrong is and whether my this custom loss layer is right?
Loss function = binary_crossentropy(y_true, y_pred) + λ|w|
1.I customize a loss layer.
class BCLoss(Layer):
def __init__(self,gamma_init='zero',lamada = 0.00005,**kwargs):
self.gamma_init = initializations.get(gamma_init)
self.lamada = lamada
super(BCLoss, self).__init__(**kwargs)
def build(self, input_shape):
self.gamma_init = self.add_weight(name='gamma',
shape=(1,),
initializer='uniform',
trainable=True)
super(BCLoss,self).build(input_shape)
def call(self, inputs, **kwargs):
y_true, y_pred = inputs
loss = K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1) + self.lamada* K.abs(self.gamma_init)
return loss
def compute_output_shape(self, input_shape):
return input_shape
2.Training the model
def build_model(self):
img_input = layers.Input(shape=self.input_shape, name='img_input')
y_true = layers.Input(shape=(1,),name='y_true')
nb_channels = self.growth_rate
# Initial convolution layer
layer_1 = layers.Convolution2D(2 * self.growth_rate, (1, 1), strides=(2, 2),
kernel_regularizer=keras.regularizers.l2(self.weight_decay))(img_input)
average_1 = layers.AveragePooling2D((2, 2), strides=(4, 4))(layer_1)
x = layers.BatchNormalization()(layer_1)
x = layers.Activation('relu')(x)
x = layers.MaxPooling2D()(x)
# Building dense blocks
skip_layer = [average_1]
for block in range(self.dense_blocks - 1):
# Add dense block
x, nb_channels = self.dense_block(x, self.dense_layers[block], nb_channels, self.growth_rate,
self.dropout_rate, self.bottleneck, self.weight_decay)
# Add transition_block
x_left,x_right = self.transition_layer(x, nb_channels, self.dropout_rate, self.compression, self.weight_decay)
x = x_left
nb_channels = int(nb_channels * self.compression)
skip_layer.append(x_right)
# Add last dense block without transition but for that with global average pooling
x, nb_channels = self.dense_block(x, self.dense_layers[-1], nb_channels,
self.growth_rate, self.dropout_rate, self.weight_decay,skip_layer =skip_layer)
x = layers.BatchNormalization()(x)
x = layers.Activation('relu')(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)
y_pred = layers.Dense(self.nb_classes, activation='sigmoid')(x)
model = keras.Model(inputs=[img_input,y_true], outputs=y_pred, name='densenet')
loss = BCLoss()([y_true,y_pred])
model.add_loss(loss)
return model
3.compile model
model.compile(loss=None, optimizer=Adam(lr=0.001), metrics=['accuracy'])
4. The error
Traceback (most recent call last):
File "G:/Workspace/workspace_python/Lung_EGRF/train.py", line 109, in <module>
train_model(config['training_data'],config['testing_data'], config['model_file'],config['input_shape'])
File "G:/Workspace/workspace_python/Lung_EGRF/train.py", line 97, in train_model
early_stopping_patience=50))
File "D:\Anaconda3\envs\py36\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "D:\Anaconda3\envs\py36\lib\site-packages\keras\engine\training.py", line 1418, in fit_generator
initial_epoch=initial_epoch)
File "D:\Anaconda3\envs\py36\lib\site-packages\keras\engine\training_generator.py", line 217, in fit_generator
class_weight=class_weight)
File "D:\Anaconda3\envs\py36\lib\site-packages\keras\engine\training.py", line 1211, in train_on_batch
class_weight=class_weight)
File "D:\Anaconda3\envs\py36\lib\site-packages\keras\engine\training.py", line 789, in _standardize_user_data
exception_prefix='target')
File "D:\Anaconda3\envs\py36\lib\site-packages\keras\engine\training_utils.py", line 63, in standardize_input_data
'expected no data, but got:', data)
ValueError: ('Error when checking model target: expected no data, but got:',
array([0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
1, 0, 1, 1, 1, 1, 1, 1, 0, 1], dtype=int64))
Process finished with exit code 1

Related

PyTorch RuntimeError: Expected target size [8, 1182], got [8, 256]

I have a PyTorch model composed of a Distilbert and a BiLSTM with the following structure. Its purpose involves performing token classification over a vast amount of categories (num_labels=1182) by attaching the output of the transformer to the input of the BiLSTM.
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModelForTokenClassification
import utilities as utils
from global_constants import MAX_DOC_LENGTH
class CustomTorchModel(nn.Module):
def __init__(self, args_model_name_or_path):
id_to_label, label_to_id = utils.unshelve_label_converters()
label_qty = len(list(label_to_id))
self.distilbert_layer = AutoModelForTokenClassification.from_pretrained(
args_model_name_or_path,
id2label=id_to_label,
label2id=label_to_id,
num_labels=label_qty
)
self.bilstm_layer = nn.LSTM(input_size=MAX_DOC_LENGTH,
hidden_size=self.distilbert_layer.config.dim,
num_layers=1,
batch_first=True,
bidirectional=True)
def forward(self, inputs):
print("input_ids size: " + str(inputs[0].size()))
print("attention_mask size: " + str(inputs[1].size()))
distilbert_output = self.distilbert_layer(input_ids=inputs[0], attention_mask=inputs[1])
print("distilbert_output.last_hidden_state size: " + str(distilbert_output.last_hidden_state.size()))
bilstm_output, (last_hidden, last_cell) = self.bilstm_layer(distilbert_output.last_hidden_state)
print("BiLSTM output size: " + str(bilstm_output.size()))
output = self.classification_layer(bilstm_output)
print("output size: " + str(output.size()))
return F.softmax(output)
Output showing the shapes after each layer. Notes: 256 is the value of MAX_DOC_LENGTH, 768 is self.distilbert_layer.config.dim and 1182 is num_labels.
input_ids size: torch.Size([8, 256])
attention_mask size: torch.Size([8, 256])
distilbert_output.last_hidden_state size: torch.Size([8, 256, 768])
BiLSTM output size: torch.Size([8, 256, 1536])
output size: torch.Size([8, 256, 1182])
This custom model is used in a pretty standard Ignite script which leverages to train the model. Since there are multiple categories and this is not binary classification, the loss function should be nn.CrossEntropyLoss:
criterion = nn.CrossEntropyLoss(reduction='mean')
optimizer = AdamW(model.parameters(), lr=1e-5)
lr_scheduler = ExponentialLR(optimizer, gamma=0.90)
trainer = create_supervised_trainer1(model.to(device), optimizer, criterion, device=device)
This is the definition of the methods used above:
def _prepare_batch(batch, device=None, non_blocking=False):
x = [batch["input_ids"], batch["attention_mask"]] # list
y = batch["labels"]
return (convert_tensor(x, device=device, non_blocking=non_blocking),
convert_tensor(y, device=device, non_blocking=non_blocking))
def create_supervised_trainer1(model, optimizer, loss_fn, metrics={}, device=None):
def _update(engine, batch):
model.train()
optimizer.zero_grad()
x, y = _prepare_batch(batch, device=device)
y_pred = model(x)
transposed_y_pred = torch.transpose(y_pred, 1, 2)
loss = loss_fn(transposed_y_pred, y.long())
loss.backward()
optimizer.step()
return loss.item(), transposed_y_pred, y.long()
def _metrics_transform(output):
return output[1], output[2]
engine = Engine(_update)
for name, metric in metrics.items():
metric._output_transform = _metrics_transform
metric.attach(engine, name)
return engine
I know I am missing something, however I'm not being able to figure out what. The execution produces an error related to the shapes (the "y" of the DataLoaders has [8, 256] and the network produces [8, 1182]. This happens even though I rearranged the tensors in the order required by CrossEntropyLoss:
Current run is terminating due to exception: Expected target size [8, 1182], got [8, 256]
Engine run is terminating due to exception: Expected target size [8, 1182], got [8, 256]
Traceback (most recent call last):
File "/home/users/user/august/src/main/ignite_script.py", line 456, in run
trainer.run(train_dataloader, max_epochs=epochs)
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 892, in run
return self._internal_run()
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 935, in _internal_run
return next(self._internal_run_generator)
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 993, in _internal_run_as_gen
self._handle_exception(e)
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 638, in _handle_exception
raise e
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 959, in _internal_run_as_gen
epoch_time_taken += yield from self._run_once_on_dataset_as_gen()
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 1087, in _run_once_on_dataset_as_gen
self._handle_exception(e)
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 638, in _handle_exception
raise e
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 1068, in _run_once_on_dataset_as_gen
self.state.output = self._process_function(self, self.state.batch)
File "/home/users/user/august/src/main/ignite_script.py", line 321, in _update
loss = loss_fn(y_pred, y.float())
File "/home/users/user/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/users/user/.local/lib/python3.9/site-packages/torch/nn/modules/loss.py", line 1163, in forward
return F.cross_entropy(input, target, weight=self.weight,
File "/home/users/user/.local/lib/python3.9/site-packages/torch/nn/functional.py", line 2996, in cross_entropy
return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: Expected target size [8, 1182], got [8, 256]
According to the documentation of
nn.CrossEntropyLoss, you can specify the targets in two ways:
class indices for each sample
probabilities of each class for each sample
but you should do any of above mentioned ways by a specific shaped target:
given:
> criterion = nn.CrossEntropyLoss()
> loss = criterion(input, target)
Input:
Shape: (C)(C), (N, C)(N,C) or (N, C, d_1, d_2, ..., d_K)(N,C,d1​,d2​,...,dK) with K≥1 in the case of K-dimensional loss.
Target:
If containing class indices, shape:
()(), (N)(N) or (N, d_1, d_2, ..., d_K)(N,d 1,d2,...,dK) with K≥1 in the case of K-dimensional loss where each value should be between [0, C)[0,C).
If containing class probabilities,
same shape as the input and each value should be between [0, 1][0,1].
Output:
If reduction is ‘none’, shape ()(), (N)(N) or (N, d_1, d_2, ..., d_K)(N,d1,d2,...,dK) with K≥1 in the case of K-dimensional loss, depending on the shape of the input. Otherwise, scalar.

I'm having trouble performing convolutional fusion procedures, about IndexError: too many indices for tensor of dimension 2

Does anyone know the problem
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.spat_feature = spat_model #feature_size = Nx512x7x7
self.temp_feature = temp_model #feature_size = Nx512x7x7
self.layer1 = nn.Sequential(nn.Conv3d(1024, 512, 1, stride=1, padding=1, dilation=1,bias=True),
nn.ReLU(),nn.MaxPool3d(kernel_size=2,stride=2))
self.fc = nn.Sequential(nn.Linear(8192,2048), nn.ReLU(), nn.Dropout(p=0.85),
nn.Linear(2048, 512), nn.ReLU(), nn.Dropout(p=0.85),
nn.Linear(512, 101))
def forward(self,spat_data,temp_data):
x1 = self.spat_feature(spat_data)
x2 = self.temp_feature(temp_data)
y = torch.cat((x1,x2), dim= 1)
for i in range(x1.size(1)):
#y[i] = self.m[i](y[i])
y[:,(2*i),:,:] = x1[:,i,:,:] #I have tried to modify four: to two :
y[:,(2*i+1)] = x2[:,i,:,:] #I have tried to modify four: to two:
y = y.view(y.size(0), 1024, 1, 7, 7)
cnn_out = self.layer1(y)
cnn_out = cnn_out.view(cnn_out.size(0),-1)
out = self.fc(cnn_out)
return out
here is my error:
Traceback (most recent call last): File "conv_fusion.py", line 345, in model = train_model(model, criterion, optimizer,scheduler, num_epochs=20) File "conv_fusion.py", line 167, in train_model outputs = model(spat_data, temp_data) File "/home/el304/.local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl return forward_call(input, **kwargs) File "conv_fusion.py", line 263, in forward y[:,(2i),:,:] = x1[:,i,:,:] IndexError: too many indices for tensor of dimension 2
**I tried to remove the index value**
RuntimeError: shape '[10, 1024, 1, 7, 7]' is invalid for input of size 2020

Pytorch GAN model doesn't train: matrix multiplication error

I'm trying to build a basic GAN to familiarise myself with Pytorch. I have some (limited) experience with Keras, but since I'm bound to do a larger project in Pytorch, I wanted to explore first using 'basic' networks.
I'm using Pytorch Lightning. I think I've added all necessary components. I tried passing some noise through the generator and the discriminator separately, and I think the output has the expected shape. Nonetheless, I get a runtime error when I try to train the GAN (full traceback below):
RuntimeError: mat1 and mat2 shapes cannot be multiplied (7x9 and 25x1)
I noticed that 7 is the size of the batch (by printing out the batch dimensions), even though I specified batch_size to be 64. Other than that, quite honestly, I don't know where to begin: the error traceback doesn't help me.
Chances are, I made multiple mistakes. However, I'm hoping some of you will be able to spot the current error from the code, since the multiplication error seems to point towards a dimensionality problem somewhere. Here's the code.
import os
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from skimage import io
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.utils import make_grid
from torchvision.transforms import Resize, ToTensor, ToPILImage, Normalize
class DoppelDataset(Dataset):
"""
Dataset class for face data
"""
def __init__(self, face_dir: str, transform=None):
self.face_dir = face_dir
self.face_paths = os.listdir(face_dir)
self.transform = transform
def __len__(self):
return len(self.face_paths)
def __getitem__(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
face_path = os.path.join(self.face_dir, self.face_paths[idx])
face = io.imread(face_path)
sample = {'image': face}
if self.transform:
sample = self.transform(sample['image'])
return sample
class DoppelDataModule(pl.LightningDataModule):
def __init__(self, data_dir='../data/faces', batch_size: int = 64, num_workers: int = 0):
super().__init__()
self.data_dir = data_dir
self.batch_size = batch_size
self.num_workers = num_workers
self.transforms = transforms.Compose([
ToTensor(),
Resize(100),
Normalize(mean=(123.26290927634774, 95.90498110733365, 86.03763122875182),
std=(63.20679012922922, 54.86211954409834, 52.31266645797249))
])
def setup(self, stage=None):
# Initialize dataset
doppel_data = DoppelDataset(face_dir=self.data_dir, transform=self.transforms)
# Train/val/test split
n = len(doppel_data)
train_size = int(.8 * n)
val_size = int(.1 * n)
test_size = n - (train_size + val_size)
self.train_data, self.val_data, self.test_data = random_split(dataset=doppel_data,
lengths=[train_size, val_size, test_size])
def train_dataloader(self) -> DataLoader:
return DataLoader(dataset=self.test_data, batch_size=self.batch_size, num_workers=self.num_workers)
def val_dataloader(self) -> DataLoader:
return DataLoader(dataset=self.val_data, batch_size=self.batch_size, num_workers=self.num_workers)
def test_dataloader(self) -> DataLoader:
return DataLoader(dataset=self.test_data, batch_size=self.batch_size, num_workers=self.num_workers)
class DoppelGenerator(nn.Sequential):
"""
Generator network that produces images based on latent vector
"""
def __init__(self, latent_dim: int):
super().__init__()
def block(in_channels: int, out_channels: int, padding: int = 1, stride: int = 2, bias=False):
return nn.Sequential(
nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels, kernel_size=4, stride=stride,
padding=padding, bias=bias),
nn.BatchNorm2d(num_features=out_channels),
nn.ReLU(True)
)
self.model = nn.Sequential(
block(latent_dim, 512, padding=0, stride=1),
block(512, 256),
block(256, 128),
block(128, 64),
block(64, 32),
nn.ConvTranspose2d(32, 3, kernel_size=4, stride=2, padding=1, bias=False),
nn.Tanh()
)
def forward(self, input):
return self.model(input)
class DoppelDiscriminator(nn.Sequential):
"""
Discriminator network that classifies images in two categories
"""
def __init__(self):
super().__init__()
def block(in_channels: int, out_channels: int):
return nn.Sequential(
nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=4, stride=2, padding=1,
bias=False),
nn.BatchNorm2d(num_features=out_channels),
nn.LeakyReLU(0.2, inplace=True),
)
self.model = nn.Sequential(
block(3, 64),
block(64, 128),
block(128, 256),
block(256, 512),
nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=0, bias=False),
nn.Flatten(),
nn.Linear(25, 1),
nn.Sigmoid()
)
def forward(self, input):
return self.model(input)
class DoppelGAN(pl.LightningModule):
def __init__(self,
channels: int,
width: int,
height: int,
lr: float = 0.0002,
b1: float = 0.5,
b2: float = 0.999,
batch_size: int = 64,
**kwargs):
super().__init__()
# Save all keyword arguments as hyperparameters, accessible through self.hparams.X)
self.save_hyperparameters()
# Initialize networks
# data_shape = (channels, width, height)
self.generator = DoppelGenerator(latent_dim=self.hparams.latent_dim, )
self.discriminator = DoppelDiscriminator()
self.validation_z = torch.randn(8, self.hparams.latent_dim,1,1)
def forward(self, input):
return self.generator(input)
def adversarial_loss(self, y_hat, y):
return F.binary_cross_entropy(y_hat, y)
def training_step(self, batch, batch_idx, optimizer_idx):
images = batch
# Sample noise (batch_size, latent_dim,1,1)
z = torch.randn(images.size(0), self.hparams.latent_dim,1,1)
# Train generator
if optimizer_idx == 0:
# Generate images (call generator -- see forward -- on latent vector)
self.generated_images = self(z)
# Log sampled images (visualize what the generator comes up with)
sample_images = self.generated_images[:6]
grid = make_grid(sample_images)
self.logger.experiment.add_image('generated_images', grid, 0)
# Ground truth result (ie: all fake)
valid = torch.ones(images.size(0), 1)
# Adversarial loss is binary cross-entropy
generator_loss = self.adversarial_loss(self.discriminator(self(z)), valid)
tqdm_dict = {'gen_loss': generator_loss}
output = {
'loss': generator_loss,
'progress_bar': tqdm_dict,
'log': tqdm_dict
}
return output
# Train discriminator: classify real from generated samples
if optimizer_idx == 1:
# How well can it label as real?
valid = torch.ones(images.size(0), 1)
real_loss = self.adversarial_loss(self.discriminator(images), valid)
# How well can it label as fake?
fake = torch.zeros(images.size(0), 1)
fake_loss = self.adversarial_loss(
self.discriminator(self(z).detach()), fake)
# Discriminator loss is the average of these
discriminator_loss = (real_loss + fake_loss) / 2
tqdm_dict = {'d_loss': discriminator_loss}
output = {
'loss': discriminator_loss,
'progress_bar': tqdm_dict,
'log': tqdm_dict
}
return output
def configure_optimizers(self):
lr = self.hparams.lr
b1 = self.hparams.b1
b2 = self.hparams.b2
# Optimizers
opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr, betas=(b1, b2))
opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr, betas=(b1, b2))
# Return optimizers/schedulers (currently no scheduler)
return [opt_g, opt_d], []
def on_epoch_end(self):
# Log sampled images
sample_images = self(self.validation_z)
grid = make_grid(sample_images)
self.logger.experiment.add_image('generated_images', grid, self.current_epoch)
if __name__ == '__main__':
# Global parameter
image_dim = 128
latent_dim = 100
batch_size = 64
# Initialize dataset
tfs = transforms.Compose([
ToPILImage(),
Resize(image_dim),
ToTensor()
])
doppel_dataset = DoppelDataset(face_dir='../data/faces', transform=tfs)
# Initialize data module
doppel_data_module = DoppelDataModule(batch_size=batch_size)
# Build models
generator = DoppelGenerator(latent_dim=latent_dim)
discriminator = DoppelDiscriminator()
# Test generator
x = torch.rand(batch_size, latent_dim, 1, 1)
y = generator(x)
print(f'Generator: x {x.size()} --> y {y.size()}')
# Test discriminator
x = torch.rand(batch_size, 3, 128, 128)
y = discriminator(x)
print(f'Discriminator: x {x.size()} --> y {y.size()}')
# Build GAN
doppelgan = DoppelGAN(batch_size=batch_size, channels=3, width=image_dim, height=image_dim, latent_dim=latent_dim)
# Fit GAN
trainer = pl.Trainer(gpus=0, max_epochs=5, progress_bar_refresh_rate=1)
trainer.fit(model=doppelgan, datamodule=doppel_data_module)
Full traceback:
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-28805d67d74b>", line 1, in <module>
runfile('/Users/wouter/Documents/OneDrive/Hardnose/Projects/Coding/0002_DoppelGANger/doppelganger/gan.py', wdir='/Users/wouter/Documents/OneDrive/Hardnose/Projects/Coding/0002_DoppelGANger/doppelganger')
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/Users/wouter/Documents/OneDrive/Hardnose/Projects/Coding/0002_DoppelGANger/doppelganger/gan.py", line 298, in <module>
trainer.fit(model=doppelgan, datamodule=doppel_data_module)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 510, in fit
results = self.accelerator_backend.train()
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/accelerators/accelerator.py", line 57, in train
return self.train_or_test()
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/accelerators/accelerator.py", line 74, in train_or_test
results = self.trainer.train()
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py", line 561, in train
self.train_loop.run_training_epoch()
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 550, in run_training_epoch
batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 718, in run_training_batch
self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 485, in optimizer_step
model_ref.optimizer_step(
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/core/lightning.py", line 1298, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/core/optimizer.py", line 286, in step
self.__optimizer_step(*args, closure=closure, profiler_name=profiler_name, **kwargs)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/core/optimizer.py", line 144, in __optimizer_step
optimizer.step(closure=closure, *args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 26, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/torch/optim/adam.py", line 66, in step
loss = closure()
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 708, in train_step_and_backward_closure
result = self.training_step_and_backward(
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 806, in training_step_and_backward
result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/trainer/training_loop.py", line 319, in training_step
training_step_output = self.trainer.accelerator_backend.training_step(args)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/accelerators/cpu_accelerator.py", line 62, in training_step
return self._step(self.trainer.model.training_step, args)
File "/usr/local/lib/python3.9/site-packages/pytorch_lightning/accelerators/cpu_accelerator.py", line 58, in _step
output = model_step(*args)
File "/Users/wouter/Documents/OneDrive/Hardnose/Projects/Coding/0002_DoppelGANger/doppelganger/gan.py", line 223, in training_step
real_loss = self.adversarial_loss(self.discriminator(images), valid)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/Users/wouter/Documents/OneDrive/Hardnose/Projects/Coding/0002_DoppelGANger/doppelganger/gan.py", line 154, in forward
return self.model(input)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/container.py", line 117, in forward
input = module(input)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 93, in forward
return F.linear(input, self.weight, self.bias)
File "/usr/local/lib/python3.9/site-packages/torch/nn/functional.py", line 1690, in linear
ret = torch.addmm(bias, input, weight.t())
RuntimeError: mat1 and mat2 shapes cannot be multiplied (7x9 and 25x1)
This multiplication problem comes from the DoppelDiscriminator. There is a linear layer
nn.Linear(25, 1),
that should be
nn.Linear(9, 1),
based on the error message.

ValueError: Graph disconnected in vgg16

Traceback:
model = Model(input_tensor,x,name = 'vgg16_trunk')
File "/usr/local/lib/python3.6/dist-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 93, in __init__
self._init_graph_network(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 231, in _init_graph_network
self.inputs, self.outputs)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 1443, in _map_graph_network
str(layers_with_complete_input))
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_2:0", shape=(?, 32, 32, 3), dtype=float32) at layer "input_2". The following previous layers were accessed without issue: []
How to solve this problem in vgg16 ??
def create_model(input_shape):
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
input_tensor = Input(shape=input_shape)
base_model = VGG16(classes=10,input_tensor=None,input_shape=input_shape,include_top=False)
x = base_model.output
x = BatchNormalization(axis=channel_axis, momentum=mom,
epsilon=eps, gamma_initializer=gamma)(x)
x = LeakyReLU(leakiness)(x)
model = Model(input_tensor,x,name = 'vgg16_trunk')
return model
Pass the input_tensor you created here:
input_tensor = Input(shape=input_shape)
where base_model is created:
base_model = VGG16(classes=10,input_tensor=input_tensor,include_top=False)
Please note also, that the tensor will already have the input_shape so it's not necessary to give it as parameter again when creating the base_model.

ValueError: Cannot feed value of shape (50,) for Tensor u'Placeholder_1:0', which has shape '(?, 10)'

I'm a new guy in tensorflow,and I'm trying an experiment with my own .tfrecords files.Now I get something wrong in my code and I don't know what happend.Does anybody tell me how can I solve this problem
from color_1 import read_and_decode, get_batch, get_test_batch
import cv2
import os
import time
import numpy as np
import tensorflow as tf
batch_size=50
n_input=56*56*3
n_classes=10
def weight_variable(shape):
initial = tf.truncated_normal(shape=shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
x = tf.placeholder(tf.float32, [None,56,56,3])
y = tf.placeholder(tf.float32, [None,n_classes])
x_image = tf.reshape(x, [-1, 56, 56, 3])
W_conv1 = weight_variable([5, 5, 3, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1)+b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([14*14*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 14*14*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = -tf.reduce_sum(y * tf.log(y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
def run():
image, label = read_and_decode('train.tfrecords')
batch_image, batch_label = get_batch(image, label, batch_size, crop_size=56)
test_image, test_label = read_and_decode('val.tfrecords')
test_images, test_labels = get_test_batch(test_image, test_label, batch_size, crop_size=56)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
for i in range(20000):
image_batch, label_batch = sess.run([batch_image, batch_label])
if i%100 == 0:
train_accuracy = accuracy.eval(feed_dict={x: image_batch, y:label_batch,keep_prob:1.0})
print("step %d, training accuracy %g" % (i, train_accuracy))
train_step.run(feed_dict={x:image_batch, y:label_batch,keep_prob:0.5})
print("test accuracy %g" % accuracy.eval(feed_dict={
x:test_images, y:test_labels, keep_prob:1.0}))
coord.request_stop()
coord.join(threads)
if __name__=='__main__':
run()
And the problem is just like this:
Traceback (most recent call last):
File "/home/vrview/tensorflow/example/char/tfrecords/LeNet.py", line 130, in <module>
run()
File "/home/vrview/tensorflow/example/char/tfrecords/LeNet.py", line 120, in run
train_accuracy = accuracy.eval(feed_dict={x: image_batch, y:label_batch,keep_prob:1.0})
File "/home/vrview/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 581, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/home/vrview/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3797, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/home/vrview/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 767, in run
run_metadata_ptr)
File "/home/vrview/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 944, in _run
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (50,) for Tensor u'Placeholder_1:0', which has shape '(?, 10)'
train_accuracy = accuracy.eval(feed_dict={x: image_batch, y:label_batch,keep_prob:1.0})#bug is here
I don't know how should I do. Please tell me if you know. Thank you very much!
Looks like label_batch on line 120 is has the value of the labels and not the 1-hot encodings. For example, it probably looks like a 1 dimensional array like this [1,3,4,0,6...] when instead it needs to be a 2 dimensional array of 1-hot encodings like this [ [0,1,0,0,0,0,0,0,0,0] , [0,0,0,1,0,0,0,0,0,0] .... ].
You can use the tf.one_hot function to convert your label_batch into the needed form. To do so change
y = tf.placeholder(tf.float32, [None,n_classes])
to
y = tf.placeholder(tf.float32, [None])
y_one_hot = tf.one_hot( y , 10 )
and you'll have to change references to y to instead use y_one_hot
cross_entropy = -tf.reduce_sum(y_one_hot * tf.log(y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_one_hot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
Cheers!

Resources