The following code is giving me an error which I cannot find the answer to. I am trying to apply a python function to each element of a tensor, which transforms the element into a vector of shape 3, so I can calculate a custom evaluation metric. It needs to be a Python function as it is used in other places too.
The error (log below) is Invalid argument: PartialTensorShape: Incompatible ranks during merge: 1 vs. 0, and I assume it has to do with the result of map_fn and its shape. However, it only happens at runtime; if I use any other shape, an incompatible-shapes error is thrown as soon as I call model.compile(). Have I misunderstood how to use map_fn? Any suggestions?
Thanks in advance!
2021-04-09 12:19:31.357542: W tensorflow/core/framework/op_kernel.cc:1767] OP_REQUIRES failed at list_kernels.h:101 : Invalid argument: PartialTensorShape: Incompatible ranks during merge: 1 vs. 0
Traceback (most recent call last):
File "test.py", line 93, in <module>
validation_data=(val_input, val_output))
File "/home/user/anaconda3/envs/tf_models/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
return method(self, *args, **kwargs)
File "/home/user/anaconda3/envs/tf_models/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 1098, in fit
tmp_logs = train_function(iterator)
File "/home/user/anaconda3/envs/tf_models/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 780, in __call__
result = self._call(*args, **kwds)
File "/home/user/anaconda3/envs/tf_models/lib/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 840, in _call
return self._stateless_fn(*args, **kwds)
File "/home/user/anaconda3/envs/tf_models/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 2829, in __call__
return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
File "/home/user/anaconda3/envs/tf_models/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1848, in _filtered_call
cancellation_manager=cancellation_manager)
File "/home/user/anaconda3/envs/tf_models/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 1924, in _call_flat
ctx, args, cancellation_manager=cancellation_manager))
File "/home/user/anaconda3/envs/tf_models/lib/python3.6/site-packages/tensorflow/python/eager/function.py", line 550, in call
ctx=ctx)
File "/home/user/anaconda3/envs/tf_models/lib/python3.6/site-packages/tensorflow/python/eager/execute.py", line 60, in quick_execute
inputs, attrs, num_outputs)
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: PartialTensorShape: Incompatible ranks during merge: 1 vs. 0
[[node map/TensorArrayV2Stack/TensorListStack (defined at test.py:27) ]]
[[map_1/while/LoopCond/_50/_64]]
(1) Invalid argument: PartialTensorShape: Incompatible ranks during merge: 1 vs. 0
[[node map/TensorArrayV2Stack/TensorListStack (defined at test.py:27) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_823]
Function call stack:
train_function -> train_function
This is the code to reproduce the issue, using Tensorflow 2.3.1 and Python 3.6.
from typing import List
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Flatten
INPUT_SHAPE = (2, 10, 10)
class CustomMetric(tf.keras.metrics.Metric):
    """Keras metric that maps class indices to 3-vectors via a Python function.

    Every element of ``y_true`` and of ``argmax(y_pred)`` is sent through
    ``python_function`` (wrapped in ``tf.py_function``) using ``tf.map_fn``.
    The stored state is overwritten on each ``update_state`` call with the sum
    of the mapped ``y_true`` vectors, which stands in for the real computation.
    """

    def __init__(self, name='custom_metric', **kwargs):
        super().__init__(name=name, **kwargs)
        # Single scalar state variable; result() returns it unchanged.
        self.mean_custom_metric = self.add_weight(name='mean_custom_metric', initializer='zeros', dtype=float)

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Recompute the metric state from one batch.

        ``sample_weight`` is accepted but never read by this implementation.
        """
        # y_pred is a probability distribution over the 2*10*10 positions
        # (softmax output of the model), so find the index of the most likely one
        y_pred = tf.argmax(y_pred, axis=1)
        # y_pred is now one index per sample, i.e. shape (batch,);
        # y_true is presumably (batch, 1), as fed by the training script — TODO confirm
        print(f"y_pred: {y_pred}")
        # apply python func to convert each value to a 3D value (single scalar to vector with 3 scalars)
        # according to docs: map_fn(fn, elems).shape = [elems.shape[0]] + fn(elems[0]).shape.
        # So: elems.shape[0] == batch | fn(elems[0]).shape == 3,
        # error happens when trying to do anything with the result of map_fn below
        y_true_positions = tf.map_fn(self.wrapper, y_true, fn_output_signature=tf.float32)
        y_pred_positions = tf.map_fn(self.wrapper, y_pred, fn_output_signature=tf.float32)
        # intended result: y_true_positions, y_pred_positions are tensors with shape (batch, 3)
        print(f"y_true_positions: {y_true_positions}")
        # do something with y_true_positions and y_pred_positions
        # (placeholder: only y_true_positions is used; y_pred_positions is ignored)
        y_final = y_true_positions
        mean = tf.reduce_sum(y_final)
        print('---')
        # Overwrites the state with this batch's value; nothing is accumulated across batches.
        self.mean_custom_metric.assign(mean)

    def result(self):
        """Return the state variable written by the last ``update_state`` call."""
        return self.mean_custom_metric

    def reset_states(self):
        """Reset the state variable to zero."""
        self.mean_custom_metric.assign(0.0)

    def wrapper(self, x):
        """Run ``python_function`` on one ``map_fn`` element and declare a (3,) result."""
        # x is one slice along axis 0 of the mapped tensor: presumably shape (1,)
        # for y_true elements and a scalar for y_pred elements — TODO confirm
        print(f"x: {x}")
        result = tf.py_function(python_function, [int(x)], tf.float32)
        # py_function output has no static shape at this point
        print(f"result: {result}")
        # NOTE(review): this set_shape call is the line the reported resolution
        # replaces with a reshape to (1, 3), after which map_fn yields (batch, 1, 3).
        result.set_shape(tf.TensorShape(3))
        # result: static shape now declared as (3,)
        print(f"result: {result}")
        return result
def python_function(index: int) -> List[float]:
    """Placeholder mapping from a class index to a 3-component vector.

    Args:
        index: Class index to convert; ignored by this dummy implementation.

    Returns:
        A list of three floats, always zeros.
    """
    # Float literals so the value agrees with the declared List[float] return
    # type and with the tf.float32 output dtype requested by the py_function call.
    return [0.0, 0.0, 0.0]
# Dummy model: flatten the (*INPUT_SHAPE, 1) input, one hidden layer, then a
# softmax over all np.prod(INPUT_SHAPE) positions.
block_positions = Input(shape=(*INPUT_SHAPE, 1), dtype=tf.float32)
block_positions_layer = Flatten()(block_positions)
target_output_layer = Dense(128, activation='relu')(block_positions_layer)
target_output = Dense(np.prod(INPUT_SHAPE), activation='softmax', name='regions')(target_output_layer)
model = tf.keras.models.Model(
    inputs=[block_positions],
    outputs=(target_output))
custom_metric = CustomMetric()
# Labels are integer class indices, hence the sparse cross-entropy;
# from_logits=False because the last layer already applies softmax.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=tf.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy', custom_metric])
print(model.summary())
# Placeholder data: all-zero inputs and all-zero integer labels of shape (100, 1).
train_input = np.zeros(shape=(100, *INPUT_SHAPE), dtype=np.float32)
train_output = np.zeros(shape=(100, 1), dtype=np.int32)
val_input = np.zeros(shape=(100, *INPUT_SHAPE), dtype=np.float32)
val_output = np.zeros(shape=(100, 1), dtype=np.int32)
# The traceback in the question is raised from this fit() call.
history = model.fit(
    train_input, train_output, epochs=10, verbose=1,
    validation_data=(val_input, val_output))
I found the solution after a while. The wrapper function was returning a tensor of shape (3,), whereas the map_fn was applied over a tensor of shape (batch, 1). I don't fully understand why, but it seems that map_fn requires a return tensor of shape (batch, 1,) and not fn(elems[0]).shape as the documentation suggests.
Changing the line:
result.set_shape(tf.TensorShape(3))
for
result = tf.reshape(tf.concat(result, 1), (1, 3)) in wrapper
so that the return value is (1, 3) instead of (3) fixed the issue. After map_fn, you end up with a tensor of shape (batch, 1, 3), which I reshaped to be (batch, 3).
Related
I have a PyTorch model composed of a Distilbert and a BiLSTM with the following structure. Its purpose involves performing token classification over a vast amount of categories (num_labels=1182) by attaching the output of the transformer to the input of the BiLSTM.
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModelForTokenClassification
import utilities as utils
from global_constants import MAX_DOC_LENGTH
class CustomTorchModel(nn.Module):
    """Transformer encoder followed by a BiLSTM and a per-token classifier.

    The encoder's hidden states are fed to a single-layer bidirectional LSTM,
    whose outputs are projected to one score per label for every token.
    """

    def __init__(self, args_model_name_or_path):
        """Build the encoder, the BiLSTM and the classification head.

        Args:
            args_model_name_or_path: Model name or path forwarded to
                ``AutoModelForTokenClassification.from_pretrained``.
        """
        # nn.Module must be initialised before any sub-module is assigned,
        # otherwise the attribute assignments below raise.
        super().__init__()
        id_to_label, label_to_id = utils.unshelve_label_converters()
        label_qty = len(list(label_to_id))
        self.distilbert_layer = AutoModelForTokenClassification.from_pretrained(
            args_model_name_or_path,
            id2label=id_to_label,
            label2id=label_to_id,
            num_labels=label_qty
        )
        hidden_dim = self.distilbert_layer.config.dim
        # The LSTM consumes the encoder's hidden states, so its input width is
        # the encoder dimension (768 in the printed shapes), not the sequence
        # length MAX_DOC_LENGTH.
        self.bilstm_layer = nn.LSTM(input_size=hidden_dim,
                                    hidden_size=hidden_dim,
                                    num_layers=1,
                                    batch_first=True,
                                    bidirectional=True)
        # forward() uses this head but it was never created. A bidirectional
        # LSTM emits 2 * hidden_size features per token.
        # NOTE(review): a plain linear projection is assumed from the printed
        # shapes (1536 -> 1182) — confirm against the real model.
        self.classification_layer = nn.Linear(2 * hidden_dim, label_qty)

    def forward(self, inputs):
        """Return per-token label probabilities.

        Args:
            inputs: Sequence whose element 0 is ``input_ids`` and element 1 is
                ``attention_mask``.

        Returns:
            Tensor with a softmax applied over the last (label) dimension.
        """
        print("input_ids size: " + str(inputs[0].size()))
        print("attention_mask size: " + str(inputs[1].size()))
        distilbert_output = self.distilbert_layer(input_ids=inputs[0], attention_mask=inputs[1])
        # NOTE(review): token-classification heads usually expose `logits`
        # rather than `last_hidden_state`; confirm which encoder class is loaded.
        print("distilbert_output.last_hidden_state size: " + str(distilbert_output.last_hidden_state.size()))
        bilstm_output, (last_hidden, last_cell) = self.bilstm_layer(distilbert_output.last_hidden_state)
        print("BiLSTM output size: " + str(bilstm_output.size()))
        output = self.classification_layer(bilstm_output)
        print("output size: " + str(output.size()))
        # Explicit dim: the implicit-dim form is deprecated and does not
        # normalise over the label axis for a 3-D tensor.
        # NOTE(review): nn.CrossEntropyLoss applies log-softmax itself, so
        # feeding it these probabilities normalises twice — consider returning
        # raw scores for training.
        return F.softmax(output, dim=-1)
Output showing the shapes after each layer. Notes: 256 is the value of MAX_DOC_LENGTH, 768 is self.distilbert_layer.config.dim and 1182 is num_labels.
input_ids size: torch.Size([8, 256])
attention_mask size: torch.Size([8, 256])
distilbert_output.last_hidden_state size: torch.Size([8, 256, 768])
BiLSTM output size: torch.Size([8, 256, 1536])
output size: torch.Size([8, 256, 1182])
This custom model is trained with a pretty standard Ignite script. Since there are multiple categories and this is not binary classification, the loss function should be nn.CrossEntropyLoss:
# Multi-class objective; per-element losses are averaged into one scalar.
criterion = nn.CrossEntropyLoss(reduction='mean')
optimizer = AdamW(model.parameters(), lr=1e-5)
# Presumably torch's ExponentialLR (decays the learning rate by gamma on every
# scheduler step) — the import is not shown here.
lr_scheduler = ExponentialLR(optimizer, gamma=0.90)
# Custom trainer factory defined below; the model is moved to `device` first.
trainer = create_supervised_trainer1(model.to(device), optimizer, criterion, device=device)
This is the definition of the methods used above:
def _prepare_batch(batch, device=None, non_blocking=False):
    """Split a batch mapping into model inputs and labels and move both to ``device``.

    Args:
        batch: Mapping providing the keys "input_ids", "attention_mask" and "labels".
        device: Target device handed to ``convert_tensor``.
        non_blocking: Forwarded unchanged to ``convert_tensor``.

    Returns:
        A 2-tuple ``(inputs, labels)`` where ``inputs`` is the converted
        ``[input_ids, attention_mask]`` list and ``labels`` the converted labels.
    """
    features = [batch["input_ids"], batch["attention_mask"]]  # list, order matters to the model
    labels = batch["labels"]
    moved_features = convert_tensor(features, device=device, non_blocking=non_blocking)
    moved_labels = convert_tensor(labels, device=device, non_blocking=non_blocking)
    return (moved_features, moved_labels)
def create_supervised_trainer1(model, optimizer, loss_fn, metrics=None, device=None):
    """Create an Ignite ``Engine`` that runs one supervised training step per batch.

    Args:
        model: Module called on the prepared inputs.
        optimizer: Optimizer stepped after every backward pass.
        loss_fn: Callable invoked as ``loss_fn(prediction, target)``.
        metrics: Optional mapping of name -> metric to attach to the engine.
            ``None`` (the default) attaches nothing.
        device: Device the batch is moved to before the forward pass.

    Returns:
        The configured ``Engine``.
    """
    # A None default avoids sharing one mutable dict between all calls.
    if metrics is None:
        metrics = {}

    def _update(engine, batch):
        model.train()
        optimizer.zero_grad()
        x, y = _prepare_batch(batch, device=device)
        y_pred = model(x)
        # Swap dims 1 and 2 so the class dimension comes second, which is the
        # layout the loss expects for per-token targets.
        transposed_y_pred = torch.transpose(y_pred, 1, 2)
        target = y.long()
        loss = loss_fn(transposed_y_pred, target)
        loss.backward()
        optimizer.step()
        return loss.item(), transposed_y_pred, target

    def _metrics_transform(output):
        # Drop the scalar loss; metrics receive (prediction, target).
        return output[1], output[2]

    engine = Engine(_update)
    for name, metric in metrics.items():
        metric._output_transform = _metrics_transform
        metric.attach(engine, name)
    return engine
I know I am missing something, but I have not been able to figure out what. The execution produces an error related to the shapes (the "y" of the DataLoaders has shape [8, 256] and the network produces [8, 1182]). This happens even though I rearranged the tensors in the order required by CrossEntropyLoss:
Current run is terminating due to exception: Expected target size [8, 1182], got [8, 256]
Engine run is terminating due to exception: Expected target size [8, 1182], got [8, 256]
Traceback (most recent call last):
File "/home/users/user/august/src/main/ignite_script.py", line 456, in run
trainer.run(train_dataloader, max_epochs=epochs)
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 892, in run
return self._internal_run()
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 935, in _internal_run
return next(self._internal_run_generator)
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 993, in _internal_run_as_gen
self._handle_exception(e)
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 638, in _handle_exception
raise e
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 959, in _internal_run_as_gen
epoch_time_taken += yield from self._run_once_on_dataset_as_gen()
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 1087, in _run_once_on_dataset_as_gen
self._handle_exception(e)
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 638, in _handle_exception
raise e
File "/home/users/user/.local/lib/python3.9/site-packages/ignite/engine/engine.py", line 1068, in _run_once_on_dataset_as_gen
self.state.output = self._process_function(self, self.state.batch)
File "/home/users/user/august/src/main/ignite_script.py", line 321, in _update
loss = loss_fn(y_pred, y.float())
File "/home/users/user/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/home/users/user/.local/lib/python3.9/site-packages/torch/nn/modules/loss.py", line 1163, in forward
return F.cross_entropy(input, target, weight=self.weight,
File "/home/users/user/.local/lib/python3.9/site-packages/torch/nn/functional.py", line 2996, in cross_entropy
return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: Expected target size [8, 1182], got [8, 256]
According to the documentation of
nn.CrossEntropyLoss, you can specify the targets in two ways:
class indices for each sample
probabilities of each class for each sample
but whichever way you choose, the target must have a specific shape:
given:
> criterion = nn.CrossEntropyLoss()
> loss = criterion(input, target)
Input:
Shape: $(C)$, $(N, C)$ or $(N, C, d_1, d_2, \ldots, d_K)$ with $K \geq 1$ in the case of K-dimensional loss.
Target:
If containing class indices, shape:
$()$, $(N)$ or $(N, d_1, d_2, \ldots, d_K)$ with $K \geq 1$ in the case of K-dimensional loss, where each value should be in $[0, C)$.
If containing class probabilities,
same shape as the input, and each value should be in $[0, 1]$.
Output:
If reduction is 'none', shape $()$, $(N)$ or $(N, d_1, d_2, \ldots, d_K)$ with $K \geq 1$ in the case of K-dimensional loss, depending on the shape of the input. Otherwise, scalar.
While using PyTorch version 1.9.0, I'm getting the error saying that my tensors are at two different locations. Also, the error trace leads me to the LayerNorm function which has been assigned to the variable h. But when I check -
print(h.is_cuda),
it returns true. Therefore, I'm confused regarding what is causing this error and how to solve it.
File "C:/Users/user/AppData/Roaming/JetBrains/PyCharmCE2020.2/scratches/abc.py", line 206, in forward
h = nn.LayerNorm(h.shape[1])(h)
File "C:\Users\user\anaconda3\envs\paper_2\lib\site-packages\torch\nn\modules\module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\user\anaconda3\envs\paper_2\lib\site-packages\torch\nn\modules\normalization.py", line 174, in forward
input, self.normalized_shape, self.weight, self.bias, self.eps)
File "C:\Users\user\anaconda3\envs\paper_2\lib\site-packages\torch\nn\functional.py", line 2346, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking arugment for argument weight in method wrapper_native_layer_norm)
Update #1:
After following the stack trace, I reached the forward function in normalization.py and checked the variables present over there -
def forward(self, input: Tensor) -> Tensor:
    """Apply layer normalisation to ``input`` after printing device diagnostics.

    Debugging variant of the library method: before normalising it reports
    whether the module's weight, its bias and the incoming tensor are CUDA
    tensors, which exposes a parameter/input device mismatch.
    """
    print("Foo")
    print("Check if weight is CUDA", self.weight.is_cuda)
    print("Check if bias is CUDA", self.bias.is_cuda)
    print("Check if input is CUDA", input.is_cuda)
    #print("Check if normalized shape is CUDA", self.normalized_shape.is_cuda)
    normalized = F.layer_norm(
        input,
        normalized_shape=self.normalized_shape,
        weight=self.weight,
        bias=self.bias,
        eps=self.eps,
    )
    return normalized
Check if weight is CUDA False
Check if bias is CUDA False
Check if input is CUDA True
Therefore, it is the weight and the bias of the LayerNorm module that are causing this issue. A quick hack I used to get the function running is shown below. However, I am not sure whether this technique is appropriate -
# Workaround: a LayerNorm built inline keeps its weight and bias on the CPU
# (see the printed checks), so run it on the CPU and move the result back.
h = h.to(device='cpu')
h = nn.LayerNorm(h.shape[1])(h)
h = h.to(device='cuda')
I have added a minimally reproducible example below to better explain my issue. Please note the variables given in the question above and in this example will be different -
import math, random
from sklearn.datasets import load_sample_images
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
###Obtaining a random image and preprocessing it!##
dataset = load_sample_images()
first_img_data = dataset.images[0]
# Reinterpret the image buffer as planes of 427 x 640 (a reshape, not a channel transpose).
first_img_data = first_img_data.reshape(-1, 427, 640)
# Keep only the plane at index 1.
first_img_data = first_img_data[1, :, :]
# Crop the top-left 84 x 84 patch and add a leading axis -> (1, 84, 84).
first_img_data = first_img_data[0:84, 0:84].reshape(-1, 84,84)
first_img_data = torch.tensor(first_img_data)
#################################################################################################################################
USE_CUDA = torch.cuda.is_available()
# Helper that builds an autograd.Variable and moves it to the GPU when CUDA is available.
Variable = lambda *args, **kwargs: autograd.Variable(*args, **kwargs).cuda() if USE_CUDA else autograd.Variable(*args, **kwargs)
class Cnn(nn.Module):
    """Three-stage convolutional feature extractor returning flattened features."""

    def __init__(self, input_shape):
        """Create the convolution stack.

        Args:
            input_shape: Indexable whose element 0 is the number of input channels.
        """
        super().__init__()
        in_channels = input_shape[0]
        conv_stack = [
            nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        ]
        self.features = nn.Sequential(*conv_stack)

    def forward(self, x):
        """Run the convolutions and flatten everything but the batch dimension."""
        feature_maps = self.features(x)
        batch_size = feature_maps.size(0)
        x = feature_maps.view(batch_size, -1)
        # If you uncomment the line below, it'll throw an error!
        # (the LayerNorm is created on the fly here, so it is not one of the
        # sub-modules registered in __init__)
        #x = nn.LayerNorm(x.shape[1])(x)
        return x
state = first_img_data
# (channels, height, width) handed to Cnn; only element 0 is read by the model.
Shape = (1,84, 84)
current_model = Cnn(Shape)
# Moved to the GPU unconditionally, whereas the Variable helper above checks USE_CUDA first.
current_model.to('cuda')
# Convert to float32 and add a leading batch axis -> (1, 1, 84, 84).
state = Variable(torch.FloatTensor(np.float32(state)).unsqueeze(0), volatile=True)
q_value = current_model.forward(state)
P.S There is a similar question over here(pytorch running: RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu), but I couldn't obtain an answer by following the steps given.
I'm trying to federate a keras model which has multiple outputs. There are two separate dense layers that perform a binary classification and a multi-class classification. I am getting the following ValueError when I try to build my federated averaging process tff.learning.build_federated_averaging_process from model_fn(). Following are the code snippets and error information. I am unable to understand what is going wrong and how to resolve it.
ValueError: in user code:
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow_federated/python/learning/framework/optimizer_utils.py:387 _compute_local_training_and_client_delta *
client_output = client_delta_fn(dataset, initial_model_weights)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow_federated/python/learning/federated_averaging.py:92 reduce_fn *
output = model.forward_pass(batch, training=True)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow_federated/python/learning/framework/dataset_reduce.py:28 _dataset_reduce_fn *
return dataset.reduce(initial_state=initial_state_fn(), reduce_func=reduce_fn)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow_federated/python/learning/keras_utils.py:365 forward_pass *
return self._forward_pass(batch_input, training=training)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow_federated/python/learning/keras_utils.py:357 _forward_pass *
metric.update_state(y_true=y_true, y_pred=predictions)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/keras/utils/metrics_utils.py:90 decorated **
update_op = update_state_fn(*args, **kwargs)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/keras/metrics.py:176 update_state_fn
return ag_update_state(*args, **kwargs)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/keras/metrics.py:604 update_state **
y_pred = math_ops.cast(y_pred, self._dtype)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py:920 cast
x = ops.convert_to_tensor(x, name="x")
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:1499 convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py:1502 _autopacking_conversion_function
return _autopacking_helper(v, dtype, name or "packed")
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py:1438 _autopacking_helper
return gen_array_ops.pack(elems_as_tensors, name=scope)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py:6477 pack
"Pack", values=values, axis=axis, name=name)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:744 _apply_op_helper
attrs=attr_protos, op_def=op_def)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py:593 _create_op_internal
compute_device)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:3485 _create_op_internal
op_def=op_def)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:1975 __init__
control_input_ops, op_def)
/home/usr/Envs/tf-fed/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:1815 _create_c_op
raise ValueError(str(e))
ValueError: Dimension 1 in both shapes must be equal, but are 1 and 3. Shapes are [?,1] and [?,3].
From merging shape 0 with other shapes. for '{{node Cast_1/x}} = Pack[N=2, T=DT_FLOAT, axis=0](functional_1/eye_output/Sigmoid, functional_1/mouth_output/Softmax)' with input shapes: [?,1], [?,3].
My model_fn() looks like this:
def model_fn():
    """Build a fresh Keras model and wrap it for TFF.

    Returns the result of ``tff.learning.from_keras_model`` for a two-output
    model, with one loss and one metric listed per output.
    """
    # One loss per output: binary head first, 3-class head second.
    losses = [tf.keras.losses.BinaryCrossentropy(), tf.keras.losses.SparseCategoricalCrossentropy()]
    # NOTE(review): the traceback above is raised while a metric's update_state
    # receives both outputs at once; this list is the likely trigger — confirm.
    metrics = [tf.keras.metrics.BinaryAccuracy(),tf.keras.metrics.SparseCategoricalAccuracy()]
    keras_model = build_model()
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=spec,
        loss=losses,
        metrics=metrics)
where build_model() creates the keras model:
def build_model():
    """Build the two-output Keras model (sketch: the shared layers are elided).

    Returns:
        A ``Model`` whose outputs are ``[out1, out2]``: a single sigmoid unit
        and a 3-way softmax.
    """
    # Elided in the original snippet: definition of `inputs`, `fc1` and `fc2`.
    ...
    # Binary head.
    out1 = Dense(1, activation='sigmoid')(fc1)
    # Three-class head.
    out2 = Dense(3, activation='softmax')(fc2)
    model = Model(inputs=inputs, outputs=[out1, out2])
    return model
And input_specification that looks like this
OrderedDict([('x',
TensorSpec(shape=(None, 240, 320), dtype=tf.float32, name=None)),
('y',
(TensorSpec(shape=(None, 1), dtype=tf.int64, name=None),
TensorSpec(shape=(None, 1), dtype=tf.int64, name=None)))])
How can I build my TFF fedAvg process using such a model?
This seems like it might be from the metrics arguments, based on reading this line in the stack trace:
tensorflow_federated/python/learning/keras_utils.py:357 _forward_pass *
metric.update_state(y_true=y_true, y_pred=predictions)
I suspect BinaryAccuracy and SparseCategoricalAccuracy are being applied to both outputs, but the metrics only operate on specific shaped tensors (code here). In particular it appears to be trying to pass both outputs to the metric at once.
This leads me to believe that TFF does not support different metrics on different outputs for multi-output models defined using Keras. This could be a good candidate for a PR or Issue at https://github.com/tensorflow/federated/issues.
When I use cross-entropy loss as the loss function, I get this "Dimension out of range" error:
Traceback (most recent call last):
File "e:\testcode\cnn.py", line 122, in <module>
loss = loss_func(output, b_y) # cross entropy loss
File "D:\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "D:\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\modules\loss.py", line 916, in forward
ignore_index=self.ignore_index, reduction=self.reduction)
File "D:\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\functional.py", line 2021, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
File "D:\Anaconda3\envs\pytorch\lib\site-packages\torch\nn\functional.py", line 1317, in log_softmax
ret = input.log_softmax(dim)
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
and the loss function is
loss = loss_func(output, b_y)
the value of output is
tensor([-0.3507, 0.2214, 0.3781, 0.3057], grad_fn=<SelectBackward>)
the value of b_y is
tensor([3])
The first argument passed to CrossEntropyLoss has to be a 2D tensor of shape [batch size x number of classes]. If you are only calculating the loss for a single sample, unsqueeze the logits before passing them to the loss function so that they form a batch of size one.
# unsqueeze(0) turns the 4 class scores into a batch of one: shape (4,) -> (1, 4).
logits = torch.tensor([-0.3507, 0.2214, 0.3781, 0.3057]).unsqueeze(0)
# One class index per sample in the batch.
targets = torch.tensor([3])
loss_func = torch.nn.CrossEntropyLoss()
loss_func(logits, targets)
Traceback:
model = Model(input_tensor,x,name = 'vgg16_trunk')
File "/usr/local/lib/python3.6/dist-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 93, in __init__
self._init_graph_network(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 231, in _init_graph_network
self.inputs, self.outputs)
File "/usr/local/lib/python3.6/dist-packages/keras/engine/network.py", line 1443, in _map_graph_network
str(layers_with_complete_input))
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_2:0", shape=(?, 32, 32, 3), dtype=float32) at layer "input_2". The following previous layers were accessed without issue: []
How can I solve this problem with VGG16?
def create_model(input_shape):
    """Build a VGG16 trunk followed by batch normalisation and a leaky ReLU.

    Args:
        input_shape: Shape passed both to ``Input`` and to ``VGG16``.

    Returns:
        A ``Model`` named 'vgg16_trunk'.
    """
    # Normalise over the channel axis, wherever the backend places it.
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1
    input_tensor = Input(shape=input_shape)
    # NOTE(review): VGG16 is built with input_tensor=None, so the `input_tensor`
    # created above is not the tensor that `base_model.output` derives from.
    # The "Graph disconnected" error reported for this snippet names that input.
    base_model = VGG16(classes=10,input_tensor=None,input_shape=input_shape,include_top=False)
    x = base_model.output
    x = BatchNormalization(axis=channel_axis, momentum=mom,
                           epsilon=eps, gamma_initializer=gamma)(x)
    x = LeakyReLU(leakiness)(x)
    model = Model(input_tensor,x,name = 'vgg16_trunk')
    return model
Pass the input_tensor you created here:
input_tensor = Input(shape=input_shape)
where base_model is created:
base_model = VGG16(classes=10,input_tensor=input_tensor,include_top=False)
Please note also, that the tensor will already have the input_shape so it's not necessary to give it as parameter again when creating the base_model.