How to move feature maps across images using slicing? - pytorch

I am trying to implement the online algorithm of this paper, which is on video classification. This work moves 1/8 of channel feature maps from each image, into the next image, after each convolution operation. The image of the operation has been attached here -
While trying to implement the same, I have succeeded in extracting out the first 1/8 channel feature maps, but I don't know how to add them to the succeeding image. My code has been attached below -
import cv2
import gym
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
N = 1 # Batch Size
T = 5 # Time Steps. This means that there are 5 frames in the video
C = 3 # RGB Channels
H = 144 # Height
W = 144 # Width
foo = torch.randn(N*T, C, H, W)
print("Shape of foo = ", foo.shape)
#torch.Size([5, 3, 144, 144])
class Net(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3, 8, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
print("Shape of x = ", x.shape)
# torch.Size([5, 8, 140, 140])
shape_extract = x[:, :1,:,:]
print("Shape of extract = ", shape_extract.shape)
# torch.Size([5, 1, 140, 140])
# 1/8 of the channels have been extracted out from above. But how do I transfer these channel features to the next image?
return x
net = Net()
output = net(foo)

Since your whole sequence is inside the batch, you can shift the layers using torch.roll the elements on the first axis.
>>> rolled = x.roll(shifts=1, dims=1)
Going from this layer layout on axis=1:
[x_0, x_1, x_2, x_3, ..., x_7]
to this one:
[x_7, x_0, x_1, x_2, ..., x_6]
Then replacing the first element by x_0:
>>> rolled[:, 0] = x[:, 0]
Resulting in this layout:
[x_0, x_0, x_1, x_2, ..., x_6]
Then you can input tensor rolled into the next layer.
You can implement a custom layer to wrap this logic:
class ShiftLayer(nn.Module):
def forward(self, x):
out = x.roll(1, 1)
out[:, 0] = x[:, 0]
return out
Then use it inside your model:
class Net(nn.Module):
def __init__(self):
super().__init__()
...
self.shift = ShiftLayer()
def forward(self, x):
x = F.relu(self.conv1(x))
x = self.shift(x)
x = F.relu(self.conv2(x))
return x

Related

ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` when using custom callback to plot conv layer feature maps

I'm trying to implement a custom callback to get the feature maps of each Conv2D layer in the network plotted in TensorBoard.
When I run the code in Example 1 I get the following error:
<ipython-input-44-b691dabedd05> in on_epoch_end(self, epoch, logs)
28
29 # 3) Build partial model
---> 30 partial_model = keras.Model(
31 inputs=self.model.model.input,
32 outputs=output_layers
ValueError: Output tensors of a Functional model must be the output of a TensorFlow `Layer` (thus holding past layer metadata). Found: <keras.engine.base_layer.Layer object at 0x000002773C631CA0>
which seams as if it can't build the partial network, which is strange, because it succeeds when running is separately from the main thread.
Here is an example that illustrates the issue:
Example 1
import os
import io
import datetime as dt
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import cifar10
import matplotlib.pyplot as plt
'''
You can adjust the verbosity of the logs which are being printed by TensorFlow
by changing the value of TF_CPP_MIN_LOG_LEVEL:
0 = all messages are logged (default behavior)
1 = INFO messages are not printed
2 = INFO and WARNING messages are not printed
3 = INFO, WARNING, and ERROR messages are not printed
'''
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
DEBUG = False
class ConvModel(keras.Model):
def __init__(self, input_shape):
super().__init__()
self.input_image_shape = input_shape
self.model = keras.Sequential([
layers.Input(shape=input_shape),
layers.Conv2D(32, 3),
layers.BatchNormalization(),
layers.ReLU(),
layers.MaxPool2D(),
layers.Conv2D(64, 5),
layers.BatchNormalization(),
layers.ReLU(),
layers.MaxPool2D(),
layers.Conv2D(128, 3, kernel_regularizer=keras.regularizers.l2(0.01)),
layers.BatchNormalization(),
layers.ReLU(),
layers.Flatten(),
layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)),
layers.Dropout(0.5),
layers.Dense(10)
])
def call(self, inputs):
return self.model(inputs)
def find_sub_string(string: str, sub_string: str):
return True if string.find(sub_string) > -1 else False
def get_file_type(file_name: str):
file_type = None
if isinstance(file_name, str):
dot_idx = file_name.find('.')
if dot_idx > -1:
file_type = file_name[dot_idx + 1:]
return file_type
def get_image_from_figure(figure):
buffer = io.BytesIO()
plt.savefig(buffer, format='png')
plt.close(figure)
buffer.seek(0)
image = tf.image.decode_png(buffer.getvalue(), channels=4)
image = tf.expand_dims(image, 0)
return image
class ConvLayerVis(keras.callbacks.Callback):
def __init__(self, X, figure_configs: dict, log_dir: str, log_interval: int):
super().__init__()
self.X_test = X
n_dims = len(self.X_test.shape)
assert 2 < n_dims < 5, f'The shape of the test image should be less than 5 and grater than 2, but current shape is {self.X_test.shape}'
# In case the image is not represented as a tensor - add a dimension to the left for the batch
if len(self.X_test.shape) < 4:
self.X_test = np.reshape(self.X_test, (1,) + self.X_test.shape)
self.file_writer = tf.summary.create_file_writer(log_dir)
self.figure_configs = figure_configs
self.log_interval = log_interval
def on_training_begin(self, logs=None):
pass
def on_epoch_end(self, epoch, logs=None):
# 1) Get the layers
if epoch % self.log_interval == 0:
# 1) Get the layers
output_layer_tuples = [(idx, layer) for idx, layer in enumerate(self.model.model.layers) if find_sub_string(layer.name, 'conv2d') or find_sub_string(layer.name, 'max_pooling2d')]
output_layers = [layer_tuple[1] for layer_tuple in output_layer_tuples]
# 2) Get the layer names
conv_layer_name_tuples = [(layer_tuple[0], f'Layer #{layer_tuple[0]} - Conv 2D ') for layer_tuple in output_layer_tuples if find_sub_string(layer_tuple[1].name, 'conv2d')]
max_pool_layer_name_tuples = [(layer_tuple[0], f'Layer #{layer_tuple[0]} - Max Pooling 2D') for layer_tuple in output_layer_tuples if find_sub_string(layer_tuple[1].name, 'max_pooling2d')]
layer_name_tuples = (conv_layer_name_tuples + max_pool_layer_name_tuples)
layer_name_tuples.sort(key=lambda x: x[0])
layer_names = [layer_name_tuple[1] for layer_name_tuple in layer_name_tuples]
# 3) Build partial model
partial_model = keras.Model(
inputs=model.model.input,
outputs=output_layers
)
# 4) Get the feature maps
feature_maps = partial_model.predict(self.X_test)
# 5) Plot
rows, cols = self.figure_configs.get('rows'), self.figure_configs.get('cols')
for feature_map, layer_name in zip(feature_maps, layer_names):
fig, ax = plt.subplots(rows, cols, figsize=self.figure_configs.get('figsize'))
for row in range(rows):
for col in range(cols):
ax[row][col].imshow(feature_map[0, :, :, row+col], cmap=self.figure_configs.get('cmap'))
fig.suptitle(f'{layer_name}')
with self.file_writer.as_default():
tf.summary.image(f'{layer_name} Feature Maps', get_image_from_figure(figure=fig), step=epoch)
if __name__ == '__main__':
print(tf.config.list_physical_devices('GPU'))
# Load the data
(X, y), (X_test, y_test) = cifar10.load_data()
X, X_test = X.astype(np.float32) / 255.0, X_test.astype(np.float32) / 255.0
n, w, h, c = X.shape[0], X.shape[1], X.shape[2], X.shape[3]
n_test, w_test, h_test, c_test = X_test.shape[0], X_test.shape[1], X_test.shape[2], X_test.shape[3]
print(f'''
Dataset Stats:
Number of train images: {n}
Dimensions:
> Train:
width = {w}, height = {h}, channels = {c}
> Test:
width = {w_test}, height = {h_test}, channels = {c_test}
''')
# Model with keras.Sequential
model = ConvModel(input_shape=(w, h, c))
model.compile(loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer=keras.optimizers.Adam(learning_rate=3e-4), metrics=['accuracy'])
log_dir = f'./logs/{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}'
callbacks = [
keras.callbacks.TensorBoard(
log_dir=log_dir,
write_images=True
),
ConvLayerVis(
X=X[0],
figure_configs=dict(rows=5, cols=5, figsize=(35, 35), cmap='gray'),
log_dir=f'{log_dir}/train',
log_interval=3
)
]
model.fit(
X,
y,
batch_size=64,
epochs=15,
callbacks=callbacks
)
Thanks in advance for any help regarding this issue.
Just figured out the problem:
output_layers = [layer_tuple[1].output for layer_tuple in output_layer_tuples]
Should have recovered the output attribute of each layer.

Pytorch Transformer won't train due to tensor sizes

I tried following this tutorial for transformers:
https://www.youtube.com/watch?v=U0s0f995w14
However, when I try to train the code with my own vectors, I get the following error message:
Traceback (most recent call last):
File >"C:\Users\rreichel\Desktop\Smaragd_local\Programming\Scripts\Transformer_se>lfbuilt.py", line 279, in
loss = loss_func(outputs, target)
File "C:\Users\rreichel\Anaconda3\lib\site->packages\torch\nn\modules\module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\rreichel\Anaconda3\lib\site->packages\torch\nn\modules\loss.py", line 1047, in forward
return F.cross_entropy(input, target, weight=self.weight,
File "C:\Users\rreichel\Anaconda3\lib\site->packages\torch\nn\functional.py", line 2693, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, None, >ignore_index, None, reduction)
File "C:\Users\rreichel\Anaconda3\lib\site->packages\torch\nn\functional.py", line 2397, in nll_loss
raise ValueError("Expected target size {}, got {}".format(out_size, >target.size()))
ValueError: Expected target size (3, 199), got torch.Size([3, 119])
when calculating the loss during training.
The code:
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 6 08:13:38 2021
#author: rreichel
"""
import torch
import torch.nn as nn
import pickle
import glob
import os
from SelfbuiltDataset_test import myDataset
import torch.optim as optim
class SelfAttention(nn.Module):
def __init__(self, embed_size, heads):
super(SelfAttention, self).__init__()
self.embed_size = embed_size
self.heads = heads
self.head_dim = embed_size // heads
assert(self.head_dim * heads == embed_size
), "Embedding size needs to be divisible by heads"
self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
self.fc_out = nn.Linear(heads * self.head_dim, embed_size)
def forward(self, values, keys, query, mask):
#Get number of training examples
N = query.shape[0]
value_len, key_len, query_len = values.shape[1], keys.shape[1], \
query.shape[1]
#Split the embedding into self.heads different pieces
values = values.reshape(N, value_len, self.heads, self.head_dim)
keys = keys.reshape(N, key_len, self.heads, self.head_dim)
query = query.reshape(N, query_len, self.heads, self.head_dim)
#(N, value_len, heads, head_dim)
values = self.values(values)
#(N, key_len, heads, head_dim)
keys = self.keys(keys)
#(N, query_len, heads, heads_dim)
queries = self.queries(query)
energy = torch.einsum("nqhd, nkhd -> nhqk", [queries, keys])
#queries shape: (N, query_len, heads, heads_dim),
#keys shape: (N, key_len, heads, heads_dim)
#energy: (N, heads, query_len, key_len)
#Mask padded indices so their weights become 0
if mask is not None:
energy = energy.masked_fill(mask == 0, float("-1e20"))
#Normalize energy values
attention = torch.softmax(energy / (self.embed_size ** (1 / 2)), dim=3)
#attention shape: (N, heads, query_len, key_len)
out = torch.einsum("nhql, nlhd -> nqhd", [attention, values]).reshape(
N, query_len, self.heads * self.head_dim)
#attention shape: (N, heads, query_len, key_len)
#values shape: (N, value_len, heads, heads_dim)
#out after matrix multiply: (N, query_len, heads, head_dim), then
#we reshape and flatten the last two dimensions.
out = self.fc_out(out)
return out
class TransformerBlock(nn.Module):
def __init__(self, embed_size, heads, dropout, forward_expansion):
super(TransformerBlock, self).__init__()
self.attention = SelfAttention(embed_size, heads)
self.norm1 = nn.LayerNorm(embed_size)
self.norm2 = nn.LayerNorm(embed_size)
self.feed_forward = nn.Sequential(
nn.Linear(embed_size, forward_expansion * embed_size),
nn.ReLU(),
nn.Linear(forward_expansion * embed_size, embed_size))
self.dropout = nn.Dropout(dropout)
def forward(self, value, key, query, mask):
attention = self.attention(value, key, query, mask)
# Add skip connection, run through normalization and finally dropout
x = self.dropout(self.norm1(attention + query))
forward = self.feed_forward(x)
out = self.dropout(self.norm2(forward + x))
return out
class Encoder(nn.Module):
def __init__(self, src_vocab_size, embed_size, num_layers, heads, device,
forward_expansion, dropout, max_length):
super(Encoder, self).__init__()
self.embed_size = embed_size
self.device = device
self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
self.position_embedding = nn.Embedding(max_length, embed_size)
self.layers = nn.ModuleList([TransformerBlock(embed_size, heads,
dropout=dropout, forward_expansion=forward_expansion)
for _ in range(num_layers)])
self.dropout = nn.Dropout(dropout)
def forward(self, x, mask):
N, seq_length = x.shape
positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
out = self.dropout(
(self.word_embedding(x) +
self.position_embedding(positions))
)
#In the Encoder the query, key, value are all the same, it's in the
#decoder this will change. This might look a bit odd in this case.
for layer in self.layers:
out = layer(out, out, out, mask)
return out
class DecoderBlock(nn.Module):
def __init__(self, embed_size, heads, forward_expansion, dropout, device):
super(DecoderBlock, self).__init__()
self.norm = nn.LayerNorm(embed_size)
self.attention = SelfAttention(embed_size, heads=heads)
self.transformer_block = TransformerBlock(embed_size, heads, dropout,
forward_expansion)
self.dropout = nn.Dropout(dropout)
def forward(self, x, value, key, src_mask, trg_mask):
attention = self.attention(x, x, x, trg_mask)
query = self.dropout(self.norm(attention + x))
out = self.transformer_block(value, key, query, src_mask)
return out
class Decoder(nn.Module):
def __init__(self, trg_vocab_size, embed_size, num_layers, heads,
forward_expansion, dropout, device, max_length):
super(Decoder, self).__init__()
self.device = device
self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
self.position_embedding = nn.Embedding(max_length, embed_size)
self.layers = nn.ModuleList([DecoderBlock(embed_size, heads,
forward_expansion, dropout,
device)
for _ in range(num_layers)])
self.fc_out = nn.Linear(embed_size, trg_vocab_size)
self.dropout = nn.Dropout(dropout)
def forward(self, x, enc_out, src_mask, trg_mask):
N, seq_length = x.shape
positions = torch.arange(0, seq_length).expand(N,seq_length).to(self.device)
x = self.dropout((self.word_embedding(x) +
self.position_embedding(positions)))
for layer in self.layers:
x = layer(x, enc_out, enc_out, src_mask, trg_mask)
out = self.fc_out(x)
return out
class Transformer(nn.Module):
def __init__(self, src_vocab_size, trg_vocab_size, src_pad_idx,
trg_pad_idx, embed_size=512, num_layers=6,
forward_expansion=4, heads=8, dropout=0, device="cpu",
max_length=100):
super(Transformer, self).__init__()
self.encoder = Encoder(src_vocab_size, embed_size, num_layers, heads,
device, forward_expansion, dropout, max_length)
self.decoder = Decoder(trg_vocab_size, embed_size, num_layers, heads,
forward_expansion, dropout, device, max_length)
self.src_pad_idx = src_pad_idx
self.trg_pad_idx = trg_pad_idx
self.device = device
def make_src_mask(self, src):
#(N, 1, 1, src_len)
src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
return src_mask.to(self.device)
def make_trg_mask(self, trg):
N, trg_len = trg.shape
trg_mask = torch.tril(torch.ones((trg_len, trg_len))).expand(N, 1,
trg_len, trg_len)
return trg_mask.to(self.device)
def forward(self, src, trg):
src_mask = self.make_src_mask(src)
trg_mask = self.make_trg_mask(trg)
enc_src = self.encoder(src, src_mask)
out = self.decoder(trg, enc_src, src_mask, trg_mask)
return out
def nextMultiple(n, x):
n = n + x / 2
n = n - (n % x)
return int(n)
if __name__ == "__main__":
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
"""
#This shit are the one-hot encoded sentences (word 1, word 4 etc. as sentence)
train = torch.tensor([[1, 5, 6, 4, 3, 9, 5, 2, 0, 1, 11],
[1, 8, 7, 3, 4, 5, 6, 11, 2, 1, 3]]).to(device)
target = torch.tensor([[1, 7, 4, 3, 5, 9, 2, 0, 2, 2],
[1, 5, 6, 2, 4, 7, 6, 2, 9, 1]]).to(device)
max_len = max([len(x) for x in train]) + 1
"""
#Loading in data
data = pickle.load(open('Testdaten.pkl', 'rb'))
tmp = myDataset(data, 'POS')
#Calculating maximum sentence length (+ 1 because of start tag)
max_len = max([len(x) for x in tmp.sent_encoded]) + 1
pad_element = len(tmp.lookup_words)
#Padding everything out to maximum sentence length
train_tmp = []
for sent in tmp.sent_encoded:
train_tmp.append([pad_element] + sent + [pad_element] * (max_len - len(sent) - 1))
target_tmp = []
for sent in tmp.tags_encoded:
target_tmp.append(sent + [pad_element] * (max_len - len(sent) - 1))
#Creating tensors for model
train = torch.squeeze(torch.tensor(train_tmp))
target = torch.squeeze(torch.tensor(target_tmp))
#"""
src_pad_idx = 0
trg_pad_idx = 0
src_vocab_size = int(torch.max(train)) + 1
trg_vocab_size = int(torch.max(target)) + 1
heads = 8
es = nextMultiple(max(src_vocab_size, trg_vocab_size), heads)
model = Transformer(src_vocab_size, trg_vocab_size, src_pad_idx,
trg_pad_idx, es, 3, 2, heads, 0.1, device,
max_len).to(device)
#Defining loss function and optimizer
lr = 0.001
num_epochs = 2
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
loss_func = nn.CrossEntropyLoss()
# optimization algorithm
optimizer = optim.Adam(model.parameters(), lr=lr)
# train and evaluation
for cnt in range(num_epochs):
optimizer.zero_grad()
outputs = model(train, target)
outputs = outputs
#Outputs now are size[3, 119, 119]
#CrossEntropyLoss mag one-hot-encoding nicht, how to deal with this?
loss = loss_func(outputs, target)
loss.backward()
optimizer.step()
#out = model(train, target)
#print(out.shape)
I am confused since the code works with the vectors from the tutorial, but once I try to run the model with my own vocabulary, it produces this strange error. The data is just integer values encoding the corresponding words, e.g. "Hello World" would result in the training vector [1 2].
There is no differences between my data and the data from the tutorial as far as I can see. The tensor types are the same (Torch.LongTensor), they are both integer values and in a specified range. The difference is in dimensionality, the tutorial uses vectors with dimension (2, 10), while mine are (3, 199).
Also, I am sorry, but I can't reduce the code any more since otherwise, the error might not be reproduceable.
Did anyone encounter this error before?

Pytorch | I don't know why it is throwing an error? (Beginner)

import torch.nn as nn
import torch.nn.functional as F
## TODO: Define the NN architecture
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# linear layer (784 -> 1 hidden node)
self.fc1 = nn.Linear(28 * 28, 512)
self.fc2 = nn.Linear(512 * 512)
self.fc3 = nn.Linear(512 * 10)
def forward(self, x):
# flatten image input
x = x.view(-1, 28 * 28)
# add hidden layer, with relu activation function
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
return x
# initialize the NN
model = Net()
print(model)
When I run this, it throws this error. Why?
TypeError: __ init __() missing 1 required positional argument: 'out_features'
This error is because you have not provided the output size of the fully connected layer in your fc2 and fc3.
Below is the modified code. I added the output size, I am not sure if this is the output size architecture you want. But for the demonstration, I put the output size. Please edit the code and add the output size as per your requirement.
Remember that the output size of the previous fully connected layer should be the input size of the next FC layer. Else it will throw size mismatch error.
import torch.nn as nn
import torch.nn.functional as F
## TODO: Define the NN architecture
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# linear layer (784 -> 1 hidden node)
self.fc1 = nn.Linear(28 * 28, 512)
self.fc2 = nn.Linear(512 ,512*10)
self.fc3 = nn.Linear(512 * 10,10)
def forward(self, x):
# flatten image input
x = x.view(-1, 28 * 28)
# add hidden layer, with relu activation function
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
return x
# initialize the NN
model = Net()
print(model)

How to solve the RuntimeError when using torch.utils.tensorboard to add a graph

I am trying to use tensorboard to visualize my pytorch model and encounter a problem. The input tensor's shape is (-1, 1, 20, 15) and the output tensor's shape is (-1, 6). My model combines a list of 5 convolutional networks.
packages:
python: 3.7.6
pytorch: 1.4.0
tensorboard: 2.1.0
The pytorch model is as below:
import torch
from torch import nn
from torch.nn import functional as F
class MyModel(nn.Module):
"""example"""
def __init__(self, nchunks=[2, 5, 3, 2, 3], resp_size=6):
super().__init__()
self.nchunks = nchunks
self.conv = [nn.Conv2d(1, 2, (2, x)) for x in nchunks]
self.pool = nn.Sequential(
nn.AdaptiveMaxPool1d(output_size=10), nn.Flatten(start_dim=1)
)
self.bn = nn.BatchNorm1d(100)
self.fc1 = nn.Linear(100, 100)
self.fc2 = nn.Linear(100, 100)
self.fc3 = nn.Linear(100, resp_size)
def forward(self, x):
xi = torch.split(x, self.nchunks, dim=3)
xi = [f(subx.float()).view(-1, 2, 19) for f, subx in zip(self.conv, xi)]
xi = [self.pool(subx) for subx in xi]
xi = torch.cat(xi, dim=1)
xi = self.bn(xi)
xi = F.relu(self.fc1(xi))
xi = F.relu(self.fc2(xi))
xi = self.fc3(xi)
return xi
Here is the code for the tensorboard summary writer:
from torch.utils.tensorboard import SummaryWriter
x = torch.rand((5,1,20,15))
model = MyModel()
writer = SummaryWriter('logs')
writer.add_graph(model, x)
Such an error is returned:
RuntimeError: Cannot insert a Tensor that requires grad as a constant. Consider making it a parameter or input, or detaching the gradient
Tensor:
(1,1,.,.) =
-0.2108 -0.4986
-0.4009 -0.1910
(2,1,.,.) =
0.2383 -0.4147
0.2642 0.0456
[ torch.FloatTensor{2,1,2,2} ]
I guess the model has some issues, but I am not sure what happens.
This similar github issue does not relate to my problem because I am not using multi GPUs.
I solved the problem by replacing
[nn.Conv2d(1, 2, (2, x)) for x in nchunks]
with
nn.ModuleList([nn.Conv2d(1, 2, (2, x)) for x in nchunks])

Batchsize in input shape of chainer CNN

I have a training set of 9957 images. The training set has shape (9957, 3, 60, 80).
Is batchsize required when putting training set to model?
If required can the original shape be considered correct for fitting to conv2D layer or do I need to add batchsize to input_shape?
X_train.shape
(9957, 60,80,3)
from chainer.datasets import split_dataset_random
from chainer.dataset import DatasetMixin
import numpy as np
class MyDataset(DatasetMixin):
def __init__(self, X, labels):
super(MyDataset, self).__init__()
self.X_ = X
self.labels_ = labels
self.size_ = X.shape[0]
def __len__(self):
return self.size_
def get_example(self, i):
return np.transpose(self.X_[i, ...], (2, 0, 1)), self.labels_[i]
batch_size = 3
label_train = y_trainHot1
dataset = MyDataset(X_train1, label_train)
dataset_train, valid = split_dataset_random(dataset, 8000, seed=0)
train_iter = iterators.SerialIterator(dataset_train, batch_size)
valid_iter = iterators.SerialIterator(valid, batch_size, repeat=False,
shuffle=False)
The code below tells you that you do not have to care the batch-size by yourself. You just use DatsetMixin and SerialIterator as is instructed in the tutorial of chainer.
from chainer.dataset import DatasetMixin
from chainer.iterators import SerialIterator
import numpy as np
NUM_IMAGES = 9957
NUM_CHANNELS = 3 # RGB
IMAGE_WIDTH = 60
IMAGE_HEIGHT = 80
NUM_CLASSES = 10
BATCH_SIZE = 32
TRAIN_SIZE = min(8000, int(NUM_IMAGES * 0.9))
images = np.random.rand(NUM_IMAGES, NUM_CHANNELS, IMAGE_WIDTH, IMAGE_HEIGHT)
labels = np.random.randint(0, NUM_CLASSES, (NUM_IMAGES,))
class MyDataset(DatasetMixin):
def __init__(self, images_, labels_):
# note: input arg.'s tailing underscore is just to avoid shadowing
super(MyDataset, self).__init__()
self.images_ = images_
self.labels_ = labels_
self.size_ = len(labels_)
def __len__(self):
return self.size_
def get_example(self, i):
return self.images_[i, ...], self.labels_[i]
dataset_train = MyDataset(images[:TRAIN_SIZE, ...], labels[:TRAIN_SIZE])
dataset_valid = MyDataset(images[TRAIN_SIZE:, ...], labels[TRAIN_SIZE:])
train_iter = SerialIterator(dataset_train, BATCH_SIZE)
valid_iter = SerialIterator(dataset_valid, BATCH_SIZE, repeat=False, shuffle=False)
###############################################################################
"""This block is just for the confirmation.
.. note: NOT recommended to call :func:`concat_examples` in your code.
Use :class:`chainer.updaters.StandardUpdater` instead.
"""
from chainer.dataset import concat_examples
batch_image, batch_label = concat_examples(next(train_iter))
print("batch_image.shape\n{}".format(batch_image.shape))
print("batch_label.shape\n{}".format(batch_label.shape))
Output
batch_image.shape
(32, 3, 60, 80)
batch_label.shape
(32,)
It should be noted that chainer.dataset.concat_example is a little bit tricky part. Usually, the users do not pay attention to this function, if you use StandardUpdater which conceals the native function chainer.dataset.concat_example.
Since chainer is designed on the scheme of Trainer, (Standard)Updater, some Optimizer, (Serial)Iterator and Dataset(Mixin), if you do not follow this scheme, you have to dive into the sea of chainer source code.

Resources