Resnet implementation: forward() takes 1 positional argument but 2 were given - pytorch

I wrote this code, and when I run it I get the following error: forward() takes 1 positional argument but 2 were given. As far as I know, I am passing only one argument to forward().
ResNet here is a basic residual block:
class ResNet(nn.Module):
    def __init__(self, in_channels, mid_channels, mid2_channels, out_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size = 3, stride = 1, padding = 1)
        self.conv1_bn = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels, mid2_channels, kernel_size = 3, stride = 1, padding = 1)
        self.conv2_bn = nn.BatchNorm2d(mid2_channels)
        self.conv3 = nn.Conv2d(mid2_channels, out_channels, kernel_size = 3, stride = 1, padding = 1)
        self.conv3_bn = nn.BatchNorm2d(out_channels)
        if (in_channels != out_channels):
            self.conv_shortcut = nn.Conv2d(in_channels, out_channels, kernel_size = 1, stride = 1, padding = 0)

    def forward(self, X):
        X_shortcut = X
        X = F.relu(self.conv1(X))
        X = self.conv1_bn(X)
        X = F.relu(self.conv2(X))
        X = self.conv2_bn(X)
        X = F.relu(self.conv2(X))
        X = self.conv2_bn(X)
        if (in_channels == out_channels):
            X = self.conv3(X) + X_shortcut
        else:
            X = self.conv3(X) + self.conv_shortcut(X_shortcut)
        X = self.conv3_bn(F.relu(x))
        return X
This is the class that generates a model from the given layers:
class TotalNet(nn.Module):
    def __init__(self, Layers):
        super().__init__()
        self.hidden = nn.ModuleList()
        self.hidden.append(nn.BatchNorm2d(1))
        for i in range(0, len(Layers) - 1, 3):
            in_channels, mid_channels, mid2_channels, out_channels = Layers[i:(i + 4)]
            self.hidden.append(ResNet(in_channels, mid_channels, mid2_channels, out_channels))
        self.hidden.append(nn.Flatten())

    def forward(self, X):
        X = self.hidden(X)
        return X
The following is how I am calling the model:
test = TotalNet([9,2,9,9,9,9,9,9,9,9])
a = torch.rand((1,9,9), dtype = torch.float32)
test(a)

I realized that I was passing X to the nn.ModuleList itself. This is incorrect; the right way is to apply X to each element of the nn.ModuleList in turn, updating X along the way.
In other words, the forward function of TotalNet should be the following:
def forward(self, X):
    for operation in self.hidden:
        X = operation(X)
    return X
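
Since the layers here are applied strictly in order, another option is to build the container as an nn.Sequential, which is itself callable, so the original self.hidden(X) would have worked. A minimal sketch of that variant (same structure as above, not the original code):

class TotalNet(nn.Module):
    def __init__(self, Layers):
        super().__init__()
        blocks = [nn.BatchNorm2d(1)]
        for i in range(0, len(Layers) - 1, 3):
            in_channels, mid_channels, mid2_channels, out_channels = Layers[i:(i + 4)]
            blocks.append(ResNet(in_channels, mid_channels, mid2_channels, out_channels))
        blocks.append(nn.Flatten())
        # Unlike nn.ModuleList, nn.Sequential defines forward() as chained application
        self.hidden = nn.Sequential(*blocks)

    def forward(self, X):
        return self.hidden(X)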

Related

Neural Network initialized with random weights always returns the same output with random inputs

I have a problem with PyTorch in Spyder. A randomly initialized neural network always returns the same output, even for random input tensors. I am currently using a local GPU with Spyder. I made sure that the initialization of the weights is random and not all zeros.
Example:
x = torch.rand(1, 3, 360, 640)
x = self.stage_1(x)
x = self.stage_2(x)
x = self.stage_3(x)
x = self.stage_4(x)
x = self.stage_5(x)
x = self.stage_6(x)
x = torch.flatten(x, start_dim=1)
y = torch.rand(1, 3, 360, 640)
y = self.stage_1(y)
y = self.stage_2(y)
y = self.stage_3(y)
y = self.stage_4(y)
y = self.stage_5(y)
y = self.stage_6(y)
y = torch.flatten(y, start_dim=1)
This code always returns y == x.
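A quick way to verify that the outputs really are identical elementwise, rather than merely similar (a minimal check, assuming x and y are the flattened tensors from above):

print(torch.equal(x, y))                 # True only if every element matches exactly
print(torch.allclose(x, y, atol=1e-6))   # True if they match within a small tolerance
print(x.std().item(), y.std().item())    # near-zero std would mean the output collapsed to a constant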
This is the stage class:
class VggStage(nn.Module):
    def __init__(self,
                 input_channels: int,
                 output_channels: int) -> None:
        """
        Parameters
        ----------
        input_channels : int
            Number of input channels of the first convolution.
        output_channels : int
            Number of output channels of both convolutions.

        Returns
        -------
        None
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=input_channels,
                               out_channels=output_channels,
                               kernel_size=(3, 3))
        self.conv2 = nn.Conv2d(in_channels=output_channels,
                               out_channels=output_channels,
                               kernel_size=(3, 3))
        self.max_pool = nn.MaxPool2d(kernel_size=(2, 2),
                                     stride=(2, 2))

    def forward(self,
                x: torch.Tensor) -> torch.Tensor:
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.max_pool(x)
        return x

ViViT PyTorch: RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15

I am trying to run the Video Vision Transformer (ViViT) code with my dataset but am getting an error when using CrossEntropyLoss from PyTorch as the loss function.
These are the 6 classes I have:
['Run', 'Sit', 'Walk', 'Wave', 'Sit', 'Stand']
Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, weight_decay=1e-9, momentum=0.9)
Class Weights
tensor([0.0045, 0.0042, 0.0048, 0.0038, 0.0070, 0.0065])
Loss Function
loss_func = nn.CrossEntropyLoss(weight=class_weights.to(device))
Code Throwing the Error
train_epoch(model, optimizer, train_loader, train_loss_history, loss_func)
Error
RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15
Code Calling the transformer
model = ViViT(224, 16, 100, 16).cuda()
Getting Video Frames
def get_frames(filename, n_frames=1):
    frames = []
    v_cap = cv2.VideoCapture(filename)
    v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_list = np.linspace(0, v_len - 1, n_frames + 1, dtype=np.int16)
    frame_dims = np.array([224, 224, 3])
    for fn in range(v_len):
        success, frame = v_cap.read()
        if success is False:
            continue
        if (fn in frame_list):
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (frame_dims[0], frame_dims[1]))
            frames.append(frame)
    v_cap.release()
    return frames, v_len
Dataset Preprocessing
class DatasetProcessing(data.Dataset):
    def __init__(self, df, root_dir):
        super(DatasetProcessing, self).__init__()
        # List of all video paths
        video_list = df["Video"].apply(lambda x: root_dir + '/' + x)
        self.video_list = np.asarray(video_list)
        self.df = df

    def __getitem__(self, index):
        # Ensure that the raw videos are in respective folders and folder name matches the output class label
        video_label = self.video_list[index].split('/')[-2]
        video_name = self.video_list[index].split('/')[-1]
        video_frames, len_ = get_frames(self.video_list[index], n_frames = 15)
        video_frames = np.asarray(video_frames)
        video_frames = video_frames/255
        class_list = ['Run', 'Walk', 'Wave', 'Sit', 'Turn', 'Stand']
        class_id_loc = np.where(class_list == video_label)
        label = class_id_loc
        d = torch.as_tensor(np.array(video_frames).astype('float'))
        l = torch.as_tensor(np.array(label).astype('float'))
        return (d, l)

    def __len__(self):
        return self.video_list.shape[0]
Training Epochs
def train_epoch(model, optimizer, data_loader, loss_history, loss_func):
    total_samples = len(data_loader.dataset)
    model.train()
    for i, (data, target) in enumerate(data_loader):
        optimizer.zero_grad()
        x = data.cuda()
        data = rearrange(x, 'b p h w c -> b p c h w').cuda()
        target = target.type(torch.LongTensor).cuda()
        pred = model(data.float())
        output = F.log_softmax(pred, dim=1)
        loss = loss_func(output, target.squeeze(1))
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print('[' + '{:5}'.format(i * len(data)) + '/' + '{:5}'.format(total_samples) +
                  ' (' + '{:3.0f}'.format(100 * i / len(data_loader)) + '%)] Loss: ' +
                  '{:6.4f}'.format(loss.item()))
            loss_history.append(loss.item())
Evaluate Model
def evaluate(model, data_loader, loss_history, loss_func):
    model.eval()
    total_samples = len(data_loader.dataset)
    correct_samples = 0
    total_loss = 0
    with torch.no_grad():
        for data, target in data_loader:
            x = data.cuda()
            data = rearrange(x, 'b p h w c -> b p c h w').cuda()
            target = target.type(torch.LongTensor).cuda()
            output = F.log_softmax(model(data.float()), dim=1)
            loss = loss_func(output, target)
            _, pred = torch.max(output, dim=1)
            total_loss += loss.item()
            correct_samples += pred.eq(target).sum()
    avg_loss = total_loss / total_samples
    loss_history.append(avg_loss)
    print('\nAverage test loss: ' + '{:.4f}'.format(avg_loss) +
          '  Accuracy:' + '{:5}'.format(correct_samples) + '/' +
          '{:5}'.format(total_samples) + ' (' +
          '{:4.2f}'.format(100.0 * correct_samples / total_samples) + '%)\n')
Transformer
class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.):
        super().__init__()
        self.layers = nn.ModuleList([])
        self.norm = nn.LayerNorm(dim)
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))
            ]))

    def forward(self, x):
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return self.norm(x)
ViViT Code
class ViViT(nn.Module):
    def __init__(self, image_size, patch_size, num_classes, num_frames, dim = 192, depth = 4, heads = 3,
                 pool = 'cls', in_channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0., scale_dim = 4):
        super().__init__()
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
        patch_dim = in_channels * patch_size ** 2
        self.to_patch_embedding = nn.Sequential(
            Rearrange('b t c (h p1) (w p2) -> b t (h w) (p1 p2 c)', p1 = patch_size, p2 = patch_size),
            nn.Linear(patch_dim, dim),
        )
        self.pos_embedding = nn.Parameter(torch.randn(1, num_frames, num_patches + 1, dim))
        self.space_token = nn.Parameter(torch.randn(1, 1, dim))
        self.space_transformer = Transformer(dim, depth, heads, dim_head, dim*scale_dim, dropout)
        self.temporal_token = nn.Parameter(torch.randn(1, 1, dim))
        self.temporal_transformer = Transformer(dim, depth, heads, dim_head, dim*scale_dim, dropout)
        self.dropout = nn.Dropout(emb_dropout)
        self.pool = pool
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )

    def forward(self, x):
        x = self.to_patch_embedding(x)
        b, t, n, _ = x.shape
        cls_space_tokens = repeat(self.space_token, '() n d -> b t n d', b = b, t = t)
        x = torch.cat((cls_space_tokens, x), dim=2)
        x += self.pos_embedding[:, :, :(n + 1)]
        x = self.dropout(x)
        x = rearrange(x, 'b t n d -> (b t) n d')
        x = self.space_transformer(x)
        x = rearrange(x[:, 0], '(b t) ... -> b t ...', b=b)
        cls_temporal_tokens = repeat(self.temporal_token, '() n d -> b n d', b=b)
        x = torch.cat((cls_temporal_tokens, x), dim=1)
        x = self.temporal_transformer(x)
        x = x.mean(dim = 1) if self.pool == 'mean' else x[:, 0]
        return self.mlp_head(x)
Multi-target (per-class probability) targets appear to be a feature supported since version 1.10.0:
https://discuss.pytorch.org/t/crossentropyloss-vs-per-class-probabilities-target/138331
Please check your PyTorch version.
Please refer to the example using the UCF101 top-5 dataset, which is available on my Colab. The PyTorch version there is 1.12.0+cu113, and the code you listed was able to run the training almost exactly as written.
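
For background, before 1.10 nn.CrossEntropyLoss only accepted class-index targets of shape (N,); a target carrying an extra dimension, such as (N, 1), triggers exactly this multi-target error on old versions. A minimal illustration with hypothetical tensors (not the poster's data):

import torch
import torch.nn as nn

loss_func = nn.CrossEntropyLoss()
pred = torch.randn(4, 6)             # (N, C) logits for 6 classes
target = torch.tensor([0, 2, 5, 1])  # (N,) class indices: accepted on every version
print(loss_func(pred, target))

bad_target = target.unsqueeze(1)     # (N, 1): raises the multi-target error on old versions
# loss_func(pred, bad_target)        # squeezing the extra dim, as train_epoch does, avoids it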

PyTorch: GRU, one-to-many / many-to-one

I would like to implement a GRU that encodes a sequence of vectors into one vector (many-to-one), and then another GRU that decodes a vector into a sequence of vectors (one-to-many). The size of the vectors would not change. I would like some opinions on what I implemented.
Here is the code:
class AEGRU(nn.Module):
    def __init__(self, opt):
        super(AEGRU, self).__init__()
        self.length = 256
        self.latent_space = 256
        self.num_layers = 1
        self.GRU_enc = nn.GRU(input_size=3, hidden_size=self.latent_space, num_layers=self.num_layers, batch_first=True)
        self.fc_enc = nn.Linear(self.latent_space, self.latent_space)
        self.GRU_dec = nn.GRU(input_size=self.latent_space, hidden_size=3, num_layers=self.num_layers, batch_first=True)
        self.fc_dec = nn.Linear(3, 3)

    def enc(self, x):
        # x has shape: Batch_size x self.length x 3
        h0 = torch.zeros(self.num_layers, x.shape[0], self.latent_space).cuda()
        out, _ = self.GRU_enc(x, h0)
        out = out[:, -1, :]
        out = self.fc_enc(out)
        return out

    def dec(self, x):
        # x has shape: Batch_size x self.latent_space
        x = x[:, None, :]
        h = torch.zeros(self.num_layers, x.shape[0], 3).cuda()
        # method 1 ??
        '''outputs = torch.zeros(x.shape[0], self.length, 3).cuda()
        for i in range(self.length):
            out, h = self.GRU_dec(x, h)
            outputs[:, i, :] = out[:, 0, :]'''
        # method 2 ??
        x = x.repeat(1, self.length, 1)
        outputs, _ = self.GRU_dec(x, h)
        # linear layer
        outputs = self.fc_dec(outputs)
        return outputs

    def forward(self, x):
        self.indices = []
        latent = self.enc(x)
        output = self.dec(latent)
        return output
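
For reference, a quick shape check of the round trip (a sketch; the module above hard-codes .cuda(), so this assumes a GPU, and opt is unused by __init__):

model = AEGRU(opt=None).cuda()
x = torch.rand(8, 256, 3).cuda()  # batch of 8 sequences, length 256, 3 features each
latent = model.enc(x)             # expected shape: (8, 256), one latent vector per sequence
output = model(x)                 # expected shape: (8, 256, 3), same as the input
print(latent.shape, output.shape)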
I am not sure whether this is a good way to do a one-to-many GRU. Could I have some opinions about this?
Thanks for reading!

PyTorch to Keras model

I'm trying to replicate a model, but I'm having difficulties doing so with Keras. Here is my current implementation:
filters = 256
kernel_size = 3
strides = 1

# Head module
input = Input(shape=(img_height//scale_fact, img_width//scale_fact, img_depth))
conv0 = Conv2D(filters, kernel_size, strides=strides, padding='same',
               kernel_regularizer=regularizers.l2(0.01))(input)

# Body module
res = Conv2D(filters, kernel_size, strides=strides, padding='same')(conv0)
act = ReLU()(res)
res = Conv2D(filters, kernel_size, strides=strides, padding='same')(act)
res_rec = Add()([conv0, res])

for i in range(res_blocks):
    res1 = Conv2D(filters, kernel_size, strides=strides, padding='same')(res_rec)
    act = ReLU()(res1)
    res2 = Conv2D(filters, kernel_size, strides=strides, padding='same')(act)
    res_rec = Add()([res_rec, res2])

conv = Conv2D(filters, kernel_size, strides=strides, padding='same',
              kernel_regularizer=regularizers.l2(0.01))(res_rec)
add = Add()([conv0, conv])

# Tail module
conv = Conv2D(filters, kernel_size, strides=strides, padding='same',
              kernel_regularizer=regularizers.l2(0.01))(add)
act = ReLU()(conv)
up = UpSampling2D(size=scale_fact if scale_fact != 4 else 2)(act)  # TODO: try "Conv2DTranspose"
# mul = Multiply([np.zeros((img_width,img_height,img_depth)).fill(0.1), up])(up)

# When it's a 4X factor, we want the upscale split in two procedures
if(scale_fact == 4):
    conv = Conv2D(filters, kernel_size, strides=strides, padding='same',
                  kernel_regularizer=regularizers.l2(0.01))(up)
    act = ReLU()(conv)
    up = UpSampling2D(size=2)(act)  # TODO: try "Conv2DTranspose"

output = Conv2D(filters=3,
                kernel_size=1,
                strides=1,
                padding='same',
                kernel_regularizer=regularizers.l2(0.01))(up)

model = Model(inputs=input, outputs=output)
Here is a link to the file I'm trying to replicate. How am I supposed to replicate this custom PyTorch UpSampler that implements a customized pixel-shuffling method?
Here is the relevant part of the UpSampler, which is what I'm mostly having trouble with:
import tensorflow as tf
import tensorflow.contrib.slim as slim

"""
Method to upscale an image using
conv2d transpose. Based on upscaling
method defined in the paper

x: input to be upscaled
scale: scale increase of upsample
features: number of features to compute
activation: activation function
"""
def upsample(x, scale=2, features=64, activation=tf.nn.relu):
    assert scale in [2, 3, 4]
    x = slim.conv2d(x, features, [3, 3], activation_fn=activation)
    if scale == 2:
        ps_features = 3*(scale**2)
        x = slim.conv2d(x, ps_features, [3, 3], activation_fn=activation)
        #x = slim.conv2d_transpose(x, ps_features, 6, stride=1, activation_fn=activation)
        x = PS(x, 2, color=True)
    elif scale == 3:
        ps_features = 3*(scale**2)
        x = slim.conv2d(x, ps_features, [3, 3], activation_fn=activation)
        #x = slim.conv2d_transpose(x, ps_features, 9, stride=1, activation_fn=activation)
        x = PS(x, 3, color=True)
    elif scale == 4:
        ps_features = 3*(2**2)
        for i in range(2):
            x = slim.conv2d(x, ps_features, [3, 3], activation_fn=activation)
            #x = slim.conv2d_transpose(x, ps_features, 6, stride=1, activation_fn=activation)
            x = PS(x, 2, color=True)
    return x

"""
Borrowed from https://github.com/tetrachrome/subpixel
Used for subpixel phase shifting after deconv operations
"""
def _phase_shift(I, r):
    bsize, a, b, c = I.get_shape().as_list()
    bsize = tf.shape(I)[0]  # Handling Dimension(None) type for undefined batch dim
    X = tf.reshape(I, (bsize, a, b, r, r))
    X = tf.transpose(X, (0, 1, 2, 4, 3))  # bsize, a, b, 1, 1
    X = tf.split(X, a, 1)  # a, [bsize, b, r, r]
    X = tf.concat([tf.squeeze(x, axis=1) for x in X], 2)  # bsize, b, a*r, r
    X = tf.split(X, b, 1)  # b, [bsize, a*r, r]
    X = tf.concat([tf.squeeze(x, axis=1) for x in X], 2)  # bsize, a*r, b*r
    return tf.reshape(X, (bsize, a*r, b*r, 1))

"""
Borrowed from https://github.com/tetrachrome/subpixel
Used for subpixel phase shifting after deconv operations
"""
def PS(X, r, color=False):
    if color:
        Xc = tf.split(X, 3, 3)
        X = tf.concat([_phase_shift(x, r) for x in Xc], 3)
    else:
        X = _phase_shift(X, r)
    return X
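
For what it's worth, the _phase_shift/PS pair above performs the same rearrangement as pixel shuffling, which TensorFlow exposes directly as tf.nn.depth_to_space (tf.depth_to_space in older 1.x releases). On the Keras side, wrapping it in a Lambda layer is one possible stand-in for the custom upsampler (a sketch, assuming channels-last tensors as in the model above):

from keras.layers import Lambda
import tensorflow as tf

scale = 2
# Rearranges (B, H, W, C*scale**2) -> (B, H*scale, W*scale, C), i.e. pixel shuffling
up = Lambda(lambda t: tf.nn.depth_to_space(t, scale))(act)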

Backpropagation (matrices are not aligned)

Well, the problem is with delta1. I've checked the math a couple of times and it seems fine to me; everything should be correct with delta2, but it doesn't align with W2 transposed. Here is the backpropagation:
def backward(self, X, Y):
    X = np.array(X)
    Y = np.array(Y)
    delta2 = -(Y - self.yHat) * self.deriv_sigmoid(self.a2)
    dJdW2 = np.dot(self.a2.T, delta2)
    delta1 = np.dot(delta2, self.W2.T) * self.deriv_sigmoid(self.a1)
    dJdW1 = np.dot(X.T, delta1)
    return dJdW1, dJdW2
Here is the forward propagation:
def forward(self, X):
    self.X = X
    self.a1 = np.dot(self.W1, X)
    self.Z1 = self.sigmoid(self.a1)
    self.a2 = np.dot(self.W2, self.Z1)
    self.yHat = self.sigmoid(self.a2)
    return self.yHat
And here is the file from which I call it:
NN = nn.Neural_Network(2, 3, 1)
X = [[1],[1],]
Y = [[1],]
yHat = NN.forward(X)
dJdW1, dJdW2 = NN.backward(X, Y)
I've tried checking the argument placement in np.dot(), but it seems to be correct. Here is the full code: https://hastebin.com/ikijahecaz.py
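
When NumPy reports that matrices are not aligned, printing the shape at each step usually pinpoints the offending np.dot. A debugging sketch using the same 2-3-1 layer sizes as above (random stand-ins, not the real error terms):

import numpy as np

W1 = np.random.randn(3, 2)           # hidden x input, matching np.dot(W1, X) in forward()
W2 = np.random.randn(1, 3)           # output x hidden
X = np.array([[1], [1]])             # (2, 1) column vector
a1 = np.dot(W1, X)                   # (3, 1)
a2 = np.dot(W2, a1)                  # (1, 1)
delta2 = np.random.randn(*a2.shape)  # (1, 1), stands in for the error term
for name, arr in [("X", X), ("a1", a1), ("a2", a2), ("W2.T", W2.T), ("delta2", delta2)]:
    print(name, arr.shape)
# np.dot(delta2, W2.T) multiplies (1, 1) by (3, 1): inner dimensions 1 and 3 differ,
# which is exactly the alignment error raised when computing delta1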
