I have different sizes or shapes for each tensor like
torch.Size([1, 12, 1000])
torch.Size([1, 12, 1000])
torch.Size([1, 10, 1000])
torch.Size([1, 11, 1000])
torch.Size([1, 11, 1000])
torch.Size([1, 15, 1000])
torch.Size([1, 10, 1000])
....
and need to be like torch.Size((12+12+10+11+11+15+ .... ),1000)
my code is
def extract():
for y in sentences:
tokenized_text = tokenizer.tokenize(y)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
tokens_tensor = torch.tensor([indexed_tokens])
model.eval()
outputs = model(tokens_tensor)
hidden_states = outputs[2]
my_tensor = torch.cat([hidden_states[i] for i in [-1,-2,-3,-4]], dim=-1)
return my_tensor
Concatenate them:
tensors = [t1, t2, t3, ...]
result = torch.cat(tensors, dim=1)
# result.size(): torch.Size([1, 12+12+10+..., 1000])
If you also want to remove the first dimension, as it has size 1:
result = result.squeeze()
# result.size(): torch.Size([12+12+10+..., 1000])
Related
I want to experiment with creating a modified Loss function for 4 channel image data.
What is the best way to split torch.Size([64, 4, 128, 128])
to
torch.Size([64, 3, 128, 128])
torch.Size([64, 1, 128, 128])
You can either slice the second axis and extract two tensors:
>>> a, b = x[:, :3], x[:, 3:]
>>> a.shape, b.shape
(64, 3, 128, 128), (64, 1, 128, 128)
Alternatively you can apply torch.split on the first dimension:
>>> a, b = x.split(3, dim=1)
>>> a.shape, b.shape
(64, 3, 128, 128), (64, 1, 128, 128)
I was able to resolve this myself by using the Split function.
Given an Image based Tensor like: torch.Size([64, 4, 128, 128])
You can split on dim 1 and given a static length.
self.E1 = torch.split(self.E, 3, 1)
print(self.E1[0].shape);
print(self.E1[1].shape);
Gives:
torch.Size([64, 4, 128, 128])
torch.Size([64, 3, 128, 128])
torch.Size([64, 1, 128, 128])
What is the working of Output_padding in Conv2dTranspose? Please Help me to understand this?
Conv2dTranspose(1024, 512, kernel_size=3, stride=2, padding=1, output_padding=1)
According to documentation here: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html when applying Conv2D operation with Stride > 1 you can get same output dimensions with different inputs. For example, 7x7 and 8x8 inputs would both return 3x3 output with Stride=2:
import torch
conv_inp1 = torch.rand(1,1,7,7)
conv_inp2 = torch.rand(1,1,8,8)
conv1 = torch.nn.Conv2d(1, 1, kernel_size = 3, stride = 2)
out1 = conv1(conv_inp1)
out2 = conv1(conv_inp2)
print(out1.shape) # torch.Size([1, 1, 3, 3])
print(out2.shape) # torch.Size([1, 1, 3, 3])
And when applying the transpose convolution, it is ambiguous that which output shape to return, 7x7 or 8x8 for stride=2 transpose convolution. Output padding helps pytorch to determine 7x7 or 8x8 output with output_padding parameter. Note that, it doesn't pad zeros or anything to output, it is just a way to determine the output shape and apply transpose convolution accordingly.
conv_t1 = torch.nn.ConvTranspose2d(1, 1, kernel_size=3, stride=2)
conv_t2 = torch.nn.ConvTranspose2d(1, 1, kernel_size=3, stride=2, output_padding=1)
transposed1 = conv_t1(out1)
transposed2 = conv_t2(out2)
print(transposed1.shape) # torch.Size([1, 1, 7, 7])
print(transposed2.shape) # torch.Size([1, 1, 8, 8])
This is my first question, so please forgive if I've missed adding something.
I'm trying to create a Convolutional Autoencoder in Pytorch 1.7.0, yet am having difficulty in designing the model so that the output size is equal to the input size. I'm currently working on the MNIST dataset, with the input tensor size being 1128*28 and currently, the output is 1*1*29*29...
Can someone please help me identify the problem? *Please note that I'll incorporate the learnings afterwards.
class autoencoder(nn.Module):
def __init__(self, hidden_node_count):
super(autoencoder, self).__init__()
self.conv1 = nn.Conv2d(1, 32, 5, stride=2, padding=2)
self.conv2 = nn.Conv2d(32,32, 5, stride=2)#, padding=2)
self.pool = nn.MaxPool2d(hidden_node_count, hidden_node_count)
self.t_conv1 = nn.ConvTranspose2d(32, 32, 5, stride=2)#, padding=2)
self.t_conv2 = nn.ConvTranspose2d(32, 32, 5, stride=2)#, padding=2)
self.t_conv3 = nn.ConvTranspose2d(32, 1, 5, stride=2)#, padding=2)
self.relu = nn.ReLU(True)
self.tanh = nn.Tanh()
def forward(self, x):
print(x.size(), "input")
x = self.conv1(x)
x = self.relu(x)
print(x.size(), "conv1")
x = self.conv2(x)
print(x.size(), "conv2")
x = self.pool(x)
print(x.size(), "pool")
x = self.t_conv1(x)
x = self.relu(x)
print(x.size(), "deconv1")
x = self.t_conv2(x)
x = self.relu(x)
print(x.size(), "deconv2")
x = self.t_conv3(x)
x = self.tanh(x)
print(x.size(), "deconv3")
return x
With its STDOUT being ->
torch.Size([1, 1, 28, 28]) input
torch.Size([1, 32, 14, 14]) conv1
torch.Size([1, 32, 5, 5]) conv2
torch.Size([1, 32, 1, 1]) pool
torch.Size([1, 32, 5, 5]) deconv1
torch.Size([1, 32, 13, 13]) deconv2
torch.Size([1, 1, 29, 29]) deconv3
torch.Size([1, 1, 29, 29])
torch.Size([1, 1, 28, 28])
according to the documentation for ConvTranspose2d, here is the formula to compute the output size :
Hout=(Hin−1)×stride[0]−2×padding[0]+dilation[0]×(kernel_size[0]−1)+output_padding[0]+1
In your case, Hin=13, padding=0, dilation=1, kernel_size=5, output_padding=0, which gives Hout=29. Your output tensor is as it should be !
If you want to have an output of 28, add some padding. With padding=1, you will get an output of size (1,32,27,27), because the output size of a ConvTranpose2d is ambiguous (read the doc). Therefore, you need to add some output padding as well :
conv = nn.ConvTranspose2d(32, 1, 5, stride= 2, padding=1, output_padding=1)
conv(randn(1,32,13,13)).size()
>>> (1, 1, 28, 28)
Pytorch code:
up = nn.ConvTranspose2d(3, 128, 2, stride=2)
conv = nn.Conv2d(3, 128, 2)
inputs = Variable(torch.rand(1, 3, 64, 64))
print('up conv output size:', up(inputs).size())
inputs = Variable(torch.rand(1, 3, 64, 64))
print('conv output size:', conv(inputs).size())
print('up conv weight size:', up.weight.data.shape)
print('conv weight size:', conv.weight.data.shape)
Result:
up conv output size: torch.Size([1, 128, 128, 128])
conv output size: torch.Size([1, 128, 63, 63])
up conv weight size: torch.Size([3, 128, 2, 2])
conv weight size: torch.Size([128, 3, 2, 2])
Why the orders are different between ConvTranspose2d (3,128) and Conv2d (128, 3)?
Is it supposed to behave like this?
I am trying to make a classifier based on capsule network, and I am able to train this network but have a problem with making a prediction. How my code should look like to make a prediction given an arbitrary image (I need an example). One important thing is that I'm using EM routing. In dynamic routing its enough to calculate length of vector of last capsule layer to get predicted class but how it is in EM routing?
Here is my code:
poses, activations = m_capsules.nets.capsules_net(images, num_classes=10, iterations=3,
batch_size=batch_size, name='capsules_em')
global_step = tf.train.get_or_create_global_step()
loss = m_capsules.nets.spread_loss(
labels, activations, iterations_per_epoch, global_step, name='spread_loss'
)
tf.summary.scalar('losses/spread_loss', loss)
optimizer = tf.train.AdamOptimizer(learning_rate=0.0005)
train_tensor = slim.learning.create_train_op(
loss, optimizer, global_step=global_step, clip_gradient_norm=4.0
)
slim.learning.train(
train_tensor,
logdir="./log/train",
log_every_n_steps=1,
save_summaries_secs=60,
saver=tf.train.Saver(max_to_keep=2),
save_interval_secs=600,
)
So far I was trying to write estimator but having trouble. Below is a code:
mnist_classifier = tf.estimator.Estimator(model_fn=m_capsules.nets.capsules_net, model_dir=dir)
prediction = mnist_classifier.predict(input_fn=words_input_fn)
And my model looks like:
def capsules_net(inputs, num_classes, iterations, batch_size, name='ocr-caps'):
"""Define the Capsule Network model
"""
with tf.variable_scope(name) as scope:
# ReLU Conv1
# Images shape (24, 28, 28, 1) -> conv 5x5 filters, 32 output channels, strides 2 with padding, ReLU
# nets -> (?, 14, 14, 32)
nets = conv2d(
inputs,
kernel=5, out_channels=26, stride=2, padding='SAME',
activation_fn=tf.nn.relu, name='relu_conv1'
)
# PrimaryCaps
# (?, 14, 14, 32) -> capsule 1x1 filter, 32 output capsule, strides 1 without padding
# nets -> (poses (?, 14, 14, 32, 4, 4), activations (?, 14, 14, 32))
nets = primary_caps(
nets,
kernel_size=1, out_capsules=26, stride=1, padding='VALID',
pose_shape=[4, 4], name='primary_caps'
)
# ConvCaps1
# (poses, activations) -> conv capsule, 3x3 kernels, strides 2, no padding
# nets -> (poses (24, 6, 6, 32, 4, 4), activations (24, 6, 6, 32))
nets = conv_capsule(
nets, shape=[3, 3, 26, 26], strides=[1, 2, 2, 1], iterations=iterations,
batch_size=batch_size, name='conv_caps1'
)
# ConvCaps2
# (poses, activations) -> conv capsule, 3x3 kernels, strides 1, no padding
# nets -> (poses (24, 4, 4, 32, 4, 4), activations (24, 4, 4, 32))
nets = conv_capsule(
nets, shape=[3, 3, 26, 26], strides=[1, 1, 1, 1], iterations=iterations,
batch_size=batch_size, name='conv_caps2'
)
# Class capsules
# (poses, activations) -> 1x1 convolution, 10 output capsules
# nets -> (poses (24, 10, 4, 4), activations (24, 10))
nets = class_capsules(nets, num_classes, iterations=iterations,
batch_size=batch_size, name='class_capsules')
# poses (24, 10, 4, 4), activations (24, 10)
poses, activations = nets
return poses, activations