I'm very new to PyTorch and I want to figure out how to input a matrix rather than an image into a CNN.
I have tried it in the following way, but some errors occur.
I define my dataset as follows:
import numpy as np
import torch
import torch.utils.data as tud

class FrameDataSet(tud.Dataset):
    def __init__(self, data):
        targets = data['class'].values.tolist()
        features = data.drop('class', axis=1).astype(np.int64).values
        self.datalist = features.reshape((-1, feature_num, frame_size))
        self.labellist = targets

    def __getitem__(self, index):
        return torch.Tensor(self.datalist[index].astype(float)), self.labellist[index]

    def __len__(self):
        return self.datalist.shape[0]
And my CNN is:
self.conv = nn.Sequential(
    nn.Conv2d(1, 12, 3),
    nn.ReLU(True),
    nn.MaxPool2d(3, 3))
self.fc1 = nn.Linear(80, 100)
self.fc2 = nn.Linear(100, 30)
self.fc3 = nn.Linear(30, 5)
But when the data is fed into the CNN, the following error is raised:
File "/home/sparks/anaconda2/lib/python2.7/site-packages/torch/nn/functional.py", line 48, in conv2d
raise ValueError("Expected 4D tensor as input, got {}D tensor instead.".format(input.dim()))
Expected 4D tensor as input, got 3D tensor instead.
Your input is probably missing one dimension. It should be:
(batch_size, channels, height, width)
If you only have one element in the batch, the tensor in your case has to be, e.g., (1, 1, 28, 28),
because your first Conv2d layer expects a 1-channel input.
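For example, a minimal sketch of what that means for this dataset (feature_num and frame_size here are placeholder values; the key point is the extra channel dimension):

import torch

feature_num, frame_size = 20, 12              # placeholder values for illustration
x = torch.randn(feature_num, frame_size)      # one sample from FrameDataSet: a 2D matrix
x = x.unsqueeze(0)                            # add a channel dim -> (1, feature_num, frame_size)
batch = x.unsqueeze(0)                        # add a batch dim   -> (1, 1, feature_num, frame_size)
print(batch.dim())                            # 4, which is what nn.Conv2d expects
# equivalently, return torch.Tensor(...).unsqueeze(0) from __getitem__,
# so the DataLoader stacks samples into (batch_size, 1, feature_num, frame_size)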
I'm trying to implement a very simple UNet based on this code.
class unet(nn.Module):
    def __init__(self, ngf=64, norm_layer=nn.BatchNorm1d):
        super(unet, self).__init__()
        # construct unet structure
        unet_block = skipconnection_block(ngf*2, ngf, submodule=None, norm_layer=norm_layer, inner=True)
        unet_block = skipconnection_block(ngf, 1, submodule=unet_block, norm_layer=norm_layer, outer=True)
        self.model = unet_block

    def forward(self, x):
        self.unet = nn.Sequential(self.model)
        x = self.unet(x)
        return x
class skipconnection_block(nn.Module):
    def __init__(self, inner_nc, outer_nc, submodule=None, outer=False, inner=False, norm_layer=nn.BatchNorm1d):
        super(skipconnection_block, self).__init__()
        self.outer = outer
        downrelu = nn.LeakyReLU(0.2, True)
        uprelu = nn.ReLU(True)
        if inner:
            downconv_0 = nn.Conv1d(in_channels=outer_nc, out_channels=inner_nc, kernel_size=4, stride=2, padding=0)
            upconv_0 = nn.ConvTranspose1d(in_channels=inner_nc, out_channels=outer_nc, kernel_size=4, stride=2, padding=0)
            down = [downrelu, downconv_0]
            up = [uprelu, upconv_0, norm_layer(outer_nc)]
            model = down + up
        elif outer:
            downconv_1 = nn.Conv1d(in_channels=outer_nc, out_channels=inner_nc, kernel_size=4, stride=2, padding=0)
            upconv_1 = nn.ConvTranspose1d(in_channels=inner_nc, out_channels=outer_nc, kernel_size=4, stride=2, padding=0)
            down = [downrelu, downconv_1, norm_layer(inner_nc)]
            up = [uprelu, upconv_1, norm_layer(outer_nc)]
            model = down + [submodule] + up
        self.model = nn.Sequential(*model)

    def forward(self, x):
        if self.outer:
            return self.model(x)
        else:
            return torch.cat([x, self.model(x)], 1)
And when I tried the following to check a summary of the UNet architecture,
unet = load_skip_model()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
unet.to(device)
print(torchsummary.summary(unet, (1, 150)))
I got the result below.
RuntimeError: Given transposed=1, weight of size [64, 1, 4], expected input[2, 128, 74] to have 64 channels, but got 128 channels instead
I don't understand why I got this.
Can anyone please give some help? Thank you.
As a rule of thumb, when you define a neural network like this, first check your layers' input/output dimensions one by one.
Here, for example, one of your conv layers does not receive the expected number of input channels, which is what causes the error.
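For instance, a quick way to list each conv layer's declared in/out channels (a small sketch, assuming unet is the model instance built above), so you can compare them with the shapes in the error message:

import torch.nn as nn

# print every (transposed) conv layer's expected input/output channels
for name, m in unet.named_modules():
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        print(name, type(m).__name__, m.in_channels, '->', m.out_channels)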
Hi, I am trying to understand how the following PyTorch autoencoder code works. The code below uses the MNIST dataset, which is 28x28. My question is: how were the nn.Linear(128, 3) parameters chosen?
I have a dataset which is 512x512, and I would like to modify the code for this autoencoder to support it.
import torch
import torch.nn.functional as F
from torch import nn
import pytorch_lightning as pl

class LitAutoEncoder(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(28 * 28, 128), nn.ReLU(), nn.Linear(128, 3))
        self.decoder = nn.Sequential(nn.Linear(3, 128), nn.ReLU(), nn.Linear(128, 28 * 28))

    def forward(self, x):
        # in lightning, forward defines the prediction/inference actions
        embedding = self.encoder(x)
        return embedding

    def training_step(self, batch, batch_idx):
        # training_step defines the training loop. It is independent of forward
        x, y = batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
I am assuming input image data are in this shape: x.shape == [bs, 1, h, w], where bs is batch size. Then, x is first viewed as [bs, h*w], i.e. [bs, 28*28]. This means all pixels in an image are flattened into a 1D vector.
Then in the encoder:
nn.Linear(28*28, 128) takes the flattened input of size [bs, 28*28] and outputs an intermediate result of size [bs, 128]
nn.Linear(128, 3): [bs, 128] -> [bs, 3]
Then in the decoder:
nn.Linear(3, 128): [bs, 3] -> [bs, 128]
nn.Linear(128, 28*28): [bs, 128] -> [bs, 28*28]
The final output is then matched against the input.
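A quick way to verify those shapes (a small standalone sketch using random data in place of MNIST):

import torch
import torch.nn as nn

encoder = nn.Sequential(nn.Linear(28 * 28, 128), nn.ReLU(), nn.Linear(128, 3))
decoder = nn.Sequential(nn.Linear(3, 128), nn.ReLU(), nn.Linear(128, 28 * 28))

x = torch.randn(2, 1, 28, 28)     # x.shape == [bs, 1, h, w] with bs=2
x = x.view(x.size(0), -1)         # [2, 784]
z = encoder(x)                    # [2, 3]
x_hat = decoder(z)                # [2, 784]
print(z.shape, x_hat.shape)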
If you want to use the exact same architecture for your 512x512 images, simply change every occurrence of 28*28 in the code to 512*512. However, this is not a practical choice, for these reasons:
For MNIST images, nn.Linear(28*28, 128) contains 28x28x128+128=100480 parameters, while for your images nn.Linear(512*512, 128) contains 512x512x128+128=33554560 parameters. The size is too large, and it may lead to overfitting.
The intermediate data [bs, 3] uses only 3 floats to encode a 512x512 image. I don't think you can recover anything with such compression.
I'd suggest looking up convolutional architectures for your purpose.
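For example, a rough convolutional sketch for 512x512 inputs (my own illustration, not part of the Lightning example; the channel counts, kernel sizes, and strides are arbitrary choices):

import torch
import torch.nn as nn

# downsample 512 -> 256 -> 128 -> 64 with strided convs, then reverse with transposed convs
encoder = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1), nn.ReLU(),   # (bs, 16, 256, 256)
    nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1), nn.ReLU(),  # (bs, 32, 128, 128)
    nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1), nn.ReLU(),  # (bs, 64, 64, 64)
)
decoder = nn.Sequential(
    nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1), nn.ReLU(),
    nn.ConvTranspose2d(32, 16, kernel_size=4, stride=2, padding=1), nn.ReLU(),
    nn.ConvTranspose2d(16, 1, kernel_size=4, stride=2, padding=1),
)

x = torch.randn(2, 1, 512, 512)
print(decoder(encoder(x)).shape)   # torch.Size([2, 1, 512, 512])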
I trained the LSTM with a batch size of 128, and during testing my batch size is 1. Why do I get this error? Am I supposed to re-initialize the hidden state when testing?
Here is the code that I'm using. I initialize the hidden state in the init_hidden function as (number_of_layers, batch_size, hidden_size), since batch_first=True.
class ImageLSTM(nn.Module):
    def __init__(self, n_inputs: int = 49,
                 n_outputs: int = 4096,
                 n_hidden: int = 256,
                 n_layers: int = 1,
                 bidirectional: bool = False):
        """
        Takes a 1D flattened image.
        """
        super(ImageLSTM, self).__init__()
        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_outputs = n_outputs
        self.n_layers = n_layers
        self.bidirectional = bidirectional
        self.lstm = nn.LSTM(input_size=self.n_inputs,
                            hidden_size=self.n_hidden,
                            num_layers=self.n_layers,
                            dropout=0.5 if self.n_layers > 1 else 0,
                            bidirectional=self.bidirectional,
                            batch_first=True)
        if self.bidirectional:
            self.FC = nn.Sequential(
                nn.Linear(self.n_hidden * 2, self.n_outputs),
                nn.Dropout(p=0.5),
                nn.Sigmoid()
            )
        else:
            self.FC = nn.Sequential(
                nn.Linear(self.n_hidden, self.n_outputs),
                # nn.Dropout(p=0.5),
                nn.Sigmoid()
            )

    def init_hidden(self, batch_size, device=None):  # input 4D tensor: (batch size, channels, width, height)
        # initialize the hidden and cell state to zero
        # vectors: (number of layers, batch size, number of hidden nodes)
        if self.bidirectional:
            h0 = torch.zeros(2 * self.n_layers, batch_size, self.n_hidden)
            c0 = torch.zeros(2 * self.n_layers, batch_size, self.n_hidden)
        else:
            h0 = torch.zeros(self.n_layers, batch_size, self.n_hidden)
            c0 = torch.zeros(self.n_layers, batch_size, self.n_hidden)
        if device is not None:
            h0 = h0.to(device)
            c0 = c0.to(device)
        self.hidden = (h0, c0)

    def forward(self, X):  # X: tensor of shape (batch_size, channels, width, height)
        # forward propagate LSTM
        lstm_out, self.hidden = self.lstm(X, self.hidden)  # lstm_out: (batch_size, seq_length, hidden_size)
        # Decode the hidden state of the last time step
        out = self.FC(lstm_out[:, -1, :])
        return out
Please edit your post and add code. How did you initialize the hidden state? What does your model look like?
hidden[0] is not your hidden size, it's the hidden state of the LSTM. The hidden state has to be initialized like this:
hidden = (torch.zeros(layers, batch_size, hidden_size), torch.zeros(layers, batch_size, hidden_size))
You seem to have done this correctly. But the error tells you that you gave a batch of size 1 (because, as you said, you want to test with only one sample), while the hidden state was initialized with batch_size=128.
So I guess (please add code) that you hard-coded batch_size = 128. Don't do that. Since you have to reinitialize the hidden state on every forward pass, you can do this:
...
def forward(self, x):
    batch_size = x.shape[0]
    hidden = (torch.zeros(self.layers, batch_size, self.hidden_size).to(device=device),
              torch.zeros(self.layers, batch_size, self.hidden_size).to(device=device))
    output, hidden = self.lstm(x, hidden)
    # then do whatever you want with the output
I guess that this is what causes the error, but please post your code, too!
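A minimal sketch of that idea applied to the posted model (ImageLSTMv2 is a hypothetical wrapper I'm introducing for illustration; it just moves the hidden-state initialization into forward):

import torch

class ImageLSTMv2(ImageLSTM):
    def forward(self, X):                               # X: (batch_size, seq_len, n_inputs)
        self.init_hidden(X.size(0), device=X.device)    # re-init the hidden state for every batch
        lstm_out, self.hidden = self.lstm(X, self.hidden)
        return self.FC(lstm_out[:, -1, :])

model = ImageLSTMv2()
print(model(torch.randn(128, 10, 49)).shape)   # works with batch size 128
print(model(torch.randn(1, 10, 49)).shape)     # and with batch size 1, no manual re-init needed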
I'm using a stacked autoencoder, which is a bunch of Conv layers.
However, I'm getting a tensor mismatch error, and I'm not sure about the reason. Everything done in the encoder is reversed in the decoder!
This is for time-series data. The input shape is (batch_size, 1, 3000).
Here's the code
class CDAutoEncoder(nn.Module):
    def __init__(self, input_size, output_size, kernel, stride):
        super(CDAutoEncoder, self).__init__()
        self.forward_pass = nn.Sequential(
            nn.Conv1d(input_size, output_size, kernel_size=kernel, stride=stride, padding=0),
            nn.PReLU(),
        )
        self.backward_pass = nn.Sequential(
            nn.ConvTranspose1d(output_size, input_size, kernel_size=kernel, stride=stride, padding=0),
            nn.PReLU(),
        )

    def forward(self, x):
        y = self.forward_pass(x)
        return y

    def reconstruct(self, x):
        return self.backward_pass(x)


class StackedAutoEncoder(nn.Module):
    def __init__(self):
        super(StackedAutoEncoder, self).__init__()
        self.ae1 = CDAutoEncoder(1, 32, 50, 10)
        self.ae2 = CDAutoEncoder(32, 64, 10, 3)
        self.ae3 = CDAutoEncoder(64, 64, 5, 1)

    def forward(self, x):
        a1 = self.ae1(x)
        a2 = self.ae2(a1)
        a3 = self.ae3(a2)
        return self.reconstruct(a3)

    def reconstruct(self, x):
        a2_reconstruct = self.ae3.reconstruct(x)
        a1_reconstruct = self.ae2.reconstruct(a2_reconstruct)
        x_reconstruct = self.ae1.reconstruct(a1_reconstruct)
        return x_reconstruct
The error:
RuntimeError: The size of tensor a (2990) must match the size of tensor b (3000) at non-singleton dimension 2
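For reference, the 2990 falls out of the length formulas L_out = floor((L_in - k)/s) + 1 for Conv1d and L_out = (L_in - 1)*s + k for ConvTranspose1d, both with padding=0 (a quick check, assuming the layer parameters above):

def conv_len(l, k, s):       # Conv1d output length with padding=0
    return (l - k) // s + 1

def convT_len(l, k, s):      # ConvTranspose1d output length with padding=0
    return (l - 1) * s + k

l = 3000
for k, s in [(50, 10), (10, 3), (5, 1)]:
    l = conv_len(l, k, s)    # 296, 96, 92
for k, s in [(5, 1), (10, 3), (50, 10)]:
    l = convT_len(l, k, s)   # 96, 295, 2990
print(l)                     # 2990: the integer division in the second conv layer drops the remainder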
I've tried adding padding and it worked, but when I change the kernel size I get a different tensor-size-mismatch error.
Apparently there's nothing like 'same' padding for these layers, so is there an automated solution for this?
I was working with sequence-to-sequence models in PyTorch. A sequence-to-sequence model comprises an Encoder and a Decoder.
The Encoder converts a (batch_size x input_features x num_of_one_hot_encoded_classes) tensor into (batch_size x input_features x hidden_size).
The Decoder takes this sequence and converts it into (batch_size x output_features x num_of_one_hot_encoded_classes).
An example would be:
So in the above example, I would need to convert the 22 input features into 10 output features. In Keras this could be done with a RepeatVector(10).
An example:
model.add(LSTM(256, input_shape=(22, 98)))
model.add(RepeatVector(10))
model.add(Dropout(0.3))
model.add(LSTM(256, return_sequences=True))
However, I'm not sure if this is the proper way to convert the input sequences into the output ones.
So, my question is:
What's the standard way to convert the input sequences to output ones, e.g. converting from (batch_size, 22, 98) -> (batch_size, 10, 98)? And how should I prepare the Decoder?
Encoder code snippet (written in PyTorch):
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=1, batch_first=True)

    def forward(self, input):
        output, hidden = self.lstm(input)
        return output, hidden
Well, you have two options. The first is to repeat the encoder's last state 10 times and feed it to the decoder as input, like this:
import torch
input = torch.randn(64, 22, 98)
encoder = torch.nn.LSTM(98, 256, batch_first=True)
encoded, _ = encoder(input)
decoder_input = encoded[:, -1:].repeat(1, 10, 1)
decoder = torch.nn.LSTM(256, 98, batch_first=True)
decoded, _ = decoder(decoder_input)
print(decoded.shape) #torch.Size([64, 10, 98])
Another option is to use an attention mechanism, like this:
# assuming we have obtained the encoded sequence and declared the decoder as before
attention_calculator = torch.nn.Conv1d(256 + 98, 1, kernel_size=1)
hidden = (torch.zeros(1, 64, 98), torch.zeros(1, 64, 98))
outputs = []
for i in range(10):
    # score each of the 22 encoder steps against the current decoder hidden state
    attention_input = torch.cat([hidden[0][0][:, None, :].expand(-1, 22, -1), encoded], dim=2).permute(0, 2, 1)
    attention_value = torch.nn.functional.softmax(attention_calculator(attention_input).squeeze(), dim=1)
    # build the context vector as the attention-weighted sum of the encoder outputs
    decoder_input = (attention_value[:, :, None] * encoded).sum(dim=1, keepdim=True)
    output, hidden = decoder(decoder_input, hidden)
    outputs.append(output)
outputs = torch.cat(outputs, dim=1)  # (64, 10, 98)