Using LSTM stateful for passing context b/w batches; may be some error in context passing, not getting good results? - pytorch

I have checked the data before giving it to the network. The data is correct.
Using LSTM and passing the context b/w batches. per_class_accuracy is changing, but the loss is not going down. Been stuck for long, not sure if there is an error in the Code?
I have multi-class classification problem based upon an imbalanced dataset
Dataset_type: CSV
Dataset_size: 20000
Based upon CSV data of sensors
X = 0.6986111111111111,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0
Y = leaveHouse
Per class accuracy:
{'leaveHouse': 0.34932855, 'getDressed': 1.0, 'idle': 0.8074534, 'prepareBreakfast': 0.8, 'goToBed': 0.35583413, 'getDrink': 0.0, 'takeShower': 1.0, 'useToilet': 0.0, 'eatBreakfast': 0.8857143}
Training:
# Using loss weights, the inverse of class frequency
criterion = nn.CrossEntropyLoss(weight = class_weights)
hn, cn = model.init_hidden(batch_size)
for i, (input, label) in enumerate(trainLoader):
hn.detach_()
cn.detach_()
input = input.view(-1, seq_dim, input_dim)
if torch.cuda.is_available():
input = input.float().cuda()
label = label.cuda()
else:
input = input.float()
label = label
# Forward pass to get output/logits
output, (hn, cn) = model((input, (hn, cn)))
# Calculate Loss: softmax --> cross entropy loss
loss = criterion(output, label)#weig pram
running_loss += loss
loss.backward() # Backward pass
optimizer.step() # Now we can do an optimizer step
optimizer.zero_grad() # Reset gradients tensors
Network
class LSTMModel(nn.Module):
def init_hidden(self, batch_size):
self.batch_size = batch_size
if torch.cuda.is_available():
hn = torch.zeros(self.layer_dim, self.batch_size, self.hidden_dim).cuda()
# Initialize cell state
cn = torch.zeros(self.layer_dim, self.batch_size, self.hidden_dim).cuda()
else:
hn = torch.zeros(self.layer_dim, self.batch_size, self.hidden_dim)
# Initialize cell state
cn = torch.zeros(self.layer_dim, self.batch_size, self.hidden_dim)
return hn, cn
def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, seq_dim):
super(LSTMModel, self).__init__()
# Hidden dimensions
self.hidden_dim = hidden_dim
# Number of hidden layers
self.layer_dim = layer_dim
self.input_dim = input_dim
# Building your LSTM
# batch_first=True causes input/output tensors to be of shape
# (batch_dim, seq_dim, feature_dim)
self.lstm = nn.LSTM(self.input_dim, hidden_dim, layer_dim, batch_first=True)
# Readout layer
self.fc = nn.Linear(hidden_dim, output_dim)
self.relu = nn.ReLU()
self.softmax = nn.Softmax(dim=1)
self.seq_dim = seq_dim
def forward(self, inputs):
# Initialize hidden state with zeros
input, (hn, cn) = inputs
input = input.view(-1, self.seq_dim, self.input_dim)
# time steps
out, (hn, cn) = self.lstm(input, (hn, cn))
# Index hidden state of last time step
out = self.fc(out[:, -1, :])
out = self.softmax(out)
return out, (hn,cn)

One problem you might have is CrossEntropyLoss combines a log softmax operation with negative log likelihood loss, but you're applying a softmax in your model. You should pass the raw logits out of the final layer to CrossEntropyLoss.
Also I an't say without seeing the models forward pass, but it looks like you're applying the softmax on dimension 1 to a tensor that (I'm inferring) has shape batch_size, sequence_length, output_dim, when you should be applying it along the output dim.

Related

Question regarding latent space in Autoencoder

I am just start learning AE few days ago. From what I know about AE, a latent space will be created after the encoder and then the decoder will regenerate images based on the latent spaces. In my other project, it requires me to embed some new feature into the AE latent space. Below are the AE code that I have try.
AE module
# build autoencoder
import torch
import matplotlib.pyplot as plt
# Creating a PyTorch class
# 28*28 ==> 9 ==> 28*28
class AE(torch.nn.Module):
def __init__(self):
super().__init__()
# Building an linear encoder with Linear
# layer followed by Relu activation function
# 784 ==> 9
self.encoder = torch.nn.Sequential(
torch.nn.Linear(64 * 64, 256),
torch.nn.ReLU(),
torch.nn.Linear(256, 128),
torch.nn.ReLU(),
torch.nn.Linear(128, 32),
torch.nn.ReLU(),
torch.nn.Linear(32, 3)
)
# Building an linear decoder with Linear
# layer followed by Relu activation function
# The Sigmoid activation function
# outputs the value between 0 and 1
# 9 ==> 784
self.decoder = torch.nn.Sequential(
torch.nn.Linear(3,32),
torch.nn.ReLU(),
torch.nn.Linear(32, 128),
torch.nn.ReLU(),
torch.nn.Linear(128, 256),
torch.nn.ReLU(),
torch.nn.Linear(256, 64*64),
torch.nn.Sigmoid()
)
def forward(self, x):
encoded = self.encoder(x)
decoded = self.decoder(encoded)
return decoded
Init
# Model Initialization
model = AE().to(device)
# Validation using MSE Loss function
loss_function = torch.nn.MSELoss()
# Using an Adam Optimizer with lr = 0.1
optimizer = torch.optim.Adam(model.parameters(),
lr = 1e-1,
weight_decay = 1e-8)
training
num_epochs = 100
output =[]
for epoch in range(num_epochs):
for data in loader:
img, _ = data
img = img.reshape(-1,64*64)
img = img.to(device)
recon = model(img)
loss = loss_function(recon, img.data)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss. Item(): .4f}')
output. Append((epoch,img,recon))
My question is may I know how can I obtain the latent space? From what I know about the code there is only encoder and decoder. How can I retrieve the latent space so that I can embed new feature to it? Thank you.
You can simply modify your forward(self, x) function to also return the laten space embedding generated by the encoder:
def forward(self, x):
encoded = self.encoder(x)
decoded = self.decoder(encoded)
return encoded, decoded
In your train loop you can just add:
embedding, recon = model(img)

PyTorch multi-class: ValueError: Expected input batch_size (416) to match target batch_size (32)

I have created a mutli-class classification neural network. Training, and validation iterators where created with BigBucketIterator method with fields {'text_normalized_tweet':TEXT, 'label': LABEL}
TEXT = a tweet
LABEL = a float number (with 3 values: 0,1,2)
Below I execute a dummy example of my neural network:
import torch.nn as nn
class MultiClassClassifer(nn.Module):
#define all the layers used in model
def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
#Constructor
super(MultiClassClassifer, self).__init__()
#embedding layer
self.embedding = nn.Embedding(vocab_size, embedding_dim)
#dense layer
self.hiddenLayer = nn.Linear(embedding_dim, hidden_dim)
#Batch normalization layer
self.batchnorm = nn.BatchNorm1d(hidden_dim)
#output layer
self.output = nn.Linear(hidden_dim, output_dim)
#activation layer
self.act = nn.Softmax(dim=1) #2d-tensor
#initialize weights of embedding layer
self.init_weights()
def init_weights(self):
initrange = 1.0
self.embedding.weight.data.uniform_(-initrange, initrange)
def forward(self, text, text_lengths):
embedded = self.embedding(text)
#packed sequence
packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True)
tensor, batch_size = packed_embedded[0], packed_embedded[1]
hidden_1 = self.batchnorm(self.hiddenLayer(tensor))
return self.act(self.output(hidden_1))
Instantiate the model
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 64
OUTPUT_DIM = 3
model = MultiClassClassifer(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
When I call
text, text_lengths = batch.text_normalized_tweet
predictions = model(text, text_lengths).squeeze()
loss = criterion(predictions, batch.label)
it returns,
ValueError: Expected input batch_size (416) to match target batch_size (32).
model(text, text_lengths).squeeze() = torch.Size([416, 3])
batch.label = torch.Size([32])
I can see that the two objects have different sizes, but I have no clue how to fix this?
You may find the Google Colab notebook here
Shapes of each in, out tensor of my forward() method:
torch.Size([32, 10, 100]) #self.embedding(text)
torch.Size([320, 100]) #nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True)
torch.Size([320, 64]) #self.batchnorm(self.hiddenLayer(tensor))
torch.Size([320, 3]) #self.act(self.output(hidden_1))
You shouldn't be using the squeeze function after the forward pass, that doesn't make sense.
After removing the squeeze function, as you see, the shape of your final output is [320,3] whereas it is expecting [32,3]. One way to fix this is to average out the embeddings you obtain for each word after the self.Embedding function like shown below:
def forward(self, text, text_lengths):
embedded = self.embedding(text)
embedded = torch.mean(embedded, dim=1, keepdim=True)
packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True)
tensor, batch_size = packed_embedded[0], packed_embedded[1]
hidden_1 = self.batchnorm(self.hiddenLayer(tensor))
return self.act(self.output(hidden_1))

Extracting Autoencoder features from the hidden layer

I have developed some code to apply Autoencoder on my dataset, in order to extract hidden features from it. I have a dataset that consists of 84 variables, and they have been normalised.
epochs = 10
batch_size = 128
lr = 0.008
# Convert Input and Output data to Tensors and create a TensorDataset
input = torch.Tensor(input.to_numpy())
output = torch.tensor(output.to_numpy())
data = torch.utils.data.TensorDataset(input, output)
# Split to Train, Validate and Test sets using random_split
number_rows = len(input) # The size of our dataset or the number of rows in excel table.
test_split = int(number_rows*0.3)
train_split = number_rows - test_split
train_set, test_set = random_split(data, [train_split, test_split])
# Create Dataloader to read the data within batch sizes and put into memory.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)
The model structure:
# Model structure
class AutoEncoder(nn.Module):
def __init__(self):
super(AutoEncoder, self).__init__()
# Encoder
self.encoder = nn.Sequential(
nn.Linear(84, 128),
nn.Tanh(),
nn.Linear(128, 64),
nn.Tanh(),
nn.Linear(64, 16),
nn.Tanh(),
nn.Linear(16, 2),
)
# Decoder
self.decoder = nn.Sequential(
nn.Linear(2, 16),
nn.Tanh(),
nn.Linear(16, 64),
nn.Tanh(),
nn.Linear(64, 128),
nn.Tanh(),
nn.Linear(128, 84),
nn.Sigmoid()
)
def forward(self, inputs):
codes = self.encoder(inputs)
decoded = self.decoder(codes)
return codes, decoded
Optimiser and Loss function
# Optimizer and loss function
model = AutoEncoder()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_function = nn.MSELoss()
The training steps:
# Train
for epoch in range(epochs):
for data, labels in train_loader:
inputs = data.view(-1, 84)
# Forward
codes, decoded = model(inputs)
# Backward
optimizer.zero_grad()
loss = loss_function(decoded, inputs)
loss.backward()
optimizer.step()
# Show progress
print('[{}/{}] Loss:'.format(epoch+1, epochs), loss.item())
The Autoencoder model is saved as:
# Save
torch.save(model,'autoencoder.pth')
At this point, I would like to ask some help to understand how I could extract the features from the hidden layer. These features extracted from the hidden layer will be used in another classification algorithm.
You need to place an hook to your model. And you can use this hook to extract features from any layer. However it is a lot easier if you don't use nn.Sequential because it combines the layer together and they act as one. I run your code using this function:
There is a function for Feature Extraction which basically takes model as an input and place a hook using index of layer.
class FE(nn.Module):
def __init__(self,model_instance, output_layers, *args):
super().__init__(*args)
self.output_layers = output_layers
self.selected_out = OrderedDict()
self.pretrained = model_instance
self.fhooks = []
print("model_instance._modules.keys():",model_instance._modules.keys())
for i,l in enumerate(list(self.pretrained._modules.keys())):
print("index:",i, ", keys:",l )
if i in self.output_layers:
print("------------------------ > Hook is placed output of :" , l )
self.fhooks.append(getattr(self.pretrained,l).register_forward_hook(self.forward_hook(l)))
def forward_hook(self,layer_name):
def hook(module, input, output):
self.selected_out[layer_name] = output
return hook
def forward(self, x):
out = self.pretrained(x,None)
return out, self.selected_out
And to use:
model_hooked=FE(model ,output_layers = [0])
model_instance._modules.keys(): odict_keys(['encoder', 'decoder'])
index: 0 , keys: encoder
------------------------ > Hook is placed output of : encoder
index: 1 , keys: decoder
After placing the hook you can simply put data to new hooked model and it will output 2 values.First one is original output from last layer and second output will be the output from hooked layer
out, layerout = model_hooked(data_sample)
If you want to extract features from a loaders you can use this function:
def extract_features(FE ,layer_name, train_loader, test_loader):
extracted_features=[]
lbls=[]
extracted_features_test=[]
lbls_test=[]
for data , target in train_loader:
out, layerout = FE(data)
a=layerout[layer_name]
extracted_features.extend(a)
lbls.extend(target)
for data , target in test_loader:
out, layerout = FE(data)
a=layerout[layer_name]
extracted_features_test.extend(a)
lbls_test.extend(target)
extracted_features = torch.stack(extracted_features)
extracted_features_test = torch.stack(extracted_features_test)
lbls = torch.stack(lbls)
lbls_test = torch.stack(lbls_test)
return extracted_features, lbls ,extracted_features_test, lbls_test
And usage is like this :
Features_TRAINLOADER , lbls , Features_TESTLOADER, lbls_test =extract_features(model_hooked, "encoder", train_loader, test_loader)

How to create a data preprocessing pipeline in pytorch outside the Dataloader class?

I am trying to make a model for data with 40 features which have to classified into 10 classes. I am new to PyTorch and this is my first project in it.
I am given a custom Dataset class (which I am not allowed to change) which is as follows:
class MyData(Dataset):
def _init_(self, mode):
with open(mode+'.pkl', 'rb') as handle:
data = pickle.load(handle)
self.X = data['x'].astype('float')
self.y = data['y'].astype('long')
def _len_(self):
return len(self.X)
def _getitem_(self, idx):
if torch.is_tensor(idx):
idx = idx.tolist()
sample = (self.X[idx], self.y[idx])
return sample
I have done some preprocessing on the data like normalization and then trained and saved the model. As I wasn't allowed to change the dataset class, I made the changes outside of it and then used the DataLoader method. The preprocessing is as follows :
train_data=MyData("train")
features, labels = train_data[:]
df = pd.DataFrame(features)
x = df.values
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
input_array = x_scaled
output_array = labels
inputs = torch.Tensor(input_array)
targets = torch.Tensor(output_array).type(torch.LongTensor)
dataset = TensorDataset(inputs, targets)
train_ds, val_ds = random_split(dataset, [3300, 300])
batch_size = 300
n_epochs = 200
log_interval = 10
train_losses = []
train_counter = []
test_losses = []
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]
After this I define the training and testing functions ( and remove the print statements as the autograder will not be able to grade my assignment if I do so) as follows:
def train(epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
optimizer.zero_grad()
output = model(data.double())
loss = criterion(output, target)
loss.backward()
optimizer.step()
if batch_idx % log_interval == 0:
train_losses.append(loss.item())
train_counter.append(
(batch_idx*32) + ((epoch-1)*len(train_loader.dataset)))
save_model(model)
def test():
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in val_loader:
output = model(data.double())
test_loss += criterion(output, target).item()
pred = output.data.max(1, keepdim=True)[1]
correct += pred.eq(target.data.view_as(pred)).sum()
test_loss /= len(val_loader.dataset)
test_losses.append(test_loss)
test()
for epoch in range(1, n_epochs + 1):
train(epoch)
test()
Even after doing that, the autograder is still not able to grade my code. I mainly think it's because maybe I am making an error with how I input the data to the model but I am not able to narrow down to what exactly is the problem and how do I correct it. As I'm new to pytorch, I was looking at how to do the preprocessing but all of them involved the Dataset Class so I'm not sure how to go about it.
My model is as follows:
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
#self.flatten=nn.Flatten()
self.net_stack=nn.Sequential(
nn.Conv1d(in_channels=40, out_channels=256, kernel_size=1, stride=2), #applying batch norm
nn.ReLU(),
nn.MaxPool1d(kernel_size=1),
nn.Dropout(p=0.1),
nn.BatchNorm1d(256, affine=True),
nn.Conv1d(in_channels=256, out_channels=128, kernel_size=1, stride=2), #applying batch norm
nn.ReLU(),
nn.MaxPool1d(kernel_size=1),
nn.Dropout(p=0.1),
nn.BatchNorm1d(128, affine=True),
nn.Conv1d(in_channels=128, out_channels=64, kernel_size=1, stride=2), #applying batch norm
nn.ReLU(),
nn.MaxPool1d(kernel_size=1),
nn.Dropout(p=0.1),
nn.BatchNorm1d(64, affine=True),
nn.Conv1d(in_channels=64, out_channels=32, kernel_size=1, stride=2), #applying batch norm
nn.ReLU(),
nn.MaxPool1d(kernel_size=1),
nn.Dropout(p=0.1),
nn.BatchNorm1d(32, affine=True),
nn.Flatten(),
nn.Linear(32, 10),
nn.Softmax(dim=1)).double()
def forward(self,x):
# result=self.net_stack(x[None])
x=x.double()
result=self.net_stack(x[:, :, None]).double()
print(result.size())
return result
One instruction I've got is that they've written:
# Please make sure we can load your model with:
# model = MyModel()
# This means you must give default values to all parameters you may wish to set, such as output size.
You can try to do it within the training loop
for batch_idx, (data, target) in enumerate(train_loader):
# you can do something here to manipulate your input
data = transform(data)
data.to('cuda') # Move to gpu, i noticed you didnt do it in your training loop
# Forward pass
output = model(data)

Expected input batch_size (18) to match target batch_size (6)

Is RNN for image classification available only for gray image?
The following program works for gray image classification.
If RGB images are used, I have this error:
Expected input batch_size (18) to match target batch_size (6)
at this line loss = criterion(outputs, labels).
My data loading for train, valid and test are as follows.
input_size = 300
inputH = 300
inputW = 300
#Data transform (normalization & data augmentation)
stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
train_resize_tfms = tt.Compose([tt.Resize((inputH, inputW), interpolation=2),
tt.ToTensor(),
tt.Normalize(*stats)])
train_tfms = tt.Compose([tt.Resize((inputH, inputW), interpolation=2),
tt.RandomHorizontalFlip(),
tt.ToTensor(),
tt.Normalize(*stats)])
valid_tfms = tt.Compose([tt.Resize((inputH, inputW), interpolation=2),
tt.ToTensor(),
tt.Normalize(*stats)])
test_tfms = tt.Compose([tt.Resize((inputH, inputW), interpolation=2),
tt.ToTensor(),
tt.Normalize(*stats)])
#Create dataset
train_ds = ImageFolder('./data/train', train_tfms)
valid_ds = ImageFolder('./data/valid', valid_tfms)
test_ds = ImageFolder('./data/test', test_tfms)
from torch.utils.data.dataloader import DataLoader
batch_size = 6
#Training data loader
train_dl = DataLoader(train_ds, batch_size, shuffle = True, num_workers = 8, pin_memory=True)
#Validation data loader
valid_dl = DataLoader(valid_ds, batch_size, shuffle = True, num_workers = 8, pin_memory=True)
#Test data loader
test_dl = DataLoader(test_ds, 1, shuffle = False, num_workers = 1, pin_memory=True)
My model is as follows.
num_steps = 300
hidden_size = 256 #size of hidden layers
num_classes = 5
num_epochs = 20
learning_rate = 0.001
# Fully connected neural network with one hidden layer
num_layers = 2 # 2 RNN layers are stacked
class RNN(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes):
super(RNN, self).__init__()
self.num_layers = num_layers
self.hidden_size = hidden_size
self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True, dropout=0.2)#batch must have first dimension
#our inpyt needs to have shape
#x -> (batch_size, seq, input_size)
self.fc = nn.Linear(hidden_size, num_classes)#this fc is after RNN. So needs the last hidden size of RNN
def forward(self, x):
#according to ducumentation of RNN in pytorch
#rnn needs input, h_0 for inputs at RNN (h_0 is initial hidden state)
#the following one is initial hidden layer
h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)#first one is number of layers and second one is batch size
#output has two outputs. The first tensor contains the output features of the hidden last layer for all time steps
#the second one is hidden state f
out, _ = self.rnn(x, h0)
#output has batch_size, num_steps, hidden size
#we need to decode hidden state only the last time step
#out (N, 30, 128)
#Since we need only the last time step
#Out (N, 128)
out = out[:, -1, :] #-1 for last time step, take all for N and 128
out = self.fc(out)
return out
stacked_rnn_model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()#cross entropy has softmax at output
#optimizer = torch.optim.Adam(stacked_rnn_model.parameters(), lr=learning_rate) #optimizer used gradient optimization using Adam
optimizer = torch.optim.SGD(stacked_rnn_model.parameters(), lr=learning_rate)
# Train the model
n_total_steps = len(train_dl)
for epoch in range(num_epochs):
t_losses=[]
for i, (images, labels) in enumerate(train_dl):
# origin shape: [6, 3, 300, 300]
# resized: [6, 300, 300]
images = images.reshape(-1, num_steps, input_size).to(device)
print('images shape')
print(images.shape)
labels = labels.to(device)
# Forward pass
outputs = stacked_rnn_model(images)
print('outputs shape')
print(outputs.shape)
loss = criterion(outputs, labels)
t_losses.append(loss)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
Printing images and outputs shapes are
images shape
torch.Size([18, 300, 300])
outputs shape
torch.Size([18, 5])
Where is the mistake?
Tl;dr: You are flattening the first two axes, namely batch and channels.
I am not sure you are taking the right approach but I will write about that layer.
In any case, let's look at the issue you are facing. You have a data loader that produces (6, 3, 300, 300), i.e. batches of 6 three-channel 300x300 images. By the look of it you are looking to reshape each batch element (3, 300, 300) into (step_size=300, -1).
However instead of that you are affecting the first axis - which you shouldn't - with images.reshape(-1, num_steps, input_size). This will have the desired effect when working with a single-channel images since dim=1 wouldn't be the "channel axis". In your case your have 3 channels, therefore, the resulting shape is: (6*3*300*300//300//300, 300, 300) which is (18, 300, 300) since num_steps=300 and input_size=300. As a result you are left with 18 batch elements instead of 6.
Instead what you want is to reshape with (batch_size, num_steps, -1). Leaving the last axis (a.k.a. seq_length) of variable size. This will result in a shape (6, 300, 900).
Here is a corrected and reduced snippet:
batch_size = 6
channels = 3
inputH, inputW = 300, 300
train_ds = TensorDataset(torch.rand(100, 3, inputH, inputW), torch.rand(100, 5))
train_dl = DataLoader(train_ds, batch_size)
class RNN(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes):
super(RNN, self).__init__()
# (batch_size, seq, input_size)
self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
# (batch_size, hidden_size)
self.fc = nn.Linear(hidden_size, num_classes)
# (batch_size, num_classes)
def forward(self, x):
out, _ = self.rnn(x)
out = out[:, -1, :]
out = self.fc(out)
return out
num_steps = 300
input_size = inputH*inputW*channels//num_steps
hidden_size = 256
num_classes = 5
num_layers = 2
rnn = RNN(input_size, hidden_size, num_layers, num_classes)
for x, y in train_dl:
print(x.shape, y.shape)
images = images.reshape(batch_size, num_steps, -1)
print(images.shape)
outputs = rnn(images)
print(outputs.shape)
break
As I said in the beginning I am a bit wary about this approach because you are essentially feeding your RNN a RGB 300x300 image in the form of a sequence of 300 flattened vectors... I can't say if that makes sense and terms of training and if the model will be able to learn from that. I could be wrong!

Resources