"IndexError: tensors used as indices must be long, byte or bool tensors" Pytorch - pytorch

The dataset is a custom torch.geometric dataset
inv_mask = ~mask
--> 224 loop_attr[edge_index[0][inv_mask]] = edge_attr[inv_mask]
225
226 edge_attr = torch.cat([edge_attr[mask], loop_attr], dim=0)
IndexError: tensors used as indices must be long, byte or bool tensors
Code:-
from torch_geometric.nn import GCNConv
class GCN(torch.nn.Module):
def __init__(self, hidden_channels):
super().__init__()
torch.manual_seed(13213)
self.conv1 = GCNConv(dataset.num_features, hidden_channels)
self.conv2 = GCNConv(hidden_channels, num_classes)
def forward(self,x, edge_index):
x = self.conv1(x, edge_index)
x = x.relu()
x = F.dropout(x, p=0.5, training = self.training)
x = self.conv2(x, edge_index)
return x
model = GCN(hidden_channels = 16)
dataset.train_mask = torch.tensor([range(0,14000)]).type(torch.bool)
dataset.test_mask = torch.tensor([range(14000, 22470)]).type(torch.bool)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01, weight_decay = 5e-4)
criterion = torch.nn.CrossEntropyLoss()
dataset.y = dataset.y.float()
dataset.x = dataset.x.float()
dataset.edge_index = dataset.edge_index.float()
def train():
model.train()
optimizer.zero_grad()
out = model(dataset.x, dataset.edge_index)
loss = criterion(out[dataset.train_mask], dataset.y[dataset.train_mask])
loss.backward()
optimizer.step()
return loss
def test():
model.eval()
out = model(dataset.x, dataset.edge_index)
# pred = out.argmax(dim=1)
test_correct = out[dataset.test_mask] == dataset.y[dataset.test_mask]
test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
return test_acc
for e in range(1,101):
loss = train()
print(f'Epoch: {epoch:02d}, Loss: {loss:.3f}')
The error points to optimizer.zero_grad()
could anyone please explain how to debug code in pytorch, since i used tensorflow for almost every deep learning task I did but when it came to GNN I felt torch geometric would be a viable option.
Please help me get ahead of this error and also suggest ways for me to improve the code ....

Related

LSTM multi-sequence input to one sequence output

I am new with neural networks and am currently trying to make an LSTM model that predicts an output sequence based on multiple parameters. Excuse my ignorance and dummyness in advance.
I have obtained training and validation datasets, which look somewhat like the following:
For every ID four rows are recorded, which uses columns holding certain parameters and the corresponding Y output. Practically, there are thus ~122,000 / 4 = ~30,500 samples (I mistakenly put 122,000 as ID, it is in fact the number of rows). Since the parameter values and the corresponding Y values follow temporal patterns, I am interested if a model such as LSTM improves the prediction.
I want to predict the Y in my validation dataset (~73,000/4 = ~18,000 samples), based on the temporal patterns of the parameters. But is this possible? Most tutorials I followed use a single sequence, for which an LSTM is used to extend a similar input sequence. I thus want an LSTM with 'multi-sequence' input, which outputs one sequence. How do I go about this?
I'm using PyTorch as framework. A simple LSTM model I created using a tutorial, which would not incorporate the parameters:
training_y = traindf.reset_index()['Y']
validation_y = traindf.reset_index()['Y']
Then create a dataset for this:
class YDataset(Dataset):
def __init__(self, data, seq_len = 100):
self.data = data
self.data = torch.from_numpy(data).float().view(-1)
self.seq_len = seq_len
def __len__(self):
return len(self.data)-self.seq_len-1
def __getitem__(self,index):
return self.data[index : index+self.seq_len] , self.data[index+self.seq_len]
train_y = YDataset(training_y_df)
vali_y = YDataset(validation_y_df)
batch_size = 64
train_dataloader = DataLoader(train_y, batch_size, drop_last=True)
vali_dataloader = DataLoader(vali_y, batch_size, drop_last=True)
device = "cuda" if torch.cuda.is_available() else "cpu"
Then create the model:
class Lstm_model(nn.Module):
def __init__(self, input_dim, hidden_size, num_layers):
super(Lstm_model, self).__init__()
self.num_layers = num_layers
self.input_size = input_dim
self.hidden_size = hidden_size
self.lstm = nn.LSTM(input_size=input_dim, hidden_size = hidden_size, num_layers = num_layers)
self.fc = nn.Linear(hidden_size, 1)
def forward(self,x,hn,cn):
out , (hn,cn) = self.lstm(x, (hn, cn))
final_out = self.fc(out[-1])
return final_out, hn,cn
def predict(self,x):
hn, cn = self.init()
final_out = self.fc(out[-1])
return final_out
def init(self):
h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(device)
return h0 , c0
input_dim = 1
hidden_size = 50
num_layers = 3
model = Lstm_model(input_dim , hidden_size , num_layers).to(device)
Loss function and training loop (more or less same as for validation):
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
def train(dataloader):
hn, cn = model.init()
model.train()
for batch , item in enumerate(dataloader):
x , y = item
x = x.to(device)
y = y.to(device)
out , hn , cn = model(x.reshape(100,batch_size,1),hn,cn)
loss = loss_fn(out.reshape(batch_size), y)
hn = hn.detach()
cn = cn.detach()
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch == len(dataloader)-1:
loss = loss.item
print(f"train loss: {loss:>7f} ")
Epochs and loss metrics:
epochs = 200 # Takes really long for me
for epoch in range(epochs):
print(f"epoch {epoch} ")
train(train_dataloader)
test(vali_dataloader)
Final metrics:
import math
from sklearn.metrics import mean_squared_error
import numpy as np
def calculate_metrics(data_loader):
pred_arr = []
y_arr = []
with torch.no_grad():
hn , cn = model.init()
for batch , item = in enumerate(data_loader):
x , y = item
x , y = x.to(device) , y.to(device)
x = x.view(100,64,1)
pred = model(x, hn, cn)[0]
pred = scalar.inverse_transform(pred.detach().cpu().numpy().reshape(-1))
y = scalar.inverse_transform(y.detach().cpu().numpy().reshape(1,-1)).reshape(-1)
pred_arr = pred_arr + list(pred)
y_arr = y_arr + list(y)
return math.sqrt(mean_squared_error(y_arr,pred_arr))
I used this code more as an example of how LSTM would work. Nevertheless, I don't know if this is the right track for me. Does someone know what I should do or a tutorial that does work for my example? Thanks in advance!

Loss Not Decreasing for a Bert from Scratch PyTorch Model

I followed Aladdin Persson's Youtube video to code up just the encoder portion of the transformer model in PyTorch, except I just used the Pytorch's multi-head attention layer. The model seems to produce the correct shape of data. However, during training, the training loss does not drop and the resulting model always predicts the same output of 0.4761. Dataset used for training is from the Sarcasm Detection Dataset from Kaggle. Would appreciate any help you guys can give on errors that I have made.
import pandas as pd
from transformers import BertTokenizer
import torch.nn as nn
import torch
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
import math
df = pd.read_json("Sarcasm_Headlines_Dataset_v2.json", lines=True)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
encoded_input = tokenizer(df['headline'].tolist(), return_tensors='pt',padding=True)
X = encoded_input['input_ids']
y = torch.tensor(df['is_sarcastic'].values).float()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify = y)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
torch.cuda.empty_cache()
class TransformerBlock(nn.Module):
def __init__(self,embed_dim, num_heads, dropout, expansion_ratio):
super(TransformerBlock, self).__init__()
self.attention = nn.MultiheadAttention(embed_dim, num_heads)
self.norm1 = nn.LayerNorm(embed_dim)
self.norm2 = nn.LayerNorm(embed_dim)
self.feed_forward = nn.Sequential(
nn.Linear(embed_dim, expansion_ratio*embed_dim),
nn.ReLU(),
nn.Linear(expansion_ratio*embed_dim,embed_dim)
)
self.dropout = nn.Dropout(dropout)
def forward(self, value, key, query):
attention, _ = self.attention(value, key, query)
x=self.dropout(self.norm1(attention+query))
forward = self.feed_forward(x)
out=self.dropout(self.norm2(forward+x))
return out
class Encoder(nn.Module):
#the vocab size is one more than the max value in the X matrix.
def __init__(self,vocab_size=30109,embed_dim=128,num_layers=1,num_heads=4,device="cpu",expansion_ratio=4,dropout=0.1,max_length=193):
super(Encoder,self).__init__()
self.device = device
self.word_embedding = nn.Embedding(vocab_size,embed_dim)
self.position_embedding = nn.Embedding(max_length,embed_dim)
self.layers = nn.ModuleList(
[
TransformerBlock(embed_dim,num_heads,dropout,expansion_ratio) for _ in range(num_layers)
]
)
self.dropout = nn.Dropout(dropout)
self.classifier1 = nn.Linear(embed_dim,embed_dim)
self.classifier2 = nn.Linear(embed_dim,1)
self.relu = nn.ReLU()
def forward(self,x):
N, seq_length = x.shape
positions = torch.arange(0,seq_length).expand(N, seq_length).to(self.device)
out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
for layer in self.layers:
#print(out.shape)
out = layer(out,out,out)
#Get the first output for classification
#Pooled output from hugging face is: Last layer hidden-state of the first token of the sequence (classification token) further processed by a Linear layer and a Tanh activation function.
#Pooled output from hugging face will be different from out[:,0,:], which is the output from the CLS token.
out = self.relu(self.classifier1(out[:,0,:]))
out = self.classifier2(out)
return out
torch.cuda.empty_cache()
net = Encoder(device=device)
net.to(device)
batch_size = 32
num_train_samples = X_train.shape[0]
num_val_samples = X_test.shape[0]
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(net.parameters(),lr=1e-5)
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5)
val_loss_hist=[]
loss_hist=[]
epoch = 0
min_val_loss = math.inf
print("Training Started")
patience = 0
for _ in range(100):
epoch += 1
net.train()
epoch_loss = 0
permutation = torch.randperm(X_train.size()[0])
for i in range(0,X_train.size()[0], batch_size):
indices = permutation[i:i+batch_size]
features=X_train[indices].to(device)
labels=y_train[indices].reshape(-1,1).to(device)
output = net.forward(features)
loss = criterion(output, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
epoch_loss+=loss.item()
epoch_loss = epoch_loss / num_train_samples * num_val_samples
loss_hist.append(epoch_loss)
#print("Eval")
net.eval()
epoch_val_loss = 0
permutation = torch.randperm(X_test.size()[0])
for i in range(0,X_test.size()[0], batch_size):
indices = permutation[i:i+batch_size]
features=X_test[indices].to(device)
labels = y_test[indices].reshape(-1,1).to(device)
output = net.forward(features)
loss = criterion(output, labels)
epoch_val_loss+=loss.item()
val_loss_hist.append(epoch_val_loss)
scheduler.step(epoch_val_loss)
#if epoch % 5 == 0:
print("Epoch: " + str(epoch) + " Train Loss: " + format(epoch_loss, ".4f") + ". Val Loss: " + format(epoch_val_loss, ".4f") + " LR: " + str(optimizer.param_groups[0]['lr']))
if epoch_val_loss < min_val_loss:
min_val_loss = epoch_val_loss
torch.save(net.state_dict(), "torchmodel/weights_best.pth")
print('\033[93m'+"Model Saved"+'\033[0m')
patience = 0
else:
patience += 1
if (patience == 10):
break
print("Training Ended")

Inference on Multiple Label Sentiment analysis using pytorch_lightning

I have trained an LSTM model for sentiment analysis using pytorch_lightning but I've been having difficulties incorporating the inference.
This is my model:
class LSTM(pl.LightningModule):
def __init__(self,n_vocab,n_embed,
n_hidden,n_output,n_layers,learning_rate,embedding_matrix=None):
super().__init__()
self.n_vocab = n_vocab
self.n_layer = n_layers
self.n_hidden = n_hidden
self.embedding = nn.Embedding(n_vocab, n_embed, padding_idx = 0)
if embedding_matrix is not None:
self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
self.embedding.weight.requires_grad = False
self.lstm = nn.LSTM(n_embed, n_hidden, n_layers, batch_first = True, bidirectional = True)
self.fc = nn.Linear(2 * n_hidden, n_output)
self.dropout = nn.Dropout(0.2)
self.sigmoid = nn.Sigmoid()
self.batch_size = batch_size
self.learning_rate = learning_rate
def forward(self,input_words):
embedded_words = self.embedding(input_words)
lstm_out, _ = self.lstm(embedded_words)
lstm_out_f=lstm_out[:,-1 , :300 ]
lstm_out_b=lstm_out[:, 0 , 300: ]
lstm_out_final = torch.cat([lstm_out_f,lstm_out_b], dim=-1)
lstm_out_final = self.dropout(lstm_out_final)
fc_out = self.fc(lstm_out_final)
return fc_out
def configure_optimizers(self):
optimizer = torch.optim.Adam(self.parameters(), lr = self.learning_rate)
return optimizer
def training_step(self, batch, batch_nb):
x , y= batch
y_hat = self(x)
loss = F.cross_entropy(y_hat, y)
self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
return loss
def validation_step(self, batch, batch_nb):
x, y = batch
y_hat = self(x)
loss = F.cross_entropy(y_hat, y).to(device = 'cuda')
f1 = torchmetrics.F1(num_classes=5).to(device = 'cuda')
f1_score = f1(y_hat, y)
accuracy = Accuracy().to(device = 'cuda')
accur = accuracy(y_hat, y)
self.log("val_loss", loss)
self.log("f1_score", f1_score)
self.log("accuracy", accur)
def test_step(self, batch, batch_idx):
x, y = batch
logits = self(x)
print(logits)
logitsies = softmax(logits)
choice = argmax(logitsies)
loss = F.nll_loss(logits, y)
self.log("test_loss", loss)
return choice
def predict_step(self, batch, batch_idx, dataloader_idx):
x, y = batch
x = x.view(x.size(0), -1)
y_hat = self(x)
logitsies = softmax(logits)
choice = argmax(logitsies)
loss = F.nll_loss(logits, y)
self.log("predict_loss", loss)
return choice
I called the model as such:
model = LSTM(
n_vocab=size_of_emb_matrix,
n_embed=embed_vector_len,
n_hidden=150,
n_output=5,
n_layers=1,
learning_rate=1e-4,
embedding_matrix=embedding_matrix
)
Now I am trying to write a function that would allow me to do inference. I have managed to succesfuly tokenize the input sentence and encode it through an already pre-defined functions, yet I have been getting several errors no matter what I try. I have found myself stuck and don't know how to continue. This is my function so far.
def get_sentiment(text):
x = encode_sentence(text,vocab2index)
x_bar = x[0]
y_hat = torch.tensor(x_bar)
trainer.predict(model,y_hat)
The encode_sentence function is as follows:
def encode_sentence(text, vocab2index, N=70):
tokenized = tokenize(text)
encoded = np.zeros(N, dtype=int)
enc1 = np.array([vocab2index.get(word, vocab2index["UNK"]) for word in tokenized])
length = min(N, len(enc1))
encoded[:length] = enc1[:length]
return encoded, length
As I call the get_sentiment function, I am using the trainer.predict function which allows me to predict, hence doing inference.
But I have been getting the following Issue:
AttributeError Traceback (most recent call
last)
<ipython-input-195-825c536cbbb2> in <module>()
----> 1 get_sentiment("love that dress")
13 frames
/usr/local/lib/python3.7/dist-
packages/pytorch_lightning/loops/epoch/prediction_epoch_loop.py in
_store_batch_indices(self, dataloader_idx)
162 def _store_batch_indices(self, dataloader_idx: int) -> None:
163 """Stores the batch indices if the predictions should be
stored"""
--> 164 batch_sampler =
self.trainer.predict_dataloaders[dataloader_idx].batch_sampler
165 if isinstance(batch_sampler, IndexBatchSamplerWrapper):
166 self.current_batch_indices = batch_sampler.batch_indices
AttributeError: 'Tensor' object has no attribute 'batch_sampler'

Building Autoencoder with Softmax classifier - Input mismatch error

I am trying to train an auto-encoder with a softmax classifier to replicate the results in this paper Intriguing properties of neural networks.
My implementation is the following:
n_embedded = 400
class AE400_10(nn.Module):
def __init__(self):
super(AE400_10, self).__init__()
self.encoder = nn.Sequential(nn.Linear(28*28, n_embedded), nn.Sigmoid())
self.decoder = nn.Sequential(nn.Linear(n_embedded, 28*28))
self.classifier = nn.Sequential(nn.Linear(28*28, 10))
def forward(self, x):
x = x.view(-1, 28*28)
encoded = self.encoder(x)
decoded = self.decoder(encoded)
out = self.classifier(decoded) ##NEW UPDATED
return decoded, F.log_softmax(out)
For the training I have the following:
model = AE400_10().to(device)
criterion1 = nn.MSELoss()
criterion2 = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
for epoch in range(epochs):
total_batch = len(train_set) // batch_size_train
for batch_idx, (data, target) in enumerate(MNSIT_train):
X = data.to(device)
Y = target.to(device)
optimizer.zero_grad()
decoded, out = model(X)
loss1 = criterion1(decoded, inputs)
loss2 = criterion2(out, labels)
loss = loss1 + loss2
loss.backward()
optimizer.step()
if (batch_idx+1) % 100 == 0:
print('Epoch [%d/%d], lter [%d/%d], Loss: %.4f'%(epoch+1, epochs, batch_idx+1, total_batch, cost.item()))
But I am getting the following error:
RuntimeError: size mismatch, m1: [128 x 400], m2: [784 x 10] at
/Users/soumith/mc3build/conda-bld/pytorch_1549593514549/work/aten/src/TH/generic/THTensorMath.cpp:940
I understand this is an error in the dimension but I am not sure why it is happening.
::UPDATE::
I fixed the input to the classifier based on the comments below and now I am getting the following error:
RuntimeError: The size of tensor a (784) must match the size of tensor
b (28) at non-singleton dimension 3
I don't use nn.Sequential so I'm not sure why exactly this happens but if you
replace
self.classifier = nn.Sequential(nn.Linear(28*28, 10))
with
self.classifier = nn.Linear(28*28, 10)
your code works
-->
import torch
import torch.nn as nn
import torch.nn.functional as F
n_embedded = 400
class AE400_10(nn.Module):
def __init__(self):
super(AE400_10, self).__init__()
self.encoder = nn.Sequential(nn.Linear(28*28, n_embedded), nn.Sigmoid())
self.decoder = nn.Sequential(nn.Linear(n_embedded, 28*28))
self.test = nn.Linear(28*28, 10)
self.classifier = nn.Sequential(nn.Linear(28*28, 10))
def forward(self, x):
x = x.view(-1,28*28)
encoded = self.encoder(x)
decoded = self.decoder(encoded)
out = self.classifier(decoded)
return decoded, F.log_softmax(out)
x = torch.ones(128,28,28)
model = AE400_10()
model(x)
instead of encoded out = self.classifier(encoded)
put decoded as input of classifier
out = self.classifier(decoded)
I think, here is why you are getting the mismatch, because the classifier is expecting a tensor of 28 *28 as input as defined in your code.
Then,when calling the criterions:
loss1 = criterion1(decoded, X)
loss2 = criterion2(out, Y)

Failing to train SkipGram word embedding in Pytorch

I am training the skipgram word embeddings using the famous model described in https://arxiv.org/abs/1310.4546. I want to train it in PyTorch but I am getting errors and I can't figure out where they are coming from. Below I have provided my model class, training loop, and batching method. Does anyone have any insight into whats going on?
I am getting an error on the output = loss(data, target) line. It is having a problem with <class 'torch.LongTensor'> which is weird because CrossEntropyLoss takes a long tensor. The output shape might be wrong which is: torch.Size([1000, 100, 1000]) after the feedforward.
I have my model defined as:
import torch
import torch.nn as nn
torch.manual_seed(1)
class SkipGram(nn.Module):
def __init__(self, vocab_size, embedding_dim):
super(SkipGram, self).__init__()
self.embeddings = nn.Embedding(vocab_size, embedding_dim)
self.hidden_layer = nn.Linear(embedding_dim, vocab_size)
# Loss needs to be input: (minibatch (N), C) target: (minibatch, 1), each label is a class
# Calculate loss in training
def forward(self, x):
embeds = self.embeddings(x)
x = self.hidden_layer(embeds)
return x
My training is defined as:
import torch.optim as optim
from torch.autograd import Variable
net = SkipGram(1000, 300)
optimizer = optim.SGD(net.parameters(), lr=0.01)
batch_size = 100
size = len(train_ints)
batches = batch_index_gen(batch_size, size)
inputs, targets = build_tensor_from_batch_index(batches[0], train_ints)
for i in range(100):
running_loss = 0.0
for batch_idx, batch in enumerate(batches):
data, target = build_tensor_from_batch_index(batch, train_ints)
# if (torch.cuda.is_available()):
# data, target = data.cuda(), target.cuda()
# net = net.cuda()
data, target = Variable(data), Variable(target)
optimizer.zero_grad()
output = net.forward(data)
loss = nn.CrossEntropyLoss()
output = loss(data, target)
output.backward()
optimizer.step()
running_loss += loss.data[0]
optimizer.step()
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
i, batch_idx * len(batch_size), len(size),
100. * (batch_idx * len(batch_size)) / len(size), loss.data[0]))
If useful my batching is:
def build_tensor_from_batch_index(index, train_ints):
minibatch = []
for i in range(index[0], index[1]):
input_arr = np.zeros( (1000,1), dtype=np.int )
target_arr = np.zeros( (1000,1), dtype=np.int )
input_index, target_index = train_ints[i]
input_arr[input_index] = 1
target_arr[input_index] = 1
input_tensor = torch.from_numpy(input_arr)
target_tensor = torch.from_numpy(target_arr)
minibatch.append( (input_tensor, target_tensor) )
# Concatenate all tensors into a minibatch
#x = [tensor[0] for tensor in minibatch]
#print(x)
input_minibatch = torch.cat([tensor[0] for tensor in minibatch], 1)
target_minibatch = torch.cat([tensor[1] for tensor in minibatch], 1)
#target_minibatch = minibatch[0][1]
return input_minibatch, target_minibatch
I'm not sure about that since I did not read the paper, but seems weird that you are computing the loss with the original data and the targets:
output = loss(data, target)
Considering that the output of the network is output = net.forward(data) I think you should compute your loss as:
error = loss(output, target)
If this doesn't help, briefly point me out what the paper says about the loss function.

Resources