How can I use the LBFGS optimizer with PyTorch Ignite?

I started using Ignite recently and I find it very interesting.
I would like to train a model using the LBFGS algorithm from the torch.optim module as the optimizer.
This is my code:
import torch
from ignite.engine import Events, Engine, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import RootMeanSquaredError, Loss
from ignite.handlers import EarlyStopping

D_in, H, D_out = 5, 10, 1
model = simpleNN(D_in, H, D_out)  # a simple MLP with 1 hidden layer
model.double()
train_loader, val_loader = get_data_loaders(i)

optimizer = torch.optim.LBFGS(model.parameters(), lr=1)
loss_func = torch.nn.MSELoss()

# Ignite
trainer = create_supervised_trainer(model, optimizer, loss_func)
evaluator = create_supervised_evaluator(model, metrics={'RMSE': RootMeanSquaredError(), 'LOSS': Loss(loss_func)})

@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    print("Epoch[{}] Loss: {:.5f}".format(engine.state.epoch, engine.state.output))

def score_function(engine):
    val_loss = engine.state.metrics['RMSE']
    print("VAL_LOSS: {:.5f}".format(val_loss))
    return -val_loss

handler = EarlyStopping(patience=10, score_function=score_function, trainer=trainer)
evaluator.add_event_handler(Events.COMPLETED, handler)

trainer.run(train_loader, max_epochs=100)
And the error that is raised is:
TypeError: step() missing 1 required positional argument: 'closure'
I know that LBFGS requires a closure to be defined and passed to its step() method, so my question is: how can I do that using Ignite? Or is there another approach for doing this?

The way to do it is like this:
from ignite.engine import Engine

model = ...
optimizer = torch.optim.LBFGS(model.parameters(), lr=1)
criterion = ...

def update_fn(engine, batch):
    model.train()
    x, y = batch
    # pass to device if needed, as here: https://github.com/pytorch/ignite/blob/40d815930d7801b21acfecfa21cd2641a5a50249/ignite/engine/__init__.py#L45

    def closure():
        y_pred = model(x)
        loss = criterion(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        return loss

    loss = optimizer.step(closure)  # step() evaluates the closure itself
    return loss.item()              # becomes engine.state.output, usable for logging

trainer = Engine(update_fn)
# everything else is the same
Source
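With update_fn in place, the handlers and the run call from the question attach to this trainer unchanged, for example (reusing the question's imports and data loader):

@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    print("Epoch[{}] Loss: {:.5f}".format(engine.state.epoch, engine.state.output))

trainer.run(train_loader, max_epochs=100)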

You need to wrap the whole evaluation step, including zero_grad and the backward pass, in a closure that returns the loss:
for batch in loader:
    def closure():
        ...
        return loss
    optim.step(closure)
PyTorch docs for 'closure'
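For reference, here is a minimal, self-contained sketch of that closure pattern outside Ignite; the toy linear model, loss, and random batch below are illustrative assumptions, not from the question:

import torch

model = torch.nn.Linear(5, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.LBFGS(model.parameters(), lr=1)

x = torch.randn(32, 5)  # dummy batch
y = torch.randn(32, 1)

def closure():
    optimizer.zero_grad()          # clear old gradients before re-evaluating
    loss = criterion(model(x), y)
    loss.backward()                # LBFGS may call this closure several times per step
    return loss

loss = optimizer.step(closure)     # step() re-evaluates the closure as needed
print(loss.item())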

Related

tf.GradientTape gradient() returns None

I am trying to train my Keras model using TensorFlow. So far I can build the model:
def Model(input_shape, num_of_layers):
    num_of_layers = 5
    mod = keras.models.Sequential()
    mod.add(keras.layers.Dense(1, input_shape=(input_shape,)))
    for i in range(num_of_layers - 1):
        mod.add(keras.layers.Dense(16, activation='tanh'))
    mod.add(keras.layers.Dense(1, activation='tanh'))
    return mod
and the loss function:
def loss(u_pred, u_true):
    return tf.reduce_mean(tf.keras.losses.mean_squared_error(u_pred, u_true))
Then I create a train function to train the model.
def train(model, X, epoch=500, lr=1e-3):
    trainable_params = [tf.Variable(model.get_weights()[i]) for i in range(len(model.get_weights()))]
    loss_array = []
    optim = tf.keras.optimizers.Adam(learning_rate=lr)
    for i in range(epoch):
        with tf.GradientTape() as g:
            g.watch(trainable_params)
            loss_val = loss(model(X), tf.zeros_like(X))
        grad = g.gradient(loss_val, trainable_params)
        ...
grad is a list of None values when I print it. What went wrong in my train function? I have converted my model's weights and biases to tensor objects using tf.Variable. Using tf.cast or tf.convert_to_tensor doesn't help either.
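A likely cause, offered as a hedged diagnosis: tf.Variable(model.get_weights()[i]) builds fresh copies of the weights that are never used inside model(X), so the tape has no path from those copies to the loss, and every gradient comes back None. A minimal sketch of the usual pattern, assuming the goal is to train the model's own weights; it reuses the loss() defined above and keeps the zero target from the question:

import tensorflow as tf

def train(model, X, epoch=500, lr=1e-3):
    optim = tf.keras.optimizers.Adam(learning_rate=lr)
    for i in range(epoch):
        with tf.GradientTape() as g:
            # the model's own trainable variables are watched automatically
            loss_val = loss(model(X), tf.zeros_like(X))
        grads = g.gradient(loss_val, model.trainable_variables)
        optim.apply_gradients(zip(grads, model.trainable_variables))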

Using PyTorch for gradient descent

My code:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MultivariateLinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 1)

    def forward(self, x):
        # print(1)
        return self.linear(x)

x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

model = MultivariateLinearRegressionModel()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)
# print(222)

ep = 2000
for epoch in range(ep + 1):
    hypothesis = model(x_train)
    cost = F.mse_loss(hypothesis, y_train)
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, ep, cost.item()))
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
My problem: this code is my own MultivariateLinearRegressionModel, but in the for loop, why is
hypothesis = model(x_train)
the same as
hypothesis = model.forward(x_train)?
I don't know why these two statements are equivalent. Is this a Python language feature?
Because your model MultivariateLinearRegressionModel inherits from nn.Module, whenever you call model(x_train) it automatically executes the forward method defined in the MultivariateLinearRegressionModel class.
That's why model(x_train) and model.forward(x_train) give the same result.
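The mechanism is Python's __call__ protocol: nn.Module defines __call__, which runs any registered hooks and then dispatches to forward. A stripped-down sketch of the same idea in plain Python (the class names here are made up for illustration):

class Module:
    def __call__(self, *args, **kwargs):
        # a real nn.Module also runs hooks around this dispatch
        return self.forward(*args, **kwargs)

class Doubler(Module):
    def forward(self, x):
        return 2 * x

m = Doubler()
print(m(21))  # 42: m(21) goes through __call__, which calls forward

This is also why calling model(x_train) is preferred over model.forward(x_train): the direct forward call skips the hooks that nn.Module runs inside __call__.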

I get something wrong when using model.train() and model.eval() in PyTorch

I have prepared features and their labels as below. I want to build a model made of a transformer encoder followed by a linear layer that predicts a value, but I got an error when I used the model for prediction after training it.
At first I run the code below:
import torch
from torch import nn

features = torch.rand(batch_size, channels, length)
labels = torch.rand(batch_size)

class TransformerModel(nn.Module):
    def __init__(self):
        super(TransformerModel, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(d_model=8, nhead=8, dropout=0.5)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, 6)
        self.decoder = nn.Linear(40, 1)

    def forward(self, src):
        encoded = self.transformer_encoder(src.transpose(1, 0)).transpose(1, 0)
        pred = self.decoder(encoded.reshape(encoded.shape[0], -1))
        return pred
model = TransformerModel()
criterion = nn.MSELoss()
lr = 0.3  # learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

def train():
    model.train()  # turn on train mode
    optimizer.zero_grad()
    output = model(features)
    loss = criterion(output.view(-1, 1), labels.view(-1, 1))
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optimizer.step()
    return loss.item()

for _ in range(100):
    train()
After that, I predict on the features with the code below:
model.eval()
output = model(features)
All values of output are the same, but if I use model.train(), the output seems OK. So what is the problem? Or was the model built incorrectly?
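One thing to check: model.train() and model.eval() only toggle layers such as Dropout (and BatchNorm). With dropout=0.5, the train-mode outputs vary because dropout injects randomness, so identical eval-mode outputs often mean the network itself has collapsed to a constant; a learning rate of 0.3 is unusually high for Adam and is a plausible suspect. A tiny sketch showing what the two modes actually change:

import torch
from torch import nn

layer = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

layer.train()  # dropout active: output differs run to run
print(layer(x))
print(layer(x))

layer.eval()   # dropout disabled: output is deterministic
print(layer(x))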

"AttributeError: 'classificadorFinal' object has no attribute 'log_softmax'" when trying to train a neural network using PyTorch

I'm learning to use PyTorch and I got an error that won't let me continue programming.
My code:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from skorch import NeuralNetClassifier  # sklearn integration
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

class classificadorFinal(nn.Module):
    def __init__(self, activation=F.tanh, neurons=16, initializer=torch.nn.init.uniform_, dropout=0.3):
        # from melhores_parametros
        super().__init__()
        self.dense0 = nn.Linear(4, neurons)
        initializer(self.dense0.weight)
        self.activation0 = activation
        self.dense1 = nn.Linear(neurons, neurons)
        initializer(self.dense1.weight)
        self.activation1 = activation
        self.dense2 = nn.Linear(neurons, 3)
        self.dropout = nn.Dropout(dropout)

    def forward(self, X):
        X = self.dense0(X)
        X = self.activation0(X)
        X = self.dropout(X)
        X = self.dense1(X)
        X = self.activation1(X)
        X = self.dropout(X)
        X = self.dense2(X)
        return X

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(classificador.parameters(), lr=0.001, weight_decay=0.0001)

# training
for epoch in range(200):  # from melhores_parametros
    running_loss = 0.
    running_accuracy = 0.
    for data in train_loader:
        inputs, labels = data
        optimizer.zero_grad()
        outputs = classificadorFinal(inputs)
        loss = criterion(outputs, labels)  # error here
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        ps = F.softmax(outputs)
        top_p, top_class = ps.topk(k=1, dim=1)
        equals = top_class == labels.view(*top_class.shape)
        running_accuracy += torch.mean(equals.type(torch.float))
    print('Época {:3d}: perda {:3.5f} - precisão {:3.5f}'.format(epoch + 1, running_loss / len(train_loader), running_accuracy / len(train_loader)))
The error occurs exactly on loss = criterion(outputs, labels):
AttributeError: 'classificadorFinal' object has no attribute 'log_softmax'
I found out this error is well known, but I did not understand the proposed solution:
disable aux_logits when the model is created (aux_logits=False).
A little help, please!
The outputs are not actually the output of the model, but rather the model itself: classificadorFinal is the class, so calling it creates an object/instance of that class, and inputs becomes the first argument to the __init__ method, namely activation.
# Creates an instance of the model
outputs = classificadorFinal(inputs)
You first have to create the model (an instance), which should be done once, and then call that model with the inputs. It looks like you have already created the model, since you use classificador.parameters() for the optimiser, so classificador is presumably the instance of the model. You need to call classificador (the instance), not classificadorFinal (the class), to create the outputs.
# Call the instance of the model, not the class
outputs = classificador(inputs)
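Putting it together, the fixed training loop would start like this (a sketch; only the call site changes from the question's code):

classificador = classificadorFinal()   # create the instance once, before training
for data in train_loader:
    inputs, labels = data
    optimizer.zero_grad()
    outputs = classificador(inputs)    # call the instance, which runs forward()
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()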

Failing to train SkipGram word embeddings in PyTorch

I am training skip-gram word embeddings using the famous model described in https://arxiv.org/abs/1310.4546. I want to train it in PyTorch, but I am getting errors and I can't figure out where they are coming from. Below I have provided my model class, training loop, and batching method. Does anyone have any insight into what's going on?
I am getting an error on the output = loss(data, target) line. It is having a problem with <class 'torch.LongTensor'>, which is weird because CrossEntropyLoss takes a long tensor. The output shape might also be wrong: it is torch.Size([1000, 100, 1000]) after the feedforward.
I have my model defined as:
import torch
import torch.nn as nn

torch.manual_seed(1)

class SkipGram(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(SkipGram, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.hidden_layer = nn.Linear(embedding_dim, vocab_size)
        # Loss needs input (minibatch N, C) and target (minibatch,), each label a class
        # Calculate loss in training

    def forward(self, x):
        embeds = self.embeddings(x)
        x = self.hidden_layer(embeds)
        return x
My training is defined as:
import torch.optim as optim
from torch.autograd import Variable

net = SkipGram(1000, 300)
optimizer = optim.SGD(net.parameters(), lr=0.01)
batch_size = 100
size = len(train_ints)
batches = batch_index_gen(batch_size, size)
inputs, targets = build_tensor_from_batch_index(batches[0], train_ints)

for i in range(100):
    running_loss = 0.0
    for batch_idx, batch in enumerate(batches):
        data, target = build_tensor_from_batch_index(batch, train_ints)
        # if torch.cuda.is_available():
        #     data, target = data.cuda(), target.cuda()
        #     net = net.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = net.forward(data)
        loss = nn.CrossEntropyLoss()
        output = loss(data, target)
        output.backward()
        optimizer.step()
        running_loss += loss.data[0]
        optimizer.step()
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            i, batch_idx * len(batch_size), len(size),
            100. * (batch_idx * len(batch_size)) / len(size), loss.data[0]))
If useful, my batching is:
def build_tensor_from_batch_index(index, train_ints):
    minibatch = []
    for i in range(index[0], index[1]):
        input_arr = np.zeros((1000, 1), dtype=np.int)
        target_arr = np.zeros((1000, 1), dtype=np.int)
        input_index, target_index = train_ints[i]
        input_arr[input_index] = 1
        target_arr[input_index] = 1
        input_tensor = torch.from_numpy(input_arr)
        target_tensor = torch.from_numpy(target_arr)
        minibatch.append((input_tensor, target_tensor))
    # Concatenate all tensors into a minibatch
    # x = [tensor[0] for tensor in minibatch]
    # print(x)
    input_minibatch = torch.cat([tensor[0] for tensor in minibatch], 1)
    target_minibatch = torch.cat([tensor[1] for tensor in minibatch], 1)
    # target_minibatch = minibatch[0][1]
    return input_minibatch, target_minibatch
I'm not sure about that, since I did not read the paper, but it seems weird that you are computing the loss from the original data and the targets:
output = loss(data, target)
Considering that the output of the network is output = net.forward(data), I think you should compute the loss as:
error = loss(output, target)
If this doesn't help, briefly point out to me what the paper says about the loss function.
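As a supplementary note on the <class 'torch.LongTensor'> complaint: nn.CrossEntropyLoss expects raw logits of shape (N, C) as a float tensor and class indices of shape (N,) as a long tensor, not one-hot vectors. A minimal shape sketch (the batch and vocabulary sizes are taken from the question, everything else is illustrative):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(100, 1000)            # (batch, vocab) float scores from the model
targets = torch.randint(0, 1000, (100,))   # class indices, not one-hot vectors
print(criterion(logits, targets).item())

This also suggests passing word indices of shape (N,) straight into nn.Embedding instead of building one-hot arrays, which would remove the extra dimension behind the torch.Size([1000, 100, 1000]) output.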
