SequenceClassifierOutput has a generator as loss instead of a tensor - PyTorch

I'm doing distillation from a RoBERTa model with an adapter, following this tutorial. In the function distill_roberta_weights() I just changed teacher_model.config.to_dict() to student.load_state_dict(teacher.state_dict(), strict=False), so the student model has the adapter too. But when I train the distillation using the DistillationTrainer from here, I get the following error. Do you have any idea what the problem is?

The student_output has a generator as its loss instead of a tensor; the cross-entropy part has no problem, as it uses the logits from the outputs.
EDIT:
I am adding more information
def distill_weights(teacher, student):
    """
    Recursively copies the weights of the (teacher) to the (student).
    This function is meant to be first called on a RobertaFor... model, but it is
    then called recursively on every child of that model.
    The only part that is not fully copied is the encoder, of which only half is copied.
    """
    # If the part is an entire RoBERTa model or a RobertaFor..., unpack and iterate
    if isinstance(teacher, RobertaModel) or type(teacher).__name__.startswith('RobertaFor'):
        for teacher_part, student_part in zip(teacher.children(), student.children()):
            distill_weights(teacher_part, student_part)
    # Else if the part is an encoder, copy one out of every two layers
    elif isinstance(teacher, RobertaEncoder):
        teacher_encoding_layers = [layer for layer in next(teacher.children())]
        student_encoding_layers = [layer for layer in next(student.children())]
        for i in range(len(student_encoding_layers)):
            student_encoding_layers[i].load_state_dict(teacher_encoding_layers[2*i].state_dict())
    # Else the part is a head or something else, so copy the state_dict
    else:
        student.load_state_dict(teacher.state_dict(), strict=False)
def distill_roberta_based(teacher_model):
    """
    Distills a RoBERTa (teacher_model) the way DistilBERT was distilled from BERT.
    The student model has the same configuration, except for the number of hidden layers, which is halved.
    The student layers are initialized by copying one out of two layers of the teacher, starting with layer 0.
    The head of the teacher is also copied.
    """
    # Set the student configuration
    configuration = teacher_model.config.to_dict()
    configuration['num_hidden_layers'] //= 2
    configuration = RobertaConfig.from_dict(configuration)
    # Create the student model
    student_model = type(teacher_model)(configuration)
    distill_weights(teacher=teacher_model, student=student_model)
    return student_model
# Trainer for training the distilled model
class DistillationTrainer(Trainer):
    def __init__(self, *args, teacher_model=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.teacher = teacher_model
        # Place the teacher on the same device as the student
        self._move_model_to_device(self.teacher, self.model.device)
        self.teacher.eval()

    def compute_loss(self, model, inputs, return_outputs=False):
        """
        The distillation loss for distilling a BERT-like model.
        The loss takes the (teacher_logits), (student_logits) and (labels) for various losses.
        The (temperature) can be given, otherwise it is set to 1 by default.
        """
        outputs_student = model(**inputs)
        print(outputs_student)
        student_loss = outputs_student.loss
        # Compute the teacher output
        with torch.no_grad():
            outputs_teacher = self.teacher(**inputs)
        # Assert size
        assert outputs_student.logits.size() == outputs_teacher.logits.size()
        # Classification loss (problem-specific loss)
        loss_function = CrossEntropyLoss()
        # Temperature and softmax
        student_logits = F.softmax(outputs_student.logits / self.args.temperature, dim=-1)
        teacher_logits = F.softmax(outputs_teacher.logits / self.args.temperature, dim=-1)
        loss_logits = loss_function(student_logits, teacher_logits)
        # Return the weighted student loss
        loss = self.args.alpha * student_loss + (1. - self.args.alpha) * loss_logits
        return (loss, outputs_student) if return_outputs else loss
# Create the student
student_model_adapter = distill_roberta_based(teacher_model)
# Activate the adapter
student_model_adapter.set_active_adapters('parallel')
student_model_adapter.train_adapter('parallel')

trainer = DistillationTrainer(
    student_model_adapter,
    training_args,
    teacher_model=teacher_model,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.args._n_gpu = 4
So the desired output of outputs_student should look like this:
SequenceClassifierOutput(loss=tensor([0.6899, 0.6902, 0.6926, 0.6913, 0.6906, 0.6904, 0.6922, 0.6917],
device='cuda:0', grad_fn=<GatherBackward>), logits=tensor([[-1.2512e-03, -9.7885e-03],
[ 6.2714e-03, -5.7755e-03],.....])
But instead the output is
SequenceClassifierOutput(loss=<generator object gather.<locals>.gather_map.<locals>.<genexpr> at 0x7f5bb4fbe9d0>, logits=tensor([[-0.0150, 0.0075],
[-0.0122, 0.0181],...
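Side note while debugging: the grad_fn=<GatherBackward> in the expected output and the trainer.args._n_gpu = 4 line suggest the model runs under DataParallel, and the generator comes out of its gather step. A heavily hedged sketch of a guard inside compute_loss, assuming the generator yields one loss tensor per GPU (a guess at the symptom, not a confirmed fix):

student_loss = outputs_student.loss
if not torch.is_tensor(student_loss):
    # Assumption: the generator yields per-device loss tensors;
    # flatten and average them into a single scalar
    student_loss = torch.cat([l.reshape(-1) for l in student_loss]).mean()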

Related

How to make a custom validation_step in TensorFlow 2 / Keras?

I have a question regarding the validation data.
I have this neural network, and I divided my data into train_generator, val_generator, test_generator.
I made a custom model with a custom fit:
class MyModel(tf.keras.Model):
    def __init__(self):
    def __call__(.....)
    def train_step(....)

then I have:

train_generator = DataGenerator(....)
val_generator = DataGenerator(....)
test_generator = DataGenerator(....)

then:

model = MyModel()
model.compile(optimizer=keras.optimizers.Adam(clipnorm=5.),
              metrics=["accuracy"])
model.fit(train_generator, validation_data=val_generator, epochs=40)
The program runs and gives me no errors, but my question is: how can I know what happens with my validation data?
Is it processed the same way as the training data (train_generator) in the train_step function, or do I need to specify how to process the validation data?
In case it helps, I will also include the MyModel class:
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.dec2 = Decoder2()

    def __call__(self, y_hat, **kwargs):
        print(y_hat.shape)
        z_hat = self.dec2(y_hat)
        return z_hat

    def train_step(self, dataset):
        with tf.GradientTape() as tape:
            y_hat = dataset[0]
            z_true = dataset[1]
            z_pred = self(y_hat, training=True)
            # print("This is z_true : ", z_true.shape)
            # print("This is z_pred : ", z_pred.shape)
            loss = tf.reduce_mean(tf.abs(tf.cast(z_pred, tf.float64) - tf.cast(z_true, tf.float64)))
            print("loss: ", loss)
            global_loss.append(loss)
        # Compute gradients (TODO: I need to do gradient clipping)
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(z_true, z_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}
You have to add a test_step(self, data) function to your MyModel class, as you can see here: Providing your own evaluation step.
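As a rough sketch of what that could look like for the class above, mirroring train_step but without the gradient update (the metric handling is an assumption based on the compile call shown):

def test_step(self, dataset):
    # Runs once per batch of validation_data during fit(); no weight updates here
    y_hat = dataset[0]
    z_true = dataset[1]
    z_pred = self(y_hat, training=False)
    # Update the compiled metrics, just like in train_step
    self.compiled_metrics.update_state(z_true, z_pred)
    return {m.name: m.result() for m in self.metrics}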

How to print the output weights for the output layer in BERT?

I would like to print the output vector/tensor in BERT and wasn't sure how to do it. I've been using the following example to walk myself through it:
https://colab.research.google.com/drive/1pTuQhug6Dhl9XalKB0zUGf4FIdYFlpcX
It's a simple classification problem, but I want to be able to get the output vector before we classify the training examples. Can someone point out where in the code I can do this and how?
Do you want the weights of the output layer or the logits? I think you want the logits. It is more work, but it is better in the long run to subclass so you can play with it yourself. Here is part of a subclass I did where I wanted dropout and more control; I'll include it here so you can access all the parts of the model.
import torch
from transformers import BertModel, BertPreTrainedModel

class MyBert(BertPreTrainedModel):
    def __init__(self, config, dropout_prob):
        super().__init__(config)
        self.num_labels = 2
        self.bert = BertModel(config)
        self.dropout = torch.nn.Dropout(dropout_prob)
        self.classifier = torch.nn.Linear(config.hidden_size, self.num_labels)
        self.init_weights()

    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                position_ids=None,
                head_mask=None,
                inputs_embeds=None,
                labels=None):
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
        )
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        outputs = (logits,) + outputs[2:]  # add hidden states and attentions if they are here
        if labels is not None:
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs
        return outputs  # (loss), logits, (hidden_states), (attentions)
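With a subclass like this, one way to get the vector before classification (a hedged sketch; the tokenizer setup and the dropout_prob value are illustrative) is to call self.bert directly and take the pooled output that feeds the classifier:

model = MyBert.from_pretrained('bert-base-uncased', 0.1)  # 0.1 is the dropout_prob
model.eval()
with torch.no_grad():
    encoding = tokenizer("an example sentence", return_tensors='pt')  # assumes a matching tokenizer
    pooled = model.bert(**encoding)[1]  # the vector fed into the classifier head
    logits = model(**encoding)[0]       # logits, since no labels are passed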

I have no idea where the 'plot_learning_curve' file or the 'learning.utils' module is

While following an example about classifying cats and dogs using AlexNet from a post, I got stuck on this import error:

Traceback (most recent call last):
  File "C:\Users\Gsum\Desktop\Asirra 개 고양이\asirra-dogs-cats-classification-master\learning\optimizers.py", line 5, in <module>
    from learning.utils import plot_learning_curve
ImportError: No module named 'learning'

I've been looking for modules with names similar to learning or learn that include a plot_learning_curve function. If anyone knows which library includes a plot_learning_curve function, I would appreciate some help.
Here is my code:
import os
import time
from abc import abstractmethod
import tensorflow as tf
from learning.utils import plot_learning_curve

class Optimizer(object):
    """Base class for gradient-based optimization algorithms."""

    def __init__(self, model, train_set, evaluator, val_set=None, **kwargs):
        """
        Optimizer initializer.
        :param model: ConvNet, the model to be learned.
        :param train_set: DataSet, training set to be used.
        :param evaluator: Evaluator, for computing performance scores during training.
        :param val_set: DataSet, validation set to be used, which can be None if not used.
        :param kwargs: dict, extra arguments containing training hyperparameters.
            - batch_size: int, batch size for each iteration.
            - num_epochs: int, total number of epochs for training.
            - init_learning_rate: float, initial learning rate.
        """
        self.model = model
        self.train_set = train_set
        self.evaluator = evaluator
        self.val_set = val_set

        # Training hyperparameters
        self.batch_size = kwargs.pop('batch_size', 256)
        self.num_epochs = kwargs.pop('num_epochs', 320)
        self.init_learning_rate = kwargs.pop('init_learning_rate', 0.01)

        self.learning_rate_placeholder = tf.placeholder(tf.float32)  # placeholder for current learning rate
        self.optimize = self._optimize_op()

        self._reset()
    def _reset(self):
        """Reset some variables."""
        self.curr_epoch = 1
        self.num_bad_epochs = 0  # number of bad epochs, where the model is updated without improvement
        self.best_score = self.evaluator.worst_score  # initialize best score with the worst one
        self.curr_learning_rate = self.init_learning_rate  # current learning rate

    @abstractmethod
    def _optimize_op(self, **kwargs):
        """
        tf.train.Optimizer.minimize Op for a gradient update.
        This should be implemented, and should not be called manually.
        """
        pass

    @abstractmethod
    def _update_learning_rate(self, **kwargs):
        """
        Update the current learning rate (if needed) on every epoch, by its own schedule.
        This should be implemented, and should not be called manually.
        """
        pass
    def _step(self, sess, **kwargs):
        """
        Make a single gradient update and return its results.
        This should not be called manually.
        :param sess: tf.Session.
        :param kwargs: dict, extra arguments containing training hyperparameters.
            - augment_train: bool, whether to perform augmentation for training.
        :return loss: float, loss value for the single iteration step.
                y_true: np.ndarray, true label from the training set.
                y_pred: np.ndarray, predicted label from the model.
        """
        augment_train = kwargs.pop('augment_train', True)

        # Sample a single batch
        X, y_true = self.train_set.next_batch(self.batch_size, shuffle=True,
                                              augment=augment_train, is_train=True)

        # Compute the loss and make an update
        _, loss, y_pred = \
            sess.run([self.optimize, self.model.loss, self.model.pred],
                     feed_dict={self.model.X: X, self.model.y: y_true,
                                self.model.is_train: True,
                                self.learning_rate_placeholder: self.curr_learning_rate})
        return loss, y_true, y_pred
    def train(self, sess, save_dir='/tmp', details=False, verbose=True, **kwargs):
        """
        Run the optimizer to train the model.
        :param sess: tf.Session.
        :param save_dir: str, the directory to save the learned weights of the model.
        :param details: bool, whether to return detailed results.
        :param verbose: bool, whether to print details during training.
        :param kwargs: dict, extra arguments containing training hyperparameters.
        :return train_results: dict, containing detailed results of training.
        """
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())  # initialize all weights

        train_results = dict()  # dictionary to contain training (and evaluation) results and details
        train_size = self.train_set.num_examples
        num_steps_per_epoch = train_size // self.batch_size
        num_steps = self.num_epochs * num_steps_per_epoch
        if verbose:
            print('Running training loop...')
            print('Number of training iterations: {}'.format(num_steps))

        step_losses, step_scores, eval_scores = [], [], []
        start_time = time.time()

        # Start training loop
        for i in range(num_steps):
            # Perform a gradient update from a single minibatch
            step_loss, step_y_true, step_y_pred = self._step(sess, **kwargs)
            step_losses.append(step_loss)

            # Perform evaluation at the end of each epoch
            if (i + 1) % num_steps_per_epoch == 0:
                # Evaluate the model with the current minibatch from the training set
                step_score = self.evaluator.score(step_y_true, step_y_pred)
                step_scores.append(step_score)

                # If a validation set is given, use it for evaluation
                if self.val_set is not None:
                    # Evaluate the model with the validation set
                    eval_y_pred = self.model.predict(sess, self.val_set, verbose=False, **kwargs)
                    eval_score = self.evaluator.score(self.val_set.labels, eval_y_pred)
                    eval_scores.append(eval_score)

                    if verbose:
                        # Print intermediate results
                        print('[epoch {}]\tloss: {:.6f} |Train score: {:.6f} |Eval score: {:.6f} |lr: {:.6f}'
                              .format(self.curr_epoch, step_loss, step_score, eval_score, self.curr_learning_rate))
                        # Plot intermediate results
                        plot_learning_curve(-1, step_losses, step_scores, eval_scores=eval_scores,
                                            mode=self.evaluator.mode, img_dir=save_dir)
                    curr_score = eval_score
                # Else, just use results from the current minibatch for evaluation
                else:
                    if verbose:
                        # Print intermediate results
                        print('[epoch {}]\tloss: {} |Train score: {:.6f} |lr: {:.6f}'
                              .format(self.curr_epoch, step_loss, step_score, self.curr_learning_rate))
                        # Plot intermediate results
                        plot_learning_curve(-1, step_losses, step_scores, eval_scores=None,
                                            mode=self.evaluator.mode, img_dir=save_dir)
                    curr_score = step_score

                # Keep track of the current best model,
                # by comparing the current score and the best score
                if self.evaluator.is_better(curr_score, self.best_score, **kwargs):
                    self.best_score = curr_score
                    self.num_bad_epochs = 0
                    saver.save(sess, os.path.join(save_dir, 'model.ckpt'))  # save current weights
                else:
                    self.num_bad_epochs += 1

                self._update_learning_rate(**kwargs)
                self.curr_epoch += 1

        if verbose:
            print('Total training time(sec): {}'.format(time.time() - start_time))
            print('Best {} score: {}'.format('evaluation' if self.val_set is not None else 'training',
                                             self.best_score))
            print('Done.')

        if details:
            # Store training results in a dictionary
            train_results['step_losses'] = step_losses  # (num_iterations)
            train_results['step_scores'] = step_scores  # (num_epochs)
            if self.val_set is not None:
                train_results['eval_scores'] = eval_scores  # (num_epochs)
        return train_results
class MomentumOptimizer(Optimizer):
    """Gradient descent optimizer, with Momentum algorithm."""

    def _optimize_op(self, **kwargs):
        """
        tf.train.MomentumOptimizer.minimize Op for a gradient update.
        :param kwargs: dict, extra arguments for the optimizer.
            - momentum: float, the momentum coefficient.
        :return tf.Operation.
        """
        momentum = kwargs.pop('momentum', 0.9)

        update_vars = tf.trainable_variables()
        return tf.train.MomentumOptimizer(self.learning_rate_placeholder, momentum, use_nesterov=False)\
            .minimize(self.model.loss, var_list=update_vars)

    def _update_learning_rate(self, **kwargs):
        """
        Update the current learning rate when the evaluation score plateaus.
        :param kwargs: dict, extra arguments for learning rate scheduling.
            - learning_rate_patience: int, number of epochs with no improvement
              after which the learning rate will be reduced.
            - learning_rate_decay: float, factor by which the learning rate will be updated.
            - eps: float, if the difference between the new and old learning rate is smaller than eps,
              the update is ignored.
        """
        learning_rate_patience = kwargs.pop('learning_rate_patience', 10)
        learning_rate_decay = kwargs.pop('learning_rate_decay', 0.1)
        eps = kwargs.pop('eps', 1e-8)

        if self.num_bad_epochs > learning_rate_patience:
            new_learning_rate = self.curr_learning_rate * learning_rate_decay
            # Decay the learning rate only when the difference is larger than epsilon
            if self.curr_learning_rate - new_learning_rate > eps:
                self.curr_learning_rate = new_learning_rate
            self.num_bad_epochs = 0
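For what it's worth, learning does not look like a published library; it is most likely a local package shipped with the repository the post was based on, so the import works only when running from the repository root. As a hypothetical stand-in matching the call sites above (the argument names are inferred from those calls; the plotting details are purely illustrative):

import os
import matplotlib.pyplot as plt

def plot_learning_curve(exp_idx, step_losses, step_scores, eval_scores=None,
                        mode='max', img_dir='/tmp'):
    # Hypothetical replacement for learning.utils.plot_learning_curve:
    # saves the loss and score curves as an image in img_dir.
    fig, (ax0, ax1) = plt.subplots(2, 1, figsize=(8, 8))
    ax0.plot(step_losses)
    ax0.set_title('Training loss per iteration')
    ax1.plot(step_scores, label='train score')
    if eval_scores is not None:
        ax1.plot(eval_scores, label='eval score')
    ax1.set_title('Score per epoch (mode: {})'.format(mode))
    ax1.legend()
    fig.savefig(os.path.join(img_dir, 'learning_curve.png'))
    plt.close(fig)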

Difference between Parameter vs. Tensor in PyTorch

I would like to know the difference between a PyTorch Parameter and a Tensor. The existing answer is for the old PyTorch, where variables were being used.

This is the whole idea of the Parameter class. Since it is subclassed from Tensor, it is a Tensor.
But there is a trick: parameters that are inside a module are added to the list of Module parameters. If m is your module, m.parameters() will hold your parameter.
Here is an example:
class M(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(2, 2))
        self.bias = nn.Parameter(torch.zeros(2))

    def forward(self, x):
        return x @ self.weights + self.bias

m = M()
m.parameters()
list(m.parameters())
---
[Parameter containing:
 tensor([[ 0.5527,  0.7096],
         [-0.2345, -1.2346]], requires_grad=True), Parameter containing:
 tensor([0., 0.], requires_grad=True)]
You see how the parameters show what we defined.
And if we just add a tensor inside a class, like self.t = Tensor, it will not show up in the parameters list. That is literally it. Nothing fancy.
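A quick check of that claim:

import torch
from torch import nn

class WithPlainTensor(nn.Module):
    def __init__(self):
        super().__init__()
        self.t = torch.randn(2, 2)  # plain Tensor, not an nn.Parameter

list(WithPlainTensor().parameters())  # -> [] (empty: nothing was registered)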
Adding to @prosti's answer, an nn.Module class doesn't always explicitly know what Tensor objects it should optimize. If you go through this simple commented piece of code, it could clarify things further.
import torch
from torch import nn

# Simple objective: learn a function that maps [1,1] -> [0,0]
x = torch.ones(2)   # input tensor
y = torch.zeros(2)  # expected output

# Model 1
class M1(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(2, 2))
        self.bias = nn.Parameter(torch.zeros(2))

    def forward(self, x):
        return x @ self.weights + self.bias

# Model 2
class M2(nn.Module):
    def __init__(self):
        super().__init__()
        # Though the Tensor objects below can undergo backprop and minimize some loss,
        # our model class doesn't know that it should use these tensors during optimization.
        self.weights = torch.randn(2, 2).requires_grad_(True)
        self.bias = torch.zeros(2).requires_grad_(True)

    def forward(self, x):
        return x @ self.weights + self.bias

m1 = M1()
m2 = M2()

# A bunch of parameters get printed
print('Model 1 params : ')
print(list(m1.parameters()))

# This is empty, meaning there is no parameter for the model to optimize.
# In the forward pass, the model just knows to use these
# `weights` and `bias` tensors to do some operations over the input,
# but it doesn't know it should optimize over those `weights` and `bias` tensor objects.
print('Model 2 params : ')
print(list(m2.parameters()))

# Initialize the loss function
loss_fn = nn.MSELoss(reduction='mean')

## ===== Training ===== ##
# Trainer
def train_loop(model, loss_fn=loss_fn):
    # Simple optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    for i in range(5):
        # Compute prediction and loss
        pred = model(x)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f"loss > {loss.item()}")

# ====== Train Model 1 ====== #
# Loss keeps decreasing, as model 1 finds better weights
train_loop(m1)

# ====== Trying to train Model 2 ====== #
# Code breaks at this line: optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
# Reason: there are no parameters to optimize
train_loop(m2)
For further clarification, check out this short blog implementing PyTorch's nn.Linear module.
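In that spirit, a minimal Linear layer built on nn.Parameter could look like this (a sketch, not the blog's exact code; the initialization choice is illustrative):

import math
import torch
from torch import nn

class MyLinear(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        # nn.Parameter registers these with the module, so optimizers can find them
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        self.bias = nn.Parameter(torch.zeros(out_features))
        nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))

    def forward(self, x):
        return x @ self.weight.t() + self.bias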

Implementing RNN and LSTM into DQN PyTorch code

I am having trouble finding examples on the web of how to implement a recurrent neural network with an LSTM layer into my current deep Q-network in PyTorch, so that it becomes a DRQN. Bear with me, I am just getting started.
Furthermore, I am NOT working with image processing, and thereby not with a CNN, so don't worry about that. My states are purely temperature values.
Here is the code that I am currently training my DQN with:
# Importing the libraries
import numpy as np
import random  # random samples from different batches (experience replay)
import os  # for loading and saving the brain
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim  # for using stochastic gradient descent
import torch.autograd as autograd
# We want to put the tensor into a variable that will also contain a
# gradient, and for this we need:
from torch.autograd import Variable
# to convert this tensor into a variable containing the tensor and the gradient

# Creating the architecture of the neural network
class Network(nn.Module):  # inheriting from nn.Module
    # self refers to the object that will be created from this class
    def __init__(self, input_size, nb_action):  # [self, input neurons, output neurons]
        super(Network, self).__init__()  # in order to use modules in torch.nn
        # Input and output neurons
        self.input_size = input_size
        self.nb_action = nb_action
        # Full connections between the different layers of the NN.
        # In this example it's one input layer, one hidden layer and one output layer.
        # Using self here to specify that fc1 is a variable of my object
        self.fc1 = nn.Linear(input_size, 40)
        self.fc2 = nn.Linear(40, 30)
        # Example of adding a hidden layer:
        # self.fcX = nn.Linear(30, 30)
        self.fc3 = nn.Linear(30, nb_action)  # 30 neurons in the hidden layer

    # Function that activates the neurons and performs forward propagation
    def forward(self, state):
        # Rectifier function
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        q_values = self.fc3(x)
        return q_values
# Implementing experience replay.
# We know that RL is based on an MDP,
# going from one state (s_t) to the next state (s_t+1).
# We put the transitions between states into what we call the memory,
# so we can use the distribution of experience to make a decision.
class ReplayMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity  # maximum number of transitions
        self.memory = []  # memory to save transitions

    # Pushing transitions (events) into the memory with append
    def push(self, event):
        self.memory.append(event)
        if len(self.memory) > self.capacity:  # memory only contains `capacity` events
            del self.memory[0]  # delete the first transition from memory if there are more

    # Taking a random sample
    def sample(self, batch_size):
        # zip reshapes: if list = ((1,2,3),(4,5,6)) then zip(*list) = ((1,4),(2,5),(3,6)),
        # i.e. (state,action,reward),(state,action,reward) -> states, actions, rewards
        samples = zip(*random.sample(self.memory, batch_size))
        # Wrap each batch in a Variable so we can differentiate with respect to it:
        # states, actions and rewards are each concatenated into one tensor
        # that carries a gradient, so that eventually we can differentiate each of them
        return map(lambda x: Variable(torch.cat(x, 0)), samples)
# Implementing deep Q-learning
class Dqn():
    def __init__(self, input_size, nb_action, gamma, lrate, T):
        self.gamma = gamma  # assign the input argument
        self.T = T
        # Sliding window of the evolving mean of the last 100 rewards
        self.reward_window = []
        # Creating the network with the Network class
        self.model = Network(input_size, nb_action)
        # Creating the memory with the memory class.
        # We keep up to 100000 transitions in memory and then sample from it
        # to get a small number of random transitions.
        self.memory = ReplayMemory(100000)
        # Creating the optimizer (stochastic gradient descent)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lrate)
        # The input vector is a batch of input observations.
        # unsqueeze creates a fake batch dimension, which is what the network
        # expects as the first dimension of last_state.
        self.last_state = torch.Tensor(input_size).unsqueeze(0)
        # Initializing
        self.last_action = 0
        self.last_reward = 0

    def select_action(self, state):
        # The Q-value depends on the state.
        # The temperature parameter T is a positive number; the closer it is
        # to zero, the less sure the NN is when taking an action.
        # For example:
        # softmax((1,2,3)) = {0.04, 0.11, 0.85}  ==>  softmax((1,2,3)*3) = {0, 0.02, 0.98}
        # To deactivate the brain, set T=0; actions are then fully random.
        probs = F.softmax((self.model(Variable(state, volatile=True)) * self.T), dim=1)  # T=100
        # Draw randomly from the probability distribution created by the softmax
        action = probs.multinomial()
        print(probs.multinomial())
        return action.data[0, 0]

    # See section 5.3 in the AI handbook
    def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
        outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
        # Next input for the target, see page 7 in the attached AI handbook
        next_outputs = self.model(batch_next_state).detach().max(1)[0]
        target = self.gamma * next_outputs + batch_reward
        # Using Huber loss to obtain the loss
        td_loss = F.smooth_l1_loss(outputs, target)
        # Using the loss/error to perform stochastic gradient descent and update the weights
        self.optimizer.zero_grad()  # reinitialize the optimizer at each iteration of the loop
        # This line backpropagates the error into the NN
        # td_loss.backward(retain_variables=True)  # UserWarning
        td_loss.backward(retain_graph=True)
        # And this line uses the optimizer to update the weights
        self.optimizer.step()

    def update(self, reward, new_signal):
        # Update one transition; the last element of the transition is the new state
        new_state = torch.Tensor(new_signal).float().unsqueeze(0)
        self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
        # After ending up in a state, it's time to play an action
        action = self.select_action(new_state)
        if len(self.memory.memory) > 100:
            batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100)
            self.learn(batch_state, batch_next_state, batch_reward, batch_action)
        self.last_action = action
        self.last_state = new_state
        self.last_reward = reward
        self.reward_window.append(reward)
        if len(self.reward_window) > 1000:
            del self.reward_window[0]
        return action

    def score(self):
        return sum(self.reward_window) / (len(self.reward_window) + 1.)

    def save(self):
        torch.save({'state_dict': self.model.state_dict(),
                    'optimizer': self.optimizer.state_dict(),
                    }, 'last_brain.pth')

    def load(self):
        if os.path.isfile('last_brain.pth'):
            print("=> loading checkpoint... ")
            checkpoint = torch.load('last_brain.pth')
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            print("done !")
        else:
            print("no checkpoint found...")
I hope there is someone out there who can help me implement an RNN and an LSTM layer into my code! I believe in you, Stack Overflow!
Best regards, Søren Koch
From my point of view, I think you could add an RNN/LSTM layer in Network.__init__ and Network.forward; the data would need to be reshaped into sequences. For more detail, I think you should read the two following articles; after that, implementing an RNN or LSTM is not as hard as it seems (see the sketch below).
http://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html#sphx-glr-beginner-nlp-sequence-models-tutorial-py
http://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html
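As a minimal sketch of that idea (assuming the temperature states are batched into sequences of shape (batch, seq_len, input_size); the layer sizes mirror the Network class above, and the hidden-state handling is illustrative):

import torch.nn as nn
import torch.nn.functional as F

class RecurrentNetwork(nn.Module):
    # Hypothetical DRQN-style variant of the Network class above
    def __init__(self, input_size, nb_action, hidden_size=30):
        super(RecurrentNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, 40)
        self.lstm = nn.LSTM(40, hidden_size, batch_first=True)
        self.fc3 = nn.Linear(hidden_size, nb_action)

    def forward(self, state, hidden=None):
        # state: (batch, seq_len, input_size), a sequence of temperature readings
        x = F.relu(self.fc1(state))
        x, hidden = self.lstm(x, hidden)  # hidden carries memory across calls
        q_values = self.fc3(x[:, -1])     # Q-values from the last time step
        return q_values, hidden

select_action and learn would then need to feed sequences (and optionally pass the hidden state along) instead of single observations.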
