Related
I'm trying to use the Sharpness-Aware Minimization (SAM) optimizer in my code, using the existing PyTorch implementation from here. I would also like to use gradient accumulation, but I have no idea how to make this work properly. I started from the approach proposed for mixed precision in one of the closed issues:
def train(
    args, model, device, train_loader, optimizer, first_step_scaler, second_step_scaler, epoch
):
    """Run one pass over ``train_loader`` with SAM under mixed precision.

    Every batch goes through two forward/backward passes, each with its own
    gradient scaler: the first pass feeds SAM's ``first_step`` and the second
    pass produces the gradient that is actually applied.

    Args:
        args: Not used by this function.
        model: Network to train; called as ``model(data)``.
        device: Device the batch tensors are moved to.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: SAM optimizer exposing ``first_step`` / ``second_step``.
        first_step_scaler: Scaler for the first pass (presumably a
            ``torch.cuda.amp.GradScaler`` -- its private per-optimizer state
            is read below).
        second_step_scaler: Scaler for the second pass; it performs the
            actual optimizer step.
        epoch: Not used by this function.
    """
    model.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        enable_running_stats(model)
        # First forward step
        with autocast():
            output = model(data)
            loss = F.nll_loss(output, target)
        first_step_scaler.scale(loss).backward()

        # We unscale manually for two reasons: (1) SAM's first-step adds the gradient
        # to weights directly. So gradient must be unscaled; (2) unscale_ checks if any
        # gradient is inf and updates optimizer_state["found_inf_per_device"] accordingly.
        # We use optimizer_state["found_inf_per_device"] to decide whether to apply
        # SAM's first-step or not.
        first_step_scaler.unscale_(optimizer)
        optimizer_state = first_step_scaler._per_optimizer_states[id(optimizer)]

        # Check if any gradients are inf/nan
        inf_grad_cnt = sum(v.item() for v in optimizer_state["found_inf_per_device"].values())

        if inf_grad_cnt == 0:
            # if valid gradient, apply sam_first_step
            optimizer.first_step(zero_grad=True, mixed_precision=True)
            sam_first_step_applied = True
        else:
            # if invalid gradient, skip sam and revert to single optimization step
            optimizer.zero_grad()
            sam_first_step_applied = False

        # Update the scaler with no impact on the model (weights or gradient). This update step
        # resets the optimizer_state["found_inf_per_device"]. So, it is applied after computing
        # inf_grad_cnt. Note that zero_grad() has no impact on the update() operation,
        # because update() leverages optimizer_state["found_inf_per_device"]
        first_step_scaler.update()

        disable_running_stats(model)
        # Second forward step
        with autocast():
            output = model(data)
            loss = F.nll_loss(output, target)
        second_step_scaler.scale(loss).backward()

        if sam_first_step_applied:
            # If sam_first_step was applied, apply the 2nd step
            optimizer.second_step(mixed_precision=True)

        second_step_scaler.step(optimizer)
        # Every step() must be followed by update(): it adapts the loss scale
        # and resets the per-optimizer state so the scaler can step again on
        # the next batch.
        second_step_scaler.update()
I tried something like this:
def train(
    args, model, device, train_loader, optimizer, first_step_scaler, second_step_scaler, epoch, gradient_acc=2
):
    """Run one pass over ``train_loader`` with SAM, mixed precision and gradient accumulation.

    SAM is applied per micro-batch (first pass -> ``first_step`` -> second
    pass -> ``second_step``). The second-pass gradients of ``gradient_acc``
    consecutive micro-batches are summed before the optimizer steps.

    The first pass of every micro-batch must start from empty gradients, and
    both ``first_step(zero_grad=True)`` and the inf-gradient fallback clear
    them again. The gradients accumulated so far are therefore set aside
    while a micro-batch is processed and merged back afterwards; otherwise
    only the last micro-batch of each window would reach the optimizer.

    Gradients of an incomplete trailing window (when the number of batches is
    not a multiple of ``gradient_acc``) are not applied; they are discarded at
    the start of the next call.

    Args:
        args: Not used by this function.
        model: Network to train; called as ``model(data)``.
        device: Device the batch tensors are moved to.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: SAM optimizer exposing ``first_step`` / ``second_step``.
        first_step_scaler: Scaler for the first pass (presumably a
            ``torch.cuda.amp.GradScaler``); updated after every micro-batch.
        second_step_scaler: Scaler for the second pass; stepped and updated
            once per accumulation window so its scale stays constant while
            gradients are being summed.
        epoch: Not used by this function.
        gradient_acc: Number of micro-batches per optimizer step.
    """
    model.train()
    params = [p for group in optimizer.param_groups for p in group["params"]]

    # Begin with a clean accumulation window.
    optimizer.zero_grad()

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # Set aside what earlier micro-batches of this window contributed.
        # Detaching the tensors (instead of zeroing them in place) keeps them
        # out of reach of unscale_() and of every zero_grad() call below.
        accumulated = [p.grad for p in params]
        for p in params:
            p.grad = None

        enable_running_stats(model)
        # First forward step
        with autocast():
            output = model(data)
            loss = F.nll_loss(output, target)
        loss = loss / gradient_acc
        first_step_scaler.scale(loss).backward()

        # We unscale manually for two reasons: (1) SAM's first-step adds the gradient
        # to weights directly. So gradient must be unscaled; (2) unscale_ checks if any
        # gradient is inf and updates optimizer_state["found_inf_per_device"] accordingly.
        # We use optimizer_state["found_inf_per_device"] to decide whether to apply
        # SAM's first-step or not.
        first_step_scaler.unscale_(optimizer)
        optimizer_state = first_step_scaler._per_optimizer_states[id(optimizer)]

        # Check if any gradients are inf/nan
        inf_grad_cnt = sum(v.item() for v in optimizer_state["found_inf_per_device"].values())

        if inf_grad_cnt == 0:
            # if valid gradient, apply sam_first_step
            optimizer.first_step(zero_grad=True, mixed_precision=True)
            sam_first_step_applied = True
        else:
            # if invalid gradient, skip sam and revert to single optimization step
            optimizer.zero_grad()
            sam_first_step_applied = False

        # Update the scaler with no impact on the model (weights or gradient). This update step
        # resets the optimizer_state["found_inf_per_device"]. So, it is applied after computing
        # inf_grad_cnt. Note that zero_grad() has no impact on the update() operation,
        # because update() leverages optimizer_state["found_inf_per_device"]
        first_step_scaler.update()

        disable_running_stats(model)
        # Second forward step
        with autocast():
            output = model(data)
            loss = F.nll_loss(output, target)
        loss = loss / gradient_acc
        second_step_scaler.scale(loss).backward()

        if sam_first_step_applied:
            # If sam_first_step was applied, apply the 2nd step
            optimizer.second_step(mixed_precision=True)

        # Merge the earlier micro-batches back in. Both sides carry the same
        # second_step_scaler scale because that scaler is only updated at the
        # end of a window.
        for p, previous in zip(params, accumulated):
            if previous is None:
                continue
            if p.grad is None:
                p.grad = previous
            else:
                p.grad.add_(previous)

        if (batch_idx + 1) % gradient_acc == 0:
            second_step_scaler.step(optimizer)
            second_step_scaler.update()
            optimizer.zero_grad()
But I noticed this makes my loss increase rather than decrease. Does anyone have any idea how to improve this?
I am trying to get Blazor working in an existing MVC application hosted in IIS. I followed the example here.
There is also a similar example for Razor Pages. Both work nicely in IIS Express and when self-hosted, but when I try to use them in IIS they fail.
When hosted in IIS, the framework is registered to the site and not the application. When the page is opened in Chrome, the console gives the following errors:
blazor.server.js:1 Failed to load resource: the server responded with a status of 503 (Service Unavailable)
favicon.ico:1 Failed to load resource: the server responded with a status of 503 (Service Unavailable)
I tried changing the framework script reference from <script src="_framework/blazor.server.js"></script> to <script src="~/_framework/blazor.server.js"></script>. This solves the initial error, but Blazor falls over at the next step:
[2020-03-05T09:46:07.566Z] Information: Normalizing '_blazor' to 'http://localhost/_blazor'.
_blazor/negotiate?negotiateVersion=1:1 Failed to load resource: the server responded with a status of 503 (Service Unavailable)
blazor.server.js:1 [2020-03-05T09:46:07.611Z] Error: Failed to complete negotiation with the server: Error: Service Unavailable
e.log # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
s # blazor.server.js:1
Promise.then (async)
c # blazor.server.js:1
(anonymous) # blazor.server.js:1
B # blazor.server.js:1
e.getNegotiationResponse # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
B # blazor.server.js:1
e.startInternal # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
B # blazor.server.js:1
e.start # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
v # blazor.server.js:1
e.startInternal # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
v # blazor.server.js:1
e.startWithStateTransitions # blazor.server.js:1
e.start # blazor.server.js:1
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
r # blazor.server.js:8
S # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
r # blazor.server.js:8
E # blazor.server.js:8
(anonymous) # blazor.server.js:8
n # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
Show 22 more frames
blazor.server.js:1 [2020-03-05T09:46:07.612Z] Error: Failed to start the connection: Error: Service Unavailable
e.log # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
s # blazor.server.js:1
Promise.then (async)
c # blazor.server.js:1
(anonymous) # blazor.server.js:1
B # blazor.server.js:1
e.startInternal # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
B # blazor.server.js:1
e.start # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
v # blazor.server.js:1
e.startInternal # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
v # blazor.server.js:1
e.startWithStateTransitions # blazor.server.js:1
e.start # blazor.server.js:1
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
r # blazor.server.js:8
S # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
r # blazor.server.js:8
E # blazor.server.js:8
(anonymous) # blazor.server.js:8
n # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
Show 14 more frames
blazor.server.js:15 [2020-03-05T09:46:07.612Z] Error: Error: Service Unavailable
e.log # blazor.server.js:15
C # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
s # blazor.server.js:8
Promise.then (async)
c # blazor.server.js:8
(anonymous) # blazor.server.js:8
r # blazor.server.js:8
S # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
r # blazor.server.js:8
E # blazor.server.js:8
(anonymous) # blazor.server.js:8
n # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
blazor.server.js:1 Uncaught (in promise) Error: Cannot send data if the connection is not in the 'Connected' State.
at e.send (blazor.server.js:1)
at e.sendMessage (blazor.server.js:1)
at e.sendWithProtocol (blazor.server.js:1)
at blazor.server.js:1
at new Promise ()
at e.invoke (blazor.server.js:1)
at e. (blazor.server.js:15)
at blazor.server.js:15
at Object.next (blazor.server.js:15)
at blazor.server.js:15
e.send # blazor.server.js:1
e.sendMessage # blazor.server.js:1
e.sendWithProtocol # blazor.server.js:1
(anonymous) # blazor.server.js:1
e.invoke # blazor.server.js:1
(anonymous) # blazor.server.js:15
(anonymous) # blazor.server.js:15
(anonymous) # blazor.server.js:15
(anonymous) # blazor.server.js:15
r # blazor.server.js:15
e.startCircuit # blazor.server.js:15
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
(anonymous) # blazor.server.js:8
a # blazor.server.js:8
Promise.then (async)
c # blazor.server.js:8
a # blazor.server.js:8
Promise.then (async)
c # blazor.server.js:8
(anonymous) # blazor.server.js:8
r # blazor.server.js:8
E # blazor.server.js:8
(anonymous) # blazor.server.js:8
n # blazor.server.js:1
(anonymous) # blazor.server.js:1
(anonymous) # blazor.server.js:1
favicon.ico:1 Failed to load resource: the server responded with a status of 503 (Service Unavailable)
The SignalR hub seems to be pointing at the root site path in IIS instead of the application path.
Does anybody know how I can resolve this? To recreate the problem, just download the source code here and set up a debug configuration pointing to IIS.
Thanks
Paul
Thanks to the comment from agua from mars the solution is to add the following tag to the head of _layout.cshtml
eg
<head>
<meta charset="utf-8" />
<base href="~/" />
I am porting code to train a neural network. I wrote the code as part of an Udacity project and it worked fine in the Udacity environment.
Now I am porting the code to an Nvidia Jetson Nano running Ubuntu 18.04 and Python 3.6.8.
When iterating through the training data, somehow "._" sneaks into the file path in front of the file name, which causes an error.
When I run the file, I get following error message:
Traceback (most recent call last):
File "train_rev6.py", line 427, in <module>
main()
File "train_rev6.py", line 419, in main
train_model(in_args)
File "train_rev6.py", line 221, in train_model
for inputs, labels in trainloader:
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 560, in __next__
batch = self.collate_fn([self.dataset[i] for i in indices])
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 560, in <listcomp>
batch = self.collate_fn([self.dataset[i] for i in indices])
File "/usr/local/lib/python3.6/dist-packages/torchvision/datasets/folder.py", line 132, in __getitem__
sample = self.loader(path)
File "/usr/local/lib/python3.6/dist-packages/torchvision/datasets/folder.py", line 178, in default_loader
return pil_loader(path)
File "/usr/local/lib/python3.6/dist-packages/torchvision/datasets/folder.py", line 160, in pil_loader
img = Image.open(f)
File "/usr/local/lib/python3.6/dist-packages/PIL/Image.py", line 2705, in open
% (filename if filename else fp))
OSError: cannot identify image file <_io.BufferedReader name='/home/mme/Documents/001_UdacityFinalProjectFlowersRev2/flowers/train/40/._image_04589.jpg'>
I suspect the error is due to the "._" in front of the file name "image...", as this is not part of the file name. When I run
sudo find / -name image_00824.jpg
I get the correct path:
/home/mme/Documents/001_UdacityFinalProjectFlowersRev2/flowers/train/81/image_00824.jpg
without "._" prior the file name.
My issue here seems the same as in
OSError: cannot identify image file
(Adjusting and running from PIL import Image;Image.open(open("path/to/file", 'rb')) as suggested in the answer does not issue an error message.)
The file path is given on the command line:
python3 train_rev6.py --file_path "/home/mme/Documents/001_UdacityFinalProjectFlowersRev2/flowers" --arch "vgg16" --epochs 5 --gpu "gpu" --running_loss True --valid_loss True --valid_accuracy True --test True
The code below shows the two relevant functions.
Any idea how I get rid of this "._"?
def load_data(in_args):
    """
    Build the datasets, dataloaders and label mapping and publish them as globals.

    Steps:
    - Derive the train / valid / test directories from in_args.file_path.
    - Create the transform pipeline for each split.
    - Wrap each directory in an ImageFolder dataset and a DataLoader (batch size 64).
    - Read the label mapping from cat_to_name.json.

    Sets the globals train_dataset, valid_dataset, test_dataset, trainloader,
    validloader, testloader and cat_to_name. Returns None.
    """
    global train_dataset, valid_dataset, test_dataset
    global trainloader, validloader, testloader
    global cat_to_name

    # One sub-directory per split underneath the user-supplied root.
    data_dir = in_args.file_path
    train_dir, valid_dir, test_dir = (
        data_dir + suffix for suffix in ("/train", "/valid", "/test")
    )

    def imagenet_normalize():
        # Channel means / standard deviations calculated from ImageNet images.
        return transforms.Normalize([0.485, 0.456, 0.406],
                                    [0.229, 0.224, 0.225])

    def evaluation_pipeline():
        # Deterministic pipeline for validation and test images:
        # resize, centre crop to 224 x 224, convert to tensor, normalize.
        return transforms.Compose([
            transforms.Resize(255),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            imagenet_normalize(),
        ])

    # Training images additionally get random rotation, random resized crop
    # and random horizontal / vertical flips.
    train_transforms = transforms.Compose([
        transforms.RandomRotation(23),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        imagenet_normalize(),
    ])
    valid_transforms = evaluation_pipeline()
    test_transforms = evaluation_pipeline()

    # Datasets, one per split.
    train_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
    valid_dataset = datasets.ImageFolder(valid_dir, transform=valid_transforms)
    test_dataset = datasets.ImageFolder(test_dir, transform=test_transforms)

    # Dataloaders; only the training data is shuffled.
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=64)
    testloader = torch.utils.data.DataLoader(test_dataset, batch_size=64)

    # Label mapping.
    with open("cat_to_name.json", "r") as mapping_file:
        cat_to_name = json.load(mapping_file)

    print("Done loading data...")
    return
def train_model(in_args):
    """
    Train the model and report validation metrics once per epoch.

    Reads the globals trainloader, validloader, model, criterion, optimizer
    and device. Sets the globals epochs, list_running_loss, list_valid_loss
    and list_valid_accuracy.

    When in_args.test == True the function returns early, as soon as the
    fifth training batch is reached.
    """
    global epochs
    global list_running_loss, list_valid_loss, list_valid_accuracy

    # Number of epochs.
    epochs = in_args.epochs
    # Training loss summed over the current epoch.
    train_loss_sum = 0
    # Per-epoch history of losses and accuracies.
    list_running_loss, list_valid_loss, list_valid_accuracy = [], [], []

    # Batch counter for the early exit in testing mode.
    if in_args.test == True:
        batches_seen = 0

    for epoch in range(epochs):
        # ---- training pass ----
        for inputs, labels in trainloader:
            if in_args.test == True:
                batches_seen += 1
                if batches_seen == 5:
                    return

            # Move the batch to the default device.
            inputs, labels = inputs.to(device), labels.to(device)

            # Clear the gradients left over from the previous batch.
            optimizer.zero_grad()

            # Forward pass (log probabilities), loss, backward pass, weight update.
            log_ps = model.forward(inputs)
            loss = criterion(log_ps, labels)
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.item()

        # ---- validation pass ----
        val_loss_sum = 0
        val_accuracy_sum = 0

        # Evaluation mode switches off dropout.
        model.eval()
        # Gradients are not needed for validation.
        with torch.no_grad():
            for val_inputs, val_labels in validloader:
                val_inputs = val_inputs.to(device)
                val_labels = val_labels.to(device)

                val_log_ps = model.forward(val_inputs)
                val_loss_sum += criterion(val_log_ps, val_labels).item()

                # Most likely class per sample, compared against the labels.
                _, predicted = torch.exp(val_log_ps).topk(1, dim=1)
                hits = predicted == val_labels.view(*predicted.shape)
                val_accuracy_sum += torch.mean(hits.type(torch.FloatTensor)).item()

        # ---- reporting ----
        # Each part of the report is printed only when its flag is set.
        train_part = "Train loss: {:.3f} ".format(train_loss_sum) if in_args.running_loss == True else ""
        valid_part = "Valid loss: {:.3f} ".format(val_loss_sum/len(validloader)) if in_args.valid_loss == True else ""
        accuracy_part = "Valid accuracy: {:.3f} ".format(val_accuracy_sum/len(validloader)) if in_args.valid_accuracy == True else ""
        print(f"{epoch+1}/{epochs} " + train_part + valid_part + accuracy_part)

        # Record this epoch in the history lists.
        list_running_loss.append(train_loss_sum)
        list_valid_loss.append(val_loss_sum/len(validloader))
        list_valid_accuracy.append(val_accuracy_sum/len(validloader))

        # Reset for the next epoch and switch back to training mode.
        train_loss_sum = 0
        model.train()

    print("Done training model...")
    return
A colleague at work pointed out that in Linux files beginning with a period are hidden files. So I selected "show hidden files" in the file explorer and there they were. I deleted them, which resolved the issue (see commands below).
Find and display all files beginning with "._" in all subfolders (display the selected files first to make sure these are the files you want to delete):
find test -name '._*' -print
Find and delete all files beginning with "._" in all subfolders:
find test -name '._*' -delete
I'm trying to learn deep reinforcement learning through OpenAI spinning up. To do this, I want to rewrite some of their code using pytorch instead of tensorflow.
Currently I'm trying to convert the code for basic policy gradient (link with explanations) and this is my code so far:
import torch
import torch.nn as nn
from torch.nn.functional import log_softmax
from torch.distributions import Categorical
import torch.optim as optim
import numpy as np
import gym
from gym.spaces import Discrete, Box
class Policy(nn.Module):
    """Feedforward network: one ``nn.Linear`` per consecutive pair in ``sizes``.

    ``activation`` is applied after every layer except the last one;
    ``output_activation`` (if given) is applied to the output of the last
    layer.

    Args:
        sizes: Layer widths, input size first and output size last.
        activation: Callable applied after each hidden layer.
        output_activation: Optional callable applied to the final output.
    """

    def __init__(self, sizes, activation=nn.Tanh(), output_activation=None):
        # Build a feedforward neural network.
        super(Policy, self).__init__()
        self.layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(sizes[:-1], sizes[1:])]
        )
        self.activation = activation
        self.output_activation = output_activation
        self.returns = []  # for R(tau) weighting in policy gradient
        self.rewards = []  # list for rewards accrued throughout ep
        self.logits = []   # for measuring episode logits

    def forward(self, x):
        """Return the network output for input ``x``."""
        for layer in self.layers[:-1]:
            x = self.activation(layer(x))
        x = self.layers[-1](x)
        # The last layer has already been applied above; only the activation
        # is added here (running the layer a second time would feed it its
        # own output).
        if self.output_activation is not None:
            x = self.output_activation(x)
        return x
# make action selection op (outputs int actions, sampled from policy)
def select_action(logits):
    """Draw a sample from the categorical distribution parameterized by ``logits``."""
    action_distribution = Categorical(logits=logits)
    return action_distribution.sample()
# make loss function whose gradient, for the right data, is policy gradient
def loss(action_logits, tau_rets):
    """Return the dot product of ``log_softmax(action_logits)`` and ``tau_rets``.

    Both arguments must be 1-D tensors of equal length and dtype, as required
    by ``torch.dot``. The result is a 0-d tensor.

    NOTE(review): the log-softmax is taken across the whole input vector and
    the result carries no negative sign -- confirm this is the objective the
    caller intends to minimize.
    """
    # dim is spelled out: relying on the implicit dimension is deprecated and
    # emits a warning. For the 1-D input that torch.dot requires, dim=0 is the
    # dimension that was chosen implicitly.
    log_probs = log_softmax(action_logits, dim=0)
    # torch.dot already returns a scalar, so no further reduction is needed.
    return torch.dot(log_probs, tau_rets)
def train(env_name='CartPole-v0', hidden_sizes=[32], lr=1e-2,
          epochs=50, batch_size=5000, render=False):
    """Train a policy with the basic policy-gradient update.

    Each epoch collects whole episodes with the current policy until more
    than ``batch_size`` steps have been gathered, then takes one Adam step on
    ``-mean(log pi(a|s) * R(tau))``.

    The environment is driven through the interface the original code used:
    ``env.reset()`` returns the observation and ``env.step()`` returns four
    values.

    Args:
        env_name: Gym environment id; needs a Box observation space and a
            Discrete action space.
        hidden_sizes: Widths of the hidden layers (never mutated here).
        lr: Adam learning rate.
        epochs: Number of policy updates.
        batch_size: Minimum number of environment steps per update.
        render: Render the first episode of every epoch.
    """
    # make environment, check spaces, get obs / act dims
    env = gym.make(env_name)
    assert isinstance(env.observation_space, Box), \
        "This example only works for envs with continuous state spaces."
    assert isinstance(env.action_space, Discrete), \
        "This example only works for envs with discrete action spaces."

    obs_dim = env.observation_space.shape[0]
    n_acts = env.action_space.n

    # make core of policy network
    policy = Policy(sizes=[obs_dim] + hidden_sizes + [n_acts])

    # make train op
    train_op = optim.Adam(policy.parameters(), lr=lr)

    # for training policy
    def train_one_epoch():
        # Per-epoch buffers. They are local so that every update only sees
        # data collected with the current policy.
        batch_log_probs = []  # log pi(a|s) tensors, still attached to the graph
        batch_weights = []    # R(tau) weight for each log-prob
        batch_returns = []    # for measuring episode returns
        batch_lens = []       # for measuring episode lengths
        ep_rewards = []       # rewards accrued throughout the current episode

        # first obs comes from starting distribution
        obs = torch.from_numpy(env.reset()).type(torch.FloatTensor)

        # render first episode of each epoch
        finished_rendering_this_epoch = False

        # collect experience by acting in the environment with current policy
        while True:
            if (not finished_rendering_this_epoch) and render:
                env.render()

            # act in the environment
            act_logits = policy(obs)
            act = select_action(act_logits)
            next_obs, reward, done, _ = env.step(act.item())
            obs = torch.from_numpy(next_obs).type(torch.FloatTensor)

            # Keep the log-probability as a tensor: converting it to a Python
            # number would cut it off from the graph, and backward() would
            # then have nothing to differentiate.
            batch_log_probs.append(log_softmax(act_logits, dim=-1)[act])
            ep_rewards.append(reward)

            if done:
                # if episode is over, record info about episode
                ep_ret, ep_len = sum(ep_rewards), len(ep_rewards)
                batch_returns.append(ep_ret)
                batch_lens.append(ep_len)

                # the weight for each logprob(a|s) is R(tau)
                batch_weights += [ep_ret] * ep_len

                # reset episode-specific variables
                obs = torch.from_numpy(env.reset()).type(torch.FloatTensor)
                ep_rewards = []

                # won't render again this epoch
                finished_rendering_this_epoch = True

                # end experience loop if we have enough of it; checked only
                # at episode boundaries so weights and log-probs stay aligned
                if len(batch_log_probs) > batch_size:
                    break

        # take a single policy gradient update step
        train_op.zero_grad()
        log_probs = torch.stack(batch_log_probs)
        weights = torch.tensor(batch_weights, dtype=torch.float32)
        # Negated because the optimizer minimizes.
        batch_loss = -(log_probs * weights).mean()
        batch_loss.backward()
        train_op.step()
        return batch_loss.item(), batch_returns, batch_lens

    # training loop
    for i in range(epochs):
        batch_loss, batch_rets, batch_lens = train_one_epoch()
        print('epoch: %3d \t loss: %.3f \t return: %.3f \t ep_len: %.3f'%
              (i, batch_loss, np.mean(batch_rets), np.mean(batch_lens)))
When I run train(), I get the following error:
RuntimeError Traceback (most recent call last)
<ipython-input-163-2da0ffaf5447> in <module>()
----> 1 train()
<ipython-input-162-560e772be08b> in train(env_name, hidden_sizes, lr, epochs,
batch_size, render)
114 # training loop
115 for i in range(epochs):
--> 116 batch_loss, batch_rets, batch_lens = train_one_epoch()
117 print('epoch: %3d \t loss: %.3f \t return: %.3f \t ep_len: %.3f'%
118 (i, batch_loss, np.mean(batch_rets), np.mean(batch_lens)))
<ipython-input-162-560e772be08b> in train_one_epoch()
109 print (len(policy.returns),len(policy.rewards),len(policy.logits))
110 batch_loss = loss(torch.tensor(policy.logits),torch.tensor(policy.returns))
--> 111 batch_loss.backward()
112 return batch_loss, batch_returns, batch_lens
113
~\Anaconda3\lib\site-packages\torch\tensor.py in backward(self, gradient,
retain_graph, create_graph)
91 products. Defaults to ``False``.
92 """
---> 93 torch.autograd.backward(self, gradient, retain_graph, create_graph)
94
95 def register_hook(self, hook):
~\Anaconda3\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
88 Variable._execution_engine.run_backward(
89 tensors, grad_tensors, retain_graph, create_graph,
---> 90 allow_unreachable=True) # allow_unreachable flag
91
92
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
I don't understand why this happens since my code is similar to other rl pytorch code such as this.
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
# RBF-kernel SVM used as the base estimator of the one-vs-rest wrapper below.
classifier = SVC(
    # --- kernel ---
    kernel='rbf',                   # kernel type, rbf working fine here
    gamma=1,                        # kernel coefficient, not tuned yet
    degree=3,                       # left at the default, not tuned yet
    coef0=1,                        # raised from the default of 0.0
    # --- regularisation and solver ---
    C=100,                          # penalty parameter, set to a large value
    tol=0.001,                      # stopping criterion tolerance
    max_iter=-1,                    # no iteration limit
    shrinking=True,                 # use the shrinking heuristics
    cache_size=200,                 # 200 MB cache size
    # --- output and bookkeeping ---
    probability=False,              # probability estimates are not needed
    class_weight=None,              # all classes are treated equally
    decision_function_shape=None,   # one-vs-rest is applied explicitly below
    verbose=False,                  # no log output
    random_state=None,
)

# Fit one binary classifier per class, using 4 parallel jobs.
model = OneVsRestClassifier(classifier, n_jobs=4)
model.fit(X_train, y_train)
I am getting this error:
ValueError: WRITEBACKIFCOPY base is read-only.
Please scale the input data before training the model (i.e. the OneVsRestClassifier),
e.g.
from sklearn.preprocessing import MinMaxScaler #if its a dense matrix else use MaxAbsScaler in case of sparse matrix
# Fit the scaler on the training data only, then reuse it for the test data.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# RBF-kernel SVM used as the base estimator of the one-vs-rest wrapper below.
classifier = SVC(
    # --- kernel ---
    kernel='rbf',                   # kernel type, rbf working fine here
    gamma=1,                        # kernel coefficient, not tuned yet
    degree=3,                       # left at the default, not tuned yet
    coef0=1,                        # raised from the default of 0.0
    # --- regularisation and solver ---
    C=100,                          # penalty parameter, set to a large value
    tol=0.001,                      # stopping criterion tolerance
    max_iter=-1,                    # no iteration limit
    shrinking=True,                 # use the shrinking heuristics
    cache_size=200,                 # 200 MB cache size
    # --- output and bookkeeping ---
    probability=False,              # probability estimates are not needed
    class_weight=None,              # all classes are treated equally
    decision_function_shape=None,   # one-vs-rest is applied explicitly below
    verbose=False,                  # no log output
    random_state=None,
)

# Fit one binary classifier per class; n_jobs=-1 uses all available processors.
model = OneVsRestClassifier(classifier, n_jobs=-1)
model.fit(X_train, y_train)