PyTorch CNN doesn't update weights while training

I want to predict an 8x8 matrix from another 8x8 matrix, but the weights DO NOT update during training.
I use two simple conv layers: one to map the input from 1x8x8 to 2x8x8, and another to map 2x8x8 back to 1x8x8. The inputs and outputs in the data folder are generated randomly. The PyTorch code is shown below.
I have already checked some posts about weights not updating. I think something must be wrong with "requires_grad = True" on the data or with loss.backward().
Any suggestions about the code would be appreciated. Thanks in advance.
The input data files are:
data/CM10_1/CM_1.txt
data/CM10_1/CM_2.txt
data/CM10_1/CM_3.txt
data/CM10_1/CM_4.txt
The output data files are:
data/CM10_2/CM_1.txt
data/CM10_2/CM_2.txt
data/CM10_2/CM_3.txt
data/CM10_2/CM_4.txt
Each CM_i.txt looks like this:
207 244 107 173 70 111 180 244
230 246 233 193 11 97 192 86
32 40 202 189 24 195 70 149
232 247 244 100 209 202 173 57
161 244 167 167 177 47 167 191
24 123 9 43 80 124 41 65
71 204 216 180 242 113 30 129
139 36 238 8 8 164 127 178
data/CM_info_tr.csv contains:
CMname,
CM_1.txt,
CM_2.txt,
CM_3.txt,
CM_4.txt,
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# outline###############################################################
#
# CM10_1/CM_i.txt to predict CM10_2/CM_i.txt
#
# data pair example
# CM10_1/CM_1.txt -> CM10_2/CM_1.txt
#
# CM10_1/CM_1.txt is 8x8 matrix with random int
# CM10_2/CM_1.txt is 8x8 matrix with random int
#
# The model uses two conv layers
# layer 01 : 1x8x8 -> 2x8x8
# layer 02 : 2x8x8 -> 1x8x8
#
# The loss is the difference between
# CM10_2/CM_1.txt(predicted) and CM10_2/CM_1.txt
#
# main ###############################################################
from __future__ import print_function, division
import os
import sys
import torch
import pandas as pd
import numpy as np
import torch.nn.functional as F
from skimage import io, transform
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch.autograd import Variable
torch.cuda.empty_cache()
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
# test CM parameters
n_Ca = 8
batch_size = 4
#device = "cuda" if torch.cuda.is_available() else "cpu"
device = "cpu"
# define class dataset CMDataset ###################################################
class CMDataset(Dataset):
    """CM dataset"""
    def __init__(self, csv_CM, CM_beg_dir, CM_end_dir, n_Ca=n_Ca):
        """
        Args:
            csv_CM (string): Path to the csv file with CM class.
            CM_beg_dir (string): Directory with all the CM begin data.
            CM_end_dir (string): Directory with all the CM end data.
        """
        self.CM_info = pd.read_csv(csv_CM)
        self.CM_beg_dir = CM_beg_dir
        self.CM_end_dir = CM_end_dir
    def __len__(self):
        return len(self.CM_info)  # the number of the samples
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        # load and convert CM begin data ---------------------------------------
        CM_beg_path = os.path.join(self.CM_beg_dir, self.CM_info.iloc[idx, 0])
        CM_beg_data = np.loadtxt(CM_beg_path)
        CM_beg_data = CM_beg_data.reshape(1, n_Ca, n_Ca)
        CM_beg_data = CM_beg_data.astype(np.float32)
        CM_beg_data = torch.from_numpy(CM_beg_data)
        CM_beg_data = CM_beg_data.to(device)
        # load and convert CM end data -----------------------------------------
        CM_end_path = os.path.join(self.CM_end_dir, self.CM_info.iloc[idx, 0])
        CM_end_data = np.loadtxt(CM_end_path)
        CM_end_data = CM_end_data.reshape(1, n_Ca, n_Ca)
        CM_end_data = CM_end_data.astype(np.float32)
        CM_end_data = torch.from_numpy(CM_end_data)
        CM_end_data = CM_end_data.to(device)
        return CM_beg_data, CM_end_data
# define class model CMNet ###################################################
class CMNet(nn.Module):
    def __init__(self):
        super(CMNet, self).__init__()
        self.lay_CM_01 = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=1, stride=1, bias=True)
        self.lay_CM_02 = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=1, stride=1, bias=True)
    def forward(self, CM_data):
        [n_in_batch, n_in_chan, n_in_hei, n_in_wid] = CM_data.shape
        n_Ca = n_in_hei
        out1_1 = self.lay_CM_01(CM_data)
        out1_2 = out1_1
        out1_3 = self.lay_CM_02(out1_2)
        out = out1_3
        return out
# load data for training and validing
CM_dataset_train = CMDataset(csv_CM = 'data/CM_info_tr.csv',
                             CM_beg_dir = 'data/CM10_1/',
                             CM_end_dir = 'data/CM10_2/',
                             n_Ca = n_Ca)
train_dataloader = DataLoader(CM_dataset_train,
                              batch_size=batch_size,
                              shuffle=True)
# training parameter
learning_rate = 2
epochs = 5
model = CMNet()
model = model.to(device)
# Initialize the loss function
loss_fn = nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# define train loop ###############################################################
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X1, Y) in enumerate(dataloader):
        X1 = X1.to(torch.float32)
        Y = Y.to(torch.float32)
        # Compute prediction and loss
        X1 = torch.autograd.Variable(X1)
        pred = model(X1)
        pred = torch.autograd.Variable(pred)
        # compute loss
        loss = loss_fn(pred, Y)
        loss = Variable(loss, requires_grad = True)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss, current = loss.item(), batch * len(X1)
        print(f" loss:{loss:>15f}, [{current:>5d}/{size:>5d}]")
# Train ###############################################################
for t in range(epochs):
    print(f"Epoch {t+1}\n----------------------------------------------")
    # print(list(model.parameters()))
    train_loop(train_dataloader, model, loss_fn, optimizer)
# print("Train and Valid Done!")

What PyTorch version are you using? Variable has been deprecated for 5 years now. Remove the lines loss = Variable(loss, requires_grad = True) and pred = torch.autograd.Variable(pred); that should do the trick. Try to read the current documentation and don't rely on archaic tutorials.
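For reference, a minimal sketch of the question's train_loop with those two wrappers removed and everything else unchanged; the loss then keeps its grad_fn, so loss.backward() actually reaches the conv weights:

def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X1, Y) in enumerate(dataloader):
        X1 = X1.to(torch.float32)
        Y = Y.to(torch.float32)
        # forward pass: pred stays attached to the autograd graph
        pred = model(X1)
        loss = loss_fn(pred, Y)
        # backpropagation: do not re-wrap loss, so gradients flow to the parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        current = batch * len(X1)
        print(f" loss:{loss.item():>15f}, [{current:>5d}/{size:>5d}]")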

Related

PyTorch DataLoader: Only one element tensors can be converted to Python scalars

For Python 3.10 and torch version 1.12.1, I am using the MNIST dataset scaled to the range [0, 1], with one-hot encoded vectors for the targets:
batch_size = 256

# Define transformations for MNIST dataset-
# MNIST dataset statistics-
# mean = np.array([0.1307])
# std_dev = np.array([0.3081])
transforms_apply = transforms.Compose(
    [
        transforms.ToTensor(),
        # transforms.Normalize(mean = mean, std = std_dev)
    ]
)

# Load MNIST dataset-
train_dataset = torchvision.datasets.MNIST(
    root = 'data', train = True,
    transform = transforms_apply, download = True
)
test_dataset = torchvision.datasets.MNIST(
    root = 'data', train = False,
    transform = transforms_apply
)

# Sanity check-
print(f"training dataset length/shape: {list(train_dataset.data.size())}")
# training dataset length/shape: [60000, 28, 28]

print(f"mean = {train_dataset.data.float().mean() / 255:.4f} &"
      f" std dev = {train_dataset.data.float().std() / 255:.4f}")
# mean = 0.1307 & std dev = 0.3081

# Convert the targets to one-hot encoded vectors-
train_dataset.targets = F.one_hot(train_dataset.targets, num_classes = 10)
test_dataset.targets = F.one_hot(test_dataset.targets, num_classes = 10)

# Sanity checks-
print(f"Train dataset: min = {train_dataset.data.min()} & max = {train_dataset.data.max()};"
      f" Test dataset: min = {test_dataset.data.min()} & max = {test_dataset.data.max()}")
# Train dataset: min = 0 & max = 255; Test dataset: min = 0 & max = 255

train_dataset.data.shape, train_dataset.targets.shape
# (torch.Size([60000, 28, 28]), torch.Size([60000, 10]))

test_dataset.data.shape, test_dataset.targets.shape
# (torch.Size([10000, 28, 28]), torch.Size([10000, 10]))

# Create training and testing dataloaders-
train_loader = torch.utils.data.DataLoader(
    dataset = train_dataset, batch_size = batch_size,
    shuffle = True
)
test_loader = torch.utils.data.DataLoader(
    dataset = test_dataset, batch_size = batch_size,
    shuffle = False
)

print(f"Sizes of train_dataset: {len(train_dataset)} and test_dataet: {len(test_dataset)}")
print(f"Sizes of train_loader: {len(train_loader)} and test_loader: {len(test_loader)}")
# Sizes of train_dataset: 60000 and test_dataet: 10000
# Sizes of train_loader: 235 and test_loader: 40

print(f"len(train_loader) = {len(train_loader)} & len(test_loader) = {len(test_loader)}")
# len(train_loader) = 235 & len(test_loader) = 40

# Sanity check-
len(train_dataset) / batch_size, len(test_dataset) / batch_size
# (234.375, 39.0625)

# Get some random batch of training images & labels-
x, y = next(iter(train_loader))
print(f"images.shape: {x.shape}, labels.shape: {y.shape}")
This generates the error:
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [25], in <cell line: 2>()
1 # Get some random batch of training images & labels-
----> 2 x, y = next(iter(train_loader))
3 print(f"images.shape: {x.shape}, labels.shape: {y.shape}")
File
~\anaconda3\envs\torch-gpu\lib\site-packages\torch\utils\data\dataloader.py:681,
in _BaseDataLoaderIter.__next__(self)
678 if self._sampler_iter is None:
679 # TODO(https://github.com/pytorch/pytorch/issues/76750)
680 self._reset() # type: ignore[call-arg]
--> 681 data = self._next_data()
682 self._num_yielded += 1
683 if self._dataset_kind == _DatasetKind.Iterable and
684 self._IterableDataset_len_called is not None and
685 self._num_yielded > self._IterableDataset_len_called:
File
~\anaconda3\envs\torch-gpu\lib\site-packages\torch\utils\data\dataloader.py:721,
in _SingleProcessDataLoaderIter._next_data(self)
719 def _next_data(self):
720 index = self._next_index() # may raise StopIteration
--> 721 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
722 if self._pin_memory:
723 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)
File
~\anaconda3\envs\torch-gpu\lib\site-packages\torch\utils\data\_utils\fetch.py:49,
in _MapDatasetFetcher.fetch(self, possibly_batched_index)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
File
~\anaconda3\envs\torch-gpu\lib\site-packages\torch\utils\data\_utils\fetch.py:49,
in <listcomp>(.0)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
File
~\anaconda3\envs\torch-gpu\lib\site-packages\torchvision\datasets\mnist.py:138,
in MNIST.__getitem__(self, index)
130 def __getitem__(self, index: int) -> Tuple[Any, Any]:
131 """
132 Args:
133 index (int): Index (...)
136 tuple: (image, target) where target is index of the target class.
137 """
--> 138 img, target = self.data[index], int(self.targets[index])
140 # doing this so that it is consistent with all other datasets
141 # to return a PIL Image
142 img = Image.fromarray(img.numpy(), mode="L")
ValueError: only one element tensors can be converted to Python scalars
I know this is due to the one-hot encoding, since the error is absent when I don't use it. How do I solve it?
You can add it to your transforms using the Lambda transform:
transforms_apply = transforms.Compose(
    [
        transforms.ToTensor(),
        # transforms.Normalize(mean = mean, std = std_dev)
        transforms.Lambda(lambda t: F.one_hot(t.long(), num_classes=10))
    ]
)
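If the intent is to one-hot the labels rather than the images, one way to wire the same idea in is via target_transform (a sketch, assuming the goal is to keep train_dataset.targets as plain integer labels so MNIST.__getitem__ can still call int() on them):

import torch
import torch.nn.functional as F
import torchvision
from torchvision import transforms

# one-hot each integer label on the fly; dataset.targets itself stays untouched
one_hot_label = transforms.Lambda(
    lambda y: F.one_hot(torch.tensor(y), num_classes=10).float()
)

train_dataset = torchvision.datasets.MNIST(
    root='data', train=True, download=True,
    transform=transforms.ToTensor(),
    target_transform=one_hot_label,
)

x, y = train_dataset[0]
print(x.shape, y.shape)  # torch.Size([1, 28, 28]) torch.Size([10])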

How do I make my custom loss function scalar?

I am coding a DQN from scratch and have therefore written my own loss function. While calling backward on my loss function, I get the following error: RuntimeError: grad can be implicitly created only for scalar outputs
Here's my code -
import numpy as np
import gym
import matplotlib.pyplot as plt
import os
import torch
import random
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from collections import deque
import sys
env = gym.make("CliffWalking-v0")
# In[103]:
#Hyperparameters
episodes = 5000
eps = 1.0
learning_rate = 0.1
discount_factor = 0.99
tot_rewards = []
decay_val = 0.001
mem_size = 50000
batch_size = 2
gamma = 0.99
# In[104]:
class NeuralNetwork(nn.Module):
    def __init__(self, state_size, action_size):
        super(NeuralNetwork, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(1, 30),
            nn.ReLU(),
            nn.Linear(30, 30),
            nn.ReLU(),
            nn.Linear(30, action_size)
        )
    def forward(self, x):
        x = self.linear_relu_stack(x)
        return x
# In[105]:
model = NeuralNetwork(env.observation_space.n, env.action_space.n)
opt = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
loss = nn.MSELoss()
replay_buffer = deque(maxlen=mem_size)
# In[106]:
state = torch.tensor(env.reset(), dtype=torch.float32)
state = state.unsqueeze(dim=0)
print(state.shape)
out = model(state)
# In[111]:
def compute_td_loss(batch_size):
    state, next_state, reward, done, action = zip(*random.sample(replay_buffer, batch_size))
    state = torch.from_numpy(np.array(state).reshape(-1, 1)).unsqueeze(dim = 0).type(torch.float32)
    next_state = torch.from_numpy(np.array(next_state).reshape(-1, 1)).unsqueeze(dim = 0).type(torch.float32)
    reward = torch.from_numpy(np.array(reward))
    done = torch.from_numpy(np.array(done))
    action = torch.from_numpy(np.array(action)).type(torch.int64)
    q_values = model(state)
    next_q_values = model(next_state)
    q_vals = q_values.squeeze().gather(dim=-1, index=action.reshape(-1,1)).reshape(1, -1)
    max_next_q_values = torch.max(next_q_values, 2)[0].detach()
    print("q_vals = ", q_vals)
    print("max_next_q_values = ", max_next_q_values)
    loss = 0.5*(reward + gamma*max_next_q_values - q_vals)**2
    print("reward = ", reward)
    print("loss = ", loss)
    opt.zero_grad()
    loss.backward()
    opt.step()
    return loss
# In[112]:
for i in range(episodes):
    state = env.reset()
    done = False
    steps = 0
    eps_rew = 0
    while not done and steps < 50:
        if np.random.uniform(0, 1) < eps:
            action = env.action_space.sample()
        else:
            state = torch.tensor(state, dtype=torch.float32)
            state = state.unsqueeze(dim=0)
            action = np.argmax(model(state).detach().numpy())
        next_state, reward, done, info = env.step(action)
        replay_buffer.append((state, next_state, reward, done, action))
        if len(replay_buffer) > batch_size:
            loss = compute_td_loss(batch_size)
            sys.exit()
        eps = eps/(1 + 0.001)
        eps_rew += reward
        if done:
            break
        state = next_state
    tot_rewards.append(eps_rew)
Here's the error that I get -
RuntimeError Traceback (most recent call last)
<ipython-input-112-015fd74c95d9> in <module>
14 replay_buffer.append((state, next_state, reward, done, action))
15 if len(replay_buffer)>batch_size:
---> 16 loss = compute_td_loss(batch_size)
17 sys.exit()
18 eps = eps/(1 + 0.001)
<ipython-input-111-3e1e02c32b4f> in compute_td_loss(batch_size)
16 print("loss = ", loss)
17 opt.zero_grad()
---> 18 loss.backward()
19 opt.step()
20 return loss
c:\users\thoma\anaconda3\envs\custom_atari_env\lib\site-packages\torch\_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
253 create_graph=create_graph,
254 inputs=inputs)
--> 255 torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
256
257 def register_hook(self, hook):
c:\users\thoma\anaconda3\envs\custom_atari_env\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
141
142 grad_tensors_ = _tensor_or_tensors_to_tuple(grad_tensors, len(tensors))
--> 143 grad_tensors_ = _make_grads(tensors, grad_tensors_)
144 if retain_graph is None:
145 retain_graph = create_graph
c:\users\thoma\anaconda3\envs\custom_atari_env\lib\site-packages\torch\autograd\__init__.py in _make_grads(outputs, grads)
48 if out.requires_grad:
49 if out.numel() != 1:
---> 50 raise RuntimeError("grad can be implicitly created only for scalar outputs")
51 new_grads.append(torch.ones_like(out, memory_format=torch.preserve_format))
52 else:
RuntimeError: grad can be implicitly created only for scalar outputs
Given that your batch_size = 2, and looking at your code, your loss will likely be of size batch_size x 1. What you are likely trying to do is compute the gradient of the expected Q loss, so you can use a Monte Carlo estimator: instead of computing the expectation, take the mean over a finite sample (here, your batch). Consequently, what you are missing is taking the mean of your loss before calling backward.
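In code, that amounts to reducing the per-sample squared TD errors to one scalar before backpropagating; a minimal sketch inside compute_td_loss, reusing the question's variable names:

# mean over the batch turns the (batch_size x 1) tensor into a scalar,
# which is what loss.backward() expects without an explicit gradient argument
td_error = reward + gamma * max_next_q_values - q_vals
loss = 0.5 * (td_error ** 2).mean()

opt.zero_grad()
loss.backward()
opt.step()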

Converting spinning up policy gradient to pytorch

I'm trying to learn deep reinforcement learning through OpenAI Spinning Up. To do this, I want to rewrite some of their code using PyTorch instead of TensorFlow.
Currently I'm trying to convert the code for basic policy gradient (link with explanations) and this is my code so far:
import torch
import torch.nn as nn
from torch.nn.functional import log_softmax
from torch.distributions import Categorical
import torch.optim as optim
import numpy as np
import gym
from gym.spaces import Discrete, Box
class Policy(nn.Module):
    def __init__(self, sizes, activation=nn.Tanh(), output_activation=None):
        # Build a feedforward neural network.
        super(Policy, self).__init__()
        self.layers = nn.ModuleList([nn.Linear(sizes[i], sizes[i+1])
                                     for i in range(len(sizes)-1)])
        self.activation = activation
        self.output_activation = output_activation
        self.returns = []  # for R(tau) weighting in policy gradient
        self.rewards = []  # list for rewards accrued throughout ep
        self.logits = []   # for measuring episode logits
    def forward(self, x):
        for layer in self.layers[:-1]:
            x = self.activation(layer(x))
        x = self.layers[-1](x)
        if not self.output_activation == None:
            x = self.output_activation(self.layers[-1](x))
        return x
# make action selection op (outputs int actions, sampled from policy)
def select_action(logits):
    return Categorical(logits=logits).sample()
# make loss function whose gradient, for the right data, is policy gradient
def loss(action_logits, tau_rets):
    return torch.sum(torch.dot(log_softmax(action_logits), tau_rets))
def train(env_name='CartPole-v0', hidden_sizes=[32], lr=1e-2,
          epochs=50, batch_size=5000, render=False):
    # make environment, check spaces, get obs / act dims
    env = gym.make(env_name)
    assert isinstance(env.observation_space, Box), \
        "This example only works for envs with continuous state spaces."
    assert isinstance(env.action_space, Discrete), \
        "This example only works for envs with discrete action spaces."
    obs_dim = env.observation_space.shape[0]
    n_acts = env.action_space.n
    # make core of policy network
    policy = Policy(sizes=[obs_dim]+hidden_sizes+[n_acts])
    # make train op
    train_op = optim.Adam(policy.parameters(), lr=lr)
    # for training policy
    def train_one_epoch():
        # make some empty lists for logging.
        batch_returns = []  # for measuring episode returns
        batch_lens = []     # for measuring episode lengths
        # reset episode-specific variables
        obs = torch.from_numpy(env.reset()).type(torch.FloatTensor)  # first obs comes from starting distribution
        done = False  # signal from environment that episode is over
        num_obs = 0   # to measure the number of observations
        # render first episode of each epoch
        finished_rendering_this_epoch = False
        # collect experience by acting in the environment with current policy
        while True:
            # rendering
            if (not finished_rendering_this_epoch) and render:
                env.render()
            # act in the environment
            act_logit = policy.forward(obs)
            act = select_action(act_logit)
            tmp, reward, done, _ = env.step(act.numpy())
            obs = torch.from_numpy(tmp).type(torch.FloatTensor)
            num_obs += 1
            # save logit, reward
            policy.rewards.append(reward)
            policy.logits.append(act_logit[act].item())
            if done:
                # if episode is over, record info about episode
                ep_ret, ep_len = sum(policy.rewards), len(policy.rewards)
                batch_returns.append(ep_ret)
                batch_lens.append(ep_len)
                # the weight for each logprob(a|s) is R(tau)
                policy.returns += [ep_ret] * ep_len
                # reset episode-specific variables
                tmp, done, policy.rewards = env.reset(), False, []
                obs = torch.from_numpy(tmp).type(torch.FloatTensor)
                # won't render again this epoch
                finished_rendering_this_epoch = True
                # end experience loop if we have enough of it
                if num_obs > batch_size:
                    break
        # take a single policy gradient update step
        print(len(policy.returns), len(policy.rewards), len(policy.logits))
        batch_loss = loss(torch.tensor(policy.logits), torch.tensor(policy.returns))
        batch_loss.backward()
        return batch_loss, batch_returns, batch_lens
    # training loop
    for i in range(epochs):
        batch_loss, batch_rets, batch_lens = train_one_epoch()
        print('epoch: %3d \t loss: %.3f \t return: %.3f \t ep_len: %.3f' %
              (i, batch_loss, np.mean(batch_rets), np.mean(batch_lens)))
When I run train(), I get the following error:
RuntimeError Traceback (most recent call last)
<ipython-input-163-2da0ffaf5447> in <module>()
----> 1 train()
<ipython-input-162-560e772be08b> in train(env_name, hidden_sizes, lr, epochs,
batch_size, render)
114 # training loop
115 for i in range(epochs):
--> 116 batch_loss, batch_rets, batch_lens = train_one_epoch()
117 print('epoch: %3d \t loss: %.3f \t return: %.3f \t ep_len: %.3f'%
118 (i, batch_loss, np.mean(batch_rets), np.mean(batch_lens)))
<ipython-input-162-560e772be08b> in train_one_epoch()
109 print (len(policy.returns),len(policy.rewards),len(policy.logits))
110 batch_loss = loss(torch.tensor(policy.logits),torch.tensor(policy.returns))
--> 111 batch_loss.backward()
112 return batch_loss, batch_returns, batch_lens
113
~\Anaconda3\lib\site-packages\torch\tensor.py in backward(self, gradient,
retain_graph, create_graph)
91 products. Defaults to ``False``.
92 """
---> 93 torch.autograd.backward(self, gradient, retain_graph, create_graph)
94
95 def register_hook(self, hook):
~\Anaconda3\lib\site-packages\torch\autograd\__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
88 Variable._execution_engine.run_backward(
89 tensors, grad_tensors, retain_graph, create_graph,
---> 90 allow_unreachable=True) # allow_unreachable flag
91
92
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
I don't understand why this happens, since my code is similar to other RL PyTorch code such as this.
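For context, that exact error can be reproduced in isolation: policy.logits stores act_logit[act].item(), i.e. plain Python floats, so torch.tensor(policy.logits) is a fresh leaf with no grad_fn and nothing for backward() to differentiate. A small standalone illustration (not the poster's code):

import torch

w = torch.randn(3, requires_grad=True)
vals = []
for x in torch.randn(5, 3):
    out = (w * x).sum()
    vals.append(out.item())        # .item() returns a Python float: the graph is dropped here

loss = torch.tensor(vals).sum()    # rebuilt from floats -> requires_grad=False, no grad_fn
loss.backward()                    # RuntimeError: element 0 of tensors does not require grad
                                   # and does not have a grad_fn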

Value error while one hot encoding a variable in the jupyter notebook for a neural network

In the code below I am attempting to create a neural network that tries to replicate classical music. I am trying to one-hot encode the output variable with this line of code:
y = np_utils.to_categorical(dataY)
but it gives me this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-21-0b5dca9da053> in <module>()
65 X = X / float(n_vocab)
66 # one hot encode the output variable
---> 67 y = np_utils.to_categorical(dataY)
68 # define the LSTM model
69 model = Sequential()
~\Anaconda3\lib\site-packages\keras\utils\np_utils.py in to_categorical(y,
num_classes)
26 y = y.ravel()
27 if not num_classes:
---> 28 num_classes = np.max(y) + 1
29 n = y.shape[0]
30 categorical = np.zeros((n, num_classes))
~\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in amax(a, axis,
out, keepdims)
2318
2319 return _methods._amax(a, axis=axis,
-> 2320 out=out, **kwargs)
2321
2322
~\Anaconda3\lib\site-packages\numpy\core\_methods.py in _amax(a, axis, out,
keepdims)
24 # small reductions
25 def _amax(a, axis=None, out=None, keepdims=False):
---> 26 return umr_maximum(a, axis, None, out, keepdims)
27
28 def _amin(a, axis=None, out=None, keepdims=False):
ValueError: zero-size array to reduction operation maximum which has no
identity
Below is the complete code:
import glob
import pickle
import numpy
from music21 import converter, instrument, note, chord
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import BatchNormalization
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Activation
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
def get_notes():
    """ Get all the notes and chords from the midi files in the ./midi_songs directory """
    notes = []
    for file in glob.glob("smaller classical music repertoir/*.mid"):
        midi = converter.parse(file)
        notes_to_parse = None
        parts = instrument.partitionByInstrument(midi)
        if parts:  # file has instrument parts
            notes_to_parse = parts.parts[0].recurse()
        else:  # file has notes in a flat structure
            notes_to_parse = midi.flat.notes
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
    with open('C:/Users/emili/Documents/python projects/music data/notes1', 'wb') as filepath:
        pickle.dump(notes, filepath)
    return notes
notes = get_notes()
# create mapping of unique chars to integers
chars = sorted(list(set(notes)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
# summarize the loaded data
n_chars = len(notes)
n_vocab = len(chars)
print ("Total Characters: ", n_chars)
print ("Total Vocab: ", n_vocab)
# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = notes[i:i + seq_length]
    seq_out = notes[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print ("Total Patterns: ", n_patterns)
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)
# define the LSTM model
model = Sequential()
model.add(LSTM(50, input_shape=(X.shape[1], X.shape[2]),
               return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(46))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
# define the checkpoint
filepath = "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1,
                             save_best_only=True, mode='min')
callbacks_list = [checkpoint]
# fit the model
model.fit(X, y, epochs=1000, batch_size=64, callbacks=callbacks_list)
Edit: For some reason, the code works when I run it in Sublime Text but not in the Jupyter notebook.
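The traceback shows to_categorical calling np.max on an empty array, which happens when dataY is empty, i.e. when fewer than seq_length + 1 notes were collected; since the glob pattern is relative, the working directory matters. A quick diagnostic sketch (an assumption about the cause, meant to be run alongside the script above):

import glob
import os

# where is the notebook/script actually running from, and does the pattern match anything?
print("cwd:", os.getcwd())
print("midi files found:", len(glob.glob("smaller classical music repertoir/*.mid")))

# if fewer than seq_length + 1 notes are collected, the dataX/dataY loop never runs,
# dataY stays empty, and np_utils.to_categorical(dataY) hits np.max on a zero-size array
print("notes collected:", len(notes), "| need more than:", seq_length)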

pytorch model.cuda() runtime error

I'm building a text classifier using PyTorch, and got into some trouble with the .cuda() method. I know that .cuda() moves all parameters to the GPU so that training can be faster. However, an error occurs in the .cuda() call, like this:
start_time = time.time()
for model_type in ('lstm',):
    hyperparam_combinations = score_util.all_combination(hyperparam_dict[model_type].values())
    # for selecting best scoring model
    for test_idx, setting in enumerate(hyperparam_combinations):
        args = custom_dataset.list_to_args(setting, model_type=model_type)
        print(args)
        tsv = "test %d\ttrain_loss\ttrain_acc\ttrain_auc\tval_loss\tval_acc\tval_auc\n" % (test_idx)  # tsv record
        avg_score = []  # cv_mean score
        ### 4 fold cross validation
        for cv_num, (train_iter, val_iter) in enumerate(cv_splits):
            ### model initiation
            model = model_dict[model_type](args)
            if args.emb_type is not None:  # word embedding init
                emb = emb_dict[args.emb_type]
                emb = score_util.embedding_init(emb, tr_text_field, args.emb_type)
                model.embed.weight.data.copy_(emb)
            model.cuda()
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-20-ff6cfce73c10> in <module>()
23 model.embed.weight.data.copy_(emb)
24
---> 25 model.cuda()
26
27 optimizer= torch.optim.Adam(model.parameters(),lr=args.lr)
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in cuda(self, device_id)
145 copied to that device
146 """
--> 147 return self._apply(lambda t: t.cuda(device_id))
148
149 def cpu(self, device_id=None):
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _apply(self, fn)
116 def _apply(self, fn):
117 for module in self.children():
--> 118 module._apply(fn)
119
120 for param in self._parameters.values():
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _apply(self, fn)
122 # Variables stored in modules are graph leaves, and we don't
123 # want to create copy nodes, so we have to unpack the data.
--> 124 param.data = fn(param.data)
125 if param._grad is not None:
126 param._grad.data = fn(param._grad.data)
RuntimeError: Variable data has to be a tensor, but got torch.cuda.FloatTensor
This is the error traceback, and I can't see why it happens.
This code worked fine before I set the epoch parameter to 1 to run some tests. I set epochs back to 1000, but the problem lingers on.
Aren't torch.cuda.FloatTensor objects also tensors? Any help would be much appreciated.
My model looks like this:
class TR_LSTM(nn.Module):
    def __init__(self, args,
                 use_hidden_average=False,
                 pretrained_emb=None):
        super(TR_LSTM, self).__init__()
        # arguments
        self.emb_dim = args.embed_dim
        self.emb_num = args.embed_num
        self.num_hidden_unit = args.hidden_state_dim
        self.num_lstm_layer = args.num_lstm_layer
        self.use_hidden_average = use_hidden_average
        self.batch_size = args.batch_size
        # layers
        self.embed = nn.Embedding(self.emb_num, self.emb_dim)
        if pretrained_emb is not None:
            self.embed.weight.data.copy_(pretrained_emb)
        self.lstm_layer = nn.LSTM(self.emb_dim, self.num_hidden_unit, self.num_lstm_layer, batch_first = True)
        self.fc_layer = nn.Sequential(nn.Linear(self.num_hidden_unit, self.num_hidden_unit),
                                      nn.Linear(self.num_hidden_unit, 2))
    def forward(self, x):
        x = self.embed(x)  # batch * max_seq_len * emb_dim
        h_0, c_0 = self.init_hidden(x.size(0))
        x, (_, _) = self.lstm_layer(x, (h_0, c_0))  # batch * seq_len * hidden_unit_num
        if not self.use_hidden_average:
            x = x[:, x.size(1)-1, :]
            x = x.squeeze(1)
        else:
            x = x.mean(1).squeeze(1)
        x = self.fc_layer(x)
        return x
    def init_hidden(self, batch_size):
        h_0, c_0 = torch.zeros(self.num_lstm_layer, batch_size, self.num_hidden_unit), \
                   torch.zeros(self.num_lstm_layer, batch_size, self.num_hidden_unit)
        h_0, c_0 = h_0.cuda(), c_0.cuda()
        h_0_param, c_0_param = torch.nn.Parameter(h_0), torch.nn.Parameter(c_0)
        return h_0_param, c_0_param
model.cuda() is called inside your training/test loop, which is the problem. As the error message suggests, you repeatedly convert the parameters (tensors) in your model to CUDA, which is not the right way to move a model to the GPU.
The model object should be created and moved to CUDA outside the loop. Only the training/test instances should be converted to CUDA tensors each time you feed the model. I also suggest you read the example code on the PyTorch documentation site.
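A minimal, self-contained sketch of that pattern (toy model and random data, not the question's TR_LSTM): the model is created and moved to the GPU once, before the loop, while only the batches move each step:

import torch
from torch import nn

use_gpu = torch.cuda.is_available()

model = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 2))
if use_gpu:
    model.cuda()                                       # once, right after construction
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()

for step in range(100):
    inputs = torch.randn(16, 10)                       # stand-in for a real batch
    labels = torch.randint(0, 2, (16,))
    if use_gpu:
        inputs, labels = inputs.cuda(), labels.cuda()  # only the data moves every iteration
    optimizer.zero_grad()
    loss = loss_fn(model(inputs), labels)
    loss.backward()
    optimizer.step()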
