REINFORCE for CartPole-v0: training is unstable (PyTorch)

I am implementing REINFORCE for CartPole-v0. However, the training process is very unstable. I have not implemented early stopping for the environment and allow training to continue for a fixed (high) number of episodes. After a few thousand episodes, the training reward seems to go down again. Is this due to overfitting, making early stopping essential, or have I implemented something incorrectly?
Here is my code:
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import os

def running_average(x, n):
    # simple moving average over a window of n rewards
    N = n
    kernel = np.ones(N)
    conv_len = x.shape[0] - N
    y = np.zeros(conv_len)
    for i in range(conv_len):
        y[i] = kernel @ x[i:i+N]  # @ is the matrix multiplication operator (np.matmul)
        y[i] /= N
    return y

class PolicyNetwork(nn.Module):
    def __init__(self, state_dim, n_actions):
        super().__init__()
        self.n_actions = n_actions
        self.model = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, n_actions),
            nn.Softmax(dim=1)
        ).float()

    def forward(self, X):
        return self.model(X)

def train_reinforce_agent(env, episode_length, max_episodes, gamma, visualize_step, learning_rate=0.003):
    model = PolicyNetwork(env.observation_space.shape[0], env.action_space.n)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    EPISODE_LENGTH = episode_length
    MAX_EPISODES = max_episodes
    GAMMA = gamma
    VISUALIZE_STEP = max(1, visualize_step)
    score = []

    for episode in range(MAX_EPISODES):
        curr_state = env.reset()
        done = False
        all_episode_t = []
        score_episode = 0
        for t in range(EPISODE_LENGTH):
            act_prob = model(torch.from_numpy(curr_state).unsqueeze(0).float())
            action = np.random.choice(np.array(list(range(env.action_space.n))), p=act_prob.squeeze(0).data.numpy())
            prev_state = curr_state
            curr_state, reward, done, info = env.step(action)
            score_episode += reward
            e_t = {'state': prev_state, 'action': action, 'reward': reward, 'returns': 0}
            all_episode_t.append(e_t)
            if done:
                break
        score.append(score_episode)

        # compute discounted returns from the end of the episode backwards
        G = 0
        max_G = 0
        for t in range(len(all_episode_t)-1, -1, -1):
            G = GAMMA*G + all_episode_t[t]['reward']
            all_episode_t[t]['returns'] = G
            if G > max_G:
                max_G = G
        episode_returns = np.array([all_episode_t[t]['returns'] for t in range(len(all_episode_t))])

        # normalize the returns
        for t in range(len(all_episode_t)):
            all_episode_t[t]['returns'] = (all_episode_t[t]['returns'] - np.mean(episode_returns)) / (max_G + 10**(-6))
        episode_returns = torch.FloatTensor(episode_returns)

        state_batch = torch.Tensor(np.array([all_episode_t[t]['state'] for t in range(len(all_episode_t))]))
        action_batch = torch.Tensor(np.array([all_episode_t[t]['action'] for t in range(len(all_episode_t))]))
        pred_batch = model(state_batch)
        prob_batch = pred_batch.gather(dim=1, index=action_batch.long().view(-1, 1)).squeeze()
        loss_tensor = torch.log(prob_batch) * episode_returns
        loss = -torch.sum(loss_tensor)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if episode % VISUALIZE_STEP == 0 and episode > 0:
            print('Episode {}\tAverage Score: {:.2f}'.format(episode, np.mean(score[-VISUALIZE_STEP:-1])))
        # EARLY-STOPPING: if the average score across the last 100 episodes is greater than 195, the game is solved
        # if np.mean(score[-100:-1]) > 195:
        #     break

    # Training plot
    score = np.array(score)
    avg_score = running_average(score, visualize_step)
    plt.figure(figsize=(15, 7))
    plt.ylabel("Episodic Reward", fontsize=12)
    plt.xlabel("Training Episodes", fontsize=12)
    plt.plot(score, color='gray', linewidth=1)
    plt.plot(avg_score, color='blue', linewidth=3)
    plt.scatter(np.arange(score.shape[0]), score, color='green', linewidth=0.3)
    plt.savefig("cartpole_reinforce_training_plot.pdf")

def main():
    env = gym.make('CartPole-v0')
    episode_length = 300
    n_episodes = 5000
    gamma = 0.99
    vis_steps = 100
    learning_rate = 0.003
    train_reinforce_agent(env, episode_length, n_episodes, gamma, vis_steps, learning_rate=learning_rate)

if __name__ == "__main__":
    main()
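For reference, the more common REINFORCE normalization divides the centered returns by their standard deviation rather than by the episode maximum. A minimal sketch of that variance-reduction trick (an assumption about what might help, not a verified fix for this particular run):

import numpy as np

def normalize_returns(returns, eps=1e-6):
    # Standard-score the discounted returns: this acts as a crude baseline
    # and keeps gradient magnitudes comparable across episodes.
    returns = np.asarray(returns, dtype=np.float64)
    return (returns - returns.mean()) / (returns.std() + eps)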

Related

How can I predict only 5 days of prices with this LSTM model (PyTorch)?

class StockDataset(Dataset):
    # the dataset's job is to return the i-th record
    def __init__(self, symbol, x_frames, y_frames, start, end):
        self.symbol = symbol
        self.x_frames = x_frames
        self.y_frames = y_frames
        self.start = datetime.datetime(*start)
        self.end = datetime.datetime(*end)
        # fetch the data for the symbol and date range specified above
        self.data = pdr.DataReader(self.symbol, 'yahoo', self.start, self.end)

    def __len__(self):
        return len(self.data) - (self.x_frames + self.y_frames) + 1

    def __getitem__(self, idx):
        global data
        #global data_set
        # given index i, return that window of the data as arrays
        idx += self.x_frames
        data = self.data.iloc[idx-self.x_frames:idx+self.y_frames]
        data = data[['High', 'Low', 'Open', 'Close', 'Adj Close', 'Volume']]
        data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))  # convert to log returns; add 1 to guard against possible missing values
        global x_ex
        global y_ex
        x_ex = data[:self.x_frames]
        y_ex = data[self.x_frames:]
        data = data.values  # convert to a numpy array
        X = data[:self.x_frames]
        y = data[self.x_frames:]
        return X, y
This is the dataset:
class LSTM(nn.Module):
    # the model is explained from the 50-minute mark of the lecture video
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size, dropout, use_bn):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.dropout = dropout
        self.use_bn = use_bn
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
        self.hidden = self.init_hidden()
        self.regressor = self.make_regressor()

    def init_hidden(self):
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))

    def make_regressor(self):
        layers = []
        if self.use_bn:
            layers.append(nn.BatchNorm1d(self.hidden_dim))
        layers.append(nn.Dropout(self.dropout))
        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim // 2))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(self.hidden_dim // 2, self.output_dim))
        regressor = nn.Sequential(*layers)
        return regressor

    def forward(self, x):
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        y_pred = self.regressor(lstm_out[-1].view(self.batch_size, -1))
        return y_pred
This is the model:
def test(model, partition, args):
    global y_true
    global y_pred
    global X
    testloader = DataLoader(partition['test'],
                            batch_size=args.batch_size,
                            shuffle=False, drop_last=True)
    model.eval()
    test_acc = 0.0
    with torch.no_grad():
        for i, (X, y) in enumerate(testloader):
            X = X.transpose(0, 1).float().to(args.device)
            y_true = y[:, :, 3].float().to(args.device)
            model.hidden = [hidden.to(args.device) for hidden in model.init_hidden()]
            y_pred = model(X)
            test_acc += metric(y_pred, y_true)[0]
    test_acc = test_acc / len(testloader)
    return test_acc
This is the test loop:
# ====== Random Seed Initialization ====== #
seed = 666
np.random.seed(seed)
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args("")
args.exp_name = "exp1_lr"
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ====== Data Loading ====== #
args.symbol = '005930.KS'  # desired ticker symbol
args.batch_size = 4        # batch size
args.x_frames = 5  # input: previous n days of data; if this is too long, one-week prediction becomes impossible
args.y_frames = 5  # output: next n days of data; if this is too long, one-week prediction becomes impossible

# ====== Model Capacity ===== #
args.input_dim = 6
args.hid_dim = 50
args.n_layers = 2  # number of hidden layers; see https://justkode.kr/deep-learning/pytorch-rnn

# ====== Regularization ======= #
args.l2 = 0.0001
args.dropout = 0.3
args.use_bn = True

# ====== Optimizer & Training ====== #
args.optim = 'RMSprop'  # SGD, RMSprop, ADAM...
args.lr = 0.001
args.epoch = 1

# ====== Experiment Variable ====== #
name_var1 = 'lr'        # learning rate
name_var2 = 'n_layers'  # how many layers to stack
list_var1 = [0.001, 0.0001, 0.00001]
list_var2 = [1, 2, 3]

# actually build the datasets
trainset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2012,1,1), (2021,1,1))   # training period
valset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2021,1,2), (2021,12,30))   # validation period; unless it spans at least ~6 months + 19 days (as of 2021), a float division by zero error occurs - why??
testset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2022,1,10), (2022,1,14))  # test period; needs at least ~6 months + 25 days (as of 2022) or the same error occurs
# the period seems to require a minimum number of trading days (>= 146); anything less errors out - why??
partition = {'train': trainset, 'val': valset, 'test': testset}

for var1 in list_var1:
    for var2 in list_var2:
        setattr(args, name_var1, var1)
        setattr(args, name_var2, var2)
        print(args)
        setting, result = experiment(partition, deepcopy(args))
        save_exp_result(setting, result)
# be sure to delete the files in the directory before plotting, otherwise all the results overlap
This section sets the hyperparameters.
How can I get predictions when the test set is only 5 days long, like (2022,1,10) to (2022,1,14)?
This code fails with a float division by zero error unless the test period spans at least about 7 months (maybe 146 trading days); anything shorter triggers the error, while 146+ days works fine.
I think this line causes the error:
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))
My guess is that the log values become so small that the error occurs.
The data is Yahoo Finance data. Thanks for reading.
When I comment out the line below, the data becomes infinite:
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))
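One thing worth checking (an assumption, not verified against this data): with drop_last=True, a test window too short to fill a single batch leaves testloader empty, so test_acc / len(testloader) in test() divides by zero. A minimal guard would make that failure explicit:

from torch.utils.data import DataLoader

testloader = DataLoader(partition['test'], batch_size=args.batch_size,
                        shuffle=False, drop_last=True)
# With drop_last=True, len(testloader) is 0 whenever the dataset yields
# fewer than batch_size windows, and the accuracy average divides by zero.
if len(testloader) == 0:
    raise ValueError("test window too short: %d samples cannot fill one batch of %d"
                     % (len(partition['test']), args.batch_size))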

Solving MountainCar-v0 by predicting future velocity? Why does this work?

I solved the MountainCar-v0 OpenAI gym challenge by predicting future velocity based on the current action, using the absolute value of velocity as the reward function.
My question is: why does this work?
My hypothesis is that this works similarly to Q-learning: we build a map/table of all actions and their results, and the action picker simply chooses the action with the highest predicted velocity. Since velocity changes incrementally, picking the highest future velocity will very likely help you reach the next highest velocity.
Here is the code. It should take less than 3 minutes to reach its goal.
import numpy as np
import gym
from collections import deque, namedtuple
import random
import torch
import torch.nn as nn
import torch.optim as optim

EPSILON_DECAY = 0.99975
MIN_EPSILON = 0.001

env = gym.make("MountainCar-v0")

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(2, 128)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(128, 3)

    def forward(self, x):
        l1 = self.relu1(self.layer1(x))
        output = self.layer2(l1)
        return output

model = Model()
target = Model()
target.load_state_dict(model.state_dict())

mse = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

actionPoint = namedtuple("actionPoint", ["currentState", "action", "reward", "observation", "done"])
actionMap = deque()

epsilon = 1
for epoch in range(100000):
    observation, info = env.reset(return_info=True)
    currentState = observation
    totalReward = 0
    maxSpeed = 0
    done = False
    while not done:
        if np.random.random() > epsilon:
            action = model(torch.from_numpy(currentState))
            action = torch.argmax(action).item()
        else:
            action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        totalReward += reward
        maxSpeed = max(maxSpeed, abs(observation[1]))
        if done and totalReward != -200:
            print("reached goal", totalReward)
        actionMap.append(actionPoint(currentState, action, reward, observation, done))
        currentState = observation
        if len(actionMap) > 128:
            samples = random.sample(actionMap, 128)
            # the regression target for the taken action is |next velocity|
            sampleReward = torch.FloatTensor([abs(s.observation[1]) for s in samples])
            sampleCurrentState = torch.from_numpy(np.array([s.currentState for s in samples]))
            futurePrediction = target(sampleCurrentState)
            for i, s in enumerate(samples):
                futurePrediction[i][s.action] = sampleReward[i]
            currentPrediction = model(sampleCurrentState)
            optimizer.zero_grad()
            loss = mse(currentPrediction, futurePrediction.detach())
            loss.backward()
            optimizer.step()
    print(maxSpeed)
    target.load_state_dict(model.state_dict())
    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(epsilon, MIN_EPSILON)
env.close()
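Reading the loop above, note there is no bootstrapping: each regression target is simply |next velocity| for the action actually taken, so the network approximates a one-step speed model and the policy greedily maximizes predicted speed. A stripped-down version of that greedy controller (a sketch under this reading, assuming a trained model as defined above):

def pick_action(model, state):
    # Greedy with respect to predicted |velocity|: pick the action the
    # network expects to produce the highest speed on the next step.
    with torch.no_grad():
        predicted_speeds = model(torch.from_numpy(state).float())
    return torch.argmax(predicted_speeds).item()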

Torch throws a RuntimeError: element 0 of tensors does not require grad... but can't find where computational graph is severed

I am getting the above error:
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
I looked this up and it looks like the computational graph is not connected for some reason. However, I cannot find the location where the graph is severed.
My code is a reproduction of the arjovsky WGAN: https://github.com/martinarjovsky/WassersteinGAN
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import random
import os
import json

import numpy as np
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable

class MLP_G(nn.Module):
    def __init__(self, isize, nz, ngf, ngpu):
        super(MLP_G, self).__init__()
        self.ngpu = ngpu
        main = nn.Sequential(
            # Z goes into a linear of size: ngf
            nn.Linear(nz, ngf),
            nn.ReLU(True),
            nn.Linear(ngf, ngf),
            nn.ReLU(True),
            nn.Linear(ngf, ngf),
            nn.ReLU(True),
            nn.Linear(ngf, isize),
        )
        self.main = main
        self.isize = isize
        self.nz = nz

    def forward(self, input):
        input = input.view(input.size(0), input.size(1))
        if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            output = self.main(input)
        return output.view(output.size(0), self.isize)

class MLP_D(nn.Module):
    def __init__(self, isize, nz, ndf, ngpu):
        super(MLP_D, self).__init__()
        self.ngpu = ngpu
        main = nn.Sequential(
            # the sample goes into a linear of size: ndf
            nn.Linear(isize, ndf),
            nn.ReLU(True),
            nn.Linear(ndf, ndf),
            nn.ReLU(True),
            nn.Linear(ndf, ndf),
            nn.ReLU(True),
            nn.Linear(ndf, 1),
        )
        self.main = main
        self.isize = isize
        self.nz = nz

    def forward(self, input):
        input = input.view(input.size(0), input.size(1))
        if isinstance(input.data, torch.cuda.FloatTensor) and self.ngpu > 1:
            output = nn.parallel.data_parallel(self.main, input, range(self.ngpu))
        else:
            output = self.main(input)
        output = output.mean(0)
        return output.view(1)

netG = None  # path to saved generator
netD = None  # discriminator path
batchSize = 1000  # size of batch (which is size of data)
cuda = False
lrD = lrG = .00005
beta1 = .5
niter = 25
experiment = '/content/drive/MyDrive/savefolder'
clamp_upper = .01
clamp_lower = -clamp_upper

manualSeed = random.randint(1, 10000)  # fix seed
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)
cudnn.benchmark = True

# x, y and instrument are defined elsewhere
dataset = torch.tensor(np.stack([x, y, instrument], axis=1)).float().reshape(-1, 3)

ngpu = 1
nz = 4  # three latents and the instrument
ngf = 128
ndf = 128

# custom weights initialization called on netG and netD
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

netG = MLP_G(2, nz, ngf, ngpu)
netG.apply(weights_init)
print(netG)

netD = MLP_D(3, nz, ndf, ngpu)
print(netD)

input = torch.FloatTensor(batchSize, 2)
noise = torch.FloatTensor(batchSize, nz-1)
fixed_noise = torch.FloatTensor(batchSize, nz-1).normal_(0, 1)
one = torch.FloatTensor([1])
mone = one * -1

# setup optimizer
optimizerD = optim.Adam(netD.parameters(), lr=lrD, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lrG, betas=(beta1, 0.999))

real_cpu = data = dataset
gen_iterations = 0
for epoch in range(niter):
    #data_iter = iter(dataloader)

    ############################
    # (1) Update D network
    ###########################
    for p in netD.parameters():  # reset requires_grad
        p.requires_grad = True   # they are set to False below in netG update

    # train the discriminator Diters times
    if gen_iterations < 25 or gen_iterations % 500 == 0:
        Diters = 100
    else:
        Diters = 5
    j = 0
    while j < Diters:
        j += 1
        # clamp parameters to a cube
        for p in netD.parameters():
            p.data.clamp_(clamp_lower, clamp_upper)

        # train with real
        netD.zero_grad()
        if cuda:
            real_cpu = real_cpu.cuda()
        input.resize_as_(real_cpu).copy_(real_cpu)
        inputv = Variable(input, requires_grad=False)
        errD_real = netD(inputv)
        errD_real.backward(one)  # Error occurs here

        # train with fake
        noise.resize_(batchSize, nz-1).normal_(0, 1)
        noisev = torch.cat([Variable(noise, requires_grad=False), dataset[:, 2].reshape(-1, 1)], 1)  # totally freeze netG
        fake = torch.cat([Variable(netG(noisev).data), dataset[:, 2].view(-1, 1)], 1)
        inputv = fake
        errD_fake = netD(inputv)
        errD_fake.backward(mone)
        errD = errD_real - errD_fake
        optimizerD.step()

    ############################
    # (2) Update G network
    ###########################
    for p in netD.parameters():
        p.requires_grad = False  # to avoid computation
    netG.zero_grad()
    # in case our last batch was the tail batch of the dataloader,
    # make sure we feed a full batch of noise
    noise.resize_(batchSize, nz-1).normal_(0, 1)
    noisev = torch.cat([Variable(noise), dataset[:, 2].view(-1, 1)], 1)
    fake = torch.cat([netG(noisev), dataset[:, 2].view(-1, 1)], 1)
    errG = netD(fake)
    errG.backward(one)
    optimizerG.step()
    gen_iterations += 1

    i = 0
    print('[%d/%d][%d] Loss_D: %f Loss_G: %f Loss_D_real: %f Loss_D_fake %f'
          % (epoch, niter, gen_iterations,
             errD.data[0], errG.data[0], errD_real.data[0], errD_fake.data[0]))
    # if gen_iterations % 500 == 0:
    #     real_cpu = real_cpu.mul(0.5).add(0.5)
    #     vutils.save_image(real_cpu, '{0}/real_samples.png'.format(opt.experiment))
    #     fake = netG(Variable(fixed_noise, volatile=True))
    #     fake.data = fake.data.mul(0.5).add(0.5)
    #     vutils.save_image(fake.data, '{0}/fake_samples_{1}.png'.format(opt.experiment, gen_iterations))

    # do checkpointing
    torch.save(netG.state_dict(), '{0}/netG_epoch_{1}.pth'.format(experiment, epoch))
    torch.save(netD.state_dict(), '{0}/netD_epoch_{1}.pth'.format(experiment, epoch))
The error occurs on the line errD_real.backward(one). It might be something regarding zeroing out the computational graph, as the code runs for one iteration and then throws the error. Thanks for your help.
You most certainly need to add requires_grad=True on one. You could define it as:
one = torch.tensor([1], dtype=torch.float16, requires_grad=True)
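More generally, to locate where a graph is severed you can print requires_grad and grad_fn along the forward path (a generic debugging sketch, not specific to this repo):

out = netD(inputv)
print(out.requires_grad, out.grad_fn)   # False / None means the graph is already cut here
for name, p in netD.named_parameters():
    print(name, p.requires_grad)        # all False would also explain the error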

PyTorch on Windows: DataLoader problems with num_workers

I have just got a new computer running Windows 10 which has a GPU, so I wanted to see if I could sensibly use it for machine learning.
So I tried running an old model which I previously trained on Google Colab.
The answer is that it does do quite well, but I discovered that I could not use more than one worker in the DataLoader. Googling found that this is a known issue with PyTorch on Windows in Jupyter notebooks, so I tried running it as a normal Python program. I found that it did work, but that the creation of the data iterator took a very long time. Below are the times in seconds for 1, 2 and 6 workers, each measured twice:
I note that 2 workers seems to be the fastest, and there seems to be quite a lot of variation, which surprised me as the machine was doing nothing else.
So the first question is:
Is there a way to let PyTorch choose the most efficient number of workers to use?
The second question is:
If I install a version of Linux, will I be able to use Jupyter notebooks with multiple workers, which is what I would prefer to do in an ideal world?
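For reference on the first question: PyTorch has no built-in auto-tuner for num_workers, so a common approach is a quick empirical sweep before training. A minimal sketch (the candidate counts, batch size and n_batches here are arbitrary choices, and on Windows this must run under if __name__ == "__main__":):

import time
from torch.utils.data import DataLoader

def best_num_workers(dataset, batch_size=1024, candidates=(0, 1, 2, 4, 6), n_batches=5):
    # Time fetching a few batches for each worker count and return the fastest.
    timings = {}
    for n in candidates:
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=n)
        start = time.time()
        for i, _ in enumerate(loader):
            if i >= n_batches:
                break
        timings[n] = time.time() - start
    return min(timings, key=timings.get), timings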
The code I ran is below; the relevant part is after if __name__ == "__main__":
# -*- coding: utf-8 -*-
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
#import gym
import pickle
import matplotlib.pyplot as plt
import time

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

# create dataset
class C4Dataset(Dataset):
    '''
    The data for the first 12 moves is held in a pickled list as (key, val).
    The key has to be converted to the pos and mask, which can then be
    converted to the ones, twos and zeros.
    Val is the value for the player playing, so it needs to be changed
    to minus val when the move number is odd.
    '''
    fileName = r'C:\Users\alan\Desktop\Python\Python36\connect4\Layers\ListAllKeyVal19'
    bottom_mask = 4432676798593
    board_mask = bottom_mask * ((1 << 6) - 1)
    bNos = 2**np.arange(49, dtype=np.uint64)

    def getData(fileName):
        with open(fileName, 'rb') as inFile:
            dict = pickle.load(inFile)
        return dict

    def oneHot(x):
        return np.eye(37, dtype=np.float32)[x]

    def getNoMoves(ones, twos):
        return np.sum(ones+twos)

    def getPosMask(key):
        binary = ('{:049b}'.format(key))[::-1]
        arr = np.frombuffer(binary.encode(encoding='utf-8', errors='strict'), 'u1') - ord('0')
        outArr = np.reshape(arr, (7, 7), order='F')
        arr = np.flipud(outArr)
        pos = arr.copy()
        mask = arr.copy()
        for col in range(7):
            res = np.where(arr[:, col] == 1)
            topPos = res[0][0]
            pos[topPos, col] = 0
            mask[topPos, col] = 0
            if topPos < 6:
                mask[topPos+1:, col] = 1
        msk = np.flipud(mask)
        msk = np.reshape(msk, (49), order='F')
        maskNo = np.array(msk.dot(C4Dataset.bNos), dtype=np.uint64).item()
        return pos.astype('float32'), (pos ^ mask).astype('float32'), (np.logical_not(mask)).astype('float32'), maskNo

    def possible(mask):
        poss = (mask + C4Dataset.bottom_mask) & C4Dataset.board_mask
        binary = ('{:049b}'.format(poss))[::-1]
        arr = np.frombuffer(binary.encode(encoding='utf-8', errors='strict'), 'u1') - ord('0')
        outArr = np.reshape(arr, (7, 7), order='F')
        arr = np.flipud(outArr)
        return arr

    def __init__(self):
        self.lst = C4Dataset.getData(C4Dataset.fileName)

    def __len__(self):
        return len(self.lst)

    def __getitem__(self, idx):
        key, val = self.lst[idx]
        val = int(val)
        ones, twos, zeros, mask = C4Dataset.getPosMask(key)
        arr = np.zeros((5, 7, 7), dtype=np.float32)
        arr[0, :6, :7] = ones[1:, :]
        arr[1, :6, :7] = twos[1:, :]
        arr[2, :6, :7] = zeros[1:, :]
        moves = int(C4Dataset.getNoMoves(ones, twos))
        p = (moves % 2) + 3
        arr[p, :6, :7] = C4Dataset.possible(mask)[1:, :]
        return arr, val+18  # C4Dataset.oneHot(val+18)

class C4Net(nn.Module):
    def __init__(self, inFilters, outFilters):
        super(C4Net, self).__init__()
        self.conv1 = nn.Conv2d(inFilters, 32, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.layer1 = BasicBlock(32, 32)
        self.layer2 = BasicBlock(32, 32)
        self.layer3 = BasicBlock(32, 32)
        self.layer4 = BasicBlock(32, 32)
        self.layer5 = BasicBlock(32, 32)
        self.layer6 = BasicBlock(32, 32)
        self.layer7 = BasicBlock(32, 32)
        self.layer8 = BasicBlock(32, 32)
        self.linear = nn.Linear(32*7*7, outFilters)  # 1568

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        #out = F.avg_pool2d(out, 2)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

# show some images
def show(img):
    npimg = img.numpy()[:3, :, :]
    plt.imshow(np.transpose(npimg, (1, 2, 0)), interpolation='nearest')

# get some random training images
if __name__ == "__main__":
    dirName = r'C:\Users\alan\Desktop\Python\Python36\connect4\Layers'
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Assuming that we are on a CUDA machine, this should print a CUDA device:
    print(device)

    # create dataloader
    max_epochs = 1
    batchSize = 1024  # 512 # 256
    learningRate = .00003
    # Parameters
    params = {'batch_size': batchSize, 'shuffle': True, 'num_workers': 2}
    # Generators
    dataset = C4Dataset()
    start = time.time()
    dataloader = DataLoader(dataset, **params)
    middle = time.time()
    print('create dataloader', middle-start)
    dataiter = iter(dataloader)
    end = time.time()
    print('create data iterator', end-middle)
    images, labels = next(dataiter)
    final = time.time()
    print('get one batch', final-end)
    # show images
    show(torchvision.utils.make_grid(images[:16]))

    # create the weights
    wts = np.array([59, 963, 12406, 148920, 62551, 47281, 55136, 54312, 44465, 31688,
                    27912, 37907, 114778, 242800, 394530, 495237, 582174, 163370, 480850,
                    201152, 690905, 633937, 721340, 372479, 193375, 84648, 76576, 91087, 130428,
                    154184, 157339, 156453, 227696, 1705325, 548155, 44315, 2082], dtype=np.float32)
    maxwt = wts.max()
    weights = wts/maxwt
    weights = torch.from_numpy(weights)
    weights.to(device)

    # create the network
    net = C4Net(5, 37)
    net.to(device)
    PATH = r'C:\Users\alan\Desktop\Python\connectX\c4Net37Weights00003.pth'
    net.load_state_dict(torch.load(PATH, map_location=torch.device(device)))

    # create the loss function and optimiser
    criterion = nn.CrossEntropyLoss(weight=weights.to(device))
    optimizer = optim.Adam(net.parameters(), lr=learningRate)

    # train the network
    start = time.time()
    for epoch in range(max_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(dataloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(device), data[1].to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
        torch.save(net.state_dict(), r'C:\Users\alan\Desktop\Python\connectX\tempWeights')
    print('Finished Training')

    # save the weights
    PATH = r'C:\Users\alan\Desktop\Python\connectX\c4Net37Weights00004.pth'
    torch.save(net.state_dict(), PATH)
    end = time.time()
    print('elapsed time', end-start)
PS: the machine is a Dell XPS 17 with an Intel Core i9-10885H (8 cores), and the GPU is an NVIDIA GeForce RTX 2060 with Max-Q. In this one test it runs 4 times faster than on Google Colab, but I do not know what GPU I was allocated there.

TensorFlow: ValueError: Can't load save_path when it is None

import os
import tarfile
from six.moves import urllib

URL = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
PATH = 'aclImdb'

def fetch_data(url=URL, path=PATH):
    if not os.path.isdir(path):
        os.makedirs(path)
    file_path = os.path.join(path, "aclImdb_v1.tar.gz")
    urllib.request.urlretrieve(url, file_path)
    file_gz = tarfile.open(file_path)
    file_gz.extractall(path=path)
    file_gz.close()

import pyprind  # for progress visualisation
import pandas as pd

PATH = 'aclImdb'
labels = {'pos': 1, 'neg': 0}  # int class labels for 'positive' and 'negative'
pbar = pyprind.ProgBar(50000)  # initialise a progress bar with 50k iterations = no. of docs
df = pd.DataFrame()
# use nested for loops to iterate over 'train' & 'test' subdir
for s in ('test', 'train'):
    for l in ('pos', 'neg'):  # and read text files from 'pos' and 'neg' subdir
        path = os.path.join(PATH, s, l)
        for file in os.listdir(path):
            # append to the df pandas DataFrame with an int class (pos = 1, neg = 0)
            with open(os.path.join(path, file), 'r', encoding='utf-8') as infile:
                txt = infile.read()
            df = df.append([[txt, labels[l]]], ignore_index=True)
            pbar.update()
df.columns = ['review', 'sentiment']

import numpy as np
np.random.seed(0)
df = df.reindex(np.random.permutation(df.index))
df.to_csv('movie_data.csv', index=False, encoding='utf-8')

df = pd.read_csv('movie_data.csv', encoding='utf-8')
df.head(3)

# Separate words and count each word's occurrence
import pyprind  # for progress visualisation
from collections import Counter
from string import punctuation
import re

counts = Counter()  # collects the counts of occurrence of each unique word
pbar = pyprind.ProgBar(len(df['review']),
                       title='Counting word occurrences...')  # progress bar
for i, review in enumerate(df['review']):
    text = ''.join([c if c not in punctuation else ' '+c+' '
                    for c in review]).lower()
    df.loc[i, 'review'] = text
    pbar.update()
    counts.update(text.split())

# Mapping each unique word to an int
word_counts = sorted(counts, key=counts.get, reverse=True)
print(word_counts[:5])
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}
n_words = max(list(word_to_int.values())) + 1

mapped_reviews = []
pbar = pyprind.ProgBar(len(df['review']),
                       title='Map movie reviews to integers...')
for review in df['review']:
    mapped_reviews.append([word_to_int[word] for word in review.split()])
    pbar.update()

# Left-pad with zeros if the sequence length < 200
# Use 200 elements if the length > 200
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype=int)
for i, row in enumerate(mapped_reviews):
    review_arr = np.array(row)
    sequences[i, -len(row):] = review_arr[-sequence_length:]

# Split the dataset into training and test sets
X_train = sequences[:25000, :]
y_train = df.loc[:25000, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values

# Define the mini-batches generator
np.random.seed(123)
def batch_gen(x, y=None, batch_size=64):
    n_batches = len(x) // batch_size
    x = x[:n_batches * batch_size]
    if y is not None:
        y = y[:n_batches * batch_size]
    for ii in range(0, len(x), batch_size):
        if y is not None:
            yield x[ii: ii + batch_size], y[ii: ii + batch_size]
        else:
            yield x[ii: ii + batch_size]

import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # suppress the 3.5 warning if using TF 1.4

class SentimentRNN(object):
    # Define __init__
    def __init__(self,
                 n_words,
                 seq_len=200,
                 lstm_size=256,
                 num_layers=1,
                 batch_size=64,
                 learning_rate=0.0001,
                 embed_size=200):
        self.n_words = n_words
        self.seq_len = seq_len
        self.lstm_size = lstm_size  # no. of hidden units
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.embed_size = embed_size
        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)
            self.build()
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()

    # Define the build method
    def build(self):
        # Define the placeholders
        tf_x = tf.placeholder(tf.int32,
                              shape=(self.batch_size, self.seq_len),
                              name='tf_x')
        tf_y = tf.placeholder(tf.float32,
                              shape=(self.batch_size),
                              name='tf_y')
        tf_keepprob = tf.placeholder(tf.float32,
                                     name='tf_keepprob')
        # Create the embedding layer
        embedding = tf.Variable(
            tf.random_uniform(
                shape=(self.n_words, self.embed_size),
                minval=-1,
                maxval=1),
            name='embedding')
        embed_x = tf.nn.embedding_lookup(embedding,
                                         tf_x,
                                         name='embed_x')
        # Define LSTM cells and stack them
        cells = tf.contrib.rnn.MultiRNNCell(
            [tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.BasicLSTMCell(num_units=self.lstm_size),
                output_keep_prob=tf_keepprob)
             for i in range(self.num_layers)])
        # Define the initial state:
        self.initial_state = cells.zero_state(
            self.batch_size, tf.float32)
        print(' << initial state >> ', self.initial_state)
        # Put together components with tf.nn.dynamic_rnn
        lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
            cell=cells,
            inputs=embed_x,
            initial_state=self.initial_state)
        ## lstm_outputs shape: [batch_size, max_time, cells.output_size]
        print('\n << lstm_output >> ', lstm_outputs)
        print('\n << final state >> ', self.final_state)
        # Apply a fully-connected layer on the RNN output
        logits = tf.layers.dense(
            inputs=lstm_outputs[:, -1],
            units=1,  # dimensionality of the output space
            activation=None,
            name='logits')
        # Remove dimensions of size 1 from the tensor shape
        logits = tf.squeeze(input=logits,
                            name='logits_squeezed')
        print('\n << logits >> ', logits)
        # If you want probabilities
        y_proba = tf.nn.sigmoid(logits, name='probabilities')
        predictions = {'probabilities': y_proba,
                       'labels': tf.cast(tf.round(y_proba),
                                         tf.int32,
                                         name='labels')}
        print('\n << predictions >> ', predictions)
        # Define the cost function
        cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf_y,
                logits=logits),
            name='cost')
        # Define the optimiser
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.minimize(cost, name='train_op')

    # Define the train method
    def train(self, X_train, y_train, num_epochs):
        with tf.Session(graph=self.g) as sess:
            sess.run(self.init_op)
            iteration = 1
            for epoch in range(num_epochs):
                state = sess.run(self.initial_state)
                for batch_x, batch_y in batch_gen(
                        X_train,
                        y_train,
                        batch_size=self.batch_size):
                    feed = {'tf_x:0': batch_x,
                            'tf_y:0': batch_y,
                            'tf_keepprob:0': 0.5,
                            self.initial_state: state}
                    loss, _, state = sess.run(
                        ['cost:0',
                         'train_op',
                         self.final_state],
                        feed_dict=feed)
                    if iteration % 20 == 0:
                        print("Epoch: %d/%d Iteration: %d "
                              "| Train loss: %.5f" % (
                                  epoch + 1,
                                  num_epochs,
                                  iteration,
                                  loss))
                    iteration += 1
                if (epoch + 1) % 10 == 0:
                    self.saver.save(
                        sess,
                        "model/sentiment-%d.ckpt" % epoch)

    # Define the predict method
    def predict(self, X_data, return_proba=False):
        preds = []
        with tf.Session(graph=self.g) as sess:
            self.saver.restore(
                sess,
                tf.train.latest_checkpoint('model/'))
            test_state = sess.run(self.initial_state)
            for ii, batch_x in enumerate(batch_gen(
                    x=X_data,
                    y=None,
                    batch_size=self.batch_size), 1):
                feed = {'tf_x:0': batch_x,
                        'tf_keepprob:0': 1.0,
                        self.initial_state: test_state}
                if return_proba:
                    pred, test_state = sess.run(
                        ['probabilities:0', self.final_state],
                        feed_dict=feed)
                else:
                    pred, test_state = sess.run(
                        ['labels:0', self.final_state],
                        feed_dict=feed)
                preds.append(pred)
        return np.concatenate(preds)

rnn = SentimentRNN(n_words=n_words,
                   seq_len=sequence_length,
                   embed_size=256,
                   lstm_size=128,
                   num_layers=1,
                   batch_size=100,
                   learning_rate=0.001)

preds = rnn.predict(X_test)
y_true = y_test[:len(preds)]
print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
Create an object of the SentimentRNN class with the following parameters:
n_words = n_words, seq_len = sequence_length, embed_size = 256, lstm_size = 128, num_layers = 1, batch_size = 100, learning_rate = 0.001.
Since we have a relatively small dataset, num_layers = 1 may generalise better.
ValueError Traceback (most recent call last)
<ipython-input-23-a3cfe03a9a49> in <module>()
----> 1 preds = rnn.predict(X_test)
2 y_true = y_test[:len(preds)]
3 print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
<ipython-input-12-d83ee67c43b6> in predict(self, X_data, return_proba)
173 self.saver.restore(
174 sess,
--> 175 tf.train.latest_checkpoint('model/'))
176 test_state = sess.run(self.initial_state)
177
/usr/local/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1680 return
1681 if save_path is None:
-> 1682 raise ValueError("Can't load save_path when it is None.")
1683 logging.info("Restoring parameters from %s", save_path)
1684 if context.in_graph_mode():
ValueError: Can't load save_path when it is None.
The error just means tf.train.latest_checkpoint didn't find anything. It returns None, and then the Saver complains because it was passed None; in other words, there is no checkpoint in that directory. In this code, checkpoints are only written inside train() (every 10 epochs), and predict() is called without train() ever having run, so 'model/' is empty.
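A defensive version of the restore (a sketch; 'model/' matches the directory train() saves into) makes that failure mode explicit:

ckpt = tf.train.latest_checkpoint('model/')
if ckpt is None:
    # No .ckpt files found: train() must run for at least 10 epochs before
    # saver.save() ever writes a checkpoint to model/.
    raise FileNotFoundError("no checkpoint found in 'model/'; run train() first")
self.saver.restore(sess, ckpt)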
