Tensorflow 2 ValueError: Shapes (20, 1) and (20, 2) are incompatible in gym environment - python-3.x

Just for learning, I wanted to test this code, but there is a problem in it that I do not understand. It says ValueError: Shapes (20, 1) and (20, 2) are incompatible, raised from the line loss = network.train_on_batch(states, discounted_rewards).
Maybe something has changed in TensorFlow since the code was written.
The code is from this website: https://adventuresinmachinelearning.com/policy-gradient-tensorflow-2/
import gym
import tensorflow as tf
from tensorflow import keras
import numpy as np
import datetime as dt
#STORE_PATH = '/Users/andrewthomas/Adventures in ML/TensorFlowBook/TensorBoard/PolicyGradientCartPole'
GAMMA = 0.95
env = gym.make("CartPole-v0")
state_size = 4
num_actions = env.action_space.n
tf.keras.backend.set_floatx('float64')
network = keras.Sequential([
    keras.layers.Dense(30, activation='relu', kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(30, activation='relu', kernel_initializer=keras.initializers.he_normal()),
    keras.layers.Dense(num_actions, activation='softmax')
])
network.compile(loss='categorical_crossentropy',optimizer=keras.optimizers.Adam())
def get_action(network, state, num_actions):
    print(state.reshape((1, -1)))
    softmax_out = network(state.reshape((1, -1)))
    print(softmax_out)
    selected_action = np.random.choice(num_actions, p=softmax_out.numpy()[0])
    return selected_action
def update_network(network, rewards, states, actions, num_actions):
    reward_sum = 0
    discounted_rewards = []
    for reward in rewards[::-1]:  # reverse buffer r
        reward_sum = reward + GAMMA * reward_sum
        discounted_rewards.append(reward_sum)
    discounted_rewards.reverse()
    discounted_rewards = np.array(discounted_rewards)
    # standardise the rewards
    discounted_rewards -= np.mean(discounted_rewards)
    discounted_rewards /= np.std(discounted_rewards)
    states = np.vstack(states)
    print("States", states.shape)
    print(states)
    print("Rewards", discounted_rewards.shape)
    print(discounted_rewards)
    loss = network.train_on_batch(states, discounted_rewards)
    return loss
num_episodes = 10000000
#train_writer = tf.summary.create_file_writer(STORE_PATH + f"/PGCartPole_{dt.datetime.now().strftime('%d%m%Y%H%M')}")
for episode in range(num_episodes):
    state = env.reset()
    rewards = []
    states = []
    actions = []
    while True:
        action = get_action(network, state, num_actions)
        new_state, reward, done, _ = env.step(action)
        states.append(state)
        rewards.append(reward)
        actions.append(action)
        if done:
            loss = update_network(network, rewards, states, actions, num_actions)
            tot_reward = sum(rewards)
            print(f"Episode: {episode}, Reward: {tot_reward}, avg loss: {loss:.5f}")
            with train_writer.as_default():
                tf.summary.scalar('reward', tot_reward, step=episode)
                tf.summary.scalar('avg loss', loss, step=episode)
            break
        state = new_state
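A hedged note on the likely cause: `categorical_crossentropy` expects targets shaped (batch, num_actions), i.e. (20, 2) here, but `discounted_rewards` has shape (20,). A common REINFORCE formulation, and plausibly what the tutorial intended, is to use the one-hot encoded actions as targets and pass the discounted returns as sample weights. This sketch (replacing the train_on_batch line inside update_network) is an assumption, not verified against the original post:
# assumption: targets = one-hot actions, each sample weighted by its discounted return
target_actions = keras.utils.to_categorical(np.array(actions), num_actions)
loss = network.train_on_batch(states, target_actions,
                              sample_weight=discounted_rewards)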

Related

ValueError: It seems that you are using the Keras 2 and you are passing both `kernel_size` and `strides` as integer positional arguments

I'm a computer science undergraduate student in the 4th semester, and I'm learning about Machine Learning in this lockdown.
from __future__ import print_function
from keras import backend as K
K.common.set_image_dim_ordering('th') # ensure our dimension notation matches
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Reshape
from keras.layers.core import Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D
from keras.layers.convolutional import Convolution2D, AveragePooling2D
from keras.layers.core import Flatten
from keras.optimizers import SGD, Adam
from keras.datasets import mnist
from keras import utils
import numpy as np
from PIL import Image, ImageOps
import argparse
import math
import os
import os.path
import glob
def generator_model():
    model = Sequential()
    model.add(Dense(input_dim=100, output_dim=1024))
    model.add(Activation('tanh'))
    model.add(Dense(128*8*8))
    model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(Reshape((128, 8, 8), input_shape=(128*8*8,)))
    model.add(UpSampling2D(size=(4, 4)))
    model.add(Convolution2D(64, 5, 5, border_mode='same'))
    model.add(Activation('tanh'))
    model.add(UpSampling2D(size=(4, 4)))
    model.add(Convolution2D(1, 5, 5, border_mode='same'))
    model.add(Activation('tanh'))
    return model
def discriminator_model():
    model = Sequential()
    model.add(Convolution2D(64, 5, 5, border_mode='same', input_shape=(1, 128, 128)))
    model.add(Activation('tanh'))
    model.add(AveragePooling2D(pool_size=(4, 4)))
    model.add(Convolution2D(128, 5, 5))
    model.add(Activation('tanh'))
    model.add(AveragePooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(256))
    model.add(Activation('tanh'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    return model
def generator_containing_discriminator(generator, discriminator):
    model = Sequential()
    model.add(generator)
    discriminator.trainable = False
    model.add(discriminator)
    return model
def combine_images(generated_images):
    num = generated_images.shape[0]
    width = int(math.sqrt(num))
    height = int(math.ceil(float(num)/width))
    shape = generated_images.shape[2:]
    image = np.zeros((height*shape[0], width*shape[1]),
                     dtype=generated_images.dtype)
    for index, img in enumerate(generated_images):
        i = int(index/width)
        j = index % width
        image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \
            img[0, :, :]
    return image
model = generator_model()
print(model.summary())
def load_data(pixels=128, verbose=False):
print("Loading data")
X_train = []
paths = glob.glob(os.path.normpath(os.getcwd() + '/logos/*.jpg'))
for path in paths:
if verbose: print(path)
im = Image.open(path)
im = ImageOps.fit(im, (pixels, pixels), Image.ANTIALIAS)
im = ImageOps.grayscale(im)
#im.show()
im = np.asarray(im)
X_train.append(im)
print("Finished loading data")
return np.array(X_train)
def train(epochs, BATCH_SIZE, weights=False):
"""
:param epochs: Train for this many epochs
:param BATCH_SIZE: Size of minibatch
:param weights: If True, load weights from file, otherwise train the model from scratch.
Use this if you have already saved state of the network and want to train it further.
"""
X_train = load_data()
X_train = (X_train.astype(np.float32) - 127.5)/127.5
X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])
discriminator = discriminator_model()
generator = generator_model()
if weights:
generator.load_weights('goodgenerator.h5')
discriminator.load_weights('gooddiscriminator.h5')
discriminator_on_generator = \
generator_containing_discriminator(generator, discriminator)
d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
generator.compile(loss='binary_crossentropy', optimizer="SGD")
discriminator_on_generator.compile(
loss='binary_crossentropy', optimizer=g_optim)
discriminator.trainable = True
discriminator.compile(loss='binary_crossentropy', optimizer=d_optim)
noise = np.zeros((BATCH_SIZE, 100))
for epoch in range(epochs):
print("Epoch is", epoch)
print("Number of batches", int(X_train.shape[0]/BATCH_SIZE))
for index in range(int(X_train.shape[0]/BATCH_SIZE)):
for i in range(BATCH_SIZE):
noise[i, :] = np.random.uniform(-1, 1, 100)
image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]
generated_images = generator.predict(noise, verbose=0)
#print(generated_images.shape)
if index % 20 == 0 and epoch % 10 == 0:
image = combine_images(generated_images)
image = image*127.5+127.5
destpath = os.path.normpath(os.getcwd()+ "/logo-generated-images/"+str(epoch)+"_"+str(index)+".png")
Image.fromarray(image.astype(np.uint8)).save(destpath)
X = np.concatenate((image_batch, generated_images))
y = [1] * BATCH_SIZE + [0] * BATCH_SIZE
d_loss = discriminator.train_on_batch(X, y)
print("batch %d d_loss : %f" % (index, d_loss))
for i in range(BATCH_SIZE):
noise[i, :] = np.random.uniform(-1, 1, 100)
discriminator.trainable = False
g_loss = discriminator_on_generator.train_on_batch(
noise, [1] * BATCH_SIZE)
discriminator.trainable = True
print("batch %d g_loss : %f" % (index, g_loss))
if epoch % 10 == 9:
generator.save_weights('goodgenerator.h5', True)
discriminator.save_weights('gooddiscriminator.h5', True)
def clean(image):
    for i in range(1, image.shape[0] - 1):
        for j in range(1, image.shape[1] - 1):
            if image[i][j] + image[i+1][j] + image[i][j+1] + image[i-1][j] + image[i][j-1] > 127 * 5:
                image[i][j] = 255
    return image
def generate(BATCH_SIZE):
    generator = generator_model()
    generator.compile(loss='binary_crossentropy', optimizer="SGD")
    generator.load_weights('goodgenerator.h5')
    noise = np.zeros((BATCH_SIZE, 100))
    a = np.random.uniform(-1, 1, 100)
    b = np.random.uniform(-1, 1, 100)
    grad = (b - a) / BATCH_SIZE
    for i in range(BATCH_SIZE):
        noise[i, :] = np.random.uniform(-1, 1, 100)
    generated_images = generator.predict(noise, verbose=1)
    #image = combine_images(generated_images)
    print(generated_images.shape)
    for image in generated_images:
        image = image[0]
        image = image*127.5+127.5
        Image.fromarray(image.astype(np.uint8)).save("dirty.png")
        Image.fromarray(image.astype(np.uint8)).show()
        clean(image)
        image = Image.fromarray(image.astype(np.uint8))
        image.show()
        image.save("clean.png")
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--nice", dest="nice", action="store_true")
    parser.set_defaults(nice=False)
    args = parser.parse_args()
    return args
train(400, 10, False)
generate(1)
I was trying this GAN code from a GitHub repository to learn about Generative Adversarial Networks, but the error below occurred. Can you please tell me where in the code the problem lies and what needs to change? Please help me!
The Troublesome Line:-
train(400, 10, False)
This is the error:-
ValueError: It seems that you are using the Keras 2 and you are passing both `kernel_size` and `strides` as integer positional arguments. For safety reasons, this is disallowed. Pass `strides` as a keyword argument instead.
The error arises from every addition of a Conv2D layer in your model. You need to change the line in your code
model.add(Convolution2D(64, 5, 5, border_mode='same'))
to something like (depending on what exactly you want)
model.add(Conv2D(64,kernel_size=5,strides=2,padding='same'))
Notice that I have explicitly named the strides argument here, because the error says it should be passed as a keyword argument.
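For reference, a sketch of what all four convolution layers might look like after the same change (assuming you want to keep the original stride of 1; Conv2D is the Keras 2 name, imported from keras.layers):
from keras.layers import Conv2D

# in generator_model(), stride 1 preserves the original spatial sizes
model.add(Conv2D(64, kernel_size=5, strides=1, padding='same'))
model.add(Conv2D(1, kernel_size=5, strides=1, padding='same'))

# in discriminator_model(), the first layer keeps its input_shape
model.add(Conv2D(64, kernel_size=5, strides=1, padding='same', input_shape=(1, 128, 128)))
model.add(Conv2D(128, kernel_size=5, strides=1))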

Pytorch NN Training issue: Loss of NN does not decrase

I want to classify random Instagram images as "image has a dog" or "image has no dog".
To train my NN to classify dogs I want to use the Stanford Dogs Dataset, so I have about 20,000 training images of different dogs of different breeds.
But while training my NN the loss does not decrease; I checked that with different learning rates and with and without dropout layers.
Can anyone give tips, or does anyone see bugs in the following code?
import torch
import torchvision
from torchvision import transforms
from PIL import Image
from os import listdir
import os
import random
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
TRAINDATAPATH = 'C:/Users/.../Desktop/train/'
TESTDATAPATH = 'C:/Users/.../Desktop/#apfel/'
"""normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)"""
normalize = transforms.Normalize(
    mean=[0.5, 0.5, 0.5],
    std=[0.5, 0.5, 0.5]
)
transforms = transforms.Compose([transforms.Resize(256),
                                 transforms.CenterCrop(256),
                                 transforms.ToTensor(),
                                 normalize])
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
train_data_list = []
target_list = []
train_data = []
batch_size = 1
files = listdir(TRAINDATAPATH)
for i in range(len(listdir(TRAINDATAPATH))):
    try:
        f = random.choice(files)
        files.remove(f)
        img = Image.open(TRAINDATAPATH + f)
        img_tensor = transforms(img)  # (3,256,256)
        train_data_list.append(img_tensor)
        isObj = 1 if 'obj' in f else 0
        isNotObj = 0 if 'obj' in f else 1
        target = [isObj, isNotObj]
        target_list.append(target)
        if len(train_data_list) >= 1:
            train_data.append((torch.stack(train_data_list), target_list))
            train_data_list = []
            target_list = []
            print('Loaded batch ', int(len(train_data)/batch_size), 'of ', int(len(listdir(TRAINDATAPATH))/batch_size))
            print('Percentage Done: ', 100*int(len(train_data)/batch_size)/int(len(listdir(TRAINDATAPATH))/batch_size), '%')
    except Exception:
        print("Error occurred but ignored")
        print(str(Exception))
        continue
class Netz(nn.Module):
    def __init__(self):
        super(Netz, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        self.conv3 = nn.Conv2d(12, 18, kernel_size=5)
        self.conv4 = nn.Conv2d(18, 24, kernel_size=5)
        self.fc1 = nn.Linear(3456, 1000)
        self.fc2 = nn.Linear(1000, 2)

    def forward(self, x):
        x = self.conv1(x)
        x = F.max_pool2d(x,2)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.max_pool2d(x,2)
        x = F.relu(x)
        x = self.conv3(x)
        x = F.max_pool2d(x,2)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.max_pool2d(x,2)
        x = F.relu(x)
        x = x.view(-1,3456)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.sigmoid(x)
model = Netz()
model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
optimizer = optim.Adadelta(model.parameters(), lr=10)
def train(epoch):
    global model
    model.train()
    batch_idx = 0
    for data, target in train_data:
        batch_idx += 1
        data = data.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        target = torch.Tensor(target).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
        data = Variable(data)
        target = Variable(target)
        optimizer.zero_grad()
        output = model(data)
        criterion = F.binary_cross_entropy
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        print('Train Epoch: '+ str(epoch) + '\tLoss: ' + str(loss.data.item()) )
def test():
    global model
    model.eval()
    files = listdir(TESTDATAPATH)
    f = random.choice(files)
    img = Image.open(TESTDATAPATH + f)
    img_eval_tensor = transforms(img)
    img_eval_tensor.unsqueeze_(0)
    data = Variable(img_eval_tensor.to(torch.device("cuda" if torch.cuda.is_available() else "cpu")))
    out = model(data)
    string_prediction = str(out.data.max(0, keepdim=True)[1])
    print(string_prediction[9:10])
for epoch in range(1,4):
    train(epoch)
i = 100
while i > 0:
    test()
    i -= 1
In the TRAINDATAPATH there are thousands of dog images with filenames like "obj_XXX.jpg", and some other images WITHOUT dogs whose filenames do not include "obj".
In the TESTDATAPATH there are just random images, some with dogs, some without.
The NN classifies them all as "not including dogs", i.e. "0", which is incorrect.
Thanks for every help!
You are doing a binary classification but you are using two classes:
isObj = 1 if 'obj' in f else 0
isNotObj = 0 if 'obj' in f else 1
target = [isObj, isNotObj]
In the binary case, it should be a single class, where 1 means it is a dog, and 0 means it is not. You are already doing it, but twice. You can remove the isNotObj entirely and only keep the isObj.
You need to adapt the model accordingly, such that it only predicts the isObj, therefore fc2 should only have 1 class as output:
self.fc2 = nn.Linear(1000, 1)
In the testing phase you need to make a prediction based on a single class, which can be seen as the probability of being a dog. Then you set a threshold for which you consider the model to be confident enough that it's actually a dog. To make it balanced, the threshold is 0.5, so everything above that is a dog and everything below it is not. This can easily be achieved with torch.round:
# Size: [batch_size, 1]
out = model(data)
predictions = torch.round(out)
# Get rid of the singular dimension
# To get size: [batch_size]
predictions = predictions.squeeze(1)
Besides that, the learning rate of 10 is astronomically high; with a learning rate greater than 1 the optimiser will usually overshoot instead of converging. A more appropriate learning rate is around 0.01 or 0.001.
And on a side note, since you are new to PyTorch: please don't use Variable. It was deprecated in PyTorch 0.4.0, released over two years ago, and all of its functionality has been merged into the tensors.
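Putting those points together, a minimal sketch of the single-class version (an illustration following the question's variable names, not a tested rewrite):
# one target value per image: 1.0 = dog, 0.0 = no dog
target = [1.0 if 'obj' in f else 0.0]
target_list.append(target)

# with fc2 = nn.Linear(1000, 1) the model output is [batch_size, 1]
output = model(data)                                   # sigmoid probabilities
target = torch.tensor(target_list, device=output.device)
loss = F.binary_cross_entropy(output, target)          # shapes now match

# at test time, round the probability to get the class label
prediction = torch.round(output).squeeze(1)            # [batch_size]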

Actually printing values from tensor object

I'm currently trying to implement a basic autoencoder using Keras, and I have come to the stage where I want the output from the second hidden layer. I think I'm able to get hold of the right object, but the problem is that I get it as a tensor object. The code I've been trying to run is the following:
from keras.layers import Input, Dense, initializers
import numpy as np
from Dataset import Dataset
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Dense, Activation
import tensorflow as tf
import time
#global variables
d = Dataset()
num_features = d.X_train.shape[1]
#input = [784, 400, 100, 10, 100, 400]
#output = [400, 100, 10, 100, 400, 784]
names = ['hidden1', 'hidden2', 'hidden3', 'hidden4', 'hidden5', 'hidden6']
list_of_nodes = [784, 400, 144, 10]
def generate_hidden_nodes(list_of_nodes):
    input = []
    for j in range(len(list_of_nodes)):
        input.append(list_of_nodes[j])
    for i in range(len(list_of_nodes)-2):
        input.append(list_of_nodes[-2-i])
    output = input[::-1]
    return input, output
input,output = generate_hidden_nodes(list_of_nodes)
def autoencoder(epochs):
    w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
    model = Sequential()
    input, output = generate_hidden_nodes(list_of_nodes)
    for j in range(len(input)):
        if j == (len(input)-1):
            model.add(Dense(output[j], activation='sigmoid', kernel_initializer=w, input_dim=input[j], name=names[j]))
            #model.add(Dropout(0.45))
        else:
            model.add(Dense(output[j], activation='relu', kernel_initializer=w, input_dim=input[j],
                            name=names[j]))
            #model.add(Dropout(0.45))
    model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
    history = model.fit(d.X_train, d.X_train,
                        epochs=epochs,
                        batch_size=50,
                        shuffle=True,
                        validation_split=0.2)
    #validation_data=(d.X_test, d.X_test))
    #print(history.history.keys())
    #plt.plot(history.history['val_acc'])
    #print(history.history['val_acc'])
    plt.show()
    return model
def cv():
    accuracy = 0
    size = 5
    epochs = 20
    variance = 0
    storage = np.zeros((size, epochs))
    for j in range(size):
        ae = autoencoder(epochs)
        #print(ae.history.history['val_acc'])
        storage[j] = ae.history.history['val_acc']
    for i in range(size):
        accuracy += storage[i][-1]
    mean = accuracy/size
    for k in range(size):
        variance += ((storage[k][-1] - mean)**2)
    variance = variance/size
    return mean, variance
#mean, variance = cv()
#print(mean)
#print(variance)
#time.sleep(10)
def finding_index():
    elements, index = np.unique(d.Y_test, return_index=True)
    return elements, index
def plotting():
    ae = autoencoder(20)
    elements, index = finding_index()
    y_proba = ae.predict(d.X_test)
    plt.figure(figsize=(20, 4))
    # size = 20
    for i in range(len(index)):
        ax = plt.subplot(2, len(index), i + 1)
        plt.imshow(d.X_test[index[i]].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax = plt.subplot(2, len(index), i + 1 + len(index))
        plt.imshow(y_proba[index[i]].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()
def plotting_weights(epochs):
    ae = autoencoder(epochs)
    output_layer = ae.get_layer('hidden2')
    weights = output_layer.get_weights()[0]
    print(weights.shape)
    size = 20
    plt.figure(figsize=(20, 4))
    for j in range(3):
        plt.gray()
        plt.imshow(weights[j].reshape(12, 12))
        plt.show()
def get_output():
    w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
    new_model = Sequential()
    new_model.add(Dense(400, activation='relu', kernel_initializer=w, input_dim=784))
    new_model.add(Dense(144, activation='sigmoid', kernel_initializer=w, input_dim=400))
    #new_model.add(Dense(784, activation='sigmoid', kernel_initializer=w, input_dim=144))
    new_model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
    history = new_model.fit(d.X_train, d.X_train,
                            epochs=20,
                            batch_size=50,
                            shuffle=True,
                            validation_split=0.2)
    y = new_model.predict(d.X_test)
    elements, index = finding_index()
    #return y.shape
def get_output2():
    ae = autoencoder(5)
    a = ae.layers[1].output()
    init_op = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init_op)  # execute init_op
        # print the random values that we sample
        print(a)
get_output2()
I've tried to just print(a) as well, but as I said, that returns a tensor object. Can someone tell me how I can actually print those values? Thanks in advance!
Simplest:
import keras.backend as K
print(K.eval(ae.layers[1].output()))
This is equivalent to:
with tf.Session() as sess:
    print(sess.run(a))
I find it more readable to simply use the keras.backend interface.
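One caveat: ae.layers[1].output is a symbolic tensor that depends on the network's input placeholder, so evaluating it in isolation can fail with a missing-feed error. A common alternative (a sketch using the standard Keras functional API; it assumes d.X_test from the question is available) is to wrap the intermediate layer in a sub-model and call predict:
from keras.models import Model

# sub-model mapping the autoencoder's input to its second layer's output
intermediate = Model(inputs=ae.input, outputs=ae.layers[1].output)
hidden_activations = intermediate.predict(d.X_test)
print(hidden_activations)  # a plain numpy array, not a symbolic tensor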

MethodNotDefined: ('impl', , 'Round3') Apply node that caused the error

I'm getting the following error when running the code below. It stops at training.
WARNING (theano.configdefaults): g++ not available, if using conda: conda install m2w64-toolchain
D:\ANACONDA\lib\site-packages\theano\configdefaults.py:560: UserWarning: DeprecationWarning: there is no c++ compiler. This is deprecated and with Theano 0.11 a c++ compiler will be mandatory
  warnings.warn("DeprecationWarning: there is no c++ compiler."
WARNING (theano.configdefaults): g++ not detected ! Theano will be unable to execute optimized C-implementations (for both CPU and GPU) and will default to Python implementations. Performance will be severely degraded. To remove this warning, set Theano flags cxx to an empty string.
WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
batch_size = 100
alpha = 0.1
epsilon = 0.0001
num_units = 8
n_hidden_layers = 3
num_epochs = 1000
dropout_in = 0.2
dropout_hidden = 0.5
activation = binary_net.binary_tanh_unit
binary = True
stochastic = False
H = 1.0
W_LR_scale = Glorot
LR_start = 0.003
LR_fin = 3e-07
LR_decay = 0.9908319448927676
save_path = mnist_parameters.npz
shuffle_parts = 1
Loading MNIST dataset...
Building the MLP...
W_LR_scale = 22.97825
H = 1.0
W_LR_scale = 3.2659864
H = 1.0
W_LR_scale = 3.2659864
H = 1.0
W_LR_scale = 3.4641016
H = 1.0
Training...
MethodNotDefined: ('impl', , 'Round3')
Apply node that caused the error: Elemwise{Composite{(i0 * (i1 + (i0 * round3(clip(i2, i3, i4)))) * i5)}}[(0, 2)](TensorConstant{(1, 1) of 2.0}, TensorConstant{(1, 1) of -1.0}, Elemwise{Composite{(i0 * (i1 + (i2 * i3 * i4) + i5))}}.0, TensorConstant{(1, 1) of 0}, TensorConstant{(1, 1) of 1}, Elemwise{Composite{Cast{float32}(LT(i0, i1))}}[(0, 0)].0)
Toposort index: 82
Inputs types: [TensorType(float32, (True, True)), TensorType(float32, (True, True)), TensorType(float32, matrix), TensorType(int8, (True, True)), TensorType(int8, (True, True)), TensorType(float32, matrix)]
Inputs shapes: [(1, 1), (1, 1), (100, 8), (1, 1), (1, 1), (100, 8)]
Inputs strides: [(4, 4), (4, 4), (32, 4), (1, 1), (1, 1), (32, 4)]
Inputs values: [array([[2.]], dtype=float32), array([[-1.]], dtype=float32), 'not shown', array([[0]], dtype=int8), array([[1]], dtype=int8), 'not shown']
Outputs clients: [[InplaceDimShuffle{1,0}(Elemwise{Composite{(i0 * (i1 + (i0 * round3(clip(i2, i3, i4)))) * i5)}}[(0, 2)].0), Dot22(Elemwise{Composite{(i0 * (i1 + (i0 * round3(clip(i2, i3, i4)))) * i5)}}[(0, 2)].0, Elemwise{Composite{Switch(RoundHalfToEven(clip((i0 * (i1 + i2)), i3, i4)), i1, i5)}}.0)]]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
Below is my code:
from __future__ import print_function
import sys
import os
import time
import numpy as np
np.random.seed(1234) # for reproducibility
import theano  # needed for theano.config.floatX and theano.function below
import theano.tensor as T
import lasagne
from lasagne.layers import get_all_layers
import pickle
import gzip
import binary_net
from pylearn2.datasets.mnist import MNIST
from pylearn2.utils import serial
from collections import OrderedDict
if __name__ == "__main__":
    # BN parameters
    batch_size = 100
    print("batch_size = "+str(batch_size))
    # alpha is the exponential moving average factor
    # alpha = .15
    alpha = .1
    print("alpha = "+str(alpha))
    epsilon = 1e-4
    print("epsilon = "+str(epsilon))
    # MLP parameters
    num_units = 8
    print("num_units = "+str(num_units))
    n_hidden_layers = 3
    print("n_hidden_layers = "+str(n_hidden_layers))
    # Training parameters
    num_epochs = 1000
    print("num_epochs = "+str(num_epochs))
    # Dropout parameters
    dropout_in = .2  # 0. means no dropout
    print("dropout_in = "+str(dropout_in))
    dropout_hidden = .5
    print("dropout_hidden = "+str(dropout_hidden))
    # BinaryOut
    activation = binary_net.binary_tanh_unit
    print("activation = binary_net.binary_tanh_unit")
    # activation = binary_net.binary_sigmoid_unit
    # print("activation = binary_net.binary_sigmoid_unit")
    # BinaryConnect
    binary = True
    print("binary = "+str(binary))
    stochastic = False
    print("stochastic = "+str(stochastic))
    # (-H,+H) are the two binary values
    # H = "Glorot"
    H = 1.
    print("H = "+str(H))
    # W_LR_scale = 1.
    W_LR_scale = "Glorot"  # "Glorot" means we are using the coefficients from Glorot's paper
    print("W_LR_scale = "+str(W_LR_scale))
    # Decaying LR
    LR_start = .003
    print("LR_start = "+str(LR_start))
    LR_fin = 0.0000003
    print("LR_fin = "+str(LR_fin))
    LR_decay = (LR_fin/LR_start)**(1./num_epochs)
    print("LR_decay = "+str(LR_decay))
    # BTW, LR decay might good for the BN moving average...
    save_path = "mnist_parameters.npz"
    print("save_path = "+str(save_path))
    shuffle_parts = 1
    print("shuffle_parts = "+str(shuffle_parts))
    print('Loading MNIST dataset...')
    train_set = MNIST(which_set= 'train', start=0, stop = 50000, center = False)
    valid_set = MNIST(which_set= 'train', start=50000, stop = 60000, center = False)
    test_set = MNIST(which_set= 'test', center = False)
    # bc01 format
    # Inputs in the range [-1,+1]
    # print("Inputs in the range [-1,+1]")
    train_set.X = 2* train_set.X.reshape(-1, 1, 28, 28) - 1.
    valid_set.X = 2* valid_set.X.reshape(-1, 1, 28, 28) - 1.
    test_set.X = 2* test_set.X.reshape(-1, 1, 28, 28) - 1.
    # flatten targets
    train_set.y = np.hstack(train_set.y)
    valid_set.y = np.hstack(valid_set.y)
    test_set.y = np.hstack(test_set.y)
    # Onehot the targets
    train_set.y = np.float32(np.eye(10)[train_set.y])
    valid_set.y = np.float32(np.eye(10)[valid_set.y])
    test_set.y = np.float32(np.eye(10)[test_set.y])
    # for hinge loss
    train_set.y = 2* train_set.y - 1.
    valid_set.y = 2* valid_set.y - 1.
    test_set.y = 2* test_set.y - 1.
    print('Building the MLP...')
    # Prepare Theano variables for inputs and targets
    input = T.tensor4('inputs')
    target = T.matrix('targets')
    LR = T.scalar('LR', dtype=theano.config.floatX)
    mlp = lasagne.layers.InputLayer(
        shape=(None, 1, 28, 28),
        input_var=input)
    mlp = lasagne.layers.DropoutLayer(
        mlp,
        p=dropout_in)
    for k in range(n_hidden_layers):
        mlp = binary_net.DenseLayer(
            mlp,
            binary=binary,
            stochastic=stochastic,
            H=H,
            W_LR_scale=W_LR_scale,
            nonlinearity=lasagne.nonlinearities.identity,
            num_units=num_units)
        mlp = lasagne.layers.BatchNormLayer(
            mlp,
            epsilon=epsilon,
            alpha=alpha)
        mlp = lasagne.layers.NonlinearityLayer(
            mlp,
            nonlinearity=activation)
        mlp = lasagne.layers.DropoutLayer(
            mlp,
            p=dropout_hidden)
    mlp = binary_net.DenseLayer(
        mlp,
        binary=binary,
        stochastic=stochastic,
        H=H,
        W_LR_scale=W_LR_scale,
        nonlinearity=lasagne.nonlinearities.identity,
        num_units=10)
    mlp = lasagne.layers.BatchNormLayer(
        mlp,
        epsilon=epsilon,
        alpha=alpha)
    train_output = lasagne.layers.get_output(mlp, deterministic=False)
    # squared hinge loss
    loss = T.mean(T.sqr(T.maximum(0.,1.-target*train_output)))
    if binary:
        # W updates
        W = lasagne.layers.get_all_params(mlp, binary=True)
        W_grads = binary_net.compute_grads(loss,mlp)
        updates = lasagne.updates.adam(loss_or_grads=W_grads, params=W, learning_rate=LR)
        updates = binary_net.clipping_scaling(updates,mlp)
        # other parameters updates
        params = lasagne.layers.get_all_params(mlp, trainable=True, binary=False)
        updates = OrderedDict(list(updates.items()) + list(lasagne.updates.adam(loss_or_grads=loss, params=params,learning_rate=LR).items()))
    else:
        params = lasagne.layers.get_all_params(mlp, trainable=True)
        updates = lasagne.updates.adam(loss_or_grads=loss, params=params, learning_rate=LR)
    test_output = lasagne.layers.get_output(mlp, deterministic=True)
    test_loss = T.mean(T.sqr(T.maximum(0.,1.-target*test_output)))
    test_err = T.mean(T.neq(T.argmax(test_output, axis=1), T.argmax(target, axis=1)),dtype=theano.config.floatX)
    # Compile a function performing a training step on a mini-batch (by giving the updates dictionary)
    # and returning the corresponding training loss:
    train_fn = theano.function([input, target, LR], loss, updates=updates)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input, target], [test_loss, test_err])
    print('Training...')
    binary_net.train(
        train_fn, val_fn,
        mlp,
        batch_size,
        LR_start, LR_decay,
        num_epochs,
        train_set.X, train_set.y,
        valid_set.X, valid_set.y,
        test_set.X, test_set.y,
        save_path,
        shuffle_parts)
Can anyone please help me solve this error?
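No answer is attached here, but the warnings at the top are a strong hint: with no g++ available, Theano falls back to pure-Python implementations, and the custom round3 op in BinaryNet's binary_net.py defines only C code, so its Python impl is missing. The straightforward fix is the one the warning itself suggests (conda install m2w64-toolchain). Alternatively, a hedged sketch of a Python fallback, assuming the op is defined as in the BinaryNet reference code (the class structure and rounding behaviour below are assumptions):
import numpy as np
from theano.scalar.basic import UnaryScalarOp, same_out_nocomplex
from theano.tensor.elemwise import Elemwise

class Round3(UnaryScalarOp):
    def c_code(self, node, name, inputs, outputs, sub):
        (x,) = inputs
        (z,) = outputs
        return "%(z)s = round(%(x)s);" % locals()

    def impl(self, x):
        # Python fallback; its absence is exactly what
        # MethodNotDefined: ('impl', ..., 'Round3') complains about
        return np.round(x)

    def grad(self, inputs, gout):
        # straight-through estimator, as in the reference code
        (gz,) = gout
        return gz,

round3_scalar = Round3(same_out_nocomplex, name='round3')
round3 = Elemwise(round3_scalar)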

Tensorflow RNN stuck at 20% error

I created my first TensorFlow neural network, initially for generating sequences. It produced weird outputs, so I simplified it a lot to see if it could reach an error rate of 0% with just 5 inputs and 5 output classes. Somehow it does not seem to backpropagate at all, because it is stuck at a 20% error rate without moving at all. If anyone can point me to my mistake, thank you in advance :)
Cheers
import numpy as np
import tensorflow as tf
import sys
trainingInputs = [
    [[0],[0],[0],[0]],
    [[1],[0],[0],[0]],
    [[0],[1],[0],[0]],
    [[0],[0],[1],[0]],
    [[0],[0],[0],[1]]]
trainingOutputs = [
    [1,0,0,0],
    [0,1,0,0],
    [0,0,1,0],
    [0,0,0,1],
    [0,0,0,0]]
data = tf.placeholder(tf.float32, [None, len(trainingInputs[0]),1])
target = tf.placeholder(tf.float32, [None, len(trainingOutputs[0])])
num_hidden = 24
cell = tf.contrib.rnn.LSTMCell(num_hidden,state_is_tuple=True)
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)
weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
optimizer = tf.train.GradientDescentOptimizer(0.01)
minimize = optimizer.minimize(cross_entropy)
mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
batch_size = 1
no_of_batches = int((len(trainingInputs)) / batch_size)
def trainNetwork():
    epoch = 1000
    for i in range(epoch):
        ptr = 0
        for j in range(no_of_batches):
            inp, out = trainingInputs[ptr:ptr+batch_size], trainingOutputs[ptr:ptr+batch_size]
            ptr += batch_size
            sess.run(minimize, feed_dict={data: inp, target: out})
def generateOutput():
    incorrect = sess.run(error, {data: trainingInputs, target: trainingOutputs})
    sys.stdout.write('error {:3.1f}%'.format(100 * incorrect) + "\n")
    sys.stdout.flush()
for i in range(200):
    trainNetwork()
    generateOutput()
sess.close()
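No answer is recorded for this one either, but one hedged observation: the fifth target, [0,0,0,0], is all zeros, so the loss term -tf.reduce_sum(target * tf.log(prediction)) is identically zero for that sample and produces no gradient, which matches exactly one of the five patterns (20%) never being learned. A minimal sketch of a fix (an assumption, not the original author's solution) is to use proper one-hot targets over 5 classes:
# five inputs -> five mutually exclusive classes, each row a true one-hot vector
trainingOutputs = [
    [1,0,0,0,0],
    [0,1,0,0,0],
    [0,0,1,0,0],
    [0,0,0,1,0],
    [0,0,0,0,1]]
Since the placeholder and weight shapes are all derived from len(trainingOutputs[0]), no other change should be needed.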
