ValueError: It seems that you are using the Keras 2 and you are passing both `kernel_size` and `strides` as integer positional arguments - python-3.x

I'm a computer science undergraduate student in the 4th semester, and I'm learning about Machine Learning in this lockdown.
from __future__ import print_function
from keras import backend as K
K.common.set_image_dim_ordering('th') # ensure our dimension notation matches
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Reshape
from keras.layers.core import Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D
from keras.layers.convolutional import Convolution2D, AveragePooling2D
from keras.layers.core import Flatten
from keras.optimizers import SGD, Adam
from keras.datasets import mnist
from keras import utils
import numpy as np
from PIL import Image, ImageOps
import argparse
import math
import os
import os.path
import glob
def generator_model():
    model = Sequential()
    model.add(Dense(input_dim=100, output_dim=1024))
    model.add(Activation('tanh'))
    model.add(Dense(128*8*8))
    model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(Reshape((128, 8, 8), input_shape=(128*8*8,)))
    model.add(UpSampling2D(size=(4, 4)))
    model.add(Convolution2D(64, 5, 5, border_mode='same'))
    model.add(Activation('tanh'))
    model.add(UpSampling2D(size=(4, 4)))
    model.add(Convolution2D(1, 5, 5, border_mode='same'))
    model.add(Activation('tanh'))
    return model
def discriminator_model():
    model = Sequential()
    model.add(Convolution2D(64, 5, 5, border_mode='same', input_shape=(1, 128, 128)))
    model.add(Activation('tanh'))
    model.add(AveragePooling2D(pool_size=(4, 4)))
    model.add(Convolution2D(128, 5, 5))
    model.add(Activation('tanh'))
    model.add(AveragePooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(256))
    model.add(Activation('tanh'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    return model
def generator_containing_discriminator(generator, discriminator):
    model = Sequential()
    model.add(generator)
    discriminator.trainable = False
    model.add(discriminator)
    return model
def combine_images(generated_images):
    num = generated_images.shape[0]
    width = int(math.sqrt(num))
    height = int(math.ceil(float(num)/width))
    shape = generated_images.shape[2:]
    image = np.zeros((height*shape[0], width*shape[1]),
                     dtype=generated_images.dtype)
    for index, img in enumerate(generated_images):
        i = int(index/width)
        j = index % width
        image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \
            img[0, :, :]
    return image
model = generator_model()
print(model.summary())
def load_data(pixels=128, verbose=False):
    print("Loading data")
    X_train = []
    paths = glob.glob(os.path.normpath(os.getcwd() + '/logos/*.jpg'))
    for path in paths:
        if verbose: print(path)
        im = Image.open(path)
        im = ImageOps.fit(im, (pixels, pixels), Image.ANTIALIAS)
        im = ImageOps.grayscale(im)
        #im.show()
        im = np.asarray(im)
        X_train.append(im)
    print("Finished loading data")
    return np.array(X_train)
def train(epochs, BATCH_SIZE, weights=False):
    """
    :param epochs: Train for this many epochs
    :param BATCH_SIZE: Size of minibatch
    :param weights: If True, load weights from file, otherwise train the model from scratch.
        Use this if you have already saved state of the network and want to train it further.
    """
    X_train = load_data()
    X_train = (X_train.astype(np.float32) - 127.5)/127.5
    X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])
    discriminator = discriminator_model()
    generator = generator_model()
    if weights:
        generator.load_weights('goodgenerator.h5')
        discriminator.load_weights('gooddiscriminator.h5')
    discriminator_on_generator = \
        generator_containing_discriminator(generator, discriminator)
    d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
    g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)
    generator.compile(loss='binary_crossentropy', optimizer="SGD")
    discriminator_on_generator.compile(
        loss='binary_crossentropy', optimizer=g_optim)
    discriminator.trainable = True
    discriminator.compile(loss='binary_crossentropy', optimizer=d_optim)
    noise = np.zeros((BATCH_SIZE, 100))
    for epoch in range(epochs):
        print("Epoch is", epoch)
        print("Number of batches", int(X_train.shape[0]/BATCH_SIZE))
        for index in range(int(X_train.shape[0]/BATCH_SIZE)):
            for i in range(BATCH_SIZE):
                noise[i, :] = np.random.uniform(-1, 1, 100)
            image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]
            generated_images = generator.predict(noise, verbose=0)
            #print(generated_images.shape)
            if index % 20 == 0 and epoch % 10 == 0:
                image = combine_images(generated_images)
                image = image*127.5+127.5
                destpath = os.path.normpath(os.getcwd() + "/logo-generated-images/" + str(epoch) + "_" + str(index) + ".png")
                Image.fromarray(image.astype(np.uint8)).save(destpath)
            X = np.concatenate((image_batch, generated_images))
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE
            d_loss = discriminator.train_on_batch(X, y)
            print("batch %d d_loss : %f" % (index, d_loss))
            for i in range(BATCH_SIZE):
                noise[i, :] = np.random.uniform(-1, 1, 100)
            discriminator.trainable = False
            g_loss = discriminator_on_generator.train_on_batch(
                noise, [1] * BATCH_SIZE)
            discriminator.trainable = True
            print("batch %d g_loss : %f" % (index, g_loss))
        if epoch % 10 == 9:
            generator.save_weights('goodgenerator.h5', True)
            discriminator.save_weights('gooddiscriminator.h5', True)
def clean(image):
    for i in range(1, image.shape[0] - 1):
        for j in range(1, image.shape[1] - 1):
            if image[i][j] + image[i+1][j] + image[i][j+1] + image[i-1][j] + image[i][j-1] > 127 * 5:
                image[i][j] = 255
    return image
def generate(BATCH_SIZE):
    generator = generator_model()
    generator.compile(loss='binary_crossentropy', optimizer="SGD")
    generator.load_weights('goodgenerator.h5')
    noise = np.zeros((BATCH_SIZE, 100))
    a = np.random.uniform(-1, 1, 100)
    b = np.random.uniform(-1, 1, 100)
    grad = (b - a) / BATCH_SIZE
    for i in range(BATCH_SIZE):
        noise[i, :] = np.random.uniform(-1, 1, 100)
    generated_images = generator.predict(noise, verbose=1)
    #image = combine_images(generated_images)
    print(generated_images.shape)
    for image in generated_images:
        image = image[0]
        image = image*127.5+127.5
        Image.fromarray(image.astype(np.uint8)).save("dirty.png")
        Image.fromarray(image.astype(np.uint8)).show()
        clean(image)
        image = Image.fromarray(image.astype(np.uint8))
        image.show()
        image.save("clean.png")
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--nice", dest="nice", action="store_true")
    parser.set_defaults(nice=False)
    args = parser.parse_args()
    return args
train(400, 10, False)
generate(1)
I was trying this GAN code from a GitHub repository to learn about Generative Adversarial Networks, but the error below occurred. Can you please tell me where the problematic definitions are in the code? Please help me!
The troublesome line:
train(400, 10, False)
This is the error:
ValueError: It seems that you are using the Keras 2 and you are passing both `kernel_size` and `strides` as integer positional arguments. For safety reasons, this is disallowed. Pass `strides` as a keyword argument instead.

The error arises from every Conv2D (Convolution2D) layer in your model. You need to change lines like
model.add(Convolution2D(64, 5, 5, border_mode='same'))
to something like (depending on what exactly you want)
model.add(Conv2D(64, kernel_size=5, strides=2, padding='same'))
Notice that I have explicitly named the strides argument here, because the error says it should be passed as a keyword argument.
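For instance, applying that change to the two convolution layers in generator_model, keeping the original behavior (the old positional call used the default stride of 1, and border_mode became padding in Keras 2), would look like this sketch:
from keras.layers import Conv2D

# was: model.add(Convolution2D(64, 5, 5, border_mode='same'))
model.add(Conv2D(64, kernel_size=(5, 5), padding='same'))
model.add(Activation('tanh'))
model.add(UpSampling2D(size=(4, 4)))
# was: model.add(Convolution2D(1, 5, 5, border_mode='same'))
model.add(Conv2D(1, kernel_size=(5, 5), padding='same'))
The same rewrite applies to the two Convolution2D calls in discriminator_model.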

Related

Multi-input/Multi-output : Wrong output dimension when using KerasClassifier and GridSearchCV

I have built a multi-input (100 features) multi-output (100 predictions) ANN model using Keras and TensorFlow. I have been able to train my model and reach quite satisfying accuracy on the test set using the following code:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
def my_loss_fn(y_true, y_pred):
    d = K.sum(K.abs(y_true), axis=-1)
    n = K.sum((K.tanh(100000*y_true*y_pred)/2 + 0.5)*K.abs(y_true), axis=-1)
    return 1 - n/d

def my_metric_fn(y_true, y_pred):
    d = K.sum(K.abs(y_true))
    n = K.sum((K.tanh(100000*y_true*y_pred)/2 + 0.5)*K.abs(y_true))
    return n/d

def accuracy(y_true, y_pred):
    #print(y_true.shape, y_true)
    #print(y_pred.shape, y_pred)
    acc = np.zeros([1, len(y_true)])
    for day in range(len(y_pred)):
        d = 0
        n = 0
        for i in range(len(y_pred[0])):
            d = d + abs(y_true[day, i])
            if np.sign(y_pred[day, i])*np.sign(y_true[day, i]) > 0:
                n = n + abs(y_true[day, i])
            else:
                n = n + 0
        acc[0, day] = n/d
    return np.mean(acc, axis=-1)[0]
#Model
classifier = Sequential()
classifier.add(Dense(units = 50, input_shape = (100, ), activation = "tanh"))
classifier.add(Dropout(0.2))
classifier.add(Dense(units=100, activation = 'tanh'))
classifier.compile(optimizer = 'rmsprop', loss = my_loss_fn, metrics = ['accuracy', my_metric_fn])
#Training
callback = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', min_delta = 0.0001, patience = 20, verbose = 0, mode = 'min')
nb_epochs = 250
history = classifier.fit(X_train, y_train, epochs = nb_epochs, batch_size = 31, callbacks = [callback], verbose = True, validation_split = 0., validation_data = (X_test, y_test), use_multiprocessing = True)
#Prediction
y_pred_train = classifier.predict(X_train)
y_pred_test = classifier.predict(X_test)
acc_test = accuracy(y_test, y_pred_test)
acc_train = accuracy(y_train, y_pred_train)
I am trying to improve the performance of my model by tuning the hyperparameters, so I used KerasClassifier() and GridSearchCV(). The following code illustrates my approach to the grid search.
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from tensorflow import autograph
#Building a function to create the classifier
def build_classifier(nb_layers, nb_nodes, optimizer, dropout, activation_fn):
    classifier = Sequential()
    classifier.add(Dense(units=nb_nodes, input_shape=(100,), activation=activation_fn))
    for i in range(nb_layers-1):
        classifier.add(Dense(units=nb_nodes, activation=activation_fn, kernel_initializer="uniform"))
        classifier.add(Dropout(dropout))
    classifier.add(Dense(units=100, activation='tanh'))
    classifier.compile(optimizer=optimizer, loss=tf.autograph.experimental.do_not_convert(my_loss_fn), metrics=['accuracy', tf.autograph.experimental.do_not_convert(my_metric_fn)])
    return classifier
#Creating a scorer to feed to the GridSearchCV()
my_scorer = make_scorer(accuracy, greater_is_better = True)
classifier=KerasClassifier(build_fn=build_classifier)
parameters={'batch_size':[13, 31],'epochs':[100, 150], 'optimizer':['adam', 'rmsprop'], 'dropout' : [0.2, 0.1], 'nb_layers' : [2, 3], 'nb_nodes' : [45, 50, 110, 115], 'activation_fn' : ['relu', 'tanh']}
grid_search=GridSearchCV(estimator=classifier, scoring = my_scorer, param_grid=parameters, cv=5, verbose = 1)
grid_search=grid_search.fit(X_train_, y_train_raw)
When I fit my GridSearchCV() object, I get the following error at the end of the first combination of hyperparameters (when the scoring is computed):
TypeError: object of type 'numpy.int32' has no len()
I investigated by adding print commands inside my accuracy() function
#print(y_true.shape, y_true)
#print(y_pred.shape, y_pred)
to print both the shape and the contents of y_true and y_pred given as inputs to my accuracy() function, which is used as the scoring in the GridSearchCV() object.
I found out that y_true.shape == (555, 100) but y_pred.shape == (555,). The value 555 corresponds to the number of rows in the fifth validation fold, because cv = 5.
However, I do not understand why the grid search's prediction is not a multi-output prediction, even though the last layer of the classifier has 100 nodes.
This was a regression problem, so I used KerasRegressor() instead and it solved the issue. I guess that for a multi-output classification problem, KerasClassifier() expects the output to be a 2D one-hot encoded array.
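A minimal sketch of that swap, reusing the build_classifier, my_scorer, and parameters objects defined above (only the wrapper class changes):
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV

# KerasRegressor returns the raw (n_samples, 100) network output,
# so the custom scorer receives the 2D y_pred it expects.
regressor = KerasRegressor(build_fn=build_classifier)
grid_search = GridSearchCV(estimator=regressor, scoring=my_scorer,
                           param_grid=parameters, cv=5, verbose=1)
grid_search = grid_search.fit(X_train_, y_train_raw)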

Why is my GAN not producing more good images after a certain point?

Question
I was training a GAN to generate human faces. Within approximately 500 epochs, it learned to generate images like this:
Well, this image is not too bad. We can see a face in the center of the image.
Then I trained it for 1000 more epochs and it learned nothing. It was still generating the same type of images as shown above. Why was that? Why wasn't my GAN learning to produce even better images?
Code for the Models
Here is the code of the discriminator:
def define_discriminator(in_shape=(64, 64, 3)):
    Model = Sequential([
        Conv2D(32, (3, 3), padding='same', input_shape=in_shape),
        BatchNormalization(),
        LeakyReLU(alpha=0.2),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        Conv2D(64, (3, 3), padding='same'),
        BatchNormalization(),
        LeakyReLU(alpha=0.2),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.3),
        Conv2D(128, (3, 3), padding='same'),
        BatchNormalization(),
        LeakyReLU(alpha=0.2),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.3),
        Conv2D(256, (3, 3), padding='same'),
        BatchNormalization(),
        LeakyReLU(alpha=0.2),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.4),
        Flatten(),
        Dense(1, activation='sigmoid')
    ])
    opt = Adam(lr=0.00002)
    Model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return Model
Here is the code of the generator and the GAN:
def define_generator(in_shape=100):
    Model = Sequential([
        Dense(256*8*8, input_dim=in_shape),
        BatchNormalization(),
        LeakyReLU(alpha=0.2),
        Reshape((8, 8, 256)),
        Conv2DTranspose(256, (3, 3), strides=(2, 2), padding='same'),
        BatchNormalization(),
        LeakyReLU(alpha=0.2),
        Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same'),
        BatchNormalization(),
        LeakyReLU(alpha=0.2),
        Conv2DTranspose(3, (4, 4), strides=(2, 2), padding='same', activation='sigmoid')
    ])
    return Model

def define_gan(d_model, g_model):
    d_model.trainable = False
    model = Sequential([
        g_model,
        d_model
    ])
    opt = Adam(lr=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model
Entire Reproducible Code
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization
from tensorflow.keras.layers import Dropout, Flatten, Dense, Conv2DTranspose
from tensorflow.keras.layers import MaxPooling2D, Activation, Reshape, LeakyReLU
from tensorflow.keras.datasets import mnist
from tensorflow.keras.optimizers import Adam
from numpy import ones
from numpy import zeros
from numpy.random import rand
from numpy.random import randint
from numpy.random import randn
from numpy import vstack
from numpy import array
import os
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from matplotlib import pyplot
def load_data(filepath):
    image_array = []
    n = 0
    for fold in os.listdir(filepath):
        if fold != 'wiki.mat':
            if n > 1:
                break
            for img in os.listdir(os.path.join(filepath, fold)):
                image = load_img(filepath + fold + '/' + img, target_size=(64, 64))
                img_array = img_to_array(image)
                img_array = img_array.astype('float32')
                img_array = img_array / 255.0
                image_array.append(img_array)
            n += 1
    return array(image_array)

def generate_latent_points(n_samples, latent_dim=100):
    latent_points = randn(n_samples * latent_dim)
    latent_points = latent_points.reshape(n_samples, latent_dim)
    return latent_points

def generate_real_samples(n_samples, dataset):
    ix = randint(0, dataset.shape[0], n_samples)
    x = dataset[ix]
    y = ones((n_samples, 1))
    return x, y

def generate_fake_samples(g_model, n_samples):
    latent_points = generate_latent_points(n_samples)
    x = g_model.predict(latent_points)
    y = zeros((n_samples, 1))
    return x, y

def save_plot(examples, epoch, n=10):
    # plot images
    for i in range(n * n):
        # define subplot
        pyplot.subplot(n, n, 1 + i)
        # turn off axis
        pyplot.axis('off')
        # plot raw pixel data
        pyplot.imshow(examples[i, :, :, 0])
    # save plot to file
    filename = 'generated_plot_e%03d.png' % (epoch+1)
    pyplot.savefig(filename)
    pyplot.close()

def summarize_performance(d_model, g_model, gan_model, dataset, epoch, n_samples=100):
    real_x, real_y = generate_real_samples(n_samples, dataset)
    _, d_real_acc = d_model.evaluate(real_x, real_y)
    fake_x, fake_y = generate_fake_samples(g_model, n_samples)
    _, d_fake_acc = d_model.evaluate(fake_x, fake_y)
    latent_points, y = generate_latent_points(n_samples), ones((n_samples, 1))
    gan_loss = gan_model.evaluate(latent_points, y)
    print('Epoch %d, acc_real=%.3f, acc_fake=%.3f, gan_loss=%.3f' % (epoch, d_real_acc, d_fake_acc, gan_loss))
    save_plot(fake_x, epoch)
    filename = 'Generator_Model %d' % (epoch + 1)
    g_model.save(filename)

def train(d_model, g_model, gan_model, dataset, epochs=200):
    batch_size = 64
    half_batch = int(batch_size / 2)
    batch_per_epoch = int(dataset.shape[0] / batch_size)
    for epoch in range(epochs):
        for i in range(batch_per_epoch):
            real_x, real_y = generate_real_samples(half_batch, dataset)
            _, d_real_acc = d_model.train_on_batch(real_x, real_y)
            fake_x, fake_y = generate_fake_samples(g_model, half_batch)
            _, d_fake_acc = d_model.train_on_batch(fake_x, fake_y)
            latent_points, y = generate_latent_points(batch_size), ones((batch_size, 1))
            gan_loss = gan_model.train_on_batch(latent_points, y)
        print('Epoch %d, acc_real=%.3f, acc_fake=%.3f, gan_loss=%.3f' % (epoch, d_real_acc, d_fake_acc, gan_loss))
        if (epoch % 2) == 0:
            summarize_performance(d_model, g_model, gan_model, dataset, epoch)

dataset = load_data(filepath)  # filepath is not defined since every person will have a separate filepath
discriminator_model = define_discriminator()
generator_model = define_generator()
gan_model = define_gan(discriminator_model, generator_model)
train(discriminator_model, generator_model, gan_model, dataset)
Dataset
If you want it, here is the dataset.

Tensorflow 2.0 Identical model structure and hyper parameters result in different performance in different calling approaches

Hi there. I am a beginner learning TensorFlow 2.0. I have one model called in 3 different approaches, and the performances are different. Could anyone tell me why this is the case?
The model constructing and calling approach:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers
def prepare_mnist_features_and_labels(x, y):
    x = tf.cast(x, tf.float32)/255.0
    y = tf.cast(y, tf.int64)
    return x, y

def mnist_dataset():
    (x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
    print('x_train/y_train shape:', x_train.shape, y_train.shape)
    y_train = tf.one_hot(y_train, depth=10)
    y_eval = tf.one_hot(y_eval, depth=10)
    ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    ds_train = ds_train.map(prepare_mnist_features_and_labels)
    ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)
    ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
    ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
    ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)
    sample = next(iter(ds_train))
    print('sample: ', sample[0].shape, sample[1].shape)
    return ds_train, ds_eval

def main():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    trainset, evalset = mnist_dataset()
    model = keras.Sequential()
    model.add(layers.Reshape(target_shape=[28, 28, 1], input_shape=[28, 28]))
    model.add(layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME"))
    model.add(layers.MaxPool2D(pool_size=(2, 2), strides=[1, 1], padding="SAME"))
    model.add(layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME"))
    model.add(layers.MaxPool2D(pool_size=(2, 2), strides=[2, 2], padding="SAME"))
    model.add(layers.Flatten())
    model.add(layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01)))
    model.add(layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01)))
    model.compile(optimizer=optimizers.Adam(lr=0.01), loss=tf.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
    model.fit(trainset.repeat(), epochs=30, steps_per_epoch=500,
              validation_data=evalset.repeat(), validation_steps=10)

if __name__ == '__main__':
    main()
The second approach to construct the model and run it is the following:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers
from tqdm import tqdm
def prepare_mnist_features_and_labels(x, y):
    x = tf.cast(x, tf.float32)/255.0
    y = tf.cast(y, tf.int64)
    return x, y

def mnist_dataset():
    (x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
    # y_train = tf.one_hot(y_train, depth=10)
    # y_eval = tf.one_hot(y_eval, depth=10)
    ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    ds_train = ds_train.map(prepare_mnist_features_and_labels)
    # Test: replace x_train.shape[0] by the number of the training samples, which is 60000
    ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)
    ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
    ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
    ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)
    # sample = next(iter(ds_train))
    # print('sample: ', sample[0].shape, sample[1].shape)
    return ds_train, ds_eval

# tf.nn.sparse_softmax_cross_entropy_with_logits(labels, logits, name=None):
# labels: Tensor of shape [d_0, d_1, ..., d_{r-1}]. Each label must be an index in [0, num_classes)
# logits: Unscaled log probabilities of shape [d_0, d_1, ..., d_{r-1}, num_classes]
# A common use is to have logits of shape [batch_size, num_classes] and labels of shape [batch_size]
def compute_loss(logits, labels):
    # print(logits.numpy())
    # print(labels.numpy())
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits
        )
    )

def compute_accuracy(logits, labels):
    predictions = tf.argmax(logits, axis=1)
    # print(predictions)
    # print(labels)
    # print(list(zip(predictions.numpy(), labels.numpy())))
    return tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))

def train_one_step(model, optimizer, x, y):
    # At each train step, first calculate the forward loss
    with tf.GradientTape() as tape:
        logits = model(x)
        loss = compute_loss(logits, y)
    # Then calculate the backward gradients over each trainable variable
    grads = tape.gradient(loss, model.trainable_variables)
    # Optimize and update variables through backpropagation
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # Compute current model accuracy
    accuracy = compute_accuracy(logits, y)
    return loss, accuracy

def train(epoch, model, optimizer, trainset):
    #def train(epoch, model, optimizer):
    #    trainset = mnist_dataset()[0]
    loss = 0.0
    accuracy = 0.0
    for step, (x, y) in enumerate(tqdm(trainset)):
        loss, accuracy = train_one_step(model, optimizer, x, y)
        if step % 110 == 0:
            print('epoch', epoch, ': loss', loss.numpy(), '; accuracy', accuracy.numpy())
    return loss, accuracy

class MyModel(keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.layer1 = layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME", input_shape=(-1, 28, 28, 1))
        self.layer2 = layers.MaxPool2D(pool_size=(2, 2), strides=[1, 1], padding="SAME")
        self.layer3 = layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME")
        self.layer4 = layers.MaxPool2D(pool_size=(2, 2), strides=[2, 2], padding="SAME")
        self.layer5 = layers.Flatten()
        self.layer6 = layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))
        self.layer7 = layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))

    def call(self, x, training=False):
        x = tf.reshape(x, (-1, 28, 28, 1))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x

def main():
    # set random seed
    tf.random.set_seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    trainset, evalset = mnist_dataset()
    model = MyModel()
    optimizer = optimizers.Adam(lr=0.001)
    # Save checkpoints with keras api as the first approach
    # Save checkpoints manually as a second approach.
    # find a way to implement early-stopping strategy in the programming style
    # for epoch in tqdm(range(30)):
    for epoch in range(50):
        loss, accuracy = train(epoch, model, optimizer, trainset)
    print('Final epoch', epoch, ': loss', loss.numpy(), '; accuracy', accuracy.numpy())

if __name__ == '__main__':
    main()
And the last approach is below:
import os, sys
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def prepare_mnist_features_and_labels(x, y):
    x = tf.cast(x, tf.float32)/255.0
    y = tf.cast(y, tf.int64)
    return x, y

def mnist_dataset():
    (x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
    print('x_train/y_train shape:', x_train.shape, y_train.shape)
    y_train = tf.one_hot(y_train, depth=10)
    y_eval = tf.one_hot(y_eval, depth=10)
    ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    ds_train = ds_train.map(prepare_mnist_features_and_labels)
    ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)
    ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
    ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
    ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)
    sample = next(iter(ds_train))
    print('sample: ', sample[0].shape, sample[1].shape)
    return ds_train, ds_eval

class MyModel(keras.Model):
    # self.model = keras.Sequential([
    #     layers.Reshape(target_shape=(28*28, ), input_shape=(28, 28)),
    #     layers.Dense(100, activation=tf.nn.relu),
    #     layers.Dense(100, activation=tf.nn.relu),
    #     layers.Dense(10)
    # ])
    def __init__(self):
        super(MyModel, self).__init__()
        self.layer1 = layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME", input_shape=(-1, 28, 28, 1))
        self.layer2 = layers.MaxPool2D(pool_size=(2, 2), strides=[1, 1], padding="SAME")
        self.layer3 = layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME")
        self.layer4 = layers.MaxPool2D(pool_size=(2, 2), strides=[2, 2], padding="SAME")
        self.layer5 = layers.Flatten()
        self.layer6 = layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))
        self.layer7 = layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))

    def call(self, x, training=False):
        x = tf.reshape(x, (-1, 28, 28, 1))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x

def main():
    tf.random.set_seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    trainset, evalset = mnist_dataset()
    model = MyModel()
    model.compile(optimizer=optimizers.Adam(lr=0.001), loss=tf.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
    model.fit(trainset.repeat(), epochs=30, steps_per_epoch=500, verbose=1,
              validation_data=evalset.repeat(), validation_steps=10)

if __name__ == '__main__':
    main()
Each of them takes a while to train. Can anyone tell me why the performances are different? And how should I debug this by myself in the future?
Thank you so much for any help.
The problem is solved after carefully examining the network. It turns out that the last fully connected layer in the model was activated with a relu function, which is not appropriate. The choice between the loss functions tf.losses.CategoricalCrossentropy and tf.nn.sparse_softmax_cross_entropy_with_logits also makes a big difference. Whichever gets chosen, make sure the loss function and the final output of the network match.
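To illustrate the point with a simplified stand-alone model (a sketch, not the exact network from the question): leave the final Dense layer without an activation so it emits raw logits, and pair it with a loss constructed with from_logits=True:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers

model = models.Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(512, activation='relu'),
    layers.Dense(10),  # no activation: this layer emits raw logits
])
# from_logits=True makes the loss apply softmax internally,
# matching the unactivated final layer above.
model.compile(optimizer=optimizers.Adam(0.001),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])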

Actually printing values from tensor object

I'm currently trying to implement a basic autoencoder using Keras, and I have come to the stage where I want the output from the second hidden layer. I think I'm able to get the right object; the problem is that I get it as a tensor object. The code I've been trying to run is the following:
from keras.layers import Input, Dense, initializers
import numpy as np
from Dataset import Dataset
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Dense, Activation
import tensorflow as tf
import time
#global variables
d = Dataset()
num_features = d.X_train.shape[1]
#input = [784, 400, 100, 10, 100, 400]
#output = [400, 100, 10, 100, 400, 784]
names = ['hidden1', 'hidden2', 'hidden3', 'hidden4', 'hidden5', 'hidden6']
list_of_nodes = [784, 400, 144, 10]
def generate_hidden_nodes(list_of_nodes):
    input = []
    for j in range(len(list_of_nodes)):
        input.append(list_of_nodes[j])
    for i in range(len(list_of_nodes)-2):
        input.append(list_of_nodes[-2-i])
    output = input[::-1]
    return input, output
input,output = generate_hidden_nodes(list_of_nodes)
def autoencoder(epochs):
    w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
    model = Sequential()
    input, output = generate_hidden_nodes(list_of_nodes)
    for j in range(len(input)):
        if j == (len(input)-1):
            model.add(Dense(output[j], activation='sigmoid', kernel_initializer=w, input_dim=input[j], name=names[j]))
            #model.add(Dropout(0.45))
        else:
            model.add(Dense(output[j], activation='relu', kernel_initializer=w, input_dim=input[j],
                            name=names[j]))
            #model.add(Dropout(0.45))
    model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
    history = model.fit(d.X_train, d.X_train,
                        epochs=epochs,
                        batch_size=50,
                        shuffle=True,
                        validation_split=0.2)
    #validation_data=(d.X_test, d.X_test))
    #print(history.history.keys())
    #plt.plot(history.history['val_acc'])
    #print(history.history['val_acc'])
    plt.show()
    return model
def cv():
    accuracy = 0
    size = 5
    epochs = 20
    variance = 0
    storage = np.zeros((size, epochs))
    for j in range(size):
        ae = autoencoder(epochs)
        #print(ae.history.history['val_acc'])
        storage[j] = ae.history.history['val_acc']
    for i in range(size):
        accuracy += storage[i][-1]
    mean = accuracy/size
    for k in range(size):
        variance += ((storage[k][-1] - mean)**2)
    variance = variance/size
    return mean, variance

#mean, variance = cv()
#print(mean)
#print(variance)
#time.sleep(10)
def finding_index():
    elements, index = np.unique(d.Y_test, return_index=True)
    return elements, index
def plotting():
    ae = autoencoder(20)
    elements, index = finding_index()
    y_proba = ae.predict(d.X_test)
    plt.figure(figsize=(20, 4))
    # size = 20
    for i in range(len(index)):
        ax = plt.subplot(2, len(index), i + 1)
        plt.imshow(d.X_test[index[i]].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax = plt.subplot(2, len(index), i + 1 + len(index))
        plt.imshow(y_proba[index[i]].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()
def plotting_weights(epochs):
    ae = autoencoder(epochs)
    output_layer = ae.get_layer('hidden2')
    weights = output_layer.get_weights()[0]
    print(weights.shape)
    size = 20
    plt.figure(figsize=(20, 4))
    for j in range(3):
        plt.gray()
        plt.imshow(weights[j].reshape(12, 12))
    plt.show()
def get_output():
    w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
    new_model = Sequential()
    new_model.add(Dense(400, activation='relu', kernel_initializer=w, input_dim=784))
    new_model.add(Dense(144, activation='sigmoid', kernel_initializer=w, input_dim=400))
    #new_model.add(Dense(784, activation='sigmoid', kernel_initializer=w, input_dim=144))
    new_model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
    history = new_model.fit(d.X_train, d.X_train,
                            epochs=20,
                            batch_size=50,
                            shuffle=True,
                            validation_split=0.2)
    y = new_model.predict(d.X_test)
    elements, index = finding_index()
    #return y.shape
def get_output2():
    ae = autoencoder(5)
    a = ae.layers[1].output()
    init_op = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init_op)  # execute init_op
        # print the random values that we sample
        print(a)

get_output2()
I've tried to just print(a) as well, but as I said, that returns a tensor object. Can someone tell me how I can actually print those values? Thanks in advance!
Simplest:
import keras.backend as K
print(K.eval(ae.layers[1].output))
This is equivalent to:
with tf.Session() as sess:
    print(sess.run(a))
I find it more readable to simply use the keras.backend interface. Note that layers[1].output is an attribute, not a method, so there are no parentheses after it.
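Another route (my own suggestion, not part of the answer above) is to wrap the layers up to the one you care about in a new Model and call predict, which returns plain numpy arrays for real input data:
from keras.models import Model

ae = autoencoder(5)  # the trained model from the question
# sub-model mapping the autoencoder's input to the second layer's output
encoder = Model(inputs=ae.input, outputs=ae.layers[1].output)
hidden = encoder.predict(d.X_test)  # concrete numpy values
print(hidden.shape)
print(hidden[0])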

How do I create a variable-length input LSTM in Keras?

I am trying to do some vanilla pattern recognition with an LSTM using Keras to predict the next element in a sequence.
My data look like this:
where the label of the training sequence is the last element in the list: X_train['Sequence'][n][-1].
Because my Sequence column can have a variable number of elements in the sequence, I believe an RNN to be the best model to use. Below is my attempt to build an LSTM in Keras:
# Build the model
# A few arbitrary constants...
max_features = 20000
out_size = 128
# The max length should be the length of the longest sequence (minus one to account for the label)
max_length = X_train['Sequence'].apply(len).max() - 1
# Normal LSTM model construction with sigmoid activation
model = Sequential()
model.add(Embedding(max_features, out_size, input_length=max_length, dropout=0.2))
model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2))
model.add(Dense(1))
model.add(Activation('sigmoid'))
# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
And here's how I attempt to train my model:
# Train the model
for seq in X_train['Sequence']:
    print("Length of training is {0}".format(len(seq[:-1])))
    print("Training set is {0}".format(seq[:-1]))
    model.fit(np.array([seq[:-1]]), [seq[-1]])
My output is this:
Length of training is 13
Training set is [1, 3, 13, 87, 1053, 28576, 2141733, 508147108, 402135275365, 1073376057490373, 9700385489355970183, 298434346895322960005291, 31479360095907908092817694945]
However, I get the following error:
Exception: Error when checking model input: expected embedding_input_1 to have shape (None, 347) but got array with shape (1, 13)
I believe my training step is correctly set up, so my model construction must be wrong. Note that 347 is max_length.
How can I correctly build a variable-length input LSTM in Keras? I'd prefer not to pad the data. Not sure if it's relevant, but I'm using the Theano backend.
I am not clear about the embedding procedure, but here is a way to implement a variable-length input LSTM: just do not specify the timespan dimension when building the LSTM.
import keras.backend as K
from keras.layers import LSTM, Input
I = Input(shape=(None, 200))  # unknown timespan, fixed feature size
lstm = LSTM(20)
f = K.function(inputs=[I], outputs=[lstm(I)])
import numpy as np
data1 = np.random.random(size=(1, 100, 200))  # batch_size = 1, timespan = 100
print(f([data1])[0].shape)
# (1, 20)
data2 = np.random.random(size=(1, 314, 200))  # batch_size = 1, timespan = 314
print(f([data2])[0].shape)
# (1, 20)
The trick to training and classifying sequences is training with masking and classifying using a stateful network. Here's an example that I made that classifies whether a sequence of variable length starts with zero or not.
import numpy as np
np.random.seed(1)
import tensorflow as tf
tf.set_random_seed(1)
from keras import models
from keras.layers import Dense, Masking, LSTM
import matplotlib.pyplot as plt
def stateful_model():
    hidden_units = 256
    model = models.Sequential()
    model.add(LSTM(hidden_units, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
    model.add(Dense(1, activation='relu', name='output'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop')
    return model

def train_rnn(x_train, y_train, max_len, mask):
    epochs = 10
    batch_size = 200
    vec_dims = 1
    hidden_units = 256
    in_shape = (max_len, vec_dims)
    model = models.Sequential()
    model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
    model.add(LSTM(hidden_units, return_sequences=False))
    model.add(Dense(1, activation='relu', name='output'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop')
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
              validation_split=0.05)
    return model

def gen_train_sig_cls_pair(t_stops, num_examples, mask):
    x = []
    y = []
    max_t = int(np.max(t_stops))
    for t_stop in t_stops:
        one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
        sig = np.zeros((num_examples, max_t), dtype=np.int8)
        sig[one_indices, 0] = 1
        sig[:, t_stop:] = mask
        x.append(sig)
        cls = np.zeros(num_examples, dtype=np.bool)
        cls[one_indices] = 1
        y.append(cls)
    return np.concatenate(x, axis=0), np.concatenate(y, axis=0)

def gen_test_sig_cls_pair(t_stops, num_examples):
    x = []
    y = []
    for t_stop in t_stops:
        one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
        sig = np.zeros((num_examples, t_stop), dtype=np.bool)
        sig[one_indices, 0] = 1
        x.extend(list(sig))
        cls = np.zeros((num_examples, t_stop), dtype=np.bool)
        cls[one_indices] = 1
        y.extend(list(cls))
    return x, y

if __name__ == '__main__':
    noise_mag = 0.01
    mask_val = -10
    signal_lengths = (10, 15, 20)
    x_in, y_in = gen_train_sig_cls_pair(signal_lengths, 10, mask_val)
    mod = train_rnn(x_in[:, :, None], y_in, int(np.max(signal_lengths)), mask_val)
    testing_dat, expected = gen_test_sig_cls_pair(signal_lengths, 3)
    state_mod = stateful_model()
    state_mod.set_weights(mod.get_weights())
    res = []
    for s_i in range(len(testing_dat)):
        seq_in = list(testing_dat[s_i])
        seq_len = len(seq_in)
        for t_i in range(seq_len):
            res.extend(state_mod.predict(np.array([[[seq_in[t_i]]]])))
        state_mod.reset_states()
    fig, axes = plt.subplots(2)
    axes[0].plot(np.concatenate(testing_dat), label="input")
    axes[1].plot(res, "ro", label="result", alpha=0.2)
    axes[1].plot(np.concatenate(expected, axis=0), "bo", label="expected", alpha=0.2)
    axes[1].legend(bbox_to_anchor=(1.1, 1))
    plt.show()
I am not sure how applicable recurrent networks are for your sequences, i.e. how strongly dependent each element is on its preceding sequence as opposed to other factors. That being said (which doesn't help you one bit, of course), if you don't want to pad your input with some bad value, a stateful model that processes a single timestep at a time is the only alternative for variable-length sequences, IMHO. If you don't mind taking an alternative approach to encoding:
import numpy as np
import keras.models as kem
import keras.layers as kel
import keras.callbacks as kec
import sklearn.preprocessing as skprep
X_train, max_features = {'Sequence': [[1, 2, 4, 5, 8, 10, 16], [1, 2, 1, 5, 5, 1, 11, 16, 7]]}, 16
num_mem_units = 64
size_batch = 1
num_timesteps = 1
num_features = 1
num_targets = 1
num_epochs = 1500
model = kem.Sequential()
model.add(kel.LSTM(num_mem_units, stateful=True, batch_input_shape=(size_batch, num_timesteps, num_features),
                   return_sequences=True))
model.add(kel.Dense(num_targets, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam')
range_act = (0, 1)  # sigmoid
range_features = np.array([0, max_features]).reshape(-1, 1)
normalizer = skprep.MinMaxScaler(feature_range=range_act)
normalizer.fit(range_features)
reset_state = kec.LambdaCallback(on_epoch_end=lambda *_: model.reset_states())

# training
for seq in X_train['Sequence']:
    X = seq[:-1]
    y = seq[1:]  # predict next element
    X_norm = normalizer.transform(np.array(X).reshape(-1, 1)).reshape(-1, num_timesteps, num_features)
    y_norm = normalizer.transform(np.array(y).reshape(-1, 1)).reshape(-1, num_timesteps, num_targets)
    model.fit(X_norm, y_norm, epochs=num_epochs, batch_size=size_batch, shuffle=False,
              callbacks=[reset_state])

# prediction
for seq in X_train['Sequence']:
    model.reset_states()
    for istep in range(len(seq)-1):  # input up to not incl last
        val = seq[istep]
        X = np.array([val]).reshape(-1, 1)
        X_norm = normalizer.transform(X).reshape(-1, num_timesteps, num_features)
        y_norm = model.predict(X_norm)
    yhat = int(normalizer.inverse_transform(y_norm[0])[0, 0])
    y = seq[-1]  # last
    put = '{0} predicts {1:d}, expecting {2:d}'.format(', '.join(str(val) for val in seq[:-1]), yhat, y)
    print(put)
which produces something like:
1, 2, 4, 5, 8, 10 predicts 11, expecting 16
1, 2, 1, 5, 5, 1, 11, 16 predicts 7, expecting 7
with ridiculous loss, however.
It turns out that you can do this using ragged inputs.
Firstly, you need to convert your input data to classes using the to_categorical function
from tensorflow.keras.utils import to_categorical
from tensorflow.ragged import constant
X_train = constant(list(map(lambda x: to_categorical(x, num_classes=max_features),X_train)))
Then, you need to edit your model slightly:
model = Sequential()
model.add(Input((None, max_features), ragged=True))  # use this instead of an Embedding
# model.add(Embedding(max_features, out_size, input_length=max_length, dropout=0.2))  # replaced by the ragged Input above
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))  # dropout_W/dropout_U became dropout/recurrent_dropout in Keras 2
model.add(Dense(1))
model.add(Activation('sigmoid'))
And then work from there!
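As a rough usage sketch (assuming binary labels y_train, as in the original binary_crossentropy setup), the ragged model then compiles and fits like any other Keras model, with no padding required:
import numpy as np

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# X_train is the ragged constant built above
model.fit(X_train, np.array(y_train), epochs=10, batch_size=32)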
