K.gradients() return “None”. (Keras with tensorflow backend). Is there a problem with my model structure or something?
I have modified WGAN-GP model with keras for text, refer to this code: [https://github.com/OctThe16th/WGAN-GP-with-keras-for-text/blob/master/Exploration/GenerativeAdverserialWGAN-GP.py].
from keras.layers import Dense, Flatten, Input, BatchNormalization, Dropout, GRU, Bidirectional, Reshape, Activation
from keras.layers.noise import GaussianNoise
from keras.models import Model
from keras.layers.merge import _Merge
from keras.layers import Convolution1D, AveragePooling1D, ZeroPadding1D, UpSampling1D, concatenate, regularizers
from keras.layers import Embedding, Reshape, Lambda
from keras.layers import LSTM, multiply
from keras.optimizers import Adam, RMSprop
from random import randint
from keras.initializers import Constant
from keras import backend as K
from keras import layers
import numpy as np
from functools import partial
import pickle
import os
import tensorflow as tf
def wasserstein_loss(y_true, y_pred):
return K.mean(y_true * y_pred)
def gradient_penalty_loss(y_true, y_pred, averaged_samples):
'''Computes gradient penalty based on prediction and weighted real / fake samples'''
gradients = K.gradients(K.sum(y_pred), averaged_samples)
# compute the euclidean norm by squaring ...
gradients_sqr = K.square(gradients)
# ... summing over the rows ...
gradients_sqr_sum = K.sum(gradients_sqr)
# ... and sqrt
gradient_l2_norm = K.sqrt(gradients_sqr_sum)
# compute lambda * (1 - ||grad||)^2 still for each single sample
gradient_penalty = K.square(1 - gradient_l2_norm)
# return the mean as loss over all the batch samples
return K.mean(gradient_penalty)
class RandomWeightedAverage(_Merge):
def _merge_function(self, inputs):
weights = K.random_uniform((BATCH_SIZE, 1), dtype='float32')
return (weights * inputs[0]) + ((1 - weights) * inputs[1])
#K.argmax() is not differentiable, this function is defined to repalce K.argmax(), and it is differentiable.
def argmax(x):
y = tf.reduce_sum(tf.cumsum(tf.ones_like(x), axis=-1) * tf.exp(beta * x) / tf.reduce_sum(tf.exp(beta * x), axis=-1, keep_dims=True), axis=-1) - 1
return y
def generator_mod(softmax_shape):
person1_input = Input(shape=(1,), dtype='float32')
noise_input = Input(shape=(1, person_embedding_dim), dtype='float32')
relation_input = Input(shape=(1,), dtype='float32')
person1_embedded = Embedding(1,person_embedding_dim)(person1_input)
relation_embedded = Embedding(1,relation_embedding_dim)(relation_input)
embedded_layer = concatenate([person1_embedded, relation_embedded, noise_input], axis=1)
drop_1 = BatchNormalization(momentum=0.8)(embedded_layer)
x_1 = Convolution1D(filters=64, kernel_size=3, padding='same', activation='elu')(drop_1)
x_1 = BatchNormalization()(x_1)
x_1 = Convolution1D(filters=32, kernel_size=3, padding='same', activation='elu')(x_1)
x_1 = BatchNormalization()(x_1)
x_1 = Flatten()(x_1)
x_1 = Dense(32, activation='relu')(x_1)
######################################################################
person1_description = Input(shape=(max_sequence_length,), dtype='float32')
embedded_sequences1 = Embedding(len(word_index) + 1, word_embeddings_dim)(person1_description)
lstm_out1 = Bidirectional(LSTM(64))(embedded_sequences1)
attention_1 = Dense(128, activation='softmax', name='attention_vec')(lstm_out1)
attention_mul = multiply([lstm_out1, attention_1], name='attention_mul')
#####globel attention finish#####
x_2 = Dense(32, activation='relu')(attention_mul)
full_connected = multiply([x_1, x_2], name='full_connected')
x = Dense(softmax_shape, activation='softmax')(full_connected)
output = Lambda(argmax)(x)#shape (?,)
output = Lambda(K.expand_dims, arguments={'axis': -1})(output) #shape (?,1)
model = Model(inputs = [person1_input, noise_input, relation_input, person1_description], outputs = output)
return model
def discriminator_mod():
person1_input = Input(shape=(1,), dtype='float32')
person2_input = Input(shape=(1,), dtype='float32')
relation_input = Input(shape=(1,), dtype='float32')
person1_embedded = Embedding(1, person_embedding_dim)(person1_input)
person2_embedded = Embedding(1, person_embedding_dim)(person2_input)
relation_embedded = Embedding(len(word_index) + 1, word_embeddings_dim)(relation_input)
embedded_layer = concatenate([person1_embedded, person2_embedded, relation_embedded], axis=1)
drop_1 = Dropout(0.5)(embedded_layer)
x = Convolution1D(128, 1, activation='relu')(drop_1)
x = BatchNormalization()(x)
x = Convolution1D(filters=64, kernel_size=3, padding='same', activation='elu')(x)
x = BatchNormalization()(x)
x = Convolution1D(filters=32, kernel_size=3, padding='same', activation='elu')(x)
x = BatchNormalization()(x)
x = Flatten()(x)
x = Dense(32, activation='relu')(x)
auxiliary_input1 = Input(shape=(max_sequence_length,), dtype='float32', name='aux_input1')
embedded_sequences1 = Embedding(len(word_index) + 1, word_embeddings_dim)(auxiliary_input1)
lstm_out1 = Bidirectional(LSTM(64))(embedded_sequences1)
lstm_drop1 = Dropout(0.5)(lstm_out1)
auxiliary_input2 = Input(shape=(max_sequence_length,), dtype='float32', name='aux_input2')
embedded_sequences2 = Embedding(len(word_index) + 1, word_embeddings_dim)(auxiliary_input2)
lstm_out2 = Bidirectional(LSTM(64))(embedded_sequences2)
lstm_drop2 = Dropout(0.5)(lstm_out2)
lstm_drop = multiply([lstm_drop1, lstm_drop2])
#####globel attention start#####
attention_1 = Dense(128, activation='softmax', name='attention_vec')(lstm_drop)
attention_mul = multiply([lstm_drop, attention_1], name='attention_mul')
#####globel attention finish#####
# attention_mul = Flatten()(attention_mul)
attention_mul = Dense(32, activation='relu')(attention_mul)
#####globel attention start#####
full_connected = multiply([x, attention_mul], name='full_connected')
attention_2 = Dense(32, activation='softmax')(full_connected)
attention_final = multiply([full_connected, attention_2])
#####globel attention finish#####
dense_layer = Dense(16, activation='relu')(attention_final)
main_output = Dense(1, activation='tanh', name='main_output')(dense_layer)
model = Model(inputs=[person1_input, person2_input, relation_input, auxiliary_input1, auxiliary_input2], outputs= main_output)
return model
def train(from_save_point=False, suffix='rnn'):
X_train = np.random.randn(10,243)
generator = generator_mod(person_total)
discriminator = discriminator_mod()
generator.summary()
discriminator.summary()
for layer in discriminator.layers:
layer.trainable = False
discriminator.trainable = False
for layer in discriminator.layers:
layer.trainable = False
discriminator.trainable = False
person1 = Input(shape=(1,))
relation = Input(shape=(1,))
seed = Input(shape=(1,person_embedding_dim))
person1_description = Input(shape=(max_sequence_length,))
genarated_person2 = generator([person1, seed, relation, person1_description])
person2_description = Input(shape=(max_sequence_length,))
discriminator_layers_for_generator = discriminator([person1, genarated_person2, relation, person1_description, person2_description])
generator_model = Model(inputs=[person1, relation, seed, person1_description, person2_description], outputs=[discriminator_layers_for_generator])
generator_model.compile(optimizer= RMSprop(lr=0.0001, rho=0.9), loss=wasserstein_loss)
for layer in discriminator.layers:
layer.trainable = True
for layer in generator.layers:
layer.trainable = False
discriminator.trainable = True
generator.trainable = False
person2 = Input(shape=(1,))
generated_samples_for_discriminator = generator([person1, seed, relation, person1_description])
discriminator_output_from_generator = discriminator([person1, generated_samples_for_discriminator, relation, person1_description, person2_description])
discriminator_output_from_real_samples = discriminator([person1, person2, relation, person1_description, person2_description])
averaged_samples = RandomWeightedAverage()([person2, generated_samples_for_discriminator])
averaged_samples_out = discriminator([person1, averaged_samples, relation, person1_description, person2_description])
partial_gp_loss = partial(gradient_penalty_loss, averaged_samples= averaged_samples)
partial_gp_loss.__name__ = 'gradient_penalty'
discriminator_model = Model(inputs=[person1, person2, relation, person1_description, person2_description, seed], outputs=[discriminator_output_from_real_samples, discriminator_output_from_generator, averaged_samples_out])
# averaged_samples_out
discriminator_model.compile(optimizer=RMSprop(lr=0.0001, rho=0.9), loss=[wasserstein_loss, wasserstein_loss, partial_gp_loss])
# partial_gp_loss
positive_y = np.ones((BATCH_SIZE, 1), dtype=np.float32)
negative_y = -positive_y
dummy_y = np.zeros((BATCH_SIZE, 1), dtype=np.float32)
if __name__ == "__main__":
# convert_text_to_nptensor(cutoff=50, min_frequency_words=100000, max_lines=20000000)
train(from_save_point=False, suffix='Google')
However, when the code execute to this line:
gradients = K.gradients(K.sum(y_pred), averaged_samples)
The error message is that:
'TypeError: Failed to convert object of type to Tensor. Contents: [None]. Consider casting elements to a supported type'
Can anyone help me? Thank you very much!
Related
The proposed method can automatically detect the features of medical images under the condition determined by the algorithms, and achieve the correct and fast recognition results.
I was trying to run the image classification with using CNN method but then I got the error message below
File "<ipython-input-2-4e7ea6cc5087>", line 1, in <module>
runfile('C:/Users/MDIC/Desktop/VGG for 10 Images.py', wdir='C:/Users/MDIC/Desktop')
File "C:\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 786, in runfile
execfile(filename, namespace)
File "C:\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/MDIC/Desktop/VGG for 10 Images.py", line 224, in <module>
sp = plt.subplot(nrows, ncols, i + 1)
File "C:\Anaconda3\lib\site-packages\matplotlib\pyplot.py", line 1084, in subplot
a = fig.add_subplot(*args, **kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\figure.py", line 1367, in add_subplot
a = subplot_class_factory(projection_class)(self, *args, **kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\axes\_subplots.py", line 60, in __init__
).format(maxn=rows*cols, num=num))
ValueError: num must be 1 <= num <= 25, not 26
This is my Python code
# Importing libraries
from matplotlib import pyplot as plt
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.image as mpimg
import numpy as np
import os
# Preparing dataset
# Setting names of the directies for ten sets
base_dir = 'data'
seta ='Man1'
setb ='Man2'
setc ='Man3'
setd ='Man4'
sete ='Man5'
setf ='Man6'
setg ='Man7'
seth ='Man8'
seti ='Man9'
setj ='Man10'
# Each of the sets has three sub directories train, validation and test
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
def prepare_data(base_dir, seta, setb, setc, setd, sete, setf, setg, seth, seti, setj):
# Take the directory names for the base directory and both the sets
# Returns the paths for train, validation for each of the sets
seta_train_dir = os.path.join(train_dir, seta)
setb_train_dir = os.path.join(train_dir, setb)
setc_train_dir = os.path.join(train_dir, setc)
setd_train_dir = os.path.join(train_dir, setd)
sete_train_dir = os.path.join(train_dir, sete)
setf_train_dir = os.path.join(train_dir, setf)
setg_train_dir = os.path.join(train_dir, setg)
seth_train_dir = os.path.join(train_dir, seth)
seti_train_dir = os.path.join(train_dir, seti)
setj_train_dir = os.path.join(train_dir, setj)
seta_valid_dir = os.path.join(validation_dir, seta)
setb_valid_dir = os.path.join(validation_dir, setb)
setc_valid_dir = os.path.join(validation_dir, setc)
setd_valid_dir = os.path.join(validation_dir, setd)
sete_valid_dir = os.path.join(validation_dir, sete)
setf_valid_dir = os.path.join(validation_dir, setf)
setg_valid_dir = os.path.join(validation_dir, setg)
seth_valid_dir = os.path.join(validation_dir, seth)
seti_valid_dir = os.path.join(validation_dir, seti)
setj_valid_dir = os.path.join(validation_dir, setj)
seta_train_fnames = os.listdir(seta_train_dir)
setb_train_fnames = os.listdir(setb_train_dir)
setc_train_fnames = os.listdir(setc_train_dir)
setd_train_fnames = os.listdir(setd_train_dir)
sete_train_fnames = os.listdir(sete_train_dir)
setf_train_fnames = os.listdir(setf_train_dir)
setg_train_fnames = os.listdir(setg_train_dir)
seth_train_fnames = os.listdir(seth_train_dir)
seti_train_fnames = os.listdir(seti_train_dir)
setj_train_fnames = os.listdir(setj_train_dir)
return seta_train_dir, setb_train_dir, setc_train_dir, setd_train_dir, sete_train_dir, setf_train_dir, setg_train_dir, seth_train_dir, seti_train_dir, setj_train_dir, seta_valid_dir, setb_valid_dir, setc_valid_dir, setd_valid_dir, sete_valid_dir, setf_valid_dir, setg_valid_dir, seth_valid_dir, seti_valid_dir, setj_valid_dir, seta_train_fnames, setb_train_fnames, setc_train_fnames, setd_train_fnames, sete_train_fnames, setf_train_fnames, setg_train_fnames, seth_train_fnames, seti_train_fnames, setj_train_fnames
seta_train_dir, setb_train_dir, setc_train_dir, setd_train_dir, sete_train_dir, setf_train_dir, setg_train_dir, seth_train_dir, seti_train_dir, setj_train_dir, seta_valid_dir, setb_valid_dir, setc_valid_dir, setd_valid_dir, sete_valid_dir, setf_valid_dir, setg_valid_dir, seth_valid_dir, seti_valid_dir, setj_valid_dir, seta_train_fnames, setb_train_fnames, setc_train_fnames, setd_train_fnames, sete_train_fnames, setf_train_fnames, setg_train_fnames, seth_train_fnames, seti_train_fnames, setj_train_fnames = prepare_data(base_dir, seta, setb, setc, setd, sete, setf, setg, seth, seti, setj)
seta_test_dir = os.path.join(test_dir, seta)
setb_test_dir = os.path.join(test_dir, setb)
setc_test_dir = os.path.join(test_dir, setc)
setd_test_dir = os.path.join(test_dir, setd)
sete_test_dir = os.path.join(test_dir, sete)
setf_test_dir = os.path.join(test_dir, setf)
setg_test_dir = os.path.join(test_dir, setg)
seth_test_dir = os.path.join(test_dir, seth)
seti_test_dir = os.path.join(test_dir, seti)
setj_test_dir = os.path.join(test_dir, setj)
test_fnames_seta = os.listdir(seta_test_dir)
test_fnames_setb = os.listdir(setb_test_dir)
test_fnames_setc = os.listdir(setc_test_dir)
test_fnames_setd = os.listdir(setd_test_dir)
test_fnames_sete = os.listdir(sete_test_dir)
test_fnames_setf = os.listdir(setf_test_dir)
test_fnames_setg = os.listdir(setg_test_dir)
test_fnames_seth = os.listdir(seth_test_dir)
test_fnames_seti = os.listdir(seti_test_dir)
test_fnames_setj = os.listdir(setj_test_dir)
datagen = ImageDataGenerator(
height_shift_range = 0.2,
width_shift_range = 0.2,
rotation_range = 40,
shear_range = 0.2,
zoom_range = 0.2,
horizontal_flip = True,
fill_mode = 'nearest')
img_path = os.path.join(seta_train_dir, seta_train_fnames[3])
img = load_img(img_path, target_size = (150, 150))
x = img_to_array(img)
x = x.reshape((1,) + x.shape)
i = 0
for batch in datagen.flow(x, batch_size = 1):
plt.figure(i)
imgplot = plt.imshow(array_to_img(batch[0]))
i += 1
if i % 10 == 0:
break
# Convolutional Neural Network model
# Import TensorFlow libraries
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
img_input = layers.Input(shape = (150, 150, 3))
# 2D Convolution layer with 64 filters of dimension 3x3 and ReLU activation algorithm
x = layers.Conv2D(64, 3, activation = 'relu')(img_input)
# 2D max pooling layer
x = layers.MaxPooling2D(2)(x)
# 2D Convolution layer with 128 filters of dimension 3x3 and ReLU activation algorithm
x = layers.Conv2D(128, 3, activation = 'relu')(x)
# 2D Max pooling layer
x = layers.MaxPooling2D(2)(x)
# 2D Convolution layer with 256 filters of dimension 3x3 and ReLU activation algorithm
x = layers.Conv2D(256, 3, activation = 'relu')(x)
# 2D Max pooling layer
x = layers.MaxPooling2D(2)(x)
# 2D Convolution layer with 512 filters of dimension 3x3 and ReLU activation algorithm
x = layers.Conv2D(512, 3, activation = 'relu')(x)
# 2D Max pooling layer
x = layers.MaxPooling2D(2)(x)
# 2D Convolution layer with 512 filters of dimension 3x3 and ReLU activation algorithm
x = layers.Conv2D(512, 3, activation = 'relu')(x)
# Flatten layer
x = layers.Flatten()(x)
# Fully connected layers and ReLU activation algorithm
x = layers.Dense(4096, activation = 'relu')(x)
x = layers.Dense(4096, activation = 'relu')(x)
x = layers.Dense(1000, activation = 'relu')(x)
# Dropout layers for optimisation
x = layers.Dropout(0.5)(x)
# Fully connected layers and sigmoid activation algorithm
model = Sequential()
model.add(Dense(10))
output = layers.Dense(10, activation = 'sigmoid')(x)
model = Model(img_input, output)
model.summary()
import tensorflow as tf
# Using binary_crossentropy as the loss function and
# Adam optimizer as the optimizing function when training
model.compile(loss = 'sparse_categorical_crossentropy',
optimizer = tf.optimizers.Adam(learning_rate = 0.0005),
metrics = ['acc'])
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(rescale = 1./255)
test_datagen = ImageDataGenerator(rescale = 1./255)
# Flow training images in batches of 20 using train_datagen generator
train_generator = train_datagen.flow_from_directory(
train_dir,
target_size = (150, 150),
batch_size = 20,
class_mode = 'binary')
validation_generator = test_datagen.flow_from_directory(
validation_dir,
target_size = (150, 150),
batch_size = 20,
class_mode = 'binary')
# 4x4 grid
nrows = 5
ncols = 5
pic_index = 0
# Set up matpotlib fig and size it to fit 5x5 pics
fig = plt.gcf()
fig.set_size_inches(nrows * 5, ncols * 5)
pic_index += 10
next_seta_pix = [os.path.join(seta_train_dir, fname)
for fname in seta_train_fnames[pic_index-10:pic_index]]
next_setb_pix = [os.path.join(setb_train_dir, fname)
for fname in setb_train_fnames[pic_index-10:pic_index]]
next_setc_pix = [os.path.join(setc_train_dir, fname)
for fname in setc_train_fnames[pic_index-10:pic_index]]
next_setd_pix = [os.path.join(setd_train_dir, fname)
for fname in setd_train_fnames[pic_index-10:pic_index]]
next_sete_pix = [os.path.join(sete_train_dir, fname)
for fname in sete_train_fnames[pic_index-10:pic_index]]
next_setf_pix = [os.path.join(setf_train_dir, fname)
for fname in setf_train_fnames[pic_index-10:pic_index]]
next_setg_pix = [os.path.join(setg_train_dir, fname)
for fname in setg_train_fnames[pic_index-10:pic_index]]
next_seth_pix = [os.path.join(seth_train_dir, fname)
for fname in seth_train_fnames[pic_index-10:pic_index]]
next_seti_pix = [os.path.join(seti_train_dir, fname)
for fname in seti_train_fnames[pic_index-10:pic_index]]
next_setj_pix = [os.path.join(setj_train_dir, fname)
for fname in setj_train_fnames[pic_index-10:pic_index]]
for i, img_path in enumerate(next_seta_pix + next_setb_pix + next_setc_pix + next_setd_pix + next_sete_pix + next_setf_pix + next_setg_pix + next_seth_pix + next_seti_pix + next_setj_pix):
# Set up subplot; subplot indices start at 1
sp = plt.subplot(nrows, ncols, i + 1)
# Dont show axes
sp.axis('Off')
img = mpimg.imread(img_path)
plt.imshow(img)
plt.show()
# Train the model
mymodel = model.fit_generator(
train_generator,
steps_per_epoch = 10,
epochs = 80,
validation_data = validation_generator,
validation_steps = 7,
verbose = 2)
import random
from tensorflow.keras.preprocessing.image import img_to_array, load_img
successive_outputs = [layer.output for layer in model.layers[1:]]
visualization_model = Model(img_input, successive_outputs)
a_img_files = [os.path.join(seta_train_dir, f) for f in seta_train_fnames]
b_img_files = [os.path.join(setb_train_dir, f) for f in setb_train_fnames]
c_img_files = [os.path.join(setc_train_dir, f) for f in setc_train_fnames]
d_img_files = [os.path.join(setd_train_dir, f) for f in setd_train_fnames]
e_img_files = [os.path.join(sete_train_dir, f) for f in sete_train_fnames]
f_img_files = [os.path.join(setf_train_dir, f) for f in setf_train_fnames]
g_img_files = [os.path.join(setg_train_dir, f) for f in setg_train_fnames]
h_img_files = [os.path.join(seth_train_dir, f) for f in seth_train_fnames]
i_img_files = [os.path.join(seti_train_dir, f) for f in seti_train_fnames]
j_img_files = [os.path.join(setj_train_dir, f) for f in setj_train_fnames]
img_path = random.choice(a_img_files + b_img_files + c_img_files + d_img_files + e_img_files + f_img_files + g_img_files + h_img_files + i_img_files + j_img_files)
img = load_img(img_path, target_size = (150, 150))
x = img_to_array(img)
x = x.reshape((1,) + x.shape)
x /= 255
successive_feature_maps = visualization_model.predict(x)
layer_names = [layer.name for layer in model.layers]
for layer_name, feature_map in zip(layer_names, successive_feature_maps):
if len(feature_map.shape) == 4:
# Just do this for the conv/maxpool layers
n_features = feature_map.shape[-1]
# The feature map has shape(1, size, size, n_features)
size = feature_map.shape[1]
# Will tile images in this matrix
display_grid = np.zeros((size, size * n_features))
for i in range(n_features):
# Postprocess the feature
x = feature_map[0, :, :, i]
x -= x.mean()
x *= 64
x += 128
x = np.clip(x, 0, 255).astype('float32')
# Will tile each filter into this big horizontal grid
display_grid[:, i * size : (i + 1) * size] = x
# Accuracy results for each training and validation epoch
acc = mymodel.history['acc']
val_acc = mymodel.history['val_acc']
# Loss results for each training and validation epoch
loss = mymodel.history['loss']
val_loss = mymodel.history['val_loss']
what i understood from your code , your doing multi class classification because you have used Dense(10) at the last layer so
you need to change class_mode = 'binary' into
class model ='categorical' &
also change activation function sigmoid into
output = layers.Dense(10, activation = 'softmax')(x)
I did some modifications and now my error has changed to:
invalidargumenterror: indices[29,195] = 2036 is not in [0, 2035)
Is there a way to resolve this? Also is there anything wrong on the rest of the program? Thank you.
def generator(df,tokenizer,label_encoder, batch_size=32):
n_examples = len(df)
number_of_batches = n_examples / batch_size
counter = 0
while 1:
start_index = index * batch_size
end_index = start_index + batch_size
if counter > number_of_batches + 1:
df.sample(frac=1).reset_index(drop=True)
counter = 0
counter += 1
X = tokenizer.texts_to_sequences(df.iloc[start_index: end_index]['text'])
X = sequence.pad_sequences(X, maxlen=160)
X = sequence.pad_sequences(X, maxlen=160)
Y = label_encoder.transform(df['code'])
yield X, Y
# Generators
train_generator = generator(df_train,tokenizer,label_encoder, batch_size=32)
validation_generator = generator(df_valid,tokenizer,label_encoder, batch_size=32)
#import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.layers import Embedding, LSTM, GlobalMaxPooling1D
from tensorflow.keras.optimizers import Adam
input_text = Input(shape=(160,), dtype='int32')
embedding = Embedding(input_dim=len(df_valid) + 1,
output_dim=300,
input_length=160)(input_text)
lstm = LSTM(units=256,
dropout=0.5,
recurrent_dropout=0.5,
return_sequences=True, )(embedding)
pool = GlobalMaxPooling1D()(lstm)
dropout = Dropout(0.5)(pool)
output = Dense(len(label_encoder.classes_), activation='softmax')(dropout)
model = Model(inputs=input_text, outputs=output)
optimer = Adam(lr=.001)
model.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'])
model.fit(train_generator,
validation_data=validation_generator,
epochs=20,steps_per_epoch = len(df_train)/batch_size)
I tried to make a class using batchnormalization layer from tf 2.0, however it gave me an error that Gradients does not exist for variables. I tried to use batchnormalization directly but it gave me the same error as well. it seems like it is not traing the variable related to the batchnormalization step.
I tried to use model.trainable_variables instead of model.variables but it didn't work either.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy import ndimage
learning_rate = 0.001
training_epochs = 15
batch_size = 100
tf.random.set_seed(777)
cur_dir = os.getcwd()
ckpt_dir_name = 'checkpoints'
model_dir_name = 'minst_cnn_best'
checkpoint_dir = os.path.join(cur_dir, ckpt_dir_name, model_dir_name)
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_prefix = os.path.join(checkpoint_dir, model_dir_name)
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.astype(np.float32) /255.
test_images = test_images.astype(np.float32) /255.
print(train_images.shape, test_images.shape)
train_images = np.expand_dims(train_images, axis = -1)
test_images = np.expand_dims(test_images, axis = -1)
print(train_images.shape, test_images.shape)
train_labels = to_categorical(train_labels, 10)
test_labels = to_categorical(test_labels, 10)
train_dataset = tf.data.Dataset.from_tensor_slices((train_images,
train_labels)).shuffle(buffer_size = 100000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((test_images,
test_labels)).batch(batch_size)
class ConvBNRelu(tf.keras.Model):
def __init__(self, filters, kernel_size=3, strides=1, padding='SAME'):
super(ConvBNRelu, self).__init__()
self.conv = keras.layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides,
padding=padding, kernel_initializer='glorot_normal')
self.batchnorm = tf.keras.layers.BatchNormalization()
def call(self, inputs, training=False):
layer = self.conv(inputs)
layer = self.batchnorm(layer)
layer = tf.nn.relu(layer)
return layer
class DenseBNRelu(tf.keras.Model):
def __init__(self, units):
super(DenseBNRelu, self).__init__()
self.dense = keras.layers.Dense(units=units, kernel_initializer='glorot_normal')
self.batchnorm = tf.keras.layers.BatchNormalization()
def call(self, inputs, training=False):
layer = self.dense(inputs)
layer = self.batchnorm(layer)
layer = tf.nn.relu(layer)
return layer
class MNISTModel(tf.keras.Model):
def __init__(self):
super(MNISTModel, self).__init__()
self.conv1 = ConvBNRelu(filters=32, kernel_size=[3, 3], padding='SAME')
self.pool1 = keras.layers.MaxPool2D(padding='SAME')
self.conv2 = ConvBNRelu(filters=64, kernel_size=[3, 3], padding='SAME')
self.pool2 = keras.layers.MaxPool2D(padding='SAME')
self.conv3 = ConvBNRelu(filters=128, kernel_size=[3, 3], padding='SAME')
self.pool3 = keras.layers.MaxPool2D(padding='SAME')
self.pool3_flat = keras.layers.Flatten()
self.dense4 = DenseBNRelu(units=256)
self.drop4 = keras.layers.Dropout(rate=0.4)
self.dense5 = keras.layers.Dense(units=10, kernel_initializer='glorot_normal')
def call(self, inputs, training=False):
net = self.conv1(inputs)
net = self.pool1(net)
net = self.conv2(net)
net = self.pool2(net)
net = self.conv3(net)
net = self.pool3(net)
net = self.pool3_flat(net)
net = self.dense4(net)
net = self.drop4(net)
net = self.dense5(net)
return net
models = []
num_models = 5
for m in range(num_models):
models.append(MNISTModel())
def loss_fn(model, images, labels):
logits = model(images, training=True)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,
labels=labels))
return loss
def grad(model, images, labels):
with tf.GradientTape() as tape:
loss = loss_fn(model, images, labels)
return tape.gradient(loss, model.variables)
def evaluate(models, images, labels):
predictions = np.zeros_like(labels)
for model in models:
logits = model(images, training=False)
predictions += logits
correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
return accuracy
optimizer = keras.optimizers.Adam(learning_rate = learning_rate)
checkpoints = []
for m in range(num_models):
checkpoints.append(tf.train.Checkpoint(cnn=models[m]))
for epoch in range(training_epochs):
avg_loss = 0.
avg_train_acc = 0.
avg_test_acc = 0.
train_step = 0
test_step = 0
for images, labels in train_dataset:
for model in models:
grads = grad(model, images, labels)
optimizer.apply_gradients(zip(grads, model.variables))
loss = loss_fn(model, images, labels)
avg_loss += loss / num_models
acc = evaluate(models, images, labels)
avg_train_acc += acc
train_step += 1
avg_loss = avg_loss / train_step
avg_train_acc = avg_train_acc / train_step
for images, labels in test_dataset:
acc = evaluate(models, images, labels)
avg_test_acc += acc
test_step += 1
avg_test_acc = avg_test_acc / test_step
print('Epoch:', '{}'.format(epoch + 1), 'loss =', '{:.8f}'.format(avg_loss),
'train accuracy = ', '{:.4f}'.format(avg_train_acc),
'test accuracy = ', '{:.4f}'.format(avg_test_acc))
for idx, checkpoint in enumerate(checkpoints):
checkpoint.save(file_prefix=checkpoint_prefix+'-{}'.format(idx))
print('Learning Finished!')
W0727 20:27:05.344142 140332288718656 optimizer_v2.py:982] Gradients does not exist for variables ['mnist_model/conv_bn_relu/batch_normalization/moving_mean:0', 'mnist_model/conv_bn_relu/batch_normalization/moving_variance:0', 'mnist_model/conv_bn_relu_1/batch_normalization_1/moving_mean:0', 'mnist_model/conv_bn_relu_1/batch_normalization_1/moving_variance:0', 'mnist_model/conv_bn_relu_2/batch_normalization_2/moving_mean:0', 'mnist_model/conv_bn_relu_2/batch_normalization_2/moving_variance:0', 'mnist_model/dense_bn_relu/batch_normalization_3/moving_mean:0', 'mnist_model/dense_bn_relu/batch_normalization_3/moving_variance:0'] when minimizing the loss.
W0727 20:27:05.407717 140332288718656 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:460: BaseResourceVariable.constraint (from tensorflow.python.ops.resource_variable_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Apply a constraint manually following the optimizer update step.
W0727 20:27:05.499249 140332288718656 optimizer_v2.py:982] Gradients does not exist for variables ['mnist_model_1/conv_bn_relu_3/batch_normalization_4/moving_mean:0', 'mnist_model_1/conv_bn_relu_3/batch_normalization_4/moving_variance:0', 'mnist_model_1/conv_bn_relu_4/batch_normalization_5/moving_mean:0', 'mnist_model_1/conv_bn_relu_4/batch_normalization_5/moving_variance:0', 'mnist_model_1/conv_bn_relu_5/batch_normalization_6/moving_mean:0', 'mnist_model_1/conv_bn_relu_5/batch_normalization_6/moving_variance:0', 'mnist_model_1/dense_bn_relu_1/batch_normalization_7/moving_mean:0', 'mnist_model_1/dense_bn_relu_1/batch_normalization_7/moving_variance:0'] when minimizing the loss.
...
You're computing the gradient of the loss with respect to the model.variables: this collection contains not only the trainable variables (the model weights) but also the non-trainable variables like the moving mean and variance computed by the batch normalization layer.
You have to compute the gradient with respect to the trainable_variables. In short change the lines
return tape.gradient(loss, model.variables)
and
optimizer.apply_gradients(zip(grads, model.variables))
to
return tape.gradient(loss, model.trainable_variables)
and
optimizer.apply_gradients(zip(grads, model.trainable_variables))
Let's say we have a custom layer in Keras like this:
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Layer
class Custom_Layer(Layer):
def __init__(self,**kwargs):
super(ProbabilisticActivation, self).__init__(**kwargs)
self.params_1 = 0
self.params_2 = 0
def build(self, input_shape):
self.params_1 = K.variable(np.zeros(shape=input_shape[1::]))
self.params_2 = K.variable(np.zeros(shape=input_shape[1::]))
super(Custom_Layer,self).build(input_shape)
def call(self, x, training=None):
# DO SOMETHING
How could I access the value of the parameters (params_1, params_2) in the training process? I tried to get parameters by using model.get_layer('Name of Custom Layer').params_1, but in this case, I can not access the value of the parameters.
Here is the model architecture:
def get_model(img_height, img_width:
input_layer = Input(shape=(img_height, img_width, 3))
x = Conv2D(32, (3, 3), padding='same', name='conv2d_1', activation='relu')(input_layer)
x = Custom_Layer()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)
x = Conv2D(64, kernel_size=(3, 3), name='conv2d_2', activation='relu')(x)
x = Conv2D(64, (3, 3), name='conv2d_4', activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(512)(x)
x = Activation('relu')(x)
x = Dropout(0.5)(x)
x = Dense(10)(x)
x = Activation('softmax')(x)
model = Model(inputs=[input_layer], outputs=[x])
model.summary()
return model
Note that params_1 and params_2 are TensorFlow tensors. To get their value, you should run them within a tf.Session. You could do something along the lines of:
from keras import backend as K
# ... train model
sess = K.get_session()
params_1 = model.get_layer('Name of Custom Layer').params_1
values_1 = sess.run(params_1)
print(values_1)
NOTE: Not tested.
I'm currently trying to implement a basic autoencoder using Keras, and I have come to the stage where I would want the output from the second hidden layer. I think that I'm able to get the right object, the problem is that I get it as a tensor object, the code I've been trying to run is the following:
from keras.layers import Input, Dense, initializers
import numpy as np
from Dataset import Dataset
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Dense, Activation
import tensorflow as tf
import time
#global variables
d = Dataset()
num_features = d.X_train.shape[1]
#input = [784, 400, 100, 10, 100, 400]
#output = [400, 100, 10, 100, 400, 784]
names = ['hidden1', 'hidden2', 'hidden3', 'hidden4', 'hidden5', 'hidden6']
list_of_nodes = [784, 400, 144, 10]
def generate_hidden_nodes(list_of_nodes):
input = []
for j in range(len(list_of_nodes)):
input.append(list_of_nodes[j])
for i in range(len(list_of_nodes)-2):
input.append(list_of_nodes[-2-i])
output = input[::-1]
return input, output
input,output = generate_hidden_nodes(list_of_nodes)
def autoencoder(epochs):
w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
model = Sequential()
input, output = generate_hidden_nodes(list_of_nodes)
for j in range(len(input)):
if j == (len(input)-1):
model.add(Dense(output[j], activation='sigmoid', kernel_initializer=w, input_dim=input[j], name=names[j]))
#model.add(Dropout(0.45))
else:
model.add(Dense(output[j], activation='relu', kernel_initializer=w, input_dim=input[j],
name = names[j]))
#model.add(Dropout(0.45))
model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
history = model.fit(d.X_train, d.X_train,
epochs=epochs,
batch_size=50,
shuffle=True,
validation_split = 0.2)
#validation_data=(d.X_test, d.X_test))
#print(history.history.keys())
#plt.plot(history.history['val_acc'])
#print(history.history['val_acc'])
plt.show()
return model
def cv():
accuracy = 0
size = 5
epochs = 20
variance = 0
storage = np.zeros((size, epochs))
for j in range(size):
ae = autoencoder(epochs)
#print(ae.history.history['val_acc'])
storage[j] = ae.history.history['val_acc']
for i in range(size):
accuracy += storage[i][-1]
mean = accuracy/size
for k in range(size):
variance += ((storage[k][-1] - mean)**2)
variance = variance/size
return mean, variance
#mean, variance = cv()
#print(mean)
#print(variance)
#time.sleep(10)
def finding_index():
elements, index = np.unique(d.Y_test, return_index=True)
return elements, index
def plotting():
ae = autoencoder(20)
elements, index = finding_index()
y_proba = ae.predict(d.X_test)
plt.figure(figsize=(20, 4))
# size = 20
for i in range(len(index)):
ax = plt.subplot(2, len(index), i + 1)
plt.imshow(d.X_test[index[i]].reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
ax = plt.subplot(2, len(index), i + 1 + len(index))
plt.imshow(y_proba[index[i]].reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()
def plotting_weights(epochs):
ae = autoencoder(epochs)
output_layer = ae.get_layer('hidden2')
weights = output_layer.get_weights()[0]
print(weights.shape)
size = 20
plt.figure(figsize=(20, 4))
for j in range(3):
plt.gray()
plt.imshow(weights[j].reshape(12, 12))
plt.show()
def get_output():
w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
new_model = Sequential()
new_model.add(Dense(400, activation='relu', kernel_initializer=w, input_dim = 784))
new_model.add(Dense(144, activation='sigmoid', kernel_initializer=w, input_dim = 400))
#new_model.add(Dense(784, activation='sigmoid', kernel_initializer=w, input_dim = 144))
new_model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
history = new_model.fit(d.X_train, d.X_train,
epochs=20,
batch_size=50,
shuffle=True,
validation_split=0.2)
y = new_model.predict(d.X_test)
elements, index = finding_index()
#return y.shape
def get_output2():
ae = autoencoder(5)
a =ae.layers[1].output()
init_op = tf.initialize_all_variables()
with tf.Session() as sess:
sess.run(init_op) # execute init_op
# print the random values that we sample
print(a)
get_output2()
I've tried to just print(a) as well, but as I said, that returns me a tensor object. Can someone provide me some information how I can actually print those value? Thanks in advance!
Simplest:
import keras.backend as K
print(K.eval(ae.layers[1].output()))
This is equivalent to:
with tf.Session() as sess:
print(sess.run(a))
I find it more readable to simply use the keras.backend interface.