Accessing Variables of Custom Layers in Keras

Let's say we have a custom layer in Keras like this:
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Layer

class Custom_Layer(Layer):
    def __init__(self, **kwargs):
        super(Custom_Layer, self).__init__(**kwargs)
        self.params_1 = 0
        self.params_2 = 0

    def build(self, input_shape):
        self.params_1 = K.variable(np.zeros(shape=input_shape[1::]))
        self.params_2 = K.variable(np.zeros(shape=input_shape[1::]))
        super(Custom_Layer, self).build(input_shape)

    def call(self, x, training=None):
        # DO SOMETHING
How can I access the values of the parameters (params_1, params_2) during training? I tried model.get_layer('Name of Custom Layer').params_1, but that gives me the variable itself, not its value.
Here is the model architecture:
from keras.layers import Input, Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation
from keras.models import Model

def get_model(img_height, img_width):
    input_layer = Input(shape=(img_height, img_width, 3))
    x = Conv2D(32, (3, 3), padding='same', name='conv2d_1', activation='relu')(input_layer)
    x = Custom_Layer()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Conv2D(64, kernel_size=(3, 3), name='conv2d_2', activation='relu')(x)
    x = Conv2D(64, (3, 3), name='conv2d_4', activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Flatten()(x)
    x = Dense(512)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(10)(x)
    x = Activation('softmax')(x)
    model = Model(inputs=[input_layer], outputs=[x])
    model.summary()
    return model

Note that params_1 and params_2 are TensorFlow variables (created with K.variable). To get their current values, you have to evaluate them in the backend's tf.Session. You could do something along the lines of:
from keras import backend as K
# ... train model
sess = K.get_session()
params_1 = model.get_layer('Name of Custom Layer').params_1
values_1 = sess.run(params_1)
print(values_1)
NOTE: Not tested.
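If you want to watch the values change during training rather than only afterwards, you can evaluate the variables from a callback at the end of each epoch. This is a minimal, likewise untested sketch; it assumes the layer was created as Custom_Layer(name='custom_layer') so it can be looked up by name:
from keras import backend as K
from keras.callbacks import Callback

class ParamLogger(Callback):
    """Evaluate and print the custom layer's variables after each epoch."""
    def __init__(self, layer_name='custom_layer'):  # assumed layer name
        super(ParamLogger, self).__init__()
        self.layer_name = layer_name

    def on_epoch_end(self, epoch, logs=None):
        layer = self.model.get_layer(self.layer_name)
        sess = K.get_session()
        values_1, values_2 = sess.run([layer.params_1, layer.params_2])
        print('epoch', epoch, 'params_1 mean:', values_1.mean(),
              'params_2 mean:', values_2.mean())

# model.fit(x_train, y_train, callbacks=[ParamLogger('custom_layer')])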

Related

3D image classification using 3D CNN

I designed a CNN to work with the "cifar10" dataset in Keras.
Here is my code:
input_layer = Input(shape=(32,32,3))
x = Conv3D(32,(5,5,3),activation='relu',padding='same')(input_layer)
x = Conv3D(32,(5,5,3),activation='relu',padding='same')(x)
x = MaxPool3D(pool_size=2, padding='same')(x)
x = Conv3D(32,(5,5,3),activation='relu',padding='same')(x)
x = Conv3D(32,(5,5,3),activation='relu',padding='same')(x)
x = MaxPool3D(pool_size=2, padding='same')(x)
x = Flatten()(x)
x = Dense(128,kernel_initializer='random_normal', bias_initializer='zeros')(x)
x = Dense(128,kernel_initializer='random_normal', bias_initializer='zeros')(x)
output_layer = Dense(10,activation='softmax',kernel_initializer='random_normal', bias_initializer='zeros')(x)
Cifar10_CNN = Model(input_layer, output_layer)
When I build the model I get this error:
Input 0 is incompatible with layer conv3d_5: expected ndim=5, found ndim=4
How can I solve this?
You should probably read up on the difference between Conv2D and Conv3D. It can be confusing, since images are in fact three-dimensional arrays, but they are still treated as 2D in Keras: the convolution window slides over the two spatial dimensions only, and the kernel always spans the full channel depth, so the channel axis is not counted. So you don't need Conv3D for images, you need Conv2D:
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.models import Model
input_layer = Input(shape=(32,32,3))
x = Conv2D(32,(5,5),activation='relu',padding='same')(input_layer)
x = Conv2D(32,(5,5),activation='relu',padding='same')(x)
x = MaxPool2D(pool_size=2, padding='same')(x)
x = Conv2D(32,(5,5),activation='relu',padding='same')(x)
x = Conv2D(32,(5,5),activation='relu',padding='same')(x)
x = MaxPool2D(pool_size=2, padding='same')(x)
x = Flatten()(x)
x = Dense(128,kernel_initializer='random_normal', bias_initializer='zeros')(x)
x = Dense(128,kernel_initializer='random_normal', bias_initializer='zeros')(x)
output_layer = Dense(10,activation='softmax',kernel_initializer='random_normal', bias_initializer='zeros')(x)
Cifar10_CNN = Model(input_layer, output_layer)
print(Cifar10_CNN.summary())
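Conv3D, by contrast, is for inputs with three spatial dimensions, such as volumetric scans or stacked video frames, where each sample has shape (depth, height, width, channels) and the layer therefore expects ndim=5 including the batch dimension. A minimal sketch with made-up shapes, purely for illustration:
from tensorflow.keras.layers import Input, Conv3D, MaxPool3D, Flatten, Dense
from tensorflow.keras.models import Model

# Hypothetical volumetric input: 16 slices of 32x32 single-channel images.
input_layer = Input(shape=(16, 32, 32, 1))
x = Conv3D(32, (3, 3, 3), activation='relu', padding='same')(input_layer)
x = MaxPool3D(pool_size=2, padding='same')(x)
x = Flatten()(x)
output_layer = Dense(10, activation='softmax')(x)
volume_cnn = Model(input_layer, output_layer)
print(volume_cnn.summary())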

Tensorflow 2.0: identical model structure and hyperparameters result in different performance with different calling approaches

Hi there. I am a beginner learning TensorFlow 2.0. I have one model built and called in three different ways, and the resulting performance differs. Could anyone tell me why this is the case?
The model constructing and calling approach:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers
def prepare_mnist_features_and_labels(x, y):
    x = tf.cast(x, tf.float32) / 255.0
    y = tf.cast(y, tf.int64)
    return x, y

def mnist_dataset():
    (x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
    print('x_train/y_train shape:', x_train.shape, y_train.shape)
    y_train = tf.one_hot(y_train, depth=10)
    y_eval = tf.one_hot(y_eval, depth=10)
    ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    ds_train = ds_train.map(prepare_mnist_features_and_labels)
    ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)
    ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
    ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
    ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)
    sample = next(iter(ds_train))
    print('sample: ', sample[0].shape, sample[1].shape)
    return ds_train, ds_eval

def main():
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    trainset, evalset = mnist_dataset()
    model = keras.Sequential()
    model.add(layers.Reshape(target_shape=[28, 28, 1], input_shape=[28, 28]))
    model.add(layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME"))
    model.add(layers.MaxPool2D(pool_size=(2, 2), strides=[1, 1], padding="SAME"))
    model.add(layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME"))
    model.add(layers.MaxPool2D(pool_size=(2, 2), strides=[2, 2], padding="SAME"))
    model.add(layers.Flatten())
    model.add(layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01)))
    model.add(layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01)))
    model.compile(optimizer=optimizers.Adam(lr=0.01), loss=tf.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
    model.fit(trainset.repeat(), epochs=30, steps_per_epoch=500,
              validation_data=evalset.repeat(), validation_steps=10)

if __name__ == '__main__':
    main()
The second approach to construct the model and run it is the following:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers
from tqdm import tqdm
def prepare_mnist_features_and_labels(x, y):
    x = tf.cast(x, tf.float32) / 255.0
    y = tf.cast(y, tf.int64)
    return x, y

def mnist_dataset():
    (x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
    # y_train = tf.one_hot(y_train, depth=10)
    # y_eval = tf.one_hot(y_eval, depth=10)
    ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    ds_train = ds_train.map(prepare_mnist_features_and_labels)
    # Test: replace x_train.shape[0] by the number of training samples, which is 60000
    ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)
    ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
    ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
    ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)
    # sample = next(iter(ds_train))
    # print('sample: ', sample[0].shape, sample[1].shape)
    return ds_train, ds_eval

# tf.nn.sparse_softmax_cross_entropy_with_logits(labels, logits, name=None):
#   labels: Tensor of shape [d_0, d_1, ..., d_{r-1}]. Each label must be an index in [0, num_classes)
#   logits: Unscaled log probabilities of shape [d_0, d_1, ..., d_{r-1}, num_classes]
#   A common use is to have logits of shape [batch_size, num_classes] and labels of shape [batch_size]
def compute_loss(logits, labels):
    # print(logits.numpy())
    # print(labels.numpy())
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits
        )
    )

def compute_accuracy(logits, labels):
    predictions = tf.argmax(logits, axis=1)
    # print(predictions)
    # print(labels)
    # print(list(zip(predictions.numpy(), labels.numpy())))
    return tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))

def train_one_step(model, optimizer, x, y):
    # At each train step, first calculate the forward loss
    with tf.GradientTape() as tape:
        logits = model(x)
        loss = compute_loss(logits, y)
    # Then calculate the backward gradients over each trainable variable
    grads = tape.gradient(loss, model.trainable_variables)
    # Optimize and update variables through backpropagation
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # Compute current model accuracy
    accuracy = compute_accuracy(logits, y)
    return loss, accuracy

def train(epoch, model, optimizer, trainset):
    # def train(epoch, model, optimizer):
    #     trainset = mnist_dataset()[0]
    loss = 0.0
    accuracy = 0.0
    for step, (x, y) in enumerate(tqdm(trainset)):
        loss, accuracy = train_one_step(model, optimizer, x, y)
        if step % 110 == 0:
            print('epoch', epoch, ': loss', loss.numpy(), '; accuracy', accuracy.numpy())
    return loss, accuracy

class MyModel(keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.layer1 = layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME", input_shape=(-1, 28, 28, 1))
        self.layer2 = layers.MaxPool2D(pool_size=(2, 2), strides=[1, 1], padding="SAME")
        self.layer3 = layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME")
        self.layer4 = layers.MaxPool2D(pool_size=(2, 2), strides=[2, 2], padding="SAME")
        self.layer5 = layers.Flatten()
        self.layer6 = layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))
        self.layer7 = layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))

    def call(self, x, training=False):
        x = tf.reshape(x, (-1, 28, 28, 1))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x

def main():
    # set random seed
    tf.random.set_seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    trainset, evalset = mnist_dataset()
    model = MyModel()
    optimizer = optimizers.Adam(lr=0.001)
    # Save checkpoints with the Keras API as a first approach
    # Save checkpoints manually as a second approach
    # Find a way to implement an early-stopping strategy in this programming style
    # for epoch in tqdm(range(30)):
    for epoch in range(50):
        loss, accuracy = train(epoch, model, optimizer, trainset)
    print('Final epoch', epoch, ': loss', loss.numpy(), '; accuracy', accuracy.numpy())

if __name__ == '__main__':
    main()
And the last approach is below:
import os, sys
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, models, layers, regularizers, optimizers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def prepare_mnist_features_and_labels(x, y):
    x = tf.cast(x, tf.float32) / 255.0
    y = tf.cast(y, tf.int64)
    return x, y

def mnist_dataset():
    (x_train, y_train), (x_eval, y_eval) = datasets.mnist.load_data()
    print('x_train/y_train shape:', x_train.shape, y_train.shape)
    y_train = tf.one_hot(y_train, depth=10)
    y_eval = tf.one_hot(y_eval, depth=10)
    ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    ds_train = ds_train.map(prepare_mnist_features_and_labels)
    ds_train = ds_train.shuffle(x_train.shape[0]).batch(128)
    ds_eval = tf.data.Dataset.from_tensor_slices((x_eval, y_eval))
    ds_eval = ds_eval.map(prepare_mnist_features_and_labels)
    ds_eval = ds_eval.shuffle(x_eval.shape[0]).batch(128)
    sample = next(iter(ds_train))
    print('sample: ', sample[0].shape, sample[1].shape)
    return ds_train, ds_eval

class MyModel(keras.Model):
    # self.model = keras.Sequential([
    #     layers.Reshape(target_shape=(28*28, ), input_shape=(28, 28)),
    #     layers.Dense(100, activation=tf.nn.relu),
    #     layers.Dense(100, activation=tf.nn.relu),
    #     layers.Dense(10)
    # ])
    def __init__(self):
        super(MyModel, self).__init__()
        self.layer1 = layers.Conv2D(filters=32, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME", input_shape=(-1, 28, 28, 1))
        self.layer2 = layers.MaxPool2D(pool_size=(2, 2), strides=[1, 1], padding="SAME")
        self.layer3 = layers.Conv2D(filters=64, kernel_size=(5, 5), activation=tf.nn.relu, strides=[1, 1], padding="SAME")
        self.layer4 = layers.MaxPool2D(pool_size=(2, 2), strides=[2, 2], padding="SAME")
        self.layer5 = layers.Flatten()
        self.layer6 = layers.Dense(units=512, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))
        self.layer7 = layers.Dense(units=10, activation=tf.nn.relu, kernel_regularizer=regularizers.l2(0.01))

    def call(self, x, training=False):
        x = tf.reshape(x, (-1, 28, 28, 1))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        return x

def main():
    tf.random.set_seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    trainset, evalset = mnist_dataset()
    model = MyModel()
    model.compile(optimizer=optimizers.Adam(lr=0.001), loss=tf.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
    model.fit(trainset.repeat(), epochs=30, steps_per_epoch=500, verbose=1,
              validation_data=evalset.repeat(), validation_steps=10)

if __name__ == '__main__':
    main()
Each of them takes a while to train. Can anyone tell me why the performance differs, and how I should debug this by myself in the future?
Thank you so much for any help.
The problem was solved after carefully examining the network. It turned out that the last fully connected layer was activated with a relu function, which is not appropriate. The choice between tf.losses.CategoricalCrossentropy and tf.nn.sparse_softmax_cross_entropy_with_logits also makes a big difference: whichever you choose, make sure the loss function matches the final output of the network and the label encoding.
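Concretely, the fix is to leave the last Dense layer linear so it outputs logits, and to pair it with a from_logits loss that matches the label encoding. A minimal sketch of both valid pairings (the surrounding model code is unchanged from the question):
# Last layer: raw logits, no relu and no softmax, because the losses
# below are configured to expect logits.
model.add(layers.Dense(units=10))

# Pairing 1: one-hot labels (tf.one_hot(y, depth=10)) with the Keras loss.
model.compile(optimizer=optimizers.Adam(lr=0.001),
              loss=tf.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Pairing 2: integer labels with the custom training loop, e.g.
# loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
#     labels=labels, logits=logits))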

the loss didn't drop in tf with keras cifar10 datasets

I created a VGG-like model in TensorFlow and use the cifar10 dataset from Keras to train it, but the loss doesn't drop. Can you spot the problem?
cifar10 datasets
from keras.datasets import cifar100, cifar10
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
def load_cifar(num_class=100):
    if num_class == 100:
        nb_classes = 100
        (trainX, trainY), (testX, testY) = cifar100.load_data()
    else:
        nb_classes = 10
        (trainX, trainY), (testX, testY) = cifar10.load_data()
    trainX = trainX.astype('float32')
    testX = testX.astype('float32')
    trainX /= 255.
    testX /= 255.
    Y_train = np_utils.to_categorical(trainY, nb_classes)
    Y_test = np_utils.to_categorical(testY, nb_classes)
    generator = ImageDataGenerator(rotation_range=15,
                                   width_shift_range=5. / 32,
                                   height_shift_range=5. / 32,
                                   horizontal_flip=True if num_class == 10 else False)
    generator.fit(trainX, seed=0)
    return trainX, Y_train, testX, Y_test, generator
model and train
import sys
import os
sys.path.append(os.path.abspath('../'))
print(sys.path)
import tensorflow as tf
from Input import cifar
BatchSize = 2
def gen_gen(batch_size=BatchSize):
    trainX, Y_train, testX, Y_test, generator = cifar.load_cifar(10)
    gen = generator.flow(trainX, Y_train, batch_size=batch_size)
    return gen

class vgg16TF():
    def __init__(self, ih=32, iw=32, batch_size=BatchSize):
        self.ih = ih
        self.iw = iw
        self.batch_size = batch_size

    def unit(self, x, conv_nums, filters, name=None):
        for i in range(1, conv_nums + 1):
            x = tf.layers.conv2d(inputs=x, filters=filters, kernel_size=(5, 5), padding='same',
                                 use_bias=True, activation=tf.nn.relu, name=name + '_conv' + str(i))
        x = tf.layers.max_pooling2d(x, (3, 3), strides=(2, 2), name=name + '_pool')
        return x

    def net(self):
        input_x = tf.placeholder(shape=(self.batch_size, self.ih, self.iw, 3), dtype=tf.float32)
        x = self.unit(input_x, 2, 64, name='blook1')
        x = self.unit(x, 2, 64, name='blook2')
        # x = self.unit(x, 3, 256, name='blook3')
        # x = self.unit(x, 3, 512, name='blook4')
        # x = self.unit(x, 3, 512, name='blook5')
        x = tf.layers.flatten(x, name='flatten')
        x = tf.layers.dense(x, 384, activation=tf.nn.relu, name='fc1', use_bias=True)
        x = tf.layers.dense(x, 192, activation=tf.nn.relu, name='fc2', use_bias=True)
        y = tf.layers.dense(x, 10, name='prediction', use_bias=True)
        print(y)
        return input_x, y

    def loss(self, labels, logits):
        labels = tf.cast(labels, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.arg_max(labels, 1), logits=logits
        )
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        tf.add_to_collection('losses', cross_entropy_mean)
        return tf.add_n(tf.get_collection('losses'))

    def train(self):
        input_x, y_ = self.net()
        input_y = tf.placeholder(shape=(self.batch_size, 10), dtype=tf.float64)
        loss = self.loss(input_y, y_)
        optimizer = tf.train.AdamOptimizer().minimize(loss=loss)
        # correct_pred = tf.equal(tf.arg_max(y_, 1), tf.arg_max(input_y, 1))
        # accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        with tf.Session() as sess:
            tf.global_variables_initializer().run()
            gen = gen_gen()
            for i in range(10000):
                train_x, train_y = gen.next()
                # print(train_x.shape)
                loss_val, _ = sess.run([loss, optimizer], feed_dict={input_x: train_x, input_y: train_y})
                if i % 10 == 0:
                    print(loss_val)
Some losses from the training steps:
2.2985106
2.2944324
2.3120923
2.306837
2.304546
2.2818785
2.3069105
2.3087378
2.3094501
2.2966876
2.3119392
2.2941442
2.2990022
2.2830834
2.3137615
I don't see any obvious errors in the code, but I can share my experience: the Adam optimizer does not always work well with VGG-like networks, presumably because of the large number of parameters, and the symptom is exactly that the loss does not decrease. In that case, use plain SGD with an appropriate learning rate and a decay schedule.
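For illustration only (the values are placeholders, not tuned): in the question's TF1-style code, swapping Adam for SGD with momentum and an exponentially decaying learning rate could look like this:
# Sketch: replace tf.train.AdamOptimizer() in train() with plain SGD
# plus momentum and a decaying learning rate. Values are illustrative.
global_step = tf.train.get_or_create_global_step()
learning_rate = tf.train.exponential_decay(
    learning_rate=0.01,      # initial rate (placeholder)
    global_step=global_step,
    decay_steps=1000,        # decay every 1000 steps (placeholder)
    decay_rate=0.95,
    staircase=True)
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9).minimize(
    loss=loss, global_step=global_step)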

K.gradients returning [None]

K.gradients() returns "None" (Keras with the TensorFlow backend). Is there a problem with my model structure or something else?
I have modified a WGAN-GP model with Keras for text, referring to this code: https://github.com/OctThe16th/WGAN-GP-with-keras-for-text/blob/master/Exploration/GenerativeAdverserialWGAN-GP.py
from keras.layers import Dense, Flatten, Input, BatchNormalization, Dropout, GRU, Bidirectional, Reshape, Activation
from keras.layers.noise import GaussianNoise
from keras.models import Model
from keras.layers.merge import _Merge
from keras.layers import Convolution1D, AveragePooling1D, ZeroPadding1D, UpSampling1D, concatenate, regularizers
from keras.layers import Embedding, Reshape, Lambda
from keras.layers import LSTM, multiply
from keras.optimizers import Adam, RMSprop
from random import randint
from keras.initializers import Constant
from keras import backend as K
from keras import layers
import numpy as np
from functools import partial
import pickle
import os
import tensorflow as tf
def wasserstein_loss(y_true, y_pred):
    return K.mean(y_true * y_pred)

def gradient_penalty_loss(y_true, y_pred, averaged_samples):
    '''Computes gradient penalty based on prediction and weighted real / fake samples'''
    gradients = K.gradients(K.sum(y_pred), averaged_samples)
    # compute the euclidean norm by squaring ...
    gradients_sqr = K.square(gradients)
    # ... summing over the rows ...
    gradients_sqr_sum = K.sum(gradients_sqr)
    # ... and taking the sqrt
    gradient_l2_norm = K.sqrt(gradients_sqr_sum)
    # compute lambda * (1 - ||grad||)^2, still for each single sample
    gradient_penalty = K.square(1 - gradient_l2_norm)
    # return the mean as loss over all the batch samples
    return K.mean(gradient_penalty)

class RandomWeightedAverage(_Merge):
    def _merge_function(self, inputs):
        weights = K.random_uniform((BATCH_SIZE, 1), dtype='float32')
        return (weights * inputs[0]) + ((1 - weights) * inputs[1])

# K.argmax() is not differentiable; this function is defined to replace it
# with a differentiable approximation.
def argmax(x):
    y = tf.reduce_sum(tf.cumsum(tf.ones_like(x), axis=-1) * tf.exp(beta * x) / tf.reduce_sum(tf.exp(beta * x), axis=-1, keep_dims=True), axis=-1) - 1
    return y

def generator_mod(softmax_shape):
    person1_input = Input(shape=(1,), dtype='float32')
    noise_input = Input(shape=(1, person_embedding_dim), dtype='float32')
    relation_input = Input(shape=(1,), dtype='float32')
    person1_embedded = Embedding(1, person_embedding_dim)(person1_input)
    relation_embedded = Embedding(1, relation_embedding_dim)(relation_input)
    embedded_layer = concatenate([person1_embedded, relation_embedded, noise_input], axis=1)
    drop_1 = BatchNormalization(momentum=0.8)(embedded_layer)
    x_1 = Convolution1D(filters=64, kernel_size=3, padding='same', activation='elu')(drop_1)
    x_1 = BatchNormalization()(x_1)
    x_1 = Convolution1D(filters=32, kernel_size=3, padding='same', activation='elu')(x_1)
    x_1 = BatchNormalization()(x_1)
    x_1 = Flatten()(x_1)
    x_1 = Dense(32, activation='relu')(x_1)
    ######################################################################
    person1_description = Input(shape=(max_sequence_length,), dtype='float32')
    embedded_sequences1 = Embedding(len(word_index) + 1, word_embeddings_dim)(person1_description)
    lstm_out1 = Bidirectional(LSTM(64))(embedded_sequences1)
    attention_1 = Dense(128, activation='softmax', name='attention_vec')(lstm_out1)
    attention_mul = multiply([lstm_out1, attention_1], name='attention_mul')
    ##### global attention finish #####
    x_2 = Dense(32, activation='relu')(attention_mul)
    full_connected = multiply([x_1, x_2], name='full_connected')
    x = Dense(softmax_shape, activation='softmax')(full_connected)
    output = Lambda(argmax)(x)  # shape (?,)
    output = Lambda(K.expand_dims, arguments={'axis': -1})(output)  # shape (?,1)
    model = Model(inputs=[person1_input, noise_input, relation_input, person1_description], outputs=output)
    return model

def discriminator_mod():
    person1_input = Input(shape=(1,), dtype='float32')
    person2_input = Input(shape=(1,), dtype='float32')
    relation_input = Input(shape=(1,), dtype='float32')
    person1_embedded = Embedding(1, person_embedding_dim)(person1_input)
    person2_embedded = Embedding(1, person_embedding_dim)(person2_input)
    relation_embedded = Embedding(len(word_index) + 1, word_embeddings_dim)(relation_input)
    embedded_layer = concatenate([person1_embedded, person2_embedded, relation_embedded], axis=1)
    drop_1 = Dropout(0.5)(embedded_layer)
    x = Convolution1D(128, 1, activation='relu')(drop_1)
    x = BatchNormalization()(x)
    x = Convolution1D(filters=64, kernel_size=3, padding='same', activation='elu')(x)
    x = BatchNormalization()(x)
    x = Convolution1D(filters=32, kernel_size=3, padding='same', activation='elu')(x)
    x = BatchNormalization()(x)
    x = Flatten()(x)
    x = Dense(32, activation='relu')(x)
    auxiliary_input1 = Input(shape=(max_sequence_length,), dtype='float32', name='aux_input1')
    embedded_sequences1 = Embedding(len(word_index) + 1, word_embeddings_dim)(auxiliary_input1)
    lstm_out1 = Bidirectional(LSTM(64))(embedded_sequences1)
    lstm_drop1 = Dropout(0.5)(lstm_out1)
    auxiliary_input2 = Input(shape=(max_sequence_length,), dtype='float32', name='aux_input2')
    embedded_sequences2 = Embedding(len(word_index) + 1, word_embeddings_dim)(auxiliary_input2)
    lstm_out2 = Bidirectional(LSTM(64))(embedded_sequences2)
    lstm_drop2 = Dropout(0.5)(lstm_out2)
    lstm_drop = multiply([lstm_drop1, lstm_drop2])
    ##### global attention start #####
    attention_1 = Dense(128, activation='softmax', name='attention_vec')(lstm_drop)
    attention_mul = multiply([lstm_drop, attention_1], name='attention_mul')
    ##### global attention finish #####
    # attention_mul = Flatten()(attention_mul)
    attention_mul = Dense(32, activation='relu')(attention_mul)
    ##### global attention start #####
    full_connected = multiply([x, attention_mul], name='full_connected')
    attention_2 = Dense(32, activation='softmax')(full_connected)
    attention_final = multiply([full_connected, attention_2])
    ##### global attention finish #####
    dense_layer = Dense(16, activation='relu')(attention_final)
    main_output = Dense(1, activation='tanh', name='main_output')(dense_layer)
    model = Model(inputs=[person1_input, person2_input, relation_input, auxiliary_input1, auxiliary_input2], outputs=main_output)
    return model

def train(from_save_point=False, suffix='rnn'):
    X_train = np.random.randn(10, 243)
    generator = generator_mod(person_total)
    discriminator = discriminator_mod()
    generator.summary()
    discriminator.summary()
    for layer in discriminator.layers:
        layer.trainable = False
    discriminator.trainable = False
    person1 = Input(shape=(1,))
    relation = Input(shape=(1,))
    seed = Input(shape=(1, person_embedding_dim))
    person1_description = Input(shape=(max_sequence_length,))
    genarated_person2 = generator([person1, seed, relation, person1_description])
    person2_description = Input(shape=(max_sequence_length,))
    discriminator_layers_for_generator = discriminator([person1, genarated_person2, relation, person1_description, person2_description])
    generator_model = Model(inputs=[person1, relation, seed, person1_description, person2_description], outputs=[discriminator_layers_for_generator])
    generator_model.compile(optimizer=RMSprop(lr=0.0001, rho=0.9), loss=wasserstein_loss)
    for layer in discriminator.layers:
        layer.trainable = True
    for layer in generator.layers:
        layer.trainable = False
    discriminator.trainable = True
    generator.trainable = False
    person2 = Input(shape=(1,))
    generated_samples_for_discriminator = generator([person1, seed, relation, person1_description])
    discriminator_output_from_generator = discriminator([person1, generated_samples_for_discriminator, relation, person1_description, person2_description])
    discriminator_output_from_real_samples = discriminator([person1, person2, relation, person1_description, person2_description])
    averaged_samples = RandomWeightedAverage()([person2, generated_samples_for_discriminator])
    averaged_samples_out = discriminator([person1, averaged_samples, relation, person1_description, person2_description])
    partial_gp_loss = partial(gradient_penalty_loss, averaged_samples=averaged_samples)
    partial_gp_loss.__name__ = 'gradient_penalty'
    discriminator_model = Model(inputs=[person1, person2, relation, person1_description, person2_description, seed], outputs=[discriminator_output_from_real_samples, discriminator_output_from_generator, averaged_samples_out])
    discriminator_model.compile(optimizer=RMSprop(lr=0.0001, rho=0.9), loss=[wasserstein_loss, wasserstein_loss, partial_gp_loss])
    positive_y = np.ones((BATCH_SIZE, 1), dtype=np.float32)
    negative_y = -positive_y
    dummy_y = np.zeros((BATCH_SIZE, 1), dtype=np.float32)

if __name__ == "__main__":
    # convert_text_to_nptensor(cutoff=50, min_frequency_words=100000, max_lines=20000000)
    train(from_save_point=False, suffix='Google')
However, when execution reaches this line:
gradients = K.gradients(K.sum(y_pred), averaged_samples)
it fails with this error:
TypeError: Failed to convert object of type to Tensor. Contents: [None]. Consider casting elements to a supported type.
Can anyone help me? Thank you very much!

Extracting Activation maps from trained neural network

I have a trained CNN model. I am trying to extract the output of each convolutional layer and plot the results to explore which regions of the image have high activations. Any ideas on how to do this?
Below is the network I have trained.
input_shape = (3,227,227)
x_input = Input(input_shape)
# Conv Layer 1
x = Convolution2D(96, 7, 7, subsample=(4,4), activation='relu',
                  name='conv_1', init='he_normal')(x_input)
x = MaxPooling2D((3, 3), strides=(2,2), name='maxpool')(x)
x = BatchNormalization()(x)
x = ZeroPadding2D((2,2))(x)
# Conv Layer 2
x = Convolution2D(256, 5, 5, activation='relu', name='conv_2', init='he_normal')(x)
x = MaxPooling2D((3, 3), strides=(2,2), name='maxpool2')(x)
x = BatchNormalization()(x)
x = ZeroPadding2D((2,2))(x)
# Conv Layer 3
x = Convolution2D(384, 3, 3, activation='relu',
                  name='conv_3', init='he_normal')(x)
x = MaxPooling2D((3, 3), strides=(2,2), name='maxpool3')(x)
x = Flatten()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
predictions = Dense(2, activation="softmax")(x)
model = Model(inputs=x_input, outputs=predictions)
Thanks!
Look at this GitHub issue and the FAQ entry How can I obtain the output of an intermediate layer?. It seems the easiest way is to define a new model with the outputs that you want. For example:
input_shape = (3,227,227)
x_input = Input(input_shape)
# Conv Layer 1
# Save the layer output in a variable
conv1 = Convolution2D(96, 7, 7, subsample=(4,4), activation='relu',
                      name='conv_1', init='he_normal')(x_input)
x = conv1
x = MaxPooling2D(...)(x)
# ...
conv2 = Convolution2D(...)(x)
x = conv2
# ...
conv3 = Convolution2D(...)(x)
x = conv3
# ...
predictions = Dense(2, activation="softmax")(x)
# Main model
model = Model(inputs=x_input, outputs=predictions)
# Intermediate evaluation model
conv_layers_model = Model(inputs=x_input, outputs=[conv1, conv2, conv3])
# After training is done, retrieve intermediate evaluations for data
conv1_val, conv2_val, conv3_val = conv_layers_model.predict(data)
Note that since you are using the same layer objects in both models, the weights are automatically shared between them.
A more complete example of activation visualization can be found here. In that case they use the K.function approach.
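For reference, here is a minimal sketch of that K.function approach applied to the model above; the layer names and the channels-first input shape come from the question, while data is assumed to be a batch of preprocessed images:
from keras import backend as K

# Build a backend function from the model input (plus the learning phase
# flag) to the outputs of the convolutional layers of interest.
conv_outputs = [model.get_layer(name).output
                for name in ['conv_1', 'conv_2', 'conv_3']]
get_activations = K.function([model.input, K.learning_phase()], conv_outputs)

# 0 = test mode, so dropout is disabled; data has shape (batch, 3, 227, 227)
conv1_val, conv2_val, conv3_val = get_activations([data, 0])
print(conv1_val.shape)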
