Keras 1.0: getting intermediate layer output - theano

I am currently trying to visualize the output of an intermediate layer in Keras 1.0 (which I could do with Keras 0.3) but it does not work anymore.
x = model.input
y = model.layers[3].output
f = theano.function([x], y)
But I get the following error:
MissingInputError: ("An input of the graph, used to compute DimShuffle{x,x,x,x}(keras_learning_phase), was not provided and not given a value.Use the Theano flag exception_verbosity='high',for more information on this error.", keras_learning_phase)
Prior to Keras 1.0, with my graph model, I could just do:
x = graph.inputs['input'].input
y = graph.nodes[layer].get_output(train=False)
f = theano.function([x], y, allow_input_downcast=True)
So I suspect it to come from the "train=False" parameter which I don't know how to set in the new version.
Thank you for your help

Try:
First, add the following import statements:
from keras import backend as K
from theano import function
then
# Build a backend function that maps (input batch, learning-phase flag) to the
# output of the layer at index 3. It is bound to the name the calls below use.
get_3rd_layer_output = K.function(
    [model.layers[0].input, K.learning_phase()],
    [model.layers[3].output])
# output in test mode = 0
layer_output = get_3rd_layer_output([X_test, 0])[0]
# output in train mode = 1
layer_output = get_3rd_layer_output([X_train, 1])[0]

This was just answered by François Chollet on github:
Your model apparently has a different behavior in training and test mode, and so needs to know what mode it should be using.
Use
iterate = K.function([input_img, K.learning_phase()], [loss, grads])
and pass 1 or 0 as value for the learning phase, based on whether you want the model in training mode or test mode.
https://github.com/fchollet/keras/issues/2417

Related

Tensorflow: why is the outcome of a single neural network node without activation function different than my own calculation?

I created a single node with 3 inputs and one output, with bias 0 and no activation function.
As far as I understand, the only thing that happens here is a matrix multiplication between the input vector and the randomly initialized weights, but when I do the multiplication myself with the same inputs and weights I get a different outcome. What am I missing or doing wrong?
thanks in advance!
i base my calculation on some code provided here
here is the code:
def example_code(self):
import tensorflow as tf
data = [[1.0,2.0,3.0]]
x = tf.placeholder(tf.float32,shape=[1,3],name="mydata")
node = tf.layers.Dense(units=1)
y = node(x)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
print("input: "+str(data))
outcome = sess.run(y,feed_dict={x:data})
#print("outcome from tensorflow: "+str(outcome))
weights = node.get_weights()[0]
bias = node.get_weights()[1]
print("weights: "+str(weights))
print("bias: "+str(bias))
print("outcome from tensorflow: " + str(outcome))
outcome = tf.matmul(data,weights)
print("manually calculated outcome: "+str(sess.run(outcome)))
output from code:
input: [[1.0, 2.0, 3.0]]
weights: [[ 0.72705185] [-0.70188504] [ 0.5336163 ]]
bias: [0.]
outcome from tensorflow: [[-1.3463312]]
manually calculated outcome: [[0.9241307]]
The problem is that tf.layers is not using your session sess. This in turn results in different initializations for the weights, hence the two different values. tf.layers ends up using tf.keras.backend.get_session() to retrieve the session used for initialization and retrieval of weights (node.get_weights()). tf.keras.backend.get_session() tries to use the default session if there is one, and if there is not then it creates its own session. In this case, sess is not configured as default session (only tf.InteractiveSession gets automatically configured as default session on construction). The simplest fix is to use tf.Session in the recommended way, as a context manager:
def example_code(self):
import tensorflow as tf
with tf.Session() as sess:
data = [[1.0,2.0,3.0]]
x = tf.placeholder(tf.float32,shape=[1,3],name="mydata")
node = tf.layers.Dense(units=1)
y = node(x)
init = tf.global_variables_initializer()
sess.run(init)
print("input: "+str(data))
outcome = sess.run(y,feed_dict={x:data})
#print("outcome from tensorflow: "+str(outcome))
weights = node.get_weights()[0]
bias = node.get_weights()[1]
print("weights: "+str(weights))
print("bias: "+str(bias))
print("outcome from tensorflow: " + str(outcome))
outcome = tf.matmul(data,weights)
print("manually calculated outcome: "+str(sess.run(outcome)))
This will set sess as default session, and also it will make sure its resources are freed when the function is finished (which was another issue in your code). If for whatever reason you want to use some session as default but do not want to close it with the context manager, you can just use as_default():
def example_code(self):
    """Compare a Dense layer's output with a manual matmul of its weights.

    The session is installed as the default one with ``as_default()`` so
    that ``node.get_weights()`` reads the weights from the same session that
    initialized them. Unlike ``with tf.Session() as sess``, ``as_default()``
    does not close the session, so it is closed explicitly at the end.
    """
    import tensorflow as tf
    sess = tf.Session()
    with sess.as_default():
        data = [[1.0,2.0,3.0]]
        x = tf.placeholder(tf.float32,shape=[1,3],name="mydata")
        node = tf.layers.Dense(units=1)
        y = node(x)
        init = tf.global_variables_initializer()
        sess.run(init)
        print("input: "+str(data))
        outcome = sess.run(y,feed_dict={x:data})
        #print("outcome from tensorflow: "+str(outcome))
        weights = node.get_weights()[0]
        bias = node.get_weights()[1]
        print("weights: "+str(weights))
        print("bias: "+str(bias))
        print("outcome from tensorflow: " + str(outcome))
        # Recompute the layer output by hand from the retrieved weights.
        outcome = tf.matmul(data,weights)
        print("manually calculated outcome: "+str(sess.run(outcome)))
    # You need to manually ensure that the session gets closed after
    sess.close()

Reinforcement learning - how to teach a neural network to avoid actions already chosen during the episode?

I built a custom OpenAI Gym environment in which I have 13 different actions and 33 observation items. During an episode every action can be used, but each can be used only once; otherwise the episode ends. Thus the maximum length of an episode is 13.
I tried to train several neural networks for this, but so far the NN has not learned it well and the episode ends well before the 13th step. The last layer of the NN is a softmax layer with 13 neurons.
Do you have any idea how an NN would look like which could learn to choose from 13 actions one-by-one?
Kind regards,
Ferenc
I found something interesting in this topic
https://ai.stackexchange.com/questions/7755/how-to-implement-a-constrained-action-space-in-reinforcement-learning
Will check if the 'do-nothing' idea helps ...
At the end I wrote this code:
from keras import backend as K
import tensorflow as tf
def mask_output2(x):
    """Zero out already-used actions in a softmax output and renormalize.

    ``x`` is a pair ``(inp, soft_out)``. Positions where ``inp`` equals 0
    keep their value from ``soft_out``; every other position is set to 0.
    Each row is then divided by its sum (rows whose sum is 0 are divided by
    1 instead, so they stay all-zero rather than producing NaN).
    """
    # Imported locally so the snippet is self-contained.
    from keras.layers import Lambda
    inp, soft_out = x
    # add a very small value in order to avoid having 0 everywhere
    # NOTE(review): the shape is hard-coded; presumably 32 is the batch size
    # and 13 the number of actions -- confirm against the model.
    c = K.constant(0.0000001, dtype='float32', shape=(32, 13))
    y = soft_out + c
    # Mask the shifted values `y`; masking `soft_out` here would silently
    # discard the small constant added above.
    y = Lambda(lambda x: K.switch(K.equal(x[0],0), x[1], K.zeros_like(x[1])))([inp, y])
    y_sum = K.sum(y, axis=-1)
    # Replace zero row sums by 1 to avoid a division by zero below.
    y_sum_corrected = Lambda(lambda x: K.switch(K.equal(x[0],0), K.ones_like(x[0]), x[0] ))([y_sum])
    y_sum_corrected = tf.divide(1,y_sum_corrected)
    # Scale every row i of y by 1 / (sum of row i).
    y = tf.einsum('ij,i->ij', y, y_sum_corrected)
    return y
It simply corrects the softmax result by clearing (setting to 0) those neurons where the inp tensor is set to 1 (showing an action already used) and renormalizing the remaining values so that they sum to 1.

Keras residual connection on only part of input

I already made a model without residual connection which compile and fit without any errors [using Keras Sequential API]
I wish to test a modified version just adding a residual connection like in SPEECH ENHANCEMENT BASED ON DEEP NEURAL NETWORKS WITH SKIP CONNECTIONS. So, I need to use Functional API instead.
My problem is extracting a slice from the middle of the inputs. I tried the following.
INPUT_SIZE = N*OUTPUT_SIZE # N must be odd
HIDDEN_SIZE = N*OUTPUT_SIZE # N must be odd
modelInputs = Input(shape=(INPUT_SIZE,))
x = Dense(HIDDEN_SIZE, activation='relu', kernel_initializer=INPUT_KERNEL_INITIALIZER)(modelInputs)
for _ in np.arange(1,N_HIDDEN):
x = Dense(HIDDEN_SIZE, activation='relu', kernel_initializer=INPUT_KERNEL_INITIALIZER)(x)
Y = Dense(OUTPUT_SIZE, activation='relu', kernel_initializer=INPUT_KERNEL_INITIALIZER)(x)
# --------------------------------------------------------
# Here, 4 options I tried to get "modelInputs_selected"
# --------------------------------------------------------
# Try 1
modelInputs_selected = Lambda(lambda x: x[int(N/2)*OUTPUT_SIZE:(int(N/2)+1)*OUTPUT_SIZE])(modelInputs)
# Try 2 [Try 1 with 'output_shape' filled]
modelInputs_selected = Lambda(lambda x: x[int(N/2)*OUTPUT_SIZE:(int(N/2)+1)*OUTPUT_SIZE, :], output_shape=(OUTPUT_SIZE,))(modelInputs)
# Try 3
modelInputs_selected = K.transpose(K.gather(K.transpose(modelInputs), K.arange(int(N/2)*OUTPUT_SIZE, (int(N/2)+1)*OUTPUT_SIZE)))
# Try 4 [Try 3 unwrapped]
toto1 = K.transpose(modelInputs)
toto2 = K.gather(toto1, K.arange(int(N/2)*OUTPUT_SIZE, (int(N/2)+1)*OUTPUT_SIZE))
modelInputs_selected = K.transpose(toto2)
# --------------------------------------------------------
# End of option tried
# --------------------------------------------------------
predictions = add([modelInputs_selected, Y])
model = Model(inputs=modelInputs, outputs=predictions)
Results are:
Try 1 & Try 2:
Error = Incoherent shape during add()
Try 3 & Try 4:
Good shapes for add()
Error with Model(...)
I went through Model() step by step. It goes backward, starting with the last layer:
Output add() OK
Previous one K.transpose(): Error AttributeError: 'Tensor' object has no attribute '_keras_history' in "build_map_of_graph"
The model construction failed because I use a function from the backend (TensorFlow, for me)?
Anyone can help, please?
Maybe if I use a multiply()?
modelInputs is (m, N*OUTPUT_SIZE) and modelInputs_selected is (m,OUTPUT_SIZE)
With the good matrix A (N*OUTPUT_SIZE, OUTPUT_SIZE): modelInputs_selected = multiply(modelInputs, A)
Error in try 1
You're ignoring the samples dimension (the first dimension).
The tensor x should come in with shape (samples_or_batch_size, INPUT_SIZE).
Solution:
So you need lambda x: x[:, int(N/2)*OUTPUT_SIZE:(int(N/2)+1)*OUTPUT_SIZE]
But honestly, why not one of these?
lambda x: x[:,:OUTPUT_SIZE]
lambda x: x[:,-OUTPUT_SIZE:]
Is it a problem taking from the beginning or from the end? This even frees you from using an N, the only condition will be that INPUT_SIZE >= OUTPUT_SIZE.
Error in try 2
You're not ignoring the samples, but you're inverting the dimensions.
It should be x[:,expression] instead of x[expression,:].
You only need to declare the output shape if you're not using Tensorflow (Tensorflow can do it automatically for you)
Errors in 3 and 4
You cannot use any function outside of a layer.
There is no problem if you use either Keras backend or Tensorflow functions, but they must be inside layers, such as in try 1 and 2.
The no _keras_history is a typical error for using functions outside of layers.

Creating a session in a graph that uses another graph and its session

Versions : I am using tensorflow (version : v1.1.0-13-g8ddd727 1.1.0) in python3 (Python 3.4.3 (default, Nov 17 2016, 01:08:31) [GCC 4.8.4] on linux), it is installed from source and GPU-based (name: GeForce GTX TITAN X major: 5 minor: 2 memoryClockRate (GHz) 1.076).
Context : Generative adversarial networks (GANs) learn to synthesise new samples from a high-dimensional distribution by passing samples drawn from a latent space through a generative network. When the high-dimensional distribution describes images of a particular data set, the network should learn to generate visually similar image samples for latent variables that are close to each other in the latent space. For tasks such as image retrieval and image classification, it may be useful to exploit the arrangement of the latent space by projecting images into it, and using this as a representation for discriminative tasks.
Context Problem : I am trying to invert a generator (compute L2 norm between an input image in cifar10 and a image g(z) of the generator, where z is a parameter to be trained with stochastic gradient descent in order to minimize this norm and find an approximation of the preimage of the input image).
Technical Issue : Therefore, I am building a new graph in a new session in tensorflow but I need to use a trained gan that was trained in another session, which I cannot import because the two graphs are not the same. That is to say, when I use sess.run(), the variables are not found and therefore there is a Error Message.
The code is
import tensorflow as tf
from data import cifar10, utilities
from . import dcgan
import logging
logger = logging.getLogger("gan.test")
BATCH_SIZE = 1
random_z = tf.get_variable(name='z_to_invert', shape=[BATCH_SIZE, 100], initializer=tf.random_normal_initializer())
#random_z = tf.random_normal([BATCH_SIZE, 100], mean=0.0, stddev=1.0, name='random_z')
# Generate images with generator
generator = dcgan.generator(random_z, is_training=True, name='generator')
# Add summaries to visualise output images
generator_visualisation = tf.cast(((generator / 2.0) + 0.5) * 255.0, tf.uint8)
summary_generator = tf.summary.\
image('summary/generator', generator_visualisation,
max_outputs=8)
#Create one image to test inverting
test_image = map((lambda inp: (inp[0]*2. -1., inp[1])),
utilities.infinite_generator(cifar10.get_train(), BATCH_SIZE))
inp, _ = next(test_image)
summary_inp = tf.summary.image('input_image', inp)
img_summary = tf.summary.merge([summary_generator, summary_inp])
with tf.name_scope('error'):
error = inp - generator #generator = g(z)
# We set axis = None because norm(tensor, ord=ord) is equivalent to norm(reshape(tensor, [-1]), ord=ord)
error_norm = tf.norm(error, ord=2, axis=None, keep_dims=False, name='L2Norm')
summary_error = tf.summary.scalar('error_norm', error_norm)
with tf.name_scope('Optimizing'):
    # Only the latent variable is optimized. It is named `random_z` in this
    # script; `z` is never defined here.
    optimizer = tf.train.AdamOptimizer(0.001).minimize(error_norm, var_list=[random_z])
sv = tf.train.Supervisor(logdir="gan/invert_logs/", save_summaries_secs=None, save_model_secs=None)
batch = 0
with sv.managed_session() as sess:
logwriter = tf.summary.FileWriter("gan/invert_logs/", sess.graph)
while not sv.should_stop():
if batch > 0 and batch % 100 == 0:
logger.debug('Step {} '.format(batch))
(_, s) = sess.run((optimizer, summary_error))
logwriter.add_summary(s, batch)
print('step %d: Patiente un peu poto!' % batch)
img = sess.run(img_summary)
logwriter.add_summary(img, batch)
batch += 1
print(batch)
I understood what is the problem, it is actually that I am trying to run a session which is saved in gan/train_logs but the graph does not have those variables I am trying to run.
Therefore, I tried to implement this instead :
graph = tf.Graph()
tf.reset_default_graph()
with tf.Session(graph=graph) as sess:
ckpt = tf.train.get_checkpoint_state('gan/train_logs/')
saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path + '.meta', clear_devices=True)
saver.restore(sess, ckpt.model_checkpoint_path)
logwriter = tf.summary.FileWriter("gan/invert_logs/", sess.graph)
#inp, _ = next(test_image)
BATCH_SIZE = 1
#Create one image to test inverting
test_image = map((lambda inp: (inp[0]*2. -1., inp[1])),
utilities.infinite_generator(cifar10.get_train(), BATCH_SIZE))
inp, _ = next(test_image)
#M_placeholder = tf.placeholder(tf.float32, shape=cifar10.get_shape_input(), name='M_input')
M_placeholder = inp
zmar = tf.summary.image('input_image', inp)
#Create sample noise from random normal distribution
z = tf.get_variable(name='z', shape=[BATCH_SIZE, 100], initializer=tf.random_normal_initializer())
# Function g(z) zhere z is randomly generated
g_z = dcgan.generator(z, is_training=True, name='generator')
generator_visualisation = tf.cast(((g_z / 2.0) + 0.5) * 255.0, tf.uint8)
sum_generator = tf.summary.image('summary/generator', generator_visualisation)
img_summary = tf.summary.merge([sum_generator, zmar])
with tf.name_scope('error'):
error = M_placeholder - g_z
# We set axis = None because norm(tensor, ord=ord) is equivalent to norm(reshape(tensor, [-1]), ord=ord)
error_norm = tf.norm(error, ord=2, axis=None, keep_dims=False, name='L2Norm')
summary_error = tf.summary.scalar('error_norm', error_norm)
with tf.name_scope('Optimizing'):
optimizer = tf.train.AdamOptimizer(0.001).minimize(error_norm, var_list=z)
sess.run(tf.global_variables_initializer())
for i in range(10000):
(_, s) = sess.run((optimizer, summary_error))
logwriter.add_summary(s, i)
print('step %d: Patiente un peu poto!' % i)
img = sess.run(img_summary)
logwriter.add_summary(img, i)
print('Done Training')
This script runs, but I have checked on tensorboard, the generator that is used here does not have the trained weights and it only produces noise.
I think I am trying to run a session in a graph that uses another graph and its trained session. I have read thoroughly the Graphs and Session documentation on tensorflow website https://www.tensorflow.org/versions/r1.3/programmers_guide/graphs, I have found an interesting tf.import_graph_def function :
You can rebind tensors in the imported graph to tf.Tensor objects in the default graph by passing the optional input_map argument. For example, input_map enables you to take import a graph fragment defined in a tf.GraphDef, and statically connect tensors in the graph you are building to tf.placeholder tensors in that fragment.
You can return tf.Tensor or tf.Operation objects from the imported graph by passing their names in the return_elements list.
But I don't know how to use this function, no example is given, and also I only found those two links that may help me :
https://github.com/tensorflow/tensorflow/issues/7508
Tensorflow: How to use a trained model in a application?
It would be really nice to have your help on this topic. This should be straightforward for someone who has already used the tf.import_graph_def function... What I really need is to get the trained generator to apply it to a new variable z which is to be trained in another session.
Thanks

Get gradient value necessary to break an image

I've been experimenting with adversarial images and I read up on the fast gradient sign method from the following link https://arxiv.org/pdf/1412.6572.pdf...
The instructions explain that the necessary gradient can be calculated using backpropagation...
I've been successful at generating adversarial images but I have failed at attempting to extract the gradient necessary to create an adversarial image. I will demonstrate what I mean.
Let us assume that I have already trained my algorithm using logistic regression. I restore the model and I extract the number I wish to change into a adversarial image. In this case it is the number 2...
# construct model
logits = tf.matmul(x, W) + b
pred = tf.nn.softmax(logits)
...
...
# assign the images of number 2 to the variable
sess.run(tf.assign(x, labels_of_2))
# setup softmax
sess.run(pred)
# placeholder for target label
fake_label = tf.placeholder(tf.int32, shape=[1])
# setup the fake loss
fake_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=fake_label)
# minimize fake loss using gradient descent,
# calculating the derivatives of the weight of the fake image will give the direction of weights necessary to change the prediction
adversarial_step = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.learning_rate).minimize(fake_loss, var_list=[x])
# continue calculating the derivative until the prediction changes for all 10 images
for i in range(FLAGS.training_epochs):
# fake label tells the training algorithm to use the weights calculated for number 6
sess.run(adversarial_step, feed_dict={fake_label:np.array([6])})
sess.run(pred)
This is my approach, and it works perfectly. It takes my image of number 2 and changes it only slightly so that when I run the following...
x_in = np.expand_dims(x[0], axis=0)
classification = sess.run(tf.argmax(pred, 1))
print(classification)
it will predict the number 2 as a number 6.
The issue is, I need to extract the gradient necessary to trick the neural network into thinking number 2 is 6. I need to use this gradient to create the nematode mentioned above.
I am not sure how can I extract the gradient value. I tried looking at tf.gradients but I was unable to figure out how to produce an adversarial image using this function. I implemented the following after the fake_loss variable above...
# Keep a handle on the gradient op so that the loop below can evaluate it.
gradients = tf.gradients(fake_loss, x)
for i in range(FLAGS.training_epochs):
# calculate gradient with weight of number 6
gradient_value = sess.run(gradients, feed_dict={fake_label:np.array([6])})
# update the image of number 2
gradient_update = x+0.007*gradient_value[0]
sess.run(tf.assign(x, gradient_update))
sess.run(pred)
Unfortunately the prediction did not change in the way I wanted, and moreover this logic resulted in a rather blurry image.
I would appreciate an explanation as to what I need to do in order calculate and extract the gradient that will trick the neural network, so that if I were to take this gradient and apply it to my image as a nematode, it will result in a different prediction.
Why not let the Tensorflow optimizer add the gradients to your image? You can still evaluate the nematode to get the resulting gradients that were added.
I created a bit of sample code to demonstrate this with a panda image. It uses the VGG16 neural network to transform your own panda image into a "goldfish" image. Every 100 iterations it saves the image as PDF so you can print it losslessly to check if your image is still a goldfish.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipyd
from libs import vgg16 # Download here! https://github.com/pkmital/CADL/tree/master/session-4/libs
pandaimage = plt.imread('panda.jpg')
pandaimage = vgg16.preprocess(pandaimage)
plt.imshow(pandaimage)
img_4d = np.array([pandaimage])
g = tf.get_default_graph()
input_placeholder = tf.Variable(img_4d,trainable=False)
to_add_image = tf.Variable(tf.random_normal([224,224,3], mean=0.0, stddev=0.1, dtype=tf.float32))
combined_images_not_clamped = input_placeholder+to_add_image
filledmax = tf.fill(tf.shape(combined_images_not_clamped), 1.0)
filledmin = tf.fill(tf.shape(combined_images_not_clamped), 0.0)
greater_than_one = tf.greater(combined_images_not_clamped, filledmax)
combined_images_with_max = tf.where(greater_than_one, filledmax, combined_images_not_clamped)
lower_than_zero =tf.less(combined_images_with_max, filledmin)
combined_images = tf.where(lower_than_zero, filledmin, combined_images_with_max)
net = vgg16.get_vgg_model()
tf.import_graph_def(net['graph_def'], name='vgg')
names = [op.name for op in g.get_operations()]
style_layer = 'prob:0'
the_prediction = tf.import_graph_def(
net['graph_def'],
name='vgg',
input_map={'images:0': combined_images},return_elements=[style_layer])
goldfish_expected_np = np.zeros(1000)
goldfish_expected_np[1]=1.0
goldfish_expected_tf = tf.Variable(goldfish_expected_np,dtype=tf.float32,trainable=False)
loss = tf.reduce_sum(tf.square(the_prediction[0]-goldfish_expected_tf))
optimizer = tf.train.AdamOptimizer().minimize(loss)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
def show_many_images(*images):
    """Show the given images side by side in a single row of a new figure.

    The shape of every image is printed before it is drawn.
    """
    plt.figure()
    count = len(images)
    for position, image in enumerate(images, start=1):
        print(image.shape)
        # One row, `count` columns; subplot positions are 1-based. The
        # three-argument form also works for more than nine images.
        plt.subplot(1, count, position)
        plt.imshow(image)
    plt.show()
for i in range(1000):
_, loss_val = sess.run([optimizer,loss])
if i%100==1:
print("Loss at iteration %d: %f" % (i,loss_val))
_, loss_val,adversarial_image,pred,nematode = sess.run([optimizer,loss,combined_images,the_prediction,to_add_image])
res = np.squeeze(pred)
average = np.mean(res, 0)
res = res / np.sum(average)
plt.imshow(adversarial_image[0])
plt.show()
print([(res[idx], net['labels'][idx]) for idx in res.argsort()[-5:][::-1]])
show_many_images(img_4d[0],nematode,adversarial_image[0])
plt.imsave('adversarial_goldfish.pdf',adversarial_image[0],format='pdf') # save for printing
Let me know if this helps you!

Resources