Bypass keras dimension check for custom loss

I am trying to implement a custom loss function, but I am having difficulty making it work with Keras.
I am dealing with n-dimensional data, and for each input sample I would like to compute the loss using another n-dimensional vector, which we'll call P (to put it simply, it encodes the reliability of each corresponding input measurement).
My model is an autoencoder with n inputs (X) and n outputs (Y). I started implementing a solution in which I overload my Y matrix by appending P to it: new_Y = concat([Y, P]), and then in my custom loss I split this matrix back apart and compute the loss. The problem is that TensorFlow seems to check the shape of my model's output against new_Y before even looking at my loss function.
Code to reproduce the error:
import numpy as np
from tensorflow import keras
import keras.backend as K
from keras import Model
from keras.layers import Input, Dense
from keras.optimizers import Adam
from keras.losses import MeanSquaredError as TFMSE

tf_mse = TFMSE()

input_dim = 2
batch_size = 16

X = np.random.randn(input_dim * batch_size).reshape((batch_size, input_dim, 1))
P = np.random.randn(input_dim * batch_size).reshape((batch_size, input_dim, 1))
Y = X.copy()
new_Y = np.hstack([Y, P])

_input = Input((input_dim, 1), name='input')
elayer = Dense(input_dim, kernel_initializer='normal', activation='relu')(_input)
bottleneck = Dense(1, kernel_initializer='normal', activation='linear')(elayer)
dlayer = Dense(input_dim, kernel_initializer='normal', activation='relu')(bottleneck)
model = Model(inputs=_input, outputs=dlayer)

def adjusted_mse(y_true, y_pred):
    # I am first trying to simply apply classic MSE on Y here:
    # y_true = [Y, P], so y_true[:, :input_dim] = Y
    return tf_mse(y_true[:, :input_dim], y_pred)

model.compile(
    loss=adjusted_mse,
    optimizer=Adam(),
    metrics=['mse']
)
model.fit(X, new_Y, epochs=8, batch_size=4, validation_split=.1, verbose=True)
Error raised:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "***/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/tmp/__autograph_generated_filew2svrbng.py", line 15, in tf__train_function
retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
ValueError: in user code:
...
ValueError: Dimensions must be equal, but are 2 and 4 for '{{node SquaredDifference}} = SquaredDifference[T=DT_FLOAT](model/dense_2/Relu, IteratorGetNext:1)' with input shapes: [?,2,2], [?,4,1].
Does anyone have an idea how I could at least bypass this verification step? Or is there a simpler way to do this? I was thinking about overloading X the same way, passing the P part through an identity layer, and concatenating it back at the output, but that seems convoluted.
Thank you
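One possible way to sidestep the target-shape check entirely (a minimal sketch, assuming TF 2.x; the P-weighted squared error below is only a placeholder for the actual loss, and the trailing singleton dimension is dropped to keep shapes simple) is to feed P as a second model input and register the loss with model.add_loss(). compile() then takes no loss= argument, so Keras never compares a target array against the output shape:
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense

input_dim = 2
batch_size = 16
X = np.random.randn(batch_size, input_dim).astype('float32')
P = np.random.randn(batch_size, input_dim).astype('float32')

x_in = Input((input_dim,), name='x')   # measurements
p_in = Input((input_dim,), name='p')   # reliability of each measurement
encoded = Dense(input_dim, activation='relu')(x_in)
bottleneck = Dense(1, activation='linear')(encoded)
decoded = Dense(input_dim, activation='relu')(bottleneck)

model = Model(inputs=[x_in, p_in], outputs=decoded)
# Symbolic loss added to the graph: no y_true involved, hence no shape check
model.add_loss(tf.reduce_mean(p_in * tf.square(x_in - decoded)))
model.compile(optimizer='adam')        # note: no loss= argument
model.fit([X, P], epochs=8, batch_size=4, verbose=True)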

Related

tf.keras plot_model: add_node() received a non node class object

I'm getting back into Python and have been trying out some stuff with tensorflow and keras. I wanted to use the plot_model function, and after sorting out some graphviz issues I am now getting this error:
TypeError: add_node() received a non node class object:
I've tried to find an answer myself but have come up short, as the only answer I found with this error didn't seem to be related to tf. Any suggestions or alternative ideas would be greatly appreciated.
Here's the code and error message. This is my first question on here, so sorry if I missed anything; just let me know.
I'm using miniconda3 with Python 3.8.
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping
from numpy import argmax
from matplotlib import pyplot
from random import randint

tf.keras.backend.set_floatx("float64")

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

class mnist_model(Model):
    def __init__(self):
        super(mnist_model, self).__init__()
        self.conv = Conv2D(32, 3, activation=tf.nn.leaky_relu, kernel_initializer='he_uniform', input_shape=(28, 28, 3))
        self.pool = MaxPool2D((2, 2))
        self.flat = Flatten()
        self.den1 = Dense(128, activation=tf.nn.relu, kernel_initializer='he_normal')
        self.drop = Dropout(0.25)
        self.den2 = Dense(10, activation=tf.nn.softmax)

    def call(self, inputs):
        n = self.conv(inputs)
        n = self.pool(n)
        n = self.flat(n)
        n = self.den1(n)
        n = self.drop(n)
        return self.den2(n)

model = mnist_model()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
limit = EarlyStopping(monitor='val_loss', patience=5)
history = model.fit(x_train, y_train, batch_size=64, epochs=1, verbose=2, validation_split=0.15, steps_per_epoch=100, callbacks=[limit])
print("\nTraining finished\n\nTesting 10000 samples")
model.evaluate(x_test, y_test, verbose=1)
print("Testing finished\n")
plot_model(model, show_shapes=True, rankdir='LR')
##################################################################################################################################################################
## Error message: ##
Train on 51000 samples, validate on 9000 samples
Training finished
Testing 10000 samples
10000/10000 [==============================] - 7s 682us/sample - loss: 0.2447 - accuracy: 0.9242
Testing finished
Traceback (most recent call last):
File "C:\Users\Thomas\Desktop\Various Python\Tensorflow\Tensorflow_experimentation\tc_mnist.py", line 60, in <module>
plot_model(model, show_shapes = True, rankdir = 'LR')
File "C:\Users\Thomas\miniconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\utils\vis_utils.py", line 283, in plot_model
dpi=dpi)
File "C:\Users\Thomas\miniconda3\envs\tensorflow\lib\site-packages\tensorflow_core\python\keras\utils\vis_utils.py", line 131, in model_to_dot
dot.add_node(node)
File "C:\Users\Thomas\miniconda3\envs\tensorflow\lib\site-packages\pydotplus\graphviz.py", line 1281, in add_node
'class object: {}'.format(str(graph_node))
TypeError: add_node() received a non node class object: <pydotplus.graphviz.Node object at 0x00000221C7E3E888>
I think the root cause of the issue is the shape inference of the subclassed model, where model.summary() shows "multiple" as the Output Shape. I added a model() helper within the subclassed model, as shown below.
def model(self):
    x = tf.keras.layers.Input(shape=(28, 28, 1))
    return Model(inputs=[x], outputs=self.call(x))
With this modification, shape inference works as it does in the Functional API. Since Functional and Sequential models are static graphs of layers, shape inference is straightforward for them. A subclassed model, however, is a piece of Python code (a call method) with no graph of layers to inspect: we cannot know how the layers are connected to each other (because that is defined in the body of call, not as an explicit data structure), so we cannot infer input/output shapes.
Please check full code here for your reference.
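In practice (a short usage sketch, assuming the model() helper above has been added to mnist_model), you plot the wrapped functional model instead of the subclassed instance:
# The wrapped Model has concrete input/output shapes, so plotting works
wrapped = model.model()
wrapped.summary()
plot_model(wrapped, show_shapes=True, rankdir='LR')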

How to fix AttributeError: 'NoneType' object has no attribute '_inbound_nodes' that comes while creating lstm model using manhattan distance?

I am trying to create a neural net model that returns the similarity score of two sentences using a Manhattan LSTM (e.g. https://medium.com/mlreview/implementing-malstm-on-kaggles-quora-question-pairs-competition-8b31b0b16a07). I have used the Quora question-pairs dataset and generated their embeddings using google-bert. Now I want to create an LSTM model like the one in the example above and use it, but I am getting the following error:
Using TensorFlow backend.
(100000, 1, 768)
(100000, 1, 768)
(100000,)
(100000, 100)
Traceback (most recent call last):
File "train_model_manhattan.py", line 151, in <module>
model = Model(inputs=[inp1,inp2], outputs=[malstm_distance])
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 93, in __init__
self._init_graph_network(*args, **kwargs)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 231, in _init_graph_network
self.inputs, self.outputs)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1366, in _map_graph_network
tensor_index=tensor_index)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1353, in build_map
node_index, tensor_index)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1353, in build_map
node_index, tensor_index)
File "/home/manishp/anaconda3/envs/bert_env/lib/python3.6/site-packages/keras/engine/network.py", line 1325, in build_map
node = layer._inbound_nodes[node_index]
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'
Here is what I have already tried. Note that each embedding returned has shape (768), i.e. it is a vector of size 768, like this: [1.2e+05 2.7e-01 7.8 .... 8.9]
print(np.shape(train_vec1))  # => (100000, 1, 768)
print(np.shape(train_vec2))  # => (100000, 1, 768)
print(np.shape(train_label))

#################################################
def exponent_neg_manhattan_distance(left, right):
    return np.exp(-np.sum(np.abs(left - right), axis=1, keepdims=True))

def manhattan_distance(left, right):
    ''' Helper function for the similarity estimate of the LSTMs outputs'''
    print(np.shape(left))
    return K.sum(K.abs(left - right), axis=1, keepdims=True)
#################################################

import keras
from keras.layers import Input, LSTM, Dense
from keras.models import Model

inp1 = Input(shape=(768,))
inp2 = Input(shape=(768,))
x = keras.layers.concatenate([inp1, inp2], axis=-1)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
out = Dense(1)(x)

# Since this is a siamese network, both sides share the same LSTM
shared_lstm = LSTM(100)
left_output = shared_lstm(train_vec1_tensor)
right_output = shared_lstm(train_vec2_tensor)

# Calculates the distance as defined by the MaLSTM model
malstm_distance = Lambda(function=lambda x: manhattan_distance(x[0], x[1]),
                         output_shape=lambda x: (x[0][0], 1))([left_output, right_output])

#######################
# Getting error when code flow reaches the following line
#######################
model = Model(inputs=[inp1, inp2], outputs=[malstm_distance])
This is my entire code
import os

data_file = 'quora_duplicate_questions.tsv'
# 0 means dont load, 1 means fetch from file
LOAD_ENCODING_FROM_FILE = 1
encoding_data_file_quest1 = 'encoding_quest1'
encoding_data_file_quest2 = 'encoding_quest2'
encoding_data_file_label = 'quest_label'

#################################################
import numpy as np
import pandas as pd
import tensorflow as tf
import re
from bert_serving.client import BertClient
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import pickle
from keras import models
from keras import layers
from keras import optimizers
from keras.layers import Dropout
from keras import backend as K
from keras.layers import Lambda
#################################################

maxlen = 125  # We will cut reviews after 125 words
# The next step is to transform all sentences to fixed-length encodings using bert embeddings
# [0.1 0.4 0.4] [0.9 0.6 0.1] 2.4
# [0.4 0.1 0.3] [0.5 0.6 0.1] 1.0
# Save the encodings in a file
if LOAD_ENCODING_FROM_FILE == 1:
    with open(encoding_data_file_quest1, "rb") as fp:
        vec1 = pickle.load(fp)
    with open(encoding_data_file_quest2, "rb") as fp:
        vec2 = pickle.load(fp)
    with open(encoding_data_file_label, "rb") as fp:
        label = pickle.load(fp)

train_vec1 = np.asarray(vec1, np.float32)
train_vec2 = np.asarray(vec2, np.float32)
train_vec1 = train_vec1.reshape((100000, 1, 768))
train_vec2 = train_vec2.reshape((100000, 1, 768))
train_vec1_tensor = K.cast(train_vec1, dtype='float32')
train_vec2_tensor = K.cast(train_vec2, dtype='float32')
train_label = np.asarray(label, np.float32)
print(np.shape(train_vec1))
print(np.shape(train_vec2))
print(np.shape(train_label))

#################################################
def exponent_neg_manhattan_distance(left, right):
    return np.exp(-np.sum(np.abs(left - right), axis=1, keepdims=True))

def manhattan_distance(left, right):
    ''' Helper function for the similarity estimate of the LSTMs outputs'''
    print(np.shape(left))
    return K.sum(K.abs(left - right), axis=1, keepdims=True)
#################################################

import keras
from keras.layers import Input, LSTM, Dense
from keras.models import Model

inp1 = Input(shape=(768,))
inp2 = Input(shape=(768,))
x = keras.layers.concatenate([inp1, inp2], axis=-1)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(64, activation='relu')(x)
out = Dense(1)(x)

# Since this is a siamese network, both sides share the same LSTM
shared_lstm = LSTM(100)
left_output = shared_lstm(train_vec1_tensor)
right_output = shared_lstm(train_vec2_tensor)

# Calculates the distance as defined by the MaLSTM model
malstm_distance = Lambda(function=lambda x: manhattan_distance(x[0], x[1]),
                         output_shape=lambda x: (x[0][0], 1))([left_output, right_output])

#######################
# Getting error when code flow reaches the following line
#######################
model = Model(inputs=[inp1, inp2], outputs=[malstm_distance])
model.summary()

optimizer = optimizers.Adadelta(clipnorm=gradient_clipping_norm)
model.compile(optimizer,
              loss='mean_squared_error',
              metrics=['accuracy'])
history = model.fit([train_vec1, train_vec2], train_label,
                    epochs=30, batch_size=200,
                    validation_split=0.2)
I want the model to take two embeddings, calculate the manhattan distance of the embeddings and return the distance.
left_output and right_output are obtained from the LSTM layer, but the inputs are fed into the Input layers and then through a series of Dense layers. Note that there is no connection anywhere between that stack of Dense layers and the LSTM: the Model is asked to produce the LSTM's output, yet the LSTM is never connected to the inputs, so Keras cannot trace the graph. The keras.layers.concatenate line should use the outputs from shared_lstm rather than the outputs of the input layers directly, like this:
keras.layers.concatenate([left_output, right_output], axis=-1)
Only then is this a Siamese network.
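For completeness, a minimal corrected wiring might look like this (a sketch, assuming each question is a length-1 sequence of 768-d BERT vectors, matching the (100000, 1, 768) arrays; the key point is that the shared LSTM consumes the graph Input tensors rather than the constant train_vec*_tensor values):
import keras.backend as K
from keras.layers import Input, LSTM, Lambda
from keras.models import Model

inp1 = Input(shape=(1, 768))
inp2 = Input(shape=(1, 768))

shared_lstm = LSTM(100)            # shared weights -> Siamese branches
left_output = shared_lstm(inp1)    # connected to the graph inputs
right_output = shared_lstm(inp2)

malstm_distance = Lambda(
    lambda t: K.sum(K.abs(t[0] - t[1]), axis=1, keepdims=True),
    output_shape=lambda s: (s[0][0], 1)
)([left_output, right_output])

model = Model(inputs=[inp1, inp2], outputs=[malstm_distance])
# model.fit([train_vec1, train_vec2], train_label, epochs=30, batch_size=200)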

Error when fitting linear binary classifier with tensorflow ValueError: No gradients provided for any variable, check your graph

I have an error when trying to fit a linear binary classifier using a step function and MSE, instead of softmax and cross-entropy loss. It's an error I can't overcome, probably due to shape inconsistencies. I provide a code sample below. Please help.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification as gen_data
from sklearn.model_selection import train_test_split

rng = np.random

# Setting hyperparameters
n_observations = 100
lr = 0.005
n_iter = 100

# Generate input data
xs, ys = gen_data(n_features=2, n_redundant=0, n_informative=2,
                  random_state=0, n_clusters_per_class=1)

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(xs, ys, test_size=.4)
X_train = np.float32(X_train)
X_test = np.float32(X_test)

# Graph
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)
W = tf.Variable(np.float32(rng.randn(2)), name="weight")
b = tf.Variable(np.float32(rng.randn()), name="bias")

def step(x):
    is_greater = tf.greater(x, 0)
    as_float = tf.to_float(is_greater)
    doubled = tf.multiply(as_float, 2)
    return tf.subtract(doubled, 1)

Y_pred = step(tf.add(tf.multiply(X, W), b))
cost = tf.reduce_mean(tf.squared_difference(Y_pred, Y))

# Using built-in optimization algorithm to train the model:
train_step = tf.train.GradientDescentOptimizer(0.005).minimize(cost)

sess = tf.Session()
sess.run(tf.initialize_all_variables())
for step in range(n_iter):
    sess.run(train_step, feed_dict={X: X_train, Y: y_train})
    print("iter: {0}; weight: {1}; bias: {2}".format(step,
                                                     sess.run(W),
                                                     sess.run(b)))
This is the error:
ValueErrorTraceback (most recent call last)
<ipython-input-17-5a0c4711802c> in <module>()
26
27 # Using built-in optimization algorithm to train the model:
---> 28 train_step = tf.train.GradientDescentOptimizer(0.005).minimize(cost)
29
30 # Using TF differentiation from scratch to implement a step-by-step optimizer
/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/optimizer.pyc in minimize(self, loss, global_step, var_list, gate_gradients, aggregation_method, colocate_gradients_with_ops, name, grad_loss)
405 "No gradients provided for any variable, check your graph for ops"
406 " that do not support gradients, between variables %s and loss %s." %
--> 407 ([str(v) for _, v in grads_and_vars], loss))
408
409 return self.apply_gradients(grads_and_vars, global_step=global_step,
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ["<tf.Variable 'weight:0' shape=(2,) dtype=float64_ref>", "<tf.Variable 'bias:0' shape=() dtype=float32_ref>", "<tf.Variable 'weight_1:0' shape=(2,) dtype=float64_ref>", "<tf.Variable 'bias_1:0' shape=() dtype=float32_ref>",
Your training data isn't changing between training steps. That is, each training step feeds the same values for X and Y:
for step in range(n_iter):
    sess.run(train_step, feed_dict={X: X_train, Y: y_train})
If you set different values for X and Y between training steps, the error should go away.
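A sketch of what that could look like (hypothetical mini-batch sampling swapped into the loop above, so X and Y differ between steps):
batch_size = 32
for i in range(n_iter):
    # Draw a fresh mini-batch each iteration
    idx = rng.randint(0, len(X_train), batch_size)
    sess.run(train_step, feed_dict={X: X_train[idx], Y: y_train[idx]})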

model.fit_generator: Error when checking target: expected lambda_2 to have 4 dimensions, but got array with shape (200, 1)

I implemented a generator to feed the training process, but fit_generator throws this error:
Error when checking target: expected lambda_2 to have 4 dimensions,
but got array with shape (200, 1)
It seems that the function is switching X and y at some point, because (200,1) is "y" shape, not "X" shape.
If I test the generator with the code below it works fine:
for i in range(32):
    train = next(train_generator)
    print(train[0].shape)
But with the fit_generator an error is thrown.
This is my code:
import os
import csv

samples = []
with open('data/driving_log.csv') as csvfile:
    reader = csv.reader(csvfile)
    for line in reader:
        samples.append(line)

from sklearn.model_selection import train_test_split
train_samples, validation_samples = train_test_split(samples, test_size=0.2)

import cv2
import numpy as np
import sklearn

def generator(samples, batch_size=32):
    num_samples = len(samples)
    while 1:  # Loop forever so the generator never terminates
        sklearn.utils.shuffle(samples)
        for offset in range(0, num_samples, batch_size):
            batch_samples = samples[offset:offset + batch_size]
            images = []
            angles = []
            for batch_sample in batch_samples:
                name = 'data\\' + batch_sample[0].split('\\')[-1]
                center_image = cv2.imread(name)
                center_angle = float(batch_sample[3])
                if not center_image is None:
                    images.append(center_image)
                    angles.append(center_angle)
            # trim image to only see section with road
            X_train = np.array(images)
            y_train = np.array(angles)
            yield sklearn.utils.shuffle(X_train, y_train)

# compile and train the model using the generator function
train_generator = generator(train_samples, batch_size=int(len(train_samples) / 32))
validation_generator = generator(validation_samples, batch_size=int(len(validation_samples) / 32))

ch, row, col = 3, 160, 320  # Trimmed image format

from keras.models import Sequential
from keras.layers import Lambda

model = Sequential()
# Preprocess incoming data, centered around zero with small standard deviation
model.add(Lambda(lambda x: x / 127.5 - 1.,
                 input_shape=(row, col, ch),
                 output_shape=(row, col, ch)))
# model.add(... finish defining the rest of your model architecture here ...)
model.compile(loss='mse', optimizer='adam')
model.fit_generator(train_generator,
                    steps_per_epoch=len(train_samples) / 32,
                    validation_data=validation_generator,
                    validation_steps=len(validation_samples) / 32,
                    epochs=3)
Any ideas how I can solve this?
The problem turned out to be an error loading the images: the file name was not built correctly. Since cv2.imread(name) does not raise an error when it cannot find the image and simply returns None, the generator was yielding empty batches, which caused the error in the network.
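A small defensive check at the point where the image is read (a sketch) makes this failure mode explicit instead of silent:
center_image = cv2.imread(name)
if center_image is None:
    # Fail fast: a wrong path otherwise yields empty batches and a
    # confusing shape error much later, inside fit_generator
    raise FileNotFoundError('could not read image: ' + name)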

What should the generator passed to predict_generator() return?

I am calling Keras predict_generator() like:
bottleneck_features_train = model.predict_generator(train_gen, len(telemetry))
where train_gen() is defined like
def train_gen():
    # ...
    yield (X, y)
and X is a numpy array with shape (48, 299, 299, 3), y is a numpy array with shape (48,)
I get the error below. What should I do instead?
Otherwise, a link to a working example would help. The only examples I have found are for Keras 1 or use ImageDataGenerator.flow().
I am running Keras 2.0.2.
Here is the error:
Traceback (most recent call last):
File "/home/fanta/workspace/CarND-Behavioral-Cloning-P3/cache.py", line 143, in <module>
tf.app.run()
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/platform/app.py", line 44, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "/home/fanta/workspace/CarND-Behavioral-Cloning-P3/cache.py", line 138, in main
bottleneck_features_train = model.predict_generator(train_gen, len(telemetry))
File "/usr/local/lib/python3.5/dist-packages/keras/legacy/interfaces.py", line 88, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 2094, in predict_generator
outs = self.predict_on_batch(x)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1677, in predict_on_batch
self._feed_input_shapes)
File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 100, in _standardize_input_data
'Found: array with shape ' + str(data.shape))
ValueError: The model expects 0 input arrays, but only received one array. Found: array with shape (48, 299, 299, 3)
Process finished with exit code 1
===== UPDATE =====
The issue is not related to the generator. Below is a short program to reproduce it. Note that if you switch the network from inception to vgg, it works fine.
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.layers import Input, AveragePooling2D
from keras.models import Model
from keras.datasets import cifar10
from scipy.misc import imresize
import pickle
import tensorflow as tf
import keras.backend as K
import numpy as np

network = 'inception'  # Must be 'inception' or 'vgg'
dataset = 'cifar10'
batch_size = 64

if network == 'vgg':
    size = (224, 224)
elif network == 'inception':
    size = (299, 299)
else:
    assert False, "network must be either 'inception' or 'vgg'"

def create_model():
    input_tensor = Input(shape=(size[0], size[1], 3))
    if network == 'inception':
        model = InceptionV3(input_tensor=input_tensor, include_top=False)
        x = model.output
        x = AveragePooling2D((8, 8), strides=(8, 8))(x)
        model = Model(model.input, x)
    elif network == 'vgg':
        model = VGG16(input_tensor=input_tensor, include_top=False)
        x = model.output
        x = AveragePooling2D((7, 7))(x)
        model = Model(model.input, x)
    else:
        assert False
    return model

def main():
    # Download and load cifar10 dataset
    (X_train, y_train), (_, _) = cifar10.load_data()
    # Reduce the dataset to the first 1000 entries, to save memory and computation time
    X_train = X_train[0:1000]
    y_train = y_train[0:1000]
    # Resize dataset images to comply with expected input image size
    X_train = [imresize(image, size) for image in X_train]
    X_train = np.array(X_train)
    # File name where to save bottlenecked features
    train_output_file = "{}_{}_{}.p".format(network, dataset, 'bottleneck_features_train')
    print("Saving to", train_output_file)
    with tf.Session() as sess:
        K.set_session(sess)
        K.set_learning_phase(1)
        model = create_model()
        # We skip pre-processing and bottleneck the features
        bottleneck_features_train = model.predict(X_train, batch_size=batch_size, verbose=1)
        data = {'features': bottleneck_features_train, 'labels': y_train}
        pickle.dump(data, open(train_output_file, 'wb'))

if __name__ == '__main__':
    main()
At the prediction step, your generator should yield only the inputs and not the targets: only the X, not the y.
Does that help?
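For instance, a minimal prediction generator could look like this (a sketch with hypothetical names):
def predict_gen(X_data, batch_size=48):
    # Yield input batches only; no (X, y) tuples at prediction time
    i = 0
    n = len(X_data)
    while True:
        yield X_data[i:i + batch_size]
        i = (i + batch_size) % n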