how to merge classification models horizontally - keras

I have many models already trained, which each answer a simple yes/no question. Pseudocode:
# Load one independently trained yes/no classifier per category.
model_dog = keras.load('is_dog')
model_cat = keras.load('is_cat')
model_rat = keras.load('is_rat')
image = load_photo_as_numpy_array('photo.jpg')
# Run every model on the same image, one after another, and collect the answers.
multi_class = [ m.predict(image) for m in (model_dog,model_cat,model_rat) ]
This works fine, but it is (a) slow, because inference is done sequentially instead of in parallel (I have several hundred such models, not just 3), and (b) much more complex to use than if I had ONE model that does multi-class classification.
What I want, is:
# Desired API: fuse the single-question models into one model and save it.
model = keras.concat_horizontal([ model_dog, model_cat, model_rat ])
model.save('combined_model')
Then whenever I want to use the combined model, it is as simple as:
# Load the saved combined model and classify with a single predict call.
model = keras.load('combined_model')
# The loaded model is bound to `model`, so predict must be called on that name.
multi_class = model.predict(image)
This way, I can add a new classification to the combined model, by training one simple model, for example, that recognizes a fish.

As I suggested in comments, you can merge multiple models in one new model and predict using this new model.
First, I write a function to merge models and return a new combined model. This is what you want:
def concat_horizontal(models, input_shape):
    """Merge several models that share one input into a single Keras model.

    Every model in ``models`` is called on the same new input tensor and the
    individual outputs are joined with ``tf.keras.layers.concatenate``, so a
    single ``predict`` call returns all predictions side by side.

    Args:
        models: Non-empty sequence of Keras models accepting ``input_shape``.
        input_shape: Shape of one input sample, without the batch dimension.

    Returns:
        A ``tf.keras.Model`` mapping the shared input to the merged outputs.
        With a single model its output is passed through unchanged.

    Raises:
        ValueError: If ``models`` is empty.
    """
    models = list(models)
    if not models:
        raise ValueError("concat_horizontal needs at least one model")
    # Named ``shared_input`` so the builtin ``input`` is not shadowed.
    shared_input = tf.keras.layers.Input(shape=input_shape)
    outputs = [sub_model(shared_input) for sub_model in models]
    if len(outputs) == 1:
        # Nothing to concatenate; some Keras versions reject a one-element list.
        merged = outputs[0]
    else:
        merged = tf.keras.layers.concatenate(outputs)
    return tf.keras.Model(inputs=shared_input, outputs=merged)
Let's explore an example. Say we want to merge two sequential models like this:
def model_1():
    """Build and compile the first example classifier, named "model1".

    The network flattens a (28, 28, 1) input, applies three ReLU Dense layers
    (150, 200, 150 units) and ends in a 10-unit softmax layer. It is compiled
    with Adam, sparse categorical cross-entropy and the accuracy metric.
    """
    layers = tf.keras.layers
    stack = [
        layers.Flatten(input_shape=(28,28,1)),
        layers.Dense(150, activation='relu'),
        layers.Dense(200, activation='relu'),
        layers.Dense(150, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ]
    net = tf.keras.models.Sequential(stack, name="model1")
    net.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        metrics=['accuracy'],
    )
    return net
def model_2():
    """Build and compile the second example classifier, named "model2".

    Same input and output as the first example model, but with only two
    150-unit ReLU Dense layers between the Flatten and the 10-unit softmax.
    """
    net = tf.keras.models.Sequential(name="model2", layers=[
        tf.keras.layers.Flatten(input_shape=(28,28,1)),
        tf.keras.layers.Dense(150, activation='relu'),
        tf.keras.layers.Dense(150, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])
    compile_args = dict(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        metrics=['accuracy'],
    )
    net.compile(**compile_args)
    return net
# Instantiate one fresh, untrained copy of each example model.
model1 = model_1()
model2 = model_2()
Let's use MNIST as train dataset for both of our models:
import tensorflow_datasets as tfds
# Load two MNIST splits: ds_1 is the 'train' split, ds_2 is the 'test' split.
# Both are later used as *training* data, one per model.
ds_1 = tfds.load('mnist', split='train', as_supervised=True)
ds_2 = tfds.load('mnist', split='test', as_supervised=True)
def map_fn(image, label):
    """Return the image divided by 255 together with its untouched label."""
    return image / 255, label
# Apply the scaling function, shuffle with a 1024-element buffer, and group into batches of 32.
ds_1 = ds_1.map(map_fn).shuffle(1024).batch(32)
ds_2 = ds_2.map(map_fn).shuffle(1024).batch(32)
Now, we can train models, save them, and then load them like this:
# Train each model for two epochs on its own dataset.
# NOTE(review): validation_data is the very dataset being trained on, so the
# reported validation metrics are not a held-out estimate.
model1.fit(ds_1, epochs=2, validation_data=ds_1)
model2.fit(ds_2, epochs=2, validation_data=ds_2)
# Persist both trained models to HDF5 files ...
model1.save('model1.h5')
model2.save('model2.h5')
# ... and read them back under new names, as if they came from elsewhere.
model3 = tf.keras.models.load_model('model1.h5')
model4 = tf.keras.models.load_model('model2.h5')
So we have two separate models (model3, model4) and want to merge them into a new one. Pass them, along with the input shape (in this case the MNIST data shape), to the function we have written above:
# Merge the two reloaded models; (28,28,1) is the input shape both were built with.
new_model = concat_horizontal([model3,model4],(28,28,1))
Now, if we plot this new model:
# Draw the layer graph of the merged model.
tf.keras.utils.plot_model(new_model)
It's time to get predictions of model:
import matplotlib.pyplot as plt
import numpy as np

# Pull exactly one (image, label) example out of the batched training set.
sample = ds_1.unbatch().take(1)
img, lbl = next(iter(sample))
# Add a leading axis so the single image forms a batch of one.
img = tf.expand_dims(img, axis=0)
pred = new_model.predict(img)
# The merged output is two 10-way score vectors laid end to end;
# reshape so that each row belongs to one of the merged models.
pred = np.reshape(pred, (2, 10))
results = np.argmax(pred, axis=1)
print(results)
plt.imshow(np.array(img).squeeze())
# show must be *called*; the bare attribute `plt.show` does nothing.
plt.show()
In my case, both predictions are classified as 4:
Output:

Related

How to reinitialize a layer in Keras but not the weights

I want to copy some layers of a given model in an other new model (with all their attributes like stride, padding etc.). I want to keep the weights and attributes but not the inbound_node/outbound_node property linked to the old model.
I don't think there is any function in Keras to do that but it's very easy with the get_config, get_weights() and set_weights() method of Keras Layers.
# Re-create a layer of the same class from the old layer's configuration.
new_fresh_layer = layer.__class__(**layer.get_config())
old_layer_weights = layer.get_weights()
# Call the new layer on its inputs before copying the weights
# (presumably so that its weight variables exist for set_weights; confirm).
x = new_fresh_layer(layer_inputs)
new_fresh_layer.set_weights(old_layer_weights)
It also works when adding layers sequentially to a model:
# Source model holding the layer that will be cloned.
model1 = Sequential()
old_layer = Dense(10, input_shape=(10,))
model1.add(old_layer)

# Target model: rebuild the layer from its config, add it, then copy the weights.
model2 = Sequential()
new_layer = type(old_layer)(**old_layer.get_config())
model2.add(new_layer)
new_layer.set_weights(old_layer.get_weights())

# The first two weight arrays of both layers must now be element-wise equal.
for idx in (0, 1):
    assert (new_layer.get_weights()[idx] == old_layer.get_weights()[idx]).all()

Keras, Tensorflow : Merge two different model output into one

I am working on one deep learning model where I am trying to combine two different model's output :
The overall structure is like this :
So the first model takes one matrix, for example [ 10 x 30 ]
#input 1
# One string per sample is passed through ElmoEmbeddingLayer (not defined in this snippet).
input_text = layers.Input(shape=(1,), dtype="string")
embedding = ElmoEmbeddingLayer()(input_text)
model_a = Model(inputs = [input_text] , outputs=embedding)
# shape : [10,50]
Now the second model takes two input matrix :
# The second model's two inputs are backed by fixed random tensors.
X_in = layers.Input(tensor=K.variable(np.random.uniform(0,9,[10,32])))
# Bounds are passed as (low, high); the closing parenthesis was missing here.
M_in = layers.Input(tensor=K.variable(np.random.uniform(-1,1,[10,10])))
md_1 = New_model()([X_in, M_in]) #new_model defined somewhere
# The second input is M_in; the original referenced an undefined A_in.
model_s = Model(inputs = [X_in, M_in], outputs = md_1)
# shape : [10,50]
I want to make these two matrices trainable like in TensorFlow I was able to do this by :
# Create a trainable [10,10] float32 variable initialised from an existing
# matrix_a array; the name matrix_a is then rebound to the variable.
matrix_a = tf.get_variable(name='matrix_a',
shape=[10,10],
dtype=tf.float32,
initializer=tf.constant_initializer(np.array(matrix_a)),trainable=True)
I am not getting any clue how to make those matrix_a and matrix_b trainable and how to merge the output of both networks then give input.
I went through this question But couldn't find an answer because their problem statement is different from mine.
What I have tried so far is :
#input 1
input_text = layers.Input(shape=(1,), dtype="string")
embedding = ElmoEmbeddingLayer()(input_text)
model_a = Model(inputs = [input_text] , outputs=embedding)
# shape : [10,50]
# Second branch: two inputs backed by fixed random tensors.
X_in = layers.Input(tensor=K.variable(np.random.uniform(0,9,[10,10])))
# Bounds are passed as (low, high); the closing parenthesis was missing here.
M_in = layers.Input(tensor=K.variable(np.random.uniform(-1,1,[10,100])))
md_1 = New_model()([X_in, M_in]) #new_model defined somewhere
# The second input is M_in; the original referenced an undefined A_in.
model_s = Model(inputs = [X_in, M_in], outputs = md_1)
# [10,50]
# transpose second model output
tranpose = Lambda(lambda x: K.transpose(x))
agglayer = tranpose(md_1)
# combine first and second model output with a matrix product
dott = Lambda(lambda x: K.dot(x[0],x[1]))
# Call the layer under the name it was defined with (`dott`, not `dotter`).
kmean_layer = dott([embedding,agglayer])
# input
final_model = Model(inputs=[input_text, X_in, M_in], outputs=kmean_layer,name='Final_output')
final_model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
final_model.summary()
Overview of the model :
Update:
Model b
# Random matrices that back the two tensor-fed inputs of model b.
X = np.random.uniform(0,9,[10,32])
# NOTE(review): uniform() is called with low=1 > high=-1; confirm the intended bounds.
M = np.random.uniform(1,-1,[10,10])
X_in = layers.Input(tensor=K.variable(X))
M_in = layers.Input(tensor=K.variable(M))
layer_one = Model_b()([M_in, X_in])
# NOTE(review): dropout2 is never used below; layer_two is built from layer_one,
# so the Dropout output does not reach the model. Confirm intent.
dropout2 = Dropout(dropout_rate)(layer_one)
layer_two = Model_b()([layer_one, X_in])
model_b_ = Model([X_in, M_in], layer_two, name='model_b')
model a
# Sizes used by model a: input sequence length and the two Embedding arguments.
length = 150
dic_size = 100
embed_size = 12
# Integer sequences -> Embedding -> LSTM(5) -> Dense(10).
input_text = Input(shape=(length,))
embedding = Embedding(dic_size, embed_size)(input_text)
embedding = LSTM(5)(embedding)
embedding = Dense(10)(embedding)
model_a = Model(input_text, embedding, name = 'model_a')
I am merging like this:
# Matrix-multiply model a's output with the transposed output of model b.
mult = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([embedding, model_b_.output])
# The combined model takes model b's two inputs followed by model a's input.
final_model = Model(inputs=[model_b_.input[0],model_b_.input[1],model_a.input], outputs=mult)
Is it right way to matmul two keras model?
I don't know if I am merging the output correctly and the model is correct.
I would greatly appreciate it if anyone kindly gives me some advice on how should I make that matrix trainable and how to merge the model's output correctly then give input.
Thanks in advance!
Trainable weights
Ok. Since you are going to have custom trainable weights, the way to do this in Keras is creating a custom layer.
Now, since your custom layer has no inputs, we will need a hack that will be explained later.
So, this is the layer definition for the custom weights:
from keras.layers import *
from keras.models import Model
from keras.initializers import get as get_init, serialize as serial_init
import keras.backend as K
import tensorflow as tf
class TrainableWeights(Layer):
    """Layer that owns one trainable tensor and multiplies its input by it.

    A Keras initializer (or its name) can be supplied when the layer is
    created; any extra keyword arguments, such as ``name``, are forwarded to
    the base ``Layer``.
    """

    def __init__(self, shape, initializer='uniform', **kwargs):
        super().__init__(**kwargs)
        self.shape = shape
        self.initializer = get_init(initializer)

    def build(self, input_shape):
        """Define the layer's single weight, named 'kernel'."""
        self.kernel = self.add_weight(
            name='kernel',
            shape=self.shape,
            initializer=self.initializer,
            trainable=True,
        )
        self.built = True

    def call(self, x):
        """Return ``x * kernel``.

        Keras requires the layer to receive an input. The author assumes
        ``x`` is a tensor with value 1 and no shape or shape ``(1,)``.
        """
        return x * self.kernel

    def compute_output_shape(self, input_shape):
        """Report the weight shape so Keras can build the summary properly."""
        return self.shape

    def get_config(self):
        """Return the layer config; only needed for model.save() / loading."""
        own_config = {'shape': self.shape, 'initializer': serial_init(self.initializer)}
        merged = dict(super().get_config())
        # This layer's own entries take precedence over the base config.
        merged.update(own_config)
        return merged
Now, this layer should be used like this:
# A constant-tensor Input serves as the dummy input that the layer multiplies by its weights.
dummyInputs = Input(tensor=K.constant([1]))
# `shape` stands for the desired shape of the trainable tensor.
trainableWeights = TrainableWeights(shape)(dummyInputs)
Model A
Having the layer defined, we can start modeling.
First, let's see the model_a side:
#general vars
# length is the input sequence length; dic_size and embed_size are the Embedding arguments
length = 150
dic_size = 100
embed_size = 12
#for the model_a segment
input_text = Input(shape=(length,))
embedding = Embedding(dic_size, embed_size)(input_text)
#the following two lines are just a resource to reach the desired shape
#(a final Dense with 50 units)
embedding = LSTM(5)(embedding)
embedding = Dense(50)(embedding)
#creating model_a here is optional, only if you want to use model_a independently later
model_a = Model(input_text, embedding, name = 'model_a')
Model B
For this, we are going to use our TrainableWeights layer.
But first, let's simulate a New_model() as mentioned.
#simulates New_model() #notice the explicit batch_shape for the matrices
#stand-in model: each input goes through its own Dense(50) and the two results are added
newIn1 = Input(batch_shape = (10,10))
newIn2 = Input(batch_shape = (10,30))
newOut1 = Dense(50)(newIn1)
newOut2 = Dense(50)(newIn2)
newOut = Add()([newOut1, newOut2])
new_model = Model([newIn1, newIn2], newOut, name='new_model')
Now the entire branch:
#the matrices
#both trainable matrices hang off one shared constant dummy input;
#their shapes match the batch_shape of the two new_model inputs
dummyInput = Input(tensor = K.constant([1]))
X_in = TrainableWeights((10,10), initializer='uniform')(dummyInput)
M_in = TrainableWeights((10,30), initializer='uniform')(dummyInput)
#the output of the branch
md_1 = new_model([X_in, M_in])
#optional, only if you want to use model_s independently later
model_s = Model(dummyInput, md_1, name='model_s')
The whole model
Finally, we can join the branches in a whole model.
Notice how I didn't have to use model_a or model_s here. You can do it if you want, but those submodels are not needed, unless you want later to get them individually for other usages. (Even if you created them, you don't need to change the code below to use them, they're already part of the same graph)
#I prefer tf.matmul because it's clear and understandable while K.dot has weird behaviors
#multiplies the model_a embedding by the transposed output of the trainable-matrix branch
mult = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([embedding, md_1])
#final model
#inputs are the text input plus the constant dummy input feeding the trainable matrices
model = Model([input_text, dummyInput], mult, name='full_model')
Now train it:
# Compile with Adam and binary cross-entropy, tracking accuracy.
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
# Smoke-test training: 128 random integer sequences against all-ones targets of shape (128, 10).
# Only the text input is passed; dummyInput was created from a constant tensor.
model.fit(np.random.randint(0,dic_size, size=(128,length)),
np.ones((128, 10)))
Since the output is 2D now, there is no problem about the 'categorical_crossentropy', my comment was because of doubts on the output shape.

keras fit_generator reading chunks from hdfstore

I try to build a generator for a Keras model which will be trained on a large hdf store.
To speed up the training, I pre-calculated all features incl. one-hot encoding already in the hdfstore. So the call from that should be straight forward.
To feed chunks of my data into the network, I try to use fit_generator, but struggle to get it up and running.
The generator:
def myGenerator(myStore, generateFrom,generateTo):
    """Endlessly yield one (features, labels) batch read from an HDF store.

    Args:
        myStore: Path or store object accepted by ``pd.read_hdf``.
        generateFrom: ``start`` row passed to ``pd.read_hdf``.
        generateTo: ``stop`` row passed to ``pd.read_hdf``.

    Yields:
        Tuple ``(X, y)`` of NumPy arrays taken from the 'X' and 'y' keys.
    """
    # The requested slice never changes, so read it once instead of on every step.
    # Keras expects arrays, not pandas objects, hence the conversion via .values.
    X = pd.read_hdf(myStore, 'X', start=generateFrom, stop=generateTo).values
    y = pd.read_hdf(myStore, 'y', start=generateFrom, stop=generateTo).values
    while True:
        yield X, y
Network and fitting:
def get_model(shape):
    '''Create a keras model.

    The network stacks four BatchNormalization -> Dense(relu) -> Dropout(0.25)
    blocks with 1024, 512, 256 and 128 units, followed by a 2-unit tanh output.

    Args:
        shape: Shape of one input sample, passed to ``Input(shape=...)``.

    Returns:
        An uncompiled ``Model`` from the input layer to the tanh output.
    '''
    inputlayer = Input(shape=shape)
    model = inputlayer
    for units in (1024, 512, 256, 128):
        # Normalise the *previous block's* output. The original re-applied
        # BatchNormalization to `inputlayer` each time, which silently
        # discarded every Dense/Dropout block except the last one.
        model = BatchNormalization()(model)
        model = Dense(units, activation='relu')(model)
        model = Dropout(0.25)(model)
    # NOTE(review): an earlier comment here mentioned 11 (background noise
    # removed), but the layer has 2 units; confirm the intended output size.
    model = Dense(2, activation='tanh')(model)
    return Model(inputs=inputlayer, outputs=model)
# NOTE(review): Input(shape=shape) makes every *sample* 6603 x 10000; confirm
# this is the per-sample shape and not the shape of the whole table.
shape = (6603,10000)
model = get_model(shape)
model.compile(loss='mean_squared_error', optimizer=Adam(), metrics=['accuracy'])
#X = generator(myStore)
#Xt = generator(myStore)
labelbinarizer = LabelBinarizer()
# NOTE(review): `y` is not defined anywhere in this snippet.
y = labelbinarizer.fit_transform(y)
#yt = labelbinarizer.fit_transform(yt)
# Walk through the store in ten consecutive chunks of 10000 rows.
generateFrom = 0
for i in range(10):
    generateTo = generateFrom + 10000
    model.fit_generator(
        generator=myGenerator(myStore, generateFrom, generateTo),
        epochs=1,
        # myGenerator yields the whole chunk as a single batch, so one step
        # covers it. The original subscripted `X`, which raised
        # "'generator' object is not subscriptable".
        steps_per_epoch=1)
    generateFrom = generateTo
I have tried both approaches: calling fit_generator within a loop and passing in the range (as shown above), and handling the range inside the generator. Neither works. Currently I am running into
TypeError: 'generator' object is not subscriptable
Likely I have some misunderstanding how fit_generator() is supposed to be used in this context. Most examples out there are around generating tensors from pictures.
Any hint is appreciated.
Thanks
The function read_hdf returns a pandas object; you need to convert it to a NumPy array (for example with `.values`) before yielding it.

Keras: Multiple inputs and Multiple ouputs for fit_generator using flow_from_directory

My multi-task learning model accepts three inputs. I am using Keras data generators. Is it possible to pass three data generators to the model.fit_generator function?
Problem Definition
I am working a classification problem. The dataset i am using is Painters by number, a competition hosted by kaggle . The task is to identify painter,style and genre given paintings.
I have developed individual models to perform each task. Now, i would like to incorporate multi task learning, see if it outperforms individual models.
Model No of classes (Softmax)
------ ------------------------
Model predicting painter 8
given paintings
Model predicting style 10
given paintings
Model predicting genre 23
given paintings
The above table details the individual models and the no of output classes for each model.
Now, i want to do multi task learning , so I came up with below simple architecture
Multi Task Learning Architecture
# Three 64x64 RGB image inputs, one per task.
style = Input(shape=(64,64,3))
genre = Input(shape=(64,64,3))
painter = Input(shape=(64,64,3))
# One convolution layer instance, reused for all three inputs.
shared_conv = Convolution2D(
filters = 5,# 5 feature maps
kernel_size = (5,5),
strides = 1)
shared_conv_layer_A = shared_conv(style)
shared_conv_layer_B = shared_conv(genre)
shared_conv_layer_C = shared_conv(painter)
# Join the three feature maps along the last axis, then pool and flatten.
merged_layer = keras.layers.concatenate([shared_conv_layer_A,shared_conv_layer_B,shared_conv_layer_C],axis=-1)
pooling = MaxPooling2D(
pool_size = (2,2),
strides = 2
)(merged_layer)
dense = Flatten()(pooling)
# Three softmax heads on the same flattened features, one per task.
out_style = Dense(
no_classes_style,
kernel_initializer=glorot_normal(seed=seed_val),
bias_initializer = 'zero',
kernel_regularizer = l2(l=0.0001),
activation = 'softmax',
)(dense)
out_genre = Dense(
no_classes_genre,
kernel_initializer=glorot_normal(seed=seed_val),
bias_initializer = 'zero',
kernel_regularizer = l2(l=0.0001),
activation = 'softmax',
)(dense)
out_painter = Dense(
no_classes_painter,
kernel_initializer=glorot_normal(seed=seed_val),
bias_initializer = 'zero',
kernel_regularizer = l2(l=0.0001),
activation = 'softmax',
)(dense)
# Inputs and outputs are both ordered style, genre, painter.
multi_tasking_model = Model(inputs=[style,genre,painter],outputs=[out_style,out_genre,out_painter])
multi_tasking_model.summary()
# A single loss name is given for the three-output model.
multi_tasking_model.compile(
loss = 'categorical_crossentropy',
optimizer=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=0.00000001 ),
metrics=['accuracy']
)
Now i want to pass three keras image data generators. So, i came up with a custom data generator
def create_data_generator(style_generator,genre_generator,painter_generator):
    """Bundle element 0 and element 1 of three sources into (inputs, labels).

    Each argument is indexed at 0 for its input and at 1 for its label.
    The result is a plain tuple of two lists, not a Python generator.

    NOTE(review): with Keras directory iterators, indexing presumably returns
    a whole batch rather than the inputs or labels alone; confirm.

    Returns:
        ``([style, genre, painter] inputs, [style, genre, painter] labels)``.
    """
    sources = (style_generator, genre_generator, painter_generator)
    # Input
    inputs = [source[0] for source in sources]
    # Label: taken from index 1 of every source. The original returned the
    # genre and painter *inputs* in the label list.
    labels = [source[1] for source in sources]
    return inputs, labels
# Combine the three per-task generators for training and for validation.
train_mulitle_data_generator = create_data_generator(trainStyleDataGenerator,trainGenreDataGenerator,trainPainterDataGenerator)
valid_mulitle_data_generator = create_data_generator(validationStyleDataGenerator,validationGenreDataGenerator,validationPainterDataGenerator)
# NOTE(review): create_data_generator returns a 2-element tuple, so len(...)
# below is 2 and `generator` receives a tuple rather than a generator.
history = multi_tasking_model.fit_generator(
generator = train_mulitle_data_generator,
steps_per_epoch= len(train_mulitle_data_generator),
epochs = no_epoch,
validation_data = valid_mulitle_data_generator,
)
The error i encountered
'tuple' object has no attribute 'ndim'
Is there any alternative way to pass multiple inputs and multiple outputs. Any suggestions or tips would be greatly helpful please ?.
At the moment create_data_generator does not define a generator. Try this:
def create_data_generator(style_generator,genre_generator,painter_generator):
    """Yield combined (inputs, labels) batches drawn from three generators.

    On every step one ``(input, label)`` pair is taken from each source with
    ``next`` and regrouped for a three-input, three-output model.

    Yields:
        ``([style, genre, painter] inputs, [style, genre, painter] labels)``.
    """
    while True:
        style_x, style_y = next(style_generator)
        genre_x, genre_y = next(genre_generator)
        painter_x, painter_y = next(painter_generator)
        # Labels must be the *_y values; the original yielded the genre and
        # painter inputs in the label list.
        yield [style_x, genre_x, painter_x], [style_y, genre_y, painter_y]

Is it possible to train using same model with two inputs?

Hello, I have a question about Keras.
Currently I want to implement a network
that uses the same CNN model, with two images as input to that CNN model,
and feeds the two results of the CNN model to a Dense model.
For example:
def cnn_model():
    """Build a small CNN: one 3x3 convolution with 8 filters, then global average pooling.

    The input is a 3-channel image whose height and width are left unspecified.
    """
    image_in = Input(shape=(None, None, 3))
    conv_out = Conv2D(8, (3, 3), strides=(1, 1))(image_in)
    pooled = GlobalAvgPool2D()(conv_out)
    return Model(image_in, pooled)
def fc_model(cnn1, cnn2):
    """Join the outputs of two CNN models and map them to one sigmoid value.

    The returned model takes the inputs of both CNNs and outputs the sigmoid
    of a single Dense unit applied to the concatenated CNN outputs.
    """
    merged = concatenate([cnn1.output, cnn2.output])
    logit = Dense(1, input_shape=(None, 16))(merged)
    score = Activation('sigmoid')(logit)
    return Model([cnn1.input, cnn2.input], score)
def main():
    """Build the two-CNN model, compile it and fit it for one epoch."""
    cnn1 = cnn_model()
    cnn2 = cnn_model()
    model = fc_model(cnn1, cnn2)
    model.compile(optimizer='adam', loss='mean_squared_error')
    # The keyword is `epochs`; the original misspelling `ecpochs` is not a fit() argument.
    # NOTE(review): image1 and image2 are not defined in this snippet.
    model.fit(x=[image1, image2], y=[1.0, 1.0], batch_size=1, epochs=1)
i want to implement model something like this, and train models
but i got error message like below :
'All layer names should be unique'
Actually, I want to use only one CNN model as a feature extractor and finally use the two feature vectors to predict one float value between 0.0 and 1.0.
so whole system -->>
using two images and extract features from same CNN model, and features are provided to Dense model to get one floating value
Please, help me implement this system and how to train..
Thank you
See the section of the Keras documentation on shared layers:
https://keras.io/getting-started/functional-api-guide/
A code snippet from the documentation above demonstrating this:
# NOTE: tweet_a, tweet_b, data_a, data_b and labels are not defined in this excerpt.
# This layer can take as input a matrix
# and will return a vector of size 64
shared_lstm = LSTM(64)
# When we reuse the same layer instance
# multiple times, the weights of the layer
# are also being reused
# (it is effectively *the same* layer)
encoded_a = shared_lstm(tweet_a)
encoded_b = shared_lstm(tweet_b)
# We can then concatenate the two vectors:
merged_vector = keras.layers.concatenate([encoded_a, encoded_b], axis=-1)
# And add a logistic regression on top
predictions = Dense(1, activation='sigmoid')(merged_vector)
# We define a trainable model linking the
# tweet inputs to the predictions
model = Model(inputs=[tweet_a, tweet_b], outputs=predictions)
model.compile(optimizer='rmsprop',
loss='binary_crossentropy',
metrics=['accuracy'])
# Train on the two input arrays together, in the same order as the model inputs.
model.fit([data_a, data_b], labels, epochs=10)

Resources