I am working on one deep learning model where I am trying to combine two different model's output :
The overall structure is like this :
So the first model takes one matrix, for example [ 10 x 30 ]
#input 1
input_text = layers.Input(shape=(1,), dtype="string")
embedding = ElmoEmbeddingLayer()(input_text)
model_a = Model(inputs = [input_text] , outputs=embedding)
# shape : [10,50]
Now the second model takes two input matrix :
X_in = layers.Input(tensor=K.variable(np.random.uniform(0,9,[10,32])))
M_in = layers.Input(tensor=K.variable(np.random.uniform(1,-1,[10,10]))
md_1 = New_model()([X_in, M_in]) #new_model defined somewhere
model_s = Model(inputs = [X_in, A_in], outputs = md_1)
# shape : [10,50]
I want to make these two matrices trainable like in TensorFlow I was able to do this by :
matrix_a = tf.get_variable(name='matrix_a',
shape=[10,10],
dtype=tf.float32,
initializer=tf.constant_initializer(np.array(matrix_a)),trainable=True)
I am not getting any clue how to make those matrix_a and matrix_b trainable and how to merge the output of both networks then give input.
I went through this question But couldn't find an answer because their problem statement is different from mine.
What I have tried so far is :
#input 1
input_text = layers.Input(shape=(1,), dtype="string")
embedding = ElmoEmbeddingLayer()(input_text)
model_a = Model(inputs = [input_text] , outputs=embedding)
# shape : [10,50]
X_in = layers.Input(tensor=K.variable(np.random.uniform(0,9,[10,10])))
M_in = layers.Input(tensor=K.variable(np.random.uniform(1,-1,[10,100]))
md_1 = New_model()([X_in, M_in]) #new_model defined somewhere
model_s = Model(inputs = [X_in, A_in], outputs = md_1)
# [10,50]
#tranpose second model output
tranpose = Lambda(lambda x: K.transpose(x))
agglayer = tranpose(md_1)
# concat first and second model output
dott = Lambda(lambda x: K.dot(x[0],x[1]))
kmean_layer = dotter([embedding,agglayer])
# input
final_model = Model(inputs=[input_text, X_in, M_in], outputs=kmean_layer,name='Final_output')
final_model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
final_model.summary()
Overview of the model :
Update:
Model b
X = np.random.uniform(0,9,[10,32])
M = np.random.uniform(1,-1,[10,10])
X_in = layers.Input(tensor=K.variable(X))
M_in = layers.Input(tensor=K.variable(M))
layer_one = Model_b()([M_in, X_in])
dropout2 = Dropout(dropout_rate)(layer_one)
layer_two = Model_b()([layer_one, X_in])
model_b_ = Model([X_in, M_in], layer_two, name='model_b')
model a
length = 150
dic_size = 100
embed_size = 12
input_text = Input(shape=(length,))
embedding = Embedding(dic_size, embed_size)(input_text)
embedding = LSTM(5)(embedding)
embedding = Dense(10)(embedding)
model_a = Model(input_text, embedding, name = 'model_a')
I am merging like this:
mult = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([embedding, model_b_.output])
final_model = Model(inputs=[model_b_.input[0],model_b_.input[1],model_a.input], outputs=mult)
Is it right way to matmul two keras model?
I don't know if I am merging the output correctly and the model is correct.
I would greatly appreciate it if anyone kindly gives me some advice on how should I make that matrix trainable and how to merge the model's output correctly then give input.
Thanks in advance!
Trainable weights
Ok. Since you are going to have custom trainable weights, the way to do this in Keras is creating a custom layer.
Now, since your custom layer has no inputs, we will need a hack that will be explained later.
So, this is the layer definition for the custom weights:
from keras.layers import *
from keras.models import Model
from keras.initializers import get as get_init, serialize as serial_init
import keras.backend as K
import tensorflow as tf
class TrainableWeights(Layer):
#you can pass keras initializers when creating this layer
#kwargs will take base layer arguments, such as name and others if you want
def __init__(self, shape, initializer='uniform', **kwargs):
super(TrainableWeights, self).__init__(**kwargs)
self.shape = shape
self.initializer = get_init(initializer)
#build is where you define the weights of the layer
def build(self, input_shape):
self.kernel = self.add_weight(name='kernel',
shape=self.shape,
initializer=self.initializer,
trainable=True)
self.built = True
#call is the layer operation - due to keras limitation, we need an input
#warning, I'm supposing the input is a tensor with value 1 and no shape or shape (1,)
def call(self, x):
return x * self.kernel
#for keras to build the summary properly
def compute_output_shape(self, input_shape):
return self.shape
#only needed for saving/loading this layer in model.save()
def get_config(self):
config = {'shape': self.shape, 'initializer': serial_init(self.initializer)}
base_config = super(TrainableWeights, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
Now, this layer should be used like this:
dummyInputs = Input(tensor=K.constant([1]))
trainableWeights = TrainableWeights(shape)(dummyInputs)
Model A
Having the layer defined, we can start modeling.
First, let's see the model_a side:
#general vars
length = 150
dic_size = 100
embed_size = 12
#for the model_a segment
input_text = Input(shape=(length,))
embedding = Embedding(dic_size, embed_size)(input_text)
#the following two lines are just a resource to reach the desired shape
embedding = LSTM(5)(embedding)
embedding = Dense(50)(embedding)
#creating model_a here is optional, only if you want to use model_a independently later
model_a = Model(input_text, embedding, name = 'model_a')
Model B
For this, we are going to use our TrainableWeights layer.
But first, let's simulate a New_model() as mentioned.
#simulates New_model() #notice the explicit batch_shape for the matrices
newIn1 = Input(batch_shape = (10,10))
newIn2 = Input(batch_shape = (10,30))
newOut1 = Dense(50)(newIn1)
newOut2 = Dense(50)(newIn2)
newOut = Add()([newOut1, newOut2])
new_model = Model([newIn1, newIn2], newOut, name='new_model')
Now the entire branch:
#the matrices
dummyInput = Input(tensor = K.constant([1]))
X_in = TrainableWeights((10,10), initializer='uniform')(dummyInput)
M_in = TrainableWeights((10,30), initializer='uniform')(dummyInput)
#the output of the branch
md_1 = new_model([X_in, M_in])
#optional, only if you want to use model_s independently later
model_s = Model(dummyInput, md_1, name='model_s')
The whole model
Finally, we can join the branches in a whole model.
Notice how I didn't have to use model_a or model_s here. You can do it if you want, but those submodels are not needed, unless you want later to get them individually for other usages. (Even if you created them, you don't need to change the code below to use them, they're already part of the same graph)
#I prefer tf.matmul because it's clear and understandable while K.dot has weird behaviors
mult = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([embedding, md_1])
#final model
model = Model([input_text, dummyInput], mult, name='full_model')
Now train it:
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
model.fit(np.random.randint(0,dic_size, size=(128,length)),
np.ones((128, 10)))
Since the output is 2D now, there is no problem about the 'categorical_crossentropy', my comment was because of doubts on the output shape.
Related
I have many models already trained, which each answer a simple yes/no question. Pseudocode:
model_dog = keras.load('is_dog')
model_cat = keras.load('is_cat')
model_rat = keras.load('is_rat')
image = load_photo_as_numpy_array('photo.jpg')
multi_class = [ m.predict(image) for m in (model_dog,model_cat,model_rat) ]
This works fine, but it is a> slow because inference is done sequentially instead of in parallel (I have several hundred such models, not just 3), and b> is much more complex to use than if I had ONE model which does multi-classification.
What I want, is:
model = keras.concat_horizontal([ model_dog, model_cat, model_rat ])
model.save('combined_model')
Then whenever I want to use the combined model, it is as simple as:
model = keras.load('combined_model')
multi_class = m.predict(image)
This way, I can add a new classification to the combined model, by training one simple model, for example, that recognizes a fish.
As I suggested in comments, you can merge multiple models in one new model and predict using this new model.
First, I write a function to merge models and return a new combined model. This is what you want:
def concat_horizontal(models, input_shape):
models_count = len(models)
hidden = []
input = tf.keras.layers.Input(shape=input_shape)
for i in range(models_count):
hidden.append(models[i](input))
output = tf.keras.layers.concatenate(hidden)
model = tf.keras.Model(inputs=input, outputs=output)
return model
Let's explore an example. Say we want merge two sequential models like this:
def model_1():
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28,28,1)),
tf.keras.layers.Dense(150, activation='relu'),
tf.keras.layers.Dense(200, activation='relu'),
tf.keras.layers.Dense(150, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax')], name="model1")
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.sparse_categorical_crossentropy, metrics=['accuracy'])
return model
def model_2():
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28,28,1)),
tf.keras.layers.Dense(150, activation='relu'),
tf.keras.layers.Dense(150, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax')], name="model2")
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.sparse_categorical_crossentropy, metrics=['accuracy'])
return model
model1 = model_1()
model2 = model_2()
Let's use MNIST as train dataset for both of our models:
import tensorflow_datasets as tfds
ds_1 = tfds.load('mnist', split='train', as_supervised=True)
ds_2 = tfds.load('mnist', split='test', as_supervised=True)
def map_fn(image, label):
image = image / 255
return image, label
ds_1 = ds_1.map(map_fn).shuffle(1024).batch(32)
ds_2 = ds_2.map(map_fn).shuffle(1024).batch(32)
Now, we can train models, save them, and then load them like this:
model1.fit(ds_1, epochs=2, validation_data=ds_1)
model2.fit(ds_2, epochs=2, validation_data=ds_2)
model1.save('model1.h5')
model2.save('model2.h5')
model3 = tf.keras.models.load_model('model1.h5')
model4 = tf.keras.models.load_model('model2.h5')
So we have 2 separate models (model3,model4) and want to merge these, to a new one. Pass them along the input shape (in this case it is MNIST data shape) to the function we have written above:
new_model = concat_horizontal([model3,model4],(28,28,1))
Now, if we plot this new model:
tf.keras.utils.plot_model(new_model)
It's time to get predictions of model:
sample = ds_1.unbatch().take(1)
for i,j in sample:
img = i
lbl = j
img = tf.expand_dims(img,axis=0)
pred = new_model.predict(img)
pred = np.reshape(pred,(2,10))
results = np.argmax(pred,axis=1)
print(results)
import matplotlib.pyplot as plt
plt.imshow(np.array(img).squeeze())
plt.show
In my case I get both of predictions classified as 4:
Output:
Hello I have the below code where I am loading pre-saved custom weights from a .cpkt file into a resnet model.
'''
def resnet_model():
input_tensor = Input(shape=(224,224,3))
base_model = keras.applications.ResNet50(input_tensor=input_tensor,weights = 'imagenet', include_top = False)
for layer in base_model.layers:
layer.trainable = True
x = base_model.output
x = GlobalAveragePooling2D(data_format='channels_last')(x)
x = Dense(256)(x)
l2_norm_final = Lambda(lambda x: K.l2_normalize(x,axis=1))(x)
final_model = Model(inputs=base_model.input, outputs = l2_norm_final)
return final_model
model = resnet_model()
model.load_weights(weights_file_orig)
#this works i.e., W has the model's weights
W = model.get_weights()
#this does not work i.e., w,b have []
all_weights = [], all_biases = []
for layer in model.layers:
w,b = layer.get_weights()
all_weights.append(w)
all_biases.append(b)
'''
How do I get the layer by layer weights and biases from a saved .cpkt file?
Thanks a lot!
first correction :
you are not using multi-assignment properly, correction is mentioned below :
all_weights = [], all_biases = [] # wrong
all_weights, all_biases = [], [] # correct way to use multi-assignment in python
second correction:
not all layers have weights ex: Input , Dropout etc. so when you try to get both node weights and bias weights of these layers you will have error indicating few values to unpack, below code should get job done.
for layer in model.layers:
try:
w,b = layer.get_weights()
all_weights.append(w)
all_biases.append(b)
except:
pass # not all layers have weights !
if you want to get weights of only pre-trained model(res-net), then before running above code define model variable as follows:
model = keras.applications.ResNet50(input_tensor=input_tensor,weights = 'imagenet', include_top = False)
Architecture I want to implement
I wish to implement this architecture with Keras functional API. I am new to this and here is my code for now (which gets stuck at concatenating inputs).
# Arbitrary dimension for all embeddings
embedding_dim = 10
# Quarter hour of the day embedding
input_quarter_hour = Input(shape=(1,))
embed_quarter_hour = Embedding(metadata['n_quarter_hours'], embedding_dim, input_length=1)(input_quarter_hour)
embed_quarter_hour = Reshape((embedding_dim,))(embed_quarter_hour)
# Day of the week embedding
input_day_of_week = Input(shape=(1,))
embed_day_of_week = Embedding(metadata['n_days_per_week'], embedding_dim, input_length=1)(input_day_of_week)
embed_day_of_week = Reshape((embedding_dim,))(embed_day_of_week)
# Week of the year embedding
input_week_of_year = Input(shape=(1,))
embed_week_of_year = Embedding(metadata['n_weeks_per_year'], embedding_dim, input_length=1)(input_week_of_year)
embed_week_of_year = Reshape((embedding_dim,))(embed_week_of_year)
# Client ID embedding
input_client_ids = Input(shape=(1,))
embed_client_ids = Embedding(metadata['n_client_ids'], embedding_dim, input_length=1)(input_client_ids)
embed_client_ids = Reshape((embedding_dim,))(embed_client_ids)
# Taxi ID embedding
input_taxi_ids = Input(shape=(1,))
embed_taxi_ids = Embedding(metadata['n_taxi_ids'], embedding_dim, input_length=1)(input_taxi_ids)
embed_taxi_ids = Reshape((embedding_dim,))(embed_taxi_ids)
# Taxi stand ID embedding
input_stand_ids = Input(shape=(1,))
embed_stand_ids = Embedding(metadata['n_stand_ids'], embedding_dim, input_length=1)(input_stand_ids)
embed_stand_ids = Reshape((embedding_dim,))(embed_stand_ids)
# GPS coordinates (5 first lat/long and 5 latest lat/long, therefore 20 values)
coords_in = Input(shape=(20,))
coords_out = Dense(1, input_dim=20, init='normal')(coords_in)
#model = Sequential()
concatenated = concatenate([
embed_quarter_hour,
embed_day_of_week,
embed_week_of_year,
embed_client_ids,
embed_taxi_ids,
embed_stand_ids,
coords_out
])
out = Dense(500, activation='relu')(concatenated)
out = Dense(len(clusters),activation='softmax',name='output_layer')(out)
cast_clusters = K.cast_to_floatx(clusters)
def destination(probabilities):
return tf.matmul(probabilities, cast_clusters)
out = Activation(destination)(out)
model = Model(concatenated,out)
I am getting this error :
Graph disconnected: cannot obtain value for tensor
Tensor("input_64:0", shape=(?, 1), dtype=float32) at layer "input_64".
The following previous layers were accessed without issue: [].
I am guessing the problem stems from the size of my tensors... But I don't now how to debug this kind of code.
You should pass a list of all inputs to the model when creating a Keras Model instance. The variable concatenated that you are using in your code does not contain the inputs but instead contains the outputs of certain layers. Moreover, you should not concatenate your inputs but simply use a list.
The following code should work:
inputs = [
input_quarter_hour,
input_day_of_week,
input_week_of_year,
input_client_ids,
input_taxi_ids,
input_stand_ids,
coords_in
]
model = Model(inputs=inputs, outputs=out)
Megre doesn't work anymore. I tried the new functional API (concatenate, add, multiply) but it doesn't work for models. How to implement it?
lower_model = [self.build_network(self.model_config['critic_lower'], input_shape=(self.history_length, self.n_stock, 1))
for _ in range(1 + self.n_smooth + self.n_down)]
merged = Merge(lower_model, mode='concat')
# upper layer
upper_model = self.build_network(self.model_config['critic_upper'], model=merged)
# action layer
action = self.build_network(self.model_config['critic_action'], input_shape=(self.n_stock,), is_conv=False)
# output layer
merged = Merge([upper_model, action], mode='mul')
model = Sequential()
model.add(merged)
model.add(Dense(1))
return model
I cannot really give you the exact answer, because your question is not detailed enough, but I can provide you an example, where layers are concatenated. Common problem is to import Concatenate and use it as in previous versions.
nlp_input = Input(shape=(seq_length,), name='nlp_input')
meta_input = Input(shape=(10,), name='meta_input')
emb = Embedding(output_dim=embedding_size, input_dim=100, input_length=seq_length)(nlp_input)
nlp_out = Bidirectional(LSTM(128, dropout=0.3, recurrent_dropout=0.3, kernel_regularizer=regularizers.l2(0.01)))(emb)
x = concatenate([nlp_out, meta_input])
x = Dense(classifier_neurons, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)
model = Model(inputs=[nlp_input , meta_input], outputs=[x])
This is a dirty workaround to show how to get input and output tensors from models and use concatenate layers with them. Also to learn how to use Dense and other layers with tensors and create functional API models.
Ideally, you should rewrite everything that's inside build_network for clean and optimized code. (Perhaps this doesn't even work depending on the content of this function, but this is the idea)
lower_model = [self.build_network(
self.model_config['critic_lower'],
input_shape=(self.history_length, self.n_stock, 1))
for _ in range(1 + self.n_smooth + self.n_down)]
#for building models you need input and output tensors
lower_inputs = [model.input for model in lower_model]
lower_outputs = [model.output for model in lower_model]
#these lines assume each model in the list has only one input and output
#using a concatenate layer on a list of tensors
merged_tensor = Concatenate()(lower_outputs) #or Concatenate(axis=...)(lower_outputs)
#this is a workaround for compatibility.
#ideally you should work just with tensors, not create unnecessary intermediate models
merged_model = Model(lower_inputs, merged_tensor) #make model from input tensors to outputs
# upper layer
upper_model = self.build_network(self.model_config['critic_upper'], model=merged_model)
# action layer
action = self.build_network(self.model_config['critic_action'], input_shape=(self.n_stock,), is_conv=False)
# output layer - get the output tensors from the models
upper_out = upper_model.output
action_out = action.output
#apply the Multiply layer on the list of tensors
merged_tensor = Multiply()([upper_out, action_out])
#apply the Dense layer on the merged tensor
out = Dense(1)(merged_tensor)
#get input tensors to create a model
upper_iputs = upper_model.inputs #should be a list
action_inputs = action.inputs #if not a list, append to the previous list
inputs = upper_inputs + action_inputs
model = Model(inputs, out)
return model
I try to build a generator for a Keras model which will be trained on a large hdf store.
To speed up the training, I pre-calculated all features incl. one-hot encoding already in the hdfstore. So the call from that should be straight forward.
To feed chunks of my data into the network, I try to use fit_generator, but struggle to get it up and running.
The generator:
def myGenerator(myStore, generateFrom,generateTo):
# Create empty arrays to contain batch of features and labels#
while True:
X = pd.read_hdf(myStore,'X',start=generateFrom,stop=generateTo)
y = pd.read_hdf(myStore,'y',start=generateFrom,stop=generateTo)
yield X,y
Network and fitting:
def get_model(shape):
'''Create a keras model.'''
inputlayer = Input(shape=shape)
model = BatchNormalization()(inputlayer)
model = Dense(1024, activation='relu')(model)
model = Dropout(0.25)(model)
model = BatchNormalization()(inputlayer)
model = Dense(512, activation='relu')(model)
model = Dropout(0.25)(model)
model = BatchNormalization()(inputlayer)
model = Dense(256, activation='relu')(model)
model = Dropout(0.25)(model)
model = BatchNormalization()(inputlayer)
model = Dense(128, activation='relu')(model)
model = Dropout(0.25)(model)
# 11 because background noise has been taken out
model = Dense(2, activation='tanh')(model)
model = Model(inputs=inputlayer, outputs=model)
return model
shape = (6603,10000)
model = get_model(shape)
model.compile(loss='mean_squared_error', optimizer=Adam(), metrics=['accuracy'])
#X = generator(myStore)
#Xt = generator(myStore)
labelbinarizer = LabelBinarizer()
y = labelbinarizer.fit_transform(y)
#yt = labelbinarizer.fit_transform(yt)
generateFrom = 0
for i in range(10):
generateTo=generateFrom+10000
model.fit_generator(
generator=myGenerator(myStore,generateFrom,generateTo),
epochs=1,
steps_per_epoch=X[0].shape[0] // 1000)
generateFrom=generateTo
I have tried both, to have the fit_generator within a loop and plug in the range (as shown above), but also to handle the range inside the generator. Both does not work. Currently running into
TypeError: 'generator' object is not subscriptable
Likely I have some misunderstanding how fit_generator() is supposed to be used in this context. Most examples out there are around generating tensors from pictures.
Any hint is appreciated.
Thanks
The function read_hdf returns a panda object, you need to convert it to numpy array.