input_shape error in first Dense layer of TensorFlow - python-3.x

I am trying to create a model which takes a python list of 4 elements and returns two values as a prediction. Here is my code:
class DQNagent:
    """Agent from the question: builds a small dense network for a 4-value state."""

    def create_model(self):
        """Build and compile the Sequential model exactly as posted in the question."""
        model = tf.keras.models.Sequential()
        # input_shape=(4, 1) is the setting the question asks about; it is kept
        # as posted so the ValueError quoted below still matches this code.
        model.add(tf.keras.layers.Dense(16, activation='relu', input_shape=(4, 1)))
        model.add(tf.keras.layers.Dense(32, activation='relu'))
        model.add(tf.keras.layers.Dense(2, activation="linear"))
        model.compile(
            loss="mse",
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            metrics=['accuracy'],
        )
        return model

    def model_summary(self, model):
        """Return the result of ``model.summary()`` for the given model."""
        return model.summary()

    def predict(self, state):
        """Return the prediction for ``state``."""
        # NOTE(review): this reads the module-level name `model`, not an
        # attribute of `self`.
        return model.predict(state)


# A single state with 4 values and no batch dimension, as in the question.
state = [0.02495595, 0.04527366, -0.002845, 0.04326009]
agent = DQNagent()
model = agent.create_model()
action = model.predict(state)
print(action)
This code is raising an error as:
ValueError: Input 0 of layer sequential is incompatible with the layer: expected axis -1 of input shape to have value 4 but received input with shape (None, 1)
I have given the input_shape=(4,1). Is this the wrong way I am giving the input_shape?
How can I fix this error?
When I set input_shape = (1,4), it still throws the same error. Why does this happen? I have only one state example to feed it, and I want only two numbers as output.
Is it possible?
Please help!
Thank you!

.predict(X) expects the batch to be the first dimension of X. In your case it interprets your array of 4 values as a batch of 4 examples with 1 feature each. Add a new leading dimension to state so that it becomes 1x4, i.e. a batch of 1 example that contains 4 features, and declare the per-example shape as input_shape=(4,).
import numpy as np


class DQNagent:
    """Agent wrapping a small dense network: 4 state features in, 2 values out."""

    def __init__(self):
        # Set by create_model(); predict() uses it instead of a module global.
        self.model = None

    def create_model(self):
        """Build, compile, store and return the Sequential model.

        ``input_shape=(4,)`` describes ONE example (4 features); the batch
        dimension is implicit and must not be part of ``input_shape``.
        """
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Dense(16, activation='relu', input_shape=(4,)))
        model.add(tf.keras.layers.Dense(32, activation='relu'))
        # Two output units: the question asks for two predicted values.
        model.add(tf.keras.layers.Dense(2, activation="linear"))
        model.compile(
            loss="mse",
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
            metrics=['accuracy'],
        )
        self.model = model
        return model

    def model_summary(self, model):
        """Return the result of ``model.summary()`` for the given model."""
        return model.summary()

    def predict(self, state):
        """Predict for a batch of states using the model built by create_model().

        Raises:
            RuntimeError: if create_model() has not been called yet.
        """
        if self.model is None:
            raise RuntimeError("call create_model() before predict()")
        return self.model.predict(state)


# Shape (1, 4): a batch containing one example with 4 features.
state = np.array([[0.02495595, 0.04527366, -0.002845, 0.04326009]])
agent = DQNagent()
model = agent.create_model()
action = model.predict(state)
print(action)

Related

Keras: saving model defined as a class raises NotImplementedError

I am writing this post after reading similar questions and answers that didn't work in my case. You may notice that I defined the input shape in the first layer.
I created a very small CNN in Keras, as follows:
import tensorflow as tf
class MyNet(tf.keras.Model):
    """Small channels-first CNN written as a subclassed Keras model.

    Pipeline in ``call``: conv -> batch norm -> ReLU -> global average pool
    -> dense layer with 10 outputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(
            32, 5, strides=(2, 2), data_format='channels_first',
            input_shape=(3, 224, 224))
        # axis=1 is the channel axis because the data is channels-first.
        self.bn1 = tf.keras.layers.BatchNormalization(axis=1)
        self.fc1 = tf.keras.layers.Dense(10)
        self.globalavg = tf.keras.layers.GlobalAveragePooling2D(
            data_format='channels_first')

    def call(self, inputs):
        """Apply the layers in order and return the output of ``fc1``."""
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = tf.keras.activations.relu(x)
        x = self.globalavg(x)
        return self.fc1(x)
Then I fed something into it and printed the result successfully (the weights are probably random at the moment, but that's ok):
# Feed one all-ones tensor through a freshly constructed MyNet and print the evaluated output.
image = tf.ones(shape = (1, 3, 224, 224)) # Defined "channels first" when created the layers
mynet = MyNet()
# Calling the model object on the tensor runs MyNet.call.
outputs = mynet(image)
print(tf.keras.backend.eval(outputs))
The result I saw at this step was the 10 outputs of the fc1 layer:
[[-1.1747773 -0.21640654 -0.16266493 -0.44879064 -0.642066 0.78132695 -0.03920581 -0.30874395 -0.04169023 -0.10409291]]
Then I tried to save the model with its weights, by calling mynet.save('mynet.hdf5'), and got the following error:
NotImplementedError: Currently `save` requires model to be a graph network. Consider using `save_weights`, in order to save the weights of the model.
Note that I am new to Keras and that most of my experience is with PyTorch.
What am I doing wrong?
Update:
Following @ikibir's answer, I redefined the network as a sequential network:
# Same layers as MyNet, in the same order, but assembled as a Sequential model.
myNetAsSeq = tf.keras.models.Sequential()
myNetAsSeq.add(tf.keras.layers.Conv2D(32, 5, strides = (2,2), data_format = 'channels_first', input_shape = (3,224,224)))
myNetAsSeq.add(tf.keras.layers.BatchNormalization(axis = 1))
# ReLU is added as an Activation layer here instead of a function call inside call().
myNetAsSeq.add(tf.keras.layers.Activation('relu'))
myNetAsSeq.add(tf.keras.layers.GlobalAveragePooling2D(data_format = 'channels_first'))
myNetAsSeq.add(tf.keras.layers.Dense(10))
This time calling myNetAsSeq.save('mynet.hdf5') succeeded.
I am not sure about my answer, but I believe you are not creating a model; you are just creating each layer individually, and when you run the 'call' function you just pass the variables through these layers.
In Keras you should use
model = models.Sequential()
to create the model, and you should use
model.add()
to add layers.
Then you can save this model.

Keras, Tensorflow : Merge two different model output into one

I am working on one deep learning model where I am trying to combine two different model's output :
The overall structure is like this :
So the first model takes one matrix, for example [ 10 x 30 ]
# Input 1: one string per example, passed through the ELMo embedding layer.
input_text = layers.Input(shape=(1,), dtype="string")
embedding = ElmoEmbeddingLayer()(input_text)
# model_a maps the raw text input to its embedding.
model_a = Model(inputs = [input_text] , outputs=embedding)
# output shape as stated by the author: [10,50]
Now the second model takes two input matrix :
# The second model has no external data: both inputs wrap backend variables
# initialised with random values.
X_in = layers.Input(tensor=K.variable(np.random.uniform(0, 9, [10, 32])))
M_in = layers.Input(tensor=K.variable(np.random.uniform(1, -1, [10, 10])))
md_1 = New_model()([X_in, M_in])  # New_model is defined somewhere else
# The model's inputs are the two tensors created above (X_in and M_in).
model_s = Model(inputs=[X_in, M_in], outputs=md_1)
# output shape as stated by the author: [10,50]
I want to make these two matrices trainable like in TensorFlow I was able to do this by :
# Create a trainable 10x10 float32 variable named 'matrix_a', initialised with the
# values of the existing `matrix_a` object (which this assignment then rebinds).
matrix_a = tf.get_variable(name='matrix_a',
shape=[10,10],
dtype=tf.float32,
initializer=tf.constant_initializer(np.array(matrix_a)),trainable=True)
I have no idea how to make matrix_a and matrix_b trainable, or how to merge the outputs of both networks and then feed the result onward as input.
I went through this question but couldn't find an answer, because its problem statement is different from mine.
What I have tried so far is :
# Input 1: one string per example, passed through the ELMo embedding layer.
input_text = layers.Input(shape=(1,), dtype="string")
embedding = ElmoEmbeddingLayer()(input_text)
model_a = Model(inputs=[input_text], outputs=embedding)
# output shape as stated by the author: [10,50]

# Second branch: both inputs wrap backend variables with random initial values.
X_in = layers.Input(tensor=K.variable(np.random.uniform(0, 9, [10, 10])))
M_in = layers.Input(tensor=K.variable(np.random.uniform(1, -1, [10, 100])))
md_1 = New_model()([X_in, M_in])  # New_model is defined somewhere else
model_s = Model(inputs=[X_in, M_in], outputs=md_1)
# output shape as stated by the author: [10,50]

# Transpose the second model's output.
tranpose = Lambda(lambda x: K.transpose(x))
agglayer = tranpose(md_1)

# Combine the first and second model outputs with a dot product.
dotter = Lambda(lambda x: K.dot(x[0], x[1]))
kmean_layer = dotter([embedding, agglayer])

# Final model over all three inputs.
final_model = Model(inputs=[input_text, X_in, M_in], outputs=kmean_layer, name='Final_output')
final_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
final_model.summary()
Overview of the model :
Update:
Model b
# Model b: two random matrices wrapped as backend variables and exposed as Keras inputs.
X = np.random.uniform(0,9,[10,32])
M = np.random.uniform(1,-1,[10,10])
X_in = layers.Input(tensor=K.variable(X))
M_in = layers.Input(tensor=K.variable(M))
layer_one = Model_b()([M_in, X_in])
# NOTE(review): dropout2 is computed but never used; the next line feeds layer_one,
# not dropout2, into the second Model_b. Confirm whether dropout2 was intended there.
dropout2 = Dropout(dropout_rate)(layer_one)
layer_two = Model_b()([layer_one, X_in])
model_b_ = Model([X_in, M_in], layer_two, name='model_b')
model a
# Model a: integer sequence -> Embedding -> LSTM(5) -> Dense(10).
length = 150
dic_size = 100
embed_size = 12
# Each example is a sequence of `length` entries.
input_text = Input(shape=(length,))
embedding = Embedding(dic_size, embed_size)(input_text)
embedding = LSTM(5)(embedding)
embedding = Dense(10)(embedding)
model_a = Model(input_text, embedding, name = 'model_a')
I am merging like this:
# Multiply model a's embedding by the transpose of model b's output (transpose_b=True).
mult = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([embedding, model_b_.output])
# The combined model takes model b's two inputs followed by model a's input.
final_model = Model(inputs=[model_b_.input[0],model_b_.input[1],model_a.input], outputs=mult)
Is this the right way to matmul the outputs of two Keras models?
I don't know if I am merging the output correctly and the model is correct.
I would greatly appreciate it if anyone kindly gives me some advice on how should I make that matrix trainable and how to merge the model's output correctly then give input.
Thanks in advance!
Trainable weights
Ok. Since you are going to have custom trainable weights, the way to do this in Keras is creating a custom layer.
Now, since your custom layer has no inputs, we will need a hack that will be explained later.
So, this is the layer definition for the custom weights:
from keras.layers import *
from keras.models import Model
from keras.initializers import get as get_init, serialize as serial_init
import keras.backend as K
import tensorflow as tf
class TrainableWeights(Layer):
    """Layer that owns a single trainable weight tensor of a fixed shape.

    You can pass Keras initializers when creating this layer; ``kwargs`` takes
    base layer arguments, such as ``name``.
    """

    def __init__(self, shape, initializer='uniform', **kwargs):
        """Store the weight shape and resolve the initializer.

        Args:
            shape: shape of the weight tensor created in ``build``.
            initializer: anything accepted by ``keras.initializers.get``.
            **kwargs: forwarded to the base ``Layer`` constructor.
        """
        super().__init__(**kwargs)
        self.shape = shape
        self.initializer = get_init(initializer)

    def build(self, input_shape):
        """Create the weight; ``input_shape`` is not used."""
        self.kernel = self.add_weight(
            name='kernel',
            shape=self.shape,
            initializer=self.initializer,
            trainable=True,
        )
        self.built = True

    def call(self, x):
        """Return ``x * kernel``.

        Due to a Keras limitation the layer needs an input; the author assumes
        the input is a tensor with value 1 and no shape or shape (1,).
        """
        return x * self.kernel

    def compute_output_shape(self, input_shape):
        """Return the weight shape so Keras can build the summary properly."""
        return self.shape

    def get_config(self):
        """Return the layer config; only needed for saving/loading via model.save()."""
        config = {
            'shape': self.shape,
            'initializer': serial_init(self.initializer),
        }
        base_config = super().get_config()
        # Keys from `config` override same-named keys of the base config.
        return {**base_config, **config}
Now, this layer should be used like this:
dummyInputs = Input(tensor=K.constant([1]))
trainableWeights = TrainableWeights(shape)(dummyInputs)
Model A
Having the layer defined, we can start modeling.
First, let's see the model_a side:
# general vars: sequence length, vocabulary size and embedding width used below
length = 150
dic_size = 100
embed_size = 12
# model_a segment: integer sequence -> Embedding -> LSTM(5) -> Dense(50)
input_text = Input(shape=(length,))
embedding = Embedding(dic_size, embed_size)(input_text)
# the following two lines are just a means of reaching the desired output shape
embedding = LSTM(5)(embedding)
embedding = Dense(50)(embedding)
# creating model_a here is optional, only needed if you want to use model_a independently later
model_a = Model(input_text, embedding, name = 'model_a')
Model B
For this, we are going to use our TrainableWeights layer.
But first, let's simulate a New_model() as mentioned.
# simulates New_model(); notice the explicit batch_shape for the two matrices
newIn1 = Input(batch_shape = (10,10))
newIn2 = Input(batch_shape = (10,30))
# project both inputs to 50 units so the two results can be added
newOut1 = Dense(50)(newIn1)
newOut2 = Dense(50)(newIn2)
newOut = Add()([newOut1, newOut2])
new_model = Model([newIn1, newIn2], newOut, name='new_model')
Now the entire branch:
# the matrices: both trainable tensors are produced from the same constant dummy input
dummyInput = Input(tensor = K.constant([1]))
X_in = TrainableWeights((10,10), initializer='uniform')(dummyInput)
M_in = TrainableWeights((10,30), initializer='uniform')(dummyInput)
# the output of the branch
md_1 = new_model([X_in, M_in])
# optional, only needed if you want to use model_s independently later
model_s = Model(dummyInput, md_1, name='model_s')
The whole model
Finally, we can join the branches in a whole model.
Notice how I didn't have to use model_a or model_s here. You can do it if you want, but those submodels are not needed, unless you want later to get them individually for other usages. (Even if you created them, you don't need to change the code below to use them, they're already part of the same graph)
# I prefer tf.matmul because it's clear and understandable while K.dot has weird behaviors
# transpose_b=True multiplies `embedding` by the transpose of `md_1`
mult = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([embedding, md_1])
# final model: takes the text input and the dummy input, outputs the product
model = Model([input_text, dummyInput], mult, name='full_model')
Now train it:
# numpy is used below but is not among the imports listed for this answer.
import numpy as np

model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
# Dummy training data: 128 random integer sequences of `length` entries drawn
# from [0, dic_size), with all-ones targets of shape (128, 10).
model.fit(np.random.randint(0, dic_size, size=(128, length)),
          np.ones((128, 10)))
Since the output is 2D now, there is no problem about the 'categorical_crossentropy', my comment was because of doubts on the output shape.

Attribute error: None type has no attribute summary in keras

I have tried to deepen my understanding of word embeddings and NLP in Keras by implementing (and partly copying) code that creates a Keras model with the functional API. When I call model.summary() I get the error: AttributeError: 'NoneType' object has no attribute 'summary'.
After many attempts at reducing the number of layers and the dimension of the word embedding matrix, unfortunately nothing changed. I don't know what to do.
def pretrained_embedding_layer(word_to_vec, word_to_index):
    """Create a non-trainable Keras Embedding layer loaded with pre-trained vectors.

    Args:
        word_to_vec: mapping from word to its embedding vector (an object with
            ``.shape``); it must contain the word "sole", which is used to
            read the embedding dimension.
        word_to_index: mapping from word to its row index in the matrix.

    Returns:
        The built Embedding layer with its weights set to the embedding matrix.
    """
    # One row more than the vocabulary size; presumably indices start at 1 so
    # that row 0 stays all zeros -- TODO confirm against word_to_index.
    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec["sole"].shape[0]
    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec[word]
    print(emb_matrix.shape)
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    # The layer must be built before set_weights can be called on it.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer
def Chatbot_V1(input_shape, word_to_vec, word_to_index):
    """Build the two-layer LSTM model from the question.

    NOTE: as posted, this function builds ``model`` but has no ``return``
    statement, so it returns None. That is left unchanged here because it is
    what the AttributeError quoted below is about.
    """
    # Define sentence_indices as the input of the graph; it should be of shape
    # input_shape and dtype 'int32' (as it contains indices).
    sentence_indices = Input(input_shape, dtype='int32')
    # Create the embedding layer pretrained with GloVe vectors.
    embedding_layer = pretrained_embedding_layer(word_to_vec, word_to_index)
    embeddings = embedding_layer(sentence_indices)
    # Propagate the embeddings through an LSTM layer with 128-dimensional hidden state.
    X = LSTM(128, return_sequences=True)(embeddings)
    # Add dropout with a probability of 0.5.
    X = Dropout(0.5)(X)
    # Propagate X through another LSTM layer with 128-dimensional hidden state.
    # NOTE(review): the original comment asks for a single hidden state, but
    # return_sequences=True returns the whole sequence -- confirm the intent.
    X = LSTM(128, return_sequences=True)(X)
    # Add dropout with a probability of 0.5.
    X = Dropout(0.5)(X)
    # Propagate X through a Dense layer to get vocab_dim-dimensional vectors.
    X = Dense(vocab_dim)(X)
    # Add a softmax activation.
    preds = Activation('softmax')(X)
    # Create the Model instance which converts sentence_indices into preds.
    model = Model(sentence_indices, preds)


model = Chatbot_V1((maxLen,), word_to_vec, word_to_index)
model.summary()
Launching model.summary:
AttributeError: 'NoneType' object has no attribute 'summary'
Why? What is wrong in layers definition?
The function Chatbot_V1 does not return anything, and in python this is signaled by None if you assign the return value of the function to a variable. So just use the return keyword to return the model at the end of Chatbot_V1

Is it possible to train using same model with two inputs?

Hello, I have a question about Keras.
Currently I want to implement a network
that uses the same CNN model on two input images
and feeds the two results of the CNN model to a Dense model.
For example:
def cnn_model():
    """Build a small CNN feature extractor.

    Returns:
        A Model that takes a 3-channel image of unspecified height and width,
        applies an 8-filter 3x3 convolution and global average pooling.
    """
    # Named `inputs` rather than `input` to avoid shadowing the builtin.
    inputs = Input(shape=(None, None, 3))
    x = Conv2D(8, (3, 3), strides=(1, 1))(inputs)
    x = GlobalAvgPool2D()(x)
    return Model(inputs, x)
def fc_model(cnn1, cnn2):
    """Join two CNN models and put a single sigmoid unit on top.

    Args:
        cnn1: first feature-extractor model.
        cnn2: second feature-extractor model.

    Returns:
        A Model taking both CNN inputs and producing one sigmoid output.
    """
    # Named `merged` rather than `input` to avoid shadowing the builtin.
    merged = concatenate([cnn1.output, cnn2.output])
    x = Dense(1, input_shape=(None, 16))(merged)
    x = Activation('sigmoid')(x)
    return Model([cnn1.input, cnn2.input], x)
def main():
    """Build two CNN branches, join them with fc_model and fit for one epoch."""
    cnn1 = cnn_model()
    cnn2 = cnn_model()
    model = fc_model(cnn1, cnn2)
    model.compile(optimizer='adam', loss='mean_squared_error')
    # image1 and image2 are not defined in this snippet; they must exist in
    # the enclosing scope. The keyword is `epochs` (the post had `ecpochs`).
    model.fit(x=[image1, image2], y=[1.0, 1.0], batch_size=1, epochs=1)
I want to implement a model like this and train it,
but I got an error message like the one below:
'All layer names should be unique'
Actually, I want to use only one CNN model as a feature extractor and then use the two feature vectors to predict a single float value between 0.0 and 1.0.
So the whole system is:
two images are passed through the same CNN model to extract features, and the features are fed to a Dense model to get one floating-point value.
Please help me implement this system and show me how to train it.
Thank you
See the section of the Keras documentation on shared layers:
https://keras.io/getting-started/functional-api-guide/
A code snippet from the documentation above demonstrating this:
# NOTE: tweet_a, tweet_b, data_a, data_b and labels are not defined in this
# excerpt; they must already exist for the snippet to run.
# This layer can take as input a matrix
# and will return a vector of size 64
shared_lstm = LSTM(64)
# When we reuse the same layer instance
# multiple times, the weights of the layer
# are also being reused
# (it is effectively *the same* layer)
encoded_a = shared_lstm(tweet_a)
encoded_b = shared_lstm(tweet_b)
# We can then concatenate the two vectors:
merged_vector = keras.layers.concatenate([encoded_a, encoded_b], axis=-1)
# And add a logistic regression on top
predictions = Dense(1, activation='sigmoid')(merged_vector)
# We define a trainable model linking the
# tweet inputs to the predictions
model = Model(inputs=[tweet_a, tweet_b], outputs=predictions)
model.compile(optimizer='rmsprop',
loss='binary_crossentropy',
metrics=['accuracy'])
model.fit([data_a, data_b], labels, epochs=10)

How to change input shape in Sequential model in Keras

I have a sequential model that I built in Keras.
I try to figure out how to change the shape of the input. In the following example
# Two-layer Sequential model whose first Dense layer expects 500 input features.
model = Sequential()
model.add(Dense(32, input_shape=(500,)))
model.add(Dense(10, activation='softmax'))
model.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy'])
let's say that I want to build a new model with different input shape, conceptual this should looks like this:
model1 = model
model1.layers[0] = Dense(32, input_shape=(250,))
is there a way to modify the model input shape?
Somewhat related, so hopefully someone will find this useful: If you have an existing model where the input is a placeholder that looks like (None, None, None, 3) for example, you can load the model, replace the first layer with a concretely shaped input. Transformation of this kind is very useful when for example you want to use your model in iOS CoreML (In my case the input of the model was a MLMultiArray instead of CVPixelBuffer, and the model compilation failed)
from keras.models import load_model
from keras import backend as K
from keras.engine import InputLayer
import coremltools
# Load a saved model, swap in a concretely shaped input layer, re-save and convert to Core ML.
model = load_model('your_model.h5')
# Create a new input layer to replace the (None,None,None,3) input layer :
input_layer = InputLayer(input_shape=(272, 480, 3), name="input_1")
# Save and convert :
# NOTE(review): another answer in this thread reports that `model.layers` only
# seems to return a copy -- verify that this assignment actually changes the saved model.
model.layers[0] = input_layer
model.save("reshaped_model.h5")
coreml_model = coremltools.converters.keras.convert('reshaped_model.h5')
coreml_model.save('MyPredictor.mlmodel')
Think about what changing the input shape in that situation would mean.
Your first model
model.add(Dense(32, input_shape=(500,)))
Has a dense layer that really is a 500x32 matrix.
If you changed your input to 250 elements, your layer's matrix and the input dimension would mismatch.
If, however, what you were trying to achieve was to reuse your last layer's trained parameters from your first 500 element input model, you could get those weights by get_weights. Then you could rebuild a new model and set values at the new model with set_weights.
# Rebuild the architecture with a 250-element input.
model1 = Sequential()
model1.add(Dense(32, input_shape=(250,)))
model1.add(Dense(10, activation='softmax'))
# Copy the last layer's weights FROM the original 500-input `model` into the
# new model; that layer's shape does not depend on the input size.
model1.layers[1].set_weights(model.layers[1].get_weights())
Keep in mind that model1 first layer (aka model1.layers[0]) would still be untrained
Here is another solution without defining each layer of the model from scratch. The key for me was to use "_layers" instead of "layers". The latter only seems to return a copy.
import keras
import numpy as np
def get_model():
    """Build, compile and return a small CNN with a fixed (20, 20, 3) input.

    Layers: Conv2D(9, 3x3, same padding) -> MaxPooling2D(2x2) -> Flatten
    -> Dense(1, sigmoid). The model summary is printed as a side effect.
    """
    old_input_shape = (20, 20, 3)
    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(9, (3, 3), padding="same", input_shape=old_input_shape))
    model.add(keras.layers.MaxPooling2D((2, 2)))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(1, activation="sigmoid"))
    model.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.Adam(lr=0.0001),
        metrics=['acc'],
    )
    model.summary()
    return model
def change_model(model, new_input_shape=(None, 40, 40, 3)):
    """Return a copy of ``model`` rebuilt for a different input shape.

    The given model's private ``_layers`` entries are modified in place, the
    architecture is rebuilt through a JSON round trip, and weights are copied
    layer by layer where possible. The new model is also run once on random
    input and its prediction is printed.

    Args:
        model: the Keras model to adapt (its ``_layers`` are mutated).
        new_input_shape: batch input shape assigned to the first layer.

    Returns:
        The rebuilt model.
    """
    # replace input shape of first layer
    model._layers[1].batch_input_shape = new_input_shape

    # feel free to modify additional parameters of other layers, for example...
    model._layers[2].pool_size = (8, 8)
    model._layers[2].strides = (8, 8)

    # rebuild model architecture by exporting and importing via json
    new_model = keras.models.model_from_json(model.to_json())
    new_model.summary()

    # copy weights from old model to new one
    for layer in new_model.layers:
        try:
            layer.set_weights(model.get_layer(name=layer.name).get_weights())
        except Exception:
            # Best effort: report the layer and continue, without swallowing
            # KeyboardInterrupt/SystemExit as a bare `except:` would.
            print("Could not transfer weights for layer {}".format(layer.name))

    # test new model on a random input image
    X = np.random.rand(10, 40, 40, 3)
    y_pred = new_model.predict(X)
    print(y_pred)

    return new_model
if __name__ == '__main__':
    # Build the original model, then derive the reshaped one from it.
    model = get_model()
    new_model = change_model(model)

Resources