Where is the window size in Pytorch LSTM model incorporated? - pytorch

I have built a lstm model that takes input data with 3 features and the rolling window size is 18. My model has layers as I have attached in the code below. What I don't understand is how is the rolling window size of 18 incorporated in the model if the window size is never passed as an argument to the model. And if the model takes input as just one row at a time, is it not equivalent to using window size = 1?
class LSTMnetwork(nn.Module):
    """Two stacked LSTM layers with dropout, followed by two linear layers.

    The rolling-window length is not a constructor argument. It is simply the
    length of the sequence passed to ``forward``: ``seq`` is reshaped to
    ``(len(seq), 1, features)``, i.e. (time steps, batch of one, features),
    and the LSTMs iterate over the first axis. A window of 18 rows with 3
    features is therefore fed as a ``(18, 3)`` tensor in a single call.

    Args:
        input_size: Number of features per time step.
        hidden_size1: Hidden units of the first LSTM.
        hidden_size2: Hidden units of the second LSTM.
        hidden_size3: Units of the first fully-connected layer.
        output_size: Number of values predicted per window.

    Attributes:
        hidden1, hidden2: ``(h, c)`` state tuples of the two LSTMs. They are
            carried over between calls to ``forward``; assign fresh zero
            tensors to them to start a new, independent sequence.
    """

    def __init__(self, input_size=3, hidden_size1=24, hidden_size2=50,
                 hidden_size3=20, output_size=1):
        super().__init__()
        self.hidden_size1 = hidden_size1
        self.hidden_size2 = hidden_size2
        self.hidden_size3 = hidden_size3

        # First LSTM and dropout layer.
        self.lstm1 = nn.LSTM(input_size, hidden_size1)
        self.dropout1 = nn.Dropout(p=0.2)

        # Second LSTM and dropout layer.
        self.lstm2 = nn.LSTM(hidden_size1, hidden_size2)
        self.dropout2 = nn.Dropout(p=0.2)

        # Fully-connected head.
        self.fc1 = nn.Linear(hidden_size2, hidden_size3)
        self.fc2 = nn.Linear(hidden_size3, output_size)

        # Initial (h, c) for each LSTM: (num_layers=1, batch=1, hidden_size).
        self.hidden1 = (torch.zeros(1, 1, self.hidden_size1),
                        torch.zeros(1, 1, self.hidden_size1))
        self.hidden2 = (torch.zeros(1, 1, self.hidden_size2),
                        torch.zeros(1, 1, self.hidden_size2))

    def forward(self, seq):
        """Run one window through the network.

        Args:
            seq: Tensor whose first axis is time (the window); the remaining
                elements of each step are flattened into the feature axis.

        Returns:
            The prediction for the last time step, shape ``(1, output_size)``.
        """
        # (window, features) -> (window, batch=1, features)
        lstm1_out, hidden1 = self.lstm1(seq.view(len(seq), 1, -1), self.hidden1)
        # The LSTM output is already (window, 1, hidden), so no reshape needed.
        lstm2_out, hidden2 = self.lstm2(self.dropout1(lstm1_out), self.hidden2)

        # Keep the state values for the next call but cut them loose from this
        # call's autograd graph; otherwise a later backward() would try to
        # traverse a graph that has already been freed.
        self.hidden1 = tuple(state.detach() for state in hidden1)
        self.hidden2 = tuple(state.detach() for state in hidden2)

        fc1_out = F.relu(self.fc1(self.dropout2(lstm2_out)))
        fc2_out = self.fc2(fc1_out)
        return fc2_out[-1]

Related

How could we use Bahdanau attention in a stacked LSTM model?

I aim to use attention in a stacked LSTM model, but I don't know how to add the AdditiveAttention mechanism of Keras between the encoder and decoder layers. Let's say we have an input layer, an encoder, a decoder, and a dense classification layer, and we want the decoder to pay attention to all the hidden states of the encoder (h = [h1, ..., hT]) when deriving its outputs. Is there a high-level way to do this in Keras? For example,
# Sketch of the stacked-LSTM classifier from the question.
# T, f, num_neurons1 and num_neurons2 are placeholders that are not defined here.
input_layer = Input(shape=(T, f))
x = input_layer
# Encoder: return_sequences=True, so the output keeps one vector per timestep.
x = LSTM(num_neurons1, return_sequences=True)(x)
# Adding attention here, but I don't know how?
# Decoder: consumes the encoder sequence.
x = LSTM(num_neurons2)(x)
# Single sigmoid unit as the classification output.
output_layer = Dense(1, 'sigmoid')(x)
model = Model(input_layer, output_layer)
...
I think this is wrong to use: x = AdditiveAttention(x, x). Am I right?
Maybe it is helpful for your issue ?
This is a classification model with LSTM and attention for classification on
character-level:
first create a custom layer for attention :
class attention(Layer):
    """Attention pooling over the time axis of a sequence.

    Scores every timestep with a learned projection, normalizes the scores
    with softmax, and returns the score-weighted sum of the timesteps.

    The weights are created from ``input_shape[-1]`` (feature size) and
    ``input_shape[1]`` (sequence length), so the sequence length must be
    known when the layer is built.
    """

    def __init__(self, **kwargs):
        # Forward name/dtype/etc. to the base Layer constructor.
        super(attention, self).__init__(**kwargs)

    def build(self, input_shape):
        # One scoring weight per feature and one bias per timestep.
        self.W = self.add_weight(name='attention_weight', shape=(input_shape[-1], 1),
                                 initializer='random_normal', trainable=True)
        self.b = self.add_weight(name='attention_bias', shape=(input_shape[1], 1),
                                 initializer='zeros', trainable=True)
        super(attention, self).build(input_shape)

    def call(self, x):
        # Alignment scores, passed through tanh
        e = K.tanh(K.dot(x, self.W) + self.b)
        # Remove the trailing dimension of size 1
        e = K.squeeze(e, axis=-1)
        # Normalize the scores into attention weights
        alpha = K.softmax(e)
        # Restore the trailing axis so the weights broadcast against x
        alpha = K.expand_dims(alpha, axis=-1)
        # Context vector: weighted sum over axis 1
        context = x * alpha
        context = K.sum(context, axis=1)
        return context
LEN_CHA = 64  # number of characters
LEN_INPUT = 110  # depends on the longest sentence; shorter ones are padded with zeros
LEN_Input = LEN_INPUT  # original spelling, kept as an alias for compatibility


def LSTM_model_attention(Labels=3):
    """Build and compile a character-level BiLSTM classifier with attention.

    Args:
        Labels: Number of output classes (width of the softmax layer).

    Returns:
        The compiled Sequential model.
    """
    # NOTE(review): EMBEDDING_DIM is not defined in this snippet; it must be
    # set before this function is called.
    model = Sequential()
    model.add(Embedding(LEN_CHA, EMBEDDING_DIM, input_length=LEN_INPUT))
    model.add(SpatialDropout1D(0.7))
    # return_sequences=True keeps every timestep so attention can pool them.
    model.add(Bidirectional(LSTM(256, return_sequences=True)))
    model.add(attention())
    model.add(Dense(Labels, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
    return model


LSTM_attention = LSTM_model_attention()
LSTM_attention.summary()

How can I feed the output from last layer of mobilenet to a Unet model

I am trying to build an image segmentation model with a Keras MobileNet model pre-trained on the ImageNet dataset. However, to train the model further, I want to add U-Net layers to the existing model and train only the layers of the U-Net architecture, with the MobileNet model serving as a backbone.
Problem: The last layer of the MobileNet model is a ReLU layer with dimensions (7x7x1024). I want to reshape this to (256x256x3), which can be understood by the U-Net input layer.
not the last layer, but creating a unet on mobilenet can be done using the below code:
ALPHA = 1 # Width hyper parameter for MobileNet (0.25, 0.5, 0.75, 1.0). Higher width means more accurate but slower
# Input image size; used for the MobileNet input_shape and for the final Reshape.
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
# NOTE(review): not referenced by the code shown here - presumably a coarse
# output grid size; confirm before relying on these.
HEIGHT_CELLS = 28
WIDTH_CELLS = 28
def create_model(trainable=True):
    """Build a U-Net-style segmentation model on a MobileNet backbone.

    Intermediate MobileNet activations are used as skip connections: starting
    from the deepest one, the decoder repeatedly upsamples and concatenates
    the next shallower activation, then upsamples once more and applies a
    1x1 sigmoid convolution.

    Args:
        trainable: Whether the pre-trained MobileNet layers are updated during
            training. Pass False to train only the decoder layers added here.

    Returns:
        A Model mapping the MobileNet input to an
        (IMAGE_HEIGHT, IMAGE_WIDTH) sigmoid mask per sample.
    """
    model = MobileNet(input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3), include_top=False,
                      alpha=ALPHA, weights="imagenet")

    # Honour the `trainable` argument: freeze or unfreeze the backbone only.
    for layer in model.layers:
        layer.trainable = trainable

    # Skip connections, from shallowest to deepest.
    block = model.get_layer("conv_pw_1_relu").output
    block1 = model.get_layer("conv_pw_3_relu").output
    block2 = model.get_layer("conv_pw_5_relu").output
    block3 = model.get_layer("conv_pw_11_relu").output
    block4 = model.get_layer("conv_pw_13_relu").output

    # Decoder: upsample the deeper features and merge with the next skip.
    x = Concatenate()([UpSampling2D()(block4), block3])
    x = Concatenate()([UpSampling2D()(x), block2])
    x = Concatenate()([UpSampling2D()(x), block1])
    x = Concatenate()([UpSampling2D()(x), block])
    x = UpSampling2D()(x)

    # One sigmoid channel per pixel, then drop the channel axis.
    x = Conv2D(1, kernel_size=1, activation="sigmoid")(x)
    x = Reshape((IMAGE_HEIGHT, IMAGE_WIDTH))(x)
    return Model(inputs=model.input, outputs=x)

Keras, Tensorflow : Merge two different model output into one

I am working on one deep learning model where I am trying to combine two different model's output :
The overall structure is like this :
So the first model takes one matrix, for example [ 10 x 30 ]
# Input 1: a single string per sample, embedded by ElmoEmbeddingLayer
# (that layer is defined outside this snippet).
input_text = layers.Input(shape=(1,), dtype="string")
embedding = ElmoEmbeddingLayer()(input_text)
model_a = Model(inputs = [input_text] , outputs=embedding)
# Output shape as stated by the author: [10,50]
Now the second model takes two input matrix :
# Second model: two inputs backed by fixed random tensors.
X_in = layers.Input(tensor=K.variable(np.random.uniform(0, 9, [10, 32])))
# uniform() takes (low, high); the original (1, -1) order is documented as undefined.
M_in = layers.Input(tensor=K.variable(np.random.uniform(-1, 1, [10, 10])))
md_1 = New_model()([X_in, M_in])  # New_model is defined somewhere else
# The model inputs must be the tensors created above (the original named an undefined A_in).
model_s = Model(inputs=[X_in, M_in], outputs=md_1)
# Output shape as stated by the author: [10,50]
I want to make these two matrices trainable like in TensorFlow I was able to do this by :
# TF1-style trainable variable initialised from a constant array.
# NOTE(review): the right-hand side reads `matrix_a` before this assignment
# rebinds it - presumably an existing array of initial values; confirm.
matrix_a = tf.get_variable(name='matrix_a',
shape=[10,10],
dtype=tf.float32,
initializer=tf.constant_initializer(np.array(matrix_a)),trainable=True)
I am not getting any clue how to make those matrix_a and matrix_b trainable and how to merge the output of both networks then give input.
I went through this question But couldn't find an answer because their problem statement is different from mine.
What I have tried so far is :
# Input 1: text branch (ElmoEmbeddingLayer is defined outside this snippet).
input_text = layers.Input(shape=(1,), dtype="string")
embedding = ElmoEmbeddingLayer()(input_text)
model_a = Model(inputs=[input_text], outputs=embedding)
# Output shape as stated by the author: [10,50]

# Second branch: two inputs backed by fixed random tensors.
X_in = layers.Input(tensor=K.variable(np.random.uniform(0, 9, [10, 10])))
# uniform() takes (low, high); the original (1, -1) order is documented as undefined.
M_in = layers.Input(tensor=K.variable(np.random.uniform(-1, 1, [10, 100])))
md_1 = New_model()([X_in, M_in])  # New_model is defined somewhere else
# The model inputs must be the tensors created above (the original named an undefined A_in).
model_s = Model(inputs=[X_in, M_in], outputs=md_1)
# Output shape as stated by the author: [10,50]

# Transpose the second model's output.
tranpose = Lambda(lambda x: K.transpose(x))
agglayer = tranpose(md_1)

# Combine both branches with a dot product.
dott = Lambda(lambda x: K.dot(x[0], x[1]))
# Call the Lambda defined just above (the original called an undefined `dotter`).
kmean_layer = dott([embedding, agglayer])

# Full model over all three inputs.
final_model = Model(inputs=[input_text, X_in, M_in], outputs=kmean_layer, name='Final_output')
final_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
final_model.summary()
Overview of the model :
Update:
Model b
# Fixed random matrices backing the two inputs of model b.
X = np.random.uniform(0, 9, [10, 32])
# uniform() takes (low, high); the original (1, -1) order is documented as undefined.
M = np.random.uniform(-1, 1, [10, 10])
X_in = layers.Input(tensor=K.variable(X))
M_in = layers.Input(tensor=K.variable(M))
# Model_b and dropout_rate are defined outside this snippet.
layer_one = Model_b()([M_in, X_in])
# NOTE(review): dropout2 is computed but never consumed - layer_two is fed
# layer_one directly. Presumably the dropout output was meant to be used; confirm.
dropout2 = Dropout(dropout_rate)(layer_one)
layer_two = Model_b()([layer_one, X_in])
model_b_ = Model([X_in, M_in], layer_two, name='model_b')
model a
# Sizes for the text branch: sequence length, vocabulary size, embedding width.
length = 150
dic_size = 100
embed_size = 12
# Token ids -> embeddings -> LSTM summary vector -> 10-unit projection.
input_text = Input(shape=(length,))
embedding = Embedding(dic_size, embed_size)(input_text)
embedding = LSTM(5)(embedding)
embedding = Dense(10)(embedding)
model_a = Model(input_text, embedding, name = 'model_a')
I am merging like this:
# Multiply the text embedding by the transposed output of model b.
mult = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([embedding, model_b_.output])
# The combined model takes model b's two inputs followed by model a's input.
final_model = Model(inputs=[model_b_.input[0],model_b_.input[1],model_a.input], outputs=mult)
Is it right way to matmul two keras model?
I don't know if I am merging the output correctly and the model is correct.
I would greatly appreciate it if anyone kindly gives me some advice on how should I make that matrix trainable and how to merge the model's output correctly then give input.
Thanks in advance!
Trainable weights
Ok. Since you are going to have custom trainable weights, the way to do this in Keras is creating a custom layer.
Now, since your custom layer has no inputs, we will need a hack that will be explained later.
So, this is the layer definition for the custom weights:
from keras.layers import *
from keras.models import Model
from keras.initializers import get as get_init, serialize as serial_init
import keras.backend as K
import tensorflow as tf
class TrainableWeights(Layer):
    """Layer that owns a single trainable tensor of a fixed shape.

    Keras layers need an input to be called, so the layer multiplies its
    kernel by whatever it receives; the intended input is a constant 1
    (no shape, or shape ``(1,)``), which leaves the kernel unchanged.

    Args:
        shape: Shape of the trainable kernel.
        initializer: Any Keras initializer identifier or instance.
        **kwargs: Base ``Layer`` arguments such as ``name``.
    """

    def __init__(self, shape, initializer='uniform', **kwargs):
        super().__init__(**kwargs)
        self.shape = shape
        self.initializer = get_init(initializer)

    def build(self, input_shape):
        """Create the kernel; the input shape plays no role in its size."""
        self.kernel = self.add_weight(
            name='kernel',
            shape=self.shape,
            initializer=self.initializer,
            trainable=True,
        )
        self.built = True

    def call(self, x):
        """Return the kernel scaled by the (dummy) input."""
        return x * self.kernel

    def compute_output_shape(self, input_shape):
        """Report the kernel shape so that model summaries are correct."""
        return self.shape

    def get_config(self):
        """Serialize the constructor arguments for ``model.save()``."""
        own_config = {'shape': self.shape, 'initializer': serial_init(self.initializer)}
        parent_config = super().get_config()
        return {**parent_config, **own_config}
Now, this layer should be used like this:
# Constant-1 tensor that serves only as the mandatory layer input.
dummyInputs = Input(tensor=K.constant([1]))
# `shape` is a placeholder for the desired weight shape.
trainableWeights = TrainableWeights(shape)(dummyInputs)
Model A
Having the layer defined, we can start modeling.
First, let's see the model_a side:
#general vars
length = 150
dic_size = 100
embed_size = 12
#for the model_a segment
input_text = Input(shape=(length,))
embedding = Embedding(dic_size, embed_size)(input_text)
#the following two lines are just a resource to reach the desired shape
embedding = LSTM(5)(embedding)
embedding = Dense(50)(embedding)
#creating model_a here is optional, only if you want to use model_a independently later
model_a = Model(input_text, embedding, name = 'model_a')
Model B
For this, we are going to use our TrainableWeights layer.
But first, let's simulate a New_model() as mentioned.
#simulates New_model() #notice the explicit batch_shape for the matrices
# Stand-in for New_model(): two matrix inputs with explicit batch_shape,
# each projected to 50 units and then summed.
newIn1 = Input(batch_shape = (10,10))
newIn2 = Input(batch_shape = (10,30))
newOut1 = Dense(50)(newIn1)
newOut2 = Dense(50)(newIn2)
newOut = Add()([newOut1, newOut2])
new_model = Model([newIn1, newIn2], newOut, name='new_model')
Now the entire branch:
#the matrices
dummyInput = Input(tensor = K.constant([1]))
X_in = TrainableWeights((10,10), initializer='uniform')(dummyInput)
M_in = TrainableWeights((10,30), initializer='uniform')(dummyInput)
#the output of the branch
md_1 = new_model([X_in, M_in])
#optional, only if you want to use model_s independently later
model_s = Model(dummyInput, md_1, name='model_s')
The whole model
Finally, we can join the branches in a whole model.
Notice how I didn't have to use model_a or model_s here. You can do it if you want, but those submodels are not needed, unless you want later to get them individually for other usages. (Even if you created them, you don't need to change the code below to use them, they're already part of the same graph)
#I prefer tf.matmul because it's clear and understandable while K.dot has weird behaviors
mult = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([embedding, md_1])
#final model
model = Model([input_text, dummyInput], mult, name='full_model')
Now train it:
# Smoke-test training run on random token ids with all-ones targets of width 10.
# NOTE(review): `np` is not among the imports shown with this answer - confirm numpy is imported.
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
model.fit(np.random.randint(0,dic_size, size=(128,length)),
np.ones((128, 10)))
Since the output is 2D now, there is no problem about the 'categorical_crossentropy', my comment was because of doubts on the output shape.

How to add computation (eg log and sqrt root) to keras vgg16 before softmax layer

I am implementing the improved bilinear pooling (http://vis-www.cs.umass.edu/bcnn/docs/improved_bcnn.pdf) and I want to add computation (eg log and square root) before softmax layer to my model, which is fine tuned from keras vgg16 .
How can I do that?
# Convolutional base of VGG16 with ImageNet weights (no classifier head).
vgg_16 = keras.applications.vgg16.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
vgg_16.layers.pop()

# Copy the VGG16 layers into a Sequential model, one by one.
My_Model = Sequential()
for layer in vgg_16.layers:
    My_Model.add(layer)

# Freeze everything copied so far.
for layer in My_Model.layers:
    layer.trainable = False
# I want to add this function on top of my model then feed the result to a softmax layer
#
def BILINEAR_POOLING(bottom1, bottom2, sum_pool=True):
    """Bilinear (outer-product) pooling of two NumPy feature maps.

    For every batch element and spatial location, the outer product of the
    two feature vectors is computed and flattened.

    Args:
        bottom1: Array-like of shape (batch, height, width, channels1).
        bottom2: Array-like of shape (batch, height, width, channels2); the
            first three dimensions must match those of ``bottom1``.
        sum_pool: If True, sum the outer products over height and width.

    Returns:
        A float32 array of shape (batch, channels1 * channels2) when
        ``sum_pool`` is True, otherwise
        (batch, height, width, channels1 * channels2).

    Raises:
        AssertionError: If the leading three dimensions differ.
    """
    bottom1 = np.asarray(bottom1)
    bottom2 = np.asarray(bottom2)
    # einsum would silently broadcast size-1 axes, so check shapes explicitly.
    assert bottom1.shape[:3] == bottom2.shape[:3], (
        "bottom1 and bottom2 must share batch, height and width")
    batch_size, height, width = bottom1.shape[:3]
    output_dim = bottom1.shape[-1] * bottom2.shape[-1]

    if sum_pool:
        # Contract height and width directly; the full per-location tensor
        # is never materialized.
        pooled = np.einsum('nhwi,nhwj->nij', bottom1, bottom2)
        return pooled.reshape((batch_size, output_dim)).astype(np.float32, copy=False)

    # Per-location outer products, flattened row-major like np.outer(...).reshape(-1).
    outer = np.einsum('nhwi,nhwj->nhwij', bottom1, bottom2)
    return outer.reshape((batch_size, height, width, output_dim)).astype(np.float32, copy=False)
The solution was simply to add a Keras Lambda layer,
as follows:
# Wrap the pooling function in a Lambda layer on top of the frozen stack.
# NOTE(review): BILINEAR_POOLING requires two positional arguments and uses
# NumPy operations, while a Lambda layer passes its input as a single
# argument - confirm that this call actually runs as written.
My_Model.add(Lambda(BILINEAR_POOLING,output_shape=[512,512]))

Adding new nodes to output layer in Keras

I want to add new nodes to the output layer to train it later, i'm doing:
def add_outputs(self, n_new_outputs):
    """Attach a second softmax head ('fc9') next to the existing 'fc8' output.

    The new head is fed from the 'fc7' layer and its output is concatenated
    with the existing 'fc8' output; ``self.model`` is replaced by the
    resulting model.

    Args:
        n_new_outputs: Number of units in the new head.
    """
    penultimate = self.model.get_layer('fc7').output
    existing_head = self.model.get_layer('fc8').output
    extra_head = Dense(n_new_outputs, activation='softmax', name='fc9')(penultimate)
    combined = merge([existing_head, extra_head], mode='concat')
    self.model = Model(input=self.model.input, output=combined)
where 'fc7' is the fully connected layer before the output layer 'fc8'. I expected out = self.model.get_layer('fc8').output to give me just the last layer, but the output is the whole model.
Is there any way to take just one layer from a network?
Maybe there's another, easier way to do it...
Thanks!!!!
Finally I found a solution:
1) get the weights from the last layer
2) append zeros to its biases and small random values to its connection weights for the new outputs
3) pop the output layer and create a new one
4) set the new weights on the new layer
Here is the code:
def add_outputs(self, n_new_outputs):
    """Widen the 'fc8' softmax layer by ``n_new_outputs`` units.

    The current 'fc8' weights are kept: zeros are appended to the bias
    vector and small random values to the kernel for the new units. The
    layer is then rebuilt on top of 'batchnormalization_1' with the enlarged
    weights, and ``self.model`` is replaced by the new model.

    Args:
        n_new_outputs: Number of output units to add.
    """
    # Track the new total number of outputs.
    self.n_outputs += n_new_outputs

    fc8_weights = self.model.get_layer('fc8').get_weights()
    n_inputs = fc8_weights[0].shape[0]

    # New units: zero bias, small random kernel values.
    fresh_bias = np.zeros(n_new_outputs)
    fc8_weights[1] = np.concatenate((fc8_weights[1], fresh_bias), axis=0)
    fresh_kernel = -0.0001 * np.random.random_sample((n_inputs, n_new_outputs)) + 0.0001
    fc8_weights[0] = np.concatenate((fc8_weights[0], fresh_kernel), axis=1)

    # Drop the old output layer and rebuild it at the new width.
    self.model.layers.pop()
    feature_tensor = self.model.get_layer('batchnormalization_1').output
    new_head = Dense(self.n_outputs, activation='softmax', name='fc8')(feature_tensor)
    self.model = Model(input=self.model.input, output=new_head)

    # Load the enlarged weights into the rebuilt layer.
    self.model.get_layer('fc8').set_weights(fc8_weights)
    print(fc8_weights[0])

Resources