What to put in load_model when there are custom objects such as a gradient reversal layer in TensorFlow (Domain Adaptation) - python-3.x

So, here is sample code for the domain adaptation model, and all I want to do is save the model and load it:
@tf.custom_gradient
def grad_reverse(x):
    y = tf.identity(x)
    def custom_grad(dy):
        return -dy
    return y, custom_grad

class GradReverse(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__(name="grl")

    def call(self, x):
        return grad_reverse(x)

def get_adaptable_network(input_shape=x_source_train.shape[1:]):
    inputs = Input(shape=input_shape)
    x = Conv2D(32, 5, padding='same', activation='relu', name='conv2d_1')(inputs)
    x = MaxPool2D(pool_size=2, strides=2, name='max_pooling2d_1')(x)
    x = Conv2D(48, 5, padding='same', activation='relu', name='conv2d_2')(x)
    x = MaxPool2D(pool_size=2, strides=2, name='max_pooling2d_2')(x)
    features = Flatten(name='flatten_1')(x)
    x = Dense(100, activation='relu', name='dense_digits_1')(features)
    x = Dense(100, activation='relu', name='dense_digits_2')(x)
    digits_classifier = Dense(10, activation="softmax", name="digits_classifier")(x)

    domain_branch = Dense(100, activation="relu", name="dense_domain")(GradReverse()(features))
    domain_classifier = Dense(1, activation="sigmoid", name="domain_classifier")(domain_branch)

    return Model(inputs=inputs, outputs=[digits_classifier, domain_classifier])

model = get_adaptable_network()
model.summary()

# save the model to disk for later use
model.save('DA_MNIST_to_MNIST_m.h5')

from tensorflow import keras
model = keras.models.load_model('DA_MNIST_to_MNIST_m.h5', custom_objects={'?': ?})
I am not sure what to put in the custom_objects part, since there is a custom gradient reversal layer implemented for domain adaptation in TensorFlow. When I load the model, it gives an error:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/utils/generic_utils.py in class_and_config_for_serialized_keras_object(config, module_objects, custom_objects, printable_module_name)
294 cls = get_registered_object(class_name, custom_objects, module_objects)
295 if cls is None:
--> 296 raise ValueError('Unknown ' + printable_module_name + ': ' + class_name)
297
298 cls_config = config['config']
ValueError: Unknown layer: GradReverse
I am doing MNIST to MNIST_M domain adaptation, and any help would be useful!

I figured it out: I needed to change the GradReverse layer's __init__ to take **kwargs. The layer will then accept any other keyword arguments (such as those Keras passes back when deserializing) that I haven't listed explicitly.
class GradReverse(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__(name="grl")

    def call(self, x):
        return grad_reverse(x)
When loading the model, we can then use this:
from tensorflow import keras
model = keras.models.load_model('DA_MNIST_to_MNIST_m.h5',custom_objects={'GradReverse':GradReverse})
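For what it's worth, a slightly more general variant (a sketch and an assumption on my part, not part of the fix above) forwards the keyword arguments to the base Layer and registers the class as serializable, so that models saved with the registered class can be reloaded without passing custom_objects at all:
# Sketch, assuming TF 2.x; grad_reverse is the same @tf.custom_gradient function as in the question
import tensorflow as tf

@tf.custom_gradient
def grad_reverse(x):
    y = tf.identity(x)
    def custom_grad(dy):
        return -dy
    return y, custom_grad

@tf.keras.utils.register_keras_serializable(package="domain_adaptation")
class GradReverse(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        # forward kwargs so Keras can restore its own config keys (name, dtype, trainable, ...)
        super().__init__(**kwargs)

    def call(self, x):
        return grad_reverse(x)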

Related

replace BERT architecture with LSTM

I want to train on my dataset with BERT and modify the following architecture to use an LSTM. The full code is here: https://www.geeksforgeeks.org/fine-tuning-bert-model-for-sentiment-analysis/
class BERT_architecture(nn.Module):
    def __init__(self, bert):
        super(BERT_architecture, self).__init__()
        self.bert = bert
        # dropout layer
        self.dropout = nn.Dropout(0.2)
        # relu activation function
        self.relu = nn.ReLU()
        # dense layer 1
        self.fc1 = nn.Linear(768, 512)
        # dense layer 2 (output layer)
        self.fc2 = nn.Linear(512, 2)
        # softmax activation function
        self.softmax = nn.LogSoftmax(dim=1)

    # define the forward pass
    def forward(self, sent_id, mask):
        # pass the inputs to the model
        _, cls_hs = self.bert(sent_id, attention_mask=mask, return_dict=False)
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        # output layer
        x = self.fc2(x)
        # apply softmax activation
        x = self.softmax(x)
        return x
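One possible direction (a sketch only, not an accepted answer): swap the BERT encoder for an nn.Embedding plus nn.LSTM and feed the final hidden state into the same classification head. vocab_size, embedding_dim and hidden_dim below are assumed placeholder hyperparameters.
import torch
import torch.nn as nn

class LSTM_architecture(nn.Module):
    def __init__(self, vocab_size, embedding_dim=128, hidden_dim=256):
        super(LSTM_architecture, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(hidden_dim, 512)
        self.fc2 = nn.Linear(512, 2)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sent_id, mask):
        # mask is accepted for API parity with the BERT version but unused in this sketch
        emb = self.embedding(sent_id)        # (batch, seq_len, embedding_dim)
        _, (h_n, _) = self.lstm(emb)         # h_n: (1, batch, hidden_dim)
        x = self.fc1(h_n[-1])                # final hidden state of the last LSTM layer
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return self.softmax(x)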

How to reshape the last layer of a PyTorch CNN model while doing transfer learning

Actually, I am trying to replicate a Keras structure in PyTorch (I'm new to PyTorch).
Here is the Keras architecture:
base_model = InceptionV3(weights='imagenet', include_top=False)
x = base_model.output
x = Dense(512, activation='relu')(x)
predictions = Dense(49*6, activation='sigmoid')(x)
reshape = Reshape((49, 6))(predictions)
model = Model(inputs=base_model.input, outputs=reshape)

for layer in base_model.layers:
    layer.trainable = False
I want to reshape the last layer of my network. I have implemented transfer learning.
model = models.inception_v3(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
num_ftrs = model.fc.in_features
I believe that if I can attach the last layer of the pretrained network to my following architecture, the problem can be solved, but I don't know how to attach them.
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.fc1 = nn.Linear(num_ftrs, 512)
        self.fc2 = nn.Linear(512, 49*6)

    def forward(self, x):
        print(x.shape)
        x = x.view(-1, num_ftrs)
        #print(x.shape)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = torch.sigmoid(x.view(49, 6))
        return x
Any idea how this problem can be resolved?
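One way to attach the two pieces (a sketch, not from the original thread) is to replace the fc attribute of torchvision's Inception v3 with the custom head, so the backbone's pooled features flow straight into the new layers; the reshape to (-1, 49, 6) keeps the batch dimension and is my assumption about the desired output shape.
# Sketch, assuming torchvision: swap the backbone's final fc layer for a head that
# mirrors the Keras Dense(512) -> Dense(49*6) -> Reshape((49, 6)).
import torch
import torch.nn as nn
from torchvision import models

class Head(nn.Module):
    def __init__(self, num_ftrs):
        super(Head, self).__init__()
        self.fc1 = nn.Linear(num_ftrs, 512)
        self.fc2 = nn.Linear(512, 49 * 6)

    def forward(self, x):                      # x: (batch, num_ftrs) from the backbone
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x.view(-1, 49, 6)               # (batch, 49, 6), like Keras' Reshape

# aux_logits=False just keeps the sketch simple (a single output tensor)
model = models.inception_v3(pretrained=True, aux_logits=False)
for param in model.parameters():
    param.requires_grad = False                # freeze the backbone, as in the Keras code
model.fc = Head(model.fc.in_features)          # only the new head has trainable parameters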

Accessing Variables of Custom Layers in Keras

Let's say we have a custom layer in Keras like this:
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Layer
class Custom_Layer(Layer):
    def __init__(self, **kwargs):
        super(Custom_Layer, self).__init__(**kwargs)
        self.params_1 = 0
        self.params_2 = 0

    def build(self, input_shape):
        self.params_1 = K.variable(np.zeros(shape=input_shape[1::]))
        self.params_2 = K.variable(np.zeros(shape=input_shape[1::]))
        super(Custom_Layer, self).build(input_shape)

    def call(self, x, training=None):
        # DO SOMETHING
How could I access the values of the parameters (params_1, params_2) during training? I tried model.get_layer('Name of Custom Layer').params_1, but that way I cannot access the values of the parameters.
Here is the model architecture:
def get_model(img_height, img_width):
    input_layer = Input(shape=(img_height, img_width, 3))
    x = Conv2D(32, (3, 3), padding='same', name='conv2d_1', activation='relu')(input_layer)
    x = Custom_Layer()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Conv2D(64, kernel_size=(3, 3), name='conv2d_2', activation='relu')(x)
    x = Conv2D(64, (3, 3), name='conv2d_4', activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Flatten()(x)
    x = Dense(512)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(10)(x)
    x = Activation('softmax')(x)
    model = Model(inputs=[input_layer], outputs=[x])
    model.summary()
    return model
Note that params_1 and params_2 are TensorFlow tensors. To get their value, you should run them within a tf.Session. You could do something along the lines of:
from keras import backend as K
# ... train model
sess = K.get_session()
params_1 = model.get_layer('Name of Custom Layer').params_1
values_1 = sess.run(params_1)
print(values_1)
NOTE: Not tested.
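An equivalent shortcut (my addition, not part of the answer above) is keras.backend.get_value, which wraps the session run; and if the values are needed during training, a small callback can print them at the end of each epoch. The layer name below is a placeholder.
# Sketch: read the custom layer's variables with K.get_value and log them each epoch.
from keras import backend as K
from keras.callbacks import Callback

class ParamLogger(Callback):
    def __init__(self, layer_name):
        super(ParamLogger, self).__init__()
        self.layer_name = layer_name

    def on_epoch_end(self, epoch, logs=None):
        layer = self.model.get_layer(self.layer_name)
        print('params_1 =', K.get_value(layer.params_1))
        print('params_2 =', K.get_value(layer.params_2))

# model.fit(x_train, y_train, callbacks=[ParamLogger('name_of_custom_layer')])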

PyTorch version of a simple Keras LSTM model

Trying to translate a simple LSTM model in Keras to PyTorch code. The Keras model converges after just 200 epochs, while the PyTorch model:
- needs many more epochs to reach the same loss level (200 vs. ~8000)
- seems to overfit the inputs because the predicted value is not near 100
This is the Keras code:
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
X = array([10,20,30,20,30,40,30,40,50,40,50,60,50,60,70,60,70,80]).reshape((6,3,1))
y = array([40,50,60,70,80,90])
model = Sequential()
model.add(LSTM(50, activation='relu', recurrent_activation='sigmoid', input_shape=(3, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit(X, y, epochs=200, verbose=1)
x_input = array([70, 80, 90]).reshape((1, 3, 1))
yhat = model.predict(x_input, verbose=0)
print(yhat)
And this is the equivalent PyTorch code:
from numpy import array
import torch
import torch.nn as nn
import torch.nn.functional as F
X = torch.tensor([10,20,30,20,30,40,30,40,50,40,50,60,50,60,70,60,70,80]).float().reshape(6,3,1)
y = torch.tensor([40,50,60,70,80,90]).float().reshape(6,1)
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=50, num_layers=1, batch_first=True)
        self.fc = nn.Linear(50, 1)

    def forward(self, x):
        batches = x.size(0)
        h0 = torch.zeros([1, batches, 50])
        c0 = torch.zeros([1, batches, 50])
        (x, _) = self.lstm(x, (h0, c0))
        x = x[:,-1,:]  # Keep only the output of the last time step. Before: shape (6,3,50); after: shape (6,50)
        x = F.relu(x)
        x = self.fc(x)
        return x
model = Model()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())
n_epochs = 8000
for epoch in range(n_epochs):
    model.train()
    optimizer.zero_grad()
    y_ = model(X)
    loss = criterion(y_, y)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/{n_epochs}, loss = {loss.item()}")
model.eval()
x_input = torch.tensor([70, 80, 90]).float().reshape((1, 3, 1))
yhat = model(x_input)
print(yhat)
The only possible difference is the initial weight and bias values, but I don't think that slightly different weights and biases may account for such a big difference in behavior.
What am I missing in the PyTorch code?
The behaviour difference is because of the activation function in the LSTM API. By changing the activation to tanh, I can reproduce the problem in Keras too.
model.add(LSTM(50, activation='tanh', recurrent_activation='sigmoid', input_shape=(3, 1)))
There is no option to change the activation function to 'relu' in the PyTorch LSTM API (https://pytorch.org/docs/stable/nn.html#lstm). Taking the LSTM implementation from https://github.com/huggingface/torchMoji/blob/master/torchmoji/lstm.py and changing hardsigmoid/tanh to sigmoid/relu, the model converges in PyTorch as well.
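To make the tanh-to-relu change concrete, here is a minimal sketch of an LSTM cell whose candidate and output activations are ReLU instead of tanh (a hand-rolled illustration, not the torchMoji code):
import torch
import torch.nn as nn

class ReluLSTMCell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(ReluLSTMCell, self).__init__()
        # one linear map producing all four gate pre-activations at once
        self.ih = nn.Linear(input_size, 4 * hidden_size)
        self.hh = nn.Linear(hidden_size, 4 * hidden_size)

    def forward(self, x, state):
        h, c = state
        gates = self.ih(x) + self.hh(h)
        i, f, g, o = gates.chunk(4, dim=1)
        i = torch.sigmoid(i)       # input gate
        f = torch.sigmoid(f)       # forget gate
        g = torch.relu(g)          # candidate: relu instead of tanh
        o = torch.sigmoid(o)       # output gate
        c = f * c + i * g
        h = o * torch.relu(c)      # output activation: relu instead of tanh
        return h, c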
I think you are initializing h0 and c0 on every forward pass, which is only required at the start. So better use the code below, which I have modified. You can go through this link for RNNs in PyTorch: https://pytorch.org/docs/stable/nn.html?highlight=rnn#torch.nn.RNN
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.rnn = nn.RNN(input_size=1, hidden_size=50, num_layers=1, nonlinearity="relu", batch_first=True)
        self.fc = nn.Linear(50, 1)

    def forward(self, x):
        # batches = x.size(0)
        # h0 = torch.zeros([1, batches, 50])
        # c0 = torch.zeros([1, batches, 50])
        # (x, _) = self.lstm(x, (h0, c0))
        (x, _) = self.rnn(x)
        x = x[:,-1,:]  # Keep only the output of the last time step. Before: shape (6,3,50); after: shape (6,50)
        x = F.relu(x)
        x = self.fc(x)
        return x
This gives a good prediction result within 2500 epochs. I want to know why you have written the line of code below and what its purpose is, so that I can try to make it better.
x = x[:,-1,:]  # Keep only the output of the last time step. Before: shape (6,3,50); after: shape (6,50)
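For context (an added note, not from the thread): that line keeps only the LSTM output at the final time step, since the model predicts one value per sequence. A small sketch of the shapes involved:
import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=1, hidden_size=50, batch_first=True)
x = torch.randn(6, 3, 1)          # (batch=6, time steps=3, features=1)
out, _ = lstm(x)                  # out: (6, 3, 50) -- one vector per time step
last = out[:, -1, :]              # (6, 50) -- only the last time step's output
print(out.shape, last.shape)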

Properly importing a model from PyTorch into Keras

How can I port this model from PyTorch to Keras? I wrote the model following the post below, but the Keras and PyTorch models give different results.
class net_pytorch(torch.nn.Module):
    def __init__(self, Nin=6, Nout=1, Nlinear=112*60):
        super(net_pytorch, self).__init__()
        self.model1 = torch.nn.Sequential(
            torch.nn.Conv1d(Nin, 60, kernel_size=3, stride=1, groups=Nin),
            torch.nn.ReLU(),
            torch.nn.Conv1d(60, 120, kernel_size=3, stride=1, groups=Nin),
            torch.nn.ReLU(),
            torch.nn.Conv1d(120, 240, kernel_size=3, stride=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool1d(10, stride=6),
        )
        self.model2 = torch.nn.Sequential(
            torch.nn.Linear(Nlinear, 10*40),
            torch.nn.ReLU(),
            torch.nn.Linear(10*40, 100),
            torch.nn.ReLU(),
            torch.nn.Linear(100, Nout)
        )

    def forward(self, x):
        x = self.model1(x)
        x = x.view(x.size(0), -1)
        x = self.model2(x)
        return x
Here is how I wrote it in Keras:
def net_keras():
    model2 = Sequential()
    model2.add(layers.SeparableConv1D(60, 3, strides=1, activation='relu', depth_multiplier=6, name='model1.0', input_shape=(200, 6)))
    model2.add(layers.SeparableConv1D(120, 3, strides=1, activation='relu', depth_multiplier=6, name='model1.2'))
    model2.add(layers.SeparableConv1D(240, 3, strides=1, activation='relu', name='model1.4'))
    model2.add(layers.GlobalAveragePooling1D())
    model2.add(layers.Dense(6720, activation='relu', name='model2.0'))
    model2.add(layers.Dense(400, activation='relu', name='model2.2'))
    model2.add(layers.Dense(100, activation='relu', name='model2.4'))
    model2.add(layers.Dense(3))
    model2.compile(optimizer=Adam(), loss='mae')
    return model2
I tried to use nn-transfer to convert it, but I get an error:
Layer names in PyTorch state_dict ['model1.0', 'model1.2', 'model1.4', 'model2.0', 'model2.2', 'model2.4']
Layer names in Keras HDF5 ['dense_1', 'global_average_pooling1d_1', 'model1.0', 'model1.2', 'model1.4', 'model2.0', 'model2.2', 'model2.4']
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-49-a3986379ed2b> in <module>()
----> 1 transfer.pytorch_to_keras(pytorch_network, model2)
/content/nn-transfer/nn_transfer/transfer.py in pytorch_to_keras(pytorch_model, keras_model, flip_filters, flip_channels, verbose)
122 for layer in pytorch_layer_names:
123
--> 124 params = util.dig_to_params(model_weights[layer])
125
126 weight_key = layer + '.weight'
/content/nn-transfer/nn_transfer/util.py in dig_to_params(keras_h5_layer)
23 # ['dense_2']['dense_3']['conv2d_7']['dense_4']['conv1']
24 while not _contains_weights(keras_h5_layer):
---> 25 keras_h5_layer = keras_h5_layer[list(keras_h5_layer.keys())[0]]
26
27 return keras_h5_layer
AttributeError: 'Dataset' object has no attribute 'keys'
I also tried pytorch2keras, but it doesn't work with groups != 1. MMdnn also doesn't work with this model (the error is only shown as an image, "MMdnn error", in the original post).
Here is where the error comes from:
model.add(layers.Conv1D(60, 3, strides=1, activation='relu', input_shape=(None, 200), name='model1.0'))
You should use input_shape=(None, 6) unless your input is always BatchSize x 200 x 6.
In addition, there are tools to convert models between different architectures, e.g. https://github.com/Microsoft/MMdnn.
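One further difference worth flagging (an added observation, not part of the answer above): PyTorch's Conv1d expects input as (batch, channels, length) while Keras' Conv1D expects (batch, length, channels), so the same array has to be transposed before being fed to the two models. A small sketch:
import numpy as np
import torch

x = np.random.rand(8, 200, 6).astype(np.float32)    # (batch, length, channels) for Keras
# keras_out = keras_model.predict(x)

x_torch = torch.from_numpy(x).permute(0, 2, 1)       # (batch, channels, length) for PyTorch
# torch_out = pytorch_model(x_torch)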
