initialising and accessing an array of weights in a custom keras layer - keras

I am writing a custom Keras layer that performs convolution in the Fourier domain, as part of a CNN architecture:
class Fourier_Conv2D(Layer):
    """Custom Keras layer that owns one trainable ``kernel`` weight (question version)."""

    def __init__(self, no_of_kernels, **kwargs):
        # Remember the kernel count, then delegate the remaining kwargs to Layer.
        self.no_of_kernels = no_of_kernels
        super(Fourier_Conv2D, self).__init__(**kwargs)

    def build(self, input_shape):
        # `self.kernel` is created here only; it is not set in __init__.
        self.kernel = self.add_weight(name = 'kernel',
                                      shape = input_shape + (self.no_of_kernels,),
                                      initializer = 'uniform', trainable = True)
        super(Fourier_Conv2D, self).build(input_shape)

    def call(self, x):
        # Only index 0 along the first kernel axis takes part in the product.
        return K.dot(x, self.kernel[0])
In the call function, I need to do pointwise multiplication of the FFT of the input with the FFT of each kernel (according to the convolution theorem) and add the products before passing this sum to the activation function. But how can I access each weight separately in the call function? Using an array index to do so gives the following attribute error:
AttributeError Traceback (most recent call last)
<ipython-input-71-9617a8e7ab2e> in <module>()
1 x = Fourier_Conv2D(5)
----> 2 x.call((2,2,1))
<ipython-input-70-02ded53b8f6f> in call(self, x)
11
12 def call(self, x):
---> 13 return K.dot(x, self.kernel[0])
14
AttributeError: 'Fourier_Conv2D' object has no attribute 'kernel'
Thanks in advance for any help in solving the error.

You are not using your layer correctly. The line x.call((2,2,1)) makes no sense since you need to pass a tensor to the layer. You should instead do something like this:
x = Input((3,4))
custom_layer = Fourier_Conv2D(10)
output = custom_layer(x)
Moreover, there are some errors in the definition of your layer. The following should work:
class Fourier_Conv2D(Layer):
    """Custom layer that multiplies the last input axis by a trainable kernel.

    Args:
        no_of_kernels: size of the last axis of the layer output.
        **kwargs: forwarded unchanged to the base ``Layer`` constructor.
    """

    def __init__(self, no_of_kernels, **kwargs):
        self.no_of_kernels = no_of_kernels
        super(Fourier_Conv2D, self).__init__(**kwargs)

    def build(self, input_shape):
        # Note the changes to the shape parameter
        self.kernel = self.add_weight(name='kernel',
                                      shape=(int(input_shape[-1]), self.no_of_kernels),
                                      initializer='uniform', trainable=True)
        super(Fourier_Conv2D, self).build(input_shape)

    def call(self, x):
        return K.dot(x, self.kernel)  # kernel[0] --> kernel

    def compute_output_shape(self, input_shape):
        # call() replaces the last axis with no_of_kernels, so the new shape
        # has to be declared for the layers that follow this one.
        return tuple(input_shape[:-1]) + (self.no_of_kernels,)

    def get_config(self):
        # Include the constructor argument so the layer can be re-created from its config.
        config = super(Fourier_Conv2D, self).get_config()
        config['no_of_kernels'] = self.no_of_kernels
        return config

Related

TypeError: linear(): argument 'input' (position 1) must be Tensor, not tuple

I am new to transformers, so I tried to implement a BERT classifier class, but I receive this error message:
TypeError: linear(): argument 'input' (position 1) must be Tensor, not tuple
I googled it, and everywhere it is said that the problem can be solved by adding the parameter 'return_dict=False'. I tried it this way:
class BertFakesClassifier(nn.Module):
    """Two dense layers and a log-softmax stacked on a pretrained BERT model (first attempt)."""

    def __init__(self):
        super(BertFakesClassifier, self).__init__()
        # In this attempt return_dict=False is passed when loading the pretrained model.
        self.bert = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", return_dict=False)
        self.relu = nn.ReLU() # relu activation function
        self.dense1_l = nn.Linear(768,512) # dense layer 1
        self.output_l = nn.Linear(512,2) # dense layer 2 (Output layer)
        self.softmax = nn.LogSoftmax(dim=1) # log-softmax over dim 1

    def forward(self, tokens, attention_mask):
        outputs = self.bert(tokens, attention_mask)
        # NOTE(review): `outputs` goes into nn.Linear unmodified; the reported
        # TypeError says it is a tuple rather than a Tensor at this point.
        x = self.dense1_l(outputs)
        x = self.relu(x)
        x = self.output_l(x) # output layer
        logits = self.softmax(x)
        return logits
I tried to add it to the forward method:
class BertFakesClassifier(nn.Module):
    """Two dense layers and a log-softmax stacked on a pretrained BERT model (second attempt)."""

    def __init__(self):
        super(BertFakesClassifier, self).__init__()
        self.bert = BertForSequenceClassification.from_pretrained("bert-base-multilingual-uncased")
        self.relu = nn.ReLU() # relu activation function
        self.dense1_l = nn.Linear(768,512) # dense layer 1
        self.output_l = nn.Linear(512,2) # dense layer 2 (Output layer)
        self.softmax = nn.LogSoftmax(dim=1) # log-softmax over dim 1

    def forward(self, tokens, attention_mask):
        # In this attempt return_dict=False is passed per call instead of at load time.
        outputs = self.bert(tokens, attention_mask, return_dict=False)
        # NOTE(review): `outputs` is still passed to nn.Linear as returned by self.bert,
        # without selecting a tensor from it first.
        x = self.dense1_l(outputs)
        x = self.relu(x)
        x = self.output_l(x) # output layer
        logits = self.softmax(x)
        return logits
I tried:
def forward(self, tokens, attention_mask, return_dict):
outputs = self.bert(tokens, attention_mask, return_dict=False)
I added return_dict=False at self.bert and forward method at the same time. I also tried to use instead:
outputs = self.bert(tokens, attention_mask)
x = outputs['last_hidden_state'][:, 0, :]
But nothing is working, and I either get an error message about the input being a tuple, or
TypeError: linear(): argument 'input' (position 1) must be Tensor, not SequenceClassifierOutput
or another one
typeerror: forward() got an unexpected keyword argument 'return_dict'
I would really appreciate if anyone could help solving this issue.
Thanks in advance!

LSTM Autoencoder set-up for multiple features using Pytorch

I am building an LSTM autoencoder to denoise signals, and it will take more than one feature as its input.
I have setup the model Encoder part as follows which works for single feature inputs (i.e. sequences with just one feature):
class Encoder(nn.Module):
    """Two stacked LSTMs that reduce one input sequence to the final hidden state.

    Args:
        seq_len: sequence length used when reshaping the input in forward().
        n_features: features per time step (input size of the first LSTM).
        num_layers: number of layers in each of the two LSTMs.
        embedding_dim: hidden size of the second LSTM; the first uses twice this.
    """

    def __init__(self, seq_len, n_features, num_layers=1, embedding_dim=64):
        super(Encoder, self).__init__()
        self.seq_len = seq_len
        self.n_features = n_features
        self.num_layers = num_layers
        self.embedding_dim = embedding_dim
        self.hidden_dim = 2 * embedding_dim
        # input: batch_size, seq_len, features
        self.lstm1 = nn.LSTM(
            input_size=self.n_features,
            hidden_size=self.hidden_dim,
            num_layers=self.num_layers,
            batch_first=True
        ) # output: batch size, seq_len, hidden_dim
        # input: batch_size, seq_len, hidden_dim
        self.lstm2 = nn.LSTM(
            input_size=self.hidden_dim,
            hidden_size = self.embedding_dim,
            num_layers = self.num_layers,
            batch_first=True
        ) # output: batch_size, seq_len, embedding_dim

    def forward(self, x):
        print(x)
        # Add a leading batch axis of size 1.
        x = x.reshape((1, self.seq_len, self.n_features))
        print(x.shape)
        x, (_, _) = self.lstm1(x)
        print(x.shape)
        # hidden_n is the hidden-state half of the state tuple returned by lstm2.
        x, (hidden_n, _) = self.lstm2(x)
        print(x.shape, hidden_n.shape)
        print(hidden_n)
        # NOTE(review): the question reports hidden_n as torch.Size([1, 1, 64]) even
        # with n_features == 2, so this reshape only fits when n_features == 1
        # (with the default num_layers) — confirm the intended output shape.
        return hidden_n.reshape((self.n_features, self.embedding_dim))
When I test this setup as follows:
model = Encoder(1024, 1)
model.forward(torch.randn(1024, 1))
with the 1 representing a single feature all is well. However, when I do the following (where 2 represents a sequence of 2 features):
model = Encoder(1024, 2)
model.forward(torch.randn(1024, 2))
I get the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Input In [296], in <cell line: 1>()
----> 1 model.forward(torch.randn(1024, 2))
Input In [294], in Encoder.forward(self, x)
36 print(hidden_n)
37 # print(hidden_n.reshape((self.n_features, self.embedding_dim)).shape)
---> 39 return hidden_n.reshape((self.n_features, self.embedding_dim))
RuntimeError: shape '[2, 64]' is invalid for input of size 64
The hidden_n shape comes out as torch.Size([1, 1, 64]). I would like to understand that if we have more than 1 feature, e.g. 2, do we want to get that shape into the format of 1, 2, 64 such that the hidden state has weights for both features?
Can someone please explain why reshape rejects the way I'm trying to restructure the output of the Encoder, and how I should do it so that the model can handle any number of features?
What am I missing/ perhaps misunderstanding here?

Problem running GRU model; missing argument for forward()

I am working on a GRU and when I try to make predictions I get an error indicating that I need to define h for forward(). I have tried several things and ran out of patience after googling and scouring stack overflow for hours.
This is the class:
class GRUNet(nn.Module):
    """GRU followed by ReLU and a linear layer applied to the last time step."""

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob = 0.2):
        super(GRUNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        # `h` has no default value, so every call must supply the hidden state.
        out, h = self.gru(x,h)
        # Only index -1 along axis 1 (time, since batch_first=True) reaches the linear layer.
        out = self.fc(self.relu(out[:,-1]))
        return out, h

    def init_hidden(self, batch_size):
        # Zero tensor of shape (n_layers, batch_size, hidden_dim) derived from the first parameter.
        weight = next(self.parameters()).data
        # NOTE(review): `device` is not defined in this class — presumably a
        # module-level global; confirm it exists where this is called.
        hidden = weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device)
        return hidden
and then this is where I load the model and try to make a prediction. Both of these are in the same script.
# Load the saved input array and report its rank and shape.
inputs = np.load('.//Pred//input_list.npy')
print(inputs.ndim, inputs.shape)
# This freshly built instance is replaced on the next line by the object torch.load returns.
Gmodel = GRUNet(24,256,1,2)
Gmodel = torch.load('.//GRU//GRU_1028_48.pkl')
Gmodel.eval()
# Only `inputs` is passed; GRUNet.forward(x, h) also requires `h`.
pred = Gmodel(inputs)
Without any other arguments to Gmodel I get the following:
Traceback (most recent call last):
File ".\grunet.py", line 136, in <module>
pred = Gmodel(inputs)
File "C:\Users\ryang\Anaconda-3\envs\tf-gpu\lib\site-packages\torch\nn\modules\module.py", line 547, in __call__
result = self.forward(*input, **kwargs)
TypeError: forward() missing 1 required positional argument: 'h'
You need to provide the hidden state as well, which is usually initialised to all zeros or simply passed as None.
That is you either need to explicitly provide one like this :
hidden_state = torch.zeros(size=(num_layers*direction, batch_size, hidden_dim)).to(device)
pred = Gmodel(inputs, hidden_state)
or simply do :
hidden_state = None
pred = Gmodel(inputs, hidden_state)

PyTorch DataParallel doesn't work when the model contains a tensor operation

If my model contains only nn.Module layers such as nn.Linear, nn.DataParallel works fine.
x = torch.randn(100,10)
class normal_model(torch.nn.Module):
    """Minimal module wrapping a single linear layer (10 inputs, 1 output)."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(10, 1)

    def forward(self, x):
        # Apply the linear layer and hand its result back unchanged.
        projected = self.layer(x)
        return projected
model = normal_model()
model = nn.DataParallel(model.to('cuda:0'))
model(x)
However, when my model contains a tensor operation such as the following
class custom_model(torch.nn.Module):
    """Linear layer followed by a matrix product with a fixed (5, 1) tensor of ones.

    ``weight`` is a plain tensor attribute created on ``cuda:0``; it is not
    registered as a parameter or buffer of the module.
    """

    def __init__(self):
        super(custom_model, self).__init__()
        self.layer = torch.nn.Linear(10,5)
        self.weight = torch.ones(5,1, device='cuda:0')

    def forward(self, x):
        # The matrix-multiply operator '@' had been garbled into '#', which
        # turned the product with self.weight into a trailing comment.
        return self.layer(x) @ self.weight
model = custom_model()
model = torch.nn.DataParallel(model.to('cuda:0'))
model(x)
It gives me the following error
RuntimeError: Caught RuntimeError in replica 1 on device 1. Original
Traceback (most recent call last): File
"/opt/conda/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py",
line 60, in _worker
output = module(*input, **kwargs) File "/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py",
line 541, in call
result = self.forward(*input, **kwargs) File "", line 7, in forward
return self.layer(x) @ self.weight RuntimeError: arguments are located on different GPUs at
/pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:277
How to avoid this error when we have some tensor operations in our model?
I have no experience with DataParallel, but I think it might be because your tensor is not part of the model parameters. You can do this by writing:
torch.nn.Parameter(torch.ones(5,1))
Note that you don't have to move it to the gpu when initializing, because now when you call model.to('cuda:0') this is done automatically.
I can imagine that DataParallel uses the model parameters to move them to the appropriate gpu.
See this answer for more on the difference between a torch tensor and torch.nn.Parameter.
If you don't want the tensor values to be updated by backpropagation during training, you can add requires_grad=False.
Another way that might work is to override the to method, and initialize the tensor in the forward pass:
class custom_model(torch.nn.Module):
    """Linear layer followed by a matrix product with a (5, 1) tensor of ones.

    The ones tensor is built inside forward() on the device of the
    intermediate result, so it does not depend on ``to()`` having been called.
    """

    def __init__(self):
        super(custom_model, self).__init__()
        self.layer = torch.nn.Linear(10,5)

    def forward(self, x):
        hidden = self.layer(x)
        # Create the tensor where `hidden` lives; '@' restores the matrix
        # product that had been garbled into a '#' comment.
        return hidden @ torch.ones(5,1, device=hidden.device)

    def to(self, device: str):
        # Still record the requested device for callers that read `.device`.
        new_self = super(custom_model, self).to(device)
        new_self.device = device
        return new_self
or something like this:
class custom_model(torch.nn.Module):
    """Linear layer followed by a matrix product with a stored (5, 1) tensor of ones.

    Args:
        device: device on which the ones tensor is first created.
    """

    def __init__(self, device:str):
        super(custom_model, self).__init__()
        self.layer = torch.nn.Linear(10,5)
        self.device = device
        self.weight = torch.ones(5,1, device=device)

    def forward(self, x):
        hidden = self.layer(x)
        # '@' restores the matrix product that had been garbled into a '#'
        # comment; .to() is a no-op when the tensor is already on that device.
        return hidden @ self.weight.to(hidden.device)

    def to(self, device: str):
        # `weight` is a plain attribute, so it is re-created on the new device by hand.
        new_self = super(custom_model, self).to(device)
        new_self.device = device
        new_self.weight = torch.ones(5,1, device=device)
        return new_self
Adding to the answer from @Elgar de Groot, since OP also wanted to freeze that layer: to do so you can still use torch.nn.Parameter, but then you explicitly set requires_grad to False like this:
# Store the frozen tensor under its own attribute; assigning it to
# self.layer would replace the Linear layer of the model.
self.weight = torch.nn.Parameter(torch.ones(5,1))
self.weight.requires_grad = False

Keras Custom Layer Error (Operation IsVariableInitialized has been marked as not fetchable)

I'm trying to create a custom Keras layer on a toy dataset, and am having issues. At a high level, I want to create an "Input Gate" layer, which would have trainable weights to turn each column of input on or off. So I'm starting with just trying to multiply the inputs by a sigmoid'd version of the learned weights. My code is as follows:
### This is my custom layer
class InputGate(Layer):
    """Custom layer that scales its inputs by the sigmoid of a trainable weight."""

    def __init__(self, **kwargs):
        super(InputGate, self).__init__(**kwargs)

    def build(self, input_shape):
        # One trainable value per input element, excluding the first (batch) axis.
        self.kernel = self.add_weight(name='input_gate',
                                      shape=input_shape[1:],
                                      initializer='random_uniform',
                                      trainable=True)
        super(InputGate, self).build(input_shape) # Be sure to call this somewhere!

    def call(self, inputs):
        # The sigmoid keeps each gate value between 0 and 1 before the multiplication.
        gate_amount = K.sigmoid(self.kernel)
        return inputs * gate_amount

    def get_config(self):
        # No layer-specific settings are added; the base config is returned as a new dict.
        config = {}
        base_config = super(InputGate, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        # The output shape is reported as equal to the input shape.
        return input_shape
def create_linear_model(x, y, num_noise_vars = 0, reg_strength=0):
    """Build and compile an InputGate + Dense(1) model and the noise-augmented inputs.

    Returns a dict with the compiled model under "model" and the augmented
    input array under "new_x". `y` is accepted but not used in this function.
    """
    new_x = get_x_with_noise(x, num_noise_vars=num_noise_vars)
    model = Sequential([
        InputGate(input_shape=(1+num_noise_vars,)),
        Dense(1, kernel_regularizer=l2(reg_strength))
    ])
    model.compile(optimizer="rmsprop", loss="mse")
    # NOTE(review): this rebinds the optimizer's `lr` attribute to a plain float
    # after compile() — confirm that is the intended way to set the learning rate.
    model.optimizer.lr = 0.001
    return {"model": model, "new_x": new_x}
def get_x_with_noise(x, num_noise_vars):
    """Return `x` as the last column of a 2-D array, preceded by noise columns.

    Args:
        x: 1-D sequence of input values.
        num_noise_vars: number of noise columns placed before `x`.

    Returns:
        Array of shape (len(x), num_noise_vars + 1). Each noise column is an
        independent draw from np.random.random, in column order.
    """
    # One draw per column keeps the random stream identical to a per-column loop.
    columns = [np.random.random(len(x)) for _ in range(num_noise_vars)]
    columns.append(x)
    # column_stack lines the 1-D arrays up as columns, also for empty input.
    return np.column_stack(columns)
# Toy data: y is an exact linear function of x.
x = np.random.random(500)
y = (x * 3) + 10
num_noise_vars = 5
info = create_linear_model(x, y, num_noise_vars=num_noise_vars)
model = info["model"]
new_x = info["new_x"]
# NOTE(review): `num_epochs` is not assigned anywhere in this snippet —
# presumably defined earlier in the session; confirm.
results = model.fit(new_x, y, epochs=num_epochs, verbose=0)
And then I get the following error:
ValueError: Operation 'input_gate_14/IsVariableInitialized' has been marked as not fetchable.
This layer is mostly taken from the docs(https://keras.io/layers/writing-your-own-keras-layers/). I'm using Keras 2.0.9, with Tensorflow backend on a CPU (Macbook Air).
This layer seems as simple as can be, and googling the error leads me to discussions that don't seem relevant. Anyone have ideas of what's causing this?
Any help is much appreciated! Thanks!

Resources