Problem running GRU model; missing argument for forward() - pytorch

I am working on a GRU and when I try to make predictions I get an error indicating that I need to define h for forward(). I have tried several things and ran out of patience after googling and scouring stack overflow for hours.
This is the class:
class GRUNet(nn.Module):
    """GRU stack whose last time step is passed through ReLU and a linear head."""

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, drop_prob = 0.2):
        """Create the GRU (batch_first, with dropout=drop_prob) and the output layer.

        hidden_dim and n_layers are also stored on the instance because
        init_hidden uses them to size the initial state.
        """
        super().__init__()
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            batch_first=True,
            dropout=drop_prob,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        """Run x through the GRU starting from hidden state h.

        Both arguments are required positionally. Returns a pair: the linear
        head applied to the ReLU of the GRU output at index -1 along dim 1,
        and the hidden state returned by the GRU.
        """
        seq_out, h_next = self.gru(x, h)
        last_step = seq_out[:, -1]
        return self.fc(self.relu(last_step)), h_next

    def init_hidden(self, batch_size):
        """Return a zero tensor of shape (n_layers, batch_size, hidden_dim).

        The tensor is allocated from the module's first parameter and then
        moved with ``.to(device)``.
        NOTE(review): ``device`` is not defined in this class; it is expected
        to exist as a module-level name — confirm.
        """
        first_param = next(self.parameters()).data
        state = first_param.new(self.n_layers, batch_size, self.hidden_dim)
        state.zero_()
        return state.to(device)
and then this is where I load the model and try to make a prediction. Both of these are in the same script.
# Load the saved input array from disk and print its rank and shape.
inputs = np.load('.//Pred//input_list.npy')
print(inputs.ndim, inputs.shape)
# NOTE(review): this freshly constructed GRUNet is discarded; the next line rebinds Gmodel to whatever torch.load returns.
Gmodel = GRUNet(24,256,1,2)
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
Gmodel = torch.load('.//GRU//GRU_1028_48.pkl')
Gmodel.eval()
# GRUNet.forward is declared as forward(self, x, h); calling with a single argument raises the TypeError shown below.
# NOTE(review): inputs is still a NumPy array here; presumably it must be converted to a tensor before the call — confirm.
pred = Gmodel(inputs)
Without any other arguments to Gmodel I get the following:
Traceback (most recent call last):
File ".\grunet.py", line 136, in <module>
pred = Gmodel(inputs)
File "C:\Users\ryang\Anaconda-3\envs\tf-gpu\lib\site-packages\torch\nn\modules\module.py", line 547, in __call__
result = self.forward(*input, **kwargs)
TypeError: forward() missing 1 required positional argument: 'h'

You need to provide the hidden state as well. It is usually initialized to all zeros, or you can simply pass None.
That is you either need to explicitly provide one like this :
# Build an all-zero hidden state of shape (num_layers*direction, batch_size, hidden_dim) and move it to device.
# NOTE(review): num_layers, direction, batch_size, hidden_dim and device are not defined in this snippet; supply values matching the model.
hidden_state = torch.zeros(size=(num_layers*direction, batch_size, hidden_dim)).to(device)
# Pass the state as the second positional argument, matching forward(self, x, h).
pred = Gmodel(inputs, hidden_state)
or simply do :
# Alternative: pass None as the hidden state instead of building a tensor explicitly.
hidden_state = None
pred = Gmodel(inputs, hidden_state)

Related

Pytorch DataParallel doesn't work when the model contain tensor operation

If my model contains only nn.Module layers such as nn.Linear, nn.DataParallel works fine.
x = torch.randn(100,10)
class normal_model(torch.nn.Module):
    """Minimal module consisting of a single Linear(10, 1) layer."""

    def __init__(self):
        """Register the one linear layer under the attribute ``layer``."""
        super().__init__()
        self.layer = torch.nn.Linear(in_features=10, out_features=1)

    def forward(self, x):
        """Apply the linear layer to x and return its output."""
        projected = self.layer(x)
        return projected
# Instantiate the module, move it to cuda:0, wrap it in DataParallel and run one forward pass on x.
model = normal_model()
# NOTE(review): the class above is written against torch.nn, while this line uses a bare nn — assumes nn refers to torch.nn.
model = nn.DataParallel(model.to('cuda:0'))
model(x)
However, when my model contains a tensor operation such as the following
class custom_model(torch.nn.Module):
    """Linear(10, 5) layer whose output is matrix-multiplied by a fixed (5, 1) tensor of ones."""

    def __init__(self):
        """Create the linear layer and the constant multiplier."""
        super(custom_model, self).__init__()
        self.layer = torch.nn.Linear(10,5)
        # Plain tensor attribute (neither a Parameter nor a registered buffer), created directly on cuda:0.
        self.weight = torch.ones(5,1, device='cuda:0')

    def forward(self, x):
        """Return ``self.layer(x) @ self.weight``."""
        # '@' is the matrix-multiplication operator. It had been garbled into '#',
        # which turned the product into a trailing comment.
        return self.layer(x) @ self.weight
model = custom_model()
model = torch.nn.DataParallel(model.to('cuda:0'))
model(x)
It gives me the following error
RuntimeError: Caught RuntimeError in replica 1 on device 1. Original
Traceback (most recent call last): File
"/opt/conda/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py",
line 60, in _worker
output = module(*input, **kwargs) File "/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py",
line 541, in call
result = self.forward(*input, **kwargs) File "", line 7, in forward
return self.layer(x) @ self.weight RuntimeError: arguments are located on different GPUs at
/pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:277
How to avoid this error when we have some tensor operations in our model?
I have no experience with DataParallel, but I think it might be because your tensor is not part of the model parameters. You can do this by writing:
torch.nn.Parameter(torch.ones(5,1))
Note that you don't have to move it to the gpu when initializing, because now when you call model.to('cuda:0') this is done automatically.
I can imagine that DataParallel uses the model parameters to move them to the appropriate gpu.
See this answer for more on the difference between a torch tensor and torch.nn.Parameter.
If you don't want the tensor values to be updated by backpropagation during training, you can add requires_grad=False.
Another way that might work is to override the to method, and initialize the tensor in the forward pass:
class custom_model(torch.nn.Module):
    """Linear(10, 5) layer whose output is matrix-multiplied by a (5, 1) tensor of ones built per call."""

    def __init__(self):
        """Create the linear layer; the constant multiplier is created in forward."""
        super(custom_model, self).__init__()
        self.layer = torch.nn.Linear(10,5)

    def forward(self, x):
        """Return ``self.layer(x) @ ones`` where ``ones`` has shape (5, 1)."""
        # Allocate the constant on the device of the incoming batch. The previous
        # version read self.device, which only exists after to() has been called.
        # NOTE(review): presumably this is also what DataParallel replicas need,
        # since each replica receives its input on its own device — confirm.
        ones = torch.ones(5, 1, device=x.device)
        # '@' (matrix multiplication) had been garbled into '#', commenting the product out.
        return self.layer(x) @ ones

    def to(self, device: str):
        """Move the module via nn.Module.to and record the requested device on it.

        Returns the object returned by nn.Module.to, with ``device`` set as an
        attribute.
        """
        new_self = super(custom_model, self).to(device)
        new_self.device = device
        return new_self
or something like this:
class custom_model(torch.nn.Module):
    """Linear(10, 5) layer whose output is matrix-multiplied by a (5, 1) tensor of ones kept on a chosen device."""

    def __init__(self, device:str):
        """Create the linear layer and a ones tensor on ``device``."""
        super(custom_model, self).__init__()
        self.layer = torch.nn.Linear(10,5)
        # Record the device here too, so the attribute exists before to() is called.
        self.device = device
        # Plain tensor attribute (not a Parameter or buffer); to() below recreates it on the new device.
        self.weight = torch.ones(5,1, device=device)

    def forward(self, x):
        """Return ``self.layer(x) @ self.weight``."""
        # '@' (matrix multiplication) had been garbled into '#', commenting the product out.
        return self.layer(x) @ self.weight

    def to(self, device: str):
        """Move the module via nn.Module.to, then rebuild the ones tensor on ``device``.

        Returns the object returned by nn.Module.to, with ``device`` and a
        fresh ``weight`` set on it.
        """
        new_self = super(custom_model, self).to(device)
        new_self.device = device
        new_self.weight = torch.ones(5,1, device=device)
        return new_self
Adding to the answer from @Elgar de Groot, since OP also wanted to freeze that layer: you can still use torch.nn.Parameter, but then you explicitly set requires_grad to False, like this:
self.layer = torch.nn.Parameter(torch.ones(5,1))
self.layer.requires_grad = False

AttributeError: dataset object has no attribute 'c' FastAI

I am trying to train a ResNet based UNet for image segmentation. I have the location of images and mask images in a csv file, that's why I have created my own dataloader, which is as follows:
X = list(df['input_img'])
y = list(df['mask_img'])
X_train, X_valid, y_train, y_valid = train_test_split(
X, y, test_size=0.33, random_state=42)
class NumbersDataset():
    """Pairs of (input image, mask image) addressed by index and read from file paths."""

    def __init__(self, inputs, labels):
        """Keep the two path sequences as ``X`` (inputs) and ``y`` (labels)."""
        self.X, self.y = inputs, labels

    def __len__(self):
        """Number of entries, taken from the inputs sequence."""
        return len(self.X)

    def __getitem__(self, idx):
        """Read the idx-th input and mask with cv2.imread and resize both.

        Both images are passed to cv2.resize with size (427, 240) and
        Lanczos-4 interpolation; the result is the tuple (input, mask).
        """
        # Read both files first, then resize in the same order as they were read.
        raw_pair = (cv2.imread(self.X[idx]), cv2.imread(self.y[idx]))
        image, mask = (
            cv2.resize(raw, (427,240), interpolation = cv2.INTER_LANCZOS4)
            for raw in raw_pair
        )
        return image, mask
I then call this datagenerator in the __main__ function:
if __name__ == '__main__':
    # Wrap each split's path lists in the dataset class.
    dataset_train = NumbersDataset(X_train, y_train)
    dataset_valid = NumbersDataset(X_valid, y_valid)

    # One loader per split: batches of 4, shuffled, two workers.
    dataloader_train = DataLoader(
        dataset_train, batch_size=4, shuffle=True, num_workers=2)
    dataloader_valid = DataLoader(
        dataset_valid, batch_size=4, shuffle=True, num_workers=2)

    # Bundle both loaders and hand them to the U-Net learner with a ResNet-34 architecture.
    datas = DataBunch(train_dl = dataloader_train, valid_dl = dataloader_valid)
    leaner = unet_learner(data = datas, arch = models.resnet34)
But I end up getting the following error:
Traceback (most recent call last):
File "dataset_test.py", line 70, in <module>
leaner = unet_learner(data = datas, arch = models.resnet34)
File "/home/sarvagya/miniconda3/envs/gr/lib/python3.6/site-packages/fastai/vision/learner.py", line 118, in unet_learner
model = to_device(models.unet.DynamicUnet(body, n_classes=data.c, img_size=size, blur=blur, blur_final=blur_final,
File "/home/sarvagya/miniconda3/envs/gr/lib/python3.6/site-packages/fastai/basic_data.py", line 122, in __getattr__
def __getattr__(self,k:int)->Any: return getattr(self.train_dl, k)
File "/home/sarvagya/miniconda3/envs/gr/lib/python3.6/site-packages/fastai/basic_data.py", line 38, in __getattr__
def __getattr__(self,k:str)->Any: return getattr(self.dl, k)
File "/home/sarvagya/miniconda3/envs/gr/lib/python3.6/site-packages/fastai/basic_data.py", line 20, in DataLoader___getattr__
def DataLoader___getattr__(dl, k:str)->Any: return getattr(dl.dataset, k)
AttributeError: 'NumbersDataset' object has no attribute 'c'
I tried searching and even tried using SegmentationItemList.from_df but nothing helped. What am I getting wrong here?
You should add the attribute c into your NumbersDataset, like this:
def __init__(self, inputs, labels, c):
self.inputs = inputs
self.labels = labels
self.c = c

How to feed a dense layer with special state vectors of RNN layer in keras

I'd like to know whether we can feed the dense layer with a selected number of the lower RNN layer's state vectors in Keras. In other words, I'd like to know if a manual intention-based model can be implemented in Keras.
Illustrative image of the desired model
I've created a DynamicSelection layer :
class DynamicSelection(Layer):
    """Layer that concatenates, per position, either an entry of inputs[0] or a run of zeros.

    NOTE(review): focusedElems and wordRnnUnitNum are read as module-level
    names; they are not defined in this class.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the base Layer constructor."""
        super(DynamicSelection, self).__init__(**kwargs)

    def call(self, inputs, **kwargs):
        """Build the result by repeated ``+`` starting from an empty list.

        For each idx in range(focusedElems): when inputs[1][idx] == -1 the
        addend is inputs[0][idx], otherwise a list of wordRnnUnitNum zeros.
        """
        results = []
        for idx in range(focusedElems):
            picked = inputs[1][idx] == -1
            addend = inputs[0][idx] if picked else [0] * wordRnnUnitNum
            results = results + addend
        return results

    def compute_output_shape(self, input_shape):
        """Return the 1-tuple (focusedElems * wordRnnUnitNum,); input_shape is not used."""
        flat_length = focusedElems * wordRnnUnitNum
        return (flat_length,)
and used it to create my model:
# Token input -> embedding -> GRU.
tokens = Input((tokenNum,))
embeddingLayer = Embedding(vocabularySize, tokenDim, trainable=trainable)(tokens)
# NOTE(review): GRU is called without return_sequences, so presumably it yields only the final state rather than one vector per time step — confirm.
rnnLayer = GRU(unitNum)(embeddingLayer)
# Second input carrying the selected time-step indices.
selectedTimeSteps = Input((setlectedtimeStepNum,))
# NOTE(review): inputIdxs is not defined in this snippet; selectedTimeSteps (defined above) is never used — presumably they are meant to be the same tensor.
dynamicSelector = DynamicSelection()([rnnLayer, inputIdxs])
denseLayer = Dense(dense1UnitNumber,activation='relu')(dynamicSelector)
# Four-way softmax output.
softmaxLayer = Dense(4, activation='softmax')(denseLayer)
# NOTE(review): inputToken is not defined in this snippet either; the token Input above is named tokens.
model = Model(inputs=[inputIdxs, inputToken], outputs=softmaxLayer)
but I get the following exception:
dynamicSelector = DynamicSelection()([rnnLayer, inputIdxs])
File "/Users/halsaied/anaconda2/lib/python2.7/site-packages/keras/engine/base_layer.py", line 503, in __call__
arguments=user_kwargs)
File "/Users/halsaied/anaconda2/lib/python2.7/site-packages/keras/engine/base_layer.py", line 571, in _add_inbound_node
output_tensors[i]._keras_shape = output_shapes[i]
AttributeError: 'int' object has no attribute '_keras_shape'

initialising and accessing an array of weights in a custom keras layer

I am writing a custom keras layer for convolution in a cnn architecture in fourier domain:
class Fourier_Conv2D(Layer):
    """Custom layer holding one trainable ``kernel`` weight that call() indexes at 0."""

    def __init__(self, no_of_kernels, **kwargs):
        """Remember the kernel count, then run the base Layer constructor."""
        self.no_of_kernels = no_of_kernels
        super(Fourier_Conv2D, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the weight with shape ``input_shape + (no_of_kernels,)``."""
        kernel_shape = input_shape + (self.no_of_kernels,)
        self.kernel = self.add_weight(
            name = 'kernel',
            shape = kernel_shape,
            initializer = 'uniform',
            trainable = True,
        )
        super(Fourier_Conv2D, self).build(input_shape)

    def call(self, x):
        """Return K.dot of x with the first slice of the kernel."""
        # self.kernel only exists once build() has run.
        first_kernel = self.kernel[0]
        return K.dot(x, first_kernel)
In the call function, I need to do pointwise multiplication of the fft of input with fft of each kernel (according to the convolution theorem) and add the products before passing this sum to activation function. But how can I access each weight separately in the call function, as using array index to do so is giving the following attribute error -
AttributeError Traceback (most recent call last)
<ipython-input-71-9617a8e7ab2e> in <module>()
1 x = Fourier_Conv2D(5)
----> 2 x.call((2,2,1))
<ipython-input-70-02ded53b8f6f> in call(self, x)
11
12 def call(self, x):
---> 13 return K.dot(x, self.kernel[0])
14
AttributeError: 'Fourier_Conv2D' object has no attribute 'kernel'
Thanks in advance for any help in solving the error.
You are not using your layer correctly. The line x.call((2,2,1)) makes no sense since you need to pass a tensor to the layer. You should instead do something like this:
x = Input((3,4))
custom_layer = Fourier_Conv2D(10)
output = custom_layer(x)
Moreover, there are some errors in the definition of your layer. The following should work:
class Fourier_Conv2D(Layer):
    """Custom layer with a 2-D trainable kernel applied to the input via K.dot."""

    def __init__(self, no_of_kernels, **kwargs):
        """Remember the kernel count, then run the base Layer constructor."""
        self.no_of_kernels = no_of_kernels
        super(Fourier_Conv2D, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the weight with shape (last input dimension, no_of_kernels)."""
        # Only the last entry of input_shape is used, converted to a plain int.
        last_dim = int(input_shape[-1])
        self.kernel = self.add_weight(
            name = 'kernel',
            shape = (last_dim, self.no_of_kernels),
            initializer = 'uniform',
            trainable = True,
        )
        super(Fourier_Conv2D, self).build(input_shape)

    def call(self, x):
        """Return K.dot of x with the whole kernel (no indexing)."""
        product = K.dot(x, self.kernel)
        return product

Keras Custom Layer Error (Operation IsVariableInitialized has been marked as not fetchable)

I'm trying to create a custom Keras layer on a toy dataset, and am having issues. At a high level, I want to create an "Input Gate" layer, which would have trainable weights to turn each column of input on or off. So I'm starting with just trying to multiply the inputs by a sigmoid'd version of the learned weights. My code is as follows:
### This is my custom layer
class InputGate(Layer):
    """Layer that multiplies its input by the sigmoid of a trainable weight."""

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the base Layer constructor."""
        super(InputGate, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the 'input_gate' weight shaped like input_shape without its first entry."""
        gate_shape = input_shape[1:]
        self.kernel = self.add_weight(
            name='input_gate',
            shape=gate_shape,
            initializer='random_uniform',
            trainable=True,
        )
        # The base build() is still invoked after the weight is created.
        super(InputGate, self).build(input_shape)

    def call(self, inputs):
        """Return inputs multiplied by K.sigmoid(kernel)."""
        return inputs * K.sigmoid(self.kernel)

    def get_config(self):
        """Return a new dict holding the base config plus this layer's (empty) extras."""
        extras = {}
        merged = dict(super(InputGate, self).get_config())
        merged.update(extras)
        return merged

    def compute_output_shape(self, input_shape):
        """The output shape is the input shape, returned unchanged."""
        return input_shape
def create_linear_model(x, y, num_noise_vars = 0, reg_strength=0):
    """Build the noisy design matrix and a compiled InputGate -> Dense(1) model.

    x is passed to get_x_with_noise together with num_noise_vars; y is not
    used here. reg_strength is the argument given to l2 for the Dense kernel
    regularizer. Returns a dict with keys "model" and "new_x".
    """
    new_x = get_x_with_noise(x, num_noise_vars=num_noise_vars)

    # The gate's input width is the original column plus the noise columns.
    n_columns = 1+num_noise_vars
    layers = [
        InputGate(input_shape=(n_columns,)),
        Dense(1, kernel_regularizer=l2(reg_strength)),
    ]
    model = Sequential(layers)
    model.compile(optimizer="rmsprop", loss="mse")
    # The learning rate is assigned on the optimizer after compile.
    model.optimizer.lr = 0.001

    return dict(model=model, new_x=new_x)
def get_x_with_noise(x, num_noise_vars):
    """Return an array whose rows pair each element of x with random noise values.

    num_noise_vars columns are drawn with np.random.random(len(x)), one draw
    per column, and x itself is placed as the last column.
    """
    # Draw the noise columns in order, then put the real signal last.
    columns = [np.random.random(len(x)) for _ in range(num_noise_vars)]
    columns.append(x)
    # zip(*columns) turns the list of columns into rows.
    rows = list(zip(*columns))
    return np.array(rows)
# Toy data: 500 uniform random inputs with the exact linear target y = 3x + 10.
x = np.random.random(500)
y = (x * 3) + 10
num_noise_vars = 5
# Build the model and the widened input matrix (noise columns plus x).
info = create_linear_model(x, y, num_noise_vars=num_noise_vars)
model = info["model"]
new_x = info["new_x"]
# NOTE(review): num_epochs is not defined anywhere in this snippet; it must be set before this call.
results = model.fit(new_x, y, epochs=num_epochs, verbose=0)
And then I get the following error:
ValueError: Operation 'input_gate_14/IsVariableInitialized' has been marked as not fetchable.
This layer is mostly taken from the docs(https://keras.io/layers/writing-your-own-keras-layers/). I'm using Keras 2.0.9, with Tensorflow backend on a CPU (Macbook Air).
This layer seems as simple as can be, and googling the error leads me to discussions that don't seem relevant. Anyone have ideas of what's causing this?
Any help is much appreciated! Thanks!

Resources