Many-to-One LSTM Input Shape - pytorch

https://wandb.ai/ayush-thakur/dl-question-bank/reports/One-to-Many-Many-to-One-and-Many-to-Many-LSTM-Examples-in-Keras--VmlldzoyMDIzOTM
"One-to-many sequence problems are sequence problems where the input data has one time-step, and the output contains a vector of multiple values or multiple time-steps."
I am trying to make a One-to-many LSTM based model in pytorch.
It is a binary classification problem, so there are only 2 classes. However, the target should be a vector of such 2-class labels, for example:
LABEL VECTOR [array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([1., 0.]), array([1., 0.]), array([1., 0.]), array([1., 0.]), array([1., 0.]), array([0., 1.])]
num_classes = 2

from torch import nn

# define model
class LSTMClassifier(nn.Module):
    def __init__(self, input_size, lstm1_hidden_size, num_layers, num_classes):
        super(LSTMClassifier, self).__init__()
        # shape = (1, 8192, 16)
        self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=lstm1_hidden_size,
                             num_layers=num_layers, batch_first=True)
        self.classifier = nn.Linear(lstm1_hidden_size, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        lstm_out, _ = self.lstm1(x)  # hidden state & cell state also returned
        pred = self.classifier(lstm_out)
        pred = self.sigmoid(pred)
        return pred
This model outputs predictions with shape (1, num_segments, 2).
The shape of the data is:
(1,num_segments,8192)
The shape of the labels is:
(1,num_segments,16,2)
Again the labels look like the following:
There are always exactly 16 labels, each a binary classification encoded as 2 columns, where each column is 0 or 1 (hence 2 classes). So I want the output of the LSTM model to be a sequence of binary classifications.
LABEL VECTOR [array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([0., 1.]), array([1., 0.]), array([1., 0.]), array([1., 0.]), array([1., 0.]), array([1., 0.]), array([0., 1.])]
Right now the error I am getting is:
ValueError: Target size (torch.Size([1, 7, 16, 2])) must be the same as input size (torch.Size([1, 7, 2]))
How can I structure this LSTM pytorch model to get an output as a vector of Binary Classification labels?
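A minimal sketch (my own illustration, not from the original post) of one way to resolve the shape mismatch: project the LSTM output at every segment to 16 × 2 values and reshape, so the predictions match the (1, num_segments, 16, 2) targets. The hidden size, layer count, and class name below are placeholders.

import torch
from torch import nn

class LSTMMultiLabelClassifier(nn.Module):
    def __init__(self, input_size=8192, hidden_size=128, num_layers=1,
                 num_labels=16, num_classes=2):
        super().__init__()
        self.num_labels = num_labels
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        # one linear head producing 16 * 2 values per segment
        self.classifier = nn.Linear(hidden_size, num_labels * num_classes)

    def forward(self, x):                       # x: (batch, num_segments, 8192)
        lstm_out, _ = self.lstm(x)              # (batch, num_segments, hidden_size)
        logits = self.classifier(lstm_out)      # (batch, num_segments, 16 * 2)
        logits = logits.view(x.size(0), x.size(1), self.num_labels, self.num_classes)
        return torch.sigmoid(logits)            # (batch, num_segments, 16, 2)

With this shape, the output can be compared directly against targets of size (1, num_segments, 16, 2), e.g. with nn.BCELoss.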

Related

How to use "LeakyRelu" and Parametric Leaky Relu "PReLU" in Keras Tuner

I am using Keras Tuner's RandomSearch() to hypertune my regression model. While I can tune with "relu" and "selu", I am unable to do the same for Leaky ReLU. The "relu" and "selu" strings work because string aliases are available for them, but there is no string alias for Leaky ReLU. I tried passing a callable Leaky ReLU object (see my example below), but it doesn't seem to work. Can you please advise me how to do this? I have the same issue with Parametric Leaky ReLU (PReLU).
Thank you in advance!
def build_model(hp):
    model = Sequential()
    model.add(
        Dense(
            units=18,
            kernel_initializer='normal',
            activation='relu',
            input_shape=(18,)
        )
    )
    for i in range(hp.Int(name="num_layers", min_value=1, max_value=5)):
        model.add(
            Dense(
                units=hp.Int(
                    name="units_" + str(i),
                    min_value=18,
                    max_value=180,
                    step=18),
                kernel_initializer='normal',
                activation=hp.Choice(
                    name='dense_activation',
                    values=['relu', 'selu', LeakyReLU(alpha=0.01)],
                    default='relu'
                )
            )
        )
    model.add(Dense(units=1))
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            hp.Choice(
                name="learning_rate", values=[1e-2, 1e-3, 1e-4]
            )
        ),
        loss='mse'
    )
    return model
As a workaround, you can add another activation function to the tf.keras.activations.* module by modifying its source file, activations.py.
Here's the code for tf.keras.activations.relu, which you'll find in activations.py:
@keras_export('keras.activations.relu')
@dispatch.add_dispatch_support
def relu(x, alpha=0., max_value=None, threshold=0):
  """Applies the rectified linear unit activation function.

  With default values, this returns the standard ReLU activation:
  `max(x, 0)`, the element-wise maximum of 0 and the input tensor.

  Modifying default parameters allows you to use non-zero thresholds,
  change the max value of the activation,
  and to use a non-zero multiple of the input for values below the threshold.

  For example:

  >>> foo = tf.constant([-10, -5, 0.0, 5, 10], dtype = tf.float32)
  >>> tf.keras.activations.relu(foo).numpy()
  array([ 0.,  0.,  0.,  5., 10.], dtype=float32)
  >>> tf.keras.activations.relu(foo, alpha=0.5).numpy()
  array([-5. , -2.5,  0. ,  5. , 10. ], dtype=float32)
  >>> tf.keras.activations.relu(foo, max_value=5).numpy()
  array([0., 0., 0., 5., 5.], dtype=float32)
  >>> tf.keras.activations.relu(foo, threshold=5).numpy()
  array([-0., -0.,  0.,  0., 10.], dtype=float32)

  Arguments:
      x: Input `tensor` or `variable`.
      alpha: A `float` that governs the slope for values lower than the
        threshold.
      max_value: A `float` that sets the saturation threshold (the largest value
        the function will return).
      threshold: A `float` giving the threshold value of the activation function
        below which values will be damped or set to zero.

  Returns:
      A `Tensor` representing the input tensor,
      transformed by the relu activation function.
      Tensor will be of the same shape and dtype of input `x`.
  """
  return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold)
Copy this code and paste it just below. Change @keras_export('keras.activations.relu') to @keras_export('keras.activations.leaky_relu'), rename the function (so it does not overwrite relu), and change the default value of alpha to 0.2, like:
@keras_export('keras.activations.leaky_relu')
@dispatch.add_dispatch_support
def leaky_relu(x, alpha=0.2, max_value=None, threshold=0):
  """Applies the leaky rectified linear unit activation function.

  Same as `relu` above, but with a default slope of `alpha=0.2`
  for values below the threshold.
  """
  return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold)
You can then use the string alias keras.activations.leaky_relu.

Alternatively, you can register Leaky ReLU as a custom object so that it can be referred to by a plain string:
# Custom activation function
from keras.layers import Activation, LeakyReLU
from keras import backend as K
from keras.utils.generic_utils import get_custom_objects

## Add leaky-relu so we can use it as a string
get_custom_objects().update({'leaky-relu': Activation(LeakyReLU(alpha=0.2))})

## Main activation functions available to use
activation_functions = ['sigmoid', 'relu', 'elu', 'leaky-relu', 'selu', 'gelu', 'swish']
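A short usage sketch (my addition): assuming the registration above has already run, 'leaky-relu' behaves like any built-in activation string, so it can also be offered to the tuner via hp.Choice(values=['relu', 'selu', 'leaky-relu']). The layer sizes here are illustrative.

from keras.models import Sequential
from keras.layers import Dense

# 'leaky-relu' resolves through the custom-objects registry populated above
model = Sequential([Dense(36, activation='leaky-relu', input_shape=(18,))])
model.summary()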

Indexing using pyTorch tensors along one specific dimension with 3 dimensional tensor

I have 2 tensors:
A with shape (batch, sequence, vocab)
and B with shape (batch, sequence).
A = torch.tensor([[[ 1.,  2.,  3.],
                   [ 5.,  6.,  7.]],

                  [[ 9., 10., 11.],
                   [13., 14., 15.]]])
B = torch.tensor([[0, 2],
                  [1, 0]])
I want to get the following:
C = torch.zeros_like(B)
for i in range(B.shape[0]):
    for j in range(B.shape[1]):
        C[i, j] = A[i, j, B[i, j]]
But in a vectorized way. I tried torch.gather and other stuff but I cannot make it work.
Can anyone please help me?
>>> import torch
>>> A = torch.tensor([[[ 1.,  2.,  3.],
...                    [ 5.,  6.,  7.]],
...
...                   [[ 9., 10., 11.],
...                    [13., 14., 15.]]])
>>> B = torch.tensor([[0, 2],
...                   [1, 0]])
>>> A.shape
torch.Size([2, 2, 3])
>>> B.shape
torch.Size([2, 2])
>>> C = torch.zeros_like(B)
>>> for i in range(B.shape[0]):
...     for j in range(B.shape[1]):
...         C[i, j] = A[i, j, B[i, j]]
...
>>> C
tensor([[ 1,  7],
        [10, 13]])
>>> torch.gather(A, -1, B.unsqueeze(-1))
tensor([[[ 1.],
         [ 7.]],

        [[10.],
         [13.]]])
>>> torch.gather(A, -1, B.unsqueeze(-1)).shape
torch.Size([2, 2, 1])
>>> torch.gather(A, -1, B.unsqueeze(-1)).squeeze(-1)
tensor([[ 1.,  7.],
        [10., 13.]])
Hi, you can use torch.gather(A, -1, B.unsqueeze(-1)).squeeze(-1).
The first -1 (the dim argument, between A and B.unsqueeze(-1)) indicates the dimension along which you want to pick the elements.
The second -1, in B.unsqueeze(-1), adds one dimension to B so that the two tensors have the same number of dimensions; otherwise you get RuntimeError: Index tensor must have the same number of dimensions as input tensor.
The last -1, in squeeze(-1), reshapes the result from torch.Size([2, 2, 1]) back to torch.Size([2, 2]).

How dropout is implemented in Keras mobilenet v3 imagenet weights during transfer learning when some layers are frozen (made un-trainable)?

I am working on an image classification problem using Keras MobileNetV3 pre-trained on ImageNet, with 90% of the layers frozen and the remaining 10% made trainable, whilst applying a dropout of 0.2. I was wondering how this is handled in the backend.
MobileNetV3Small(input_shape=(IMG_HEIGHT, IMG_WIDTH, DEPTH),
                 alpha=1.0,
                 minimalistic=False,
                 include_top=False,
                 weights='imagenet',
                 input_tensor=None,
                 pooling='max',
                 dropout_rate=0.2)
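For context, a hypothetical sketch (my addition, not from the question) of how such a 90%/10% split is typically set up; the image dimensions below are placeholder values.

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV3Small

IMG_HEIGHT, IMG_WIDTH, DEPTH = 224, 224, 3   # placeholder values

base = MobileNetV3Small(input_shape=(IMG_HEIGHT, IMG_WIDTH, DEPTH),
                        include_top=False,
                        weights='imagenet',
                        pooling='max',
                        dropout_rate=0.2)

cutoff = int(len(base.layers) * 0.9)
for layer in base.layers[:cutoff]:   # first ~90%: frozen
    layer.trainable = False
for layer in base.layers[cutoff:]:   # last ~10%: trainable
    layer.trainable = True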
If the layer is called with parameter training=False, like when you predict, nothing will happen. Let's start with some input:
import tensorflow as tf

rate = 0.4
dropout = tf.keras.layers.Dropout(rate)
x = tf.cast(tf.reshape(tf.range(1, 10), (3, 3)), tf.float32)
x
<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]], dtype=float32)>
Now, let's call the dropout model while training:
dropout(x, training=True)
<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[ 0.       ,  3.3333333,  0.       ],
       [ 6.6666665,  8.333333 ,  0.       ],
       [11.666666 , 13.333333 , 15.       ]], dtype=float32)>
As you can see, all the remaining values are multiplied by 1/(1 - rate) (here 1/0.6 ≈ 1.67). Now let's call the layer with training=False:
dropout(x, training=False)
<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]], dtype=float32)>
Nothing happens.
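A small illustrative sketch (my addition, not from the answer): inside a full model the same rule applies. fit() runs with training=True, while predict() and a plain call default to training=False, so dropout only zeroes activations during training.

import tensorflow as tf

inputs = tf.keras.Input(shape=(3,))
outputs = tf.keras.layers.Dropout(0.4)(inputs)
model = tf.keras.Model(inputs, outputs)

x = tf.ones((1, 3))
print(model(x, training=True))    # some entries zeroed, rest scaled by 1/(1 - 0.4)
print(model(x, training=False))   # identity: dropout is a no-op
print(model.predict(x))           # also identity: predict() uses training=False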

How to add to pytorch tensor at indices?

I have to admit, I'm a bit confused by the scatter* and index* operations - I'm not sure any of them do exactly what I'm looking for, which is very simple:
Given some 2-D tensor
z = tensor([[1., 1., 1., 1.],
            [1., 1., 1., 1.],
            [1., 1., 1., 1.]])
And a list (or tensor?) of 2-d indexes:
inds = tensor([[0, 0],
               [1, 1],
               [1, 2]])
I want to add a scalar to z at those indexes (and do it efficiently):
znew = z.something_add(inds, 3)
->
znew = tensor([[4., 1., 1., 1.],
               [1., 4., 4., 1.],
               [1., 1., 1., 1.]])
If I have to I can make that scalar a tensor of whatever shape (where all elements = 3), but I'd rather not...
You must provide two lists to your indexing. The first having the row positions and the second the column positions. In your example, it would be:
z[[0, 1, 1], [0, 1, 2]] += 3
torch.Tensor indexing follows Numpy. See https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#integer-array-indexing for more details.
This code achieves what you want:
z_new = z.clone() # copy the tensor
z_new[inds[:, 0], inds[:, 1]] += 3 # modify selected indices of new tensor
In PyTorch, you can index each axis of a tensor with another tensor.
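An alternative sketch (my addition): torch.Tensor.index_put_ with accumulate=True also adds at the given positions, and unlike += with advanced indexing it accumulates correctly when the same index pair appears more than once in inds.

import torch

z = torch.ones(3, 4)
inds = torch.tensor([[0, 0],
                     [1, 1],
                     [1, 2]])

z_new = z.clone()                      # keep the original tensor unchanged
z_new.index_put_((inds[:, 0], inds[:, 1]), torch.tensor(3.), accumulate=True)
print(z_new)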

How to feed a model with "a list of outputs"?

Sorry for the title, but I couldn't come up with a better description here.
I am trying to apply batches for training on a model which should have 13 fully connected output layers. Each output layer has only two nodes (but is fully connected as stated).
Building the model's output looks like this:
outputs = list()
for i in range(num_labels):
    out_y = Dense(2, activation='softmax', name='out_{:d}'.format(i))(convolution_layer)
    outputs.append(out_y)

self.model = Model(input=inputs, output=outputs)
However, I can't manage to feed this model. I've tried to go with a [batch_size, 13, 1, 2] sized output array:
y = np.zeros((batch_size, 13, 1, 2))
But for a batch of size 2 I get:
ValueError: The model expects 13 input arrays, but only received one array. Found: array with shape (2, 13, 1, 2)
I've tried several other things but it's simply not clear to me how the input for the model looks like.
How can I train this model?
I have also tried to pass a list of lists of numpy arrays,
where the first level represents the samples in the batch (here 2) and the second level is the list of 13 numpy arrays for that sample. Yet I am getting:
ValueError: Error when checking model target: you are passing a list as input to your model, but the model expects a list of 13 Numpy arrays instead. The list you passed was: [[array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 0., 1.]), array([ 1., 0.]), array([
As suggested, I also tried to return a list() of numpy arrays of size [13,2]:
Where the error becomes:
ValueError: Error when checking model target: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 13 arrays but instead got the following list of 2 arrays: [array([[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 0., 1.],
[ 1., 0.],
[ ...
The code
Below you can find the current code, which generates one sample in sample_generator and a full batch in batch_generator (which uses sample_generator).
Please note: the code now shows how I generate a list() of [13, 2] ndarrays, where the number of such ndarrays in the list is defined by batch_size.
def batch_generator(w2v, file_path, meta_info, batch_size, sample_generator_fn, embedding_size):
    try:
        x = np.zeros((batch_size, meta_info.max_sequence_length, embedding_size, 1))
        y = list()  # np.zeros((batch_size, 13, 1, 2))
        file = open(file_path)
        while True:
            x[:] = 0.0
            # y[:] = 0.0
            for batch in range(batch_size):
                sentence_info_json = file.readline()
                if sentence_info_json == '':
                    file.seek(0)
                    sentence_info_json = file.readline()
                sample = sample_generator_fn(w2v, sentence_info_json, meta_info)
                if not sample:
                    continue
                sentence_embedding = sample[0]
                final_length = len(sentence_embedding)
                x[batch, :final_length, :, 0] = sentence_embedding
                y.append(sample[1])
            shuffled = np.asarray(range(batch_size))
            np.random.shuffle(shuffled)
            x = x[shuffled]
            # y = y[shuffled]
            y = [y[i] for i in shuffled]
            yield x, y
    except Exception as e:
        print('Error in generator.')
        print(e)
        raise e
def sample_generator(w2v, sentence_info_json, meta_info):
    if not sentence_info_json:
        print('???')
    sentence_info = json.loads(sentence_info_json)
    tokens = [token['word'] for token in sentence_info['corenlp']['tokens']]
    sentence = Sentence(tokens=tokens)
    sentence_embedding = w2v.get_word_vectors(sentence.tokens.tolist())
    sentence_embedding = np.asarray([word_vector for word_vector in sentence_embedding if word_vector is not None])
    final_length = len(sentence_embedding)
    if final_length == 0:
        return None
    y = np.zeros((2, len(meta_info.category_dict)))
    y[1, :] = 1.
    # y_list = []
    y_tar = np.zeros((len(meta_info.category_dict), 2))
    for i in range(len(meta_info.category_dict)):
        y_tar[i][1] = 1.0
        # y_list.append(np.asarray([0.0, 1.0]))
    for opinion in sentence_info['opinions']:
        index = meta_info.category_dict[opinion['category']]
        y_tar[index][0] = 1.0
        y_tar[index][1] = 0.0
        # y_list[index][0] = 1.0
        # y_list[index][1] = 0.0
    return sentence_embedding, y_tar
As requested, the call to fit_generator():
cnn.model.fit_generator(generator=batch_generator(word2vec,
                                                  train_file, train_meta_info,
                                                  num_batches, sample_generator,
                                                  embedding_size),
                        samples_per_epoch=2000,
                        nb_epoch=2,
                        # validation_data=batch_generator(test_file_path, train_meta_info),
                        # nb_val_samples=100,
                        verbose=True)
Your output should be a list as specified in the error. Each element of the list should be a numpy array of size [batch_size, nb_outputs]. So a list of 13 elements of size [batch_size,2] in your case.
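For illustration (my addition, assuming targets are stacked per sample as in the question): one way to convert a batch of per-sample (13, 2) targets into the list of 13 arrays of shape (batch_size, 2) that the 13-output model expects.

import numpy as np

batch_size, num_labels = 2, 13
y_batch = np.zeros((batch_size, num_labels, 2))   # stacked per-sample targets

# one array of shape (batch_size, 2) per output head
y_for_keras = [y_batch[:, i, :] for i in range(num_labels)]
# inside the generator: yield x, y_for_keras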
