Normalizing multivariate time-series data with different sequence length - scikit-learn

I have a multivariate time-series dataset with different sequence lengths. I filled the missing values in the sequences with zeros. I am trying to use a recurrent neural network model for time-series forecasting. I noticed that the model's results degrade when the data falls outside the range -1 to 1, so I wrote the following normalization class using MinMaxScaler. However, I don't know how to exclude the missing (zero-padded) values in the sequences when fitting the MinMaxScaler. Here is my code:
from sklearn.preprocessing import MinMaxScaler
from collections import OrderedDict
import numpy as np
import torch
# Use the CUDA device when torch reports one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def get_mask_from_sequence_lengths(
    sequence_lengths: torch.Tensor, max_length: int
) -> torch.BoolTensor:
    """Turn per-sequence lengths into a boolean padding mask.

    Entry ``[i, j]`` of the result is ``True`` exactly when the 1-based
    column position ``j + 1`` does not exceed ``sequence_lengths[i]``.

    Args:
        sequence_lengths: tensor holding one length per batch element.
        max_length: number of columns in the returned mask.

    Returns:
        Mask of shape ``(batch_size, max_length)``.
    """
    batch_size = sequence_lengths.size(0)
    # Running sum over a matrix of ones gives 1, 2, ..., max_length per row.
    column_position = sequence_lengths.new_ones(batch_size, max_length).cumsum(dim=1)
    lengths_as_column = sequence_lengths.unsqueeze(1)
    return lengths_as_column >= column_position
class Normalizer1D(torch.nn.Module):
    """Per-channel min-max normalizer for arrays indexed as ``x[:, channel, :]``.

    One ``MinMaxScaler`` is fitted per channel ``i`` on the 2-D slice
    ``inputs[:, i, :]``. Its feature range is ``(-1, 1)`` when that slice
    contains a negative value and ``(0, 1)`` otherwise.

    NOTE(review): zero padding is not excluded from the fit; padded entries
    are treated like any other value.

    Args:
        input_dim: number of channels, i.e. the size of axis 1 of ``inputs``.
        inputs: NumPy array used to fit the scalers.
    """

    def __init__(self, input_dim, inputs):
        super(Normalizer1D, self).__init__()
        self.input_dim = input_dim
        self.to(device)
        self._norm = self.build_normalizers(inputs)

    def build_normalizers(self, x):
        """Fit one scaler per channel and return them keyed by ``str(channel)``."""
        normalizers = OrderedDict()
        for i in range(self.input_dim):
            channel = x[:, i, :]
            # Signed data is mapped to [-1, 1], non-negative data to [0, 1].
            feature_range = (-1, 1) if np.min(channel) < 0 else (0, 1)
            normalizers[str(i)] = MinMaxScaler(feature_range=feature_range).fit(channel)
        return normalizers

    def normalize(self, x):
        """Scale tensor ``x`` channel by channel with the fitted scalers.

        ``transform`` is used (not ``fit_transform``) so that the statistics
        learned at construction time are applied unchanged and
        ``unnormalize`` stays the exact inverse. As a consequence the last
        axis of ``x`` must have the same size as the data the scalers were
        fitted on.

        Returns:
            A tensor on ``device`` with the same layout as ``x``.
        """
        values = x.cpu().detach().numpy()
        scaled = [
            self._norm[str(i)].transform(values[:, i, :])
            for i in range(x.shape[1])
        ]
        return torch.from_numpy(np.stack(scaled, axis=1)).to(device)

    def unnormalize(self, x):
        """Map a normalized tensor back to the original value range.

        Returns:
            A tensor on ``device`` with the same layout as ``x``.
        """
        values = x.cpu().detach().numpy()
        restored = [
            self._norm[str(i)].inverse_transform(values[:, i, :])
            for i in range(x.shape[1])
        ]
        return torch.from_numpy(np.stack(restored, axis=1)).to(device)

    def min_(self):
        """Return the scalers' ``min_`` vectors stacked along axis 1."""
        offsets = [self._norm[str(i)].min_ for i in range(len(self._norm))]
        return torch.from_numpy(np.stack(offsets, axis=1))

    def scale_(self):
        """Return the scalers' ``scale_`` vectors stacked along axis 1."""
        factors = [self._norm[str(i)].scale_ for i in range(len(self._norm))]
        return torch.from_numpy(np.stack(factors, axis=1))

    def unnormalize_mean(self, x_mu):
        """Compute ``x_mu * scale_ + min_`` in place and return ``x_mu``.

        NOTE(review): per the scikit-learn documentation ``X * scale_ + min_``
        is MinMaxScaler's *forward* transform; the inverse would be
        ``(X - min_) / scale_``. Confirm which one is intended here.
        """
        Xscale = self.scale_()
        Xmin = self.min_()
        # mul_/add_ modify the caller's tensor in place.
        normX = x_mu.mul_(Xscale)
        return normX.add_(Xmin)

    def unnormalize_sigma(self, x_sigma):
        """Multiply ``x_sigma`` by ``scale_`` in place and return it.

        NOTE(review): undoing a min-max scaling of a standard deviation would
        divide by ``scale_`` -- confirm the intended direction.
        """
        Xscale = self.scale_()
        return x_sigma.mul_(Xscale)
# compute the normalizers
def compute_normalizer(loader_train):
    """Fit input and output normalizers on every batch of ``loader_train``.

    Args:
        loader_train: iterable yielding ``(u, y)`` tensor pairs whose first
            axis is the batch axis.

    Returns:
        ``(u_normalizer, y_normalizer)``, two ``Normalizer1D`` instances
        fitted on the concatenated inputs and outputs respectively.

    Raises:
        ValueError: if ``loader_train`` yields no batches.
    """
    input_batches = []
    output_batches = []
    for u, y in loader_train:
        input_batches.append(u)
        output_batches.append(y)
    if not input_batches:
        raise ValueError("loader_train yielded no batches; cannot fit normalizers")

    # Concatenate once at the end instead of re-copying the growing tensor
    # on every iteration.
    inputs = torch.cat(input_batches, dim=0).cpu().detach().numpy()
    outputs = torch.cat(output_batches, dim=0).cpu().detach().numpy()

    # Axis 1 is the channel axis the normalizers iterate over.
    u_normalizer = Normalizer1D(inputs.shape[1], inputs)
    y_normalizer = Normalizer1D(outputs.shape[1], outputs)
    return u_normalizer, y_normalizer
I would appreciate it if someone could suggest a way to exclude the missing values from the normalization process.

Related

PyTorch Dataloader bucket by tensor length

I've been trying to create a custom Dataloader that can serve batches of data that are all same-sized to feed into a Conv2d layer for classification purposes.
Here's some test data
X has shape NUMBER_OF_POINTS x CHOICES x NUM_FEATURES, while y is the label (which can be any integer from 0 to CHOICES-1).
I'm having trouble writing the Sampler and Dataloader.
import random
import torch
from collections import defaultdict
from sklearn.utils import shuffle
from torch.utils.data import Dataset, DataLoader
from typing import Sequence, Iterator
import numpy as np
# One sampling weight per bucket (14 buckets in total).
sample_probs = np.array([
    2.04302017e-03, 6.84249612e-03, 3.18776004e-02, 6.69332322e-01,
    1.79056125, 1.63388916, 1.31819391, 1.43798623,
    2.44057406, 5.51664089e-01, 9.66624185e-02, 1.67495225e-02,
    3.59960696e-03, 2.43216687e-05,
])
X = []
y = []
train_datasets = []
# Number of random samples to generate for bucket 0..13.
i_dict = dict(enumerate(
    [19, 63, 30, 6192, 16564, 15115, 12195, 13303, 22578, 5103, 894, 155, 33, 2]
))
# Bucket k holds samples with i = k + 2 choices; every sample is a random
# (i, 4, 1) tensor paired with a random integer label in [0, i - 1].
for bucket, n_points in i_dict.items():
    i = bucket + 2
    temp_x = []
    temp_y = []
    for j in range(n_points):
        temp_x.append(torch.rand(i, 4, 1))
        temp_y.append(torch.tensor(random.randint(0, i - 1)))
    X = torch.stack(temp_x)
    y = torch.stack(temp_y)
    train_datasets.append((X.clone(), y.clone()))
class WeightedBucketSampler(torch.utils.data.Sampler):
    """Sampler that picks one bucket by weight and yields that bucket's indices.

    ``data`` is a sequence of ``(routes, labels)`` pairs, one per bucket.
    Each iteration draws bucket numbers with ``torch.multinomial``, keeps only
    the first draw, and yields a random permutation of that single bucket's
    sample positions, shifted by the sizes of all preceding buckets.
    """

    def __init__(self, data, weights: Sequence[float], num_samples: int,
                 replacement: bool = True, generator=None, shuffle=True, drop_last=False):
        super().__init__(data)
        self.weights = torch.as_tensor(weights, dtype=torch.double)
        self.num_samples = num_samples
        self.replacement = replacement
        self.generator = generator
        self.shuffle = shuffle
        self.drop_last = drop_last
        # Bucket keys start at 2; each value is [routes, labels].
        self.buckets = defaultdict(list)
        total = 0
        for position in range(len(data)):
            routes = data[position][0]
            self.buckets[position + 2] += [routes, data[position][1]]
            total += len(routes)
        self.length = total

    def __iter__(self) -> Iterator[int]:
        # Only the first of the drawn bucket numbers is used.
        drawn = torch.multinomial(self.weights, self.num_samples, self.replacement, generator=self.generator)
        rand_bucket = drawn.tolist()[0]
        # Flat index of the chosen bucket's first sample.
        shifter = 0
        for earlier in range(rand_bucket):
            shifter += len(self.buckets[earlier + 2][0])
        bucket_size = len(self.buckets[rand_bucket + 2][0])
        local_order = torch.randperm(bucket_size, generator=self.generator)
        for flat_index in torch.add(local_order, shifter).tolist():
            yield flat_index

    def __len__(self):
        """Return the total number of samples over all buckets."""
        return self.length
class CustomDataset(Dataset):
    """Flatten bucketed ``(routes, labels)`` pairs into one indexable dataset.

    Samples are numbered consecutively, bucket after bucket, in the order in
    which they appear in ``data``.
    """

    def __init__(self, data):
        self.routes = dict()
        self.choice = dict()
        flat_index = 0
        for bucket_no in range(len(data)):
            bucket_routes = data[bucket_no][0]
            for row in range(len(bucket_routes)):
                self.routes[flat_index] = bucket_routes[row]
                self.choice[flat_index] = data[bucket_no][1][row]
                flat_index += 1

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.choice)

    def __getitem__(self, idx):
        """Return the ``(routes, choice)`` pair stored under ``idx``."""
        label = self.choice[idx]
        return self.routes[idx], label
# Flatten the buckets into one dataset, then let the sampler choose indices.
train_datasets_ds = CustomDataset(train_datasets)
bucket_sampler = WeightedBucketSampler(
    train_datasets,
    sample_probs,
    len(sample_probs),
    shuffle=True,
    drop_last=False,
)
loader = DataLoader(
    train_datasets_ds,
    sampler=bucket_sampler,
    batch_size=32,
    pin_memory=True,
)
# Print the shape of every batch the loader produces.
for X, y in loader:
    print(X.shape, y.shape)
This code is a combination of WeightedRandomSampler and Bucket sampling code
I'm essentially sampling via the sample weights of each classification to choose a bucket, and from that bucket choose randomly to form a batch up to batch_size.
However, when going through loader, I get the output:
...
torch.Size([32, 10, 4, 1]) torch.Size([32])
torch.Size([32, 10, 4, 1]) torch.Size([32])
torch.Size([32, 10, 4, 1]) torch.Size([32])
torch.Size([18, 10, 4, 1]) torch.Size([18])
The sizes of all these batches add up to the number of elements in bucket 10. So that part is right, but the loader is not jumping to another bucket. Rerunning the code
# Iterate the same loader again and print each batch's shape.
for X, y in loader:
    print(X.shape, y.shape)
will produce another bucket's batches.
I'm still learning PyTorch, so some of the code might be inefficient. Would love some advice as well!
Thanks to some help on the unofficial PyTorch Discord channel (sudomaze), I've fixed my problem. There's a need to iterate through all the data in the sampler.
The __len__ function in the sampler also needed fixing.
class WeightedBucketSampler(torch.utils.data.Sampler):
    """Batch sampler that draws buckets by weight and yields same-bucket batches.

    ``data`` is a sequence of ``(routes, labels)`` pairs, one per bucket. On
    every iteration ``num_samples`` bucket numbers are drawn with
    ``torch.multinomial``; each drawn bucket is then emitted completely, as
    lists of at most ``batch_size`` flat dataset indices. A batch never mixes
    buckets, so the last batch of a bucket may be shorter.

    Args:
        data: sequence of ``(routes, labels)`` pairs; bucket ``k`` is ``data[k]``.
        weights: one sampling weight per bucket.
        num_samples: number of buckets drawn per iteration.
        replacement: whether buckets are drawn with replacement.
        generator: optional ``torch.Generator`` used for all random draws.
        shuffle: permute the samples inside a bucket (default) or keep their order.
        batch_size: maximum number of indices per yielded batch.
        drop_last: drop a bucket's trailing batch when it is shorter than
            ``batch_size``.
    """

    def __init__(self, data, weights: Sequence[float], num_samples: int,
                 replacement: bool = True, generator=None, shuffle=True, batch_size=32, drop_last=False):
        # The base Sampler.__init__ is intentionally not called: it stores no
        # state and its ``data_source`` argument is deprecated in recent
        # PyTorch releases.
        self.shuffle = shuffle
        self.drop_last = drop_last
        self.weights = torch.as_tensor(weights, dtype=torch.double)
        self.num_samples = num_samples
        self.replacement = replacement
        self.generator = generator
        self.batch_size = batch_size
        # Bucket keys start at 2; each value is [routes, labels].
        self.buckets = defaultdict(list)
        # Flat index of the first sample of every bucket, computed once here
        # instead of being re-summed for each drawn bucket.
        self._offsets = []
        counter = 0
        for i in range(len(data)):
            self.buckets[i + 2] += [data[i][0], data[i][1]]
            self._offsets.append(counter)
            counter += len(data[i][0])
        self.length = counter

    def __iter__(self) -> Iterator[Sequence[int]]:
        # Choose the buckets to visit according to the sampling weights.
        drawn = torch.multinomial(self.weights, self.num_samples, self.replacement, generator=self.generator)
        for bucket_idx in drawn.tolist():
            bucket_size = len(self.buckets[bucket_idx + 2][0])
            offset = self._offsets[bucket_idx]
            if self.shuffle:
                order = torch.randperm(bucket_size, generator=self.generator).tolist()
            else:
                order = list(range(bucket_size))
            indices = [offset + local for local in order]
            for start in range(0, bucket_size, self.batch_size):
                batch = indices[start:start + self.batch_size]
                if self.drop_last and len(batch) < self.batch_size:
                    # Only the trailing batch of a bucket can be short.
                    continue
                yield batch

    def __len__(self):
        """Return ``ceil(total_samples / batch_size)``.

        NOTE(review): this is not the exact number of batches ``__iter__``
        yields, because buckets are drawn at random and every bucket ends
        with its own (possibly short) batch.
        """
        return (self.length + (self.batch_size - 1)) // self.batch_size
class CustomDataset(Dataset):
    """Dataset exposing every sample of every bucket under one running index.

    ``data`` is a sequence of ``(routes, labels)`` pairs; samples are
    numbered bucket by bucket in the order they are stored.
    """

    def __init__(self, data):
        self.routes = dict()
        self.choice = dict()
        next_key = 0
        for bucket_pos in range(len(data)):
            n_rows = len(data[bucket_pos][0])
            for row in range(n_rows):
                self.routes[next_key] = data[bucket_pos][0][row]
                self.choice[next_key] = data[bucket_pos][1][row]
                next_key += 1

    def __len__(self):
        """Return how many samples are stored."""
        return len(self.choice)

    def __getitem__(self, idx):
        """Return ``(routes, choice)`` for the sample numbered ``idx``."""
        picked_choice = self.choice[idx]
        picked_routes = self.routes[idx]
        return picked_routes, picked_choice
# Number of samples in each bucket.
w = np.array([len(i[0]) for i in train_datasets])
# Re-weight the buckets: bucket size divided by the original weight.
sample_probs = 1/sample_probs*w
# batch_size was referenced without being defined in this snippet; 32 matches
# the sampler's default and the value passed to the earlier DataLoader.
batch_size = 32
train_datasets_ds = CustomDataset(train_datasets)
bucket_sampler = WeightedBucketSampler(train_datasets, sample_probs, len(sample_probs), shuffle=True, batch_size=batch_size, drop_last=False)
# The sampler yields whole batches of indices, so it is passed as batch_sampler.
train_loader = DataLoader(train_datasets_ds, batch_sampler=bucket_sampler)

tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [100,200] vs. [100,10,200]

The shape of the tensor input to my model is (None, 10, 256); after processing by the attention layer, the shape becomes (None, 256). How should I modify the layer's compute_output_shape(self, input_shape) so that the shape of the model does not change?
attention layer
class Attention_layer(Layer):
    """Attention pooling that sums a rank-3 input over axis 1.

    For an input ``x`` the layer computes ``a = exp(tanh(x . W + b))``,
    normalizes ``a`` over axis 1, multiplies it element-wise with ``x`` and
    sums the product over axis 1, so the output has shape
    ``(input_shape[0], input_shape[-1])``.

    Args:
        W_regularizer, b_regularizer: regularizer identifiers for ``W`` / ``b``.
        W_constraint, b_constraint: constraint identifiers for ``W`` / ``b``.
        bias: whether to add the bias vector ``b``.
        **kwargs: forwarded to the base ``Layer`` constructor.
    """

    def __init__(self,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        # Initialise the base layer first so that attribute assignments below
        # happen on a fully constructed Layer.
        super(Attention_layer, self).__init__(**kwargs)
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.bias = bias

    def build(self, input_shape):
        """Create ``W`` (features x features) and, if enabled, ``b`` (features,)."""
        if len(input_shape) != 3:
            raise ValueError(
                'Attention_layer expects a rank-3 input, got shape {}'.format(input_shape))
        feature_dim = input_shape[-1]
        self.W = self.add_weight(name='att_weight',
                                 shape=(feature_dim, feature_dim),
                                 initializer=self.init,
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            # Pass every argument by keyword: the position of ``shape`` in
            # add_weight differs between Keras versions.
            self.b = self.add_weight(name='{}_b'.format(self.name),
                                     shape=(feature_dim,),
                                     initializer='zeros',
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        super(Attention_layer, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        uit = K.dot(x, self.W)
        if self.bias:
            uit += self.b
        uit = K.tanh(uit)
        a = K.exp(uit)
        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # ``a`` is rank 3 here; assumes ``mask`` is (batch, steps), so a
            # trailing axis is added to make it broadcast over the features.
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.expand_dims(K.cast(mask, K.floatx()), axis=-1)
        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number to the sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Axis 1 is summed away: ``(input_shape[0], input_shape[-1])``."""
        return input_shape[0], input_shape[-1]

PyTorch: GRU, one-to-many / many-to-one

I would like to implement a GRU able to encode a sequence of vectors into one vector (many-to-one), and then another GRU able to decode a vector into a sequence of vectors (one-to-many). The size of the vectors would not change. I would like an opinion on what I implemented.
Here is the code:
class AEGRU(nn.Module):
    """GRU auto-encoder: sequence -> one latent vector -> sequence.

    The encoder GRU reads 3-dimensional vectors and its last output step,
    passed through a linear layer, is the latent code. The decoder GRU is fed
    that code repeated ``self.length`` times and emits one 3-dimensional
    vector per step, followed by a linear layer.

    Args:
        opt: accepted for interface compatibility; not used by this class.
    """

    def __init__(self, opt):
        super(AEGRU, self).__init__()
        self.length = 256
        self.latent_space = 256
        self.num_layers = 1
        self.GRU_enc = nn.GRU(input_size=3, hidden_size=self.latent_space,
                              num_layers=self.num_layers, batch_first=True)
        self.fc_enc = nn.Linear(self.latent_space, self.latent_space)
        self.GRU_dec = nn.GRU(input_size=self.latent_space, hidden_size=3,
                              num_layers=self.num_layers, batch_first=True)
        self.fc_dec = nn.Linear(3, 3)

    def enc(self, x):
        """Encode ``x`` (batch x steps x 3) into a (batch x latent_space) code."""
        # Create the initial state on the input's device/dtype so the model
        # also runs on CPU-only machines.
        h0 = torch.zeros(self.num_layers, x.shape[0], self.latent_space,
                         device=x.device, dtype=x.dtype)
        out, _ = self.GRU_enc(x, h0)
        # Keep only the output of the last time step.
        out = out[:, -1, :]
        return self.fc_enc(out)

    def dec(self, x):
        """Decode a (batch x latent_space) code into (batch x self.length x 3)."""
        x = x[:, None, :]
        h = torch.zeros(self.num_layers, x.shape[0], 3,
                        device=x.device, dtype=x.dtype)
        # The latent code is presented as the input of every decoding step.
        # (The alternative considered by the author was calling the GRU once
        # per step and carrying the hidden state over manually.)
        x = x.repeat(1, self.length, 1)
        outputs, _ = self.GRU_dec(x, h)
        # linear layer
        return self.fc_dec(outputs)

    def forward(self, x):
        """Encode ``x`` and decode the resulting latent code."""
        self.indices = []
        latent = self.enc(x)
        return self.dec(latent)
I am not sure whether this is a good way to build a one-to-many GRU. Could I have some opinions about this?
Thanks for reading!

Implementing one to many LSTM/RNN, PyTorch

I have a matrix of size m x n and want to predict, from a single 1 x n vector (x in the picture of the network structure), the entire following (m-1) x n matrix (the y^{i} in the picture) using an RNN or LSTM. I don't understand how to feed each
1 x n vector into the next hidden state so as to obtain all
(m-1) x n vectors simultaneously, or how to compute the error over all y^{i}.
I have this vanilla RNN model and don't know how to modify it:
class RNNModel(nn.Module):
    """Vanilla tanh RNN followed by a linear layer on the last time step.

    Args:
        input_dim: feature size of every input step.
        hidden_dim: size of the RNN hidden state.
        layer_dim: number of stacked RNN layers.
        output_dim: size of the linear layer's output.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(RNNModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # (batch_dim, seq_dim, feature_dim)
        self.RNN = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='tanh')
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the linear projection of the RNN output at the last step."""
        # Zero initial state, created on the input's device and dtype so it
        # always matches ``x``. It is a constant, so it needs no gradient.
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim,
                         device=x.device, dtype=x.dtype)
        out, h_t = self.RNN(x, h0)
        return self.fc(out[:, -1, :])
Stainley, try this:
Initialize the hidden state only if no other hidden state is passed in. Then return the hidden state and pass it to forward() at the next iteration.
def forward(self, x, h=None):
    """Run the RNN on ``x`` and return ``(output, hidden_state)``.

    Args:
        x: input batch; axis 0 is the batch axis.
        h: hidden state returned by a previous call, or ``None`` to start
            from an all-zero state.

    Returns:
        The linear projection of the last time step's RNN output, and the
        RNN's final hidden state to pass into the next call.
    """
    if h is None:  # if no hidden state is passed
        # Zero state on the input's device and dtype so it always matches x.
        h = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim,
                        device=x.device, dtype=x.dtype)
    out, h_t = self.RNN(x, h)
    out = self.fc(out[:, -1, :])
    return out, h_t
In the training code you run the loop like this:
# Sketch of the training loop; the ``...`` placeholders stand for code that
# is specific to the task (loop bounds, target preparation, loss definition).
x = seed
h = None
for i in range(...):
    optimizer.zero_grad()
    ...
    # Call the module itself rather than .forward() so registered hooks run.
    x, h = model(x, h)
    ...
    loss = ...
    loss.backward()
    optimizer.step()
    # Cut the autograd graph between iterations; otherwise the next
    # backward() would have to traverse this iteration's graph again
    # through the carried-over x and h.
    x, h = x.detach(), h.detach()

TensorFlow, losses after training the model are different than losses printed during the last Epoch of Stochastic Gradient Descent.

I'm trying to do binary classification on two spirals. For testing, I am feeding my neural network the exact spiral data with no noise, and the model seems to work as the losses near 0 during SGD. However, after using my model to infer the exact same data points after SGD has completed, I get completely different losses than what was printed during the last epoch of SGD.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import sys

# Print arrays in full; NumPy rejects NaN as a threshold and documents
# sys.maxsize as the way to disable summarization.
np.set_printoptions(threshold=sys.maxsize)
# get the spiral points
t_p = np.linspace(0, 4, 1000)
x1_p = t_p * np.cos(t_p*2*np.pi)
y1_p = t_p * np.sin(t_p*2*np.pi)
# The second spiral is the first one with a phase shift of pi.
x2_p = t_p * np.cos(t_p*2*np.pi + np.pi)
y2_p = t_p * np.sin(t_p*2*np.pi + np.pi)
plt.plot(x1_p, y1_p, x2_p, y2_p)
# generate data points (the exact spiral points, no noise added)
x1_dat = x1_p
y1_dat = y1_p
x2_dat = x2_p
y2_dat = y2_p
def model_variable(shape, name, initializer):
    """Create a float32 variable and add it to the 'model_variables' collection.

    Args:
        shape: shape of the variable.
        name: name passed to ``tf.get_variable``.
        initializer: initializer passed to ``tf.get_variable``.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    created = tf.get_variable(
        name=name,
        shape=shape,
        dtype=tf.float32,
        initializer=initializer,
    )
    tf.add_to_collection('model_variables', created)
    return created
class Model():
    """Fully connected binary classifier trained one sample at a time.

    Args:
        sess: TensorFlow session used for every ``run`` call.
        data: callable returning an iterable of ``(x, y, label)`` triples.
        nEpochs: number of passes over ``data()``.
        learning_rate: step size for gradient descent.
        layer_specifications: number of nodes per layer; the first entry
            must be 2 and the last must be 1.

    Raises:
        ValueError: if the first/last layer sizes are not 2 and 1.
    """

    #layer specifications includes bias nodes
    def __init__(self, sess, data, nEpochs, learning_rate, layer_specifications):
        if layer_specifications[0] != 2 or layer_specifications[-1] != 1:
            raise ValueError('First layer only two nodes, last layer only 1 node')
        self.sess = sess
        self.data = data
        self.nEpochs = nEpochs
        self.learning_rate = learning_rate
        self.layer_specifications = layer_specifications
        self.build_model()

    def build_model(self):
        """Build placeholders, weights, the forward pass and the loss."""
        # x is the two nodes that will be layer one, will input an x, y coordinate
        # and need to classify which spiral is it on, the non phase shifted or the phase
        # shifted one.
        # y is the output of the model
        self.x = tf.placeholder(tf.float32, shape=[2, 1])
        self.y = tf.placeholder(tf.float32, shape=[])
        self.thetas = []
        self.biases = []
        for i in range(1, len(self.layer_specifications)):
            self.thetas.append(model_variable([self.layer_specifications[i], self.layer_specifications[i-1]], 'theta'+str(i), tf.random_normal_initializer(stddev=0.1)))
            self.biases.append(model_variable([self.layer_specifications[i], 1], 'bias'+str(i), tf.constant_initializer()))
        #forward propagation
        last_layer = len(self.layer_specifications) - 2
        intermediate = self.x
        for i in range(0, len(self.layer_specifications)-1):
            pre_activation = tf.add(tf.matmul(self.thetas[i], intermediate), self.biases[i])
            # Hidden layers use ELU; the last layer stays linear (logits).
            intermediate = pre_activation if i == last_layer else tf.nn.elu(pre_activation)
        self.yhat = tf.squeeze(intermediate)
        # Build the sigmoid op once here so infer() does not add a new op
        # to the graph on every call.
        self.prob = tf.sigmoid(self.yhat)
        # NOTE(review): the arguments are passed positionally as in the
        # original; some TensorFlow releases require them as keywords --
        # confirm against the installed version.
        self.loss = tf.nn.sigmoid_cross_entropy_with_logits(self.yhat, self.y)

    def train_init(self):
        """Create the optimizer and numeric checks, then initialize variables."""
        model_variables = tf.get_collection('model_variables')
        self.optim = (
            tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
            .minimize(self.loss, var_list=model_variables)
        )
        self.check = tf.add_check_numerics_ops()
        self.sess.run(tf.initialize_all_variables())

    # here is where x and y combine to get just x in tf with shape [2, 1] and where label becomes y in tf
    def train_iter(self, x, y):
        """Run one gradient step on a single sample and print its loss."""
        # Use the session given to the constructor, not a module-level global.
        # The loss is fetched in the same run call that performs the update.
        loss, _, _ = self.sess.run([self.loss, self.optim, self.check],
                                   feed_dict={self.x: x, self.y: y})
        print('loss: {0} on:{1}'.format(loss, x))

    # here x and y are still x and y coordinates, label is separate
    def train(self):
        """Run ``nEpochs`` passes over ``self.data()``, one sample per step."""
        for _ in range(self.nEpochs):
            for x, y, label in self.data():
                print(label)
                self.train_iter([[x], [y]], label)
            print("NEW ONE:\n")

    # here x and y are still x and y coordinates, label is separate
    def infer(self, x, y, label):
        """Return ``(sigmoid(yhat), loss)`` for one point without training."""
        return self.sess.run((self.prob, self.loss),
                             feed_dict={self.x: [[x], [y]], self.y: label})
def data():
    """Yield ``(x, y, label)`` triples, alternating between the two spirals.

    The sample indices are walked from the last to the first. For each index
    the point of the first spiral is yielded with label 0, followed by the
    point of the second spiral with label 1.
    """
    #so first spiral is label 0, second is label 1
    for idx in reversed(range(len(x1_dat))):
        yield x1_dat[idx], y1_dat[idx], 0
        yield x2_dat[idx], y2_dat[idx], 1
layer_specifications = [2, 100, 100, 100, 1]
sess = tf.Session()
model = Model(sess, data, nEpochs=10, learning_rate=1.1e-2, layer_specifications=layer_specifications)
model.train_init()
model.train()

# Classify every spiral point with the trained model. Points are visited in
# the same (forward) order in which they are plotted below, so colour k
# belongs to point k.
inferrences_1 = []
inferrences_2 = []
# Number of misclassified points over both spirals.
losses = 0
for px, py in zip(x1_p, y1_p):
    infer, loss = model.infer(px, py, 0)
    if infer >= 0.5:
        # First spiral has label 0, so a probability >= 0.5 is a mistake.
        print('loss: {0} on point {1}, {2}'.format(loss, px, py))
        losses = losses + 1
        inferrences_1.append('r')
    else:
        inferrences_1.append('g')
for px, py in zip(x2_p, y2_p):
    infer, loss = model.infer(px, py, 1)
    if infer >= 0.5:
        inferrences_2.append('r')
    else:
        # Second spiral has label 1, so a probability < 0.5 is a mistake.
        print('loss: {0} on point {1}, {2}'.format(loss, px, py))
        losses = losses + 1
        inferrences_2.append('g')
print('total losses: {}'.format(losses))
plt.scatter(x1_p, y1_p, c=inferrences_1)
plt.scatter(x2_p, y2_p, c=inferrences_2)
plt.show()

Resources