How to load multiple .mat files each with its own sequence in pytorch - pytorch

So I have multiple files in my directory whose names begin with either P or C. I am trying to train an RNN to predict values of C given a sequence of P.
Now each file has a signal. I will break the signal into smaller parts, each with dimension (sequence length, 1), as there is only one feature. Ideally my output dimension should be something like (num_batches, batch_size, seq_length, features). However, as I have multiple files, I get something like (num_files, num_batches, batch_size, seq_length, features).
Here's my code
class MyDataset(Dataset):
    """Sliding-window dataset over paired ``P*.mat`` / ``C*.mat`` files.

    Item ``index`` is built from the ``index``-th P file (variable
    ``P_noise``) and the ``index``-th C file (variable ``Cm``), both taken in
    sorted path order. One item holds *all* windows of one file pair, so a
    DataLoader adds its batch dimension over files, not over windows.

    Args:
        PATH: Directory scanned (non-recursively) for ``.mat`` files.
        seq_length: Number of consecutive samples in each input window.
    """

    def __init__(self, PATH, seq_length):
        self.seq_length = seq_length
        self.c_paths = []
        self.p_paths = []
        for entry in os.scandir(PATH):
            stem, ext = os.path.splitext(entry.name)
            if ext != '.mat':
                continue
            # Match on the leading letter so a name that merely contains the
            # other letter cannot end up in both lists.
            if stem.startswith('C'):
                self.c_paths.append(entry.path)
            elif stem.startswith('P'):
                self.p_paths.append(entry.path)
        # os.scandir yields entries in arbitrary order; sorting both lists
        # keeps the i-th P file paired with the i-th C file.
        self.c_paths.sort()
        self.p_paths.sort()

    def __getitem__(self, index):
        """Return ``(inputs, targets)`` for the file pair at ``index``.

        ``inputs`` is reshaped to ``(-1, seq_length, 1)`` and ``targets`` to
        ``(-1, 1)``; the target of a window is the ``Cm`` sample aligned with
        the window's last input sample.
        """
        p_noise = sio.loadmat(self.p_paths[index])['P_noise']
        cm = sio.loadmat(self.c_paths[index])['Cm']

        n_windows = len(p_noise) - self.seq_length
        windows = [p_noise[s:s + self.seq_length] for s in range(n_windows)]
        targets = [cm[s + self.seq_length - 1] for s in range(n_windows)]

        x = torch.from_numpy(np.array(windows).reshape((-1, self.seq_length, 1)))
        y = torch.from_numpy(np.array(targets).reshape((-1, 1)))
        return x, y

    def __len__(self):
        """Number of C files found, i.e. the number of file pairs."""
        return len(self.c_paths)
Here's the output
PATH = 'Dataset'
dataset = MyDataset(PATH, seq_length=400)
dataloader = DataLoader(dataset=dataset, batch_size=2, shuffle=False)
# Fetch the first batch with the built-in next(): DataLoader iterators
# implement __next__, and the Python-2 style .next() method is not available
# in recent PyTorch releases.
datatiter = iter(dataloader)
data = next(datatiter)
x, y = data
x.shape, y.shape

Related

Normalizing multivariate time-series data with different sequence length

I have a multivariate time-series dataset with different sequence lengths. I filled the missing values in the sequences with zeros. I am trying to use a recurrent neural network model for time-series forecasting. I noticed that the results of my model degrade when the range of the data is outside -1 and 1. I wrote the following normalization class using MinMaxScaler. However, I don't know how to exclude the missing values in the sequences when fitting the MinMaxScaler. Here is my code
from sklearn.preprocessing import MinMaxScaler
from collections import OrderedDict
import numpy as np
import torch
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def get_mask_from_sequence_lengths(
    sequence_lengths: torch.Tensor, max_length: int
) -> torch.BoolTensor:
    """Build a boolean padding mask from per-sequence lengths.

    Args:
        sequence_lengths: Tensor whose first dimension indexes the sequences
            and whose values are the valid lengths.
        max_length: Number of positions (columns) in the mask.

    Returns:
        A ``(batch_size, max_length)`` boolean tensor that is ``True`` at
        position ``j`` (0-based) of row ``i`` when ``j < sequence_lengths[i]``.
    """
    batch_size = sequence_lengths.size(0)
    # 1-based position index of every slot, repeated for each sequence.
    positions = torch.arange(1, max_length + 1, device=sequence_lengths.device)
    positions = positions.unsqueeze(0).expand(batch_size, max_length)
    lengths_column = sequence_lengths.unsqueeze(1)
    return lengths_column >= positions
class Normalizer1D(nn.Module):
    """Per-channel min-max normalizer backed by scikit-learn ``MinMaxScaler``.

    One scaler is fitted per index of axis 1 of ``inputs`` (the code comments
    describe the data as ``(batch, input_dim, seq_len)``). A channel whose
    minimum is negative is scaled to ``(-1, 1)``, otherwise to ``(0, 1)``.

    NOTE(review): zero padding is NOT excluded from the fitted statistics;
    padded zeros still contribute to each channel's min/max.

    Args:
        input_dim: Number of channels (size of axis 1 of ``inputs``).
        inputs: NumPy array used to fit the scalers.
    """

    def __init__(self, input_dim, inputs):
        super().__init__()
        self.input_dim = input_dim
        self.to(device)
        self._norm = self.build_normalizers(inputs)

    def build_normalizers(self, x):
        """Fit one ``MinMaxScaler`` per channel and return them keyed by ``str(i)``."""
        normalizers = OrderedDict()
        for i in range(self.input_dim):
            channel = x[:, i, :]
            feature_range = (-1, 1) if np.min(channel) < 0 else (0, 1)
            normalizers[str(i)] = MinMaxScaler(feature_range=feature_range).fit(channel)
        return normalizers

    def normalize(self, x):
        """Scale ``x`` with the already-fitted scalers.

        Uses ``transform`` rather than ``fit_transform``: refitting on every
        call would overwrite the training statistics with those of the
        current batch and make ``unnormalize`` inconsistent.
        """
        d = x.cpu().detach().numpy()
        scaled = [self._norm[str(i)].transform(d[:, i, :]) for i in range(x.shape[1])]
        return torch.from_numpy(np.stack(scaled, axis=1)).to(device)

    def unnormalize(self, x):
        """Map normalized values back to the original range (inverse of ``normalize``)."""
        d = x.cpu().detach().numpy()
        restored = [
            self._norm[str(i)].inverse_transform(d[:, i, :]) for i in range(x.shape[1])
        ]
        return torch.from_numpy(np.stack(restored, axis=1)).to(device)

    def min_(self):
        """Return the scalers' ``min_`` vectors stacked along axis 1."""
        mins = [self._norm[str(i)].min_ for i in range(len(self._norm))]
        return torch.from_numpy(np.stack(mins, axis=1))

    def scale_(self):
        """Return the scalers' ``scale_`` vectors stacked along axis 1."""
        scales = [self._norm[str(i)].scale_ for i in range(len(self._norm))]
        return torch.from_numpy(np.stack(scales, axis=1))

    def unnormalize_mean(self, x_mu):
        """Undo the min-max scaling of a predicted mean, in place.

        ``MinMaxScaler`` transforms with ``x * scale_ + min_``, so the inverse
        is ``(x - min_) / scale_``. ``x_mu`` is modified in place and
        returned. It must broadcast against the stacked ``min_``/``scale_``
        tensors -- TODO confirm the layout the caller passes.
        """
        Xscale = self.scale_()
        Xmin = self.min_()
        return x_mu.sub_(Xmin).div_(Xscale)

    def unnormalize_sigma(self, x_sigma):
        """Undo the scaling of a predicted standard deviation, in place.

        A spread is unaffected by the additive ``min_`` term, so only the
        division by ``scale_`` is applied.
        """
        Xscale = self.scale_()
        return x_sigma.div_(Xscale)
# compute the normalizers
def compute_normalizer(loader_train):
    """Fit input and output normalizers on every batch of ``loader_train``.

    Args:
        loader_train: Iterable yielding ``(u, y)`` tensor pairs; batches are
            concatenated along dimension 0.

    Returns:
        ``(u_normalizer, y_normalizer)``: ``Normalizer1D`` instances fitted on
        the concatenated inputs and outputs respectively.

    Raises:
        ValueError: If ``loader_train`` yields no batches.
    """
    input_batches = []
    output_batches = []
    for u, y in loader_train:
        input_batches.append(u)
        output_batches.append(y)
    if not input_batches:
        raise ValueError("loader_train yielded no batches; cannot fit normalizers")

    # Concatenate once at the end instead of re-copying the growing tensor
    # on every iteration.
    inputs = torch.cat(input_batches, dim=0).cpu().detach().numpy()
    outputs = torch.cat(output_batches, dim=0).cpu().detach().numpy()

    u_normalizer = Normalizer1D(inputs.shape[1], inputs)
    y_normalizer = Normalizer1D(outputs.shape[1], outputs)
    return u_normalizer, y_normalizer
I would appreciate it if someone could suggest a way to exclude the missing values from the normalization process.

nn.CrossEntropyLoss attribute Error while trying to develop video to caption generator in pytorch

I am getting
AttributeError: 'CrossEntropyLoss' object has no attribute 'dim'
This is the code block which I run and got the error
# Build the encoder/decoder pair with a shared hidden size and start training.
hidden_size=256
encoder1=VideoEncoderGRU(hidden_size)
# `accelerator` is defined outside this snippet -- presumably a Hugging Face
# Accelerate `Accelerator`; confirm.
encoder1=accelerator.prepare(encoder1)
# The decoder's output size is the vocabulary size of the caption language.
decoder1 =DecoderRNN(hidden_size, vcd.lang_object.n_words).to(device)
decoder1=accelerator.prepare(decoder1)
# Initial encoder hidden state (initHidden returns a random tensor).
encoder_hidden=encoder1.initHidden()
trainIters_modified(encoder1, decoder1,encoder_hidden)
A more detailed code of each layer.
trainIters_modified - It's just a function that runs the training step multiple times.
def trainIters_modified(encoder, decoder, encoder_hidden, print_every=10, plot_every=10):
    """Run one pass over ``train_loader``, training on each sample in turn.

    Args:
        encoder: Encoder module passed through to ``train``.
        decoder: Decoder module passed through to ``train``.
        encoder_hidden: Initial encoder hidden state; it is replaced by the
            state returned from each ``train`` call.
        print_every: Print the running average loss every this many steps.
        plot_every: Record the running average loss every this many steps.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # reset every print_every steps
    plot_loss_total = 0  # reset every plot_every steps

    encoder_optimizer = optim.Adagrad(encoder.parameters())
    decoder_optimizer = optim.Adagrad(decoder.parameters())
    encoder_optimizer = accelerator.prepare(encoder_optimizer)
    decoder_optimizer = accelerator.prepare(decoder_optimizer)
    criterion = nn.CrossEntropyLoss()

    for ep in range(1):
        for vid, lab in train_loader:
            n_iters = vid.shape[0]
            for step in range(1, n_iters + 1):
                input_tensor = vid[step - 1]
                target_tensor = lab[step - 1]
                # train() declares `encoder_hidden` before `criterion`. Pass
                # both by keyword so they cannot be swapped; swapping them
                # hands the loss object to the GRU as its hidden state and
                # raises "'CrossEntropyLoss' object has no attribute 'dim'".
                loss, encoder_hidden = train(
                    input_tensor,
                    target_tensor,
                    encoder,
                    decoder,
                    encoder_optimizer,
                    decoder_optimizer,
                    encoder_hidden=encoder_hidden,
                    criterion=criterion,
                )
                print_loss_total += loss
                plot_loss_total += loss

                if step % print_every == 0:
                    print_loss_avg = print_loss_total / print_every
                    print_loss_total = 0
                    print('%s (%d %d%%) %.4f' % (timeSince(start, step / n_iters), step, step / n_iters * 100, print_loss_avg))

                if step % plot_every == 0:
                    plot_loss_avg = plot_loss_total / plot_every
                    plot_losses.append(plot_loss_avg)
                    plot_loss_total = 0

    showPlot(plot_losses)
train function
teacher_forcing_ratio = 0.5


def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, encoder_hidden, criterion, max_length=MAX_LENGTH):
    """Run one encoder/decoder training step on a single (video, caption) pair.

    Args:
        input_tensor: Sequence of frames; indexed along dimension 0.
        target_tensor: Sequence of target token indices; indexed along
            dimension 0.
        encoder, decoder: Modules called as ``module(input, hidden)`` and
            returning ``(output, hidden)``.
        encoder_optimizer, decoder_optimizer: Optimizers stepped once each.
        encoder_hidden: Hidden state the encoder starts from.
        criterion: Loss called as ``criterion(decoder_output, target)``.
        max_length: Number of rows allocated for the encoder outputs.

    Returns:
        ``(average_loss, encoder_hidden)``: the loss divided by the target
        length, and the encoder hidden state after the last frame.
    """
    # Cut the autograd history of the incoming state. The caller feeds back
    # the hidden state from the previous call, whose graph has already been
    # consumed by backward(); without detach() this step would try to
    # backpropagate through that freed graph.
    encoder_hidden = encoder_hidden.detach()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    loss = 0

    for ei in range(input_length):
        in_tensor = torch.permute(input_tensor[ei], (2, 1, 0))
        encoder_output, encoder_hidden = encoder(in_tensor, encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    # Seed the decoder with the encoder's final hidden state. Unlike the
    # per-step output, it is also defined when the input sequence is empty.
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    if use_teacher_forcing:
        # Teacher forcing: feed the ground-truth token as the next input.
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            loss += criterion(decoder_output, target_tensor[di])
            decoder_input = target_tensor[di]
    else:
        # No teacher forcing: feed the decoder's own prediction back in.
        for di in range(target_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # detach from history as input
            loss += criterion(decoder_output, target_tensor[di])
            if decoder_input.item() == EOS_token:
                break

    accelerator.backward(loss)
    encoder_optimizer.step()
    decoder_optimizer.step()

    return (loss.item() / target_length), encoder_hidden
EncoderGRU
class VideoEncoderGRU(nn.Module):
    """Frame encoder: VGG16 features -> linear projection -> one GRU step.

    Args:
        hidden_size: Size of the GRU hidden state.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.vgg = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
        # Replace VGG's classifier head with an empty Sequential so the
        # network returns its flattened convolutional features.
        self.vgg.classifier = nn.Sequential()
        flat_features = 512 * 7 * 7
        self.vegru_classifier = nn.Sequential(nn.Linear(flat_features, 1024))
        self.gru = nn.GRU(1024, hidden_size)
        self.hidden_size = hidden_size

    def initHidden(self):
        """Return a random initial hidden state of shape ``(1, 1, hidden_size)``."""
        return torch.rand(1, 1, self.hidden_size, device=device)

    def forward(self, input, hidden):
        """Encode one frame and advance the GRU by a single step.

        Returns:
            ``(output, hidden)`` as produced by ``nn.GRU``.
        """
        features = self.vgg(input).reshape(-1)
        projected = self.vegru_classifier(features)
        step_input = projected.view(1, 1, -1)
        return self.gru(step_input, hidden)
DecoderGRU
class DecoderRNN(nn.Module):
    """Single-step GRU decoder emitting log-probabilities over the vocabulary.

    Args:
        hidden_size: Embedding and GRU hidden size.
        output_size: Vocabulary size (number of output classes).
    """

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Decode one token.

        Returns:
            ``(log_probs, hidden)``: the log-softmax over the vocabulary for
            this step, and the updated GRU hidden state.
        """
        embedded = F.relu(self.embedding(input).view(1, 1, -1))
        gru_out, hidden = self.gru(embedded, hidden)
        log_probs = self.softmax(self.out(gru_out[0]))
        return log_probs, hidden

    def initHidden(self):
        """Return a random initial hidden state of shape ``(1, 1, hidden_size)``."""
        return torch.rand(1, 1, self.hidden_size, device=device)
My dataset consists of videos stored in a folder and a csv file containing the names of the videos along with their captions. I think I have loaded my data correctly and converted each video into 8 frames, and each caption is changed to a tensor of max_length = 8 containing the indices of each word in the caption.
You can look at my whole source code in this notebook file
There might be a simple logical error; I only knew the theory behind this problem and coded it from that.
I tried to implement a recurrent (GRU) encoder which takes the frames of a video as input. Each frame is first passed through the vgg16 network, which encodes it, and the encoded representation of each frame is fed to the encoder. The final output of the encoder is passed as the initial hidden state to the decoder, along with SOS_token as the first input to the decoder. I want this model to work and predict the output.

Distributed sequential windowed data in pytorch

At every epoch of my training, I need to split my dataset in n batches of t consecutive samples. For example, if my data is [1,2,3,4,5,6,7,8,9,10], n = 2 and t = 3 then valid batches would be
[1-2-3, 4-5-6] and [7-8-9, 10-1-2]
[2-3-4, 8-9-10] and [5-6-7, 1-2-3]
My old version is the following, but it samples every point in the data, meaning that I would parse the whole dataset t times per epoch.
# Every index 0..n-1 is a candidate window start.
train_dataset = list(range(n))
if distributed:
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
else:
    train_sampler = None
# Let the DataLoader shuffle only when no sampler was supplied.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=bsize,
    shuffle=(train_sampler is None),
    pin_memory=True,
    sampler=train_sampler,
)
for epoch in range(epochs):
    if distributed:
        train_sampler.set_epoch(epoch)
    for starting_i in train_loader:
        # Expand each start index into t consecutive indices, wrapping at n.
        batch = np.array([np.mod(np.arange(i, i + t), n) for i in starting_i])
I have now implemented my own sampling function that splits the data into random batches where each starting index is exactly t away from its two nearest neighbours. In the non-distributed scenario, I can do
for epoch in range(epochs):
    # Random offset so the window boundaries differ between epochs.
    pad = np.random.randint(n)
    starts = np.mod(np.arange(pad, n + pad, t), n)
    np.random.shuffle(starts)
    # Split the shuffled start indices into batches of about `bsize` each.
    n_batches = np.ceil(len(starts) / bsize)
    train_loader = np.array_split(starts, n_batches)
    for starting_i in train_loader:
        # Expand each start index into t consecutive indices, wrapping at n.
        batch = np.array([np.mod(np.arange(i, i + t), n) for i in starting_i])
How do I make this version distributed? Do I need to make a custom torch.nn.parallel.DistributedDataParallel or torch.utils.data.DataLoader?
I have checked the DistributedSampler class
and my guess is that I have to override the __iter__ method. Am I right?
How does DistributedSampler split the dataset? Is it sequentially among num_replicas?
Say num_replicas = 2. Would my dataset be split into [1,2,3,4,5] and [6,7,8,9,10] between the 2 workers? Or is it random? Like [1,4,7,3,10] and [2,9,5,8,6]? First case would be ok for me because keeps samples sequential, but second would not.
I ended up making my own Dataset where the data is [t, t + window, ... t + n * window]. Every time it is called it randomizes the starting indices of the window. Then the sampler does the shuffling as usual. For reproducibility, it has a set_seed method similar to set_epoch of samplers.
class SequentialWindowedDataset(Dataset):
    """Dataset of window start indices ``0, window, 2*window, ...`` below ``size``.

    Each lookup shifts all starts by an offset drawn from the current seed
    and wraps them modulo ``size``.
    """

    def __init__(self, size, window):
        self.size, self.window, self.seed = size, window, 0
        self.data = np.arange(0, self.size, self.window)

    def __getitem__(self, index):
        # The offset is re-derived from the seed on every call, so it stays
        # the same until set_seed() changes the seed.
        pad = np.random.default_rng(self.seed).integers(0, self.size)
        shifted = (self.data + pad) % self.size
        return shifted[index]

    def __len__(self):
        return len(self.data)

    def set_seed(self, seed):
        """Select the seed that determines the offset for later lookups."""
        self.seed = seed
The following version randomizes the data outside the call and it is much much faster.
class SequentialWindowedDataset(Dataset):
    """Dataset of window start indices, re-offset in bulk by ``randomize``.

    The starts are ``0, window, 2*window, ...`` below ``size``. Call
    ``randomize(seed)`` (e.g. once per epoch) to shift them all by a seeded
    random offset, wrapping modulo ``size``.
    """

    def __init__(self, size, window):
        self.size = size
        self.window = window
        # Unshifted starts are kept so every randomize() call starts from
        # the same base.
        self._base = np.arange(0, self.size, self.window)
        self.data = self._base.copy()

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return len(self.data)

    def randomize(self, seed):
        """Shift the window starts by an offset determined only by ``seed``.

        The shift is applied to the unshifted base, not to the current
        ``self.data``, so the same seed always gives the same starts no
        matter how many times ``randomize`` was called before.
        """
        rng = np.random.default_rng(seed)
        pad = rng.integers(0, self.size)
        self.data = (self._base + pad) % self.size

Pytorch sequential data loader

I have looked through the documentation, for example the IterableDataset class and its start / end example, but I'm just not good enough to solve this one at the moment.
Training my model with random batches is fine, but when using it for predictions I need it to go from min(index) up to max(index) in order. So I wanted to re-use the code below and change it to fit that.
Right now it takes random items from the range, so I can get duplicate predictions for the same index number. For example, for range(5) with indices 1,2,3,4,5 it might give 4,2,2,3,4 rather than the desired 1,2,3,4,5.
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)
DataLoader shuffle = False, then it just takes len / max of index.
I probably need to change the sampler.
class CompanyDataset(Dataset):
    """Per-sensor time-series dataset read from a single CSV file.

    Each item is a randomly positioned window of one sensor's rows: the
    first ``training_length`` rows are the model input and the next
    ``forecast_window`` rows are the target.
    """

    # Feature columns read from the CSV for both input and target.
    FEATURE_COLUMNS = ["A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", "A11"]

    def __init__(self, csv_name, root_dir, training_length, forecast_window):
        """
        Args:
            csv_name (string): File name of the csv file inside ``root_dir``.
            root_dir (string): Directory containing the csv file.
            training_length (int): Number of rows in each input window.
            forecast_window (int): Number of rows in each target window.
        """
        # load raw data file
        csv_file = os.path.join(root_dir, csv_name)
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = MinMaxScaler()
        self.T = training_length
        self.S = forecast_window

    def __len__(self):
        # Number of sensors, i.e. distinct values of the "index" column.
        return self.df["index"].nunique()

    def __getitem__(self, idx):
        """Return ``(index_in, index_tar, _input, target, Company)`` for one sensor.

        ``idx`` is 0-based; sensors are indexed from 1 in the data. The
        window start is drawn at random, so repeated calls with the same
        ``idx`` return different windows.
        """
        # Sensors are indexed from 1
        idx = idx + 1
        # Filter this sensor's rows once and reuse the result below.
        rows = self.df[self.df["index"] == idx]

        start = int(np.random.randint(0, len(rows) - self.T - self.S))
        Company = str(rows[["station"]][start:start + 1].values.item())

        index_in = torch.tensor([i for i in range(start, start + self.T)])
        index_tar = torch.tensor([i for i in range(start + self.T, start + self.T + self.S)])

        features = rows[self.FEATURE_COLUMNS]
        _input = torch.tensor(features[start: start + self.T].values)
        target = torch.tensor(features[start + self.T: start + self.T + self.S].values)

        # Fit the scaler on the input window's first feature only, then apply
        # the same scaling to the target so both share one scale.
        scaler = self.transform
        scaler.fit(_input[:, 0].unsqueeze(-1))
        _input[:, 0] = torch.tensor(scaler.transform(_input[:, 0].unsqueeze(-1)).squeeze(-1))
        target[:, 0] = torch.tensor(scaler.transform(target[:, 0].unsqueeze(-1)).squeeze(-1))

        # Save the fitted scaler; the file is overwritten on every call.
        dump(scaler, 'scalar_item.joblib')

        return index_in, index_tar, _input, target, Company

Pytorch NLP sequence length of target in Transformer

I'm trying to understand the code of Transformer (https://github.com/SamLynnEvans/Transformer).
Looking at the train_model function in the "train" script, I wonder why trg_input needs to have a different sequence length from trg:
trg_input = trg[:, :-1]
In this case, the sequence length of trg_input is "seq_len(trg) - 1".
It means that trg is like:
<sos> tok1 tok2 tokn <eos>
and trg_input is like:
<sos> tok1 tok2 tokn (no eos token)
Please let me know the reason.
Thank you.
The related code is like below:
for i, batch in enumerate(opt.train):
    src = batch.src.transpose(0, 1).to('cuda')
    trg = batch.trg.transpose(0, 1).to('cuda')

    # Decoder input drops the last token; the labels drop the first, so the
    # prediction at position k is scored against token k + 1.
    trg_input = trg[:, :-1]
    ys = trg[:, 1:].contiguous().view(-1)

    src_mask, trg_mask = create_masks(src, trg_input, opt)
    preds = model(src, trg_input, src_mask, trg_mask)

    opt.optimizer.zero_grad()
    flat_preds = preds.view(-1, preds.size(-1))
    loss = F.cross_entropy(flat_preds, ys, ignore_index=opt.trg_pad)
    loss.backward()
    opt.optimizer.step()
def create_masks(src, trg, opt):
    """Build the source padding mask and the target padding + no-peek mask.

    Args:
        src: Source token tensor; positions equal to ``opt.src_pad`` are
            masked out.
        trg: Target token tensor, or ``None`` to skip the target mask.
        opt: Options object providing ``src_pad`` and ``trg_pad``.

    Returns:
        ``(src_mask, trg_mask)``; ``trg_mask`` is ``None`` when ``trg`` is
        ``None``.
    """
    src_mask = (src != opt.src_pad).unsqueeze(-2)
    if trg is None:
        return src_mask, None

    trg_mask = (trg != opt.trg_pad).unsqueeze(-2)
    size = trg.size(1)  # get seq_len for matrix
    np_mask = nopeak_mask(size, opt)
    # Tensor.cuda()/.to() return a new tensor instead of moving in place, so
    # the result must be reassigned. Move to wherever the target lives so the
    # `&` below never mixes devices.
    np_mask = np_mask.to(trg_mask.device)
    trg_mask = trg_mask & np_mask
    return src_mask, trg_mask
That's because the entire aim is to generate the next token based on the tokens we've seen so far. Take a look at the input into the model when we get our predictions. We're not just feeding the source sequence, but also the target sequence up until our current step. The model inside Models.py looks like:
class Transformer(nn.Module):
    """Encoder-decoder model with a final linear projection onto the target vocabulary."""

    def __init__(self, src_vocab, trg_vocab, d_model, N, heads, dropout):
        super().__init__()
        self.encoder = Encoder(src_vocab, d_model, N, heads, dropout)
        self.decoder = Decoder(trg_vocab, d_model, N, heads, dropout)
        self.out = nn.Linear(d_model, trg_vocab)

    def forward(self, src, trg, src_mask, trg_mask):
        """Encode ``src``, decode ``trg`` against it, and project the result."""
        memory = self.encoder(src, src_mask)
        decoded = self.decoder(trg, memory, src_mask, trg_mask)
        return self.out(decoded)
So you can see that the forward method receives src and trg, which are each fed into the encoder and decoder. This is a bit easier to grasp if you take a look at the model architecture from the original paper:
The "Outputs (shifted right)" corresponds to trg[:, :-1] in the code.

Resources