From two Tensors (labels, inputs) to DataLoader - pytorch

I'm working with two tensors, inputs and labels, and I want to have them together to train a model. I'm using torch 1.7, but I can't use the function TensorDataset() and then apply DataLoader(), due to some incompatibilities with other packages when I use TensorDataset(). There is another solution to my problem?
Summary:
2 Tensors --> DataLoader without using TensorDataset()

You can construct your own custom DataSet:
class MyDataSet(torch.utils.data.Dataset):
def __init__(self, x, y):
super(MyDataSet, self).__init__()
# store the raw tensors
self._x = x
self._y = y
def __len__(self):
# a DataSet must know it size
return self._x.shape[0]
def __getitem__(self, index):
x = self._x[index, :]
y = self._y[index, :]
return x, y

Related

torch - subsample each dataset differently and concatenate them

I have two datasets, but one is larger than the other and I want to subsample it (resample in each epoch).
I probably cannot use dataloader argument sampler, as I would pass to Dataloader the already concatenated dataset.
How do I achieve this simply?
I think one solution would be to write a class SubsampledDataset(IterableDataset) which would resample every time __iter__ is called (each epoch).
(Or better use a map-style dataset, but is there a hook that gets called every epoch, like __iter__ gets?)
This is what I have so far (untested). Usage:
dataset1: Any = ...
# subsample original_dataset2, so that it is equally large in each epoch
dataset2 = RandomSampledDataset(original_dataset2, num_samples=len(dataset1))
concat_dataset = ConcatDataset([dataset1, dataset2])
data_loader = torch.utils.data.DataLoader(
concat_dataset,
sampler=RandomSamplerWithNewEpochHook(dataset2.new_epoch_hook, concat_dataset)
)
The result is that the concat_dataset will be shuffled each epoch (RandomSampler), in addition, the dataset2 component is a new sample of the (possibly larger) original_dataset2, different in each epoch.
You can add more datasets to be subsampled by doing instead of:
sampler=RandomSamplerWithNewEpochHook(dataset2.new_epoch_hook
this:
sampler=RandomSamplerWithNewEpochHook(lambda: dataset2.new_epoch_hook and dataset3.new_epoch_hook and dataset4.new_epoch_hook, ...
Code:
class RandomSamplerWithNewEpochHook(RandomSampler):
""" Wraps torch.RandomSampler and calls supplied new_epoch_hook before each epoch. """
def __init__(self, new_epoch_hook: Callable, data_source: Sized, replacement: bool = False,
num_samples: Optional[int] = None, generator=None):
super().__init__(data_source, replacement, num_samples, generator)
self.new_epoch_hook = new_epoch_hook
def __iter__(self):
self.new_epoch_hook()
return super().__iter__()
class RandomSampledDataset(Dataset):
""" Subsamples a dataset. The sample is different in each epoch.
This helps when concatenating datasets, as the subsampling rate can be different for each dataset.
Call new_epoch_hook before each epoch. (This can be done using e.g. RandomSamplerWithNewEpochHook.)
This would be arguably harder to achieve with a concatenated dataset and a sampler argument to Dataloader. The
sampler would have to be aware of the indices of subdatasets' items in the concatenated dataset, of the subsampling
for each subdataset."""
def __init__(self, dataset, num_samples, transform=lambda im: im):
self.dataset = dataset
self.transform = transform
self.num_samples = num_samples
self.sampler = RandomSampler(dataset, num_samples=num_samples)
self.current_epoch_samples = None
def new_epoch_hook(self):
self.current_epoch_samples = torch.tensor(iter(self.sampler), dtype=torch.int)
def __len__(self):
return self.num_samples
def __getitem__(self, item):
if item < 0 or item >= len(self):
raise IndexError
img = self.dataset[self.current_epoch_samples[item].item()]
return self.transform(img)
You can stop to iterate by raising StopIteration. This error is caught by Dataloader and simply stop the iteration. So you can do something like that:
class SubDataset(Dataset):
"""SubDataset class."""
def __init__(self, dataset, length):
self.dataset = dataset
self.elem = 0
self.length = length
def __getitem__(self, index):
self.elem += 1
if self.elem > self.length:
self.elem = 0
raise StopIteration # caught by DataLoader
return self.dataset[index]
def __len__(self):
return len(self.dataset)
if __name__ == '__main__':
torch.manual_seed(0)
dataloader = DataLoader(SubDataset(torch.arange(10), 5), shuffle=True)
for _ in range(3):
for x in dataloader:
print(x)
print(len(dataloader)) # 10!!
Output:
Note that setting __len__ to self.length will cause a problem because dataloader will use only indices between 0 and length-1 (that is not what you want). Unfortunately I found nothing to set the actually length without having this behaviour (due to Dataloader restriction). Thus be careful: len(dataset) is the original length and dataset.length is the new length.

How do I set the dimensions of Conv1d correctly?

This is a toy example as I'm learning PyTorch and using it on one-dimensional time series, in this case a sine wave.
I'm trying to use Conv1d, but I get the following error:
RuntimeError: Given groups=1, weight of size [5, 1, 2], expected input[1, 994, 5] to have 1 channels, but got 994 channels instead
My 'lookback' is 5 time steps, and the shape of my data batch is [994, 5].
What am I doing wrong?
import torch;from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F;import pytorch_lightning as pl
from torch import nn, tensor
class TsDs(torch.utils.data.Dataset):
def __init__(self, s, l=5): super().__init__();self.l,self.s=l,s
def __len__(self): return self.s.shape[0] - 1 - self.l
def __getitem__(self, i): return self.s[i:i+self.l], torch.log(self.s[i+self.l+1]/self.s[i+self.l])
def plt(self): plt.plot(self.s)
class TsDm(pl.LightningDataModule):
def __init__(self, length=5000, batch_size=1000): super().__init__();self.batch_size=batch_size;self.s = torch.sin(torch.arange(length)*0.2) + 5
def train_dataloader(self): return DataLoader(TsDs(self.s[:3999]), batch_size=self.batch_size, shuffle=False)
def val_dataloader(self): return DataLoader(TsDs(self.s[4000:]), batch_size=self.batch_size)
dm = TsDm()
class MyModel(pl.LightningModule):
def __init__(self, learning_rate=0.01):
super().__init__();self.learning_rate = learning_rate
super().__init__();self.learning_rate = learning_rate
self.network = nn.Sequential(nn.Conv1d(1,5,2),nn.ReLU(),nn.Linear(5,3),nn.ReLU(),nn.Linear(3,1), nn.Tanh())
# self.network = nn.Sequential(nn.Linear(5,5),nn.ReLU(),nn.Linear(5,3),nn.ReLU(),nn.Linear(3,1), nn.Tanh())
def forward(self, x): return self.network(x)
def step(self, batch, batch_idx, stage):
x, y = batch
loss = -torch.mean(self(x)*y)
print(loss)
return loss
def training_step(self, batch, batch_idx): return self.step(batch, batch_idx, "train")
def validation_step(self, batch, batch_idx): return self.step(batch, batch_idx, "val")
def configure_optimizers(self): return torch.optim.SGD(self.parameters(), lr=self.learning_rate)
mm = MyModel(0.01);trainer = pl.Trainer(max_epochs=10)
trainer.fit(mm, datamodule=dm)
There are two issues in your code:
Looking at the documentation of nn.Conv1d, your input shape should be (B, C, L). In your default case, you have L=5, the sequence length, but you need to create that extra dimension representing the feature size of a sequence element, here C=1. You can do so by changing TsDs's __getitem__ function to:
def __getitem__(self, i):
x = self.s[i:i+self.l] # minibatch x shaped (1, self.l)
y = torch.log(self.s[i+self.l+1]/self.s[i+self.l]) # minibatch y shaped (1,)
return x, y
Your convolutional layer has a stride of 1 and a size of 2, this means its output will be shaped (B, 5, L-1=4). The following layer is a fully connected layer instantiated as nn.Linear(5, 3), which means it expects (*, H_in=5) and will output (*, H_out). You can either
You can flatten the conv1d output with nn.Flatten and feed it to a bigger fully connected layer (for instance nn.Linear(20, 3).
You can use a convolutional layer with a wider kernel, if you use a kernel of 5 (your sequence length you will end up with a tensor of (B, 5, 1) which you feed to a nn.Linear(5, 3). Although this approach doesn't really scale when L is changed.
You could apply a nn.AvgPool1d to get an average representation of the sequence after the convolutional layers have been applied.
Those are just a few directions...

What should I think about when writing a custom loss function?

I'm trying to get my toy network to learn a sine wave.
I output (via tanh) a number between -1 and 1, and I want the network to minimise the following loss, where self(x) are the predictions.
loss = -torch.mean(self(x)*y)
This should be equivalent to trading a stock with a sinusoidal price, where self(x) is our desired position, and y are the returns of the next time step.
The issue I'm having is that the network doesn't learn anything. It does work if I change the loss function to be torch.mean((self(x)-y)**2) (MSE), but this isn't what I want. I'm trying to focus the network on 'making a profit', not making a prediction.
I think the issue may be related to the convexity of the loss function, but I'm not sure, and I'm not certain how to proceed. I've experimented with differing learning rates, but alas nothing works.
What should I be thinking about?
Actual code:
%load_ext tensorboard
import matplotlib.pyplot as plt; plt.rcParams["figure.figsize"] = (30,8)
import torch;from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F;import pytorch_lightning as pl
from torch import nn, tensor
def piecewise(x): return 2*(x>0)-1
class TsDs(torch.utils.data.Dataset):
def __init__(self, s, l=5): super().__init__();self.l,self.s=l,s
def __len__(self): return self.s.shape[0] - 1 - self.l
def __getitem__(self, i): return self.s[i:i+self.l], torch.log(self.s[i+self.l+1]/self.s[i+self.l])
def plt(self): plt.plot(self.s)
class TsDm(pl.LightningDataModule):
def __init__(self, length=5000, batch_size=1000): super().__init__();self.batch_size=batch_size;self.s = torch.sin(torch.arange(length)*0.2) + 5 + 0*torch.rand(length)
def train_dataloader(self): return DataLoader(TsDs(self.s[:3999]), batch_size=self.batch_size, shuffle=True)
def val_dataloader(self): return DataLoader(TsDs(self.s[4000:]), batch_size=self.batch_size)
dm = TsDm()
class MyModel(pl.LightningModule):
def __init__(self, learning_rate=0.01):
super().__init__();self.learning_rate = learning_rate
super().__init__();self.learning_rate = learning_rate
self.conv1 = nn.Conv1d(1,5,2)
self.lin1 = nn.Linear(20,3);self.lin2 = nn.Linear(3,1)
# self.network = nn.Sequential(nn.Conv1d(1,5,2),nn.ReLU(),nn.Linear(20,3),nn.ReLU(),nn.Linear(3,1), nn.Tanh())
# self.network = nn.Sequential(nn.Linear(5,5),nn.ReLU(),nn.Linear(5,3),nn.ReLU(),nn.Linear(3,1), nn.Tanh())
def forward(self, x):
out = x.unsqueeze(1)
out = self.conv1(out)
out = out.reshape(-1,20)
out = nn.ReLU()(out)
out = self.lin1(out)
out = nn.ReLU()(out)
out = self.lin2(out)
return nn.Tanh()(out)
def step(self, batch, batch_idx, stage):
x, y = batch
loss = -torch.mean(self(x)*y)
# loss = torch.mean((self(x)-y)**2)
print(loss)
self.log("loss", loss, prog_bar=True)
return loss
def training_step(self, batch, batch_idx): return self.step(batch, batch_idx, "train")
def validation_step(self, batch, batch_idx): return self.step(batch, batch_idx, "val")
def configure_optimizers(self): return torch.optim.SGD(self.parameters(), lr=self.learning_rate)
#logger = pl.loggers.TensorBoardLogger(save_dir="/content/")
mm = MyModel(0.1);trainer = pl.Trainer(max_epochs=10)
# trainer.tune(mm, dm)
trainer.fit(mm, datamodule=dm)
#
If I understand you correctly, I think that you were trying to maximize the unnormalized correlation between the network's prediction, self(x), and the target value y.
As you mention, the problem is the convexity of the loss wrt the model weights. One way to see the problem is to consider that the model is a simple linear predictor w'*x, where w is the model weights, w' it's transpose, and x the input feature vector (assume a scalar prediction for now). Then, if you look at the derivative of the loss wrt the weight vector (i.e., the gradient), you'll find that it no longer depends on w!
One way to fix this is change the loss to,
loss = -torch.mean(torch.square(self(x)*y))
or
loss = -torch.mean(torch.abs(self(x)*y))
You will have another big problem, however: these loss functions encourage unbound growth of the model weights. In the linear case, one solves this by a Lagrangian relaxation of a hard constraint on, for example, the norm of the model weight vector. I'm not sure how this would be done with neural networks as each layer would need it's own Lagrangian parameter...

How to drop running stats to default value for Norm layer in pyTorch?

I trained model on some images. Now to fit similar dataset but with another colors I want to load this model but also i want to drop all running stats from Batchnorm layers (set them to default value, like totally untrained). What parameters should i reset? Simple model looks like this
import torch
import torch.nn as nn
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv0 = nn.Conv2d(3, 3, 3, padding = 1)
self.norm = nn.BatchNorm2d(3)
self.conv = nn.Conv2d(3, 3, 3, padding = 1)
def forward(self, x):
x = self.conv0(x)
x = self.norm(x)
return self.conv(x)
net = Net()
##or for pretrained it will be
##net = torch.load('net.pth')
def drop_to_default():
for m in net.modules():
if type(m) == nn.BatchNorm2d:
####???####
drop_to_default()
Simplest way to do that is to run reset_running_stats() method on BatchNorm objects:
def drop_to_default():
for m in net.modules():
if type(m) == nn.BatchNorm2d:
m.reset_running_stats()
Below is this method's source code:
def reset_running_stats(self) -> None:
if self.track_running_stats:
# running_mean/running_var/num_batches... are registered at runtime depending
# if self.track_running_stats is on
self.running_mean.zero_() # Zero (neutral) mean
self.running_var.fill_(1) # One (neutral) variance
self.num_batches_tracked.zero_() # Number of batches tracked
You can see the source code here, _NormBase class.

initialising and accessing an array of weights in a custom keras layer

I am writing a custom keras layer for convolution in a cnn architecture in fourier domain:
class Fourier_Conv2D(Layer):
def __init__(self, no_of_kernels, **kwargs):
self.no_of_kernels = no_of_kernels
super(Fourier_Conv2D, self).__init__(**kwargs)
def build(self, input_shape):
self.kernel = self.add_weight(name = 'kernel',
shape = input_shape + (self.no_of_kernels,),
initializer = 'uniform', trainable = True)
super(Fourier_Conv2D, self).build(input_shape)
def call(self, x):
return K.dot(x, self.kernel[0])
In the call function, I need to do pointwise multiplication of the fft of input with fft of each kernel (according to the convolution theorem) and add the products before passing this sum to activation function. But how can I access each weight separately in the call function, as using array index to do so is giving the following attribute error -
AttributeError Traceback (most recent call last)
<ipython-input-71-9617a8e7ab2e> in <module>()
1 x = Fourier_Conv2D(5)
----> 2 x.call((2,2,1))
<ipython-input-70-02ded53b8f6f> in call(self, x)
11
12 def call(self, x):
---> 13 return K.dot(x, self.kernel[0])
14
AttributeError: 'Fourier_Conv2D' object has no attribute 'kernel'
Thanks in advance for any help in solving the error.
You are not using your layer correctly. The line x.call((2,2,1)) makes no sense since you need to pass a tensor to the layer. You should instead do something like this:
x = Input((3,4))
custom_layer = Fourier_Conv2D(10)
output = custom_layer(x)
Moreover, there are some errors in the definition of your layer. The following should work:
class Fourier_Conv2D(Layer):
def __init__(self, no_of_kernels, **kwargs):
self.no_of_kernels = no_of_kernels
super(Fourier_Conv2D, self).__init__(**kwargs)
def build(self, input_shape):
# Note the changes to the shape parameter
self.kernel = self.add_weight(name = 'kernel',
shape = (int(input_shape[-1]), self.no_of_kernels),
initializer = 'uniform', trainable = True)
super(Fourier_Conv2D, self).build(input_shape)
def call(self, x):
return K.dot(x, self.kernel) # kernel[0] --> kernel

Resources