How to extract the encoded features after running a PyTorch LSTM autoencoder model? - pytorch

I am very new to PyTorch and Python in general, and I am now struggling to get the encoded features from my pre-trained LSTM autoencoder which can be seen below:
import torch
import torch.nn as nn
# Bulding an LSTM autoencoder
class Encoder(nn.Module):
def __init__(self, seq_len, n_features, embedding_dim=32):
super(Encoder, self).__init__()
self.seq_len, self.n_features = seq_len, n_features
self.embedding_dim, self.hidden_dim1, self.hidden_dim2 = embedding_dim, 4 * embedding_dim, 2* embedding_dim
self.rnn1 = nn.LSTM(
input_size=n_features,
hidden_size=self.hidden_dim1, #128
num_layers=1,
batch_first=True
)
self.rnn2 = nn.LSTM(
input_size=self.hidden_dim1,
hidden_size=self.hidden_dim2, #64
num_layers=1,
batch_first=True
)
self.rnn3 = nn.LSTM(
input_size=self.hidden_dim2,
hidden_size=embedding_dim, #32
num_layers=1,
batch_first=True
)
def forward(self, x):
x = x.reshape((1, self.seq_len, self.n_features))
x, (_, _) = self.rnn1(x)
x, (_, _) = self.rnn2(x)
x, (hidden_n, _) = self.rnn3(x)
return hidden_n.reshape((self.n_features, self.embedding_dim))
class Decoder(nn.Module):
def __init__(self, seq_len, input_dim=32, n_features=1):
super(Decoder, self).__init__()
self.seq_len, self.input_dim = seq_len, input_dim
self.hidden_dim2, self.hidden_dim1, self.n_features = 4 * input_dim,2 * input_dim, n_features
self.rnn1 = nn.LSTM(
input_size=input_dim,
hidden_size=input_dim,
num_layers=1,
batch_first=True
)
self.rnn2 = nn.LSTM(
input_size=input_dim,
hidden_size=self.hidden_dim1,
num_layers=1,
batch_first=True
)
self.rnn3 = nn.LSTM(
input_size=self.hidden_dim1,
hidden_size=self.hidden_dim2,
num_layers=1,
batch_first=True
)
self.output_layer = nn.Linear(self.hidden_dim2, n_features)
def forward(self, x):
x = x.repeat(self.seq_len, self.n_features)
x = x.reshape((self.n_features, self.seq_len, self.input_dim))
x, (hidden_n, cell_n) = self.rnn1(x)
x, (hidden_n, cell_n) = self.rnn2(x)
x, (hidden_n, cell_n) = self.rnn3(x)
x = x.reshape((self.seq_len, self.hidden_dim2))
return self.output_layer(x)
class RAE(nn.Module):
def __init__(self,seq_len, n_features, embedding_dim=32):
super(RAE, self).__init__()
self.seq_len, self.n_features = seq_len, n_features
self.embedding_dim = embedding_dim
self.encoder = Encoder (seq_len, n_features, embedding_dim).to(device)
self.decoder = Decoder (seq_len, embedding_dim, n_features).to(device)
def forward(self,x):
x = self.encoder(x)
x = self.decoder(x)
return x
### TRAINING
def train_model(model,train_dataset,val_dataset, n_epochs):
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
criterion = nn.MSELoss(reduction='mean').to(device) # nn.L1Loss sum
history = dict(train = [], val = [])
for epoch in range(1, n_epochs + 1):
model = model.train()
train_losses = []
for seq_true in train_dataset:
optimizer.zero_grad()
seq_true = seq_true.to(device)
seq_pred = model(seq_true)
loss = criterion(seq_pred, seq_true)
loss.backward()
optimizer.step()
train_losses.append(loss.item())
val_losses = []
model = model.eval()
with torch.no_grad():
for seq_true in val_dataset:
seq_true = seq_true.to(device)
seq_pred =model(seq_true)
loss = criterion(seq_pred, seq_true)
val_losses.append(loss.item())
#add accuracy
train_loss = np.mean(train_losses)
val_loss = np.mean(val_losses)
history['train'].append(train_loss)
history['val'].append(val_loss)
print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')
return model.eval(),history
Once I trained my model I followed the advice given by ptrblck here and implemented it as follows:
activation = {}
def get_activation(name):
def hook(model, input, output):
activation[name] = output.detach()
return hook
model.encoder.register_forward_hook(get_activation('encoder'))
x = test_dataset_SR[1] # instead of using his random example I used one example from my training set
x = x.cuda()
output = model(x)
print(activation['encoder'])
but this gives me this error:
2 def get_activation(name):
3 def hook(model, input, output):
----> 4 activation[name] = output.detach()
5 return hook
AttributeError: 'tuple' object has no attribute 'detach'
Can you please help me solve this issue? I want to take these encoded features, store them and use them as input to another network. I know I could probably train the encoder separately(not sure), but I will need both encoder and decoder so I thought hooks will be my salvation.

Related

Pytorch Parameterized Layer not Updated

I have a problem here, so I want to make a layer where the weight value (and the bias) is based on the other frozen weight. So, let’s say I have a frozen weight (FW) as a base value, then my current model layer will have weight W = FW + D, where D is the trainable parameter. Later, when I train the model, I hope the only parameter that gets updated is D.
I made this simple code for illustration:
frozen = nn.Linear(100,10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc = nn.Linear(100,10)
self.dw = nn.Parameter(torch.tensor(1.0, requires_grad=True))
self.db = nn.Parameter(torch.tensor(1.0, requires_grad=True))
def forward(self, x):
# the weight (and the bias) of fc layer is from FW and D
self.fc.weight = nn.Parameter(torch.add(frozen.weight, self.dw))
self.fc.bias = nn.Parameter(torch.add(frozen.bias, self.db))
return torch.sigmoid(self.fc(x))
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
x = torch.rand(100)
y = torch.tensor([0]*9+[1], dtype=torch.float32)
for _ in range(10):
out = model(x)
loss = criterion(out, y)
print(loss)
optimizer.zero_grad()
loss.backward()
optimizer.step()
But when I run that code, the model doesn’t train, and the self.dw and self.db doesn’t change. I am not sure whether my concept is wrong, so it’s not possible to train D, or I made a mistake in the implementation.
I also tried to implement using nn.utils.parameterize, but it still doesn’t work (I am new to using this, so I am not sure I implemented it correctly)
frozen = nn.Linear(100,10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False
class Adder(nn.Module):
def __init__(self, delta, frozen):
super().__init__()
self.delta = nn.Parameter(torch.tensor(delta, requires_grad=True))
self.frozen=frozen
def forward(self, x):
return torch.add(self.frozen, self.delta)
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc = nn.Linear(100,10)
def forward(self, x):
nn.utils.parametrize.register_parametrization(self.fc, "weight", Adder(1.0, frozen.weight))
nn.utils.parametrize.register_parametrization(self.fc, "bias", Adder(1.0, frozen.bias))
return torch.sigmoid(self.fc(x))
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
x = torch.rand(100)
y = torch.tensor([0]*9+[1], dtype=torch.float32)
for _ in range(10):
out = model(x)
loss = criterion(out, y)
print(loss)
optimizer.zero_grad()
loss.backward()
optimizer.step()
Thank you for any responses.
Instead of recreating new weight and bias by
self.fc.weight = nn.Parameter(torch.add(frozen.weight, self.dw))
self.fc.bias = nn.Parameter(torch.add(frozen.bias, self.db))
You can utilize nn.functional.linear and intermediate variables
weight = self.weight + frozen.weight
bias = self.bias + frozen.bias
F.linear(x, weight, bias)
Complete version:
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self, frozen):
super(Net, self).__init__()
self.weight = nn.Parameter(torch.ones(10, 100, dtype=torch.float32))
self.bias = nn.Parameter(torch.zeros(10, dtype=torch.float32))
self.frozen = frozen
#property
def weight_bias(self):
weight = self.weight + self.frozen.weight
bias = self.bias + self.frozen.bias
return weight, bias
def forward(self, x):
# the weight (and the bias) of fc layer is from FW and D
weight, bias = self.weight_bias
return F.linear(x, weight, bias) # this should return raw logits as required by nn.CrossEntropyLoss
frozen = nn.Linear(100, 10)
frozen.weight.requires_grad = False
frozen.bias.requires_grad = False
model = Net(frozen)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
x = torch.rand(100).unsqueeze(0)
y = torch.tensor([0]*9+[1], dtype=torch.float32).unsqueeze(0)
for _ in range(10):
out = model(x)
loss = criterion(out, y)
print(loss)
optimizer.zero_grad()
loss.backward()
optimizer.step()

While running pytorch i got an error 'TypeError: object of type'CatsAndDogsDataset' has no len()' and I want to know how to fix it

I got an error while running pytorch
I train artificial intelligence with ResNet, and I wrote my own custom dataset for the dataset. After loading the data set from ResnNet, training data and test data were set separately by learning with artificial intelligence. But even though I ran it, an error occurred, but I don't know what kind of problem occurred.
Next, I will attach my ResNet.py code and my own data set CustomDataset.py code.
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets, models
from customDataset import CatsAndDogsDataset
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")
EPOCHS = 3
BATCH_SIZE = 10
dataset = CatsAndDogsDataset(csv_file = 'cats_dogs.csv', root_dir = 'cats_dogs_resized',transform = transforms.ToTensor())
train_set, test_set = torch.utils.data.random_split(dataset, [28,4])
train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=True)
class BasicBlock(nn.Module):
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != planes:
self.shortcut = nn.Sequential(nn.Conv2d(in_planes, planes,kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes))
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, num_classes=10):
super(ResNet, self).__init__()
self.in_planes = 16
self.conv1 = nn.Conv2d(3, 16, kernel_size=3,stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.layer1 = self._make_layer(16, 2, stride=1)
self.layer2 = self._make_layer(32, 2, stride=2)
self.layer3 = self._make_layer(64, 2, stride=2)
self.linear = nn.Linear(64, num_classes)
def _make_layer(self, planes, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(BasicBlock(self.in_planes, planes, stride))
self.in_planes = planes
return nn.Sequential(*layers)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.avg_pool2d(out, 8)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
model = ResNet().to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.1,momentum=0.9, weight_decay=0.0005)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
print(model)
def train(model, train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(DEVICE), target.to(DEVICE)
optimizer.zero_grad()
output = model(data)
loss = F.cross_entropy(output, target)
loss.backward()
optimizer.step()
def evaluate(model, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(DEVICE), target.to(DEVICE)
output = model(data)
test_loss += F.cross_entropy(output, target,reduction='sum').item()
pred = output.max(1, keepdim=True)[1]
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= length(test_loader.dataset)
test_accuracy = 100. * correct / length(test_loader.dataset)
return test_loss, test_accuracy
for epoch in range(1, EPOCHS + 1):
scheduler.step()
train(model, train_loader, optimizer, epoch)
test_loss, test_accuracy = evaluate(model, test_loader)
print('[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(
epoch, test_loss, test_accuracy))
import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from skimage import io
class CatsAndDogsDataset(Dataset):
def __init__(self, csv_file, root_dir, transform=None):
self.annotations = pd.read_csv(csv_file)
self.root_dir = root_dir
self.transform = transform
def __length__(self):
return length(self.annotations)
def __getitem__(self, index):
img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
image = io.imread(img_path)
y_label = torch.tensor(int(self.annotations.iloc[index, 1]))
if self.transform:
image = self.transform(image)
return (image, ylabel)
In this way, if I write the code and then run it
TypeError: object of type'CatsAndDogsDataset' has no len()
I wonder why I can't have len(). In addition, an error occurred in the result of running Backend.ai instead of pycharm, but the error content is
Cannot verify that dataset is Sized
if sum(lengths) != len(dataset):
It is raise ValueError("sum of input lengths does not equal the length of the input dataset!").
Error appears. Is there a workaround? help me plz
You need to define the function __len__ for your custom dataset (which you seem to have currently incorrectly defined as __length__).
This documentation provides details. Relevant excerpt:
torch.utils.data.Dataset is an abstract class representing a dataset.
Your custom dataset should inherit Dataset and override the following
methods:
__len__ so that len(dataset) returns the size of the dataset.
__getitem__ to support the indexing such that dataset[i] can be used to get i th sample.

RuntimeError: input.size(-1) must be equal to input_size. Expected 28, got 0

Here are my code by using Pysft
class Arguments:
def __init__(self):
# self.cuda = False
self.no_cuda = True
self.seed = 1
self.batch_size = 50
self.test_batch_size = 1000
self.epochs = 10
self.lr = 0.01
self.momentum = 0.5
self.log_interval = 10
hook = sy.TorchHook(torch)
bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")
Here is my LSTM model, in can run successfully by only use pytorch, but it can't run with pysyft
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
self.rnn = torch.nn.RNN(input_size=28,
hidden_size=16,
num_layers=2,
batch_first=True,
bidirectional=True)
self.fc = torch.nn.Linear(32, 10)
def forward(self, x):
print(np.shape(x))
x = x.squeeze()
x, _ = self.rnn(x)
x = self.fc(x[:, -1, :])
return x.view(-1, 10)
def train(args, model, device, federated_train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(federated_train_loader):
model.send(data.location) # <-- NEW: send the model to the right location
data, target = data.to(device), target.to(device)
# data, target = data.cuda(), target.cuda()
optimizer.zero_grad()
output = model(data.to(device))
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
model.get() # <-- NEW: get the model back
if batch_idx % args.log_interval == 0:
loss = loss.get() # <-- NEW: get the loss back
losses.append(loss.item())
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,
100. * batch_idx / len(federated_train_loader), loss.item()))
When I use Pysyft to run my LSTM model,there is a mistakes.But if I use my model without Pysyft,it an run scuccessfully.I don't know how to resolve it?
import torch
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
import torch.nn.functional as F
import time
import numpy as np
import syft as sy
class Arguments:
def __init__(self):
self.cuda = False
self.no_cuda = True
self.seed = 1
self.batch_size = 50
self.test_batch_size = 1000
self.epochs = 10
self.lr = 0.01
self.momentum = 0.5
self.log_interval = 10
hook = sy.TorchHook(torch) # <-- NEW: hook PyTorch ie add extra functionalities to support Federated Learning
bob = sy.VirtualWorker(hook, id="bob") # <-- NEW: define remote worker bob
alice = sy.VirtualWorker(hook, id="alice") # <-- NEW: and alice
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
self.rnn = torch.nn.RNN(input_size=28,
hidden_size=16,
num_layers=2,
batch_first=True,
bidirectional=True)
self.fc = torch.nn.Linear(32, 10)
def forward(self, x):
print(np.shape(x))
x = x.squeeze()
x, _ = self.rnn(x)
x = self.fc(x[:, -1, :])
return x.view(-1, 10)
def train(args, model, device, federated_train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(federated_train_loader): # <-- now it is a distributed dataset
model.send(data.location) # <-- NEW: send the model to the right location
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data.to(device))
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
model.get() # <-- NEW: get the model back
if batch_idx % args.log_interval == 0:
loss = loss.get() # <-- NEW: get the loss back
losses.append(loss.item())
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,
100. * batch_idx / len(federated_train_loader), loss.item()))
if __name__ == '__main__':
args = Arguments()
use_cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
losses = []
federated_train_loader = sy.FederatedDataLoader(
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
]))
.federate((bob, alice)), # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.test_batch_size, shuffle=True, **kwargs)
model = Model().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
t = time.time()
for epoch in range(1, args.epochs + 1):
train(args, model, device, federated_train_loader, optimizer, epoch)
test(args, model, device, test_loader)
plt.plot(range(0,160),losses,marker='o')
plt.xlabel("iterator")
plt.ylabel("loss")
plt.show()
total_time = time.time() - t
print(total_time)
Here are the whole codes
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import syft as sy
hook = sy.TorchHook(torch)
bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")
class Arguments():
def __init__(self):
self.batch_size = 64
self.test_batch_size = 1000
self.epochs = 10
self.lr = 0.01
self.momentum = 0.5
self.no_cuda = False
self.seed = 1
self.log_interval = 10
self.save_model = False
args = Arguments()
use_cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
]))
.federate((bob, alice)),
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.test_batch_size, shuffle=True, **kwargs)
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 20, 5, 1)
self.conv2 = nn.Conv2d(20, 50, 5, 1)
self.fc1 = nn.Linear(4*4*50, 500)
self.fc2 = nn.Linear(500, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 4*4*50)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return F.log_softmax(x, dim=1)
model = Net()
model = model.to(device) #pushing the model into available device.
optimizer = optim.SGD(model.parameters(), lr=0.01)
for epoch in range(1, args.epochs + 1):
# Train the model
model.train()
for batch_idx, (data, target) in enumerate(federated_train_loader): # iterate through each worker's dataset
model.send(data.location) #send the model to the right location ; data.location returns the worker name in which the data is present
data, target = data.to(device), target.to(device) # pushing both the data and target labels onto the available device.
optimizer.zero_grad() # 1) erase previous gradients (if they exist)
output = model(data) # 2) make a prediction
loss = F.nll_loss(output, target) # 3) calculate how much we missed
loss.backward() # 4) figure out which weights caused us to miss
optimizer.step() # 5) change those weights
model.get() # get the model back (with gradients)
if batch_idx % args.log_interval == 0:
loss = loss.get() #get the loss back
print('Epoch: {} [Training: {:.0f}%]\tLoss: {:.6f}'.format(epoch, 100. * batch_idx / len(federated_train_loader), loss.item()))
# Test the model
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data) # Getting a prediction
test_loss += F.nll_loss(output, target, reduction='sum').item() #updating test loss
pred = output.argmax(1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item() #correct pred in the current test set.
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
torch.save(model.state_dict(), "mnist_cnn.pt")
I hav tested the above code in torch 1.x and pysyft 0.2.5,And its working. (but with cnn model)...
just change the dataloader and model here.

Building Autoencoder with Softmax classifier - Input mismatch error

I am trying to train an auto-encoder with a softmax classifier to replicate the results in this paper Intriguing properties of neural networks.
My implementation is the following:
n_embedded = 400
class AE400_10(nn.Module):
def __init__(self):
super(AE400_10, self).__init__()
self.encoder = nn.Sequential(nn.Linear(28*28, n_embedded), nn.Sigmoid())
self.decoder = nn.Sequential(nn.Linear(n_embedded, 28*28))
self.classifier = nn.Sequential(nn.Linear(28*28, 10))
def forward(self, x):
x = x.view(-1, 28*28)
encoded = self.encoder(x)
decoded = self.decoder(encoded)
out = self.classifier(decoded) ##NEW UPDATED
return decoded, F.log_softmax(out)
For the training I have the following:
model = AE400_10().to(device)
criterion1 = nn.MSELoss()
criterion2 = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
for epoch in range(epochs):
total_batch = len(train_set) // batch_size_train
for batch_idx, (data, target) in enumerate(MNSIT_train):
X = data.to(device)
Y = target.to(device)
optimizer.zero_grad()
decoded, out = model(X)
loss1 = criterion1(decoded, inputs)
loss2 = criterion2(out, labels)
loss = loss1 + loss2
loss.backward()
optimizer.step()
if (batch_idx+1) % 100 == 0:
print('Epoch [%d/%d], lter [%d/%d], Loss: %.4f'%(epoch+1, epochs, batch_idx+1, total_batch, cost.item()))
But I am getting the following error:
RuntimeError: size mismatch, m1: [128 x 400], m2: [784 x 10] at
/Users/soumith/mc3build/conda-bld/pytorch_1549593514549/work/aten/src/TH/generic/THTensorMath.cpp:940
I understand this is an error in the dimension but I am not sure why it is happening.
::UPDATE::
I fixed the input to the classifier based on the comments below and now I am getting the following error:
RuntimeError: The size of tensor a (784) must match the size of tensor
b (28) at non-singleton dimension 3
I don't use nn.Sequential so I'm not sure why exactly this happens but if you
replace
self.classifier = nn.Sequential(nn.Linear(28*28, 10))
with
self.classifier = nn.Linear(28*28, 10)
your code works
-->
import torch
import torch.nn as nn
import torch.nn.functional as F
n_embedded = 400
class AE400_10(nn.Module):
def __init__(self):
super(AE400_10, self).__init__()
self.encoder = nn.Sequential(nn.Linear(28*28, n_embedded), nn.Sigmoid())
self.decoder = nn.Sequential(nn.Linear(n_embedded, 28*28))
self.test = nn.Linear(28*28, 10)
self.classifier = nn.Sequential(nn.Linear(28*28, 10))
def forward(self, x):
x = x.view(-1,28*28)
encoded = self.encoder(x)
decoded = self.decoder(encoded)
out = self.classifier(decoded)
return decoded, F.log_softmax(out)
x = torch.ones(128,28,28)
model = AE400_10()
model(x)
instead of encoded out = self.classifier(encoded)
put decoded as input of classifier
out = self.classifier(decoded)
I think, here is why you are getting the mismatch, because the classifier is expecting a tensor of 28 *28 as input as defined in your code.
Then,when calling the criterions:
loss1 = criterion1(decoded, X)
loss2 = criterion2(out, Y)

Simple Data recall RNN in Pytorch

I am learning Pytorch and am trying to make a network that can remember previous inputs.
I have tried 2 different input/output structures(see below) but haven't gotten anything to work the way I would like.
input 1:
in:[4,2,7,8]
output [[0,0,4],[0,4,2],[4,2,7],[2,7,8]]
code:
def histroy(num_samples=4,look_back=3):
data=np.random.randint(10,size=(num_samples)).tolist()
lab=[[0]*look_back]
for i in data:
lab.append(lab[-1][1:]+[i])
return data,lab[1:]
input2:
in:[4,2,7,8]
out:[0,4,2,7]
def histroy(num_samples=4):
data=np.random.randint(10,size=(num_samples)).tolist()
lab=[0]
for i in data:
lab.append(i)
return data,lab
I have tried a number of different network structures and training methods but nothing seems to stick.
The only things I think I have right are net.hidden = net.init_hidden()
should go outside of each epoch and loss.backward(retain_graph=True) but that doesnt seem to do anything
Currently, it can learn the last number in the sequence but never seems to learn any of the others
My last attempt:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
def histroy(num_samples=4,look_back=3):
data=np.random.randint(10,size=(num_samples)).tolist()
lab=[[0]*look_back]
for i in data:
lab.append(lab[-1][1:]+[i])
return data,lab[1:]
class Net(nn.Module):
def __init__(self, input_dim, hidden_dim, batch_size, output_dim=10, num_layers=1):
super(Net, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.num_layers = num_layers
self.memory = nn.RNN(self.input_dim,self.hidden_dim,self.num_layers)
self.linear = nn.Linear(self.hidden_dim, output_dim)
self.first=True
def init_hidden(self):
# This is what we'll initialise our hidden state as
return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
def forward(self, input):
self.memory_out, self.hidden = self.memory(input.view(len(input), self.batch_size, -1))
y_pred = self.linear(self.memory_out[-1].view(self.batch_size, -1))
return y_pred.view(-1)
if __name__ == '__main__':
data_amount = 10000
batch_size = 1 # default is 32
data_amount-=data_amount%batch_size
number_of_times_on_the_same_data = 250
look_back=5
net=Net(input_dim=1,hidden_dim=25,batch_size=batch_size,output_dim=look_back)
data,labs=histroy(data_amount,look_back)
data = torch.Tensor(data).float()
labs = torch.Tensor(labs).float()
optimizer = optim.Adam(net.parameters())
criterion = torch.nn.MSELoss(size_average=False)
for epoch in range(number_of_times_on_the_same_data): # loop over the dataset multiple times
running_loss = 0.0
data, labs = histroy(data_amount, look_back)
data = torch.Tensor(data).float()
labs = torch.Tensor(labs).float()
net.hidden = net.init_hidden()
print("epoch",epoch)
for i in range(0, data_amount, batch_size):
inputs = data[i:i + batch_size]
labels = labs[i:i + batch_size]
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward(retain_graph=True)
optimizer.step()
running_loss += loss.item()
if i >= data_amount-batch_size:
print("loss",loss)
net.hidden = net.init_hidden()
print("Outputs",outputs)
print("Input", data[-1*look_back:])
print("labels",labels)
The problem that your network presents, it's the fact that your input is of shape 1:
for i in range(0, data_amount, batch_size):
inputs = data[i:i + batch_size]
labels = labs[i:i + batch_size]
print(inputs.shape,labels.shape)
>>>torch.Size([1]) torch.Size([1, 5])
>>>torch.Size([1]) torch.Size([1, 5])...
That's the reason your RNN is predicting only your last number, because in this case you're not using your look_back attribute. You have to fix your code in order to have inputs of size [1,5]. Your code should look something like this:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
def histroy(num_samples=4,look_back=3):
data=np.random.randint(10,size=(num_samples)).tolist()
lab=[[0]*look_back]
for i in data:
lab.append(lab[-1][1:]+[i])
return lab[:-1],lab[1:]
class Net(nn.Module):
def __init__(self, input_dim, hidden_dim, batch_size, output_dim=10, num_layers=1):
super(Net, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.num_layers = num_layers
self.memory = nn.RNN(self.input_dim,self.hidden_dim,self.num_layers)
self.linear = nn.Linear(self.hidden_dim, output_dim)
self.first=True
def init_hidden(self):
# This is what we'll initialise our hidden state as
return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
def forward(self, input):
self.memory_out, self.hidden = self.memory(input.view(len(input), self.batch_size, -1))
y_pred = self.linear(self.memory_out[-1].view(self.batch_size, -1))
return y_pred.view(-1)
if __name__ == '__main__':
data_amount = 10000
batch_size = 1 # default is 32
data_amount-=data_amount%batch_size
number_of_times_on_the_same_data = 250
look_back=5
net=Net(input_dim=1,hidden_dim=25,batch_size=batch_size,output_dim=look_back)
data,labs=histroy(data_amount,look_back)
data = torch.Tensor(data).float()
labs = torch.Tensor(labs).float()
optimizer = optim.Adam(net.parameters())
criterion = torch.nn.MSELoss(size_average=False)
for epoch in range(number_of_times_on_the_same_data): # loop over the dataset multiple times
running_loss = 0.0
data, labs = histroy(data_amount, look_back)
data = torch.Tensor(data).float()
labs = torch.Tensor(labs).float()
net.hidden = net.init_hidden()
print("epoch",epoch)
for i in range(0, data_amount, batch_size):
inputs = data[i:i + batch_size].view(-1)
labels = labs[i:i + batch_size]
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward(retain_graph=True)
optimizer.step()
running_loss += loss.item()
if i >= data_amount-batch_size:
print("loss",loss)
net.hidden = net.init_hidden()
print("Outputs",outputs)
print("Input", data[i:i + batch_size][-1])
print("labels",labels)
Output:
>>>epoch 0
>>>loss tensor(17.7415, grad_fn=<MseLossBackward>)
>>>Outputs tensor([2.0897, 3.1410, 4.7382, 1.0532, 4.2003], grad_fn=<ViewBackward>)
>>>Input tensor([8., 2., 3., 5., 1.])
>>>labels tensor([[2., 3., 5., 1., 0.]])...

Resources