I want to train a Variational Auto-Encoder for a simple task: learn 3-class dataset and generate new samples from it's latent space. The problem is that despite it seems like an easy task I can't make it work. I expect it to produce the same 3 clusters when generating new samples from decoder (2 image).
I played with different values for beta. It's doesn't make it better.
How I can fix it and get the desired output or there some restrictions? Thanks in advance.
It's the code below, you may easy copypast the 3 code pieces into 3 cells in notebook and run.
Imports:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import torch
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.datasets import make_classification
Here's dataset:
class TrainDataset(Dataset):
def __init__(self, X):
self.data = X.astype('float32')
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
return self.data[idx], 0
X, y = make_classification(n_samples=100, n_features=3, n_informative=3, n_redundant=0, n_classes=3, n_clusters_per_class=1, class_sep=2, random_state=16)
train_ratio = 0.7
batch_size = 8
train_loader = torch.utils.data.DataLoader(TrainDataset(X[:int(train_ratio * len(X))]), batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(TrainDataset(X[int(train_ratio * len(X)):]), batch_size=batch_size)
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y)
plt.show()
Now the VAE:
class VAEmini(nn.Module):
def __init__(self, latent_d=2):
super().__init__()
self.fc1 = nn.Linear(3, 3)
self.fc21 = nn.Linear(3, latent_d)
self.fc22 = nn.Linear(3, latent_d)
self.fc3 = nn.Linear(latent_d, 3)
self.fc4 = nn.Linear(3, 3)
def encode(self, x):
h1 = F.relu(self.fc1(x))
return self.fc21(h1), self.fc22(h1)
def reparameterize(self, mu, logvar):
std = torch.exp(0.5*logvar)
eps = torch.randn_like(std)
return mu + eps*std
def decode(self, z):
h3 = F.relu(self.fc3(z))
return self.fc4(h3)
def forward(self, x):
mu, logvar = self.encode(x)
z = self.reparameterize(mu, logvar)
return self.decode(z), mu, logvar
def loss_function(recon_x, x, mu, logvar, beta):
BCE = F.mse_loss(recon_x, x, reduction='mean')
KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
return BCE + beta * KLD
def train(epoch, beta):
model_mini.train()
train_loss = 0
for batch_idx, (data, _) in enumerate(train_loader):
data = data.to(device)
optimizer.zero_grad()
recon_batch, mu, logvar = model_mini(data)
loss = loss_function(recon_batch, data, mu, logvar, beta)
loss.backward()
train_loss += loss.item()
optimizer.step()
if batch_idx % log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader),
loss.item() / len(data)))
print('====> Epoch: {} Average loss: {:.4f}'.format(
epoch, train_loss / len(train_loader.dataset)))
def test(epoch, beta):
model_mini.eval()
test_loss = 0
with torch.no_grad():
for i, (data, _) in enumerate(test_loader):
data = data.to(device)
recon_batch, mu, logvar = model_mini(data)
test_loss += loss_function(recon_batch, data, mu, logvar, beta).item()
print('====> Test set loss: {:.4f}'.format(test_loss / len(test_loader.dataset)))
return test_loss / len(test_loader.dataset)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_mini = VAEmini(latent_d=2).to(device)
log_interval = 5
epochs = 100
optimizer = optim.Adam(model_mini.parameters(), lr=1e-2)
beta=0.01
for epoch in range(1, epochs + 1):
train(epoch, beta=beta)
test(epoch, beta=beta)
with torch.no_grad():
sample = torch.randn(1000, 2).to(device)
sample = model_mini.decode(sample).cpu()
fig = plt.figure(figsize=(20, 20))
ax = fig.add_subplot(projection='3d')
ax.scatter(sample[:, 0], sample[:, 1], sample[:, 2])
plt.show()
Related
I am trying to build Graph Convolutional Network. I converted my dataframe to PyTorch
required format using below code.
class S_Dataset(Dataset):
def __init__(self, df, transform=None):
self.df = df
self.transform = transform
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
row = self.df.iloc[idx]
x = torch.tensor([row.date.to_pydatetime().timestamp(), row.s1, row.s2, row.s3, row.s4, row.temp ,row.rh, row.Location, row.Node ], dtype=torch.float)
y = torch.tensor([row.Location], dtype=torch.long)
weight1 = torch.tensor([row.neighbor1_distance], dtype=torch.float)
weight2 = torch.tensor([row.neighbor2_distance], dtype=torch.float)
weight3 = torch.tensor([row.neighbor3_distance], dtype=torch.float)
edge_index1 = torch.tensor([[row.Location, row.neighbor1_name]], dtype=torch.long).t()
edge_index2 = torch.tensor([[row.Location, row.neighbor2_name]], dtype=torch.long).t()
edge_index3 = torch.tensor([[row.Location, row.neighbor3_name]], dtype=torch.long).t()
edge_index = torch.cat([edge_index1, edge_index2, edge_index3 ], dim=1)
weight = torch.cat([weight1, weight2, weight3], dim=0)
if self.transform:
x, y, edge_index, weight = self.transform(x, y, edge_index, weight)
return x, y, edge_index, weight
Process_Data = S_Dataset(df)
Next I divided data into train and test set:
train_size = int(len(Process_Data) * 0.8)
test_size = len(Process_Data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(Process_Data, [train_size, test_size])
# Create dataloaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True )
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=True )
I designed a simple model:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GCNConv
# Create the model
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = GCNConv(9, 128)
self.conv2 = GCNConv(128, 64)
self.fc1 = nn.Linear(64, 32)
self.fc2 = nn.Linear(32, len(location_to_id))
def forward(self, x, edge_index, weight):
x = self.conv1(x, edge_index, weight)
x = torch.relu(x)
x = self.conv2(x, edge_index, weight)
x = torch.relu(x)
x = x.view(-1, 64)
x = self.fc1(x)
x = torch.relu(x)
x = self.fc2(x)
return x
Finally to train the model:
model = Net()
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
for epoch in range(100):
total_loss = 0
for batch in train_loader:
optimizer.zero_grad()
x, y, edge_index, weight = batch
y_pred = model(x, edge_index, weight)
loss = criterion(y_pred, y)
loss.backward()
optimizer.step()
total_loss += loss.item()
print('Epoch: {} Loss: {:.4f}'.format(epoch, total_loss / len(train_loader)))
I am facing following error:
IndexError: The shape of the mask [2, 3] at index 0 does not match the shape of the indexed tensor [32, 3] at index 0
x, y, edge_index, weight = batch
This line is causing error.
How can I resphae my data so I can train my model?
The batch size is set at 32, but there might not be enough samples to fit in the batch size of 32.
I am assuming, this error occurs after the code runs for some time, I would appreciate more context on the problem
A general solution could be decreasing the size of batch to something smaller and trying the code again. Making sure all samples are covered in the epoch.
I have tried to train a GCN model.I defined the custom layer I needed. However, It cause some dimension mismatch when I do some batch training.
the codes are as following :
import math
import numpy as np
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
# =============================================================================
# model define
# =============================================================================
class GraphConvolution(Module):
"""
Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
"""
def __init__(self, in_features, out_features, bias=True):
super(GraphConvolution, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.weight = Parameter(torch.FloatTensor(in_features, out_features))
if bias:
self.bias = Parameter(torch.FloatTensor(out_features))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
stdv = 1. / math.sqrt(self.weight.size(1))
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.uniform_(-stdv, stdv)
def forward(self, input, adj):
support = torch.mm(input, self.weight)
output = torch.spmm(adj, support)
if self.bias is not None:
return output + self.bias
else:
return output
def __repr__(self):
return self.__class__.__name__ + ' (' \
+ str(self.in_features) + ' -> ' \
+ str(self.out_features) + ')'
class GCN(nn.Module):
def __init__(self, nfeat, nhid, nclass):
super(GCN, self).__init__()
self.gc1 = GraphConvolution(nfeat, nhid)
self.gc2 = GraphConvolution(nhid, nclass)
self.linear = nn.Linear(nclass, 1)
# self.dropout = dropout
def forward(self, x, adj):
x = F.relu(self.gc1(x, adj))
# x = F.dropout(x, self.dropout, training=self.training)
x = F.relu(self.gc2(x, adj))
x = self.linear(x)
return x
def train(dataloader, model, loss_fn, optimizer,adj):
size = len(dataloader.dataset)
model.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X,adj)
loss = loss_fn(pred, y)
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn,adj):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X,adj)
test_loss += loss_fn(pred, y).item()
# correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
# correct /= size
# Accuracy: {(100*correct):>0.1f}%,
print(f"Test Error: \n Avg loss: {test_loss:>8f} \n")
when I run the code :
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
model = GCN(1,1,1).to(device)
print(model)
# model(X).shape
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
epochs = 10
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, model, loss_fn, optimizer,Adjacency_matrix)
test(test_dataloader, model, loss_fn,Adjacency_matrix)
print("Done!")
I got the error :
when I looking inside this ,I find the model is working well when I drop the dimension of batch-size. How I need to do to tell the model that this dimension is the batch-size which don't need to compute?
the error you're seeing is due to you trying to matrix multiple a 3d tensor (your input) by your 2D weights.
To get around this you can simply reshape your data, as we only really care about the last dim when doing matmuls:
def forward(self, input, adj):
b_size = input.size(0)
input = input.view(-1, input.shape[-1])
support = torch.mm(input, self.weight)
output = torch.spmm(adj, support)
output = output.view(b_size,-1,output.shape[-1])
if self.bias is not None:
return output + self.bias
else:
return output
Here are my code by using Pysft
class Arguments:
def __init__(self):
# self.cuda = False
self.no_cuda = True
self.seed = 1
self.batch_size = 50
self.test_batch_size = 1000
self.epochs = 10
self.lr = 0.01
self.momentum = 0.5
self.log_interval = 10
hook = sy.TorchHook(torch)
bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")
Here is my LSTM model, in can run successfully by only use pytorch, but it can't run with pysyft
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
self.rnn = torch.nn.RNN(input_size=28,
hidden_size=16,
num_layers=2,
batch_first=True,
bidirectional=True)
self.fc = torch.nn.Linear(32, 10)
def forward(self, x):
print(np.shape(x))
x = x.squeeze()
x, _ = self.rnn(x)
x = self.fc(x[:, -1, :])
return x.view(-1, 10)
def train(args, model, device, federated_train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(federated_train_loader):
model.send(data.location) # <-- NEW: send the model to the right location
data, target = data.to(device), target.to(device)
# data, target = data.cuda(), target.cuda()
optimizer.zero_grad()
output = model(data.to(device))
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
model.get() # <-- NEW: get the model back
if batch_idx % args.log_interval == 0:
loss = loss.get() # <-- NEW: get the loss back
losses.append(loss.item())
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,
100. * batch_idx / len(federated_train_loader), loss.item()))
When I use Pysyft to run my LSTM model,there is a mistakes.But if I use my model without Pysyft,it an run scuccessfully.I don't know how to resolve it?
import torch
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
import torch.nn.functional as F
import time
import numpy as np
import syft as sy
class Arguments:
def __init__(self):
self.cuda = False
self.no_cuda = True
self.seed = 1
self.batch_size = 50
self.test_batch_size = 1000
self.epochs = 10
self.lr = 0.01
self.momentum = 0.5
self.log_interval = 10
hook = sy.TorchHook(torch) # <-- NEW: hook PyTorch ie add extra functionalities to support Federated Learning
bob = sy.VirtualWorker(hook, id="bob") # <-- NEW: define remote worker bob
alice = sy.VirtualWorker(hook, id="alice") # <-- NEW: and alice
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
self.rnn = torch.nn.RNN(input_size=28,
hidden_size=16,
num_layers=2,
batch_first=True,
bidirectional=True)
self.fc = torch.nn.Linear(32, 10)
def forward(self, x):
print(np.shape(x))
x = x.squeeze()
x, _ = self.rnn(x)
x = self.fc(x[:, -1, :])
return x.view(-1, 10)
def train(args, model, device, federated_train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(federated_train_loader): # <-- now it is a distributed dataset
model.send(data.location) # <-- NEW: send the model to the right location
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data.to(device))
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
model.get() # <-- NEW: get the model back
if batch_idx % args.log_interval == 0:
loss = loss.get() # <-- NEW: get the loss back
losses.append(loss.item())
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,
100. * batch_idx / len(federated_train_loader), loss.item()))
if __name__ == '__main__':
args = Arguments()
use_cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
losses = []
federated_train_loader = sy.FederatedDataLoader(
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
]))
.federate((bob, alice)), # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.test_batch_size, shuffle=True, **kwargs)
model = Model().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
t = time.time()
for epoch in range(1, args.epochs + 1):
train(args, model, device, federated_train_loader, optimizer, epoch)
test(args, model, device, test_loader)
plt.plot(range(0,160),losses,marker='o')
plt.xlabel("iterator")
plt.ylabel("loss")
plt.show()
total_time = time.time() - t
print(total_time)
Here are the whole codes
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import syft as sy
hook = sy.TorchHook(torch)
bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")
class Arguments():
def __init__(self):
self.batch_size = 64
self.test_batch_size = 1000
self.epochs = 10
self.lr = 0.01
self.momentum = 0.5
self.no_cuda = False
self.seed = 1
self.log_interval = 10
self.save_model = False
args = Arguments()
use_cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
]))
.federate((bob, alice)),
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.test_batch_size, shuffle=True, **kwargs)
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 20, 5, 1)
self.conv2 = nn.Conv2d(20, 50, 5, 1)
self.fc1 = nn.Linear(4*4*50, 500)
self.fc2 = nn.Linear(500, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 4*4*50)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return F.log_softmax(x, dim=1)
model = Net()
model = model.to(device) #pushing the model into available device.
optimizer = optim.SGD(model.parameters(), lr=0.01)
for epoch in range(1, args.epochs + 1):
# Train the model
model.train()
for batch_idx, (data, target) in enumerate(federated_train_loader): # iterate through each worker's dataset
model.send(data.location) #send the model to the right location ; data.location returns the worker name in which the data is present
data, target = data.to(device), target.to(device) # pushing both the data and target labels onto the available device.
optimizer.zero_grad() # 1) erase previous gradients (if they exist)
output = model(data) # 2) make a prediction
loss = F.nll_loss(output, target) # 3) calculate how much we missed
loss.backward() # 4) figure out which weights caused us to miss
optimizer.step() # 5) change those weights
model.get() # get the model back (with gradients)
if batch_idx % args.log_interval == 0:
loss = loss.get() #get the loss back
print('Epoch: {} [Training: {:.0f}%]\tLoss: {:.6f}'.format(epoch, 100. * batch_idx / len(federated_train_loader), loss.item()))
# Test the model
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data) # Getting a prediction
test_loss += F.nll_loss(output, target, reduction='sum').item() #updating test loss
pred = output.argmax(1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item() #correct pred in the current test set.
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
torch.save(model.state_dict(), "mnist_cnn.pt")
I hav tested the above code in torch 1.x and pysyft 0.2.5,And its working. (but with cnn model)...
just change the dataloader and model here.
I am learning Pytorch and am trying to make a network that can remember previous inputs.
I have tried 2 different input/output structures(see below) but haven't gotten anything to work the way I would like.
input 1:
in:[4,2,7,8]
output [[0,0,4],[0,4,2],[4,2,7],[2,7,8]]
code:
def histroy(num_samples=4,look_back=3):
data=np.random.randint(10,size=(num_samples)).tolist()
lab=[[0]*look_back]
for i in data:
lab.append(lab[-1][1:]+[i])
return data,lab[1:]
input2:
in:[4,2,7,8]
out:[0,4,2,7]
def histroy(num_samples=4):
data=np.random.randint(10,size=(num_samples)).tolist()
lab=[0]
for i in data:
lab.append(i)
return data,lab
I have tried a number of different network structures and training methods but nothing seems to stick.
The only things I think I have right are net.hidden = net.init_hidden()
should go outside of each epoch and loss.backward(retain_graph=True) but that doesnt seem to do anything
Currently, it can learn the last number in the sequence but never seems to learn any of the others
My last attempt:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
def histroy(num_samples=4,look_back=3):
data=np.random.randint(10,size=(num_samples)).tolist()
lab=[[0]*look_back]
for i in data:
lab.append(lab[-1][1:]+[i])
return data,lab[1:]
class Net(nn.Module):
def __init__(self, input_dim, hidden_dim, batch_size, output_dim=10, num_layers=1):
super(Net, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.num_layers = num_layers
self.memory = nn.RNN(self.input_dim,self.hidden_dim,self.num_layers)
self.linear = nn.Linear(self.hidden_dim, output_dim)
self.first=True
def init_hidden(self):
# This is what we'll initialise our hidden state as
return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
def forward(self, input):
self.memory_out, self.hidden = self.memory(input.view(len(input), self.batch_size, -1))
y_pred = self.linear(self.memory_out[-1].view(self.batch_size, -1))
return y_pred.view(-1)
if __name__ == '__main__':
data_amount = 10000
batch_size = 1 # default is 32
data_amount-=data_amount%batch_size
number_of_times_on_the_same_data = 250
look_back=5
net=Net(input_dim=1,hidden_dim=25,batch_size=batch_size,output_dim=look_back)
data,labs=histroy(data_amount,look_back)
data = torch.Tensor(data).float()
labs = torch.Tensor(labs).float()
optimizer = optim.Adam(net.parameters())
criterion = torch.nn.MSELoss(size_average=False)
for epoch in range(number_of_times_on_the_same_data): # loop over the dataset multiple times
running_loss = 0.0
data, labs = histroy(data_amount, look_back)
data = torch.Tensor(data).float()
labs = torch.Tensor(labs).float()
net.hidden = net.init_hidden()
print("epoch",epoch)
for i in range(0, data_amount, batch_size):
inputs = data[i:i + batch_size]
labels = labs[i:i + batch_size]
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward(retain_graph=True)
optimizer.step()
running_loss += loss.item()
if i >= data_amount-batch_size:
print("loss",loss)
net.hidden = net.init_hidden()
print("Outputs",outputs)
print("Input", data[-1*look_back:])
print("labels",labels)
The problem that your network presents, it's the fact that your input is of shape 1:
for i in range(0, data_amount, batch_size):
inputs = data[i:i + batch_size]
labels = labs[i:i + batch_size]
print(inputs.shape,labels.shape)
>>>torch.Size([1]) torch.Size([1, 5])
>>>torch.Size([1]) torch.Size([1, 5])...
That's the reason your RNN is predicting only your last number, because in this case you're not using your look_back attribute. You have to fix your code in order to have inputs of size [1,5]. Your code should look something like this:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
def histroy(num_samples=4,look_back=3):
data=np.random.randint(10,size=(num_samples)).tolist()
lab=[[0]*look_back]
for i in data:
lab.append(lab[-1][1:]+[i])
return lab[:-1],lab[1:]
class Net(nn.Module):
def __init__(self, input_dim, hidden_dim, batch_size, output_dim=10, num_layers=1):
super(Net, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.num_layers = num_layers
self.memory = nn.RNN(self.input_dim,self.hidden_dim,self.num_layers)
self.linear = nn.Linear(self.hidden_dim, output_dim)
self.first=True
def init_hidden(self):
# This is what we'll initialise our hidden state as
return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
def forward(self, input):
self.memory_out, self.hidden = self.memory(input.view(len(input), self.batch_size, -1))
y_pred = self.linear(self.memory_out[-1].view(self.batch_size, -1))
return y_pred.view(-1)
if __name__ == '__main__':
data_amount = 10000
batch_size = 1 # default is 32
data_amount-=data_amount%batch_size
number_of_times_on_the_same_data = 250
look_back=5
net=Net(input_dim=1,hidden_dim=25,batch_size=batch_size,output_dim=look_back)
data,labs=histroy(data_amount,look_back)
data = torch.Tensor(data).float()
labs = torch.Tensor(labs).float()
optimizer = optim.Adam(net.parameters())
criterion = torch.nn.MSELoss(size_average=False)
for epoch in range(number_of_times_on_the_same_data): # loop over the dataset multiple times
running_loss = 0.0
data, labs = histroy(data_amount, look_back)
data = torch.Tensor(data).float()
labs = torch.Tensor(labs).float()
net.hidden = net.init_hidden()
print("epoch",epoch)
for i in range(0, data_amount, batch_size):
inputs = data[i:i + batch_size].view(-1)
labels = labs[i:i + batch_size]
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward(retain_graph=True)
optimizer.step()
running_loss += loss.item()
if i >= data_amount-batch_size:
print("loss",loss)
net.hidden = net.init_hidden()
print("Outputs",outputs)
print("Input", data[i:i + batch_size][-1])
print("labels",labels)
Output:
>>>epoch 0
>>>loss tensor(17.7415, grad_fn=<MseLossBackward>)
>>>Outputs tensor([2.0897, 3.1410, 4.7382, 1.0532, 4.2003], grad_fn=<ViewBackward>)
>>>Input tensor([8., 2., 3., 5., 1.])
>>>labels tensor([[2., 3., 5., 1., 0.]])...
I am using MNIST dataset to study MLP(multi layer perceptron) in python. While running the code given below I get the following error message:
UnboundLocalError: local variable 'x' referenced before assignment
Where should I insert "global x" or what should I do? Here is my code:
def homework(train_X, train_y, test_X, test_y):
epoch = 10000
batch_size = 20
learning_rate = 1e-3
input_size = 784
hidden_size = 100
output_size = 10
data_num = train_X.shape[0]
np.random.seed(0)
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros(hidden_size)
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros(output_size)
for n in range(epoch):
loss_sum = 0
for i in range(0, data_num, batch_size):
x = train_X[i:i+batch_size]
y = train_y[i:i+batch_size]
fwd = forward(x)
loss_sum += cross_entropy(y, fwd['prob'])
grad = network.gradient(x, y)
for key in ('W1', 'b1', 'W2', 'b2'):
network.params[key] -= learning_rate * grad[key]
loss = network.loss(x, y)
train_loss_list.append(loss)
if np.mod(n, 1000) == 0:
pred_y = np.argmax(forward(test_X)['prob'], axis=1)
accuracy = f1_score(test_y, pred_y, average='macro')
print("epoch: %5d, loss_sum: %.5f, accuracy: %.5f" % (n, loss_sum, accuracy))
pred_y = np.argmax(forward(test_X)['prob'], axis=1)
return pred_y
def softmax(x):
x -= np.max(x, axis=1).reshape((-1, 1))
return np.exp(x) / np.sum(np.exp(x), axis=1).reshape((-1, 1))
def cross_entropy(y, output):
batch_size = y.shape[0]
return -np.sum(np.log(output[np.arange(batch_size), y])) / batch_size
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def forward(x):
fwd = {}
fwd['h1'] = sigmoid(np.dot(x, W1) + b1)
fwd['prob'] = softmax(np.dot(fwd['h1'], W2) + b2)
return fwd
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
import numpy as np
def load_mnist():
mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
mnist.target.astype('int32'), random_state=42)
mnist_X = mnist_X / 255.0
return train_test_split(mnist_X, mnist_y,
test_size=0.2,
random_state=42)
def validate_homework():
train_X, test_X, train_y, test_y = load_mnist()
train_X_mini = train_X[:100]
train_y_mini = train_y[:100]
test_X_mini = test_X[:100]
test_y_mini = test_y[:100]
pred_y = homework(train_X_mini, train_y_mini, test_X_mini, test_y_mini)
print(f1_score(test_y_mini, pred_y, average='macro'))
def score_homework():
train_X, test_X, train_y, test_y = load_mnist()
pred_y = homework(train_X, train_y, test_X, test_y)
print(f1_score(test_y, pred_y, average='macro'))
validate_homework()
# score_homework()