I'm trying to use pytorch-lightning for token-classification model. I have already built a model for token classification without lightning. I'm confused on what changes should be done with existing code to integrate pytorch-lightning.
Following is my pytorch code:
model = BertForTokenClassification.from_pretrained(
'bert-large-cased',
num_labels=len(tag2idx),
output_attentions = False,
output_hidden_states = False
)
for _ in trange(epochs, desc="Epoch"):
# ========================================
# Training
# ========================================
model.train()
total_loss = 0
for step, batch in enumerate(train_dataloader):
batch = tuple(t.to(device) for t in batch)
b_input_ids, b_input_mask, b_labels = batch
model.zero_grad()
outputs = model(b_input_ids, token_type_ids=None,
attention_mask=b_input_mask, labels=b_labels)
loss = outputs[0]
loss.backward()
total_loss += loss.item()
torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
optimizer.step()
scheduler.step()
avg_train_loss = total_loss / len(train_dataloader)
loss_values.append(avg_train_loss)
# ========================================
# Validation
# ========================================
model.eval()
eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0
predictions , true_labels = [], []
for batch in valid_dataloader:
batch = tuple(t.to(device) for t in batch)
b_input_ids, b_input_mask, b_labels = batch
with torch.no_grad():
outputs = model(b_input_ids, token_type_ids=None,
attention_mask=b_input_mask, labels=b_labels)
logits = outputs[1].detach().cpu().numpy()
label_ids = b_labels.to('cpu').numpy()
eval_loss += outputs[0].mean().item()
predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
true_labels.extend(label_ids)
eval_loss = eval_loss / len(valid_dataloader)
validation_loss_values.append(eval_loss)
pred_tags = [tag_values[p_i] for p, l in zip(predictions, true_labels)
for p_i, l_i in zip(p, l) if tag_values[l_i] != "PAD"]
valid_tags = [tag_values[l_i] for l in true_labels
for l_i in l if tag_values[l_i] != "PAD"]
f1 = f1_score([valid_tags], [pred_tags])
Following is the code which I tried for pytorch lightning.
class LightningModule(pl.LightningModule):
def __init__(self, lr, lr_backbone, weight_decay, batch_size):
super().__init__()
self.model = BertForTokenClassification.from_pretrained("bert-large-cased",
num_labels=len(tag2idx),
output_attentions = False,
output_hidden_states = False)
self.lr = lr
self.lr_backbone = lr_backbone
self.weight_decay = weight_decay
self.batch_size = batch_size
def forward(self, input_ids, attention_mask, labels):
outputs = self.model(
input_ids, token_type_ids=None, attention_mask=attention_mask, labels=labels
)
loss = outputs[0]
logits = outputs[1]
return loss, logits
def training_step(self, batch, batch_idx):
b_input_ids, b_input_mask, b_labels = batch
outputs = self.model(b_input_ids, token_type_ids=None,
attention_mask=b_input_mask, labels=b_labels)
loss = outputs[0]
self.log("train_loss", loss)
return loss
def validation_step(self, batch, batch_idx):
b_input_ids, b_input_mask, b_labels = batch
outputs = self.model(b_input_ids, token_type_ids=None,
attention_mask=b_input_mask, labels=b_labels)
eval_loss = outputs[0]
self.log("val_loss", eval_loss)
return eval_loss
def validation_end(self, outputs):
eval_loss = np.mean([x["val_loss"] for x in outputs])
self.log("val_loss", eval_loss)
pred_tags = [tag_values[p_i] for p, l in zip(self.predictions, self.true_labels)
for p_i, l_i in zip(p, l) if tag_values[l_i] != "PAD"]
valid_tags = [tag_values[l_i] for l in self.true_labels
for l_i in l if tag_values[l_i] != "PAD"]
f1 = f1_score([valid_tags], [pred_tags])
self.log("val_f1", f1)
def configure_optimizers(self):
# optimizer = torch.optim.NAdam(optimizer_grouped_parameters,lr=4e-6,eps=1e-8)
# scheduler = scheduler = get_linear_schedule_with_warmup( optimizer, num_warmup_steps=0, num_training_steps=total_steps )
return torch.optim.NAdam(optimizer_grouped_parameters,lr=4e-6,eps=1e-8)
def train_dataloader(self):
return train_dataloader # return your dataloader
def val_dataloader(self):
return valid_dataloader # return your validation dataloader
model = LightningModule(lr=1e-6, lr_backbone=1e-5, weight_decay=1e-4, batch_size=32)
trainer = pl.Trainer(accelerator='gpu', gradient_clip_val=0.1, max_epochs=epochs, auto_scale_batch_size=None, default_root_dir="lightning_output/", enable_checkpointing=False)
trainer.fit(model)
But, when I run inference, I get the following error.
TypeError: forward() missing 2 required positional arguments: 'attention_mask' and 'labels'
Related
Trying to train a basic model for this competition. This is a multi-class problem where the values are from 1 to 5 (with 0.5 size steps). The problem is that the training error is very bad and so is the inference.
Is my model correct for this problem? I didn't use warmup at all and I ran it only for a few epochs but I don't think it will make much difference.
Also, here's a result of an inference:
(0, tensor([[0.0024, 0.4513, 1.3045, 0.0350, 0.4182, 0.6813]]))
loss is 0?
predictions are too low (we should start from 1)
Clearly something is wrong with the model - What is it?
Here's my code (which is based on this article):
class LangKnolModel(pl.LightningModule):
def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
super().__init__()
self.bert = BertModel.from_pretrained(BERT_MODEL_NAME, return_dict=True)
self.classifier = nn.Linear(self.bert.config.hidden_size, 6)
self.n_training_steps = n_training_steps
self.max_epochs=8,
self.n_warmup_steps = n_warmup_steps
self.criterion = nn.MSELoss()
def forward(self, input_ids, attention_mask, labels=None):
output = self.bert(input_ids, attention_mask=attention_mask)
output = self.classifier(output.pooler_output)
# output = torch.sigmoid(output)
loss = 0
if labels is not None:
loss = self.criterion(output, labels)
return loss, output
def training_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("train_loss", loss, prog_bar=True, logger=True)
return {"loss": loss, "predictions": outputs, "labels": labels}
def validation_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
return loss
def test_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("test_loss", loss, prog_bar=True, logger=True)
return loss
def configure_optimizers(self):
optimizer = AdamW(self.parameters(), lr=2e-5)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self.n_warmup_steps,
num_training_steps=self.n_training_steps
)
return dict(
optimizer=optimizer,
lr_scheduler=dict(
scheduler=scheduler,
interval='step'
)
)
training
model = LangKnolModel(
n_classes=len(LABELS_COLS),
n_warmup_steps=1,
n_training_steps=16,
)
trainer = pl.Trainer(
gpus=1,
max_epochs=5
)
trainer.fit(model, data_module)
I am training a multi-label text classifier using PyTorch with Roberta. However after 2nd epoch ram fills and kernel crashes I checked and ram is not freed after every epoch. I have 64GB RAM, 8 CPU cores
What can be the problem?
Here is the my PyTorch implementation:
class ReaderTextDataset(Dataset):
def __init__(self,
data: pd.DataFrame,
tokenizer: RobertaTokenizer,
max_token_len: int = 512):
self.tokenizer = tokenizer
self.data = data
self.max_token_len = max_token_len
def __len__(self):
return len(self.data)
def __getitem__(self, index: int):
data_row = self.data.iloc[index]
readerText = data_row.readerText
labels = data_row[LABEL_COLUMNS]
encoding = self.tokenizer.encode_plus(
readerText,
add_special_tokens=True,
max_length=self.max_token_len,
return_token_type_ids=False,
padding="max_length",
truncation=True,
return_attention_mask=True,
return_tensors='pt',
)
return dict(readerText=readerText,
input_ids=encoding["input_ids"].flatten(),
attention_mask=encoding["attention_mask"].flatten(),
labels=torch.FloatTensor(labels))
train_dataset = ReaderTextDataset(train_df, tokenizer, max_token_len=512)
roberta_model = RobertaModel.from_pretrained(BERT_MODEL_NAME, return_dict=True)
NUM_WORKERS = 6
class ReaderTextDataModule(pl.LightningDataModule):
def __init__(self,
train_df,
test_df,
tokenizer,
batch_size=8,
max_token_len=512):
super().__init__()
self.batch_size = batch_size
self.train_df = train_df
self.test_df = test_df
self.tokenizer = tokenizer
self.max_token_len = max_token_len
def setup(self, stage=None):
self.train_dataset = ReaderTextDataset(self.train_df, self.tokenizer,
self.max_token_len)
self.test_dataset = ReaderTextDataset(self.test_df, self.tokenizer,
self.max_token_len)
def train_dataloader(self):
return DataLoader(self.train_dataset,
batch_size=self.batch_size,
shuffle=True,
num_workers=NUM_WORKERS)
def val_dataloader(self):
return DataLoader(self.test_dataset,
batch_size=self.batch_size,
num_workers=NUM_WORKERS)
def test_dataloader(self):
return DataLoader(self.test_dataset,
batch_size=self.batch_size,
num_workers=NUM_WORKERS)
class ReaderTextTagger(pl.LightningModule):
def __init__(self,
n_classes: int,
n_training_steps=None,
n_warmup_steps=None):
super().__init__()
self.bert = RobertaModel.from_pretrained(BERT_MODEL_NAME,
return_dict=True)
self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
self.n_training_steps = n_training_steps
self.n_warmup_steps = n_warmup_steps
self.criterion = nn.BCELoss()
def forward(self, input_ids, attention_mask, labels=None):
output = self.bert(input_ids, attention_mask=attention_mask)
output = self.classifier(output.pooler_output)
output = torch.sigmoid(output)
loss = 0
if labels is not None:
loss = self.criterion(output, labels)
return loss, output
def training_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("train_loss", loss, prog_bar=True, logger=True)
return {"loss": loss, "predictions": outputs, "labels": labels}
def validation_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("val_loss", loss, prog_bar=True, logger=True)
return loss
def test_step(self, batch, batch_idx):
input_ids = batch["input_ids"]
attention_mask = batch["attention_mask"]
labels = batch["labels"]
loss, outputs = self(input_ids, attention_mask, labels)
self.log("test_loss", loss, prog_bar=True, logger=True)
return loss
def training_epoch_end(self, outputs):
labels = []
predictions = []
for output in outputs:
for out_labels in output["labels"].detach().cpu():
labels.append(out_labels)
for out_predictions in output["predictions"].detach().cpu():
predictions.append(out_predictions)
labels = torch.stack(labels).int()
predictions = torch.stack(predictions)
for i, name in enumerate(LABEL_COLUMNS):
class_roc_auc = auroc(predictions[:, i], labels[:, i])
self.logger.experiment.add_scalar(f"{name}_roc_auc/Train",
class_roc_auc,
self.current_epoch)
def configure_optimizers(self):
optimizer = AdamW(self.parameters(), lr=1e-5)
scheduler = get_linear_schedule_with_warmup(
optimizer,
num_warmup_steps=self.n_warmup_steps,
num_training_steps=self.n_training_steps)
return dict(optimizer=optimizer,
lr_scheduler=dict(scheduler=scheduler, interval='step'))
enter code here
You return output, which stores in the GPU, in every train_step. Try to move it to cpu if you really need to store output by
return {"loss": loss, "predictions": outputs.cpu(), "labels": labels}
I am very new to PyTorch and Python in general, and I am now struggling to get the encoded features from my pre-trained LSTM autoencoder which can be seen below:
import torch
import torch.nn as nn
# Bulding an LSTM autoencoder
class Encoder(nn.Module):
def __init__(self, seq_len, n_features, embedding_dim=32):
super(Encoder, self).__init__()
self.seq_len, self.n_features = seq_len, n_features
self.embedding_dim, self.hidden_dim1, self.hidden_dim2 = embedding_dim, 4 * embedding_dim, 2* embedding_dim
self.rnn1 = nn.LSTM(
input_size=n_features,
hidden_size=self.hidden_dim1, #128
num_layers=1,
batch_first=True
)
self.rnn2 = nn.LSTM(
input_size=self.hidden_dim1,
hidden_size=self.hidden_dim2, #64
num_layers=1,
batch_first=True
)
self.rnn3 = nn.LSTM(
input_size=self.hidden_dim2,
hidden_size=embedding_dim, #32
num_layers=1,
batch_first=True
)
def forward(self, x):
x = x.reshape((1, self.seq_len, self.n_features))
x, (_, _) = self.rnn1(x)
x, (_, _) = self.rnn2(x)
x, (hidden_n, _) = self.rnn3(x)
return hidden_n.reshape((self.n_features, self.embedding_dim))
class Decoder(nn.Module):
def __init__(self, seq_len, input_dim=32, n_features=1):
super(Decoder, self).__init__()
self.seq_len, self.input_dim = seq_len, input_dim
self.hidden_dim2, self.hidden_dim1, self.n_features = 4 * input_dim,2 * input_dim, n_features
self.rnn1 = nn.LSTM(
input_size=input_dim,
hidden_size=input_dim,
num_layers=1,
batch_first=True
)
self.rnn2 = nn.LSTM(
input_size=input_dim,
hidden_size=self.hidden_dim1,
num_layers=1,
batch_first=True
)
self.rnn3 = nn.LSTM(
input_size=self.hidden_dim1,
hidden_size=self.hidden_dim2,
num_layers=1,
batch_first=True
)
self.output_layer = nn.Linear(self.hidden_dim2, n_features)
def forward(self, x):
x = x.repeat(self.seq_len, self.n_features)
x = x.reshape((self.n_features, self.seq_len, self.input_dim))
x, (hidden_n, cell_n) = self.rnn1(x)
x, (hidden_n, cell_n) = self.rnn2(x)
x, (hidden_n, cell_n) = self.rnn3(x)
x = x.reshape((self.seq_len, self.hidden_dim2))
return self.output_layer(x)
class RAE(nn.Module):
def __init__(self,seq_len, n_features, embedding_dim=32):
super(RAE, self).__init__()
self.seq_len, self.n_features = seq_len, n_features
self.embedding_dim = embedding_dim
self.encoder = Encoder (seq_len, n_features, embedding_dim).to(device)
self.decoder = Decoder (seq_len, embedding_dim, n_features).to(device)
def forward(self,x):
x = self.encoder(x)
x = self.decoder(x)
return x
### TRAINING
def train_model(model,train_dataset,val_dataset, n_epochs):
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
criterion = nn.MSELoss(reduction='mean').to(device) # nn.L1Loss sum
history = dict(train = [], val = [])
for epoch in range(1, n_epochs + 1):
model = model.train()
train_losses = []
for seq_true in train_dataset:
optimizer.zero_grad()
seq_true = seq_true.to(device)
seq_pred = model(seq_true)
loss = criterion(seq_pred, seq_true)
loss.backward()
optimizer.step()
train_losses.append(loss.item())
val_losses = []
model = model.eval()
with torch.no_grad():
for seq_true in val_dataset:
seq_true = seq_true.to(device)
seq_pred =model(seq_true)
loss = criterion(seq_pred, seq_true)
val_losses.append(loss.item())
#add accuracy
train_loss = np.mean(train_losses)
val_loss = np.mean(val_losses)
history['train'].append(train_loss)
history['val'].append(val_loss)
print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')
return model.eval(),history
Once I trained my model I followed the advice given by ptrblck here and implemented it as follows:
activation = {}
def get_activation(name):
def hook(model, input, output):
activation[name] = output.detach()
return hook
model.encoder.register_forward_hook(get_activation('encoder'))
x = test_dataset_SR[1] # instead of using his random example I used one example from my training set
x = x.cuda()
output = model(x)
print(activation['encoder'])
but this gives me this error:
2 def get_activation(name):
3 def hook(model, input, output):
----> 4 activation[name] = output.detach()
5 return hook
AttributeError: 'tuple' object has no attribute 'detach'
Can you please help me solve this issue? I want to take these encoded features, store them and use them as input to another network. I know I could probably train the encoder separately(not sure), but I will need both encoder and decoder so I thought hooks will be my salvation.
For the following training program, training and validation are all ok.
Once reach to Test method, I have CUDA out of memory. What should I change so that I have enough memory to test as well.
import torch
from torchvision import datasets, transforms
import torch.nn.functional as f
class CnnLstm(nn.Module):
def __init__(self):
super(CnnLstm, self).__init__()
self.cnn = CNN()
self.rnn = nn.LSTM(input_size=180000, hidden_size=256, num_layers=2, batch_first=True)#stacked LSTM with 2 layers
#print(num_classes)
self.linear = nn.Linear(256, num_classes)
#print('after num_classes')
def forward(self, x):
#print(x.shape)
batch_size, time_steps, channels, height, width = x.size()
c_in = x.view(batch_size * time_steps, channels, height, width)
_, c_out = self.cnn(c_in)
r_in = c_out.view(batch_size, time_steps, -1)
r_out, (_, _) = self.rnn(r_in)
r_out2 = self.linear(r_out[:, -1, :])
return f.log_softmax(r_out2, dim=1)
class TrainCNNLSTM:
def __init__(self):
self.seed = 1
self.batch_size = 8
self.validate_batch_size = 8
self.test_batch_size = 1
self.epoch = 20
self.learning_rate = 0.01
self.step = 100
self.train_loader = None
self.validate_loader = None
self.test_loader = None
#print('before')
self.model = CnnLstm().to(device)
#print('after')
self.criterion = nn.CrossEntropyLoss()
def load_data(self):
data_loader = DataLoader()
self.train_loader = data_loader.get_train_data(self.batch_size)
self.validate_loader = data_loader.get_validate_data(self.validate_batch_size)
self.test_loader = data_loader.get_test_data(self.test_batch_size)
def train(self):
optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=self.learning_rate/100.0, max_lr=self.learning_rate, step_size_up=13)
#optimizer = torch.optim.SGD(self.model.parameters(), lr=self.learning_rate)
for epoch in range(self.epoch):
t_losses=[]
for iteration, (data, target) in enumerate(self.train_loader):
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
optimizer.zero_grad()
output = self.model(data)
loss = self.criterion(output, target)
#loss = f.nll_loss(output, target)
t_losses.append(loss)
loss.backward()
optimizer.step()
scheduler.step()
if iteration % self.step == 0:
print('Epoch: {} | train loss: {:.4f}'.format(epoch, loss.item()))
avgd_trainloss = sum(t_losses)/len(t_losses)
self.validate(epoch, avgd_trainloss)
def validate(self, epoch, avg_tloss):
v_losses=[]
with torch.no_grad():
for iteration, (data, target) in enumerate(self.validate_loader):
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
data, target = data.cuda(), target.cuda()
data, target = Variable(data), Variable(target)
output = self.model(data)
loss = self.criterion(output, target)
#loss = f.nll_loss(output, target)
v_losses.append(loss)
avgd_validloss = sum(v_losses)/len(v_losses)
print('Epoch: {} | train loss: {:.4f} | validate loss: {:.4f}'.format(epoch, avg_tloss, avgd_validloss))
def test(self):
test_loss = []
correct = 0
for data, target in self.test_loader:
data = np.expand_dims(data, axis=1)
data = torch.FloatTensor(data)
data, target = data.cuda(), target.cuda()
data, target = Variable(data, volatile=True), Variable(target)
output = self.model(data)
loss = self.criterion(output, target)
#f.nll_loss(output, target, size_average=False).item() # sum up batch loss
test_loss.append(loss)
pred = torch.max(output, 1)[1].data.squeeze()
correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
test_loss = sum(test_loss)/len(test_loss)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(self.test_loader.dataset),
100. * correct / len(self.test_loader.dataset)))
train = TrainCNNLSTM()
train.load_data()
train.train()
train.test()
You should call .item() on your loss when appending it to the list of losses:
loss = self.criterion(output, target)
test_loss.append(loss.item())
This avoids accumulating tensors in a list which are still attached to the computational graph. I would say the same for your accuracy.
Here are my code by using Pysft
class Arguments:
def __init__(self):
# self.cuda = False
self.no_cuda = True
self.seed = 1
self.batch_size = 50
self.test_batch_size = 1000
self.epochs = 10
self.lr = 0.01
self.momentum = 0.5
self.log_interval = 10
hook = sy.TorchHook(torch)
bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")
Here is my LSTM model, in can run successfully by only use pytorch, but it can't run with pysyft
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
self.rnn = torch.nn.RNN(input_size=28,
hidden_size=16,
num_layers=2,
batch_first=True,
bidirectional=True)
self.fc = torch.nn.Linear(32, 10)
def forward(self, x):
print(np.shape(x))
x = x.squeeze()
x, _ = self.rnn(x)
x = self.fc(x[:, -1, :])
return x.view(-1, 10)
def train(args, model, device, federated_train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(federated_train_loader):
model.send(data.location) # <-- NEW: send the model to the right location
data, target = data.to(device), target.to(device)
# data, target = data.cuda(), target.cuda()
optimizer.zero_grad()
output = model(data.to(device))
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
model.get() # <-- NEW: get the model back
if batch_idx % args.log_interval == 0:
loss = loss.get() # <-- NEW: get the loss back
losses.append(loss.item())
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,
100. * batch_idx / len(federated_train_loader), loss.item()))
When I use Pysyft to run my LSTM model,there is a mistakes.But if I use my model without Pysyft,it an run scuccessfully.I don't know how to resolve it?
import torch
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
import torch.nn.functional as F
import time
import numpy as np
import syft as sy
class Arguments:
def __init__(self):
self.cuda = False
self.no_cuda = True
self.seed = 1
self.batch_size = 50
self.test_batch_size = 1000
self.epochs = 10
self.lr = 0.01
self.momentum = 0.5
self.log_interval = 10
hook = sy.TorchHook(torch) # <-- NEW: hook PyTorch ie add extra functionalities to support Federated Learning
bob = sy.VirtualWorker(hook, id="bob") # <-- NEW: define remote worker bob
alice = sy.VirtualWorker(hook, id="alice") # <-- NEW: and alice
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
self.rnn = torch.nn.RNN(input_size=28,
hidden_size=16,
num_layers=2,
batch_first=True,
bidirectional=True)
self.fc = torch.nn.Linear(32, 10)
def forward(self, x):
print(np.shape(x))
x = x.squeeze()
x, _ = self.rnn(x)
x = self.fc(x[:, -1, :])
return x.view(-1, 10)
def train(args, model, device, federated_train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(federated_train_loader): # <-- now it is a distributed dataset
model.send(data.location) # <-- NEW: send the model to the right location
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data.to(device))
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
model.get() # <-- NEW: get the model back
if batch_idx % args.log_interval == 0:
loss = loss.get() # <-- NEW: get the loss back
losses.append(loss.item())
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * args.batch_size, len(federated_train_loader) * args.batch_size,
100. * batch_idx / len(federated_train_loader), loss.item()))
if __name__ == '__main__':
args = Arguments()
use_cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
losses = []
federated_train_loader = sy.FederatedDataLoader(
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
]))
.federate((bob, alice)), # <-- NEW: we distribute the dataset across all the workers, it's now a FederatedDataset
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.test_batch_size, shuffle=True, **kwargs)
model = Model().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
t = time.time()
for epoch in range(1, args.epochs + 1):
train(args, model, device, federated_train_loader, optimizer, epoch)
test(args, model, device, test_loader)
plt.plot(range(0,160),losses,marker='o')
plt.xlabel("iterator")
plt.ylabel("loss")
plt.show()
total_time = time.time() - t
print(total_time)
Here are the whole codes
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import syft as sy
hook = sy.TorchHook(torch)
bob = sy.VirtualWorker(hook, id="bob")
alice = sy.VirtualWorker(hook, id="alice")
class Arguments():
def __init__(self):
self.batch_size = 64
self.test_batch_size = 1000
self.epochs = 10
self.lr = 0.01
self.momentum = 0.5
self.no_cuda = False
self.seed = 1
self.log_interval = 10
self.save_model = False
args = Arguments()
use_cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
federated_train_loader = sy.FederatedDataLoader( # <-- this is now a FederatedDataLoader
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
]))
.federate((bob, alice)),
batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.test_batch_size, shuffle=True, **kwargs)
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 20, 5, 1)
self.conv2 = nn.Conv2d(20, 50, 5, 1)
self.fc1 = nn.Linear(4*4*50, 500)
self.fc2 = nn.Linear(500, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x, 2, 2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x, 2, 2)
x = x.view(-1, 4*4*50)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return F.log_softmax(x, dim=1)
model = Net()
model = model.to(device) #pushing the model into available device.
optimizer = optim.SGD(model.parameters(), lr=0.01)
for epoch in range(1, args.epochs + 1):
# Train the model
model.train()
for batch_idx, (data, target) in enumerate(federated_train_loader): # iterate through each worker's dataset
model.send(data.location) #send the model to the right location ; data.location returns the worker name in which the data is present
data, target = data.to(device), target.to(device) # pushing both the data and target labels onto the available device.
optimizer.zero_grad() # 1) erase previous gradients (if they exist)
output = model(data) # 2) make a prediction
loss = F.nll_loss(output, target) # 3) calculate how much we missed
loss.backward() # 4) figure out which weights caused us to miss
optimizer.step() # 5) change those weights
model.get() # get the model back (with gradients)
if batch_idx % args.log_interval == 0:
loss = loss.get() #get the loss back
print('Epoch: {} [Training: {:.0f}%]\tLoss: {:.6f}'.format(epoch, 100. * batch_idx / len(federated_train_loader), loss.item()))
# Test the model
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data) # Getting a prediction
test_loss += F.nll_loss(output, target, reduction='sum').item() #updating test loss
pred = output.argmax(1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item() #correct pred in the current test set.
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, len(test_loader.dataset), 100. * correct / len(test_loader.dataset)))
torch.save(model.state_dict(), "mnist_cnn.pt")
I hav tested the above code in torch 1.x and pysyft 0.2.5,And its working. (but with cnn model)...
just change the dataloader and model here.