With pytorch-lightning and transformers, I fine-tuned a BERT model on German service tickets. The dataset has the following sizes:
FULL Dataset: (1220, 2)
TRAIN Dataset: (854, 2)
VAL Dataset: (366, 2)
Every ticket belongs to exactly one of 10 categories, which is why my model is initialized in __init__ like this:
# changing the configuration to NUM_LABELS labels instead of 2
self.bert = transformers.BertModel.from_pretrained(MODEL_NAME)
self.drop = th.nn.Dropout(p=0.1)
self.out = th.nn.Linear(self.bert.config.hidden_size, NUM_LABELS)
self.softmax = th.nn.Softmax(dim=1)
self.loss = th.nn.CrossEntropyLoss(reduction="none")
This produces a probability distribution over the 10 classes for each sample, since the forward function is:
def forward(self, input_ids, mask):
    _, pooled_output = self.bert(
        input_ids=input_ids,
        attention_mask=mask
    )
    output = self.drop(pooled_output)
    output = self.out(output)
    return self.softmax(output)
torch.nn.CrossEntropyLoss is defined as the loss function and called inside training_step. With a batch size of 16, logits.shape = [16, 10], batch['targets'].shape = [16], and batch['targets'] = [1, 5, 2, 4, 8, 6, 9, 0, 0, 1, 2, 7, 7, 7, 5, 3]. Is CrossEntropyLoss the right loss function? And is the optimizer even working?
def training_step(self, batch, batch_idx):
    logits = self.forward(batch['input_ids'], batch['mask']).squeeze()
    loss = self.loss(logits, batch['targets']).mean()
    return {'loss': loss, 'log': {'train_loss': loss}}
Same goes for validation_step:
def validation_step(self, batch, batch_idx):
    logits = self.forward(batch['input_ids'], batch['mask']).squeeze()
    acc = (logits.argmax(-1) == batch['targets']).float()
    loss = self.loss(logits, batch['targets'])
    return {'loss': loss, 'acc': acc}
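For reference, here is a minimal standalone sketch (not taken from the model above) of what torch.nn.CrossEntropyLoss expects: raw, unnormalized logits of shape [batch, num_classes] and integer class indices of shape [batch]; the criterion applies log-softmax internally.
import torch

# Hypothetical shape check mirroring the batch described above:
# 16 samples, 10 classes, integer class targets.
logits = torch.randn(16, 10)   # raw scores, no softmax applied
targets = torch.tensor([1, 5, 2, 4, 8, 6, 9, 0, 0, 1, 2, 7, 7, 7, 5, 3])
criterion = torch.nn.CrossEntropyLoss(reduction="none")
loss = criterion(logits, targets)   # shape [16], one loss value per sample
print(loss.mean())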
In the end, the model produces the same probabilities no matter what input sequence it gets. Is this underfitting?
An example is:
model.eval()
text = "Warum kann mein Kollege in SAP keine Transaktionen mehr ausführen?"
input = tokenizer(
    text,
    None,
    add_special_tokens=True,
    max_length=200,
    pad_to_max_length=True,
    return_token_type_ids=True,
    truncation=True,
    padding='max_length',
    return_tensors="pt"
)
input = input.to(device)
out = model(input_ids=input['input_ids'], mask=input['attention_mask'])
text, out
--> ('Warum kann mein Kollege in SAP keine Transaktionen mehr ausführen?',
tensor([[2.9374e-03, 3.1926e-03, 8.7949e-03, 3.0573e-01, 2.6428e-04, 5.2946e-02,
2.4758e-01, 6.2161e-03, 3.6930e-01, 3.0384e-03]], device='cuda:0',
grad_fn=<SoftmaxBackward>))
And another example is:
('Auf meinem Telefon erscheinen keine Nummern mehr.',
tensor([[2.9374e-03, 3.1926e-03, 8.7949e-03, 3.0573e-01, 2.6428e-04, 5.2946e-02,
2.4758e-01, 6.2161e-03, 3.6930e-01, 3.0384e-03]], device='cuda:0',
grad_fn=<SoftmaxBackward>))
It is German, but as you can see, the predictions match exactly. Which is a shame :D And that is my problem.
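To check the "is the optimizer even working?" part, here is a minimal diagnostic sketch (an assumption about the surrounding training setup, not code from it) that runs one batch and prints the gradient norms of the trainable parameters:
# Hypothetical check: one forward/backward pass, then inspect gradients.
# Assumes `model` is the LightningModule above and `batch` is one training
# batch already on the same device as the model.
model.train()
logits = model(batch['input_ids'], batch['mask'])
loss = model.loss(logits, batch['targets']).mean()
loss.backward()
for name, p in model.named_parameters():
    if p.requires_grad and p.grad is not None:
        print(name, p.grad.norm().item())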
Related
I am trying to find the training/validation accuracy and loss of my model for each epoch as I train it, in order to find the best epoch to use from now on. I appreciate that there is a lot of information on this, but the topic is very new to me and I find it difficult to pick out the right answer for my situation.
I assume that I need to add one or two bits to the train_one_epoch() and evaluate() functions in order to do this?
My model setup is:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.02, momentum=0.9, weight_decay=0.0001)
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20,40], gamma=0.1)
And my training function is:
epochs = 50
for epoch in range(epochs):
    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=20)
    lr_scheduler.step()
    evaluate(model, val_data_loader, device=device)
    print("\n\n")
torch.save(model, f'./Models/trained_{ds}_model_Epoch{epochs}_LR0_02.pt')
I am using coco-like annotations, for example:
{'boxes': tensor([[316.9700, 242.5500, 464.1000, 442.1700], [ 39.2200, 172.6700, 169.8400, 430.9600]]), 'labels': tensor([2, 2]), 'image_id': tensor(1416), 'area': tensor([29370.1094, 33738.3789]), 'iscrowd': tensor([0, 0])}
The train_one_epoch and evaluate functions are from torchvision's 'engine.py'.
TensorBoard seems like a good tool for this, but I don't really know how to use it.
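If TensorBoard is the route taken, a minimal sketch with torch.utils.tensorboard could look like the following (the tag names, the log directory, and the use of the objects returned by train_one_epoch and evaluate are my assumptions, not something prescribed by engine.py):
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir="runs/fasterrcnn")  # hypothetical log directory

for epoch in range(epochs):
    metric_logger = train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=20)
    # The MetricLogger keeps running averages of every loss it was updated with.
    writer.add_scalar("train/loss", metric_logger.meters["loss"].global_avg, epoch)
    lr_scheduler.step()
    coco_evaluator = evaluate(model, val_data_loader, device=device)
    # stats[0] is COCO mAP@[0.50:0.95] once summarize() has run inside evaluate().
    writer.add_scalar("val/mAP", coco_evaluator.coco_eval["bbox"].stats[0], epoch)

writer.close()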
The engine.py is:
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, scaler=None):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = f"Epoch: [{epoch}]"

    lr_scheduler = None
    if epoch == 0:
        warmup_factor = 1.0 / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        lr_scheduler = torch.optim.lr_scheduler.LinearLR(
            optimizer, start_factor=warmup_factor, total_iters=warmup_iters
        )

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(losses).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            losses.backward()
            optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger
The evaluate function is:
def evaluate(model, data_loader, device):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time

        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
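Note that evaluate() reports COCO mAP but no validation loss. One common workaround, if a per-epoch validation loss is wanted, is a separate pass that keeps the model in train mode (torchvision detection models only return their loss dict when given targets in train mode) while disabling gradients. A minimal sketch, not code from engine.py, with a helper name of my own choosing:
import torch

@torch.no_grad()
def validation_loss(model, data_loader, device):
    # Keep train() so the model returns its loss dict, but compute no gradients.
    model.train()
    total, batches = 0.0, 0
    for images, targets in data_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        total += sum(loss for loss in loss_dict.values()).item()
        batches += 1
    return total / max(batches, 1)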
I am trying to train a basic model for this competition. It is a multi-class problem where the target values go from 1 to 5 in steps of 0.5. The problem is that the training error is very bad, and so is the inference.
Is my model correct for this problem? I didn't use warmup at all and only ran it for a few epochs, but I don't think that would make much of a difference.
Also, here is the result of one inference:
(0, tensor([[0.0024, 0.4513, 1.3045, 0.0350, 0.4182, 0.6813]]))
The loss is 0?
The predictions are too low (they should start at 1).
Clearly something is wrong with the model. What is it?
Here's my code (which is based on this article):
class LangKnolModel(pl.LightningModule):
    def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
        super().__init__()
        self.bert = BertModel.from_pretrained(BERT_MODEL_NAME, return_dict=True)
        self.classifier = nn.Linear(self.bert.config.hidden_size, 6)
        self.n_training_steps = n_training_steps
        self.max_epochs = 8
        self.n_warmup_steps = n_warmup_steps
        self.criterion = nn.MSELoss()

    def forward(self, input_ids, attention_mask, labels=None):
        output = self.bert(input_ids, attention_mask=attention_mask)
        output = self.classifier(output.pooler_output)
        # output = torch.sigmoid(output)
        loss = 0
        if labels is not None:
            loss = self.criterion(output, labels)
        return loss, output

    def training_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["labels"]
        loss, outputs = self(input_ids, attention_mask, labels)
        self.log("train_loss", loss, prog_bar=True, logger=True)
        return {"loss": loss, "predictions": outputs, "labels": labels}

    def validation_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["labels"]
        loss, outputs = self(input_ids, attention_mask, labels)
        return loss

    def test_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["labels"]
        loss, outputs = self(input_ids, attention_mask, labels)
        self.log("test_loss", loss, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=2e-5)
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.n_warmup_steps,
            num_training_steps=self.n_training_steps
        )
        return dict(
            optimizer=optimizer,
            lr_scheduler=dict(
                scheduler=scheduler,
                interval='step'
            )
        )
The training code:
model = LangKnolModel(
    n_classes=len(LABELS_COLS),
    n_warmup_steps=1,
    n_training_steps=16,
)

trainer = pl.Trainer(
    gpus=1,
    max_epochs=5
)
trainer.fit(model, data_module)
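Separately, if the six outputs are meant to be scores on the 1-to-5 scale in 0.5 steps (my reading of n_classes=len(LABELS_COLS) and the description above, not something stated in the code), a small hypothetical helper for inspecting predictions could snap the raw regression outputs onto that scale:
import torch

def snap_to_scale(preds: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper: round to the nearest 0.5 and clamp into [1.0, 5.0].
    return torch.clamp(torch.round(preds * 2.0) / 2.0, min=1.0, max=5.0)

# e.g. the inference output above becomes tensor([[1.0, 1.0, 1.5, 1.0, 1.0, 1.0]])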
I have developed some code to apply an autoencoder to my dataset in order to extract hidden features from it. The dataset consists of 84 variables, and they have been normalised.
epochs = 10
batch_size = 128
lr = 0.008
# Convert Input and Output data to Tensors and create a TensorDataset
input = torch.Tensor(input.to_numpy())
output = torch.tensor(output.to_numpy())
data = torch.utils.data.TensorDataset(input, output)
# Split to Train, Validate and Test sets using random_split
number_rows = len(input) # The size of our dataset or the number of rows in excel table.
test_split = int(number_rows*0.3)
train_split = number_rows - test_split
train_set, test_set = random_split(data, [train_split, test_split])
# Create Dataloader to read the data within batch sizes and put into memory.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)
The model structure:
# Model structure
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(84, 128),
            nn.Tanh(),
            nn.Linear(128, 64),
            nn.Tanh(),
            nn.Linear(64, 16),
            nn.Tanh(),
            nn.Linear(16, 2),
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(2, 16),
            nn.Tanh(),
            nn.Linear(16, 64),
            nn.Tanh(),
            nn.Linear(64, 128),
            nn.Tanh(),
            nn.Linear(128, 84),
            nn.Sigmoid()
        )

    def forward(self, inputs):
        codes = self.encoder(inputs)
        decoded = self.decoder(codes)
        return codes, decoded
Optimiser and Loss function
# Optimizer and loss function
model = AutoEncoder()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_function = nn.MSELoss()
The training steps:
# Train
for epoch in range(epochs):
    for data, labels in train_loader:
        inputs = data.view(-1, 84)

        # Forward
        codes, decoded = model(inputs)

        # Backward
        optimizer.zero_grad()
        loss = loss_function(decoded, inputs)
        loss.backward()
        optimizer.step()

    # Show progress
    print('[{}/{}] Loss:'.format(epoch+1, epochs), loss.item())
The Autoencoder model is saved as:
# Save
torch.save(model,'autoencoder.pth')
At this point, I would like to ask for some help in understanding how I can extract the features from the hidden layer. These features extracted from the hidden layer will be used in another classification algorithm.
You need to register a hook on your model, and you can use this hook to extract features from any layer. However, it is a lot easier if you don't use nn.Sequential, because it combines the layers and they act as one module. I ran your code using the function below.
It is a function for feature extraction which basically takes a model as input and places a hook using the index of a layer.
class FE(nn.Module):
    def __init__(self, model_instance, output_layers, *args):
        super().__init__(*args)
        self.output_layers = output_layers
        self.selected_out = OrderedDict()
        self.pretrained = model_instance
        self.fhooks = []

        print("model_instance._modules.keys():", model_instance._modules.keys())
        for i, l in enumerate(list(self.pretrained._modules.keys())):
            print("index:", i, ", keys:", l)
            if i in self.output_layers:
                print("------------------------ > Hook is placed output of :", l)
                self.fhooks.append(getattr(self.pretrained, l).register_forward_hook(self.forward_hook(l)))

    def forward_hook(self, layer_name):
        def hook(module, input, output):
            self.selected_out[layer_name] = output
        return hook

    def forward(self, x):
        out = self.pretrained(x)  # your AutoEncoder's forward takes a single input tensor
        return out, self.selected_out
And to use it:
model_hooked = FE(model, output_layers=[0])
model_instance._modules.keys(): odict_keys(['encoder', 'decoder'])
index: 0 , keys: encoder
------------------------ > Hook is placed output of : encoder
index: 1 , keys: decoder
After placing the hook, you can simply feed data to the new hooked model and it will output two values: the first is the original output from the last layer, and the second is the output from the hooked layer.
out, layerout = model_hooked(data_sample)
If you want to extract features from the data loaders, you can use this function:
def extract_features(FE, layer_name, train_loader, test_loader):
    extracted_features = []
    lbls = []
    extracted_features_test = []
    lbls_test = []
    for data, target in train_loader:
        out, layerout = FE(data)
        a = layerout[layer_name]
        extracted_features.extend(a)
        lbls.extend(target)
    for data, target in test_loader:
        out, layerout = FE(data)
        a = layerout[layer_name]
        extracted_features_test.extend(a)
        lbls_test.extend(target)
    extracted_features = torch.stack(extracted_features)
    extracted_features_test = torch.stack(extracted_features_test)
    lbls = torch.stack(lbls)
    lbls_test = torch.stack(lbls_test)
    return extracted_features, lbls, extracted_features_test, lbls_test
And usage looks like this:
Features_TRAINLOADER, lbls, Features_TESTLOADER, lbls_test = extract_features(model_hooked, "encoder", train_loader, test_loader)
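As a side note, for this particular AutoEncoder the forward pass already returns codes alongside decoded, so the bottleneck features can also be collected without a hook; a minimal sketch under that assumption:
import torch

model.eval()
features, labels = [], []
with torch.no_grad():
    for data, target in train_loader:
        codes, _ = model(data.view(-1, 84))  # codes is the 2-dimensional bottleneck output
        features.append(codes)
        labels.append(target)
features = torch.cat(features)
labels = torch.cat(labels)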
I am really new to machine learning and not very well versed in coding in general. However, we need to look through the customer feedback at our store, which adds up to quite a lot each year, yet we cannot tell the percentage of positive, negative and neutral responses.
Currently I am trying to train a BERT model to do simple multi-class sentiment analysis. The input is our store's customer feedback. The feedback is not always clear-cut, since customers tend to go on at length about their experience, and their sentiment is not always obvious. However, we managed to get 2247 samples each for positive, negative and neutral.
But when I train it, the training accuracy is around 0.4%, which is super low. The validation score is around 60%, and the F1-score is around 60% for each label. I wonder what can be done to improve this training accuracy. I have been stuck for a while. Please take a look at my code and help me out with this.
I have tried changing the learning rate (all the rates BERT suggests, plus 1e-5), changing MAX_LEN, changing the number of epochs, and changing the dropout rate (0.1, 0.2, 0.3, 0.4, 0.5), but so far nothing has yielded results.
#read dataset
df = pd.read_csv("data.csv",header=None, names=['content', 'sentiment'], sep='\;', lineterminator='\r',encoding = "ISO-8859-1",engine="python")
from sklearn.utils import shuffle
df = shuffle(df)
df['sentiment'] = df['sentiment'].replace(to_replace = [-1, 0, 1], value = [0, 1, 2])
df.head()
#Load pretrained FinBert model and get bert tokenizer from it
PRE_TRAINED_MODEL_NAME = 'TurkuNLP/bert-base-finnish-cased-v1'
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
#Choose sequence Length
token_lens = []
for txt in df.content:
    tokens = tokenizer.encode(txt, max_length=512)
    token_lens.append(len(tokens))
sns.distplot(token_lens)
plt.xlim([0, 256]);
plt.xlabel('Token count');
MAX_LEN = 260
#Make a PyTorch dataset
class FIDataset(Dataset):
    def __init__(self, texts, targets, tokenizer, max_len):
        self.texts = texts
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = str(self.texts[item])
        target = self.targets[item]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }
#split test and train
df_train, df_test = train_test_split(
    df,
    test_size=0.1,
    random_state=RANDOM_SEED
)
df_val, df_test = train_test_split(
    df_test,
    test_size=0.5,
    random_state=RANDOM_SEED
)
df_train.shape, df_val.shape, df_test.shape
#data loader function
def create_data_loader(df, tokenizer, max_len, batch_size):
    ds = FIDataset(
        texts=df.content.to_numpy(),
        targets=df.sentiment.to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len
    )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=4
    )
#Load data into train, test, val
BATCH_SIZE = 16
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)
# Sentiment Classifier based on Bert model just loaded
class SentimentClassifier(nn.Module):
    def __init__(self, n_classes):
        super(SentimentClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        self.drop = nn.Dropout(p=0.1)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        returned = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        pooled_output = returned["pooler_output"]
        output = self.drop(pooled_output)
        return self.out(output)
#Create a Classifier instance and move to GPU
model = SentimentClassifier(3)
model = model.to(device)
#Optimize with AdamW
EPOCHS = 5
optimizer = AdamW(model.parameters(), lr= 2e-5, correct_bias=False)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)
loss_fn = nn.CrossEntropyLoss().to(device)
#Train each Epoch function
def train_epoch(
    model,
    data_loader,
    loss_fn,
    optimizer,
    device,
    scheduler,
    n_examples
):
    model = model.train()
    losses = []
    correct_predictions = 0

    for d in data_loader:
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        _, preds = torch.max(outputs, dim=1)
        loss = loss_fn(outputs, targets)
        correct_predictions += torch.sum(preds == targets)
        losses.append(loss.item())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    return correct_predictions.double() / n_examples, np.mean(losses)
#Eval model function
def eval_model(model, data_loader, loss_fn, device, n_examples):
    model = model.eval()
    losses = []
    correct_predictions = 0

    with torch.no_grad():
        torch.cuda.empty_cache()
        for d in data_loader:
            input_ids = d["input_ids"].to(device)
            attention_mask = d["attention_mask"].to(device)
            targets = d["targets"].to(device)
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask
            )
            _, preds = torch.max(outputs, dim=1)
            loss = loss_fn(outputs, targets)
            correct_predictions += torch.sum(preds == targets)
            losses.append(loss.item())

    return correct_predictions.double() / n_examples, np.mean(losses)
#training loop through the epochs
import torch

torch.cuda.empty_cache()
history = defaultdict(list)
best_accuracy = 0

if __name__ == '__main__':
    for epoch in range(EPOCHS):
        print(f'Epoch {epoch + 1}/{EPOCHS}')
        print('-' * 10)

        train_acc, train_loss = train_epoch(
            model,
            train_data_loader,
            loss_fn,
            optimizer,
            device,
            scheduler,
            len(df_train)
        )
        print(f'Train loss {train_loss} accuracy {train_acc}')

        val_acc, val_loss = eval_model(
            model,
            val_data_loader,
            loss_fn,
            device,
            len(df_val)
        )
        print(f'Val loss {val_loss} accuracy {val_acc}')
        print()

        history['train_acc'].append(train_acc)
        history['train_loss'].append(train_loss)
        history['val_acc'].append(val_acc)
        history['val_loss'].append(val_loss)

        if val_acc > best_accuracy:
            torch.save(model.state_dict(), 'best_model_state.bin')
            best_accuracy = val_acc
Edit: I have printed out preds and targets, as well as the train and val accuracy.
Here, in _, preds = torch.max(outputs, dim=1), you probably want argmax, not max?
Print out preds and targets to see better what's going on.
Edit after preds and targets were printed out: for epochs 4 and 5, preds matches targets exactly, so the train accuracy should be 1. I think the issue is that the accuracy is divided by n_examples, which is the number of examples in the whole train dataset, while it should be divided by the number of examples seen in the epoch.
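Following that diagnosis, a minimal sketch of the bookkeeping (my rewrite of the loop above, reusing its variable names, not the original code) that divides by the number of examples the loop actually saw:
def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler):
    model = model.train()
    losses = []
    correct = 0
    seen = 0
    for d in data_loader:
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = loss_fn(outputs, targets)
        preds = outputs.argmax(dim=1)
        correct += (preds == targets).sum().item()
        seen += targets.size(0)   # count what was actually iterated over
        losses.append(loss.item())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
    return correct / seen, np.mean(losses)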
I am trying to make a model for data with 40 features that have to be classified into 10 classes. I am new to PyTorch and this is my first project in it.
I was given a custom Dataset class (which I am not allowed to change), as follows:
class MyData(Dataset):
    def __init__(self, mode):
        with open(mode+'.pkl', 'rb') as handle:
            data = pickle.load(handle)
        self.X = data['x'].astype('float')
        self.y = data['y'].astype('long')

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        sample = (self.X[idx], self.y[idx])
        return sample
I have done some preprocessing on the data, like normalization, and then trained and saved the model. As I wasn't allowed to change the Dataset class, I made the changes outside of it and then used a DataLoader. The preprocessing is as follows:
train_data=MyData("train")
features, labels = train_data[:]
df = pd.DataFrame(features)
x = df.values
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
input_array = x_scaled
output_array = labels
inputs = torch.Tensor(input_array)
targets = torch.Tensor(output_array).type(torch.LongTensor)
dataset = TensorDataset(inputs, targets)
train_ds, val_ds = random_split(dataset, [3300, 300])
batch_size = 300
n_epochs = 200
log_interval = 10
train_losses = []
train_counter = []
test_losses = []
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]
After this, I define the training and testing functions (and remove the print statements, as the autograder will not be able to grade my assignment otherwise) as follows:
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data.double())
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            train_losses.append(loss.item())
            train_counter.append(
                (batch_idx*32) + ((epoch-1)*len(train_loader.dataset)))
            save_model(model)

def test():
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            output = model(data.double())
            test_loss += criterion(output, target).item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    test_loss /= len(val_loader.dataset)
    test_losses.append(test_loss)

test()

for epoch in range(1, n_epochs + 1):
    train(epoch)
    test()
Even after doing that, the autograder is still not able to grade my code. I mainly think it's because I am making an error with how I feed the data to the model, but I am not able to narrow down what exactly the problem is and how to correct it. As I'm new to PyTorch, I was looking at how to do the preprocessing, but all the examples I found involved the Dataset class, so I'm not sure how to go about it.
My model is as follows:
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        # self.flatten = nn.Flatten()
        self.net_stack = nn.Sequential(
            nn.Conv1d(in_channels=40, out_channels=256, kernel_size=1, stride=2),  # applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(256, affine=True),
            nn.Conv1d(in_channels=256, out_channels=128, kernel_size=1, stride=2),  # applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(128, affine=True),
            nn.Conv1d(in_channels=128, out_channels=64, kernel_size=1, stride=2),  # applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(64, affine=True),
            nn.Conv1d(in_channels=64, out_channels=32, kernel_size=1, stride=2),  # applying batch norm
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=1),
            nn.Dropout(p=0.1),
            nn.BatchNorm1d(32, affine=True),
            nn.Flatten(),
            nn.Linear(32, 10),
            nn.Softmax(dim=1)).double()

    def forward(self, x):
        # result = self.net_stack(x[None])
        x = x.double()
        result = self.net_stack(x[:, :, None]).double()
        print(result.size())
        return result
One instruction I've been given is:
# Please make sure we can load your model with:
# model = MyModel()
# This means you must give default values to all parameters you may wish to set, such as output size.
You can try to do it within the training loop:
for batch_idx, (data, target) in enumerate(train_loader):
    # you can do something here to manipulate your input
    data = transform(data)
    data = data.to('cuda')  # move to GPU; I noticed you didn't do this in your training loop
    # Forward pass
    output = model(data)
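A slightly fuller sketch of the same idea (variable names beyond those in the question are mine; it assumes model, criterion, optimizer and train_loader from the code above and that a GPU is available):
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

for batch_idx, (data, target) in enumerate(train_loader):
    data = data.to(device)      # .to() returns a new tensor, so reassign it
    target = target.to(device)
    optimizer.zero_grad()
    output = model(data.double())
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()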