I m try to train an rbf network... I used MNIST database. And pytorch framework...
The results are the same in each epoch...
The results....:
Epoch: 1
Accuracy: 0.785 Loss: 2.435 Recall: 0.386 Precision: 0.258
Epoch: 2
Accuracy: 0.785 Loss: 2.435 Recall: 0.386 Precision: 0.258
Epoch: 3
Accuracy: 0.785 Loss: 2.435 Recall: 0.386 Precision: 0.258
Epoch: 4
Accuracy: 0.785 Loss: 2.435 Recall: 0.386 Precision: 0.258
My Code... I think that the problem is somewhere in the linear layer. The model has no improve after the training epoch, maybe it's the linear layer. It seems like the weights no change...! But i don't know why...?
class RBF(nn.Module):
def __init__(self, in_layers, centers, sigmas):
super(RBF, self).__init__()
self.in_layers = in_layers
self.centers = nn.Parameter(centers)
self.sigmas = nn.Parameter(torch.Tensor(self.centers.size(0)))
torch.nn.init.constant_(self.sigmas, sigmas)
def forward(self, x):
x = x.view(-1, self.in_layers)
size = [self.centers.size(0), x.size(0)]
sigma = self.sigmas.view(-1).to(device)**2
dists = torch.empty(size).to(device)
for i,c in enumerate(self.centers):
c = c.reshape(-1,c.size(0))
temp = (x-c).pow(2).sum(-1).pow(0.5)
dists[i] = temp
dists = dists.permute(1,0)
phi = torch.exp(-1*(dists/(2*sigma))) #gaussian
return phi
class Net(nn.Module):
def __init__(self, in_layers, centers, sigmas):
super(Net, self).__init__()
self.rbf_layers = nn.ModuleList()
self.linear_layers = nn.ModuleList()
for i in range(len(in_layers) - 1):
self.rbf_layers.append(RBF(in_layers[i], centers, sigmas))
self.linear_layers.append(nn.Linear(centers.size(0), in_layers[i+1], bias = True))
def forward(self, x):
out = x
for i in range(len(self.rbf_layers)):
out = self.rbf_layers[i](out)
out = F.sigmoid( self.linear_layers[i](out.float()) )
return out
def training(engine, batch, device, model, criterion, optimizer):
inputs, labels = batch[0].to(device), batch[1].to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
return outputs, labels
Ofcourse the code continuous but i think these are enough to solve the problem(if you want smthing extra 'i m here').... Do you have any ideas???
and the training part of code....
def training(engine, batch, device, model, criterion, optimizer):
inputs, labels = batch[0].to(device), batch[1].to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
return outputs, labels
def nn_run1(batch, classes, dim, learning_rate, epochs, clusters):
# ---Load Model's Parameters---
train_loader, test_loader = data_loading(batch, shuffle=False)
kmeans_input = train_loader.dataset.train_data
kmeans_input = torch.reshape(kmeans_input.double(), (kmeans_input.size(0), -1))
_, centers = Kmeans(kmeans_input, clusters)
centers = centers.to(device)
sigma = Sigmas(centers)
layers = in_layers(dim, len(classes), layers = 1)
# ---Model Setup---
model = Net(layers, centers, sigma)
model.cuda()
criterion = nn.CrossEntropyLoss()
print(model.parameters)
optimizer = torch.optim.SGD(model.parameters(), learning_rate)
Related
I am training a simple LSTM model for binary text classification. Here is the model class:
class LSTM(nn.Module):
def __init__(self, vocabulary_size, embeddings_size, num_classes):
super(LSTM, self).__init__()
self.vocabulary_size = vocabulary_size
self.embeddings_size = embeddings_size
self.embedding = nn.Embedding(num_embeddings=vocabulary_size,
embedding_dim=embeddings_size,
padding_idx=0)
self.lstm = nn.LSTM(input_size=embeddings_size,
hidden_size=128,
num_layers=1,
batch_first=True)
self.fc = nn.Linear(in_features=128,
out_features=num_classes)
def forward(self, x):
out = self.embedding(x)
out, _ = self.lstm(out)
out = out[:, -1]
out = self.fc(out)
out = torch.sigmoid(out)
return out
I am using BCELoss and Adam optimizer created with the following code:
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr=learning_rate)
This is the training loop that I am using:
train_steps = len(train_data_loader)
for epoch in range(epochs):
train_loss = 0
model.train()
for i, (sequences, labels) in enumerate(train_data_loader):
optimizer.zero_grad()
sequences = sequences.to(device)
labels = labels.to(device)
outputs = model(sequences)
loss = criterion(outputs, labels)
train_loss += loss.item()
loss.backward()
optimizer.step()
print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss / train_steps:.4f}')
I have experimented with different datasets, number of epochs, learning rate, batch size. However, the model does not seem to learn - the loss is always around 0.7 and only the 0 class is predicted.
Does anyone know what the issue could be?
I am working with the MNIST dataset and I have created the following network. I want to overfit the training data and I think I am doing that here. My training loss is lower than my validation loss. This is the code that I have come up with. Please look at it and let me know if I am overfitting the training data, if I am not then how do I go about it?
class NN(nn.Module):
def __init__(self):
super().__init__()
self.layers = nn.Sequential(
nn.Flatten(),
nn.Linear(784,4096),
nn.ReLU(),
nn.Linear(4096,2048),
nn.ReLU(),
nn.Linear(2048,1024),
nn.ReLU(),
nn.Linear(1024,512),
nn.ReLU(),
nn.Linear(512,256),
nn.ReLU(),
nn.Linear(256,128),
nn.ReLU(),
nn.Linear(128,64),
nn.ReLU(),
nn.Linear(64,32),
nn.ReLU(),
nn.Linear(32,16),
nn.ReLU(),
nn.Linear(16,10))
def forward(self,x):
return self.layers(x)
def accuracy_and_loss(model, loss_function, dataloader):
total_correct = 0
total_loss = 0
total_examples = 0
n_batches = 0
with torch.no_grad():
for data in testloader:
images, labels = data
outputs = model(images)
batch_loss = loss_function(outputs,labels)
n_batches += 1
total_loss += batch_loss.item()
_, predicted = torch.max(outputs, dim=1)
total_examples += labels.size(0)
total_correct += (predicted == labels).sum().item()
accuracy = total_correct / total_examples
mean_loss = total_loss / n_batches
return (accuracy, mean_loss)
def define_and_train(model,dataset_training, dataset_test):
trainloader = torch.utils.data.DataLoader( small_trainset, batch_size=500, shuffle=True)
testloader = torch.utils.data.DataLoader( dataset_test, batch_size=500, shuffle=True)
values = [1e-8,1e-7,1e-6,1e-5]
model = NN()
for params in values:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay = 1e-7)
train_acc = []
val_acc = []
train_loss = []
val_loss = []
for epoch in range(100):
total_loss = 0
total_correct = 0
total_examples = 0
n_mini_batches = 0
for i,mini_batch in enumerate(trainloader,0):
images,labels = mini_batch
optimizer.zero_grad()
outputs = model(images)
loss = loss_function(outputs,labels)
loss.backward()
optimizer.step()
n_mini_batches += 1
total_loss += loss.item()
_, predicted = torch.max(outputs, dim=1)
total_examples += labels.size(0)
total_correct += (predicted == labels).sum().item()
epoch_training_accuracy = total_correct / total_examples
epoch_training_loss = total_loss / n_mini_batches
epoch_val_accuracy, epoch_val_loss = accuracy_and_loss( model, loss_function, testloader )
print('Params %f Epoch %d loss: %.3f acc: %.3f val_loss: %.3f val_acc: %.3f'
%(params, epoch+1, epoch_training_loss, epoch_training_accuracy, epoch_val_loss, epoch_val_accuracy))
train_loss.append( epoch_training_loss )
train_acc.append( epoch_training_accuracy )
val_loss.append( epoch_val_loss )
val_acc.append( epoch_val_accuracy )
history = { 'train_loss': train_loss,
'train_acc': train_acc,
'val_loss': val_loss,
'val_acc': val_acc }
return ( history, model )
history1, net1 = define_and_train(model,dataset_training,dataset_test)
I am trying to overfit the training data so that later i can apply regularization and then reduce the overfitting which will give me a better understanding of the process
Although I won't attempt to provide a rigorous definition, the term "overfit" typically means that the training loss continues to decrease whereas the validation loss stays stagnant at a position higher than the training loss, or continues to increase with more iterations.
Therefore, it is difficult to know whether your network is overfitting solely based on your code alone. Since dense, fully-connected networks tend to overfit easily in the absence of dropout layers or other regularizers, my hunch would be that your network is indeed overfitting according to your intention. However, we would have to see your tensorboard logs or loss plot to determine whether the model is overfitting.
If you want to overfit your network to the dataset, I suggest that you construct a much larger model with more hidden layers. Overfitting occurs when the dataset is "too easy" for the model and it starts to remember the training set itself without learning generalizable patterns that can be applied to the validation set.
I am writing a classifier that takes a surname and predicts a language it belongs to. I found that small batch sizes (256 and less) perform poorly compared to big batch sizes (2048 and more). Could someone give me some insight on why this is happening and how to fix it? Thank you.
Training code:
def indices_to_packed(names, input_size):
names = [F.one_hot(item, input_size).float() for item in names]
names_packed = pack_sequence(names, enforce_sorted=False)
return names_packed
def infer(model, data, labels, lengths, device):
data_packed = indices_to_packed(data, model.rnn.input_size)
data_packed, labels, lengths = data_packed.to(device), labels.to(device), lengths.to(device)
preds = model(data_packed, lengths)
loss = loss_fn(preds, labels)
return loss, preds
results = {}
epochs = 100
for BATCH_SIZE in [4096, 2048, 256]:
train_loader = data.DataLoader(train_data, BATCH_SIZE, sampler=train_sampler, collate_fn=partial(my_collate, input_size=input_size, output_size=output_size))
val_loader = data.DataLoader(val_data, BATCH_SIZE, sampler=val_sampler, collate_fn=partial(my_collate, input_size=input_size, output_size=output_size))
model = LSTM(input_size, HIDDEN_SIZE, NUM_LAYERS, DROPOUT, output_size)
optimizer = torch.optim.Adam(model.parameters())
model.to(device)
train_losses = []
val_losses = []
cur_losses = {}
duration = 0
for epoch in range(epochs):
start = time.time()
train_loss = 0
model.train()
# Using PackedSequence
for names, langs, lengths in train_loader:
optimizer.zero_grad()
loss, _ = infer(model, names, langs, lengths, device)
loss.backward()
optimizer.step()
train_loss += loss
train_loss /= len(train_data)
train_losses.append(train_loss.cpu().detach().numpy())
model.eval()
val_loss = 0
with torch.no_grad():
for names, langs, lengths in val_loader:
loss, _ = infer(model, names, langs, lengths, device)
val_loss += loss
val_loss /= len(val_data)
val_losses.append(val_loss.cpu().detach().numpy())
cur_duration = time.time() - start
duration += cur_duration
log_line = (f"BATCH_SIZE: {BATCH_SIZE} epoch: {epoch} train loss: "
f"{train_loss:.5f} val loss: {val_loss:.5f}")
print(log_line)
cur_losses["train_losses"] = train_losses
cur_losses["val_losses"] = val_losses
results[BATCH_SIZE] = {"losses" : cur_losses, "duration" : duration, "model": model}
Model:
class LSTM(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, dropout, output_size):
super().__init__()
self.rnn = nn.LSTM(input_size, hidden_size, num_layers, dropout=DROPOUT)
self.linear = nn.Linear(hidden_size, output_size)
self.softmax = nn.LogSoftmax(dim=1)
def forward(self, x, lengths):
lstm_out, _ = self.rnn(x)
# https://discuss.pytorch.org/t/get-each-sequences-last-item-from-packed-sequence/41118/7
sum_batch_sizes = torch.cat((
torch.zeros(2, dtype=torch.int64),
torch.cumsum(lstm_out.batch_sizes, 0)
))
sorted_lengths = lengths[lstm_out.sorted_indices]
last_seq_idxs = sum_batch_sizes[sorted_lengths] + torch.arange(lengths.size(0))
last_seq_items = lstm_out.data[last_seq_idxs]
lstm_last_out = last_seq_items[lstm_out.unsorted_indices]
linear_out = self.linear(lstm_last_out)
softmax_out = self.softmax(linear_out)
return softmax_out
Losses with different batch sizes:
It looks like there issue is how the loss is calculated.
train_loss += loss line accumulates the loss. When batch size is higher, there will be fewer steps to do. The code normalizes this by dividing by the length of train data, train_loss /= len(train_data), but should probably take into account the batch size: train_loss /= (len(train_data) / BATCH_SIZE).
The same for validation loss, but the effect is different probably because of smaller data size compared to training data.
I encounter the following problem.
I perform an increasing cross-validation; I have 20 subjects in my dataset and try to classify images. I start with 3 subjects and perform a cross-validation with k=3; that is I train 3 different models and validate on the subject left out. And this is what I do for 4, 5, ..., 20 drivers. Hence, I have a lot of models trained.
Now I wanted to check the performance of all models on another dataset, but for some reason the accuracy is the same for all models, which must be a bug somewhere.
I already use copy.deepcopy(), so I must have an error somewhere else.
I'm open for any hints!
Here is the code for the training function:
def train_model(model, num_classes, dirname, trainloader, valloader, trainset_size, valset_size, criterion, optimizer, scheduler, patience, min_delta, num_epochs, fold):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
since = time.time()
train_loss, train_acc, val_loss, val_acc = [], [], [], []
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
early_stopping = False
counter = 0
last_train_epoch = 0
for epoch in range(num_epochs):
if early_stopping:
print('\nEarly Stopping')
break
print('Epoch {}/{}'.format(epoch+1, num_epochs))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'val']:
if phase == 'train':
model.train() # Set model to training mode
dataloader = trainloader
dataset_size = trainset_size
else:
model.eval() # Set model to evaluate mode
dataloader = valloader
dataset_size = valset_size
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for inputs, labels in dataloader:
model = model.to(device)
inputs = inputs.to(device)
#labels = labels.long().to(device)
labels = labels.to(device) #test_tensor.type(torch.FloatTensor)
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
# zero the parameter gradients
optimizer.zero_grad()
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
if phase == 'train':
scheduler.step()
epoch_loss = running_loss / dataset_size
epoch_acc = running_corrects.double() / dataset_size
if phase == 'train':
train_loss.append(epoch_loss)
train_acc.append(epoch_acc)
else:
val_loss.append(epoch_loss)
val_acc.append(epoch_acc)
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# early stopping
if phase == 'val':
if counter == patience:
early_stopping = True
break
if epoch == 0:
best_loss = epoch_loss
else:
if best_loss >= epoch_loss + min_delta:
print('Validation loss decreased ({:.4f} --> {:.4f}). Saving model ...'.format(best_loss,epoch_loss))
best_model_wts = copy.deepcopy(model.state_dict())
torch.save(model.state_dict(), '{}/weights/model_fold_{}.pth'.format(dirname,fold))
last_train_epoch = epoch + 1
best_loss = epoch_loss
counter = 0
else:
counter += 1
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
# save best model
return model, train_acc, train_loss, val_acc, val_loss, last_train_epoch
Here is how I call the function:
model = net
model = model.to(device)
# train
[model, train_acc, train_loss, val_acc, val_loss, last_train_epoch] = train_model(model, num_classes, dirname,\
trainloader, valloader,\
trainset_size, valset_size,\
criterion, optimizer, \
exp_lr_scheduler, patience, \
min_delta, num_epochs, fold=val_index)
# test model
[preds_val, labels_val, idx_false_val, pred_time_val_fold] = test(model, valloader)
[preds_tr, labels_tr, idx_false_train, pred_time_train_fold] = test(model, trainloader)
[preds_all, labels_all, idx_false_all, pred_time_all_fold] = test(model, allloader)
print('Accuracy on all data: ', accuracy_score(labels_all, preds_all))
and for the sake of completeness, this is what the test() function looks like:
def test(model, dataloader):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
pred_labels, gt_labels, idx_false, pred_time = [], [], [], []
was_training = model.training
model.eval()
with torch.no_grad():
for i, (inputs, labels) in enumerate(dataloader):
inputs = inputs.to(device)
labels = labels.to(device)
start_pred = time.clock()
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
end_pred = time.clock()
pred_time.append(end_pred-start_pred)
for j in range(inputs.size()[0]):
pred_labels.append(preds[j].item())
gt_labels.append(labels[j].item())
for i in range(len(pred_labels)):
if pred_labels[i] != gt_labels[i]:
idx_false.append(i)
model.train(mode=was_training)
return pred_labels, gt_labels, idx_false, pred_time
Edit: It looks as if it always saves the same models even though I try to make sure that only the updated weights of the best model are saved.
I am new to PyTorch. I'm trying to use a pre-trained Faster RCNN torchvision.models.detection.fasterrcnn_resnet50_fpn() for object detection project. I have created a CustomDataset(Dataset) class to handle the custom dataset.
Here is the custom class implementation
class ToTensor(object):
"""Convert ndarrays in sample to Tensors."""
def __call__(self, sample):
image, landmarks = sample['image'], sample['meta_data']
# swap color axis because
# numpy image: H x W x C
# torch image: C X H X W
image = image.transpose((2, 0, 1))
return {'image': torch.from_numpy(image),
'meta_data': landmarks}
class CustomDataset(Dataset):
"""Custom Landmarks dataset."""
def __init__(self, data_dir, root_dir, transform=None):
"""
Args:
data_dir (string): Directory with all the labels(json).
root_dir (string): Directory with all the images.
transform (callable, optional): Optional transform to be applied
on a sample.
"""
self.data_dir = data_dir
self.root_dir = root_dir
self.transform = transform
def __len__(self):
return len(os.listdir(self.data_dir))
def __getitem__(self, idx):
img_name = sorted(os.listdir(self.root_dir))[idx]
image = io.imread(self.root_dir+'/'+img_name, plugin='matplotlib')
json_file = sorted(os.listdir(self.data_dir))[idx]
with open(self.data_dir+'/'+json_file) as f:
meta_data = json.load(f)
meta_data = meta_data['annotation']['object']
sample = {'image': image, 'meta_data': meta_data}
to_tensor = ToTensor()
transformed_sample = to_tensor(sample)
if self.transform:
sample = self.transform(sample)
return transformed_sample
Here is the train_model function
def train_model(model, criterion, optimizer, lr_scheduler, num_epochs=25):
since = time.time()
best_model = model
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['train', 'test']:
if phase == 'train':
optimizer = lr_scheduler(optimizer, epoch)
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
for data in dset_loaders[phase]:
# get the inputs
inputs, labels = data['image'], data['meta_data']
inputs= inputs.to(device) # ,
# zero the parameter gradients
optimizer.zero_grad()
# forward
outputs = model(inputs, labels)
_, preds = torch.max(outputs.data, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item()
running_corrects += torch.sum(preds == labels).item()
epoch_loss = running_loss / dset_sizes[phase]
epoch_acc = running_corrects / dset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'test' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model = copy.deepcopy(model)
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
return best_model
While performing model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=25) I am getting "RuntimeError: _thnn_upsample_bilinear2d_forward not supported on CUDAType for Byte"
It appears your datapoints are byte tensors, i.e type uint8. Try casting your data into float32
# Replace this
inputs = inputs.to(device)
# With this
inputs = inputs.float().to(device)
Note that the torchvision models expect data to be normalized in a specific way. Check here for the procedure, which basically entails using
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
for normalizing your data.