I am trying to access the Hugging Face AraBERT model, but this error appears:
`RuntimeError                              Traceback (most recent call last)
<ipython-input-...> in <module>
     28 lr_scheduler = get_linear_schedule_with_warmup(optimizer=opti, num_warmup_steps=num_warmup_steps, num_training_steps=t_total)
     29
---> 30 train_bert(net, criterion, opti, lr, lr_scheduler, train_loader, val_loader, epochs, iters_to_accumulate)
3 frames
/usr/local/lib/python3.8/dist-packages/torch/serialization.py in __init__(self, name)
    285 class _open_zipfile_writer_file(_opener):
    286     def __init__(self, name) -> None:
--> 287         super(_open_zipfile_writer_file, self).__init__(torch._C.PyTorchFileWriter(str(name)))
    288
    289     def __exit__(self, *args) -> None:
RuntimeError: Parent directory models/aubmindlab does not exist.`
I copied the model name correctly and I pip-installed farasapy, but the error still occurs. How can I access the model?
Here is the calling block:
def train_bert(net, criterion, opti, lr, lr_scheduler, train_loader, val_loader, epochs, iters_to_accumulate):
    best_loss = np.Inf
    best_ep = 1
    nb_iterations = len(train_loader)
    print_every = nb_iterations // 5  # print the training loss 5 times per epoch
    iters = []
    train_losses = []
    val_losses = []
    scaler = GradScaler()
    for ep in range(epochs):
        net.train()
        running_loss = 0.0
        for it, (seq, attn_masks, token_type_ids, labels) in enumerate(tqdm(train_loader)):
            # Converting to cuda tensors
            seq, attn_masks, token_type_ids, labels = \
                seq.to(device), attn_masks.to(device), token_type_ids.to(device), labels.to(device)
            # Enables autocasting for the forward pass (model + loss)
            with autocast():
                # Obtaining the logits from the model
                logits = net(seq, attn_masks, token_type_ids)
                # Computing loss
                loss = criterion(logits.squeeze(-1), labels.float())
                loss = loss / iters_to_accumulate  # Normalize the loss because it is averaged
            # Backpropagating the gradients
            # Scales loss. Calls backward() on scaled loss to create scaled gradients.
            scaler.scale(loss).backward()
            if (it + 1) % iters_to_accumulate == 0:
                # Optimization step
                # scaler.step() first unscales the gradients of the optimizer's assigned params.
                # If these gradients do not contain infs or NaNs, opti.step() is then called,
                # otherwise, opti.step() is skipped.
                scaler.step(opti)
                # Updates the scale for next iteration.
                scaler.update()
                # Adjust the learning rate based on the number of iterations.
                lr_scheduler.step()
                # Clear gradients
                opti.zero_grad()
            running_loss += loss.item()
            if (it + 1) % print_every == 0:  # Print training loss information
                print()
                print("Iteration {}/{} of epoch {} complete. Loss : {} "
                      .format(it+1, nb_iterations, ep+1, running_loss / print_every))
                running_loss = 0.0
        val_loss = evaluate_loss(net, device, criterion, val_loader)  # Compute validation loss
        print()
        print("Epoch {} complete! Validation Loss : {}".format(ep+1, val_loss))
        if val_loss < best_loss:
            print("Best validation loss improved from {} to {}".format(best_loss, val_loss))
            print()
            net_copy = copy.deepcopy(net)  # save a copy of the model
            best_loss = val_loss
            best_ep = ep + 1
    # Saving the model
    path_to_model = 'models/{}_lr_{}_val_loss_{}_ep_{}.pt'.format(bert_model, lr, round(best_loss, 5), best_ep)
    torch.save(net_copy.state_dict(), path_to_model)
    print("The model has been saved in {}".format(path_to_model))
    del loss
    torch.cuda.empty_cache()
And here is the block that raises the error (it occurs on the last line):
`# Set all seeds to make reproducible results
set_seed(1)
# Creating instances of training and validation set
print("Reading training data...")
train_set = CustomDataset(df_train, maxlen, bert_model)
print("Reading validation data...")
val_set = CustomDataset(df_val, maxlen, bert_model)
# Creating instances of training and validation dataloaders
train_loader = DataLoader(train_set, batch_size=bs, num_workers=5)
val_loader = DataLoader(val_set, batch_size=bs, num_workers=5)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = SentencePairClassifier(bert_model, freeze_bert=freeze_bert)
if torch.cuda.device_count() > 1:  # if multiple GPUs
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    net = nn.DataParallel(net)
net.to(device)
criterion = nn.BCEWithLogitsLoss()
opti = AdamW(net.parameters(), lr=lr, weight_decay=1e-2)
num_warmup_steps = 0 # The number of steps for the warmup phase.
num_training_steps = epochs * len(train_loader) # The total number of training steps
t_total = (len(train_loader) // iters_to_accumulate) * epochs # Necessary to take into account Gradient accumulation
lr_scheduler = get_linear_schedule_with_warmup(optimizer=opti, num_warmup_steps=num_warmup_steps, num_training_steps=t_total)
train_bert(net, criterion, opti, lr, lr_scheduler, train_loader, val_loader, epochs, iters_to_accumulate)`
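What the traceback suggests: the Hugging Face Hub id stored in bert_model contains a slash (the aubmindlab/ prefix), so path_to_model expands to models/aubmindlab/..., and torch.save does not create missing parent directories. A minimal sketch of a workaround for the save step inside train_bert, assuming bert_model holds the Hub id:

import os

# Flatten the Hub id so its slash does not create a nested path,
# and create the target directory before saving (torch.save will not).
safe_name = bert_model.replace('/', '-')
path_to_model = 'models/{}_lr_{}_val_loss_{}_ep_{}.pt'.format(safe_name, lr, round(best_loss, 5), best_ep)
os.makedirs(os.path.dirname(path_to_model), exist_ok=True)
torch.save(net_copy.state_dict(), path_to_model)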
I am working on time series prediction using RNNs implemented in Keras on Google Colaboratory. I implemented the RNN as follows:
import tensorflow as tf
from tensorflow import keras
mae = keras.losses.MeanAbsoluteError()
hidden_neurons = 50
output_neurons = 1
epoch_size = 50
batch_size = 72
# x_train has shape (500, 1, 23)
LSTM_layer = keras.layers.LSTM(hidden_neurons, input_shape = (x_train.shape[1], x_train.shape[2]), dropout = 0.05)
output_layer = keras.layers.Dense(1)
test_model = keras.Sequential(layers = (LSTM_layer, output_layer))
test_model.reset_states()
test_model.compile(optimizer = 'adam', loss = mae)
test_model.summary()
history = test_model.fit(tf.expand_dims(x_train, axis=-1), y_train, epochs = epoch_size, batch_size = batch_size, validation_data=(x_test, y_test), shuffle = False)
# y_train has shape (500, 1)
# x_test has shape (500, 1, 23)
# y_test has shape (500, 1)
I have the above code (except the imports) in a single code cell. When I start a fresh runtime, the network trains fine, as expected. But after executing the code cell around three or four times, Colab throws the following error:
ValueError Traceback (most recent call last)
<ipython-input-23-3ac5cc808611> in <module>
12 test_model.compile(optimizer = 'adam', loss = mae)
13 test_model.summary()
---> 14 history = test_model.fit(tf.expand_dims(x_train, axis=-1), y_train, epochs = epoch_size, batch_size = batch_size, validation_data=(x_test, y_test), shuffle = False)
...
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in autograph_handler(*args, **kwargs)
1145 except Exception as e: # pylint:disable=broad-except
1146 if hasattr(e, "ag_error_metadata"):
-> 1147 raise e.ag_error_metadata.to_exception(e)
1148 else:
1149 raise
ValueError: Input 0 of layer "sequential_2" is incompatible with the layer: expected shape=(None, 1, 23), found shape=(None, 23)
The error persists if tf.expand_dims(x_train, axis=-1) is omitted in test_model.fit() while fitting the Sequential model.
I guess this has something to do with the layer inputs somehow being changed during execution. I have tried using test_model.reset_states() and running
keras.backend.clear_session()
del test_model
in a separate code cell, but only forcibly killing the Colab runtime seems to work:
import os
os.kill(os.getpid(), 9)
What could cause the layer inputs to change midway during program run?
EDIT: I got the same error when I tried running the cells on Jupyter Notebook on my PC rather than on Colab.
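One way to rule out stale layer state between cell executions is to build fresh layer objects on every run and to pass x_train directly (per the shape comments it is already 3-D, so the expand_dims call should not be needed). A minimal sketch under those assumptions:

def build_model(timesteps, features, hidden_neurons=50):
    # Fresh layers on each call, so a rerun can never reuse layer objects
    # that were already bound to a previous model's input shape.
    model = keras.Sequential([
        keras.layers.LSTM(hidden_neurons, input_shape=(timesteps, features), dropout=0.05),
        keras.layers.Dense(1),
    ])
    model.compile(optimizer='adam', loss=keras.losses.MeanAbsoluteError())
    return model

test_model = build_model(x_train.shape[1], x_train.shape[2])
history = test_model.fit(x_train, y_train, epochs=50, batch_size=72,
                         validation_data=(x_test, y_test), shuffle=False)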
I have created a function for evaluating a model. It takes as input the model and a validation data loader and returns the validation accuracy, validation loss, and weighted F1 score.
def evaluate(model, val_dataloader):
    """
    After the completion of each training epoch, measure the model's performance
    on our validation set.
    """
    # Put the model into evaluation mode. The dropout layers are disabled during
    # test time.
    model.eval()
    # Tracking variables
    val_accuracy = []
    val_loss = []
    f1_weighted = []
    # For each batch in our validation set...
    for batch in val_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)
        # Compute loss
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())
        # Get the predictions
        preds = torch.argmax(logits, dim=1).flatten()
        # Calculate the accuracy rate
        accuracy = (preds == b_labels).cpu().numpy().mean() * 100
        val_accuracy.append(accuracy)
        # Calculate the weighted F1 score
        f1_metric = F1Score('weighted')
        f1_weighted = f1_metric(preds, b_labels)
    # Compute the average accuracy and loss over the validation set.
    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)
    f1_weighted = np.mean(f1_weighted)
    return val_loss, val_accuracy, f1_weighted
The code for the F1 score can be found here:
Measuring F1 score for multiclass classification natively in PyTorch
Before the evaluation function there is a function that trains a BERT model and has the following signature:
train(model, train_dataloader, val_dataloader, epochs, evaluation).
Thus, if evaluation=True, the validation accuracy is shown at the end of each epoch.
The dataloaders are created in the following way:
# Convert other data types to torch.Tensor
train_labels = torch.tensor(authors_train)
# Create the DataLoader for our training set
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
The dataloaders for the validation and test sets are created in a similar way.
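For instance, a sketch of the validation loader by analogy (val_inputs, val_masks, and authors_val are assumed names, not from the original code):

val_labels = torch.tensor(authors_val)
val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = SequentialSampler(val_data)  # evaluation does not need shuffling
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)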
Update:
I changed the line
f1_weighted = f1_metric(preds, b_labels)
to this one
f1_weighted.append(f1_metric(preds, b_labels))
and now I have the following error
AttributeError Traceback (most recent call last)
<ipython-input-49-0e0f6d227c4f> in <module>()
1 set_seed(42) # Set seed for reproducibility
2 bert_classifier, optimizer, scheduler = initialize_model(epochs=4)
----> 3 train(bert_classifier, train_dataloader, val_dataloader, epochs=4, evaluation=True)
4
5 #1. 77.28
3 frames
<__array_function__ internals> in mean(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/numpy/core/_methods.py in _mean(a, axis, dtype, out, keepdims)
168 ret = arr.dtype.type(ret / rcount)
169 else:
--> 170 ret = ret.dtype.type(ret / rcount)
171 else:
172 ret = ret / rcount
AttributeError: 'torch.dtype' object has no attribute 'type'
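A likely reading of this error: f1_metric returns a torch tensor, so f1_weighted becomes a list of tensors, and np.mean then trips over the torch dtype. Converting each batch score to a plain Python float before averaging avoids this. A minimal sketch, assuming f1_metric returns a zero-dimensional tensor:

# inside the batch loop: store a float, not a tensor
f1_weighted.append(f1_metric(preds, b_labels).item())

# after the loop: an ordinary list of floats averages cleanly
f1_weighted = np.mean(f1_weighted)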
I am using SHAP for my model analysis, and while calling DeepExplainer I am getting "AttributeError: 'Sequential' object has no attribute 'eval'".
I am using Theano with Keras instead of TensorFlow because there is a version mismatch issue between TensorFlow and SHAP, which I have posted in another question. So now I am trying the same thing, but this time with PyTorch as the backend. Model building works fine, but calling SHAP's DeepExplainer throws the AttributeError. I am new to this kind of error in the model-explainer domain.
Input:
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print('Train...')
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=1,
          validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
Output: no error.
After That:
Input:
import shap
# we use the first 100 training examples as our background dataset to integrate over
explainer = shap.DeepExplainer(model, x_train[:100])
Error:
AttributeError Traceback (most recent call last)
<ipython-input-12-9cca779d01d2> in <module>
1 # we use the first 100 training examples as our background dataset to integrate over
----> 2 explainer = shap.DeepExplainer(model,1)
c:\users\shubh\.conda\envs\pytorch_cpu\lib\site-packages\shap\explainers\deep\__init__.py in __init__(self, model, data, session, learning_phase_flags)
79 self.explainer = TFDeepExplainer(model, data, session, learning_phase_flags)
80 elif framework == 'pytorch':
---> 81 self.explainer = PyTorchDeepExplainer(model, data)
82
83 self.expected_value = self.explainer.expected_value
c:\users\shubh\.conda\envs\pytorch_cpu\lib\site-packages\shap\explainers\deep\deep_pytorch.py in __init__(self, model, data)
47 self.target_handle.remove()
48 del self.layer.target_input
---> 49 self.model = model.eval()
50
51 self.multi_output = False
AttributeError: 'Sequential' object has no attribute 'eval'
Any help or direction for resolving this error?
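A reading of the traceback: SHAP has dispatched to its PyTorch DeepExplainer (the deep_pytorch.py frame), which calls model.eval(), a method that exists on torch.nn.Module but not on a Keras Sequential. So the model handed to DeepExplainer on this path must be a genuine PyTorch module, with tensors as background data. A rough, hypothetical PyTorch counterpart of the Keras model above (max_features and x_train as in the earlier code):

import torch
import torch.nn as nn

class LSTMClassifier(nn.Module):
    # Sketch of an equivalent model; layer sizes mirror the Keras version.
    def __init__(self, max_features, embed_dim=128, hidden_dim=128):
        super().__init__()
        self.embedding = nn.Embedding(max_features, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        emb = self.embedding(x)
        _, (h_n, _) = self.lstm(emb)        # final hidden state per sequence
        return torch.sigmoid(self.fc(h_n[-1]))

torch_model = LSTMClassifier(max_features)
# ... train torch_model here, then explain it:
background = torch.from_numpy(x_train[:100]).long()
explainer = shap.DeepExplainer(torch_model, background)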
I've trained a VGG16 model to predict 102 classes of flowers.
It works; however, now that I'm trying to understand one of its predictions, I feel it's not acting normally.
model layout
# Imports here
import os
import numpy as np
import torch
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import json
from pprint import pprint
from scipy import misc
%matplotlib inline
data_dir = 'flower_data'
train_dir = data_dir + '/train'
test_dir = data_dir + '/valid'
json_data=open('cat_to_name.json').read()
main_classes = json.loads(json_data)
main_classes = {int(k): v for k, v in main_classes.items()}
train_transform_2 = transforms.Compose([transforms.RandomResizedCrop(224),
                                        transforms.RandomRotation(30),
                                        transforms.RandomHorizontalFlip(),
                                        transforms.ToTensor()])
test_transform_2 = transforms.Compose([transforms.RandomResizedCrop(224),
                                       transforms.ToTensor()])
# TODO: Load the datasets with ImageFolder
train_data = datasets.ImageFolder(train_dir, transform=train_transform_2)
test_data = datasets.ImageFolder(test_dir, transform=test_transform_2)
# define dataloader parameters
batch_size = 20
num_workers=0
# TODO: Using the image datasets and the trainforms, define the dataloaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           num_workers=num_workers, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                          num_workers=num_workers, shuffle=True)
vgg16 = models.vgg16(pretrained=True)
# Freeze training for all "features" layers
for param in vgg16.features.parameters():
    param.requires_grad = False
import torch.nn as nn
n_inputs = vgg16.classifier[6].in_features
# add last linear layer (n_inputs -> 102 flower classes)
# new layers automatically have requires_grad = True
last_layer = nn.Linear(n_inputs, len(main_classes))
vgg16.classifier[6] = last_layer
import torch.optim as optim
# specify loss function (categorical cross-entropy)
criterion = nn.CrossEntropyLoss()
# specify optimizer (stochastic gradient descent) and learning rate = 0.001
optimizer = optim.SGD(vgg16.classifier.parameters(), lr=0.001)
pre_trained_model=torch.load("model.pt")
new=list(pre_trained_model.items())
my_model_kvpair=vgg16.state_dict()
count=0
for key, value in my_model_kvpair.items():
    layer_name, weights = new[count]
    my_model_kvpair[key] = weights
    count += 1
# apply the copied weights to the model
vgg16.load_state_dict(my_model_kvpair)
# number of epochs to train the model
n_epochs = 6
# initialize tracker for minimum validation loss
valid_loss_min = np.Inf # set initial "min" to infinity
for epoch in range(1, n_epochs+1):
    # keep track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    ###################
    # train the model #
    ###################
    # model by default is set to train
    vgg16.train()
    for batch_i, (data, target) in enumerate(train_loader):
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = vgg16(data)
        # calculate the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += loss.item()
        if batch_i % 20 == 19:  # print training loss every specified number of mini-batches
            print('Epoch %d, Batch %d loss: %.16f' %
                  (epoch, batch_i + 1, train_loss / 20))
            train_loss = 0.0
    ######################
    # validate the model #
    ######################
    vgg16.eval()  # prep model for evaluation
    for data, target in test_loader:
        # forward pass: compute predicted outputs by passing inputs to the model
        output = vgg16(data)
        # calculate the loss
        loss = criterion(output, target)
        # update running validation loss
        valid_loss += loss.item()
    # print training/validation statistics
    # calculate average loss over an epoch
    train_loss = train_loss/len(train_loader.dataset)
    valid_loss = valid_loss/len(test_loader.dataset)
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch,
        train_loss,
        valid_loss
    ))
    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
            valid_loss_min,
            valid_loss))
        torch.save(vgg16.state_dict(), 'model.pt')
        valid_loss_min = valid_loss
testing on a single image
tensor = torch.from_numpy(test_image)
reshaped = tensor.permute(2, 0, 1).unsqueeze(0)
floatified = reshaped.to(torch.float32) / 255
vgg16(floatified)
>>>
tensor([[ 2.5686, -1.1964, -0.0872, -1.7010, -1.6669, -1.0638, 0.4515, 0.1124,
0.0166, 0.3156, 1.1699, 1.5374, 1.8720, 2.5184, 2.9046, -0.8241,
-1.1949, -0.5700, 0.8692, -1.0485, 0.0390, -1.3783, -3.4632, -0.0143,
1.0986, 0.2667, -1.1127, -0.8515, 0.7759, -0.7528, 1.6366, -0.1170,
-0.4983, -2.6970, 0.7545, 0.0188, 0.1094, 0.5002, 0.8838, -0.0006,
-1.7993, -1.3706, 0.4964, -0.3251, -1.7313, 1.8731, 2.4963, 1.1713,
-1.5726, 1.5476, 3.9576, 0.7388, 0.0228, 0.3947, -1.7237, -1.8350,
-2.0297, 1.4088, -1.3469, 1.6128, -1.0851, 2.0257, 0.5881, 0.7498,
0.0738, 2.0592, 1.8034, -0.5468, 1.9512, 0.4534, 0.7746, -1.0465,
-0.7254, 0.3333, -1.6506, -0.4242, 1.9529, -0.4542, 0.2396, -1.6804,
-2.7987, -0.6367, -0.3599, 1.0102, 2.6319, 0.8305, -1.4333, 3.3043,
-0.4021, -0.4877, 0.9125, 0.0607, -1.0326, 1.3186, -2.5861, 0.1211,
-2.3177, -1.5040, 1.0416, 1.4008, 1.4225, -2.7291]],
grad_fn=<ThAddmmBackward>)
sum([ 2.5686, -1.1964, -0.0872, -1.7010, -1.6669, -1.0638, 0.4515, 0.1124,
0.0166, 0.3156, 1.1699, 1.5374, 1.8720, 2.5184, 2.9046, -0.8241,
-1.1949, -0.5700, 0.8692, -1.0485, 0.0390, -1.3783, -3.4632, -0.0143,
1.0986, 0.2667, -1.1127, -0.8515, 0.7759, -0.7528, 1.6366, -0.1170,
-0.4983, -2.6970, 0.7545, 0.0188, 0.1094, 0.5002, 0.8838, -0.0006,
-1.7993, -1.3706, 0.4964, -0.3251, -1.7313, 1.8731, 2.4963, 1.1713,
-1.5726, 1.5476, 3.9576, 0.7388, 0.0228, 0.3947, -1.7237, -1.8350,
-2.0297, 1.4088, -1.3469, 1.6128, -1.0851, 2.0257, 0.5881, 0.7498,
0.0738, 2.0592, 1.8034, -0.5468, 1.9512, 0.4534, 0.7746, -1.0465,
-0.7254, 0.3333, -1.6506, -0.4242, 1.9529, -0.4542, 0.2396, -1.6804,
-2.7987, -0.6367, -0.3599, 1.0102, 2.6319, 0.8305, -1.4333, 3.3043,
-0.4021, -0.4877, 0.9125, 0.0607, -1.0326, 1.3186, -2.5861, 0.1211,
-2.3177, -1.5040, 1.0416, 1.4008, 1.4225, -2.7291])
>>>
5.325799999999998
Given that this is how I test it on a single image (while the model, as usual, is trained and tested on batches), it returns a prediction vector that doesn't seem to be normalized or to add up to 1.
Is this normal?
I cannot tell with certainty without seeing your training code, but it's most likely your model was trained with cross-entropy loss and as such it outputs logits rather than class probabilities. You can turn them into proper probabilities by applying the softmax function.
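For example, a short sketch using PyTorch's built-in softmax (probs and pred_class are illustrative names):

import torch.nn.functional as F

logits = vgg16(floatified)        # raw scores, shape (1, 102)
probs = F.softmax(logits, dim=1)  # non-negative, each row now sums to 1
pred_class = probs.argmax(dim=1)  # index of the most likely flower class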