HuggingFace Trainer logging train data - pytorch

I'm following this tutorial to train some models:
https://huggingface.co/transformers/training.html
I'd like to track not only the evaluation loss and accuracy but also the train loss and accuracy, to monitor overfitting. While running the code in Jupyter, I do see all of this:
Epoch Training Loss Validation Loss Accuracy Glue
1 0.096500 0.928782 {'accuracy': 0.625} {'accuracy': 0.625, 'f1': 0.0}
2 0.096500 1.203832 {'accuracy': 0.625} {'accuracy': 0.625, 'f1': 0.0}
3 0.096500 1.643788 {'accuracy': 0.625} {'accuracy': 0.625, 'f1': 0.0}
but when I go into trainer.state.log_history, that stuff is not there. This really doesn't make sense to me.
for obj in trainer.state.log_history:
    print(obj)
{'loss': 0.0965, 'learning_rate': 4.5833333333333334e-05, 'epoch': 0.25, 'step': 1}
{'eval_loss': 0.9287818074226379, 'eval_accuracy': {'accuracy': 0.625}, 'eval_glue': {'accuracy': 0.625, 'f1': 0.0}, 'eval_runtime': 1.3266, 'eval_samples_per_second': 6.03, 'eval_steps_per_second': 0.754, 'epoch': 1.0, 'step': 4}
{'eval_loss': 1.2038320302963257, 'eval_accuracy': {'accuracy': 0.625}, 'eval_glue': {'accuracy': 0.625, 'f1': 0.0}, 'eval_runtime': 1.3187, 'eval_samples_per_second': 6.067, 'eval_steps_per_second': 0.758, 'epoch': 2.0, 'step': 8}
{'eval_loss': 1.6437877416610718, 'eval_accuracy': {'accuracy': 0.625}, 'eval_glue': {'accuracy': 0.625, 'f1': 0.0}, 'eval_runtime': 1.3931, 'eval_samples_per_second': 5.742, 'eval_steps_per_second': 0.718, 'epoch': 3.0, 'step': 12}
{'train_runtime': 20.9407, 'train_samples_per_second': 1.146, 'train_steps_per_second': 0.573, 'total_flos': 6314665328640.0, 'train_loss': 0.07855576276779175, 'epoch': 3.0, 'step': 12}
How do I get these back in an object, and not a printout?
Thanks
Edit: Reproducible code below:
import numpy as np
from datasets import load_metric, load_dataset
from transformers import TrainingArguments, AutoModelForSequenceClassification, Trainer, AutoTokenizer

raw_datasets = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(8))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(8))

model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)

training_args = TrainingArguments("IntroToBERT", evaluation_strategy="epoch")
training_args.logging_strategy = 'step'
training_args.logging_first_step = True
training_args.logging_steps = 1
training_args.num_train_epochs = 3
training_args.per_device_train_batch_size = 2
training_args.eval_steps = 1

metrics = {}
for metric in ['accuracy', 'glue']:
    metrics[metric] = load_metric(metric, 'mrpc')

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    out = {}
    for metric in metrics.keys():
        out[metric] = metrics[metric].compute(predictions=predictions, references=labels)
    return out

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

# here the printout is as shown
for obj in trainer.state.log_history:
    print(obj)
# here the logging data is displayed

You can use the methods log_metrics to format your logs and save_metrics to save them. Here is the code:
# rest of the training args
# ...
training_args.logging_dir = 'logs'  # or any dir you want to save logs

# training
train_result = trainer.train()

# compute train results
metrics = train_result.metrics
max_train_samples = len(small_train_dataset)
metrics["train_samples"] = min(max_train_samples, len(small_train_dataset))

# save train results
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)

# compute evaluation results
metrics = trainer.evaluate()
max_val_samples = len(small_eval_dataset)
metrics["eval_samples"] = min(max_val_samples, len(small_eval_dataset))

# save evaluation results
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)
You can also save all logs at once by setting the split parameter of log_metrics and save_metrics to "all", i.e. trainer.save_metrics("all", metrics); but I prefer this way, as you can customize the results based on your needs.
Here is the complete source provided by transformers 🤗 from which you can read more.
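As for getting the printed entries back into an object: trainer.state.log_history is already a plain list of dicts, so you can filter it directly. A minimal sketch, assuming the trainer from the question has finished training (per-step train logs carry a 'loss' key, evaluation logs an 'eval_loss' key):
# A minimal sketch, assuming `trainer` has finished training as in the question.
train_logs = [e for e in trainer.state.log_history if 'loss' in e]
eval_logs = [e for e in trainer.state.log_history if 'eval_loss' in e]

# e.g. pairs of (epoch, loss) ready for plotting
train_loss = [(e['epoch'], e['loss']) for e in train_logs]
eval_loss = [(e['epoch'], e['eval_loss']) for e in eval_logs]
print(train_loss)
print(eval_loss)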

Related

wandb pytorch: top1 accuracy per class

I have 5 classes in the validation set and I want to draw a graph of top-1 accuracy per class in the validation loop using wandb. I tried a single accuracy graph based on the average over the 5 classes and it works fine, but I want to log the top-1 accuracy for each class separately. I haven't been able to achieve this; is there any way to do it?
Validation Loader
val_nuisances = ['shape', 'pose', 'texture', 'context', 'weather']

val_loaders = []
for nuisance in val_nuisances:
    val_loaders.append((nuisance, torch.utils.data.DataLoader(
        datasets.ImageFolder(os.path.join(valdir, nuisance), transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True,
    )))
Validation Loop
def validate(val_loaders, model, criterion, args):
    overall_top1 = 0
    for nuisance, val_loader in val_loaders:
        batch_time = AverageMeter('Time', ':6.3f', Summary.NONE)
        losses = AverageMeter('Loss', ':.4e', Summary.NONE)
        top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE)
        top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE)
        progress = ProgressMeter(
            len(val_loader),
            [batch_time, losses, top1, top5],
            prefix=f'Test {nuisance}: ')

        # switch to evaluate mode
        model.eval()

        with torch.no_grad():
            end = time.time()
            for i, (images, target) in enumerate(val_loader):
                if args.gpu is not None:
                    images = images.cuda(args.gpu, non_blocking=True)
                if torch.cuda.is_available():
                    target = target.cuda(args.gpu, non_blocking=True)

                # compute output
                output = model(images)
                loss = criterion(output, target)

                # measure accuracy and record loss
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                losses.update(loss.item(), images.size(0))
                top1.update(acc1[0], images.size(0))
                top5.update(acc5[0], images.size(0))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                if i % args.print_freq == 0:
                    progress.display(i)

        progress.display_summary()
        overall_top1 += top1.avg
    overall_top1 /= len(val_loaders)
    return top1.avg
I don't see any logging to W&B in your code, but logging the top-1 accuracy per class would just be:
class_names = ['shape', 'pose', 'texture', 'context', 'weather']
top1_accuracies = [0.9, 0.8, 0.9, 0.9, 0.8]
wandb.log(dict(zip(class_names, top1_accuracies)))
In your code, it looks like you're not actually creating a variable for the top-1 accuracy of each class, so you'll want to do that first. The approach below is taken from https://stackoverflow.com/a/50977153/3959708.
You can use sklearn's confusion matrix to get the accuracy
from sklearn.metrics import confusion_matrix
import numpy as np

y_true = [0, 1, 2, 2, 2]
y_pred = [0, 0, 2, 2, 1]
target_names = ['class 0', 'class 1', 'class 2']

# Get the confusion matrix
cm = confusion_matrix(y_true, y_pred)
# array([[1, 0, 0],
#        [1, 0, 0],
#        [0, 1, 2]])

# Normalize each row so the diagonal entries become per-class accuracies
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
# array([[1.        , 0.        , 0.        ],
#        [1.        , 0.        , 0.        ],
#        [0.        , 0.33333333, 0.66666667]])

# The diagonal entries are the accuracies of each class
cm.diagonal()
# array([1.        , 0.        , 0.66666667])
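To tie this back to the validation loop above, here is a minimal sketch (names are illustrative) that logs one top-1 value per nuisance in a single wandb.log call, so W&B draws a separate curve per class. It assumes wandb.init() has been called and that validate() is modified to collect top1.avg per nuisance into a dict:
import wandb

def log_per_class_top1(per_class_top1, epoch):
    # one wandb.log call with several keys creates one curve per class
    wandb.log({f"top1/{name}": acc for name, acc in per_class_top1.items()}, step=epoch)

wandb.init(project="per-class-top1")  # illustrative project name
# hypothetical per-nuisance accuracies collected from the `top1` meters:
log_per_class_top1({'shape': 0.9, 'pose': 0.8, 'texture': 0.9,
                    'context': 0.9, 'weather': 0.8}, epoch=0)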

Make a prediction with Keras models trained using the Genetic Algorithm with PyGAD

I successfully ran the code (original link where to find the code) to train Keras models using the genetic algorithm with PyGAD:
import tensorflow.keras
import pygad.kerasga
import numpy
import pygad

def fitness_func(solution, sol_idx):
    global data_inputs, data_outputs, keras_ga, model

    model_weights_matrix = pygad.kerasga.model_weights_as_matrix(model=model,
                                                                 weights_vector=solution)
    model.set_weights(weights=model_weights_matrix)

    predictions = model.predict(data_inputs)
    mae = tensorflow.keras.losses.MeanAbsoluteError()
    abs_error = mae(data_outputs, predictions).numpy() + 0.00000001
    solution_fitness = 1.0 / abs_error
    return solution_fitness

def callback_generation(ga_instance):
    print("Generation = {generation}".format(generation=ga_instance.generations_completed))
    print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1]))

input_layer = tensorflow.keras.layers.Input(3)
dense_layer1 = tensorflow.keras.layers.Dense(5, activation="relu")(input_layer)
output_layer = tensorflow.keras.layers.Dense(1, activation="linear")(dense_layer1)

model = tensorflow.keras.Model(inputs=input_layer, outputs=output_layer)

weights_vector = pygad.kerasga.model_weights_as_vector(model=model)

keras_ga = pygad.kerasga.KerasGA(model=model,
                                 num_solutions=10)

# Data inputs
data_inputs = numpy.array([[0.02, 0.1, 0.15],
                           [0.7, 0.6, 0.8],
                           [1.5, 1.2, 1.7],
                           [3.2, 2.9, 3.1]])

# Data outputs
data_outputs = numpy.array([[0.1],
                            [0.6],
                            [1.3],
                            [2.5]])

num_generations = 10
num_parents_mating = 5
initial_population = keras_ga.population_weights

ga_instance = pygad.GA(num_generations=num_generations,
                       num_parents_mating=num_parents_mating,
                       initial_population=initial_population,
                       fitness_func=fitness_func,
                       on_generation=callback_generation)

ga_instance.run()

# After the generations complete, a plot is shown that summarizes how the
# fitness values evolved over the generations.
ga_instance.plot_result(title="PyGAD & Keras - Iteration vs. Fitness", linewidth=4)

# Returning the details of the best solution.
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))
print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx))

# Fetch the parameters of the best solution.
best_solution_weights = pygad.kerasga.model_weights_as_matrix(model=model,
                                                              weights_vector=solution)
model.set_weights(best_solution_weights)
predictions = model.predict(data_inputs)
print("Predictions : \n", predictions)

mae = tensorflow.keras.losses.MeanAbsoluteError()
abs_error = mae(data_outputs, predictions).numpy()
print("Absolute Error : ", abs_error)
Out:
Fitness value of the best solution = 5.007608966738384
Index of the best solution : 0
1/1 [==============================] - 0s 18ms/step
Predictions :
[[0.4351511 ]
[0.78366435]
[1.3436508 ]
[2.736318 ]]
Absolute Error : 0.1996961
As I understand it, the code above trains a model that I can use to forecast the output for a new 3-dimensional input such as [0.9, 0.7, 0.85].
I wonder how I could modify the code to adapt it to the input and output data below, or call the model to make predictions for new data_inputs = numpy.array([[0.9, 0.7, 0.85]]):
# Data inputs
data_inputs = numpy.array([[0.02, 0.1, 0.15],
                           [0.7, 0.6, 0.8],
                           [1.5, 1.2, 1.7],
                           [3.2, 2.9, 3.1],
                           [0.9, 0.7, 0.85]])  # new entry which needs a forecast

# Data outputs (for training only)
data_outputs = numpy.array([[0.1],
                            [0.6],
                            [1.3],
                            [2.5]])
Thanks a lot for your help in advance.
My trial code:
import numpy
from tensorflow import keras

# Load model architecture and weights
with open("./ga_model.json", "r") as json_file:
    model_json = json_file.read()
model = keras.models.model_from_json(model_json)
model.load_weights("./ga_model.h5")

# Data inputs
new_data_inputs = numpy.array([
    [0.9, 0.7, 0.85]  # new entry which needs a forecast
])

predictions = model.predict(new_data_inputs)
print("Predictions : \n", predictions)
Out:
Predictions :
[[0.8672837]]
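For reference, the same prediction can be made directly after ga_instance.run(), without saving and reloading the model. A minimal sketch using only the objects already defined in the training script above:
# Assumes `ga_instance`, `model`, `numpy`, and `pygad.kerasga` from the script above.
solution, solution_fitness, solution_idx = ga_instance.best_solution()
best_weights = pygad.kerasga.model_weights_as_matrix(model=model,
                                                     weights_vector=solution)
model.set_weights(best_weights)

new_data_inputs = numpy.array([[0.9, 0.7, 0.85]])  # new entry to forecast
print("Predictions : \n", model.predict(new_data_inputs))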

The result is empty when prediction of Faster RCNN model (Pytorch)

I'm trying to train a Faster R-CNN model. After training, I try to run prediction on an image, but the result is empty.
My data: w: 1600, h: 800, c: 3, classes: 7, bounding boxes: (x1, y1, x2, y2)
My model
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

def get_instance_segmentation_model(num_classes):
    backbone = torchvision.models.vgg16(pretrained=True).features
    backbone.out_channels = 512

    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    model = FasterRCNN(backbone,
                       num_classes=2,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 2
model = get_instance_segmentation_model(num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)
Training
# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, valid_data_loader, device=device)
Prediction:
[{'boxes': tensor([], device='cuda:0', size=(0, 4)),
  'labels': tensor([], device='cuda:0', dtype=torch.int64),
  'scores': tensor([], device='cuda:0')}]
You should change the number of classes to
model = FasterRCNN(backbone,
                   num_classes=YOUR_CLASSES+1,  # +1 is for the background
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
Remember that class 0 is reserved for the background, so your classes should start from 1 (with the 7 classes in your data, that means num_classes=8).
Also, please make sure your network has converged during training.
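If the output is still empty after that fix, it can help to inspect the raw detections before any thresholding. A minimal sketch, assuming `image` is a normalized CHW float tensor and `model`/`device` are as defined above (the 0.5 threshold is arbitrary):
model.eval()
with torch.no_grad():
    # torchvision detection models take a list of image tensors in eval mode
    outputs = model([image.to(device)])

scores = outputs[0]['scores']
print("raw scores:", scores)  # inspect even low-confidence detections
keep = scores > 0.5  # keep only reasonably confident detections
print("boxes:", outputs[0]['boxes'][keep])
print("labels:", outputs[0]['labels'][keep])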

Parameter Tuning using gridsearchcv for gradientboosting classifier in python

I am trying to run GradientBoostingClassifier() with the help of GridSearchCV.
For every combination of parameters, I also need precision, recall, and accuracy in tabular format.
Here is the code:
scoring = ['accuracy', 'precision', 'recall']
parameters = {  # 'nthread':[3,4], # when using hyperthreading, xgboost may become slower
    "criterion": ["friedman_mse", "mae"],
    "loss": ["deviance", "exponential"],
    "max_features": ["log2", "sqrt"],
    'learning_rate': [0.01, 0.05, 0.1, 1, 0.5],  # the so-called `eta` value
    'max_depth': [3, 4, 5],
    'min_samples_leaf': [4, 5, 6],
    'subsample': [0.6, 0.7, 0.8],
    'n_estimators': [5, 10, 15, 20],  # number of trees; change to 1000 for better results
    'scoring': scoring
}
# sorted(sklearn.metrics.SCORERS.keys())  # to see the available scorers
# clf_xgb = GridSearchCV(xgb_model, parameters, n_jobs=5, verbose=2, refit=True, cv=8)
clf_gbm = GridSearchCV(gbm_model, parameters, n_jobs=5, cv=8)
clf_gbm.fit(X_train, y_train)

print(clf_gbm.best_params_)
print(clf_gbm.best_score_)

feature_importances = pd.DataFrame(clf_gbm.best_estimator_.feature_importances_,
                                   index=X_train.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances)

depth = clf_gbm.cv_results_["param_max_depth"]
score = clf_gbm.cv_results_["mean_test_score"]
params = clf_gbm.cv_results_["params"]
I get this error:
ValueError: Invalid parameter seed for estimator GradientBoostingClassifier(criterion='friedman_mse', init=None,
learning_rate=0.01, loss='deviance', max_depth=3,
max_features='log2', max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=4, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=5, presort='auto',
random_state=None, subsample=1.0, verbose=0,
warm_start=False). Check the list of available parameters with `estimator.get_params().keys()`.
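The error occurs because every key in the parameter grid must be a valid parameter of the estimator itself (check estimator.get_params().keys()); options such as scoring belong on GridSearchCV directly, as in the working example below.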
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import make_scorer

# creating the scoring parameter:
scoring = {'accuracy': make_scorer(accuracy_score),
           'precision': make_scorer(precision_score),
           'recall': make_scorer(recall_score)}

# a sample parameter grid
parameters = {
    "loss": ["deviance"],
    "learning_rate": [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
    "min_samples_split": np.linspace(0.1, 0.5, 12),
    "min_samples_leaf": np.linspace(0.1, 0.5, 12),
    "max_depth": [3, 5, 8],
    "max_features": ["log2", "sqrt"],
    "criterion": ["friedman_mse", "mae"],
    "subsample": [0.5, 0.618, 0.8, 0.85, 0.9, 0.95, 1.0],
    "n_estimators": [10]
}

# passing the scoring functions to GridSearchCV
clf = GridSearchCV(GradientBoostingClassifier(), parameters, scoring=scoring,
                   refit=False, cv=2, n_jobs=-1)
clf.fit(trainX, trainY)

# converting clf.cv_results_ to a dataframe
df = pd.DataFrame.from_dict(clf.cv_results_)
# with cv=2 there are two splits, split0 and split1
df[['split0_test_accuracy', 'split1_test_accuracy',
    'split0_test_precision', 'split1_test_precision',
    'split0_test_recall', 'split1_test_recall']]
Then find the best parameters based on accuracy_score, precision_score, or recall, refit the model, and predict on the test data:
# find the best parameters based on the accuracy_score,
# taking the average of the accuracy_score across the two splits
df['accuracy_score'] = (df['split0_test_accuracy'] + df['split1_test_accuracy']) / 2
df.loc[df['accuracy_score'].idxmax()]['params']
Prediction on the test data
clf = GradientBoostingClassifier(criterion='mae',
                                 learning_rate=0.1,
                                 loss='deviance',
                                 max_depth=5,
                                 max_features='sqrt',
                                 min_samples_leaf=0.1,
                                 min_samples_split=0.42727272727272736,
                                 n_estimators=10,
                                 subsample=0.8)
clf.fit(trainX, trainY)

correct_test = correct_data(test)  # data preparation helper from the question's setup
testX = correct_test[predictor].values
result = clf.predict(testX)
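As a side note, when a dict of scorers is passed, cv_results_ also exposes aggregated mean_test_<scorer> columns, so the per-combination table the question asks for can be built directly. A minimal sketch, assuming the fitted clf from above:
# Assumes `clf` was fitted with the multi-metric `scoring` dict above.
results = pd.DataFrame(clf.cv_results_)
table = results[['params', 'mean_test_accuracy',
                 'mean_test_precision', 'mean_test_recall']]
print(table.sort_values('mean_test_accuracy', ascending=False).head())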

Why am I getting different results from Scikit-learn API vs Learning API of XGBoost?

I used the scikit-learn API for XGBoost (in Python) and got an accuracy of ~68%. I then used the same parameter set with the learning API; accuracy was ~60%. My understanding is that the scikit-learn API is a wrapper around the learning API, so they should give me the same results. I do not understand why I am getting different results from these two APIs.
cores = 16
random_state = 0

params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
    'min_child_weight': 1.0,
    'subsample': 1.0,
    'gamma': 0.0,
    'tree_method': 'gpu_exact',
    'colsample_bytree': 1.0,
    'alpha': 0.0,
    'lambda': 1.0,
    'nthread': cores,
    'objective': 'binary:logistic',
    'booster': 'gbtree',
    'seed': random_state,
    'eta': 0.1,
    'silent': 1
}

model = XGBClassifier(**params)
r = model.fit(X_train, y_train)
print(model)

# make predictions for test data
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]

# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
Results:
XGBClassifier(alpha=0.0, base_score=0.5, booster='gbtree',
colsample_bylevel=1, colsample_bytree=1.0, eta=0.1, gamma=0.0,
lambda=1.0, learning_rate=0.1, max_delta_step=0, max_depth=3,
min_child_weight=1.0, missing=None, n_estimators=100, n_jobs=1,
nthread=16, objective='binary:logistic', random_state=0,
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=0, silent=1,
subsample=1.0, tree_method='gpu_exact')
Accuracy: 68.32%
dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_test, label=y_test)

# fit model on training data
model = xgb.train(params=params, dtrain=dtrain)

# make predictions for test data
y_pred = model.predict(dvalid)
predictions = [round(value) for value in y_pred]

# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
Results:
Accuracy: 60.25%
I believe the difference is because you have not specified the number of boosting rounds in the learning API (xgb.train()), so it is using the default of 10.
'n_estimators' is sklearn-specific terminology and is not read by xgb.train().
Also, contrary to the comment given above, this particular algorithm is expected to be deterministic when run multiple times on the same system.
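A minimal sketch of the fix, assuming params and dtrain are defined as in the question: pass the round count explicitly so it matches the wrapper's n_estimators.
# Match the sklearn wrapper's n_estimators=100 by setting num_boost_round;
# xgb.train() does not pick up 'n_estimators' from the params dict.
model = xgb.train(params=params, dtrain=dtrain, num_boost_round=100)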
