I'm following this tutorial to train some models:
https://huggingface.co/transformers/training.html
I'd like to track not only the evaluation loss and accuracy, but also the training loss and accuracy, so that I can monitor overfitting. While running the code in Jupyter, I do see all of this:
Epoch Training Loss Validation Loss Accuracy Glue
1 0.096500 0.928782 {'accuracy': 0.625} {'accuracy': 0.625, 'f1': 0.0}
2 0.096500 1.203832 {'accuracy': 0.625} {'accuracy': 0.625, 'f1': 0.0}
3 0.096500 1.643788 {'accuracy': 0.625} {'accuracy': 0.625, 'f1': 0.0}
but when I look at trainer.state.log_history, I can't find all of that there. This really doesn't make sense to me.
for obj in trainer.state.log_history:
    print(obj)
{'loss': 0.0965, 'learning_rate': 4.5833333333333334e-05, 'epoch': 0.25, 'step': 1}
{'eval_loss': 0.9287818074226379, 'eval_accuracy': {'accuracy': 0.625}, 'eval_glue': {'accuracy': 0.625, 'f1': 0.0}, 'eval_runtime': 1.3266, 'eval_samples_per_second': 6.03, 'eval_steps_per_second': 0.754, 'epoch': 1.0, 'step': 4}
{'eval_loss': 1.2038320302963257, 'eval_accuracy': {'accuracy': 0.625}, 'eval_glue': {'accuracy': 0.625, 'f1': 0.0}, 'eval_runtime': 1.3187, 'eval_samples_per_second': 6.067, 'eval_steps_per_second': 0.758, 'epoch': 2.0, 'step': 8}
{'eval_loss': 1.6437877416610718, 'eval_accuracy': {'accuracy': 0.625}, 'eval_glue': {'accuracy': 0.625, 'f1': 0.0}, 'eval_runtime': 1.3931, 'eval_samples_per_second': 5.742, 'eval_steps_per_second': 0.718, 'epoch': 3.0, 'step': 12}
{'train_runtime': 20.9407, 'train_samples_per_second': 1.146, 'train_steps_per_second': 0.573, 'total_flos': 6314665328640.0, 'train_loss': 0.07855576276779175, 'epoch': 3.0, 'step': 12}
How do I get these metrics back as an object, rather than just a printout?
Thanks
Edit: Reproducible code below:
import numpy as np
from datasets import load_metric, load_dataset
from transformers import TrainingArguments, AutoModelForSequenceClassification, Trainer, AutoTokenizer
from datasets import list_metrics
raw_datasets = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(8))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(8))
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)
training_args = TrainingArguments("IntroToBERT", evaluation_strategy="epoch")
training_args.logging_strategy = 'step'
training_args.logging_first_step = True
training_args.logging_steps = 1
training_args.num_train_epochs = 3
training_args.per_device_train_batch_size = 2
training_args.eval_steps = 1
metrics = {}
for metric in ['accuracy', 'glue']:
    metrics[metric] = load_metric(metric, 'mrpc')

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    out = {}
    for metric in metrics.keys():
        out[metric] = metrics[metric].compute(predictions=predictions, references=labels)
    return out
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()
# here the printout is as shown
for obj in trainer.state.log_history:
    print(obj)
# here the logging data is displayed
You can use the Trainer methods log_metrics to format your logs and save_metrics to save them. Here is the code:
# rest of the training args
# ...
training_args.logging_dir = 'logs' # or any dir you want to save logs
# training
train_result = trainer.train()
# compute train results
metrics = train_result.metrics
max_train_samples = len(small_train_dataset)
metrics["train_samples"] = min(max_train_samples, len(small_train_dataset))
# save train results
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
# compute evaluation results
metrics = trainer.evaluate()
max_val_samples = len(small_eval_dataset)
metrics["eval_samples"] = min(max_val_samples, len(small_eval_dataset))
# save evaluation results
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)
You can also save all logs at once by setting the split parameter of log_metrics and save_metrics to "all", i.e. trainer.save_metrics("all", metrics); but I prefer the approach above, since you can customize the results to your needs.
Here is the complete example source provided by transformers 🤗, where you can read more.
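If the goal is just to get the values from the table into a Python object rather than a printout, note that trainer.state.log_history is already a list of plain dicts, so you can filter it and build a table yourself. Here is a minimal sketch, assuming pandas is available; the keys match the printout shown in the question:

import pandas as pd

history = trainer.state.log_history  # already a plain list of dicts

# step-level training logs carry a 'loss' key; evaluation logs carry 'eval_loss'
train_logs = [entry for entry in history if 'loss' in entry]
eval_logs = [entry for entry in history if 'eval_loss' in entry]

train_df = pd.DataFrame(train_logs)  # columns: loss, learning_rate, epoch, step
eval_df = pd.DataFrame(eval_logs)    # columns: eval_loss, eval_accuracy, eval_glue, ...
print(train_df)
print(eval_df)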
Related
I have 5 classes in my validation set and I want to draw a graph of the top-1 results per class in the validation loop using wandb. I have tried a single accuracy graph based on the average over the 5 classes and it works fine, but I want to plot the top-1 accuracy for each class separately. I haven't been able to achieve this; is there any way to do it?
Validation Loader
val_nuisances = ['shape', 'pose', 'texture', 'context', 'weather']

val_loaders = []
for nuisance in val_nuisances:
    val_loaders.append((nuisance, torch.utils.data.DataLoader(
        datasets.ImageFolder(os.path.join(valdir, nuisance), transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True,
    )))
Validation Loop
def validate(val_loaders, model, criterion, args):
    overall_top1 = 0
    for nuisance, val_loader in val_loaders:
        batch_time = AverageMeter('Time', ':6.3f', Summary.NONE)
        losses = AverageMeter('Loss', ':.4e', Summary.NONE)
        top1 = AverageMeter('Acc#1', ':6.2f', Summary.AVERAGE)
        top5 = AverageMeter('Acc#5', ':6.2f', Summary.AVERAGE)
        progress = ProgressMeter(
            len(val_loader),
            [batch_time, losses, top1, top5],
            prefix=f'Test {nuisance}: ')

        # switch to evaluate mode
        model.eval()

        with torch.no_grad():
            end = time.time()
            for i, (images, target) in enumerate(val_loader):
                if args.gpu is not None:
                    images = images.cuda(args.gpu, non_blocking=True)
                if torch.cuda.is_available():
                    target = target.cuda(args.gpu, non_blocking=True)

                # compute output
                output = model(images)
                loss = criterion(output, target)

                # measure accuracy and record loss
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                losses.update(loss.item(), images.size(0))
                top1.update(acc1[0], images.size(0))
                top5.update(acc5[0], images.size(0))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                if i % args.print_freq == 0:
                    progress.display(i)

        progress.display_summary()
        overall_top1 += top1.avg

    overall_top1 /= len(val_loaders)
    return top1.avg
I don't see any logging to W&B in your code, but logging the top-1 accuracy per class would just be:
class_names = ['shape', 'pose', 'texture', 'context', 'weather']
top1_accuracies = [0.9, 0.8, 0.9, 0.9, 0.8]
wandb.log({class_names[0]: top1_accuracies[0], class_names[1]: top1_accuracies[1], ...})
In the above example, it looks like you're not actually creating a variable for the top-1 accuracy of each class. You'll want to do that first.
Taken from https://stackoverflow.com/a/50977153/3959708:
You can use sklearn's confusion matrix to get the per-class accuracy:
from sklearn.metrics import confusion_matrix
import numpy as np
y_true = [0, 1, 2, 2, 2]
y_pred = [0, 0, 2, 2, 1]
target_names = ['class 0', 'class 1', 'class 2']
#Get the confusion matrix
cm = confusion_matrix(y_true, y_pred)
#array([[1, 0, 0],
# [1, 0, 0],
# [0, 1, 2]])
# Now normalize the rows so the diagonal entries become per-class accuracies
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
#array([[1. , 0. , 0. ],
# [1. , 0. , 0. ],
# [0. , 0.33333333, 0.66666667]])
#The diagonal entries are the accuracies of each class
cm.diagonal()
#array([1. , 0. , 0.66666667])
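To get those per-class accuracies into W&B, the two answers can be combined. Here is a minimal sketch, assuming wandb.init() has already been called and that target_names is ordered the same way as your label indices:

import wandb
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true, y_pred)
per_class_acc = cm.diagonal() / cm.sum(axis=1)

# log one named metric per class so each class gets its own curve in the dashboard
wandb.log({f"top1/{name}": acc for name, acc in zip(target_names, per_class_acc)})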
I successfully ran the code below (from the original link) to train Keras models using the genetic algorithm with PyGAD:
import tensorflow.keras
import pygad.kerasga
import numpy
import pygad
def fitness_func(solution, sol_idx):
    global data_inputs, data_outputs, keras_ga, model

    model_weights_matrix = pygad.kerasga.model_weights_as_matrix(model=model,
                                                                 weights_vector=solution)
    model.set_weights(weights=model_weights_matrix)

    predictions = model.predict(data_inputs)
    mae = tensorflow.keras.losses.MeanAbsoluteError()
    abs_error = mae(data_outputs, predictions).numpy() + 0.00000001
    solution_fitness = 1.0 / abs_error

    return solution_fitness

def callback_generation(ga_instance):
    print("Generation = {generation}".format(generation=ga_instance.generations_completed))
    print("Fitness = {fitness}".format(fitness=ga_instance.best_solution()[1]))
input_layer = tensorflow.keras.layers.Input(3)
dense_layer1 = tensorflow.keras.layers.Dense(5, activation="relu")(input_layer)
output_layer = tensorflow.keras.layers.Dense(1, activation="linear")(dense_layer1)
model = tensorflow.keras.Model(inputs=input_layer, outputs=output_layer)
weights_vector = pygad.kerasga.model_weights_as_vector(model=model)
keras_ga = pygad.kerasga.KerasGA(model=model,
num_solutions=10)
# Data inputs
data_inputs = numpy.array([[0.02, 0.1, 0.15],
[0.7, 0.6, 0.8],
[1.5, 1.2, 1.7],
[3.2, 2.9, 3.1]])
# Data outputs
data_outputs = numpy.array([[0.1],
[0.6],
[1.3],
[2.5]])
num_generations = 10
num_parents_mating = 5
initial_population = keras_ga.population_weights
ga_instance = pygad.GA(num_generations=num_generations,
num_parents_mating=num_parents_mating,
initial_population=initial_population,
fitness_func=fitness_func,
on_generation=callback_generation)
ga_instance.run()
# After the generations complete, some plots are shown that summarize how the outputs/fitness values evolve over generations.
ga_instance.plot_result(title="PyGAD & Keras - Iteration vs. Fitness", linewidth=4)
# Returning the details of the best solution.
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print("Fitness value of the best solution = {solution_fitness}".format(solution_fitness=solution_fitness))
print("Index of the best solution : {solution_idx}".format(solution_idx=solution_idx))
# Fetch the parameters of the best solution.
best_solution_weights = pygad.kerasga.model_weights_as_matrix(model=model,
weights_vector=solution)
model.set_weights(best_solution_weights)
predictions = model.predict(data_inputs)
print("Predictions : \n", predictions)
mae = tensorflow.keras.losses.MeanAbsoluteError()
abs_error = mae(data_outputs, predictions).numpy()
print("Absolute Error : ", abs_error)
Out:
Fitness value of the best solution = 5.007608966738384
Index of the best solution : 0
1/1 [==============================] - 0s 18ms/step
Predictions :
[[0.4351511 ]
[0.78366435]
[1.3436508 ]
[2.736318 ]]
Absolute Error : 0.1996961
As I understand it, the code above trains a model that should help me forecast the output for a new 3-dimensional input such as [0.9, 0.7, 0.85].
I wonder how I could modify the code to adapt to the input and output data below, or how to call the model to make predictions for the new data_inputs = numpy.array([[0.9, 0.7, 0.85]]):
# Data inputs
data_inputs = numpy.array([[0.02, 0.1, 0.15],
[0.7, 0.6, 0.8],
[1.5, 1.2, 1.7],
[3.2, 2.9, 3.1],
[0.9, 0.7, 0.85]  # new entry which needs a forecast
])
# Data outputs
data_outputs = numpy.array([[0.1],
[0.6],
[1.3],
[2.5]]) # Output data for training
Thanks a lot in advance for your help.
My trial code:
from tensorflow import keras
import numpy

# Load model and weights
with open("./ga_model.json", "r") as json_file:
    model_json = json_file.read()
model = keras.models.model_from_json(model_json)
model.load_weights("./ga_model.h5")

# Data inputs
new_data_inputs = numpy.array([
    [0.9, 0.7, 0.85]  # new entry which needs a forecast
])

predictions = model.predict(new_data_inputs)
print("Predictions : \n", predictions)
Out:
Predictions :
[[0.8672837]]
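You don't necessarily need the JSON/H5 round trip. After ga_instance.run() finishes, you can load the best weight vector back into the in-memory model and call predict on the new input directly, just as the last lines of the training script already do. A minimal sketch reusing model, ga_instance, pygad and numpy from the code above; note that the new row should stay out of the data_inputs used for training, because there is no matching row in data_outputs for the fitness function to compare against:

# fetch the best weight vector found by the GA and load it into the Keras model
solution, solution_fitness, solution_idx = ga_instance.best_solution()
best_weights = pygad.kerasga.model_weights_as_matrix(model=model,
                                                     weights_vector=solution)
model.set_weights(best_weights)

# forecast the unseen 3-dimensional input
new_data_inputs = numpy.array([[0.9, 0.7, 0.85]])
print("Forecast :", model.predict(new_data_inputs))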
I'm trying to train a Faster R-CNN model. After training, I try to predict on an image, but the result is empty.
My data is w: 1600, h: 800, c: 3, with 7 classes and bounding boxes in (x1, y1, x2, y2) format.
My model is below.
My model
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
def get_instance_segmentation_model(num_classes):
    backbone = torchvision.models.vgg16(pretrained=True).features
    backbone.out_channels = 512

    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)

    model = FasterRCNN(backbone,
                       num_classes=2,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
num_classes = 2
model = get_instance_segmentation_model(num_classes)
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)
Training
# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, valid_data_loader, device=device)
Prediction
[{'boxes': tensor([], device='cuda:0', size=(0, 4)),
'labels': tensor([], device='cuda:0', dtype=torch.int64),
'scores': tensor([], device='cuda:0')}]
You should change the number of classes to
model = FasterRCNN(backbone,
                   num_classes=YOUR_CLASSES+1,  # +1 is for the background
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
Remember that the class 0 is reserved for the background, so your classes should start from 1.
Also, please make sure your network has converged during the training.
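For completeness, when you run prediction after fixing the class count, make sure the model is in eval mode and filter the detections by score. A rough sketch (test_image is a placeholder for a normalized [C, H, W] float tensor, and the 0.5 threshold is just an example):

import torch

model.eval()
with torch.no_grad():
    # in eval mode the model takes a list of image tensors and returns a list of dicts
    prediction = model([test_image.to(device)])[0]

keep = prediction['scores'] > 0.5  # drop low-confidence boxes
print(prediction['boxes'][keep], prediction['labels'][keep], prediction['scores'][keep])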
I am trying to run GradientBoostingClassifier() with the help of GridSearchCV.
For every combination of parameters, I also need precision, recall, and accuracy in tabular format.
Here is the code:
scoring= ['accuracy', 'precision','recall']
parameters = {
    # 'nthread': [3, 4],  # when using hyperthreading, xgboost may become slower
    "criterion": ["friedman_mse", "mae"],
    "loss": ["deviance", "exponential"],
    "max_features": ["log2", "sqrt"],
    'learning_rate': [0.01, 0.05, 0.1, 1, 0.5],  # the so-called `eta` value
    'max_depth': [3, 4, 5],
    'min_samples_leaf': [4, 5, 6],
    'subsample': [0.6, 0.7, 0.8],
    'n_estimators': [5, 10, 15, 20],  # number of trees; change it to 1000 for better results
    'scoring': scoring
}
# sorted(sklearn.metrics.SCORERS.keys()) # To see different loss functions
#clf_xgb = GridSearchCV(xgb_model, parameters, n_jobs=5,verbose=2, refit=True,cv = 8)
clf_gbm = GridSearchCV(gbm_model, parameters, n_jobs=5,cv = 8)
clf_gbm.fit(X_train,y_train)
print(clf_gbm.best_params_)
print(clf_gbm.best_score_)
feature_importances = pd.DataFrame(clf_gbm.best_estimator_.feature_importances_,
                                   index=X_train.columns,
                                   columns=['importance']).sort_values('importance', ascending=False)
print(feature_importances)
depth=clf_gbm.cv_results_["param_max_depth"]
score=clf_gbm.cv_results_["mean_test_score"]
params=clf_gbm.cv_results_["params"]
I get this error:
ValueError: Invalid parameter seed for estimator GradientBoostingClassifier(criterion='friedman_mse', init=None,
learning_rate=0.01, loss='deviance', max_depth=3,
max_features='log2', max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=4, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=5, presort='auto',
random_state=None, subsample=1.0, verbose=0,
warm_start=False). Check the list of available parameters with `estimator.get_params().keys()`.
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import make_scorer
# creating the scoring parameter:
scoring = {'accuracy': make_scorer(accuracy_score),
           'precision': make_scorer(precision_score),
           'recall': make_scorer(recall_score)}

# a sample parameter grid
parameters = {
    "loss": ["deviance"],
    "learning_rate": [0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2],
    "min_samples_split": np.linspace(0.1, 0.5, 12),
    "min_samples_leaf": np.linspace(0.1, 0.5, 12),
    "max_depth": [3, 5, 8],
    "max_features": ["log2", "sqrt"],
    "criterion": ["friedman_mse", "mae"],
    "subsample": [0.5, 0.618, 0.8, 0.85, 0.9, 0.95, 1.0],
    "n_estimators": [10]
}

# passing the scoring functions to GridSearchCV
clf = GridSearchCV(GradientBoostingClassifier(), parameters,scoring=scoring,refit=False,cv=2, n_jobs=-1)
clf.fit(trainX, trainY)
# convert clf.cv_results_ to a dataframe
df = pd.DataFrame.from_dict(clf.cv_results_)
# since cv=2 here, there are two splits: split0 and split1
df[['split0_test_accuracy', 'split1_test_accuracy', 'split0_test_precision', 'split1_test_precision', 'split0_test_recall', 'split1_test_recall']]
Find the best parameters based on accuracy_score, precision_score, or recall_score, then refit the model and predict on the test data.
# find the best parameters based on the accuracy_score
# (taking the average of the accuracy over the two splits)
df['accuracy_score'] = (df['split0_test_accuracy'] + df['split1_test_accuracy']) / 2
df.loc[df['accuracy_score'].idxmax()]['params']
Prediction on the test data
clf = GradientBoostingClassifier(criterion='mae',
                                 learning_rate=0.1,
                                 loss='deviance',
                                 max_depth=5,
                                 max_features='sqrt',
                                 min_samples_leaf=0.1,
                                 min_samples_split=0.42727272727272736,
                                 n_estimators=10,
                                 subsample=0.8)
clf.fit(trainX, trainY)
correct_test = correct_data(test)
testX = correct_test[predictor].values
result = clf.predict(testX)
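As a variation on the above, you can keep the dict of scorers but set refit to one of the metric names instead of False; GridSearchCV then exposes best_params_ and best_estimator_ chosen by that metric, while precision and recall for every parameter combination remain available in cv_results_. A sketch under the same scoring and parameters as above:

import pandas as pd

clf = GridSearchCV(GradientBoostingClassifier(), parameters,
                   scoring=scoring, refit='accuracy', cv=2, n_jobs=-1)
clf.fit(trainX, trainY)

results = pd.DataFrame(clf.cv_results_)
print(results[['params', 'mean_test_accuracy', 'mean_test_precision', 'mean_test_recall']])
print(clf.best_params_)

# best_estimator_ is already refit on the full training data, so it can predict directly
result = clf.best_estimator_.predict(testX)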
I used the scikit-learn API for XGBoost (in Python) and my accuracy was ~68%. I used the same parameter set with the Learning API for XGBoost, and my accuracy was ~60%. My understanding is that the scikit-learn API is a wrapper around the Learning API, so they should give the same results. I don't understand why I am getting different results from these two APIs.
cores = 16
random_state = 0

params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
    'min_child_weight': 1.0,
    'subsample': 1.0,
    'gamma': 0.0,
    'tree_method': 'gpu_exact',
    'colsample_bytree': 1.0,
    'alpha': 0.0,
    'lambda': 1.0,
    'nthread': cores,
    'objective': 'binary:logistic',
    'booster': 'gbtree',
    'seed': random_state,
    'eta': 0.1,
    'silent': 1
}
model = XGBClassifier(**params)
r = model.fit(X_train,y_train)
print(model)
# make predictions for test data
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
Results:
XGBClassifier(alpha=0.0, base_score=0.5, booster='gbtree',
colsample_bylevel=1, colsample_bytree=1.0, eta=0.1, gamma=0.0,
lambda=1.0, learning_rate=0.1, max_delta_step=0, max_depth=3,
min_child_weight=1.0, missing=None, n_estimators=100, n_jobs=1,
nthread=16, objective='binary:logistic', random_state=0,
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=0, silent=1,
subsample=1.0, tree_method='gpu_exact')
Accuracy: 68.32%
dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_test, label=y_test)
# fit the model on the training data
model = xgb.train(params=params,dtrain=dtrain)
# make predictions for test data
y_pred = model.predict(dvalid)
predictions = [round(value) for value in y_pred]
# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
Results:
Accuracy: 60.25%
I believe the difference is because you have not specified the number of boosting rounds in the standard xgboost API (xgb.train()). As a result it is using the default of 10.
'n_estimators' is sklearn-specific terminology; the native API takes the number of rounds via the num_boost_round argument of xgb.train() instead.
Also, contrary to the comment given above, this particular algorithm is expected to be deterministic when run multiple times on the same system.
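A sketch of how the native call can be aligned with the sklearn wrapper, under the same params dict as above: pass the tree count explicitly as num_boost_round, since xgb.train() does not read n_estimators from params. With the boosting rounds matched, the two APIs should produce much closer accuracies.

import xgboost as xgb
from sklearn.metrics import accuracy_score

dtrain = xgb.DMatrix(X_train, label=y_train)
dvalid = xgb.DMatrix(X_test, label=y_test)

# mirror n_estimators=100 from the sklearn wrapper
model = xgb.train(params=params, dtrain=dtrain,
                  num_boost_round=params['n_estimators'])

y_pred = model.predict(dvalid)
predictions = [round(value) for value in y_pred]
print("Accuracy: %.2f%%" % (accuracy_score(y_test, predictions) * 100.0))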