fill-mask usage from transformers pipeline - nlp

I fine-tuned a GPT-2 language model and I am generating text from my model using the following lines of code:
generator = pipeline('text-generation', tokenizer='gpt2', model='data/out')
print(generator('Once upon a time', max_length=40)[0]['generated_text'])
Now I want to predict only the next word, together with its probabilities. I thought this could be done with 'fill-mask', but I don't know how. When I put 'fill-mask' in place of 'text-generation', I get this error:
"Unrecognized configuration class <class 'transformers.models.gpt2.configuration_gpt2.GPT2Config'> for this kind of AutoModel: AutoModelForMaskedLM.
Model type should be one of BigBirdConfig, Wav2Vec2Config, ConvBertConfig, LayoutLMConfig, DistilBertConfig, AlbertConfig, BartConfig, MBartConfig, CamembertConfig, XLMRobertaConfig, LongformerConfig, RobertaConfig, SqueezeBertConfig, BertConfig, MobileBertConfig, FlaubertConfig, XLMConfig, ElectraConfig, ReformerConfig, FunnelConfig, MPNetConfig, TapasConfig, DebertaConfig, DebertaV2Config, IBertConfig.".
generator = pipeline('fill-mask', tokenizer='gpt2', model='data/out')  # this line raises the error above
Please let me know how I can fix this issue. Any kind of help would be greatly appreciated.
Thanks in advance.
Here is the whole code, for better understanding:
from transformers import (
    GPT2Tokenizer,
    DataCollatorForLanguageModeling,
    TextDataset,
    GPT2LMHeadModel,
    TrainingArguments,
    Trainer,
    pipeline,
)
train_path = 'parsed_data.txt'
test_path = 'parsed_data.txt'
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
train_dataset = TextDataset(tokenizer=tokenizer, file_path=train_path, block_size=128)
test_dataset = TextDataset(tokenizer=tokenizer, file_path=test_path, block_size=128)
model = GPT2LMHeadModel.from_pretrained('gpt2')
training_args = TrainingArguments(output_dir='data/out',
                                  overwrite_output_dir=True,
                                  per_device_train_batch_size=32,
                                  per_device_eval_batch_size=32,
                                  learning_rate=5e-5,
                                  num_train_epochs=3)
trainer = Trainer(model=model,
                  args=training_args,
                  data_collator=data_collator,
                  train_dataset=train_dataset,
                  eval_dataset=test_dataset)
trainer.train()
trainer.save_model()
generator = pipeline('fill-mask', tokenizer='gpt2', model='data/out')
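One possible approach (a minimal sketch, not from the original post): 'fill-mask' only supports masked language models such as BERT or RoBERTa, while GPT-2 is a causal (left-to-right) language model, so the next-word distribution can instead be read directly from the LM head. This assumes the fine-tuned model saved in 'data/out' above:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('data/out')
model.eval()

inputs = tokenizer('Once upon a time', return_tensors='pt')
with torch.no_grad():
    logits = model(**inputs).logits            # shape: (1, seq_len, vocab_size)
probs = torch.softmax(logits[0, -1], dim=-1)   # distribution over the next token
top_probs, top_ids = probs.topk(5)             # five most likely next tokens
for p, i in zip(top_probs, top_ids):
    print(tokenizer.decode(int(i)), float(p))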

Related

Extract the features of last layer from the pytorch-fasterrcnn-resnet50-fpn

I have an image, and I have to use pytorch-fasterrcnn-resnet50-fpn to extract its features. Below is the code that I am trying:
import torch
import torchvision
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = True)
### strip the last layer
feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])
inputs = feature_extractor(images=image, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
last_hidden_states = outputs.last_hidden_state
Here the type of image is PIL.JpegImagePlugin.JpegImageFile.
The above code does not work for extracting the features. Can anyone tell me how to solve this?
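One way this is commonly done (a sketch under the assumption that the FPN feature maps are what is wanted, not the poster's exact goal): torchvision detection models expose their feature extractor as model.backbone, which takes a batched image tensor and returns an OrderedDict of feature maps. Note that calling the backbone directly skips the model's internal resizing/normalization transform. 'example.jpg' is a hypothetical file name:
import torch
import torchvision
from torchvision import transforms
from PIL import Image

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()

image = Image.open('example.jpg').convert('RGB')    # hypothetical input image
tensor = transforms.ToTensor()(image)               # 3xHxW float tensor in [0, 1]

with torch.no_grad():
    features = model.backbone(tensor.unsqueeze(0))  # OrderedDict of FPN feature maps
for name, fmap in features.items():
    print(name, fmap.shape)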

Detectron2: No instances in prediction

I'm trying to train Detectron2 on a custom dataset that I annotated with coco-annotator. After training, I wanted to predict instances in my image, but none are shown.
Training:
from detectron2.engine import DefaultTrainer
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("TrashTron_train",)
cfg.DATASETS.TEST = ("TrashTron_val",)
# cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025 # pick a good LR
cfg.SOLVER.MAX_ITER = 300 # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = [] # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 24 # number of classes in the custom dataset (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrectly use num_classes+1 here.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
Prediction:
test_data = [{'1191.jpg': '/content/datasets/val/1191.jpg',
              'image_id': 1308}]
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 # set a custom testing threshold
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") # path to the model we just trained
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 24
predictor = DefaultPredictor(cfg)
im = cv2.imread(test_data[0]["1191.jpg"])
outputs = predictor(im)
# print(outputs["instances"].pred_densepose)
v = Visualizer(im[:, :, ::-1],
               metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
               scale=0.5,
               instance_mode=ColorMode.IMAGE_BW)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
img = cv2.cvtColor(out.get_image()[:, :, ::-1], cv2.COLOR_RGBA2RGB)
plt.imshow(img)
The corresponding image is shown, but no instances.
Any suggestions? The overall evaluation scores aren't that great, but I picked the best class, and even there I don't get any predictions...
I would try lowering the threshold, since you have said that the overall training scores were not great.
In this answer in the official repo, the following code is suggested for changing the threshold:
cfg.MODEL.TENSOR_MASK.SCORE_THRESH_TEST = 0.5
In another answer in the same thread, other thresholds are modified as well:
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
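As a quick sanity check (a sketch assuming the same cfg and image as above), the ROI heads threshold can be dropped to a very permissive value before rebuilding the predictor, just to see whether any low-confidence instances survive at all:
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05   # very permissive, for debugging only
predictor = DefaultPredictor(cfg)
outputs = predictor(im)
print(len(outputs["instances"]))               # number of detections above the threshold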

“Concatenate layer” problem when doing GRAD-CAM. How to overcome this in my custom functional model?

I am having problems with Grad-CAM. I would be grateful if anyone could help. My code is here:
https://www.kaggle.com/mervearmagan/gradcamproblem
Sorry, I couldn't fix the error I got:
ValueError: Input 0 is incompatible with layer model_1: expected shape=(None, 512, 512, 3), found shape=(512, 512, 3)
img = tf.keras.layers.Input(shape=IMG_SHAPE)
gender = tf.keras.layers.Input(shape=(1,))
base_model = tf.keras.applications.InceptionV3(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')
cnn_vec = base_model(img)
cnn_vec = tf.keras.layers.GlobalAveragePooling2D()(cnn_vec)
cnn_vec = tf.keras.layers.Dropout(0.20)(cnn_vec)
gender_vec = tf.keras.layers.Dense(32, activation='relu')(gender)
features = tf.keras.layers.Concatenate(axis=-1)([cnn_vec, gender_vec])
dense_layer = tf.keras.layers.Dense(256, activation='relu')(features)
dense_layer = tf.keras.layers.Dropout(0.1)(dense_layer)
dense_layer = tf.keras.layers.Dense(128, activation='relu')(dense_layer)
dense_layer = tf.keras.layers.Dropout(0.1)(dense_layer)
dense_layer = tf.keras.layers.Dense(64, activation='relu')(dense_layer)
output_layer = tf.keras.layers.Dense(1, activation='linear')(dense_layer)
model = tf.keras.Model(inputs=[img, gender], outputs=output_layer)
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, classifier_layer_names):
    last_conv_layer = model.get_layer(last_conv_layer_name)
    last_conv_layer_model = tf.keras.Model(model.inputs, last_conv_layer.output)
    classifier_input = tf.keras.layers.Input(shape=last_conv_layer.output.shape)
    # classifier_input = tf.keras.layers.Input(shape=last_conv_layer.output.shape[1:])
    x = classifier_input
    for layer_name in classifier_layer_names:
        x = model.get_layer(layer_name)(x)
    classifier_model = tf.keras.Model(classifier_input, x)
    with tf.GradientTape() as tape:
        last_conv_layer_output = last_conv_layer_model(img_array)
        tape.watch(last_conv_layer_output)
        preds = classifier_model(last_conv_layer_output)
        top_pred_index = tf.argmax(preds[0])
        top_class_channel = preds[:, top_pred_index]
    grads = tape.gradient(top_class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    last_conv_layer_output = last_conv_layer_output.numpy()[0]
    pooled_grads = pooled_grads.numpy()
    for i in range(pooled_grads.shape[-1]):
        last_conv_layer_output[:, :, i] *= pooled_grads[i]
    heatmap = np.mean(last_conv_layer_output, axis=-1)
    heatmap = np.maximum(heatmap, 0) / np.max(heatmap)
    return heatmap
last_conv_layer_name = 'global_average_pooling2d'
classifier_layer_names = ['dense_4']
img = get_input('4360.png')
inputgender=tf.ones((1,1))
image=tf.reshape(img,(1,512,512,3))
heatmap = make_gradcam_heatmap([image,inputgender], model, last_conv_layer_name, classifier_layer_names)
When running the model, remember to test it using inputs of the form:
model([tf.ones((1,512,512,3)), tf.ones((1,1))])
...in the case where you feed one image and one gender value to the network. The leading "1" in each tensor is the batch dimension. An input of that form should produce a result that looks OK at this stage. Go through your code, check this "stage" first, and then move forward in your program.
This is a handy way to convert an image in NumPy array format to a tensor with an extra batch dimension, making it compatible with the neural network input:
# Advice on how to convert an image to tensor format...
import tensorflow as tf
import numpy as np
# Load an image of size 512x512x3, e.g. using PIL or any other suitable library...
# image = Image.open('smile_or_not.png')
# Convert the image to numpy... here we simulate it because no real image was loaded...
image_np = np.random.rand(512, 512, 3)
# Let's see its shape...
print("Size of input image:", image_np.shape)
# And convert it to a tensor of shape (1, height, width, 3)
in_tensor_format = tf.reshape(image_np, (1, 512, 512, 3))
print("...has a shape of:", in_tensor_format.shape, "...when converted to tensor")

RecursionError: maximum recursion depth exceeded in comparison in TensorFlow with skopt

I want to run a Bayesian search with skopt (https://scikit-optimize.github.io/stable/auto_examples/bayesian-optimization.html).
My dataset is a time series, and t is my time step.
But I get an error:
RecursionError: maximum recursion depth exceeded in comparison
This is my code:
# imports assumed by this snippet (skopt's search space, the optimizer itself, and pandas)
from skopt import gp_minimize
from skopt.space import Categorical, Integer
from skopt.utils import use_named_args
import pandas as pd

def Grid_search_class(X_train=X_train[:, 0:t+1, :],
                      y_train=y_train,
                      X_test=X_test[:, 0:t+1, :],
                      y_test=y_test,
                      n_calls=20,
                      print_score=False, t=t):
    """INPUTS: train/test data;
    n_calls: number of calls to func."""
    import tensorflow as tf
    Adam = tf.keras.optimizers.Adam(learning_rate=0.007)
    Adagrad = tf.keras.optimizers.Adagrad(learning_rate=0.007)
    dim_num_input_text = Categorical([16, 32, 64, 128, 256, 512, 1024, 2048], name='num_dense_layers_text')
    dim_num_dense_text = Integer(low=0, high=5, name='num_HLD_nodes_text')
    dim_drop_text = Categorical([0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4], name='drop_text')
    dim_num_input_temp = Categorical([16, 32, 64, 128, 256, 512, 1024, 2048], name='num_dense_layers_temp')
    dim_num_dense_temp = Integer(low=0, high=5, name='num_HLD_nodes_temp')
    dim_drop_temp = Categorical([0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4], name='drop_temp')
    dim_num_input_fixe = Categorical([16, 32, 64, 128, 256, 512, 1024, 2048], name='num_dense_layers_fixe')
    dim_num_dense_fixe = Integer(low=0, high=5, name='num_HLD_nodes_fixe')
    dim_drop_fixe = Categorical([0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4], name='drop_fixe')
    dim_num_input_merge = Categorical([16, 32, 64, 128, 256, 512, 1024, 2048], name='num_dense_layers_merge')
    dim_num_dense_merge = Integer(low=0, high=5, name='num_HLD_nodes_merge')
    dim_drop_merge = Categorical([0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4], name='drop_merge')
    dim_optim = Categorical([Adam, Adagrad], name='optim')
    dimensions = [dim_num_input_text, dim_num_dense_text, dim_drop_text,
                  dim_num_input_temp, dim_num_dense_temp, dim_drop_temp,
                  dim_num_input_fixe, dim_num_dense_fixe, dim_drop_fixe,
                  dim_num_input_merge, dim_num_dense_merge, dim_drop_merge,
                  dim_optim]
    default_parameters = [512, 0, 0.1, 512, 0, 0.1, 512, 0, 0.1, 512, 0, 0.1, Adam]

    def create_model(num_dense_layers_text, num_HLD_nodes_text, drop_text,
                     num_dense_layers_temp, num_HLD_nodes_temp, drop_temp,
                     num_dense_layers_fixe, num_HLD_nodes_fixe, drop_fixe,
                     num_dense_layers_merge, num_HLD_nodes_merge, drop_merge,
                     optim, t=t):
        x_text = model_text.layers[ind_list[-1] - 1].output
        if num_dense_layers_text > 0:
            for i in range(num_dense_layers_text):
                x_text = tf.keras.layers.Dense(num_HLD_nodes_text, activation='relu')(x_text)
                x_text = tf.keras.layers.Dropout(drop_text)(x_text)
        x_temp = model_temp[t].layers[ind_list[t]].output
        if num_dense_layers_temp > 0:
            for i in range(num_dense_layers_temp):
                x_temp = tf.keras.layers.Dense(num_HLD_nodes_temp, activation='relu')(x_temp)
                x_temp = tf.keras.layers.Dropout(drop_temp)(x_temp)
        x_fixe = model_fixe.layers[1].output
        if num_dense_layers_fixe > 0:
            for i in range(num_dense_layers_fixe):
                x_fixe = tf.keras.layers.Dense(num_HLD_nodes_fixe, activation='relu')(x_fixe)
                x_fixe = tf.keras.layers.Dropout(drop_fixe)(x_fixe)
        merge = tf.keras.layers.concatenate([x_text, x_temp, x_fixe])
        if num_dense_layers_merge > 0:
            for i in range(num_dense_layers_merge):
                merge = tf.keras.layers.Dense(num_HLD_nodes_merge, activation='relu')(merge)
                merge = tf.keras.layers.Dropout(drop_merge)(merge)
        # add our classification layer
        predictions = tf.keras.layers.Dense(3, activation='softmax')(merge)
        model = tf.keras.Model(inputs=[model_text.input, model_temp[t].input, model_fixe.input],
                               outputs=predictions)
        # set up our optimizer and compile
        model.compile(optimizer=optim, loss=ncce,
                      metrics=[tf.keras.metrics.Precision(name='precision'),
                               tf.keras.metrics.Recall(name='recall'),
                               F1Score(num_classes=3, name='F1', average='macro')])
        return model

    score = 'val_F1'

    @use_named_args(dimensions=dimensions)
    def fitness(num_dense_layers_text, num_HLD_nodes_text, drop_text,
                num_dense_layers_temp, num_HLD_nodes_temp, drop_temp,
                num_dense_layers_fixe, num_HLD_nodes_fixe, drop_fixe,
                num_dense_layers_merge, num_HLD_nodes_merge, drop_merge, optim):
        print(num_dense_layers_text, num_HLD_nodes_text, drop_text,
              num_dense_layers_temp, num_HLD_nodes_temp, drop_temp,
              num_dense_layers_fixe, num_HLD_nodes_fixe, drop_fixe,
              num_dense_layers_merge, num_HLD_nodes_merge, drop_merge, optim)
        model = create_model(num_dense_layers_text=num_dense_layers_text,
                             num_HLD_nodes_text=num_HLD_nodes_text, drop_text=drop_text,
                             num_dense_layers_temp=num_dense_layers_temp,
                             num_HLD_nodes_temp=num_HLD_nodes_temp, drop_temp=drop_temp,
                             num_dense_layers_fixe=num_dense_layers_fixe,
                             num_HLD_nodes_fixe=num_HLD_nodes_fixe, drop_fixe=drop_fixe,
                             num_dense_layers_merge=num_dense_layers_merge, drop_merge=drop_merge,
                             num_HLD_nodes_merge=num_HLD_nodes_merge, optim=optim, t=t)
        callback = tf.keras.callbacks.EarlyStopping(
            monitor=score, min_delta=0.01, patience=1, verbose=0, mode='auto',
            baseline=0, restore_best_weights=False)
        # named "blackbox" because it represents the structure
        blackbox = model.fit(x=X_train,
                             y=y_train, verbose=1,
                             epochs=2,
                             batch_size=32,
                             validation_data=(X_test, y_test))
        # return the validation score for the last epoch
        val_loss = blackbox.history[score][-1]
        if score == 'val_F1':
            val_loss = -val_loss
        # print the classification score
        if print_score:
            print()
            print("val_score: {}".format(val_loss))
            print()
        # delete the Keras model with these hyper-parameters from memory
        del model
        # clear the Keras session, otherwise it will keep adding new
        # models to the same TensorFlow graph each time we create
        # a model with a different set of hyper-parameters
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
        # the optimizer aims for the lowest score, so we return the negative score
        return -val_loss

    gp_result = gp_minimize(fitness,
                            dimensions=dimensions,
                            n_calls=n_calls, n_random_starts=7,
                            noise=0.01,
                            x0=default_parameters)
    a = pd.concat([pd.DataFrame(gp_result.x_iters,
                                columns=["dense layers text", "HLD nodes text", "drop text",
                                         "dense layers temp", "HLD nodes temp", "drop temp",
                                         "dense layers fixe", "HLD nodes fixe", "drop fixe",
                                         "dense layers merge", "HLD nodes merge", "drop merge",
                                         "optim"]),
                   pd.Series(gp_result.func_vals * -1, name="val_loss")], axis=1)
    a.sort_values(by=['val_loss'], inplace=True, ascending=False)
    print(a.iloc[:10])
    return a
This step looks for the best parameters at step t.
def Run_Grid_search_temp(j=0, n_calls=25):
    while j < X_train.shape[1]:
        temp = Grid_search_class(t=j, n_calls=n_calls)
        print(temp)
        j += 1
    return
And this one loops over the steps.
sys.setrecursionlimit(10000)
seems to resolve my problem.
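For context, a minimal sketch of where that fix goes (assuming the functions above are defined in the same script): the limit is raised once, before the search loop starts.
import sys

# Python's default recursion limit is 1000; raise it before launching the search.
sys.setrecursionlimit(10000)

Run_Grid_search_temp(j=0, n_calls=25)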

'CrossValidatorModel' object has no attribute 'featureImportances'

I'm trying to extract the feature importances of a random forest classifier model I have trained using PySpark. I referred to the following article to get the feature importance scores for the random forest model I trained:
PySpark & MLLib: Random Forest Feature Importances
However, when I use the method described in this article, I get the following error:
'CrossValidatorModel' object has no attribute 'featureImportances'
Here is the code I used to train my model:
cols = new_data.columns
stages = []
label_stringIdx = StringIndexer(inputCol = 'Bought_Fibre', outputCol = 'label')
stages += [label_stringIdx]
numericCols = new_data.schema.names[1:-1]
assembler = VectorAssembler(inputCols=numericCols, outputCol="features")
stages += [assembler]
pipeline = Pipeline(stages = stages)
pipelineModel = pipeline.fit(new_data)
new_data = new_data.fillna(0, subset=cols)
new_data = pipelineModel.transform(new_data)
new_data = new_data.fillna(0, subset=cols)
new_data.printSchema()
train_initial, test = new_data.randomSplit([0.7, 0.3], seed = 1045)
train_initial.groupby('label').count().toPandas()
test.groupby('label').count().toPandas()
train_sampled = train_initial.sampleBy("label", fractions={0: 0.1, 1: 1.0}, seed=0)
train_sampled.groupBy("label").count().orderBy("label").show()
labelIndexer = StringIndexer(inputCol='label',
                             outputCol='indexedLabel').fit(train_sampled)
featureIndexer = VectorIndexer(inputCol='features',
                               outputCol='indexedFeatures',
                               maxCategories=2).fit(train_sampled)
from pyspark.ml.classification import RandomForestClassifier
rf_model = RandomForestClassifier(labelCol="indexedLabel", featuresCol="indexedFeatures")
labelConverter = IndexToString(inputCol="prediction", outputCol="predictedLabel",
                               labels=labelIndexer.labels)
pipeline = Pipeline(stages=[labelIndexer, featureIndexer, rf_model, labelConverter])
paramGrid = ParamGridBuilder() \
    .addGrid(rf_model.numTrees, [200, 400, 600, 800, 1000]) \
    .addGrid(rf_model.impurity, ['entropy', 'gini']) \
    .addGrid(rf_model.maxDepth, [2, 3, 4, 5]) \
    .build()
crossval = CrossValidator(estimator=pipeline,
                          estimatorParamMaps=paramGrid,
                          evaluator=BinaryClassificationEvaluator(),
                          numFolds=5)
train_model = crossval.fit(train_sampled)
Please help me resolve the above-mentioned error and extract the feature importances.
That's because CrossValidatorModel doesn't have a featureImportances attribute, but the underlying RandomForest model does.
Since you are using a Pipeline and a CrossValidator to fit your data, you'll need to get the corresponding stage of the best fitted model:
# '2' is the index of your RandomForestModel inside of the Pipeline
your_model = cvModel.bestModel.stages[2]
var_imp = your_model.featureImportances
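As a follow-up (a sketch, assuming the numericCols and train_model names from the question's code, with the random forest at stage index 2 of the fitted pipeline), the importances can be paired with the assembled column names:
best_rf = train_model.bestModel.stages[2]           # RandomForestClassificationModel
importances = best_rf.featureImportances.toArray()  # one score per assembled feature
for name, imp in sorted(zip(numericCols, importances), key=lambda kv: -kv[1]):
    print(name, imp)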
