TensorFlow model import error: NewRandomAccessFile - python-3.x

I get this error when I try to load the model into Keras as a pb file. This is how the file structure looks:
SavedModel/
    variables/
        variables.index
    saved_model.pb
SavedImgModel/
    variables/
        variables.index
    saved_model.pb
def load_model():
    global model
    global img_model
    model_path = os.path.join('.', 'SavedModel\\')
    img_modelpath = os.path.join('.', 'SavedImgModel\\')
    print(model_path)
    print("* Loading Keras model and Flask starting server...please wait until server has fully started")
    model = tf.keras.models.load_model(model_path)
    img_model = tf.keras.models.load_model(img_modelpath)

# initialize our Flask application and the Keras model
app = Flask(__name__)
load_model()
detector = MTCNN(
The error is:
raise errors_impl.OpError(None, None, error_message, errors_impl.UNKNOWN)
tensorflow.python.framework.errors_impl.OpError: NewRandomAccessFile failed to Create/Open: .\SavedModel\variables\variables.data-00000-of-00001 : The system cannot find the file specified.
; No such file or directory
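The message itself narrows this down: TensorFlow is looking for variables.data-00000-of-00001, while the directory listing above shows only variables.index inside each variables folder. A complete SavedModel contains saved_model.pb, variables/variables.index, and one or more variables/variables.data-XXXXX-of-XXXXX shards. As a quick diagnostic (a sketch with a hypothetical helper, not part of the original code), you can list what is actually on disk:
import os

def list_saved_model(model_dir):
    # A loadable SavedModel needs saved_model.pb, variables/variables.index,
    # and the variables/variables.data-XXXXX-of-XXXXX shard(s); the error
    # above means the data shard is missing from the folder being loaded.
    for root, _, files in os.walk(model_dir):
        for name in files:
            print(os.path.join(root, name))

list_saved_model('SavedModel')
list_saved_model('SavedImgModel')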

Related

Optimizing Sentence Transformer models using HuggingFace Optimum

I am looking to optimize some of the sentence transformer models from huggingface using the optimum library. I am following the documentation below:
https://huggingface.co/blog/optimum-inference
I understand the process, but I am not able to use model_id because our network restricts access to huggingface through its APIs. I have downloaded these models locally and am trying to follow the same procedure, but I cannot find any examples of how to pass the ONNX checkpoint model to the ORTOptimizer class.
This is how I converted the vanilla model to onnx checkpoint:
from pathlib import Path
import transformers
from transformers.onnx import FeaturesManager
from transformers import AutoConfig, AutoTokenizer, AutoModel

# load model and tokenizer
feature = "question-answering"
model_path = r'local\path\to\all-MiniLM-L12-v2'
model = AutoModel.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model_kind, model_onnx_config = FeaturesManager.check_supported_model_or_raise(model, feature=feature)
onnx_config = model_onnx_config(model.config)
onnx_path = Path("onnx")

# export
onnx_inputs, onnx_outputs = transformers.onnx.export(
    preprocessor=tokenizer,
    model=model,
    config=onnx_config,
    opset=13,
    output=onnx_path,
)
This saves the model as an ONNX checkpoint. Then I try to load the ONNX model along with its original model configuration:
from optimum.onnxruntime import ORTModelForQuestionAnswering
task = "question-answering"
ORTModelForQuestionAnswering(model=onnx_path, config=r'local\path\to\all-MiniLM-L12-v2\config.json')
I get the following error from the above code:
C:\Anaconda3\lib\site-packages\optimum\onnxruntime\modeling_ort.py in __init__(self, model, config, use_io_binding, **kwargs)
641
642 def __init__(self, model=None, config=None, use_io_binding=True, **kwargs):
--> 643 super().__init__(model, config, use_io_binding, **kwargs)
644 # create {name:idx} dict for model outputs
645 self.model_outputs = {output_key.name: idx for idx, output_key in enumerate(self.model.get_outputs())}
C:\Anaconda3\lib\site-packages\optimum\onnxruntime\modeling_ort.py in __init__(self, model, config, use_io_binding, **kwargs)
123 self.model_save_dir = kwargs.get("model_save_dir", None)
124 self.latest_model_name = kwargs.get("latest_model_name", "model.onnx")
--> 125 self.providers = model.get_providers()
126 self._device = get_device_for_provider(self.providers[0])
127
AttributeError: 'WindowsPath' object has no attribute 'get_providers'
How do I resolve this issue, and are there any examples of the process I am trying to follow, i.e. loading sentence transformer models locally rather than using model_id?
Appreciate all the help!
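The traceback hints at the fix: the constructor calls model.get_providers(), so it expects an onnxruntime.InferenceSession, not a filesystem path, and config should be a loaded config object rather than a path to config.json. A minimal sketch under those assumptions, reusing onnx_path from the export snippet above (not an official optimum recipe):
import onnxruntime
from transformers import AutoConfig
from optimum.onnxruntime import ORTModelForQuestionAnswering

# Build a session from the exported ONNX file; passing a Path object directly
# is what raises "'WindowsPath' object has no attribute 'get_providers'".
session = onnxruntime.InferenceSession(str(onnx_path), providers=["CPUExecutionProvider"])

# Pass a config object, not the path to config.json
config = AutoConfig.from_pretrained(r'local\path\to\all-MiniLM-L12-v2')

model = ORTModelForQuestionAnswering(model=session, config=config)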

How to save and access pickle/hdf5 files in azure machine learning studio

I have a pickle file parameters.pkl containing some parameters of a model and their values. The pickle file was created through the following process:
dict = {'scaler': scaler,
        'features': z_tags,
        'Z_reconstruction_loss': Z_reconstruction_loss}
pickle.dump(dict, open('parameters.pkl', 'wb'))
I also have a model file, model_V2.hdf5.
I am new to Azure Machine Learning Studio. It would be helpful to know how the pickle and hdf5 files can be stored in Azure Machine Learning Studio and an API endpoint created, so that the pickle file can be accessed through the API. The objective is to access the pickle file and its contents through an API. I have tried the following:
pip install azureml azureml-core
from azureml.core import Workspace
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.model import InferenceConfig, Model
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

ws = Workspace.create(
    name='myworkspace',
    subscription_id='<azure-subscription-id>',
    resource_group='myresourcegroup',
    create_resource_group=True,
    location='eastus2'
)
ws.write_config()
ws = Workspace.from_config()

model = Model.register(workspace=ws,
                       model_path="model/parameters.pkl",
                       model_name="parameters",
                       tags={"version": "1"},
                       description="parameters")

# to install required packages
env = Environment('env')
cd = CondaDependencies.create(
    pip_packages=['pandas==1.1.5', 'azureml-defaults', 'joblib==0.17.0'],
    conda_packages=['scikit-learn==0.23.2'])
env.python.conda_dependencies = cd

# Register environment to re-use later
env.register(workspace=ws)
print("Registered Environment")
myenv = Environment.get(workspace=ws, name="env")
myenv.save_to_directory('./environ', overwrite=True)

aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags={"data": "parameters"},
    description='parameters MODEL',
)
inference_config = InferenceConfig(entry_script="score.py", environment=myenv)
What should I modify in the following score script, given that I don't want to predict anything, only access the parameter values stored in the pickle file?
def init():
    global model
    model_path = Model.get_model_path("parameters")
    print("Model Path is ", model_path)
    model = joblib.load(model_path)

def run(data):
    try:
        data = json.loads(data)
        result = model.predict(data['data'])
        return {'data': result.tolist(), 'message': 'Successfully accessed'}
    except Exception as e:
        error = str(e)
        return {'data': error, 'message': 'Failed to access'}
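Since nothing needs to be predicted, one option (a sketch, assuming the goal is only to return the stored values over the API) is to load the pickled dictionary in init() and have run() echo its contents back, stringifying values such as the scaler that are not JSON-serializable:
import joblib
from azureml.core.model import Model

def init():
    global parameters
    # The registered "parameters" model is a pickled dict, not a predictor
    parameters_path = Model.get_model_path("parameters")
    parameters = joblib.load(parameters_path)

def run(data):
    try:
        # Return the stored parameter values; str() makes objects like the
        # scaler JSON-serializable (an assumption about what the caller needs)
        return {'data': {k: str(v) for k, v in parameters.items()},
                'message': 'Successfully accessed'}
    except Exception as e:
        return {'data': str(e), 'message': 'Failed to access'}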
Deploy the Model
service = Model.deploy(workspace=ws,
                       name='iris-model',
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aciconfig,
                       overwrite=True)
service.wait_for_deployment(show_output=True)
url = service.scoring_uri
print(url)
We need to deploy the model as an external service, i.e. as a web service application, in Azure Machine Learning Studio.
Follow the steps below to reproduce the work and deploy the model as a web service application that can serve API calls.
Note: before proceeding with the steps, have the model's pickle file handy.
1. Open https://studio.azureml.net/
2. Click on New.
3. Click on Upload from local files.
4. Select the zip file to upload.
5. Under Experiments, upload the dataset.
6. Use the canvas steps to create the model.
7. After mapping all the elements and executing the Python script, click on Set up web service.
8. Click on Deploy web service.

Not able to read model.pkl from output folder in Azure ML

I'm trying to read the model.pkl file from the artifacts output folder like this:
def init():
    global model
    # infile = open('model.pkl', 'rb')
    # model = pickle.load(infile)
    # model = joblib.load('model.pkl')
    model_path = Model.get_model_path(model_name='<<modelname>>')
    model_path = "outputs/model.pkl"
    # deserialize the model file back into a sklearn model
    model = joblib.load(model_path)
But it still isn't working. Please guide me on how to read the model.pkl file from the artifacts output folder; because of this, deployment to an Azure container instance is failing.
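One likely cause, offered as a sketch rather than a confirmed fix: the line model_path = "outputs/model.pkl" overwrites the path that Model.get_model_path just resolved, so joblib looks for a relative outputs/model.pkl that does not exist inside the service container. Assuming the model is registered under that name, loading would look like:
import joblib
from azureml.core.model import Model

def init():
    global model
    # get_model_path resolves where the registered model is copied inside the
    # deployment container; do not overwrite it with a local relative path
    model_path = Model.get_model_path(model_name='<<modelname>>')
    model = joblib.load(model_path)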

cannot import name 'RollingOriginValidator'

I'm writing a scoring step for an Azure Machine Learning pipeline.
This is the score code:
import os
import pickle
import pandas as pd
from azureml.core.model import Model
import argparse
from azureml.core.run import Run, _OfflineRun
from azureml.automl.core.shared.rolling_origin_validator import RollingOriginValidator

# Called when the deployed service starts
def init():
    global model
    # Get the path where the deployed model can be found.
    run = Run.get_context()
    model_path = Model.get_model_path('best_model_data')
    print(model_path)
    with open(model_path, "rb") as f:
        model = pickle.load(f)
    print("loaded")

# Handle requests to the service
def run(data):
    try:
        # Pick out the text property of the JSON request.
        # This expects a request in the form of {"text": "some text to score for sentiment"}
        prediction = predict(data)
        # Return prediction
        return prediction
    except Exception as e:
        error = str(e)
        return error

# Predict sentiment using the model
def predict(data, include_neutral=True):
    # Tokenize text
    test_data_features = data.drop('ProposalId', 1).drop('CombinedTactics', 1)
    test_data_combos = data['CombinedTactics']
    print("data")
    # Predict
    score = model.predict_proba(test_data_features)
    print("predicted")
    df = pd.DataFrame({'score': score[:, 1], 'CombinedTactics': test_data_combos})
    return df
This is the pipeline step definition:
parallel_run_config = ParallelRunConfig(
    environment=myenv,
    entry_script="use_model.py",
    source_directory="./",
    output_action="append_row",
    mini_batch_size="20",
    error_threshold=1,
    compute_target=compute_target,
    process_count_per_node=2,
    node_count=2
)

parallel_step_name = "batchscoring-" + datetime.now().strftime("%Y%m%d%H%M")

batch_score_step = ParallelRunStep(
    name=parallel_step_name,
    inputs=[test_data.as_named_input("test_data")],
    output=output_dir,
    parallel_run_config=parallel_run_config,
    allow_reuse=True
)
However, I get the error below:
File "/mnt/batch/tasks/shared/LS_root/jobs/ucmopp-ws/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/mounts/workspaceblobstore/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/driver/azureml_user/parallel_run/score_module.py", line 139, in call_init
self.init()
File "/mnt/batch/tasks/shared/LS_root/jobs/ucmopp-ws/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/mounts/workspaceblobstore/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/use_model.py", line 17, in init
model = pickle.load(f)
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurization/init.py", line 8, in
from .data_transformer import DataTransformer, TransformerAndMapper
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurization/data_transformer.py", line 54, in
from ..featurizer.transformer import (AutoMLTransformer, CategoricalFeaturizers, DateTimeFeaturesTransformer,
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurizer/transformer/init.py", line 28, in
from .timeseries import TimeSeriesTransformer, TimeSeriesPipelineType, NumericalizeTransformer,
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurizer/transformer/timeseries/init.py", line 65, in
from azureml.automl.core.shared.rolling_origin_validator import RollingOriginValidator
ImportError: cannot import name 'RollingOriginValidator'
Does anyone have any idea about this error?
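Not a confirmed diagnosis, but this kind of ImportError during unpickling usually means the scoring environment's azureml-automl packages are a different version from the ones the model was pickled with, so azureml.automl.core.shared.rolling_origin_validator no longer exposes RollingOriginValidator. A hedged sketch of pinning the inference environment to the training SDK version (the version string is a placeholder to fill in from the training run):
from azureml.core.conda_dependencies import CondaDependencies

# Placeholder: use the azureml SDK version shown in the training run's environment
training_sdk_version = "<training-sdk-version>"

cd = CondaDependencies.create(pip_packages=[
    'azureml-defaults==' + training_sdk_version,
    'azureml-automl-runtime==' + training_sdk_version,  # provides azureml.automl.runtime
    'azureml-train-automl-runtime==' + training_sdk_version,
])
myenv.python.conda_dependencies = cd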

Why does my Flask app work when executing using `python app.py` but not when using `heroku local web` or `flask run`?

I wrote a Flask-based web app that takes text from users and returns the probability that it is of a given classification (full script below). The app loads some of the trained models needed to make predictions before any requests are made. I am currently trying to deploy it on Heroku and experiencing some problems.
I am able to run it locally when I execute python ml_app.py. But when I use the Heroku CLI command heroku local web to test it locally before deployment, I get the following error:
AttributeError: module '__main__' has no attribute 'tokenize'
This error is associated with the loading of a text vectorizer called TFIDF found in the line
tfidf_model = joblib.load('models/tfidf_vectorizer_train.pkl')
I have imported the required function at the top of the script to ensure that it loads properly (from utils import tokenize). This works, given that the app runs when I use python ml_app.py. But for reasons I do not know, the vectorizer doesn't load when I use heroku local web. It also fails when I try to run the app locally with the Flask CLI command flask run. Any idea why?
I admit that I do not have a good understanding of what is going on under the hood here (with respect to the web dev./deployment aspect of the code) so any explanation helps.
from flask import Flask, request, render_template
from sklearn.externals import joblib
from utils import tokenize  # custom tokenizer required for the tfidf model loaded in nbsvm_models()

app = Flask(__name__)

models_directory = 'models'

@app.before_first_request
def nbsvm_models():
    global tfidf_model
    global logistic_identity_hate_model
    global logistic_insult_model
    global logistic_obscene_model
    global logistic_severe_toxic_model
    global logistic_threat_model
    global logistic_toxic_model
    tfidf_model = joblib.load('models/tfidf_vectorizer_train.pkl')
    logistic_identity_hate_model = joblib.load('models/logistic_identity_hate.pkl')
    logistic_insult_model = joblib.load('models/logistic_insult.pkl')
    logistic_obscene_model = joblib.load('models/logistic_obscene.pkl')
    logistic_severe_toxic_model = joblib.load('models/logistic_severe_toxic.pkl')
    logistic_threat_model = joblib.load('models/logistic_threat.pkl')
    logistic_toxic_model = joblib.load('models/logistic_toxic.pkl')

@app.route('/')
def my_form():
    return render_template('main.html')

@app.route('/', methods=['POST'])
def my_form_post():
    """
    Take the comment submitted by the user, apply the trained TFIDF
    vectorizer to it, and predict using the trained models.
    """
    text = request.form['text']
    comment_term_doc = tfidf_model.transform([text])

    dict_preds = {}
    dict_preds['pred_identity_hate'] = logistic_identity_hate_model.predict_proba(comment_term_doc)[:, 1][0]
    dict_preds['pred_insult'] = logistic_insult_model.predict_proba(comment_term_doc)[:, 1][0]
    dict_preds['pred_obscene'] = logistic_obscene_model.predict_proba(comment_term_doc)[:, 1][0]
    dict_preds['pred_severe_toxic'] = logistic_severe_toxic_model.predict_proba(comment_term_doc)[:, 1][0]
    dict_preds['pred_threat'] = logistic_threat_model.predict_proba(comment_term_doc)[:, 1][0]
    dict_preds['pred_toxic'] = logistic_toxic_model.predict_proba(comment_term_doc)[:, 1][0]

    for k in dict_preds:
        perc = dict_preds[k] * 100
        dict_preds[k] = "{0:.2f}%".format(perc)

    return render_template('main.html', text=text,
                           pred_identity_hate=dict_preds['pred_identity_hate'],
                           pred_insult=dict_preds['pred_insult'],
                           pred_obscene=dict_preds['pred_obscene'],
                           pred_severe_toxic=dict_preds['pred_severe_toxic'],
                           pred_threat=dict_preds['pred_threat'],
                           pred_toxic=dict_preds['pred_toxic'])

if __name__ == '__main__':
    app.run(debug=True)
Fixed it. It was due to the way I pickled the class instance stored in tfidf_vectorizer_train.pkl. The model was created in an IPython notebook, where one of its attributes depended on a tokenizer function that I defined interactively in the notebook. I soon learned that pickling does not save function definitions, only references to them by import path, which means tfidf_vectorizer_train.pkl does not contain the function I defined in the notebook.
To fix this, I moved the tokenizer function to a separate utilities Python file and imported the function both in the file where I trained and subsequently pickled the model and in the file where I unpickled it.
In code, I did
from utils import tokenize
...
tfidfvectorizer = TfidfVectorizer(ngram_range=(1, 2), tokenizer=tokenize,
                                  min_df=3, max_df=0.9, strip_accents='unicode',
                                  use_idf=1, smooth_idf=True, sublinear_tf=1)
train_term_doc = tfidfvectorizer.fit_transform(train[COMMENT])
joblib.dump(tfidfvectorizer, 'models/tfidf_vectorizer_train.pkl')
...
in the file where I trained the model and
from utils import tokenize
...
@app.before_first_request
def load_models():
    # from utils import tokenize
    global tfidf_model
    tfidf_model = joblib.load('{}/tfidf_vectorizer_train.pkl'.format(models_directory))
...
in the file containing the web app code.
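For illustration only, a hypothetical utils.py (the post never shows the real tokenizer; this stand-in just demonstrates that the function must live at module level in an importable file so that pickle can resolve it by name when loading the vectorizer):
# utils.py -- hypothetical stand-in; the author's actual tokenizer is not shown
import re

def tokenize(text):
    # Lowercase and split on runs of non-alphanumeric characters
    return re.findall(r"[a-z0-9]+", text.lower())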
