How to add custom components to spaCy? - python-3.x

I want to add custom components to the spaCy pipeline, e.g. to add additional metadata to tokens or the document, or to add entities.
So I built the following DOIDExtractorComponent, which is added to the spaCy pipeline. In it, we create a new custom attribute at the Doc level called doids.
import progressbar
from pronto import Ontology
from spacy.tokens import Doc, Span, Token
from spacy.matcher import PhraseMatcher
from spacy.util import filter_spans
from spacy.language import Language

@Language.component("doid_extractor")
class DOIDExtractorComponent(object):
    # name of the component
    name = "doid_extractor"

    def __init__(self, nlp, label="DOID"):
        # label that is applied to the matches
        self.label = label
        # load ontology
        print("Loading DOID ontology")
        doid = Ontology("http://purl.obolibrary.org/obo/doid.obo")
        # init terms and patterns
        self.terms = {}
        patterns = []
        i = 0
        nr_terms = len(doid.terms())
        # init progress bar as loading terms takes long
        print("Importing terms")
        bar = progressbar.ProgressBar(maxval=nr_terms,
                                      widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
        bar.start()
        # iterate over terms in ontology
        for term in doid.terms():
            # if term has a name
            if term.name is not None:
                self.terms[term.name.lower()] = {'id': term.id}
                patterns.append(nlp(term.name))
            i += 1
            bar.update(i)
        bar.finish()
        # initialize matcher and add patterns
        self.matcher = PhraseMatcher(nlp.vocab, attr='LOWER')
        self.matcher.add(label, None, *patterns)
        # set extensions to tokens, spans and docs
        Token.set_extension("is_doid_term", default=False, force=True)
        Token.set_extension("doid_id", default=False, force=True)
        Token.set_extension("merged_concept", default=False, force=True)
        Doc.set_extension("has_doids", getter=self.has_doids, force=True)
        Doc.set_extension("doids", default=[], force=True)
        Span.set_extension("has_doids", getter=self.has_doids, force=True)

    def __call__(self, doc):
        matches = self.matcher(doc)
        spans = [Span(doc, match[1], match[2], label=self.label) for match in matches]
        for i, span in enumerate(spans):
            span._.set("has_doids", True)
            for token in span:
                token._.set("is_doid_term", True)
                token._.set("doid_id", self.terms[span.text.lower()]["id"])
        with doc.retokenize() as retokenizer:
            for span in filter_spans(spans):
                retokenizer.merge(span, attrs={"_": {"merged_concept": True}})
                doc._.doids = list(doc._.doids) + [span]
        return doc

    # getter used by the has_doids extension on Doc and Span
    def has_doids(self, tokens):
        return any([t._.get("is_doid_term") for t in tokens])
and I use the component like this:
from pronto import Ontology
import progressbar
import spacy
from spacy import displacy
from spacy.tokens import Doc, Span, Token
from spacy.lang.en import English
from spacy.matcher import PhraseMatcher
from spacy.util import filter_spans
from src.iccoExtractor import DOIDExtractorComponent
from spacy.language import Language
# python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("doid_extractor", after="ner")
text = """Primary education, Intermediate education, and secondary education combined are sometimes referred to as K-12 (Kindergarten through Grade 12). Secondary schooling, known as high school, collegiate institute, école secondaire or secondary school, consists of different grades depending on the province in which one resides"""
doc = nlp(text)
So, when I execute the above code I get the following exception:
Traceback (most recent call last): ] 0%
  File "C:\Users\Admin\Documents\onto.py", line 21, in <module>
    doc = nlp(text)
  File "C:\Users\Admin\miniconda3\envs\projet1\lib\site-packages\spacy\language.py", line 1025, in __call__
    error_handler(name, proc, [doc], e)
  File "C:\Users\Admin\miniconda3\envs\projet1\lib\site-packages\spacy\util.py", line 1630, in raise_error
    raise e
  File "C:\Users\Admin\miniconda3\envs\projet1\lib\site-packages\spacy\language.py", line 1020, in __call__
    doc = proc(doc, **component_cfg.get(name, {}))  # type: ignore[call-arg]
  File "C:\Users\Admin\Documents\src\iccoExtractor.py", line 38, in __init__
    patterns.append(nlp(term.name))
TypeError: 'spacy.tokens.doc.Doc' object is not callable
I don't know exactly what that means. Any suggestions on how to solve this issue?
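A plausible reading of that traceback: in spaCy v3, @Language.component registers the decorated object itself as the pipeline callable, so spaCy ends up calling the class with the Doc, and __init__ receives that Doc where it expects nlp (hence nlp(term.name) fails). Stateful class components are normally registered with @Language.factory instead. A minimal sketch of that registration, assuming spaCy v3 and the class above:

from spacy.language import Language

# Register a factory that receives the real Language object plus the
# component name, and returns the stateful component instance.
@Language.factory("doid_extractor", default_config={"label": "DOID"})
def create_doid_extractor(nlp, name, label):
    # here nlp really is the pipeline, so nlp(term.name) works in __init__
    return DOIDExtractorComponent(nlp, label=label)

With this (and the @Language.component decorator removed from the class), nlp.add_pipe("doid_extractor", after="ner") should work unchanged.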

Related

unicode error during model inference in Sagemaker notebook

I am doing inference on a model trained in the SageMaker notebook. I am getting a Unicode error while passing the input.
Before deploying, I tried the following and it worked: process the text with input_fn and then pass its output to predict_fn for prediction. But I am facing an issue when I use the deploy fn of the SageMaker endpoint. How can I resolve this?
input_text = "BACKGROUND: COVID-19 is associated with pulmonary embolism (PE) in adults."
deployment.predict(json.dumps({"data":input_text}))
Error
Traceback (most recent call last):
  File "/miniconda3/lib/python3.7/site-packages/sagemaker_containers/_functions.py", line 93, in wrapper
    return fn(*args, **kwargs)
  File "/opt/ml/code/train_nmf.py", line 311, in input_fn
    input_data = json.loads(serialized_input_data)
  File "/miniconda3/lib/python3.7/json/__init__.py", line 343, in loads
    s = s.decode(detect_encoding(s), 'surrogatepass')
Training in Sagemaker Notebook
from sagemaker.sklearn.estimator import SKLearn

script_path = 'train_nmf.py'
sklearn = SKLearn(
    entry_point=script_path,
    instance_type="ml.m4.xlarge",
    framework_version="0.23-1",
    py_version="py3",
    role=role,
    sagemaker_session=sagemaker_session,
    output_path=output_data_uri,
    code_location=training_desc_uri,
    source_dir='/home/ec2-user/SageMaker/src')
Train NMF Code
import os
import numpy as np
import pandas as pd
import joblib
import json

CONTENT_TYPE_JSON = "application/json"

def process_text(text):
    text = [each.lower() for each in text]
    return text

def model_fn(model_dir):
    # SageMaker automatically loads the model.tar.gz from S3 and
    # mounts the folders inside the docker container. The 'model_dir'
    # points to the root of the extracted tar.gz file.
    model = joblib.load(os.path.join(model_dir, "nmf_model.pkl"))
    return model

def predict_fn(input_data, model):
    # Do your inference
    predicted_topics = model.transform(input_data)
    return predicted_topics

def input_fn(serialized_input_data, model_dir, content_type=CONTENT_TYPE_JSON):
    input_data = json.loads(serialized_input_data)
    input_text_processed = pd.Series(input_data).apply(process_text)
    tf_idf_model = joblib.load(os.path.join(model_dir, "tf_idf.pkl"))
    processed_sample_text = tf_idf_model.transform(input_text_processed)
    return processed_sample_text

def output_fn(prediction_output, model_dir, accept=CONTENT_TYPE_JSON):
    if accept == CONTENT_TYPE_JSON:
        topic_keywords = joblib.load(
            os.path.join(model_dir, "topic_keywords.pkl")
        )
        pred_dominant_topic = np.argmax(prediction_output, axis=1)
        pred_df = pd.DataFrame(prediction_output, columns=topic_keywords)
        pred_df["dominant_topic"] = pred_dominant_topic
        return json.dumps(pred_df.to_dict("records")), accept
    raise Exception('Unsupported content type')
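For illustration (not from the original post): json.loads only takes the s.decode(...) path shown in the traceback when it is handed bytes, so one hedged workaround is to normalize the payload to str before parsing. A minimal sketch, assuming the container's usual (serialized_input_data, content_type) contract (note the input_fn above also takes model_dir, which differs from that contract):

import json

CONTENT_TYPE_JSON = "application/json"

def input_fn(serialized_input_data, content_type=CONTENT_TYPE_JSON):
    # Normalize: the endpoint may hand this function bytes or str.
    if isinstance(serialized_input_data, (bytes, bytearray)):
        serialized_input_data = serialized_input_data.decode("utf-8")
    payload = json.loads(serialized_input_data)
    # The client sends {"data": "<text>"}, so unwrap that envelope.
    return payload["data"]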

Azure ML Inference Schema - "List index out of range" error

I have an ML model deployed on Azure ML Studio and I was updating it with an inference schema to allow compatibility with Power BI, as described here.
When sending data up to the model via the REST API (before adding this inference schema), everything worked fine and I got results returned. However, once I add the schema as described in the instructions linked above, personalised to my data, the same data sent via the REST API only returns the error "list index out of range". The deployment goes ahead fine and is designated "healthy" with no error messages.
Any help would be greatly appreciated. Thanks.
EDIT:
Entry script:
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType

def init():
    global model
    # Model name is the name of the model registered under the workspace
    model_path = Model.get_model_path(model_name='databricksmodelpowerbi2')
    model = joblib.load(model_path)

# Provide 3 sample inputs for schema generation for 2 rows of data
numpy_sample_input = NumpyParameterType(np.array([[2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0], [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0]], dtype='float64'))
pandas_sample_input = PandasParameterType(pd.DataFrame({'1': [2400.0, 368.55], '2': [78.26086956521739, 96.88311688311687], '3': [11100.0, 709681.1600000012], '4': [3.612565445026178, 73.88059701492537], '5': [3.0, 44.0], '6': [0.0, 0.0]}))
standard_sample_input = StandardPythonParameterType(0.0)

# This is a nested input sample; any item wrapped by `ParameterType` will be described by schema
sample_input = StandardPythonParameterType({'input1': numpy_sample_input,
                                            'input2': pandas_sample_input,
                                            'input3': standard_sample_input})
sample_global_parameters = StandardPythonParameterType(1.0)  # this is optional
sample_output = StandardPythonParameterType([1.0, 1.0])

@input_schema('inputs', sample_input)
@input_schema('global_parameters', sample_global_parameters)  # this is optional
@output_schema(sample_output)
def run(inputs, global_parameters):
    try:
        data = inputs['input1']
        # data will be converted to target format
        assert isinstance(data, np.ndarray)
        result = model.predict(data)
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error
Prediction script:
import requests
import json
from ast import literal_eval
# URL for the web service
scoring_uri = ''
## If the service is authenticated, set the key or token
#key = '<your key or token>'
# Two sets of data to score, so we get two results back
data = {"data": [[2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0], [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0]]}
# Convert to JSON string
input_data = json.dumps(data)
# Set the content type
headers = {'Content-Type': 'application/json'}
## If authentication is enabled, set the authorization header
#headers['Authorization'] = f'Bearer {key}'
# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.text)
result = literal_eval(resp.text)
The Microsoft documentation says: "In order to generate conforming swagger for automated web service consumption, the scoring script run() function must have an API shape of:
A first parameter of type "StandardPythonParameterType", named Inputs and nested.
An optional second parameter of type "StandardPythonParameterType", named GlobalParameters.
Return a dictionary of type "StandardPythonParameterType", named Results and nested."
I've already tested this and it is case-sensitive. So it will be like this:
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType

def init():
    global model
    # Model name is the name of the model registered under the workspace
    model_path = Model.get_model_path(model_name='databricksmodelpowerbi2')
    model = joblib.load(model_path)

# Provide 3 sample inputs for schema generation for 2 rows of data
numpy_sample_input = NumpyParameterType(np.array([[2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0], [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0]], dtype='float64'))
pandas_sample_input = PandasParameterType(pd.DataFrame({'value': [2400.0, 368.55], 'delayed_percent': [78.26086956521739, 96.88311688311687], 'total_value_delayed': [11100.0, 709681.1600000012], 'num_invoices_per30_dealing_days': [3.612565445026178, 73.88059701492537], 'delayed_streak': [3.0, 44.0], 'prompt_streak': [0.0, 0.0]}))
standard_sample_input = StandardPythonParameterType(0.0)

# This is a nested input sample; any item wrapped by `ParameterType` will be described by schema
sample_input = StandardPythonParameterType({'input1': numpy_sample_input,
                                            'input2': pandas_sample_input,
                                            'input3': standard_sample_input})
sample_global_parameters = StandardPythonParameterType(1.0)  # this is optional
numpy_sample_output = NumpyParameterType(np.array([1.0, 2.0]))
# 'Results' is case sensitive
sample_output = StandardPythonParameterType({'Results': numpy_sample_output})

# 'Inputs' is case sensitive
@input_schema('Inputs', sample_input)
@input_schema('global_parameters', sample_global_parameters)  # this is optional
@output_schema(sample_output)
def run(Inputs, global_parameters):
    try:
        data = Inputs['input1']
        # data will be converted to target format
        assert isinstance(data, np.ndarray)
        result = model.predict(data)
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error
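With the case-sensitive names in place, a request matching that schema would look roughly like the sketch below (hedged: the URI is a placeholder, and the records-style list for 'input2' assumes PandasParameterType's default orientation):

import json
import requests

scoring_uri = ''  # placeholder, as in the prediction script above
headers = {'Content-Type': 'application/json'}

# The @input_schema('Inputs', ...) decorator means the body must nest
# the three parameters under the case-sensitive 'Inputs' key.
payload = {
    "Inputs": {
        "input1": [[2400.0, 78.26086956521739, 11100.0, 3.612565445026178, 3.0, 0.0],
                   [368.55, 96.88311688311687, 709681.1600000012, 73.88059701492537, 44.0, 0.0]],
        "input2": [{"value": 2400.0, "delayed_percent": 78.26086956521739,
                    "total_value_delayed": 11100.0, "num_invoices_per30_dealing_days": 3.612565445026178,
                    "delayed_streak": 3.0, "prompt_streak": 0.0}],
        "input3": 0.0,
    }
}
resp = requests.post(scoring_uri, json.dumps(payload), headers=headers)
print(resp.text)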
I'm not sure if you've figured it out yet or not, but I was having similar issues and couldn't get Power BI to see my ML model. In the end I just created a service specifically for Power BI (pandas df type) using the following schema:
import json
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.ensemble import RandomForestClassifier
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType
import pickle
import azureml.train.automl

# Called when the service is loaded
def init():
    # AZUREML_MODEL_DIR is an environment variable created during deployment. Join this path with the filename of the model file.
    # It holds the path to the directory that contains the deployed model (./azureml-models/$MODEL_NAME/$VERSION).
    # If there are multiple models, this value is the path to the directory containing all deployed models (./azureml-models).
    global model
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.pkl')
    # Get the path to the deployed model file and load it
    # Deserialize the model file back into a sklearn model
    model = joblib.load(model_path)

input_sample = PandasParameterType(pd.DataFrame({
    'input1': [0.0, 20.0],
    'input2': [0.0, 20.0],
    'input3': [0.0, 20.0]  # the original listed 'input2' twice; presumably 'input3' was meant
}))
output_sample = PandasParameterType(pd.DataFrame([0.8, 0.2]))

# Called when a request is received
@input_schema('data', input_sample)
@output_schema(output_sample)
def run(data):
    try:
        result = model.predict(data)
        # You can return any data type, as long as it is JSON serializable.
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error
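For what it's worth, a hedged sketch of what a request against this pandas-df schema typically looks like (the URI is a placeholder; the records-style envelope assumes PandasParameterType's default serialization):

import json
import requests

scoring_uri = "<your scoring URI>"  # placeholder
headers = {"Content-Type": "application/json"}

# PandasParameterType samples are records-oriented by default, so the
# payload nests a list of row dicts under the schema name 'data'.
payload = {"data": [{"input1": 0.0, "input2": 0.0, "input3": 0.0},
                    {"input1": 20.0, "input2": 20.0, "input3": 20.0}]}

resp = requests.post(scoring_uri, data=json.dumps(payload), headers=headers)
print(resp.text)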

cannot import name 'RollingOriginValidator'

I'm writing a scoring step with an Azure Machine Learning pipeline.
This is the score code:
import os
import pickle
import pandas as pd
from azureml.core.model import Model
import argparse
from azureml.core.run import Run, _OfflineRun
from azureml.automl.core.shared.rolling_origin_validator import RollingOriginValidator

# Called when the deployed service starts
def init():
    global model
    # Get the path where the deployed model can be found.
    run = Run.get_context()
    model_path = Model.get_model_path('best_model_data')
    print(model_path)
    with open(model_path, "rb") as f:
        model = pickle.load(f)
    print("loaded")

# Handle requests to the service
def run(data):
    try:
        # Pick out the text property of the JSON request.
        # This expects a request in the form of {"text": "some text to score for sentiment"}
        prediction = predict(data)
        # Return prediction
        return prediction
    except Exception as e:
        error = str(e)
        return error

# Predict sentiment using the model
def predict(data, include_neutral=True):
    # Tokenize text
    test_data_features = data.drop('ProposalId', 1).drop('CombinedTactics', 1)
    test_data_combos = data['CombinedTactics']
    print("data")
    # Predict
    score = model.predict_proba(test_data_features)
    print("predicted")
    df = pd.DataFrame({'score': score[:, 1], 'CombinedTactics': test_data_combos})
    return df
This is the pipeline step definition:
parallel_run_config = ParallelRunConfig(
    environment=myenv,
    entry_script="use_model.py",
    source_directory="./",
    output_action="append_row",
    mini_batch_size="20",
    error_threshold=1,
    compute_target=compute_target,
    process_count_per_node=2,
    node_count=2
)
parallel_step_name = "batchscoring-" + datetime.now().strftime("%Y%m%d%H%M")

batch_score_step = ParallelRunStep(
    name=parallel_step_name,
    inputs=[test_data.as_named_input("test_data")],
    output=output_dir,
    parallel_run_config=parallel_run_config,
    allow_reuse=True
)
However, I got the error below:
File "/mnt/batch/tasks/shared/LS_root/jobs/ucmopp-ws/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/mounts/workspaceblobstore/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/driver/azureml_user/parallel_run/score_module.py", line 139, in call_init
self.init()
File "/mnt/batch/tasks/shared/LS_root/jobs/ucmopp-ws/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/mounts/workspaceblobstore/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/use_model.py", line 17, in init
model = pickle.load(f)
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurization/init.py", line 8, in
from .data_transformer import DataTransformer, TransformerAndMapper
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurization/data_transformer.py", line 54, in
from ..featurizer.transformer import (AutoMLTransformer, CategoricalFeaturizers, DateTimeFeaturesTransformer,
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurizer/transformer/init.py", line 28, in
from .timeseries import TimeSeriesTransformer, TimeSeriesPipelineType, NumericalizeTransformer,
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurizer/transformer/timeseries/init.py", line 65, in
from azureml.automl.core.shared.rolling_origin_validator import RollingOriginValidator
ImportError: cannot import name 'RollingOriginValidator'
Does anyone have any idea about this error?
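One hedged observation, not from the original thread: pickle.load is re-importing azureml.automl.runtime, and this kind of ImportError often means the azureml-automl packages in the scoring environment (myenv above) are a different version from the ones that trained and pickled the model. A sketch of pinning them, with a placeholder version:

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Hypothetical sketch: align the scoring environment's AutoML packages
# with the SDK version of the training run. "1.22.0" is a placeholder;
# substitute the version the model was actually trained with.
myenv = Environment(name="automl-batch-scoring")
myenv.python.conda_dependencies = CondaDependencies.create(
    pip_packages=[
        "azureml-defaults==1.22.0",
        "azureml-automl-core==1.22.0",
        "azureml-automl-runtime==1.22.0",
    ]
)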

How to solve 'lengths' argument should be a 1D CPU int64?

I'm working on a multi-text-classification LSTM model, but in the preprocessing of the text I'm getting an error that I wasn't getting before. I think it was caused by an update to stanfordnlp.
Code where I'm getting the error:
modules:
# StanfordNLP
!pip install stanfordnlp
import stanfordnlp
stanfordnlp.download('es', confirm_if_exists = True, version = 'latest')
stNLP = stanfordnlp.Pipeline(processors='tokenize,mwt,pos,lemma', lang='es', treebank = 'es_ancora', use_gpu=True)
# SpaCy
!spacy download es_core_news_sm # sm md
import spacy
spNLP = spacy.load('es_core_news_sm') #sm md
activated = spacy.prefer_gpu()
spacy.require_gpu()
import pandas as pd
import numpy as np
Getting rid of the stopwords:
def get_stop_words():
    # Getting in a list all the stopwords of the dataframe with is_stop from SpaCy
    spacy_stop_words = list(dict.fromkeys([str(i) for i in spNLP(' '.join([elem for elem in new_df['descripcion']])) if i.is_stop == True]))
    stop_words = stopwords.words('spanish')  # defining the language
    stop_words.extend(spec_stopwords)  # extending the specific stopwords
    stop_words.extend(spacy_stop_words)  # extending the spacy stopwords
    stop_words = set(stop_words)
    return stop_words

stop_words = get_stop_words()  # defining the stop_words set in a variable for clarity when applying it on the dataframe

# Applying stopwords on the dataframe
new_df['descripcion'] = new_df['descripcion'].apply(lambda x: ' '.join([word for word in x.split() if word not in stop_words]))
Lemmatizing:
def stanford_lemma(text):
    doc = stNLP(text)
    return ' '.join([word.lemma for sent in doc.sentences for word in sent.words])

# Lemmatization of dataframe
new_df['descripcion'] = new_df['descripcion'].apply(lambda x: stanford_lemma(x))

# Getting new stop_words after lemmatization
get_stop_words()

# applying new stop_words on the dataframe
new_df['descripcion'] = new_df['descripcion'].apply(lambda x: ' '.join(
    [word for word in x.split() if word not in stop_words]))
Traceback:
RuntimeError                              Traceback (most recent call last)
<ipython-input-18-60972fc225b2> in <module>()
----> 1 new_df['descripcion'] = new_df['descripcion'].apply(lambda x: stanford_lemma(x))
      2
      3 # Getting new stop_words after lemmatization (Lemmatizing: personalidades, personlidad = stopword)
      4 get_stop_words()
      5

9 frames
pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()

/usr/local/lib/python3.6/dist-packages/torch/nn/utils/rnn.py in pack_padded_sequence(input, lengths, batch_first, enforce_sorted)
    231
    232     data, batch_sizes = \
--> 233         _VF._pack_padded_sequence(input, lengths, batch_first)
    234     return PackedSequence(data, batch_sizes, sorted_indices, None)
    235

RuntimeError: 'lengths' argument should be a 1D CPU int64 tensor
Update: using the new library Stanza, I'm getting the same issue. And the issue persists even if I try the Lemma Example Usage:
!pip install stanza
import stanza
stanza.download('es', package='ancora', processors='tokenize,mwt,pos,lemma', verbose=True)
stNLP = stanza.Pipeline(processors='tokenize,mwt,pos,lemma',
                        lang='es',
                        use_gpu=True)
doc = stNLP('Barack Obama nació en Hawaii.')  # the original called nlp(), which is not defined here
print(*[f'word: {word.text+" "}\tlemma: {word.lemma}' for sent in doc.sentences for word in sent.words], sep='\n')
Requests:
Dataset (@Crystina): new_df
Any suggestion to improve the question will be considered.
It will only work if you use Stanza without loading spaCy.
This error is raised when you call spacy.prefer_gpu(), so don't load both libraries in a single GPU process. Use them separately.
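To illustrate the answer with a minimal sketch (assuming Stanza only, with no spaCy imports or GPU calls in the same process):

import stanza

# Run the lemmatizer in a process where spacy.prefer_gpu() /
# spacy.require_gpu() have never been called.
stanza.download('es', package='ancora', processors='tokenize,mwt,pos,lemma')
stNLP = stanza.Pipeline(processors='tokenize,mwt,pos,lemma', lang='es', use_gpu=True)

def stanford_lemma(text):
    doc = stNLP(text)
    return ' '.join(word.lemma for sent in doc.sentences for word in sent.words)

print(stanford_lemma('Barack Obama nació en Hawaii.'))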

Keras Version Error

I am working on the Udacity self-driving car project, which teaches a car to drive autonomously (behavioral cloning).
I am getting a weird Unicode error.
The error stated is as follows:
(dl) Vidits-MacBook-Pro-2:BehavioralClonning-master ViditShah$ python drive.py model.h5
Using TensorFlow backend.
You are using Keras version b'2.1.2' , but the model was built using b'1.2.1'
Traceback (most recent call last):
  File "drive.py", line 122, in <module>
    model = load_model(args.model)
  File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/models.py", line 240, in load_model
    model = model_from_config(model_config, custom_objects=custom_objects)
  File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/models.py", line 314, in model_from_config
    return layer_module.deserialize(config, custom_objects=custom_objects)
  File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
    printable_module_name='layer')
  File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 140, in deserialize_keras_object
    list(custom_objects.items())))
  File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/models.py", line 1323, in from_config
    layer = layer_module.deserialize(conf, custom_objects=custom_objects)
  File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
    printable_module_name='layer')
  File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 140, in deserialize_keras_object
    list(custom_objects.items())))
  File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/layers/core.py", line 699, in from_config
    function = func_load(config['function'], globs=globs)
  File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 224, in func_load
    raw_code = codecs.decode(code.encode('ascii'), 'base64')
UnicodeEncodeError: 'ascii' codec can't encode character '\xe3' in position 0: ordinal not in range(128)
I'm in my anaconda environment dl.
The file drive.py is as follows (this file was given with the assignment and no edits have been suggested):
import argparse
import base64
from datetime import datetime
import os
import shutil

import numpy as np
import socketio
import eventlet
import eventlet.wsgi
from PIL import Image
from flask import Flask
from io import BytesIO

from keras.models import load_model
import h5py
from keras import __version__ as keras_version

sio = socketio.Server()
app = Flask(__name__)
model = None
prev_image_array = None


class SimplePIController:
    def __init__(self, Kp, Ki):
        self.Kp = Kp
        self.Ki = Ki
        self.set_point = 0.
        self.error = 0.
        self.integral = 0.

    def set_desired(self, desired):
        self.set_point = desired

    def update(self, measurement):
        # proportional error
        self.error = self.set_point - measurement
        # integral error
        self.integral += self.error
        return self.Kp * self.error + self.Ki * self.integral


controller = SimplePIController(0.1, 0.002)
set_speed = 9
controller.set_desired(set_speed)


@sio.on('telemetry')
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = data["steering_angle"]
        # The current throttle of the car
        throttle = data["throttle"]
        # The current speed of the car
        speed = data["speed"]
        # The current image from the center camera of the car
        imgString = data["image"]
        image = Image.open(BytesIO(base64.b64decode(imgString)))
        image_array = np.asarray(image)
        steering_angle = float(model.predict(image_array[None, :, :, :], batch_size=1))
        throttle = controller.update(float(speed))
        print(steering_angle, throttle)
        send_control(steering_angle, throttle)

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))
    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)


@sio.on('connect')
def connect(sid, environ):
    print("connect ", sid)
    send_control(0, 0)


def send_control(steering_angle, throttle):
    sio.emit(
        "steer",
        data={
            'steering_angle': steering_angle.__str__(),
            'throttle': throttle.__str__()
        },
        skip_sid=True)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Remote Driving')
    parser.add_argument(
        'model',
        type=str,
        help='Path to model h5 file. Model should be on the same path.'
    )
    parser.add_argument(
        'image_folder',
        type=str,
        nargs='?',
        default='',
        help='Path to image folder. This is where the images from the run will be saved.'
    )
    args = parser.parse_args()

    # check that model Keras version is same as local Keras version
    f = h5py.File(args.model, mode='r')
    model_version = f.attrs.get('keras_version')
    keras_version = str(keras_version).encode('utf8')

    if model_version != keras_version:
        print('You are using Keras version ', keras_version,
              ', but the model was built using ', model_version)

    model = load_model(args.model)

    if args.image_folder != '':
        print("Creating image folder at {}".format(args.image_folder))
        if not os.path.exists(args.image_folder):
            os.makedirs(args.image_folder)
        else:
            shutil.rmtree(args.image_folder)
            os.makedirs(args.image_folder)
        print("RECORDING THIS RUN ...")
    else:
        print("NOT RECORDING THIS RUN ...")

    # wrap Flask application with engineio's middleware
    app = socketio.Middleware(sio, app)

    # deploy as an eventlet WSGI server
    eventlet.wsgi.server(eventlet.listen(('', 4567)), app)
You are getting this error because the model you are attempting to load was trained and saved with an earlier version of Keras than the one you are using, as suggested by:
You are using Keras version b'2.1.2' , but the model was built using b'1.2.1' Traceback (most recent call last): File "drive.py", line 122, in <module> model = load_model(args.model)
It seems that a solution may be to train your model with the same version you plan on using it with, so you can load it smoothly. The other option would be to use version 1.2.1 to load that model and work with it.
This is probably due to differences in the way Keras saves models between versions, as some major changes took place between v1.2.1 and v2.1.2.
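As a quick way to see the mismatch for yourself, the same keras_version attribute that drive.py checks can be read directly from the .h5 file (a small sketch using h5py, which the script already imports):

import h5py

# Inspect which Keras version (and backend) saved the model file.
with h5py.File("model.h5", mode="r") as f:
    print("saved with Keras:", f.attrs.get("keras_version"))
    print("backend:", f.attrs.get("backend"))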
