Keras Version Error - python-3.x

I am working on the Udacity Self-Driving Car project, which teaches a car to drive autonomously (Behavioral Cloning).
I am getting a weird Unicode error.
The Error Stated is as follows:
(dl) Vidits-MacBook-Pro-2:BehavioralClonning-master ViditShah$ python drive.py model.h5
Using TensorFlow backend.
You are using Keras version b'2.1.2' , but the model was built using b'1.2.1'
Traceback (most recent call last):
File "drive.py", line 122, in
model = load_model(args.model)
File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/models.py", line 240, in load_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/models.py", line 314, in model_from_config
return layer_module.deserialize(config, custom_objects=custom_objects)
File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/layers/init.py", line 55, in deserialize
printable_module_name='layer')
File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 140, in deserialize_keras_object
list(custom_objects.items())))
File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/models.py", line 1323, in from_config
layer = layer_module.deserialize(conf, custom_objects=custom_objects)
File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/layers/init.py", line 55, in deserialize
printable_module_name='layer')
File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 140, in deserialize_keras_object
list(custom_objects.items())))
File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/layers/core.py", line 699, in from_config
function = func_load(config['function'], globs=globs)
File "/Users/ViditShah/anaconda/envs/dl/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 224, in func_load
raw_code = codecs.decode(code.encode('ascii'), 'base64')
UnicodeEncodeError: 'ascii' codec can't encode character '\xe3' in position 0: ordinal not in range(128)
I'm in my anaconda Environment dl.
The file drive.py is as follows. (This file was provided with the assignment, and no edits to it were suggested.)
import argparse
import base64
from datetime import datetime
import os
import shutil
import numpy as np
import socketio
import eventlet
import eventlet.wsgi
from PIL import Image
from flask import Flask
from io import BytesIO
from keras.models import load_model
import h5py
from keras import __version__ as keras_version
# Socket.IO server on which the 'telemetry', 'connect', 'steer' and 'manual' events are handled/emitted.
sio = socketio.Server()
# Flask application; it is wrapped with the Socket.IO middleware in the __main__ section.
app = Flask(__name__)
# Keras model; stays None until the __main__ section assigns the result of load_model().
model = None
# Not referenced anywhere else in the visible code.
prev_image_array = None
class SimplePIController:
    """Minimal proportional-integral (PI) controller.

    The output of :meth:`update` is ``Kp * error + Ki * integral`` where
    ``error`` is the set point minus the latest measurement and ``integral``
    is the running sum of all errors seen so far.
    """

    def __init__(self, Kp, Ki):
        """Store the proportional gain *Kp* and the integral gain *Ki*."""
        self.Kp = Kp
        self.Ki = Ki
        self.set_point = 0.
        self.error = 0.
        self.integral = 0.

    def set_desired(self, desired):
        """Set the target value the controller should drive towards."""
        self.set_point = desired

    def update(self, measurement):
        """Feed one *measurement* and return the new control output."""
        # proportional error
        self.error = self.set_point - measurement
        # integral error (accumulates across calls; never reset here)
        self.integral += self.error
        return self.Kp * self.error + self.Ki * self.integral
# Module-level PI controller (Kp=0.1, Ki=0.002) with its set point fixed at set_speed.
controller = SimplePIController(0.1, 0.002)
set_speed = 9
controller.set_desired(set_speed)
@sio.on('telemetry')
def telemetry(sid, data):
    """Handle a 'telemetry' event.

    When *data* is truthy, decode the base64 camera image it carries, ask the
    model for a steering angle, compute a throttle from the reported speed,
    send both back via ``send_control`` and optionally save the frame.
    When *data* is falsy, emit a 'manual' event instead.
    """
    if data:
        # The current steering angle of the car
        steering_angle = data["steering_angle"]
        # The current throttle of the car
        throttle = data["throttle"]
        # The current speed of the car
        speed = data["speed"]
        # The current image from the center camera of the car
        imgString = data["image"]
        image = Image.open(BytesIO(base64.b64decode(imgString)))
        image_array = np.asarray(image)
        # [None, ...] prepends an axis so the single image is passed as a batch of one.
        steering_angle = float(model.predict(image_array[None, :, :, :], batch_size=1))
        throttle = controller.update(float(speed))
        print(steering_angle, throttle)
        send_control(steering_angle, throttle)
        # save frame
        if args.image_folder != '':
            # [:-3] trims the microsecond field to milliseconds.
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))
    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
@sio.on('connect')
def connect(sid, environ):
    """Handle a new Socket.IO connection: log it and send a zero steer/throttle command."""
    print("connect ", sid)
    send_control(0, 0)
def send_control(steering_angle, throttle):
    """Emit a 'steer' event carrying the steering angle and throttle as strings."""
    sio.emit(
        "steer",
        data={
            'steering_angle': str(steering_angle),
            'throttle': str(throttle),
        },
        skip_sid=True)
if __name__ == '__main__':
    # Command line: a required model path and an optional folder for recorded frames.
    parser = argparse.ArgumentParser(description='Remote Driving')
    parser.add_argument(
        'model',
        type=str,
        help='Path to model h5 file. Model should be on the same path.'
    )
    parser.add_argument(
        'image_folder',
        type=str,
        nargs='?',
        default='',
        help='Path to image folder. This is where the images from the run will be saved.'
    )
    args = parser.parse_args()

    # check that model Keras version is same as local Keras version
    # (the file is opened in a `with` block so the HDF5 handle is closed
    # before load_model() opens the same file again)
    with h5py.File(args.model, mode='r') as f:
        model_version = f.attrs.get('keras_version')
    keras_version = str(keras_version).encode('utf8')
    if model_version != keras_version:
        # Only a warning: loading is still attempted below.
        print('You are using Keras version ', keras_version,
              ', but the model was built using ', model_version)

    model = load_model(args.model)

    if args.image_folder != '':
        print("Creating image folder at {}".format(args.image_folder))
        if not os.path.exists(args.image_folder):
            os.makedirs(args.image_folder)
        else:
            # An existing folder is wiped and recreated, discarding earlier recordings.
            shutil.rmtree(args.image_folder)
            os.makedirs(args.image_folder)
        print("RECORDING THIS RUN ...")
    else:
        print("NOT RECORDING THIS RUN ...")

    # wrap Flask application with engineio's middleware
    app = socketio.Middleware(sio, app)

    # deploy as an eventlet WSGI server
    eventlet.wsgi.server(eventlet.listen(('', 4567)), app)

You are getting this error because the model you are attempting to load was trained and saved with an older version of Keras than the one you are using, as indicated by:
You are using Keras version b'2.1.2' , but the model was built using b'1.2.1' Traceback (most recent call last): File "drive.py", line 122, in model = load_model(args.model)
One solution is to train your model with the same Keras version you plan to use for inference, so that it loads cleanly. The other option is to install version 1.2.1 to load that model and work with it.
This is probably due to differences in the way Keras saves models between versions, as some major changes took place between v1.2.1 and v2.1.2.

Related

Not able to access data even though it exists, using Pandas dataframe, when training a deep learning model using Python3

I am trying to do k-fold cross validation using sklearn and using pandas dataframe. This is not working as expected. Looks like the data cannot be accessed for some reason even though it exists. The code works for some time, but is unable to complete a full epoch.
Here is the error:
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
94773248/94765736 [==============================] - 1s 0us/step
94781440/94765736 [==============================] - 1s 0us/step
458/610 [=====================>........] - ETA: 21s - loss: 0.1640 - accuracy: 0.1621
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-7-28b7c7367434> in <module>()
60 validation_data=valid_gen,
61 validation_steps=len(test_index)//valid_batch_size,
---> 62 verbose=1)
...
UnknownError: Graph execution error:
2 root error(s) found.
(0) UNKNOWN: IndexError: single positional indexer is out-of-bounds
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/script_ops.py", line 271, in __call__
ret = func(*args)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 1004, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/data_adapter.py", line 830, in wrapped_generator
for data in generator_fn():
File "<ipython-input-4-8914ea8c1843>", line 6, in get_data_generator
r = df.iloc[i]
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 931, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1566, in _getitem_axis
self._validate_integer(key, axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1500, in _validate_integer
raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
[[{{node PyFunc}}]]
[[IteratorGetNext]]
[[IteratorGetNext/_2]]
(1) UNKNOWN: IndexError: single positional indexer is out-of-bounds
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/script_ops.py", line 271, in __call__
ret = func(*args)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 1004, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/data_adapter.py", line 830, in wrapped_generator
for data in generator_fn():
File "<ipython-input-4-8914ea8c1843>", line 6, in get_data_generator
r = df.iloc[i]
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 931, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1566, in _getitem_axis
self._validate_integer(key, axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1500, in _validate_integer
raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
[[{{node PyFunc}}]]
[[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_13498]
Here is the code that I am trying to do:
# using google colab
! pip install --upgrade --no-cache-dir gdown
! gdown 1_DgB2a2Q7eYJpXtKWfl4XPUgTIW1sXw1
! unzip -qq Train.zip
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import csv
import pandas as pd
# create a pandas data frame of images, age, gender and race
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from keras.models import Model
from tensorflow.keras.optimizers import Adam
from keras.layers import Dense, Flatten, GlobalAveragePooling2D, Multiply, Dropout
from tensorflow.keras import regularizers
from tensorflow.keras.utils import to_categorical
from PIL import Image
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow import keras
from datetime import datetime
from tensorflow import keras
from tqdm import tqdm
import pandas as pd
def get_data_generator(df, indices, batch_size=16):
    """Yield ``(images, labels)`` batches forever.

    Args:
        df: DataFrame with a 'file' column (image path) and a 'label' column.
        indices: re-iterable collection of *positional* row indices into
            ``df`` (rows are fetched with ``df.iloc``).
        batch_size: number of samples per yielded batch.

    Yields:
        A tuple of two numpy arrays: images of shape
        ``(batch, 360, 360, 3)`` scaled to [0, 1], and labels divided by 100.
        A partially filled batch is carried over into the next pass over
        ``indices`` rather than being dropped.

    Raises:
        ValueError: if ``indices`` is empty (the loop would otherwise spin
            forever without ever yielding).
    """
    if len(indices) == 0:
        raise ValueError("indices must not be empty")
    images, labels = [], []
    while True:
        for i in indices:
            r = df.iloc[i]
            file_, label = r['file'], r['label']
            im_gray = Image.open(file_).convert('L').resize((360, 360))
            # Replicate the single gray channel three times and scale to [0, 1].
            gray = np.asarray(im_gray, dtype=np.float64)
            im = np.stack((gray, gray, gray), axis=-1) / 255.0
            images.append(im)
            labels.append(label / 100.0)
            if len(images) >= batch_size:
                yield np.array(images), np.array(labels)
                images, labels = [], []
np.random.seed(42)
EPOCHS = 1
MODEL_NAME = 'ResNet50'
IMG_SIZE = '360x360'
all_train_imgs = glob.glob('Train/*')
# print("Length of all training images = ",len(all_train_imgs))

# Build the file-name and label lists from Train.csv (comma-separated, no header handling).
all_training_files_name = []
all_training_perc = []
with open('Train.csv') as f:
    contents = f.readlines()
for item in contents:
    fields = item.split(',')
    # make the changes in the folder here
    img_name = "Train/" + fields[0]
    perc_infc = float(fields[1])
    # print(img_name," - ",perc_infc)
    all_training_files_name.append(img_name)
    all_training_perc.append(perc_infc)

attributes = {'label': all_training_perc, 'file': all_training_files_name}
df_all = pd.DataFrame(attributes)
df_all = df_all.dropna()
print(df_all.head())

kf = KFold(n_splits=5)
kf.get_n_splits(all_training_files_name)
fold_no = 0
for train_index, test_index in kf.split(all_training_files_name):
    fold_no += 1
    #########################################################################################
    # A fresh model is built for every fold.
    OUTPUT = 1
    frozen = ResNet50(weights="imagenet", input_shape=(360, 360, 3), include_top=False)
    trainable = frozen.output
    trainable = Dropout(0.5)(GlobalAveragePooling2D()(trainable))
    trainable = Dense(2048, activation="relu")(trainable)
    trainable = Dense(1024, activation="relu")(trainable)
    trainable = Dense(OUTPUT, activation="sigmoid")(trainable)
    model = Model(inputs=frozen.input, outputs=trainable)
    opt = Adam(learning_rate=1e-5)
    model.compile(optimizer=opt, loss=tf.keras.losses.MeanAbsoluteError(),  # loss='binary_crossentropy',
                  # experimental_run_tf_function=False,
                  metrics=['accuracy']
                  )
    #########################################################################################
    batch_size = 4
    valid_batch_size = 4
    # train_index/test_index are positions into the full data and the
    # generator looks rows up with df.iloc, so it must receive the full
    # frame. Passing a per-fold subset makes positions beyond the subset's
    # length raise "single positional indexer is out-of-bounds".
    train_gen = get_data_generator(df_all, train_index, batch_size=batch_size)
    valid_gen = get_data_generator(df_all, test_index, batch_size=valid_batch_size)
    callbacks = [
        ModelCheckpoint("./model_checkpoint", monitor='val_loss'),
        # ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4)
    ]
    # for storing logs into tensorboard
    logdir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
    history = model.fit(train_gen,
                        steps_per_epoch=len(train_index) // batch_size,
                        epochs=EPOCHS,
                        # flat list of callbacks instead of a list nested inside a list
                        callbacks=[tensorboard_callback, *callbacks],
                        validation_data=valid_gen,
                        validation_steps=len(test_index) // valid_batch_size,
                        verbose=1)
Here is the code for reproducing in google colab : https://colab.research.google.com/drive/11C-GP6xCB3CCwvz6gj8gy6mTOJIc3Zld?usp=sharing
I figured it out: there was an error in how the per-fold dataframes were created. For this problem, the following changes work — just use the full dataframe.
# The per-fold subset frames are disabled; both generators index the full dataframe instead.
#df_train = df_all.loc[train_index.astype(int)]
#df_val = df_all.loc[test_index.astype(int)]
train_gen = get_data_generator(df_all, train_index, batch_size=batch_size)
valid_gen = get_data_generator(df_all, test_index, batch_size=valid_batch_size)

unicode error during model inference in Sagemaker notebook

I am doing inference on a model trained in the sagemaker notebook. I am getting Unicode error while passing the input.
Before deploying, I tried the following and it worked: process the text with input_fn and then pass its output to predict_fn for prediction. But I am facing an issue when I use the deploy function of the SageMaker endpoint. How can I resolve this?
input_text = "BACKGROUND: COVID-19 is associated with pulmonary embolism (PE) in adults."
deployment.predict(json.dumps({"data":input_text}))
Error
Traceback (most recent call last): File "/miniconda3/lib/python3.7/site-packages/sagemaker_containers/_functions.py", line 93, in wrapper return fn(*args, **kwargs) File "/opt/ml/code/train_nmf.py", line 311, in input_fn input_data = json.loads(serialized_input_data) File "/miniconda3/lib/python3.7/json/__init__.py", line 343, in loads s = s.decode(detect_encoding(s), 'surrogatepass')
Training in Sagemaker Notebook
from sagemaker.sklearn.estimator import SKLearn
# Entry-point script; resolved relative to source_dir below (presumably — confirm against the SageMaker SDK docs).
script_path = 'train_nmf.py'
# role, sagemaker_session, output_data_uri and training_desc_uri are defined outside this snippet.
sklearn = SKLearn(
entry_point=script_path,
instance_type="ml.m4.xlarge",
framework_version="0.23-1",
py_version="py3",
role=role,
sagemaker_session=sagemaker_session,
output_path=output_data_uri,
code_location=training_desc_uri,
source_dir='/home/ec2-user/SageMaker/src')
Train NMF Code
import os
import numpy as np
import pandas as pd
import joblib
import json
CONTENT_TYPE_JSON = "application/json"
def process_text(text):
    """Return a list containing the lower-cased form of every item in *text*.

    *text* is iterated item by item, so passing a plain string yields a list
    of its lower-cased characters.
    """
    return [each.lower() for each in text]
def model_fn(model_dir):
    """Load and return the NMF model stored as ``nmf_model.pkl`` in *model_dir*."""
    # SageMaker automatically load the model.tar.gz from the S3 and
    # mount the folders inside the docker container. The 'model_dir'
    # points to the root of the extracted tar.gz file.
    return joblib.load(os.path.join(model_dir, "nmf_model.pkl"))
def predict_fn(input_data, model):
    """Return ``model.transform(input_data)``, i.e. the model's output for the prepared input."""
    return model.transform(input_data)
def input_fn(serialized_input_data, model_dir, content_type=CONTENT_TYPE_JSON):
    """Deserialize a JSON request and vectorize it with the stored TF-IDF model.

    Args:
        serialized_input_data: JSON document to decode with ``json.loads``.
        model_dir: directory containing ``tf_idf.pkl``.
        content_type: accepted for interface compatibility; not inspected.

    Returns:
        The result of ``tf_idf_model.transform`` on the processed text.
    """
    input_data = json.loads(serialized_input_data)
    # Each element of the Series is passed through process_text individually.
    input_text_processed = pd.Series(input_data).apply(process_text)
    tf_idf_model = joblib.load(os.path.join(model_dir, "tf_idf.pkl"))
    return tf_idf_model.transform(input_text_processed)
def output_fn(prediction_output, model_dir, accept=CONTENT_TYPE_JSON):
    """Serialize the prediction matrix as JSON records.

    Args:
        prediction_output: 2-D array; ``argmax`` along axis 1 picks the
            dominant topic of each row.
        model_dir: directory containing ``topic_keywords.pkl``, whose content
            is used as the column names.
        accept: requested response content type.

    Returns:
        A ``(json_string, accept)`` tuple.

    Raises:
        Exception: if *accept* is not the JSON content type.
    """
    if accept != CONTENT_TYPE_JSON:
        raise Exception('Unsupported content type')
    topic_keywords = joblib.load(
        os.path.join(model_dir, "topic_keywords.pkl")
    )
    pred_dominant_topic = np.argmax(prediction_output, axis=1)
    pred_df = pd.DataFrame(prediction_output, columns=topic_keywords)
    pred_df["dominant_topic"] = pred_dominant_topic
    return json.dumps(pred_df.to_dict("records")), accept

utf-8 decode error when trying to feed an image into a model

I have successfully created a webpage that takes an image file and passes it to the API I built. The only problem is that once I feed that image to preprocessing.image.load_img from tensorflow, I get this error:
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte
Here is the API:
from starlette.responses import RedirectResponse
from fastapi import FastAPI, File, UploadFile
from tensorflow.keras import preprocessing
from fastapi.staticfiles import StaticFiles
from keras.models import load_model
import numpy as np
import uvicorn
app = FastAPI()
# Serve the files under ./Templates at the /Templates URL prefix.
app.mount("/Templates", StaticFiles(directory="Templates"), name="Templates")
# The model is loaded once, when the module is imported.
model_dir = 'F:\\Saved-Models\\Dog-Cat-Models\\First_Generation_dog_cat_optuna.h5'
model = load_model(model_dir)
@app.get('/')
async def index():
    """Redirect the site root to the static index page."""
    return RedirectResponse(url="/Templates/index.html")
@app.post('/prediction_page')
async def prediction_form(dogcat_img: UploadFile = File(...)):
    """Run the model on an uploaded image and print the predicted class index."""
    dogcat_img_bytes = dogcat_img.file.read()
    # NOTE(review): load_img is being handed the raw image bytes here; its
    # documentation describes the argument as a path to an image file, which
    # matches the reported "'utf-8' codec can't decode byte 0xff" failure.
    pp_dogcat_image = preprocessing.image.load_img(dogcat_img_bytes, target_size=(150, 150))
    pp_dogcat_image_arr = preprocessing.image.img_to_array(pp_dogcat_image)
    # Wrap the single image in a batch of one.
    input_arr = np.array([pp_dogcat_image_arr])
    prediction = np.argmax(model.predict(input_arr), axis=-1)
    print(prediction)
if __name__ == '__main__':
    # Start the development server only when the file is executed directly.
    uvicorn.run(app, host='localhost', port=8000)
With no full traceback from the exception it may be difficult to help but looking at the docs, tf.keras.utils.load_img requires a path to the image file (not the raw image data).
You can try something like this instead (the underlying library does something similar):
--- orig.py 2021-09-20 10:47:22.465636386 +0100
+++ new.py 2021-09-20 10:48:50.760734720 +0100
@@ -6,6 +6,8 @@
import numpy as np
import uvicorn
+from PIL import Image
+
app = FastAPI()
app.mount("/Templates", StaticFiles(directory="Templates"), name="Templates")
@@ -20,9 +22,10 @@
@app.post('/prediction_page')
async def prediction_form(dogcat_img: UploadFile = File(...)):
- dogcat_img_bytes = dogcat_img.file.read()
+ # dogcat_img_bytes = dogcat_img.file.read()
- pp_dogcat_image = preprocessing.image.load_img(dogcat_img_bytes, target_size=(150, 150))
+ # pp_dogcat_image = preprocessing.image.load_img(dogcat_img_bytes, target_size=(150, 150))
+ pp_dogcat_image = Image.open(dogcat_img.file).resize((150, 150), Image.NEAREST).convert("RGB")
pp_dogcat_image_arr = preprocessing.image.img_to_array(pp_dogcat_image)
input_arr = np.array([pp_dogcat_image_arr])
prediction = np.argmax(model.predict(input_arr), axis=-1)
Btw you may want to also consider using BackgroundTasks for the image processing, otherwise a single long running (async) request will block other requests.

cannot import name 'RollingOriginValidator'

I'm writing a scoring step with Azure Machine Learning pipeline.
This is the score code:
import os
import pickle
import pandas as pd
from azureml.core.model import Model
import argparse
from azureml.core.run import Run, _OfflineRun
from azureml.automl.core.shared.rolling_origin_validator import RollingOriginValidator
# Called when the deployed service starts
def init():
    """Load the pickled model registered as 'best_model_data' into the global ``model``."""
    global model
    # Get the path where the deployed model can be found.
    run = Run.get_context()
    model_path = Model.get_model_path('best_model_data')
    print(model_path)
    # NOTE: pickle.load executes code embedded in the file; only load model
    # artifacts that come from a trusted source.
    with open(model_path, "rb") as f:
        model = pickle.load(f)
    print("loaded")
# Handle requests to the service
def run(data):
    """Score *data* with ``predict`` and return the result.

    Any exception raised while predicting is caught and its message is
    returned as a string instead of being propagated.
    """
    try:
        return predict(data)
    except Exception as e:
        return str(e)
# Score the rows of a DataFrame with the loaded model
def predict(data, include_neutral=True):
    """Return a DataFrame with a 'score' and a 'CombinedTactics' column.

    Args:
        data: DataFrame containing 'ProposalId' and 'CombinedTactics' columns
            plus the feature columns the model consumes.
        include_neutral: unused; kept for interface compatibility.

    Returns:
        DataFrame whose 'score' is column 1 of ``model.predict_proba`` and
        whose 'CombinedTactics' is copied from the input.
    """
    # Everything except the two identifier columns is fed to the model.
    # `axis=1` is passed by keyword: the positional form is deprecated in pandas.
    test_data_features = data.drop(['ProposalId', 'CombinedTactics'], axis=1)
    test_data_combos = data['CombinedTactics']
    print("data")
    # Predict
    score = model.predict_proba(test_data_features)
    print("predicted")
    return pd.DataFrame({'score': score[:, 1], 'CombinedTactics': test_data_combos})
This is the pipeline step definition:
# Configuration for the parallel scoring step; myenv and compute_target are defined outside this snippet.
parallel_run_config = ParallelRunConfig(
environment=myenv,
entry_script="use_model.py",
source_directory="./",
output_action="append_row",
mini_batch_size="20",
error_threshold=1,
compute_target=compute_target,
process_count_per_node=2,
node_count=2
)
# Step name carries a minute-resolution timestamp, e.g. "batchscoring-YYYYmmddHHMM".
parallel_step_name = "batchscoring-" + datetime.now().strftime("%Y%m%d%H%M")
# test_data and output_dir are defined outside this snippet.
batch_score_step = ParallelRunStep(
name=parallel_step_name,
inputs=[test_data.as_named_input("test_data")],
output=output_dir,
parallel_run_config=parallel_run_config,
allow_reuse=True
)
However, I met below error:
File "/mnt/batch/tasks/shared/LS_root/jobs/ucmopp-ws/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/mounts/workspaceblobstore/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/driver/azureml_user/parallel_run/score_module.py", line 139, in call_init
self.init()
File "/mnt/batch/tasks/shared/LS_root/jobs/ucmopp-ws/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/mounts/workspaceblobstore/azureml/70828787-7515-4db4-b448-a5a4b6c0c8ff/use_model.py", line 17, in init
model = pickle.load(f)
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurization/init.py", line 8, in
from .data_transformer import DataTransformer, TransformerAndMapper
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurization/data_transformer.py", line 54, in
from ..featurizer.transformer import (AutoMLTransformer, CategoricalFeaturizers, DateTimeFeaturesTransformer,
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurizer/transformer/init.py", line 28, in
from .timeseries import TimeSeriesTransformer, TimeSeriesPipelineType, NumericalizeTransformer,
File "/azureml-envs/azureml_7e62c7905267a978aa40f8554487e9b9/lib/python3.6/site-packages/azureml/automl/runtime/featurizer/transformer/timeseries/init.py", line 65, in
from azureml.automl.core.shared.rolling_origin_validator import RollingOriginValidator
ImportError: cannot import name 'RollingOriginValidator'
Does anyone have any idea about this error?

Scikit Learn OpenCV SVM IndexError: list index out of range

I'm training an SVM based on features extracted from a pictures dataset.
The code goes as follows:
import os
import sys
import argparse
import pickle as cPickle
import numpy as np
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import LinearSVC
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.utils import check_random_state
def build_arg_parser():
    """Build the command-line parser.

    ``--feature-map-file`` is required; ``--svm-file`` is optional and
    defaults to ``None``.
    """
    parser = argparse.ArgumentParser(description='Trains the classifier models')
    parser.add_argument("--feature-map-file", dest="feature_map_file", required=True,
                        help="Input pickle file containing the feature map")
    parser.add_argument("--svm-file", dest="svm_file", required=False,
                        help="Output file where the pickled SVM model will be stored")
    return parser
class ClassifierTrainer(object):
    """One-vs-one linear SVM trained on feature vectors with word labels.

    The classifier is fitted in the constructor; use :meth:`classify` to
    obtain word labels for new feature vectors.
    """

    def __init__(self, X, label_words):
        """Encode *label_words* to integers and fit the classifier on *X*.

        Raises:
            ValueError: if *label_words* contains fewer than two distinct
                labels. A one-vs-one classifier needs at least one pair of
                classes; older scikit-learn versions appear to fail with an
                opaque ``IndexError`` inside ``fit`` in that case.
        """
        n_classes = len(set(label_words))
        if n_classes < 2:
            raise ValueError(
                "Need at least 2 distinct labels to train a one-vs-one "
                "classifier, got {}".format(n_classes))
        self.le = preprocessing.LabelEncoder()
        self.clf = OneVsOneClassifier(LinearSVC(random_state=0))
        y = self._encodeLabels(label_words)
        X = np.asarray(X)
        self.clf.fit(X, y)

    def _fit(self, X):
        """Return the numeric class predictions for *X* (despite the name, this predicts)."""
        X = np.asarray(X)
        return self.clf.predict(X)

    def _encodeLabels(self, labels_words):
        """Fit the label encoder and return the encoded labels as a float32 array."""
        self.le.fit(labels_words)
        return np.array(self.le.transform(labels_words), dtype=np.float32)

    def classify(self, X):
        """Predict *X* and map the numeric predictions back to word labels."""
        labels_nums = self._fit(X)
        labels_words = self.le.inverse_transform([int(x) for x in labels_nums])
        return labels_words
if __name__ == '__main__':
    args = build_arg_parser().parse_args()
    feature_map_file = args.feature_map_file
    svm_file = args.svm_file

    # Load the feature map
    # NOTE: unpickling executes code embedded in the file; only load trusted files.
    with open(feature_map_file, 'rb') as f:
        feature_map = cPickle.load(f)

    # Extract feature vectors and the labels
    labels_words = [x['label'] for x in feature_map]
    # Each feature vector is indexed as a 2-D array here; it is flattened to (dim_size,) below.
    dim_size = feature_map[0]['feature_vector'].shape[1]
    X = [np.reshape(x['feature_vector'], (dim_size,)) for x in feature_map]

    # Train the SVM
    svm = ClassifierTrainer(X, labels_words)
    if svm_file:
        with open(svm_file, 'wb') as f:
            cPickle.dump(svm, f)
This is the error that the system throws:
Traceback (most recent call last):
File "training.py", line 59, in <module>
svm = ClassifierTrainer(X, labels_words)
File "training.py", line 29, in __init__
self.clf.fit(X, y)
File "/home/pi/.virtualenvs/cv/lib/python3.4/site-packages/sklearn/multiclass.py", line 496, in fit
self.estimators_ = estimators_indices[0]
IndexError: list index out of range
Any ideas what I am doing wrong? It seems there is a problem with multiclass.py in the Python site-packages directory.

Resources