sklearn.impute.SimpleImputer, NaN to mean, not working - python-3.x

I have a dataset Data.csv
Country,Age,Salary,Purchased
France,44,72000,No
Spain,27,48000,Yes
Germany,30,54000,No
Spain,38,61000,No
Germany,40,,Yes
France,35,58000,Yes
Spain,,52000,No
France,48,79000,Yes
Germany,50,83000,No
France,37,67000,Yes
I tried to fill the NaN values using sklearn.impute.SimpleImputer with the following code:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
# Taking care of missing data
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values = 'NaN', strategy = 'mean')
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])
But I get an error which says:
File "C:\Users\Krishna Rohith\Machine Learning A-Z\Part 1 - Data Preprocessing\Section 2 ----------- --------- Part 1 - Data Preprocessing --------------------\missing_data.py", line 16, in <module>
imputer = imputer.fit(X[:, 1:3])
File "C:\Users\Krishna Rohith\Anaconda3\lib\site-packages\sklearn\impute\_base.py", line 268, in fit
X = self._validate_input(X)
File "C:\Users\Krishna Rohith\Anaconda3\lib\site-packages\sklearn\impute\_base.py", line 242, in _validate_input
raise ve
File "C:\Users\Krishna Rohith\Anaconda3\lib\site-packages\sklearn\impute\_base.py", line 235, in _validate_input
force_all_finite=force_all_finite, copy=self.copy)
File "C:\Users\Krishna Rohith\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 562, in check_array
allow_nan=force_all_finite == 'allow-nan')
File "C:\Users\Krishna Rohith\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 60, in _assert_all_finite
msg_dtype if msg_dtype is not None else X.dtype)
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
I know how to do it in NumPy, but can someone please tell me how to do it using sklearn.impute?

imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
Replace 'NaN' by numpy default Nan np.nan
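For completeness, here is a minimal sketch of the corrected snippet (same column slice as in the question):
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values

# missing_values must be the float NaN that pandas produces, not the string 'NaN'
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])
print(X)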

Related

Not able to access data even though it exists, using Pandas dataframe, when training a deep learning model using Python3

I am trying to do k-fold cross-validation using sklearn and a pandas DataFrame. This is not working as expected; it looks like the data cannot be accessed for some reason even though it exists. The code works for some time but is unable to complete a full epoch.
Here is the error:
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
94773248/94765736 [==============================] - 1s 0us/step
94781440/94765736 [==============================] - 1s 0us/step
458/610 [=====================>........] - ETA: 21s - loss: 0.1640 - accuracy: 0.1621
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-7-28b7c7367434> in <module>()
60 validation_data=valid_gen,
61 validation_steps=len(test_index)//valid_batch_size,
---> 62 verbose=1)
...
UnknownError: Graph execution error:
2 root error(s) found.
(0) UNKNOWN: IndexError: single positional indexer is out-of-bounds
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/script_ops.py", line 271, in __call__
ret = func(*args)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 1004, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/data_adapter.py", line 830, in wrapped_generator
for data in generator_fn():
File "<ipython-input-4-8914ea8c1843>", line 6, in get_data_generator
r = df.iloc[i]
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 931, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1566, in _getitem_axis
self._validate_integer(key, axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1500, in _validate_integer
raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
[[{{node PyFunc}}]]
[[IteratorGetNext]]
[[IteratorGetNext/_2]]
(1) UNKNOWN: IndexError: single positional indexer is out-of-bounds
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/script_ops.py", line 271, in __call__
ret = func(*args)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 1004, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/data_adapter.py", line 830, in wrapped_generator
for data in generator_fn():
File "<ipython-input-4-8914ea8c1843>", line 6, in get_data_generator
r = df.iloc[i]
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 931, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1566, in _getitem_axis
self._validate_integer(key, axis)
File "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py", line 1500, in _validate_integer
raise IndexError("single positional indexer is out-of-bounds")
IndexError: single positional indexer is out-of-bounds
[[{{node PyFunc}}]]
[[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_13498]
Here is the code that I am trying to run:
# using google colab
! pip install --upgrade --no-cache-dir gdown
! gdown 1_DgB2a2Q7eYJpXtKWfl4XPUgTIW1sXw1
! unzip -qq Train.zip
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import csv
import pandas as pd
# create a pandas data frame of images, age, gender and race
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
from keras.models import Model
from tensorflow.keras.optimizers import Adam
from keras.layers import Dense, Flatten, GlobalAveragePooling2D, Multiply, Dropout
from tensorflow.keras import regularizers
from tensorflow.keras.utils import to_categorical
from PIL import Image
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow import keras
from datetime import datetime
from tensorflow import keras
from tqdm import tqdm
import pandas as pd
def get_data_generator(df, indices, batch_size=16):
    images, labels = [], []
    while True:
        for i in indices:
            # print(i," - ",end="")
            r = df.iloc[i]
            file_, label = r['file'], r['label']
            im_gray = Image.open(file_).convert('L')
            im_gray = im_gray.resize((360, 360))
            im = np.zeros(shape=(360, 360, 3))
            im[:, :, 0] = im_gray
            im[:, :, 1] = im_gray
            im[:, :, 2] = im_gray
            im = np.array(im) / 255.0
            images.append(im)
            new_label = label / 100.0
            labels.append(new_label)
            if len(images) >= batch_size:
                yield np.array(images), np.array(labels)
                images, labels = [], []
np.random.seed(42)
EPOCHS = 1
MODEL_NAME = 'ResNet50'
IMG_SIZE = '360x360'
all_train_imgs = glob.glob('Train/*')
# print("Length of all training images = ",len(all_train_imgs))
all_training_files_name = []
all_training_perc = []
with open('Train.csv') as f:
    contents = f.readlines()
for item in contents:
    # make the changes in the folder here
    img_name = "Train/" + item.split(',')[0]
    perc_infc = float(item.split(',')[1])
    num_pat = item.split(',')[2]
    # print(img_name," - ",perc_infc," - ",num_pat)
    all_training_files_name.append(img_name)
    all_training_perc.append(perc_infc)
attributes = {'label':all_training_perc, 'file':all_training_files_name}
df_all = pd.DataFrame(attributes)
df_all = df_all.dropna()
print(df_all.head())
kf = KFold(n_splits=5)
kf.get_n_splits(all_training_files_name)
fold_no = 0
for train_index, test_index in kf.split(all_training_files_name):
    fold_no += 1
    #########################################################################################
    OUTPUT = 1
    frozen = ResNet50(weights="imagenet", input_shape=(360, 360, 3), include_top=False)
    trainable = frozen.output
    trainable = Dropout(0.5)(GlobalAveragePooling2D()(trainable))
    trainable = Dense(2048, activation="relu")(trainable)
    trainable = Dense(1024, activation="relu")(trainable)
    trainable = Dense(OUTPUT, activation="sigmoid")(trainable)
    model = Model(inputs=frozen.input, outputs=trainable)
    opt = Adam(learning_rate=1e-5)
    model.compile(optimizer=opt,
                  loss=tf.keras.losses.MeanAbsoluteError(),  # loss='binary_crossentropy',
                  # experimental_run_tf_function=False,
                  metrics=['accuracy']
                  )
    #########################################################################################
    batch_size = 4
    valid_batch_size = 4
    df_train = df_all.loc[train_index.astype(int)]
    df_val = df_all.loc[test_index.astype(int)]
    train_gen = get_data_generator(df_train, train_index, batch_size=batch_size)
    valid_gen = get_data_generator(df_val, test_index, batch_size=valid_batch_size)
    callbacks = [
        ModelCheckpoint("./model_checkpoint", monitor='val_loss'),
        # ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=4)
    ]
    # for storing logs into tensorboard
    logdir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
    history = model.fit(train_gen,
                        steps_per_epoch=len(train_index) // batch_size,
                        epochs=EPOCHS,
                        callbacks=[tensorboard_callback, callbacks],
                        validation_data=valid_gen,
                        validation_steps=len(test_index) // valid_batch_size,
                        verbose=1)
Here is the code for reproducing in google colab : https://colab.research.google.com/drive/11C-GP6xCB3CCwvz6gj8gy6mTOJIc3Zld?usp=sharing
I figured it out: there were some errors in the DataFrame creation. For this problem, making the following changes works, just passing the full DataFrame to the generator.
#df_train = df_all.loc[train_index.astype(int)]
#df_val = df_all.loc[test_index.astype(int)]
train_gen = get_data_generator(df_all, train_index, batch_size=batch_size)
valid_gen = get_data_generator(df_all, test_index, batch_size=valid_batch_size)
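To spell out why the original version fails (this reasoning is inferred from the traceback rather than stated in the answer): kf.split returns positional indices into the full dataset, and the generator indexes with df.iloc[i], so once df_all is sliced down to a shorter per-fold frame those positions run past its end. An alternative sketch that keeps per-fold DataFrames is to re-index them and pass positional ranges instead; like the accepted fix, it assumes df_all.dropna() removed no rows, so the KFold indices still line up:
# Hypothetical alternative: keep per-fold frames, but make iloc positions valid again
df_train = df_all.iloc[train_index].reset_index(drop=True)
df_val = df_all.iloc[test_index].reset_index(drop=True)
train_gen = get_data_generator(df_train, range(len(df_train)), batch_size=batch_size)
valid_gen = get_data_generator(df_val, range(len(df_val)), batch_size=valid_batch_size)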

Model is unable to predict due to a type error when applying a model to a UTF-8 encoded Urdu dataset

I am trying to run an algorithm on an Urdu dataset. I have a logistic regression model that works for English, but it runs into errors when I try to use it on a UTF-8 encoded Urdu dataset.
I have applied the code given below to the problem.
import pandas as pd
import re
from nltk.corpus import stopwords
import nltk
import logging
from gensim.models import word2vec
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import hstack
from scipy.sparse import coo_matrix
from tqdm import tqdm
from scipy import sparse
import numpy
#import score
import re, nltk, scipy
#import gensim
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier,BaggingClassifier,VotingClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.model_selection import KFold
dataset1 = pd.read_csv("fakenews.csv", encoding = 'UTF-8')
ds = dataset1.copy()
print(ds.shape)
df = ds.copy()
df = ds.copy()
df.drop('FileName',axis=1,inplace = True)
df.drop('label',axis=1,inplace = True)
da = ds.copy()
da.drop('FileName',axis=1,inplace = True)
da.drop('title',axis=1,inplace = True)
da.drop('text',axis=1,inplace = True)
def extract_word_overlap(title, text):
    word_overlap = []
    for i, (title, body) in tqdm(enumerate(zip(title, text))):
        # preprocess_headline = preprocess(headline)
        # preprocess_body = preprocess(body)
        features = len(set(title).intersection(text)) / float(len(set(title).union(text)))
        word_overlap.append(features)
    word_overlap_sparse = scipy.sparse.coo_matrix(numpy.array(word_overlap))
    return word_overlap_sparse

def combine_features(tfidf_vectors, word_overlap):
    combined_features = sparse.bmat([[tfidf_vectors, word_overlap.T]])
    return combined_features
from sklearn.model_selection import train_test_split
x_train, x_test,y_train,y_test = train_test_split(df,da, test_size = 0.3,random_state=42)
training_bodies = x_train['text']
training_headlines = x_train['title']
test_bodies = x_test['text']
test_headlines = x_test['title']
print("\t-Extracting tfidf vectors..")
body_vectorizer = TfidfVectorizer(ngram_range=(1, 2))
bodies_tfidf = body_vectorizer.fit_transform(training_bodies)
headline_vectorizer = TfidfVectorizer(ngram_range=(1, 2))
headlines_tfidf = headline_vectorizer.fit_transform(training_headlines)
bodies_tfidf_test = body_vectorizer.transform(test_bodies)
headlines_tfidf_test = headline_vectorizer.transform(test_headlines)
training_tfidf = scipy.sparse.hstack([bodies_tfidf, headlines_tfidf])
test_tfidf = scipy.sparse.hstack([bodies_tfidf_test, headlines_tfidf_test])
training_overlap = extract_word_overlap(training_headlines, training_bodies)
test_overlap = extract_word_overlap(test_headlines, test_bodies)
training_features = combine_features(training_tfidf, training_overlap)
test_features = combine_features(test_tfidf, test_overlap)
print("[3] Fitting model..")
print("\t-Logistic Regression")
lr = LogisticRegression(C = 1.0, class_weight='balanced', solver="lbfgs", max_iter=150)
y_pred = lr.fit(training_features, y_train).predict(test_features)
from sklearn import metrics
from sklearn.metrics import classification_report, accuracy_score, f1_score
print('classification report:')
print(classification_report(y_test, y_pred))
score = metrics.accuracy_score(y_test, y_pred)
print("Accuracy of Regression: %0.3f" % score)
print('Macro f1:', f1_score(y_test, y_pred, average='macro'))
print("[4] Evaluating model..")
score.report_score(y_test, y_pred)
print(confusion_matrix(y_test, y_pred))
"""
I expected the model to predict, but it gives me a type error: TypeError: '<' not supported between instances of 'float' and 'str'. The full output is:
761: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
y = column_or_1d(y, warn=True)
Traceback (most recent call last):
File "<ipython-input-1-800030f783cf>", line 1, in <module>
runfile('C:/Users/door/Desktop/af/pycode.py', wdir='C:/Users/door/Desktop/af')
File "C:\Users\door\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 704, in runfile
execfile(filename, namespace)
File "C:\Users\door\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/door/Desktop/af/pycode.py", line 101, in <module>
y_pred = lr.fit(training_features, y_train).predict(test_features)
File "C:\Users\door\Anaconda3\lib\site-packages\sklearn\linear_model\logistic.py", line 1286, in fit
check_classification_targets(y)
File "C:\Users\door\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py", line 168, in check_classification_targets
y_type = type_of_target(y)
File "C:\Users\door\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py", line 287, in type_of_target
if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1):
File "C:\Users\door\Anaconda3\lib\site-packages\numpy\lib\arraysetops.py", line 233, in unique
ret = _unique1d(ar, return_index, return_inverse, return_counts)
File "C:\Users\door\Anaconda3\lib\site-packages\numpy\lib\arraysetops.py", line 281, in _unique1d
ar.sort()
TypeError: '<' not supported between instances of 'float' and 'str'
The dataset I am using looks like this:
FileName title text label
0001a میں استعفیٰ نہیں دے رہا فیاض الحسن چوہان "صوبائی وزیر اطلاعات فیاض الحسن چوہان نے کہا ہے کہ میں استعفیٰ نہیں دے رہا اور نہ مجھ سے استعفیٰ مانگا گیا ہے مجھے محتاط بیان دینے کا کہا گیا ہے
اور میں نے اپنی بیان میں ہندو مذہب یا ہندو برادری کو نہیں بلکہ بھارتی فوج اور بھارتی میڈیا کو مخاطب کیا تھا" Agree
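There is no accepted answer to this question in the document, but the traceback ends in np.unique sorting the label column, which fails as soon as float values (typically NaN from empty cells) are mixed with string labels such as "Agree". A minimal, hedged sketch of that failure mode and a pre-fit check (the column name follows the dataset sample above; whether NaN really is the culprit here is an assumption):
import numpy as np
import pandas as pd

# NaN is a float, so sorting it against string labels raises the same TypeError
labels = np.array(['Agree', np.nan, 'Disagree'], dtype=object)
# np.unique(labels)  # -> TypeError: '<' not supported between instances of 'float' and 'str'

# Inspect and clean the target column before calling lr.fit
da = pd.read_csv('fakenews.csv', encoding='UTF-8')
print(da['label'].isna().sum())      # count missing labels
da = da.dropna(subset=['label'])     # or fill them with an explicit category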

Training Dataset - ValueError: Unknown label type: 'continuous'

I am trying to normalize and train my dataset. However, I keep getting this error and I don't know the cause of it. I am experimenting with different preprocessing types and models to see what works best for the dataset. df_norm is of type numpy.float64, and I read that converting it to int would work, but that methodology is questionable. Is the issue the fact that the values are floats? If so, is there a practical way to fix this without it being 'questionable'? Thanks in advance.
Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing, linear_model, svm
from sklearn.model_selection import train_test_split
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
raw_df = pd.read_csv('parkinsons_updrs.data.txt', index_col=False)
#Check for missing data
#print(pd.isnull(raw_df).sum())
#Grouping the patients by subject #
df_mean = pd.DataFrame()
group = raw_df.groupby('subject#')
for patient, medical_data in group:
    #print(patient)
    #print(medical_data)
    df_mean = df_mean.append(medical_data.agg(np.mean), ignore_index=True)
df_mean.set_index('subject#', inplace=True)
#df_mean.to_html('Parkinsons Patients Mean Data.html')
#Data Scaling
#Normalization
df_norm = preprocessing.normalize(df_mean)
cols = df_mean.columns.values
df_norm = pd.DataFrame(df_norm, columns=cols)
labels_norm = df_norm.pop('total_UPDRS')
#Label Encoding
df_le = pd.DataFrame()
le = preprocessing.LabelEncoder()
for col in df_mean.columns.values:
    le.fit(df_mean[col])
    df_le[col] = le.transform(df_mean[col])
labels_le = df_le.pop('total_UPDRS')
#Split the data
x_train, x_test, y_train, y_test = train_test_split(df_norm, labels_norm, test_size=0.2, random_state=0)
#Make the Model - Logistic Regression
log_regr = linear_model.LogisticRegression()
log_regr.fit(x_train, y_train)
#Predict
y_pred_norm = log_regr.predict(x_test)
correct = 0
for i in range(len(y_pred_norm)):
    if y_pred_norm[i] == y_test.iloc[i]:
        correct += 1
print('Normalized Accuracy: ', correct / len(y_pred_norm))
Error:
Warning (from warnings module):
File "C:\Users\andre\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\linear_model\logistic.py", line 433
FutureWarning)
FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
Traceback (most recent call last):
File "C:/Users/andre/AppData/Local/Programs/Python/Python37/Machine Learning/Parkinsons Telemonitoring Data/Parkinsons Telemonitoring Data - Attempt 2.py", line 112, in <module>
log_regr.fit(x_train, y_train)
File "C:\Users\andre\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\linear_model\logistic.py", line 1289, in fit
check_classification_targets(y)
File "C:\Users\andre\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\utils\multiclass.py", line 171, in check_classification_targets
raise ValueError("Unknown label type: %r" % y_type)
ValueError: Unknown label type: 'continuous'
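The document contains no answer to this question, so the following is only a hedged illustration of what the error means: LogisticRegression is a classifier, and check_classification_targets rejects the normalized total_UPDRS target because it is a continuous float rather than a set of class labels. A minimal sketch of the two usual directions, assuming the goal really is to predict the continuous score:
from sklearn import linear_model

# Option 1: treat it as a regression problem, since total_UPDRS is a continuous score
regr = linear_model.LinearRegression()
regr.fit(x_train, y_train)
y_pred_norm = regr.predict(x_test)

# Option 2: keep a classifier, but only after discretizing the target into
# explicit classes (this changes the problem), e.g. quartile bins:
# labels_binned = pd.qcut(labels_norm, q=4, labels=False)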

Convert string to float error in pandas machine learning

For my machine learning code, I have some unknown values marked with '?' in my CSV file, so I am trying to replace them with NaN, but it throws an error. The following code is what I used for the replacement of '?'. Can anyone please solve this?
Thanks in advance!
import numpy
import pandas as pd
import matplotlib as plot
import numpy as np
df = pd.read_csv('cdk.csv')
x=df.iloc[:,0:24].values
y=df.iloc[:,24].values
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values='NaN', strategy='most_frequent', axis =0,copy=False)
imputer = imputer.fit(x[:,0:5])
imputer.fit_transform(x[:,0:5])
imputer = Imputer(missing_values='normal', strategy='mode', axis =0,copy=False)
imputer = imputer.fit(x[:,5:7])
imputer.fit_transform(x[:,5:7])
This is the error it throws:
Traceback (most recent call last):
File "kidney.py", line 10, in <module>
imputer = imputer.fit(x[:,0:5])
File "C:\Users\YAASHI\AppData\Local\Programs\Python\Python36\lib\site-packages\sklearn\preprocessing\imputation.py", line 155, in fit
force_all_finite=False)
File "C:\Users\YAASHI\AppData\Local\Programs\Python\Python36\lib\site-packages\sklearn\utils\validation.py", line 433, in check_array
array = np.array(array, dtype=dtype, order=order, copy=copy)
ValueError: could not convert string to float: '?'
Link for the csv file
If you want to replace all ? strings with NaN, do this:
df.replace('?', np.nan, inplace=True)
Or better yet, load them as NaN as you read the CSV:
df = pd.read_csv('cdk.csv', na_values=['?'])
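As a hedged follow-up, the two snippets above can be combined with the modern imputer API (SimpleImputer replaces the deprecated sklearn.preprocessing.Imputer; the 0:24 / 24 column split simply mirrors the question):
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

# Read '?' as NaN directly, then impute the most frequent value per column
df = pd.read_csv('cdk.csv', na_values=['?'])
x = df.iloc[:, 0:24].values
y = df.iloc[:, 24].values

imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
x[:, 0:5] = imputer.fit_transform(x[:, 0:5])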

Scikit Learn OpenCV SVM IndexError: list index out of range

I'm training an SVM on features extracted from a dataset of pictures.
The code goes as follows:
import os
import sys
import argparse
import pickle as cPickle
import numpy as np
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import LinearSVC
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.utils import check_random_state
def build_arg_parser():
    parser = argparse.ArgumentParser(description='Trains the classifier models')
    parser.add_argument("--feature-map-file", dest="feature_map_file", required=True,
                        help="Input pickle file containing the feature map")
    parser.add_argument("--svm-file", dest="svm_file", required=False,
                        help="Output file where the pickled SVM model will be stored")
    return parser

class ClassifierTrainer(object):
    def __init__(self, X, label_words):
        self.le = preprocessing.LabelEncoder()
        self.clf = OneVsOneClassifier(LinearSVC(random_state=0))
        y = self._encodeLabels(label_words)
        X = np.asarray(X)
        self.clf.fit(X, y)

    def _fit(self, X):
        X = np.asarray(X)
        return self.clf.predict(X)

    def _encodeLabels(self, labels_words):
        self.le.fit(labels_words)
        return np.array(self.le.transform(labels_words), dtype=np.float32)

    def classify(self, X):
        labels_nums = self._fit(X)
        labels_words = self.le.inverse_transform([int(x) for x in labels_nums])
        return labels_words

if __name__ == '__main__':
    args = build_arg_parser().parse_args()
    feature_map_file = args.feature_map_file
    svm_file = args.svm_file
    # Load the feature map
    with open(feature_map_file, 'rb') as f:
        feature_map = cPickle.load(f)
    # Extract feature vectors and the labels
    labels_words = [x['label'] for x in feature_map]
    dim_size = feature_map[0]['feature_vector'].shape[1]
    X = [np.reshape(x['feature_vector'], (dim_size,)) for x in feature_map]
    # Train the SVM
    svm = ClassifierTrainer(X, labels_words)
    if args.svm_file:
        with open(args.svm_file, 'wb') as f:
            cPickle.dump(svm, f)
This is the error that the system throws:
Traceback (most recent call last):
File "training.py", line 59, in <module>
svm = ClassifierTrainer(X, labels_words)
File "training.py", line 29, in __init__
self.clf.fit(X, y)
File "/home/pi/.virtualenvs/cv/lib/python3.4/site-packages/sklearn/multiclass.py", line 496, in fit
self.estimators_ = estimators_indices[0]
IndexError: list index out of range
Any ideas what I am doing wrong? It seems there is a problem with multiclass.py in the Python site-packages.
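No answer is recorded here, but the failing line in multiclass.py (self.estimators_ = estimators_indices[0]) is reached with an empty list when OneVsOneClassifier has no class pairs to train on. A hedged sanity check worth adding before constructing ClassifierTrainer, under the assumption that the pickled feature map might contain only one distinct label:
# OneVsOneClassifier fits one binary SVM per pair of classes, so a feature
# map whose entries all share a single label produces zero estimators and
# an empty estimators_indices list inside multiclass.py.
unique_labels = sorted(set(labels_words))
print("unique labels:", unique_labels)
if len(unique_labels) < 2:
    raise ValueError("Need at least two distinct labels to train OneVsOneClassifier")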
