AttributeError: 'module' object has no attribute 'cuda' - keras

I was trying to run this repository: https://github.com/WaqasSultani/AnomalyDetectionCVPR2018
In Test_Anomaly_Detector_public.py I am stuck at the line theano.sandbox.cuda.use('gpu0') with the error:
AttributeError: 'module' object has no attribute 'cuda'
I am using Theano as the backend.
This is Test_Anomaly_Detector_public.py:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.regularizers import l2
from keras.optimizers import SGD ,Adagrad
from scipy.io import loadmat, savemat
from keras.models import model_from_json
import theano.tensor as T
import theano
import csv
import ConfigParser
import collections
import time
import csv
import os
from os import listdir
import skimage.transform
from skimage import color
from os.path import isfile, join
import numpy as np
import numpy
from datetime import datetime
from scipy.spatial.distance import cdist,pdist,squareform
import theano.sandbox
import shutil
theano.sandbox.cuda.use('gpu0')
seed = 7
numpy.random.seed(seed)
def load_model(json_path):  # Function to load the model
    model = model_from_json(open(json_path).read())
    return model

def load_weights(model, weight_path):  # Function to load the model weights
    dict2 = loadmat(weight_path)
    dict = conv_dict(dict2)
    i = 0
    for layer in model.layers:
        weights = dict[str(i)]
        layer.set_weights(weights)
        i += 1
    return model

def conv_dict(dict2):
    i = 0
    dict = {}
    for i in range(len(dict2)):
        if str(i) in dict2:
            if dict2[str(i)].shape == (0, 0):
                dict[str(i)] = dict2[str(i)]
            else:
                weights = dict2[str(i)][0]
                weights2 = []
                for weight in weights:
                    if weight.shape in [(1, x) for x in range(0, 5000)]:
                        weights2.append(weight[0])
                    else:
                        weights2.append(weight)
                dict[str(i)] = weights2
    return dict
# Load Video
def load_dataset_One_Video_Features(Test_Video_Path):
    VideoPath = Test_Video_Path
    f = open(VideoPath, "r")
    words = f.read().split()
    num_feat = len(words) / 4096
    # Number of features per video to be loaded. In our case num_feat=32, as we divide the video into 32 segments. Note that
    # we have already computed C3D features for the whole video and divided the video features into 32 segments.
    count = -1
    VideoFeatues = []
    for feat in xrange(0, num_feat):
        feat_row1 = np.float32(words[feat * 4096:feat * 4096 + 4096])
        count = count + 1
        if count == 0:
            VideoFeatues = feat_row1
        if count > 0:
            VideoFeatues = np.vstack((VideoFeatues, feat_row1))
    AllFeatures = VideoFeatues
    return AllFeatures
print("Starting testing...")
AllTest_Video_Path = '/newdata/UCF_Anomaly_Dataset/Dataset/CVPR_Data/C3D_Complete_Video_txt/Test/'
# AllTest_Video_Path contains C3D features (txt file) of each video. Each file contains 32 features, each of 4096 dimensions.
Results_Path = '../Eval_Res/'
# Results_Path is the folder where you can save your results
Model_dir='../Trained_AnomalyModel/'
# Model_dir is the folder where we have placed our trained weights
weights_path = Model_dir + 'weights_L1L2.mat'
# weights_path is Trained model weights
model_path = Model_dir + 'model.json'
if not os.path.exists(Results_Path):
    os.makedirs(Results_Path)
All_Test_files= listdir(AllTest_Video_Path)
All_Test_files.sort()
model=load_model(model_path)
load_weights(model, weights_path)
nVideos=len(All_Test_files)
time_before = datetime.now()
for iv in range(nVideos):
    Test_Video_Path = os.path.join(AllTest_Video_Path, All_Test_files[iv])
    inputs = load_dataset_One_Video_Features(Test_Video_Path)  # 32 segments features for one testing video
    predictions = model.predict_on_batch(inputs)  # Get anomaly prediction for each of 32 video segments.
    aa = All_Test_files[iv]
    aa = aa[0:-4]
    A_predictions_path = Results_Path + aa + '.mat'  # Save array of 1*32, containing anomaly score for each segment. Please see Evaluate Anomaly Detector to compute ROC.
print "Total Time took: " + str(datetime.now() - time_before)
My .theanorc file:
[global]
floatX = float32
device = cuda0
[gpuarray]
preallocate = 1

You can comment out that line. When you run the script, set the device through Theano flags instead:
THEANO_FLAGS=mode=FAST_RUN,device=cuda0,floatX=float32 python [...]
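If you prefer to keep the script runnable with either backend, a minimal sketch (my suggestion, not part of the original repository) is to guard the old-backend call and let the device come from THEANO_FLAGS or .theanorc:
# Guard the old-backend call: theano.sandbox.cuda does not exist with the
# new gpuarray backend, so fall back to selecting the device through
# THEANO_FLAGS (device=cuda0) or .theanorc instead of forcing it in code.
try:
    import theano.sandbox.cuda
    theano.sandbox.cuda.use('gpu0')  # old backend, 'gpuN' device names
except (ImportError, AttributeError):
    pass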

Related

Get support and ranking attributes for RFE using Pipeline in Python 3

The code I have so far is below and it works. However, for each number of features tested I would like to print the RFE attributes rfe.support_[i] and rfe.ranking_[i], together with the names of the selected features: "i" refers to the column index, the first attribute returns True or False (whether the column was selected or not), and the second returns its ranking.
In other words, I would like to print the concrete columns considered in each RFE, rather than leaving them abstract.
# Explore the number of selected features for RFE
from numpy import mean
from numpy import std
from sklearn.model_selection import RepeatedKFold, cross_val_score, GridSearchCV
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from category_encoders import OneHotEncoder
from sklearn.compose import ColumnTransformer
# Get the dataset
def get_dataset(df, target):
    X, y = df.drop(columns = target), df[[target]].values.flatten()
    return X, y

# Get a list of models to evaluate
def get_models(list_num_cols, list_cat_cols):
    num_transformer = Pipeline(steps = [('num_imputer', SimpleImputer(strategy = 'median'))])
    cat_transformer = Pipeline(steps = [('cat_imputer', SimpleImputer(strategy = 'most_frequent')),
                                        ('one-hot-encoder', OneHotEncoder())])
    preprocessor = ColumnTransformer(transformers = [('num', num_transformer, list_num_cols),
                                                     ('cat', cat_transformer, list_cat_cols)])
    models = dict()
    for i in range(2, 4):
        rfe_dtr = RFE(estimator = DecisionTreeRegressor(), n_features_to_select = i)
        model_dtr = DecisionTreeRegressor()
        models['DecisionTreeRegressor_' + str(i)] = Pipeline(steps = [('preprocessor', preprocessor),
                                                                      ('s_dtr', rfe_dtr),
                                                                      ('m_dtr', model_dtr)])
    return models

# Evaluate a given model using cross-validation
def evaluate_model(model, X, y):
    cv = RepeatedKFold(n_splits = 10, n_repeats = 3, random_state = 7)
    scores = cross_val_score(model, X, y, scoring = 'neg_mean_absolute_error', cv = cv,
                             n_jobs = -1, error_score = 'raise')
    return scores
# Define the dataset
X, y = get_dataset(my_df, 'my_target') # It begins here
# Get the models to evaluate
models = get_models(X.select_dtypes(include = 'number').columns.tolist(),
X.select_dtypes(include = 'object').columns.tolist())
# Evaluate the models and store results
results, names = list(), list()
for name, model in models.items():
    scores = evaluate_model(model, X, y)
    results.append(scores)
    names.append(name)
    print('%s %.3f (%.3f)' % (name, mean(scores), std(scores)))
The following is returning errors:
models['DecisionTreeRegressor_2'].named_steps['s_dtr'].support_[0] # Returns: AttributeError: 'RFE' object has no attribute 'support_'
models['DecisionTreeRegressor_2'].named_steps['s_dtr'].ranking_[0] # Returns: AttributeError: 'RFE' object has no attribute 'ranking_'
The point is that you haven't explicitly fitted the 'DecisionTreeRegressor_2' pipeline.
Although cross_val_score takes care of fitting the estimator internally, it does not return the fitted estimator instance the way the .fit() method does. Therefore you're not able to access the RFE instance attributes on your original pipeline objects.
Here's a toy example from your setting:
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.datasets import make_regression
X, y = make_regression()
models = dict()
for i in range(2, 4):
    rfe_dtr = RFE(estimator = DecisionTreeRegressor(), n_features_to_select = i)
    model_dtr = DecisionTreeRegressor()
    models['DecisionTreeRegressor_' + str(i)] = Pipeline(
        [
            ('s_dtr', rfe_dtr),
            ('m_dtr', model_dtr)
        ])
models['DecisionTreeRegressor_2'].named_steps['s_dtr'].support_ # this does not work
After fitting the model, on the other hand, you will be able to access the support_ and ranking_ attributes:
models['DecisionTreeRegressor_2'].fit(X,y)
models['DecisionTreeRegressor_2'].named_steps['s_dtr'].support_ # this works
models['DecisionTreeRegressor_2'].named_steps['s_dtr'].ranking_ # this works
I found the answer and I'm posting it in case it helps someone. The idea is to use cross_validate instead of cross_val_score, with return_estimator = True, so that the fitted pipelines (and their RFE steps) from the different folds can be retrieved and accessed by index; named_steps can then be used on each of them.
# Explore the number of selected features for RFE
from numpy import mean
from numpy import std
from sklearn.model_selection import RepeatedKFold, cross_validate, GridSearchCV
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from category_encoders import OneHotEncoder
from sklearn.compose import ColumnTransformer
# Get the dataset
def get_dataset(df, target):
    X, y = df.drop(columns = target), df[[target]].values.flatten()
    return X, y

# Get a list of models to evaluate
def get_models(list_num_cols, list_cat_cols):
    num_transformer = Pipeline(steps = [('num_imputer', SimpleImputer(strategy = 'median'))])
    cat_transformer = Pipeline(steps = [('cat_imputer', SimpleImputer(strategy = 'most_frequent')),
                                        ('one-hot-encoder', OneHotEncoder())])
    preprocessor = ColumnTransformer(transformers = [('num', num_transformer, list_num_cols),
                                                     ('cat', cat_transformer, list_cat_cols)])
    models = dict()
    for i in range(2, 4):
        rfe_dtr = RFE(estimator = DecisionTreeRegressor(), n_features_to_select = i)
        model_dtr = DecisionTreeRegressor()
        models['DecisionTreeRegressor_' + str(i)] = Pipeline(steps = [('preprocessor', preprocessor),
                                                                      ('s_dtr', rfe_dtr),
                                                                      ('m_dtr', model_dtr)])
    return models

# Evaluate a given model using cross-validation
def evaluate_model(model, X, y):
    cv = RepeatedKFold(n_splits = 10, n_repeats = 3, random_state = 7)
    output = cross_validate(model, X, y, scoring = 'neg_mean_absolute_error', cv = cv,
                            n_jobs = -1, error_score = 'raise', return_estimator = True)
    return output
# Define the dataset
X, y = get_dataset(my_df, 'my_target') # It begins here
# Get the models to evaluate
models = get_models(X.select_dtypes(include = 'number').columns.tolist(),
X.select_dtypes(include = 'object').columns.tolist())
# Evaluate the models and store results
results, names = list(), list()
for name, model in models.items():
    output = evaluate_model(model, X, y)
    results.append(output['test_score'])
    names.append(name)
    print('%s %.3f (%.3f)' % (name, mean(output['test_score']), std(output['test_score'])))
    print(output)
    print(output['estimator'][0].named_steps['s_dtr'].support_)
    print(output['estimator'][0].named_steps['s_dtr'].ranking_)
    print(output['estimator'][0].named_steps['s_dtr'].support_[2])
    print(output['estimator'][0].named_steps['s_dtr'].ranking_[2])
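Since the original goal also included printing the names of the selected features, here is a minimal sketch of how one might map the RFE mask back to column names. It assumes a recent scikit-learn and category_encoders where the fitted preprocessor exposes get_feature_names_out (older versions expose different methods), and it inspects the pipeline fitted on the first fold:
# Map RFE's boolean support_ mask back to the column names produced by the
# preprocessor (assumption: every transformer implements get_feature_names_out).
fitted_pipe = output['estimator'][0]  # pipeline fitted on the first fold
feature_names = fitted_pipe.named_steps['preprocessor'].get_feature_names_out()
mask = fitted_pipe.named_steps['s_dtr'].support_
selected = [name for name, keep in zip(feature_names, mask) if keep]
print(selected)  # names of the features kept by RFE in that fold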

Loading pickle NotFittedError: TfidfVectorizer - Vocabulary wasn't fitted

Multilabel classification
I am trying to do multilabel classification using scikit-learn/pandas/OneVsRestClassifier/logistic regression. Building and evaluating the model works, but attempting to classify new sample text does not.
Scenario 1:
I built the model, saved it as sample.pkl, and restarted my kernel. When I load the saved model (sample.pkl) and run a prediction on sample text, I get the error:
NotFittedError: TfidfVectorizer - Vocabulary wasn't fitted.
inference
import pickle,os
import collections
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt
from collections import Counter
from nltk.corpus import stopwords
import json, nltk, re, csv, pickle
from sklearn.metrics import f1_score # performance matrix
from sklearn.multiclass import OneVsRestClassifier # binary relavance
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
stop_words = set(stopwords.words('english'))
def cleanHtml(sentence):
    ''' remove the HTML tags '''
    cleanr = re.compile('<.*?>')
    cleantext = re.sub(cleanr, ' ', str(sentence))
    return cleantext

def cleanPunc(sentence):
    ''' clean the sentence of any
    punctuation or special characters '''
    cleaned = re.sub(r'[?|!|\'|"|#]', r'', sentence)
    cleaned = re.sub(r'[.|,|)|(|\|/]', r' ', cleaned)
    cleaned = cleaned.strip()
    cleaned = cleaned.replace("\n", " ")
    return cleaned

def keepAlpha(sentence):
    """ keep only the alphabetic parts of the sentence """
    alpha_sent = ""
    for word in sentence.split():
        alpha_word = re.sub('[^a-z A-Z]+', ' ', word)
        alpha_sent += alpha_word
        alpha_sent += " "
    alpha_sent = alpha_sent.strip()
    return alpha_sent

def remove_stopwords(text):
    """ remove stop words """
    no_stopword_text = [w for w in text.split() if not w in stop_words]
    return ' '.join(no_stopword_text)
test1 = pd.read_csv("C:\\Users\\abc\\Downloads\\test1.csv")
test1.columns
test1.head()
siNo plot movie_name genre_new
1 The story begins with Hannah... sing [drama,teen]
2 Debbie's favorite band is Dream.. the bigeest fan [drama]
3 This story of a Zulu family is .. come back,africa [drama,Documentary]
Error
I get the error when I run inference on sample text:
def infer_tags(q):
    q = cleanHtml(q)
    q = cleanPunc(q)
    q = keepAlpha(q)
    q = remove_stopwords(q)
    multilabel_binarizer = MultiLabelBinarizer()
    tfidf_vectorizer = TfidfVectorizer()
    q_vec = tfidf_vectorizer.transform([q])
    q_pred = clf.predict(q_vec)
    return multilabel_binarizer.inverse_transform(q_pred)

for i in range(5):
    print(i)
    k = test1.sample(1).index[0]
    print("Movie: ", test1['movie_name'][k], "\nPredicted genre: ", infer_tags(test1['plot'][k])), print("Actual genre: ", test1['genre_new'][k], "\n")
Solved
I solved it by saving the tfidf vectorizer and the multilabel binarizer into pickle files as well:
from sklearn.externals import joblib
pickle.dump(tfidf_vectorizer, open("tfidf_vectorizer.pickle", "wb"))
pickle.dump(multilabel_binarizer, open("multibinirizer_vectorizer.pickle", "wb"))
vectorizer = joblib.load('/abc/downloads/tfidf_vectorizer.pickle')
multilabel_binarizer = joblib.load('/abc/downloads/multibinirizer_vectorizer.pickle')
def infer_tags(q):
    q = cleanHtml(q)
    q = cleanPunc(q)
    q = keepAlpha(q)
    q = remove_stopwords(q)
    q_vec = vectorizer.transform([q])
    q_pred = rf_model.predict(q_vec)
    return multilabel_binarizer.inverse_transform(q_pred)
I went through the link below to get the solution:
How do I store a TfidfVectorizer for future use in scikit-learn?
This happens because you are only dumping the classifier into the pickle, not the vectorizer.
During inference, when you call
tfidf_vectorizer = TfidfVectorizer()
your vectorizer is not fitted on the training vocabulary, which is what raises the error.
What you should do is dump both the classifier and the vectorizer to pickle, and load them both during inference.
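A minimal sketch of what that could look like (the names clf, tfidf_vectorizer and multilabel_binarizer refer to the fitted objects from the training script above; the file names are illustrative):
import pickle

# At the end of training: dump every fitted object, not just the classifier.
with open("clf.pickle", "wb") as f:
    pickle.dump(clf, f)
with open("tfidf_vectorizer.pickle", "wb") as f:
    pickle.dump(tfidf_vectorizer, f)
with open("multilabel_binarizer.pickle", "wb") as f:
    pickle.dump(multilabel_binarizer, f)

# At inference time: load the already-fitted objects instead of creating new ones.
with open("clf.pickle", "rb") as f:
    clf = pickle.load(f)
with open("tfidf_vectorizer.pickle", "rb") as f:
    tfidf_vectorizer = pickle.load(f)
with open("multilabel_binarizer.pickle", "rb") as f:
    multilabel_binarizer = pickle.load(f)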

How to train an image similarity model on 20 million images (total size 10GB)?

My system has 16GB RAM. I have tried to train an image similarity model on 20 million images (total size 10GB) using VGG19 features and KNN nearest neighbors. When trying to read the images I get a MemoryError. I even tried to train the model on 200,000 images (total size 770MB), but the issue is the same. How can I read millions of images to train ML models?
Ubuntu 18.04.2 LTS, Core i7, Intel HD Graphics 5500 (Broadwell GT2), 64-bit, 16GB RAM
import os
import skimage.io
import tensorflow as tf
from skimage.transform import resize
import numpy as np
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
from matplotlib import offsetbox
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from sklearn import manifold
import pickle
skimage.io.use_plugin('matplotlib')
dirPath = 'train_data'
args = [os.path.join(dirPath, filename) for filename in os.listdir(dirPath)]
imgs_train = [skimage.io.imread(arg, as_gray=False) for arg in args]
shape_img = (130, 130, 3)
model = tf.keras.applications.VGG19(weights='imagenet', include_top=False,
input_shape=shape_img)
model.summary()
shape_img_resize = tuple([int(x) for x in model.input.shape[1:]])
input_shape_model = tuple([int(x) for x in model.input.shape[1:]])
output_shape_model = tuple([int(x) for x in model.output.shape[1:]])
n_epochs = None
def resize_img(img, shape_resized):
    img_resized = resize(img, shape_resized,
                         anti_aliasing=True,
                         preserve_range=True)
    assert img_resized.shape == shape_resized
    return img_resized

def normalize_img(img):
    return img / 255.

def transform_img(img, shape_resize):
    img_transformed = resize_img(img, shape_resize)
    img_transformed = normalize_img(img_transformed)
    return img_transformed

def apply_transformer(imgs, shape_resize):
    imgs_transform = [transform_img(img, shape_resize) for img in imgs]
    return imgs_transform
imgs_train_transformed = apply_transformer(imgs_train, shape_img_resize)
X_train = np.array(imgs_train_transformed).reshape((-1,) + input_shape_model)
E_train = model.predict(X_train)
E_train_flatten = E_train.reshape((-1, np.prod(output_shape_model)))
knn = NearestNeighbors(n_neighbors=5, metric="cosine")
knn.fit(E_train_flatten)
Since Keras works well with generators, you should consider using one (see a Python generator tutorial and an example of using a generator with Keras).
A generator lets you load your images during training, batch by batch, instead of reading them all into memory at once.
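A minimal sketch of that idea, reusing the VGG19 model defined in the question for batch-wise feature extraction. Assumptions: TensorFlow 2.x, where model.predict accepts a keras Sequence (on TF 1.x you would use predict_generator), RGB input images, and the same train_data layout; the class name and batch size are illustrative.
import os
import numpy as np
import skimage.io
import tensorflow as tf
from skimage.transform import resize

class ImageBatchSequence(tf.keras.utils.Sequence):
    """Loads and preprocesses images batch by batch instead of all at once."""
    def __init__(self, paths, batch_size=64, shape=(130, 130, 3)):
        self.paths, self.batch_size, self.shape = paths, batch_size, shape

    def __len__(self):
        return int(np.ceil(len(self.paths) / self.batch_size))

    def __getitem__(self, idx):
        batch_paths = self.paths[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch = [resize(skimage.io.imread(p), self.shape,
                        anti_aliasing=True, preserve_range=True) / 255.
                 for p in batch_paths]
        return np.asarray(batch, dtype=np.float32)

dirPath = 'train_data'
paths = sorted(os.path.join(dirPath, f) for f in os.listdir(dirPath))
seq = ImageBatchSequence(paths)
E_train = model.predict(seq)  # features computed batch by batch, never all images in RAM
E_train_flatten = E_train.reshape((len(paths), -1))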

IndexError: index 2 is out of bounds for axis 1 with size 2

I am receiving an "index is out of bounds" error on the line doctopic = clf.fit_transform(dtm). My Data folder contains two CSV files. Could someone explain how to fix this IndexError?
import os
print (os.getcwd())
import numpy as np
import langdetect
from stop_words import get_stop_words
CORPUS_PATH = os.path.join('C:\\Users\\mike120\\Downloads\\TM 09-25\\Data')
filenames = sorted([os.path.join(CORPUS_PATH, fn) for fn in
os.listdir(CORPUS_PATH)])
len(filenames)
filenames[:5]
import sklearn.feature_extraction.text as text
#lang = langdetect.detect(CORPUS_PATH)
lang_stop = get_stop_words('en')
vectorizer = text.CountVectorizer(input='filename', stop_words=lang_stop, min_df=2)
dtm = vectorizer.fit_transform(filenames).toarray()
vocab = np.array(vectorizer.get_feature_names())
dtm.shape
from sklearn import decomposition
num_topics = 20
num_top_words = 20
clf = decomposition.NMF(n_components=num_topics, random_state=1)
doctopic = clf.fit_transform(dtm)

Scikit Learn OpenCV SVM IndexError: list index out of range

I'm training an SVM based on features extracted from a pictures dataset.
The code goes as follows:
import os
import sys
import argparse
import pickle as cPickle
import numpy as np
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import LinearSVC
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.utils import check_random_state
def build_arg_parser():
    parser = argparse.ArgumentParser(description='Trains the classifier models')
    parser.add_argument("--feature-map-file", dest="feature_map_file", required=True,
                        help="Input pickle file containing the feature map")
    parser.add_argument("--svm-file", dest="svm_file", required=False,
                        help="Output file where the pickled SVM model will be stored")
    return parser

class ClassifierTrainer(object):
    def __init__(self, X, label_words):
        self.le = preprocessing.LabelEncoder()
        self.clf = OneVsOneClassifier(LinearSVC(random_state=0))
        y = self._encodeLabels(label_words)
        X = np.asarray(X)
        self.clf.fit(X, y)

    def _fit(self, X):
        X = np.asarray(X)
        return self.clf.predict(X)

    def _encodeLabels(self, labels_words):
        self.le.fit(labels_words)
        return np.array(self.le.transform(labels_words), dtype=np.float32)

    def classify(self, X):
        labels_nums = self._fit(X)
        labels_words = self.le.inverse_transform([int(x) for x in labels_nums])
        return labels_words

if __name__ == '__main__':
    args = build_arg_parser().parse_args()
    feature_map_file = args.feature_map_file
    svm_file = args.svm_file

    # Load the feature map
    with open(feature_map_file, 'rb') as f:
        feature_map = cPickle.load(f)

    # Extract feature vectors and the labels
    labels_words = [x['label'] for x in feature_map]
    dim_size = feature_map[0]['feature_vector'].shape[1]
    X = [np.reshape(x['feature_vector'], (dim_size,)) for x in feature_map]

    # Train the SVM
    svm = ClassifierTrainer(X, labels_words)
    if args.svm_file:
        with open(args.svm_file, 'wb') as f:
            cPickle.dump(svm, f)
This is the error that the system throws:
Traceback (most recent call last):
  File "training.py", line 59, in <module>
    svm = ClassifierTrainer(X, labels_words)
  File "training.py", line 29, in __init__
    self.clf.fit(X, y)
  File "/home/pi/.virtualenvs/cv/lib/python3.4/site-packages/sklearn/multiclass.py", line 496, in fit
    self.estimators_ = estimators_indices[0]
IndexError: list index out of range
Any ideas what I am doing wrong? It seems there is a problem with multiclass.py in the Python site-packages.
