Explaining an LSTM Keras model with the Eli5 library - keras

I'm trying to use Eli5 to explain an LSTM Keras model for time-series prediction. The Keras model receives as input an array of shape (nsamples, timesteps, nfeatures).
This is my code:
def baseline_model():
    model = Sequential()
    model.add(LSTM(32, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(0.2))
    model.add(Dense(1))
    model.compile(loss='logcosh', optimizer='adam')
    return model

from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
import eli5
from eli5.sklearn import PermutationImportance

my_model = KerasRegressor(build_fn=baseline_model, nb_epoch=30, batch_size=32, verbose=False)
history = my_model.fit(X_train, y_train)
So far, everything is OK. The problem is that the following line raises an error:
# X_train has a shape equal to (nsamples, timesteps, nfeatures) and y_train has a shape (nsamples)
perm = PermutationImportance(my_model, random_state=1).fit(X_train, y_train)
Error:
ValueError Traceback (most recent call last)
in ()
2 d2_train_dataset = X_train.reshape((nsamples, timesteps * features))
3
----> 4 perm = PermutationImportance(my_model, random_state=1).fit(X_train, y_train)
5 #eli5.show_weights(perm, feature_names = X.columns.tolist())
~/anaconda3/lib/python3.6/site-packages/eli5/sklearn/permutation_importance.py in fit(self, X, y, groups, **fit_params)
183 self.estimator_.fit(X, y, **fit_params)
184
--> 185 X = check_array(X)
186
187 if self.cv not in (None, "prefit"):
~/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
568 if not allow_nd and array.ndim >= 3:
569 raise ValueError("Found array with dim %d. %s expected <= 2."
--> 570 % (array.ndim, estimator_name))
571 if force_all_finite:
572 _assert_all_finite(array,
ValueError: Found array with dim 3. Estimator expected <= 2.
What can I do to fix this error? How can I use Eli5 with my LSTM Keras model?
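One workaround I can sketch here (it is not part of the original post): flatten the input to 2D so that check_array accepts it, and wrap the already-trained model in a small estimator that reshapes each batch back to 3D before predicting. The names ReshapeWrapper and X2d are mine; note also that this permutes individual (timestep, feature) cells rather than whole features.
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin

class ReshapeWrapper(BaseEstimator, RegressorMixin):
    # Hypothetical helper: accepts 2D arrays, feeds 3D arrays to the LSTM.
    def __init__(self, model, timesteps, nfeatures):
        self.model = model
        self.timesteps = timesteps
        self.nfeatures = nfeatures

    def fit(self, X, y):
        return self  # the Keras model is assumed to be trained already

    def predict(self, X):
        X3d = X.reshape(X.shape[0], self.timesteps, self.nfeatures)
        return self.model.predict(X3d).ravel()

X2d = X_train.reshape(X_train.shape[0], -1)
wrapper = ReshapeWrapper(my_model.model, X_train.shape[1], X_train.shape[2])
perm = PermutationImportance(wrapper, random_state=1, cv='prefit').fit(X2d, y_train)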

Related

assertion failed: [Condition x == y did not hold element-wise:]

I have built a BiLSTM model with an attention layer for a sentence classification task, but I am getting an error saying an assertion failed due to a shape mismatch between the model output and the labels. The attention layer code is here and the error is below the code.
class attention(Layer):
    def __init__(self, return_sequences=True):
        self.return_sequences = return_sequences
        super(attention, self).__init__()

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")
        super(attention, self).build(input_shape)

    def call(self, x):
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        return K.sum(output, axis=1)
When I train the model with the attention layer included, it fails with the assertion error below.
Epoch 1/10
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-45-ac310033130c> in <module>()
1 #Early stopping, Adam, dropout = 0.3, 0.5, 0.5
2 #history = model.fit(sequences_matrix, Y_train, batch_size=256, epochs=5, validation_split=0.1, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
----> 3 history = model.fit(sequences_matrix, Y_train, batch_size=32, epochs=10, validation_split=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: assertion failed: [Condition x == y did not hold element-wise:] [x (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [32 1] [y (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [32 758]
[[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert (defined at <ipython-input-45-ac310033130c>:3) ]] [Op:__inference_train_function_19854]
Function call stack:
train_function
My model is
model = Sequential()
model.add(Embedding(max_words, 768, input_length=max_len, weights=[embedding]))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(SpatialDropout1D(0.1))
model.add(Conv1D(16, kernel_size=11, activation='relu'))
model.add(Bidirectional(LSTM(16, return_sequences=True)))
model.add(attention(return_sequences=True))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax', use_bias=True,
                kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4),
                bias_regularizer=regularizers.l2(1e-4),
                activity_regularizer=regularizers.l2(1e-5)))
model.summary()
My preprocessing code and the shape of Y_train:
max_words = 48369
max_len = 768
tok = Tokenizer(num_words = max_words)
tok.fit_on_texts(X_train)
sequences = tok.texts_to_sequences(X_train)
sequences_matrix = sequence.pad_sequences(sequences, maxlen = max_len)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)
print(Y_train.shape)
(43532, 1)
Your target is 2D, so you need to set return_sequences=False in the attention layer so that it returns its output in 2D format.
Alternatively, add a Flatten layer before the Dropout and run it again:
model.add(Flatten())
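With the first suggestion applied, the tail of the model would look like this (my sketch, not code from the original answer); with return_sequences=False the attention layer sums over the timesteps and returns a (batch, features) tensor, which matches the (43532, 1) labels under sparse_categorical_crossentropy:
model.add(Bidirectional(LSTM(16, return_sequences=True)))
model.add(attention(return_sequences=False))  # K.sum over timesteps -> (batch, 32)
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))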

Can I use probabilistic labels when training a logistic regression model?

I use sklearn.linear_model.LogisticRegression and would like to train the model with probabilistic labels.
But, as the following code shows, I get an error when I try to fit a logistic regression model on training data with probability labels.
Is there any way to use probabilistic labels to train a logistic regression model?
import numpy as np
from sklearn.linear_model import LogisticRegression
x = np.array([1966, 1967, 1968, 1969, 1970,
1971, 1972, 1973, 1974, 1975,
1976, 1977, 1978, 1979, 1980,
1981, 1982, 1983, 1984]).reshape(-1, 1)
y = np.array([0.003, 0.016, 0.054, 0.139, 0.263,
0.423, 0.611, 0.758, 0.859, 0.903,
0.937, 0.954, 0.978, 0.978, 0.982,
0.985, 0.989, 0.988, 0.992])
lr = LogisticRegression()
lr.fit(x, y)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-26-6f0a54f18841> in <module>()
13
14 lr = LogisticRegression()
---> 15 lr.fit(x, y) # => ValueError: Unknown label type: 'continuous'
/home/sudot/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py in fit(self, X, y, sample_weight)
1172 X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,
1173 order="C")
-> 1174 check_classification_targets(y)
1175 self.classes_ = np.unique(y)
1176 n_samples, n_features = X.shape
/home/sudot/anaconda3/lib/python3.6/site-packages/sklearn/utils/multiclass.py in check_classification_targets(y)
170 if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
171 'multilabel-indicator', 'multilabel-sequences']:
--> 172 raise ValueError("Unknown label type: %r" % y_type)
173
174
ValueError: Unknown label type: 'continuous'
LogisticRegression is a binary classification model. You can't pass non-categorical values as targets.
Just round the values of y before fitting:
y = y.round(0) # Add this line
lr = LogisticRegression()
lr.fit(x, y)
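If you want to keep the probabilistic targets instead of rounding them away, one well-known trick (my addition, not part of the answer above) is to duplicate every sample under both labels and use the probabilities as sample weights; the weighted likelihood is then equivalent to training on the soft labels:
import numpy as np
from sklearn.linear_model import LogisticRegression

# Each row appears twice: once as class 0 (weight 1 - p) and once as class 1 (weight p).
x2 = np.vstack([x, x])
y2 = np.concatenate([np.zeros(len(y)), np.ones(len(y))])
w2 = np.concatenate([1 - y, y])

lr = LogisticRegression()
lr.fit(x2, y2, sample_weight=w2)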

UnknownError when trying to use a pre-trained model to predict classes

Following the tutorial below, I saved a trained model to a .h5 file using model.save().
https://www.tensorflow.org/tutorials/keras/save_and_restore_models#save_the_entire_model
I then loaded this model in a new file using load_model(), and when I try to use it to predict on new data, I get an UnknownError. I am using tensorflow-gpu.
The code I've used for training the model is as below:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, CuDNNLSTM, Bidirectional
from sklearn.model_selection import train_test_split
"""
My dataset is a two-column DataFrame of which the first column (X) contains
a pre-processed and encoded tweet which has been padded to a length of 47 words. The reason as to why I did this is because I'm passing in these values to an Embedding layer
The second column (Y) is a label associated with that tweet
X_train.shape = (89552, 47)
Y.shape = (89552,)
vocab_size = 66167
max_sent_len = 47
emb_dim = 75
"""
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=420)
x_train, x_val, Y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=420)
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=emb_dim, input_length=max_sent_len, trainable=True),
    Bidirectional(CuDNNLSTM(64, return_sequences=False)),
    Dropout(0.5),
    Dense(2, activation='softmax')
])
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, Y_train, epochs=50, batch_size=300, shuffle=True, validation_data=(x_val, y_val))
model.summary()
model.save('trained/model-2-f.h5')
I did not get any errors while training or saving the model.
To check whether the model was saved properly, I load it in another file and test it on another dataset with the same properties.
from tensorflow.keras.models import load_model
trained_model = load_model('trained/model-2-f.h5')
"""
w_x is the data I'm using to predict values for
w_x.shape = (201, 47)
"""
w_pred = trained_model.predict_classes(w_x)
The error I'm getting is:
UnknownError Traceback (most recent call last)
<ipython-input-18-cd338bbebc52> in <module>
----> 1 w_pred = trained_model.predict_classes(w_x)
2 w_pred
~\Anaconda3\envs\nlp\lib\site-packages\tensorflow\python\keras\engine\sequential.py in predict_classes(self, x, batch_size, verbose)
316 A numpy array of class predictions.
317 """
--> 318 proba = self.predict(x, batch_size=batch_size, verbose=verbose)
319 if proba.shape[-1] > 1:
320 return proba.argmax(axis=-1)
~\Anaconda3\envs\nlp\lib\site-packages\tensorflow\python\keras\engine\training.py in predict(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)
1076 verbose=verbose,
1077 steps=steps,
-> 1078 callbacks=callbacks)
1079
1080 def reset_metrics(self):
~\Anaconda3\envs\nlp\lib\site-packages\tensorflow\python\keras\engine\training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
361
362 # Get outputs.
--> 363 batch_outs = f(ins_batch)
364 if not isinstance(batch_outs, list):
365 batch_outs = [batch_outs]
~\Anaconda3\envs\nlp\lib\site-packages\tensorflow\python\keras\backend.py in __call__(self, inputs)
3290
3291 fetched = self._callable_fn(*array_vals,
-> 3292 run_metadata=self.run_metadata)
3293 self._call_fetch_callbacks(fetched[-len(self._fetches):])
3294 output_structure = nest.pack_sequence_as(
~\Anaconda3\envs\nlp\lib\site-packages\tensorflow\python\client\session.py in __call__(self, *args, **kwargs)
1456 ret = tf_session.TF_SessionRunCallable(self._session._session,
1457 self._handle, args,
-> 1458 run_metadata_ptr)
1459 if run_metadata:
1460 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
UnknownError: 2 root error(s) found.
(0) Unknown: Fail to find the dnn implementation.
[[{{node bidirectional_2/CudnnRNN_1}}]]
[[dense_2/Softmax/_243]]
(1) Unknown: Fail to find the dnn implementation.
[[{{node bidirectional_2/CudnnRNN_1}}]]
0 successful operations.
0 derived errors ignored.
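No answer is reproduced here, but this particular failure ("Fail to find the dnn implementation" on a CudnnRNN node) is commonly caused by the GPU memory already being fully claimed when the cuDNN handle is created. On tensorflow-gpu 1.x a frequently suggested workaround (my sketch, not from the original question) is to enable memory growth before loading the model:
import tensorflow as tf
from tensorflow.keras.models import load_model

# Let TensorFlow allocate GPU memory on demand instead of all at once.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
tf.keras.backend.set_session(tf.Session(config=config))

trained_model = load_model('trained/model-2-f.h5')
w_pred = trained_model.predict_classes(w_x)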

sentiment classification using keras

I am very new to deep-learning classification. I have review data labelled (pos, neg), and I'm trying to classify it using Keras. Here is my code:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
sentences_train, sentences_test, y_train, y_test = train_test_split(review_list2, label_list, test_size=0.25, random_state=1000)
vectorizer = CountVectorizer()
vectorizer.fit(sentences_train)
X_train = vectorizer.transform(sentences_train)
X_test = vectorizer.transform(sentences_test)
#build the model
from keras.models import Sequential
from keras import layers
input_dim = X_train.shape[1]
model = Sequential()
model.add(layers.Dense(8, input_dim=input_dim, activation='relu'))
model.add(layers.Dense(2, activation='softmax'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
history = model.fit(X_train, y_train, epochs=100, verbose=False, validation_data=(X_test, y_test), batch_size=10)
I got an error:
AttributeError Traceback (most recent call last)
<ipython-input-52-34c39f53e335> in <module>
----> 1 history = model.fit(X_train, y_train, epochs=100, verbose=False, validation_data=(X_test, y_test), batch_size=10)
d:\py-ver35\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
950 sample_weight=sample_weight,
951 class_weight=class_weight,
--> 952 batch_size=batch_size)
953 # Prepare validation data.
954 do_validation = False
d:\py-ver35\lib\site-packages\keras\engine\training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)
787 feed_output_shapes,
788 check_batch_axis=False, # Don't enforce the batch size.
--> 789 exception_prefix='target')
790
791 # Generate sample-wise weight values given the `sample_weight` and
d:\py-ver35\lib\site-packages\keras\engine\training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
90 data = data.values if data.__class__.__name__ == 'DataFrame' else data
91 data = [data]
---> 92 data = [standardize_single_array(x) for x in data]
93
94 if len(data) != len(names):
d:\py-ver35\lib\site-packages\keras\engine\training_utils.py in <listcomp>(.0)
90 data = data.values if data.__class__.__name__ == 'DataFrame' else data
91 data = [data]
---> 92 data = [standardize_single_array(x) for x in data]
93
94 if len(data) != len(names):
d:\py-ver35\lib\site-packages\keras\engine\training_utils.py in standardize_single_array(x)
25 'Got tensor with shape: %s' % str(shape))
26 return x
---> 27 elif x.ndim == 1:
28 x = np.expand_dims(x, 1)
29 return x
AttributeError: 'str' object has no attribute 'ndim'
I have tried every solution mentioned in relation to this error but still cannot fix it. Any help? Thanks in advance.
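No answer is reproduced here, but the traceback points at the targets: standardize_single_array receives a plain Python string, which means y_train still holds the raw 'pos'/'neg' labels. A minimal sketch of a fix (my addition) is to encode the labels as integers and one-hot vectors before fitting, and to pair the two-unit softmax with categorical_crossentropy; if Keras also complains about the sparse CountVectorizer matrix, convert X with .toarray():
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Map 'pos'/'neg' strings to 0/1 integers, then to one-hot vectors of shape (n, 2).
encoder = LabelEncoder()
y_train_enc = to_categorical(encoder.fit_transform(y_train))
y_test_enc = to_categorical(encoder.transform(y_test))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(X_train, y_train_enc, epochs=100, verbose=False,
                    validation_data=(X_test, y_test_enc), batch_size=10)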

Keras error when finetuning InceptionV3

I am trying to follow the "Fine-tune InceptionV3 on a new set of classes" sample code to freeze the first 172 layers and re-train the last layers on cats/dogs dataset. I keep getting an error which I have noted at the bottom. Please help. I am using Ubuntu 16.04, keras 1.2.1, theano 0.9.0beta1.dev, numpy 1.12.0 and python 3.5.
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np
data_root_dir = "/home/ubuntu/ML/data/dogscats/"
train_dir = os.path.join(data_root_dir,"sample", "train")
valid_dir = os.path.join(data_root_dir, "valid")
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=True)
# add a global spatial average pooling layer
x = base_model.output
#x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)
# this is the model we will train
model = Model(input=base_model.input, output=predictions)
for layer in model.layers[:172]:
    layer.trainable = False
for layer in model.layers[172:]:
    layer.trainable = True
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')
from sklearn.preprocessing import OneHotEncoder
def get_data(path, target_size=(299,299)):
    batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
    return np.concatenate([batches.next() for i in range(batches.nb_sample)])

def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, batch_size=2, class_mode='categorical',
                target_size=(299,299)):
    return gen.flow_from_directory(dirname, target_size=target_size,
                                   class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)

def onehot(x): return np.array(OneHotEncoder().fit_transform(x.reshape(-1,1)).todense())
# Use batch size of 1 since we're just doing preprocessing on the CPU
val_batches = get_batches(valid_dir, shuffle=False, batch_size=10)
train_batches = get_batches(train_dir, shuffle=False, batch_size=10)
val_classes = val_batches.classes
trn_classes = train_batches.classes
val_labels = onehot(val_classes)
trn_labels = onehot(trn_classes)
model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10,
                    validation_data=val_batches, nb_val_samples=val_batches.n)
The exception is: padding must be zero for average_exc_pad
Here is the full stack-trace:
ValueError Traceback (most recent call last)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
883 outputs =\
--> 884 self.fn() if output_subset is None else\
885 self.fn(output_subset=output_subset)
ValueError: padding must be zero for average_exc_pad
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-4-369d7760ec6e> in <module>()
34
35 model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10,
---> 36 validation_data=val_batches, nb_val_samples=val_batches.n)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/training.py in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch)
1551 outs = self.train_on_batch(x, y,
1552 sample_weight=sample_weight,
-> 1553 class_weight=class_weight)
1554
1555 if not isinstance(outs, list):
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
1314 ins = x + y + sample_weights
1315 self._make_train_function()
-> 1316 outputs = self.train_function(ins)
1317 if len(outputs) == 1:
1318 return outputs[0]
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/backend/theano_backend.py in __call__(self, inputs)
957 def __call__(self, inputs):
958 assert isinstance(inputs, (list, tuple))
--> 959 return self.function(*inputs)
960
961
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
896 node=self.fn.nodes[self.fn.position_of_error],
897 thunk=thunk,
--> 898 storage_map=getattr(self.fn, 'storage_map', None))
899 else:
900 # old-style linkers raise their own exceptions
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/gof/link.py in raise_with_op(node, thunk, exc_info, storage_map)
323 # extra long error message in that case.
324 pass
--> 325 reraise(exc_type, exc_value, exc_trace)
326
327
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/six.py in reraise(tp, value, tb)
683 value = tp()
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
687
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
882 try:
883 outputs =\
--> 884 self.fn() if output_subset is None else\
885 self.fn(output_subset=output_subset)
886 except Exception:
ValueError: padding must be zero for average_exc_pad
Apply node that caused the error: AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}(Join.0, IncSubtensor{InplaceInc;::, ::, :int64:, :int64:}.0, TensorConstant{(2,) of 3}, TensorConstant{(2,) of 1}, TensorConstant{(2,) of 1})
Toposort index: 5270
Inputs types: [TensorType(float32, 4D), TensorType(float32, 4D), TensorType(int64, vector), TensorType(int64, vector), TensorType(int64, vector)]
Inputs shapes: [(10, 2048, 8, 8), (10, 2048, 8, 8), (2,), (2,), (2,)]
Inputs strides: [(524288, 256, 32, 4), (524288, 256, 32, 4), (8,), (8,), (8,)]
Inputs values: ['not shown', 'not shown', array([3, 3]), array([1, 1]), array([1, 1])]
Outputs clients: [[Elemwise{add,no_inplace}(CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}.0)]]
Fine-tuning in that situation possibly means using the convolutional layers as pre-trained feature extractors. So you don't really want the top layers (densely connected layers) of the Inception network.
Changing
base_model = InceptionV3(weights='imagenet', include_top=True)
to
base_model = InceptionV3(weights='imagenet', include_top=False)
should work.
Also, if you have 200 classes you should change
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)
to
predictions = Dense(200, activation='softmax')(x)
So your last layer will have the desired 200 elements.
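One more detail worth noting (my addition, prompted by the commented-out line in the question): with include_top=False the base model now ends in a 4D feature map, so the GlobalAveragePooling2D call has to be restored before the Dense layers:
base_model = InceptionV3(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)  # reduces the 4D feature map to (batch, channels)
x = Dense(1024, activation='relu')(x)
predictions = Dense(2, activation='softmax')(x)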
