how to do reshape in custom function in keras - python-3.x

I'm trying to reshape tensors inside a custom function in TensorFlow Keras.
I'm trying to port the following kind of loss function to TensorFlow as a custom loss function:
# Since WRMSSE is calculated for each store, we have 3049 rows and 9180 time series
# Function to do quick rollups:
def rollup_nn(v):
    """Roll values up by left-multiplying them with ``roll_mat_csr``.

    ``roll_mat_csr`` is a module-level matrix defined outside this snippet
    (presumably a sparse roll-up matrix -- verify against its definition).

    Args:
        v: np.array of values; the original note gives its size as
            (3049 rows, n day columns).

    Returns:
        The product ``roll_mat_csr * v``. The original note gives the
        rolled-up size as (n, 9180) -- TODO confirm the orientation, because
        the left-multiplication used here suggests the 9180 series end up on
        the first axis.
    """
    # Transposed formulation kept from the original for reference:
    # (v.T * roll_mat_csr.T).T
    v_rolledup = roll_mat_csr * v
    return v_rolledup
# Function to calculate WRMSSE:
key = 0
def wrmsse_nn(preds, y_true):
    """Compute the WRMSSE score over the trailing 28-day window.

    Args:
        preds: Flat array of predictions supporting slicing and ``.reshape``
            (e.g. a NumPy array). Only the last ``3049 * 28`` entries are
            used.
        y_true: Object exposing ``get_label()`` that returns a flat array of
            true values (presumably a LightGBM ``Dataset`` -- verify against
            the caller). Only the last ``3049 * 28`` labels are used.

    Returns:
        The tuple ``('wrmsse', score, False)``. The trailing ``False``
        presumably is a "higher is better" flag for the training framework --
        TODO confirm.

    Notes:
        Depends on ``rollup_nn`` and on the module-level weight vector
        ``SW_store``, which is defined outside this snippet.
    """
    n_items, n_days = 3049, 28
    window = n_items * n_days

    # Keep only the evaluation window and lay it out as (items, days).
    preds = preds[-window:].reshape(n_days, n_items).T
    y_true = y_true.get_label()[-window:].reshape(n_days, n_items).T

    # Use the roll-up helper defined alongside this function; the original
    # called ``rollup``, which is not defined in this snippet.
    rolled_error = rollup_nn(preds - y_true)
    rmse_per_series = np.sqrt(np.mean(np.square(rolled_error), axis=1))

    # NOTE(review): 12 presumably is the number of aggregation levels the
    # weighted sum is averaged over -- confirm.
    score = np.sum(rmse_per_series * SW_store) / 12
    return 'wrmsse', score, False
This is where I need to do the reshape in the custom loss function.
I'm doing the reshape with the following code:
tf.reshape(preds,[28, 3049])
I'm getting the following error
AttributeError: 'NoneType' object has no attribute 'get_shape'
The complete error message is
Tensor("dense_23_target:0", shape=(?, ?), dtype=float32) Tensor("dense_23_1/BiasAdd:0", shape=(?, 1), dtype=float32)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-101-05dfd1dadcca> in <module>()
7 # model.add(Dense(units=16,activation='relu',kernel_initializer=initializer.he_normal(seed=0)))
8 model.add(Dense(units=1))
----> 9 model.compile(loss=wrmsse_nn,optimizer='adam')
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/checkpointable/base.py in _method_wrapper(self, *args, **kwargs)
440 self._setattr_tracking = False # pylint: disable=protected-access
441 try:
--> 442 method(self, *args, **kwargs)
443 finally:
444 self._setattr_tracking = previous_value # pylint: disable=protected-access
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py in compile(self, optimizer, loss, metrics, loss_weights, sample_weight_mode, weighted_metrics, target_tensors, distribute, **kwargs)
447 else:
448 weighted_loss = training_utils.weighted_masked_objective(loss_fn)
--> 449 output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
450
451 if len(self.outputs) > 1:
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/keras/engine/training_utils.py in weighted(y_true, y_pred, weights, mask)
661 # Update dimensions of weights to match with values if possible.
662 score_array, _, weights = squeeze_or_expand_dimensions(
--> 663 score_array, None, weights)
664 try:
665 # Broadcast weights if possible.
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/keras/utils/losses_utils.py in squeeze_or_expand_dimensions(y_pred, y_true, sample_weight)
66 return y_pred, y_true, sample_weight
67
---> 68 y_pred_shape = y_pred.get_shape()
69 y_pred_rank = y_pred_shape.ndims
70 if (y_pred_rank is not None) and (weights_rank is not None):
AttributeError: 'NoneType' object has no attribute 'get_shape'
How can I do it ?

Related

A simple linear regression model with a DenseVariational layer in Tensorflow-Probability returns: TypeError: 'NoneType' object is not callable

This is an attempt to use TensorFlow Probability, and more specifically the DenseVariational layer, but it fails for some reason. How can I correct the code?
x_train = np.linspace(-1, 1, 100)[:, np.newaxis]
y_train = x_train + 0.3*np.random.randn(100)[:, np.newaxis]
def prior(kernel_size, bias_size, dtype = None):
n = kernel_size + bias_size
prior_model = Sequential([
tfpl.DistributionLambda(
lambda t: tfd.MultivariateNormalDiag(loc = tf.zeros(n) , scale_diag = tf.ones(n)
))
])
def posterior(kernel_size, bias_size, dtype=None):
    """Build the variational posterior model for a DenseVariational layer.

    The model is a two-layer ``Sequential``:

    1. A ``tfpl.VariableLayer`` holding the trainable parameters. Its size is
       ``tfpl.MultivariateNormalTriL.params_size(n)``, i.e. the number of
       parameters needed to create a ``MultivariateNormalTriL`` with
       ``event_size = n``.
    2. A ``tfpl.MultivariateNormalTriL(n)`` layer that takes the output of the
       ``VariableLayer`` as its input and yields the distribution that is
       handed back to the variational layer calling this model.

    Args:
        kernel_size: Number of kernel weights of the variational layer.
        bias_size: Number of bias weights of the variational layer.
        dtype: dtype forwarded to the ``VariableLayer``.

    Returns:
        The ``Sequential`` posterior model over ``n = kernel_size + bias_size``
        dimensions.
    """
    event_size = kernel_size + bias_size

    # Trainable variables parameterising the distribution below.
    variable_layer = tfpl.VariableLayer(
        tfpl.MultivariateNormalTriL.params_size(event_size), dtype=dtype
    )
    # Turns those variables into a MultivariateNormalTriL distribution.
    distribution_layer = tfpl.MultivariateNormalTriL(event_size)

    return Sequential([variable_layer, distribution_layer])
model = Sequential([
tfpl.DenseVariational(
input_shape = (1, ), # The input is of dimensionality 1, a series
units = 1, # A linear regression is represented by a Dense layer with one single unit
make_prior_fn = prior, # We pass the function we have defined which returns the prior distribution on the weights
make_posterior_fn = posterior, # We pass the function we have defined which returns the variational approximation of the posterior distribution on the weights
kl_weight = 1/ x_train.shape[0], # Tensorflow scales the likelihood loss calculated using the mini-batch to become an unbiased estimator of the true loss but does not do the
# same for the DL divergence loss. Here we instruct it to do the necessary scaling.
kl_use_exact = True # Unless there is a closed form equation for the KL divergence in the library of Tensorflow setting True will return error. By setting False instead
# the KL Divergence will be approxiated using Sampling
)
])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-14-e7cf0bfd5902> in <module>
17 # same for the DL divergence loss. Here we instruct it to do the necessary scaling.
18
---> 19 kl_use_exact = True # Unless there is a closed form equation for the KL divergence in the library of Tensorflow setting True will return error. By setting False instead
20 # the KL Divergence will be approxiated using Sampling
21
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in __init__(self, layers, name)
140 layers = [layers]
141 for layer in layers:
--> 142 self.add(layer)
143
144 #property
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in add(self, layer)
204 # and create the node connecting the current layer
205 # to the input layer we just created.
--> 206 layer(x)
207 set_inputs = True
208
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in __call__(self, *args, **kwargs)
924 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
925 return self._functional_construction_call(inputs, args, kwargs,
--> 926 input_list)
927
928 # Maintains info about the `Layer.call` stack.
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1115 try:
1116 with ops.enable_auto_cast_variables(self._compute_dtype_object):
-> 1117 outputs = call_fn(cast_inputs, *args, **kwargs)
1118
1119 except errors.OperatorNotAllowedInGraphError as e:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in wrapper(*args, **kwargs)
253 try:
254 with conversion_ctx:
--> 255 return converted_call(f, args, kwargs, options=options)
256 except Exception as e: # pylint:disable=broad-except
257 if hasattr(e, 'ag_error_metadata'):
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
455 if conversion.is_in_whitelist_cache(f, options):
456 logging.log(2, 'Whitelisted %s: from cache', f)
--> 457 return _call_unconverted(f, args, kwargs, options, False)
458
459 if ag_ctx.control_status_ctx().status == ag_ctx.Status.DISABLED:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in _call_unconverted(f, args, kwargs, options, update_cache)
337
338 if kwargs is not None:
--> 339 return f(*args, **kwargs)
340 return f(*args)
341
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow_probability\python\layers\dense_variational_v2.py in call(self, inputs)
120
121 q = self._posterior(inputs)
--> 122 r = self._prior(inputs)
123 self.add_loss(self._kl_divergence_fn(q, r))
124
TypeError: 'NoneType' object is not callable
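Did you forget to return? Your `prior` function builds `prior_model` but never returns it, so it returns `None`, and `DenseVariational` then fails when it tries to call that `None`. Add the missing return: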
def prior(kernel_size, bias_size, dtype=None):
    """Build the prior model for a DenseVariational layer.

    The prior is a ``MultivariateNormalDiag`` with zero mean and unit scale
    over ``n = kernel_size + bias_size`` dimensions, wrapped in a
    ``DistributionLambda`` so that it can be called like a Keras model.

    Args:
        kernel_size: Number of kernel weights of the variational layer.
        bias_size: Number of bias weights of the variational layer.
        dtype: Accepted for signature compatibility; not used here.

    Returns:
        The ``tf.keras.Sequential`` prior model.
    """
    event_size = kernel_size + bias_size

    def standard_normal(_):
        # The layer input is ignored: the prior has no trainable parameters.
        return tfd.MultivariateNormalDiag(
            loc=tf.zeros(event_size), scale_diag=tf.ones(event_size)
        )

    # The model must be returned: DenseVariational calls the result of this
    # function, so an implicit ``None`` return is what raises
    # "'NoneType' object is not callable".
    return tf.keras.Sequential([tfp.layers.DistributionLambda(standard_normal)])

K-Means GridSearchCV hyperparameter tuning

I am trying to perform hyperparameter tuning for Spatio-Temporal K-Means clustering by using it in a pipeline with a Decision Tree classifier. The idea is to use the K-Means clustering algorithm to generate a cluster-distance space matrix and cluster labels, which will then be passed to the Decision Tree classifier. For hyperparameter tuning, only the parameters of the K-Means algorithm are tuned.
I am using Python 3.8 and sklearn 0.22.
The data I am interested in has 3 columns/attributes: 'time', 'x' and 'y' (x and y are spatial coordinates).
The code is:
class ST_KMeans(BaseEstimator, TransformerMixin):
# class ST_KMeans():
"""
Note that K-means clustering algorithm is designed for Euclidean distances.
It may stop converging with other distances, when the mean is no longer a
best estimation for the cluster 'center'.
The 'mean' minimizes squared differences (or, squared Euclidean distance).
If you want a different distance function, you need to replace the mean with
an appropriate center estimation.
Parameters:
k: number of clusters
eps1 : float, default=0.5
The spatial density threshold (maximum spatial distance) between
two points to be considered related.
eps2 : float, default=10
The temporal threshold (maximum temporal distance) between two
points to be considered related.
metric : string default='euclidean'
The used distance metric - more options are
‘braycurtis’, ‘canberra’, ‘chebyshev’, ‘cityblock’, ‘correlation’,
‘cosine’, ‘dice’, ‘euclidean’, ‘hamming’, ‘jaccard’, ‘jensenshannon’,
‘kulsinski’, ‘mahalanobis’, ‘matching’, ‘rogerstanimoto’, ‘sqeuclidean’,
‘russellrao’, ‘seuclidean’, ‘sokalmichener’, ‘sokalsneath’, ‘yule’.
n_jobs : int or None, default=-1
The number of processes to start; -1 means use all processors (BE AWARE)
Attributes:
labels : array, shape = [n_samples]
Cluster labels for the data - noise is defined as -1
"""
def __init__(self, k, eps1 = 0.5, eps2 = 10, metric = 'euclidean', n_jobs = 1):
self.k = k
self.eps1 = eps1
self.eps2 = eps2
# self.min_samples = min_samples
self.metric = metric
self.n_jobs = n_jobs
def fit(self, X, Y = None):
"""
Apply the ST K-Means algorithm
X : 2D numpy array. The first attribute of the array should be time attribute
as float. The following positions in the array are treated as spatial
coordinates.
The structure should look like this [[time_step1, x, y], [time_step2, x, y]..]
For example 2D dataset:
array([[0,0.45,0.43],
[0,0.54,0.34],...])
Returns:
self
"""
# check if input is correct
X = check_array(X)
# type(X)
# numpy.ndarray
# Check arguments for DBSCAN algo-
if not self.eps1 > 0.0 or not self.eps2 > 0.0:
raise ValueError('eps1, eps2, minPts must be positive')
# Get dimensions of 'X'-
# n - number of rows
# m - number of attributes/columns-
n, m = X.shape
# Compute sqaured form Euclidean Distance Matrix for 'time' and spatial attributes-
time_dist = squareform(pdist(X[:, 0].reshape(n, 1), metric = self.metric))
euc_dist = squareform(pdist(X[:, 1:], metric = self.metric))
'''
Filter the euclidean distance matrix using time distance matrix. The code snippet gets all the
indices of the 'time_dist' matrix in which the time distance is smaller than 'eps2'.
Afterward, for the same indices in the euclidean distance matrix the 'eps1' is doubled which results
in the fact that the indices are not considered during clustering - as they are bigger than 'eps1'.
'''
# filter 'euc_dist' matrix using 'time_dist' matrix-
dist = np.where(time_dist <= self.eps2, euc_dist, 2 * self.eps1)
# Initialize K-Means clustering model-
self.kmeans_clust_model = KMeans(
n_clusters = self.k, init = 'k-means++',
n_init = 10, max_iter = 300,
precompute_distances = 'auto', algorithm = 'auto')
# Train model-
self.kmeans_clust_model.fit(dist)
self.labels = self.kmeans_clust_model.labels_
self.X_transformed = self.kmeans_clust_model.fit_transform(X)
return self
def transform(self, X):
if not isinstance(X, np.ndarray):
# Convert to numpy array-
X = X.values
# Get dimensions of 'X'-
# n - number of rows
# m - number of attributes/columns-
n, m = X.shape
# Compute sqaured form Euclidean Distance Matrix for 'time' and spatial attributes-
time_dist = squareform(pdist(X[:, 0].reshape(n, 1), metric = self.metric))
euc_dist = squareform(pdist(X[:, 1:], metric = self.metric))
# filter 'euc_dist' matrix using 'time_dist' matrix-
dist = np.where(time_dist <= self.eps2, euc_dist, 2 * self.eps1)
# return self.kmeans_clust_model.transform(X)
return self.kmeans_clust_model.transform(dist)
# Initialize ST-K-Means object-
st_kmeans_algo = ST_KMeans(
k = 5, eps1=0.6,
eps2=9, metric='euclidean',
n_jobs=1
)
Y = np.zeros(shape = (501,))
# Train on a chunk of dataset-
st_kmeans_algo.fit(data.loc[:500, ['time', 'x', 'y']], Y)
# Get clustered data points labels-
kmeans_labels = st_kmeans_algo.labels
kmeans_labels.shape
# (501,)
# Get labels for points clustered using trained model-
# kmeans_transformed = st_kmeans_algo.X_transformed
kmeans_transformed = st_kmeans_algo.transform(data.loc[:500, ['time', 'x', 'y']])
kmeans_transformed.shape
# (501, 5)
dtc = DecisionTreeClassifier()
dtc.fit(kmeans_transformed, kmeans_labels)
y_pred = dtc.predict(kmeans_transformed)
# Get model performance metrics-
accuracy = accuracy_score(kmeans_labels, y_pred)
precision = precision_score(kmeans_labels, y_pred, average='macro')
recall = recall_score(kmeans_labels, y_pred, average='macro')
print("\nDT model metrics are:")
print("accuracy = {0:.4f}, precision = {1:.4f} & recall = {2:.4f}\n".format(
accuracy, precision, recall
))
# DT model metrics are:
# accuracy = 1.0000, precision = 1.0000 & recall = 1.0000
# Hyper-parameter Tuning:
# Define steps of pipeline-
pipeline_steps = [
('st_kmeans_algo' ,ST_KMeans(k = 5, eps1=0.6, eps2=9, metric='euclidean', n_jobs=1)),
('dtc', DecisionTreeClassifier())
]
# Instantiate a pipeline-
pipeline = Pipeline(pipeline_steps)
kmeans_transformed.shape, kmeans_labels.shape
# ((501, 5), (501,))
# Train pipeline-
pipeline.fit(kmeans_transformed, kmeans_labels)
# Specify parameters to be hyper-parameter tuned-
params = [
{
'st_kmeans_algo__k': [3, 5, 7]
}
]
# Initialize GridSearchCV object-
grid_cv = GridSearchCV(estimator=pipeline, param_grid=params, cv = 2)
# Train GridSearch on computed data from above-
grid_cv.fit(kmeans_transformed, kmeans_labels)
The 'grid_cv.fit()' call gives the following error:
ValueError Traceback (most recent call
last) in
5
6 # Train GridSearch on computed data from above-
----> 7 grid_cv.fit(kmeans_transformed, kmeans_labels)
~/.local/lib/python3.8/site-packages/sklearn/model_selection/_search.py
in fit(self, X, y, groups, **fit_params)
708 return results
709
--> 710 self._run_search(evaluate_candidates)
711
712 # For multi-metric evaluation, store the best_index_, best_params_ and
~/.local/lib/python3.8/site-packages/sklearn/model_selection/_search.py
in _run_search(self, evaluate_candidates) 1149 def
_run_search(self, evaluate_candidates): 1150 """Search all candidates in param_grid"""
-> 1151 evaluate_candidates(ParameterGrid(self.param_grid)) 1152 1153
~/.local/lib/python3.8/site-packages/sklearn/model_selection/_search.py
in evaluate_candidates(candidate_params)
680 n_splits, n_candidates, n_candidates * n_splits))
681
--> 682 out = parallel(delayed(_fit_and_score)(clone(base_estimator),
683 X, y,
684 train=train, test=test,
~/.local/lib/python3.8/site-packages/joblib/parallel.py in
call(self, iterable) 1002 # remaining jobs. 1003 self._iterating = False
-> 1004 if self.dispatch_one_batch(iterator): 1005 self._iterating = self._original_iterator is not None 1006
~/.local/lib/python3.8/site-packages/joblib/parallel.py in
dispatch_one_batch(self, iterator)
833 return False
834 else:
--> 835 self._dispatch(tasks)
836 return True
837
~/.local/lib/python3.8/site-packages/joblib/parallel.py in
_dispatch(self, batch)
752 with self._lock:
753 job_idx = len(self._jobs)
--> 754 job = self._backend.apply_async(batch, callback=cb)
755 # A job can complete so quickly than its callback is
756 # called before we get here, causing self._jobs to
~/.local/lib/python3.8/site-packages/joblib/_parallel_backends.py in
apply_async(self, func, callback)
207 def apply_async(self, func, callback=None):
208 """Schedule a func to be run"""
--> 209 result = ImmediateResult(func)
210 if callback:
211 callback(result)
~/.local/lib/python3.8/site-packages/joblib/_parallel_backends.py in
init(self, batch)
588 # Don't delay the application, to avoid keeping the input
589 # arguments in memory
--> 590 self.results = batch()
591
592 def get(self):
~/.local/lib/python3.8/site-packages/joblib/parallel.py in
call(self)
253 # change the default number of processes to -1
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255 return [func(*args, **kwargs)
256 for func, args, kwargs in self.items]
257
~/.local/lib/python3.8/site-packages/joblib/parallel.py in
(.0)
253 # change the default number of processes to -1
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255 return [func(*args, **kwargs)
256 for func, args, kwargs in self.items]
257
~/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py
in _fit_and_score(estimator, X, y, scorer, train, test, verbose,
parameters, fit_params, return_train_score, return_parameters,
return_n_test_samples, return_times, return_estimator, error_score)
542 else:
543 fit_time = time.time() - start_time
--> 544 test_scores = _score(estimator, X_test, y_test, scorer)
545 score_time = time.time() - start_time - fit_time
546 if return_train_score:
~/.local/lib/python3.8/site-packages/sklearn/model_selection/_validation.py
in _score(estimator, X_test, y_test, scorer)
589 scores = scorer(estimator, X_test)
590 else:
--> 591 scores = scorer(estimator, X_test, y_test)
592
593 error_msg = ("scoring must return a number, got %s (%s) "
~/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py in
call(self, estimator, *args, **kwargs)
87 *args, **kwargs)
88 else:
---> 89 score = scorer(estimator, *args, **kwargs)
90 scores[name] = score
91 return scores
~/.local/lib/python3.8/site-packages/sklearn/metrics/_scorer.py in
_passthrough_scorer(estimator, *args, **kwargs)
369 def _passthrough_scorer(estimator, *args, **kwargs):
370 """Function that wraps estimator.score"""
--> 371 return estimator.score(*args, **kwargs)
372
373
~/.local/lib/python3.8/site-packages/sklearn/utils/metaestimators.py
in (*args, **kwargs)
114
115 # lambda, but not partial, allows help() to work with update_wrapper
--> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out, self.fn)
~/.local/lib/python3.8/site-packages/sklearn/pipeline.py in
score(self, X, y, sample_weight)
617 if sample_weight is not None:
618 score_params['sample_weight'] = sample_weight
--> 619 return self.steps[-1][-1].score(Xt, y, **score_params)
620
621 #property
~/.local/lib/python3.8/site-packages/sklearn/base.py in score(self, X,
y, sample_weight)
367 """
368 from .metrics import accuracy_score
--> 369 return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
370
371
~/.local/lib/python3.8/site-packages/sklearn/metrics/_classification.py
in accuracy_score(y_true, y_pred, normalize, sample_weight)
183
184 # Compute accuracy for each possible representation
--> 185 y_type, y_true, y_pred = _check_targets(y_true, y_pred)
186 check_consistent_length(y_true, y_pred, sample_weight)
187 if y_type.startswith('multilabel'):
~/.local/lib/python3.8/site-packages/sklearn/metrics/_classification.py
in _check_targets(y_true, y_pred)
78 y_pred : array or indicator matrix
79 """
---> 80 check_consistent_length(y_true, y_pred)
81 type_true = type_of_target(y_true)
82 type_pred = type_of_target(y_pred)
~/.local/lib/python3.8/site-packages/sklearn/utils/validation.py in
check_consistent_length(*arrays)
209 uniques = np.unique(lengths)
210 if len(uniques) > 1:
--> 211 raise ValueError("Found input variables with inconsistent numbers of"
212 " samples: %r" % [int(l) for l in lengths])
213
ValueError: Found input variables with inconsistent numbers of
samples: [251, 250]
The different dimensions/shapes are:
kmeans_transformed.shape, kmeans_labels.shape, data.loc[:500, ['time', 'x', 'y']].shape
# ((501, 5), (501,), (501, 3))
I don't understand how the error arrives at "samples: [251, 250]".
What's going wrong?
Thanks!
250 and 251 are the sizes of your training and validation splits in GridSearchCV, respectively.
Look at your custom estimator:
def transform(self, X):
return self.X_transformed
The original transform method doesn't apply any operation; it simply returns the transformed training data. We need an estimator that can transform new data (in our case, the validation split inside GridSearchCV) in a flexible way. Change the transform method like this:
def transform(self, X):
return self.kmeans_clust_model.transform(X)

Why am I getting a Pytorch Runtime Error on Test Set

I have a binary image classification model built on the ResNeXt model. I keep getting a runtime error when it gets to the test set. The error message is:
RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'weight'
I am sending my test set tensors to my GPU like my train model. I've looked at the following and I'm doing what was suggested here as stated above.
Here is my model code:
resnext = models.resnext50_32x4d(pretrained=True)
resnext = resnext.to(device)
for param in resnext.parameters():
param.requires_grad = True
resnext.classifier = nn.Sequential(nn.Linear(2048, 1000),
nn.ReLU(),
nn.Dropout(0.4),
nn.Linear(1000, 2),
nn.Softmax(dim = 1))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnext.classifier.parameters(), lr=0.001)
import time
start_time = time.time()
epochs = 1
max_trn_batch = 5
max_tst_batch = 156
y_val_list = []
policy_list = []
train_losses = []
test_losses = []
train_correct = []
test_correct = []
for i in range(epochs):
for i in tqdm(range(0, max_trn_batch)):
trn_corr = 0
tst_corr = 0
# Run the training batches
for b, (X_train, y_train, policy) in enumerate(train_loader):
#print(y_train, policy)
X_train = X_train.to(device)
y_train = y_train.to(device)
if b == max_trn_batch:
break
b+=1
# Apply the model
y_pred = resnext(X_train)
loss = criterion(y_pred, y_train)
# Tally the number of correct predictions
predicted = torch.max(y_pred.data, 1)[1]
batch_corr = (predicted == y_train).sum()
trn_corr += batch_corr
# Update parameters
optimizer.zero_grad()
loss.backward()
optimizer.step()
# Print interim results
if b%1 == 0:
print(f'epoch: {i:2} batch: {b:4} [{100*b:6}/63610] loss: {loss.item():10.8f} \
accuracy: {trn_corr.item()/(100*b):7.3f}%')
train_losses.append(loss)
train_correct.append(trn_corr)
# Run the testing batches
with torch.no_grad():
for b, (X_test, y_test, policy) in enumerate(test_loader):
policy_list.append(policy)
X_test.to(device)
y_test.to(device)
if b == max_tst_batch:
break
# Apply the model
y_val = resnext(X_test)
y_val_list.append(y_val.data)
# Tally the number of correct predictions
predicted = torch.max(y_val.data, 1)[1]
tst_corr += (predicted == y_test).sum()
loss = criterion(y_val, y_test)
test_losses.append(loss)
test_correct.append(tst_corr)
print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed
Here is the full traceback:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-84-48bce2e8d4fa> in <module>
60
61 # Apply the model
---> 62 y_val = resnext(X_test)
63 y_val_list.append(y_val.data)
64 # Tally the number of correct predictions
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)
C:\ProgramData\Anaconda3\lib\site-packages\torchvision\models\resnet.py in forward(self, x)
194
195 def forward(self, x):
--> 196 x = self.conv1(x)
197 x = self.bn1(x)
198 x = self.relu(x)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\conv.py in forward(self, input)
341
342 def forward(self, input):
--> 343 return self.conv2d_forward(input, self.weight)
344
345 class Conv3d(_ConvNd):
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\conv.py in conv2d_forward(self, input, weight)
338 _pair(0), self.dilation, self.groups)
339 return F.conv2d(input, weight, self.bias, self.stride,
--> 340 self.padding, self.dilation, self.groups)
341
342 def forward(self, input):
RuntimeError: Expected object of backend CPU but got backend CUDA for argument #2 'weight'
Again, my tensors and the model are sent to the GPU so I'm not sure what is going on. Does anyone see my mistake?
[...] my tensors and the model are sent to the GPU [...]
Not the test Tensors. It is a simple mistake:
X_test.to(device)
y_test.to(device)
should be
X_test = X_test.to(device)
y_test = y_test.to(device)

How to use SHAP with a linear SVC model from sklearn using Pipeline?

I am doing text classification using a linear SVC model from sklearn. Now I want to visualize which words/tokens have the highest impact on the classification decision by using SHAP (https://github.com/slundberg/shap).
Right now this does not work because I am getting an error that seems to originate from the vectorizer step in the pipeline I have defined - what's wrong here?
Is my general approach on how to use SHAP in this case correct?
x_Train, x_Test, y_Train, y_Test = train_test_split(df_all['PDFText'], df_all['class'], test_size = 0.2, random_state = 1234)
pipeline = Pipeline([
(
'tfidv',
TfidfVectorizer(
ngram_range=(1,3),
analyzer='word',
strip_accents = ascii,
use_idf = True,
sublinear_tf=True,
max_features=6000,
min_df=2,
max_df=1.0
)
),
(
'lin_svc',
svm.SVC(
C=1.0,
probability=True,
kernel='linear'
)
)
])
pipeline.fit(x_Train, y_Train)
shap.initjs()
explainer = shap.KernelExplainer(pipeline.predict_proba, x_Train)
shap_values = explainer.shap_values(x_Test, nsamples=100)
shap.force_plot(explainer.expected_value[0], shap_values[0][0,:], x_Test.iloc[0,:])
This is the error message I get:
Provided model function fails when applied to the provided data set.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-81-4bca63616b3b> in <module>
3
4 # use Kernel SHAP to explain test set predictions
----> 5 explainer = shap.KernelExplainer(pipeline.predict_proba, x_Train)
6 shap_values = explainer.shap_values(x_Test, nsamples=100)
7
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\shap\explainers\kernel.py in __init__(self, model, data, link, **kwargs)
95 self.keep_index_ordered = kwargs.get("keep_index_ordered", False)
96 self.data = convert_to_data(data, keep_index=self.keep_index)
---> 97 model_null = match_model_to_data(self.model, self.data)
98
99 # enforce our current input type limitations
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\shap\common.py in match_model_to_data(model, data)
80 out_val = model.f(data.convert_to_df())
81 else:
---> 82 out_val = model.f(data.data)
83 except:
84 print("Provided model function fails when applied to the provided data set.")
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\metaestimators.py in <lambda>(*args, **kwargs)
116
117 # lambda, but not partial, allows help() to work with update_wrapper
--> 118 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
119 # update the docstring of the returned function
120 update_wrapper(out, self.fn)
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py in predict_proba(self, X)
379 for name, transform in self.steps[:-1]:
380 if transform is not None:
--> 381 Xt = transform.transform(Xt)
382 return self.steps[-1][-1].predict_proba(Xt)
383
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in transform(self, raw_documents, copy)
1631 check_is_fitted(self, '_tfidf', 'The tfidf vector is not fitted')
1632
-> 1633 X = super(TfidfVectorizer, self).transform(raw_documents)
1634 return self._tfidf.transform(X, copy=False)
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in transform(self, raw_documents)
1084
1085 # use the same matrix-building strategy as fit_transform
-> 1086 _, X = self._count_vocab(raw_documents, fixed_vocab=True)
1087 if self.binary:
1088 X.data.fill(1)
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
940 for doc in raw_documents:
941 feature_counter = {}
--> 942 for feature in analyze(doc):
943 try:
944 feature_idx = vocabulary[feature]
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(doc)
326 tokenize)
327 return lambda doc: self._word_ngrams(
--> 328 tokenize(preprocess(self.decode(doc))), stop_words)
329
330 else:
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(x)
254
255 if self.lowercase:
--> 256 return lambda x: strip_accents(x.lower())
257 else:
258 return strip_accents
AttributeError: 'numpy.ndarray' object has no attribute 'lower'
KernelExplainer expects to receive a classification model as the first argument. Please check the use of Pipeline with Shap following the link.
In your case, you can use the Pipeline as follows:
x_Train = pipeline.named_steps['tfidv'].fit_transform(x_Train)
explainer = shap.KernelExplainer(pipeline.named_steps['lin_svc'].predict_proba, x_Train)

DNN Linear Regression. MAE measurement error

I am trying to implement MAE as a performance measurement for my DNN regression model. I am using DNN to predict the number of comments a facebook post will get. As I understand, if it is a classification problem, then we use accuracy. If it is regression problem, then we use either RMSE or MAE. My code is the following:
with tf.name_scope("eval"):
correct = tf.metrics.mean_absolute_error(labels = y, predictions = logits)
mae = tf.reduce_mean(tf.cast(correct, tf.int64))
mae_summary = tf.summary.scalar('mae', accuracy)
For some reason, I get the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-396-313ddf858626> in <module>()
1 with tf.name_scope("eval"):
----> 2 correct = tf.metrics.mean_absolute_error(labels = y, predictions = logits)
3 mae = tf.reduce_mean(tf.cast(correct, tf.int64))
4 mae_summary = tf.summary.scalar('mae', accuracy)
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/metrics_impl.py in mean_absolute_error(labels, predictions, weights, metrics_collections, updates_collections, name)
736 predictions, labels, weights = _remove_squeezable_dimensions(
737 predictions=predictions, labels=labels, weights=weights)
--> 738 absolute_errors = math_ops.abs(predictions - labels)
739 return mean(absolute_errors, weights, metrics_collections,
740 updates_collections, name or 'mean_absolute_error')
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py in binary_op_wrapper(x, y)
883 if not isinstance(y, sparse_tensor.SparseTensor):
884 try:
--> 885 y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y")
886 except TypeError:
887 # If the RHS is not a tensor, it might be a tensor aware object
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
834 name=name,
835 preferred_dtype=preferred_dtype,
--> 836 as_ref=False)
837
838
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
924
925 if ret is None:
--> 926 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
927
928 if ret is NotImplemented:
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _TensorTensorConversionFunction(t, dtype, name, as_ref)
772 raise ValueError(
773 "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
--> 774 (dtype.name, t.dtype.name, str(t)))
775 return t
776
ValueError: Tensor conversion requested dtype float32 for Tensor with dtype int64: 'Tensor("eval_9/remove_squeezable_dimensions/cond_1/Merge:0", dtype=int64)'
This line in your code:
correct = tf.metrics.mean_absolute_error(labels = y, predictions = logits)
executes in a way where TensorFlow first subtracts the labels from the predictions, as seen in the traceback:
absolute_errors = math_ops.abs(predictions - labels)
In order to do the subtraction, the two tensors need to have the same datatype. Presumably your predictions (logits) are float32, and according to the error message your labels are int64. You either have to do an explicit conversion with tf.to_float, or rely on the implicit one you suggest in your comment: defining the placeholder as float32 to start with, and trusting TensorFlow to do the conversion when the feed dictionary is processed.

Resources