Related
This is my code and it has an error that I don't know how to fix
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from time import time
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
Data
proof_df = pd.read_excel("WORK_NUM_3.xlsx")
Show information (only 10 of 1550)
proof_df.head(10)
results
ORDEN DATA N1 N2 N3 N4 N5
0 1 1994-03-13 25 45 60 76 79
1 2 1994-03-17 13 30 58 63 64
2 3 1994-03-20 5 15 32 33 48
3 4 1994-03-24 27 57 60 61 77
4 5 1994-03-27 19 44 53 54 71
5 6 1994-04-03 4 45 54 65 67
6 7 1994-04-07 9 21 37 42 68
7 8 1994-04-10 5 16 26 28 62
8 9 1994-04-14 4 15 44 64 73
9 10 1994-04-17 20 32 49 54 62
declare variables
y = proof_df.iloc[:, 2:len(quina_df.columns)]
X = proof_df[['ORDEN','DATA']]
regression algorithm
regresor = SVR(kernel='linear')
hora_inicio = time()
train of algorithm
regresor.fit(X_train.values, y_train.values.ravel())
print('train finish in {time() - hora_inicio} segundos')
result:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [37], in <cell line: 2>()
1 # Entrenamiento del algoritmo
----> 2 regresor.fit(X_train.values, y_train.values.ravel())
3 print('Entrenamiento finalizado en {time() - hora_inicio} segundos')
File ~\anaconda3\lib\site-packages\sklearn\svm\_base.py:190, in BaseLibSVM.fit(self, X, y, sample_weight)
188 check_consistent_length(X, y)
189 else:
--> 190 X, y = self._validate_data(
191 X,
192 y,
193 dtype=np.float64,
194 order="C",
195 accept_sparse="csr",
196 accept_large_sparse=False,
197 )
199 y = self._validate_targets(y)
201 sample_weight = np.asarray(
202 [] if sample_weight is None else sample_weight, dtype=np.float64
203 )
File ~\anaconda3\lib\site-packages\sklearn\base.py:581, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
579 y = check_array(y, **check_y_params)
580 else:
--> 581 X, y = check_X_y(X, y, **check_params)
582 out = X, y
584 if not no_val_X and check_params.get("ensure_2d", True):
File ~\anaconda3\lib\site-packages\sklearn\utils\validation.py:964, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
961 if y is None:
962 raise ValueError("y cannot be None")
--> 964 X = check_array(
965 X,
966 accept_sparse=accept_sparse,
967 accept_large_sparse=accept_large_sparse,
968 dtype=dtype,
969 order=order,
970 copy=copy,
971 force_all_finite=force_all_finite,
972 ensure_2d=ensure_2d,
973 allow_nd=allow_nd,
974 ensure_min_samples=ensure_min_samples,
975 ensure_min_features=ensure_min_features,
976 estimator=estimator,
977 )
979 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric)
981 check_consistent_length(X, y)
File ~\anaconda3\lib\site-packages\sklearn\utils\validation.py:746, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
744 array = array.astype(dtype, casting="unsafe", copy=False)
745 else:
--> 746 array = np.asarray(array, order=order, dtype=dtype)
747 except ComplexWarning as complex_warning:
748 raise ValueError(
749 "Complex data not supported\n{}\n".format(array)
750 ) from complex_warning
TypeError: float() argument must be a string or a number, not 'Timestamp'
I am trying to work with the dates as the variable (X) that the model should receive, since the variable (y) holds the other results.
I would really appreciate any help understanding what is going on.
I am new to xgboost. I trained a model that works pretty well. Now I am trying to use eli5 to see the weights, and I get: KeyError: 'bias'
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
in
3 clf6 = model6.named_steps['clf']
4 vec6 = model6.named_steps['transformer']
----> 5 explain_weights_xgboost(clf6, vec=vec6)
~/dev/envs/env3.7/lib/python3.7/site-packages/eli5/xgboost.py in explain_weights_xgboost(xgb, vec, top, target_names, targets, feature_names, feature_re, feature_filter, importance_type)
80 description=DESCRIPTION_XGBOOST,
81 is_regression=is_regression,
---> 82 num_features=coef.shape[-1],
83 )
84
~/dev/envs/env3.7/lib/python3.7/site-packages/eli5/_feature_importances.py in get_feature_importance_explanation(estimator, vec, coef, feature_names, feature_filter, feature_re, top, description, is_regression, estimator_feature_names, num_features, coef_std)
35 feature_filter=feature_filter,
36 feature_re=feature_re,
---> 37 num_features=num_features,
38 )
39 feature_importances = get_feature_importances_filtered(
~/dev/envs/env3.7/lib/python3.7/site-packages/eli5/sklearn/utils.py in get_feature_names_filtered(clf, vec, bias_name, feature_names, num_features, feature_filter, feature_re, estimator_feature_names)
124 feature_names=feature_names,
125 num_features=num_features,
--> 126 estimator_feature_names=estimator_feature_names,
127 )
128 return feature_names.handle_filter(feature_filter, feature_re)
~/dev/envs/env3.7/lib/python3.7/site-packages/eli5/sklearn/utils.py in get_feature_names(clf, vec, bias_name, feature_names, num_features, estimator_feature_names)
77 features are named x0, x1, x2, etc.
78 """
---> 79 if not has_intercept(clf):
80 bias_name = None
81
~/dev/envs/env3.7/lib/python3.7/site-packages/eli5/sklearn/utils.py in has_intercept(estimator)
60 if hasattr(estimator, 'fit_intercept'):
61 return estimator.fit_intercept
---> 62 if hasattr(estimator, 'intercept_'):
63 if estimator.intercept_ is None:
64 return False
~/dev/envs/env3.7/lib/python3.7/site-packages/xgboost/sklearn.py in intercept_(self)
743 .format(self.booster))
744 b = self.get_booster()
--> 745 return np.array(json.loads(b.get_dump(dump_format='json')[0])['bias'])
746
747
KeyError: 'bias'
Thank you!
I had the same issue and fixed it by explicitly specifying the booster argument when creating the estimator:
clf = XGBClassifier(booster='gbtree')
I’m new to the cvxpy package. I’m trying to use it to work through an example from the following blog:
https://towardsdatascience.com/integer-programming-in-python-1cbdfa240df2
Where we’re trying to optimize the combination of marketing channels sent to a customer.
There have been some recent changes to the cvxpy package, and I’m getting the error below when I try to run the sum_entries step (which has been changed to cvxpy.sum in the latest version).
I think the problem is coming from the dimensions of “selection” and “TRANSFORMER” being incompatible, but I’m not familiar enough with the cvxpy package to know. Any tips are greatly appreciated.
Code:
test_probs.shape
(200, 8)
Code:
# selection = cvxpy.Bool(*test_probs.shape) # syntax changed in latest version
selection = cvxpy.Variable(*test_probs.shape, boolean=True)
# constraints
# Constant matrix that counts how many of each
# material we sent to each customer
TRANSFORMER = np.array([[1,0,0],
[0,1,0],
[0,0,1],
[1,1,0],
[1,0,1],
[0,1,1],
[1,1,1],
[0,0,0]])
# can't send customer more promotion than there is supply
# note: sum_entries changed to sum in latest cvxpy version
supply_constraint = cvxpy.sum(selection * TRANSFORMER, axis=0) <= supply
Error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-47-f2ebf41a00af> in <module>()
18 # note: sum_entries changed to sum in latest cvxpy version
19
---> 20 supply_constraint = cvxpy.sum(selection * TRANSFORMER, axis=0) <= supply
21
22
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/expressions/expression.py in cast_op(self, other)
47 """
48 other = self.cast_to_const(other)
---> 49 return binary_op(self, other)
50 return cast_op
51
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/expressions/expression.py in __mul__(self, other)
385 return cvxtypes.multiply_expr()(self, other)
386 elif self.is_constant() or other.is_constant():
--> 387 return cvxtypes.mul_expr()(self, other)
388 else:
389 warnings.warn("Forming a nonconvex expression.")
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/atoms/affine/binary_operators.py in __init__(self, lh_exp, rh_exp)
41
42 def __init__(self, lh_exp, rh_exp):
---> 43 super(BinaryOperator, self).__init__(lh_exp, rh_exp)
44
45 def name(self):
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/atoms/atom.py in __init__(self, *args)
42 self.args = [Atom.cast_to_const(arg) for arg in args]
43 self.validate_arguments()
---> 44 self._shape = self.shape_from_args()
45 if len(self._shape) > 2:
46 raise ValueError("Atoms must be at most 2D.")
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/atoms/affine/binary_operators.py in shape_from_args(self)
107 """Returns the (row, col) shape of the expression.
108 """
--> 109 return u.shape.mul_shapes(self.args[0].shape, self.args[1].shape)
110
111 def is_atom_convex(self):
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/utilities/shape.py in mul_shapes(lh_shape, rh_shape)
140 lh_old = lh_shape
141 rh_old = rh_shape
--> 142 lh_shape, rh_shape, shape = mul_shapes_promote(lh_shape, rh_shape)
143 if lh_shape != lh_old:
144 shape = shape[1:]
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/utilities/shape.py in mul_shapes_promote(lh_shape, rh_shape)
107 if lh_mat_shape[1] != rh_mat_shape[0]:
108 raise ValueError("Incompatible dimensions %s %s" % (
--> 109 lh_shape, rh_shape))
110 if lh_shape[:-2] != rh_shape[:-2]:
111 raise ValueError("Incompatible dimensions %s %s" % (
ValueError: Incompatible dimensions (1, 200) (8, 3)
Update:
I tried changing the selection shape as suggested in the comment below.
code:
selection = cvxpy.Variable(test_probs.shape, boolean=True)
and now I get the new error when I run the supply_constraint part of the code below.
code:
# constraints
# Constant matrix that counts how many of each
# material we sent to each customer
TRANSFORMER = np.array([[1,0,0],
[0,1,0],
[0,0,1],
[1,1,0],
[1,0,1],
[0,1,1],
[1,1,1],
[0,0,0]])
# can't send customer more promotion than there is supply
# note: sum_entries changed to sum in latest cvxpy version
supply_constraint = cvxpy.sum(selection * TRANSFORMER, axis=0) <= supply
Error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-10-6eb7a55ea896> in <module>()
18 # note: sum_entries changed to sum in latest cvxpy version
19
---> 20 supply_constraint = cvxpy.sum(selection * TRANSFORMER, axis=0) <= supply
21
22
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/expressions/expression.py in cast_op(self, other)
47 """
48 other = self.cast_to_const(other)
---> 49 return binary_op(self, other)
50 return cast_op
51
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/expressions/expression.py in __le__(self, other)
482 """NonPos : Creates an inequality constraint.
483 """
--> 484 return NonPos(self - other)
485
486 def __lt__(self, other):
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/expressions/expression.py in cast_op(self, other)
47 """
48 other = self.cast_to_const(other)
---> 49 return binary_op(self, other)
50 return cast_op
51
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/expressions/expression.py in __sub__(self, other)
370 """Expression : The difference of two expressions.
371 """
--> 372 return self + -other
373
374 #_cast_other
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/expressions/expression.py in cast_op(self, other)
47 """
48 other = self.cast_to_const(other)
---> 49 return binary_op(self, other)
50 return cast_op
51
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/expressions/expression.py in __add__(self, other)
358 """Expression : Sum two expressions.
359 """
--> 360 return cvxtypes.add_expr()([self, other])
361
362 #_cast_other
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/atoms/affine/add_expr.py in __init__(self, arg_groups)
34 # For efficiency group args as sums.
35 self._arg_groups = arg_groups
---> 36 super(AddExpression, self).__init__(*arg_groups)
37 self.args = []
38 for group in arg_groups:
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/atoms/atom.py in __init__(self, *args)
42 self.args = [Atom.cast_to_const(arg) for arg in args]
43 self.validate_arguments()
---> 44 self._shape = self.shape_from_args()
45 if len(self._shape) > 2:
46 raise ValueError("Atoms must be at most 2D.")
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/atoms/affine/add_expr.py in shape_from_args(self)
42 """Returns the (row, col) shape of the expression.
43 """
---> 44 return u.shape.sum_shapes([arg.shape for arg in self.args])
45
46 def expand_args(self, expr):
~/anaconda2/envs/py36/lib/python3.6/site-packages/cvxpy/utilities/shape.py in sum_shapes(shapes)
50 raise ValueError(
51 "Cannot broadcast dimensions " +
---> 52 len(shapes)*" %s" % tuple(shapes))
53
54 longer = shape if len(shape) >= len(t) else t
ValueError: Cannot broadcast dimensions (3,) (1, 3)
Your issue is happening when you create the selection variable. You are unpacking the shape tuple into multiple arguments. The first argument to Variable should be a shape. So the correct construction is:
selection = cvxpy.Variable(test_probs.shape, boolean=True)
You can verify this is correct by inspecting the shape attribute:
selection.shape
Which should now give:
(200, 8)
Here I want to predict the same values with time (regression neural network) using Python. I have two outputs and three inputs. When I run the code, it gives me the error "variance_scaling_initializer() got an unexpected keyword argument 'distribution'". Can you help me solve the problem?
Here is my code:
n_neurons_1 = 24
n_neurons_2 = 12
n_target = 2
softmax = 2
weight_initializer = tf.contrib.layers.variance_scaling_initializer(mode= "FAN_AVG", distribution ="uniform", scale = softmax)
bias_initializer = tf.zeros_initializer()
w_hidden_1 = tf.Variable(weight_initializer([n_time_dimensions,n_neurons_1]))
bias_hidden_1= tf.Variable(bias_initializer([n_neurons_1]))
w_hidden_2= tf.Variable(weight_initializer([n_neurons_1,n_neurons_2]))
bias_hidden_2 = tf.Variable(bias_initializer([n_neurons_2]))
w_out = tf.Variable(weight_initializer([n_neurons_2,2]))
bias_out = tf.Variable(bias_initializer([2]))
hidden_1 = tf.nn.relu(tf.add(tf.matmul(X, w_hidden_1),bias_hidden_1))
hidden_2 = tf.nn.relu(tf.add(tf.matmul(X, w_hidden_2),bias_hidden_2))
out = tf.transpose(tf.add(tf.matmul(hidden_2, w_out),bias_out))
My dataset is,
date time g p c apparentg
6/8/2018 0:06:15 141 131 136 141
6/8/2018 0:09:25 95 117 95 95
6/8/2018 0:11:00 149 109 139 149
6/8/2018 0:13:50 120 103 95 120
6/8/2018 0:16:20 135 97 105 135
6/8/2018 0:19:00 63 NaN 97 63
6/8/2018 0:20:00 111 NaN 100 111
6/8/2018 0:22:10 115 NaN 115 115
6/8/2018 0:23:40 287 NaN NaN 287
The error is:
TypeError Traceback (most recent call last)
<ipython-input-26-9ceeb97429b1> in <module>()
31 n_target = 2
32 softmax = 2
---> 33 weight_initializer = tf.contrib.layers.variance_scaling_initializer(mode= "FAN_AVG", distribution ="uniform", scale = softmax)
34 bias_initializer = tf.zeros_initializer()
35 w_hidden_1 = tf.Variable(weight_initializer([n_time_dimensions,n_neurons_1]))
TypeError: variance_scaling_initializer() got an unexpected keyword argument 'distribution'
Looking at the documentation: https://www.tensorflow.org/api_docs/python/tf/contrib/layers/variance_scaling_initializer
tf.contrib.layers.variance_scaling_initializer(
factor=2.0,
mode='FAN_IN',
uniform=False,
seed=None,
dtype=tf.float32
)
and
uniform: Whether to use uniform or normal distributed random initialization.
So try
uniform = True
instead of
distribution ="uniform"
in your function call
tf.contrib.layers.variance_scaling_initializer(mode= "FAN_AVG", distribution ="uniform", scale = softmax)
Also, there seems to be no scale= argument in that function; the signature above lists factor instead.
I'm working through the featuretools "predict_next_purchase" demo against my own data. I've created the entity set and have also created a new pandas DataFrame comprising the labels and times. I'm at the point of using ft.dfs for deep feature synthesis, and I'm getting a RuntimeError: maximum recursion depth exceeded. Below are the call and its stack trace:
feature_matrix, features = ft.dfs(target_entity='projects',
cutoff_time=labels.reset_index().loc[:,['jobnumber','time']],
training_window=inst_defn['training_window'],
entityset=es,
verbose=True)
Stack Trace:
Building features: 0it [00:00, ?it/s]
RuntimeError: maximum recursion depth exceeded
RuntimeErrorTraceback (most recent call last)
<ipython-input-743-f05fc567dd1b> in <module>()
3 training_window=inst_defn['training_window'],
4 entityset=es,
----> 5 verbose=True)
/Users/nbernini/OneDrive/PSC/venv/ml20/lib/python2.7/site-packages/featuretools/synthesis/dfs.pyc in dfs(entities, relationships, entityset, target_entity, cutoff_time, instance_ids, agg_primitives, trans_primitives, allowed_paths, max_depth, ignore_entities, ignore_variables, seed_features, drop_contains, drop_exact, where_primitives, max_features, cutoff_time_in_index, save_progress, features_only, training_window, approximate, verbose)
164 seed_features=seed_features)
165
--> 166 features = dfs_object.build_features(verbose=verbose)
167
168 if features_only:
/Users/nbernini/OneDrive/PSC/venv/ml20/lib/python2.7/site-packages/featuretools/synthesis/deep_feature_synthesis.pyc in build_features(self, variable_types, verbose)
227 self.where_clauses = defaultdict(set)
228 self._run_dfs(self.es[self.target_entity_id], [],
--> 229 all_features, max_depth=self.max_depth)
230
231 new_features = list(all_features[self.target_entity_id].values())
/Users/nbernini/OneDrive/PSC/venv/ml20/lib/python2.7/site-packages/featuretools/synthesis/deep_feature_synthesis.pyc in _run_dfs(self, entity, entity_path, all_features, max_depth)
353 entity_path=list(entity_path),
354 all_features=all_features,
--> 355 max_depth=new_max_depth)
356
357 """
/Users/nbernini/OneDrive/PSC/venv/ml20/lib/python2.7/site-packages/featuretools/synthesis/deep_feature_synthesis.pyc in _run_dfs(self, entity, entity_path, all_features, max_depth)
338 if self._apply_traversal_filters(entity, self.es[b_id],
339 entity_path,
--> 340 forward=False) and
341 b_id not in self.ignore_entities]
342 for b_entity_id in backward_entities:
/Users/nbernini/OneDrive/PSC/venv/ml20/lib/python2.7/site-packages/featuretools/synthesis/deep_feature_synthesis.pyc in _apply_traversal_filters(self, parent_entity, child_entity, entity_path, forward)
429 child_entity=child_entity,
430 target_entity_id=self.target_entity_id,
--> 431 entity_path=entity_path, forward=forward):
432 return False
433
/Users/nbernini/OneDrive/PSC/venv/ml20/lib/python2.7/site-packages/featuretools/synthesis/dfs_filters.pyc in is_valid(self, feature, entity, target_entity_id, child_feature, child_entity, entity_path, forward, where)
53
54 if type(feature) != list:
---> 55 return func(*args)
56
57 else:
/Users/nbernini/OneDrive/PSC/venv/ml20/lib/python2.7/site-packages/featuretools/synthesis/dfs_filters.pyc in apply_filter(self, parent_entity, child_entity, target_entity_id, entity_path, forward)
76 if (parent_entity.id == target_entity_id or
77 es.find_backward_path(parent_entity.id,
---> 78 target_entity_id) is None):
79 return True
80 path = es.find_backward_path(parent_entity.id, child_entity.id)
/Users/nbernini/OneDrive/PSC/venv/ml20/lib/python2.7/site-packages/featuretools/entityset/base_entityset.pyc in find_backward_path(self, start_entity_id, goal_entity_id)
308 is returned if no path exists.
309 """
--> 310 forward_path = self.find_forward_path(goal_entity_id, start_entity_id)
311 if forward_path is not None:
312 return forward_path[::-1]
/Users/nbernini/OneDrive/PSC/venv/ml20/lib/python2.7/site-packages/featuretools/entityset/base_entityset.pyc in find_forward_path(self, start_entity_id, goal_entity_id)
287
288 for r in self.get_forward_relationships(start_entity_id):
--> 289 new_path = self.find_forward_path(r.parent_entity.id, goal_entity_id)
290 if new_path is not None:
291 return [r] + new_path
... last 1 frames repeated, from the frame below ...
/Users/nbernini/OneDrive/PSC/venv/ml20/lib/python2.7/site-packages/featuretools/entityset/base_entityset.pyc in find_forward_path(self, start_entity_id, goal_entity_id)
287
288 for r in self.get_forward_relationships(start_entity_id):
--> 289 new_path = self.find_forward_path(r.parent_entity.id, goal_entity_id)
290 if new_path is not None:
291 return [r] + new_path
RuntimeError: maximum recursion depth exceeded
The issue here is cyclical relationships in your entity set. Currently, Deep Feature Synthesis can only create features when there is one unique path between two entities. If you have an entity with a relationship to itself, you would also get this error.
A future release of Featuretools will offer better support for this use case.