Tensorflow: function.defun with a a while loop in the body is throwing shape error - python-3.x

I am using a while loop to calculate a cost function for memory reasons. When calculating the gradient, tensorflow will store Nm tensors where Nm is the number of iterations in my while loop (this cuases the same memory issues I had with the original energy functions). I do not want that as I don't have enough memory. So I want to register a new op along with a gradient function that both use a while loop. However I am having issues with using function.defun and a while loop. To simplify things, I have a small test example below:
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.framework import function
def _run(tensor):
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
res = sess.run(tensor)
return res
#function.Defun(tf.float32,tf.float32,func_name ='tf_test_log')#,grad_func=tf_test_logGrad)
def tf_test_log(t_x,t_y):
#N = t_x.shape[0].value
condition = lambda i,m1: i<N
def body(index,x):
#return[(index+1),tf.concat([x, tf.expand_dims(tf.exp( tf.add( t_x[:,index],t_y[:,index]) ),1) ],1 ) ]
return[(index+1),tf.add(x, tf.exp( tf.add( t_x[:,0],t_y[:,0]) ) ) ]
i0 = tf.constant(0,dtype=tf.int32)
m0 = tf.zeros([N,1],dType)
ijk_0 = [i0,m0]
L,t_log_x = tf.while_loop(condition,body,ijk_0,
shape_invariants=[i0.get_shape(),
tf.TensorShape([N,None])]
)
return t_log_x
dType = tf.float32
N = np.int32(100)
t_N = tf.constant(N,dtype = tf.int32)
t_x = tf.constant(np.random.randn(N,N),dtype = dType)
t_y = tf.constant(np.random.randn(N,N),dtype = dType)
ys = _run(tf_test_log(t_x,t_y))
I then try to test the new op:
I get a Value error: The shape for while/Merge_1:0 is not an invariant for the loop. It enters the loop with shape (100, ?), but has shape after one iteration. Provide shape invariants using either the shape_invariants argument of tf.while_loop or set_shape() on the loop variables.
Note that calling
If i use a concatenate operation (instead of the add operation that gets returned by my while loop), I do not get any issues.
However, If I do not set N as a global variable (i.e. I do N = t_x.shape[0]) inside the body of the tf_test_log function, I get a Value error.
ValueError: Cannot convert a partially known TensorShape to a Tensor: (?, 1)
What is wrong with my code? Any help is greatly appreciated!
I am using python 3.5 on ubuntu 16.04 and tensorflow 1.4
full output:
ValueError Traceback (most recent call last)
~/Documents/TheEffingPhDHatersGonnaHate/PAM/defun_while.py in <module>()
51 t_x = tf.constant(np.random.randn(N,N),dtype = dType)
52 t_y = tf.constant(np.random.randn(N,N),dtype = dType)
---> 53 ys = _run(tf_test_log(t_x,t_y))
54
55
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in __call__(self, *args, **kwargs)
503
504 def __call__(self, *args, **kwargs):
--> 505 self.add_to_graph(ops.get_default_graph())
506 args = [ops.convert_to_tensor(_) for _ in args] + self._extra_inputs
507 ret, op = _call(self._signature, *args, **kwargs)
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in add_to_graph(self, g)
484 def add_to_graph(self, g):
485 """Adds this function into the graph g."""
--> 486 self._create_definition_if_needed()
487
488 # Adds this function into 'g'.
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in _create_definition_if_needed(self)
319 """Creates the function definition if it's not created yet."""
320 with context.graph_mode():
--> 321 self._create_definition_if_needed_impl()
322
323 def _create_definition_if_needed_impl(self):
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in _create_definition_if_needed_impl(self)
336 # Call func and gather the output tensors.
337 with vs.variable_scope("", custom_getter=temp_graph.getvar):
--> 338 outputs = self._func(*inputs)
339
340 # There is no way of distinguishing between a function not returning
~/Documents/TheEffingPhDHatersGonnaHate/PAM/defun_while.py in tf_test_log(t_x, t_y)
39 L,t_log_x = tf.while_loop(condition,body,ijk_0,
40 shape_invariants=[i0.get_shape(),
---> 41 tf.TensorShape([N,None])]
42 )
43 return t_log_x
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
2814 loop_context = WhileContext(parallel_iterations, back_prop, swap_memory) # pylint: disable=redefined-outer-name
2815 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
-> 2816 result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
2817 return result
2818
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2638 self.Enter()
2639 original_body_result, exit_vars = self._BuildLoop(
-> 2640 pred, body, original_loop_vars, loop_vars, shape_invariants)
2641 finally:
2642 self.Exit()
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2619 for m_var, n_var in zip(merge_vars, next_vars):
2620 if isinstance(m_var, ops.Tensor):
-> 2621 _EnforceShapeInvariant(m_var, n_var)
2622
2623 # Exit the loop.
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _EnforceShapeInvariant(merge_var, next_var)
576 "Provide shape invariants using either the `shape_invariants` "
577 "argument of tf.while_loop or set_shape() on the loop variables."
--> 578 % (merge_var.name, m_shape, n_shape))
579 else:
580 if not isinstance(var, (ops.IndexedSlices, sparse_tensor.SparseTensor)):
ValueError: The shape for while/Merge_1:0 is not an invariant for the loop. It enters the loop with shape (100, ?), but has shape <unknown> after one iteration. Provide shape invariants using either the `shape_invariants` argument of tf.while_loop or set_shape() on the loop variables.

Thanks #Alexandre Passos for the suggestion in the comment above!
The following piece of code is a modification of the original with a set_shape function added inside the body.
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.framework import function
def _run(tensor):
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
res = sess.run(tensor)
return res
#function.Defun(tf.float32,tf.float32,tf.float32,func_name ='tf_test_logGrad')
def tf_test_logGrad(t_x,t_y,grad):
return grad
#function.Defun(tf.float32,tf.float32,func_name ='tf_test_log')#,grad_func=tf_test_logGrad)
def tf_test_log(t_x,t_y):
#N = t_x.shape[0].value
condition = lambda i,m1: i<N
def body(index,x):
#return[(index+1),tf.concat([x, tf.expand_dims(tf.exp( tf.add( t_x[:,index],t_y[:,index]) ),1) ],1 ) ]
x = tf.add(x, tf.exp( tf.add( t_x[:,0],t_y[:,0]) ) )
x.set_shape([N])
return[(index+1), x]
i0 = tf.constant(0,dtype=tf.int32)
m0 = tf.zeros([N],dType)
ijk_0 = [i0,m0]
L,t_log_x = tf.while_loop(condition,body,ijk_0,
shape_invariants=[i0.get_shape(),
tf.TensorShape([N])]
)
return t_log_x
dType = tf.float32
N = np.int32(100)
t_N = tf.constant(N,dtype = tf.int32)
t_x = tf.constant(np.random.randn(N,N),dtype = dType)
t_y = tf.constant(np.random.randn(N,N),dtype = dType)
ys = _run(tf_test_log(t_x,t_y))
The Issue of global N still persists.
You still need to set the shape of the loop tensors as a global variable outside of the defun decorator. If you try to get it from the shape of the inputs of the defun decorator, you get:
TypeError Traceback (most recent call last)
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py in zeros(shape, dtype, name)
1438 shape = tensor_shape.as_shape(shape)
-> 1439 output = constant(zero, shape=shape, dtype=dtype, name=name)
1440 except (TypeError, ValueError):
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name, verify_shape)
207 tensor_util.make_tensor_proto(
--> 208 value, dtype=dtype, shape=shape, verify_shape=verify_shape))
209 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape)
379 # exception when dtype is set to np.int64
--> 380 if shape is not None and np.prod(shape, dtype=np.int64) == 0:
381 nparray = np.empty(shape, dtype=np_dt)
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/numpy/core/fromnumeric.py in prod(a, axis, dtype, out, keepdims)
2517 return _methods._prod(a, axis=axis, dtype=dtype,
-> 2518 out=out, **kwargs)
2519
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/numpy/core/_methods.py in _prod(a, axis, dtype, out, keepdims)
34 def _prod(a, axis=None, dtype=None, out=None, keepdims=False):
---> 35 return umr_prod(a, axis, dtype, out, keepdims)
36
TypeError: __int__ returned non-int (type NoneType)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~/Documents/TheEffingPhDHatersGonnaHate/PAM/defun_while.py in <module>()
52 t_x = tf.constant(np.random.randn(N,N),dtype = dType)
53 t_y = tf.constant(np.random.randn(N,N),dtype = dType)
---> 54 ys = _run(tf_test_log(t_x,t_y))
55
56
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in __call__(self, *args, **kwargs)
503
504 def __call__(self, *args, **kwargs):
--> 505 self.add_to_graph(ops.get_default_graph())
506 args = [ops.convert_to_tensor(_) for _ in args] + self._extra_inputs
507 ret, op = _call(self._signature, *args, **kwargs)
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in add_to_graph(self, g)
484 def add_to_graph(self, g):
485 """Adds this function into the graph g."""
--> 486 self._create_definition_if_needed()
487
488 # Adds this function into 'g'.
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in _create_definition_if_needed(self)
319 """Creates the function definition if it's not created yet."""
320 with context.graph_mode():
--> 321 self._create_definition_if_needed_impl()
322
323 def _create_definition_if_needed_impl(self):
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in _create_definition_if_needed_impl(self)
336 # Call func and gather the output tensors.
337 with vs.variable_scope("", custom_getter=temp_graph.getvar):
--> 338 outputs = self._func(*inputs)
339
340 # There is no way of distinguishing between a function not returning
~/Documents/TheEffingPhDHatersGonnaHate/PAM/defun_while.py in tf_test_log(t_x, t_y)
33
34 i0 = tf.constant(0,dtype=tf.int32)
---> 35 m0 = tf.zeros([N],dType)
36
37
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py in zeros(shape, dtype, name)
1439 output = constant(zero, shape=shape, dtype=dtype, name=name)
1440 except (TypeError, ValueError):
-> 1441 shape = ops.convert_to_tensor(shape, dtype=dtypes.int32, name="shape")
1442 output = fill(shape, constant(zero, dtype=dtype), name=name)
1443 assert output.dtype.base_dtype == dtype
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
834 name=name,
835 preferred_dtype=preferred_dtype,
--> 836 as_ref=False)
837
838
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
924
925 if ret is None:
--> 926 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
927
928 if ret is NotImplemented:
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py in _tensor_shape_tensor_conversion_function(s, dtype, name, as_ref)
248 if not s.is_fully_defined():
249 raise ValueError(
--> 250 "Cannot convert a partially known TensorShape to a Tensor: %s" % s)
251 s_list = s.as_list()
252 int64_value = 0
ValueError: Cannot convert a partially known TensorShape to a Tensor: (?,)

Related

fitting ARIMA model including exogeneos predictor getting DLASCL error

I have two pandas dataframes. the dataframes are named stationary_train and test_exog. I have the shape of the dataframes below. I also have some sample data from the dataframes. I'm trying to fit an arima model to the data using ARIMA from statsmodel. I want to forecast the endogeneous variable "stationary_train" using exogeneous variable "test_exog". I'm using the code below. I'm getting the error below, I'm unclear why. the two dataframes are the same shape, and I don't see any missing values in either dataframe. can anyone see what the issue is and suggest how to fix it?
data:
test_exog[:-1].shape
(203, 1)
stationary_train.shape
(203, 1)
print(exog_auto_model.order)
(11, 0, 6)
print(test_exog[:-1].head())
exog_passengers
month
2000-01-01 46513.9
2000-02-01 48555.7
2000-03-01 58812.4
2000-04-01 56101.1
2000-05-01 58237.4
print(stationary_train.head())
passengers
month
2000-02-01 2034.0
2000-03-01 10238.0
2000-04-01 -2731.0
2000-05-01 2168.0
2000-06-01 2872.0
code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# statmodels
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARMA, ARIMA
# datetime
from datetime import datetime
ARIMA(endog=stationary_train.values.reshape(-1,1),
exog=test_exog[:-1],
order=exog_auto_model.order).fit()
error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-90-ae6d07b32cf7> in <module>
1 exog_predict= ARIMA(endog=stationary_train.values.reshape(-1,1),
2 exog=test_exog[:-1],
----> 3 order=exog_auto_model.order).fit()
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in fit(self, start_params, trend, method, transparams, solver, maxiter, full_output, disp, callback, start_ar_lags, **kwargs)
1028 maxiter=maxiter,
1029 full_output=full_output, disp=disp,
-> 1030 callback=callback, **kwargs)
1031 params = mlefit.params
1032
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/base/model.py in fit(self, start_params, method, maxiter, full_output, disp, fargs, callback, retall, skip_hessian, **kwargs)
525 callback=callback,
526 retall=retall,
--> 527 full_output=full_output)
528
529 # NOTE: this is for fit_regularized and should be generalized
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/base/optimizer.py in _fit(self, objective, gradient, start_params, fargs, kwargs, hessian, method, maxiter, full_output, disp, callback, retall)
225 disp=disp, maxiter=maxiter, callback=callback,
226 retall=retall, full_output=full_output,
--> 227 hess=hessian)
228
229 optim_settings = {'optimizer': method, 'start_params': start_params,
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/base/optimizer.py in _fit_lbfgs(f, score, start_params, fargs, kwargs, disp, maxiter, callback, retall, full_output, hess)
630 callback=callback, args=fargs,
631 bounds=bounds, disp=disp,
--> 632 **extra_kwargs)
633
634 if full_output:
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/lbfgsb.py in fmin_l_bfgs_b(func, x0, fprime, args, approx_grad, bounds, m, factr, pgtol, epsilon, iprint, maxfun, maxiter, disp, callback, maxls)
196
197 res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
--> 198 **opts)
199 d = {'grad': res['jac'],
200 'task': res['message'],
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/lbfgsb.py in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, finite_diff_rel_step, **unknown_options)
306 sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
307 bounds=new_bounds,
--> 308 finite_diff_rel_step=finite_diff_rel_step)
309
310 func_and_grad = sf.fun_and_grad
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/optimize.py in _prepare_scalar_function(fun, x0, jac, args, bounds, epsilon, finite_diff_rel_step, hess)
260 # calculation reduces overall function evaluations.
261 sf = ScalarFunction(fun, x0, args, grad, hess,
--> 262 finite_diff_rel_step, bounds, epsilon=epsilon)
263
264 return sf
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/_differentiable_functions.py in __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, finite_diff_bounds, epsilon)
74
75 self._update_fun_impl = update_fun
---> 76 self._update_fun()
77
78 # Gradient evaluation
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/_differentiable_functions.py in _update_fun(self)
164 def _update_fun(self):
165 if not self.f_updated:
--> 166 self._update_fun_impl()
167 self.f_updated = True
168
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/_differentiable_functions.py in update_fun()
71
72 def update_fun():
---> 73 self.f = fun_wrapped(self.x)
74
75 self._update_fun_impl = update_fun
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/_differentiable_functions.py in fun_wrapped(x)
68 def fun_wrapped(x):
69 self.nfev += 1
---> 70 return fun(x, *args)
71
72 def update_fun():
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/base/model.py in f(params, *args)
499
500 def f(params, *args):
--> 501 return -self.loglike(params, *args) / nobs
502
503 if method == 'newton':
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in loglike(self, params, set_sigma2)
839 method = self.method
840 if method in ['mle', 'css-mle']:
--> 841 return self.loglike_kalman(params, set_sigma2)
842 elif method == 'css':
843 return self.loglike_css(params, set_sigma2)
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in loglike_kalman(self, params, set_sigma2)
849 Compute exact loglikelihood for ARMA(p,q) model by the Kalman Filter.
850 """
--> 851 return KalmanFilter.loglike(params, self, set_sigma2)
852
853 def loglike_css(self, params, set_sigma2=True):
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/tsa/kalmanf/kalmanfilter.py in loglike(cls, params, arma_model, set_sigma2)
218 loglike, sigma2 = kalman_loglike.kalman_loglike_double(
219 y, k, k_ar, k_ma, k_lags, int(nobs),
--> 220 Z_mat, R_mat, T_mat)
221 elif np.issubdtype(paramsdtype, np.complex128):
222 loglike, sigma2 = kalman_loglike.kalman_loglike_complex(
statsmodels/tsa/kalmanf/kalman_loglike.pyx in statsmodels.tsa.kalmanf.kalman_loglike.kalman_loglike_double()
statsmodels/tsa/kalmanf/kalman_loglike.pyx in statsmodels.tsa.kalmanf.kalman_loglike.kalman_filter_double()
<__array_function__ internals> in pinv(*args, **kwargs)
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/numpy/linalg/linalg.py in pinv(a, rcond, hermitian)
2001 return wrap(res)
2002 a = a.conjugate()
-> 2003 u, s, vt = svd(a, full_matrices=False, hermitian=hermitian)
2004
2005 # discard small singular values
<__array_function__ internals> in svd(*args, **kwargs)
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/numpy/linalg/linalg.py in svd(a, full_matrices, compute_uv, hermitian)
1659
1660 signature = 'D->DdD' if isComplexType(t) else 'd->ddd'
-> 1661 u, s, vh = gufunc(a, signature=signature, extobj=extobj)
1662 u = u.astype(result_t, copy=False)
1663 s = s.astype(_realType(result_t), copy=False)
ValueError: On entry to DLASCL parameter number 4 had an illegal value

How to perform Min Max Scaler on an array which contains columns with string and numbers?

please i really need your help, i'm struggling with MinMaxScaler, i would like to apply this technique on the array below that contains columns with string and numbers. I only want to implement this technique on the columns that contains numbers.
clean_tweets_no_urls = pd.DataFrame(counts_no_urls.most_common(15),
columns=['words', 'count'])
clean_tweets_no_urls.head()
That's my array
minmax_scaling(clean_tweets_no_urls, columns=['words', 'count'])
For that, i'm getting this result :
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-108-eeb7b44d7121> in <module>
----> 1 minmax_scaling(clean_tweets_no_urls, columns=['words', 'count'])
C:\ProgramData\Anaconda3\lib\site-packages\mlxtend\preprocessing\scaling.py in minmax_scaling(array, columns, min_val, max_val)
36
37 """
---> 38 ary_new = array.astype(float)
39 if len(ary_new.shape) == 1:
40 ary_new = ary_new[:, np.newaxis]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5696 else:
5697 # else, only a single dtype is given
-> 5698 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors)
5699 return self._constructor(new_data).__finalize__(self)
5700
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
580
581 def astype(self, dtype, copy: bool = False, errors: str = "raise"):
--> 582 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
583
584 def convert(self, **kwargs):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, filter, **kwargs)
440 applied = b.apply(f, **kwargs)
441 else:
--> 442 applied = getattr(b, f)(**kwargs)
443 result_blocks = _extend_blocks(applied, result_blocks)
444
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
623 vals1d = values.ravel()
624 try:
--> 625 values = astype_nansafe(vals1d, dtype, copy=True)
626 except (ValueError, TypeError):
627 # e.g. astype_nansafe can fail on object-dtype of strings
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
895 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
896 # Explicit copy, or required since NumPy can't view from / to object.
--> 897 return arr.astype(dtype, copy=True)
898
899 return arr.view(dtype)
ValueError: could not convert string to float: 'joebiden'
from sklearn.preprocessing import minmax_scale
clean_tweets_no_urls[['count']] = minmax_scale.fit_transform(clean_tweets_no_urls[['count']])
This may be used to automate finding numeric columns.

how to fix ImportError: DLL load failed

I am running Python 3.8 on Windows 10 Pro. I am attempting to fit a multinomial fixed effects model to some data. All the packages appear to load fine, the data loads and can be manipulated and viewed. But I get an error when I call a module that my collaborator wrote to fit the model (called contactai).
Here is the code for contactai:
Note: tt stands for theano.tensor and pm stands for pymc3
def fixed_effects_model(X, y):
Parameters
X : array contains covariates
Design matrix n x p
y : array or shared theano variable
Multinomial response variable, n x 3
Returns
pymc3 model object containing fitted model
yarray = y.eval()
else:
yarray = y
if type(X) == tt.sharedvar.TensorSharedVariable:
Xmat = X.eval()
else:
Xmat = X
model = pm.Model()
with model:
# Transformed coefficients
betaD = pm.Normal('betad', mu=0, sd=5, shape=(Xmat.shape[1], 1))
betaI = pm.Normal('betai', mu=0, sd=5, shape=(Xmat.shape[1], 1))
# Softmax transformation with no contact as baseline
etad = tt.exp(tt.dot(X, betaD))
etai = tt.exp(tt.dot(X, betaI))
p_d = etad / (1 + etad + etai)
p_i = etai / (1 + etad + etai)
p_n = 1 - p_d - p_i
ps = tt.stack([p_n, p_d, p_i], axis=1).reshape((len(yarray), 3)) # Stack column-wise
Y_obs = pm.Multinomial("Y_obs", n=1, p=ps, observed=y)
return(model)
When I call contaciai.py in my jupyter notebook using
test_mod = contactai.fixed_effects_model(Xmat1, Yres.values)
I get this error message:
ImportError Traceback (most recent call last)
<ipython-input-6-275ca67d4b63> in <module>
1 # Run Fixed Effects model
----> 2 test_mod = contactai.fixed_effects_model(Xmat1, Yres.values)
3 niter = 1000
4
5 with test_mod:
~\dropbox\Brooke\ai_multinomial_model\contactai.py in fixed_effects_model(X, y)
50 # Transformed coefficients, what are D and I? In other examples this is where priors are
51 #specified.
---> 52 betaD = pm.Normal('betad', mu=0, sd=5, shape=(Xmat.shape[1], 1))
53 betaI = pm.Normal('betai', mu=0, sd=5, shape=(Xmat.shape[1], 1))
54 # First argument is the name of the random variable
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\pymc3\distributions\distribution.py in __new__(cls, name, *args, **kwargs)
44 raise TypeError("observed needs to be data but got: {}".format(type(data)))
45 total_size = kwargs.pop('total_size', None)
---> 46 dist = cls.dist(*args, **kwargs)
47 return model.Var(name, dist, data, total_size)
48 else:
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\pymc3\distributions\distribution.py in dist(cls, *args, **kwargs)
55 def dist(cls, *args, **kwargs):
56 dist = object.__new__(cls)
---> 57 dist.__init__(*args, **kwargs)
58 return dist
59
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\pymc3\distributions\continuous.py in __init__(self, mu, sigma, tau, sd, **kwargs)
468
469 self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(floatX(mu))
--> 470 self.variance = 1. / self.tau
471
472 assert_negative_support(sigma, 'sigma', 'Normal')
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\tensor\var.py in __rtruediv__(self, other)
204
205 def __rtruediv__(self, other):
--> 206 return theano.tensor.basic.true_div(other, self)
207
208 def __rfloordiv__(self, other):
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\gof\op.py in __call__(self, *inputs, **kwargs)
667
668 # compute output value once with test inputs to validate graph
--> 669 thunk = node.op.make_thunk(node, storage_map, compute_map,
670 no_recycling=[])
671 thunk.inputs = [storage_map[v] for v in node.inputs]
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\gof\op.py in make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
952 compute_map=compute_map, impl='c')
953 try:
--> 954 return self.make_c_thunk(node, storage_map, compute_map,
955 no_recycling)
956 except (NotImplementedError, utils.MethodNotDefined):
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\gof\op.py in make_c_thunk(self, node, storage_map, compute_map, no_recycling)
855 raise NotImplementedError("float16")
856 _logger.debug('Trying CLinker.make_thunk')
--> 857 outputs = cl.make_thunk(input_storage=node_input_storage,
858 output_storage=node_output_storage)
859 thunk, node_input_filters, node_output_filters = outputs
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\gof\cc.py in make_thunk(self, input_storage, output_storage, storage_map, keep_lock)
1213 """
1214 init_tasks, tasks = self.get_init_tasks()
-> 1215 cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
1216 input_storage, output_storage, storage_map,
1217 keep_lock=keep_lock)
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\gof\cc.py in __compile__(self, input_storage, output_storage, storage_map, keep_lock)
1151 input_storage = tuple(input_storage)
1152 output_storage = tuple(output_storage)
-> 1153 thunk, module = self.cthunk_factory(error_storage,
1154 input_storage,
1155 output_storage,
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\gof\cc.py in cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, keep_lock)
1621 for node in self.node_order:
1622 node.op.prepare_node(node, storage_map, None, 'c')
-> 1623 module = get_module_cache().module_from_key(
1624 key=key, lnk=self, keep_lock=keep_lock)
1625
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\gof\cmodule.py in module_from_key(self, key, lnk, keep_lock)
1187 try:
1188 location = dlimport_workdir(self.dirname)
-> 1189 module = lnk.compile_cmodule(location)
1190 name = module.__file__
1191 assert name.startswith(location)
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\gof\cc.py in compile_cmodule(self, location)
1518 try:
1519 _logger.debug("LOCATION %s", str(location))
-> 1520 module = c_compiler.compile_str(
1521 module_name=mod.code_hash,
1522 src_code=src_code,
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\gof\cmodule.py in compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
2418 open(os.path.join(location, "__init__.py"), 'w').close()
2419 assert os.path.isfile(lib_filename)
-> 2420 return dlimport(lib_filename)
2421
2422
c:\users\webblab\appdata\local\programs\python\python38-32\lib\site-packages\theano\gof\cmodule.py in dlimport(fullpath, suffix)
315 warnings.filterwarnings("ignore",
316 message="numpy.ndarray size changed")
--> 317 rval = __import__(module_name, {}, {}, [module_name])
318 t1 = time.time()
319 import_time += t1 - t0
ImportError: DLL load failed while importing m54e1ce4760689129b813fce15fee7f8d468afbe167bc036205eea6bd301309d7: The specified module could not be found.
My collaborator does not get this error when running identical code on their macbook so the issue is not with the code. I installed all packages with pip. Any help would be greatly appreciated, this has brought me to a standstill on this project.

pymc3 / theano error when using power function

I am trying to recreate this example of bayesian PK/PD modelling using pymc3.....
The video shows the WinBUGS code and I am trying to convert to pymc3
https://www.youtube.com/watch?v=AQDXRoBan6Y
model here....
http://imgur.com/ckoKPRF
WinBUGS code is here ....
http://imgur.com/TsViyBC
My code is ....
from pymc3 import Model, Normal, Lognormal, Uniform
import numpy as np
import pandas as pd
data = pd.read_csv('/Users/Home/Documents/pymc3/fxa.data.csv' )
cobs = np.array(data['cobs'])
fxa = np.array(data['fxa.inh.obs'])
pkpd_model = Model()
with pkpd_model:
# Priors for unknown model parameters
emax = Uniform ('emax', lower =0, upper =100)
ec50 = Lognormal('ec50', mu=0, tau = 100000)
gamma = Uniform('gamma', lower=0, upper =10)
sigma = Uniform('sigma', lower = 0, upper = 1000 )
# Expected value of outcome
fxaMean = emax*(np.power(cobs, gamma)) / (np.power(ec50, gamma) + np.power(cobs, gamma))
# Likelihood (sampling distribution) of observations
fxa = Normal('fxa', mu=fxaMean, sd=sigma, observed=fxa )
But when I run the code I get the following error, which seems to relate to the way theano is interpreting the np.power function.
I am not sure how to proceed as I am a noob to pymc3 and theano and PK/PD modelling too!
Thanks in advance
Applied interval-transform to emax and added transformed emax_interval to model.
Applied log-transform to ec50 and added transformed ec50_log to model.
Applied interval-transform to gamma and added transformed gamma_interval to model.
Applied interval-transform to sigma and added transformed sigma_interval to model.
---------------------------------------------------------------------------
AsTensorError Traceback (most recent call last)
<ipython-input-28-1fa311a15ed0> in <module>()
14
15 # Likelihood (sampling distribution) of observations
---> 16 fxa = Normal('fxa', mu=fxaMean, sd=sigma, observed=fxa )
//anaconda/lib/python2.7/site-packages/pymc3/distributions/distribution.pyc in __new__(cls, name, *args, **kwargs)
23 data = kwargs.pop('observed', None)
24 dist = cls.dist(*args, **kwargs)
---> 25 return model.Var(name, dist, data)
26 elif name is None:
27 return object.__new__(cls) # for pickle
//anaconda/lib/python2.7/site-packages/pymc3/model.pyc in Var(self, name, dist, data)
282 self.named_vars[v.name] = v
283 else:
--> 284 var = ObservedRV(name=name, data=data, distribution=dist, model=self)
285 self.observed_RVs.append(var)
286 if var.missing_values:
//anaconda/lib/python2.7/site-packages/pymc3/model.pyc in __init__(self, type, owner, index, name, data, distribution, model)
556 self.missing_values = data.missing_values
557
--> 558 self.logp_elemwiset = distribution.logp(data)
559 self.model = model
560 self.distribution = distribution
//anaconda/lib/python2.7/site-packages/pymc3/distributions/continuous.pyc in logp(self, value)
191 sd = self.sd
192 mu = self.mu
--> 193 return bound((-tau * (value - mu)**2 + T.log(tau / np.pi / 2.)) / 2.,
194 tau > 0, sd > 0)
195
//anaconda/lib/python2.7/site-packages/theano/tensor/var.pyc in __radd__(self, other)
232 # ARITHMETIC - RIGHT-OPERAND
233 def __radd__(self, other):
--> 234 return theano.tensor.basic.add(other, self)
235
236 def __rsub__(self, other):
//anaconda/lib/python2.7/site-packages/theano/gof/op.pyc in __call__(self, *inputs, **kwargs)
609 """
610 return_list = kwargs.pop('return_list', False)
--> 611 node = self.make_node(*inputs, **kwargs)
612
613 if config.compute_test_value != 'off':
//anaconda/lib/python2.7/site-packages/theano/tensor/elemwise.pyc in make_node(self, *inputs)
541 using DimShuffle.
542 """
--> 543 inputs = list(map(as_tensor_variable, inputs))
544 shadow = self.scalar_op.make_node(
545 *[get_scalar_type(dtype=i.type.dtype).make_variable()
//anaconda/lib/python2.7/site-packages/theano/tensor/basic.pyc in as_tensor_variable(x, name, ndim)
206 except Exception:
207 str_x = repr(x)
--> 208 raise AsTensorError("Cannot convert %s to TensorType" % str_x, type(x))
209
210 # this has a different name, because _as_tensor_variable is the
AsTensorError: ('Cannot convert [Elemwise{mul,no_inplace}.0 Elemwise{mul,no_inplace}.0\n Elemwise{mul,no_inplace}.0 ..., Elemwise{mul,no_inplace}.0\n Elemwise{mul,no_inplace}.0 Elemwise{mul,no_inplace}.0] to TensorType', <type 'numpy.ndarray'>)
Doh - replaced np.power with ** ! working fine!

sklearn RidgeCV with sample_weight

I'm trying to do a weighted Ridge Regression with sklearn. However, the code breaks when I call the fit method. The exception I get is :
Exception: Data must be 1-dimensional
But I'm sure (by checking through print-statements) that the data I'm passing has the right shapes.
print temp1.shape #(781, 21)
print temp2.shape #(781,)
print weights.shape #(781,)
result=RidgeCV(normalize=True).fit(temp1,temp2,sample_weight=weights)
What could be going wrong ??
Here's the whole output :
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-65-a5b1eba5d9cf> in <module>()
22
23
---> 24 result=RidgeCV(normalize=True).fit(temp2,temp1, sample_weight=weights)
25
26
/usr/local/lib/python2.7/dist-packages/sklearn/linear_model/ridge.pyc in fit(self, X, y, sample_weight)
868 gcv_mode=self.gcv_mode,
869 store_cv_values=self.store_cv_values)
--> 870 estimator.fit(X, y, sample_weight=sample_weight)
871 self.alpha_ = estimator.alpha_
872 if self.store_cv_values:
/usr/local/lib/python2.7/dist-packages/sklearn/linear_model/ridge.pyc in fit(self, X, y, sample_weight)
793 else alpha)
794 if error:
--> 795 out, c = _errors(weighted_alpha, y, v, Q, QT_y)
796 else:
797 out, c = _values(weighted_alpha, y, v, Q, QT_y)
/usr/local/lib/python2.7/dist-packages/sklearn/linear_model/ridge.pyc in _errors(self, alpha, y, v, Q, QT_y)
685 w = 1.0 / (v + alpha)
686 c = np.dot(Q, self._diag_dot(w, QT_y))
--> 687 G_diag = self._decomp_diag(w, Q)
688 # handle case where y is 2-d
689 if len(y.shape) != 1:
/usr/local/lib/python2.7/dist-packages/sklearn/linear_model/ridge.pyc in _decomp_diag(self, v_prime, Q)
672 def _decomp_diag(self, v_prime, Q):
673 # compute diagonal of the matrix: dot(Q, dot(diag(v_prime), Q^T))
--> 674 return (v_prime * Q ** 2).sum(axis=-1)
675
676 def _diag_dot(self, D, B):
/usr/local/lib/python2.7/dist-packages/pandas/core/ops.pyc in wrapper(left, right, name)
531 return left._constructor(wrap_results(na_op(lvalues, rvalues)),
532 index=left.index, name=left.name,
--> 533 dtype=dtype)
534 return wrapper
535
/usr/local/lib/python2.7/dist-packages/pandas/core/series.pyc in __init__(self, data, index, dtype, name, copy, fastpath)
209 else:
210 data = _sanitize_array(data, index, dtype, copy,
--> 211 raise_cast_failure=True)
212
213 data = SingleBlockManager(data, index, fastpath=True)
/usr/local/lib/python2.7/dist-packages/pandas/core/series.pyc in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
2683 elif subarr.ndim > 1:
2684 if isinstance(data, np.ndarray):
-> 2685 raise Exception('Data must be 1-dimensional')
2686 else:
2687 subarr = _asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional
The error seems to be due to sample_weights being a Pandas series rather than a numpy array:
from sklearn.linear_model import RidgeCV
temp1 = pd.DataFrame(np.random.rand(781, 21))
temp2 = pd.Series(temp1.sum(1))
weights = pd.Series(1 + 0.1 * np.random.rand(781))
result = RidgeCV(normalize=True).fit(temp1, temp2,
sample_weight=weights)
# Exception: Data must be 1-dimensional
If you use a numpy array instead, the error goes away:
result = RidgeCV(normalize=True).fit(temp1, temp2,
sample_weight=weights.values)
This seems to be a bug; I've opened a scikit-learn issue to report this.

Resources