Using apply function Dataframe - python-3.x

I need help correcting an error I am getting.
I have the following dataframe:
x = [-0.75853, -0.75853, -0.75853, -0.75852]
y = [-0.63435, -0.63434, -0.63435, -0.63436]
z = [-0.10488, -0.10490, -0.10492, -0.10495]
w = [-0.10597, -0.10597, -0.10597, -0.10596]
df = pd.DataFrame([x, y, z, w], columns=['x', 'y', 'z', 'w'])
I created the following functions:
import math
def roll(qw, qx, qy, qz):
# x-axis rotation
sinr_cosp = +2.0 * (qw * qx + qy + qz)
cosr_cosp = +1.0 - 2.0 * (qx * qx + qy * qy)
roll = math.atan2(sinr_cosp, cosr_cosp)
return roll
def pitch(qw, qx, qy, qz):
# y-axis rotation
sinp = +2.0 * (qw * qy - qz * qx)
if(math.fabs(sinp) >= 1):
pitch = copysign(M_PI/2, sinp)
else:
pitch = math.asin(sinp)
return sinp
def yaw(qw, qx, qy, qz):
# z-axis rotation
siny_cosp = +2.0 * (qw * qz + qx * qy)
cosy_cosp = +1.0 - 2.0 * (qy * qy + qz * qz)
yaw = math.atan2(siny_cosp, cosy_cosp)
return yaw
Finally, using Pandas apply function, I tried to associate the result with a new column:
q_w = df['w']
q_x = df['x']
q_y = df['y']
q_z = df['z']
df['row'] = df.apply(roll(q_w, q_x, q_y, q_z))
The same error occurs when using the other functions.
I saw an issue right here on Stack where this bug was fixed using Numpy. I believe this is not possible here because I am using functions specific to the Math package.
TypeError Traceback (most recent call
last) /usr/local/lib/python3.6/dist-packages/pandas/core/series.py in
wrapper(self)
92 raise TypeError("cannot convert the series to "
---> 93 "{0}".format(str(converter)))
94
TypeError: cannot convert the series to
The above exception was the direct cause of the following exception:
SystemError Traceback (most recent call
last) 4 frames in ()
----> 1 df['row'] = df.apply(roll(q_w, q_x, q_y, q_z))
in roll(qw, qx, qy, qz)
4 sinr_cosp = +2.0 * (qw * qx + qy + qz)
5 cosr_cosp = +1.0 - 2.0 * (qx * qx + qy * qy)
----> 6 roll = math.atan2(sinr_cosp, cosr_cosp)
7 return roll
8
/usr/local/lib/python3.6/dist-packages/pandas/core/series.py in
wrapper(self)
88
89 def wrapper(self):
---> 90 if len(self) == 1:
91 return converter(self.iloc[0])
92 raise TypeError("cannot convert the series to "
/usr/local/lib/python3.6/dist-packages/pandas/core/series.py in
len(self)
593 Return the length of the Series.
594 """
--> 595 return len(self._data)
596
597 def view(self, dtype=None):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals/managers.py
in len(self)
290
291 def len(self):
--> 292 return len(self.items)
293
294 def unicode(self):
SystemError: PyEval_EvalFrameEx returned a result with an error set

You should using apply like
df.apply(lambda x : roll(x['w'],x['x'],x['y'],x['z']),1)
Out[291]:
0 -2.175472
1 -1.909103
2 -0.394163
3 -0.397885
dtype: float64

You could also modify your function.
def roll(df):
# x-axis rotation
sinr_cosp = +2.0 * (df.w * df.x + df.y + df.z)
cosr_cosp = +1.0 - 2.0 * (df.x * df.x + df.y * df.y)
roll = math.atan2(sinr_cosp, cosr_cosp)
return roll
df.apply(roll, axis=1)
Out:
0 -2.175472
1 -1.909103
2 -0.394163
3 -0.397885
dtype: float64

Related

How to calculate the distance between latitudes and longitudes of two stations in a pandas dataframe

I am having a dataframe with station info including latitudes and longitudes as follows:
start_lat start_lng end_lat end_lng
41.877726 -87.654787 41.888716 -87.644448
41.930000 -87.700000 41.910000 -87.700000
41.910000 -87.690000 41.930000 -87.700000
and like wise.
I want to create a distance column from these info where the distance can be either in km or in miles for distance between these start and end points.
(As shared in the kin below, when I try to implement the SO answer, I encounter an error.)
from math import sin, cos, sqrt, atan2
dlon = data.end_lng - data.start_lng
dlat = data.end_lat - data.start_lat
a = ((sin(dlat/2))**2 + cos(lat1) * cos(lat2) * (sin(dlon/2))**2)
c = 2 * atan2(sqrt(a), sqrt(1-a))
data['distance'] = R * c
TypeError Traceback (most recent call last)
<ipython-input-8-a8f8b698a81b> in <module>()
2 dlon = data.end_lng - data.start_lng
3 dlat = data.end_lat - data.start_lat
----> 4 a = ((sin(dlat/2))**2 + cos(lat1) * cos(lat2) * (sin(dlon/2))**2).apply(lambda x: float(x))
5 c = 2 * atan2(sqrt(a), sqrt(1-a))
6 data['distance'] = R * c
/usr/local/lib/python3.7/dist-packages/pandas/core/series.py in wrapper(self)
127 if len(self) == 1:
128 return converter(self.iloc[0])
--> 129 raise TypeError(f"cannot convert the series to {converter}")
130
131 wrapper.__name__ = f"__{converter.__name__}__"
TypeError: cannot convert the series to <class 'float'>
How to resolve this?
You need to do the calculation on every row, one way is to use itterows (no guarantee on the distance calculation itself):
def get_distance(row, R = 6371): #km
dlon = row[1]['end_lng'] - row[1]['start_lng']
dlat = row[1]['end_lat'] - row[1]['start_lat']
a = ((sin(dlat/2))**2 + cos(row[1]['start_lat']) * cos(row[1]['end_lat']) * (sin(dlon/2))**2)
c = 2 * atan2(sqrt(a), sqrt(1-a))
return R * c
data['distance'] = [get_distance(row) for row in data.iterrows()]

Cannot cast ufunc subtract output from dtype('float64') to dtype('int64') with casting rule 'same_kind' despite forced convertion

I have a data Series ts:
0 2599.0
1 2599.0
2 3998.0
3 3998.0
4 1299.0
5 1499.0
6 1499.0
7 2997.5
8 749.5
Name: 0, dtype: float64
and I would like to predict the next period using ARIMA:
import statsmodels.tsa.api as smt
array = []
for i, row in test.iterrows():
print("row['shop_id']: ", row['shop_id'], " row['item_id']: ", row['item_id'])
ts = pd.DataFrame(sales_monthly.loc[pd.IndexSlice[:, [row['shop_id']],[row['item_id']]], :]['item_price'].values*sales_monthly.loc[pd.IndexSlice[:, [row['shop_id']],[row['item_id']]], :]['item_cnt_day'].values).T.iloc[0]
rng = range(5)
for i in rng:
for j in rng:
try:
tmp_mdl = smt.ARMA(ts, order = (i, j)).fit(method='mle', trand='nc')
tmp_aic = tmp_mdl.aic
if tmp_aic < best_aic:
best_aic = tmp_aic
best_order = (i, j)
best_mdl = tmp_mdl
except:
continue
if best_mdl.predict()<0:
y_pred = 0
else:
y_pred = best_mdl.predict()
d = {'id':row['ID'], 'item_cnt_month': y_pred}
array.append(d)
df = pd.DataFrame(array)
df
But I get:
---------------------------------------------------------------------------
UFuncTypeError Traceback (most recent call last)
<ipython-input-104-85dfa2fa67c1> in <module>()
22 except:
23 continue
---> 24 if best_mdl.predict()<0:
25 y_pred = 0
26 else:
3 frames
/usr/local/lib/python3.6/dist-packages/statsmodels/tsa/arima_model.py in geterrors(self, params)
686 k = self.k_exog + self.k_trend
687 if k > 0:
--> 688 y -= dot(self.exog, params[:k])
689
690 k_ar = self.k_ar
UFuncTypeError: Cannot cast ufunc 'subtract' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'
So I used best_mdl.predict().astype('float32') but it didn't changed anything.

Sympy solve returns `NoneType object not scriptable`

I have tested this with both sympy==1.7.1 and sympy==1.5.1 and get the same result.
from sympy import *
import inspect
THT,HT,DBH,dib, c0,c1, c2,c3,c4 = symbols('THT HT DBH dib c0 c1 c2 c3 c4')
init_printing()
f = c2 + (c3*DBH) + (c4 * THT)
rhs = 1 - (((HT-1)/(THT - 1))**c1)* (1- exp(c0/f))
eqn = solve( dib - (DBH * (c0 - (f * ln(rhs)))), HT)
Which returns
# Out [37]:
# output
TypeErrorTraceback (most recent call last)
<ipython-input-37-619e8281e2ae> in <module>
9 # original rhs = 1 - (((HT-1)/(THT - 1))**c1)* (1- exp(c0/f))
10
---> 11 eqn = solve( dib - (DBH * (c0 - (f * ln(rhs)))), HT)
12
13 #eqn = solve( dib - (DBH * (b1+b2 * ln(1-(lam*p)))), HT)
~/.pyenv/versions/3.7.0/lib/python3.7/site-packages/sympy/solvers/solvers.py in solve(f, *symbols, **flags)
1095 ok = False
1096 if fi.has(*symset):
-> 1097 ok = True
1098 else:
1099 if fi.is_number:
~/.pyenv/versions/3.7.0/lib/python3.7/site-packages/sympy/solvers/solvers.py in _solve(f, *symbols, **flags)
1458 result = list(result)
1459 if check:
-> 1460 # all solutions have been checked but now we must
1461 # check that the solutions do not set denominators
1462 # in any factor to zero
TypeError: 'NoneType' object is not subscriptable
:end:
I can solve the following paired-down version:
In [47]: solve( f * ln(rhs), HT)
Out[47]:
⎡c1___ c1___ ⎤
⎣╲╱ 0 ⋅THT - ╲╱ 0 + 1⎦
and adding an integer in place of c0 it will solve:
solve( 2-(f * ln(rhs)), HT)
But adding a symbol causes the NoneType error. Any suggestions would be much appreciated.

How to handle JAX reshape with JIT

I am trying to implement entmax-alpha as is described in here.
Here is the code.
import jax
import jax.numpy as jnp
from jax import custom_jvp
from jax import jit
from jax import lax
from jax import vmap
#jax.partial(jit, static_argnums=(2,))
def p_tau(z, tau, alpha=1.5):
return jnp.clip((alpha - 1) * z - tau, a_min=0) ** (1 / (alpha - 1))
#jit
def get_tau(tau, tau_max, tau_min, z_value):
return lax.cond(z_value < 1,
lambda _: (tau, tau_min),
lambda _: (tau_max, tau),
operand=None
)
#jit
def body(kwargs, x):
tau_min = kwargs['tau_min']
tau_max = kwargs['tau_max']
z = kwargs['z']
alpha = kwargs['alpha']
tau = (tau_min + tau_max) / 2
z_value = p_tau(z, tau, alpha).sum()
taus = get_tau(tau, tau_max, tau_min, z_value)
tau_max, tau_min = taus[0], taus[1]
return {'tau_min': tau_min, 'tau_max': tau_max, 'z': z, 'alpha': alpha}, None
#jax.partial(jit, static_argnums=(1, 2,))
def map_row(z_input, alpha, T):
z = (alpha - 1) * z_input
tau_min, tau_max = jnp.min(z) - 1, jnp.max(z) - z.shape[0] ** (1 - alpha)
result, _ = lax.scan(body, {'tau_min': tau_min, 'tau_max': tau_max, 'z': z, 'alpha': alpha}, xs=None,
length=T)
tau = (result['tau_max'] + result['tau_min']) / 2
result = p_tau(z, tau, alpha)
return result / result.sum()
#jax.partial(custom_jvp, nondiff_argnums=(1, 2, 3,))
def entmax(input, axis=-1, alpha=1.5, T=10):
reduce_length = input.shape[axis]
input = jnp.swapaxes(input, -1, axis)
input = input.reshape(input.size / reduce_length, reduce_length)
result = vmap(jax.partial(map_row, alpha=alpha, T=T), 0)(input)
return jnp.swapaxes(result, -1, axis)
#jax.partial(jit, static_argnums=(1, 2,))
def _entmax_jvp_impl(axis, alpha, T, primals, tangents):
input = primals[0]
Y = entmax(input, axis, alpha, T)
gppr = Y ** (2 - alpha)
grad_output = tangents[0]
dX = grad_output * gppr
q = dX.sum(axis=axis) / gppr.sum(axis=axis)
q = jnp.expand_dims(q, axis=axis)
dX -= q * gppr
return Y, dX
#entmax.defjvp
def entmax_jvp(axis, alpha, T, primals, tangents):
return _entmax_jvp_impl(axis, alpha, T, primals, tangents)
When I call it with the following code:
import numpy as np
from jax import value_and_grad
input = jnp.array(np.random.randn(64, 10))
weight = jnp.array(np.random.randn(64, 10))
def toy(input, weight):
return (weight*entmax(input, axis=-1, alpha=1.5, T=20)).sum()
value_and_grad(toy)(input, weight)
I got the following error.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-3a62e54c67d2> in <module>()
7 return (weight*entmax(input, axis=-1, alpha=1.5, T=20)).sum()
8
----> 9 value_and_grad(toy)(input, weight)
35 frames
<ipython-input-1-d85b1daec668> in entmax(input, axis, alpha, T)
49 #jax.partial(custom_jvp, nondiff_argnums=(1, 2, 3,))
50 def entmax(input, axis=-1, alpha=1.5, T=10):
---> 51 reduce_length = input.shape[axis]
52 input = jnp.swapaxes(input, -1, axis)
53 input = input.reshape(input.size / reduce_length, reduce_length)
TypeError: tuple indices must be integers or slices, not DynamicJaxprTracer
It seems to be always connected to the reshape operations. I am not sure why this happens, and any help will be really appreciated.
To recreate the problem, here is the colab notebook
Thanks a lot.
The error comes from the fact that you are attempting to index a Python tuple with a traced quantity, axis. You can fix this error by making axis a static argument:
#jax.partial(jit, static_argnums=(0, 1, 2,))
def _entmax_jvp_impl(axis, alpha, T, primals, tangents):
...
Unfortunately, this uncovers another problem: p_tau declares that the alpha parameter is static, but body() calls this with a traced quantity. This quantity cannot be easily marked static in body because it is passed within a dictionary of parameters that contains the input that is being traced.
To fix this, you'll have to rewrite your function signatures, carefully marking in each one which inputs are static and which are not, and making sure the two do not mix across the layers of function calls.

Scipy.optimize minimize function arguments don't work for multiple vector inputs

I am trying to generate the solutions to PCA weights for some (yield-curve) market-data. However, I am getting an error message in my scipy.optimize.minimize function.
The main error is that it seems to be reading the arguments into the minimization function wrong (error_sum).
I looked up the generic form here, but it doesn't work for my code when I utilize it.
Scipy Minimize - Unable to minimize objective function
import scipy as sc
import scipy.optimize as optimize
from scipy.optimize import minimize
w1 = 1.0
w2 = 1.0
w3 = 1.0
row_C = np.zeros(len(df_.columns)) # initialize current row as zero
row_T = df_.iloc[-1].values # get the target row, which we have set as the last row of the panda dataframe
row_c = np.array([-0.35865725, 0.52793819, 0.70654759, -0.28909144, 1.08467752, 0.91287324])
row_t = np.array([1.7971, 2.5756, 2.2005, 1.4966, 1.45 , 1.8022])
def error_sum(row_c, row_t, params): # row_c is estimated and row_t is target
w1 = params[0]
w2 = params[1]
w2 = params[2]
if len(row_c) != len(row_t): return print('error where x and y points are not same length')
for cnt in range(len(row_c)):
row_c[cnt] = w1 * row1[cnt] + w2 * row2[cnt] + w3 * row3[cnt]
return np.sum(np.abs(row_c - row_t))
for cnt in range(len(df_.columns)): # loop to calculate the PCA-based moves
row_c[cnt] = w1 * row1[cnt] + w2 * row2[cnt] + w3 * row3[cnt]
print(np.sum(np.abs(row_c - row_t))) # this is to get the sum of absolute difference errors
print(error_sum(row_c, row_t, x0))
x0 = np.array([1.0, 1.0, 1.0]) # parameters to optimize
bnds = ((-10.0, 10.0), (-10.0, 10.0), (-10.0, 10.0)) # boundary conditions of x0 parameter set
options = {'maxiter': 100}
res = minimize(error_sum, x0 ,(row_c, row_t), bounds = bnds, method='nelder-mead', options={'xtol': 1e-8, 'disp': True})
The error message as per below
error where x and y points are not same length
TypeError Traceback (most recent call last)
<ipython-input-158-8c50b421e58a> in <module>()
32 options = {'maxiter': 100}
33
---> 34 res = minimize(error_sum, x0 ,(row_c, row_t), bounds = bnds, method='nelder-mead', options={'xtol': 1e-8, 'disp': True})
C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
473 callback=callback, **options)
474 elif meth == 'nelder-mead':
--> 475 return _minimize_neldermead(fun, x0, args, callback, **options)
476 elif meth == 'powell':
477 return _minimize_powell(fun, x0, args, callback, **options)
C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\optimize.py in _minimize_neldermead(func, x0, args, callback, maxiter, maxfev, disp, return_all, initial_simplex, xatol, fatol, **unknown_options)
549 doshrink = 0
550
--> 551 if fxr < fsim[0]:
552 xe = (1 + rho * chi) * xbar - rho * chi * sim[-1]
553 fxe = func(xe)
TypeError: '>' not supported between instances of 'float' and 'NoneType'
Try to change the order of the arguments in the definition of error_sum to
def error_sum(params, row_c, row_t)
if you want to get the optimum of params and call the function like this:
minimize(error_sum, x0, args=(row_c, row_t), bounds = bnds, method='nelder-mead', options={'xtol': 1e-8, 'disp': True})

Resources