error: unpack requires a buffer of 16 bytes - python-3.x

The following code gives me an error:
"error: unpack requires a buffer of 16 bytes"
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import cartopy
import cartopy.crs as ccrs
from cartopy.io.shapereader import Reader
# Reading the data from shapefiles .shp
basin = Reader('C:\\...\\BasinCOL2014.shp')
fig = plt.figure (figsize = (10,5))
ax = fig.add_subplot (1, 1, 1, projection = ccrs.PlateCarree(central_longitude=0, globe=None))
ax.set_extent ([-66.0, -80.0, -5.0, 13.0])
ax.gridlines (draw_labels = True)
# Aditional elements to display in map
ax.coastlines (resolution = '10m')
ax.add_feature (cartopy.feature.RIVERS, linewidth=4)
ax.add_geometries (basin.geometries(), crs = ccrs.Geodetic(), edgecolor = 't', facecolor = 'none')
I expect the output will be a map with all the three elements Coastlines, Rivers and Basin. I only get two of them (see image below).
Partial results I´m getting from the code above
Currently installed by Anaconda Navigator 1.9.7
- Jupyter notebook 5.7.8
- Cartopy 0.17.0
---------------------------------------------------------------------------
error Traceback (most recent call last)
<ipython-input-6-628330e4110c> in <module>
12 ax.coastlines (resolution = '10m')
13 ax.add_feature (cartopy.feature.RIVERS, linewidth=4)
---> 14 ax.add_geometries (basin.geometries(), crs = ccrs.Geodetic(), edgecolor = 't', facecolor = 'none')
C:\ProgramData\Anaconda3\lib\site-packages\cartopy\mpl\geoaxes.py in add_geometries(self, geoms, crs, **kwargs)
586 """
587 styler = kwargs.pop('styler', None)
--> 588 feature = cartopy.feature.ShapelyFeature(geoms, crs, **kwargs)
589 return self.add_feature(feature, styler=styler)
590
C:\ProgramData\Anaconda3\lib\site-packages\cartopy\feature\__init__.py in __init__(self, geometries, crs, **kwargs)
229 """
230 super(ShapelyFeature, self).__init__(crs, **kwargs)
--> 231 self._geoms = tuple(geometries)
232
233 def geometries(self):
C:\ProgramData\Anaconda3\lib\site-packages\cartopy\io\shapereader.py in geometries(self)
234 geometry_factory = self._geometry_factory
235 for i in range(self._reader.numRecords):
--> 236 shape = self._reader.shape(i)
237 yield _make_geometry(geometry_factory, shape)
238
C:\ProgramData\Anaconda3\lib\site-packages\shapefile.py in shape(self, i)
811 return k
812 shp.seek(offset)
--> 813 return self.__shape()
814
815 def shapes(self):
C:\ProgramData\Anaconda3\lib\site-packages\shapefile.py in __shape(self)
749 # Read m extremes and values
750 if shapeType in (13,15,18,23,25,28,31):
--> 751 (mmin, mmax) = unpack("<2d", f.read(16))
752 # Measure values less than -10e38 are nodata values according to the spec
753 record.m = []
error: unpack requires a buffer of 16 bytes

Related

How do I solve the Valuerror problem that appears when using the optimize.curve_fit function in my code?

What am I doing wrong here? I have tried to see if the shape of ydata and t are the same and they are in fact the same. The only thing that works is when I slice the output of the integrate.odeint function using [:,1] to give me the curve fit of didt. But the thing is, I require all three curves because I plan on graphing all three results. Your help is greatly appreciated.
import pandas as pd
import numpy as np
from datetime import datetime
from scipy import optimize
from scipy import integrate
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
N0=10203134 #susceptible population
ydata=np.array(df_plot.Jordan[253:]) #beginning on 1/10/2020
t=np.arange(len(ydata))
I0=ydata[0] #initial conditions
R0=4821
S0=N0-I0-R0
def SIR_Model(SIR,t,beta,alpha):
S,I,R=SIR
dsdt=-beta*S*I/N0
didt=beta*S*I/N0 -alpha*I
drdt=alpha*I
return dsdt,didt,drdt
def fit_ode(x,beta,alpha):
return integrate.odeint(SIR_Model,(S0,I0,R0),t,args=(beta,alpha))
print(ydata.shape)
print(t.shape)
popt,pcov=optimize.curve_fit(fit_ode,t,ydata)
perr=np.sqrt(np.diag(pcov))
print("standard deviation errors:",str(perr))
print("Optimal Parameters: Beta=",popt[0], "alpha:",popt[1])
ValueError Traceback (most recent call last)
<ipython-input-194-aa1bb286dbba> in <module>
----> 1 popt,pcov=optimize.curve_fit(fit_ode,t,ydata)
2 perr=np.sqrt(np.diag(pcov))
3 print("standard deviation errors:",str(perr))
4 print("Optimal Parameters: Beta=",popt[0], "alpha:",popt[1])
5
C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\minpack.py in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs)
782 # Remove full_output from kwargs, otherwise we're passing it in twice.
783 return_full = kwargs.pop('full_output', False)
--> 784 res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
785 popt, pcov, infodict, errmsg, ier = res
786 ysize = len(infodict['fvec'])
C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\minpack.py in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag)
408 if not isinstance(args, tuple):
409 args = (args,)
--> 410 shape, dtype = _check_func('leastsq', 'func', func, x0, args, n)
411 m = shape[0]
412
C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\minpack.py in _check_func(checker, argname, thefunc, x0, args, numinputs, output_shape)
22 def _check_func(checker, argname, thefunc, x0, args, numinputs,
23 output_shape=None):
---> 24 res = atleast_1d(thefunc(*((x0[:numinputs],) + args)))
25 if (output_shape is not None) and (shape(res) != output_shape):
26 if (output_shape[0] != 1):
C:\ProgramData\Anaconda3\lib\site-packages\scipy\optimize\minpack.py in func_wrapped(params)
482 if transform is None:
483 def func_wrapped(params):
--> 484 return func(xdata, *params) - ydata
485 elif transform.ndim == 1:
486 def func_wrapped(params):
ValueError: operands could not be broadcast together with shapes (138,3) (138,)

Jupyter Notebook:Pyhon3

I have edited my question by uploading the whole code, so if you could check this out #Nathon_Marotte Sir.
I am trying to run this code and it gives me an error:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
observations = 1000
xs = np.random.uniform(low=-10, high=10, size=(observations,1))
zs = np.random.uniform(-10,10,(observations,1))
inputs = np.column_stack((xs,zs))
print(inputs.shape)
noise= np.random.uniform(-1, 1, (observations, 1))
targets = 2*xs - 3*zs + 5 + noise
print(targets.shape)
#observations=1000
targets = targets.reshape(observations,)
fig=plt.figure()
ax = fig.add_subplot(111,projection='3d')
ax.plot(xs, zs, targets)
ax.set_xlabel('xs')
ax.set_ylabel('zs')
ax.set_zlabel('Targets')
ax.view_init(azim=250)
plt.show()
targets=targets.reshape(observations,)
Error:
ValueError Traceback (most recent call last)
<ipython-input-44-28d2a78b4ad5> in <module>
3 ax = fig.add_subplot(111,projection='3d')
4
----> 5 ax.plot(xs, zs, targets)
6
7 ax.set_xlabel('xs')
F:\Softwares\Anaconda\Installed\lib\site-packages\mpl_toolkits\mplot3d\axes3d.py in plot(self, xs, ys, zdir, *args, **kwargs)
1467
1468 # Match length
-> 1469 zs = np.broadcast_to(zs, np.shape(xs))
1470
1471 lines = super().plot(xs, ys, *args, **kwargs)
<__array_function__ internals> in broadcast_to(*args, **kwargs)
F:\Softwares\Anaconda\Installed\lib\site-packages\numpy\lib\stride_tricks.py in broadcast_to(array, shape, subok)
178 [1, 2, 3]])
179 """
--> 180 return _broadcast_to(array, shape, subok=subok, readonly=True)
181
182
F:\Softwares\Anaconda\Installed\lib\site-packages\numpy\lib\stride_tricks.py in _broadcast_to(array, shape, subok, readonly)
121 'negative')
122 extras = []
--> 123 it = np.nditer(
124 (array,), flags=['multi_index', 'refs_ok', 'zerosize_ok'] + extras,
125 op_flags=['readonly'], itershape=shape, order='C')
ValueError: operands could not be broadcast together with remapped shapes [original->remapped]: (1000,) and requested shape (1000,1)
As I am a newbie and not have sufficient knowledge to fix this. So if you could help me out to fix this bug out? That would be great.
Thanking you in advance.
I am done with my question and it worked when I used Google Colab resources.
Thank you all and especially #NathanMarotte

How do I correctly write the syntax for performing and plotting a for loop operation?

I am trying to create a for loop which uses a defined function (B_lambda) and takes in values of wavelength and temperature to produce values of intensity. i.e. I want the loop to take the function B_lambda and to run through every value within my listed wavelength range for each temperature in the temperature list. Then I want to plot the results. I am not very good with the syntax and have tried many ways but nothing is producing what I need and I am mostly getting errors. I have no idea how to use a for loop to plot and all online sources that I have checked out have not helped me with using a defined function in a for loop. I will put my latest code that seems to have the least errors down below with the error message:
import matplotlib.pylab as plt
import numpy as np
from astropy import units as u
import scipy.constants
%matplotlib inline
#Importing constants to use.
h = scipy.constants.h
c = scipy.constants.c
k = scipy.constants.k
wavelengths= np.arange(1000,30000)*1.e-10
temperature=[3000,4000,5000,6000]
for lam in wavelengths:
for T in temperature:
B_lambda = ((2*h*c**2)/(lam**5))*((1)/(np.exp((h*c)/(lam*k*T))-1))
plt.figure()
plt.plot(wavelengths,B_lambda)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-6-73b866241c49> in <module>
17 B_lambda = ((2*h*c**2)/(lam**5))*((1)/(np.exp((h*c)/(lam*k*T))-1))
18 plt.figure()
---> 19 plt.plot(wavelengths,B_lambda)
20
21
/usr/local/lib/python3.6/dist-packages/matplotlib/pyplot.py in plot(scalex, scaley, data, *args, **kwargs)
2787 return gca().plot(
2788 *args, scalex=scalex, scaley=scaley, **({"data": data} if data
-> 2789 is not None else {}), **kwargs)
2790
2791
/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_axes.py in plot(self, scalex, scaley, data, *args, **kwargs)
1663 """
1664 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D._alias_map)
-> 1665 lines = [*self._get_lines(*args, data=data, **kwargs)]
1666 for line in lines:
1667 self.add_line(line)
/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_base.py in __call__(self, *args, **kwargs)
223 this += args[0],
224 args = args[1:]
--> 225 yield from self._plot_args(this, kwargs)
226
227 def get_next_color(self):
/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_base.py in _plot_args(self, tup, kwargs)
389 x, y = index_of(tup[-1])
390
--> 391 x, y = self._xy_from_xy(x, y)
392
393 if self.command == 'plot':
/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_base.py in _xy_from_xy(self, x, y)
268 if x.shape[0] != y.shape[0]:
269 raise ValueError("x and y must have same first dimension, but "
--> 270 "have shapes {} and {}".format(x.shape, y.shape))
271 if x.ndim > 2 or y.ndim > 2:
272 raise ValueError("x and y can be no greater than 2-D, but have "
ValueError: x and y must have same first dimension, but have shapes (29000,) and (1,)```
First thing to note (and this is minor) is that astropy is not required to run your code. So, you can simplify the import statements.
import matplotlib.pylab as plt
import numpy as np
import scipy.constants
%matplotlib inline
#Importing constants to use.
h = scipy.constants.h
c = scipy.constants.c
k = scipy.constants.k
wavelengths= np.arange(1000,30000,100)*1.e-10 # here, I chose steps of 100, because plotting 29000 datapoints takes a while
temperature=[3000,4000,5000,6000]
Secondly, to tidy up the loop a bit, you can write a helper function, that youn call from within you loop:
def f(lam, T):
return ((2*h*c**2)/(lam**5))*((1)/(np.exp((h*c)/(lam*k*T))-1))
now you can collect the output of your function, together with the input parameters, e.g. in a list of tuples:
outputs = []
for lam in wavelengths:
for T in temperature:
outputs.append((lam, T, f(lam, T)))
Since you vary both wavelength and temperature, a 3d plot makes sense:
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111, projection='3d')
ax.plot(*zip(*outputs))
An alternative would be to display the data as an image, using colour to indicate the function output.
I am also including an alternative method to generate the data in this one. Since the function f can take arrays as input, you can feed one temperature at a time, and with it, all the wavelengths simultaneously.
# initialise output as array with proper shape
outputs = np.zeros((len(wavelengths), len(temperature)))
for i, T in enumerate(temperature):
outputs[:,i] = f(wavelengths, T)
The output now is a large matrix, which you can visualise as an image:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(outputs, aspect=10e8, interpolation='none',
extent=[
np.min(temperature),
np.max(temperature),
np.max(wavelengths),
np.min(wavelengths)]
)

How to do clustering with k-means algorithm for an imported data set with proper scaling of both axis

I m new to data science and python, and jupyter notebook, I m currently studying how to do k means clustering on a data set. I came across ways in which can introduce data
Data = {'x': [25,34,22,27,33,33,31,22,35,34,67,54,57,43,50,57,59,52,65,47,49,48,35,33,44,45,38,43,51,46],
'y': [79,51,53,78,59,74,73,57,69,75,51,32,40,47,53,36,35,58,59,50,25,20,14,12,20,5,29,27,8,7]
}
df = DataFrame(Data,columns=['x','y'])
and use of blobs
data = make_blobs(n_samples=200, n_features=2, centers=4, cluster_std=1.6, random_state=50)
but I would like to know how to do a proper code with a csv file imported from my computer and do a k means with scaling, thank you in advance, I could not find relevant blogs to help me
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from sklearn.cluster import KMeans
data=pd.read_csv("C:/Users/Dulangi/Downloads/winequality-red.csv")
data
data["alcohol"]=data["alcohol"]/data["alcohol"].max()
data["quality"]=data["quality"]/data["quality"].max()
plt.scatter(data["alcohol"],data['quality'])
plt.xlabel("alcohol")
plt.ylabel('quality')
plt.show()
x=data.copy()
kmeans=KMeans(2)
kmeans.fit(x)
clusters=x.copy()
clusters['cluster_pred']=kmeans.fit_predict(x)
plt.scatter(clusters["alcohol"],clusters['quality'],c=clusters['cluster_pred'],cmap='rainbow')
plt.xlabel("alcohol")
plt.ylabel('quality')
plt.show()
from sklearn import preprocessing
x_scaled=preprocessing.scale(x)
#x_scaled
wcss=[]
for i in range(1,30):
kmeans=KMeans(i)
kmeans.fit(x_scaled)
wcss.append(kmeans.inertia_)
wcss
plt.plot(range(1,30),wcss)
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()
This is what i tried
the error i got
ValueError Traceback (most recent call last)
<ipython-input-12-d4955ce8615e> in <module>
39
40
---> 41 plt.plot(range(1,30),wcss)
42 plt.xlabel('Number of clusters')
43 plt.ylabel('WCSS')
~\Anaconda3\lib\site-packages\matplotlib\pyplot.py in plot(scalex, scaley, data, *args, **kwargs)
2787 return gca().plot(
2788 *args, scalex=scalex, scaley=scaley, **({"data": data} if data
-> 2789 is not None else {}), **kwargs)
2790
2791
~\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in plot(self, scalex, scaley, data, *args, **kwargs)
1664 """
1665 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D._alias_map)
-> 1666 lines = [*self._get_lines(*args, data=data, **kwargs)]
1667 for line in lines:
1668 self.add_line(line)
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in __call__(self, *args, **kwargs)
223 this += args[0],
224 args = args[1:]
--> 225 yield from self._plot_args(this, kwargs)
226
227 def get_next_color(self):
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _plot_args(self, tup, kwargs)
389 x, y = index_of(tup[-1])
390
--> 391 x, y = self._xy_from_xy(x, y)
392
393 if self.command == 'plot':
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _xy_from_xy(self, x, y)
268 if x.shape[0] != y.shape[0]:
269 raise ValueError("x and y must have same first dimension, but "
--> 270 "have shapes {} and {}".format(x.shape, y.shape))
271 if x.ndim > 2 or y.ndim > 2:
272 raise ValueError("x and y can be no greater than 2-D, but have "
ValueError: x and y must have same first dimension, but have shapes (29,) and (1,)
You can easily do by using scikit-Learn
import pandas as pd
data=pd.read_csv('myfile.csv')
df=pd.DataFrame(data,index=None)
df.head()
Check if rows contain any null values
df.isnull().sum()
Drop all the rows with null values if any
df_numeric.dropna(inplace=True)
Normalize data
Normalize the data with MinMax scaling provided by sklearn
from sklearn import preprocessing
minmax_processed = preprocessing.MinMaxScaler().fit_transform(df.drop('title',axis=1))
df_numeric_scaled = pd.DataFrame(minmax_processed, index=df.index, columns=df.columns[:-1])
df_numeric_scaled.head()
from sklearn.cluster import KMeans
Apply K-Means Clustering
What k to choose?
Let's fit cluster size 1 to 20 on our data and take a look at the corresponding score value.
Nc = range(1, 20)
kmeans = [KMeans(n_clusters=i) for i in Nc]
score = [kmeans[i].fit(df_numeric_scaled).score(df_numeric_scaled) for i in range(len(kmeans))]
These score values signify how far our observations are from the cluster center. We want to keep this score value around 0. A large positive or a large negative value would indicate that the cluster center is far from the observations.
Based on these scores value, we plot an Elbow curve to decide which cluster size is optimal. Note that we are dealing with tradeoff between cluster size(hence the computation required) and the relative accuracy.
import matplotlib as pl
pl.plot(Nc,score)
pl.xlabel('Number of Clusters')
pl.ylabel('Score')
pl.title('Elbow Curve')
pl.show()
Fit K-Means for clustering with k=5
kmeans = KMeans(n_clusters=5)
kmeans.fit(df_numeric_scaled)
df['cluster'] = kmeans.labels_
df.head()

Create linear model to check correlation tokenize error

I have data like the sample below, which has 4 continuous columns [x0 to x3] and a binary column y. y has two values 1.0 and 0.0. I’m trying to check for correlation between the binary column y and one of the continuous columns x0, using the CatConCor function below, but I’m getting the error message below. The function creates a linear regression model and calcs the p value for the residuals with and without the categorical variable. If anyone can please point out the issue or how to fix it, it would be very much appreciated.
Data:
x_r x0 x1 x2 x3 y
0 0 0.466726 0.030126 0.998330 0.892770 0.0
1 1 0.173168 0.525810 -0.079341 -0.112151 0.0
2 2 -0.854467 0.770712 0.929614 -0.224779 0.0
3 3 -0.370574 0.568183 -0.928269 0.843253 0.0
4 4 -0.659431 -0.948491 -0.091534 0.706157 0.0
Code:
import numpy as np
import pandas as pd
from time import time
import scipy.stats as stats
from IPython.display import display # Allows the use of display() for DataFrames
# Pretty display for notebooks
%matplotlib inline
###########################################
# Suppress matplotlib user warnings
# Necessary for newer version of matplotlib
import warnings
warnings.filterwarnings("ignore", category = UserWarning, module = "matplotlib")
#
# Display inline matplotlib plots with IPython
from IPython import get_ipython
get_ipython().run_line_magic('matplotlib', 'inline')
###########################################
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# correlation between categorical variable and continuous variable
def CatConCor(df,catVar,conVar):
import statsmodels.api as sm
from statsmodels.formula.api import ols
# subsetting data for one categorical column and one continuous column
data2=df.copy()[[catVar,conVar]]
data2[catVar]=data2[catVar].astype('category')
mod = ols(conVar+'~'+catVar,
data=data2).fit()
aov_table = sm.stats.anova_lm(mod, typ=2)
if aov_table['PR(>F)'][0] < 0.05:
print('Correlated p='+str(aov_table['PR(>F)'][0]))
else:
print('Uncorrelated p='+str(aov_table['PR(>F)'][0]))
# checking for correlation between categorical and continuous variables
CatConCor(df=train_df,catVar='y',conVar='x0')
Error:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-6-80f83b8c8e14> in <module>()
1 # checking for correlation between categorical and continuous variables
2
----> 3 CatConCor(df=train_df,catVar='y',conVar='x0')
<ipython-input-2-35404ba1d697> in CatConCor(df, catVar, conVar)
103
104 mod = ols(conVar+'~'+catVar,
--> 105 data=data2).fit()
106
107 aov_table = sm.stats.anova_lm(mod, typ=2)
~/anaconda2/envs/py36/lib/python3.6/site-packages/statsmodels/base/model.py in from_formula(cls, formula, data, subset, drop_cols, *args, **kwargs)
153
154 tmp = handle_formula_data(data, None, formula, depth=eval_env,
--> 155 missing=missing)
156 ((endog, exog), missing_idx, design_info) = tmp
157
~/anaconda2/envs/py36/lib/python3.6/site-packages/statsmodels/formula/formulatools.py in handle_formula_data(Y, X, formula, depth, missing)
63 if data_util._is_using_pandas(Y, None):
64 result = dmatrices(formula, Y, depth, return_type='dataframe',
---> 65 NA_action=na_action)
66 else:
67 result = dmatrices(formula, Y, depth, return_type='dataframe',
~/anaconda2/envs/py36/lib/python3.6/site-packages/patsy/highlevel.py in dmatrices(formula_like, data, eval_env, NA_action, return_type)
308 eval_env = EvalEnvironment.capture(eval_env, reference=1)
309 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 310 NA_action, return_type)
311 if lhs.shape[1] == 0:
312 raise PatsyError("model is missing required outcome variables")
~/anaconda2/envs/py36/lib/python3.6/site-packages/patsy/highlevel.py in _do_highlevel_design(formula_like, data, eval_env, NA_action, return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker, eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
~/anaconda2/envs/py36/lib/python3.6/site-packages/patsy/highlevel.py in _try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
60 "ascii-only, or else upgrade to Python 3.")
61 if isinstance(formula_like, str):
---> 62 formula_like = ModelDesc.from_formula(formula_like)
63 # fallthrough
64 if isinstance(formula_like, ModelDesc):
~/anaconda2/envs/py36/lib/python3.6/site-packages/patsy/desc.py in from_formula(cls, tree_or_string)
162 tree = tree_or_string
163 else:
--> 164 tree = parse_formula(tree_or_string)
165 value = Evaluator().eval(tree, require_evalexpr=False)
166 assert isinstance(value, cls)
~/anaconda2/envs/py36/lib/python3.6/site-packages/patsy/parse_formula.py in parse_formula(code, extra_operators)
146 tree = infix_parse(_tokenize_formula(code, operator_strings),
147 operators,
--> 148 _atomic_token_types)
149 if not isinstance(tree, ParseNode) or tree.type != "~":
150 tree = ParseNode("~", None, [tree], tree.origin)
~/anaconda2/envs/py36/lib/python3.6/site-packages/patsy/infix_parser.py in infix_parse(tokens, operators, atomic_types, trace)
208
209 want_noun = True
--> 210 for token in token_source:
211 if c.trace:
212 print("Reading next token (want_noun=%r)" % (want_noun,))
~/anaconda2/envs/py36/lib/python3.6/site-packages/patsy/parse_formula.py in _tokenize_formula(code, operator_strings)
92 else:
93 it.push_back((pytype, token_string, origin))
---> 94 yield _read_python_expr(it, end_tokens)
95
96 def test__tokenize_formula():
~/anaconda2/envs/py36/lib/python3.6/site-packages/patsy/parse_formula.py in _read_python_expr(it, end_tokens)
42 origins = []
43 bracket_level = 0
---> 44 for pytype, token_string, origin in it:
45 assert bracket_level >= 0
46 if bracket_level == 0 and token_string in end_tokens:
~/anaconda2/envs/py36/lib/python3.6/site-packages/patsy/util.py in next(self)
330 else:
331 # May raise StopIteration
--> 332 return six.advance_iterator(self._it)
333 __next__ = next
334
~/anaconda2/envs/py36/lib/python3.6/site-packages/patsy/tokens.py in python_tokenize(code)
33 break
34 origin = Origin(code, start, end)
---> 35 assert pytype not in (tokenize.NL, tokenize.NEWLINE)
36 if pytype == tokenize.ERRORTOKEN:
37 raise PatsyError("error tokenizing input "
AssertionError:
Upgrading patsy to 0.5.1 fixed the issue. I found the tip here:
https://github.com/statsmodels/statsmodels/issues/5343

Resources