Related
I have two pandas dataframes, named stationary_train and test_exog. Their shapes and some sample data are shown below. I'm trying to fit an ARIMA model to the data using ARIMA from statsmodels: I want to forecast the endogenous variable "stationary_train" using the exogenous variable "test_exog". I'm using the code below and getting the error below, and I'm unclear why. The two dataframes are the same shape, and I don't see any missing values in either dataframe. Can anyone see what the issue is and suggest how to fix it?
data:
test_exog[:-1].shape
(203, 1)
stationary_train.shape
(203, 1)
print(exog_auto_model.order)
(11, 0, 6)
print(test_exog[:-1].head())
exog_passengers
month
2000-01-01 46513.9
2000-02-01 48555.7
2000-03-01 58812.4
2000-04-01 56101.1
2000-05-01 58237.4
print(stationary_train.head())
passengers
month
2000-02-01 2034.0
2000-03-01 10238.0
2000-04-01 -2731.0
2000-05-01 2168.0
2000-06-01 2872.0
code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# statmodels
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARMA, ARIMA
# datetime
from datetime import datetime
# Fit an ARIMA model of the order chosen by exog_auto_model, with test_exog
# (minus its last row) as the exogenous regressor.
# NOTE(review): endog is passed as a bare ndarray while exog keeps its index;
# the printed heads show exog starting 2000-01-01 and endog starting
# 2000-02-01, so rows are paired by position with a one-month offset --
# confirm this is intended.
ARIMA(endog=stationary_train.values.reshape(-1,1),
exog=test_exog[:-1],
order=exog_auto_model.order).fit()
error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-90-ae6d07b32cf7> in <module>
1 exog_predict= ARIMA(endog=stationary_train.values.reshape(-1,1),
2 exog=test_exog[:-1],
----> 3 order=exog_auto_model.order).fit()
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in fit(self, start_params, trend, method, transparams, solver, maxiter, full_output, disp, callback, start_ar_lags, **kwargs)
1028 maxiter=maxiter,
1029 full_output=full_output, disp=disp,
-> 1030 callback=callback, **kwargs)
1031 params = mlefit.params
1032
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/base/model.py in fit(self, start_params, method, maxiter, full_output, disp, fargs, callback, retall, skip_hessian, **kwargs)
525 callback=callback,
526 retall=retall,
--> 527 full_output=full_output)
528
529 # NOTE: this is for fit_regularized and should be generalized
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/base/optimizer.py in _fit(self, objective, gradient, start_params, fargs, kwargs, hessian, method, maxiter, full_output, disp, callback, retall)
225 disp=disp, maxiter=maxiter, callback=callback,
226 retall=retall, full_output=full_output,
--> 227 hess=hessian)
228
229 optim_settings = {'optimizer': method, 'start_params': start_params,
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/base/optimizer.py in _fit_lbfgs(f, score, start_params, fargs, kwargs, disp, maxiter, callback, retall, full_output, hess)
630 callback=callback, args=fargs,
631 bounds=bounds, disp=disp,
--> 632 **extra_kwargs)
633
634 if full_output:
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/lbfgsb.py in fmin_l_bfgs_b(func, x0, fprime, args, approx_grad, bounds, m, factr, pgtol, epsilon, iprint, maxfun, maxiter, disp, callback, maxls)
196
197 res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
--> 198 **opts)
199 d = {'grad': res['jac'],
200 'task': res['message'],
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/lbfgsb.py in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, finite_diff_rel_step, **unknown_options)
306 sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
307 bounds=new_bounds,
--> 308 finite_diff_rel_step=finite_diff_rel_step)
309
310 func_and_grad = sf.fun_and_grad
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/optimize.py in _prepare_scalar_function(fun, x0, jac, args, bounds, epsilon, finite_diff_rel_step, hess)
260 # calculation reduces overall function evaluations.
261 sf = ScalarFunction(fun, x0, args, grad, hess,
--> 262 finite_diff_rel_step, bounds, epsilon=epsilon)
263
264 return sf
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/_differentiable_functions.py in __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step, finite_diff_bounds, epsilon)
74
75 self._update_fun_impl = update_fun
---> 76 self._update_fun()
77
78 # Gradient evaluation
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/_differentiable_functions.py in _update_fun(self)
164 def _update_fun(self):
165 if not self.f_updated:
--> 166 self._update_fun_impl()
167 self.f_updated = True
168
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/_differentiable_functions.py in update_fun()
71
72 def update_fun():
---> 73 self.f = fun_wrapped(self.x)
74
75 self._update_fun_impl = update_fun
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/scipy/optimize/_differentiable_functions.py in fun_wrapped(x)
68 def fun_wrapped(x):
69 self.nfev += 1
---> 70 return fun(x, *args)
71
72 def update_fun():
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/base/model.py in f(params, *args)
499
500 def f(params, *args):
--> 501 return -self.loglike(params, *args) / nobs
502
503 if method == 'newton':
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in loglike(self, params, set_sigma2)
839 method = self.method
840 if method in ['mle', 'css-mle']:
--> 841 return self.loglike_kalman(params, set_sigma2)
842 elif method == 'css':
843 return self.loglike_css(params, set_sigma2)
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/tsa/arima_model.py in loglike_kalman(self, params, set_sigma2)
849 Compute exact loglikelihood for ARMA(p,q) model by the Kalman Filter.
850 """
--> 851 return KalmanFilter.loglike(params, self, set_sigma2)
852
853 def loglike_css(self, params, set_sigma2=True):
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/statsmodels/tsa/kalmanf/kalmanfilter.py in loglike(cls, params, arma_model, set_sigma2)
218 loglike, sigma2 = kalman_loglike.kalman_loglike_double(
219 y, k, k_ar, k_ma, k_lags, int(nobs),
--> 220 Z_mat, R_mat, T_mat)
221 elif np.issubdtype(paramsdtype, np.complex128):
222 loglike, sigma2 = kalman_loglike.kalman_loglike_complex(
statsmodels/tsa/kalmanf/kalman_loglike.pyx in statsmodels.tsa.kalmanf.kalman_loglike.kalman_loglike_double()
statsmodels/tsa/kalmanf/kalman_loglike.pyx in statsmodels.tsa.kalmanf.kalman_loglike.kalman_filter_double()
<__array_function__ internals> in pinv(*args, **kwargs)
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/numpy/linalg/linalg.py in pinv(a, rcond, hermitian)
2001 return wrap(res)
2002 a = a.conjugate()
-> 2003 u, s, vt = svd(a, full_matrices=False, hermitian=hermitian)
2004
2005 # discard small singular values
<__array_function__ internals> in svd(*args, **kwargs)
~/anaconda3/envs/arima_forecast/lib/python3.6/site-packages/numpy/linalg/linalg.py in svd(a, full_matrices, compute_uv, hermitian)
1659
1660 signature = 'D->DdD' if isComplexType(t) else 'd->ddd'
-> 1661 u, s, vh = gufunc(a, signature=signature, extobj=extobj)
1662 u = u.astype(result_t, copy=False)
1663 s = s.astype(_realType(result_t), copy=False)
ValueError: On entry to DLASCL parameter number 4 had an illegal value
I'm trying this code for featuretools:
# Run deep feature synthesis on the 'demo' entity of `es` with explicit lists
# of aggregation and transform primitives.
# Per the dfs source shown in the traceback, the call returns
# (feature_matrix, features), so `features` receives the matrix and
# `feature_names` the feature definitions.
features, feature_names = ft.dfs(entityset = es, target_entity = 'demo',
agg_primitives = ['count', 'max', 'time_since_first', 'median', 'time_since_last', 'avg_time_between',
'sum', 'mean'],
trans_primitives = ['is_weekend', 'year', 'week', 'divide_by_feature', 'percentile'])
But I had this error
TypeError Traceback (most recent call last)
<ipython-input-17-89e925ff895d> in <module>
3 agg_primitives = ['count', 'max', 'time_since_first', 'median', 'time_since_last', 'avg_time_between',
4 'sum', 'mean'],
----> 5 trans_primitives = ['is_weekend', 'year', 'week', 'divide_by_feature', 'percentile'])
~/.local/lib/python3.6/site-packages/featuretools/utils/entry_point.py in function_wrapper(*args, **kwargs)
44 ep.on_error(error=e,
45 runtime=runtime)
---> 46 raise e
47
48 # send return value
~/.local/lib/python3.6/site-packages/featuretools/utils/entry_point.py in function_wrapper(*args, **kwargs)
36 # call function
37 start = time.time()
---> 38 return_value = func(*args, **kwargs)
39 runtime = time.time() - start
40 except Exception as e:
~/.local/lib/python3.6/site-packages/featuretools/synthesis/dfs.py in dfs(entities, relationships, entityset, target_entity, cutoff_time, instance_ids, agg_primitives, trans_primitives, groupby_trans_primitives, allowed_paths, max_depth, ignore_entities, ignore_variables, seed_features, drop_contains, drop_exact, where_primitives, max_features, cutoff_time_in_index, save_progress, features_only, training_window, approximate, chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types)
226 n_jobs=n_jobs,
227 dask_kwargs=dask_kwargs,
--> 228 verbose=verbose)
229 return feature_matrix, features
~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in calculate_feature_matrix(features, entityset, cutoff_time, instance_ids, entities, relationships, cutoff_time_in_index, training_window, approximate, save_progress, verbose, chunk_size, n_jobs, dask_kwargs)
265 cutoff_df_time_var=cutoff_df_time_var,
266 target_time=target_time,
--> 267 pass_columns=pass_columns)
268
269 feature_matrix = pd.concat(feature_matrix)
~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in linear_calculate_chunks(chunks, feature_set, approximate, training_window, verbose, save_progress, entityset, no_unapproximated_aggs, cutoff_df_time_var, target_time, pass_columns)
496 no_unapproximated_aggs,
497 cutoff_df_time_var,
--> 498 target_time, pass_columns)
499 feature_matrix.append(_feature_matrix)
500 # Do a manual garbage collection in case objects from calculate_chunk
~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in calculate_chunk(chunk, feature_set, entityset, approximate, training_window, verbose, save_progress, no_unapproximated_aggs, cutoff_df_time_var, target_time, pass_columns)
341 ids,
342 precalculated_features=precalculated_features_trie,
--> 343 training_window=window)
344
345 id_name = _feature_matrix.index.name
~/.local/lib/python3.6/site-packages/featuretools/computational_backends/utils.py in wrapped(*args, **kwargs)
35 def wrapped(*args, **kwargs):
36 if save_progress is None:
---> 37 r = method(*args, **kwargs)
38 else:
39 time = args[0].to_pydatetime()
~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in calc_results(time_last, ids, precalculated_features, training_window)
316 ignored=all_approx_feature_set)
317
--> 318 matrix = calculator.run(ids)
319 return matrix
320
~/.local/lib/python3.6/site-packages/featuretools/computational_backends/feature_set_calculator.py in run(self, instance_ids)
100 precalculated_trie=self.precalculated_features,
101 filter_variable=target_entity.index,
--> 102 filter_values=instance_ids)
103
104 # The dataframe for the target entity should be stored at the root of
~/.local/lib/python3.6/site-packages/featuretools/computational_backends/feature_set_calculator.py in _calculate_features_for_entity(self, entity_id, feature_trie, df_trie, full_entity_df_trie, precalculated_trie, filter_variable, filter_values, parent_data)
187 columns=columns,
188 time_last=self.time_last,
--> 189 training_window=self.training_window)
190
191 # Step 2: Add variables to the dataframe linking it to all ancestors.
~/.local/lib/python3.6/site-packages/featuretools/entityset/entity.py in query_by_values(self, instance_vals, variable_id, columns, time_last, training_window)
271
272 if columns is not None:
--> 273 df = df[columns]
274
275 return df
~/.local/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2686 return self._getitem_multilevel(key)
2687 else:
-> 2688 return self._getitem_column(key)
2689
2690 def _getitem_column(self, key):
~/.local/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2693 # get column
2694 if self.columns.is_unique:
-> 2695 return self._get_item_cache(key)
2696
2697 # duplicate columns & possible reduce dimensionality
~/.local/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
2485 """Return the cached item, item represents a label indexer."""
2486 cache = self._item_cache
-> 2487 res = cache.get(item)
2488 if res is None:
2489 values = self._data.get(item)
TypeError: unhashable type: 'set'
I also tried the simplest code for deep feature synthesis (dfs) as shown below, but it still encountered the same error
# Minimal call: no primitives are listed, so the library defaults apply.
features, feature_names = ft.dfs(entityset = es, target_entity = 'demo')
I'm not really sure why I encountered this error, any help or recommendations on how to go about from here is deeply appreciated.
Thanks in advance for your help!
I found a solution: my current version had bugs in it that were fixed by the Featuretools team. Just run pip install directly from master:
pip install --upgrade https://github.com/featuretools/featuretools/zipball/master
This was fixed and has been released in Featuretools 0.9.1. If you upgrade to the latest version of Featuretools, the error will go away.
I am trying to create a choropleth map showing fire incidents throughout a county in NC. I have the data in a DataFrame, and last night I was able to export maps. The only problem was that the exported data was not accurate, so there was a problem with my code. I think I managed to fix that by merging the shapefile and data dataframes together, but now, when I run the portion that creates the map, I get AttributeError: Unknown property column. Full message:
AttributeError Traceback (most recent call last)
<ipython-input-74-61a60b41abbe> in <module>()
13 # create map
14
---> 15 merged_df.plot(column=variable, cmap='Reds', linewidth=0.8, ax=ax, edgecolor='0.8');
16
17 ax.axis('off')
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2939 fontsize=fontsize, colormap=colormap, table=table,
2940 yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 2941 sort_columns=sort_columns, **kwds)
2942 __call__.__doc__ = plot_frame.__doc__
2943
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
1975 yerr=yerr, xerr=xerr,
1976 secondary_y=secondary_y, sort_columns=sort_columns,
-> 1977 **kwds)
1978
1979
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in _plot(data, x, y, subplots, ax, kind, **kwds)
1802 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
1803
-> 1804 plot_obj.generate()
1805 plot_obj.draw()
1806 return plot_obj.result
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in generate(self)
258 self._compute_plot_data()
259 self._setup_subplots()
--> 260 self._make_plot()
261 self._add_table()
262 self._make_legend()
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in _make_plot(self)
983 stacking_id=stacking_id,
984 is_errorbar=is_errorbar,
--> 985 **kwds)
986 self._add_legend_handle(newlines[0], label, index=i)
987
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in _plot(cls, ax, x, y, style, column_num, stacking_id, **kwds)
999 cls._initialize_stacker(ax, stacking_id, len(y))
1000 y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label'])
-> 1001 lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds)
1002 cls._update_stacker(ax, stacking_id, y)
1003 return lines
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in _plot(cls, ax, x, y, style, is_errorbar, **kwds)
613 else:
614 args = (x, y)
--> 615 return ax.plot(*args, **kwds)
616
617 def _get_index_name(self):
~\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, data, *args, **kwargs)
1808 "the Matplotlib list!)" % (label_namer, func.__name__),
1809 RuntimeWarning, stacklevel=2)
-> 1810 return func(ax, *args, **kwargs)
1811
1812 inner.__doc__ = _add_data_doc(inner.__doc__,
~\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in plot(self, scalex, scaley, *args, **kwargs)
1609 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D._alias_map)
1610
-> 1611 for line in self._get_lines(*args, **kwargs):
1612 self.add_line(line)
1613 lines.append(line)
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _grab_next_args(self, *args, **kwargs)
391 this += args[0],
392 args = args[1:]
--> 393 yield from self._plot_args(this, kwargs)
394
395
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _plot_args(self, tup, kwargs)
381 "with non-matching shapes is deprecated.")
382 for j in range(max(ncx, ncy)):
--> 383 seg = func(x[:, j % ncx], y[:, j % ncy], kw, kwargs)
384 ret.append(seg)
385 return ret
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _makeline(self, x, y, kw, kwargs)
286 default_dict = self._getdefaults(None, kw)
287 self._setdefaults(default_dict, kw)
--> 288 seg = mlines.Line2D(x, y, **kw)
289 return seg
290
~\Anaconda3\lib\site-packages\matplotlib\lines.py in __init__(self, xdata, ydata, linewidth, linestyle, color, marker, markersize, markeredgewidth, markeredgecolor, markerfacecolor, markerfacecoloralt, fillstyle, antialiased, dash_capstyle, solid_capstyle, dash_joinstyle, solid_joinstyle, pickradius, drawstyle, markevery, **kwargs)
408 # update kwargs before updating data to give the caller a
409 # chance to init axes (and hence unit support)
--> 410 self.update(kwargs)
411 self.pickradius = pickradius
412 self.ind_offset = 0
~\Anaconda3\lib\site-packages\matplotlib\artist.py in update(self, props)
914
915 with cbook._setattr_cm(self, eventson=False):
--> 916 ret = [_update_property(self, k, v) for k, v in props.items()]
917
918 if len(ret):
~\Anaconda3\lib\site-packages\matplotlib\artist.py in <listcomp>(.0)
914
915 with cbook._setattr_cm(self, eventson=False):
--> 916 ret = [_update_property(self, k, v) for k, v in props.items()]
917
918 if len(ret):
~\Anaconda3\lib\site-packages\matplotlib\artist.py in _update_property(self, k, v)
910 func = getattr(self, 'set_' + k, None)
911 if not callable(func):
--> 912 raise AttributeError('Unknown property %s' % k)
913 return func(v)
914
AttributeError: Unknown property column
I have no idea how to fix this. I've googled and tried changing the dtype from float to int, tried different columns, but no change. I don't understand because it worked last night, but didn't work when I tried to run it today before making changes. Thank you in advance for any help. Below is the bulk of my code that contains the data frame and mapping, everything else is just getting data from csvs:
import pandas as pd
import numpy as np
#import googlemaps
import gmaps
import gmaps.datasets
import geopandas as gpd
#import matplotlib as plt
import matplotlib.pyplot as plt
import os
import plotly.plotly as py
import plotly.tools as tls
This is what the merged dataframe looks like:
OBJECTID_x int64
ZIPNUM float64
address object
address2 object
apt_room object
arrive_date_time object
cleared_date_time object
dispatch_date_time object
exposure int64
incident_number object
incident_type int64
incident_type_description object
platoon object
station float64
Longitude object
Latitude object
Year int64
Date object
Arr Time object
Seconds float64
Incident object
OBJECTID_y int64
ZIPNAME object
ZIPCODE object
NAME object
SHAPEAREA float64
SHAPELEN float64
LAST_EDITE object
geometry object
dtype: object
# Name of the column whose values should colour the map.
variable = 'ZIPNUM'
# Value range used for the colorbar normalisation below.
vmin, vmax = 50, 2000
# Create a single 15x15 inch Matplotlib figure and axes.
fig, ax = plt.subplots(1, figsize=(15, 15))
# Draw the map on `ax`.
# NOTE(review): in the traceback this call runs through pandas' plotting code
# and `column` ends up rejected as an unknown Line2D property; presumably
# merged_df is a plain pandas DataFrame rather than a geopandas GeoDataFrame
# after the merge -- confirm how merged_df was built.
merged_df.plot(column=variable, cmap='Reds', linewidth=0.8, ax=ax, edgecolor='0.8');
# Hide the axis frame and ticks, then add the title.
ax.axis('off')
ax.set_title('Fire Incident Rate in Wake County', fontdict={'fontsize': '25', 'fontweight' : '3'})
# Build a standalone colour mapping to serve as the legend. The plot call
# above is not given vmin/vmax, so this range is set independently of it.
sm = plt.cm.ScalarMappable(cmap='Reds', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# Give the mappable an empty data array so a colorbar can be drawn from it.
sm._A = []
# Add the colorbar to the figure.
cbar = fig.colorbar(sm)
# Label the covered period, positioned in figure-fraction coordinates.
ax.annotate('2008-2018',
xy=(0.001, .225), xycoords='figure fraction',
horizontalalignment='left', verticalalignment='top',
fontsize=35)
# Write the figure to a PNG at 300 dpi.
fig.savefig("Fire Incident Rate in Wake County 2008-2018.png", dpi=300)
The problem is that you are trying to use column as a keyword argument. Since you want to plot the 'ZIPNUM' column of the DataFrame, which you store in a variable called variable, you can just pass it as a positional argument to plot(). If you want to plot a relationship between two variables, you can use keyword arguments merged_df.plot(x=variable1, y=variable2)
For your case, you can use
# Pass the column name positionally instead of through `column=`.
# NOTE(review): per the signature in the traceback, the first positional
# parameter of DataFrame.plot is `x`, so this uses ZIPNUM as the x values --
# confirm that is the intended result.
variable = 'ZIPNUM'
merged_df.plot(variable, cmap='Reds', linewidth=0.8, ax=ax, edgecolor='0.8');
EDIT (based on comments)
You should use markeredgecolor only if you use marker for plotting. edgecolor is not the correct keyword. Moreover, you are assigning a number (string) as color which is again incorrect. Below is a simple example.
# Minimal example: a four-row frame with columns "A" and "B".
df = pd.DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], columns=["A", "B"])
column='A'
# Column "A" is passed positionally; the line is styled with marker keywords
# (marker, markeredgewidth, markeredgecolor) rather than `edgecolor`.
df.plot(column, linewidth=0.8, color='r', marker ='o', markeredgewidth=2,
markeredgecolor='blue')
I was following the code in the new seaborn 0.9.0 release as displayed on the site and I got an error when typing in the following code. The code came from the bottom of this page https://seaborn.pydata.org/tutorial/categorical.html
import seaborn as sns
# Load seaborn's example "tips" dataset.
tips = sns.load_dataset("tips")
# Swarm plot of total_bill per day, coloured by smoker, with one facet
# column per value of "time".
sns.catplot(x="day", y="total_bill", hue="smoker",
col="time", aspect=.6,
kind="swarm", data=tips);
This is the output from running the above code. I have tried creating a new environment and everything has been updated. I still do not know why it is not working.
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-21-c1ae50b18a54> in <module>
3 sns.catplot(x="day", y="total_bill", hue="smoker",
4 col="time", aspect=.6,
----> 5 kind="swarm", data=tips);
6 get_ipython().run_line_magic('version_information', '')
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/categorical.py in catplot(x, y, hue, data, row, col, col_wrap, estimator, ci, n_boot, units, order, hue_order, row_order, col_order, kind, height, aspect, orient, color, palette, legend, legend_out, sharex, sharey, margin_titles, facet_kws, **kwargs)
3753
3754 # Draw the plot onto the facets
-> 3755 g.map_dataframe(plot_func, x, y, hue, **plot_kws)
3756
3757 # Special case axis labels for a count type plot
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/axisgrid.py in map_dataframe(self, func, *args, **kwargs)
818
819 # Draw the plot
--> 820 self._facet_plot(func, ax, args, kwargs)
821
822 # Finalize the annotations and layout
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/axisgrid.py in _facet_plot(self, func, ax, plot_args, plot_kwargs)
836
837 # Draw the plot
--> 838 func(*plot_args, **plot_kwargs)
839
840 # Sort out the supporting information
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/categorical.py in swarmplot(x, y, hue, data, order, hue_order, dodge, orient, color, palette, size, edgecolor, linewidth, ax, **kwargs)
2989 linewidth=linewidth))
2990
-> 2991 plotter.plot(ax, kwargs)
2992 return ax
2993
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/categorical.py in plot(self, ax, kws)
1444 def plot(self, ax, kws):
1445 """Make the full plot."""
-> 1446 self.draw_swarmplot(ax, kws)
1447 self.add_legend_data(ax)
1448 self.annotate_axes(ax)
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/categorical.py in draw_swarmplot(self, ax, kws)
1404 kws.update(c=point_colors)
1405 if self.orient == "v":
-> 1406 points = ax.scatter(cat_pos, swarm_data, s=s, **kws)
1407 else:
1408 points = ax.scatter(swarm_data, cat_pos, s=s, **kws)
~/anaconda3/envs/python3/lib/python3.7/site-packages/matplotlib/__init__.py in inner(ax, data, *args, **kwargs)
1803 "the Matplotlib list!)" % (label_namer, func.__name__),
1804 RuntimeWarning, stacklevel=2)
-> 1805 return func(ax, *args, **kwargs)
1806
1807 inner.__doc__ = _add_data_doc(inner.__doc__,
~/anaconda3/envs/python3/lib/python3.7/site-packages/matplotlib/axes/_axes.py in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, **kwargs)
4193 isinstance(c, str) or
4194 (isinstance(c, collections.Iterable) and
-> 4195 isinstance(c[0], str))):
4196 c_array = None
4197 else:
IndexError: index 0 is out of bounds for axis 0 with size 0
This is unfortunately a bug in matplotlib 3.0.1. It's been reported here and fixed by pull/12673.
Options you have are to install either matplotlib 3.0.0 or 3.0.2.
I am using a while loop to calculate a cost function for memory reasons. When calculating the gradient, TensorFlow will store Nm tensors, where Nm is the number of iterations in my while loop (this causes the same memory issues I had with the original energy functions). I do not want that, as I don't have enough memory. So I want to register a new op along with a gradient function that both use a while loop. However, I am having issues with using function.Defun and a while loop. To simplify things, I have a small test example below:
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.framework import function
def _run(tensor):
    """Evaluate ``tensor`` in a fresh ``tf.Session`` and return its value.

    Global variables are initialised before the tensor is evaluated.
    """
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        return session.run(tensor)
@function.Defun(tf.float32, tf.float32, func_name='tf_test_log')  # ,grad_func=tf_test_logGrad)
def tf_test_log(t_x, t_y):
    """Add ``exp(t_x[:, 0] + t_y[:, 0])`` to an accumulator ``N`` times in a ``tf.while_loop``.

    This is the version from the question: as posted, building it fails with
    the shape-invariant ``ValueError`` shown in the traceback that follows.

    Reads the module-level globals ``N`` (iteration count and leading
    dimension) and ``dType`` (accumulator dtype).

    Args:
        t_x: float32 tensor; only column 0 is read.
        t_y: float32 tensor; only column 0 is read.

    Returns:
        The accumulator tensor produced by the loop.
    """
    # N = t_x.shape[0].value
    condition = lambda i, m1: i < N

    def body(index, x):
        # Concatenating variant (reported in the question to work):
        # return[(index+1),tf.concat([x, tf.expand_dims(tf.exp( tf.add( t_x[:,index],t_y[:,index]) ),1) ],1 ) ]
        return [index + 1, tf.add(x, tf.exp(tf.add(t_x[:, 0], t_y[:, 0])))]

    i0 = tf.constant(0, dtype=tf.int32)
    m0 = tf.zeros([N, 1], dType)
    ijk_0 = [i0, m0]
    # The final loop counter is not needed, only the accumulator.
    _, t_log_x = tf.while_loop(
        condition, body, ijk_0,
        shape_invariants=[i0.get_shape(), tf.TensorShape([N, None])],
    )
    return t_log_x
# Module-level settings; tf_test_log reads `dType` and `N` as globals.
dType = tf.float32
N = np.int32(100)
# t_N is not used by the code shown.
t_N = tf.constant(N,dtype = tf.int32)
# Two N x N constants filled with standard-normal samples.
t_x = tf.constant(np.random.randn(N,N),dtype = dType)
t_y = tf.constant(np.random.randn(N,N),dtype = dType)
# Build the op and evaluate it in a session.
ys = _run(tf_test_log(t_x,t_y))
I then try to test the new op:
I get a ValueError: The shape for while/Merge_1:0 is not an invariant for the loop. It enters the loop with shape (100, ?), but has shape <unknown> after one iteration. Provide shape invariants using either the shape_invariants argument of tf.while_loop or set_shape() on the loop variables.
Note that calling
If I use a concatenate operation (instead of the add operation that gets returned by my while loop), I do not get any issues.
However, if I do not set N as a global variable (i.e. if I do N = t_x.shape[0] inside the body of the tf_test_log function), I get a ValueError:
ValueError: Cannot convert a partially known TensorShape to a Tensor: (?, 1)
What is wrong with my code? Any help is greatly appreciated!
I am using python 3.5 on ubuntu 16.04 and tensorflow 1.4
full output:
ValueError Traceback (most recent call last)
~/Documents/TheEffingPhDHatersGonnaHate/PAM/defun_while.py in <module>()
51 t_x = tf.constant(np.random.randn(N,N),dtype = dType)
52 t_y = tf.constant(np.random.randn(N,N),dtype = dType)
---> 53 ys = _run(tf_test_log(t_x,t_y))
54
55
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in __call__(self, *args, **kwargs)
503
504 def __call__(self, *args, **kwargs):
--> 505 self.add_to_graph(ops.get_default_graph())
506 args = [ops.convert_to_tensor(_) for _ in args] + self._extra_inputs
507 ret, op = _call(self._signature, *args, **kwargs)
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in add_to_graph(self, g)
484 def add_to_graph(self, g):
485 """Adds this function into the graph g."""
--> 486 self._create_definition_if_needed()
487
488 # Adds this function into 'g'.
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in _create_definition_if_needed(self)
319 """Creates the function definition if it's not created yet."""
320 with context.graph_mode():
--> 321 self._create_definition_if_needed_impl()
322
323 def _create_definition_if_needed_impl(self):
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in _create_definition_if_needed_impl(self)
336 # Call func and gather the output tensors.
337 with vs.variable_scope("", custom_getter=temp_graph.getvar):
--> 338 outputs = self._func(*inputs)
339
340 # There is no way of distinguishing between a function not returning
~/Documents/TheEffingPhDHatersGonnaHate/PAM/defun_while.py in tf_test_log(t_x, t_y)
39 L,t_log_x = tf.while_loop(condition,body,ijk_0,
40 shape_invariants=[i0.get_shape(),
---> 41 tf.TensorShape([N,None])]
42 )
43 return t_log_x
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
2814 loop_context = WhileContext(parallel_iterations, back_prop, swap_memory) # pylint: disable=redefined-outer-name
2815 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
-> 2816 result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
2817 return result
2818
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2638 self.Enter()
2639 original_body_result, exit_vars = self._BuildLoop(
-> 2640 pred, body, original_loop_vars, loop_vars, shape_invariants)
2641 finally:
2642 self.Exit()
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2619 for m_var, n_var in zip(merge_vars, next_vars):
2620 if isinstance(m_var, ops.Tensor):
-> 2621 _EnforceShapeInvariant(m_var, n_var)
2622
2623 # Exit the loop.
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _EnforceShapeInvariant(merge_var, next_var)
576 "Provide shape invariants using either the `shape_invariants` "
577 "argument of tf.while_loop or set_shape() on the loop variables."
--> 578 % (merge_var.name, m_shape, n_shape))
579 else:
580 if not isinstance(var, (ops.IndexedSlices, sparse_tensor.SparseTensor)):
ValueError: The shape for while/Merge_1:0 is not an invariant for the loop. It enters the loop with shape (100, ?), but has shape <unknown> after one iteration. Provide shape invariants using either the `shape_invariants` argument of tf.while_loop or set_shape() on the loop variables.
Thanks @Alexandre Passos for the suggestion in the comment above!
The following piece of code is a modification of the original with a set_shape function added inside the body.
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.framework import function
def _run(tensor):
    """Evaluate ``tensor`` in a fresh TF1 session and return the result.

    Global variables are initialized before ``tensor`` is run; the session
    is closed when the ``with`` block exits.
    """
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        return sess.run(tensor)
@function.Defun(tf.float32, tf.float32, tf.float32,
                func_name='tf_test_logGrad')
def tf_test_logGrad(t_x, t_y, grad):
    """Return the incoming gradient ``grad`` unchanged.

    ``t_x`` and ``t_y`` are accepted but not used.
    """
    return grad
# The custom gradient hook is left disabled: grad_func=tf_test_logGrad
@function.Defun(tf.float32, tf.float32, func_name='tf_test_log')
def tf_test_log(t_x, t_y):
    """Accumulate ``exp(t_x[:, 0] + t_y[:, 0])`` in a ``tf.while_loop``.

    Starting from a zero vector of length ``N``, the loop runs while the
    counter is below ``N`` and adds the same exponential term on every
    iteration. The final accumulator is returned.

    ``N`` and ``dType`` are read from module scope.
    """
    # N cannot be derived from the Defun inputs here: doing so makes
    # tf.zeros([N]) fail with "Cannot convert a partially known TensorShape
    # to a Tensor", so the module-level N is used instead.
    # N = t_x.shape[0].value

    def condition(i, m1):
        return i < N

    def body(index, x):
        # Earlier variant that concatenated one new column per iteration:
        # return [(index+1), tf.concat([x, tf.expand_dims(tf.exp(tf.add(t_x[:, index], t_y[:, index])), 1)], 1)]
        # NOTE(review): column 0 is used on every iteration, not `index` --
        # confirm this is intended.
        x = tf.add(x, tf.exp(tf.add(t_x[:, 0], t_y[:, 0])))
        # Re-assert the static shape so it matches the declared loop
        # invariant; without it while_loop raises "shape ... is not an
        # invariant for the loop".
        x.set_shape([N])
        return [index + 1, x]

    i0 = tf.constant(0, dtype=tf.int32)
    m0 = tf.zeros([N], dType)
    _, t_log_x = tf.while_loop(
        condition, body, [i0, m0],
        shape_invariants=[i0.get_shape(), tf.TensorShape([N])])
    return t_log_x
# Element dtype and loop length / vector size. Both are module-level
# globals because tf_test_log looks them up at graph-construction time.
dType = tf.float32
N = np.int32(100)

# Tensor copy of N; not referenced by the code that follows.
t_N = tf.constant(N, dtype=tf.int32)

# Two random N x N inputs drawn from a standard normal distribution.
t_x = tf.constant(np.random.randn(N, N), dtype=dType)
t_y = tf.constant(np.random.randn(N, N), dtype=dType)

# Build the function call and evaluate it in a session.
ys = _run(tf_test_log(t_x, t_y))
The issue with the global `N` still persists.
You still need to define the size of the loop tensors (`N`) as a global variable outside of the function decorated with `Defun`. If you instead try to derive it from the shape of the inputs to the `Defun`-decorated function, you get:
TypeError Traceback (most recent call last)
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py in zeros(shape, dtype, name)
1438 shape = tensor_shape.as_shape(shape)
-> 1439 output = constant(zero, shape=shape, dtype=dtype, name=name)
1440 except (TypeError, ValueError):
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name, verify_shape)
207 tensor_util.make_tensor_proto(
--> 208 value, dtype=dtype, shape=shape, verify_shape=verify_shape))
209 dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape)
379 # exception when dtype is set to np.int64
--> 380 if shape is not None and np.prod(shape, dtype=np.int64) == 0:
381 nparray = np.empty(shape, dtype=np_dt)
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/numpy/core/fromnumeric.py in prod(a, axis, dtype, out, keepdims)
2517 return _methods._prod(a, axis=axis, dtype=dtype,
-> 2518 out=out, **kwargs)
2519
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/numpy/core/_methods.py in _prod(a, axis, dtype, out, keepdims)
34 def _prod(a, axis=None, dtype=None, out=None, keepdims=False):
---> 35 return umr_prod(a, axis, dtype, out, keepdims)
36
TypeError: __int__ returned non-int (type NoneType)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~/Documents/TheEffingPhDHatersGonnaHate/PAM/defun_while.py in <module>()
52 t_x = tf.constant(np.random.randn(N,N),dtype = dType)
53 t_y = tf.constant(np.random.randn(N,N),dtype = dType)
---> 54 ys = _run(tf_test_log(t_x,t_y))
55
56
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in __call__(self, *args, **kwargs)
503
504 def __call__(self, *args, **kwargs):
--> 505 self.add_to_graph(ops.get_default_graph())
506 args = [ops.convert_to_tensor(_) for _ in args] + self._extra_inputs
507 ret, op = _call(self._signature, *args, **kwargs)
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in add_to_graph(self, g)
484 def add_to_graph(self, g):
485 """Adds this function into the graph g."""
--> 486 self._create_definition_if_needed()
487
488 # Adds this function into 'g'.
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in _create_definition_if_needed(self)
319 """Creates the function definition if it's not created yet."""
320 with context.graph_mode():
--> 321 self._create_definition_if_needed_impl()
322
323 def _create_definition_if_needed_impl(self):
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/function.py in _create_definition_if_needed_impl(self)
336 # Call func and gather the output tensors.
337 with vs.variable_scope("", custom_getter=temp_graph.getvar):
--> 338 outputs = self._func(*inputs)
339
340 # There is no way of distinguishing between a function not returning
~/Documents/TheEffingPhDHatersGonnaHate/PAM/defun_while.py in tf_test_log(t_x, t_y)
33
34 i0 = tf.constant(0,dtype=tf.int32)
---> 35 m0 = tf.zeros([N],dType)
36
37
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py in zeros(shape, dtype, name)
1439 output = constant(zero, shape=shape, dtype=dtype, name=name)
1440 except (TypeError, ValueError):
-> 1441 shape = ops.convert_to_tensor(shape, dtype=dtypes.int32, name="shape")
1442 output = fill(shape, constant(zero, dtype=dtype), name=name)
1443 assert output.dtype.base_dtype == dtype
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, preferred_dtype)
834 name=name,
835 preferred_dtype=preferred_dtype,
--> 836 as_ref=False)
837
838
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx)
924
925 if ret is None:
--> 926 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
927
928 if ret is NotImplemented:
~/environments/tf_1_4_gpu/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py in _tensor_shape_tensor_conversion_function(s, dtype, name, as_ref)
248 if not s.is_fully_defined():
249 raise ValueError(
--> 250 "Cannot convert a partially known TensorShape to a Tensor: %s" % s)
251 s_list = s.as_list()
252 int64_value = 0
ValueError: Cannot convert a partially known TensorShape to a Tensor: (?,)