Problems with seaborn (ndim) - python-3.x

I am using seaborn to plot a very simple data set. Here is what I do:
import seaborn as sns
import pandas as pd
df = pd.read_excel('myfile.xlsx')
sns.set(style="white")
g = sns.PairGrid(df, diag_sharey=False)
g.map_lower(sns.kdeplot)
g.map_upper(sns.scatterplot)
g.map_diag(sns.kdeplot, lw=3)
I get the following error: AttributeError: 'NoneType' object has no attribute 'ndim'. Weirdly, the plot is only partially drawn (see below).
Any idea why that is the case and what I can do to solve the issue?
EDIT:
The dataframe has the following attributes:
plan_change int64
user_login float64
new_act_ratio float64
on_time int64
Unfortunately, I cannot upload the data set. However, I can say that plotting other seaborn graphs works just fine.
The total error message is the following:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-16-2dbc61abd2bd> in <module>()
3 g = sns.PairGrid(df, diag_sharey=False)
4 g.map_lower(sns.kdeplot)
----> 5 g.map_upper(sns.scatterplot)
6 g.map_diag(sns.kdeplot, lw=3)
7
/anaconda/lib/python3.5/site-packages/seaborn/axisgrid.py in map_upper(self, func, **kwargs)
1488 color = self.palette[k] if kw_color is None else kw_color
1489 func(data_k[x_var], data_k[y_var], label=label_k,
-> 1490 color=color, **kwargs)
1491
1492 self._clean_axis(ax)
/anaconda/lib/python3.5/site-packages/seaborn/relational.py in scatterplot(x, y, hue, style, size, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, markers, style_order, x_bins, y_bins, units, estimator, ci, n_boot, alpha, x_jitter, y_jitter, legend, ax, **kwargs)
1333 x_bins=x_bins, y_bins=y_bins,
1334 estimator=estimator, ci=ci, n_boot=n_boot,
-> 1335 alpha=alpha, x_jitter=x_jitter, y_jitter=y_jitter, legend=legend,
1336 )
1337
/anaconda/lib/python3.5/site-packages/seaborn/relational.py in __init__(self, x, y, hue, size, style, data, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, x_bins, y_bins, units, estimator, ci, n_boot, alpha, x_jitter, y_jitter, legend)
850
851 plot_data = self.establish_variables(
--> 852 x, y, hue, size, style, units, data
853 )
854
/anaconda/lib/python3.5/site-packages/seaborn/relational.py in establish_variables(self, x, y, hue, size, style, units, data)
155 units=units
156 )
--> 157 plot_data = pd.DataFrame(plot_data)
158
159 # Option 3:
/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
264 dtype=dtype, copy=copy)
265 elif isinstance(data, dict):
--> 266 mgr = self._init_dict(data, index, columns, dtype=dtype)
267 elif isinstance(data, ma.MaskedArray):
268 import numpy.ma.mrecords as mrecords
/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in _init_dict(self, data, index, columns, dtype)
400 arrays = [data[k] for k in keys]
401
--> 402 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
403
404 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
5382
5383 # don't force copy because getting jammed in an ndarray anyway
-> 5384 arrays = _homogenize(arrays, index, dtype)
5385
5386 # from BlockManager perspective
/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in _homogenize(data, index, dtype)
5693 v = lib.fast_multiget(v, oindex.values, default=NA)
5694 v = _sanitize_array(v, index, dtype=dtype, copy=False,
-> 5695 raise_cast_failure=False)
5696
5697 homogenized.append(v)
/anaconda/lib/python3.5/site-packages/pandas/core/series.py in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
2917
2918 # scalar like
-> 2919 if subarr.ndim == 0:
2920 if isinstance(data, list): # pragma: no cover
2921 subarr = np.array(data, dtype=object)
AttributeError: 'NoneType' object has no attribute 'ndim'

Related

How do I pass the values to Catboost?

I'm trying to work with catboost and I've run into a problem that I'm really stuck on right now. I have a dataframe with 28 columns, 2 of which are categorical. The numerical data contains both whole and fractional numbers, as well as some 0.00 values that represent genuine zeros (e.g. 1-1=0) rather than missing values.
I'm trying to run this:
train_cl = cb.Pool(data=ret_df.iloc[:580000, :-1], label=ret_df.iloc[:580000, -1], cat_features=cats)
evl_cl = cb.Pool(data=ret_df.iloc[580000:, :-1], label=ret_df.iloc[580000:, -1], cat_features=cats)
But I get this error:
---------------------------------------------------------------------------
CatBoostError Traceback (most recent call last)
<ipython-input-112-a515b0ab357b> in <module>
1 train_cl = cb.Pool(data=ret_df.iloc[:580000, :-1], label=ret_df.iloc[:580000, -1], cat_features=cats)
----> 2 evl_cl = cb.Pool(data=ret_df.iloc[580000:, :-1], label=ret_df.iloc[580000:, -1], cat_features=cats)
~\AppData\Local\Programs\Python\Python36\lib\site-packages\catboost\core.py in __init__(self, data, label, cat_features, text_features, embedding_features, column_description, pairs, delimiter, has_header, ignore_csv_quoting, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, feature_names, thread_count, log_cout, log_cerr)
615 )
616
--> 617 self._init(data, label, cat_features, text_features, embedding_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, feature_names, thread_count)
618 super(Pool, self).__init__()
619
~\AppData\Local\Programs\Python\Python36\lib\site-packages\catboost\core.py in _init(self, data, label, cat_features, text_features, embedding_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, feature_names, thread_count)
1081 if label is not None:
1082 self._check_label_type(label)
-> 1083 self._check_label_empty(label)
1084 label = self._label_if_pandas_to_numpy(label)
1085 if len(np.shape(label)) == 1:
~\AppData\Local\Programs\Python\Python36\lib\site-packages\catboost\core.py in _check_label_empty(self, label)
723 """
724 if len(label) == 0:
--> 725 raise CatBoostError("Labels variable is empty.")
726
727 def _check_label_shape(self, label, samples_count):
CatBoostError: Labels variable is empty.
I've googled this problem but found nothing. My hypothesis is that the 0.00 values are causing it, but I do not know how to solve this because I literally can't replace these values with anything else.
Please, help me!

How to perform Min Max Scaler on an array which contains columns with string and numbers?

Please, I really need your help. I'm struggling with MinMaxScaler: I would like to apply this technique to the array below, which contains both string and numeric columns. I only want to apply it to the columns that contain numbers.
clean_tweets_no_urls = pd.DataFrame(counts_no_urls.most_common(15),
columns=['words', 'count'])
clean_tweets_no_urls.head()
That's my array
minmax_scaling(clean_tweets_no_urls, columns=['words', 'count'])
For that, I'm getting this result:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-108-eeb7b44d7121> in <module>
----> 1 minmax_scaling(clean_tweets_no_urls, columns=['words', 'count'])
C:\ProgramData\Anaconda3\lib\site-packages\mlxtend\preprocessing\scaling.py in minmax_scaling(array, columns, min_val, max_val)
36
37 """
---> 38 ary_new = array.astype(float)
39 if len(ary_new.shape) == 1:
40 ary_new = ary_new[:, np.newaxis]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5696 else:
5697 # else, only a single dtype is given
-> 5698 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors)
5699 return self._constructor(new_data).__finalize__(self)
5700
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
580
581 def astype(self, dtype, copy: bool = False, errors: str = "raise"):
--> 582 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
583
584 def convert(self, **kwargs):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, filter, **kwargs)
440 applied = b.apply(f, **kwargs)
441 else:
--> 442 applied = getattr(b, f)(**kwargs)
443 result_blocks = _extend_blocks(applied, result_blocks)
444
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
623 vals1d = values.ravel()
624 try:
--> 625 values = astype_nansafe(vals1d, dtype, copy=True)
626 except (ValueError, TypeError):
627 # e.g. astype_nansafe can fail on object-dtype of strings
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
895 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
896 # Explicit copy, or required since NumPy can't view from / to object.
--> 897 return arr.astype(dtype, copy=True)
898
899 return arr.view(dtype)
ValueError: could not convert string to float: 'joebiden'
from sklearn.preprocessing import minmax_scale
clean_tweets_no_urls[['count']] = minmax_scale(clean_tweets_no_urls[['count']])
This may be used to automate finding numeric columns.

AttributeError: Unknown property column in Geopandas plotting of events in zipcode shp file

I am trying to create a choropleth map showing fire incidents throughout a county in NC. I have the data in a DataFrame, and last night I was able to export maps. The only problem was that the exported data was not accurate, so there was a problem with my code. I think I managed to fix that by merging the shapefile and data DataFrames together, but now, when I run the portion that creates the map, I get AttributeError: Unknown property column. Full message:
AttributeError Traceback (most recent call last)
<ipython-input-74-61a60b41abbe> in <module>()
13 # create map
14
---> 15 merged_df.plot(column=variable, cmap='Reds', linewidth=0.8, ax=ax, edgecolor='0.8');
16
17 ax.axis('off')
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
2939 fontsize=fontsize, colormap=colormap, table=table,
2940 yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 2941 sort_columns=sort_columns, **kwds)
2942 __call__.__doc__ = plot_frame.__doc__
2943
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
1975 yerr=yerr, xerr=xerr,
1976 secondary_y=secondary_y, sort_columns=sort_columns,
-> 1977 **kwds)
1978
1979
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in _plot(data, x, y, subplots, ax, kind, **kwds)
1802 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
1803
-> 1804 plot_obj.generate()
1805 plot_obj.draw()
1806 return plot_obj.result
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in generate(self)
258 self._compute_plot_data()
259 self._setup_subplots()
--> 260 self._make_plot()
261 self._add_table()
262 self._make_legend()
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in _make_plot(self)
983 stacking_id=stacking_id,
984 is_errorbar=is_errorbar,
--> 985 **kwds)
986 self._add_legend_handle(newlines[0], label, index=i)
987
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in _plot(cls, ax, x, y, style, column_num, stacking_id, **kwds)
999 cls._initialize_stacker(ax, stacking_id, len(y))
1000 y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label'])
-> 1001 lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds)
1002 cls._update_stacker(ax, stacking_id, y)
1003 return lines
~\Anaconda3\lib\site-packages\pandas\plotting\_core.py in _plot(cls, ax, x, y, style, is_errorbar, **kwds)
613 else:
614 args = (x, y)
--> 615 return ax.plot(*args, **kwds)
616
617 def _get_index_name(self):
~\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, data, *args, **kwargs)
1808 "the Matplotlib list!)" % (label_namer, func.__name__),
1809 RuntimeWarning, stacklevel=2)
-> 1810 return func(ax, *args, **kwargs)
1811
1812 inner.__doc__ = _add_data_doc(inner.__doc__,
~\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in plot(self, scalex, scaley, *args, **kwargs)
1609 kwargs = cbook.normalize_kwargs(kwargs, mlines.Line2D._alias_map)
1610
-> 1611 for line in self._get_lines(*args, **kwargs):
1612 self.add_line(line)
1613 lines.append(line)
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _grab_next_args(self, *args, **kwargs)
391 this += args[0],
392 args = args[1:]
--> 393 yield from self._plot_args(this, kwargs)
394
395
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _plot_args(self, tup, kwargs)
381 "with non-matching shapes is deprecated.")
382 for j in range(max(ncx, ncy)):
--> 383 seg = func(x[:, j % ncx], y[:, j % ncy], kw, kwargs)
384 ret.append(seg)
385 return ret
~\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _makeline(self, x, y, kw, kwargs)
286 default_dict = self._getdefaults(None, kw)
287 self._setdefaults(default_dict, kw)
--> 288 seg = mlines.Line2D(x, y, **kw)
289 return seg
290
~\Anaconda3\lib\site-packages\matplotlib\lines.py in __init__(self, xdata, ydata, linewidth, linestyle, color, marker, markersize, markeredgewidth, markeredgecolor, markerfacecolor, markerfacecoloralt, fillstyle, antialiased, dash_capstyle, solid_capstyle, dash_joinstyle, solid_joinstyle, pickradius, drawstyle, markevery, **kwargs)
408 # update kwargs before updating data to give the caller a
409 # chance to init axes (and hence unit support)
--> 410 self.update(kwargs)
411 self.pickradius = pickradius
412 self.ind_offset = 0
~\Anaconda3\lib\site-packages\matplotlib\artist.py in update(self, props)
914
915 with cbook._setattr_cm(self, eventson=False):
--> 916 ret = [_update_property(self, k, v) for k, v in props.items()]
917
918 if len(ret):
~\Anaconda3\lib\site-packages\matplotlib\artist.py in <listcomp>(.0)
914
915 with cbook._setattr_cm(self, eventson=False):
--> 916 ret = [_update_property(self, k, v) for k, v in props.items()]
917
918 if len(ret):
~\Anaconda3\lib\site-packages\matplotlib\artist.py in _update_property(self, k, v)
910 func = getattr(self, 'set_' + k, None)
911 if not callable(func):
--> 912 raise AttributeError('Unknown property %s' % k)
913 return func(v)
914
AttributeError: Unknown property column
I have no idea how to fix this. I've googled and tried changing the dtype from float to int, tried different columns, but no change. I don't understand because it worked last night, but didn't work when I tried to run it today before making changes. Thank you in advance for any help. Below is the bulk of my code that contains the data frame and mapping, everything else is just getting data from csvs:
import pandas as pd
import numpy as np
#import googlemaps
import gmaps
import gmaps.datasets
import geopandas as gpd
#import matplotlib as plt
import matplotlib.pyplot as plt
import os
import plotly.plotly as py
import plotly.tools as tls
This is what the merged dataframe looks like:
OBJECTID_x int64
ZIPNUM float64
address object
address2 object
apt_room object
arrive_date_time object
cleared_date_time object
dispatch_date_time object
exposure int64
incident_number object
incident_type int64
incident_type_description object
platoon object
station float64
Longitude object
Latitude object
Year int64
Date object
Arr Time object
Seconds float64
Incident object
OBJECTID_y int64
ZIPNAME object
ZIPCODE object
NAME object
SHAPEAREA float64
SHAPELEN float64
LAST_EDITE object
geometry object
dtype: object
# set a variable that will call column to visualise on the map
variable = 'ZIPNUM'
# set the range for the choropleth
vmin, vmax = 50, 2000
# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(15, 15))
# create map
merged_df.plot(column=variable, cmap='Reds', linewidth=0.8, ax=ax, edgecolor='0.8');
ax.axis('off')
ax.set_title('Fire Incident Rate in Wake County', fontdict={'fontsize': '25', 'fontweight' : '3'})
# Create colorbar as a legend
sm = plt.cm.ScalarMappable(cmap='Reds', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# empty array for the data range
sm._A = []
# add the colorbar to the figure
cbar = fig.colorbar(sm)
ax.annotate('2008-2018',
xy=(0.001, .225), xycoords='figure fraction',
horizontalalignment='left', verticalalignment='top',
fontsize=35)
fig.savefig("Fire Incident Rate in Wake County 2008-2018.png", dpi=300)
The problem is that you are trying to use column as a keyword argument. Since you want to plot the 'ZIPNUM' column of the DataFrame, which you store in a variable called variable, you can just pass it as a positional argument to plot(). If you want to plot a relationship between two variables, you can use keyword arguments merged_df.plot(x=variable1, y=variable2)
For your case, you can use
variable = 'ZIPNUM'
merged_df.plot(variable, cmap='Reds', linewidth=0.8, ax=ax, edgecolor='0.8');
EDIT (based on comments)
You should use markeredgecolor only if you use marker for plotting. edgecolor is not the correct keyword. Moreover, you are assigning a number (string) as color which is again incorrect. Below is a simple example.
df = pd.DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]], columns=["A", "B"])
column='A'
df.plot(column, linewidth=0.8, color='r', marker ='o', markeredgewidth=2,
markeredgecolor='blue')

Seaborn issue with catplot

I was following the code in the new seaborn 0.9.0 release as displayed on the site and I got an error when typing in the following code. The code came from the bottom of this page https://seaborn.pydata.org/tutorial/categorical.html
import seaborn as sns
tips = sns.load_dataset("tips")
sns.catplot(x="day", y="total_bill", hue="smoker",
col="time", aspect=.6,
kind="swarm", data=tips);
This is the output from running the above code. I have tried creating a new environment and everything has been updated. I still do not know why it is not working.
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-21-c1ae50b18a54> in <module>
3 sns.catplot(x="day", y="total_bill", hue="smoker",
4 col="time", aspect=.6,
----> 5 kind="swarm", data=tips);
6 get_ipython().run_line_magic('version_information', '')
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/categorical.py in catplot(x, y, hue, data, row, col, col_wrap, estimator, ci, n_boot, units, order, hue_order, row_order, col_order, kind, height, aspect, orient, color, palette, legend, legend_out, sharex, sharey, margin_titles, facet_kws, **kwargs)
3753
3754 # Draw the plot onto the facets
-> 3755 g.map_dataframe(plot_func, x, y, hue, **plot_kws)
3756
3757 # Special case axis labels for a count type plot
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/axisgrid.py in map_dataframe(self, func, *args, **kwargs)
818
819 # Draw the plot
--> 820 self._facet_plot(func, ax, args, kwargs)
821
822 # Finalize the annotations and layout
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/axisgrid.py in _facet_plot(self, func, ax, plot_args, plot_kwargs)
836
837 # Draw the plot
--> 838 func(*plot_args, **plot_kwargs)
839
840 # Sort out the supporting information
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/categorical.py in swarmplot(x, y, hue, data, order, hue_order, dodge, orient, color, palette, size, edgecolor, linewidth, ax, **kwargs)
2989 linewidth=linewidth))
2990
-> 2991 plotter.plot(ax, kwargs)
2992 return ax
2993
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/categorical.py in plot(self, ax, kws)
1444 def plot(self, ax, kws):
1445 """Make the full plot."""
-> 1446 self.draw_swarmplot(ax, kws)
1447 self.add_legend_data(ax)
1448 self.annotate_axes(ax)
~/anaconda3/envs/python3/lib/python3.7/site-packages/seaborn/categorical.py in draw_swarmplot(self, ax, kws)
1404 kws.update(c=point_colors)
1405 if self.orient == "v":
-> 1406 points = ax.scatter(cat_pos, swarm_data, s=s, **kws)
1407 else:
1408 points = ax.scatter(swarm_data, cat_pos, s=s, **kws)
~/anaconda3/envs/python3/lib/python3.7/site-packages/matplotlib/__init__.py in inner(ax, data, *args, **kwargs)
1803 "the Matplotlib list!)" % (label_namer, func.__name__),
1804 RuntimeWarning, stacklevel=2)
-> 1805 return func(ax, *args, **kwargs)
1806
1807 inner.__doc__ = _add_data_doc(inner.__doc__,
~/anaconda3/envs/python3/lib/python3.7/site-packages/matplotlib/axes/_axes.py in scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, verts, edgecolors, **kwargs)
4193 isinstance(c, str) or
4194 (isinstance(c, collections.Iterable) and
-> 4195 isinstance(c[0], str))):
4196 c_array = None
4197 else:
IndexError: index 0 is out of bounds for axis 0 with size 0
This is unfortunately a bug in matplotlib 3.0.1. It's been reported here and fixed by pull/12673.
Options you have are to install either matplotlib 3.0.0 or 3.0.2.

sklearn RidgeCV with sample_weight

I'm trying to do a weighted Ridge Regression with sklearn. However, the code breaks when I call the fit method. The exception I get is:
Exception: Data must be 1-dimensional
But I'm sure (by checking through print-statements) that the data I'm passing has the right shapes.
print temp1.shape #(781, 21)
print temp2.shape #(781,)
print weights.shape #(781,)
result=RidgeCV(normalize=True).fit(temp1,temp2,sample_weight=weights)
What could be going wrong?
Here's the whole output:
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-65-a5b1eba5d9cf> in <module>()
22
23
---> 24 result=RidgeCV(normalize=True).fit(temp2,temp1, sample_weight=weights)
25
26
/usr/local/lib/python2.7/dist-packages/sklearn/linear_model/ridge.pyc in fit(self, X, y, sample_weight)
868 gcv_mode=self.gcv_mode,
869 store_cv_values=self.store_cv_values)
--> 870 estimator.fit(X, y, sample_weight=sample_weight)
871 self.alpha_ = estimator.alpha_
872 if self.store_cv_values:
/usr/local/lib/python2.7/dist-packages/sklearn/linear_model/ridge.pyc in fit(self, X, y, sample_weight)
793 else alpha)
794 if error:
--> 795 out, c = _errors(weighted_alpha, y, v, Q, QT_y)
796 else:
797 out, c = _values(weighted_alpha, y, v, Q, QT_y)
/usr/local/lib/python2.7/dist-packages/sklearn/linear_model/ridge.pyc in _errors(self, alpha, y, v, Q, QT_y)
685 w = 1.0 / (v + alpha)
686 c = np.dot(Q, self._diag_dot(w, QT_y))
--> 687 G_diag = self._decomp_diag(w, Q)
688 # handle case where y is 2-d
689 if len(y.shape) != 1:
/usr/local/lib/python2.7/dist-packages/sklearn/linear_model/ridge.pyc in _decomp_diag(self, v_prime, Q)
672 def _decomp_diag(self, v_prime, Q):
673 # compute diagonal of the matrix: dot(Q, dot(diag(v_prime), Q^T))
--> 674 return (v_prime * Q ** 2).sum(axis=-1)
675
676 def _diag_dot(self, D, B):
/usr/local/lib/python2.7/dist-packages/pandas/core/ops.pyc in wrapper(left, right, name)
531 return left._constructor(wrap_results(na_op(lvalues, rvalues)),
532 index=left.index, name=left.name,
--> 533 dtype=dtype)
534 return wrapper
535
/usr/local/lib/python2.7/dist-packages/pandas/core/series.pyc in __init__(self, data, index, dtype, name, copy, fastpath)
209 else:
210 data = _sanitize_array(data, index, dtype, copy,
--> 211 raise_cast_failure=True)
212
213 data = SingleBlockManager(data, index, fastpath=True)
/usr/local/lib/python2.7/dist-packages/pandas/core/series.pyc in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
2683 elif subarr.ndim > 1:
2684 if isinstance(data, np.ndarray):
-> 2685 raise Exception('Data must be 1-dimensional')
2686 else:
2687 subarr = _asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional
The error seems to be due to sample_weight being a pandas Series rather than a numpy array:
from sklearn.linear_model import RidgeCV
temp1 = pd.DataFrame(np.random.rand(781, 21))
temp2 = pd.Series(temp1.sum(1))
weights = pd.Series(1 + 0.1 * np.random.rand(781))
result = RidgeCV(normalize=True).fit(temp1, temp2,
sample_weight=weights)
# Exception: Data must be 1-dimensional
If you use a numpy array instead, the error goes away:
result = RidgeCV(normalize=True).fit(temp1, temp2,
sample_weight=weights.values)
This seems to be a bug; I've opened a scikit-learn issue to report this.

Resources