ImportError: cannot import name 'Imputer' from 'sklearn.preprocessing' - python-3.x

I am trying to import Imputer from sklearn.preprocessing:
import pandas as pd
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values
#PART WHERE ERROR OCCURS:-
from sklearn.preprocessing import Imputer
This fails with "ImportError: cannot import name 'Imputer' from 'sklearn.preprocessing' (/home/codeknight13/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/__init__.py)"

from sklearn.preprocessing import Imputer was deprecated with scikit-learn v0.20.4 and removed as of v0.22.2; see the scikit-learn changelog. Use SimpleImputer from sklearn.impute instead:
from sklearn.impute import SimpleImputer
import numpy as np
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
Pinning an old version with pip install scikit-learn==0.20.4 or conda install scikit-learn=0.20.4 is not a good option, because scikit-learn 0.20.4 is more than three years out of date.
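Applied to the snippet from the question, a minimal sketch; the 1:3 column slice is an assumption about which columns of Data.csv hold the missing numeric values, so adjust it to your data:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 3].values

# Fill each NaN with the mean of its column (columns 1-2 assumed numeric)
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X[:, 1:3] = imputer.fit_transform(X[:, 1:3])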

Related

Python: xarray and h5py incompatibility

The code below raises "RuntimeError: NetCDF: HDF error". If I remove the import h5py line, I get no error. Any suggestions on why this might be happening and how I can fix it? My ultimate aim is to load an HDF5 file and write out to netCDF.
import numpy as np
import pandas as pd
import h5py
import xarray as xr

ds = xr.Dataset(
    {"foo": (("x", "y"), np.random.rand(4, 5))},
    coords={
        "x": [10, 20, 30, 40],
        "y": pd.date_range("2000-01-01", periods=5),
        "z": ("x", list("abcd")),
    },
)
ds.to_netcdf("saved_on_disk.nc")
import numpy as np
import pandas as pd
import xarray as xr
Works.
import numpy as np
import pandas as pd
import h5py
import xarray as xr
Doesn't Work
import numpy as np
import pandas as pd
import xarray as xr
import h5py
Doesn't Work
import numpy as np
import pandas as pd
from netCDF4 import Dataset
import xarray as xr
import h5py
Works!
The key was also loading the netCDF4 package.
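If importing netCDF4 is undesirable, another workaround worth trying is to pick the writer backend explicitly; this is a sketch rather than the accepted fix, and engine="h5netcdf" assumes the h5netcdf package is installed:
import numpy as np
import pandas as pd
import h5py
import xarray as xr

ds = xr.Dataset({"foo": (("x", "y"), np.random.rand(4, 5))})
# Write through the h5py-based backend instead of the netCDF4 C library,
# sidestepping the HDF5 library clash (assumes h5netcdf is installed)
ds.to_netcdf("saved_on_disk.nc", engine="h5netcdf")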

No OUTPUT from Jupyter Notebook. NO Error Shown

I am running a Jupyter notebook, but I do not get any output or any error telling me something is wrong. I have tried installing tornado, as other threads have suggested, as well as running pip install notebook --upgrade.
While I do not think there is a problem with my code, here it is.
Any help is truly appreciated.
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
if __name__=="_main_":
    path = "dog-breed-identification/"
    train_path = os.path.join(path, "train/*")
    train_path = os.path.join(path, "test/*")
    train_path = os.path.join(path, "labels.csv")
    labels_df = pd.read_csv(labels_path)
    #name of column in csv
    breed = labels_df["breed"].unique()
    print("Number of Breed: ", len(breed))
As it turns out, if I delete
if __name__=="_main_":
I get output. The underlying cause is that "_main_" (single underscores) never equals "__main__" (double underscores), so the guarded block is silently skipped; fixing the guard works just as well as deleting it.
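A minimal corrected sketch; the test_path and labels_path names are assumptions about what the repeated train_path assignments were meant to be:
import os
import pandas as pd

if __name__ == "__main__":  # double underscores, so the block actually runs
    path = "dog-breed-identification/"
    train_path = os.path.join(path, "train/*")
    test_path = os.path.join(path, "test/*")        # assumed intent of the second assignment
    labels_path = os.path.join(path, "labels.csv")  # assumed intent of the third assignment
    labels_df = pd.read_csv(labels_path)
    breed = labels_df["breed"].unique()
    print("Number of Breeds:", len(breed))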

AttributeError: module 'matplotlib' has no attribute 'scatter'

I'm trying to make clusters of latitude and longitude.
The code raises an error on the plt.scatter(data['Lng'],data['Lat']) line.
The error is:
AttributeError: module 'matplotlib' has no attribute 'scatter'
code:
import numpy as np
import pandas as pd
import matplotlib as plt
import seaborn as sns
sns.set()
from sklearn.cluster import KMeans
data = pd.read_csv("pk.csv")
data.head()
lat_long = data.drop(['country', 'iso2','admin', 'capital','population',
'population_proper'] , axis = 1)
lat_long.head()
plt.scatter(data['Lng'],data['Lat']) # error here
It should be:
import matplotlib.pyplot as plt
Or it can be:
from matplotlib import pyplot as plt
Also, you can read PEP 328 for more information and clarity.
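With the corrected import, the original scatter call works; a minimal sketch, assuming pk.csv contains the Lng and Lat columns used above:
import pandas as pd
import matplotlib.pyplot as plt  # import the pyplot submodule, not the top-level package
import seaborn as sns
sns.set()

data = pd.read_csv("pk.csv")
plt.scatter(data['Lng'], data['Lat'])  # now resolves to pyplot.scatter
plt.show()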

How to plot the distribution of each feature in the cancer dataset

I want to get the distribution of each feature in the cancer dataset using ggplot, but it's giving me an error.
#pip install plotnine
from plotnine import ggplot
from plotnine import *
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
for i in cancer.feature_names:
    ggplot(cancer.data)+aes(x=i)+geom_bar(size=10)
This is the error message I got:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
I would recommend using seaborn for this. Here is an example of plotting the distribution of each feature in the cancer dataset by target:
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
# loading data
cancer = load_breast_cancer()
data = pd.DataFrame(np.c_[cancer['data'], cancer['target']],
                    columns=np.append(cancer['feature_names'], ['target']))
df = data.melt(['target'], var_name='cols', value_name='vals')
g = sns.FacetGrid(df, col='cols', hue="target", palette="Set1", col_wrap=4)
g = (g.map(sns.distplot, "vals", hist=True, ))
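Note that sns.distplot is deprecated in newer seaborn releases (0.11+); a roughly equivalent grid, as a sketch, maps histplot instead:
g = sns.FacetGrid(df, col='cols', hue="target", palette="Set1", col_wrap=4)
g = g.map(sns.histplot, "vals", kde=True)  # histplot replaces the deprecated distplot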
The original error comes from passing cancer.data, a NumPy array, to ggplot, which expects a pandas DataFrame; wrapping the array in a DataFrame makes the plotnine version work:
from plotnine import ggplot
from plotnine import *
from sklearn.datasets import load_breast_cancer
import pandas as pd
import matplotlib.pyplot as plt

cancer = load_breast_cancer()
data = pd.DataFrame(cancer.data, columns=cancer.feature_names)
for i in data.columns:
    print(ggplot(data)+aes(x=i)+geom_density(size=1))
    print(ggplot(data)+aes(x=i)+geom_bar(size=10))

How to scale data using Python 3

I am trying to scale my data using Python 3, but I keep getting an error (shown below). I am out of ideas as to what the issue could be; any help would be deeply appreciated.
import pandas as pd
import numpy as np
from numpy.random import randn
from pandas import Series, DataFrame
from pandas.plotting import scatter_matrix
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import rcParams
from pylab import rcParams
import seaborn as sb
import scipy
from scipy import stats
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from scipy.stats import chi2_contingency
import sklearn
from sklearn import preprocessing
from sklearn.preprocessing import scale
mtcars = pd.read_csv('mtcars.csv')
mtcars.columns = ['Car names','mpg','cyl','disp','hp','drat','wt','qsec','vs','am','gear','carb']
mpg = mtcars['mpg']
#Scale your data
mpg_matrix = mpg.reshape(-1,1)
scaled = preprocessing.MinMaxScaler()
scaled_mpg = scaled.fit_transform(mpg_matrix)
plt.plot(scaled_mpg)
plt.show()
Using mpg_matrix = mpg.numpy.reshape(-1,1) instead raises:
File "C:\Anaconda\lib\site-packages\pandas\core\generic.py", line 5067, in __getattr__
    return object.__getattribute__(self, name)
AttributeError: 'Series' object has no attribute 'numpy'
pandas.core.series.Series doesn't have reshape.
Perhaps:
mpg_matrix = mpg.values.reshape(-1,1)
i.e. get the underlying numpy array and reshape that.
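Putting that fix into the original snippet, a trimmed sketch (assuming mtcars.csv has the columns assigned above):
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing

mtcars = pd.read_csv('mtcars.csv')
mtcars.columns = ['Car names','mpg','cyl','disp','hp','drat','wt','qsec','vs','am','gear','carb']
mpg = mtcars['mpg']

# Reshape the underlying NumPy array into a single-column 2-D matrix for the scaler
mpg_matrix = mpg.values.reshape(-1, 1)
scaler = preprocessing.MinMaxScaler()
scaled_mpg = scaler.fit_transform(mpg_matrix)

plt.plot(scaled_mpg)
plt.show()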

Resources