Plotting multiple boxplots grouped by two columns - python-3.x

import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import pandas as pd
filepath='E:/PROJECT ON DATA SCIENCE/boxplot/fee.csv';
X=pd.read_csv(filepath_or_buffer=filepath,index_col=0)
X.boxplot(by='stype', column='fee')
X.boxplot(by='pincode', column='fee')

If you want a boxplot of X grouped by both stype and pincode, you can use:
X.boxplot(column='fee', by=['stype', 'pincode'])
The complete code would be:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import pandas as pd
# Location of the fee data; the first CSV column becomes the row index.
filepath = 'E:/PROJECT ON DATA SCIENCE/boxplot/fee.csv'
X = pd.read_csv(filepath_or_buffer=filepath, index_col=0)
# Passing a list to `by` draws one box of `fee` per (stype, pincode) pair.
X.boxplot(column='fee', by=['stype', 'pincode'])
# Needed to display the figure when this runs as a plain script
# (interactive/notebook backends may render it without this call).
plt.show()

Related

Cannot configure matplotlib rcparams for plot color

I am not able to change the default color from blue to red using plt.rc() or mpl.rcParams,
as described in Matplotlib's official documentation.
Here is the code I tried:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
data = np.random.randn(50)
mpl.rcParams['lines.color'] = 'r'
plt.plot(data)
Output:
If you are using a relatively new matplotlib version (i.e. 1.5+), you should use axes.prop_cycle, as described here.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Sample data: 50 draws from numpy's standard-normal generator.
data = np.random.randn(50)
import cycler
# Build a colour cycle containing only red and install it as the default
# property cycle, so lines plotted afterwards use it.
red_only_cycle = cycler.cycler(color='r')
plt.rcParams['axes.prop_cycle'] = red_only_cycle
plt.plot(data)

AttributeError: module 'matplotlib' has no attribute 'scatter'

I'm trying to cluster latitude and longitude data.
The code raises an error on the line plt.scatter(data['Lng'],data['Lat']).
The error is:
AttributeError: module 'matplotlib' has no attribute 'scatter'
code:
import numpy as np
import pandas as pd
import matplotlib as plt
import seaborn as sns
sns.set()
from sklearn.cluster import KMeans
data = pd.read_csv("pk.csv")
data.head()
lat_long = data.drop(['country', 'iso2','admin', 'capital','population',
'population_proper'] , axis = 1)
lat_long.head()
plt.scatter(data['Lng'],data['Lat']) # error here
It should be:
import matplotlib.pyplot as plt
Or it can be:
from matplotlib import pyplot as plt
You can also read PEP 328 for more information and clarity.

How to scale a data using Python 3

I am trying to scale my data using Python 3
But I keep getting the error shown below, and I am out of ideas as to what the issue could be. Could you please assist me? I would deeply appreciate your help!
import pandas as pd
import numpy as np
from numpy.random import randn
from pandas import Series, DataFrame
from pandas.plotting import scatter_matrix
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import rcParams
from pylab import rcParams
import seaborn as sb
import scipy
from scipy import stats
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from scipy.stats import chi2_contingency
import sklearn
from sklearn import preprocessing
from sklearn.preprocessing import scale
mtcars = pd.read_csv('mtcars.csv')
# Assign 12 readable column labels to the frame. The first label is a single
# string, 'Car names'; it must not be split across lines.
mtcars.columns = [
    'Car names', 'mpg', 'cyl', 'disp', 'hp', 'drat',
    'wt', 'qsec', 'vs', 'am', 'gear', 'carb',
]
mpg = mtcars['mpg']
#Scale your data
mpg_matrix = mpg.reshape(-1,1)
scaled = preprocessing.MinMaxScaler()
scaled_mpg = scaled.fit_transform(mpg_matrix)
plt.plot(scaled_mpg)
plt.show()
mpg_matrix = mpg.numpy.reshape(-1,1)
tr__
File "C:\Anaconda\lib\site-packages\pandas\core\generic.py", line 5067, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'Series' object has no attribute 'numpy'
pandas.core.series.Series doesn't have reshape.
Perhaps:
mpg_matrix = mpg.values.reshape(-1,1)
i.e. get the underlying numpy array and reshape that.

Scatter plot is not sort in matplotlib from csv file

My Code:
import pandas as pd
import matplotlib.pyplot as plt
df=pd.read_csv("linear_regression_dataset.csv", sep=";")
plt.scatter(df.Deneyim,df.Maas)
plt.xlabel("deneyim")
plt.ylabel("maas")
plt.show()
Is there a solution proposal?
The graphic I want:
Sort the DataFrame first, and then you can plot:
import pandas as pd
import matplotlib.pyplot as plt
# Load the semicolon-separated dataset.
df = pd.read_csv("linear_regression_dataset.csv", sep=";")
# Cast the 'Maas' column to int before sorting. The column name must match
# the one used below ('Maas'); 'Mass' does not exist and raises KeyError.
# NOTE(review): presumably the column is otherwise read as text - confirm.
df['Maas'] = df['Maas'].astype(int)
# Sort rows by 'Maas' in place before plotting.
df.sort_values('Maas', inplace=True)
plt.scatter(df.Deneyim, df.Maas)
plt.xlabel("deneyim")
plt.ylabel("maas")
plt.show()

I was visualizing a data set using seaborn in Python 3, but it's giving me an error: unsupported operand type(s) for /: 'str' and 'int'

import pandas as pd
from pandas import Series,DataFrame
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline
poll_df=pd.read_csv('http://elections.huffingtonpost.com/pollster/2012-general-election-romney-vs-obama.csv')
#poll_df is the data which i have read from a csv file.
sns.factorplot('Affiliation',data=poll_df)
I have difficulty understanding the question. The Affiliation column holds str values, not numeric ones.
If you want to count the total number of rows in each str category and show the counts as a bar plot, try:
import pandas as pd
from pandas import Series,DataFrame
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline
# poll_df holds the poll data read from the CSV file at this URL.
poll_df = pd.read_csv('http://elections.huffingtonpost.com/pollster/2012-general-election-romney-vs-obama.csv')
# Draw one bar per distinct 'Affiliation' value, with height equal to its
# row count. The column is passed as the keyword `x` because newer seaborn
# releases no longer accept it positionally.
sns.countplot(x='Affiliation', data=poll_df)
Alternatively, upload an image of the kind of plot you would like to have as a result.

Resources