Group Box Plots for different numerical variables in one figure - python-3.x

I have a data frame with several numerical variables and I would like to create box plots for each variable and group them in one figure. So each variable should have its own box plot and all these box plots should be in 1 figure.How can I do that in Seaborn or Matplotlib?
Thank you very much!

Yes, you can do with seaborn:
df = pd.DataFrame(np.random.rand(100,4), columns=list('ABCD'))
num_col_list = ['A','B','C','D']
sns.boxplot(data=df.melt(value_vars=num_col_list),
x='variable', y='value')
Output:
Or with just pandas/matplotlib:
df.boxplot(column=num_col_list)
Output:

If you use a pandas data frame you can use the boxplot function:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.randn(10, 4),columns=['Col1', 'Col2', 'Col3', 'Col4'])
df.boxplot(column=['Col1', 'Col2', 'Col3'])
plt.show()

Related

How to create a line plot in python, by importing data from excel and using it to create a plot that shares a common X-Axis?

Trying to create a plot using Python Spyder. I have sample data in excel which I am able to import into Spyder, I want one column ('Frequency') to be the X axis, and the rest of the columns ('C1,C2,C3,C4') to be plotted on the Y axis. How do I do this? This is the data in excel and how the plot looks in excel (https://i.stack.imgur.com/eRug5.png) , the plot and data
This is what I have so far . These commands below (Also seen in the image) give an empty plot.
data = data.head()
#data.plot(kind='line', x='Frequency', y=['C1','C2','C3','C4'])
df = pd.DataFrame(data, columns=["Frequency","C1", "C2","C3","C4"])
df.plot(x = "Frequency",y=["C1", "C2","C3","C4"])
Here is an example, you can change columns names:
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.DataFrame({'X_Axis':[1,3,5,7,10,20],
'col_2':[.4,.5,.4,.5,.5,.4],
'col_3':[.7,.8,.9,.4,.2,.3],
'col_4':[.1,.3,.5,.7,.1,.0],
'col_5':[.5,.3,.6,.9,.2,.4]})
dfm = df.melt('X_Axis', var_name='cols', value_name='vals')
g = sns.catplot(x="X_Axis", y="vals", hue='cols', data=dfm, kind='point')
import pandas as pd
import matplotlib.pyplot as plt
path = r"C:\Users\Alisha.Walia\Desktop\Alisha\SAMPLE.xlsx"
data = pd.read_excel(path)
#df = pd.DataFrame.from_dict(data)
#print(df)
#prints out data from excl in tabular format
dict1 = (data.to_dict()) #print(dict1)
Frequency=data["Frequency "].to_list() #print (Frequency)
C1=data["C1"].to_list() #print(C1)
C2=data["C2"].to_list() #print(C2)
C3=data["C3"].to_list() #print(C3)
C4=data["C4"].to_list() #print(C4)
plt.plot(Frequency,C1)
plt.plot(Frequency,C2)
plt.plot(Frequency,C3)
plt.plot(Frequency,C4)
plt.style.use('ggplot')
plt.title('SAMPLE')
plt.xlabel('Frequency 20Hz-200MHz')
plt.ylabel('Capacitance pF')
plt.xlim(5, 500)
plt.ylim(-20,20)
plt.legend()
plt.show()

how to make scatter plot of two columns and divide x_axis in 3 column f1,f2,and f3

I have dataframe i want to draw a scattor plot by dividing plot in 2 regions in region one only plot f_x_f1 vs A_x_f1, and in region2 plot f_x_f2 vs A_x_f2
please if someone can provide better solution for this problem
here is example of my dataframe
df=pd.DataFrame({'f_x_f1':[0.3,0.28,0.34],'A_x_f1':[0.003,0.28,0.034],'f1':[0.4,0.4,0.4],'f_x_f2':[0.91,0.88,0.96],'A_x_f2':[0.003,0.28,0.034],'f2':[1.3,1.3,1.3]})
Here, using matplotlib!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
### making some sample data
df = pd.DataFrame({"f_x_f1": np.random.randint(1,100,100)
, "A_x_f1": np.random.randint(1,100,100)
, "f_x_f2": np.random.randint(1,100,100)
, "A_x_f2": np.random.randint(1,100,100) })
fig, ax = plt.subplots(nrows=1, ncols=2)
ax[0].scatter(df.f_x_f1,df.A_x_f1)
ax[0].set_title("f_x_f1 vs A_x_f1")
ax[1].scatter(df.f_x_f2,df.A_x_f2)
ax[1].set_title("f_x_f2 vs A_x_f2")
OUTPUT:

how can i plot the graph for csv data in matplotlib

can you please tell me how to plot the graph for csv data.
csv file have x,y,depth,color values i want to plot the depth and color for x and y axis,i goggled many times but i didn't find anything properly.so please guide me how to plot the graph for that values?
this is i tried :
from matplotlib import pyplot as plt
from matplotlib import style
import pandas as pd
data=pd.read_csv("Tunnel.csv",names=['x','y','z','color'])
data1 =data[data.z==0]
print (data1)
# plt.plot(data[data.x],data[data.y])
plt.ylabel('yaxis')
plt.xlabel('xaxis')
plt.title('Tunnel 2d')
plt.show()
my data is given bellow
I'm assuming that you want the first two columns to be used as plot axis and columns 3 and 4 as plot data.
from matplotlib import pyplot as plt
import pandas as pd
data = pd.read_csv("Tunnel.csv")
x = stats[stats.columns[2]]
y = stats[stats.columns[3]]
xlab = list(stats)[0] #x-axis label
ylab = list(stats)[1] #y-axis label
fig, pli = plt.subplots()
pli.show()
#Assuming it's a line graph that you want to plot
line, = pli.plot(x, y, color='g', linewidth=5, label='depth vs color')
plt.xlabel(xlab)
plt.ylabel(ylab)
plt.title(title)
fig.savefig('./Directory/Graph.png')
I am assuming that you want the color and depth as text annotations.
import stuff
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
create the df
dep=list(np.random.randint(0,100,10))
col=list(np.random.randint(0,100,10))
y=[int(x/3)+1 for x in range(0,10)]
x=list(range(0,10))
my_df=pd.DataFrame({'x':x,'y':y,'colour':col,'depth':dep})
create the annotate column
my_df['my_text']='c= '+my_df.colour.astype(str)+','+'d= '+my_df.depth.astype(str)
plot it
plt.figure(figsize=(20,10))
plt.plot(my_df.x,my_df.y,'o')
for i, txt in enumerate(my_df['my_text']):
plt.annotate(txt, (x[i],y[i]), size=10, xytext=(0,0), ha='left', textcoords='offset points', bbox=dict(facecolor='none', edgecolor='red'))
plt.ylabel('yaxis')
plt.xlabel('xaxis')
plt.title('Tunnel 2d')
plt.show()
Result

Set hue using a range of values in Seaborn stripplot

I am trying to set hue based on a range of values rather than unique values in seaborn stripplot. For example, different colors for different value ranges (1940-1950, 1950-1960 etc.).
sns.stripplot('Condition', 'IM', data=dd3, jitter=0.3, hue= dd3['Year Built'])
Output Figure
Thanks
Looks like you need to bin the data. Use .cut() in the below manner. The years are binned into 5 groups. You can arrange your own step in .arrange() to adjust your ranges.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
x = np.random.randint(0,100,size=100)
y = np.random.randint(0,100, size=100)
year = np.random.randint(1918, 2019, size=100)
df = pd.DataFrame({
'x':x,
'y':y,
'year':year
})
df['year_bin'] = pd.cut(df['year'], np.arange(min(year), max(year), step=20))
sns.lmplot('x','y', data=df, hue='year_bin')
plt.show()
Output:

x axis labels (date) slips in Python matplotlib

I'm beginner in Python and I have the following problems. I would like to plot a dataset, where the x-axis shows date data. The Dataset look likes the follows:
datum, start, end
2017.09.01 38086 37719,8984
2017.09.04 37707.3906 37465.2617
2017.09.05 37471.5117 37736.1016
2017.09.06 37723.5898 37878.8594
2017.09.07 37878.8594 37783.5117
2017.09.08 37764.7383 37596.75
2017.09.11 37615.5117 37895.8516
2017.09.12 37889.6016 38076.8789
2017.09.13 38089.1406 38119.0898
2017.09.14 38119.2617 38243.1992
2017.09.15 38243.7188 38325.9297
2017.09.18 38325.3086 38387.2188
2017.09.19 38387.2188 38176.0781
2017.09.20 38173.2109 38108.0391
2017.09.21 38107.2617 38109.2109
2017.09.22 38110.4609 38178.6289
2017.09.25 38121.9102 38107.8711
2017.09.26 38127.25 37319.2383
2017.09.27 37360.8398 37244.3008
2017.09.28 37282.1094 37191.6484
2017.09.29 37192.1484 37290.6484
In the first column are the labels of the x-axis (this is the date).
When I write the following code the x axis data slips:
import pandas as pd
import matplotlib.pyplot as plt
bux = pd.read_csv('C:\\Home\\BUX.txt',
sep='\t',
decimal='.',
header=0)
fig1 = bux.plot(marker='o')
fig1.set_xticklabels(bux.datum, rotation='vertical', fontsize=8)
The resulted figure look likes as follows:
The second data row in the dataset is '2017.09.04 37707.3906 37465.2617', BUT '2017.09.04' is yield at the third data row with start value=37471.5117
What shell I do to get correct x axis labels?
Thank you!
Agnes
First, there is a comma in the second line instead of a .. This should be adjusted. Then, you convert the "datum," column to actual dates and simply plot the dataframe with matplotlib.
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('data/BUX.txt', sep='\s+')
df["datum,"] = pd.to_datetime(df["datum,"], format="%Y.%m.%d")
plt.plot(df["datum,"], df["start,"], marker="o")
plt.plot(df["datum,"], df["end"], marker="o")
plt.gcf().autofmt_xdate()
plt.show()
Thank you! It works perfectly. The key moment was to convert the data to date format. Thank you again!
Agnes
Actually you can easily use the df.plot() to fix it:
import pandas as pd
import matplotlib.pyplot as plt
import io
t="""
date start end
2017.09.01 38086 37719.8984
2017.09.04 37707.3906 37465.2617
2017.09.05 37471.5117 37736.1016
2017.09.06 37723.5898 37878.8594
2017.09.07 37878.8594 37783.5117
2017.09.08 37764.7383 37596.75
2017.09.11 37615.5117 37895.8516
2017.09.12 37889.6016 38076.8789
2017.09.13 38089.1406 38119.0898
2017.09.14 38119.2617 38243.1992
2017.09.15 38243.7188 38325.9297
2017.09.18 38325.3086 38387.2188
2017.09.19 38387.2188 38176.0781
2017.09.20 38173.2109 38108.0391
2017.09.21 38107.2617 38109.2109
2017.09.22 38110.4609 38178.6289
2017.09.25 38121.9102 38107.8711
2017.09.26 38127.25 37319.2383
2017.09.27 37360.8398 37244.3008
2017.09.28 37282.1094 37191.6484
2017.09.29 37192.1484 37290.6484
"""
import numpy as np
data=pd.read_fwf(io.StringIO(t),header=1,parse_dates=['date'])
data.plot(x='date',marker='o')
plt.show()

Resources