Add vertical lines to separate condition-split boxplots using seaborn? - python-3.x

I'm trying to implement vertical lines on my seaborn boxplot to separate each column, so far I can only seem to add major lines ax.xaxis.grid(True, which='major') which run through the middle. Here is my code and an image of what I'm trying to achieve. Thanks!
# Custom palette
my_pal = {"Year A": "#e42628", "Year B": "#377db6"}
plt.figure(figsize=(16, 10))
sns.axes_style("whitegrid")
ax = sns.boxplot(x='variable', y="value", hue="Condition", showmeans=True, data=df, palette=my_pal, meanprops={"marker":"s","markerfacecolor":"white", "markeredgecolor":"black"})
plt.ylabel("Temperature (\xb0C)")
#ax.axvline(linewidth=2, color='r')
ax.xaxis.grid(True, which='major')

The default width of boxplots is 0.5 (or 0.15 x [distance between extreme positions] if that is smaller).
If your width is 0.5 you can do this:
import seaborn as sns, matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
ax = sns.boxplot(x='day',y='total_bill',hue='sex',data=tips)
[ax.axvline(x+.5,color='k') for x in ax.get_xticks()]
plt.show()
Example:

You can position the minor xticks as follows and use them for the grid:
from matplotlib import pyplot as plt
from matplotlib.ticker import MultipleLocator
import seaborn as sns
import pandas as pd
import numpy as np
N = 200
df = pd.DataFrame({'variable': np.repeat(list('ABCDEFGHIJ'), N // 10),
'value': np.random.uniform(10, 25, N),
'Condition': np.random.choice(['Year A', 'Year B'], N)})
# Custom palette
my_pal = {"Year A": "#e42628", "Year B": "#377db6"}
plt.figure(figsize=(16, 10))
sns.axes_style("whitegrid")
ax = sns.boxplot(x='variable', y="value", hue="Condition", showmeans=True, data=df, palette=my_pal,
meanprops={"marker": "s", "markerfacecolor": "white", "markeredgecolor": "black"})
plt.ylabel("Temperature (°C)")
ax.xaxis.set_minor_locator(MultipleLocator(0.5))
ax.xaxis.grid(True, which='minor', color='black', lw=2)
plt.show()

Related

sns.histplot legend colors not matching the output

I am creating a combo boxplot\histplot.
Everything runs and I get the output I am expecting except for one thing:
The line colors in the legend do not match the output.
Code:
def boxhist(dfx, x):
variable = dfx[x].values
np.array(variable).mean()
np.median(variable)
f, (ax_box, ax_hist) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (0.5, 2)})
mean = np.array(variable).mean()
median = np.median(variable)
sns.boxplot(variable, ax=ax_box)
ax_box.axvline(mean, color='orange', linestyle='--')
ax_box.axvline(median, color='black', linestyle='-')
sns.histplot(data=variable, ax=ax_hist, kde=True, binwidth=2, facecolor='green').lines[0].set_color('red')
ax_hist.axvline(mean, color='orange', linestyle='--')
ax_hist.axvline(median, color='black', linestyle='-')
plt.title(x, fontsize=10, loc='right')
plt.legend({'Mean': mean, 'Median': median})
ax_box.set(xlabel='')
plt.tight_layout()
plt.show()
Output:
The mean should be orange.
The median should be black.
Why is the legend showing the mean as red and the median as orange?
I want the legend colors to match the plot output. mean\orange, median\black.
You need to add a label in ax_hist.axvline(mean, ...., label='Mean') (and similar for the median). Then matplotlib should automatically add them to the legend (when called without parameters).
Here is some example code:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
def boxhist(dfx, x):
variable = dfx[x].values
variable.mean()
np.median(variable)
f, (ax_box, ax_hist) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (0.5, 2)})
mean = variable.mean()
median = np.median(variable)
sns.boxplot(x=variable, ax=ax_box)
ax_box.axvline(mean, color='orange', linestyle='--')
ax_box.axvline(median, color='black', linestyle='-')
sns.histplot(x=variable, ax=ax_hist, kde=True, binwidth=2, facecolor='green')
ax_hist.lines[0].set_color('red')
ax_hist.axvline(mean, color='orange', linestyle='--', label='Mean')
ax_hist.axvline(median, color='black', linestyle='-', label='Median')
ax_hist.set_title(x, fontsize=10, loc='right')
ax_hist.legend()
# ax_box.set(xlabel='') # has no effect on shared x-axis
plt.tight_layout()
plt.show()
dfx = pd.DataFrame({'bmi': np.random.normal(30.2, 5, 100)})
boxhist(dfx, 'bmi')

Combine bar plot and line plot in seaborn [duplicate]

I have dataframe like this:
df_meshX_min_select = pd.DataFrame({
'Number of Elements' : [5674, 8810,13366,19751,36491],
'Time (a)' : [42.14, 51.14, 55.64, 55.14, 56.64],
'Different Result(Temperature)' : [0.083849, 0.057309, 0.055333, 0.060516, 0.035343]})
and I tried to combine bar plot (number of elements Vs Different result) and line plot (Number of elements Vs Time) in the same figure, but I found the following problem like this:
it seems that x_value doesn't match when combining 2 plots, but if you see the data frame, the x value is exactly the same value.
My expectation is combining these 2 plots into 1 figure:
and this is the code that I made:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
df_meshX_min_select = pd.DataFrame({
'Number of Elements' : [5674, 8810,13366,19751,36491],
'Time (a)' : [42.14, 51.14, 55.64, 55.14, 56.64],
'Different Result(Temperature)' : [0.083849, 0.057309, 0.055333, 0.060516, 0.035343]})
x1= df_meshX_min_select["Number of Elements"]
t1= df_meshX_min_select["Time (a)"]
T1= df_meshX_min_select["Different Result(Temperature)"]
#Create combo chart
fig, ax1 = plt.subplots(figsize=(10,6))
color = 'tab:green'
#bar plot creation
ax1.set_title('Mesh Analysis', fontsize=16)
ax1.set_xlabel('Number of elements', fontsize=16)
ax1.set_ylabel('Different Result(Temperature)', fontsize=16)
ax1 = sns.barplot(x='Number of Elements', y='Different Result(Temperature)', data = df_meshX_min_select)
ax1.tick_params(axis='y')
#specify we want to share the same x-axis
ax2 = ax1.twinx()
color = 'tab:red'
#line plot creation
ax2.set_ylabel('Time (a)', fontsize=16)
ax2 = sns.lineplot(x='Number of Elements', y='Time (a)', data = df_meshX_min_select, sort=False, color=color, ax=ax2)
ax2.tick_params(axis='y', color=color)
#show plot
plt.show()
Anyone can help me, please?
Seaborn and pandas use a categorical x-axis for bar plots (internally numbered 0,1,2,...) and floating-point numbers for a line plot. Note that your x-values aren't evenly spaced, so either the bars would have weird distances between them, or wouldn't align with the x-values from the line plot.
Here is a solution using standard matplotlib to combine both graphs.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
df_meshx_min_select = pd.DataFrame({
'number of elements': [5674, 8810, 13366, 19751, 36491],
'time (a)': [42.14, 51.14, 55.64, 55.14, 56.64],
'different result(temperature)': [0.083849, 0.057309, 0.055333, 0.060516, 0.035343]})
x1 = df_meshx_min_select["number of elements"]
t1 = df_meshx_min_select["time (a)"]
d1 = df_meshx_min_select["different result(temperature)"]
fig, ax1 = plt.subplots(figsize=(10, 6))
color = 'limegreen'
ax1.set_title('mesh analysis', fontsize=16)
ax1.set_xlabel('number of elements', fontsize=16)
ax1.set_ylabel('different result(temperature)', fontsize=16, color=color)
ax1.bar(x1, height=d1, width=2000, color=color)
ax1.tick_params(axis='y', colors=color)
ax2 = ax1.twinx() # share the x-axis, new y-axis
color = 'crimson'
ax2.set_ylabel('time (a)', fontsize=16, color=color)
ax2.plot(x1, t1, color=color)
ax2.tick_params(axis='y', colors=color)
plt.show()
I was plotting a boxplot with a lineplot and I had the same problem even my two x-axes are identical, so I solved converting my x-axis feature to type string:
df_meshX_min_select['Number of Elements'] = df_meshX_min_select['Number of Elements'].astype('string')
This way the plot works using seaborn:

Matplotlib- Add a color bar below a multi-colored line subplot as shown in the image

I am having a multicolored line plot and I want to add a color bar under it in the same figure like as shown in the image below, Is it possible?
I have attached a color bar image as a reference which I took from another code.
My intention here is to use the color bar like a legend for each segment of the line in the plot.
Edit-1: I want to have the color bar using a mappable object such as an image, So don't want to create a new subplot for the sole purpose of the color bar.
Any suggestion is welcome. Thanks in Advance.
This is the code for multicolored line plot
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
Segments=[[[3,1],[6,1]],[[6,2],[9,2]],[[9,3],[12,3]],[[12,4],[15,4]], [[12,4],[15,4]]]
Points_1 = np.concatenate([Segments[:-1], Segments[1:]], axis=1)
lc = LineCollection(Points_1, colors=['r','g','b','y'], linewidths=2)
fig, ax = plt.subplots()
ax.add_collection(lc)
ax.autoscale()
plt.show()
This is a workaround I'am using:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.colorbar as mcolorbar
import matplotlib.colors as mcolors
Segments=[[[3,1],[6,1]],[[6,2],[9,2]],[[9,3],[12,3]],[[12,4],[15,4]], [[12,4],[15,4]]]
Points_1 = np.concatenate([Segments[:-1], Segments[1:]], axis=1)
lc = LineCollection(Points_1, colors=['r','g','b','y'], linewidths=2)
fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios' : [5,1]})
ax[0].add_collection(lc)
bounds = np.linspace(0, 1, 5)[:-1]
labels = ['Action1', 'Action2', 'Action3', 'Action4']
ax[0].set_xlim([0, 15])
ax[0].set_ylim([0, 10])
cb2 = mcolorbar.ColorbarBase(ax = ax[1], cmap = cmap, orientation = 'horizontal', extendfrac='auto')
cb2.set_ticks(bounds)
cb2.set_ticklabels(labels)
plt.tight_layout()
plt.show()
If you specifically want to avoid subplots, you can use a scalar mappable:
fig, ax = plt.subplots()
ax.add_collection(lc)
ax.autoscale()
cmap = mcolors.ListedColormap(['r','g','b','y'])
sm = plt.cm.ScalarMappable(cmap=cmap)
sm.set_array([]) # this line may be ommitted for matplotlib >= 3.1
cbar = fig.colorbar(sm, ax=ax, orientation='horizontal',aspect=90)
bounds = np.linspace(0, 1, 5)[:-1]
labels = ['Action1', 'Action2', 'Action3', 'Action4']
ax.set_xlim([0, 15])
ax.set_ylim([0, 10])
cbar.set_ticks(bounds)
cbar.set_ticklabels(labels)
plt.tight_layout()
plt.show()
This helped me to get what I asked.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.collections import LineCollection
Segments=[[[3,1],[6,1]],[[6,2],[9,2]],[[9,3],[12,3]],[[12,4],[15,4]], [[12,4],[15,4]]]
Points_1 = np.concatenate([Segments[:-1], Segments[1:]], axis=1)
lc = LineCollection(Points_1, colors=['r','g','b','y'], linewidths=2)
fig, ax = plt.subplots()
ax.add_collection(lc)
ax.autoscale()
c=[1,2,3,4,5]
labels = ['Action1', 'Action2', 'Action3', 'Action4']
cmap = mcolors.ListedColormap(['r','g','b','y'])
norm = mcolors.BoundaryNorm([1,2,3,4,5],4)
sm = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
sm.set_array([]) # this line may be ommitted for matplotlib >= 3.1
cbar=fig.colorbar(sm, ticks=c, orientation='horizontal')
cbar.set_ticklabels(['Action1', 'Action2', 'Action3', 'Action4'])
plt.show()

Bar missing while plotting using Matplotlib's Twinx

I'm using matplotlib.axes.Axes.twinx to have a shared x-axis in matplotlib for both . I dont know why instead of 13 bars to be plotted, only 12 of them are getting plotted.
Link of Data set
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
dataFrame=pd.read_csv("NEM.csv",sep=',')
dataFrame['ratio']=dataFrame['Expert']/dataFrame['Novice']
fig, ax1 = plt.subplots(figsize=(9, 6))
ax1.set_title('N-E Analysis')
xticklabels=dataFrame['Task'].tolist()
ax1.plot('Novice', data=dataFrame, marker='', color='dodgerblue', linewidth=2,label='Novice',zorder=100)
ax1.plot('Expert', data=dataFrame, marker='', color='darkorange', linewidth=2,label='Expert',zorder=200)
plt.ylim(0,120)
ax2 = ax1.twinx()
ax2.bar('Task','ratio', data=dataFrame, color='gray',width=0.35,label='NE',zorder=0)
ax1.spines['top'].set_visible(False)
ax1.spines['right'].set_visible(False)
ax1.spines['left'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax2.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
ax1.set_xticklabels(xticklabels, rotation = 45, ha="right")
ax1.yaxis.grid()
ax1.tick_params(left='off',bottom='off')
ax2.tick_params(right='off')
plt.ylim(0,12)
h1, l1 = ax1.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
p=ax1.legend(h2+h1, l2+l1, loc=2,frameon=False)
fig.tight_layout()
plt.show()
When using plots, it could be good practice to say explicitily how many bars or points you are going to plot. For instance, you can create an x-axis this way:
x_axis = np.arange(len(dataFrame[Task].tolist())
then:
ax1.plot(x_axis, dataFrame['Novice'].tolist(), ...)
after that you rename the xticklabels like this:
ax1.set_xticks(x_axis)
ax1.set_xticklabels(dataFrame[Task].tolist())
Do the same with the bar graph:
ax2.bar(x_axis, dataFrame['Ratio'].tolist(), ...)
This should do the trick.
Hope it helps.

Unable to plot 4 histograms of iris dataset features using matplotlib

using the iris dataset
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
from sklearn import datasets
iris= datasets.load_iris()
x_index = 3
colors = ['blue', 'red', 'green']
for label, color in zip(range(len(iris.target_names)), colors):
plt.hist(iris.data[iris.target==label, x_index],
label=iris.target_names[label],
color=color)
plt.xlabel(iris.feature_names[x_index])
plt.legend(loc='upper right')
plt.show()
enter image description here
This code is plotting only one histogram with sepal length (image attached) as the x-axis.
To plot other features of iris dataset in a similar manner, I have to change the x_index to 1,2 and 3 (manually) and run this bit of code again.
To plot all four histograms simultaneously, I tried the following code:
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
from sklearn import datasets
iris= datasets.load_iris()
fig, axes = plt.subplots(nrows= 2, ncols=2)
colors= ['blue', 'red', 'green', 'black']
x_index= 0
for ax in axes.flat:
for label, color in zip(range(len(iris.target_names)), colors):
ax= plt.hist(iris.data[iris.target==label, x_index], label=
iris.target_names[label], color=color)
plt.xlabel(iris.feature_names[x_index])
plt.legend(loc='upper right')
x_index+=1
plt.show()
This code gives me the following error:
IndexError: index 4 is out of bounds for axis 1 with size 4
Any advice?
Two issues:
ax is the name of the current axes in the loop. You should not redefine but instead use it as this is the axes you want to plot to. Replace ax = plt.hist by ax.hist.
x_index+=1 needs to be in the outer loop, not in the inner loop. Otherwise it will increment up to 11 instead of 3. Better get rid of it entirely and use a normal loop variable.
Complete code:
import matplotlib.pyplot as plt
from sklearn import datasets
iris= datasets.load_iris()
fig, axes = plt.subplots(nrows= 2, ncols=2)
colors= ['blue', 'red', 'green']
for i, ax in enumerate(axes.flat):
for label, color in zip(range(len(iris.target_names)), colors):
ax.hist(iris.data[iris.target==label, i], label=
iris.target_names[label], color=color)
ax.set_xlabel(iris.feature_names[i])
ax.legend(loc='upper right')
plt.show()

Resources