sns.histplot legend colors not matching the output - python-3.x

I am creating a combo boxplot\histplot.
Everything runs and I get the output I am expecting except for one thing:
The line colors in the legend do not match the output.
Code:
def boxhist(dfx, x):
variable = dfx[x].values
np.array(variable).mean()
np.median(variable)
f, (ax_box, ax_hist) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (0.5, 2)})
mean = np.array(variable).mean()
median = np.median(variable)
sns.boxplot(variable, ax=ax_box)
ax_box.axvline(mean, color='orange', linestyle='--')
ax_box.axvline(median, color='black', linestyle='-')
sns.histplot(data=variable, ax=ax_hist, kde=True, binwidth=2, facecolor='green').lines[0].set_color('red')
ax_hist.axvline(mean, color='orange', linestyle='--')
ax_hist.axvline(median, color='black', linestyle='-')
plt.title(x, fontsize=10, loc='right')
plt.legend({'Mean': mean, 'Median': median})
ax_box.set(xlabel='')
plt.tight_layout()
plt.show()
Output:
The mean should be orange.
The median should be black.
Why is the legend showing the mean as red and the median as orange?
I want the legend colors to match the plot output. mean\orange, median\black.

You need to add a label in ax_hist.axvline(mean, ...., label='Mean') (and similar for the median). Then matplotlib should automatically add them to the legend (when called without parameters).
Here is some example code:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
def boxhist(dfx, x):
variable = dfx[x].values
variable.mean()
np.median(variable)
f, (ax_box, ax_hist) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (0.5, 2)})
mean = variable.mean()
median = np.median(variable)
sns.boxplot(x=variable, ax=ax_box)
ax_box.axvline(mean, color='orange', linestyle='--')
ax_box.axvline(median, color='black', linestyle='-')
sns.histplot(x=variable, ax=ax_hist, kde=True, binwidth=2, facecolor='green')
ax_hist.lines[0].set_color('red')
ax_hist.axvline(mean, color='orange', linestyle='--', label='Mean')
ax_hist.axvline(median, color='black', linestyle='-', label='Median')
ax_hist.set_title(x, fontsize=10, loc='right')
ax_hist.legend()
# ax_box.set(xlabel='') # has no effect on shared x-axis
plt.tight_layout()
plt.show()
dfx = pd.DataFrame({'bmi': np.random.normal(30.2, 5, 100)})
boxhist(dfx, 'bmi')

Related

Is it possible to set (in `matplotlib`) `ax.grid` in such a way that lines will go just to bars instead of going by the whole chart?

Is it possible to set ax.grid in such a way that lines will go just to bars?
Below the regular output("before") and expected("after"):
My code:
fig, ax = plt.subplots(figsize=(15,6))
ax.set_axisbelow(True)
ax = data_test.bar(fontsize=15, zorder=1, color=(174/255, 199/255, 232/255)) # 'zorder' is bar layaut order
for p in ax.patches:
ax.annotate(s=p.get_height(),
xy=(p.get_x()+p.get_width()/2., p.get_height()),
ha='center',
va='center',
xytext=(0, 10),
textcoords='offset points')
ax.spines["right"].set_visible(False)
ax.spines["left"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.set_xticklabels(
data_test.index,
rotation=34.56789,
fontsize='xx-large'
) # We will set xticklabels in angle to be easier to read)
# The labels are centred horizontally, so when we rotate them 34.56789°
ax.grid(axis='y', zorder=0) # 'zorder' is bar layaut order
plt.ylim([4500, 5300])
plt.show()
You could draw horizontal lines instead of using grid lines.
You forgot to add test data, making it quite unclear of what type data_test could be.
The code below supposes data_test is a pandas dataframe, and that data_test.plot.bar() is called to draw a bar plot. Note that since matplotlib 3.4 you can use ax.bar_label to label bars.
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
data_test = pd.DataFrame({'height': np.random.randint(1000, 2000, 7).cumsum()},
index=['Alkaid', 'Mizar', 'Alioth', 'Megrez', 'Phecda', 'Merak', 'Dubhe'])
fig, ax = plt.subplots(figsize=(15, 6))
ax.set_axisbelow(True)
data_test.plot.bar(fontsize=15, zorder=1, color=(174 / 255, 199 / 255, 232 / 255), ax=ax)
for container in ax.containers:
ax.bar_label(container, fmt='%.0f', fontsize=15)
for spine in ax.spines.values():
spine.set_visible(False)
ax.set_xticklabels(data_test.index, rotation=34.56789, fontsize='xx-large')
ax.tick_params(length=0) # remove tick marks
xmin, xmax = ax.get_xlim()
ticks = ax.get_yticks()
tick_extends = [xmax] * len(ticks)
# loop through the bars and the ticks; shorten the lines whenever a bar crosses it
for bar in ax.patches:
for j, tick in enumerate(ticks):
if tick <= bar.get_height():
tick_extends[j] = min(tick_extends[j], bar.get_x())
ax.hlines(ticks, xmin, tick_extends, color='grey', lw=0.8, ls=':', zorder=0)
plt.tight_layout()
plt.show()

Add vertical lines to separate condition-split boxplots using seaborn?

I'm trying to implement vertical lines on my seaborn boxplot to separate each column, so far I can only seem to add major lines ax.xaxis.grid(True, which='major') which run through the middle. Here is my code and an image of what I'm trying to achieve. Thanks!
# Custom palette
my_pal = {"Year A": "#e42628", "Year B": "#377db6"}
plt.figure(figsize=(16, 10))
sns.axes_style("whitegrid")
ax = sns.boxplot(x='variable', y="value", hue="Condition", showmeans=True, data=df, palette=my_pal, meanprops={"marker":"s","markerfacecolor":"white", "markeredgecolor":"black"})
plt.ylabel("Temperature (\xb0C)")
#ax.axvline(linewidth=2, color='r')
ax.xaxis.grid(True, which='major')
The default width of boxplots is 0.5 (or 0.15 x [distance between extreme positions] if that is smaller).
If your width is 0.5 you can do this:
import seaborn as sns, matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
ax = sns.boxplot(x='day',y='total_bill',hue='sex',data=tips)
[ax.axvline(x+.5,color='k') for x in ax.get_xticks()]
plt.show()
Example:
You can position the minor xticks as follows and use them for the grid:
from matplotlib import pyplot as plt
from matplotlib.ticker import MultipleLocator
import seaborn as sns
import pandas as pd
import numpy as np
N = 200
df = pd.DataFrame({'variable': np.repeat(list('ABCDEFGHIJ'), N // 10),
'value': np.random.uniform(10, 25, N),
'Condition': np.random.choice(['Year A', 'Year B'], N)})
# Custom palette
my_pal = {"Year A": "#e42628", "Year B": "#377db6"}
plt.figure(figsize=(16, 10))
sns.axes_style("whitegrid")
ax = sns.boxplot(x='variable', y="value", hue="Condition", showmeans=True, data=df, palette=my_pal,
meanprops={"marker": "s", "markerfacecolor": "white", "markeredgecolor": "black"})
plt.ylabel("Temperature (°C)")
ax.xaxis.set_minor_locator(MultipleLocator(0.5))
ax.xaxis.grid(True, which='minor', color='black', lw=2)
plt.show()

matplotlib notebook cursor coordinates on graph with double y axis

The issue I would like you to figure out is about the coordinantes appearence on matplotlib graph with a double y axis. First of all a code on Jupyter Notebook which draws a graph with two lines and only one y axis (for some unknown reasons I have to run it two times in order to make it working correctly)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
from IPython.display import display
from IPython.core.display import display, HTML #display multiple output on a cell
display(HTML("<style>.container { width:100% !important; }</style>")) # improve cells horizontal size
from IPython.core.interactiveshell import InteractiveShell # It saves you having to repeatedly type "Display"
InteractiveShell.ast_node_interactivity = "all"
%matplotlib notebook
x = np.arange(0, 10, 0.01)
y1 = np.sin(np.pi*x)/(np.pi*x)
y2 = abs(np.tan(0.1*np.pi*x))
plt.figure()
plt.plot(x, y1)
plt.plot(x, y2)
plt.ylim(0, 3)
plt.grid()
plt.show()
The present figure provides the two lines with cursor coordinates on the right bottom part of the graph.
The following code
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
from IPython.display import display
from IPython.core.display import display, HTML #display multiple output on a cell
display(HTML("<style>.container { width:100% !important; }</style>")) # improve cells horizontal size
from IPython.core.interactiveshell import InteractiveShell # It saves you having to repeatedly type "Display"
InteractiveShell.ast_node_interactivity = "all"
%matplotlib notebook
x = np.arange(0, 10, 0.01)
y1 = np.sin(np.pi*x)/(np.pi*x)
y2 = abs(np.tan(0.1*np.pi*x))
# Create some mock data
fig, ax1 = plt.subplots()
plt.grid()
color = 'tab:red'
ax1.set_xlabel('Time (days from 24 February)')
ax1.set_ylabel('Death cases/Intensive care', color=color)
#ax1.set_xlim(0, 15)
#ax1.set_ylim(0, 900)
ax1.plot(x, y1, '-', color=color, label = 'Left hand scale')
ax1.tick_params(axis='y', labelcolor=color)
ax1.legend(loc = 'upper left')
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel('Total cases/currently positive', color=color) # we already handled the x-label with ax1
ax2.plot(x, y2, '-', color=color, label = 'Right hand scale')
ax2.set_ylim(0, 20)
ax2.tick_params(axis='y', labelcolor=color)
ax2.legend(loc = 'lower right')
fig.tight_layout()
plt.show()
Shows the following graph
Which shows a graph with TWO y scales, one red on the left side and one blue on the right side. The problem here is that in the left bottom side of the picture there are the cursor coordinates related to the right scale and nothing about the left one. Is there a way to show up both the two scales?
Depending on your precise needs, mplcursors seems helpful. Mplcursors allows a lot of ways to customize, for example you can show both y-values together with the current x. Or you could suppress the annotation and only write in the status bar.
Setting hover=True constantly displays the plotted values when the mouse hovers over a curve. Default, the values would only be displayed when clicking.
import matplotlib.pyplot as plt
import numpy as np
import mplcursors
# Create some test data
x = np.arange(0, 10, 0.01)
y1 = np.sin(np.pi * x) / (np.pi * x)
y2 = abs(np.tan(0.1 * np.pi * x))
fig, ax1 = plt.subplots()
plt.grid()
color = 'tab:red'
ax1.set_xlabel('Time (days from 24 February)')
ax1.set_ylabel('Death cases/Intensive care', color=color)
lines1 = ax1.plot(x, y1, '-', color=color, label='Left hand scale')
ax1.tick_params(axis='y', labelcolor=color)
ax1.legend(loc='upper left')
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel('Total cases/currently positive', color=color) # we already handled the x-label with ax1
lines2 = ax2.plot(x, y2, '-', color=color, label='Right hand scale')
ax2.set_ylim(0, 20)
ax2.tick_params(axis='y', labelcolor=color)
ax2.legend(loc='lower right')
cursor1 = mplcursors.cursor(lines1, hover=True)
cursor2 = mplcursors.cursor(lines2, hover=True)
fig.tight_layout()
plt.show()

Matplotlib square major/minor grid for axes with different limits

I have a plot with a background grid. I need grid cells to be square (both major grid and minor grid cells) even though the limits of X and Y axes are different.
My current code is as follows:
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import numpy as np
data = [0.014, 0.84, 0.95, -0.42, -0.79, 0.84, 0.98, 1.10, 0.56, -0.49]
fig, ax = plt.subplots(figsize=(20, 5))
ax.minorticks_on()
# Set major and minor grid lines on X
ax.set_xticks(np.arange(0, 10, 0.2))
ax.xaxis.set_minor_locator(plticker.MultipleLocator(base=0.2 / 5.))
for xmaj in ax.xaxis.get_majorticklocs():
ax.axvline(x=xmaj, ls='-', color='red', linewidth=0.8)
for xmin in ax.xaxis.get_minorticklocs():
ax.axvline(x=xmin, ls=':', color='red', linewidth=0.6)
# Set major and minor grid lines on Y
ylim = int(np.ceil(max(abs(min(data)), max(data))))
yticks = np.arange(-ylim, ylim + 0.5, 0.5)
ax.set_yticks(yticks)
ax.yaxis.set_minor_locator(plticker.MultipleLocator(base=0.5 / 5.))
for ymaj in ax.yaxis.get_majorticklocs():
ax.axhline(y=ymaj, ls='-', color='red', linewidth=0.8)
for ymin in ax.yaxis.get_minorticklocs():
ax.axhline(y=ymin, ls=':', color='red', linewidth=0.6)
ax.axis([0, 10, -ylim, ylim])
fig.tight_layout()
# Plot
ax.plot(data)
# Set equal aspect ratio NOT WORKING
plt.gca().set_aspect('equal', adjustable='box')
plt.show()
Which generates the following plot:
Large grid cells contain 5 smaller cells each. However, the aspect ratio of large grid is not 1.
Question: How can I make sure that large grid is square?
EDIT
Current approach is to set same tick locations as suggested by #ImportanceOfBeingErnest, but change Y labels:
ylim = int(np.ceil(max(abs(min(data)), max(data))))
yticks = np.arange(-ylim, ylim + 0.2, 0.2)
ax.set_yticks(yticks)
labels = ['{:.1f}'.format(v if abs(v) < 1e-3 else (1 if v > 0 else -1)*((0.5 - abs(v)%0.5) + abs(v)))
if i%2==0 else "" for i, v in enumerate(np.arange(-ylim, ylim, 0.2))]
ax.set_yticklabels(labels)
Result: seems too hacky.
When using equal aspect ratio and aiming for a square grid you would need to use the same tickspacing for both axes. This can be achieved with a MultipleLocator where the interval needs to be the same for x and y axis.
In general, grids can be created with the grid command.
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
data = [0.014, 0.84, 0.95, -0.42, -0.79, 0.84, 0.98, 1.10, 0.56, -0.49]
fig, ax = plt.subplots(figsize=(20, 5))
ax.minorticks_on()
# Set major and minor grid lines on X
ax.xaxis.set_major_locator(mticker.MultipleLocator(base=.5))
ax.xaxis.set_minor_locator(mticker.MultipleLocator(base=0.5 / 5.))
ax.yaxis.set_major_locator(mticker.MultipleLocator(base=.5))
ax.yaxis.set_minor_locator(mticker.MultipleLocator(base=0.5 / 5.))
ax.grid(ls='-', color='red', linewidth=0.8)
ax.grid(which="minor", ls=':', color='red', linewidth=0.6)
## Set limits
ylim = int(np.ceil(max(abs(min(data)), max(data))))
ax.axis([0, 10, -ylim, ylim])
plt.gca().set_aspect('equal', adjustable='box')
fig.tight_layout()
# Plot
ax.plot(data)
plt.show()
If you instead want to have different tick spacings with square major cells in the grid, you would need to give up the equal aspect ratio and instead set it to the quotient of the tick spacings.
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
data = [0.014, 0.84, 0.95, -0.42, -0.79, 0.84, 0.98, 1.10, 0.56, -0.49]
fig, ax = plt.subplots(figsize=(20, 5))
ax.minorticks_on()
xm = 0.2
ym = 0.25
# Set major and minor grid lines on X
ax.xaxis.set_major_locator(mticker.MultipleLocator(base=xm))
ax.xaxis.set_minor_locator(mticker.MultipleLocator(base=xm / 5.))
ax.yaxis.set_major_locator(mticker.MultipleLocator(base=ym))
ax.yaxis.set_minor_locator(mticker.MultipleLocator(base=ym / 5.))
ax.grid(ls='-', color='red', linewidth=0.8)
ax.grid(which="minor", ls=':', color='red', linewidth=0.6)
## Set limits
ylim = int(np.ceil(max(abs(min(data)), max(data))))
ax.axis([0, 10, -ylim, ylim])
plt.gca().set_aspect(xm/ym, adjustable='box')
fig.tight_layout()
# Plot
ax.plot(data)
plt.show()
To then get rid of every second ticklabel, an option is
fmt = lambda x,p: "%.2f" % x if not x%(2*ym) else ""
ax.yaxis.set_major_formatter(mticker.FuncFormatter(fmt))
You should be able to achieve this by using the same locator for the both axis. However matplotlib has a limitation currently, so here's a workaround:
# matplotlib doesnt (currently) allow two axis to share the same locator
# so make two wrapper locators and combine their view intervals
def share_locator(locator):
class _SharedLocator(matplotlib.ticker.Locator):
def tick_values(self, vmin, vmax):
return locator.tick_values(vmin, vmax)
def __call__(self):
min0, max0 = shared_locators[0].axis.get_view_interval()
min1, max1 = shared_locators[1].axis.get_view_interval()
return self.tick_values(min(min0, min1), max(max0, max1))
shared_locators = (_SharedLocator(), _SharedLocator())
return shared_locators
Use like:
lx, ly = share_locator(matplotlib.ticker.AutoLocator()) # or any other locator
ax.xaxis.set_major_locator(lx)
ax.yaxis.set_major_locator(ly)

seaborn joyplot does not fill all the way to the top

I'm using seaborn in Python 3.5. Taking the example joy plot from the gallery, modified slightly to save the figure:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
# Create the data
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, size=.5, palette=pal)
# Draw the densities in a few steps
g.map(sns.kdeplot, "x", clip_on=False, shade=True, alpha=1, lw=1.5, bw=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, fontweight="bold", color=color,
ha="left", va="center", transform=ax.transAxes)
g.map(label, "x")
# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)
# Remove axes details that don't play will with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
plt.savefig('tmp.png')
There is a slight visual defect, namely the KDEs do not quite fill all the way to the top. This is most visible in rows B, G, H and J:
Any idea what's causing this?

Resources