Is it possible to set (in `matplotlib`) `ax.grid` in such a way that lines will go just to bars instead of going by the whole chart? - python-3.x

Is it possible to set ax.grid in such a way that lines will go just to bars?
Below the regular output("before") and expected("after"):
My code:
fig, ax = plt.subplots(figsize=(15,6))
ax.set_axisbelow(True)
ax = data_test.bar(fontsize=15, zorder=1, color=(174/255, 199/255, 232/255)) # 'zorder' is bar layaut order
for p in ax.patches:
ax.annotate(s=p.get_height(),
xy=(p.get_x()+p.get_width()/2., p.get_height()),
ha='center',
va='center',
xytext=(0, 10),
textcoords='offset points')
ax.spines["right"].set_visible(False)
ax.spines["left"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.set_xticklabels(
data_test.index,
rotation=34.56789,
fontsize='xx-large'
) # We will set xticklabels in angle to be easier to read)
# The labels are centred horizontally, so when we rotate them 34.56789°
ax.grid(axis='y', zorder=0) # 'zorder' is bar layaut order
plt.ylim([4500, 5300])
plt.show()

You could draw horizontal lines instead of using grid lines.
You forgot to add test data, making it quite unclear of what type data_test could be.
The code below supposes data_test is a pandas dataframe, and that data_test.plot.bar() is called to draw a bar plot. Note that since matplotlib 3.4 you can use ax.bar_label to label bars.
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
data_test = pd.DataFrame({'height': np.random.randint(1000, 2000, 7).cumsum()},
index=['Alkaid', 'Mizar', 'Alioth', 'Megrez', 'Phecda', 'Merak', 'Dubhe'])
fig, ax = plt.subplots(figsize=(15, 6))
ax.set_axisbelow(True)
data_test.plot.bar(fontsize=15, zorder=1, color=(174 / 255, 199 / 255, 232 / 255), ax=ax)
for container in ax.containers:
ax.bar_label(container, fmt='%.0f', fontsize=15)
for spine in ax.spines.values():
spine.set_visible(False)
ax.set_xticklabels(data_test.index, rotation=34.56789, fontsize='xx-large')
ax.tick_params(length=0) # remove tick marks
xmin, xmax = ax.get_xlim()
ticks = ax.get_yticks()
tick_extends = [xmax] * len(ticks)
# loop through the bars and the ticks; shorten the lines whenever a bar crosses it
for bar in ax.patches:
for j, tick in enumerate(ticks):
if tick <= bar.get_height():
tick_extends[j] = min(tick_extends[j], bar.get_x())
ax.hlines(ticks, xmin, tick_extends, color='grey', lw=0.8, ls=':', zorder=0)
plt.tight_layout()
plt.show()

Related

sns.histplot legend colors not matching the output

I am creating a combo boxplot\histplot.
Everything runs and I get the output I am expecting except for one thing:
The line colors in the legend do not match the output.
Code:
def boxhist(dfx, x):
variable = dfx[x].values
np.array(variable).mean()
np.median(variable)
f, (ax_box, ax_hist) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (0.5, 2)})
mean = np.array(variable).mean()
median = np.median(variable)
sns.boxplot(variable, ax=ax_box)
ax_box.axvline(mean, color='orange', linestyle='--')
ax_box.axvline(median, color='black', linestyle='-')
sns.histplot(data=variable, ax=ax_hist, kde=True, binwidth=2, facecolor='green').lines[0].set_color('red')
ax_hist.axvline(mean, color='orange', linestyle='--')
ax_hist.axvline(median, color='black', linestyle='-')
plt.title(x, fontsize=10, loc='right')
plt.legend({'Mean': mean, 'Median': median})
ax_box.set(xlabel='')
plt.tight_layout()
plt.show()
Output:
The mean should be orange.
The median should be black.
Why is the legend showing the mean as red and the median as orange?
I want the legend colors to match the plot output. mean\orange, median\black.
You need to add a label in ax_hist.axvline(mean, ...., label='Mean') (and similar for the median). Then matplotlib should automatically add them to the legend (when called without parameters).
Here is some example code:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
def boxhist(dfx, x):
variable = dfx[x].values
variable.mean()
np.median(variable)
f, (ax_box, ax_hist) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (0.5, 2)})
mean = variable.mean()
median = np.median(variable)
sns.boxplot(x=variable, ax=ax_box)
ax_box.axvline(mean, color='orange', linestyle='--')
ax_box.axvline(median, color='black', linestyle='-')
sns.histplot(x=variable, ax=ax_hist, kde=True, binwidth=2, facecolor='green')
ax_hist.lines[0].set_color('red')
ax_hist.axvline(mean, color='orange', linestyle='--', label='Mean')
ax_hist.axvline(median, color='black', linestyle='-', label='Median')
ax_hist.set_title(x, fontsize=10, loc='right')
ax_hist.legend()
# ax_box.set(xlabel='') # has no effect on shared x-axis
plt.tight_layout()
plt.show()
dfx = pd.DataFrame({'bmi': np.random.normal(30.2, 5, 100)})
boxhist(dfx, 'bmi')

How to draw vertical average lines for overlapping histograms in a loop

I'm trying to draw with matplotlib two average vertical line for every overlapping histograms using a loop. I have managed to draw the first one, but I don't know how to draw the second one. I'm using two variables from a dataset to draw the histograms. One variable (feat) is categorical (0 - 1), and the other one (objective) is numerical. The code is the following:
for chas in df[feat].unique():
plt.hist(df.loc[df[feat] == chas, objective], bins = 15, alpha = 0.5, density = True, label = chas)
plt.axvline(df[objective].mean(), linestyle = 'dashed', linewidth = 2)
plt.title(objective)
plt.legend(loc = 'upper right')
I also have to add to the legend the mean and standard deviation values for each histogram.
How can I do it? Thank you in advance.
I recommend you using axes to plot your figure. Pls see code below and the artist tutorial here.
import numpy as np
import matplotlib.pyplot as plt
# Fixing random state for reproducibility
np.random.seed(19680801)
mu1, sigma1 = 100, 8
mu2, sigma2 = 150, 15
x1 = mu1 + sigma1 * np.random.randn(10000)
x2 = mu2 + sigma2 * np.random.randn(10000)
fig, ax = plt.subplots(1, 1, figsize=(7.2, 7.2))
# the histogram of the data
lbs = ['a', 'b']
colors = ['r', 'g']
for i, x in enumerate([x1, x2]):
n, bins, patches = ax.hist(x, 50, density=True, facecolor=colors[i], alpha=0.75, label=lbs[i])
ax.axvline(bins.mean())
ax.legend()

matplotlib notebook cursor coordinates on graph with double y axis

The issue I would like you to figure out is about the coordinantes appearence on matplotlib graph with a double y axis. First of all a code on Jupyter Notebook which draws a graph with two lines and only one y axis (for some unknown reasons I have to run it two times in order to make it working correctly)
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
from IPython.display import display
from IPython.core.display import display, HTML #display multiple output on a cell
display(HTML("<style>.container { width:100% !important; }</style>")) # improve cells horizontal size
from IPython.core.interactiveshell import InteractiveShell # It saves you having to repeatedly type "Display"
InteractiveShell.ast_node_interactivity = "all"
%matplotlib notebook
x = np.arange(0, 10, 0.01)
y1 = np.sin(np.pi*x)/(np.pi*x)
y2 = abs(np.tan(0.1*np.pi*x))
plt.figure()
plt.plot(x, y1)
plt.plot(x, y2)
plt.ylim(0, 3)
plt.grid()
plt.show()
The present figure provides the two lines with cursor coordinates on the right bottom part of the graph.
The following code
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.mlab as mlab
from IPython.display import display
from IPython.core.display import display, HTML #display multiple output on a cell
display(HTML("<style>.container { width:100% !important; }</style>")) # improve cells horizontal size
from IPython.core.interactiveshell import InteractiveShell # It saves you having to repeatedly type "Display"
InteractiveShell.ast_node_interactivity = "all"
%matplotlib notebook
x = np.arange(0, 10, 0.01)
y1 = np.sin(np.pi*x)/(np.pi*x)
y2 = abs(np.tan(0.1*np.pi*x))
# Create some mock data
fig, ax1 = plt.subplots()
plt.grid()
color = 'tab:red'
ax1.set_xlabel('Time (days from 24 February)')
ax1.set_ylabel('Death cases/Intensive care', color=color)
#ax1.set_xlim(0, 15)
#ax1.set_ylim(0, 900)
ax1.plot(x, y1, '-', color=color, label = 'Left hand scale')
ax1.tick_params(axis='y', labelcolor=color)
ax1.legend(loc = 'upper left')
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel('Total cases/currently positive', color=color) # we already handled the x-label with ax1
ax2.plot(x, y2, '-', color=color, label = 'Right hand scale')
ax2.set_ylim(0, 20)
ax2.tick_params(axis='y', labelcolor=color)
ax2.legend(loc = 'lower right')
fig.tight_layout()
plt.show()
Shows the following graph
Which shows a graph with TWO y scales, one red on the left side and one blue on the right side. The problem here is that in the left bottom side of the picture there are the cursor coordinates related to the right scale and nothing about the left one. Is there a way to show up both the two scales?
Depending on your precise needs, mplcursors seems helpful. Mplcursors allows a lot of ways to customize, for example you can show both y-values together with the current x. Or you could suppress the annotation and only write in the status bar.
Setting hover=True constantly displays the plotted values when the mouse hovers over a curve. Default, the values would only be displayed when clicking.
import matplotlib.pyplot as plt
import numpy as np
import mplcursors
# Create some test data
x = np.arange(0, 10, 0.01)
y1 = np.sin(np.pi * x) / (np.pi * x)
y2 = abs(np.tan(0.1 * np.pi * x))
fig, ax1 = plt.subplots()
plt.grid()
color = 'tab:red'
ax1.set_xlabel('Time (days from 24 February)')
ax1.set_ylabel('Death cases/Intensive care', color=color)
lines1 = ax1.plot(x, y1, '-', color=color, label='Left hand scale')
ax1.tick_params(axis='y', labelcolor=color)
ax1.legend(loc='upper left')
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel('Total cases/currently positive', color=color) # we already handled the x-label with ax1
lines2 = ax2.plot(x, y2, '-', color=color, label='Right hand scale')
ax2.set_ylim(0, 20)
ax2.tick_params(axis='y', labelcolor=color)
ax2.legend(loc='lower right')
cursor1 = mplcursors.cursor(lines1, hover=True)
cursor2 = mplcursors.cursor(lines2, hover=True)
fig.tight_layout()
plt.show()

Matplotlib square major/minor grid for axes with different limits

I have a plot with a background grid. I need grid cells to be square (both major grid and minor grid cells) even though the limits of X and Y axes are different.
My current code is as follows:
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import numpy as np
data = [0.014, 0.84, 0.95, -0.42, -0.79, 0.84, 0.98, 1.10, 0.56, -0.49]
fig, ax = plt.subplots(figsize=(20, 5))
ax.minorticks_on()
# Set major and minor grid lines on X
ax.set_xticks(np.arange(0, 10, 0.2))
ax.xaxis.set_minor_locator(plticker.MultipleLocator(base=0.2 / 5.))
for xmaj in ax.xaxis.get_majorticklocs():
ax.axvline(x=xmaj, ls='-', color='red', linewidth=0.8)
for xmin in ax.xaxis.get_minorticklocs():
ax.axvline(x=xmin, ls=':', color='red', linewidth=0.6)
# Set major and minor grid lines on Y
ylim = int(np.ceil(max(abs(min(data)), max(data))))
yticks = np.arange(-ylim, ylim + 0.5, 0.5)
ax.set_yticks(yticks)
ax.yaxis.set_minor_locator(plticker.MultipleLocator(base=0.5 / 5.))
for ymaj in ax.yaxis.get_majorticklocs():
ax.axhline(y=ymaj, ls='-', color='red', linewidth=0.8)
for ymin in ax.yaxis.get_minorticklocs():
ax.axhline(y=ymin, ls=':', color='red', linewidth=0.6)
ax.axis([0, 10, -ylim, ylim])
fig.tight_layout()
# Plot
ax.plot(data)
# Set equal aspect ratio NOT WORKING
plt.gca().set_aspect('equal', adjustable='box')
plt.show()
Which generates the following plot:
Large grid cells contain 5 smaller cells each. However, the aspect ratio of large grid is not 1.
Question: How can I make sure that large grid is square?
EDIT
Current approach is to set same tick locations as suggested by #ImportanceOfBeingErnest, but change Y labels:
ylim = int(np.ceil(max(abs(min(data)), max(data))))
yticks = np.arange(-ylim, ylim + 0.2, 0.2)
ax.set_yticks(yticks)
labels = ['{:.1f}'.format(v if abs(v) < 1e-3 else (1 if v > 0 else -1)*((0.5 - abs(v)%0.5) + abs(v)))
if i%2==0 else "" for i, v in enumerate(np.arange(-ylim, ylim, 0.2))]
ax.set_yticklabels(labels)
Result: seems too hacky.
When using equal aspect ratio and aiming for a square grid you would need to use the same tickspacing for both axes. This can be achieved with a MultipleLocator where the interval needs to be the same for x and y axis.
In general, grids can be created with the grid command.
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
data = [0.014, 0.84, 0.95, -0.42, -0.79, 0.84, 0.98, 1.10, 0.56, -0.49]
fig, ax = plt.subplots(figsize=(20, 5))
ax.minorticks_on()
# Set major and minor grid lines on X
ax.xaxis.set_major_locator(mticker.MultipleLocator(base=.5))
ax.xaxis.set_minor_locator(mticker.MultipleLocator(base=0.5 / 5.))
ax.yaxis.set_major_locator(mticker.MultipleLocator(base=.5))
ax.yaxis.set_minor_locator(mticker.MultipleLocator(base=0.5 / 5.))
ax.grid(ls='-', color='red', linewidth=0.8)
ax.grid(which="minor", ls=':', color='red', linewidth=0.6)
## Set limits
ylim = int(np.ceil(max(abs(min(data)), max(data))))
ax.axis([0, 10, -ylim, ylim])
plt.gca().set_aspect('equal', adjustable='box')
fig.tight_layout()
# Plot
ax.plot(data)
plt.show()
If you instead want to have different tick spacings with square major cells in the grid, you would need to give up the equal aspect ratio and instead set it to the quotient of the tick spacings.
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
data = [0.014, 0.84, 0.95, -0.42, -0.79, 0.84, 0.98, 1.10, 0.56, -0.49]
fig, ax = plt.subplots(figsize=(20, 5))
ax.minorticks_on()
xm = 0.2
ym = 0.25
# Set major and minor grid lines on X
ax.xaxis.set_major_locator(mticker.MultipleLocator(base=xm))
ax.xaxis.set_minor_locator(mticker.MultipleLocator(base=xm / 5.))
ax.yaxis.set_major_locator(mticker.MultipleLocator(base=ym))
ax.yaxis.set_minor_locator(mticker.MultipleLocator(base=ym / 5.))
ax.grid(ls='-', color='red', linewidth=0.8)
ax.grid(which="minor", ls=':', color='red', linewidth=0.6)
## Set limits
ylim = int(np.ceil(max(abs(min(data)), max(data))))
ax.axis([0, 10, -ylim, ylim])
plt.gca().set_aspect(xm/ym, adjustable='box')
fig.tight_layout()
# Plot
ax.plot(data)
plt.show()
To then get rid of every second ticklabel, an option is
fmt = lambda x,p: "%.2f" % x if not x%(2*ym) else ""
ax.yaxis.set_major_formatter(mticker.FuncFormatter(fmt))
You should be able to achieve this by using the same locator for the both axis. However matplotlib has a limitation currently, so here's a workaround:
# matplotlib doesnt (currently) allow two axis to share the same locator
# so make two wrapper locators and combine their view intervals
def share_locator(locator):
class _SharedLocator(matplotlib.ticker.Locator):
def tick_values(self, vmin, vmax):
return locator.tick_values(vmin, vmax)
def __call__(self):
min0, max0 = shared_locators[0].axis.get_view_interval()
min1, max1 = shared_locators[1].axis.get_view_interval()
return self.tick_values(min(min0, min1), max(max0, max1))
shared_locators = (_SharedLocator(), _SharedLocator())
return shared_locators
Use like:
lx, ly = share_locator(matplotlib.ticker.AutoLocator()) # or any other locator
ax.xaxis.set_major_locator(lx)
ax.yaxis.set_major_locator(ly)

Screen flickering with matplotlib slider update

Have tried to implement multiple plots on shared x axis with a common slider . On slider update , there is too much screen flicker . How can this be avoided . Here is the code sample i have used.
%matplotlib inline
from ipywidgets import interactive
import matplotlib.pyplot as plt
import numpy as np
''' 30% window size on the selected time on slider'''
data_size=round(M.Timestamp.size*0.30)
plt.close('all')
def f(m):
plt.figure(2)
x=M['Timestamp']
y1=M['Value']
'''define boundary limits for both axis'''
min_x=0 if m-data_size < 0 else m-data_size
max_x=M.Timestamp.size if m+data_size > M.Timestamp.size else m+data_size
f, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=True)
ax1.plot(x[min_x:max_x],y1[min_x:max_x],color='r')
ax1.set_title('Sharing both axes')
ax2.plot(x[min_x:max_x],y1[min_x:max_x],color='b')
ax3.plot(x[min_x:max_x],y1[min_x:max_x],color='g')
plt.xticks(rotation=30)
interactive(f, m=(0, M.Timestamp.size))
When tried to update the xlimit on slider movement the graph is blank , hence used the subset of data to update on plots
Solved the issue with following settings.
Used a selection slider with continuous_update =False
on startup load the graph and manipulate only the xlim with plt.xlim(min_x,max_x) with the slider functionality
snippet of the implementation below.
selection_range_slider = widgets.SelectionRangeSlider(
options=options,
index=index,
description='Time slider',
orientation='horizontal',
layout={'width': '1000px'},
continuous_update=False
)
#selection_range_slider
def print_date_range(date_range):
print(date_range)
plt.figure(num=None, figsize=(15, 4), dpi=80, facecolor='w', edgecolor='k')
min_x=date_range[0]
max_x=date_range[1]
ax1 = plt.subplot(311)
plt.plot(Data_1.Timestamp,Data_1.value,'r')
plt.setp(ax1.get_xticklabels(), fontsize=6,visible=False)
plt.xlabel('Data_1')
ax1.xaxis.set_label_coords(1.05, 0.5)
# share x only
ax2 = plt.subplot(312, sharex=ax1)
plt.plot(Data_2.Timestamp,Data_2.value,'b')
# make these tick labels invisible
plt.setp(ax2.get_xticklabels(), visible=False)
plt.xlabel('Data_2')
ax2.xaxis.set_label_coords(1.05, 0.5)
# share x and y
ax3 = plt.subplot(313, sharex=ax1)
plt.plot(Data_3.Timestamp,Data_3.value,'g')
ax3.xaxis.set_label_coords(1.05, 0.5)
#plt.xlim(0.01, 5.0)
plt.xlim(min_x,max_x)
plt.show()
#plt.xlabel('Data_3')
widgets.interact(
print_date_range,
date_range=selection_range_slider
);

Resources