Plot bar chart with last item in a secondary y-axis - python-3.x

I am using the code below to generate this plot:
However, I'd like to have only the last values of data1 and data 2 (Column F) to use a secondary y-axis, because they are much higher than the previous values. Would anybody know how could I do that? I appreciate the help!
x_label = ['A', 'B', 'C', 'D', 'E', 'F']
x_pos = np.arange(len(x_label))
data1 = [1,3,2,5,8,67]
data2 = [1,3,2,5,12,45]
# Build the plot
fig, ax = plt.subplots()
fontsize = 14
ax.bar(x_pos, data1, align='center', alpha=0.5, ecolor='black',
capsize=3, width=0.2, color='r', label='data1')
ax.bar(x_pos+0.2, data2, align='center', alpha=0.5, ecolor='black',
capsize=3, width=0.2, color='b', label='data2')
ax.set_xticks(x_pos)
ax.set_xticklabels(x_label, fontsize=fontsize)
plt.grid()
plt.show()

Your problem finds two solutions: one is the left-right axis with different limits (first code below). The second consists in using logarithmic scale. Note that this second solution is often preferred.
Solution 1: secondary axis (not the best)
import matplotlib.pyplot as plt
import numpy as np
x_label = ['A', 'B', 'C', 'D', 'E', 'F']
x_pos = np.arange(len(x_label))
data1 = [1,3,2,5,8,67]
data2 = [1,3,2,5,12,45]
# Build the plot
fig, ax = plt.subplots()
fontsize = 14
ax2 = ax.twinx()
# all but F
ax.bar(x_pos[:-1], data1[:-1], align='center', alpha=0.5, ecolor='black',
capsize=3, width=0.2, color='r', label='data1')
ax.bar(x_pos[:-1]+0.2, data2[:-1], align='center', alpha=0.5, ecolor='black',
capsize=3, width=0.2, color='b', label='data2')
# F
ax2.bar([x_pos[-1]], [data1[-1]], align='center', alpha=0.5, ecolor='black',
capsize=3, width=0.2, color='r', label='data1')
ax2.bar([x_pos[-1]+0.2], [data2[-1]], align='center', alpha=0.5, ecolor='black',
capsize=3, width=0.2, color='b', label='data2')
ax.set_xticks(x_pos)
ax.set_xticklabels(x_label, fontsize=fontsize)
plt.grid()
plt.show()
This produces the image below. The issue is that we cannot tell which bars are belonging to the right axis, unless we change the color but in this case it won't correspond anymore with the color code of the left-bars.
Solution 2: using a log scale (much cleaner)
import matplotlib.pyplot as plt
import numpy as np
x_label = ['A', 'B', 'C', 'D', 'E', 'F']
x_pos = np.arange(len(x_label))
data1 = [1,3,2,5,8,67]
data2 = [1,3,2,5,12,45]
# Build the plot
fig, ax = plt.subplots()
fontsize = 14
ax.bar(x_pos, data1, align='center', alpha=0.5, ecolor='black',
capsize=3, width=0.2, color='r', label='data1')
ax.bar(x_pos+0.2, data2, align='center', alpha=0.5, ecolor='black',
capsize=3, width=0.2, color='b', label='data2')
ax.set_xticks(x_pos)
ax.set_xticklabels(x_label, fontsize=fontsize)
ax.set_yscale('log')
plt.grid()
plt.show()
Which produces this image below. Now every bars are shown on the same axis, with a log scale axe. Data with large dynamic range are often shown with log scale.

Related

Python function for plotting multiple figures

The following function creates rows = 1 and columns = 9 plots.
def plot_percentiles(df_list, site_names=['a','b','c', 'd', 'e', 'f', 'i', 'j', 'k'],
xlabel=r"PA [$m^2 m^{-3}$]",ylim=(0,50),xlim=(0,0.6)):
figure, ax = plt.subplots(1,9, figsize=[10,3], squeeze=True)
figure.tight_layout()
for i, df in enumerate(df_list):
ax[i].fill_betweenx(x1=df["10th percentile"], x2=df["90th percentile"], y=df["Height"],
color="darkgreen", alpha=.5, linewidth=0)
ax[i].fill_betweenx(x1=df["25th percentile"], x2=df["75th percentile"], y=df["Height"],
color="darkgreen", alpha=.5, linewidth=0)
ax[i].plot(df["Median"], df["Height"], color = "darkgreen", linewidth=1)
ax[i].set_ylabel("Height [m]", fontsize=10)
ax[i].set_xlabel(xlabel, fontsize=10)
ax[i].set(ylim=ylim, xlim=xlim)
ax[i].set_title(site_names[i], fontsize=12)
ax[i].set_facecolor('white')
plt.show()
How do I change it to create rows = 3, columns = 3 plots? Simply changing figure, ax = plt.subplots(3,3, figsize=[10,3], squeeze=True) doesn't work.

How to set markers with errorbars in different colours?

How to:
display symbols in the legend
colour markers in the same way as the errorbars (argument color gives an error: ValueError: RGBA sequence should have length 3 or 4
remove connecting lines - get only the scatter with errorbars
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D # for legend handle
fig, ax = plt.subplots(figsize = (10,10))
times = [1, 2, 3, 4, 5]
rvs = [2, 4, 2, 4, 7]
sigma = [0.564, 0.6, 0.8, 0.8, 0.4]
rv_telescopes = ['A', 'B', 'A', 'C', 'C']
d = {'rv_times': times, 'rv_rvs': rvs, 'rv_sigma': sigma, 'rv_telescopes': rv_telescopes }
df = pd.DataFrame(data=d)
colors = {'A':'#008f00', 'B':'#e36500', 'C':'red'}
plt.errorbar(df['rv_times'], df['rv_rvs'], df['rv_sigma'], marker = '_', ecolor = df['rv_telescopes'].map(colors), color = df['rv_telescopes'].map(colors), zorder = 1, ms = 30)
handles = [Line2D([0], [0], marker='_', color='w', markerfacecolor=v, label=k, markersize=10) for k, v in colors.items()]
ax.legend(handles=handles, loc='upper left', ncol = 2, fontsize=14)
plt.show()
After edit
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D # for legend handle
import pandas as pd
import numpy as np
times = [1, 2, 3, 4, 5]
rvs = [2, 4, 2, 4, 7]
sigma = [0.564, 0.6, 0.8, 0.8, 0.4]
rv_telescopes = ['A', 'B', 'A', 'C', 'C']
d = {'rv_times': times, 'rv_rvs': rvs, 'rv_sigma': sigma, 'rv_telescopes': rv_telescopes}
df = pd.DataFrame(data=d)
colors = {'A': '#008f00', 'B': '#e36500', 'C': 'red'}
fig, ax = plt.subplots(figsize=(10, 10))
ax.errorbar(df['rv_times'], df['rv_rvs'], df['rv_sigma'], color='none', ecolor=df['rv_telescopes'].map(colors) ,linewidth=1)
ax.scatter(df['rv_times'], df['rv_rvs'], marker='_', linewidth=3, color=df['rv_telescopes'].map(colors), s=1000)
for rv_teles in np.unique(df['rv_telescopes']):
color = colors[rv_teles]
df1 = df[df['rv_telescopes'] == rv_teles] # filter out rows corresponding to df['rv_telescopes']
ax.errorbar(df1['rv_times'], df1['rv_rvs'], df1['rv_sigma'],
color=color, ls='', marker='_', ms=30, linewidth=3, label=rv_teles)
ax.legend(loc='upper left', ncol=1, fontsize=14)
plt.show()
plt.errorbar() works very similar to plt.plot() with extra parameters. As such, it primarily draws a line graph, using a single color. The error bars can be given individual colors via the ecolor= parameter. The markers, however, get the same color as the line graph. The line graph can be suppressed via an empty linestyle. On top of that, plt.scatter() can draw markers with individual colors.
In order not the mix the 'object-oriented' with the 'functional interface', the following example code uses ax.errorbar() and ax.scatter().
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D # for legend handle
import pandas as pd
import numpy as np
times = [1, 2, 3, 4, 5]
rvs = [2, 4, 2, 4, 7]
sigma = [0.564, 0.6, 0.8, 0.8, 0.4]
rv_telescopes = ['A', 'B', 'A', 'C', 'C']
d = {'rv_times': times, 'rv_rvs': rvs, 'rv_sigma': sigma, 'rv_telescopes': rv_telescopes}
df = pd.DataFrame(data=d)
colors = {'A': '#008f00', 'B': '#e36500', 'C': 'red'}
fig, ax = plt.subplots(figsize=(10, 10))
ax.errorbar(df['rv_times'], df['rv_rvs'], df['rv_sigma'], color='none', ecolor=df['rv_telescopes'].map(colors))
ax.scatter(df['rv_times'], df['rv_rvs'], marker='_', color=df['rv_telescopes'].map(colors), s=100)
handles = [Line2D([0], [0], linestyle='', marker='_', color=v, label=k, markersize=10) for k, v in colors.items()]
ax.legend(handles=handles, loc='upper left', ncol=1, fontsize=14)
plt.show()
A far easier approach would be to call ax.errorbar() multiple times, once for each color. This would automatically create appropriate legend handles:
for rv_teles in np.unique(df['rv_telescopes']):
color = colors[rv_teles]
df1 = df[df['rv_telescopes'] == rv_teles] # filter out rows corresponding to df['rv_telescopes']
ax.errorbar(df1['rv_times'], df1['rv_rvs'], df1['rv_sigma'],
color=color, ls='', marker='_', ms=30, label=rv_teles)
ax.legend(loc='upper left', ncol=1, fontsize=14)
plt.show()

edgecolors - how to color the border of markers

import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.scatter(0.028, 0.008, marker='_', s=300, lw = 4, color='blue', edgecolors= 'black', clip_on=False, zorder = 2)
plt.show()
Result:
When I write color = 'none', I get a white plot.
It seems that the marker you are using does not have an edge.
Indeed if you change the marker to
ax.scatter(0.028, 0.008, marker='o', s=300, lw = 4, color='blue', edgecolors= 'black', clip_on=False, zorder = 2)
You correctly get the output

How to plot the xlabel in two lines of different colors?

I have two different parameters (lines y1 and y2), of different units which I want to plot in the same figure because their individual values are of similar magnitude. I therefore want to put their respective units (Unit y1 and Unit y2) in the xlabel in one row each and color each row after the color of the line. How can I do this?
import numpy as np
import matplotlib as plt
x1 = np.arange(0, 10, 1)
y1 = np.arange(10, 0, -1)
x2 = np.arange(11, 21, 1)
y2 = np.arange(0, 10, 1)
plt.figure()
plt.plot(x1, y1, 'blue')
plt.plot(x2, y2, 'red')
plt.xlabel('Unit y1\n''Unit y2')
plt.show()
One way is to use plt.text to put the labels. While it is unclear how you want the labels to be positioned, I will answer both possible ways
Way 1
import matplotlib.pyplot as plt
# Rest of the code
fig, ax = plt.subplots()
plt.plot(x1, y1, 'blue')
plt.plot(x2, y2, 'red')
plt.text(0.2, -0.15, 'Unit y1', color='blue', transform=ax.transAxes)
plt.text(0.7, -0.15, 'Unit y2', color='red', transform=ax.transAxes)
plt.show()
Way 2
fig, ax = plt.subplots()
plt.plot(x1, y1, 'blue')
plt.plot(x2, y2, 'red')
plt.text(0.45, -0.15, 'Unit y1', color='blue', transform=ax.transAxes)
plt.text(0.45, -0.2, 'Unit y2', color='red', transform=ax.transAxes)
plt.show()

MatPlotLib Dollar Sign with Thousands Comma Tick Labels

Given the following bar chart:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
df = pd.DataFrame({'A': ['A', 'B'], 'B': [1000,2000]})
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
df.plot(kind='bar', x='A', y='B',
align='center', width=.5, edgecolor='none',
color='grey', ax=ax)
plt.xticks(rotation=25)
plt.show()
I'd like to display the y-tick labels as thousands of dollars like this:
$2,000
I know I can use this to add a dollar sign:
import matplotlib.ticker as mtick
fmt = '$%.0f'
tick = mtick.FormatStrFormatter(fmt)
ax.yaxis.set_major_formatter(tick)
...and this to add a comma:
ax.get_yaxis().set_major_formatter(
mtick.FuncFormatter(lambda x, p: format(int(x), ',')))
...but how do I get both?
Thanks in advance!
You can use StrMethodFormatter, which uses the str.format() specification mini-language.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
df = pd.DataFrame({'A': ['A', 'B'], 'B': [1000,2000]})
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
df.plot(kind='bar', x='A', y='B',
align='center', width=.5, edgecolor='none',
color='grey', ax=ax)
fmt = '${x:,.0f}'
tick = mtick.StrMethodFormatter(fmt)
ax.yaxis.set_major_formatter(tick)
plt.xticks(rotation=25)
plt.show()
You can also use the get_yticks() to get an array of the values displayed on the y-axis (0, 500, 1000, etc.) and the set_yticklabels() to set the formatted value.
df = pd.DataFrame({'A': ['A', 'B'], 'B': [1000,2000]})
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
df.plot(kind='bar', x='A', y='B', align='center', width=.5, edgecolor='none',
color='grey', ax=ax)
--------------------Added code--------------------------
# getting the array of values of y-axis
ticks = ax.get_yticks()
# formatted the values into strings beginning with dollar sign
new_labels = [f'${int(amt)}' for amt in ticks]
# Set the new labels
ax.set_yticklabels(new_labels)
-------------------------------------------------------
plt.xticks(rotation=25)
plt.show()

Resources