Removing duplicate legend bar plot matplotlib - python-3.x

I want to edit my legend to make it only shows the labels once
I use for loop to create my bar graph. How can I remove the duplicate legend? It should only show week and month once
This code give me the graph below
fig, ax = plt.subplots(figsize = (10,6))
ax.set(xlim=(0,6))
ax.set(ylim=(0,150))
ax.set_xticklabels(edgeslist)
for i in range(6):
plt.bar(x = i, data = classw.iloc[:,i],
height = len(classw.iloc[:,i]) - classw.iloc[:,i].isna().sum(),
color = (0.91, 0.1, 0.4, 1), label = 'week',
align = 'edge', width = -0.4)
plt.bar(x = i, data = classm.iloc[:,i],
height = len(classm.iloc[:,i]) - classm.iloc[:,i].isna().sum(),
color = 'blue', label = 'month',
align = 'edge', width = 0.4)
plt.legend()

You can create a custom legend containing only the elements you want using the following code
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
r = Patch(facecolor=(0.91,0.1,0.4,1.0), label='week')
b = Patch(facecolor='blue', label='month')
plt.legend(handles=[r,b])
plt.show()
Which would give you a legend like this
Refer to this page for more on making custom legends.

Method 1 You can set the legend inside the loop only if a condition is met:
fig, ax = plt.subplots(figsize = (10,6))
ax.set(xlim=(0,6))
ax.set(ylim=(0,150))
ax.set_xticklabels(edgeslist)
for i in range(6):
plt.bar(x = i, data = classw.iloc[:,i],
height = len(classw.iloc[:,i]) - classw.iloc[:,i].isna().sum(),
color = (0.91, 0.1, 0.4, 1), label = 'week',
align = 'edge', width = -0.4)
plt.bar(x = i, data = classm.iloc[:,i],
height = len(classm.iloc[:,i]) - classm.iloc[:,i].isna().sum(),
color = 'blue', label = 'month',
align = 'edge', width = 0.4)
if i==0:
ax.legend()
Method 2
You can create a list with the label name. You will set as None but one value, then in the plot code, you index the label list this way.
fig, ax = plt.subplots(figsize = (10,6))
ax.set(xlim=(0,6))
ax.set(ylim=(0,150))
ax.set_xticklabels(edgeslist)
label_week = [None]*6
label_week[5] = 'week'
label_month = [None]*6
label_month[5] = 'month'
for i in range(6):
plt.bar(x = i, data = [1, 2, 5, 6, 0, 1],
height = 5,
color = (0.91, 0.1, 0.4, 1), label = label_week[i],
align = 'edge', width = -0.4)
plt.bar(x = i, data = [1, 2, 5, 6, 0, 1],
height = 6,
color = 'blue', label = label_month[i],
align = 'edge', width = 0.4)
plt.legend()
Hope it helps.

Related

size of a Matplotlib figure

Am trying to make my figure fit the window in terms of its width, and have experimented on the ones have commented out(Line 1, 3, and 4) in the part of the full code below and the attached screenshot is the way it is now. Anyone to help, please.
# fig = plt.figure()
fig = plt.figure(1, figsize = (40, 2), dpi = 80, constrained_layout = True)
#fig = plt.gcf()
#fig.set_size_inches(18.5, 10.5)
ax = fig.add_subplot()
ax.set_title("Electrocadiogram")
ax.set_xlabel("Time(Sec)")
ax.set_ylabel("Voltage(mV)")
ax.grid(b=True, which='major', color='#666666', linestyle='-')
ax.minorticks_on()
ax.grid(b=True, which='minor', color='#666666', linestyle='-', alpha=0.2)
canvas = FigureCanvasTkAgg(fig, master=plotting_frame)
canvas.draw()
canvas.get_tk_widget().place(x = 0, y = 0, width = 600, height = 420)
I have managed to figure it out, the canvas was restricting the size of the figure on which it was embedded, so I played with the canvas width and it has worked.
canvas.get_tk_widget().place(x = 0, y = 0, width = 1170, height = 420)

Python polar bar chart - Remove degrees & color one ring

I have been trying to create a polar bar chart in python for quite some time. After some research I managed to get the results that I wanted. Well, almost. There're still a couple thing that I don't know how to do.
I include my code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FixedLocator
from operator import add
#DATA MANIPULATION
dataset = pd.read_csv('Controls.csv', delimiter=";")
dataset.astype({'Rating':'float'})
#print(dataset.dtypes)
categories = dataset['Category'].drop_duplicates()
controls = dataset['Control'].drop_duplicates()
categ_avg = []
control_average = []
#Average for controls
for category in categories:
avg = 0
for index, item in dataset.iterrows():
if item['Category'] == category:
avg += item['Rating']
categ_avg.append(avg)
avg = 0
for control in controls:
avg = 0
for index, item in dataset.iterrows():
if item['Control'] == control:
avg += item['Rating']
control_average.append(avg)
avg = 0
average = [total / 5 for total in categ_avg]
avgdf = pd.DataFrame({
'Category' : categories,
#'Controls' : controls,
'Average' : average
})
#PLOTTING
#Compute pie slices which is the number of unique controls
N = len(controls)
#theta = np.linspace(0, 2 * np.pi, N, endpoint=False)
theta = [0]
for cat in categories:
if cat == 'CAT-A':
theta.append( theta[-1] + (2 * np.pi/N * 2) )
else:
theta.append( theta[-1] + (2*np.pi / N) )
print(theta)
#Compute the filling axis
mid_theta = []
for cat in categories:
if cat == 'CAT-A':
mid_theta.append( 2 * np.pi/N )
else:
mid_theta.append( 2 * np.pi / N / 2 )
mid_theta = list(map(add,theta, mid_theta))
print(mid_theta)
radii = avgdf['Average']
#width = theta[1] - theta[0]
width = []
for i in range(0, len(avgdf['Average'])):
width.append(theta[i+1] - theta[i])
fig = plt.figure()
fig.patch.set_facecolor('white')
fig.patch.set_alpha(0.5)
#Draw X labels
ax = fig.add_subplot(111, projection='polar')
ax.set_xticks(theta)
# Draw ylabels
ax.set_rlabel_position(0)
ax.set_yticks([1, 2, 3, 4, 5])
ax.set_yticklabels(["1", "2", "3", "4", "5"], color="black", size=8)
ax.set_ylim(0, 5)
#colors = plt.cm.hsv(theta/2/np.pi)
bars = ax.bar(mid_theta, radii, width=width, bottom=0.0)
#Labels
for bar, angle, label in zip(bars, mid_theta, avgdf["Category"]):
# Labels are rotated. Rotation must be specified in degrees :(
rotation = np.rad2deg(angle)
# Flip some labels upside down
alignment = ""
if angle >= np.pi/2 and angle < 3*np.pi/2:
alignment = "right"
rotation = rotation + 180
else:
alignment = "left"
# Finally add the labels
ax.text(
x=angle,
y=5.5,
s=label,
ha=alignment,
va='center')
#Use custom colors and opacity
for r, bar in zip(avgdf['Average'], bars):
bar.set_facecolor(plt.cm.viridis(r/5.))
bar.set_alpha(0.5)
plt.show()
When I execute it I obtain the following graph: Resulting graph
What I'm trying to achieve is:
I would like to color the ring number 4 in green.
I would like to remove the degrees from the outer ring. I only want to see my categories not the 0, 144ยบ...
I really appreciate the help.
Thanks you.
Create a list of colours with as many colours as you have polar bars.
c = ['blue', 'blue', 'blue', 'green', 'blue', 'blue']
bars = ax.bar(
x=angles,
height=heights,
width=width,
color=c,
linewidth=2,
edgecolor="white")

Annotate Percentage of Group within a Seaborn CountPlot

The below code gets the percentage of all collisions. However, I want to get the percentage within a group. E.G. Mid-Block (not related to intersection) has 2 labels, a 1(red) or a 2(green/blue). Currently, the percentages next to those bars are percentages of the whole (bar count / all collisions), but I need to display the percentage within just one y-axis label. E.G. for Mid-block (not related to intersection), bar count / all collisions within mid-block (not related to intersection). I do not know how to do this, so if someone could point me in the right direction or give me some code that I could study to understand, I'd be very grateful.
Thank you so much for your time.
plt.style.use('ggplot')
plt.figure(figsize = (20, 15))
ax = sb.countplot(y = "JUNCTIONTYPE", hue = "SEVERITYCODE", data = dfm)
plt.title('Number of Persons vs. Number of Collisions by Severity', fontsize = 30)
plt.xlabel('Number of Collisions', fontsize = 24)
plt.ylabel('Number of Persons', fontsize = 24)
plt.tick_params(labelsize=18);
plt.legend(fontsize = 18, title = "Severity", loc = 'lower right')
plt.text(5, 6, "Figure 8: Number of persons plotted against the number of collisions grouped by severity", fontsize = 16)
# labels = [item.get_text() for item in ax.get_yticklabels()]
# labels[0] = 'No'
# labels[1] = 'Yes'
# ax.set_yticklabels(labels)
for p in ax.patches:
width = p.get_width()
height = p.get_height()
x, y = p.get_xy()
ax.annotate(int(width),
((x + width), y),
xytext = (30, -25),
fontsize = 18,
color = '#000000',
textcoords = 'offset points',
ha = 'right',
va = 'center')
for p in ax.patches:
width = p.get_width()
height = p.get_height()
x, y = p.get_xy()
totals = []
for i in ax.patches:
totals.append(i.get_width())
total = sum(totals)
ax.text(width + 0.3, y + 0.38,
str(
round((width/total) * 100, 2))
+ '%',
fontsize=18)
You could pre-calculate the per-group percentage points and use the order in which seaborn / matplotlib draws the bars to reference them.
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
titanic = sns.load_dataset('titanic')
# prepare the dataset
df = (titanic
.groupby(["embark_town", "survived"])
.size()
.reset_index()
.replace({"survived": {0:"no", 1:"yes"}})
.rename(columns={0:"count"}))
# calculate survival % per town of embarkation
df["percent"] = (df
.groupby("embark_town")
.apply(lambda x: x["count"] / x["count"].sum()).values)
# sort the dataframe to match the drawing order
df.sort_values(by=["survived", "embark_town"], inplace=True)
# visualisation
plt.style.use('ggplot')
fig = sns.catplot(
x="count", y="embark_town", hue="survived",
kind="bar", data=df, height=4, aspect=2)
for i, bar in enumerate(fig.ax.patches):
height = bar.get_height()
fig.ax.annotate(
# reference the pre-calculated row in the dataframe
f"{df.iloc[i, 3] :.0%}",
xycoords="data",
xytext=(20, -15),
textcoords="offset points",
xy=(bar.get_width(), bar.get_y()),
ha='center', va='center')
# make space for annonations
plt.margins(x=0.2)
plt.show()

Seaborn boxplot whiskers are missing from just one category

I've been using the Seaborn library to plot box plots. In one instance, I see that the whiskers are randomly missing from one of my categorical variables.
fig, ax = plt.subplots(1, 1, figsize = (60,30))
plt.axhline(y = 0, color = 'k', linestyle = ':', linewidth = 2)
ax = sns.boxplot(x = 'Neighborhood', y = 'Price difference', data = sf_data_residuals_neighborhoods,
showfliers = False, order = list(neighborhood_order_residuals['Neighborhood']), linewidth = 5)
ax = sns.stripplot(x = 'Neighborhood', y = 'Price difference', data = sf_data_residuals_neighborhoods,
order = list(neighborhood_order_residuals['Neighborhood']), jitter = 0.25, size = 15,
linewidth = 3, edgecolor = 'black', alpha = 0.5)
# set axis properties
plt.xticks(rotation=45, fontname = 'Helvetica', fontsize = 42, ha = 'right')
plt.yticks(fontname = 'Helvetica', fontsize = 42)
plt.xlabel('San Francisco neighborhood', fontsize = 55, fontname = 'Arial', fontweight = 'bold')
plt.ylabel('Actual - predicted price ($M)', fontsize = 55, fontname = 'Arial',
fontweight = 'bold')
scale = 1000000; ax.set_ylim(-1000000, 3000000); ax.yaxis.labelpad = 25
ticks = ticker.FuncFormatter(lambda y, pos: '{0:g}'.format(y/scale))
ax.xaxis.set_tick_params(width = 3, length = 15)
ax.yaxis.set_tick_params(width = 3, length = 15)
ax.yaxis.set_major_formatter(ticks)
plt.setp(ax.spines.values(), linewidth = 3)
This produces the desired plot, but appears to leave out whiskers for the Potrero Hill category:
I've tried manually stipulating the default whis = 1.5 setting in sns.boxplot() but this does not make the missing whiskers appear.
Any idea what might be causing this?

Scaling a PDF to show 100% at peak

I'm displaying a histogram of my data, with an overlaid PDF. My plots all look something like this:
and I'm trying to scale the red curve to show 100% at the peak.
My following toy code is identical to what I'm actually using, apart from the lines in between the two %:
%
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
my_randoms = np.random.normal(0.5, 1, 50000)
dictOne = {"delta z":my_randoms}
df = pd.DataFrame(dictOne)
df = df[df['delta z'] > -999]
%
fig, ax = plt.subplots()
h, edges, _ = ax.hist(df['delta z'], alpha = 1, density = False, bins = 100)
param = stats.norm.fit(df['delta z'].dropna()) # Fit a normal distribution to the data
pdf_fitted = stats.norm.pdf(df['delta z'], *param)
x = np.linspace(*df['delta z'].agg([min, max]), 100) # x-values
binwidth = np.diff(edges).mean()
ax.plot(x, stats.norm.pdf(x, *param)*h.sum()*binwidth, color = 'r')
# Decorations
graph_title = 'U-B'
plt.grid(which = 'both')
plt.title(r'$\Delta z$ distribution for %s'%graph_title, fontsize = 25)
plt.xlabel(r'$\Delta z = z_{spec} - z_{photo}$', fontsize = 25)
plt.ylabel('Number', fontsize = 25)
plt.xticks(fontsize = 25)
plt.yticks(fontsize = 25)
xmin, xmax = min(df['delta z']), max(df['delta z'])
plt.xlim(xmin, xmax)
plt.annotate(
r'''$\mu_{\Delta z}$ = %.3f
$\sigma_{\Delta z}$ = %.3f'''%(param[0], param[1]),
fontsize = 25, color = 'r', xy=(0.85, 0.85), xycoords='axes fraction')
How would I define another axes object from 0 to 100 on the right-hand side and map the PDF to that?
Or is there a better way to do it?
This is kind of a follow-up to my previous question.
You can use density=True in plotting the histogram.
You use .twinx():
fig = plt.figure(figsize=(10, 8), dpi=72.0)
n_rows = 2
n_cols = 2
ax1 = fig.add_subplot(n_rows, n_cols, 1)
ax2 = fig.add_subplot(n_rows, n_cols, 2)
ax3 = ax1.twinx()
https://matplotlib.org/gallery/api/two_scales.html

Resources