Highlight some labels on the x-axis on seaborn barplot - python-3.x

I am using seaborn to plot the heritability of some brain regions. I want to highlight the labels on the x-axis based on the brain regions. So for example, let's say that I have regions that are White matter and regions that are grey matter. I want to highlight the brain regions of the grey matter in red and the white matter regions in blue. How can I do that?
Here is the code that I use:
b = sns.barplot(x="names", y="h2" ,data=df, ax = ax1)
ax1.set_xticklabels(labels= df['names'].values.ravel(),rotation=90,fontsize=5)
ax1.errorbar(x=list(range (0,165)),y=df['h2'], yerr=df['std'], fmt='none', c= 'b')
plt.tight_layout()
plt.title('heritability of regions ')
plt.show()
What should I add to do what I want?
Thanks

You can add a new column to the dataframe and use that as the hue parameter. To change the color of the ticklabels, you can loop through them and use set_color depending on the grey/white column.
import seaborn as sns
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
df = pd.DataFrame({'names': list('abcdefghij'),
'h2': np.random.randint(10, 100, 10),
'grey/white': np.random.choice(['grey', 'white'], 10)})
ax1 = sns.barplot(x='names', y='h2', hue='grey/white', dodge=False, data=df)
ax1.set_xticklabels(labels=df['names'], rotation=90, fontsize=15)
# ax1.errorbar(x=list(range(0, 165)), y=df['h2'], yerr=df['std'], fmt='none', c='b')
for (greywhite, ticklbl) in zip(df['grey/white'], ax1.xaxis.get_ticklabels()):
ticklbl.set_color('red' if greywhite == 'grey' else 'blue')
plt.title('heritability of regions ')
plt.tight_layout()
plt.show()

Related

Seaborn, how to gradient color distplot depending on the x-axis value

I'd like to gradient-color the plot line in the Seaborn's distplot, depending on the x-axis value. For example if the value is 1, then the colour is blue, when 1.1 then it's blue and goes toward green, and so on, and so on. For example like on the plot-draft below:
The problem is, that I don't how to set colour map manually in Seaborn or how to force x-dependend coloring of the plot's curve.
Note that distplot has been deprecated. In the current seaborn version, kdeplot draws a kde curve.
You can grab the generated line with ax.get_lines(). And then create a multicolored line similar to this tutorial example.
Here is some code to demonstrate the idea (currently it would also still work with distplot):
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import seaborn as sns
import numpy as np
np.random.seed(1234)
data = np.random.uniform(-1, 1.1, (5, 1000)).cumsum(axis=1).ravel()
ax = sns.kdeplot(x=data)
x, y = ax.get_lines()[0].get_data()
segments = np.array([x[:-1], y[:-1], x[1:], y[1:]]).T.reshape(-1, 2, 2)
norm = plt.Normalize(x.min(), x.max())
lc = LineCollection(segments, cmap='turbo_r', norm=norm)
lc.set_array(x[:-1])
lc.set_linewidth(2)
ax.get_lines()[0].remove()
line = ax.add_collection(lc)
ax.fill_between(x, y, color='purple', alpha=0.1, hatch='xx')
ax.margins(x=0)
ax.set_ylim(ymin=0)
plt.show()

Is it possible to use matplotlib to include a subheading in legend that isnt a part of the graph?

I am using matplotlib to plot a pie chart. I have added a legend to the chart. However, i would like to add a "Total" to the legend, to sum up the values of all the other categories. Hence the value of "Total" would not be a part of the pie chart, and would only be shown in the legend. Is it possible for me to do that? Thank you.
You can create 2 legends. On the second one, you can create/manipulate symbol/text/title as you want. Here is a runnable code that you can try.
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.axis('equal')
langs = ['C', 'C++', 'Java', 'Python', 'PHP']
students = [23,17,35,29,12]
ax.pie(students, labels = langs,autopct='%1.2f%%')
# first legend
lgn = plt.legend()
ax = plt.gca().add_artist(lgn)
# second legend
gold_patch = mpatches.Patch(color='gold', label='Total= 9999') # use your description text here
second_legend = plt.legend(handles=[gold_patch], loc=1, \
bbox_to_anchor=(0.5, 0.35, 0.55, 0.35)) # adjust location of legend here
second_legend.set_frame_on(False) # use True/False as needed
second_legend.set_title("Other categories")
plt.show()
The output plot:

Bar missing while plotting using Matplotlib's Twinx

I'm using matplotlib.axes.Axes.twinx to have a shared x-axis in matplotlib for both . I dont know why instead of 13 bars to be plotted, only 12 of them are getting plotted.
Link of Data set
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
dataFrame=pd.read_csv("NEM.csv",sep=',')
dataFrame['ratio']=dataFrame['Expert']/dataFrame['Novice']
fig, ax1 = plt.subplots(figsize=(9, 6))
ax1.set_title('N-E Analysis')
xticklabels=dataFrame['Task'].tolist()
ax1.plot('Novice', data=dataFrame, marker='', color='dodgerblue', linewidth=2,label='Novice',zorder=100)
ax1.plot('Expert', data=dataFrame, marker='', color='darkorange', linewidth=2,label='Expert',zorder=200)
plt.ylim(0,120)
ax2 = ax1.twinx()
ax2.bar('Task','ratio', data=dataFrame, color='gray',width=0.35,label='NE',zorder=0)
ax1.spines['top'].set_visible(False)
ax1.spines['right'].set_visible(False)
ax1.spines['left'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax2.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
ax1.set_xticklabels(xticklabels, rotation = 45, ha="right")
ax1.yaxis.grid()
ax1.tick_params(left='off',bottom='off')
ax2.tick_params(right='off')
plt.ylim(0,12)
h1, l1 = ax1.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
p=ax1.legend(h2+h1, l2+l1, loc=2,frameon=False)
fig.tight_layout()
plt.show()
When using plots, it could be good practice to say explicitily how many bars or points you are going to plot. For instance, you can create an x-axis this way:
x_axis = np.arange(len(dataFrame[Task].tolist())
then:
ax1.plot(x_axis, dataFrame['Novice'].tolist(), ...)
after that you rename the xticklabels like this:
ax1.set_xticks(x_axis)
ax1.set_xticklabels(dataFrame[Task].tolist())
Do the same with the bar graph:
ax2.bar(x_axis, dataFrame['Ratio'].tolist(), ...)
This should do the trick.
Hope it helps.

Matplotlib bar plot with table formatting

I have added a table to the bottom of my plot, but there are a number of issues with it:
The right has too much padding.
The left has too little padding.
The bottom has no padding.
The cells are too small for the text within them.
The table is too close to the bottom of the plot.
The cells belonging to the row names are not colored to match those of the bars.
I'm going out of my mind fiddling with this. Can someone help me fix these issues?
Here is the code (Python 3):
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
# Set styles
plt.style.use(['seaborn-paper', 'seaborn-whitegrid'])
plt.style.use(['seaborn'])
sns.set(palette='colorblind')
matplotlib.rc("font", family="Times New Roman", size=12)
labels = ['n=1','n=2','n=3','n=4','n=5']
a = [98.8,98.8,98.8,98.8,98.8]
b = [98.6,97.8,97.0,96.2,95.4]
bar_width = 0.20
data = [a,b]
print(data)
colors = plt.cm.BuPu(np.linspace(0, 0.5, len(labels)))
columns = ('n=1', 'n=2', 'n=3', 'n=4', 'n=5')
index = np.arange(len(labels))
plt.bar(index, a, bar_width)
plt.bar(index+bar_width+.02, b, bar_width)
plt.table(cellText=data,
rowLabels=['a', 'b'],
rowColours=colors,
colLabels=columns,
loc='bottom')
plt.subplots_adjust(bottom=0.7)
plt.ylabel('Some y label which effect the bottom padding!')
plt.xticks([])
plt.title('Some title')
plt.show()
This is the output:
Update
This is working now, but in case someone else is having issues: Make sure you are not viewing your plots and the changes you make to them with IntelliJ SciView as it does not represent changes accurately and introduces some formatting issues!
I think you can fix the first problem by setting the bounding box when you make the table using bbox like this:
bbox=[0, 0.225, 1, 0.2]
where the parameters are [left, bottom, width, height].
For the second issue (the coloring), that is because the color array is not corresponding to the seaborn coloring. You can query the seaborn color palette with
sns.color_palette(palette='colorblind')
this will give you a list of the colors seaborn is using.
Check the modifications below:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
# Set styles
plt.style.use(['seaborn-paper', 'seaborn-whitegrid'])
plt.style.use(['seaborn'])
sns.set(palette='colorblind')
matplotlib.rc("font", family="Times New Roman", size=12)
labels = ['n=1','n=2','n=3','n=4','n=5']
a = [98.8,98.8,98.8,98.8,98.8]
b = [98.6,97.8,97.0,96.2,95.4]
bar_width = 0.20
data = [a,b]
colors = sns.color_palette(palette='colorblind')
columns = ('n=1', 'n=2', 'n=3', 'n=4', 'n=5')
index = np.arange(len(labels))
fig = plt.figure(figsize=(12,9))
plt.bar(index, a, bar_width)
plt.bar(index+bar_width+.02, b, bar_width)
plt.table(cellText=data,
rowLabels=[' a ', ' b '],
rowColours=colors,
colLabels=columns,
loc='bottom',
bbox=[0, 0.225, 1, 0.2])
fig.subplots_adjust(bottom=0.1)
plt.ylabel('Some y label which effect the bottom padding!')
plt.xticks([])
plt.title('Some title')
plt.show()
I also changed the subplot adjustment to subplot_adjust(bottom=0.1) because it wasn't coming out right otherwise. Here is the output:

Seaborn barplot with two y-axis

considering the following pandas DataFrame:
labels values_a values_b values_x values_y
0 date1 1 3 150 170
1 date2 2 6 200 180
It is easy to plot this with Seaborn (see example code below). However, due to the big difference between values_a/values_b and values_x/values_y, the bars for values_a and values_b are not easily visible (actually, the dataset given above is just a sample and in my real dataset the difference is even bigger). Therefore, I would like to use two y-axis, i.e., one y-axis for values_a/values_b and one for values_x/values_y. I tried to use plt.twinx() to get a second axis but unfortunately, the plot shows only two bars for values_x and values_y, even though there are at least two y-axis with the right scaling. :) Do you have an idea how to fix that and get four bars for each label whereas the values_a/values_b bars relate to the left y-axis and the values_x/values_y bars relate to the right y-axis?
Thanks in advance!
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
columns = ["labels", "values_a", "values_b", "values_x", "values_y"]
test_data = pd.DataFrame.from_records([("date1", 1, 3, 150, 170),\
("date2", 2, 6, 200, 180)],\
columns=columns)
# working example but with unreadable values_a and values_b
test_data_melted = pd.melt(test_data, id_vars=columns[0],\
var_name="source", value_name="value_numbers")
g = sns.barplot(x=columns[0], y="value_numbers", hue="source",\
data=test_data_melted)
plt.show()
# values_a and values_b are not displayed
values1_melted = pd.melt(test_data, id_vars=columns[0],\
value_vars=["values_a", "values_b"],\
var_name="source1", value_name="value_numbers1")
values2_melted = pd.melt(test_data, id_vars=columns[0],\
value_vars=["values_x", "values_y"],\
var_name="source2", value_name="value_numbers2")
g1 = sns.barplot(x=columns[0], y="value_numbers1", hue="source1",\
data=values1_melted)
ax2 = plt.twinx()
g2 = sns.barplot(x=columns[0], y="value_numbers2", hue="source2",\
data=values2_melted, ax=ax2)
plt.show()
This is probably best suited for multiple sub-plots, but if you are truly set on a single plot, you can scale the data before plotting, create another axis and then modify the tick values.
Sample Data
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
columns = ["labels", "values_a", "values_b", "values_x", "values_y"]
test_data = pd.DataFrame.from_records([("date1", 1, 3, 150, 170),\
("date2", 2, 6, 200, 180)],\
columns=columns)
test_data_melted = pd.melt(test_data, id_vars=columns[0],\
var_name="source", value_name="value_numbers")
Code:
# Scale the data, just a simple example of how you might determine the scaling
mask = test_data_melted.source.isin(['values_a', 'values_b'])
scale = int(test_data_melted[~mask].value_numbers.mean()
/test_data_melted[mask].value_numbers.mean())
test_data_melted.loc[mask, 'value_numbers'] = test_data_melted.loc[mask, 'value_numbers']*scale
# Plot
fig, ax1 = plt.subplots()
g = sns.barplot(x=columns[0], y="value_numbers", hue="source",\
data=test_data_melted, ax=ax1)
# Create a second y-axis with the scaled ticks
ax1.set_ylabel('X and Y')
ax2 = ax1.twinx()
# Ensure ticks occur at the same positions, then modify labels
ax2.set_ylim(ax1.get_ylim())
ax2.set_yticklabels(np.round(ax1.get_yticks()/scale,1))
ax2.set_ylabel('A and B')
plt.show()

Resources