Custom annotation of text in seaborn heatmap - python-3.x

I want to assign different fontsizes for positive and negative values in the following heatmap plotted using seaborn.
# Reproduction of the problem: two heatmaps are drawn on the same (current)
# axes, one per value range, each with its own annotation font size.
import numpy as np
import pandas as pd
import seaborn as sns  # % matplotlib inline
import matplotlib.pyplot as plt

# NOTE: this first assignment is immediately overwritten by the dict below.
data = np.array([[0.000000, 0.000000], [-0.231049, 0.000000], [0.231049, 0.000000]])
data = {0: [0.000000, 0.000000], 1: [2.31049, 0.000000], 2: [-0.231049, 0.000000]}
df = pd.DataFrame.from_dict(data, orient='index')

# First pass: colour range 0..10, large annotations.
# annot=True on the full frame labels *every* cell, not only positive ones.
sns.heatmap(
    df, cmap='bwr', vmax=10, vmin=0, annot=True, fmt='f',
    linewidths=0.25, annot_kws={"fontsize": 16}, center=0, square=True
)
# Second pass: colour range -10..0, small annotations, again for every cell,
# drawn on top of the first pass.
sns.heatmap(
    df, cmap='bwr', vmax=0, vmin=-10, annot=True, fmt='f',
    linewidths=0.25, annot_kws={"fontsize": 6}, center=0, square=True
)
plt.show()
I tried to specify the min and max and plot in two steps, but the colors and fonts aren't displayed correctly.
Suggestions on how to fix this will be of great help.

To make it easier to keep the properties in sync, the code below uses a for loop. For the positive part, the dataframe is filtered to only contain the positive values. (Internally, pandas fills in NaN for the values that get filtered away, and seaborn leaves those cells blank.)
vmin and vmax are set to the same values for both the negative and positive part of the loop. That way, the colorbar will show all values. To avoid drawing the colorbar twice, cbar=False is passed in one of the two iterations.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# 12x12 frame of random integers in [-10, 10]
df = pd.DataFrame(np.random.randint(-10, 11, (12, 12)))
fig, ax = plt.subplots()

# Draw two heatmaps on the same axes: one with only the positive cells, one
# with only the negative cells. Filtered-out cells become NaN and stay blank.
for posneg in ['pos', 'neg']:
    sns.heatmap(
        df[df > 0] if posneg == 'pos' else df[df < 0],
        # identical colour scaling in both passes keeps the colours consistent
        cmap='bwr', vmin=-10, vmax=10, center=0, annot=True, fmt='.0f',
        annot_kws={"fontsize": 16 if posneg == 'pos' else 8},
        # draw the colorbar only once (during the 'neg' pass)
        cbar=(posneg != 'pos'), cbar_kws={'ticks': range(-10, 11, 2)},
        linewidths=0.25, square=True, ax=ax
    )
plt.show()
PS: The code above uses if/else inside some of the arguments. Such a conditional expression can be handy when only something short is involved, or in a list comprehension.
An alternative would be to use a normal if test together with variables, e.g.:
# Same two-pass drawing as before, but the per-pass settings are chosen with a
# regular if/else and stored in variables. Relies on `df`, `ax` and `sns`
# from the preceding snippet.
for posneg in ['pos', 'neg']:
    if posneg == 'pos':
        df_filtered = df[df > 0]
        fontsize = 16
        fontweight = 'bold'
    else:
        df_filtered = df[df < 0]
        fontsize = 12
        fontweight = 'light'
    sns.heatmap(
        df_filtered,
        cmap='bwr', vmin=-10, vmax=10, center=0, annot=True, fmt='.0f',
        annot_kws={"fontweight": fontweight, "fontsize": fontsize},
        # draw the colorbar only once (during the 'neg' pass)
        cbar=(posneg != 'pos'), cbar_kws={'ticks': range(-10, 11, 2)},
        linewidths=0.25, square=True, ax=ax
    )

Related

Color Matplotlib Histogram Subplots by a Categorical Variable

I am trying to create histogram subplots whose values I want to color by a second, categorical variable.
A small subset of the data is below
# Five sample rows: two measurement columns ('ift', 'ine') and one 0/1
# category column for each of them ('ift_out', 'ine_out').
data = {
    'ift': [0.031967, 0.067416, 0.091275, 0.046852, 0.100406],
    'ine': [0.078384, 0.09554, 0.234695, 0.182821, 0.190237],
    'ift_out': [1, 1, 0, 1, 0],
    'ine_out': [1, 1, 0, 0, 1],
}
xyz = pd.DataFrame(data)
xyz  # bare expression: displays the frame when run in a notebook
My initial stab at it is also below. A bit stumped on the inclusion of the categorical columns as colors
# Two stacked histograms sharing the y axis, one per measurement column.
# The trailing comments mark the category column each histogram should be
# coloured by; that colouring is the part the question asks about.
fig, axs = plt.subplots(nrows=2, ncols=1, sharey=True, tight_layout=True)
axs[0].hist(xyz['ift']) # color = xyz['ift_out']
axs[1].hist(xyz['ine']) # color = xyz['ine_out']
plt.show()
Sample output is attached below
Following @JohanC's answer, I made some changes to my original code as shown below, and that worked the way I wanted:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="darkgrid")
fig, axs = plt.subplots(nrows=1, ncols=2, tight_layout=True)

# One stacked histogram per measurement column, coloured by the matching
# '<column>_out' category column. Relies on `xyz` from the question.
for ax, column in zip(axs, ['ift', 'ine']):
    sns.histplot(
        data=xyz, x=column,
        hue=f'{column}_out', palette=['skyblue', 'tomato'],
        multiple='stack', ax=ax,
    )

How to visualize a list of strings on a colorbar in matplotlib

I have a dataset like
# Sample data: five points and one text label per point.
x = (3, 4, 6, 77, 3)
y = (8, 5, 2, 5, 5)
labels = ("null", "exit", "power", "smile", "null")
Then I use
from matplotlib import pyplot as plt
# Plain scatter plot of the sample points (no per-point colour is passed).
plt.scatter(x,y)
# Attempt to build the colorbar directly from the label strings; this is the
# call the question is about.
colorbar = plt.colorbar(labels)
plt.show()
to make a scatter plot, but I cannot get the colorbar to show the labels next to its colors.
How to get this?
I'm not sure whether it's a good idea to do that for scatter plots in general (you have the same description for different data points, so maybe just use a legend here?), but I guess a specific solution to what you have in mind might be the following:
from matplotlib import pyplot as plt

# Data
x = [3, 4, 6, 77, 3]
y = [8, 5, 2, 5, 5]
labels = ('null', 'exit', 'power', 'smile', 'null')

# Customize colormap and scatter plot.
# plt.get_cmap is used instead of plt.cm.get_cmap, which is deprecated and
# no longer available in recent matplotlib releases.
cm = plt.get_cmap('hsv')
# Each point gets its own colour value: its position in the data.
sc = plt.scatter(x, y, c=range(len(x)), cmap=cm)
# One tick per point value, relabelled with the point's text label.
cbar = plt.colorbar(sc, ticks=range(len(labels)))
cbar.ax.set_yticklabels(labels)
plt.show()
This will result in such an output:
The code combines this Matplotlib demo and this SO answer.
Hope that helps!
EDIT: Incorporating the comments, I can only think of some kind of label color dictionary, generating a custom colormap from the colors, and before plotting explicitly grabbing the proper color indices from the labels.
Here's the updated code (I added some additional colors and data points to check scalability):
from matplotlib import pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import numpy as np

# Color information; create custom colormap with exactly one level per label
label_color_dict = {'null': '#FF0000',
                    'exit': '#00FF00',
                    'power': '#0000FF',
                    'smile': '#FF00FF',
                    'addon': '#AAAAAA',
                    'addon2': '#444444'}
all_labels = list(label_color_dict.keys())
all_colors = list(label_color_dict.values())
n_colors = len(all_colors)
cm = LinearSegmentedColormap.from_list('custom_colormap', all_colors, N=n_colors)

# Data
x = [3, 4, 6, 77, 3, 10, 40]
y = [8, 5, 2, 5, 5, 4, 7]
labels = ('null', 'exit', 'power', 'smile', 'null', 'addon', 'addon2')

# Map every label to its position in the colormap. Looking up the label
# itself (rather than its color) stays correct if two labels share a color.
label_to_idx = {label: idx for idx, label in enumerate(all_labels)}
color_idx = [label_to_idx[label] for label in labels]

# Customize colorbar and plot.
# vmin/vmax pin the color range to the full index range, so each label keeps
# its color even when some labels do not occur in the data.
sc = plt.scatter(x, y, c=color_idx, cmap=cm, vmin=0, vmax=n_colors - 1)

# The colorbar spans [0, n_colors - 1] and is split into n_colors equally
# wide segments; put each tick at the midpoint of its segment.
segment_width = (n_colors - 1) / n_colors
c_ticks = (np.arange(n_colors) + 0.5) * segment_width
cbar = plt.colorbar(sc, ticks=c_ticks)
cbar.ax.set_yticklabels(all_labels)
plt.show()
And, the new output:
Finding the correct middle point of each color segment is (still) not good, but I'll leave this optimization to you.

How to plot a line representing a value from a dataframe with two geometry columns?

I have the following data:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt
# One row per line: id, start point, end point and the value of the line.
# Only the 'geometry' column (the start point) is declared as the frame's
# geometry; 'geometry b' is an ordinary column holding the end point.
points = gpd.GeoDataFrame([['A', Point(1.5, 1.75), Point(2, 2), 16],
['B', Point(3.0,2.0), Point(3, 4), 18],
['C', Point(2.5,1.25), Point(1, 1), 19]],
columns=['id', 'geometry', 'geometry b', 'value'],
geometry='geometry')
the first geometry column represents the starting point, the second the ending point of a line which has a value corresponding to the value column.
I have tried to plot this with:
# Plot the frame on a 12x12 figure, coloured by the 'value' column.
f, ax = plt.subplots(1, figsize = [12, 12])
points.plot(ax=ax, column = 'value')
However it just plots the first geometry column and colours the points corresponding to their value.
How do I produce a plot that draws the lines, colour coded to their value?
This working code and its output plot demonstrate how you can achieve what you need. See comments in the code for more details.
import geopandas as gpd
from shapely.geometry import Point, LineString
import matplotlib.pyplot as plt

# (id, start point, end point, value) for every line to draw
endpoints = [
    ('A', Point(1.5, 1.75), Point(2, 2), 16),
    ('B', Point(3.0, 2.0), Point(3, 4), 18),
    ('C', Point(2.5, 1.25), Point(1, 1), 19),
]

# build one row per line, appending the `LineString` that joins its two points
rows = []
for line_id, start, end, value in endpoints:
    segment = LineString([[start.x, start.y], [end.x, end.y]])
    rows.append([line_id, start, end, value, segment])

# create a geodataframe and declare the `LineString` column (the last one)
# as its geometry, so that plotting draws the lines rather than the points
pts_and_lines = gpd.GeoDataFrame(
    rows,
    columns=['id', 'beg_pt', 'end_pt', 'value', 'LineString_obj'],
    geometry='LineString_obj',
)

# plot the lines, colour coded by their 'value'
f, ax = plt.subplots(1, figsize=[4, 4])
pts_and_lines.plot(ax=ax, column='value')
plt.show()
The output plot:
If you prefer to first build a dataframe containing from_point and to_point, and then append a new column containing the LineString created from the existing points, here is an alternative version of the code.
import geopandas as gpd
from shapely.geometry import Point, LineString
import matplotlib.pyplot as plt
# `points_df` holds, for every line, its start point (`geometry_a`),
# its end point (`geometry_b`) and its value. No geometry column is declared yet.
points_df = gpd.GeoDataFrame([['A', Point(1.5, 1.75), Point(2, 2), 16],
['B', Point(3.0,2.0), Point(3, 4), 18],
['C', Point(2.5,1.25), Point(1, 1), 19]],
columns=['id', 'geometry_a', 'geometry_b', 'value'])
# add a new column `line` to the dataframe; for each row it contains
# the `LineString` joining that row's start and end point.
points_df['line'] = points_df.apply(lambda x: LineString([x['geometry_a'], x['geometry_b']]), axis=1)
# build `target_gdf` from `points_df`, passing the `line` values as its geometry,
# so that plotting `target_gdf` draws the lines.
target_gdf = gpd.GeoDataFrame(points_df, geometry=points_df['line'])
# plot the lines, colour coded by their 'value'
f, ax = plt.subplots(1, figsize = [4, 4])
target_gdf.plot(ax=ax, column = 'value');
plt.show()
Its output plot is the same as the previous one.

Masking annotations in seaborn heatmap

I would like to make a heatmap that has annotations only in specific cells. I thought one way to do this would be to make a heatmap with annotations in all cells and then overlay another heatmap that has no annotations but is masked in the regions where I want the original annotations to be visible:
import numpy as np
import seaborn as sns
# NOTE: `plt` is used below but matplotlib.pyplot is not imported in this snippet.
par_corr_p = np.array([[1, 2], [3, 4]])
# mask is True where the value is below 2
masked_array = np.ma.array(par_corr_p, mask=par_corr_p<2)
fig, ax = plt.subplots()
# first layer: heatmap annotated in every cell
sns.heatmap(par_corr_p, ax=ax, cmap ='RdBu_r', annot = par_corr_p, center=0, vmin=-5, vmax=5)
# second layer on the same axes: no annotations, masked cells are not drawn
sns.heatmap(par_corr_p, mask = masked_array.mask, ax=ax, cmap ='RdBu_r', center=0, vmin=-5, vmax=5)
However, this is not working - the second heatmap is not covering up the first one:
Please advise
I tried a few things, including using numpy.nan or "" in the annot array. Unfortunately they don't work.
This is probably the easiest way. It involves grabbing the texts of the axes, which should only be the labels in annot which sns.heatmap puts there.
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

par_corr_p = np.array([[1, 2], [3, 4]])
data = par_corr_p
# True for every cell whose annotation should stay visible
show_annot_array = data >= 2

fig, ax = plt.subplots()
sns.heatmap(
    ax=ax,
    data=data,
    annot=data,
    cmap='RdBu_r', center=0, vmin=-5, vmax=5
)
# ax.texts should hold one text per annotated cell; pair them with the
# flattened (row by row) visibility flags and hide the unwanted ones.
for text, show_annot in zip(ax.texts, show_annot_array.ravel()):
    text.set_visible(show_annot)
plt.show()

How to plot vertical lines in plotly offline?

How would one plot a vertical line in plotly offline, using python? I want to add lines at x=20, x=40, and x=60, all in the same plot.
def graph_contracts(self):
    """Plot the contract prices as a marker scatter with plotly offline.

    The x axis is the contract index (0 .. len(all_prices) - 1) and the y
    axis is the price. `all_prices`, `go`, `np` and `py` are not defined in
    this snippet; presumably they come from the enclosing module - verify.
    """
    trace1 = go.Scatter(
        x=np.array(range(len(all_prices))),
        y=np.array(all_prices), mode='markers', marker=dict(size=10, color='rgba(152, 0, 0, .8)'))
    data = [trace1]
    layout = go.Layout(title='Market Contracts by Period',
                       xaxis=dict(title='Contract #',
                                  titlefont=dict(family='Courier New, monospace', size=18, color='#7f7f7f')),
                       yaxis=dict(title='Prices ($)',
                                  titlefont=dict(family='Courier New, monospace', size=18, color='#7f7f7f')))
    fig = go.Figure(data=data, layout=layout)
    py.offline.plot(fig)
You can add lines via shape in layout, e.g.
import plotly
plotly.offline.init_notebook_mode()
import random

# 100 random points in [0, 1) to draw the lines over
x = list(range(100))
trace = plotly.graph_objs.Scatter(x=x,
                                  y=[random.random() for _ in x],
                                  mode='markers')

# One line shape per requested x position; x0 == x1 makes the line vertical,
# and y0/y1 are given in data coordinates ('yref': 'y'), here 0 to 1.
shapes = [{'type': 'line',
           'xref': 'x',
           'yref': 'y',
           'x0': i,
           'y0': 0,
           'x1': i,
           'y1': 1}
          for i in (20, 40, 60)]
layout = plotly.graph_objs.Layout(shapes=shapes)
fig = plotly.graph_objs.Figure(data=[trace],
                               layout=layout)
plotly.offline.plot(fig)
would give you
Here is my example. The key statement is the following:
fig.add_trace(go.Scatter(x=[12, 12], y=[-300,300], mode="lines", name="SIGNAL"))
The most important attribute is mode="lines".
This example draws a vertical segment at x=12.
EXAMPLE
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import numpy as np
import plotly.tools as tls

df1 = pd.read_csv('./jnjw_f8.csv')

# Both axes use evenly spaced ticks, 3 units apart.
x_axis = go.layout.XAxis(tickmode='linear', tick0=1, dtick=3)
y_axis = go.layout.YAxis(tickmode='linear', tick0=-100, dtick=3)
layout = go.Layout(xaxis=x_axis, yaxis=y_axis)

fig = go.Figure(layout=layout)

# The data curve, followed by the vertical segment at x=12: both points of
# the segment share the same x, and mode="lines" joins them with a line.
curve = go.Scatter(x=df1['x'], y=df1['y1'], name='JNJW_sqrt')
segment = go.Scatter(x=[12, 12], y=[-300, 300], mode="lines", name="SIGNAL")
for trace in (curve, segment):
    fig.add_trace(trace)
fig.show()
Look here too.
how to plot a vertical line with plotly
A feature for vertical and horizontal lines is implemented with Plotly.py 4.12 (released 11/20). It works for plotly express and graph objects. See here: https://community.plotly.com/t/announcing-plotly-py-4-12-horizontal-and-vertical-lines-and-rectangles/46783
Simple example:
import plotly.express as px
# Line chart of the sample stocks data set bundled with plotly express.
df = px.data.stocks(indexed=True)
fig = px.line(df)
# Add a vertical line at the given x position.
fig.add_vline(x='2018-09-24')
fig.show()
# add_vline also accepts styling keyword arguments (width, dash, colour):
fig.add_vline(x=2.5, line_width=3, line_dash="dash", line_color="green")

Resources