plotting parallel events in python - python-3.x

I have 5 types of events: A,B,C,D,E
And a timeline between 0 and 100
These events can "start" and "end" any number of times.
To make it easier, that data is built as follows:
A = [(0,3), (50,58)]
B = [(40,60)]
...
(lists, with 2-tuples representing start and end time)
I want to plot them as vertical bars, with the labels A-E on the y axis.
I think it's like a regular vertical bar, except there can be multiple bars (matching each start-end)
Something like this:
Thx!

I think using LineCollection should be a good idea. See code below.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
A = [(0,3), (50,58)]
B = [(40,60)]
def plot_event(ax, A, i, **kwargs):
    """Draw one event's intervals as thick horizontal segments at height ``i``.

    Parameters
    ----------
    ax : matplotlib Axes the segments are added to.
    A : iterable of ``(start, end)`` pairs; each pair becomes one segment
        running from ``(start, i)`` to ``(end, i)``.
    i : y coordinate shared by every segment of this event.
    **kwargs : forwarded to ``LineCollection`` (e.g. ``color``). The line
        width defaults to 10 and can be overridden by the caller.

    Returns
    -------
    The ``LineCollection`` that was added to ``ax``.
    """
    # Apply the width as a default instead of a fixed keyword: passing it both
    # literally and through **kwargs would raise "multiple values for keyword".
    if not {"lw", "linewidth", "linewidths"} & kwargs.keys():
        kwargs["lw"] = 10
    segs = [[(x, i), (y, i)] for (x, y) in A]
    line_segments = LineCollection(segs, **kwargs)
    ax.add_collection(line_segments)
    return line_segments
# One colour per event row.
colors = ['r', 'g', 'b', 'k', 'crimson']
# y position of each event row and the label shown for it on the y axis.
levels = [10, 30, 50, 70, 90]
names = ['A', 'B', 'C', 'D', 'E']

fig, ax = plt.subplots(1, 1, figsize=(7.2, 7.2))
# Only A and B are defined above, so they are reused as stand-ins for C-E.
for j, (i, event) in enumerate(zip(levels, [A, B, A, B, A])):
    plot_event(ax, event, i, color=colors[j])
    # Thin grey guide line drawn behind the bars (zorder=-1) to mark the row.
    ax.axhline(i, lw=2, color='gray', alpha=0.8, zorder=-1)
# Label the rows A-E instead of showing the raw y coordinates.
ax.set_yticks(levels)
ax.set_yticklabels(names)
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)

Related

How do I mask two different ranges of values in Seaborn

I want to color in red the heatmap values that lie between 2.3e-6 and 0.05, and I wanted to do that by plotting one heatmap on top of another. But I can't find a way to mask two different ranges of values. Here is my attempt.
from scipy.stats import pearsonr
# Build an N x N matrix of uniform random values with NaNs scattered at
# randomly chosen positions.
N = 10
data = np.random.uniform(0, 45, size=(N, N))
for x, y in np.random.randint(0, N, 50).reshape(-1, 2):
data[x, y] = np.nan # fill in some nans at random places
df = pd.DataFrame(data)
# Custom "correlation" method: returns the second element of pearsonr's
# result (the p-value) instead of the coefficient.
def pearsonr_pval(x,y):
return pearsonr(x,y)[1]
# Keep only columns that contain at least one non-zero entry, then select
# columns by position 3..49.
data = df.loc[:, (df != 0).any(axis=0)]
data = data.iloc[:,3:50]
to_log = data.columns
# log(x + 1) transform of every cell, then the pairwise p-value matrix X.
df_log = data[to_log].applymap(lambda x: np.log(x+1))
X = df_log.corr(method = pearsonr_pval)
sns.set_style("darkgrid")
# Mask that is set on the upper triangle (diagonal included); masked cells
# are not drawn by sns.heatmap.
mask = np.zeros_like(X)
mask[np.triu_indices_from(mask)] = True
with sns.axes_style("white"):
f, ax = plt.subplots(figsize=(20, 20))
# First layer: all unmasked cells with the "YlGnBu" colormap.
ax = sns.heatmap(X,
mask=mask,
vmax=1,
vmin=0,
square=True,
cmap="YlGnBu",
annot_kws={"size": 1})
# Second layer drawn on the same axes with the "rocket" colormap.
# NOTE(review): (X < 2.3e-6) & (0.05 < X) cannot both hold for the same cell,
# so this combined mask is False everywhere and nothing is hidden.
ax = sns.heatmap(X,
mask=(X.values<2.3e-6) & (0.05<X.values) & mask.astype(bool),
vmax=1,
vmin=0,
square=True,
cmap="rocket",
annot_kws={"size": 1})
But I get an error: TypeError: ufunc 'bitwise_and' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe'
Edit with the code above I get :
As explained in this answer, for element-wise Boolean comparisons in Pandas you need to use & and |, and to enclose each condition in parentheses. So to combine your three conditions, you would need
mask=(X<2.3e-6) | (0.05<X) | mask.astype(bool),

Is there a way to add a colorbar to the top or side of the whole figure? [duplicate]

I cannot for the life of me figure out how to attach one colorbar for multiple pandas subplots. Almost all the other questions that resolve the issue of putting one colorbar for multiple subplots use np arrays, not dataframes, to plot.
There is one question, One colorbar for seaborn heatmaps in subplot, which seems like it could be useful, however I could not figure out how to extend it to my case.
Could anyone help? Below is an example of my current code. Thanks in advance!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# If you're using a notebook:
# %matplotlib inline
# Example data: one shared x column ("BASE"), four y columns, and an integer
# column used to colour the points.
df = pd.DataFrame({"BASE": np.random.randn(10),
"A": np.random.randn(10),
"B": np.random.randn(10),
"C": np.random.randn(10),
"D": np.random.randn(10),
"color_col": [1,1,2,2,1,1,2,1,2,2]})
# 2 x 2 grid of scatter plots; each call draws into the subplot that was
# just made current via plt.subplot, passed explicitly as ax=plt.gca().
plt.figure(1, figsize = (15,15))
plt.subplot(2,2,1)
df.plot.scatter(x = "BASE", y = "A", c = df["color_col"], ax = plt.gca())
plt.subplot(2,2,2)
df.plot.scatter(x = "BASE", y = "B", c = df["color_col"], ax = plt.gca())
plt.subplot(2,2,3)
df.plot.scatter(x = "BASE", y = "C", c = df["color_col"], ax = plt.gca())
plt.subplot(2,2,4)
df.plot.scatter(x = "BASE", y = "D", c = df["color_col"], ax = plt.gca())
The question Matplotlib 2 Subplots, 1 Colorbar is probably more what you are looking for. The problem is however that you do not directly have access to the mappable that is created in the pandas scatter plot.
The solution here would be to retrieve this mappable (in this case a PathCollection) from the axes using plt.gca().get_children()[0], which takes the first child artist from the axes.
This method is safe as long as all scatterplots share the same colors and
as long as there are no other artists in the axes.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Example data: one shared x column, four y columns and a continuous value
# that drives the point colours.
df = pd.DataFrame({"BASE": np.random.randn(10),
                   "A": np.random.randn(10),
                   "B": np.random.randn(10),
                   "C": np.random.randn(10),
                   "D": np.random.randn(10),
                   "color_col": np.random.randn(10)})

fig = plt.figure(1, figsize=(6, 6))
# right=0.8 keeps the right-hand 20% of the figure free for the colorbar.
plt.subplots_adjust(wspace=0.5, right=0.8, top=0.9, bottom=0.1)

for i, col in enumerate("ABCD"):
    plt.subplot(2, 2, i + 1)
    # colorbar=False stops pandas from attaching a colorbar to each subplot.
    ax = df.plot.scatter(x="BASE", y=col, ax=plt.gca(),
                         c=df["color_col"], cmap="jet", colorbar=False)

# pandas returns the Axes, not the mappable, so take the PathCollection that
# scatter added to the last axes drawn. Using it for the whole figure is safe
# as long as all scatter plots share the same colour values and colormap.
im = ax.collections[0]
# Dedicated axes for the colorbar inside the strip reserved above
# ([left, bottom, width, height] in figure coordinates).
cax = fig.add_axes([0.85, 0.1, 0.03, 0.8])
fig.colorbar(im, cax=cax)
plt.show()
Here're a few stones at the target. I'm pretty sure at least one more method exists (using GridSpec differently than outlined below) but I like the latter two.
1. You could simply choose when to plot a color bar vs not, as long as you don't care if one of your subplots is trimmed to make room for the colorbar.
# Four y columns plotted against "BASE"; "color_col" drives the point colours.
columns = "ABCD"
df = pd.DataFrame({name: np.random.randn(10)
                   for name in ("BASE", *columns, "color_col")})

fig, axis = plt.subplots(nrows=1, ncols=len(columns), figsize=(18, 6))
last = columns[-1]
for panel, col in zip(axis, columns):
    # Only the right-most subplot gets a colorbar (and gives up some width).
    df.plot.scatter(ax=panel, x="BASE", y=col, c=df["color_col"], s=55,
                    cmap="jet", colorbar=(col == last))
fig.tight_layout()
plt.show()
2. If you really want a 2x2 grid and all subplots must be the same size, you could try this. I like the first answer, apart from having to place the color bar at a specific location. I'd rather draw everything I want, use the existing tight_layout() functionality, and then add a colorbar. The only magic number I retained is figsize.
fig, axes = plt.subplots(2, 2, figsize=(9, 4))
df = pd.DataFrame({"BASE": np.random.randn(10),
                   "A": np.random.randn(10),
                   "B": np.random.randn(10),
                   "C": np.random.randn(10),
                   "D": np.random.randn(10),
                   "color_col": np.random.randn(10)})
for i, col in enumerate("ABCD"):
    # Floor division: in Python 3 `i / 2` is a float and cannot be used to
    # index the array of axes.
    ax = axes[i // 2][i % 2]
    df.plot.scatter(x="BASE", y=col, ax=ax, s=35,
                    c=df["color_col"], cmap="jet", colorbar=False)
fig.tight_layout()
# Note: since the colorbar is added manually, tight_layout must be called
# before rendering the colorbar.
# The mappable is the PathCollection scatter added to the last axes drawn;
# passing every axes via ax= lets the colorbar take its space from all four.
plt.colorbar(ax.collections[0], ax=axes.ravel().tolist())
3. Yet another approach would be to use gridspec as an argument to the subplots constructor:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt  # used below but missing from the original

df = pd.DataFrame({"BASE": np.random.randn(10),
                   "A": np.random.randn(10),
                   "B": np.random.randn(10),
                   "C": np.random.randn(10),
                   "D": np.random.randn(10),
                   "color_col": np.random.randn(10)})
nrows, ncols = 2, 2
# width_ratios needs exactly one entry per grid column; current matplotlib
# raises ValueError when the lengths differ.
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, sharex='col', sharey=False,
                         gridspec_kw={'width_ratios': [1] * ncols},
                         figsize=(7, 4))
for i, col in enumerate("ABCD"):
    # Floor division: `i / ncols` is a float in Python 3 and cannot index.
    ax = axes[i // ncols][i % ncols]
    df.plot.scatter(x="BASE", y=col, ax=ax, s=35,
                    c=df["color_col"], cmap="jet", colorbar=False)
fig.tight_layout()
# make_axes creates a colorbar axes by taking space from all listed parent
# axes and returns the keyword arguments to hand on to colorbar().
cax, kw = mpl.colorbar.make_axes(list(axes.flat))
# The mappable is the PathCollection that scatter added to the first axes.
plt.colorbar(axes[0][0].collections[0], cax=cax, **kw)
plt.show()

How can I annotate a Grouped Broken Barh Chart Python Matplotlib

I have searched to exhaustion trying to annotate my grouped broken barH chart. I would like to have the "Event" from my dataframe annotated in each broken bar section. The examples I have found online manually enter the events x,y positions, AND, are not grouped broken bar examples.
the end goal is to have these events display on-hover, but I believe I wont have an issue if I can just get the events to display.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import matplotlib.ticker as ticker
import io
pd.plotting.register_matplotlib_converters()
inp = u""" T29,11/4/2019 0:00,11/4/2019 0:00,off,none
T29,11/4/2019 0:00,11/5/2019 0:00,off,eventa
T29,11/5/2019 0:00,11/6/2019 0:00,on,none
T35,11/4/2019 0:00,11/5/2019 0:00,off,eventb
T35,11/5/2019 0:00,11/6/2019 0:00,paused,eventa
T43,11/4/2019 0:00,11/4/2019 4:01,on,none
T43,11/4/2019 4:01,11/4/2019 12:06,off,none
T43,11/4/2019 12:06,11/5/2019 8:07,on,eventc
T43,11/5/2019 8:07,11/5/2019 10:12,paused,eventd
T43,11/5/2019 10:12,11/5/2019 16:15,on,none
T43,11/5/2019 18:12,11/5/2019 20:15,off,none
"""
# Timestamp layout used by both date columns of `inp`.
DATE_FORMAT = "%m/%d/%Y %H:%M"


def mydateparser(x):
    """Parse one timestamp string such as '11/4/2019 0:00'.

    Uses the standard-library datetime class: the ``pd.datetime`` alias the
    original relied on no longer exists in current pandas releases.
    """
    return datetime.datetime.strptime(x, DATE_FORMAT)


# header=0 combined with names= makes pandas treat the first line of `inp`
# as a header row to be replaced, so that line does not become a data row.
df = pd.read_csv(io.StringIO(inp), header=0, encoding="ISO-8859-1",
                 names=["Name", "StartTime", "FinishTime", "Status", "Event"])
# Convert the date columns after reading; read_csv's date_parser argument is
# deprecated, and to_datetime with an explicit format gives the same values.
for _col in ("StartTime", "FinishTime"):
    df[_col] = pd.to_datetime(df[_col], format=DATE_FORMAT)
# Bar colour for each Status value.
color = {"on": "g", "paused": "yellow", "off": "black"}
# Duration of every row; broken_barh takes (start, width) pairs.
df["Diff"] = df.FinishTime - df.StartTime
# Earliest start / latest finish as proleptic-Gregorian day ordinals.
# NOTE(review): these bare integers are later passed to set_xlim; whether
# they land on the right dates depends on matplotlib's date epoch — confirm.
minDate = (datetime.datetime.toordinal(min(df.StartTime)))
maxDate = (datetime.datetime.toordinal(max(df.FinishTime)))
days = mdates.DayLocator()
# Counts the Name groups; used as the upper y limit below.
Mcount = 0
fig, ax = plt.subplots(figsize=(6, 3), edgecolor="black", linewidth=1)
labels = []
# One row per Name (task = (name, sub-frame)); within a row, one broken_barh
# call per Status (r = (status, sub-frame)) so each status gets its colour.
for i, task in enumerate(df.groupby("Name")):
Mcount += 1
labels.append(task[0])
for r in task[1].groupby("Status"):
data = r[1][["StartTime", "Diff"]]
ax.broken_barh(data.values, (i - 0.4, 0.8), edgecolor="black", alpha=1, linewidth=1,
color=color[r[0]])
# Y axis: one tick per Name, labelled with the group key.
ax.set_ylim(bottom=-0.8, top=Mcount)
ax.set_yticks(range(len(labels)))
ax.set_yticklabels(labels)
ax.set_ylabel("Names", rotation=90, fontdict={'family': 'DejaVu Sans', 'color': 'black', 'weight': 'bold', 'size': 14})
# X axis: major ticks per day formatted as dates, minor ticks every 0.5 axis
# units formatted as hour:minute.
ax.set_xlim(left=minDate, right=maxDate)
ax.set_xlabel("Date", fontdict={'family': 'DejaVu Sans', 'color': 'black', 'weight': 'bold', 'size': 14})
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d-%Y'))
ax.tick_params(which='major', axis='x', rotation=0, length=11, color='black')
ax.xaxis.set_major_locator(days)
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M'))
ax.tick_params(which='minor', rotation=0, labelsize=8, length=4, color='red', size=2)
ax.xaxis.set_minor_locator(ticker.MultipleLocator(.50))
plt.show()
Hello and welcome to StackOverflow. IIUC, you can append a for loop to your enumerate statement to add text to the axes.
# One row per Name; within a row, one broken_barh call per Status so every
# status gets its own colour, followed by one text label per bar.
for i, task in enumerate(df.groupby("Name")):
    Mcount += 1
    labels.append(task[0])
    for r in task[1].groupby("Status"):
        data = r[1][["StartTime", "Diff"]]
        ax.broken_barh(data.values,
                       (i - 0.4, 0.8),
                       edgecolor="black",
                       alpha=1,
                       linewidth=1,
                       color=color[r[0]]
                       )
        # Pair each bar with the Event of its own row. Indexing the group's
        # first Event would put the same label on every bar of that status.
        for (x1, x2), event in zip(data.values, r[1]["Event"]):
            # x1 is the bar start and x2 its duration, so x1 + x2/2 is the
            # horizontal centre of the bar; y=i is the centre of the row.
            ax.text(x=x1 + x2/2,
                    y=i,
                    s=event,
                    ha='center',
                    va='center',
                    color='white',
                    )
Modified from the docs.
Output:
You can, of course, modify the text formatting.
The text requires an x location, a y location, and a string. The hacky indexing was the quickest way I could pull the event info out of your dataframe.

Matplotlib Query: Connecting max value to corresponding Y point

I have the plot above done out for a project I am currently working on. I am relatively new to matplotlib and want to ask would there be any way to connect the max point of each line to the y axis along the lines of something like this (except straight and not as poorly done :) ):
example addressing the foreground issue of the hlines mentioned in the comments here.
plots visualizing the discussed variants:
created with this code:
data = [.82, .72, .6, .5, .45]
col = ['k', 'b', 'm', 'g', 'r']
fig, axs = plt.subplots(1, 3, figsize=(12, 4))


def _draw_lines(ax):
    """Plot one thick line from 0 up to each data value, in its own colour."""
    for value, fmt in zip(data, col):
        ax.plot([0, value], fmt, lw=10)


def _draw_hlines(ax, **kwargs):
    """Draw a horizontal line across the axes at each data value."""
    for value in data:
        ax.axhline(value, lw=5, **kwargs)


# Panel 1: the hlines are added after the lines and end up on top of them.
axs[0].set_title('Problem: hlines in foreround')
_draw_lines(axs[0])
_draw_hlines(axs[0])

# Panel 2: same drawing order, but zorder=0 is passed for the hlines.
axs[1].set_title('Solution 1: zorder=0 for hlines')
_draw_lines(axs[1])
_draw_hlines(axs[1], zorder=0)

# Panel 3: the hlines are drawn first and the lines afterwards.
axs[2].set_title('Solution 2: plot hlines first')
_draw_hlines(axs[2])
_draw_lines(axs[2])

plt.tight_layout()
So I found the following code allows me to draw these lines:
plt.axhline(y=0.8462, color='r', linestyle='--')
Which produces:
I can just repeat this for the other max values of y.

multicolor graph in networkx

I can't figure out how to draw each dictionary in its own color. I tried it this way, but everything comes out in a single color. What am I missing?
import networkx as nx
import matplotlib.pyplot as plt
# Outer keys are colour names; each inner dict maps an edge (u, v) to its
# weight. Presumably each group is meant to be drawn in its key's colour.
uber_dict={'y': {('VD3', 'VD5'): 0, ('VD3', 'VD8'): 0}, 'blue': {('R1', 'R3'): 1, ('R1', 'R2'): 1, ('R1', 'R4'): 1}, 'green': {('VD1', 'VD2'): 0, ('VD1', 'VD7'): 0, ('VD1', 'VD6'): 0}, 'red': {('DD2', 'DD3'): 4, ('DD2', 'VD4'): 1, ('DD2', 'DD1'): 5}}
g = nx.Graph()
# Add every edge of every colour group to one graph; the colour key (cvet)
# is not stored on the edges or nodes.
for cvet, slovar in uber_dict.items():
for e, p in slovar.items():
g.add_edge(*e, weight=p)
pos = nx.circular_layout(g)
edge_labels = {(u, v): d['weight'] for u, v, d in g.edges(data=True)}
# NOTE(review): if this call runs after the loop above has finished, cvet
# holds only the last key iterated, so one colour is applied to every node.
nx.draw_networkx_nodes(g, pos, node_size=600, node_color=cvet)
nx.draw_networkx_edges(g, pos)
nx.draw_networkx_labels(g, pos)
nx.draw_networkx_edge_labels(g, pos, edge_labels=edge_labels, font_color='r')
# The title text is Russian for "Input circuit".
plt.title("Входная схема")
plt.axis('off')
# plt.show()
plt.savefig('output.png')
The value of cvet in this line:
nx.draw_networkx_nodes(g, pos, node_size=600, node_color=cvet)
is whatever it was the last time through the for loop. So you're telling it to draw the graph with all nodes having whatever color that happened to be.
To fix the problem, you can create a list of nodes and pass it to the drawing command as nodelist, together with a list of colors (rather than the single value cvet) such that node nodelist[i] gets color colorlist[i].

Resources