I started using plotly to draw sankey charts.
I want to display a multi year series with birth and death year by year.
The current code looks like:
`
import plotly.graph_objects as go
fig = go.Figure(data=[go.Sankey(
node = dict(
pad = 15,
thickness = 20,
line = dict(color = "black", width = 0.5),
label = ["v11","v21","v31","v41","out21","out31","out41","in21","in31","in41", "v12", "v22"],
color = "blue"
),
link = dict(
source = [0, 7, 0, 1, 8, 1, 2, 9, 2, 10], #
target = [1, 1, 4, 2, 2, 5, 3, 3, 6,11],
value = [1000, 100, 100, 1000, 150, 50, 1000, 120, 80, 800]
))])
fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
fig.show()
`
which produces something like:
Sankey
All birth nodes are attached to the left side and the out nodes to the right side.
It should however look like this:
Sankey manually rearranged
Is there a way to pin the x-axis?
Or any d3 example, which does the trick?
Thanks for any hint,
Carl
PS: With the following question I got one step further [https://stackoverflow.com/questions/61152889/plotly-how-to-set-node-positions-in-a-sankey-diagram].
The look like this now:
import plotly.graph_objects as go
unique_list = ["c0_v_2021","c0_v_2022","c0_v_2023","c0_v_2024","c0_o_2022","c0_o_2023","c0_o_2024","c0_i_2022","c0_i_2023","c0_i_2024"]
title_list = ["Vol 2021","Vol 2022","Vol 2023","Vol 2024","Out 2022","Out 2023","Out 2024","In 2022","In 2023","In 2024"]
sources = [0, 7, 0, 1, 8, 1, 2, 9, 2]
targets = [1, 1, 4, 2, 2, 5, 3, 3, 6]
values = [1000, 100, 100, 1000, 150, 50, 1000, 120, 80]
## correction based on type
def my_corr(node_corr, steps):
x_corr = []
y_corr = []
for ftype in node_corr:
xcorr = 0
ycorr = 0
if ftype == 'i':
xcorr = - steps/3
ycorr = -0.2
x_corr.append(xcorr)
y_corr.append(ycorr)
return x_corr , y_corr
def my_nodify (node_names):
# node_names = unique_list.copy()
# unique name endings
## get year
ends = sorted(list(set([e[-4:] for e in node_names])))
## get type
corr = (list(([e[-6] for e in node_names])))
min, max = ends[0], ends[-1]
#intervals
steps = 1/((int(max)-int(min)))
x_corr, y_corr = my_corr(corr, steps)
# x-values for each unique name ending for input as node position
nodes_x = {}
xVal = 0
for e in ends:
nodes_x[str(e)] = xVal
xVal += steps
#x and y values in list form
x_values = [nodes_x[n[-4:]] for n in node_names]
## apply x adjustments
x_values_c = [x_values[i] + x_corr[i] for i in range(len(x_corr))]
y_values = []
y_val = 0
for n in node_names:
y_values.append(y_val)
y_val+=.001
y_values.append(y_val)
## apply y adjustments
y_values_c = [y_values[i] + y_corr[i] for i in range(len(y_corr))]
# y_values_c = y_values
return x_values_c, y_values_c
nodified = my_nodify(unique_list)
# plotly setup
fig = go.Figure(data=[go.Sankey(
arrangement='snap',
# arrangement='perpendicular',
node = dict(
pad = 5,
thickness = 20,
line = dict(color = "black", width = 0.5),
label = title_list,
color = "blue",
x=nodified[0], y=nodified[1]
),
link = dict(
source = sources,
target = targets,
value = values
))])
fig.update_layout(
hovermode = 'x',
title="Some Flow",
font=dict(size = 10, color = 'white'),
plot_bgcolor='black',
paper_bgcolor='black'
)
fig.show()
and produces almost what I want although the In-nodes overlapp and the sorting of link is inconsistent.
How can I influence this behaviour?
Related
I'm developing in Python using the pandas, numpy and matplotlib modules, to paint various subplots of a dataframe, using the following code:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
data = {'Name': ['Status', 'Status', 'HMI', 'Allst', 'Drvr', 'CurrTUBand', 'RUSource', 'RUReqstrPriority', 'RUReqstrSystem', 'RUResReqstStat', 'CurrTUBand', 'DSP', 'SetDSP', 'SetDSP', 'DSP', 'RUSource', 'RUReqstrPriority', 'RUReqstrSystem', 'RUResReqstStat', 'Status', 'Delay', 'Status', 'Delay', 'HMI', 'Status', 'Status', 'HMI', 'DSP'],
'Value': [4, 4, 2, 1, 1, 1, 0, 7, 0, 4, 1, 1, 3, 0, 3, 0, 7, 0, 4, 1, 0, 1, 0, 1, 4, 4, 2, 3],
'Id_Par': [0, 0, 0, 0, 0, 0, 10, 10, 10, 10, 10, 0, 0, 22, 22, 28, 28, 28, 28, 0, 0, 38, 38, 0, 0, 0, 0, 0]
}
signals_df = pd.DataFrame(data)
def plot_signals(signals_df):
# Count signals by parallel
signals_df['Count'] = signals_df.groupby('Id_Par').cumcount().add(1).mask(signals_df['Id_Par'].eq(0), 0)
# Subtract Parallel values from the index column
signals_df['Sub'] = signals_df.index - signals_df['Count']
id_par_prev = signals_df['Id_Par'].unique()
id_par = np.delete(id_par_prev, 0)
signals_df['Prev'] = [1 if x in id_par else 0 for x in signals_df['Id_Par']]
signals_df['Final'] = signals_df['Prev'] + signals_df['Sub']
# Convert and set Subtract to index
signals_df.set_index('Final', inplace=True)
# Get individual names and variables for the chart
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
# Creation Graphics
fig, ax = plt.subplots(nrows=num_names_list, figsize=(10, 10), sharex=True)
plt.xticks(color='SteelBlue', fontweight='bold')
# Matplotlib's categorical feature to convert x-axis values to string
x_values = [-1, ]
for name in all_names_list:
x_values.append(signals_df[signals_df["Name"] == name]["Value"].index.values[0])
x_values.append(len(signals_df) - 1)
x_values = [str(i) for i in sorted(set(x_values))]
print(x_values)
for pos, (a_, name) in enumerate(zip(ax, names_list)):
# Creating a dummy plot and then remove it
dummy, = ax[pos].plot(x_values, np.zeros_like(x_values))
dummy.remove()
# Get data
data = signals_df[signals_df["Name"] == name]["Value"]
# Get values axis-x and axis-y
x_ = np.hstack([-1, data.index.values, len(signals_df) - 1])
y_ = np.hstack([0, data.values, data.iloc[-1]])
# Plotting the data by position
ax[pos].plot(x_.astype('str'), y_, drawstyle='steps-post', marker='*', markersize=8, color='k', linewidth=2)
ax[pos].set_ylabel(name, fontsize=8, fontweight='bold', color='SteelBlue', rotation=30, labelpad=35)
ax[pos].yaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f'))
ax[pos].yaxis.set_tick_params(labelsize=6)
ax[pos].grid(alpha=0.4, color='SteelBlue')
# Labeling the markers with CAN-Values
for i in range(len(y_)):
if i == 0:
xy = [x_[0].astype('str'), y_[0]]
else:
xy = [x_[i - 1].astype('str'), y_[i - 1]]
ax[pos].text(x=xy[0], y=xy[1], s=str(xy[1]), color='k', fontweight='bold', fontsize=12)
plt.show()
plot_signals(signals_df)
I'm having trouble when names get repeated, using Matplotlib's categorical feature and converting x-axis values to string; taking into consideration the focus of the answer; this is what is bringing me:
I have been trying to change the pandas conditions, since it is the condition that I am using in this line: x_values.append(signals_df[signals_df["Name"] == name]["Value"].index.values[0]) and when I print the variable x_values it brings me the wrong indices: ['-1', '0', '2', '3', '4', '5', '6', '11', '12', '20', '27'] and I can't make it work well.
I expect to achieve is a graph like the following:
The yellow shading is the jumps that it must make on the x-axis and that it are not painting on the y-axis. Thank you very much to anyone who can help me, any comments help.
I leave this answer for possible searches later for someone with the same topic. I found my error, the way I was handling the for loop was not correct, I replaced it and modified it as follows:
# Matplotlib's categorical feature and to convert x-axis values to string
x_values = [-1,]
x_values + = (list (set (can_signals.index)))
x_values = [str (i) for i in sorted (x_values)]
This now allows to bring up the graph as below:
These codes produce a chart
import numpy as np
import matplotlib.pyplot as plt
N = 5
menMeans = (20, 35, 30, 35, 27)
womenMeans = (25, 32, 34, 20, 25)
menStd = (2, 3, 4, 1, 2)
womenStd = (3, 5, 2, 3, 3)
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars: can also be len(x) sequence
p1 = plt.bar(ind, menMeans, width, yerr=menStd)
p2 = plt.bar(ind, womenMeans, width,
bottom=menMeans, yerr=womenStd)
plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(ind, ('G1', 'G2', 'G3', 'G4', 'G5'))
plt.yticks(np.arange(0, 81, 10))
plt.legend((p1[0], p2[0]), ('Men', 'Women'))
Jupyter notebook automatically print the chart, even I didn't call plt.show(). I don't want to show the chart in the same cell with the code but the next cell by running a really short code such as plt.show(). In order to keep the cell as concise as possible.
Just enclose all your plot-related statements inside a function called plot_and_show(). Then you can call the function when you are ready.
import matplotlib.pyplot as plt
import numpy as np
N = 5
menMeans = (20, 35, 30, 35, 27)
womenMeans = (25, 32, 34, 20, 25)
menStd = (2, 3, 4, 1, 2)
womenStd = (3, 5, 2, 3, 3)
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars: can also be len(x) sequence
def plot_and_show():
p1 = plt.bar(ind, menMeans, width, yerr=menStd)
p2 = plt.bar(ind, womenMeans, width,
bottom=menMeans, yerr=womenStd)
plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(ind, ('G1', 'G2', 'G3', 'G4', 'G5'))
plt.yticks(np.arange(0, 81, 10))
plt.legend((p1[0], p2[0]), ('Men', 'Women'))
plot_and_show()
I've generated a network figure using vedo library and I'm trying to add this as an inset to a figure generated in matplotlib
import networkx as nx
import matplotlib.pyplot as plt
from vedo import *
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
G = nx.gnm_random_graph(n=10, m=15, seed=1)
nxpos = nx.spring_layout(G, dim=3, seed=1)
nxpts = [nxpos[pt] for pt in sorted(nxpos)]
nx_lines = [(nxpts[i], nxpts[j]) for i, j in G.edges()]
pts = Points(nxpts, r=12)
edg = Lines(nx_lines).lw(2)
# node values
values = [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[30, 80, 10, 79, 70, 60, 75, 78, 65, 10],
[1, .30, .10, .79, .70, .60, .75, .78, .65, .90]]
time = [0.0, 0.1, 0.2] # in seconds
vplt = Plotter(N=1)
pts1 = pts.cmap('Blues', values[0])
vplt.show(
pts1, edg,
axes=False,
bg='white',
at=0,
interactive=False,
zoom=1.5
).screenshot("network.png")
ax = plt.subplot(111)
ax.plot(
[1, 2, 3], [1, 2, 3],
'go-',
label='line 1',
linewidth=2
)
arr_img = vplt.screenshot(returnNumpy=True, scale=1)
im = OffsetImage(arr_img, zoom=0.25)
ab = AnnotationBbox(im, (1, 0), xycoords='axes fraction', box_alignment=(1.1, -0.1), frameon=False)
ax.add_artist(ab)
plt.show()
ax.figure.savefig(
"output.svg",
transparent=True,
dpi=600,
bbox_inches="tight"
)
There resolution of the image in the inset is too low. Suggestions on how to add the inset without loss of resolution will be really helpful.
EDIT:
The answer posted below works for adding a 2D network, but I am still looking for ways that will be useful for adding a 3D network in the inset.
I am not familiar with vedo but the general procedure would be to create an inset_axis and plot the image with imshow. However, your code is using networkx which has matplotlib bindings and you can directly do this without vedo
EDIT: code edited for 3d plotting
import networkx as nx
import matplotlib.pyplot as plt
G = nx.gnm_random_graph(n=10, m=15, seed=1)
nxpos = nx.spring_layout(G, dim=3, seed=1)
nxpts = [nxpos[pt] for pt in sorted(nxpos)]
nx_lines = [(nxpts[i], nxpts[j]) for i, j in G.edges()]
# node values
values = [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[30, 80, 10, 79, 70, 60, 75, 78, 65, 10],
[1, .30, .10, .79, .70, .60, .75, .78, .65, .90]]
time = [0.0, 0.1, 0.2] # in seconds
fig, ax = plt.subplots()
ax.plot(
[1, 2, 3], [1, 2, 3],
'go-',
label='line 1',
linewidth=2
)
from mpl_toolkits.mplot3d import (Axes3D)
from matplotlib.transforms import Bbox
rect = [.6, 0, .5, .5]
bbox = Bbox.from_bounds(*rect)
inax = fig.add_axes(bbox, projection = '3d')
# inax = add_inset_axes(,
# ax_target = ax,
# fig = fig, projection = '3d')
# inax.axis('off')
# set angle
angle = 25
inax.view_init(10, angle)
# hide axes, make transparent
# inax.set_facecolor('none')
# inax.grid('off')
import numpy as np
# plot 3d
seen = set()
for i, j in G.edges():
x = np.stack((nxpos[i], nxpos[j]))
inax.plot(*x.T, color = 'k')
if i not in seen:
inax.scatter(*x[0], color = 'skyblue')
seen.add(i)
if j not in seen:
inax.scatter(*x[1], color = "skyblue")
seen.add(j)
fig.show()
For example, given logits, dim, and boundary,
boundary = torch.tensor([[0, 3, 4, 8, 0]
[1, 3, 5, 7, 9]]
# representing sections look like:
# [[00012222_]
# [_00112233]
# in shape: (2, 9)
# (sections cannot be sliced)
logits = torch.rand(2, 9, 100)
result = blocky_softmax(logits, dim = 1, boundary = boundary)
# result[:, :, 0] may look like:
# [[0.33, 0.33, 0.33, 1.00, 0.25, 0.25, 0.25, 0.25, 0.0 ]
# [0.0, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50, 0.50]]
# other 99 slices looks similar with each blocks sum to 1.
we hope the Softmax is applied to dim = 1, but sections are also applied to this dim.
My current implementation with PyTorch is using for. It is slow and cost too much memory,
which looks like:
def blocky_softmax(logits, splits, map_inf_to = None):
_, batch_len, _ = logits.shape
exp_logits = logits.exp() # [2, 9, 100]
batch_seq_idx = torch.arange(batch_len, device = logits.device)[None, :]
base = torch.zeros_like(logits)
_, n_blocks = splits.shape
for nid in range(1, n_blocks):
start = splits[:, nid - 1, None]
end = splits[:, nid, None]
area = batch_seq_idx >= start
area &= batch_seq_idx < end
area.unsqueeze_(dim = 2)
blocky_z = area * blocky_z
base = base + blocky_z
if map_inf_to is not None:
good_base = base > 0
ones = torch.ones_like(base)
base = torch.where(good_base, base, ones)
exp_logits = torch.where(good_base, exp_logits, ones * map_inf_to)
return exp_logits / base
This implementation is slowed and fattened by n_blocks times. But it could be parallel with each section.
If there is no off-the-shelf function, should I write a CUDA/C++ library? I hope you could help with my issue.
For further generalization, I hope there are discontinuities in boundary/sections.
sections = torch.tensor([[ 0, 0, 0, -1, 2, 3, 2, 3, 0, 3]
[-1, 0, 0, 1, 2, 1, 2, 1, -1, 1]]
# [[000_232303]
# [_0012121_1]]
Thank you for reading:)
I realize that scatter_add and gather perfectly solve the problem.
I have three 1D arrays (A, B, C) of equal length/size. I plot a scatter plot of B vs. A where I color each scatter plot bullet by the corresponding value in the C array (see the code below).
# Imports
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
# Create the Arrays
A = 10 * np.random.random_sample((20, 20))
B = 10 * np.random.random_sample((20, 20))
C = 100 * np.random.random_sample((20, 20))
A = A.reshape(20*20)
B = B.reshape(20*20)
C = C.reshape(20*20)
# Create the Colormap and Define Boundaries
cmap_C = cm.jet
cmap_C.set_bad(color='white')
bounds_C = np.arange(0, 110, 10)
norm_C = mpl.colors.BoundaryNorm(bounds_C, cmap_C.N)
# Plot the Figure
plt.figure()
plt.scatter(A, B, c=C, marker='o', s=100, cmap=cmap_C, norm=norm_C)
plt.xlim([-1, 11])
plt.ylim([-1, 11])
plt.xticks(np.arange(0, 11, 1))
plt.yticks(np.arange(0, 11, 1))
plt.xlabel('A')
plt.ylabel('B')
plt.grid()
plt.colorbar(label='Value of C')
plt.show()
Some bullets overlap in the figure so we cannot see them clearly. Therfore, next I now want to compute and plot the mean C value of all scatter plot bullets within each 1 integer x 1 integer bin in the figure so that each square grid point is colored by one single color (these bins are illustrated by the figure gridding). How can I do this?
It's not totally clear what you are trying to do, but I think there is an analytic result to your question before you work too hard. The expected mean value of color (C vector) is 50 because you have generated a uniformly distributed sample [0, 100]. The coordinates are also uniformly distributed, but that is irrelevant. Of course, there will be some variance in each of the grid squares.
If you need to go forward as an exercise, I'd construct a dictionary of coordinate:color mappings to help set up a screen...
color_map = {(x, y): color for x, y, color in zip(A,B,C)}
Then you could set up a dictionary to gather results for each grid and probably by taking the int() value of the coordinates put the data into the correct data field for the grid
Below is a solution that works for my purposes.
# Imports
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from zipfile import ZipFile
# Create the Arrays
xx = 5
yy = 5
A = 10 * np.random.random_sample((xx, yy))
B = 10 * np.random.random_sample((xx, yy))
C = 100 * np.random.random_sample((xx, yy))
A = A.reshape(xx*yy)
B = B.reshape(xx*yy)
C = C.reshape(xx*yy)
color_map = {(x, y): color for x, y, color in zip(A,B,C)}
xedges = np.arange(11)
yedges = np.arange(11)
H, xedges, yedges = np.histogram2d(A, B, bins=(xedges, yedges))
HT = H.T
ca = np.asarray(list(color_map))
print(ca)
cai = ca.astype(int)
print(cai)
# Extracting all dictionary values using loop + keys()
res = []
for key in color_map.keys() :
res.append(color_map[key])
res = np.asarray(res)
resi = res.astype(int)
print(resi)
BMC = np.zeros([10, 10])
for i in np.arange(len(resi)):
BMC[cai[i,1],cai[i,0]] = BMC[cai[i,1],cai[i,0]] + resi[i]
print(cai[i])
print(resi[i])
print(BMC[cai[i,1],cai[i,0]])
print(HT)
print(BMC)
BMC = BMC/HT
print(BMC)
# Create the Colormap and Define Boundaries
cmap_C = cm.jet
cmap_C.set_bad(color='white')
bounds_C = np.arange(-5, 115, 10)
norm_C = mpl.colors.BoundaryNorm(bounds_C, cmap_C.N)
cmap_hist2d = cm.CMRmap_r
cmap_hist2d.set_bad(color='white')
bounds_hist2d = np.arange(-0.5, 4.5, 1)
norm_hist2d = mpl.colors.BoundaryNorm(bounds_hist2d, cmap_hist2d.N)
cmap_C = cm.jet
cmap_C.set_bad(color='white')
BMC_plot = np.ma.array ( BMC, mask=np.isnan(BMC)) # Mask NaN
bounds_C = np.arange(-5, 115, 10)
norm_C = mpl.colors.BoundaryNorm(bounds_C, cmap_C.N)
plt.subplot(311)
plt.scatter(A, B, c=C, marker='o', s=100, cmap=cmap_C, norm=norm_C)
plt.xlim([-1, 11])
plt.ylim([-1, 11])
plt.xticks(np.arange(0, 11, 1))
plt.yticks(np.arange(0, 11, 1))
plt.ylabel('B')
plt.grid()
plt.colorbar(label='Value of C', ticks=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
plt.subplot(312)
x, y = np.meshgrid(xedges, yedges)
plt.pcolor(x, y, HT, cmap=cmap_hist2d, norm=norm_hist2d)
plt.xlim([-1, 11])
plt.ylim([-1, 11])
plt.xticks(np.arange(0, 11, 1))
plt.yticks(np.arange(0, 11, 1))
plt.ylabel('B')
plt.grid()
plt.colorbar(label='Number of Data in Bin', ticks=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
plt.subplot(313)
plt.pcolor(x, y, BMC_plot, cmap=cmap_C, norm=norm_C)
plt.xlim([-1, 11])
plt.ylim([-1, 11])
plt.xticks(np.arange(0, 11, 1))
plt.yticks(np.arange(0, 11, 1))
plt.xlabel('A')
plt.ylabel('B')
plt.grid()
plt.colorbar(label='Bin-Mean C Value', ticks=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
plt.show()