add labels and customize nodes in plotly sankey diagram - python-3.x

I am trying to customize a Sankey diagram made with plotly. I would simply like to make some nodes invisible (e.g. transparent) and add some labels to the connections between nodes.
I slightly modified the example code from the website, without much success. The node properties seem to be applied identically to all nodes, and adding a label to the links does not change the output.
import plotly.plotly as py
# Node styling. The last two nodes are coloured white in an attempt to make
# them less visible.
sankey_node = {
    "pad": 15,
    "thickness": 20,
    "line": {"color": "black", "width": 0.5},
    "label": ["A1", "A2", "B1", "B2", "C1", "C2"],
    "color": ["blue", "blue", "blue", "blue", "white", "white"],
}

# Links between node indices. "label" repeats the values in an attempt to
# add labels to the links.
sankey_link = {
    "source": [0, 1, 0, 2, 3, 3],
    "target": [2, 3, 3, 4, 4, 5],
    "value": [8, 4, 2, 8, 4, 2],
    "label": [8, 4, 2, 8, 4, 2],
}

data = {"type": "sankey", "node": sankey_node, "link": sankey_link}
layout = {"title": "Basic Sankey Diagram", "font": {"size": 10}}

# The figure is a plain dict and is handed to iplot with validation disabled.
fig = {"data": [data], "layout": layout}
py.iplot(fig, validate=False)
Any suggestion on how to do it?

You can add an annotation for each link label. The problem is that even if you add `label` to the links, it is shown only on hover. Below is an example with added annotations for your reference.
import plotly.graph_objs as go
import plotly.offline as py
py.init_notebook_mode()
import numpy as np
# Sankey trace described as a plain dict. The last two nodes use muted
# colours in an attempt to make them less visible.
data = dict(
    type='sankey',
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=["A1", "A2", "B1", "B2", "C1", "C2"],
        color=["blue", "blue", "blue", "blue", "gray", "white"],
    ),
    link=dict(
        source=[0, 1, 0, 2, 3, 3],
        target=[2, 3, 3, 4, 4, 5],
        value=[8, 4, 2, 8, 4, 2],
        # Link labels are only displayed on hover, hence the annotations below.
        label=[8, 4, 2, 8, 4, 2],
    ),
)

# Static text placed over the diagram to act as always-visible link labels.
# xref/yref are set to 'paper' explicitly so the x/y values are fractions of
# the plotting area and do not depend on the implicit default reference.
layout = dict(
    title="Basic Sankey Diagram",
    font=dict(size=10),
    annotations=[
        dict(x=0.25, y=0.75, xref='paper', yref='paper',
             text='8', showarrow=False),
        dict(x=0.75, y=0.25, xref='paper', yref='paper',
             text='4', showarrow=False),
    ],
)

fig = dict(data=[data], layout=layout)
py.iplot(fig, validate=False)

Related

Plotly Custom Legend

I have a plotly plot which looks like this:
The Code I am using is below:
# Figure with a secondary y-axis available; both price series are drawn on
# the primary axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# NOTE(review): the first trace reads from `pf` and the second from `df` —
# confirm both frames are intended.
fig.add_trace(
    go.Scatter(
        x=pf['Timestamp'], y=pf['Price_A'], name='<b>A</b>',
        mode='lines+markers',
        marker_color='rgba(255, 0, 0, 0.8)',
        line=dict(width=3),
        yaxis="y1",
    ),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x=df['Timestamp'], y=df['Price_B'], name='<b>B</b>',
        mode='lines+markers',
        marker_color='rgba(0, 196, 128, 0.8)',
        line=dict(width=3),
        yaxis="y1",
    ),
    secondary_y=False,
)

# One dashed purple vertical line per event timestamp.
for i in pf2['Timestamp']:
    fig.add_vline(x=i, line_width=3, line_dash="dash", line_color="purple",
                  name='Event')

# Title, global font, and a legend anchored in the top-left corner.
fig.update_layout(
    title="<b>Change over Time</b>",
    font=dict(family="Courier New, monospace", size=16, color="RebeccaPurple"),
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)
How can I add the entry in the legend for the event that is denoted by the vertical lines?
When you use add_vline, you are adding a layout shape (optionally with an annotation) rather than a trace, so it will not have a corresponding legend entry.
You'll need to instead use go.Scatter to plot the vertical lines, passing the minimum and maximum values in your data (plus or minus some padding) to the y parameter. Then you can set this same y-range for your plot. This will give you the appearance of vertical lines while still showing the full range of your data.
Update: you can use a legend group so that the vertical lines appear as a single entry in the legend
For example:
from pkg_resources import yield_lines
import plotly.express as px
import plotly.graph_objects as go
fig = go.Figure()
df = px.data.stocks()

# Price series; naming each trace gives it a readable legend entry.
tickers = ['GOOG', 'AMZN']
for col in tickers:
    fig.add_trace(go.Scatter(
        x=df['date'],
        y=df[col],
        name=col,
    ))

# Dates at which a vertical marker line is drawn.
vlines = ["2018-07-01", "2019-04-01", "2019-07-01"]

# Vertical extent of the marker lines: the data range plus 5% padding.
min_y, max_y = df[tickers].min().min(), df[tickers].max().max()
padding = 0.05 * (max_y - min_y)
y_range = [min_y - padding, max_y + padding]

for i, x in enumerate(vlines):
    fig.add_trace(go.Scatter(
        x=[x] * 2,
        y=y_range,
        mode='lines',
        line=dict(color='purple', dash="dash"),
        name="vertical lines",
        # All marker lines share one legend group; only the first is listed,
        # so the legend shows a single "vertical lines" entry.
        legendgroup="vertical lines",
        showlegend=(i == 0),
    ))

# Pin the y-axis to the same range so the lines span the full plot height.
fig.update_yaxes(range=y_range)
fig.show()

Trouble with Multiple Sankey Subplots in Plotly

So I’m stuck trying to create a plot with multiple Sankey diagrams. Specifically, I keep getting an error that says "Invalid property specified for object of type plotly.graph_objs.Sankey: 'xaxis'". This seems to be invoked when the script gets to the append_trace command. Here is a simple example code, which I run in a Jupyter notebook:
import plotly.tools as tools
import plotly.offline as offline
import plotly.graph_objs as go
offline.init_notebook_mode(connected=True)

# First Sankey trace: six named people connected by three links.
trace1 = go.Sankey(
    node={
        "pad": 15,
        "thickness": 20,
        "line": {"color": "black", "width": 0.5},
        "label": ["Joe", "Molly", "Gavin", "Octavio", "Leslie", "Natasha"],
        "color": ["blue", "red", "green", "yellow", "brown", "magenta"],
    },
    link={
        "source": [0, 1, 3],
        "target": [5, 5, 0],
        "value": [6, 8, 3],
    },
)

# Second Sankey trace: seven named countries connected by four links.
# NOTE(review): target index 7 has no matching entry in label/color
# (indices 0-6) — confirm this is intended.
trace2 = go.Sankey(
    node={
        "pad": 15,
        "thickness": 20,
        "line": {"color": "black", "width": 0.5},
        "label": ["Russia", "Gambia", "Sweden", "Bolivia", "Timor-Leste", "Kazakhstan", "Bhutan"],
        "color": ["red", "black", "brown", "magenta", "yellow", "blue", "orange"],
    },
    link={
        "source": [0, 1, 4, 6],
        "target": [7, 7, 7, 3],
        "value": [6, 8, 3, 3],
    },
)

# Two stacked subplot cells, one trace appended to each.
fig = tools.make_subplots(rows=2, cols=1)
fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 2, 1)

fig['layout'].update(
    title="Basic Sankey Diagram with Two Subplots",
    font={"size": 10},
)
offline.iplot(fig)
When I run it, I get the following traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-dd6268edf7ab> in <module>
37 fig = tools.make_subplots(rows = 2, cols = 1)
38
---> 39 fig.append_trace(trace1, 1, 1)
40 fig.append_trace(trace2, 2, 1)
41
/anaconda3/lib/python3.7/site-packages/plotly/basedatatypes.py in append_trace(self, trace, row, col)
1222 """, DeprecationWarning)
1223
-> 1224 self.add_trace(trace=trace, row=row, col=col)
1225
1226 def _set_trace_grid_position(self, trace, row, col):
/anaconda3/lib/python3.7/site-packages/plotly/basedatatypes.py in add_trace(self, trace, row, col)
1069 return self.add_traces(data=[trace],
1070 rows=[row] if row is not None else None,
-> 1071 cols=[col] if col is not None else None
1072 )[0]
1073
/anaconda3/lib/python3.7/site-packages/plotly/basedatatypes.py in add_traces(self, data, rows, cols)
1152 if rows is not None:
1153 for trace, row, col in zip(data, rows, cols):
-> 1154 self._set_trace_grid_position(trace, row, col)
1155
1156 # Make deep copy of trace data (Optimize later if needed)
/anaconda3/lib/python3.7/site-packages/plotly/basedatatypes.py in _set_trace_grid_position(self, trace, row, col)
1259 "An axis object for ({r},{c}) subplot "
1260 "cell got deleted.".format(r=row, c=col))
-> 1261 trace['xaxis'] = ref[0]
1262 trace['yaxis'] = ref[1]
1263
/anaconda3/lib/python3.7/site-packages/plotly/basedatatypes.py in __setitem__(self, prop, value)
2823 # ### Validate prop ###
2824 if prop not in self._validators:
-> 2825 self._raise_on_invalid_property_error(prop)
2826
2827 # ### Get validator for this property ###
/anaconda3/lib/python3.7/site-packages/plotly/basedatatypes.py in _raise_on_invalid_property_error(self, *args)
3006 full_obj_name=full_obj_name,
3007 invalid_str=invalid_str,
-> 3008 prop_descriptions=self._prop_descriptions))
3009
3010 def update(self, dict1=None, **kwargs):
ValueError: Invalid property specified for object of type plotly.graph_objs.Sankey: 'xaxis'
I am NOT specifying xaxis in my traces, so where the hell is this coming from? Seems like this part may have something to do with it:
/anaconda3/lib/python3.7/site-packages/plotly/basedatatypes.py in _set_trace_grid_position(self, trace, row, col)
1259 "An axis object for ({r},{c}) subplot "
1260 "cell got deleted.".format(r=row, c=col))
-> 1261 trace['xaxis'] = ref[0]
1262 trace['yaxis'] = ref[1]
1263
Is this a bug? I have no idea.
Somebody help!
I cross-posted this question on Plotly community forums here and received an answer that solved it (mostly). I'll copy the answer here so it's preserved even if the link goes dead.
The trouble you’re running into is that make_subplots and append_trace only work for cartesian trace types right now. This is something we’re planning to improve for version 4, but in the meantime, the best approach is to specify the position of each sankey trace individually using the domain property.
import plotly.offline as offline
import plotly.graph_objs as go
offline.init_notebook_mode(connected=True)

# Left-hand Sankey: restricted to the left 45% of the figure width via
# `domain`.
trace1 = go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=["Joe", "Molly", "Gavin", "Octavio", "Leslie", "Natasha"],
        color=["blue", "red", "green", "yellow", "brown", "magenta"],
    ),
    link=dict(
        source=[0, 1, 3],
        target=[5, 5, 0],
        value=[6, 8, 3],
    ),
    domain={'x': [0, 0.45]},
)

# Right-hand Sankey: restricted to the right 45% of the figure width.
# NOTE(review): target index 7 has no matching entry in label/color
# (indices 0-6) — confirm this is intended.
trace2 = go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=["Russia", "Gambia", "Sweden", "Bolivia", "Timor-Leste", "Canada", "Bhutan"],
        color=["red", "black", "brown", "magenta", "yellow", "blue", "orange"],
    ),
    link=dict(
        source=[0, 1, 4, 6],
        target=[7, 7, 7, 3],
        value=[6, 8, 3, 3],
    ),
    domain={'x': [0.55, 1.0]},
)

data = [trace1, trace2]
layout = go.Layout(
    title="Basic Sankey Diagram",
    font=dict(size=10),
)
fig = go.Figure(data=data, layout=layout)

# `iplot` is reached through the imported `offline` module; a bare `iplot`
# name is never imported in this snippet.
offline.iplot(fig)
This produces two subplots side by side:
Sankey subplots horizontal
Instead of specifying 'x' in domain, we can specify 'y' to position the subplots vertically.
It's a nice hack, absent the built-in support for Sankey subplots. The only drawback is that I cannot see how one would specify individual titles for the subplots.
Try this way without append operation:
Add this to the definition of both traces.
type='sankey',
Define your two subplots as data array.
data = Data([trace1, trace2])
Define fig:
# `data` is already a list of traces, so pass it as-is; wrapping it in another
# list would nest the traces one level too deep.
fig = dict(data=data, layout=layout)
py.iplot(fig, validate=False)
Finally you need to add the definition of your layout using a dictionary as shown in my referenced post.

How can I use the plotly dropdown menu feature to update the z value in my choropleth map?

I just want to create a menu on the plot where I'm able to change the z-value in data only. I tried looking at other examples on here: https://plot.ly/python/dropdowns/#restyle-dropdown but it was hard since the examples were not exactly similar to my plot.
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')

# Colour stops for the GDP scale (applied reversed, see the trace below).
gdp_colorscale = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

# Thin light-grey outline around each country.
country_marker = go.choropleth.Marker(
    line=go.choropleth.marker.Line(color='rgb(180,180,180)', width=0.5)
)
gdp_colorbar = go.choropleth.ColorBar(
    tickprefix='$',
    title='GDP<br>Billions US$',
)

# Single choropleth trace: country codes as locations, GDP as z.
data = [go.Choropleth(
    locations=df['CODE'],
    z=df['GDP (BILLIONS)'],
    text=df['COUNTRY'],
    colorscale=gdp_colorscale,
    autocolorscale=False,
    reversescale=True,
    marker=country_marker,
    colorbar=gdp_colorbar,
)]

# Data-source credit, positioned in paper coordinates.
source_note = go.layout.Annotation(
    x=0.55,
    y=0.1,
    xref='paper',
    yref='paper',
    text=(
        'Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">'
        'CIA World Factbook</a>'
    ),
    showarrow=False,
)

layout = go.Layout(
    title=go.layout.Title(text='2014 Global GDP'),
    geo=go.layout.Geo(
        showframe=False,
        showcoastlines=False,
        projection=go.layout.geo.Projection(type='equirectangular'),
    ),
    annotations=[source_note],
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='d3-world-map')
It's been a while since this was asked, but I figured it was still worth answering. I can't speak to how this might have changed since it was asked in 2019, but this works today.
First, I'll provide the code I used to create the new z values and the dropdown menu, then I'll provide all of the code I used to create these graphs in one chunk (easier to cut and paste...and all that).
This is the data I used for the alternate data in the z field.
import plotly.graph_objects as go
import pandas as pd
import random
def _seeded_shuffle(values, seed):
    """Seed the global RNG with *seed*, shuffle *values* in place, return it."""
    random.seed(seed)
    random.shuffle(values)
    return values


gdp = df['GDP (BILLIONS)']

# Alternate z, stored as another column in df.
z2 = _seeded_shuffle(gdp * .667 + 12, 21)
df['z2'] = z2
print(df.head())  # validate as expected

# Alternate z, kept as a series outside of df.
z3 = _seeded_shuffle(gdp * .2 + 1000, 231)

# Alternate z, converted to a basic Python list.
z4 = _seeded_shuffle(gdp**(1/3) * gdp**(1/2), 23).tolist()
To add buttons to change z, you'll add updatemenus to your layout. Each dict() is a separate dropdown option. At a minimum, each button requires a method, a label, and args. These represent what is changing (method for data, layout, or both), what it's called in the dropdown (label), and the new information (the new z in this example).
args for changes to data (where the method is either restyle or update) can also include the trace the change applies to. So if you had a bar chart and a line graph together, you may have a button that only changes the bar graph.
Using the same structure you have:
# (dropdown label, replacement z data) for each button, in display order.
# The first entry restores the original data.
z_options = [
    ('Return to the Original z', df['GDP (BILLIONS)']),
    ('A different z', df['z2']),
    ('How about this z?', z3),
    ('Last option for z', z4),
]

updatemenus = [go.layout.Updatemenu(
    x=1, xanchor='right', y=1.15, type="dropdown",
    # padding around the whole menu, not around individual buttons
    pad={'t': 5, 'r': 20, 'b': 5, 'l': 30},
    buttons=[
        # 'restyle' is for trace updates; the z data is nested in its own list
        {'args': [{'z': [z_values]}], 'label': caption, 'method': 'restyle'}
        for caption, z_values in z_options
    ],
)]
All code used to create this graph in one chunk (includes code shown above).
import plotly.graph_objs as go
import pandas as pd
import ssl
import random
# Work around certificate errors when downloading the CSV.
# NOTE(review): this swaps the default HTTPS context factory for the
# unverified one, i.e. certificate checks are skipped — confirm that is
# acceptable outside a throwaway example.
ssl._create_default_https_context = ssl._create_unverified_context


def _seeded_shuffle(values, seed):
    """Seed the global RNG with *seed*, shuffle *values* in place, return it."""
    random.seed(seed)
    random.shuffle(values)
    return values


# data used in plot
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
gdp = df['GDP (BILLIONS)']

# z values used in buttons
z2 = _seeded_shuffle(gdp * .667 + 12, 21)
df['z2'] = z2  # example as another column in the data frame
print(df.head())  # validate as expected
z3 = _seeded_shuffle(gdp * .2 + 1000, 231)  # a series outside of the data frame
z4 = _seeded_shuffle(gdp**(1/3) * gdp**(1/2), 23).tolist()  # a basic Python list

gdp_colorscale = [
    [0, "rgb(5, 10, 172)"],
    [0.35, "rgb(40, 60, 190)"],
    [0.5, "rgb(70, 100, 245)"],
    [0.6, "rgb(90, 120, 245)"],
    [0.7, "rgb(106, 137, 247)"],
    [1, "rgb(220, 220, 220)"],
]

data = [go.Choropleth(
    locations=df['CODE'],
    z=df['GDP (BILLIONS)'],
    text=df['COUNTRY'],
    colorscale=gdp_colorscale,
    reversescale=True,
    marker=go.choropleth.Marker(
        line=go.choropleth.marker.Line(color='rgb(180,180,180)', width=0.5)
    ),
    colorbar=go.choropleth.ColorBar(
        tickprefix='$',
        title='GDP<br>Billions US$',
        len=.6,  # added for aesthetics
    ),
)]

# (dropdown label, replacement z data) for each button, in display order.
z_options = [
    ('Return to the Original z', df['GDP (BILLIONS)']),
    ('A different z', df['z2']),
    ('How about this z?', z3),
    ('Last option for z', z4),
]

# Data-source credit, positioned in paper coordinates.
source_note = go.layout.Annotation(
    x=0.55, y=0.1, xref='paper', yref='paper',
    text=(
        'Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">'
        'CIA World Factbook</a>'
    ),
    showarrow=False,
)

layout = go.Layout(
    title=go.layout.Title(text='2014 Global GDP'),
    geo=go.layout.Geo(
        showframe=False,
        showcoastlines=False,
        projection=go.layout.geo.Projection(type='equirectangular'),
    ),
    annotations=[source_note],
    updatemenus=[go.layout.Updatemenu(
        x=1, xanchor='right', y=1.15, type="dropdown",
        pad={'t': 5, 'r': 20, 'b': 5, 'l': 30},
        buttons=[
            # 'restyle' is for trace updates only; z data is nested in a list
            {'args': [{'z': [z_values]}], 'label': caption, 'method': 'restyle'}
            for caption, z_values in z_options
        ],
    )],
)

fig = go.Figure(data=data, layout=layout)
fig.show()

Changing the attributes of what appears when hovering over a Choropleth Map in plotly

I am using plotly in Python 3.6.3 and am trying to do a Choropleth map as in here. I would like to change the attributes of what appears when hovering over the map. That is, for example, if we consider the first map and hover over California, it looks like:
I want to change both the font size of the content that appears and the size of the box. Is there a way to access those?
Here is the code that generates it:
import plotly.plotly as py
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv')

# Cast every column to str so the columns can be concatenated into the hover
# text below.
for col in df.columns:
    df[col] = df[col].astype(str)

# Colour stops from light to dark purple.
scl = [
    [0.0, 'rgb(242,240,247)'], [0.2, 'rgb(218,218,235)'], [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'], [0.8, 'rgb(117,107,177)'], [1.0, 'rgb(84,39,143)'],
]

# Per-state hover text; '<br>' separates the lines.
df['text'] = (
    df['state'] + '<br>'
    + 'Beef ' + df['beef'] + ' Dairy ' + df['dairy'] + '<br>'
    + 'Fruits ' + df['total fruits'] + ' Veggies ' + df['total veggies'] + '<br>'
    + 'Wheat ' + df['wheat'] + ' Corn ' + df['corn']
)

data = [dict(
    type='choropleth',
    colorscale=scl,
    autocolorscale=False,
    locations=df['code'],
    # total exports was stringified above, so convert it back for colouring
    z=df['total exports'].astype(float),
    locationmode='USA-states',
    text=df['text'],
    marker=dict(
        line=dict(color='rgb(255,255,255)', width=2),
    ),
    colorbar=dict(title="Millions USD"),
)]

layout = dict(
    title='2011 US Agriculture Exports by State<br>(Hover for breakdown)',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='d3-cloropleth-map')
The choropleth trace's hoverlabel attribute lets you set the background color, border color, and font (including the font size). The size of the box itself is determined by the text within it, however. If the name shows up truncated, it can be lengthened with the hoverlabel.namelength attribute.

how to link vbar with circle plots using bokeh?

I have three plots based on the same dataset. How can I link all three plots so that when I select a certain species in the vbar plot, the two scatter plots also change to show only the points of that species?
any help is appreciated~
from bokeh.sampledata.iris import flowers
from bokeh.plotting import figure, output_file, show
# LabelSet is used for the bar labels below, so it has to be imported too.
from bokeh.models import ColumnDataSource, CategoricalColorMapper, LabelSet
from bokeh.layouts import column, row

# color mapper to color data by species
mapper = CategoricalColorMapper(
    factors=['setosa', 'versicolor', 'virginica'],
    palette=['green', 'blue', 'red'],
)
output_file("plots.html")

# group by species and plot barplot for count
species = flowers.groupby('species')
source = ColumnDataSource(species)
p = figure(plot_width=800, plot_height=400, title='Count by Species',
           x_range=source.data['species'], y_range=(0, 60), tools='box_select')
p.vbar(x='species', top='petal_length_count', width=0.8, source=source,
       nonselection_fill_color='gray', nonselection_fill_alpha=0.2,
       color={'field': 'species', 'transform': mapper})

# text label on each bar showing its count
labels = LabelSet(x='species', y='petal_length_count', text='petal_length_count',
                  x_offset=5, y_offset=5, source=source)
p.add_layout(labels)

# scatter plot for sepal length and width
source1 = ColumnDataSource(flowers)
p1 = figure(plot_width=800, plot_height=400, tools='box_select',
            title='scatter plot for sepal')
p1.circle(x='sepal_length', y='sepal_width', source=source1,
          nonselection_fill_color='gray', nonselection_fill_alpha=0.2,
          color={'field': 'species', 'transform': mapper})

# scatter plot for petal length and width (shares source1 with the sepal plot)
p2 = figure(plot_width=800, plot_height=400, tools='box_select',
            title='scatter plot for petal')
p2.circle(x='petal_length', y='petal_width', source=source1,
          nonselection_fill_color='gray', nonselection_fill_alpha=0.2,
          color={'field': 'species', 'transform': mapper})

# show all three plots
show(column(p, row(p1, p2)))
I don't think there is any built-in functionality for this at the moment, but you can explicitly link two ColumnDataSources with a CustomJS callback:
from bokeh.models import CustomJS

# source backs the bar plot; source1 backs both scatter plots.
source = ColumnDataSource(species)
source1 = ColumnDataSource(flowers)

# When the bar-plot selection changes, collect the species of the selected
# bars and select every scatter row whose species is in that set.
# NOTE(review): the callback uses the selected['1d'] form of the selection
# API — confirm it matches the installed Bokeh version.
source.js_on_change('selected', CustomJS(args=dict(s1=source1), code="""
const indices = cb_obj.selected['1d'].indices;
const species = new Set(indices.map(i => cb_obj.data.species[i]));
s1.selected['1d'].indices = s1.data.species.reduce((acc, s, i) => {if (species.has(s)) acc.push(i); return acc}, []);
s1.select.emit();
"""))
Note that this callback only synchronizes selection from the bar plot to the scatter plots. To make selections on the scatter plots influence the bar plot, you'll have to write some additional code.

Resources