Adding footnotes to layered chart in Altair - altair

I am making a layered chart using data from the Bureau of Labor Statistics, and since I am publishing the chart, I need to cite the data source. I need to add a line at the bottom of the chart saying "Source: Bureau of Labor Statistics. Data as of July 2022." I am able to add the title and subtitle, but there doesn't seem to be an option for footnote/source line. Are there any workarounds?
import pandas as pd
import pandas_datareader.data as pdr
import datetime
import altair as alt
# Download the UNRATE series from FRED for the period of interest and give
# the value column a readable name.
start = datetime.datetime(2020, 1, 1)
end = datetime.datetime(2022, 7, 10)
df = pdr.DataReader('UNRATE', 'fred', start, end).rename(
    columns={'UNRATE': 'Unemployment Rate'}
)
# Copy the index into a regular column so the encodings below can refer to
# it by name, and add a constant column used for the flat reference line.
df["Date"] = df.index
df['Prepandemic Rate'] = 3.5
source = df

# Blue line: the downloaded rate.
line = alt.Chart(source).mark_line(
    point=False, strokeWidth=2, color='blue'
).encode(x="Date", y="Unemployment Rate")

# Red line: the constant reference rate.
line2 = alt.Chart(source).mark_line(
    point=False, strokeWidth=2, color='red'
).encode(x="Date", y="Prepandemic Rate")

# Overlay both lines; the title and subtitle are anchored at the start.
chart_title = {
    "text": 'Unemployment Rate',
    "subtitle": ['Seasonally adjusted'],
}
alt.layer(line, line2).properties(
    width=300, height=300, title=chart_title
).configure_title(anchor='start')
Note: I saw this question (How to add a Text Footer to an Altair graph?) but I can't seem to get the concat function to work on my layered chart.

You can add the footer to the faceted chart by passing a TitleParams as the title of your final chart. (You will still need to play with font sizes and balance the chart to your liking.)
As for your further request: I updated the code to fit all three elements (title, subtitle, footer), using @jakevdp's idea from this post.
I think this approach is easier: create the title and subtitle as separate charts, add the footer inside your original chart, and concatenate all of them.
You still need to work on alignment, position of your legend, fonts, etc.
P.S. Or as an alternative approach - use title parameters for title & subtitle, and concatenate footnote.
# Small demo table: one rating per (genre, gender) pair.
df = pd.DataFrame({
    'Genre': ['Action', 'Crime', 'Action', 'Crime'],
    'Rating': [5, 10, 3, 9],
    'Gender': ['F', 'F', 'M', 'M'],
})

# The "footer" is the chart's own title, oriented to the bottom of the chart
# and shifted by dx/dy.
footer = alt.TitleParams(
    ['This is a footer.'],
    baseline='bottom',
    orient='bottom',
    anchor='start',
    fontWeight='normal',
    fontSize=10,
    dy=20, dx=20,
)

# Bar chart faceted into one column per genre, with the footer attached.
chart = alt.Chart(df).mark_bar().encode(
    column=alt.Column('Genre', title=""),
    x=alt.X('Gender', axis=alt.Axis(ticks=False, labels=False, title='')),
    y=alt.Y('Rating', axis=alt.Axis(grid=False)),
    color='Gender',
).properties(width=100, title=footer)

# Title and subtitle are separate one-row charts that only draw a text mark.
title = alt.Chart({"values": [{"text": "The Title"}]}).mark_text(
    size=20
).encode(text="text:N")

subtitle = alt.Chart({"values": [{"text": "Subtitle"}]}).mark_text(
    size=14
).encode(text="text:N")

# Stack title, subtitle and chart vertically, without view borders and with
# minimal spacing between the pieces.
stacked = alt.vconcat(title, subtitle, chart)
stacked.configure_view(stroke=None).configure_concat(spacing=1)

You can add any text anywhere you like. For example.
import altair as alt
import pandas as pd
# Data for the bar chart.
source = pd.DataFrame({
    'a': list('ABCDEFGHI'),
    'b': [28, 55, 43, 91, 81, 53, 19, 87, 52],
})

# Inline dataset with a single row for the text layer.
data = alt.Data(values=[{'x': 'A'}])

# Free-standing text positioned at the chart's width/height and then
# nudged by dx/dy.
text1 = alt.Chart(data).mark_text(
    text='Footnote',
    x='width',
    y='height',
    dx=10,
    dy=40,
)

bar1 = alt.Chart(source).mark_bar().encode(x='a', y='b')

# Layer the footnote text and the bars into one chart.
alt.layer(text1, bar1)

Related

How to display data across, by row, in pie chart in plotly/streamlit?

I have pandas df that looks like this that I want to display as a dashboard:
fname col1 col2 col3 sum
A 2 3 3 10
B 1 2 3 12
C 6 6 3 13
If a fname is selected by row, I want to display the pie slices as the column values by row.
What is the best way to display the data by fname grouped across by row in a pie chart?
I am not sure what to display when all the column values for fname are selected.
I tried creating a sunburst chart like so, but the chart is extremely convoluted:
# Sunburst attempt from the question: the hierarchy is given by `path`,
# and the 'sum' column supplies the values.
px.sunburst(
    df,
    path=['col3', 'col2', 'col1', 'fname'],
    values='sum',
    title='pie',
)
Here is a basic example.
import plotly.express as px
import pandas as pd
import streamlit as st
# Medal counts per country; 'sum' is the per-country total of the three
# medal columns.
data = {
    'ctry': ['USA', 'PHI', 'CHN'],
    'gold': [12, 1, 20],
    'silver': [4, 4, 12],
    'bronze': [8, 2, 30],
    'sum': [24, 7, 62],
}
df = pd.DataFrame(data)
st.dataframe(df)

# Two equal-width columns: the pie chart goes in the first, the sunburst in
# the second.
cols = st.columns([1, 1])

with cols[0]:
    # The selected medal column supplies the pie slice values.
    medal_type = st.selectbox('Medal Type', ['gold', 'silver', 'bronze'])
    fig = px.pie(df, values=medal_type, names='ctry',
                 title=f'number of {medal_type} medals',
                 height=300, width=200)
    fig.update_layout(margin=dict(l=20, r=20, t=30, b=0))
    st.plotly_chart(fig, use_container_width=True)

with cols[1]:
    # Disabled input with a hidden label; presumably a spacer so this chart
    # lines up with the one under the select box -- TODO confirm.
    st.text_input('sunburst', label_visibility='hidden', disabled=True)
    fig = px.sunburst(df, path=['ctry', 'gold', 'silver', 'bronze'],
                      values='sum', height=300, width=200)
    fig.update_layout(margin=dict(l=20, r=20, t=30, b=0))
    st.plotly_chart(fig, use_container_width=True)
Output

Altair: how do I get the values from a dropdown menu

Here is the code for generating the image:
# Dropdown listing the three candidate colour dimensions.
input_dropdown = alt.binding_select(options=['Brand', 'Function', 'Category'])
# Single-value selection bound to that dropdown.
selection = alt.selection_single(
    name='Color By',
    fields=['categories'],
    bind=input_dropdown,
)
# Scatter plot of the two components; the colour is fixed to "Function" here
# and does not react to the dropdown.
alt.Chart(df_PCs).mark_circle().encode(
    x="PC1:Q",
    y="PC2:Q",
    color="Function:N",
    tooltip=['Name'],
).add_selection(selection)
What I want to do is to color the dots either by Brand, Function or Category whatever the value that comes from the dropdown menu. Is there a way to get the value of the dropdown menu? Such as selection.value()?
The best approach to this is similar to the Vega-Lite answer in Dynamically Change Y-Axis Field in Encoding Based on Selection Vega-Lite
Selections cannot filter on column titles, only on column values. Fortunately, you can use the fold transform to stack multiple columns and turn those column names into column values.
Here is an example of a Fold Transform in conjunction with a selection box to choose which column to color by:
import altair as alt
import pandas as pd
import numpy as np
# Random demo data: two normally distributed coordinates plus three integer
# columns (values 0-2), any of which can drive the colour.
df = pd.DataFrame({
    'x': np.random.randn(100),
    'y': np.random.randn(100),
    'c1': np.random.randint(0, 3, 100),
    'c2': np.random.randint(0, 3, 100),
    'c3': np.random.randint(0, 3, 100),
})

# Columns that are folded into (column, value) pairs and offered in the
# dropdown.
fold_columns = ['c1', 'c2', 'c3']

# Dropdown-bound selection on the folded 'column' field, starting at 'c1'.
selector = alt.selection_single(
    name='Color by',
    fields=['column'],
    bind=alt.binding_select(options=fold_columns),
    init={'column': 'c1'},
)

# Fold first, then keep only the rows whose 'column' matches the selection.
folded = alt.Chart(df).transform_fold(fold_columns, as_=['column', 'value'])
visible = folded.transform_filter(selector)

visible.mark_point().encode(
    x='x:Q',
    y='y:Q',
    color='value:Q',
    column='column:N',
).add_selection(selector)
For your data, it might look like this (though I've been unable to test it because the data is not included in the question)
# Columns the dropdown lets the user colour by.
color_fields = ['Brand', 'Function', 'Category']

# Dropdown-bound selection on the folded 'column' field; starts on 'Function'.
selection = alt.selection_single(
    fields=['column'],
    bind=alt.binding_select(name="Color by: ", options=color_fields),
    init={'column': 'Function'},
)

# Fold the three columns into (column, value) pairs and keep only the rows
# for the currently selected column.
folded = alt.Chart(df_PCs).transform_fold(
    color_fields,
    as_=['column', 'value'],
)
visible = folded.transform_filter(selection)

visible.mark_point().encode(
    x="PC1:Q",
    y="PC2:Q",
    color="value:N",
    column="column:N",
    tooltip=['Name:N'],
).add_selection(selection)

How to ignore or clip negative values in altair charts from the chart code itself?

I want to NOT show the negative value in the bar chart. The main idea is to NOT have that y-axis offset(in the actual problem its a facet), so any way to achieve this is fine - maybe clipping - just not at data level, preferably from the chart itself.
I thought of using alt.Scale, but the domain requires you to specify a max limit; the issue is that I do not know it beforehand, and I cannot find a way to programmatically specify the max over the values.
You can use the following demo chart -
import pandas as pd
import altair as alt
# Demo data; 'b' contains one negative value.
dd = pd.DataFrame({'a': [0, 1, 2, 3, 4, 5], 'b': [10, 14, -5, 15, 0, 5]})

# Bars of the raw values (the data is supplied later, on the layer).
a = alt.Chart().mark_bar().encode(x='a', y='b:Q')

# Line of the mean of b over the window frame [-2, 0].
b = alt.Chart().mark_line().transform_window(
    rolling_mean='mean(b)',
    frame=[-2, 0],
).encode(x='a', y='rolling_mean:Q')

alt.layer(a, b, data=dd)
There are only two ways I know of to hide data on a chart. First, you can set an explicit scale domain and set clip=True for the relevant marks:
import pandas as pd
import altair as alt
# Demo data; 'b' contains one negative value.
dd = pd.DataFrame({'a': [0, 1, 2, 3, 4, 5], 'b': [10, 14, -5, 15, 0, 5]})

# Explicit y domain starting at zero; combined with clip=True on the bars.
y_scale = alt.Scale(domain=[0, 16])
a = alt.Chart().mark_bar(clip=True).encode(
    x='a',
    y=alt.Y('b:Q', scale=y_scale),
)

# Line of the mean of b over the window frame [-2, 0].
b = alt.Chart().mark_line().transform_window(
    rolling_mean='mean(b)',
    frame=[-2, 0],
).encode(x='a', y='rolling_mean:Q')

alt.layer(a, b, data=dd)
Second, you can apply a filter transform to your data to remove rows from your dataset:
import pandas as pd
import altair as alt
# Demo data; 'b' contains one negative value and one zero.
dd = pd.DataFrame({'a': [0, 1, 2, 3, 4, 5], 'b': [10, 14, -5, 15, 0, 5]})

# Bars of the raw values (the data is supplied later, on the layer).
a = alt.Chart().mark_bar().encode(
    x='a',
    y=alt.Y('b:Q', scale=alt.Scale(domain=[0, 16])),
)

# Line of the mean of b over the window frame [-2, 0].
b = alt.Chart().mark_line().transform_window(
    rolling_mean='mean(b)',
    frame=[-2, 0],
).encode(x='a', y='rolling_mean:Q')

# Drop only the negative rows. `>= 0` (rather than `> 0`) keeps the b == 0
# row, which is not negative and would otherwise vanish from the rolling
# mean as well. The filter sits on the layered chart, so it applies to both
# layers.
alt.layer(a, b, data=dd).transform_filter(alt.datum.b >= 0)
Note the difference: because this transform was applied at the top level, it removes rows for both layers. If you instead apply the filter to only one of the subcharts, the rows will only be removed from that layer:
import pandas as pd
import altair as alt
# Demo data; 'b' contains one negative value.
dd = pd.DataFrame({'a': [0, 1, 2, 3, 4, 5], 'b': [10, 14, -5, 15, 0, 5]})

# Filter predicate applied to the bar layer only.
positive_only = alt.datum.b > 0
a = alt.Chart().transform_filter(positive_only).mark_bar().encode(
    x='a',
    y=alt.Y('b:Q', scale=alt.Scale(domain=[0, 16])),
)

# The line layer has no filter, so it still sees every row.
b = alt.Chart().mark_line().transform_window(
    rolling_mean='mean(b)',
    frame=[-2, 0],
).encode(x='a', y='rolling_mean:Q')

alt.layer(a, b, data=dd)
One way to do it seems to use transform_filter as follows -
.transform_filter(alt.datum.b >= 0 )

Interactive Plot of Pandas Data-frame Color coding based on a group from a Column

I have an example pandas dataframe as follows:
day id cnt
2 catx 4
2 kagm 3
2 dyrt 5
3 catx 3
3 kagm 3
3 dyrt 4
5 catx 2
5 kagm 2
5 dyrt 2
I want to plot the scatter data cnt (y) vs day(x), where the points will be labeled (colored/legend) based on the id column.
Now this is pretty simple in seaborn/matplotlib which I know can be plotted and the plot can be saved to a file.
However, I am looking to have an interactive plot using plotly/bokeh/d3/mpld3 etc. and, finally, to publish that plot at a URL (of my choice, or maybe account-based as in plotly). My goal is also to have a hover function, which will show me the value of a point when I move the cursor over it.
I have tried bokeh/plotly with cufflinks using ColumnDataSource and everything to try out to get the plots. However, have failed to get anything which I am looking for. Can I get some help in this direction from the experts? Thanks in anticipation.
This code plots the data the way you requested. I created a new dataframe for every category in your dataframe so the interactive legend also works. An array of hex color strings, with one entry per unique category, is generated and added to the dataframe to give every category its own color.
#!/usr/bin/python3
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.palettes import all_palettes
from bokeh.plotting import figure, output_file, show
data = {
    'day': [2, 2, 2, 3, 3, 3, 5, 5, 5],
    'id': ['catx', 'kagm', 'dyrt', 'catx', 'kagm', 'dyrt', 'catx', 'kagm', 'dyrt'],
    'cnt': [4, 3, 5, 3, 3, 4, 2, 2, 2],
}
df = pd.DataFrame.from_dict(data)
output_file('plot.html')

# Hover fields. "@col" reads a column of the hovered glyph's data source and
# "@$name" reads the column named by the glyph's `name` property ('id'
# below). The "@" prefix is required; with "#" the text is shown literally.
tooltips = [
    ("day", "@day"),
    ("id", "@$name"),
    ("count", "@cnt"),
]
p = figure(tooltips=tooltips, plot_width=800, plot_height=800)

sources = []
# Take the ids in order of first appearance so the id -> colour pairing is
# the same on every run (iteration order of a set of strings is not).
ids = df['id'].unique()
# One palette entry per id.
# NOTE(review): the palette dict only has entries for certain sizes --
# confirm it covers the number of distinct ids in real data.
colors = all_palettes['Viridis'][len(ids)]
pd.options.mode.chained_assignment = None  # Suppress false positive warning
for ID, color in zip(ids, colors):
    # One data source (and one glyph renderer) per id, so each id gets its
    # own legend entry that can be toggled independently.
    dfSubset = df.loc[df['id'] == ID]
    dfSubset['color'] = color
    sources.append(ColumnDataSource(dfSubset))
    p.circle(x='day', y='cnt', legend='id', color='color', name='id',
             alpha=0.5, size=15, source=sources[-1])

# Clicking a legend entry hides the corresponding glyphs.
p.legend.click_policy = "hide"
show(p)

Remove bars with 0 height in a bar graph in dash

I am trying to make a grouped bar graph in Dash. I am plotting subject codes on the x-axis, so they are not continuous numbers, and I am getting empty bars for the missing subject codes. Is there any way to remove these spaces or invisible bars?
This is the bar graph I am getting.
This is my code.
df = pd.read_csv('sampledata.csv')

# Row counts per site code, computed separately for each subject status.
# The index is cast to str so the site codes are passed as strings for the
# x axis.
a = df['SiteCode'].loc[df['SubjectStatus'] == 'In Progress'].value_counts()
a.index = a.index.astype(str)
b = df['SiteCode'].loc[df['SubjectStatus'] == 'Withdrawn'].value_counts()
b.index = b.index.astype(str)
x1 = a.index
x2 = b.index

# One bar trace per status.
trace1 = go.Bar(
    x=x1,
    y=a.values,
    name='In Progress',
)
trace2 = go.Bar(
    x=x2,
    y=b.values,
    name='Withdrawn',
)

app = dash.Dash()
app.layout = html.Div(
    dcc.Graph(
        id='graph',
        figure=go.Figure(
            data=[trace1, trace2],
            layout=go.Layout(barmode='group'),
        ),
    )
)  # closes html.Div; without it the assignment is a syntax error

if __name__ == '__main__':
    app.run_server()
Thanks in advance
PS: I am a noob in dash and python both so go easy on me.
You should try setting barmode='stack', because barmode='group' adds empty space if one of your traces has missing values.
import dash
from dash.dependencies import Output, Input
import dash_core_components as dcc
import dash_html_components as html
import plotly
import plotly.graph_objs as go
import pandas as pd
app = dash.Dash(__name__)

# Demo data: some x values occur with only one of the two y values, so one
# of the traces below has no bar at those positions.
df = pd.DataFrame({'x': [100, 100, 105, 110, 110, 115, 120, 125],
                   'y': [1, 2, 1, 1, 2, 2, 1, 1]})

# Colour constants; not referenced anywhere else in this snippet.
colors = {
    'background': '#111111',
    'background2': '#FF0',
    'text': '#7FDBFF'
}

# Split the rows by y value, one bar trace per subset.
df1 = df.loc[df["y"] == 1]
df2 = df.loc[df["y"] == 2]
trace1 = go.Bar(
    x=df1["x"],
    y=df1["y"],
    name='In Progress',
)
trace2 = go.Bar(
    x=df2["x"],
    y=df2["y"],
    name='Withdrawn',
)

# barmode='group' is kept here on purpose: this version reproduces the
# gaps that appear where one trace has no value.
app.layout = html.Div(children=[
    html.Div([
        html.H5('ANNx'),
        dcc.Graph(
            id='cx1',
            figure=go.Figure(
                data=[trace1, trace2],
                layout=go.Layout(barmode='group'),
            ),
        ),
    ]),
])

if __name__ == '__main__':
    app.run_server(debug=True)
For example, in this code one trace is empty at values 105, 115 and 120, and this creates gaps in the plot:
Using another barmode solved this problem:

Resources