Remove bars with 0 height in a bar graph in dash - python-3.x

I am trying to make a grouped bar graph in Dash. I am plotting subject codes on the x-axis, so the values are not continuous numbers, and I am getting empty bars for the missing subject codes. Is there any way to remove these spaces or invisible bars?
This is the bar graph I am getting.
This is my code.
# Count subjects per site code, separately for each subject status.
df = pd.read_csv('sampledata.csv')
a = df['SiteCode'].loc[df['SubjectStatus'] == 'In Progress'].value_counts()
a.index = a.index.astype(str)  # use the site codes as string labels
b = df['SiteCode'].loc[df['SubjectStatus'] == 'Withdrawn'].value_counts()
b.index = b.index.astype(str)
x1 = a.index
x2 = b.index

# One bar trace per status; each trace only has the site codes it occurs for.
trace1 = go.Bar(
    x=x1,
    y=a.values,
    name='In Progress',
)
trace2 = go.Bar(
    x=x2,
    y=b.values,
    name='Withdrawn',
)

app = dash.Dash()
app.layout = html.Div(
    dcc.Graph(
        id='graph',
        figure=go.Figure(
            data=[trace1, trace2],
            layout=go.Layout(barmode='group'),
        ),
    )
)  # closes html.Div( -- this parenthesis was missing in the pasted snippet

if __name__ == '__main__':
    app.run_server()
Thanks in advance
PS: I am a noob in dash and python both so go easy on me.

You should try setting barmode='stack', because barmode='group' leaves empty space wherever one of your traces has no value.
import dash
from dash.dependencies import Output, Input
import dash_core_components as dcc
import dash_html_components as html
import plotly
import plotly.graph_objs as go
import pandas as pd
app = dash.Dash(__name__)

# Toy data: several x values occur in only one of the two y groups.
df = pd.DataFrame({
    'x': [100, 100, 105, 110, 110, 115, 120, 125],
    'y': [1, 2, 1, 1, 2, 2, 1, 1],
})

# Colour palette; defined here but not referenced by the layout below.
colors = {
    'background': '#111111',
    'background2': '#FF0',
    'text': '#7FDBFF',
}

# Split the rows by y value and draw one bar trace per group.
df1 = df[df["y"] == 1]
df2 = df[df["y"] == 2]
trace1 = go.Bar(x=df1["x"], y=df1["y"], name='In Progress')
trace2 = go.Bar(x=df2["x"], y=df2["y"], name='Withdrawn')

# Grouped bars: the two traces are placed side by side at each x value.
grouped_figure = go.Figure(
    data=[trace1, trace2],
    layout=go.Layout(barmode='group'),
)

app.layout = html.Div(children=[
    html.Div([
        html.H5('ANNx'),
        dcc.Graph(id='cx1', figure=grouped_figure),
    ]),
])

if __name__ == '__main__':
    app.run_server(debug=True)
For example, in this code one trace is empty at the values 105, 115 and 120, and this creates empty space in the plot:
Using another barmode solves this problem:

Related

Adding footnotes to layered chart in Altair

I am making a layered chart using data from the Bureau of Labor Statistics, and since I am publishing the chart, I need to cite the data source. I need to add a line at the bottom of the chart saying "Source: Bureau of Labor Statistics. Data as of July 2022." I am able to add the title and subtitle, but there doesn't seem to be an option for footnote/source line. Are there any workarounds?
import pandas as pd
import pandas_datareader.data as pdr
import datetime
import altair as alt
# Date window for the FRED 'UNRATE' series.
start = datetime.datetime(2020, 1, 1)
end = datetime.datetime(2022, 7, 10)

df = pdr.DataReader('UNRATE', 'fred', start, end).rename(
    columns={'UNRATE': 'Unemployment Rate'}
)
df["Date"] = df.index          # expose the index as a column for encoding
df['Prepandemic Rate'] = 3.5   # constant reference level, drawn as a flat line
source = df


def _rate_line(column, colour):
    """Return a line chart of *column* against Date in the given colour."""
    return (
        alt.Chart(source)
        .mark_line(point=False, strokeWidth=2, color=colour)
        .encode(x="Date", y=column)
    )


line = _rate_line("Unemployment Rate", 'blue')
line2 = _rate_line("Prepandemic Rate", 'red')

# Layer both lines and attach a left-anchored title with a subtitle.
chart_title = {
    "text": 'Unemployment Rate',
    "subtitle": ['Seasonally adjusted'],
}
alt.layer(line, line2).properties(
    width=300,
    height=300,
    title=chart_title,
).configure_title(anchor='start')
Note: I saw this question (How to add a Text Footer to an Altair graph?) but I can't seem to get the concat function to work on my layered chart.
You can add the footer to the faceted chart as a TitleParams on your final chart. (You will still need to play with font sizes and balance the chart to your liking.)
As for your further request, I updated the code to fit everything (title, subtitle, footer); I used @jakevdp's idea from this post.
I think this approach makes it easier: create the title and subtitle as separate charts, add the footer inside your original chart, and concatenate all of them.
You still need to work on alignment, position of your legend, fonts, etc.
P.S. As an alternative approach, use title parameters for the title and subtitle, and concatenate the footnote.
# Small demo dataset: one rating per genre/gender pair.
df = pd.DataFrame(
    [
        ['Action', 5, 'F'],
        ['Crime', 10, 'F'],
        ['Action', 3, 'M'],
        ['Crime', 9, 'M'],
    ],
    columns=['Genre', 'Rating', 'Gender'],
)

# The footer is supplied as the chart's title, oriented at the bottom.
footer = alt.TitleParams(
    ['This is a footer.'],
    baseline='bottom',
    orient='bottom',
    anchor='start',
    fontWeight='normal',
    fontSize=10,
    dy=20,
    dx=20,
)

# Bar chart with one column facet per genre.
bars = alt.Chart(df).mark_bar().encode(
    column=alt.Column('Genre', title=""),
    x=alt.X('Gender', axis=alt.Axis(ticks=False, labels=False, title='')),
    y=alt.Y('Rating', axis=alt.Axis(grid=False)),
    color='Gender',
)
chart = bars.properties(width=100, title=footer)


def _text_chart(text, size):
    """Return a chart consisting of a single text mark showing *text*."""
    return alt.Chart(
        {"values": [{"text": text}]}
    ).mark_text(size=size).encode(
        text="text:N"
    )


title = _text_chart("The Title", 20)
subtitle = _text_chart("Subtitle", 14)

# Stack title, subtitle and the footed chart vertically.
stacked = alt.vconcat(title, subtitle, chart)
stacked.configure_view(stroke=None).configure_concat(spacing=1)
You can add any text anywhere you like. For example.
import altair as alt
import pandas as pd
# Demo data for the bar chart.
source = pd.DataFrame({
    'a': list('ABCDEFGHI'),
    'b': [28, 55, 43, 91, 81, 53, 19, 87, 52],
})

# A single dummy record so that exactly one text mark is drawn.
data = alt.Data(values=[{'x': 'A'}])

# Footnote text positioned from the chart's width/height, then offset by dx/dy.
footnote_base = alt.Chart(data)
text1 = footnote_base.mark_text(
    text='Footnote',
    x='width',
    y='height',
    dx=10,
    dy=40,
)

bar_base = alt.Chart(source).mark_bar()
bar1 = bar_base.encode(x='a', y='b')

alt.layer(text1, bar1)

Dash plotly overcoming duplicate callback

I have a dashboard for displaying historical data alongside forecasted values. I would like the user to be able to make edits to the forecasted values and update the graph. I am accomplishing this through an editable datatable. However I am unsure of how to update the scatter plot after getting user input on the editable datatable.
example data frame
item time_period number forecast
apple 1 5 0
apple 2 10 0
apple 3 8 0
apple 4 9 1
apple 5 12 1
orange 1 20 0
orange 2 46 0
orange 3 35 0
orange 4 32 1
orange 5 55 1
current code
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd
import dash_table
from dash.dependencies import Input, Output
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Example data: one row per item and time period; the forecast column holds 0 or 1.
raw_data = {
    "item": ["apple", "apple", "apple", "apple", "apple",
             "orange", "orange", "orange", "orange", "orange"],
    "time_period": [1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
    "number": [5, 10, 8, 9, 12, 20, 46, 35, 32, 55],
    "forecast": [0, 0, 0, 1, 1, 0, 0, 0, 1, 1],
}
df = pd.DataFrame(raw_data)
items = df["item"].unique()

# One dropdown entry per distinct item.
dropdown_options = [{"label": i, "value": i} for i in items]

# Only the "number" column of the table is editable.
table_columns = [
    {"id": "time_period", "name": "time_period"},
    {"id": "number", "name": "number", "editable": True},
]

app.layout = html.Div([
    dcc.Graph(id="scatter-plot"),
    dcc.Dropdown(id="dropdown", options=dropdown_options),
    dash_table.DataTable(
        id="data-table",
        columns=table_columns,
        data=df.to_dict("records"),
    ),
])
@app.callback(
    Output(component_id="scatter-plot", component_property="figure"),
    Output(component_id="data-table", component_property="data"),
    Input(component_id="dropdown", component_property="value"),
)
def select_item(fruit):
    """Build the scatter plot and table rows for the item chosen in the dropdown.

    Args:
        fruit: value of the "dropdown" component, compared against the
            "item" column of the module-level dataframe.

    Returns:
        A ``(figure, records)`` tuple: the scatter figure for the selected
        item and its ``time_period``/``number`` rows as a list of dicts.
    """
    # Work on a copy so the module-level dataframe is left untouched.
    dff = df.copy()
    # Keep only the rows of the selected item.
    fruit_df = dff[dff["item"] == fruit]
    # Scatter plot of the selected item, coloured by the forecast flag.
    fig = px.scatter(data_frame=fruit_df, x="time_period", y="number", color="forecast")
    # Table rows: just the two columns the DataTable displays.
    forecasts = fruit_df[["time_period", "number"]].to_dict("records")
    return fig, forecasts
# NOTE: "scatter-plot.figure" is also an output of select_item; this second
# registration is the duplicate callback output the question is about.
@app.callback(
    Output(component_id="scatter-plot", component_property="figure"),
    Input(component_id="data-table", component_property="data"),
)
def update_scatter(data):
    """Redraw the scatter plot from the current (possibly edited) table rows.

    Args:
        data: value of the DataTable's ``data`` property.

    Returns:
        A scatter figure of ``number`` against ``time_period``.
    """
    fig = px.scatter(data_frame=data, x="time_period", y="number")
    return fig
if __name__ == "__main__":
    # Start the development server only when the file is run as a script.
    app.run_server(debug=True)
Combine the two, and use callback context to determine which input caused the callback to fire.

Python Dash Data Table should display only selected columns

I am trying to display only selected columns from my dataframe using a DataTable. I am able to select how many rows I want, and I am looking for a similar option to choose which columns are displayed when the code runs.
My dataframe has close to 25 columns. I don't want all of them to be displayed, hence I am looking for this solution.
here is my code :
import dash
import dash_core_components as dcc
import dash_bootstrap_components as dbc
import dash_html_components as html
import dash_table as dt
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd
# Raw string: the backslashes in the Windows path are literal characters,
# not escape sequences.
df = pd.read_csv(r'E:\pylab\dshlab\infratickets.csv', low_memory=False)
app = dash.Dash(__name__)

# One dropdown option per distinct assigned group.
dpdown = [{'label': i, 'value': i} for i in df['ASSIGNED_GROUP'].unique()]

app.layout = html.Div([
    html.P([
        html.Label("Choose a feature"),
        html.Div(
            dcc.Dropdown(id='dropdown', options=dpdown),
            style={
                'width': '100px',
                'fontSize': '10px',
                'padding-left': '100px',
                'display': 'inline-block',
            },
        ),
    ]),
    # Container that display_table fills with the DataTable.
    html.Div(id='table-container', className='tableDiv'),
    dcc.Graph(id='plot', style={'height': '25%', 'width': '25%'}),
])
#dcc.Dropdown(id='dropdown', style={'height': '30px', 'width': '100px'}, options=dpdown),
#dcc.Graph(id='graph'),
#html.Div(html.H3('country graph'),id='table-container1',className='tableDiv1')
@app.callback(
    dash.dependencies.Output('table-container', 'children'),
    [dash.dependencies.Input('dropdown', 'value')])
def display_table(dpdown):
    """Render a DataTable with the first rows of the selected assigned group.

    Args:
        dpdown: value of the 'dropdown' component, matched against the
            'ASSIGNED_GROUP' column.

    Returns:
        An ``html.Div`` wrapping a DataTable of the first five matching rows.
    """
    df_temp = df[df['ASSIGNED_GROUP'] == dpdown]
    return html.Div([
        dt.DataTable(
            id='main-table',
            columns=[{'name': i, 'id': i} for i in df_temp.columns],
            # 'records' is the documented orient for a list of row dicts;
            # the old 'rows' spelling is rejected by current pandas.
            data=df_temp.head(5).to_dict('records'),
            style_table={
                'maxHeight': '20%',
                'width': '30%',
                'minWidth': '10%',
            },
            style_header={'backgroundColor': 'rgb(30, 30, 30)'},
            style_cell={
                'backgroundColor': 'rgb(50, 50, 50)',
                'color': 'white',
                'height': 'auto',
                'width': 'auto',
            },
            style_data={'whiteSpace': 'auto', 'height': 'auto', 'width': 'auto'},
        )
    ])
# Launch the development server only when this file is executed directly.
if __name__ == "__main__":
    app.run_server(debug=True)
I was able to figure out the solution.
I changed the code
columns=[{'name': i, 'id': i} for i in df_temp.columns]
to
columns=[{'name': i, 'id': i} for i in df.loc[:, ['Colname1', 'Colname2', ...]]]
fixed it
You could also use by index:
# Load the dataframe, then reassign it with just the columns you want.
# The raw string keeps the backslashes in the Windows path literal.
df = pd.read_csv(r'E:\pylab\dshlab\infratickets.csv', low_memory=False)
# Python slices exclude the ending index, so 1:3 selects positions 1 and 2.
df = df.iloc[:, 1:3]
Would give all rows and columns 1 to 2 of the data frame.
You can change the
columns=[{'name': i, 'id': i} for i in df_temp.columns],
as below:
First define TABLE_SELECTED_COLUMNS = ['col1', 'col2', ...]
and
columns=[{"name": i, "id": i} for i in TABLE_SELECTED_COLUMNS],

How can I annotate a Grouped Broken Barh Chart Python Matplotlib

I have searched to exhaustion trying to annotate my grouped broken barH chart. I would like to have the "Event" from my dataframe annotated in each broken bar section. The examples I have found online manually enter the events x,y positions, AND, are not grouped broken bar examples.
The end goal is to have these events display on hover, but I believe I won't have an issue with that if I can just get the events to display.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import matplotlib.ticker as ticker
import io
pd.plotting.register_matplotlib_converters()
inp = u""" T29,11/4/2019 0:00,11/4/2019 0:00,off,none
T29,11/4/2019 0:00,11/5/2019 0:00,off,eventa
T29,11/5/2019 0:00,11/6/2019 0:00,on,none
T35,11/4/2019 0:00,11/5/2019 0:00,off,eventb
T35,11/5/2019 0:00,11/6/2019 0:00,paused,eventa
T43,11/4/2019 0:00,11/4/2019 4:01,on,none
T43,11/4/2019 4:01,11/4/2019 12:06,off,none
T43,11/4/2019 12:06,11/5/2019 8:07,on,eventc
T43,11/5/2019 8:07,11/5/2019 10:12,paused,eventd
T43,11/5/2019 10:12,11/5/2019 16:15,on,none
T43,11/5/2019 18:12,11/5/2019 20:15,off,none
"""

# Timestamp layout used by the StartTime / FinishTime columns.
DATE_FORMAT = "%m/%d/%Y %H:%M"


def mydateparser(x):
    """Parse one timestamp string such as '11/4/2019 0:00'.

    Uses the standard-library ``datetime`` module; the ``pd.datetime`` alias
    the original relied on no longer exists in current pandas.
    """
    return datetime.datetime.strptime(x, DATE_FORMAT)


# header=0 combined with names= makes pandas treat the first line of `inp` as
# a header row and replace it with `names`, so that first line is not data.
df = pd.read_csv(
    io.StringIO(inp),
    header=0,
    encoding="ISO-8859-1",
    names=["Name", "StartTime", "FinishTime", "Status", "Event"],
)
# Convert the date columns with an explicit format instead of the deprecated
# date_parser= argument of read_csv.
for _col in ("StartTime", "FinishTime"):
    df[_col] = pd.to_datetime(df[_col], format=DATE_FORMAT)

# Bar colour for each status value.
color = {"on": "g", "paused": "yellow", "off": "black"}

# Duration of every interval.
df["Diff"] = df.FinishTime - df.StartTime

# Ordinal day numbers of the earliest start and the latest finish.
minDate = df.StartTime.min().toordinal()
maxDate = df.FinishTime.max().toordinal()
days = mdates.DayLocator()
Mcount = 0
fig, ax = plt.subplots(figsize=(6, 3), edgecolor="black", linewidth=1)
labels = []

# One horizontal lane per name; inside a lane, one broken_barh call per status
# so that each status is drawn in its own colour.
for i, (name, name_rows) in enumerate(df.groupby("Name")):
    Mcount += 1
    labels.append(name)
    for status, status_rows in name_rows.groupby("Status"):
        data = status_rows[["StartTime", "Diff"]]
        ax.broken_barh(
            data.values,
            (i - 0.4, 0.8),
            edgecolor="black",
            alpha=1,
            linewidth=1,
            color=color[status],
        )

# Font shared by both axis labels.
axis_label_font = {'family': 'DejaVu Sans', 'color': 'black', 'weight': 'bold', 'size': 14}

# Y axis: one tick per name.
ax.set_ylim(bottom=-0.8, top=Mcount)
ax.set_yticks(range(len(labels)))
ax.set_yticklabels(labels)
ax.set_ylabel("Names", rotation=90, fontdict=dict(axis_label_font))

# X axis: dates as major ticks, times of day as minor ticks.
ax.set_xlim(left=minDate, right=maxDate)
ax.set_xlabel("Date", fontdict=dict(axis_label_font))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d-%Y'))
ax.tick_params(which='major', axis='x', rotation=0, length=11, color='black')
ax.xaxis.set_major_locator(days)
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M'))
ax.tick_params(which='minor', rotation=0, labelsize=8, length=4, color='red', size=2)
ax.xaxis.set_minor_locator(ticker.MultipleLocator(.50))

plt.show()
Hello and welcome to StackOverflow. IIUC, you can append a for loop to your enumerate statement to add text to the axes.
for i, task in enumerate(df.groupby("Name")):
    Mcount += 1
    labels.append(task[0])
    for r in task[1].groupby("Status"):
        data = r[1][["StartTime", "Diff"]]
        ax.broken_barh(data.values,
                       (i - 0.4, 0.8),
                       edgecolor="black",
                       alpha=1,
                       linewidth=1,
                       color=color[r[0]]
                       )
        # Pair every (start, duration) row with the Event of that same row,
        # so each bar section shows its own event rather than the first
        # event of the whole status group.
        for (x1, x2), event in zip(data.values, r[1]["Event"]):
            ax.text(x=x1 + x2 / 2,  # horizontal centre of the bar section
                    y=i,
                    s=event,
                    ha='center',
                    va='center',
                    color='white',
                    )
Modified from the docs.
Output:
You can, of course, modify the text formatting.
The text requires an x location, a y location, and a string. The hacky indexing was the quickest way I could pull the event info out of your dataframe.

How to add annotation based on the value in bokeh

I want to be able to display "NO DATA" when there is a value '0' in counts. For example for Strawberries, "NO DATA" should be displayed in the graph.
from bokeh.io import show, output_file
from bokeh.plotting import figure
# Write the rendered plot to a standalone HTML file.
output_file("bar_basic.html")

# Fruit name -> count; Strawberries is the zero-count case.
fruit_counts = {
    'Apples': 5,
    'Pears': 3,
    'Nectarines': 4,
    'Plums': 2,
    'Grapes': 4,
    'Strawberries': 0,
}
fruits = list(fruit_counts)
counts = list(fruit_counts.values())

# Categorical x axis with one vertical bar per fruit.
p = figure(x_range=fruits, plot_height=350, title="Fruit Counts")
p.vbar(x=fruits, top=counts, width=0.9)
p.y_range.start = 0  # bars start at the axis baseline

show(p)
For example, for above data the graph should look like this:example vbar with NO DATA
You can select the data with the count value '0' with Pandas. This new dataframe can be used to create another ColumnDataSource to use for the LabelSet to show the text 'NO DATA' in the figure.
from bokeh.io import show, output_file
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, LabelSet
import pandas as pd
output_file("bar_basic.html")

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 0]
df = pd.DataFrame.from_dict({'fruits': fruits, 'counts': counts})
source = ColumnDataSource(df)

p = figure(x_range=fruits, plot_height=350, title="Fruit Counts")
p.vbar(x='fruits', top='counts', source=source, width=0.9)

# Rows with a zero count get a 'NO DATA' label. assign() returns a new frame,
# so nothing is written into a slice of `df` and the global
# chained-assignment warning does not have to be switched off.
df_nodata = df.loc[df['counts'] == 0].assign(text='NO DATA')
source_nodata = ColumnDataSource(df_nodata)
labels = LabelSet(x='fruits', y=1, text='text', text_align='center', source=source_nodata)
p.add_layout(labels)

p.y_range.start = 0
show(p)

Resources