How to add annotation based on the value in bokeh - python-3.x

I want to be able to display "NO DATA" when there is a value '0' in counts. For example for Strawberries, "NO DATA" should be displayed in the graph.
from bokeh.io import show, output_file
from bokeh.plotting import figure
output_file("bar_basic.html")
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 0]
p = figure(x_range=fruits, plot_height=350, title="Fruit Counts")
p.vbar(x=fruits, top=counts, width=0.9)
p.y_range.start = 0
show(p)
For example, for above data the graph should look like this:example vbar with NO DATA

You can select the data with the count value '0' with Pandas. This new dataframe can be used to create another ColumnDataSource to use for the LabelSet to show the text 'NO DATA' in the figure.
from bokeh.io import show, output_file
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, LabelSet
import pandas as pd
output_file("bar_basic.html")
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 0]
df = pd.DataFrame.from_dict({'fruits': fruits, 'counts': counts})
source = ColumnDataSource(df)
p = figure(x_range=fruits, plot_height=350, title="Fruit Counts")
p.vbar(x='fruits', top='counts', source=source, width=0.9)
df_nodata = df.loc[df['counts'] == 0]
pd.options.mode.chained_assignment = None
df_nodata.loc[:, 'text'] = 'NO DATA'
source_nodata = ColumnDataSource(df_nodata)
labels = LabelSet(x='fruits', y=1, text='text', text_align='center', source=source_nodata)
p.add_layout(labels)
p.y_range.start = 0
show(p)

Related

How to ignore or clip negative values in altair charts from the chart code itself?

I want to NOT show the negative value in the bar chart. The main idea is to NOT have that y-axis offset(in the actual problem its a facet), so any way to achieve this is fine - maybe clipping - just not at data level, preferably from the chart itself.
I thought of using alt.Scale but the domain requires you to specify a max limit and the issue is that I do not know that first hand, and I cannot find a way to programmatically specify max over the values.
You can use the following demo chart -
import pandas as pd
import altair as alt
dd = pd.DataFrame({'a': [0,1,2,3,4,5], 'b': [10,14, -5, 15, 0, 5]})
a = alt.Chart().mark_bar().encode(
x='a',
y=alt.Y('b:Q')
)
b = alt.Chart().mark_line().transform_window(
rolling_mean = 'mean(b)',
frame=[-2, 0]).encode(
x='a',
y='rolling_mean:Q'
)
alt.layer(a, b, data=dd)
There are only two ways I know of to hide data on a chart. First, you can set an explicit scale domain and set clip=True for the relevant marks:
import pandas as pd
import altair as alt
dd = pd.DataFrame({'a': [0,1,2,3,4,5], 'b': [10,14, -5, 15, 0, 5]})
a = alt.Chart().mark_bar(clip=True).encode(
x='a',
y=alt.Y('b:Q', scale=alt.Scale(domain=[0, 16]))
)
b = alt.Chart().mark_line().transform_window(
rolling_mean = 'mean(b)',
frame=[-2, 0]).encode(
x='a',
y='rolling_mean:Q'
)
alt.layer(a, b, data=dd)
Second, you can apply a filter transform to your data to remove rows from your dataset:
import pandas as pd
import altair as alt
dd = pd.DataFrame({'a': [0,1,2,3,4,5], 'b': [10,14, -5, 15, 0, 5]})
a = alt.Chart().mark_bar().encode(
x='a',
y=alt.Y('b:Q', scale=alt.Scale(domain=[0, 16]))
)
b = alt.Chart().mark_line().transform_window(
rolling_mean = 'mean(b)',
frame=[-2, 0]).encode(
x='a',
y='rolling_mean:Q'
)
alt.layer(a, b, data=dd).transform_filter(alt.datum.b > 0)
Note that difference: because this transform was applied at the top level, it removes rows for both sub-panels. If you instead apply the filter for only one of the subcharts, the rows will only be removed from that layer:
import pandas as pd
import altair as alt
dd = pd.DataFrame({'a': [0,1,2,3,4,5], 'b': [10,14, -5, 15, 0, 5]})
a = alt.Chart().transform_filter(
alt.datum.b > 0
).mark_bar().encode(
x='a',
y=alt.Y('b:Q', scale=alt.Scale(domain=[0, 16]))
)
b = alt.Chart().mark_line().transform_window(
rolling_mean = 'mean(b)',
frame=[-2, 0]).encode(
x='a',
y='rolling_mean:Q'
)
alt.layer(a, b, data=dd)
One way to do it seems to use transform_filter as follows -
.transform_filter(alt.datum.b >= 0 )

How can I annotate a Grouped Broken Barh Chart Python Matplotlib

I have searched to exhaustion trying to annotate my grouped broken barH chart. I would like to have the "Event" from my dataframe annotated in each broken bar section. The examples I have found online manually enter the events x,y positions, AND, are not grouped broken bar examples.
the end goal is to have these events display on-hover, but I believe I wont have an issue if I can just get the events to display.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import matplotlib.ticker as ticker
import io
pd.plotting.register_matplotlib_converters()
inp = u""" T29,11/4/2019 0:00,11/4/2019 0:00,off,none
T29,11/4/2019 0:00,11/5/2019 0:00,off,eventa
T29,11/5/2019 0:00,11/6/2019 0:00,on,none
T35,11/4/2019 0:00,11/5/2019 0:00,off,eventb
T35,11/5/2019 0:00,11/6/2019 0:00,paused,eventa
T43,11/4/2019 0:00,11/4/2019 4:01,on,none
T43,11/4/2019 4:01,11/4/2019 12:06,off,none
T43,11/4/2019 12:06,11/5/2019 8:07,on,eventc
T43,11/5/2019 8:07,11/5/2019 10:12,paused,eventd
T43,11/5/2019 10:12,11/5/2019 16:15,on,none
T43,11/5/2019 18:12,11/5/2019 20:15,off,none
"""
mydateparser = lambda x: pd.datetime.strptime(x, "%m/%d/%Y %H:%M")
df = pd.read_csv(io.StringIO(inp), header=0, encoding = "ISO-8859-1", parse_dates=['StartTime', 'FinishTime'], date_parser=mydateparser, names=["Name", "StartTime", "FinishTime", "Status", "Event"])
color = {"on": "g", "paused": "yellow", "off": "black"}
df["Diff"] = df.FinishTime - df.StartTime
minDate = (datetime.datetime.toordinal(min(df.StartTime)))
maxDate = (datetime.datetime.toordinal(max(df.FinishTime)))
days = mdates.DayLocator()
Mcount = 0
fig, ax = plt.subplots(figsize=(6, 3), edgecolor="black", linewidth=1)
labels = []
for i, task in enumerate(df.groupby("Name")):
Mcount += 1
labels.append(task[0])
for r in task[1].groupby("Status"):
data = r[1][["StartTime", "Diff"]]
ax.broken_barh(data.values, (i - 0.4, 0.8), edgecolor="black", alpha=1, linewidth=1,
color=color[r[0]])
ax.set_ylim(bottom=-0.8, top=Mcount)
ax.set_yticks(range(len(labels)))
ax.set_yticklabels(labels)
ax.set_ylabel("Names", rotation=90, fontdict={'family': 'DejaVu Sans', 'color': 'black', 'weight': 'bold', 'size': 14})
ax.set_xlim(left=minDate, right=maxDate)
ax.set_xlabel("Date", fontdict={'family': 'DejaVu Sans', 'color': 'black', 'weight': 'bold', 'size': 14})
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d-%Y'))
ax.tick_params(which='major', axis='x', rotation=0, length=11, color='black')
ax.xaxis.set_major_locator(days)
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M'))
ax.tick_params(which='minor', rotation=0, labelsize=8, length=4, color='red', size=2)
ax.xaxis.set_minor_locator(ticker.MultipleLocator(.50))
plt.show()
Hello and welcome to StackOverflow. IIUC, you can append a for loop to your enumerate statement to add text to the axes.
for i, task in enumerate(df.groupby("Name")):
Mcount += 1
labels.append(task[0])
for r in task[1].groupby("Status"):
data = r[1][["StartTime", "Diff"]]
ax.broken_barh(data.values,
(i - 0.4, 0.8),
edgecolor="black",
alpha=1,
linewidth=1,
color=color[r[0]]
)
for x1, x2 in data.values:
ax.text(x=x1 + x2/2,
y=i,
s=r[1]["Event"].values[0],
ha='center',
va='center',
color='white',
)
Modified from the docs.
Output:
You can, of course, modify the text formatting.
The text requires an x location, a y location, and a string. The hacky indexing was the quickest way I could pull the event info out of your dataframe.

Interactive Plot of Pandas Data-frame Color coding based on a group from a Column

I have an example pandas dataframe as follows:
day id cnt
2 catx 4
2 kagm 3
2 dyrt 5
3 catx 3
3 kagm 3
3 dyrt 4
5 catx 2
5 kagm 2
5 dyrt 2
I want to plot the scatter data cnt (y) vs day(x), where the points will be labeled (colored/legend) based on the id column.
Now this is pretty simple in seaborn/matplotlib which I know can be plotted and the plot can be saved to a file.
However, I am looking to have an interactive plot using plotly/bokeh/d3/mp3ld etc and finally, put that plot into an url (of my choice or maybe an account based as in plotly). My goal is also to have hover function, which will show me the value of the points when I take the cursor over a specific cursor point.
I have tried bokeh/plotly with cufflinks using ColumnDataSource and everything to try out to get the plots. However, have failed to get anything which I am looking for. Can I get some help in this direction from the experts? Thanks in anticipation.
This code plots the data the way you requested. I created a new dataframe for every category in your dataframe so the interactive legend also works. An array with hex color strings is generated with the length of the number of unique categories and added to the dataframe to give every category it's own color.
#!/usr/bin/python3
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.palettes import all_palettes
from bokeh.plotting import figure, output_file, show
data = {'day': [2, 2, 2, 3, 3, 3, 5, 5, 5], 'id': ['catx', 'kagm', 'dyrt', 'catx', 'kagm', 'dyrt', 'catx', 'kagm', 'dyrt'], 'cnt': [4, 3, 5, 3, 3, 4, 2, 2, 2]}
df = pd.DataFrame.from_dict(data)
output_file('plot.html')
tooltips = [
("day", "#day"),
("id", "#$name"),
("count", "#cnt")]
p = figure(tooltips=tooltips, plot_width=800, plot_height=800)
sources = []
colors = all_palettes['Viridis'][len(set(df['id'].tolist()))]
pd.options.mode.chained_assignment = None #Supress false positive warning
for ID, color in zip(set(df['id'].tolist()), colors):
dfSubset = df.loc[df['id'] == ID]
dfSubset['color'] = color
sources.append(ColumnDataSource(dfSubset))
p.circle(x = 'day', y = 'cnt', legend = 'id', color = 'color', name = 'id', alpha = 0.5, size = 15, source = sources[-1])
p.legend.click_policy="hide"
show(p)

Remove bars with 0 height in a bar graph in dash

I am trying to make a group bar graph in dash, I am plotting subject codes on the x-axis so they are not continuous numbers and I am getting empty bars for the missing subject codes so is there any way to remove these spaces or invisible bars.
This is the bar graph I am getting.
This is my code.
df = pd.read_csv('sampledata.csv')
a=df['SiteCode'].loc[df['SubjectStatus']=='In Progress'].value_counts()
a.index=a.index.astype(str)
b=df['SiteCode'].loc[df['SubjectStatus']=='Withdrawn'].value_counts()
b.index=b.index.astype(str)
x1=a.index
x2=b.index
trace1=go.Bar(
x=x1,
y=a.values,
name='In Progress',
)
trace2=go.Bar(
x=x2,
y=b.values,
name='Withdrawn',
)
app = dash.Dash()
app.layout = html.Div(
dcc.Graph(id='graph',
figure=go.Figure(data=[trace1,trace2],layout=go.Layout(barmode='group')))
if __name__=='__main__':
app.run_server()
Thanks in advance
PS: I am a noob in dash and python both so go easy on me.
You should try set barmode='stack', because barmode='group' added empty space if your one of your traces have empty values.
import dash
from dash.dependencies import Output, Input
import dash_core_components as dcc
import dash_html_components as html
import plotly
import plotly.graph_objs as go
import pandas as pd
app = dash.Dash(__name__)
df = pd.DataFrame({'x': [100, 100, 105, 110, 110, 115, 120, 125],
'y': [1, 2, 1, 1, 2, 2, 1, 1]})
colors = {
'background': '#111111',
'background2': '#FF0',
'text': '#7FDBFF'
}
df1 = df.loc[df["y"] == 1]
df2 = df.loc[df["y"] == 2]
trace1 = go.Bar(
x=df1["x"],
y=df1["y"],
name='In Progress',
)
trace2 = go.Bar(
x=df2["x"],
y=df2["y"],
name='Withdrawn',
)
app.layout = html.Div(children=[
html.Div([
html.H5('ANNx'),
dcc.Graph(
id='cx1',
figure=go.Figure(data=[trace1, trace2],
layout=go.Layout(barmode='group')))],)])
if __name__ == '__main__':
app.run_server(debug=True)
For example, in this code at value 105, 115 and 120 one trace is empty and this create space in plot:
Using another barmode solved this problem:

add secondary description in axis values, plotly

I am using a dataframe which includes the following columns:
Country, GNI, CarSalesPerCap. I am using kmeans to create clusters. In the algorithm i pass the dataframe with the two numeric columns: 'GNI', 'CarSalesPerCap'.
Then i am using plotly to create a scatter plot, where x-axis is the CarsalesPerCap and Y-axis is GNI. My question is, how am i going to add to the plot the corresponding country for each point plotted on the graph.
df = pd.read_sql_query(query,conn)
df = df.dropna()
#Cluster the data
kmeans = KMeans(n_clusters=6, random_state=0).fit(df1)
labels = kmeans.labels_
#Glue back to originaal data
df['clusters'] = labels
#Lets analyze the clusters
print (df)
cluster0=df.loc[df['clusters'] == 0]
cluster1=df.loc[df['clusters'] == 1]
cluster2=df.loc[df['clusters'] == 2]
cluster3=df.loc[df['clusters'] == 3]
cluster4=df.loc[df['clusters'] == 4]
cluster5=df.loc[df['clusters'] == 5]
p0 = go.Scatter(x=cluster0['CarSalesPerCap'],
y= cluster0['GNI'],
mode='markers',
marker=dict(color='black')
)
p1 = go.Scatter(x=cluster1['CarSalesPerCap'],
y= cluster1['GNI'],
mode='markers',
marker=dict(color='teal')
)
p2 = go.Scatter(x=cluster2['CarSalesPerCap'],
y= cluster2['GNI'],
mode='markers',
marker=dict(color='grey')
)
p3 = go.Scatter(x=cluster3['CarSalesPerCap'],
y= cluster3['GNI'],
mode='markers',
marker=dict(color='pink')
)
p4 = go.Scatter(x=cluster4['CarSalesPerCap'],
y= cluster4['GNI'],
mode='markers',
marker=dict(color='purple')
)
p5 = go.Scatter(x=cluster5['CarSalesPerCap'],
y= cluster5['GNI'],
mode='markers',
marker=dict(color='orange')
)
layout = go.Layout(xaxis=dict(ticks='',
showticklabels=True,
zeroline=True,
title = 'CarSalesPerCap'),
yaxis=dict(ticks='',
showticklabels=True,
zeroline=True,
title='GNI'),
showlegend=False, hovermode='closest')
fig = go.Figure(data=[p0,p1,p2,p3,p4,p5], layout=layout)
py.offline.plot(fig)
You can add a text element to your trace and it will allow you to overlay anything you want. If you add your country column then it will be displayed on hover. If you want a permanent label you can add annotations
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import pandas as pd
df = pd.DataFrame({'country':["USA", "MEXICO", "CANADA"], 'x':[1, 2, 4], 'y':[5, 6, 7]})
p0 = go.Scatter(
x=df.x,
y= df.y,
mode='markers',
marker=dict(
color='#E90',
size=15
),
text = df.country,
)
data = [p0]
iplot(data)

Resources