How do I create a Bokeh Select menu for a line plot for an indeterminate number of options? - python-3.x

I've been working on getting a select menu and Bokeh plot up and running on a dataset I'm working with. The dataset can be found here. I have no experience with JavaScript, but I believe my select menu isn't connected/-ing to my plot. Therefore, I have a plot outline, but no data displayed. As I run the script from the console with bokeh serve --show test.py, I get the first 7 notifications in my JS console. The last three (those in the red bracket in the screenshot) occur when I try and change to a different item in my select menu.
Goal: Display the plot of data for rows those id number ('ndc' in this example) is selected in the Select menu.
Here's my code (modified from this post) that I used to get started. This one was also used, as were a handful of others, and the Bokeh documentation itself.
import pandas as pd
from bokeh.io import curdoc, output_notebook, output_file
from bokeh.layouts import row, column
from bokeh.models import Select, DataRange1d, ColumnDataSource
from bokeh.plotting import figure
# output_notebook()
output_file('test.html')
def get_dataset(src, drug_id):
src.drop('Unnamed: 0', axis = 1, inplace = True)
df = src[src.ndc == drug_id].copy()
df['date'] = pd.to_datetime(df['date'])
df = df.set_index(['date'])
df.sort_index(inplace=True)
source = ColumnDataSource(data=df)
return source
def make_plot(source, title):
plot = figure(plot_width=800, plot_height = 800, tools="", x_axis_type = 'datetime', toolbar_location=None)
plot.xaxis.axis_label = 'Time'
plot.yaxis.axis_label = 'Price ($)'
plot.axis.axis_label_text_font_style = 'bold'
plot.x_range = DataRange1d(range_padding = 0.0)
plot.grid.grid_line_alpha = 0.3
plot.title.text = title
plot.line(x= 'date', y='nadac_per_unit', source=source)
return plot
def update_plot(attrname, old, new):
ver = vselect.value
plot.title.text = "Drug Prices"
src = get_dataset(df, ver)
source.date.update(src.date)
df = pd.read_csv('data/plotting_data.csv')
ver = '54034808' #Initial id number
cc = df['ndc'].astype(str).unique() #select-menu options
vselect = Select(value=ver, title='Drug ID', options=sorted((cc)))
source = get_dataset(df, ver)
plot = make_plot(source, "Drug Prices")
vselect.on_change('value', update_plot)
controls = row(vselect)
curdoc().add_root(row(plot, controls))

There were some problems in your code:
You want to drop the Unnamed: 0 column. This can only be done once and when you try this again it will throw an error since this column does not exist anymore.
The way you tried to filter the dataframe didn't work and would result in an empty dataframe. You can select rows based on a column value like this: df.loc[df['column_name'] == some_value]
Updating the ColumnDataSource object can be done by replacing source.data with the new data.
import pandas as pd
from bokeh.io import curdoc, output_notebook, output_file
from bokeh.layouts import row, column
from bokeh.models import Select, DataRange1d, ColumnDataSource
from bokeh.plotting import figure
output_notebook()
output_file('test.html')
def get_dataset(src, drug_id):
src.drop('Unnamed: 0', axis = 1, inplace = True)
df = src.loc[src['ndc'] == int(drug_id)]
df['date'] = pd.to_datetime(df['date'])
df = df.set_index(['date'])
df.sort_index(inplace=True)
source = ColumnDataSource(data=df)
return source
def make_plot(source, title):
plot = figure(plot_width=800, plot_height = 800, tools="", x_axis_type = 'datetime', toolbar_location=None)
plot.xaxis.axis_label = 'Time'
plot.yaxis.axis_label = 'Price ($)'
plot.axis.axis_label_text_font_style = 'bold'
plot.x_range = DataRange1d(range_padding = 0.0)
plot.grid.grid_line_alpha = 0.3
plot.title.text = title
plot.line(x= 'date', y='nadac_per_unit', source=source)
return plot
def update_plot(attrname, old, new):
ver = vselect.value
df1 = df.loc[df['ndc'] == int(new)]
df1['date'] = pd.to_datetime(df1['date'])
df1 = df1.set_index(['date'])
df1.sort_index(inplace=True)
newSource = ColumnDataSource(df1)
source.data = newSource.data
df = pd.read_csv('data/plotting_data.csv')
ver = '54034808' #Initial id number
cc = df['ndc'].astype(str).unique() #select-menu options
vselect = Select(value=ver, title='Drug ID', options=sorted((cc)))
source = get_dataset(df, ver)
plot = make_plot(source, "Drug Prices")
vselect.on_change('value', update_plot)
controls = row(vselect)
curdoc().add_root(row(plot, controls))

Related

How to get a fixed axis range in plotly when using restyle

I want to have an interactive line plot in plotly which would have two buttons and from which you can select the data you want to plot. I managed to achieve this however when the plot is being updated the y axis range changes with each update which is something I do not want. I checked several topics here which suggest to set autorange to False but that does not seem to work. The code I have at the moment is as follows (which does not work):
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import datetime
np.random.seed(123)
df = pd.DataFrame(np.random.randint(-10, 12, size=(200, 4)),
columns=list('ABCD'))
datelist = pd.date_range(datetime.datetime(2020, 1, 1).strftime('%Y-%m-%d'),
periods=200).tolist()
df2 = df.copy()
df['dates'] = datelist
df = df.set_index(['dates'])
df.index = pd.to_datetime(df.index)
df.iloc[0] = 0
df = df.cumsum()
df2['dates'] = datelist
df2 = df2.set_index(['dates'])
df2.index = pd.to_datetime(df2.index)
df2.iloc[0] = 0
df2 = df2.cumsum()
# # plotly
fig = go.Figure()
# set up ONE trace
fig.add_trace(go.Scatter(x=df.index,
y=df[df.columns[0]],
visible=True,
)
)
buttons1 = []
buttons2 = []
fig.add_trace(go.Scatter(x=df2.index,
y=df2[df.columns[0]],
visible=True)
)
# button with one option for each dataframe
for col in df.columns:
buttons1.append(dict(method='restyle',
label=col,
visible=True,
args=[{'y':[df[col]],
'x':[df.index],
'type':'scatter'}, [0]],
)
)
# button with one option for each dataframe
for col in df2.columns:
buttons2.append(dict(method='restyle',
label=col,
visible=True,
args=[{'y':[df2[col]],
'x':[df2.index],
'type':'scatter'}, [0]],
)
)
# some adjustments to the updatemenus
updatemenus = []
updatemenus.append(dict())
updatemenus[0]['buttons'] = buttons1
updatemenus[0]['direction'] = 'down'
updatemenus[0]['showactive'] = True
updatemenus[0]['y'] = 1.12
updatemenus.append(dict())
updatemenus[1]['buttons'] = buttons2
updatemenus[1]['direction'] = 'down'
updatemenus[1]['showactive'] = True
updatemenus[1]['y'] = 1
# add dropdown menus to the figure
fig.update_layout(showlegend=False, updatemenus=updatemenus)
fig.update_yaxes(dict(range=[-400, 400],
autorange=False))
fig.show()
fig.write_html("./plot.html")
This behaves as you require if you do not 'type':'scatter' in the button definition. Also I used actual ranges from DF to set y-axis.
# button with one option for each dataframe
for col in df.columns:
buttons1.append(dict(method='restyle',
label=col,
visible=True,
args=[{'y':[df[col]],
'x':[df.index],
# 'type':'scatter'
}, [0]],
)
)
# button with one option for each dataframe
for col in df2.columns:
buttons2.append(dict(method='restyle',
label=col,
visible=True,
args=[{'y':[df2[col]],
'x':[df2.index],
# 'type':'scatter'
}, [0]],
)
)
fig.update_yaxes(dict(range=[min(df.min().min(), df2.max().max()), max(df.max().max(), df2.max().max())],
autorange=False))

Why is Bokeh's plot not changing with plot selection?

Struggling to understand why this bokeh visual will not allow me to change plots and see the predicted data. The plot and select (dropdown-looking) menu appears, but I'm not able to change the plot for items in the menu.
Running Bokeh 1.2.0 via Anaconda. The code has been run both inside & outside of Jupyter. No errors display when the code is run. I've looked through the handful of SO posts relating to this same issue, but I've not been able to apply the same solutions successfully.
I wasn't sure how to create a toy problem out of this, so in addition to the code sample below, the full code (including the regression code and corresponding data) can be found at my github here (code: Regression&Plotting.ipynb, data: pred_data.csv, historical_data.csv, features_created.pkd.)
import pandas as pd
import datetime
from bokeh.io import curdoc, output_notebook, output_file
from bokeh.layouts import row, column
from bokeh.models import Select, DataRange1d, ColumnDataSource
from bokeh.plotting import figure
#Must be run from the command line
def get_historical_data(src_hist, drug_id):
historical_data = src_hist.loc[src_hist['ndc'] == drug_id]
historical_data.drop(['Unnamed: 0', 'date'], inplace = True, axis = 1)#.dropna()
historical_data['date'] = pd.to_datetime(historical_data[['year', 'month', 'day']], infer_datetime_format=True)
historical_data = historical_data.set_index(['date'])
historical_data.sort_index(inplace = True)
# csd_historical = ColumnDataSource(historical_data)
return historical_data
def get_prediction_data(src_test, drug_id):
#Assign the new date
#Write a new dataframe with values for the new dates
df_pred = src_test.loc[src_test['ndc'] == drug_id].copy()
df_pred.loc[:, 'year'] = input_date.year
df_pred.loc[:, 'month'] = input_date.month
df_pred.loc[:, 'day'] = input_date.day
df_pred.drop(['Unnamed: 0', 'date'], inplace = True, axis = 1)
prediction = lin_model.predict(df_pred)
prediction_data = pd.DataFrame({'drug_id': prediction[0][0], 'predictions': prediction[0][1], 'date': pd.to_datetime(df_pred[['year', 'month', 'day']], infer_datetime_format=True, errors = 'coerce')})
prediction_data = prediction_data.set_index(['date'])
prediction_data.sort_index(inplace = True)
# csd_prediction = ColumnDataSource(prediction_data)
return prediction_data
def make_plot(historical_data, prediction_data, title):
#Historical Data
plot = figure(plot_width=800, plot_height = 800, x_axis_type = 'datetime',
toolbar_location = 'below')
plot.xaxis.axis_label = 'Time'
plot.yaxis.axis_label = 'Price ($)'
plot.axis.axis_label_text_font_style = 'bold'
plot.x_range = DataRange1d(range_padding = 0.0)
plot.grid.grid_line_alpha = 0.3
plot.title.text = title
plot.line(x = 'date', y='nadac_per_unit', source = historical_data, line_color = 'blue', ) #plot historical data
plot.line(x = 'date', y='predictions', source = prediction_data, line_color = 'red') #plot prediction data (line from last date/price point to date, price point for input_date above)
return plot
def update_plot(attrname, old, new):
ver = vselect.value
new_hist_source = get_historical_data(src_hist, ver) #calls the function above to get the data instead of handling it here on its own
historical_data.data = ColumnDataSource.from_df(new_hist_source)
# new_pred_source = get_prediction_data(src_pred, ver)
# prediction_data.data = new_pred_source.data
#Import data source
src_hist = pd.read_csv('data/historical_data.csv')
src_pred = pd.read_csv('data/pred_data.csv')
#Prep for default view
#Initialize plot with ID number
ver = 781593600
#Set the prediction date
input_date = datetime.datetime(2020, 3, 31) #Make this selectable in future
#Select-menu options
menu_options = src_pred['ndc'].astype(str) #already contains unique values
#Create select (dropdown) menu
vselect = Select(value=str(ver), title='Drug ID', options=sorted((menu_options)))
#Prep datasets for plotting
historical_data = get_historical_data(src_hist, ver)
prediction_data = get_prediction_data(src_pred, ver)
#Create a new plot with the source data
plot = make_plot(historical_data, prediction_data, "Drug Prices")
#Update the plot every time 'vselect' is changed'
vselect.on_change('value', update_plot)
controls = row(vselect)
curdoc().add_root(row(plot, controls))
UPDATED: ERRORS:
1) No errors show up in Jupyter Notebook.
2) CLI shows a UserWarning: Pandas doesn't allow columns to be careated via a new attribute name, referencing `historical_data.data = ColumnDatasource.from_df(new_hist_source).
Ultimately, the plot should have a line for historical data, and another line or dot for predicted data derived from sklearn. It also has a dropdown menu to select each item to plot (one at a time).
Your update_plot is a no-op that does not actually make any changes to Bokeh model state, which is what is necessary to change a Bokeh plot. Changing Bokeh model state means assigning a new value to a property on a Bokeh object. Typically, to update a plot, you would compute a new data dict and then set an existing CDS from it:
source.data = new_data # plain python dict
Or, if you want to update from a DataFame:
source.data = ColumnDataSource.from_df(new_df)
As an aside, don't assign the .data from one CDS to another:
source.data = other_source.data # BAD
By contrast, your update_plot computes some new data and then throws it away. Note there is never any purpose to returning anything at all from any Bokeh callback. The callbacks are called by Bokeh library code, which does not expect or use any return values.
Lastly, I don't think any of those last JS console errors were generated by BokehJS.

How to use Bokeh hover tool in for loop with checkbox widget

Python 3.6
Bokeh 12.15
I have tried to implement the bokeh example line_on_off.py, but in a for loop with a hover tool and data of varying length. What happens though is that when a line is turned off it turns off the tool tip of any line created after it. For example if I turn off line 1, line 2,3,4 tool tips are disabled, or if I turn off line 3 line 4's tool tip is disabled.
Can I use a hover tool and checkbox widget in a for loop like this? I have seen this multiline example, but my data is of varying length and I do not want to resample because I would like to see if there is bad or missing data.
Code
from bokeh.plotting import figure
from bokeh.models import CheckboxGroup, CustomJS
from bokeh.models import ColumnDataSource
import pandas as pd
from bokeh.models import HoverTool
def create_plot(df_list):
p = figure(x_axis_type = 'datetime')
glyph_dict = {}
labels = []
active = []
items = []
names = 'abcdefghijklmnopqrstuvwxyz'
callback_string = '{}.visible = {} in checkbox.active;'
code_string = ''
i = 0
sources = []
for df in df_list:
legend = df.columns[0]
series = df.iloc[:,0]
labels.append(legend)
x = series.index
y = series.values
source =ColumnDataSource(data = {'x':x,'y':y, 'date': [str(x) for x in x]})
sources.append(source)
line = p.line('x', 'y', source = sources[i])
items.append((legend, [line]))
name = names[i]
line.name = name
code_string += callback_string.format(name, str(i))
glyph_dict.update({name:line})
active.append(i)
i+=1
hover = HoverTool(tooltips=[('date', '#date'),('y', '#y')])
p.add_tools(hover)
checkbox = CheckboxGroup(labels=labels, active=active, width=200)
glyph_dict.update({'checkbox':checkbox})
checkbox.callback = CustomJS.from_coffeescript(args=glyph_dict, code=code_string)
return checkbox, p
Minimal example
import numpy as np
from datetime import datetime, timedelta
from bokeh.layouts import row
from bokeh.plotting import show
df_list = []
start = datetime(2017, 4,1)
end = datetime(2017,5,1)
for i in range(1,5):
date = pd.date_range(start, end, freq = '1w')
shape = len(date)
df = pd.DataFrame(index = date, data = np.random.randn(shape,1))
name = 'df'+ str(i)
df.columns = [name]
end = end + timedelta(weeks = 1)
df_list.append(df)
c,p = create_plot(df_list)
r=row([c,p])
show(r)
In a situation like this, you should probably create a new, separate hover tool for each line, by restricting the renderers property of each hover tool. So, in relation to your code, move the hover tool creation inside the loop, and have it set renderers each time:
line = p.line('x', 'y', source = sources[i])
hover = HoverTool(tooltips=[('date', '#date'),('y', '#y')]
renderers=[line])
p.add_tools(hover)

Plotly iplot() doesnt run within a function

I am trying to use iplot() within a function within Jupyter so that i can use a filter on the graph and have it change dynamically. The code works in a cell on its own like this
# Code for put by ticker
data = []
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker('ABBV')))
for key, values in opPriceDic.items():
trace = go.Scatter(
x = numberOfDays,
y = values,
name = 'option',
line = dict(
width = 4)
)
data.append(trace)
# Edit the layout
layout = dict(title = 'Call prices for ' ,
xaxis = dict(title = 'Days to Expiration'),
yaxis = dict(title = 'Price '),
)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='calls For ')
But once this is placed within a function the graph fails to load
def graph(ticker):
# Code for put by ticker
data = []
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker(ticker)))
for key, values in opPriceDic.items():
trace = go.Scatter(
x = numberOfDays,
y = values,
name = 'option',
line = dict(
width = 4)
)
data.append(trace)
# Edit the layout
layout = dict(title = 'Call prices for ' ,
xaxis = dict(title = 'Days to Expiration'),
yaxis = dict(title = 'Price '),
)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='calls For ')
But if I change the iplot() to plot() it calls the plotly API and opens a new tab with the graph displaying.
I am just wondering if anyone has noticed this before and may have come across a solution?
(if I am in the wrong area I will remove the post)
I have tried to use pandas data.reader calls to pull ticker data between a start and end date. The data.reader seems to work from within the function. In the question code, if the opPriceDic dictionary could be converted to a dataframe, then iplot() could plot it without use of layout and fig as below:
# Import libraries
import datetime
from datetime import date
import pandas as pd
import numpy as np
from plotly import __version__
%matplotlib inline
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
init_notebook_mode(connected=True)
cf.go_offline()
# Create function that uses data.reader and iplot()
def graph(ticker):
# create sample data set
start = datetime.datetime(2006, 1, 1)
end = datetime.datetime(2016, 1, 1)
df = data.DataReader(ticker, 'morningstar', start, end)
df = df.reset_index()
df['numberOfDays'] = df.apply(lambda x: abs((datetime.datetime.now() - x['Date']).days), axis=1)
# call iplot within the function graph()
df.iplot(kind='line', x='numberOfDays', y='Close', xTitle='Days', yTitle='Value', title='Prices', width=4)

How to use a slider callback to filter a ColumnDataSource in Bokeh using Python 3?

I'm trying to use a slider with a callback in Bokeh using Python 3 to filter the rows of my ColumnDataSource objects (which originate from a DataFrame). More specifically, if a slider with options of 0 to 10000000 (in multiples of 1 million) returns a value N of say 2000000, then I want my plot to only show the data for, in this case, US counties where the population is >= 2000000. Below is my code. Everything works as I want it to except for the slider callback.
from bokeh.io import curdoc
from bokeh.layouts import layout
from bokeh.models import HoverTool, ColumnDataSource, Select, Slider
from bokeh.plotting import figure
TOOLS='pan,wheel_zoom,box_zoom,reset,tap,save,box_select,lasso_select'
source1 = ColumnDataSource(df[df.winner == 'Democratic'])
source2 = ColumnDataSource(df[df.winner == 'Republican'])
hover = HoverTool(
tooltips = [
('County Name', '#county'),
('Population', '#population'),
('Land Area', '#land_area'),
('Pop. Density', '#density'),
('Winning Party', '#winner'),
('Winning Vote %', '#winning_vote_pct'),
]
)
# Plot
plot = figure(plot_width=800, plot_height=450, tools=[hover, TOOLS],
title='2016 US Presidential Vote % vs. Population Density (by County)',
x_axis_label='Vote %', y_axis_label='Population Density (K / sq. mi.)')
y = 'density'
size = 'bokeh_size'
alpha = 0.5
c1 = plot.circle(x='pct_d', y=y, size=size, alpha=alpha, color='blue',
legend='Democratic-Won County', source=source1)
c2 = plot.circle(x='pct_r', y=y, size=size, alpha=alpha, color='red',
legend='Republican-Won County', source=source2)
plot.legend.location = 'top_left'
# Select widget
party_options = ['Show both parties', 'Democratic-won only', 'Republican-won only']
menu = Select(options=party_options, value='Show both parties')
# Slider widget
N = 2000000
slider = Slider(start=0, end=10000000, step=1000000, value=N, title='Population Cutoff')
# Select callback
def select_callback(attr, old, new):
if menu.value == 'Democratic-won only': c1.visible=True; c2.visible=False
elif menu.value == 'Republican-won only': c1.visible=False; c2.visible=True
elif menu.value == 'Show both parties': c1.visible=True; c2.visible=True
menu.on_change('value', select_callback)
# Slider callback
def slider_callback(attr, old, new):
N = slider.value
# NEED HELP HERE...
source1 = ColumnDataSource(df.loc[(df.winner == 'Democratic') & (df.population >= N)])
source2 = ColumnDataSource(df.loc[(df.winner == 'Republican') & (df.population >= N)])
slider.on_change('value', slider_callback)
# Arrange plots and widgets in layouts
layout = layout([menu, slider],
[plot])
curdoc().add_root(layout)
Here is a solution using CustomJSFilter and CDSView as suggest in the other answer by Alex. It does not directly use the data as supplied in the question, but is rather a general hint how this can be implemented:
from bokeh.layouts import column
from bokeh.models import CustomJS, ColumnDataSource, Slider, CustomJSFilter, CDSView
from bokeh.plotting import Figure, show
import numpy as np
# Create some data to display
x = np.arange(200)
y = np.random.random(size=200)
source = ColumnDataSource(data=dict(x=x, y=y))
plot = Figure(plot_width=400, plot_height=400)
# Create the slider that modifies the filtered indices
# I am just creating one that shows 0 to 100% of the existing data rows
slider = Slider(start=0., end=1., value=1., step=.01, title="Percentage")
# This callback is crucial, otherwise the filter will not be triggered when the slider changes
callback = CustomJS(args=dict(source=source), code="""
source.change.emit();
""")
slider.js_on_change('value', callback)
# Define the custom filter to return the indices from 0 to the desired percentage of total data rows. You could also compare against values in source.data
js_filter = CustomJSFilter(args=dict(slider=slider, source=source), code=f"""
desiredElementCount = slider.value * 200;
return [...Array(desiredElementCount).keys()];
""")
# Use the filter in a view
view = CDSView(source=source, filters=[js_filter])
plot.line('x', 'y', source=source, line_width=3, line_alpha=0.6, view=view)
layout = column(slider, plot)
show(layout)
I hope this helps anyone who stumbles upon this in the future! Tested in bokeh 1.0.2
A quick solution with minimal change to your code would be:
def slider_callback(attr, old, new):
N = new # this works also with slider.value but new is more explicit
new1 = ColumnDataSource(df.loc[(df.winner == 'Democratic') & (df.population >= N)])
new2 = ColumnDataSource(df.loc[(df.winner == 'Republican') & (df.population >= N)])
source1.data = new1.data
source2.data = new2.data
When updating data sources, you should replace the data, not the whole object. Here I still create new ColumnDataSource as shortcut. A more direct way (but more verbose too) would be to create the dictionary from the filtered df's columns:
new1 = {
'winner': filtered_df.winner.values,
'pct_d': filtered_df.pct_d.values,
...
}
new2 = {...}
source1.data = new1
source2.data = new2
Note that there's another solution which would make the callback local (not server based) by using a CDSView with a CustomJSFilter. You can also write the other callback with a CDSView as well make the plot completely server-independent.

Resources