python plotly choropleth dropdown + time slider - python-3.x

I am trying to plot a choropleth map with a drop-down menu to select multiple variables and also show each variable with a time slider to display data over time.
import numpy as np
import pandas as pd  # was missing: `pd.read_csv` is used below
import plotly

plotly.offline.init_notebook_mode()

# Read the sample data into a pandas DataFrame. The URL is built from adjacent
# string literals so that re-indenting the code cannot corrupt it.
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/'
    '2011_us_ag_exports.csv'
)

# Base choropleth trace: one value per US state.
data = [dict(type='choropleth',
             locations=df['code'].astype(str),
             z=df['total exports'].astype(float),
             locationmode='USA-states')]

# Create some additional traces by randomly scaling the base values.
for _ in range(5):
    trace = data[0].copy()
    trace['z'] = data[0]['z'] * np.random.rand(*data[0]['z'].shape)
    data.append(trace)

# Build the slider: one step per trace, and each step makes exactly one
# trace visible.
steps = []
for i, _trace in enumerate(data):
    visibility = [False] * len(data)
    visibility[i] = True
    steps.append(dict(method='restyle',
                      args=['visible', visibility],
                      label='Year {}'.format(i + 1980)))

slider = [dict(active=0,
               pad={"t": 1},
               steps=steps)]
layout = dict(geo=dict(scope='usa',
                       projection={'type': 'albers usa'}),
              sliders=slider)
fig = dict(data=data,
           layout=layout)
plotly.offline.iplot(fig)
This example is taken from: here
Any ideas how to add another column as data points?

Related

How do I put two scatter line charts on top of each other on streamlit?

I am making a stock prediction web app and I am trying to overlay the current data with the forecasted data I have produced using NeuralProphet. Here is my full code right now:
import streamlit as st
from datetime import date
import yfinance as yf
from neuralprophet import NeuralProphet
from plotly import graph_objs as go

START = "2015-01-01"
TODAY = date.today().strftime("%Y-%m-%d")

st.title("Prediction")
stocks = ("AAPL", "GOOG", "MSFT", "GME", "TSLA", "RIVN")
selected_stocks = st.selectbox("Select Dataset for Prediction", stocks)
n_years = st.slider("Years of Prediction:", 1, 10)
period = n_years * 365


# NOTE(review): this line reads "#st.cache" in the scraped text and was most
# likely the decorator "@st.cache"; it is left disabled here -- confirm
# before re-enabling.
# @st.cache
def load_data(ticker):
    """Download the price history of *ticker* from START to TODAY.

    The index is reset so that the date becomes a regular 'Date' column.
    """
    data = yf.download(ticker, START, TODAY)
    data.reset_index(inplace=True)
    return data


data_load_state = st.text("Load data...")
data = load_data(selected_stocks)
data_load_state.text("Loading data...done!")

st.subheader('Raw Data')
st.write(data.tail())


def plot_raw_data():
    """Plot the open and close prices as two line traces on one figure."""
    global fig
    fig = go.Figure()
    # go.Scatter replaces the deprecated go.Line alias.
    fig.add_trace(go.Scatter(x=data['Date'], y=data['Open'], name='stock_open'))
    fig.add_trace(go.Scatter(x=data['Date'], y=data['Close'], name='stock_close'))
    fig.layout.update(title_text="Time Series Data", xaxis_rangeslider_visible=True)
    st.plotly_chart(fig, use_container_width=True)


plot_raw_data()

# Forecasting
df_train = data[['Date', 'Close']]
df_train = df_train.rename(columns={"Date": "ds", "Close": "y"})
m = NeuralProphet()
metrics = m.fit(df_train)
future = m.make_future_dataframe(df=df_train, periods=period)
forecast = m.predict(df=future)

st.subheader('Forecast data')
st.write(forecast.tail())

st.write('Forecast Data')
fig1 = m.plot(forecast)
st.plotly_chart(fig1, use_container_width=True)

st.write("Forecast Components")
fig2 = m.plot_components(forecast)
st.write(fig2)
I can't seem to figure out what to do. I have tried implementing different st.plotly_chart() in attempt to overlay two charts, but no such luck... Help.
Add n_historic_predictions=True param in make_future_dataframe as in:
future = m.make_future_dataframe(df=df_train, periods=period, n_historic_predictions=True)
Output

Changing the values of a dict to lowercase (the values are color codes) so they are accepted as a color parameter in plotly.graph_objects

So, I'm trying to get the colors from the dictionary 'disaster_type' to draw the markers in a Scattergeo plot depending on the type of disaster.
Basically, I want to represent the natural disasters in the graphic with their color codes, e.g. if it is a volcanic activity, paint it 'orange'. I want to change the size of the marker as well depending on the magnitude of the disaster, but that's for another day.
here's the link of the dataset: https://www.kaggle.com/datasets/brsdincer/all-natural-disasters-19002021-eosdis
import plotly.graph_objects as go
import pandas as pd
import plotly as plt

df = pd.read_csv('1900_2021_DISASTERS - main.csv')
df.head()
df.tail()

disaster_set = {disaster for disaster in df['Disaster Type']}

# Disaster type -> marker colour. Stray spaces inside the hex strings were
# removed, and 'gray55' (not a CSS colour name) is written as a hex value.
disaster_type = {'Storm': 'aliceblue',
                 'Volcanic activity': 'orange',
                 'Flood': 'royalblue',
                 'Mass movement (dry)': 'darkorange',
                 'Landslide': '#C76114',
                 'Extreme temperature': '#FF0000',
                 'Animal accident': '#8c8c8c',  # gray55
                 'Glacial lake outburst': '#7D9EC0',
                 'Earthquake': '#CD8C95',
                 'Insect infestation': '#EEE8AA',
                 'Wildfire': '#FFFF00',
                 'Fog': '#00E5EE',
                 'Drought': '#FFEFD5',
                 'Epidemic': '#00CD66',
                 'Impact': '#FF6347'}
# disaster_type_lower = {(k, v.lower()) for k, v in disaster_type.items()}
# print(disaster_type_lower)
# for values in disaster_type.values():
# disaster_type[values] = disaster_type.lowercase()

# marker_color needs one colour per plotted row, so look up each row's
# 'Disaster Type' in the dictionary; rows with an unmapped type fall back to
# 'gray'. (The original referenced an undefined name, `disaster_type_`.)
marker_colors = df['Disaster Type'].map(disaster_type).fillna('gray')

fig = go.Figure(data=go.Scattergeo(
    lon=df['Longitude'],
    lat=df['Latitude'],
    text=df['Country'],
    mode='markers',
    marker_color=marker_colors,
))
fig.show()
I can't figure out how. I've left in comments after the dict how I tried to do that.
It changes them to lowercase, but now I don't know how to get them... My brain is completely melted.
it's a simple case of pandas map
found data that appears same as yours on kaggle so have used that
one type is unmapped Extreme temperature so used a fillna("red") to remove any errors
gray55 gave me an error so replaced it with RGB equivalent
import kaggle.cli
import sys
import pandas as pd
from zipfile import ZipFile
import urllib.parse  # `import urllib` alone does not guarantee urllib.parse is loaded
import plotly.graph_objects as go

# Download the data set through the kaggle command-line entry point.
url = "https://www.kaggle.com/brsdincer/all-natural-disasters-19002021-eosdis"
dataset_path = urllib.parse.urlparse(url).path  # "/<owner>/<dataset>"
sys.argv = [sys.argv[0], "datasets", "download", dataset_path[1:]]
kaggle.cli.main()

# Read every member of the downloaded archive as CSV, keyed by file name.
# The archive is closed once all members have been read.
with ZipFile(f'{dataset_path.split("/")[-1]}.zip') as zfile:
    dfs = {f.filename: pd.read_csv(zfile.open(f)) for f in zfile.infolist()}

df = dfs["DISASTERS/1970-2021_DISASTERS.xlsx - emdat data.csv"]

# Disaster type -> marker colour (stray spaces in the hex strings removed).
disaster_type = {
    "Storm": "aliceblue",
    "Volcanic activity": "orange",
    "Flood": "royalblue",
    "Mass movement (dry)": "darkorange",
    "Landslide": "#C76114",
    "Extreme temperature": "#FF0000",
    "Animal accident": "#8c8c8c",  # gray55
    "Glacial lake outburst": "#7D9EC0",
    "Earthquake": "#CD8C95",
    "Insect infestation": "#EEE8AA",
    "Wildfire": "#FFFF00",
    "Fog": "#00E5EE",
    "Drought": "#FFEFD5",
    "Epidemic": "#00CD66",
    "Impact": "#FF6347",
}

fig = go.Figure(
    data=go.Scattergeo(
        lon=df["Longitude"],
        lat=df["Latitude"],
        text=df["Country"],
        mode="markers",
        # One colour per row; rows whose type is not in the mapping become red.
        marker_color=df["Disaster Type"].map(disaster_type).fillna("red"),
    )
)
fig.show()

Why is Bokeh's plot not changing with plot selection?

Struggling to understand why this bokeh visual will not allow me to change plots and see the predicted data. The plot and select (dropdown-looking) menu appears, but I'm not able to change the plot for items in the menu.
Running Bokeh 1.2.0 via Anaconda. The code has been run both inside & outside of Jupyter. No errors display when the code is run. I've looked through the handful of SO posts relating to this same issue, but I've not been able to apply the same solutions successfully.
I wasn't sure how to create a toy problem out of this, so in addition to the code sample below, the full code (including the regression code and corresponding data) can be found at my github here (code: Regression&Plotting.ipynb, data: pred_data.csv, historical_data.csv, features_created.pkd.)
import pandas as pd
import datetime
from bokeh.io import curdoc, output_notebook, output_file
from bokeh.layouts import row, column
from bokeh.models import Select, DataRange1d, ColumnDataSource
from bokeh.plotting import figure
#Must be run from the command line
def get_historical_data(src_hist, drug_id):
    """Return the history rows for one drug, indexed and sorted by date.

    Args:
        src_hist: DataFrame with at least the columns 'ndc', 'Unnamed: 0',
            'date', 'year', 'month' and 'day'.
        drug_id: value matched against the 'ndc' column.

    Returns:
        A new DataFrame (``src_hist`` is not modified) without the
        'Unnamed: 0' column, whose 'date' index is rebuilt from the
        year/month/day columns and sorted in ascending order.
    """
    # Work on an explicit copy so the edits below never touch, or warn
    # about, the caller's frame.
    historical_data = src_hist.loc[src_hist['ndc'] == drug_id].copy()
    historical_data = historical_data.drop(['Unnamed: 0', 'date'], axis=1)
    # The date is assembled from the three component columns, so no format
    # inference is involved.
    historical_data['date'] = pd.to_datetime(historical_data[['year', 'month', 'day']])
    return historical_data.set_index('date').sort_index()
def get_prediction_data(src_test, drug_id):
    """Predict values for one drug at the module-level ``input_date``.

    Relies on two module-level names: ``input_date`` (the date to predict
    for) and ``lin_model`` (an object with a ``predict`` method).

    Args:
        src_test: DataFrame with at least the columns 'ndc', 'Unnamed: 0',
            'date', 'year', 'month' and 'day'.
        drug_id: value matched against the 'ndc' column.

    Returns:
        DataFrame with columns 'drug_id' and 'predictions', indexed and
        sorted by 'date'.
    """
    # Copy the matching rows and overwrite their date parts with the
    # prediction date.
    df_pred = src_test.loc[src_test['ndc'] == drug_id].copy()
    df_pred.loc[:, 'year'] = input_date.year
    df_pred.loc[:, 'month'] = input_date.month
    df_pred.loc[:, 'day'] = input_date.day
    df_pred = df_pred.drop(['Unnamed: 0', 'date'], axis=1)

    prediction = lin_model.predict(df_pred)
    # NOTE(review): assumes prediction[0] holds (drug ids, predicted values)
    # -- confirm against lin_model, which is defined elsewhere.
    prediction_data = pd.DataFrame({
        'drug_id': prediction[0][0],
        'predictions': prediction[0][1],
        'date': pd.to_datetime(df_pred[['year', 'month', 'day']], errors='coerce'),
    })
    return prediction_data.set_index('date').sort_index()
def make_plot(historical_data, prediction_data, title):
    """Build the price figure with a historical line and a prediction line.

    Args:
        historical_data: data source with 'date' and 'nadac_per_unit' columns.
        prediction_data: data source with 'date' and 'predictions' columns.
        title: text shown as the plot title.

    Returns:
        The configured Bokeh figure.
    """
    plot = figure(plot_width=800, plot_height=800, x_axis_type='datetime',
                  toolbar_location='below')
    plot.xaxis.axis_label = 'Time'
    plot.yaxis.axis_label = 'Price ($)'
    plot.axis.axis_label_text_font_style = 'bold'
    plot.x_range = DataRange1d(range_padding=0.0)
    plot.grid.grid_line_alpha = 0.3
    plot.title.text = title
    # Historical prices in blue.
    plot.line(x='date', y='nadac_per_unit', source=historical_data, line_color='blue')
    # Predicted prices in red.
    plot.line(x='date', y='predictions', source=prediction_data, line_color='red')
    return plot
def update_plot(attrname, old, new):
    """Bokeh callback: show the historical data of the newly selected drug.

    Args:
        attrname: name of the changed property ('value').
        old: previous Select value.
        new: newly selected value (a string).
    """
    # Select values are strings, while the default id (`ver`) is an integer.
    # NOTE(review): assumes the 'ndc' column is numeric -- confirm.
    drug_id = int(new)
    new_hist = get_historical_data(src_hist, drug_id)
    # Assign to the .data of the ColumnDataSource the glyph was drawn from;
    # that assignment is what makes Bokeh redraw the plot.
    hist_source.data = ColumnDataSource.from_df(new_hist)
    # TODO: refresh the prediction line the same way once
    # get_prediction_data is verified for every drug id.


# Import data source
src_hist = pd.read_csv('data/historical_data.csv')
src_pred = pd.read_csv('data/pred_data.csv')

# Prep for default view
# Initialize plot with ID number
ver = 781593600
# Set the prediction date
input_date = datetime.datetime(2020, 3, 31)  # Make this selectable in future

# Select-menu options
menu_options = src_pred['ndc'].astype(str)  # already contains unique values
# Create select (dropdown) menu
vselect = Select(value=str(ver), title='Drug ID', options=sorted(menu_options))

# Prep datasets for plotting
historical_data = get_historical_data(src_hist, ver)
prediction_data = get_prediction_data(src_pred, ver)
# Wrap the historical frame in a ColumnDataSource so the callback has a
# Bokeh object to update.
hist_source = ColumnDataSource(historical_data)

# Create a new plot with the source data
plot = make_plot(hist_source, prediction_data, "Drug Prices")

# Update the plot every time 'vselect' is changed
vselect.on_change('value', update_plot)
controls = row(vselect)
curdoc().add_root(row(plot, controls))
UPDATED: ERRORS:
1) No errors show up in Jupyter Notebook.
2) CLI shows a UserWarning: Pandas doesn't allow columns to be created via a new attribute name, referencing `historical_data.data = ColumnDataSource.from_df(new_hist_source)`.
Ultimately, the plot should have a line for historical data, and another line or dot for predicted data derived from sklearn. It also has a dropdown menu to select each item to plot (one at a time).
Your update_plot is a no-op that does not actually make any changes to Bokeh model state, which is what is necessary to change a Bokeh plot. Changing Bokeh model state means assigning a new value to a property on a Bokeh object. Typically, to update a plot, you would compute a new data dict and then set an existing CDS from it:
source.data = new_data # plain python dict
Or, if you want to update from a DataFrame:
source.data = ColumnDataSource.from_df(new_df)
As an aside, don't assign the .data from one CDS to another:
source.data = other_source.data # BAD
By contrast, your update_plot computes some new data and then throws it away. Note there is never any purpose to returning anything at all from any Bokeh callback. The callbacks are called by Bokeh library code, which does not expect or use any return values.
Lastly, I don't think any of those last JS console errors were generated by BokehJS.

Plotly iplot() doesnt run within a function

I am trying to use iplot() within a function within Jupyter so that i can use a filter on the graph and have it change dynamically. The code works in a cell on its own like this
# Code for put by ticker
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker('ABBV')))

# One line trace per price series; the dictionary keys are not used.
data = [
    go.Scatter(
        x=numberOfDays,
        y=values,
        name='option',
        line=dict(width=4),
    )
    for values in opPriceDic.values()
]

# Edit the layout
layout = dict(title='Call prices for ',
              xaxis=dict(title='Days to Expiration'),
              yaxis=dict(title='Price '),
              )
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='calls For ')
But once this is placed within a function the graph fails to load
def graph(ticker):
    """Plot the put option prices for *ticker* and return the plot object.

    The object returned by ``py.iplot`` is handed back to the caller:
    a notebook only renders it when it is the value of the cell's last
    expression (or is passed to ``display``), so discarding it inside the
    function shows nothing.
    """
    # Code for put by ticker
    opPriceDic = priceToArray(getPuts(getOptionPricesByTicker(ticker)))

    # One line trace per price series; the dictionary keys are not used.
    data = [
        go.Scatter(
            x=numberOfDays,
            y=values,
            name='option',
            line=dict(width=4),
        )
        for values in opPriceDic.values()
    ]

    # Edit the layout
    layout = dict(title='Call prices for ',
                  xaxis=dict(title='Days to Expiration'),
                  yaxis=dict(title='Price '),
                  )
    fig = dict(data=data, layout=layout)
    return py.iplot(fig, filename='calls For ')
But if I change the iplot() to plot() it calls the plotly API and opens a new tab with the graph displaying.
I am just wondering if anyone has noticed this before and may have come across a solution?
(if I am in the wrong area I will remove the post)
I have tried to use pandas data.reader calls to pull ticker data between a start and end date. The data.reader seems to work from within the function. In the question code, if the opPriceDic dictionary could be converted to a dataframe, then iplot() could plot it without use of layout and fig as below:
# Import libraries
import datetime
from datetime import date
import pandas as pd
import numpy as np
from plotly import __version__
%matplotlib inline
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
init_notebook_mode(connected=True)
cf.go_offline()
# Create function that uses data.reader and iplot()
def graph(ticker):
    """Fetch price data for *ticker* and plot 'Close' against days elapsed.

    NOTE(review): ``data`` is not imported in this snippet; presumably it is
    ``pandas_datareader.data`` -- confirm.
    """
    # create sample data set
    start = datetime.datetime(2006, 1, 1)
    end = datetime.datetime(2016, 1, 1)
    df = data.DataReader(ticker, 'morningstar', start, end)
    df = df.reset_index()
    # Days between each row's date and now, computed in one vectorized step
    # with a single "now" instead of one call per row.
    now = datetime.datetime.now()
    df['numberOfDays'] = (now - df['Date']).dt.days.abs()
    # call iplot within the function graph()
    df.iplot(kind='line', x='numberOfDays', y='Close', xTitle='Days',
             yTitle='Value', title='Prices', width=4)

In Bokeh, how do I add tooltips to a Timeseries chart (hover tool)?

Is it possible to add Tooltips to a Timeseries chart?
In the simplified code example below, I want to see a single column name ('a','b' or 'c') when the mouse hovers over the relevant line.
Instead, a "???" is displayed and ALL three lines get a tool tip (rather than just the one im hovering over)
Per the documentation (
http://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool), field names starting with “@” are interpreted as columns on the data source.
How can I display the 'columns' from a pandas DataFrame in the tooltip?
Or, if the high level TimeSeries interface doesn't support this, any clues for using the lower level interfaces to do the same thing? (line? multi_line?) or convert the DataFrame into a different format (ColumnDataSource?)
For bonus credit, how should the "$x" be formatted to display the date as a date?
thanks in advance
import pandas as pd
import numpy as np
from bokeh.charts import TimeSeries
from bokeh.models import HoverTool
from bokeh.plotting import show

# Five daily rows of random values in three columns. pd.date_range replaces
# the start/periods/freq form of the DatetimeIndex constructor, which newer
# pandas no longer accepts.
toy_df = pd.DataFrame(data=np.random.rand(5, 3), columns=('a', 'b', 'c'),
                      index=pd.date_range(start='01-01-2015', periods=5, freq='D'))

p = TimeSeries(toy_df, tools='hover')
hover = p.select(dict(type=HoverTool))
# "@name" refers to a data-source column; "$x" / "$y" are the cursor position.
hover.tooltips = [
    ("Series", "@columns"),
    ("Date", "$x"),
    ("Value", "$y"),
]
show(p)
Below is what I came up with.
Its not pretty but it works.
Im still new to Bokeh (& Python for that matter) so if anyone wants to suggest a better way to do this, please feel free.
import pandas as pd
import numpy as np
from bokeh.charts import TimeSeries
from bokeh.models import HoverTool
from bokeh.plotting import ColumnDataSource, figure, show  # figure/ColumnDataSource were used but not imported

toy_df = pd.DataFrame(data=np.random.rand(5, 3), columns=('a', 'b', 'c'),
                      index=pd.date_range(start='01-01-2015', periods=5, freq='D'))

_tools_to_show = 'box_zoom,pan,save,hover,resize,reset,tap,wheel_zoom'
p = figure(width=1200, height=900, x_axis_type="datetime", tools=_tools_to_show)

# FIRST plot ALL lines (this is a hack to get it working: multi_line wants a
# list of x-lists and a list of y-lists rather than a DataFrame).
ts_list_of_list = [toy_df.index for _ in toy_df.columns]
vals_list_of_list = toy_df.values.T.tolist()
# Define colors because otherwise multi_line will use blue for all lines...
cols_to_use = ['Black', 'Red', 'Lime']
p.multi_line(ts_list_of_list, vals_list_of_list, line_color=cols_to_use)

# THEN put a scatter on top of each line, one series at a time, so that the
# hover tool has per-point data to display.
date_labels = list(toy_df.index.strftime('%Y-%m-%d'))
for name, series in toy_df.items():
    source = ColumnDataSource({
        'x': toy_df.index,
        'y': series.values,
        # the series name repeated once per point, as a flat column
        'series_name': [name] * len(toy_df.index),
        # dates pre-formatted as strings for display in the tooltip
        'Date': date_labels,
    })
    p.scatter('x', 'y', source=source, fill_alpha=0, line_alpha=0.3, line_color="grey")

hover = p.select(dict(type=HoverTool))
hover.tooltips = [("Series", "@series_name"), ("Date", "@Date"), ("Value", "@y{0.00%}")]
hover.mode = 'mouse'
show(p)
I’m not familiar with Pandas, so I just use Python lists to show an example of how to add tooltips to multiple lines, show the series names, and properly display the date/time. Below is the result.
Thanks to @bs123's answer and @tterry's answer in Bokeh Plotting: Enable tooltips for only some glyphs
my result
# -*- coding: utf-8 -*-
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import HoverTool
from datetime import datetime

dateX_str = ['2016-11-14', '2016-11-15', '2016-11-16']
# convert the date strings to Python datetime objects
dateX = [datetime.strptime(i, "%Y-%m-%d") for i in dateX_str]
v1 = [10, 13, 5]
v2 = [8, 4, 14]
v3 = [14, 9, 6]
v = [v1, v2, v3]
names = ['v1', 'v2', 'v3']
colors = ['red', 'blue', 'yellow']

output_file('example.html', title='example of add tooltips to multi_timeseries')
tools_to_show = 'hover,box_zoom,pan,save,resize,reset,wheel_zoom'
p = figure(x_axis_type="datetime", tools=tools_to_show)

# To show tooltips for several lines, give every glyph a ColumnDataSource
# that uses the same column names; the tooltip is then defined once on the
# figure instead of once per glyph.
for i, series_name in enumerate(names):
    source = ColumnDataSource(data={
        'dateX': dateX,  # Python datetime objects for the x axis
        'v': v[i],
        'dateX_str': dateX_str,  # date strings for display in the tooltip
        'name': [series_name] * len(dateX),
    })
    p.line('dateX', 'v', source=source, legend=series_name, color=colors[i])
    circle = p.circle('dateX', 'v', source=source, fill_color="white", size=8,
                      legend=series_name, color=colors[i])
    # Attach only the circle glyphs to the hover tool, so the tooltip does
    # not also fire on the line glyphs. 'hover' is listed first in
    # tools_to_show, hence p.tools[0].
    p.tools[0].renderers.append(circle)

# show the tooltip
hover = p.select(dict(type=HoverTool))
hover.tooltips = [("value", "@v"), ("name", "@name"), ("date", "@dateX_str")]
hover.mode = 'mouse'
show(p)
tooltips with some strange behavior,two tips displayed at the same time
Here is my solution. I inspected the glyph renderer's data source to see which column names it contains. Then I used those names in the hover tooltips. You can see the resulting plot here.
import numpy as np
import pandas as pd  # was missing: `pd` is used below
from bokeh.charts import TimeSeries
from bokeh.models import HoverTool
from bokeh.plotting import show

toy_df = pd.DataFrame(data=np.random.rand(5, 3), columns=('a', 'b', 'c'),
                      index=pd.date_range(start='01-01-2015', periods=5, freq='D'))
# Bokeh displays dates as numbers, so convert them to strings to show them
# correctly.
toy_df.index = toy_df.index.astype(str)

p = TimeSeries(toy_df, tools='hover')

# The next 3 lines inspect the column names on the glyph's data source, so
# they can be referenced as @name in the hover tooltips.
# glyph_renderers = p.select(dict(type=GlyphRenderer))
# bar_source = glyph_renderers[0].data_source
# print(bar_source.data)  # here we can inspect the names to use on hover

hover = p.select(dict(type=HoverTool))
hover.tooltips = [
    ("Series", "@series"),
    ("Date", "@x_values"),
    ("Value", "@y_values"),
]
show(p)
The original poster's code doesn't work with the latest pandas (the DatetimeIndex constructor has changed), but HoverTool now supports a formatters attribute that lets you specify a format as a strftime string. Something like
# The HoverTool keyword is `tooltips` (plural). "@index" refers to the
# data-source column named 'index'; the formatters entry tells Bokeh to
# render that column with the strftime-style format given in the tooltip.
# NOTE(review): older Bokeh releases may expect the formatters key without
# the "@" prefix -- check against the installed version.
fig.add_tool(HoverTool(
    tooltips=[
        ('time', '@index{%Y-%m-%d}')
    ],
    formatters={
        '@index': 'datetime'
    }
))

Resources