How do I put two scatter line charts on top of each other on streamlit? - python-3.x

I am making a stock prediction web app and I am trying to overlay the current data with the forecasted data I have produced using NeuralProphet. Here is my full code right now:
import streamlit as st
from datetime import date
import yfinance as yf
from neuralprophet import NeuralProphet
from plotly import graph_objs as go
# Start of the price-history window passed to yf.download in load_data.
START = "2015-01-01"
# Today's date formatted as YYYY-MM-DD; passed to yf.download after START.
TODAY = date.today().strftime("%Y-%m-%d")
st.title("Prediction")
# Tickers offered in the dataset selector.
stocks = ("AAPL", "GOOG", "MSFT", "GME", "TSLA", "RIVN")
selected_stocks = st.selectbox("Select Dataset for Prediction", stocks)
n_years = st.slider("Years of Prediction:", 1, 10)
# Forecast horizon expressed in days (365 per selected year).
period = n_years * 365
#st.cache
def load_data(ticker):
    """Download the price history for ``ticker`` and flatten its index.

    The download uses the module-level START and TODAY values. The index
    of the downloaded frame is moved into an ordinary column by
    ``reset_index`` before the frame is returned.
    """
    prices = yf.download(ticker, START, TODAY)
    return prices.reset_index()
# Status text element; its content is replaced once load_data returns.
data_load_state = st.text("Load data...")
data = load_data(selected_stocks)
data_load_state.text("Loading data...done!")
st.subheader('Raw Data')
# Show the last rows of the downloaded frame.
st.write(data.tail())
def plot_raw_data():
    """Render the opening and closing prices as an interactive line chart.

    Reads the module-level ``data`` frame (columns 'Date', 'Open', 'Close')
    and assigns the resulting figure to the module-level name ``fig``.
    """
    global fig
    fig = go.Figure()
    # go.Line is a deprecated alias in plotly.graph_objs; a Scatter trace in
    # 'lines' mode is the supported way to draw a line series.
    fig.add_trace(go.Scatter(x=data['Date'], y=data['Open'], mode='lines', name='stock_open'))
    fig.add_trace(go.Scatter(x=data['Date'], y=data['Close'], mode='lines', name='stock_close'))
    fig.layout.update(title_text="Time Series Data", xaxis_rangeslider_visible=True)
    st.plotly_chart(fig, use_container_width=True)
plot_raw_data()
# Forecasting
# Keep only the date and closing price, renamed to 'ds' and 'y' before fitting.
df_train = data[['Date', 'Close']]
df_train = df_train.rename(columns={"Date": "ds", "Close": "y"})
m = NeuralProphet()
metrics = m.fit(df_train)
# NOTE(review): to have the forecast frame also cover the historical dates
# (so it can be drawn together with the raw data), the suggested change is to
# pass n_historic_predictions=True here - TODO confirm against the
# NeuralProphet documentation.
future = m.make_future_dataframe(df=df_train, periods=period)
forecast = m.predict(df=future)
st.subheader('Forecast data')
st.write(forecast.tail())
st.write('Forecast Data')
fig1 = m.plot(forecast)
st.plotly_chart(fig1, use_container_width=True)
st.write("Forecast Components")
fig2 = m.plot_components(forecast)
st.write(fig2)
I can't seem to figure out what to do. I have tried calling st.plotly_chart() in different ways in an attempt to overlay the two charts, but no such luck... Help.

Add n_historic_predictions=True param in make_future_dataframe as in:
future = m.make_future_dataframe(df=df_train, periods=period, n_historic_predictions=True)
Output

Related

Locate an id in Dataframe using constraint on columns percentile

I am trying to compute an age-weighted historical VaR based on the DataFrame below. I would like to identify the ID in my DataFrame corresponding to the 5% quantile of the 'Weight_Age_Cumul' column (as in the example below, which I found on the internet).
enter image description here
I've tried the following line of code, but I get the following error message: 'DataFrame' object has no attribute 'idmax'
cac_df_sorted[cac_df_sorted.Weight_Age_Cumul]<=0.05].CAC_Log_returns.idmax()
enter image description here
If you can help me with that, it would be great. Thank you.
full code below if needed :
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from tabulate import tabulate
from scipy.stats import norm
import yfinance as yf
from yahoofinancials import YahooFinancials
import sys
# Download the price history for the '^FCHI' ticker over the fixed window below.
cac_df = yf.download('^FCHI',
start='2020-04-01',
end='2022-05-31',
progress=False,
)
cac_df.head()
# Drop the price/volume columns that are not used; 'Adj Close' is used below.
cac_df = cac_df.drop(columns=['Open','High','Low','Close','Volume'])
# Conversion into returns: log of each adjusted close over the previous row's value.
cac_df['Adj Close_-1'] = cac_df['Adj Close'].shift(1)
cac_df['CAC_Log_returns'] = np.log(cac_df['Adj Close']/cac_df['Adj Close_-1'])
# Replace the index with its 'YYYY-MM-DD' string form.
cac_df.index = pd.to_datetime(cac_df.index, format = '%Y-%m-%d').strftime('%Y-%m-%d')
# Plot the CAC log returns as a line chart and as a histogram
cac_df['CAC_Log_returns'].plot(kind='line',figsize=(15,7))
plt.show()
# `density=True` normalises the histogram to unit area. It replaces the
# `normed` keyword, which current Matplotlib releases no longer accept.
cac_df['CAC_Log_returns'].hist(bins=40,density=True,histtype='stepfilled',alpha=0.5)
plt.xlabel('Returns')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()
# Historical VaR: constant weight & age weighted & vol weighted
cac_df_sorted = cac_df.copy()
# Newest observation first, so the 1-based row position is the observation's age.
cac_df_sorted.sort_values(by=['Date'], inplace=True, ascending=False)
# Weights for the age-weighted VaR
lamb = 0.98
n = len(cac_df_sorted['CAC_Log_returns'])
# Exponentially decaying weights lamb**(i-1) * (1-lamb) / (1-lamb**n);
# over i = 1..n they sum to 1.
weight_age = [(lamb**(i-1) * (1-lamb))/(1-lamb**n) for i in range(1, n+1)]
# Design of the dataframe: attach each weight to its observation while the
# frame is still in age order, then re-order by return.
cac_df_sorted['Weight_Age'] = weight_age
cac_df_sorted.sort_values(by=['CAC_Log_returns'], inplace=True, ascending=True)
# Accumulate the weights in the *return-sorted* row order. Taking the cumsum
# of the original `weight_age` list would accumulate them in age order and
# ignore which return each weight belongs to.
cac_df_sorted['Weight_Age_Cumul'] = cac_df_sorted['Weight_Age'].cumsum()
#Historical Var Constant weight
# 1-day VaR as the negated 5% / 1% empirical quantile of the log returns.
Var_95_1d_CW = -cac_df_sorted['CAC_Log_returns'].quantile(0.05)
Var_99_1d_CW = -cac_df_sorted['CAC_Log_returns'].quantile(0.01)
#from Var1d to Var10d
# 10-day figure: sqrt(10) times the 1-day VaR plus mean * (sqrt(10) - 10).
mean = np.mean(cac_df['CAC_Log_returns'])
Var_95_10d_CW =(np.sqrt(10)*Var_95_1d_CW)+(mean *(np.sqrt(10)-10))
Var_99_10d_CW = (np.sqrt(10)*Var_99_1d_CW) +(mean *(np.sqrt(10)-10))
# Print both confidence levels side by side, then the full sorted frame.
print(tabulate([['95%',Var_95_1d_CW,Var_95_10d_CW],['99%',Var_99_1d_CW,Var_99_10d_CW]], headers= ['Confidence Level', 'Value at Risk 1 day Constant Weight','Value at Risk 10 days Constant Weight']))
print(cac_df_sorted)
# Historical Var Age weighted
#Find where cumulative (percentile) hits 0.05 and 0.01
# NOTE(review): this is the line that raises the reported "no attribute
# 'idmax'" error. pandas spells the method that returns the index label of the
# maximum `idxmax` - presumably what was intended here; TODO confirm.
cac_df_sorted[cac_df_sorted['Weight_Age_Cumul']<=0.05].CAC_Log_returns.idmax()

python plotly choropleth dropdown + time slider

I am trying to plot a choropleth map with a drop-down menu to select multiple variables and also show each variable with a time slider to display data over time.
import plotly
import numpy as np
import pandas as pd  # needed for pd.read_csv below; missing from the original imports

plotly.offline.init_notebook_mode()

# Reading sample data using pandas DataFrame
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/2011_us_ag_exports.csv'
)

# Base choropleth trace: one value per state code.
data = [dict(type='choropleth',
             locations=df['code'].astype(str),
             z=df['total exports'].astype(float),
             locationmode='USA-states')]

# Create five additional traces by scaling the base values with random factors.
for i in range(5):
    data.append(data[0].copy())
    data[-1]['z'] = data[0]['z'] * np.random.rand(*data[0]['z'].shape)

# Create the slider for the map: one step per trace. Each step restyles
# 'visible' so that only the trace at the step's own position is shown.
steps = []
for i in range(len(data)):
    step = dict(method='restyle',
                args=['visible', [False] * len(data)],
                label='Year {}'.format(i + 1980))
    step['args'][1][i] = True
    steps.append(step)

slider = [dict(active=0,
               pad={"t": 1},
               steps=steps)]
layout = dict(geo=dict(scope='usa',
                       projection={'type': 'albers usa'}),
              sliders=slider)
fig = dict(data=data,
           layout=layout)
plotly.offline.iplot(fig)
This example is taken from: here
Any ideas how to add another column as data points?

Why is Bokeh's plot not changing with plot selection?

Struggling to understand why this bokeh visual will not allow me to change plots and see the predicted data. The plot and select (dropdown-looking) menu appears, but I'm not able to change the plot for items in the menu.
Running Bokeh 1.2.0 via Anaconda. The code has been run both inside & outside of Jupyter. No errors display when the code is run. I've looked through the handful of SO posts relating to this same issue, but I've not been able to apply the same solutions successfully.
I wasn't sure how to create a toy problem out of this, so in addition to the code sample below, the full code (including the regression code and corresponding data) can be found at my github here (code: Regression&Plotting.ipynb, data: pred_data.csv, historical_data.csv, features_created.pkd.)
import pandas as pd
import datetime
from bokeh.io import curdoc, output_notebook, output_file
from bokeh.layouts import row, column
from bokeh.models import Select, DataRange1d, ColumnDataSource
from bokeh.plotting import figure
#Must be run from the command line
def get_historical_data(src_hist, drug_id):
    """Return the rows of ``src_hist`` for one drug, indexed and sorted by date.

    Parameters:
        src_hist: DataFrame with at least the columns 'ndc', 'Unnamed: 0',
            'date', 'year', 'month' and 'day'.
        drug_id: value compared for equality against the 'ndc' column.

    Returns:
        A new DataFrame whose index is a 'date' built from the year/month/day
        columns, in ascending date order. ``src_hist`` itself is not modified.
    """
    # Work on an explicit copy so the column edits below never write to a
    # slice of the caller's frame (same approach as get_prediction_data).
    historical_data = src_hist.loc[src_hist['ndc'] == drug_id].copy()
    historical_data = historical_data.drop(columns=['Unnamed: 0', 'date'])
    # The date is assembled from the component columns, so no format inference
    # is involved; the deprecated `infer_datetime_format` flag is not passed.
    historical_data['date'] = pd.to_datetime(historical_data[['year', 'month', 'day']])
    historical_data = historical_data.set_index(['date'])
    historical_data.sort_index(inplace=True)
    return historical_data
def get_prediction_data(src_test, drug_id):
    """Build the date-indexed prediction frame for one drug.

    The rows of ``src_test`` matching ``drug_id`` are copied and stamped with
    the module-level ``input_date``. They are then passed to the module-level
    ``lin_model`` and the result is returned sorted by date.
    """
    features = src_test.loc[src_test['ndc'] == drug_id].copy()
    # Overwrite the date components with the prediction date.
    for part in ('year', 'month', 'day'):
        features.loc[:, part] = getattr(input_date, part)
    features.drop(['Unnamed: 0', 'date'], inplace=True, axis=1)
    prediction = lin_model.predict(features)
    # NOTE(review): the layout of `prediction` (what [0][0] and [0][1] hold)
    # is defined by lin_model, which is not shown here - TODO confirm.
    prediction_data = pd.DataFrame({
        'drug_id': prediction[0][0],
        'predictions': prediction[0][1],
        'date': pd.to_datetime(features[['year', 'month', 'day']], infer_datetime_format=True, errors='coerce'),
    })
    return prediction_data.set_index(['date']).sort_index()
def make_plot(historical_data, prediction_data, title):
    """Create the price figure with a historical line and a prediction line.

    ``historical_data`` supplies 'date' and 'nadac_per_unit' (drawn in blue);
    ``prediction_data`` supplies 'date' and 'predictions' (drawn in red).
    Returns the configured figure.
    """
    chart = figure(
        plot_width=800,
        plot_height=800,
        x_axis_type='datetime',
        toolbar_location='below',
    )
    chart.title.text = title
    chart.grid.grid_line_alpha = 0.3
    chart.x_range = DataRange1d(range_padding=0.0)
    chart.axis.axis_label_text_font_style = 'bold'
    chart.xaxis.axis_label = 'Time'
    chart.yaxis.axis_label = 'Price ($)'
    # Historical series first, then the prediction series.
    chart.line(x='date', y='nadac_per_unit', source=historical_data, line_color='blue')
    chart.line(x='date', y='predictions', source=prediction_data, line_color='red')
    return chart
def update_plot(attrname, old, new):
    """Callback registered on ``vselect``: reload the historical data for the selected id.

    The selected id is read from ``vselect.value`` rather than from ``new``.
    The prediction data is not refreshed.
    """
    selected_id = vselect.value
    # Reuse get_historical_data instead of filtering the source frame here.
    refreshed = get_historical_data(src_hist, selected_id)
    # NOTE(review): the module-level `historical_data` is the DataFrame
    # returned by get_historical_data, not a ColumnDataSource, so this sets an
    # attribute on a DataFrame (see the reported pandas UserWarning).
    historical_data.data = ColumnDataSource.from_df(refreshed)
#Import data source
src_hist = pd.read_csv('data/historical_data.csv')
src_pred = pd.read_csv('data/pred_data.csv')
#Prep for default view
#Initialize plot with ID number
ver = 781593600
#Set the prediction date
input_date = datetime.datetime(2020, 3, 31) #Make this selectable in future
#Select-menu options (converted to strings)
menu_options = src_pred['ndc'].astype(str) #already contains unique values
#Create select (dropdown) menu; the initial value is the string form of ver
vselect = Select(value=str(ver), title='Drug ID', options=sorted((menu_options)))
#Prep datasets for plotting (both calls return DataFrames)
historical_data = get_historical_data(src_hist, ver)
prediction_data = get_prediction_data(src_pred, ver)
#Create a new plot with the source data
plot = make_plot(historical_data, prediction_data, "Drug Prices")
#Update the plot every time 'vselect' is changed
vselect.on_change('value', update_plot)
# Lay out the plot and the dropdown side by side and attach them to the document.
controls = row(vselect)
curdoc().add_root(row(plot, controls))
UPDATED: ERRORS:
1) No errors show up in Jupyter Notebook.
2) CLI shows a UserWarning: Pandas doesn't allow columns to be created via a new attribute name, referencing `historical_data.data = ColumnDataSource.from_df(new_hist_source)`.
Ultimately, the plot should have a line for historical data, and another line or dot for predicted data derived from sklearn. It also has a dropdown menu to select each item to plot (one at a time).
Your update_plot is a no-op that does not actually make any changes to Bokeh model state, which is what is necessary to change a Bokeh plot. Changing Bokeh model state means assigning a new value to a property on a Bokeh object. Typically, to update a plot, you would compute a new data dict and then set an existing CDS from it:
source.data = new_data # plain python dict
Or, if you want to update from a DataFrame:
source.data = ColumnDataSource.from_df(new_df)
As an aside, don't assign the .data from one CDS to another:
source.data = other_source.data # BAD
By contrast, your update_plot computes some new data and then throws it away. Note there is never any purpose to returning anything at all from any Bokeh callback. The callbacks are called by Bokeh library code, which does not expect or use any return values.
Lastly, I don't think any of those last JS console errors were generated by BokehJS.

How do I create a Bokeh Select menu for a line plot for an indeterminate number of options?

I've been working on getting a select menu and Bokeh plot up and running on a dataset I'm working with. The dataset can be found here. I have no experience with JavaScript, but I believe my select menu isn't connected/-ing to my plot. Therefore, I have a plot outline, but no data displayed. As I run the script from the console with bokeh serve --show test.py, I get the first 7 notifications in my JS console. The last three (those in the red bracket in the screenshot) occur when I try and change to a different item in my select menu.
Goal: Display the plot of data for the rows whose id number ('ndc' in this example) is selected in the Select menu.
Here's my code (modified from this post) that I used to get started. This one was also used, as were a handful of others, and the Bokeh documentation itself.
import pandas as pd
from bokeh.io import curdoc, output_notebook, output_file
from bokeh.layouts import row, column
from bokeh.models import Select, DataRange1d, ColumnDataSource
from bokeh.plotting import figure
# output_notebook()
output_file('test.html')
def get_dataset(src, drug_id):
    """Return a ColumnDataSource with the date-indexed rows of ``src`` for ``drug_id``.

    Side effect: the 'Unnamed: 0' column is dropped from ``src`` in place.
    """
    # NOTE(review): because the drop is in place, the column must still exist
    # each time this is called on the same frame.
    src.drop('Unnamed: 0', axis=1, inplace=True)
    # NOTE(review): `drug_id` is compared as given; callers in this script
    # pass the id as a string - confirm it matches the dtype of 'ndc'.
    rows = src[src.ndc == drug_id].copy()
    rows['date'] = pd.to_datetime(rows['date'])
    rows = rows.set_index(['date']).sort_index()
    return ColumnDataSource(data=rows)
def make_plot(source, title):
    """Create the price figure and draw 'nadac_per_unit' against 'date' from ``source``.

    The figure has a datetime x-axis and no toolbar; it is returned to the caller.
    """
    chart = figure(
        plot_width=800,
        plot_height=800,
        tools="",
        x_axis_type='datetime',
        toolbar_location=None,
    )
    chart.title.text = title
    chart.grid.grid_line_alpha = 0.3
    chart.x_range = DataRange1d(range_padding=0.0)
    chart.axis.axis_label_text_font_style = 'bold'
    chart.xaxis.axis_label = 'Time'
    chart.yaxis.axis_label = 'Price ($)'
    chart.line(x='date', y='nadac_per_unit', source=source)
    return chart
def update_plot(attrname, old, new):
    """Callback registered on ``vselect``: rebuild the dataset for the selected id.

    The selected id is read from ``vselect.value``; the callback arguments are not used.
    """
    plot.title.text = "Drug Prices"
    refreshed = get_dataset(df, vselect.value)
    # NOTE(review): this reads a `.date` attribute on both data sources; the
    # suggested way to update a ColumnDataSource is to replace its `.data` -
    # TODO confirm.
    source.date.update(refreshed.date)
# Load the full dataset once at startup.
df = pd.read_csv('data/plotting_data.csv')
ver = '54034808' #Initial id number
# The ids are converted to strings for the menu options.
cc = df['ndc'].astype(str).unique() #select-menu options
vselect = Select(value=ver, title='Drug ID', options=sorted((cc)))
source = get_dataset(df, ver)
plot = make_plot(source, "Drug Prices")
# Call update_plot whenever the selected value changes.
vselect.on_change('value', update_plot)
controls = row(vselect)
curdoc().add_root(row(plot, controls))
There were some problems in your code:
You want to drop the Unnamed: 0 column. This can only be done once and when you try this again it will throw an error since this column does not exist anymore.
The way you tried to filter the dataframe didn't work and would result in an empty dataframe. You can select rows based on a column value like this: df.loc[df['column_name'] == some_value]
Updating the ColumnDataSource object can be done by replacing source.data with the new data.
import pandas as pd
from bokeh.io import curdoc, output_notebook, output_file
from bokeh.layouts import row, column
from bokeh.models import Select, DataRange1d, ColumnDataSource
from bokeh.plotting import figure
output_notebook()
output_file('test.html')
def get_dataset(src, drug_id):
    """Return a ColumnDataSource with the date-indexed rows of ``src`` for ``drug_id``.

    Parameters:
        src: DataFrame with 'ndc' and 'date' columns (and optionally the
            'Unnamed: 0' column).
        drug_id: the id to select; converted with ``int`` before comparing
            against the 'ndc' column.

    Side effect: the 'Unnamed: 0' column is removed from ``src`` in place, as before.
    """
    # errors='ignore' makes the drop safe to repeat: on a second call the
    # column is already gone and a plain drop would raise.
    src.drop('Unnamed: 0', axis=1, inplace=True, errors='ignore')
    # Copy the selection so the column assignment below does not write to a
    # slice of `src`.
    rows = src.loc[src['ndc'] == int(drug_id)].copy()
    rows['date'] = pd.to_datetime(rows['date'])
    rows = rows.set_index(['date'])
    rows.sort_index(inplace=True)
    return ColumnDataSource(data=rows)
def make_plot(source, title):
    """Create the price figure and draw 'nadac_per_unit' against 'date' from ``source``.

    The figure has a datetime x-axis and no toolbar; it is returned to the caller.
    """
    chart = figure(
        plot_width=800,
        plot_height=800,
        tools="",
        x_axis_type='datetime',
        toolbar_location=None,
    )
    chart.title.text = title
    chart.grid.grid_line_alpha = 0.3
    chart.x_range = DataRange1d(range_padding=0.0)
    chart.axis.axis_label_text_font_style = 'bold'
    chart.xaxis.axis_label = 'Time'
    chart.yaxis.axis_label = 'Price ($)'
    chart.line(x='date', y='nadac_per_unit', source=source)
    return chart
def update_plot(attrname, old, new):
    """Callback registered on ``vselect``: show the rows for the newly selected id.

    Parameters follow the callback signature used with ``on_change``; only
    ``new`` (the selected id as a string) is used. The module-level ``df`` is
    filtered and the module-level ``source`` is updated; nothing is returned.
    """
    # Copy the selection so the column assignment below does not write to a
    # slice of `df`.
    rows = df.loc[df['ndc'] == int(new)].copy()
    rows['date'] = pd.to_datetime(rows['date'])
    rows = rows.set_index(['date'])
    rows.sort_index(inplace=True)
    # Assign a plain column dict built from the frame, rather than borrowing
    # the `.data` of a second ColumnDataSource.
    source.data = ColumnDataSource.from_df(rows)
# Load the full dataset once at startup.
df = pd.read_csv('data/plotting_data.csv')
ver = '54034808' #Initial id number
# The ids are converted to strings for the menu options.
cc = df['ndc'].astype(str).unique() #select-menu options
vselect = Select(value=ver, title='Drug ID', options=sorted((cc)))
source = get_dataset(df, ver)
plot = make_plot(source, "Drug Prices")
# Call update_plot whenever the selected value changes.
vselect.on_change('value', update_plot)
controls = row(vselect)
curdoc().add_root(row(plot, controls))

Plotly iplot() doesnt run within a function

I am trying to use iplot() within a function within Jupyter so that i can use a filter on the graph and have it change dynamically. The code works in a cell on its own like this
# Code for put by ticker: one line trace per entry of the option-price mapping
data = []
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker('ABBV')))
for key, values in opPriceDic.items():
    trace = go.Scatter(x=numberOfDays, y=values, name='option', line={'width': 4})
    data.append(trace)
# Edit the layout
layout = {
    'title': 'Call prices for ',
    'xaxis': {'title': 'Days to Expiration'},
    'yaxis': {'title': 'Price '},
}
fig = {'data': data, 'layout': layout}
py.iplot(fig, filename='calls For ')
But once this is placed within a function the graph fails to load
def graph(ticker):
    """Plot one line per entry of the option-price mapping for ``ticker``.

    Every trace uses the module-level ``numberOfDays`` as its x values.
    """
    # Code for put by ticker
    price_series = priceToArray(getPuts(getOptionPricesByTicker(ticker)))
    traces = [
        go.Scatter(x=numberOfDays, y=prices, name='option', line={'width': 4})
        for prices in price_series.values()
    ]
    # Edit the layout
    layout = {
        'title': 'Call prices for ',
        'xaxis': {'title': 'Days to Expiration'},
        'yaxis': {'title': 'Price '},
    }
    # NOTE(review): the value returned by py.iplot is discarded here, whereas
    # at cell level it is the cell's final expression. Returning it from this
    # function is presumably what makes the chart render - TODO confirm.
    py.iplot({'data': traces, 'layout': layout}, filename='calls For ')
But if I change the iplot() to plot() it calls the plotly API and opens a new tab with the graph displaying.
I am just wondering if anyone has noticed this before and may have come across a solution?
(if I am in the wrong area I will remove the post)
I have tried to use pandas data.reader calls to pull ticker data between a start and end date. The data.reader seems to work from within the function. In the question code, if the opPriceDic dictionary could be converted to a dataframe, then iplot() could plot it without use of layout and fig as below:
# Import libraries
import datetime
from datetime import date
import pandas as pd
import numpy as np
from plotly import __version__
%matplotlib inline
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
init_notebook_mode(connected=True)
cf.go_offline()
# Create function that uses data.reader and iplot()
def graph(ticker):
    """Fetch the price history for ``ticker`` and plot 'Close' against age in days.

    The data covers 2006-01-01 to 2016-01-01. A 'numberOfDays' column holds
    the absolute number of whole days between now and each row's 'Date'.
    """
    # create sample data set
    start = datetime.datetime(2006, 1, 1)
    end = datetime.datetime(2016, 1, 1)
    # NOTE(review): `data` is not imported in this snippet; presumably it is
    # the pandas-datareader `data` module - TODO confirm.
    df = data.DataReader(ticker, 'morningstar', start, end)
    df = df.reset_index()
    # Vectorised date difference: the current time is read once and the
    # subtraction runs on the whole column, not row by row via apply.
    df['numberOfDays'] = (datetime.datetime.now() - df['Date']).dt.days.abs()
    # call iplot within the function graph()
    df.iplot(kind='line', x='numberOfDays', y='Close', xTitle='Days', yTitle='Value', title='Prices', width=4)

Resources