How to get a fixed axis range in plotly when using restyle - python-3.x

I want to have an interactive line plot in plotly which would have two buttons and from which you can select the data you want to plot. I managed to achieve this however when the plot is being updated the y axis range changes with each update which is something I do not want. I checked several topics here which suggest to set autorange to False but that does not seem to work. The code I have at the moment is as follows (which does not work):
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import datetime
np.random.seed(123)
df = pd.DataFrame(np.random.randint(-10, 12, size=(200, 4)),
columns=list('ABCD'))
datelist = pd.date_range(datetime.datetime(2020, 1, 1).strftime('%Y-%m-%d'),
periods=200).tolist()
df2 = df.copy()
df['dates'] = datelist
df = df.set_index(['dates'])
df.index = pd.to_datetime(df.index)
df.iloc[0] = 0
df = df.cumsum()
df2['dates'] = datelist
df2 = df2.set_index(['dates'])
df2.index = pd.to_datetime(df2.index)
df2.iloc[0] = 0
df2 = df2.cumsum()
# # plotly
fig = go.Figure()
# set up ONE trace
fig.add_trace(go.Scatter(x=df.index,
y=df[df.columns[0]],
visible=True,
)
)
buttons1 = []
buttons2 = []
fig.add_trace(go.Scatter(x=df2.index,
y=df2[df.columns[0]],
visible=True)
)
# button with one option for each dataframe
for col in df.columns:
buttons1.append(dict(method='restyle',
label=col,
visible=True,
args=[{'y':[df[col]],
'x':[df.index],
'type':'scatter'}, [0]],
)
)
# button with one option for each dataframe
for col in df2.columns:
buttons2.append(dict(method='restyle',
label=col,
visible=True,
args=[{'y':[df2[col]],
'x':[df2.index],
'type':'scatter'}, [0]],
)
)
# some adjustments to the updatemenus
updatemenus = []
updatemenus.append(dict())
updatemenus[0]['buttons'] = buttons1
updatemenus[0]['direction'] = 'down'
updatemenus[0]['showactive'] = True
updatemenus[0]['y'] = 1.12
updatemenus.append(dict())
updatemenus[1]['buttons'] = buttons2
updatemenus[1]['direction'] = 'down'
updatemenus[1]['showactive'] = True
updatemenus[1]['y'] = 1
# add dropdown menus to the figure
fig.update_layout(showlegend=False, updatemenus=updatemenus)
fig.update_yaxes(dict(range=[-400, 400],
autorange=False))
fig.show()
fig.write_html("./plot.html")

This behaves as you require if you do not 'type':'scatter' in the button definition. Also I used actual ranges from DF to set y-axis.
# button with one option for each dataframe
for col in df.columns:
buttons1.append(dict(method='restyle',
label=col,
visible=True,
args=[{'y':[df[col]],
'x':[df.index],
# 'type':'scatter'
}, [0]],
)
)
# button with one option for each dataframe
for col in df2.columns:
buttons2.append(dict(method='restyle',
label=col,
visible=True,
args=[{'y':[df2[col]],
'x':[df2.index],
# 'type':'scatter'
}, [0]],
)
)
fig.update_yaxes(dict(range=[min(df.min().min(), df2.max().max()), max(df.max().max(), df2.max().max())],
autorange=False))

Related

Get id each row in dataframe

I have done this code. according to my code, I have done(x1-x).The 'outlier_reference_x' is the difference between (x1-x). Now How can I know which x1 and x were?,I want to know which x and x1 were that subtracted of them is less than (3* sd)?
Actully I want to what iloc, 'outlier_reference_x' referes in df_reference and df_test? and then I want to delete these row.
sorry for my language.
import pandas as pd
df_reference = pd.read_csv(
"reference.txt",
delim_whitespace=True, # any whitespace separates data
names=["x", "y"], # column names
index_col=False # no index
)
df_test = pd.read_csv(
"test.txt",
delim_whitespace=True, # any whitespace separates data
names=["x1", "y1"], # column names
index_col=False # no index
)
frames = [df_reference ,df_test]
df = pd.concat(frames, axis=1)
df.to_csv('dataset.txt', sep='\t', header=True)
df_ = df[['x','x1']].copy()
df_['x1-x']=df_['x1']-df_['x']
set_mean_X = df_.loc[:, 'x1-x'].mean()
set_std_X = df_.loc[:, 'x1-x'].std()
outlier_reference_x = [x for x in df_['x1-x'] if ( x > 3 * set_std_X)]

I am trying to create a population pyramid graph using Dash with Plotly

i have a directory containing three files, years.csv, 2014.csv and 2015.csv. i want to plot a population pyramid graph for the two files but i want pandas to pick the dataframe from the years.csv with respect to the slider value.
my years.csv looks like, on the slider when i select 2014, from the code you can see, its an int that i convert into a string and append .csv to it. but all i want is that final string interpreted as df = pd.read_csv('2014.csv') so that i can be able to generate graphs of all the years as long as that file is in the directoy.
years
0
2014(2014.csv)
1
2015(2015.csv)
from dash import Dash, dcc, html, Input, Output
# import plotly.express as px
import plotly.graph_objects as gp
import pandas as pd
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv')
df = pd.read_csv('years.csv')
app = Dash(__name__)
app.layout = html.Div([
dcc.Graph(id='graph-with-slider'),
dcc.Slider(
df['year'].min(),
df['year'].max(),
step=None,
value=df['year'].min(),
marks={str(year): str(year) for year in df['year'].unique()},
id='year-slider'
)
])
#app.callback(
Output('graph-with-slider', 'figure'),
Input('year-slider', 'value'))
def update_figure(selected_year):
new_df = str(df[df.year == selected_year]) + ".csv"
print(new_df)
# fig = px.scatter(filtered_df, x="gdpPercap", y="lifeExp",
# size="pop", color="continent", hover_name="country",
# log_x=True, size_max=55)
y_age = new_df['Age']
x_M = new_df['Male']
x_F = new_df['Female'] * -1
# fig.update_layout(transition_duration=500)
# Creating instance of the figure
fig = gp.Figure()
# Adding Male data to the figure
fig.add_trace(gp.Bar(y= y_age, x = x_M,
name = 'Male',
orientation = 'h'))
# Adding Female data to the figure
fig.add_trace(gp.Bar(y = y_age, x = x_F,
name = 'Female', orientation = 'h'))
# Updating the layoutout for our graph
fig.update_layout(title = 'Population Pyramid of Uganda-2015',
title_font_size = 22, barmode = 'relative',
bargap = 0.0, bargroupgap = 0,
xaxis = dict(tickvals = [-600000, -400000, -200000,
0, 200000, 400000, 600000],
ticktext = ['6k', '4k', '2k', '0',
'2k', '4k', '6k'],
title = 'Population in Thousands',
title_font_size = 14)
)
# fig.show()
return fig
if __name__ == '__main__':
app.run_server(debug=True)

How to add entire dataframe row as scatter plot annotation

I'm plotting two columns of a Pandas DataFrame on a scatterplot and I want each point to show all the row values of the DataFrame. I've looked at this post, and tried to do something similar with mplcursors:
import pandas as pd
from datetime import date, datetime, time, timedelta
import numpy as np
import matplotlib.pyplot as plt
from mplcursors import cursor
df = pd.DataFrame()
df['datetime'] = pd.date_range(start='2016-01-01', end='2016-01-14', freq='30T')
#df = df.set_index('datetime')
df['x1'] = np.random.randint(-30, 30, size=len(df))
df['x2'] = np.random.randint(-30, 20, size=len(df))
df['x3'] = np.random.randint(-20, 30, size=len(df))
df['y1'] = np.random.randint(-100, 100, size=len(df))
df['y2'] = np.random.randint(-300, 200, size=len(df))
df['y3'] = np.random.randint(-200, 300, size=len(df))
def conditions(s):
if (s['y1'] > 20) or (s['y3'] < 0):
return 'group1'
elif (s['x3'] < 20):
return 'group2'
elif (s['x2'] == 0):
return 'group3'
else:
return 'group4'
df['category'] = df.apply(conditions, axis=1)
fig = plt.figure(figsize=(12,4))
ax1 = plt.subplot(121)
ax1.scatter(df.x1, df.y1, label='test1')
ax1.scatter(df.x2, df.y2, label='test2')
#cursor(hover=True)
ax1.set_xlabel('test1')
ax1.set_ylabel('test2')
ax1.legend(['test1','test2'])
cr1 = cursor(ax1,hover=True)
#ax1.annotation_names = df.columns.tolist()
cr1.connect("add", lambda x: x.annotation.set_text(df.columns.tolist()[x.target.index]))
ax2 = plt.subplot(122)
ax2.scatter(df.x1, df.y1, label='test1')
ax2.scatter(df.x3, df.y3, label='test3')
ax2.set_xlabel('test1')
ax2.set_ylabel('test3')
ax2.legend(['test1','test3'])
cr2 = cursor(ax2,hover=True)
#ax2.annotation_names = df.columns.tolist()
cr2.connect("add", lambda x: x.annotation.set_text(df.columns.tolist()[x.target.index]))
# save figure
import pickle
pickle.dump(fig, open('FigureObject.fig.pickle', 'wb'))
plt.show()
When I hover over a point, I want to see a label containing (for example):
datetime = 2016-01-01 00:00:00
x1 = 1
x2 = -4
x3 = 22
y1 = -42
y2 = -219
y3 = -158
category = group1
but I get this type of error:
cr2.connect("add", lambda x: x.annotation.set_text(df.columns.tolist()[x.target.index]))
IndexError: list index out of range
How do I fix it?
The IndexError occurs because of df.columns.tolist()[x.target.index]
df.columns.tolist() is a list of 7 columns, which is then indexed by [x.target.index].
df.iloc[x.target.index, :].to_dict() will get the desired row data for the point as a dict
A list comprehension creates a list of strings for each key value pair
'\n'.join(...) creates a string with each column separated by a \n
In mplcursors v0.5.1, Selection.target.index is deprecated, use Selection.index instead.
df.iloc[x.index, :] instead of df.iloc[x.target.index, :]
cr1.connect("add", lambda x: x.annotation.set_text('\n'.join([f'{k}: {v}' for k, v in df.iloc[x.index, :].to_dict().items()])))
Alternatively, use .to_string()
cr1.connect("add", lambda x: x.annotation.set_text(df.iloc[x.index, :].to_string()))

How do I create a Bokeh Select menu for a line plot for an indeterminate number of options?

I've been working on getting a select menu and Bokeh plot up and running on a dataset I'm working with. The dataset can be found here. I have no experience with JavaScript, but I believe my select menu isn't connected/-ing to my plot. Therefore, I have a plot outline, but no data displayed. As I run the script from the console with bokeh serve --show test.py, I get the first 7 notifications in my JS console. The last three (those in the red bracket in the screenshot) occur when I try and change to a different item in my select menu.
Goal: Display the plot of data for rows those id number ('ndc' in this example) is selected in the Select menu.
Here's my code (modified from this post) that I used to get started. This one was also used, as were a handful of others, and the Bokeh documentation itself.
import pandas as pd
from bokeh.io import curdoc, output_notebook, output_file
from bokeh.layouts import row, column
from bokeh.models import Select, DataRange1d, ColumnDataSource
from bokeh.plotting import figure
# output_notebook()
output_file('test.html')
def get_dataset(src, drug_id):
src.drop('Unnamed: 0', axis = 1, inplace = True)
df = src[src.ndc == drug_id].copy()
df['date'] = pd.to_datetime(df['date'])
df = df.set_index(['date'])
df.sort_index(inplace=True)
source = ColumnDataSource(data=df)
return source
def make_plot(source, title):
plot = figure(plot_width=800, plot_height = 800, tools="", x_axis_type = 'datetime', toolbar_location=None)
plot.xaxis.axis_label = 'Time'
plot.yaxis.axis_label = 'Price ($)'
plot.axis.axis_label_text_font_style = 'bold'
plot.x_range = DataRange1d(range_padding = 0.0)
plot.grid.grid_line_alpha = 0.3
plot.title.text = title
plot.line(x= 'date', y='nadac_per_unit', source=source)
return plot
def update_plot(attrname, old, new):
ver = vselect.value
plot.title.text = "Drug Prices"
src = get_dataset(df, ver)
source.date.update(src.date)
df = pd.read_csv('data/plotting_data.csv')
ver = '54034808' #Initial id number
cc = df['ndc'].astype(str).unique() #select-menu options
vselect = Select(value=ver, title='Drug ID', options=sorted((cc)))
source = get_dataset(df, ver)
plot = make_plot(source, "Drug Prices")
vselect.on_change('value', update_plot)
controls = row(vselect)
curdoc().add_root(row(plot, controls))
There were some problems in your code:
You want to drop the Unnamed: 0 column. This can only be done once and when you try this again it will throw an error since this column does not exist anymore.
The way you tried to filter the dataframe didn't work and would result in an empty dataframe. You can select rows based on a column value like this: df.loc[df['column_name'] == some_value]
Updating the ColumnDataSource object can be done by replacing source.data with the new data.
import pandas as pd
from bokeh.io import curdoc, output_notebook, output_file
from bokeh.layouts import row, column
from bokeh.models import Select, DataRange1d, ColumnDataSource
from bokeh.plotting import figure
output_notebook()
output_file('test.html')
def get_dataset(src, drug_id):
src.drop('Unnamed: 0', axis = 1, inplace = True)
df = src.loc[src['ndc'] == int(drug_id)]
df['date'] = pd.to_datetime(df['date'])
df = df.set_index(['date'])
df.sort_index(inplace=True)
source = ColumnDataSource(data=df)
return source
def make_plot(source, title):
plot = figure(plot_width=800, plot_height = 800, tools="", x_axis_type = 'datetime', toolbar_location=None)
plot.xaxis.axis_label = 'Time'
plot.yaxis.axis_label = 'Price ($)'
plot.axis.axis_label_text_font_style = 'bold'
plot.x_range = DataRange1d(range_padding = 0.0)
plot.grid.grid_line_alpha = 0.3
plot.title.text = title
plot.line(x= 'date', y='nadac_per_unit', source=source)
return plot
def update_plot(attrname, old, new):
ver = vselect.value
df1 = df.loc[df['ndc'] == int(new)]
df1['date'] = pd.to_datetime(df1['date'])
df1 = df1.set_index(['date'])
df1.sort_index(inplace=True)
newSource = ColumnDataSource(df1)
source.data = newSource.data
df = pd.read_csv('data/plotting_data.csv')
ver = '54034808' #Initial id number
cc = df['ndc'].astype(str).unique() #select-menu options
vselect = Select(value=ver, title='Drug ID', options=sorted((cc)))
source = get_dataset(df, ver)
plot = make_plot(source, "Drug Prices")
vselect.on_change('value', update_plot)
controls = row(vselect)
curdoc().add_root(row(plot, controls))

Plotly iplot() doesnt run within a function

I am trying to use iplot() within a function within Jupyter so that i can use a filter on the graph and have it change dynamically. The code works in a cell on its own like this
# Code for put by ticker
data = []
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker('ABBV')))
for key, values in opPriceDic.items():
trace = go.Scatter(
x = numberOfDays,
y = values,
name = 'option',
line = dict(
width = 4)
)
data.append(trace)
# Edit the layout
layout = dict(title = 'Call prices for ' ,
xaxis = dict(title = 'Days to Expiration'),
yaxis = dict(title = 'Price '),
)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='calls For ')
But once this is placed within a function the graph fails to load
def graph(ticker):
# Code for put by ticker
data = []
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker(ticker)))
for key, values in opPriceDic.items():
trace = go.Scatter(
x = numberOfDays,
y = values,
name = 'option',
line = dict(
width = 4)
)
data.append(trace)
# Edit the layout
layout = dict(title = 'Call prices for ' ,
xaxis = dict(title = 'Days to Expiration'),
yaxis = dict(title = 'Price '),
)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='calls For ')
But if I change the iplot() to plot() it calls the plotly API and opens a new tab with the graph displaying.
I am just wondering if anyone has noticed this before and may have come across a solution?
(if I am in the wrong area I will remove the post)
I have tried to use pandas data.reader calls to pull ticker data between a start and end date. The data.reader seems to work from within the function. In the question code, if the opPriceDic dictionary could be converted to a dataframe, then iplot() could plot it without use of layout and fig as below:
# Import libraries
import datetime
from datetime import date
import pandas as pd
import numpy as np
from plotly import __version__
%matplotlib inline
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
init_notebook_mode(connected=True)
cf.go_offline()
# Create function that uses data.reader and iplot()
def graph(ticker):
# create sample data set
start = datetime.datetime(2006, 1, 1)
end = datetime.datetime(2016, 1, 1)
df = data.DataReader(ticker, 'morningstar', start, end)
df = df.reset_index()
df['numberOfDays'] = df.apply(lambda x: abs((datetime.datetime.now() - x['Date']).days), axis=1)
# call iplot within the function graph()
df.iplot(kind='line', x='numberOfDays', y='Close', xTitle='Days', yTitle='Value', title='Prices', width=4)

Resources