Altair adding date slider for interactive scatter chart plot - python-3.x

Has anyone tried using date as a slider for Altair interactive scatter plots?
I'm trying to reproduce a similar plot to the gapminder scatter:
1) Instead of a year filter I'm trying to use a date, e.g. '2020-01-05', and I'm getting the following error:
altair.vegalite.v4.schema.core.BindRange->max, validating 'type'
'2020-05-17T00:00:00' is not of type 'number'
2) When I try to parse it as an int, nothing shows up in the plot
3) Examples of using the Year slider: https://www.datacamp.com/community/tutorials/altair-in-python
https://altair-viz.github.io/gallery/multiple_interactions.html
4) Also a timestamp option wouldn't be ideal as the date needs to be readable
Would appreciate any help. Thanks
#Date Slider
from altair import datum
from datetime import datetime
import altair as alt
import pandas as pd
import numpy as np
import datetime as dt
date_slider = alt.binding_range(min=min(df['date']), max=max(df['date']), step=1)
slider_selection = alt.selection_single(bind=date_slider, fields=['date'], name="Date", init={'week_starting': max(df[‘date’]})
alt.Chart(df).mark_point(filled=True).encode(
x='mom_pct',
y='yoy_pct',
size='n_queries',
color='vertical',
tooltip = ['vertical', 'yoy_pct', 'mom_pct']
).properties(
width=800,
height=600
).add_selection(slider_selection).transform_filter(slider_selection)

Vega-Lite sliders do not support datetime display, but it is possible to display timestamps. Here is a full example (I didn't base it off of your code, because you did not provide any data):
import altair as alt
import pandas as pd
import numpy as np
from datetime import datetime
# Seeded generator so the random walk below is reproducible.
rand = np.random.RandomState(42)
# 100 consecutive daily timestamps, starting now.
datelist = list(pd.date_range(datetime.today(), periods=100))
# One row per day: the date and a cumulative sum of standard-normal steps.
df = pd.DataFrame(dict(xval=datelist, yval=np.cumsum(rand.randn(100))))
def timestamp(t):
    """Return *t* as a timestamp in milliseconds since the Unix epoch.

    *t* is anything ``pd.to_datetime`` accepts (for example a date string or
    a ``pd.Timestamp``). ``Timestamp.timestamp()`` yields seconds, so the
    value is scaled by 1000 to get milliseconds, which is the numeric form
    used for the slider bounds and cutoff in this example.
    """
    return pd.to_datetime(t).timestamp() * 1000
# Slider bounds are the first and last generated dates, expressed as
# millisecond timestamps because the range binding only accepts numbers.
slider = alt.binding_range(name='cutoff:', min=timestamp(min(datelist)), max=timestamp(max(datelist)))
# Start the cutoff at the middle of the generated dates. The data begins at
# "today", so a hard-coded calendar date would fall outside the slider range
# and leave every point hidden until the slider is moved.
selector = alt.selection_single(name="SelectorName", fields=['cutoff'],
                                bind=slider, init={"cutoff": timestamp(datelist[len(datelist) // 2])})
# Scatter plot of the sample data. Opacity is 1 for rows whose date is before
# the slider cutoff and 0 for the rest.
alt.Chart(df).mark_point().encode(
x='xval',
y='yval',
opacity=alt.condition(
# Expression string comparing each row's date with the selection's cutoff.
'toDate(datum.xval) < SelectorName.cutoff[0]',
alt.value(1), alt.value(0)
)
).add_selection(
selector
)
Unfortunately, Vega-Lite does not currently provide any native way to create a slider that displays a formatted date.

Another way to work around this issue is to use another chart in place of the slider. This lets you see the date as well as use a range for the selection, which is also not possible with sliders at the moment.
import altair as alt
from vega_datasets import data
import pandas as pd
# Convert date column to an actual date and filter the data.
movies = (
data.movies()
# Parse the Release_Date column into datetimes.
.assign(Release_Date=lambda df: pd.to_datetime(df['Release_Date']))
# Keep only rows whose release date lies between the two bounds.
.query('1960 < Release_Date < 2010')
# Take a reproducible random sample of 1,000 rows.
.sample(1_000, random_state=90384))
# Interval selection limited to the x encoding.
select_year = alt.selection_interval(encodings=['x'])
# Short bar chart of row counts per release year; the selection is attached
# here, so this chart plays the role of the slider.
bar_slider = alt.Chart(movies).mark_bar().encode(
x='year(Release_Date)',
y='count()').properties(height=50).add_selection(select_year)
# Scatter plot whose point opacity is 0.7 when the select_year condition
# holds and 0.1 otherwise.
scatter_plot = alt.Chart(movies).mark_circle().encode(
x='Rotten_Tomatoes_Rating',
y='IMDB_Rating',
opacity=alt.condition(
select_year,
alt.value(0.7), alt.value(0.1)))
# Combine the two charts, scatter plot first.
scatter_plot & bar_slider

Related

Facets not working properly plotly express

import plotly.graph_objects as go
import plotly.express as px
fig = px.histogram(df, nbins = 5, x = "numerical_col", color = "cat_1", animation_frame="date",
range_x=["10000","500000"], facet_col="cat_2")
fig.update_layout(
margin=dict(l=25, r=25, t=20, b=20))
fig.show()
How can I fix the output? I would like multiple subplots based on cat_2 where the hue is cat_1.
you have not provided sample data, so I've simulated it based on code you are using to generate figure
I have encountered one issue range_x does not work, it impacts y-axis as well. Otherwise approach fully works.
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
# data not provided.... simulate some
DAYS = 5
ROWS = DAYS * 2000

# Build the simulated frame in one chained expression: raw columns first,
# then the derived string date, then the sort.
df = (
    pd.DataFrame(
        dict(
            # each of the DAYS dates repeated for an equal share of the rows
            date_d=np.repeat(pd.date_range("1-Jan-2021", periods=DAYS), ROWS // DAYS),
            numerical_col=np.random.uniform(10000, 500000, ROWS),
            cat_1=np.random.choice(list("ABCD"), ROWS),
            cat_2=np.random.choice(list("UVWXYZ"), ROWS),
        )
    )
    # the animation frame must be a string rather than a date
    .assign(date=lambda frame: frame["date_d"].dt.strftime("%Y-%b-%d"))
    # hand plotly data that is already sorted
    .sort_values(["date", "cat_1", "cat_2"])
)
fig = px.histogram(
df,
nbins=5,
x="numerical_col",
color="cat_1",
animation_frame="date",
# range_x=[10000, 500000],
facet_col="cat_2",
)
fig.update_layout(margin=dict(l=25, r=25, t=20, b=20))

How to implement date range for bar chart

I have generated a bar chart in plotly. Every bar corresponds to a date on the x-axis. How can I implement some sort of date picker widget, so that I can see the bar for a particular date or for a range of dates? Is there any module already available for this job? And could the same solution also be applied to a sunburst chart?
part 1 - Bar chart with "date picker"
create some sample data for plotting
simple case of using https://plotly.com/python/range-slider/
import pandas as pd
import numpy as np
import plotly.express as px
# 100 consecutive days plus three random series "a", "b", "c", where series
# number i is drawn uniformly from [i, i + 1).
df = pd.DataFrame(
    {
        "date": pd.date_range("1-apr-2021", periods=100),
        **{name: np.random.uniform(low, low + 1, 100) for low, name in enumerate("abc")},
    }
)
px.bar(df, x="date", y=list("abc")).update_layout(xaxis={"rangeslider":{"visible":True}})
part 2 - date picker on sunburst
using sample data again, assuming date picker applies to rows that form part of sunburst
use dash https://dash.plotly.com/dash-core-components/datepickerrange
simple callback to filter data in plot
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import plotly.express as px
import pandas as pd
import numpy as np
from jupyter_dash import JupyterDash
app = JupyterDash(__name__)
app.layout = html.Div(
[
dcc.DatePickerRange(
id="date-picker",
start_date=df["date"].min(),
end_date=df["date"].max(),
display_format="MMMM Y, DD",
),
dcc.Graph(id="sunburst-fig"),
]
)
@app.callback(
    Output("sunburst-fig", "figure"),
    [Input("date-picker", "start_date"), Input("date-picker", "end_date")],
)
def update_graph(start_date, end_date):
    """Rebuild the sunburst figure for the date range chosen in the picker.

    Registered as the callback that writes the ``figure`` property of
    ``sunburst-fig`` whenever the picker's ``start_date`` or ``end_date``
    changes.

    Args:
        start_date: Start of the range as supplied by the date picker, or a
            falsy value when nothing is selected.
        end_date: End of the range as supplied by the date picker, or a
            falsy value when nothing is selected.

    Returns:
        A sunburst figure with path month -> day and values taken from
        column ``a``, built from the rows of ``df`` whose ``date`` lies in
        the inclusive range. When either bound is missing, all rows are used.
    """
    if start_date and end_date:
        # Convert both bounds together so they are compared as datetimes.
        range_start, range_end = pd.to_datetime([start_date, end_date])
        dfp = df.loc[df["date"].between(range_start, range_end)]
    else:
        # An incomplete range would filter everything out; show all data.
        dfp = df
    # Derive the hierarchy columns from the filtered rows themselves.
    fig = px.sunburst(
        dfp.assign(
            day=dfp["date"].dt.strftime("%a"),
            month=dfp["date"].dt.strftime("%b"),
        ),
        path=["month", "day"],
        values="a",
    )
    return fig
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    # app.run_server(debug=True)
    app.run_server(mode="inline")

Empty plot on Bokeh Tutorial Exercise

I'm following the bokeh tutorial and in the basic plotting section, I can't manage to show a plot. I only get the axis. What am I missing?
Here is the code:
df = pd.DataFrame.from_dict(AAPL)
weekapple = df.loc["2000-03-01":"2000-04-01"]
p = figure(x_axis_type="datetime", title="AAPL", plot_height=350, plot_width=800)
p.xgrid.grid_line_color=None
p.ygrid.grid_line_alpha=0.5
p.xaxis.axis_label = 'Time'
p.yaxis.axis_label = 'Value'
p.line(weekapple.date, weekapple.close)
show(p)
I get this:
My result
I'm trying to complete the exercise here (10th Code cell - Exercise with AAPL data) I was able to follow all previous code up to that point correctly.
Thanks in advance!
In case this is still relevant, this is how you should do your selection:
df = pd.DataFrame.from_dict(AAPL)
# Convert date column in df from strings to the proper datetime format
date_format="%Y-%m-%d"
df["date"] = pd.to_datetime(df['date'], format=date_format)
# Use the same conversion for selected dates
weekapple = df[(df.date>=dt.strptime("2000-03-01", date_format)) &
(df.date<=dt.strptime("2000-04-01", date_format))]
p = figure(x_axis_type="datetime", title="AAPL", plot_height=350, plot_width=800)
p.xgrid.grid_line_color=None
p.ygrid.grid_line_alpha=0.5
p.xaxis.axis_label = 'Time'
p.yaxis.axis_label = 'Value'
p.line(weekapple.date, weekapple.close)
show(p)
To make this work, before this code, I have (in my Jupyter notebook):
import numpy as np
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
import bokeh
import pandas as pd
from datetime import datetime as dt
bokeh.sampledata.download()
from bokeh.sampledata.stocks import AAPL
output_notebook()
As described at https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.loc.html, .loc is used in operations with the index (or boolean lists); date is not in the index in your dataframe (it is a regular column).
I hope this helps.
Your dataframe sub-view is empty:
In [3]: import pandas as pd
...: from bokeh.sampledata.stocks import AAPL
...: df = pd.DataFrame.from_dict(AAPL)
...: weekapple = df.loc["2000-03-01":"2000-04-01"]
In [4]: weekapple
Out[4]:
Empty DataFrame
Columns: [date, open, high, low, close, volume, adj_close]
Index: []

x axis labels (date) slips in Python matplotlib

I'm a beginner in Python and I have the following problem. I would like to plot a dataset where the x-axis shows date data. The dataset looks as follows:
datum, start, end
2017.09.01 38086 37719,8984
2017.09.04 37707.3906 37465.2617
2017.09.05 37471.5117 37736.1016
2017.09.06 37723.5898 37878.8594
2017.09.07 37878.8594 37783.5117
2017.09.08 37764.7383 37596.75
2017.09.11 37615.5117 37895.8516
2017.09.12 37889.6016 38076.8789
2017.09.13 38089.1406 38119.0898
2017.09.14 38119.2617 38243.1992
2017.09.15 38243.7188 38325.9297
2017.09.18 38325.3086 38387.2188
2017.09.19 38387.2188 38176.0781
2017.09.20 38173.2109 38108.0391
2017.09.21 38107.2617 38109.2109
2017.09.22 38110.4609 38178.6289
2017.09.25 38121.9102 38107.8711
2017.09.26 38127.25 37319.2383
2017.09.27 37360.8398 37244.3008
2017.09.28 37282.1094 37191.6484
2017.09.29 37192.1484 37290.6484
In the first column are the labels of the x-axis (this is the date).
When I write the following code the x axis data slips:
import pandas as pd
import matplotlib.pyplot as plt
bux = pd.read_csv('C:\\Home\\BUX.txt',
sep='\t',
decimal='.',
header=0)
fig1 = bux.plot(marker='o')
fig1.set_xticklabels(bux.datum, rotation='vertical', fontsize=8)
The resulting figure looks as follows:
The second data row in the dataset is '2017.09.04 37707.3906 37465.2617', BUT the label '2017.09.04' is shown at the third data row, with start value=37471.5117
What shall I do to get correct x axis labels?
Thank you!
Agnes
First, there is a comma in the second line instead of a decimal point (37719,8984). This should be adjusted. Then, you convert the "datum," column to actual dates and simply plot the dataframe with matplotlib.
import pandas as pd
import matplotlib.pyplot as plt
# Columns are separated by runs of whitespace. The pattern is a raw string so
# that "\s" reaches the regex engine instead of being treated as an (invalid)
# string escape.
df = pd.read_csv('data/BUX.txt', sep=r'\s+')
# The header is "datum, start, end", so splitting on whitespace leaves the
# commas attached to the first two column names.
df["datum,"] = pd.to_datetime(df["datum,"], format="%Y.%m.%d")
plt.plot(df["datum,"], df["start,"], marker="o")
plt.plot(df["datum,"], df["end"], marker="o")
# Rotate and align the date tick labels.
plt.gcf().autofmt_xdate()
plt.show()
Thank you! It works perfectly. The key moment was to convert the data to date format. Thank you again!
Agnes
Actually you can easily use the df.plot() to fix it:
import pandas as pd
import matplotlib.pyplot as plt
import io
t="""
date start end
2017.09.01 38086 37719.8984
2017.09.04 37707.3906 37465.2617
2017.09.05 37471.5117 37736.1016
2017.09.06 37723.5898 37878.8594
2017.09.07 37878.8594 37783.5117
2017.09.08 37764.7383 37596.75
2017.09.11 37615.5117 37895.8516
2017.09.12 37889.6016 38076.8789
2017.09.13 38089.1406 38119.0898
2017.09.14 38119.2617 38243.1992
2017.09.15 38243.7188 38325.9297
2017.09.18 38325.3086 38387.2188
2017.09.19 38387.2188 38176.0781
2017.09.20 38173.2109 38108.0391
2017.09.21 38107.2617 38109.2109
2017.09.22 38110.4609 38178.6289
2017.09.25 38121.9102 38107.8711
2017.09.26 38127.25 37319.2383
2017.09.27 37360.8398 37244.3008
2017.09.28 37282.1094 37191.6484
2017.09.29 37192.1484 37290.6484
"""
import numpy as np
data=pd.read_fwf(io.StringIO(t),header=1,parse_dates=['date'])
data.plot(x='date',marker='o')
plt.show()

Parse Years in Python 3.4 Pandas and Bokeh from counter dictionary

I'm struggling with creating a Bokeh time series graph from the output of the counter function from collections.
import pandas as pd
from bokeh.plotting import figure, output_file, show
import collections
plotyears = []
counter = collections.Counter(plotyears)
output_file("years.html")
p = figure(width=800, height=250, x_axis_type="datetime")
for number in sorted(counter):
yearvalue = number, counter[number]
p.line(yearvalue, color='navy', alpha=0.5)
show(p)
The output of yearvalue when printed is:
(2013, 132)
(2014, 188)
(2015, 233)
How can I make bokeh make the years as x-axis and numbers as y-axis. I have tried to follow the Time series tutorial, but I can't use the pd.read_csv and parse_dates=['Date'] functionalities since I'm not reading a csv file.
The simple way is to convert your data into a pandas DataFrame (with pd.DataFrame) and then create a datetime column from your year column.
simple example :
import pandas as pd
from bokeh.plotting import figure, output_notebook, show
output_notebook()
years = [2012, 2013, 2014, 2015]
val = [230, 120, 200, 340]
# Put the two lists side by side in a pandas DataFrame.
data = pd.DataFrame(dict(year=years, value=val))
# Add yearDate: the same years as the year column, parsed as datetimes.
data = data.assign(yearDate=pd.to_datetime(data["year"], format="%Y"))
# Create a line graph with datetime x axis and use datetime column(yearDate) for this axis
p = figure(width=800, height=250, x_axis_type="datetime")
p.line(x=data['yearDate'],y=data['value'])
show(p)

Resources