How to implement date range for bar chart - python-3.x

I have generated a bar chart in plotly. Every bar corresponds to a date on the x-axis. How can I implement something like a date picker widget, so that I can see the bar for a particular date or for a range of dates? Is there any module already available for this job? And could the same solution also be applied to a sunburst chart?

part 1 - Bar chart with "date picker"
create some sample data for plotting
simple case of using https://plotly.com/python/range-slider/
import pandas as pd
import numpy as np
import plotly.express as px
# Sample data: 100 consecutive days starting 1 Apr 2021, plus one column per
# letter whose values are uniform random numbers in [i, i + 1).
random_columns = {c: np.random.uniform(i, i + 1, 100) for i, c in enumerate("abc")}
df = pd.DataFrame({"date": pd.date_range("1-apr-2021", periods=100), **random_columns})
# Stacked bars for columns a/b/c over the date axis, with a range slider
# under the x-axis for zooming to a date window.
px.bar(df, x="date", y=["a", "b", "c"]).update_layout(
    xaxis=dict(rangeslider=dict(visible=True))
)
part 2 - date picker on sunburst
using sample data again, assuming date picker applies to rows that form part of sunburst
use dash https://dash.plotly.com/dash-core-components/datepickerrange
simple callback to filter data in plot
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import plotly.express as px
import pandas as pd
import numpy as np
from jupyter_dash import JupyterDash
# Date-range picker pre-populated with the full span of dates present in `df`.
date_picker = dcc.DatePickerRange(
    id="date-picker",
    start_date=df["date"].min(),
    end_date=df["date"].max(),
    display_format="MMMM Y, DD",
)

# Dash app for use inside Jupyter: the picker sits above a graph placeholder
# (no figure is assigned to the graph here).
app = JupyterDash(__name__)
app.layout = html.Div([date_picker, dcc.Graph(id="sunburst-fig")])
@app.callback(
    Output("sunburst-fig", "figure"),
    [Input("date-picker", "start_date"), Input("date-picker", "end_date")],
)
def update_graph(start_date, end_date):
    """Build the sunburst figure from the rows of ``df`` in the picked range.

    Args:
        start_date: Value of the picker's ``start_date`` property (may be None).
        end_date: Value of the picker's ``end_date`` property (may be None).

    Returns:
        A sunburst figure with path month -> weekday, sized by column ``a``.
    """
    if start_date and end_date:
        first, last = pd.to_datetime([start_date, end_date])
        dfp = df.loc[df["date"].between(first, last)]
    else:
        # Incomplete range (either bound missing): show everything rather than
        # comparing against NaT, which would match no rows.
        dfp = df
    # Derive the hierarchy labels from the filtered frame itself.
    dfp = dfp.assign(
        day=dfp["date"].dt.strftime("%a"),
        month=dfp["date"].dt.strftime("%b"),
    )
    return px.sunburst(dfp, path=["month", "day"], values="a")
if __name__ == "__main__":
    # Alternative: app.run_server(debug=True)
    # mode="inline" renders the app inside the notebook output cell.
    app.run_server(mode="inline")

Related

Bokeh plot title 'str' object is not callable

In Jupyter Notebooks I read in a dataframe and create several plots with Pandas / Bokeh.
While creating one of the latter I get an error.
Search for similar problems said, that there might be somewhere above in the script something like
plt.title = "Title"
which overwrites the method. But this is not the case for me. I have nothing similar in the code above - except in the figure parameters. There I set the figure title the way the Bokeh documentation describes it.
Running the part of the code that triggers the error in the complete notebook as a stand-alone script does NOT lead to the error. So in my case, too, the problem probably has something to do with my code above. But maybe one of you has an idea when seeing this..(?)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.io import output_notebook
from bokeh.layouts import column, gridplot
from bokeh.models import Label, Title
from bokeh.models import Div
data = df
output_notebook()

# Title of the overall plot
abovetitle = "This should be the overall title of all graphs"

# GRAPH 1
s1 = figure(width = 250, plot_height = 250, title="Graph 1", x_axis_label = "axis title 1", y_axis_label = 'µs')
s1.line(x, y, width=1, color="black", alpha=1, source = data)
# s1.title.text = "Title With Options"  # tried instead of `title=`; does not solve the problem

# GRAPH 2
s2 = figure(width = 250, plot_height = 250, title="Graph 2", x_axis_label = "axis title 2", y_axis_label = 'µs')
s2.line(x, y, width=1, color="blue", alpha=1, source = data)
# s2.title.text = "Title With Options"  # tried instead of `title=`; does not solve the problem

# plot graphs:
p = gridplot([[s1, s2]])
# `column` must still be the function imported from bokeh.layouts at this point.
show(column(Div(text=abovetitle), p))
leads to the type error
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-24-33e4828b986d> in <module>
31 # plot graphs:
32 p = gridplot([[s1, s2]])
---> 33 show(column(Div(text=title), p))
TypeError: 'str' object is not callable
Recalling
import matplotlib.pyplot as plt
does not solve the problem. Hence, recalling
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.io import output_notebook
from bokeh.layouts import column, gridplot
from bokeh.models import Label, Title
from bokeh.models import Div
solves the problem. Any further idea what might cause this error?
In the meantime I got a very useful hint: in one of the prior cells I accidentally used a Bokeh API function name as a variable name and thereby overwrote the function. If someone faces a comparable problem, have a look at your variable naming. Maybe the same accident happened there... ;-)
#############################
# Define column names of XData binary part
header = ["Col1", "Col2", "Col3"]
# Split XData in single, space separated columns
x_df = selected_df.XData.str.split(' ', expand=True)
x_df.drop(0, inplace=True, axis=1)
x_df.columns = header
#print(x_df)
# XData strings to integer (parsed with base=16)
# DON'T DO THAT!!!!! Using `column` as the loop variable rebinds the name of the
# Bokeh API function `column`, so a later `column(...)` call fails with
# "'str' object is not callable". Use e.g. `col` instead of `column`.
# (The bad name is kept here on purpose: this snippet shows the mistake.)
for column in x_df:
    x_df[column] = x_df[column].apply(int, base=16)

Facets not working properly plotly express

import plotly.graph_objects as go
import plotly.express as px
# Animated histogram of `numerical_col`: one facet column per `cat_2` value,
# bars coloured by `cat_1`, one animation frame per `date`.
fig = px.histogram(
    df,
    x="numerical_col",
    nbins=5,
    color="cat_1",
    facet_col="cat_2",
    animation_frame="date",
    range_x=["10000", "500000"],
)
fig.update_layout(margin={"l": 25, "r": 25, "t": 20, "b": 20})
fig.show()
How can I fix the output? I would like multiple subplots based on cat_2 where the hue is cat_1.
you have not provided sample data, so I've simulated it based on code you are using to generate figure
I have encountered one issue: range_x does not work, as it impacts the y-axis as well. Otherwise the approach fully works.
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
# No sample data came with the question, so fabricate a comparable frame:
# DAYS consecutive dates with ROWS // DAYS rows each.
DAYS = 5
ROWS = DAYS * 2000
rows_per_day = ROWS // DAYS
simulated = {
    "date_d": np.repeat(pd.date_range("1-Jan-2021", periods=DAYS), rows_per_day),
    "numerical_col": np.random.uniform(10000, 500000, ROWS),
    "cat_1": np.random.choice(list("ABCD"), ROWS),
    "cat_2": np.random.choice(list("UVWXYZ"), ROWS),
}
# The animation frame has to be a string rather than a date, and plotly is
# best given pre-sorted data, so add a string date column and sort on it.
df = (
    pd.DataFrame(simulated)
    .assign(date=lambda frame: frame["date_d"].dt.strftime("%Y-%b-%d"))
    .sort_values(["date", "cat_1", "cat_2"])
)
# Same histogram as in the question, minus range_x (left commented out).
histogram_options = dict(
    x="numerical_col",
    nbins=5,
    color="cat_1",
    facet_col="cat_2",
    animation_frame="date",
    # range_x=[10000, 500000],
)
fig = px.histogram(df, **histogram_options)
fig.update_layout(margin={"l": 25, "r": 25, "t": 20, "b": 20})

Plotly: How to use a built-in colorscale in a pie chart?

I am just trying to learn python. Can someone help to understand how to add px.colors.qualitative.Pastel2 into the below code.
import dash
import dash_core_components as dcc
import dash_html_components as html
import pyodbc
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd
import plotly.express as px
# NOTE(review): the connection string embeds the user name and password in
# source; consider loading them from configuration instead.
connection_string = (
    'Driver={SQL Server};'
    'Server=sqlserver;'
    'Database=mydatabase;'
    'UID =sa;'
    'PWD = sa123;'
)
connection = pyodbc.connect(connection_string)
app = dash.Dash()

# Pie chart: labels from `columna`, slice sizes from `columnb`.
sql_data = pd.read_sql_query("select columna, columnb from table", connection)
piechart = go.Pie(labels=sql_data['columna'], values=sql_data['columnb'])
pie_figure = {'data': [piechart], 'layout': {'title': 'PieChart Sample'}}
app.layout = html.Div(children=[dcc.Graph(id='example', figure=pie_figure)])
Thanks
Use something like color_discrete_sequence=px.colors.qualitative.G10 when creating your figure. You can also update the layout's colorway attribute. You can find more info, as well as a visualization of the included color scales, here: https://plotly.com/python/discrete-color/. Cheers!

Altair adding date slider for interactive scatter chart plot

Has anyone tried using date as a slider for Altair interactive scatter plots?
I'm trying to reproduce a similar plot to the gapminder scatter:
1) Instead of a year filter I'm trying to use a date, e.g. '2020-01-05', and I get the following error:
altair.vegalite.v4.schema.core.BindRange->max, validating 'type'
'2020-05-17T00:00:00' is not of type 'number'
2) When I try to parse it as an int, nothing shows up in the plot
3) Examples of using the Year slider: https://www.datacamp.com/community/tutorials/altair-in-python
https://altair-viz.github.io/gallery/multiple_interactions.html
4) Also a timestamp option wouldn't be ideal as the date needs to be readable
Would appreciate any help. Thanks
#Date Slider
from altair import datum
from datetime import datetime
import altair as alt
import pandas as pd
import numpy as np
import datetime as dt
# Slider bound to the min/max of the `date` column. These bounds are passed as
# dates, not numbers, which is what the schema validation error complains about.
date_slider = alt.binding_range(min=min(df['date']), max=max(df['date']), step=1)
slider_selection = alt.selection_single(
    bind=date_slider,
    fields=['date'],
    name="Date",
    init={'week_starting': max(df['date'])},
)

# Scatter of month-over-month vs. year-over-year change, filtered by the slider.
alt.Chart(df).mark_point(filled=True).encode(
    x='mom_pct',
    y='yoy_pct',
    size='n_queries',
    color='vertical',
    tooltip=['vertical', 'yoy_pct', 'mom_pct']
).properties(
    width=800,
    height=600
).add_selection(slider_selection).transform_filter(slider_selection)
Vega-Lite sliders do not support datetime display, but it is possible to display timestamps. Here is a full example (I didn't base it off of your code, because you did not provide any data):
import altair as alt
import pandas as pd
import numpy as np
from datetime import datetime
# 100 consecutive daily timestamps starting now, paired with a seeded
# (reproducible) random walk.
datelist = pd.date_range(datetime.today(), periods=100).tolist()
rand = np.random.RandomState(42)
random_walk = rand.randn(100).cumsum()
df = pd.DataFrame({'xval': datelist, 'yval': random_walk})
def timestamp(t):
    """Return *t* as milliseconds since the Unix epoch.

    Args:
        t: Anything ``pd.to_datetime`` accepts as a single value, e.g. a date
            string, a ``datetime`` or a ``pd.Timestamp``.

    Returns:
        The epoch time of ``t`` in milliseconds, as a float.
    """
    # Timestamp.timestamp() yields seconds; scale to milliseconds
    # (presumably the unit the slider cutoff is compared in - confirm).
    return pd.to_datetime(t).timestamp() * 1000
# Numeric slider spanning the data's date range, expressed via timestamp().
slider = alt.binding_range(
    name='cutoff:',
    min=timestamp(min(datelist)),
    max=timestamp(max(datelist)),
)
selector = alt.selection_single(
    name="SelectorName",
    fields=['cutoff'],
    bind=slider,
    init={"cutoff": timestamp("2020-05-05")},
)

# Points dated before the slider cutoff are fully opaque, the rest invisible.
cutoff_opacity = alt.condition(
    'toDate(datum.xval) < SelectorName.cutoff[0]',
    alt.value(1),
    alt.value(0),
)
alt.Chart(df).mark_point().encode(
    x='xval', y='yval', opacity=cutoff_opacity
).add_selection(selector)
Unfortunately, Vega-Lite does not currently provide any native way to create a slider that displays a formatted date.
Another way to work around this issue is to use another chart in place of the slider. This lets you see the date and also use a range for the selection, which is also not possible with sliders at the moment.
import altair as alt
from vega_datasets import data
import pandas as pd
# Parse Release_Date into real datetimes, keep releases between 1960 and 2010,
# and draw a reproducible sample of 1,000 films.
raw_movies = data.movies()
dated_movies = raw_movies.assign(Release_Date=pd.to_datetime(raw_movies['Release_Date']))
movies = dated_movies.query('1960 < Release_Date < 2010').sample(1_000, random_state=90384)

# Interval (brush) selection along x; the small bar chart acts as the "slider".
select_year = alt.selection_interval(encodings=['x'])
bar_slider = (
    alt.Chart(movies)
    .mark_bar()
    .encode(x='year(Release_Date)', y='count()')
    .properties(height=50)
    .add_selection(select_year)
)

# Films inside the brushed years are emphasised, the rest faded.
brush_opacity = alt.condition(select_year, alt.value(0.7), alt.value(0.1))
scatter_plot = alt.Chart(movies).mark_circle().encode(
    x='Rotten_Tomatoes_Rating',
    y='IMDB_Rating',
    opacity=brush_opacity,
)
scatter_plot & bar_slider

Implementing ipywidget slider for time

I am trying to create a slider for time in Jupyter Notebook using ipywidgets. I would like to take the tabulated experimental data (see figure below) and control the value bounds with the help of a slider. The graph should be a force-displacement graph, evolving in time:
This is my python code:
from ipywidgets import IntSlider, interact, FloatSlider
u = fdat1['C_1_Weg_R4[mm]'].values
f = fdat1['C_1_Kraft_R4[kN]'].values
t = fdat1['S/No'].values


# NOTE(review): the slider maximum reads `fdat0`, while the arrays above come
# from `fdat1` - confirm which frame is intended.
@interact(t = IntSlider(min = 0, max = max(fdat0['S/No'].values)))
def aa_(t):
    """Plot the sample at slider position ``t`` on force/displacement axes."""
    # f[t] and u[t] are indexed with the slider value passed in as `t`.
    plt.plot(f[t],u[t])
    plt.grid()
    plt.xlabel("force [kN]")
    plt.ylabel("displacement [mm]")
    plt.title("Load-displacement curve for \nexperiment")
fdat1 is the name of the tabulated data. I have also considered using "C_1_Zeit[s]" column as my slider values, but these are not integer values.
The problem is that nothing gets plotted, but the slider works and the graph changes scale.
I have been searching online for some time now and would really appreciate some help.
Thank you in advance!
Edit:
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Two linearly spaced 30-value series, transposed into columns A and B.
df = pd.DataFrame.from_records(
    [np.linspace(0,30, num=30), np.linspace(0,20, num=30), ]).T
df.columns=['A', 'B']


@interact(t = IntSlider(min = 0, max = 21))
def aa_(t):
    """Scatter column B against column A on fixed 0-30 axes."""
    # `t` is not used in the body, so every slider position draws all rows.
    plt.scatter(df['A'], df['B'])
    plt.grid()
    plt.xlabel("force [kN]")
    plt.ylabel("displacement [mm]")
    plt.title("Load-displacement curve for \nexperiment")
    plt.xlim(0, 30)
    plt.ylim(0, 30)
Inside your plotting function, create a slice of your results dataframe that slices based on the slider value.
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Two linearly spaced 30-value series, transposed into columns A and B.
results = pd.DataFrame.from_records(
    [np.linspace(0,30, num=30), np.linspace(0,20, num=30), ]).T
results.columns=['A', 'B']


@interact(t = IntSlider(min = 0, max = 21))
def aa_(t):
    """Scatter the first ``t`` rows of ``results`` on fixed 0-30 axes."""
    df = results.iloc[:t] # make the slice here
    plt.scatter(df['A'], df['B'])
    plt.grid()
    plt.xlabel("force [kN]")
    plt.ylabel("displacement [mm]")
    plt.title("Load-displacement curve for \nexperiment")
    # Fixed limits keep the axes from rescaling as the slice grows.
    plt.xlim(0, 30)
    plt.ylim(0, 30)
So, basically, this should have been the correct code:
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
u = fdat1['C_1_Weg_R4[mm]'].values #loads displacement values from fdat1
f = fdat1['C_1_Kraft_R4[kN]'].values #loads force values from fdat1
df = pd.DataFrame.from_dict([u,f]).T #creates a dataframe
df.columns=['A', 'B']


#interactive scatterplot with a slider for time
@interact(t = IntSlider(min = 0, max = df.shape[0]))
def scatterplot_(t):
    """Scatter rows ``0..t`` (label-based, inclusive) of ``df``."""
    # NOTE(review): column A holds `u` (displacement) but is drawn on the axis
    # labelled "force [kN]" - confirm whether the labels or columns are swapped.
    plt.scatter(df.loc[0:t,'A'], df.loc[0:t,'B'])
    plt.grid()
    plt.xlabel("force [kN]")
    plt.ylabel("displacement [mm]")
    plt.title("Load-displacement curve for \nexperiment")
    plt.xlim(-5, 5)
    plt.ylim(-25, 25)

Resources