I am visualizing the data stored in pandas dataframe via plotly.
import plotly.graph_objects as go
import numpy as np
import plotly.express as px
# ref: https://plotly.com/python/sliders/
if __name__ == '__main__':
    # Work on the first ten iris samples only.
    df = px.data.iris()
    df = df.iloc[0:10, :]
    # print(df)

    # One petal-width value per trace in `data`, in the same order.
    petal_width = [1.3, 1.4, 1.4]

    # Three curves over the same x values; the 2nd and 3rd are the first
    # curve shifted up by 2 and 3 respectively.
    data = [
        go.Scatter(
            mode="lines+markers",
            x=df['sepal_width'],
            y=df['sepal_length'],
        ),
        go.Scatter(
            mode="lines+markers",
            x=df['sepal_width'],
            y=df['sepal_length'] + 2,
        ),
        go.Scatter(
            mode="lines+markers",
            x=df['sepal_width'],
            y=df['sepal_length'] + 3,
        ),
    ]

    # Build the figure once, directly from the traces.
    fig = go.Figure(data=data)

    # Slider attempt, kept disabled:
    # slides = []
    # for i in range(3):
    #     slide = dict(
    #         method="update",
    #         args=[{"visible": [False] * len(fig.data)},
    #               {"title": "Slider switched to step: " + str(i)}],  # layout attribute
    #     )
    #     slide["args"][0]["visible"][i] = True  # Toggle i'th trace to "visible"
    #     slides.append(slide)
    #
    # sliders = [
    #     dict(
    #         active=10,
    #         currentvalue={"prefix": "Frequency: "},
    #         pad={"t": 50},
    #         steps=slides
    #     )
    # ]
    # fig.update_layout(
    #     sliders=slider
    # )
    # fig.show()

    # Mode 'a' appends: every run adds another copy of the figure markup to
    # check.html rather than replacing the file.
    with open("check.html", 'a', encoding="utf-8") as f:
        f.write(fig.to_html(full_html=False, include_plotlyjs='cdn'))
Plot:
This figure shows three lines corresponding to the three datasets in data. These three curves are associated with the 3 values stored in petal_width = [1.3, 1.4, 1.4].
I would like to add a slider like the one shown in the example presented [here](https://plotly.com/python/line-and-scatter/) (please see the example plot below).
I'm not really sure how to add the slider. Basically, I want to add the slider and based on the value selected in the slider the corresponding curves in the plot should appear. e.g. if 1.4 is selected in the slider (petal_width), I want the second and third curves to appear.
Suggestions will be helpful.
# Build one slider step per entry of `petal_width`. `petal_width` must hold
# exactly one value per trace of `fig`, in trace order. Selecting a step shows
# every trace whose petal-width value equals that step's value and hides the
# rest, so traces that share a value appear together.
steps = []
for i, val in enumerate(petal_width):
    step = dict(
        method="update",
        args=[{"visible": [width == val for width in petal_width]},
              {"title": "Slider switched to step: " + str(i)}],  # layout attribute
    )
    steps.append(step)

active_step = 1
sliders = [dict(
    active=active_step,
    currentvalue={"prefix": "Frequency: "},
    pad={"t": 50},
    steps=steps
)]

# Make the initial rendering agree with the slider's starting position;
# otherwise every trace is drawn until the slider is first moved.
for trace, is_visible in zip(fig.data, steps[active_step]["args"][0]["visible"]):
    trace.visible = is_visible

fig.update_layout(
    sliders=sliders
)
You just need to fix the colors, for all 3 categories.
steps output:
[{'method': 'update',
'args': [{'visible': [True, False, False]},
{'title': 'Slider switched to step: 0'}]},
{'method': 'update',
'args': [{'visible': [False, True, True]},
{'title': 'Slider switched to step: 1'}]},
{'method': 'update',
'args': [{'visible': [False, True, True]},
{'title': 'Slider switched to step: 2'}]}]
Output :
Step:0
Step:1 and 2
Related
I want to create three filters for my 3D scatter plot.
Based on what I read here: https://medium.com/left-join/building-a-plotly-dashboard-with-dynamic-sliders-in-python-f5cf84161dc5 and here: https://plotly.com/python/3d-scatter-plots/ I should put only the sliders in the layout and pass them to @app.callback as a list of inputs.
I tried like this:
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
app = Dash(__name__)

# Page layout: the 3D graph followed by one range slider per filtered column.
app.layout = html.Div([
    html.H4('Iris samples filtered by petal width'),
    dcc.Graph(id="3d-scatter-plot-x-graph"),
    html.P("Petal Width:"),
    dcc.RangeSlider(
        id='3d-scatter-plot-x-range-slider',
        min=0, max=2.5, step=0.1,
        marks={0: '0', 2.5: '2.5'},
        value=[0.5, 2]
    ),
    html.P("Sepal Length:"),
    dcc.RangeSlider(
        id='3d-scatter-plot-y-range-slider',
        min=0, max=2.5, step=0.1,
        marks={0: '0', 2.5: '2.5'},
        value=[0.5, 2]
    ),
    html.P("Sepal Width:"),
    dcc.RangeSlider(
        id='3d-scatter-plot-z-range-slider',
        min=0, max=5, step=0.1,
        marks={0: '0', 5: '5'},
        value=[0.5, 4.5]
    ),
])


@app.callback(
    Output("3d-scatter-plot-x-graph", "figure"),
    [Input("3d-scatter-plot-x-range-slider", "value"),
     Input("3d-scatter-plot-y-range-slider", "value"),
     Input("3d-scatter-plot-z-range-slider", "value")])
def update_bar_chart(slider_x, slider_y, slider_z):
    """Rebuild the 3D scatter from the current range-slider selections.

    Each argument is the ``[low, high]`` value of one slider. Rows are kept
    when petal_width lies strictly inside ``slider_x`` and sepal_length lies
    strictly inside ``slider_y``. ``slider_z`` is unpacked but does not take
    part in the mask.
    """
    df = px.data.iris()  # replace with your own data source
    low_x, high_x = slider_x
    low_y, high_y = slider_y
    low_z, high_z = slider_z
    mask = (
        (df.petal_width > low_x) & (df.petal_width < high_x)
        & (df.sepal_length > low_y) & (df.sepal_length < high_y)
    )
    fig = px.scatter_3d(df[mask],
                        x='sepal_length', y='sepal_width', z='petal_width',
                        color="species", hover_data=['petal_width'])
    return fig


if __name__ == "__main__":
    app.run_server(debug=True)
But I got nothing (empty plot with sliders)
And I didn't get any errors.
Any idea what can I do?
As you can see in @Kat's comment, you have to change the slider ranges. Also, as far as I can see, the Sepal Width slider has no effect, so you have to add it to the update function.
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
app = Dash(__name__)

# Page layout: the 3D graph followed by one range slider per filtered column.
# Each slider's min/max, marks and default value are kept inside the value
# range of the iris column it filters, so no sample is unreachable.
app.layout = html.Div([
    html.H4('Iris samples filtered by petal width'),
    dcc.Graph(id="3d-scatter-plot-x-graph"),
    html.P("Petal Width:"),
    dcc.RangeSlider(
        id='3d-scatter-plot-x-range-slider',
        min=0, max=2.5, step=0.1,
        marks={0: '0', 2.5: '2.5'},
        value=[0.5, 2]
    ),
    html.P("Sepal Length:"),
    dcc.RangeSlider(
        id='3d-scatter-plot-y-range-slider',
        min=4.3, max=7.9, step=0.1,
        marks={4.3: '4.3', 7.9: '7.9'},
        value=[4.3, 7.9]
    ),
    html.P("Sepal Width:"),
    dcc.RangeSlider(
        id='3d-scatter-plot-z-range-slider',
        min=2, max=4.4, step=0.1,
        marks={2: '2', 4.4: '4.4'},
        value=[2, 4.4]
    ),
])


@app.callback(
    Output("3d-scatter-plot-x-graph", "figure"),
    [Input("3d-scatter-plot-x-range-slider", "value"),
     Input("3d-scatter-plot-y-range-slider", "value"),
     Input("3d-scatter-plot-z-range-slider", "value")])
def update_bar_chart(slider_x, slider_y, slider_z):
    """Rebuild the 3D scatter from the current range-slider selections.

    Each argument is the ``[low, high]`` value of one slider: ``slider_x``
    filters petal_width, ``slider_y`` sepal_length and ``slider_z``
    sepal_width. A row is kept only when all three columns fall inside
    their selected range.

    Returns the plotly figure for the filtered rows.
    """
    df = px.data.iris()  # replace with your own data source
    # between() is inclusive on both ends, so samples sitting exactly on a
    # slider handle (e.g. the column minimum or maximum) are not dropped.
    mask = (
        df.petal_width.between(*slider_x)
        & df.sepal_length.between(*slider_y)
        & df.sepal_width.between(*slider_z)
    )
    fig = px.scatter_3d(df[mask],
                        x='sepal_length', y='sepal_width', z='petal_width',
                        color="species", hover_data=['petal_width'])
    return fig


if __name__ == "__main__":
    app.run_server(debug=True)
I am trying to create a seaborn lineplot with sns.relplot using markers to distinguish between state changes over time, along with some arbitrary value on the y-axis.
Using the dataset below:
# Sample data: one patient's diagnosis and calculated value over time.
df = pd.DataFrame({
    'NAME': ['29078719', '29078719', '29078719', '29078719', '29078719', '29078719', '29078719'],
    'DIAGNOSIS': ['negative', 'negative', 'positive', 'positive', 'positive', 'positive', 'negative'],
    'ENTRY_DATE': ['2014-01-23 15:13:54', '2015-03-06 15:57:16', '2016-02-26 14:40:53', '2016-02-26 14:40:53', '2017-11-24 15:20:38', '2020-01-29 13:41:24', '2020-03-30 12:11:24'],
    'CALCULATED_VALUE': [0.456957, 0.468468, 0.865333, 0.896950, 0.920930, 0.767100, 0.835690],
})
# Parse with an explicit format instead of the deprecated
# infer_datetime_format flag.
df['ENTRY_DATE'] = pd.to_datetime(df['ENTRY_DATE'], format='%Y-%m-%d %H:%M:%S')
NAME
DIAGNOSIS
ENTRY_DATE
CALCULATED_VALUE
0
29078719
negative
2014-01-23 15:13:54
0.456957
1
29078719
negative
2015-03-06 15:57:16
0.468468
2
29078719
positive
2016-02-26 14:40:53
0.865333
3
29078719
positive
2016-02-26 14:40:53
0.89695
4
29078719
positive
2017-11-24 15:20:38
0.92093
5
29078719
positive
2020-01-29 13:41:24
0.7671
6
29078719
negative
2020-03-30 12:11:24
0.83569
# Line plot of CALCULATED_VALUE over ENTRY_DATE; the line style and marker
# vary with DIAGNOSIS.
relplot_options = dict(
    kind='line',
    x='ENTRY_DATE',
    y='CALCULATED_VALUE',
    style='DIAGNOSIS',
    markers=True,
    linewidth=2.5,
    height=8.27,
    aspect=11.7/8.27,
)
sns.relplot(data=df, **relplot_options)
plt.show()
Would like to achieve:
Note: The data changed in question, and the graph matched output prior to data in the question and images changing. That's why the output in my question does not match the new desired output, but the same code should work with the different data.
relplot is a figure-level plot that isn't great for dynamically plotting in a nuanced way like this. As such, I don't know if you could do such a customized graph with seaborn, but you could use matplotlib and plot three separate lines depending on which grp each line falls in. This is pretty customized, but you could adjust the logic that creates the groups. I have used a combination of the grp number and shift to connect the lines and try to match your expected output:
df = pd.DataFrame({
    'NAME': ['29078719', '29078719', '29078719', '29078719', '29078719', '29078719', '29078719'],
    'DIAGNOSIS': ['negative', 'negative', 'positive', 'positive', 'positive', 'negative', 'negative'],
    'ENTRY_DATE': ['2014-01-23 15:13:54', '2015-03-06 15:57:16', '2016-02-26 14:40:53', '2016-02-26 14:40:53', '2017-11-24 15:20:38', '2017-11-24 15:20:38', '2020-01-29 13:41:24'],
    'CALCULATED_VALUE': [0.456957, 0.468468, 0.865333, 0.896950, 0.920930, 0.833549, 0.767100],
})
# Explicit format instead of the deprecated infer_datetime_format flag.
df['ENTRY_DATE'] = pd.to_datetime(df['ENTRY_DATE'], format='%Y-%m-%d %H:%M:%S')
# Stable sort: rows sharing a timestamp keep their original order, which the
# run numbering below depends on.
df = df.sort_values('ENTRY_DATE', kind='stable')
# Number each run of consecutive identical diagnoses: 1, 2, 3, ...
grp = (df['DIAGNOSIS'] != df['DIAGNOSIS'].shift()).cumsum()
plt.figure(dpi=125)
# Each segment is widened by shifting its mask so that neighbouring segments
# share end points and the drawn lines connect. fill_value keeps the masks
# boolean (shift().fillna() would turn them into object dtype).
df1 = df[(grp == 1).shift(2, fill_value=True)]
df2 = df[(grp == 2).shift(fill_value=False) & (grp == 2)]
df3 = df[(grp == 3).shift(-1, fill_value=True)]
# Invisible line over the full data.
# NOTE(review): presumably here to fix the axis limits - confirm.
plt.plot(df['ENTRY_DATE'], df['CALCULATED_VALUE'], visible=False)
plt.plot(df1['ENTRY_DATE'], df1['CALCULATED_VALUE'], color='red',
         label=df['DIAGNOSIS'].iloc[0])
plt.plot(df2['ENTRY_DATE'], df2['CALCULATED_VALUE'], marker='x',
         linestyle='dashed', markersize=8, color='red', label=df['DIAGNOSIS'].iloc[2])
plt.plot(df3['ENTRY_DATE'], df3['CALCULATED_VALUE'], color='red')
plt.xlabel('ENTRY_DATE')
# The y values are CALCULATED_VALUE; DIAGNOSIS is shown through the legend.
plt.ylabel('CALCULATED_VALUE')
plt.legend()
plt.grid()
plt.show()
Another variation with the markers. One issue is that the x markers and o markers will overlap, so you can use the same marker style but a different line style. Otherwise, you might have to create more lines separating the points to get x and o markers as you've shown:
df = pd.DataFrame({
    'NAME': ['29078719', '29078719', '29078719', '29078719', '29078719', '29078719', '29078719'],
    'DIAGNOSIS': ['negative', 'negative', 'positive', 'positive', 'positive', 'negative', 'negative'],
    'ENTRY_DATE': ['2014-01-23 15:13:54', '2015-03-06 15:57:16', '2016-02-26 14:40:53', '2016-02-26 14:40:53', '2017-11-24 15:20:38', '2017-11-24 15:20:38', '2020-01-29 13:41:24'],
    'CALCULATED_VALUE': [0.456957, 0.468468, 0.865333, 0.896950, 0.920930, 0.833549, 0.767100],
})
# Explicit format instead of the deprecated infer_datetime_format flag.
df['ENTRY_DATE'] = pd.to_datetime(df['ENTRY_DATE'], format='%Y-%m-%d %H:%M:%S')
# Stable sort: rows sharing a timestamp keep their original order, which the
# run numbering below depends on.
df = df.sort_values('ENTRY_DATE', kind='stable')
# Number each run of consecutive identical diagnoses: 1, 2, 3, ...
grp = (df['DIAGNOSIS'] != df['DIAGNOSIS'].shift()).cumsum()
plt.figure(dpi=125)
# Each segment is widened by shifting its mask so that neighbouring segments
# share end points and the drawn lines connect. fill_value keeps the masks
# boolean (shift().fillna() would turn them into object dtype).
df1 = df[(grp == 1).shift(2, fill_value=True)]
df2 = df[(grp == 2).shift(fill_value=False) & (grp == 2)]
df3 = df[(grp == 3).shift(-1, fill_value=True)]
# Invisible line over the full data.
# NOTE(review): presumably here to fix the axis limits - confirm.
plt.plot(df['ENTRY_DATE'], df['CALCULATED_VALUE'], visible=False)
# Same 'o' marker on every segment; only the line style tells them apart.
plt.plot(df1['ENTRY_DATE'], df1['CALCULATED_VALUE'], color='red', marker='o',
         label=df['DIAGNOSIS'].iloc[0])
plt.plot(df2['ENTRY_DATE'], df2['CALCULATED_VALUE'], marker='o',
         linestyle='dashed', color='red', label=df['DIAGNOSIS'].iloc[2])
plt.plot(df3['ENTRY_DATE'], df3['CALCULATED_VALUE'], color='red', marker='o')
plt.xlabel('ENTRY_DATE')
# The y values are CALCULATED_VALUE; DIAGNOSIS is shown through the legend.
plt.ylabel('CALCULATED_VALUE')
plt.legend()
plt.grid()
plt.show()
I am trying to display only selected columns from my dataframe using a DataTable. I am able to select how many rows I want. I am looking for a similar option to choose, at the time of executing the code, which columns are displayed.
My dataframe has close to 25 columns . i dont want all of them to be displayed hence looking for this solution
here is my code :
import dash
import dash_core_components as dcc
import dash_bootstrap_components as dbc
import dash_html_components as html
import dash_table as dt
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd
# Raw string: the backslashes in the Windows path are literal, not escapes.
df = pd.read_csv(r'E:\pylab\dshlab\infratickets.csv', low_memory=False)
app = dash.Dash(__name__)

# One dropdown option per distinct assigned group.
dpdown = [{'label': group, 'value': group}
          for group in df['ASSIGNED_GROUP'].unique()]

app.layout = html.Div([
    html.P([
        html.Label("Choose a feature"),
        html.Div(dcc.Dropdown(id='dropdown', options=dpdown),
                 style={'width': '100px',
                        'fontSize': '10px',
                        'padding-left': '100px',
                        'display': 'inline-block'})]),
    html.Div(id='table-container', className='tableDiv'),
    dcc.Graph(id='plot', style={'height': '25%', 'width': '25%'})
])


@app.callback(
    dash.dependencies.Output('table-container', 'children'),
    [dash.dependencies.Input('dropdown', 'value')])
def display_table(dpdown):
    """Render the first five rows of the selected assigned group.

    ``dpdown`` is the current value of the dropdown (it shadows the
    module-level options list of the same name). Returns a Div wrapping a
    DataTable with every column of the filtered frame.
    """
    df_temp = df[df['ASSIGNED_GROUP'] == dpdown]
    return html.Div([
        dt.DataTable(
            id='main-table',
            columns=[{'name': i, 'id': i} for i in df_temp.columns],
            # 'records' gives one dict per row; the old 'rows' alias is no
            # longer accepted by pandas.
            data=df_temp[0:5].to_dict('records'),
            style_table={
                'maxHeight': '20%',
                'width': '30%',
                'minWidth': '10%',
            },
            style_header={'backgroundColor': 'rgb(30, 30, 30)'},
            style_cell={'backgroundColor': 'rgb(50, 50, 50)', 'color': 'white', 'height': 'auto', 'width': 'auto'},
            style_data={'whiteSpace': 'auto', 'height': 'auto', 'width': 'auto'}
        )
    ])


if __name__ == '__main__':
    app.run_server(debug=True)
Able to figure out the solution
changed the code
columns=[{'name': i, 'id': i} for i in df_temp.columns]
to
columns=[{'name': i, 'id': i} for i in df.loc[:, ['Colname1', 'Colname2', ...]]]
fixed it
You could also use by index:
# Load the dataframe, then reassign it with just the columns you want.
# Raw string: the backslashes in the Windows path are literal, not escapes.
df = pd.read_csv(r'E:\pylab\dshlab\infratickets.csv', low_memory=False)
df = df.iloc[:, 1:3]  # Remember that Python does not slice inclusive of the ending index.
Would give all rows and columns 1 to 2 of the data frame.
You can change the
columns=[{'name': i, 'id': i} for i in df_temp.columns],
as below:
First define TABLE_SELECTED_COLUMNS = ['col1', 'col2', ...]
and
columns=[{"name": i, "id": i} for i in TABLE_SELECTED_COLUMNS],
I have searched to exhaustion trying to annotate my grouped broken barH chart. I would like to have the "Event" from my dataframe annotated in each broken bar section. The examples I have found online manually enter the events x,y positions, AND, are not grouped broken bar examples.
The end goal is to have these events display on hover, but I believe I won't have an issue with that if I can just get the events to display.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import matplotlib.ticker as ticker
import io
pd.plotting.register_matplotlib_converters()
inp = u""" T29,11/4/2019 0:00,11/4/2019 0:00,off,none
T29,11/4/2019 0:00,11/5/2019 0:00,off,eventa
T29,11/5/2019 0:00,11/6/2019 0:00,on,none
T35,11/4/2019 0:00,11/5/2019 0:00,off,eventb
T35,11/5/2019 0:00,11/6/2019 0:00,paused,eventa
T43,11/4/2019 0:00,11/4/2019 4:01,on,none
T43,11/4/2019 4:01,11/4/2019 12:06,off,none
T43,11/4/2019 12:06,11/5/2019 8:07,on,eventc
T43,11/5/2019 8:07,11/5/2019 10:12,paused,eventd
T43,11/5/2019 10:12,11/5/2019 16:15,on,none
T43,11/5/2019 18:12,11/5/2019 20:15,off,none
"""
# header=0 together with names= makes pandas treat the first line of `inp`
# as a header row and replace it with `names`; that line is not loaded as data.
df = pd.read_csv(io.StringIO(inp), header=0, encoding="ISO-8859-1",
                 names=["Name", "StartTime", "FinishTime", "Status", "Event"])
# Parse the timestamps with an explicit format. pd.datetime no longer exists
# and read_csv's date_parser argument is deprecated.
for time_column in ("StartTime", "FinishTime"):
    df[time_column] = pd.to_datetime(df[time_column], format="%m/%d/%Y %H:%M")

# Bar colour per status.
color = {"on": "g", "paused": "yellow", "off": "black"}
df["Diff"] = df.FinishTime - df.StartTime
# Axis limits as midnight timestamps of the first start / last finish day.
# Real datetimes are passed (not ordinal integers) so the limits are correct
# whatever date epoch Matplotlib uses.
minDate = df.StartTime.min().normalize()
maxDate = df.FinishTime.max().normalize()
days = mdates.DayLocator()
Mcount = 0
fig, ax = plt.subplots(figsize=(6, 3), edgecolor="black", linewidth=1)
labels = []
# One horizontal lane per Name; within a lane, one broken_barh call per
# Status so each status gets its own colour.
for i, (name, name_rows) in enumerate(df.groupby("Name")):
    Mcount += 1
    labels.append(name)
    for status, status_rows in name_rows.groupby("Status"):
        data = status_rows[["StartTime", "Diff"]]
        ax.broken_barh(data.values, (i - 0.4, 0.8), edgecolor="black", alpha=1, linewidth=1,
                       color=color[status])
ax.set_ylim(bottom=-0.8, top=Mcount)
ax.set_yticks(range(len(labels)))
ax.set_yticklabels(labels)
ax.set_ylabel("Names", rotation=90, fontdict={'family': 'DejaVu Sans', 'color': 'black', 'weight': 'bold', 'size': 14})
ax.set_xlim(left=minDate, right=maxDate)
ax.set_xlabel("Date", fontdict={'family': 'DejaVu Sans', 'color': 'black', 'weight': 'bold', 'size': 14})
# Major ticks: one per day, labelled with the date.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d-%Y'))
ax.tick_params(which='major', axis='x', rotation=0, length=11, color='black')
ax.xaxis.set_major_locator(days)
# Minor ticks: every half day, labelled with the time.
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M'))
ax.tick_params(which='minor', rotation=0, labelsize=8, length=4, color='red', size=2)
ax.xaxis.set_minor_locator(ticker.MultipleLocator(.50))
plt.show()
Hello and welcome to StackOverflow. IIUC, you can append a for loop to your enumerate statement to add text to the axes.
# Draw one lane per Name and one set of bars per Status, then write each
# bar's own Event text at the centre of that bar.
for i, task in enumerate(df.groupby("Name")):
    Mcount += 1
    labels.append(task[0])
    for r in task[1].groupby("Status"):
        data = r[1][["StartTime", "Diff"]]
        ax.broken_barh(data.values,
                       (i - 0.4, 0.8),
                       edgecolor="black",
                       alpha=1,
                       linewidth=1,
                       color=color[r[0]]
                       )
        # Pair every (start, width) with the Event of the same row, so bars
        # that share a status still get their own label.
        for (x1, x2), event in zip(data.values, r[1]["Event"]):
            ax.text(x=x1 + x2/2,  # horizontal centre of the bar
                    y=i,
                    s=event,
                    ha='center',
                    va='center',
                    color='white',
                    )
Modified from the docs.
Output:
You can, of course, modify the text formatting.
The text requires an x location, a y location, and a string. The hacky indexing was the quickest way I could pull the event info out of your dataframe.
I am using a dataframe which includes the following columns:
Country, GNI, CarSalesPerCap. I am using kmeans to create clusters. In the algorithm i pass the dataframe with the two numeric columns: 'GNI', 'CarSalesPerCap'.
Then i am using plotly to create a scatter plot, where x-axis is the CarsalesPerCap and Y-axis is GNI. My question is, how am i going to add to the plot the corresponding country for each point plotted on the graph.
df = pd.read_sql_query(query, conn)
df = df.dropna()

# One marker colour per cluster; the list length sets the number of clusters.
cluster_colors = ['black', 'teal', 'grey', 'pink', 'purple', 'orange']

# Cluster the data
# NOTE(review): `df1` is not defined in this snippet - presumably the
# 'GNI' / 'CarSalesPerCap' columns of `df`; confirm it has the same rows as
# `df` after dropna(), otherwise the label assignment below will not line up.
kmeans = KMeans(n_clusters=len(cluster_colors), random_state=0).fit(df1)
labels = kmeans.labels_
# Glue back to original data
df['clusters'] = labels
# Let's analyze the clusters
print(df)

# Build one marker trace per cluster instead of six copy-pasted definitions.
traces = []
for cluster_id, cluster_color in enumerate(cluster_colors):
    members = df.loc[df['clusters'] == cluster_id]
    traces.append(
        go.Scatter(x=members['CarSalesPerCap'],
                   y=members['GNI'],
                   mode='markers',
                   marker=dict(color=cluster_color))
    )

layout = go.Layout(xaxis=dict(ticks='',
                              showticklabels=True,
                              zeroline=True,
                              title='CarSalesPerCap'),
                   yaxis=dict(ticks='',
                              showticklabels=True,
                              zeroline=True,
                              title='GNI'),
                   showlegend=False, hovermode='closest')
fig = go.Figure(data=traces, layout=layout)
py.offline.plot(fig)
You can add a text element to your trace and it will allow you to overlay anything you want. If you add your country column then it will be displayed on hover. If you want a permanent label you can add annotations
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import pandas as pd
# Minimal example: the `text` of a trace is shown when hovering a marker.
df = pd.DataFrame({
    'country': ["USA", "MEXICO", "CANADA"],
    'x': [1, 2, 4],
    'y': [5, 6, 7],
})
marker_style = dict(color='#E90', size=15)
p0 = go.Scatter(
    x=df['x'],
    y=df['y'],
    mode='markers',
    marker=marker_style,
    text=df['country'],  # one label per point, taken from the country column
)
data = [p0]
iplot(data)