Facets not working properly plotly express - python-3.x

import plotly.graph_objects as go
import plotly.express as px
fig = px.histogram(df, nbins = 5, x = "numerical_col", color = "cat_1", animation_frame="date",
range_x=["10000","500000"], facet_col="cat_2")
fig.update_layout(
margin=dict(l=25, r=25, t=20, b=20))
fig.show()
How can I fix the output? I would like multiple subplots based on cat_2 where the hue is cat_1.

you have not provided sample data, so I've simulated it based on code you are using to generate figure
I have encountered one issue range_x does not work, it impacts y-axis as well. Otherwise approach fully works.
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
# data not provided.... simulate some
DAYS = 5
ROWS = DAYS * 2000
df = pd.DataFrame(
{
"date_d": np.repeat(pd.date_range("1-Jan-2021", periods=DAYS), ROWS // DAYS),
"numerical_col": np.random.uniform(10000, 500000, ROWS),
"cat_1": np.random.choice(list("ABCD"), ROWS),
"cat_2": np.random.choice(list("UVWXYZ"), ROWS),
}
)
# animation frame has to be a string not a date...
df["date"] = df["date_d"].dt.strftime("%Y-%b-%d")
# always best to provide pre-sorted data to plotly
df = df.sort_values(["date", "cat_1", "cat_2"])
fig = px.histogram(
df,
nbins=5,
x="numerical_col",
color="cat_1",
animation_frame="date",
# range_x=[10000, 500000],
facet_col="cat_2",
)
fig.update_layout(margin=dict(l=25, r=25, t=20, b=20))

Related

Annotating clustering from DBSCAN to original Pandas DataFrame

I have working code that is utilizing dbscan to find tight groups of sparse spatial data imported with pd.read_csv.
I am maintaining the original spatial data locations and would like to annotate the labels returned by dbscan for each data point to the original dataframe and then write a csv with the same information.
So the code below is doing exactly what I would expect it to at this point, I would just like to extend it to import the label for each row in the original dataframe.
import argparse
import string
import os, subprocess
import pathlib
import glob
import gzip
import re
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from sklearn.cluster import DBSCAN
X = pd.read_csv(tmp_csv_name)
X = X.drop('Name', axis = 1)
X = X.drop('Type', axis = 1)
X = X.drop('SomeValue', axis = 1)
# only columns 'x' and 'y' now remain
db=DBSCAN(eps=EPS, min_samples=minSamples, metric='euclidean', algorithm='auto', leaf_size=30).fit(X)
labels = def_inst_dbsc.labels_
unique_labels = set(labels)
# maxX , maxY are manual inputs temporarily
while sizeX > 16 or sizeY > 16 :
sizeX=sizeX*0.8 ; sizeY=sizeY*0.8
fig, ax = plt.subplots(figsize=(sizeX,sizeY))
plt.xlim(0,maxX)
plt.ylim(0,maxY)
plt.scatter(X['x'], X['y'], c=colors, marker="o", picker=True)
# hackX , hackY are manual inputs temporarily
# which represent the boundaries defined in the original dataset
poly = patches.Polygon(xy=list(zip(hackX,hackY)), fill=False)
ax.add_patch(poly)
plt.show()

Plotly python facetted heatmaps

I'm using the example from this SO Q&A to use seaborn for facetted heatmaps in python. The result looks like this:
I'd like to do the same thing with plotly express and have tried with this starter code:
import plotly.express as px
df = px.data.medals_wide(indexed=True)
fig = px.imshow(df)
fig.show()
My data is also in a pd.DataFrame and it's important I show the groups the heatmaps are grouped by as well as the x/y-axis of the maps.
How do you extend the px.imshow example to create a facetted heatmap by group like the seaborn example above?
The sample data is taken from the referenced responses to answer the question. express, as data, can be subplotted if it is column data, but the results cannot be used with a categorical variable as the extraction condition with a different categorical variable, as in the sample data. You can draw it if it is as a subplot using a graph object in A heat map can be created by specifying the xy-axis in the data frame of the result of data extraction by category variable.
import numpy as np
import pandas as pd
import plotly.express
# Generate a set of sample data
np.random.seed(0)
indices = pd.MultiIndex.from_product((range(5), range(5), range(5)), names=('label0', 'label1', 'label2'))
data = pd.DataFrame(np.random.uniform(0, 100, size=len(indices)), index=indices, columns=('value',)).reset_index()
import plotly.graph_objects as go
from plotly.subplots import make_subplots
titles = ['label0='+ str(x) for x in range(5)]
fig = make_subplots(rows=1, cols=len(data['label0'].unique()),
shared_yaxes=True,
subplot_titles = tuple(titles))
for i in data['label0'].unique():
df = data[data['label0'] == i]
fig.add_trace(go.Heatmap(z=df.value, x=df.label1, y=df.label2), row=1, col=i+1)
fig.update_traces(showscale=False)
fig.update_xaxes(dtick=[0,1,2,3,4])
fig.update_xaxes(title_text='label1', row=1, col=i+1)
fig.update_yaxes(title_text='label2', row=1, col=1)
fig.show()

Implementing ipywidget slider for time

I am trying to create a slider for time in Jupyter Notebook using ipywidgets. I would like to take the tabulated experimental data (see figure below) and control the value bounds with the help of a slider. The graph should be a force-displacement graph, evolving in time:
This is my python code:
from ipywidgets import IntSlider, interact, FloatSlider
u = fdat1['C_1_Weg_R4[mm]'].values
f = fdat1['C_1_Kraft_R4[kN]'].values
t = fdat1['S/No'].values
#interact(t = IntSlider(min = 0, max = max(fdat0['S/No'].values)))
def aa_(t):
plt.plot(f[t],u[t])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
fdat1 is the name of the tabulated data. I have also considered using "C_1_Zeit[s]" column as my slider values, but these are not integer values.
The problem is that nothing gets plotted, but the slider works and the graph changes scale.
I have been searching online for some time now and would really appreciate some help.
Thank you in advance!
Edit:
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.DataFrame.from_records(
[np.linspace(0,30, num=30), np.linspace(0,20, num=30), ]).T
df.columns=['A', 'B']
#interact(t = IntSlider(min = 0, max = 21))
def aa_(t):
plt.scatter(df['A'], df['B'])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
plt.xlim(0, 30)
plt.ylim(0, 30)
Inside your plotting function, create a slice of your results dataframe that slices based on the slider value.
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
results = pd.DataFrame.from_records(
[np.linspace(0,30, num=30), np.linspace(0,20, num=30), ]).T
results.columns=['A', 'B']
#interact(t = IntSlider(min = 0, max = 21))
def aa_(t):
df = results.iloc[:t] # make the slice here
plt.scatter(df['A'], df['B'])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
plt.xlim(0, 30)
plt.ylim(0, 30)
So, basically, this should have been the correct code:
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
u = fdat1['C_1_Weg_R4[mm]'].values #loads displacement values from fdat1
f = fdat1['C_1_Kraft_R4[kN]'].values #loads force values from fdat1
df = pd.DataFrame.from_dict([u,f]).T #creates a dataframe
df.columns=['A', 'B']
#interact(t = IntSlider(min = 0, max = df.shape[0])) #interactive scatterplot with a slider for time
def scatterplot_(t):
plt.scatter(df.loc[0:t,'A'], df.loc[0:t,'B'])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
plt.xlim(-5, 5)
plt.ylim(-25, 25)

Set hue using a range of values in Seaborn stripplot

I am trying to set hue based on a range of values rather than unique values in seaborn stripplot. For example, different colors for different value ranges (1940-1950, 1950-1960 etc.).
sns.stripplot('Condition', 'IM', data=dd3, jitter=0.3, hue= dd3['Year Built'])
Output Figure
Thanks
Looks like you need to bin the data. Use .cut() in the below manner. The years are binned into 5 groups. You can arrange your own step in .arrange() to adjust your ranges.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
x = np.random.randint(0,100,size=100)
y = np.random.randint(0,100, size=100)
year = np.random.randint(1918, 2019, size=100)
df = pd.DataFrame({
'x':x,
'y':y,
'year':year
})
df['year_bin'] = pd.cut(df['year'], np.arange(min(year), max(year), step=20))
sns.lmplot('x','y', data=df, hue='year_bin')
plt.show()
Output:

Hide Legend and Scale information on surface plot using pandas, plotly

I am at my wits end but so far did not find any documentation to solve my specific issue. I am using jupyter notebook.
I have two data frames, df1 & df2.
# libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cufflinks as cf
cf.go_offline()
import plotly.graph_objs as go
# df1 & df2
np.random.seed(0)
dates = pd.date_range('20130101',periods=6)
df1 = pd.DataFrame(np.random.randn(6,4),index=dates,columns=list('ABCD'))
df2 = pd.DataFrame(np.random.randn(6,4),index=dates,columns=list('ABCD'))
I have two surface plots:
layout = go.Layout(
title='Random Numbers',
autosize=False,
width=500,
height=500,
margin=dict(
l=65,
r=50,
b=65,
t=90
)
)
df1.iplot(kind="surface", layout=layout)
df2.iplot(kind="surface", layout=layout)
I have three problems:
I need to plot them side by side as in (row = 1 & column = 2).
The scale legend is either removed or is shared.
The x and y in the axes are removed. I do not need to change them, just get rid of these.
Any help will be appreciated.
I'm sorry if this doesn't answer your question directly but I would suggest using plotly without cufflings.
import plotly
# Define scene which changes the default attributes of the chart
scene = dict(
xaxis=dict(title=''),
yaxis=dict(title=''),
zaxis=dict(title='')
)
# Create 2 empty subplots
fig = plotly.tools.make_subplots(rows=1, cols=2,
specs=[[{'is_3d': True}, {'is_3d': True}]])
# Add df1
fig.append_trace(dict(type='surface', x=df1.index, y=df1.columns, z=df1.as_matrix(),
colorscale='Viridis', scene='scene1', showscale=False), 1, 1)
# Add df2
fig.append_trace(dict(type='surface', x=df2.index, y=df2.columns, z=df2.as_matrix(),
colorscale='RdBu', scene='scene2', showscale=False), 1, 2)
# Set layout and change defaults with scene
fig['layout'].update(title='Random Numbers', height=400, width=800)
fig['layout']['scene1'].update(scene)
fig['layout']['scene2'].update(scene)
# Use plotly offline to display the graph
plotly.offline.plot(fig)
Output:
EDIT:
To answer your third question, you can use .update(scene) to change the axis attributes. Details are in the code above.

Resources