add secondary description in axis values, plotly - python-3.x

I am using a dataframe which includes the following columns:
Country, GNI, CarSalesPerCap. I am using kmeans to create clusters. To the algorithm I pass the dataframe with only the two numeric columns: 'GNI' and 'CarSalesPerCap'.
Then I use plotly to create a scatter plot, where the x-axis is CarSalesPerCap and the y-axis is GNI. My question is: how can I add to the plot the corresponding country for each point plotted on the graph?
df = pd.read_sql_query(query, conn)
df = df.dropna()

# Cluster the data on the two numeric columns only.
# The snippet as posted fitted on `df1` without ever defining it; it is built
# here from `df` *after* dropna() so that the returned labels line up
# row-for-row with `df` when they are glued back below.
df1 = df[['GNI', 'CarSalesPerCap']]
kmeans = KMeans(n_clusters=6, random_state=0).fit(df1)
labels = kmeans.labels_

# Glue back to original data
df['clusters'] = labels

# Let's analyze the clusters
print(df)

# Marker colour for each cluster label, indexed by label (0..5).
cluster_colors = ['black', 'teal', 'grey', 'pink', 'purple', 'orange']

# One scatter trace per cluster, in label order.
traces = []
for cluster_id, cluster_color in enumerate(cluster_colors):
    cluster = df.loc[df['clusters'] == cluster_id]
    traces.append(
        go.Scatter(
            x=cluster['CarSalesPerCap'],
            y=cluster['GNI'],
            mode='markers',
            marker=dict(color=cluster_color),
        )
    )

layout = go.Layout(
    xaxis=dict(
        ticks='',
        showticklabels=True,
        zeroline=True,
        title='CarSalesPerCap',
    ),
    yaxis=dict(
        ticks='',
        showticklabels=True,
        zeroline=True,
        title='GNI',
    ),
    showlegend=False,
    hovermode='closest',
)
fig = go.Figure(data=traces, layout=layout)
py.offline.plot(fig)

You can add a text element to your trace, which lets you attach any label you want to each point. If you pass your country column as the text, it will be displayed on hover. If you want a permanent label instead, you can add annotations.
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import pandas as pd
# Small demo frame: one row per point, plus the label that belongs to it.
df = pd.DataFrame({'country':["USA", "MEXICO", "CANADA"], 'x':[1, 2, 4], 'y':[5, 6, 7]})

marker_style = dict(color='#E90', size=15)

# `text` carries one string per point (here the country name).
p0 = go.Scatter(
    x=df['x'],
    y=df['y'],
    mode='markers',
    marker=marker_style,
    text=df['country'],
)

data = [p0]
iplot(data)

Related

How do I mask two different ranges of values in Seaborn

I want to colour in red the heatmap values that lie between 2.3e-6 and 0.05. My plan was to do that by plotting one heatmap on top of another, but I can't find a way to mask two different ranges of values. Here is my attempt.
from scipy.stats import pearsonr
# Build an N x N frame of uniform random values in [0, 45).
N = 10
data = np.random.uniform(0, 45, size=(N, N))

# 50 random integers reshaped into 25 (row, col) pairs; duplicates are
# possible, so at most 25 cells end up as NaN.
for x, y in np.random.randint(0, N, 50).reshape(-1, 2):
    data[x, y] = np.nan  # fill in some nans at random places

df = pd.DataFrame(data)
def pearsonr_pval(x, y):
    """Return the p-value of the Pearson correlation between *x* and *y*.

    ``scipy.stats.pearsonr`` returns ``(statistic, p-value)``; only the second
    element is kept, so the function can be passed as a correlation
    ``method`` callable that yields p-values instead of coefficients.
    """
    return pearsonr(x, y)[1]
# Keep only the columns that contain at least one non-zero value, then the
# columns from position 3 onward (the slice end of 50 is just an upper bound).
data = df.loc[:, (df != 0).any(axis=0)]
data = data.iloc[:,3:50]
to_log = data.columns

# log(x + 1) transform of every cell.
df_log = data[to_log].applymap(lambda x: np.log(x+1))

# Pairwise matrix computed with pearsonr_pval, i.e. p-values rather than
# correlation coefficients.
X = df_log.corr(method = pearsonr_pval)

sns.set_style("darkgrid")

# Upper-triangle mask (diagonal included): cells set to True are hidden.
mask = np.zeros_like(X)
mask[np.triu_indices_from(mask)] = True

with sns.axes_style("white"):
    f, ax = plt.subplots(figsize=(20, 20))
    # First pass: the whole lower triangle.
    ax = sns.heatmap(X,
                     mask=mask,
                     vmax=1,
                     vmin=0,
                     square=True,
                     cmap="YlGnBu",
                     annot_kws={"size": 1})
    # Second pass, drawn on the same axes: meant to overlay only the cells in
    # the 2.3e-6..0.05 range. NOTE: this mask expression is the one the
    # question is asking about and is left exactly as posted.
    ax = sns.heatmap(X,
                     mask=(X.values<2.3e-6) & (0.05<X.values) & mask.astype(bool),
                     vmax=1,
                     vmin=0,
                     square=True,
                     cmap="rocket",
                     annot_kws={"size": 1})
But I get an error: TypeError: ufunc 'bitwise_and' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
Edit with the code above I get :
As explained in this answer, for element-wise Boolean comparisons in Pandas you need to use & and |, and to enclose each condition in parentheses. So to combine your three conditions, you would need
mask=(X<2.3e-6) | (0.05<X) | mask.astype(bool),

Continuous plotting with seaborn with varying markers based on variable z

I am trying to create a seaborn lineplot with sns.relplot using markers to distinguish between state changes over time, along with some arbitrary value on the y-axis.
Using the dataset below:
# Sample data: one patient (NAME) whose DIAGNOSIS state changes over time.
df = pd.DataFrame({
    'NAME':['29078719','29078719','29078719','29078719','29078719','29078719','29078719'],
    'DIAGNOSIS':['negative', 'negative', 'positive', 'positive', 'positive', 'positive', 'negative'],
    'ENTRY_DATE': ['2014-01-23 15:13:54', '2015-03-06 15:57:16', '2016-02-26 14:40:53', '2016-02-26 14:40:53', '2017-11-24 15:20:38', '2020-01-29 13:41:24', '2020-03-30 12:11:24'],
    'CALCULATED_VALUE': [0.456957, 0.468468, 0.865333, 0.896950, 0.920930, 0.767100, 0.835690]
})

# Convert the timestamp strings to datetime64. `infer_datetime_format` is
# deprecated since pandas 2.0 (consistent-format inference is now the default
# behaviour), so it is no longer passed.
df['ENTRY_DATE'] = pd.to_datetime(df['ENTRY_DATE'])
   NAME      DIAGNOSIS  ENTRY_DATE           CALCULATED_VALUE
0  29078719  negative   2014-01-23 15:13:54  0.456957
1  29078719  negative   2015-03-06 15:57:16  0.468468
2  29078719  positive   2016-02-26 14:40:53  0.865333
3  29078719  positive   2016-02-26 14:40:53  0.89695
4  29078719  positive   2017-11-24 15:20:38  0.92093
5  29078719  positive   2020-01-29 13:41:24  0.7671
6  29078719  negative   2020-03-30 12:11:24  0.83569
# Figure size expressed as height plus width/height ratio.
# (8.27 x 11.7 looks like A4 landscape in inches -- presumably intentional.)
facet_height = 8.27
facet_aspect = 11.7/8.27

# Line plot of the value over time; the line style and marker vary with
# DIAGNOSIS.
sns.relplot(
    data=df,
    kind='line',
    x='ENTRY_DATE',
    y='CALCULATED_VALUE',
    style='DIAGNOSIS',
    markers=True,
    linewidth=2.5,
    height=facet_height,
    aspect=facet_aspect,
)
plt.show()
Would like to achieve:
Note: The data changed in question, and the graph matched output prior to data in the question and images changing. That's why the output in my question does not match the new desired output, but the same code should work with the different data.
relplot is a figure-level plot that isn't great for dynamically plotting in a nuanced way like this. As such, I don't know if you could do such a customized graph with seaborn, but you could use matplotlib and plot three separate lines depending on which grp each row falls in. This is pretty customized, but you could adjust the logic that creates the groups. I have used a combination of the grp number and shift to connect the lines and try to match your expected output:
df = pd.DataFrame({
    'NAME':['29078719','29078719','29078719','29078719','29078719','29078719','29078719'],
    'DIAGNOSIS':['negative', 'negative', 'positive', 'positive', 'positive', 'negative', 'negative'],
    'ENTRY_DATE': ['2014-01-23 15:13:54', '2015-03-06 15:57:16', '2016-02-26 14:40:53', '2016-02-26 14:40:53', '2017-11-24 15:20:38', '2017-11-24 15:20:38', '2020-01-29 13:41:24'],
    'CALCULATED_VALUE': [0.456957, 0.468468, 0.865333, 0.896950, 0.920930, 0.833549, 0.767100]
})
# `infer_datetime_format` is deprecated since pandas 2.0, so it is not passed.
df['ENTRY_DATE'] = pd.to_datetime(df['ENTRY_DATE'])
df = df.sort_values('ENTRY_DATE')

# Number each run of consecutive identical DIAGNOSIS values: 1, 2, 3, ...
grp = (df['DIAGNOSIS'] != df['DIAGNOSIS'].shift()).cumsum()

# Row masks for the three line segments. The shifts deliberately make
# neighbouring segments share rows so the drawn lines connect.
# shift(fill_value=...) keeps the masks boolean instead of going through the
# NaN/object-dtype round trip that shift().fillna() causes.
first_mask = (grp == 1).shift(2, fill_value=True)                   # run 1 plus the next two rows
middle_mask = (grp == 2).shift(fill_value=False) & (grp == 2)       # run 2 without its first row
last_mask = (grp == 3).shift(-1, fill_value=True)                   # run 3 plus the row before it

plt.figure(dpi=125)
df1 = df[first_mask]
df2 = df[middle_mask]
df3 = df[last_mask]

# Invisible plot of the full series so the axes cover the whole data range.
plt.plot(df['ENTRY_DATE'], df['CALCULATED_VALUE'], visible=False)
plt.plot(df1['ENTRY_DATE'], df1['CALCULATED_VALUE'], color='red',
         label=df['DIAGNOSIS'].iloc[0])
plt.plot(df2['ENTRY_DATE'], df2['CALCULATED_VALUE'], marker='x',
         linestyle='dashed', markersize=8, color='red', label=df['DIAGNOSIS'].iloc[2])
# Third segment has no label, so it gets no separate legend entry.
plt.plot(df3['ENTRY_DATE'], df3['CALCULATED_VALUE'], color='red')
plt.xlabel('ENTRY_DATE')
# The y data is CALCULATED_VALUE (DIAGNOSIS is conveyed by line style and
# legend), so the axis is labelled with the column actually plotted.
plt.ylabel('CALCULATED_VALUE')
plt.legend()
plt.grid()
plt.show()
Another variation, this time with markers. One issue is that the x markers and o markers will overlap, so you can use the same marker style but a different line style. Otherwise, you might have to create more lines separating the points to get x and o markers as you've shown:
df = pd.DataFrame({
    'NAME':['29078719','29078719','29078719','29078719','29078719','29078719','29078719'],
    'DIAGNOSIS':['negative', 'negative', 'positive', 'positive', 'positive', 'negative', 'negative'],
    'ENTRY_DATE': ['2014-01-23 15:13:54', '2015-03-06 15:57:16', '2016-02-26 14:40:53', '2016-02-26 14:40:53', '2017-11-24 15:20:38', '2017-11-24 15:20:38', '2020-01-29 13:41:24'],
    'CALCULATED_VALUE': [0.456957, 0.468468, 0.865333, 0.896950, 0.920930, 0.833549, 0.767100]
})
# `infer_datetime_format` is deprecated since pandas 2.0, so it is not passed.
df['ENTRY_DATE'] = pd.to_datetime(df['ENTRY_DATE'])
df = df.sort_values('ENTRY_DATE')

# Number each run of consecutive identical DIAGNOSIS values: 1, 2, 3, ...
grp = (df['DIAGNOSIS'] != df['DIAGNOSIS'].shift()).cumsum()

# Row masks for the three line segments. The shifts deliberately make
# neighbouring segments share rows so the drawn lines connect.
# shift(fill_value=...) keeps the masks boolean instead of going through the
# NaN/object-dtype round trip that shift().fillna() causes.
first_mask = (grp == 1).shift(2, fill_value=True)                   # run 1 plus the next two rows
middle_mask = (grp == 2).shift(fill_value=False) & (grp == 2)       # run 2 without its first row
last_mask = (grp == 3).shift(-1, fill_value=True)                   # run 3 plus the row before it

plt.figure(dpi=125)
df1 = df[first_mask]
df2 = df[middle_mask]
df3 = df[last_mask]

# Invisible plot of the full series so the axes cover the whole data range.
plt.plot(df['ENTRY_DATE'], df['CALCULATED_VALUE'], visible=False)
# Same 'o' marker on every segment; only the line style tells them apart.
plt.plot(df1['ENTRY_DATE'], df1['CALCULATED_VALUE'], color='red', marker='o',
         label=df['DIAGNOSIS'].iloc[0])
plt.plot(df2['ENTRY_DATE'], df2['CALCULATED_VALUE'], marker='o',
         linestyle='dashed', color='red', label=df['DIAGNOSIS'].iloc[2])
# Third segment has no label, so it gets no separate legend entry.
plt.plot(df3['ENTRY_DATE'], df3['CALCULATED_VALUE'], color='red', marker='o')
plt.xlabel('ENTRY_DATE')
# The y data is CALCULATED_VALUE (DIAGNOSIS is conveyed by line style and
# legend), so the axis is labelled with the column actually plotted.
plt.ylabel('CALCULATED_VALUE')
plt.legend()
plt.grid()
plt.show()

How can I annotate a Grouped Broken Barh Chart Python Matplotlib

I have searched to exhaustion trying to annotate my grouped broken barh chart. I would like to have the "Event" from my dataframe annotated in each broken bar section. The examples I have found online enter the events' x,y positions manually AND are not grouped broken bar examples.
The end goal is to have these events display on hover, but I believe I won't have an issue with that if I can just get the events to display.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import matplotlib.ticker as ticker
import io
pd.plotting.register_matplotlib_converters()
inp = u""" T29,11/4/2019 0:00,11/4/2019 0:00,off,none
T29,11/4/2019 0:00,11/5/2019 0:00,off,eventa
T29,11/5/2019 0:00,11/6/2019 0:00,on,none
T35,11/4/2019 0:00,11/5/2019 0:00,off,eventb
T35,11/5/2019 0:00,11/6/2019 0:00,paused,eventa
T43,11/4/2019 0:00,11/4/2019 4:01,on,none
T43,11/4/2019 4:01,11/4/2019 12:06,off,none
T43,11/4/2019 12:06,11/5/2019 8:07,on,eventc
T43,11/5/2019 8:07,11/5/2019 10:12,paused,eventd
T43,11/5/2019 10:12,11/5/2019 16:15,on,none
T43,11/5/2019 18:12,11/5/2019 20:15,off,none
"""

# NOTE: header=0 combined with names= makes read_csv consume the first line of
# `inp` as a header row and replace it with `names`, so that first line never
# becomes a data row.
df = pd.read_csv(io.StringIO(inp), header=0, encoding="ISO-8859-1",
                 names=["Name", "StartTime", "FinishTime", "Status", "Event"])

# Parse the two timestamp columns with an explicit format. This replaces the
# `pd.datetime.strptime` date_parser: `pd.datetime` no longer exists in
# current pandas and `date_parser` itself is deprecated.
for time_col in ("StartTime", "FinishTime"):
    df[time_col] = pd.to_datetime(df[time_col], format="%m/%d/%Y %H:%M")

# Bar colour per Status value.
color = {"on": "g", "paused": "yellow", "off": "black"}

# broken_barh takes (start, width) pairs, so store each row's duration.
df["Diff"] = df.FinishTime - df.StartTime

# X-axis limits: midnight of the earliest start / latest finish, converted
# with date2num so they are in Matplotlib's own date units. Raw ordinals
# (days since 0001-01-01) only match those units on Matplotlib < 3.3; with the
# newer 1970 epoch they would push the view far away from the data.
minDate = mdates.date2num(df.StartTime.min().normalize().to_pydatetime())
maxDate = mdates.date2num(df.FinishTime.max().normalize().to_pydatetime())

days = mdates.DayLocator()
Mcount = 0  # number of Name groups drawn; used for the upper y-limit
fig, ax = plt.subplots(figsize=(6, 3), edgecolor="black", linewidth=1)
labels = []

# One horizontal lane per Name; within a lane, one broken_barh call per Status
# so every status gets its own colour.
for i, (name, name_rows) in enumerate(df.groupby("Name")):
    Mcount += 1
    labels.append(name)
    for status, status_rows in name_rows.groupby("Status"):
        data = status_rows[["StartTime", "Diff"]]
        ax.broken_barh(data.values, (i - 0.4, 0.8), edgecolor="black", alpha=1, linewidth=1,
                       color=color[status])

ax.set_ylim(bottom=-0.8, top=Mcount)
ax.set_yticks(range(len(labels)))
ax.set_yticklabels(labels)
ax.set_ylabel("Names", rotation=90, fontdict={'family': 'DejaVu Sans', 'color': 'black', 'weight': 'bold', 'size': 14})
ax.set_xlim(left=minDate, right=maxDate)
ax.set_xlabel("Date", fontdict={'family': 'DejaVu Sans', 'color': 'black', 'weight': 'bold', 'size': 14})

# Major ticks: one per day, labelled with the date.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d-%Y'))
ax.tick_params(which='major', axis='x', rotation=0, length=11, color='black')
ax.xaxis.set_major_locator(days)

# Minor ticks: every half day (0.5 in date units), labelled with the time.
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M'))
ax.tick_params(which='minor', rotation=0, labelsize=8, length=4, color='red', size=2)
ax.xaxis.set_minor_locator(ticker.MultipleLocator(.50))
plt.show()
Hello and welcome to StackOverflow. IIUC, you can append a for loop to your enumerate statement to add text to the axes.
# Draw one lane per Name and, inside each bar, the Event of the row that the
# bar was built from.
for i, task in enumerate(df.groupby("Name")):
    Mcount += 1
    labels.append(task[0])
    for r in task[1].groupby("Status"):
        data = r[1][["StartTime", "Diff"]]
        ax.broken_barh(data.values,
                       (i - 0.4, 0.8),
                       edgecolor="black",
                       alpha=1,
                       linewidth=1,
                       color=color[r[0]]
                       )
        # Pair every (start, width) with the Event from the same row. Using
        # only the first Event of the Status group would stamp that one event
        # on every bar of the group.
        for (x1, x2), event in zip(data.values, r[1]["Event"].values):
            ax.text(x=x1 + x2/2,  # horizontal centre of the bar
                    y=i,          # vertical centre of the lane
                    s=event,
                    ha='center',
                    va='center',
                    color='white',
                    )
Modified from the docs.
Output:
You can, of course, modify the text formatting.
The text requires an x location, a y location, and a string. The hacky indexing was the quickest way I could pull the event info out of your dataframe.

How to add annotation based on the value in bokeh

I want to be able to display "NO DATA" when there is a value '0' in counts. For example for Strawberries, "NO DATA" should be displayed in the graph.
from bokeh.io import show, output_file
from bokeh.plotting import figure
# Render to a standalone HTML file.
output_file("bar_basic.html")

# Categories and their counts; the last category has a count of 0.
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 0]

# Categorical x-axis, one vertical bar per fruit.
p = figure(title="Fruit Counts", x_range=fruits, plot_height=350)
p.vbar(x=fruits, top=counts, width=0.9)

# Start the y-axis at zero.
p.y_range.start = 0

show(p)
For example, for the above data the graph should look like this (image: example vbar with NO DATA):
You can select the data with the count value '0' with Pandas. This new dataframe can be used to create another ColumnDataSource to use for the LabelSet to show the text 'NO DATA' in the figure.
from bokeh.io import show, output_file
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, LabelSet
import pandas as pd
output_file("bar_basic.html")

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 0]

# Keep the data in a frame so the zero-count rows can be selected below.
df = pd.DataFrame.from_dict({'fruits': fruits, 'counts': counts})
source = ColumnDataSource(df)

p = figure(x_range=fruits, plot_height=350, title="Fruit Counts")
p.vbar(x='fruits', top='counts', source=source, width=0.9)

# Rows whose count is 0, with the label text attached. assign() returns a new
# frame, so there is no write into a slice of `df` and no need to switch off
# pandas' chained-assignment warning globally.
df_nodata = df.loc[df['counts'] == 0].assign(text='NO DATA')
source_nodata = ColumnDataSource(df_nodata)

# One centred label per zero-count fruit, placed at y=1.
labels = LabelSet(x='fruits', y=1, text='text', text_align='center', source=source_nodata)
p.add_layout(labels)

p.y_range.start = 0
show(p)

How do I add an axis for a second trace in a Plotly subplot?

I have three traces, one of which I have in one subplot, and two of which are in another. I would like to have a distinct y-axis for each of the traces in the subplot with 2 traces.
For example, I have
# Grid of two rows and one column; shared_xaxes=True is requested for the rows.
fig = plotly.tools.make_subplots(rows=2, cols=1, shared_xaxes=True)
# trace1 and trace2 are both placed in the cell at row 1, col 1 ...
fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 1)
# ... and trace3 in the cell at row 2, col 1.
fig.append_trace(trace3, 2, 1)
# Overall figure size.
fig['layout'].update(height=200, width=400)
which produces
And when I have no subplots, I can get a second axis for the second trace with
# Layout with two y-axes for a single (non-subplot) figure.
layout = go.Layout(
# Primary y-axis.
yaxis=dict(
title='y for trace1'
),
# Second y-axis: purple title and tick labels, laid over the primary 'y' axis
# and drawn on the right-hand side.
yaxis2=dict(
title='y for trace2',
titlefont=dict(
color='rgb(148, 103, 189)'
),
tickfont=dict(
color='rgb(148, 103, 189)'
),
overlaying='y',
side='right'
)
)
# `data` is not defined in this fragment -- presumably the list of traces.
fig = go.Figure(data=data, layout=layout)
which produces
But I can't figure out how to get the first subplot in the first example to look like the plot in the second example: with a distinct axis for the second trace there.
How do I add an axis for a second trace in a Plotly subplot?
This is a bit of a workaround, but it seems to work:
import plotly as py
import plotly.graph_objs as go
from plotly import tools
import numpy as np
# One trace for the left subplot, bound to y-axis "y1".
left_trace = go.Scatter(x = np.random.randn(1000), y = np.random.randn(1000), yaxis = "y1", mode = "markers")

# Two traces for the right subplot, each asking for its own y-axis.
right_traces = []
right_traces.append(go.Scatter(x = np.random.randn(1000), y = np.random.randn(1000), yaxis = "y2", mode = "markers"))
right_traces.append(go.Scatter(x = np.random.randn(1000) * 10, y = np.random.randn(1000) * 10, yaxis = "y3", mode = "markers"))

fig = tools.make_subplots(rows = 1, cols = 2)
fig.append_trace(left_trace, 1, 1)

# Remember each trace's requested y-axis before appending it, then write it
# back onto the appended copy (the last entry of fig["data"]).
for trace in right_traces:
    yaxis = trace["yaxis"] # Store the yaxis
    fig.append_trace(trace, 1, 2)
    fig["data"][-1].update(yaxis = yaxis) # Update the appended trace with the yaxis

# yaxis1 belongs to the left subplot (x1); yaxis2 and yaxis3 both belong to
# the right subplot (x2), with yaxis3 overlaid on yaxis2 on the right side.
fig["layout"]["yaxis1"].update(range = [0, 3], anchor = "x1", side = "left")
fig["layout"]["yaxis2"].update(range = [0, 3], anchor = "x2", side = "left")
fig["layout"]["yaxis3"].update(range = [0, 30], anchor = "x2", side = "right", overlaying = "y2")
py.offline.plot(fig)
Produces this, where trace0 is in the first subplot plotted on yaxis1, and trace1 and trace2 are in the second subplot, plotted on yaxis2 (0-3) and yaxis3 (0-30) respectively:
When traces are appended to subplots, the xaxis and yaxis seem to be overwritten, or that's my understanding of this discussion anyway.

Resources