plotly: descending order in grouped bar chart - python-3.x

I'm trying to create a nice plot that is sorted by LABEL and then by Value within each LABEL.
If possible, I would also like to remove the labels at the bottom of the chart, because the legend already explains them.
libraries:
from plotly import graph_objs as go
import plotly.express as px
import pandas as pd
My data looks like this:
# Sample data: one row per bar, identified by its group (LABEL) and its
# category inside that group (Cat2).
# Groups are ordered ascending; inside each group the values are ordered
# descending.
df = pd.DataFrame(
    {
        'LABEL': ['1', '1', '2', '2', '3', '3', '3', '3'],
        'Cat2': ['a', 'b', 'a', 'b', 'c', 'a', 'e', 'f'],
        'Value': [3, 2, 1, 4, 1, 3, 4, 1],
    }
).sort_values(by=['LABEL', 'Value'], ascending=[True, False])
Here is my try:
# Map the strings '0', '1', ... to the colours of the Light24 palette; the
# LABEL values of the frame are used as keys below.
COLOR_MAP = {str(i): c for i, c in enumerate(px.colors.qualitative.Light24)}

fig = go.Figure()
# One Bar trace per LABEL so that every group gets its own colour and
# legend entry.
for i in df['LABEL'].unique():
    df_ = df[df['LABEL'] == i]
    fig.add_trace(go.Bar(
        # Two-level x: the outer level is LABEL, the inner level is Cat2.
        x=[df_['LABEL'], df_['Cat2']],
        y=df_['Value'],
        marker=dict(color=COLOR_MAP[i]),
        name=f'{i}'))
fig.update_layout(legend_title='Cat1')
fig.update_layout(
    xaxis=dict(tickangle=45))
fig.update_layout(xaxis={'categoryorder': 'trace'})  # I tried: 'total descending', 'category descending', 'array'
Result:
My expectation:
Thanks in advance!!

It's much simpler in plotly express:
define a new column in the dataframe that defines x.
from plotly import graph_objs as go
import plotly.express as px
import pandas as pd
records = {
    "LABEL": ["1", "1", "2", "2", "3", "3", "3", "3"],
    "Cat2": ["a", "b", "a", "b", "c", "a", "e", "f"],
    "Value": [3, 2, 1, 4, 1, 3, 4, 1],
}
# Sort the groups ascending and the values descending inside each group,
# then build one x key per bar by concatenating LABEL and Cat2.
df = (
    pd.DataFrame(records)
    .sort_values(by=["LABEL", "Value"], ascending=[True, False])
    .assign(labx=lambda frame: frame["LABEL"] + frame["Cat2"])
)

# Place a tick at every concatenated key but label it with the plain Cat2
# value.
tick_layout = dict(tickmode="array", tickvals=df["labx"], ticktext=df["Cat2"])

px.bar(
    df,
    x="labx",
    y="Value",
    hover_data=["Cat2"],
    color="LABEL",
    color_discrete_sequence=px.colors.qualitative.Light24,
).update_layout(xaxis=tick_layout)
without plotly express
import plotly.graph_objects as go
# Same chart built from graph objects: a single Bar trace whose per-bar
# colour is looked up from the LABEL of each row.
label_colors = dict(zip(df["LABEL"].unique(), px.colors.qualitative.Light24))

fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=df["labx"],
        y=df["Value"],
        marker_color=df["LABEL"].map(label_colors).values,
    )
)
# Place a tick at every concatenated key but label it with the plain Cat2
# value.
fig.update_layout(
    xaxis=dict(tickmode="array", tickvals=df["labx"], ticktext=df["Cat2"])
)

Related

add_scatter and custom data

How to add fig.add_scatter with hover data that is in the hover label?
The minimal code is not working.
I need to add another set of data with the same hover template as the first one.
Many thanks
import numpy as np
import pandas as pd
# Minimal repro from the question: a px scatter plus an extra scatter trace
# that should share the same hover template.
a, b, c = [1, 2], [1, 5], [5, 6]
d, e, f = [5, 5], [4, 4], [5, 5]
s1 = ['A', 'F']
s2 = ['V', 'T']
# NOTE: this rebinds `d` from the list [5, 5] to a dict, so d2['d'] below
# holds this dict rather than the list.
d = {'a': a, 'b': b, 'c': c, 's1':s1}
df = pd.DataFrame(data=d)
d2 = {'d': d, 'e': e, 'f': f, 's2':s2}
df2 = pd.DataFrame(data=d2)
fig = px.scatter(df, x='a', y='b', hover_data=['c', 's1'], color='s1', color_discrete_sequence=["green", "navy"])
# NOTE: customdata is given the column *names* 'f' and 's2' as plain strings,
# not the values of those df2 columns.
fig.add_scatter(x=df2['d'], y=df2['e'], customdata=['f', 's2'], mode="markers", marker=dict(size=10,color='Purple'), name = 'A') # ------> these custom data are not in label, there is just %{customdata[1]}
# Apply one hover template to every trace of the figure.
fig.update_traces(
hovertemplate="<br>".join([
"<b>G:</b> %{x:.3f}",
"<b>R:</b> %{y:.6f}<extra></extra>",
"<b>D:</b> %{customdata[1]}",
"<b>E:</b> %{customdata[0]}",
])
)
fig.update_xaxes(title_font_family="Trebuchet")
# Resize the markers of the traces whose mode is 'markers'.
fig.update_traces(marker=dict(size=9),
selector=dict(mode='markers'))
fig.show()
There are errors in the creation of df2, so I have assumed what you are trying to achieve. The code below makes the hover text work.
import numpy as np
import pandas as pd
a = [1, 2]
b = [1, 5]
c = [5, 6]
d = [5, 5]
e = [4, 4]
f = [5, 5]
s1 = ["A", "F"]
s2 = ["V", "T"]

# As in the question, `d` is rebound to the column dict of the first frame.
d = dict(a=a, b=b, c=c, s1=s1)
df = pd.DataFrame(data=d)
d2 = dict(d=d, e=e, f=f, s2=s2)

# d2["d"] is a dict, so d2 cannot be turned into a frame directly. Instead,
# take the columns of `d` and join the remaining list-valued entries of d2.
extra_cols = {key: val for key, val in d2.items() if key != "d"}
df2 = pd.DataFrame(d).join(pd.DataFrame(extra_cols))

fig = px.scatter(
    df,
    x="a",
    y="b",
    hover_data=["c", "s1"],
    color="s1",
    color_discrete_sequence=["green", "navy"],
)

# customdata must hold one row of values per point: column 0 is f and
# column 1 is s2, matching the indices used in the hover template below.
hover_values = df2.loc[:, ["f", "s2"]].values
fig.add_scatter(
    x=df2["a"],
    y=df2["e"],
    customdata=hover_values,
    mode="markers",
    marker=dict(size=10, color="Purple"),
    name="A",
)

# One hover template shared by every trace of the figure.
template_lines = [
    "<b>G:</b> %{x:.3f}",
    "<b>R:</b> %{y:.6f}<extra></extra>",
    "<b>D:</b> %{customdata[1]}",
    "<b>E:</b> %{customdata[0]}",
]
fig.update_traces(hovertemplate="<br>".join(template_lines))
fig.update_xaxes(title_font_family="Trebuchet")
fig.update_traces(marker=dict(size=9), selector=dict(mode="markers"))
fig.show()

How to set markers with errorbars in different colours?

How to:
display symbols in the legend
colour markers in the same way as the errorbars (argument color gives an error: ValueError: RGBA sequence should have length 3 or 4)
remove connecting lines - get only the scatter with errorbars
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D # for legend handle
# Repro from the question: error bars whose markers should be coloured per
# telescope.
fig, ax = plt.subplots(figsize = (10,10))
times = [1, 2, 3, 4, 5]
rvs = [2, 4, 2, 4, 7]
sigma = [0.564, 0.6, 0.8, 0.8, 0.4]
rv_telescopes = ['A', 'B', 'A', 'C', 'C']
d = {'rv_times': times, 'rv_rvs': rvs, 'rv_sigma': sigma, 'rv_telescopes': rv_telescopes }
df = pd.DataFrame(data=d)
# Colour assigned to each telescope name.
colors = {'A':'#008f00', 'B':'#e36500', 'C':'red'}
# NOTE: `color` receives one colour per row here; the question reports that
# this raises "ValueError: RGBA sequence should have length 3 or 4".
plt.errorbar(df['rv_times'], df['rv_rvs'], df['rv_sigma'], marker = '_', ecolor = df['rv_telescopes'].map(colors), color = df['rv_telescopes'].map(colors), zorder = 1, ms = 30)
# Hand-made legend entries, one per telescope colour.
handles = [Line2D([0], [0], marker='_', color='w', markerfacecolor=v, label=k, markersize=10) for k, v in colors.items()]
ax.legend(handles=handles, loc='upper left', ncol = 2, fontsize=14)
plt.show()
After edit
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D # for legend handle
import pandas as pd
import numpy as np
times = [1, 2, 3, 4, 5]
rvs = [2, 4, 2, 4, 7]
sigma = [0.564, 0.6, 0.8, 0.8, 0.4]
rv_telescopes = ['A', 'B', 'A', 'C', 'C']
d = {'rv_times': times, 'rv_rvs': rvs, 'rv_sigma': sigma, 'rv_telescopes': rv_telescopes}
df = pd.DataFrame(data=d)
# Colour assigned to each telescope name.
colors = {'A': '#008f00', 'B': '#e36500', 'C': 'red'}

fig, ax = plt.subplots(figsize=(10, 10))
# Error bars only: the connecting line is hidden with color='none' and each
# bar takes the colour of its telescope.
ax.errorbar(df['rv_times'], df['rv_rvs'], df['rv_sigma'], color='none', ecolor=df['rv_telescopes'].map(colors), linewidth=1)
# Markers drawn separately so that each one can have its own colour.
ax.scatter(df['rv_times'], df['rv_rvs'], marker='_', linewidth=3, color=df['rv_telescopes'].map(colors), s=1000)
# One labelled errorbar call per telescope, which also provides the legend
# entries.
for rv_teles in np.unique(df['rv_telescopes']):
    color = colors[rv_teles]
    df1 = df[df['rv_telescopes'] == rv_teles]  # rows of the current telescope
    ax.errorbar(df1['rv_times'], df1['rv_rvs'], df1['rv_sigma'],
                color=color, ls='', marker='_', ms=30, linewidth=3, label=rv_teles)
ax.legend(loc='upper left', ncol=1, fontsize=14)
plt.show()
plt.errorbar() works very similarly to plt.plot(), with extra parameters. As such, it primarily draws a line graph, using a single color. The error bars can be given individual colors via the ecolor= parameter. The markers, however, get the same color as the line graph. The line graph can be suppressed via an empty linestyle. On top of that, plt.scatter() can draw markers with individual colors.
In order not to mix the 'object-oriented' with the 'functional' interface, the following example code uses ax.errorbar() and ax.scatter().
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D # for legend handle
import pandas as pd
import numpy as np
times = [1, 2, 3, 4, 5]
rvs = [2, 4, 2, 4, 7]
sigma = [0.564, 0.6, 0.8, 0.8, 0.4]
rv_telescopes = ['A', 'B', 'A', 'C', 'C']
d = dict(rv_times=times, rv_rvs=rvs, rv_sigma=sigma, rv_telescopes=rv_telescopes)
df = pd.DataFrame(data=d)
colors = dict(A='#008f00', B='#e36500', C='red')
# One colour per row, looked up from the telescope name.
row_colors = df['rv_telescopes'].map(colors)

fig, ax = plt.subplots(figsize=(10, 10))
# Error bars only: color='none' hides the connecting line, ecolor colours
# each bar individually.
ax.errorbar(df['rv_times'], df['rv_rvs'], yerr=df['rv_sigma'], color='none', ecolor=row_colors)
# The markers come from scatter(), which accepts one colour per point.
ax.scatter(df['rv_times'], df['rv_rvs'], marker='_', color=row_colors, s=100)

# Legend entries are built by hand: a line-less '_' marker per telescope.
handles = [
    Line2D([0], [0], linestyle='', marker='_', color=shade, label=telescope, markersize=10)
    for telescope, shade in colors.items()
]
ax.legend(handles=handles, loc='upper left', ncol=1, fontsize=14)
plt.show()
A far easier approach would be to call ax.errorbar() multiple times, once for each color. This would automatically create appropriate legend handles:
# Draw one errorbar series per telescope; the label of each call becomes its
# legend entry.
for rv_teles in np.unique(df['rv_telescopes']):
    color = colors[rv_teles]
    df1 = df[df['rv_telescopes'] == rv_teles]  # rows of the current telescope
    # ls='' suppresses the connecting line so only markers and bars remain.
    ax.errorbar(df1['rv_times'], df1['rv_rvs'], df1['rv_sigma'],
                color=color, ls='', marker='_', ms=30, label=rv_teles)
ax.legend(loc='upper left', ncol=1, fontsize=14)
plt.show()

Plot crosstab results using All row as benchmark lines

I have this sample dataframe:
# Sample frame: every row assigns one observation of a `type` to a `cluster`
# (one character per row in the strings below).
test = pd.DataFrame(
    {
        "cluster": list("111122222333"),
        "type": list("abcaabccabca"),
    }
)
I use crosstab to produce a new dataframe and plot results:
pd.crosstab(test.cluster,test.type,normalize='index',margins=True).plot(kind='bar')
I would like to plot the row All as dotted horizontal benchmark lines of the same colour corresponding to each type to improve interpretation of the plot. Will appreciate help of this community!
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
test = pd.DataFrame(
    {'cluster': ['1', '1', '1', '1', '2', '2', '2', '2', '2', '3', '3', '3'],
     'type': ['a', 'b', 'c', 'a', 'a', 'b', 'c', 'c', 'a', 'b', 'c', 'a']})
# Row-normalised crosstab; margins=True appends the 'All' row used as the
# benchmark below.
tab = pd.crosstab(test.cluster, test.type, normalize='index', margins=True)
fig, ax = plt.subplots()
# find the default colors
prop_cycle = plt.rcParams['axes.prop_cycle']
colors = prop_cycle.by_key()['color']
# make a bar plot using all rows but the last
tab.iloc[:-1].plot(ax=ax, kind='bar', color=colors)
# draw the horizontal dotted lines: one per type, in the colour of its bars
for y, c in zip(tab.loc['All'], colors):
    ax.axhline(y=y, color=c, linestyle=':', alpha=0.5)
plt.show()

Python - Visualizing data in a diagram

I have data with two columns, Product and Category, plus a Value column. See below for an example of the data:
import pandas as pd
# Two products, each with a value for the same four categories.
categories = ['Text', 'Text2', 'Text3', 'Text4']
df = pd.DataFrame(
    {
        'Product': ['A'] * 4 + ['B'] * 4,
        'Category': categories * 2,
        'Value': [80, 10, 5, 5, 5, 3, 2, 0],
    }
)
I would like to visualize this data in a diagram:
Here the "Total" is the total value of the entire data frame, "A" & "B" boxes are the total value for each product, and then the values for each product & category are in the right-most box.
I'm not very familiar with the viz packages available in Python. Is there a package that does these types of visualizations?
You can use graphviz. But you need to extract your own blocks/nodes
Example:
from graphviz import Graph
g = Graph()
g.attr(rankdir='RL')

# Node '1': grand total over the whole frame.
T = df['Value'].sum()
g.node('1', f'Total = {T}', shape='square')

# Nodes '2' and '3': the Category/Value table of each product, rendered as
# plain text.
detail_cols = ['Category', 'Value']
A = df[df['Product'] == 'A'][detail_cols].to_string(index=False)
g.node('2', A, shape='square')
B = df[df['Product'] == 'B'][detail_cols].to_string(index=False)
g.node('3', B, shape='square')

# Connect both product nodes to the total node.
g.edge('2', '1')
g.edge('3', '1')
g.render(view=True)

How to add column next to Seaborn heat map

Given the code below, which produces a heat map, how can I get column "D" (the total column)
to display as a column to the right of the heat map with no color, just aligned total values per cell? I'm also trying to move the labels to the top. I don't mind that the labels on the left are horizontal as this does not occur with my actual data.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
%matplotlib inline
df = pd.DataFrame(
    {'A': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'],
     'B': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
     'C': [2, 4, 5, 2, 0, 3, 9, 1],
     'D': [6, 6, 7, 7, 3, 3, 10, 10]})
# Reshape to one row per 'A' value and one column per 'B' value, filled with
# 'C'. Keyword arguments are used because pandas >= 2.0 no longer accepts
# index/columns/values positionally.
df = df.pivot(index='A', columns='B', values='C')
fig, ax = plt.subplots(1, 1, figsize=(4, 6))
# Draw onto the axes created above instead of relying on the current axes.
sns.heatmap(df, annot=True, linewidths=0, cbar=False, ax=ax)
plt.show()
Here's the desired result:
Thanks in advance!
I think the cleanest way (although probably not the shortest), would be to plot Total as one of the columns, and then access colors of the facets of the heatmap and change some of them to white.
The element that is responsible for color on heatmap is matplotlib.collections.QuadMesh. It contains all facecolors used for each facet of the heatmap, from left to right, bottom to top.
You can modify some colors and pass them back to QuadMesh before you plt.show().
There is a slight problem that seaborn changes text color of some of the annotations to make them visible on dark background, and they become invisible when you change to white color. So for now I set color of all text to black, you will need to figure out what is best for your plots.
Finally, to put x axis ticks and label on top, use:
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')
The final version of the code:
import matplotlib.pyplot as plt
from matplotlib.collections import QuadMesh
from matplotlib.text import Text
import seaborn as sns
import pandas as pd
import numpy as np
%matplotlib inline
df = pd.DataFrame(
    {'A': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'],
     'B': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B'],
     'C': [2, 4, 5, 2, 0, 3, 9, 1],
     'D': [6, 6, 7, 7, 3, 3, 10, 10]})
# Keyword arguments are used because pandas >= 2.0 no longer accepts
# index/columns/values positionally.
df = df.pivot(index='A', columns='B', values='C')
# create "Total" column
df['Total'] = df['A'] + df['B']
n_rows, n_cols = df.shape

fig, ax = plt.subplots(1, 1, figsize=(4, 6))
sns.heatmap(df, annot=True, linewidths=0, cbar=False, ax=ax)

# find your QuadMesh object and get array of colors
quadmesh = ax.findobj(QuadMesh)[0]
facecolors = quadmesh.get_facecolors()
# make colors of the last column white: the cells are stored row by row, so
# the last column sits at every n_cols-th position starting at n_cols - 1
# (2, 5, 8, 11 for this 4 x 3 frame)
last_column = np.arange(n_cols - 1, n_rows * n_cols, n_cols)
facecolors[last_column] = np.array([1, 1, 1, 1])
# detach the data array so that the colormap does not recompute (and
# overwrite) the face colors when the figure is drawn
quadmesh.set_array(None)
# set modified colors -- this must be a method call; assigning to
# `quadmesh.set_facecolors` would only replace the method with the array
quadmesh.set_facecolors(facecolors)

# set color of all text to black
for text in ax.findobj(Text):
    text.set_color('black')

# move x ticks and label to the top
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')
plt.show()
P.S. I am on Python 2.7, some syntax adjustments might be required, though I cannot think of any.

Resources