individual colours for grouped boxplots in seaborn - python-3.x

I'd like to adjust the colour scheme to this boxplot so that the group on the left are dark & light blue, and on the right dark and light red. I've made the colours I want in my_colours but I still can't figure out how to do it. Here's the code for the data:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Four samples of 20 normally distributed values each (two per level).
a1 = [*np.random.normal(0.70, 0.20, 20)]
a2 = [*np.random.normal(0.5, 0.15, 20)]
b1 = [*np.random.normal(0.78, 0.20, 20)]
b2 = [*np.random.normal(0.4, 0.25, 20)]

# Level label for every observation: 40 rows of 'a' followed by 40 rows of 'b'.
levsA = ['a'] * 40
levsB = ['b'] * 40

# Within each level the first 20 rows are type 1 and the last 20 are type 2.
itemsa = [1] * 20 + [2] * 20
itemsb = [1] * 20 + [2] * 20

df = pd.DataFrame(
    {
        'cs': a1 + a2 + b1 + b2,
        'levels': levsA + levsB,
        'type': itemsa + itemsb,
    }
)

# RGB triples, in palette order.
my_colours = (
    (0.1216, 0.4667, 0.7059),  # dark blue
    (0.8392, 0.1529, 0.1569),  # dark red
    (0.6824, 0.7804, 0.9098),  # light blue
    (1, 0.5961, 0.5882),       # light red
)
sns.set_palette(my_colours)

# Grouped boxplot: one group per 'type', one box per 'levels' value.
sns.boxplot(data=df, x='type', y='cs', hue='levels')
I would like them in this order:

The boxes are PathPatches. You may loop over them and set their color. One would need to pay attention to the order they appear in the axes though.
import matplotlib.patches

# The boxes are PathPatch artists living on the Axes that holds the boxplot.
# Take the current Axes here; keeping the return value of sns.boxplot(...)
# as `ax` works just as well.
ax = plt.gca()
boxes = ax.findobj(matplotlib.patches.PathPatch)

# my_colours is ordered (dark blue, dark red, light blue, light red).
# Interleave it to (dark blue, light blue, dark red, light red) and assign
# the colours in the order in which the boxes appear in the Axes.
for color, box in zip(my_colours[::2] + my_colours[1::2], boxes):
    box.set_facecolor(color)

Related

Dynamic area coloring in plotly subplots

I have 6 graphs done with plotly-subplots where I show the stock price and its simple moving average (20 days for instance). What I would like to do is for each subplot (except one), color the area in red where the current price is below the simple moving average and green if the price is above its simple moving average (and if possible as extra, no color when the stock price is crossing over or under the average: so the color area would start and end at the marker).
I understand I could use add_vrect to define my color area but this seems to be ok for static data: here each subplot would need its "own conditional coloring"
For clarity, I used only 3 stocks/graphs in my example (and the coloring would be on the second and third graphs):
import yfinance as yf
import pandas as pd
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go
tickers = ['AAPL', 'MSFT']

# One year of daily data: closing prices for the tickers, and the
# Open/High/Low/Close columns of SPY for the benchmark candlestick.
df = yf.download(tickers, period='1y', interval='1d', progress=False)['Close']
df_bench = yf.download('SPY', period='1y', interval='1d', progress=False)

# 20-row rolling mean of each ticker's close, stored as '<ticker>_SMA'.
for ticker in tickers:
    df[f'{ticker}_SMA'] = df[ticker].rolling(20).mean()

# Column 1 holds the benchmark and each following column holds one ticker.
# Titles are listed in that same column order so that every subplot gets
# the title that matches its content.
fig = make_subplots(
    rows=1, cols=1 + len(tickers),
    subplot_titles=('Benchmark', *tickers),
)

fig.add_trace(
    go.Candlestick(
        x=df_bench.index,
        open=df_bench['Open'],
        high=df_bench['High'],
        low=df_bench['Low'],
        close=df_bench['Close'],
        name='Benchmark',
    ),
    row=1, col=1,
)

# Price (blue) and its moving average (red) share a subplot; each trace is
# named after the column it plots.
for col, ticker in enumerate(tickers, start=2):
    sma_column = f'{ticker}_SMA'
    fig.add_trace(
        go.Scatter(x=df.index, y=df[ticker], name=ticker, marker_color='blue'),
        row=1, col=col,
    )
    fig.add_trace(
        go.Scatter(x=df.index, y=df[sma_column], name=sma_column, marker_color='red'),
        row=1, col=col,
    )

fig.update_layout(
    height=1200, width=2400, title_text='Dynamic coloring test',
    showlegend=False, template='plotly_white',
    hovermode='x unified', xaxis_rangeslider_visible=False,
)
fig.show()
I have added two signal columns to my dataframe df that flag whether the area should be red (-1) or green (1):
# Signal per ticker: -1 where the price is below its moving average, else 1.
for ticker in ('AAPL', 'MSFT'):
    below_sma = df[ticker] < df[f'{ticker}_SMA']
    df[f'{ticker}_Sig'] = np.where(below_sma, -1, 1)
Now I am stuck and would need some pointers as to how to use add_vrect (or maybe there are better functions -?-) dynamically and on some of the subplots.

How to add percentage label on top of bar chart from a data frame with different sum total data groups

I am new to coding in Python and I am trying to develop a bar chart with a percentage on top of each bar. I have a sample data frame, Quiz2. The code I developed shows only 1600% on the first bar. Could anyone help me do this correctly?
#Approach 2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
sns.set()
%matplotlib inline
# Sample counts, entered as strings: one column per group, one row per gender.
Quiz2 = pd.DataFrame({'Kaha': ['16', '5'], 'Shiny': ['16', '10']})
# Relabel the rows as Male/Female and convert the string counts to floats.
data=Quiz2 .rename(index={0: "Male", 1: "Female"})
data=data.astype(float)
# Grouped bar chart of both columns; Q1p is the object returned by .plot().
Q1p = data[['Kaha','Shiny']].plot(kind='bar', figsize=(5, 5), legend=True, fontsize=12)
Q1p.set_xlabel("Gender", fontsize=12)
Q1p.set_ylabel("Number of people", fontsize=12)
#Q1p.set_xticklabels(x_labels)
# Label every bar: the text is centred horizontally on the bar and placed
# slightly above its top (height * 1.02).
# NOTE(review): indentation was lost in this listing; the loop body presumably
# runs at least through the annotate call - confirm whether plt.show() sits
# inside or after the loop.
for p in Q1p.patches:
width = p.get_width()
height = p.get_height()
x, y = p.get_xy()
# NOTE: the '%' format type multiplies the value by 100, so a raw bar height
# of 16 is rendered as '1600%'.
Q1p.annotate(f'{height:.0%}', (x + width/2, y + height*1.02), ha='center')
plt.show()
I want the percentages for Kaha (sum total 21) to appear as 76.2% for Male and 23.8% for Female, and those for Shiny (sum total 26) as 61.5% for Male and 38.5% for Female. Any help would be appreciated.
In approach 2, only one value is displayed because plt.show() is inside the for loop;
it should be outdented so that it runs after the for loop has finished. You are getting a value of 1600% because the raw height of the bar is being formatted as a percentage in the line beginning with Q1p.annotate(f'{height:.0%}'. Instead of height, this should be the height divided by a total (something like height/total) so that the value being formatted is a fraction.
Here is a solution, but not sure if I am computing the percentages correctly:
# Sample counts, entered as strings: one column per group, one row per gender.
Quiz2 = pd.DataFrame({'Kaha': ['16', '5'], 'Shiny': ['16', '10']})
data = Quiz2.rename(index={0: "Male", 1: "Female"})
data = data.astype(float)

plotted_columns = ['Kaha', 'Shiny']
# Each column is its own group, so a bar's percentage is taken against the
# sum of its own column (Kaha: 21, Shiny: 26) rather than a fixed constant.
column_totals = data[plotted_columns].sum()

Q1p = data[plotted_columns].plot(kind='bar', figsize=(5, 5), legend=True, fontsize=12)
Q1p.set_xlabel("Gender", fontsize=12)
Q1p.set_ylabel("Number of people", fontsize=12)

# The bars of each plotted column are grouped in their own container, in
# column order; pairing containers with column names selects the right total.
for column, container in zip(plotted_columns, Q1p.containers):
    total = column_totals[column]
    for p in container:
        width = p.get_width()
        height = p.get_height()
        x, y = p.get_xy()
        # Guard against an all-zero column so the label stays well defined.
        share = height / total if total else 0.0
        Q1p.annotate(f'{share:.1%}', (x + width / 2, y + height * 1.02), ha='center')

# Show the figure once, after every bar has been labelled.
plt.show()

Scatter Plot of Multiple Y Values for each X coloring each X value

For each company name I want to assign a color. I tried playing with color parameter in scatterplot but that gives different colors within company names.
import matplotlib.pyplot as plt
import seaborn as sns
# One list of y values per company; x holds the matching x position.
y = [[0.15, 0.25, 0.63], [0.69, 0.24, 0.85], [0.85, 0.41, 0.73]]
x = [1, 2, 3]

sns.set_style("dark")
plt.title("Company Records")

# Plot each company's values in a vertical column at its own x position.
for xe, ye in zip(x, y):
    plt.scatter([xe] * len(ye), ye)

# Set tick positions and labels together on the current Axes. In current
# Matplotlib, plt.axes() would add a new Axes instead of returning the one
# that holds the scatter points, so the labels would land on the wrong Axes.
plt.xticks(x, ['ACTP', 'ATC', "LKO"], rotation=45)
Pass the custom colors along with zip:
# One colour per company, in the same order as x and y.
colors = ['red', 'magenta', 'pink']

# zip keeps position, values and colour aligned, so every point of one
# company is drawn in that company's colour.
for xe, ye, c in zip(x, y, colors):
    plt.scatter([xe] * len(ye), ye, c=c)

# Set tick positions and labels together on the current Axes. In current
# Matplotlib, plt.axes() would add a new Axes instead of returning the one
# that holds the scatter points, so the labels would land on the wrong Axes.
plt.xticks([1, 2, 3], ['ACTP', 'ATC', "LKO"], rotation=45)
Output:

How do I apply a line between two points in geopanda e.g. between 2 cities

I am trying to plot a green line between two cities on my map in geopandas.
The result should show the two cities, each marked with a red point and labelled with its name, plus a green line between them.
I hope you can help me!
Thanks in advance!
I have tried a few times, but I cannot work out how to plot the line.
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# --- Base map: the map of Germany is plotted here ---
# Load the postcode-area shapefile and draw it in orange on a single Axes
# with a light-blue background.
plt.style.use('seaborn')
plz_shape_df = gpd.read_file(r'C:\Users\XXXXX\geopanda\plz-gebiete.shp', dtype={'plz': str})
plz_shape_df.head()
plt.rcParams['figure.figsize'] = [16, 11]
fig, ax = plt.subplots()
plz_shape_df.plot(ax=ax, color='orange', alpha=0.8)
ax.set(
title='Germany',
aspect=1.3,
facecolor='lightblue');
# --- The dict new_dict3 with the 2 cities gets plotted ---
# Each value is an (x, y) pair as consumed by ax.text / ax.plot below -
# presumably (longitude, latitude); confirm against the shapefile's CRS.
new_dict3 = {
'Stuttgart': (9.181332, 48.777128),
'Munich': (11.576124, 48.137154),
}
# For each city: write its name 0.1 above the point, then draw a red circle
# marker at the point itself.
# NOTE(review): indentation was lost in this listing; the loop body presumably
# covers the ax.text call and the first ax.plot call - confirm.
for c in new_dict3.keys():
ax.text(
x=new_dict3[c][0],
y=float(new_dict3[c][1]) + 0.1,
s=c,
fontsize = 12,
ha = "center",
)
ax.plot(
new_dict3[c][0],
new_dict3[c][1],
marker = "o",
c = "red",
# NOTE(review): Matplotlib documents alpha as a value between 0 and 1;
# confirm the intended opacity.
alpha = 2.0
)
# --- Attempted green line between the 2 cities of new_dict3 ---
# Only the last value of the loop variable `c` is referenced here, so a single
# point is supplied rather than both cities.
# NOTE(review): the data are passed as x= / y= keywords; Axes.plot takes its
# x and y data as positional arguments - confirm.
ax.plot(
x= new_dict3[c][0],
y= float(new_dict3[c][1]) + 0.1,
linestyle = "--",
c = "green",
marker="",
)
#this doesn't work
[
I found the answer myself; here is my result:
# City coordinates as [x, y] pairs.
Stuttgart = [9.181332, 48.777128]
Munich = [11.576124, 48.137154]

# Transpose the two points into one list of x values and one of y values,
# which is the form plt.plot expects for a line through both cities.
x_values, y_values = (list(axis) for axis in zip(Stuttgart, Munich))

# Thick dashed green line from Stuttgart to Munich.
plt.plot(x_values, y_values, color="green", linestyle="--", linewidth=5)

Stack area chart with quantitative x-axis with altair

Area charts in altair are automatically stacked when the x-axis is time. But when x belongs to a quantitative data type, areas are not stacked.
import pandas as pd
import numpy as np
import string
import altair as alt
# Fixed seed so the random Y values are reproducible.
np.random.seed(394378)

n_series = 3
series_names = list(string.ascii_lowercase[:n_series])
x_range = range(0, 21)

# Long-format frame: for every X value there is one row per series, with a
# Poisson-distributed Y.
df = pd.DataFrame(
    {
        "Series": np.tile(series_names, len(x_range)),
        "X": np.repeat(x_range, n_series),
        "Y": np.random.poisson(lam=10, size=len(x_range) * n_series),
    }
)

# Area chart with a quantitative x axis, one coloured area per series.
(
    alt.Chart(df)
    .mark_area()
    .encode(x="X:Q", y="Y:Q", color="Series:N")
)
How can I stack areas?
You can do this by passing stack=True to the y encoding. For example:
# Same chart as in the question, with stacking requested explicitly on the
# y encoding.
(
    alt.Chart(df)
    .mark_area()
    .encode(
        x="X:Q",
        y=alt.Y("Y:Q", stack=True),
        color="Series:N",
    )
)

Resources