Horizontal grouped Barplot with Bokeh using dataframe - python-3.x

I have a Dataframe and I want to group by Type, and then Flag and plot a graph for count of ID and another graph grouped by Type , Flag and sum of Total column in Bokeh.
')
p.hbar(df,
plot_width=800,
plot_height=800,
label='Type',
values='ID',
bar_width=0.4,
group = ' Type', 'Flag'
legend='top_right')
[![Expected Graph ][2]][2]
If it's not possible with Bokeh what other package can I use to get a good looking graph( Vibrant colours with white background)

You can do this with the holoviews library, which uses bokeh as a backend.
import pandas as pd
import holoviews as hv
from holoviews import opts
hv.extension("bokeh")
df = pd.DataFrame({
"type": list("ABABCCAD"),
"flag": list("YYNNNYNY"),
"id": list("DEFGHIJK"),
"total": [40, 100, 20, 60, 77, 300, 60, 50]
})
# Duplicate the dataframe
df = pd.concat([df] * 2)
print(df)
type flag id total
0 A Y 1 40
1 B Y 2 100
2 A N 3 20
3 B N 4 60
4 C N 5 77
5 C Y 6 300
6 A N 7 60
7 D Y 8 50
Now that we have our data, lets work on plotting it:
def mainplot_hook(plot, element):
plot.state.text(
y="xoffsets",
x="total",
text="total",
source=plot.handles["source"],
text_align="left",
y_offset=9,
x_offset=5
)
def sideplot_hook(plot, element):
plot.state.text(
y="xoffsets",
x="count",
text="count",
source=plot.handles["source"],
text_align="left",
y_offset=9,
x_offset=5
)
# Create single bar plot for sum of the total column
total_sum = df.groupby(["type", "flag"])["total"].sum().reset_index()
total_sum_bars = hv.Bars(total_sum, kdims=["type", "flag"], vdims="total")
# Create our multi-dimensional bar plot
all_ids = sorted(df["id"].unique())
counts = df.groupby(["type", "flag"])["id"].value_counts().rename("count").reset_index()
id_counts_hmap = hv.Bars(counts, kdims=["type", "flag", "id"], vdims="count").groupby("type")
main_plot = (total_sum_bars
.opts(hooks=[mainplot_hook],
title="Total Sum",
invert_axes=True)
)
side_plots = (
id_counts_hmap
.redim.values(id=all_ids, flag=["Y", "N"])
.redim.range(count=(0, 3))
.opts(
opts.NdLayout(title="Counts of ID"),
opts.Bars(color="#1F77B4", height=250, width=250, invert_axes=True, hooks=[sideplot_hook]))
.layout("type")
.cols(2)
)
final_plot = main_plot + side_plots
# Save combined output as html
hv.save(final_plot, "my_plot.html")
# Save just the main_plot as html
hv.save(main_plot, "main_plot.html")
As you can see, the code to make plots in holoviews can be a little tricky but it's definitely a tool I would recommend you pick up. Especially if you deal with high dimensional data regularly, it makes plotting it a breeze once you get the syntax down.

Related

Bokeh colorbar, assign a tick to each color

I'm trying to plot an heatmap of a matrix containing some counts (called mat in my code, then df after change the structure to use it with Bokeh). The structure is like this:
X
element 1
element 2
element 3
category 1
0
6
4
category 2
1
7
3
category 3
5
2
10
category 4
0
1
4
Now with my code I'm using df.value.unique() both for the color mapper and the ticks, but in the heatmap the colorbar's ticks doesn't correspond to the colors:
How can I make the ticks coincide each one to one color? I'm quite sure I have to use the CategoricalColorMapper but with that I get only a white screen. Thank you for the help.
Here's my code:
mat = pd.read_csv("tests/count_50.dat", sep="\t", index_col=0)
mat.index.name = 'MGI_id'
mat.columns.name = 'phen_sys'
#set data as float numbers
mat=mat.astype(float)
#Create a custom palette and add a specific mapper to map color with values
df = mat.stack(dropna=False).rename("value").reset_index()
pal=bokeh.palettes.brewer['YlGnBu'][len(df.value.unique())]
mapper = LinearColorMapper(palette=pal, low=df.value.min(), high=df.value.max(), nan_color = 'gray')
#Define a figure
p = figure(
plot_width=1280,
plot_height=800,
title="Heatmap",
x_range=list(df.MGI_id.drop_duplicates()),
y_range=list(df.phen_sys.drop_duplicates()[::-1]),
tooltips=[('Phenotype system','#phen_sys'),('Gene','#MGI_id'),('Phenotypes','#value')],
x_axis_location="above",
output_backend="webgl")
#Create rectangles for heatmap
p.rect(
x="MGI_id",
y="phen_sys",
width=1,
height=1,
source=ColumnDataSource(df),
fill_color=transform('value', mapper))
p.xaxis.major_label_orientation = 45
#Add legend
t = df.value.unique()
t.sort()
color_bar = ColorBar(
color_mapper=mapper,
ticker=FixedTicker(ticks=t, desired_num_ticks=len(df.value.unique())),
label_standoff=6,
border_line_color=None)
p.add_layout(color_bar, 'right')
show(p)
I found a solution:
I create a factor list by ordering the values and then converting both the dataframe values and the factors. At that point I created a CategoricalColorMapper instead of the linear one and the plot now is correct:
Your list of values goes from 0 to 10, so ColorBar will go up to 10. You can change mapper 'high' value to '9':
mapper = LinearColorMapper(palette=colors, low=0, high=9, nan_color = 'gray')
Or a ColorBar that goes from 1 to 10:
mapper = LinearColorMapper(palette=colors, low=1, high=10, nan_color = 'gray')

How to remove repeating and empty or unmarked values on subplot of x-axis

I'm developing a set of graphs to paint some Pandas DataFrame values. For that I'm using various pandas, numpy and matplotlib modules and functions using the following code:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
data = {'Name': ['immoControlCmd', 'BrkTerrMde', 'GlblClkYr', 'HsaStat', 'TesterPhysicalResGWM', 'FapLc','FirstRowBuckleDriver', 'GlblClkDay'],
'Value': [0, 5, 0, 4, 0, 1, 1, 1],
'Id_Par': [0, 0, 3, 3, 3, 3, 0, 0]
}
signals_df = pd.DataFrame(data)
def plot_signals(signals_df):
# Count signals by par
signals_df['Count'] = signals_df.groupby('Id_Par').cumcount().add(1).mask(signals_df['Id_Par'].eq(0), 0)
# Subtract Par values from the index column
signals_df['Sub'] = signals_df.index - signals_df['Count']
id_par_prev = signals_df['Id_Par'].unique()
id_par = np.delete(id_par_prev, 0)
signals_df['Prev'] = [1 if x in id_par else 0 for x in signals_df['Id_Par']]
signals_df['Final'] = signals_df['Prev'] + signals_df['Sub']
# signals_df['Finall'] = signals_df['Final'].unique()
# print(signals_df['Finall'])
# Convert and set Subtract to index
signals_df.set_index('Final', inplace=True)
# pos_x = len(signals_df.index.unique()) - 1
# print(pos_x)
# Get individual names and variables for the chart
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
num_axis_x = len(signals_df["Name"])
# Creation Graphics
fig, ax = plt.subplots(nrows=num_names_list, figsize=(10, 10), sharex=True)
plt.xticks(np.arange(0, num_axis_x), color='SteelBlue', fontweight='bold')
for pos, (a_, name) in enumerate(zip(ax, names_list)):
# Get data
data = signals_df[signals_df["Name"] == name]["Value"]
# Get values axis-x and axis-y
x_ = np.hstack([-1, data.index.values, len(signals_df) - 1])
# print(data.index.values)
y_ = np.hstack([0, data.values, data.iloc[-1]])
# Plotting the data by position
ax[pos].plot(x_, y_, drawstyle='steps-post', marker='*', markersize=8, color='k', linewidth=2)
ax[pos].set_ylabel(name, fontsize=8, fontweight='bold', color='SteelBlue', rotation=30, labelpad=35)
ax[pos].yaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f'))
ax[pos].yaxis.set_tick_params(labelsize=6)
ax[pos].grid(alpha=0.4, color='SteelBlue')
plt.show()
plot_signals(signals_df)
What I want is to remove the points or positions of the x-axis where nothing is painted or they are not marked on the graph, but leave the values ​​and names as in the image at the end; Seen from Pandas it would be the "Final" column that, before painting the subplots, assigned it as an index and it is where some of the values ​​in this column are repeated; would be to remove the values ​​enclosed in the red box from the graph, but leave the values ​​and names as in the image at the end:
Name Value Id_Par Count Sub Prev
Final
0 immoControlCmd 0 0 0 0 0
1 BrkTerrMde 5 0 0 1 0
2 GlblClkYr 0 3 1 1 1
2 HsaStat 4 3 2 1 1
2 TesterPhysicalResGWM 0 3 3 1 1
2 FapLc 1 3 4 1 1
6 FirstRowBuckleDriver 1 0 0 6 0
7 GlblClkDay 1 0 0 7 0
I've been trying to bring the unique values ​​of the last column, which would be the value that the x-axis should be, but since the dataframe is of another size or dimension, I get an error: ValueError: Length of values ​​(5) does not match length of index (8), and then I have to resize my chart, but in this case I don't understand how to do it:
signals_df['Final'] = signals_df['Prev'] + signals_df['Sub']
signals_df['Finall'] = signals_df['Final'].unique()
print(signals_df['Finall'])
I've also tried to bring the size of the unique index, previously assigned to apply a subtraction to data.index.values ​​of the variable x_, but it does not bring me what I want because it is gathering all the values ​​and subtracting them in bulk and not separately , as is data.index.values:
signals_df.set_index('Final', inplace=True)
pos_x = len(signals_df.index.unique()) - 1
...
..
.
x_ = np.hstack([-1, data.index.values-pos-x, len(signals_df) - 1])
Is there a Pandas and/or Matplotlib function that allows me? or Could someone give me a suggestion that will help me better understand how to do it? what I expect to achieve would be the plot below:
I really appreciate your help, any comments help.
I've Python version: 3.6.5, Pandas version: 1.1.5 and Matplotlib version: 3.3.2
One possible way to do this is if you make your x-axis values into strings, which means that matplotlib will make a "categorical" plot. See examples of that here.
For your case, because you have subplots which would have different values, and they are not always in the right order, we need to do a bit of trickery first to make sure the ticks appear in the correct order. For that, we can use the approach from this answer, where they plot something that uses all of the x values in the correct order, and then remove it.
To gather all the xtick values together, you can do something like this, where you create a list of the values, reduce it to the unique values using a set, then sort those values, and convert to strings using a list comprehension and str():
# First make a list of all the xticks we want
xvals = [-1,]
for name in names_list:
xvals.append(signals_df[signals_df["Name"] == name]["Value"].index.values[0])
xvals.append(len(signals_df)-1)
# Reduce to only unique values, sorted, and then convert to strings
xvals = [str(i) for i in sorted(set(xvals))]
Once you have those, you can make a dummy plot, and then remove it, like so (this is to fix the tick positions in the correct order). NOTE that this needs to be inside your plotting loop for matplotlib versions 3.3.4 and earlier:
# To get the ticks in the right order on all subplots, we need to make
# a dummy plot here and then remove it
dummy, = ax[0].plot(xvals, np.zeros_like(xvals))
dummy.remove()
Finally, when you actually plot the real data inside the loop, you just need to convert x_ to strings as you plot them:
ax[pos].plot(x_.astype('str'), y_, drawstyle='steps-post', marker='*', markersize=8, color='k', linewidth=2)
Note the only other change I made was to not explicitly set the xtick positions (which you did, with plt.xticks), but you can still use that command to set the font colour and weight
plt.xticks(color='SteelBlue', fontweight='bold')
And this is the output:
For completeness, here I have put it all together in your script:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
import matplotlib
print(matplotlib.__version__)
data = {'Name': ['immoControlCmd', 'BrkTerrMde', 'GlblClkYr', 'HsaStat', 'TesterPhysicalResGWM', 'FapLc',
'FirstRowBuckleDriver', 'GlblClkDay'],
'Value': [0, 5, 0, 4, 0, 1, 1, 1],
'Id_Par': [0, 0, 3, 3, 3, 3, 0, 0]
}
signals_df = pd.DataFrame(data)
def plot_signals(signals_df):
# Count signals by par
signals_df['Count'] = signals_df.groupby('Id_Par').cumcount().add(1).mask(signals_df['Id_Par'].eq(0), 0)
# Subtract Par values from the index column
signals_df['Sub'] = signals_df.index - signals_df['Count']
id_par_prev = signals_df['Id_Par'].unique()
id_par = np.delete(id_par_prev, 0)
signals_df['Prev'] = [1 if x in id_par else 0 for x in signals_df['Id_Par']]
signals_df['Final'] = signals_df['Prev'] + signals_df['Sub']
# signals_df['Finall'] = signals_df['Final'].unique()
# print(signals_df['Finall'])
# Convert and set Subtract to index
signals_df.set_index('Final', inplace=True)
# pos_x = len(signals_df.index.unique()) - 1
# print(pos_x)
# Get individual names and variables for the chart
names_list = [name for name in signals_df['Name'].unique()]
num_names_list = len(names_list)
num_axis_x = len(signals_df["Name"])
# Creation Graphics
fig, ax = plt.subplots(nrows=num_names_list, figsize=(10, 10), sharex=True)
# No longer any need to define where the ticks go, but still set the colour and weight here
plt.xticks(color='SteelBlue', fontweight='bold')
# First make a list of all the xticks we want
xvals = [-1, ]
for name in names_list:
xvals.append(signals_df[signals_df["Name"] == name]["Value"].index.values[0])
xvals.append(len(signals_df) - 1)
# Reduce to only unique values, sorted, and then convert to strings
xvals = [str(i) for i in sorted(set(xvals))]
for pos, (a_, name) in enumerate(zip(ax, names_list)):
# To get the ticks in the right order on all subplots,
# we need to make a dummy plot here and then remove it
dummy, = ax[pos].plot(xvals, np.zeros_like(xvals))
dummy.remove()
# Get data
data = signals_df[signals_df["Name"] == name]["Value"]
# Get values axis-x and axis-y
x_ = np.hstack([-1, data.index.values, len(signals_df) - 1])
y_ = np.hstack([0, data.values, data.iloc[-1]])
# Plotting the data by position
# NOTE: here we convert x_ to strings as we plot, to make sure they are plotted as catagorical values
ax[pos].plot(x_.astype('str'), y_, drawstyle='steps-post', marker='*', markersize=8, color='k', linewidth=2)
ax[pos].set_ylabel(name, fontsize=8, fontweight='bold', color='SteelBlue', rotation=30, labelpad=35)
ax[pos].yaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1f'))
ax[pos].yaxis.set_tick_params(labelsize=6)
ax[pos].grid(alpha=0.4, color='SteelBlue')
plt.show()
plot_signals(signals_df)

Python Pandas apply function not being applied to every row when using variables from a DataFrame

I have this weird Pandas problem, when I use the apply function using values from a data frame, it only gets applied to the first row:
import pandas as pd
# main data frame - to be edited
headerData = [['dataA', 'dataB']]
valuesData = [[10, 20], [10, 20]]
dfData = pd.DataFrame(valuesData, columns = headerData)
dfData.to_csv('MainData.csv', index=False)
readMainDataCSV = pd.read_csv('MainData.csv')
print(readMainDataCSV)
#variable data frame - pull values from this to edit main data frame
headerVariables = [['varA', 'varB']]
valuesVariables = [[2, 10]]
dfVariables = pd.DataFrame(valuesVariables, columns = headerVariables)
dfVariables.to_csv('Variables.csv', index=False)
readVariablesCSV = pd.read_csv('Variables.csv')
readVarA = readVariablesCSV['varA']
readVarB = readVariablesCSV['varB']
def formula(x):
return (x / readVarA) * readVarB
dfFormulaApplied = readMainDataCSV.apply(lambda x: formula(x))
print('\n', dfFormulaApplied)
Output:
dataA dataB
0 50.0 100.0
1 NaN NaN
But when I just use regular variables (not being called from a data frame), it functions just fine:
import pandas as pd
# main data frame - to be edited
headerData = [['dataA', 'dataB']]
valuesData = [[10, 20], [20, 40]]
dfData = pd.DataFrame(valuesData, columns = headerData)
dfData.to_csv('MainData.csv', index=False)
readMainDataCSV = pd.read_csv('MainData.csv')
print(readMainDataCSV)
# variables
readVarA = 2
readVarB = 10
def formula(x):
return (x / readVarA) * readVarB
dfFormulaApplied = readMainDataCSV.apply(lambda x: formula(x))
print('\n', dfFormulaApplied)
Output:
dataA dataB
0 50.0 100.0
1 100.0 200.0
Help please I'm pulling my hair out.
If you take readVarA and readVarB from the dataframe by selecting the column it is a pandas Series with an index, which gives a problem in the calculation (dividing a series by another series with a different index doesn't work).
You can take the first value from the series to get the value like this:
def formula(x):
return (x / readVarA[0]) * readVarB[0]

Matplotlib X-ticks not distributing correctly

I have a plot as shown below that has over 1000 x-axis points. I'm trying to scale the x-axis into 3 values, the min, mid & max value instead of having 1000 labels.
Despite my efforts denoted within the hashtags, all 3 values are written onto the same tick (on top of each other) or simply only 1 tick is randomly placed along the x-axis.
import matplotlib.pyplot as plt
from matlplotlib.pyplot import figure
figure (num = None, figsize=(20,10), dpi=80, facecolor='w', edgecolor='k')
ax =plt.gca()
data.plot(kind='bar', x='colA', y='colB', ax=ax)
######
plt.xticks(np.arrange(0,3, step 1)
**ALSO TRIED**
plt.xticks = ([1,2,3], ["a","b","c"])
######
plt.show()
How can I distribute the min,mid and max value evenly across the X-axis?
If 'colA' is numerical:
x_min = min(data['colA'])
x_max = max(data['colA'])
x_mid = (x_min + x_max) / 2
# use regular division if the numbers are floats, use integer division in case all numbers are integers
plt.xticks([x_min, x_mid, x_max], ["a","b","c"])
# plt.xticks([x_min, x_mid, x_max]) # leave out the labels if the default labels are OK
If, on the contrary, 'colA' is categorical (so, some strings), they are numbered internally as 0, 1, 2, ... up till the number of strings minus one:
x_min = 0
x_max = len(data['colA']) - 1
x_mid = x_max // 2 # integer division
plt.xticks([x_min, x_mid, x_max])
You could try:
min_ = min(data['colA'])
max_ = max(data['colA'])
mid_ = (min_ + max_) / 2.
ax.set_xticks([min_, mid_, max_])
ax.set_xticklabels(['min', 'mid', 'max'])

Pandas grouping and resampling for a bar plot:

I have a dataframe that records concentrations for several different locations in different years, with a high temporal frequency (<1 hour). I am trying to make a bar/multibar plot showing mean concentrations, at different locations in different years
To calculate mean concentration, I have to apply quality control filters to daily and monthly data.
My approach is to first apply filters and resample per year and then do the grouping by location and year.
Also, out of all the locations (in the column titled locations) I have to choose only a few rows. So, I am slicing the original dataframe and creating a new dataframe with selected rows.
I am not able to achieve this using the following code:
date=df['date']
location = df['location']
df.date = pd.to_datetime(df.date)
year=df.date.dt.year
df=df.set_index(date)
df['Year'] = df['date'].map(lambda x: x.year )
#Location name selection/correction in each city:
#Changing all stations:
df['location'] = df['location'].map(lambda x: "M" if x == "mm" else x)
#New dataframe:
df_new = df[(df['location'].isin(['K', 'L', 'M']))]
#Data filtering:
df_new = df_new[df_new['value'] >= 0]
df_new.drop(df_new[df_new['value'] > 400].index, inplace = True)
df_new.drop(df_new[df_new['value'] <2].index, inplace = True)
diurnal = df_new[df_new['value']].resample('12h')
diurnal_mean = diurnal.mean()[diurnal.count() >= 9]
daily_mean=diurnal_mean.resample('d').mean()
df_month=daily_mean.resample('m').mean()
df_yearly=df_month[df_month['value']].resample('y')
#For plotting:
df_grouped = df_new.groupby(['location', 'Year']).agg({'value':'sum'}).reset_index()
sns.barplot(x='location',y='value',hue='Year',data= df_grouped)
This is one of the many errors that cropped up:
"None of [Float64Index([22.73, 64.81, 8.67, 19.98, 33.12, 37.81, 39.87, 42.29, 37.81,\n 36.51,\n ...\n 11.0, 40.0, 23.0, 80.0, 50.0, 60.0, 40.0, 80.0, 80.0,\n 17.0],\n dtype='float64', length=63846)] are in the [columns]"
ERROR:root:Invalid alias: The name clear can't be aliased because it is another magic command.
This is a sample dataframe, showing what I need to plot; value column should ideally represent resampled values, after performing the quality control operations and resampling.
Unnamed: 0 location value \
date location value
2017-10-21 08:45:00+05:30 8335 M 339.3
2017-08-18 17:45:00+05:30 8344 M 45.1
2017-11-08 13:15:00+05:30 8347 L 594.4
2017-10-21 13:15:00+05:30 8659 N 189.9
2017-08-18 15:45:00+05:30 8662 N 46.5
This is how the a part of the actual data should look like, after selecting the chosen locations. I am a new user so cannot attach a screenshot of the graph I require. This query is an extension of the query I had posted earlier , with the additional requirement of plotting resampled data instead of simple value counts. Iteration over years to plot different group values as bar plot in pandas
Any help will be much appreciated.
Fundamentally, your errors come with this unclear indexing where you are passing continuous, float values of one column for rowwise selection of index which currently is a datetime type.
df_new[df_new['value']] # INDEXING DATETIME USING FLOAT VALUES
...
df_month[df_month['value']] # COLUMN value DOES NOT EXIST
Possibly, you meant to select the column value (out of the others) during resampling.
diurnal = df_new['value'].resample('12h')
diurnal.mean()[diurnal.count() >= 9]
daily_mean = diurnal_mean.resample('d').mean()
df_month = daily_mean.resample('m').mean() # REMOVE value BEING UNDERLYING SERIES
df_yearly = df_month.resample('y')
However, no where above do you retain location for plotting. Hence, instead of resample, use groupby(pd.Grouper(...))
# AGGREGATE TO KEEP LOCATION AND 12h
diurnal = (df_new.groupby(["location", pd.Grouper(freq='12h')])["value"]
.agg(["count", "mean"])
.reset_index().set_index(['date'])
)
# FILTER
diurnal_sub = diurnal[diurnal["count"] >= 9]
# MULTIPLE DATE TIME LEVEL MEANS
daily_mean = diurnal_sub.groupby(["location", pd.Grouper(freq='d')])["mean"].mean()
df_month = diurnal_sub.groupby(["location", pd.Grouper(freq='m')])["mean"].mean()
df_yearly = diurnal_sub.groupby(["location", pd.Grouper(freq='y')])["mean"].mean()
print(df_yearly)
To demonstrate with random, reproducible data:
Data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
np.random.seed(242020)
random_df = pd.DataFrame({'date': (np.random.choice(pd.date_range('2017-01-01', '2019-12-31'), 5000) +
pd.to_timedelta(np.random.randint(60*60, 60*60*24, 5000), unit='s')),
'location': np.random.choice(list("KLM"), 5000),
'value': np.random.uniform(10, 1000, 5000)
})
Aggregation
loc_list = list("KLM")
# NEW DATA FRAME WITH DATA FILTERING
df = (random_df.set_index(random_df['date'])
.assign(Year = lambda x: x['date'].dt.year,
location = lambda x: x['location'].where(x["location"] != "mm", "M"))
.query('(location == #loc_list) and (value >= 2 and value <= 400)')
)
# 12h AGGREGATION
diurnal = (df_new.groupby(["location", pd.Grouper(freq='12h')])["value"]
.agg(["count", "mean"])
.reset_index().set_index(['date'])
.query("count >= 2")
)
# d, m, y AGGREGATION
daily_mean = diurnal.groupby(["location", pd.Grouper(freq='d')])["mean"].mean()
df_month = diurnal.groupby(["location", pd.Grouper(freq='m')])["mean"].mean()
df_yearly = (diurnal.groupby(["location", pd.Grouper(freq='y')])["mean"].mean()
.reset_index()
.assign(Year = lambda x: x["date"].dt.year)
)
print(df_yearly)
# location date mean Year
# 0 K 2017-12-31 188.984592 2017
# 1 K 2018-12-31 199.521702 2018
# 2 K 2019-12-31 216.497268 2019
# 3 L 2017-12-31 214.347873 2017
# 4 L 2018-12-31 199.232711 2018
# 5 L 2019-12-31 177.689221 2019
# 6 M 2017-12-31 222.412711 2017
# 7 M 2018-12-31 241.597977 2018
# 8 M 2019-12-31 215.554228 2019
Plotting
sns.set()
fig, axs = plt.subplots(figsize=(12,5))
sns.barplot(x='location', y='mean', hue='Year', data= df_yearly, ax=axs)
plt.title("Location Value Yearly Aggregation", weight="bold", size=16)
plt.show()
plt.clf()
plt.close()

Resources