Bar plot with different minimal value for each bar - python-3.x

I'm trying to reproduce this type of graph :
basically, the Y axis represent the date of beginning and end of a phenomenon for each year.
but here is what I have when I try to plot my data :
It seems that no matter what, the bar for each year is plotted from the y axis minimal value.
Here is the data I use
Here is my code :
select=pd.read_excel("./writer.xlsx")
select=pd.DataFrame(select)
select["dte"]=pd.to_datetime(select.dte)
select["month_day"]=pd.DatetimeIndex(select.dte).strftime('%B %d')
select["month"]=pd.DatetimeIndex(select.dte).month
select["day"]=pd.DatetimeIndex(select.dte).day
gs=gridspec.GridSpec(2,2)
fig=plt.figure()
ax1=plt.subplot(gs[0,0])
ax2=plt.subplot(gs[0,1])
ax3=plt.subplot(gs[1,:])
###2 others graphs that works just fine
data=pd.DataFrame()
del select["res"],select["Seuil"],select["Seuil%"] #these don't matter for that graph
for year_ in list(set(select.dteYear)):
temp=select.loc[select["dteYear"]==year_]
temp2=temp.iloc[[0,-1]] #the beginning and ending of the phenomenon
data=pd.concat([data,temp2]).reset_index(drop=True)
data=data.sort_values(["month","day"])
ax3.bar(data["dteYear"],data["month_day"],tick_label=data["dteYear"])
plt.show()
If you have any clue that could help me, I'd really appreciate it, because I haven't found any example showing how to make this type of graph.
thanks !
EDIT :
I tried something else :
height,bottom,x_position=[], [], []
for year_ in list(set(select.dteYear)):
temp=select.loc[select["dteYear"]==year_]
bottom.append(temp["month_day"].iloc[0])
height.append(temp["month_day"].iloc[-1])
x_position.append(year_)
temp2=temp.iloc[[0,-1]]
data=pd.concat([data,temp2]).reset_index(drop=True)
ax3.bar(x=x_position,height=height,bottom=bottom,tick_label=x_position)
I got this error :
Traceback (most recent call last):
File "C:\Users\E31\Documents\cours\stage_dossier\projet_python\tool_etiage\test.py", line 103, in <module>
ax3.bar(x=x_position,height=height,bottom=bottom,tick_label=x_position)
File "C:\Users\E31\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\__init__.py", line 1352, in inner
return func(ax, *map(sanitize_sequence, args), **kwargs)
File "C:\Users\E31\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\axes\_axes.py", line 2357, in bar
r = mpatches.Rectangle(
File "C:\Users\E31\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\patches.py", line 752, in __init__
super().__init__(**kwargs)
File "C:\Users\E31\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\patches.py", line 101, in __init__
self.set_linewidth(linewidth)
File "C:\Users\E31\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\patches.py", line 406, in set_linewidth
self._linewidth = float(w)
TypeError: only size-1 arrays can be converted to Python scalars

To make a bar graph that shows the span between two dates, start by getting your data into a tidy format in the dataframe, where the bottom and top values of the bar are easy to access for each year you are plotting. After this you can simply plot the bars and pass the 'bottom' parameter. Note that 'height' is the length of the bar (top minus bottom), not the value at which the bar ends. The hardest part in your case may be specifying the datetime differences correctly. I added an x tick locator and a y tick formatter for the datetimes.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.dates as mdates
# make function that returns a random datetime
# between a start and stop date
def random_date(start, stop):
    """Return a random timestamp a whole number of days after ``start``.

    The offset is drawn uniformly from ``0 .. (stop - start).days - 1``, so
    the result lies in ``[start, stop)`` and keeps the time of day of
    ``start``.

    Parameters
    ----------
    start, stop : datetime-like
        Interval bounds; ``stop - start`` must expose a ``.days`` attribute
        (e.g. ``pandas.Timestamp``).

    Raises
    ------
    ValueError
        If ``stop`` is earlier than ``start`` by a day or more.
    """
    days = (stop - start).days
    if days < 0:
        raise ValueError("stop must not be earlier than start")
    if days == 0:
        # np.random.randint(0) raises because the range is empty; with less
        # than one full day between the bounds the only valid offset is 0.
        return start
    rand = int(np.random.randint(days))
    return start + pd.Timedelta(days=rand)
# simulate poster's data
T1 = pd.to_datetime('July 1 2021')
T2 = pd.to_datetime('August 1 2021')
T3 = pd.to_datetime('November 1 2021')
df = pd.DataFrame({
'year' : np.random.choice(np.arange(1969, 2020), size=15, replace=False),
'bottom' : [random_date(T1, T2) for x in range(15)],
'top' : [random_date(T2, T3) for x in range(15)],
}).sort_values(by='year').set_index('year')
# define fig/ax and figsize
fig, ax = plt.subplots(figsize=(16,8))
# plot data
ax.bar(
x = df.index,
height = (df.top - df.bottom),
bottom = df.bottom,
color = '#9e7711'
)
# add x_locator (every 2 years), y tick datetime formatter, grid
# hide top/right spines, and rotate the x ticks for readability
x_locator = ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(2))
y_formatter = ax.yaxis.set_major_formatter(mdates.DateFormatter('%d %b'))
tick_params = ax.tick_params(axis='x', rotation=45)
grid = ax.grid(axis='y', dashes=(8,3), alpha=0.3, color='gray')
hide_spines = [ax.spines[s].set_visible(False) for s in ['top','right']]

Related

Adding annotations for High and Low pressure symbols in Mean Sea Level Pressure field

I'm currently plotting the Mean Sea Level Pressure (MSLP) data from ERA5 Reanalysis (sample netcdf data is included) using the 'recipe' from Unidata.
The reproducible code (I tried to be as minimal as I can) runs like this
```python
from datetime import datetime
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import metpy.calc as mpcalc
from metpy.units import units
import numpy as np
from scipy.ndimage import gaussian_filter
import xarray as xr
#This function is from Unidata recipe as indicated above used for plotting max/min points
def plot_maxmin_points(lon, lat, data, extrema, nsize, symbol, color='k',
                       plotValue=True, transform=None, ax=None):
    """Annotate the local maxima or minima of a 2-D field on an axes.

    A grid point is treated as an extremum when its value equals the result
    of a maximum/minimum filter of size ``nsize`` at that point.

    Parameters
    ----------
    lon, lat : array-like
        Coordinates of the grid points. Either 2-D arrays with the same shape
        as ``data``, or 1-D axes (``lon`` along columns, ``lat`` along rows),
        which are expanded with ``np.meshgrid``.
    data : array-like
        2-D field to search. Objects exposing ``.magnitude`` (e.g. pint
        quantities) are reduced to their plain numeric values first.
    extrema : {'max', 'min'}
        Which kind of extremum to mark.
    nsize : int
        Size passed to the scipy filter (the neighbourhood that defines
        "local").
    symbol : str
        Text drawn at each extremum, e.g. ``'H'`` or ``'L'``.
    color : str, optional
        Text colour.
    plotValue : bool, optional
        When true, also draw the integer-truncated value below the symbol.
    transform : optional
        Forwarded to ``ax.text`` as ``transform``.
    ax : optional
        Axes to draw on; defaults to the current matplotlib axes.

    Raises
    ------
    ValueError
        If ``extrema`` is neither ``'max'`` nor ``'min'``.
    """
    # Kept local so the function stays self-contained; Python caches the
    # module after the first import, so repeated calls only do a lookup.
    from scipy.ndimage import maximum_filter, minimum_filter

    if extrema == 'max':
        extrema_filter = maximum_filter
    elif extrema == 'min':
        extrema_filter = minimum_filter
    else:
        raise ValueError('Value for hilo must be either max or min')

    if ax is None:
        import matplotlib.pyplot as plt
        ax = plt.gca()

    # Strip units before filtering: comparing the filter output with a
    # unit-carrying array fails elementwise and no extremum is found.
    values = np.asarray(getattr(data, 'magnitude', data))

    lon = np.asarray(lon)
    lat = np.asarray(lat)
    if lon.ndim == 1 and lat.ndim == 1:
        # 1-D coordinate axes cannot be indexed with [row, col]; expand them.
        lon, lat = np.meshgrid(lon, lat)

    data_ext = extrema_filter(values, nsize, mode='nearest')
    mxy, mxx = np.where(data_ext == values)

    for row, col in zip(mxy, mxx):
        x, y = lon[row, col], lat[row, col]
        ax.text(x, y, symbol, color=color, size=24,
                clip_on=True, horizontalalignment='center',
                verticalalignment='center', transform=transform)
        if plotValue:
            # np.int was removed from NumPy; the builtin int truncates the
            # same way.
            ax.text(x, y, '\n' + str(int(values[row, col])),
                    color=color, size=12, clip_on=True, fontweight='bold',
                    horizontalalignment='center', verticalalignment='top',
                    transform=transform)
#Open the included ERA5 data in this post
ds = xr.open_dataset('EU_Single Levels.nc')
lats = ds.latitude
lons = ds.longitude
# Select and grab data
mslp = ds['msl']
# Apply gaussian filter to the MSLP
mslp_surf = gaussian_filter(mslp.data[30], sigma=3.0) * units.pascal
mslp_hpa = mslp_surf.to(units.hectopascal)
mapcrs = ccrs.PlateCarree()
datacrs = ccrs.PlateCarree()
# Create plot axes with proper projection
fig = plt.figure(1, figsize=(14, 12))
ax = plt.subplot(111, projection=datacrs)
ax.set_extent([105, 140, 0, 25], mapcrs)
# Plot MSLP Contour
MSLP_range = np.arange(995, 1010, 2)
prs = ax.contour(lons, lats, mslp_hpa, MSLP_range, colors='k',
transform=datacrs)
ax.clabel(prs, fmt='%d')
#Should annotate highs and low pressure areas...not working?
plot_maxmin_points(lons, lats, mslp_hpa, 'max', 50, symbol='H', color='b', transform=datacrs)
plot_maxmin_points(lons, lats, mslp_hpa, 'min', 25, symbol='L', color='r', transform=datacrs)
ax.add_feature(cfeature.COASTLINE.with_scale('10m'), linewidth=1.15)
plt.title('ERA5 Reanalysis: MSLP (hPa)', loc='left')
plt.subplots_adjust(bottom=0, top=1)
plt.show()
```
Which produces a plot like this:
Unfortunately, I ran into some problems when the MSLP plot never 'displayed' the annotations of the MSLP max/min points from the function as created by Unidata. In fact, it also shows a:
DeprecationWarning: elementwise comparison failed; this will raise an error in the future. mxy, mxx = np.where(data_ext == data)
Is there a way to possibly sort out this small issue? Thanks in advance.
It does not plot anything because the line mxy, mxx = np.where(data_ext == data) returns empty arrays.
You are making an elementwise comparison between arrays whose elements are respectively float32 (in data_ext, returned by maximum_filter) and Quantity objects created by the pint package, which deals with units (in data). Even though the numbers can be the same, the elementwise comparison fails (as indicated by the warning) and therefore returns False everywhere.
To fix this, you can change mxy, mxx = np.where(data_ext == data) to mxy, mxx = np.where(data_ext == np.array(data)).
Two side notes:
Having imports inside a function is generally discouraged: it hides the module's dependencies, and the import statement is evaluated again on every call (the module itself is only loaded once and then served from Python's module cache, so the cost is small, but the habit is still worth avoiding). Therefore, move from scipy.ndimage import maximum_filter, minimum_filter up with the other imports.
The function you use to catch the extrema actually returns the max/min of a box centered on each pixel, whose size is fixed by your parameter nsize. Therefore, nsize should reflect the 'localness' of what you are looking for, here your pressure structures. As your image size is (101, 141), a box size of 50 is too big to capture these structures.

Unable to read data from kdeplot

I have a pandas dataframe with two columns, A and B, named df in the following bits of code.
And I try to plot a kde for each value of B like so:
import seaborn as sbn, numpy as np, pandas as pd
fig = plt.figure(figsize=(15, 7.5))
sbn.kdeplot(data=df, x="A", hue="B", fill=True)
fig.savefig("test.png")
I read the following propositions but only those where I compute the kde from scratch using statsmodel or some other module get me somewhere:
Seaborn/Matplotlib: how to access line values in FacetGrid?
Get data points from Seaborn distplot
For curiosity's sake, I would like to know why I am unable to get something from the following code:
kde = sns.kdeplot(data=df, x="A", hue="B", fill=True)
line = kde.lines[0]
x, y = line.get_data()
print(x, y)
The error I get is IndexError: list index out of range. kde.lines has a length of 0.
Accessing the lines through fig.axes[0].lines[0] also raises an IndexError.
All in all, I think I tried everything proposed in the previous threads (I tried switching to displot instead of using kdeplot but this is the same story, only that I have to access axes differently, note displot and not distplot because it is deprecated), but every time I get to .get_lines(), ax.lines, ... what is returned is an empty list. So I can't get any values out of it.
EDIT : Reproducible example
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sbn
# 1. Generate random data
# Build every row first and create the frame once: DataFrame.append copied
# the whole frame on each call and was removed in pandas 2.0.
scales = [1, 2, 3, 5, 7, 8, 10, 12, 15, 17, 20, 40, 50]
rows = [
    {"A": np.random.random() * i, "B": i}  # A is uniform in [0, i)
    for i in scales
    for _ in range(10)
]
df = pd.DataFrame(rows, columns=["A", "B"])
# 2. Plot data
fig = plt.figure(figsize=(15, 7.5))
sbn.kdeplot(data=df, x="A", hue="B", fill=True)
# 3. Read data (error)
ax = fig.axes[0]
x, y = ax.lines[0].get_data()
print(x, y)
This happens because using fill=True changes the object that matplotlib draws.
When no fill is used, lines are plotted:
fig = plt.figure(figsize=(15, 7.5))
ax = sbn.kdeplot(data=df, x="A", hue="B")
print(ax.lines)
# [<matplotlib.lines.Line2D object at 0x000001F365EF7848>, etc.]
when you use fill, it changes them to PolyCollection objects
fig = plt.figure(figsize=(15, 7.5))
ax = sbn.kdeplot(data=df, x="A", hue="B", fill=True)
print(ax.collections)
# [<matplotlib.collections.PolyCollection object at 0x0000016EE13F39C8>, etc.]
You could draw the kdeplot a second time, but with fill=False so that you have access to the line objects

Stop x-axis labels from shrinking the plot in Matplotlib?

I'm trying to make a bar graph with the following code:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
test = {'names':['a','b','abcdefghijklmnopqrstuvwxyz123456789012345678901234567890'], 'values':[1,2,3]}
df = pd.DataFrame(test)
plt.rcParams['figure.autolayout'] = False
ax = sns.barplot(x='names', y='values', data=df)
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
plt.show()
But I get the following error because the long value in 'names' as a label on the x-axis is making the image shrink until the bottom is above the top.
Traceback (most recent call last):
File "C:/Users/Adam/.PyCharm2018.2/config/scratches/scratch.py", line 11, in <module>
plt.show()
File "C:\Anaconda3\lib\site-packages\matplotlib\pyplot.py", line 253, in show
return _show(*args, **kw)
File "C:\Program Files\JetBrains\PyCharm 2018.2.3\helpers\pycharm_matplotlib_backend\backend_interagg.py", line 25, in __call__
manager.show(**kwargs)
File "C:\Program Files\JetBrains\PyCharm 2018.2.3\helpers\pycharm_matplotlib_backend\backend_interagg.py", line 107, in show
self.canvas.show()
File "C:\Program Files\JetBrains\PyCharm 2018.2.3\helpers\pycharm_matplotlib_backend\backend_interagg.py", line 62, in show
self.figure.tight_layout()
File "C:\Anaconda3\lib\site-packages\matplotlib\figure.py", line 2276, in tight_layout
self.subplots_adjust(**kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\figure.py", line 2088, in subplots_adjust
self.subplotpars.update(*args, **kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\figure.py", line 245, in update
raise ValueError('bottom cannot be >= top')
ValueError: bottom cannot be >= top
Here is what it looks like if I reduce the length of that name slightly:
How can I get it to expand the figure to fit the label instead of shrinking the axes?
One workaround is to create the Axes instance yourself as axes, not as subplot. Then tight_layout() has no effect, even if it's called internally. You can then pass the Axes with the ax keyword to sns.barplot. The problem now is that if you call plt.show() the label may be cut off, but if you call savefig with bbox_inches='tight', the figure size will be extended to contain both the figure and all labels:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
test = {'names':['a','b','abcdefghijklmnopqrstuvwxyz123456789012345678901234567890'], 'values':[1,2,3]}
df = pd.DataFrame(test)
#plt.rcParams['figure.autolayout'] = False
ax = sns.barplot(x='names', y='values', data=df, ax=ax)
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
#plt.show()
fig.savefig('long_label.png', bbox_inches='tight')
DISCLAIMER: I don't have PyCharm, so this code assumes that matplotlib behaves the same with and without PyCharm. Anyway, for me the outcome looks like this:
If you want this in an interactive backend I didn't find any other way than manually adjust the figure size. This is what I get using the qt5agg backend:
ax = sns.barplot(x='names', y='values', data=df)
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
ax.figure.set_size_inches(5, 8) # manually adjust figure size
plt.tight_layout() # automatically adjust elements inside the figure
plt.show()
Note that PyCharm's scientific mode might be doing some magic that prevents this from working, so you might need to deactivate it or just run the script outside PyCharm.

How to connect points to draw a line segment and also make a parallel line w.r.t. line segment?

I want to draw a line segment passing through all the end points of my graph (the maximum value of a curve) which is plotted from a csv file. For this line segment I also need to draw a line parallel to this line segments by taking one point(known) as reference on the curve.
z, x, y
-40,0,0
-40,0.658,26.443
-40,1.316,47.128
-40,1.974,62.084
-40,2.632,73.336
-40,3.29,81.785
-40,3.948,87.501
-40,4.606,90.795
-40,5.264,92.491
-40,5.922,93.231
-40,6.58,93.41 - maximum value i.e end point of the curve
23,0,0
23,0.889,22.616
23,1.778,36.552
23,2.667,45.238
23,3.556,50.666
23,4.445,53.856
23,5.334,55.673
23,6.223,56.672
23,7.112,57.203
23,8.001,57.443
23,8.89,57.51- maximum value i.e end point of the curve
40,0,0
40,0.937,19.191
40,1.874,30.893
40,2.811,38.58
40,3.748,43.547
40,4.685,46.518
40,5.622,48.238
40,6.559,49.193
40,7.496,49.694
40,8.433,49.935
40,9.37,50.02- maximum value i.e end point of the curve
Above is the CSV file which I need to plot, with the end points marked. I need to connect all the end points with a line, as in the image, using pandas, and I tried the code below to do this. The parallel line should pass through a single known point on any one of the curves and be parallel to the first line.
import csv
from tkinter import filedialog
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import style
from mpldatacursor import datacursor
x=[] # Initializing empty lists to store the 3 columns in csv
y=[]
z=[]
df = pd.DataFrame({'A' : []})
def readCSV(e):
    """Ask for a CSV file, plot ``y`` against ``x`` per ``z``, and join the curve ends.

    The chosen file is loaded into the module-level ``df``. One curve is drawn
    for each distinct ``z`` value, and a dotted black line connects the last
    point of every curve.

    Parameters
    ----------
    e
        Unused; presumably the event object supplied by the GUI callback
        (verify against the caller).
    """
    global df
    filename = filedialog.askopenfilename()
    if not filename:
        # Cancelling the dialog yields an empty value; there is nothing to plot.
        return

    # on_bad_lines='skip' replaces the removed error_bad_lines=False.
    # skipinitialspace tolerates a header written as "z, x, y", which would
    # otherwise produce column names with a leading blank.
    df = pd.read_csv(filename, on_bad_lines='skip', skipinitialspace=True)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    df.set_index('x', inplace=True)  # x becomes the horizontal axis of each curve
    df.groupby('z')['y'].plot(legend=True, ax=ax)  # one line per z value

    # Line2D has no append(); read the last coordinate of every plotted curve
    # before the connecting line itself is added to the axes.
    end_x = [line.get_xdata()[-1] for line in ax.lines]
    end_y = [line.get_ydata()[-1] for line in ax.lines]
    ax.plot(end_x, end_y, color="black", linestyle=":")

    ax.set_ylabel('y')
    ax.set_xlabel('x')
    ax.grid(True)
    plt.show()
To plot a line connecting the end points of your graph, one way would be to get the last set of coordinates for each line. This can be done using get_xdata() and get_ydata(). This returns all the values, but we only want the last value. This can be done using the slice notation [-1]:
my_list = [1,2,3,4,5]
print (my_list[-1])
# 5
So your code would become something like:
# x and y coordinates of the last point of every curve
s = []
r = []
# on_bad_lines="skip" replaces error_bad_lines=False, which was removed from
# pandas.read_csv.
df = pd.read_csv("test.csv", on_bad_lines="skip")  # Reading CSV file using pandas
fig = plt.figure()
data_list = []
ax = fig.add_subplot(111)
df.set_index('x', inplace=True)  # Setting index
df.groupby('z')['y'].plot(legend=True, ax=ax)  # grouping and plotting
for line in ax.lines:
    # [-1] picks the final sample of the curve, i.e. its end point
    s.append(line.get_xdata()[-1])
    r.append(line.get_ydata()[-1])
# Draw the connecting line only after all end points have been collected.
ax.plot(s, r, color="black", linestyle=":")
ax.set_ylabel('y')
ax.set_xlabel('x')
ax.grid(True)
plt.show()
Which gives:

How to plot the S&P500 and its SMA in two different windows and in one window

I am trying to plot the S&P500 and its SMA in two different windows with the following code, but it doesn't seem to work well. If I plot only one of them, it is OK.
import pandas_datareader.data as web
import datetime
import matplotlib.pyplot as pyplot
import talib
import pandas as pd
import numpy as np
start = datetime.datetime(2002, 1, 1)
## S&P 500
sp500 = web.DataReader("SP500", "fred", start)
head = sp500[-100:].dropna()
print(len(head))
## Transform DataFrame to nparray
my_array = head.as_matrix()
## Transform column to row
x = my_array.T[0]
## Get rid off the NaN
y = x[~np.isnan(x)]
print(len(y))
## Compute SMA
my_sma=talib.SMA(y, timeperiod=5)
print(len(my_sma))
## Plot
pyplot.figure(1)
pyplot.subplot(211) ## upper window
head.plot(use_index=False)
pyplot.subplot(212) ## lower window
pd.Series(my_sma).plot(use_index=False)
And here is the plotting.
Besides, I want to plot them in the same window, i.e. overlaid.
Sorry, I have changed my code a little bit so that it is better structured and easier to understand.
start = datetime.datetime(2002, 1, 1)
def computeSMA(data):
    """Return the 5-period simple moving average of the tail of ``data``.

    Only the last 100 rows are considered; rows containing NaN are dropped
    and the first column of what remains is passed to ``talib.SMA``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose first column holds the series to smooth.

    Returns
    -------
    The value returned by ``talib.SMA(..., timeperiod=5)``.
    """
    head = data[-100:].dropna()
    # DataFrame.as_matrix() was removed in pandas 1.0; take the first column
    # directly as a float array instead of transposing the whole matrix.
    x = head.iloc[:, 0].to_numpy(dtype=float)
    # Defensive: dropna() above should already have removed every NaN.
    y = x[~np.isnan(x)]
    my_sma = talib.SMA(y, timeperiod=5)
    return my_sma
## S&P 500
sp500 = web.DataReader("SP500", "fred", start)
sp_sma = computeSMA(sp500)
## Plot
pyplot.figure(1)
sp500[-100:].dropna().plot()
pyplot.figure(2)
pd.Series(sp_sma).plot(use_index=False)
If I run the code, I got the error as follow:
File "C:\Users\Administrator\Anaconda3\lib\site-packages\matplotlib\dates.py", line 401, in num2date
return _from_ordinalf(x, tz)
File "C:\Users\Administrator\Anaconda3\lib\site-packages\matplotlib\dates.py", line 254, in _from_ordinalf
dt = datetime.datetime.fromordinal(ix).replace(tzinfo=UTC)
ValueError: ordinal must be >= 1
If I comment the plotting of figure(2), I will get plotting shown2:
If I comment the plotting of figure(1), I will get the plotting shown 3:
Besides, I want to plot the SP500 and its SMA on the same figure and with the Date in X-axis.
To plot two series on the same plot (note that this works for any number of time series):
import pandas as pd
import matplotlib.pyplot as pyplot
# Generate sample series (in your case these are s1=head and s2=pd.Series(my_sma)
s1 = pd.Series([1, 3, 8, 10])
s2 = pd.Series([2, 4, 9, 11])
# Create the plot.
pyplot.figure()
pyplot.plot(s1)
pyplot.plot(s2)
pyplot.show()
Result:

Resources