Matplotlib - Horizontal Bar Chart Timeline With Dates - Xticks not showing date - python-3.x

Trying to make a graph that looks like the first image here.
However when I try and implement it, I can't work out how to get the dates to print on the X axis, the scale seems about right, just the xticks seem not to be dates, but some basically random number. The typical output is visible in figure 2.
How can I get the dates to show on the xticks. I would like it to show every month, or quarter between 2019-12-01 and 2021-03-01 (march 1st).
Bonus points for any formatting that makes it look more like the first picture.
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
event = np.array(['Groupone','Grouptwo','Group3','Group4','Group5','Group6'])
begin = np.array([datetime(year=2019,month=12,day=1),datetime(year=2020,month=2,day=1),datetime(year=2020,month=5,day=1),datetime(year=2020,month=11,day=1),datetime(year=2019,month=12,day=1),datetime(year=2020,month=5,day=1)])
end = np.array([datetime(year=2019,month=12,day=30),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2020,month=7,day=3)])
beg_sort = np.sort(begin)
end_sort = end[np.argsort(begin)]
evt_sort = event[np.argsort(begin)]
plt.barh(range(len(beg_sort)), end_sort-beg_sort, left=beg_sort, align='center')
plt.yticks(range(len(beg_sort)), evt_sort)
plt.yticks(range(len(beg_sort)), evt_sort)
plt.show()(begin)
end_sort = end[np.argsort(begin)]
evt_sort = event[np.argsort(begin)]
plt.barh(range(len(beg_sort)), end_sort-beg_sort, left=beg_sort, align='center')
plt.yticks(range(len(beg_sort)), evt_sort)
plt.show()

You can plot each bar as line, choosing the width of the line (lw) you prefer:
# Set the color of the grid lines
mpl.rcParams['grid.color'] = "w"
fig, ax = plt.subplots(1, 1)
# Plot eac item as a line
for i, (b, e, l) in enumerate(zip(beg_sort, end_sort, evt_sort)):
ax.plot_date([b, e], [i + 1] * 2, ls='-', marker=None, lw=10) # 10 for the line width
# Set ticks and labels on y axis
ax.set_yticks(range(1, len(evt_sort) + 1))
ax.set_yticklabels(evt_sort)
# Set color and transparency of the grid
ax.patch.set_facecolor('gray')
ax.patch.set_alpha(0.3)
# activate grid
ax.grid(True)
Moreover, you can play with the background grid, customizing it according to your needs.

Hacked something together that works, posting for curiosity, however go with PieCot's answer above:
import matplotlib.pyplot as plt
import numpy as np
from datetime import date
from datetime import datetime
import matplotlib.dates as mdates
#https://stackoverflow.com/questions/58387731/plotting-month-year-as-x-ticks-in-matplotlib
fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(15, 4.18))
#fig, ax = plt.figure( figsize=(15, 4.18))
event = np.array(['Groupone','Grouptwo','Group3','Group4','Group5','Group6'])
begin = np.array([datetime(year=2019,month=12,day=1),datetime(year=2020,month=2,day=1),datetime(year=2020,month=5,day=1),datetime(year=2020,month=11,day=1),datetime(year=2019,month=12,day=1),datetime(year=2020,month=5,day=1)])
end = np.array([datetime(year=2019,month=12,day=30),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2020,month=7,day=3)])
beg_sort = np.sort(begin)
end_sort = end[np.argsort(begin)]
evt_sort = event[np.argsort(begin)]
#start_m = click.prompt('Start month', type=int)
#start_y = click.prompt('Start year', type=int)
#end_m = click.prompt('End month', type=int)
#end_y = click.prompt('End year', type=int)
start_m = 12
start_y = 2019
end_m = 3
end_y = 2021
months = mdates.MonthLocator() # Add tick every month
#days = mdates.DayLocator(range(1,32,5)) # Add tick every 5th day in a month
#monthFmt = mdates.DateFormatter('%b') # Use abbreviated month name
ax[1].xaxis.set_major_locator(months)
#ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%m-%Y'))
ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%b-%Y'))
ax[1].xaxis.set_tick_params(rotation=90)
#ax.xaxis.set_tick_params(rotation=30)
#ax[1].xaxis.set_major_formatter(monthFmt)
#ax[0].xaxis.set_minor_locator(days)
start = date(year=start_y,month=start_m,day=1)
print(start)
end = date(year=end_y,month=end_m,day=1)
print(end)
Nticks = 6
delta = (end-start)/Nticks
tick_dates = [start + i*delta for i in range(Nticks)]
x_ticks = ['{}/{}'.format(d.month,d.year) for d in tick_dates]
print(x_ticks)
plt.barh(range(len(beg_sort)), end_sort-beg_sort, left=beg_sort, align='center')
plt.yticks(range(len(beg_sort)), evt_sort)
plt.yticks(range(len(beg_sort)), evt_sort)
#plt.xticks = x_ticks
#plt.set_xticks(x_ticks)
#plt.show()
fig.delaxes(ax[0])
plt.savefig('gwern.pdf',bbox_inches='tight')

Related

Date Tick-marks (mm/dd) on Slider

I have added a slider to my plot.
ax2 = fig.add_axes([.1, .01, .8, .05])
ax2_Slider = Slider(ax2, 'NormDate', valmin, valmax, valstep=1, color='w', initcolor='none', track_color='g')
ax2_Slider.valtext.set_visible(False)
In matplotlib, the slider values must be float, not date.
Thus I used date2num to convert the dates values.
leftDate = datetime.date.today() - relativedelta(days=366)
valmin = mpl.dates.date2num(leftDate)
valmax = mpl.dates.date2num(datetime.date.today())
How can I add tick marks to the slider to show mm/dd?
Also how to add title showing which date is selected?
Of course, you could set tick labels. But why not also display the actual date?
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider
from datetime import date, timedelta
from matplotlib.dates import date2num, num2date
#set figure layout
fig = plt.figure(figsize=(10, 5))
gs = fig.add_gridspec(2, 3, width_ratios=[1, 10, 1], height_ratios=[10, 1])
#main panel
ax1 = fig.add_subplot(gs[0, :])
#slider panel
ax2 = fig.add_subplot(gs[1, 1])
valmin = date2num(date.today() - timedelta(days=366))
valmax = date2num(date.today())
ax2_Slider = Slider(ax2, 'NormDate', valmin, valmax, valstep=1, color='w', initcolor='none', track_color='g')
#adding and formatting of the date ticks
ax2.add_artist(ax2.xaxis)
x_tick_nums=np.linspace(valmin, valmax, 10)
ax2.set_xticks(x_tick_nums, [num2date(s).strftime("%m-%d") for s in x_tick_nums])
#convert slider value to date
def changed_slider(s):
ax2_Slider.valtext.set_text(num2date(s).date())
#...
#other things that should happen when the slider value changes
#initiate the correct date display
changed_slider(valmin)
ax2_Slider.on_changed(changed_slider)
plt.show()
Sample output:

How to select specific number of colors to show in color bar from a big list ? - Matplotlib

I plotted some data which has 70 classes, so when I built the color bar it's very difficult to distinguish between each legend as shown below:
The code that I'm using is:
formation_colors = # 70 colors
formation_labels = # 70 labels
data = # the section of the entire dataset which only has 13 labels
data = data.sort_values(by='DEPTH_MD')
ztop=data.DEPTH_MD.min(); zbot=data.DEPTH_MD.max()
cmap_formations = colors.ListedColormap(formation_colors[0:len(formation_colors)], 'indexed')
cluster_f = np.repeat(np.expand_dims(data['Formations'].values,1), 100, 1)
fig = plt.figure(figsize=(2,10))
ax = fig.add_subplot()
im_f = ax.imshow(cluster_f, interpolation='none', aspect='auto', cmap = cmap_formations, vmin=0, vmax=69)
ax.set_xlabel('FORMATION')
ax.set_xticklabels(['']);
divider_f = make_axes_locatable(ax)
cax_f = divider_f.append_axes("right", size="20%", pad=0.05)
cbar_f = plt.colorbar(im_f, cax = cax_f,)
cbar_f.set_ticks(range(0,len(formation_labels))); cbar_f.set_ticklabels(formation_labels)
So far, if I just change:
1. cmap_formations = colors.ListedColormap(formation_colors[0:len(formation_colors)], 'indexed')
2. cbar_f.set_ticks(range(0,len(formation_labels))); cbar_f.set_ticklabels(formation_labels)
to:
cmap_formations = colors.ListedColormap(formation_colors[0:len(data['FORMATION'].unique())], 'indexed')
cbar_f.set_ticks(range(0,len(data['FORMATION'].unique()))); cbar_f.set_ticklabels(data['FORMATION'].unique())
I get, the corresponding colors in the cbar, however the plot is no longer correct and also the legends are out of square
Thank you so much if you have any idea how to do this.
Although not explicitly mentioned in the question, I suppose data['FORMATION'] contains indices from 0 till 69 into the lists of formation_colors and formation_labels
The main problem is that data['FORMATION'] needs to be renumbered to be new indices (with numbers 0 till 12) into the new list of unique colors. np.unique(..., return_inverse=True) returns both the list of unique numbers, and the renumbering for the values.
To be able to reindex the list of colors and of labels, it helps to convert them to numpy arrays.
To make the code easier to debug, the following test uses a simple relation between the list of colors and the list of labels.
from matplotlib import pyplot as plt
from matplotlib import colors
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
import numpy as np
import pandas as pd
formation_colors = np.random.choice(list(colors.CSS4_COLORS), 70, replace=False) # 70 random color names
formation_labels = ['lbl_' + c for c in formation_colors] # 70 labels
formation_colors = np.asarray(formation_colors)
formation_labels = np.asarray(formation_labels)
f = np.random.randint(0, 70, 13)
d = np.sort(np.random.randint(0, 5300, 13))
data = pd.DataFrame({'FORMATION': np.repeat(f, np.diff(np.append(0, d))),
'DEPTH_MD': np.arange(d[-1])})
data = data.sort_values(by='DEPTH_MD')
ztop = data['DEPTH_MD'].min()
zbot = data['DEPTH_MD'].max()
unique_values, formation_new_values = np.unique(data['FORMATION'], return_inverse=True)
cmap_formations = colors.ListedColormap(formation_colors[unique_values], 'indexed')
cluster_f = formation_new_values.reshape(-1, 1)
fig = plt.figure(figsize=(3, 10))
ax = fig.add_subplot()
im_f = ax.imshow(cluster_f, extent=[0, 1, zbot, ztop],
interpolation='none', aspect='auto', cmap=cmap_formations, vmin=0, vmax=len(unique_values)-1)
ax.set_xlabel('FORMATION')
ax.set_xticks([])
divider_f = make_axes_locatable(ax)
cax_f = divider_f.append_axes("right", size="20%", pad=0.05)
cbar_f = plt.colorbar(im_f, cax=cax_f)
cbar_f.set_ticks(np.linspace(0, len(unique_values)-1, 2*len(unique_values)+1)[1::2])
cbar_f.set_ticklabels(formation_labels[unique_values])
plt.subplots_adjust(left=0.2, right=0.5)
plt.show()
Here is a comparison plot:

Second title of a plot/photo is the value of a column in a CSV but only the last value is used in all photos

I have a script that makes a photo that shows a basemap and where an earthquake happened. So 1 earthquake, 1 photo. The second title of each plot should be the date of the earthquake. However, only the last value, which is "2020-04-10", is used in all photos.
from shapely.geometry import Point
from geopandas import GeoDataFrame
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import os
os.chdir(r'path')
def plotPoint():
df = pd.read_csv('earthquakes.csv')
basemap = gpd.read_file('basemap.shp')
crs = "epsg:32651"
geometry = gpd.points_from_xy(df.Longitude, df.Latitude)
gdf = GeoDataFrame(df, crs=crs, geometry=geometry)
for d in df['Date'].values:
date = d
for i in range(gdf.shape[0]):
ax = basemap.plot(figsize=(15,10))
ax.axis('off')
g = gdf.iloc[i].geometry
plt.plot(g.x, g.y, marker='o', color='red', markersize=15)
title = 'Earthquakes in the ___ from 2008 to 2020'
dateInfo = str(date)
plt.suptitle(title)
plt.title(dateInfo)
plt.savefig("earthquake_{0}.png".format(i))
plotPoint()
Get the values of "Date" column
for i in df['Date'].values:
print(i)
Result
2020-04-22
2020-04-22
2020-04-21
2020-04-18
2020-04-10
Sample CSV
Latitude,Longitude,Date,Time_UTC,Depth,Depth Type,Magnitude Type,Magnitude,Region Name,Last Update,Eqid,unknown field
13.81,121.1,2020-04-22,03:19:57,10,f,mb,4.5,MINDORO, PHILIPPINES,2020-04-28 23:17,850323
13.76,120.92,2020-04-22,02:36:19,10, , M,4.2,MINDORO, PHILIPPINES,2020-04-22 03:50,850325
10.45,125.2,2020-04-21,21:43:05,10,f,mb,4.7,LEYTE, PHILIPPINES,2020-04-21 22:55,850252
6.69,125.23,2020-04-18,15:22:16,32, , M,3.6,MINDANAO, PHILIPPINES,2020-04-18 15:35,849329
5.65,126.54,2020-04-10,18:45:49,80, ,Mw,5.2,MINDANAO, PHILIPPINES,2020-04-11 06:41,846838
Changed your code, you were using date from a different for loop and that's why it picked up only the last date, you can use the Date from gdf too I'm guessing:
# for d in df['Date'].values:
# date = d
for i in range(gdf.shape[0]):
ax = basemap.plot(figsize=(15,10))
ax.axis('off')
g = gdf.iloc[i].geometry
plt.plot(g.x, g.y, marker='o', color='red', markersize=15)
title = 'Earthquakes in the ___ from 2008 to 2020'
# Added this line
date = gdf.iloc[i]['Date']
dateInfo = str(date)
plt.suptitle(title)
# Changed this line
plt.title(dateInfo)
plt.savefig("earthquake_{0}.png".format(i))
plt.show()

Bokeh: Changing the frequency of time in the x-axis of a chart

The following coding represents a candlestick chart in bokeh:
from math import pi
import pandas as pd
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.stocks import MSFT
df = pd.DataFrame(MSFT)[:50]
df["date"] = pd.to_datetime(df["date"])
mids = (df.open + df.close)/2
spans = abs(df.close-df.open)
inc = df.close > df.open
dec = df.open > df.close
w = 12*60*60*1000 # half day in ms
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
p = figure(x_axis_type="datetime", tools=TOOLS, plot_width=1000, toolbar_location="left")
#p.title = "MSFT Candlestick"
p.xaxis.major_label_orientation = pi/4
p.grid.grid_line_alpha=0.3
p.segment(df.date, df.high, df.date, df.low, color="black")
p.rect(df.date[inc], mids[inc], w, spans[inc], fill_color="#D5E1DD", line_color="black")
p.rect(df.date[dec], mids[dec], w, spans[dec], fill_color="#F2583E", line_color="black")
output_file("candlestick.html", title="candlestick.py example")
show(p) # open a browser
As you can see in this result that the x-axis dates matches March, the 1th and March, 15th, etc. Is there a possibility to increase the frequency, so that the next date is after March, 1th would be March, 5th for example?
Bokeh documentation offers several options. In some cases setting desired_num_ticks like this could help:
p.xaxis[0].ticker.desired_num_ticks = 20
Or you could try for example:
from bokeh.models import DaysTicker
p.xaxis[0].ticker = DaysTicker(days = [1, 5, 10, 15, 20, 25, 30])
Result:

Plotly iplot() doesnt run within a function

I am trying to use iplot() within a function within Jupyter so that i can use a filter on the graph and have it change dynamically. The code works in a cell on its own like this
# Code for put by ticker
data = []
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker('ABBV')))
for key, values in opPriceDic.items():
trace = go.Scatter(
x = numberOfDays,
y = values,
name = 'option',
line = dict(
width = 4)
)
data.append(trace)
# Edit the layout
layout = dict(title = 'Call prices for ' ,
xaxis = dict(title = 'Days to Expiration'),
yaxis = dict(title = 'Price '),
)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='calls For ')
But once this is placed within a function the graph fails to load
def graph(ticker):
# Code for put by ticker
data = []
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker(ticker)))
for key, values in opPriceDic.items():
trace = go.Scatter(
x = numberOfDays,
y = values,
name = 'option',
line = dict(
width = 4)
)
data.append(trace)
# Edit the layout
layout = dict(title = 'Call prices for ' ,
xaxis = dict(title = 'Days to Expiration'),
yaxis = dict(title = 'Price '),
)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='calls For ')
But if I change the iplot() to plot() it calls the plotly API and opens a new tab with the graph displaying.
I am just wondering if anyone has noticed this before and may have come across a solution?
(if I am in the wrong area I will remove the post)
I have tried to use pandas data.reader calls to pull ticker data between a start and end date. The data.reader seems to work from within the function. In the question code, if the opPriceDic dictionary could be converted to a dataframe, then iplot() could plot it without use of layout and fig as below:
# Import libraries
import datetime
from datetime import date
import pandas as pd
import numpy as np
from plotly import __version__
%matplotlib inline
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
init_notebook_mode(connected=True)
cf.go_offline()
# Create function that uses data.reader and iplot()
def graph(ticker):
# create sample data set
start = datetime.datetime(2006, 1, 1)
end = datetime.datetime(2016, 1, 1)
df = data.DataReader(ticker, 'morningstar', start, end)
df = df.reset_index()
df['numberOfDays'] = df.apply(lambda x: abs((datetime.datetime.now() - x['Date']).days), axis=1)
# call iplot within the function graph()
df.iplot(kind='line', x='numberOfDays', y='Close', xTitle='Days', yTitle='Value', title='Prices', width=4)

Resources