matplotlib x-axis messed up [duplicate] - python-3.x

I have a series whose index is datetime that I wish to plot. I want to plot the values of the series on the y axis and the index of the series on the x axis. The Series looks as follows:
2014-01-01 7
2014-02-01 8
2014-03-01 9
2014-04-01 8
...
I generate a graph using plt.plot(series.index, series.values). But the graph looks like:
The problem is that I would like to have only year and month (yyyy-mm or 2016 March). However, the graph contains hours, minutes and seconds. How can I remove them so that I get my desired formatting?

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Sample data: N month-start timestamps paired with reproducible random values.
N = 30
np.random.seed(365)  # fixed seed so the example values are reproducible
drange = pd.date_range("2014-01", periods=N, freq="MS")
values = {'values': np.random.randint(1, 20, size=N)}
df = pd.DataFrame(values, index=drange)
fig, ax = plt.subplots()
ax.plot(df.index, df.values)
# Put one tick on every timestamp of the index.
ax.set_xticks(df.index)
# Label both major and minor ticks as year-month only (no time-of-day part).
year_month = "%Y-%m"
ax.xaxis.set_major_formatter(mdates.DateFormatter(year_month))
ax.xaxis.set_minor_formatter(mdates.DateFormatter(year_month))
_ = plt.xticks(rotation=90)

You can try something like this:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# 36 month-end timestamps (2014-01 .. 2016-12) with random sample values.
# An explicit MonthEnd offset is used instead of the 'M' string alias, which
# newer pandas releases deprecate.
month_ends = pd.date_range(start='2014-01-01', end='2016-12-31', freq=pd.offsets.MonthEnd())
df = pd.DataFrame({'values': np.random.randint(0, 1000, 36)}, index=month_ends)
fig, ax1 = plt.subplots()
# Plot on the same axes object whose x-axis formatter is set below.
ax1.plot(df.index, df.values)
# '%Y %B' renders labels such as "2016 March".
monthyearFmt = mdates.DateFormatter('%Y %B')
ax1.xaxis.set_major_formatter(monthyearFmt)
_ = plt.xticks(rotation=90)

You should check out this native function of matplotlib:
# Figure-level helper for tidying date tick labels.
# NOTE(review): check the Matplotlib docs for its rotation/alignment defaults.
fig.autofmt_xdate()
See examples on the source website Custom tick formatter

Related

How to use Matplotlib to plot time series data with unequal distance between dates [duplicate]

Hi I am trying to convert a list of dates as strings to an x axis in matplotlib and I can't seem to get it to come out right.
import matplotlib
from matplotlib import pyplot
from matplotlib import dates
# The list is named date_strings (not `dates`) so that it is not replaced by
# the `matplotlib.dates` module that the import above binds to `dates`.
date_strings = ['2014-05-06', '2014-05-07', '2014-05-08', '2014-05-09', '2014-05-10', '2014-05-11', '2014-05-12', '2014-05-13']
# Convert the date strings to numbers; a plain plot() call does not know
# these numbers represent dates, which is what the question is about.
converted_dates = matplotlib.dates.datestr2num(date_strings)
x_axis = converted_dates
y_axis = range(0, 8)
pyplot.plot(x_axis, y_axis, '-')
pyplot.show()
This brings back 1 2 3 4 5 6 7 on the x axis of the chart. What am I missing? I would like it to display 2014-05-06 etc.
Is this the goal? (Threw in rotation because it almost always comes up, with dates.)
import datetime
import matplotlib
from matplotlib import dates
from matplotlib import pyplot
datelist = ['2014-05-06', '2014-05-07', '2014-05-08', '2014-05-09', '2014-05-10', '2014-05-11', '2014-05-12', '2014-05-13']
# Parse every date string into a datetime object so the x values are real dates.
converted_dates = [datetime.datetime.strptime(text, '%Y-%m-%d') for text in datelist]
x_axis = converted_dates
y_axis = range(0, 8)
# Tick labels are rendered as year-month-day.
formatter = dates.DateFormatter('%Y-%m-%d')
pyplot.plot(x_axis, y_axis, '-')
figure = pyplot.gcf()
ax = figure.axes[0]
ax.xaxis.set_major_formatter(formatter)
# Rotate the date labels by 25 degrees.
figure.autofmt_xdate(rotation=25)
pyplot.show()
The idea of using matplotlib.dates.datestr2num is in principle correct. You would then need to tell matplotlib to actually interpret the resulting numbers as dates. One easy option is to use plot_date instead of plot.
import matplotlib
import matplotlib.dates
import matplotlib.pyplot as plt
dates = [
    '2014-05-06', '2014-05-07', '2014-05-08', '2014-05-09',
    '2014-05-10', '2014-05-11', '2014-05-12', '2014-05-13',
]
# Convert the date strings to matplotlib's numeric date values.
converted_dates = matplotlib.dates.datestr2num(dates)
x_axis = converted_dates
y_axis = range(0, 8)
# plot_date (instead of plot) makes matplotlib interpret the x numbers as dates.
# NOTE(review): plot_date is reported as deprecated in recent Matplotlib
# releases - confirm against the installed version.
plt.plot_date(x_axis, y_axis, '-')
plt.show()
Try using strptime. Documentation is here:
https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior
For example:
import datetime
sDate = '2014-05-06'
# The format string must mirror the layout of sDate: year-month-day.
# ("%m-%d-%Y" would raise ValueError for this input.)
dtDate = datetime.datetime.strptime(sDate, "%Y-%m-%d")
matplotlib can compare datetime objects.
The easiest is to use numpy directly:
import matplotlib
from matplotlib import pyplot
from matplotlib import dates
import numpy as np
# ISO-formatted date strings; numpy converts them to datetime64 directly.
dates = [
    '2014-05-06', '2014-05-07', '2014-05-08', '2014-05-09',
    '2014-05-10', '2014-05-11', '2014-05-12', '2014-05-13',
]
converted_dates = np.array(dates, dtype='datetime64[ms]')
ydata = range(8)
pyplot.plot(converted_dates, ydata, '-')
pyplot.show()

How to fix 'RuntimeError: Locator ... exceeds Locator.MAXTICKS - matplotlib'

I'm plotting campaign data on a timeline, where only the time (rather than the date) sent is relevant, hence the column contains only time data (imported from a CSV).
It displays the various line graphs (spaghetti plot) however, when I want to add the labels to the x axis, I receive
RuntimeError: Locator attempting to generate 4473217 ticks from 30282.0 to 76878.0: exceeds Locator.MAXTICKS
I have 140 rows of data for this test file, the times are between 9:05 and 20:55 and my code is supposed to get a tick for every 15 minutes.
python: 3.7.1.final.0
python-bits: 64
OS: Windows
OS-release: 10
pandas: 0.23.4
matplotlib: 3.0.2
My actual code looks like:
import pandas
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
# Load the campaign CSV (path is specific to the asker's machine).
file_name = r'''C:\\Users\\A_B_testing.csv'''
df1 = pandas.read_csv(file_name, encoding='utf-8')
# Campaign 1: keep the rows whose 'DataSource ID' contains 'Campaign1', then
# pull the send times and revenue values out as plain lists.
df_Campaign1 = df1[df1['DataSource ID'].str.contains('Campaign1')==True]
Campaign1_times = df_Campaign1['time sent'].tolist()
Campaign1_revenue = df_Campaign1['EstValue/sms'].tolist()
# Parse "H:M" strings and keep only the time-of-day part (datetime.time).
# The answer below replaces these time objects with full datetime objects
# to fix the MAXTICKS error.
Campaign1_times = [datetime.strptime(slot,"%H:%M").time() for slot in Campaign1_times]
# Campaign 2: same extraction as for campaign 1.
df_Campaign2 = df1[df1['DataSource ID'].str.contains('Campaign2')==True]
Campaign2_times = df_Campaign2['time sent'].tolist()
Campaign2_revenue = df_Campaign2['EstValue/sms'].tolist()
Campaign2_times = [datetime.strptime(slot,"%H:%M").time() for slot in Campaign2_times]
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
# Configure the x axis before plotting: 15-minute major ticks labelled HH:MM.
xlocator = mdates.MinuteLocator(byminute=None, interval=15) # tick every 15 minutes
xformatter = mdates.DateFormatter('%H:%M')
ax.xaxis.set_major_locator(xlocator)
ax.xaxis.set_major_formatter(xformatter)
ax.minorticks_off()
plt.grid(True)
# One line per campaign (green for campaign 1, yellow for campaign 2).
plt.plot(Campaign1_times, Campaign1_revenue, c = 'g', linewidth = 1)
plt.plot(Campaign2_times, Campaign2_revenue, c = 'y', linewidth = 2)
plt.show()
I tried to reduce the number of values to be plotted and it worked fine on a dummy set as follows:
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import HourLocator, MinuteLocator, DateFormatter
from datetime import datetime
# Dummy data: send times parsed to full datetime objects, plus two value series.
time_labels = ['9:05', '10:35' ,'12:05' ,'12:35', '13:05']
xvalues = [datetime.strptime(label, "%H:%M") for label in time_labels]
yvalues = [2.2, 2.4, 1.7, 3, 2]
zvalues = [3.2, 1.4, 1.8, 2.7, 2.2]
fig, ax = plt.subplots(1, figsize=(16, 6))
# Major tick every 15 minutes, labelled as hour:minute; no minor ticks.
xlocator = MinuteLocator(interval=15)
xformatter = DateFormatter('%H:%M')
ax.xaxis.set_major_locator(xlocator)
ax.xaxis.set_major_formatter(xformatter)
ax.minorticks_off()
plt.grid(True)
# Draw both series against the same x values (green first, then blue).
for series, colour in ((yvalues, 'g'), (zvalues, 'b')):
    plt.plot(xvalues, series, c=colour)
plt.show()
So I think that the issue is related to the way I'm declaring the ticks. I tried to find a relevant post here, but none has solved my problem. Can anyone please point me in the right direction? Thanks in advance.
I had a similar issue which got fixed by using datetime objects instead of time objects in the x-axis.
Similarly, in the code of the question, using the full datetime instead of just the time should fix the issue.
replace:
[datetime.strptime(slot,"%H:%M").time() for slot in ...
by:
[datetime.strptime(slot,"<full date format>") for slot in

Tick labels only displayed in one subplot

Need to display custom shared x-axis tick labels on both subplots, using two datasets with different dates.
from pandas import DataFrame, date_range, Timedelta
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
# Dataset 1: five random values on a '1M' date range starting 2015-01-01.
rng1 = date_range(start='01-01-2015', periods=5, freq='1M')
df1 = DataFrame({'y':np.random.normal(size=len(rng1))}, index=rng1)
y1 = df1['y']
# Dataset 2: five random values on a '2M' date range, so its dates differ
# from dataset 1.
rng2 = date_range(start='01-01-2015', periods=5, freq='2M')
df2 = DataFrame({'y':np.random.normal(size=len(rng2))}, index=rng2)
y2 = df2['y']
# Figure: two stacked subplots sharing the x axis.
fig,(ax1,ax2) = plt.subplots(2,1,sharex=True)
y1.plot(ax=ax1)
y2.plot(ax=ax2)
plt.xticks(rotation=30)
ax1.xaxis.set_minor_formatter(plt.NullFormatter())
# Only the lower axes gets a month locator and a date formatter here.
ax2.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
# NOTE(review): the answer below questions whether '%D' is a valid format
# string and suggests '%b'.
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%D'))
plt.show()
(Above) Creates figure without x-axis labels on the upper subplot
I expect adding the below code to display the same x-axis labels to the upper subplot, but they are not showing up. What am I doing wrong?
ax1.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%D'))
Setting sharex=False does not work because the dates are different for each dataset.
First of all, if you want to use matplotlib.dates locators and formatters on a plot created via pandas you should use the x_compat=True argument in your plot, otherwise pandas may scale the axis rather arbitrarily.
Then '%D' is not a valid format string. Maybe you mean '%b'?
Now there are two options.
Use sharex=False, set your locators and formatters to both axes, and finally set the limits of the one plot to the limits of the other. In this case since the lower plot comprises a larger range,
ax1.set_xlim(ax2.get_xlim())
The other option is to use sharex=True and turn the labels visible again.
plt.setp(ax1.get_xticklabels(), visible=True)
Unfortunately this option is broken on the newest matplotlib version. I just opened a bug report about it.
Full code for the first option (since the second one is not working):
from pandas import DataFrame, date_range, Timedelta
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
# Dataset 1: five random values on a '1M' date range.
rng1 = date_range(start='2015-01-01', periods=5, freq='1M')
df1 = DataFrame({'y': np.random.normal(size=len(rng1))}, index=rng1)
y1 = df1['y']
# Dataset 2: five random values on a '2M' date range.
rng2 = date_range(start='2015-01-01', periods=5, freq='2M')
df2 = DataFrame({'y': np.random.normal(size=len(rng2))}, index=rng2)
y2 = df2['y']
# Figure: independent x axes; x_compat=True so the matplotlib.dates locators
# and formatters can be used on the pandas plots.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=False)
y1.plot(ax=ax1, x_compat=True)
y2.plot(ax=ax2, x_compat=True)
plt.xticks(rotation=30)
# Give both axes the same tick setup: monthly major ticks labelled with the
# abbreviated month name, and no minor ticks.
for axes in (ax1, ax2):
    axes.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    axes.xaxis.set_minor_locator(plt.NullLocator())
    axes.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
# Align the upper plot's x range with the lower plot's range.
ax1.set_xlim(ax2.get_xlim())
plt.show()
This seemed to work for me, just following the shared_axis_demo
fig = plt.figure()
# Upper subplot: plot the first series and force its x tick labels visible.
ax1 = plt.subplot(2, 1, 1)
_ = plt.plot(y1)
plt.setp(ax1.get_xticklabels(), visible=True)
# Lower subplot: shares its x axis with the upper one.
ax2 = plt.subplot(2, 1, 2, sharex=ax1)
_ = plt.plot(y2)
plt.show()

Timeserie datetick problems when using pandas.DataFrame.plot method

I just discovered something really strange when using plot method of pandas.DataFrame. I am using pandas 0.19.1. Here is my MWE:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
# Hourly timestamps from 1990-01-01 to 1990-01-08 with one random value each.
t = pd.date_range('1990-01-01', '1990-01-08', freq='1H')
x = pd.DataFrame(np.random.rand(len(t)), index=t)
fig, axe = plt.subplots()
# Plot through pandas' DataFrame.plot onto the prepared axes.
x.plot(ax=axe)
# NOTE(review): plt.show is given the axes as a positional argument -
# confirm the installed Matplotlib accepts this.
plt.show(axe)
# Raw tick positions, inspected in the discussion below.
xt = axe.get_xticks()
When I try to format my xticklabels I get strange behaviours, so I inspected the objects to understand what is going on, and I found the following:
t[-1] - t[0] = Timedelta('7 days 00:00:00'), confirming the DateTimeIndex is what I expect;
xt = [175320, 175488], xticks are integers but they are not equals to a number of days since epoch (I do not have any idea about what it is);
xt[-1] - xt[0] = 168, so the ticks look more like positional indices: the span matches the number of points, len(x) = 169.
This explains why I cannot succeed in formatting my axes using:
axe.xaxis.set_major_locator(mdates.HourLocator(byhour=(0,6,12,18)))
axe.xaxis.set_major_formatter(mdates.DateFormatter("%a %H:%M"))
The first raises an error saying that there are too many ticks to generate.
The second shows that my first tick is Fri 00:00 but it should be Mon 00:00 (in fact matplotlib assumes the first tick to be 0481-01-03 00:00, oops, this is where my bug is).
It looks like there is some incompatibility between pandas and matplotlib integer to date conversion but I cannot find out how to fix this issue.
If I run instead:
fig, axe = plt.subplots()
# Plot the frame through matplotlib directly instead of DataFrame.plot.
axe.plot(x)
# Weekday plus hour:minute tick labels.
axe.xaxis.set_major_formatter(mdates.DateFormatter("%a %H:%M"))
# NOTE(review): plt.show is given the axes as a positional argument -
# confirm the installed Matplotlib accepts this.
plt.show(axe)
# Raw tick positions, compared below with the pandas-plot case.
xt = axe.get_xticks()
Everything works as expected but I miss all cool features from pandas.DataFrame.plot method such as curve labeling, etc. And here xt = [726468. 726475.].
How can I properly format my ticks using pandas.DataFrame.plot method instead of axe.plot and avoiding this issue?
Update
The problem seems to be about origin and scale (units) of underlying numbers for date representation. Anyway I cannot control it, even by forcing it to the correct type:
t = pd.date_range('1990-01-01', '1990-01-08', freq='1H', origin='unix', units='D')
There is a discrepancy between matplotlib and pandas representation. And I could not find any documentation of this problem.
Is this what you are going for? Note I shortened the date_range to make it easier to see the labels.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as dates
# Hourly sample data over a shortened range so the labels stay readable.
t = pd.date_range('1990-01-01', '1990-01-04', freq='1H')
x = pd.DataFrame(np.random.rand(len(t)), index=t)
# Resample to 6-hour intervals; only the resulting index is needed, as the
# tick positions.
six_hourly = x.resample('6H').first()
tick_index = six_hourly.index
# Passing xticks positions the ticks but does not set their label text.
ax = x.plot(xticks=tick_index)
# Set the major tick label text explicitly: weekday plus hour:minute.
tick_text = tick_index.strftime("%a %H:%M")
ax.set_xticklabels(tick_text)
# Blank out the minor labels left over from the initial pandas plot.
ax.set_xticklabels([], minor=True)
# Tidy up the date labels on the figure.
figure = ax.get_figure()
figure.autofmt_xdate()
plt.show()

Matplotlib Line Graph with Table from Pandas Pivot Table

Given the following:
import pandas as pd
import numpy as np
%matplotlib inline
# Sample data: one Count per (YYYYMM, Group) pair for groups A and B.
df = pd.DataFrame(
{'YYYYMM':[201603,201503,201403,201303,201603,201503,201403,201303],
'Count':[5,6,2,7,4,7,8,9],
'Group':['A','A','A','A','B','B','B','B']})
# Drop the last two characters (the month digits) so only the year remains.
df['YYYYMM']=df['YYYYMM'].astype(str).str[:-2]
# Pivot to one row per year and one column per group, summing the values.
t=df.pivot_table(df,index=['YYYYMM'],columns=['Group'],aggfunc=np.sum)
# NOTE(review): `plt` is not imported in this snippet - presumably
# matplotlib.pyplot; confirm.
fig, ax = plt.subplots(1,1)
t.plot(ax=ax)
I'd like to add a summary table below it as a separate axis.
I would allocate the other axis via subplot2grid, but using subplots(2,1) will work as well as I can just adapt it to my needs.
I'd like the table to look like this:
2013 2014 2015 2016
A 7 2 6 5
B 9 8 7 4
...with no borders/lines if possible.
Update:
Here's a sample of what I've tried with subplot2grid:
import pandas as pd
import numpy as np
%matplotlib inline
# Sample data: one Count per (YYYYMM, Group) pair for groups A and B.
df = pd.DataFrame(
{'YYYYMM':[201603,201503,201403,201303,201603,201503,201403,201303],
'Count':[5,6,2,7,4,7,8,9],
'Group':['A','A','A','A','B','B','B','B']})
# Drop the last two characters (the month digits) so only the year remains.
df['YYYYMM']=df['YYYYMM'].astype(str).str[:-2]
# Pivot to one row per year and one column per group, summing the values.
t=df.pivot_table(df,index=['YYYYMM'],columns=['Group'],aggfunc=np.sum)
# NOTE(review): `plt`, `figsize` and (further down) `sns` are not defined in
# this snippet - presumably set up elsewhere; confirm.
fig = plt.figure(figsize=(figsize), dpi=300)
# 100x100 layout grid: ax1 takes rows 0-69 for the line plot, ax2 takes
# rows 80-99 for the table area.
ax1 = plt.subplot2grid((100,100), (0,0), rowspan=70, colspan=100)
ax2 = plt.subplot2grid((100,100), (80,0), rowspan=20, colspan=100)
t.plot(ax=ax1)
ax1.legend_.remove()
# Hide all four spines on both axes.
ax1.spines['top'].set_visible(False);ax1.spines['right'].set_visible(False);ax1.spines['bottom'].set_visible(False);ax1.spines['left'].set_visible(False)
ax2.spines['top'].set_visible(False);ax2.spines['right'].set_visible(False);ax2.spines['bottom'].set_visible(False);ax2.spines['left'].set_visible(False)
#ax1.xaxis.set_visible(False) #Hide x axis label
# The lower axes shows neither its x axis nor its y axis.
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
# Tick marks: bottom/left only on ax1, none on ax2.
ax1.tick_params(axis='x',which='both',bottom='on',top='off')
ax1.tick_params(axis='y',which='both',left='on',right='off')
ax2.tick_params(axis='x',which='both',bottom='off',top='off')
ax2.tick_params(axis='y',which='both',left='off',right='off')
from matplotlib.colors import ListedColormap
# Transposed pivot (groups as rows, years as columns) for the summary table.
t2=df.pivot_table(df,index=['Group'],columns=['YYYYMM'],aggfunc=np.sum).sortlevel(ascending=False)
# Annotated heat map with an all-white colormap, used as a table.
# NOTE(review): this passes `df`, not the pivoted `t2` built just above -
# confirm which one is intended.
sns.heatmap(df,annot=True,fmt='d',linewidths=.5,cbar=False,cmap=ListedColormap(['white']))
plt.show()
...which produces this (notice that the bottom plot is hidden; this is intentional, as I only want to display faint grey lines between rows and columns where absolutely needed, per Stephen Few's "Show Me The Numbers").
But I would like for the years in the table to align with the year index tick labels on the x-axis.
Another update:
Using Seaborn (see last 4 lines of code in update), I tried a heat map, which might get me to where I need to be. I just need to format the numbers, label the groups, and maybe shift the dates.

Resources