Convert string to matplotlib date for plotting - python-3.x

I've been trying to convert string date into a format that can be plotted on a graph.
The code
import matplotlib.pyplot as plt
import numpy as np
import urllib
import matplotlib.dates as mdates
import datetime
# Download the CSV at stock_price_url, parse it with np.loadtxt and plot the
# `closep` column against the parsed dates.
# NOTE(review): the indentation of this snippet was lost; the statements up to
# plt.show() presumably form the body of graph_data() - confirm.
def graph_data():
fig=plt.figure()
ax1=plt.subplot2grid((1,1),(0,0))
stock_price_url = 'https://pythonprogramming.net/yahoo_finance_replacement'
source_code = urllib.request.urlopen(stock_price_url).read().decode()
stock_data = []
split_source=source_code.split('\n')
print(len(split_source))
# Skip the first line (presumably the CSV header - confirm) and keep the rest.
for line in split_source[1:]:
stock_data.append(line)
# No dtype or converters are passed to np.loadtxt here, so it presumably tries
# to read every column, including the date column, as a number; the reported
# "could not convert string to float: '2017-07-26'" most likely comes from
# this line.
date,openn,closep,highp,lowp,openp,volume=np.loadtxt(stock_data,delimiter=',',unpack=True)
# NOTE(review): the file does `import datetime` (the module), so this call
# would have to be datetime.datetime.strptime; it also needs `d` to be a str.
x = [datetime.strptime(d, '%Y-%m-%d') for d in date]
ax1.plot_date(x,closep,'-',linewidth=0.1)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Graph')
plt.show()
graph_data()
Any method of conversion just gives the same error
ValueError: could not convert string to float: '2017-07-26'
What method can I use to convert the string into a date that can be plotted?

There's nothing wrong with your code. The problem is with the data.
If you look at the data, you will find that each row, from the date through the volume field, is a single comma-separated string like this:
data = '2017-07-26,153.3500,153.9300,153.0600,153.5000,153.5000,12778195.00'.
That is the representation of a string.
So you need to do some preprocessing first. There may be various methods to do so.
I found this method helpful to me:
First, replace the commas in each row with spaces, and then use the split function to break the row into a list of its individual fields.
So, you need to make these changes in your code:
# Collect the date (field 0) and closep (field 2) of every row in stock_data.
date = []
closep = []
for row in stock_data:
    temp = row.replace(',', ' ').split()
    # A blank row (e.g. the empty string left after the file's final newline)
    # splits to an empty list; skip it instead of failing on temp[0].
    if len(temp) < 3:
        continue
    date.append(temp[0])
    closep.append(temp[2])
0 and 2 are the positions (indices) of date and closep in each row of your dataset.
Now, instead of the 'x' and 'closep' you used in your plot call, use the 'date' and 'closep' lists built by the code above.
One more thing: I think the graph has trouble with a dataset this big.
So use date[0:100], closep[0:100] to try the plot on a smaller slice of the data.
The complete code would look like this:
import matplotlib.pyplot as plt
import numpy as np
import urllib
import matplotlib.dates as mdates
import datetime
def graph_data():
    """Download the sample stock CSV and plot the first 100 rows.

    Field 0 of each row is parsed as a ``%Y-%m-%d`` date and field 2 as a
    float, so the axes receive real dates and numbers rather than strings.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # `import urllib` alone does not load the `request` submodule.
    import urllib.request

    fig = plt.figure()
    ax1 = plt.subplot2grid((1, 1), (0, 0))
    stock_price_url = 'https://pythonprogramming.net/yahoo_finance_replacement'
    with urllib.request.urlopen(stock_price_url) as response:
        source_code = response.read().decode()

    date = []
    closep = []
    # The first line is skipped (presumably the CSV header - confirm).
    for line in source_code.split('\n')[1:]:
        fields = line.strip().split(',')
        # Blank or truncated rows (e.g. after the final newline) are ignored
        # instead of raising IndexError.
        if len(fields) < 3:
            continue
        date.append(datetime.datetime.strptime(fields[0], '%Y-%m-%d'))
        # NOTE(review): field 2 is what the original code calls `closep`;
        # verify the column against the CSV header.
        closep.append(float(fields[2]))

    # Only the first 100 rows are drawn, as in the original answer.
    # datetime objects are plotted directly with plot().
    ax1.plot(date[0:100], closep[0:100], '-', linewidth=0.1)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('Graph')
    plt.show()


graph_data()
Hope this helps.

Related

Annotating clustering from DBSCAN to original Pandas DataFrame

I have working code that is utilizing dbscan to find tight groups of sparse spatial data imported with pd.read_csv.
I am maintaining the original spatial data locations and would like to annotate the labels returned by dbscan for each data point to the original dataframe and then write a csv with the same information.
So the code below is doing exactly what I would expect it to at this point, I would just like to extend it to import the label for each row in the original dataframe.
import argparse
import string
import os, subprocess
import pathlib
import glob
import gzip
import re
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from sklearn.cluster import DBSCAN
# Cluster the x/y points of a CSV with DBSCAN and draw them inside a boundary
# polygon.
# NOTE(review): tmp_csv_name, EPS, minSamples, sizeX, sizeY, maxX, maxY,
# colors, hackX and hackY are not defined in this snippet; they presumably
# come from elsewhere in the script - confirm.
X = pd.read_csv(tmp_csv_name)
X = X.drop(['Name', 'Type', 'SomeValue'], axis=1)
# only columns 'x' and 'y' now remain
db = DBSCAN(eps=EPS, min_samples=minSamples, metric='euclidean',
            algorithm='auto', leaf_size=30).fit(X)
# The fitted estimator is bound to `db`; `def_inst_dbsc` was never defined.
labels = db.labels_
unique_labels = set(labels)
# maxX , maxY are manual inputs temporarily
# Shrink the figure by 20% per step until neither side exceeds 16.
while sizeX > 16 or sizeY > 16:
    sizeX = sizeX * 0.8
    sizeY = sizeY * 0.8
fig, ax = plt.subplots(figsize=(sizeX, sizeY))
plt.xlim(0, maxX)
plt.ylim(0, maxY)
plt.scatter(X['x'], X['y'], c=colors, marker="o", picker=True)
# hackX , hackY are manual inputs temporarily
# which represent the boundaries defined in the original dataset
poly = patches.Polygon(xy=list(zip(hackX, hackY)), fill=False)
ax.add_patch(poly)
plt.show()

How to use Matplotlib to plot time series data with unequal distance between dates [duplicate]

Hi I am trying to convert a list of dates as strings to an x axis in matplotlib and I can't seem to get it to come out right.
import matplotlib
from matplotlib import pyplot
from matplotlib import dates

# The list is named `datelist` because `from matplotlib import dates` binds the
# name `dates` to the module; a list called `dates` defined before that import
# would be replaced by the module before datestr2num sees it.
datelist = ['2014-05-06', '2014-05-07', '2014-05-08', '2014-05-09',
            '2014-05-10', '2014-05-11', '2014-05-12', '2014-05-13']
converted_dates = matplotlib.dates.datestr2num(datelist)
x_axis = converted_dates
y_axis = range(0, 8)
# Plain plot() is kept on purpose: it is the call the question is asking about.
pyplot.plot(x_axis, y_axis, '-')
pyplot.show()
This brings back 1 2 3 4 5 6 7 on the x axis of the chart. What am I missing? I would like it to display 2014-05-06 etc.
Is this the goal? (Threw in rotation because it almost always comes up, with dates.)
import matplotlib
from matplotlib import pyplot
from matplotlib import dates
import datetime

datelist = ['2014-05-06', '2014-05-07', '2014-05-08', '2014-05-09',
            '2014-05-10', '2014-05-11', '2014-05-12', '2014-05-13']
# Parse every 'YYYY-MM-DD' string into a datetime object.
converted_dates = [datetime.datetime.strptime(d, '%Y-%m-%d') for d in datelist]
x_axis = converted_dates
# Show the tick labels in the same 'YYYY-MM-DD' form as the input strings.
formatter = dates.DateFormatter('%Y-%m-%d')
y_axis = range(0, 8)
pyplot.plot(x_axis, y_axis, '-')
ax = pyplot.gcf().axes[0]
ax.xaxis.set_major_formatter(formatter)
# Rotate the date labels by 25 degrees.
pyplot.gcf().autofmt_xdate(rotation=25)
pyplot.show()
The idea of using matplotlib.dates.datestr2num is in principle correct. You would then need to tell matplotlib to actually interpret the resulting numbers as dates. One easy option is to use plot_date instead of plot.
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates
# Date strings to plot, one per day.
dates = ['2014-05-06', '2014-05-07', '2014-05-08', '2014-05-09',
'2014-05-10', '2014-05-11', '2014-05-12', '2014-05-13']
# Convert the strings to the numbers matplotlib uses for dates.
converted_dates = matplotlib.dates.datestr2num(dates)
x_axis = (converted_dates)
y_axis = range(0,8)
# plot_date makes matplotlib treat the x values as dates.
# NOTE(review): newer Matplotlib releases reportedly deprecate plot_date in
# favour of plot - confirm against the installed version.
plt.plot_date( x_axis, y_axis, '-' )
plt.show()
Try using strptime. Documentation is here:
https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior
For example:
import datetime
sDate = '2014-05-06'
# The string is year-month-day, so the format has to be "%Y-%m-%d"; the
# previous "%m-%d-%Y" does not match it and raises ValueError.
dtDate = datetime.datetime.strptime(sDate, "%Y-%m-%d")
matplotlib can compare datetime objects.
The easiest is to use numpy directly:
import matplotlib
from matplotlib import pyplot
from matplotlib import dates
import numpy as np

# Named `datelist` so the list does not shadow the `dates` module imported
# above.
datelist = ['2014-05-06', '2014-05-07', '2014-05-08', '2014-05-09',
            '2014-05-10', '2014-05-11', '2014-05-12', '2014-05-13']
# numpy parses the ISO date strings directly into datetime64 values.
converted_dates = np.array(datelist, dtype='datetime64[ms]')
ydata = range(0, 8)
pyplot.plot(converted_dates, ydata, '-')
pyplot.show()

matplotlib not displaying all axis values

I have a small program that is plotting some data. The program runs without any errors and displays the plot, but it is removing every other x-axis value. What should I be doing to get all twelve axis labels to display properly?
The program:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
# Print library versions, configure pandas display options, load unrate.csv
# and plot VALUE against DATE for the first 12 rows.
print('NumPy: {}'.format(np.__version__))
print('Pandas: {}'.format(pd.__version__))
print('-----')
# Option names (without the "display." prefix) and the values to apply.
display_settings = {
'max_columns': 14,
'expand_frame_repr': False, # Wrap to multiple pages
'max_rows': 50,
'show_dimensions': False
}
# Floats are rendered with a thousands separator and two decimals.
pd.options.display.float_format = '{:,.2f}'.format
# Apply every entry above as the pandas option "display.<name>".
for op, value in display_settings.items():
pd.set_option("display.{}".format(op), value)
file = "e:\\python\\dataquest\\unrate.csv"
unrate = pd.read_csv(file)
print(unrate.shape, '\n')
# Convert the DATE column from text to datetime values.
unrate['DATE'] = pd.to_datetime(unrate['DATE'])
print(unrate.info(), '\n')
print(unrate.head(12), '\n')
register_matplotlib_converters()
# The rotation is set before plotting. No tick locator or formatter is set
# anywhere in this snippet, so tick placement is left to matplotlib.
plt.xticks(rotation=90)
# Only the first 12 rows are plotted.
plt.plot(unrate['DATE'][0:12], unrate['VALUE'][0:12])
plt.show()
I am getting as output: (I am using PyCharm)
I believe I should be getting: (From Dataquest's built-in IDE)
@Quang Hong, you were on the right track. I had to adjust the interval value and the date format as follows:
# NOTE(review): `mdates` is not imported in the question's program; this
# presumably needs `import matplotlib.dates as mdates` - confirm.
# Label each major tick with the abbreviated month name and the year.
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
# Use a day-based locator with an interval of 30 for the major ticks.
plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=30))
Now I get this output:

Annotate date on the xaxis

I have the following code:
import pandas as pd
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import datetime as datetime
# Download AAPL (from 2008) and AMZN (from 2019) and plot both closing-price
# series on the same axes.
start = datetime.date(2008, 1, 1)
end = datetime.date.today()
start1 = datetime.date(2019, 1, 1)
data = web.get_data_yahoo("AAPL", start, end)
# Both downloads end today; `end1` was never defined and raised NameError.
data1 = web.get_data_yahoo("AMZN", start1, end)
ax = data.plot(y="Close")
data1.plot(y="Close", ax=ax)
The resulting chart looks like this:
How can I annotate the orange line, which is AMZN, so I can see the date? Is there a way a straight line could be drawn down and have its date shown on the x-axis?
If you plot the date string on the x-axis I think you will get a clumpy result. What about adding a text annotation to the side:
# Label the first AMZN close with its start date. `.iloc[0]` selects the first
# row by position; a bare `[0]` on a date-indexed Series is a label lookup
# with a deprecated positional fallback.
ax.text(start1, data1.Close.iloc[0], start1, ha='right', va='top', rotation=90)
Here's the complete code if you want to add a vertical line as well:
import pandas as pd
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import datetime as datetime
# Plot AAPL and AMZN closing prices, then mark the AMZN start date with a
# dashed vertical line and a rotated text label.
start = datetime.date(2008, 1, 1)
end = datetime.date.today()
start1 = datetime.date(2019, 1, 1)
data = web.get_data_yahoo("AAPL", start, end)
data1 = web.get_data_yahoo("AMZN", start1, end)
ax = data.plot(y="Close")
data1.plot(y="Close", ax=ax)
# Remember the y-limits so they can be restored after the marker is added.
ylims = ax.get_ylim()
# First AMZN close, selected by position. `.iloc[0]` is explicit, whereas a
# bare `[0]` on a date-indexed Series is a label lookup with a deprecated
# positional fallback.
first_close = data1.Close.iloc[0]
ax.vlines(start1, ylims[0], first_close, linestyles='--')
ax.text(start1, first_close, start1, ha='right', va='top', rotation=90)
ax.set_ylim(ylims)

How to plot events with minute precision on hourly plots using matplotlib?

I have an hourly plot generated with matplotlib. I need to plot an event which goes for example, from 09:00 to 10:45. When I try to do it, using axvspan I obtain a bar from 9:00 to 10:00. How could I obtain the longer one?
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import datetime as dt
import pandas as pd
# Build a small hourly series around 2018-10-01 09:00, plot it through pandas
# and shade the event interval ev1 with axvspan.
now_date = dt.datetime(2018,10,1,9)
# Time window: one hour before now_date up to three hours after it.
d_tw_ini = now_date - dt.timedelta(hours = 1)
d_tw_fin = now_date + dt.timedelta(hours = 3)
# Hourly timestamps over the window; closed='left' presumably drops the end
# point so that four timestamps match the four values below - confirm.
dts = pd.date_range(start=d_tw_ini, end=d_tw_fin, freq='1H', name='ini', closed='left')
data=pd.DataFrame({'val':[0.5,0.4,0.7,0.9]})
# Event to highlight: 09:05 to 10:50 on the same day.
ev1=[dt.datetime(2018,10,1,9,5),dt.datetime(2018,10,1,10,50)]
data['t']=dts.values
data.set_index('t',inplace=True)
fig = plt.figure()
gs = GridSpec(1, 1)
ax_1 = fig.add_subplot(gs[0, 0])
# The series is drawn through pandas' own plot wrapper here.
data.plot(ax=ax_1, y='val')
ax_1.axvspan(ev1[0],ev1[1], alpha=0.3, color= 'red')
Result
Juan, it looks like when you use pandas to plot, the hourly indexing causes issues with how axvspan gets drawn.
I replaced
data.plot(ax=ax_1, y='val')
with
ax_1.plot(data.index, data['val'])
which generates the image below, but unfortunately you lose the automated x-axis formatting.
Adding the two lines below will result in the same date formatting as your example.
ax_1.set_xticks([x for x in data.index])
ax_1.set_xticklabels([str(x)[11:16] for x in data.index])
Below is the full code to produce the above plot.
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import datetime as dt
import pandas as pd
# Same data as the question, but drawn with matplotlib's own plot() so the
# axvspan covers the event interval; tick labels are then set by hand.
now_date = dt.datetime(2018,10,1,9)
# Time window: one hour before now_date up to three hours after it.
d_tw_ini = now_date - dt.timedelta(hours = 1)
d_tw_fin = now_date + dt.timedelta(hours = 3)
# NOTE(review): newer pandas releases reportedly replace `closed=` with
# `inclusive=` in date_range - confirm against the installed version.
dts = pd.date_range(start=d_tw_ini, end=d_tw_fin, freq='1h', name='ini',
closed='left')
data=pd.DataFrame({'val':[0.5,0.4,0.7,0.9]})
# Event to highlight: 09:05:00 to 10:50:00 on the same day.
ev1=[dt.datetime(2018,10,1,9,5,0),dt.datetime(2018,10,1,10,50,0)]
data['t']=dts.values
data.set_index('t',inplace=True)
fig = plt.figure()
gs = GridSpec(1, 1)
ax_1 = fig.add_subplot(gs[0, 0])
# modified section below
# Plot against the datetime index directly instead of using data.plot().
ax_1.plot(data.index, data['val'])
ax_1.axvspan(ev1[0],ev1[1], alpha=0.3, color= 'red')
# One tick per data point, labelled with characters 11-15 of the timestamp's
# string form (presumably the HH:MM part - confirm).
ax_1.set_xticks([x for x in data.index])
ax_1.set_xticklabels([str(x)[11:16] for x in data.index])
plt.show()

Resources