Second title of a plot/photo is the value of a column in a CSV but only the last value is used in all photos - python-3.x

I have a script that makes a photo that shows a basemap and where an earthquake happened. So 1 earthquake, 1 photo. The second title of each plot should be the date of the earthquake. However, only the last value, which is "2020-04-10", is used in all photos.
from shapely.geometry import Point
from geopandas import GeoDataFrame
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import os
os.chdir(r'path')
def plotPoint():
    """Save one PNG per earthquake row, drawn as a red marker over the basemap.

    Reads 'earthquakes.csv' and 'basemap.shp' relative to the current working
    directory and writes 'earthquake_<row position>.png' for every row.
    """
    df = pd.read_csv('earthquakes.csv')
    basemap = gpd.read_file('basemap.shp')
    # NOTE(review): the points are built from the Longitude/Latitude columns
    # but tagged with this CRS code - confirm it matches the basemap and the
    # coordinate units.
    crs = "epsg:32651"
    geometry = gpd.points_from_xy(df.Longitude, df.Latitude)
    gdf = GeoDataFrame(df, crs=crs, geometry=geometry)
    # This loop runs to completion before any figure is drawn, so `date` is
    # left holding the last value of the 'Date' column.
    for d in df['Date'].values:
        date = d
    # One figure per row; the row is addressed by position `i`.
    for i in range(gdf.shape[0]):
        ax = basemap.plot(figsize=(15,10))
        ax.axis('off')
        g = gdf.iloc[i].geometry
        plt.plot(g.x, g.y, marker='o', color='red', markersize=15)
        title = 'Earthquakes in the ___ from 2008 to 2020'
        # `date` does not depend on `i`, so every figure gets the same subtitle.
        dateInfo = str(date)
        plt.suptitle(title)
        plt.title(dateInfo)
        plt.savefig("earthquake_{0}.png".format(i))
plotPoint()
Get the values of "Date" column
for i in df['Date'].values:
print(i)
Result
2020-04-22
2020-04-22
2020-04-21
2020-04-18
2020-04-10
Sample CSV
Latitude,Longitude,Date,Time_UTC,Depth,Depth Type,Magnitude Type,Magnitude,Region Name,Last Update,Eqid,unknown field
13.81,121.1,2020-04-22,03:19:57,10,f,mb,4.5,MINDORO, PHILIPPINES,2020-04-28 23:17,850323
13.76,120.92,2020-04-22,02:36:19,10, , M,4.2,MINDORO, PHILIPPINES,2020-04-22 03:50,850325
10.45,125.2,2020-04-21,21:43:05,10,f,mb,4.7,LEYTE, PHILIPPINES,2020-04-21 22:55,850252
6.69,125.23,2020-04-18,15:22:16,32, , M,3.6,MINDANAO, PHILIPPINES,2020-04-18 15:35,849329
5.65,126.54,2020-04-10,18:45:49,80, ,Mw,5.2,MINDANAO, PHILIPPINES,2020-04-11 06:41,846838

I changed your code. You were taking the date from a separate for loop, which is why only the last date was picked up. You can read the Date from gdf instead, I'm guessing:
# Removed: the separate loop below only ever left the last date behind.
# for d in df['Date'].values:
#     date = d
# Draw one figure per earthquake. Everything that differs between figures
# (marker position and date subtitle) is read from the same row.
for i in range(gdf.shape[0]):
    quake = gdf.iloc[i]  # look the row up once and reuse it
    ax = basemap.plot(figsize=(15, 10))
    ax.axis('off')
    g = quake.geometry
    plt.plot(g.x, g.y, marker='o', color='red', markersize=15)
    title = 'Earthquakes in the ___ from 2008 to 2020'
    # Added this line
    date = quake['Date']
    dateInfo = str(date)
    plt.suptitle(title)
    # Changed this line
    plt.title(dateInfo)
    plt.savefig("earthquake_{0}.png".format(i))
    plt.show()
    # basemap.plot() opens a new figure on every pass; close it once it has
    # been saved and shown so figures do not pile up in memory.
    plt.close(ax.figure)

Related

Matplotlib - Horizontal Bar Chart Timeline With Dates - Xticks not showing date

Trying to make a graph that looks like the first image here.
However, when I try to implement it, I can't work out how to get the dates to print on the x-axis. The scale seems about right, but the xticks are not dates, just seemingly random numbers. The typical output is visible in figure 2.
How can I get the dates to show on the xticks? I would like it to show every month, or every quarter, between 2019-12-01 and 2021-03-01 (March 1st).
Bonus points for any formatting that makes it look more like the first picture.
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
# Group labels with the start and end date of each group's bar.
event = np.array(['Groupone','Grouptwo','Group3','Group4','Group5','Group6'])
begin = np.array([datetime(year=2019,month=12,day=1),datetime(year=2020,month=2,day=1),datetime(year=2020,month=5,day=1),datetime(year=2020,month=11,day=1),datetime(year=2019,month=12,day=1),datetime(year=2020,month=5,day=1)])
end = np.array([datetime(year=2019,month=12,day=30),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2020,month=7,day=3)])
# Order everything by start date; `end` and `event` follow the same permutation.
beg_sort = np.sort(begin)
end_sort = end[np.argsort(begin)]
evt_sort = event[np.argsort(begin)]
# One horizontal bar per group: it starts at `beg_sort` and spans end - begin.
plt.barh(range(len(beg_sort)), end_sort-beg_sort, left=beg_sort, align='center')
plt.yticks(range(len(beg_sort)), evt_sort)
plt.show()
You can plot each bar as line, choosing the width of the line (lw) you prefer:
import matplotlib as mpl  # needed for mpl.rcParams below

# Set the color of the grid lines
mpl.rcParams['grid.color'] = "w"
fig, ax = plt.subplots(1, 1)
# Plot each item as a line from its begin date to its end date.
# NOTE(review): plot_date is reported as deprecated in recent Matplotlib
# releases; ax.plot accepts datetimes directly - confirm before upgrading.
for i, (b, e) in enumerate(zip(beg_sort, end_sort)):
    ax.plot_date([b, e], [i + 1] * 2, ls='-', marker=None, lw=10)  # 10 for the line width
# Set ticks and labels on y axis (rows are numbered from 1, matching the loop)
ax.set_yticks(range(1, len(evt_sort) + 1))
ax.set_yticklabels(evt_sort)
# Set color and transparency of the grid
ax.patch.set_facecolor('gray')
ax.patch.set_alpha(0.3)
# activate grid
ax.grid(True)
Moreover, you can play with the background grid, customizing it according to your needs.
Hacked something together that works, posting for curiosity, however go with PieCot's answer above:
import matplotlib.pyplot as plt
import numpy as np
from datetime import date
from datetime import datetime
import matplotlib.dates as mdates
#https://stackoverflow.com/questions/58387731/plotting-month-year-as-x-ticks-in-matplotlib
# Two axes are created, but only ax[1] is drawn on; ax[0] is removed before saving.
fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(15, 4.18))
#fig, ax = plt.figure( figsize=(15, 4.18))
event = np.array(['Groupone','Grouptwo','Group3','Group4','Group5','Group6'])
begin = np.array([datetime(year=2019,month=12,day=1),datetime(year=2020,month=2,day=1),datetime(year=2020,month=5,day=1),datetime(year=2020,month=11,day=1),datetime(year=2019,month=12,day=1),datetime(year=2020,month=5,day=1)])
end = np.array([datetime(year=2019,month=12,day=30),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2021,month=2,day=1),datetime(year=2020,month=7,day=3)])
# Order everything by start date; `end` and `event` follow the same permutation.
beg_sort = np.sort(begin)
end_sort = end[np.argsort(begin)]
evt_sort = event[np.argsort(begin)]
#start_m = click.prompt('Start month', type=int)
#start_y = click.prompt('Start year', type=int)
#end_m = click.prompt('End month', type=int)
#end_y = click.prompt('End year', type=int)
start_m = 12
start_y = 2019
end_m = 3
end_y = 2021
months = mdates.MonthLocator() # Add tick every month
#days = mdates.DayLocator(range(1,32,5)) # Add tick every 5th day in a month
#monthFmt = mdates.DateFormatter('%b') # Use abbreviated month name
ax[1].xaxis.set_major_locator(months)
#ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%m-%Y'))
ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%b-%Y'))
ax[1].xaxis.set_tick_params(rotation=90)
#ax.xaxis.set_tick_params(rotation=30)
#ax[1].xaxis.set_major_formatter(monthFmt)
#ax[0].xaxis.set_minor_locator(days)
# The requested date window gets its own names so the `end` array above is
# not rebound to a single date.
range_start = date(year=start_y,month=start_m,day=1)
print(range_start)
range_end = date(year=end_y,month=end_m,day=1)
print(range_end)
# Evenly spaced month/year labels across the window; they are only printed,
# not applied to the axis.
Nticks = 6
delta = (range_end-range_start)/Nticks
tick_dates = [range_start + i*delta for i in range(Nticks)]
x_ticks = ['{}/{}'.format(d.month,d.year) for d in tick_dates]
print(x_ticks)
# Draw on ax[1] explicitly instead of relying on it being the current axes.
ax[1].barh(range(len(beg_sort)), end_sort-beg_sort, left=beg_sort, align='center')
ax[1].set_yticks(range(len(beg_sort)))
ax[1].set_yticklabels(evt_sort)
#plt.xticks = x_ticks
#plt.set_xticks(x_ticks)
#plt.show()
fig.delaxes(ax[0])
plt.savefig('gwern.pdf',bbox_inches='tight')

Locate columns in dataframe to graph

I have an Excel file with 3 columns and 60 rows. The first column is the Date, which I want to put on the x-axis, and I want to plot the other 2 columns against it. I need help locating the 2 other columns so I can pass them to ax1.plot() and ax2.plot().
I have tried to locate them with [:,1], but that doesn't work, and I have also tried to locate them by column name. The second column is "S&P/TSX Composite index (^GSPTSE)" and the third column is "Bitcoin CAD (BTC-CAD)".
import pandas as pd
import matplotlib.pyplot as plt
# Load the spreadsheet into a DataFrame.
InputData = pd.read_excel('Python_assignment_InputData.xlsx')
#InputData = InputData[:15]
# The triple-quoted block below is a bare string literal, so the statements
# written inside it are never executed.
"""
print("a)\n", InputData,"\n")
print("b)")
InputData['Date'] = InputData.DATE.dt.year
InputData['Year'] = pd.to_datetime(InputData.Date).dt.year
"""
#ax1 = InputData.iloc[:,1]
# Two axes sharing one x-axis: ax2 is the twin of ax1 and carries its own y scale.
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
# Both plot() calls are made without arguments, so no column of InputData is
# passed to either axes yet.
ax1.plot()
ax2.plot()
# Label colours are set per axes: blue for TSX, green for BTC.
ax1.set_ylabel("TSX", color = 'b')
ax2.set_ylabel("BTC", color = 'g')
ax1.set_xlabel("Year")
plt.title("Question 6")
plt.show()

ValueError: year is out of range using matplotlib.pyplot

After I call candlestick_ohlc, I can't seem to convert the x axis dates to something matplotlib can understand.
I'm a noob Python programmer. I've tried turning the dataframe into a list, I've tried passing dates to candlestick_ohlc, nothing seems to work other than changing
df['time'] = (df['time'].astype('float'))
into
df['time'] = (df['time'].astype('float')/1000)
Although that renders the wrong datetime.
import requests
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
import matplotlib.style as style
import matplotlib.ticker as mticker
from matplotlib.dates import date2num
from mpl_finance import candlestick_ohlc
import datetime as dt
import numpy as np
import matplotlib.ticker as mticker
def get_data(date):
    """Query the API for up to 2000 days of historical BTC/USD prices ending at "date".

    Args:
        date: Timestamp sent as the ``toTs`` query parameter (the caller in
            this file passes epoch seconds).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within the timeout.
    """
    url = "https://min-api.cryptocompare.com/data/histoday"
    # Let requests build and encode the query string.
    params = {"fsym": "BTC", "tsym": "USD", "limit": 2000, "toTs": date}
    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, params=params, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    r.raise_for_status()
    return r.json()
def get_df(from_date, to_date):
    """Get historical price data between two dates.

    Pages backwards through the API, starting at ``to_date``, until the
    earliest timestamp returned is no later than ``from_date``.

    Args:
        from_date: Lower bound (exclusive) on the 'time' column.
        to_date: Timestamp at which the first query ends.

    Returns:
        A DataFrame indexed by 'time', sorted newest first.

    Raises:
        ValueError: From ``pd.concat`` when no query was made
            (``to_date <= from_date``).
    """
    date = to_date
    holder = []
    # While the earliest date returned is later than the earliest date
    # requested, keep on querying the API and adding the results to a list.
    while date > from_date:
        data = get_data(date)
        holder.append(pd.DataFrame(data['Data']))
        earliest = data['TimeFrom']
        if earliest >= date:
            # The API made no progress backwards in time; stop rather than
            # repeat the same query forever.
            break
        date = earliest
    # Join together all of the API queries in the list.
    df = pd.concat(holder, axis=0)
    # Remove data points from before from_date
    df = df[df['time'] > from_date]
    # Consecutive pages may share their boundary timestamp; keep one row each.
    df = df.drop_duplicates(subset='time')
    # Convert to timestamp to readable date format
    # df['time'] = pd.to_datetime(df['time'], unit='s')
    # Index by time, newest first. Non-inplace calls avoid mutating a
    # filtered view of the concatenated frame.
    df = df.set_index('time').sort_index(ascending=False)
    return df
# Fetch the rows between the two timestamps (passed as integers).
df = get_df(1528502400, 1560112385)
style.use('dark_background')
fig = plt.figure()
ax1 = plt.subplot2grid((1,1), (0,0))
# Turn the 'time' index back into an ordinary column so it can be selected below.
df = df.reset_index()
# Column order matters: the rows are handed to candlestick_ohlc as plain lists.
cols = ['time', 'open', 'high', 'low', 'close', 'volumefrom', 'volumeto']
df = df[cols]
# IF YOU /1000 AFTER ('float') IT WILL RUN BUT NOT WITH THE CORRECT DATE
# NOTE(review): 'time' is still the raw API timestamp here, while the
# DateFormatter below presumably expects Matplotlib date numbers - likely the
# source of the "year is out of range" error; confirm.
df['time'] = (df['time'].astype('float'))
print(df.dtypes)
ohlc = df.values.tolist()
candlestick_ohlc(ax1, ohlc, width=.4, colorup='g', colordown='r')
# IF YOU COMMENT NEXT 4 LINES IT WILL RUN, but NO DATES for XAXIS
date_fmt = "%d-%m-%Y"
date_formatter = mdate.DateFormatter(date_fmt)
ax1.xaxis.set_major_formatter(date_formatter)
fig.autofmt_xdate()
ax1.set_ylabel('BTC Price (USD)')
ax1.set_xlabel('Date')
plt.show()
Expected result would be date labels plotted as d-m-y. :)
Wish this had dates for xaxis labels not seconds since 1970
This is what I want it to look like, but with accurate dates
This is how to fix the code:
# Convert epoch seconds to Matplotlib date numbers so DateFormatter can
# render them. The question imports matplotlib.dates as `mdate`, and
# date2num is used because epoch2num is no longer available in current
# Matplotlib releases.
df['time'] = mdate.date2num(pd.to_datetime(df['time'], unit='s'))
It was definitely one of those lines of code that you spend hours on... now I know.

Bokeh: Changing the frequency of time in the x-axis of a chart

The following coding represents a candlestick chart in bokeh:
from math import pi
import pandas as pd
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.stocks import MSFT
# First 50 rows of the MSFT sample data, with 'date' parsed to datetimes.
df = pd.DataFrame(MSFT)[:50]
df["date"] = pd.to_datetime(df["date"])
# Candle body geometry: centre and height of the open/close rectangle.
mids = (df.open + df.close)/2
spans = abs(df.close-df.open)
# Boolean masks for rising and falling days.
inc = df.close > df.open
dec = df.open > df.close
w = 12*60*60*1000 # half day in ms
TOOLS = "pan,wheel_zoom,box_zoom,reset,save"
# `width` replaces `plot_width`, which Bokeh 3 no longer accepts.
p = figure(x_axis_type="datetime", tools=TOOLS, width=1000, toolbar_location="left")
#p.title = "MSFT Candlestick"
p.xaxis.major_label_orientation = pi/4
p.grid.grid_line_alpha=0.3
# Wicks (high-low) first, then the bodies on top, coloured by direction.
p.segment(df.date, df.high, df.date, df.low, color="black")
p.rect(df.date[inc], mids[inc], w, spans[inc], fill_color="#D5E1DD", line_color="black")
p.rect(df.date[dec], mids[dec], w, spans[dec], fill_color="#F2583E", line_color="black")
output_file("candlestick.html", title="candlestick.py example")
show(p) # open a browser
As you can see in the result, the x-axis dates fall on March 1st, March 15th, etc. Is it possible to increase the tick frequency so that, for example, the next date after March 1st would be March 5th?
Bokeh documentation offers several options. In some cases setting desired_num_ticks like this could help:
p.xaxis[0].ticker.desired_num_ticks = 20
Or you could try for example:
from bokeh.models import DaysTicker
p.xaxis[0].ticker = DaysTicker(days = [1, 5, 10, 15, 20, 25, 30])
Result:

Plotly iplot() doesnt run within a function

I am trying to use iplot() within a function in Jupyter so that I can apply a filter to the graph and have it change dynamically. The code works in a cell on its own, like this:
# Code for put by ticker
data = []
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker('ABBV')))
# One Scatter trace per dictionary entry; only the value side is plotted and
# every trace shares the same x values and the same name.
for key, values in opPriceDic.items():
    trace = go.Scatter(
        x = numberOfDays,
        y = values,
        name = 'option',
        line = dict(
            width = 4)
    )
    data.append(trace)
# Edit the layout
layout = dict(title = 'Call prices for ' ,
              xaxis = dict(title = 'Days to Expiration'),
              yaxis = dict(title = 'Price '),
              )
fig = dict(data=data, layout=layout)
# Last statement of the cell: the figure is handed to iplot here.
py.iplot(fig, filename='calls For ')
But once this is placed within a function the graph fails to load
def graph(ticker):
    """Build one Scatter trace per option price series for `ticker` and pass the figure to iplot."""
    # Code for put by ticker
    data = []
    opPriceDic = priceToArray(getPuts(getOptionPricesByTicker(ticker)))
    # One Scatter trace per dictionary entry; only the value side is plotted.
    for key, values in opPriceDic.items():
        trace = go.Scatter(
            x = numberOfDays,
            y = values,
            name = 'option',
            line = dict(
                width = 4)
        )
        data.append(trace)
    # Edit the layout
    layout = dict(title = 'Call prices for ' ,
                  xaxis = dict(title = 'Days to Expiration'),
                  yaxis = dict(title = 'Price '),
                  )
    fig = dict(data=data, layout=layout)
    # NOTE(review): the value returned by iplot is discarded and the function
    # returns None - presumably why nothing is rendered when this runs inside
    # a function; confirm whether returning the result fixes it.
    py.iplot(fig, filename='calls For ')
But if I change the iplot() to plot() it calls the plotly API and opens a new tab with the graph displaying.
I am just wondering if anyone has noticed this before and may have come across a solution?
(if I am in the wrong area I will remove the post)
I have tried to use pandas data.reader calls to pull ticker data between a start and end date. The data.reader seems to work from within the function. In the question code, if the opPriceDic dictionary could be converted to a dataframe, then iplot() could plot it without use of layout and fig as below:
# Import libraries
import datetime
from datetime import date
import pandas as pd
import numpy as np
from plotly import __version__
%matplotlib inline
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
init_notebook_mode(connected=True)
cf.go_offline()
# Create function that uses data.reader and iplot()
def graph(ticker):
    """Plot closing price against the age of each quote in days, using iplot.

    Args:
        ticker: Symbol passed to ``data.DataReader``.
    """
    # create sample data set
    start = datetime.datetime(2006, 1, 1)
    end = datetime.datetime(2016, 1, 1)
    # NOTE(review): `data` is not imported anywhere in this snippet -
    # presumably pandas_datareader's `data` module; confirm.
    df = data.DataReader(ticker, 'morningstar', start, end)
    df = df.reset_index()
    # Age of every row in whole days, computed for the column in one step
    # rather than with a row-wise apply.
    df['numberOfDays'] = (datetime.datetime.now() - df['Date']).dt.days.abs()
    # call iplot within the function graph()
    df.iplot(kind='line', x='numberOfDays', y='Close', xTitle='Days', yTitle='Value', title='Prices', width=4)

Resources