Need help in Pulling stock value of last business day of a month in a time series/dataframe
I am executing the fol code:
import pandas as pd
import datetime
import matplotlib
import warnings
warnings.filterwarnings('ignore')
start = datetime.datetime(2014, 3, 31)
end = datetime.datetime(2017, 9, 30)
stocks = ['AAPL', 'GOOG']
col = 'Adj Close'
df = web.get_data_yahoo(stocks,start,end)
data = df.ix[col]
dataframe = pd.DataFrame(data)
This gives me a dataframe with all the close values.
I want to get the values only from the last business day of the month
Please ignore the question, I've managed to find the answer
Related
I am new to Python and I have trouble getting data into one dataframe.
I have the following code.
from pandas_datareader import data as pdr
from datetime import date
from datetime import timedelta
import yfinance as yf
yf.pdr_override()
import pandas as pd
# tickers list
ticker_list = ['0P0001A532.CO','0P00018Q4V.CO','0P00017UBI.CO','0P00000YYT.CO','PFIBAA.CO','PFIBAB.CO','PFIBAC.CO','PFIDKA.CO','PFIGLA.CO','PFIMLO.CO','PFIKRB.CO','0P00019SMI.F','WEKAFKI.CO','0P0001CICW.CO','WEISTA.CO','WEISTS.CO','WEISA.CO','WEITISOP.CO']
today = date.today()
# We can get data by our choice by days bracket
if date.today().weekday()==0:
start_date = (today + timedelta((4 + today.weekday()) % 7)) - timedelta(days=7) # Friday. If it is monday we do not have a price since it is based on the previous day close.
else:
start_date = today - timedelta(days=1)
files=[]
allData = []
dafr_All = []
def getData(ticker):
print(ticker)
data = pdr.get_data_yahoo(ticker, start= start_date, end=(today + timedelta(days=2)))['Adj Close']
dataname = ticker+'_'+str(today)
files.append(dataname)
allData.append(data)
SaveData(data, dataname)
# Create a data folder in your current dir.
def SaveData(df, filename):
df.to_csv('./data/'+filename+'.csv')
#This loop will iterate over ticker list, will pass one ticker to get data, and save that data as file.
for tik in ticker_list:
getData(tik)
for i in range(0,11):
df1= pd.read_csv('./data/'+ str(files[i])+'.csv')
print (df1.head())
I get several csv files containing the adjusted close values (if there exists an adjusted close).
I want to save all the data to a dataframe where the first column consist of tickers, while the second column consist of adjusted close values. The dataframe then needs to be exported into a csv-file.
I have a Pandas df of Stock Tickers with specific dates, I want to add the adjusted close for that date using yahoo finance. I iterate through the dataframe, do the yahoo call for that Ticker and Date, and the correct information is returned. However, I am not able to add that information back to the original df. I have tried various loc, iloc, and join methods, and none of them are working for me. The df shows the initialized zero values instead of the new value.
import pandas as pd
import yfinance as yf
from datetime import timedelta
# Build the dataframe
df = pd.DataFrame({'Ticker':['BGFV','META','WIRE','UG'],
'Date':['5/18/2021','5/18/2021','4/12/2022','6/3/2019'],
})
# Change the Date to Datetime
df['Date'] = pd.to_datetime(df.Date)
# initialize the adjusted close
df['Adj_Close'] = 0.00 # You'll get a column of all 0s
# iterate through the rows of the df and retrieve the Adjusted Close from Yahoo
for i in range(len(df)):
ticker = df.iloc[i]['Ticker']
start = df.iloc[i]['Date']
end = start + timedelta(days=1)
# YF call
data = yf.download(ticker, start=start, end=end)
# Get just the adjusted close
adj_close = data['Adj Close']
# Write the acjusted close to the dataframe on the correct row
df.iloc[i]['Adj_Close'] = adj_close
print(f'i value is {i} and adjusted close value is {adj_close} \n')
print(df)
The simplest way to do is to use loc as below-
# change this line
df.loc[i,'Adj_Close'] = adj_close.values[0]
You can use:
def get_adj_close(x):
# You needn't specify end param because period is already set to 1 day
df = df = yf.download(x['Ticker'], start=x['Date'], progress=False)
return df['Adj Close'][0].squeeze()
df['Adj_Close'] = df.apply(get_adj_close, axis=1)
Output:
>>> df
Ticker Date Adj_Close
0 BGFV 2021-05-18 27.808811
1 META 2021-05-18 315.459991
2 WIRE 2022-04-12 104.320045
3 UG 2019-06-03 16.746983
What I am trying to figure out is how to add "Cases" and "Deaths" for each day, so that it starts with: "1/19/2020 Cases" and "1/19/2020 Deaths" then "1/20/2020 Cases" etc. It seems the append function does not work for this, and I don't know how else to add this. It doesn't seem like python has a way to do this task. My eventual goal is to make this a pandas dataframe.
import pandas as pd
dates = pd.date_range(start = '1/19/2020', end = '12/31/2021')
lst = dates.repeat(repeats = 2)
print(lst)
Thanks
If I am not mistaken, I don't think there's a way to do it with purely pandas. However with python and datetime, you can do so:
import pandas as pd
from datetime import timedelta, date
def daterange(start_date, end_date):
# Credit: https://stackoverflow.com/a/1060330/10640517
for n in range(int((end_date - start_date).days)):
yield start_date + timedelta(n)
dates = []
start_date = date(2020, 1, 19) # Start date here
end_date = date(2021, 12, 31) # End date here
for single_date in daterange(start_date, end_date):
dates.append(single_date.strftime("%m/%d/%Y") + " Cases")
dates.append(single_date.strftime("%m/%d/%Y") + " Deaths")
pdates = pd.DataFrame(dates)
print (pdates)
Is this what you want? If not, I can delete it.
The problem is the dataset has variable data rates per ID, I would like to filter out the IDs that do not have at least one data point per day.
I have a dataframe with IDs, dates, and data, in which I counted the daily sampling rate for each ID.
dfcounted = df.reset_index().groupby(['id', pd.Grouper(key='datetime', freq='D')]).count().reset_index()
Now, i have taken the first and last date of the dataframe, and created a dataframe of each day between the starting and ending dates:
# take dates
sdate = df['datetime'].min() # start date
edate = df['datetime'].max() # end date
# interval
delta = edate - sdate # as timedelta
# empty list
dates = []
# store each date in list
for i in range(delta.days + 1):
day = sdate + timedelta(days=i)
dates.append(day)
# convert to dataframe
dates = pd.DataFrame(data = dates, columns=["date"])
From here, I am lost on how to proceed. I have created a sample dataframe
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import string
letters = string.ascii_lowercase
ids = random.choices(letters,k=100)
date_today = datetime.now()
days = pd.date_range(date_today, date_today + timedelta(99), freq='D')
np.random.seed(seed=1111)
data = np.random.randint(1, high=100, size=len(days))
df = pd.DataFrame({'date': days,'ids': ids, 'data': data})
df = df.set_index('date')
With the sample df, i would expect to create a "results" df with only the ids that have data in each date.
After I call candlestick_ohlc, I can't seem to convert the x axis dates to something matplotlib can understand.
I'm a noob Python programmer. I've tried turning the dataframe into a list, I've tried passing dates to candlestick_ohlc, nothing seems to work other than changing
df['time'] = (df['time'].astype('float'))
into
df['time'] = (df['time'].astype('float')\1000)
Although that renders the wrong datetime.
import requests
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
import matplotlib.style as style
import matplotlib.ticker as mticker
from matplotlib.dates import date2num
from mpl_finance import candlestick_ohlc
import datetime as dt
import numpy as np
import matplotlib.ticker as mticker
def get_data(date):
""" Query the API for 2000 days historical price data starting from "date". """
url = "https://min-api.cryptocompare.com/data/histoday?fsym=BTC&tsym=USD&limit=2000&toTs={}".format(date)
r = requests.get(url)
ipdata = r.json()
return ipdata
def get_df(from_date, to_date):
""" Get historical price data between two dates. """
date = to_date
holder = []
# While the earliest date returned is later than the earliest date requested, keep on querying the API
# and adding the results to a list.
while date > from_date:
data = get_data(date)
holder.append(pd.DataFrame(data['Data']))
date = data['TimeFrom']
# Join together all of the API queries in the list.
df = pd.concat(holder, axis = 0)
# Remove data points from before from_date
df = df[df['time']>from_date]
# Convert to timestamp to readable date format
# df['time'] = pd.to_datetime(df['time'], unit='s')
# Make the DataFrame index the time
df.set_index('time', inplace=True)
# And sort it so its in time order
df.sort_index(ascending=False, inplace=True)
return df
df = get_df(1528502400, 1560112385)
style.use('dark_background')
fig = plt.figure()
ax1 = plt.subplot2grid((1,1), (0,0))
df = df.reset_index()
cols = ['time', 'open', 'high', 'low', 'close', 'volumefrom', 'volumeto']
df = df[cols]
#IF YOU /1000 AFER ('float') IT WILL RUN BUT NOT CORRECT DATE
df['time'] = (df['time'].astype('float'))
print(df.dtypes)
ohlc = df.values.tolist()
candlestick_ohlc(ax1, ohlc, width=.4, colorup='g', colordown='r')
# IF YOU COMMENT NEXT 4 LINES IT WILL RUN, but NO DATES for XAXIS
date_fmt = "%d-%m-%Y"
date_formatter = mdate.DateFormatter(date_fmt)
ax1.xaxis.set_major_formatter(date_formatter)
fig.autofmt_xdate()
ax1.set_ylabel('BTC Price (USD)')
ax1.set_xlabel('Date')
plt.show()
Expected result would be date labels plotted as d-m-y. :)
Wish this had dates for xaxis labels not seconds since 1970
This is what I want it to look like, but with accurate dates
This is how to fix the code:
df['time'] = df['time'].apply(mdates.epoch2num)
It was definitely one of those lines of code that you spend hours on... now I know.