AMZN stock data retrieval with YahooDailyReader - python-3.x

Up until 30 minutes ago I was executing the following code without problems:
import pandas_datareader.data as web
import datetime
# Date range for the historical download (naive datetimes; Yahoo treats them as dates).
start = datetime.datetime(2012,1,1)
end = datetime.datetime(2015,12,31)
# Daily OHLCV DataFrames, one per ticker, via pandas-datareader's Yahoo reader.
# NOTE(review): the traceback below shows Yahoo's response no longer contains
# 'HistoricalPriceStore' — a server-side change, not a bug in these lines.
AAPL = web.get_data_yahoo('AAPL', start, end)
AMZN = web.get_data_yahoo('AMZN', start, end)
Instead now I get:
Traceback (most recent call last):
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/pandas_datareader/yahoo/daily.py", line 157, in _read_one_data
data = j["context"]["dispatcher"]["stores"]["HistoricalPriceStore"]
KeyError: 'HistoricalPriceStore'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/pandas_datareader/data.py", line 82, in get_data_yahoo
return YahooDailyReader(*args, **kwargs).read()
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/pandas_datareader/base.py", line 251, in read
df = self._read_one_data(self.url, params=self._get_params(self.symbols))
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/pandas_datareader/yahoo/daily.py", line 160, in _read_one_data
raise RemoteDataError(msg.format(symbol, self.__class__.__name__))
pandas_datareader._utils.RemoteDataError: No data fetched for symbol AMZN using YahooDailyReader
How can I fix this?
Is there a work around to get the AMZN data as DataFrame from another source (different from Yahoo_Daily_Reader)?
Python version 3.4.7

How about this solution?
# Download adjusted-close prices for a basket of tickers from Yahoo Finance,
# pivot them into one column per ticker, and plot the price series.
# (Loop and plot bodies had lost their indentation in the paste; restored here.)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.optimize as sco
import datetime as dt
import math
from datetime import datetime, timedelta
from pandas_datareader import data as wb
from sklearn.cluster import KMeans

np.random.seed(777)

start = '2019-4-30'
end = '2019-10-31'
# N = 90
# start = datetime.now() - timedelta(days=N)
# end = dt.datetime.today()

tickers = ['MMM',
           'ABT',
           'AAPL',
           'AMAT',
           'APTV',
           'ADM',
           'ARNC',
           'AMZN']
thelen = len(tickers)

price_data = []
for ticker in tickers:
    # One single-column frame per ticker, tagged so the pivot below can separate them.
    prices = wb.DataReader(ticker, start = start, end = end, data_source='yahoo')[['Adj Close']]
    price_data.append(prices.assign(ticker=ticker)[['ticker', 'Adj Close']])

df = pd.concat(price_data)
df.dtypes
df.head()
df.shape

pd.set_option('display.max_columns', 500)
df = df.reset_index()
df = df.set_index('Date')
table = df.pivot(columns='ticker')
# By specifying col[1] in below list comprehension
# You can select the stock names under multi-level column
table.columns = [col[1] for col in table.columns]
table.head()

plt.figure(figsize=(14, 7))
for c in table.columns.values:
    plt.plot(table.index, table[c], lw=3, alpha=0.8,label=c)
plt.legend(loc='upper left', fontsize=12)
plt.ylabel('price in $')

Related

deleting pandas dataframe rows not working

import numpy as np
import pandas as pd

# 5x4 frame of random ints, row labels 101..105 (integers, not strings).
randArr = np.random.randint(0,100,20).reshape(5,4)
df =pd.DataFrame(randArr,np.arange(101,106,1),['PDS', 'Algo','SE','INS'])
# The index holds the integer 103, not the string '103' — dropping the string
# raises KeyError (the error quoted below). Drop the integer label instead.
df.drop(103,inplace=True)
this code not working
Traceback (most recent call last):
File "D:\Education\4th year\1st sem\Machine Learning Lab\1st Lab\python\pandas\pdDataFrame.py", line 25, in <module>
df.drop('103',inplace=True)
The string '103' isn't in the index, but the integer 103 is:
Replace df.drop('103',inplace=True) with df.drop(103,inplace=True)

Scraping values using Python

I want to scrape "Short interest" and "% of float shorted" values from https://www.marketwatch.com/investing/stock/aapl. I have tried using xpath, but it returns an Error.
import requests
from lxml import html
import pandas as pd
import numpy as np

# Scrape "Short interest" / "% of float shorted" from MarketWatch's AAPL page.
url = "https://www.marketwatch.com/investing/stock/aapl"
page = requests.get(url)
tree = html.fromstring(page.content)
# lxml elements expose text via the .text attribute / .text_content() method;
# calling .text() raises TypeError. Also guard the lookup: an absolute xpath
# returns [] whenever the page layout shifts, which is the IndexError below.
# NOTE(review): both values used the identical xpath (li[14]/span[1]) — the
# second one presumably needs a different li index; confirm against the page.
matches = tree.xpath('/html/body/div[4]/div[5]/div[1]/div[1]/div/ul/li[14]/span[1]')
per_of_float_shorted = matches[0].text_content() if matches else None
short_interest = matches[0].text_content() if matches else None
print(short_interest)
print(per_of_float_shorted)
Traceback (most recent call last):
File "scraper.py", line 11, in <module>
per_of_float_shorted = tree.xpath('/html/body/div[4]/div[5]/div[1]/div[1]/div/ul/li[14]/span[1]')[0].text()
IndexError: list index out of range
Thank you.

Yahoo Finance ValueError: zero-size array to reduction operation maximum which has no identity

I have been trying to pull data from Yahoo Finance and I keep getting this strange error.
So I run this code :
#Importing Modules/Libraries
import pandas as pd
# Compatibility shim: older pandas-datareader looks for is_list_like under
# pd.core.common, which newer pandas moved to pd.api.types — TODO confirm versions.
pd.core.common.is_list_like = pd.api.types.is_list_like
from pandas_datareader import data, wb
import fix_yahoo_finance as yf
# Reroute pandas-datareader's Yahoo calls through fix_yahoo_finance's downloader.
yf.pdr_override()
import numpy as np
import datetime
import seaborn as sns
import matplotlib.pyplot as plt
And I get a feedback from my console with this error :
#Importing Modules/Libraries
import pandas as pd
pd.core.common.is_list_like = pd.api.types.is_list_like
from pandas_datareader import data, wb
import fix_yahoo_finance as yf
yf.pdr_override()
import numpy as np
import datetime
import seaborn as sns
import matplotlib.pyplot as plt
#Importing Historical data from yahoo finance
# The ticker list was line-wrapped by the console paste ('QQEW', 'PXH' and
# 'QWLD' were split mid-token), leaving the assignment invalid; rejoined here.
tickers = ['XSLV', 'SMLV', 'XMLV', 'USMV', 'LGLV', 'SPLV', 'PRFZ', 'PXSC',
           'FNDB', 'PXMC', 'PRF', 'QQEW', 'RSP', 'EQWL', 'EQAL', 'EWMC',
           'EWSC', 'DWAS', 'MMTM', 'PDP', 'DWAQ', 'QUAL', 'SPHQ', '^PHB',
           'ACWV', 'IDLV', 'EELV', 'PDN', 'PXH', 'QWLD', 'IQLT', 'IQDF',
           'IDMO', 'EEMO', 'PIZ', 'PIE']
indices = pd.DataFrame()
for t in tickers:
    # One 'Adj Close' column per ticker, collected into a single frame.
    indices[t] = data.get_data_yahoo(t, data_source='yahoo', start='2016-1-1')['Adj Close']
Output:
[ 0% ]
[*********************100%***********************] 1 of 1 downloaded
[ 0% ]
[*********************100%***********************] 1 of 1 downloaded
[ 0% ]
[*********************100%***********************] 1 of 1 downloaded
[ 0% ]
[*********************100%***********************] 1 of 1 downloaded
[ 0% ]
[*********************100%***********************] 1 of 1 downloaded
[ 0% ]
[*********************100%***********************] 1 of 1 downloaded
[ 0% ]
[*********************100%***********************] 1 of 1 downloaded
[ 0% ]
[*********************100%***********************] 1 of 1 downloaded
Traceback (most recent call last):
File "<input>", line 18, in <module>
File "C:\Users\TIM\PycharmProjects\BILLIONAIRE'S CLUB\venv\lib\site-
packages\fix_yahoo_finance\__init__.py", line 202, in download
'Close', 'Adj Close', 'Volume']]
File "C:\Users\TIM\PycharmProjects\BILLIONAIRE'S CLUB\venv\lib\site-
packages\pandas\core\frame.py", line 2682, in __getitem__
return self._getitem_array(key)
File "C:\Users\TIM\PycharmProjects\BILLIONAIRE'S CLUB\venv\lib\site-
packages\pandas\core\frame.py", line 2726, in _getitem_array
indexer = self.loc._convert_to_indexer(key, axis=1)
File "C:\Users\TIM\PycharmProjects\BILLIONAIRE'S CLUB\venv\lib\site-
packages\pandas\core\indexing.py", line 1308, in _convert_to_indexer
obj, kind=self.name)
File "C:\Users\TIM\PycharmProjects\BILLIONAIRE'S CLUB\venv\lib\site-
packages\pandas\core\indexes\multi.py", line 1968, in _convert_listlike_indexer
_, indexer = self.reindex(keyarr, level=level)
File "C:\Users\TIM\PycharmProjects\BILLIONAIRE'S CLUB\venv\lib\site-
packages\pandas\core\indexes\multi.py", line 2057, in reindex
keep_order=False)
File "C:\Users\TIM\PycharmProjects\BILLIONAIRE'S CLUB\venv\lib\site-
packages\pandas\core\indexes\base.py", line 3969, in _join_level
ngroups = 1 + new_lev_labels.max()
File "C:\Users\TIM\PycharmProjects\BILLIONAIRE'S CLUB\venv\lib\site-
packages\numpy\core\_methods.py", line 26, in _amax
return umr_maximum(a, axis, None, out, keepdims)
ValueError: zero-size array to reduction operation maximum which has no identity.
This is a rate limit set by Yahoo. You can wrap the download in try/except and pass on failure to avoid the ValueError.
Something like this :
# Best-effort download loop: Yahoo's rate limiting surfaces as ValueError,
# which is deliberately skipped so the remaining tickers still download.
# NOTE(review): quoted from a class context — tickers1, yahoo, self.TICK_SYMBOLS
# and self.START are not defined in this snippet; confirm against the caller.
for ticker in tickers1:
    try:
        df = yahoo.download(self.TICK_SYMBOLS, start = self.START, end = datetime.now())
        df.reset_index(inplace = True)
        df.set_index("Date", inplace = True)
    except ValueError:
        pass
You need to specify end_date. Also date has to be in 'YYYY-MM-DD' format.
import fix_yahoo_finance as yahoo

# Download S&P 500 daily data and plot the close. The end date's closing
# quote was mismatched ("2019-04-28') — a SyntaxError; fixed to a double quote.
spx_df = yahoo.download("^GSPC", "2015-01-01", "2019-04-28")
spx_df.Close.plot()

"NameError: name 'datetime' is not defined" with datetime imported

I know there are a lot of "datetime not defined" posts, but they all seem to forget the obvious import of datetime. I can't figure out why I'm getting this error. When I do each step in iPython it works well, but the method doesn't.
import requests
import datetime
import pandas as pd  # was missing: pd.DataFrame is used below

def daily_price_historical(symbol, comparison_symbol, limit=1, aggregate=1, exchange='', allData='true'):
    """Fetch daily price history for symbol/comparison_symbol from CryptoCompare.

    Returns a DataFrame of the API's 'Data' records with an added 'timestamp'
    column converted from the Unix 'time' field.
    """
    url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}&allData={}'\
        .format(symbol.upper(), comparison_symbol.upper(), limit, aggregate, allData)
    if exchange:
        url += '&e={}'.format(exchange)
    page = requests.get(url)
    data = page.json()['Data']
    df = pd.DataFrame(data)
    # assumes 'Data' is a list of dicts with a 'time' key — the reported
    # AttributeError ('DataFrame' object has no attribute 'time') means the
    # response carried no such field (e.g. an error payload); TODO guard.
    df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
    # Removed the stray bare call `datetime.datetime.fromtimestamp()` —
    # fromtimestamp requires an argument and that line always raised TypeError.
    return df
This code produces this error:
Traceback (most recent call last):
File "C:\Users\20115619\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-29-4f015e05113f>", line 1, in <module>
rv.get_prices(30, 'ETH')
File "C:\Users\20115619\Desktop\projects\testDash\Revas.py", line 161, in get_prices
for symbol in symbols:
File "C:\Users\20115619\Desktop\projects\testDash\Revas.py", line 50, in daily_price_historical
df = pd.DataFrame(data)
File "C:\Users\20115619\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py", line 4372, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'time'
df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
I think that line is the problem.
Your Dataframe df at the end of the line doesn't have the attribute .time
For what it's worth I'm on Python 3.6.0 and this runs perfectly for me:
import requests
import datetime
import pandas as pd

def daily_price_historical(symbol, comparison_symbol, limit=1, aggregate=1, exchange='', allData='true'):
    """Fetch daily price history for symbol/comparison_symbol from CryptoCompare.

    Returns the API's 'Data' records as a DataFrame with an added 'timestamp'
    column converted from the Unix 'time' field.
    """
    url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&aggregate={}&allData={}'\
        .format(symbol.upper(), comparison_symbol.upper(), limit, aggregate, allData)
    if exchange:
        url += '&e={}'.format(exchange)
    page = requests.get(url)
    data = page.json()['Data']
    df = pd.DataFrame(data)
    df['timestamp'] = [datetime.datetime.fromtimestamp(d) for d in df.time]
    #I don't have the following function, but it's not needed to run this
    #datetime.datetime.fromtimestamp()
    return df

df = daily_price_historical('BTC', 'ETH')
print(df)
Note, I commented out the line that calls an external function that I do not have. Perhaps you have a global variable causing a problem?
Update as per the comments:
I'd use join instead to make the URL:
url = "".join(["https://min-api.cryptocompare.com/data/histoday?fsym=", str(symbol.upper()), "&tsym=", str(comparison_symbol.upper()), "&limit=", str(limit), "&aggregate=", str(aggregate), "&allData=", str(allData)])

Import error in python 3.4.4

# Load the data
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.linear_model  # `import sklearn` alone does not load .linear_model

oecd = pd.read_csv("oecd.csv", thousands=',')
# Renamed from gd_per_capita: the rest of the script refers to gdp_per_capita.
gdp_per_capita = pd.read_csv("gdp_per_capita.csv", thousands=',',delimiter='\t', encoding='latin1', na_values="n/a")

#Prepare the data
# NOTE(review): prepare_country_stats is not defined in this snippet — it must
# be imported or defined elsewhere; confirm before running.
country_stats = prepare_country_stats(oecd, gdp_per_capita)
x = np.c_[country_stats["GDP per capita"]]
y = np.c_[country_stats["Life satisfaction"]]  # was np.c[...] — AttributeError

#Visualise the data
country_stats.plot(kind='scatter', x="GDP per capita", y='Life satisfaction')
plt.show()

# Select a linear model
model = sklearn.linear_model.LinearRegression()

# Train the code
model.fit(x,y)

# Make a prediction for Cyprus
x_new = [[22587]] # Cyprus GDP per capita
print(model.predict(x_new))
Whenever I try to run this code in Python 3.4.4 this throws up:
Traceback (most recent call last):
File "C:\Users\Ranjan.Ranjan-PC\Desktop\Hands-On\gdp.py", line 6, in <module>
import sklearn
File "C:\Python34\lib\site-packages\sklearn\__init__.py", line 134, in <module>
from .base import clone
File "C:\Python34\lib\site-packages\sklearn\base.py", line 10, in <module>
from scipy import sparse
File "C:\Python34\lib\site-packages\scipy\sparse\__init__.py", line 213, in <module>
from .csr import *
File "C:\Python34\lib\site-packages\scipy\sparse\csr.py", line 13, in <module>
from ._sparsetools import csr_tocsc, csr_tobsr, csr_count_blocks, \
ImportError: DLL load failed: %1 is not a valid Win32 application.
sklearn has been installed though
What is wrong?

Resources