import pandas as pd
import numpy as np
from matplotlib.finance import candlestick_ohlc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import io
import datetime
import urllib
import urllib.request
%matplotlib notebook
# Fetch one year of GOOG quotes as CSV text from the Yahoo chart API.
# The URL is written as two adjacent literals so it stays a single string.
urlToVisit = ('http://chartapi.finance.yahoo.com/instrument/1.0/GOOG/chartdata;'
              'type=quote;range=1y/csv')
with urllib.request.urlopen(urlToVisit) as response:
    sourcePage = response.read().decode('utf-8')

# The first 18 lines are skipped so parsing starts at the quote rows;
# column 0 (the date) becomes the index and is parsed as dates.
df = pd.read_csv(io.StringIO(sourcePage), skiprows=18, header=None, sep=",",
                 names=['date', 'closeP', 'highP', 'lowP', 'openP', 'volume'],
                 index_col=0, parse_dates=True)
if 'volume' not in df:
    df['volume'] = np.zeros(len(df))

# NOTE(review): DATA holds only open/high/low/close/volume. The date index is
# not part of it, while candlestick_ohlc reads the first column of each row as
# the time value. This is the behaviour the question below asks about, so it
# is intentionally left as posted.
DATA = df[['openP', 'highP', 'lowP', 'closeP', 'volume']].values

f1 = plt.subplot2grid((6, 4), (1, 0), rowspan=6, colspan=4, axisbg='#07000d')
candlestick_ohlc(f1, DATA, width=.6, colorup='#53c156', colordown='#ff1717')
f1.grid('on')

# At most 15 major ticks on the x axis, rendered as YYYY-MM-DD.
f1.xaxis.set_major_locator(mticker.MaxNLocator(15))
f1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

plt.subplots_adjust(left=.09, bottom=.14, right=.94, top=.95, wspace=.20, hspace=0)
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.show()
So here's the problem: when I try to plot with 'candlestick_ohlc', it only plots the volume bar chart! (Why is this happening?) I'm thinking that maybe the problem has to do with my dates? I'm using IPython Notebook, by the way. My source is Yahoo Finance. If you notice, I skipped the first 18 lines so that I can get straight to the actual data itself, which looks like:
20150302,569.7757,570.5834,557.2202,558.9953,2129600
20150303,572.0694,573.8146,564.9689,568.8881,1704700
20150304,571.8001,575.5299,566.4548,570.3043,1876800
20150305,573.7548,576.3277,571.8400,573.4456,1389600
20150306,566.1307,575.1011,565.2082,573.3060,1659100
20150309,567.2925,568.7086,561.9921,565.3079,1062100
date,close,high,low,open,volume
Any ideas? Would appreciate any help!!
So, with the help of @DSM,
# Move the date index back into a regular first column so every row starts
# with its date, followed by open/high/low/close/volume.
DATA = df[['openP', 'highP', 'lowP', 'closeP', 'volume']].reset_index()

# Convert each date to matplotlib's numeric date representation.
DATA["date"] = DATA["date"].apply(mdates.date2num)

f1 = plt.subplot2grid((6, 4), (1, 0), rowspan=6, colspan=4, axisbg='#07000d')
candlestick_ohlc(
    f1,
    DATA.values,
    width=.6,
    colorup='#53c156',
    colordown='#ff1717',
)
fixed the problem! Credits to him.
Related
I have been using the following code for a while to extract stock price from yahoo finance. This code is now generating an error saying it cannot read the url.
import pandas_datareader.data as web
# Append the '.L' suffix to the ticker, then request its history from the
# 'yahoo' source for the start/end window.
symbol = i_allStock + '.L'
stock = web.DataReader(symbol, 'yahoo', start, end)
Has anyone had this problem and found a solution?
Try it like this.
from math import sqrt
from sklearn.cluster import MiniBatchKMeans
import pandas_datareader as dr
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib.cm as cm
import seaborn as sn
# Date window for the download.
start = '2019-1-1'
end = '2020-1-1'

tickers = [
    'AXP', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'XOM', 'GS', 'HD', 'IBM',
    'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PFE',
    'PG', 'TRV', 'UNH', 'RTX', 'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW',
]

# Collect one single-column frame per ticker, named after the ticker.
prices_list = []
for ticker in tickers:
    try:
        # `end` is passed as well; it was defined above but previously unused,
        # so the request was open-ended.
        prices = dr.DataReader(ticker, 'yahoo', start, end)['Adj Close']
    except Exception as exc:
        # Best effort: one failing symbol must not abort the whole run, but it
        # is reported instead of being dropped silently. Catching Exception
        # (not a bare except) lets Ctrl-C still interrupt the loop.
        print(f"Skipping {ticker}: {exc}")
    else:
        prices_list.append(prices.to_frame(name=ticker))

# Combine the per-ticker columns side by side, aligned on the date index.
prices_df = pd.concat(prices_list, axis=1)
prices_df.sort_index(inplace=True)
prices_df.head()
You can put the whole bunch of items in a single list. Yahoo Finance will retrieve all of them at once.
import yfinance as yf
# All symbols go into one list so they are fetched with a single call.
etf = [
    'AXP', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'XOM',
    'GS', 'HD', 'IBM', 'INTC', 'JNJ', 'KO',
]

# One download for every symbol in the list, using the 'max' period.
tit = yf.download(tickers=etf, period='max')
Hello, I am working on an assignment and I am having trouble. When I run the program, it runs without any errors; however, nothing is printed on the terminal screen. I expected a graph to appear, but I don't see anything.
# Read the CSV through an explicitly opened file handle.
with open('myfile.csv') as csvfile:
    data = pd.read_csv(csvfile, delimiter=',')

# Rewrap the raw values in a fresh DataFrame (default integer column labels).
d = data.values
dd = pd.DataFrame(data=d)

# NOTE(review): nothing after this call displays the figure (there is no
# plt.show()); this is the behaviour the question is about, so it is left as
# posted.
dd.plot()
Any tips or suggestions are appreciated.
import pandas as pd
import matplotlib.pyplot as plt
# Read the CSV through an explicitly opened file handle.
with open('myfile.csv') as csvfile:
    data = pd.read_csv(csvfile, delimiter=',')

# Rewrap the raw values in a fresh DataFrame (default integer column labels).
d = data.values
dd = pd.DataFrame(data=d)

# Draw the frame, then show the figure window explicitly; without
# plt.show() a plain script finishes without displaying anything.
dd.plot()
plt.show()
use this
The graph is plotting with your current code as is,
Please check the imports once again. I'm using the following imports:
import pandas as pd
# Register pandas' converters with matplotlib before any plotting is done.
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
I have a dataframe with closing stock prices:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn; seaborn.set()
from pandas_datareader import data
import pandas_datareader.data as web
from pandas.tseries.offsets import BDay
# Dow Jones index history from the 'stooq' source; only the close is used.
f = web.DataReader('^DJI', 'stooq')
CLOSE = f['Close']

# Raw series, drawn semi-transparent as the reference line.
CLOSE.plot(alpha=0.5, style='-')

# Mean of each 'BA' resampling bucket.
resampled_mean = CLOSE.resample('BA').mean()
resampled_mean.plot(style=':')

# Values picked at the 'BA' frequency.
at_frequency = CLOSE.asfreq(freq='BA')
at_frequency.plot(style='--')

plt.legend(['input', 'resample', 'asfreq'], loc='upper left')
With resample() I get the average of the previous year. This works.
With asfreq() I try to get the closing value at the end of the year. This doesn't work.
I get the following error in the asfreq() line: TypeError: no numeric data to plot
f.info() displays that close is a non-null float64 type.
What could be wrong?
The indices were not hierarchically sorted:
f= f.sort_index(axis=0) solved it.
I am trying to scale my data using Python 3
But I keep getting this error: I am out of ideas as to what could be the issue? Please can you assist me guys? I would deeply appreciate your help!
import pandas as pd
import numpy as np
from numpy.random import randn
from pandas import Series, DataFrame
from pandas.plotting import scatter_matrix
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import rcParams
from pylab import rcParams
import seaborn as sb
import scipy
from scipy import stats
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from scipy.stats import chi2_contingency
import sklearn
from sklearn import preprocessing
from sklearn.preprocessing import scale
# Load the data set and give its columns readable names.
mtcars = pd.read_csv('mtcars.csv')
mtcars.columns = ['Car names', 'mpg', 'cyl', 'disp', 'hp', 'drat', 'wt',
                  'qsec', 'vs', 'am', 'gear', 'carb']
mpg = mtcars['mpg']

# Scale your data
# NOTE(review): a pandas Series has no reshape(); this call is the failure
# the question is about and is left as posted. The reply below uses
# mpg.values.reshape(-1, 1) instead.
mpg_matrix = mpg.reshape(-1,1)
scaled = preprocessing.MinMaxScaler()
scaled_mpg = scaled.fit_transform(mpg_matrix)
plt.plot(scaled_mpg)
plt.show()
mpg_matrix = mpg.numpy.reshape(-1,1)
tr__
File "C:\Anaconda\lib\site-packages\pandas\core\generic.py", line 5067, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'Series' object has no attribute 'numpy'
pandas.core.series.Series doesn't have reshape.
Perhaps:
# Take the Series' underlying NumPy array, then reshape it into one column.
mpg_array = mpg.values
mpg_matrix = mpg_array.reshape(-1, 1)
i.e. get the underlying numpy array and reshape that.
I have a dataset with 1915 rows, that has an entry date col and a qty col contained in it. i.e.
10/22/2018 qty:1
10/22/2018 qty:3
11/22/2017 qty:1
Is it possible to edit the code below to multiply the count of dates by the qty associated with it? I've been fiddling around with where to put the multiplier but am stumped. This is the code I have running so far (without multiplier).
import pandas as pd
import matplotlib.pyplot as plt; plt.rcdefaults()
import matplotlib.dates as mdates
import numpy as np
import matplotlib.pyplot as plt
quotes = pd.read_csv("PO25474.csv")

# pd.to_datetime is used instead of astype("datetime64"): newer pandas
# releases reject the unit-less "datetime64" dtype.
quotes["ENTRY_DATE"] = pd.to_datetime(quotes["ENTRY_DATE"])

plt.figure(figsize=(20, 10))

# Number of rows per (year, month) of the entry date, drawn as a bar chart.
entry_date = quotes["ENTRY_DATE"]
monthly_count = entry_date.groupby(
    [entry_date.dt.year, entry_date.dt.month]
).count()
ax = monthly_count.plot(kind="bar")

ax.set_xlabel("quotes by month")
ax.set_ylabel("count")
ax.set_title("title")
plt.show()
I solved it. Had to put in QTY after the groupby and swap out count with sum.
import pandas as pd
import matplotlib.pyplot as plt; plt.rcdefaults()
import matplotlib.dates as mdates
import numpy as np
quotes = pd.read_csv("PO25474.csv")

# Two-column view of the data; not referenced by the plot below.
qty = quotes[["ENTRY_DATE", "QTY"]]

# pd.to_datetime is used instead of astype("datetime64"): newer pandas
# releases reject the unit-less "datetime64" dtype.
quotes["ENTRY_DATE"] = pd.to_datetime(quotes["ENTRY_DATE"])

plt.figure(figsize=(12, 5))

# Total QTY per (year, month) of the entry date: selecting 'QTY' after the
# groupby and summing weights each date by its quantity rather than
# counting rows.
entry_date = quotes["ENTRY_DATE"]
monthly_qty = quotes.groupby(
    [entry_date.dt.year, entry_date.dt.month]
)['QTY'].sum()
ax = monthly_qty.plot(kind="bar")

ax.set_xlabel("quotes by month")
ax.set_ylabel("count")
ax.set_title("PO25474")
plt.show()