deleting pandas dataframe rows not working - python-3.x

import numpy as np
import pandas as pd
randArr = np.random.randint(0,100,20).reshape(5,4)
df =pd.DataFrame(randArr,np.arange(101,106,1),['PDS', 'Algo','SE','INS'])
df.drop('103',inplace=True)
This code is not working:
Traceback (most recent call last):
File "D:\Education\4th year\1st sem\Machine Learning Lab\1st Lab\python\pandas\pdDataFrame.py", line 25, in <module>
df.drop('103',inplace=True)

The string '103' isn't in the index, but the integer 103 is:
Replace df.drop('103',inplace=True) with df.drop(103,inplace=True)

Related

Scraping values using Python

I want to scrape "Short interest" and "% of float shorted" values from https://www.marketwatch.com/investing/stock/aapl. I have tried using xpath, but it returns an Error.
import requests
from lxml import html
import pandas as pd
import numpy as np
url = "https://www.marketwatch.com/investing/stock/aapl"
page = requests.get(url)
tree = html.fromstring(page.content)
per_of_float_shorted = tree.xpath('/html/body/div[4]/div[5]/div[1]/div[1]/div/ul/li[14]/span[1]')[0].text()
short_interest = tree.xpath('/html/body/div[4]/div[5]/div[1]/div[1]/div/ul/li[14]/span[1]')[0].text()
print(short_interest)
print(per_of_float_shorted)
Traceback (most recent call last):
File "scraper.py", line 11, in <module>
per_of_float_shorted = tree.xpath('/html/body/div[4]/div[5]/div[1]/div[1]/div/ul/li[14]/span[1]')[0].text()
IndexError: list index out of range
Thank you.

AMZN stock data retrieval with YahooDailyReader

Up until 30 minutes ago I was executing the following code without problems:
import pandas_datareader.data as web
import datetime
start = datetime.datetime(2012,1,1)
end = datetime.datetime(2015,12,31)
AAPL = web.get_data_yahoo('AAPL', start, end)
AMZN = web.get_data_yahoo('AMZN', start, end)
Instead now I get:
Traceback (most recent call last):
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/pandas_datareader/yahoo/daily.py", line 157, in _read_one_data
data = j["context"]["dispatcher"]["stores"]["HistoricalPriceStore"]
KeyError: 'HistoricalPriceStore'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/pandas_datareader/data.py", line 82, in get_data_yahoo
return YahooDailyReader(*args, **kwargs).read()
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/pandas_datareader/base.py", line 251, in read
df = self._read_one_data(self.url, params=self._get_params(self.symbols))
File "/Users/me/opt/anaconda3/lib/python3.7/site-packages/pandas_datareader/yahoo/daily.py", line 160, in _read_one_data
raise RemoteDataError(msg.format(symbol, self.__class__.__name__))
pandas_datareader._utils.RemoteDataError: No data fetched for symbol AMZN using YahooDailyReader
How can I fix this?
Is there a work around to get the AMZN data as DataFrame from another source (different from Yahoo_Daily_Reader)?
Python version 3.4.7
How about this solution?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.optimize as sco
import datetime as dt
import math
from datetime import datetime, timedelta
from pandas_datareader import data as wb
from sklearn.cluster import KMeans
np.random.seed(777)
start = '2019-4-30'
end = '2019-10-31'
# N = 90
# start = datetime.now() - timedelta(days=N)
# end = dt.datetime.today()
tickers = ['MMM',
'ABT',
'AAPL',
'AMAT',
'APTV',
'ADM',
'ARNC',
'AMZN']
thelen = len(tickers)
price_data = []
for ticker in tickers:
prices = wb.DataReader(ticker, start = start, end = end, data_source='yahoo')[['Adj Close']]
price_data.append(prices.assign(ticker=ticker)[['ticker', 'Adj Close']])
df = pd.concat(price_data)
df.dtypes
df.head()
df.shape
pd.set_option('display.max_columns', 500)
df = df.reset_index()
df = df.set_index('Date')
table = df.pivot(columns='ticker')
# By specifying col[1] in below list comprehension
# You can select the stock names under multi-level column
table.columns = [col[1] for col in table.columns]
table.head()
plt.figure(figsize=(14, 7))
for c in table.columns.values:
plt.plot(table.index, table[c], lw=3, alpha=0.8,label=c)
plt.legend(loc='upper left', fontsize=12)
plt.ylabel('price in $')

IndexError: list index out of range in Python 3.7

I have written the code below in Python 3.7 (64-bit) with Anaconda 1.9.7.
import pandas as pd
import re
import glob
import os
data = data.join(data.pop('Serial Number')
.str.strip(',')
.str.split(',', expand=True)
.stack()
.reset_index(level=1, drop=True)
.rename('Serial Number')).reset_index(drop=True)
data['Serial Number']
After running this file, I got the error message below:
Traceback (most recent call last):
File "servicereport.py", line 70, in <module>
.str.split(',', expand=True)
IndexError: list index out of range
I don't understand what exactly is missing here, as this code runs on the system on which it was created.
How can I fix this error?

Cannot from pandas import Dataframe

from pandas import Dataframe
ImportError Traceback (most recent call last)
in ()
----> 1 from pandas import Dataframe
ImportError: cannot import name 'Dataframe'
I understand there are workarounds, but I need to do this for an assignment. I am using Jupyter with Python version 3.6.
Thanks in advance.
from pandas import DataFrame
Notice the capitalization: the class is named DataFrame (capital "F"), not Dataframe.

Colored Image unresponsive and gives an error

I am trying to read an image from skimage.data. The colored image appears, but the window is not responding. I am also getting a ValueError. Here are my commands and errors.
>>> import skimage
>>> import numpy as np
>>> import scipy as sp
>>> import matplotlib.pyplot as plt
>>> from skimage import data
>>> cat=data.chelsea()
>>> plt.imshow(cat,interpolation='nearest');
<matplotlib.image.AxesImage object at 0x0788BDF0>
>>> plt.show(cat)
Traceback (most recent call last):
File "<pyshell#7>", line 1, in <module>
plt.show(cat)
File "C:\Python36-32\lib\site-packages\matplotlib\pyplot.py", line 253, in show
return _show(*args, **kw)
File "C:\Python36-32\lib\site-packages\matplotlib\backend_bases.py", line 166, in __call__
if block:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Can anyone help me to get through this error?

Resources