Can't convert 'bytes' object to str implicitly - python-3.x

In [458]: type(obj_xml)
Out[458]: builtins.bytes
In [459]: with codecs.open( xmlOutFile, "+ab", "utf-8" ) as f:
.....: f.write(obj_xml)
.....:
error i am hitting
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-459-61a3d9d572a6> in <module>()
1 with codecs.open( xmlOutFile, "+ab", "utf-8" ) as f:
----> 2 f.write(obj_xml)
3
C:\Python3\lib\codecs.py in write(self, data)
698 def write(self, data):
699
--> 700 return self.writer.write(data)
701
702 def writelines(self, list):
C:\Python3\lib\codecs.py in write(self, object)
354 """ Writes the object's contents encoded to self.stream.
355 """
--> 356 data, consumed = self.encode(object, self.errors)
357 self.stream.write(data)
358
TypeError: Can't convert 'bytes' object to str implicitly
How do i go about writing the contents of obj_xml to the file ?

codecs.open takes a Unicode string and encodes it to bytes when writing. You already have a bytes object, so just open the file in binary mode and write the object:
# obj_xml is already bytes, so append it through a plain binary-mode file
# object; no codec wrapper is needed.
with open(xmlOutFile,'+ab') as f:
    f.write(obj_xml)

Related

TypeError: string indices must be integers in the time of downloading stock data

Previously, this same code was running perfectly. However, I encountered this error recently "TypeError: string indices must be integers".
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pandas_datareader.data as web
import datetime
start=datetime.datetime(2015,6,1)
end=datetime.datetime(2022,6,30)
sbin=web.DataReader('SBIN.BO','yahoo',start,end)
tatamotors=web.DataReader('TATAMOTORS.BO','yahoo',start,end)
reliance=web.DataReader('RELIANCE.BO','yahoo',start,end)
I have also tried this code with other stocks, but got the same result. After running the above code, the following error occurred:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [3], in <cell line: 1>()
----> 1 sbin=web.DataReader('SBIN.BO','yahoo',start,end)
2 tatamotors=web.DataReader('TATAMOTORS.BO','yahoo',start,end)
3 reliance=web.DataReader('RELIANCE.BO','yahoo',start,end)
File C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py:207, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
205 else:
206 kwargs[new_arg_name] = new_arg_value
--> 207 return func(*args, **kwargs)
File C:\ProgramData\Anaconda3\lib\site-packages\pandas_datareader\data.py:370, in DataReader(name, data_source, start, end, retry_count, pause, session, api_key)
367 raise NotImplementedError(msg)
369 if data_source == "yahoo":
--> 370 return YahooDailyReader(
371 symbols=name,
372 start=start,
373 end=end,
374 adjust_price=False,
375 chunksize=25,
376 retry_count=retry_count,
377 pause=pause,
378 session=session,
379 ).read()
381 elif data_source == "iex":
382 return IEXDailyReader(
383 symbols=name,
384 start=start,
(...)
390 session=session,
391 ).read()
File C:\ProgramData\Anaconda3\lib\site-packages\pandas_datareader\base.py:253, in _DailyBaseReader.read(self)
251 # If a single symbol, (e.g., 'GOOG')
252 if isinstance(self.symbols, (string_types, int)):
--> 253 df = self._read_one_data(self.url, params=self._get_params(self.symbols))
254 # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
255 elif isinstance(self.symbols, DataFrame):
File C:\ProgramData\Anaconda3\lib\site-packages\pandas_datareader\yahoo\daily.py:153, in YahooDailyReader._read_one_data(self, url, params)
151 try:
152 j = json.loads(re.search(ptrn, resp.text, re.DOTALL).group(1))
--> 153 data = j["context"]["dispatcher"]["stores"]["HistoricalPriceStore"]
154 except KeyError:
155 msg = "No data fetched for symbol {} using {}"
TypeError: string indices must be integers.
Please help me in solving this issue.
There is a long-standing gh-issue that discusses your problem. As the corresponding PR hasn't been merged as of today, I would recommend to use the yfinance override instead:
import datetime
import pandas_datareader.data as web
import yfinance as yf
yf.pdr_override()
start=datetime.datetime(2015, 6, 1)
end=datetime.datetime(2022, 6, 30)
sbin = web.DataReader('SBIN.BO', start, end)
tatamotors = web.DataReader('TATAMOTORS.BO', start, end)
reliance = web.DataReader('RELIANCE.BO', start, end)
Output for SBIN:
Open High Low Close Adj Close Volume
Date
2015-06-01 00:00:00+05:30 279.000000 281.950012 277.600006 278.149994 265.453278 1331528
2015-06-02 00:00:00+05:30 278.500000 279.500000 265.500000 266.250000 254.096466 3382530
2015-06-03 00:00:00+05:30 267.149994 268.000000 255.100006 257.549988 245.793579 2706069

Writing CSV file into dataframe from FTPS server with python

I am trying to get a csv file out of an ftps server. I am receiving this info, though:
file = r'filename.csv'
with ftplib.FTP() as ftp:
with open(file, 'rb') as f:
ftp.retrbinary(file, f.read)
df1= pd.read_csv(file)
df1.head()
with this particular error:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-10-a2725f958d45> in <module>
4
5 with open(file, 'rb') as f:
----> 6 ftp.retrbinary(file, f.read)
7 df1= pd.read_csv(file) #delimiter = '|', encoding = 'latin1')
8 df1.head()
~\AppData\Local\Continuum\anaconda3\lib\ftplib.py in retrbinary(self, cmd, callback, blocksize, rest)
439 The response code.
440 """
--> 441 self.voidcmd('TYPE I')
442 with self.transfercmd(cmd, rest) as conn:
443 while 1:
~\AppData\Local\Continuum\anaconda3\lib\ftplib.py in voidcmd(self, cmd)
275 def voidcmd(self, cmd):
276 """Send a command and expect a response beginning with '2'."""
--> 277 self.putcmd(cmd)
278 return self.voidresp()
279
~\AppData\Local\Continuum\anaconda3\lib\ftplib.py in putcmd(self, line)
197 def putcmd(self, line):
198 if self.debugging: print('*cmd*', self.sanitize(line))
--> 199 self.putline(line)
200
201 # Internal: return one line from the server, stripping CRLF.
~\AppData\Local\Continuum\anaconda3\lib\ftplib.py in putline(self, line)
192 if self.debugging > 1:
193 print('*put*', self.sanitize(line))
--> 194 self.sock.sendall(line.encode(self.encoding))
195
196 # Internal: send one command to the server (through putline())
AttributeError: 'NoneType' object has no attribute 'sendall'
Any ideas as to why this isn't putting the requested file into a dataframe?
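The documentation says that the cmd argument of the retrbinary method should be an appropriate RETR command (RETR filename), and that the callback function is called for each block of data received.
If you need to fetch the data, write it to a file, and then read that file, open the local file for writing ('wb', not 'rb') and try: ftp.retrbinary(f'RETR {file}', f.write). Note also that the traceback ('NoneType' object has no attribute 'sendall') shows the FTP object has no open socket: ftplib.FTP() created without a host is not connected, so connect and log in before calling retrbinary (for an FTPS server, use ftplib.FTP_TLS).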
Method Name:
retrbinary
retrbinary(cmd, callback, blocksize=8192, rest=None)
callback:For each block of the data received from the FTP server the callback function is called. This callback function can be used for processing the data received. For example, the callback can be used for writing the received blocks into a file
for example:
you can use this:
# Open the local file in binary write mode; the context manager closes it
# once the transfer has finished, so the handle is not leaked.
with open(filename, 'wb') as fhandle:
    # retrbinary calls fhandle.write once for each block received.
    ftp.retrbinary('RETR ' + filename, fhandle.write)
or
# Download FILE into a local file of the same name; the context manager
# closes the local file when the transfer ends.
with open(FILE, 'wb') as fhandle:
    ftp.retrbinary('RETR %s' % FILE, fhandle.write)

Need help passing date to pandas query

How do I pass the output of this prompt to a pandas search by date in excel?
import pandas as pd
TestedDateBegin = pd.to_datetime(input('Input date in mm-dd-yyyy format: '))
For example, if I input 2019-09-08 into above input prompt and run TestedDateBegin I get this output:
Timestamp('2019-09-08 00:00:00')
This search with the date hard coded works fine.
data = df.loc[df['emr_first_access_date'] >= '2019-09-08', ['site_name','subs_num','emr_id', 'emr_first_access_date']]
But how do I pass the date inputted from the prompt so the user can search by any date?
This doesnt work:
data = df.loc[df['emr_first_access_date'] >= 'TestedDateBegin', ['site_name','subs_num','emr_id', 'emr_first_access_date']]
and throws a exception:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
pandas/_libs/tslibs/np_datetime.pyx in pandas._libs.tslibs.np_datetime._string_to_dts()
ValueError: Error parsing datetime string "TestedDateBegin" at position 0
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
pandas/_libs/tslibs/parsing.pyx in pandas._libs.tslibs.parsing.parse_datetime_string()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dateutil\parser\_parser.py in parse(timestr, parserinfo, **kwargs)
1357 else:
-> 1358 return DEFAULTPARSER.parse(timestr, **kwargs)
1359
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dateutil\parser\_parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
648 if res is None:
--> 649 raise ValueError("Unknown string format:", timestr)
650
ValueError: ('Unknown string format:', 'TestedDateBegin')
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in wrapper(self, other)
144 try:
--> 145 other = _to_M8(other, tz=self.tz)
146 except ValueError:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in _to_M8(key, tz)
77 # this also converts strings
---> 78 key = Timestamp(key)
79 if key.tzinfo is not None and tz is not None:
pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_to_tsobject()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
ValueError: could not convert string to Timestamp
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-2-702fd23c14bb> in <module>
----> 1 data = df.loc[df['emr_first_access_date'] >= 'TestedDateBegin', ['site_name','subs_num','emr_id', 'emr_first_access_date']]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in wrapper(self, other, axis)
1714
1715 res_values = dispatch_to_index_op(op, self, other,
-> 1716 pd.DatetimeIndex)
1717
1718 return self._constructor(res_values, index=self.index,
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in dispatch_to_index_op(op, left, right, index_class)
1189 left_idx = left_idx._shallow_copy(freq=None)
1190 try:
-> 1191 result = op(left_idx, right)
1192 except NullFrequencyError:
1193 # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\datetimelike.py in wrapper(self, other)
115 other = other._values
116
--> 117 result = op(self._data, maybe_unwrap_index(other))
118 return result
119
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in wrapper(self, other)
146 except ValueError:
147 # string that cannot be parsed to Timestamp
--> 148 return ops.invalid_comparison(self, other, op)
149
150 result = op(self.asi8, other.view('i8'))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in invalid_comparison(left, right, op)
1056 else:
1057 raise TypeError("Invalid comparison between dtype={dtype} and {typ}"
-> 1058 .format(dtype=left.dtype, typ=type(right).__name__))
1059 return res_values
1060
TypeError: Invalid comparison between dtype=datetime64[ns] and str
The error
TypeError: Invalid comparison between dtype=datetime64[ns] and str
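tells you that you are comparing a datetime column with a string. In your query, 'TestedDateBegin' is in quotes, so pandas receives the literal text rather than the variable's value; pass the variable itself (df['emr_first_access_date'] >= TestedDateBegin), which pd.to_datetime has already turned into a Timestamp. If you start from a date string instead, convert it to a datetime manually, for example: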
from datetime import datetime
date = '2019-09-08'
date = datetime.strptime(date, '%Y-%m-%d')
To learn more about date format codes, see the datetime documentation (strftime/strptime behavior).

Tweet-Cleaning by removing b' and ASCII - can't find the problem

I am currently preprocessing tweets, extracted via Twitter API and saved as csv. Within the csv there are some characters like "b'" at the beginning of the tweet and code like aren\xe2\x80\x99t, which stands for "'". Now I want to remove these chars but don't know how although I have tried it a couple of times. Can anyone help me? I read the file with pandas and Python3. The column is called "text"
What I mean is the following:
b'RT #username: some text some text C\xe2\x80\xa6' OR
"b'RT #username: some text some text .A\xe2\x80\xa6'
Input 1:
df = pd.read_csv('Data/test.csv', encoding= 'utf8')
df['text'] = df['text'].str.replace('b[\s]+', ' ')
df['text'] = df['text'].str.replace('[^\x00-\x7F]+',' ')
df['text'] = df['text'].str.replace('[^\u0000-\uD7FF\uE000-\uFFFF]',' ')
Output 1: Nothing happens.
With the next snippet I tried to apply the UTF-8 encoding. If I am right, this sometimes needs to be done for further processing.
Input 2:
df = pd.read_csv('Data/Result_w8_Pfizer_en_test.csv', encoding= 'utf8')
df.apply(lambda x: pd.lib.infer_dtype(x.values))
Output 2:
AttributeError Traceback (most recent call last)
<ipython-input-50-4c6bdb11d736> in <module>
25
26 df = pd.read_csv('Data/test.csv', encoding= 'utf8') # dtype=string
---> 27 df.apply(lambda x: pd.lib.infer_dtype(x.values))
28
29
~/conda/lib/python3.6/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6485 args=args,
6486 kwds=kwds)
-> 6487 return op.get_result()
6488
6489 def applymap(self, func):
~/conda/lib/python3.6/site-packages/pandas/core/apply.py in get_result(self)
149 return self.apply_raw()
150
--> 151 return self.apply_standard()
152
153 def apply_empty_result(self):
~/conda/lib/python3.6/site-packages/pandas/core/apply.py in apply_standard(self)
255
256 # compute the result using the series generator
--> 257 self.apply_series_generator()
258
259 # wrap results
~/conda/lib/python3.6/site-packages/pandas/core/apply.py in apply_series_generator(self)
284 try:
285 for i, v in enumerate(series_gen):
--> 286 results[i] = self.f(v)
287 keys.append(v.name)
288 except Exception as e:
<ipython-input-50-4c6bdb11d736> in <lambda>(x)
25
26 df = pd.read_csv('Data/test.csv', encoding= 'utf8')
---> 27 df.apply(lambda x: pd.lib.infer_dtype(x.values))
28
29
AttributeError: ("module 'pandas' has no attribute 'lib'", 'occurred at index date')
Here I did some research but couldn't find out the issue or how to solve it.

AttributeError: Can only use .dt accessor with datetimelike values in 0yrs 0mon format

I am trying to convert a date-like string column to numeric values, but I get an error.
My date column looks like this:
train['AVERAGE_ACCT_AGE'].head(6)
0 0yrs 0mon
1 1yrs 11mon
2 0yrs 0mon
3 0yrs 8mon
4 0yrs 0mon
5 1yrs 9mon
Name: AVERAGE_ACCT_AGE, dtype: object
I tried this code to add DateTime format to that variable.
train['AVERAGE_ACCT_AGE']=pd.to_datetime(train['AVERAGE.ACCT.AGE'], format='%Y%m')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in _convert_listlike(arg, box, format, name, tz)
376 try:
--> 377 values, tz = conversion.datetime_to_datetime64(arg)
378 return DatetimeIndex._simple_new(values, name=name, tz=tz)
pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.datetime_to_datetime64()
TypeError: Unrecognized value type: <class 'str'>
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-49-13f5c298f460> in <module>()
----> 1 train['AVERAGE_ACCT_AGE']=pd.to_datetime(train['AVERAGE.ACCT.AGE'], format='%Y-%m')
~\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin, cache)
449 else:
450 from pandas import Series
--> 451 values = _convert_listlike(arg._values, True, format)
452 result = Series(values, index=arg.index, name=arg.name)
453 elif isinstance(arg, (ABCDataFrame, MutableMapping)):
~\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in _convert_listlike(arg, box, format, name, tz)
378 return DatetimeIndex._simple_new(values, name=name, tz=tz)
379 except (ValueError, TypeError):
--> 380 raise e
381
382 if arg is None:
~\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in _convert_listlike(arg, box, format, name, tz)
366 dayfirst=dayfirst,
367 yearfirst=yearfirst,
--> 368 require_iso8601=require_iso8601
369 )
370
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime()
ValueError: time data 0yrs 0mon doesn't match format specified
After that, I tried this code, adding errors='ignore' to the call.
train['AVERAGE_ACCT_AGE']=pd.to_datetime(train['AVERAGE.ACCT.AGE'], format='%Y%m',errors='ignore',infer_datetime_format=True)
It seemed to add the datetime format, so I then ran this code:
train['yrs']=train['AVERAGE_ACCT_AGE'].dt.year
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-50-39b8c6e07f77> in <module>()
----> 1 train['yrs']=train['AVERAGE_ACCT_AGE'].dt.year
~\Anaconda3\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
4366 if (name in self._internal_names_set or name in self._metadata or
4367 name in self._accessors):
-> 4368 return object.__getattribute__(self, name)
4369 else:
4370 if self._info_axis._can_hold_identifiers_and_holds_name(name):
~\Anaconda3\lib\site-packages\pandas\core\accessor.py in __get__(self, obj, cls)
130 # we're accessing the attribute of the class, i.e., Dataset.geo
131 return self._accessor
--> 132 accessor_obj = self._accessor(obj)
133 # Replace the property with the accessor object. Inspired by:
134 # http://www.pydanny.com/cached-property.html
~\Anaconda3\lib\site-packages\pandas\core\indexes\accessors.py in __new__(cls, data)
323 pass # we raise an attribute error anyway
324
--> 325 raise AttributeError("Can only use .dt accessor with datetimelike "
326 "values")
please help me how to convert object type to numeric type. I want years and months of columns separately.
AttributeError: Can only use .dt accessor with datetimelike values
The column is not of Datetime format.
Here is a quick way to get it to numeric.
I am using more lines than needed.
# Work on the string form of the column so the .str accessor is available.
train['AVERAGE_ACCT_AGE'] = train['AVERAGE_ACCT_AGE'].astype(str)
# Remove any leading or trailing whitespace.
train['AVERAGE_ACCT_AGE'] = train['AVERAGE_ACCT_AGE'].str.strip()
# Split on the first space and expand into two new columns,
# e.g. "1yrs 11mon" -> yrs="1yrs", months="11mon".
train[['yrs','months']] = train['AVERAGE_ACCT_AGE'].str.split(' ',n=1,expand=True)
# Remove the unit suffixes from the new columns
# (assumes every value uses the same "yrs" / "mon" suffixes).
train['yrs'] = train['yrs'].str.replace('yrs','')
train['months'] = train['months'].str.replace('mon','')
# Convert yrs to float
train['yrs'] = train['yrs'].astype('float')
# Convert months to float; read from 'months' (not 'yrs') so the month
# values are kept instead of being overwritten with the years.
train['months'] = train['months'].astype('float')
Hope it helps.

Resources