Need help passing date to pandas query - python-3.x

How do I pass the output of this prompt to a pandas search by date in excel?
import pandas as pd
TestedDateBegin = pd.to_datetime(input('Input date in mm-dd-yyyy format: '))
For example, if I input 2019-09-08 into above input prompt and run TestedDateBegin I get this output:
Timestamp('2019-09-08 00:00:00')
This search with the date hard coded works fine.
data = df.loc[df['emr_first_access_date'] >= '2019-09-08', ['site_name','subs_num','emr_id', 'emr_first_access_date']]
But how do I pass the date inputted from the prompt so the user can search by any date?
This doesn't work:
data = df.loc[df['emr_first_access_date'] >= 'TestedDateBegin', ['site_name','subs_num','emr_id', 'emr_first_access_date']]
and throws an exception:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
pandas/_libs/tslibs/np_datetime.pyx in pandas._libs.tslibs.np_datetime._string_to_dts()
ValueError: Error parsing datetime string "TestedDateBegin" at position 0
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
pandas/_libs/tslibs/parsing.pyx in pandas._libs.tslibs.parsing.parse_datetime_string()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dateutil\parser\_parser.py in parse(timestr, parserinfo, **kwargs)
1357 else:
-> 1358 return DEFAULTPARSER.parse(timestr, **kwargs)
1359
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dateutil\parser\_parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
648 if res is None:
--> 649 raise ValueError("Unknown string format:", timestr)
650
ValueError: ('Unknown string format:', 'TestedDateBegin')
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in wrapper(self, other)
144 try:
--> 145 other = _to_M8(other, tz=self.tz)
146 except ValueError:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in _to_M8(key, tz)
77 # this also converts strings
---> 78 key = Timestamp(key)
79 if key.tzinfo is not None and tz is not None:
pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_to_tsobject()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
ValueError: could not convert string to Timestamp
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-2-702fd23c14bb> in <module>
----> 1 data = df.loc[df['emr_first_access_date'] >= 'TestedDateBegin', ['site_name','subs_num','emr_id', 'emr_first_access_date']]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in wrapper(self, other, axis)
1714
1715 res_values = dispatch_to_index_op(op, self, other,
-> 1716 pd.DatetimeIndex)
1717
1718 return self._constructor(res_values, index=self.index,
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in dispatch_to_index_op(op, left, right, index_class)
1189 left_idx = left_idx._shallow_copy(freq=None)
1190 try:
-> 1191 result = op(left_idx, right)
1192 except NullFrequencyError:
1193 # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexes\datetimelike.py in wrapper(self, other)
115 other = other._values
116
--> 117 result = op(self._data, maybe_unwrap_index(other))
118 return result
119
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py in wrapper(self, other)
146 except ValueError:
147 # string that cannot be parsed to Timestamp
--> 148 return ops.invalid_comparison(self, other, op)
149
150 result = op(self.asi8, other.view('i8'))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in invalid_comparison(left, right, op)
1056 else:
1057 raise TypeError("Invalid comparison between dtype={dtype} and {typ}"
-> 1058 .format(dtype=left.dtype, typ=type(right).__name__))
1059 return res_values
1060
TypeError: Invalid comparison between dtype=datetime64[ns] and str

The error
TypeError: Invalid comparison between dtype=datetime64[ns] and str
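tells you that you are trying to compare a datetime column with a string. In your query, 'TestedDateBegin' is in quotes, so pandas sees the literal text rather than the variable; pass the variable itself instead (df['emr_first_access_date'] >= TestedDateBegin). If you start from a date string, convert it to a datetime manually first. In your case try: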
from datetime import datetime
date = '2019-09-08'
date = datetime.strptime(date, '%Y-%m-%d')
To learn more about date formatting, see the documentation.

Related

"TypeError: float() argument must be a string or a number, not 'FreeRV' " in pymc3

I am stuck on the following error in pymc3 and am at a loss.
TypeError: float() argument must be a string or a number, not 'FreeRV'
Below is my code. I picked up this code from here (sorry, the page is in Japanese). However, it does not work in my environment:
Google Colab, Python: 3.7.13, pymc3: 3.11.4
import numpy as np
import matplotlib
import matplotlib.pylab as plt
%matplotlib inline
from tqdm import tqdm
import pymc3 as pm
# generate time-series data
np.random.seed(0)
y = np.cumsum(np.random.normal(size=100))
# Infer parameters in time-series data
N = len(y)
T = 1000
with pm.Model() as model:
muZero = pm.Normal(name='muZero', mu=0.0, tau=1.0)
sigmaW = pm.InverseGamma(name='sigmaW', alpha=1.0, beta=1.0)
mu = [0]*N
mu[0] = pm.Normal(name='mu0', mu=muZero, tau=1/sigmaW)
for n in range(1, N):
mu[n] = pm.Normal(name='mu'+str(n), mu=mu[n-1], tau=1/sigmaW)
sigmaV = pm.InverseGamma(name='sigmaV', alpha=1.0, beta=1.0)
y_pre = pm.Normal('y_pre', mu=mu, tau=1/sigmaV, observed=y) # I got error here
start = pm.find_MAP()
step = pm.NUTS()
trace = pm.sample(T, step, start=start)
Here is the full description of my error.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pymc3/theanof.py in floatX(X)
82 try:
---> 83 return X.astype(theano.config.floatX)
84 except AttributeError:
AttributeError: 'list' object has no attribute 'astype'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
TypeError: float() argument must be a string or a number, not 'FreeRV'
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
4 frames
<ipython-input-30-c3709f545993> in <module>()
26
27 sigmaV = pm.InverseGamma(name='sigmaV', alpha=1.0, beta=1.0)
---> 28 y_pre = pm.Normal('y_pre', mu=mu, tau=1/sigmaV, observed=y) # I got error here
29 start = pm.find_MAP()
30 step = pm.NUTS()
/usr/local/lib/python3.7/dist-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
119 dist = cls.dist(*args, **kwargs, shape=shape)
120 else:
--> 121 dist = cls.dist(*args, **kwargs)
122 return model.Var(name, dist, data, total_size, dims=dims)
123
/usr/local/lib/python3.7/dist-packages/pymc3/distributions/distribution.py in dist(cls, *args, **kwargs)
128 def dist(cls, *args, **kwargs):
129 dist = object.__new__(cls)
--> 130 dist.__init__(*args, **kwargs)
131 return dist
132
/usr/local/lib/python3.7/dist-packages/pymc3/distributions/continuous.py in __init__(self, mu, sigma, tau, sd, **kwargs)
485 self.tau = tt.as_tensor_variable(tau)
486
--> 487 self.mean = self.median = self.mode = self.mu = mu = tt.as_tensor_variable(floatX(mu))
488 self.variance = 1.0 / self.tau
489
/usr/local/lib/python3.7/dist-packages/pymc3/theanof.py in floatX(X)
84 except AttributeError:
85 # Scalar passed
---> 86 return np.asarray(X, dtype=theano.config.floatX)
87
88
ValueError: setting an array element with a sequence.

Scraping multiple wikitables using Python

I am a complete beginner in Python. I have a task to scrape an information table from a Wikipedia page. I would like to scrape it using the code below:
from pandas.io.html import read_html
page = requests.get('https://de.wikipedia.org/wiki/Köln')
wikitables = read_html(page, attrs={"class":"hintergrundfarbe5 float-right toptextcells infobox"})
print("Extracted {num} wikitables".format(num=len(wikitables)))
wikitables[0]
But I get the error below, which I think is due to the special character in the URL (Köln). Please help me understand what to modify in the program to scrape the information.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-168-d9bd1e1d7548> in <module>
2 page = requests.get('https://de.wikipedia.org/wiki/Köln')
3 Soup = BeautifulSoup(page.content)
----> 4 wikitables = read_html(page, attrs={"class":"hintergrundfarbe5 float-right toptextcells infobox"})
5 print("Extracted {num} wikitables".format(num=len(wikitables)))
6
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\html.py in read_html(io, match, flavor, header, index_col, skiprows, attrs, parse_dates, tupleize_cols, thousands, encoding, decimal, converters, na_values, keep_default_na, displayed_only)
1092 decimal=decimal, converters=converters, na_values=na_values,
1093 keep_default_na=keep_default_na,
-> 1094 displayed_only=displayed_only)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\html.py in _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs)
914 break
915 else:
--> 916 raise_with_traceback(retained)
917
918 ret = []
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\compat\__init__.py in raise_with_traceback(exc, traceback)
418 if traceback == Ellipsis:
419 _, _, traceback = sys.exc_info()
--> 420 raise exc.with_traceback(traceback)
421 else:
422 # this version of raise is a syntax error in Python 3
TypeError: Cannot read object of type 'Response'
This has nothing to do with beautiful Köln...
You need to change
wikitables = read_html(page, attrs={"..."})
to
wikitables = read_html(page.text, attrs={"..."})
and it should work.

Finding emails by subject name with Python 3

I'm trying to pull emails in my inbox with a certain subject name by using the following code:
import imapclient
import pprint
imapObj = imapclient.IMAPClient('imap-mail.outlook.com',ssl=True)
imapObj.login('personalemail@outlook.com','strongpassword')
imapObj.select_folder('INBOX',readonly=True)
imapObj.search('SUBJECT Broker Dealer Fails Report – NY')
The error I'm getting is:
---------------------------------------------------------------------------
UnicodeEncodeError Traceback (most recent call last)
<ipython-input-35-d172a6d61d89> in <module>
1 imapObj.select_folder('INBOX',readonly=True)
----> 2 imapObj.search('SUBJECT Broker Dealer Fails Report – NY')
C:\ProgramData\Anaconda3\lib\site-packages\imapclient\imapclient.py in search(self, criteria, charset)
954
955 """
--> 956 return self._search(criteria, charset)
957
958 @require_capability('X-GM-EXT-1')
C:\ProgramData\Anaconda3\lib\site-packages\imapclient\imapclient.py in _search(self, criteria, charset)
977 if charset:
978 args.extend([b'CHARSET', to_bytes(charset)])
--> 979 args.extend(_normalise_search_criteria(criteria, charset))
980
981 try:
C:\ProgramData\Anaconda3\lib\site-packages\imapclient\imapclient.py in _normalise_search_criteria(criteria, charset)
1614
1615 if isinstance(criteria, (text_type, binary_type)):
-> 1616 return [to_bytes(criteria, charset)]
1617
1618 out = []
C:\ProgramData\Anaconda3\lib\site-packages\imapclient\util.py in to_bytes(s, charset)
28 def to_bytes(s, charset='ascii'):
29 if isinstance(s, text_type):
---> 30 return s.encode(charset)
31 return s
32
UnicodeEncodeError: 'ascii' codec can't encode character '\u2013' in position 35: ordinal not in range(128)
I have tried different combinations of using lists in the parenthesis ([]), using '' and "" for the name of the subject.
The subject should start with "Subject:". Try this and see if it works.

AttributeError: Can only use .dt accessor with datetimelike values in 0yrs 0mon format

I am trying to convert a date string format to numeric, but I get an error.
My date column looks like this:
train['AVERAGE_ACCT_AGE'].head(6)
0 0yrs 0mon
1 1yrs 11mon
2 0yrs 0mon
3 0yrs 8mon
4 0yrs 0mon
5 1yrs 9mon
Name: AVERAGE_ACCT_AGE, dtype: object
I tried this code to add DateTime format to that variable.
train['AVERAGE_ACCT_AGE']=pd.to_datetime(train['AVERAGE.ACCT.AGE'], format='%Y%m')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in _convert_listlike(arg, box, format, name, tz)
376 try:
--> 377 values, tz = conversion.datetime_to_datetime64(arg)
378 return DatetimeIndex._simple_new(values, name=name, tz=tz)
pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.datetime_to_datetime64()
TypeError: Unrecognized value type: <class 'str'>
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-49-13f5c298f460> in <module>()
----> 1 train['AVERAGE_ACCT_AGE']=pd.to_datetime(train['AVERAGE.ACCT.AGE'], format='%Y-%m')
~\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin, cache)
449 else:
450 from pandas import Series
--> 451 values = _convert_listlike(arg._values, True, format)
452 result = Series(values, index=arg.index, name=arg.name)
453 elif isinstance(arg, (ABCDataFrame, MutableMapping)):
~\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in _convert_listlike(arg, box, format, name, tz)
378 return DatetimeIndex._simple_new(values, name=name, tz=tz)
379 except (ValueError, TypeError):
--> 380 raise e
381
382 if arg is None:
~\Anaconda3\lib\site-packages\pandas\core\tools\datetimes.py in _convert_listlike(arg, box, format, name, tz)
366 dayfirst=dayfirst,
367 yearfirst=yearfirst,
--> 368 require_iso8601=require_iso8601
369 )
370
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime()
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_to_datetime()
ValueError: time data 0yrs 0mon doesn't match format specified
After that, I tried this code, which adds errors='ignore' to the conversion.
train['AVERAGE_ACCT_AGE']=pd.to_datetime(train['AVERAGE.ACCT.AGE'], format='%Y%m',errors='ignore',infer_datetime_format=True)
It seemed to add the datetime format, so then I ran this code:
train['yrs']=train['AVERAGE_ACCT_AGE'].dt.year
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-50-39b8c6e07f77> in <module>()
----> 1 train['yrs']=train['AVERAGE_ACCT_AGE'].dt.year
~\Anaconda3\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
4366 if (name in self._internal_names_set or name in self._metadata or
4367 name in self._accessors):
-> 4368 return object.__getattribute__(self, name)
4369 else:
4370 if self._info_axis._can_hold_identifiers_and_holds_name(name):
~\Anaconda3\lib\site-packages\pandas\core\accessor.py in __get__(self, obj, cls)
130 # we're accessing the attribute of the class, i.e., Dataset.geo
131 return self._accessor
--> 132 accessor_obj = self._accessor(obj)
133 # Replace the property with the accessor object. Inspired by:
134 # http://www.pydanny.com/cached-property.html
~\Anaconda3\lib\site-packages\pandas\core\indexes\accessors.py in __new__(cls, data)
323 pass # we raise an attribute error anyway
324
--> 325 raise AttributeError("Can only use .dt accessor with datetimelike "
326 "values")
Please help me convert the object type to a numeric type. I want the years and months in separate columns.
AttributeError: Can only use .dt accessor with datetimelike values
The column is not of Datetime format.
Here is a quick way to get it to numeric.
I am using more lines than needed.
# doing this so we can have it in string format
train['AVERAGE_ACCT_AGE'] = train['AVERAGE_ACCT_AGE'].astype(str)
#Now remove the trailing or any such spaces
train['AVERAGE_ACCT_AGE'] = train['AVERAGE_ACCT_AGE'].map(lambda x: x.strip())
#Next we split and expand the column into 2 columns:
train[['yrs','months']] = train['AVERAGE_ACCT_AGE'].str.split(' ',n=1,expand=True)
#remove characters from new columns,
#I am assuming the characters remain the same
train['yrs'] = train['yrs'].str.replace('yrs','')
train['months'] = train['months'].str.replace('mon','')
# Convert yrs to float
train['yrs'] = train['yrs'].astype('float')
# Convert months to float
train['months'] = train['months'].astype('float')
Hope it helps.

Python3 parsing datetime in format 2018-01-14T23:55:27.337Z

I've tried several perturbations of the following:
datetime.strptime('2018-01-14T23:55:27.337Z',"%Y-%m-%dT%H:%M:%S.%3N%Z")
but get errors like this:
ValueError Traceback (most recent call last)
<ipython-input-45-babd38d0d73f> in <module>()----> 1 datetime.strptime('2018-01-14T23:55:27.337Z',"%Y-%m-%dT%H:%M:%S.%3N%Z")
/usr/lib/python3.5/_strptime.py in _strptime_datetime(cls, data_string, format)
508 """Return a class cls instance based on the input string and the
509 format string."""
--> 510 tt, fraction = _strptime(data_string, format)
511 tzname, gmtoff = tt[-2:]
512 args = tt[:6] + (fraction,)
/usr/lib/python3.5/_strptime.py in _strptime(data_string, format)
333 del err
334 raise ValueError("'%s' is a bad directive in format '%s'" %
--> 335 (bad_directive, format)) from None
336 # IndexError only occurs when the format string is "%"
337 except IndexError:
ValueError: '3' is a bad directive in format '%Y-%m-%dT%H:%M:%S.%3N%Z'
Try doing this.
datetime.strptime('2018-01-14T23:55:27.337Z', '%Y-%m-%dT%H:%M:%S.%fZ')
Take note of the second argument. It's '%Y-%m-%dT%H:%M:%S.%fZ' and not '%Y-%m-%dT%H:%M:%S.%3N%Z'. You are getting a bad directive error because %3 is not a directive; %f is the directive that parses the fractional seconds.
In case you need it, here's the documentation for strptime().

Resources