import time
import sys
import json as json
import spacy
from datetime import datetime
from dateutil.parser import parse
def format_source_date(date):
if date != None:
try:
try:
dt = parse(date)
date_formatted=dt.strftime('%m/%d/%Y')
print(date_formatted)
except:
print(date)
except ValueError:
print(date)
def has_seperator(text):
if ',' in text or '/' in text or '-' in text:
print(text)
return True
else:
return False
date = 'Sep 25,2017'
has_seperator(date)
format_source_date(date)
The required answer is 09/25/2017. Instead taking current year 2021 . Is there any solution to solve this issue
Seems your date string is not a valid format,
See the following
>>> parse('Sep 25,2017')
datetime.datetime(2021, 9, 25, 0, 0)
>>> parse('Sep 25 2017')
datetime.datetime(2017, 9, 25, 0, 0)
so already have a has_seperator function, use it remove the character or use a supported format
Related
I am trying to format the column 'Data' to make a pattern with dates.
The formats I have are:
1/30/20 16:00
1/31/2020 23:59
2020-02-02T23:43:02
Here is the code for the dataframe.
import requests
import pandas as pd
import numpy as np
url = "https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports"
csv_only = [i.split("=")[1][1:-1] for i in requests.get(url).text.split(" ") if '.csv' in i and 'title' in i]
combo = [pd.read_csv(url.replace("github","raw.githubusercontent").replace("/tree/","/")+"/"+f) for f in csv_only]
one_df = pd.concat(combo,ignore_index=True)
one_df["País"] = one_df["Country/Region"].fillna(one_df["Country_Region"])
one_df["Data"] = one_df["Last Update"].fillna(one_df["Last_Update"])
I tried adding the code bellow but it doesn't bring the result I wanted
pd.to_datetime(one_df['Data'])
one_df.style.format({"Data": lambda t: t.strftime("%m/%d/%Y")})
Any help?
UPDATE
This is the complete code, but it doesn't work. Many exceptions printed with different date formats.
import requests
import pandas as pd
import numpy as np
from datetime import datetime
url = "https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports"
csv_only = [i.split("=")[1][1:-1] for i in requests.get(url).text.split(" ") if '.csv' in i and 'title' in i]
combo = [pd.read_csv(url.replace("github","raw.githubusercontent").replace("/tree/","/")+"/"+f) for f in csv_only]
one_df = pd.concat(combo,ignore_index=True)
df = pd.DataFrame()
DATE_FORMATS = ["%m/%d/%y %H:%M", "%m/%d/%Y %H:%M", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S"]
df["Região"] = one_df["Province/State"].fillna(one_df["Admin2"])
df["País"] = one_df["Country/Region"].fillna(one_df["Country_Region"])
df["Data"] = one_df["Last Update"].fillna(one_df["Last_Update"])
df["Confirmados"] = one_df["Confirmed"]
df["Mortes"] = one_df["Deaths"]
df["Recuperados"] = one_df["Recovered"]
def parse(x_):
for fmt in DATE_FORMATS :
try:
tmp = datetime.strptime(x_, fmt).strftime("%m/%d/%Y")
return tmp
except ValueError:
print(x_)
pd.to_datetime(df['Data'])
df['Data'] = df['Data'].apply(lambda x: parse(x))
#df['Data'].strftime('%m/%d/%Y')
#df['Data'] = df['Data'].map(lambda x: x.strftime('%m/%d/%Y') if x else '')
df.to_excel(r'C:\Users\guilh\Downloads\Covid2\Covid-19.xlsx', index=False, encoding="utf8")
print(df)
from datetime import datetime
import pandas as pd
You could save all possible formats in a list as -
DATE_FORMATS = ["%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S", "%m/%d/%y %H:%M", "%m/%d/%Y %H:%M"]
Define a function that loops through the formats and tries to parse it.
(Fixed a bug, where the print statement should have been outside the for loop)
issues = set()
def parse(x_):
for fmt in DATE_FORMATS:
try:
return datetime.strptime(x_, fmt).strftime("%m/%d/%Y")
except ValueError:
pass
issues.add(x_)
sample = ["1/30/20 16:00", "1/31/2020 23:59", "2020-02-02T23:43:02"]
df = pd.DataFrame({'data': sample})
df['data'] = df['data'].apply(lambda x: parse(x))
assert df['Data'].isna().sum() == len(issues) == 0, "Issues observed, nulls observed in dataframe"
print("Done")
Output
data
0 01/30/2020
1 01/31/2020
2 02/02/2020
If df.apply() comes across a particular date format that hasn't been defined in the list, it would simply print None since nothing would be returned by the function parse()
also here, letting pd.to_datetime infer the format does the trick:
import pandas as pd
s = pd.to_datetime(["1/30/20 16:00", "1/31/2020 23:59", "2020-02-02T23:43:02"])
print(s)
# DatetimeIndex(['2020-01-30 16:00:00', '2020-01-31 23:59:00',
# '2020-02-02 23:43:02'],
# dtype='datetime64[ns]', freq=None)
Note that if your date/time format generally provides the day first (e.g. 30.1.2021 for Jan 30th 2021), set keyword dayfirst=True.
I am using below code to pull live Nifty quote from NSEpy module but getting error.
Code:
from nsepy import get_quote
nifty = get_quote('NIFTY', series='EQ', instrument='FUTIDX', expiry='26SEP2019')
print(nifty)
Error:
nifty = get_quote('NIFTY', series='EQ', instrument='FUTIDX', expiry='26SEP2019')
File "C:\ProgramData\Anaconda3\lib\site-packages\nsepy\live.py", line 26, in get_quote
expiry_str = "%02d%s%d"%(expiry.day, months[expiry.month][0:3].upper(), expiry.year)
AttributeError: 'str' object has no attribute 'day'
I could to make this working :
#!/usr/bin/python
from nsepy import get_quote
from nsepy.derivatives import get_expiry_date
expiry = get_expiry_date(year=2019, month=9)
print(expiry)
data = get_quote('NIFTY', series='EQ', instrument='FUTIDX', expiry=expiry, option_type='CE', strike=300)
print(data)
Output:
building dictionary
2019-09-26
{'annualisedVolatility': 18.15, 'bestBuy': 10.5, 'totalSellQuantity': 263550, 'vwap': 11032.65, 'clientWisePositionLimits': 14074147, 'optionType': '-', 'highPrice': 11118.0, 'dailyVolatility': 0.95, 'bestSell': 11.31, 'marketLot': 75, 'sellQuantity5': 450, 'marketWidePositionLimits': '-', 'sellQuantity4': 150, 'sellQuantity3': 375, 'sellQuantity2': 150, 'underlying': 'NIFTY', 'sellQuantity1': 1275, 'pChange': 1.07, 'premiumTurnover': '-', 'totalBuyQuantity': 538425, 'turnoverinRsLakhs': 1219692.56, 'changeinOpenInterest': -377325, 'strikePrice': '-', 'openInterest': 16778250, 'buyPrice2': 11110.95, 'buyPrice1': 11111.0, 'openPrice': 10990.0, 'prevClose': 10996.45, 'expiryDate': '26SEP2019', 'lowPrice': 10962.15, 'buyPrice4': 11110.2, 'buyPrice3': 11110.45, 'buyPrice5': 11110.15, 'numberOfContractsTraded': 147404, 'instrumentType': 'FUTIDX', 'sellPrice1': 11113.7, 'sellPrice2': 11114.0, 'sellPrice3': 11114.5, 'sellPrice4': 11114.6, 'sellPrice5': 11114.7, 'change': 118.05, 'pchangeinOpenInterest': -2.2, 'ltp': 11.54, 'impliedVolatility': '-', 'underlyingValue': 11075.9, 'buyQuantity4': 1425, 'buyQuantity3': 75, 'buyQuantity2': 75, 'buyQuantity1': 1500, 'buyQuantity5': 150, 'settlementPrice': 11105.55, 'closePrice': 11105.55, 'lastPrice': 11114.5}
The documentation seems to be incorrect in the NSEpy library, which says expiry should be in (ddMMMyyyy) format and strike(strike_price) is a mandatory argument.
def get_quote(symbol, series='EQ', instrument=None, expiry=None, option_type=None, strike=None):
"""
1. Underlying security (stock symbol or index name)
2. instrument (FUTSTK, OPTSTK, FUTIDX, OPTIDX)
3. expiry (ddMMMyyyy)
4. type (CE/PE for options, - for futures
5. strike (strike price upto two decimal places
"""
https://nsepy.readthedocs.io/en/latest/#quick-hand-on
Expiry date is a list thats why you are getting error. use below code change.
from pprint import pprint
from datetime import date
from nsepy import get_quote
from nsepy.derivatives import get_expiry_date
expiry = date(2020,5,28)
print(expiry)
data = get_quote('BANKNIFTY', series='EQ', instrument='FUTIDX', expiry=expiry, option_type='CE', strike=300)
pprint(data)
I am trying to insert 'NULL' values in a table depending on the date.
If the date is between todays date and 3 months backward (which will be february).
Then I want to update the 'NULL' values into each selected columns.
The Traceback is as following:
Traceback (most recent call last):
File "C:\projects\docs\script.py", line 41, in <module>
if dt < date_sql < dr3:
TypeError: '<' not supported between instances of 'datetime.datetime' and 'pyodbc.Row'
Been strugling for a long time, so really appreciate your guidance as I have tried to find a solution.
Python code is:
import pyodbc
from datetime import date, datetime
import dateutil.relativedelta
conn = pyodbc.connect(
r'DRIVER={SQL Server};'
r'SERVER=server;'
r'DATABASE=db;'
)
dt = datetime.today()
dr3 = dt - dateutil.relativedelta.relativedelta(months=3)
print(dr3)
cursor = conn.cursor()
sent_date = cursor.execute("""SELECT TOP 30 sent_date, id
FROM Department.Customer""")
def fetch_date():
for row in sent_date:
r = row
print(r)
return r
date_sql = fetch_date()
if dt < date_sql < dr3:
try:
value = None
cursor.execute("""UPDATE Department.Customer SET name=?, address=?, email=?,
phone=?""", (value, value, value, value))
cursor.commit()
except pyodbc.Error as ex:
print(str(ex))
cursor.rollback()
cursor.close()
Output from print(dr3) is:
2018-02-28 17:19:50.452290
Output from print(r) in fetch_date() function is:
(datetime.datetime(2018, 5, 22, 10, 21, 36), 1)
(datetime.datetime(2018, 5, 22, 10, 21, 36), 2)
(datetime.datetime(2018, 5, 22, 10, 21, 36), 3)
...
I'm trying to convert a date in a string format (13/06/2017) into a date format June 13, 2007. I have written the code but I keep getting a syntax error for my first line which is the definition of the function line.
My code is this:
def printDate(date):
import datetime
newdate = datetime.strptime(date, %d/%m/%Y)
d = newdate.strftime(%b %d, %Y)
return d
You didn't pass the parameter "date format" as a string, that's why, also be sure to import datetime module as follows:
from datetime import datetime
def printDate(date):
newdate = datetime.strptime(date, "%d/%m/%Y")
d = newdate.strftime("%B %d, %Y")
return d
Test:
printDate("13/06/2017")
>> 'June 13, 2017'
I am trying to load OHLC-data form Kraken with the API krakenex for my research project. But I can't figure out my mistake.
I am using a modified version of https://github.com/veox/python3-krakenex/blob/master/examples/trades-history.py in python for fetching the historical OHLC-Data:
import krakenex
import datetime
import calendar
import pandas as pd
import time
# takes date and returns nix time
def date_nix(str_date):
return calendar.timegm(str_date.timetuple())
# takes nix time and returns date
def date_str(nix_time):
return datetime.datetime.fromtimestamp(nix_time).strftime('%m, %d, %Y')
#return formated request data
def req(start, end, ofs):
req_data = {'type': 'all',
'trades': 'true',
'start': str(date_nix(start)),
'end': str(date_nix(end)),
'ofs': str(ofs)
}
return req_data
k = krakenex.API()
k.load_key('kraken.key.txt')
#k.set_connection({'pair':'GNOETH'})
#headers={"headers":'XXBTZUSD'}
#pairs = ['XETHZEUR','XXBTZEUR', 'XZECZEUR', 'XXRPZEUR']
datum_ende=[[31,28,31,30,31,30,31,31,30,31,30,31],[31,29,31,30,31,30,31,31,30,31,30,31]]
data = []
count = 0
jahre =[2015,2016,2017]
for j in jahre:
for i in range(0,11):
start_date = datetime.datetime(j, i+1, 1)
if j==2016:
end_date = datetime.datetime(2016, i+1, datum_ende[1][i])
else:
end_date = datetime.datetime(j, (i+1),datum_ende[0][i])
th = k.query_private('TradesHistory', req(start_date,end_date,1))
time.sleep(.25)
print(th)
th_error = th['error']
if int(th['result']['count'])>0:
count += th['result']['count']
data.append(pd.DataFrame.from_dict(th['result']
So my problem is now that I receive the lines:
{'error': [], 'result': {'trades': {}, 'count': 0}}
I guess the problem is that I haven't defined a ticker pair. But I can't figure out how I am supposed to do this.
Can you help me out?
Why don't you try dedicated OHCL method?
Here is a simple usage example:
import krakenex
from pprint import pprint
k = krakenex.API()
pprint(k.query_public('OHLC', {'pair':'XXBTZUSD', 'interval':1440, 'since':1214011560}))