Understanding Pandas <pandas._libs.ops.scalar_compare> call - python-3.x

I ran the PyCharm profiler on my code and got the following call graph.
new_method (second from the left) gets called; however, I do not have any function called new_method in my code. I assume it comes from the line where I assign close_price in the code below. If so, is there a better way of selecting a section of a dataframe that does not take so long?
def base_algo(conn):
    """Compute 50- and 200-period slopes per stock for each day of a fixed window.

    For every day from 2020-07-02 up to (but not including) 2020-07-23 the
    historical data is loaded through ``select_historical_data`` and, for each
    distinct stock, ``calculate_slope`` is evaluated on its ``close_val``
    column with windows of 50 and 200.

    Args:
        conn: Connection handle forwarded unchanged to ``select_historical_data``.
    """
    # start_date = datetime.strptime("2000-09-22", "%Y-%m-%d")
    start_date = datetime.strptime("2020-07-02", "%Y-%m-%d")
    end_date = datetime.strptime("2020-07-23", "%Y-%m-%d")
    current_date = start_date
    while current_date < end_date:
        historical_data = select_historical_data(conn, current_date)
        # Collect plain dicts and build the frame once per day: growing a
        # DataFrame row by row copies it on every call, and DataFrame.append
        # no longer exists in pandas 2.x.
        signal_rows = []
        for stock in historical_data["stock"].unique():
            # NOTE(review): this comparison scans the whole "stock" column once
            # per stock; historical_data.groupby("stock") would split the frame
            # in a single pass if that cost matters.
            close_price = historical_data[historical_data["stock"] == stock].close_val
            slope_50 = calculate_slope(close_price, 50)
            slope_200 = calculate_slope(close_price, 200)
            signal_rows.append(
                {"stock": stock, "slope_50": slope_50, "slope_200": slope_200}
            )
        # The frame is rebuilt for every day and not returned from here;
        # presumably it is consumed by code not shown in this snippet.
        buy_sell_signal = pd.DataFrame(signal_rows)
        current_date = current_date + timedelta(days=1)

Related

Restricting last item value in python list to a given date

I am trying to create a list using a given start and end date range in a specific format. I would like to have the elements in the list incremented by 30 days. The last item in the list should not exceed the end date that i have set.
With the logic that I have below, it exceeds the end date that i have set.
from datetime import timedelta, date

start_date = date(2021, 1, 1)
end_date = date(2021, 6, 30)
n = 30  # days added to each interval's start to get its end
next_date = start_date
res = []
# Build "start#end" strings; each new interval starts the day after the
# previous one ended.
while start_date < end_date:
    # next_date is never compared with end_date, so the final interval can
    # run past end_date -- this is the behaviour the question is about.
    next_date = start_date + timedelta(n)
    next_date_str = next_date.strftime('%Y-%m-%d')
    var = start_date.strftime('%Y-%m-%d') + '#' + next_date_str
    res.append(var)
    start_date = next_date + timedelta(1)
print(res)
Result of above code:
['2022-01-01#2022-01-31', '2022-02-01#2022-03-03', '2022-03-04#2022-04-03', '2022-04-04#2022-05-04', '2022-05-05#2022-06-04', '2022-06-05#2022-07-05']
Expected output:
['2022-01-01#2022-01-31', '2022-02-01#2022-03-03', '2022-03-04#2022-04-03', '2022-04-04#2022-05-04', '2022-05-05#2022-06-04', '2022-06-05#2022-06-30']
Please guide me how to restrict the end date for the list
One way to fix this problem would be to add a check to your while loop to check if the next date is greater than the end date. If it is, you can set the next date to the end date, and then break out of the loop.
Here's how you could update your code to do this:
from datetime import timedelta, date

start_date = date(2021, 1, 1)
end_date = date(2021, 6, 30)
n = 30  # days added to each interval's start to get its end
next_date = start_date
res = []
while start_date < end_date:
    next_date = start_date + timedelta(n)
    if next_date > end_date:
        # Clamp the final interval so it never runs past end_date.
        next_date = end_date
    next_date_str = next_date.strftime('%Y-%m-%d')
    var = start_date.strftime('%Y-%m-%d') + '#' + next_date_str
    # Record the interval BEFORE leaving the loop; breaking first would drop
    # the clamped last interval from the result.
    res.append(var)
    if next_date == end_date:
        break
    start_date = next_date + timedelta(1)
print(res)

How to get the google search console data using access_token or refresh token in python?

I'm trying to get data from Google Search Console on behalf of a user. Once they log in, the login returns an access_token and a refresh_token. How can I use the access_token or refresh_token to get the Google Search Console data (impressions, clicks, pages)?
I am already getting data from Google Analytics this way, but I have not been able to do the same for Google Search Console.
def extract_data(site, creds, num_days, output):
    """Download day-by-day Search Console rows for ``site`` and save them as CSV.

    Days run from ``num_days`` before the end date up to the end date, which is
    three days before today. Days whose date string already appears in the
    series returned by ``get_dates_from_csv`` are skipped. Every other day is
    requested in pages of up to 25,000 rows until a short (or missing) page is
    returned.

    Args:
        site: Property passed to ``get_domain_name`` and ``execute_request``.
        creds: Credentials passed to ``authorize_creds``.
        num_days: Number of days before the end date at which to start.
        output: File name appended to the domain folder to form the CSV path.

    Returns:
        A DataFrame with the columns date, page, query, clicks, ctr,
        impressions and position; an empty DataFrame when no rows were
        collected (in which case nothing is written).
    """
    domain_name = get_domain_name(site)
    create_project(domain_name)
    full_path = domain_name + '/' + output
    current_dates = get_dates_from_csv(full_path)
    webmasters_service = authorize_creds(creds)

    # Set up Dates
    end_date = datetime.date.today() - relativedelta.relativedelta(days=3)
    start_date = end_date - relativedelta.relativedelta(days=num_days)
    delta = datetime.timedelta(days=1)  # This will let us loop one day at the time
    maxRows = 25000  # Maximum 25K per call
    scDict = defaultdict(list)

    while start_date <= end_date:
        day = start_date.strftime('%Y-%m-%d')
        if current_dates is not None and current_dates.str.contains(day).any():
            # This day is already present in the existing CSV.
            start_date += delta
            continue

        numRows = 0  # Start at Row Zero
        while True:
            request = {
                'startDate': day,
                'endDate': day,
                'dimensions': ['date', 'page', 'query'],
                'rowLimit': maxRows,
                'startRow': numRows
            }
            response = execute_request(webmasters_service, site, request)
            try:
                rows = response['rows']
            except (KeyError, TypeError):
                # No 'rows' entry (or no usable response): nothing more to
                # fetch for this day.
                rows = []
                print('error occurred at %i' % numRows)

            # Process the response
            for row in rows:
                scDict['date'].append(row['keys'][0] or 0)
                scDict['page'].append(row['keys'][1] or 0)
                scDict['query'].append(row['keys'][2] or 0)
                scDict['clicks'].append(row['clicks'] or 0)
                scDict['ctr'].append(row['ctr'] or 0)
                scDict['impressions'].append(row['impressions'] or 0)
                scDict['position'].append(row['position'] or 0)

            print('Numrows at the start of loop: %i' % numRows)
            numRows = numRows + len(rows)
            print('Numrows at the end of loop: %i' % numRows)
            # A page shorter than the limit (including an empty one) is the
            # last page; testing the page length instead of the running total
            # also stops on an empty 'rows' list.
            if len(rows) < maxRows:
                break

        start_date += delta
        print('Start date at end: %s' % start_date)

    # Build the frame once, after all pages are collected, rather than
    # rebuilding it from the growing dict after every request.
    df = pd.DataFrame(data=scDict)
    if df.empty:
        # Every day was skipped or returned no rows: there are no columns to
        # convert and nothing new to write.
        return df
    df['clicks'] = df['clicks'].astype('int')
    df['ctr'] = df['ctr'] * 100
    df['impressions'] = df['impressions'].astype('int')
    df['position'] = df['position'].round(2)
    write_to_csv(df, full_path)
    return df
This is the code I am using for Google Search Console. It authenticates with webmasters_service = authorize_creds(creds), but I want to access the data using an access_token or refresh_token instead.
This is the code used in google analytics.
def google_analytics_reporting_api_data_extraction(viewID, dim, met, start_date,
                                                   end_date, refresh_token,
                                                   transaction_type, goal_number,
                                                   condition):
    """Query the Google Analytics v3 data endpoint using a refresh token.

    An access token is obtained by refreshing OAuth2 credentials built from
    ``refresh_token``. One request covers the whole ``start_date``..``end_date``
    range; further requests are made one per day.

    Args:
        viewID: View id without the ``ga:`` prefix (the prefix is added here).
        dim: Iterable of dimension names, e.g. ``"ga:date"``.
        met: Iterable of metric names; for goals, ``XX`` is replaced by
            ``goal_number``.
        start_date: Range start as ``"YYYY-MM-DD"``.
        end_date: Range end as ``"YYYY-MM-DD"``.
        refresh_token: OAuth2 refresh token of the user.
        transaction_type: ``"Goal"`` or ``"Transaction"``.
        goal_number: Goal number substituted for ``XX`` when the type is "Goal".
        condition: Extra, already formatted query-string fragment appended to
            the URL.

    Returns:
        ``(data, users, final_date)`` where ``data`` is a DataFrame of the
        whole-range rows with one column per metric, ``users`` holds the first
        cell of each per-day response and ``final_date`` the matching date
        strings. ``None`` if the whole-range rows cannot be put into that
        frame (the raw response is printed instead).

    Raises:
        ValueError: If ``transaction_type`` is neither "Goal" nor "Transaction".
    """
    viewID = "".join(['ga%3A', viewID])
    if transaction_type == "Goal":
        met1 = "%2C".join([re.sub(":", "%3A", i) for i in met]).replace("XX", str(goal_number))
    elif transaction_type == "Transaction":
        met1 = "%2C".join([re.sub(":", "%3A", i) for i in met])
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError("unsupported transaction_type: %r" % (transaction_type,))
    dim1 = "%2C".join([re.sub(":", "%3A", i) for i in dim])

    credentials = client.OAuth2Credentials(
        access_token=None, client_id=client_id, client_secret=client_secret, refresh_token=refresh_token,
        token_expiry=3600, token_uri=GOOGLE_TOKEN_URI, user_agent='my-user-agent/1.0', revoke_uri=GOOGLE_REVOKE_URI)
    credentials.refresh(httplib2.Http())
    rt = (json.loads(credentials.to_json()))['access_token']
    api_url = "https://www.googleapis.com/analytics/v3/data/ga?ids="

    def build_url(first_day, last_day):
        # The same query is used for the whole range and for single days.
        return "".join(
            [api_url, viewID, '&start-date=', first_day, '&end-date=', last_day, '&metrics=', met1,
             '&dimensions=', dim1, '&max-results=1000000', condition, '&access_token=', rt])

    r = requests.get(build_url(start_date, end_date))

    start = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.datetime.strptime(end_date, "%Y-%m-%d")
    users = []
    final_date = []
    # NOTE(review): the range stops one day before end_date, so end_date itself
    # is not queried per day -- confirm this is intended.
    for offset in range((end - start).days):
        date_value = (start + datetime.timedelta(days=offset)).date()
        rr = requests.get(build_url(str(date_value), str(date_value)))
        dataa = pd.DataFrame(list((rr.json())['rows']))
        users.append(dataa[0][0])
        final_date.append(str(date_value))

    payload = r.json()
    # A response without 'rows' raises KeyError here, as before.
    rows = list(payload['rows'])
    try:
        data = pd.DataFrame(rows, columns=[re.sub("ga:", "", i) for i in met])
    except Exception:
        # Rows that do not fit the metric columns: show the raw response.
        print(payload)
        return None
    return data, users, final_date
In the above code by using refresh_token we access the data from google analytics. Like this way only i want the code in google search console.
Please help me out

In Django, how do I construct my queryset to filter by time over slices of time?

I'm using Python 3.9 and Django 3.2. I have this price model
class Price(models.Model):
    """A price observation with a UUID primary key and a creation timestamp."""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    price = models.FloatField(null=False)
    # NOTE(review): the query code around this model uses timezone.now();
    # datetime.now as the default may produce naive timestamps -- confirm
    # which clock is intended.
    created = models.DateTimeField(null=False, default=datetime.now)
If I want to get the price per hour over the last 24 hours, I can run a method like this
def _get_prices_per_time_slice(self, last_hours=24):
    """Return the prices recorded once per hour over the last ``last_hours`` hours.

    A price is kept when its ``created`` timestamp lies between
    ``now - last_hours`` and now (truncated to the minute) and its minute
    equals the current minute.

    Args:
        last_hours: Size of the look-back window in hours.

    Returns:
        A list of price values.
    """
    now = timezone.now()
    end_time = now.replace(second=0, microsecond=0)
    start_time = end_time - timedelta(hours=last_hours)
    qset = Price.objects.filter(
        created__range=[start_time, end_time],
        created__minute=end_time.minute
    )
    # flat=True yields the bare price values instead of one-key dicts.
    return list(qset.values_list('price', flat=True))
but let's say I want to get the price every last X hours.
def _get_prices_per_time_slice(self, last_hours=24, time_slice_in_hours=4):
so if the current time is midnight (and zero seconds and minutes), I would want to get the prices for midnight, 8 pm, 4 pm, noon, 8 am and 4 am. How do I add a filter to screen for prices every X hours?
The range function in python helps to specify the incremental step value
SYNTAX : range(start, stop, step)
# range(start, stop, step): count from 3 up to (but not including) 20 in
# steps of 4.
x = range(3, 20, 4)
for n in x:
    print(n)
# Gives output:
# 3
# 7
# 11
# 15
# 19
Just rewrite created__range to add the step value time_slice_in_hours:
created__range=[start_time, end_time, time_slice_in_hours]
OPTION 1
def _get_prices_per_time_slice(self, last_hours=24, time_slice_in_hours=4):
    """Option 1: pass the slice size as a third element of ``created__range``.

    Args:
        last_hours: Size of the look-back window in hours.
        time_slice_in_hours: Desired spacing between returned prices, in hours.

    Returns:
        A list of price values.
    """
    now = timezone.now()
    end_time = now.replace(second=0, microsecond=0)
    start_time = end_time - timedelta(hours=last_hours)
    qset = Price.objects.filter(
        # NOTE(review): Django documents the range lookup as a two-value
        # BETWEEN (start, end); a third "step" element is likely to be
        # rejected -- confirm before relying on this option.
        created__range=[start_time, end_time, time_slice_in_hours],
        created__minute=end_time.minute
    ).values('price')
    return [r['price'] for r in qset]
However, the official Django QuerySet API documentation does not mention a step parameter for the __range lookup, i.e., it might not be supported by created__range.
OPTION 2
In that case you can use the function below, which builds the list of slice timestamps x_time_slice_list with the DateTimeRange class from the third-party datetimerange package (see its official documentation) and passes that list to created__in.
from datetimerange import DateTimeRange
from dateutil.relativedelta import relativedelta
def _get_prices_per_time_slice(self, last_hours=24, time_slice_in_hours=4):
    """Option 2: match ``created`` against explicitly generated slice timestamps.

    The timestamps run from ``now - last_hours`` to now (truncated to the
    minute) in steps of ``time_slice_in_hours`` hours and are passed to a
    ``created__in`` filter.

    Args:
        last_hours: Size of the look-back window in hours.
        time_slice_in_hours: Spacing between the generated timestamps, in hours.

    Returns:
        A list of price values.
    """
    now = timezone.now()
    end_time = now.replace(second=0, microsecond=0)
    start_time = end_time - timedelta(hours=last_hours)
    time_range = DateTimeRange(start_time, end_time)
    x_time_slice_list = list(
        time_range.range(relativedelta(hours=+time_slice_in_hours))
    )
    # NOTE(review): created__in compares whole timestamps, so presumably only
    # rows stored exactly on one of these instants will match -- confirm.
    qset = Price.objects.filter(
        created__in=x_time_slice_list,
        created__minute=end_time.minute
    ).values('price')
    return [r['price'] for r in qset]
You could achieve this by handling the time slice filtering in python.
What I mean is if you take the function you have. i.e.
def _get_prices_per_time_slice(self, last_hours=24):
    """Return the hourly prices of the last ``last_hours`` hours.

    Rows are selected when ``created`` falls inside the window ending at the
    current minute and shares that minute value.

    Args:
        last_hours: Size of the look-back window in hours.

    Returns:
        A list of price values.
    """
    now = timezone.now()
    end_time = now.replace(second=0, microsecond=0)
    start_time = end_time - timedelta(hours=last_hours)
    hourly = Price.objects.filter(
        created__range=[start_time, end_time],
        created__minute=end_time.minute
    )
    # flat=True yields the bare price values instead of one-key dicts.
    return list(hourly.values_list('price', flat=True))
And rewrite it as follows, so that the query also returns the created timestamp and the time-slice filtering is handled in the list comprehension of the return statement.
def _get_prices_per_time_slice(self, last_hours=24, time_slice_in_hours=4):
    """Return prices spaced ``time_slice_in_hours`` apart, counted back from now.

    The hourly rows of the last ``last_hours`` hours are fetched together with
    their ``created`` timestamp; a row is kept when it lies a whole multiple
    of ``time_slice_in_hours`` hours before the end of the window.

    Args:
        last_hours: Size of the look-back window in hours.
        time_slice_in_hours: Spacing between returned prices, in hours.

    Returns:
        A list of price values.
    """
    now = timezone.now()
    end_time = now.replace(second=0, microsecond=0)
    start_time = end_time - timedelta(hours=last_hours)
    qset = Price.objects.filter(
        created__range=[start_time, end_time],
        created__minute=end_time.minute
    ).values('price', 'created')

    one_hour = timedelta(hours=1)

    def hours_before_end(created):
        # Drop seconds so a row stored a few seconds into its minute still
        # counts as a whole number of hours before end_time.
        return (end_time - created.replace(second=0, microsecond=0)) // one_hour

    # Anchor the slices to end_time, not to the hour of day: at 01:00 with a
    # 4-hour slice the wanted hours are 01, 21, 17, ... and not 00, 04, 08.
    return [
        r['price']
        for r in qset
        if hours_before_end(r['created']) % time_slice_in_hours == 0
    ]

Track database history

def startlog():
    """Store the current start time and date in the ``log`` row of the entered user.

    The start time is written as a count of seconds built from the local day
    of month, hour (plus one), minute and second; the date is written as
    ``month-day-year``.
    """
    user_id = enteruser.id
    now = time.localtime()
    # NOTE(review): the reason for the +1 hour offset is not visible here --
    # confirm it is intended.
    hour = now.tm_hour + 1
    date = f"{now.tm_mon}-{now.tm_mday}-{now.tm_year}"
    starttime = (now.tm_mday * 86400) + (hour * 3600) + (now.tm_min * 60) + now.tm_sec
    updatestart = "UPDATE log SET start = ?, date = ? WHERE ID = ?"
    c.execute(updatestart, (starttime, date, user_id))
    conn.commit()
I have this function startlog, and a clone of it endlog.
My database table log consists of (name, starttime, endtime, date).
Is there any way to keep track of the changes?
Desired output:
Name / Time / Date
x / time1 / date1
x / time2 / date2
I tried creating a list so everytime I'm calling out the function it will append on the list but it disappears after the session.
I used csv for my case since it's just a personal project. I used columns like ID/Time in / Time out / Total Time and used ID to determine which value to display. This is the snippet of my code (using tkinter for gui)
def csvwrite():
    """Append one row (user id, log start, log end) to the history file."""
    # The with-block closes the file on exit; no explicit close() is needed.
    with open('test.cvs', 'a', newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow((enteruser.id, log.start, log.end))
def csvread():
    """Print every history row accepted by ``filterer`` and show it as a label."""
    # newline="" is what the csv module asks for when a file is handed to
    # csv.reader.
    with open('test.cvs', 'r', newline="") as csvfile:
        reader = csv.reader(csvfile)
        # filter() is lazy, so the rows must be consumed while the file is
        # still open.
        for i in filter(filterer, reader):
            print(i)
            historylbl = Label(historyWindow.historywndw, text=i)
            historylbl.pack()

Bokeh charts unresponsive on rangeslider on_change

I am working on bokeh charts for the first time. I have followed a few tutorials but due to some reason, update function is not working on rangeslider on_change()
def make_data(df, start, end):
    """Build a ColumnDataSource from the rows whose ``ID`` is within [start, end].

    Args:
        df: DataFrame with ``ID``, ``date`` and ``capi`` columns.
        start: Lowest ``ID`` to include.
        end: Highest ``ID`` to include.

    Returns:
        A ColumnDataSource with ``x`` (the ``date`` values passed through
        ``dateTime``) and ``y`` (the ``capi`` values).
    """
    # df['ID'] = range(1, len(df) + 1)
    s = df['ID'] >= start
    e = df['ID'] <= end
    df1 = df[e & s]
    date = df1['date'].tolist()
    capi = df1['capi'].tolist()
    data = {'x': dateTime(date), 'y': capi}
    return ColumnDataSource(data)
def update(attr, old, new):
    """Slider callback: reload the CSV and refresh the plotted data.

    Args:
        attr: Name of the changed property (unused).
        old: Previous slider value (unused).
        new: New slider value (unused; the slider is read directly).
    """
    # DataFrame.from_csv no longer exists in current pandas; read_csv is its
    # replacement.
    df = pd.read_csv("main_data.csv", index_col=None)
    df['ID'] = range(1, len(df) + 1)
    # The helper defined in this file is make_data(df, start, end); calling
    # the undefined make_dataset made the callback fail, leaving the chart
    # unresponsive.
    new_src = make_data(df, range_select.value[0], range_select.value[1])
    source.data.update(new_src.data)
def make_plot(source):
    """Create the line chart of the ``y`` column against the ``x`` column.

    Args:
        source: Data source providing the ``x`` and ``y`` columns.

    Returns:
        The configured figure.
    """
    p1 = figure(x_axis_type="datetime", title="Stock Closing Prices")
    p1.grid.grid_line_alpha = 0.3
    p1.xaxis.axis_label = 'Date'
    p1.yaxis.axis_label = 'Price'
    p1.line('x', 'y', source=source, color='#A6CEE3', legend='capi')
    return p1
# Slider over the row ids; `ids` is defined outside this snippet
# (presumably the list of ID values -- confirm).
range_select = RangeSlider(title="Date range", value=(ids[0], ids[100]), start=ids[0], end=ids[-1], step=1)
# Call update() whenever the slider's value changes.
range_select.on_change('value', update)
# Initial data: rows with ID 1..1000 of `df` (defined outside this snippet).
source = make_data(df, 1, 1000)
p = make_plot(source)
# Stack the slider above the plot and wrap the result in a single tab.
controls = WidgetBox(range_select)
layout = column(controls, p)
tab = Panel(child=layout, title = 'Histogram')
tabs = Tabs(tabs = [tab])
# NOTE(review): show() renders the layout; whether the Python on_change
# callback runs depends on how the script is served -- confirm.
show(tabs)
can someone please point me in the right direction here

Resources