How to disable retry in Celery?

I am running a celery beat scheduler every 15 minutes, where I need to fetch data from an API (rate limit = 300 requests/min max) and store the results in the database. I would like to fetch the URLs in parallel while respecting the rate limit. If any worker fails here, I don't want to retry, since I will ping the API again in 15 minutes anyway. Any suggestions on how this can be accomplished in Celery?
@celery.task(bind=True)
def fetch_store(self):
    start = time()
    # fan out the fetches as a group, then reduce the results
    return c.chain(c.group(emap.s() for _ in range(2000)), ereduce.s(start)).apply_async()

@celery.task(rate_limit='300/m')
def fetch():
    # ... requests data from external API
    return data

@celery.task
def store(numbers, start):
    end = time()
    # time() is already in seconds; format the values instead of concatenating
    logger.info("Received %s in %.3f seconds", numbers, end - start)
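For what it's worth, a minimal sketch of the relevant knobs (assuming Celery 4+; names reuse the snippet above): Celery does not retry a failed task on its own unless the task calls self.retry() or declares autoretry_for, so a plain task already fails without retrying. The options below just make that intent explicit:

@celery.task(
    bind=True,
    rate_limit='300/m',
    autoretry_for=(),  # no automatic retries on exceptions (also the default)
    max_retries=0,     # an explicit self.retry() would give up immediately
    acks_late=False,   # default: the message is acked before execution,
                       # so a crashed worker does not cause redelivery
)
def fetch(self):
    # ... requests data from external API
    return data

Passing retry=False to apply_async additionally disables the broker-publish retry, which concerns delivering the message to the broker, not re-running the task.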

Related

MLflow is taking longer than expected time to finish logging metrics and parameters

I'm running code where I have to perform multiple iterations for a set of products to select the best-performing model. While running the iterations for a single product, I need to log the details of every single run using MLflow (using MLflow with a pandas UDF). Logging an individual iteration takes around 2 seconds, but the parent run under which I'm tracking every iteration's details takes 1.5 hours to finish. Here is the code:
@F.pandas_udf(model_results_schema, F.PandasUDFType.GROUPED_MAP)
def get_gam_pe_results(model_input):
    ...
    ...
    for j, gam_terms in enumerate(term_list[-1]):
        results_iteration_output_1, results_iteration_output, results_iteration_all = run_gam_model(gam_terms)
        results_iteration_version = results_iteration_version.append(results_iteration_output)
        unique_id = uuid.uuid1()
        metric_list = ["AIC", "AICc", "GCV", "adjusted_R2", "deviance", "edof", "elasticity_in_k", "loglikelihood",
                       "scale"]
        param_list = ["features"]
        start_time = str(datetime.now())
        with mlflow.start_run(run_id=parent_run_id, experiment_id=experiment_id):
            with mlflow.start_run(run_name=str(model_input['prod_id'].iloc[1]) + "-" + unique_id.hex,
                                  experiment_id=experiment_id, nested=True):
                for item in results_iteration_output.columns.values.tolist():
                    if item in metric_list:
                        mlflow.log_metric(item, results_iteration_output[item].iloc[0])
                    if item in param_list:
                        mlflow.log_param(item, results_iteration_output[item].iloc[0])
                end_time = str(datetime.now())
                mlflow.log_param("start_time", start_time)
                mlflow.log_param("end_time", end_time)
Outside the pandas UDF:
current_time = str(datetime.today().replace(microsecond=0))
run_id = None
with mlflow.start_run(run_name="MLflow_pandas_udf_testing-" + current_time, experiment_id=experiment_id) as run:
    run_id = run.info.run_uuid
gam_model_output = (Product_data
                    .withColumn("run_id", F.lit(run_id))
                    .groupby(['prod_id'])
                    .apply(get_gam_pe_results))
Note: I am running this entire code on Databricks (the cluster has 8 cores and 28 GB RAM).
Any idea why the parent run takes so long to finish when each iteration takes only 2 seconds?
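Not a diagnosis of the 1.5-hour gap, but one thing worth ruling out: each mlflow.log_metric / mlflow.log_param call in the loop above is a separate round trip to the tracking server, one per column per iteration. The dict-taking variants mlflow.log_metrics and mlflow.log_params batch these into a couple of calls per iteration; a minimal sketch reusing the question's variable names:

# hedged sketch: batch the per-column logging calls for one iteration
row = results_iteration_output.iloc[0]
cols = results_iteration_output.columns.values.tolist()
mlflow.log_metrics({c: row[c] for c in cols if c in metric_list})
mlflow.log_params({c: row[c] for c in cols if c in param_list})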

Delayed Response using requests.get with uwsgi

I was trying to execute the following code:
import time
import requests as rq

def google_translation(text, model='nmt', source_language=''):
    temptime = time.time()
    # URL is truncated in the original post (the trailing parameter is elided)
    URL = f"https://translation.googleapis.com/language/translate/v2?q={text}&target=en&source={source_language}&model={model}&"
    try:
        time_check = time.time()
        result = rq.get(URL, timeout=0.5, headers=headers)  # headers defined elsewhere
        print('api_call time ' + str((time.time() - time_check) * 1000))
        print('elapsed time in api call ' + str(result.elapsed.total_seconds() * 1000))
    except rq.exceptions.RequestException:
        result = {}
The two print statements differ by about 500 ms: the API call time is approximately 750 ms, while the elapsed time reported by requests is around 251 ms. The problem only occurs when the code is executed under uWSGI.
Has anyone faced a similar scenario? If so, please let me know the possible reason and a resolution.
Thanks in advance
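One avenue worth checking (an assumption on my part, not a confirmed diagnosis): result.elapsed measures from the first byte of the request being sent until the response headers are parsed, so DNS lookup and TCP/TLS connection setup fall outside it, and a fresh rq.get() pays that setup cost on every call. A minimal sketch that reuses connections via a module-level session (the truncated key parameter from the question is left out here):

import requests as rq

# a module-level Session pools TCP/TLS connections across calls,
# removing per-request handshake time from the total
session = rq.Session()

def google_translation(text, model='nmt', source_language=''):
    url = ("https://translation.googleapis.com/language/translate/v2"
           f"?q={text}&target=en&source={source_language}&model={model}")
    try:
        return session.get(url, timeout=0.5)
    except rq.exceptions.RequestException:
        return {}

If the gap disappears with a warm session, the 500 ms was connection setup rather than anything uWSGI does to the request itself.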

SimPy resource unavailability

I am trying to make resources unavailable for a certain time in SimPy. The issue is that, using timeout, I find the resource is still active and serving during the time it should be unavailable. Can anyone help me with this, in case you have encountered such a problem? Thanks a lot!
import numpy as np
import simpy

def interarrival():
    return np.random.exponential(10)

def servicetime():
    return np.random.exponential(20)

def servicing(env, servers_1):
    i = 0
    while True:
        i = i + 1
        yield env.timeout(interarrival())
        print("Customer " + str(i) + " arrived in the process at " + str(env.now))
        state = 0
        env.process(items(env, i, servers_array, state))

def items(env, customer_id, servers_array, state):
    with servers_array[state].request() as request:
        yield request
        t_arrival = env.now
        print("Customer " + str(customer_id) + " arrived in " + str(state) + " at " + str(t_arrival))
        yield env.timeout(servicetime())
        t_depart = env.now
        print("Customer " + str(customer_id) + " departed from " + str(state) + " at " + str(t_depart))
    if state == 1:
        print("Customer exists")
    else:
        state = 1
        env.process(items(env, customer_id, servers_array, state))

def delay(env, servers_array):
    while True:
        if env.now % 1440 >= 540 and env.now <= 1080:
            # bug in the original: yielding a plain int raises
            # AttributeError: 'int' object has no attribute 'callbacks';
            # SimPy processes must yield events, e.g. env.timeout(...)
            yield env.timeout(1080 - env.now % 1440)
        else:
            print(str(env.now), "resources will be blocked")
            resource_unavailability_dict = dict()
            resource_unavailability_dict[0] = []
            resource_unavailability_dict[1] = []
            for nodes in resource_unavailability_dict:
                for _ in range(servers_array[nodes].capacity):
                    resource_unavailability_dict[nodes].append(servers_array[nodes].request())
            print(resource_unavailability_dict)
            for nodes in resource_unavailability_dict:
                yield env.all_of(resource_unavailability_dict[nodes])
            if env.now < 540:
                yield env.timeout(540)
            else:
                yield env.timeout((int(env.now / 1440) + 1) * 1440 + 540 - env.now)
            for nodes in resource_unavailability_dict:
                for request in resource_unavailability_dict[nodes]:
                    servers_array[nodes].release(request)
            print(str(env.now), "resources are released")

env = simpy.Environment()
servers_array = []
servers_array.append(simpy.Resource(env, capacity=5))
servers_array.append(simpy.Resource(env, capacity=7))
env.process(servicing(env, servers_array))
env.process(delay(env, servers_array))
env.run(until=2880)
The code is given above. I have two nodes, 0 and 1, where the server capacities are 5 and 7 respectively. The servers are unavailable before 9 AM (540 minutes from midnight) and after 6 PM every day. I am trying to create the unavailability using timeout, but it is not working. Can you suggest how I should modify the code to incorporate it?
I am also getting the error AttributeError: 'int' object has no attribute 'callbacks', which I can't figure out.
The problem with SimPy resources is that capacity is a read-only attribute. To get around this, you need something to seize and hold the resource offline. So in essence there are two types of users: the ones that do "real work" and the ones that control the capacity. I am using a simple resource, which means that the queue at the scheduled time will get processed before the capacity change occurs. Using a priority resource means the current users of a resource can finish their processes before the capacity change occurs, or you can use a pre-emptive resource to interrupt users holding resources at the scheduled time. Here is my code:
"""
one way to change a resource capacity on a schedule
note that the capacity of a resource is a read-only attribute

Programmer: Michael R. Gibbs
"""

import simpy
import random

def schedRes(env, res):
    """
    Performs maintenance at time 100 and 200

    waits till all the resources have been seized,
    then spends 25 time units doing maintenance,
    and then releases them

    since I am using a simple resource, maintenance
    will wait for all requests that are already in
    the queue when maintenance starts to finish

    you can change this behavior with a priority resource
    or pre-emptive resource
    """
    # wait till first scheduled maintenance
    yield env.timeout(100)
    # build a list of requests for each resource
    # then wait till all requests are filled
    res_maint_list = []
    print(env.now, "Starting maintenance")
    for _ in range(res.capacity):
        res_maint_list.append(res.request())
    yield env.all_of(res_maint_list)
    print(env.now, "All resources seized for maintenance")
    # do maintenance
    yield env.timeout(25)
    print(env.now, "Maintenance finished")
    # release all the resources
    for req in res_maint_list:
        res.release(req)
    print(env.now, "All resources released from maint")
    # wait till next scheduled maintenance
    dur_to_next_maint = 200 - env.now
    if dur_to_next_maint > 0:
        yield env.timeout(dur_to_next_maint)
    # do it all again
    res_maint_list = []
    print(env.now, "Starting maintenance")
    for _ in range(res.capacity):
        res_maint_list.append(res.request())
    yield env.all_of(res_maint_list)
    print(env.now, "All resources seized for maintenance")
    yield env.timeout(25)
    print(env.now, "Maintenance finished")
    for req in res_maint_list:
        res.release(req)
    print(env.now, "All resources released from maint")

def use(env, res, dur):
    """
    Simple process of a user seizing a resource
    and keeping it for a little while
    """
    with res.request() as req:
        print(env.now, f"User is in queue of size {len(res.queue)}")
        yield req
        print(env.now, "User has seized a resource")
        yield env.timeout(dur)
        print(env.now, "User has released a resource")

def genUsers(env, res):
    """
    generate users to seize resources
    """
    while True:
        yield env.timeout(10)
        env.process(use(env, res, 21))

# set up
env = simpy.Environment()
res = simpy.Resource(env, capacity=2)  # may want to use a priority or pre-emptive resource
env.process(genUsers(env, res))
env.process(schedRes(env, res))
# start
env.run(300)
One way to do this is with preemptive resources. When it is time to make resources unavailable, issue a bunch of requests with the highest priority to seize idle resources and to preempt resources currently in use. These requests would then release the resources when it's time to make them available again. Note that you will need to add some logic for how the preempted processes resume once the resources become available again. If you do not need to preempt processes, you can just use priority resources instead of preemptive resources.
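A minimal sketch of that idea (my own illustration, not code from the answer above): a blocker process seizes every unit of a simpy.PreemptiveResource with a very high priority (in SimPy, lower numbers mean higher priority), which interrupts users currently holding a unit; what a preempted user does next is left as a stub.

import simpy

def blocker(env, res, start, dur):
    # at the scheduled time, grab every unit with top priority,
    # preempting users that are mid-service
    yield env.timeout(start)
    reqs = [res.request(priority=-100, preempt=True) for _ in range(res.capacity)]
    yield env.all_of(reqs)
    print(env.now, "all capacity held offline")
    yield env.timeout(dur)  # the unavailable window
    for r in reqs:
        res.release(r)
    print(env.now, "capacity available again")

def user(env, name, res):
    with res.request(priority=0) as req:
        yield req
        try:
            yield env.timeout(250)  # service time
            print(env.now, name, "finished")
        except simpy.Interrupt:
            # preempted mid-service: decide here whether to requeue or abandon
            print(env.now, name, "was preempted")

env = simpy.Environment()
res = simpy.PreemptiveResource(env, capacity=2)
for i in range(4):
    env.process(user(env, f"user{i}", res))
env.process(blocker(env, res, start=100, dur=50))
env.run(until=600)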

Python3 threading on AWS Lambda

I am using Flask and have a route that sends emails to people. I am using threading to send them faster. When I run it on my local machine, it takes about 12 seconds to send 300 emails. But when I run it on Lambda through API Gateway, it times out.
Here's my code:
import threading
import time
# app-specific imports (mail, db, Message, create_app, models) omitted in the original

def async_mail(app, msg):
    with app.app_context():
        mail.send(msg)

def mass_mail_sender(order, user, header):
    html = render_template('emails/pickup_mail.html', bruger_order=order.ordre, produkt=order.produkt)
    msg = Message(recipients=[user],
                  sender=('Sender', 'infor@example.com'),
                  html=html,
                  subject=header)
    thread = threading.Thread(target=async_mail, args=[create_app(), msg])
    thread.start()
    return thread

@admin.route('/lager/<url_id>/opdater', methods=['POST'])
def update_stock(url_id):
    start = time.time()
    if current_user.navn != 'Admin':
        abort(403)
    if request.method == 'POST':
        produkt = Produkt.query.filter_by(url_id=url_id)
        nyt_antal = int(request.form['bestilt_hjem'])
        produkt.balance = nyt_antal
        produkt.bestilt_hjem = nyt_antal
        db.session.commit()
        orders = OrdreBog.query.filter(OrdreBog.produkt.has(func.lower(Produkt.url_id == url_id))) \
            .filter(OrdreBog.produkt_status == 'Ikke klar').all()
        threads = []
        for order in orders:
            if order.antal <= nyt_antal:
                nyt_antal -= order.antal
                new_thread = mass_mail_sender(order, order.ordre.bruger.email,
                                              f'Din bog {order.produkt.titel} er klar til afhentning')
                threads.append(new_thread)
                order.produkt_status = 'Klar til afhentning'
        db.session.commit()
        for thread in threads:
            try:
                thread.join()
            except Exception:
                pass
        end = time.time()
        print(end - start)
        return 'Emails sendt'
    return ''
AWS Lambda is designed to run functions within these constraints:
Memory – The amount of memory available to the function during execution. Choose an amount between 128 MB and 3,008 MB in 64-MB increments.
Lambda allocates CPU power linearly in proportion to the amount of memory configured. At 1,792 MB, a function has the equivalent of one full vCPU (one vCPU-second of credits per second).
Timeout – The amount of time that Lambda allows a function to run before stopping it. The default is 3 seconds. The maximum allowed value is 900 seconds.
To put this in the context of your multi-threaded mail-sending Python code: the function will terminate either when your execution completes successfully or when it reaches the configured timeout.
I understand you want a single Python function to send n emails "concurrently". To achieve this with Lambda, try the "Concurrency" setting, and trigger your Lambda function through a local script, S3-hosted HTML/JS, CloudWatch, or an EC2 instance.
Concurrency – Reserve concurrency for a function to set the maximum number of simultaneous executions for a function. Provision concurrency to ensure that a function can scale without fluctuations in latency.
https://docs.aws.amazon.com/lambda/latest/dg/configuration-console.html
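If it helps, a hedged sketch of adjusting those settings programmatically with boto3 (the function name and values here are placeholders, not from the question):

import boto3

client = boto3.client("lambda")

# raise the timeout and memory (more memory also means proportionally more CPU)
client.update_function_configuration(
    FunctionName="my-mail-function",  # placeholder
    Timeout=900,       # seconds; the maximum Lambda allows
    MemorySize=1792,   # MB; roughly one full vCPU at this setting
)

# reserve concurrency so parallel invocations have guaranteed capacity
client.put_function_concurrency(
    FunctionName="my-mail-function",
    ReservedConcurrentExecutions=10,
)

The same settings are available in the console page linked above.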
Important: all of the above settings can affect your Lambda function's execution cost significantly, so plan and compare before applying them.
If you need any more help, let me know.
Thank you.

multiprocessing: maxtasksperchild and chunksize conflict?

I am using the multiprocessing module in Python 3.7. My code is not working as expected (see this question here). Someone suggested setting maxtasksperchild, which I set to 1. Then, while reading the documentation, I figured it was best to set the chunksize to 1 as well. This is the relevant code part:
# Parallel Entropy Calculation
# ============================
node_combinations = [(i, j) for i in g.nodes for j in g.nodes]
pool = Pool(maxtasksperchild=1)
start = datetime.datetime.now()
logging.info("Start time: %s", start)
print("Start time: ", start)
results = pool.starmap(g._log_probability_path_ij, node_combinations, chunksize=1)
end = datetime.datetime.now()
print("End time: ", end)
print("Run time: ", end - start)
logging.info("End time: %s", end)
logging.info("Total run time: %s", end - start)
pool.close()
pool.join()
This backfired enormously. Setting only maxtasksperchild or only chunksize got the job done in the expected time (for a smaller dataset that I am using to test the code). Setting both just wouldn't finish, and nothing was really running after a few seconds (I checked with htop to see if the cores were working).
Questions
Do maxtasksperchild and chunksize conflict when setting them together?
Do they do the same thing? maxtasksperchild at the Pool() level and chunksize at the Pool methods level?
======================================================
EDIT
I understand that debugging may be impossible from the extract of code presented, so please find the full code below. The modules graph and graphfile are just small libraries written by me, available on GitHub. If you wish to run the code, you can use any of the files in the data/ directory of the mentioned GitHub repository. Short tests are better run using F2, but F1 and F3 are the ones causing trouble on the HPC.
import graphfile
import graph
from multiprocessing.pool import Pool
import datetime
import logging

def remove_i_and_f(edges):
    new_edges = dict()
    for k, v in edges.items():
        if 'i' in k:
            continue
        elif 'f' in k:
            key = (k[0], k[0])
            new_edges[key] = v
        else:
            new_edges[k] = v
    return new_edges

if __name__ == "__main__":
    import sys
    # Read data
    # =========
    graph_to_study = sys.argv[1]
    full_path = "/ComplexNetworkEntropy/"
    file = graphfile.GraphFile(full_path + "data/" + graph_to_study + ".txt")
    edges = file.read_edges_from_file()
    # logging
    # =======
    d = datetime.date.today().strftime("%Y_%m_%d")
    log_filename = full_path + "results/" + d + "_probabilities_log_" + graph_to_study + ".log"
    logging.basicConfig(filename=log_filename, level=logging.INFO, format='%(asctime)s === %(message)s')
    logging.info("Graph to study: %s", graph_to_study)
    logging.info("Date: %s", d)
    # Process data
    # ==============
    edges = remove_i_and_f(edges)
    g = graph.Graph(edges)
    # Parallel Entropy Calculation
    # ============================
    node_combinations = [(i, j) for i in g.nodes for j in g.nodes]
    pool = Pool(maxtasksperchild=1)
    start = datetime.datetime.now()
    logging.info("Start time: %s", start)
    print("Start time: ", start)
    results = pool.starmap(g._log_probability_path_ij, node_combinations, chunksize=1)
    end = datetime.datetime.now()
    print("End time: ", end)
    print("Run time: ", end - start)
    logging.info("End time: %s", end)
    logging.info("Total run time: %s", end - start)
    pool.close()
    pool.join()
maxtasksperchild ensures a worker is restarted after a certain number of tasks. In other words, it kills the process after it runs maxtasksperchild iterations of your given function. It is provided to contain resource leaks caused by poor implementations in long-running services.
chunksize groups a given collection/iterator into multiple tasks. Each group is then shipped whole over the internal pipe, to reduce inter-process communication (IPC) overhead. The collection's elements are still processed one by one. chunksize is useful if you have a large collection of small elements and the IPC overhead is significant relative to the processing of the elements themselves. One side effect is that the same process will process a whole chunk.
Setting both parameters to 1 dramatically increases process rotation and IPC, both of which are quite resource-heavy, especially on machines with a high number of cores.
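To make the trade-off concrete, a small sketch with a toy task (my own illustration, not the question's code): with many cheap tasks, a larger chunksize amortizes the IPC cost, and maxtasksperchild is best left unset unless workers leak resources.

from multiprocessing import Pool

def work(i, j):
    # a trivially cheap task: shipping it to a worker costs more than
    # running it, so per-task IPC dominates unless tasks travel in chunks
    return i * j

if __name__ == "__main__":
    pairs = [(i, j) for i in range(500) for j in range(500)]
    # leave maxtasksperchild unset so workers live for the whole run,
    # and hand starmap large chunks (or omit chunksize to let it choose)
    with Pool() as pool:
        results = pool.starmap(work, pairs, chunksize=1000)
    print(len(results))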
