Why does Python asyncio code get stuck on the first concurrent task? - python-3.x

While learning and testing asyncio, I wrote the code below, which runs 3 concurrent tasks (one scheduler and two workers).
import asyncio
from time import time
tasks_to_schedule = []
task_queue = []
class Test:
    """A named demo job that can be run, reported on and re-scheduled."""

    def __init__(self, task_name, repeat_every):
        self.name, self.repeat_every = task_name, repeat_every
        # Timestamp of the most recent schedule() call; starts at 0.
        self.scheduled = 0

    def schedule(self, t_now):
        """Remember ``t_now`` as the moment this job was last scheduled."""
        self.scheduled = t_now

    async def run(self):
        """Announce the job, wait two seconds, then announce completion."""
        print(f'It is {self.name}')
        print(f'{self.name} running...')
        await asyncio.sleep(2)
        print(f'{self.name} finished')

    def check_result(self):
        """Placeholder hook; does nothing."""
        return None

    async def report(self):
        """Print the report line, then wait one second."""
        print(f'{self.name} report DONE')
        await asyncio.sleep(1)
def prepare_tasks():
    """Create the two demo jobs and append them to ``tasks_to_schedule``."""
    tasks_to_schedule.extend((Test('Task A', 2), Test('Task B', 4)))
async def scheduler():
    """Move due jobs from ``tasks_to_schedule`` to ``task_queue``.

    Runs until five jobs have been queued. Once per second it takes the
    first waiting job; if its ``scheduled`` timestamp is in the past it is
    queued for the workers, otherwise it goes back to the end of the
    waiting list.
    """
    turn = 0
    while turn < 5:
        if tasks_to_schedule:
            print(f'***\t Turn {turn} \t***')
            candidate = tasks_to_schedule.pop(0)
            if candidate.scheduled >= time():
                # Not due yet: rotate it to the back of the waiting list.
                tasks_to_schedule.append(candidate)
            else:
                task_queue.append(candidate)
                print(f'adding task {candidate.name} to queue,\n queue size = {len(task_queue)}')
                turn += 1
        await asyncio.sleep(1)
async def worker(name):
    """Endlessly take jobs from ``task_queue``, run them and re-schedule them.

    ``name`` is only used to label the printed messages.
    """
    while True:
        if task_queue:
            job = task_queue.pop(0)
            print(f'Worker {name} - took task {job.name}')
            await job.run()
            await job.report()
            print(f'Worker {name} - task {job.name} completed, reschedule it')
            job.schedule(time())
            tasks_to_schedule.append(job)
        # NOTE: when ``task_queue`` is empty nothing in this iteration awaits,
        # so the loop spins without ever handing control back to the event
        # loop. That is the hang described in the question; the line below
        # is the workaround the question refers to.
        # await asyncio.sleep(1) #Process stuck without this line
async def main():
    """Start the scheduler and two workers and wait for all of them."""
    await asyncio.gather(
        asyncio.create_task(scheduler()),
        asyncio.create_task(worker(1)),
        asyncio.create_task(worker(2)),
    )


if __name__ == '__main__':
    prepare_tasks()
    asyncio.run(main())
The problem is that the process gets stuck after "Task A running..."; the only output is:
*** Turn 0 ***
adding task Task A to queue,
queue size = 1
Worker 1 - took task Task A
It is Task A
Task A running...
After several tries, I noticed that with an additional "await asyncio.sleep(1)" line at the end of the loop inside the "worker" function, the process runs correctly and never gets stuck.
I wonder what the reason is.
Could someone please explain why this additional line changes everything?
Platform: Python 3.9.4, Windows 10 x64, inside venv.

I've added an additional line after:
async def worker(name):
while True:
print(f'{strftime("%X")}: worker loop') #this line
and I can see an endless worker loop in the output...
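Now I see: when the queue is empty, the worker finds no task and spins in "while True" without awaiting anything, so it never returns control to the event loop. The other coroutines (including the wake-up of Task A's "asyncio.sleep(2)") therefore never get a chance to run. The extra "await asyncio.sleep(1)" gives the event loop an opportunity to switch tasks.
Solved :)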

Related

Getting returning value from multithreading in python 3

I'm trying to get one or several return values from a thread in a multithreaded program. The code shown below hangs, and there is no way to interrupt it with Ctrl+C or Ctrl+D.
import queue as Queue
import threading
class myThread(threading.Thread):
    """Thread that calls ``GetSales`` for a single region."""

    def __init__(self, threadID, name, region):
        super().__init__()
        self.threadID = threadID
        self.name = name
        self.region = region

    def run(self):
        # The value returned by GetSales is discarded here.
        GetSales(self.region)
def GetSales(strReg):
    """Print a ``Thread-<region>`` banner and return ``Returning-<region>``."""
    banner = "Thread-" + strReg
    print(banner)
    outcome = "Returning-" + strReg
    return outcome
def Main():
    """Start one ``myThread`` per region, then wait for each of them."""
    RegionList = ["EMEA", "AP", "AM"]

    # Create threads
    threads = []
    for x, region in enumerate(RegionList, start=1):
        rthread = myThread(x, "Thread-" + region, region)  # Create new thread
        rthread.start()  # Start new thread
        threads.append(rthread)  # Add new thread to threads list

    que = Queue.Queue()

    # Wait for all threads to complete
    for t in threads:
        t.join()
        # NOTE: nothing ever puts an item on ``que``, so this get() blocks
        # forever; this is the hang described in the question.
        result = que.get()
        print(t.name + " -> Done")
Main()
If I comment line "result = que.get()" the program runs with no issues.
What you are looking for is future and async management.
Firstly, your program loops indefinitely because of the line que.get(): nothing is ever put in the queue, so the call waits for an item that will never arrive. You never actually use the queue.
What you want to do is an async task and get the result :
import asyncio
async def yourExpensiveTask():
    """Stand-in for a long-running computation; returns its result (42)."""
    # some long calculation
    return 42
async def main():
    """Start two expensive tasks concurrently and print each result in turn."""
    # create_task() schedules both coroutines immediately, so they overlap.
    tasks = [
        asyncio.create_task(yourExpensiveTask()),
        asyncio.create_task(yourExpensiveTask()),
    ]
    for task in tasks:
        result = await task
        print(result)
See also https://docs.python.org/3/library/asyncio-task.html

asyncio, multiprocessing and websockets not returning a result

I am trying to get websockets, asyncio and multiprocess to work together. I have been stuck on this for 2 days and could appreciate some help.
I have searched for websockets asyncio and multiprocessing on stackoverflow as well as general internet searches. I have found threading examples, which I can make work.
import asyncio
import websockets
import threading
# WebSocket client that drives its own asyncio event loop on a helper thread.
# NOTE(review): the original indentation of this snippet was lost, so the exact
# nesting of the loops in listen_to_socket cannot be determined from this text.
class Connection():
def __init__(self):
# Private event loop; it is run by new_loop() on the thread started below.
self.loop = asyncio.new_event_loop()
sock_thread = threading.Thread(target=self.new_loop)
sock_thread.start()
# Message counter used by listen_to_socket (it reads while x < 5).
self.x = 0
# Open the connection, send "hello" and print the first reply.
async def connect_to_socket(self):
self.websocket = await websockets.connect('ws://demos.kaazing.com/echo')
await self.websocket.send("hello")
response = await self.websocket.recv()
print(response)
# Receive and print messages, counting them in self.x, then cancel
# self.task and close the loop.
async def listen_to_socket(self):
while True:
await asyncio.sleep(0)
print('Listening for a message...')
while self.x < 5:
message = await self.websocket.recv()
print("< {}".format(message))
print('\n\n')
print(self.x)
self.x += 1
self.task.cancel()
self.loop.close()
# Reset the message counter and cancel the current task.
def stop(self):
print('canceling task\n\n')
self.x = 0
self.task.cancel()
# Thread target: schedule the initial connect and run the loop forever.
def new_loop(self):
self.task = self.loop.create_task(self.connect_to_socket())
self.loop.run_forever()
# Replace self.task with a task that runs listen_to_socket.
# NOTE(review): presumably called from a thread other than the one running
# self.loop; loop.create_task is not thread-safe in that case - confirm.
def make_task(self):
self.task = self.loop.create_task(self.listen_to_socket())
if __name__ == '__main__':
conn=Connection()
This works with no issues. I have seen examples where multiprocessing opens a process in an event loop; however, this is not what I want. I want to open a new process and run an event loop in the new process. Inside the event loop, I want to run my sockets. I want to free my main process from listening to sockets and use a child process to listen to the sockets while I do computationally expensive work on my main process.
When I try the following code. I get nothing.
import asyncio
import websockets
import multiprocessing
# Process subclass that is meant to talk to a WebSocket echo server from a
# child process and hand results back through a queue.
# NOTE(review): the original indentation of this snippet was lost, so the exact
# nesting of the loops in listen_to_socket cannot be determined from this text.
class Connection(multiprocessing.Process):
def __init__(self, tasks, results):
super().__init__()
# Queues supplied by the caller: incoming work and outgoing results.
self.tasks = tasks
self.results = results
self.loop = asyncio.new_event_loop()
print('create event loop')
self.x = 0
# NOTE(review): this only schedules test() on self.loop; no call that runs
# the loop (run_forever / run_until_complete) is visible in this class, so
# the task presumably never executes - confirm.
self.task = self.loop.create_task(self.test())
print('done with connecting')
#connect to socket and get response
async def test(self):
# NOTE(review): the connection is stored as self.ws, but the next two lines
# read self.websocket, which is not assigned anywhere in this class.
self.ws = await websockets.connect('ws://demos.kaazing.com/echo')
await self.websocket.send("hello")
response = await self.websocket.recv()
print(response)
#listen to socket long term after connection
async def listen_to_socket(self):
while True:
await asyncio.sleep(0)
print('Listening for a message...')
while self.x < 5:
await self.websocket.send("hello")
message = await self.websocket.recv()
print("< {}".format(message))
print('\n\n')
print(self.x)
self.x += 1
# Hand the received message back to the parent through the results queue.
self.results.put(message)
self.task.cancel()
self.loop.close()
#stop task
def stop(self):
print('canceling task\n\n')
self.x = 0
self.task.cancel()
# listen to socket long term
#I have not called this as I can't even get a response from test()
def make_task(self):
self.task = self.loop.create_task(self.listen_to_socket())
if __name__ == '__main__':
tasks = multiprocessing.JoinableQueue()
results = multiprocessing.Queue()
process = Connection(tasks, results)
if tasks.empty():
print('empty')
else:
print(tasks.get())
I expect to connect with the socket and receive a response. However, I get nothing. I get no error messages,no printout from the connection, I get an empty queue and that's all. How do I get the return values from my websocket?
I am still new enough, I am not sure what I am doing wrong. Any advice would help me out.
Thank you
Anyone interested, I got this to work. It is very much a work in progress and I am adding to it, and since this is for me and relatively simple, I didn't comment it.
I started with the code from this answer and built on it.
Python3 Websockets / Multithreading issue
import asyncio
import websockets
import sys
import time
import multiprocessing
# Helper that runs a WebSocket echo exchange in a child process: URIs come in
# through the tasks queue and received messages go out through results.
# NOTE(review): the original indentation of this snippet was lost, so the exact
# nesting of the loops in commander_start cannot be determined from this text.
class connect():
def __init__(self, results, tasks):
# Counter of messages exchanged on the current connection.
self.x = 0
self.results = results
self.tasks = tasks
self.loop = asyncio.new_event_loop()
# Take URIs from the tasks queue, connect, exchange "hello" messages while
# x < 5, and put each received message (with x appended) on results.
async def commander_start(self):
while not self.tasks.empty():
self.uri = self.tasks.get()
# The item is marked done as soon as it is fetched, before any work on it.
self.tasks.task_done()
self.ws = await websockets.connect(self.uri)
while True:
await asyncio.sleep(0.1)
print('Listening for a message...')
while self.x < 5:
await self.ws.send("hello")
message = await self.ws.recv()
message = message+str(self.x)
print("< {}".format(message))
print('\n\n')
print(self.x)
self.x += 1
self.results.put(message)
# NOTE(review): close() is not awaited; if it is a coroutine (as in the
# websockets client API) the connection is not actually closed - confirm.
self.ws.close()
self.x = 0
print('ws clsed')
self.task.cancel()
await asyncio.sleep(1)
self.loop.close()
# Process target: schedule commander_start on the private loop and run it.
def run_commander(self):
self.task = self.loop.create_task(self.commander_start())
self.loop.run_forever()
# Run run_commander in a child process, wait three seconds, then kill it.
def main(self):
self.commander = multiprocessing.Process(target=self.run_commander)
self.commander.start()
time.sleep(3)
self.commander.kill()
print('is alive:', self.commander, self.commander.is_alive())
if __name__ == "__main__":
size_q = 10
tasks = multiprocessing.JoinableQueue(maxsize=size_q)
results = multiprocessing.Queue(maxsize=size_q)
conn = connect(results,tasks)
tasks.put('ws://demos.kaazing.com/echo')
conn.main()
print('tasks2 put')
tasks.put('wss://echo.websocket.org')
conn.main()
if not results.empty():
for x in range(size_q):
print(results.get())
There is a bunch I am going to change and improve, I just wanted the base system to work so I could build from there, so that anyone that uses this will need to modify it to suit their needs. For instance, I spawn a new process and kill it, instead of running a continuous process and giving it work to do, I also am trying to figure out the specifics of the joinable queue and how to use it to add jobs after the process and event loop has been created.

Process tasks in batchs in asyncio

I have a function that generates tasks (I/O-bound tasks):
def get_task():
    """Yield tasks from ``_get_task()`` forever, sleeping 1s when none is available."""
    while True:
        candidate = _get_task()
        if candidate is None:
            sleep(1)
            continue
        yield candidate
I am trying to write a consumer in asyncio that processes at most 10 tasks at a time; whenever one task finishes, it should take a new one.
I am not sure whether I should use semaphores, or whether there is some kind of asyncio pool executor. I started by writing pseudocode with threads:
def run(self):
    """Forever: take a semaphore slot, fetch a task and process it.

    The slot acquired here is released again by ``process_task``.
    """
    while True:
        self.semaphore.acquire()  # first acquire, then get task
        t = get_task()
        self.process_task(t)
def process_task(self, task):
    """Execute ``task``, record the outcome and free the semaphore slot.

    A task whose execution (or done-marking) raises is marked as failed.
    The slot is released in ``finally`` so it cannot leak, even if the
    failure bookkeeping itself raises.
    """
    try:
        self.execute_task(task)
        self.mark_as_done(task)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not reported as task failures.
        self.mark_as_failed(task)
    finally:
        self.semaphore.release()
Could anyone help me? I have no clue where to put async/await keywords
A simple task cap using asyncio.Semaphore:
async def max10(task_generator):
    """Await every item of ``task_generator``, at most 10 at a time.

    ``task_generator`` is an async iterable of awaitables. All items are
    pulled from it eagerly; only the awaiting is bounded by the semaphore.
    The call returns once every item has finished, and the first exception
    raised by an item propagates to the caller.
    """
    semaphore = asyncio.Semaphore(10)

    async def bounded(task):
        async with semaphore:
            return await task

    # Keep references to the futures and wait for them: without this the
    # coroutine would return while the work is still pending, and the
    # futures could be cancelled or garbage-collected before they finish.
    pending = [asyncio.ensure_future(bounded(task)) async for task in task_generator]
    await asyncio.gather(*pending)
The problem with this solution is that tasks are being drawn from the generator greedily. For example, if generator reads from a large database, the program could run out of memory.
Other than that it's idiomatic and well-behaved.
A solution, that uses async generator protocol to pull new tasks on demand:
async def max10(task_generator):
    """Pull tasks from ``task_generator`` on demand, keeping up to 10 in flight.

    The in-flight set is topped up to 10 items, then the coroutine waits
    for at least one of them to finish before pulling more. When the
    generator is exhausted, the remaining items are awaited together.
    """
    source = task_generator.__aiter__()
    in_flight = set()
    try:
        while True:
            for _ in range(10 - len(in_flight)):
                in_flight.add(await source.__anext__())
            _finished, in_flight = await asyncio.wait(
                in_flight, return_when=asyncio.FIRST_COMPLETED
            )
    except StopAsyncIteration:
        await asyncio.gather(*in_flight)
It may be considered sub-optimal, because it doesn't start executing tasks until 10 are available.
And here's concise and magic solution using worker pattern:
async def max10(task_generator):
    """Await the items of ``task_generator`` using 10 concurrent workers.

    Each worker repeatedly takes the next item from the shared async
    iterator and awaits it, so at most 10 items are in progress at once
    and every item is handled by exactly one worker.
    """
    source = task_generator.__aiter__()
    # An async generator cannot service two ``__anext__`` calls at once: if
    # it is suspended in an ``await`` while another worker asks for the next
    # item, Python raises "asynchronous generator is already running".
    # The lock serialises the fetch; the awaiting itself stays concurrent.
    fetch_lock = asyncio.Lock()

    async def worker():
        while True:
            async with fetch_lock:
                try:
                    task = await source.__anext__()
                except StopAsyncIteration:
                    return
            await task

    await asyncio.gather(*(worker() for _ in range(10)))
It relies on a somewhat counter-intuitive property of being able to have multiple async iterators over the same async generator, in which case each generated item is seen by only one iterator.
My gut tells me that none of these solutions behaves properly on cancellation.
Async isn't threads. If for example you have tasks that are file IO bound then write them async using aiofiles
async with aiofiles.open('filename', mode='r') as f:
contents = await f.read()
Then replace task with your tasks. If you want to only run 10 at a time await asyncio.gather every 10 tasks.
import asyncio
async def task(x):
    """Wait half a second, then print that ``x`` is done."""
    await asyncio.sleep(0.5)
    print(x, "is done")
async def run(loop):
    """Create 50 ``task`` coroutines and await them all together.

    ``loop`` is accepted but not used.
    """
    await asyncio.gather(*(task(x) for x in range(50)))
loop = asyncio.get_event_loop()
loop.run_until_complete( run(loop) )
loop.close()
If you can't write the tasks as async code and need threads, this is a basic example that combines a concurrent.futures ThreadPoolExecutor with asyncio's run_in_executor. Note that with max_workers=5 only 5 tasks run at a time.
import time
from concurrent.futures import ThreadPoolExecutor
import asyncio
def blocking(x):
    """Block the calling thread for one second, then print that ``x`` is done."""
    time.sleep(1)
    print(x, "is done")
async def run(loop):
    """Run 15 ``blocking`` calls on a 5-thread pool and wait for all of them.

    ``loop`` is the event loop used to submit the calls to the executor.
    """
    # The context manager shuts the pool down when the block exits, so its
    # worker threads do not outlive this coroutine.
    with ThreadPoolExecutor(max_workers=5) as executor:
        futs = [loop.run_in_executor(executor, blocking, x) for x in range(15)]
        # The event loop stays free while the pool threads work; this pause
        # is kept from the original example.
        await asyncio.sleep(4)
        await asyncio.gather(*futs)
loop = asyncio.get_event_loop()
loop.run_until_complete( run(loop) )
loop.close()
As pointed out by Dima Tismek, using semaphores to limit concurrency is vulnerable to exhausting task_generator too eagerly, since there is no backpressure between obtaining the tasks and submitting them to the event loop. A better option, also explored by the other answer, is not to spawn a task as soon as the generator has produced an item, but to create a fixed number of workers that exhaust the generator concurrently.
There are two areas where the code could be improved:
there is no need for a semaphore - it is superfluous when the number of tasks is fixed to begin with;
handling cancellation of generated tasks and of the throttling task.
Here is an implementation that tackles both issues:
async def throttle(task_generator, max_tasks):
    """Await the items of ``task_generator`` with ``max_tasks`` workers.

    Args:
        task_generator: async iterable that yields awaitables (tasks).
        max_tasks: number of worker tasks, i.e. the concurrency limit.

    Raises:
        Any exception raised by the generator or by a generated task; all
        workers are cancelled before it is re-raised. The same happens
        when this coroutine itself is cancelled.
    """
    it = task_generator.__aiter__()
    # An async generator cannot service two ``__anext__`` calls at once: if
    # it is suspended in an ``await`` while another worker asks for the next
    # item, Python raises "asynchronous generator is already running".
    # The lock serialises the fetch; the awaiting itself stays concurrent.
    fetch_lock = asyncio.Lock()
    cancelled = False

    async def worker():
        while True:
            async with fetch_lock:
                try:
                    task = await it.__anext__()
                except StopAsyncIteration:
                    return
            try:
                await task
            except asyncio.CancelledError:
                # If a generated task is canceled, let its worker
                # proceed with other tasks - except if it's the
                # outer coroutine that is cancelling us.
                if cancelled:
                    raise
            # other exceptions are propagated to the caller

    worker_tasks = [asyncio.create_task(worker()) for _ in range(max_tasks)]
    try:
        await asyncio.gather(*worker_tasks)
    except BaseException:
        # In case of exception in one worker, or in case we're
        # being cancelled, cancel all workers and propagate the
        # exception. BaseException is deliberate: CancelledError does not
        # derive from Exception on Python 3.8+.
        cancelled = True
        for t in worker_tasks:
            t.cancel()
        raise
A simple test case:
async def mock_task(num):
    """Print a start line, wait a random 1-5 seconds, print an end line."""
    print('running', num)
    pause = random.uniform(1, 5)
    await asyncio.sleep(pause)
    print('done', num)
async def mock_gen():
    """Endlessly yield ``mock_task`` tasks, pausing briefly before each one."""
    serial = 0
    while True:
        delay = .1 * random.random()
        await asyncio.sleep(delay)
        print('generating', serial)
        yield asyncio.create_task(mock_task(serial))
        serial += 1


if __name__ == '__main__':
    asyncio.run(throttle(mock_gen(), 3))

Coroutine to mimic a OS's scheduler

I am following "A Curious Course on Coroutines and Concurrency" to learn about coroutines, and I ran into a problem getting the following code to run.
The code mimics an operating system that schedules tasks:
from queue import Queue


class Task:
    """Wrapper that gives a coroutine (generator) a unique task id."""

    taskid = 0

    def __init__(self, target):
        Task.taskid += 1  # count the task
        self.tid = Task.taskid
        self.target = target
        self.sendval = None

    def run(self):
        """Resume the wrapped generator and return what it yields."""
        return self.target.send(self.sendval)


class Scheduler:
    """Round-robin scheduler that runs tasks taken from a ready queue."""

    def __init__(self):
        # A queue of tasks that are ready to run.
        self.ready = Queue()
        # All active tasks, keyed by their unique integer task id.
        self.taskmap = {}

    def new(self, target):
        """Introduce a new task to the scheduler.

        NOTE: the task is recorded in ``taskmap`` but is never put on the
        ``ready`` queue, so ``mainloop`` blocks forever in
        ``self.ready.get()``. This is the hang the question asks about;
        calling ``self.schedule(newtask)`` here is what makes it runnable.
        """
        newtask = Task(target)
        self.taskmap[newtask.tid] = newtask

    def schedule(self, task):
        """Put ``task`` on the ready queue."""
        self.ready.put(task)

    def mainloop(self):
        """Run ready tasks one step at a time, re-queueing each afterwards."""
        while self.taskmap:
            # Blocks while the ready queue is empty.
            task = self.ready.get()
            result = task.run()
            self.schedule(task)
Test it with
def foo():
    """Endless generator: print a marker line, then yield, forever."""
    while True:
        print("I'm foo")
        yield
def bar():
    """Endless generator: print a marker line, then yield, forever."""
    while True:
        print("I'm bar")
        yield
It hangs instead of returning a value:
In [85]: schedule.new(foo())
In [86]: schedule.new(bar())
In [87]: schedule.mainloop()
^C---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
I review the codes and find problem with
def mainloop(self):
while self.taskmap: #I think the problem is here
task = self.ready.get() #I think it should be while self.ready
result = task.run()
self.schedule(task)
The loop condition is "while self.taskmap", but there is no method that removes elements from taskmap, so it is an infinite loop.
I changed it to
def mainloop(self):
while self.taskmap: #I think the problem is here
task = self.ready.get() #I think it should be while self.ready
result = task.run()
self.schedule(task)
However, it still does not work.
What is the problem with my code?

Asyncio worker that handles N jobs at a time?

I'm trying to make an asyncio worker class that will consume jobs from a job queue and process up to N jobs in parallel. Some jobs may queue additional jobs. When the job queue is empty and the worker finishes all of its current jobs, it should end.
I'm still struggling with asyncio conceptually. Here is one of my attempts, where N=3:
import asyncio, logging, random
async def do_work(id_):
    """Simulate a job: wait a random fraction of a second, return ``id_``."""
    delay = random.random()
    await asyncio.sleep(delay)
    return id_
class JobQueue:
    ''' Maintains a list of all pending jobs. '''

    def __init__(self):
        self._queue = asyncio.Queue()
        self._max_id = 10
        # Pre-load job ids 1 .. 10.
        for job_id in range(1, self._max_id + 1):
            self._queue.put_nowait(job_id)

    def add_job(self):
        ''' Queue one more job, using the next unused id. '''
        self._max_id += 1
        self._queue.put_nowait(self._max_id)

    async def get_job(self):
        ''' Wait for the next job id and return it. '''
        job_id = await self._queue.get()
        return job_id

    def has_jobs(self):
        ''' Return True while at least one job is queued. '''
        return not self._queue.empty()
class JobWorker:
    ''' Processes up to 3 jobs at a time in parallel. '''

    def __init__(self, job_queue):
        self._job_queue = job_queue
        self._current_jobs = set()
        self._semaphore = asyncio.Semaphore(3)

    async def run(self):
        ''' Schedule jobs until the queue is empty and none is running. '''
        while self._job_queue.has_jobs() or self._current_jobs:
            print('Acquiring semaphore...')
            await self._semaphore.acquire()
            print('Getting a job...')
            # NOTE: if the queue is empty at this point, this await never
            # completes and the loop condition is not checked again; this
            # is the hang described in the question.
            job_id = await self._job_queue.get_job()
            print(f'Scheduling job {job_id}')
            self._current_jobs.add(job_id)
            task = asyncio.Task(do_work(job_id))
            task.add_done_callback(self.task_finished)

    def task_finished(self, task):
        ''' Done-callback: free the job's slot and sometimes queue a new job. '''
        job_id = task.result()
        print(f'Finished job {job_id} / released semaphore')
        self._current_jobs.remove(job_id)
        self._semaphore.release()
        if random.random() < 0.2:
            print('Queuing a new job')
            self._job_queue.add_job()
loop = asyncio.get_event_loop()
jw = JobWorker(JobQueue())
print('Starting event loop')
loop.run_until_complete(jw.run())
print('Event loop ended')
loop.close()
An excerpt of the output:
Starting event loop
Acquiring semaphore...
Getting a job...
Scheduling job 1
Acquiring semaphore...
Getting a job...
Scheduling job 2
Acquiring semaphore...
Getting a job...
Scheduling job 3
Acquiring semaphore...
Finished job 2 / released semaphore
Getting a job...
Scheduling job 4
...snip...
Acquiring semaphore...
Finished job 11 / released semaphore
Getting a job...
Finished job 12 / released semaphore
Finished job 13 / released semaphore
It appears to correctly process all jobs while processing no more than 3 jobs at any one time. However, the program hangs after the last job is finished. As indicated by the output, it appears to be hanging at job_id = await self._job_queue.get_job(). Once the job queue is empty, this coroutine will never resume, and the check to see if the job queue is empty (at the top of the loop) isn't reached again.
I've tried working around this in a number of ways but conceptually something just don't quite fit. My current WIP is passing some futures between the queue and the worker and then using some combination of asyncio.wait(...) on all of them, but it's getting ugly and I'm wondering if there is an elegant solution that I'm overlooking.
You could take advantage of queue.task_done that indicates that a formerly enqueued task is complete. Then you can combine queue.join and queue.get using asyncio.wait: if queue.join finishes and queue.get doesn't, this means all the jobs have been completed.
See this example:
class Worker:
    """Run queued calls of ``func`` with at most ``n`` running at once.

    ``run()`` returns once every queued call has completed and the queue
    is empty. Calls may be added with ``put()`` while it is running.
    """

    def __init__(self, func, n=3):
        self.func = func
        self.queue = asyncio.Queue()
        self.semaphore = asyncio.Semaphore(n)

    def put(self, *args):
        """Queue one call of ``func(*args)``."""
        self.queue.put_nowait(args)

    async def run(self):
        """Start queued calls until all work is finished."""
        while True:
            args = await self._get()
            if args is None:
                return
            asyncio.ensure_future(self._target(args))

    async def _get(self):
        """Return the next queued argument tuple, or None when all work is done.

        Waits for whichever happens first: an item becomes available, or
        the queue is fully joined (every queued call has reported
        ``task_done``).
        """
        get_task = asyncio.ensure_future(self.queue.get())
        join_task = asyncio.ensure_future(self.queue.join())
        try:
            await asyncio.wait(
                [get_task, join_task], return_when=asyncio.FIRST_COMPLETED
            )
            return get_task.result() if get_task.done() else None
        finally:
            # Cancel whichever helper is still pending so it does not
            # linger (cancelling a finished task is a no-op).
            get_task.cancel()
            join_task.cancel()

    async def _target(self, args):
        """Run one call under the semaphore and mark its queue item done."""
        try:
            async with self.semaphore:
                return await self.func(*args)
        finally:
            self.queue.task_done()
You can timeout get_job with simple asyncio.wait_for. For example with 1s, and get back to the beginning of loop on timeout.
async def run(self):
    """Schedule jobs until the queue is empty and no job is running.

    Waiting for a job is limited to one second so that the loop condition
    is re-checked regularly instead of blocking forever on an empty queue.
    """
    while self._job_queue.has_jobs() or len(self._current_jobs) > 0:
        print('Acquiring semaphore...')
        await self._semaphore.acquire()
        print('Getting a job...')
        try:
            job_id = await asyncio.wait_for(self._job_queue.get_job(), 1)
        except asyncio.TimeoutError:
            # No job was started, so give the permit back; otherwise every
            # timeout would permanently shrink the concurrency limit.
            self._semaphore.release()
            continue
        print('Scheduling job {}'.format(job_id))
        self._current_jobs.add(job_id)
        task = asyncio.ensure_future(do_work(job_id))
        task.add_done_callback(self.task_finished)

Resources