I want to run a coroutine in a different thread and get the result that the coroutine returns.
class Main:
    def __init__(self, result_from_io_task=None):
        self._io_task_result = result_from_io_task

    async def io_task(self):
        await asyncio.sleep(2)
        return "slept for 2s"

    def non_async_func(self):
        # This can't be made async.
        if not self._io_task_result:
            # Run io_task and get its result.
            # The event loop will be running in the main thread, so I can fire the task.
            task = asyncio.create_task(self.io_task())
            # Can't await the task since I am in a non-async func, and I cannot
            # return from non_async_func until and unless I know what
            # self.io_task has returned. Tried the following, but my app hangs forever.
            while not task.done():
                pass
I also tried the following, but it doesn't work:
def run_in_thread(coro, loop):
    output = []

    def run():
        fut = asyncio.run_coroutine_threadsafe(coro, loop)
        output.append(fut)

    thr = Thread(target=run)
    thr.start()
    return output


async def main():
    main_obj = Main(result_from_io_task=None)
    v = main_obj.non_async_func()
How can I spawn a new thread and run the given coroutine using the event loop running in the main thread?
Unfortunately, my codebase depends on Python < 3.8, and asyncio.to_thread is not available in Python 3.7.
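For reference, a minimal sketch of the pattern the question seems to be after on Python 3.7 (the names io_task and worker below are illustrative, not from the original code): asyncio.run_coroutine_threadsafe submits the coroutine to the loop running in the main thread, and the worker thread can block on .result() without freezing that loop.

import asyncio
from threading import Thread


async def io_task():
    await asyncio.sleep(2)
    return "slept for 2s"


def worker(loop):
    # Runs in a separate thread while the event loop keeps running
    # in the main thread.
    fut = asyncio.run_coroutine_threadsafe(io_task(), loop)
    # Blocking here only blocks this worker thread, not the loop.
    print("result:", fut.result())


async def main():
    loop = asyncio.get_running_loop()
    thr = Thread(target=worker, args=(loop,))
    thr.start()
    # Keep the loop alive so the submitted coroutine can actually run.
    await asyncio.sleep(3)
    thr.join()


asyncio.run(main())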
Based on the example in my other answer, here is another implementation of the asynchronous decorator that does not use the asyncio.to_thread() function but uses a ThreadPoolExecutor instead.
import asyncio
import requests
import concurrent.futures


def asynchronous(func):
    async def wrapper(*args, **kwargs):
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future = executor.submit(func, *args, **kwargs)
            # wrap_future lets us await the worker thread's result
            # without blocking the event loop.
            return await asyncio.wrap_future(future)
    return wrapper
@asynchronous
def request(url):
    with requests.Session() as session:
        response = session.get(url)
        try:
            return response.json()
        except requests.JSONDecodeError:
            return response.text


async def main():
    task = asyncio.create_task(request("https://google.com/"))
    print("waiting for response...")
    result = await task
    print(result)


if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    loop.close()
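A close variant on Python 3.7, not taken from the answer above, leans on loop.run_in_executor, which already returns an awaitable; a minimal sketch of the same decorator written that way (functools.partial is used to forward keyword arguments):

import asyncio
import functools


def asynchronous(func):
    async def wrapper(*args, **kwargs):
        loop = asyncio.get_event_loop()
        # None means the loop's default ThreadPoolExecutor is used.
        return await loop.run_in_executor(
            None, functools.partial(func, *args, **kwargs))
    return wrapper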
In the following, the coroutine runIt() is created and provided as a parameter to delegate(...), which is turned into a Task that is canceled before runIt executes:
import asyncio


async def cancelTaskTest():
    async def runIt():
        print("RunIt ran")

    async def delegate(coro):
        await coro

    task = asyncio.create_task(delegate(runIt()))
    task.cancel()


if __name__ == '__main__':
    asyncio.run(cancelTaskTest())
Produces the unwanted warning:
/usr/lib/python3.10/asyncio/base_events.py:1881: RuntimeWarning: coroutine 'cancelTaskTest.<locals>.runIt' was never awaited
handle = self._ready.popleft()
RuntimeWarning: Enable tracemalloc to get the object allocation traceback
I'm aware that runIt did not run. I don't want a warning about it - what's the best way to avoid this?
The simplest method would be to remove the () from runIt and call it inside delegate():
import asyncio


async def cancelTaskTest():
    async def runIt():
        print("RunIt ran")

    async def delegate(asyncFunc):
        coro = asyncFunc()  # <-- put () here
        await coro

    task = asyncio.create_task(delegate(runIt))  # <-- removed () from runIt
    task.cancel()


if __name__ == "__main__":
    asyncio.run(cancelTaskTest())
EDIT: To add parameters to runIt, just create a plain lambda:
import asyncio


async def cancelTaskTest():
    async def runIt(p1, p2):
        print(f"RunIt({p1}, {p2})")

    async def delegate(coro):
        await coro()

    task = asyncio.create_task(delegate(lambda: runIt(1, 2)))
    task.cancel()


if __name__ == "__main__":
    asyncio.run(cancelTaskTest())
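If you prefer not to use a lambda, functools.partial can play the same role; a small sketch along the same lines:

import asyncio
import functools


async def cancelTaskTest():
    async def runIt(p1, p2):
        print(f"RunIt({p1}, {p2})")

    async def delegate(asyncFunc):
        # The coroutine object is only created here, once delegate runs,
        # so cancelling the task beforehand never leaves an un-awaited coroutine.
        await asyncFunc()

    task = asyncio.create_task(delegate(functools.partial(runIt, 1, 2)))
    task.cancel()


if __name__ == "__main__":
    asyncio.run(cancelTaskTest())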
I have a bunch of online data that I want to download and process efficiently. Downloading already takes some time, but CPU-bound processing takes much longer. I struggle to implement a combination of async and a ProcessPoolExecutor.
import asyncio
import time

import aiohttp
from aiohttp import ClientSession
from concurrent.futures import ProcessPoolExecutor


class WebData:
    def __init__(self, url):
        self.url = url
        self.binary = b''

    async def download(self, client):
        time.sleep(0.2)
        try:
            async with client.get(self.url, timeout=5) as resp:
                self.binary = await resp.read()
            print(f'Downloaded {self.url}')
        except (aiohttp.ClientConnectionError,
                asyncio.exceptions.TimeoutError):
            pass
        return

    def process(self):
        print(f'Start processing {self.url}')
        time.sleep(1)
        print(f'Finished processing {self.url}')


async def main():
    list_urls = [f'https://www.google.com/search?q={i}'
                 for i in range(10)]
    list_obj = [WebData(url) for url in list_urls]

    with ProcessPoolExecutor() as executor:
        async with ClientSession() as session:
            tasks = [obj.download(session) for obj in list_obj]
            await asyncio.gather(*tasks)
        list_futures = [
            executor.submit(obj.process)
            for obj in list_obj]

    return list_futures


res = asyncio.run(main())
This works as expected but it fails to accomplish what I am looking for. It first downloads all data and starts processing it only afterwards, which leaves my cores idle during download. Is there any way I can pipe the downloaded objects to the executor while other objects are still downloading?
I found this thread but it isn't what I need.
You should submit self.process as soon as its download coroutine ends. For that, you can have a separate asynchronous method that awaits the download method and then submits the process to the ProcessPoolExecutor.
class WebData:
    def __init__(self, url):
        """The code has not been changed"""

    async def download(self, client):
        """The code has not been changed"""

    def process(self):
        """The code has not been changed"""

    async def execute(self, session, pool):
        await self.download(session)
        return pool.submit(self.process)


async def main():
    list_urls = [f'https://www.google.com/search?q={i}' for i in range(10)]
    list_obj = [WebData(url) for url in list_urls]

    with ProcessPoolExecutor() as pool:
        async with ClientSession() as session:
            list_futures = await asyncio.gather(*[obj.execute(session, pool) for obj in list_obj])

    return list_futures
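If the processing results themselves need to be awaited (so that gather returns them) rather than collected as concurrent futures, the submitted future can be wrapped with asyncio.wrap_future. A minimal sketch of that variant of execute, assuming the same WebData class and main as above:

    async def execute(self, session, pool):
        await self.download(session)
        # wrap_future turns the concurrent.futures.Future returned by
        # pool.submit into an awaitable asyncio future, so the event loop
        # is not blocked while the process pool does the CPU-bound work.
        return await asyncio.wrap_future(pool.submit(self.process))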
When I use aiologger, I have to write await logger many times.
For example,
import asyncio
from aiologger import Logger


async def main():
    logger = Logger.with_default_handlers(name='my-logger')

    await logger.debug("debug at stdout")
    await logger.info("info at stdout")

    await logger.warning("warning at stderr")
    await logger.error("error at stderr")
    await logger.critical("critical at stderr")

    await logger.shutdown()


loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()
It would be great if I could write something like al instead of await logger.
Disclaimer: I've written about this -- https://coxley.org/logging/#logging-over-the-network
Please don't accept a logging interface like this.
You can't avoid using await to yield the event loop. You just can't. But you can leverage existing features to do I/O outside of the main thread and still use asyncio. You just start a second event loop in that thread.
Example
I don't like to recommend third-party libs in answers, but janus.Queue is important here. It makes it easier to bridge between non-asyncio writers (e.g., the log handler) and asyncio readers (the flusher).
Note 1: If you don't actually need asyncio-compatible I/O from the flusher, use stdlib queue.Queue, remove the async-closure, and get rid of the second loop.
Note 2: This example has both an unbounded queue and does I/O for every message. Add an interval and/or message threshold for flushing to be production-ready. Depending on your system, decide whether you accept memory growth for log bursts, drop logs, or block the main code-path.
import asyncio
import logging
import time
import threading
import typing as t

# pip install --user janus
import janus

LOG = logging.getLogger(__name__)

# Queue must be created within the event loop it will be used from. Start as
# None since this will not be the main thread.
_QUEUE: t.Optional[janus.Queue] = None


class IOHandler(logging.Handler):
    def __init__(self, *args, **kwargs):
        # This is set from the flusher thread
        global _QUEUE
        while _QUEUE is None:
            time.sleep(0.01)
        self.q = _QUEUE.sync_q
        super().__init__(*args, **kwargs)

    def emit(self, record: logging.LogRecord):
        self.q.put(record)


def flusher():
    async def run():
        global _QUEUE
        if _QUEUE is None:
            _QUEUE = janus.Queue()

        # Upload record instead of print
        # Perhaps flush every n-seconds w/ buffer for upper-bound on inserts.
        q = _QUEUE.async_q
        while True:
            record = await q.get()
            print("woohoo, doing i/o:", record.msg)

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(run())


def foo():
    print("foo")


def bar():
    print("bar")


async def baz():
    await asyncio.sleep(1)
    print("baz")


async def main():
    threading.Thread(target=flusher, daemon=True).start()

    LOG.setLevel(logging.INFO)
    LOG.addHandler(IOHandler())

    foo()
    LOG.info("starting program")
    LOG.info("doing some stuff")
    LOG.info("mighty cool")
    bar()
    await baz()


if __name__ == "__main__":
    asyncio.run(main())
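As a follow-up to Note 1: when asyncio-compatible I/O in the flusher is not required, the standard library already packages this handler-plus-background-thread pattern as logging.handlers.QueueHandler and QueueListener; a minimal sketch:

import logging
import queue
from logging.handlers import QueueHandler, QueueListener

log_queue = queue.Queue()

# The listener drains the queue in its own thread and forwards each record
# to the "real" handlers that do the slow I/O.
listener = QueueListener(log_queue, logging.StreamHandler())
listener.start()

LOG = logging.getLogger("app")
LOG.setLevel(logging.INFO)
LOG.addHandler(QueueHandler(log_queue))

LOG.info("cheap, non-blocking call from the caller's point of view")
listener.stop()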
My scenario: Start, Wait, Start, Stop or Kill
Start the first event and wait for some time.
If I reach the waiting time, I need to start the second event and return both events' results.
But if the first event completes before the waiting time, there is no need to start the second event; just return the first event's result.
Example:
import asyncio


async def some_task():
    print('io start')
    await asyncio.sleep(2)
    print('io end')
    return "hello"


async def callback(loop):
    await asyncio.sleep(4)
    if loop.is_running():
        print('doing other things')


if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop2 = asyncio.get_event_loop()
    a = loop.create_task(some_task())
    b = loop2.create_task(callback(loop))
    result = loop.run_until_complete(a)
    loop2.run_until_complete(b)
    loop.close()
    loop2.close()
The variables loop and loop2 will get the same (main) event loop, which is why it will always still be running. Here is my approach:
import asyncio


async def async_main():
    result = None

    async def some_task():
        nonlocal result
        print('io start')
        await asyncio.sleep(6)
        print('io end')
        result = "hello"
        return result

    async def callback():
        await asyncio.sleep(4)
        if result is None:
            print('doing other things')

    awaited = await asyncio.gather(
        asyncio.create_task(some_task()),
        asyncio.create_task(callback()),
    )
    return awaited


asyncio.run(async_main())
Asyncio can be very confusing and I do not recommend it for non-experts, so here is an alternative using threading instead of asyncio:
import threading
import time


def do_tasks():
    result = None

    def some_task():
        nonlocal result
        print('io start')
        time.sleep(2)
        print('io end')
        result = "hello"

    def callback():
        time.sleep(4)
        if result is None:
            print('doing other things')

    threading.Thread(target=some_task).start()
    threading.Thread(target=callback).start()


do_tasks()
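For completeness, the scenario as originally stated (start the first task, wait up to a deadline, start the second task only if the first has not finished) can also be written directly with asyncio.wait and a timeout. A minimal sketch with illustrative task names and timings:

import asyncio


async def first_event():
    await asyncio.sleep(2)
    return "hello"


async def second_event():
    await asyncio.sleep(1)
    return "world"


async def main():
    task = asyncio.create_task(first_event())
    # Wait at most 4 seconds for the first event to finish.
    done, _pending = await asyncio.wait({task}, timeout=4)
    if task in done:
        # Finished within the waiting time: return only its result.
        return task.result()
    # Waiting time reached: start the second event and return both results.
    return await asyncio.gather(task, second_event())


print(asyncio.run(main()))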
Here's some sample code.
import asyncio


class Foo:
    def __init__(self):
        self._run_coro()

    def _run_coro(self):
        async def init():
            bar = ...  # some I/O op
            self.bar = bar
        loop = asyncio.get_event_loop()
        loop.run_until_complete(init())

    async def spam(self):
        return await ...  # I/O op


async def main():
    foo = Foo()
    await foo.spam()


loop = asyncio.get_event_loop()
loop.run_until_complete(main())
When I run this code, I get the following exception:
RuntimeError: This event loop is already running
If I initialize Foo outside main, the code runs without any exception. I want to initialize Foo such that during initialization it runs a coroutine which creates a class attribute bar.
I am unable to figure out how to do it correctly. How can I run a coroutine from __init__?
Any help would be highly appreciated.
class Foo:
    def __init__(self):
        self.session = requests.Session()
        self.async_session = None
        # I guess this can be done to initialize it.
        s = self.init_async_session()
        try:
            s.send(None)
        except StopIteration:
            pass
        finally:
            s.close()

    async def init_async_session(self):
        # ClientSession should be created inside a coroutine.
        self.async_session = aiohttp.ClientSession()
What would be the right way to initialize self.async_session?
If a method uses something asynchronous, it should be explicitly defined as asynchronous as well. This is a core idea behind asyncio: it makes you write code in such a way that you always know whether some arbitrary method may do something asynchronous.
In your snippet you want to do an async thing (bar I/O) inside the sync method __init__, and asyncio prohibits it. You should make _run_coro async and initialize Foo asynchronously, for example, using the __await__ method:
import asyncio


class Foo:
    def __await__(self):
        return self._run_coro().__await__()

    async def _run_coro(self):  # real async initializer
        async def init():
            await asyncio.sleep(1)  # bar I/O
            self.bar = 123
        await init()
        return self

    async def spam(self):
        return await asyncio.sleep(1)  # I/O op


async def main():
    foo = await Foo()
    await foo.spam()


asyncio.run(main())  # instead of two lines in Python 3.7+
You may be interested in reading this answer to understand better how asyncio works and how to handle it.
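Another common way around the same restriction, not shown in the snippets above, is an async factory classmethod that does the asynchronous work before handing the instance back; a minimal sketch:

import asyncio


class Foo:
    @classmethod
    async def create(cls):
        # All asynchronous initialization happens here instead of __init__.
        self = cls()
        await asyncio.sleep(1)  # stand-in for the real bar I/O
        self.bar = 123
        return self


async def main():
    foo = await Foo.create()
    print(foo.bar)


asyncio.run(main())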
Upd:
s = self.init_async_session()
try:
    s.send(None)
Don't do such things: generator methods are only an implementation detail with regard to coroutines. You can't predict how a coroutine will react to calling the .send() method directly, and you can't rely on this behavior.
If you want to execute a coroutine, use await; if you want to start it "in the background", use a task or the other functions from the asyncio docs.
What would be the right way to initialize self.async_session?
When it comes to aiohttp.ClientSession, it should not only be created but also properly closed. The best way to do that is to use an async context manager, as shown in the aiohttp docs.
If you want to hide this operation inside Foo, you can make it an async context manager as well. Complete example:
import asyncio

import aiohttp


class Foo:
    async def __aenter__(self):
        self._session = aiohttp.ClientSession()
        await self._session.__aenter__()
        return self

    async def __aexit__(self, *args):
        await self._session.__aexit__(*args)

    async def spam(self):
        url = 'http://httpbin.org/delay/1'
        resp = await self._session.get(url)
        text = await resp.text()
        print(text)


async def main():
    async with Foo() as foo:
        await foo.spam()


asyncio.run(main())
Upd2:
You can combine the ways to init/close the object from above to achieve the result you like. As long as you keep in mind that both operations are asynchronous and thus should be awaited, everything should be fine.
One more possible way:
import asyncio

import aiohttp


class Foo:
    def __await__(self):
        return self._init().__await__()

    async def _init(self):
        self._session = aiohttp.ClientSession()
        await self._session.__aenter__()
        return self

    async def close(self):
        await self._session.__aexit__(None, None, None)

    async def spam(self):
        url = 'http://httpbin.org/delay/1'
        resp = await self._session.get(url)
        text = await resp.text()
        print(text)


async def main():
    foo = await Foo()
    try:
        await foo.spam()
    finally:
        await foo.close()


asyncio.run(main())
Here's my solution.
import asyncio

import aiohttp
import requests


class Session:
    def __init__(self, headers):
        self._headers = headers
        self._session = requests.Session()
        self._async_session = None

    async def _init(self):
        self._async_session = aiohttp.ClientSession(headers=self._headers)

    async def async_request(self, url):
        while True:
            try:
                async with self._async_session.get(url) as resp:
                    resp.raise_for_status()
                    return await resp.text()
            except aiohttp.client_exceptions.ClientError:
                # retry or raise
                raise
            except AttributeError:
                if isinstance(self._async_session, aiohttp.ClientSession):
                    raise
                await self._init()

    def request(self, url):
        return self._session.get(url).text

    async def close(self):
        if isinstance(self._async_session, aiohttp.ClientSession):
            await self._async_session.close()


async def main():
    session = Session({})
    print(await session.async_request('https://httpstat.us/200'))
    await session.close()


asyncio.run(main())
I can initialize the Session class and make synchronous as well as asynchronous requests. I do not have to explicitly call await session._init() to initialize self._async_session: when session.async_request is called while self._async_session is still None, the AttributeError branch calls await self._init() and the request is retried.