How does aiohttp signal to event loop for IO operation? - python-3.x

I somewhat understand how asyncio works in Python, I'm also would like to know how does the aiohttp package signal to the event loop that it's doing some IO operation, and the event loop should skip to the next task?
import aiohttp
import asyncio
async def fetch(session, url):
async with session.get(url) as response:
return await response.text()
async def main():
async with aiohttp.ClientSession() as session:
html = await fetch(session, 'http://python.org')
print(html)
loop = asyncio.get_event_loop()
loop.run_until_complete(main())

Related

Feed ProcessPoolExecutor with results from asyncio

I have a bunch of online data that I want to download and process efficiently. Downloading already takes some time but cpu-bound processing takes much longer. I struggle to implement a combination of async and ProcessPoolExecutor.
import asyncio
import time
import aiohttp
from aiohttp import ClientSession
from concurrent.futures import ProcessPoolExecutor
class WebData:
def __init__(self, url):
self.url = url
self.binary = b''
async def download(self, client):
time.sleep(0.2)
try:
async with client.get(self.url, timeout=5) as resp:
self.binary = await resp.read()
print(f'Downloaded {self.url}')
except (aiohttp.ClientConnectionError,
asyncio.exceptions.TimeoutError):
pass
return
def process(self):
print(f'Start processing {self.url}')
time.sleep(1)
print(f'Finished processing {self.url}')
async def main():
list_urls = [f'https://www.google.com/search?q={i}'
for i in range(10)]
list_obj = [WebData(url) for url in list_urls]
with ProcessPoolExecutor() as executor:
async with ClientSession() as session:
tasks = [obj.download(session) for obj in list_obj]
await asyncio.gather(*tasks)
list_futures = [
executor.submit(obj.process)
for obj in list_obj]
return list_futures
res = asyncio.run(main())
This works as expected but it fails to accomplish what I am looking for. It first downloads all data and starts processing it only afterwards, which leaves my cores idle during download. Is there any way I can pipe the downloaded objects to the executor while other objects are still downloading?
I found this thread but it isn't what I need.
You should submit the self.process inside after the coroutine ends. For that, you can have a separate asynchronous method that will await the download method and submit the process to ProcessPoolExecutor.
class WebData:
def __init__(self, url):
"""The code has not been changed"""
async def download(self, client):
"""The code has not been changed"""
def process(self):
"""The code has not been changed"""
async def execute(self, session, pool):
await self.download(session)
pool.submit(self.process)
async def main():
list_urls = [f'https://www.google.com/search?q={i}' for i in range(10)]
list_obj = [WebData(url) for url in list_urls]
with ProcessPoolExecutor() as pool:
async with ClientSession() as session:
list_futures = await asyncio.gather(*[obj.execute(session, pool) for obj in list_obj])
return list_futures

Python 3.7 Non-Blocking Request?

I'd like to do a non-blocking http request in Python 3.7. What I'm trying to do is described well in this SO post, but it doesn't yet have an accepted answer.
Here's my code so far:
import asyncio
from aiohttp import ClientSession
[.....]
async def call_endpoint_async(endpoint, data):
async with ClientSession() as session, session.post(url=endpoint, data=data) as result:
response = await result.read()
print(response)
return response
class CreateTestScores(APIView):
permission_classes = (IsAuthenticated,)
def post(self, request):
[.....]
asyncio.run(call_endpoint_async(url, data))
print('cp #1') # <== `async.io` BLOCKS -- PRINT STATEMENT DOESN'T RUN UNTIL `asyncio.run` RETURNS
What is the correct way to do an Ajax-style non-blocking http request in Python?
Asyncio makes it easy to make a non-blocking request if your program runs in asyncio. For example:
async def doit():
task = asyncio.create_task(call_endpoint_async(url, data))
print('cp #1')
await asyncio.sleep(1)
print('is it done?', task.done())
await task
print('now it is done')
But this requires that the "caller" be async as well. In your case you want the whole asyncio event loop to run in the background, so that. This can be achieved by running it in a separate thread, e.g.:
pool = concurrent.futures.ThreadPoolExecutor()
# ...
def post(self, request):
fut = pool.submit(asyncio.run, call_endpoint_async(url, data))
print('cp #1')
However, in that case you're not getting anything by using asyncio. Since you're using threads anyway, you could as well call a sync function such as requests.get() to begin with.

Fetching multiple urls with aiohttp in python

In a previous question, a user suggested the following approach for fetching multiple urls (API calls) with aiohttp:
import asyncio
import aiohttp
url_list = ['https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530396000&before=1530436000', 'https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530436000&before=1530476000']
async def fetch(session, url):
async with session.get(url) as response:
return await response.json()['data']
async def fetch_all(session, urls, loop):
results = await asyncio.gather(*[loop.create_task(fetch(session, url)) for url in urls], return_exceptions= True)
return results
if __name__=='__main__':
loop = asyncio.get_event_loop()
urls = url_list
with aiohttp.ClientSession(loop=loop) as session:
htmls = loop.run_until_complete(fetch_all(session, urls, loop))
print(htmls)
However, this results in only returning Attribute errors:
[AttributeError('__aexit__',), AttributeError('__aexit__',)]
(which I enabled, otherwhise it would just break). I really hope there is somebody here, who can help with this, it is still kind of hard to find resources for asyncio etc. The returned data is in json format. In the end I would like to put all json dicts in a list.
Working example:
import asyncio
import aiohttp
import ssl
url_list = ['https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530396000&before=1530436000',
'https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530436000&before=1530476000']
async def fetch(session, url):
async with session.get(url, ssl=ssl.SSLContext()) as response:
return await response.json()
async def fetch_all(urls, loop):
async with aiohttp.ClientSession(loop=loop) as session:
results = await asyncio.gather(*[fetch(session, url) for url in urls], return_exceptions=True)
return results
if __name__ == '__main__':
loop = asyncio.get_event_loop()
urls = url_list
htmls = loop.run_until_complete(fetch_all(urls, loop))
print(htmls)

Awaiting a coroutine inside of a Class while event_loop is already running

I have an Issue with asyncio I can't really get me head around.
take this working example (with Python 3.6+ because of string interpolation)
import asyncio
import aiohttp
import async_timeout
import json
async def fetch(session, url):
async with async_timeout.timeout(10):
async with session.get(url) as response:
return await response.text()
async def get_bittrex_marketsummary(currency_pair):
url = f'https://bittrex.com/api/v1.1/public/getmarketsummary?market={currency_pair}'
async with aiohttp.ClientSession() as session:
response = await fetch(session, url)
return json.loads(response)
class MyCryptoCurrency:
def __init__(self):
self.currency = "BTC-ETH"
self.last_price = None
asyncio.ensure_future(self.get_last_price())
async def get_last_price(self):
self.last_price = await get_bittrex_marketsummary(self.currency)
async def main():
eth = MyCryptoCurrency()
print(eth.last_price)
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
while this runs and doesn't throw any Exceptions, it doesn't get the result from the api request and so ... doesn't work :P
If I try to use f.e. loop.run_until_complete(get_bittrex_marketsummary()) I get "event loop is already running" error - which kind of makes sense.
Any hints how to solve this properly?
Thx in advance!
ok, after talking about this in #python channel on freenode I got the answer "don't do async I/O in __init__", so here is the working version:
import asyncio
import aiohttp
import async_timeout
import json
async def fetch(session, url):
async with async_timeout.timeout(10):
async with session.get(url) as response:
return await response.text()
async def get_bittrex_marketsummary(currency_pair):
url = f'https://bittrex.com/api/v1.1/public/getmarketsummary?market={currency_pair}'
async with aiohttp.ClientSession() as session:
response = await fetch(session, url)
return json.loads(response)
class MyCryptoCurrency:
def __init__(self):
self.currency = "BTC-ETH"
self.last_price = None
async def get_last_price(self):
self.last_price = await get_bittrex_marketsummary(self.currency)
async def main():
eth = MyCryptoCurrency()
await eth.get_last_price()
print(eth.last_price)
loop = asyncio.get_event_loop()
loop.run_until_complete(main())

How to Create Custom Events for Python Asyncio

The Mesos scheduler Marathon has an asynchronous HTTP API. E.g. when one deploys an app by posting a JSON to /v2/apps a deployment id is returned. The id can then be used to either poll the deployment state in /v2/deployments or by subscribing to /v2/events and look for the deployment_success event.
I would like to create an asynchronous Python client with coroutines. E.g. client.deploy_app(...) should return once the deployment_success event arrived but not block.
How can I implement these methods with asyncio? How can I create an event listener? It feels an event loop is made for this but I don't see how I register events.
Creating asynchronous post http requests that needed for /v2/apps can be done with aiohttp module:
import asyncio
import aiohttp
async def post(url, json):
async with aiohttp.ClientSession() as session:
async with session.post(url, json=json) as resp:
return await resp.json()
async def main():
res = await post('http://httpbin.org/post', {'test': 'object'})
print(res['json']) # {'test': 'object'}
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(main())
loop.run_until_complete(loop.shutdown_asyncgens())
finally:
loop.close()
If you want to use /v2/events to track deployment success you should request for stream (see api doc). It can be achieved in aiohttp with it's asynchronous iteration: you just asynchronously read content line-by-line waiting for event you need, for example:
import asyncio
import aiohttp
async def stream(url):
async with aiohttp.ClientSession() as session:
async with session.get(url) as resp:
async for line in resp.content:
yield line
async def main():
async for line in stream('http://httpbin.org/stream/10'):
print(line) # check if line contains event you need
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(main())
loop.run_until_complete(loop.shutdown_asyncgens())
finally:
loop.close()
If you want to use /v2/deployments you should request it periodically waiting some delay using asyncio.sleep. In this case your function won't be blocking:
import asyncio
import aiohttp
async def get(url):
async with aiohttp.ClientSession() as session:
async with session.get(url) as resp:
return await resp.json()
async def main():
while True:
# Make request to see if deplayment finished:
res = await get('http://httpbin.org/get')
print(res['origin']) # break if ok here
# Async wait before next try:
await asyncio.sleep(3)
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(main())
loop.run_until_complete(loop.shutdown_asyncgens())
finally:
loop.close()

Resources