aiohttp says undisclosed client session even after awaiting on close - python-3.x

Consider following code fragment.
async def f():
http_client_session = aiohttp.ClientSession()
headers = {"developerkey": "somekey"}
body = {
"password": "somepassword",
"username": "someemail#gmail.com",
}
url = "https://localhost/login"
response_body = None
async with http_client_session.post(url, json=body, headers=headers) as response:
assert response.status == 200
response_body = await response.json()
await http_client_session.close()
return response_body()
The function f is awaited in another function. aiohttp gives the warning 'Unclosed client session' but I do not understand this as I have already awaited for it to close the session.

I've seen a similar issue previously in another project. Turns out it may be just that you need to allow some time for the session to fully close. For my project I added time.sleep(5) after the close statement to let the connection end.
See this ticket on aiohttp: https://github.com/aio-libs/aiohttp/issues/1925

Related

using httpx to send 100K get requests

I'm using the httpx library and asyncio to try and send about 100K of get requests.
I ran the code and received httpx.ConnectError so I opened wireshark and saw that I was getting a lot of messages saying TCP Retransmission TCP Port numbers reused
when I saw the data in wireshark and the error httpx.ConnectError I added limits = httpx.Limits(max_connections=10000) to limit the amount of active connections to 10,000 but I still get that error.
my code:
import asyncio
import json
import httpx
SOME_URL = "some url"
ANOTHER_URL = "another url"
MAX = 10000
async def search():
guids = [guid for guid in range(688001, 800000)] # 688001 - 838611
timeout = httpx.Timeout(None)
limits = httpx.Limits(max_connections=MAX)
async with httpx.AsyncClient(timeout=timeout, limits=limits) as client:
tasks = [client.get(f"{SOME_URL}{guid}", timeout=timeout) for guid in guids]
blob_list = await asyncio.gather(*tasks) # <---- error from here !!!!!
blob_list = [(res, guid) for res, guid in zip(blob_list, guids)]
guids = [guid for res, guid in blob_list]
blob_list = [json.loads(res.text)["blob_name"] for res, guid in blob_list]
async with httpx.AsyncClient(timeout=timeout, limits=limits) as client:
tasks = [client.get(f"{ANOTHER_URL}{blob}", timeout=timeout) for blob in blob_list]
game_results = await asyncio.gather(*tasks) # <---- error from here !!!!!
game_results = [(res, guid) for res, guid in zip(game_results, guids)]
game_results = [guid for res, guid in game_results]
print(game_results)
def main():
asyncio.run(search())
if __name__ == '__main__':
main()
this is a minimal version of my code there some steps in between the requests that I deleted, but I didn't touch the code that made the trouble, there are comments on the lines that I receive the errors (# <---- error from here !!!!!).
does anyone know how to solve this? or another way to send about 100K of get requests fast?
I managed to solve my problem with the following code:
(this is not the entire code, only the parts needed to send the requests, I have some stuff in between)
import asyncio
from aiohttp import ClientSession
SOME_URL = "some url"
ANOTHER_URL = "another url"
MAX_SIM_CONNS = 50
worker_responses = []
async def fetch(url, session):
async with session.get(url) as response:
return await response.read()
async def fetch_worker(url_queue: asyncio.Queue):
global worker_responses
async with ClientSession() as session:
while True:
url = await url_queue.get()
try:
if url is None:
return
response = await fetch(url, session)
worker_responses.append(response)
finally:
url_queue.task_done()
# calling task_done() is necessary for the url_queue.join() to work correctly
async def fetch_all(base_url: str, range_: range):
url_queue = asyncio.Queue(maxsize=10000)
worker_tasks = []
for i in range(MAX_SIM_CONNS):
wt = asyncio.create_task(fetch_worker(url_queue))
worker_tasks.append(wt)
for i in range_:
await url_queue.put(f"{base_url}{i}")
for i in range(MAX_SIM_CONNS):
# tell the workers that the work is done
await url_queue.put(None)
await url_queue.join()
await asyncio.gather(*worker_tasks)
if __name__ == '__main__':
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(fetch_all(SOME_URL, range(680_842, 840_423)))
print(worker_responses)
I used aiohttp instead of httpx and used asyncio.Queue to reduce RAM usage and it worked for me.

Failure on unit tests with pytest, tornado and aiopg, any query fail

I've a REST API running on Python 3.7 + Tornado 5, with postgresql as database, using aiopg with SQLAlchemy core (via the aiopg.sa binding). For the unit tests, I use py.test with pytest-tornado.
All the tests go ok as soon as no query to the database is involved, where I'd get this:
Runtime error: Task cb=[IOLoop.add_future..() at venv/lib/python3.7/site-packages/tornado/ioloop.py:719]> got Future attached to a different loop
The same code works fine out of the tests, I'm capable of handling 100s of requests so far.
This is part of an #auth decorator which will check the Authorization header for a JWT token, decode it and get the user's data and attach it to the request; this is the part for the query:
partner_id = payload['partner_id']
provided_scopes = payload.get("scope", [])
for scope in scopes:
if scope not in provided_scopes:
logger.error(
'Authentication failed, scopes are not compliant - '
'required: {} - '
'provided: {}'.format(scopes, provided_scopes)
)
raise ForbiddenException(
"insufficient permissions or wrong user."
)
db = self.settings['db']
partner = await Partner.get(db, username=partner_id)
# The user is authenticated at this stage, let's add
# the user info to the request so it can be used
if not partner:
raise UnauthorizedException('Unknown user from token')
p = Partner(**partner)
setattr(self.request, "partner_id", p.uuid)
setattr(self.request, "partner", p)
The .get() async method from Partner comes from the Base class for all models in the app. This is the .get method implementation:
#classmethod
async def get(cls, db, order=None, limit=None, offset=None, **kwargs):
"""
Get one instance that will match the criteria
:param db:
:param order:
:param limit:
:param offset:
:param kwargs:
:return:
"""
if len(kwargs) == 0:
return None
if not hasattr(cls, '__tablename__'):
raise InvalidModelException()
tbl = cls.__table__
instance = None
clause = cls.get_clause(**kwargs)
query = (tbl.select().where(text(clause)))
if order:
query = query.order_by(text(order))
if limit:
query = query.limit(limit)
if offset:
query = query.offset(offset)
logger.info(f'GET query executing:\n{query}')
try:
async with db.acquire() as conn:
async with conn.execute(query) as rows:
instance = await rows.first()
except DataError as de:
[...]
return instance
The .get() method above will either return a model instance (row representation) or None.
It uses the db.acquire() context manager, as described in aiopg's doc here: https://aiopg.readthedocs.io/en/stable/sa.html.
As described in this same doc, the sa.create_engine() method returns a connection pool, so the db.acquire() just uses one connection from the pool. I'm sharing this pool to every request in Tornado, they use it to perform the queries when they need it.
So this is the fixture I've set up in my conftest.py:
#pytest.fixture
async def db():
dbe = await setup_db()
return dbe
#pytest.fixture
def app(db, event_loop):
"""
Returns a valid testing Tornado Application instance.
:return:
"""
app = make_app(db)
settings.JWT_SECRET = 'its_secret_one'
return app
I can't find an explanation of why this is happening; Tornado's doc and source makes it clear that asyncIO event loop is used as default, and by debugging it I can see the event loop is indeed the same one, but for some reason it seems to get closed or stopped abruptly.
This is one test that fails:
#pytest.mark.gen_test(timeout=2)
def test_score_returns_204_empty(app, http_server, http_client, base_url):
score_url = '/'.join([base_url, URL_PREFIX, 'score'])
token = create_token('test', scopes=['score:get'])
headers = {
'Authorization': f'Bearer {token}',
'Accept': 'application/json',
}
response = yield http_client.fetch(score_url, headers=headers, raise_error=False)
assert response.code == 204
This test fails as it returns 401 instead of 204, given the query on the auth decorator fails due to the RuntimeError, which returns then an Unauthorized response.
Any idea from the async experts here will be very appreciated, I'm quite lost on this!!!
Well, after a lot of digging, testing and, of course, learning quite a lot about asyncio, I made it work myself. Thanks for the suggestions so far.
The issue was that the event_loop from asyncio was not running; as #hoefling mentioned, pytest itself does not support coroutines, but pytest-asyncio brings such a useful feature to your tests. This is very well explained here: https://medium.com/ideas-at-igenius/testing-asyncio-python-code-with-pytest-a2f3628f82bc
So, without pytest-asyncio, your async code that needs to be tested will look like this:
def test_this_is_an_async_test():
loop = asyncio.get_event_loop()
result = loop.run_until_complete(my_async_function(param1, param2, param3)
assert result == 'expected'
We use loop.run_until_complete() as, otherwise, the loop will never be running, as this is the way asyncio works by default (and pytest makes nothing to make it work differently).
With pytest-asyncio, your test works with the well-known async / await parts:
async def test_this_is_an_async_test(event_loop):
result = await my_async_function(param1, param2, param3)
assert result == 'expected'
pytest-asyncio in this case wraps the run_until_complete() call above, summarizing it heavily, so the event loop will run and be available for your async code to use it.
Please note: the event_loop parameter in the second case is not even necessary here, pytest-asyncio gives one available for your test.
On the other hand, when you are testing your Tornado app, you usually need to get a http server up and running during your tests, listening in a well-known port, etc., so the usual way goes by writing fixtures to get a http server, base_url (usually http://localhost:, with an unused port, etc etc).
pytest-tornado comes up as a very useful one, as it offers several of these fixtures for you: http_server, http_client, unused_port, base_url, etc.
Also to mention, it gets a pytest mark's gen_test() feature, which converts any standard test to use coroutines via yield, and even to assert it will run with a given timeout, like this:
#pytest.mark.gen_test(timeout=3)
def test_fetch_my_data(http_client, base_url):
result = yield http_client.fetch('/'.join([base_url, 'result']))
assert len(result) == 1000
But, this way it does not support async / await, and actually only Tornado's ioloop will be available via the io_loop fixture (although Tornado's ioloop uses by default asyncio underneath from Tornado 5.0), so you'd need to combine both pytest.mark.gen_test and pytest.mark.asyncio, but in the right order! (which I did fail).
Once I understood better what could be the problem, this was the next approach:
#pytest.mark.gen_test(timeout=2)
#pytest.mark.asyncio
async def test_score_returns_204_empty(http_client, base_url):
score_url = '/'.join([base_url, URL_PREFIX, 'score'])
token = create_token('test', scopes=['score:get'])
headers = {
'Authorization': f'Bearer {token}',
'Accept': 'application/json',
}
response = await http_client.fetch(score_url, headers=headers, raise_error=False)
assert response.code == 204
But this is utterly wrong, if you understand how Python's decorator wrappers work. With the code above, pytest-asyncio's coroutine is then wrapped in a pytest-tornado yield gen.coroutine, which won't get the event-loop running... so my tests were still failing with the same problem. Any query to the database were returning a Future waiting for an event loop to be running.
My updated code once I made myself up of the silly mistake:
#pytest.mark.asyncio
#pytest.mark.gen_test(timeout=2)
async def test_score_returns_204_empty(http_client, base_url):
score_url = '/'.join([base_url, URL_PREFIX, 'score'])
token = create_token('test', scopes=['score:get'])
headers = {
'Authorization': f'Bearer {token}',
'Accept': 'application/json',
}
response = await http_client.fetch(score_url, headers=headers, raise_error=False)
assert response.code == 204
In this case, the gen.coroutine is wrapped inside the pytest-asyncio coroutine, and the event_loop runs the coroutines as expected!
But there were still a minor issue that took me a little while to realize, too; pytest-asyncio's event_loop fixture creates for every test a new event loop, while pytest-tornado creates too a new IOloop. And the tests were still failing, but this time with a different error.
The conftest.py file now looks like this; please note I've re-declared the event_loop fixture to use the event_loop from pytest-tornado io_loop fixture itself (please recall pytest-tornado creates a new io_loop on each test function):
#pytest.fixture(scope='function')
def event_loop(io_loop):
loop = io_loop.current().asyncio_loop
yield loop
loop.stop()
#pytest.fixture(scope='function')
async def db():
dbe = await setup_db()
yield dbe
#pytest.fixture
def app(db):
"""
Returns a valid testing Tornado Application instance.
:return:
"""
app = make_app(db)
settings.JWT_SECRET = 'its_secret_one'
yield app
Now all my tests work, I'm back a happy man and very proud of my now better understanding of the asyncio way of life. Cool!

How to gather task results in Trio?

I wrote a script that uses a nursery and the asks module to loop through and call an API based upon the loop variables. I get responses but don't know how to return the data like you would with asyncio.
I also have a question on limiting the APIs to 5 per second.
from datetime import datetime
import asks
import time
import trio
asks.init("trio")
s = asks.Session(connections=4)
async def main():
start_time = time.time()
api_key = 'API-KEY'
org_id = 'ORG-ID'
networkIds = ['id1','id2','idn']
url = 'https://api.meraki.com/api/v0/networks/{0}/airMarshal?timespan=3600'
headers = {'X-Cisco-Meraki-API-Key': api_key, 'Content-Type': 'application/json'}
async with trio.open_nursery() as nursery:
for i in networkIds:
nursery.start_soon(fetch, url.format(i), headers)
print("Total time:", time.time() - start_time)
async def fetch(url, headers):
print("Start: ", url)
response = await s.get(url, headers=headers)
print("Finished: ", url, len(response.content), response.status_code)
if __name__ == "__main__":
trio.run(main)
When I run nursery.start_soon(fetch...) , I am printing data within fetch, but how do I return the data? I didn't see anything similar to asyncio.gather(*tasks) function.
Also, I can limit the number of sessions to 1-4, which helps get down below the 5 API per second limit, but was wondering if there was a built in way to ensure that no more than 5 APIs get called in any given second?
Returning data: pass the networkID and a dict to the fetch tasks:
async def main():
…
results = {}
async with trio.open_nursery() as nursery:
for i in networkIds:
nursery.start_soon(fetch, url.format(i), headers, results, i)
## results are available here
async def fetch(url, headers, results, i):
print("Start: ", url)
response = await s.get(url, headers=headers)
print("Finished: ", url, len(response.content), response.status_code)
results[i] = response
Alternately, create a trio.Queue to which you put the results; your main task can then read the results from the queue.
API limit: create a trio.Queue(10) and start a task along these lines:
async def limiter(queue):
while True:
await trio.sleep(0.2)
await queue.put(None)
Pass that queue to fetch, as another argument, and call await limit_queue.get() before each API call.
Based on this answers, you can define the following function:
async def gather(*tasks):
async def collect(index, task, results):
task_func, *task_args = task
results[index] = await task_func(*task_args)
results = {}
async with trio.open_nursery() as nursery:
for index, task in enumerate(tasks):
nursery.start_soon(collect, index, task, results)
return [results[i] for i in range(len(tasks))]
You can then use trio in the exact same way as asyncio by simply patching trio (adding the gather function):
import trio
trio.gather = gather
Here is a practical example:
async def child(x):
print(f"Child sleeping {x}")
await trio.sleep(x)
return 2*x
async def parent():
tasks = [(child, t) for t in range(3)]
return await trio.gather(*tasks)
print("results:", trio.run(parent))
Technically, trio.Queue has been deprecated in trio 0.9. It has been replaced by trio.open_memory_channel.
Short example:
sender, receiver = trio.open_memory_channel(len(networkIds)
async with trio.open_nursery() as nursery:
for i in networkIds:
nursery.start_soon(fetch, sender, url.format(i), headers)
async for value in receiver:
# Do your job here
pass
And in your fetch function you should call async sender.send(value) somewhere.
When I run nursery.start_soon(fetch...) , I am printing data within fetch, but how do I return the data? I didn't see anything similar to asyncio.gather(*tasks) function.
You're asking two different questions, so I'll just answer this one. Matthias already answered your other question.
When you call start_soon(), you are asking Trio to run the task in the background, and then keep going. This is why Trio is able to run fetch() several times concurrently. But because Trio keeps going, there is no way to "return" the result the way a Python function normally would. where would it even return to?
You can use a queue to let fetch() tasks send results to another task for additional processing.
To create a queue:
response_queue = trio.Queue()
When you start your fetch tasks, pass the queue as an argument and send a sentintel to the queue when you're done:
async with trio.open_nursery() as nursery:
for i in networkIds:
nursery.start_soon(fetch, url.format(i), headers)
await response_queue.put(None)
After you download a URL, put the response into the queue:
async def fetch(url, headers, response_queue):
print("Start: ", url)
response = await s.get(url, headers=headers)
# Add responses to queue
await response_queue.put(response)
print("Finished: ", url, len(response.content), response.status_code)
With the changes above, your fetch tasks will put responses into the queue. Now you need to read responses from the queue so you can process them. You might add a new function to do this:
async def process(response_queue):
async for response in response_queue:
if response is None:
break
# Do whatever processing you want here.
You should start this process function as a background task before you start any fetch tasks so that it will process responses as soon as they are received.
Read more in the Synchronizing and Communicating Between Tasks section of the Trio documentation.

Getting a file without saving it aiohttp discord.py

So i have a command where it sends whatever the user said after the command to a website api and sends the file the site generates. However i'm changing over to aiohttp as it doesn't block like the standered requests functions
This is how i do it with normal requests and it works fine:
elif (data[0].lower() == ">signgirl"):
await bot.send_typing(message.channel)
tmp = message.content.replace(">signgirl", "")
m = hashlib.md5()
m.update(tmp.encode('utf-8'))
print(tmp, m.hexdigest())
r = requests.post("http://localhost/sign.php", stream=True, data={'text': tmp})
if (r.status_code() == 200):
await bot.send_file(destination=message.channel, filename=str(m.hexdigest()+".png"), fp=r.raw)
However when i try with aiohttp i have no idea how to actually get the raw file data..
So i made this function to get it. but it doesn't let me return an image and i cannot check the http status code without it causing an error.
async def post_data2(url, payload):
async with aiohttp.ClientSession() as session2:
async with session2.post(url, data=payload) as response2:
output = {}
output['data'] = await Image.open(BytesIO(response2.read()))
output['status'] = 200 #await str(response2.status()) #Why is this object not callable?
return output
How else could i do this? Is this possible? aiohttp doesn't seem as easy to understand.
Mister Day "V" Own from the discord.py discord server sent a perfect example of getting and sending the data
async with aiohttp.ClientSession() as session:
# or use a session you already have
async with session.get("http://example.com") as resp:
buffer = io.BytesIO(await resp.read())
# buffer is a file-like
await client.send_file(channel, fp=buffer, filename="whatever")

How do I implement synchronously waiting for a callback in Python 3 asyncio?

Due to some unusual constraints, I need to synchronously wait for a callback URL from another service before returning a response. Currently I have something resembling:
ROUTE = '/operation'
async def post(self):
##SOME OPERATIONS##
post_body = { 'callbackUrl' : 'myservice.com/cb' }
response = await other_service.post('/endpoint')
global my_return_value
my_return_value = None
while not my_return_value:
pass
return self.make_response(my_return_value)
Then I have a way to handle the callback URL something like:
ROUTE = '/cb'
async def post(self):
##OPERATIONS###
global my_return_value
my_return_value = some_value
return web.json_response()
The problem with this code is that it forever gets trapped in that while loop forever even if the callback URL gets invoked. I suspect there is a better way to do this, but I'm not sure how to go about it nor how to google for it. Any ideas?
Thanks in advance!
Just a quick scan, but I think you're trapped in
while not my_return_value:
pass
Python will be trapped there and not have time to deal with the callback function. What you need is
while not my_return_value:
await asyncio.sleep(1)
(or you can even do an asyncio.sleep(0) if you don't want the millisecond delay).
An even nicer way would be (and now I'm writing from memory, no guarantees...):
my_return_value = asyncio.get_event_loop().create_future()
await my_return_value
return self.make_response(my_return_value.result())
async def post(self):
##OPERATIONS###
my_return_value.set_result(some_value)
return web.json_response()
Note however that either way will break very much if there is ever more than one concurrent use of this system. It feels very fragile! Maybe even better:
ROUTE = '/operation'
my_return_value = {}
async def post(self):
##SOME OPERATIONS##
token = "%016x" % random.SystemRandom().randint(0, 2**128)
post_body = { 'callbackUrl' : 'myservice.com/cb?token='+token }
response = await other_service.post('/endpoint')
my_return_value[token] = asyncio.get_event_loop().create_future()
await my_return_value[token]
result = my_return_value[token].result()
del my_return_value[token]
return self.make_response(result)
async def post(self):
##OPERATIONS###
token = self.arguments("token")
my_return_value[token].set_result(some_value)
return web.json_response()
Now cherry on top would be a timer that would cancel the future after a timeout and clean up the entry in my_return_value after a while if the callback does not happen. Also, if you're going with my last suggestion, don't call it my_return_value but something like callback_future_by_token...

Resources