How to create delegate/nested async context manager for aiohttp? - python-3.5

I want to create a custom request manager for a crawler with dynamic waiting.
My crawler needs to make requests to sites which prohibit parallel requests from the same IP address. When such blocking occurs, the requests return HTTP error codes such as 403, 503, 429, etc.
In case of an error I want to wait some time and repeat the request. To keep the parsers simple, though, they should just call get and receive the correct page.
I want to use aiohttp and the new async with syntax of Python 3.5, so that my parser classes can use async with on my requester class the same way they would with aiohttp.ClientSession, like this:
# somewhere in a parser
async def get_page(self, requester, page_index):
    """Fetch page ``page_index`` through ``requester`` and return the parsed result.

    ``requester.get(...)`` is used directly in ``async with``, so it must
    return an async context manager that yields the response object.
    """
    async with requester.get(URL_FMT.format(page_index)) as response:
        html_content = await response.read()
        result = self.parsing_page(html_content)
    return result
If requester is an aiohttp.ClientSession, then response is an aiohttp.ClientResponse, which has __aenter__ and __aexit__ methods, so async with works as expected.
But if I put my requester class in the middle, it does not work anymore.
Traceback (most recent call last):
File "/opt/project/api/tornado_runner.py", line 6, in <module>
from api import app
File "/opt/project/api/api.py", line 20, in <module>
loop.run_until_complete(session.login())
File "/usr/local/lib/python3.5/asyncio/base_events.py", line 337, in run_until_complete
return future.result()
File "/usr/local/lib/python3.5/asyncio/futures.py", line 274, in result
raise self._exception
File "/usr/local/lib/python3.5/asyncio/tasks.py", line 239, in _step
result = coro.send(None)
File "/opt/project/api/viudata/session.py", line 72, in login
async with self.get('https://www.viudata.com') as resp:
AttributeError: __aexit__
Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x7f44f61ef240>
My requester class looks like this:
class Requester:
    """Wrap a client session and retry requests that hit a throttling status.

    ``get`` is a coroutine function: callers must ``await requester.get(...)``
    to obtain the response. The coroutine object it returns is not itself an
    async context manager.
    """

    # Status codes that trigger a wait-and-retry cycle.
    RETRY_STATUSES = (503, 403, 429)

    def __init__(self, session: "aiohttp.ClientSession"):
        self.session = session

    async def get(self, *args, **kwargs):
        """Repeat ``session.get`` until the status is not a throttling one.

        All arguments are forwarded unchanged to ``self.session.get``. Between
        attempts ``self.wait_some_time()`` is awaited.

        Returns:
            The first response whose status is not in ``RETRY_STATUSES``.
        """
        while True:
            response = await self.session.get(*args, **kwargs)
            if response.status not in self.RETRY_STATUSES:
                return response
            await self.wait_some_time()
From my understanding, self.session.get is a coroutine function, so I await it. The result is an aiohttp.ClientResponse, which has __aenter__ and __aexit__. But if I return it, the async with block in the parser's code raises this odd error.
What do I need to change so that my requester class can be used the same way as aiohttp.ClientSession?

You should write additional code to support the async with protocol.
See client.request() and _RequestContextManager in the aiohttp source for inspiration.

Related

How to use asyncio and aioredis lock inside celery tasks?

Goal:
Possibility to run asyncio coroutines.
Correct celery behavior on exceptions and task retries.
Possibility to use aioredis lock.
So, how to run async tasks properly to achieve the goal?
What is RuntimeError: await wasn't used with future (below), how can I fix it?
I have already tried:
1. asgiref
async_to_sync (from asgiref https://pypi.org/project/asgiref/).
This option makes it possible to run asyncio coroutines, but retries functionality doesn't work.
2. celery-pool-asyncio
(https://pypi.org/project/celery-pool-asyncio/)
Same problem as in asgiref. (This option makes it possible to run asyncio coroutines, but retries functionality doesn't work.)
3. write own async to sync decorator
I tried to create my own decorator, like async_to_sync, that runs coroutines thread-safely (asyncio.run_coroutine_threadsafe), but I got the same behavior as described above.
4. asyncio module
I have also tried asyncio.run() and asyncio.get_event_loop().run_until_complete() (together with self.retry(...)) inside the celery task. This works well: tasks run and retries work, but coroutine execution is incorrect - inside the async function I cannot use aioredis.
Implementation notes:
start celery command: celery -A celery_test.celery_app worker -l info -n worker1 -P gevent --concurrency=10 --without-gossip --without-mingle
celery app:
transport = f"redis://localhost/9"
celery_app = Celery("worker", broker=transport, backend=transport,
include=['tasks'])
celery_app.conf.broker_transport_options = {
'visibility_timeout': 60 * 60 * 24,
'fanout_prefix': True,
'fanout_patterns': True
}
utils:
@contextmanager
def temp_asyncio_loop():
    """Install a fresh event loop as the current one for the ``with`` body.

    Yields the new loop. On exit the loop is stopped and closed, and the loop
    that was current beforehand (or ``None``) is installed again.
    """
    # asyncio.get_event_loop() automatically creates event loop only for main thread
    try:
        prev_loop = asyncio.get_event_loop()
    except RuntimeError:
        prev_loop = None
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        yield loop
    finally:
        loop.stop()
        loop.close()
        del loop
        asyncio.set_event_loop(prev_loop)
def with_temp_asyncio_loop(f):
    """Decorate ``f`` so that each call runs inside ``temp_asyncio_loop()``.

    The temporary loop is passed to ``f`` as the keyword argument ``loop``, so
    ``f`` must accept that keyword.
    """
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        with temp_asyncio_loop() as t_loop:
            return f(*args, loop=t_loop, **kwargs)
    return wrapper
def await_(coro):
    """Run ``coro`` to completion on the current event loop and return its result."""
    return asyncio.get_event_loop().run_until_complete(coro)
tasks:
@celery_app.task(bind=True, max_retries=30, default_retry_delay=0)
@with_temp_asyncio_loop
def debug(self, **kwargs):
    """Celery task: run ``debug_async`` on a temporary loop, retrying on any error.

    ``kwargs`` receives the ``loop`` keyword injected by ``with_temp_asyncio_loop``.
    """
    try:
        await_(debug_async())
    except Exception as exc:
        # retry() raises by itself; the explicit ``raise`` makes the control
        # flow obvious to readers and linters.
        raise self.retry(exc=exc)
async def debug_async():
    """Acquire and immediately release a ``RedisLock`` with a timestamped key."""
    async with RedisLock(f'redis_lock_{datetime.now()}'):
        pass
redis lock
class RedisLockException(Exception):
    """Raised by ``RedisLock`` when the lock key is already held."""
class RedisLock(AsyncContextManager):
    """
    Redis Lock class

    :param lock_id: string (unique key)
    :param value: dummy value
    :param expire: int (time in seconds that key will storing)
    :param expire_on_delete: int (time in seconds, set pause before deleting)

    Usage:
        try:
            async with RedisLock('123_lock', expire=5 * 60):
                # do something
        except RedisLockException:
            # the lock is held by someone else
    """

    def __init__(self, lock_id: str, value='1', expire: int = 4, expire_on_delete: int = None):
        self.lock_id = lock_id
        self.expire = expire
        self.value = value
        self.expire_on_delete = expire_on_delete

    async def acquire_lock(self):
        """Try to create the key with SETNX and return the command's result."""
        return await redis.setnx(self.lock_id, self.value)

    async def release_lock(self):
        """Delete the key, or set ``expire_on_delete`` as its expiry instead."""
        if self.expire_on_delete is None:
            return await redis.delete(self.lock_id)
        else:
            await redis.expire(self.lock_id, self.expire_on_delete)

    async def __aenter__(self, *args, **kwargs):
        if not await self.acquire_lock():
            raise RedisLockException({
                'redis_lock': 'The process: {} still run, try again later'.format(await redis.get(self.lock_id))
            })
        # NOTE(review): SETNX and EXPIRE are two separate commands, so the key
        # has no expiry if the process dies between them.
        await redis.expire(self.lock_id, self.expire)
        # Return the lock so ``async with RedisLock(...) as lock`` binds it.
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        await self.release_lock()
On my Windows machine, await redis.setnx(...) blocks the celery worker: it stops producing logs and Ctrl+C doesn't work.
Inside the docker container, I receive an error. Here is part of the traceback:
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/aioredis/connection.py", line 854, in read_response
response = await self._parser.read_response()
File "/usr/local/lib/python3.9/site-packages/aioredis/connection.py", line 366, in read_response
raise ConnectionError(SERVER_CLOSED_CONNECTION_ERROR)
aioredis.exceptions.ConnectionError: Connection closed by server.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/celery/app/trace.py", line 451, in trace_task
R = retval = fun(*args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/celery/app/trace.py", line 734, in __protected_call__
return self.run(*args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/celery/app/autoretry.py", line 54, in run
ret = task.retry(exc=exc, **retry_kwargs)
File "/usr/local/lib/python3.9/site-packages/celery/app/task.py", line 717, in retry
raise_with_context(exc)
File "/usr/local/lib/python3.9/site-packages/celery/app/autoretry.py", line 34, in run
return task._orig_run(*args, **kwargs)
File "/app/celery_tasks/tasks.py", line 69, in wrapper
return f(*args, **kwargs) # <--- inside with_temp_asyncio_loop from utils
...
File "/usr/local/lib/python3.9/contextlib.py", line 575, in enter_async_context
result = await _cm_type.__aenter__(cm)
File "/app/db/redis.py", line 50, in __aenter__
if not await self.acquire_lock():
File "/app/db/redis.py", line 41, in acquire_lock
return await redis.setnx(self.lock_id, self.value)
File "/usr/local/lib/python3.9/site-packages/aioredis/client.py", line 1064, in execute_command
return await self.parse_response(conn, command_name, **options)
File "/usr/local/lib/python3.9/site-packages/aioredis/client.py", line 1080, in parse_response
response = await connection.read_response()
File "/usr/local/lib/python3.9/site-packages/aioredis/connection.py", line 859, in read_response
await self.disconnect()
File "/usr/local/lib/python3.9/site-packages/aioredis/connection.py", line 762, in disconnect
await self._writer.wait_closed()
File "/usr/local/lib/python3.9/asyncio/streams.py", line 359, in wait_closed
await self._protocol._get_close_waiter(self)
RuntimeError: await wasn't used with future
library versions:
celery==5.2.1
aioredis==2.0.0
Use the solo pool, then create a decorator which runs the task function with
asyncio.get_event_loop().run_until_complete(f(*args, **kwargs)) and make your task asynchronous:
def sync(f):
    """Turn coroutine function ``f`` into a blocking callable.

    The wrapper runs ``f(*args, **kwargs)`` to completion on the current
    event loop and returns its result.
    """
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        return asyncio.get_event_loop().run_until_complete(f(*args, **kwargs))
    return wrapper
@celery_app.task()
@sync
async def task():
    """Asynchronous task body; ``sync`` runs it to completion for celery."""
    ...
Maybe it helps. https://github.com/aio-libs/aioredis-py/issues/1273
The main point is:
replace all the calls to get_event_loop with get_running_loop,
which removes the RuntimeError that is raised when a future is attached to
a different loop.

FSM doesn't work when I run aiogram with fastapi

I have a problem with fsm using aiogram with Fastapi. I ran the code from aiogram_fsm_example, but changed the long-polling to the Fastapi implementation. Here's the code I've got:
import logging
from fastapi import FastAPI, Request
import aiogram.utils.markdown as md
from aiogram import Bot, Dispatcher, types
from aiogram.contrib.fsm_storage.memory import MemoryStorage
from aiogram.dispatcher import FSMContext
from aiogram.dispatcher.filters import Text
from aiogram.dispatcher.filters.state import State, StatesGroup
from aiogram.types import ParseMode
from aiogram.utils import executor
logging.basicConfig(level=logging.INFO)
API_TOKEN = "here's the bot token"
bot = Bot(token=API_TOKEN)
# For example use simple MemoryStorage for Dispatcher.
storage = MemoryStorage()
dp = Dispatcher(bot, storage=storage)
# States
class Form(StatesGroup):
    """Conversation states, visited in order: name, age, gender."""
    name = State()  # Will be represented in storage as 'Form:name'
    age = State()  # Will be represented in storage as 'Form:age'
    gender = State()  # Will be represented in storage as 'Form:gender'
@dp.message_handler(commands='start')
async def cmd_start(message: types.Message):
    """
    Conversation's entry point
    """
    # Set state
    await Form.name.set()
    await message.reply("Hi there! What's your name?")
@dp.message_handler(state=Form.name)
async def process_name(message: types.Message, state: FSMContext):
    """
    Process user name
    """
    async with state.proxy() as data:
        data['name'] = message.text
    await Form.next()
    await message.reply("How old are you?")
# Check age. Age gotta be digit
@dp.message_handler(lambda message: not message.text.isdigit(), state=Form.age)
async def process_age_invalid(message: types.Message):
    """
    If age is invalid
    """
    return await message.reply("Age gotta be a number.\nHow old are you? (digits only)")
@dp.message_handler(lambda message: message.text.isdigit(), state=Form.age)
async def process_age(message: types.Message, state: FSMContext):
    """Store the numeric age and ask for the gender with a reply keyboard."""
    # Update state and data
    await Form.next()
    await state.update_data(age=int(message.text))
    # Configure ReplyKeyboardMarkup
    markup = types.ReplyKeyboardMarkup(resize_keyboard=True, selective=True)
    markup.add("Male", "Female")
    markup.add("Other")
    await message.reply("What is your gender?", reply_markup=markup)
@dp.message_handler(lambda message: message.text not in ["Male", "Female", "Other"], state=Form.gender)
async def process_gender_invalid(message: types.Message):
    """
    In this example gender has to be one of: Male, Female, Other.
    """
    return await message.reply("Bad gender name. Choose your gender from the keyboard.")
@dp.message_handler(state=Form.gender)
async def process_gender(message: types.Message, state: FSMContext):
    """Store the gender, send the collected summary, and finish the conversation."""
    async with state.proxy() as data:
        data['gender'] = message.text
        # Remove keyboard
        markup = types.ReplyKeyboardRemove()
        # And send message
        await bot.send_message(
            message.chat.id,
            md.text(
                md.text('Hi! Nice to meet you,', md.bold(data['name'])),
                md.text('Age:', md.code(data['age'])),
                md.text('Gender:', data['gender']),
                sep='\n',
            ),
            reply_markup=markup,
            parse_mode=ParseMode.MARKDOWN,
        )
    # Finish conversation
    await state.finish()
# my changes
app = FastAPI()
@app.get("/")
async def root():
    """Answer GET / with the plain string "ok"."""
    return "ok"
@app.post("/")
async def process_update(request: Request):
    """Webhook endpoint: parse the JSON body into an Update and dispatch it."""
    update = await request.json()
    update = types.Update(**update)
    print("incoming", update)
    await dp.process_update(update)
But when I run that with uvicorn (uvicorn main:app) and send /start command to the bot, the backend throws this error:
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/uvicorn/protocols/http/h11_impl.py", line 373, in run_asgi
result = await app(self.scope, self.receive, self.send)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/uvicorn/middleware/proxy_headers.py", line 75, in __call__
return await self.app(scope, receive, send)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/fastapi/applications.py", line 208, in __call__
await super().__call__(scope, receive, send)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/starlette/applications.py", line 112, in __call__
await self.middleware_stack(scope, receive, send)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/starlette/middleware/errors.py", line 181, in __call__
raise exc
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/starlette/middleware/errors.py", line 159, in __call__
await self.app(scope, receive, _send)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/starlette/exceptions.py", line 82, in __call__
raise exc
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/starlette/exceptions.py", line 71, in __call__
await self.app(scope, receive, sender)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/starlette/routing.py", line 656, in __call__
await route.handle(scope, receive, send)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/starlette/routing.py", line 259, in handle
await self.app(scope, receive, send)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/starlette/routing.py", line 61, in app
response = await func(request)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/fastapi/routing.py", line 226, in app
raw_response = await run_endpoint_function(
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/fastapi/routing.py", line 159, in run_endpoint_function
return await dependant.call(**values)
File "/home/oleh/projects/tg_bot_test_fsm/./main.py", line 124, in process_update
await dp.process_update(update)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/aiogram/dispatcher/dispatcher.py", line 257, in process_update
return await self.message_handlers.notify(update.message)
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/aiogram/dispatcher/handler.py", line 116, in notify
response = await handler_obj.handler(*args, **partial_data)
File "/home/oleh/projects/tg_bot_test_fsm/./main.py", line 38, in cmd_start
await Form.name.set()
File "/home/oleh/projects/tg_bot_test_fsm/.venv/lib/python3.8/site-packages/aiogram/dispatcher/filters/state.py", line 56, in set
state = Dispatcher.get_current().current_state()
AttributeError: 'NoneType' object has no attribute 'current_state'
As far as I understand, there is a state in the dispatcher that for some reason is not created when I use the dp.process_update() function.
When I run it with long polling, everything works fine, but I really need to run it with FastAPI.
Is there a way to set up the state manually? Or am I just not processing the update correctly?
P.S. I run it in the WSL Ubuntu 20.04 LTS. Python version is 3.8.10, aiogram - 2.15, uvicorn - 0.15.0 and Fastapi - 0.70.0.
SOLVED: if you're using FastAPI with aiogram and trying FSM, you need to set the state in another way, with the state.set_state(Form.name) function. My working start handler looks like this:
@dp.message_handler(commands='start', state="*")
async def cmd_start(message: types.Message, state: FSMContext):
    """
    Conversation's entry point
    """
    # Set state
    await state.set_state(Form.name)
    await message.reply("Hi there! What's your name?")
It's enough to set current context for Dispatcher
dp = Dispatcher(bot, storage=storage)
Dispatcher.set_current(dp)

What is the proper way of catching http error connection with Python module aiohttp?

I want to write a simple script that checks whether a website is up. If it is not, I want to catch the HTTP error code using the aiohttp module for Python. In the example below I pass in a fake website, 'http://www.googlesr2332.com'; rather than getting an HTTP error back, I get the following:
Traceback (most recent call last):
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/connector.py", line 967, in _create_direct_connection
traces=traces), loop=self._loop)
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/connector.py", line 830, in _resolve_host
self._resolver.resolve(host, port, family=self._family)
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/resolver.py", line 30, in resolve
host, port, type=socket.SOCK_STREAM, family=family)
File "/usr/local/lib/python3.7/asyncio/base_events.py", line 784, in getaddrinfo
None, getaddr_func, host, port, family, type, proto, flags)
File "/usr/local/lib/python3.7/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.7/socket.py", line 748, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -2] Name or service not known
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "main.py", line 19, in <module>
loop.run_until_complete(main())
File "/usr/local/lib/python3.7/asyncio/base_events.py", line 579, in run_until_complete
return future.result()
File "main.py", line 8, in main
async with session.get(site) as response:
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/client.py", line 1012, in __aenter__
self._resp = await self._coro
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/client.py", line 483, in _request
timeout=real_timeout
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/connector.py", line 523, in connect
proto = await self._create_connection(req, traces, timeout)
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/connector.py", line 859, in _create_connection
req, traces, timeout)
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/connector.py", line 971, in _create_direct_connection
raise ClientConnectorError(req.connection_key, exc) from exc
aiohttp.client_exceptions.ClientConnectorError: Cannot connect to host www.googlesr2332.com:80 ssl:default [Name or service not known]
Here is the sample code I am running:
import aiohttp
import asyncio
sites = ['http://www.google.com', 'http://python.org', 'http://www.facebook.com', 'http://www.googlesr2332.com']
async def main():
    """Request every URL in ``sites`` and print its status and a body preview.

    Connection-level failures are not handled here and propagate to the caller.
    """
    async with aiohttp.ClientSession() as session:
        for site in sites:
            async with session.get(site) as response:
                if response.status == 200:
                    print("Status:", response.status)
                    print("Content-type:", response.headers['content-type'])
                    html = await response.text()
                    # Slice rather than index: show the first 15 characters and
                    # avoid an IndexError on bodies shorter than 16 characters.
                    print("Body:", html[:15], "...")
                else:
                    print(response.status)
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
Your code handles the case where there is a response, but there is no code to handle the case where the connection itself fails.
import aiohttp
import asyncio
sites = ['http://www.google.com', 'http://python.org', 'http://www.facebook.com', 'http://www.googlesr2332.com']
async def main():
    """Request every URL in ``sites``, reporting connection failures instead of crashing."""
    async with aiohttp.ClientSession() as session:
        for site in sites:
            try:
                async with session.get(site) as response:
                    if response.status == 200:
                        print("Status:", response.status)
                        print("Content-type:", response.headers['content-type'])
                        html = await response.text()
                        print("Body:", html[:15], "...")
                    else:
                        print(response.status)
            except aiohttp.ClientConnectorError as e:
                # Raised when no connection to the host could be established.
                print('Connection Error', str(e))
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
When making a request to a website, you expect to get a response from it. But if your request can't reach the desired server, you can't get any response. You don't have any error handling, so you get an exception when trying to reach a website that doesn't exist. The error message is pretty much self-explanatory: Cannot connect to host www.googlesr2332.com:80 ssl:default [Name or service not known]. Consider wrapping the code that sends the request in try/except.

Task exception with aiohttp async request

I'm trying to speed up multiple GET requests to a web service using asyncio and aiohttp.
To do that, I fetch my data from a PostgreSQL database with the psycopg2 module's .fetchmany() inside a function, build the URLs for 100 records at a time, and send each list of URLs to a function named batch(), which processes them asynchronously, batch by batch.
The problem I'm facing in the batch() function is that some requests log the message below. The script continues and doesn't fail, but I'm not able to catch and log these exceptions in order to reprocess them later.
Task exception was never retrieved
future: <Task finished coro=<batch.<locals>.fetch() done, defined at C:/PythonProjects/bindings/batch_fetch.py:34> exception=ClientOSError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)>
Traceback (most recent call last):
File "C:/PythonProjects/bindings/batch_fetch.py", line 36, in fetch
async with session.get(url) as resp:
File "C:\Miniconda3\lib\site-packages\aiohttp\client.py", line 1005, in __aenter__
self._resp = await self._coro
File "C:\Miniconda3\lib\site-packages\aiohttp\client.py", line 497, in _request
await resp.start(conn)
File "C:\Miniconda3\lib\site-packages\aiohttp\client_reqrep.py", line 844, in start
message, payload = await self._protocol.read() # type: ignore # noqa
File "C:\Miniconda3\lib\site-packages\aiohttp\streams.py", line 588, in read
await self._waiter
aiohttp.client_exceptions.ClientOSError: [WinError 10054] An existing connection was forcibly closed by the remote host
Task exception was never retrieved
future: <Task finished coro=<batch.<locals>.fetch() done, defined at C:/PythonProjects/bindings/batch_fetch.py:34> exception=ClientConnectorError(10060, "Connect call failed ('xx.xxx.xx.xxx', 80)")>
Traceback (most recent call last):
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 924, in _wrap_create_connection
await self._loop.create_connection(*args, **kwargs))
File "C:\Miniconda3\lib\asyncio\base_events.py", line 778, in create_connection
raise exceptions[0]
File "C:\Miniconda3\lib\asyncio\base_events.py", line 765, in create_connection
yield from self.sock_connect(sock, address)
File "C:\Miniconda3\lib\asyncio\selector_events.py", line 450, in sock_connect
return (yield from fut)
File "C:\Miniconda3\lib\asyncio\selector_events.py", line 480, in _sock_connect_cb
raise OSError(err, 'Connect call failed %s' % (address,))
TimeoutError: [Errno 10060] Connect call failed ('xx.xxx.xx.xxx', 80)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/PythonProjects/bindings/batch_fetch.py", line 36, in fetch
async with session.get(url) as resp:
File "C:\Miniconda3\lib\site-packages\aiohttp\client.py", line 1005, in __aenter__
self._resp = await self._coro
File "C:\Miniconda3\lib\site-packages\aiohttp\client.py", line 476, in _request
timeout=real_timeout
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 522, in connect
proto = await self._create_connection(req, traces, timeout)
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 854, in _create_connection
req, traces, timeout)
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 992, in _create_direct_connection
raise last_exc
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 974, in _create_direct_connection
req=req, client_error=client_error)
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 931, in _wrap_create_connection
raise client_error(req.connection_key, exc) from exc
aiohttp.client_exceptions.ClientConnectorError: Cannot connect to host cms-uat.cme.in.here.com:80 ssl:None [Connect call failed ('xx.xxx.xx.xxx', 80)]
I'm just entering the asyncio world, as you can tell from my code, so any advice on the overall approach for this scenario is very welcome.
Thank you.
Full code below.
import psycopg2.extras
import asyncio
import json
from aiohttp import ClientSession
from aiohttp import TCPConnector
base_url = 'http://url-example/{}'
def query_db():
    """Stream rows from postgres and hand their URLs to ``batch`` 100 at a time.

    The cursor and connection are closed when the function finishes, including
    when ``batch`` or the database raises.
    """
    # connection to postgres table , fetch data.
    conn = psycopg2.connect("dbname='pac' user='user' host='db'")
    try:
        cursor = conn.cursor('psycopg2 request', cursor_factory=psycopg2.extras.NamedTupleCursor)
        try:
            sql = "select gid, paid from table"
            cursor.execute(sql)
            while True:
                rec = cursor.fetchmany(100)
                if not rec:
                    break
                # A fresh list per batch: one URL for every fetched row.
                urls = [base_url.format(item.paid) for item in rec]
                # send batch for async batch request
                batch(urls)
        finally:
            cursor.close()
    finally:
        conn.close()
def batch(urls):
    """Fetch every URL in ``urls`` concurrently and append results to JSON-lines files.

    Responses with status 200 are appended to ``sucessful.json``; other
    statuses are printed and appended to ``failed.json``.
    """
    async def fetch(url):
        async with ClientSession() as session:
            async with session.get(url) as resp:
                if resp.status == 200:
                    response = await resp.json()
                    # parse the url to fetch the point address id.
                    paid = str(resp.request_info.url).split('/')[4].split('?')[0]
                    # build the dictionary with pa id and full response.
                    resp_dict = {'paid': paid, 'response': response}
                    with open('sucessful.json', 'a') as json_file:
                        json.dump(resp_dict, json_file)
                        json_file.write("\n")
                elif resp.status is None:
                    print(resp.status)
                elif resp.status != 200:
                    print(resp.status)
                    response = await resp.json()
                    # parse the url to fetch the paid.
                    paid = str(resp.request_info.url).split('/')[4].split('?')[0]
                    # build the dictionary with paid and full response.
                    resp_dict = {'paid': paid, 'response': response}
                    with open('failed.json', 'a') as json_file:
                        json.dump(resp_dict, json_file)
                        json_file.write("\n")

    loop = asyncio.get_event_loop()
    tasks = []
    for url in urls:
        task = asyncio.ensure_future(fetch(url))
        tasks.append(task)
    try:
        # NOTE: asyncio.wait() does not re-raise exceptions from the tasks it
        # waits on, so a failure inside fetch() does not reach this handler.
        loop.run_until_complete(asyncio.wait(tasks))
    except Exception:
        print("exception consumed")
# Script entry point: run the whole fetch pipeline.
if __name__ == "__main__":
    query_db()
Task exception was never retrieved
You see this warning when you've created a task that finished with an exception, but you never explicitly retrieved (awaited) its result. See the related section of the asyncio documentation.
I bet in your case problem is with the line
loop.run_until_complete(asyncio.wait(tasks))
asyncio.wait() by default just waits until all tasks are done. It doesn't distinguish between tasks that finished normally and tasks that finished with an exception; it just blocks until everything has finished. In this case it's your job to retrieve the exceptions from the finished tasks, and the following part won't help you with that, since asyncio.wait() will never raise an error:
try:
    loop.run_until_complete(asyncio.wait(tasks))
except Exception:
    print('...')  # You will probably NEVER see this message
If you want to catch an error as soon as it happens in one of the tasks, I advise you to use asyncio.gather(). By default it raises the first exception that occurs. Note, however, that it is your job to cancel the pending tasks if you want them to shut down gracefully.

asyncio doesn't work on listening to the http request

I am running aiohttp as my server. When a request comes in, I try to spawn a process to handle it. But I get the below error:
Traceback (most recent call last): File "asyncppx.py", line 33, in
app.add_routes([web.get('/', asyncio.ensure_future(runMcows(n)))]) File
"/Users/i3ye/Programming/vsc/async/env/lib/python3.6/site-packages/aiohttp/web_app.py",
line 231, in add_routes
self.router.add_routes(routes) File "/Users/i3ye/Programming/vsc/async/env/lib/python3.6/site-packages/aiohttp/web_urldispatcher.py",
line 966, in add_routes
route_obj.register(self) File "/Users/i3ye/Programming/vsc/async/env/lib/python3.6/site-packages/aiohttp/web_routedef.py",
line 38, in register
reg(self.path, self.handler, **self.kwargs) File "/Users/i3ye/Programming/vsc/async/env/lib/python3.6/site-packages/aiohttp/web_urldispatcher.py",
line 922, in add_get
resource.add_route(hdrs.METH_HEAD, handler, **kwargs) File "/Users/i3ye/Programming/vsc/async/env/lib/python3.6/site-packages/aiohttp/web_urldispatcher.py",
line 269, in add_route
expect_handler=expect_handler) File "/Users/i3ye/Programming/vsc/async/env/lib/python3.6/site-packages/aiohttp/web_urldispatcher.py",
line 682, in init
resource=resource) File "/Users/i3ye/Programming/vsc/async/env/lib/python3.6/site-packages/aiohttp/web_urldispatcher.py",
line 103, in init
assert callable(handler), handler AssertionError: > Task was destroyed but it
is pending! task: > sys:1: RuntimeWarning: coroutine 'runMcows' was never
awaited
The code is below, any suggestions?
from aiohttp import web
import aiohttp
import asyncio
loop = asyncio.get_event_loop()
#tasks = []
n = 0
def mcowA(n):
    """Print ``n`` followed by the marker " : A"."""
    print(n, " : A")
    return
async def fetch(session, url):
    """GET ``url`` through ``session`` and return the response body as text."""
    async with session.get(url) as response:
        return await response.text()
def mcowB(n):
    """Print ``n`` followed by the marker " : B"."""
    print(n, " : B")
    return
async def runMcows(n):
    """Print marker A, fetch http://localhost:8081, print marker B, return the HTML."""
    mcowA(n)
    async with aiohttp.ClientSession() as session:
        html = await fetch(session, 'http://localhost:8081')
        mcowB(n)
        return html
try:
    app = web.Application()
    # The handler argument here is a Task object, not a callable.
    app.add_routes([web.get('/', asyncio.ensure_future(runMcows(n)))])
    loop.run_forever()
    web.run_app(app)
finally:
    loop.close()
If you look at the server example in the aiohttp documentation, your code in the main execution should look like this:
app = web.Application()
# Pass the coroutine function itself; aiohttp calls it with each request.
app.add_routes([web.get('/', runMcows)])
web.run_app(app)
To app.add_routes you need to pass the coroutine function runMcows itself; a handler takes exactly one argument, the request.
async def runMcows(request):
    """aiohttp handler: fetch http://localhost:8081 and return its HTML as the response."""
    mcowA(n)
    async with aiohttp.ClientSession() as session:
        html = await fetch(session, 'http://localhost:8081')
        mcowB(n)
        return web.Response(text=html)  # Change this response type based on what you need.

Resources