I'm trying to accelerate multiple GET requests to a web service using asyncio and aiohttp.
For that I fetch my data from a PostgreSQL database using psycopg2's .fetchmany() inside a function, build a dictionary per record, and send the URLs in batches of 100 to an async function named batch(), processing batch by batch.
The problem I'm facing in the batch() function is that some requests log the message below. The script continues and doesn't fail, but I'm not able to catch and log these exceptions so I can reprocess them later.
Task exception was never retrieved
future: <Task finished coro=<batch.<locals>.fetch() done, defined at C:/PythonProjects/bindings/batch_fetch.py:34> exception=ClientOSError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)>
Traceback (most recent call last):
File "C:/PythonProjects/bindings/batch_fetch.py", line 36, in fetch
async with session.get(url) as resp:
File "C:\Miniconda3\lib\site-packages\aiohttp\client.py", line 1005, in __aenter__
self._resp = await self._coro
File "C:\Miniconda3\lib\site-packages\aiohttp\client.py", line 497, in _request
await resp.start(conn)
File "C:\Miniconda3\lib\site-packages\aiohttp\client_reqrep.py", line 844, in start
message, payload = await self._protocol.read() # type: ignore # noqa
File "C:\Miniconda3\lib\site-packages\aiohttp\streams.py", line 588, in read
await self._waiter
aiohttp.client_exceptions.ClientOSError: [WinError 10054] An existing connection was forcibly closed by the remote host
Task exception was never retrieved
future: <Task finished coro=<batch.<locals>.fetch() done, defined at C:/PythonProjects/bindings/batch_fetch.py:34> exception=ClientConnectorError(10060, "Connect call failed ('xx.xxx.xx.xxx', 80)")>
Traceback (most recent call last):
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 924, in _wrap_create_connection
await self._loop.create_connection(*args, **kwargs))
File "C:\Miniconda3\lib\asyncio\base_events.py", line 778, in create_connection
raise exceptions[0]
File "C:\Miniconda3\lib\asyncio\base_events.py", line 765, in create_connection
yield from self.sock_connect(sock, address)
File "C:\Miniconda3\lib\asyncio\selector_events.py", line 450, in sock_connect
return (yield from fut)
File "C:\Miniconda3\lib\asyncio\selector_events.py", line 480, in _sock_connect_cb
raise OSError(err, 'Connect call failed %s' % (address,))
TimeoutError: [Errno 10060] Connect call failed ('xx.xxx.xx.xxx', 80)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/PythonProjects/bindings/batch_fetch.py", line 36, in fetch
async with session.get(url) as resp:
File "C:\Miniconda3\lib\site-packages\aiohttp\client.py", line 1005, in __aenter__
self._resp = await self._coro
File "C:\Miniconda3\lib\site-packages\aiohttp\client.py", line 476, in _request
timeout=real_timeout
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 522, in connect
proto = await self._create_connection(req, traces, timeout)
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 854, in _create_connection
req, traces, timeout)
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 992, in _create_direct_connection
raise last_exc
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 974, in _create_direct_connection
req=req, client_error=client_error)
File "C:\Miniconda3\lib\site-packages\aiohttp\connector.py", line 931, in _wrap_create_connection
raise client_error(req.connection_key, exc) from exc
aiohttp.client_exceptions.ClientConnectorError: Cannot connect to host cms-uat.cme.in.here.com:80 ssl:None [Connect call failed ('xx.xxx.xx.xxx', 80)]
I'm just entering the asyncio world, as you can tell from my code, so any advice on the overall approach for this scenario is very welcome.
Thank you.
Full code below.
import psycopg2.extras
import asyncio
import json

from aiohttp import ClientSession
from aiohttp import TCPConnector

base_url = 'http://url-example/{}'


def query_db():
    urls = []
    # connection to postgres table , fetch data.
    conn = psycopg2.connect("dbname='pac' user='user' host='db'")
    cursor = conn.cursor('psycopg2 request', cursor_factory=psycopg2.extras.NamedTupleCursor)
    sql = "select gid, paid from table"
    cursor.execute(sql)
    while True:
        rec = cursor.fetchmany(100)
        for item in rec:
            record = {"gid": item.gid, "url": base_url.format(item.paid)}
            urls.append(record.get('url'))
        if not rec:
            break
        # send batch for async batch request
        batch(urls)
        # empty list of urls for new async batch request
        urls = []


def batch(urls):
    async def fetch(url):
        async with ClientSession() as session:
            async with session.get(url) as resp:
                if resp.status == 200:
                    response = await resp.json()
                    # parse the url to fetch the point address id.
                    paid = str(resp.request_info.url).split('/')[4].split('?')[0]
                    # build the dictionary with pa id and full response.
                    resp_dict = {'paid': paid, 'response': response}
                    with open('sucessful.json', 'a') as json_file:
                        json.dump(resp_dict, json_file)
                        json_file.write("\n")
                elif resp.status is None:
                    print(resp.status)
                elif resp.status != 200:
                    print(resp.status)
                    response = await resp.json()
                    # parse the url to fetch the paid.
                    paid = str(resp.request_info.url).split('/')[4].split('?')[0]
                    # build the dictionary with paid and full response.
                    resp_dict = {'paid': paid, 'response': response}
                    with open('failed.json', 'a') as json_file:
                        json.dump(resp_dict, json_file)
                        json_file.write("\n")

    loop = asyncio.get_event_loop()
    tasks = []
    for url in urls:
        task = asyncio.ensure_future(fetch(url))
        tasks.append(task)
    try:
        loop.run_until_complete(asyncio.wait(tasks))
    except Exception:
        print("exception consumed")


if __name__ == "__main__":
    query_db()
Task exception was never retrieved
You see this warning when you've created a task that finished with an exception, but you never explicitly retrieved (awaited) its result. Here's the related doc section.
I bet in your case the problem is with this line:
loop.run_until_complete(asyncio.wait(tasks))
asyncio.wait() by default just waits until all tasks are done. It doesn't distinguish between tasks that finished normally and tasks that finished with an exception; it just blocks until everything is finished. In this case it's your job to retrieve the exceptions from the finished tasks, and the following part won't help you with that, since asyncio.wait() will never raise an error:
try:
    loop.run_until_complete(asyncio.wait(tasks))
except Exception:
    print('...')  # You will probably NEVER see this message
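If you stay with asyncio.wait(), one option (a minimal sketch, reusing the loop and tasks from your batch() function) is to retrieve the exceptions from the finished tasks yourself:

done, pending = loop.run_until_complete(asyncio.wait(tasks))
for task in done:
    exc = task.exception()  # retrieving it silences "Task exception was never retrieved"
    if exc is not None:
        print('request failed:', repr(exc))  # log it here so you can reprocess later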
If you want to catch an error as soon as it happens in one of the tasks, I advise you to use asyncio.gather(). By default it raises the first exception that occurs. Note, however, that it is your job to cancel the pending tasks if you want them shut down gracefully.
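As a concrete illustration for your batch() function, here is a hedged sketch that passes return_exceptions=True to gather() so every task is collected and the failed URLs can be written out for reprocessing; the file name failed_urls.txt is just a placeholder:

tasks = [asyncio.ensure_future(fetch(url)) for url in urls]
# With return_exceptions=True, exceptions are returned as results instead of
# being raised, so no task is left with an unretrieved exception.
results = loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True))
for url, result in zip(urls, results):
    if isinstance(result, Exception):
        # collect failures for a later retry pass
        with open('failed_urls.txt', 'a') as f:
            f.write('{}\t{}\n'.format(url, repr(result)))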
Related
I have an old bot that I'm trying to migrate to discord.py v2. asyncpg isn't working and I'm not sure why. I've tried putting the loop and pool in on_ready and in setup_hook, but that didn't work either.
import discord
from discord import app_commands
import aiohttp
import asyncio
import requests
from discord.ext import commands
import os
import asyncpg

# Pydest

# Define Client
class Aclient(commands.Bot):
    def __init__(self):
        super().__init__(command_prefix='.', intents=discord.Intents.default())
        self.synced = False

    async def setup_hook(self):
        cog = await self.load_extension('cogs.weather')
        print(f"Loaded All Cogs")
        if not self.synced:
            await self.tree.sync()
            self.synced = True

    async def on_ready(self):
        await self.wait_until_ready()
        if not self.synced:
            await self.tree.sync()
            self.synced = True
        await client.change_presence(status=discord.Status.dnd)
        print(f'We have logged in as {self.user}')

client = Aclient()

# test command
@client.tree.command(name="test", description="testing")
async def test(interaction: discord.Interaction, name: str):
    async with pool.acquire() as connection:
        async with connection.transaction():
            check = await connection.fetchrow('select * from weather')
            print(check)
        await connection.close()

loop = asyncio.get_event_loop()
pool = loop.run_until_complete(asyncpg.create_pool(**credentials))
I'm getting this error:
Traceback (most recent call last):
File "/Users/prathik/Documents/GitHub/red/YELLOW/bot.py", line 61, in test
async with connection.transaction():
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/asyncpg/transaction.py", line 62, in __aenter__
await self.start()
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/asyncpg/transaction.py", line 138, in start
await self._connection.execute(query)
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/asyncpg/connection.py", line 317, in execute
return await self._protocol.query(query, timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "asyncpg/protocol/protocol.pyx", line 338, in query
RuntimeError: Task <Task pending name='CommandTree-invoker' coro=<CommandTree._from_interaction.<locals>.wrapper() running at /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/discord/app_commands/tree.py:1089>> got Future <Future pending cb=[Protocol._on_waiter_completed()]> attached to a different loop
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/discord/app_commands/commands.py", line 851, in _do_call
return await self._callback(interaction, **params) # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/prathik/Documents/GitHub/red/YELLOW/bot.py", line 60, in test
async with pool.acquire() as connection:
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/asyncpg/pool.py", line 220, in release
raise ex
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/asyncpg/pool.py", line 210, in release
await self._con.reset(timeout=budget)
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/asyncpg/connection.py", line 1366, in reset
await self.execute(reset_query, timeout=timeout)
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/asyncpg/connection.py", line 317, in execute
return await self._protocol.query(query, timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "asyncpg/protocol/protocol.pyx", line 323, in query
File "asyncpg/protocol/protocol.pyx", line 707, in asyncpg.protocol.protocol.BaseProtocol._check_state
asyncpg.exceptions._base.InterfaceError: cannot perform operation: another operation is in progress
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/discord/app_commands/tree.py", line 1240, in _call
await command._invoke_with_namespace(interaction, namespace)
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/discord/app_commands/commands.py", line 876, in _invoke_with_namespace
return await self._do_call(interaction, transformed_values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/discord/app_commands/commands.py", line 869, in _do_call
raise CommandInvokeError(self, e) from e
discord.app_commands.errors.CommandInvokeError: Command 'test' raised an exception: InterfaceError: cannot perform operation: another operation is in progress
Does anyone know why this is happening?
I want to write a simple script that checks whether a website is up. If it is not, I want to catch the HTTP error code using the aiohttp module for Python. In the example below I pass in a fake website, 'http://www.googlesr2332.com'. Rather than returning the HTTP error, I get the following:
Traceback (most recent call last):
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/connector.py", l
ine 967, in _create_direct_connection traces=traces), loop=self._loop)
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/connector.py", l
ine 830, in _resolve_host
self._resolver.resolve(host, port, family=self._family) File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/resolver.py", li
ne 30, in resolve
host, port, type=socket.SOCK_STREAM, family=family)
File "/usr/local/lib/python3.7/asyncio/base_events.py", line 784, in getaddrinfo
None, getaddr_func, host, port, family, type, proto, flags)
File "/usr/local/lib/python3.7/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.7/socket.py", line 748, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -2] Name or service not known
The above exception was the direct cause of the following exception:
Traceback (most recent call last): File "main.py", line 19, in <module>
loop.run_until_complete(main())
File "/usr/local/lib/python3.7/asyncio/base_events.py", line 579, in run_until_complete
return future.result()
File "main.py", line 8, in main
async with session.get(site) as response:
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/client.py", line
1012, in __aenter__
self._resp = await self._coro
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/client.py", line 483, in _request
timeout=real_timeout
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/connector.py", l
ine 523, in connect
proto = await self._create_connection(req, traces, timeout)
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/connector.py", l
ine 859, in _create_connection req, traces, timeout)
File "/home/runner/.local/share/virtualenvs/python3/lib/python3.7/site-packages/aiohttp/connector.py", l
ine 971, in _create_direct_connection
raise ClientConnectorError(req.connection_key, exc) from exc
aiohttp.client_exceptions.ClientConnectorError: Cannot connect to host www.googlesr2332.com:80 ssl:default [Name or service not known]
Here is the sample code I am running:
import aiohttp
import asyncio

sites = ['http://www.google.com', 'http://python.org', 'http://www.facebook.com', 'http://www.googlesr2332.com']

async def main():
    async with aiohttp.ClientSession() as session:
        for site in sites:
            async with session.get(site) as response:
                if response.status == 200:
                    print("Status:", response.status)
                    print("Content-type:", response.headers['content-type'])
                    html = await response.text()
                    print("Body:", html[15], "...")
                else:
                    print(response.status)

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
You have code for the case where there is a response, but no code to handle the case where the connection itself runs into trouble.
import aiohttp
import asyncio

sites = ['http://www.google.com', 'http://python.org', 'http://www.facebook.com', 'http://www.googlesr2332.com']

async def main():
    async with aiohttp.ClientSession() as session:
        for site in sites:
            try:
                async with session.get(site) as response:
                    if response.status == 200:
                        print("Status:", response.status)
                        print("Content-type:", response.headers['content-type'])
                        html = await response.text()
                        print("Body:", html[:15], "...")
                    else:
                        print(response.status)
            except aiohttp.ClientConnectorError as e:
                print('Connection Error', str(e))

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
When making a request to a website, you expect to get a response from it. But if your request can't reach the desired server, you can't get any response at all. You don't have any error handling, so you get an exception when trying to reach a website that doesn't exist. The error message is pretty much self-explanatory: Cannot connect to host www.googlesr2332.com:80 ssl:default [Name or service not known]. Consider wrapping your request-sending code in try/except.
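If you also want one except clause to cover other network failures (connection resets, timeouts, DNS errors), a hedged variation is to catch aiohttp's common base class aiohttp.ClientError together with asyncio.TimeoutError; how fine-grained to be is a design choice, not something the question requires:

try:
    async with session.get(site) as response:
        print("Status:", response.status)
except (aiohttp.ClientError, asyncio.TimeoutError) as e:
    # ClientError is the base class of ClientConnectorError, ClientOSError, etc.
    print('Request failed for', site, '-', repr(e))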
I have an asynchronous function to get data from the site:
async def get_matches_info(url):
    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(url, proxy=proxy) as response:
                ...
                ...
                ...
                ...
        except:
            print('ERROR GET URL: ', url)
            print(traceback.print_exc())
I have a list of about 200 links. Almost always everything is OK, but sometimes I get the following error:
Traceback (most recent call last):
File "C:\Python37\lib\site-packages\aiohttp\connector.py", line 924, in _wrap_create_connection
await self._loop.create_connection(*args, **kwargs))
File "C:\Python37\lib\asyncio\base_events.py", line 986, in create_connection
ssl_handshake_timeout=ssl_handshake_timeout)
File "C:\Python37\lib\asyncio\base_events.py", line 1014, in _create_connection_transport
await waiter
ConnectionResetError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "parser.py", line 90, in get_matches_info
async with session.get(url, proxy=proxy) as response:
File "C:\Python37\lib\site-packages\aiohttp\client.py", line 1005, in __aenter__
self._resp = await self._coro
File "C:\Python37\lib\site-packages\aiohttp\client.py", line 476, in _request
timeout=real_timeout
File "C:\Python37\lib\site-packages\aiohttp\connector.py", line 522, in connect
proto = await self._create_connection(req, traces, timeout)
File "C:\Python37\lib\site-packages\aiohttp\connector.py", line 851, in _create_connection
req, traces, timeout)
File "C:\Python37\lib\site-packages\aiohttp\connector.py", line 1085, in _create_proxy_connection
req=req)
File "C:\Python37\lib\site-packages\aiohttp\connector.py", line 931, in _wrap_create_connection
raise client_error(req.connection_key, exc) from exc
aiohttp.client_exceptions.ClientConnectorError: Cannot connect to host www.myscore.com.ua:443 ssl:None [None]
I checked all the links from the errors and they are working. Why might this happen?
This is probably the server limiting concurrent requests because it thinks you are running a DoS attack. If you are in control of the server and it is running Apache, you can raise that limit in the httpd configuration via MaxKeepAliveRequests.
If not, you can also limit the number of concurrent requests on your side with an asyncio semaphore. The example below sets that limit to 100 concurrent requests.
async def get_matches_info(url):
    sem = asyncio.Semaphore(100)
    async with sem:
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(url, proxy=proxy) as response:
                    ...
Note that if you call this function repeatedly, a new semaphore is created on every call, so you might want to create the semaphore outside the function and pass it in as a parameter, as sketched below.
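A minimal sketch of that idea, assuming one shared semaphore and one shared session created by the caller (the proxy argument from the question is omitted here, and main() is just a placeholder driver):

import asyncio
import aiohttp

async def get_matches_info(url, session, sem):
    # the shared semaphore caps how many requests run concurrently
    async with sem:
        try:
            async with session.get(url) as response:
                return await response.text()
        except aiohttp.ClientError as e:
            print('ERROR GET URL:', url, repr(e))

async def main(urls):
    sem = asyncio.Semaphore(100)  # created once, shared by every task
    async with aiohttp.ClientSession() as session:
        tasks = [get_matches_info(url, session, sem) for url in urls]
        return await asyncio.gather(*tasks)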
I need to reach a page using a client certificate with pyppeteer.
I have seen that it is possible with puppeteer itself here,
but my attempts lead to an invalid state error with Python 3. I'm not sure whether my attempt is entirely correct, but it follows the spirit of the issue comment in puppeteer. I would expect it to be possible, hence my attempts. Here is the error:
[E:pyppeteer.connection] connection unexpectedly closed
Task exception was never retrieved
future: <Task finished coro=<Connection._async_send() done, defined at /home/data/experim/jaenv/lib/python3.6/site-packages/pyppeteer/connection.py:69> exception=InvalidStateError('invalid state',)>
Traceback (most recent call last):
File "/home/data/experim/jaenv/lib/python3.6/site-packages/pyppeteer/connection.py", line 73, in _async_send
await self.connection.send(msg)
File "/home/data/experim/jaenv/lib/python3.6/site-packages/websockets/protocol.py", line 467, in send
yield from self.write_frame(True, OP_TEXT, data.encode('utf-8'))
File "/home/data/experim/jaenv/lib/python3.6/site-packages/websockets/protocol.py", line 919, in write_frame
yield from self.ensure_open()
File "/home/data/experim/jaenv/lib/python3.6/site-packages/websockets/protocol.py", line 646, in ensure_open
) from self.transfer_data_exc
websockets.exceptions.ConnectionClosed: WebSocket connection is closed: code = 1006 (connection closed abnormally [internal]), no reason
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/data/experim/jaenv/lib/python3.6/site-packages/pyppeteer/connection.py", line 79, in _async_send
await self.dispose()
File "/home/data/experim/jaenv/lib/python3.6/site-packages/pyppeteer/connection.py", line 170, in dispose
await self._on_close()
File "/home/data/experim/jaenv/lib/python3.6/site-packages/pyppeteer/connection.py", line 153, in _on_close
f'Protocol error {cb.method}: Target closed.', # type: ignore
asyncio.base_futures.InvalidStateError: invalid state
This is the attempt that leads to the above error:
import asyncio

from pyppeteer import launch
from pyppeteer.network_manager import Request
import requests

async def interceptor(interceptReq, cert, key):
    res = requests.get(interceptReq.url, cert=('../../widshared/certexp.cer', '../../widshared/certpriv.pem'))
    if not res.ok:
        return await interceptReq.abort('connectionrefused')
    else:
        return await interceptReq.respond({
            'status': res.status_code,
            'headers': res.headers,
            'body': res.content
        })

async def main():
    browser = await launch(headless=True)
    page = await browser.newPage()
    await page.setRequestInterception(True)
    with open("../../widshared/certexp.cer", 'r') as cr:
        cert = cr.read()
    with open("../../widshared/certpriv.pem", 'r') as cr:
        key = cr.read()
    page.on('request', lambda interceptReq: asyncio.ensure_future(interceptor(interceptReq, cert, key)))
    await page.goto('https://client.badssl.com')
    await page.screenshot({'path': 'clientbadssl.png'})
    await browser.close()

asyncio.get_event_loop().run_until_complete(main())
I want to create a custom request manager for a crawler, with dynamic waiting.
My crawler needs to make requests to sites that prohibit parallel requests from the same IP address. If such blocking occurs, requests come back with HTTP error codes 403, 503, 429, etc.
In case of an error I want to wait some time and repeat the request, but for the parsers' simplicity they should just call get and receive the correct page.
I want to use aiohttp and the new async with syntax of Python 3.5, so my parser classes can use async with on my requester class the same way they would with aiohttp.ClientSession, like this:
# somewhere in a parser
async def get_page(self, requester, page_index):
    async with requester.get(URL_FMT.format(page_index)) as response:
        html_content = await response.read()
        result = self.parsing_page(html_content)
        return result
If requester is an aiohttp.ClientSession, then response is an aiohttp.ClientResponse, which has __aenter__ and __aexit__ methods, so async with works as expected.
But if I put my requester class in the middle, it no longer works:
Traceback (most recent call last):
File "/opt/project/api/tornado_runner.py", line 6, in <module>
from api import app
File "/opt/project/api/api.py", line 20, in <module>
loop.run_until_complete(session.login())
File "/usr/local/lib/python3.5/asyncio/base_events.py", line 337, in run_until_complete
return future.result()
File "/usr/local/lib/python3.5/asyncio/futures.py", line 274, in result
raise self._exception
File "/usr/local/lib/python3.5/asyncio/tasks.py", line 239, in _step
result = coro.send(None)
File "/opt/project/api/viudata/session.py", line 72, in login
async with self.get('https://www.viudata.com') as resp:
AttributeError: __aexit__
Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x7f44f61ef240>
My requester class looks like this:
class Requester:
    def __init__(self, session: aiohttp.ClientSession):
        self.session = session

    async def get(self, *args, **kwargs):
        is_result_successful = False
        while not is_result_successful:
            response = await self.session.get(*args, **kwargs)
            if response.status in [503, 403, 429]:
                await self.wait_some_time()
            else:
                is_result_successful = True
        return response
From my understanding, self.session.get is a coroutine function, so I await it. The result is an aiohttp.ClientResponse, which has __aenter__ and __aexit__. But when I return it, the parser's async with block fails with the odd error above.
What do I need to change so that my requester class can be used with async with the same way as aiohttp.ClientSession?
You should write additional code to support the async with protocol.
See client.request() and _RequestContextManager for inspiration.
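One way to do that (a minimal sketch, not aiohttp's internal implementation; wait_some_time() is the helper from the question and is left undefined here) is to have Requester.get() return a small wrapper object that implements __aenter__ and __aexit__, so the parser's async with keeps working and the response is cleaned up on exit:

import aiohttp

class _RequesterContextManager:
    """Lets requester.get(url) be used both with 'await' and with 'async with'."""

    def __init__(self, coro):
        self._coro = coro
        self._response = None

    def __await__(self):
        return self._coro.__await__()

    async def __aenter__(self):
        self._response = await self._coro
        return self._response

    async def __aexit__(self, exc_type, exc, tb):
        # release the connection back to the session's pool on exit
        self._response.release()

class Requester:
    def __init__(self, session: aiohttp.ClientSession):
        self.session = session

    def get(self, *args, **kwargs):
        # deliberately not 'async def': the context manager is returned directly,
        # so 'async with requester.get(url) as response:' finds an __aexit__.
        return _RequesterContextManager(self._get(*args, **kwargs))

    async def _get(self, *args, **kwargs):
        while True:
            response = await self.session.get(*args, **kwargs)
            if response.status in (503, 403, 429):
                response.close()  # drop this attempt before retrying
                await self.wait_some_time()
            else:
                return response

Depending on the aiohttp version, release() may itself be a coroutine; adjust the __aexit__ body accordingly.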