Why does asyncio.create_task not run the method? - python-3.x

Code example:
# Question snippet, re-indented (the extraction dropped all leading whitespace).
# `download_queue`, `headers`, `url`, `find_somewhere_url` and
# `NoSuchElementException` are defined outside the visible code.
async def download_page(session, url):
    """Placeholder download coroutine; it only prints ``True``."""
    print(True)


async def downloader_init(session):
    """Pull URLs off ``download_queue`` forever and spawn one task per URL."""
    while True:
        url = await download_queue.get()
        task = asyncio.create_task(download_page(session, url))
        print(task)
        print(f"url: {url}")


async def get_urls(url):
    """Queue URLs until ``NoSuchElementException`` is raised, then return True."""
    while True:
        try:
            url = find_somewhere_url
            await download_queue.put(url)
        except NoSuchElementException:
            break
    return True


async def main():
    """Start the producer and consumer tasks inside one aiohttp session."""
    async with aiohttp.ClientSession(headers=headers) as session:
        get_urls_task = asyncio.create_task(get_urls(url))
        downloader_init_task = asyncio.create_task(downloader_init(session))
        # NOTE: this is the behaviour the question asks about. The awaitable
        # returned by gather() is never awaited, so main() does not wait for
        # either task before leaving the `async with` block.
        asyncio.gather(get_urls_task, downloader_init_task)


if __name__ == "__main__":
    asyncio.get_event_loop().run_until_complete(main())
Output:
<Task pending coro=<download_page() running at main.py:69>>
url: https://someurl.com/example
<Task pending coro=<download_page() running at main.py:69>>
url: https://someurl.com/example
<Task pending coro=<download_page() running at main.py:69>>
url: https://someurl.com/example
Why is the method download_page not executed?
The strange thing is that the script just ends its work, there are no errors anywhere.
downloader_init should work endlessly, but it does not.
In download_queue, method get_urls adds links as it finds them, after which it stops working.
downloader_init should immediately execute as soon as a new link appears in the queue, but it starts its work only when get_urls has completed its work.

Try this instead:
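Note: Your problem wasn't with the task creation; it was the missing await
on asyncio.gather(...). Without it, main() does not wait for the gathered tasks, so it returns (closing the session and ending run_until_complete) while they are still pending.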
import asyncio
import aiohttp
async def download_page(session, url):
    """Stand-in for the real download; it just reports what it was given."""
    # Dummy function.
    print(f"session={session}, url={url}")


async def downloader_init(session):
    """Consume URLs from ``download_queue`` forever, one download task per URL.

    ``download_queue`` is defined outside this snippet.
    """
    # The event loop keeps only weak references to tasks, so hold a strong
    # reference to each one until it finishes; otherwise a fire-and-forget
    # task may be garbage-collected before it completes.
    background_tasks = set()
    while True:
        url = await download_queue.get()
        task = asyncio.create_task(download_page(session, url))
        background_tasks.add(task)
        task.add_done_callback(background_tasks.discard)
        print(f"task={task}, url={url}")


async def get_urls(url):
    """Queue URLs until ``find_somewhere_url`` raises ``NoSuchElementException``."""
    while True:
        try:
            url = find_somewhere_url()
            await download_queue.put(url)
        except NoSuchElementException:
            break


async def main():
    """Run the URL producer and the downloader concurrently in one session."""
    async with aiohttp.ClientSession(headers=headers) as session:
        get_urls_task = asyncio.create_task(get_urls(url))
        downloader_init_task = asyncio.create_task(downloader_init(session))
        # Use await here to make it finish the tasks.
        await asyncio.gather(get_urls_task, downloader_init_task)


if __name__ == "__main__":
    # Use this as it deals with the loop creation, shutdown,
    # and other stuff for you.
    asyncio.run(main())  # This is new in Python 3.7

Related

Running coroutines in different thread with same event loop

I want to run a coroutine in a different thread and get the result that the coroutine returns.
class Main:
    """Question snippet: sync code that needs the result of a coroutine."""

    def __init__(self, result_from_io_task=None):
        self._io_task_result = result_from_io_task

    async def io_task(self):
        """Sleep for two seconds, then return a fixed string."""
        await asyncio.sleep(2)
        return "slept of 2s"

    def non_async_func(self):
        """Try to obtain ``io_task``'s result from synchronous code.

        Kept as posted. It returns nothing.
        """
        # This can't be made async.
        if not self._io_task_result:
            # run io_task and get its result
            # event loop will be running in the main thread so I can fire the task
            # NOTE: `self.io_task` is passed uncalled here, as in the question.
            task = asyncio.create_task(self.io_task)
            # can't await task since I am in non-async func and I cannot
            # return from non_async_func until and unless I know what
            # self.io_task has returned. Tried following but my app hangs forever.
            # NOTE: this loop never yields control, so nothing else can run
            # on the calling thread while it spins.
            while not task.done():
                pass
I also tried the following, but it doesn't work:
def run_in_thread(coro, loop):
    """Submit *coro* to *loop* from a helper thread.

    Returns a list that the helper thread appends the resulting
    ``concurrent.futures.Future`` to. The list is returned right after the
    thread is started, without joining it, so it may still be empty when the
    caller first looks at it (kept as posted in the question).
    """
    output = []

    def run():
        fut = asyncio.run_coroutine_threadsafe(coro, loop)
        output.append(fut)

    thr = Thread(target=run)
    thr.start()
    return output


async def main():
    """Build a ``Main`` without a cached result and call its sync method."""
    main_obj = Main(result_from_io_task=None)
    v = main_obj.non_async_func()
How can I spawn a new thread and run the given coroutine using the event loop running in the main thread?
Unfortunately, my codebase depends on python < 3.8 and asyncio.to_thread is not available in python 3.7
Based on the example of my answer, I'm introducing another implementation of the asynchronous decorator that does not use asyncio.to_thread() function but uses ThreadPoolExecutor instead.
import asyncio
import requests
import concurrent.futures
def asynchronous(func):
    """Turn a blocking callable into a coroutine function.

    The wrapped call runs in the event loop's default executor (a thread
    pool), and the wrapper awaits it, so other tasks on the loop keep running
    while *func* blocks. Works without ``asyncio.to_thread``.

    Args:
        func: a regular (synchronous) callable.

    Returns:
        A coroutine function with the same signature; awaiting it yields
        whatever *func* returns and re-raises whatever *func* raises.
    """
    import functools  # local import: the file's import block does not have it

    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        loop = asyncio.get_running_loop()
        # run_in_executor() only forwards positional arguments, so bind
        # everything up front with partial().
        call = functools.partial(func, *args, **kwargs)
        # Awaiting the executor future (instead of calling future.result())
        # is what keeps the event loop free while the thread works.
        return await loop.run_in_executor(None, call)

    return wrapper
# The decorator is required: main() passes request(...) to
# asyncio.create_task(), which needs a coroutine.
@asynchronous
def request(url):
    """GET *url*; return the parsed JSON body, or the raw text if it is not JSON."""
    with requests.Session() as session:
        response = session.get(url)
        try:
            return response.json()
        except requests.JSONDecodeError:
            return response.text
async def main():
    """Start the request as a task, then wait for and print its result."""
    task = asyncio.create_task(request("https://google.com/"))
    print("waiting for response...")
    result = await task
    print(result)


if __name__ == "__main__":
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
    loop.close()

Need to parse two sessions at the same time with telethon on Python

I have some trouble parsing two or more sessions at the same time with Telethon. I have tried this:
class NewSession:
    """One Telegram client bound to a named session."""

    def __init__(self, session_name):
        self.client = TelegramClient(session_name, api_id, api_hash)
        self.session_name = session_name

    async def pool(self):
        """Register the message handler and run the client until it disconnects."""
        print("working with:", self.session_name)

        # The extraction had turned this decorator's "@" into "#"; without it
        # the handler below is never registered with the client.
        @self.client.on(events.NewMessage(outgoing=True))
        async def main(event):
            message = event.message.to_dict()
            msg_text = message['message']
            print(msg_text)

        try:
            await self.client.start()
            await self.client.run_until_disconnected()
        finally:
            await self.client.disconnect()
async def main():
    """Question version: run both sessions (one after the other)."""
    user = NewSession("321")
    user2 = NewSession("123")
    # NOTE: kept as posted. The second pool() is not started until the first
    # await has returned, which is the behaviour the question asks about.
    await user.pool()
    await user2.pool()


if __name__ == '__main__':
    asyncio.run(main())
But only one is working. Need help :)
The problem is inside your main function. Awaiting a coroutine suspends main until that coroutine finishes; execution does not move on to the next statement in the meantime. So, in your code, the line await user2.pool() is only executed once the user.pool() coroutine returns, which is when the session '321' is disconnected.
You need to run the tasks concurrently; you can use the function asyncio.gather. Reworking your main:
async def main():
    """Run both sessions concurrently and wait for both to finish."""
    user = NewSession("321")
    user2 = NewSession("123")
    # gather() schedules both coroutines at once instead of one after the other.
    await asyncio.gather(user.pool(), user2.pool())

Concurrency Loop in python

My Scenario:- Start, Wait, Start, Stop or Kill
Starting the first event & waiting for some time.
If I reach the waiting time, I need to start the second event & return both event result.
But, if the first event completed before waiting time.
No need to start the second event.
Return the first event result
Ex:-
import asyncio
async def some_task():
    """Simulate two seconds of I/O and return ``"hello"``."""
    print('io start')
    await asyncio.sleep(2)
    print('io end')
    return "hello"


async def callback(loop):
    """After four seconds, print a message if *loop* is still running."""
    await asyncio.sleep(4)
    if loop.is_running():
        print('doing other things')


if __name__ == "__main__":
    # NOTE: kept as posted. Both names come from the same get_event_loop()
    # call made on the same thread, one right after the other.
    loop = asyncio.get_event_loop()
    loop2 = asyncio.get_event_loop()
    a = loop.create_task(some_task())
    b = loop2.create_task(callback(loop))
    result = loop.run_until_complete(a)
    loop2.run_until_complete(b)
    loop.close()
    loop2.close()
The variables loop and loop2 will get the same (main) event loop, which is why it will always still be running. Here is my approach:
import asyncio
async def async_main(task_delay=6, callback_delay=4):
    """Run the slow task and the watchdog concurrently; return both results.

    Args:
        task_delay: seconds ``some_task`` sleeps before producing its result.
        callback_delay: seconds ``callback`` waits before checking on it.

    Returns:
        ``[task_result, None]``, the gathered return values in call order.
    """
    result = None

    async def some_task():
        # Without ``nonlocal`` the assignment below would bind a new local
        # name and ``callback`` would always see ``None``.
        nonlocal result
        print('io start')
        await asyncio.sleep(task_delay)
        print('io end')
        result = "hello"
        return result

    async def callback():
        await asyncio.sleep(callback_delay)
        # Only do the extra work if the first task has not finished yet.
        if result is None:
            print('doing other things')

    awaited = await asyncio.gather(
        asyncio.create_task(some_task()),
        asyncio.create_task(callback()),
    )
    return awaited


if __name__ == "__main__":
    asyncio.run(async_main())
Asyncio can be very confusing and I do not recommend it for non-experts, so here is an alternative using threading instead of asyncio:
import threading
import time
def do_tasks(task_delay=2, callback_delay=4):
    """Start the slow task and the watchdog in two threads and return at once.

    Args:
        task_delay: seconds ``some_task`` sleeps before setting its result.
        callback_delay: seconds ``callback`` waits before checking on it.

    The threads are not joined here; the function returns ``None`` while
    they are still running.
    """
    result = None

    def some_task():
        nonlocal result
        print('io start')
        time.sleep(task_delay)
        print('io end')
        result = "hello"

    def callback():
        time.sleep(callback_delay)
        # Only do the extra work if the first task has not finished yet.
        if result is None:
            print('doing other things')

    threading.Thread(target=some_task).start()
    threading.Thread(target=callback).start()


if __name__ == "__main__":
    do_tasks()

Python: manager.Queue() with asyncio. How to resolve deadlock?

I am trying to figure out how to have a websocket based server listen to incoming requests, place them in a queue for another process to do work, then place the results in another queue where the websocket based server can wait for said result and send the response back to the client.
This is just me trying to learn and gain more experience with both asyncio and sharing data between processes. I am using Python 3.9.2 64bit.
Right now I am stuck with a deadlock as commented in the "producer_handler" function in the server code. Here is the code I am playing with:
Server:
import asyncio
import logging
import time
from multiprocessing import Manager, Process
import websockets
logging.root.setLevel(0)
def server(recievequeue, sendqueue):
    """Run the websocket server process (question version, kept as posted).

    Args:
        recievequeue: queue that incoming client requests are put on.
        sendqueue: queue that outgoing responses are read from.
    """

    async def consumer_handler(websocket, path):
        """Receive requests from the client and put them on ``recievequeue``."""
        while True:
            logging.info('Waiting for request')
            try:
                request = await websocket.recv()
            except Exception as exception:
                logging.warning(f'consumer_handler Error: {exception}')
                break
            logging.info(f'Request: {request}')
            recievequeue.put(request)
            logging.info('Request placed in recievequeue')

    async def producer_handler(websocket, path):
        """Take responses off ``sendqueue`` and send them to the client."""
        while True:
            logging.info('Waiting for response')
            # NOTE: this get() is a plain blocking call inside a coroutine, so
            # nothing else on this event loop can run until it returns.
            response = sendqueue.get()  # Deadlock is here.
            try:
                await websocket.send(response)
            except Exception as exception:
                logging.warning(f'producer_handler Error: {exception}')
                break
            logging.info('Response sent')

    async def handler(websocket, path):
        """Run both handlers for one connection until the first one finishes."""
        consumer_task = asyncio.ensure_future(consumer_handler(websocket, path))
        producer_task = asyncio.ensure_future(producer_handler(websocket, path))
        done, pending = await asyncio.wait(
            [producer_task, consumer_task],
            return_when=asyncio.FIRST_COMPLETED,
        )
        for task in done:
            logging.info(f'Canceling: {task}')
            task.cancel()
        for task in pending:
            logging.info(f'Canceling: {task}')
            task.cancel()

    eventloop = asyncio.get_event_loop()
    eventloop.run_until_complete(websockets.serve(handler, 'localhost', 8081, ssl=None))
    eventloop.run_forever()
def message_handler(recievequeue, sendqueue):
    """Worker process loop: take a request, wait 3 seconds, queue a timestamp."""
    while True:
        # I just want to test getting a message from the recievequeue, and placing it in the sendqueue
        request = recievequeue.get()
        logging.info(f'Request: {request}')
        time.sleep(3)
        data = str(time.time())
        logging.info(f'Work completed # {data}')
        sendqueue.put(data)


def main():
    """Create the shared queues and start the server and worker processes."""
    logging.info('Starting Application')
    manager = Manager()
    sendqueue = manager.Queue()
    recievequeue = manager.Queue()
    test_process_1 = Process(target=server, args=(recievequeue, sendqueue), name='Server')
    test_process_1.start()
    test_process_2 = Process(target=message_handler, args=(recievequeue, sendqueue), name='Message Handler')
    test_process_2.start()
    # Only the server process is joined, as in the original.
    test_process_1.join()


if __name__ == '__main__':
    main()
And the client:
import asyncio
import logging
import websockets
logging.root.setLevel(0)
URI = "wss://localhost:8081"
async def test():
    """Client: connect to ``URI``, log what arrives, send a greeting every 5 s."""

    async def consumer_handler(connection):
        """Log every message received until receiving fails."""
        while True:
            try:
                request = await connection.recv()
            except Exception as exception:
                logging.warning(f'Error: {exception}')
                break
            logging.info(request)

    async def producer_handler(connection):
        """Send 'Hello World' every five seconds until sending fails."""
        while True:
            await asyncio.sleep(5)
            try:
                await connection.send('Hello World')
            except Exception as exception:
                logging.warning(f'Error: {exception}')
                break

    async with websockets.connect(URI, ssl=None) as connection:
        consumer_task = asyncio.ensure_future(consumer_handler(connection))
        producer_task = asyncio.ensure_future(producer_handler(connection))
        # NOTE: kept as posted. Once either task has finished, wait() returns
        # immediately every time, so this loop spins without ever exiting.
        while True:
            await asyncio.wait([consumer_task, producer_task], return_when=asyncio.FIRST_COMPLETED)


def main():
    """Run the client on the default event loop and log any error."""
    logging.info('Starting Application')
    eventloop = asyncio.get_event_loop()
    try:
        eventloop.run_until_complete(test())
        eventloop.run_forever()
    except Exception as exception:
        logging.warning(f'Error: {exception}')


if __name__ == '__main__':
    main()
If I remove the queues the server and multiple client can talk back and forth with no issues. I just can't figure out how to get() and put() the requests and responses. Any help would be appreciated!
So after looking through other posts I noticed others talking about deadlocks and using run_in_executor. After some more testing I found replacing the line causing the deadlock with the following code resolved the issue:
# Run the blocking sendqueue.get() in the loop's default executor and await
# the result, so the event loop is free while waiting for a response.
response = await eventloop.run_in_executor(None, sendqueue.get)

Run slow background blocking task from asyncio loop

I have an asyncio crawler that visits URLs and collects new URLs from HTML responses. I was inspired by this great tool: https://github.com/aio-libs/aiohttp/blob/master/examples/legacy/crawl.py
Here is a very simplified piece of workflow, how it works:
import asyncio
import aiohttp
class Requester:
    """Question version: fetch a list of URLs, gated by a semaphore."""

    def __init__(self):
        # A limit of 1 lets only one fetch task hold the semaphore at a time.
        self.sem = asyncio.BoundedSemaphore(1)

    async def fetch(self, url, client):
        """GET *url* and return ``(response, decoded_body)``."""
        async with client.get(url) as response:
            data = (await response.read()).decode('utf-8', 'replace')
            print("URL:", url, " have code:", response.status)
            return response, data

    async def run(self, urls):
        """Spawn one fetch task per URL, acquiring the semaphore before each."""
        async with aiohttp.ClientSession() as client:
            for url in urls:
                await self.sem.acquire()
                task = asyncio.create_task(self.fetch(url, client))
                # Release the slot when the task finishes, whatever its outcome.
                task.add_done_callback(lambda t: self.sem.release())

    def http_crawl(self, _urls_list):
        """Synchronous entry point: run the crawl to completion."""
        loop = asyncio.get_event_loop()
        crawl_loop = asyncio.ensure_future(self.run(_urls_list))
        loop.run_until_complete(crawl_loop)


r = Requester()
_url_list = ['https://www.google.com','https://images.google.com','https://maps.google.com','https://mail.google.com','https://news.google.com','https://video.google.com','https://books.google.com']
r.http_crawl(_url_list)
What I need now is to add a very slow BeautifulSoup-based function. I need that function not to block the main loop and to run as a background process. For instance, I will use it to handle HTTP responses.
I read python docs about it and found that: https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.run_in_executor
I tried to add it to my code, but it does not work as should (I use cpu_bound only for demo):
import asyncio
import aiohttp
import concurrent.futures
def cpu_bound():
    """CPU-heavy demo workload: sum of squares below 10**7."""
    return sum(i * i for i in range(10 ** 7))


class Requester:
    """Question version: as before, plus a CPU-bound step after every fetch."""

    def __init__(self):
        # A limit of 1 lets only one fetch task hold the semaphore at a time.
        self.sem = asyncio.BoundedSemaphore(1)

    async def fetch(self, url, client):
        """GET *url*, run ``cpu_bound`` in a process pool, return ``(response, body)``."""
        async with client.get(url) as response:
            data = (await response.read()).decode('utf-8', 'replace')
            print("URL:", url, " have code:", response.status)
            ####### Blocking operation #######
            loop = asyncio.get_running_loop()
            # A fresh process pool is created (and torn down) on every call.
            with concurrent.futures.ProcessPoolExecutor() as pool:
                result = await loop.run_in_executor(pool, cpu_bound)
                print('custom process pool', result)
            #################################
            return response, data

    async def run(self, urls):
        """Spawn one fetch task per URL, acquiring the semaphore before each."""
        async with aiohttp.ClientSession() as client:
            for url in urls:
                await self.sem.acquire()
                task = asyncio.create_task(self.fetch(url, client))
                # Release the slot when the task finishes, whatever its outcome.
                task.add_done_callback(lambda t: self.sem.release())

    def http_crawl(self, _urls_list):
        """Synchronous entry point: run the crawl to completion."""
        loop = asyncio.get_event_loop()
        crawl_loop = asyncio.ensure_future(self.run(_urls_list))
        loop.run_until_complete(crawl_loop)


r = Requester()
_url_list = ['https://www.google.com','https://images.google.com','https://maps.google.com','https://mail.google.com','https://news.google.com','https://video.google.com','https://books.google.com']
r.http_crawl(_url_list)
For now, it doesn't work as expected, it blocks HTTP requests every time:
URL: https://www.google.com have code: 200
custom process pool 333333283333335000000
URL: https://images.google.com have code: 200
custom process pool 333333283333335000000
URL: https://maps.google.com have code: 200
custom process pool 333333283333335000000
URL: https://mail.google.com have code: 200
custom process pool 333333283333335000000
URL: https://news.google.com have code: 200
custom process pool 333333283333335000000
URL: https://video.google.com have code: 200
custom process pool 333333283333335000000
How to correctly put the task in the background inside the main asyncio process?
Are there best practices on how to do that in a simple way, or I should use Redis for task planning?
I believe that since you are setting your BoundedSemaphore to 1 it is only allowing one instance of your task to run at a time.
You can use the ratelimiter package to limit the number of concurrent requests in a certain amount of time.
I would also upload code that works for me. It is two independent async queues, and one of them spawn high-CPU consumption process in a separate loop:
import asyncio
import functools
import aiohttp
import concurrent.futures
def cpu_bound(num):
    """Return the sum of ``i * i`` for every ``i`` in ``range(10 ** num)``.

    Used as a stand-in for CPU-heavy work; the cost grows tenfold with each
    increment of *num*.
    """
    return sum(i * i for i in range(10 ** num))
class Requester:
    """Crawler with two queue stages: HTTP fetch workers and CPU-heavy workers."""

    def __init__(self):
        self.threads = 3    # number of fetch worker tasks
        self.threads2 = 10  # number of heavy worker tasks
        # Shared by all heavy workers; it is not shut down by this class.
        self.pool = concurrent.futures.ProcessPoolExecutor()

    async def fetch(self, url):
        """GET *url*; return a dict with url/data/headers, or ``{}`` on any error."""
        try:
            timeout = aiohttp.ClientTimeout(total=10)
            # ssl=False replaces the deprecated verify_ssl=False.
            async with self.client.get(url, allow_redirects=False, ssl=False, timeout=timeout) as response:
                data = (await response.read()).decode('utf-8', 'replace')
                print("URL:", url, " have code:", response.status)
                resp_list = {'url': str(response.real_url), 'data': str(data), 'headers': dict(response.headers)}
                return resp_list
        except Exception as err:
            print(err)
            return {}

    async def heavy_worker(self, a):
        """Take fetched responses off queue *a* and run the CPU-bound step for each."""
        while True:
            resp_list = await a.get()
            try:
                # Failed fetches arrive as an empty dict; nothing to process.
                if resp_list:
                    ####### Blocking operation #######
                    loop = asyncio.get_running_loop()
                    result = await loop.run_in_executor(self.pool, functools.partial(cpu_bound, num=5))
                    print('wappalazer', result)
                    #################################
            except Exception as err:
                print(err)
            finally:
                # Exactly one task_done() per get(), so a.join() cannot hang.
                a.task_done()

    async def fetch_worker(self, q, a):
        """Take URLs off queue *q*, fetch them, and put the results on queue *a*."""
        while True:
            url = await q.get()
            resp_list = await self.fetch(url)
            q.task_done()
            await a.put(resp_list)

    async def main(self, urls):
        """Feed *urls* through both worker stages and wait until all are processed."""
        # Create the queues that we will use to store our "workload".
        q = asyncio.Queue()
        a = asyncio.Queue()
        # Create worker tasks to process the queues concurrently.
        workers_fetch = [asyncio.create_task(self.fetch_worker(q, a)) for _ in range(self.threads)]
        workers_heavy = [asyncio.create_task(self.heavy_worker(a)) for _ in range(self.threads2)]
        for url in urls:
            await q.put(url)
        # Wait for all tasks to be processed.
        await q.join()
        await a.join()
        # Cancel our worker tasks.
        for worker in workers_fetch:
            worker.cancel()
        await asyncio.gather(*workers_fetch, return_exceptions=True)
        for worker in workers_heavy:
            worker.cancel()
        await asyncio.gather(*workers_heavy, return_exceptions=True)

    async def run(self, _urls_list):
        """Open the shared client session and run one full crawl inside it."""
        async with aiohttp.ClientSession() as self.client:
            task_for_first_run = asyncio.create_task(self.main(_urls_list))
            await asyncio.sleep(1)
            await task_for_first_run
        print("All tasks completed")

    def http_crawl(self, _urls_list):
        """Synchronous entry point: run the crawl to completion."""
        asyncio.run(self.run(_urls_list))
# Guard the entry point: Requester uses a ProcessPoolExecutor, and worker
# processes started with the "spawn" method re-import this module. Without
# the guard each worker would try to start the crawl again.
if __name__ == "__main__":
    r = Requester()
    _url_list = ['http://aaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaa.aa', 'https://www.google.com','https://images.google.com','https://maps.google.com','https://mail.google.com',
                 'https://news.google.com','https://video.google.com','https://books.google.com', 'https://www.google.com',
                 'https://images.google.com','https://maps.google.com','https://mail.google.com','https://news.google.com',
                 'https://video.google.com','https://books.google.com', 'https://www.google.com','https://images.google.com',
                 'https://maps.google.com','https://mail.google.com','https://news.google.com','https://video.google.com',
                 'https://books.google.com', 'https://www.google.com','https://images.google.com','https://maps.google.com',
                 'https://mail.google.com','https://news.google.com','https://video.google.com','https://books.google.com',
                 'https://www.google.com','https://images.google.com','https://maps.google.com','https://mail.google.com',
                 'https://news.google.com','https://video.google.com','https://books.google.com']
    r.http_crawl(_url_list)

Resources