aiodocker async creation of the container - python-3.x

I have been going through the aiodocker library, and its documentation says the maintainers watch the python-asyncio tag, so I wanted to ask how to make my Docker code asynchronous; I could not figure it out from the documentation or the source code. Here is the code I need to make async (detach=True below won't work for me because the container sometimes exits with a non-zero status code, and running the containers asynchronously would let me handle that better):
import docker

def synchronous_request(url):
    client = docker.from_env()
    local_dir = '/home/ubuntu/git/docker-scraper/data'
    volumes = {local_dir: {'bind': '/download/', 'mode': 'rw'}}
    environment = {'URL': url}
    client.containers.run('wgettor:latest', auto_remove=True, volumes=volumes, environment=environment)
My attempt with aiodocker is:
import aiodocker

async def make_historical_request(url):
    docker = await aiodocker.Docker()
    client = await aiodocker.DockerContainers(docker)
    local_dir = '/home/ubuntu/git/docker-scraper/data'
    volumes = {local_dir: {'bind': '/download/', 'mode': 'rw'}}
    environment = {'URL': url}
    await client.run(config={"auto_remove": "True",
                             "volumes": volumes,
                             "environment": environment}, name="wgettor:latest")
I would appreciate it if you could show me how to do this properly.
I am trying to achieve something like the following (the code below does not run the containers concurrently):
import docker
import asyncio
from collections import namedtuple

URL = namedtuple('URL', 'val')
URLs = (
    URL('https://www.google.com'),
    URL('https://www.yahoo.com')
)

client = docker.from_env()
local_dir = '/home/ubuntu/git/docker-scraper/data-test'
volumes = {local_dir: {'bind': '/download/', 'mode': 'rw'}}

async def run_container(client, volumes, environment, *, pid):
    print("Starting the container on pid: {}".format(pid))
    return client.containers.run('wgettor:latest', auto_remove=True, detach=True,
                                 volumes=volumes, environment=environment)

async def make_historical_request(url, *, pid):
    print("Starting the retrieval of: {}, on pid: {}".format(url, pid))
    environment = {'URL': url}
    return await run_container(client, volumes, environment, pid=pid)

async def main():
    tasks = [asyncio.ensure_future(make_historical_request(url.val, pid=ix)) for ix, url in enumerate(URLs)]
    await asyncio.wait(tasks)

if __name__ == '__main__':
    asyncio.run(main())
With the help of Freund Alleind, I believe it should be something like this:
async def run_container(docker, url):
    config = {
        'Env': ["URL=" + url],
        'HostConfig': {
            'Binds': local_dir + ":" + "/download/"
        }
    }
    try:
        await asyncio.sleep(random.random() * 0.001)
        container = await docker.containers.create_or_replace(
            config=config,
            name="wgettor:latest",
        )
        await container.start()
        await container.kill()
        return url
    except DockerError as err:
        print(f'Error starting wgettor:latest, container: {err}')

async def main():
    start = time.time()
    docker = Docker()
    futures = [run_container(docker, url) for url in URLs]
    # futures = [fetch_async(i) for i in range(1, MAX_CLIENTS + 1)]
    for i, future in enumerate(asyncio.as_completed(futures)):
        result = await future
        print('{} {}'.format(">>" * (i + 1), result))
    print("Process took: {:.2f} seconds".format(time.time() - start))

async def run_container(client, volumes, environment, *, pid):
    print('Starting the container on pid: {}'.format(pid))
    return client.containers.run(..)

This function definitely must await something: docker-py's client.containers.run() is a blocking call, so returning it from a coroutine does not make it run asynchronously.
(Update) Try something like this:
import aiodocker
from aiodocker.exceptions import DockerError

async def run_container(docker, name, config):
    try:
        container = await docker.containers.create_or_replace(
            config=config,
            name=name,
        )
        await container.start()
        return container
    except DockerError as err:
        print(f'Error starting {name} container: {err}')
You should create docker like this:

from aiodocker import Docker

docker = Docker()
config = {}
loop.run_until_complete(run_container(docker, ..., config))
After some research into the Docker Engine API, I can say that if you want to mount a volume, you can use a config like this:
config = {
    'Image': 'imagename',
    'HostConfig': {'Binds': ['/local_path:/container_path']},
}
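Putting the pieces above together, here is a minimal, untested sketch of the concurrent version. It assumes the wgettor:latest image and the paths from the question; note that with aiodocker the image goes into the config's 'Image' key, while name= is the container name, and container.wait() lets you inspect the exit status code that motivated the question:

import asyncio
import aiodocker
from aiodocker.exceptions import DockerError

local_dir = '/home/ubuntu/git/docker-scraper/data'
URLs = ('https://www.google.com', 'https://www.yahoo.com')

async def run_container(docker, url, *, pid):
    config = {
        'Image': 'wgettor:latest',
        'Env': ['URL=' + url],
        'HostConfig': {'Binds': [local_dir + ':/download/']},
    }
    try:
        # create and start the container; this suspends only this task
        container = await docker.containers.run(config=config, name=f'wgettor-{pid}')
        result = await container.wait()           # wait for the container to exit
        status = result.get('StatusCode')
        await container.delete(force=True)
        return url, status
    except DockerError as err:
        print(f'Error running wgettor:latest for {url}: {err}')
        return url, None

async def main():
    docker = aiodocker.Docker()
    try:
        # one task per URL, all running concurrently
        results = await asyncio.gather(
            *(run_container(docker, url, pid=ix) for ix, url in enumerate(URLs))
        )
        for url, status in results:
            print(f'{url} -> exit status {status}')
    finally:
        await docker.close()

if __name__ == '__main__':
    asyncio.run(main())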

Related

Playwright and httpx coroutine conflicts: how to solve them?

I created a BaseSpider class that needs to use Playwright and httpx, but when used with coroutines they conflict.
Code:
import asyncio
import httpx
from httpx import AsyncClient
from playwright.sync_api import sync_playwright

class BaseSpider():
    def __init__(self):
        self.play_wright = sync_playwright().start()

    async def req(self, client: AsyncClient, i):
        res = await client.get('https://www.baidu.com')
        print(f'{i + 1},status_code = {res.status_code}')
        return res

    async def main(self):
        async with httpx.AsyncClient() as client:
            response = await client.get('https://www.baidu.com')
            print(response)

if __name__ == '__main__':
    base = BaseSpider()
    asyncio.run(base.main())
Run error: asyncio.run() cannot be called from a running event loop
If I comment out self.play_wright = sync_playwright().start(), it works fine.
How do I get Playwright and httpx to run together in coroutines?
Thank you for your help!
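This related question has no answer in the thread, but one plausible direction, sketched here under the assumption that Playwright's async API is acceptable, is to replace sync_playwright() with async_playwright() so that everything runs on the single event loop started by asyncio.run():

import asyncio
import httpx
from playwright.async_api import async_playwright

class BaseSpider():
    async def start(self):
        # async setup instead of starting sync_playwright() in __init__
        self.play_wright = await async_playwright().start()

    async def req(self, client: httpx.AsyncClient, i):
        res = await client.get('https://www.baidu.com')
        print(f'{i + 1}, status_code = {res.status_code}')
        return res

    async def main(self):
        async with httpx.AsyncClient() as client:
            response = await client.get('https://www.baidu.com')
            print(response)

async def run():
    base = BaseSpider()
    await base.start()
    await base.main()
    await base.play_wright.stop()

if __name__ == '__main__':
    asyncio.run(run())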

Using aiobotocore with context managers

I'm trying to use the aiobotocore library with context managers, but I'm having a hard time configuring my credentials.
I need to create a class that configures my AWS client so I can use the put, read and delete functions of this library.
The following code is what I'm using:
from contextlib import AsyncExitStack
from aiobotocore.session import AioSession
from credentials import aws_access_key_id, aws_secret_access_key

class AWSConnectionManager:
    def __init__(self, aws_acces_key_id, aws_secret_access_key):
        self.aws_acces_key_id = aws_acces_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self._exit_stack = AsyncExitStack()
        self._client = None
        print('__init__')

    async def __aenter__(self):
        session = AioSession
        self._client = await self._exit_stack.enter_async_context(session.create_client('s3'))
        print('__aenter__')

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self._exit_stack.__aexit__(exc_type, exc_val, exc_tb)
        print('__aexit__')

res = AWSConnectionManager(aws_acces_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
But it never goes through the __aenter__ and __aexit__ methods.
With the code above I have the following output:
__init__
<__main__.AWSConnectionManager object at 0x7f03921ac640>
Does anyone know what can be wrong with my code?
First: you need to change session = AioSession to session = AioSession(), add a return, and pass the credentials:
async def __aenter__(self):
    session = AioSession()
    self._client = await self._exit_stack.enter_async_context(
        session.create_client(
            's3',
            aws_secret_access_key=self.aws_secret_access_key,
            aws_access_key_id=self.aws_access_key_id,
        )
    )
    return self
Second: you need to add proxy calls for put_object/get_object, or make _client public by renaming _client to client:
async def save_file(self, content, s3_filename: str):
    await self._client.put_object(Bucket=self.bucket, Body=content, Key=f'{s3_filename}')

async def load_file(self, name):
    obj = await self._client.get_object(Bucket=self.bucket, Key=f'{name}')
    return obj['Body'].read()
Now you can use it like this:
async with SkyFileStorageProxy() as storage:
    await storage.load_file(name='test.txt')
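Assembled into one place, the fixed class might look like the sketch below. The bucket attribute and the corrected aws_access_key_id spelling are assumptions on my part, and the body of get_object is read with the streaming pattern from the aiobotocore examples:

from contextlib import AsyncExitStack
from aiobotocore.session import AioSession

class AWSConnectionManager:
    def __init__(self, aws_access_key_id, aws_secret_access_key, bucket='my-bucket'):
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.bucket = bucket
        self._exit_stack = AsyncExitStack()
        self._client = None

    async def __aenter__(self):
        session = AioSession()
        self._client = await self._exit_stack.enter_async_context(
            session.create_client(
                's3',
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key,
            )
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self._exit_stack.__aexit__(exc_type, exc_val, exc_tb)

    async def save_file(self, content, s3_filename: str):
        await self._client.put_object(Bucket=self.bucket, Body=content, Key=s3_filename)

    async def load_file(self, name):
        obj = await self._client.get_object(Bucket=self.bucket, Key=name)
        async with obj['Body'] as stream:
            return await stream.read()

# usage, inside a coroutine:
# async with AWSConnectionManager(key_id, secret, bucket='my-bucket') as storage:
#     await storage.save_file(b'hello', 'test.txt')
#     print(await storage.load_file('test.txt'))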

Need to parse two sessions at the same time with telethon on Python

I'm having some trouble parsing two or more sessions at the same time with Telethon. I have tried this:
import asyncio
from telethon import TelegramClient, events

class NewSession:
    def __init__(self, session_name):
        self.client = TelegramClient(session_name, api_id, api_hash)
        self.session_name = session_name

    async def pool(self):
        print("working with:", self.session_name)

        #self.client.on(events.NewMessage(outgoing=True))
        async def main(event):
            message = event.message.to_dict()
            msg_text = message['message']
            print(msg_text)

        try:
            await self.client.start()
            await self.client.run_until_disconnected()
        finally:
            await self.client.disconnect()

async def main():
    user = NewSession("321")
    user2 = NewSession("123")
    await user.pool()
    await user2.pool()

if __name__ == '__main__':
    asyncio.run(main())
But only one of them works. Need help :)
The problem is inside your main function. When you await a coroutine, execution does not continue to the next expression until that coroutine returns. So in your code the line await user2.pool() is only executed once the user.pool() coroutine returns a value, which is when the session '321' is disconnected.
You need to run the tasks concurrently; you can use asyncio.gather. Reworking your main:
async def main():
    user = NewSession("321")
    user2 = NewSession("123")
    await asyncio.gather(user.pool(), user2.pool())
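For completeness, a minimal runnable sketch of the reworked version; api_id and api_hash are placeholders, and the event handler is assumed to be registered with the @ decorator that appears commented out in the question:

import asyncio
from telethon import TelegramClient, events

api_id = 123456          # placeholder
api_hash = 'your_hash'   # placeholder

class NewSession:
    def __init__(self, session_name):
        self.client = TelegramClient(session_name, api_id, api_hash)
        self.session_name = session_name

    async def pool(self):
        print("working with:", self.session_name)

        @self.client.on(events.NewMessage(outgoing=True))
        async def handler(event):
            print(event.message.to_dict()['message'])

        try:
            await self.client.start()
            await self.client.run_until_disconnected()
        finally:
            await self.client.disconnect()

async def main():
    user = NewSession("321")
    user2 = NewSession("123")
    # both sessions run concurrently instead of one after the other
    await asyncio.gather(user.pool(), user2.pool())

if __name__ == '__main__':
    asyncio.run(main())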

asyncpg + aiogram. cannot perform operation: another operation is in progress

How can I fix this? I have played with it a lot and for a long time, but nothing has come of it.
sql_db.py:
import asyncio
import asyncpg

LOG_PG = {"database": 'test_bot',
          "user": 'misha',
          "password": '1234',
          "host": 'localhost'}

class Database:
    SELECT_USER_LANG = "SELECT lang FROM lang_set WHERE user_id = $1 AND bot_token = $2"

    def __init__(self, loop: asyncio.AbstractEventLoop):
        self.pool = loop.run_until_complete(
            asyncpg.create_pool(**LOG_PG)
        )

    async def get_lang(self, user_id, token):
        search_d = [user_id, token]
        res = await self.pool.fetchval(self.SELECT_USER_LANG, *search_d)
        if res is None:
            return "ru"
        return res
I tried inserting this loop everywhere, running without it, and multiple combinations in the code itself, but nothing changed. I do not know how to describe the problem in more detail.
main.py:
from aiogram import Bot, Dispatcher
from aiogram.types import Message
import asyncio
from sql_db import Database

loop = asyncio.get_event_loop()
token = "TOKEN"
dp = Dispatcher()
bot = Bot(token=token, parse_mode="HTML")
db = Database(loop)

async def echo_msg(message: Message):
    user_id = message.from_user.id
    await message.send_copy(user_id)
    await db.get_lang(user_id, token)

dp.message.register(callback=echo_msg)

if __name__ == '__main__':
    dp.run_polling(bot, skip_updates=True)
error:
...
File "/home/mickey/Desktop/chat_admin/venv/lib/python3.8/site-packages/asyncpg/pool.py", line 867, in release
return await asyncio.shield(ch.release(timeout))
File "/home/mickey/Desktop/chat_admin/venv/lib/python3.8/site-packages/asyncpg/pool.py", line 224, in release
raise ex
File "/home/mickey/Desktop/chat_admin/venv/lib/python3.8/site-packages/asyncpg/pool.py", line 214, in release
await self._con.reset(timeout=budget)
File "/home/mickey/Desktop/chat_admin/venv/lib/python3.8/site-packages/asyncpg/connection.py", line 1367, in reset
await self.execute(reset_query, timeout=timeout)
File "/home/mickey/Desktop/chat_admin/venv/lib/python3.8/site-packages/asyncpg/connection.py", line 318, in execute
return await self._protocol.query(query, timeout)
File "asyncpg/protocol/protocol.pyx", line 323, in query
File "asyncpg/protocol/protocol.pyx", line 707, in asyncpg.protocol.protocol.BaseProtocol._check_state
asyncpg.exceptions._base.InterfaceError: cannot perform operation: another operation is in progress
It works when launched like this: the pool has to be created through aiogram itself, in its startup hook. I do not know exactly how to formulate it, but I was lucky enough to understand the problem:
...
data_ = {}

class Database:
    def __init__(self, pool: asyncpg.create_pool):
        self.pool = pool

    async def get_lang(self, user_id, token):
        search_d = [user_id, token]
        async with self.pool.acquire() as conn:
            res = await conn.fetchval(SELECT_USER_LANG, *search_d)
            if res is None:
                return "ru"
            return res

async def create_pool():
    pool = await asyncpg.create_pool(**LOG_PG)
    data_["db"] = Database(pool)

async def echo_msg(message: Message):
    user_id = message.from_user.id
    await message.send_copy(user_id)
    await data_["db"].get_lang(user_id, token)

dp.message.register(callback=echo_msg)

if __name__ == '__main__':
    dp.startup.register(create_pool)  # ANSWER
    dp.run_polling(bot, skip_updates=True)
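For reference, a sketch of main.py assembled from the pieces above; the token, Postgres credentials, and query are the placeholders from the question. The key points are that the pool is created inside aiogram's startup hook and that each query acquires its own connection from the pool:

import asyncpg
from aiogram import Bot, Dispatcher
from aiogram.types import Message

LOG_PG = {"database": 'test_bot', "user": 'misha', "password": '1234', "host": 'localhost'}
SELECT_USER_LANG = "SELECT lang FROM lang_set WHERE user_id = $1 AND bot_token = $2"
token = "TOKEN"

dp = Dispatcher()
bot = Bot(token=token, parse_mode="HTML")
data_ = {}

class Database:
    def __init__(self, pool):
        self.pool = pool

    async def get_lang(self, user_id, token):
        # a connection per query, so concurrent handlers never share one
        async with self.pool.acquire() as conn:
            res = await conn.fetchval(SELECT_USER_LANG, user_id, token)
            return res if res is not None else "ru"

async def create_pool():
    # runs on aiogram's own event loop at startup
    data_["db"] = Database(await asyncpg.create_pool(**LOG_PG))

async def echo_msg(message: Message):
    user_id = message.from_user.id
    await message.send_copy(user_id)
    await data_["db"].get_lang(user_id, token)

dp.message.register(callback=echo_msg)

if __name__ == '__main__':
    dp.startup.register(create_pool)
    dp.run_polling(bot, skip_updates=True)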

Run slow background blocking task from asyncio loop

I have an asyncio crawler that visits URLs and collects new URLs from HTML responses. I was inspired by this great tool: https://github.com/aio-libs/aiohttp/blob/master/examples/legacy/crawl.py
Here is a very simplified piece of the workflow, showing how it works:
import asyncio
import aiohttp

class Requester:
    def __init__(self):
        self.sem = asyncio.BoundedSemaphore(1)

    async def fetch(self, url, client):
        async with client.get(url) as response:
            data = (await response.read()).decode('utf-8', 'replace')
            print("URL:", url, " have code:", response.status)
            return response, data

    async def run(self, urls):
        async with aiohttp.ClientSession() as client:
            for url in urls:
                await self.sem.acquire()
                task = asyncio.create_task(self.fetch(url, client))
                task.add_done_callback(lambda t: self.sem.release())

    def http_crawl(self, _urls_list):
        loop = asyncio.get_event_loop()
        crawl_loop = asyncio.ensure_future(self.run(_urls_list))
        loop.run_until_complete(crawl_loop)

r = Requester()
_url_list = ['https://www.google.com','https://images.google.com','https://maps.google.com','https://mail.google.com','https://news.google.com','https://video.google.com','https://books.google.com']
r.http_crawl(_url_list)
What I need now is to add a very slow BeautifulSoup-based function. That function should not block the main loop and should run as a background job; for instance, it will process the HTTP responses.
I read the Python docs about this and found: https://docs.python.org/3/library/asyncio-eventloop.html#asyncio.loop.run_in_executor
I tried to add it to my code, but it does not work as it should (I use cpu_bound only for the demo):
import asyncio
import aiohttp
import concurrent.futures

def cpu_bound():
    return sum(i * i for i in range(10 ** 7))

class Requester:
    def __init__(self):
        self.sem = asyncio.BoundedSemaphore(1)

    async def fetch(self, url, client):
        async with client.get(url) as response:
            data = (await response.read()).decode('utf-8', 'replace')
            print("URL:", url, " have code:", response.status)

            ####### Blocking operation #######
            loop = asyncio.get_running_loop()
            with concurrent.futures.ProcessPoolExecutor() as pool:
                result = await loop.run_in_executor(pool, cpu_bound)
                print('custom process pool', result)
            #################################
            return response, data

    async def run(self, urls):
        async with aiohttp.ClientSession() as client:
            for url in urls:
                await self.sem.acquire()
                task = asyncio.create_task(self.fetch(url, client))
                task.add_done_callback(lambda t: self.sem.release())

    def http_crawl(self, _urls_list):
        loop = asyncio.get_event_loop()
        crawl_loop = asyncio.ensure_future(self.run(_urls_list))
        loop.run_until_complete(crawl_loop)

r = Requester()
_url_list = ['https://www.google.com','https://images.google.com','https://maps.google.com','https://mail.google.com','https://news.google.com','https://video.google.com','https://books.google.com']
r.http_crawl(_url_list)
For now it doesn't work as expected; it blocks the HTTP requests every time:
URL: https://www.google.com have code: 200
custom process pool 333333283333335000000
URL: https://images.google.com have code: 200
custom process pool 333333283333335000000
URL: https://maps.google.com have code: 200
custom process pool 333333283333335000000
URL: https://mail.google.com have code: 200
custom process pool 333333283333335000000
URL: https://news.google.com have code: 200
custom process pool 333333283333335000000
URL: https://video.google.com have code: 200
custom process pool 333333283333335000000
How do I correctly put the task in the background inside the main asyncio process?
Are there best practices for doing that in a simple way, or should I use Redis for task planning?
I believe that since you are setting your BoundedSemaphore to 1, it only allows one instance of your task to run at a time.
You can use the ratelimiter package to limit the number of concurrent requests in a certain amount of time.
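A small, illustrative sketch of that first point (names and values are mine, not from the original code): with a semaphore larger than 1 several fetch tasks can overlap, and a single shared ProcessPoolExecutor handles the CPU-bound work without being rebuilt on every request.

import asyncio
import concurrent.futures

def cpu_bound():
    return sum(i * i for i in range(10 ** 7))

sem = asyncio.BoundedSemaphore(5)                 # was 1, which serialized the tasks
pool = concurrent.futures.ProcessPoolExecutor()   # created once and reused

async def fetch_and_parse(url):
    async with sem:
        # ... perform the aiohttp request for `url` here ...
        loop = asyncio.get_running_loop()
        # only this task waits for the parser; the loop keeps serving other fetches
        return await loop.run_in_executor(pool, cpu_bound)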
I would also like to share code that works for me. It uses two independent async queues, and one group of workers offloads the high-CPU work to a process pool:
import asyncio
import functools
import aiohttp
import concurrent.futures

def cpu_bound(num):
    return sum(i * i for i in range(10 ** num))

class Requester:
    def __init__(self):
        self.threads = 3
        self.threads2 = 10
        self.pool = concurrent.futures.ProcessPoolExecutor()

    async def fetch(self, url):
        try:
            timeout = aiohttp.ClientTimeout(total=10)
            async with self.client.get(url, allow_redirects=False, verify_ssl=False, timeout=timeout) as response:
                data = (await response.read()).decode('utf-8', 'replace')
                print("URL:", url, " have code:", response.status)
                resp_list = {'url': str(response.real_url), 'data': str(data), 'headers': dict(response.headers)}
                return resp_list
        except Exception as err:
            print(err)
            return {}

    async def heavy_worker(self, a):
        while True:
            resp_list = await a.get()
            if resp_list.keys():
                ####### Blocking operation #######
                try:
                    loop = asyncio.get_event_loop()
                    result = await loop.run_in_executor(self.pool, functools.partial(cpu_bound, num=5))
                    print('wappalazer', result)
                except Exception as err:
                    print(err)
                #################################
                a.task_done()
            else:
                a.task_done()

    async def fetch_worker(self, q, a):
        while True:
            url = await q.get()
            resp_list = await self.fetch(url)
            q.task_done()
            await a.put(resp_list)

    async def main(self, urls):
        # Create the queues we will use to store our "workload".
        q = asyncio.Queue()
        a = asyncio.Queue()

        # Create worker tasks to process the queues concurrently.
        workers_fetch = [asyncio.create_task(self.fetch_worker(q, a)) for _ in range(self.threads)]
        workers_heavy = [asyncio.create_task(self.heavy_worker(a)) for _ in range(self.threads2)]

        for url in urls:
            await q.put(url)

        # Wait for all tasks to be processed.
        await q.join()
        await a.join()

        # Cancel our worker tasks.
        for worker in workers_fetch:
            worker.cancel()
        await asyncio.gather(*workers_fetch, return_exceptions=True)
        for worker in workers_heavy:
            worker.cancel()
        await asyncio.gather(*workers_heavy, return_exceptions=True)

    async def run(self, _urls_list):
        async with aiohttp.ClientSession() as self.client:
            task_for_first_run = asyncio.create_task(self.main(_urls_list))
            await asyncio.sleep(1)
            await task_for_first_run
            print("All tasks completed")

    def http_crawl(self, _urls_list):
        asyncio.run(self.run(_urls_list))

r = Requester()
_url_list = ['http://aaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaa.aa', 'https://www.google.com','https://images.google.com','https://maps.google.com','https://mail.google.com',
             'https://news.google.com','https://video.google.com','https://books.google.com', 'https://www.google.com',
             'https://images.google.com','https://maps.google.com','https://mail.google.com','https://news.google.com',
             'https://video.google.com','https://books.google.com', 'https://www.google.com','https://images.google.com',
             'https://maps.google.com','https://mail.google.com','https://news.google.com','https://video.google.com',
             'https://books.google.com', 'https://www.google.com','https://images.google.com','https://maps.google.com',
             'https://mail.google.com','https://news.google.com','https://video.google.com','https://books.google.com',
             'https://www.google.com','https://images.google.com','https://maps.google.com','https://mail.google.com',
             'https://news.google.com','https://video.google.com','https://books.google.com']
r.http_crawl(_url_list)
