How to work with an asyncpg connection pool in a Faust agent?

I want a Faust agent to write to a PostgreSQL table. I'd like to use an asyncpg connection pool but cannot find a clean way to inject it into the app initialization code.

Simply add a db_pool method like the one below to your Faust App subclass:

import logging
from typing import Dict, List

import faust
from asyncpg.pool import Pool

import db  # the module shown below


class KafkaWorker(faust.App):

    def __init__(self, *args: List, **kwargs: Dict) -> None:
        self.broker: str = kwargs.pop('broker')
        self._db_pool = None
        super().__init__(*args, broker=KafkaWorker._broker_faust_string(self.broker), **kwargs)

    async def db_pool(self) -> Pool:
        '''Create the asyncpg pool lazily on first use and cache it.'''
        if not self._db_pool:
            logging.warning('kafka.db_pool initialization...')
            self._db_pool = await db.db_pool()
            logging.warning('kafka.db_pool initialization...done ✓')
        return self._db_pool
where db.db_pool is
from os import environ

import asyncpg
from asyncpg.pool import Pool
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base

session = scoped_session(sessionmaker())
Base = declarative_base()


async def db_pool() -> Pool:
    return await asyncpg.create_pool(
        dsn=environ.get('DB_CNX_STRING', 'postgresql://postgres:postgres@postgres:5432/actions')
    )
and then you access it as

@kafka.agent(actions_topic)
async def store_actions(actions: StreamT):
    async for action in actions:
        db_pool = await current_agent().app.db_pool()
        async with db_pool.acquire() as conn:
            try:
                yield await save_action(conn, action.to_representation())
            except StoreException:
                logger.exception('Error while inserting action in DB, continuing....')
            finally:
                yield action.id
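For completeness, here is a minimal sketch of how the pieces above might be wired together. The app id, broker address, and topic name are placeholders, and _broker_faust_string is the helper referenced in the class but not shown in the original:

# hypothetical wiring; names and addresses are illustrative only
kafka = KafkaWorker('action-worker', broker='localhost:9092')

actions_topic = kafka.topic('actions')

if __name__ == '__main__':
    kafka.main()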

Related

Feed ProcessPoolExecutor with results from asyncio

I have a bunch of online data that I want to download and process efficiently. Downloading already takes some time but cpu-bound processing takes much longer. I struggle to implement a combination of async and ProcessPoolExecutor.
import asyncio
import time

import aiohttp
from aiohttp import ClientSession
from concurrent.futures import ProcessPoolExecutor


class WebData:

    def __init__(self, url):
        self.url = url
        self.binary = b''

    async def download(self, client):
        time.sleep(0.2)
        try:
            async with client.get(self.url, timeout=5) as resp:
                self.binary = await resp.read()
                print(f'Downloaded {self.url}')
        except (aiohttp.ClientConnectionError,
                asyncio.exceptions.TimeoutError):
            pass
        return

    def process(self):
        print(f'Start processing {self.url}')
        time.sleep(1)
        print(f'Finished processing {self.url}')


async def main():
    list_urls = [f'https://www.google.com/search?q={i}'
                 for i in range(10)]
    list_obj = [WebData(url) for url in list_urls]
    with ProcessPoolExecutor() as executor:
        async with ClientSession() as session:
            tasks = [obj.download(session) for obj in list_obj]
            await asyncio.gather(*tasks)
        list_futures = [
            executor.submit(obj.process)
            for obj in list_obj]
    return list_futures


res = asyncio.run(main())
This works as expected but it fails to accomplish what I am looking for. It first downloads all data and starts processing it only afterwards, which leaves my cores idle during download. Is there any way I can pipe the downloaded objects to the executor while other objects are still downloading?
I found this thread but it isn't what I need.
You should submit self.process only after the download coroutine has finished. For that, you can add a separate asynchronous method that awaits the download method and then submits process to the ProcessPoolExecutor.
class WebData:
    def __init__(self, url):
        """The code has not been changed"""

    async def download(self, client):
        """The code has not been changed"""

    def process(self):
        """The code has not been changed"""

    async def execute(self, session, pool):
        await self.download(session)
        pool.submit(self.process)


async def main():
    list_urls = [f'https://www.google.com/search?q={i}' for i in range(10)]
    list_obj = [WebData(url) for url in list_urls]
    with ProcessPoolExecutor() as pool:
        async with ClientSession() as session:
            list_futures = await asyncio.gather(*[obj.execute(session, pool) for obj in list_obj])
    return list_futures
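Note that execute above does not await the processing result: asyncio.gather will return a list of None values while the executor futures run in the background. If you want to await the CPU-bound result inside the same coroutine, one option is loop.run_in_executor, which wraps the executor call in an awaitable. A small sketch of that variant, assuming the same WebData class:

    async def execute(self, session, pool):
        """Download, then run the CPU-bound step in the process pool and await its result."""
        await self.download(session)
        loop = asyncio.get_running_loop()
        # run_in_executor returns an asyncio future, so the result can be awaited here
        return await loop.run_in_executor(pool, self.process)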

What is the best way to set a timeout condition for functions decorated with @run_on_executor in Tornado?

What would be the best approach for setting a timeout condition on a task/function that's been submitted to a ThreadPoolExecutor using tornado.concurrent's @run_on_executor decorator? Example Tornado handler below:

import json
import time

import tornado.web
from concurrent.futures import ThreadPoolExecutor
from tornado.concurrent import run_on_executor


class MyHandler(tornado.web.RequestHandler):
    def initialize(self) -> None:
        self.executor = ThreadPoolExecutor(1)

    @run_on_executor
    def blocking_function(self) -> None:
        """ Run Blocking Function on ThreadPoolExecutor. """
        seconds = 10
        time.sleep(seconds)
        response = json.dumps({"message": f"Slept for {seconds} seconds."})
        return response

    async def get(self) -> None:
        response = await self.blocking_function()
        self.write(response)

Does something like tornado.gen.with_timeout found here exist for @run_on_executor?
Thank you for your time.
Since run_on_executor returns a Future object, you can use it with gen.with_timeout:

from datetime import timedelta

from tornado import gen


async def get(self):
    response = await gen.with_timeout(
        timedelta(seconds=5),
        self.blocking_function()
    )
    ...
Don't forget to handle the timeout exception.
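For example, a minimal sketch of catching the timeout inside the handler (the status code and message are just illustrative; the executor thread keeps running, you simply stop waiting for it):

async def get(self):
    try:
        response = await gen.with_timeout(
            timedelta(seconds=5),
            self.blocking_function()
        )
    except gen.TimeoutError:
        # stop waiting and report the timeout to the client
        self.set_status(503)
        response = json.dumps({"message": "Request timed out."})
    self.write(response)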

Using python compiled protobuf pb2 as key and value serializer

I am trying to read data from a Kafka topic which has been serialized using Google's protobuf.
I compiled the proto files using protoc, which generated pb2 files.
Now I am trying to use Faust to create a stream processor, but I can't find the correct way to use the pb2 files as key_serializer and value_serializer.
Here is what I have tried:
import faust
from proto.topic_pb2 import topic

app = faust.App(
    'faust-consumer',
    broker='kafka://',
    store="memory://",
    cache="memory://",
)

schema = faust.Schema(
    # key_type=topic.PK,
    # value_type=topic,
    key_serializer=topic.PK,
    value_serializer=topic,
)

topic = app.topic(
    'topic',
    schema=schema
)

@app.agent(topic)
async def consume(topic):
    async for event in topic:
        print(event)

if __name__ == "__main__":
    app.main()
Does anybody have any idea how to use the pb2 files in the serializers?
Man, I was trying to do the same thing this past week. After struggling, I finally got something working - not the best way, but it works well enough.
So initially I used this python compiler: https://github.com/danielgtaylor/python-betterproto to generate the *.py files with dataclasses / type hinting.
Then, I was able to create Faust.Record classes dynamically by using a helper:
import abc
import inspect
from typing import Type

import betterproto
import faust

GENERATED_SUFFIX = "__FaustRecord_Auto"


def _import_relative_class(module: str, klass_name: str):
    resolved_import = __import__(module, fromlist=[klass_name])
    klass = getattr(resolved_import, klass_name)
    return klass


def _is_record(attype: Type):
    return (
        inspect.isclass(attype)
        and isinstance(attype, betterproto.Message)
        or isinstance(attype, abc.ABCMeta)
    )


def _build_record_annotations(klass: Type):
    annotations = {}
    for atname, attype in klass.__annotations__.items():
        if _is_record(attype):
            annotations[atname] = make_faust_record(attype)
        elif isinstance(attype, str):
            subklass = _import_relative_class(klass.__module__, attype)
            annotations[atname] = make_faust_record(subklass)
        else:
            annotations[atname] = attype
    return annotations


def make_faust_record(klass: Type):
    type_name = f"{klass.__name__}{GENERATED_SUFFIX}"
    record_type = type(type_name, (faust.Record, klass), {})
    record_type.__annotations__ = _build_record_annotations(klass)
    record_type._init_subclass()
    return record_type
Now you can use it like:
import faust
from proto.your_models import YourModel  # Import your generated proto here
from faust_converter import make_faust_record

app = faust.App(
    'faust-consumer',
    broker='kafka://',
    store="memory://",
    cache="memory://",
)

model_record = make_faust_record(YourModel)

topic = app.topic(
    'topic',
    value_type=model_record
)

@app.agent(topic)
async def consume(topic):
    async for event in topic:
        print(event)

if __name__ == "__main__":
    app.main()
I was also experimenting with using Protobuf with Faust.
Below is a solution using Faust serializer codecs.
faust-protobuf https://github.com/hemantkashniyal/faust-protobuf
proto_serializer.py
from typing import Any

from faust.serializers import codecs
from google.protobuf import json_format
from google.protobuf.json_format import MessageToJson
from google.protobuf.json_format import MessageToDict
from google.protobuf import text_format
from google.protobuf.text_format import MessageToString
from google.protobuf.text_format import MessageToBytes


class ProtobufSerializer(codecs.Codec):
    def __init__(self, pb_type: Any):
        self.pb_type = pb_type
        super(self.__class__, self).__init__()

    def _dumps(self, pb: Any) -> bytes:
        return pb.SerializeToString()

    def _loads(self, s: bytes) -> Any:
        pb = self.pb_type()
        pb.ParseFromString(s)
        return pb
app.py
import faust
from google.protobuf.json_format import MessageToJson

from .proto.greetings_pb2 import Greeting
from .proto_serializer import ProtobufSerializer

app = faust.App(
    'faust-consumer',
    broker='kafka://',  # TODO: update kafka endpoint
    store="memory://",
    cache="memory://",
)

greetings_schema = faust.Schema(
    key_serializer=ProtobufSerializer(pb_type=Greeting),
    value_serializer=ProtobufSerializer(pb_type=Greeting),
)

topic = app.topic(
    'greetings',
    schema=greetings_schema
)

@app.agent(topic)
async def consume(topic):
    async for event in topic:
        print(MessageToJson(event))

@app.timer(5)
async def produce():
    for i in range(10):
        data = Greeting(hello="world", message=i)
        await consume.send(value=data)

if __name__ == "__main__":
    app.main()
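If you would rather refer to the codec by name (for example, as a string in topic declarations), Faust also supports registering custom codecs. A small sketch of that, assuming the ProtobufSerializer and Greeting defined above and an arbitrary codec name:

from faust.serializers import codecs

from .proto.greetings_pb2 import Greeting
from .proto_serializer import ProtobufSerializer

# register once at startup; afterwards the codec can be referenced by its name
codecs.register("protobuf_greeting", ProtobufSerializer(pb_type=Greeting))

topic = app.topic(
    'greetings',
    key_serializer="protobuf_greeting",
    value_serializer="protobuf_greeting",
)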
I was able to do it by creating a Serializer class like so:
import faust
from abc import ABCMeta, abstractmethod
from google.protobuf.json_format import MessageToDict
from faust.serializers.codecs import Codec
from importlib import import_module


def get_proto(topic_name, only_pk=False):
    if not hasattr(get_proto, "topics"):
        setattr(get_proto, "topics", dict())
    get_proto.topics[topic_name] = import_module(
        "protodef.{}_pb2".format(topic_name)
    ).__getattribute__(topic_name.split(".")[-1])
    if only_pk:
        return getattr(get_proto, "topics").get(topic_name).PK
    else:
        return getattr(get_proto, "topics").get(topic_name)


class ProtoSerializer(Codec, metaclass=ABCMeta):
    @abstractmethod
    def only_key(self):
        ...

    def as_proto(self, topic_name):
        self._proto = get_proto(topic_name, self.only_key())
        return self

    def _loads(self, b):
        data = MessageToDict(
            self._proto.FromString(b),
            preserving_proto_field_name=True,
            including_default_value_fields=True,
        )
        # remove the key object from the deserialized message
        data.pop("key", None)
        return data

    def _dumps(self, o):
        # for deletes
        if not o:
            return None
        obj = self._proto()
        # add the key object to the message before serializing
        if hasattr(obj, "PK"):
            for k in obj.PK.DESCRIPTOR.fields_by_name.keys():
                if k not in o:
                    raise Exception(
                        "Invalid object `{}` for proto `{}`".format(o, self._proto)
                    )
                setattr(obj.key, k, o[k])
        for k, v in o.items():
            if hasattr(obj, k):
                setattr(obj, k, v)
            else:
                ghost.debug(
                    "Invalid value-attribute `%s` for proto `%s`", k, self._proto
                )
        return obj.SerializeToString()


class ProtoValue(ProtoSerializer):
    def only_key(self):
        return False


class ProtoKey(ProtoSerializer):
    def only_key(self):
        return True
and then use it as follows:
import faust
from utils.serializer import ProtoKey, ProtoValue

app = faust.App(
    'faust-consumer',
    broker='kafka://',
    store="memory://",
    cache="memory://",
)

topic = app.topic(
    'topic',
    key_serializer=ProtoKey().as_proto('topic'),
    value_serializer=ProtoValue().as_proto('topic')
)

@app.agent(topic)
async def consume(topic):
    async for event in topic:
        print(event)

if __name__ == "__main__":
    app.main()

How to create a singleton class for asyncpg in Python 3.x?

I wanted to set up a connection pool when initializing the class, using the approach below:
import asyncio
import asyncpg


class DBCommands:
    def __init__(self, uri: str) -> None:
        loop = asyncio.get_event_loop()
        self.pool: asyncpg.pool.Pool = loop.run_until_complete(asyncpg.create_pool(dsn=uri))

    async def get_id_admins(self) -> list:
        async with self.pool.acquire():
            result = await self.pool.fetch("SELECT chat_id FROM users WHERE role_user = 'admin'")
        admins_id = [row[0] for row in result]
        return admins_id
Since there should be only one pool, the implementation above will not work. I decided to use a singleton, but I don't understand how to implement it. Below is the version that I came up with. Tell me how best to solve this problem. In addition, I do not understand how and where it is best to close the connections. I'm new to using patterns and am just starting to study OOP.
import asyncio
import asyncpg


class Singleton(type):
    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]


class DBManager(metaclass=Singleton):
    @classmethod
    def connect(cls, uri):
        loop = asyncio.get_event_loop()
        return loop.run_until_complete(asyncpg.create_pool(dsn=uri))


class DBCommands:
    def __init__(self, uri) -> None:
        self.uri = uri
        self.pool = DBManager.connect(uri)

    async def get_id_admins(self) -> list:
        async with self.pool.acquire():
            result = await self.pool.fetch("SELECT chat_id FROM users WHERE role_user = 'admin'")
        admins_id = [row[0] for row in result]
        return admins_id
I assume that opening and closing the pool could be moved into __aenter__ and __aexit__.
You can use a class attribute and create the pool the first time it's needed in an async function:

class Database:
    pool = None
    ...

    async def get_id_admins(self):
        if self.pool is None:
            self.pool = await asyncpg.create_pool(dsn=...)
I generally use a regular class and create a single instance attached to a global object (like the aiohttp application, for web applications), as in:
import logging

import asyncpg


class Database:
    def __init__(self, dsn):
        self.dsn = dsn
        self.pool = None

    async def connect(self):
        """Initialize asyncpg Pool"""
        self.pool = await asyncpg.create_pool(dsn=self.dsn, min_size=2, max_size=4)
        logging.info("successfully initialized database pool")

    async def get_id_admins(self):
        ...
And use it like:
from aiohttp import web


async def startup(app):
    await app.database.connect()


async def shutdown(app):
    await app.database.pool.close()


def main():
    app = web.Application()
    app.database = Database(app.config.DSN)
    app.on_startup.append(startup)
    app.on_shutdown.append(shutdown)
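A request handler can then reach the pool through the application object. A minimal sketch, where the handler name and query are placeholders that mirror the question:

async def list_admins(request):
    # the Database instance was attached to the app in main()
    pool = request.app.database.pool
    async with pool.acquire() as conn:
        rows = await conn.fetch("SELECT chat_id FROM users WHERE role_user = 'admin'")
    return web.json_response([row["chat_id"] for row in rows])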

aiomysql and sqlalchemy basic example produces syntax error on python 3.6

I am trying to integrate SQLAlchemy with aiomysql on Python 3.6, using the official example on GitHub. Here is my full code:
import sqlalchemy as sa
import asyncio
from aiomysql.sa import create_engine

DB1 = dict(host="xxx", ...)
DB2 = dict(host="yyy", ...)
DATABASES = dict(db1=DB1, db2=DB2)


async def get_engine(loop, configs):
    configs = configs.copy()
    configs['loop'] = loop
    engine = await create_engine(**configs)
    return engine


class Engine(object):
    __shared_state = {}
    running = None

    def __init__(self, loop):
        print("init", Engine.running)
        self.__dict__ = Engine.__shared_state
        self.loop = loop
        if not Engine.running:
            self.ignite(loop)

    def connect(self, key, configs, loop):
        engine = loop.run_until_complete(get_engine(loop, configs))
        self.__dict__[key] = engine

    def ignite(self, loop):
        Engine.running = True
        for key, configs in DATABASES.items():
            self.connect(key, configs, loop)


def DoMyQueries(conn):
    pass


ioloop = asyncio.get_event_loop()
engine = Engine(ioloop)

async with engine.db1.acquire() as conn:
    DoMyQueries(conn)

engine.db1.close()
await engine.wait_closed()
but I am getting the following error:

  File "myfile.py", line 45
    async with engine.db1.acquire() as conn:
             ^
SyntaxError: invalid syntax

What am I missing in my code? I know the error is pretty obvious, but how do I fix it?
async with can occur only inside async def. Move your code into an async def main() and call it with run_until_complete(), as in the sketch below.
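A minimal sketch of that restructuring, reusing the Engine class and DoMyQueries function from the question:

async def main(engine):
    # 'async with' and 'await' are now inside a coroutine, so the syntax is valid
    async with engine.db1.acquire() as conn:
        DoMyQueries(conn)
    engine.db1.close()
    await engine.wait_closed()


ioloop = asyncio.get_event_loop()
engine = Engine(ioloop)
ioloop.run_until_complete(main(engine))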
