I am trying to read data from a Kafka topic whose messages have been serialized using Google's protobuf.
I compiled the proto files using protoc which generated pb2 files.
Now I am trying to use Faust to create a stream processor, but I can't find the correct way to use the pb2 files as key_serializer and value_serializer.
Here is what i have tried:
import faust
from proto.topic_pb2 import topic
app = faust.App(
schema = faust.Schema(
## key_type=topic.PK,
## value_type=topic,
topic = app.topic(
async def consume(topic):
async for event in topic:
if __name__ == "__main__":
Does anybody have any idea how to use the pb2 files in the serializers?
Man, I was trying to do the same the past week. After struggling I finally got something working - not the best way - but it works well enough.
So initially I used this python compiler: https://github.com/danielgtaylor/python-betterproto to generate the *.py files with dataclasses / type hinting.
Then I was able to create faust.Record classes dynamically by using a helper:
import abc
import inspect
from typing import Type
import betterproto
import faust
GENERATED_SUFFIX = "__FaustRecord_Auto"
def _import_relative_class(module: str, klass_name: str):
    """Import ``module`` and return its attribute named ``klass_name``.

    Args:
        module: Dotted path of the module to import.
        klass_name: Name of the attribute (normally a class) to fetch.

    Returns:
        The object bound to ``klass_name`` in the imported module.

    Raises:
        ImportError: If ``module`` cannot be imported.
        AttributeError: If the module has no attribute ``klass_name``.
    """
    # Imported locally so this helper does not rely on the file's import list.
    import importlib

    # importlib.import_module is the documented replacement for calling
    # __import__ directly; for a dotted path it returns the leaf module.
    return getattr(importlib.import_module(module), klass_name)
def _is_record(attype: Type):
return (
and isinstance(attype, betterproto.Message)
or isinstance(attype, abc.ABCMeta)
def _build_record_annotations(klass: Type):
    """Build the annotation mapping for the record generated from ``klass``.

    Every entry of ``klass.__annotations__`` is copied into a new dict:

    * types accepted by ``_is_record`` are replaced by the class returned
      from ``make_faust_record``;
    * string annotations are looked up by name in ``klass``'s own module
      and the resulting class is converted the same way;
    * anything else is kept unchanged.

    Returns:
        A new ``dict`` mapping attribute names to (possibly converted) types.
    """
    annotations = {}
    for atname, attype in klass.__annotations__.items():
        if _is_record(attype):
            annotations[atname] = make_faust_record(attype)
        elif isinstance(attype, str):
            # A string annotation names a class in the same module as klass.
            subklass = _import_relative_class(klass.__module__, attype)
            annotations[atname] = make_faust_record(subklass)
        else:
            # Without this branch the plain assignment would overwrite the
            # converted record types produced by the two branches above.
            annotations[atname] = attype
    return annotations
def make_faust_record(klass: Type):
    """Create a class deriving from both ``faust.Record`` and ``klass``.

    The new class is named after ``klass`` with ``GENERATED_SUFFIX``
    appended, and its ``__annotations__`` are set to the mapping returned
    by ``_build_record_annotations(klass)``.
    """
    generated_name = f"{klass.__name__}{GENERATED_SUFFIX}"
    bases = (faust.Record, klass)
    generated = type(generated_name, bases, {})
    generated.__annotations__ = _build_record_annotations(klass)
    return generated
Now you can use it like:
import faust
from proto.your_models import YourModel # Import your generated proto here
from faust_converter import make_faust_record
app = faust.App(
model_record = make_faust_record(YourModel)
topic = app.topic(
async def consume(topic):
async for event in topic:
if __name__ == "__main__":
I was also experimenting with using Protobuf with Faust.
Mentioned below is the solution using Faust Serialiser Codecs.
faust-protobuf https://github.com/hemantkashniyal/faust-protobuf
from faust.serializers import codecs
from typing import Any
from google.protobuf import json_format
from google.protobuf.json_format import MessageToJson
from google.protobuf.json_format import MessageToDict
from google.protobuf import text_format
from google.protobuf.text_format import MessageToString
from google.protobuf.text_format import MessageToBytes
class ProtobufSerializer(codecs.Codec):
    """Codec that converts between bytes and one protobuf message type.

    Args:
        pb_type: Callable (normally the generated message class) that
            returns a new, empty message when called without arguments.
    """

    def __init__(self, pb_type: Any):
        self.pb_type = pb_type
        # Zero-argument super(): super(self.__class__, self) resolves to the
        # same class again when this codec is subclassed and recurses forever.
        super().__init__()

    def _dumps(self, pb: Any) -> bytes:
        """Return the serialized form of the message ``pb``."""
        return pb.SerializeToString()

    def _loads(self, s: bytes) -> Any:
        """Return a new ``pb_type`` message populated from the bytes ``s``."""
        pb = self.pb_type()
        # Without this call the payload is ignored and an empty message
        # is handed back for every input.
        pb.ParseFromString(s)
        return pb
import faust
from google.protobuf.json_format import MessageToJson
from .proto.greetings_pb2 import Greeting
from .proto_serializer import ProtobufSerializer
app = faust.App(
broker='kafka://', # TODO: update kafka endpoint
greetings_schema = faust.Schema(
topic = app.topic(
async def consume(topic):
async for event in topic:
async def produce():
    """Send ten ``Greeting`` values, numbered 0 to 9, through ``consume.send``."""
    for counter in range(10):
        greeting = Greeting(hello="world", message=counter)
        await consume.send(value=greeting)
if __name__ == "__main__":
I was able to do it by creating a Serializer class as so:
import faust
from abc import ABCMeta, abstractmethod
from google.protobuf.json_format import MessageToDict
from faust.serializers.codecs import Codec
from importlib import import_module
def get_proto(topic_name, only_pk=False):
if not hasattr(get_proto, "topics"):
setattr(get_proto, "topics", dict())
get_proto.topics[topic_name] = import_module(
if only_pk:
return getattr(get_proto, "topics").get(topic_name).PK
return getattr(get_proto, "topics").get(topic_name)
class ProtoSerializer(Codec, metaclass=ABCMeta):
def only_key(self):
def as_proto(self, topic_name):
self._proto = get_proto(topic_name, self.only_key())
return self
def _loads(self, b):
data = MessageToDict(
# remove the key object from the unserialized message
data.pop("key", None)
return data
def _dumps(self, o):
# for deletes
if not o:
return None
obj = self._proto()
# add the key object to them message before serializing
if hasattr(obj, "PK"):
for k in obj.PK.DESCRIPTOR.fields_by_name.keys():
if k not in o:
raise Exception(
"Invalid object `{}` for proto `{}`".format(o, self._proto)
setattr(obj.key, k, o[k])
for k, v in o.items():
if hasattr(obj, k):
setattr(obj, k, v)
"Invalid value-attribute `%s` for proto `%s`", k, self._proto
return obj.SerializeToString()
class ProtoValue(ProtoSerializer):
    """``ProtoSerializer`` variant whose ``only_key`` reports ``False``."""

    def only_key(self) -> bool:
        """Return ``False``."""
        key_only = False
        return key_only
class ProtoKey(ProtoSerializer):
    """``ProtoSerializer`` variant whose ``only_key`` reports ``True``."""

    def only_key(self) -> bool:
        """Return ``True``."""
        key_only = True
        return key_only
and then use it as follows:
import faust
from utils.serializer import ProtoKey, ProtoValue
app = faust.App(
topic = app.topic(
async def consume(topic):
async for event in topic:
if __name__ == "__main__":
First of all, I have tried looking for answers on this website, but had no luck.
What I want to achieve is to start an independent thread from the request-handling thread to do some asynchronous task. The tricky point is that this independent thread needs to perform some database operations.
Here is an example. Five files included.
Detail code below...
# deployer/__init__.py
from flask import Flask
from deployer.models import db
def create_app(config_name=None):
    """Create the Flask application and register the index route.

    Args:
        config_name: Optional import path of a configuration object (for
            example ``'config.DevConfig'``). When given, it is loaded with
            ``app.config.from_object`` and the shared ``db`` extension is
            bound to the new app. When omitted, the app keeps Flask's
            default configuration and ``db`` is left unbound.

    Returns:
        The new ``Flask`` application.
    """
    app = Flask(__name__)

    if config_name is not None:
        app.config.from_object(config_name)
        # NOTE(review): assumes the configuration object carries the database
        # settings that ``db`` needs -- confirm against the config module.
        db.init_app(app)

    # Add route for index
    @app.route('/')
    def index():
        return {'code': 200, 'message': 'OK'}

    return app
# manager.py
from os import environ
from flask_script import Manager, Server
from deployer import create_app
from flask_restful import Api
from deployer.views import HostView
env = environ.get('APM_ENV', 'dev')
app = create_app('config.%sConfig' % env.capitalize())
api = Api(app)
api.add_resource(HostView, '/api/v1/hosts')
manager = Manager(app)
manager.add_command("server", Server(host='', port=9527))
if __name__ == '__main__':
# deployer/views.py
from flask_restful import Resource, reqparse
from flask import jsonify
from deployer.models import db, Host
from deployer.operators import HostInitiator
parser = reqparse.RequestParser()
parser.add_argument('host', type=int, help='Specify an unique host.')
class HostView(Resource):
def get(self):
h = db.session.query(Host).filter(Host.id == 1).one()
return jsonify(
def post(self):
h = Host(
initiator = HostInitiator(host=h)
return {
'code': 'Harbor',
'ip_addr_v4': '',
'state': 'created'
# deployer/models.py
from sqlalchemy import Column, Integer, String
from flask_sqlalchemy import SQLAlchemy
db = SQLAlchemy()
class Host(db.Model):
    """ORM model mapped to the ``br_host`` table."""

    __tablename__ = 'br_host'

    # Auto-incrementing integer primary key.
    id = Column(Integer, autoincrement=True, primary_key=True)
    # Indexed, required, at most 128 characters.
    code = Column(String(128), nullable=False, index=True)
    # Required, at most 15 characters.
    ip_addr_v4 = Column(String(15), nullable=False)
    # Required, at most 16 characters.
    state = Column(String(16), nullable=False)
# deployer/operators.py
from threading import Thread
from deployer.models import db
class HostInitiator(Thread):
    """Thread that moves a host's ``state`` through its initiation steps.

    The instance must be created while a Flask application context is
    active (for example inside a view), because the current application is
    captured at construction time and its context is pushed again inside
    the thread.

    Args:
        host: Object whose ``id`` identifies the ``Host`` row to update.
    """

    def __init__(self, host):
        # Thread.__init__ has to run, otherwise start() raises RuntimeError.
        super().__init__()
        self.host = host
        from flask import current_app
        # current_app is a context-local proxy; keep the real application
        # object so it can be used from the new thread.
        self._app = current_app._get_current_object()

    def run(self):
        # Imported here because the module only imports ``db`` at the top.
        from deployer.models import Host

        # db.session needs an application context, and a new thread starts
        # without one.
        with self._app.app_context():
            # NOTE(review): self.host was loaded by the creating thread's
            # session; reading .id here assumes it is already populated.
            host_id = self.host.id

            # Update Host.state [created-->initating]
            db.session.query(Host).filter(Host.id == host_id).update({'state': 'initating'})
            db.session.commit()

            # do some initiating things...

            # Update Host.state [initating-->ready]
            db.session.query(Host).filter(Host.id == host_id).update({'state': 'ready'})
            db.session.commit()
With the code above I always get a "working outside of application context" error. The error message indicates that no database operation is permitted in the HostInitiator thread.
It suggests that I push an application context or move my code into a view function. I have been struggling with this for quite a while, so please help if you have any suggestions. Thanks in advance.
The code works for me
def test_multi_threading_query():
    """Set ``foo = 1`` on the first ``Post`` row inside an app context.

    Intended to run as a thread target: it pushes its own application
    context, so it does not depend on the caller's request context. Does
    nothing when the table is empty.
    """
    # module in which the Flask app instance is created
    from app.main import app
    # module in which the SQLAlchemy instance is created
    from app.model.db import db, Post

    with app.app_context():
        # Only the first row is needed, so do not load the whole table.
        post = Post.query.first()
        if post is None:
            return
        post.foo = 1
        # Without a commit the change is lost when the context ends.
        db.session.commit()
def test_view():
    """Run ``test_multi_threading_query`` in a new thread and return ``''``.

    The thread is not joined, so the empty response is returned without
    waiting for the query to finish.
    """
    from threading import Thread

    worker = Thread(target=test_multi_threading_query)
    # A Thread object does nothing until start() is called.
    worker.start()
    return ''
# main.py
# Flask expects the import name of the module; ``__main__`` is not a defined
# name and raises NameError, ``__name__`` is the module-level variable.
app = Flask(__name__)
db = SQLAlchemy()
class Post(db.Model):
    """ORM model with an integer primary key and one integer column."""

    # Integer primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Plain integer column.
    foo = db.Column(db.Integer)
I am trying to do the following:
@patch('uuid.uuid4', autospec=True)
def test_generate_adid(self, patched_uuid, app_api):
patched_uuid.return_value = "9e51ab81-6d65-4b81-af3b-8f7f49d69ba7"
adid = app_api.generate_adid()
assert adid == "9e51ab81-6d65-4b81-af3b-8f7f49d69ba7"
Where app_api is a fixture of the class under test.
However, in my app_api class, uuid4() is not getting patched and keeps returning a uuid other than the one I am trying to force. Here is what the generate_adid() instance method looks like:
from uuid import uuid4
def generate_adid(self):
    """Return the string form of a newly generated ``uuid4()`` value."""
    return str(uuid4())
The failing unit test error:
AssertionError: assert '90b29e86-e3b0-40aa-8971-f868f90cb009' == '9e51ab81-6d65-4b81-af3b-8f7f49d69ba7'
I have consulted this post: How to mock uuid generation in a test case? but still am having no luck.
What am I doing wrong? Thanks to all of those who reply in advance.
EDIT: Here is the full code:
from requests import Session
from random import uniform
from hashlib import md5
from hmac import new
from uuid import uuid4
from json import dumps
class AppApi:
    """Holds an account together with a ``Session`` and generates ad IDs."""

    def __init__(self, account):
        """Store ``account`` and create a new ``Session``."""
        self.account = account
        self.session = Session()

    def generate_adid(self):
        """Return the string form of a newly generated ``uuid4()`` value."""
        return str(uuid4())
Test Case:
from src import AppApi
from pytest import fixture
from unittest.mock import patch
from json import loads
"email": "user#email.com",
"username": "user",
"password": "s3cr3t"
def app_api():
app_api = AppApi(ACCOUNT)
yield app_api
class TestAppApi:
def test_generate_adid(self, patched_uuid, app_api):
patched_uuid.return_value = "9e51ab81-6d65-4b81-af3b-8f7f49d69ba7"
adid = app_api.generate_adid()
assert adid == "9e51ab81-6d65-4b81-af3b-8f7f49d69ba7"
In your example you're patching the uuid4() function in the uuid module rather than the uuid4() name that was imported into the module you're trying to test. Take a look at the "Where to patch" section of the Python unittest.mock documentation.
Using your example above, you need to patch the uuid4() imported into the src module, i.e. use @patch("src.uuid4"):
from src import AppApi
from pytest import fixture
from unittest.mock import patch
from json import loads
"email": "user#email.com",
"username": "user",
"password": "s3cr3t"
def app_api():
app_api = AppApi(ACCOUNT)
yield app_api
class TestAppApi:
def test_generate_adid(self, patched_uuid, app_api):
patched_uuid.return_value = "9e51ab81-6d65-4b81-af3b-8f7f49d69ba7"
adid = app_api.generate_adid()
assert adid == "9e51ab81-6d65-4b81-af3b-8f7f49d69ba7"
Hope this helps!
I want to post and update data in a YAML file through a Tornado API call. Could you please give a code example?
import tornado.web
import tornado.ioloop
import nest_asyncio
import json
class basicRequestHandler(tornado.web.RequestHandler):
    """Request handler that answers GET with a fixed greeting."""

    def get(self):
        """Write the greeting text to the response."""
        greeting = "Hello , world...."
        self.write(greeting)
if __name__ == "__main__":
    app = tornado.web.Application([(r"/", basicRequestHandler)])
    # Bind the port before announcing it; the original printed the message
    # without ever listening.
    app.listen(8881)
    print("I'm listening on port 8881")
    # Run the event loop so that requests are actually served.
    tornado.ioloop.IOLoop.current().start()
I prepared this example for you:
from typing import Dict
import tornado.web
import tornado.ioloop
import yaml
from tornado.escape import json_decode
class BaseHandler(tornado.web.RequestHandler):
    """Base handler that decodes the JSON request body into ``json_args``."""

    # File that subclasses read from and write to.
    yaml_filename = 'data.yaml'
    # Decoded request body; set by prepare() before the HTTP method runs.
    json_args: Dict

    async def prepare(self):
        """Decode the request body as JSON, using ``{}`` when it is empty.

        A falsy decoded value (such as ``null``) is replaced by ``{}`` too.
        """
        body = self.request.body
        # json_decode raises on an empty body, so the fallback has to be
        # applied before decoding as well as after.
        decoded = json_decode(body) if body else None
        self.json_args = decoded or {}

    def data_received(self, chunk):
        """Ignore streamed body chunks."""
        # NOTE(review): the original body was lost in extraction; a no-op
        # override is assumed.
        pass
class MainHandler(BaseHandler):
    """Create (POST) or update (PUT) the YAML file named by ``yaml_filename``."""

    def post(self):
        """Replace the file content with the decoded request body."""
        with open(self.yaml_filename, 'w') as file:
            yaml.dump(self.json_args, file)

    def put(self):
        """Merge the decoded request body into the existing file content.

        A missing or empty file is treated as an empty mapping.
        """
        try:
            with open(self.yaml_filename) as file:
                # safe_load is sufficient for the plain data written by
                # post()/put() and never constructs arbitrary objects.
                yaml_data = yaml.safe_load(file) or {}
        except FileNotFoundError:
            yaml_data = {}

        # NOTE(review): assumes both the stored document and the request
        # body are mappings -- adjust if lists or scalars must be supported.
        yaml_data.update(self.json_args)

        with open(self.yaml_filename, 'w') as file:
            yaml.dump(yaml_data, file)
if __name__ == "__main__":
    app = tornado.web.Application([(r"/", MainHandler)])
    # Bind the port before announcing it; the original printed the message
    # without ever listening.
    app.listen(8881)
    print("I'm listening on port 8881")
    # Run the event loop so that requests are actually served.
    tornado.ioloop.IOLoop.current().start()
In folder [Root]/src/app, I have a file services_factory.py, for example:
class Describing:
    """Base class for objects that expose ``get_description``."""

    def __init__(self):
        # NOTE(review): the original body was lost; an empty initializer
        # is assumed.
        pass

    def get_description(self):
        """Return the description; the base class has none and returns ``None``."""
        # NOTE(review): the original body was lost; subclasses override this.
        return None
class APIService(Describing):
    """Service whose description refers to APIs."""

    def __init__(self):
        # NOTE(review): the original body was lost; delegating to the base
        # initializer is assumed.
        super().__init__()

    def get_description(self):
        """Return the fixed description text of this service."""
        return 'Here provide services for APIs'
class DatabaseService(Describing):
    """Service whose description refers to the database."""

    def __init__(self):
        # NOTE(review): the original body was lost; delegating to the base
        # initializer is assumed.
        super().__init__()

    def get_description(self):
        """Return the fixed description text of this service."""
        return 'Here provide services for Database'
class Injector:
    """Creates service objects from a short type key."""

    def __init__(self):
        # NOTE(review): the original body was lost; an empty initializer
        # is assumed.
        pass

    def get_service(self, type='API'):
        """Return a new service instance for ``type``.

        Args:
            type: ``"API"`` for ``APIService`` or ``"DB"`` for
                ``DatabaseService``. The name shadows the builtin but is
                kept so that keyword callers continue to work.

        Raises:
            KeyError: If ``type`` is not one of the known keys.
        """
        services = {
            "API": APIService,
            "DB": DatabaseService,
        }
        return services[type]()
At the end of the file services_factory.py, I added a unit test, for example:
def test_services_injector():
    """Check the descriptions of the services returned for 'API' and 'DB'."""
    factory = Injector()
    api = factory.get_service('API')
    database = factory.get_service('DB')

    api_text = api.get_description()
    assert api_text == 'Here provide services for APIs'
    database_text = database.get_description()
    assert database_text == 'Here provide services for Database'
Then I ran the command `$ pytest src/app/services_factory.py`, and it worked nicely.
But when I create a file test_services_factory.py in [Root]/tests/app, for example:
import unittest
from unittest.mock import patch
def test_services_injector():
assert 'a' == 'a'
I can't import the classes in my services_factory.py.
So, how can I quickly fix this problem?
I want a Faust agent to write to a PostgreSQL table. I'd like to use an asyncpg connection pool, but I cannot find a clean way to inject it into the app initialization code.
Simply add the below function to your faust App
class KafkaWorker(faust.App):
    """``faust.App`` subclass that creates a database pool on first use."""

    def __init__(self, *args: List, **kwargs: Dict) -> None:
        """Take the required ``broker`` keyword and initialize the app.

        The raw value is kept on ``self.broker``; the value passed on to
        ``faust.App`` comes from ``KafkaWorker._broker_faust_string``.
        """
        self.broker : str = kwargs.pop('broker')
        self._db_pool = None
        faust_broker = KafkaWorker._broker_faust_string(self.broker)
        super().__init__(*args, broker=faust_broker, **kwargs)

    async def db_pool(self) -> Pool:
        """Return the pool, creating it with ``db.db_pool()`` when unset."""
        if self._db_pool:
            return self._db_pool

        logging.warning('kafka.db_pool initialization...')
        self._db_pool = await db.db_pool()
        logging.warning('kafka.db_pool initialization...done ✓')
        return self._db_pool
where db.db_pool is
from os import environ
import asyncpg
from asyncpg.pool import Pool
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.ext.declarative import declarative_base
session = scoped_session(sessionmaker())
Base = declarative_base()
async def db_pool() -> Pool:
    """Create and return a new asyncpg connection pool.

    The DSN is read from the ``DB_CNX_STRING`` environment variable; when
    it is unset, a default DSN for the ``actions`` database is used.
    """
    # The credentials and the host are separated by '@'; with '#' in that
    # position the rest of the URL is not a valid host/port/database part.
    default_dsn = 'postgresql://postgres:postgres@postgres:5432/actions'
    return await asyncpg.create_pool(
        dsn=environ.get('DB_CNX_STRING', default_dsn),
    )
and then you access it as
async def store_actions(actions: StreamT):
    """Save every action from the stream and yield one result per action.

    For each action a connection is acquired from the app's pool and
    ``save_action`` is awaited with the action's representation; its result
    is yielded. When ``StoreException`` is raised, the error is logged and
    the action's ``id`` is yielded instead, so processing continues with
    the next action.
    """
    async for action in actions:
        # The ``except`` clause needs a matching ``try``; without it the
        # function does not parse.
        try:
            db_pool = await current_agent().app.db_pool()
            async with db_pool.acquire() as conn:
                yield await save_action(conn, action.to_representation())
        except StoreException:
            # Plain string: the message has no placeholders, so no f-string.
            logger.exception('Error while inserting action in DB, continuing....')
            yield action.id