I have a Node.js application that runs a client interface and exposes an action that triggers machine-learning tasks. Since Python is a better choice for implementing the machine-learning related code, I've implemented a Python application that runs those tasks on demand.
Now I need to integrate the two applications, and it has been decided that we must use a single (AWS) instance for both.
One way I found to do this integration was the python-shell Node module, where the communication between Python and Node is done over stdin and stdout.
On the Node side I have something like this:
'use strict';

const express = require('express');
const PythonShell = require('python-shell');

var app = express();

app.listen(8000, function () {
  console.log('Example app listening on port 8000!');
});

var options = {
  mode: 'text',
  pythonPath: '../pythonapplication/env/Scripts/python.exe',
  scriptPath: '../pythonapplication/',
  pythonOptions: ['-u'], // Unbuffered
};

var pyshell = new PythonShell('start.py', options);

pyshell.on('message', function (message) {
  console.log(message);
});

app.get('/task', function (req, res) {
  pyshell.send('extract-job');
});

app.get('/terminate', function (req, res) {
  pyshell.send('terminate');
  pyshell.end(function (err, code, signal) {
    console.log(err);
    console.log(code);
    console.log(signal);
  });
});
On the Python side, I have a main script which loads a few things and then calls a server script that runs forever, reading lines with sys.stdin.readline() and executing the corresponding task.
start.py is:
if __name__ == '__main__':
    # data = json.loads(sys.argv[1])
    from multiprocessing import Manager, Pool
    import logging

    import provider, server

    # Get logging setup objects
    debug_queue, debug_listener = provider.shared_logging(logging.DEBUG, 'python-server-debug.log')
    info_queue, info_listener = provider.shared_logging(logging.INFO, 'python-server.log')
    logger = logging.getLogger(__name__)

    # Start logger listener
    debug_listener.start()
    info_listener.start()

    logger.info('Initializing pool of workers...')
    pool = Pool(initializer=provider.worker, initargs=[info_queue, debug_queue])

    logger.info('Initializing server...')
    try:
        server.run(pool)
    except (SystemError, KeyboardInterrupt) as e:
        logger.info('Execution terminated without errors.')
    except Exception as e:
        logger.error('Error on main process:', exc_info=True)
    finally:
        pool.close()
        pool.join()
        debug_listener.stop()
        info_listener.stop()

    print('Done.')
Both info_queue and debug_queue are multiprocessing.Queue instances used to handle multiprocessing logging. If I run my Python application standalone, everything works fine, even when using the pool of workers (logs get properly logged, prints get properly printed...).
But if I run it through python-shell, only the prints and logs from my main process get printed and logged correctly... Every message (print or log) from my pool of workers is held until I terminate the Python script.
In other words, every message is held until the finally block runs...
Does anyone have any insights on this issue? Have you heard about the python-bridge module? Is it a better solution? Can you suggest a better approach for such an integration that does not use two separate servers?
Below I post my real provider script and a quick mock I did for the server script (the real one has too much stuff):
mock server.py:
import json
import logging
import multiprocessing
import sys
import time
from json.decoder import JSONDecodeError
from threading import Thread


def task(some_args):
    logger = logging.getLogger(__name__)
    results = 'results of machine learn task goes here, as a string'
    logger.info('log whatever im doing')
    # Some machine-learn task...
    logger.info('Returning results.')
    return results


def answer_node(message):
    print(message)
    # sys.stdout.write(message)
    # sys.stdout.flush()


def run(pool, recrutai, job_pool, candidate_queue):
    logger = logging.getLogger(__name__)
    workers = []
    logger.info('Server is ready and waiting for commands')
    while True:
        # Read input stream
        command = sys.stdin.readline()
        command = command.split('\n')[0]

        logger.debug('Received command: %s', command)
        if command == 'extract-job':
            logger.info(
                'Creating task.',
            )
            # TODO: Check data attributes
            p = pool.apply_async(
                func=task,
                args=('args',),  # note the trailing comma: args must be a tuple
                callback=answer_node
            )

            # What to do with workers array?!
            workers.append(p)

        elif command == 'other-commands':
            pass
            # Other task here

        elif command == 'terminate':
            raise SystemError

        else:
            logger.warning(
                'Received an invalid command %s.',
                command
            )
My provider.py:
import logging
import os
from logging.handlers import QueueHandler, QueueListener
from multiprocessing import Queue


def shared_logging(level, file_name):
    # Create main logging file handler
    handler = logging.FileHandler(file_name)
    handler.setLevel(level)

    # Create logging format
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)

    # Create queue shared between all process to centralize logging features
    logger_queue = Queue()  # multiprocessing.Queue

    # Create logger queue listener to send records from logger_queue to handler
    logger_listener = QueueListener(logger_queue, handler)
    return logger_queue, logger_listener


def process_logging(info_queue, debug_queue, logger_name=None):
    # Create logging queue handlers
    debug_queue_handler = QueueHandler(debug_queue)
    debug_queue_handler.setLevel(logging.DEBUG)
    info_queue_handler = QueueHandler(info_queue)
    info_queue_handler.setLevel(logging.INFO)

    # Setup level of process logger
    logger = logging.getLogger()
    if logger_name:
        logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)

    # Add handlers to the logger
    logger.addHandler(debug_queue_handler)
    logger.addHandler(info_queue_handler)


def worker(info_queue, debug_queue):
    # Setup worker process logging
    process_logging(info_queue, debug_queue)
    logging.debug('Process %s initialized.', os.getpid())
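For reference, here is a minimal standalone sketch of how these provider pieces plug together outside of Node (demo_task and the log file names are hypothetical, not part of the real application):

import logging
from multiprocessing import Pool

import provider


def demo_task(n):
    # Hypothetical task: log through the shared queues and return a value
    logging.getLogger(__name__).info('Working on %s', n)
    return n * 2


if __name__ == '__main__':
    debug_queue, debug_listener = provider.shared_logging(logging.DEBUG, 'demo-debug.log')
    info_queue, info_listener = provider.shared_logging(logging.INFO, 'demo.log')
    debug_listener.start()
    info_listener.start()
    try:
        # Each worker process is initialized with provider.worker, so its log
        # records flow through the queues to the listeners in the main process.
        with Pool(initializer=provider.worker, initargs=[info_queue, debug_queue]) as pool:
            print(pool.map(demo_task, range(4)))
    finally:
        debug_listener.stop()
        info_listener.stop()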
Related
I am trying to create a small service to respond to Envoy's rate limiting queries. I have compiled all the relevant protobuf files, and the one relevant for the service I am trying to implement is here:
https://github.com/envoyproxy/envoy/blob/v1.17.1/api/envoy/service/ratelimit/v3/rls.proto
There is a service definition in there, but inside the "compiled" Python file, all I see about it is this:
_RATELIMITSERVICE = _descriptor.ServiceDescriptor(
  name='RateLimitService',
  full_name='envoy.service.ratelimit.v3.RateLimitService',
  file=DESCRIPTOR,
  index=0,
  serialized_options=None,
  create_key=_descriptor._internal_create_key,
  serialized_start=1531,
  serialized_end=1663,
  methods=[
  _descriptor.MethodDescriptor(
    name='ShouldRateLimit',
    full_name='envoy.service.ratelimit.v3.RateLimitService.ShouldRateLimit',
    index=0,
    containing_service=None,
    input_type=_RATELIMITREQUEST,
    output_type=_RATELIMITRESPONSE,
    serialized_options=None,
    create_key=_descriptor._internal_create_key,
  ),
])
_sym_db.RegisterServiceDescriptor(_RATELIMITSERVICE)

DESCRIPTOR.services_by_name['RateLimitService'] = _RATELIMITSERVICE
Here is my feeble attempt at implementing the service:
import logging
import asyncio
import grpc

from envoy.service.ratelimit.v3.rls_pb2 import RateLimitResponse, RateLimitRequest


class RL:
    def ShouldRateLimit(self, request):
        result = RateLimitResponse()


def add_handler(servicer, server):
    rpc_method_handlers = {
        'ShouldRateLimit': grpc.unary_unary_rpc_method_handler(
            RL.ShouldRateLimit,
            request_deserializer=RateLimitRequest.FromString,
            response_serializer=RateLimitResponse.SerializeToString,
        )
    }
    generic_handler = grpc.method_handlers_generic_handler(
        'envoy.service.ratelimit.v3.RateLimitService',
        rpc_method_handlers
    )
    server.add_generic_rpc_handlers((generic_handler,))


async def serve():
    server = grpc.aio.server()
    add_handler(RL(), server)
    listen_addr = '[::]:5051'
    server.add_insecure_port(listen_addr)
    logging.info(f'Starting server on {listen_addr}')
    await server.start()


if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    asyncio.run(serve())
How am I supposed to return (or even instantiate) a RateLimitResponse back to the caller?
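For what it's worth, here is a hedged sketch of what the servicer method could look like, assuming the v3 rls.proto linked above (whose RateLimitResponse carries an overall_code field with a nested Code enum) and the usual gRPC convention that a handler receives (request, context) and returns the response message; this is not a verified implementation:

from envoy.service.ratelimit.v3.rls_pb2 import RateLimitResponse


class RL:
    def ShouldRateLimit(self, request, context):
        # Build and return the response message; the field and enum names
        # here assume the rls.proto linked above.
        result = RateLimitResponse()
        result.overall_code = RateLimitResponse.OK
        return result

Note also that serve() as written returns right after await server.start(), so it likely needs an await server.wait_for_termination() to keep the asyncio server alive.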
I have this piece of code which does a psubscribe to Redis. I want to run it in a thread, working in the background, while another part of the code checks for notifications coming from it:
def psubscribe(context, param1, param2, param3):
    context.test_config = load_config()
    RedisConnector(context.test_config["redis_host"],
                   context.test_config["redis_db_index"])
    redis_notification_subscriber_connector = RedisConnector(context.test_config["notification__redis_host"],
                                                             int(param3),
                                                             int(context.test_config["notification_redis_port"]))
    context.redis_connectors = redis_notification_connector.psubscribe_to_redis_event(param1,
                                                                                      timeout_seconds=int(param2))
What I have done till now (but it's not running):
context.t = threading.Thread(target=psubscribe, args=['param1', 'param2', 'param3'])
context.t.start()
It actually works. I think you didn't actually need to pass the context variable to your psubscribe function.
Here is an example:
Start an HTTP server that listens on port 8000 as a background thread
Send HTTP requests to it and validate the responses
Feature scenario:
Scenario: Run background process and validate responses
    Given Start background process
    Then Validate outputs
background_steps.py file:
import threading
import logging
from behave import *
from features.steps.utils import run_server
import requests
#given("Start background process")
def step_impl(context):
context.t = threading.Thread(target=run_server, args=[8000])
context.t.daemon = True
context.t.start()
#then("Validate outputs")
def step_impl(context):
response = requests.get('http://127.0.0.1:8000')
assert response.status_code == 501
utils.py file:
from http.server import HTTPServer, BaseHTTPRequestHandler


def run_server(port, server_class=HTTPServer, handler_class=BaseHTTPRequestHandler):
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    httpd.serve_forever()
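Applied to the original psubscribe step, the same pattern would look roughly like this (a sketch only: if context stays in the signature it has to be passed as the first argument, otherwise drop it from psubscribe as suggested above):

import threading

# Run the question's psubscribe function as a background daemon thread.
context.t = threading.Thread(target=psubscribe,
                             args=[context, 'param1', 'param2', 'param3'])
context.t.daemon = True
context.t.start()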
I'm trying to modify a Flask request callback so that it can communicate with other code while executing the callback. The example explains it better:
from flask import Flask, request
from queue import Queue

flask_input_queue = Queue()
flask_output_queue = Queue()

app = Flask(__name__)


@app.route("/voice", methods=['GET', 'POST'])
def voice():
    # The receiver on the other end gets notified we got a request
    flask_output_queue.put(str(request))

    # This blocks until the external party responds with something
    response = flask_input_queue.get()

    # But how do the queues end up in the function scope to begin with?
    return response


app.run(debug=True)
Here the external code would have a channel into the web server via the queues. This allows me to completely abstract the concept of the web server from the other part of the code.
However, for that I need to be able to pass information to the callback method in ways other than just URLs. Frankly, it doesn't have to be a queue; other IPC mechanisms would also work, but they all rely on having a way to pass data into the callback.
Is there a way to do that in Flask?
The _URLCallbackClass, in combination with add_url_rule, is used instead of the decorator. That _URLCallbackClass gets the queues as instance attributes. Given that the actual callback function is a method of _URLCallbackClass, we have smuggled the queues into the callback function.
The rest of the complexity just arises from providing a working example:
import logging
import threading
from queue import Queue

import requests
from flask import Flask, Response

logging.basicConfig(format='[Thread: %(threadName)s-%(thread)d] %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)


class ControllableServer(threading.Thread):

    class _URLCallbackClass():
        def __init__(self, input_queue, output_queue):
            self.input_queue = input_queue
            self.output_queue = output_queue

        def url_callback(self):
            self.output_queue.put("[URL callback] I just got called")
            response_from_the_queue = self.input_queue.get()
            return Response(response_from_the_queue, 200)

    def __init__(self, input_queue, output_queue):
        super().__init__(daemon=True)
        self.input_queue = input_queue
        self.output_queue = output_queue
        self._flask = Flask(__name__)

    def run(self):
        callback_class = ControllableServer._URLCallbackClass(self.input_queue, self.output_queue)
        self._flask.add_url_rule('/endpoint', 'url_callback', callback_class.url_callback)
        logger.info(f"Starting flask")
        self._flask.run()


def call_URL_in_separate_thread(url):
    def call_URL(url):
        logger.info(f"Calling {url}")
        response = requests.get(url)
        logger.info(f"Got response: {response.text}")
        return response.text

    url_caller_thread = threading.Thread(target=call_URL, args=(url,))
    url_caller_thread.start()


if __name__ == "__main__":
    flask_input_queue = Queue()
    flask_output_queue = Queue()
    controllable_server = ControllableServer(flask_input_queue, flask_output_queue)
    controllable_server.start()

    call_URL_in_separate_thread("http://127.0.0.1:5000/endpoint")

    message_from_within_the_callback = flask_output_queue.get()
    logger.info(f"Got message from queue: {message_from_within_the_callback}")

    message_to_the_callback = "I come from the outside !###$#"
    flask_input_queue.put(message_to_the_callback)
    logger.info(f"Sending message to queue: {message_to_the_callback}")
Output:
[Thread: Thread-1-140465413375744] Starting flask
[Thread: Thread-2-140465404983040] Calling http://127.0.0.1:5000/endpoint
* Serving Flask app "basic_flask_passing_variable" (lazy loading)
* Environment: production
WARNING: This is a development server. Do not use it in a production deployment.
Use a production WSGI server instead.
* Debug mode: off
[Thread: Thread-1-140465413375744] * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
[Thread: MainThread-140465450415936] Got message from queue: [URL callback] I just got called
[Thread: MainThread-140465450415936] Sending message to queue: I come from the outside !###$#
[Thread: Thread-3-140465396041472] 127.0.0.1 - - [03/Mar/2020 18:33:32] "GET /endpoint HTTP/1.1" 200 -
[Thread: Thread-2-140465404983040] Got response: I come from the outside !###$#
I'm adding background tasks with APScheduler at runtime, depending on an API call. In other words, there are no background tasks when the app starts; when a user calls an API, tasks are added at runtime. But I'm getting an error that says:
AssertionError: Popped wrong app context
The application works just fine if I comment out the lines where background tasks are scheduled.
My app structure is as follows:
/project
    manage.py
    requirements.txt
    /app
        /models
        /routes
        /utils
        /api
        config.py
        __init__.py
My manage.py file looks like this:
app = create_app('dev')
app.app_context().push()

manager = Manager(app)
migrate = Migrate(app, db, render_as_batch=True)
manager.add_command('db', MigrateCommand)

with app.app_context():
    scheduler = BackgroundScheduler()
    scheduler.start()


@manager.command
def run():
    app.run()


atexit.register(lambda: scheduler.shutdown())

if __name__ == '__main__':
    manager.run()
__init__.py inside the app folder is:
from flask import Flask
from flask_restful import Api
from flask_sqlalchemy import SQLAlchemy

from email_scheduler.routes.routes import set_routes
from .config import config_by_name
# from app.models.task import TaskModel

db = SQLAlchemy()


def create_app(config_name):
    app = Flask(__name__)
    app.config.from_object(config_by_name[config_name])

    api = Api(app)
    set_routes(api)

    from email_scheduler.models.api_models import TaskModel, User

    db.init_app(app)
    with app.app_context():
        db.create_all()

    return app
My api.py file is:
class SignUp(Resource):

    def clean_scheduling_time(self, schedule_time):
        day = schedule_time.split(' ')[0].lower()[:3]
        hour, mins = schedule_time.split(' ')[1].split(':')
        return day, hour, mins

    def post(self):
        args = user_parser.parse_args()
        username, password = args.get('username'), args.get('password')
        schedule_time, email_to = args.get('schedule_time'), args.get('email_to')

        if username is None or password is None:
            abort(400)  # missing arguments

        from email_scheduler.models.api_models import User
        if User.query.filter_by(username=username).first() is not None:
            abort(400)  # existing user

        user = User(username=username, schedule_time=schedule_time.split(' ')[1], email_to=email_to)
        user.hash_password(password)
        user.save_to_db()

        from manage import scheduler
        from email_scheduler.utils.utils import send_email

        day, hour, mins = self.clean_scheduling_time(args.get('schedule_time'))
        trigger = CronTrigger(day_of_week=day, hour=int(hour), minute=int(mins))
        scheduler.add_job(send_email, trigger=trigger)
        print(scheduler.get_jobs())

        return make_response(jsonify({'username': username}), 200)
What's weird is that even though I get this error in the terminal, the task somehow gets scheduled and is run. And if I take out the code from the API that schedules the tasks, the API runs just fine. What am I doing wrong?
The problem is in your manage.py file.
You're running the following line globally:
app.app_context().push()
You correctly need this for the worker to have access to the app context, but move it inside the function that the worker calls.
I.e., NOT this:
app = create_app()
app.app_context().push()


def your_async_fn():
    # your code for the worker...
But this:
def your_async_fn():
    app = create_app()
    app.app_context().push()
    # your code for the worker...
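Applied to the question's scheduled job, that would mean pushing the app context inside the function the scheduler runs rather than at module level; a rough sketch (the send_email_job wrapper is hypothetical, reusing create_app('dev'), send_email, and trigger from the question):

# Sketch: give the scheduled job its own app context instead of relying on
# the context pushed globally in manage.py.
def send_email_job():
    app = create_app('dev')
    with app.app_context():
        send_email()

scheduler.add_job(send_email_job, trigger=trigger)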
I'm using the Falcon framework in Python to build the JSON responses of a web API.
For instance, I have a function called logic() that runs for 30-90 minutes. I want something like this:
When an HTTP client asks for /api/somepath.json, we call somepath_handle()
somepath_handle() runs logic() in another thread/process
When logic() is finished, the thread is closed
somepath_handle() reads the response of logic() from its return value
If somepath_handle() was killed before logic() finished, then the thread/process running logic() isn't stopped until it is finished
The code:
def somepath_handle():
    run_async_logic()
    response = wait_for_async_logic_response()  # read response of logic()
    return_response(response)
If your process takes that long, I advise you to send the result to the user by email, or maybe via a live notification system.
I am using a simple worker to create the queue in which I process my commands. If you add simple response storage, it becomes possible to process any request and not lose it when the connection is lost.
Example:
This is the main module; it uses the Falcon framework (falconframework.org) to respond to requests.
main.py:
from flow import Flow
import falcon
import threading
import storage

__version__ = 0.1
__author__ = 'weldpua2008@gmail.com'

app = falcon.API(
    media_type='application/json')

app.add_route('/flow', Flow())

THREADS_COUNT = 1
# adding the workers to process queue of command
worker = storage.worker
for _ in xrange(THREADS_COUNT):
    thread = threading.Thread(target=worker)
    thread.daemon = True
    thread.start()
This is the simple storage with the worker code.
storage.py:
from Queue import Queue
import subprocess
import logging

main_queque = Queue()


def worker():
    global main_queque  # not strictly needed; we only call methods on the queue
    while True:
        try:
            cmd = main_queque.get()
            # do_work(item)
            # time.sleep(5)
            handler = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
            stdout, stderr = handler.communicate()
            logging.critical("[queue_worker]: stdout:%s, stderr:%s, cmd:%s" % (stdout, stderr, cmd))
            main_queque.task_done()
        except Exception as error:
            logging.critical("[queue_worker:error] %s" % (error))
This is the class that will process any request [POST, GET].
flow.py:
import storage
import json
import falcon
import random


class Flow(object):

    def on_get(self, req, resp):
        storage_value = storage.main_queque.qsize()
        msg = {"qsize": storage_value}
        resp.body = json.dumps(msg, sort_keys=True, indent=4)
        resp.status = falcon.HTTP_200

    # curl -H "Content-Type: application/json" -d '{}' http://10.206.102.81:8888/flow
    def on_post(self, req, resp):
        r = random.randint(1, 10000000000000)
        cmd = 'sleep 1;echo "ss %s"' % str(r)
        storage.main_queque.put(cmd)
        storage_value = cmd
        msg = {"value": storage_value}
        resp.body = json.dumps(msg, sort_keys=True, indent=4)
        resp.status = falcon.HTTP_200
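As a quick usage sketch (the host and port are only an assumption, taken from the curl comment above, and the app from main.py is assumed to be served behind a WSGI server on port 8888), you could enqueue a command and then check the queue size like this:

import requests

# POST queues a shell command and echoes it back; GET reports the current queue size.
print(requests.post('http://127.0.0.1:8888/flow', json={}).json())
print(requests.get('http://127.0.0.1:8888/flow').json())  # e.g. {"qsize": 1}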