Azure function app stops responding after a few seconds, resulting in timeout

I am trying to run spark-nlp as an Azure function.
I have a function app that runs in a Docker container. The function app code is written in Python, and I also install Java in the image since I run PySpark within it. I use Python's Flask within one function to handle incoming requests.
Once the function app starts and the container is running, I get responses to my API calls for the first few seconds, but after only a few seconds (~15-20) the calls start timing out because the server stops responding.
The function app runs on a dedicated App Service plan and is set to 'always on'.
What is the reason for this behavior?
Here is my function app code:
import logging
import azure.functions as func

# Imports for Spark-NLP
import os
import sys
sys.path.append('/home/site/wwwroot/contextSpellCheck/spark-2.4.7-bin-hadoop2.7/python')
sys.path.append('/home/site/wwwroot/contextSpellCheck/spark-2.4.7-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip')

import sparknlp
from sparknlp.annotator import *
from sparknlp.common import *
from sparknlp.base import *
from pyspark.ml import Pipeline  # Pipeline comes from pyspark.ml, not sparknlp
from flask import Flask, request

app = Flask(__name__)

spark = sparknlp.start()
documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
tokenizer = RecursiveTokenizer().setInputCols(["document"]).setOutputCol("token") \
    .setPrefixes(["\"", "(", "[", "\n"]).setSuffixes([".", ",", "?", ")", "!", "'s"])
spellModel = ContextSpellCheckerModel.load("/home/site/wwwroot/contextSpellCheck/spellcheck_dl_en_2.5.0_2.4_1588756259065") \
    .setInputCols("token").setOutputCol("checked")
finisher = Finisher().setInputCols("checked")
pipeline = Pipeline(stages=[documentAssembler, tokenizer, spellModel, finisher])
empty_ds = spark.createDataFrame([[""]]).toDF("text")
lp = LightPipeline(pipeline.fit(empty_ds))

@app.route('/api/testFunction', methods=['GET', 'POST'])
def annotate():
    global lp
    if request.method == 'GET':
        text = request.args.get('text')
    elif request.method == 'POST':
        req_body = request.get_json()
        text = req_body['text']
    return lp.annotate(text)

def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')
    return func.WsgiMiddleware(app).handle(req, context)

It may be that you are creating a pipeline per request. Your stack spans several languages, and one of the underlying libraries may be doing exactly that.
See the section "Avoid creating lots of pipelines" in https://stanfordnlp.github.io/CoreNLP/memory-time.html#avoid-creating-lots-of-pipelines
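If something in the stack does rebuild the pipeline on every call, caching it behind a module-level singleton avoids the repeated startup cost. A minimal sketch of that pattern, where build_light_pipeline is a hypothetical stand-in for the fit/LightPipeline setup shown in the question:

_lp = None

def get_light_pipeline():
    # Build the expensive pipeline once; every later request reuses it.
    global _lp
    if _lp is None:
        _lp = build_light_pipeline()  # hypothetical: the Spark NLP setup above
    return _lp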

Related

Python gRPC service for Envoy ratelimit

I am trying to create a small service to respond to Envoy's rate limiting queries. I have compiled all the relevant protobuf files, and the one relevant to the service I am trying to implement is here:
https://github.com/envoyproxy/envoy/blob/v1.17.1/api/envoy/service/ratelimit/v3/rls.proto
There is a service definition in there, but inside the "compiled" Python file, all I see about it is this:
_RATELIMITSERVICE = _descriptor.ServiceDescriptor(
    name='RateLimitService',
    full_name='envoy.service.ratelimit.v3.RateLimitService',
    file=DESCRIPTOR,
    index=0,
    serialized_options=None,
    create_key=_descriptor._internal_create_key,
    serialized_start=1531,
    serialized_end=1663,
    methods=[
        _descriptor.MethodDescriptor(
            name='ShouldRateLimit',
            full_name='envoy.service.ratelimit.v3.RateLimitService.ShouldRateLimit',
            index=0,
            containing_service=None,
            input_type=_RATELIMITREQUEST,
            output_type=_RATELIMITRESPONSE,
            serialized_options=None,
            create_key=_descriptor._internal_create_key,
        ),
    ])
_sym_db.RegisterServiceDescriptor(_RATELIMITSERVICE)
DESCRIPTOR.services_by_name['RateLimitService'] = _RATELIMITSERVICE
Here is my feeble attempt at implementing the service:
import logging
import asyncio
import grpc
from envoy.service.ratelimit.v3.rls_pb2 import RateLimitResponse, RateLimitRequest

class RL:
    def ShouldRateLimit(self, request):
        result = RateLimitResponse()

def add_handler(servicer, server):
    rpc_method_handlers = {
        'ShouldRateLimit': grpc.unary_unary_rpc_method_handler(
            RL.ShouldRateLimit,
            request_deserializer=RateLimitRequest.FromString,
            response_serializer=RateLimitResponse.SerializeToString,
        )
    }
    generic_handler = grpc.method_handlers_generic_handler(
        'envoy.service.ratelimit.v3.RateLimitService',
        rpc_method_handlers
    )
    server.add_generic_rpc_handlers((generic_handler,))

async def serve():
    server = grpc.aio.server()
    add_handler(RL(), server)
    listen_addr = '[::]:5051'
    server.add_insecure_port(listen_addr)
    logging.info(f'Starting server on {listen_addr}')
    await server.start()

if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    asyncio.run(serve())
How am I supposed to return (or even instantiate) a RateLimitResponse back to the caller?
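A handler registered this way receives (request, context) and simply returns the response message; gRPC serializes whatever the handler returns. A minimal sketch under that assumption, using the overall_code field and Code enum that rls.proto defines on RateLimitResponse:

class RL:
    async def ShouldRateLimit(self, request, context):
        # Instantiate the response, set the decision, and return it;
        # the registered response_serializer handles the rest.
        result = RateLimitResponse()
        result.overall_code = RateLimitResponse.OK  # or RateLimitResponse.OVER_LIMIT
        return result

Two likely follow-up fixes: register the bound method servicer.ShouldRateLimit instead of RL.ShouldRateLimit so self binds correctly, and add await server.wait_for_termination() after server.start(), otherwise serve() returns immediately and the process exits.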

How to put a method into a thread and use it while performing the test

I have this piece of code which does a psubscribe to Redis. I want to run it in a thread so it works in the background, while the rest of the code checks for the notifications it picks up.
def psubscribe(context, param1, param2, param3):
    context.test_config = load_config()
    RedisConnector(context.test_config["redis_host"],
                   context.test_config["redis_db_index"])
    redis_notification_subscriber_connector = RedisConnector(
        context.test_config["notification__redis_host"],
        int(param3),
        int(context.test_config["notification_redis_port"]))
    context.redis_connectors = redis_notification_subscriber_connector.psubscribe_to_redis_event(
        param1, timeout_seconds=int(param2))
What I have done till now (but it's not running):
context.t = threading.Thread(target=psubscribe, args=['param1', 'param2', 'param3'])
context.t.start()
It is actually working. I think you didn't actually need to pass the context variable to your psubscribe function.
Here is an example:
1. Start an HTTP server that listens on port 8000 in a background thread
2. Send HTTP requests to it and validate the responses

Feature scenario:

Scenario: Run background process and validate responses
    Given Start background process
    Then Validate outputs
background_steps.py file:

import threading
import logging
from behave import *
from features.steps.utils import run_server
import requests

@given("Start background process")
def step_impl(context):
    context.t = threading.Thread(target=run_server, args=[8000])
    context.t.daemon = True
    context.t.start()

@then("Validate outputs")
def step_impl(context):
    response = requests.get('http://127.0.0.1:8000')
    assert response.status_code == 501
utils.py file:
from http.server import HTTPServer, BaseHTTPRequestHandler

def run_server(port, server_class=HTTPServer, handler_class=BaseHTTPRequestHandler):
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    httpd.serve_forever()
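Applied to the original psubscribe step, the same pattern would look roughly like this (a sketch; it assumes psubscribe keeps its context parameter, so context is passed through explicitly in args):

@given("Start psubscribe in background")
def step_impl(context):
    # Daemon thread so the subscriber dies together with the test run.
    context.t = threading.Thread(target=psubscribe,
                                 args=[context, 'param1', 'param2', 'param3'])
    context.t.daemon = True
    context.t.start()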

How to submit a query to Google Dialogflow using Flask

I have the following Python script that connects to Google Dialogflow using Flask and uses a webhook to retrieve the response from Google Dialogflow.
The limitation is that I can currently only enter the query in the Google Dialogflow frontend, with the result returned here in the variable result.
How can I use this same script to submit the query to Google Dialogflow, instead of entering it in the frontend?
Any help appreciated, thanks!
import json
import os
from flask import Flask
from flask import request
from flask import make_response

# Flask app should start in global layout
app = Flask(__name__)

@app.route('/webhook', methods=['POST'])
def webhook():
    req = request.get_json(silent=True, force=True)
    res = processRequest(req)
    res = json.dumps(res, indent=4)
    r = make_response(res)
    r.headers['Content-Type'] = 'application/json'
    return r

def processRequest(req):
    result = req.get("queryResult")
    result_message = result['fulfillmentText']
    print(result_message)
    return result  # without a return value, webhook() would serialize None

@app.route('/test', methods=['GET'])
def test():
    return "Hello there my friend !!"

if __name__ == '__main__':
    port = int(os.getenv('PORT', 5000))
    app.run(debug=True, port=port, host='0.0.0.0')
I discovered shortly after posting this that a webhook is what's known as a reverse API: it only receives results, as opposed to supporting two-way interactions.
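To actively submit a query rather than wait for the webhook, the detectIntent API can be called directly through the google-cloud-dialogflow client library. A minimal sketch, with the project ID, session ID, and language code as placeholder assumptions:

from google.cloud import dialogflow

def detect_intent_text(project_id, session_id, text, language_code='en-US'):
    # Open a session and send the text query straight to Dialogflow.
    session_client = dialogflow.SessionsClient()
    session = session_client.session_path(project_id, session_id)
    text_input = dialogflow.TextInput(text=text, language_code=language_code)
    query_input = dialogflow.QueryInput(text=text_input)
    response = session_client.detect_intent(
        request={'session': session, 'query_input': query_input})
    return response.query_result.fulfillment_text

# e.g. print(detect_intent_text('my-project', 'my-session', 'hello'))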

Pass a filepath as a parameter to a URL in Flask (Python)

I want to build an API that accepts a parameter from the user which is a filepath, and then processes the file at that path. The file to be processed is already on the server where the API will be running.
As of now, I have written an API with the filepath hardcoded in the code. Now I want to configure the API so that it accepts a filepath from the user as a parameter and processes the file given in that path.
The API code is as follows; the convert function returns the category of the file.
import ectd
from ectd import convert
from flask import Flask, request
from flask_restful import Resource, Api
# from flask.views import MethodView

app = Flask(__name__)
api = Api(app)
# convert(r'D:\files\67cecf40-71cf-4fc4-82e1-696ca41a9fba.pdf')

class ectdtext(Resource):
    def get(self, result):
        return {'data': ectd.convert(result)}

# api.add_resource(ectdtext, '/ectd/<result>')

categories = convert(r'D:\files\6628cb99-a400-4821-8b13-aa4744bd1286.pdf')

@app.route('/')
def returnResult():
    return categories

if __name__ == '__main__':
    app.run(host="0.0.0.0", port=5000)
So, I want to change this code so that it accepts a filepath parameter from the user and has the convert function process that file. I want to know how to make my API accept a filepath parameter from the user.
My trial with request.args.get:
import ectd
from ectd import convert
from flask import Flask, request
from flask_restful import Resource, Api
# from flask.views import MethodView

app = Flask(__name__)
api = Api(app)
# convert(r'D:\files\67cecf40-71cf-4fc4-82e1-696ca41a9fba.pdf')

class ectdtext(Resource):
    def get(self, result):
        return {'data': ectd.convert(result)}

# api.add_resource(ectdtext, '/ectd/<result>')

@app.route('/')
def returnResult():
    categories = convert(r'D:\files\'.format(request.args.get('categories')))
    return categories

if __name__ == '__main__':
    app.run(host="0.0.0.0", port=5000)
This results in the error:
"RuntimeError: Working outside of request context.
This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem."
PRESENT SCENARIO:
I am able to post a filepath to the URL. My question now is how to use this posted URL with the filepath in my code, to trigger the function that takes in the filepath and processes the file. Code to post the filepath:
import ectd
from ectd import convert
from flask import Flask, request
from flask_restful import Resource, Api
# from flask.views import MethodView

app = Flask(__name__)
api = Api(app)

class ectdtext(Resource):
    def get(self, result):
        return {'data': ectd.convert(result)}

# api.add_resource(ectdtext, '/ectd/<result>')

categories = convert('/home/brian/ajay/files/5ca21af9-5b67-45f8-969c-ae571431c665.pdf')

@app.route('/')
def returnResult():
    return categories

@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def get_dir(path):
    return path

# @app.route('/get_dir/<path>')
# def get_dir(path):
#     return path

if __name__ == '__main__':
    app.run(host="0.0.0.0", port=5000)
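One way to wire the posted path into convert is to read it inside the view function, where the request context is available. A sketch, assuming the files live under a fixed base directory such as /home/brian/ajay/files:

import os
from flask import Flask, abort
from ectd import convert

app = Flask(__name__)
BASE_DIR = '/home/brian/ajay/files'  # assumed location of the files

@app.route('/ectd/<path:filename>')
def ectd_convert(filename):
    # Resolve the user-supplied name under the fixed base directory and
    # hand the full path to the existing convert() function.
    full_path = os.path.join(BASE_DIR, filename)
    if not os.path.isfile(full_path):
        abort(404)  # unknown file
    return {'data': convert(full_path)}

Anchoring the user-supplied name under a fixed directory also avoids handing arbitrary absolute paths to the server.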

How do I fix 'Popped wrong app context' in Flask with APScheduler

I'm adding background tasks with APScheduler at runtime, depending on an API call. In other words, there are no background tasks when the app starts; when a user makes a call to an API, tasks are added at runtime. But I'm getting an error that says:
AssertionError: Popped wrong app context
The application works just fine if I comment out the lines where background tasks are scheduled.
My app structure is as follows:
/project
    manage.py
    requirements.txt
    /app
        /models
        /routes
        /utils
        /api
        config.py
        __init__.py
My manage.py file looks like this:
app = create_app('dev')
app.app_context().push()

manager = Manager(app)
migrate = Migrate(app, db, render_as_batch=True)
manager.add_command('db', MigrateCommand)

with app.app_context():
    scheduler = BackgroundScheduler()
    scheduler.start()

@manager.command
def run():
    app.run()

atexit.register(lambda: scheduler.shutdown())

if __name__ == '__main__':
    manager.run()
__init__.py inside the app folder is:
from flask import Flask
from flask_restful import Api
from flask_sqlalchemy import SQLAlchemy
from email_scheduler.routes.routes import set_routes
from .config import config_by_name
# from app.models.task import TaskModel

db = SQLAlchemy()

def create_app(config_name):
    app = Flask(__name__)
    app.config.from_object(config_by_name[config_name])
    api = Api(app)
    set_routes(api)
    from email_scheduler.models.api_models import TaskModel, User
    db.init_app(app)
    with app.app_context():
        db.create_all()
    return app
My api.py file is:
class SignUp(Resource):
    def clean_scheduling_time(self, schedule_time):
        day = schedule_time.split(' ')[0].lower()[:3]
        hour, mins = schedule_time.split(' ')[1].split(':')
        return day, hour, mins

    def post(self):
        args = user_parser.parse_args()
        username, password = args.get('username'), args.get('password')
        schedule_time, email_to = args.get('schedule_time'), args.get('email_to')
        if username is None or password is None:
            abort(400)  # missing arguments
        from email_scheduler.models.api_models import User
        if User.query.filter_by(username=username).first() is not None:
            abort(400)  # existing user
        user = User(username=username, schedule_time=schedule_time.split(' ')[1], email_to=email_to)
        user.hash_password(password)
        user.save_to_db()
        from manage import scheduler
        from email_scheduler.utils.utils import send_email
        day, hour, mins = self.clean_scheduling_time(args.get('schedule_time'))
        trigger = CronTrigger(day_of_week=day, hour=int(hour), minute=int(mins))
        scheduler.add_job(send_email, trigger=trigger)
        print(scheduler.get_jobs())
        return make_response(jsonify({'username': username}), 200)
What's weird is that even though I get this error in the terminal, the task somehow gets scheduled and runs. And if I take out the code from the API that schedules the tasks, the API runs just fine. What am I doing wrong?
The problem is in your manage.py file.
You're running the following line globally:
app.app_context().push()
You correctly need it for the worker to have access to the app context, but it should live inside the function that the worker calls.
I.e. NOT this:
app = create_app()
app.app_context().push()

def your_async_fn():
    # your code for the worker...
But this:
def your_async_fn():
    app = create_app()
    app.app_context().push()
    # your code for the worker...
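Applied to the scheduler job above, the same idea looks roughly like this (a sketch; create_app and send_email are the ones defined in the question):

def send_email_job():
    # Build the app and push its context inside the job itself, so the
    # scheduler's worker thread never pops a context pushed elsewhere.
    app = create_app('dev')
    with app.app_context():
        send_email()

# ...and schedule the wrapper instead of send_email directly:
# scheduler.add_job(send_email_job, trigger=trigger)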
