REDO: Starting Flask in a thread, ca. 2020

I have an old question looking for a fresh answer. I've tried the recipes presented in the similar but somewhat aged question "Start a flask application in a separate thread", and some other similar solutions found in other posts.
The long and short of it is: I need to start a Flask application in a 'background' thread, such that a wxPython GUI can run in the foreground. The solutions presented there seem to no longer have the desired effect. The Flask app starts and the GUI never runs.
My suspicion is that the existing answers are out of date. That said, I'm open to the possibility that I've mangled something else that's hosing it up; please have a peek and advise accordingly.
Thanks for your eyeballs and brain cycles :)
My code follows.
#!/usr/bin/env python
"""
integrator.py (the app)
"""
import wx
from pubsub import pub
from flask import Flask
from flask_graphql import GraphQLView

from models import db_session
from schema import schema
from models import engine, db_session, Base, Idiom

flaskapp = Flask(__name__)
flaskapp.debug = True

flaskapp.add_url_rule(
    '/graphql',
    view_func=GraphQLView.as_view(
        'graphql',
        schema=schema,
        graphiql=True
    )
)

flaskapp.run(threaded=True, use_reloader=False)

@flaskapp.teardown_appcontext
def shutdown_session(exception=None):
    db_session.remove()

class IntegratorTarget(wx.TextDropTarget):
    def __init__(self, object):
        wx.DropTarget.__init__(self)
        self.object = object

    def OnDropText(self, x, y, data):
        print(f"<publish>{data}</publish>")
        pub.sendMessage('default', arg1=data)
        return True

class IntegratorFrame(wx.Frame):
    def __init__(self, parent, title):
        super(IntegratorFrame, self).__init__(parent, title=title, size=wx.DisplaySize())
        self.panel = wx.Panel(self)
        box = wx.BoxSizer(wx.HORIZONTAL)
        dropTarget = IntegratorTarget(self.panel)
        self.panel.SetDropTarget(dropTarget)
        pub.subscribe(self.catcher, 'default')
        self.panel.SetSizer(box)
        self.panel.Fit()
        self.Centre()
        self.Show(True)

    def catcher(self, arg1):
        data = arg1
        print(f"<subscribed>{data}</subscribed>\n\n")
        return

ex = wx.App()
Base.metadata.create_all(bind=engine)
IntegratorFrame(None, 'Praxis:Integrator')
ex.MainLoop()
-- eof --
""" models.py """
from sqlalchemy import *
from sqlalchemy.orm import (scoped_session, sessionmaker, relationship, backref)
from sqlalchemy.ext.declarative import declarative_base
engine = create_engine('sqlite:///.praxis/lexicon/unbound.db3', convert_unicode=True)
db_session = scoped_session(sessionmaker(autocommit=False,
autoflush=False,
bind=engine))
Base = declarative_base()
# We will need this for querying
Base.query = db_session.query_property()
class Idiom(Base):
__tablename__ = "idiomae"
id = Column(Integer, primary_key=True)
src = Column(String) # the text of the drag/paste operation
taxonomy = Column(String) # the type of resource referenced in the drag/paste operation
localblob = Column(String) # local path to media referenced in 'src'
timestamp = Column(DateTime) # date and time of capture
-- eof --
""" schema.py """
import graphene
from graphene import relay
from graphene_sqlalchemy import SQLAlchemyObjectType, SQLAlchemyConnectionField
from models import db_session, Idiom as IdiomaticModel
class Idiom(SQLAlchemyObjectType):
class Meta:
model = IdiomaticModel
interfaces = (relay.Node, )
class Query(graphene.ObjectType):
node = relay.Node.Field()
# Allows sorting over multiple columns, by default over the primary key
all_idioms = SQLAlchemyConnectionField(Idiom.connection)
# Disable sorting over this field
# all_departments = SQLAlchemyConnectionField(Department.connection, sort=None)
schema = graphene.Schema(query=Query)

I see where you tell Flask to be multi-threaded, but I don't see where you're starting up the Flask app in a thread. I expected to see something like
import threading

app = Flask(__name__)
# any extra configuration

def webserver():
    app.run(use_reloader=False)

web_thread = threading.Thread(target=webserver)
web_thread.start()
# ... continue on with the main thread
I have a working example you can crib from here. Note the need to use appropriate locking of any data structures shared between the primary thread and the threads running Flask.
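For instance, here is a minimal sketch of that locking, assuming a shared latest_data dict that a Flask route writes and the GUI thread reads (all names here are illustrative, not from your code):

import threading
from flask import Flask

app = Flask(__name__)

data_lock = threading.Lock()
latest_data = {}  # shared between the Flask worker threads and the main (GUI) thread

@app.route("/update/<value>")
def update(value):
    # Flask serves requests on worker threads, so guard every write
    with data_lock:
        latest_data["value"] = value
    return "ok"

def read_from_gui():
    # the GUI thread takes the same lock before reading
    with data_lock:
        return dict(latest_data)

The copy inside read_from_gui keeps the lock hold time short; the GUI then works on its own snapshot.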

How to test the GET method that takes parser inputs in flask/flask-restx?

I am making a flask app using Flask-restx and I take inputs from the user by request parsing as follows:
from flask_restx import Resource, reqparse
from .services.calculator import DimensionCalculator

parser = reqparse.RequestParser()
parser.add_argument("dimensions", type=float,
                    required=True,
                    action='split',
                    help="Dimensions of the rectangle (in meters)")
parser.add_argument("angle_inclination", type=float,
                    required=True,
                    action='append',
                    help="Angle of inclination of the Dachfläche (Neigung)")

@ns.route("/output")
class UserOutput(Resource):
    @ns.expect(parser, validation=True)
    def get(self):
        args = parser.parse_args()
        return DimensionCalculator.inputs(**args)
where ns is a namespace I have defined and the simplified version of DimensionCalculator.inputs is:
class DimensionCalculator:
    def inputs(**user_input):
        installation_place = user_input['installation_place']
        num_rectangles = user_input['num_rectangles']
        dimensions = user_input['dimensions']
        angle_inclination = user_input['angle_inclination']
        alignment = user_input['alignment']
        direction = user_input['direction']
        vendor = user_input['vendor']
        output = {
            "installation_place": installation_place,
            "num_rectangles": num_rectangles,
            "area_shape": area_shape,
            "vendor": vendor
        }
        return output
I am writing tests using pytest. I have written tests for all the classes and methods, and the only one I am unable to test is the GET method defined in UserOutput. Is there a way to test the GET method?
Any help is appreciated.
Given the unit-testing tag, I'll present what I came up with for testing it in total isolation. Basically, the get method makes two calls on its dependencies, so in the unit sense you have to check that these calls have indeed been made, and assert the arguments, right?
Project structure for purpose of the example:
+---Project
|   |   __init__.py
|   |   config.py
|   |   dimension_calculator.py
|   |   parser_impl.py
|   |   user_output.py
|   |   user_output_test.py
So, everything is flat for simplicity.
Most importantly, you have to decouple your UserOutput module from dependencies. You shouldn't be hard-coding dependencies like that:
from .services.calculator import DimensionCalculator
Hypothetically, DimensionCalculator could contain complex business logic which shouldn't be in scope of the test. So, here's how the UserOutput module could look:
from flask_restx import Resource, Api
from flask import Flask
from .config import Config

app = Flask(__name__)
api = Api(app)
ns = api.namespace('todos', description='TODO operations')

@ns.route("/output")
class UserOutput(Resource):
    @ns.expect(Config.get_parser_impl(), validation=True)
    def get(self):
        args = Config.get_parser_impl().parse_args()
        return Config.get_dimension_calculator_impl().inputs(**args)

if __name__ == '__main__':
    app.run(debug=True)
As you can see, "external" dependencies can now be stubbed easily (this is part of a common pattern called dependency injection). The Config module looks as follows:
from .parser_impl import parser
from .dimension_calculator import DimensionCalculator

class Config(object):
    parser_impl = parser
    calculator = DimensionCalculator

    @staticmethod
    def configure_dimension_calculator_impl(impl):
        Config.calculator = impl

    @staticmethod
    def configure_parser_impl(impl):
        Config.parser_impl = impl

    @staticmethod
    def get_dimension_calculator_impl():
        return Config.calculator

    @staticmethod
    def get_parser_impl():
        return Config.parser_impl
Last, but not least, is the place where we'll be stubbing the dependencies and injecting them:
from .user_output import UserOutput
from flask import Flask
from .config import Config

class ParserStub(object):
    parse_args_call_count = 0

    @staticmethod
    def parse_args():
        ParserStub.parse_args_call_count = ParserStub.parse_args_call_count + 1
        return {'parser_stub': 2}

class DimensionCalculatorStub(object):
    inputs_call_count = 0

    @staticmethod
    def inputs(**args):
        DimensionCalculatorStub.inputs_call_count = DimensionCalculatorStub.inputs_call_count + 1
        return {'stub': 1}

app = Flask(__name__)

def test_user_request_get():
    with app.test_request_context():
        # given
        Config.configure_dimension_calculator_impl(DimensionCalculatorStub)
        Config.configure_parser_impl(ParserStub)
        uo = UserOutput()
        # when
        uo.get()
        # then
        assert DimensionCalculatorStub.inputs_call_count == 1
        assert ParserStub.parse_args_call_count == 1
        # assert arguments as well!
The test passes in my case. One thing still missing is validation of the arguments.
For completeness, I'll also include DimensionCalculator and the parser itself, though they are exactly the same as in your example. I've only modularized them:
from flask_restx import reqparse

parser = reqparse.RequestParser()
parser.add_argument("dimensions", type=float,
                    required=True,
                    action='split',
                    help="Dimensions of the rectangle (in meters)")
parser.add_argument("angle_inclination", type=float,
                    required=True,
                    action='append',
                    help="Angle of inclination of the Dachfläche (Neigung)")
and the dimension_calculator.py:
class DimensionCalculator:
    @staticmethod
    def inputs(**user_input):
        installation_place = user_input['installation_place']
        num_rectangles = user_input['num_rectangles']
        dimensions = user_input['dimensions']
        angle_inclination = user_input['angle_inclination']
        alignment = user_input['alignment']
        direction = user_input['direction']
        vendor = user_input['vendor']
        output = {
            "installation_place": installation_place,
            "num_rectangles": num_rectangles,
            "area_shape": "EMPTY",
            "vendor": vendor
        }
        return output
Important: there are definitely dedicated frameworks for preparing and configuring such stubs/mocks (for example: https://pypi.org/project/pytest-mock/). I just wanted to present the concept and the simplest approach possible.
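For reference, a minimal sketch of the same test using pytest-mock's mocker fixture instead of the hand-rolled stubs; it also covers the argument assertion mentioned above. The module paths mirror the layout shown earlier, and the mocker fixture comes from the pytest-mock plugin:

# user_output_test.py -- requires the pytest-mock plugin to be installed
from flask import Flask
from .config import Config
from .user_output import UserOutput

app = Flask(__name__)

def test_user_request_get_with_mocker(mocker):
    with app.test_request_context():
        # replace both dependencies with MagicMocks, restored automatically after the test
        parser = mocker.patch.object(Config, 'parser_impl')
        calculator = mocker.patch.object(Config, 'calculator')
        parser.parse_args.return_value = {'parser_stub': 2}

        UserOutput().get()

        parser.parse_args.assert_called_once()
        calculator.inputs.assert_called_once_with(parser_stub=2)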

Getting Interactive Brokers API into Pandas

New to Python and the IB API, and stuck on this simple thing. This application works correctly and prints the IB server reply. However, I cannot figure out how to get this data into a pandas dataframe or any other variable for that matter. How do you "get the data out?" Thanks!
I can find nothing on forums, in the documentation, or on YouTube with a useful example. I think the answer must be to return accountSummary to pd.Series, but I have no idea how.
Expected output would be a data series or variable that can be manipulated outside of the application.
from ibapi import wrapper
from ibapi.client import EClient
from ibapi.utils import iswrapper  # just for the decorator
from ibapi.common import *
import pandas as pd

class TestApp(wrapper.EWrapper, EClient):
    def __init__(self):
        wrapper.EWrapper.__init__(self)
        EClient.__init__(self, wrapper=self)

    @iswrapper
    def nextValidId(self, orderId: int):
        print("setting nextValidOrderId: %d" % orderId)
        self.nextValidOrderId = orderId
        # here is where you start using the api
        self.reqAccountSummary(9002, "All", "$LEDGER")

    @iswrapper
    def error(self, reqId: TickerId, errorCode: int, errorString: str):
        print("Error. Id: ", reqId, " Code: ", errorCode, " Msg: ", errorString)

    @iswrapper
    def accountSummary(self, reqId: int, account: str, tag: str, value: str, currency: str):
        print("Acct Summary. ReqId:", reqId, "Acct:", account,
              "Tag: ", tag, "Value:", value, "Currency:", currency)
        # IB API data returns here; how to pass it to a variable or pd.Series?

    @iswrapper
    def accountSummaryEnd(self, reqId: int):
        print("AccountSummaryEnd. Req Id: ", reqId)
        # now we can disconnect
        self.disconnect()

def main():
    app = TestApp()
    app.connect("127.0.0.1", 4001, clientId=123)
    test = app.accountSummary
    app.run()

if __name__ == "__main__":
    main()
Hi, I had the same problem, and collections did it for me. Here is my code for CFD data. Maybe it will help somebody. You will have your data in app.df. Any suggestions for improvement are more than welcome.
import collections
import datetime as dt
from threading import Timer

from ibapi.client import EClient
from ibapi.wrapper import EWrapper
from ibapi.contract import Contract
import pandas as pd

# get yesterday and put it into the correct format: yyyymmdd{space}{space}hh:mm:ss
yesterday = str(dt.datetime.today() - dt.timedelta(1))
yesterday = yesterday.replace('-', '')

IP = '127.0.0.1'
PORT = 7497

class App(EClient, EWrapper):
    def __init__(self):
        # EClient.__init__ takes the wrapper; the App is its own wrapper here
        super().__init__(self)
        self.data = collections.defaultdict(list)

    def error(self, reqId, errorCode, errorString):
        print(f'Error {reqId}, {errorCode}, {errorString}')

    def historicalData(self, reqId, bar):
        self.data['date'].append(bar.date)
        self.data['open'].append(bar.open)
        self.data['high'].append(bar.high)
        self.data['low'].append(bar.low)
        self.data['close'].append(bar.close)
        self.data['volume'].append(bar.volume)
        # rebuild the frame on every bar; fine for small requests
        self.df = pd.DataFrame.from_dict(self.data)

    def stop(self):
        self.done = True
        self.disconnect()

# create App object
app = App()
print('App created...')
app.connect(IP, PORT, 0)
print('App connected...')

# create contract
contract = Contract()
contract.symbol = 'IBDE30'
contract.secType = 'CFD'
contract.exchange = 'SMART'
contract.currency = 'EUR'
print('Contract created...')

# request historical data for the contract
app.reqHistoricalData(reqId=1,
                      contract=contract,
                      endDateTime=yesterday,
                      durationStr='1 W',
                      barSizeSetting='15 mins',
                      whatToShow='ASK',
                      useRTH=0,
                      formatDate=1,
                      keepUpToDate=False,
                      chartOptions=[])

# give the request a few seconds to complete, then stop the app
Timer(4, app.stop).start()
app.run()
I'd store the data in a dictionary, create a dataframe from the dictionary, and append the new dataframe to the main dataframe using the concat function. Here's an example:
def accountSummary(self, reqId: int, account: str, tag: str, value: str, currency: str):
    acct_dict = {"account": account, "value": value, "currency": currency}
    acct_df = pd.DataFrame([acct_dict], columns=acct_dict.keys())
    # self.main_df must be initialised as an empty DataFrame beforehand
    self.main_df = pd.concat([self.main_df, acct_df], axis=0).reset_index(drop=True)
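For that to work inside the question's TestApp, the accumulator has to exist before the first callback fires. A minimal sketch reusing the question's class, showing only the changed methods:

class TestApp(wrapper.EWrapper, EClient):
    def __init__(self):
        wrapper.EWrapper.__init__(self)
        EClient.__init__(self, wrapper=self)
        # start with an empty accumulator; accountSummary() concats onto it
        self.main_df = pd.DataFrame(columns=["account", "value", "currency"])

    def accountSummaryEnd(self, reqId: int):
        # all rows have arrived; disconnecting ends app.run()
        self.disconnect()

After app.run() returns, app.main_df can be manipulated like any ordinary DataFrame outside the application, which is what the question asks for.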
For more information, you might like Algorithmic Trading with Interactive Brokers

How to apply template method pattern in Python data science process while not knowing exactly the number of repeating steps

I'd like to apply the template method pattern for a data science project, where I need to select or identify target subjects from a large pool of original subjects. I will create tags based on different characteristics of these subjects, i.e., age, sex, disease status, etc.
I'd like this code to be reusable for future projects of a similar nature. But all projects are somewhat different, and the criteria for selecting subjects into the final filtered pool differ from one another. How do I structure subject_selection_steps in such a way that it is flexible and customizable based on project needs? Currently I have only included three tags in my code, but I may need more or fewer in different projects.
import sys
from abc import ABC, abstractmethod
import pandas as pd
import datetime
import ctypes
import numpy as np
import random
import pysnooper

import var_creator.var_creator as vc
import feature_tagger.feature_tagger as ft
import data_descriptor.data_descriptor as dd
import data_transformer.data_transformer as dt
import helper_functions.helper_functions as hf
import sec1_data_preparation as data_prep
import sec2_prepped_data_import as prepped_data_import

class SubjectGrouping(ABC):
    def __init__(self):
        pass

    def subject_selection_steps(self):
        self._pandas_output_setting()
        self.run_data_preparation()
        self.import_processed_main_data()
        self.inject_test_data()
        self.create_all_subject_list()
        self.CREATE_TAG1()
        self.FILTER_SUBJECT_BY_TAG1()
        self.CREATE_TAG2()
        self.FILTER_SUBJECT_BY_TAG2()
        self.CREATE_TAG3()
        self.FILTER_SUBJECT_BY_TAG3()
        self.finalize_data()

    def _pandas_output_setting(self):
        '''Set pandas output display setting'''
        pd.set_option('display.max_rows', 500)
        pd.set_option('display.max_columns', 500)
        pd.set_option('display.width', 180)

    @abstractmethod
    def run_data_preparation(self):
        '''Run data_preparation_steps from base class'''
        pass

    @abstractmethod
    def import_processed_main_data(self):
        '''Import processed main data'''
        pass

    def inject_test_data(self):
        '''For unit testing, inject mock cases that for sure fulfill/fail the defined subject selection criteria'''
        pass

    def create_all_subject_list(self):
        '''Gather all the unique subject ids from all datasets and create a full subject list'''
        pass

    def CREATE_TAG1(self): pass
    def FILTER_SUBJECT_BY_TAG1(self): pass
    def CREATE_TAG2(self): pass
    def FILTER_SUBJECT_BY_TAG2(self): pass
    def CREATE_TAG3(self): pass
    def FILTER_SUBJECT_BY_TAG3(self): pass

    def finalize_data(self):
        pass

class SubjectGrouping_Project1(SubjectGrouping, data_prep.DataPreparation_Project1):
    def __init__(self):
        self.df_dad = None
        self.df_pc = None
        self.df_nacrs = None
        self.df_pin = None
        self.df_reg = None
        self.df_final_subject_group1 = None
        self.df_final_subject_group2 = None
        self.df_final_subject_group3 = None

        self.control_panel = {
            'save_file_switch': False,      # WARNING: Will overwrite existing files
            'df_subsampling_switch': True,  # WARNING: Only switch to True when testing
            'df_subsampling_n': 8999,
            'random_seed': 888,
            'df_remove_dup_switch': True,
            'parse_date_switch': True,
            'result_printout_switch': True,
            'comp_loc': 'office',
            'show_df_n_switch': False,      # To be implemented. Show df length before and after record removal
            'done_switch': False,
        }

    def run_data_preparation(self):
        self.data_preparation_steps()

    def import_processed_main_data(self):
        x = prepped_data_import.PreppedDataImport_Project1()
        x.data_preparation_steps()
        x.prepped_data_import_steps()
        df_dict = x.return_all_dfs()
        self.df_d, self.df_p, self.df_n, self.df_p, self.df_r = (df_dict['DF_D'], df_dict['DF_P'],
                                                                 df_dict['DF_N'], df_dict['DF_P'], df_dict['DF_R'])
        del x

if __name__ == '__main__':
    x = SubjectGrouping_Project1()
    x.subject_selection_steps()
Consider a Filter pattern. It basically allows filtering a list of objects based on defined filters, and you can easily introduce a new filter at a later point with minimal changes to your code.
Create a Criteria interface or abstract class:
class Criteria():
    def filter(self, request):
        raise NotImplementedError("Should have implemented this")
and have each of your filters extend the Criteria class. Let's say one of the filters is an age filter:
class AgeFilter(Criteria):
    def __init__(self, age=20):
        self.age = age

    def filter(self, items):
        filteredList = []
        for item in items:
            if item.age > self.age:
                # add to the filteredList
                filteredList.append(item)
        return filteredList
Similarly, you can define other filters like DiseaseFilter and GenderFilter by extending the Criteria interface.
You can also perform logical operations on your filters by defining And or Or filters. For example:
class AndFilter(Criteria):
    def __init__(self, filter1, filter2):
        self.filter1 = filter1
        self.filter2 = filter2

    def filter(self, items):
        filteredList1 = self.filter1.filter(items)
        filteredList2 = self.filter2.filter(filteredList1)
        return filteredList2
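An Or combinator would follow the same shape; here is one possible sketch (the de-duplication step is my assumption, since an item may pass both branches):

class OrFilter(Criteria):
    def __init__(self, filter1, filter2):
        self.filter1 = filter1
        self.filter2 = filter2

    def filter(self, items):
        # keep anything that passes either filter, preserving input order
        passed = self.filter1.filter(items)
        for item in self.filter2.filter(items):
            if item not in passed:
                passed.append(item)
        return passed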
Assuming you have already defined your filters, your subject_selection_steps method will then look like:
def subject_selection_steps(self):
    # define the list of filters
    filterList = [ageFilter1, maleFilter, malariaAndJaundiceFilter]
    result = personList
    for criteria in filterList:
        result = criteria.filter(result)
    return result
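Putting it together, here is a runnable toy example; the Person dataclass and SexFilter are illustrative stand-ins for your real subject data:

from dataclasses import dataclass

@dataclass
class Person:
    name: str
    age: int
    sex: str

class SexFilter(Criteria):
    def __init__(self, sex):
        self.sex = sex

    def filter(self, items):
        return [item for item in items if item.sex == self.sex]

personList = [Person("A", 25, "m"), Person("B", 15, "f"), Person("C", 40, "f")]

# adults only, then women only -- the same loop as subject_selection_steps
filterList = [AgeFilter(age=20), SexFilter("f")]
result = personList
for criteria in filterList:
    result = criteria.filter(result)
print(result)  # [Person(name='C', age=40, sex='f')]

Adding a new project-specific criterion then means writing one small filter class and adding it to filterList, which is exactly the flexibility the question asks for.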

How to get the spider name in Scrapy pipeline outside of the process_item function?

I've written a few spiders that pull similar data from different sources. I've also written a pipeline that allows this data to be put in a database. I want to be able to use the same code for multiple spiders to output to different tables, named dynamically from the spider name.
Here is the pipeline.py code:
class DbPipeline(object):
    def __init__(self):
        """
        Initialises database connection and sessionmaker.
        Creates table if it doesn't exist.
        """
        engine = db_connect()
        create_output_table(engine)
        self.Session = sessionmaker(bind=engine)

    def process_item(self, item, spider):
        """
        Saves scraped products in database
        """
        exists = self.check_item_exists(item)
        if not exists:
            session = self.Session()
            product = Products(**item)
            try:
                session.add(product)
                session.commit()
            except:
                session.rollback()
                raise
            finally:
                session.close()
        return item

    def check_item_exists(self, item):
        session = self.Session()
        product = Products(**item)
        result = session.query(Products).filter(Products.title == item['title']).first()
        return result is not None
And here is the model.py file:
DeclarativeBase = declarative_base()

def create_output_table(engine):
    DeclarativeBase.metadata.create_all(engine)

def db_connect():
    """
    Connects to database from settings defined in settings.py
    Returns an sqlalchemy engine instance
    """
    return create_engine(URL(**settings.DATABASE))

class Products(DeclarativeBase):
    """Sqlalchemy table model"""
    __tablename__ = "name"
    id = Column(Integer, primary_key=True)
    title = Column('title', String(200))
    price = Column('price', String(10), nullable=True)
    url = Column('url', String(200), nullable=True)
What I'm trying to do is make the __tablename__ variable the same as the spider name. I can easily do this in the process_item function, as it is passed a spider object and can use spider.name and assign it to a class variable; however, that function runs after the table has been created/defined. How can I get the spider name outside of the process_item function in the pipelines.py file?
Edit: I've tried the solutions listed in "How to access scrapy settings from item Pipeline"; however, access to the settings doesn't give me access to the attributes assigned to the spider currently running. I need to dynamically get the name of the spider based on which spider is running the pipelines. Thanks
It's pretty easy to get the current spider name into your create_output_table:
class DbPipeline(object):
    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.spider.name)

    def __init__(self, spider_name):
        """
        Initializes database connection and sessionmaker.
        Creates deals table.
        """
        engine = db_connect()
        create_output_table(engine, spider_name)
        ......
and (in models.py):
def create_output_table(engine, spider_name):
    # now you have your spider_name
    DeclarativeBase.metadata.create_all(engine)
The problem here is that Scrapy processes your models.py file before your pipelines.py, so you need to find a way to generate your SQLAlchemy model later. You can use this thread as a starting point: Dynamically setting __tablename__ for sharding in SQLAlchemy?
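One hedged sketch of that idea, assuming it lives in models.py next to the existing imports: a factory that builds the model class (and hence the table) only once the spider name is known. The caching and naming here are illustrative, not taken from the linked thread:

_model_cache = {}

def get_products_model(spider_name):
    """Build (or reuse) a Products model whose table is named after the spider."""
    if spider_name not in _model_cache:
        _model_cache[spider_name] = type(
            f'Products_{spider_name}',
            (DeclarativeBase,),
            {
                '__tablename__': spider_name,
                'id': Column(Integer, primary_key=True),
                'title': Column('title', String(200)),
                'price': Column('price', String(10), nullable=True),
                'url': Column('url', String(200), nullable=True),
            },
        )
    return _model_cache[spider_name]

def create_output_table(engine, spider_name):
    # defining the class registers its table on DeclarativeBase.metadata
    get_products_model(spider_name)
    DeclarativeBase.metadata.create_all(engine)

The pipeline would then call get_products_model(spider_name) in __init__ and use the returned class wherever it currently uses Products.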

Using SQLAlchemy outside of flask error

I'm trying to create a system with models that can be used within Flask's app context and by external jobs.
I have created a database.py with the following:
from sqlalchemy import MetaData, Column, Integer, String, Float, DateTime
from sqlalchemy.ext.declarative import declarative_base

metadata = MetaData()
Base = declarative_base(metadata=metadata)

class Stock(Base):
    __tablename__ = 'Stock'
    id = Column(Integer, primary_key=True)
    ticker = Column(String(10), nullable=False)
    time_stamp = Column(DateTime, nullable=False)
    open = Column(Float, nullable=False)
    high = Column(Float, nullable=False)
    low = Column(Float, nullable=False)
    close = Column(Float, nullable=False)
    volume = Column(Integer, nullable=False)
    price = Column(Float, nullable=True)
I have also then created a flask_db.py file:
from flask_sqlalchemy import SQLAlchemy
from database import metadata
db = SQLAlchemy(metadata=metadata)
This is then used in my app.py
from flask import Flask, render_template
from flask_db import db
import os

# create the flask app
app = Flask(__name__, template_folder="dist/", static_folder="dist/", static_url_path="")

# config
app.config.from_object(os.environ['APP_SETTINGS'])

# initialise db
db.init_app(app)

@app.route("/")
def index():
    """
    renders basic index page
    :return: template
    """
    return render_template("index.html")

# app entry point
if __name__ == "__main__":
    app.run()
When I run a file called create_db.py:
from flask_db import db
db.create_all()
I get the following error:
RuntimeError: No application found. Either work inside a view function or push an application context. See http://flask-sqlalchemy.pocoo.org/contexts/.
I am not really sure why no application is found.
The db extension only gets initialized when init_app is called, and the app context only gets created when the app is run. But because you're importing only from your flask_db module, neither of those things happens: the code in the app module never gets run by the Python interpreter.
What you probably need to do is something like this:
from app import app, db

app_ctx = app.app_context()
app_ctx.push()
db.create_all()
app_ctx.pop()
I would think that should work. But I strongly suggest you look into using the application factory pattern to create your app, and using click commands (with the with_appcontext decorator) to add to Flask's cli interface, instead of using one-off scripts. Also, it's probably worth using Flask-Migrate for proper database migrations.
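A minimal sketch of that factory-plus-CLI combination, assuming the module layout from the question (the create-db command name is illustrative):

# app.py -- application factory plus a CLI command
import os
import click
from flask import Flask
from flask.cli import with_appcontext
from flask_db import db

def create_app():
    app = Flask(__name__)
    app.config.from_object(os.environ['APP_SETTINGS'])
    db.init_app(app)
    app.cli.add_command(create_db_command)
    return app

@click.command('create-db')
@with_appcontext
def create_db_command():
    """Create all tables (run with: flask create-db)."""
    db.create_all()
    click.echo('Database initialised.')

With FLASK_APP pointing at the factory (e.g. FLASK_APP=app:create_app), flask create-db then runs inside a proper application context, so no manual context pushing is needed.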
EDIT: in your app module, you probably also need to tell Flask-SQLAlchemy about your custom base model:
db = SQLAlchemy(metadata=metadata, model_class=Base)
