How can I speed up this SQLAlchemy Query? In FastAPI - python-3.x

I have a Volunteers table with 60 fields. I use FastAPI and the SQLAlchemy ORM:
class Volunteers(Base):
    __tablename__ = "volunteers"
    id = Column(Integer, primary_key=True, index=True, nullable=False)
    candidate_id = Column(Integer)
    full_name = Column(String)
    ......
And I have a function where I import an Excel file and write its data to the database. I read all users from the Excel file into the all_users_in_excel list, which is a list of dictionaries, one per user. Then I take one user at a time from all_users_in_excel, build a new_user dict from its values, and append it to a new list.
def import_data(file: UploadFile = File(...), db: Session = Depends(get_db)):
    all_users_in_excel = []
    with open(f'{file.filename}', "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    volunteer_data = pd.read_excel("assignment-data.xlsx", index_col=None)
    for name in volunteer_data.iterrows():
        new_user_from_excel = name[1].to_dict()
        all_users_in_excel.append(new_user_from_excel)
    saved_users = []
    for key in all_users_in_excel:
        new_user = {
            "candidate_id": key["Candidate - ID"],
            "full_name": key["Candidate - Full Name"],
            "checkpoint": key["Candidate - Checkpoint"],
            .....
            "created_at": datetime.now()
        }
        saved_users.append(new_user)
    db.bulk_insert_mappings(models.Volunteers, saved_users)
    db.commit()
db.bulk_insert_mappings(models.Volunteers, saved_users) takes 20 seconds to save 110 users. How can I speed it up? I use PostgreSQL as the database, and it is deployed on Heroku's free plan.
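No answer is shown for this question here. One direction worth trying (my suggestion, not from the thread) is to send all rows in a single multi-row INSERT rather than per-row statements, since a remote Heroku Postgres instance makes each network round-trip expensive. A minimal sketch, reusing the models.Volunteers mapping and the saved_users list of dicts built above:

from sqlalchemy import insert

# One INSERT statement carrying every row, so only a single round-trip
# reaches the remote database instead of one per user.
stmt = insert(models.Volunteers.__table__).values(saved_users)
db.execute(stmt)
db.commit()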

Related

Replace a large amount of data at once with SQLAlchemy?

I need to update a large amount of data daily (large means >3MB). I thought about storing it as JSON, but as far as I could find, SQLAlchemy doesn't support loading objects from JSON directly. So now I'm trying to do it with Pickle. At the moment I store every product I have in a huge Pickle file, then load it back in later and commit the objects. However, I keep getting errors saying my product class is not mapped, and I'm not sure what that means or how to fix it. Nothing I came across while Googling resembled my code in the slightest.
Here is my product class:
class Product:
    id = ""
    name = ""
    store_name = ""
    brand = ""
    price = ""
    amount = ""
    info = ""
    image = ""
And here is my Pickle / Database code:
def loadall():
    with open('products.txt', mode='rb') as products_txt:
        while True:
            try:
                yield pickle.load(products_txt)
            except EOFError:
                break

Session = sessionmaker(bind=db)
session = Session()
products = loadall()
with db.connect() as conn:
    session.add_all(products)
    session.commit()
(made after reading Saving and loading multiple objects in pickle file?)
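The "not mapped" error happens because the Product class above is a plain Python class with no SQLAlchemy mapping, so session.add_all() has nothing to insert it into. The answer below maps the class declaratively; for comparison, a minimal mapped version of the original class could look like this (the column names come from the question, the String types and table name are assumptions):

from sqlalchemy import Column, String
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class Product(Base):
    __tablename__ = "product"   # assumed table name
    id = Column(String, primary_key=True)
    name = Column(String)
    store_name = Column(String)
    brand = Column(String)
    price = Column(String)
    amount = Column(String)
    info = Column(String)
    image = Column(String)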
Below should give you an idea (I limited the test data to just 2 columns):
test.py:

#!/usr/bin/env python3

import json

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session, as_declarative, registry


## configuration
engine = create_engine("sqlite+pysqlite:///:memory:", echo=True, future=True)
mapper_registry = registry()


@as_declarative()
class Base(object):
    pass


class Product(Base):
    __tablename__ = "product"
    id = Column("id", Integer, primary_key=True)
    name = Column(String)
    info = Column(String)


def _main():
    with Session(engine) as session:
        Base.metadata.drop_all(engine)
        Base.metadata.create_all(engine)

        ## test data
        objects = [
            Product(name="no-info"),
            Product(name="red-color", info="large RED"),
        ]
        session.add_all(objects)
        session.commit()
        session.expunge_all()

        ## test data: from JSON
        # product_list = load_all_test()  # load from test code
        product_list = load_all_file()  # load from the file
        print(product_list)

        # create Product() instances from JSON
        products = [Product(**kw) for kw in product_list]
        session.add_all(products)
        session.commit()


def load_all_test():
    test_json_content = """
    [
        { "name": "json-product-1", "info": "some info from json-1" },
        { "name": "json-product-2", "info": "some info from json-2" }
    ]
    """
    product_list = json.loads(test_json_content)
    return product_list


def load_all_file():
    with open("myjsonfile.json") as fh:
        product_list = json.load(fh)
    return product_list


_main()

Re-create postgresql database to do pytest

I am working on a project that uses these libraries (among others):
postgresql 10.5
pandas 1.1.1
psycopg2 2.7.5
pytest 5.0.1
python 3.7.1
I am trying to run pytest on a library we have created that uses SQLAlchemy to access a PostgreSQL database.
I want to re-create a database to test the methods that we have been using for a few months.
I have tried different things, but without success.
I know that PostgreSQL cannot create an in-memory database, which is why I am trying to use sqlite3 for that purpose.
The method I am trying to run test over is:
DatabaseHelper.py

class DatabaseHelper(object):
    """
    Helps accessing the database.
    """

    def __init__(self):
        pass

    # ...
    def create_engine(self, host_dns, dbname, port, user, password):
        """
        :param host_dns: dns route to the database
        :param dbname: name of the database to access
        :param port: port number of the database
        :param user: name of the user to access the database
        :param password: password to connect to the database
        """
        self.host = host_dns
        self.dbname = dbname
        self.port = port
        self.user = user
        self.password = password
        self.connection_str = f'postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.dbname}'
        self.engine = create_engine(self.connection_str)
        # session_factory = sessionmaker(bind=self.engine)
        Session = sessionmaker(bind=self.engine)
        self.session = Session()
        # print("Agora objectec created ok")

    # ...
    def read_db_to_df(self, **kwargs):
        """ Reads a database and transforms it into a pandas.DataFrame """
        try:
            default_reading_sql_args = {'con': self.session.connection()}
            reading_sql_args = utils.merge_two_dicts(default_reading_sql_args, kwargs)
            df = pd.read_sql(**reading_sql_args)
            return df
        except SQLAlchemyError as e:
            # self.logger.error("Error reading db to df")
            # self.logger.error(str(e).replace("\n", ""))
            print(e)
            return -20
MY_test_before_test.py

from sqlalchemy import MetaData, Column, Table, ForeignKey, select, PrimaryKeyConstraint, Index
from sqlalchemy import Integer, String, Boolean, TIMESTAMP
from sqlalchemy import create_engine
from sqlalchemy.schema import CreateTable, DropTable
from sqlalchemy.ext.declarative.api import DeclarativeMeta
from agora_db.agora_helper import AgoraHelper
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql.functions import current_timestamp, current_user

engine = create_engine('sqlite:///:memory:')
Session = sessionmaker(bind=engine)
session = Session()

Base = declarative_base()

# This is how the database models look:
class Schema(object):
    """ A simple schema to provide a common argument for all tables """
    __table_args__ = {"schema": "ifsmtd"}

class AbstractTable(object):
    """
    A class that defines how a table class represents its objects as strings.
    """
    def __repr__(self):
        """ Returns the object representation as a string, in a way that could be used to reconstruct the object.
        This returns an 'official' string representation of the object.
        :Example:
        """
        from sqlalchemy.inspection import inspect
        table_inst = inspect(self)
        table_name = type(self).__name__
        column_key_value = [(c_attr.key, getattr(self, c_attr.key)) for c_attr in table_inst.mapper.column_attrs]
        fields_str = ", ".join(["%s='%s'" % (c[0], c[1]) for c in column_key_value])
        return "<" + table_name + "(" + fields_str + ")>"

class MyBasicTable(Base, Schema, AbstractTable):
    __tablename__ = "mybasic_table"
    timest_mov = Column(TIMESTAMP, primary_key=True, nullable=False, server_default=current_timestamp())
    id_info = Column(String, primary_key=True, nullable=False)
    id_wf = Column(String, primary_key=True, nullable=False)
    process_name = Column(String, primary_key=True, nullable=False)
    error_type = Column(String, primary_key=True, nullable=False)
    resolution_status = Column(Boolean)
    aud_timest_umo = Column(TIMESTAMP, server_default=current_timestamp())
    aud_id_user = Column(String, server_default=current_user())

    __table_args__ = (
        PrimaryKeyConstraint('timest_mov', 'id_info', 'id_wf', 'process_name', 'error_type', name='pk_idx_mybasic_table'),
        Index('pk_idx_mybasic_table', 'timest_mov', 'id_info', 'id_wf', 'process_name', 'error_type', unique=True),
        {"schema": "ifsmtd"}
    )

dbhelper = DatabaseHelper()
dbhelper.engine = engine
dbhelper.session = session

query = session.query(
        MyBasicTable.timest_mov.label("timest_mov"),
        MyBasicTable.id_info.label("id_info"),
        MyBasicTable.id_wf.label("id_wf"),
        MyBasicTable.process_name.label("process_name"),
        MyBasicTable.error_type.label("error_type"),
        MyBasicTable.resolution_status.label("resolution_status")
    )\
    .distinct(MyBasicTable.id_jira.label("id_jira"))

df = dbhelper.read_db_to_df(sql=query.statement)
print(df)
The error I get is:
(sqlite3.OperationalError) no such table: ifsmtd.mybasic_table
How could I do the test?
Looking at the code, a call to Base.metadata.create_all(engine) seems to be missing. This call creates the initial database schema. After it runs the tables exist but are empty, and you need to populate them.
Add the above statement just before using the database, but after defining the tables.
Base.metadata.create_all(engine) # This will create the schema!
dbhelper = DatabaseHelper()
On the use of SQLite: I have also gone that route and bumped into the fact that by default SQLite does not check foreign key constraints (it can be enabled!). There may be more differences!
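For reference, the foreign-key enforcement mentioned above can be turned on per connection with a PRAGMA issued from a SQLAlchemy connect event; a minimal sketch (not part of the original answer):

from sqlalchemy import event
from sqlalchemy.engine import Engine

@event.listens_for(Engine, "connect")
def enable_sqlite_fks(dbapi_connection, connection_record):
    # SQLite ignores FOREIGN KEY constraints unless this PRAGMA is set
    # on each new connection.
    cursor = dbapi_connection.cursor()
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()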

InterfaceError in Python (Flask, SQLAlchemy) - Error binding parameter of a db.relationship parameter

I'm very new to Flask and SQLAlchemy but have a foundational understanding of Python. This is also my first post here and I think I've looked extensively for an answer to my problem.
I'm trying to create a very simple family tree database. I have two tables - a table of individuals, and a table of families (each family has an optional father and an optional mother - each of which is a foreign key representing another individual in the individuals table).
The individuals are created with a mandatory surname and optional fore and middle names. Also in the individuals table is a column with a relationship to the family table.
When I try to run this code, I get an error:
sqlalchemy.exc.InterfaceError: (sqlite3.InterfaceError) Error binding parameter 3 - probably unsupported type.
[SQL: INSERT INTO individual (forename, middle_name, surname, family_id) VALUES (?, ?, ?, ?)]
[parameters: ('David', None, 'Bloggs', <RelationshipProperty at 0x4314c68; no key>)]
The code is:
import os
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate

basedir = os.path.abspath(os.path.dirname(__file__))

app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///" + os.path.join(basedir, "data.sqlite")
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False

db = SQLAlchemy(app)
Migrate(app, db)

class Family(db.Model):
    __tablename__ = "family"
    id = db.Column(db.Integer, primary_key=True)
    mother_id = db.Column(db.Integer, db.ForeignKey("individual.id"))
    father_id = db.Column(db.Integer, db.ForeignKey("individual.id"))

    def __init__(self, mother_id='', father_id=''):
        self.mother_id = mother_id
        self.father_id = father_id

class Individual(db.Model):
    __tablename__ = "individual"
    id = db.Column(db.Integer, primary_key=True)
    forename = db.Column(db.Text)
    middle_name = db.Column(db.Text)
    surname = db.Column(db.Text)
    family_id = db.Column(db.Integer, db.ForeignKey("family.id"))

    def __init__(self, surname, forename=None, middle_name=None):
        self.forename = forename
        self.middle_name = middle_name
        self.surname = surname
        self.family_id = db.relationship("family", backref="individual", lazy="dynamic")

    def __repr__(self):
        return f"""The individual is {self.forename} {self.middle_name}
        {self.surname}, their father is {self.family.father_id.forename}
        {self.family.father_id.middle_name} {self.family.father_id.surname}
        and their mother is {self.family.mother_id.forename}
        {self.family.mother_id.middle_name} {self.family.mother_id.surname}"""

db.create_all()

person1 = Individual("Bloggs", "David")
person2 = Individual("Bloggs", "Robert")
person3 = Individual("Smith", "Mary", "Ann")

db.session.add_all([person1, person2, person3])
db.session.commit()

print(person1)
print(person2)
print(person3)
Any help appreciated. Thank you.
It's necessary to talk about a point in your database design first.
The design needs a double relationship between Family and Individual: to create an Individual a Family is necessary, while on the other side, to create a Family a mother and father id are necessary. This is a problem.
One way to design this is to assign a family_id, so that every individual takes a family_id and a role in the family.
import os
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from flask_migrate import Migrate

basedir = os.path.abspath(os.path.dirname(__file__))

app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///" + os.path.join(basedir, "data.sqlite")
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False

db = SQLAlchemy(app)
Migrate(app, db)

class Family(db.Model):
    __tablename__ = "family"
    id = db.Column(db.Integer, primary_key=True)
    family_id = db.Column(db.Integer, unique=True)
    family_name = db.Column(db.Text)

    def __init__(self, family_id, family_name):
        self.family_id = family_id
        self.family_name = family_name

class Individual(db.Model):
    __tablename__ = "individual"
    id = db.Column(db.Integer, primary_key=True)
    forename = db.Column(db.Text)
    middle_name = db.Column(db.Text)
    surname = db.Column(db.Text)
    role = db.Column(
        db.Integer,
        nullable=False,
        server_default=db.text("'0'"),
        comment="Role in the family: 2 = Father, 1 = Mother, 0 = Other",
    )
    family_id = db.Column(db.Integer, db.ForeignKey("family.id"))

    def __init__(self, forename, middle_name, surname, family_id, role=0):
        self.forename = forename
        self.middle_name = middle_name
        self.surname = surname
        self.family_id = family_id
        self.role = role

db.create_all()

family = Family(1, "Kay Fam.")
son = Individual("John", "David", "Kay", 1)
father = Individual("Dave", "Luis", "Kay", 1, 2)

db.session.add(family)
db.session.add(son)
db.session.add(father)
db.session.commit()
New case:
To assign an Individual to multiple Family/Role combinations, it is necessary to modify the model:
class Individual(db.Model):
    __tablename__ = "individual"
    id = db.Column(db.Integer, primary_key=True)
    individual_id = db.Column(db.Integer)
    forename = db.Column(db.Text)
    middle_name = db.Column(db.Text)
    surname = db.Column(db.Text)
    role = db.Column(
        db.Integer,
        nullable=False,
        server_default=db.text("'0'"),
        comment="Role in the family: 2 = Father, 1 = Mother, 0 = Other",
    )
    family_id = db.Column(db.Integer, db.ForeignKey("family.id"))

    def __init__(self, individual_id, forename, middle_name, surname, family_id, role=0):
        self.individual_id = individual_id
        self.forename = forename
        self.middle_name = middle_name
        self.surname = surname
        self.family_id = family_id
        self.role = role
The individual_id is important. Why? Because it is not unique and it is a number associated with the person (the id indicates the row in the DB, while the individual_id is something like a national ID number, a DNI for example).
With this it is possible to add the same individual with different family/role combinations more than once, as in the sketch below.
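A minimal sketch of that idea (the names and numbers are made up for illustration, not part of the original answer):

# The same person (individual_id 77) appears twice: as a son in family 1
# and as a father in family 2.
family1 = Family(1, "Kay Fam.")
family2 = Family(2, "Kay-Smith Fam.")

as_son = Individual(77, "John", "David", "Kay", 1, 0)
as_father = Individual(77, "John", "David", "Kay", 2, 2)

db.session.add_all([family1, family2, as_son, as_father])
db.session.commit()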

Flask SqlAlchemy : TypeError: 'Class' object is not iterable

I'm following the Flask-SQLAlchemy tutorial. I have Flask 0.9, sqlalchemy 0.7.8 and flask-sqlalchemy 0.16 on Python 2.6 (and I work with Eclipse).
(The tutorial is here: http://packages.python.org/Flask-SQLAlchemy/models.html)
I have 2 classes: a man and a wallet. There is a 1-1 relationship (each man has his own wallet).
class Man(db.Model):
    sid = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), unique=False)
    wallet = db.relationship('Wallet', backref='man', lazy='dynamic', uselist=False)

    def __init__(self, wallet):
        self.wallet = wallet

class Wallet(db.Model):
    sid = db.Column(db.Integer, primary_key=True)
    account = db.Column(db.Integer)
    manId = db.Column(db.Integer, db.ForeignKey('man.sid'))

    def __init__(self, account):
        self.account = account
In my "main" module, I create my database :
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:PATH'
db = SQLAlchemy(app)
In this very same module, I try to attach a Wallet to a Man:
if __name__ == "__main__":
    db.create_all()
    w1 = Wallet(132)
    w2 = Wallet(18)
    db.session.add(w1)
    db.session.add(w2)
    db.session.commit()
    man1 = Man(w1)
    db.session.add(man1)
    db.session.commit()
But I get this error :
TypeError: 'Wallet' object is not iterable
I fail to understand why such an error appears. What is the right way of adding a mapped object ?
PS: I've been through the SQLAlchemy tutorial and I believe that they would declare things differently:
class Man(db.Model):
    sid = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), unique=False)
    wallet = db.relationship('Wallet', backref='man', lazy='dynamic', uselist=False)
    manId = db.Column(db.Integer, db.ForeignKey('man.sid'))

    def __init__(self, wallet):
        self.wallet = wallet

class Wallet(db.Model):
    sid = db.Column(db.Integer, primary_key=True)
    account = db.Column(db.Integer)

    def __init__(self, account):
        self.account = account
Which tutorial should I trust ?
Thank you very much !
I fail to understand why such an error appears. What is the right way of adding a mapped object ?
Notice that when you configure your wallet relationship you use the lazy="dynamic" option. This way you are setting up a dynamic relationship. Since it is designed to be used with large collections, it doesn't really make much sense for a one-to-one relationship.
At the same time it alters the way you can assign to your scalar relationship, i.e. you cannot assign your single object directly:
self.wallet = wallet
but you must use an iterable:
self.wallet = [wallet]
So you have two solutions here: either assign a collection of one element as shown above, or better yet, stop using dynamic collections for this relationship, as sketched below.
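A minimal sketch of the second option (my reading of the answer, not code from it): drop lazy="dynamic" and keep uselist=False so the relationship stays a plain scalar and direct assignment works.

class Man(db.Model):
    sid = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(100), unique=False)
    # Plain one-to-one: uselist=False keeps the attribute scalar, so
    # `self.wallet = wallet` assigns a single Wallet directly.
    wallet = db.relationship('Wallet', backref='man', uselist=False)

    def __init__(self, wallet):
        self.wallet = wallet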

Trying to use a UUID for a primary key. Getting stale data on SQLalchemy commit()

Starting SQLAlchemy user here. I plan to use UUIDs as the primary keys for my tables.
In the tutorial I saw some code for using the native Python UUID type in ORM classes. Eureka! I can use PostgreSQL's native UUID type for my system database, and this TypeDecorator will stringify the UUIDs for SQLite on my mobile clients.
http://docs.sqlalchemy.org/en/latest/core/types.html#backend-agnostic-guid-type
Sadness. When using this with an existing SQLite database that has stringified UUIDs as the primary key, I get stale data errors when I try to commit any changes.
This class crashes with stale data on commit.
class CommodityTypes(Base):
    __tablename__ = 'CommodityTypes'
    uuidKey = Column(GUID, primary_key=True)
    myName = Column(String, unique=True)
    sortKey = Column(Integer, unique=True)
But this class works:
class NewTypes(Base):
    __tablename__ = 'CommodityTypes'
    uuidKey = Column(String, primary_key=True)
    myName = Column(String, unique=True)
    sortKey = Column(Integer, unique=True)
Queried objects from the CommodityTypes class show the Python UUID type for uuidKey. The script queries the objects correctly. I can change settings, but I can't commit. The decorated uuidKey doesn't seem to work.
I can go forward just using Strings for the uuidKey columns, but it frustrates me that the code from http://docs.sqlalchemy.org/en/latest/core/types.html#backend-agnostic-guid-type almost works.
Here's sample code with the problem. The string workaround not using the GUID type decorator is commented out.
# system modules
import uuid

# other modules
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, backref, sessionmaker
from sqlalchemy.types import TypeDecorator, CHAR
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound

engine = create_engine('sqlite:////home/XXXX/XobfuscatedXXXX/XXXXXXXX.sqlite')
Base = declarative_base()
Session = sessionmaker(bind=engine)

class GUID(TypeDecorator):
    """Platform-independent GUID type.

    Uses Postgresql's UUID type, otherwise uses
    CHAR(32), storing as stringified hex values.
    """
    impl = CHAR

    def load_dialect_impl(self, dialect):
        if dialect.name == 'postgresql':
            return dialect.type_descriptor(UUID())
        else:
            return dialect.type_descriptor(CHAR(32))

    def process_bind_param(self, value, dialect):
        if value is None:
            return value
        elif dialect.name == 'postgresql':
            return str(value)
        else:
            if not isinstance(value, uuid.UUID):
                return "%.32x" % uuid.UUID(value).int
            else:
                # hexstring
                return "%.32x" % value.int

    def process_result_value(self, value, dialect):
        if value is None:
            return value
        else:
            return uuid.UUID(value)

from sqlalchemy import Column, Boolean, DateTime, Date, Float, ForeignKey, Integer, Numeric, String

class CommodityTypes(Base):
    __tablename__ = 'CommodityTypes'
    uuidKey = Column(GUID, primary_key=True)
    myName = Column(String, unique=True)
    sortKey = Column(Integer, unique=True)

#class NewTypes(Base):
#    __tablename__ = 'CommodityTypes'
#    uuidKey = Column(String, primary_key=True)
#    myName = Column(String, unique=True)
#    sortKey = Column(Integer, unique=True)

if __name__ == "__main__":
    session = Session()

#    newList = session.query(NewTypes).order_by(NewTypes.sortKey)
#    for instance in newList:
#        print(instance.myName)
#
#    nt = newList[1]
#    print(nt.myName)
#    print(nt.sortKey)
#    nt.sortKey = 11
#    print(nt.sortKey)
#    session.commit()
#    print(nt.sortKey)

    ctList = session.query(CommodityTypes).order_by(CommodityTypes.sortKey)
    for instance in ctList:
        print(instance.myName)

    ct = ctList[1]
    print(ct.myName)
    print(ct.sortKey)
    ct.sortKey = 22
    print(ct.sortKey)
    session.commit()
    print(ct.sortKey)
Oh, forgot to mention software versions:
Python 3.1.3 (r313:86834, Dec 1 2010, 06:15:12)
[GCC 4.1.2 20080704 (Red Hat 4.1.2-48)] on linux2
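No answer is shown for this question here. One thing worth checking (my guess, not from the thread) is how the pre-existing SQLite rows were stored: str(uuid) keeps the dashes (36 characters), while the GUID decorator binds a 32-character hex string without dashes, so an UPDATE's WHERE clause on the primary key would match zero rows and the ORM would report stale data. A quick way to compare the two representations:

import uuid

u = uuid.uuid4()
print(str(u))           # e.g. 3f2504e0-4f89-11d3-9a0c-0305e82c3301 (36 chars, with dashes)
print("%.32x" % u.int)  # e.g. 3f2504e04f8911d39a0c0305e82c3301 (32 chars, no dashes)
# If the existing primary keys look like the first form, the value the
# decorator binds (the second form) will never match them.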
