Re-create postgresql database to do pytest - python-3.x

I am working on a project that works with these libraries (among others)
postgresql 10.5
pandas 1.1.1
psycopg2 2.7.5
pytest 5.0.1
python 3.7.1
I am trying to run pytest on a library we have created that uses sqlalchemy to access a postgresql database.
I want to re-create a database to test the methods that we have been using for a few months.
I have tried different things but without success.
I know that postgresql cannot create an in-memory database, which is why I am trying to use sqlite3 for that purpose.
The method I am trying to run test over is:
DatabaseHelper.py
class DatabaseHelper(object):
    """
    Helps accessing the database.

    Thin convenience wrapper holding a SQLAlchemy engine and session.
    """

    def __init__(self):
        # Connection details are supplied later via create_engine().
        pass

    # ...

    def create_engine(self, host_dns, dbname, port, user, password):
        """
        Create the SQLAlchemy engine and open a session on it.

        :param host_dns: DNS route to the database
        :param dbname: name of the database to access
        :param port: port number of the database
        :param user: name of the user to access the database
        :param password: password to connect to the database
        """
        self.host = host_dns
        self.dbname = dbname
        self.port = port
        self.user = user
        self.password = password
        # The user-info/host separator must be '@' (the original post had '#',
        # which is not a valid SQLAlchemy/libpq URL form).
        self.connection_str = f'postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.dbname}'
        self.engine = create_engine(self.connection_str)
        Session = sessionmaker(bind=self.engine)
        self.session = Session()

    # ...

    def read_db_to_df(self, **kwargs):
        """
        Read from the database and return a pandas.DataFrame.

        Keyword arguments are forwarded to pandas.read_sql(); the session's
        connection is injected as 'con' unless the caller overrides it.

        :return: the resulting DataFrame, or -20 on SQLAlchemyError
                 (error-code convention kept for backward compatibility).
        """
        try:
            default_reading_sql_args = {'con': self.session.connection()}
            # Caller-supplied kwargs take precedence over the defaults.
            reading_sql_args = utils.merge_two_dicts(default_reading_sql_args, kwargs)
            return pd.read_sql(**reading_sql_args)
        except SQLAlchemyError as e:
            print(e)
            return -20
MY_test_before_test.py
from sqlalchemy import MetaData, Column, Table, ForeignKey, select, PrimaryKeyConstraint, Index
from sqlalchemy import Integer, String, Boolean, TIMESTAMP  # Boolean and TIMESTAMP are used below but were never imported
from sqlalchemy import create_engine
from sqlalchemy.schema import CreateTable, DropTable
from sqlalchemy.ext.declarative.api import DeclarativeMeta
from agora_db.agora_helper import AgoraHelper
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql.functions import current_timestamp, current_user

# In-memory SQLite stands in for PostgreSQL while testing.
engine = create_engine('sqlite:///:memory:')
Session = sessionmaker(bind=engine)
session = Session()
Base = declarative_base()


class Schema(object):
    """A simple schema mixin to provide a common argument for all tables."""
    __table_args__ = {"schema": "ifsmtd"}


class AbstractTable(object):
    """Mixin defining how model instances represent themselves as strings."""

    def __repr__(self):
        """Return an 'official' string representation of the object, in a
        form that could be used to reconstruct it."""
        from sqlalchemy.inspection import inspect
        table_inst = inspect(self)
        table_name = type(self).__name__
        column_key_value = [(c_attr.key, getattr(self, c_attr.key)) for c_attr in table_inst.mapper.column_attrs]
        fields_str = ", ".join(["%s='%s'" % (c[0], c[1]) for c in column_key_value])
        return "<" + table_name + "(" + fields_str + ")>"


class MyBasicTable(Base, Schema, AbstractTable):
    __tablename__ = "mybasic_table"
    timest_mov = Column(TIMESTAMP, primary_key=True, nullable=False, server_default=current_timestamp())
    id_info = Column(String, primary_key=True, nullable=False)
    id_wf = Column(String, primary_key=True, nullable=False)
    process_name = Column(String, primary_key=True, nullable=False)
    error_type = Column(String, primary_key=True, nullable=False)
    resolution_status = Column(Boolean)
    aud_timest_umo = Column(TIMESTAMP, server_default=current_timestamp())
    aud_id_user = Column(String, server_default=current_user())
    __table_args__ = (
        PrimaryKeyConstraint('timest_mov', 'id_info', 'id_wf', 'process_name', 'error_type', name='pk_idx_mybasic_table'),
        Index('pk_idx_mybasic_table', 'timest_mov', 'id_info', 'id_wf', 'process_name', 'error_type', unique=True),
        {"schema": "ifsmtd"}
    )


# SQLite has no named schemas, so "ifsmtd.mybasic_table" is unresolvable
# unless a database is attached under that name first.
session.execute("ATTACH DATABASE ':memory:' AS ifsmtd")

# The missing step that caused "no such table": create the tables *after*
# defining the models and *before* querying them.
Base.metadata.create_all(engine)

# NOTE(review): DatabaseHelper must be importable here — the original post
# imports AgoraHelper above but instantiates DatabaseHelper; confirm which
# name is correct in the real project.
dbhelper = DatabaseHelper()
dbhelper.engine = engine
dbhelper.session = session

query = session.query(MyBasicTable.timest_mov.label("timest_mov"),
                      MyBasicTable.id_info.label("id_info"),  # label had a stray trailing space
                      MyBasicTable.id_wf.label("id_wf"),
                      MyBasicTable.process_name.label("process_name"),
                      MyBasicTable.error_type.label("error_type"),
                      MyBasicTable.resolution_status.label("resolution_status")
                      )\
    .distinct(MyBasicTable.id_info)  # id_jira is not a column of MyBasicTable
df = dbhelper.read_db_to_df(sql=query.statement)
print(df)
The Error I get is:
(sqlite3.OperationalError) no such table: ifsmtd.mybasic_table
How could I do the test?

Looking at the code, there seems to be missing a call to Base.metadata.create_all(engine). This would create the initial database schema. After this call the tables are empty and you need to populate them.
Add the above statement just before using the database, but after defining the tables.
Base.metadata.create_all(engine) # This will create the schema!
dbhelper = DatabaseHelper()
On the use of sqlite: I have also gone that route and bumped into the fact that by default sqlite does not check foreign key constraints (it can be enabled!). There may be more differences!

Related

FLASK: AttributeError: 'MySQL' object has no attribute 'Model'

I just started learning Flask. I can't figure out how to initialize a mysql database. Do I need to use some version of Alchemy, can I not use mysql modules only instead? I'm getting confused as I google looking any docs on using flask and mysql and I get many hits that include slqalchemy, flask-mysql, flask-mysqldb, flask-mysql-connector. Each example is different and I am going cross-eyed.
With so many examples I suspect my syntax is mixed up. I finally was able to display schemas with following command so I assume am close:
# msql_connector.py
# Smoke test: verify the flask-mysql connection works by listing schemas.
from application import mysql
print(mysql)
conn = mysql.connect()
cursor = conn.cursor()
cursor.execute('''SHOW SCHEMAS;''')
rv = cursor.fetchall()
# NOTE(review): cursor/conn are never closed — acceptable for a throwaway probe.
for db in rv:
print(db)
However I now want to initialize the db or just be able to write values to it as per below. I am not sure if I actually do need to initialize first or whether I can define the model and add data during runtime.
My error:
AttributeError: 'MySQL' object has no attribute 'Model'
My main file:
# __init__.py
# Application wiring: create the Flask app and bind flask-mysql to it.
from flask import Flask
from config import Config
from flaskext.mysql import MySQL
app = Flask(__name__)
app.config.from_object(Config)
mysql = MySQL()
mysql.init_app(app)
# Imported last to avoid a circular import (routes imports `app`/`mysql`).
from application import routes
Next my DB definitions:
# models.py
import flask
from application import mysql
# NOTE(review): flask-mysql exposes only a raw connection object — it has no
# `Model`, `IntField` or `StringField` attributes (those belong to ORMs such
# as MongoEngine/SQLAlchemy), which is what triggers the AttributeError
# reported above.
class User(mysql.Model):
user_id = mysql.IntField(unique=True )
first_name = mysql.StringField( max_length = 50 )
last_name = mysql.StringField( max_length = 50 )
email = mysql.StringField( max_length = 50 )
password = mysql.StringField( max_length = 50 )
def __init__(self, user_id, first_name, last_name, email, password):
self.user_id = user_id
self.first_name = first_name
self.last_name = last_name
self.email = email
self.password = password
My configuration:
# config.py
import os
from flask_mysql_connector import MySQL
class Config(object):
# Fall back to a hard-coded key only when the environment variable is unset.
SECRET_KEY = os.environ.get('SECRET_KEY') or 'djfdsjdsj4skldjess85'
MYSQL_DATABASE_HOST = 'localhost'
MYSQL_DATABASE_USER = 'flasker'
MYSQL_DATABASE_PASSWORD = '**********'
MYSQL_DATABASE_DB = 'UTA_Enrollment'
and last but not least:
import json
from application import app
from application import mysql
from flask import render_template, request, Response
from application.models import User, Course, Enrollment


@app.route("/user")  # was "#app.route": the '#' comments the decorator out, so the view is never registered
def user():
    """Render the user page after creating two demo User objects."""
    conn = mysql.connect()
    cursor = conn.cursor()
    cursor.execute('''SHOW SCHEMAS;''')
    rv = cursor.fetchall()
    # NOTE(review): these User instances are built but never persisted —
    # flask-mysql has no ORM save/session; see the Flask-SQLAlchemy answer
    # below for actual inserts.
    User(user_id=1, first_name="John", last_name="Doh", email="jd#domain.com", password="abc123")
    User(user_id=2, first_name="Jane", last_name="Doh", email="jad#domain.com", password="abc123")
    users = User.objects.all()
    return render_template("user.html", users=users)
So when I go to http://url/user I would like to be able to write the above data into the database.
My specs:
Python 3.9.6
click==8.1.3
Flask==2.2.2
Flask-MySQL==1.5.2
flask-mysql-connector==1.1.0
Flask-WTF==1.0.1
importlib-metadata==6.0.0
itsdangerous==2.1.2
Jinja2==3.1.2
MarkupSafe==2.1.1
mysql-connector-python==8.0.31
numpy==1.24.1
pandas==1.5.2
protobuf==3.20.1
PyMySQL==1.0.2
python-dateutil==2.8.2
python-dotenv==0.21.0
pytz==2022.7
six==1.16.0
Werkzeug==2.2.2
WTForms==3.0.1
zipp==3.11.0
So to recap:
Do I have to use some form of Alchemy to run this type of syntax: title = mysql.StringField( max_length = 100 )
am I only limited to running executes as in cursor.execute('''SHOW SCHEMAS;''')
do I need to initialize the database (including creating the tables and fields) first?
So I couldn't find an elegant solution with the MySQL modules. I went round and round with python-mysql, python-mysql-connector, and flask-mysql. It turned out that using flask-sqlalchemy was fairly easy to get it to do what I needed.
pip install Flask-SQLAlchemy
from flask_sqlalchemy import SQLAlchemy
db = SQLAlchemy(app)
class Config(object):
# NOTE(review): mismatched quotes in the original post — the URI string
# opens with ' and closes with "; use matching quotes in real code.
SQLALCHEMY_DATABASE_URI = 'mysql+pymysql://..."
SQLALCHEMY_TRACK_MODIFICATIONS = False
# Declarative model: each db.Column maps to a column of the "course" table.
class Course(db.Model):
course_id = db.Column( db.String(10), primary_key=True )
title = db.Column( db.String(100), nullable=False )
description = db.Column( db.String(255), nullable=False )
credits = db.Column( db.String(255), nullable=False )
term = db.Column( db.String(25), nullable=False )

How can I speed up this SQLAlchemy Query? In FastAPI

I have Volunteers table and it has 60 fields, I use FastAPI and SQLalchemy ORM:
# ORM model for the "volunteers" table (60 columns in total; elided here).
class Volunteers(Base):
__tablename__ = "volunteers"
id = Column(Integer, primary_key=True, index=True, nullable=False)
candidate_id = Column(Integer)
full_name = Column(String)
# ...remaining columns elided in the post...
......
And I have function where I import excel file and write data from excel to database. I import all users from excel to all_users_in_excel list. all_users_in_excel is a list of dictionaries. Dictionaries include each user information. Then I get one user at a time from all_users_in_excel list and create new_user object with values and append it to a new list.
def import_data(file: UploadFile = File(...), db: Session = Depends(get_db)):
# Persist the uploaded spreadsheet to disk, then load it with pandas.
all_users_in_excel = []
with open(f'{file.filename}', "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
# NOTE(review): reads a hard-coded "assignment-data.xlsx", not file.filename.
volunteer_data = pd.read_excel("assignment-data.xlsx", index_col=None)
# Each spreadsheet row becomes a dict keyed by its column headers.
for name in volunteer_data.iterrows():
new_user_from_excel = name[1].to_dict()
all_users_in_excel.append(new_user_from_excel)
saved_users = []
# Map spreadsheet column names onto Volunteers model attribute names.
for key in all_users_in_excel:
new_user = {
"candidate_id": key["Candidate - ID"],
"full_name": key["Candidate - Full Name"],
"checkpoint": key["Candidate - Checkpoint"],
.....
"created_at": datetime.now()
}
saved_users.append(new_user)
# One bulk INSERT of all mapped rows, then a single commit.
db.bulk_insert_mappings(models.Volunteers, saved_users)
db.commit()
db.bulk_insert_mappings(models.Volunteers, saved_users) takes 20 seconds to save 110 users. How can I speed it up? I use Postgresql as a database and it is deployed to Heroku free subscription.

SqlAlchemy 'datetime.datetime' is not mapped

I'm using Scrapy to grab domains and their creation date using the Whois module. I am then adding them to a MySQL database using SqlAlchemy but I get the below error when adding the creation date to the database because the data type is <class 'datetime.datetime'>
sqlalchemy.orm.exc.UnmappedInstanceError: Class 'datetime.datetime' is not mapped
I tried to convert the date into a string but then I get another error.
pipelines.py:
class SaveDomainsPipeline(object):
def __init__(self):
engine = db_connect()
create_table(engine)
# Session factory bound once per pipeline; a session is opened per item.
self.Session = sessionmaker(bind=engine)
def process_item(self, item, spider):
session = self.Session()
domain = Domains(**item)
domain_item = item['domain']
domain_whois = whois.query(domain_item)
creation_date = domain_whois.creation_date
try:
# BUG (the reported UnmappedInstanceError): add_all() expects mapped
# objects, but creation_date is a plain datetime.datetime — it should
# be assigned to domain.creation_date instead (see the fix below).
session.add_all([domain, creation_date])
session.commit()
models.py
# ORM model for scraped domains and their WHOIS creation dates.
class Domains(Base):
__tablename__ = "domains"
# Surrogate primary key.
id = Column(Integer, primary_key=True)
# Row-insertion timestamp, filled in by the database.
date_added = Column(DateTime(timezone=True), server_default=func.now())
domain = Column('domain', Text())
creation_date = Column('creation_date', DateTime(timezone=True))
#creation_date = Column('creation_date', Text()) -- I also tried this
I made a rookie mistake in my original code.
As I initiated an instance of the class "Domains", I had to refer to it when populating the columns which I had originally missed. The working code can be found below.
# Working version: all values are set on the mapped Domains instance, and
# only that instance is added to the session.
class SaveDomainsPipeline(object):
def __init__(self):
engine = db_connect()
create_table(engine)
self.Session = sessionmaker(bind=engine)
def process_item(self, item, spider):
session = self.Session()
domains = Domains() #initiate instance of Domains class.
domains.domain = item['domain'] #Add the item "domain" from Items to DB
domain_whois = whois.query(domains.domain)
domains.creation_date = domain_whois.creation_date #Add the creation date to DB
try:
#save the instance which saves both the domain item and creation date.
session.add(domains)
session.commit()

Flask SQLAlchemy query returns a querystring instead of an actual resultselt

I have used flask-sqlalchemy to create a mixin in a file called itemAbstract.py, to be shared by two model classes: ItemModel and ItemHistoryModelrespectively. Below is the code I have written in the itemAbstract.py
from databaseHandler import databaseHandler
from sqlalchemy.ext.declarative import declared_attr
# pylint: disable=maybe-no-member


class Item(databaseHandler.Model):
    """Abstract mixin with the columns shared by ItemModel and ItemHistoryModel."""
    __abstract__ = True

    itemName = databaseHandler.Column(databaseHandler.String(80), nullable=False)
    price = databaseHandler.Column(databaseHandler.Numeric, nullable=False)
    itemImage = databaseHandler.Column(databaseHandler.String(1000), nullable=False)

    # The decorators must be '@classmethod' / '@declared_attr' — the original
    # post's '#' prefix (a scrape artifact) turns them into comments, leaving
    # these as plain methods instead of per-subclass mapped columns.
    @classmethod
    @declared_attr
    def restaurantId(cls):
        # Foreign key to restaurant.restaurantId, declared per concrete subclass.
        return databaseHandler.Column(
            databaseHandler.Integer, databaseHandler.ForeignKey("restaurant.restaurantId"))

    @classmethod
    @declared_attr
    def restaurant(cls):
        return databaseHandler.relationship(
            "RestaurantModel", backref=databaseHandler.backref('items', lazy=True))

    @classmethod
    @declared_attr
    def productTypeId(cls):
        return databaseHandler.Column(
            databaseHandler.Integer, databaseHandler.ForeignKey("product_type.productTypeId"))

    @classmethod
    @declared_attr
    def productType(cls):
        return databaseHandler.relationship(
            "ProductTypeModel", backref=databaseHandler.backref('items', lazy=True))
And I have inherited it in the itemModel.py and itemHistoryModel.py like so:
from databaseHandler import databaseHandler
from sqlalchemy import and_, or_
from abstracts.itemAbstract import Item
# pylint: disable=maybe-no-member
# Concrete "item" table; shared columns are inherited from the Item mixin.
class ItemModel(Item):
__tablename__ = 'item'
itemId = databaseHandler.Column(databaseHandler.Integer, primary_key = True)
And
from databaseHandler import databaseHandler
from sqlalchemy import and_, or_
from abstracts.itemAbstract import Item
# pylint: disable=maybe-no-member
# Concrete "item_history" table; shared columns come from the Item mixin.
class ItemHistoryModel(Item):
__tablename__ = 'item_history'
historyId = databaseHandler.Column(databaseHandler.Integer, primary_key = True)
I have a class method in both files that is supposed to help me get a list of items a restaurant sells by passing in the restaurantId as parameter
@classmethod  # was '#classmethod' — the '#' (scrape artifact) commented the decorator out
def findItemsByRestaurant(cls, param):
    """Return the list of rows whose restaurantId equals *param*.

    filter_by() alone returns a Query object (which is why the "result set"
    printed as SQL text); calling .all() executes the query and returns the
    mapped instances.
    """
    return cls.query.filter_by(restaurantId=param).all()
However, anytime I execute this method it returns a query string in the resultset instead of a list of items. Here is a sample resultset:
SELECT item_history.`itemName` AS `item_history_itemName`, item_history.price AS item_history_price, item_history.`itemImage` AS `item_history_itemImage`, item_history.`historyId` AS `item_history_historyId`
FROM item_history
WHERE false = 1
Somehow, SQLAlchemy makes my parameter false and assigns a value of 1 to it meanwhile the actual ID of the restaurant is 10. What am I doing wrong?
This is the databaseHandler.py file:
# databaseHandler.py — the single shared flask-sqlalchemy instance; model
# modules import this object rather than creating their own.
from flask_sqlalchemy import SQLAlchemy
databaseHandler = SQLAlchemy()
The Query object has a number of API methods for getting pythonic objects rather than amending the query:
get
all
from_statement
first
one_or_none
one
scalar (as_scalar to be depreciated)
count

Trying to use a UUID for a primary key. Getting stale data on SQLalchemy commit()

Starting SQLalchemy user here. I plan to use UUID's as the primary keys for my tables.
In the tutorial I saw some code for using the native Python UUID type in ORM classes. Eureka! I can use Postgresql's native UUID type for my system database and this TypeDecorator will stringify the UUID's for SQLite on my mobile clients.
http://docs.sqlalchemy.org/en/latest/core/types.html#backend-agnostic-guid-type
Sadness. When using this with an existing SQLite database that has stringified UUID's as the primary key I get stale data errors when I try to commit any changes.
This class crashes with stale data on commit.
# Crashing variant: the primary key is typed with the custom GUID TypeDecorator.
class CommodityTypes(Base):
__tablename__ = 'CommodityTypes'
uuidKey = Column(GUID, primary_key=True)
myName = Column(String, unique = True)
sortKey = Column(Integer, unique = True)
, but this class works:
# Working variant: same table, but the key is mapped as a plain String.
class NewTypes(Base):
__tablename__ = 'CommodityTypes'
uuidKey = Column(String, primary_key=True)
myName = Column(String, unique = True)
sortKey = Column(Integer, unique = True)
Queried objects from the CommodityTypes class show the python UUID type for uuidKey. The script queries the object correctly. I can change settings, but I can't commit. The decorated uuidKey doesn't seem to work.
I can go forward just using Strings for the uuidKey columns, but it frustrates me that the code from http://docs.sqlalchemy.org/en/latest/core/types.html#backend-agnostic-guid-type almost works.
Here's sample code with the problem. The string workaround not using the GUID type decorator is commented out.
#system modules
import uuid
#other modules
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, backref, sessionmaker
from sqlalchemy.types import TypeDecorator, CHAR
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound
engine = create_engine('sqlite:////home/XXXX/XobfuscatedXXXX/XXXXXXXX.sqlite')
Base = declarative_base()
Session = sessionmaker(bind=engine)
class GUID(TypeDecorator):
    """Platform-independent GUID type.

    Uses PostgreSQL's UUID type, otherwise uses
    CHAR(32), storing as stringified hex values.
    """
    impl = CHAR

    def load_dialect_impl(self, dialect):
        # Native UUID column on PostgreSQL, plain CHAR(32) elsewhere (SQLite).
        if dialect.name == 'postgresql':
            return dialect.type_descriptor(UUID())
        else:
            return dialect.type_descriptor(CHAR(32))

    def process_bind_param(self, value, dialect):
        if value is None:
            return value
        elif dialect.name == 'postgresql':
            return str(value)
        else:
            # '%x' requires an integer: format the UUID's .int, not the UUID
            # object itself. The missing '.int' in the original raised
            # TypeError on bind, which is what broke commits on SQLite
            # (this matches the official SQLAlchemy GUID recipe).
            if not isinstance(value, uuid.UUID):
                return "%.32x" % uuid.UUID(value).int
            else:
                # value is already a uuid.UUID
                return "%.32x" % value.int

    def process_result_value(self, value, dialect):
        if value is None:
            return value
        else:
            return uuid.UUID(value)
from sqlalchemy import Column, Boolean, DateTime, Date, Float, ForeignKey, Integer, Numeric, String
# Model under test: GUID-typed primary key (the failing variant from above).
class CommodityTypes(Base):
__tablename__ = 'CommodityTypes'
uuidKey = Column(GUID, primary_key=True)
myName = Column(String, unique = True)
sortKey = Column(Integer, unique = True)
#class NewTypes(Base):
# __tablename__ = 'CommodityTypes'
# uuidKey = Column(String, primary_key=True)
# myName = Column(String, unique = True)
# sortKey = Column(Integer, unique = True)
if __name__=="__main__":
session = Session()
# newList = session.query(NewTypes).order_by(NewTypes.sortKey)
# for instance in newList:
# print(instance.myName)
#
# nt = newList[1]
# print(nt.myName)
# print(nt.sortKey)
# nt.sortKey = 11
# print(nt.sortKey)
# session.commit()
# print(nt.sortKey)
# Query all rows, mutate one, and commit — the commit is where the
# stale-data error is raised with the GUID-typed key.
ctList = session.query(CommodityTypes).order_by(CommodityTypes.sortKey)
for instance in ctList:
print(instance.myName)
ct = ctList[1]
print(ct.myName)
print(ct.sortKey)
ct.sortKey = 22
print(ct.sortKey)
session.commit()
print(ct.sortKey)
Oh, forgot to mention software versions:
Python 3.1.3 (r313:86834, Dec 1 2010, 06:15:12)
[GCC 4.1.2 20080704 (Red Hat 4.1.2-48)] on linux2

Resources