I need to add the quantity of rows returned in this query:
# Fragment of a (class)method: cls, db, lat1, lon1, dst, per_page and offset
# come from the enclosing scope, which is outside this excerpt.
# Raw SQL: great-circle distance in miles from (:lat1, :lon1) to each job's
# stored coordinates; COUNT(*) OVER () attaches the total number of matching
# rows to every returned row, paginated by LIMIT/OFFSET.
queryPostgres = db.text("""
SELECT *, COUNT(*) OVER () as RowCount
FROM (
SELECT * ,
( 3958.75 *
acos(sin(:lat1 / 57.2958) * sin( cast(latitude as double precision) / 57.2958) +
cos(:lat1 / 57.2958) * cos( cast(latitude as double precision) / 57.2958) *
cos( cast(longitude as double precision) / 57.2958 - :lon1/57.2958)))
as distanceInMiles
FROM "job" ) zc
WHERE zc.distanceInMiles < :dst
ORDER BY zc.distanceInMiles
LIMIT :per_page
OFFSET :offset
""")
# NOTE(review): PostgreSQL folds unquoted identifiers to lower case, so the
# alias likely comes back as 'rowcount', not 'RowCount' — confirm, since the
# post_dump hook below looks the key up as 'RowCount'.
# Bind parameters and run the textual query through the model's query object.
jobs = cls.query.\
    from_statement(queryPostgres). \
    params(lat1=float(lat1),
           lon1=float(lon1),
           dst=int(dst),
           per_page=int(per_page),
           offset=int(offset))
return jobs
As you can see I added the RowCount column to have the total count of rows.
However as it is not part of my model, I wonder what should I do in Marshmallow so I could add the number of rows(in the RowCount column)?
I thought I could do it with Marshmallow's @post_dump(); however, I could not figure out how to do it.
For more clarity here is my schema.
class JobSchema(ma.ModelSchema):
    """Marshmallow schema for Job rows.

    On load, fills in longitude/latitude (looked up from the zip code) and a
    creation timestamp; on dump, wraps list results in an envelope that
    carries the query's total row count.
    """

    def validate_state(state):
        """Validate one of 55 USA states."""
        if state not in states:
            raise ValidationError(INVALID_US_STATE)

    def validate_zipCode(zip):
        """Reject zip codes that do not actually exist."""
        if not zipcodes.is_real(zip):
            raise ValidationError(INVALID_ZIP_CODE)

    @pre_load  # fixed: '#pre_load' was a plain comment, so the hook never ran
    def get_longitude_for_zipCode_and_TimeCreated(self, data):
        """Pass valid long, lat and time_created values to each job created
        during a POST request."""
        # Getting zip from the request to obtain lat&lon from DB
        result = modelZipCode.getZipCodeDetails(data['zipCode'])
        if result is None:
            raise ValidationError(INVALID_ZIP_CODE_2)
        # fixed: exclude=('id') is just the string 'id'; a 1-tuple is needed
        schema = ZipCodeSchema(exclude=('id',))
        zip, errors = schema.dump(result)
        if errors:
            raise ValidationError(INVALID_ZIP_CODE_3)
        data['longitude'] = zip['longitude']
        data['latitude'] = zip['latitude']
        data['time_created'] = str(datetime.datetime.utcnow())
        # fixed: a pre_load hook must return the (mutated) payload
        return data

    title = fields.Str(required=True, validate=[validate.Length(min=4, max=80)])
    city = fields.Str(required=True, validate=[validate.Length(min=5, max=100)])
    state = fields.Str(required=True, validate=validate_state)
    zipCode = fields.Str(required=True, validate=validate_zipCode)
    description = fields.Str(required=False, validate=[validate.Length(max=80)])
    narrative = fields.Str(required=False, validate=[validate.Length(max=250)])
    companyLogo = fields.Str(required=False, validate=[validate.Length(max=250)])
    companyName = fields.Str(required=True, validate=[validate.Length(min=5, max=250)])
    companyURL = fields.Str(required=True, validate=[validate.Length(min=4, max=100)])
    latitude = fields.Str(required=True)
    longitude = fields.Str(required=True)
    time_created = fields.DateTime()

    # We add a post_dump hook to add an envelope to responses
    @post_dump(pass_many=True)  # fixed: '#post_dump(...)' was a plain comment
    def wrap(self, data, many):
        """Envelope list results together with the query's total row count."""
        # NOTE(review): ModelSchema only dumps fields declared on the model,
        # so the extra RowCount column may already be stripped here, and
        # PostgreSQL folds the unquoted alias to 'rowcount' — confirm which
        # key (if any) actually survives to this point.
        if many and len(data) >= 1:
            counter = data[0]['RowCount']
            # fixed: the original returned a set literal {data, counter},
            # which raises TypeError because lists are unhashable; an
            # envelope must be a dict.
            return {'jobs': data, 'RowCount': counter}
        return data

    class Meta:
        model = modelJob
The most weird thing is that indeed my query is correctly returning the rowcount
Could someone please help me find out why I cannot capture the rowcount key in the post_dump method?
This needs to be managed by the Marshmallow pre_dump or post_dump method. But in the end I decided to use SQLAlchemy's pagination methods instead, as they gave me the total row count in the response.
Related
Very simple: I am trying to run a query in Python 3 SQLAlchemy to delete some records, given string names of the table and field to query against.
How do you get the table object from a string?
Given 1. how do you run a query via ORM with just a string of the field name?
I would assume all ORM's have an internal array or method like get with the name.
# Per-table expiry configuration: rows of `table` whose `field` timestamp is
# older than `expires` days are candidates for deletion.
json_config = [
    {"table": "tableA",
     "field": "modified_on",  # fixed: missing comma caused implicit string
     "expires": 30},          # concatenation and a syntax error
    {"table": "tableB",
     "field": "event_on",     # fixed: same missing comma
     "expires": 30}
]
# Pseudocode sketch: for each configured table, delete rows whose timestamp
# field indicates the record has expired.
for table_conf_item in self.json_config:
    table_name = table_conf_item["table"]
    field_name = table_conf_item["field"]
    expire_after = table_conf_item["expires"]
    # NOTE(review): TABLES/FIELDS are an imagined ORM lookup API, not real
    # SQLAlchemy; see the reflection-based answer below for a working form.
    table_obj = self.orm_session.TABLES[table_name]
    field_obj = self.orm_session.TABLES[table_name].FIELDS[field_name]
    # NOTE(review): 'expires' looks like a day count but is compared directly
    # against the column — a cutoff date would need to be computed first.
    result = self.orm_session.delete(table_obj).where(field_obj < expire_after)
    self.orm_session.commit()
    print(f"{table_name}: removed {result.row_count} objects")
Given the table's name, you can use reflection to get a Table object. Using SQLAlchemy's core layer, this is reasonably straightforward:
import sqlalchemy as sa

# Reflect an existing table by name. `metadata` and `name_of_table` are
# assumed to be defined by the caller; '...' is a placeholder database URL.
engine = sa.create_engine(...)
tbl = sa.Table(name_of_table, metadata, autoload_with=engine)
If you want to work with multiple tables, it may be more efficient to store them a Metadata instance for later access:
metadata = sa.MetaData()
# Reflect only the tables of interest into one MetaData for later lookups.
metadata.reflect(engine, only=list_of_table_names)
tbl = metadata.tables[name_of_table]
Once you have a Table object you can reference columns by name like this: tbl.c[name_of_field].
Full example:
import sqlalchemy as sa

# Setup
engine = sa.create_engine('sqlite://', echo=True, future=True)

# Create a small table with sample rows, then drop our Python handle to it
# so the reflection below starts from scratch.
tbl = sa.Table(
    't',
    sa.MetaData(),
    sa.Column('id', sa.Integer, primary_key=True),
    sa.Column('foo', sa.Integer),
)
tbl.create(engine)

with engine.begin() as conn:
    vals = [42, 43, 42, 43, 56, 87, 89]
    conn.execute(tbl.insert(), [{'foo': v} for v in vals])

del tbl

# Reflect the table.
metadata = sa.MetaData()
metadata.reflect(engine, only=['t'])
tbl = metadata.tables['t']

# Define some statements.
# Columns are addressed by string name via tbl.c[...] throughout.
q1 = sa.select(tbl).where(tbl.c['foo'] == 42)
q2 = sa.select(tbl.c['id'], tbl.c['foo']).where(tbl.c['foo'] == 43)
q3 = sa.delete(tbl).where(tbl.c['foo'] != 42)

# Execute the statements.
with engine.connect() as conn:
    rows = conn.execute(q1)
    for row in rows:
        print(row)
    print()
    rows = conn.execute(q2)
    for row in rows:
        print(row)
    print()

# Apply the delete, then re-run q1 to show which rows survived.
with engine.begin() as conn:
    conn.execute(q3)

with engine.connect() as conn:
    rows = conn.execute(q1)
    for row in rows:
        print(row)
    print()
Doing the same through the ORM layer is more complicated, as table and column names must be mapped to ORM entity classes (models) and their attributes. This replicates the previous example for a simple mapping (it assumes the same initial data as above).
import sqlalchemy as sa
from sqlalchemy import orm

Base = orm.declarative_base()

class Thing(Base):
    __tablename__ = 't'
    id = sa.Column(sa.Integer, primary_key=True)
    # The attribute name deliberately differs from the column name ('foo')
    # to demonstrate mapping between the two below.
    thing_foo = sa.Column('foo', sa.Integer)

# '...' is a placeholder database URL.
engine = sa.create_engine(...)
Base.metadata.create_all(engine)
Session = orm.sessionmaker(engine, future=True)

# The only inputs: plain strings naming the table and the column.
tablename = 't'
columnname = 'foo'

with Session.begin() as s:
    # Get the mappers for the Base class.
    mappers = Base.registry.mappers
    # Get the mapper for our table.
    mapper = next(m for m in mappers if m.entity.__tablename__ == tablename)
    # Get the entity class (Thing).
    entity = mapper.entity
    # Get the column from the Table.
    table_column = mapper.selectable.c[columnname]
    # Get the mapper property that corresponds to the column
    # (the entity attribute may have a different name to the
    # column in the database).
    mapper_property = mapper.get_property_by_column(table_column)
    # Get the queryable entity attribute (Thing.thing_foo).
    attr = mapper.all_orm_descriptors[mapper_property.key]
    q = sa.select(entity).where(attr != 42)
    entities = s.scalars(q)
    for entity in entities:
        s.delete(entity)

# Show what is left after the delete.
with Session() as s:
    for thing in s.scalars(sa.select(Thing)):
        print(thing.id, thing.thing_foo)
How do I express relationship which depends on len of collection child in joined entity?
In below example, parent entity is AlgoOrder. Child entity is Order. And PrivateTrade is child entity of Order.
AlgoOrder --> Order --> PrivateTrade
The problem I am having is with "orders_pending_private_trade_update".
class AlgoOrder(DbModel):
    """Parent aggregate (question code — contains the broken line asked about)."""
    __tablename__ = "algo_order"
    id = sa.Column(sa.Integer, primary_key=True)
    ... stuff ...  # placeholder from the question; not valid Python
    # https://docs.sqlalchemy.org/en/14/orm/loading_relationships.html
    open_orders = orm.relation(Order, primaryjoin=and_(Order.algo_order_id == id, Order.status == 'OPEN'), lazy='select')
    # NOTE(review): this is the failing line — it has a stray double comma
    # (syntax error), and len() cannot be applied to an InstrumentedAttribute;
    # a relationship primaryjoin cannot filter on a child-collection size.
    orders_pending_private_trade_update = orm.relation(Order, primaryjoin=and_(Order.algo_order_id == id, , Order.status == 'CLOSED', len(Order.private_trades)==0), lazy='select')
    # NOTE(review): '#property' is presumably a markdown-mangled '@property'.
    #property
    def pending_orders(self):
        # Open orders plus closed orders still awaiting a private-trade update.
        return self.open_orders + self.orders_pending_private_trade_update
class Order(DbModel):
    """Child of AlgoOrder; each Order may have PrivateTrade rows."""
    __tablename__ = "order_hist"
    algo_order_id = sa.Column(sa.Integer, sa.ForeignKey("algo_order.id"))
    ... stiff ...  # placeholder from the question (sic); not valid Python
    # NOTE(review): 'order_id' referenced below is presumably among the
    # elided columns — confirm.
    private_trades = orm.relation(PrivateTrade, primaryjoin=and_(PrivateTrade.order_id == order_id))
class PrivateTrade(DbModel):
    """Grandchild: an individual trade reported against an Order."""
    __tablename__ = "private_trade"
    id = sa.Column(sa.Integer, primary_key=True)
    # NOTE(review): the FK targets order_hist.order_id (a String), not the
    # integer primary key 'id' — confirm this is intentional.
    order_id = sa.Column(sa.String, sa.ForeignKey("order_hist.order_id"))
In particular, the error at "orders_pending_private_trade_update" was with "len" on Order.private_trades:
Exception has occurred: TypeError (note: full exception trace is shown but execution is paused at: _run_module_as_main) object of type 'InstrumentedAttribute' has no len()
So, I tried:
from sqlalchemy.sql.expression import func
orders_pending_private_trade_update = orm.relation(Order, primaryjoin=and_(Order.algo_order_id == id, Order.status == 'CLOSED', func.count(Order.private_trades)==0), lazy='select', viewonly=True)
But then error was "foreign key columns are present in neither the parent nor the child's mapped tables":
Can't determine relationship direction for relationship 'AlgoOrder.orders_pending_private_trade_update' - foreign key columns are present in neither the parent nor the child's mapped tables <class 'sqlalchemy.exc.ArgumentError'> Can't determine relationship direction for relationship 'AlgoOrder.orders_pending_private_trade_update' - foreign key columns are present in neither the parent nor the child's mapped tables
I checked my tables, I do have them:
# Alembic migration excerpts (truncated with '...' in the question) showing
# that the foreign-key columns do exist on the child tables.
op.create_table(
    'algo_order',
    sa.Column('id', sa.Integer(), primary_key=True),
    ...
op.create_table(
    'order_hist',
    sa.Column('id', sa.Integer(), primary_key=True),
    sa.Column('algo_order_id', sa.Integer, sa.ForeignKey("algo_order.id")),
    ...
op.create_table(
    'private_trade',
    sa.Column('id', sa.Integer(), primary_key=True),
    sa.Column('order_id', sa.String(), sa.ForeignKey("order_hist.order_id"))
    ...
Thanks in advance.
I think I found it, but syntax pretty ugly: I used closed_order.session to do a new Query
import sqlalchemy as sa
import sqlalchemy.orm as orm
from sqlalchemy.sql.expression import func
import sqlalchemy.dialects.postgresql as psql
from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy.sql.expression import and_
class AlgoOrder(DbModel):
    """Algo order aggregate; derives its pending orders from relationships."""
    __tablename__ = "algo_order"
    id = sa.Column(sa.Integer, primary_key=True)
    # ... other columns elided in the original (was a bare '... other stuff ...'
    # placeholder, which is a syntax error) ...

    open_orders = orm.relation(Order, primaryjoin=and_(Order.algo_order_id == id, Order.status == 'OPEN'), lazy='select')
    # lazy='dynamic' yields a Query object, which exposes the Session used in
    # the property below.
    closed_orders = orm.relation(Order, primaryjoin=and_(Order.algo_order_id == id, Order.status == 'CLOSED'), lazy='dynamic', viewonly=True)

    @property  # fixed: '#property' was a comment, so this never became a property
    def orders_pending_private_trade_update(self):
        """Closed orders that have no PrivateTrade rows recorded yet."""
        session = self.closed_orders.session
        # ids of orders with zero private trades (LEFT JOIN + HAVING count = 0)
        order_ids_with_no_private_trades = [
            order.id
            for order in session.query(Order.id, func.count(PrivateTrade.id).label('count_private_trades'))
            .join(PrivateTrade, isouter=True)
            .group_by(Order.id)
            .having(func.count(PrivateTrade.id) == 0)
            .all()
        ]
        orders_with_no_private_trades = (
            session.query(Order)
            .filter(Order.id.in_(order_ids_with_no_private_trades))
            .order_by(Order.id.desc())
            .limit(1000)
            .all()
        )
        return orders_with_no_private_trades

    @property  # fixed: '#property' was a comment here as well
    def pending_orders(self):
        """All orders still requiring attention: open ones plus closed ones
        awaiting a private-trade update."""
        return list(self.open_orders) + list(self.orders_pending_private_trade_update)
I also don't like "first(100)" as an attempt to limit number of rows fetched. And how/when you dispose of the list to prevent memory leak? I think above approach is bad. Should use generator instead of returning list.
Essentially raw sql what I am looking for is a generator which returns below:
-- Orders in a terminal state (CLOSED/CANCELLED) that have no private trades.
-- The LEFT JOIN keeps orders with zero matches; for exactly those rows the
-- count of private_trade.id is 0.
select
order_id,
cnt
from (
select
order_hist.id,
order_hist.order_id,
count(private_trade.id) cnt
from order_hist
left join private_trade on private_trade.order_id = order_hist.order_id
where order_hist.status in ('CLOSED', 'CANCELLED')
group by order_hist.id, order_hist.order_id
) src
where
cnt=0
Any better way to do this? I think my solution shows the sqlalchemy syntax but it's computationally inefficient.
Here's solution using generator instead to avoid MemoryError:
def order_hist_missing_private_trade_get(engine):
    """Yield the order_id of every CLOSED/CANCELLED order_hist row that has
    no private_trade rows, streaming results to keep memory bounded.

    :param engine: an Engine (or compatible object) whose connections support
        execution_options(stream_results=True) and execute().
    """
    order_hist_missing_private_trade_sql = '''
select
order_id,
cnt
from (
select
order_hist.id,
order_hist.order_id,
count(private_trade.id) cnt
from order_hist
left join private_trade on private_trade.order_id = order_hist.order_id
where order_hist.status in ('CLOSED', 'CANCELLED')
group by order_hist.id, order_hist.order_id
) src
where
cnt=0
'''
    with engine.connect() as conn:
        # https://stackoverflow.com/questions/7389759/memory-efficient-built-in-sqlalchemy-iterator-generator
        conn.execution_options(stream_results=True)
        rs = conn.execute(order_hist_missing_private_trade_sql)
        while True:
            batch = rs.fetchmany(10000)
            # fixed: the original never broke out of the loop — fetchmany()
            # returns [] forever once exhausted, so the generator spun
            # infinitely instead of terminating.
            if not batch:
                break
            for row in batch:
                yield row['order_id']
Usage:
from sqlalchemy import create_engine

# NOTE: '@' separates the credentials from the host; the original '#' looks
# like a markdown-mangled '@'.
connstr: str = "postgresql://postgres:your_secret@localhost/postgres"
engine = create_engine(connstr)

# fixed: 'while True: next(generator)' raises an uncaught StopIteration once
# the generator is exhausted; a for-loop terminates cleanly.
for order_id in order_hist_missing_private_trade_get(engine):
    print(f"order_id: {order_id}")
I need to load only the data from the database for today's date.
The date column in the database is TEXT...
''code to load all the data from database''
def load_database(self):
    """Clear the patient table widget and reload it with today's rows only."""
    # assumes current_date starts with a 'YYYY-MM-DD'-style prefix — TODO confirm
    today_date = current_date[0:11]
    # Remove all existing rows from the widget before reloading.
    while self.workingpatient_table.rowCount() > 0:
        self.workingpatient_table.removeRow(0)
    conn = sqlite3.connect(r'mylab.db')
    try:
        # fixed: the SQL string and its parameters were packed into a single
        # tuple and passed as one argument, producing "argument 1 must be
        # str, not tuple"; execute() takes them as two separate arguments,
        # and the parameters must themselves be a tuple.
        result = conn.execute(
            "SELECT * FROM daily_patients where date=?", (today_date,)
        )
        for row_index, row_data in enumerate(result):
            self.workingpatient_table.insertRow(row_index)
            for column_index, column_data in enumerate(row_data):
                self.workingpatient_table.setItem(row_index, column_index, QTableWidgetItem(str(column_data)))
    finally:
        # Close the connection even if the query or widget update fails.
        conn.close()
''when i run the program i get following error ''
result = conn.execute(content)
TypeError: argument 1 must be str, not tuple
any possible solution?
Change your line from
content = ("SELECT * FROM daily_patients where date=?",(today_date))
result = conn.execute(content)
to
content = ("SELECT * FROM daily_patients where date=?",(today_date, ))
# Star-unpack so execute() receives the SQL string and the parameter tuple
# as two separate arguments.
result = conn.execute(*content)
I have a player ladder and I want to compare rankings and add info on whether each player moved up or down since they last gained points. I select the current and previous ladder lists from sqlite3. Now I want to compare the rankings of each player and write the result into a new moverList.
#Get Ranks and Names from current Ranking table as a list
def getCurrentRankNew():
    """Fetch (Rank, Name) rows for the current ladder, best score first."""
    connection = get_db_connection()
    ranked_rows = connection.execute(
        "SELECT ROW_NUMBER () OVER (ORDER BY Points DESC) Rank, Name FROM table"
    ).fetchall()
    connection.close()
    return ranked_rows
#Get Ranks and Names from last Ranking table as a list
def getLastRankNew():
    """Fetch (Rank, Name) rows for the previous ladder, best score first."""
    connection = get_db_previous()
    ranked_rows = connection.execute(
        "SELECT ROW_NUMBER () OVER (ORDER BY Points DESC) Rank, Name FROM table"
    ).fetchall()
    connection.close()
    return ranked_rows
The following is pseudocode that does not work, since I don't know how to do it. The goal is to build a list comprehension where each value is a ladder-movement indicator, resulting from a comparison of the current and last table rankings for each Name.
Note that I wrap the if-condition in try/except: when it fails, that means a new player entry exists in the current list that doesn't exist in the last list. In that case sqlite3 throws an error, so the except branch can just set mov = 'new'.
def compareRanksNew():
    """Return one movement indicator per player on the current ladder.

    'up'/'down'/'-' compare a player's current rank with their previous rank;
    'new' marks players that are absent from the previous ladder.

    The original sketch referenced undefined names (CurrentRank/LastRank),
    computed a single `mov` before the comprehension, and filtered instead of
    mapping; this version does a dict lookup per player, which is O(n) overall
    instead of scanning the old list for every name.
    """
    currentRank = getCurrentRankNew()
    lastRank = getLastRankNew()
    # Previous rank keyed by player name for O(1) lookups.
    last_by_name = {name: rank for rank, name in lastRank}

    movList = []
    for rank, name in currentRank:
        previous = last_by_name.get(name)
        if previous is None:
            movList.append('new')
        elif rank < previous:
            movList.append('up')
        elif rank > previous:
            movList.append('down')
        else:
            movList.append('-')
    return movList
I solved this problem before using multiple lists and for loops but this is very slow like ~3 seconds for just ~350 table rows. I have read it is much faster to use comprehensive lists but I fail to get it working for days.
This is my old solution:
#Make a mov list for all players
def getMoverList():
    """Build the list of movement indicators for every player, best first."""
    conn = get_db_connection()
    cur = conn.cursor()
    cur.execute('SELECT Name FROM table ORDER BY Points DESC')
    moverList = []
    for Name in cur.fetchall():
        playerName = str(Name[0])
        moverList.append(compareRanks(playerName))
    # fixed: the original leaked this connection (never closed it).
    conn.close()
    return moverList
#Get Rank from a playName from current Ranking table
def getCurrentRank(playerName):
    """Return the current Rank of playerName, or None if not present."""
    conn = get_db_connection()
    # fetchall() so the connection can be closed before iterating; the
    # original iterated the live cursor and leaked the connection
    # (getLastRank closes its connection — this makes the pair consistent).
    rows = conn.execute("SELECT ROW_NUMBER () OVER (ORDER BY Points DESC) Rank, Name FROM table").fetchall()
    conn.close()
    for item in rows:
        if playerName in item:
            return item[0]
    return None
#Get Rank from a playName from previous Ranking table
def getLastRank(playerName):
    """Return the previous Rank of playerName, or None if absent."""
    conn = get_db_previous()
    ranked = conn.execute("SELECT ROW_NUMBER () OVER (ORDER BY Points DESC) Rank, Name FROM table").fetchall()
    conn.close()
    rank_found = None
    for row in ranked:
        if playerName in row:
            rank_found = row[0]
            break
    return rank_found
#compare ranks and return moving value (mov)
def compareRanks(playerName):
    """Compare current vs previous rank for one player.

    Returns 'up', 'down', '-' (unchanged), or 'new' when the player has no
    rank in the previous table.
    """
    current_rank = getCurrentRank(playerName)
    last_rank = getLastRank(playerName)
    # fixed: the original relied on a bare `except:` catching the TypeError
    # raised by comparing None — which also silently swallowed every other
    # error; test for the missing rank explicitly instead.
    if current_rank is None or last_rank is None:
        return 'new'
    if current_rank < last_rank:
        return 'up'
    if current_rank > last_rank:
        return 'down'
    return '-'
# Build the mover list (return value discarded here).
getMoverList()
# NOTE(review): 'conn' is not defined at this scope — each helper opens and
# closes its own connection — so this line likely raises NameError; confirm
# intent.
conn.close()
One possible solution is to load both databases in one SQLite connection, the you can join the two databases in different ways and let the database engine itself perform the work.
First off, create a small test database, this is roughly like the layout of your database, just here for completeness:
# Build two small test databases (current and previous ladder) with roughly
# the same layout as the question's schema.
for database_path in ("db_cur.db", "db_prev.db"):
    connection = sqlite3.connect(database_path)
    connection.execute("CREATE TABLE players(Name, Points);")
    for i in range(350):
        # 1 percent of players get a slightly inflated random score; the rest
        # simply score their own index, so the two databases mostly agree.
        if random.random() <= 0.01:
            points = i + random.randint(1, 10)
        else:
            points = i
        connection.execute("INSERT INTO players(Name, Points) VALUES(?,?);", (f"Player {i}", points))
    connection.commit()
    connection.close()
Then, with these two databases created, you can read from both of them in one connection by attaching them:
# Now load both databases so we can join them:
db = sqlite3.connect(":memory:")
# ATTACH exposes each file under a schema name (cur / prev) on the same
# connection, so a single query can join across both databases.
db.execute("ATTACH 'db_cur.db' as cur;")
db.execute("ATTACH 'db_prev.db' as prev;")

# Rank both player tables with ROW_NUMBER() and join on Name. Players present
# in only one table are dropped by the INNER JOIN.
sql = """
SELECT
prev_ranks.Rank,
prev_ranks.Points,
cur_ranks.Rank,
cur_ranks.Points,
prev_ranks.Name
FROM
(SELECT
ROW_NUMBER () OVER (ORDER BY Points DESC) AS Rank,
Name,
Points
FROM
prev.players) AS prev_ranks
INNER JOIN
(SELECT
ROW_NUMBER () OVER (ORDER BY Points DESC) AS Rank,
Name,
Points
FROM
cur.players) AS cur_ranks
ON
prev_ranks.Name = cur_ranks.Name;
"""
for prev_rank, prev_points, cur_rank, cur_points, name in db.execute(sql):
    # A larger rank number means a worse ladder position.
    if prev_rank < cur_rank:
        print(f"{name} moved down from {prev_rank} to {cur_rank}")
    elif prev_rank > cur_rank:
        print(f"{name} moved up from {prev_rank} to {cur_rank}")
Having recently upgraded sqlAlchemy and Python to 3.8, this code no longer works to get a row count from search results, via the sqlAlchemy ORM. It seems the use of _saved_cursor._result.rows has been depreciated. (Error: AttributeError: 'LegacyCursorResult' object has no attribute '_saved_cursor')
def get_clients(db, status):
    """Return the number of client rows with the given status."""
    clients = Table("clients", db.metadata, autoload=True)
    qry = clients.select().where(clients.c.status == status)
    res = qry.execute()
    # fixed: res._saved_cursor._result.rows was private API and no longer
    # exists in SQLAlchemy 1.4; fetchall() is the public equivalent.
    rowcount = len(res.fetchall())
    return rowcount
We have this very ugly code that works, but this way has to loop through all the results to get the count.
def get_clients(db, status):
    """Count matching client rows by iterating the result set."""
    clients = Table("clients", db.metadata, autoload=True)
    qry = clients.select().where(clients.c.status == status)
    res = qry.execute()
    # Consume the cursor, counting rows as they stream back.
    return sum(1 for _ in res)
Without using raw sql, what is the most efficient means to get the row count using sqlAlchemy ORM?
The solution is to use the func method from sqlAlchemy, and to render the results as scalar.
# fixed: `select` is used below but was missing from the original import.
from sqlalchemy import Table, func, select

def get_clients(db, status):
    """Return the matching row count via COUNT(*) computed in the database,
    rendered as a scalar — no rows are transferred to the client."""
    clients = Table("clients", db.metadata, autoload=True)
    qry = select([func.count()]).select_from(clients).where(clients.c.status == status)
    row_count = qry.execute().scalar()
    return row_count