sqlAlchemy Row Count from Results - python-3.x

After recently upgrading SQLAlchemy and moving to Python 3.8, this code no longer works to get a row count from search results via the SQLAlchemy ORM. It seems the use of _saved_cursor._result.rows has been deprecated. (Error: AttributeError: 'LegacyCursorResult' object has no attribute '_saved_cursor')
def get_clients(db, status):
clients = Table("clients", db.metadata, autoload=True)
qry = clients.select().where(clients.c.status == status)
res = qry.execute()
rowcount = len(res._saved_cursor._result.rows)
return rowcount
We have this very ugly code that works, but this way has to loop through all the results to get the count.
def get_clients(db, status):
clients = Table("clients", db.metadata, autoload=True)
qry = clients.select().where(clients.c.status == status)
res = qry.execute()
rowcount = 0
for row in res:
rowcount += 1
return rowcount
Without using raw sql, what is the most efficient means to get the row count using sqlAlchemy ORM?

The solution is to use SQLAlchemy's func construct so that the database computes the count, and to fetch the result as a scalar.
from sqlalchemy import Table, func, select


def get_clients(db, status):
    """Return the number of rows in ``clients`` whose status equals *status*.

    The count is computed by the database via ``SELECT count(*)``, so no
    result rows are fetched or iterated in Python.

    NOTE(review): ``qry.execute()`` is called without a connection, so this
    presumably relies on ``db.metadata`` being bound to an engine -- confirm.
    """
    clients = Table("clients", db.metadata, autoload=True)
    qry = select([func.count()]).select_from(clients).where(clients.c.status == status)
    # The statement yields a single row with a single column; scalar() unwraps it.
    row_count = qry.execute().scalar()
    return row_count

Related

Get sqlalchemy table Model and Field objects from strings?

I am trying to do something very simple in Python 3 with SQLAlchemy: run a query that deletes some records, given only the names of the table and field to query against as strings.
1. How do you get the table object from a string?
2. Given the answer to 1, how do you run a query via the ORM with just the string name of the field?
I would assume all ORMs have an internal array, or a method like get, that looks these up by name.
# Each entry names a table, the column to filter on, and the expiry threshold
# that the loop below compares that column against.
json_config = [
    {"table": "tableA", "field": "modified_on", "expires": 30},
    {"table": "tableB", "field": "event_on", "expires": 30},
]
for table_conf_item in self.json_config:
table_name = table_conf_item["table"]
field_name = table_conf_item["field"]
expire_after = table_conf_item["expires"]
table_obj = self.orm_session.TABLES[table_name]
field_obj = self.orm_session.TABLES[table_name].FIELDS[field_name]
result = self.orm_session.delete(table_obj).where(field_obj < expire_after)
self.orm_session.commit()
print(f"{table_name}: removed {result.row_count} objects")
Given the table's name, you can use reflection to get a Table object. Using SQLAlchemy's core layer, this is reasonably straightforward:
import sqlalchemy as sa
engine = sa.create_engine(...)
tbl = sa.Table(name_of_table, metadata, autoload_with=engine)
If you want to work with multiple tables, it may be more efficient to store them in a MetaData instance for later access:
metadata = sa.MetaData()
metadata.reflect(engine, only=list_of_table_names)
tbl = metadata.tables[name_of_table]
Once you have a Table object you can reference columns by name like this: tbl.c[name_of_field].
Full example:
import sqlalchemy as sa

# Setup
engine = sa.create_engine('sqlite://', echo=True, future=True)

tbl = sa.Table(
    't',
    sa.MetaData(),
    sa.Column('id', sa.Integer, primary_key=True),
    sa.Column('foo', sa.Integer),
)
tbl.create(engine)

# engine.begin() commits the inserts when the block exits without error.
with engine.begin() as conn:
    vals = [42, 43, 42, 43, 56, 87, 89]
    conn.execute(tbl.insert(), [{'foo': v} for v in vals])

# Drop the hand-built Table so the rest of the example uses only reflection.
del tbl

# Reflect the table.
metadata = sa.MetaData()
metadata.reflect(engine, only=['t'])
tbl = metadata.tables['t']

# Define some statements.
# Columns are looked up by their string names through tbl.c[...].
q1 = sa.select(tbl).where(tbl.c['foo'] == 42)
q2 = sa.select(tbl.c['id'], tbl.c['foo']).where(tbl.c['foo'] == 43)
q3 = sa.delete(tbl).where(tbl.c['foo'] != 42)

# Execute the statements.
with engine.connect() as conn:
    rows = conn.execute(q1)
    for row in rows:
        print(row)
    print()
    rows = conn.execute(q2)
    for row in rows:
        print(row)
    print()

# Run the delete inside a transaction so that it is committed.
with engine.begin() as conn:
    conn.execute(q3)

# Re-run the first query to show what remains after the delete.
with engine.connect() as conn:
    rows = conn.execute(q1)
    for row in rows:
        print(row)
    print()
Doing the same through the ORM layer is more complicated, as table and column names must be mapped to ORM entity classes (models) and their attributes. This replicates the previous example for a simple mapping (it assumes the same initial data as above).
import sqlalchemy as sa
from sqlalchemy import orm

Base = orm.declarative_base()


class Thing(Base):
    """ORM model mapped to table ``t``.

    The attribute ``thing_foo`` is deliberately named differently from the
    database column ``foo`` that it maps to.
    """

    __tablename__ = 't'

    id = sa.Column(sa.Integer, primary_key=True)
    thing_foo = sa.Column('foo', sa.Integer)


# Placeholder: pass the real database URL here.
engine = sa.create_engine(...)
Base.metadata.create_all(engine)
Session = orm.sessionmaker(engine, future=True)

tablename = 't'
columnname = 'foo'

with Session.begin() as s:
    # Get the mappers for the Base class.
    mappers = Base.registry.mappers
    # Get the mapper for our table.
    mapper = next(m for m in mappers if m.entity.__tablename__ == tablename)
    # Get the entity class (Thing).
    entity = mapper.entity
    # Get the column from the Table.
    table_column = mapper.selectable.c[columnname]
    # Get the mapper property that corresponds to the column
    # (the entity attribute may have a different name to the
    # column in the database).
    mapper_property = mapper.get_property_by_column(table_column)
    # Get the queryable entity attribute (Thing.thing_foo).
    attr = mapper.all_orm_descriptors[mapper_property.key]
    q = sa.select(entity).where(attr != 42)
    # Delete each matching object through the session; a separate loop
    # name keeps ``entity`` referring to the mapped class.
    for instance in s.scalars(q):
        s.delete(instance)

with Session() as s:
    for thing in s.scalars(sa.select(Thing)):
        print(thing.id, thing.thing_foo)

update statement using loop over tuple of query and data fails in psycopg2

I have created a mini functional pipeline which builds an UPDATE statement containing a regex and then passes the statement and the data to psycopg2 to execute.
If I copy and paste the statement and run it outside of the loop it works, but if I try to loop over all the statements I get an error.
# Function to create statement
def psycopg2_regex_replace_chars(table, col, regex_chars_old, char_new):
    """Build an UPDATE statement that regex-replaces text in one column.

    Args:
        table: Name of the table to update.
        col: Name of the column to rewrite in place.
        regex_chars_old: Regular expression to search for.
        char_new: Replacement text.

    Returns:
        A ``(query, data)`` tuple: ``query`` keeps two ``%s`` placeholders and
        ``data`` holds the pattern and replacement to bind to them.

    Note:
        ``table`` and ``col`` are interpolated into the SQL text as-is, so
        they must come from trusted code, never from user input.
    """
    query = "UPDATE {} SET {} = regexp_replace({}, %s , %s, 'g');".format(table, col, col)
    data = (regex_chars_old, char_new)
    return (query, data)
# Create functions with intelligible names
replace_separators_with_space = partial(psycopg2_regex_replace_chars,regex_chars_old='[.,/[-]]',char_new=' ')
replace_amper_with_and = partial(psycopg2_regex_replace_chars, regex_chars_old='&', char_new='and')
# create funcs_list
funcs_edit = [replace_separators_with_space,
replace_amper_with_and]
So far, so good.
This works
stmt = "UPDATE persons SET name = regexp_replace(name, %s , %s, 'g');"
data = ('[^a-zA-z0-9]', ' ')
cur.execute(stmt, data)
conn.commit()
This fails
tables = ["persons"]
cols = ["name", "dob"]
for table in tables:
for col in cols:
for func in funcs_edit:
query, data = func(table=table, col=col)
cur.execute(query, data)
conn.commit()
error
<ipython-input-92-c8ba5d469f88> in <module>
6 for func in funcs_edit:
7 query, data = func(table=table, col=col)
----> 8 cur.execute(query, data)
9 conn.commit()
ProgrammingError: function regexp_replace(date, unknown, unknown, unknown) does not exist
LINE 1: UPDATE persons SET dob = regexp_replace(dob, '[.,/[-]]' , ' ...
^
HINT: No function matches the given name and argument types. You might need to add explicit type casts.

Adding count of total rows through Marshmallow with @post_dump?

I need to add the quantity of rows returned in this query:
queryPostgres = db.text("""
SELECT *, COUNT(*) OVER () as RowCount
FROM (
SELECT * ,
( 3958.75 *
acos(sin(:lat1 / 57.2958) * sin( cast(latitude as double precision) / 57.2958) +
cos(:lat1 / 57.2958) * cos( cast(latitude as double precision) / 57.2958) *
cos( cast(longitude as double precision) / 57.2958 - :lon1/57.2958)))
as distanceInMiles
FROM "job" ) zc
WHERE zc.distanceInMiles < :dst
ORDER BY zc.distanceInMiles
LIMIT :per_page
OFFSET :offset
""")
jobs = cls.query.\
from_statement(queryPostgres). \
params(lat1=float(lat1),
lon1=float(lon1),
dst=int(dst),
per_page=int(per_page),
offset=int(offset))
return jobs
As you can see I added the RowCount column to have the total count of rows.
However, as it is not part of my model, I wonder what I should do in Marshmallow so that I can add the number of rows (from the RowCount column).
I thought I could do it with Marshmallow's @post_dump, but I could not figure out how.
For more clarity here is my schema.
class JobSchema(ma.ModelSchema):
def validate_state(state):
"""Validate one of 55 USA states"""
if state not in states:
raise ValidationError(INVALID_US_STATE)
def validate_zipCode(zip):
if not zipcodes.is_real(zip):
raise ValidationError(INVALID_ZIP_CODE)
#pre_load
def get_longitude_for_zipCode_and_TimeCreated(self, data):
""" This method will pass valids long,lat and time_created
values to each job created during a POST request"""
# Getting zip from the request to obtain lat&lon from DB
result = modelZipCode.getZipCodeDetails(data['zipCode'])
print(result)
if result is None:
raise ValidationError(INVALID_ZIP_CODE_2)
schema = ZipCodeSchema(exclude=('id'))
zip, errors = schema.dump(result)
if errors:
raise ValidationError(INVALID_ZIP_CODE_3)
else:
data['longitude'] = zip['longitude']
data['latitude'] = zip['latitude']
data['time_created'] = str(datetime.datetime.utcnow())
title = fields.Str(required=True, validate=[validate.Length(min=4, max=80)])
city = fields.Str(required=True, validate=[validate.Length(min=5, max=100)])
state = fields.Str(required=True, validate=validate_state)
zipCode = fields.Str(required=True, validate=validate_zipCode)
description = fields.Str(required=False, validate=[validate.Length(max=80)])
narrative = fields.Str(required=False, validate=[validate.Length(max=250)])
companyLogo = fields.Str(required=False, validate=[validate.Length(max=250)])
companyName = fields.Str(required=True, validate=[validate.Length(min=5, max=250)])
companyURL = fields.Str(required=True, validate=[validate.Length(min=4, max=100)])
latitude = fields.Str(required=True)
longitude = fields.Str(required=True)
time_created = fields.DateTime()
# We add a post_dump hook to add an envelope to responses
#post_dump(pass_many=True)
def wrap(self, data, many):
#import pdb; pdb.set_trace()
if len(data) >= 1:
counter = data[0]['RowCount']
return {
data,
counter
}
class Meta:
model = modelJob
The strangest thing is that my query does correctly return the row count.
Could someone please help me find out why I cannot capture the RowCount key in the post_dump method?
This needs to be handled in a Marshmallow pre_dump or post_dump method. In the end, though, I decided to use SQLAlchemy's pagination methods, as they gave me the total number of rows in the response.

Loop through columns - SQLAlchemy Core

I am trying to loop through the columns of all the tables in my database to select empty columns. I finally used raw sql and .format to get it to work, but how do I use SQLAlchemy to achieve the same result? Here is the code I've written:
from sqlalchemy import MetaData, create_engine, select
from sqlalchemy.sql import func
engine = create_engine('...')
conn = engine.connect()
tablemeta = MetaData(bind=engine, reflect=True)
for t in tablemeta.sorted_tables:
for col in t.c:
s = select([func.count(t.c[str(col)].distinct())])
s = s.scalar()
if s <= 1:
print(s)
But this results in a KeyError.
OK I got it to work:
for t in tablemeta.sorted_tables:
for col in t.c:
s = select([func.count(t.c[col.name].distinct())])
s = s.scalar()
if s <= 1:
print(s)

How to read and insert bytea columns using psycopg2?

I am working on a Python script to replicate some Postgresql tables from one environment to another (which does a little more than pg_dump). It works except when I am copying a table that has bytea data type.
I read the source table data in memory, then I dump the memory in the target database with concatenated inserts.
Here is my method that produces an insert statement:
def generateInsert(self, argCachedRow):
colOrd = 0;
valClauseList = []
hasBinary = False
for colData in argCachedRow:
colOrd += 1
colName = self.colOrdLookup.get(colOrd)
col = self.colLookup.get(colName)
dataType = col.dataType
insVal = None
if colData is not None:
strVal = str(colData)
if dataType.useQuote:
if "'" in strVal:
strVal = strVal.replace("'", "''")
insVal = "'%s'" % strVal
else:
if dataType.binary:
hasBinary = True
#insVal = psycopg2.Binary(colData)
#else:
insVal = strVal
else:
insVal = "NULL"
valClauseList.append(insVal)
valClause = ", ".join(valClauseList)
if hasBinary:
valClause = psycopg2.Binary(valClause)
result = "INSERT INTO %s VALUES (%s)" % (self.name, valClause)
return result
which works with every table that doesn't have binary data.
I also tried (intuitively) to wrap just the binary column data in psycopg2.Binary, which is the commented out line and then not do it to the whole row value list but that didn't work either.
Here is my simple DataType wrapper, which is loaded by reading Postgres' information_schema tables:
class DataType(object):
    """Plain holder for the attributes describing one column data type.

    ``useQuote`` and ``binary`` are the flags that ``generateInsert`` reads
    to decide how a value is rendered.
    """

    def __init__(self, argDispName, argSqlName, argUseQuote, argBin):
        """Store the four attributes unchanged.

        Args:
            argDispName: Stored as ``dispName``.
            argSqlName: Stored as ``sqlName``.
            argUseQuote: Stored as ``useQuote``.
            argBin: Stored as ``binary``.
        """
        self.dispName = argDispName
        self.sqlName = argSqlName
        self.useQuote = argUseQuote
        self.binary = argBin
How do I read and insert bytea columns using psycopg2?
If you have this database structure:
CREATE TABLE test (a bytea,
b int,
c text)
then inserting binary data into the request can be done like so, without any wrappers:
bin_data = b'bytes object'
db = psycopg2.connect(*args) # DB-API 2.0
c = db.cursor()
c.execute('''INSERT INTO test VALUES (%s, %s, %s)''', (bin_data, 1337, 'foo'))
c.execute('''UPDATE test SET a = %s''', (bin_data + b'1',))
Then, when you query it:
c.execute('''SELECT a FROM test''')
You'll receive a memoryview, which is easily converted back to bytes:
# fetchone() returns the whole row as a tuple; the bytea value is its first
# (and only) selected column.
mview = c.fetchone()[0]
new_bin_data = bytes(mview)
print(new_bin_data)
Output: b'bytes object1'
Also, I'd suggest not assembling queries by string formatting. psycopg2's built-in parameter substitution is much more convenient, and you don't have to worry about validating data to protect against SQL injection.

Resources