Best way of using Django queryset where JSONField != {} - python-3.x

class JobAnalysis(Base, XYZ):
env_vars = JSONField(
default=dict
)
job = models.ForeignKey(
Job, related_name='jobanalyses'
)
seller = models.ForeignKey(
ABC,
null=True
)
class Usage(Base):
job = models.ForeignKey(
Job, null=True, blank=True
)
I want all usages where env_vars contains at least one key-value pair.
usages_qs = Usage.objects.filter(
job__jobanalyses__seller__isnull=True
).exclude(
job__jobanalyses__env_vars__exact={}
)
I am using the above queryset to fetch all usage records where seller is null and env_vars is not equal to {}.
usages_qs.query
SELECT "Usage"."job",
FROM "Usage"
LEFT OUTER JOIN "Job" ON ("Usage"."job_id" = "Job"."id")
LEFT OUTER JOIN "JobAnalysis" ON ("Job"."id" = "JobAnalysis"."job_id")
WHERE ("JobAnalysis"."seller_id" IS NULL
AND NOT ("Usage"."job_id" IN
(SELECT U2."job_id"
FROM "JobAnalysis" U2
WHERE U2."env_vars" = '{}')
AND "Usage"."job_id" IS NOT NULL))
But I am seeing a performance issue here: .exclude(job__jobanalyses__env_vars__exact={}) creates an inner query (subquery), and because of that this SELECT statement is timing out.
Is there a better way of writing the Django queryset to get all usage records where seller is null and env_vars != {}?

Related

sqlalchemy: express relationship which depends on len of collection child in joined entity?

How do I express relationship which depends on len of collection child in joined entity?
In below example, parent entity is AlgoOrder. Child entity is Order. And PrivateTrade is child entity of Order.
AlgoOrder --> Order --> PrivateTrade
The problem I am having is with "orders_pending_private_trade_update".
class AlgoOrder(DbModel):
__tablename__ = "algo_order"
id = sa.Column(sa.Integer, primary_key=True)
... stuff ...
# https://docs.sqlalchemy.org/en/14/orm/loading_relationships.html
open_orders = orm.relation(Order, primaryjoin=and_(Order.algo_order_id == id, Order.status == 'OPEN'), lazy='select')
orders_pending_private_trade_update = orm.relation(Order, primaryjoin=and_(Order.algo_order_id == id, , Order.status == 'CLOSED', len(Order.private_trades)==0), lazy='select')
#property
def pending_orders(self):
return self.open_orders + self.orders_pending_private_trade_update
class Order(DbModel):
__tablename__ = "order_hist"
algo_order_id = sa.Column(sa.Integer, sa.ForeignKey("algo_order.id"))
... stiff ...
private_trades = orm.relation(PrivateTrade, primaryjoin=and_(PrivateTrade.order_id == order_id))
class PrivateTrade(DbModel):
__tablename__ = "private_trade"
id = sa.Column(sa.Integer, primary_key=True)
order_id = sa.Column(sa.String, sa.ForeignKey("order_hist.order_id"))
In particular, the error at "orders_pending_private_trade_update" was with "len" on Order.private_trades:
Exception has occurred: TypeError (note: full exception trace is shown but execution is paused at: _run_module_as_main) object of type 'InstrumentedAttribute' has no len()
So, I tried:
from sqlalchemy.sql.expression import func
orders_pending_private_trade_update = orm.relation(Order, primaryjoin=and_(Order.algo_order_id == id, Order.status == 'CLOSED', func.count(Order.private_trades)==0), lazy='select', viewonly=True)
But then error was "foreign key columns are present in neither the parent nor the child's mapped tables":
Can't determine relationship direction for relationship 'AlgoOrder.orders_pending_private_trade_update' - foreign key columns are present in neither the parent nor the child's mapped tables <class 'sqlalchemy.exc.ArgumentError'> Can't determine relationship direction for relationship 'AlgoOrder.orders_pending_private_trade_update' - foreign key columns are present in neither the parent nor the child's mapped tables
I checked my tables, I do have them:
op.create_table(
'algo_order',
sa.Column('id', sa.Integer(), primary_key=True),
...
op.create_table(
'order_hist',
sa.Column('id', sa.Integer(), primary_key=True),
sa.Column('algo_order_id', sa.Integer, sa.ForeignKey("algo_order.id")),
...
op.create_table(
'private_trade',
sa.Column('id', sa.Integer(), primary_key=True),
sa.Column('order_id', sa.String(), sa.ForeignKey("order_hist.order_id"))
...
Thanks in advance.
I think I found it, but syntax pretty ugly: I used closed_order.session to do a new Query
import sqlalchemy as sa
import sqlalchemy.orm as orm
from sqlalchemy.sql.expression import func
import sqlalchemy.dialects.postgresql as psql
from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy.sql.expression import and_
class AlgoOrder(DbModel):
    """Parent algo order; related ``Order`` rows are joined on ``Order.algo_order_id``."""

    __tablename__ = "algo_order"

    id = sa.Column(sa.Integer, primary_key=True)
    # ... other stuff ...

    # Orders belonging to this algo order whose status is 'OPEN'.
    open_orders = orm.relation(
        Order,
        primaryjoin=and_(Order.algo_order_id == id, Order.status == 'OPEN'),
        lazy='select',
    )
    # Orders belonging to this algo order whose status is 'CLOSED'.
    # Declared lazy='dynamic' and read-only (viewonly=True).
    closed_orders = orm.relation(
        Order,
        primaryjoin=and_(Order.algo_order_id == id, Order.status == 'CLOSED'),
        lazy='dynamic',
        viewonly=True,
    )

    @property
    def orders_pending_private_trade_update(self):
        """Return up to 1000 orders (highest id first) that have no private trades.

        Two queries are issued on the session behind ``closed_orders``: the
        first collects the ids of orders whose outer-joined ``PrivateTrade``
        count is zero, the second loads those ``Order`` rows.

        NOTE(review): neither query filters on this AlgoOrder's id or on the
        order status, so the result covers every order without private
        trades -- confirm this is intended.
        """
        session = self.closed_orders.session
        zero_trade_rows = (
            session.query(
                Order.id,
                func.count(PrivateTrade.id).label('count_private_trades'),
            )
            .join(PrivateTrade, isouter=True)
            .group_by(Order.id)
            .having(func.count(PrivateTrade.id) == 0)
            .all()
        )
        order_ids_with_no_private_trades = [row.id for row in zero_trade_rows]
        return (
            session.query(Order)
            .filter(Order.id.in_(order_ids_with_no_private_trades))
            .order_by(Order.id.desc())
            .limit(1000)
            .all()
        )

    @property
    def pending_orders(self):
        """Return open orders plus orders still waiting for private trades, as one list."""
        return list(self.open_orders) + list(self.orders_pending_private_trade_update)
I also don't like using "limit(1000)" as an attempt to limit the number of rows fetched. And how/when do you dispose of the list to prevent a memory leak? I think the above approach is bad; it should use a generator instead of returning a list.
Essentially raw sql what I am looking for is a generator which returns below:
select
order_id,
cnt
from (
select
order_hist.id,
order_hist.order_id,
count(private_trade.id) cnt
from order_hist
left join private_trade on private_trade.order_id = order_hist.order_id
where order_hist.status in ('CLOSED', 'CANCELLED')
group by order_hist.id, order_hist.order_id
) src
where
cnt=0
Any better way to do this? I think my solution shows the sqlalchemy syntax but it's computationally inefficient.
Here's solution using generator instead to avoid MemoryError:
def order_hist_missing_private_trade_get(engine):
    """Yield the ``order_id`` of each CLOSED/CANCELLED order that has no private trades.

    Args:
        engine: object whose ``connect()`` returns a context-managed
            connection providing ``execution_options()`` and ``execute()``
            (e.g. a SQLAlchemy engine).

    Yields:
        The ``order_id`` column of every ``order_hist`` row whose left-joined
        ``private_trade`` count is zero, fetched in batches of 10000 rows.
    """
    order_hist_missing_private_trade_sql = '''
select
order_id,
cnt
from (
select
order_hist.id,
order_hist.order_id,
count(private_trade.id) cnt
from order_hist
left join private_trade on private_trade.order_id = order_hist.order_id
where order_hist.status in ('CLOSED', 'CANCELLED')
group by order_hist.id, order_hist.order_id
) src
where
cnt=0
'''
    with engine.connect() as conn:
        # https://stackoverflow.com/questions/7389759/memory-efficient-built-in-sqlalchemy-iterator-generator
        # Use the connection returned by execution_options(); discarding the
        # return value can leave stream_results unset on the one we execute on.
        conn = conn.execution_options(stream_results=True)
        # NOTE(review): newer SQLAlchemy releases expect text(...) instead of a
        # raw string here, and mapping-style row access -- confirm the version.
        rs = conn.execute(order_hist_missing_private_trade_sql)
        while True:
            batch = rs.fetchmany(10000)
            if not batch:
                # An empty batch means the result set is exhausted; without
                # this check the loop would spin forever.
                break
            for row in batch:
                yield row['order_id']
Usage:
from sqlalchemy import create_engine
# SQLAlchemy URLs take the form dialect://user:password@host/database.
connstr: str = "postgresql://postgres:your_secret@localhost/postgres"
engine = create_engine(connstr)
generator = order_hist_missing_private_trade_get(engine)
# A for-loop stops cleanly when the generator is exhausted; calling next()
# inside `while True` would end with an uncaught StopIteration.
for order_id in generator:
    print(f"order_id: {order_id}")

Left Join in Django

I have the following models:
class Patient(models.Model):
patient_first_name = models.CharField(max_length=50)
patient_last_name = models.CharField(max_length=50)
patient_name = models.CharField(max_length=100)
patient_email = models.EmailField(max_length=100)
gender = models.CharField(max_length=50)
class PatientMedicalRecord(models.Model):
patient = models.ForeignKey(Patient)
mrn = models.CharField(max_length=50, unique=True)
patient_height = models.IntegerField(blank=True, null=True)
patient_weight = models.IntegerField(blank=True, null=True)
age_risk = models.BooleanField(default=False)
I want to query the patient table to get all patients. I also need the MRN column value from the PatientMedicalRecord table, which contains the record for a particular patient, if it exists.
How can I do this with the Django ORM?
The following SQL query gives me the perfect result.
SELECT a.id,--remaining field, b.mrn FROM patient as a LEFT JOIN patient_medical_record as b ON a.id=b.patient_id;
How can i do this with django annotate ?
You can fetch related objects using the object_set. In your example, here is how you would do it:
patient = Patient.objects.get(pk=1)  # You can use any attribute to get the Patient object
# Reverse accessor for the ForeignKey from PatientMedicalRecord to Patient.
patient_medical_records = patient.patientmedicalrecord_set.all()
# Collect the MRN of every medical record belonging to this patient.
patient_mrns = [record.mrn for record in patient_medical_records]
You can also defined a related_name property in your model for the relationship to query relationships with. For example:
class PatientMedicalRecord(models.Model):
patient = models.ForeignKey(Patient, on_delete=models.CASCADE, related_name='patient_records')
Then you would query it like this:
patient = Patient.objects.get(pk=1)
patient_medical_records = patient.patient_records.all()

Django Subquery many values

class Category(models.Model):
name = models.CharField(max_length=100)
date = models.DateTimeField(auto_now=True)
class Hero(models.Model):
name = models.CharField(max_length=100)
category = models.ForeignKey(Category, on_delete=models.CASCADE)
I want the Category model's name, data, and id.
In cookbook , I wrote the code as above.
hero_qs = Hero.objects.filter(
category=OuterRef("pk")
).order_by("-benevolence_factor")
Category.objects.all().annotate(
most_benevolent_hero=Subquery(
hero_qs.values('name')[:1]
)
)
It seems that only one value can be entered in hero_qs.values('name')
Is it possible to get name, data, id with one annotate?
You can try Concatenating the fields if you really want to use a single annotation
from django.db.models import Subquery, OuterRef, CharField, Value as V
from django.db.models.functions import Concat
hero_qs = Hero.objects.filter(
category=OuterRef("pk")
).order_by("-benevolence_factor").annotate(
details=Concat('name', V(','), 'id', output_field=CharField())
)
Category.objects.all().annotate(
most_benevolent_hero=Subquery(
hero_qs.values('details')[:1]
)
)
Then you can split the string to separate that data out, which is a relatively inexpensive operation:
# Split on the LAST comma only, so a hero name that itself contains ',' stays
# intact; the second target is named hero_id to avoid shadowing the builtin id().
# NOTE(review): most_benevolent_hero is presumably None for a category with no
# heroes -- guard before splitting if that can happen.
name, hero_id = category.most_benevolent_hero.rsplit(',', 1)

query multiple SQLalchemy ORM

I'm new to SQLAlchemy. I need to calculate the product of price and quantity across some of my tables. These are my tables:
class Order(DeclarativeBase):
__tablename__ = 'order'
id = Field(Integer, primary_key=True)
products = relationship("OrderProduct", back_populates="order", lazy='dynamic')
and
class OrderProduct(DeclarativeBase):
__tablename__ = 'order_products'
id = Field(Integer, primary_key=True)
order_id = Column(Integer, ForeignKey('order.id'), nullable=False)
order = relationship("Order", back_populates="products", protected=True)
product_id = Column(Integer, ForeignKey('product.id'), nullable=False)
product = relationship("Product", back_populates="order_product")
quantity = Field(Integer, nullable=False)
and
class Product(DeclarativeBase):
__tablename__ = 'product'
id = Field(Integer, primary_key=True)
price = Field(Integer)
order_product = relationship("OrderProduct", back_populates="product", protected=True)
I want to multiple price with this situation OrderProduct.quantity * Product.price and products in Order table is an array of Products
I write SQL query like this and it works:
SELECT SUM(price*quantity) FROM product
JOIN order_products ON product.id = order_products.product_id
JOIN order ON order_products.order_id = order.id;
I tried to make it in ORM like this but it takes me Product and I can calculate only price without multiple in quantity:
result = 0
for product in Product.query\
.join(OrderProduct).filter(Product.id == OrderProduct.product_id)\
.join(Order).filter(OrderProduct.order_id == self.id):
result = product.price + result
return result
I made this a @hybrid_property and it works well.
I use a framework that name is restfulpy. it has ORM for sqlalchemy, session in this framework is scoped_session, but it gives me SQL query in debuging mode instead of executing the query like this :
sum_price = {Query}SELECT sum(product.price * order_products.quantity) AS sum_1
FROM product JOIN order_products ON product.id = order_products.product_id JOIN "order" ON "order".id = order_products.order_id
WHERE product.id = order_products.product_id AND order_products.order_id = :order_id_1
Well, Can anyone help me out this problem?
with regards
I just realized that SQLalchemy is a great ORM! When you have a relation between two, three or ... tables, SQLalchemy make a join between them and you just query it!
I made it hard for me and my friends :) SQLalchemy is powerful than what I'm thinking!
This is the right answer:
@hybrid_property
def total_price(self):
    """Return the sum of ``Product.price * OrderProduct.quantity`` for this order.

    The aggregate is computed in the database over the ``OrderProduct`` rows
    whose ``order_id`` equals ``self.id``. Returns 0 when the order has no
    products, because SUM over zero rows yields NULL (``None``).
    """
    total = (
        DBSession.query(func.sum(Product.price * OrderProduct.quantity))
        .filter(OrderProduct.product_id == Product.id)
        .filter(OrderProduct.order_id == self.id)
        .scalar()
    )
    return total or 0
DBSession is the same as session:
from sqlalchemy import func
# Parentheses let the method chain continue across lines; a line that starts
# with ".join" outside brackets is a syntax error.
query = (
    session.query(func.sum(Product.price * OrderProduct.quantity))
    .join(OrderProduct).filter(Product.id == OrderProduct.product_id)
    .join(Order).filter(OrderProduct.order_id == self.id)
)

Trying to join two tables in sqlalchemy orm query, getting an error

Trying to join the tables below using this command:
Subscription.query.filter(Subscription.watch_id == id).join(User).filter_by(watch_id=id)
I get this error:
sqlalchemy.exc.InvalidRequestError: Could not find a FROM clause to join from. Tried joining to <class 'app.user.model.User'>, but got: Can't find any foreign key relationships between 'wm_subscription' and 'user'.
Essentially my end goal is to get a query that gets a List of Users that share a watch_id. Not sure if the models or the query is correct. Anybody know what's wrong?
Database = declarative_base(cls=DbBase)
class Subscription(Database):
__tablename__ = 'wm_subscription'
subscription_id = UniqueIdPk()
watch_id = UniqueIdRefNotNull(index=True)
user_id = UniqueIdRefNotNull(ForeignKey('User.user_id'), index=True)
subscription_watch = relationship('Watch',
primaryjoin='Subscription.watch_id == Watch.watch_id',
foreign_keys='Watch.watch_id',
uselist=True)
subscription_user = relationship('User',
primaryjoin='Subscription.watch_id == User.user_id',
foreign_keys='User.user_id',
uselist=True,
backref='user')
class User(Database, UserMixin):
__tablename__ = 'user'
user_id = UniqueIdPk()
# Google sub ID - unique to user https://developers.google.com/identity/protocols/OpenIDConnect
google_id = Column(String(length=50))
# override email mixin for unique index
email = Email(unique=True)
first_name = Name()
last_name = Name()
def get_id(self):
return self.user_id
This is the correct query:
Subscription.query.filter(Subscription.watch_id == id).join(User)

Resources