How to translate a SQLAlchemy query correctly - python-3.x

Hi all!
I have a query:
query = insert(balance)
query = query.on_conflict_do_nothing(
    index_elements=["query_id", "warehouse", "product"]
)
I use SQLAlchemy's compile() to translate it into raw SQL:
sql_str = query.compile(
    dialect=postgresql.dialect(),
)
I get this output:
INSERT INTO balance (id, query_id, warehouse, product) VALUES (%(id)s, %(query_id)s, %(warehouse)s, %(product)s) ON CONFLICT (query_id, warehouse, product) DO NOTHING
How do I get this instead (asyncpg expects numbered $n placeholders rather than pyformat %(name)s ones)?
INSERT INTO balance (id, query_id, warehouse, product) VALUES ($1, $2, $3, $4) ON CONFLICT (query_id, warehouse, product) DO NOTHING
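For reference, the compiled object exposes both the rendered SQL and its bind parameters; any conversion to asyncpg's $n style has to pair these two up. A quick inspection sketch:

compiled = query.compile(dialect=postgresql.dialect())
print(compiled.string)  # the %(name)s-style INSERT shown above
print(compiled.params)  # dict mapping bind-parameter names to their values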
Full code:
import asyncio
import logging
import sys
from contextlib import asynccontextmanager
from typing import AsyncGenerator
import async_timeout
import asyncpgsa
import stackprinter
from faker import Faker
from sqlalchemy.dialects import postgresql
logging.basicConfig(level=logging.DEBUG)
import sqlalchemy
from sqlalchemy import Boolean, Column, Table, MetaData, NUMERIC, func
from sqlalchemy.dialects.postgresql import UUID, TIMESTAMP, insert
from sqlalchemy.types import Integer
metadata = MetaData()
stock_balance: sqlalchemy.Table = Table(
    "stock_balance",
    metadata,
    Column("warehouse", UUID, primary_key=True),
    Column("product", UUID, primary_key=True),
    Column("balance", NUMERIC(15, 3), index=True),
    Column("reserve", NUMERIC(15, 3), index=True),
)
balance: sqlalchemy.Table = Table(
    "balance",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("query_id", UUID(as_uuid=True), nullable=False, index=True),
    Column("warehouse", UUID, index=True, nullable=False),
    Column("product", UUID, index=True, nullable=False),
    Column("balance", Boolean, index=True, server_default="f"),
    Column("reserve", Boolean, index=True, server_default="f"),
    Column("count", NUMERIC(15, 3), nullable=False),
    Column("date_time", TIMESTAMP, server_default=func.now(tz="UTC"), index=True),
    Column("updated", Boolean, server_default="f", index=True),
)
def data_traffic():
    fake = Faker("ru_RU")
    balance = fake.boolean()
    return {
        "id": fake.uuid4(),
        "warehouse": fake.uuid4(),
        "product": fake.uuid4(),
        "balance": balance,
        "count": fake.pyfloat(left_digits=15, right_digits=3, min_value=0),
        "reserve": not balance,
    }
@asynccontextmanager
async def connect_db() -> AsyncGenerator:
    try:
        async with async_timeout.timeout(5):
            conn = await asyncpgsa.create_pool(
                f"postgresql://postgres:some_secret@"
                f"localhost:10001/stockbalance_test",
                # echo=True,
                min_size=1,
                max_size=1,
                dialect=postgresql.dialect()
            )
        async with conn.acquire() as c:
            yield c
        await conn.close()
    except Exception as exc:
        logging.error(
            "Server Errors: {}\n{}\n{}\n{}".format(
                exc, sys.exc_info()[0], sys.exc_info()[1], stackprinter.format()
            )
        )
        yield None
    finally:
        await conn.close()
async def update_balance(conn, data: list):
    query = insert(balance)
    query = query.on_conflict_do_nothing(
        index_elements=["query_id", "warehouse", "product"]
    )
    sql_str = query.compile(
        dialect=postgresql.dialect(),
    )
    await conn.executemany(
        str(sql_str),
        [
            (
                item["id"],
                item["warehouse"],
                item["product"],
                item["balance"],
                item["reserve"],
                item["count"],
            )
            for item in data
        ],
    )
async def main():
    async with connect_db() as conn:
        try:
            await update_balance(conn, [data_traffic()])
        except Exception as exc:
            print(exc)

if __name__ == "__main__":
    asyncio.run(main())
Executing this fails with: syntax error at or near "%" (asyncpg understands only the numbered $n placeholders, not pyformat ones).

I found a solution; maybe it will help someone. It looks like this:
# compile_query is asyncpgsa's helper; it rewrites the %(name)s
# placeholders into $n and returns the ordered argument list
query = insert(balance).values(
    [
        {
            "query_id": item["id"],
            "warehouse": item["warehouse"],
            "product": item["product"],
        }
        for item in data
    ]
)
query = query.on_conflict_do_nothing(
    index_elements=["query_id", "warehouse", "product"]
)
sql_str, args = compile_query(query)
print("sql_str: ", sql_str)
print("args: ", args)
await conn.executemany(sql_str, [tuple(args)])
sql_str: INSERT INTO balance (query_id, warehouse, product) VALUES ($2, $1, $3) ON CONFLICT (query_id, warehouse, product) DO NOTHING
args: ['d982e84e-0f09-43dd-a296-aaf51dd6c36d', '4373a8df-0fd8-4f2f-855c-4a7b36a7ad9e', '831ce109-ff0f-4d0b-8c53-d148bb699bdf']
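If you want to avoid the asyncpgsa dependency, the same conversion can be hand-rolled. A minimal sketch, assuming each bind parameter appears exactly once in the statement:

from sqlalchemy.dialects import postgresql

def compile_asyncpg(query):
    # Compile to the default pyformat style, then rewrite each
    # %(name)s placeholder into asyncpg's positional $n form.
    compiled = query.compile(dialect=postgresql.dialect())
    sql, args = compiled.string, []
    for i, (name, value) in enumerate(compiled.params.items(), start=1):
        sql = sql.replace(f"%({name})s", f"${i}")
        args.append(value)
    return sql, args

The placeholders and the argument list stay aligned because both are produced in the same iteration order.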

Related

How to create a partitioned table at runtime over Apache Beam using Python

I am trying to create a new partitioned BigQuery table at runtime with the following code, but I am not finding an option to pass the column name "_time" on which the partitioning of my new BQ table needs to be done.
Can anyone please help me with it?
My code:
#------------Import Lib-----------------------#
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions, StandardOptions
import os, sys
import json  # used by json.loads in the DoFn below
import argparse
import logging
from apache_beam.options.pipeline_options import SetupOptions
from datetime import datetime
#------------Set up BQ parameters-----------------------#
# Replace with Project Id
project = 'xxxx'
#------------Splitting Of Records----------------------#
class Transaction_DB_UC2(beam.DoFn):
    def process(self, element):
        logging.info(element)
        result = json.loads(element)
        data_time = result.get('_time', 'null')
        data_dest = result.get('dest', 'null')
        data_DBID = result.get('DBID', 'null')
        data_SESSIONID = result.get('SESSIONID', 'null')
        data_USERHOST = result.get('USERHOST', 'null')
        data_raw = result.get('_raw', 'null')
        data_ACTION = result.get('ACTION', 'null')
        data_host = result.get('host', 'null')
        data_result = result.get('result', 'null')
        data_DBUSER = result.get('DBUSER', 'null')
        data_OS_USERNAME = result.get('OS_USERNAME', 'null')
        data_ACTION_NAME = result.get('ACTION', 'null').replace('100', 'LOGON').replace('101', 'LOGOFF')
        return [{"_time": data_time[:-8], "dest": data_dest, "DBID": data_DBID, "SESSIONID": data_SESSIONID, "_raw": data_raw, "USERHOST": data_USERHOST, "ACTION": data_ACTION, "host": data_host, "result": data_result, "DBUSER": data_DBUSER, "OS_USERNAME": data_OS_USERNAME, "ACTION_NAME": data_ACTION_NAME}]
def run(argv=None, save_main_session=True):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input',
        dest='input',
        help='Input file to process.')
    parser.add_argument(
        '--pro_id',
        dest='pro_id',
        type=str,
        default='ORACLE_SEC_DEFAULT',
        help='project id')
    known_args, pipeline_args = parser.parse_known_args(argv)
    pipeline_options = PipelineOptions(pipeline_args)
    pipeline_options.view_as(SetupOptions).save_main_session = save_main_session
    p1 = beam.Pipeline(options=pipeline_options)
    #data_f = sys.argv[1]
    logging.info('***********')
    logging.info(known_args.input)
    data_loading = (
        p1
        | 'Read from File' >> beam.io.ReadFromText(known_args.input, skip_header_lines=0)
    )
    project_id = "xxxxx"
    dataset_id = 'test123'
    table_schema_DB_UC2 = ('_time:DATETIME, dest:STRING, DBID:STRING, SESSIONID:STRING, _raw:STRING, USERHOST:STRING, ACTION:STRING, host:STRING, result:STRING, DBUSER:STRING, OS_USERNAME:STRING, ACTION_NAME:STRING')
    # Persist to BigQuery
    # WriteToBigQuery accepts the data as list of JSON objects
    #---------------------Index = DB-UC2---------------------#
    result = (
        data_loading
        | 'Clean-DB-UC2' >> beam.ParDo(Transaction_DB_UC2())
        | 'Write-DB-UC2' >> beam.io.WriteToBigQuery(
            table=known_args.pro_id,
            dataset=dataset_id,
            project=project_id,
            schema=table_schema_DB_UC2,
            create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
            write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND))
    result = p1.run()
    result.wait_until_finish()

if __name__ == '__main__':
    #logging.getLogger().setLevel(logging.INFO)
    path_service_account = 'ml-fbf8cabcder.json'
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = path_service_account
    run()
I want to create the partition on the field "_time"; please suggest how it can be achieved.
Thanks.
I believe you can do that by passing the timePartitioning parameter through additional_bq_parameters (note the limitations).
When creating a new BigQuery table, there are a number of extra parameters
that one may need to specify. For example, clustering, partitioning, data
encoding, etc. It is possible to provide these additional parameters by
passing a Python dictionary as additional_bq_parameters (Reference).
In your case, you could add to your WriteToBigQuery transform the timePartitioning parameter with the required type key and the optional field key (note that field must be a top-level TIMESTAMP or DATE field):
additional_bq_parameters={'timePartitioning': {
    'type': 'DAY',
    'field': '_time'
}}
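Wired into the pipeline from the question, the write step would then look like this (a sketch that reuses the names defined in the question's code):

| 'Write-DB-UC2' >> beam.io.WriteToBigQuery(
    table=known_args.pro_id,
    dataset=dataset_id,
    project=project_id,
    schema=table_schema_DB_UC2,
    # ask BigQuery to create the table day-partitioned on _time
    additional_bq_parameters={'timePartitioning': {'type': 'DAY', 'field': '_time'}},
    create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
    write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)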
I didn't have the time to try it out yet. I'll try to reproduce tomorrow.
Let me know if it works for you.
EDIT
Finally got the chance to try the timePartitioning parameter to create a partitioned table and it worked.
Here is a simple pipeline to test it.
#!/usr/bin/env python
import apache_beam as beam

PROJECT = 'YOUR_PROJECT'
BUCKET = 'YOUR_BUCKET'

def run():
    argv = [
        '--project={0}'.format(PROJECT),
        '--job_name=YOUR_JOB_NAME',
        '--save_main_session',
        '--staging_location=gs://{0}/staging/'.format(BUCKET),
        '--temp_location=gs://{0}/staging/'.format(BUCKET),
        '--region=us-central1',
        '--runner=DataflowRunner'
    ]
    p = beam.Pipeline(argv=argv)
    table_schema = {'fields': [
        {'name': 'country', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': '_time', 'type': 'DATETIME', 'mode': 'NULLABLE'},
        {'name': 'query', 'type': 'STRING', 'mode': 'NULLABLE'}]}
    additional_bq_parameters = {
        'timePartitioning': {'type': 'DAY', 'field': '_time'}}
    elements = (p | beam.Create([
        {'country': 'mexico', '_time': '2020-06-10 22:19:26', 'query': 'acapulco'},
        {'country': 'canada', '_time': '2020-12-11 15:42:32', 'query': 'influenza'},
    ]))
    elements | beam.io.WriteToBigQuery(
        table='YOUR_DATASET.YOUR_NEW_TABLE',
        schema=table_schema,
        additional_bq_parameters=additional_bq_parameters,
        create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
        write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE
    )
    p.run()

if __name__ == '__main__':
    run()

How to produce the following JSON using flask-sqlalchemy?

I am creating API endpoints using flask-sqlalchemy and marshmallow in Python. For example, I have two tables: items and stores. Items has attributes like item_id, item_name, item_price and a list of available stores. Store has attributes like store_id, store_name, store_location and a list of available items. I require the following JSON response when I request the list of items.
[
    {
        "item_id": 1,
        "item_name": "Laptop",
        "item_price": "20",
        "store_list": [
            {
                "store_id": 1,
                "store_name": "ABC",
                "store_location": "USA"
            },
            {
                "store_id": 2,
                "store_name": "BBC",
                "store_location": "USA"
            },
            {
                "store_id": 3,
                "store_name": "CBC",
                "store_location": "USA"
            }
        ]
    },
    {
        "item_id": 2,
        "item_name": "Laptop",
        "item_price": "20",
        "store_list": [
            {
                "store_id": 1,
                "store_name": "ABC",
                "store_location": "USA"
            },
            {
                "store_id": 2,
                "store_name": "BBC",
                "store_location": "USA"
            },
            {
                "store_id": 3,
                "store_name": "CBC",
                "store_location": "USA"
            }
        ]
    }
    ... and so on
]
I require the following JSON response when I request the list of stores.
[
    {
        "store_id": 1,
        "store_name": "ABC",
        "store_location": "USA",
        "items_list": [
            {
                "items_id": 1,
                "items_name": "Laptop",
                "items_price": "65"
            },
            {
                "items_id": 2,
                "items_name": "Keyboard",
                "items_price": "56"
            },
            {
                "items_id": 3,
                "items_name": "Mouse",
                "items_price": "56"
            }
        ]
    },
    {
        "store_id": 2,
        "store_name": "BBC",
        "store_location": "UK",
        "items_list": [
            {
                "items_id": 1,
                "items_name": "Laptop",
                "items_price": "23"
            },
            {
                "items_id": 2,
                "items_name": "BBC",
                "items_price": "Speaker"
            },
            {
                "items_id": 3,
                "items_name": "Mouse",
                "items_price": "24"
            }
        ]
    }
    ... and so on
]
So far I have tried the following:
# ITEMS MODEL
from requests import Response
from flask import request, url_for
from datetime import datetime
from typing import List
from db import db
from models.store import Stores

# Bartiny Ingredients Generic Types Model
class Items(db.Model):
    __tablename__ = "items"
    item_id = db.Column(db.Integer, primary_key=True)
    item_name = db.Column(db.String(100), nullable=False,)
    item_price = db.Column(db.String(10), nullable=False,)
    store_list = db.relationship('Stores', backref=db.backref('items'))

    @classmethod
    def find_by_name(cls, name: str) -> "Items":
        return cls.query.filter_by(item_name=name).first()

    @classmethod
    def find_by_id(cls, _id: int) -> "Items":
        return cls.query.filter_by(item_id=_id).first()

    @classmethod
    def find_all(cls) -> List["Items"]:
        return cls.query.all()

    def save_to_db(self) -> None:
        db.session.add(self)
        db.session.commit()

    def delete_from_db(self) -> None:
        db.session.delete(self)
        db.session.commit()
# STORE MODEL
from requests import Response
from flask import request, url_for
from datetime import datetime
from typing import List
from db import db
from models.items import Items

# Bartiny Ingredients Generic Types Model
class Stores(db.Model):
    __tablename__ = "stores"
    store_id = db.Column(db.Integer, primary_key=True)
    store_name = db.Column(db.String(100), nullable=False,)
    store_locations = db.Column(db.String(10), nullable=False,)
    items_list = db.relationship('Items', backref=db.backref('stores'))

    @classmethod
    def find_by_name(cls, name: str) -> "Stores":
        return cls.query.filter_by(store_name=name).first()

    @classmethod
    def find_by_id(cls, _id: int) -> "Stores":
        return cls.query.filter_by(store_id=_id).first()

    @classmethod
    def find_all(cls) -> List["Stores"]:
        return cls.query.all()

    def save_to_db(self) -> None:
        db.session.add(self)
        db.session.commit()

    def delete_from_db(self) -> None:
        db.session.delete(self)
        db.session.commit()
SCHEMAS

# Items Schema
from ma import ma
from marshmallow import pre_dump
from models.item import Items

class ItemsSchema(ma.ModelSchema):
    class Meta:
        model = Items

# Store Schema
from ma import ma
from marshmallow import pre_dump
from models.store import Stores

class StoresSchema(ma.ModelSchema):
    class Meta:
        model = Stores
Resources

# Store Resource
from flask_restful import Resource
from models.store import Stores
from schemas.store import StoresSchema

store_list_schema = StoresSchema(many=True)

class StoreList(Resource):
    @classmethod
    def get(cls):
        return {"stores": store_list_schema.dump(Stores.find_all())}, 200

# Items Resource
from flask_restful import Resource
from models.item import Items
from schemas.item import ItemsSchema

item_list_schema = ItemsSchema(many=True)

class ItemList(Resource):
    @classmethod
    def get(cls):
        return {"items": item_list_schema.dump(Items.find_all())}, 200
The following is the app startup code:
from flask import Flask, jsonify
from flask_restful import Api
from flask_jwt_extended import JWTManager  # provides the JWTManager used below
from marshmallow import ValidationError
from db import db
from ma import ma
from resources.item import ItemList
from resources.store import StoreList

app = Flask(__name__)
app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///data.db"
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
api = Api(app)

@app.before_first_request
def create_tables():
    db.create_all()

@app.errorhandler(ValidationError)
def handle_marshmallow_validation(err):
    return jsonify(err.messages), 400

jwt = JWTManager(app)

api.add_resource(StoreList, "/stores")
api.add_resource(ItemList, "/items")

if __name__ == "__main__":
    db.init_app(app)
    ma.init_app(app)
    app.run(port=5000, debug=True)
Looks like jsonify is your friend...
https://www.google.com/search?q=flask+sqlalchemy+to+json
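A more direct route to the nested output is marshmallow's Nested field. An untested sketch against the models above; it assumes the relationship attribute is named store_list (as in the corrected model) and uses a second, flat store schema to avoid infinite recursion between the two sides:

# Items schema with nested stores (sketch)
from ma import ma
from marshmallow import fields
from models.item import Items
from models.store import Stores

class StoreSummarySchema(ma.ModelSchema):
    class Meta:
        model = Stores
        fields = ("store_id", "store_name", "store_locations")

class ItemsSchema(ma.ModelSchema):
    # serialize each related store with the flat schema above
    store_list = fields.Nested(StoreSummarySchema, many=True)

    class Meta:
        model = Items

ItemsSchema(many=True).dump(Items.find_all()) then returns each item with its embedded store_list; the stores endpoint is symmetric, nesting a flat items schema under items_list.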

DRF use field-value instead of default (pk) in POST request

I'm trying to send a valid POST request. It works when I send this:
{
    "name": "but",
    "sklad": 1
}
The response is something like this, and all is valid:
{
    "name": "but",
    "getsklad": "fence"
}
But when I try to send this, with 'fence':
{
    "name": "but",
    "sklad": "fence"
}
I get this error:
Invalid type. The primary key value was expected, str was received.
I have this piece of code in models.py:
class Sklad(models.Model):
    storages = (
        ('fence', 'Забор'),
        ('sett', 'Тротуарка'),
    )
    storage = models.CharField(
        max_length=10,
        choices=storages,
        help_text='Name of storage',
        default='sett'
    )

class Zabor(models.Model):
    sklad = models.ForeignKey(Sklad, on_delete=models.CASCADE)
    name = models.CharField(max_length=10)
    ...
This is in serializers.py:
class ZaborPostSerializer(serializers.ModelSerializer):
    class Meta:
        model = Zabor
        fields = (
            ...
            'name',
            'sklad',
        )
And the views.py part:
class ZaborView(APIView):
    def post(self, request):
        serializer = ZaborPostSerializer(data=request.data)
        if serializer.is_valid():
            serializer.save()
            return Response({'status': 'Success'})
        else:
            return Response({'status': 'Failed'})
I know that when I write '1' in the request it means the pk. But how can I send the name rather than the id/pk?
I am a beginner, so don't be too hard on me for stupid questions :)
Since you are providing the Sklad foreign key, which is looked up by its unique primary key, the field only accepts values of that key's one type: integer, string, etc.
You can set the storage field of Sklad to be the primary key:
class Sklad(models.Model):
    storages = (
        ('fence', 'Забор'),
        ('sett', 'Тротуарка'),
    )
    storage = models.CharField(
        max_length=10,
        choices=storages,
        help_text='Name of storage',
        default='sett',
        primary_key=True
    )
Now you can directly filter with your string value.
Another option is to filter in the view:
from django.shortcuts import get_object_or_404
from django.http import Http404

class ZaborView(APIView):
    def post(self, request):
        dataDict = dict(request.data)
        try:
            sklad = get_object_or_404(Sklad.objects, storage=dataDict['sklad'][0])
            serializer = ZaborPostSerializer(data=request.data)
            if serializer.is_valid():
                serializer.save()
                return Response({'status': 'Success'})
            else:
                return Response({'status': 'Failed'})
        except Http404:
            return Response({'status': 'Failed'})
Note that it will only work if you have only one Sklad with the given storage field.
If you have multiple ones, you can pick the first one, since you do not supply a primary key and you have no other fields to filter on (see also the SlugRelatedField sketch below):
from django.shortcuts import get_list_or_404
.....
sklad = list(get_list_or_404(Sklad.objects, storage=dataDict['sklad'][0]))[0]
.....
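A third option that keeps the integer primary key: DRF's SlugRelatedField lets the serializer accept the storage string directly. A sketch, assuming the storage values are unique across Sklad rows:

from rest_framework import serializers

class ZaborPostSerializer(serializers.ModelSerializer):
    # look the related Sklad up by its 'storage' value instead of its pk
    sklad = serializers.SlugRelatedField(
        slug_field='storage',
        queryset=Sklad.objects.all(),
    )

    class Meta:
        model = Zabor
        fields = ('name', 'sklad')

With this, {"name": "but", "sklad": "fence"} validates, and a value that matches no Sklad shows up as a normal validation error in serializer.is_valid().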
Hope it helps!

error: groovy.lang.MissingPropertyException No such property: sql for class: Script9

I need to compare two values in date format.
import groovy.sql.*
com.eviware.soapui.support.GroovyUtils.registerJdbcDriver( "oracle.jdbc.driver.OracleDriver" )
def messageId1 = context.expand( '${#Project#Id1}' )
def messageId2 = context.expand( '${#Project#Id2}' )
def first = sql.firstRow("select timestamp from table where Messageid = '"+messageId1+"'")
def second = sql.firstRow("select timestamp from table where Messageid = '"+messageId2+"'")
assert first < second, 'OK'
I get the following error:
error: groovy.lang.MissingPropertyException No such property: sql for class: Script9
I had the same issue, and the mistake I made was that I didn't create a Sql instance. So this should help you:
import groovy.sql.Sql

def DBurl = '<Your database URL>'
def DBuser = '<Database user name>'
def DBpassword = '<Database Password>'
def DBdriver = '<Your database driver>'
def sql = Sql.newInstance(DBurl, DBuser, DBpassword, DBdriver)

Flask Dynamo connection issue (AttributeError: 'Dynamo' object has no attribute 'tables')

from flask import Flask
from flask_dynamo import Dynamo
import os

os.environ['AWS_ACCESS_KEY_ID'] = ''
os.environ['AWS_SECRET_ACCESS_KEY'] = ''
os.environ['AWS_REGION'] = 'ap-south-1'

app = Flask(__name__)
app.config['DYNAMO_TABLES'] = [
    {
        'TableName': 'users',
        'KeySchema': [dict(AttributeName='username', KeyType='HASH')],
        'AttributeDefinitions': [dict(AttributeName='username', AttributeType='S')],
        'ProvisionedThroughput': dict(ReadCapacityUnits=5, WriteCapacityUnits=5)
    }, {
        'TableName': 'groups',
        'KeySchema': [dict(AttributeName='name', KeyType='HASH')],
        'AttributeDefinitions': [dict(AttributeName='name', AttributeType='S')],
        'ProvisionedThroughput': dict(ReadCapacityUnits=5, WriteCapacityUnits=5)
    }
]
app.config['DYNAMO_ENABLE_LOCAL'] = True
app.config['DYNAMO_LOCAL_HOST'] = 'localhost'
app.config['DYNAMO_LOCAL_PORT'] = 9000
dynamo = Dynamo()
The table configuration for flask-dynamo has been defined and the Dynamo instance created. When I try to create all the tables, I get builtins.AttributeError: 'Dynamo' object has no attribute 'tables':
@app.route('/', methods=['GET'])
def hello_world():
    with app.app_context():
        dynamo.create_all()
    return 'table created!'
Environment:
python3
flask-dynamo
Thanks in advance
This line:
dynamo = Dynamo()
should be:
dynamo = Dynamo(app)
That way, the dynamo instance can access the tables and other configurations you defined on the app object.
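If you prefer constructing the extension before the app exists (e.g. with an application factory), flask-dynamo follows the usual deferred-initialization pattern of Flask extensions. A minimal sketch, assuming the standard init_app method:

from flask import Flask
from flask_dynamo import Dynamo

dynamo = Dynamo()  # created without an app, as in the question

def create_app():
    app = Flask(__name__)
    app.config['DYNAMO_TABLES'] = [...]  # same table definitions as above
    dynamo.init_app(app)  # bind the extension to the app afterwards
    return app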
