Initialize Model Class Variable At Runtime - python-3.x

I am trying to import student data from an Excel workbook. I have to set the column_name of each field of the StudentMasterResource class dynamically, based on the columns present in the uploaded file. The constants module holds a dictionary named column_name that maps field keys to the column headers. The first time I run the import it works, but every subsequent run fails.
constants.py
column_name = dict()
resource.py
from common_account import constants
from import_export import widgets, fields, resources
def getClassName(key):
    if key in constants.column_name:
        return constants.column_name[key]
    return key
class StudentMasterResource(resources.ModelResource):
    organisation_id = fields.Field(
        column_name=getClassName('organisation_id'),
        attribute='organisation_id',
        widget=widgets.ForeignKeyWidget(OrganisationMaster, 'organisation_name'),
        saves_null_values=True
    )
    name = fields.Field(
        column_name=getClassName('Name'),
        attribute='name',
        saves_null_values=True,
        widget=widgets.CharWidget()
    )
    date_of_birth = fields.Field(
        column_name=getClassName('date'),
        attribute='date_of_birth',
        saves_null_values=True,
        widget=widgets.DateWidget()
    )
views.py
from common_account import constants
from tablib import Dataset
@api_view(['POST'])
@permission_classes([IsAuthenticated])
def student_import(request):
    if request.method == 'POST':
        context_data = dict()
        data_set = Dataset()
        file = request.FILES['myfile']
        extension = file.name.split(".")[-1].lower()
        column_data = request.data
        is_import = column_data.get('is_import')  # the flag comes from the request data
        constants.valid_data.clear()
        constants.invalid_data.clear()
        if extension == 'csv':
            data = data_set.load(file.read().decode('utf-8'), format=extension)
        else:
            data = data_set.load(file.read(), format=extension)
        constants.column_name = {
            'date': column_data.get('birth'),
            'name': column_data.get('name'),
        }
        student_resource = StudentMasterResource()
        if is_import == 'No':
            result = student_resource.import_data(data_set, organisation_id=request.user.organisation_id,
                                                  offering_id=offering_id, all_invalid_data=False,
                                                  dry_run=True, raise_errors=True)
            context_data['valid_data'] = constants.valid_data
            context_data['invalid_data'] = constants.invalid_data
            context_data[constants.RESPONSE_RESULT] = {
                "Total records": student_resource.total_cnt,
                "skip records": len(constants.invalid_data),
                "Records imported": len(constants.valid_data),
            }
            return JsonResponse(context_data)
        elif is_import == 'Yes':
            result = student_resource.import_data(data_set, organisation_id=request.user.organisation_id,
                                                  offering_id=offering_id, all_invalid_data=False,
                                                  dry_run=False, raise_errors=False)
            context_data[constants.RESPONSE_ERROR] = False
            context_data[constants.RESPONSE_MESSAGE] = 'Data Imported !!!'
            context_data[constants.RESPONSE_RESULT] = {
                "Total records": student_resource.total_cnt,
                "skip records": len(constants.invalid_data),
                "Records imported": len(constants.valid_data),
            }
            return JsonResponse(context_data)
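A note on why this fails: the fields.Field(...) assignments in the class body of StudentMasterResource run exactly once, when resource.py is first imported, so getClassName() is evaluated before the view has filled constants.column_name, and later changes to that dictionary never reach the already-created Field objects. A minimal sketch of one workaround, assuming django-import-export's Resource keeps its fields in the self.fields dict: resolve the column names per instance, inside __init__, and instantiate the resource only after the view has set constants.column_name.

class StudentMasterResource(resources.ModelResource):
    name = fields.Field(attribute='name', saves_null_values=True,
                        widget=widgets.CharWidget())
    date_of_birth = fields.Field(attribute='date_of_birth', saves_null_values=True,
                                 widget=widgets.DateWidget())

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Re-resolve the column names now, after the view has populated
        # constants.column_name for this request.
        self.fields['name'].column_name = getClassName('Name')
        self.fields['date_of_birth'].column_name = getClassName('date')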

Related

Dataframe becoming empty after calling a method in DataWriter class that deletes records from delta table

How can I prevent the dataframe from becoming empty after calling the delete_processed_data() method of my DataWriter class? The class also has a register_processed_data() method that inserts data into a delta table.
I'm not overwriting the dataframe; it is only used in a condition that checks, via a count, whether it has any data.
Here's my complete code (Databricks notebook):
from datetime import *
import pandas as pd
from dn.utils import table
import pyspark.sql.functions as F
from delta.tables import *
from pyspark.sql.types import *
import json
import pytz
import calendar
list_countries = (
    table.get_silver_table(table_name='stpos_dim_itemticket')
    .select('pais')
    .distinct()
)
list_countries = [row.pais for row in list_countries.collect()]
# Include "Todos" option
list_countries.insert(0, 'Todos')
dbutils.widgets.removeAll()
dbutils.widgets.text(name='category', defaultValue='Todos', label='Categoria')
dbutils.widgets.text(name='today', defaultValue=str(date.today()), label='Fecha proceso')
dbutils.widgets.dropdown(name="country", defaultValue='Todos', choices=list_countries, label="Pais")
dbutils.widgets.dropdown(name='forced_load', defaultValue='no', choices=['si', 'no'], label='Forzar carga')
dbutils.widgets.dropdown(name="reprocessing", defaultValue='si', choices=['si', 'no'], label="Reproceso")
country = dbutils.widgets.get('country').strip()
category = dbutils.widgets.get("category").strip()
today = datetime.strptime(dbutils.widgets.get('today').strip(), '%Y-%m-%d')
wave_date = today.replace(day=1)
forced_load = dbutils.widgets.get('forced_load').strip()
reprocessing = dbutils.widgets.get('reprocessing').lower().strip()
print(f"Categoria: {category}")
print(f"Fecha proceso: {today.strftime('%Y-%m-%d')}")
print(f"Pais: {country}")
print(f"Forzar carga: {forced_load}")
print(f'Reproceso: {reprocessing}')
print(f"Fecha ola: {wave_date.strftime('%Y-%m-%d')}")
class DataExtractor():
    def __init__(self, category, today, country, list_countries, wave_date, reprocessing, forced_load):
        self.category = category
        self.today = today
        self.country = country
        self.list_countries = list_countries
        self.wave_date = wave_date
        self.reprocessing = reprocessing
        self.forced_load = forced_load
        if self.reprocessing == 'no' or self.forced_load == 'si':
            self.days_for_wave = self.get_days_for_wave()
        if self.country.lower() == 'todos':
            self.country_condition = "lower(pais) = lower(pais)"
        else:
            self.country_condition = f"lower(pais) = lower('{country}')"
        if self.category.lower() == 'todos':
            self.category_condition = "lower(categoria) = lower(categoria)"
        else:
            self.category_condition = f"lower(categoria) = lower('{category}')"

    def get_days_for_wave_by_country(self, country, path_file):
        days_for_wave = (
            spark.read.format("com.crealytics.spark.excel")
            .option("header", "true")
            .option("treatEmptyValuesAsNulls", "true")
            .option("inferSchema", "true")
            .load(path_file)
            .where(f"fecha_ola = '{self.wave_date}'")
            .where(f"lower(pais) = lower('{country}')")
            .selectExpr(
                "fecha_ola",
                "to_date(fecha) as fecha_transaccion",
                "pais")
        )
        if days_for_wave.count() == 0:
            # Desired year and month
            year = self.wave_date.year
            month = self.wave_date.month
            # Get the number of days in the given month
            _, num_days = calendar.monthrange(year, month)
            # Build a list with every day of the month
            days = [(date(year, month, day),) for day in range(1, num_days + 1)]
            # Convert each date to a string
            days_str = [(day[0].strftime("%Y-%m-%d"),) for day in days]
            # Convert list to dataframe
            days_for_wave = (
                spark.createDataFrame(days_str)
                .withColumnRenamed("_1", "fecha_transaccion")
                .withColumn("fecha_ola", F.lit(self.wave_date))
                .withColumn("pais", F.lit(country))
                .selectExpr(
                    "fecha_ola",
                    "to_date(fecha_transaccion) AS fecha_transaccion",
                    "pais")
            )
        print(f"Loaded {days_for_wave.count()} days for wave {self.wave_date.strftime('%Y-%m-%d')} and country {country}")
        return days_for_wave

    def get_days_for_wave(self):
        """
        Get the days for the wave
        """
        # Load dim_dia_ola.xlsx with the wave definition
        path_file = "dbfs:/mnt/storetrack/transitraw/dichterneira/storelive/dim_dia_ola.xlsx"
        print(f'Loading days for wave from file: {path_file}...')
        if self.country.lower() == 'todos':
            # Get list of countries (excluding 'Todos')
            list_of_countries = self.list_countries[1:]
        else:
            list_of_countries = [self.country]
        schema = StructType([
            StructField("fecha_ola", TimestampType(), nullable=True),
            StructField("fecha_transaccion", DateType(), nullable=True),
            StructField("pais", StringType(), nullable=True)
        ])
        # Create an empty DataFrame with the given schema
        days_for_wave = spark.createDataFrame([], schema=schema)
        for country in list_of_countries:
            days_for_wave_by_country = self.get_days_for_wave_by_country(country, path_file)
            max_day_of_wave = days_for_wave_by_country.agg(F.max("fecha_transaccion")).collect()[0][0]
            if self.today.date() > max_day_of_wave and self.forced_load == 'no':
                print(f"Today {self.today.strftime('%Y-%m-%d')} is not included in wave days for country {country} and wave {self.wave_date.strftime('%Y-%m-%d')}")
            else:
                if country == list_of_countries[0]:
                    days_for_wave = days_for_wave_by_country
                else:
                    days_for_wave = days_for_wave.union(days_for_wave_by_country)
        return days_for_wave

    def get_data_items(self):
        """
        Filter sales by category, wave and country
        """
        if self.reprocessing == 'si' and self.forced_load == 'no':
            sales_filtered = (
                table.get_silver_table(table_name='sl_fact_item_ticket')
                .where(f"fecha_ola = '{self.wave_date}'")
                .where(self.country_condition)
                .where(self.category_condition)
            )
        else:
            sales_filtered = (
                table.get_silver_table(table_name='stpos_dim_itemticket')
                .drop("fecha_ola")
                .where(self.country_condition)
                .where(self.category_condition)
                .selectExpr("*", "to_date(date) as fecha_transaccion")
                .join(self.days_for_wave, ["fecha_transaccion", "pais"], how="inner")
                .drop("fecha_transaccion")
            )
        print(f"{sales_filtered.count()} items loaded. [Get data items]")
        return sales_filtered

    def get_product_catalog(self):
        product_catalog = (
            table.get_bronze_table(table_name='brz_catalogo_productos', module_name='catalogo')
            .where(self.country_condition)
            .selectExpr(
                "upc as barcode",
                "pais",
                "categoria",
                "marca",
                "submarca",
                "fabricante",
                """CASE WHEN lower(split(contenido, ' ')[1]) = 'ml' THEN 'L'
                        WHEN lower(split(contenido, ' ')[1]) = 'gr' THEN 'Kg'
                        WHEN lower(split(contenido, ' ')[1]) = 'und' THEN 'Und'
                   END AS unidad_std""",
                "conversion AS contenido_std",
                "split(contenido, ' ')[0] AS contenido",
                "split(contenido, ' ')[1] AS unidad_medida",
                "idref AS id_ref"
            )
        )
        return product_catalog
class DataEnricher():
    def __init__(self, reprocessing, forced_load):
        self.reprocessing = reprocessing
        self.forced_load = forced_load

    def rename_fields(self, df_item):
        if self.reprocessing == 'no' or self.forced_load == 'si':
            print("Renaming fields...")
            df_item = (
                df_item
                .selectExpr(
                    'CAST(fecha_ola AS DATE) AS fecha_ola',
                    'pdv AS nombre_pdv',
                    'marca',
                    'submarca',
                    'pais',
                    'contenido',
                    'unidad_medida',
                    'CAST(cantidad AS DOUBLE) as cantidad',
                    'CAST(precio_local AS DOUBLE) as precio_local',
                    'barcode',
                    'date AS fecha_transaccion',
                    'categoria',
                    'categoria_name',
                    'descripcion',
                    'id_ref',
                    'posdb_id',
                    'id_ticket',
                    'id_item',
                    'id_pdv',
                    'venta_usd',
                    'venta_local',
                    'precio_usd',
                    'id_canasto'
                )
            )
        return df_item

    def calculate_standard_fields(self, df_item):
        if self.reprocessing == 'no' or self.forced_load == 'si':
            print("Calculating standard fields...")
            df_item = (
                df_item
                # Add column with Ml converted to L and Gr converted to Kg
                .withColumn("contenido_std",
                            F.when(F.col("unidad_medida") == "Ml", F.col("contenido") / 1000)
                            .when(F.col("unidad_medida") == "Gr", F.col("contenido") / 1000)
                            .otherwise(F.col("contenido")))
                .withColumn("unidad_std",
                            F.when(F.col("unidad_medida") == "Ml", F.lit("L"))
                            .when(F.col("unidad_medida") == "Gr", F.lit("Kg")))
            )
        return df_item

    def calculate_fields(self, df_items):
        """
        Set the time zone of the dataframe
        """
        if self.reprocessing == 'no' or self.forced_load == 'si':
            print("Calculating time zone field...")
            # Create dataframe with the time bands
            time_zone = [(1, '05:00:00', '09:59:59'),
                         (2, '10:00:00', '13:59:59'),
                         (3, '14:00:00', '19:59:59'),
                         (4, '20:00:00', '23:59:59'),
                         (4, '00:00:00', '04:59:59')]
            time_zone = spark.createDataFrame(time_zone, ['id_franja', 'inicio', 'fin'])
            # Convert inicio and fin to datetime
            time_zone = (
                time_zone
                .withColumn("inicio", F.to_timestamp(F.col("inicio"), "HH:mm:ss"))
                .withColumn("fin", F.to_timestamp(F.col("fin"), "HH:mm:ss"))
            )
            df_items = (
                df_items
                .withColumn("hora_transaccion", F.substring(F.col("fecha_transaccion"), 12, 8))
                .withColumn("hora_transaccion", F.to_timestamp(F.col("hora_transaccion"), "HH:mm:ss"))
                .join(time_zone, on=F.col("hora_transaccion").between(F.col("inicio"), F.col("fin")), how="left")
                .drop("hora_transaccion", "inicio", "fin")
            )
        return df_items

    def update_product_features(self, data, product_catalog):
        if data.count() > 0:
            print("Updating fields from brz_catalogo_productos")
            data = (
                data
                .drop("categoria", "marca", "submarca", "fabricante", "unidad_std", "contenido_std", "contenido", "unidad_medida", "id_ref")
                .join(product_catalog, on=["barcode", "pais"], how="left")
            )
        return data
class DataWriter():
    def __init__(self, wave_date, country, category):
        self.wave_date = wave_date
        self.country = country
        self.category = category
        if self.country.lower() == 'todos':
            self.country_condition = "lower(pais) = lower(pais)"
        else:
            self.country_condition = f"lower(pais) = lower('{country}')"
        if self.category.lower() == 'todos':
            self.category_condition = "lower(categoria) = lower(categoria)"
        else:
            self.category_condition = f"lower(categoria) = lower('{category}')"

    def delete_processed_data(self, datos):
        df_categoria_activa = (
            table.get_bronze_table(
                table_name='sl_configuracion_procesamiento_zona_silver',
                module_name='storetrack'
            )
            .where(f"fecha_ola = '{self.wave_date}' and lower(trim(procesar)) = 'si'")
            .where(self.country_condition)
            .where(self.category_condition)
            .selectExpr(
                "categoria",
                "pais",
                "fecha_ola"
            )
        )
        if datos.count() > 0:
            display(datos.where("categoria = 'Galletas dulces'"))
            table_path = table.get_silver_table_path(table_name="sl_fact_item_ticket")
            deltaTableToWrite = DeltaTable.forPath(spark, table_path)
            print("Deleting old rows...")
            deltaTableToWrite.alias('current')\
                .merge(
                    df_categoria_activa.alias('delete'),
                    'current.pais = delete.pais AND current.categoria = delete.categoria AND current.fecha_ola = delete.fecha_ola')\
                .whenMatchedDelete()\
                .execute()
            display(datos.where("categoria = 'Galletas dulces'"))

    def register_processed_data(self, data):
        if data.count() > 0:
            print("Inserting new rows...")
            display(data.where("categoria = 'Galletas dulces'"))
            table_path = table.get_silver_table_path(table_name="sl_fact_item_ticket")
            deltaTableToWrite = DeltaTable.forPath(spark, table_path)
            deltaTableToWrite.alias('current')\
                .merge(
                    data.alias('new'),
                    'current.id_item = new.id_item AND current.fecha_ola = new.fecha_ola')\
                .whenNotMatchedInsert(values={
                    "fecha_ola": "new.fecha_ola",
                    "marca": "new.marca",
                    "submarca": "new.submarca",
                    "pais": "new.pais",
                    "contenido": "new.contenido",
                    "unidad_medida": "new.unidad_medida",
                    "cantidad": "new.cantidad",
                    "precio_local": "new.precio_local",
                    "barcode": "new.barcode",
                    "fecha_transaccion": "new.fecha_transaccion",
                    "categoria": "new.categoria",
                    "categoria_name": "new.categoria_name",
                    "descripcion": "new.descripcion",
                    "id_ref": "new.id_ref",
                    "posdb_id": "new.posdb_id",
                    "id_ticket": "new.id_ticket",
                    "id_item": "new.id_item",
                    "id_pdv": "new.id_pdv",
                    "venta_usd": "new.venta_usd",
                    "venta_local": "new.venta_local",
                    "precio_usd": "new.precio_usd",
                    "nombre_pdv": "new.nombre_pdv",
                    "contenido_std": "new.contenido_std",
                    "unidad_std": "new.unidad_std",
                    "id_canasto": "new.id_canasto",
                    "id_franja": "new.id_franja"
                })\
                .execute()
            display(data.where("categoria = 'Galletas dulces'"))
            print(f"{data.count()} items loaded. [Write processed data]")
        else:
            print("No data to save in silver.sl_fact_item_ticket")
if __name__ == '__main__':
    data_extractor = DataExtractor(category, today, country, list_countries, wave_date, reprocessing, forced_load)
    data = data_extractor.get_data_items()
    product_catalog = data_extractor.get_product_catalog()
    cleaner = DataCleaner(wave_date, country, category, reprocessing, forced_load)
    data = cleaner.clean_data(data)
    data_enricher = DataEnricher(reprocessing, forced_load)
    data = data_enricher.rename_fields(data)
    data = data_enricher.calculate_standard_fields(data)
    data = data_enricher.calculate_fields(data)
    data = data_enricher.update_product_features(data, product_catalog)
    data_write = DataWriter(wave_date, country, category)
    data_write.delete_processed_data(data)
    data_write.register_processed_data(data)
The parameters with which I am running the notebook are:
Categoria: Todos
Fecha proceso: 2022-12-01
Pais: Todos
Forzar carga: no
Reproceso: si
Fecha ola: 2022-12-01
The following output is displayed:
993313 items loaded. [Get data items]
62023 items loaded. [Remove blocked categories]
Updating fields from brz_catalogo_productos
[DISPLAY OF ROWS IN DATAFRAME data]
Deleting old rows...
Query returned no results
No data to save in silver.sl_fact_item_ticket
Any insights on why the dataframe is getting cleared would be greatly appreciated.
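Not an answer from the thread, but the symptom matches Spark's lazy evaluation: data is a query plan over sl_fact_item_ticket (in the reprocessing branch), not a materialized result, so every action such as count() or display() re-reads the delta table. Once delete_processed_data removes the matching rows, re-evaluating data yields nothing, which is why register_processed_data sees an empty dataframe. A minimal sketch of one common mitigation, persisting the dataframe and forcing evaluation before the delete (localCheckpoint() is a stronger variant that cuts the lineage entirely):

from pyspark import StorageLevel

# ... everything up to update_product_features as in the notebook above ...
data = data_enricher.update_product_features(data, product_catalog)
data = data.persist(StorageLevel.MEMORY_AND_DISK)  # cache the evaluated rows
print(f"Materialized {data.count()} rows")         # an action forces the cache to fill

data_write = DataWriter(wave_date, country, category)
data_write.delete_processed_data(data)    # deletes rows from sl_fact_item_ticket
data_write.register_processed_data(data)  # reads the cached rows, not the emptied table
data.unpersist()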

how to work with foreign key field in django

Hi everyone, I am working with the Django framework, where I upload an Excel file into the DailyTrip table. Currently I get car_number directly from the Car table, but now I need to get car_number via the Car_team table, which stores car_id and team_id. I also need to store team_id in the DailyTrip table automatically, based on car_id (car_number). I am confused about how to do this; please help me out.
models.py
class Car_team(BaseModel):
    team = models.ForeignKey(
        Team,
        models.CASCADE,
        verbose_name='Team',
        null=True,
    )
    car = models.ForeignKey(
        Car,
        models.CASCADE,
        verbose_name='Car',
        null=True)
    city = models.ForeignKey(
        City,
        models.CASCADE,
        verbose_name='City',
    )
    start_date = models.DateField(null=True, blank=True)
    end_date = models.DateField(null=True, blank=True)
views.py
def add_payout_uber_daily_data(request):
    if request.method == 'POST':
        form = UberPerformanceDataForm(request.POST, request.FILES, request=request)
        if form.is_valid():
            date = form.cleaned_data['date']
            excel_file = request.FILES['file']
            df = pd.read_excel(excel_file)
            is_na = pd.isna(df['Date']).sum().sum() + pd.isna(df['Name']).sum().sum() + pd.isna(df['UUID']).sum().sum() + pd.isna(df['Net Fare With Toll']).sum().sum() + pd.isna(df['Trips']).sum().sum() + pd.isna(df['Uber KMs']).sum().sum() + pd.isna(df['CashCollected']).sum().sum() + pd.isna(df['UberToll']).sum().sum() + pd.isna(df['Tips']).sum().sum() + pd.isna(df['Hours Online']).sum().sum() + pd.isna(df['Ratings']).sum().sum() + pd.isna(df['Acceptance Rate']).sum().sum() + pd.isna(df['Cancellation Rate']).sum().sum()
            error_list = []
            if is_na > 0:
                error_list.append('Found #N/A or blank values in the sheet. Please correct and re-upload')
                context = {'error_list': error_list, 'menu_payout': 'active', 'submenu_daily_data': 'active', 'form': form, }
                return render(request, 'add_payout_uber_daily_data.html', context=context)
            date_match = True
            for d in df['Date']:
                if str(d.strftime("%Y-%m-%d")) != str(date):
                    date_match = False
                    break
            if not date_match:
                error_list.append('Some dates are not matching in excel')
            if len(error_list) > 0:
                context = {'error_list': error_list, 'menu_payout': 'active', 'submenu_daily_data': 'active', 'form': form, }
                return render(request, 'add_payout_uber_daily_data.html', context=context)
            DailyTrip.objects.filter(date=date).update(is_active=0)
            for i in df.index:
                uuid = df['UUID'][i]
                driver_id = None
                car_id = None
                fleet_id = None
                manager_id = None
                try:
                    driver = Driver.objects.get(uber_uuid=uuid)
                    driver_id = driver.id
                except Driver.DoesNotExist:
                    driver_id = None
                # Replace this car lookup: store car_number, car_id and team_id via Car_team.
                # Only this logic needs to change; currently car_number comes directly from
                # the Car table, but it needs to come via the foreign key.
                try:
                    car = Car.objects.get(car_number=df["Car Number"][i])
                    car_id = car.id
                    manager_id = car.manager_id
                except Car.DoesNotExist:
                    car_id = None
                try:
                    fleet = Fleet.objects.get(name=df["Fleet Name"][i])
                    fleet_id = fleet.id
                except Fleet.DoesNotExist:
                    fleet_id = None
                name = df['Name'][i]
                car_number = df['Car Number'][i]
                fare_total = df['Net Fare With Toll'][i]
                trips = df['Trips'][i]
                pool_trips = 0
                hours_online = df['Hours Online'][i]
                total_km = df['Uber KMs'][i]
                cash_collected = abs(df['CashCollected'][i])
                toll = df['UberToll'][i]
                tip_amount = df['Tips'][i]
                fare_avg = float(fare_total) / int(trips)
                fare_per_hour_online = float(fare_total) / float(hours_online)
                fare_per_km = fare_total / total_km
                trips_per_hour = trips / hours_online
                km_per_trip = total_km / trips
                rating = df['Ratings'][i]
                acceptance_rate_perc = float(df['Acceptance Rate'][i]) / 100
                driver_cancellation_rate = float(df['Cancellation Rate'][i]) / 100
                obj, created = DailyTrip.all_objects.update_or_create(
                    date=date, uuid=uuid,
                    defaults={
                        'car_id': car_id,
                        'manager_id': manager_id,
                        'car_number': car_number,
                        'driver_id': driver_id,
                        'fleet_id': fleet_id,
                        'driver_name': name,
                        'fare_total': fare_total,
                        'trips': trips,
                        'pool_trips': pool_trips,
                        'hours_online': hours_online,
                        'total_km': total_km,
                        'cash_collected': cash_collected,
                        'toll': toll,
                        'tip_amount': tip_amount,
                        'fare_avg': fare_avg,
                        'fare_per_hour_online': fare_per_hour_online,
                        'fare_per_km': fare_per_km,
                        'trips_per_hour': trips_per_hour,
                        'km_per_trip': km_per_trip,
                        'rating': rating,
                        'acceptance_rate_perc': acceptance_rate_perc,
                        'driver_cancellation_rate': driver_cancellation_rate,
                        'is_active': 1,
                        'comments': None}
                )
            if len(error_list) > 0:
                DailyTrip.objects.filter(date=date).update(is_active=0)
                context = {'error_list': error_list, 'menu_payout': 'active', 'submenu_daily_data': 'active', 'form': form, }
                return render(request, 'add_payout_uber_daily_data.html', context=context)
            else:
                messages.success(request, 'Daily Trips added Successfully...')
                return redirect('/fleet/payout/daily_data/add/uber')
    else:
        form = UberPerformanceDataForm(initial={})
    context = {
        'menu_payout': 'active',
        'submenu_daily_data': 'active',
        'form': form,
    }
    return render(request, 'add_payout_uber_daily_data.html', context=context)
You can try this. To get car_number via Car_team:
car_team = Car_team.objects.all().last()  # get the last car_team, for example
car_number = car_team.car.car_number  # get the car number from the car_team
try:
    car = Car.objects.get(car_number=df["Car Number"][i])
    car_id = car.id
    car1 = Car_team.objects.filter(car_id=car_id)
    if car1:
        team_id = car1[0].team_id
    else:
        team_id = None
except Car.DoesNotExist:
    car_id = None
    team_id = None
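For illustration (not part of the original answer), a slightly tighter variant of the same lookup that fetches the related team in one query with select_related and avoids indexing the queryset:

try:
    car = Car.objects.get(car_number=df["Car Number"][i])
    car_id = car.id
    manager_id = car.manager_id
    car_team = (
        Car_team.objects.filter(car=car)
        .select_related('team')
        .last()  # e.g. the most recent assignment for this car
    )
    team_id = car_team.team_id if car_team else None
except Car.DoesNotExist:
    car_id = None
    team_id = None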

I've been looking for the way to return multiple SQL results with Json format in Python3

I've been looking for a way to output multiple SQL results in JSON format in Python 3. My API takes request parameters (luid), passes them to BigQuery in SQL, and returns the results to the client as JSON. So far I have succeeded in building an API that can handle a single parameter, but with multiple parameters it returns only the first parameter's result. I'm guessing this problem is caused by calling return jsonify({request_luid: str(row[0])}) inside the outer loop. I have no idea how I should change my code and logic. Could anyone tell me? I'm open to any idea.
Output of my code:
{
    "XXXXXXX5e30ab17f6b536879d25555": "True"   ⬅︎ my SQL seems to work correctly
}
Ideal output:
{
    "XXXXXXX5e30ab17f6b536879d25555": "True",
    "XXXXXXX8r30ab17f6b536879d25555": "False",
    "XXXXXXX9t30ab17f6b536879d25555": "True"
}
Endpoint
https://test-project-galvanic-ripsaw-281806.df.r.appspot.com?luid=XXXXXXX5e30ab17f6b536879d25555&luid=XXXXXXX8r30ab17f6b536879d25555&luid=XXXXXXX9t30ab17f6b536879d25555
main.py
@app.route('/')
def get_request():
    request_luids = request.args.getlist('luid') or ''
    for i in range(len(request_luids)):
        request_luid = request_luids[i]
        client = bigquery.Client()
        query = """SELECT EXISTS(
            SELECT 1
            FROM `test-project-281806.hitobito_test.test3` as p
            WHERE p.luid = '{}'
            AND p.cv_date IS NOT NULL limit 1000)""".format(request_luid)
        job_config = bigquery.QueryJobConfig(
            query_parameters=[
                bigquery.ScalarQueryParameter("request_luid", "STRING", request_luid)
            ]
        )
        query_job = client.query(query)
        query_res = query_job.result()
        for row in query_res:
            return jsonify({request_luid: str(row[0])})

if __name__ == "__main__":
    app.run()
I think the problem is here:
for row in query_res:
    return jsonify({request_luid: str(row[0])})
You can use this piece of code:
@app.route('/')
def get_request():
    request_luids = request.args.getlist('luid') or ''
    result = {}  # define an empty dictionary for the final result
    for i in range(len(request_luids)):
        request_luid = request_luids[i]
        client = bigquery.Client()
        query = """SELECT EXISTS(
            SELECT 1
            FROM `test-project-281806.hitobito_test.test3` as p
            WHERE p.luid = '{}'
            AND p.cv_date IS NOT NULL limit 1000)""".format(request_luid)
        job_config = bigquery.QueryJobConfig(
            query_parameters=[
                bigquery.ScalarQueryParameter("request_luid", "STRING", request_luid)
            ]
        )
        query_job = client.query(query)
        query_res = query_job.result()
        # you only need the first row of the result, so break out of the loop
        for row in query_res:
            temp_result = {request_luid: str(row[0])}  # a plain dict, not jsonify, so update() works
            break
        result.update(temp_result)  # add temp_result to the final result
    # then return the final result
    return result

if __name__ == "__main__":
    app.run()
My final code is below. Thank you so much for the good advice, Maryam Abdoli!
from flask import Flask, request, jsonify
from google.cloud import bigquery
import json

app = Flask(__name__)

@app.route('/')
def get_request():
    request_luids = request.args.getlist('luid') or ''
    result = {}
    for i in range(len(request_luids)):
        request_luid = str(request_luids[i])
        client = bigquery.Client()
        query = """SELECT EXISTS(
            SELECT 1
            FROM `test-project-281806.hitobito_test.test3` as p
            WHERE p.luid = '{}'
            AND p.cv_date IS NOT NULL)""".format(request_luid)
        job_config = bigquery.QueryJobConfig(
            query_parameters=[
                bigquery.ScalarQueryParameter("request_luid", "STRING", request_luid)
            ]
        )
        query_job = client.query(query)
        query_res = query_job.result()
        for row in query_res:
            temp_result = {request_luid: str(row[0])}
            break
        result.update(temp_result)
    return json.dumps(result)

if __name__ == "__main__":
    app.run()
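One detail worth noting (my observation, not from the thread): both versions define a ScalarQueryParameter but never use it. The SQL is built with str.format() and job_config is never passed to client.query(), so the query is not actually parameterized. A sketch of the parameterized form, which also protects against SQL injection through luid:

query = """SELECT EXISTS(
    SELECT 1
    FROM `test-project-281806.hitobito_test.test3` AS p
    WHERE p.luid = @request_luid
    AND p.cv_date IS NOT NULL)"""
job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("request_luid", "STRING", request_luid)
    ]
)
# @request_luid in the SQL refers to the named parameter; the config must be passed in.
query_job = client.query(query, job_config=job_config)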

Use dictionary instead of list of dictionary to reduce program complexity

I'm trying to validate consistency between DynamoDB tables. I used a list of dictionaries to store the DynamoDB table items, which takes a long time to execute.
I'm new to Python; any help converting the list of dictionaries to a dictionary, to reduce the program's complexity, would be appreciated.
#!/usr/bin/python
import sys
import boto3
import argparse

def table_consistency_check(table, column_name):
    paginator = dynamoClient.get_paginator('scan')
    modified_accounts = []
    params = {
        'TableName': table
    }
    page_iterator = paginator.paginate(**params)
    for page in page_iterator:
        for item in page['Items']:
            account = item['account_name']['S']
            license_key = item[column_name]['S']
            credentials = {
                'account_name': account,
                column_name: license_key
            }
            modified_accounts.append(credentials)
    return modified_accounts
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Find all accounts with different license key and account key hash')
    parser.add_argument('-r', '--region', nargs='?', type=str, default='us-west-2')
    try:
        args = parser.parse_args()
    except:
        exit_code = int(str(sys.exc_info()[1]))
    accounts_table = 'accounts_table'
    Credentail_table = 'credential_table'
    dynamoClient = boto3.client('dynamodb', region_name=args.region)
    account1 = table_consistency_check(accounts_table, 'license_key')
    account2 = table_consistency_check(Credentail_table, 'access_key_hash')
    output = []
    for acct_item in account1:
        for creds_item in account2:
            if acct_item['account_name'] == creds_item['account_name']:
                if creds_item['access_key_hash'].startswith('ORIGINAL_KEY_'):
                    val = creds_item['access_key_hash']
                    length = len('ORIGINAL_KEY_')
                    stripped_key = val[length:]  # renamed from `str` to avoid shadowing the built-in
                    if acct_item['license_key'] != stripped_key:
                        output.append(creds_item['account_name'])
                        print('Duplicate record found')
                        print('Account Name : ' + acct_item['account_name'] + ', License Key : ' + acct_item['license_key'] + ', Access Key Hash : ' + creds_item['access_key_hash'])
    if not output:
        print('the tables are consistent, No duplicate item found')
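A sketch of the dictionary-based rewrite the question asks about (same table and column names as above): key each table's items by account_name, so the comparison becomes one O(n) pass with O(1) lookups instead of the nested O(n*m) loops:

def table_consistency_check(table, column_name):
    # Scan a table into {account_name: column_value} instead of a list of dicts.
    paginator = dynamoClient.get_paginator('scan')
    accounts = {}
    for page in paginator.paginate(TableName=table):
        for item in page['Items']:
            accounts[item['account_name']['S']] = item[column_name]['S']
    return accounts

licenses = table_consistency_check('accounts_table', 'license_key')
hashes = table_consistency_check('credential_table', 'access_key_hash')
output = []
prefix = 'ORIGINAL_KEY_'
for account_name, license_key in licenses.items():
    access_key_hash = hashes.get(account_name)  # O(1) dictionary lookup replaces the inner loop
    if access_key_hash and access_key_hash.startswith(prefix):
        if license_key != access_key_hash[len(prefix):]:
            output.append(account_name)
            print('Duplicate record found')
            print('Account Name : ' + account_name + ', License Key : ' + license_key + ', Access Key Hash : ' + access_key_hash)
if not output:
    print('the tables are consistent, No duplicate item found')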

Dictionary with functions versus dictionary with class

I'm creating a game where the data is imported from a database, but I have a little problem...
Currently I get a copy of the data as a dictionary, which I need to pass as an argument to my GUI; however, I also need to process some of the data, as in this example.
I get the data as a dict (I've created the UseDatabase context manager and it's working):
def get_user(name: str, passwd: str):
    user = {}
    user['name'] = name
    user['passwd'] = passwd
    with UseDatabase() as cursor:
        _SQL = "SELECT id, cash, ruby FROM user WHERE name='Admin' AND password='adminpass'"
        cursor.execute(_SQL)
        res = cursor.fetchall()
        if res:
            user['id'] = res[0][0]
            user['cash'] = res[0][1]
            user['ruby'] = res[0][2]
            return user
        return res
.
.
.
def get_activities():
    with UseDatabase() as cursor:
        _SQL = "SELECT * FROM activities WHERE user_id='2'"
        cursor.execute(_SQL)
        res = cursor.fetchall()
        if res:
            ids = [i[0] for i in res]
            activities = {}
            for i in res:
                activities[i[0]] = {'title': i[1], 'unlock': i[2], 'usr_progress': i[3]}
            return (ids, activities)
        return res
I need it as a dict in my GUI (the "content" argument):
class SideBar:
    def __init__(self, screen: 'pygame.display.set_mode()', box_width: int, box_height: int, content: dict, font: 'font = pygame.font.Font()'):
        # content dict: {id: {'title': '', 'unlock': '', 'usr_progress': ''}, ...}
        self.box_width = box_width
        self.box_height = box_height
        self.box_per_screen = screen.get_height() // box_height
        self.content = content
        self.current_box = 1
        self.screen = screen
        self.font = font
        self.generate_bar()

    def generate_bar(self):
        active = [i for i in self.content.keys() if i in range(self.current_box, self.current_box + self.box_per_screen)]
        for i in range(self.box_per_screen):
            gfxdraw.box(self.screen, pygame.Rect((0, i * self.box_height), (self.screen.get_width() / 3, self.screen.get_height() / 3)), (249, 0, 0, 170))
            self.screen.blit(self.font.render(str(active[i]) + ' - ' + self.content[active[i]]['title'], True, (255, 255, 255)), (10, i * self.box_height + 4))
        for i in range(self.box_per_screen):
            pygame.draw.rect(self.screen, (50, 0, 0), pygame.Rect((0, i * self.box_height), (self.screen.get_width() / 3, self.screen.get_height() / 3)), 2)
But I still need to make some changes to the data:
def unlock_act(act_id):
    if user['cash'] >= activities[act_id]['unlock'] and activities[act_id]['usr_progress'] == 0:
        user['cash'] -= activities[act_id]['unlock']
        activities[act_id]['usr_progress'] = 1
So the question is: in this situation, should I keep a copy of the data as a dict and create a class around it plus the methods I need, or use functions to edit the data inside the dict?
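For illustration only (not from the post), a minimal sketch of the class option: the dicts returned by get_user() and get_activities() stay as internal storage, so they can still be handed to SideBar unchanged, while the mutations live on methods:

class Player:
    # Hypothetical wrapper around the dicts returned by get_user()/get_activities().
    def __init__(self, user: dict, activities: dict):
        self.user = user              # {'name': ..., 'passwd': ..., 'id': ..., 'cash': ..., 'ruby': ...}
        self.activities = activities  # {id: {'title': '', 'unlock': '', 'usr_progress': ''}, ...}

    def unlock_act(self, act_id) -> bool:
        act = self.activities[act_id]
        if self.user['cash'] >= act['unlock'] and act['usr_progress'] == 0:
            self.user['cash'] -= act['unlock']
            act['usr_progress'] = 1
            return True
        return False

# The GUI still receives a plain dict:
# side_bar = SideBar(screen, box_width, box_height, player.activities, font)

Either way works; the class gives you a single place to keep rules like unlock_act next to the data they modify, which tends to scale better than free functions mutating module-level dicts.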
