How to Automate SQL code using Python in an efficient way - python-3.x

I have a SQL query (database: Amazon Redshift) which looks like this:
WITH
X as
(
SELECT distinct pn , pg , ic, sr , cm, fq , m1 , m2 , m3 , m4
FROM table1 ORDER BY 1,2,3
),
table2 AS
(
Select g,p,t , avg(ss) as ss , avg(ce) as ce , sum(av) as ps
from
(
select distinct ic AS g , pn AS p , cm AS t , ss , cast((sum_m1/nullif(sum_m2,0)) as decimal(3,2)) as ce , av
from
(
select *
, cast((sum(m3) over (partition by ic, pn,cm)) as decimal) as ss
, sum(m1) over (partition by ic, pn,cm) as sum_m1
, sum(m2) over (partition by ic, pn,cm) as sum_m2
, cast((avg(m2) over (partition by ic, pn,cm)) as decimal) as av
from X
ORDER BY 1,2,3
)
order by 1,2,3
)
where ss is not null
group by 1,2,3
order by 1,2,3
)
The GROUP BY values g, p, t change every time, so a table is created for every new combination of g, p, t values.
One way to automate this is to embed the SQL in Python, which might be inefficient.
Here is my approach: I replace all the values in the query with {} placeholders.
For example:
I store all possible GROUP BY values in lists:
G=[g1,g2,g3]
P=[p1,p2,p3]
T=[t1,t2,t3]
Connection to the database:
c= psycopg2.connect(database=db,host=host,port=port,user=user,password=password,sslmode='require')
data2 = {}
for g in G:
    for p in P:
        for t in T:
            sqlstr = ("""WITH
X as
(
SELECT distinct pn , pg , ic, sr , cm, fq , m1 , m2 , m3 , m4
FROM table1 ORDER BY 1,2,3
),
table2 AS
(
Select {},{},{} , avg(ss) as ss , avg(ce) as ce , sum(av) as ps
from
(
select distinct ic AS g , pn AS p , cm AS t , ss , cast((sum_m1/nullif(sum_m2,0)) as decimal(3,2)) as ce , av
from
(
select *
, cast((sum(m3) over (partition by ic, pn,cm)) as decimal) as ss
, sum(m1) over (partition by ic, pn,cm) as sum_m1
, sum(m2) over (partition by ic, pn,cm) as sum_m2
, cast((avg(m2) over (partition by ic, pn,cm)) as decimal) as av
from X
ORDER BY 1,2,3
)
order by 1,2,3
)
where ss is not null
group by 1,2,3
order by 1,2,3
)
select * from table2""").format(g, p, t)
            data2[g + "_" + p + "_" + t] = pd.read_sql_query(sqlstr, c)
Is there a better way to pass parameters? In the code above, the sequence of {} placeholders must be maintained so that parameters are passed in the right order.
Can we use some approach other than raw SQL, in a more Pythonic way?
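One way to avoid relying on positional order is to use named placeholders with str.format, and to collapse the nested loops with itertools.product. A minimal sketch (SQL_TEMPLATE below is a hypothetical stand-in for the full query above, not the real one):

```python
from itertools import product

# Named placeholders: the order of keyword arguments no longer matters,
# and each name appears exactly where it is used in the template.
SQL_TEMPLATE = """
SELECT {g}, {p}, {t}, avg(ss) AS ss
FROM some_table      -- hypothetical table standing in for the full query
GROUP BY 1, 2, 3
"""

G = ["g1", "g2", "g3"]
P = ["p1", "p2", "p3"]
T = ["t1", "t2", "t3"]

queries = {}
for g, p, t in product(G, P, T):   # replaces the three nested loops
    queries["_".join((g, p, t))] = SQL_TEMPLATE.format(g=g, p=p, t=t)
```

Note that g, p and t are column names, so they cannot be bound as ordinary query parameters; if you want them composed safely, psycopg2 also offers the psycopg2.sql module (sql.SQL(...).format(g=sql.Identifier(g))).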

Related

Databricks Error in SQL statement: ParseException: mismatched input 'Service_Date

I am running this script in Azure Databricks using Spark SQL and getting this error:
Error in SQL statement: ParseException:
mismatched input 'Service_Date' expecting {'(', 'DESC', 'DESCRIBE', 'FROM', 'MAP', 'REDUCE', 'SELECT', 'TABLE', 'VALUES', 'WITH'}(line 16, pos 0)
I am new to Databricks, so I am wondering if there are any tips on troubleshooting this.
CREATE OR REPLACE VIEW operations_staging.v_claims AS (
/
WITH Snapshot_Date AS
(
SELECT T1.claim_number,
T1.source_system,
MAX(T1.snapshot_date) snapshot_date
FROM bhc_claim.medical_claim T1
GROUP BY T1.claim_number,
T1.source_system
),
/
Service_Date AS
(
SELECT T1.claim_number,
T1.source_system,
MIN(T1.service_from_date) claim_service_date
FROM bhc_claim.medical_claim_detail T1
GROUP BY T1.claim_number,
T1.source_system
),
Pend_Step1 AS
(
SELECT T1.claim_num Claim_Number,
T1.tax_id,
T1.provider provider_name,
TO_DATE(T1.incurred,"MM/dd/yyyy") Service_Date,
TO_DATE(T1.received,"MM/dd/yyyy") Received_Date,
TO_DATE(T1.report_date,"MM/dd/yyyy") Report_Date,
T1.pending_amount Pend_Amount,
T1.pend_code Pend_Code,
T1.pend_code_description Pend_Code_Desc,
T1.hold_reason_code Hold_Code,
T1.hold_code_description Hold_Code_Desc
FROM loomis_2021.pu T1 -- 277,011
GROUP BY T1.claim_num,
T1.tax_id,
T1.provider,
T1.incurred,
T1.received,
T1.report_date
T1.pending_amount,
T1.pend_code,
T1.pend_code_description,
T1.hold_reason_code,
T1.hold_code_description
),
Pend_Step2 AS
(
SELECT Claim_Number,
concat_ws(",", collect_set(DISTINCT T1.Hold_Code)) Hold_Code,
concat_ws(",", collect_set(DISTINCT T1.Pend_Code)) Pend_Code
FROM Pend_Step1 T1
GROUP BY Claim_Number
),
Pend_Step3 AS
(
SELECT T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Report_Date,
T2.Hold_Code,
T2.Pend_Code,
T1.Pend_Amount
FROM Pend_Step1 T1
LEFT JOIN Pend_Step2 T2
ON T1.Claim_Number = T2.Claim_Number
GROUP BY T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Report_Date,
T2.Hold_Code,
T2.Pend_Code,
T1.Pend_Amount
),
Pend_Step4 AS
(
SELECT T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Hold_Code,
T1.Pend_Code,
SUM(T1.Pend_Amount) Pend_Amount
FROM Pend_Step3 T1 -- 277,011
GROUP BY T1.Claim_Number,
T1.tax_id,
T1.provider_name,
T1.Service_Date,
T1.Received_Date,
T1.Hold_Code,
T1.Pend_Code
),
Paid_Previous_Step1 AS
(
SELECT MAX(claim_received_date) Max_Received_Date
FROM bhc_claim.medical_claim
WHERE DAYOFWEEK(claim_received_date) = 1
AND claim_received_date < NOW()
),
Paid_Previous_Step2 AS
(
SELECT T1.`claim_#` Claim_Number,
T1.tax_id Tax_ID,
CASE WHEN T1.provider_group_name IS NOT NULL THEN T1.provider_group_name
ELSE CONCAT(T1.provider_first_name,T1.provider_last_name)
END provider_name,
TO_DATE(T1.last_refresh_date,"yyyyMMdd") Refresh_Date,
TO_DATE(T1.received_date,"yyyyMMdd") Received_Date,
TO_DATE(T1.processed_date,"yyyyMMdd") Processed_Date,
MIN(TO_DATE(T1.from_dos,"yyyyMMdd")) Service_Date,
'Issued' Status,
SUM(T1.covered) Paid_Amount,
SUM(T1.billed) Billed_Amount
FROM Loomis_2021.paid_previous T1
CROSS JOIN Paid_Previous_Step1 T2
WHERE TO_DATE(T1.last_refresh_date,"yyyyMMdd") >= T2.Max_Received_Date
GROUP BY T1.`claim_#`,
T1.tax_id,
CASE WHEN T1.provider_group_name IS NOT NULL THEN T1.provider_group_name
ELSE CONCAT(T1.provider_first_name,T1.provider_last_name)
END,
T1.last_refresh_date,
T1.received_date,
T1.processed_date
),
Paid_Previous_Step3 AS
(
SELECT T1.Claim_Number,
MAX(T1.Refresh_Date) Refresh_Date
FROM Paid_Previous_Step2 T1
GROUP BY T1.Claim_Number
),
Paid_Previous_Step4 AS
(
SELECT T1.Claim_Number,
T1.Tax_ID,
T1.provider_name,
T1.Refresh_Date,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Status,
T1.Paid_Amount,
T1.Billed_Amount
FROM Paid_Previous_Step2 T1 -- 60,746
INNER JOIN Paid_Previous_Step3 T2 -- 60,746
ON T1.Claim_Number = T2.Claim_Number --3,816,359
AND T1.Refresh_Date = T2.Refresh_Date
),
Claim_Detail_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
MAX(T1.snapshot_date) snapshot_date
FROM bhc_claim.medical_claim_detail T1 -- 277,011
GROUP BY T1.claim_number,
T1.source_system
),
Revenue_Code_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
T1.snapshot_date,
concat_ws(",", collect_set(DISTINCT T2.revenue_code)) Revenue_Code
FROM Claim_Detail_Step1 T1
INNER JOIN bhc_claim.medical_claim_detail T2
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date -- 277,011
GROUP BY T1.claim_number,
T1.source_system,
T1.snapshot_date
),
Provider_Detail_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
T2.provider_npi,
T2.provider_tin,
T2.provider_type,
CASE WHEN provider_group_name IS NOT NULL THEN provider_group_name
ELSE CONCAT(T2.provider_first_name,T2.provider_last_name)
END provider_name,
T2.sequence_number
FROM Claim_Detail_Step1 T1
INNER JOIN bhc_claim.medical_claim_detail_provider T2
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date
WHERE T2.provider_type = 'BILLING'
GROUP BY T1.claim_number,
T1.source_system,
T2.provider_npi,
T2.provider_tin,
T2.provider_type,
CASE WHEN provider_group_name IS NOT NULL THEN provider_group_name
ELSE CONCAT(T2.provider_first_name,T2.provider_last_name)
END,
T2.sequence_number
),
Market_Detail_Step1 AS
(
SELECT REPLACE(T1.hios_plan_id_standard_component_variant, '-', '') Hios_ID,
MAX(T1.plan_year) plan_year
FROM bdp.plans T1
GROUP BY 1
),
Market_Detail_Step2 AS
(
SELECT T1.Hios_ID,
T1.plan_year,
TRIM(T2.market) market
FROM Market_Detail_Step1 T1
INNER JOIN bdp.plans T2
ON T1.Hios_ID = REPLACE(T2.hios_plan_id_standard_component_variant, '-', '')
AND T1.plan_year = T2.plan_year
),
Market_Detail_Step3 AS
(
SELECT T1.member_id,
TO_DATE(T1.source_start_date) source_start_date,
TO_DATE(T1.source_end_date) source_end_date,
T2.market
FROM dev.enrollment__base_enrollment T1 -- 3,568,717 | 3,568,717
LEFT JOIN Market_Detail_Step2 T2
ON T2.Hios_ID = T1.plan_id
WHERE T1.source_start_date <> T1.source_end_date
AND T1.effectuation_date_utc IS NOT NULL
GROUP BY T1.member_id,
TO_DATE(T1.source_start_date),
TO_DATE(T1.source_end_date),
T2.market
),
Remark_Code_Step1 AS
(
SELECT T1.claim_number,
T1.source_system,
concat_ws(",", collect_set(DISTINCT T1.remark_code)) Remark_Code
FROM bhc_claim.medical_claim_detail_remark T1 -- 3,731,653 | 3,731,653
INNER JOIN bhc_claim.medical_claim_detail T2
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date
GROUP BY T1.claim_number,
T1.source_system
),
Integration_Step1 AS
(
SELECT CASE WHEN T1.claim_number IS NOT NULL THEN T1.claim_number
WHEN T7.Claim_Number IS NOT NULL THEN T7.Claim_Number
WHEN T10.Claim_Number IS NOT NULL THEN T10.Claim_Number
END Claim_Number,
TO_DATE(CASE WHEN T1.claim_received_date IS NOT NULL THEN T1.claim_received_date
WHEN T7.Received_Date IS NOT NULL THEN T7.Received_Date
WHEN T10.Received_Date IS NOT NULL THEN T10.Received_Date
END) Received_Date,
TO_DATE(CASE WHEN T1.claim_processed_date IS NOT NULL THEN T1.claim_processed_date
WHEN T10.Processed_Date IS NOT NULL THEN T10.Processed_Date
END) Processed_Date,
TO_DATE(CASE WHEN T3.claim_service_date IS NOT NULL THEN T3.claim_service_date
WHEN T7.Service_Date IS NOT NULL THEN T7.Service_Date
WHEN T10.Service_Date IS NOT NULL THEN T10.Service_Date
END) Service_Date,
TO_DATE(T1.check_date) Check_Date,
CASE WHEN T7.Claim_Number IS NOT NULL THEN 'Loomis'
WHEN T10.Claim_Number IS NOT NULL THEN 'Loomis'
ELSE T1.source_system
END Source_System,
CASE WHEN T1.claim_status_description = 'P' AND T1.total_excluded_amount = T1.total_original_claim_amount THEN 'Denied'
WHEN T10.status = 'Issued' THEN 'Issued'
ELSE T1.claim_status_description
END Status,
T1.payment_status_description Payment_Status,
CASE WHEN T7.Claim_Number IS NOT NULL
THEN 'Pending' END Pend_Status,
CASE WHEN T10.Claim_Number IS NOT NULL
THEN 'Paid Previous' END Paid_Previous_Status,
T1.claim_submission_type_description Submission_Type,
T1.line_of_business Segment,
T7.Hold_Code,
T7.Pend_Code,
T5.Remark_Code,
T9.Revenue_Code,
CASE WHEN T7.Pend_Code IN('17','18','19','44','60','63','86','89','97')
OR T7.Hold_Code LIKE('%13%')
OR T7.Hold_Code LIKE('%70%')
THEN 'Bright'
ELSE 'Loomis'
END Bucket_Owner,
T1.member_id Member_ID,
T1.subscriber_id Subscriber_ID,
T1.subscriber_group_number Subscriber_Group,
T4.provider_npi,
CASE WHEN T4.provider_tin IS NOT NULL THEN T4.provider_tin
WHEN T7.tax_id IS NOT NULL THEN T7.tax_id
WHEN T10.Tax_ID IS NOT NULL THEN T10.Tax_ID
END provider_tin,
CASE WHEN T4.provider_name IS NOT NULL THEN T4.provider_name
WHEN T7.provider_name IS NOT NULL THEN T7.provider_name
WHEN T10.provider_name IS NOT NULL THEN T10.provider_name
END provider_name,
CASE WHEN T10.Billed_Amount IS NOT NULL THEN T10.Billed_Amount
ELSE (T1.total_billed_amount/100)
END Billed_Amount,
(T1.total_processed_amount/100) Processed_Amount,
CASE WHEN T10.Paid_Amount IS NOT NULL THEN T10.Paid_Amount
ELSE (T1.total_paid_amount/100)
END Paid_Amount,
(T1.total_interest_paid_amount/100) Interest_Paid_Amount,
T7.Pend_Amount
FROM bhc_claim.medical_claim T1 -- 3,472,165
INNER JOIN Snapshot_Date T2 -- 3,472,165
ON T1.claim_number = T2.claim_number
AND T1.source_system = T2.source_system
AND T1.snapshot_date = T2.snapshot_date
LEFT JOIN Service_Date T3 -- 3,472,165
ON T1.claim_number = T3.claim_number
AND T1.source_system = T3.source_system
LEFT JOIN Provider_Detail_Step1 T4 -- 3,498,170 MATCH
ON T1.claim_number = T4.claim_number
AND T1.source_system = T4.source_system
LEFT JOIN Remark_Code_Step1 T5
ON T1.claim_number = T5.claim_number
AND T1.source_system = T5.source_system
FULL OUTER JOIN Pend_Step4 T7 -- 3,472,419 | Match Disctinct 3,472,419
ON LEFT(T1.claim_number,10) = T7.Claim_Number
LEFT JOIN Revenue_Code_Step1 T9
ON T1.claim_number = T9.claim_number
AND T1.source_system = T9.source_system
FULL OUTER JOIN Paid_Previous_Step4 T10 --3,816,359 | Match Disctinct 3,472,419
ON LEFT(T1.claim_number,10) = T10.Claim_Number
),
FINAL AS
(
SELECT T1.Claim_Number,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Report_Date
T1.Check_Date,
T1.Source_System,
T1.Status,
T1.Payment_Status,
T1.Pend_Status,
T1.Paid_Previous_Status,
T1.Submission_Type,
T1.Segment,
MIN(T2.Market) Market,
T1.Hold_Code,
T1.Pend_Code,
T1.Remark_Code,
T1.Revenue_Code,
T1.Bucket_Owner,
T1.Member_ID,
T1.Subscriber_ID,
T1.Subscriber_Group,
T1.provider_npi,
T1.provider_tin,
T1.provider_name,
T1.Billed_Amount,
T1.Processed_Amount,
T1.Paid_Amount,
T1.Interest_Paid_Amount,
T1.Pend_Amount
FROM Integration_Step1 T1
LEFT JOIN Market_Detail_Step3 T2
ON T1.member_id = T2.member_id
AND T1.Service_Date >= T2.source_start_date
AND T1.Service_Date <= T2.source_end_date
GROUP BY T1.Claim_Number,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Report_Date,
T1.Check_Date,
T1.Source_System,
T1.Status,
T1.Payment_Status,
T1.Pend_Status,
T1.Paid_Previous_Status,
T1.Submission_Type,
T1.Segment,
T1.Hold_Code,
T1.Pend_Code,
T1.Remark_Code,
T1.Revenue_Code,
T1.Bucket_Owner,
T1.Member_ID,
T1.Subscriber_ID,
T1.Subscriber_Group,
T1.provider_npi,
T1.provider_tin,
T1.provider_name,
T1.Billed_Amount,
T1.Processed_Amount,
T1.Paid_Amount,
T1.Interest_Paid_Amount,
T1.Pend_Amount
)
SELECT T1.Claim_Number,
T1.Received_Date,
T1.Processed_Date,
T1.Service_Date,
T1.Report_Date,
T1.Check_Date,
T1.Source_System,
T1.Status,
T1.Payment_Status,
T1.Pend_Status,
T1.Paid_Previous_Status,
T1.Submission_Type,
T1.Segment,
T1.Market,
T1.Hold_Code,
T1.Pend_Code,
T1.Remark_Code,
T1.Revenue_Code,
T1.Bucket_Owner,
T1.Member_ID,
T1.Subscriber_ID,
T1.Subscriber_Group,
T1.provider_npi,
T1.provider_tin,
T1.provider_name,
T1.Billed_Amount,
T1.Processed_Amount,
T1.Paid_Amount,
T1.Interest_Paid_Amount,
T1.Pend_Amount
FROM FINAL T1-- 3,789,713
)
;
I think your attempt to create comments using the stray "/" characters is not correct, and hence the error. Can you please remove them and test the query?
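The suggested fix can even be applied programmatically before submitting the statement; a small sketch (strip_stray_slashes is a hypothetical helper, assuming the stray markers sit on lines of their own, as in the script above):

```python
def strip_stray_slashes(sql):
    """Remove lines consisting only of a stray '/', which Spark SQL
    cannot parse; use '--' or '/* ... */' for comments instead."""
    return "\n".join(
        line for line in sql.splitlines()
        if line.strip() != "/"
    )

cleaned = strip_stray_slashes("SELECT 1\n/\nSELECT 2")
# cleaned no longer contains the lone '/' line
```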

SQL Server 2017 - Dynamically generate a string based on the number of columns in another string

I have the following table & data:
CREATE TABLE dbo.TableMapping
(
[GenericMappingKey] [nvarchar](256) NULL,
[GenericMappingValue] [nvarchar](256) NULL,
[TargetMappingKey] [nvarchar](256) NULL,
[TargetMappingValue] [nvarchar](256) NULL
)
INSERT INTO dbo.TableMapping
(
[GenericMappingKey]
,[GenericMappingValue]
,[TargetMappingKey]
,[TargetMappingValue]
)
VALUES
(
'Generic'
,'Col1Source|Col1Target;Col2Source|Col2Target;Col3Source|Col3Target;Col4Source|Col4Target;Col5Source|Col5Target;Col6Source|Col6Target'
,'Target'
,'Fruit|Apple;Car|Red;House|Bungalo;Gender|Female;Material|Brick;Solution|IT'
)
I would need to be able to automatically generate my GenericMappingValue string dynamically based on the number of column pairs in the TargetMappingValue column.
Currently, there are 6 column mapping pairs. However, if I only had two mapping column pairs in my TargetMapping such as the following...
'Fruit|Apple;Car|Red'
then I would like for the GenericMappingValue to be automatically generated (updated) such as the following since, as a consequence, I would only have 2 column pairs in my string...
'Col1Source|Col1Target;Col2Source|Col2Target'
I've started building the following query logic:
DECLARE @Mapping nvarchar(256)
SELECT @Mapping = [TargetMappingValue] FROM TableMapping
PRINT @Mapping
SELECT COUNT(*) ColumnPairCount
FROM STRING_SPLIT(@Mapping, ';')
The above query gives me a correct count of 6 for my column pairs.
How would I be able to continue my logic to achieve my automatically generated mapping string?
I think I understand what you are after. This should get you moving in the right direction.
Since you've tagged 2017, you can use STRING_AGG().
You'll want to split your TargetMappingValue using STRING_SPLIT() with ROW_NUMBER() in a sub-query. (NOTE: we aren't guaranteed order when using STRING_SPLIT() with ROW_NUMBER() here, but it will work for this situation. There is an example below using OPENJSON() if we need to ensure accurate order.)
Then you can use that ROW_NUMBER() as the column indicator/number in a CONCAT().
Then bring it all back together using STRING_AGG().
Have a look at this working example:
DECLARE @TableMapping TABLE
(
[GenericMappingKey] [NVARCHAR](256) NULL
, [GenericMappingValue] [NVARCHAR](256) NULL
, [TargetMappingKey] [NVARCHAR](256) NULL
, [TargetMappingValue] [NVARCHAR](256) NULL
);
INSERT INTO @TableMapping (
[GenericMappingKey]
, [GenericMappingValue]
, [TargetMappingKey]
, [TargetMappingValue]
)
VALUES ( 'Generic'
, 'Col1Source|Col1Target;Col2Source|Col2Target;Col3Source|Col3Target;Col4Source|Col4Target;Col5Source|Col5Target;Col6Source|Col6Target'
, 'Target'
, 'Fruit|Apple;Car|Red;House|Bungalo;Gender|Female;Material|Brick;Solution|IT' );
SELECT [col].[GenericMappingKey]
, STRING_AGG(CONCAT('Col', [col].[ColNumber], 'Source|Col', [col].[ColNumber], 'Target'), ';') AS [GeneratedGenericMappingValue]
, [col].[TargetMappingKey]
, [col].[TargetMappingValue]
FROM (
SELECT *
, ROW_NUMBER() OVER ( ORDER BY (
SELECT 1
)
) AS [ColNumber]
FROM @TableMapping
CROSS APPLY STRING_SPLIT([TargetMappingValue], ';')
) AS [col]
GROUP BY [col].[GenericMappingKey]
, [col].[TargetMappingKey]
, [col].[TargetMappingValue];
Here's an example of what an update would look like assuming your primary key is the GenericMappingKey column:
--This what an update would look like
--Assuming your primary key is the [GenericMappingKey] column
UPDATE [upd]
SET [upd].[GenericMappingValue] = [g].[GeneratedGenericMappingValue]
FROM (
SELECT [col].[GenericMappingKey]
, STRING_AGG(CONCAT('Col', [col].[ColNumber], 'Source|Col', [col].[ColNumber], 'Target'), ';') AS [GeneratedGenericMappingValue]
, [col].[TargetMappingKey]
, [col].[TargetMappingValue]
FROM (
SELECT *
, ROW_NUMBER() OVER ( ORDER BY (
SELECT 1
)
) AS [ColNumber]
FROM @TableMapping
CROSS APPLY [STRING_SPLIT]([TargetMappingValue], ';')
) AS [col]
GROUP BY [col].[GenericMappingKey]
, [col].[TargetMappingKey]
, [col].[TargetMappingValue]
) AS [g]
INNER JOIN @TableMapping [upd]
ON [upd].[GenericMappingKey] = [g].[GenericMappingKey];
Shnugo brings up a great point in the comments: we are not guaranteed sort order with STRING_SPLIT() and ROW_NUMBER(). In this particular situation it wouldn't matter, as the output mapping is generic. But if you needed to use elements from your "TargetMappingValue" column in the final "GenericMappingValue", then you would need to make sure the sort order was accurate.
Here's an example showing how to use OPENJSON() and its "key", which would guarantee that order, using Shnugo's example:
SELECT [col].[GenericMappingKey]
, STRING_AGG(CONCAT('Col', [col].[colNumber], 'Source|Col', [col].[colNumber], 'Target'), ';') AS [GeneratedGenericMappingValue]
, [col].[TargetMappingKey]
, [col].[TargetMappingValue]
FROM (
SELECT [tm].*
, [oj].[Key] + 1 AS [colNumber] --Use the key as our order/column number, adding 1 as it is zero based.
, [oj].[Value] -- and if needed we can bring the split value out.
FROM #TableMapping [tm]
CROSS APPLY OPENJSON('["' + REPLACE([tm].[TargetMappingValue], ';', '","') + '"]') [oj] --Basically turn the column value into JSON string.
) AS [col]
GROUP BY [col].[GenericMappingKey]
, [col].[TargetMappingKey]
, [col].[TargetMappingValue];
If the data is already in the table and you want to break it out into columns, this should work:
select
v.value
,left(v.value, charindex('|',v.value) -1) col1
,reverse(left(reverse(v.value), charindex('|',reverse(v.value)) -1)) col2
from STRING_SPLIT(@Mapping, ';') v
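For comparison, the pair-splitting logic that the charindex/reverse expressions implement can be sketched in Python (split_pairs is a hypothetical helper, assuming the key|value;key|value format shown above):

```python
def split_pairs(mapping):
    """Split 'a|b;c|d' into [('a', 'b'), ('c', 'd')] - the same result
    as the charindex/reverse trick, one tuple per semicolon-separated pair."""
    return [tuple(pair.split("|", 1)) for pair in mapping.split(";")]

pairs = split_pairs("Fruit|Apple;Car|Red")
# pairs[0] is the first (col1, col2) pair
```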

SQL Oracle Sub-query

I am having an issue getting this sub-query to run. I am using Toad Data Point (Oracle) and I get a syntax error. I have tried several different ways with no luck. I am new to sub-queries.
Select *
from FINC.VNDR_ITEM_M as M
where M.ACCT_DOC_NBR = A.ACCT_DOC_NBR
(SELECT A.CLIENT_ID,
A.SRC_SYS_ID,
A.CO_CD,
A.ACCT_NBR,
A.CLR_DT,
A.ASGN_NBR,
A.FISCAL_YR,
A.ACCT_DOC_NBR,
A.LINE_ITEM_NBR,
A.MFR_PART_NBR,
A.POST_DT,
A.DRCR_IND,
A.DOC_CRNCY_AMT,
A.CRNCY_CD,
A.BSL_DT
FROM FINC.VNDR_ITEM_F A
WHERE A.CLR_DT IN (SELECT MAX(B.CLR_DT)
FROM FINC.VNDR_ITEM_F AS B
where (B.ACCT_DOC_NBR = A.ACCT_DOC_NBR and B.FISCAL_YR=A.FISCAL_YR and B.LINE_ITEM_NBR = A.LINE_ITEM_NBR and B.SRC_SYS_ID =A.SRC_SYS_ID and B.POST_DT=A.POST_DT and B.CO_CD=A.CO_CD)
and (B.CO_CD >='1000' and B.CO_CD <= '3000' or B.CO_CD ='7090') and (B.POST_DT Between to_date ('08/01/2018','mm/dd/yyyy')
AND to_date ('08/31/2018', 'mm/dd/yyyy')) and (B.SRC_SYS_ID ='15399') and (B.FISCAL_YR ='2018'))
GROUP BY
A.CLIENT_ID,
A.SRC_SYS_ID,
A.CO_CD,
A.ACCT_NBR,
A.CLR_DT,
A.ASGN_NBR,
A.FISCAL_YR,
A.ACCT_DOC_NBR,
A.LINE_ITEM_NBR,
A.MFR_PART_NBR,
A.POST_DT,
A.DRCR_IND,
A.DOC_CRNCY_AMT,
A.CRNCY_CD,
A.BSL_DT)
Your syntax is broken: you put the subquery at the very end. Now it looks like:
select *
from dual as m
where a.dummy = m.dummy
(select dummy from dual)
It is in an incorrect place, not joined, not aliased. What you should probably do is:
select *
from dual m
join (select dummy from dual) a on a.dummy = m.dummy
You also have some redundant, unnecessary brackets, but that's a minor flaw. Full code (I cannot test it without data access):
select *
from FINC.VNDR_ITEM_M M
join (SELECT A.CLIENT_ID, A.SRC_SYS_ID, A.CO_CD, A.ACCT_NBR, A.CLR_DT, A.ASGN_NBR,
A.FISCAL_YR, A.ACCT_DOC_NBR, A.LINE_ITEM_NBR, A.MFR_PART_NBR, A.POST_DT,
A.DRCR_IND, A.DOC_CRNCY_AMT, A.CRNCY_CD, A.BSL_DT
FROM FINC.VNDR_ITEM_F A
WHERE A.CLR_DT IN (SELECT MAX(B.CLR_DT)
FROM FINC.VNDR_ITEM_F B
where B.ACCT_DOC_NBR = A.ACCT_DOC_NBR
and B.FISCAL_YR=A.FISCAL_YR
and B.LINE_ITEM_NBR = A.LINE_ITEM_NBR
and B.SRC_SYS_ID =A.SRC_SYS_ID
and B.POST_DT=A.POST_DT
and B.CO_CD=A.CO_CD
and (('1000'<=B.CO_CD and B.CO_CD<='3000') or B.CO_CD='7090')
and B.POST_DT Between to_date ('08/01/2018', 'mm/dd/yyyy')
AND to_date ('08/31/2018', 'mm/dd/yyyy')
and B.SRC_SYS_ID ='15399' and B.FISCAL_YR ='2018')
GROUP BY A.CLIENT_ID, A.SRC_SYS_ID, A.CO_CD, A.ACCT_NBR, A.CLR_DT, A.ASGN_NBR,
A.FISCAL_YR, A.ACCT_DOC_NBR, A.LINE_ITEM_NBR, A.MFR_PART_NBR, A.POST_DT,
A.DRCR_IND, A.DOC_CRNCY_AMT, A.CRNCY_CD, A.BSL_DT) A
on M.ACCT_DOC_NBR = A.ACCT_DOC_NBR and M.CO_CD=A.CO_CD;
You need to add an alias to the SubSelect (or Derived Table in Standard SQL):
select *
from
( select .......
) dt
join ....

Recursive Relationship Query

I am looking to implement graph tables to map the role hierarchy for my application in Azure SQL. The graph will look like a tree when laid out, with a parent able to manage any role that falls beneath it on the tree.
So I have a roles node table and a canmanage edge table.
I am familiar with querying the first and second levels of relationships, but I need a query where I can put in any role and receive a list of all the children that fall under it.
I am familiar with this sort of thing in Neo4j, but I have not found any documentation on how to accomplish it in Azure SQL.
How do I go about running a recursive query to get all the child roles given a specific role name or id?
This is possible in SQL Server 2017 and Azure SQL DB using the new graph database capabilities and the new MATCH clause to model this type of relationship. Unfortunately, in v1 polymorphism and transitive closure are not natively included, but they are possible using recursive queries. If you look at the last query, it keeps the parameter you input as the top-level manager and iterates over the rest.
A sample script:
USE tempdb
GO
-- NODES
DROP TABLE IF EXISTS dbo.roles
-- EDGES
DROP TABLE IF EXISTS dbo.canManage
DROP TABLE IF EXISTS dbo.isManagedBy
GO
CREATE TABLE dbo.roles (
roleId INT PRIMARY KEY,
roleName VARCHAR(20) UNIQUE NOT NULL
) AS NODE
CREATE TABLE dbo.canManage AS EDGE;
CREATE TABLE dbo.isManagedBy AS EDGE;
GO
-- Populate node table
INSERT INTO dbo.roles ( roleId, roleName )
VALUES
( 1, 'CEO' ),
( 2, 'VP 1' ),
( 3, 'VP 2' ),
( 4, 'Sales Manager 1' ),
( 5, 'Sales Manager 2' ),
( 6, 'Ops Manager 1' ),
( 7, 'Ops Manager 2' ),
( 8, 'Sales Lead 1' ),
( 9, 'Salesperson 1' ),
( 10, 'Salesperson 2' ),
( 11, 'Salesperson 3' )
GO
-- Populate edge table
INSERT INTO dbo.canManage ( $from_id, $to_id )
SELECT ceo.$node_id, VPs.$node_id
FROM dbo.roles ceo
CROSS JOIN dbo.roles VPs
WHERE ceo.roleName = 'CEO'
AND VPs.roleName Like 'VP%'
-- VP 1 manages Sales Managers
INSERT INTO dbo.canManage ( $from_id, $to_id )
SELECT a.$node_id, b.$node_id
FROM dbo.roles a
CROSS JOIN dbo.roles b
WHERE a.roleName = 'VP 1'
AND b.roleName Like 'Sales Manager%'
-- VP 2 manages Ops Managers
INSERT INTO dbo.canManage ( $from_id, $to_id )
SELECT a.$node_id, b.$node_id
FROM dbo.roles a
CROSS JOIN dbo.roles b
WHERE a.roleName = 'VP 2'
AND b.roleName Like 'Ops Manager%'
-- Sales Manager 1 manages Sales Leads
INSERT INTO dbo.canManage ( $from_id, $to_id )
SELECT a.$node_id, b.$node_id
FROM dbo.roles a
CROSS JOIN dbo.roles b
WHERE a.roleName = 'Sales Manager 1'
AND b.roleName Like 'Sales Lead%'
-- Sales Leads 1 manages all salespersons
INSERT INTO dbo.canManage ( $from_id, $to_id )
SELECT a.$node_id, b.$node_id
FROM dbo.roles a
CROSS JOIN dbo.roles b
WHERE a.roleName = 'Sales Lead 1'
AND b.roleName Like 'Salesperson%'
-- Create the inverse edge / relationship
INSERT INTO dbo.isManagedBy ( $from_id, $to_id )
SELECT $to_id, $from_id
FROM dbo.canManage
GO
-- Now write the graph queries:
-- Manages
SELECT FORMATMESSAGE( '%s manages %s', r1.roleName, r2.roleName ) manages
FROM dbo.roles r1, dbo.canManage canManage, dbo.roles r2
WHERE MATCH ( r1-(canManage)->r2 )
-- Same manager
SELECT FORMATMESSAGE( '%s and %s have the same manager %s', r1.roleName, r3.roleName, r2.roleName )
FROM dbo.roles r1, dbo.isManagedBy m1, dbo.roles r2, dbo.isManagedBy m2, dbo.roles r3
WHERE MATCH ( r1-(m1)->r2<-(m2)-r3 )
AND r1.$node_id < r3.$node_id
-- Recursive
-- walk the tree ... CEO manages everyone ...
;WITH cte AS (
SELECT 1 xlevel, r1.roleName manager, r2.roleName managed
FROM dbo.roles r1, dbo.canManage canManage, dbo.roles r2
WHERE MATCH ( r1-(canManage)->r2 )
AND r1.roleName = 'CEO'
UNION ALL
SELECT c.xlevel + 1, r1.roleName, r2.roleName
FROM cte c, dbo.roles r1, dbo.canManage canManage, dbo.roles r2
WHERE c.managed = r1.roleName
AND MATCH ( r1-(canManage)->r2 )
)
SELECT *
FROM cte
ORDER BY xlevel, manager, managed
;WITH cte AS (
SELECT 1 xlevel, r1.roleName manager, r2.roleName managed
FROM dbo.roles r1, dbo.canManage canManage, dbo.roles r2
WHERE MATCH ( r1-(canManage)->r2 )
AND r1.roleName = 'CEO'
UNION ALL
SELECT c.xlevel + 1, c.manager, r2.roleName
FROM cte c, dbo.roles r1, dbo.canManage canManage, dbo.roles r2
WHERE c.managed = r1.roleName
AND MATCH ( r1-(canManage)->r2 )
)
SELECT *
FROM cte
ORDER BY xlevel, manager, managed
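For intuition, the transitive closure that the recursive CTE computes is an ordinary breadth-first walk over the edges; a sketch in plain Python over a dictionary mirroring the sample canManage edges (the dictionary below is hand-built from the inserts above, not generated from the tables):

```python
from collections import deque

# Adjacency list mirroring the sample canManage edges inserted above.
can_manage = {
    "CEO": ["VP 1", "VP 2"],
    "VP 1": ["Sales Manager 1", "Sales Manager 2"],
    "VP 2": ["Ops Manager 1", "Ops Manager 2"],
    "Sales Manager 1": ["Sales Lead 1"],
    "Sales Lead 1": ["Salesperson 1", "Salesperson 2", "Salesperson 3"],
}

def all_managed(role):
    """Breadth-first walk: every role reachable from `role`,
    level by level, like the xlevel column in the CTE."""
    seen, queue = [], deque(can_manage.get(role, []))
    while queue:
        r = queue.popleft()
        if r not in seen:
            seen.append(r)
            queue.extend(can_manage.get(r, []))
    return seen

# all_managed("VP 1") walks down through sales managers to the salespersons.
```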

Spotfire - advanced row level security

I'm working on row-level security in a Spotfire (6.5) report.
It should be implemented on 3 levels; let's call them L1, L2 and L3. There is an additional mapping table that contains user logins and the values at each level where the user has access. Additionally, if a user is not in the mapping table, he is a kind of root user, so he has access to everything.
On the DB side it looks like this:
CREATE TABLE SECURITY
(
USER_ID VARCHAR2(100 BYTE)
, L1 VARCHAR2(100 BYTE)
, L2 VARCHAR2(100 BYTE)
, L3 VARCHAR2(100 BYTE)
--, L1L2L3 VARCHAR2(100 BYTE) -- option there could be one column that contains lowest possible level
);
INSERT INTO SECURITY (USER_ID, L1) VALUES ('UNAME1','A');
INSERT INTO SECURITY (USER_ID, L2) VALUES ('UNAME2','BB');
INSERT INTO SECURITY (USER_ID, L3) VALUES ('UNAME3','CCC');
CREATE TABLE SECURED_DATA
(
L1 VARCHAR2(100 BYTE)
, L2 VARCHAR2(100 BYTE)
, L3 VARCHAR2(100 BYTE)
, V1 NUMBER
);
INSERT INTO SECURED_DATA (L1, V1) VALUES ('A',1);
INSERT INTO SECURED_DATA (L1, L2, V1) VALUES ('B','BB',2);
INSERT INTO SECURED_DATA (L1, L2, L3, V1) VALUES ('C','CC','CCC',3);
Finally, I've made an Information Link and then changed its SQL code to something like this:
SELECT
M.*
FROM
SECURITY S
INNER JOIN SECURED_DATA M
ON
(
M.L1 = S.L1
AND S.USER_ID = (%CURRENT_USER%)
)
UNION ALL
SELECT
M.*
FROM
SECURITY S
INNER JOIN SECURED_DATA M
ON
(
M.L2 = S.L2
AND S.USER_ID = (%CURRENT_USER%)
)
UNION ALL
SELECT
M.*
FROM
SECURITY S
INNER JOIN SECURED_DATA M
ON
(
M.L3 = S.L3
AND S.USER_ID = (%CURRENT_USER%)
)
UNION ALL
SELECT
M.*
FROM
SECURED_DATA M
WHERE
(
SELECT
COUNT(1)
FROM
SECURITY S
WHERE S.USER_ID = (%CURRENT_USER%)
)
=0
It works fine, but I'm wondering if there is a smarter, more Spotfire-native way to do it?
Many thanks and regards,
Maciej
My guess at a "smarter and more Spotfire way" is that you want to be able to cache a single data set and use it for multiple users, limiting it in the analytic rather than in the data pull. There is some danger to this if we're doing it for security's sake, because the data will technically be in the analytic, and if users have permission to edit and add visualizations, you no longer control what they can and cannot see. If any authoring is allowed in Web Player for the specific analytic, I recommend all security be done database-side.
If you want to do it in Spotfire anyway, here is my recommendation:
Have an Information Link (for this example, named IL_SecurityCheck) which is SELECT * FROM SECURITY S WHERE S.USER_ID = (%CURRENT_USER%).
If users move from a cover page to the page with the data in it, you can put the code in the script that changes pages; if not, you can use a method I explained here (Spotfire Current Date in input field with calendar popup) to fire off a script on open.
Button Script required:
from Spotfire.Dxp.Data import *

crossSource = Document.Data.Tables["IL_SecurityCheck"]
rowCount = crossSource.RowCount
rowIndexSet = IndexSet(rowCount, True)
print rowCount
#rowCount = Document.Data.Tables["Managed Care UpDownStream"].RowCount
colCurs = DataValueCursor.CreateFormatted(crossSource.Columns["L1"])
colCurs2 = DataValueCursor.CreateFormatted(crossSource.Columns["L2"])
colCurs3 = DataValueCursor.CreateFormatted(crossSource.Columns["L3"])
x = ""
if rowIndexSet.IsEmpty != True:
    for row in crossSource.GetRows(rowIndexSet, colCurs):
        if colCurs.CurrentValue is not None:
            x += "[L1] = '" + colCurs.CurrentValue + "' and "
    for row in crossSource.GetRows(rowIndexSet, colCurs2):
        if colCurs2.CurrentValue is not None:
            x += "[L2] = '" + colCurs2.CurrentValue + "' and "
    for row in crossSource.GetRows(rowIndexSet, colCurs3):
        if colCurs3.CurrentValue is not None:
            x += "[L3] = '" + colCurs3.CurrentValue + "' and "
    x = x[:len(x) - 4]
else:
    x = "1=1"
Document.Properties["SecurityLimits"] = x
Visualization Data Limited by Expression: ${SecurityLimits}
