Can you help me write Python logic to pull JSON data from an API and load it directly into a SQL table?
I have written the Python code below, and it works fine with a flat JSON structure, but it fails if the JSON has a child array.
For example, a sales order can have a header record and multiple line records in a single JSON file (sample attached below). I am able to pull and load the header data, but I am failing to load the line data.
Here is my existing Python code:
import requests
from requests.auth import HTTPBasicAuth
import json
import pandas as pd
Endpoint = "https://api.cin7.com/api"
FullEndpointExtension = "/v1/SalesOrders?order=Id&page=1&rows=250"
User_name = "XXXXX"
Password_key = "YYYYYY"
Authentication = HTTPBasicAuth(User_name, Password_key)
try:
    response = requests.get(Endpoint + FullEndpointExtension, auth=Authentication)
    DataFrame = pd.read_json(response.text)
    DataFrame = DataFrame[['id','createdDate','modifiedDate','createdBy','processedBy','isApproved','reference','memberId','firstName','lastName','company','email','phone','mobile','fax','deliveryFirstName','deliveryLastName','deliveryCompany','deliveryAddress1','deliveryAddress2','deliveryCity','deliveryState','deliveryPostalCode','deliveryCountry','billingFirstName','billingLastName','billingCompany','billingAddress1','billingAddress2','billingCity','billingPostalCode','billingState','billingCountry','branchId','branchEmail','projectName','trackingCode','internalComments','productTotal','freightTotal','freightDescription','surcharge','surchargeDescription','discountTotal','discountDescription','total','currencyCode','currencyRate','currencySymbol','taxStatus','taxRate','source','isVoid','memberEmail','memberCostCenter','memberAlternativeTaxRate','costCenter','alternativeTaxRate','estimatedDeliveryDate','salesPersonId','salesPersonEmail','paymentTerms','customerOrderNo','voucherCode','deliveryInstructions','status','stage','invoiceDate','invoiceNumber','dispatchedDate','logisticsCarrier','logisticsStatus','distributionBranchId','lineItems']]
#Capture Error Message
except Exception as ErrorMessage:
    Message = str(ErrorMessage)
    print(Message)
else:
    Message = 'Successfully Completed'
    print(Message)
DataFrame
Sample code:
import json
import requests
import pandas as pd
from io import StringIO
response = requests.get("https://api.github.com/events")
DataFrame = pd.read_json(StringIO(response.text))
# Pair each event's top-level id with the fields of the nested 'actor' object
for event_id, actor in zip(DataFrame['id'], DataFrame['actor']):
    print(event_id, actor['id'], actor['login'])
Sample JSON:
[
{
"id": 111,
"createdDate": "2020-09-08T17:58:00Z",
"modifiedDate": "2020-09-10T02:01:09Z",
"createdBy": 0,
"processedBy": 99999,
"isApproved": true,
"reference": "4000000000",
"memberId": 1111,
"firstName": "RAM",
"lastName": "Live Credit Card Order",
"company": "",
"email": "ram#abc.co.nz",
"phone": "041411111",
"mobile": "",
"fax": "",
"deliveryFirstName": "Ram",
"deliveryLastName": "Live Credit Card Order",
"deliveryCompany": "",
"deliveryAddress1": "XXXX",
"deliveryAddress2": " ",
"deliveryCity": "XXXXX",
"deliveryState": "QLD",
"deliveryPostalCode": "XXXXX",
"deliveryCountry": "Australia",
"billingFirstName": "Ram",
"billingLastName": "Live Credit Card Order",
"billingCompany": "",
"billingAddress1": "XXXXXX",
"billingAddress2": "",
"billingCity": "XXXXXX",
"billingPostalCode": "XXXX",
"billingState": "QLD",
"billingCountry": "Australia",
"branchId": 3,
"branchEmail": "",
"projectName": "ABC",
"trackingCode": "",
"internalComments": "",
"productTotal": 29.9500,
"freightTotal": 0.0000,
"freightDescription": "",
"surcharge": 0.0000,
"surchargeDescription": "",
"discountTotal": 0.0000,
"discountDescription": "",
"total": 29.9500,
"currencyCode": "AUD",
"currencyRate": 1.0,
"currencySymbol": "$",
"taxStatus": "Incl",
"taxRate": 0.1,
"source": "API",
"customFields": {
"orders_1000": null,
"orders_1001": null,
"orders_1002": null,
"orders_1003": null,
"orders_1004": null
},
"isVoid": false,
"memberEmail": "ram#abc.co.nz",
"memberCostCenter": null,
"memberAlternativeTaxRate": null,
"costCenter": "",
"alternativeTaxRate": "",
"estimatedDeliveryDate": null,
"salesPersonId": 0,
"salesPersonEmail": null,
"paymentTerms": "",
"customerOrderNo": "",
"voucherCode": null,
"deliveryInstructions": "Please leave at reception",
"status": "APPROVED",
"stage": "Dispatched",
"invoiceDate": "2020-09-08T17:58:00Z",
"invoiceNumber": 5011111,
"dispatchedDate": "2020-09-08T20:31:00Z",
"logisticsCarrier": "BCD",
"logisticsStatus": 10,
"distributionBranchId": 0,
"lineItems": [
{
"id": 363,
"createdDate": "2020-09-08T17:58:00Z",
"transactionId": 267777,
"parentId": 199999,
"productId": 4444,
"productOptionId": 333333,
"integrationRef": "0",
"sort": 1,
"code": "T02392",
"name": "Mat Grey",
"option1": "",
"option2": "",
"option3": "",
"qty": 1.0,
"styleCode": "T02392",
"barcode": null,
"sizeCodes": null,
"lineComments": "",
"unitCost": 11.9900,
"unitPrice": 14.9500,
"discount": 0.0000,
"qtyShipped": 1.0,
"holdingQty": 0.0,
"accountCode": "",
"stockControl": "FIFO",
"stockMovements": [
{
"batch": null,
"quantity": 1.0,
"serial": null
}
],
"sizes": []
},
{
"id": 365,
"createdDate": "2020-09-08T17:58:00Z",
"transactionId": 267777,
"parentId": 199999,
"productId": 4444,
"productOptionId": 333333,
"integrationRef": "0",
"sort": 1,
"code": "A1111",
"name": "Mat Grey",
"option1": "",
"option2": "",
"option3": "",
"qty": 1.0,
"styleCode": "ABCXYZ",
"barcode": "",
"sizeCodes": null,
"lineComments": "",
"unitCost": 11.9900,
"unitPrice": 0.0000,
"discount": 0.0000,
"qtyShipped": 1.0,
"holdingQty": 0.0,
"accountCode": "",
"stockControl": "Batch",
"stockMovements": [
{
"batch": "100",
"quantity": 1.0,
"serial": null
}
],
"sizes": []
},
{
"id": 364,
"createdDate": "2020-09-08T17:58:00Z",
"transactionId": 2641,
"parentId": 0,
"productId": 4410,
"productOptionId": 4411,
"integrationRef": "0",
"sort": 2,
"code": "DELIVERYFEE",
"name": "DELIVERYFEE",
"option1": "",
"option2": "",
"option3": "",
"qty": 1.0,
"styleCode": "DeliveryFee",
"barcode": "",
"sizeCodes": null,
"lineComments": "",
"unitCost": 0.0000,
"unitPrice": 8.0000,
"discount": 0.0000,
"qtyShipped": 1.0,
"holdingQty": 0.0,
"accountCode": "ABCXYZ",
"stockControl": "ABCXYZ",
"stockMovements": [],
"sizes": []
}
]
}
]
Finally solved it:
import json
import requests
from requests.auth import HTTPBasicAuth
import pandas as pd
from pandas import json_normalize
#https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.json_normalize.html
#https://www.kaggle.com/jboysen/quick-tutorial-flatten-nested-json-in-pandas
Endpoint = "https://api.cin7.com/api"
FullEndpointExtension = "/v1/SalesOrders?where=id=2666&order=Id&page=1&rows=250"
User_name = "xxxxxx"
Password_key = "xxxxxxx"
Authentication = HTTPBasicAuth(User_name, Password_key)
r = requests.get(Endpoint + FullEndpointExtension, auth = Authentication)
d = json.loads(r.text)
result1 = json_normalize(data=d, record_path='lineItems', meta=['id'], errors='ignore', meta_prefix='Header')
print(result1)
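To then push the result into SQL, here is a minimal sketch using SQLAlchemy and DataFrame.to_sql. The connection string and the table names SalesOrderHeader / SalesOrderLine are placeholders, and the nested stockMovements/sizes arrays are dropped because they would need their own child tables:
from sqlalchemy import create_engine
# Placeholder connection string; adjust the driver/credentials for your database
engine = create_engine("mssql+pyodbc://user:password@MyDsn")
# Header = the top-level fields only (drop the nested lineItems array)
header = pd.json_normalize(d).drop(columns=['lineItems'])
# Lines = the json_normalize result from above, minus the deeper nested arrays
lines = result1.drop(columns=['stockMovements', 'sizes'], errors='ignore')
header.to_sql('SalesOrderHeader', engine, if_exists='append', index=False)
lines.to_sql('SalesOrderLine', engine, if_exists='append', index=False)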
According to the eSig API docs, GET /v2.1/accounts/{accountId}/users/{userId}/profile should return the authenticationMethods information, but when I tried it on the sandbox, I didn't get that section.
The whole API URL:
https://demo.docusign.net/restapi/v2.1/accounts/xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx/users/xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx/profile
Response:
{
"displayProfile": "true",
"displayOrganizationInfo": "false",
"displayPersonalInfo": "false",
"displayUsageHistory": "false",
"companyName": "dd",
"title": "Z",
"userDetails": {
"userName": "X Y",
"userId": "xxxxxxx-xxxxx-xxxx-a2a2-79cac67194c7",
"uri": "/users/xxxxx-xxxxx-xxxx-a2a2-xxxxxxx",
"email": "xxx#gmail.com",
"createdDateTime": "2019-10-07T12:38:53.2330000Z",
"userAddedToAccountDateTime": "0001-01-01T08:00:00.0000000Z",
"firstName": "X",
"lastName": "Y",
"permissionProfileId": "xxxxxx",
"permissionProfileName": "Account Administrator",
"defaultAccountId": "xxxx-xxxxx-xxxx-xxxx-xxxxxxxxxxxx"
},
"address": {
"address1": "",
"address2": "",
"city": "",
"stateOrProvince": "",
"postalCode": "",
"phone": "",
"fax": "",
"country": ""
},
"usageHistory": {
"signedCount": "0",
"sentCount": "0"
},
"userProfileLastModifiedDate": "2019-10-07T06:08:49.1700000Z"
}
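For reference, a minimal sketch of how the profile endpoint can be called with Python requests, assuming a valid OAuth access token (the account and user IDs are placeholders):
import requests
base = "https://demo.docusign.net/restapi/v2.1"
account_id = "xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx"  # placeholder
user_id = "xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx"     # placeholder
headers = {"Authorization": "Bearer <access_token>"}          # OAuth token assumed
resp = requests.get(f"{base}/accounts/{account_id}/users/{user_id}/profile", headers=headers)
profile = resp.json()
# On the sandbox this payload comes back without an 'authenticationMethods' key
print(profile.get("authenticationMethods"))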
In our application we obtain the field values as columns using Spark SQL. I'm trying to figure out how to put the column values into a nested JSON object and push it to Elasticsearch. Also, is there a way to parameterise the values passed to the regex in selectExpr?
We are currently using the Spark Java API.
Dataset<Row> data = rowExtracted.selectExpr("split(value,\"[|]\")[0] as channelId",
"split(value,\"[|]\")[1] as country",
"split(value,\"[|]\")[2] as product",
"split(value,\"[|]\")[3] as sourceId",
"split(value,\"[|]\")[4] as systemId",
"split(value,\"[|]\")[5] as destinationId",
"split(value,\"[|]\")[6] as batchId",
"split(value,\"[|]\")[7] as orgId",
"split(value,\"[|]\")[8] as businessId",
"split(value,\"[|]\")[9] as orgAccountId",
"split(value,\"[|]\")[10] as orgBankCode",
"split(value,\"[|]\")[11] as beneAccountId",
"split(value,\"[|]\")[12] as beneBankId",
"split(value,\"[|]\")[13] as currencyCode",
"split(value,\"[|]\")[14] as amount",
"split(value,\"[|]\")[15] as processingDate",
"split(value,\"[|]\")[16] as status",
"split(value,\"[|]\")[17] as rejectCode",
"split(value,\"[|]\")[18] as stageId",
"split(value,\"[|]\")[19] as stageStatus",
"split(value,\"[|]\")[20] as stageUpdatedTime",
"split(value,\"[|]\")[21] as receivedTime",
"split(value,\"[|]\")[22] as sendTime"
);
StreamingQuery query = data.writeStream()
.outputMode(OutputMode.Append()).format("es").option("checkpointLocation", "C:\\checkpoint")
.start("spark_index/doc");
Actual output:
{
"_index": "spark_index",
"_type": "doc",
"_id": "test123",
"_version": 1,
"_score": 1,
"_source": {
"channelId": "test",
"country": "SG",
"product": "test",
"sourceId": "",
"systemId": "test123",
"destinationId": "",
"batchId": "",
"orgId": "test",
"businessId": "test",
"orgAccountId": "test",
"orgBankCode": "",
"beneAccountId": "test",
"beneBankId": "test",
"currencyCode": "SGD",
"amount": "53.0000",
"processingDate": "",
"status": "Pending",
"rejectCode": "test",
"stageId": "123",
"stageStatus": "Comment",
"stageUpdatedTime": "2019-08-05 18:11:05.999000",
"receivedTime": "2019-08-05 18:10:12.701000",
"sendTime": "2019-08-05 18:11:06.003000"
}
}
We need the above columns nested under a "txn_summary" node, as in the JSON below:
Expected output:
{
"_index": "spark_index",
"_type": "doc",
"_id": "test123",
"_version": 1,
"_score": 1,
"_source": {
"txn_summary": {
"channelId": "test",
"country": "SG",
"product": "test",
"sourceId": "",
"systemId": "test123",
"destinationId": "",
"batchId": "",
"orgId": "test",
"businessId": "test",
"orgAccountId": "test",
"orgBankCode": "",
"beneAccountId": "test",
"beneBankId": "test",
"currencyCode": "SGD",
"amount": "53.0000",
"processingDate": "",
"status": "Pending",
"rejectCode": "test",
"stageId": "123",
"stageStatus": "Comment",
"stageUpdatedTime": "2019-08-05 18:11:05.999000",
"receivedTime": "2019-08-05 18:10:12.701000",
"sendTime": "2019-08-05 18:11:06.003000"
}
}
}
Adding all columns to a top-level struct should give the expected output. In Scala:
import org.apache.spark.sql.functions._
data.select(struct(data.columns.map(col): _*).as("txn_summary"))
In Java, functions.struct takes Column arguments (or a first column name plus String varargs), so I suspect it would be along these lines:
import static org.apache.spark.sql.functions.*;
Column[] cols = Arrays.stream(data.columns()).map(c -> col(c)).toArray(Column[]::new);
data.select(struct(cols).as("txn_summary"));
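For illustration, the same idea sketched in PySpark with a shortened, hypothetical column list; it also shows one way to parameterise the selectExpr split expressions by generating them from a single list of field names instead of hard-coding each index:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, struct

spark = SparkSession.builder.appName("nest-demo").getOrCreate()
# Toy stand-in for the pipe-delimited 'value' column coming off the stream
raw = spark.createDataFrame([("web|SG|prod|src1|sys1",)], ["value"])

# One list drives both the split expressions and the final struct
field_names = ["channelId", "country", "product", "sourceId", "systemId"]
exprs = ["split(value, '[|]')[%d] as %s" % (i, name) for i, name in enumerate(field_names)]

data = raw.selectExpr(*exprs)
nested = data.select(struct(*[col(c) for c in data.columns]).alias("txn_summary"))
nested.show(truncate=False)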
I have a transaction called updatewarranty. In that updatewarranty transaction I am updating an asset called warranty.
This is my JSON:
{
"$class": "org.network.warranty.Transfer",
"TransferId": "9427",
"AuthKey": "",
"TransferDate": "2018-06-30T05:50:32.767Z",
"customer": {
"$class": "org.network.warranty.Customer",
"CustomerId": "2599",
"Address1": "",
"Address2": "",
"Authkey": "",
"City": "",
"Country": "",
"Email": "",
"Mobile": "",
"State": "",
"UserType": 0
},
"retailer": {
"$class": "org.network.warranty.Retailer",
"RetailerId": "8389",
"Address1": "",
"Address2": "",
"Authkey": "",
"City": "",
"Country": "",
"Email": "",
"Mobile": "",
"State": "",
"UserType": 0
},
"warranty": {
"$class": "org.network.warranty.Warranty",
"WarrentyId": "0766",
"End_Date": "2018-06-30T05:50:32.767Z",
"Start_Date": "2018-06-30T05:50:32.767Z",
"IS_Internationaly_Valid": "",
"Item_QRCode": ""
}
}
I have a transaction named getwarranty which takes the warranty ID as input.
This is my JSON:
{
"$class": "org.network.warranty.getWarranty",
"warranty": "resource:org.network.warranty.Warranty#0766"
}
When I look at the transaction record for getwarranty, I don't get the entire transfer record. I only have this information:
{
"$class": "org.network.warranty.getWarranty",
"warranty": "resource:org.network.warranty.Warranty#0766",
"transactionId": "6e35c9cb-d3a6-41d8-8c95-fa22c7681824",
"timestamp": "2018-06-30T05:50:54.851Z"
}
How can I get the warranty asset?
I'm trying to figure out how to implement structured data for my company, which has 2 branches. The branches are in the same city, but there is no head office or difference in hierarchy, and the branches do not have their own landing pages on the website. It's a dentist office, so I wanted to use LocalBusiness -> Dentist.
Because I found that nesting the data is better than separating it, I thought of using branchOf or subOrganization. However, when I try this, the testing tool tells me these properties do not work with the type 'Dentist'. Can somebody give me some advice on how to continue? This is what I have so far:
<script type='application/ld+json'>
{
"#context": "http://www.schema.org",
"#type": "Dentist",
"name": "",
"url": "",
"sameAs": [
""
],
"logo": "",
"image": "",
"description": "",
"address": {
"#type": "PostalAddress",
"streetAddress": "",
"addressLocality": "",
"addressRegion": "",
"postalCode": "",
"addressCountry": ""
},
"geo": {
"#type": "",
"latitude": "",
"longitude": ""
},
"hasMap": "",
"openingHours": "Mo 09:00-17:30 Tu 08:30-17:30 We, Th, Fr 08:00-17:30",
"contactPoint": {
"#type": "",
"contactType": "",
"telephone": "",
"email": ""
},
"#subarganization": "http://www.schema.org",
"#type": "Dentist" (I didn't continue here because I got the error, but I would enter address details etc. from the second branch).
}
</script>