Unable to parse list of Json blocks in U-SQL - azure

I have a file with list of json blocks and am stuck with processing/Reading them in U-Sql and writing to a text file.
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
{
"id": "0002",
"type": "nut",
"name": "ake",
"ppu": 1.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
{
"id": "0003",
"type": "test",
"name": "ake",
"ppu": 1.55,
"batters":
{
"batter":
[
]
},
"topping":
[
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
can someone help me on this.
REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];
DECLARE #Full_Path string = #"C:\Users\test\Desktop\File\JsonTest.json";
USING [Microsoft.Analytics.Samples.Formats];
#RawExtract =
EXTRACT
[RawString] string
FROM
#Full_Path
USING
Extractors.Text(delimiter:'\n', quoting : false);
#ParsedJSONLines =
SELECT JsonFunctions.JsonTuple([RawString]) AS JSONLine
FROM #RawExtract;
#StagedData =
SELECT
JSONLine["id"] AS Id,
JSONLine["name"] AS Name,
JSONLine["type"] AS Type,
JSONLine["ppu"] AS PPU,
JSONLine["batters"] AS Batter
FROM
#ParsedJSONLines;
DECLARE #Output_Path string = #"C:\Users\Test\Desktop\File\Test2.csv";
OUTPUT #StagedData
TO #Output_Path
USING Outputters.Csv();
Am receiving error while evaluating expression .
Error while evaluating expression JsonFunctions.JsonTuple(RawString)

You cant use an Text Extraxtor to extract Json, unless you use Json Lines.
Using the extractor will split the json and you will get the error.
Use JsonExtractor instead of Text extractor.
https://github.com/Azure/usql/blob/master/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Json/JsonExtractor.cs

Related

how to get a value of a json http response in azure logic app

I'm trying to get "id" value set to a variable by parsing the below output sample. The REST API call will return multiple values as shown below and I'm interested in only getting the "id" value for the particular name that users has provided/set as input in the workflow earlier either by a parameter value or by initializing a variable. How do I do this value extraction in azure logic app?
Any help much appreciated.
[
{
"id": 1,
"name": "xyz-List",
"data": {
"urls": [
"*.test1.com",
"*.test2.com"
],
"type": "exact"
},
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"pending": 0
},
{
"id": 2,
"name": "abc-List",
"data": {
"urls": [
"www.mytesting.com"
],
"type": "exact"
},
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"pending": 0
},
{
"id": 3,
"name": "azure-list",
"data": {
"type": "exact",
"urls": [
"www.xyz.com",
"www.azure-test.com"
],
"json_version": 2
},
"modify_by": "admin#xyz.com",
"modify_time": "2022-09-26T01:25:20.000Z",
"modify_type": "Edited",
"pending": 0
}
]
I have reproduced from my end and could able to make it work by parsing your REST API call value. To iterate through the Parsed JSON I have used a for-each loop and extracted the Id using the below expression and set its value to a variable.
#items('For_each')['id']
Below is the complete flow of my logic app
RESULTS:
To reproduce the same in your logic app you can use the below code view that worked for me.
{
"definition": {
"$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#",
"actions": {
"Compose": {
"inputs": [
{
"data": {
"type": "exact",
"urls": [
"*.test1.com",
"*.test2.com"
]
},
"id": 1,
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"name": "xyz-List",
"pending": 0
},
{
"data": {
"type": "exact",
"urls": [
"www.mytesting.com"
]
},
"id": 2,
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"name": "abc-List",
"pending": 0
},
{
"data": {
"json_version": 2,
"type": "exact",
"urls": [
"www.xyz.com",
"www.azure-test.com"
]
},
"id": 3,
"modify_by": "admin#xyz.com",
"modify_time": "2022-09-26T01:25:20.000Z",
"modify_type": "Edited",
"name": "azure-list",
"pending": 0
}
],
"runAfter": {
"Initialize_variable": [
"Succeeded"
]
},
"type": "Compose"
},
"For_each": {
"actions": {
"Set_variable": {
"inputs": {
"name": "Id",
"value": "#items('For_each')['id']"
},
"runAfter": {},
"type": "SetVariable"
}
},
"foreach": "#body('Parse_JSON')",
"runAfter": {
"Parse_JSON": [
"Succeeded"
]
},
"type": "Foreach"
},
"Initialize_variable": {
"inputs": {
"variables": [
{
"name": "Id",
"type": "integer"
}
]
},
"runAfter": {},
"type": "InitializeVariable"
},
"Parse_JSON": {
"inputs": {
"content": "#outputs('Compose')",
"schema": {
"items": {
"properties": {
"data": {
"properties": {
"type": {
"type": "string"
},
"urls": {
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object"
},
"id": {
"type": "integer"
},
"modify_by": {
"type": "string"
},
"modify_time": {
"type": "string"
},
"modify_type": {
"type": "string"
},
"name": {
"type": "string"
},
"pending": {
"type": "integer"
}
},
"required": [
"id",
"name",
"data",
"modify_by",
"modify_time",
"modify_type",
"pending"
],
"type": "object"
},
"type": "array"
}
},
"runAfter": {
"Compose": [
"Succeeded"
]
},
"type": "ParseJson"
}
},
"contentVersion": "1.0.0.0",
"outputs": {},
"parameters": {},
"triggers": {
"manual": {
"inputs": {
"schema": {}
},
"kind": "Http",
"type": "Request"
}
}
},
"parameters": {}
}
Load this into your tenant. You can use basic expressions with a condition to get your result ...
{
"definition": {
"$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#",
"actions": {
"For_Each_Array_Item": {
"actions": {
"Condition": {
"actions": {
"Set_ID": {
"inputs": {
"name": "ID",
"value": "#item()['id']"
},
"runAfter": {},
"type": "SetVariable"
}
},
"expression": {
"and": [
{
"equals": [
"#item()['name']",
"abc-List"
]
}
]
},
"runAfter": {},
"type": "If"
}
},
"foreach": "#variables('Array Data')",
"runAfter": {
"Initialize_ID": [
"Succeeded"
]
},
"type": "Foreach"
},
"Initialize_Array_Data": {
"inputs": {
"variables": [
{
"name": "Array Data",
"type": "array",
"value": [
{
"data": {
"type": "exact",
"urls": [
"*.test1.com",
"*.test2.com"
]
},
"id": 1,
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"name": "xyz-List",
"pending": 0
},
{
"data": {
"type": "exact",
"urls": [
"www.mytesting.com"
]
},
"id": 2,
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"name": "abc-List",
"pending": 0
},
{
"data": {
"json_version": 2,
"type": "exact",
"urls": [
"www.xyz.com",
"www.azure-test.com"
]
},
"id": 3,
"modify_by": "admin#xyz.com",
"modify_time": "2022-09-26T01:25:20.000Z",
"modify_type": "Edited",
"name": "azure-list",
"pending": 0
}
]
}
]
},
"runAfter": {},
"type": "InitializeVariable"
},
"Initialize_ID": {
"inputs": {
"variables": [
{
"name": "ID",
"type": "integer"
}
]
},
"runAfter": {
"Initialize_Array_Data": [
"Succeeded"
]
},
"type": "InitializeVariable"
}
},
"contentVersion": "1.0.0.0",
"outputs": {},
"parameters": {
"ParameterTest1": {
"defaultValue": "\"\"",
"type": "String"
}
},
"triggers": {
"manual": {
"inputs": {
"method": "GET",
"schema": {}
},
"kind": "Http",
"type": "Request"
}
}
},
"parameters": {}
}
My example looks for the name abc-List and if it finds it, it sets the ID variable to be the associated ID of that record it found the name against.

Azure data factory json data conversion null value

I have a json feed in the below format. I need to update the data in NoSQL collection having a different schema as shown below. Using Azure data factory how can I transform input json schema to target schema?
Since the currentValue can be of different data type(array, number, complex type, string etc) for each record, Azure Data flow task is giving null value for 'Derived Column' schema modifier as well as 'Flatten' formatter.
Input Json
[
{
"type": "UPDATE",
"key": { "id": "112710876" },
"doc": [
{
"property": "org.numberOfEmployees",
"currentValue": [
{
"value": 2256,
"scope": "Consolidated"
},
{
"value": 516,
"scope": "Individual"
}
]
}
]
},
{
"type": "UPDATE",
"key": { "id": "081243215" },
"doc": [
{
"property": "org.startDate",
"currentValue": "1979-09-14T06:08:51Z"
}
]
},
{
"type": "UPDATE",
"key": { "id": "081243216" },
"doc": [
{
"property": "org.employeeCount",
"currentValue": "20000"
}
]
},
{
"type": "UPDATE",
"key": { "id": "081243216" },
"doc": [
{
"property": "org.headOffice",
"currentValue": {
"city": "NY",
"country": "US"
}
}
]
}
]
Target Schema
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"properties": {
"id": {
"type": "integer"
},
"startDate": {
"type": "string"
},
"numberOfEmployees": {
"type": "array",
"items": [
{
"type": "object",
"properties": {
"value": {
"type": "integer"
},
"scope": {
"type": "string"
}
}
}
]
},
"employeeCount": {
"type": "integer"
},
"headOffice": {
"type": "object",
"properties": {
"city": {
"type": "string"
},
"country": {
"type": "string"
}
}
}
}
}
Is there any way I can stringify currentValue in data flow task, if there is no direct way to transform the input data to target schema?
Any help would be appreciated.
You can stringify it in a derived column using "toString()" or you can wait for our new Stringify transformation in October :)

Filtering Arrays in NodeJS without knowing where the value's location is

I'm working on this project that should scrape websites and output HTML in the form of a JSON, now the only useful things in those JSONs to us are "forms".
I wanted to filter that but the native array filter only works when I know the attribute's location relative to the entire page (DOM??) but that won't always be the case, and I fear checking every object's value till I reach the desired value isn't viable due to
some pages being humongous,
form being a string in other places we don't want, this is in NodeJS
Snippet of input:
[
{
"type": "element",
"tagName": "p",
"attributes": [],
"children": [
{
"type": "text",
"content": "This is how the HTML code above will be displayed in a browser:"
}
]
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "form",
"attributes": [
{
"key": "action",
"value": "/action_page.php"
},
{
"key": "target",
"value": "_blank"
}
],
"children": [
{
"type": "text",
"content": "\nFirst name:"
},
{
"type": "element",
"tagName": "br",
"attributes": [],
"children": []
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "input",
"attributes": [
{
"key": "type",
"value": "text"
},
{
"key": "name",
"value": "firstname0"
},
{
"key": "value",
"value": "John"
}
],
"children": []
},
{
"type": "element",
"tagName": "br",
"attributes": [],
"children": []
},
{
"type": "text",
"content": "\nLast name:"
},
{
"type": "element",
"tagName": "br",
"attributes": [],
"children": []
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "input",
"attributes": [
{
"key": "type",
"value": "text"
},
{
"key": "name",
"value": "lastname0"
},
{
"key": "value",
"value": "Doe"
}
],
"children": []
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "br",
"attributes": [],
"children": []
},
{
"type": "element",
"tagName": "br",
"attributes": [],
"children": []
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "input",
"attributes": [
{
"key": "type",
"value": "submit"
},
{
"key": "value",
"value": "Submit"
}
],
"children": []
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "input",
"attributes": [
{
"key": "type",
"value": "reset"
}
],
"children": []
},
{
"type": "text",
"content": "\n"
}
]
},
{
"type": "text",
"content": "\n"
}
]
A snippet of output:
[
{
"type": "element",
"tagName": "form",
"attributes": [
{
"key": "action",
"value": "/action_page.php"
},
{
"key": "target",
"value": "_blank"
}
],
"children": [
{
"type": "text",
"content": "\nFirst name:"
},
{
"type": "element",
"tagName": "br",
"attributes": [],
"children": []
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "input",
"attributes": [
{
"key": "type",
"value": "text"
},
{
"key": "name",
"value": "firstname0"
},
{
"key": "value",
"value": "John"
}
],
"children": []
},
{
"type": "element",
"tagName": "br",
"attributes": [],
"children": []
},
{
"type": "text",
"content": "\nLast name:"
},
{
"type": "element",
"tagName": "br",
"attributes": [],
"children": []
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "input",
"attributes": [
{
"key": "type",
"value": "text"
},
{
"key": "name",
"value": "lastname0"
},
{
"key": "value",
"value": "Doe"
}
],
"children": []
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "br",
"attributes": [],
"children": []
},
{
"type": "element",
"tagName": "br",
"attributes": [],
"children": []
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "input",
"attributes": [
{
"key": "type",
"value": "submit"
},
{
"key": "value",
"value": "Submit"
}
],
"children": []
},
{
"type": "text",
"content": "\n"
},
{
"type": "element",
"tagName": "input",
"attributes": [
{
"key": "type",
"value": "reset"
}
],
"children": []
},
{
"type": "text",
"content": "\n"
}
]
}
]
TL;DR: only retain forms and any of its children.
First of all, this input looks like very incomplete, it may be an array or an object. If I assume it's an array of objects, then I can use jsonpath to access any of the values.
var jp = require('jsonpath');
var formNodes = jp.query(nodes, `$..[?(#.tagName=="form")]`);
You can achive the same using vanila javascript, there was several stackoverflow questions for that. But I found jsonpath and xpath being easier to implement than those.

How to generate a JSON Schema?

I would like to generate a schema from a json object.
var GenerateSchema = require('generate-schema')
var schema = GenerateSchema.json(request.body);
Request.Body
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
102,
0.5
]
},
"properties": {
"prop0": "value0"
}
},
{
"type": "Feature",
"geometry": {
"type": "LineString",
"coordinates": [
[
102,
0
],
[
103,
1
],
[
104,
0
],
[
105,
1
]
]
},
"properties": {
"prop0": "value0",
"prop1": 0
}
},
{
"type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
100,
0
],
[
101,
0
],
[
101,
1
],
[
100,
1
],
[
100,
0
]
]
]
},
"properties": {
"prop0": "value0",
"prop1": {
"this": "that"
}
}
}
]
}
Schema generated from the request.body
{
"$id": "http://json-schema.org/draft-04/schema#",
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Product",
"type": "object",
"properties": {
"type": {
"type": "string"
},
"features": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string"
},
"geometry": {
"type": "object",
"properties": {
"type": {
"type": "string"
},
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
}
],
"type": "array"
}
}
}
},
"properties": {
"type": "object",
"properties": {
"prop0": {
"type": "string"
},
"prop1": {
"type": "object",
"properties": {
"this": {
"type": "string"
}
}
}
}
}
},
"required": [
"type",
"geometry",
"properties"
]
}
}
}
}
Schema validation with Ajv
[
{
keyword: 'type',
dataPath: '.features[0].geometry.coordinates[0]',
schemaPath: '#/properties/features/items/properties/geometry/properties/coordinates/items/type',
params: { type: 'array' },
message: 'should be array'
}
]
Why Ajv detects an issue ?
Assuming that you want coordinates to be either an array of numbers or an array of arrays of numbers then this schema doesn't look right to me:
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
}
],
"type": "array"
}
}
You would typically use oneOf to choose between different schemas but the oneOf here says:
it can be a number
or a number
or a number
or a number
or a number
But then ultimately it doesn't matter because you have a {"type": "array"} in the end that seems to override everything which would explain why it fails for {"coordinates": [102, 0.5]} because that is not an array of arrays.
It seems that what you're looking for is more along the lines of: (untested)
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{ "type": "number" },
{ "type": "array", "items": { "type": "number"} }
]
}
}
Which reads:
items in a coordinates array can be either "numbers" or "arrays of numbers".
I think that your generate-schema package got this wrong here.
I found my answer with QuickType library

JSON:API Matching Collections with its respective Includes

What exactly is the best practice for matching JSON:API data collections with their respective includes. Considering the following code below....
What if I wanted to loop through each venue and display the Owners full information for each Venue Record. Does JSON:API expect me to just search the include array for the matching Owner Record
find(included,data[$i].relationships.owner.data.id);
Would find() loop through the included array to look for the owner that has the matching id as the collection items owner in the relationships object ?
$(data).each(function(item){
var owner = find(included,'owner', item.relationships.owner.data.id)
})
I have not found a resource that explains this or perhapes I am mis understanding the point of json:api. If someone can explain this or point to a resource that relates to my question. I would appreciate it.
{
"links": {
"self": "http://127.0.0.1/api/venues?include=owner"
},
"data": [
{
"id": "5c5b49188fd33c7a989ba9b6",
"type": "venues",
"attributes": {
"name": "Kreiger - Smith",
"address": "69675 Reilly Vista",
"location": {
"type": "Point",
"coordinates": [
-112.110492,
36.098948
]
},
"events": [
{
"_id": "ad52825a8f4812e92f87b8c6",
"name": "Cool Awesome Event!",
"user": "b3daa77b4c04a9551b8781d0",
"id": "ad52825a8f4812e92f87b8c6"
}
],
"created_at": "2019-02-07T14:27:13.207Z",
"updated_at": "2019-02-07T14:27:13.207Z"
},
"relationships": {
"owner": {
"data": {
"id": "b3daa77b4c04a9551b8781d0",
"type": "users"
}
}
}
},
{
"id": "5c5b49188fd33c7a989ba9b7",
"type": "venues",
"attributes": {
"name": "Oberbrunner Inc",
"address": "1132 Kenyon Stravenue",
"location": {
"type": "Point",
"coordinates": [
-112.110492,
36.098948
]
},
"events": [
{
"_id": "ad52825a8f4812e92f87b8c6",
"name": "Cool Awesome Event!",
"user": "b3daa77b4c04a9551b8781d0",
"id": "ad52825a8f4812e92f87b8c6"
}
],
"created_at": "2019-02-07T14:27:13.207Z",
"updated_at": "2019-02-07T14:27:13.207Z"
},
"relationships": {
"owner": {
"data": {
"id": "b3daa77b4c04a9551b8781d0",
"type": "users"
}
}
}
},
{
"id": "5c5b49188fd33c7a989ba9b8",
"type": "venues",
"attributes": {
"name": "Gibson - Muller",
"address": "8457 Hailie Canyon",
"location": {
"type": "Point",
"coordinates": [
-112.110492,
36.098948
]
},
"events": [
{
"_id": "ad52825a8f4812e92f87b8c6",
"name": "Cool Awesome Event!",
"user": "b3daa77b4c04a9551b8781d0",
"id": "ad52825a8f4812e92f87b8c6"
}
],
"created_at": "2019-02-07T14:27:13.208Z",
"updated_at": "2019-02-07T14:27:13.208Z"
},
"relationships": {
"owner": {
"data": {
"id": "a1881c06eec96db9901c7bbf",
"type": "users"
}
}
}
}
],
"included": [
{
"id": "b3daa77b4c04a9551b8781d0",
"type": "users",
"attributes": {
"username": "killerjohn",
"firstname": "John",
"lastname": "Chapman"
}
},
{
"id": "a1881c06eec96db9901c7bbf",
"type": "users",
"attributes": {
"username": "numerical25",
"firstname": "Billy",
"lastname": "Gordon"
}
}
]
}
This is my best possible solution. But is there a better way ? Seems like alot more coding just to find a collections associated included data
axios.get('http://127.0.0.1:3000/api/venues?include=owner').then(function(response) {
var venues = response.data.data;
var data = response.data;
for(x in venues) {
var owner = data.included.find(function(element) {
if(element.id == venues[x].relationships.owner.data.id) {
return element;
}
});
}
});

Resources