I would like to generate a schema from a JSON object.
var GenerateSchema = require('generate-schema')
var schema = GenerateSchema.json(request.body);
request.body
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
102,
0.5
]
},
"properties": {
"prop0": "value0"
}
},
{
"type": "Feature",
"geometry": {
"type": "LineString",
"coordinates": [
[
102,
0
],
[
103,
1
],
[
104,
0
],
[
105,
1
]
]
},
"properties": {
"prop0": "value0",
"prop1": 0
}
},
{
"type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
100,
0
],
[
101,
0
],
[
101,
1
],
[
100,
1
],
[
100,
0
]
]
]
},
"properties": {
"prop0": "value0",
"prop1": {
"this": "that"
}
}
}
]
}
Schema generated from the request.body
{
"$id": "http://json-schema.org/draft-04/schema#",
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Product",
"type": "object",
"properties": {
"type": {
"type": "string"
},
"features": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string"
},
"geometry": {
"type": "object",
"properties": {
"type": {
"type": "string"
},
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
}
],
"type": "array"
}
}
}
},
"properties": {
"type": "object",
"properties": {
"prop0": {
"type": "string"
},
"prop1": {
"type": "object",
"properties": {
"this": {
"type": "string"
}
}
}
}
}
},
"required": [
"type",
"geometry",
"properties"
]
}
}
}
}
Schema validation with Ajv
[
{
keyword: 'type',
dataPath: '.features[0].geometry.coordinates[0]',
schemaPath: '#/properties/features/items/properties/geometry/properties/coordinates/items/type',
params: { type: 'array' },
message: 'should be array'
}
]
Why does Ajv detect an issue?
Assuming that you want coordinates to be either an array of numbers or an array of arrays of numbers, this schema doesn't look right to me:
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
}
],
"type": "array"
}
}
You would typically use oneOf to choose between different schemas, but the oneOf here says:
it can be a number
or a number
or a number
or a number
or a number
But ultimately it doesn't matter, because the {"type": "array"} at the end seems to override everything, which would explain why it fails for {"coordinates": [102, 0.5]}: that is not an array of arrays.
It seems that what you're looking for is more along the lines of: (untested)
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{ "type": "number" },
{ "type": "array", "items": { "type": "number"} }
]
}
}
Which reads:
items in a coordinates array can be either "numbers" or "arrays of numbers".
I think that your generate-schema package got this wrong here.
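For anyone who wants to sanity-check that suggestion, here is a minimal standalone sketch with Ajv, validating just the proposed coordinates sub-schema on its own (so the draft-04 $schema of the generated document doesn't come into play):

```js
// Minimal Ajv check of the proposed "coordinates" sub-schema.
const Ajv = require('ajv');
const ajv = new Ajv();

const coordinatesSchema = {
  type: 'array',
  items: {
    oneOf: [
      { type: 'number' },                            // Point-style entries
      { type: 'array', items: { type: 'number' } }   // LineString-style entries
    ]
  }
};

const validate = ajv.compile(coordinatesSchema);

console.log(validate([102, 0.5]));                   // true  (Point coordinates)
console.log(validate([[102, 0], [103, 1]]));         // true  (LineString coordinates)
console.log(validate(['not', 'numbers']), validate.errors); // false, plus error details
```

Note that Polygon coordinates (arrays of arrays of arrays) would still need a further nested branch; this sketch only covers the Point and LineString shapes from the sample.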
I found my answer with the QuickType library.
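For reference, a rough sketch of doing the same thing programmatically with the quicktype-core npm package (this assumes the jsonInputForTargetLanguage / InputData / quicktype entry points shown in that package's README; the root type name 'FeatureCollection' is arbitrary):

```js
// Rough sketch: derive a JSON Schema from a sample JSON document with quicktype-core.
const { quicktype, InputData, jsonInputForTargetLanguage } = require('quicktype-core');

async function schemaFromSample(sampleJsonString) {
  const jsonInput = jsonInputForTargetLanguage('schema');  // target language: JSON Schema
  await jsonInput.addSource({
    name: 'FeatureCollection',                             // arbitrary root type name
    samples: [sampleJsonString]
  });

  const inputData = new InputData();
  inputData.addInput(jsonInput);

  const { lines } = await quicktype({ inputData, lang: 'schema' });
  return lines.join('\n');
}

// Usage (e.g. inside an Express handler):
// schemaFromSample(JSON.stringify(request.body)).then(console.log);
```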
Related
I'm trying to get the "id" value set to a variable by parsing the output sample below. The REST API call returns multiple values, as shown below, and I'm only interested in getting the "id" value for the particular name that the user has provided or set as input earlier in the workflow, either via a parameter value or by initializing a variable. How do I do this value extraction in an Azure Logic App?
Any help is much appreciated.
[
{
"id": 1,
"name": "xyz-List",
"data": {
"urls": [
"*.test1.com",
"*.test2.com"
],
"type": "exact"
},
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"pending": 0
},
{
"id": 2,
"name": "abc-List",
"data": {
"urls": [
"www.mytesting.com"
],
"type": "exact"
},
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"pending": 0
},
{
"id": 3,
"name": "azure-list",
"data": {
"type": "exact",
"urls": [
"www.xyz.com",
"www.azure-test.com"
],
"json_version": 2
},
"modify_by": "admin#xyz.com",
"modify_time": "2022-09-26T01:25:20.000Z",
"modify_type": "Edited",
"pending": 0
}
]
I have reproduced this on my end and was able to make it work by parsing your REST API call value. To iterate through the Parse JSON output I used a For each loop, extracted the id with the expression below, and set its value to a variable.
@items('For_each')['id']
Below is the complete flow of my logic app
RESULTS:
To reproduce the same in your logic app, you can use the code view below, which worked for me.
{
"definition": {
"$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#",
"actions": {
"Compose": {
"inputs": [
{
"data": {
"type": "exact",
"urls": [
"*.test1.com",
"*.test2.com"
]
},
"id": 1,
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"name": "xyz-List",
"pending": 0
},
{
"data": {
"type": "exact",
"urls": [
"www.mytesting.com"
]
},
"id": 2,
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"name": "abc-List",
"pending": 0
},
{
"data": {
"json_version": 2,
"type": "exact",
"urls": [
"www.xyz.com",
"www.azure-test.com"
]
},
"id": 3,
"modify_by": "admin#xyz.com",
"modify_time": "2022-09-26T01:25:20.000Z",
"modify_type": "Edited",
"name": "azure-list",
"pending": 0
}
],
"runAfter": {
"Initialize_variable": [
"Succeeded"
]
},
"type": "Compose"
},
"For_each": {
"actions": {
"Set_variable": {
"inputs": {
"name": "Id",
"value": "#items('For_each')['id']"
},
"runAfter": {},
"type": "SetVariable"
}
},
"foreach": "#body('Parse_JSON')",
"runAfter": {
"Parse_JSON": [
"Succeeded"
]
},
"type": "Foreach"
},
"Initialize_variable": {
"inputs": {
"variables": [
{
"name": "Id",
"type": "integer"
}
]
},
"runAfter": {},
"type": "InitializeVariable"
},
"Parse_JSON": {
"inputs": {
"content": "#outputs('Compose')",
"schema": {
"items": {
"properties": {
"data": {
"properties": {
"type": {
"type": "string"
},
"urls": {
"items": {
"type": "string"
},
"type": "array"
}
},
"type": "object"
},
"id": {
"type": "integer"
},
"modify_by": {
"type": "string"
},
"modify_time": {
"type": "string"
},
"modify_type": {
"type": "string"
},
"name": {
"type": "string"
},
"pending": {
"type": "integer"
}
},
"required": [
"id",
"name",
"data",
"modify_by",
"modify_time",
"modify_type",
"pending"
],
"type": "object"
},
"type": "array"
}
},
"runAfter": {
"Compose": [
"Succeeded"
]
},
"type": "ParseJson"
}
},
"contentVersion": "1.0.0.0",
"outputs": {},
"parameters": {},
"triggers": {
"manual": {
"inputs": {
"schema": {}
},
"kind": "Http",
"type": "Request"
}
}
},
"parameters": {}
}
You can use basic expressions with a condition to get your result. Load this definition into your tenant:
{
"definition": {
"$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#",
"actions": {
"For_Each_Array_Item": {
"actions": {
"Condition": {
"actions": {
"Set_ID": {
"inputs": {
"name": "ID",
"value": "#item()['id']"
},
"runAfter": {},
"type": "SetVariable"
}
},
"expression": {
"and": [
{
"equals": [
"#item()['name']",
"abc-List"
]
}
]
},
"runAfter": {},
"type": "If"
}
},
"foreach": "#variables('Array Data')",
"runAfter": {
"Initialize_ID": [
"Succeeded"
]
},
"type": "Foreach"
},
"Initialize_Array_Data": {
"inputs": {
"variables": [
{
"name": "Array Data",
"type": "array",
"value": [
{
"data": {
"type": "exact",
"urls": [
"*.test1.com",
"*.test2.com"
]
},
"id": 1,
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"name": "xyz-List",
"pending": 0
},
{
"data": {
"type": "exact",
"urls": [
"www.mytesting.com"
]
},
"id": 2,
"modify_by": "admin#xyz.com",
"modify_time": "2022-06-29T21:05:27.000Z",
"modify_type": "Created",
"name": "abc-List",
"pending": 0
},
{
"data": {
"json_version": 2,
"type": "exact",
"urls": [
"www.xyz.com",
"www.azure-test.com"
]
},
"id": 3,
"modify_by": "admin#xyz.com",
"modify_time": "2022-09-26T01:25:20.000Z",
"modify_type": "Edited",
"name": "azure-list",
"pending": 0
}
]
}
]
},
"runAfter": {},
"type": "InitializeVariable"
},
"Initialize_ID": {
"inputs": {
"variables": [
{
"name": "ID",
"type": "integer"
}
]
},
"runAfter": {
"Initialize_Array_Data": [
"Succeeded"
]
},
"type": "InitializeVariable"
}
},
"contentVersion": "1.0.0.0",
"outputs": {},
"parameters": {
"ParameterTest1": {
"defaultValue": "\"\"",
"type": "String"
}
},
"triggers": {
"manual": {
"inputs": {
"method": "GET",
"schema": {}
},
"kind": "Http",
"type": "Request"
}
}
},
"parameters": {}
}
My example looks for the name abc-List and, if it finds it, sets the ID variable to the id of the matching record.
I have a JSON feed in the format below. I need to update the data in a NoSQL collection that has a different schema, as shown below. How can I transform the input JSON schema to the target schema using Azure Data Factory?
Since currentValue can have a different data type (array, number, complex type, string, etc.) for each record, the Azure Data Flow task gives a null value for the 'Derived Column' schema modifier as well as the 'Flatten' formatter.
Input JSON
[
{
"type": "UPDATE",
"key": { "id": "112710876" },
"doc": [
{
"property": "org.numberOfEmployees",
"currentValue": [
{
"value": 2256,
"scope": "Consolidated"
},
{
"value": 516,
"scope": "Individual"
}
]
}
]
},
{
"type": "UPDATE",
"key": { "id": "081243215" },
"doc": [
{
"property": "org.startDate",
"currentValue": "1979-09-14T06:08:51Z"
}
]
},
{
"type": "UPDATE",
"key": { "id": "081243216" },
"doc": [
{
"property": "org.employeeCount",
"currentValue": "20000"
}
]
},
{
"type": "UPDATE",
"key": { "id": "081243216" },
"doc": [
{
"property": "org.headOffice",
"currentValue": {
"city": "NY",
"country": "US"
}
}
]
}
]
Target Schema
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"properties": {
"id": {
"type": "integer"
},
"startDate": {
"type": "string"
},
"numberOfEmployees": {
"type": "array",
"items": [
{
"type": "object",
"properties": {
"value": {
"type": "integer"
},
"scope": {
"type": "string"
}
}
}
]
},
"employeeCount": {
"type": "integer"
},
"headOffice": {
"type": "object",
"properties": {
"city": {
"type": "string"
},
"country": {
"type": "string"
}
}
}
}
}
If there is no direct way to transform the input data to the target schema, is there any way I can stringify currentValue in the Data Flow task?
Any help would be appreciated.
You can stringify it in a Derived Column using toString(), or you can wait for our new Stringify transformation in October :)
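To show what the stringify approach buys you, here is a plain JavaScript illustration (not Data Flow expression syntax; the field names come from the input feed above): once currentValue is a string, every record has the same shape regardless of the original type.

```js
// Plain JS illustration of flattening the feed while stringifying currentValue.
const feed = [
  { type: 'UPDATE', key: { id: '081243215' }, doc: [{ property: 'org.startDate', currentValue: '1979-09-14T06:08:51Z' }] },
  { type: 'UPDATE', key: { id: '081243216' }, doc: [{ property: 'org.headOffice', currentValue: { city: 'NY', country: 'US' } }] }
];

const rows = feed.map(r => ({
  id: r.key.id,                                        // convert to integer later if needed
  property: r.doc[0].property.split('.').pop(),        // "org.headOffice" -> "headOffice"
  currentValue: JSON.stringify(r.doc[0].currentValue)  // always a string now, whatever the source type
}));

console.log(rows);
```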
When an HTTP request is received, I need to insert the array values into a table. In my case the array is ResponseRequired.
I used these things: a 'When a HTTP request is received' trigger, a Parse JSON action, and a For each loop; inside the For each I used an Insert Entity action, but it's throwing an error. If anybody knows how to implement this, please let me know.
I used this expression for RRT: body('Parse_JSON')['ResponseRequired'][0]['ResponseRequiredType']
JSON schema
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"AssetErrorCode": {
"type": "string"
},
"AssetErrorDesc": {
"type": "string"
},
"AssetId": {
"type": "integer"
},
"CustomerId": {
"type": "integer"
},
"ResponseRequired": {
"items": [
{
"properties": {
"ResponseRequiredAdditionalData": {
"type": "string"
},
"ResponseRequiredAddress": {
"type": "string"
},
"ResponseRequiredFrequency": {
"type": "string"
},
"ResponseRequiredType": {
"type": "integer"
}
},
"required": [
"ResponseRequiredType",
"ResponseRequiredFrequency",
"ResponseRequiredAddress",
"ResponseRequiredAdditionalData"
],
"type": "object"
},
{
"properties": {
"ResponseRequiredAdditionalData": {
"type": "string"
},
"ResponseRequiredAddress": {
"type": "string"
},
"ResponseRequiredFrequency": {
"type": "string"
},
"ResponseRequiredType": {
"type": "integer"
}
},
"required": [
"ResponseRequiredType",
"ResponseRequiredFrequency",
"ResponseRequiredAddress",
"ResponseRequiredAdditionalData"
],
"type": "object"
},
{
"properties": {
"ResponseRequiredAdditionalData": {
"type": "string"
},
"ResponseRequiredAddress": {
"type": "string"
},
"ResponseRequiredFrequency": {
"type": "string"
},
"ResponseRequiredType": {
"type": "integer"
}
},
"required": [
"ResponseRequiredType",
"ResponseRequiredFrequency",
"ResponseRequiredAddress",
"ResponseRequiredAdditionalData"
],
"type": "object"
}
],
"type": "array"
},
"ServiceKey": {
"type": "string"
}
},
"required": [
"CustomerId",
"ServiceKey",
"AssetId",
"AssetErrorCode",
"AssetErrorDesc",
"ResponseRequired"
],
"type": "object"
}
Using this expression solved the issue: items('For_each')?['ResponseRequiredAddress']
I have a file with a list of JSON blocks and am stuck on processing/reading them in U-SQL and writing them to a text file.
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
{
"id": "0002",
"type": "nut",
"name": "ake",
"ppu": 1.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
{
"id": "0003",
"type": "test",
"name": "ake",
"ppu": 1.55,
"batters":
{
"batter":
[
]
},
"topping":
[
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
Can someone help me with this? Here is what I tried:
REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];
DECLARE @Full_Path string = @"C:\Users\test\Desktop\File\JsonTest.json";
USING [Microsoft.Analytics.Samples.Formats];
@RawExtract =
EXTRACT
[RawString] string
FROM
@Full_Path
USING
Extractors.Text(delimiter:'\n', quoting : false);
@ParsedJSONLines =
SELECT JsonFunctions.JsonTuple([RawString]) AS JSONLine
FROM @RawExtract;
@StagedData =
SELECT
JSONLine["id"] AS Id,
JSONLine["name"] AS Name,
JSONLine["type"] AS Type,
JSONLine["ppu"] AS PPU,
JSONLine["batters"] AS Batter
FROM
@ParsedJSONLines;
DECLARE @Output_Path string = @"C:\Users\Test\Desktop\File\Test2.csv";
OUTPUT @StagedData
TO @Output_Path
USING Outputters.Csv();
I am receiving an error while evaluating the expression:
Error while evaluating expression JsonFunctions.JsonTuple(RawString)
You can't use a Text extractor to extract JSON unless you use JSON Lines.
Using the Text extractor will split the JSON and you will get the error.
Use the JsonExtractor instead of the Text extractor:
https://github.com/Azure/usql/blob/master/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Json/JsonExtractor.cs
Problem: I have created a mapping and it works fine in Elasticsearch 1.7.1, but after updating to 2.1.1 it gives me an exception.
EXCEPTION
response: '{"error":{"root_cause":[{"type":"mapper_parsing_exception","reason"
:"analyzer on field [_all] must be set when search_analyzer is set"}],"type":"ma
pper_parsing_exception","reason":"Failed to parse mapping [movie]: analyzer on f
ield [_all] must be set when search_analyzer is set","caused_by":{"type":"mapper
_parsing_exception","reason":"analyzer on field [_all] must be set when search_a
nalyzer is set"}},"status":400}',
toString: [Function],
toJSON: [Function] }
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"filter": {
"nGram_filter": {
"type": "nGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"nGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"nGram_filter"
]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
"mappings": {
"movie": {
"_all": {
"index_analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
},
"properties": {
"movieName": {
"type": "string",
"index": "not_analyzed"
},
"movieYear": {
"type": "double"
},
"imageUrl": {
"type": "string"
},
"genre": {
"type": "string"
},
"director": {
"type": "string"
},
"producer": {
"type": "string"
},
"cast": {
"type": "String"
},
"writer": {
"type": "string"
},
"synopsis": {
"type": "string"
},
"rating": {
"type": "double"
},
"price": {
"type": "double"
},
"format": {
"type": "string"
},
"offer": {
"type": "double"
},
"offerString": {
"type": "string"
},
"language": {
"type": "string"
}
}
}
}
}
The error is quite clear if you ask me: you need to specify an analyzer for _all in your movie mapping. The index_analyzer setting was removed in Elasticsearch 2.0.
"_all": {
"analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
},
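Since the error dump above looks like it comes from the Node.js elasticsearch client, a minimal sketch of recreating the index with the corrected _all block might look like this (the index name and connection settings are placeholders; the settings and mapping come from the question, with most fields elided):

```js
// Sketch: recreate the index with the ES 2.x-compatible _all analyzer setting.
var elasticsearch = require('elasticsearch');
var client = new elasticsearch.Client({ host: 'localhost:9200' }); // placeholder host

client.indices.create({
  index: 'movies', // placeholder index name
  body: {
    settings: {
      number_of_shards: 1,
      number_of_replicas: 0,
      analysis: {
        filter: {
          nGram_filter: { type: 'nGram', min_gram: 2, max_gram: 20, token_chars: ['letter', 'digit', 'punctuation', 'symbol'] }
        },
        analyzer: {
          nGram_analyzer: { type: 'custom', tokenizer: 'whitespace', filter: ['lowercase', 'asciifolding', 'nGram_filter'] },
          whitespace_analyzer: { type: 'custom', tokenizer: 'whitespace', filter: ['lowercase', 'asciifolding'] }
        }
      }
    },
    mappings: {
      movie: {
        _all: {
          analyzer: 'nGram_analyzer',            // "analyzer" replaces the removed "index_analyzer"
          search_analyzer: 'whitespace_analyzer'
        },
        properties: {
          movieName: { type: 'string', index: 'not_analyzed' },
          movieYear: { type: 'double' }
          // ...remaining fields exactly as in the mapping above
        }
      }
    }
  }
}, function (err, resp) {
  if (err) { console.error(err); } else { console.log(resp); }
});
```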