I have a JSON feed in the format below. I need to update the data in a NoSQL collection that has a different schema, as shown below. Using Azure Data Factory, how can I transform the input JSON schema into the target schema?
Since currentValue can have a different data type (array, number, complex type, string, etc.) in each record, the Azure data flow task gives a null value for it in both the 'Derived Column' schema modifier and the 'Flatten' formatter.
Input Json
[
{
"type": "UPDATE",
"key": { "id": "112710876" },
"doc": [
{
"property": "org.numberOfEmployees",
"currentValue": [
{
"value": 2256,
"scope": "Consolidated"
},
{
"value": 516,
"scope": "Individual"
}
]
}
]
},
{
"type": "UPDATE",
"key": { "id": "081243215" },
"doc": [
{
"property": "org.startDate",
"currentValue": "1979-09-14T06:08:51Z"
}
]
},
{
"type": "UPDATE",
"key": { "id": "081243216" },
"doc": [
{
"property": "org.employeeCount",
"currentValue": "20000"
}
]
},
{
"type": "UPDATE",
"key": { "id": "081243216" },
"doc": [
{
"property": "org.headOffice",
"currentValue": {
"city": "NY",
"country": "US"
}
}
]
}
]
Target Schema
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"properties": {
"id": {
"type": "integer"
},
"startDate": {
"type": "string"
},
"numberOfEmployees": {
"type": "array",
"items": [
{
"type": "object",
"properties": {
"value": {
"type": "integer"
},
"scope": {
"type": "string"
}
}
}
]
},
"employeeCount": {
"type": "integer"
},
"headOffice": {
"type": "object",
"properties": {
"city": {
"type": "string"
},
"country": {
"type": "string"
}
}
}
}
}
If there is no direct way to transform the input data to the target schema, is there any way I can stringify currentValue in the data flow task?
Any help would be appreciated.
You can stringify it in a derived column using "toString()" or you can wait for our new Stringify transformation in October :)
Related
I'm trying to access the values in JSON output that I received from two (2) Graph API calls, but each time I try to use them I get this error:
ExpressionEvaluationFailed. The execution of template action 'For_each' failed: the result of the evaluation of 'foreach' expression '@body('Parse_JSON_-_Managed_Devices')?['body']?['value']' is of type 'Null'. The result must be a valid array.
I have validated that my Graph API calls are properly formatted, and output is exactly what I'm expecting to be returned from both API calls. I get this error every time I either try to access the parsed JSON in an Azure Runbook or any other Logic App tasks.
I would love to know if someone has experienced this before and how it can be solved?
Graph query: https://graph.microsoft.com/beta/deviceManagement/managedDevices/?$select=id,userId,deviceName,userDisplayName,azureADDeviceId,managedDeviceName,emailAddress&$filter=operatingSystem eq 'windows'
JSON schema for managed devices
{
"properties": {
"body": {
"properties": {
"@@odata.context": {
"type": "string"
},
"@@odata.count": {
"type": "integer"
},
"@@odata.nextLink": {
"type": "string"
},
"value": {
"items": {
"properties": {
"azureADDeviceId": {
"type": "string"
},
"deviceName": {
"type": "string"
},
"emailAddress": {
"type": "string"
},
"id": {
"type": "string"
},
"managedDeviceName": {
"type": "string"
},
"userDisplayName": {
"type": "string"
},
"userId": {
"type": "string"
}
},
"required": [
"id",
"userId",
"deviceName",
"userDisplayName",
"azureADDeviceId",
"managedDeviceName",
"emailAddress"
],
"type": "object"
},
"type": "array"
}
},
"type": "object"
}
},
"type": "object"
}
Graph query: https://graph.microsoft.com/beta/users?$select=id,displayName,mail,officeLocation&$filter=accountEnabled eq true
JSON schema used for users
Graph query: https://graph.microsoft.com/beta/users?$select=id,displayName,mail,officeLocation&$filter=accountEnabled eq true
{
"properties": {
"body": {
"properties": {
"@@odata.context": {
"type": "string"
},
"@@odata.nextLink": {
"type": "string"
},
"value": {
"items": {
"properties": {
"displayName": {
"type": "string"
},
"id": {
"type": "string"
},
"mail": {
"type": "string"
},
"officeLocation": {
"type": "string"
}
},
"required": [
"id",
"displayName",
"mail",
"officeLocation"
],
"type": "object"
},
"type": "array"
}
},
"type": "object"
}
},
"type": "object"
}
I would like to generate a schema from a json object.
var GenerateSchema = require('generate-schema')
var schema = GenerateSchema.json(request.body);
Request.Body
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
102,
0.5
]
},
"properties": {
"prop0": "value0"
}
},
{
"type": "Feature",
"geometry": {
"type": "LineString",
"coordinates": [
[
102,
0
],
[
103,
1
],
[
104,
0
],
[
105,
1
]
]
},
"properties": {
"prop0": "value0",
"prop1": 0
}
},
{
"type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
100,
0
],
[
101,
0
],
[
101,
1
],
[
100,
1
],
[
100,
0
]
]
]
},
"properties": {
"prop0": "value0",
"prop1": {
"this": "that"
}
}
}
]
}
Schema generated from the request.body
{
"$id": "http://json-schema.org/draft-04/schema#",
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Product",
"type": "object",
"properties": {
"type": {
"type": "string"
},
"features": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string"
},
"geometry": {
"type": "object",
"properties": {
"type": {
"type": "string"
},
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
}
],
"type": "array"
}
}
}
},
"properties": {
"type": "object",
"properties": {
"prop0": {
"type": "string"
},
"prop1": {
"type": "object",
"properties": {
"this": {
"type": "string"
}
}
}
}
}
},
"required": [
"type",
"geometry",
"properties"
]
}
}
}
}
Schema validation with Ajv
[
{
keyword: 'type',
dataPath: '.features[0].geometry.coordinates[0]',
schemaPath: '#/properties/features/items/properties/geometry/properties/coordinates/items/type',
params: { type: 'array' },
message: 'should be array'
}
]
Why does Ajv detect an issue?
Assuming that you want coordinates to be either an array of numbers or an array of arrays of numbers then this schema doesn't look right to me:
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
}
],
"type": "array"
}
}
You would typically use oneOf to choose between different schemas but the oneOf here says:
it can be a number
or a number
or a number
or a number
or a number
But ultimately the oneOf doesn't matter, because the {"type": "array"} next to it applies to every item as well and requires each item to be an array. That would explain why validation fails for {"coordinates": [102, 0.5]}: its items are numbers, so it is not an array of arrays.
It seems that what you're looking for is more along the lines of: (untested)
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{ "type": "number" },
{ "type": "array", "items": { "type": "number"} }
]
}
}
Which reads:
items in a coordinates array can be either "numbers" or "arrays of numbers".
I think that your generate-schema package got this wrong here.
I found my answer with the QuickType library.
When an HTTP request is received, I need to insert the values of an array into a table. In my case the array is ResponseRequired.
I used the following: a "When a HTTP request is received" trigger, then Parse JSON, then a For each loop, and inside the For each an Insert Entity action, but it throws an error. If anybody knows how to implement this, please let me know.
I used this expression for ResponseRequiredType (RRT): body('Parse_JSON')['ResponseRequired'][0]['ResponseRequiredType']
json schema
{
"$schema": "http://json-schema.org/draft-04/schema#",
"properties": {
"AssetErrorCode": {
"type": "string"
},
"AssetErrorDesc": {
"type": "string"
},
"AssetId": {
"type": "integer"
},
"CustomerId": {
"type": "integer"
},
"ResponseRequired": {
"items": [
{
"properties": {
"ResponseRequiredAdditionalData": {
"type": "string"
},
"ResponseRequiredAddress": {
"type": "string"
},
"ResponseRequiredFrequency": {
"type": "string"
},
"ResponseRequiredType": {
"type": "integer"
}
},
"required": [
"ResponseRequiredType",
"ResponseRequiredFrequency",
"ResponseRequiredAddress",
"ResponseRequiredAdditionalData"
],
"type": "object"
},
{
"properties": {
"ResponseRequiredAdditionalData": {
"type": "string"
},
"ResponseRequiredAddress": {
"type": "string"
},
"ResponseRequiredFrequency": {
"type": "string"
},
"ResponseRequiredType": {
"type": "integer"
}
},
"required": [
"ResponseRequiredType",
"ResponseRequiredFrequency",
"ResponseRequiredAddress",
"ResponseRequiredAdditionalData"
],
"type": "object"
},
{
"properties": {
"ResponseRequiredAdditionalData": {
"type": "string"
},
"ResponseRequiredAddress": {
"type": "string"
},
"ResponseRequiredFrequency": {
"type": "string"
},
"ResponseRequiredType": {
"type": "integer"
}
},
"required": [
"ResponseRequiredType",
"ResponseRequiredFrequency",
"ResponseRequiredAddress",
"ResponseRequiredAdditionalData"
],
"type": "object"
}
],
"type": "array"
},
"ServiceKey": {
"type": "string"
}
},
"required": [
"CustomerId",
"ServiceKey",
"AssetId",
"AssetErrorCode",
"AssetErrorDesc",
"ResponseRequired"
],
"type": "object"
}
I used this expression: items('For_each')?['ResponseRequiredAddress']
That solved the issue.
I'd like to write Avro records with Spark 2.2.0 where the schema has a
namespace and some nested records inside.
{
"type": "record",
"name": "userInfo",
"namespace": "my.example",
"fields": [
{
"name": "username",
"type": "string"
},
{
"name": "address",
"type": [
"null",
{
"type": "record",
"name": "address",
"fields": [
{
"name": "street",
"type": [
"null",
"string"
],
"default": null
},
{
"name": "box",
"type": [
"null",
{
"type": "record",
"name": "box",
"fields": [
{
"name": "id",
"type": "string"
}
]
}
],
"default": null
}
]
}
],
"default": null
}
]
}
I need to write out records like:
{
"username": "tom taylor",
"address": {
"my.example.address": {
"street": {
"string": "unknown"
},
"box": {
"my.example.box": {
"id": "id1"
}
}
}
}
}
However, when I read some Avro GenericRecords with spark-avro (4.0.0), do some conversion (e.g. adding a namespace), and then write out the output:
df.foreach {
...
.write
.option("recordName", "userInfo")
.option("recordNamespace", "my.example")
...
}
then in the resulting GenericRecord the namespace of each nested record contains the "full path" to that element from its parents.
I.e., instead of my.example.box I get my.example.address.box. When I try to read this record back with the schema, there is of course a mismatch.
What is the right way to define the namespace for the writer?
I have a file with a list of JSON blocks and am stuck on reading/processing them in U-SQL and writing the result to a text file.
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
{
"id": "0002",
"type": "nut",
"name": "ake",
"ppu": 1.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
{
"id": "0003",
"type": "test",
"name": "ake",
"ppu": 1.55,
"batters":
{
"batter":
[
]
},
"topping":
[
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
Can someone help me with this?
REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];
DECLARE @Full_Path string = @"C:\Users\test\Desktop\File\JsonTest.json";
USING [Microsoft.Analytics.Samples.Formats];
@RawExtract =
EXTRACT
[RawString] string
FROM
@Full_Path
USING
Extractors.Text(delimiter:'\n', quoting : false);
@ParsedJSONLines =
SELECT JsonFunctions.JsonTuple([RawString]) AS JSONLine
FROM @RawExtract;
@StagedData =
SELECT
JSONLine["id"] AS Id,
JSONLine["name"] AS Name,
JSONLine["type"] AS Type,
JSONLine["ppu"] AS PPU,
JSONLine["batters"] AS Batter
FROM
@ParsedJSONLines;
DECLARE @Output_Path string = @"C:\Users\Test\Desktop\File\Test2.csv";
OUTPUT @StagedData
TO @Output_Path
USING Outputters.Csv();
I am receiving an error while evaluating an expression:
Error while evaluating expression JsonFunctions.JsonTuple(RawString)
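You can't use a text extractor to extract JSON unless the file is in JSON Lines format (one complete JSON document per line).
With the '\n' delimiter, the text extractor splits each multi-line JSON document into separate rows, so JsonTuple receives incomplete fragments and you get this error.
Use JsonExtractor instead of the text extractor.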
https://github.com/Azure/usql/blob/master/Examples/DataFormats/Microsoft.Analytics.Samples.Formats/Json/JsonExtractor.cs