Group dictionaries according to its key - python-3.x

I would like to group the dictionaries returned from api inside for loop by its given key.Values for type and query are dynamic, depends from request params from user
def process_item(self, item, spider):
print(f"item {item}")
prints
{
"type": "sponsored_product",
"query": "leather wallet",
"asin": "B092S9S3G3"
},
{
"type": "search_product",
"query": "leather wallet",
"asin": "B092S91234"
},
{
"type": "search_product",
"query": "mens wallet",
"asin": "B092S9789"
}
So what I am trying to achieve is to group them and save in to json file as list of dicts
[
"mens wallet": [
{
"search_product": [
{
"asin": "asin number123"
},
{
"asin": "asin number567"
},
],
"sponsored_product": [
{
"asin": "asin number123"
},
{
"asin": "asin number567"
},
]
}
],
"leather wallet": [
{
"search_product": [
{
"asin": "asin number123"
},
{
"asin": "asin number567"
},
],
"sponsored_product": [
{
"asin": "asin number123"
},
{
"asin": "asin number567"
},
]
}
],
]
I was thinking of saving it first to the file and perform grouping later but I cant make it work.Do you have Idea on how to group them before saving to a json file?
for k, v in item.items():
with open("list1.json", "a", encoding="utf-8") as f:
json.dump(v, f, ensure_ascii=False, indent=4)
UPDATE:
So i tried the following code
import itertools
import json
buck = [
{"type": "sponsored_product", "query": "leather wallet", "asin": "B092S9S3G3"},
{"type": "search_product", "query": "leather wallet", "asin": "B092S91234"},
{"type": "search_product", "query": "mens wallet", "asin": "B092S9789"},
]
def key_func(k):
return k["query"]
def key_func2(k):
return k["type"]
info = sorted(buck, key=key_func)
groups = []
for key, group in itertools.groupby(buck, key_func):
# print(list(group))
groups.append({key: list(group)})
print(groups)
And with output
[
{
'leather wallet': [
{
'type': 'sponsored_product',
'query': 'leather wallet',
'asin': 'B092S9S3G3'
},
{
'type': 'search_product',
'query': 'leather wallet',
'asin': 'B092S91234'
}
]
},
{
'mens wallet': [
{
'type': 'search_product',
'query': 'mens wallet',
'asin': 'B092S9789'
}
]
}
]
What's remaining is to group by type

Related

Missing date data when getting data from Google Analytic 4 (GA4) to create chart with granularity daily?

I'm getting started with Google Analytics 4, and I have a task is creating chart report with custom dimensions. I have a problem when get data from API. It missing date data when getting data from GA4. I want to use this data to build a chart
const {
propertyId,
startDate,
endDate,
} = req.query;
// Request report data.
const analyticsDataClient = await getAnalyticsDataClient(getActiveShop(req));
const response = await analyticsDataClient.runPivotReport({
property: `properties/${propertyId}`,
dimensions: [
{ name: "date" },
{ name: "CUSTOM_DIMENSION_EVENT" },
{ name: "CUSTOM_DIMENSION_EVENT" },
{ name: "CUSTOM_DIMENSION_EVENT" },
],
metrics: [
{ name: "sessions" },
{ name: "activeUsers" },
{ name: "CUSTOM_METRIC_EVENT" },
{ name: "CUSTOM_METRIC_EVENT" },
],
dateRanges: [{ startDate: 2022-12-06, endDate: 2023-01-03 }], // Dateranges
pivots: [
{
fieldNames: ["date"],
limit: 30,
},
{
fieldNames: ["CUSTOM_DIMENSION_EVENT"],
limit: 5,
},
{
fieldNames: ["CUSTOM_DIMENSION_EVENT"],
limit: 5,
},
{
fieldNames: ["CUSTOM_DIMENSION_EVENT"],
limit: 5,
},
],
dimensionFilter: {
filter: {
fieldName: "CUSTOM_METRIC_EVENT", // id
stringFilter: {
matchType: "EXACT",
value: "CUSTOM_METRIC_EVENT_ID",
},
},
},
keepEmptyRows: true,
});
response GA4 API
{
"response": [
{
"pivotHeaders": [
{
"pivotDimensionHeaders": [
{
"dimensionValues": [
{
"value": "20221227", // return this date only
"oneValue": "value"
}
]
}
],
"rowCount": 1
},
{
"pivotDimensionHeaders": [
{
"dimensionValues": [
{
"value": "885c7b0d-bc65-47be-b7df-871947bc5de4",
"oneValue": "value"
}
]
}
],
"rowCount": 1
},
{
"pivotDimensionHeaders": [
{
"dimensionValues": [
{
"value": "New Page For Test GA4",
"oneValue": "value"
}
]
}
],
"rowCount": 1
},
{
"pivotDimensionHeaders": [
{
"dimensionValues": [
{
"value": "index",
"oneValue": "value"
}
]
}
],
"rowCount": 1
}
],
"dimensionHeaders": [
{
"name": "date"
},
{
"name": "customEvent:page_id"
},
{
"name": "customEvent:page_name"
},
{
"name": "customEvent:page_type"
}
],
"metricHeaders": [
{
"name": "sessions",
"type": "TYPE_INTEGER"
},
{
"name": "activeUsers",
"type": "TYPE_INTEGER"
},
{
"name": "customEvent:revenue",
"type": "TYPE_CURRENCY"
}
],
"rows": [
{
"dimensionValues": [
{
"value": "20221227",
"oneValue": "value"
},
{
"value": "885c7b0d-bc65-47be-b7df-871947bc5de4",
"oneValue": "value"
},
{
"value": "New Page For Test GA4",
"oneValue": "value"
},
{
"value": "index",
"oneValue": "value"
}
],
"metricValues": [
{
"value": "7",
"oneValue": "value"
},
{
"value": "7",
"oneValue": "value"
},
{
"value": "0",
"oneValue": "value"
},
{
"value": "8",
"oneValue": "value"
}
]
}
],
"aggregates": [],
"metadata": {
"dataLossFromOtherRow": false,
"schemaRestrictionResponse": {
"activeMetricRestrictions": []
},
"_schemaRestrictionResponse": "schemaRestrictionResponse",
"_currencyCode": "currencyCode",
"timeZone": "America/Los_Angeles",
"_timeZone": "timeZone"
},
"propertyQuota": null,
"kind": "analyticsData#runPivotReport"
},
null,
null
]
}
Although I set dateRanges { startDate: 2022-12-06, endDate: 2023-01-03 } , it only returns 20221227 for me? What's problem here?
If you check runPivotReport in the doucmentation you will find that the response body for a RunPivotReportResponse
Does not contain dates, the api probably assumes you know what dates you requested.
{
"pivotHeaders": [
{
object (PivotHeader)
}
],
"dimensionHeaders": [
{
object (DimensionHeader)
}
],
"metricHeaders": [
{
object (MetricHeader)
}
],
"rows": [
{
object (Row)
}
],
"aggregates": [
{
object (Row)
}
],
"metadata": {
object (ResponseMetaData)
},
"propertyQuota": {
object (PropertyQuota)
},
"kind": string
}

Filter nested result inside a nested object with elasticsearch

I'm trying to filter a nested object and sort by the result, however, I tried some things without success, I'll leave my initial attempt and it works partially, it just filters according to what I have in my search variable, but all the results come of this nested object as it is inside the 'root' which is another nested object
Elastic version: 7.13.0 with NodeJS
using #elastic/elasticsearch official package from npm
let params: RequestParams.Search = {
index: index,
body: {
size: 30,
query: {
bool: {
must: [
{
nested: {
path: "profile",
query: {
bool: {
must: [
{
match: {
"profile.id": profileId,
},
},
],
},
},
},
},
],
filter: [
{
nested: {
path: "profile.following",
ignore_unmapped: true,
query: {
query_string: {
fields: [
"profile.following.name",
"profile.following.username",
],
query: searchWord + "*",
},
},
},
},
],
},
},
},
};
I need it to be this specific 'profile.id' that is passed by parameter in the function, so the result is only 1 profile with N people that it follows
the document is mapped as follows, I left only the fields relevant to the question:
{
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "integer"
},
"phone": {
"type": "text"
},
"profile": {
"type": "nested",
"properties": {
"id": {
"type": "integer"
},
"username": {
"type": "text"
},
"following": {
"type": "nested",
"properties": {
"id": {
"type": "integer"
},
"isAwaitingApproval": {
"type": "boolean"
},
"name": {
"type": "text"
},
"profilePicURL": {
"type": "text"
},
"username": {
"type": "text"
}
}
}
}
}
}
}
}
}
an example of a current result is:
with the following parameters (profileId:141, searchWord: "para" )
{
"res": [
{
"profilePicURL": "localimage",
"name": "donor donor",
"id": 140,
"username": "victorTesteElastic2",
"isAwaitingApproval": false
},
{
"profilePicURL": "localimage",
"name": "para ser seguido",
"id": 142,
"username": "victorprivate",
"isAwaitingApproval": true
}
]
}
the desired result is:
{
"res": [
{
"profilePicURL": "localimage",
"name": "para ser seguido",
"id": 142,
"username": "victorprivate",
"isAwaitingApproval": true
}
]
}
with some more research I got what I needed, I'll leave the answer here in case anyone needs it too
let params: RequestParams.Search = {
index: index,
body: {
size: 30,
query: {
bool: {
must: [
{
nested: {
path: "profile",
query: {
bool: {
must: [
{
match: {
"profile.id": profileId,
},
},
],
},
},
},
},
{
nested: {
path: "profile",
inner_hits: {
name: "profile",
},
query: {
nested: {
path: "profile.following",
inner_hits: {
name: "following",
},
ignore_unmapped: true,
query: {
query_string: {
fields: [
"profile.following.name",
"profile.following.username",
],
query: searchWord + "*",
},
},
},
},
},
},
],
},
},
},
};
I basically put in must what was in the filter, mapped the nested object from above, in this case the profile, and put the tag inner_hits for profile and inner_hits for followings, that's the only way it worked
the answer I need was returned here:
body.hits.hits[0].inner_hits.profile.hits.hits[0].inner_hits.following.hits.hits
below is an example of the answer:
{
"res": [
{
"_index": "donor",
"_type": "_doc",
"_id": "P3VWNnsB4coAEhD-F3fF",
"_nested": {
"field": "profile",
"offset": 0,
"_nested": {
"field": "following",
"offset": 0
}
},
"_score": 1,
"_source": {
"profilePicURL": "localimage",
"name": "donor donor",
"id": 140,
"username": "victorTesteElastic2",
"isAwaitingApproval": false
}
},
{
"_index": "donor",
"_type": "_doc",
"_id": "P3VWNnsB4coAEhD-F3fF",
"_nested": {
"field": "profile",
"offset": 0,
"_nested": {
"field": "following",
"offset": 1
}
},
"_score": 1,
"_source": {
"profilePicURL": "localimage",
"name": "para ser seguido",
"id": 142,
"username": "victorprivate",
"isAwaitingApproval": true
}
}
]
}
the filtered data I really need that have been matched in must is in this array, where I need to iterate and look at _source which is the data that is indexed

Prepare List from Different input Arrays and Objects in Jolt

Hi I am new to JOLT transformation and I am trying to transform some thing like below.
Main goal here is to have a list of objects without making the constant indexing in jolt.
Transformation of different objects to a common list .
Any Help is appreciate .
Data provides here is an example of what I expected.
Input :
{
"CIT": [
{
"name": "name_CIT_1",
"desc": "desc_CIT_1"
},
{
"name": "name_CIT_2",
"desc": "desc_CIT_2"
},
{
"name": "name_CIT_3",
"desc": "desc_CIT_3"
}
],
"BIT": {
"name": "name_BIT",
"desc": "desc_BIT"
},
"NIT": {
"name": "name_NIT",
"desc": "desc_NIT"
},
"KIT": {
"name": "name_KIT",
"desc": "desc_KIT"
}
}
Jolt:
[
{
"operation": "modify-default-beta",
"spec": {
"*": {}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {}
},
{
"operation": "shift",
"spec": {
"CIT": {
"*": {
"name": "CollegeList[0].name",
"desc": "CollegeList[0].desc"
}
},
"BIT": {
"name": "CollegeList[1].name",
"desc": "CollegeList[1].desc"
},
"NIT": {
"name": "CollegeList[2].name",
"desc": "CollegeList[2].desc"
},
"KIT": {
"name": "CollegeList[3].name",
"desc": "CollegeList[3].desc"
}
}
}
]
Output:
{
"CollegeList" : [ {
"name" : [ "name_CIT_1", "name_CIT_2", "name_CIT_3" ],
"desc" : [ "desc_CIT_1", "desc_CIT_2", "desc_CIT_3" ]
}, {
"name" : "name_BIT",
"desc" : "desc_BIT"
}, {
"name" : "name_NIT",
"desc" : "desc_NIT"
}, {
"name" : "name_KIT",
"desc" : "desc_KIT"
} ]
}
Expected Output:
{
"CollegeList": [
{
"name": "name_CIT_1",
"desc": "desc_CIT_1"
},
{
"name": "name_CIT_2",
"desc": "desc_CIT_2"
},
{
"name": "name_CIT_3",
"desc": "desc_CIT_3"
},
{
"name": "name_BIT",
"desc": "desc_BIT"
},
{
"name": "name_NIT",
"desc": "desc_NIT"
},
{
"name": "name_KIT",
"desc": "desc_KIT"
}
]
}
You can use two levels of shift transformations. Indeed, the desired array is obtained within the first level except for the key of the array which is root as default. Then only renaming of the array's key occurs within the second level such as
[
{
"operation": "shift",
"spec": {
"*": "&1"
}
},
{
"operation": "shift",
"spec": {
"#(0,&)": "CollegeList"
}
}
]
Another approach for the same :
[
{
"operation": "shift",
"spec": {
"CIT": {
"*": "CollegeList[]"
},
"*": "CollegeList[]"
}
}
]

Flatten JSON read with JsonSlurper

Trying to read and transform a JSON file where the input file has:
{
"id": “A9”,
"roles": [
{"title": “A”, “type”: “alpha” },
{"title": “B”, “type”: “beta” },
]
},
{
"id": “A10”,
"roles": [
{"title": “D”, “type”: “delta” },
]
},
But requires transformation for a library which expects values at the same level :
{
"roles": [
{"id": “A9”, "title": “A”, “type”: “alpha” },
{"id": “A9”, "title": “B”, “type”: “beta” },
]
},
{
"roles": [
{"id": “A10”, "title": “D”, “type”: “delta” },
]
},
I'm able to read the input with JsonSlurper, but stuck on how to denormalize it.
With this data.json (notice I had to clean up trailing commas as Groovy's JSON parser will not accept them):
{
"records":[{
"id": "A9",
"roles": [
{"title": "A", "type": "alpha" },
{"title": "B", "type": "beta" }
]
},
{
"id": "A10",
"roles": [
{"title": "D", "type": "delta" }
]
}]
}
You can do it this way:
def parsed = new groovy.json.JsonSlurper().parse(new File("data.json"))
def records = parsed.records
records.each { record ->
record.roles.each { role ->
role.id = record.id
}
record.remove('id')
}

cloudant searching index by multiple values

Cloudant is returning error message:
{"error":"invalid_key","reason":"Invalid key use-index for this request."}
whenever I try to query against an index with the combination operator, "$or".
A sample of what my documents look like is:
{
"_id": "28f240f1bcc2fbd9e1e5174af6905349",
"_rev": "1-fb9a9150acbecd105f1616aff88c26a8",
"type": "Feature",
"properties": {
"PageName": "A8",
"PageNumber": 1,
"Lat": 43.051523,
"Long": -71.498852
},
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-71.49978935969642,
43.0508382914137
],
[
-71.49978564033566,
43.052210148524
],
[
-71.49791499857444,
43.05220740550381
],
[
-71.49791875962663,
43.05083554852429
],
[
-71.49978935969642,
43.0508382914137
]
]
]
}
}
The index that I created is for field "properties.PageName", which works fine when I'm just querying for one document, but as soon as I try for multiple ones, I would receive the error response as quoted in the beginning.
If it helps any, here is the call:
POST https://xyz.cloudant.com/db/_find
request body:
{
"selector": {
"$or": [
{ "properties.PageName": "A8" },
{ "properties.PageName": "M30" },
{ "properties.PageName": "AH30" }
]
},
"use-index": "pagename-index"
}
In order to perform an $or query you need to create a text (full text) index, rather than a json index. For example, I just created the following index:
{
"index": {
"fields": [
{"name": "properties.PageName", "type": "string"}
]
},
"type": "text"
}
I was then be able to perform the following query:
{
"selector": {
"$or": [
{ "properties.PageName": "A8" },
{ "properties.PageName": "M30" },
{ "properties.PageName": "AH30" }
]
}
}

Resources