I'm migrating data from Mongo to Arango and I need to reproduce a $group aggregation. I have successfully reproduced the results, but I'm concerned that my approach may be sub-optimal. Can the AQL be improved?
I have a collection of data that looks like this:
{
"_id" : ObjectId("5b17f9d85b2c1998598f054e"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b1808145b2c1998598f054f"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b18083c5b2c1998598f0550"),
"department" : "Development",
"region" : "Europe"
}
{
"_id" : ObjectId("5b1809a75b2c1998598f0551"),
"department" : "Sales"
}
Note that each value can be a string, an array, or not present at all.
In Mongo I'm using the following code to aggregate the data:
// Counts documents per (department, region) pair. Both fields may hold a
// string, an array, or be absent, so each one is unwound first.

// Builds an $unwind stage for `fieldPath` with preserveNullAndEmptyArrays
// enabled, so documents without a usable value are kept rather than dropped.
var unwindKeepingMissing = function (fieldPath) {
  return { $unwind: { path: fieldPath, preserveNullAndEmptyArrays: true } };
};

// Falls back to the literal string "null" when `fieldPath` has no value,
// giving such documents a group key of their own.
var orNullString = function (fieldPath) {
  return { $ifNull: [fieldPath, "null"] };
};

db.test.aggregate([
  unwindKeepingMissing("$department"),
  unwindKeepingMissing("$region"),
  {
    $group: {
      _id: {
        department: orNullString("$department"),
        region: orNullString("$region")
      },
      count: { $sum: 1 }
    }
  }
])
In Arango I'm using the following AQL:
// Count documents per (department, region) combination.
// NOT_NULL substitutes the string "null" for a missing attribute, and
// APPEND onto an empty array turns a plain string into a one-element array
// while keeping array values as a list of elements.
FOR doc IN test
  LET departments = APPEND([], NOT_NULL(doc.department, "null"))
  LET regions = APPEND([], NOT_NULL(doc.region, "null"))
  FOR dep IN departments
    FOR reg IN regions
      COLLECT id = { department: dep, region: reg } WITH COUNT INTO counter
      RETURN { _id: id, count: counter }
Edit:
The APPEND is used to convert string values into an Array
Both produce results that look like this;
{
"_id" : {
"department" : "Marketing",
"region" : "US"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Development",
"region" : "Europe"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Sales",
"region" : "null"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Marketing",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "US"
},
"count" : 2.0
}
Your approach seems alright. I would suggest to use TO_ARRAY() instead of APPEND() to make it easier to understand though.
Both functions skip null values, thus it is unavoidable to provide some placeholder, or test for null explicitly and return an array with a null value (or whatever works best for you):
// Count documents per (department, region) combination.
// A missing attribute is represented by a one-element array holding null, so
// the document still contributes one row; otherwise TO_ARRAY normalises the
// value (string or array) to an array.
FOR doc IN test
  LET departments = doc.department == null ? [ null ] : TO_ARRAY(doc.department)
  LET regions = doc.region == null ? [ null ] : TO_ARRAY(doc.region)
  FOR dep IN departments
    FOR reg IN regions
      COLLECT department = dep, region = reg WITH COUNT INTO count
      RETURN { _id: { department: department, region: region }, count: count }
Collection test:
[
{
"_key": "5b17f9d85b2c1998598f054e",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b18083c5b2c1998598f0550",
"department": "Development",
"region": "Europe"
},
{
"_key": "5b1808145b2c1998598f054f",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b1809a75b2c1998598f0551",
"department": "Sales"
}
]
Result:
[
{
"_id": {
"department": "Development",
"region": "Europe"
},
"count": 1
},
{
"_id": {
"department": "Marketing",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Marketing",
"region": "US"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": null
},
"count": 1
},
{
"_id": {
"department": "Sales",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": "US"
},
"count": 2
}
]
Related
I have below document:
{
"_id": "61f7d5cfd0c32b744d3f81c2",
"_form": "61e66b8fd0c32b744d3e24a0",
"_workflow": "61e54fe2d0c32b744d3e0b7c",
"_appUser": "61e6b098d0c32b744d3e3808",
"sectionResponse": [{
"_id": "61f7d5cfd0c32b744d3f81c3",
"name": "Project Details & Goals",
"order": 2,
"fieldResponse": [{
"_id": "61f7d5cfd0c32b744d3f81c4",
"fieldType": "Text",
"name": "Project Name",
"value": "TRT",
"order": 0
},
{
"_id": "61f7d5cfd0c32b744d3f81c5",
"fieldType": "Number",
"name": "Amount Requested",
"value": "20",
"order": 1
},
{
"_id": "61f7d5cfd0c32b744d3f81c6",
"fieldType": "Number",
"name": "Project Cost",
"value": "50",
"order": 1
},
{
"_id": "61f7d5cfd0c32b744d3f81c7",
"fieldType": "Comment",
"name": "Project Goals",
"value": "TRT",
"order": 3
}
]
},
{
"_id": "61f7d5cfd0c32b744d3f81c8",
"name": "Section Heading",
"order": 2,
"fieldResponse": [{
"_id": "61f7d5cfd0c32b744d3f81c9",
"fieldType": "Multiselectdropdown",
"name": "Multiselectdropdown",
"value": "Y",
"order": 0
},
{
"_id": "61f7d5cfd0c32b744d3f81ca",
"fieldType": "Image_Upload",
"name": "Image Upload",
"value": "Y",
"order": 1
}
]
}
],
"order": 2,
"status": "Reviewed",
"updatedAt": "2022-01-31T12:27:59.541Z",
"createdAt": "2022-01-31T12:27:59.541Z",
"__v": 0
}
Inside the document, there is a sectionResponse which contains response of multiple sections. Inside this, there is a fieldResponse which contains the name and value. I have to extract the value from all the documents where name is Amount Requested.
How can I write a query for such a situation?
Here is a solution that returns only matching material and requires no $unwind.
db.foo.aggregate([
// This stage alone is enough to give you the basic info.
// You will get not only doc _id but also an array of arrays
// (fieldResponse within sectionResponse) containing the whole
// fieldResponse doc. It is slightly awkward but if you need structural data
// other than *just* the value, it is a good start:
{$project: {
// outer filter removes inner filter results where size is 0
// i.e. no "Amount Requested" found.
XX: {$filter: {input:
{$map: {
input: "$sectionResponse", as: "z1", in:
// inner filter gets only name = Amount Requested
{$filter: {input: "$$z1.fieldResponse",
as: "z1",
cond: {$eq:["$$z1.name","Amount Requested"]}
}}
}},
as: "z2",
cond: {$ne: ["$$z2", [] ]}
}}
}}
which yields (given a slightly expanded input set where subdocs were copied but the value and order changed for clarity):
{
"_id" : 0,
"XX" : [
[
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "20",
"order" : 1
},
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "77",
"order" : 18
}
],
[
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "99",
"order" : 818
}
]
]
}
{
"_id" : 1,
"XX" : [
[
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "333",
"order" : 1
}
]
]
}
{ "_id" : 2, "XX" : [ ] }
If you don't want to know about top level docs that contained
NO fieldResponses where name = "Amount Requested" then append this stage:
{$match: {XX: {$ne: [] }}}
Finally, if you really want just the values, append this reduce stage:
,{$addFields: {XX: {$reduce: {
input: "$XX",
initialValue: [],
in: {$concatArrays: ["$$value",
{$map: {input: "$$this",
as:"z",
in: "$$z.value"
}} ] }
}}
}}
which yields:
{ "_id" : 0, "XX" : [ "20", "77", "99" ] }
{ "_id" : 1, "XX" : [ "333" ] }
If you want a little more than just value(like order for example) then have $map return a doc instead of a scalar, e.g.:
{$map: {input: "$$this",
as:"z",
in: {v:"$$z.value",o:"$$z.order"}
}} ] }
to yield:
{
"_id" : 0,
"XX" : [
{
"v" : "20",
"o" : 1
},
{
"v" : "77",
"o" : 18
},
{
"v" : "99",
"o" : 818
}
]
}
{ "_id" : 1, "XX" : [ { "v" : "333", "o" : 1 } ] }
Again, the input set provided by the OP was expanded with additional {name:"Amount Requested"} subdocs tossed into different sectionResponse arrays to generate a more complex structure.
Maybe something like this, which you can easily adapt to Python, assuming you need only the value from those sectionResponse.$[].fieldResponse.$[] elements that have the name "Amount Requested":
// Emits one result document per fieldResponse entry named "Amount Requested",
// carrying only that entry's value (plus the parent document's _id).

// Within a single section ($$somesub), keep only the matching field entries.
var matchingFields = {
  "$filter": {
    "input": "$$somesub.fieldResponse",
    "as": "sub",
    "cond": { "$eq": ["$$sub.name", "Amount Requested"] }
  }
};

// Every section reduced to its _id and its matching field entries.
var trimmedSections = {
  "$map": {
    "input": "$sectionResponse",
    "as": "somesub",
    "in": { "_id": "$$somesub._id", "fieldResponse": matchingFields }
  }
};

// Drop sections that ended up with no matching field entry.
var nonEmptySections = {
  "$filter": {
    "input": trimmedSections,
    "as": "some",
    "cond": { "$gt": [{ "$size": "$$some.fieldResponse" }, 0] }
  }
};

db.collection.aggregate([
  // Only documents that contain at least one matching field entry.
  { $match: { "sectionResponse.fieldResponse.name": "Amount Requested" } },
  { "$project": { "sectionResponse": nonEmptySections } },
  // Flatten sections, then the field entries inside each section.
  { $unwind: "$sectionResponse" },
  { $unwind: "$sectionResponse.fieldResponse" },
  { $project: { value: "$sectionResponse.fieldResponse.value" } }
])
Match the documents containing at least one element with sectionResponse.fieldResponse.name:"Amount Requested"
project/map all sectionResponse.fieldResponse elements containing name="Amount Requested" ( non empty elements only )
unwind the sectionResponse array
unwind the fieldResponse array
project only the value field.
playground
For best results, an index on "sectionResponse.fieldResponse.name" needs to be added.
I have written a small query to get data from two collections,
here is my query as you can see:
// Looks up, for every customer, the Product documents whose _id equals the
// customer's `product` field, storing them under `productdata`, then logs
// either the error or the result.
const productLookupStage = {
  $lookup: {
    from: "Product",
    localField: "product",
    foreignField: "_id",
    as: "productdata",
  },
};

Customer.aggregate([productLookupStage]).exec((err, res) => {
  if (!err) {
    console.log(res);
    return;
  }
  console.log('Error', err);
});
I am always getting below output:
{
_id: 605412b063db104dfcb3d78a,
updated_at: 2021-03-19T02:55:44.098Z,
name: 'bhavesh',
product: '6052c356a76d435cf857aa3c',
city: 'surat',
__v: 0,
productdata: []
},
My collections are as follow:
// Customer collection
{
"_id": ObjectId("6052c3e50016c24b24a37f4d"),
"updated_at": ISODate("2021-03-18T08:37:17.659+05:30"),
"name": "bhavesh",
"product": "6052c356a76d435cf857aa3c",
"city": "surat"
},
{
"_id": ObjectId("6052c40202dce351e8448441"),
"updated_at": ISODate("2021-03-18T08:37:46.184+05:30"),
"name": "alex",
"product": "6052c369b9395c55042373e6",
"city": "ahmdabad"
},
{
"_id": ObjectId("6052c4157d1616563c707732"),
"updated_at": ISODate("2021-03-18T08:38:05.935+05:30"),
"name": "lexa",
"product": "6052c37321b64b4c40ac65a5",
"city": "mumbai"
},
{
"_id": ObjectId("6052c42efa94db04ccf19560"),
"updated_at": ISODate("2021-03-18T08:38:30.411+05:30"),
"name": "bhumi",
"product": "6052c37ef633e64cb8b65423",
"city": "surat"
}
// Product collection
{
"_id": ObjectId("6052c356a76d435cf857aa3c"),
"updated_at": ISODate("2021-03-18T08:34:54.187+05:30"),
"name": "laptop",
"company": "6052c217a1abc325b01ec87c",
"price": 66
},
{
"_id": ObjectId("6052c369b9395c55042373e6"),
"updated_at": ISODate("2021-03-18T08:35:13.939+05:30"),
"name": "charger",
"company": "6052c2311993dc433c0657d7",
"price": 50
},
{
"_id": ObjectId("6052c37321b64b4c40ac65a5"),
"updated_at": ISODate("2021-03-18T08:35:23.503+05:30"),
"name": "tablet",
"company": "6052c2311993dc433c0657d7",
"price": 50
},
{
"_id": ObjectId("6052c37ef633e64cb8b65423"),
"updated_at": ISODate("2021-03-18T08:35:34.979+05:30"),
"name": "bettery",
"company": "6052c217a1abc325b01ec87c",
"price": 500
}
// company collection
{
"_id": ObjectId("6052c217a1abc325b01ec87c"),
"updated_at": ISODate("2021-03-18T08:29:35.387+05:30"),
"name": "artoon",
"status": "active"
},
{
"_id": ObjectId("6052c2311993dc433c0657d7"),
"updated_at": ISODate("2021-03-18T08:30:01.508+05:30"),
"name": "identix",
"status": "active"
}
So basically I want data from 3 collections which have unique relation if you need more description please comment.
The $lookup doesn't match anything because the localField 'product' of the Customer collection is of type String, while the foreignField _id of the Product collection is of type ObjectId. The two values are not equal even though they look the same in your post. Notice that the _id value has no quotes around it, while the other fields (product and company) do.
You need to make them all the same type in order for the search to match. That is making
product field in Customer collection ObjectId type
company field in Product collection ObjectId type
Try this query for performing join between 3 collections:
// Joins Customer -> Product -> company. The reference fields are stored as
// strings, so each is converted with $toObjectId before being compared with
// the target collection's _id.

// Replaces a product's `company` string with the matching company document(s).
var companyLookup = {
  $lookup: {
    from: "company",
    let: { company_id: { $toObjectId: "$company" } },
    pipeline: [
      { $match: { $expr: { $eq: ["$_id", "$$company_id"] } } }
    ],
    as: "company"
  }
};

// Attaches the matching product (with its company embedded) as `Product`.
var productLookup = {
  $lookup: {
    from: "Product",
    let: { product_id: { $toObjectId: "$product" } },
    pipeline: [
      { $match: { $expr: { $eq: ["$_id", "$$product_id"] } } },
      companyLookup,
      // $lookup yields an array; unwind it to a single embedded document.
      { $unwind: "$company" }
    ],
    as: "Product"
  }
};

db.Customer.aggregate([
  productLookup,
  { $unwind: "$Product" }
]);
Output:
/* 1 createdAt:3/18/2021, 8:37:17 AM*/
{
"_id" : ObjectId("6052c3e50016c24b24a37f4d"),
"updated_at" : ISODate("2021-03-18T08:37:17.659+05:30"),
"name" : "bhavesh",
"product" : "6052c356a76d435cf857aa3c",
"city" : "surat",
"Product" : {
"_id" : ObjectId("6052c356a76d435cf857aa3c"),
"updated_at" : ISODate("2021-03-18T08:34:54.187+05:30"),
"name" : "laptop",
"company" : {
"_id" : ObjectId("6052c217a1abc325b01ec87c"),
"updated_at" : ISODate("2021-03-18T08:29:35.387+05:30"),
"name" : "artoon",
"status" : "active"
},
"price" : 66
}
},
/* 2 createdAt:3/18/2021, 8:37:46 AM*/
{
"_id" : ObjectId("6052c40202dce351e8448441"),
"updated_at" : ISODate("2021-03-18T08:37:46.184+05:30"),
"name" : "alex",
"product" : "6052c369b9395c55042373e6",
"city" : "ahmdabad",
"Product" : {
"_id" : ObjectId("6052c369b9395c55042373e6"),
"updated_at" : ISODate("2021-03-18T08:35:13.939+05:30"),
"name" : "charger",
"company" : {
"_id" : ObjectId("6052c2311993dc433c0657d7"),
"updated_at" : ISODate("2021-03-18T08:30:01.508+05:30"),
"name" : "identix",
"status" : "active"
},
"price" : 50
}
},
/* 3 createdAt:3/18/2021, 8:38:05 AM*/
{
"_id" : ObjectId("6052c4157d1616563c707732"),
"updated_at" : ISODate("2021-03-18T08:38:05.935+05:30"),
"name" : "lexa",
"product" : "6052c37321b64b4c40ac65a5",
"city" : "mumbai",
"Product" : {
"_id" : ObjectId("6052c37321b64b4c40ac65a5"),
"updated_at" : ISODate("2021-03-18T08:35:23.503+05:30"),
"name" : "tablet",
"company" : {
"_id" : ObjectId("6052c2311993dc433c0657d7"),
"updated_at" : ISODate("2021-03-18T08:30:01.508+05:30"),
"name" : "identix",
"status" : "active"
},
"price" : 50
}
},
/* 4 createdAt:3/18/2021, 8:38:30 AM*/
{
"_id" : ObjectId("6052c42efa94db04ccf19560"),
"updated_at" : ISODate("2021-03-18T08:38:30.411+05:30"),
"name" : "bhumi",
"product" : "6052c37ef633e64cb8b65423",
"city" : "surat",
"Product" : {
"_id" : ObjectId("6052c37ef633e64cb8b65423"),
"updated_at" : ISODate("2021-03-18T08:35:34.979+05:30"),
"name" : "bettery",
"company" : {
"_id" : ObjectId("6052c217a1abc325b01ec87c"),
"updated_at" : ISODate("2021-03-18T08:29:35.387+05:30"),
"name" : "artoon",
"status" : "active"
},
"price" : 500
}
}
I have following collection
{
"_id" : ObjectId("5b18d14cbc83fd271b6a157c"),
"status" : "pending",
"description" : "You have to complete the challenge...",
}
{
"_id" : ObjectId("5b18d31a27a37696ec8b5773"),
"status" : "completed",
"description" : "completed...",
}
{
"_id" : ObjectId("5b18d31a27a37696ec8b5775"),
"status" : "pending",
"description" : "pending...",
}
{
"_id" : ObjectId("5b18d31a27a37696ec8b5776"),
"status" : "inProgress",
"description" : "inProgress...",
}
I need to group by status and get all the keys dynamically which are in status
[
{
"completed": [
{
"_id": "5b18d31a27a37696ec8b5773",
"status": "completed",
"description": "completed..."
}
]
},
{
"pending": [
{
"_id": "5b18d14cbc83fd271b6a157c",
"status": "pending",
"description": "You have to complete the challenge..."
},
{
"_id": "5b18d31a27a37696ec8b5775",
"status": "pending",
"description": "pending..."
}
]
},
{
"inProgress": [
{
"_id": "5b18d31a27a37696ec8b5776",
"status": "inProgress",
"description": "inProgress..."
}
]
}
]
Not that I think it's a good idea, mostly because I don't see any "aggregation" here at all, but it can be done: after "grouping" by "status" and using $push to collect each group's documents into an array, you similarly $push those groups into a single array of "k"/"v" pairs and then convert that array into the keys of one document with $arrayToObject inside a $replaceRoot:
// Produces a single document whose keys are the distinct "status" values and
// whose values are arrays of the full documents having that status.

// One group per status, collecting the complete documents.
var collectByStatus = {
  "$group": { "_id": "$status", "data": { "$push": "$$ROOT" } }
};

// Fold all groups into one array of { k, v } pairs, the shape $arrayToObject takes.
var foldIntoPairs = {
  "$group": {
    "_id": null,
    "data": { "$push": { "k": "$_id", "v": "$data" } }
  }
};

// Turn the pairs into the keys of the output document.
var pairsToDocument = {
  "$replaceRoot": { "newRoot": { "$arrayToObject": "$data" } }
};

db.collection.aggregate([collectByStatus, foldIntoPairs, pairsToDocument])
Returns:
{
"inProgress" : [
{
"_id" : ObjectId("5b18d31a27a37696ec8b5776"),
"status" : "inProgress",
"description" : "inProgress..."
}
],
"completed" : [
{
"_id" : ObjectId("5b18d31a27a37696ec8b5773"),
"status" : "completed",
"description" : "completed..."
}
],
"pending" : [
{
"_id" : ObjectId("5b18d14cbc83fd271b6a157c"),
"status" : "pending",
"description" : "You have to complete the challenge..."
},
{
"_id" : ObjectId("5b18d31a27a37696ec8b5775"),
"status" : "pending",
"description" : "pending..."
}
]
}
That might be okay IF you actually "aggregated" beforehand, but on any practically sized collection all that is doing is trying to force the whole collection into a single document, and that's likely to break the BSON Limit of 16MB, so I just would not recommend even attempting this without "grouping" something else before this step.
Frankly, the following code does the same thing, without aggregation tricks and with no BSON limit problem:
// Client-side grouping: builds an object keyed by status, each key holding
// the array of documents with that status, then prints it.
var obj = {};
// forEach stands in for "any" form of cursor iteration
db.collection.find().forEach(function (doc) {
  var key = doc.status;
  if (obj.hasOwnProperty(key)) {
    obj[key].push(doc);
  } else {
    // First document seen for this status starts the bucket.
    obj[key] = [doc];
  }
});
printjson(obj);
Or a bit shorter:
// Same client-side grouping in compact form: each document replaces its
// status bucket with a new array made of the previous entries plus itself.
var obj = {};
// forEach stands in for "any" form of cursor iteration
db.collection.find().forEach(d => {
  const previous = obj.hasOwnProperty(d.status) ? obj[d.status] : [];
  obj[d.status] = previous.concat([d]);
});
printjson(obj);
Aggregations are used for "data reduction" and anything that is simply "reshaping results" without actually reducing the data returned from the server is usually better handled in client code anyway. You're still returning all data no matter what you do, and the client processing of the cursor has considerably less overhead. And NO restrictions.
I have different records in MongoDB. Here is a small example:
{_id:"sad547er4w2v5x85b8", name:"Jhon", jobTime:600, floor:2, dept:5, age:25},
{_id:"xcz547wer4xcvcx1g2", name:"Alex", jobTime:841, floor:4, dept:1, age:55},
{_id:"xcnwep2321954ldfsl", name:"Alice", jobTime:100, floor:3, dept:3, age:55},
{_id:"23s3ih94h548jhfk2u", name:"Anne", jobTime:280, floor:2, dept:8, age:22},
{_id:"03dfsk9342hjwq1503", name:"Alexa", jobTime:355, floor:2, dept:6, age:25}
I tried to obtain this output, but I don't know how to group by twice to get that structure.
{[
{age:22, floors:[{floor:2,persons:[{name:"Anne",jobTime:280,dept:8}]}]},
{age:25, floors:[{floor:2,persons:[{name:"Jhon",jobTime:600,dept:5},{name:"Alexa",jobTime:355,dept:6}]}]},
{age:55, floors:[{floor:3,persons:[{name:"Alice",jobTime:100,dept:3}]},{floor:4,persons:[{name:"Alex",jobTime:841,dept:1}]}]}
]}
Exactly. Use "two" $group stages
// Nests people under floor under age by grouping twice.

// First pass: one group per (age, floor) pair, collecting the person details.
var groupByAgeAndFloor = {
  "$group": {
    "_id": { "age": "$age", "floor": "$floor" },
    "persons": {
      "$push": { "name": "$name", "jobTime": "$jobTime", "dept": "$dept" }
    }
  }
};

// Second pass: regroup on the age part of the key only, pushing each floor
// together with its already-collected persons.
var groupByAge = {
  "$group": {
    "_id": "$_id.age",
    "floors": {
      "$push": { "floor": "$_id.floor", "persons": "$persons" }
    }
  }
};

collection.aggregate([groupByAgeAndFloor, groupByAge], function(err, results) {
  // deal with results here
})
Which produces:
{
"_id" : 25,
"floors" : [
{ "floor" : 2,
"persons" : [
{ "name" : "Jhon", "jobTime" : 600, "dept" : 5 },
{ "name" : "Alexa", "jobTime" : 355, "dept" : 6 }
]
}
]
},
{
"_id" : 55,
"floors" : [
{ "floor" : 3,
"persons" : [
{ "name" : "Alice", "jobTime" : 100, "dept" : 3 }
]
},
{ "floor" : 4,
"persons" : [
{ "name" : "Alex", "jobTime" : 841, "dept" : 1 }
]
}
]
},
{
"_id" : 22,
"floors" : [
{ "floor" : 2,
"persons" : [
{ "name" : "Anne", "jobTime" : 280, "dept" : 8 }
]
}
]
}
So the initial $group is on a compound key including the detail down to the items you want to add to the initial "array", for "persons". Then the second $group takes only part of the initial _id for its key and again "pushes" the content into a new array.
I have a collection with documents like the ones below. I want to get all distinct values of name in the attributes sub-documents, together with their distinct values and the count of each in the collection.
Example :
var records = [
{
"attributes": [
{
"name": "color",
"value": "black",
"_id": "5441103a0348ebc91ee75b33"
}
],
"name": "ddd"
},
{
"attributes": [
{
"name": "color",
"value": "red",
"_id": "5441091393450f1619be99af"
},
{
"name": "size",
"value": "L",
"_id": "5441091393450f1619be99b0"
}
],
"name": "one"
},
{
"attributes": [
{
"name": "color",
"value": "black",
"_id": "5441092593450f1619be99b1"
},
{
"name": "size",
"value": "L",
"_id": "5441092593450f1619be99b2"
}
],
"name": "sdfsda"
},
{
"attributes": [
{
"name": "color",
"value": "green",
"_id": "5441093d93450f1619be99b3"
},
{
"name": "size",
"value": "S",
"_id": "5441093d93450f1619be99b4"
}
],
"name": "threee"
},
{
"attributes": [
{
"name": "color",
"value": "green",
"_id": "5441095793450f1619be99b5"
},
{
"name": "size",
"value": "M",
"_id": "5441095793450f1619be99b6"
}
],
"name": "one"
}
]
I want to get output like :
// Desired result for the sample `records`: for every attribute name, the
// distinct values together with how many attribute entries carry each value.
// In the sample, size "L" occurs twice while "S" and "M" occur once each.
var output =
{
"color" : [
{value : 'red', count : 1},
{value : 'black', count : 2},
{value : 'green', count : 2}
],
"size" : [
{value : 'S', count : 1},
{value : 'L', count : 2},
{value : 'M', count : 1}
]
}
how can i get this output in mongodb?
Can i get this output by aggregate framework of mongodb, if yes, then how? -- high priority
Yes, aggregate can make it.
// Builds `output`: an object keyed by attribute name whose values are arrays
// of { value, count } entries, one per distinct attribute value.
var output = {};
db.c.aggregate([{
    // One document per element of the attributes array.
    $unwind : "$attributes"
}, {
    $group : {
        _id : {
            // After $unwind the attribute's fields live under "attributes";
            // the top-level "name" is the record's own name, not the attribute's.
            name : "$attributes.name",
            value : "$attributes.value"
        },
        count : {
            $sum : 1
        }
    } // the output after this stage such as
      // {_id:{name:"color", value:"green"}, count:2}
      // {_id:{name:"size", value:"L"}, count:2}
}, {
    $group : {
        _id : "$_id.name",
        contents : {
            $push : {
                value : "$_id.value",
                count : "$count"
            }
        }
    } // the output after this stage such as
      // {_id:"color", contents:[{value:"green", count:2}, ...]}
      // {_id:"size", contents:[{value:"L", count:2}, ...]}
}]).forEach(function(doc) {
    output[doc._id] = doc.contents; // just convert to the format as expected
});