Pymongo query to extract value of a matching key - python-3.x

I have the following document:
{
"_id": "61f7d5cfd0c32b744d3f81c2",
"_form": "61e66b8fd0c32b744d3e24a0",
"_workflow": "61e54fe2d0c32b744d3e0b7c",
"_appUser": "61e6b098d0c32b744d3e3808",
"sectionResponse": [{
"_id": "61f7d5cfd0c32b744d3f81c3",
"name": "Project Details & Goals",
"order": 2,
"fieldResponse": [{
"_id": "61f7d5cfd0c32b744d3f81c4",
"fieldType": "Text",
"name": "Project Name",
"value": "TRT",
"order": 0
},
{
"_id": "61f7d5cfd0c32b744d3f81c5",
"fieldType": "Number",
"name": "Amount Requested",
"value": "20",
"order": 1
},
{
"_id": "61f7d5cfd0c32b744d3f81c6",
"fieldType": "Number",
"name": "Project Cost",
"value": "50",
"order": 1
},
{
"_id": "61f7d5cfd0c32b744d3f81c7",
"fieldType": "Comment",
"name": "Project Goals",
"value": "TRT",
"order": 3
}
]
},
{
"_id": "61f7d5cfd0c32b744d3f81c8",
"name": "Section Heading",
"order": 2,
"fieldResponse": [{
"_id": "61f7d5cfd0c32b744d3f81c9",
"fieldType": "Multiselectdropdown",
"name": "Multiselectdropdown",
"value": "Y",
"order": 0
},
{
"_id": "61f7d5cfd0c32b744d3f81ca",
"fieldType": "Image_Upload",
"name": "Image Upload",
"value": "Y",
"order": 1
}
]
}
],
"order": 2,
"status": "Reviewed",
"updatedAt": "2022-01-31T12:27:59.541Z",
"createdAt": "2022-01-31T12:27:59.541Z",
"__v": 0
}
Inside the document there is a sectionResponse which contains the responses of multiple sections. Inside each of these there is a fieldResponse which contains the name and value. I have to extract the value from all the documents where the name is "Amount Requested".
How can I write a query for such a situation?

Here is a solution that returns only matching material and requires no $unwind.
db.foo.aggregate([
// This stage alone is enough to give you the basic info.
// You will get not only doc _id but also an array of arrays
// (fieldResponse within sectionResponse) containing the whole
// fieldResponse doc. It is slightly awkward but if you need structural data
// other than *just* the value, it is a good start:
{$project: {
// outer filter removes inner filter results where size is 0
// i.e. no "Amount Requested" found.
XX: {$filter: {input:
{$map: {
input: "$sectionResponse", as: "z1", in:
// inner filter gets only name = Amount Requested
{$filter: {input: "$$z1.fieldResponse",
as: "z1",
cond: {$eq:["$$z1.name","Amount Requested"]}
}}
}},
as: "z2",
cond: {$ne: ["$$z2", [] ]}
}}
}}
])
which yields (given a slightly expanded input set where subdocs were copied but the value and order were changed for clarity):
{
"_id" : 0,
"XX" : [
[
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "20",
"order" : 1
},
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "77",
"order" : 18
}
],
[
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "99",
"order" : 818
}
]
]
}
{
"_id" : 1,
"XX" : [
[
{
"_id" : "61f7d5cfd0c32b744d3f81c5",
"fieldType" : "Number",
"name" : "Amount Requested",
"value" : "333",
"order" : 1
}
]
]
}
{ "_id" : 2, "XX" : [ ] }
If you don't want to see top-level docs that contained NO fieldResponses where name = "Amount Requested", then append this stage:
{$match: {XX: {$ne: [] }}}
Finally, if you really want just the values, append this reduce stage:
,{$addFields: {XX: {$reduce: {
input: "$XX",
initialValue: [],
in: {$concatArrays: ["$$value",
{$map: {input: "$$this",
as:"z",
in: "$$z.value"
}} ] }
}}
}}
which yields:
{ "_id" : 0, "XX" : [ "20", "77", "99" ] }
{ "_id" : 1, "XX" : [ "333" ] }
If you want a little more than just value (like order, for example), then have $map return a doc instead of a scalar, e.g.:
{$map: {input: "$$this",
as:"z",
in: {v:"$$z.value",o:"$$z.order"}
}} ] }
to yield:
{
"_id" : 0,
"XX" : [
{
"v" : "20",
"o" : 1
},
{
"v" : "77",
"o" : 18
},
{
"v" : "99",
"o" : 818
}
]
}
{ "_id" : 1, "XX" : [ { "v" : "333", "o" : 1 } ] }
Again, the input set provided by the OP was expanded with additional {name:"Amount Requested"} subdocs tossed into different sectionResponse arrays to generate a more complex structure.
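The question is tagged python-3.x, so for completeness: the same pipeline can be passed to PyMongo's Collection.aggregate() as plain Python dicts. A minimal sketch, assuming a local mongod and that the documents live in a collection named foo in a database named test (adjust names to your setup):
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017")  # assumption: local server
coll = client["test"]["foo"]                        # assumption: db/collection names

pipeline = [
    # Keep only fieldResponse subdocs named "Amount Requested", dropping
    # sections that end up empty, then flatten the nested arrays to values.
    {"$project": {
        "XX": {"$filter": {
            "input": {"$map": {
                "input": "$sectionResponse",
                "as": "s",
                "in": {"$filter": {
                    "input": "$$s.fieldResponse",
                    "as": "f",
                    "cond": {"$eq": ["$$f.name", "Amount Requested"]},
                }},
            }},
            "as": "inner",
            "cond": {"$ne": ["$$inner", []]},
        }},
    }},
    {"$match": {"XX": {"$ne": []}}},
    {"$addFields": {"XX": {"$reduce": {
        "input": "$XX",
        "initialValue": [],
        "in": {"$concatArrays": ["$$value", {"$map": {
            "input": "$$this", "as": "z", "in": "$$z.value",
        }}]},
    }}}},
]

for doc in coll.aggregate(pipeline):
    print(doc["_id"], doc["XX"])  # e.g. 61f7d5cfd0c32b744d3f81c2 ['20']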

Maybe something like this, which you can easily adapt to Python, supposing you need only the value from sectionResponse.$[].fieldResponse.$[] elements having the name "Amount Requested":
db.collection.aggregate([
{
$match: {
"sectionResponse.fieldResponse.name": "Amount Requested"
}
},
{
"$project": {
"sectionResponse": {
"$filter": {
"input": {
"$map": {
"input": "$sectionResponse",
"as": "somesub",
"in": {
"_id": "$$somesub._id",
"fieldResponse": {
"$filter": {
"input": "$$somesub.fieldResponse",
"as": "sub",
"cond": {
"$eq": [
"$$sub.name",
"Amount Requested"
]
}
}
}
}
}
},
"as": "some",
"cond": {
"$gt": [
{
"$size": "$$some.fieldResponse"
},
0
]
}
}
}
}
},
{
$unwind: "$sectionResponse"
},
{
$unwind: "$sectionResponse.fieldResponse"
},
{
$project: {
value: "$sectionResponse.fieldResponse.value"
}
}
])
Match the documents containing at least one element with sectionResponse.fieldResponse.name: "Amount Requested".
Project/map all sectionResponse.fieldResponse elements containing name = "Amount Requested" (non-empty elements only).
Unwind the sectionResponse array.
Unwind the fieldResponse array.
Project only the value field.
For best results, an index on "sectionResponse.fieldResponse.name" should be added.
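From PyMongo the same approach (plus the suggested index) might look like the sketch below. Note this is a slightly condensed variant: the $filter/$map projection is replaced by a second $match after the unwinds, which returns the same values. The server URL and the test/collection names are assumptions:
from pymongo import MongoClient

# Assumptions: local server, database "test", collection "collection".
coll = MongoClient("mongodb://localhost:27017")["test"]["collection"]

# The suggested index to support the initial $match stage.
coll.create_index("sectionResponse.fieldResponse.name")

pipeline = [
    {"$match": {"sectionResponse.fieldResponse.name": "Amount Requested"}},
    {"$unwind": "$sectionResponse"},
    {"$unwind": "$sectionResponse.fieldResponse"},
    # Stand-in for the $filter/$map projection above: keep only matching fields.
    {"$match": {"sectionResponse.fieldResponse.name": "Amount Requested"}},
    {"$project": {"value": "$sectionResponse.fieldResponse.value"}},
]
print([doc["value"] for doc in coll.aggregate(pipeline)])  # e.g. ['20']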

Related

Dynamic keys after $group by

I have the following collection:
{
"_id" : ObjectId("5b18d14cbc83fd271b6a157c"),
"status" : "pending",
"description" : "You have to complete the challenge...",
}
{
"_id" : ObjectId("5b18d31a27a37696ec8b5773"),
"status" : "completed",
"description" : "completed...",
}
{
"_id" : ObjectId("5b18d31a27a37696ec8b5775"),
"status" : "pending",
"description" : "pending...",
}
{
"_id" : ObjectId("5b18d31a27a37696ec8b5776"),
"status" : "inProgress",
"description" : "inProgress...",
}
I need to group by status and get all the keys dynamically, based on whatever values are in status:
[
{
"completed": [
{
"_id": "5b18d31a27a37696ec8b5773",
"status": "completed",
"description": "completed..."
}
]
},
{
"pending": [
{
"_id": "5b18d14cbc83fd271b6a157c",
"status": "pending",
"description": "You have to complete the challenge..."
},
{
"_id": "5b18d31a27a37696ec8b5775",
"status": "pending",
"description": "pending..."
}
]
},
{
"inProgress": [
{
"_id": "5b18d31a27a37696ec8b5776",
"status": "inProgress",
"description": "inProgress..."
}
]
}
]
Not that I think it's a good idea, mostly because I don't see any "aggregation" here at all, but the approach is: after "grouping" by the "status" key and using $push to add the content to an array, you similarly $push those groups into a single array and then convert it into the keys of a document with $arrayToObject inside a $replaceRoot:
db.collection.aggregate([
{ "$group": {
"_id": "$status",
"data": { "$push": "$$ROOT" }
}},
{ "$group": {
"_id": null,
"data": {
"$push": {
"k": "$_id",
"v": "$data"
}
}
}},
{ "$replaceRoot": {
"newRoot": { "$arrayToObject": "$data" }
}}
])
Returns:
{
"inProgress" : [
{
"_id" : ObjectId("5b18d31a27a37696ec8b5776"),
"status" : "inProgress",
"description" : "inProgress..."
}
],
"completed" : [
{
"_id" : ObjectId("5b18d31a27a37696ec8b5773"),
"status" : "completed",
"description" : "completed..."
}
],
"pending" : [
{
"_id" : ObjectId("5b18d14cbc83fd271b6a157c"),
"status" : "pending",
"description" : "You have to complete the challenge..."
},
{
"_id" : ObjectId("5b18d31a27a37696ec8b5775"),
"status" : "pending",
"description" : "pending..."
}
]
}
That might be okay IF you actually "aggregated" beforehand, but on any practically sized collection all that is doing is trying to force the whole collection into a single document, which is likely to break the BSON Limit of 16MB, so I just would not recommend even attempting this without "grouping" something else before this step.
Frankly, the following code does the same thing, without aggregation tricks and with no BSON limit problem:
var obj = {};
// Using forEach as a premise for representing "any" cursor iteration form
db.collection.find().forEach(d => {
if (!obj.hasOwnProperty(d.status))
obj[d.status] = [];
obj[d.status].push(d);
})
printjson(obj);
Or a bit shorter:
var obj = {};
// Using forEach as a premise for representing "any" cursor iteration form
db.collection.find().forEach(d =>
obj[d.status] = [
...(obj.hasOwnProperty(d.status)) ? obj[d.status] : [],
d
]
)
printjson(obj);
Aggregations are used for "data reduction" and anything that is simply "reshaping results" without actually reducing the data returned from the server is usually better handled in client code anyway. You're still returning all data no matter what you do, and the client processing of the cursor has considerably less overhead. And NO restrictions.
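For comparison, the same client-side reshaping from Python with PyMongo is just as short; a sketch, assuming a local server and a test.collection namespace holding docs with a status field:
from collections import defaultdict
from pymongo import MongoClient

# Assumptions: local server, database "test", collection "collection".
coll = MongoClient("mongodb://localhost:27017")["test"]["collection"]

obj = defaultdict(list)
for doc in coll.find():        # any cursor iteration works here
    obj[doc["status"]].append(doc)

print(dict(obj))               # {"pending": [...], "completed": [...], ...}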

Converting a MongoDB aggregate into an ArangoDB COLLECT

I'm migrating data from Mongo to Arango and I need to reproduce a $group aggregation. I have successfully reproduced the results but I'm concerned that my approach may be sub-optimal. Can the AQL be improved?
I have a collection of data that looks like this:
{
"_id" : ObjectId("5b17f9d85b2c1998598f054e"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b1808145b2c1998598f054f"),
"department" : [
"Sales",
"Marketing"
],
"region" : [
"US",
"UK"
]
}
{
"_id" : ObjectId("5b18083c5b2c1998598f0550"),
"department" : "Development",
"region" : "Europe"
}
{
"_id" : ObjectId("5b1809a75b2c1998598f0551"),
"department" : "Sales"
}
Note that the value can be a string, an array, or not present.
In Mongo I'm using the following code to aggregate the data:
db.test.aggregate([
{
$unwind:{
path:"$department",
preserveNullAndEmptyArrays: true
}
},
{
$unwind:{
path:"$region",
preserveNullAndEmptyArrays: true
}
},
{
$group:{
_id:{
department:{ $ifNull: [ "$department", "null" ] },
region:{ $ifNull: [ "$region", "null" ] },
},
count:{$sum:1}
}
}
])
In Arango I'm using the following AQL:
FOR i IN test
LET FIELD1=(FOR a IN APPEND([],NOT_NULL(i.department,"null")) RETURN a)
LET FIELD2=(FOR a IN APPEND([],NOT_NULL(i.region,"null")) RETURN a)
FOR f1 IN FIELD1
FOR f2 IN FIELD2
COLLECT id={department:f1,region:f2} WITH COUNT INTO counter
RETURN {_id:id,count:counter}
Edit:
The APPEND() is used to convert string values into an array.
Both produce results that look like this:
{
"_id" : {
"department" : "Marketing",
"region" : "US"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Development",
"region" : "Europe"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Sales",
"region" : "null"
},
"count" : 1.0
}
{
"_id" : {
"department" : "Marketing",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "UK"
},
"count" : 2.0
}
{
"_id" : {
"department" : "Sales",
"region" : "US"
},
"count" : 2.0
}
Your approach seems alright. I would suggest using TO_ARRAY() instead of APPEND() to make it easier to understand, though.
Both functions skip null values, so you have to provide some placeholder, or test for null explicitly and return an array with a null value (or whatever works best for you):
FOR doc IN test
FOR field1 IN doc.department == null ? [ null ] : TO_ARRAY(doc.department)
FOR field2 IN doc.region == null ? [ null ] : TO_ARRAY(doc.region)
COLLECT department = field1, region = field2
WITH COUNT INTO count
RETURN { _id: { department, region }, count }
Collection test:
[
{
"_key": "5b17f9d85b2c1998598f054e",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b18083c5b2c1998598f0550",
"department": "Development",
"region": "Europe"
},
{
"_key": "5b1808145b2c1998598f054f",
"department": [
"Sales",
"Marketing"
],
"region": [
"US",
"UK"
]
},
{
"_key": "5b1809a75b2c1998598f0551",
"department": "Sales"
}
]
Result:
[
{
"_id": {
"department": "Development",
"region": "Europe"
},
"count": 1
},
{
"_id": {
"department": "Marketing",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Marketing",
"region": "US"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": null
},
"count": 1
},
{
"_id": {
"department": "Sales",
"region": "UK"
},
"count": 2
},
{
"_id": {
"department": "Sales",
"region": "US"
},
"count": 2
}
]
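If (as in the main question of this thread) the client side is Python, the AQL above can be run through the python-arango driver; a rough sketch, where the connection details and database/collection names are assumptions:
from arango import ArangoClient

# Assumptions: local ArangoDB, "_system" database, root credentials, collection "test".
db = ArangoClient(hosts="http://localhost:8529").db(
    "_system", username="root", password=""
)

aql = """
FOR doc IN test
  FOR field1 IN doc.department == null ? [ null ] : TO_ARRAY(doc.department)
    FOR field2 IN doc.region == null ? [ null ] : TO_ARRAY(doc.region)
      COLLECT department = field1, region = field2
      WITH COUNT INTO count
      RETURN { _id: { department, region }, count }
"""
for row in db.aql.execute(aql):
    print(row)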

add fields where condition match to nested array

I have following users collection
[{
"_id" : ObjectId("5afadfdf08a7aa6f1a27d986"),
"firstName" : "bruce",
"friends" : [ ObjectId("5afd1c42af18d985a06ac306"),ObjectId("5afd257daf18d985a06ac6ac") ]
},
{
"_id" : ObjectId("5afbfe21daf4b13ddde07dbe"),
"firstName" : "clerk",
"friends" : [],
}]
and have friends collection
[{
"_id" : ObjectId("5afd1c42af18d985a06ac306"),
"recipient" : ObjectId("5afaab572c4ec049aeb0bcba"),
"requester" : ObjectId("5afadfdf08a7aa6f1a27d986"),
"status" : 2,
},
{
"_id" : ObjectId("5afd257daf18d985a06ac6ac"),
"recipient" : ObjectId("5afadfdf08a7aa6f1a27d986"),
"requester" : ObjectId("5afbfe21daf4b13ddde07dbe"),
"status" : 1,
}]
Suppose I have a user logged in with _id: "5afaab572c4ec049aeb0bcba", and this _id matches the recipient in the friends collection.
Now I have to add a field friendsStatus which contains the status from the friends collection. And if it does not match any recipient from the array, then its status should be 0.
So when I get all users, my output should be:
[{
"_id" : ObjectId("5afadfdf08a7aa6f1a27d986"),
"firstName" : "bruce",
"friends" : [ ObjectId("5afd1c42af18d985a06ac306") ],
"friendStatus": 2
},
{
"_id" : ObjectId("5afbfe21daf4b13ddde07dbe"),
"firstName" : "clerk",
"friends" : [],
"friendStatus": 0
}]
Thanks in advance!!!
If you have MongoDB 3.6 then you can use $lookup with a "sub-pipeline"
User.aggregate([
{ "$lookup": {
"from": Friend.collection.name,
"let": { "friends": "$friends" },
"pipeline": [
{ "$match": {
"recipient": ObjectId("5afaab572c4ec049aeb0bcba"),
"$expr": { "$in": [ "$_id", "$$friends" ] }
}},
{ "$project": { "status": 1 } }
],
"as": "friends"
}},
{ "$addFields": {
"friends": {
"$map": {
"input": "$friends",
"in": "$$this._id"
}
},
"friendsStatus": {
"$ifNull": [ { "$min": "$friends.status" }, 0 ]
}
}}
])
For earlier versions, it's ideal to actually use $unwind in order to ensure you don't breach the BSON Limit:
User.aggregate([
{ "$lookup": {
"from": Friend.collection.name,
"localField": "friends",
"foreignField": "_id",
"as": "friends"
}},
{ "$unwind": { "path": "$friends", "preserveNullAndEmptyArrays": true } },
{ "$match": {
"$or": [
{ "friends.recipient": ObjectId("5afaab572c4ec049aeb0bcba") },
{ "friends": null }
]
}},
{ "$group": {
"_id": "$_id",
"firstName": { "$first": "$firstName" },
"friends": { "$push": "$friends._id" },
"friendsStatus": {
"$min": {
"$ifNull": ["$friends.status",0]
}
}
}}
])
There is "one difference" from the most optimal form here in that the pipeline optimization does not actually "roll-up" the $match condition into the $lookup itself:
{
"$lookup" : {
"from" : "friends",
"as" : "friends",
"localField" : "friends",
"foreignField" : "_id",
"unwinding" : {
"preserveNullAndEmptyArrays" : true
}
}
},
{
"$match" : { // <-- outside will preserved array
Because the preserveNullAndEmptyArrays option is true, the "fully optimized" action, where the condition would actually be applied to the foreign collection "before" results are returned, does not happen.
So the only purpose of unwinding here is purely to avoid what would normally be a target "array" from the $lookup result causing the parent document to grow beyond the BSON Limit. Additional conditions of the $match are then applied "after" this stage. The default $unwind without the option presumes false for the preservation and a matching condition is added instead to do this. This of course would result in the documents with no foreign matches being excluded.
It's not really advisable because of that BSON Limit, but there is also the option of applying $filter to the resulting array of $lookup:
User.aggregate([
{ "$lookup": {
"from": Friend.collection.name,
"localField": "friends",
"foreignField": "_id",
"as": "friends"
}},
{ "$addFields": {
"friends": {
"$map": {
"input": {
"$filter": {
"input": "$friends",
"cond": {
"$eq": [
"$$this.recipient",
ObjectId("5afaab572c4ec049aeb0bcba")
]
}
}
},
"in": "$$this._id"
}
},
"friendsStatus": {
"$ifNull": [
{ "$min": {
"$map": {
"input": {
"$filter": {
"input": "$friends",
"cond": {
"$eq": [
"$$this.recipient",
ObjectId("5afaab572c4ec049aeb0bcba")
]
}
}
},
"in": "$$this.status"
}
}},
0
]
}
}}
])
In either case we're basically adding an "additional condition" to the join, so that it matches not just on the directly related field but also on the additional constraint of the queried ObjectId value for "recipient".
Not really sure what you are expecting for "friendsStatus", since the result is an array and there can possibly be more than one (as far as I know), so I'm just applying $min here to extract one value from the array in either case.
The governing condition in each case is $ifNull, which is applied where there is nothing in the "friends" output array to extract from; in that case you simply return 0.
All variants output the same thing:
{
"_id" : ObjectId("5afadfdf08a7aa6f1a27d986"),
"firstName" : "bruce",
"friends" : [
ObjectId("5afd1c42af18d985a06ac306")
],
"friendsStatus" : 2
}
{
"_id" : ObjectId("5afbfe21daf4b13ddde07dbe"),
"firstName" : "clerk",
"friends" : [ ],
"friendsStatus" : 0
}
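Since the surrounding thread is Python-based, it may be worth noting that the first (MongoDB 3.6+) pipeline runs unchanged from PyMongo; only ObjectId needs to be imported from bson. A sketch, assuming a local server with users and friends collections in a database named test:
from bson import ObjectId
from pymongo import MongoClient

# Assumptions: local server, database "test", collections "users" and "friends".
db = MongoClient("mongodb://localhost:27017")["test"]
me = ObjectId("5afaab572c4ec049aeb0bcba")

pipeline = [
    {"$lookup": {
        "from": "friends",
        "let": {"friends": "$friends"},
        "pipeline": [
            {"$match": {"recipient": me, "$expr": {"$in": ["$_id", "$$friends"]}}},
            {"$project": {"status": 1}},
        ],
        "as": "friends",
    }},
    {"$addFields": {
        "friends": {"$map": {"input": "$friends", "in": "$$this._id"}},
        "friendsStatus": {"$ifNull": [{"$min": "$friends.status"}, 0]},
    }},
]
for doc in db["users"].aggregate(pipeline):
    print(doc)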

Mongoose format datetime field in find query retrieving result [duplicate]

Given collection(#name: users) Structure:
{
"_id" : ObjectId("57653dcc533304a40ac504fc"),
"username" : "XYZ",
"followers" : [
{
"count" : 31,
"ts" : ISODate("2016-06-17T18:30:00.996Z")
},
{
"count" : 31,
"ts" : ISODate("2016-06-18T18:30:00.288Z")
}
]
}
I want to query this collection based on the username field, with ts returned in 'yyyy-mm-dd' format.
Expected Output:
{
"_id" : ObjectId("57653dcc533304a40ac504fc"),
"username" : "XYZ",
"followers" : [
{
"count" : 31,
"date" : "2016-06-17"
},
{
"count" : 31,
"date" : "2016-06-18"
}
]
}
I have tried something like this:
db.users.aggregate([
{$match:{"username":"xyz"}},
{$project:{ "followers":{"count":1,
"date":"$followers.ts.toISOString().slice(0,10).replace(/-/g,'-')"
}}
}
])
But it doesn't seem to be working. Can anyone please help?
Thanks much.
Consider running an aggregation pipeline that will allow you to flatten the data list first, project the new field using the $dateToString operator, then regroup the flattened docs to get your desired result.
The above can be expressed with the following pipeline stages:
db.users.aggregate([
{ "$match": { "username": "xyz" } },
{ "$unwind": "$followers" },
{
"$project": {
"username": 1,
"count": "$followers.count",
"date": { "$dateToString": { "format": "%Y-%m-%d", "date": "$followers.ts" } }
}
},
{
"$group": {
"_id": "$_id",
"username": { "$first": "$username" },
"followers": { "$push": {
"count": "$count",
"date": "$date"
}}
}
}
])
With MongoDB 3.4 and newer, you can use the new $addFields pipeline step together with $map to create the array field without the need to unwind and group:
db.users.aggregate([
{ "$match": { "username": "xyz" } },
{
"$addFields": {
"followers": {
"$map": {
"input": "$followers",
"as": "follower",
"in": {
"count": "$$follower.count",
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$$follower.ts"
}
}
}
}
}
}
}
])
The best and easiest way to do this is to transform each element in the array with the $map operator. Of course, in the "in" expression you need to use $dateToString to convert your "date" to a string using format specifiers.
db.coll.aggregate(
[
{ "$match": { "username": "XYZ" } },
{ "$project": {
"username": 1,
"followers": {
"$map": {
"input": "$followers",
"as": "f",
"in": {
"count": "$$f.count",
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$$f.ts"
}
}
}
}
}
}}
]
)
which produces:
{
"_id" : ObjectId("57653dcc533304a40ac504fc"),
"username" : "XYZ",
"followers" : [
{
"count" : 31,
"date" : "2016-06-17"
},
{
"count" : 31,
"date" : "2016-06-18"
}
]
}
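If you are on PyMongo rather than Mongoose, BSON dates come back as Python datetime objects, so the same reshaping can also be done client-side after a plain find_one(); a minimal sketch, assuming a local server and a test.users namespace:
from pymongo import MongoClient

# Assumptions: local server, database "test", collection "users".
coll = MongoClient("mongodb://localhost:27017")["test"]["users"]

doc = coll.find_one({"username": "XYZ"})
if doc:
    for follower in doc.get("followers", []):
        # ISODate values arrive as datetime.datetime, so strftime does the formatting.
        follower["date"] = follower.pop("ts").strftime("%Y-%m-%d")
    print(doc)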

How to get distinct value of sub document field with their count in collection in mongodb?

I have a collection with documents like below. I want to get all the distinct values of name in the attributes sub-documents, and for each name its distinct values with their counts across the collection.
Example :
var records = [
{
"attributes": [
{
"name": "color",
"value": "black",
"_id": "5441103a0348ebc91ee75b33"
}
],
"name": "ddd"
},
{
"attributes": [
{
"name": "color",
"value": "red",
"_id": "5441091393450f1619be99af"
},
{
"name": "size",
"value": "L",
"_id": "5441091393450f1619be99b0"
}
],
"name": "one"
},
{
"attributes": [
{
"name": "color",
"value": "black",
"_id": "5441092593450f1619be99b1"
},
{
"name": "size",
"value": "L",
"_id": "5441092593450f1619be99b2"
}
],
"name": "sdfsda"
},
{
"attributes": [
{
"name": "color",
"value": "green",
"_id": "5441093d93450f1619be99b3"
},
{
"name": "size",
"value": "S",
"_id": "5441093d93450f1619be99b4"
}
],
"name": "threee"
},
{
"attributes": [
{
"name": "color",
"value": "green",
"_id": "5441095793450f1619be99b5"
},
{
"name": "size",
"value": "M",
"_id": "5441095793450f1619be99b6"
}
],
"name": "one"
}
]
I want to get output like:
var output =
{
"color" : [
{value : 'red', count : 1}
{value : 'black', count : 2}
{value : 'green', count : 2}
],
"size" : [
{value : 'S', count : 2}
{value : 'L', count : 1}
{value : 'M', count : 1}
]
}
How can I get this output in MongoDB?
Can I get this output with the aggregation framework of MongoDB? If yes, then how? -- high priority
Yes, the aggregation framework can do it.
var output = {};
db.c.aggregate([{
$unwind : "$attributes"
}, {
$group : {
_id : {
name : "$name",
value : "$value"
},
count : {
$sum : 1
}
} // the output after this stage such as
// {_id:{name:"color", value:"green"}, count:2}
// {_id:{name:"size", value:"S"}, count:2}
}, {
$group : {
_id : "$_id.name",
contents : {
$push : {
value : "$_id.value",
count : "$count"
}
}
} // the output after this stage such as
// {_id:"color", contents:[{value:"green", count:2}]}
// {_id:"size", contents:[{value : 'S', count : 2}]}
}]).forEach(function(doc) {
output[doc._id] = doc.contents; // just convert to the format as expected
});
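The same pipeline works from PyMongo, where the final conversion becomes a dict comprehension; a sketch, assuming the documents live in a records collection on a local test database:
from pymongo import MongoClient

# Assumptions: local server, database "test", collection "records".
coll = MongoClient("mongodb://localhost:27017")["test"]["records"]

pipeline = [
    {"$unwind": "$attributes"},
    {"$group": {
        "_id": {"name": "$attributes.name", "value": "$attributes.value"},
        "count": {"$sum": 1},
    }},
    {"$group": {
        "_id": "$_id.name",
        "contents": {"$push": {"value": "$_id.value", "count": "$count"}},
    }},
]

output = {doc["_id"]: doc["contents"] for doc in coll.aggregate(pipeline)}
print(output)  # e.g. {'color': [...], 'size': [...]}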
