$aggregation, $sum on two collections in MongoDB - node.js

Hi suppose I have these two collections
sample1 :
{
"_id" : {
"date" : ISODate("2020-02-11T18:30:00Z"),
"price" : 4,
"offer" : 0,
"itemCode" : "A001"
"customerId" : ObjectId("5e43de778b57693cd46859eb"),
"sellerId" : ObjectId("5e43e5cdc11f750864f46820"),
},
"charges" : 168
}
{
"_id" : {
"date" : ISODate("2020-02-11T18:30:00Z"),
"coverPrice" :5.5 ,
"offer" : 38,
"itemCode" : "B001"
"customerId" : ObjectId("5e43de778b57693cd46859eb"),
"sellerId" : ObjectId("5e43e5cdc11f750864f46820"),
},
"charges" : 209.5
}
NOTE : sample1's _id doesnot have any ObjectId().
sample2 :
{
"paymentReceivedOnDate" : ISODate("2020-02-12T18:30:00Z"),
"customerId" : ObjectId("5e43de778b57693cd46859eb"),
"sellerId" : ObjectId("5e43e5cdc11f750864f46820"),
"amount" : 30,
}
{
"paymentReceivedOnDate" : ISODate("2020-02-12T18:30:00Z"),
"customerId" : ObjectId("5e43de778b57693cd46859eb"),
"sellerId" : ObjectId("5e43e5cdc11f750864f46820"),
"amount" : 160,
}
{
"paymentReceivedOnDate" : ISODate("2020-02-11T18:30:00Z"),
"customerId" : ObjectId("5e43de778b57693cd46859eb"),
"sellerId" : ObjectId("5e43e5cdc11f750864f46820"),
"amount" : 50,
}
My problem statements :
1: Firstly i need to calculate the totalCharges from sample 1 collection. against the [date,customerId,sellerId ]
2: Secondly i need to calculate totalAmount from sample 2 collection.
3: Than i need calculte the outstanding i.e [totalCharges - totalAmount].
4: lastly and most importantly i need to save the projected result into a new collection suppose "result" with the following fields-['customerId','sellerId','date','totalCharges','outstanding'(i.e: [totalCharges - totalAmount]),'totalAmount'.

You can try below query :
db.sample1.aggregate([
/** groups data & sum up charges */
{ $group: { _id: { date: '$_id.date', customerId: '$_id.customerId', sellerId: '$_id.sellerId' }, totalCharges: { $sum: '$charges' } } },
/** finds matching docs from sample2 */
{
$lookup:
{
from: "sample2",
let: { customerId: '$_id.customerId', sellerId: '$_id.sellerId' },
pipeline: [
{
$match:
{
$expr:
{
$and:
[
{ $eq: ["$customerId", "$$customerId"] },
{ $eq: ["$sellerId", "$$sellerId"] }
]
}
}
},
{ $project: { amount: 1, _id: 0 } }
],
as: "TotalAmount" // TotalAmount is an array of objects, each object will have just amount field in it.
}
},
/** retains only needed fields */
{
$project: {
totalCharges: 1, outstanding: {
$subtract: ['$totalCharges', {
$reduce: {
input: '$TotalAmount',
initialValue: 0,
in: { $add: ["$$value", "$$this.amount"] }
}
}]
}, TotalAmount: {
$reduce: {
input: '$TotalAmount',
initialValue: 0,
in: { $add: ["$$value", "$$this.amount"] }
}
}
}
}
])
Test : MongoDB-Playground
Ref : aggregation-pipeline
Note : At the end of the aggregation you can have either $merge or $out stage to write aggregation results into new collection, If your MongoDB v >=4.2 then prefer $merge cause it will merge fields to existing documents/adds new documents to existing collection or if no collection is found with given name it would create new collection, But where as $out will completely replaces existing collection if provided collection name already exists or creates new collection with provided name.

Related

MongoDB : add New Field to existing sub document after $look stage or merge lookup response to main document

I want new field "isActive" inside modifierStatus sub document which is coming from modifieritems collection.
modifieritems collection :
{
"_id" : ObjectId("5e6a5a0e6d40624b12453a67"),
"modifierName" : "xxx",
"isActive" : 1
}
{
"_id" : ObjectId("5e6a5a0e6d40624b12453a6a"),
"modifierName" : "yyy",
"isActive" : 0
}
favoritedrinks collection :
{
"alcoholName" : "whiskey",
"modifierList" : [{
"_id" : ObjectId("5e6a5a0e6d40624b12453a61"),
"modifierId" : ObjectId("5e6a5a0e6d40624b12453a67"),
"modifierName" : "xxx",
}
{
"_id" : ObjectId("5e6a5a0e6d40624b12453a66"),
"modifierId" : ObjectId("5e6a5a0e6d40624b12453a6a"),
"modifierName" : "yyy",
}]
}
my query is :
db.getCollection('favoritedrinks').aggregate([
{ "$sort": { "alcoholName": 1 } },
{"$lookup": {
"from": "modifieritems",
localField: 'modifierList.modifierId',
foreignField: '_id',
as: 'modifier'
}},
{
$project:{
"alcoholName" : "$alcoholName",
"modifierStatus":"$modifier",
}
},
]);
But my expected result :
{
"alcoholName" : "Whiskey",
"modifierStatus" : [
{
"_id" : ObjectId("5e6a5a0e6d40624b12453a61"),
"modifierId" : ObjectId("5e6a5a0e6d40624b12453a67"),
"modifierName" : "xxx",
"isActive" : 1,
},
{
"_id" : ObjectId("5e6a5a0e6d40624b12453a66"),
"modifierId" : ObjectId("5e6a5a0e6d40624b12453a6a"),
"modifierName" : "yyy",
"isActive" : 0,
}
]
}
anyone please help me?
Try this query :
Update with new requirement :
db.favoritedrinks.aggregate([
{
"$sort": {
"alcoholName": 1
}
},
{
"$lookup": {
"from": "modifieritems",
localField: "modifierList.modifierId",
foreignField: "_id",
as: "modifierStatus"
}
},
{
$addFields: {
modifierStatus: {
$map: {
input: "$modifierList",
as: "m",
in: {
$mergeObjects: [
{
$arrayElemAt: [ /** As filter would only get one object (cause you'll have only one matching doc in modifieritems coll for each "modifierList.modifierId", So getting first element out of array, else you need to take this array into an object & merge that field to particular object of 'modifierList') */
{
$filter: {
input: "$modifierStatus",
cond: {
$eq: [
"$$this._id",
"$$m.modifierId"
]
}
}
},
0
]
},
"$$m"
]
}
}
}
}
},
{
$project: {
modifierStatus: 1,
alcoholName: 1,
_id: 0
}
}
])
Test : MongoDB-Playground
Old :
db.favoritedrinks.aggregate([
{
"$sort": {
"alcoholName": 1
}
},
{
$lookup: {
from: "modifieritems",
let: {
id: "$modifierList.modifierId"
},
pipeline: [
{
$match: { $expr: { $in: ["$_id", "$$id"] } }
},
/** Adding a new field modifierId(taken from _id field of modifieritems doc)
* to each matched document from modifieritems coll */
{
$addFields: {
modifierId: "$_id"
}
}
],
as: "modifierStatus"
}
},
/** By mentioning 0 to particular fields to remove them & retain rest all other fields */
{
$project: {
modifierList: 0,
_id: 0
}
}
])
Test : MongoDB-Playground
When you want $project to include a field's current value while keeping the same field name, you need only specify :1. When you use "$field" you are explicitly setting the value, which will overwrite any existing value.
Try making your projection:
{
$project:{
"alcoholName" : 1,
"modifier.isActive": 1,
"modifier.modifierName": 1
}
}

Mongodb aggregate $group stage takes a long time

I'm practicing how to use MongoDB aggregation, but they seem to take a really long time (running time).
The problem seems to happen whenever I use $group. All other queries run just fine.
I have some 1.3 million dummy documents that need to perform two basic operations: get a count of the IP addresses and unique IP addresses.
My schema looks something like this:
{
"_id":"5da51af103eb566faee6b8b4",
"ip_address":"...",
"country":"CL",
"browser":{
"user_agent":...",
}
}
Running a basic $group query takes about 12s on average, which is much too slow.
I did a little research, and someone suggested creating an index on ip_addresses. That seems to have slowed it down because queries now take 13-15s.
I use MongoDB and the query I'm running looks like this:
visitorsModel.aggregate([
{
'$group': {
'_id': '$ip_address',
'count': {
'$sum': 1
}
}
}
]).allowDiskUse(true)
.exec(function (err, docs) {
if (err) throw err;
return res.send({
uniqueCount: docs.length
})
})
Any help is appreciated.
Edit: I forgot to mention, someone suggested it might be a hardware issue? I'm running the query on a core i5, 8GB RAM laptop if it helps.
Edit 2: The query plan:
{
"stages" : [
{
"$cursor" : {
"query" : {
},
"fields" : {
"ip_address" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "metrics.visitors",
"indexFilterSet" : false,
"parsedQuery" : {
},
"winningPlan" : {
"stage" : "COLLSCAN",
"direction" : "forward"
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1387324,
"executionTimeMillis" : 7671,
"totalKeysExamined" : 0,
"totalDocsExamined" : 1387324,
"executionStages" : {
"stage" : "COLLSCAN",
"nReturned" : 1387324,
"executionTimeMillisEstimate" : 9,
"works" : 1387326,
"advanced" : 1387324,
"needTime" : 1,
"needYield" : 0,
"saveState" : 10930,
"restoreState" : 10930,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 1387324
}
}
}
},
{
"$group" : {
"_id" : "$ip_address",
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
],
"ok" : 1
}
This is some info about using $group aggregation stage, if it uses indexes, and its limitations and what can be tried to overcome these.
1. The $group Stage Doesn't Use Index:
Mongodb Aggregation: Does $group use index?
2. $group Operator and Memory:
The $group stage has a limit of 100 megabytes of RAM. By default, if
the stage exceeds this limit, $group returns an error. To allow for
the handling of large datasets, set the allowDiskUse option to true.
This flag enables $group operations to write to temporary files.
See MongoDb docs on $group Operator and Memory
3. An Example Using $group and Count:
A collection called as cities:
{ "_id" : 1, "city" : "Bangalore", "country" : "India" }
{ "_id" : 2, "city" : "New York", "country" : "United States" }
{ "_id" : 3, "city" : "Canberra", "country" : "Australia" }
{ "_id" : 4, "city" : "Hyderabad", "country" : "India" }
{ "_id" : 5, "city" : "Chicago", "country" : "United States" }
{ "_id" : 6, "city" : "Amritsar", "country" : "India" }
{ "_id" : 7, "city" : "Ankara", "country" : "Turkey" }
{ "_id" : 8, "city" : "Sydney", "country" : "Australia" }
{ "_id" : 9, "city" : "Srinagar", "country" : "India" }
{ "_id" : 10, "city" : "San Francisco", "country" : "United States" }
Query the collection to count the cities by each country:
db.cities.aggregate( [
{ $group: { _id: "$country", cityCount: { $sum: 1 } } },
{ $project: { country: "$_id", _id: 0, cityCount: 1 } }
] )
The Result:
{ "cityCount" : 3, "country" : "United States" }
{ "cityCount" : 1, "country" : "Turkey" }
{ "cityCount" : 2, "country" : "Australia" }
{ "cityCount" : 4, "country" : "India" }
4. Using allowDiskUse Option:
db.cities.aggregate( [
{ $group: { _id: "$country", cityCount: { $sum: 1 } } },
{ $project: { country: "$_id", _id: 0, cityCount: 1 } }
], { allowDiskUse : true } )
Note, in this case it makes no difference in query performance or output. This is to show the usage only.
5. Some Options to Try (suggestions):
You can try a few things to get some result (for trial purposes only):
Use $limit stage and restrict the number of documents processed and
see what is the result. For example, you can try { $limit: 1000 }.
Note this stage needs to come before the $group stage.
You can also use the $match, $project stages before the $group
stage to control the shape and size of the input. This may
return a result (instead of an error).
[EDIT ADD]
Notes on Distinct and Count:
Using the same cities collection - to get unique countries and a count of them you can try using the aggregate stage $count along with $group as in the following two queries.
Distinct:
db.cities.aggregate( [
{ $match: { country: { $exists: true } } },
{ $group: { _id: "$country" } },
{ $project: { country: "$_id", _id: 0 } }
] )
The Result:
{ "country" : "United States" }
{ "country" : "Turkey" }
{ "country" : "India" }
{ "country" : "Australia" }
To get the above result as a single document with an array of unique values, use the $addToSetoperator:
db.cities.aggregate( [
{ $match: { country: { $exists: true } } },
{ $group: { _id: null, uniqueCountries: { $addToSet: "$country" } } },
{ $project: { _id: 0 } },
] )
The Result: { "uniqueCountries" : [ "United States", "Turkey", "India", "Australia" ] }
Count:
db.cities.aggregate( [
{ $match: { country: { $exists: true } } },
{ $group: { _id: "$country" } },
{ $project: { country: "$_id", _id: 0 } },
{ $count: "uniqueCountryCount" }
] )
The Result: { "uniqueCountryCount" : 4 }
In the above queries the $match stage is used to filter any documents with non-existing or null countryfield. The $project stage reshapes the result document(s).
MongoDB Query Language:
Note the two queries get similar results when using the MongoDB query language commands: db.collection.distinct("country") and db.cities.distinct("country").length (note the distinct returns an array).
You can create index
db.collectionname.createIndex( { ip_address: "text" } )
Try this, it is more faster.
I think it will help you.

How to group records by day using timestamp as a field in mongodb and nodejs

I would like to group records by day for certain period. I have tried so far using this code added into the aggregate function:
{
$group : {
_id : { day: { $dayOfMonth: "$timestamp" }},
count: { $sum: 1 }
}
}
And this is how a document looks like:
{
"_id" : ObjectId("ec9cddd50e08a84cd3f4cccb"),
"orgid" : "5c48500d84430a3a4b828e85",
"timestamp" : ISODate("2019-03-28T14:00:00.000Z"),
"apiid" : {
"zxczxczxczxczxc" : {
"errortotal" : 6,
"hits" : 6,
"humanidentifier" : "Feedback",
"identifier" : "663cfc345e42401c6443cfd635301f8f",
"lasttime" : ISODate("2019-03-28T14:58:07.355Z"),
"success" : 0,
"totalrequesttime" : 0.0,
"requesttime" : 0.0
}
},
"apikeys" : {
"00000000" : {
"errortotal" : 3,
"hits" : 3,
"humanidentifier" : "",
"identifier" : "00000000",
"lasttime" : ISODate("2019-03-28T14:55:10.438Z"),
"success" : 0,
"totalrequesttime" : 0.0,
"requesttime" : 0.0
},
"cae81afc" : {
"errortotal" : 3,
"hits" : 3,
"humanidentifier" : "EE5RqcXMTqcWEx8nZv3vRATLspK2",
"identifier" : "cbe81afc",
"lasttime" : ISODate("2019-03-28T14:58:07.355Z"),
"success" : 0,
"totalrequesttime" : 0.0,
"requesttime" : 0.0
}
}
Any idea how can I achieve this?
Result I get is: [ { _id: { day: null }, count: 3 } ], it seems wrong for me since I have two documents with the same date and another document with different timestamp
UPDATE:
I also have this inside aggregate fuction:
// Project things as a key/value array, along with the original doc
{
$project: {
array: {$objectToArray: '$apikeys'},
doc: '$$ROOT',
}
},
// Match the docs with a field value of 'x'
{$match: {'array.v.humanidentifier': {$in: trialCustomerUsers}}},
If I comment this part it will work fine the grouping, but the thing is I would also do some where statement in cases where I also dont know what woudl be the key, that's why I had to add this piece of code
Just accumulate the records in a new field with the $push operator
{
$group : {
_id : { day: { $dayOfMonth: "$timestamp" }},
records: { $push: "$$ROOT" }
}
}
You have $projected your all the root document in the doc field using $$ROOT. Now your aggregation should be as followed
db.collection.aggregate([
{ "$project": {
"array": { "$objectToArray": "$apikeys" },
"doc": "$$ROOT"
}},
{ "$match": { "array.v.humanidentifier": { "$in": trialCustomerUsers }}},
{ "$group" : {
"_id" : { "day": { "$dayOfMonth": "$doc.timestamp" }},
"count": { "$sum": 1 }
}}
])
Change this line
_id : { day: { $dayOfMonth: "$timestamp" }}
to
_id : { day: { $day: "$timestamp" } }
or you can do something like this
$group : {
_id : null,
day: '$timestamp',
count: { $sum: 1 }
}

MongoDB aggregate on Nested data

I have nested data as below,
{
"_id" : ObjectId("5a30ee450889c5f0ebc21116"),
"academicyear" : "2017-18",
"fid" : "be02",
"fname" : "ABC",
"fdept" : "Comp",
"degree" : "BE",
"class" : "1",
"sem" : "8",
"dept" : "Comp",
"section" : "Theory",
"subname" : "BDA",
"fbValueList" : [
{
"_id" : ObjectId("5a30eecd3e3457056c93f7af"),
"score" : 20,
"rating" : "Fair"
},
{
"_id" : ObjectId("5a30eefd3e3457056c93f7b0"),
"score" : 10,
"rating" : "Fair"
},
{
"_id" : ObjectId("5a337e53341bf419040865c4"),
"score" : 88,
"rating" : "Excellent"
},
{
"_id" : ObjectId("5a337ee2341bf419040865c7"),
"score" : 75,
"rating" : "Very Good"
},
{
"_id" : ObjectId("5a3380b583dde50ddcea350e"),
"score" : 72,
"rating" : "Very Good"
}
]
},
{
"_id" : ObjectId("5a3764f1bc19b77dd9fd9a57"),
"academicyear" : "2017-18",
"fid" : "be02",
"fname" : "ABC",
"fdept" : "Comp",
"degree" : "BE",
"class" : "1",
"sem" : "5",
"dept" : "Comp",
"section" : "Theory",
"subname" : "BDA",
"fbValueList" : [
{
"_id" : ObjectId("5a3764f1bc19b77dd9fd9a59"),
"score" : 88,
"rating" : "Excellent"
},
{
"_id" : ObjectId("5a37667aee64bce1b14747d2"),
"score" : 74,
"rating" : "Good"
},
{
"_id" : ObjectId("5a3766b3ee64bce1b14747dc"),
"score" : 74,
"rating" : "Good"
}
]
}
We are trying to perform aggregation using this,
db.fbresults.aggregate([{$match:{academicyear:"2017-18",fdept:'Comp'}},{$group:{_id: {fname: "$fname", rating:"$fbValueList.rating"},count: {"$sum":1}}}])
and we get result like,
{ "_id" : { "fname" : "ABC", "rating" : [ "Fair","Fair","Excellent","Very Good", "Very Good", "Excellent", "Good", "Good" ] }, "count" : 2 }
but we are expecting result like,
{ "_id" : { "fname" : "ABC", "rating_group" : [
{
rating: "Excellent"
count: 2
},
{
rating: "Very Good"
count: 2
},
{
rating: "Good"
count: 2
},
{
rating: "Fair"
count: 2
},
] }, "count" : 2 }
We want to get individual faculty group by their name and inside that group by their rating response and count of rating.
We have already tried this one but we did not the result.
Mongodb Aggregate Nested Group
This should get you going:
db.collection.aggregate([{
$match: {
academicyear: "2017-18",
fdept:'Comp'
}
}, {
$unwind: "$fbValueList" // flatten the fbValueList array into multiple documents
}, {
$group: {
_id: {
fname: "$fname",
rating:"$fbValueList.rating"
},
count: {
"$sum": 1 // this will give us the count per combination of fname and fbValueList.rating
}
}
}, {
$group: {
_id: "$_id.fname", // we only want one bucket per fname
rating_group: {
$push: { // we push the exact structure you were asking for
rating: "$_id.rating",
count: "$count"
}
},
count: {
$avg: "$count" // this will be the average across all entries in the fname bucket
}
}
}])
This is a long aggregation pipeline, there may be some aggregations that are un-necessary, so please check and discard whichever are irrelevant.
NOTE: This will only work with Mongo 3.4+.
You need to use $unwind and then $group and $push ratings with their counts.
matchAcademicYear = {
$match: {
academicyear:"2017-18", fdept:'Comp'
}
}
groupByNameAndRating = {
$group: {
_id: {
fname: "$fname", rating:"$fbValueList.rating"
},
count: {
"$sum":1
}
}
}
unwindRating = {
$unwind: "$_id.rating"
}
addFullRating = {
$addFields: {
"_id.full_rating": "$count"
}
}
replaceIdRoot = {
$replaceRoot: {
newRoot: "$_id"
}
}
groupByRatingAndFname = {
$group: {
_id: {
"rating": "$rating",
"fname": "$fname"
},
count: {"$sum": 1},
full_rating: {"$first": "$full_rating"}
}
}
addFullRatingAndCount = {
$addFields: {
"_id.count": "$count",
"_id.full_rating": "$full_count"
}
}
groupByFname = {
$group: {
_id: "$fname",
rating_group: { $push: {rating: "$rating", count: "$count"}},
count: { $first: "$full_rating"}
}
}
db.fbresults.aggregate([
matchAcademicYear,
groupByNameAndRating,
unwindRating,
addFullRating,
unwindRating,
replaceIdRoot,
groupByRatingAndFname,
addFullRatingAndCount,
replaceIdRoot,
groupByFname
])

Mongo Node driver how to get all fields of $max aggregate from an array of objects

I have a collection called "products" which has an array of "bids" objects.
I want to find out the Maximum bid for each product, for this I am aggregating Products on $max with $bids.bidamount field. However this is only giving me the largest bid amount. How do I project all the bid fields for the max aggregation.
Here is a sample document
{
"_id" : ObjectId("58109a5138fe12215cfdc064"),
"product_id" : 2,
"item_name" : "Auction Item1",
"item_description" : "Test",
"seller_name" : "ak#gmail.com",
"item_price" : "20",
"item_quantity" : 7,
"sale_type" : "Auction",
"posted_at" : "2016:10:26 04:58:09",
"expires_at" : "2016:10:30 04:58:09",
"bids" : [
{
"bid_id" : 1,
"bidder" : "ak#gmail.com",
"bid_amount" : 300,
"bit_time" : "2016:10:26 22:36:29"
},
{
"bid_id" : 2,
"bidder" : "ak#gmail.com",
"bid_amount" : 100,
"bit_time" : "2016:10:26 22:37:29"
}
],
"orders" : [
{
"buyer" : "ak#gmail.com",
"quantity" : "2"
},
{
"buyer" : "ak#gmail.com",
"quantity" : "3"
}
]
}
Here is my mongo query:
db.products.aggregate([
{
$project: {
bidMax: { $max: "$bids.bid_amount"}
}
}
])
which gives the following result:
{
"_id" : ObjectId("58109a5138fe12215cfdc064"),
"bidMax" : 300
}
db.products.aggregate([{$unwind:"$bids"},{$group:{_id:"$_id", sum:{$sum:"$bids.bid_amount"}}},{$project:{doc:"$$ROOT", _id:1, sum:1}, {$sort:{"sum":-1}},{$limit:1}]),
which return something like { "_id" : ObjectId("5811b667c50fb1ec88227860"), "sum" : 600, doc:{your document....} }
This should do it:
db.products.aggregate([{
$unwind: '$bids'
}, {
$group: {
_id: '$products_id',
maxBid: {
$max: '$bids.bid_amount'
}
}
}])
db.collectionName.aggregate(
[
{
$group:
{
_id: "$product_id",
maxBidAmount: { $max: "$bids.bid_amount" }
}
}
]
)
Hey use this query, you will get the result.

Resources