Mongodb lookup + group by with mongoose - node.js

I have two different Mongoose collections, as follows:
{ "_id" : 1, "countryId" : 1, "price" : 12, "quantity" : 24 }
{ "_id" : 2, "countryId" : 2, "price" : 20, "quantity" : 1 }
{ "_id" : 3 }
{ "_id" : 4, "countryId" : 1, "price" : 12, "quantity" : 24 }
{ "_id" : 1, "id" : 1, description: "Colombia"}
{ "_id" : 3, "id" : 2, description: "Mexic" }
I'm trying to aggregate them so that I get a result like the following:
{"country":"Colombia","total":48}
{"country":"Mexic","total":1}
I've tried many things, but it always fails. Here is the latest version of what I'm working on (I've changed the data, but you get the idea):
Model.aggregate([
{
$lookup:
{
from: "countryList",
localField: "countryId",
foreignField: "id",
as: "country"
},
{
$project: {
quantity:1, country:{$country:"$countryList.description"}
}
},{
$group:{
{ _id : null, qtyCountry: { $sum: "$quantity" } }
}
}
}],function (err, result) {
if (err) {
console.log(err);
} else {
console.log(result)
}
}
);
Is it even possible?

Yes, it is possible. You can try the following aggregation pipeline.
var pipeline = [
{"$match":{"countryId":{"$exists":true}}},
{"$group" : {"_id":"$countryId", "quantity":{"$sum":"$quantity"}}},
{"$lookup":{"from":"countryList","localField":"_id", "foreignField":"id","as":"country"}},
{"$unwind":"$country"},
{"$project": {"country":"$country.description", "total":"$quantity", _id:0}}
]
Sample output:
{ "country" : "Mexic", "total" : 1 }
{ "country" : "Colombia", "total" : 48 }

Related

Mongodb aggregate $group stage takes a long time

I'm practicing how to use MongoDB aggregation, but they seem to take a really long time (running time).
The problem seems to happen whenever I use $group. All other queries run just fine.
I have some 1.3 million dummy documents on which I need to perform two basic operations: get a count of the IP addresses and a count of the unique IP addresses.
My schema looks something like this:
{
"_id":"5da51af103eb566faee6b8b4",
"ip_address":"...",
"country":"CL",
"browser":{
"user_agent":...",
}
}
Running a basic $group query takes about 12s on average, which is much too slow.
I did a little research, and someone suggested creating an index on ip_address. That seems to have slowed it down, because queries now take 13-15s.
I use MongoDB and the query I'm running looks like this:
visitorsModel.aggregate([
{
'$group': {
'_id': '$ip_address',
'count': {
'$sum': 1
}
}
}
]).allowDiskUse(true)
.exec(function (err, docs) {
if (err) throw err;
return res.send({
uniqueCount: docs.length
})
})
Any help is appreciated.
Edit: I forgot to mention, someone suggested it might be a hardware issue? I'm running the query on a core i5, 8GB RAM laptop if it helps.
Edit 2: The query plan:
{
"stages" : [
{
"$cursor" : {
"query" : {
},
"fields" : {
"ip_address" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "metrics.visitors",
"indexFilterSet" : false,
"parsedQuery" : {
},
"winningPlan" : {
"stage" : "COLLSCAN",
"direction" : "forward"
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1387324,
"executionTimeMillis" : 7671,
"totalKeysExamined" : 0,
"totalDocsExamined" : 1387324,
"executionStages" : {
"stage" : "COLLSCAN",
"nReturned" : 1387324,
"executionTimeMillisEstimate" : 9,
"works" : 1387326,
"advanced" : 1387324,
"needTime" : 1,
"needYield" : 0,
"saveState" : 10930,
"restoreState" : 10930,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 1387324
}
}
}
},
{
"$group" : {
"_id" : "$ip_address",
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
],
"ok" : 1
}
Here is some information about the $group aggregation stage: whether it uses indexes, its limitations, and what can be tried to overcome them.
1. The $group Stage Doesn't Use Index:
Mongodb Aggregation: Does $group use index?
2. $group Operator and Memory:
The $group stage has a limit of 100 megabytes of RAM. By default, if
the stage exceeds this limit, $group returns an error. To allow for
the handling of large datasets, set the allowDiskUse option to true.
This flag enables $group operations to write to temporary files.
See MongoDb docs on $group Operator and Memory
3. An Example Using $group and Count:
A collection called cities:
{ "_id" : 1, "city" : "Bangalore", "country" : "India" }
{ "_id" : 2, "city" : "New York", "country" : "United States" }
{ "_id" : 3, "city" : "Canberra", "country" : "Australia" }
{ "_id" : 4, "city" : "Hyderabad", "country" : "India" }
{ "_id" : 5, "city" : "Chicago", "country" : "United States" }
{ "_id" : 6, "city" : "Amritsar", "country" : "India" }
{ "_id" : 7, "city" : "Ankara", "country" : "Turkey" }
{ "_id" : 8, "city" : "Sydney", "country" : "Australia" }
{ "_id" : 9, "city" : "Srinagar", "country" : "India" }
{ "_id" : 10, "city" : "San Francisco", "country" : "United States" }
Query the collection to count the cities by each country:
db.cities.aggregate( [
{ $group: { _id: "$country", cityCount: { $sum: 1 } } },
{ $project: { country: "$_id", _id: 0, cityCount: 1 } }
] )
The Result:
{ "cityCount" : 3, "country" : "United States" }
{ "cityCount" : 1, "country" : "Turkey" }
{ "cityCount" : 2, "country" : "Australia" }
{ "cityCount" : 4, "country" : "India" }
4. Using allowDiskUse Option:
db.cities.aggregate( [
{ $group: { _id: "$country", cityCount: { $sum: 1 } } },
{ $project: { country: "$_id", _id: 0, cityCount: 1 } }
], { allowDiskUse : true } )
Note, in this case it makes no difference in query performance or output. This is to show the usage only.
5. Some Options to Try (suggestions):
You can try a few things to get some result (for trial purposes only):
Use $limit stage and restrict the number of documents processed and
see what is the result. For example, you can try { $limit: 1000 }.
Note this stage needs to come before the $group stage.
You can also use the $match, $project stages before the $group
stage to control the shape and size of the input. This may
return a result (instead of an error).
[EDIT ADD]
Notes on Distinct and Count:
Using the same cities collection - to get unique countries and a count of them you can try using the aggregate stage $count along with $group as in the following two queries.
Distinct:
db.cities.aggregate( [
{ $match: { country: { $exists: true } } },
{ $group: { _id: "$country" } },
{ $project: { country: "$_id", _id: 0 } }
] )
The Result:
{ "country" : "United States" }
{ "country" : "Turkey" }
{ "country" : "India" }
{ "country" : "Australia" }
To get the above result as a single document with an array of unique values, use the $addToSet operator:
db.cities.aggregate( [
{ $match: { country: { $exists: true } } },
{ $group: { _id: null, uniqueCountries: { $addToSet: "$country" } } },
{ $project: { _id: 0 } },
] )
The Result: { "uniqueCountries" : [ "United States", "Turkey", "India", "Australia" ] }
Count:
db.cities.aggregate( [
{ $match: { country: { $exists: true } } },
{ $group: { _id: "$country" } },
{ $project: { country: "$_id", _id: 0 } },
{ $count: "uniqueCountryCount" }
] )
The Result: { "uniqueCountryCount" : 4 }
In the above queries, the $match stage filters out documents in which the country field does not exist (note that { $exists: true } still matches documents whose country is explicitly null). The $project stage reshapes the result document(s).
MongoDB Query Language:
Note that the two aggregations above produce results similar to the MongoDB query language commands db.cities.distinct("country") and db.cities.distinct("country").length (distinct returns an array).
You can create an index:
db.collectionname.createIndex( { ip_address: "text" } )
Try this; it should be faster.
I think it will help you.

toObjectId for arrays of String to be used with lookup and aggregate [duplicate]

This question already has an answer here:
Matching ObjectId to String for $graphLookup
(1 answer)
Closed 3 years ago.
I have 2 models, one is Group and another is Students. Group looks like this
{
"_id" : ObjectId("5c8d28ef7e0e542854b7b904"),
"name" : "Homeroom ",
"year" : 2019,
"schoolID" : ObjectId("5c1a735fc98da061141475a1"),
"teachers" : [
{
"_id" : "5c1a7677c98da061141475aa",
"firstName" : "Ayam"
},
{
"_id" : "5c1a7677c98da061141475a9",
"firstName" : "Itik"
}
],
"addedOn" : ISODate("2019-03-16T16:48:47.372Z"),
"lastModified" : ISODate("2019-03-16T16:48:47.372Z"),
"__v" : 0,
"status" : 1,
"students" : [
"5c1a79f7c98da061141475b7",
"5c3bfea37774fb0b55000cb5",
"5c1a7c69c98da061141475bb",
"5c3bfea37774fb0b55000cb4",
"5c1a7d32c98da061141475be",
"5c3bfea37774fb0b55000cb7"
]
}
Where the field students above stores the _id (in String format) of students inside Students model.
Now I'm trying to do a lookup with aggregate, and I came up with something like this:
Group.aggregate([
{ $match: { _id: mongoose.mongo.ObjectId(groupID) } },
{ $lookup: {
from: "Students", localField: "students", foreignField: "_id", as: "studentList"
} },
{ $unwind: "$studentList" },
{ $replaceRoot: { newRoot: "$students" } }
], function(err, result){
if (err){
console.log("imin err 102: " )
console.log(err)
}else{
console.log("imini 105 result")
console.log(result);
}
});
Now I understand that the code above won't yield any result, since students inside the Group model is stored as an array of Strings, while the _id inside the Students model is an ObjectId. MongoDB now has $toObjectId, but since my model holds an array of Strings, I don't know how to apply $toObjectId correctly.
Here are sample documents for Students:
{
"_id" : ObjectId("5c1a79f7c98da061141475b7"),
"firstName" : "Ibrahim",
"kelasID" : ObjectId("5c429f9906f2a805bc6cd494"),
"lastName" : "Ali",
"schoolID" : ObjectId("5c1a735fc98da061141475a1"),
"year" : 2018,
"__v" : 0,
"addedOn" : ISODate("2018-12-25T04:27:47.909Z"),
"checkIn" : false,
"checkInStatus" : 1,
"contactNo1" : "012225656",
"father" : "Ali",
"fatherID" : "8852245",
"idType" : 0,
"lastModified" : ISODate("2018-12-25T04:27:47.909Z"),
"mother" : "",
"motherID" : ""
}
{
"_id" : ObjectId("5c3bfea37774fb0b55000cb5"),
"idType" : 0,
"checkIn" : false,
"checkInStatus" : 1,
"year" : 2019,
"schoolID" : ObjectId("5c1a735fc98da061141475a1"),
"kelasID" : ObjectId("5c1a7534c98da061141475a3"),
"firstName" : "Umar",
"lastName" : "Bin Al-Khattab",
"contactNo1" : "601222",
"status" : 1,
"addedOn" : ISODate("2019-01-14T03:14:43.597Z"),
"lastModified" : ISODate("2019-01-14T03:14:43.597Z"),
"__v" : 0
}
{
"_id" : ObjectId("5c1a7c69c98da061141475bb"),
"idType" : 0,
"checkIn" : false,
"checkInStatus" : 1,
"year" : 2018,
"schoolID" : ObjectId("5c1a735fc98da061141475a1"),
"kelasID" : ObjectId("5c1a7540c98da061141475a5"),
"firstName" : "Abdul Rahman",
"lastName" : "Affan",
"father" : "Affan",
"fatherID" : "54321",
"contactNo1" : "602288",
"status" : 1,
"addedOn" : ISODate("2018-12-25T04:30:16.130Z"),
"lastModified" : ISODate("2018-12-25T04:30:16.130Z"),
"__v" : 0
}
You have to $map over the students field to convert the String ids to ObjectId
Group.aggregate(
[
{ "$match": { "_id": mongoose.mongo.ObjectId(groupID) } },
{ "$addFields": {
"students": {
"$map": {
"input": "$students",
"in": { "$toObjectId": "$$this" }
}
}
}},
{
"$lookup": {
"from": "Students",
"localField": "students",
"foreignField": "_id",
"as": "studentList"
}
},
{ "$unwind": "$studentList" },
{ "$replaceRoot": { "newRoot": "$students" } }
],
function(err, result) {
if (err) {
console.log("imin err 102: ");
console.log(err);
} else {
console.log("imini 105 result");
console.log(result);
}
}
)

How to get max values for distinct elements in mongodb?

I have records in my collection
{
"_id" : ObjectId("5c37a71c54956d08afb590ef"),
"user_id" : 45,
"result" : 9,
}
{
"_id" : ObjectId("5c37a7ad54956d08afb590f0"),
"user_id" : 1,
"result" : 3,
}
{
"_id" : ObjectId("5c37a80254956d08afb590f1"),
"user_id" : 45,
"result" : 10,
}
How to get distinct records with max values (result) for each user (user_id field is unique) ?
I expect result like this:
{
"_id" : ObjectId("5c37a80254956d08afb590f1"),
"user_id" : 45, //distinct user_id
"result" : 10, //max result for user
}
{
"_id" : ObjectId("5c37a7ad54956d08afb590f0"),
"user_id" : 1, //distinct user_id
"result" : 3, //max result for user
}
You can use below aggregation:
db.col.aggregate([
{
$sort: { result: -1 }
},
{
$group: {
_id: "$user_id",
result: { $first: "$result" },
o_id: { $first: "$_id" }
}
},
{
$project: {
_id: "$o_id",
user_id: "$_id",
result: 1
}
}
])
You need to use $sort first to be able to capture both _id and result from highest result document using $group and $first operators. Output:
{ "result" : 3, "_id" : ObjectId("5c37a7ad54956d08afb590f0"), "user_id" : 1 }
{ "result" : 10, "_id" : ObjectId("5c37a80254956d08afb590f1"), "user_id" : 45 }

want to update only value element of specific key on mongodb

I have a dataset like this:
{
"_id" : ObjectId("5a7bee68996b551034015a15"),
"sequenceid" : 1,
"fruit" : [
{
"name" : "#APPLE",
"value" : 2
},
{
"name" : "#BANANA",
"value" : 1
},
{
"name" : "#ORANGE",
"value" : 5
}
}
I want to update only the #APPLE value, i.e. from 2 to 25. The expected result is:
{
"_id" : ObjectId("5a7bee68996b551034015a15"),
"sequenceid" : 1,
"fruit" : [
{
"name" : "#APPLE",
"value" : 25
},
{
"name" : "#BANANA",
"value" : 1
},
{
"name" : "#ORANGE",
"value" : 5
}
}
I tried the code below, but it replaces all the entries in the array and leaves only one entry. My code is:
db.Collection.update({'sequenceid': 1}, {$set: {'fruit' : {'name': '#APPLE', 'value': parseFloat(25)}}}, function(error, result){
if(error){
console.log('error');
} else {
console.log('success');
}
});
It produces this result:
{
"_id" : ObjectId("5a7bee68996b551034015a15"),
"sequenceid" : 1,
"fruit" : [
{
"name" : "#APPLE",
"value" : 25
}
}//Delete all my rest entry
How can I do this? I am a newbie to MongoDB.
This will update only the first matching array element. For reference, see: MongoDB - Update objects in a document's array (nested updating)
db.collection.update({ _id: ObjectId("5a7bf5586262dc7b9f3a8422") ,"fruit.name" : "#APPLE"},
{ $set:
{
"fruit.$.value" : 25
}
})
If you are writing a JavaScript query, then you can update like this:
db.collection.find({'sequenceid': 1}).forEach(function(x){
x.fruit.forEach(function(y){
if(y.name=="#APPLE")
{
y.value = 25
}
})
db.collection.update({_id:x._id},x)
})
db.Collection.update({
_id: ObjectId("5a7bee68996b551034015a15"),
"fruit": {
$elemMatch: {
"name": "#APPLE"
}
}
}, {
$set: {
"fruit.$.value": 25
}
})
In the above update operation, the $elemMatch operator is used to search for a value in an array, and in the $set stage the positional operator $ is used to update the value of a specific key belonging to the matched array element.

Mongo Node driver how to get all fields of $max aggregate from an array of objects

I have a collection called "products" which has an array of "bids" objects.
I want to find out the maximum bid for each product; for this I am aggregating products using $max on the $bids.bid_amount field. However, this only gives me the largest bid amount. How do I project all the fields of the bid that has the maximum amount?
Here is a sample document
{
"_id" : ObjectId("58109a5138fe12215cfdc064"),
"product_id" : 2,
"item_name" : "Auction Item1",
"item_description" : "Test",
"seller_name" : "ak#gmail.com",
"item_price" : "20",
"item_quantity" : 7,
"sale_type" : "Auction",
"posted_at" : "2016:10:26 04:58:09",
"expires_at" : "2016:10:30 04:58:09",
"bids" : [
{
"bid_id" : 1,
"bidder" : "ak#gmail.com",
"bid_amount" : 300,
"bit_time" : "2016:10:26 22:36:29"
},
{
"bid_id" : 2,
"bidder" : "ak#gmail.com",
"bid_amount" : 100,
"bit_time" : "2016:10:26 22:37:29"
}
],
"orders" : [
{
"buyer" : "ak#gmail.com",
"quantity" : "2"
},
{
"buyer" : "ak#gmail.com",
"quantity" : "3"
}
]
}
Here is my mongo query:
db.products.aggregate([
{
$project: {
bidMax: { $max: "$bids.bid_amount"}
}
}
])
which gives the following result:
{
"_id" : ObjectId("58109a5138fe12215cfdc064"),
"bidMax" : 300
}
db.products.aggregate([{$unwind:"$bids"},{$group:{_id:"$_id", sum:{$sum:"$bids.bid_amount"}}},{$project:{doc:"$$ROOT", _id:1, sum:1}, {$sort:{"sum":-1}},{$limit:1}]),
which returns something like { "_id" : ObjectId("5811b667c50fb1ec88227860"), "sum" : 600, doc:{your document....} }
This should do it:
db.products.aggregate([{
$unwind: '$bids'
}, {
$group: {
_id: '$products_id',
maxBid: {
$max: '$bids.bid_amount'
}
}
}])
db.collectionName.aggregate(
[
{
$group:
{
_id: "$product_id",
maxBidAmount: { $max: "$bids.bid_amount" }
}
}
]
)
Hey use this query, you will get the result.

Resources