MongoDB aggregate on Nested data - node.js

I have nested data as below,
{
"_id" : ObjectId("5a30ee450889c5f0ebc21116"),
"academicyear" : "2017-18",
"fid" : "be02",
"fname" : "ABC",
"fdept" : "Comp",
"degree" : "BE",
"class" : "1",
"sem" : "8",
"dept" : "Comp",
"section" : "Theory",
"subname" : "BDA",
"fbValueList" : [
{
"_id" : ObjectId("5a30eecd3e3457056c93f7af"),
"score" : 20,
"rating" : "Fair"
},
{
"_id" : ObjectId("5a30eefd3e3457056c93f7b0"),
"score" : 10,
"rating" : "Fair"
},
{
"_id" : ObjectId("5a337e53341bf419040865c4"),
"score" : 88,
"rating" : "Excellent"
},
{
"_id" : ObjectId("5a337ee2341bf419040865c7"),
"score" : 75,
"rating" : "Very Good"
},
{
"_id" : ObjectId("5a3380b583dde50ddcea350e"),
"score" : 72,
"rating" : "Very Good"
}
]
},
{
"_id" : ObjectId("5a3764f1bc19b77dd9fd9a57"),
"academicyear" : "2017-18",
"fid" : "be02",
"fname" : "ABC",
"fdept" : "Comp",
"degree" : "BE",
"class" : "1",
"sem" : "5",
"dept" : "Comp",
"section" : "Theory",
"subname" : "BDA",
"fbValueList" : [
{
"_id" : ObjectId("5a3764f1bc19b77dd9fd9a59"),
"score" : 88,
"rating" : "Excellent"
},
{
"_id" : ObjectId("5a37667aee64bce1b14747d2"),
"score" : 74,
"rating" : "Good"
},
{
"_id" : ObjectId("5a3766b3ee64bce1b14747dc"),
"score" : 74,
"rating" : "Good"
}
]
}
We are trying to perform aggregation using this,
db.fbresults.aggregate([{$match:{academicyear:"2017-18",fdept:'Comp'}},{$group:{_id: {fname: "$fname", rating:"$fbValueList.rating"},count: {"$sum":1}}}])
and we get result like,
{ "_id" : { "fname" : "ABC", "rating" : [ "Fair","Fair","Excellent","Very Good", "Very Good", "Excellent", "Good", "Good" ] }, "count" : 2 }
but we are expecting result like,
{ "_id" : { "fname" : "ABC", "rating_group" : [
{
rating: "Excellent"
count: 2
},
{
rating: "Very Good"
count: 2
},
{
rating: "Good"
count: 2
},
{
rating: "Fair"
count: 2
},
] }, "count" : 2 }
We want to get individual faculty group by their name and inside that group by their rating response and count of rating.
We have already tried this one but we did not the result.
Mongodb Aggregate Nested Group

This should get you going:
db.collection.aggregate([{
$match: {
academicyear: "2017-18",
fdept:'Comp'
}
}, {
$unwind: "$fbValueList" // flatten the fbValueList array into multiple documents
}, {
$group: {
_id: {
fname: "$fname",
rating:"$fbValueList.rating"
},
count: {
"$sum": 1 // this will give us the count per combination of fname and fbValueList.rating
}
}
}, {
$group: {
_id: "$_id.fname", // we only want one bucket per fname
rating_group: {
$push: { // we push the exact structure you were asking for
rating: "$_id.rating",
count: "$count"
}
},
count: {
$avg: "$count" // this will be the average across all entries in the fname bucket
}
}
}])

This is a long aggregation pipeline, there may be some aggregations that are un-necessary, so please check and discard whichever are irrelevant.
NOTE: This will only work with Mongo 3.4+.
You need to use $unwind and then $group and $push ratings with their counts.
matchAcademicYear = {
$match: {
academicyear:"2017-18", fdept:'Comp'
}
}
groupByNameAndRating = {
$group: {
_id: {
fname: "$fname", rating:"$fbValueList.rating"
},
count: {
"$sum":1
}
}
}
unwindRating = {
$unwind: "$_id.rating"
}
addFullRating = {
$addFields: {
"_id.full_rating": "$count"
}
}
replaceIdRoot = {
$replaceRoot: {
newRoot: "$_id"
}
}
groupByRatingAndFname = {
$group: {
_id: {
"rating": "$rating",
"fname": "$fname"
},
count: {"$sum": 1},
full_rating: {"$first": "$full_rating"}
}
}
addFullRatingAndCount = {
$addFields: {
"_id.count": "$count",
"_id.full_rating": "$full_count"
}
}
groupByFname = {
$group: {
_id: "$fname",
rating_group: { $push: {rating: "$rating", count: "$count"}},
count: { $first: "$full_rating"}
}
}
db.fbresults.aggregate([
matchAcademicYear,
groupByNameAndRating,
unwindRating,
addFullRating,
unwindRating,
replaceIdRoot,
groupByRatingAndFname,
addFullRatingAndCount,
replaceIdRoot,
groupByFname
])

Related

Group by month and date in mongodb and display the list

My DB products schema structure
{ "_id" : "507d95d5719dbef170f15bf9" , "name" : "AC3 Series Charger", "type" : "accessory", "price" : 19,"created":"1626083461034"}
{ "_id" : "507d95d5719dbef170f15bfa" , "name" : "AC3 Case Green", "type" : "case" , "price" : 12, "created":"1625805979235"}
{ "_id" : "507d95d5719dbef170f15bfb" , "name" : "Phone Extended Warranty", "type" : "warranty", "price" : 38,"created":"1624374320653" }
{ "_id" : "507d95d5719dbef170f15bfc" , "name" : "AC3 Case Black", "type" : "case" , "price" : 12.5,"created":"1624606153646" }
{ "_id" : "507d95d5719dbef170f15bfd" , "name" : "AC3 Case Red", "type" : "case" , "price" : 12, "created":"1624362066717"}
{ "_id" : "507d95d5719dbef170f15bfe" , "name" : "Phone Service Basic Plan", "type" : "service", "price":9.5,"created":"1624446320186"}
{ "_id" : "507d95d5719dbef170f15bff" , "name" : "Phone Service Core Plan", "type" : "service", "price" :3.5,"created":"1624605403064"}
{ "_id" : "507d95d5719dbef170f15c00" , "name" : "Phone Service Family Plan", "type" : "service", "price":10.4,"created":"1624446320173"}
I want to get the records based month and day and for that, I've written a query like below
db.collection.aggregate([
{ "$group": {
_id: { month:{ "$month": { "$toDate": "$created" }, day: "$day" } },
product: { "$push": "$name" }
}}
])
but it is giving the error
An object representing an expression must have exactly one field: { $month: { $toDate: \"$created\" }, day: \"$day\" }
Expected output
{
month:5
day:12
products:[
{
"name" : "Phone Service Family Plan"
},
{"name" : "Phone Service Core Plan"}
]
}
You need to convert your string to long first by using $toLong and also use $dayOfMonth operator since there's no $day in MongoDB:
db.collection.aggregate([
{
"$group": {
_id: {
month: { "$month": { "$toDate": { $toLong: "$created" } } },
day: { "$dayOfMonth": { "$toDate": { $toLong: "$created" } } }
},
product: {
"$push": "$name"
}
}
},
{
$group: {
_id: "$_id.month",
monthDays: {
$push: {
day: "$_id.day",
product: "$product"
}
}
}
},
{
$project: {
_id: 0,
month: "$_id",
monthDays: 1
}
},
])
Mongo Playground

wrong counting with $filter

My db cofiguration looks like:
{
"_id" : ObjectId("5ece47aa6510a611b47aac5a"),
"boats" : [
{
"_id" : ObjectId("5ece47aa6510a611b47aac6e"),
"model" : "Dufour",
"year" : 2019,
"about" : [
{
"_id" : ObjectId("5ece47aa6510a611b47aac71"),
"Capacity" : 14,
"characteristics" : [
{
"_id" : ObjectId("5ece47aa6510a611b47aac73"),
"fuel" : "petrol",
"fuelCap" : 200
},
{
"_id" : ObjectId("5ece47aa6510a611b47aac73"),
"fuel" : "petrol",
"fuelCap" : 120
},
]
},
{
"_id" : ObjectId("5ece47aa6510a611b47aac71"),
"Capacity" : 8,
"characteristics" : [
{
"_id" : ObjectId("5ece47aa6510a611b47aac73"),
"fuel" : "benzin",
"fuelCap" : 180
},
{
"_id" : ObjectId("5ece47aa6510a611b47aac73"),
"fuel" : "petrol",
"fuelCap" : 100
},
]
},
{...},
{...},
]
}
Now i am trying to count the number of boats which have "fuel" : "petrol", so i use the code bellow:
router.get('/boat', async(req, res)=>{
try{
const fuelData = await Boat.aggregate([
{
$project: {
fuelData: {
$filter: {
input: "$boats",
as: "boats",
cond: {
$filter:{
input:"$$boats.about",
as:"about",
cond:{
$filter:{
input:"$$about.characteristics",
as:"characteristics",
cond:{
$eq:["$$activity1.activity.type", "STILL"]
}
}
}
}
}
}
}
}
},
{
$project: {
boatsCount: {$size : "$fuelData" }
}
}
])
res.status(201).send(fuelData)
}catch(e){
res.send(e)
}
})
The problem is that return wrong number of boatCount. And it seems like it returns the number of the boats which are inside the db. Any help how to count correctly the boats which have "fuel" : "petrol"?
Is there anything wrong in my code?
https://mongoplayground.net/p/D0FEhTMJEJ1
Hope this is what you need.
The sample data you have provided is missing a ] & }.
So I added 1 additional boat with 2 about.
db.collection.aggregate([
{
$match: {
"boats.about.characteristics.fuel": "petrol"
}
},
{
$unwind: "$boats"
},
{
$unwind: "$boats.about"
},
{
$match: {
"boats.about.characteristics.fuel": "petrol"
}
},
{
$group: {
_id: null,
count: {
$sum: 1
}
}
}
])

Mongodb aggregate $group stage takes a long time

I'm practicing how to use MongoDB aggregation, but they seem to take a really long time (running time).
The problem seems to happen whenever I use $group. All other queries run just fine.
I have some 1.3 million dummy documents that need to perform two basic operations: get a count of the IP addresses and unique IP addresses.
My schema looks something like this:
{
"_id":"5da51af103eb566faee6b8b4",
"ip_address":"...",
"country":"CL",
"browser":{
"user_agent":...",
}
}
Running a basic $group query takes about 12s on average, which is much too slow.
I did a little research, and someone suggested creating an index on ip_addresses. That seems to have slowed it down because queries now take 13-15s.
I use MongoDB and the query I'm running looks like this:
visitorsModel.aggregate([
{
'$group': {
'_id': '$ip_address',
'count': {
'$sum': 1
}
}
}
]).allowDiskUse(true)
.exec(function (err, docs) {
if (err) throw err;
return res.send({
uniqueCount: docs.length
})
})
Any help is appreciated.
Edit: I forgot to mention, someone suggested it might be a hardware issue? I'm running the query on a core i5, 8GB RAM laptop if it helps.
Edit 2: The query plan:
{
"stages" : [
{
"$cursor" : {
"query" : {
},
"fields" : {
"ip_address" : 1,
"_id" : 0
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "metrics.visitors",
"indexFilterSet" : false,
"parsedQuery" : {
},
"winningPlan" : {
"stage" : "COLLSCAN",
"direction" : "forward"
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 1387324,
"executionTimeMillis" : 7671,
"totalKeysExamined" : 0,
"totalDocsExamined" : 1387324,
"executionStages" : {
"stage" : "COLLSCAN",
"nReturned" : 1387324,
"executionTimeMillisEstimate" : 9,
"works" : 1387326,
"advanced" : 1387324,
"needTime" : 1,
"needYield" : 0,
"saveState" : 10930,
"restoreState" : 10930,
"isEOF" : 1,
"invalidates" : 0,
"direction" : "forward",
"docsExamined" : 1387324
}
}
}
},
{
"$group" : {
"_id" : "$ip_address",
"count" : {
"$sum" : {
"$const" : 1
}
}
}
}
],
"ok" : 1
}
This is some info about using $group aggregation stage, if it uses indexes, and its limitations and what can be tried to overcome these.
1. The $group Stage Doesn't Use Index:
Mongodb Aggregation: Does $group use index?
2. $group Operator and Memory:
The $group stage has a limit of 100 megabytes of RAM. By default, if
the stage exceeds this limit, $group returns an error. To allow for
the handling of large datasets, set the allowDiskUse option to true.
This flag enables $group operations to write to temporary files.
See MongoDb docs on $group Operator and Memory
3. An Example Using $group and Count:
A collection called as cities:
{ "_id" : 1, "city" : "Bangalore", "country" : "India" }
{ "_id" : 2, "city" : "New York", "country" : "United States" }
{ "_id" : 3, "city" : "Canberra", "country" : "Australia" }
{ "_id" : 4, "city" : "Hyderabad", "country" : "India" }
{ "_id" : 5, "city" : "Chicago", "country" : "United States" }
{ "_id" : 6, "city" : "Amritsar", "country" : "India" }
{ "_id" : 7, "city" : "Ankara", "country" : "Turkey" }
{ "_id" : 8, "city" : "Sydney", "country" : "Australia" }
{ "_id" : 9, "city" : "Srinagar", "country" : "India" }
{ "_id" : 10, "city" : "San Francisco", "country" : "United States" }
Query the collection to count the cities by each country:
db.cities.aggregate( [
{ $group: { _id: "$country", cityCount: { $sum: 1 } } },
{ $project: { country: "$_id", _id: 0, cityCount: 1 } }
] )
The Result:
{ "cityCount" : 3, "country" : "United States" }
{ "cityCount" : 1, "country" : "Turkey" }
{ "cityCount" : 2, "country" : "Australia" }
{ "cityCount" : 4, "country" : "India" }
4. Using allowDiskUse Option:
db.cities.aggregate( [
{ $group: { _id: "$country", cityCount: { $sum: 1 } } },
{ $project: { country: "$_id", _id: 0, cityCount: 1 } }
], { allowDiskUse : true } )
Note, in this case it makes no difference in query performance or output. This is to show the usage only.
5. Some Options to Try (suggestions):
You can try a few things to get some result (for trial purposes only):
Use $limit stage and restrict the number of documents processed and
see what is the result. For example, you can try { $limit: 1000 }.
Note this stage needs to come before the $group stage.
You can also use the $match, $project stages before the $group
stage to control the shape and size of the input. This may
return a result (instead of an error).
[EDIT ADD]
Notes on Distinct and Count:
Using the same cities collection - to get unique countries and a count of them you can try using the aggregate stage $count along with $group as in the following two queries.
Distinct:
db.cities.aggregate( [
{ $match: { country: { $exists: true } } },
{ $group: { _id: "$country" } },
{ $project: { country: "$_id", _id: 0 } }
] )
The Result:
{ "country" : "United States" }
{ "country" : "Turkey" }
{ "country" : "India" }
{ "country" : "Australia" }
To get the above result as a single document with an array of unique values, use the $addToSetoperator:
db.cities.aggregate( [
{ $match: { country: { $exists: true } } },
{ $group: { _id: null, uniqueCountries: { $addToSet: "$country" } } },
{ $project: { _id: 0 } },
] )
The Result: { "uniqueCountries" : [ "United States", "Turkey", "India", "Australia" ] }
Count:
db.cities.aggregate( [
{ $match: { country: { $exists: true } } },
{ $group: { _id: "$country" } },
{ $project: { country: "$_id", _id: 0 } },
{ $count: "uniqueCountryCount" }
] )
The Result: { "uniqueCountryCount" : 4 }
In the above queries the $match stage is used to filter any documents with non-existing or null countryfield. The $project stage reshapes the result document(s).
MongoDB Query Language:
Note the two queries get similar results when using the MongoDB query language commands: db.collection.distinct("country") and db.cities.distinct("country").length (note the distinct returns an array).
You can create index
db.collectionname.createIndex( { ip_address: "text" } )
Try this, it is more faster.
I think it will help you.

Better Way to Aggregate and Assign Random Winner

I'm trying to aggregate a set of transactions using the data set below and choose a winner in every grade. The winner is randomly chosen from within the grade.
{ "_id" : ObjectId("5ce6fb4b3d1be918e574500a"),
"eventId" : ObjectId("5ce2f540bf126322a6be559b"),
"donationAmt" : 32,
"ccTranId" : "HzP4B",
"firstName" : "Jason",
"lastName" : "Jones",
"grade" : "1",
"teacher" : "Smith, Bob",
"studentId" : 100 },
{ "_id" : ObjectId("5ce6fb4b3d1be918e574500b"),
"eventId" : ObjectId("5ce2f540bf126322a6be559b"),
"donationAmt" : 15,
"ccTranId" : "HzP4A",
"firstName" : "Joey",
"lastName" : "Jones",
"grade" : "1",
"teacher" : "Smith, Jane",
"studentId" : 200 },
{ "_id" : ObjectId("5ce6fb4b3d1be918e574500c"),
"eventId" : ObjectId("5ce2f540bf126322a6be559b"),
"donationAmt" : 25,
"ccTranId" : "HzP4D",
"firstName" : "Carrie",
"lastName" : "Jones",
"grade" : "2",
"teacher" : "Smith, Sally",
"studentId" : 300 }
I'm using this script to aggregate.
Donation.aggregate([
{
$match: {
eventId: mongoose.Types.ObjectId(eventId)
}
},
{
"$group": {
"_id": "$studentId",
"first": { "$first": "$firstName" },
"last": { "$first": "$lastName" },
"grade": { "$first": "$grade" },
"teacher": { "$first": "$teacher" }
}
},
{
"$group": {
"_id": "$grade",
"students": {
$push: '$$ROOT'
}
}
}
, { $sort: { _id: 1 } }
])
The output gives me this to work with. Then, I iterate through the each element and assign one of the students in the subdocument as winner.
The double group seems sloppy and it would be nice to execute an expression within a $project clause to randomly assign the winner.
Is there a cleaner way?
{
"_id":"1",
"students":[
{
"_id":100,
"first":"Jason",
"last":"Jones",
"grade":"1",
"teacher":"Smith, Bob"
},
{
"_id":200,
"first":"Joey",
"last":"Jones",
"grade":"1",
"teacher":"Smith, Jae"
}
]
},
{
"_id":"2",
students":[ .... ]
},
Random means that you need to get unpredictable results. The only operator that can help you in MongoDB is $sample. Unfortunately you can't sample arrays. All you can do is to apply filtering condition and then run { $sample: { size: 1 } } on that filtered data set:
db.col.aggregate([
{
$match: {
eventId: ObjectId("5ce2f540bf126322a6be559b"),
grade: "2"
}
},
{ $sample: { size: 1 } }
])
To make it a little bit more useful you can take advantage of $facet and run multiple samples for every grade in one query:
db.col.aggregate([
{
$match: {
eventId: ObjectId("5ce2f540bf126322a6be559b")
}
},
{
$facet: {
winner1: [
{ $match: { grade: "1" } },
{ $sample: { size: 1 } }
],
winner2: [
{ $match: { grade: "2" } },
{ $sample: { size: 1 } }
]
// other grades ...
}
}
])

Mongo Node driver how to get all fields of $max aggregate from an array of objects

I have a collection called "products" which has an array of "bids" objects.
I want to find out the Maximum bid for each product, for this I am aggregating Products on $max with $bids.bidamount field. However this is only giving me the largest bid amount. How do I project all the bid fields for the max aggregation.
Here is a sample document
{
"_id" : ObjectId("58109a5138fe12215cfdc064"),
"product_id" : 2,
"item_name" : "Auction Item1",
"item_description" : "Test",
"seller_name" : "ak#gmail.com",
"item_price" : "20",
"item_quantity" : 7,
"sale_type" : "Auction",
"posted_at" : "2016:10:26 04:58:09",
"expires_at" : "2016:10:30 04:58:09",
"bids" : [
{
"bid_id" : 1,
"bidder" : "ak#gmail.com",
"bid_amount" : 300,
"bit_time" : "2016:10:26 22:36:29"
},
{
"bid_id" : 2,
"bidder" : "ak#gmail.com",
"bid_amount" : 100,
"bit_time" : "2016:10:26 22:37:29"
}
],
"orders" : [
{
"buyer" : "ak#gmail.com",
"quantity" : "2"
},
{
"buyer" : "ak#gmail.com",
"quantity" : "3"
}
]
}
Here is my mongo query:
db.products.aggregate([
{
$project: {
bidMax: { $max: "$bids.bid_amount"}
}
}
])
which gives the following result:
{
"_id" : ObjectId("58109a5138fe12215cfdc064"),
"bidMax" : 300
}
db.products.aggregate([{$unwind:"$bids"},{$group:{_id:"$_id", sum:{$sum:"$bids.bid_amount"}}},{$project:{doc:"$$ROOT", _id:1, sum:1}, {$sort:{"sum":-1}},{$limit:1}]),
which return something like { "_id" : ObjectId("5811b667c50fb1ec88227860"), "sum" : 600, doc:{your document....} }
This should do it:
db.products.aggregate([{
$unwind: '$bids'
}, {
$group: {
_id: '$products_id',
maxBid: {
$max: '$bids.bid_amount'
}
}
}])
db.collectionName.aggregate(
[
{
$group:
{
_id: "$product_id",
maxBidAmount: { $max: "$bids.bid_amount" }
}
}
]
)
Hey use this query, you will get the result.

Resources