Let's say i have this 2 of huge documents:
[
{
_id: ....,
status: "A",
class: "DIP1A",
"created.user._id": ...,
"created.dt": ....,
"category": "private",
price: 100.00 //type double
},
{
_id: ....,
status: "A",
class: "DIP2A",
"created.user._id": ...
"created.dt": ...,
"category": "public",
price: 200.00 //type double
},
];
Query:
var pipeline = [
{
$match: {
"created.user._id": ....
}
},
{
$unwind: "$class"
},
{
$unwind: "$price"
},
{
$group: {
_id: "$class",
price: {
$sum: "$price"
},
count: {
$sum: 1
}
}
},
{
$project: {
_id: 0,
class: '$_id',
count: 1,
price: 1
}
}
];
db.myCollection.aggregate(pipeline);
Problem issue:
Query without calculate/$sum "$price", it's running really faster;
Indexes:
db.myCollection.ensureIndex({ 'created.user._id': -1 });
db.myCollection.ensureIndex({ 'created.user._id': -1, class: 1 });
db.myCollection.ensureIndex({ 'created.user._id': -1, price: 1});
Performance:
without $sum calc : 5 second with huge of records.
with $sum cals : 20 minutes with huge of records.
The one thing you really should do is move the $project stage to right after the $match stage (if the documents contain more data then stated in your question (huge documents)).
You want as little data as possible through the pipeline.
Also i see an $unwind on price and class but in your example they aren't array's. It could be a copy/paste issue ;-)
Like :
var pipeline = [
{
$match: {
"created.user._id": ....
}
},
{
$project: {
_id: 0,
class: '$_id',
count: 1,
price: 1
}
},
{
$unwind: "$class"
},
{
$unwind: "$price"
},
{
$group: {
_id: "$class",
price: {
$sum: "$price"
},
count: {
$sum: 1
}
}
},
];
Related
On my backend I use mongoDB with nodejs and mongoose
I have many records in mongodb with this structure:
{
..fields
type: 'out',
user: 'id1', <--mongodb objectID,
orderPayment: [
{
_id: 'id1',
paid: true,
paymentSum: 40
},
{
_id: 'id2',
paid: true,
paymentSum: 60,
},
{
_id: 'id3',
paid: false,
paymentSum: 50,
}
]
},
{
..fields
type: 'in',
user: 'id1', <--mongodb objectID
orderPayment: [
{
_id: 'id1',
paid: true,
paymentSum: 10
},
{
_id: 'id2',
paid: true,
paymentSum: 10,
},
{
_id: 'id3',
paid: false,
paymentSum: 77,
}
]
}
I need to group this records by 'type' and get sum with conditions.
need to get sum of 'paid' records and sum of noPaid records.
for a better understanding, here is the result Ι need to get
Output is:
{
out { <-- type field
paid: 100, <-- sum of paid
noPaid: 50 <-- sum of noPaid
},
in: { <-- type field
paid: 20, <-- sum of paid
noPaid: 77 <-- sum of noPaid
}
}
Different solution would be this one. It may give better performance than solution of #YuTing:
db.collection.aggregate([
{
$project: {
type: 1,
paid: {
$filter: {
input: "$orderPayment",
cond: "$$this.paid"
}
},
noPaid: {
$filter: {
input: "$orderPayment",
cond: { $not: "$$this.paid" }
}
}
}
},
{
$set: {
paid: { $sum: "$paid.paymentSum" },
noPaid: { $sum: "$noPaid.paymentSum" }
}
},
{
$group: {
_id: "$type",
paid: { $sum: "$paid" },
noPaid: { $sum: "$noPaid" }
}
}
])
Mongo Playground
use $cond in $group
db.collection.aggregate([
{
"$unwind": "$orderPayment"
},
{
"$group": {
"_id": "$type",
"paid": {
"$sum": {
$cond: {
if: { $eq: [ "$orderPayment.paid", true ] },
then: "$orderPayment.paymentSum",
else: 0
}
}
},
"noPaid": {
"$sum": {
$cond: {
if: { $eq: [ "$orderPayment.paid", false ] },
then: "$orderPayment.paymentSum",
else: 0
}
}
}
}
}
])
mongoplayground
I have multiple documents in a collection like this
[
{
_id: 123,
data: 1,
details: [
{
item: "a",
day: 1
},
{
item: "a",
day: 2
},
{
item: "a",
day: 3
},
{
item: "a",
day: 4
}
],
someMoreField: "xyz"
}
]
Now I want document with _id: 123 and details field should only contain day within range of 1 to 3. So the result will be like below.
{
_id: 123,
data: 1,
details: [
{
item: 'a',
day: 1,
},
{
item: 'a',
day: 2,
},
{
item: 'a',
day: 3,
},
],
someMoreField: 'xyz',
};
I tried to do this by aggregate query as:
db.collectionaggregate([
{
$match: {
_id: id,
'details.day': { $gt: 1, $lte: 3 },
},
},
{
$project: {
_id: 1,
details: {
$filter: {
input: '$details',
as: 'value',
cond: {
$and: [
{ $gt: ['$$value.date', 1] },
{ $lt: ['$$value.date', 3] },
],
},
},
},
},
},
])
But this gives me empty result. Could someone please guide me through this?
You are very close, you just need to change the $gt to $gte and $lt to $lte.
Another minor syntax error is you're accessing $$value.date but the schema you provided does not have that field, it seems you need to change it to $$value.day, like so:
db.collection.aggregate([
{
$match: {
_id: 123,
"details.day": {
$gt: 1,
$lte: 3
}
}
},
{
$project: {
_id: 1,
details: {
$filter: {
input: "$details",
as: "value",
cond: {
$and: [
{
$gte: [
"$$value.day",
1
]
},
{
$lte: [
"$$value.day",
3
]
},
],
},
},
},
},
},
])
Mongo Playground
I am calculating the notification percentage in my app for tracking some statistics.
My Collection:
[
{
_id: "123",
status: "seen",
userId: "589"
},
{
_id: "223",
status: "seen",
userId: "589"
},
{
_id: "474",
status: "unseen",
userId: "589"
},
{
_id: "875",
status: "seen",
userId: "112"
},
{
_id: "891",
status: "unseen",
userId: "112"
}
]
Expected Result:
Here we can see that, UserId - 589 has received 3 notifications out of which 2 are seen. So the calculation is (totalNumOfSeen/totalNumOfNoticationsSent) * 100
[{
userId: "589",
notificationPercentage : 66.66
},{
userId: "112",
notificationPercentage : 50
}]
I am using a facet for grouping and matching but that is returning me an array of object and I am not getting how to perform divide on this.
My Query:
db.collection.aggregate([
{
$facet: {
totalNumOfSeen: [
{
$match: {
userId: "589",
status: "seen"
}
},
{
$group: {
_id: "$userId",
totalNumOfSeen: {
$sum: 1
}
}
}
],
totalNumOfNoticationsSent: [
{
$match: {
userId: "589",
}
},
{
$group: {
_id: "$userId",
totalNumOfNoticationsSent: {
$sum: 1
}
}
}
]
}
}
])
The Above Query is giving me the below Result:
[
{
"totalNumOfNoticationsSent": [
{
"_id": "589",
"totalNumOfNoticationsSent": 3
}
],
"totalNumOfSeen": [
{
"_id": "589",
"totalNumOfSeen": 2
}
]
}
]
MongoPlayground - https://mongoplayground.net/p/jHn2ZlshgDL
Now I need to add one more field as notificationPercentage and calculate the notification percentage based on the above facet result. Really appreciate the help.
You can try,
$group by userId and get totalSeen count using $cond if status is seen, get total count of notification using $sum,
$project to show required fields, and calculate percentage using $divide and $multiply
db.collection.aggregate([
{
$group: {
_id: "$userId",
totalSeen: {
$sum: { $cond: [{ $eq: ["$status", "seen"] }, 1, 0] }
},
total: { $sum: 1 }
}
},
{
$project: {
_id: 0,
userId: "$_id",
notificationPercentage: {
$multiply: [{ $divide: ["$totalSeen", "$total"] }, 100]
}
}
}
])
Playground
Document
[
{
type: 1,//credit
amount: 60
},
{
type: 2,//debit
amount: 35
},
{
type: 3,//credit
amount: 25
},
{
type: 4,//debit
amount: 80
},
{
type: 5,//credit
amount: 70
},
]
Result
[
{
_id: {
Name: "Credition",
Type: [1, 3, 5]
},
Total_Amount: 155
},
{
_id: {
Name: "Debition",
Type: [2, 4]
},
Total_Amount: 115
},
]
In my schema, there are millions of logs records in which few are credited logs, few are debited logs.
I want to use MongoDB aggregate pipe and have to group like above for million records at a time
Yes you can do that first you need to add a new field transaction on the basis of the type of logs, then you can group the logs on the basis of that field.
Working example - https://mongoplayground.net/p/e4kqeKLIuIr
db.collection.aggregate([
{
$addFields: {
transaction: {
$cond: {
if: {
$in: [
"$type",
[
1,
3,
5
]
]
},
then: "Credition",
else: "Debition"
}
}
}
},
{
$group: {
_id: "$transaction",
Type: {
$addToSet: "$type"
},
Total_Amount: {
$sum: "$amount"
}
}
}
])
After this, you can also use $project operator to change the name or structure of the record, if needed
You can use the operator $cond during the grouping stage:
db.collection.aggregate([
{
$group: {
_id: {
$cond: [
{
$in: [ "$type", [1,3,5] ]
},
"Credition",
"Debition"
]
},
type: {
$addToSet: "$type"
},
amount: {
$sum: "$amount"
}
}
},
{
$project: {
_id: {
Name: "$_id",
Type: "$type"
},
Total_Amount: "$amount"
}
}
])
MongoPlayground
Trying to sum the above data with the group aggregate always return 0.
{
team: "5d091e8b402c1d089042000d"
_id: "5d2e63ce89be227d171eb4a7"
goals: [
{
player: "5d20636a4b0bf2670c1e014c"
createdAt: "1970-01-01T00:18:31.111+00:00"
value: 1
_id: 5d2e63ce89be227d171eb4aa
},
{
player: "5d20636a4b0bf2670c1e014c"
createdAt: "1970-01-01T00:18:31.111+00:00"
value: 1
_id: 5d2e63ce89be227d171eb4aa
}
]
}
{
team: "5d091e8b402c1d089042000d"
_id: "5d2e63ce89be227d171eb4a7"
goals: [
{
player: "5d20636a4b0bf2670c1e014c"
createdAt: "1970-01-01T00:18:31.111+00:00"
value: 1
_id: 5d2e63ce89be227d171eb4aa
},
{
player: "5d20636a4b0bf2670c1e014c"
createdAt: "1970-01-01T00:18:31.111+00:00"
value: 1
_id: 5d2e63ce89be227d171eb4aa
}
]
}
Current Try:
{
_id: "$team",
total: {
"$sum": {
"$filter": {
"input": "$goals",
"as": "s",
"cond": { "$eq": [ "$$s.player", "5d20636a4b0bf2670c1e014c"] }
}
}
}
}
the above returns:
{ _id: 5d20636a4b0bf2670c1e014c, total: 0}
It would be easier to move filtering as a separate pipeline stage and then run double $sum (inner one sums the array from single document, outer aggregates cross-document value)
db.collection.aggregate([
{
$addFields: {
goals: {
$filter: {
input: "$goals",
as: "g",
cond: {
$eq: [ "$$g.player", "5d20636a4b0bf2670c1e014c" ]
}
}
}
}
},
{
$group: {
_id: "$team",
total: { $sum: { $sum: "$goals.value" } }
}
}
])
Mongo Playground