Aggregate + $project + $group + $sum + $count + $group again in MongoDB - node.js

Consider the Schema :
const mongoose = require("mongoose");

const { Schema } = mongoose;

/**
 * Schema for one entry of the eight-week game plan.
 * Each entry stores a week number, the lead and supplier it refers to,
 * a claims counter and the date it was inserted.
 */
const EightWeekGamePlanSchema = new Schema({
  Week: {
    type: Number,
    required: true,
  },
  LeadId: {
    type: String,
    required: true,
  },
  SupplierGeneralId: {
    type: String,
    required: true,
  },
  // ... More properties
  TotalClaimsLeftToBeClaimedByClientType: {
    // increased by 1 every time it's claimed
    type: Number,
    required: true,
  },
  InsertDate: {
    type: Date,
    // default: Date.now
  },
});

// Bind the model to a declared constant. The previous chained assignment
// (`module.exports = EightWeekGamePlan = ...`) wrote to an undeclared
// identifier, which creates an implicit global and throws in strict mode.
const EightWeekGamePlan = mongoose.model(
  "eightweekgameplan",
  EightWeekGamePlanSchema
);

module.exports = EightWeekGamePlan;
And consider the Mongo query :
EightWeekGamePlan.aggregate(
[
// 1.Group by supplier
// 2.Within each supplier group by month & year
// Use count & sum
{
$project: {
month: { $month: "$InsertDate" },
year: { $year: "$InsertDate" }
}
},
{
$group: {
_id: {
SupplierGeneralId: "$SupplierGeneralId",
month: "$month",
year: "$year"
},
ClaimsSummary : { $sum: "$TotalClaimsLeftToBeClaimedByClientType" } ,
// TotalLeadsPerSupplierAndDate : ...
// Here I want to group again , by LeadID and count all the
// unique LeadID's
}
}
]
I want to group by SupplierGeneralId and Month + Year of InsertDate ,
Summarize for each month the TotalClaimsLeftToBeClaimedByClientType
Group again but this time by the LeadID , and count all the unique LeadIds for each supplier (previously grouped by SupplierGeneralId, Month, Year).
However I'm getting
[ { _id: { month: 1, year: 2020 }, ClaimsSummary: 0 } ]
...even though there is data.
What's wrong with the pipeline ? and how can I group again to get the unique LeadIds for each supplier ?
Thanks
EDIT:
I've added more fields to the Project but now I'm getting empty array in the $push :
EightWeekGamePlan.aggregate(
[
// 1.Group by supplier
// 2.Within each supplier group by month & year
// Use count & sum
{ $sort: { SupplierGeneralId: 1 } },
{
$project: {
month: { $month: "$InsertDate" },
year: { $year: "$InsertDate" },
SupplierGeneralId: "$SupplierGeneralId",
TotalClaimsLeftToBeClaimedByClientType:
"$TotalClaimsLeftToBeClaimedByClientType"
}
},
{
$group: {
_id: {
SupplierGeneralId: "$SupplierGeneralId",
month: "$month",
year: "$year"
},
LeadsCollection: {
$push: {
LeadId: "$LeadId"
}
},
ClaimsSummary: { $sum: "$TotalClaimsLeftToBeClaimedByClientType" }
}
}
]
Output:
[
[0] {
[0] _id: {
[0] SupplierGeneralId: 'qCwHWFD1cBvrfPp5hdBL6M',
[0] month: 1,
[0] year: 2020
[0] },
[0] LeadsCollection: [
[0] {}, {}, {}, {}, {}, {}, {}, {},
[0] {}, {}, {}, {}, {}, {}, {}, {},
[0] {}, {}, {}, {}, {}, {}, {}, {},
[0] {}, {}, {}, {}, {}, {}, {}, {},
[0] {}, {}, {}, {}, {}, {}, {}, {},
[0] {}, {}, {}, {}, {}, {}, {}, {},
[0] {}, {}, {}, {}, {}, {}, {}, {},
[0] {}, {}, {}, {}, {}, {}, {}, {},
[0] {}, {}, {}, {}, {}, {}, {}, {}
[0] ],
[0] ClaimsSummary: 4
[0] }
[0] ]
SECOND EDIT :
EightWeekGamePlan.aggregate(
[
{ $sort: { SupplierGeneralId: 1 } },
{
$group: {
_id: {
SupplierGeneralId: "$SupplierGeneralId",
month: { $month: "$InsertDate" },
year: { $year: "$InsertDate" }
},
LeadsUploaded: {
$push: {
LeadId: "$LeadId"
}
},
Sells: { $sum: "$TotalClaimsLeftToBeClaimedByClientType" }
}
},
{
$project: {
Sells: "$Sells",
LeadsUploaded: {
$reduce: {
input: { $setUnion: "$LeadsUploaded.LeadId" },
initialValue: [],
in: {
$concatArrays: [
"$$value",
[
{
Lead_ID: "$$this"
}
]
]
}
}
}
}
}
]

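You should just drop the $project stage. $project only passes along the fields it lists, so in your first attempt SupplierGeneralId and TotalClaimsLeftToBeClaimedByClientType never reached $group (hence the missing supplier key and the sum of 0), and in your edit LeadId was still missing (hence the empty objects in the $push). Since you're grouping right after, there's no real point in projecting first; it only makes the pipeline less efficient.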
Rewrite your pipeline as:
// Per supplier and per month/year of InsertDate:
//   - ClaimsSummary: sum of TotalClaimsLeftToBeClaimedByClientType
//   - LeadsCollection: every LeadId seen in the group (duplicates kept)
//   - TotalLeadsPerSupplierAndDate: number of distinct LeadIds in the group
EightWeekGamePlan.aggregate([
  {
    // month/year are computed directly in the group key, so no preceding
    // $project is needed (a $project would drop every field it does not list).
    $group: {
      _id: {
        SupplierGeneralId: "$SupplierGeneralId",
        month: { $month: "$InsertDate" },
        year: { $year: "$InsertDate" },
      },
      LeadsCollection: {
        $push: {
          LeadId: "$LeadId",
        },
      },
      // $addToSet keeps each LeadId once per group; its size is the unique count.
      UniqueLeadIds: { $addToSet: "$LeadId" },
      ClaimsSummary: { $sum: "$TotalClaimsLeftToBeClaimedByClientType" },
    },
  },
  {
    $addFields: {
      TotalLeadsPerSupplierAndDate: { $size: "$UniqueLeadIds" },
    },
  },
  // $group does not guarantee the order of its output, so sort after grouping
  // (on the group key) rather than before it.
  { $sort: { "_id.SupplierGeneralId": 1 } },
]);

Related

How to find difference between dates in different documents using Loopback 3 remote method in MongoDB?

Suppose I have the following two documents in MongoDB. How do I find the difference between the dates in their "eventCreatedAt" fields?
I am implementing this with a LoopBack 3 remote method.
[
{
"eventType": "offer",
"currentPrice": 3,
"seller": "Medicalstudent",
"buyer": "musiciann",
"eventCreatedAt": "2022-03-09T10:25:20.308Z",
"isExistingOffer": true,
"timeDiffOfPreTran": 1,
"id": "6228853556f56afb3d995d50",
"nftId": "622880fc56f56afb3d995d4f"
},
{
"eventType": "offer",
"currentPrice": 5,
"seller": "Medicalstudentss",
"buyer": "music",
"eventCreatedAt": "2022-03-09T10:25:20.308Z",
"isExistingOffer": false,
"timeDiffOfPreTran": 4,
"id": "622f2d8e550a568093b54c93",
"nftId": "622880fc56f56afb3d995d4f"
}
]
// Joins every document with the "nft" collection on nftId and stores the joined
// documents in `docs`. The sub-pipeline runs on the joined documents and replaces
// each one's `shift` field with the day difference to the previous event.
db.collection.aggregate([
{
$lookup: {
from: "nft",
localField: "nftId",
foreignField: "nftId",
as: "docs",
pipeline: [
{
$setWindowFields: {
// Constant partition key: all documents fall into a single window.
partitionBy: "",
sortBy: { eventCreatedAt: 1 },
output: {
shift: {
// by: -1 reads eventCreatedAt from the preceding document in sort order.
$shift: {
output: "$eventCreatedAt",
by: -1,
// NOTE(review): presumably leaves `shift` unset when there is no previous document - confirm.
default: "$$REMOVE"
}
}
}
}
},
{
$set: {
// Overwrite `shift` with the whole-day difference previous -> current event.
shift: {
$dateDiff: {
// $toDate converts both values (the sample data stores ISO strings) to dates.
startDate: { $toDate: "$shift" },
endDate: { $toDate: "$eventCreatedAt" },
unit: "day"
}
}
}
}
]
}
}
])
mongoplayground
// Same computation without the $lookup: for each document, sorted by
// eventCreatedAt, store in `shift` the day difference to the previous document.
db.collection.aggregate([
{
$setWindowFields: {
// Constant partition key: all documents fall into a single window.
partitionBy: "",
sortBy: { eventCreatedAt: 1 },
output: {
shift: {
// by: -1 reads eventCreatedAt from the preceding document in sort order.
$shift: {
output: "$eventCreatedAt",
by: -1,
// NOTE(review): presumably leaves `shift` unset when there is no previous document - confirm.
default: "$$REMOVE"
}
}
}
}
},
{
$set: {
// Overwrite `shift` with the whole-day difference previous -> current event.
shift: {
$dateDiff: {
startDate: { $toDate: "$shift" },
endDate: { $toDate: "$eventCreatedAt" },
unit: "day"
}
}
}
}
])
mongoplayground

MongoDB get SUM of fields with conditions

On my backend I use mongoDB with nodejs and mongoose
I have many records in mongodb with this structure:
{
..fields
type: 'out',
user: 'id1', <--mongodb objectID,
orderPayment: [
{
_id: 'id1',
paid: true,
paymentSum: 40
},
{
_id: 'id2',
paid: true,
paymentSum: 60,
},
{
_id: 'id3',
paid: false,
paymentSum: 50,
}
]
},
{
..fields
type: 'in',
user: 'id1', <--mongodb objectID
orderPayment: [
{
_id: 'id1',
paid: true,
paymentSum: 10
},
{
_id: 'id2',
paid: true,
paymentSum: 10,
},
{
_id: 'id3',
paid: false,
paymentSum: 77,
}
]
}
I need to group this records by 'type' and get sum with conditions.
need to get sum of 'paid' records and sum of noPaid records.
for a better understanding, here is the result Ι need to get
Output is:
{
out { <-- type field
paid: 100, <-- sum of paid
noPaid: 50 <-- sum of noPaid
},
in: { <-- type field
paid: 20, <-- sum of paid
noPaid: 77 <-- sum of noPaid
}
}
A different solution would be this one. It may give better performance than the solution of @YuTing:
// Sums paymentSum of paid and unpaid orderPayment entries per `type`,
// working on the embedded array directly instead of unwinding it.
db.collection.aggregate([
{
// Step 1: keep `type` and split orderPayment into two arrays by the `paid` flag.
$project: {
type: 1,
paid: {
$filter: {
input: "$orderPayment",
cond: "$$this.paid"
}
},
noPaid: {
$filter: {
input: "$orderPayment",
cond: { $not: "$$this.paid" }
}
}
}
},
{
// Step 2: replace each array with the sum of its paymentSum values (per document).
$set: {
paid: { $sum: "$paid.paymentSum" },
noPaid: { $sum: "$noPaid.paymentSum" }
}
},
{
// Step 3: add up the per-document sums for every distinct `type`.
$group: {
_id: "$type",
paid: { $sum: "$paid" },
noPaid: { $sum: "$noPaid" }
}
}
])
Mongo Playground
use $cond in $group
// Sums paymentSum of paid and unpaid orderPayment entries per `type`
// by unwinding the array and summing conditionally inside $group.
db.collection.aggregate([
{
// One output document per orderPayment entry.
"$unwind": "$orderPayment"
},
{
"$group": {
"_id": "$type",
// Adds paymentSum when paid is true, otherwise contributes 0.
"paid": {
"$sum": {
$cond: {
if: { $eq: [ "$orderPayment.paid", true ] },
then: "$orderPayment.paymentSum",
else: 0
}
}
},
// Adds paymentSum when paid is false, otherwise contributes 0.
"noPaid": {
"$sum": {
$cond: {
if: { $eq: [ "$orderPayment.paid", false ] },
then: "$orderPayment.paymentSum",
else: 0
}
}
}
}
}
])
mongoplayground

Pagination in aggregation and also used $project in mongoose

I want to show the user a list of objects with a projection applied, and I also want to show the total count. I want output like this:
{
"Success": true,
"message": " Fetched post comment successfully.",
"data": {
"docs": [
{
"_id": "60101fcc4077e698facd63aa",
"commentDetail": {
"hasReply": false,
"likeCount": 0,
"angryCount": 0,
"favCount": 0,
"totalReaction": 0,
"description": "four comment",
"media": null,
"date": "2021-01-26T13:57:32.220Z",
"action": [],
"commentReplies": []
},
"userID": "5f5238b5458b7c63a477bf87",
"postID": "5fb7a19bcae255415e99781b",
"commentID": "60101fcb4077e698facd63a9",
},
{
"_id": "60101fcc4077e698facd63aa",
"commentDetail": {
"hasReply": false,
"likeCount": 0,
"angryCount": 0,
"favCount": 0,
"totalReaction": 0,
"description": "four comment",
"media": null,
"date": "2021-01-26T13:57:32.220Z",
"action": [],
"commentReplies": []
},
"userID": "5f5238b5458b7c63a477bf87",
"postID": "5fb7a19bcae255415e99781b",
"commentID": "60101fcb4077e698facd63a9",
}
],
"count": 1
}
}
i write this query
getPostAllComment = await this.comment.aggregate([
{ $match: { postID: ObjectId(getPostCommentDTO.postID) } },
{ $sort: { createdAt: 1 } }, //sort on created At
{ $skip: (parseInt(getPostCommentDTO.pageNum) - 1) * parseInt(getPostCommentDTO.pageSize) },
{ $limit: parseInt(getPostCommentDTO.pageSize) },
{
$group: {
_id: "$postID",
docs: { $push: "$$ROOT" },
count: { $sum: 1 },
},
},
{ $project: { _id: 0 }
}
It gives me output like the above, which is what I expect, but my $project is not working here. How can I produce output exactly like the above, with the projection applied and the total count as a field at the root level rather than inside each document? I already tried $facet, but it did not give me the output I described above.
I got my desired result with this query:
getPostAllComment = await this.comment.aggregate([
{ $match: { postID: ObjectId(getPostCommentDTO.postID) } },
{
$group: {
_id: null,
collection: { $push: "$$ROOT" },
count: { $sum: 1 },
},
},
{ $unwind: "$collection" },
{ $sort: { createdAt: 1 } }, //sort on created At
{ $skip: (parseInt(getPostCommentDTO.pageNum) - 1) * parseInt(getPostCommentDTO.pageSize) },
{ $limit: parseInt(getPostCommentDTO.pageSize) },
{
$project: {
hasReply: "$collection.commentDetail.hasReply",
likeCount: "$collection.commentDetail.likeCount",
angryCount: "$collection.commentDetail.angryCount",
favCount: "$collection.commentDetail.favCount",
totalReaction: "$collection.commentDetail.totalReaction",
date: "$collection.commentDetail.date",
media: "$collection.commentDetail.media",
description: "$collection.commentDetail.description",
action: "$collection.commentDetail.action",
recentCommentReplies: "$collection.commentDetail.commentReplies",
userProfilePicture: "$collection.userProfilePicture",
userProfilePictureThumbnail: "$collection.userProfilePictureThumbnail",
userName: "$collection.userName",
userID: "$collection.userID",
postID: "$collection.postID",
commentID: "$collection.commentID",
createdAt: "$collection.createdAt",
updatedAt: "$collection.updatedAt",
count: "$count",
},
},
{
$group: {
_id: "$postID",
docs: { $push: "$$ROOT" },
total: { $first: "$count" },
},
},
{ $project: { "docs.count": 0, _id: 0 } },
])
return getPostAllComment[0]

MongoDB Get every Nth element of array

I've been working on a small project that takes MQTT data from sensors and stores it in a MongoDB database. I'm working with nodeJS and mongoose. These are my schemas.
/**
 * A single reading: numeric value plus the time it was taken.
 * Declared before SensorSchema because SensorSchema embeds it; referencing a
 * `const` before its declaration throws a ReferenceError.
 */
export const MeasurementSchema = new mongoose.Schema({
  value: { type: Number, required: true },
  time: { type: Date, required: true },
});

/**
 * A sensor with its descriptive fields and an embedded array of measurements.
 * Virtuals are included when converting documents to objects or JSON.
 */
export const SensorSchema = new mongoose.Schema(
  {
    name: { type: String, required: true, unique: true },
    location: { type: String, required: true },
    type: { type: String, required: true },
    unit: { type: String, required: true },
    measurements: { type: [MeasurementSchema] },
  },
  {
    toObject: { virtuals: true },
    toJSON: { virtuals: true },
  }
);
First I wrote a function that retrieves all measurements that were made in between two timestamps.
// Returns the measurements taken between startTime and endTime (inclusive),
// one document per measurement, sorted by time ascending.
const values = Sensor.aggregate([
// NOTE(review): getValuesFromPath presumably builds the filter selecting the sensor(s) for sensorPath - confirm.
{ $match: Sensor.getValuesFromPath(sensorPath) },
// One document per embedded measurement.
{ $unwind: "$measurements"},
{ $match: { "measurements.time": { $gte: startTime, $lte: endTime} }},
// Promote the measurement sub-document to the top level.
{ $replaceRoot: { newRoot: "$measurements" } },
{ $project: { _id: 0}},
{ $sort: {time: 1}}
]).exec();
In order to draw a graph in the UI, I need to somehow sort and then limit the data that gets sent to the client. I want to send every Nth Value in a certain interval to ensure that the data somewhat resembles the course of the data.
I would prefer a solution that doesn't fetch all the data from the database.
How would I go about doing this on the db? Can I somehow access the positional index of an element after sorting it? Is $arrayElemAt or $elemMatch the solution?
Before you run $unwind you can use $filter to apply the start/end date filtering. This lets you keep processing measurements as an array. In the next step you can get every N-th element by using $range to define a list of indexes and $arrayElemAt to retrieve the elements at those indexes:
// Returns every N-th measurement taken between startTime and endTime
// (inclusive), one document per measurement, sorted by time ascending.
// N is the sampling step and must be a positive integer ($range rejects a
// step of 0).
const values = Sensor.aggregate([
  { $match: Sensor.getValuesFromPath(sensorPath) },
  {
    // Restrict the embedded array to the requested time window while it is
    // still an array, so it can be indexed in the next stage.
    $addFields: {
      measurements: {
        $filter: {
          input: "$measurements",
          cond: {
            $and: [
              { $gte: ["$$this.time", startTime] },
              { $lte: ["$$this.time", endTime] },
            ],
          },
        },
      },
    },
  },
  {
    // Pick the elements at indexes 0, N, 2N, ... of the filtered array.
    // NOTE(review): sampling follows the stored array order; this assumes
    // measurements are stored in chronological order - confirm.
    $addFields: {
      measurements: {
        $map: {
          input: { $range: [0, { $size: "$measurements" }, N] },
          as: "index",
          in: { $arrayElemAt: ["$measurements", "$$index"] },
        },
      },
    },
  },
  { $unwind: "$measurements" },
  { $replaceRoot: { newRoot: "$measurements" } },
  { $project: { _id: 0 } },
  { $sort: { time: 1 } },
]).exec();
The following aggregation (i) retrieves all measurements that were made in between two timestamps, (ii) sorts by timestamp for each sensor, and (iii) gets every Nth value (specified by the variable EVERY_N).
Sample documents (with some arbitrary data for testing):
{
name: "s-1",
location: "123",
type: "456",
measurements: [ { time: 2, value: 12 }, { time: 3, value: 13 },
{ time: 4, value: 15 }, { time: 5, value: 22 },
{ time: 6, value: 34 }, { time: 7, value: 9 },
{ time: 8, value: 5 }, { time: 9, value: 1 },
]
},
{
name: "s-2",
location: "789",
type: "900",
measurements: [ { time: 1, value: 31 }, { time: 3, value: 32 },
{ time: 4, value: 35 }, { time: 6, value: 39 },
{ time: 7, value: 6}, { time: 8, value: 70 },
{ time: 9, value: 74 }, { time: 10, value: 82 }
]
}
The aggregation:
// Inclusive time window to select measurements from.
var startTime = 3;
var endTime = 10;
// Sampling step: keep every EVERY_N-th measurement (value can be 3, etc.).
var EVERY_N = 2;

// For each sensor: keep the measurements inside the time window, sort them by
// time, then keep the elements at positions 0, EVERY_N, 2 * EVERY_N, ...
db.collection.aggregate([
  {
    $unwind: "$measurements",
  },
  {
    $match: {
      "measurements.time": { $gte: startTime, $lte: endTime },
    },
  },
  {
    $sort: { name: 1, "measurements.time": 1 },
  },
  {
    // Rebuild one document per sensor with its measurements in sorted order;
    // `doc` keeps the sensor's remaining fields.
    $group: {
      _id: "$name",
      measurements: { $push: "$measurements" },
      doc: { $first: "$$ROOT" },
    },
  },
  {
    $addFields: {
      "doc.measurements": "$measurements",
    },
  },
  {
    $replaceRoot: { newRoot: "$doc" },
  },
  {
    // A stepped $range yields exactly the indexes that are multiples of
    // EVERY_N, so each kept element is read once. This replaces a
    // $reduce/$concatArrays loop that re-copied the accumulated array on
    // every kept element.
    $addFields: {
      measurements: {
        $map: {
          input: { $range: [0, { $size: "$measurements" }, EVERY_N] },
          as: "index",
          in: { $arrayElemAt: ["$measurements", "$$index"] },
        },
      },
    },
  },
]);

MongoDB aggregate lookup match not working the same without lookup

Can you please help? I'm trying to aggregate data over the past 12 months by both ALL publication data specific to a certain publisher and per publication to return a yearly graph data analysis based on the subscription type.
Here's a snapshot of the Subscriber model:
// Subscriber (reader) schema: links a subscription to a publication and records
// its type and start/end dates.
const SubscriberSchema = new Schema({
// Reference to a document of the "publicationcollection" model.
publication: { type: Schema.Types.ObjectId, ref: "publicationcollection" },
subType: { type: String }, // Print, Digital, Bundle
subStartDate: { type: Date },
subEndDate: { type: Date },
});
Here's some data for the reader (subscriber) collection:
{
_id: ObjectId("5dc14d3fc86c165ed48b6872"),
publication: ObjectId("5d89db9d82273f1d18970deb"),
subStartDate: "2019-11-20T00:00:00.000Z",
subtype: "print"
},
{
_id: ObjectId("5dc14d3fc86c165ed48b6871"),
publication: ObjectId("5d89db9d82273f1d18970deb"),
subStartDate: "2019-11-19T00:00:00.000Z",
subtype: "print"
},
{
_id: ObjectId("5dc14d3fc86c165ed48b6870"),
publication: ObjectId("5d89db9d82273f1d18970deb"),
subStartDate: "2019-11-18T00:00:00.000Z",
subtype: "digital"
},
{
_id: ObjectId("5dc14d3fc86c165ed48b6869"),
publication: ObjectId("5d8b36c3148c1e5aec64662c"),
subStartDate: "2019-11-19T00:00:00.000Z",
subtype: "print"
}
The publication model has plenty of fields but the _id and user fields are the only point of reference in the following queries.
Here's some data for the publication collection:
// Should use
{ "_id": {
"$oid": "5d8b36c3148c1e5aec64662c"
},
"user": {
"$oid": "5d24bbd89f09024590db9dcd"
},
"isDeleted": false
},
// Should use
{ "_id": {
"$oid": "5d89db9d82273f1d18970deb"
},
"user": {
"$oid": "5d24bbd89f09024590db9dcd"
},
"isDeleted": false
},
// Shouldn't use as deleted === true
{ "_id": {
"$oid": "5d89db9d82273f1d18970dec"
},
"user": {
"$oid": "5d24bbd89f09024590db9dcd"
},
"isDeleted": true
},
// Shouldn't use as different user ID
{ "_id": {
"$oid": "5d89db9d82273f1d18970dfc"
},
"user": {
"$oid": "5d24bbd89f09024590db9efd"
},
"isDeleted": true
}
When I do a lookup on a publication ID with the following, I'm getting perfect results:
// Counts this year's subscriptions of one publication, grouped by
// year, month and subscription type.
Subscriber.aggregate([
{
$match: {
$and: [
{ 'publication': mongoose.Types.ObjectId(req.params.id) },
],
// Keeps only subscriptions whose start year equals the current calendar year
// (note: this is the calendar year, not a rolling 12-month window).
"$expr": { "$eq": [{ "$year": "$subStartDate" }, new Date().getFullYear()] }
}
},
{
/* group by year and month of the subscription event */
$group: {
_id: { year: { $year: "$subStartDate" }, month: { $month: "$subStartDate" }, subType: "$subType" },
count: { $sum: 1 }
},
},
{
/* sort descending (latest subscriptions first) */
$sort: {
'_id.year': -1,
'_id.month': -1
}
},
{
$limit: 100,
},
])
However, when I want to receive data from the readercollections (Subscriber Model) for ALL year data, I'm not getting the desired results (if any) from all of the things I'm trying (I'm posting the best attempt result below):
// Attempt to collect this year's subscriptions across ALL non-deleted
// publications of one user. Each publication document ends up with a
// `founditem` array holding its matching subscriber documents.
Publication.aggregate([
{
// Only the given user's publications that are not deleted.
$match:
{
user: mongoose.Types.ObjectId(id),
isDeleted: false
}
},
{
// Only the publication _id is needed for the join below.
$project: {
_id: 1,
}
},
{
$lookup: {
from: "readercollections",
// Expose the publication _id to the sub-pipeline as $$id.
let: { "id": "$_id" },
pipeline: [
{
$match:
{
$expr: {
$and: [
// Subscriber belongs to this publication ...
{ $eq: ["$publication", "$$id"] },
// ... and started in the current calendar year.
{ "$eq": [{ "$year": "$subStartDate" }, new Date().getFullYear()] }
],
}
}
},
{ $project: { subStartDate: 1, subType: 1 } }
],
as: "founditem"
}
},
// NOTE(review): `founditem` is an array at this point, so the commented-out
// $group below would apply $year/$month to array paths.
// {
// /* group by year and month of the subscription event */
// $group: {
// _id: { year: { $year: "$founditem.subStartDate" }, month: { $month: "$foundtitem.subStartDate" }, subType: "$founditem.subType" },
// count: { $sum: 1 }
// },
// },
// {
// /* sort descending (latest subscriptions first) */
// $sort: {
// '_id.year': -1,
// '_id.month': -1
// }
// },
], function (err, result) {
// Logs the error, or sends the aggregation result as the JSON response.
if (err) {
console.log(err);
} else {
res.json(result);
}
})
Which returns the desired data without the $group (commented out) but I need the $group to work or I'm going to have to map a dynamic array based on month and subtype which is completely inefficient.
When I'm diagnosing, it looks like this $group is the issue but I can't see how to fix as it works in the singular $year/$month group. So I tried the following:
{
/* group by year and month of the subscription event */
$group: {
_id: { year: { $year: "$subStartDate" }, month: { $month: "$subStartDate" }, subType: "$founditem.subType" },
count: { $sum: 1 }
},
},
And it returned the $founditem.subType fine, but any count or attempt to get $year or $month of the $founditem.subStartDate gave a BSON error.
The output from the single publication ID lookup in the reader collection call that works (and is plugging into the line graph perfectly) is:
[
{
"_id": {
"year": 2019,
"month": 11,
"subType": "digital"
},
"count": 1
},
{
"_id": {
"year": 2019,
"month": 11,
"subType": "print"
},
"count": 3
}
]
This is the output I'd like for ALL publications rather than just a single lookup of a publication ID within the reader collection.
Thank you for any assistance and please let me know if you need more details!!

Resources