I am trying to do a per-day aggregation in MongoDB. I already have an aggregation where I successfully group the data by day. However, I want to do the aggregation in such a way where days with no data show up, but empty. That is, they are empty bins.
Below is what I have so far. I have not been able to find anything in the MongoDB documentation or otherwise that suggests how to do aggregations and produce empty bins:
// Per-day profile-view statistics (count and average duration) for the
// requesting user. Stages are passed as separate arguments to aggregate().
app.models.profile_view
  .aggregate(
    // Keep only this user's views whose target is of type 'user'.
    { $match: { user: req.user._id, 'viewing._type': 'user' } },
    // Split `start` into calendar parts so they can be used as the group key.
    {
      $project: {
        day: { $dayOfMonth: '$start' },
        month: { $month: '$start' },
        year: { $year: '$start' },
        duration: '$duration'
      }
    },
    // One bucket per (day, month, year); only days that have data produce a bucket.
    {
      $group: {
        _id: { day: '$day', month: '$month', year: '$year' },
        count: { $sum: 1 },
        avg_duration: { $avg: '$duration' }
      }
    },
    // Expose the compound key as `date` and drop `_id`.
    { $project: { _id: 0, date: '$_id', count: 1, avg_duration: 1 } }
  )
  .exec()
  .then(
    function onSeries(time_series) {
      console.log(time_series);
      return res.send(200, [{ key: 'user', values: time_series }]);
    },
    function onFailure(err) {
      console.log(err.stack);
      return res.send(500, { error: err, code: 200, message: 'Failed to retrieve profile view data' });
    }
  );
I don't think you will be able to solve this problem using aggregation. When you use $group, mongo can only group based on the data you are providing it. In this case, how would mongo know which date values are missing or even what the range of acceptable dates is?
I think your best option would be to add the missing date values to the result of your aggregation.
Starting in Mongo 5.1, it's a perfect use case for the new $densify aggregation operator:
// { date: ISODate("2021-12-05") }
// { date: ISODate("2021-12-05") }
// { date: ISODate("2021-12-03") }
// { date: ISODate("2021-12-07") }
db.collection.aggregate([
  // Stage 1: one bucket per calendar day (date truncated to the start of
  // its day), counting the documents that fall into it.
  {
    $group: {
      _id: { $dateTrunc: { date: "$date", unit: "day" } },
      total: { $count: {} }
    }
  },
  //   { _id: ISODate("2021-12-03"), total: 1 }
  //   { _id: ISODate("2021-12-05"), total: 2 }
  //   { _id: ISODate("2021-12-07"), total: 1 }

  // Stage 2: insert a document for every missing day across the full range
  // of `_id` values. Inserted documents have no `total` field.
  {
    $densify: {
      field: "_id",
      range: { step: 1, unit: "day", bounds: "full" }
    }
  },
  //   { _id: ISODate("2021-12-03"), total: 1 }
  //   { _id: ISODate("2021-12-04") }
  //   { _id: ISODate("2021-12-05"), total: 2 }
  //   { _id: ISODate("2021-12-06") }
  //   { _id: ISODate("2021-12-07"), total: 1 }

  // Stage 3: rename `_id` to `day` and default a missing `total` to 0.
  {
    $project: {
      day: "$_id",
      _id: 0,
      total: { $cond: [{ $not: ["$total"] }, 0, "$total"] }
    }
  }
])
// { day: ISODate("2021-12-03"), total: 1 }
// { day: ISODate("2021-12-04"), total: 0 }
// { day: ISODate("2021-12-05"), total: 2 }
// { day: ISODate("2021-12-06"), total: 0 }
// { day: ISODate("2021-12-07"), total: 1 }
This:
$groups documents by day with their $count
$dateTrunc truncates your dates at the beginning of their day (the truncation unit).
$densifies documents ($densify) by creating new documents in a sequence of documents where certain values for a field (in our case field: "_id") are missing:
the step for our densification is 1 day: range: { step: 1, unit: "day" }
finally transforms ($project) fields:
renames _id to day
adds the total field (set to 0) for new documents included during the densify stage ({ total: { $cond: [ { $not: ["$total"] }, 0, "$total" ] } })
Related
I am trying to show results where the sum of records is greater or equal to 4 and the status matches a string. If I leave off the status field it works fine but adding it in always gives me an empty array even when there should be data.
const bookings = await Booking.aggregate([
  // Count bookings per calendar day; the key is bookingDate rendered as "YYYY/MM/DD".
  {
    $group: {
      _id: { $dateToString: { format: "%Y/%m/%d", date: "$bookingDate" } },
      totalBookings: { $sum: 1 }
    }
  },
  // Keep days with at least 4 bookings whose status is "Accepted".
  // NOTE: documents leaving the $group stage carry only `_id` and
  // `totalBookings`, so the `status` condition is evaluated against a field
  // that no longer exists at this point in the pipeline.
  { $match: { totalBookings: { $gte: 4 }, status: "Accepted" } }
]);
Each booking will have its own status, so you need to add status as part of the $group key:
// Pipeline stage (fragment): group by BOTH the booking status and the
// formatted booking day, so every (status, day) pair gets its own count.
{
$group: {
_id: {
// Keeping status inside the key preserves it for later stages.
status: "$status",
// "d" holds bookingDate rendered as "YYYY/MM/DD".
"d": {
$dateToString: {
format: "%Y/%m/%d",
date: "$bookingDate",
}
},
},
// Number of bookings in this (status, day) bucket.
totalBookings: {
$sum: 1,
},
}
}
Then you need to change your match as below
// Stage body (fragment): keep buckets with at least 4 bookings whose status,
// read from the compound group key via the dotted path "_id.status", is "Accepted".
$match: {
totalBookings: {
$gte: 4,
},
"_id.status": "Accepted",
}
It will give you, for each bookingDate, the count of Accepted bookings wherever that count is >= 4.
I have the below code snippet which will retrieve Date and count from a MongoDb collection from a specific date. Example: Retrieve date, count from 05-05-2020.
||Date||Count||
|05-06-2020|4|
|05-07-2020|25| and so on.
I want to add logic that retrieves the aggregate sum over each 7-day period instead of individual dates. I would appreciate any help.
// Daily audit counts for everything created after `date`, newest day first.
mongoClient
  .db()
  .collection(COLLECTION.AUDIT)
  .aggregate([
    // Only documents created strictly after the cut-off.
    { $match: { created_at: { $gt: date } } },
    // Bucket by calendar day, rendered as a "YYYY-MM-DD" string, and count each bucket.
    {
      $group: {
        _id: { $dateToString: { format: "%Y-%m-%d", date: "$created_at" } },
        count: { $sum: 1 }
      }
    },
    // Descending by the day string; zero-padded "YYYY-MM-DD" sorts chronologically.
    { $sort: { _id: -1 } }
  ])
The simplest way to do what I think you're asking would be to transform your group operator to $week instead of $dateToString. Since a week is 7 days, this will group all the documents from the same week, and return a count of the documents, along with the number of the week. To get both results from 1 query, combine them into a facet. So:
// Weekly AND daily audit counts for everything created after `date`,
// computed in a single query via $facet.
//
// The result is one document of the form
//   { by_week: [ { _id: <week number>, count }, ... ],
//     by_day:  [ { _id: "YYYY-MM-DD",  count }, ... ] }
mongoClient.db().collection(COLLECTION.AUDIT).aggregate([
  // Only documents created strictly after the cut-off.
  { $match: { created_at: { $gt: date } } },
  {
    $facet: {
      // Every $facet field must be an ARRAY of stages (a sub-pipeline),
      // not a single object containing several stages.
      by_week: [
        {
          $group: {
            // Field paths are strings and must be quoted: "$created_at".
            // $week yields the week-of-year number only, so if the matched
            // range spans more than one year, equal week numbers from
            // different years land in the same bucket.
            _id: { $week: "$created_at" },
            count: { $sum: 1 }
          }
        },
        { $sort: { _id: -1 } }
      ],
      by_day: [
        {
          $group: {
            _id: { $dateToString: { format: "%Y-%m-%d", date: "$created_at" } },
            count: { $sum: 1 }
          }
        },
        { $sort: { _id: -1 } }
      ]
    }
  }
])
I have a query that is returning the total number of entries in a collection per year-month, grouped by a location.
This is returning data exactly as I need it if the location has results for the year-month in question.
However, is it possible to insert an entry for a month that does not have a result? For instance, let's say my $match has a date range of 01-2019 to 12-2019. I would like to have all 12 monthly entries, with a default of total: 0 for the months that have no data.
Truncated Schema :
{
branchId: { type: String, required: true },
orgId: { type: String, required: true },
stars: { type: Number, default: 0 },
reviewUpdatedAt: { type: Date, default: Date.now }
}
What I've tried:
// Pipeline: number of rated reviews per month ("MM-YYYY") for each branch of
// one organisation, returned as one document per branch with a `reviews` array.
[
  // Restrict to rated reviews of org '100003' inside the requested date range.
  {
    $match: {
      stars: { $exists: true, $gte: 1 },
      orgId: '100003', // the comma here was missing, which made the whole literal a syntax error
      reviewUpdatedAt: { $gte: new Date(fromDate), $lte: new Date(toDate) }
    }
  },
  // Count reviews per (month, branch).
  {
    $group: {
      _id: {
        date: { $dateToString: { format: "%m-%Y", date: "$reviewUpdatedAt" } },
        loc: "$branchId"
      },
      total: { $sum: 1 }
    }
  },
  // Collapse to one document per branch, collecting its monthly totals.
  // Months without any review produce no entry here.
  {
    $group: {
      _id: "$_id.loc",
      reviews: { $push: { total: "$total", "date": "$_id.date" } }
    }
  }
]
Starting in Mongo 5.1, it's a perfect use case for the new $densify aggregation operator:
// { date: "02-2019", value: 12 }
// { date: "03-2019", value: 2 }
// { date: "11-2019", value: 3 }
// Fill in missing months: parse the "MM-YYYY" strings into dates, densify by
// month over a fixed range, then render the dates back to "MM-YYYY" strings.
db.collection.aggregate([
{ $set: {
date: { $dateFromString: { // "02-2019" => ISODate("2019-02-01")
// Prefix a day ("01-") so the string matches the "%d-%m-%Y" format below.
dateString: { $concat: [ "01-", "$date" ] },
format: "%d-%m-%Y"
}}
}},
{ $densify: {
field: "date",
range: {
step: 1,
unit: "month",
// Explicit bounds: documents are added from 2019-01-01 up to, but not
// including, 2020-01-01 (the sample output ends at "12-2019").
bounds: [ISODate("2019-01-01"), ISODate("2020-01-01")]
}
}},
{ $set: {
// Documents inserted by $densify have no `value`; default it to 0.
value: { $cond: [ { $not: ["$value"] }, 0, "$value" ] },
date: { $dateToString: { format: "%m-%Y", date: "$date" } } // ISODate("2019-02-01") => "02-2019"
}}
])
// { date: "01-2019", value: 0 }
// { date: "02-2019", value: 12 }
// { date: "03-2019", value: 2 }
// { date: "04-2019", value: 0 }
// { date: "05-2019", value: 0 }
// { date: "06-2019", value: 0 }
// { date: "07-2019", value: 0 }
// { date: "08-2019", value: 0 }
// { date: "09-2019", value: 0 }
// { date: "10-2019", value: 0 }
// { date: "11-2019", value: 3 }
// { date: "12-2019", value: 0 }
This:
casts date strings into dates (the first $set stage)
densifies documents ($densify) by creating new documents in a sequence of documents where certain values for a field (in our case field: "date") are missing:
the step for our densification is 1 month: range: { step: 1, unit: "month", ... }
and we densify within the range of dates provided with bounds: [ISODate("2019-01-01"), ISODate("2020-01-01")]
sets dates back to date strings: date: { $dateToString: { format: "%m-%Y", date: "$date" } }
and also sets ($set) value to 0 only for new documents included during the densify stage ({ value: { $cond: [ { $not: ["$value"] }, 0, "$value" ] } })
At first I thought this would be easier to achieve in application code, but you can also do it in MongoDB, provided the list of months is supplied as an input from code:
Let's say your fromDate is June-2018 and your toDate is June-2019; then, using your programming language, you can easily get all months between those two dates in the format mm-yyyy. You could try to generate that list in MongoDB as well, but I would rather pass it as an input to the query.
Query :
db.collection.aggregate([
  // 1. Count reviews per (month, branch); the month is rendered as "MM-YYYY".
  {
    $group: {
      _id: {
        date: { $dateToString: { format: "%m-%Y", date: "$reviewUpdatedAt" } },
        loc: "$branchId"
      },
      Total: { $sum: 1 }
    }
  },
  // 2. One document per branch, with its monthly totals collected in `reviews`.
  {
    $group: {
      _id: "$_id.loc",
      reviews: { $push: { Total: "$Total", "date": "$_id.date" } }
    }
  },
  /**
   * 3. Overwrite the existing `reviews` field with a padded array:
   *    - the caller passes in every month of the requested range;
   *    - $setDifference (input months - months already present after the
   *      group) leaves an array of the missing months;
   *    - $reduce walks that array of missing months and appends a
   *      { date, Total: 0 } entry for each one to the actual reviews array.
   */
  {
    $addFields: {
      reviews: {
        $reduce: {
          input: {
            $setDifference: [
              [
                "06-2018", "07-2018", "08-2018", "09-2018", "10-2018",
                "11-2018", "12-2018", "01-2019", "02-2019", "03-2019",
                "04-2019", "05-2019", "06-2019"
              ],
              "$reviews.date"
            ]
          },
          initialValue: "$reviews",
          in: {
            $concatArrays: ["$$value", [{ date: "$$this", Total: 0 }]]
          }
        }
      }
    }
  }
])
Test : MongoDB-Playground
Ref : javascript-get-all-months-between-two-dates
So step back and realize that you are seeking a display of data that doesn't exist in the db. Let's say there is no data for 3/19. This is not a Mongo issue; it is universal for any database. One creates a 'time table' (in your case perhaps at month/year granularity; in Mongo it would be a collection of documents). This provides framework data for each month for the initial match, against which one's join ($lookup in Mongo) will yield null for 3/19.
Adding a time table is standard in analytic apps; some come with that feature embedded as part of their time-based analytics, so the database doesn't need to do anything. But to do so via general query/reporting in Mongo and SQL databases, one would need to manually add that time collection/table.
I have a collection which contains half a million documents, and I want an average for every month between the dates I enter. Right now I am getting the data for the whole year, but I want it separated by month, i.e. 12 results, one for every single month.
Below is the aggregation pipeline i am using.
let filter = 'y';
const date = new Date();
// Cut-off: 1.5 years before now. toDate() is moment's public accessor for the
// underlying Date; the original read the private `_d` field instead.
let checkDate = moment().subtract(1.5, 'years').toDate();
// Total energy per meter. Unless filter is 'a', only readings dated on or
// after checkDate contribute; older readings are counted as 0.
MeterData.aggregate([
  {
    $group: {
      _id: "$meter_id",
      // total: { $sum: 1 },
      totalEnergy: filter !== 'a'
        ? {
            $sum: {
              // The $cond result ("$energy.Energy" or 0) is converted to double before summing.
              $toDouble: {
                $cond: {
                  if: { $gte: ["$date", checkDate] },
                  then: "$energy.Energy",
                  else: 0
                }
              }
            }
          }
        : { $sum: { $toDouble: "$energy.Energy" } }
    }
  }
]);
Here i am getting totalEnergy for all year, in totalEnergy field, but now i want totalEnergy plus monthly calculations for the year i enter.
Any idea on how to do that. ?
Below is a sample document from the collection.
{"_id":{"$oid":"5e557779ed588826d84cef11"},
"meter_id":"1001",
"date":{"$date":{"$numberLong":"1509474600000"}},
"parameter_name":"hvac","voltage":{"unit":"V"},
"current":{"unit":"AMP"},
"powerFactor":{"unit":"phi"},
"angle":{"unit":"degree"},
"activePower":{"unit":"kwh"},
"reactivePower":{"unit":"kwh"},
"apparentPower":{"unit":"kwh"},
"frequency":{"unit":"hz"},
"thd":{"unit":"percentage"},
"energy":{"Energy":"5.7"},
"power":{"unit":"watt"},
As suggested by Ryan Gunner, I got my answer, which I am pasting below; I just have one more problem.
[
{
meter_id: '1001',
month: '2017-10',
totalEnergy: 0,
averageEnergy: 0
} + 11 more months......
]
Now what i need is the total of the energy for 12 months. For example total of totalEnergy field for all 12 months in a single variable.
how about something like this?
// Reporting window (both ends exclusive). The original had the two names
// swapped (startDate held the later date); the selected range is unchanged.
var startDate = ISODate('2019-04-01');
var endDate = ISODate('2020-04-01');

// Monthly total/average energy per meter inside the window, plus a grand
// total across all of those monthly rows. Result: a single document
//   { grandTotalEnergy: <number>, monthlyData: [ { meter_id, month, totalEnergy, averageEnergy }, ... ] }
db.collection.aggregate([
  // Readings strictly between startDate and endDate.
  {
    $match: {
      $expr: {
        $and: [
          { $gt: ['$date', startDate] },
          { $lt: ['$date', endDate] }
        ]
      }
    }
  },
  // One bucket per (meter, "YYYY-MM" month); energy is converted to double
  // before being summed/averaged.
  {
    $group: {
      _id: {
        meter: '$meter_id',
        month: { $dateToString: { format: '%Y-%m', date: '$date' } }
      },
      totalEnergy: { $sum: { $toDouble: '$energy.Energy' } },
      averageEnergy: { $avg: { $toDouble: '$energy.Energy' } }
    }
  },
  // Flatten the compound key into top-level fields.
  {
    $project: {
      meter_id: '$_id.meter',
      month: '$_id.month',
      totalEnergy: '$totalEnergy',
      averageEnergy: '$averageEnergy',
      _id: 0
    }
  },
  {
    $sort: { meter_id: 1 }
  }, // this comma was missing, which made the pipeline a syntax error
  // Collapse everything into one document: overall sum plus the monthly rows.
  {
    $group: {
      _id: null,
      grandTotalEnergy: { $sum: '$totalEnergy' },
      monthlyData: { $push: '$$ROOT' }
    }
  },
  { $project: { _id: 0 } }
])
update: added grandTotalEnergy field and pushed monthlyData to an array.
We currently have the following code to group all documents from a collection by creation date:
// Count matching documents per creation timestamp, bucketed down to the
// second, ordered by that timestamp, and hand the result to `callback`.
this.aggregate(
  [
    // Documents in the requested language with status 1, excluding challenges.
    {
      $match: {
        language: options.criteria.language,
        status: 1,
        type: { $ne: "challenge" }
      }
    },
    // Group key: every calendar/clock component of `created`.
    {
      $group: {
        _id: {
          y: { $year: '$created' },
          m: { $month: '$created' },
          d: { $dayOfMonth: '$created' },
          h: { $hour: '$created' },
          min: { $minute: '$created' },
          s: { $second: '$created' }
        },
        count: { $sum: 1 }
      }
    },
    // Expose the compound key as `date` (the shorter way) and drop `_id`.
    { $project: { date: "$_id", count: 1, _id: 0 } },
    { $sort: { "date": 1 } }
  ],
  function (err, result) {
    if (!err) {
      callback(null, result);
      return;
    }
    return callback(err);
  }
);
However, we now would like to group the results based on the start date instead of the creation date. The start date is not a field of the current collection, but it is a field of the currentRevision object, that is linked in this collection.
I tried this:
// Same per-second bucketing as before, but keyed on currentRevision.start_date.
// NOTE(review): the dotted path only resolves if currentRevision is an embedded
// sub-document. If it is a reference to a document in another collection,
// start_date is presumably not available here without joining it in first
// (e.g. via $lookup) — confirm against the schema.
this.aggregate(
  [
    // Documents in the requested language with status 1, excluding challenges.
    {
      $match: {
        language: options.criteria.language,
        status: 1,
        type: { $ne: "challenge" }
      }
    },
    // Group key: every calendar/clock component of currentRevision.start_date.
    {
      $group: {
        _id: {
          y: { $year: '$currentRevision.start_date' },
          m: { $month: '$currentRevision.start_date' },
          d: { $dayOfMonth: '$currentRevision.start_date' },
          h: { $hour: '$currentRevision.start_date' },
          min: { $minute: '$currentRevision.start_date' },
          s: { $second: '$currentRevision.start_date' }
        },
        count: { $sum: 1 }
      }
    },
    // Expose the compound key as `date` (the shorter way) and drop `_id`.
    { $project: { date: "$_id", count: 1, _id: 0 } },
    { $sort: { "date": 1 } }
  ],
  function (err, result) {
    if (!err) {
      callback(null, result);
      return;
    }
    return callback(err);
  }
);
but that just gives me an error: "failed to query db"
any idea on how to solve this?