How to generate time series data from existing data in MongoDB (Node.js)

I have to generate this report.
In my MongoDB database I have a collection of orders like this:
[
{
_id: "mongoId", // 5f5ea6276ba53b06944de28c
createdAt: "2020-09-15T23:07:19.370Z",
totalPrice: 34, // is calculated from the client (quantity * price)
orderDetail: [
{
_id: "product-A-Id", // 5f5ea403e91ed91a44b62c92
quantity: 4,
price: 5.5,
},
{
_id: "product-B-Id",
quantity: 1,
price: 3.5,
},
{
_id: "product-C-Id",
quantity: 1,
price: 8.5,
},
],
},
{
_id: "mongoId",
createdAt: "2020-09-15T23:08:20.370Z",
totalPrice: 15.5,
orderDetail: [
{
_id: "product-C-Id",
quantity: 3,
price: 3,
},
{
_id: "product-D-Id",
quantity: 1,
price: 6.5,
},
],
},
{
_id: "mongoId",
createdAt: "2020-09-15T23:09:25.370Z",
totalPrice: 22.5,
orderDetail: [
{
_id: "product-D-Id",
quantity: 5,
price: 4.5,
},
],
},
]
To build it I need to generate time series data in two-hour intervals, counting back from the current timestamp (on every request). The desired response looks like this:
[
{
id: "sales",
data: [
{
x: "00:00",
y: 150,
},
{
x: "22:00",
y: 100,
},
{
x: "20:00",
y: 150,
},
{
x: "18:00",
y: 50,
},
{
x: "16:00",
y: 100,
},
],
},
]
Using Node.js with Express as the framework, I was able to compute the sales of the last 2 hours:
const valueDateRange = 2 * 60 * 60 * 1000; // 2 hours
const currentPeriod = new Date(new Date().getTime() - valueDateRange);
// The last 2 hours sales
const calculateTotalSales = await Order.aggregate([
{
$match: { createdAt: { $gte: currentPeriod } },
},
{
$group: { _id: null, TotalAmount: { $sum: "$totalPrice" } },
},
]);
But now, how do I generate the time series data in 2-hour intervals? Thanks in advance for your attention.

The cleanest way I've found is to rework each doc's date using the modulo operator so documents can be grouped into hour blocks. You can easily change the block size if you need bigger blocks in the future.
https://mongoplayground.net/p/aYAJKL_5dMD (I added extra sample data)
db.orders.aggregate([
{$addFields: {
date: {
$let: {
vars: {
hour: {$hour: '$createdAt'},
remainder: {$mod: [
{$hour: '$createdAt'},
2 // Two hour blocks, can be 2,3,4,6,8,12
]},
},
in: {
$dateFromParts: {
year: {$year: '$createdAt'},
month: {$month: '$createdAt'},
day: {$dayOfMonth: '$createdAt'},
hour: {$subtract: ['$$hour', '$$remainder']}
}
}
}
}
}},
{$group: {
_id: '$date',
x: {$last: '$date'},
y: {$sum: '$totalPrice'}
}}
]);
Update:
After reading your question again, I think you're looking for the total per hour block regardless of the day. You can do that like this:
https://mongoplayground.net/p/cpW9JKllDIN
const totals = await db.orders.aggregate([
{$addFields: {
hour: {
$let: {
vars: {
hour: {$hour: '$createdAt'},
remainder: {$mod: [
{$hour: '$createdAt'},
2 // Two hour blocks, can be 2,3,4,6,8,12
]},
},
in: {$subtract: ['$$hour', '$$remainder']}
}
}
}},
{$group: {
_id: '$hour',
x: {$last: '$hour'},
y: {$sum: '$totalPrice'}
}}
]).toArray(); // the native driver returns a cursor; .toArray() yields the array used below
Then to include hours that have no sales you can map an array:
let points = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22].map(x => {
let total = totals.find(t => t.x === x);
return {
x: `${x < 10 ? `0${x}` : x}:00`,
y: total ? total.y : 0
};
});
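If you want the exact response shape from the question ([{ id: "sales", data: [...] }]), the pieces above can be wired together in one Express handler. A minimal sketch, assuming a Mongoose model named Order and an arbitrary route path (note that $hour works in UTC unless you pass a timezone):

router.get("/sales-report", async (req, res) => {
  try {
    // Per-two-hour-block totals (the "Update" pipeline above).
    const totals = await Order.aggregate([
      { $addFields: {
          hour: {
            $let: {
              vars: {
                hour: { $hour: "$createdAt" },
                remainder: { $mod: [{ $hour: "$createdAt" }, 2] },
              },
              in: { $subtract: ["$$hour", "$$remainder"] },
            },
          },
      } },
      { $group: { _id: "$hour", x: { $last: "$hour" }, y: { $sum: "$totalPrice" } } },
    ]);

    // Fill the blocks that had no sales with 0.
    const points = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22].map(x => {
      const total = totals.find(t => t.x === x);
      return { x: `${x < 10 ? `0${x}` : x}:00`, y: total ? total.y : 0 };
    });

    // Shape expected by the chart in the question.
    res.json([{ id: "sales", data: points }]);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});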

After many attempts I ended up with the code below; I hope it helps someone in the future. To try it out, make sure you have recent data at the time you make the request, otherwise every bucket comes back as 0, because the dates are compared against the current moment.
Mongo Playground
Node.js: v12.5.0
Express: ^4.17.1
MongoDB: v4.2.3
router.get("/orders", async (req, res) => {
let valueDateRange = 24 * 60 * 60 * 1000; // 24 hours
const current = new Date();
const previous = new Date(new Date().getTime() - valueDateRange);
try {
const order = await Order.aggregate([
{
$match: {
createdAt: { $lt: current, $gte: previous },
},
},
{
$sort: { createdAt: 1 },
},
{
$group: {
_id: null,
docs: {
$push: {
createdAt: "$createdAt",
totalPrice: "$totalPrice",
missing: false,
},
},
start: {
$first: {
$toInt: {
$divide: [
{
$subtract: [
{ $toLong: "$$NOW" },
{ $multiply: [24, 60, 60, 1000] },
],
},
1000,
],
},
},
},
end: {
$last: { $toInt: { $divide: [{ $toLong: "$$NOW" }, 1000] } },
},
},
},
{
$addFields: {
docs: {
$map: {
input: {
$range: [
{ $toInt: "$start" },
{ $add: [{ $toInt: "$end" }, 7200] }, // 2 hours range
7200,
],
},
as: "ts",
in: {
ts_exists: {
$filter: {
input: "$docs",
as: "d",
cond: {
$and: [
{
$gte: [
{
$toInt: {
$divide: [{ $toLong: "$$d.createdAt" }, 1000],
},
},
{ $subtract: ["$$ts", 7200] },
],
},
{
$lt: [
{
$toInt: {
$divide: [{ $toLong: "$$d.createdAt" }, 1000],
},
},
"$$ts",
],
},
],
},
},
},
ts: "$$ts",
},
},
},
},
},
{
$unwind: "$docs",
},
{
$project: {
_id: 0,
y: {
$reduce: {
input: "$docs.ts_exists",
initialValue: 0,
in: { $add: ["$$value", "$$this.totalPrice"] },
},
},
x: {
$dateToString: {
format: "%Y-%m-%d %H:%M",
date: { $toDate: { $multiply: ["$docs.ts", 1000] } },
},
},
},
},
]);
order.shift(); // the first bucket always comes back as 0, so I drop it
res.send(order);
} catch (error) {
res.send(error);
}
});
It returns data in 2-hour buckets; if a bucket has no sales, y is 0:
[
{
"y": 0,
"x": "2020-09-15 18:24"
},
{
"y": 0,
"x": "2020-09-15 20:24"
},
{
"y": 0,
"x": "2020-09-15 22:24"
},
{
"y": 0,
"x": "2020-09-16 00:24"
},
{
"y": 0,
"x": "2020-09-16 02:24"
},
{
"y": 0,
"x": "2020-09-16 04:24"
},
{
"y": 0,
"x": "2020-09-16 06:24"
},
{
"y": 0,
"x": "2020-09-16 08:24"
},
{
"y": 0,
"x": "2020-09-16 10:24"
},
{
"y": 0,
"x": "2020-09-16 12:24"
},
{
"y": 0,
"x": "2020-09-16 14:24"
},
{
"y": 3,
"x": "2020-09-16 16:24"
}
]
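As a side note, if you can run MongoDB 5.0 or newer, the timestamp arithmetic above can be replaced with $dateTrunc using a 2-hour bin size. A minimal sketch (it does not zero-fill empty buckets, which still has to happen in application code or with $densify on 5.1+):

// Requires MongoDB 5.0+ for $dateTrunc (the answer above targets 4.2).
// Groups the last 24 hours of orders into 2-hour buckets.
const since = new Date(Date.now() - 24 * 60 * 60 * 1000);
const series = await Order.aggregate([
  { $match: { createdAt: { $gte: since } } },
  {
    $group: {
      _id: { $dateTrunc: { date: "$createdAt", unit: "hour", binSize: 2 } },
      y: { $sum: "$totalPrice" },
    },
  },
  { $sort: { _id: 1 } },
  {
    $project: {
      _id: 0,
      x: { $dateToString: { date: "$_id", format: "%Y-%m-%d %H:%M" } },
      y: 1,
    },
  },
]);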

Related

MongoDB: search a field with a range inside an array of objects

I have multiple documents in a collection like this
[
{
_id: 123,
data: 1,
details: [
{
item: "a",
day: 1
},
{
item: "a",
day: 2
},
{
item: "a",
day: 3
},
{
item: "a",
day: 4
}
],
someMoreField: "xyz"
}
]
Now I want the document with _id: 123, and the details field should only contain entries whose day is within the range 1 to 3. So the result should look like this:
{
_id: 123,
data: 1,
details: [
{
item: 'a',
day: 1,
},
{
item: 'a',
day: 2,
},
{
item: 'a',
day: 3,
},
],
someMoreField: 'xyz',
};
I tried to do this with an aggregate query:
db.collection.aggregate([
{
$match: {
_id: id,
'details.day': { $gt: 1, $lte: 3 },
},
},
{
$project: {
_id: 1,
details: {
$filter: {
input: '$details',
as: 'value',
cond: {
$and: [
{ $gt: ['$$value.date', 1] },
{ $lt: ['$$value.date', 3] },
],
},
},
},
},
},
])
But this gives me an empty result. Could someone please guide me through this?
You are very close; you just need to change $gt to $gte and $lt to $lte inside the $filter.
Another minor mistake is that you're accessing $$value.date, but the schema you provided does not have that field; it needs to be $$value.day, like so:
db.collection.aggregate([
{
$match: {
_id: 123,
"details.day": {
$gt: 1,
$lte: 3
}
}
},
{
$project: {
_id: 1,
details: {
$filter: {
input: "$details",
as: "value",
cond: {
$and: [
{
$gte: [
"$$value.day",
1
]
},
{
$lte: [
"$$value.day",
3
]
},
],
},
},
},
},
},
])
Mongo Playground
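One thing to watch: the $project above only returns _id and details, while the expected output also contains data and someMoreField. If you need to keep the rest of the document, replacing the $project with $addFields (or $set) overwrites only the details array. A sketch run from Node.js with the day range as variables (the helper name, the Collection model, and the minDay/maxDay parameters are just placeholders):

// Hypothetical helper; adjust the model name to your own schema.
// $addFields keeps every other field and only replaces "details"
// with the filtered array.
async function getDetailsInRange(Collection, id, minDay, maxDay) {
  return Collection.aggregate([
    { $match: { _id: id, "details.day": { $gte: minDay, $lte: maxDay } } },
    {
      $addFields: {
        details: {
          $filter: {
            input: "$details",
            as: "value",
            cond: {
              $and: [
                { $gte: ["$$value.day", minDay] },
                { $lte: ["$$value.day", maxDay] },
              ],
            },
          },
        },
      },
    },
  ]);
}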

MongoDB: Is it possible to group with the $bucket operator by date?

Is it possible to group by date with the MongoDB $bucket operator?
{
$bucket: {
groupBy: // date field?
boundaries: [0, 5, 10, 15, 20, 30, 50, 75, 100, 500],
default: 'overBucket',
output: {
count: {
$sum: 1,
},
localities: {
$push: '$$ROOT',
},
},
},
},
Thanks in advance for the suggestions :)
If I understand correctly, I think the below code is what you are looking for:
db.events.aggregate([
{
$match: {
"type": {
"$in": [
"EVENT",
"SUBEVENT"
]
},
"verification.status": "APPROVED",
"verification.public": true,
},
},
{
$group: {
_id: {
"$dateToString": {
"date": "$baseData.startDate",
"format": "%Y-%m-%d"
},
},
count: {
$sum: 1,
},
events: {
$push: "$$ROOT",
},
},
},
])
Let me know if you are instead trying to use the $bucket stage with days as boundaries.
Mongo Playground Sample Execution
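To answer the literal question as well: yes, $bucket can group by a date field, as long as the boundaries are dates too (all boundaries must have the same BSON type as the groupBy value and be in ascending order). A rough sketch with made-up month boundaries:

// Boundaries are illustrative only; they must be strictly ascending and
// match the type of the groupBy expression. Documents outside the range
// fall into the "overBucket" default.
db.events.aggregate([
  {
    $bucket: {
      groupBy: "$baseData.startDate",
      boundaries: [
        ISODate("2021-01-01T00:00:00Z"),
        ISODate("2021-02-01T00:00:00Z"),
        ISODate("2021-03-01T00:00:00Z")
      ],
      default: "overBucket",
      output: {
        count: { $sum: 1 },
        events: { $push: "$$ROOT" }
      }
    }
  }
])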

MongoDB: get every Nth element of an array

I've been working on a small project that takes MQTT data from sensors and stores it in a MongoDB database. I'm working with nodeJS and mongoose. These are my schemas.
export const SensorSchema = new mongoose.Schema({
name: { type: String, required: true, unique: true },
location: { type: String, required: true },
type: { type: String, required: true },
unit: { type: String, required: true },
measurements: { type: [MeasurementSchema] }
},
{
toObject: { virtuals: true },
toJSON: { virtuals: true }
});
export const MeasurementSchema = new mongoose.Schema({
value: {type: Number, required: true},
time: {type: Date, required: true}
});
First I wrote a function that retrieves all measurements that were made in between two timestamps.
const values = Sensor.aggregate([
{ $match: Sensor.getValuesFromPath(sensorPath) },
{ $unwind: "$measurements"},
{ $match: { "measurements.time": { $gte: startTime, $lte: endTime} }},
{ $replaceRoot: { newRoot: "$measurements" } },
{ $project: { _id: 0}},
{ $sort: {time: 1}}
]).exec();
In order to draw a graph in the UI, I need to somehow sort and then limit the data that gets sent to the client. I want to send every Nth value within a certain interval so that the reduced data still resembles the shape of the original.
I would prefer a solution that doesn't fetch all the data from the database.
How would I go about doing this on the db? Can I somehow access the positional index of an element after sorting it? Is $arrayElemAt or $elemMatch the solution?
Before you run $unwind you can use $filter to apply the start/end date filtering. This allows you to process measurements as an array. In the next step you can get every Nth element by using $range to define a list of indexes and $arrayElemAt to retrieve the elements at those indexes:
const values = Sensor.aggregate([
{ $match: Sensor.getValuesFromPath(sensorPath) },
{ $addFields: {
measurements: {
$filter: {
input: "$measurements",
cond: { $and: [
{ $gte: [ "$$this.time", startTime ] },
{ $lte: [ "$$this.time", endTime ] }
]
}
}
}
} },
{ $addFields: {
measurements: {
$map: {
input: { $range: [ 0, { $size: "$measurements" }, N ] },
as: "index",
in: { $arrayElemAt: [ "$measurements", "$$index" ] }
}
}
} },
{ $unwind: "$measurements" },
{ $replaceRoot: { newRoot: "$measurements" } },
{ $project: { _id: 0}},
{ $sort: {time: 1}}
]).exec();
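If you would rather cap the output at a roughly fixed number of points than hard-code N, the step for $range can be derived from the array size inside the pipeline. A sketch of a drop-in replacement for the second $addFields stage above (MAX_POINTS is an assumed constant; $toInt requires MongoDB 4.0+):

// Keeps roughly MAX_POINTS evenly spaced measurements instead of every Nth.
// The step is max(1, floor(size / MAX_POINTS)), cast to an integer for $range.
const MAX_POINTS = 200;
const downsampleStage = {
  $addFields: {
    measurements: {
      $map: {
        input: {
          $range: [
            0,
            { $size: "$measurements" },
            { $toInt: { $max: [
              1,
              { $floor: { $divide: [{ $size: "$measurements" }, MAX_POINTS] } }
            ] } }
          ]
        },
        as: "index",
        in: { $arrayElemAt: ["$measurements", "$$index"] }
      }
    }
  }
};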
The following aggregation (i) retrieves all measurements that were made in between two timestamps, (ii) sorts by timestamp for each sensor, and (iii) gets every Nth value (specified by the variable EVERY_N).
Sample documents (with some arbitrary data for testing):
{
name: "s-1",
location: "123",
type: "456",
measurements: [ { time: 2, value: 12 }, { time: 3, value: 13 },
{ time: 4, value: 15 }, { time: 5, value: 22 },
{ time: 6, value: 34 }, { time: 7, value: 9 },
{ time: 8, value: 5 }, { time: 9, value: 1 },
]
},
{
name: "s-2",
location: "789",
type: "900",
measurements: [ { time: 1, value: 31 }, { time: 3, value: 32 },
{ time: 4, value: 35 }, { time: 6, value: 39 },
{ time: 7, value: 6}, { time: 8, value: 70 },
{ time: 9, value: 74 }, { time: 10, value: 82 }
]
}
The aggregation:
var startTime = 3, endTime = 10
var EVERY_N = 2 // value can be 3, etc.
db.collection.aggregate( [
{
$unwind: "$measurements"
},
{
$match: {
"measurements.time": { $gte: startTime, $lte: endTime }
}
},
{
$sort: { name: 1, "measurements.time": 1 }
},
{
$group: {
_id: "$name",
measurements: { $push: "$measurements" },
doc: { $first: "$$ROOT" }
}
},
{
$addFields: {
"doc.measurements": "$measurements"
}
},
{
$replaceRoot: { newRoot: "$doc" }
},
{
$addFields: {
measurements: {
$reduce: {
input: { $range: [ 0, { $size: "$measurements" } ] },
initialValue: [ ],
in: { $cond: [ { $eq: [ { $mod: [ "$$this", EVERY_N ] }, 0 ] },
{ $concatArrays: [ "$$value", [ { $arrayElemAt: [ "$measurements", "$$this" ] } ] ] },
"$$value"
]
}
}
}
}
}
] )

MongoDB $sum not working with SchemaTypes.Double

I'm fairly new to MongoDB and Mongoose and I'm working on a bug here. Apparently $sum is not working with a field whose type is SchemaTypes.Double. This double type is available thanks to a package called mongoose-double.
I don't know whether MongoDB lacks a native Double type (and that's why we have this package) or whether this is down to the MongoDB version (it's on 3.6 AFAIK). Anyway, here's the code:
Schedule.aggregate([{
$match: findTerm
},
{
$facet: {
totalizer: [{
$group: {
_id: '$store',
totalServices: {
$sum: 1
},
totalValue: {
$sum: '$value'
},
totalComission: {
$sum: '$comissionValue'
}
}
}
],
data: [{
$project: {
'employee.name': 1,
'customer.name': 1,
'service.name': 1,
'info.channel': 1,
value: 1,
scheduleDate: 1,
scheduleStart: 1,
scheduleEnd: 1,
comissionValue: 1,
status: 1,
paymentMethod: 1
}
},
{
$sort: sort
},
{
$skip: req.body.limit * req.body.page
},
{
$limit: req.body.limit
}
]
}
}
]).exec((e, response) => {
if (e) {
// handle error
}
res.status(200).send(response[0]);
});
This findTerm is sent by the frontend app and has this format:
{ store: '5b16cceb56a44e2f6cd0324b',
status: { '$in': [ 0, 1, 2, 3 ] },
paymentMethod: { '$in': [ 0, 1, 2, 3, 4, 5 ] },
'info.channel': { '$in': [ 'app', 'admin' ] },
scheduleStart: { '$gte': '2019-11-01 00:00' },
scheduleEnd: { '$lte': '2020-03-31 23:59' }
}
My comissionValue field is in the root of my Schedule Schema:
comissionValue: {
type: SchemaTypes.Double,
default: 0
},
But my result is the following, as shown in my console.log in the frontend
As you can see my totalComission inside my totalizer is null, but my first object inside data has a comissionValue of 0.6.
How can I know what's wrong here? I've tried different combinations of $facet, filtering only Schedules whose comissionValue is not 0 and not null, but I only got a result of 0 for totalComission.
EDIT
Here's some sample data:
A Schedule object:
customer: {
id: "5e41a7ba11340930742aa689",
name: "renan lima",
phone: "+5511999999999",
email: "sampleemail#gmail.com",
doc: "00000000000",
avatar: null
},
employee: {
id: "5b16cebd29bcf613f02b6fb4",
name: "Anderson Zanardi",
avatar: "anderson.jpg"
},
service: {
noValue: false,
filters: [],
id: "5b637acd634e14086c9a3aea",
name: "Barba Masculina"
},
info: {
channel: "app",
id: "5e41a7ba11340930742aa689",
name: "renan lima"
},
comissionType: null,
comissionValue: 0,
paymentMethod: 0,
_id: "5e41a7c011340930742aa68a",
store: "5b16cceb56a44e2f6cd0324b",
scheduleDate: "2020-03-16T15:00:00.000Z",
scheduleStart: "2020-03-16 09:00",
scheduleEnd: "2020-03-16 09:30",
status: 2,
value: 30,
color: "blue",
logStatus: [],
__v: 0,
created: "2020-02-10T18:58:08.845Z",
updated: "2020-02-10T18:58:08.845Z"
My response received for the Schedule.aggregate:
{
"totalizer": [{
"_id": null,
"storesCount": [{
"store": "5b16cceb56a44e2f6cd0324b",
"count": 12
}],
"totalValue": 410.5,
"totalServices": 12,
"totalComission": 75
}],
"data": [{
"_id": "5e5d04dcb4a2f42204598ebf",
"service": {
"name": "Outros"
},
"info": {
"channel": "admin"
},
"comissionValue": 0,
"paymentMethod": 0,
"customer": {
"name": "teste"
},
"employee": {
"name": "Gabriel Barreto"
},
"scheduleDate": "2020-03-02T13:06:00.000Z",
"scheduleStart": "2020-03-02 10:06",
"scheduleEnd": "2020-03-02 10:36",
"status": 0,
"value": null
}]
}
Here the commission is 75; I don't know if that's because the scheduleStart and scheduleEnd in the findTerm of my $match now run from 2020-03-01 to 2020-03-31, and in that range there are 3 schedules with a commission of 25 each.
Maybe my pagination is making it return null? I need it to sum all commissions for the given start/end range, even if a particular page contains no commission.
EDIT 2
I added sample data in Mongo Playground; the schedule array in the configuration column matches the query used in the $match stage in the query column.
Here's the link: https://mongoplayground.net/p/nmyAsY4g7LS
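One thing worth checking before blaming the Double type: $sum silently ignores values that are not stored as a numeric BSON type, and documents where the field is missing contribute nothing, so a quick breakdown of how comissionValue is actually stored for the matched documents can tell you whether the data itself is the problem. A small diagnostic sketch reusing the same findTerm:

// Counts the BSON type of comissionValue across the matched documents.
// Strings, nulls, or missing fields here would explain a smaller-than-expected
// (or null) totalComission.
const typeBreakdown = await Schedule.aggregate([
  { $match: findTerm },
  { $group: { _id: { $type: "$comissionValue" }, count: { $sum: 1 } } },
]);
console.log(typeBreakdown);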

Node MongoDB driver: different result on aggregate

I'm having trouble with an aggregation in my project: the aggregation result is different in Robo3T and in Node.
db.getCollection('companies').aggregate([
{ '$match': { _id: { '$eq': ObjectId("5e30a4fe11e6e80d7fb544a4")} } },
{ $unwind: '$jobVacancies' },
{
$project: {
jobVacancies: {
_id: 1,
name: 1,
city: 1,
openingDate: 1,
closingDate: 1,
createdAt: 1,
quantity: 1,
steps: {
$filter: {
input: '$jobVacancies.steps',
as: 'step',
cond: {
$and: [
{ $eq: ['$$step.order', 0] },
{ $ne: ['$$step.users', undefined] },
{ $ne: ['$$step.users', null] },
{ $ne: ['$$step.users', []] },
],
},
},
},
},
},
},
{ $match: { 'jobVacancies.steps': { $ne: [] } } },
])
In Robo3T this returns 1 object, but in Node the same aggregation returns 6 objects. Can you help me? Thank you.
EDIT
Node.js:
The first $match creates the ObjectId filter for the company in the GraphQL context, based on my filters.
const companies = await this.MongoClient.db
.collection('companies')
.aggregate([
{
$match: await this.getFilterObject(
filters.filter(f => !f.field.includes('$$jobVacancy') && !f.field.includes('StepOrder')),
),
},
{ $unwind: '$jobVacancies' },
{
$project: {
jobVacancies: {
_id: 1,
name: 1,
city: 1,
openingDate: 1,
closingDate: 1,
createdAt: 1,
quantity: 1,
steps: {
$filter: {
input: '$jobVacancies.steps',
as: 'step',
cond: {
$and: [
{ $eq: ['$$step.order', order] },
{ $ne: ['$$step.users', undefined] },
{ $ne: ['$$step.users', null] },
{ $ne: ['$$step.users', []] },
],
},
},
},
},
},
},
{ $match: { 'jobVacancies.steps': { $ne: [] } } },
])
.toArray();
EDIT 3
This is the result of console.dir (with { depth: null }) on the pipeline:
[
{
'$match': {
_id: {
'$eq': ObjectID {
_bsontype: 'ObjectID',
id: Buffer [Uint8Array] [
94, 48, 164, 254, 17,
230, 232, 13, 127, 181,
68, 164
]
}
}
}
},
{ '$unwind': '$jobVacancies' },
{
'$project': {
jobVacancies: {
_id: 1,
name: 1,
city: 1,
openingDate: 1,
closingDate: 1,
createdAt: 1,
quantity: 1,
steps: {
'$filter': {
input: '$jobVacancies.steps',
as: 'step',
cond: {
'$and': [
{ '$eq': [ '$$step.order', 0 ] },
{ '$ne': [ '$$step.users', undefined ] },
{ '$ne': [ '$$step.users', null ] },
{ '$ne': [ '$$step.users', [] ] }
]
}
}
}
}
}
},
{ '$match': { 'jobVacancies.steps': { '$ne': [] } } }
]
I think I found the solution. The document is created with these properties:
jobVacancies: {
steps: {
users: []
}
}
But sometimes the users array is undefined in MongoDB, so I was checking it with:
{ '$ne': [ '$$step.users', undefined ] }
I think JavaScript's undefined is different from MongoDB's undefined, so I initialized every step with an empty users array, removed this check, and it worked!
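For reference, the difference most likely comes from the Node driver serializing undefined differently than the shell does. If you ever need the check again, a condition that avoids comparing against undefined altogether is to treat a missing or null users as an empty array. A sketch of the same pipeline with that condition (trimmed to the relevant fields):

// $ifNull maps both a missing field and null to [], so $size is always safe
// and no comparison against undefined is needed.
db.getCollection('companies').aggregate([
  { '$match': { _id: { '$eq': ObjectId("5e30a4fe11e6e80d7fb544a4") } } },
  { $unwind: '$jobVacancies' },
  {
    $project: {
      jobVacancies: {
        _id: 1,
        name: 1,
        steps: {
          $filter: {
            input: '$jobVacancies.steps',
            as: 'step',
            cond: {
              $and: [
                { $eq: ['$$step.order', 0] },
                { $gt: [{ $size: { $ifNull: ['$$step.users', []] } }, 0] },
              ],
            },
          },
        },
      },
    },
  },
  { $match: { 'jobVacancies.steps': { $ne: [] } } },
])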
