Document
[
{
type: 1,//credit
amount: 60
},
{
type: 2,//debit
amount: 35
},
{
type: 3,//credit
amount: 25
},
{
type: 4,//debit
amount: 80
},
{
type: 5,//credit
amount: 70
},
]
Result
[
{
_id: {
Name: "Credition",
Type: [1, 3, 5]
},
Total_Amount: 155
},
{
_id: {
Name: "Debition",
Type: [2, 4]
},
Total_Amount: 115
},
]
In my schema, there are millions of logs records in which few are credited logs, few are debited logs.
I want to use MongoDB aggregate pipe and have to group like above for million records at a time
Yes you can do that first you need to add a new field transaction on the basis of the type of logs, then you can group the logs on the basis of that field.
Working example - https://mongoplayground.net/p/e4kqeKLIuIr
db.collection.aggregate([
{
$addFields: {
transaction: {
$cond: {
if: {
$in: [
"$type",
[
1,
3,
5
]
]
},
then: "Credition",
else: "Debition"
}
}
}
},
{
$group: {
_id: "$transaction",
Type: {
$addToSet: "$type"
},
Total_Amount: {
$sum: "$amount"
}
}
}
])
After this, you can also use $project operator to change the name or structure of the record, if needed
You can use the operator $cond during the grouping stage:
db.collection.aggregate([
{
$group: {
_id: {
$cond: [
{
$in: [ "$type", [1,3,5] ]
},
"Credition",
"Debition"
]
},
type: {
$addToSet: "$type"
},
amount: {
$sum: "$amount"
}
}
},
{
$project: {
_id: {
Name: "$_id",
Type: "$type"
},
Total_Amount: "$amount"
}
}
])
MongoPlayground
Related
On my backend I use mongoDB with nodejs and mongoose
I have many records in mongodb with this structure:
{
..fields
type: 'out',
user: 'id1', <--mongodb objectID,
orderPayment: [
{
_id: 'id1',
paid: true,
paymentSum: 40
},
{
_id: 'id2',
paid: true,
paymentSum: 60,
},
{
_id: 'id3',
paid: false,
paymentSum: 50,
}
]
},
{
..fields
type: 'in',
user: 'id1', <--mongodb objectID
orderPayment: [
{
_id: 'id1',
paid: true,
paymentSum: 10
},
{
_id: 'id2',
paid: true,
paymentSum: 10,
},
{
_id: 'id3',
paid: false,
paymentSum: 77,
}
]
}
I need to group this records by 'type' and get sum with conditions.
need to get sum of 'paid' records and sum of noPaid records.
for a better understanding, here is the result Ι need to get
Output is:
{
out { <-- type field
paid: 100, <-- sum of paid
noPaid: 50 <-- sum of noPaid
},
in: { <-- type field
paid: 20, <-- sum of paid
noPaid: 77 <-- sum of noPaid
}
}
Different solution would be this one. It may give better performance than solution of #YuTing:
db.collection.aggregate([
{
$project: {
type: 1,
paid: {
$filter: {
input: "$orderPayment",
cond: "$$this.paid"
}
},
noPaid: {
$filter: {
input: "$orderPayment",
cond: { $not: "$$this.paid" }
}
}
}
},
{
$set: {
paid: { $sum: "$paid.paymentSum" },
noPaid: { $sum: "$noPaid.paymentSum" }
}
},
{
$group: {
_id: "$type",
paid: { $sum: "$paid" },
noPaid: { $sum: "$noPaid" }
}
}
])
Mongo Playground
use $cond in $group
db.collection.aggregate([
{
"$unwind": "$orderPayment"
},
{
"$group": {
"_id": "$type",
"paid": {
"$sum": {
$cond: {
if: { $eq: [ "$orderPayment.paid", true ] },
then: "$orderPayment.paymentSum",
else: 0
}
}
},
"noPaid": {
"$sum": {
$cond: {
if: { $eq: [ "$orderPayment.paid", false ] },
then: "$orderPayment.paymentSum",
else: 0
}
}
}
}
}
])
mongoplayground
I am calculating the notification percentage in my app for tracking some statistics.
My Collection:
[
{
_id: "123",
status: "seen",
userId: "589"
},
{
_id: "223",
status: "seen",
userId: "589"
},
{
_id: "474",
status: "unseen",
userId: "589"
},
{
_id: "875",
status: "seen",
userId: "112"
},
{
_id: "891",
status: "unseen",
userId: "112"
}
]
Expected Result:
Here we can see that, UserId - 589 has received 3 notifications out of which 2 are seen. So the calculation is (totalNumOfSeen/totalNumOfNoticationsSent) * 100
[{
userId: "589",
notificationPercentage : 66.66
},{
userId: "112",
notificationPercentage : 50
}]
I am using a facet for grouping and matching but that is returning me an array of object and I am not getting how to perform divide on this.
My Query:
db.collection.aggregate([
{
$facet: {
totalNumOfSeen: [
{
$match: {
userId: "589",
status: "seen"
}
},
{
$group: {
_id: "$userId",
totalNumOfSeen: {
$sum: 1
}
}
}
],
totalNumOfNoticationsSent: [
{
$match: {
userId: "589",
}
},
{
$group: {
_id: "$userId",
totalNumOfNoticationsSent: {
$sum: 1
}
}
}
]
}
}
])
The Above Query is giving me the below Result:
[
{
"totalNumOfNoticationsSent": [
{
"_id": "589",
"totalNumOfNoticationsSent": 3
}
],
"totalNumOfSeen": [
{
"_id": "589",
"totalNumOfSeen": 2
}
]
}
]
MongoPlayground - https://mongoplayground.net/p/jHn2ZlshgDL
Now I need to add one more field as notificationPercentage and calculate the notification percentage based on the above facet result. Really appreciate the help.
You can try,
$group by userId and get totalSeen count using $cond if status is seen, get total count of notification using $sum,
$project to show required fields, and calculate percentage using $divide and $multiply
db.collection.aggregate([
{
$group: {
_id: "$userId",
totalSeen: {
$sum: { $cond: [{ $eq: ["$status", "seen"] }, 1, 0] }
},
total: { $sum: 1 }
}
},
{
$project: {
_id: 0,
userId: "$_id",
notificationPercentage: {
$multiply: [{ $divide: ["$totalSeen", "$total"] }, 100]
}
}
}
])
Playground
Given this Orders collection:
// Order documents
[
{
_id: "order_123",
items: [
{ _id: "item_123", type: "T-Shirt" },
{ _id: "item_234", type: "Hoodie" },
{ _id: "item_345", type: "Hat" },
],
refunds: [
{
_id: "refund_123",
items: ["item_123", "item_234"],
},
{
_id: "refund_234",
items: ["item_345"],
},
],
},
]
Is it possible to map refunds.items -> items._id, allowing us to filter by type?
This is how we currently get the refund sub-documents:
db.orders.aggregate([
{
$replaceRoot: {
newRoot: {
order: "$$ROOT",
refunds: "$$ROOT.refunds",
},
},
},
{
$unwind: "$refunds",
},
{
$project: {
order: "$order",
refund: "$refunds",
},
},
]);
Which gives us:
// Refund documents
[
{
refund: {
_id: "refund_123",
items: ["item_123", "item_234"],
},
order: { ... }, // The original order document
},
{
refund: {
_id: "refund_234",
items: ["item_345"],
},
order: { ... }, // The original order document
},
]
From here, we want to map up refund.items -> order.items._id to produce the following output:
[
{
_id: "refund_123",
items: [
{ _id: "item_123", type: "T-Shirt" },
{ _id: "item_234", type: "Hoodie" },
],
},
{
_id: "refund_234",
items: [
{ _id: "item_345", type: "Hat" }
],
},
]
Allowing us to filter refund documents by type.
You can do this using $unwind and $filter,
$unwind deconstruct array refunds
$project to show refund id in _id, and filter items that are in refunds.items array using $filter
db.orders.aggregate([
{ $unwind: "$refunds" },
{
$project: {
_id: "$refunds._id",
items: {
$filter: {
input: "$items",
cond: { $in: ["$$this._id", "$refunds.items"] }
}
}
}
}
])
Playground
I've been working on a small project that takes MQTT data from sensors and stores it in a MongoDB database. I'm working with nodeJS and mongoose. These are my schemas.
export const SensorSchema = new mongoose.Schema({
name: { type: String, required: true, unique: true },
location: { type: String, required: true },
type: { type: String, required: true },
unit: { type: String, required: true },
measurements: { type: [MeasurementSchema] }
},
{
toObject: { virtuals: true },
toJSON: { virtuals: true }
});
export const MeasurementSchema = new mongoose.Schema({
value: {type: Number, required: true},
time: {type: Date, required: true}
});
First I wrote a function that retrieves all measurements that were made in between two timestamps.
const values = Sensor.aggregate([
{ $match: Sensor.getValuesFromPath(sensorPath) },
{ $unwind: "$measurements"},
{ $match: { "measurements.time": { $gte: startTime, $lte: endTime} }},
{ $replaceRoot: { newRoot: "$measurements" } },
{ $project: { _id: 0}},
{ $sort: {time: 1}}
]).exec();
In order to draw a graph in the UI, I need to somehow sort and then limit the data that gets sent to the client. I want to send every Nth Value in a certain interval to ensure that the data somewhat resembles the course of the data.
I would prefer a solution that doesn't fetch all the data from the database.
How would I go about doing this on the db? Can I somehow access the positional index of an element after sorting it? Is $arrayElemAt or $elemMatch the solution?
Befure you run $unwind you can use $filter to apply start/end Date filtering. This will allow you to process measurements as an array. In the next step you can get every N-th element by using $range to define a list of indexes and $arrayElemAt to retrieve elements from these indexes:
const values = Sensor.aggregate([
{ $match: Sensor.getValuesFromPath(sensorPath) },
{ $addFields: {
measurements: {
$filter: {
input: "$measurements",
cond: { $and: [
{ $gte: [ "$$this.time", startTime ] },
{ $lte: [ "$$this.time", endTime ] }
]
}
}
}
} },
{ $addFields: {
measurements: {
$map: {
input: input: { $range: [ 0, { $size: "$measurements" }, N ] },
as: "index",
in: { $arrayElemAt: [ "$measurements", "$$index" ] }
}
}
} },
{ $unwind: "$measurements" },
{ $replaceRoot: { newRoot: "$measurements" } },
{ $project: { _id: 0}},
{ $sort: {time: 1}}
]).exec();
The following aggregation (i) retrieves all measurements that were made in between two timestamps, (ii) sorts by timestamp for each sensor, and (iii) gets every Nth value (specified by the variable EVERY_N).
Sample documents (with some arbitrary data for testing):
{
name: "s-1",
location: "123",
type: "456",
measurements: [ { time: 2, value: 12 }, { time: 3, value: 13 },
{ time: 4, value: 15 }, { time: 5, value: 22 },
{ time: 6, value: 34 }, { time: 7, value: 9 },
{ time: 8, value: 5 }, { time: 9, value: 1 },
]
},
{
name: "s-2",
location: "789",
type: "900",
measurements: [ { time: 1, value: 31 }, { time: 3, value: 32 },
{ time: 4, value: 35 }, { time: 6, value: 39 },
{ time: 7, value: 6}, { time: 8, value: 70 },
{ time: 9, value: 74 }, { time: 10, value: 82 }
]
}
The aggregation:
var startTime = 3, endTime = 10
var EVERY_N = 2 // value can be 3, etc.
db.collection.aggregate( [
{
$unwind: "$measurements"
},
{
$match: {
"measurements.time": { $gte: startTime, $lte: endTime }
}
},
{
$sort: { name: 1, "measurements.time": 1 }
},
{
$group: {
_id: "$name",
measurements: { $push: "$measurements" },
doc: { $first: "$$ROOT" }
}
},
{
$addFields: {
"doc.measurements": "$measurements"
}
},
{
$replaceRoot: { newRoot: "$doc" }
},
{
$addFields: {
measurements: {
$reduce: {
input: { $range: [ 0, { $size: "$measurements" } ] },
initialValue: [ ],
in: { $cond: [ { $eq: [ { $mod: [ "$$this", EVERY_N ] }, 0 ] },
{ $concatArrays: [ "$$value", [ { $arrayElemAt: [ "$measurements", "$$this" ] } ] ] },
"$$value"
]
}
}
}
}
}
] )
Let's say i have this 2 of huge documents:
[
{
_id: ....,
status: "A",
class: "DIP1A",
"created.user._id": ...,
"created.dt": ....,
"category": "private",
price: 100.00 //type double
},
{
_id: ....,
status: "A",
class: "DIP2A",
"created.user._id": ...
"created.dt": ...,
"category": "public",
price: 200.00 //type double
},
];
Query:
var pipeline = [
{
$match: {
"created.user._id": ....
}
},
{
$unwind: "$class"
},
{
$unwind: "$price"
},
{
$group: {
_id: "$class",
price: {
$sum: "$price"
},
count: {
$sum: 1
}
}
},
{
$project: {
_id: 0,
class: '$_id',
count: 1,
price: 1
}
}
];
db.myCollection.aggregate(pipeline);
Problem issue:
Query without calculate/$sum "$price", it's running really faster;
Indexes:
db.myCollection.ensureIndex({ 'created.user._id': -1 });
db.myCollection.ensureIndex({ 'created.user._id': -1, class: 1 });
db.myCollection.ensureIndex({ 'created.user._id': -1, price: 1});
Performance:
without $sum calc : 5 second with huge of records.
with $sum cals : 20 minutes with huge of records.
The one thing you really should do is move the $project stage to right after the $match stage (if the documents contain more data then stated in your question (huge documents)).
You want as little data as possible through the pipeline.
Also i see an $unwind on price and class but in your example they aren't array's. It could be a copy/paste issue ;-)
Like :
var pipeline = [
{
$match: {
"created.user._id": ....
}
},
{
$project: {
_id: 0,
class: '$_id',
count: 1,
price: 1
}
},
{
$unwind: "$class"
},
{
$unwind: "$price"
},
{
$group: {
_id: "$class",
price: {
$sum: "$price"
},
count: {
$sum: 1
}
}
},
];