How to create mongodb aggregation pipeline between two collections? - node.js

I want to create a MongoDB aggregation pipeline for a collection named Transaction.
The Transaction collection has the fields amount, categoryID and description, and I also have a Category collection with the fields type, icon and color.
I want the pipeline to show the top 3 categories with their percentage values, plus an "Others" category with its percentage value.
Only transactions whose category type is Expense should be included; the type has to be looked up from the Category collection. The result should list the top 3 categories and the remainder, for example:
type : Rent
percentage:45
type: Entertainment
percentage: 30
type: Food
percentage: 20
type: Others
percentage: 5
I first tried this on the Category collection, but I don't want Category to store the amount; the amount should be stored on Transaction.
Category.aggregate([
{
$match: {
type: 'expense'
}
},
{
$group: {
_id: "$name",
amount: { $sum: "$amount" }
}
},
{
$group: {
_id: null,
totalExpense: { $sum: "$amount" },
categories: {
$push: {
name: "$_id",
amount: "$amount"
}
}
}
},
{
$project: {
_id: 0,
categories: {
$map: {
input: "$categories",
as: "category",
in: {
name: "$$category.name",
percent: { $multiply: [{ $divide: ["$$category.amount", "$totalExpense"] }, 100] }
}
}
}
}
},
{
$unwind: "$categories"
},
{
$sort: { "categories.percent": -1 }
},
{
$limit: 3
}
])
This was the pipeline I used for it.
//edit
Tried the method suggested by Joe
Transaction.aggregate([
// Join the Transaction collection with the Category collection
{
$lookup: {
from: 'Category',
localField: 'categoryID',
foreignField: '_id',
as: 'category',
},
},
// Unwind the category array to separate documents
{
$unwind: '$category',
},
// Filter for transactions where the category type is "Expense"
{
$match: {
'category.type': 'Expense',
},
},
// Group transactions by category type and calculate the percentage
{
$group: {
_id: '$category.type',
total: { $sum: '$amount' },
count: { $sum: 1 },
},
},
{
$project: {
_id: 0,
category: '$_id',
percentage: {
$multiply: [{ $divide: ['$count', { $sum: '$count' }] }, 100],
},
},
},
// Sort the categories by percentage in descending order
{
$sort: { percentage: -1 },
},
// Limit the result to top 3 categories
{
$limit: 3,
},
// group the rest of the categories as others
{
$group: {
_id: null,
top3: { $push: '$$ROOT' },
others: { $sum: { $subtract: [100, { $sum: '$top3.percentage' }] } },
},
},
{
$project: {
top3: 1,
others: { category: 'Others', percentage: '$others' },
},
},
]);
I am getting an empty array rather than the values. I have data in the collections with the correct IDs. What might be the issue?
//Answer
This aggregation worked for me
// Computes the percentage share of the three largest expense categories
// plus an aggregated "others" bucket, in a single result document.
Transaction.aggregate([
{
// Keep only this user's transactions whose own `type` field is 'Expense'
// (the filter is on the transaction, not on the joined category).
$match: {
userID: { $eq: UserID },
type: 'Expense',
},
},
{
// Convert categoryID to an ObjectId before the join.
// NOTE(review): presumably categoryID is stored as a string while
// categories._id is an ObjectId - confirm against the data.
$addFields: { categoryID: { $toObjectId: '$categoryID' } },
},
{
// Join each transaction with its category document.
$lookup: {
from: 'categories',
localField: 'categoryID',
foreignField: '_id',
as: 'category_info',
},
},
{
// One document per joined category; transactions without a match are dropped.
$unwind: '$category_info',
},
{
// Total amount per category name.
$group: {
_id: '$category_info.name',
amount: { $sum: '$amount' },
},
},
{
// Largest categories first, so the first three entries pushed below are the top 3.
$sort: {
amount: -1,
},
},
{
// Collapse everything into one document: the grand total and the sorted per-category list.
$group: {
_id: null,
total: { $sum: '$amount' },
data: { $push: '$$ROOT' },
},
},
{
$project: {
// Top 3 categories, each with its rounded percentage of the grand total.
results: {
$map: {
input: {
$slice: ['$data', 3],
},
in: {
category: '$$this._id',
percentage: {
$round: {
$multiply: [{ $divide: ['$$this.amount', '$total'] }, 100],
},
},
},
},
},
// Everything beyond the top 3: total minus the sum of the first three amounts.
// Only computed when there are more than 3 categories; otherwise both fields are null.
others: {
$cond: {
if: { $gt: [{ $size: '$data' }, 3] },
then: {
amount: {
$subtract: [
'$total',
{
$sum: {
$slice: ['$data.amount', 3],
},
},
],
},
// Same remainder as `amount` above, expressed as a rounded percentage of the total.
percentage: {
$round: {
$multiply: [
{
$divide: [
{
$subtract: [
'$total',
{ $sum: { $slice: ['$data.amount', 3] } },
],
},
'$total',
],
},
100,
],
},
},
},
else: {
amount: null,
percentage: null,
},
},
},
},
},
]);

Related

I am trying to retrieve data from MongoDB but I am not getting the expected output

DB Data -
[{
title: "Vivo X50",
category: "mobile",
amount: 35000
},
{
title: "Samsung M32",
category: "mobile",
amount: 18000
},
{
title: "Lenovo 15E253",
category: "laptop",
amount: 85000
},
{
title: "Dell XPS 15R",
category: "laptop",
amount: 115000
}]
Expected Output:
[{
category: "mobile",
qty: 2,
totalAmount: 53000
},
{
category: "laptop",
qty: 2,
totalAmount: 200000
}]
Code I am running (Using mongoose)
let products = await Product.aggregate([
{
$project: { _id: 0, category: 1, amount: 1 },
},
{
$group: {
_id: "$category",
qty: { $sum: 1 },
totalAmount: { $sum: "$amount" },
},
},
]);
Result I am Getting.
[
{
"_id": "laptop",
"count": 2,
"totalSum": 200000
},
{
"_id": "mobile",
"count": 2,
"totalSum": 53000
}
]
As you can see, I am getting the correct data, but I want the field to be named category instead of _id. Please help me with that. Thanks in advance.
You need $project as the last stage to decorate the output document.
{
$project: {
_id: 0,
category: "$_id",
qty: "$qty",
totalAmount: "$totalAmount"
}
}
Meanwhile, the first $project stage is not needed.
// Group documents by category, then rename the grouping key for the output.
db.collection.aggregate([
{
// One output document per distinct `category` value.
$group: {
_id: "$category",
// Number of documents in the group.
qty: {
$sum: 1
},
// Sum of `amount` across the group.
totalAmount: {
$sum: "$amount"
}
}
},
{
// Hide _id and expose the grouping key under the name `category`.
$project: {
_id: 0,
category: "$_id",
qty: "$qty",
totalAmount: "$totalAmount"
}
}
])
Sample Mongo Playground
You can use the following query to get your expected output. cheers~
// Same grouping as above, but renames the key with $addFields and then drops _id.
await Product.aggregate([
{
// One output document per distinct `category` value, with a count and an amount total.
$group: {
_id: "$category",
qty: {
$sum: 1
},
totalAmount: {
$sum: "$amount"
},
},
},
{
// Copy the grouping key into a `category` field.
$addFields: {
category: "$_id"
}
},
{
// Remove _id; all other fields are kept.
$project: {
_id: 0
},
}
])

How to aggregate with many conditions on MongoDB. Double $lookup etc

How to display the "hardest category", based on which study had the largest notLearnedWords array? MongoDB Aggregation
I have these 3 models:
Study
WordSet
Category
The Study model references WordSet, and WordSet references Category.
I'm displaying statistics based on the studies.
How can I display "the hardest category", i.e. the category of the study whose notLearnedWords array is the largest?
I don't know where to start with that query.
For now I display "hardestCategory" as the category that is used most often.
I think that condition would look something like this:
{ $max: { $size: '$notLearnedWords' } } // size of the study with most notLearnedWords
I would like to get a response like this:
"stats": [
{
"_id": null,
"numberOfStudies": 4,
"averageStudyTime": 82.5,
"allStudyTime": 330,
"longestStudy": 120,
"allLearnedWords": 8,
"hardestCategory": "Work" // only this field is missing
}
]
I've tried to do it like this:
const stats = await Study.aggregate([
{ $match: { user: new ObjectID(currentUserId) } },
{
$lookup: {
from: 'users',
localField: 'user',
foreignField: '_id',
as: 'currentUser',
},
},
{
$lookup: {
from: 'wordsets',
let: { wordSetId: '$learnedWordSet' },
pipeline: [
{ $match: { $expr: { $eq: ['$_id', '$$wordSetId'] } } },
{
$project: {
_id: 0,
category: 1,
},
},
{ $unwind: '$category' },
{
$group: {
_id: '$category',
count: { $sum: 1 },
},
},
{ $sort: { count: -1 } },
{ $limit: 1 },
{
$lookup: {
from: 'categories',
localField: '_id',
foreignField: '_id',
as: 'category',
},
},
{
$project: {
_id: 0,
category: { $arrayElemAt: ['$category.name', 0] },
},
},
],
as: 'wordSet',
},
},
{
$group: {
_id: null,
numberOfStudies: { $sum: 1 },
averageStudyTime: { $avg: '$studyTime' },
allStudyTime: { $sum: '$studyTime' },
longestStudy: { $max: '$studyTime' },
allLearnedWords: {
$sum: { $size: '$learnedWords' },
},
hardestCategory: {
$first: {
$first: '$wordSet.category',
},
},
studyWithMostNotLearnedWords: { $max: { $size: '$notLearnedWords' } },
},
},
]);
Study
// Study document: a name, a study time, two string lists (learned and
// not-learned words), and ObjectId references to a WordSet and a User.
const studySchema = new mongoose.Schema({
  name: String,
  studyTime: Number,
  learnedWords: [String],
  notLearnedWords: [String],
  learnedWordSet: { type: mongoose.Schema.Types.ObjectId, ref: 'WordSet' },
  user: { type: mongoose.Schema.Types.ObjectId, ref: 'User' },
});
WordSet
// WordSet document: a name plus an array of required ObjectId references
// to Category documents.
const wordSetSchema = new mongoose.Schema({
  name: String,
  category: [
    { type: mongoose.Schema.Types.ObjectId, ref: 'Category', required: true },
  ],
});
Category
// Category document: only a name.
const categorySchema = new mongoose.Schema({ name: String });

Count by category and sum it up in MongoDB

I have a product collection in MongoDB with fields such as _id, category and user_id.
I want to count the number of products in each category for a given user_id, and then sum up all the counts at the end.
my solution is :
return Product.aggregate([
{ $match: { "user_id": "id if user that added the product" } },
{ "$unwind": "$category" },
{
"$group": {
"_id": {
'category': '$category',
},
"count": { "$sum": 1 }
}
},
{ "$sort": { "_id.category": 1 } },
{
"$group": {
"_id": "$_id.category",
"count": { "$first": "$count" }
}
}
])
The code gives me the count of each category when I don't filter by user_id, but when I add the $match stage it fails.
Product Schema:
// Product document as stored in the product collection.
const ProductSchema = new Schema({
title: {
type: String,
required: true
},
description: {
type: String,
required: true
},
// NOTE(review): default of -1 presumably means "quantity not set" - confirm.
quantity: {
type: Number,
default: -1
},
// A single string per product (not an array).
category:
{
type: String,
required: true
},
manufactured_by: {
type: String,
required: true
},
// ObjectId reference to the 'user' model. Mongoose does not cast values in
// aggregation pipelines, so a $match on user_id must be given an ObjectId,
// not its string form.
user_id: {
type: Schema.Types.ObjectId,
ref: 'user',
required: true
}
})
My result if I don't add the condition:
[
{
"_id": "A Tables",
"count": 1
},
{
"_id": "C Tables",
"count": 4
},
{
"_id": "B Tables",
"count": 2
}
]
Not sure what you are trying to achieve from the last stage in your pipeline.
But the following should give you desired output (without any complications that you added)
async getSellerStatistics(seller_id) {
return await Product.aggregate([
{ $match: { user_id: seller_id } }
{ $unwind: "$category" },
{
$group: {
_id: "$category",
count: { $sum: 1 },
},
},
{ $sort: { _id: 1 } },
])
}

MongoDB lack of performance on $group

I'm working on a project where a user can place bets about a match and then earns points if he has bet for the winning team.
I'm building a leaderboard where I need to choose the 50 best players on the platform (those who have the most points).
Processing points dynamically compelled me to look at the aggregate method in order to calculate the user points from these Models:
// User account. `pronos` is an array of ObjectId references to Prono documents.
const UserSchema = new mongoose.Schema({
// NOTE(review): admin defaults to true for every new user - confirm this is intended.
admin: { type: Boolean, default: true },
username: String,
password: String,
pronos: [{ type: mongoose.Schema.Types.ObjectId, ref: 'Prono', default: [] }],
});
// A bet ("prono") on one referenced Match: two numbers (`local`, `guest`)
// that the pipelines below compare with the match's localScore/guestScore,
// plus a `coeff` number used there as a points multiplier.
const PronoSchema = new mongoose.Schema({
match: { type: mongoose.Schema.Types.ObjectId, ref: 'Match' },
local: Number,
guest: Number,
coeff: Number,
});
// A named step holding ObjectId references to Match documents.
// Note the field is spelled `matchs`; the pipelines below rely on that spelling.
const StepSchema = new mongoose.Schema({
matchs: [{ type: mongoose.Schema.Types.ObjectId, ref: 'Match', default: [] }],
name: String,
});
// A named competition with a start date and ObjectId references to its Step documents.
const CompetitionSchema = new mongoose.Schema({
steps: [{ type: mongoose.Schema.Types.ObjectId, ref: 'Step', default: [] }],
start: { type: mongoose.Schema.Types.Date },
name: String,
});
// A match between two referenced Team documents, with a date and both scores.
// Scores default to -1; the later pipelines exclude matches whose localScore is -1
// (presumably meaning "not played yet" - confirm).
const MatchSchema = new mongoose.Schema({
local: { type: mongoose.Schema.Types.ObjectId, ref: 'Team' },
guest: { type: mongoose.Schema.Types.ObjectId, ref: 'Team' },
localScore: { type: Number, default: -1 },
guestScore: { type: Number, default: -1 },
date: { type: mongoose.Schema.Types.Date },
});
To sum up all this code:
A player places bets, called pronos, on Matches that belong to Steps, which in turn belong to a Competition. So every step of the competition has its own matches.
I wrote the following to calculate the points for the players, and I would like to know if I'm heading in the right direction:
const users = await User.aggregate([
{ $unwind: '$pronos' },
{
$lookup: {
from: 'pronos',
localField: 'pronos',
foreignField: '_id',
as: 'pronoObjects',
}
},
{ $unwind: '$pronoObjects' },
{
$lookup: {
from: 'matches',
localField: 'pronoObjects.match',
foreignField: '_id',
as: 'matches',
}
},
{ $unwind: '$matches' },
{
$addFields: {
pointsEarned: {
$switch: {
branches: [
{
case: {
$and: [
{ $eq: ['$pronoObjects.local', '$matches.localScore'] },
{ $eq: ['$pronoObjects.guest', '$matches.guestScore'] },
],
},
then: 3,
},
{
case: {
$and: [
{ $lt: [{ $subtract: ['$pronoObjects.local', '$pronoObjects.guest'] }, 0] },
{ $lt: [{ $subtract: ['$matches.localScore', '$matches.guestScore'] }, 0] }
]
},
then: 1,
},
{
case: {
$and: [
{ $gt: [{ $subtract: ['$pronoObjects.local', '$pronoObjects.guest'] }, 0] },
{ $gt: [{ $subtract: ['$matches.localScore', '$matches.guestScore'] }, 0] }
]
},
then: 1,
},
],
default: 0,
}
}
}
},
{
$group: {
_id: '$_id',
points: { $sum: '$pointsEarned' }
}
},
{
$lookup: {
from: 'users',
localField: '_id',
foreignField: '_id',
as: 'user',
}
}
]);
Since it's working the way I want, I planned to build up a ranking by Competition, where a user can select a competition ID and see the ranking for it.
When trying to achieve that, I've been using so many unwind methods that my response was 10k lines long before grouping. Thus I would like to know if anyone can hint me about the right way to achieve this.
I'm not looking for the complete answer, I'm new to mongo and would like to know about good practices or learn new aggregation methods.
Thanks in advance.
EDIT:
I managed to get the points for every user with this aggregation. The problem is that it takes more than 10 seconds to run over 400 matches with only 2 users.
Am I missing something ?
Here is the request I use:
return await Competition.aggregate([
{
$match: {
$expr: {
$eq: ['$_id', { $toObjectId: compId }],
}
}
},
{ $unwind: '$steps' },
{
$lookup: {
from: 'steps',
as: 'stepsObject',
localField: 'steps',
foreignField: '_id',
}
},
{ $unwind: '$stepsObject' },
{ $unwind: '$stepsObject.matchs' },
{
$project: {
_id: 1,
stepsObject: 1,
}
},
{
$lookup: {
from: 'matches',
as: 'matchesObject',
let: { otherid: '$stepsObject.matchs' },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ['$$otherid', '$_id'] },
{ $ne: ['$localScore', -1] },
]
}
}
}
],
},
},
{ $unwind: '$matchesObject' },
{
$lookup: {
from: 'users',
as: 'users',
pipeline: [
{
$project: {
_id: 1,
pronos: 1,
}
}
],
}
},
{ $unwind: '$users' },
{ $unwind: '$users.pronos' },
{
$lookup: {
from: 'pronos',
as: 'pronosObject',
let: { matchid: '$matchesObject._id', knownpronos: '$users.pronos' },
pipeline: [
{
$match: {
$expr:
{
$and: [
{ $eq: ['$$matchid', '$match'], },
{ $eq: ['$$knownpronos', '$_id'] },
]
}
}
}
]
// localField: 'users.pronos',
// foreignField: '_id',
}
},
{ $unwind: '$pronosObject' },
{
$addFields: {
pointsEarned: {
$switch: {
branches: [
{
case: {
$and: [
{ $eq: ['$pronosObject.local', '$matchesObject.localScore'] },
{ $eq: ['$pronosObject.guest', '$matchesObject.guestScore'] },
],
},
then: { $multiply: [3, '$pronosObject.coeff'] },
},
{
case: {
$and: [
{ $lt: [{ $subtract: ['$pronosObject.local', '$pronosObject.guest'] }, 0] },
{ $lt: [{ $subtract: ['$matchesObject.localScore', '$matchesObject.guestScore'] }, 0] }
]
},
then: { $multiply: [1, '$pronosObject.coeff'] },
},
{
case: {
$and: [
{ $gt: [{ $subtract: ['$pronosObject.local', '$pronosObject.guest'] }, 0] },
{ $gt: [{ $subtract: ['$matchesObject.localScore', '$matchesObject.guestScore'] }, 0] }
]
},
then: { $multiply: [1, '$pronosObject.coeff'] },
},
],
default: 0,
}
}
}
},
{
$group: {
_id: '$users._id',
pointsEarned: { $sum: '$pointsEarned' },
}
}
]);
EDIT 2:
I rewrote the entire pipeline with a different, much faster approach. I still have one small problem: without the final $group the request takes only 4 milliseconds, but with the final $group (which sums the points per user id) it takes up to 550 ms. I don't know how to optimize this, since it is the final addition of all points for every user.
Here is what I came with:
return await Competition.aggregate([
{
$match: {
$expr: {
$eq: ['$_id', { $toObjectId: compId }],
}
}
},
{ $unwind: '$steps' },
{
$lookup: {
from: 'steps',
as: 'stepsObject',
localField: 'steps',
foreignField: '_id',
}
},
{ $unwind: '$stepsObject' },
{
$project: {
_id: 1,
stepsObject: 1,
}
},
{
$lookup: {
from: 'matches',
as: 'matchesObject',
let: { otherid: '$stepsObject.matchs' },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $in: ['$_id', '$$otherid'] },
{ $ne: ['$localScore', -1] },
]
}
}
}
],
},
},
{
$lookup: {
from: 'users',
as: 'users',
pipeline: [
{
$project: {
_id: 1,
pronos: 1,
}
}
],
}
},
{ $unwind: '$users' },
{ $unwind: '$users.pronos' },
{
$lookup: {
from: 'pronos',
as: 'pronosObject',
localField: 'users.pronos',
foreignField: '_id',
}
},
{ $unwind: '$pronosObject' },
{
$project: {
user_id: '$users._id',
pronosObject: 1,
matchesObjects: {
$arrayElemAt: [
{
$filter: {
input: '$matchesObject',
as: 'matchesObjects',
cond: { $eq: ['$$matchesObjects._id', '$pronosObject.match'] }
}
}, 0
]
},
}
},
{
$addFields: {
pointsEarned: {
$switch: {
branches: [
{
case: {
$and: [
{ $eq: ['$pronosObject.local', '$matchesObjects.localScore'] },
{ $eq: ['$pronosObject.guest', '$matchesObjects.guestScore'] },
],
},
then: { $multiply: [3, '$pronosObject.coeff'] },
},
{
case: {
$and: [
{ $lt: [{ $subtract: ['$pronosObject.local', '$pronosObject.guest'] }, 0] },
{ $lt: [{ $subtract: ['$matchesObjects.localScore', '$matchesObjects.guestScore'] }, 0] }
]
},
then: { $multiply: [1, '$pronosObject.coeff'] },
},
{
case: {
$and: [
{ $gt: [{ $subtract: ['$pronosObject.local', '$pronosObject.guest'] }, 0] },
{ $gt: [{ $subtract: ['$matchesObjects.localScore', '$matchesObjects.guestScore'] }, 0] }
]
},
then: { $multiply: [1, '$pronosObject.coeff'] },
},
],
default: 0,
}
}
}
},
{
$group: { // This is causing me trouble
_id: '$user_id',
pointsEarned: { $sum: '$pointsEarned' },
}
}
]);

MongoDB aggregated query for conversation messages

I am working on a chat application and I want to get conversations ordered by creation date, each with its messages ordered by creation date, in a single array of objects. This query returns the conversations, each with at most 10 messages, ordered by date.
db.models.conversations.aggregate([
{ $match: { 'participants': Number(uid) } },
{ $sort: { 'created': -1 } },
{ $skip: Number(skip) },
{ $limit: Number(limit) },
{
$lookup: {
localField: '_id',
foreignField: 'conversation_id',
from: 'message_details',
as: 'messages'
}
},
{ '$unwind': '$messages' },
{ $match: { 'messages.receiver_uid': Number(uid) } },
{ $match: { 'messages.status': 1 } },
{ $sort: { 'messages.created': 1 } },
{
$lookup: {
localField: 'messages.message_id',
foreignField: '_id',
from: 'messages',
as: 'messages.content'
}
},
{ '$unwind': '$messages.content' },
{ $sort: { 'messages.content.created': -1 } },
{ '$addFields': { 'messages.created': '$messages.content.created' } },
{
$group: {
_id: '$_id',
participants: { $first: '$participants' },
created: { $first: '$created' },
messages: { $push: '$messages' }
}
},
{
$project: {
_id: '$_id',
participants: '$participants',
messages: {
$slice: ['$messages', Number(messages_limit)]
},
updated: { $arrayElemAt: ['$messages.created', 0] }
}
},
{ $sort: { 'updated': -1 } }
]);
This query returns the conversations in the right order, but the messages are in the wrong date order.

Resources