Related
I want to create a Mongodb aggregation pipeline for a collection named Transaction.
The Transaction collection has values amount, categoryID, description and I also have a Category collection with values type, icon and color.
I want the pipeline to show the top3 categories with their percentage values and a others category with its percentage value.
the transaction type should be Expense which it should get from the Category collection and it should show all transactions having category type Expense. The top3 should then give the results as transaction with category (example)
type : Rent
percentage:45
type: Entertainment
percentage: 30
type: Food
percentage: 20
type: Others
percentage: 5
I tried it with Category collection but I don't want category to store amount, but Transaction should store amount.
Category.aggregate([
{
$match: {
type: 'expense'
}
},
{
$group: {
_id: "$name",
amount: { $sum: "$amount" }
}
},
{
$group: {
_id: null,
totalExpense: { $sum: "$amount" },
categories: {
$push: {
name: "$_id",
amount: "$amount"
}
}
}
},
{
$project: {
_id: 0,
categories: {
$map: {
input: "$categories",
as: "category",
in: {
name: "$$category.name",
percent: { $multiply: [{ $divide: ["$$category.amount", "$totalExpense"] }, 100] }
}
}
}
}
},
{
$unwind: "$categories"
},
{
$sort: { "categories.percent": -1 }
},
{
$limit: 3
}
])
This was the pipeline I used for it.
//edit
Tried the method suggested by Joe
Transaction.aggregate([
// Join the Transaction collection with the Category collection
{
$lookup: {
from: 'Category',
localField: 'categoryID',
foreignField: '_id',
as: 'category',
},
},
// Unwind the category array to separate documents
{
$unwind: '$category',
},
// Filter for transactions where the category type is "Expense"
{
$match: {
'category.type': 'Expense',
},
},
// Group transactions by category type and calculate the percentage
{
$group: {
_id: '$category.type',
total: { $sum: '$amount' },
count: { $sum: 1 },
},
},
{
$project: {
_id: 0,
category: '$_id',
percentage: {
$multiply: [{ $divide: ['$count', { $sum: '$count' }] }, 100],
},
},
},
// Sort the categories by percentage in descending order
{
$sort: { percentage: -1 },
},
// Limit the result to top 3 categories
{
$limit: 3,
},
// group the rest of the categories as others
{
$group: {
_id: null,
top3: { $push: '$$ROOT' },
others: { $sum: { $subtract: [100, { $sum: '$top3.percentage' }] } },
},
},
{
$project: {
top3: 1,
others: { category: 'Others', percentage: '$others' },
},
},
]);
I am getting an empty array rather than the values. I have data in the collections with the correct ID's. What might be the issue?
//Answer
This aggregation worked for me
Transaction.aggregate([
{
$match: {
userID: { $eq: UserID },
type: 'Expense',
},
},
{
$addFields: { categoryID: { $toObjectId: '$categoryID' } },
},
{
$lookup: {
from: 'categories',
localField: 'categoryID',
foreignField: '_id',
as: 'category_info',
},
},
{
$unwind: '$category_info',
},
{
$group: {
_id: '$category_info.name',
amount: { $sum: '$amount' },
},
},
{
$sort: {
amount: -1,
},
},
{
$group: {
_id: null,
total: { $sum: '$amount' },
data: { $push: '$$ROOT' },
},
},
{
$project: {
results: {
$map: {
input: {
$slice: ['$data', 3],
},
in: {
category: '$$this._id',
percentage: {
$round: {
$multiply: [{ $divide: ['$$this.amount', '$total'] }, 100],
},
},
},
},
},
others: {
$cond: {
if: { $gt: [{ $size: '$data' }, 3] },
then: {
amount: {
$subtract: [
'$total',
{
$sum: {
$slice: ['$data.amount', 3],
},
},
],
},
percentage: {
$round: {
$multiply: [
{
$divide: [
{
$subtract: [
'$total',
{ $sum: { $slice: ['$data.amount', 3] } },
],
},
'$total',
],
},
100,
],
},
},
},
else: {
amount: null,
percentage: null,
},
},
},
},
},
]);
How to display "hardest category" based on in which "study" size of notLearnedWords was the highest. MongoDB Aggregation
I have these 3 models:
Study
WordSet
Category
Study model has reference into WordSet, then WordSet has reference into Category.
And based on Studies i'm displaying statistics.
How i can display "The hardest category" based on size of "notLearnedWords" was the highest?
I don't know on which place i should start with that querying.
For now i display "hardestCategory" as element that is most used.
I think that condition would look something like this:
{ $max: { $size: '$notLearnedWords' } } // size of the study with most notLearnedWords
I would achieve a response like this:
"stats": [
{
"_id": null,
"numberOfStudies": 4,
"averageStudyTime": 82.5,
"allStudyTime": 330,
"longestStudy": 120,
"allLearnedWords": 8
"hardestCategory": "Work" // only this field is missing
}
]
I've tried to do it like this:
const stats = await Study.aggregate([
{ $match: { user: new ObjectID(currentUserId) } },
{
$lookup: {
from: 'users',
localField: 'user',
foreignField: '_id',
as: 'currentUser',
},
},
{
$lookup: {
from: 'wordsets',
let: { wordSetId: '$learnedWordSet' },
pipeline: [
{ $match: { $expr: { $eq: ['$_id', '$$wordSetId'] } } },
{
$project: {
_id: 0,
category: 1,
},
},
{ $unwind: '$category' },
{
$group: {
_id: '$category',
count: { $sum: 1 },
},
},
{ $sort: { count: -1 } },
{ $limit: 1 },
{
$lookup: {
from: 'categories',
localField: '_id',
foreignField: '_id',
as: 'category',
},
},
{
$project: {
_id: 0,
category: { $arrayElemAt: ['$category.name', 0] },
},
},
],
as: 'wordSet',
},
},
{
$group: {
_id: null,
numberOfStudies: { $sum: 1 },
averageStudyTime: { $avg: '$studyTime' },
allStudyTime: { $sum: '$studyTime' },
longestStudy: { $max: '$studyTime' },
allLearnedWords: {
$sum: { $size: '$learnedWords' },
},
hardestCategory: {
$first: {
$first: '$wordSet.category',
},
},
studyWithMostNotLearnedWords: { $max: { $size: '$notLearnedWords' } },
},
},
]);
Study
const studySchema = new mongoose.Schema({
name: {
type: String,
},
studyTime: {
type: Number,
},
learnedWords: [String],
notLearnedWords: [String],
learnedWordSet: {
type: mongoose.Schema.Types.ObjectId,
ref: 'WordSet',
},
user: {
type: mongoose.Schema.Types.ObjectId,
ref: 'User',
},
});
WordSet
const wordSetSchema = new mongoose.Schema({
name: {
type: String,
},
category: {
type: [
{
type: mongoose.Schema.Types.ObjectId,
ref: 'Category',
required: true,
},
],
},
});
Category
const categorySchema = new mongoose.Schema({
name: {
type: String,
},
});
I'm trying to populate my user document with his talents and talent media. But I'm getting repeated objects. I want to get talent as an object and the medias against that talent inside an array.
my user model:
{
_id: '5f1acd6e6985114f2c1567ea',
name: 'test user',
email: 'test#email.com'
}
talent model
{
_id: '5f1acd6e6985114f2c1567fa',
categoryId: '5f1acd6e6985114f2c1567ga',
userId: '5f1acd6e6985114f2c1567ea'
level: '5',
}
talent-media model
{
_id: 5f1acd6e6985114f2c156710',
talentId: '5f1acd6e6985114f2c1567fa',
media: 'file.jpg',
fileType: 'image'
}
I have another model for storing the category
{
_id: '5f1acd6e6985114f2c1567ga',
title: 'java'
}
I want the result as follows
user: {
_id: '',
name: 'test user',
email: 'test#email.com',
talents: [
{
_id: '',
level: '5',
categoryId: {
_id: '',
title: 'java'
},
medias: [
{
_id: '',
file: 'file1.jpg',
fileType: 'image'
},
{
_id: '',
file: 'file2.jpg',
fileType: 'image'
},
]
}
]
}
And I also tried adding talent-medias embedded in talent documents. But in MongoDB document found is not mostly recommended.
Is it better to have talent model like this,
{
_id: '5f1acd6e6985114f2c1567fa',
categoryId: '5f1acd6e6985114f2c1567ga',
userId: '5f1acd6e6985114f2c1567ea'
level: '5',
medias: [
{
_id: '',
file: 'file1.jpg',
fileType: 'image'
},
{
_id: '',
file: 'file2.jpg',
fileType: 'image'
},
]
}
You can achieve this by using $lookup multiple times:
db.users.aggregate([
{
$lookup: {
from: "talents",
let: {
userId: "$_id"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$$userId",
"$userId"
]
}
}
},
{
$lookup: {
from: "categories",
let: {
categoryId: "$categoryId"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$$categoryId",
"$_id"
]
}
}
},
{
$project: {
_id: "",
title: 1
}
}
],
as: "categoryId"
}
},
{
$lookup: {
from: "talent_media",
let: {
talentId: "$_id"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$$talentId",
"$talentId"
]
}
}
},
{
$project: {
_id: "",
file: "$media",
fileType: "$fileType"
}
}
],
as: "medias"
}
},
{
$unwind: {
path: "$categoryId",
preserveNullAndEmptyArrays: true
}
},
{
$project: {
_id: "",
categoryId: 1,
level: 1,
medias: 1
}
}
],
as: "talents"
}
},
{
$project: {
_id: "",
email: 1,
name: 1,
talents: 1
}
}
])
*You'll probably have to adapt the collection names.
MongoPlayground
You have to use nested $lookup,
lookup with Talent Collection and match userId
$match for specific user _id document, commented here
db.User.aggregate([
/** add here
Optional for single user
{
$match: {
_id: "XXXXXXXXXXXX"
}
},
*/
{
$lookup: {
from: "Talent",
as: "talents",
let: {
userId: "$_id"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$$userId",
"$userId"
]
}
}
},
we are now inside Talent Document and lookup with Category Collection and match with categoryId
$unwind category because it will return array and we need object
{
$lookup: {
from: "Category",
as: "categoryId",
localField: "categoryId",
foreignField: "_id"
}
},
{
$unwind: {
path: "$categoryId",
preserveNullAndEmptyArrays: true
}
},
again we are inside Talent Document and lookup with Talent Media Collection and match with talentId
$project for add/remove not needed fields, here removed talentID from TalentMedia nad removed userId from Talent
{
$lookup: {
from: "TalentMedia",
as: "medias",
let: {
talentId: "$_id"
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$$talentId",
"$talentId"
]
}
}
},
{
$project: {
talentId: 0
}
}
]
}
},
{
$project: {
userId: 0
}
}
]
}
}
])
Separated query in parts for explanation, You can combine as they are in sequence.
Working Playground: https://mongoplayground.net/p/poIbvFUMvT4
I'm working on a project where a user can place bets about a match and then earns points if he has bet for the winning team.
I'm building a leaderboard where I need to choose the 50 best players on the platform (those who have the most points).
Processing points dynamically compelled me to look at the aggregate method in order to calculate the user points from these Models:
const UserSchema = new mongoose.Schema({
admin: { type: Boolean, default: true },
username: String,
password: String,
pronos: [{ type: mongoose.Schema.Types.ObjectId, ref: 'Prono', default: [] }],
});
const PronoSchema = new mongoose.Schema({
match: { type: mongoose.Schema.Types.ObjectId, ref: 'Match' },
local: Number,
guest: Number,
coeff: Number,
});
const StepSchema = new mongoose.Schema({
matchs: [{ type: mongoose.Schema.Types.ObjectId, ref: 'Match', default: [] }],
name: String,
});
const CompetitionSchema = new mongoose.Schema({
steps: [{ type: mongoose.Schema.Types.ObjectId, ref: 'Step', default: [] }],
start: { type: mongoose.Schema.Types.Date },
name: String,
});
const MatchSchema = new mongoose.Schema({
local: { type: mongoose.Schema.Types.ObjectId, ref: 'Team' },
guest: { type: mongoose.Schema.Types.ObjectId, ref: 'Team' },
localScore: { type: Number, default: -1 },
guestScore: { type: Number, default: -1 },
date: { type: mongoose.Schema.Types.Date },
});
To sum up all these code:
A player places bets, called pronos on Matches that are inside Steps, them being inside a Competition. So every steps of the competition has its matches.
I've been producing this to calculate the points for the players and I would like to know if I'm heading to the right direction:
const users = await User.aggregate([
{ $unwind: '$pronos' },
{
$lookup: {
from: 'pronos',
localField: 'pronos',
foreignField: '_id',
as: 'pronoObjects',
}
},
{ $unwind: '$pronoObjects' },
{
$lookup: {
from: 'matches',
localField: 'pronoObjects.match',
foreignField: '_id',
as: 'matches',
}
},
{ $unwind: '$matches' },
{
$addFields: {
pointsEarned: {
$switch: {
branches: [
{
case: {
$and: [
{ $eq: ['$pronoObjects.local', '$matches.localScore'] },
{ $eq: ['$pronoObjects.guest', '$matches.guestScore'] },
],
},
then: 3,
},
{
case: {
$and: [
{ $lt: [{ $subtract: ['$pronoObjects.local', '$pronoObjects.guest'] }, 0] },
{ $lt: [{ $subtract: ['$matches.localScore', '$matches.guestScore'] }, 0] }
]
},
then: 1,
},
{
case: {
$and: [
{ $gt: [{ $subtract: ['$pronoObjects.local', '$pronoObjects.guest'] }, 0] },
{ $gt: [{ $subtract: ['$matches.localScore', '$matches.guestScore'] }, 0] }
]
},
then: 1,
},
],
default: 0,
}
}
}
},
{
$group: {
_id: '$_id',
points: { $sum: '$pointsEarned' }
}
},
{
$lookup: {
from: 'users',
localField: '_id',
foreignField: '_id',
as: 'user',
}
}
]);
Since it's working the way I want, I planned to build up a ranking by Competition, where a user can select a competition ID and see the ranking for it.
When trying to achieve that, I've been using so many unwind methods that my response was 10k lines long before grouping. Thus I would like to know if anyone can hint me about the right way to achieve this.
I'm not looking for the complete answer, I'm new to mongo and would like to know about good practices or learn new aggregation methods.
Thanks in advance.
EDIT:
I managed to get the points for every user with this aggregation. The problem is that it takes more than 10 seconds to run over 400 matches with only 2 users.
Am I missing something ?
Here is the request I use:
return await Competition.aggregate([
{
$match: {
$expr: {
$eq: ['$_id', { $toObjectId: compId }],
}
}
},
{ $unwind: '$steps' },
{
$lookup: {
from: 'steps',
as: 'stepsObject',
localField: 'steps',
foreignField: '_id',
}
},
{ $unwind: '$stepsObject' },
{ $unwind: '$stepsObject.matchs' },
{
$project: {
_id: 1,
stepsObject: 1,
}
},
{
$lookup: {
from: 'matches',
as: 'matchesObject',
let: { otherid: '$stepsObject.matchs' },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ['$$otherid', '$_id'] },
{ $ne: ['$localScore', -1] },
]
}
}
}
],
},
},
{ $unwind: '$matchesObject' },
{
$lookup: {
from: 'users',
as: 'users',
pipeline: [
{
$project: {
_id: 1,
pronos: 1,
}
}
],
}
},
{ $unwind: '$users' },
{ $unwind: '$users.pronos' },
{
$lookup: {
from: 'pronos',
as: 'pronosObject',
let: { matchid: '$matchesObject._id', knownpronos: '$users.pronos' },
pipeline: [
{
$match: {
$expr:
{
$and: [
{ $eq: ['$$matchid', '$match'], },
{ $eq: ['$$knownpronos', '$_id'] },
]
}
}
}
]
// localField: 'users.pronos',
// foreignField: '_id',
}
},
{ $unwind: '$pronosObject' },
{
$addFields: {
pointsEarned: {
$switch: {
branches: [
{
case: {
$and: [
{ $eq: ['$pronosObject.local', '$matchesObject.localScore'] },
{ $eq: ['$pronosObject.guest', '$matchesObject.guestScore'] },
],
},
then: { $multiply: [3, '$pronosObject.coeff'] },
},
{
case: {
$and: [
{ $lt: [{ $subtract: ['$pronosObject.local', '$pronosObject.guest'] }, 0] },
{ $lt: [{ $subtract: ['$matchesObject.localScore', '$matchesObject.guestScore'] }, 0] }
]
},
then: { $multiply: [1, '$pronosObject.coeff'] },
},
{
case: {
$and: [
{ $gt: [{ $subtract: ['$pronosObject.local', '$pronosObject.guest'] }, 0] },
{ $gt: [{ $subtract: ['$matchesObject.localScore', '$matchesObject.guestScore'] }, 0] }
]
},
then: { $multiply: [1, '$pronosObject.coeff'] },
},
],
default: 0,
}
}
}
},
{
$group: {
_id: '$users._id',
pointsEarned: { $sum: '$pointsEarned' },
}
}
]);
EDIT 2:
I rewrote the entire pipeline with a different approach much faster. I still have a tiny problem. If I get rid of the final group, the request only take 4 milliseconds to run, but the final group to group points by id takes it up to 550ms. I don't know how can I optimize this since it's the final addition of all points for every user.
Here is what I came with:
return await Competition.aggregate([
{
$match: {
$expr: {
$eq: ['$_id', { $toObjectId: compId }],
}
}
},
{ $unwind: '$steps' },
{
$lookup: {
from: 'steps',
as: 'stepsObject',
localField: 'steps',
foreignField: '_id',
}
},
{ $unwind: '$stepsObject' },
{
$project: {
_id: 1,
stepsObject: 1,
}
},
{
$lookup: {
from: 'matches',
as: 'matchesObject',
let: { otherid: '$stepsObject.matchs' },
pipeline: [
{
$match: {
$expr: {
$and: [
{ $in: ['$_id', '$$otherid'] },
{ $ne: ['$localScore', -1] },
]
}
}
}
],
},
},
{
$lookup: {
from: 'users',
as: 'users',
pipeline: [
{
$project: {
_id: 1,
pronos: 1,
}
}
],
}
},
{ $unwind: '$users' },
{ $unwind: '$users.pronos' },
{
$lookup: {
from: 'pronos',
as: 'pronosObject',
localField: 'users.pronos',
foreignField: '_id',
}
},
{ $unwind: '$pronosObject' },
{
$project: {
user_id: '$users._id',
pronosObject: 1,
matchesObjects: {
$arrayElemAt: [
{
$filter: {
input: '$matchesObject',
as: 'matchesObjects',
cond: { $eq: ['$$matchesObjects._id', '$pronosObject.match'] }
}
}, 0
]
},
}
},
{
$addFields: {
pointsEarned: {
$switch: {
branches: [
{
case: {
$and: [
{ $eq: ['$pronosObject.local', '$matchesObjects.localScore'] },
{ $eq: ['$pronosObject.guest', '$matchesObjects.guestScore'] },
],
},
then: { $multiply: [3, '$pronosObject.coeff'] },
},
{
case: {
$and: [
{ $lt: [{ $subtract: ['$pronosObject.local', '$pronosObject.guest'] }, 0] },
{ $lt: [{ $subtract: ['$matchesObjects.localScore', '$matchesObjects.guestScore'] }, 0] }
]
},
then: { $multiply: [1, '$pronosObject.coeff'] },
},
{
case: {
$and: [
{ $gt: [{ $subtract: ['$pronosObject.local', '$pronosObject.guest'] }, 0] },
{ $gt: [{ $subtract: ['$matchesObjects.localScore', '$matchesObjects.guestScore'] }, 0] }
]
},
then: { $multiply: [1, '$pronosObject.coeff'] },
},
],
default: 0,
}
}
}
},
{
$group: { // This is causing me trouble
_id: '$user_id',
pointsEarned: { $sum: '$pointsEarned' },
}
}
]);
I am working on chat and I want to get conversations order by created date and messages order by created in a single array of object. This query gets conversation with messages of limit 10 order by date.
db.models.conversations.aggregate([
{ $match: { 'participants': Number(uid) } },
{ $sort: { 'created': -1 } },
{ $skip: Number(skip) },
{ $limit: Number(limit) },
{
$lookup: {
localField: '_id',
foreignField: 'conversation_id',
from: 'message_details',
as: 'messages'
}
},
{ '$unwind': '$messages' },
{ $match: { 'messages.receiver_uid': Number(uid) } },
{ $match: { 'messages.status': 1 } },
{ $sort: { 'messages.created': 1 } },
{
$lookup: {
localField: 'messages.message_id',
foreignField: '_id',
from: 'messages',
as: 'messages.content'
}
},
{ '$unwind': '$messages.content' },
{ $sort: { 'messages.content.created': -1 } },
{ '$addFields': { 'messages.created': '$messages.content.created' } },
{
$group: {
_id: '$_id',
participants: { $first: '$participants' },
created: { $first: '$created' },
messages: { $push: '$messages' }
}
},
{
$project: {
_id: '$_id',
participants: '$participants',
messages: {
$slice: ['$messages', Number(messages_limit)]
},
updated: { $arrayElemAt: ['$messages.created', 0] }
}
},
{ $sort: { 'updated': -1 } }
]);
this query returns conversation in right order but messages in wrong order of date