Mongodb aggregate distinct with unique and sort - node.js

I have a Ranks collection with documents which looks like this:
[
{
"_id": "1",
"url": "ex1.com",
"keyword": "k1",
"rank": 19,
"createdAt": "2021-06-02",
"user": "616c542660d23fc17469b47e"
},
{
"_id": "2",
"url": "ex1.com",
"keyword": "k1",
"rank": 14,
"createdAt": "2021-06-01",
"user": "616c542660d23fc17469b47e"
},
{
"_id": "3",
"url": "ex1.com",
"keyword": "k2",
"rank": 8,
"createdAt": "2021-05-01",
"user": "616c542660d23fc17469b47e"
},
{
"_id": "4",
"url": "ex2.com",
"keyword": "k3",
"rank": 4,
"createdAt": "2021-05-01",
"user": "616c542660d23fc17469b47e"
}
]
users collection with documents which looks like this:
[
{
_id: "616c542660d23fc17469b47e",
email: "some#email.com"
}
]
I'm trying to run an aggregation which will return each user object + user's data array that grouped by url, each url object has keywords array that includes unique and last (by date) rank keyword
This is what I tried but the query returns all url's keywords, how can i make it return unique and last (by createdAt date) keywords
Rank.aggregate([
{
$match: {}
},
{
$lookup: {
from: 'users',
localField: 'user',
foreignField: '_id',
as: 'user'
}
},
{
$project: {
user: {
$arrayElemAt: ['$user', 0]
},
url: '$url',
keyword: '$keyword',
rank: '$rank',
createdAt: '$createdAt',
}
},
{
$sort: {
createdAt: -1
}
},
{
$group: {
_id: '$user._id',
user: {
$first: '$user'
},
data: {
$push: {
id: '$_id',
url: '$url',
keyword: '$keyword',
rank: '$rank',
createdAt: '$createdAt',
}
}
}
}
])
Expected output:
[{
user: {
_id: "616c542660d23fc17469b47e",
email: "some#email.com"
},
data: [
{
url: "ex1.com",
keywords: [
{
keyword: "k1",
rank: 19,
createdAt: "2021-06-02",
},
{
keyword: "k2",
rank: 8,
createdAt: "2021-05-01"
},
]
},
{
url: "ex2.com",
keywords: [
{
keyword: "k3",
rank: 4,
createdAt: "2021-05-01"
},
]
}
]
}]

Here it is the solution that I came out with. Playground
Full explanation:
We group by "$url","$user" and "$keyword" to get the unique combinations of this fields. AT this point waht we want is only the unique keywords, but we have to use the user and url fields, becouse we would groupBy those later too.Because we order them by createdAt, if we get the first document it will be the last one created.
{
"$sort": {
"createdAt": 1
}
},
{
"$group": {
"_id": [
"$url",
"$user",
"$keyword"
],
"keywords": {
$first: "$$ROOT"
}
}
},
Then we will format this keyword information a bit to group it by url. This step will give us the keywords per URL.
{
"$project": {
"url": "$keywords.url",
"user": "$keywords.user",
"keywords": "$keywords",
"_id": 0
}
},
{
"$group": {
"_id": [
"$user",
"$url"
],
"data": {
$push: "$$ROOT"
}
}
},
Finally we will group the URLs by user. Notice that we have grouped by URL and by user in each groupBy in order to not lose those fields.
{
"$project": {
"url": {
$first: "$data.keywords.url"
},
"user": {
$first: "$data.keywords.user"
},
"keywords": "$data.keywords",
"_id": 0
}
},
{
"$group": {
"_id": "$user",
"data": {
$push: "$$ROOT"
}
}
},
At this step we have almost all the information we needed grouped together. We would perform a lookUp to get the email from the Users collection and do the final mapping to remove some redundant data.
{
$lookup: {
from: "users",
localField: "_id",
foreignField: "_id",
as: "user"
}
},
{
"$unwind": "$user"
},
{
"$project": {
"_id": 0,
"data.user": 0,
"data.keywords._id": 0,
"data.keywords.url": 0,
"data.keywords.user": 0
}
},

Related

Aggregation: Return documents based on fields in a subdocument

I’m using an aggregation to return data via a lookup to build the links between documents.
At the moment, the linking is working when User A creates links between their own assets to navigate.
But if User A is viewing an asset that’s been shared with them by User B and navigates to one that has a link to an asset that hasn’t been shared with them, those are the documents I need to exclude from the results.
So, I need the documents for assets that have a document in attributes that contains my userId, or — as in the $match — the $_id of an attribute that's in the attributes array in assets. When an asset is shared with someone, a document in attributes is created.
The data for a Link is:
{
"_id": {
"$oid": "63769c377615fe4cdb4995a6"
},
"userId": "620920aa9ddac2074a50472f",
"toAsset": {
"$oid": "63769c117615fe4cdb499515"
},
"fromAsset": {
"$oid": "63769c067615fe4cdb4994d9"
},
"comment": "<p>Linking of Note 0001 to Note 0002.</p>",
"createdAt": {
"$date": {
"$numberLong": "1668717623761"
}
},
"updatedAt": {
"$date": {
"$numberLong": "1668717623761"
}
},
"isEmbedded": false,
"isActive": true,
"__v": 0
}
The data for an Asset, as in toAsset and fromAsset, is:
{
"_id": {
"$oid": "6377a8d834671794449f0dca"
},
"userId": "636b73f31527830f7bd7a47e",
"folderId": "636b73f31527830f7bd7a482",
"title": "Note that hasn't been shared",
"note": "<p>Here's a Note that hasn't been shared.</p>",
"typeOfAsset": "note",
"createdAt": {
"$date": {
"$numberLong": "1668786392389"
}
},
"updatedAt": {
"$date": {
"$numberLong": "1668786392389"
}
},
"isActive": 3,
"meta": [...],
"preferences": [...],
"sequence": 1,
"tags": [],
"attributes": [
{
"$oid": "6377a8d834671794449f0dc8"
}
],
"__v": 0
}
I’m using attributes to manage what assets have been shared with whom, and the data is:
{
"_id": {
"$oid": "6377a8d834671794449f0dc8"
},
"userId": "636b73f31527830f7bd7a47e",
"numberOfViews": 2,
"isFavourite": false,
"isToRead": false,
"typeOfAccess": "isOwner",
"sharing": {
"typeOfShare": "withUsers",
"sharedWith": [],
"segementsForUrl": []
},
"__v": 0
}
Now, the task here is to somehow how return the assets that have been shared, but after a bunch of different attempts (as per the code that’s been commented out), I’ve so far failed.
The code is:
const match = {
$match: {
[args.directionOfLink]: new mongoose.Types.ObjectId(args.assetId)
}
}
const project = {
$project: {
_id: 0,
id: '$_id',
userId: 1,
[directionOfLink]: 1,
comment: 1,
createdAt: 1,
updatedAt: 1,
isActive: 1,
score: {
$meta: 'searchScore'
}
}
}
const lookup = {
$lookup: {
from: 'assets',
localField: directionOfLink,
foreignField: '_id',
let: { attributesInAsset: '$attributes' },
pipeline: [
{
$lookup: {
from: 'attributes',
as: 'attributes',
pipeline: [{
$match: {
$expr: {
$in: [ '$_id', '$$attributesInAsset' ]
// $and: [
// { $eq: [ '$userId', context.body.variables.userId ] },
// { $in: [ '$typeOfAccess', ['isOwner', 'asAuthor', 'asReader'] ] },
// ]
}
}
}]
}
},
{
$project: {
_id: 1,
userId: 1,
folderId: 1,
title: 1,
typeOfAsset: 1,
attributes: 1,
createdAt: 1,
updatedAt: 1,
isActive: 1
}
}
],
as: directionOfLink
}
}
Here, directionOfLink is either "toAsset" or "fromAsset".
Any thoughts would be appreciated.
As a non expert in MongoDB, it's possible this isn't the most performant approach, but at least it works:
const lookup = {
$lookup: {
from: 'assets',
localField: directionOfLink,
foreignField: '_id',
as: directionOfLink,
pipeline: [
{
$lookup: {
from: 'assets_attributes',
as: 'attributesInAssets',
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: [ '$userId', context.body.variables.userId ] },
{ $in: [ '$typeOfAccess', ['isOwner', 'asAuthor', 'asReader'] ] },
]
}
}
}
]
}
},
{
$unwind: '$attributesInAssets'
},
{
$match: {
$expr: {
$in: [ '$attributesInAssets._id', '$attributes' ]
}
}
},
{
$group: {
_id: '$_id',
userId: { $first: '$userId' },
folderId: { $first: '$folderId' },
title: { $first: '$title' },
typeOfAsset: { $first: '$typeOfAsset' },
createdAt: { $first: '$createdAt' },
updatedAt: { $first: '$updatedAt' },
isActive: { $first: '$isActive' },
attributes: { $first: '$attributes' },
attributesInAssets: {
$push: '$attributesInAssets._id'
}
}
},
{
$project: {
_id: 1,
userId: 1,
folderId: 1,
title: 1,
typeOfAsset: 1,
attributes: 1,
attributesInAssets: 1,
createdAt: 1,
updatedAt: 1,
isActive: 1
}
}
]
}
}
const redact = {
$redact: {
$cond: {
if: {
$gt: [ {
$size: `$${directionOfLink}`
}, 0 ]
},
then: '$$KEEP',
else: '$$PRUNE'
}
}
}

MongoDB $lookup don't replace all of the other objects

I am trying to use the $lookup method to find users for this object. But when I use it, it replaces the other objects. The data that was outputed is this
"notifications": {
"author": [
{
"username": "UnusualAbsurd",
"status": "Michael Ohare",
"createdAt": "2022-03-08T14:02:53.728Z",
"id": "1",
"avatar": "https://avatars.dicebear.com/api/avataaars/UnusualAbsurd.png"
}
]
}
But what I expected was this
"notifications": [
"author": [
{
"username": "UnusualAbsurd",
"status": "Michael Ohare",
"createdAt": "2022-03-08T14:02:53.728Z",
"id": "1",
"avatar": "https://avatars.dicebear.com/api/avataaars/UnusualAbsurd.png"
}
],
"type": "REQUEST",
"value": "1"
]
How do I make it output the expected version?
This is my code right now
const data = await this.notificatioModel.aggregate([
{
$match: { author: id },
},
{
$project: { _id: 0, __v: 0 },
},
{
$lookup: {
from: 'users',
localField: 'notifications.author',
foreignField: 'id',
as: 'notifications.author',
pipeline: [
{
$project: { _id: 0, email: 0, password: 0 },
},
],
},
},
]);
This is my notification document
This is my user document

Percentage of amount in a subdocument grouped per type in Mongoose/NodeJS

I have the following MongoDB schema:
const userSchema = new mongoose.Schema({
email: {
type: String,
required: [true, 'Email is required.']
},
transactions: [
{
categoryName: {
type: String,
required: [true, 'Category name in transaction is required.']
},
categoryType: {
type: String,
required: [true, 'Category type in transaction is required.']
},
amount: {
type: Number,
required: [true, 'Transaction amount is required.']
}
}
]})
transactions.categoryType can only be Income or Expense. Now per queried _id, I want to return the ratio/percentage of transactions.CategoryName per Income and Expense. Meaning if I have the following data:
{
"_id": 000001,
"email": "asdasd#email.com"
"transactions": [
{
"categoryName": "Food",
"categoryType": "Expense",
"amount": 200
},
{
"categoryName": "Rent",
"categoryType": "Expense",
"amount": 1000
},
{
"categoryName": "Salary",
"categoryType": "Income",
"amount": 15000
}
]
}
the result that I would want is:
{ "email": "asdasd#email.com",
"Income": [["Salary", 100]],
"Expense": [["Food", 16.67],["Rent",83.33]],
}
Now, I have the following query:
return User.aggregate([
{ $match: { _id : ObjectId(request.params.id) } },
{ $unwind : "$transactions"},
{ $group : { _id : { type: "$transactions.categoryType" },
        total: {$sum : "$transactions.amount"},
transactionsArray: { $push: "$transactions"}
        }
},
{ $project: {
_id: 0,
transactionsArray:1,
    type: "$_id.type",
total:1
}
}
])
which returns a data like this:
[
{
"total": 1200,
"transactions": [
{
"categoryName": "Food",
"categoryType": "Expense",
"amount": 200,
},
{
"categoryName": "Rent",
"categoryType": "Expense",
"amount": 1000,
}
],
"type": "Expense"
},
{
"total": 15000,
"transactions": [
{
"categoryName": "Salary",
"categoryType": "Income",
"amount": 15000,
}
],
"type": "Income"
}
]
Now, I do not know how am I going to further process the result set to divide the transactions.amount by the total to get the result that I want.
You may go with multiple steps in aggregations
$unwind to deconstruct the array
$group- first group to group by _id and $categoryType. So we can get the total amount and an amount for particular transaction. This helps to calculate the ratio.
$map helps to loop over the array and calculate the ratio
$reduce- You need comma separated string array of objects. So loop it and get the structure.
$group to group by _id only so we can get the key value pair of category type and Income/Expense when we push
$replaceRoot to make the $grp object as root which should be merged with already existing fields ($mergeObjects)
$project for remove unwanted fields
Here is the code
db.collection.aggregate([
{ "$unwind": "$transactions" },
{
"$group": {
"_id": { id: "$_id", catType: "$transactions.categoryType" },
"email": { "$first": "$email" },
"amount": { "$sum": "$transactions.amount" },
"category": {
$push: { k: "$transactions.categoryName", v: "$transactions.amount" }
}
}
},
{
$addFields: {
category: {
$map: {
input: "$category",
in: {
k: "$$this.k",
v: {
"$multiply": [
{ "$divide": [ "$$this.v","$amount" ]},
100
]
}
}
}
}
}
},
{
"$addFields": {
category: {
"$reduce": {
"input": "$category",
"initialValue": [],
"in": {
"$concatArrays": [
[
[ "$$this.k", { $toString: "$$this.v" } ]
],
"$$value"
]
}
}
}
}
},
{
"$group": {
"_id": "$_id.id",
"email": { "$first": "$email" },
"grp": { "$push": { k: "$_id.catType", v: "$category" } }
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [ { "$arrayToObject": "$grp" }, "$$ROOT" ]
}
}
},
{ "$project": { grp: 0 } }
])
Working Mongo playground

Mongodb, mongoose - Sorting by _id and date using aggregate and group

I am trying to sort by the task._id & date in desc order. I am able to sort by task._id but sortibg bydate doesnt work, I tried changing the order in aggregate still no luck. I get the response but just the order by usertasks were added in collection and not by the usertask.date
User(name, address, etc)
Task(name, icon, assignee)
UserTask(User.ObjectId, Task.ObjectId, date)
User Collection:
{
"users": [
{
"name": "Bill",
"phone": "345"
},
{
"name": "Steve",
"phone": "123"
},
{
"name": "Elon",
"phone": "567"
}
]
}
Task collection:
{
"tasks": [
{
"name": "Run 100m",
"icon": "run",
"assignee": "Elon"
},
{
"name": "Walk 1 hour",
"icon": "walk",
"assignee": "Bill"
},
{
"name": "Jog 30 minutes",
"icon": "jog",
"assignee": "Steve"
}
]
}
UserTasks:
{
"_id": "5e72fec..",
"user": "5e72fa4..",
"task": "5e72fbac..",
"date": "2020-03-03T05:10:10.000Z",
"createdAt": "2020-03-19T05:10:37.027Z",
"updatedAt": "2020-03-19T05:10:37.027Z",
"__v": 0
},
{
"_id": "5e72fed3..",
"user": "5e72fa4e..",
"task": "5e72fbac..",
"date": "2020-03-12T05:10:10.000Z",
"createdAt": "2020-03-19T05:10:43.296Z",
"updatedAt": "2020-03-19T05:10:43.296Z",
"__v": 0
},
{
"_id": "5e72fed6..",
"user": "5e72fa..",
"task": "5e72fb..",
"date": "2020-03-15T05:10:10.000Z",
"createdAt": "2020-03-19T05:10:46.057Z",
"updatedAt": "2020-03-19T05:10:46.057Z",
"__v": 0
},
{
"_id": "5e72feda...",
"user": "5e72fa4..",
"task": "5e72fb..",
"date": "2020-03-07T05:10:10.000Z",
"createdAt": "2020-03-19T05:10:50.785Z",
"updatedAt": "2020-03-19T05:10:50.785Z",
"__v": 0
}
This is the Aggregate that needs changing
UserTask.aggregate([
{
$lookup: {
from: "tasks",
localField: "task",
foreignField: "_id",
as: "matchedTask"
}
},
{
$unwind: "$matchedTask"
},
{
$lookup: {
from: "users",
localField: "user",
foreignField: "_id",
as: "matchedUser"
}
},
{
$unwind: "$matchedUser"
},
{
$group: {
_id: "$matchedTask._id",
name: {$first: "$matchedTask.name"},
icon: {$first: "$matchedTask.icon"},
assignee: { $first: "$matchedTask.assignee" },
userdata: {
$push: {
name: "$matchedUser.name",
date: "$date"
}
}
}
},
{
$sort: { _id: 1, "userdata.date": -1 }
}
])
.exec()
.then(doc => res.status(200).json(doc))
.catch(err => res.status(400).json("Error: " + err));
The response is shown below, please note the usertask.date. it is NOT sorted
{
"_id": "5e...",
"name": "Run 100m",
"icon": "run",
"assignee": "Elon",
"userdata": [
{
"name": "Elon",
"date": "2020-03-21T20:02:38.143Z"
},
{
"name": "Bill",
"date": "2020-03-11T20:02:38.000Z"
},
{
"name": "Steve",
"date": "2020-03-19T20:02:38.000Z"
}
]
}
As you can see the it is not sorted by date - desc order. The result should be like shown below
"userdata": [
{
"name": "Elon",
"date": "2020-03-21T20:02:38.143Z"
},
{
"name": "Steve",
"date": "2020-03-19T20:02:38.000Z"
},
{
"name": "Bill",
"date": "2020-03-11T20:02:38.000Z"
}
]
$sort will use the last object which comes out from the aggregate pipe and theres no field "date" in this object:
{
$group: {
_id: "$matchedTask._id",
name: {$first: "$matchedTask.name"},
icon: {$first: "$matchedTask.icon"},
assignee: { $first: "$matchedTask.assignee" },
userdata: {
$push: {
name: "$matchedUser.name",
execDate: "$date"
}
}
}
},
your group has to returned a field named date in order to be able to sort it
{
$group: {
_id: "$matchedTask._id",
name: ....,
date: ....
}
}
{ $sort: {date: -1}}
if the value you want to sort is indide another object you must specify it on sort:
{$sort: {"userdata.date": -1}}
I fixed it, had to use sort two times, now I am able to get the result as I want
Solution provided below
UserTask.aggregate([
{
$lookup: {
from: "tasks",
localField: "task",
foreignField: "_id",
as: "matchedTask"
}
},
{
$unwind: "$matchedTask"
},
{
$lookup: {
from: "users",
localField: "user",
foreignField: "_id",
as: "matchedUser"
}
},
{
$unwind: "$matchedUser"
},
{
$sort: { date: -1 }
},
{
$group: {
_id: "$matchedTask._id",
name: { $first: "$matchedTask.name" },
icon: { $first: "$matchedTask.icon" },
assignee: { $first: "$matchedTask.assignee" },
userdata: {
$push: {
name: "$matchedUser.name",
execDate: "$date"
}
}
}
},
{
$sort: { _id: 1 }
}
])
.exec()
.then(doc => res.status(200).json(doc))
.catch(err => res.status(400).json("Error: " + err));

Mongoose Aggregate with Lookup

I have a simple two collections like below :
assignments:
[
{
"_id": "593eff62630a1c35781fa325",
"topic_id": 301,
"user_id": "59385ef6d2d80c00d9bdef97"
},
{
"_id": "593eff62630a1c35781fa326",
"topic_id": 301,
"user_id": "59385ef6d2d80c00d9bdef97"
}
]
and users collection:
[
{
"_id": "59385ef6d2d80c00d9bdef97",
"name": "XX"
},
{
"_id": "59385b547e8918009444a3ac",
"name": "YY"
}
]
and my intent is, an aggregate query by user_id on assignment collection, and also I would like to include user.name in that group collection. I tried below:
Assignment.aggregate([{
$match: {
"topic_id": "301"
}
},
{
$group: {
_id: "$user_id",
count: {
$sum: 1
}
}
},
{
$lookup: {
"from": "kullanicilar",
"localField": "user_id",
"foreignField": "_id",
"as": "user"
}
},
{
$project: {
"user": "$user",
"count": "$count",
"_id": "$_id"
}
},
But the problem is that user array is always blank.
[ { _id: '59385ef6d2d80c00d9bdef97', count: 1000, user: [] } ]
I want something like :
[ { _id: '59385ef6d2d80c00d9bdef97', count: 1000, user: [_id:"59385ef6d2d80c00d9bdef97",name:"XX"] } ]

Resources