Get fields that MongoDB aggregate doesn't match - node.js

I am creating a MERN app that allows users to sign up and save their skills to a database. I am creating an admin panel that allows me to search for users by skill. User's skills will look like this in the database:
skills: [
{skill: 'React', yearsExperience: 3},
{skill: 'HTML', yearsExperience: 5},
{skill: 'JavaScript', yearsExperience: 5},
{skill: 'Git', yearsExperience: 3},
{skill: 'TypeScript', yearsExperience: 1},
{skill: 'C++', yearsExperience: 1}
]
I have an aggregation query set up that allows me to find all users who match a query by at least 25%. That means if I search for someone with React, SQL, and C++ skills, it will pull back anyone matching at least one of those. My question at this point is: how can I modify my aggregation query to create a new field on the document that lists which searched skills did not match? For example, this field would show Mark as missing React and SQL skills if he only has C++ experience.
Here is my current query that shows matching users and the percentage they match to the query I search for (in this case greater than 3 years SQL experience, and greater than 2 years GIT experience):
await User.aggregate([
  // Stage 1: keep only the skill entries that satisfy one of the searched
  // requirements (SQL with more than 3 years, or Git with more than 2 years).
  {
    $addFields: {
      matchingSkills: {
        $filter: {
          input: '$skills',
          cond: {
            $or: [
              {
                $and: [
                  { $gt: ['$$this.yearsExperience', 3] },
                  { $eq: ['$$this.skill', 'SQL'] },
                ],
              },
              {
                $and: [
                  { $gt: ['$$this.yearsExperience', 2] },
                  { $eq: ['$$this.skill', 'Git'] },
                ],
              },
            ],
          },
        },
      },
    },
  },
  // Stage 2: turn the number of matched entries into a percentage and drop the
  // helper array from the output.
  {
    $addFields: {
      matchingSkills: '$$REMOVE',
      percentageMatch: {
        $multiply: [
          // skillSearch is defined outside this snippet — presumably the list
          // of searched skills; confirm against the caller.
          { $divide: [{ $size: '$matchingSkills' }, skillSearch.length] },
          100,
        ],
      },
    },
  },
  // Stage 3: keep users that match at least 25% of the search.
  { $match: { percentageMatch: { $gte: 25 } } },
]);

To get a list of missing skills,
Add field requiredSkills which will be an array of just the searched skills names.
// Builds the $filter condition for one searched skill: the array element must
// carry that skill name with strictly more than `minYears` years of experience.
const skillWithMoreThan = (skillName, minYears) => ({
  $and: [
    { $gt: ["$$this.yearsExperience", minYears] },
    { $eq: ["$$this.skill", skillName] },
  ],
});

// Adds `matchingSkills` (the user's skill entries that satisfy the search) and
// `requiredSkills` (just the names of the searched skills).
let requiredSkillsStage = {
  $addFields: {
    matchingSkills: {
      $filter: {
        input: "$skills",
        cond: {
          $or: [skillWithMoreThan("SQL", 3), skillWithMoreThan("Git", 2)],
        },
      },
    },
    requiredSkills: ["SQL", "Git"],
  },
};
Use $map to get matched skills' names.
// Projects every entry of `matchingSkills` down to its `skill` name and stores
// the resulting array of names as `matchingSkillsNames`.
let matchingSkillsNamesStage = {
  $addFields: {
    matchingSkillsNames: {
      $map: {
        input: "$matchingSkills",
        as: "matchingSkill",
        in: "$$matchingSkill.skill",
      },
    },
  },
};
Filter-out matched skills from requiredSkills
// Adds `missingSkills`: every name in `requiredSkills` that does not appear in
// `matchingSkillsNames`.
let missingSkillsStage = {
  $addFields: {
    missingSkills: {
      $filter: {
        input: "$requiredSkills",
        cond: {
          $not: {
            $in: [
              "$$this",
              // $in raises an error when its second argument is not an array.
              // `matchingSkillsNames` is null for a document that has no
              // `skills` array, so fall back to an empty list; such a user is
              // then reported as missing every required skill.
              { $ifNull: ["$matchingSkillsNames", []] },
            ],
          },
        },
      },
    },
  },
};
Try it out to see how the output would look like

Related

Mongodb $lookup inside $addfield?

I have a collection named users, and this is how one specific user will look like:
{
_id: 'Object ID',
name: 'String',
cart: [
{
product_id: 'Product object ID',
quantity: 'Number',
},
...
],
}
I want my desired results to look like this:
{
_id: 'Object ID',
name: 'String',
cart: [
{
product_id: 'Product object ID',
quantity: 'Number',
product_details: {
'all the details of the product from Products collection which matches the product_id',
},
},
...
],
}
I tried adding addFields into lookup but it's getting too complicated and doesn't work as desired. What's the best way to aggregate this?
You can achieve this in several different ways, here's what I consider to be the most simple:
// Replaces each user's `cart` with cart items that carry the matching product
// document under `product_details`, i.e. the shape
// { product_id, quantity, product_details: { ...product } }.
db.users.aggregate([
  {
    $lookup: {
      from: "products",
      // Treat a missing cart as empty: $in below raises an error when its
      // second argument is not an array.
      let: { cart: { $ifNull: ["$cart", []] } },
      pipeline: [
        // Keep only the products referenced by this user's cart.
        {
          $match: {
            $expr: { $in: ["$_id", "$$cart.product_id"] }
          }
        },
        // Start from the cart item that points at this product and nest the
        // product document inside it, instead of flattening both objects
        // together (which lets same-named fields overwrite each other).
        {
          $replaceRoot: {
            newRoot: {
              $mergeObjects: [
                {
                  $arrayElemAt: [
                    {
                      $filter: {
                        input: "$$cart",
                        cond: { $eq: ["$_id", "$$this.product_id"] }
                      }
                    },
                    0
                  ]
                },
                { product_details: "$$ROOT" }
              ]
            }
          }
        }
      ],
      as: "cart"
    }
  }
])
Mongo Playground

MongoDB query that shows how many fields document matches

I am creating a MERN app that allows users to sign up and save their skills to a database. I am creating an admin panel that allows me to search for users by skill. User's skills will look like this in the database:
skills: [
{skill: 'React', yearsExperience: 3},
{skill: 'HTML', yearsExperience: 5},
{skill: 'JavaScript', yearsExperience: 5},
{skill: 'Git', yearsExperience: 3},
{skill: 'TypeScript', yearsExperience: 1},
{skill: 'C++', yearsExperience: 1}
]
I am using OR queries to query the database to pull back users who match several requirements like this (users with more than 3 years of experience in C, or users with more than 3 years of experience in SQL):
$or: [
{ skills: { $elemMatch: { skill: 'C', yearsExperience: { $gt: 3 } } } },
{ skills: { $elemMatch: { skill: 'SQL', yearsExperience: { $gt: 3 } } } },
];
My question is, how can I return the users in an array with a property called percentageMatch that shows how many fields the query searched for that they matched? For example, if Mark has both C and SQL experience with greater than 3 years his property will say 100%, but if James has only 3 years or greater of SQL experience, and no C experience his percentageMatching will say 50%.
// Response of match query
[
{
name: 'Mark',
skills: [...],
percentageMatch: 100%
},
{
name: 'James',
skills: [...],
percentageMatch: 50%
}
]
You can do it with aggregation,
$match to get your desired document
$filter to filter the array based on your condition and assign it to matchingSkills using $addFields
then get the percentage using $multiply, $divide.
Here is the code
db.collection.aggregate([
  // Restrict the pipeline to the document(s) of interest.
  { $match: { name: "mark" } },
  // Collect the skill entries that satisfy one of the searched requirements
  // (HTML or Git, each with more than 3 years of experience).
  {
    $addFields: {
      matchingSkills: {
        $filter: {
          input: "$skills",
          cond: {
            $or: [
              {
                $and: [
                  { $gt: ["$$this.yearsExperience", 3] },
                  { $eq: ["$$this.skill", "HTML"] }
                ]
              },
              {
                $and: [
                  { $gt: ["$$this.yearsExperience", 3] },
                  { $eq: ["$$this.skill", "Git"] }
                ]
              }
            ]
          }
        }
      }
    }
  },
  // Convert the number of matched entries into a percentage and remove the
  // helper array from the result.
  {
    $addFields: {
      matchingSkills: "$$REMOVE",
      percentageMatch: {
        $multiply: [
          // 2 is the number of searched requirements; it is already known when
          // the query is built.
          { $divide: [{ $size: "$matchingSkills" }, 2] },
          100
        ]
      }
    }
  }
])
Working Mongo playground

search from child collection contains the keyword from parent and child collection

I am facing a problem with getting data from database using parent child relationship collections.
Here is my collection structure --
-post
---post cloth : brand id from brands collections
-----brand
Now I am getting data from post and post cloth with keyword search from post cloth and brand table if any key matches from post cloth and brand. Till post cloth it is working fine along with keyword search in or condition, Now I also need to search from brand and return the result if keyword contains in brand as well.
here are my cases --
data returned : if any of post_cloths keys matches the keyword searched
data returned : if any of the post_cloths keys matches the keyword OR lookup with brand name matches the keyword
data returned : if all keys from post_cloths not matches the keyword but lookup with brand name matches the keyword
data not returned : if no keys from post_cloths matches the keyword and also lookup with brand name not matches the keyword
Here is my code :
// Builds the paging value, the post-level filter and the keyword search
// expressions used by the aggregation that follows.
// `req` and `ObjectID` come from the enclosing scope (not visible here).

// Zero-based page index. Query-string values arrive as strings, so parse the
// value and fall back to the first page for anything that is not a
// non-negative integer ($skip only accepts non-negative integers).
const requestedPage = Number.parseInt(req.query.page, 10);
const page = Number.isInteger(requestedPage) && requestedPage >= 0 ? requestedPage : 0;

// Only keep posts that ended up with at least one matching cloth, optionally
// restricted to one creator.
const filter = { totalCloth: { $gte: 1 } };
if (req.query.user != null && req.query.user !== '') {
  filter.createdBy = ObjectID(req.query.user);
}
console.log(filter);

// With no keyword, `{}` is used as an always-truthy operand so the `$or`
// built from these arrays accepts every document.
let searchQuery = [{}];
let brandSearchQuery = [{}];

// Declared locally: the bare assignment `keyword = ...` created an implicit
// global, which throws in strict mode / ES modules.
const keyword = req.query.keyword;
if (keyword != null && keyword !== '') {
  console.log(keyword);

  // This is a plain keyword search, so regex metacharacters typed by the user
  // are escaped: they are matched literally and cannot produce an invalid or
  // pathological pattern.
  const escapedKeyword = String(keyword).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Case-insensitive "field contains keyword" expression for one field path.
  const containsKeyword = (fieldPath) => ({
    $regexFind: {
      input: fieldPath,
      regex: new RegExp(escapedKeyword),
      options: 'i',
    },
  });

  searchQuery = ['$category', '$color', '$country', '$size', '$clothMaterial'].map(
    containsKeyword,
  );
  brandSearchQuery = [containsKeyword('$name')];
}
// get the post details
// PostModel.find(filter).countDocuments().then(countPosts => {
// Loads posts together with their matching post_cloths, then pages the result.
// `filter`, `page`, `searchQuery` and `brandSearchQuery` are built before this
// call; `apiResponse` and `res` come from the enclosing scope.
PostModel.aggregate([
{
$lookup: {
from: 'post_cloths',
let: { postId: '$_id' },
pipeline: [
// Nested lookup: attach the cloth's brand, but only when the brand also
// satisfies brandSearchQuery, so `brand` is an empty array otherwise.
{
$lookup: {
from: 'brands',
let: { brandId: '$brandId' },
pipeline: [
{
$match: {
$expr:
{
$and:
[
{ $eq: ['$_id', '$$brandId'] },
{ $or: brandSearchQuery },
],
},
},
},
],
as: 'brand',
},
},
// End of brand lookup.
// Keep the cloths of this post whose own fields satisfy searchQuery.
// NOTE(review): a cloth whose fields do not match is dropped here even when
// its brand matched, so a brand-only match never reaches the result.
{
$match: {
$expr: {
$and:
[
{ $eq: ['$postId', '$$postId'] },
{
$or: searchQuery,
},
],
},
},
},
// Reduce each remaining cloth to the number of matching brands.
{
$project: {
totalBrands: { $size: '$brand' },
},
},
// NOTE(review): `$match` is a pipeline stage, not an aggregation expression
// operator, so it is not expected to be usable inside `$expr`; a comparison
// such as { $gte: ['$totalBrands', 1] } looks like the intent — confirm.
{
$match: {
$expr:
{ $or: [{ $match: { totalBrands: { $gte: 1 } } }] },
},
},
],
as: 'postCloth',
},
},
// Shape the post for the response and count the cloths that survived.
{
$project: {
image: 1,
createdAt: 1,
createdBy: 1,
mediaUrl: {
$concat: [process.env.PROJECT_URL + '/files/', '$image'],
},
totalCloth: { $size: '$postCloth' },
},
},
// Apply `filter`, which requires at least one matching cloth (totalCloth >= 1)
// and optionally a specific creator.
{
$match: filter,
},
// NOTE(review): $sort runs after $skip/$limit, so only the 12 documents of the
// selected page are ordered; sorting first is needed for newest-first paging.
{ $skip: 12 * page },
{ $limit: 12 },
{ $sort: { createdAt: -1 } },
]).exec(function(err, post) {
// NOTE(review): `err` is never inspected, so a failed aggregation still sends
// a 'Successful' response.
return apiResponse.successResponseWithData(res, 'Successful', post);
});
I am getting data properly, but not while searching from brand. Please suggest how we can search the data from the cases given. there is simple keyword search.
Thanks in advance
The problem is with your main $lookup's pipeline:
First you start with the brand $lookup, which I'll assume works (if you provide the schemas for your collections it would be easy to verify); however, right after that $lookup you do this:
{
$match: {
$expr:
{
$and:
[
{ $eq: ['$postId', '$$postId'] },
{
$or: searchQuery,
},
],
},
},
},
This means if the searchQuery fails even if a brand exists the document will be filtered out, you should change it to:
// Replacement for the cloth-level $match stage: a cloth is kept when it belongs
// to the current post AND (its own fields match the keyword OR the nested
// brand lookup returned at least one brand).
{
$match: {
$expr:
{
$and:
[
{ $eq: ['$postId', '$$postId'] },
{
$or: [
{
// Any of the cloth's own fields matches the keyword.
$or: searchQuery
},
{
// `brand` is non-empty only when the brand matched in the nested $lookup.
$gt: [{$size: "$brand"}, 0]
}
],
},
],
},
},
},
Now this will also match documents that have any brands in the brand field, meaning the brand matched the nested $lookup; you can then drop the next two stages that check the brand size.
I also recommend that you move the $eq for the postId to the start of the $lookup, this will improve performance immensely, after all the changes the entire pipeline would look like:
// Returns one page (12 items, newest first) of posts that have at least one
// cloth matching the keyword, either through the cloth's own fields or through
// its brand name. `searchQuery`, `brandSearchQuery`, `filter` and `page` are
// built before this call.
PostModel.aggregate([
  {
    $lookup: {
      from: 'post_cloths',
      let: { postId: '$_id' },
      pipeline: [
        // Narrow down to this post's cloths first so the nested brand lookup
        // only runs for them. `$expr` is required here: a plain $match cannot
        // use the `$eq` aggregation form or read the `$$postId` variable.
        {
          $match: {
            $expr: { $eq: ['$postId', '$$postId'] },
          },
        },
        // Attach the cloth's brand only when the brand satisfies the keyword,
        // so `brand` is an empty array otherwise.
        {
          $lookup: {
            from: 'brands',
            let: { brandId: '$brandId' },
            pipeline: [
              {
                $match: {
                  $expr: {
                    $and: [
                      { $eq: ['$_id', '$$brandId'] },
                      { $or: brandSearchQuery },
                    ],
                  },
                },
              },
            ],
            as: 'brand',
          },
        },
        // Keep the cloth when its own fields match OR its brand matched.
        {
          $match: {
            $expr: {
              $or: [
                { $or: searchQuery },
                { $gt: [{ $size: '$brand' }, 0] },
              ],
            },
          },
        },
      ],
      as: 'postCloth',
    },
  },
  // Shape the post for the response and count the cloths that survived.
  {
    $project: {
      image: 1,
      createdAt: 1,
      createdBy: 1,
      mediaUrl: {
        $concat: [`${process.env.PROJECT_URL}/files/`, '$image'],
      },
      totalCloth: { $size: '$postCloth' },
    },
  },
  // `filter` requires totalCloth >= 1 and optionally a specific creator.
  {
    $match: filter,
  },
  // Sort before paging: sorting after $skip/$limit would only order the 12
  // documents of an arbitrary slice instead of paging through newest-first.
  { $sort: { createdAt: -1 } },
  { $skip: 12 * page },
  { $limit: 12 },
])

Mongo: add fields with count of how many times another field appears

I'm new to MongoDB. I am writing an app using mongoose and NodeJS. I start with this collection:
[
{ name: "Joe", hobby: "Food"},
{ name: "Lyn", hobby: "Food"},
{ name: "Rex", hobby: "Play"},
{ name: "Rex", hobby: "Shop"},
...
]
And I want to output a subset of the documents with two new fields: nameCount showing how many times the document's name value appears, and hobbyCount showing the same thing for the document's hobby:
[
{ name: "Joe", hobby: "Food", nameCount: 1, hobbyCount: 2 },
{ name: "Lyn", hobby: "Food", nameCount: 1, hobbyCount: 2 },
{ name: "Rex", hobby: "Play", nameCount: 2, hobbyCount: 1 },
{ name: "Rex", hobby: "Shop", nameCount: 2, hobbyCount: 1 }
]
From my research and fiddling about I got the following query to work but it seems over the top, inefficient and over-complicated.
db.members.aggregate([
  // Select the subset of members to annotate.
  { $skip: 0 },
  { $limit: 4 },

  // Count how many members share this document's name...
  {
    $lookup: {
      from: "members",
      let: { name: "$name" },
      pipeline: [
        {
          $match: {
            $expr: { $eq: ["$name", "$$name"] }
          }
        },
        { $count: "count" }
      ],
      as: "nameCount"
    }
  },
  // ...and replace the one-element lookup result with the plain number.
  { $unwind: "$nameCount" },
  {
    $addFields: { nameCount: "$nameCount.count" }
  },

  // Same again for the hobby.
  {
    $lookup: {
      from: "members",
      let: { hobby: "$hobby" },
      pipeline: [
        {
          $match: {
            $expr: { $eq: ["$hobby", "$$hobby"] }
          }
        },
        { $count: "count" }
      ],
      as: "hobbyCount"
    }
  },
  { $unwind: "$hobbyCount" },
  {
    $addFields: { hobbyCount: "$hobbyCount.count" }
  }
]);
Mongo Playground
It's bugging me in particular, not just that the query seems overdone, but that it looks like I'm running two new searches per record found through the whole collection when maybe the nameCount and hobbyCount could be compiled in a single search.
Update
Valijon posted an answer that made me realize that I oversimplified my actual problem when trying to post the minimum required. In reality the collection is filtered (with a $match, $skip and $take) before the first lookup that I posted. As a result, Valijon's answer doesn't actually work for me, although it's a great answer for the way I originally posed the problem. Sorry, I'm updating the OP
See the playground
EDIT: We need to use only 1 $lookup (we match both by name and hobby) and count nameCount and hobbyCount by applying $filter or $reduce operators
db.members.aggregate([
  // Select the subset of members to annotate.
  { $skip: 1 },
  { $limit: 2 },
  // Single self-lookup: fetch every member sharing either the name or the hobby.
  {
    $lookup: {
      from: "members",
      let: { name: "$name", hobby: "$hobby" },
      pipeline: [
        {
          $match: {
            $expr: {
              $or: [
                { $eq: ["$name", "$$name"] },
                { $eq: ["$hobby", "$$hobby"] }
              ]
            }
          }
        }
      ],
      as: "count"
    }
  },
  // Derive both counters from the looked-up documents.
  {
    $project: {
      _id: 0,
      name: 1,
      hobby: 1,
      // $reduce variant: add 1 for every looked-up member with the same name.
      nameCount: {
        $reduce: {
          input: "$count",
          initialValue: 0,
          in: {
            $add: [
              "$$value",
              { $cond: [{ $eq: ["$name", "$$this.name"] }, 1, 0] }
            ]
          }
        }
      },
      // $filter variant: size of the sub-list of members with the same hobby.
      hobbyCount: {
        $size: {
          $filter: {
            input: "$count",
            cond: { $eq: ["$hobby", "$$this.hobby"] }
          }
        }
      }
    }
  }
])
MongoPlayground

MongoDB Mongoose aggregate query deeply nested array remove empty results and populate references

This question is a follow up to a previous question for which I have accepted an answer already. I have an aggregate query that returns the results of a deeply nested array of subdocuments based on a date range. The query returns the correct results within the specified date range, however it also returns an empty array for the results that do not match the query.
Technologies: MongoDB 3.6, Mongoose 5.5, NodeJS 12
Question 1:
Is there any way to remove the results that don't match the query?
Question 2:
Is there any way to 'populate' the Person db reference in the results? For example to get the Person Display Name I usually use 'populate' such as find().populate({ path: 'Person', select: 'DisplayName'})
Records schema
// Sub-document schema for one entry of a record's ItemReport array.
// Declared before RecordsSchema: RecordsSchema reads this binding while it is
// being constructed, and a let/const binding cannot be read before its
// declaration has run.
const ItemReportSchema = new Schema({
  // ObjectId reference
  ReportBy: {
    type: Schema.Types.ObjectId,
    ref: 'people'
  },
  ReportDate: {
    type: Date,
    required: true
  },
  // Work entries logged against this report; Person is an ObjectId reference.
  WorkDoneBy: [{
    Person: {
      type: Schema.Types.ObjectId,
      ref: 'people'
    },
    CompletedHours: {
      type: Number,
      required: true
    },
    DateCompleted: {
      type: Date
    }
  }],
});

// Top-level record document, stored in the 'records' collection.
const RecordsSchema = new Schema({
  RecordID: {
    type: Number,
    index: true
  },
  RecordType: {
    type: String
  },
  Status: {
    type: String
  },
  // ItemReport array of subdocuments
  ItemReport: [ItemReportSchema],
}, {
  collection: 'records',
  selectPopulatedPaths: false
});
Query
Works but also returns empty results and also need to populate the Display Name property of the Person db reference
db.records.aggregate([
  {
    // Rebuild ItemReport so that every entry keeps only the WorkDoneBy items
    // completed strictly between the two dates below.
    $project: {
      ItemReport: {
        $map: {
          input: "$ItemReport",
          as: "ir",
          in: {
            WorkDoneBy: {
              $filter: {
                input: "$$ir.WorkDoneBy",
                as: "value",
                cond: {
                  $and: [
                    { $ne: ["$$value.DateCompleted", null] },
                    { $gt: ["$$value.DateCompleted", new Date("2017-01-01T12:00:00.000Z")] },
                    { $lt: ["$$value.DateCompleted", new Date("2018-12-31T12:00:00.000Z")] }
                  ]
                }
              }
            }
          }
        }
      }
    }
  }
])
Actual Results
{
"_id": "5dcb6406e63830b7aa5427ca",
"ItemReport": [
{
"WorkDoneBy": [
{
"_id": "5dcb6406e63830b7aa53d8ea",
"PersonID": 111,
"ReportID": 8855,
"CompletedHours": 3,
"DateCompleted": "2017-01-20T05:00:00.000Z",
"Person": "5dcb6409e63830b7aa54fdba"
}
]
}
]
},
{
"_id": "5dcb6406e63830b7aa5427f1",
"ItemReport": [
{
"WorkDoneBy": [
{
"_id": "5dcb6406e63830b7aa53dcdc",
"PersonID": 4,
"ReportID": 9673,
"CompletedHours": 17,
"DateCompleted": "2017-05-18T04:00:00.000Z",
"Person": "5dcb6409e63830b7aa54fd69"
},
{
"_id": "5dcb6406e63830b7aa53dcdd",
"PersonID": 320,
"ReportID": 9673,
"CompletedHours": 3,
"DateCompleted": "2017-05-18T04:00:00.000Z",
"Person": "5dcb6409e63830b7aa54fe88"
}
]
}
]
},
{
"_id": "5dcb6406e63830b7aa5427f2",
"ItemReport": [
{
"WorkDoneBy": []
}
]
},
{
"_id": "5dcb6406e63830b7aa5427f3",
"ItemReport": [
{
"WorkDoneBy": []
}
]
},
{
"_id": "5dcb6406e63830b7aa5427f4",
"ItemReport": [
{
"WorkDoneBy": []
}
]
},
{
"_id": "5dcb6406e63830b7aa5427f5",
"ItemReport": [
{
"WorkDoneBy": []
}
]
},
Desired results
Note the results with an empty "WorkDoneBy" array are removed (question 1), and the "Person" display name is populated (question 2).
{
"_id": "5dcb6406e63830b7aa5427f1",
"ItemReport": [
{
"WorkDoneBy": [
{
"_id": "5dcb6406e63830b7aa53dcdc",
"CompletedHours": 17,
"DateCompleted": "2017-05-18T04:00:00.000Z",
"Person": {
_id: "5dcb6409e63830b7aa54fe88",
DisplayName: "Joe Jones"
}
},
{
"_id": "5dcb6406e63830b7aa53dcdd",
"CompletedHours": 3,
"DateCompleted": "2017-05-18T04:00:00.000Z",
"Person": {
_id: "5dcb6409e63830b7aa54fe88",
DisplayName: "Alice Smith"
}
}
]
}
]
},
First question is relatively easy to answer and there are multiple ways to do that. I would prefer using $anyElementTrue along with $map as those operators are pretty self-explanatory.
{
"$match": {
$expr: { $anyElementTrue: { $map: { input: "$ItemReport", in: { $gt: [ { $size: "$$this.WorkDoneBy" }, 0 ] } } } }
}
}
MongoPlayground
Second part is a bit more complicated but still possible. Instead of populate you need to run $lookup to bring the data from other collection. The problem is that your Person values are deeply nested so you need to prepare a list of id values before using $reduce and $setUnion. Once you get the data you need to merge your nested objects with people entities using $map and $mergeObjects.
{
$addFields: {
people: {
$reduce: {
input: "$ItemReport",
initialValue: [],
in: { $setUnion: [ "$$value", "$$this.WorkDoneBy.Person" ] }
}
}
}
},
{
$lookup: {
from: "people",
localField: "peopleIds",
foreignField: "_id",
as: "people"
}
},
{
$project: {
_id: 1,
ItemReport: {
$map: {
input: "$ItemReport",
as: "ir",
in: {
WorkDoneBy: {
$map: {
input: "$$ir.WorkDoneBy",
as: "wdb",
in: {
$mergeObjects: [
"$$wdb",
{
Person: { $arrayElemAt: [{ $filter: { input: "$people", cond: { $eq: [ "$$this._id", "$$wdb.Person" ] } } } , 0] }
}
]
}
}
}
}
}
}
}
}
Complete Solution

Resources