mongodb - $lookup with $if operator - node.js

I am building a side project. where I want to use nested $lookup and $unwind if certain condition match. I created simple prototype below. some users have numbers and some don't have numbers. I want to do join if the number is not null or undefined
here is the code -
users.aggregate([
{
$lookup: "number",
localField:"user.number",
foreignField:"name",
as:"number"
}
{
$unwind: {
path: "$number",
preserveNullAndEmptyArrays: true
}
}
,
{
$lookup: "countries",
localField:"number.countrycode",
foreignField:"name",
as:"countries"
},
{
$unwind:"countries",
//preserveNullAndEmptyArrays
},
$project:{
name:1,
number:"$number.phoneNumber",
countryCode:"$countries.countryCode"
}
])
Output:
[{
name:"yashraj basan"
number:"123345677",
countryCode:"US"
},
{
name:"krutik basan"
number:"123345679",
countryCode:"FR"
}]
Right now i am getting all users who have numbers but i am want both users who have number and who don't have number
Expected output:
[{
name:"yashraj basan"
number:"123345677",
countryCode:"US"
},
{
name:"krutik basan"
number:"123345679",
countryCode:"FR"
},
{
name:"dhruvam basan",
number:"",
countrycode:""
},{
name:"foo bar",
number:"",
countrycode:""
}]
I appreciate all your inputs. thank you

There's no need to run $lookup conditionally. You can do it for every user but then when you use $unwind you can take advantage of preserveNullAndEmptyArrays option:
{
$lookup: {
from:"countries",
localField:"user.number.countrycode",
foreignField:"name",
as:"countries"
}
},
{
$unwind: {
path: "countries",
preserveNullAndEmptyArrays: true
}
}

Related

How can I write query in mongodb?

I have a collection of mongodb like this :
[{
"_id":"ObjectId(""51780fb5c9c41825e3e21fc4"")",
"name":"CS 101",
"students":[
{
"name":"raj",
"year":2016
},
{
"name":"rahul",
"year":2017
},
{
"name":"anil",
"year":2018
}
]
},
{
"_id":"ObjectId(""51780fb5c9c41825e3e21fs4"")",
"name":"CS 102",
"students":[
{
"name":"mukesh",
"year":2016
},
{
"name":"mohan",
"year":2017
},
{
"name":"mangal",
"year":2018
}
]
}
]
I've been looking for similar questions like this one: Mongo db - Querying nested array and objects but in that question they're looking for a specific element inside the "messages" object (in my case) for example. Same as in this other question: Query for a field in an object in an array with Mongo? where they're using $mapan d I don't think it fits my needs.
The documents to find have this structure:
[{
"_id":"ObjectId(""51780fb5c9c41825e3e21fc4"")",
"name":"CS 101",
"students":[
"raj","rahul","anil"
]
},
{
"_id":"ObjectId(""51780fb5c9c41825e3e21fs4"")",
"name":"CS 102",
"students":[
"mukesh","mohan","mangal"
]
}
]
how to solve this?
From the question and datasets, you are trying to return students with an array of student's name (string) instead of the array of student object.
Use $project to display students as students.name array.
db.collection.aggregate([
{
$project: {
"_id": "$_id",
"name": "$name",
"students": "$students.name"
}
}
])
Sample Solution 1 on Mongo Playground
OR
Use $set to replace the students field with students.name array.
db.collection.aggregate([
{
$set: {
"students": "$students.name"
}
}
])
Sample Solution 2 on Mongo Playground

Mongoose: Infinite scroll with filtering

I have these two models:
User.js
const UserSchema = new Schema({
profile: {
type: Schema.Types.ObjectId,
ref: "profiles",
},
following: [
{
type: Schema.Types.ObjectId,
ref: "users",
},
],
});
module.exports = User = mongoose.model("users", UserSchema);
Profile.js
const ProfileSchema = new Schema({
videoURL: {
type: String,
},
});
module.exports = Profile = mongoose.model("profiles", ProfileSchema);
Here's an example of a User document:
{
"following": [
{
"profile":{
"videoURL":"video_url_1"
}
},
{
"profile":{
"videoURL":"video_url_2"
}
},
{
"profile":{}
},
{
"profile":{
"videoURL":"video_url_3"
}
},
{
"profile":{
"videoURL":"video_url_4"
}
},
{
"profile":{
"videoURL":"video_url_5"
}
},
{
"profile":{}
},
{
"profile":{
"videoURL":"video_url_6"
}
}
]
}
I am trying to implement an infinite scroll of the videos of the users followed by the connected user.
This means, I will have to filter user.following.profile.videoURL
WHERE videoURL exists
Suppose, I will be loading two videos, by two videos:
Response 1: ["video_url_1","video_url_2"]
Response 2: ["video_url_3","video_url_4"]
Response 3: ["video_url_5","video_url_6"]
Usually, infinite scroll is easy because all I have to load the documents 2 by 2 by order of storage without filtering on any field.
Example: Displaying the followed users two by two in an infinite scroll
User.findById(user_id).populate({
path: "following",
options: {
skip: 2 * page,
limit: 2,
},
});
But, now I have to perform filtering on each followed_user.profile.video, and return two by two. And I don't see how I can perform BOTH the filtering and the infinite scroll at the same time.
NOTE: According to the documentation:
In general, there is no way to make populate() filter stories based on properties of the story's author. For example, the below query won't return any results, even though author is populated.
const story = await Story.
findOne({ 'author.name': 'Ian Fleming' }).
populate('author').
exec();
story; // null
So I suppose, there is no way for me to use populate to filter based user.followers, based on each user.follower.profile.videoURL
I am not sure it is possible with populate method, but you can try aggregation pipeline,
$match user_id condition
$lookup with aggregation pipeline in users collection for following
$match following id condition
$lookup with profile for following.profile
$match videoURL should exists
$project to show profile field and get first element using $arrayElemAt
$slice to do pagination in following
let page = 0;
let limit = 2;
let skip = limit * page;
User.aggregate([
{ $match: { _id: mongoose.Types.ObjectId(user_id) } },
{
$lookup: {
from: "users",
let: { following: "$following" },
pipeline: [
{ $match: { $expr: { $in: ["$_id", "$$following"] } } },
{
$lookup: {
from: "profiles",
localField: "profile",
foreignField: "_id",
as: "profile"
}
},
{ $match: { "profile.videoURL": { $exists: true } } },
{
$project: {
profile: { $arrayElemAt: ["$profile", 0] }
}
}
],
as: "following"
}
},
{
$addFields: {
following: {
$slice: ["$following", skip, limit]
}
}
}
])
Playground
Suggestion:
You can improve your schema design,
removing profile schema and add profile object in users collection, so you can achieve easily your requirement using populate method,
put match condition in following populate for videoURL exists
const UserSchema = new Schema({
profile: {
type: {
videoURL: {
type: String
}
}
},
following: [
{
type: Schema.Types.ObjectId,
ref: "users"
}
]
});
module.exports = User = mongoose.model("users", UserSchema);
User.findById(user_id).populate({
path: "following",
match: {
"profile.videoURL": { $ne: null }
},
options: {
skip: 2 * page,
limit: 2,
}
});
So what you want is table with infinite scroll and:
You can opt given ways to approach your problem :
Load data (first page) into grid.
Set filter on a col.
Load data again, this time using the filter.

MongoDB/Mongoose: Search based on value within a given document without first returning that document

I'd like to do a search in MongoDB using either Mongo or Mongoose based on the value of a field in a document.
Let's say I had three MongoDB documents that looked like this:
{
name: "Michael",
mentored: ["Dwight", "Ryan", "Jim"]
},
{
name: "Jim",
mentored: ["Toby", "Roy", "Darryl"]
},
{
name: "Stanley",
mentored: ["Pam", "Meredith", "Angela"]
}
Let's further say I want to do a search for anyone who Michael has not mentored, which in this case would be Stanley (let's assume that the people in the arrays don't necessarily have their own records). I know I can do a search like this in Mongoose to get the result I want:
User.findOne({ name: "Michael" })
.then((person) => {
const mentored = person.mentored
return User.find({ name: { $nin: mentored } })
)
.then((person2) => {
console.log(person2); // Stanley
})
However, is there any way to do this without first returning the array from the database and then doing a second search? Something like this:
User.findOne({ name: { $nin: { "Michael's mentored people array" } } })
Ultimately I'm looking to see if there's any way to make more efficient such a situation in which arrays can get 10s of thousands of values long. Many thanks.
I think what you are doing is already efficient for large arrays.
But you could try the following, as suggested by #D.SM. but using aggregation the intermediate results will have to be loaded in memory, which does not seem efficient to me.
User.aggregate([
{
$match: {
name: "Michael"
}
},
{
$lookup: {
from: "collection",
as: "notMentored",
let: {
mentored: "$mentored",
/*
You probably want to remove "Michael" from the result, one way is to add him to the mentored array
mentored: { $concatArrays: ["$mentored", ["$name"]]}
*/
},
pipeline: [{
$match: {
$expr: { $not: { $in: ["$name", "$$mentored"] } }
}
}]
}
},
{
$unwind: "$notMentored"
},
{
$replaceRoot: {
newRoot: "$notMentored"
}
}
])

Push if not present or update a nested array mongoose [duplicate]

I have documents that looks something like that, with a unique index on bars.name:
{ name: 'foo', bars: [ { name: 'qux', somefield: 1 } ] }
. I want to either update the sub-document where { name: 'foo', 'bars.name': 'qux' } and $set: { 'bars.$.somefield': 2 }, or create a new sub-document with { name: 'qux', somefield: 2 } under { name: 'foo' }.
Is it possible to do this using a single query with upsert, or will I have to issue two separate ones?
Related: 'upsert' in an embedded document (suggests to change the schema to have the sub-document identifier as the key, but this is from two years ago and I'm wondering if there are better solutions now.)
No there isn't really a better solution to this, so perhaps with an explanation.
Suppose you have a document in place that has the structure as you show:
{
"name": "foo",
"bars": [{
"name": "qux",
"somefield": 1
}]
}
If you do an update like this
db.foo.update(
{ "name": "foo", "bars.name": "qux" },
{ "$set": { "bars.$.somefield": 2 } },
{ "upsert": true }
)
Then all is fine because matching document was found. But if you change the value of "bars.name":
db.foo.update(
{ "name": "foo", "bars.name": "xyz" },
{ "$set": { "bars.$.somefield": 2 } },
{ "upsert": true }
)
Then you will get a failure. The only thing that has really changed here is that in MongoDB 2.6 and above the error is a little more succinct:
WriteResult({
"nMatched" : 0,
"nUpserted" : 0,
"nModified" : 0,
"writeError" : {
"code" : 16836,
"errmsg" : "The positional operator did not find the match needed from the query. Unexpanded update: bars.$.somefield"
}
})
That is better in some ways, but you really do not want to "upsert" anyway. What you want to do is add the element to the array where the "name" does not currently exist.
So what you really want is the "result" from the update attempt without the "upsert" flag to see if any documents were affected:
db.foo.update(
{ "name": "foo", "bars.name": "xyz" },
{ "$set": { "bars.$.somefield": 2 } }
)
Yielding in response:
WriteResult({ "nMatched" : 0, "nUpserted" : 0, "nModified" : 0 })
So when the modified documents are 0 then you know you want to issue the following update:
db.foo.update(
{ "name": "foo" },
{ "$push": { "bars": {
"name": "xyz",
"somefield": 2
}}
)
There really is no other way to do exactly what you want. As the additions to the array are not strictly a "set" type of operation, you cannot use $addToSet combined with the "bulk update" functionality there, so that you can "cascade" your update requests.
In this case it seems like you need to check the result, or otherwise accept reading the whole document and checking whether to update or insert a new array element in code.
if you dont mind changing the schema a bit and having a structure like so:
{ "name": "foo", "bars": { "qux": { "somefield": 1 },
"xyz": { "somefield": 2 },
}
}
You can perform your operations in one go.
Reiterating 'upsert' in an embedded document for completeness
I was digging for the same feature, and found that in version 4.2 or above, MongoDB provides a new feature called Update with aggregation pipeline.
This feature, if used with some other techniques, makes possible to achieve an upsert subdocument operation with a single query.
It's a very verbose query, but I believe if you know that you won't have too many records on the subCollection, it's viable. Here's an example on how to achieve this:
const documentQuery = { _id: '123' }
const subDocumentToUpsert = { name: 'xyz', id: '1' }
collection.update(documentQuery, [
{
$set: {
sub_documents: {
$cond: {
if: { $not: ['$sub_documents'] },
then: [subDocumentToUpsert],
else: {
$cond: {
if: { $in: [subDocumentToUpsert.id, '$sub_documents.id'] },
then: {
$map: {
input: '$sub_documents',
as: 'sub_document',
in: {
$cond: {
if: { $eq: ['$$sub_document.id', subDocumentToUpsert.id] },
then: subDocumentToUpsert,
else: '$$sub_document',
},
},
},
},
else: { $concatArrays: ['$sub_documents', [subDocumentToUpsert]] },
},
},
},
},
},
},
])
There's a way to do it in two queries - but it will still work in a bulkWrite.
This is relevant because in my case not being able to batch it is the biggest hangup. With this solution, you don't need to collect the result of the first query, which allows you to do bulk operations if you need to.
Here are the two successive queries to run for your example:
// Update subdocument if existing
collection.updateMany({
name: 'foo', 'bars.name': 'qux'
}, {
$set: {
'bars.$.somefield': 2
}
})
// Insert subdocument otherwise
collection.updateMany({
name: 'foo', $not: {'bars.name': 'qux' }
}, {
$push: {
bars: {
somefield: 2, name: 'qux'
}
}
})
This also has the added benefit of not having corrupted data / race conditions if multiple applications are writing to the database concurrently. You won't risk ending up with two bars: {somefield: 2, name: 'qux'} subdocuments in your document if two applications run the same queries at the same time.

Right outer join in aggregation pipeline

I have two collections, let's call them Cats and Parties, with the following schemas:
Cat
{ name: String }
Party
{ date: Date, attendants: [{ cat: { ref: 'Cat' }, role: String }] }
where role symbolizes some other attribute, say, whether the attending cat is a VIP member.
Now I want to get a list of all cats that exist (even those poor kitties who never attended any party) and for each cat, I want a list of all the roles it ever had for at least one party. Furthermore, I want this entire list to be sorted by the (per cat) last attended party's date with cats who never attended any party being last.
This raises the following problems for me:
Aggregrating over Parties excludes party-pooper kitties who never joined a party.
Aggregating over Cats sort of goes »the wrong way« because I cannot $lookup parties the cat attended because that information is in a subdocument array.
The pipeline I currently have gives me all cats who attended at least one party with a list of their roles, but doesn't sort by the last attended party. In fact, I could live with excluding cats who never attended a party, but the sorting is crucial for me:
Party.aggregate([
{ $unwind: '$attendants' },
{ $project: { role: '$attendants.role', cat: '$attendants.cat' } },
{
$group: {
_id: '$cat',
roles: { $addToSet: '$role' }
}
},
{
$lookup: {
from: 'cats',
localField: '_id',
foreignField: '_id',
as: 'cat'
}
},
{ $unwind: '$cat' },
// (*)
{ $addFields: { 'cat.roles': '$roles' } },
{ $replaceRoot: { newRoot: '$cat' } }
])
My current idea would basically be a right outer join at (*) to add a list of parties the cat has attended, $project that to the party's date and then $group using $max to get the latest date. Then I can $unwind that now one-element array and $sort over it in the end.
The problem is that right outer joins don't exist in mongo, AFAIK, and I don't know how to get that list of parties per cat within the pipeline.
To clarify, the expected output should be something like
[
{
"_id": "59982d3c7ca25936f8c327c8",
"name": "Mr. Kitty",
"roles": ["vip", "birthday cat"],
"dateOfLastParty": "2017-06-02"
},
{
"_id": "59982d3c7ca25936f8c327c9",
"name": "Snuffles",
"roles": ["best looking cat"],
"dateOfLastParty": "2017-06-01"
},
...
{
"_id": "59982d3c7ca25936f8c327c4",
"name": "Sad Face McLazytown",
"roles": [],
"dateOfLastParty": null
},
]
As stated, you want the "cats" so use the Cat model and do the "left outer join" that is actually inherent to $lookup, rather than asking for a "right outer join" from the opposing collection, since a "right outer join" is not possible with MongoDB at this time.
It's also far more practical as a "left join", because you want "cats" as your primary source of output. The only thing to consider when linking to "Party" is that each "Cat" is listed in an array, and therefore you get the whole document back. So all that needs to be done is in "post processing" after the $lookup, you simply "filter" the array content for the matching entry of the current cat.
Fortunately we get good features with $arrayElemAt and $indexOfArray, that allow us to do that exact extraction:
let kitties = await Cat.aggregate([
{ '$lookup': {
'from': Party.collection.name,
'localField': '_id',
'foreignField': 'attendants.cat',
'as': 'parties'
}},
{ '$replaceRoot': {
'newRoot': {
'$let': {
'vars': {
'parties': {
'$map': {
'input': '$parties',
'as': 'p',
'in': {
'date': '$$p.date',
'role': {
'$arrayElemAt': [
'$$p.attendants.role',
{ '$indexOfArray': [ '$$p.attendants.cat', '$_id' ] }
]
}
}
}
}
},
'in': {
'_id': '$_id',
'name': '$name',
'roles': '$$parties.role',
'dateOfLastParty': { '$max': '$$parties.date' }
}
}
}
}}
]);
So my concept of "optimal" processing here actually uses $replaceRoot here because you can define the whole document under a $let statement. The reason I'm doing that is so we can take the "parties" array output from the previous $lookup and reshape each entry extracting the matching "role" data for the current "kitty" at that given party. This we can actually make a variable itself.
The reason for the "array variable" is because we can then use $max to extract the "largest/last" date property as "singular" and still extract the "role" values as an "array" from that reshaped content. This makes it easy to define the fields you wanted.
And since it was a "left join" started from Cat in the first place, then those poor kitties that missed out on all parties are still there, and still have the desired output.
Two aggregation pipeline stages. What could be more simple!
As a full listing:
const mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.Promise = global.Promise;
mongoose.set('debug',true);
const uri = 'mongodb://localhost/catparty',
options = { useMongoClient: true };
const catSchema = new Schema({
name: String
});
const partySchema = new Schema({
date: Date,
attendants: [{
cat: { type: Schema.Types.ObjectId, ref: 'Cat' },
role: String
}]
});
const Cat = mongoose.model('Cat', catSchema);
const Party = mongoose.model('Party', partySchema);
function log(data) {
console.log(JSON.stringify(data,undefined,2))
}
(async function() {
try {
const conn = await mongoose.connect(uri,options);
// Clean collections
await Promise.all(
Object.keys(conn.models).map( m => conn.models[m].remove({}) )
);
var cats = await Cat.insertMany(
['Fluffy', 'Snuggles', 'Whiskers', 'Socks'].map( name => ({ name }) )
);
cats.shift();
cats = cats.map( (cat,idx) =>
({ cat: cat._id, role: (idx === 0) ? 'Host' : 'Guest' })
);
log(cats);
let party = await Party.create({
date: new Date(),
attendants: cats
});
log(party);
let kitties = await Cat.aggregate([
{ '$lookup': {
'from': Party.collection.name,
'localField': '_id',
'foreignField': 'attendants.cat',
'as': 'parties'
}},
{ '$replaceRoot': {
'newRoot': {
'$let': {
'vars': {
'parties': {
'$map': {
'input': '$parties',
'as': 'p',
'in': {
'date': '$$p.date',
'role': {
'$arrayElemAt': [
'$$p.attendants.role',
{ '$indexOfArray': [ '$$p.attendants.cat', '$_id' ] }
]
}
}
}
}
},
'in': {
'_id': '$_id',
'name': '$name',
'roles': '$$parties.role',
'dateOfLastParty': { '$max': '$$parties.date' }
}
}
}
}}
]);
log(kitties);
} catch(e) {
console.error(e);
} finally {
mongoose.disconnect();
}
})();
And example output:
[
{
"_id": "59a00d9528683e0f59e53460",
"name": "Fluffy",
"roles": [],
"dateOfLastParty": null
},
{
"_id": "59a00d9528683e0f59e53461",
"name": "Snuggles",
"roles": [
"Host"
],
"dateOfLastParty": "2017-08-25T11:44:21.903Z"
},
{
"_id": "59a00d9528683e0f59e53462",
"name": "Whiskers",
"roles": [
"Guest"
],
"dateOfLastParty": "2017-08-25T11:44:21.903Z"
},
{
"_id": "59a00d9528683e0f59e53463",
"name": "Socks",
"roles": [
"Guest"
],
"dateOfLastParty": "2017-08-25T11:44:21.903Z"
}
]
And you should be able to see how those "roles" values actually become an array with more data. And if you need that to be a "unique list", then simply wrap with $setDifference as in:
'roles': { '$setDifference': [ '$$parties.role', [] ] },
And that is also covered

Resources