Right outer join in aggregation pipeline

Right outer join in aggregation pipeline - node.js

I have two collections, let's call them Cats and Parties, with the following schemas:
Cat
{ name: String }
Party
{ date: Date, attendants: [{ cat: { ref: 'Cat' }, role: String }] }
where role symbolizes some other attribute, say, whether the attending cat is a VIP member.
Now I want to get a list of all cats that exist (even those poor kitties who never attended any party) and for each cat, I want a list of all the roles it ever had for at least one party. Furthermore, I want this entire list to be sorted by the (per cat) last attended party's date with cats who never attended any party being last.
This raises the following problems for me:
Aggregrating over Parties excludes party-pooper kitties who never joined a party.
Aggregating over Cats sort of goes »the wrong way« because I cannot $lookup parties the cat attended because that information is in a subdocument array.
The pipeline I currently have gives me all cats who attended at least one party with a list of their roles, but doesn't sort by the last attended party. In fact, I could live with excluding cats who never attended a party, but the sorting is crucial for me:
Party.aggregate([
{ $unwind: '$attendants' },
{ $project: { role: '$attendants.role', cat: '$attendants.cat' } },
{
$group: {
_id: '$cat',
roles: { $addToSet: '$role' }
}
},
{
$lookup: {
from: 'cats',
localField: '_id',
foreignField: '_id',
as: 'cat'
}
},
{ $unwind: '$cat' },
// (*)
{ $addFields: { 'cat.roles': '$roles' } },
{ $replaceRoot: { newRoot: '$cat' } }
])
My current idea would basically be a right outer join at (*) to add a list of parties the cat has attended, $project that to the party's date and then $group using $max to get the latest date. Then I can $unwind that now one-element array and $sort over it in the end.
The problem is that right outer joins don't exist in mongo, AFAIK, and I don't know how to get that list of parties per cat within the pipeline.
To clarify, the expected output should be something like
[
{
"_id": "59982d3c7ca25936f8c327c8",
"name": "Mr. Kitty",
"roles": ["vip", "birthday cat"],
"dateOfLastParty": "2017-06-02"
},
{
"_id": "59982d3c7ca25936f8c327c9",
"name": "Snuffles",
"roles": ["best looking cat"],
"dateOfLastParty": "2017-06-01"
},
...
{
"_id": "59982d3c7ca25936f8c327c4",
"name": "Sad Face McLazytown",
"roles": [],
"dateOfLastParty": null
},
]

As stated, you want the "cats" so use the Cat model and do the "left outer join" that is actually inherent to $lookup, rather than asking for a "right outer join" from the opposing collection, since a "right outer join" is not possible with MongoDB at this time.
It's also far more practical as a "left join", because you want "cats" as your primary source of output. The only thing to consider when linking to "Party" is that each "Cat" is listed in an array, and therefore you get the whole document back. So all that needs to be done is in "post processing" after the $lookup, you simply "filter" the array content for the matching entry of the current cat.
Fortunately we get good features with $arrayElemAt and $indexOfArray, that allow us to do that exact extraction:
let kitties = await Cat.aggregate([
{ '$lookup': {
'from': Party.collection.name,
'localField': '_id',
'foreignField': 'attendants.cat',
'as': 'parties'
}},
{ '$replaceRoot': {
'newRoot': {
'$let': {
'vars': {
'parties': {
'$map': {
'input': '$parties',
'as': 'p',
'in': {
'date': '$$p.date',
'role': {
'$arrayElemAt': [
'$$p.attendants.role',
{ '$indexOfArray': [ '$$p.attendants.cat', '$_id' ] }
]
}
}
}
}
},
'in': {
'_id': '$_id',
'name': '$name',
'roles': '$$parties.role',
'dateOfLastParty': { '$max': '$$parties.date' }
}
}
}
}}
]);
So my concept of "optimal" processing here actually uses $replaceRoot here because you can define the whole document under a $let statement. The reason I'm doing that is so we can take the "parties" array output from the previous $lookup and reshape each entry extracting the matching "role" data for the current "kitty" at that given party. This we can actually make a variable itself.
The reason for the "array variable" is because we can then use $max to extract the "largest/last" date property as "singular" and still extract the "role" values as an "array" from that reshaped content. This makes it easy to define the fields you wanted.
And since it was a "left join" started from Cat in the first place, then those poor kitties that missed out on all parties are still there, and still have the desired output.
Two aggregation pipeline stages. What could be more simple!
As a full listing:
const mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.Promise = global.Promise;
mongoose.set('debug',true);
const uri = 'mongodb://localhost/catparty',
options = { useMongoClient: true };
const catSchema = new Schema({
name: String
});
const partySchema = new Schema({
date: Date,
attendants: [{
cat: { type: Schema.Types.ObjectId, ref: 'Cat' },
role: String
}]
});
const Cat = mongoose.model('Cat', catSchema);
const Party = mongoose.model('Party', partySchema);
function log(data) {
console.log(JSON.stringify(data,undefined,2))
}
(async function() {
try {
const conn = await mongoose.connect(uri,options);
// Clean collections
await Promise.all(
Object.keys(conn.models).map( m => conn.models[m].remove({}) )
);
var cats = await Cat.insertMany(
['Fluffy', 'Snuggles', 'Whiskers', 'Socks'].map( name => ({ name }) )
);
cats.shift();
cats = cats.map( (cat,idx) =>
({ cat: cat._id, role: (idx === 0) ? 'Host' : 'Guest' })
);
log(cats);
let party = await Party.create({
date: new Date(),
attendants: cats
});
log(party);
let kitties = await Cat.aggregate([
{ '$lookup': {
'from': Party.collection.name,
'localField': '_id',
'foreignField': 'attendants.cat',
'as': 'parties'
}},
{ '$replaceRoot': {
'newRoot': {
'$let': {
'vars': {
'parties': {
'$map': {
'input': '$parties',
'as': 'p',
'in': {
'date': '$$p.date',
'role': {
'$arrayElemAt': [
'$$p.attendants.role',
{ '$indexOfArray': [ '$$p.attendants.cat', '$_id' ] }
]
}
}
}
}
},
'in': {
'_id': '$_id',
'name': '$name',
'roles': '$$parties.role',
'dateOfLastParty': { '$max': '$$parties.date' }
}
}
}
}}
]);
log(kitties);
} catch(e) {
console.error(e);
} finally {
mongoose.disconnect();
}
})();
And example output:
[
{
"_id": "59a00d9528683e0f59e53460",
"name": "Fluffy",
"roles": [],
"dateOfLastParty": null
},
{
"_id": "59a00d9528683e0f59e53461",
"name": "Snuggles",
"roles": [
"Host"
],
"dateOfLastParty": "2017-08-25T11:44:21.903Z"
},
{
"_id": "59a00d9528683e0f59e53462",
"name": "Whiskers",
"roles": [
"Guest"
],
"dateOfLastParty": "2017-08-25T11:44:21.903Z"
},
{
"_id": "59a00d9528683e0f59e53463",
"name": "Socks",
"roles": [
"Guest"
],
"dateOfLastParty": "2017-08-25T11:44:21.903Z"
}
]
And you should be able to see how those "roles" values actually become an array with more data. And if you need that to be a "unique list", then simply wrap with $setDifference as in:
'roles': { '$setDifference': [ '$$parties.role', [] ] },
And that is also covered

Related

How can I get only the array element as output instead of whole object in MongoDB?

Below is my code to display review array data which is part of the restaurant collection object:
async get(reviewId) {
const restaurantsCollection = await restaurants();
reviewId = ObjectId(reviewId)
const r = await restaurantsCollection.findOne(
{ reviews: { $elemMatch: { _id : reviewId } } },
{"projection" : { "reviews.$": true }}
)
return r
}
My object looks like:
{
_id: '6176e58679a981181d94dfaf',
name: 'The Blue Hotel',
location: 'Noon city, New York',
phoneNumber: '122-536-7890',
website: 'http://www.bluehotel.com',
priceRange: '$$$',
cuisines: [ 'Mexican', 'Italian' ],
overallRating: 0,
serviceOptions: { dineIn: true, takeOut: true, delivery: true },
reviews: []
}
My output looks like:
{
"_id": "6174cfb953edbe9dc5054f99", // restaurant Id
"reviews": [
{
"_id": "6176df77d4639898b0c155f0", // review Id
"title": "This place was great!",
"reviewer": "scaredycat",
"rating": 5,
"dateOfReview": "10/13/2021",
"review": "This place was great! the staff is top notch and the food was delicious! They really know how to treat their customers"
}
]
}
What I want as output:
{
"_id": "6176df77d4639898b0c155f0",
"title": "This place was great!",
"reviewer": "scaredycat",
"rating": 5,
"dateOfReview": "10/13/2021",
"review": "This place was great! the staff is top notch and the food was delicious! They really know how to treat their customers"
}
How can I get the output as only the review without getting the restaurant ID or the whole object?

So the query operators, find and findOne do not allow "advanced" restructure of data.
So you have 2 alternatives:
The more common approach will be to do this in code, usually people either use some thing mongoose post trigger or have some kind of "shared" function that handles all of these transformations, this is how you avoid code duplication.
Use the aggregation framework, like so:
const r = await restaurantsCollection.aggregate([
{
$match: { reviews: { $elemMatch: { _id : reviewId } } },
},
{
$replaceRoot: {
newRoot: {
$arrayElemAt: [
{
$filter: {
input: "$reviews",
as: "review",
cond: {$eq: ["$$review._id", reviewId]}
}
},
0
]
}
}
}
])
return r[0]

Mongoose: Infinite scroll with filtering

I have these two models:
User.js
const UserSchema = new Schema({
profile: {
type: Schema.Types.ObjectId,
ref: "profiles",
},
following: [
{
type: Schema.Types.ObjectId,
ref: "users",
},
],
});
module.exports = User = mongoose.model("users", UserSchema);
Profile.js
const ProfileSchema = new Schema({
videoURL: {
type: String,
},
});
module.exports = Profile = mongoose.model("profiles", ProfileSchema);
Here's an example of a User document:
{
"following": [
{
"profile":{
"videoURL":"video_url_1"
}
},
{
"profile":{
"videoURL":"video_url_2"
}
},
{
"profile":{}
},
{
"profile":{
"videoURL":"video_url_3"
}
},
{
"profile":{
"videoURL":"video_url_4"
}
},
{
"profile":{
"videoURL":"video_url_5"
}
},
{
"profile":{}
},
{
"profile":{
"videoURL":"video_url_6"
}
}
]
}
I am trying to implement an infinite scroll of the videos of the users followed by the connected user.
This means, I will have to filter user.following.profile.videoURL
WHERE videoURL exists
Suppose, I will be loading two videos, by two videos:
Response 1: ["video_url_1","video_url_2"]
Response 2: ["video_url_3","video_url_4"]
Response 3: ["video_url_5","video_url_6"]
Usually, infinite scroll is easy because all I have to load the documents 2 by 2 by order of storage without filtering on any field.
Example: Displaying the followed users two by two in an infinite scroll
User.findById(user_id).populate({
path: "following",
options: {
skip: 2 * page,
limit: 2,
},
});
But, now I have to perform filtering on each followed_user.profile.video, and return two by two. And I don't see how I can perform BOTH the filtering and the infinite scroll at the same time.
NOTE: According to the documentation:
In general, there is no way to make populate() filter stories based on properties of the story's author. For example, the below query won't return any results, even though author is populated.
const story = await Story.
findOne({ 'author.name': 'Ian Fleming' }).
populate('author').
exec();
story; // null
So I suppose, there is no way for me to use populate to filter based user.followers, based on each user.follower.profile.videoURL

I am not sure it is possible with populate method, but you can try aggregation pipeline,
$match user_id condition
$lookup with aggregation pipeline in users collection for following
$match following id condition
$lookup with profile for following.profile
$match videoURL should exists
$project to show profile field and get first element using $arrayElemAt
$slice to do pagination in following
let page = 0;
let limit = 2;
let skip = limit * page;
User.aggregate([
{ $match: { _id: mongoose.Types.ObjectId(user_id) } },
{
$lookup: {
from: "users",
let: { following: "$following" },
pipeline: [
{ $match: { $expr: { $in: ["$_id", "$$following"] } } },
{
$lookup: {
from: "profiles",
localField: "profile",
foreignField: "_id",
as: "profile"
}
},
{ $match: { "profile.videoURL": { $exists: true } } },
{
$project: {
profile: { $arrayElemAt: ["$profile", 0] }
}
}
],
as: "following"
}
},
{
$addFields: {
following: {
$slice: ["$following", skip, limit]
}
}
}
])
Playground
Suggestion:
You can improve your schema design,
removing profile schema and add profile object in users collection, so you can achieve easily your requirement using populate method,
put match condition in following populate for videoURL exists
const UserSchema = new Schema({
profile: {
type: {
videoURL: {
type: String
}
}
},
following: [
{
type: Schema.Types.ObjectId,
ref: "users"
}
]
});
module.exports = User = mongoose.model("users", UserSchema);
User.findById(user_id).populate({
path: "following",
match: {
"profile.videoURL": { $ne: null }
},
options: {
skip: 2 * page,
limit: 2,
}
});

So what you want is table with infinite scroll and:
You can opt given ways to approach your problem :
Load data (first page) into grid.
Set filter on a col.
Load data again, this time using the filter.

mongodb - $lookup with $if operator

I am building a side project. where I want to use nested $lookup and $unwind if certain condition match. I created simple prototype below. some users have numbers and some don't have numbers. I want to do join if the number is not null or undefined
here is the code -
users.aggregate([
{
$lookup: "number",
localField:"user.number",
foreignField:"name",
as:"number"
}
{
$unwind: {
path: "$number",
preserveNullAndEmptyArrays: true
}
}
,
{
$lookup: "countries",
localField:"number.countrycode",
foreignField:"name",
as:"countries"
},
{
$unwind:"countries",
//preserveNullAndEmptyArrays
},
$project:{
name:1,
number:"$number.phoneNumber",
countryCode:"$countries.countryCode"
}
])
Output:
[{
name:"yashraj basan"
number:"123345677",
countryCode:"US"
},
{
name:"krutik basan"
number:"123345679",
countryCode:"FR"
}]
Right now i am getting all users who have numbers but i am want both users who have number and who don't have number
Expected output:
[{
name:"yashraj basan"
number:"123345677",
countryCode:"US"
},
{
name:"krutik basan"
number:"123345679",
countryCode:"FR"
},
{
name:"dhruvam basan",
number:"",
countrycode:""
},{
name:"foo bar",
number:"",
countrycode:""
}]
I appreciate all your inputs. thank you

There's no need to run $lookup conditionally. You can do it for every user but then when you use $unwind you can take advantage of preserveNullAndEmptyArrays option:
{
$lookup: {
from:"countries",
localField:"user.number.countrycode",
foreignField:"name",
as:"countries"
}
},
{
$unwind: {
path: "countries",
preserveNullAndEmptyArrays: true
}
}

MongoDB/Mongoose: Search based on value within a given document without first returning that document

I'd like to do a search in MongoDB using either Mongo or Mongoose based on the value of a field in a document.
Let's say I had three MongoDB documents that looked like this:
{
name: "Michael",
mentored: ["Dwight", "Ryan", "Jim"]
},
{
name: "Jim",
mentored: ["Toby", "Roy", "Darryl"]
},
{
name: "Stanley",
mentored: ["Pam", "Meredith", "Angela"]
}
Let's further say I want to do a search for anyone who Michael has not mentored, which in this case would be Stanley (let's assume that the people in the arrays don't necessarily have their own records). I know I can do a search like this in Mongoose to get the result I want:
User.findOne({ name: "Michael" })
.then((person) => {
const mentored = person.mentored
return User.find({ name: { $nin: mentored } })
)
.then((person2) => {
console.log(person2); // Stanley
})
However, is there any way to do this without first returning the array from the database and then doing a second search? Something like this:
User.findOne({ name: { $nin: { "Michael's mentored people array" } } })
Ultimately I'm looking to see if there's any way to make more efficient such a situation in which arrays can get 10s of thousands of values long. Many thanks.

I think what you are doing is already efficient for large arrays.
But you could try the following, as suggested by #D.SM. but using aggregation the intermediate results will have to be loaded in memory, which does not seem efficient to me.
User.aggregate([
{
$match: {
name: "Michael"
}
},
{
$lookup: {
from: "collection",
as: "notMentored",
let: {
mentored: "$mentored",
/*
You probably want to remove "Michael" from the result, one way is to add him to the mentored array
mentored: { $concatArrays: ["$mentored", ["$name"]]}
*/
},
pipeline: [{
$match: {
$expr: { $not: { $in: ["$name", "$$mentored"] } }
}
}]
}
},
{
$unwind: "$notMentored"
},
{
$replaceRoot: {
newRoot: "$notMentored"
}
}
])

Find documents by populate match result in mongoose

For Ecample, From this data with mongoose :
Data Students :
[{
"_id": ObjectId("5afbb519a7fe344ff8db67e6"),
"name":"Jack",
"age":20
...
},{
"_id": ObjectId("5afbb534a7fe344gf7db64e7"),
"name":"Joni",
"age":20
...
}]
Data Activities :
[{
"_id": ObjectId("5afbb554a7fe344ff8db67e9"),
"name":"Going to Market",
"student": ObjectId("5afbb519a7fe344ff8db67e6")
...
},{
"_id": ObjectId("5afbb784a7fe344gf7db64e2"),
"name":"Playing with Friends",
"student":ObjectId("5afbb534a7fe344gf7db64e7")
...
}]
I want to find a data base on conditions in populate with regex & This is what i am did :
let term = new Regex('Jack','i');
let result = await Activities.find({})
.populate('student', null, { name: { $regex: term } })
.exec();
But the result is always return all data of Activites (The regex query doesn't work)
What Should I Do?

You should try with using match
let result = await Activities.find({})
.populate({ path: 'student', match: { name: { $regex: term } } })
.exec();

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Right outer join in aggregation pipeline - node.js

Related

How can I get only the array element as output instead of whole object in MongoDB?

Mongoose: Infinite scroll with filtering

mongodb - $lookup with $if operator

MongoDB/Mongoose: Search based on value within a given document without first returning that document

Find documents by populate match result in mongoose

Categories

Resources