mongoose recursive populate - node.js

I have been searching for a while and I didn't find any good answer. I have n-deep tree that I am storing in DB and I would like to populate all parents so in the end I get the full tree
node
-parent
-parent
.
.
-parent
So far I populate to level 2, and as I mentioned I need to get to level n.
Node.find().populate('parent').exec(function (err, items) {
if (!err) {
Node.populate(items, {path: 'parent.parent'}, function (err, data) {
return res.send(data);
});
} else {
res.statusCode = code;
return res.send(err.message);
}
});

you can do this now (with https://www.mongodb.com/blog/post/introducing-version-40-mongoose-nodejs-odm)
var mongoose = require('mongoose');
// mongoose.Promise = require('bluebird'); // it should work with native Promise
mongoose.connect('mongodb://......');
var NodeSchema = new mongoose.Schema({
children: [{type: mongoose.Schema.Types.ObjectId, ref: 'Node'}],
name: String
});
var autoPopulateChildren = function(next) {
this.populate('children');
next();
};
NodeSchema
.pre('findOne', autoPopulateChildren)
.pre('find', autoPopulateChildren)
var Node = mongoose.model('Node', NodeSchema)
var root=new Node({name:'1'})
var header=new Node({name:'2'})
var main=new Node({name:'3'})
var foo=new Node({name:'foo'})
var bar=new Node({name:'bar'})
root.children=[header, main]
main.children=[foo, bar]
Node.remove({})
.then(Promise.all([foo, bar, header, main, root].map(p=>p.save())))
.then(_=>Node.findOne({name:'1'}))
.then(r=>console.log(r.children[1].children[0].name)) // foo
simple alternative, without Mongoose:
function upsert(coll, o){ // takes object returns ids inserted
if (o.children){
return Promise.all(o.children.map(i=>upsert(coll,i)))
.then(children=>Object.assign(o, {children})) // replace the objects children by their mongo ids
.then(o=>coll.insertOne(o))
.then(r=>r.insertedId);
} else {
return coll.insertOne(o)
.then(r=>r.insertedId);
}
}
var root = {
name: '1',
children: [
{
name: '2'
},
{
name: '3',
children: [
{
name: 'foo'
},
{
name: 'bar'
}
]
}
]
}
upsert(mycoll, root)
const populateChildren = (coll, _id) => // takes a collection and a document id and returns this document fully nested with its children
coll.findOne({_id})
.then(function(o){
if (!o.children) return o;
return Promise.all(o.children.map(i=>populateChildren(coll,i)))
.then(children=>Object.assign(o, {children}))
});
const populateParents = (coll, _id) => // takes a collection and a document id and returns this document fully nested with its parents, that's more what OP wanted
coll.findOne({_id})
.then(function(o){
if (!o.parent) return o;
return populateParents(coll, o.parent))) // o.parent should be an id
.then(parent => Object.assign(o, {parent})) // replace that id with the document
});

Another approach is to take advantage of the fact that Model.populate() returns a promise, and that you can fulfill a promise with another promise.
You can recursively populate the node in question via:
Node.findOne({ "_id": req.params.id }, function(err, node) {
populateParents(node).then(function(){
// Do something with node
});
});
populateParents could look like the following:
var Promise = require('bluebird');
function populateParents(node) {
return Node.populate(node, { path: "parent" }).then(function(node) {
return node.parent ? populateParents(node.parent) : Promise.fulfill(node);
});
}
It's not the most performant approach, but if your N is small this would work.

Now with Mongoose 4 this can be done. Now you can recurse deeper than a single level.
Example
User.findOne({ userId: userId })
.populate({
path: 'enrollments.course',
populate: {
path: 'playlists',
model: 'Playlist',
populate: {
path: 'videos',
model: 'Video'
}
}
})
.populate('degrees')
.exec()
You can find the official documentation for Mongoose Deep Populate from here.

Just don't :)
There is no good way to do that. Even if you do some map-reduce, it will have terrible performance and problems with sharding if you have it or will ever need it.
Mongo as NoSQL database is really great for storing tree documents. You can store whole tree and then use map-reduce to get some particular leafs from it if you don't have a lot of "find particular leaf" queries. If this doesn't work for you, go with two collections:
Simplified tree structure: {_id: "tree1", tree: {1: [2, {3: [4, {5: 6}, 7]}]}}. Numbers are just IDs of nodes. This way you'll get whole document in one query. Then you just extract all ids and run second query.
Nodes: {_id: 1, data: "something"}, {_id: 2, data: "something else"}.
Then you can write simple recurring function which will replace node ids from first collection with data from second. 2 queries and simple client-side processing.
Small update:
You can extend second collection to be a little more flexible:
{_id: 2, data: "something", children:[3, 7], parents: [1, 12, 13]}
This way you'll be able to start your search from any leaf. And then, use map-reduce to get to the top or to the bottom of this part of tree.

This is a more straight forward approach to caub's answer and great solution. I found it a bit hard to make sense of at first so I put this version together.
Important, you need both 'findOne' and 'find' middleware hooks in place for this solution to work. *
* Also, the model definition must come after the middleware definition *
const mongoose = require('mongoose');
const NodeSchema = new mongoose.Schema({
children: [mongoose.Schema.Types.ObjectId],
name: String
});
const autoPopulateChildren = function (next) {
this.populate('children');
next();
};
NodeSchema
.pre('findOne', autoPopulateChildren)
.pre('find', autoPopulateChildren)
const Node = mongoose.model('Node', NodeSchema)
const root = new Node({ name: '1' })
const main = new Node({ name: '3' })
const foo = new Node({ name: 'foo' })
root.children = [main]
main.children = [foo]
mongoose.connect('mongodb://localhost:27017/try', { useNewUrlParser: true }, async () => {
await Node.remove({});
await foo.save();
await main.save();
await root.save();
const result = await Node.findOne({ name: '1' });
console.log(result.children[0].children[0].name);
});

I tried #fzembow's solution but it seemed to return the object from the deepest populated path. In my case I needed to recursively populate an object, but then return the very same object. I did it like that:
// Schema definition
const NodeSchema = new Schema({
name: { type: String, unique: true, required: true },
parent: { type: Schema.Types.ObjectId, ref: 'Node' },
});
const Node = mongoose.model('Node', NodeSchema);
// method
const Promise = require('bluebird');
const recursivelyPopulatePath = (entry, path) => {
if (entry[path]) {
return Node.findById(entry[path])
.then((foundPath) => {
return recursivelyPopulatePath(foundPath, path)
.then((populatedFoundPath) => {
entry[path] = populatedFoundPath;
return Promise.resolve(entry);
});
});
}
return Promise.resolve(entry);
};
//sample usage
Node.findOne({ name: 'someName' })
.then((category) => {
if (category) {
recursivelyPopulatePath(category, 'parent')
.then((populatedNode) => {
// ^^^^^^^^^^^^^^^^^ here is your object but populated recursively
});
} else {
...
}
})
Beware it's not very efficient. If you need to run such query often or at deep levels, then you should rethink your design

Maybe a lot late for that but mongoose has some documentation on this :
Ancestors Tree Array
Materialized Path Tree Array
I think the first one is more appropriate to you as you are looking to populate parents.
With that solution, you can with one regex query, search all the documents matching your designered output tree.
You would setup documents with this Schema :
Tree: {
name: String,
path: String
}
Paths field would be the absolute path in your tree :
/mens
/mens/shoes
/mens/shoes/boots
/womens
/womens/shoes
/womens/shoes/boots
For example you could search all the childrens of your node '/mens/shoes' with one query :
await Tree.find({ path: /^\/mens/shoes })
It would return all the documents where the path starts with /mens/shoes :
/mens/shoes
/mens/shoes/boots
Then you'd only need some client-side logic to arrange it in a tree structure (a map-reduce)

Related

Mongoose: After finding document, iterate over a value in the document and run a new query on each

I have one schema which contains an array of references to another schema (among other fields):
const RecipeIngredient = new Schema({
ingredientId: { // store id ref so I can populate later
type: Schema.Types.ObjectId,
ref: 'ingredients',
required: true
},
// there are a couple other fields but not relevant here
});
const Recipe = new Schema({
ingredients: [RecipeIngredient]
});
I'm trying to write a route which will first find a recipe by _id, populate the ingredients array (already have this working), and finally iterate over each ingredient in that array.
router.get('/:recipeId/testing', async (req, res) => {
const { recipeId } = req.params
let recipe = await Recipe
.findById(recipeId)
.populate({
path: 'ingredients.ingredientId',
model: 'Ingredient',
select: '_id ......' //I'm selecting other fields too
})
.lean()
.exec();
if (recipe) {
const { ingredients } = recipe;
const newIngredients = [];
await ingredients.forEach(async (ingr) => {
// here I'd like to be able to run a new query
// and append the result to an array outside of the forEach
// I do need information about the ingr in order to run the new query
newIngredients.push(resultOfNewQuery);
});
return res.json(newIngredients)
};
return res.status(404).json({ noRecipeFound: 'No recipe found.'});
})
I've tried approaching this in a few different ways, and the closest I've gotten was executing the new query within each iteration, but because the query is async, I return the response before I've actually collected the documents from the inner query.
I also attempted to use .cursor() in the initial query, but that won't work for me because I do need to access the ingredients field on the recipe once it is resolved before I can iterate and run the new queries.
Any ideas would be appreciated! I'm definitely opening to restructuring this whole route if my approach is not ideal.
I was able to make this work by using a for loop:
const newIngredients = [];
for (let idx = 0; idx < ingredients.length; idx++) {
const { fieldsImInterestedIn } = ingredients[idx];
const matchingIngredients = await Ingredient
.find(fieldsImInterestedIn)
.lean()
.exec()
.catch(err => res.status(404).json({ noIngredientsFound: 'No ingredients found' }));
newIngredients.push(ingredientsToChooseFrom[randomIndex]);
};
return res.json(newIngredients);
still a little perplexed as to why this was able to work while forEach wasn't, but I'll happily move on...

Using updateOne method to update an object field inside array - throws error "Cannot create field 'url' in element"

I have MongoDB database (with Mongoose) containing a collection of Products (among others), which looks like this:
[
{
name: 'Product A',
url: 'product-a',
category: 'accesory',
price: 12,
shortDescription: ['example description'],
technicalSpecs: [{ speed: 10, weight: 20 }],
images: [],
reviews: [],
relatedProducts: [
{
url: 'product-b',
name: 'Product B',
// to be added in Update query
//id: id_of_related_product
}
]
} /* other Product objects */
]
As every MongoDB document is provided with _id property by default, but within the relatedProducts array i only have url and name properties, i want to add the id property (associated with corresponding Product) for each object in the relatedProducts array, so i will be able to conveniently query and process those related products.
I came up with an idea to query all Products to get only those, which have non-empty relatedProducts array. Then i loop them and i search for Product model, which has specific url and name properties - this let's me get it's true (added by MongoDB) _id. At the end i want to add this _id to matching object inside relatedProducts array.
My code:
async function assignIDsToRelatedProducts(/* Model constructor */ Product) {
const productsWithRelatedOnes = await Product.find(
{ relatedProducts: { $ne: [] }}, ['relatedProducts', 'name', 'url']
);
for (const productItem of productsWithRelatedOnes) {
for (const relatedProduct of productItem.relatedProducts) {
const product = await Product.findOne(
{ url: relatedProduct.url, name: relatedProduct.name },
'_id'
);
// throws error
await productItem.updateOne(
{ 'relatedProducts.url': relatedProduct.url },
{ $set: { 'relatedProducts.$.id': product._id } }
);
}
}
}
However it throws the following error:
MongoError: Cannot create field 'url' in element {relatedProducts: [ /* array's objects here */ ]}
I don't know why MongoDB tries to create field 'url', as i use it to project/query url field (not create it) in updateOne method. How to fix this?
And - as i am newbie to MongoDB - is there a simpler way of achieving my goal? I feel that those two nested for..of loops are unnecessary, or even preceding creation of productsWithRelatedOnes variable is.
Is it possible to do with Mongoose Virtuals? I have tried it, but i couldn't match virtual property within the same Product Model - attach it to each object in relatedProducts array - after calling .execPopulate i received either an empty array or undefined (i am aware i should post at-the-time code of using Virtual, but for now i switched to above solution).
Although i didn't find solution or even reason of my problem, i solved it with a slightly other approach:
async function assignIDsToRelatedProducts(Product) {
const productsHavingRelatedProducts = Product.find({ relatedProducts: { $ne: [] }});
for await (const withRelated of productsHavingRelatedProducts) {
for (const relatedProductToUpdate of withRelated.relatedProducts) {
const relatedProduct = await Product
.findOne(
{ url: relatedProductToUpdate.url, name: relatedProductToUpdate.name },
['url', '_id']
);
await Product.updateMany(
{ 'relatedProducts.url': relatedProduct.url },
{ $set: { 'relatedProducts.$.id': relatedProduct._id } }
);
}
}
const amountOfAllProducts = await Product.find({}).countDocuments();
const amountOfRelatedProductsWithID = await Product
.find({ 'relatedProducts.id': { $exists: true } }).countDocuments();
console.log('All done?', amountOfAllProducts === amountOfRelatedProductsWithID);
}
Yet, i still suppose it can be done more concisely, without the initial looping. Hopefully somebody will suggest better solution. :)

mongoose filter by multiple conditions and execute to update data

I am wondering what would be the best approach to make schema functions using mongoose. I have never used this so the way I think is somewhat limited, same goes for looking for docs, without knowing what's available, is not very efficient.
Through docs I found that either using findOneAndUpdate might solve the problem; but there are some constraints.
Here is the code I am planning to run:
models/Bookmark.js
const mongoose = require('mongoose')
const bookmarkItemSchema = new mongoose.Schema({
restaurantId: String,
cachedAttr: {
name: String,
latitude: Number,
longitude: Number,
},
})
const bookmarkListSchema = new mongoose.Schema({
listName: String,
items: [bookmarkItemSchema],
})
const bookmarkSchema = new mongoose.Schema({
userId: {
type: mongoose.Schema.Types.ObjectId,
ref: 'User',
},
lists: [bookmarkListSchema],
})
// const add = (lists, userId) => {
// let bookmark = Bookmark.findOne({userId})
// bookmark.lists.listName === lists.listName //current, new
// ? bookmark.lists.items.push(lists.items)
// : bookmark.lists.push(lists)
// return bookmark
// }
mongoose.model('Bookmark', bookmarkSchema)
Routes/bookmark.js
router.post('/bookmarks', async (req, res) => {
const {lists} = req.body
console.log(lists)
if (!lists) {
return res.status(422).send({error: 'You must provide lists'})
}
let bookmark = Bookmark.findOne({"userId": req.user._id})
if (bookmark.lists.listName === lists.listName){
let item = lists.items
bookmark.lists.items.push(item)
await bookmark.save()
res.send(bookmark)
}
try {
// const bookmark = Bookmark.add(lists, req.user._id, obj)
// await bookmark.save()
// res.send(bookmark)
let bookmark = Bookmark.findOne({"userId": req.user._id})
if (bookmark.lists.listName === lists.listName){ // THIS IS UNDEFINED. How to get this object?
let item = lists.items
bookmark.lists.items.push(item)
await bookmark.save()
res.send(bookmark)
}
} catch (e) {
res.status(422).send({error: e.message})
}
})
The req.body looks like this:
{
"lists": {
"listName": "My Saved List",
"items": {
"restaurantId": "abcdefg",
"cachedAttr": {
"name": "abcdefg",
"latitude": 200,
"longitude": 200
}
}
}
}
Basically what I commented out in the models/Bookmark.js file is what I would really like to do.
If the userId's list name already exists, then I would like to just add an item to the list.
Otherwise, I would like to add a new list to the object.
What is the best approach for doing this? Is there a straight forward mongoose api that I could use for this problem? or do I need to make two separated function that would handle each case and make that as schema methods and handle it in the routes file?

Mongoose: Populate a tree with parent referencing top-down

As I have stated in another question, I am working on a project involving a tree.
The tree uses parent referencing, so every node has the id of its parent
I need to load the tree top-down (from root to children) from the db and replace the parent references by children arrays (because the client needs them)
I've chosen this method, because I estimate 98% of the operation to be create/update on nodes (and this way I only have to create 1 node on update, instead of also updating the parent to add the child to the array) and only about 2% to be read operations (I only have to read the complete tree, there is no use case for reading parts or subtrees)
The Tree models is:
const mongoose = require("mongoose");
const Promise = require("bluebird");
mongoose.Promise = Promise;
const Node = require("./node-model");
const TreeSchema = new mongoose.Schema({
root: { type: Schema.Types.ObjectId, ref: 'Node' },
});
And the Node model:
const mongoose = require("mongoose");
const Promise = require("bluebird");
mongoose.Promise = Promise;
const NodeSchema = new mongoose.Schema({
parent: Schema.Types.ObjectId,
children: [], // to be populated on loading the tree
data: {
d1: String,
//...
}
});
NodeSchema.methods.populateTree = function() {
return this.constructor.find({ parent: this._id }).exec()
.then(function(arrayOfChildren) {
return Promise.each(arrayOfChildren, function(child){
this.children.push(child); // PROBLEM: 'this' is undfined here!
delete child.parent; // delete parent reference because JSON has problems with circular references
return child.populateTree();
});
});
}
Also, there is a tree container:
const TreeContainerSchema = new mongoose.Schema({
owner: { type: Schema.Types.ObjectId, ref: 'User', required: true },
tree: { type: Schema.Types.ObjectId, ref: 'Tree' },
});
I'm trying to load the complete tree (in his container) to send it back to the client as JSON as follows:
getTreeContainerById = function(req, res) {
var promise = TreeContainer.
findById(req.params.id).
populate("owner", "name"). // only include name
populate({
path: "tree",
populate: {
path: "root",
populate: "data"
}
}).exec();
promise.then(function(treeContainer){
return treeContainer.tree.root.populateTree()
.then(function(){ return treeContainer });
}).then(function(treeContainer) {
// I need the tree container here to send it back to the client
res.json(treeContainer);
});
};
But this implementation isn't working. The problems I face are:
In the populateTree schema method, I can't access the current node through "this" (it is undefined) but I need the reference somehow to add the children to the array
If I try child.parent.children.push instead, this also isn't working, because I only have the id of the parent (in child.parent) and not the entity (and I don't think it is the correct approach to load it again from the database)
In an earlier version, I had the problem, that the JSON was send back to the client, before the tree was completely populated, but I think i solved this through the use of the schema method
In general, I don't know, if this is the correct approach to solve my problem (populate the children references and delete the parent references in my tree) or if there is a more appropriate solution
I hope, I could make my problem clear. Any help is much appreciated!
With populateTree as follows it works:
NodeSchema.methods.populateTree = function() {
var node = this;
return this.constructor.find({ parent: this._id }).exec()
.then(function(arrayOfChildren) {
return Promise.each(arrayOfChildren, function(child){
node.children.push(child);
child.parent = null;
return child.populateTree();
});
});
}
Thanks to #danh who suggested the same!

Result of mongoose.save is incorrect when adding item to mixed schema array

Following is a function that shows the issue:
var mongoose = require('mongoose');
var connection = mongoose.createConnection('mongodb://localhost:27017');
connection.once('open', function () {
var schema = new mongoose.Schema({
obj: [{}] //mongoose.Schema.Types.Mixed
});
var Model = connection.model('mtest', schema);
var model = new Model({
obj: [{ name: 'Original' }]
});
model.save(function (err, res) {
console.log('result 1', res);
Model.findOne({_id: res._id}, function (err, res) {
res.obj[0].name = 'Modified';
res.obj.push({ name: 'other' });
//res.markModified('obj'); // using markModified does not help
res.save(function (err, res) {
console.log('result 2', res);
connection.close();
process.exit();
});
});
})
});
The output of "result 2" shows "Modified" for the first item in "obj": obj: [ { name: 'Modified' }, { name: 'other' } ].
However, in the database the value of the first item is still "Original".
This only happens when pushing a second item into the array (otherwise the first item is indeed modified).
Adding markModified does resolve the issue.
I'm using an array of empty objects types in the schema because in reality this use case deals with with schemas that inherit from each other, so no single schema can be used here.
Is it a bug? The only workaround I've found is to clear the array and add all the items again. I'd like to know if there's a better solution.
You could either alter your markModified call to identify the index of the element you changed "outside" of the array access methods:
res.obj[0].name = 'Modified';
res.obj.push({ name: 'other' });
res.markModified('obj.0');
Or switch to using the set array access method to alert name (which looks pretty goofy, but does work):
res.obj[0].name = 'Modified';
res.obj.set(0, res.obj[0]);
res.obj.push({ name: 'other' });

Resources