NodeJs with Mongoose - Nested queries async issue - node.js

I have to mongodb collections, like this:
UserGroup collection
fields:
name: String
group_id: Number
User collection
fields:
user_name: String
group_id: Number
I want generate a report like this:
ADMINISTRATORS
--------------------------
jlopez
rdiaz
OPERATORS
--------------------------
amiralles
dcamponits
But I get the following report:
ADMINISTRATORS
--------------------------
OPERATORS
--------------------------
jlopez
rdiaz
amiralles
dcamponits
Following is the code to generate the report:
UserGroup.find({}, (err, groups) => {
for(var i in groups){
console.log(groups[i].name)
console.log("--------------------")
User.find( {group_id : groups[i].group_id}, (err, users) =>{
for(var j in users){
console.log(users[j].user_name)
}
})
}
})
Clearly, this is a problem of the NodeJs/Mongoose asynchronicity.
QUESTION: How do I make the first For cycle wait until the internal cycle ends for each UserGrop?
Thanks in advance,
David.

You can run an aggregation pipeline that uses $lookup to do a "left-join" to another collection in the same database to filter in documents from the "joined" collection for processing. With this you won't need any async library:
UserGroup.aggregate([
{
"$lookup": {
"from": "users",
"localField": "group_id",
"foreignField": "group_id",
"as": "users"
}
},
{
"$project": {
"_id": 0,
"name": 1,
"users": {
"$map": {
"input": "$users",
"as": "user",
"in": "$$user.user_name"
}
}
}
}
], (err, groups) => {
if (err) throw err;
console.log(JSON.stringify(groups, null, 4));
})
Sample Output
[
{
"name": "ADMINISTRATORS",
"users": ["jlopez", "rdiaz"]
},
{
"name": "OPERATORS",
"users": ["amiralles", "dcamponits"]
}
]

Add support for promises to mongoose. I use q, but you can use bluebird too.
mongoose.Promise = require('q').Promise;
Then you can use q.all to resolve once all of the user queries have completed.
var promises = [];
var groups = [];
UserGroup.find({}, (err, groups) => {
for(var i in groups){
groups.push(groups[i]);
promises.push(User.find( {group_id : groups[i].group_id}));
}
});
q.all(promises).then( function(usersByGroup){
var indx = 0;
usersByGroup.forEach(function(users){
var grp = groups[indx];
console.log(groups[i].name);
console.log("--------------------");
for(var j in users){
console.log(users[j].user_name)
}
indx++;
});
});

This is a good use case for asyc, you can get a get basic idea from following code. it is based on async each & waterfall. [ Please add proper error handling for the following code yourself.]
UserGroup.find({}, (err, groups) => {
async.each(groups, (group, callback) =>{
async.waterfall([
(wCallback) => {
User.find({group_id : group.group_id}, wCallback)
},
(users, wCallback) => {
console.log(group.name)
console.log("--------------------")
for(var j in users){
console.log(users[j].user_name)
}
wCallback()
}
], callback)
})
})

Related

Trouble with async mongoose queries

This is the array that i get from my database, let's call it product_list
{
"seller": "5cee0e69f67e171ac8ef14c7",
"products": [...]
},
{
"seller": "5d1c36910aec8934cefdda8e",
"products": [...]
}
And i want to send the same array but transforming the seller field into the complete seller object. In the given example would be these ones.
{
"_id": "5cee0e69f67e171ac8ef14c7",
"name": "Will"
},
{
"_id": "5d1c36910aec8934cefdda8e",
"name" : "Jess"
}
So my desired output would be
{
"seller": {
"_id": "5cee0e69f67e171ac8ef14c7",
"name": "Will"
},
"products": [...]
},
{
"seller": {
"_id": "5d1c36910aec8934cefdda8e",
"name": "Jess"
},
"products": [...]
}
I tried the following
product_list.forEach(element => {
User.findById(element.seller, function(err, user){
element.seller = user;
};
});
What i'm trying is to take all the seller ids from the product_list object, convert it to the complete object with User.findById, and return the updated product_list. But the problem is that i'm not to good with asynchronous code, and since the mongoose calls are callbacks when i return the object with res.json(product_list), the mongoose query is not finished and i receive the object without the modification.
I tried promises and awaits but im not getting any result.
I hope you understand my explanation, and thank you very much.
You can try this approach, wrap User.findById and you can use the async / await pattern.
// Wrapper function for User.findById. We could use utils.promisify for this.
function getUserById(id) {
return new Promise((resolve, reject) => {
User.findById(id, (err, result) => {
if (err) {
reject(err);
} else {
resolve(result);
}
})
});
}
async function getSalesDetails(productList) {
for(let product of productList) {
let userDetails = await getUserById(product.seller);
product.seller = userDetails;
}
return productList;
}
async function testGetSalesDetails() {
let productList = [{
"seller": "5cee0e69f67e171ac8ef14c7",
"products": [1,2,3]
},{
"seller": "5d1c36910aec8934cefdda8e",
"products": [4,5,6]
}];
let productListWithSeller = await getSalesDetails(productList);
console.info("Product list (with seller): ", productListWithSeller);
}
testGetSalesDetails();
You could also use utils.promisify to generate the getUserById function (it's very handy!), for example:
const getUserById = util.promisify(User.findById);

Is it possible to update value in the collection in the query?

I am not sure whether the title explains what I try to say.
But here assume collection's structure is like;
[
{email: "alex#sda.com"},
{email: "elizabeth#sds.com"},
{email: "hannah#xx.com"},
]
I want to get emails but before the # character.
What have I done to achieve this?
db.collection.find({}).toArray(function(err, result){
var emails = [];
for(var i =0; i< result.length; i++){
emails.push(result[i].split("#")[0]);
}
})
But I don't find this efficient because after the query, I loop through the result and store them in new array.
Is there a better way to get only the alex, elizabeth, hannah parth with a query?
This is just an example. I want to ask your suggestions because I have lots of situations like this. (I am looking for a most efficient way to trim, update some values in the collections)
You can use the distinct method first to get the list of email addresses then use regex to get the names:
db.collection.distinct("email", function(err, result){
var emails = result.map(function(email){
return email.match(/^([^#]*)#/)[1];
});
console.log(emails);
});
With the aggregation framework, MongoDB 3.4 has a $split operator that you can use in conjunction with $arrayElemAt:
db.collection.aggregate([
{
"$group": {
"_id": null,
"emails": {
"$push": {
"$arrayElemAt": [
{ "$split": ["$email", "#"] },
0
]
}
}
}
}
], function(err, result){
if (err) throw err;
console.log(result);
/*
{
"_id" : null,
"emails" : [
"alex",
"elizabeth",
"hannah"
]
}
*/
});

Aggregate out from Multiple Collections

How I can output result of MongoDB aggregation into collection without replacing the collection from another aggregation output?
I need to get data only with $out: 'tempCollection', because I have 500mln documents, and getting pipeline stage limit
var q = [
{$match: query},
{$group: {_id: '$hash'}},
{$out: 'tempCollection'}
];
async.parallel([
function(callback) {
firstCollection.aggregate(q, callback);
},
function(callback) {
secondCollection.aggregate(q, callback);
},
...
], function() {
// I want to get all from tempCollection (with pagination) here
});
The bottom line here is that the $out option only ever "replaces" output on the target collection. So to do anything else you must work through a client connection rather than just outputting to the server.
Your best option here with mongoose is to step straight into the underlying driver and get access to the node stream interface as supported by the driver.
Trival example, but it shows the basic way to structure:
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/aggtest');
var testSchema = new Schema({},{ "_id": false, strict: false });
var ModelA = mongoose.model( 'ModelA', testSchema ),
ModelB = mongoose.model( 'ModelB', testSchema ),
ModelC = mongoose.model( 'ModelC', testSchema );
function processCursor(cursor,target,callback) {
cursor.on("end",callback);
cursor.on("error",callback);
cursor.on("data",function(data) {
cursor.pause();
target.update(
{ "_id": data._id },
{ "$setOnInsert": { "_id": data._id } },
{ "upsert": true },
function(err) {
if (err) callback(err);
cursor.resume();
}
);
});
}
async.series(
[
// Clean data
function(callback) {
async.each([ModelA,ModelB,ModelC],function(model,callback) {
model.remove({},callback);
},callback);
},
// Sample data
function(callback) {
async.each([ModelA,ModelB],function(model,callback) {
async.each([1,2,3],function(id,callback) {
model.create({ "_id": id },callback);
},callback);
},callback);
},
// Run merge
function(callback) {
async.parallel(
[
function(callback) {
var cursor = ModelA.collection.aggregate(
[
{ "$group": { "_id": "$_id" } }
],
{ "batchSize": 25 }
);
processCursor(cursor,ModelC,callback)
},
function(callback) {
var cursor = ModelB.collection.aggregate(
[
{ "$group": { "_id": "$_id" } }
],
{ "batchSize": 25 }
);
processCursor(cursor,ModelC,callback)
}
],
callback
);
},
// Get merged
function(callback) {
ModelC.find({},function(err,results) {
console.log(results);
callback(err);
});
}
],
function(err) {
if (err) throw err;
mongoose.disconnect();
}
);
Oustide of that, then you are going to need to $out to "separate" collections, and then merge them in with a similar .update() process, but to keep it "server side" then you need to use .eval().
It's not nice, but that is the only way to keep operations on the server. You can also modify this with "Bulk" operations ( again through the same native .collection interface ) for a bit more throughput. But the options come down to "read through the client" or "eval".

How to use populate functionality by using populate or making inner query with aggregation in mongodb

I have following data in my Mongodb.
{
"_id" : ObjectId("54a0d4c5bffabd6a179834eb"),
"is_afternoon_scheduled" : true,
"employee_id" : ObjectId("546f0a06c7555ae310ae925a")
}
I would like to use populate with aggregate, and want to fetch employee complete information in the same response, I need help in this. My code is:
var mongoose = require("mongoose");
var empid = mongoose.Types.ObjectId("54a0d4c5bffabd6a179834eb");
Availability.aggregate()
.match( { employee_id : empid} )
.group({_id : "$employee_id",count: { $sum: 1 }})
.exec(function (err, response) {
if (err) console.log(err);
res.json({"message": "success", "data": response, "status_code": "200"});
}
);
The response i am getting is
{"message":"success","data":{"_id":"54a0d4c5bffabd6a179834eb","count":1},"status_code":"200"}
My expected response is:
{"message":"success","data":[{"_id":"54aa34fb09dc5a54232e44b0","count":1, "employee":{fname:abc,lname:abcl}}],"status_code":"200"}
You can call the model form of .populate() on the result objects from an aggregate operation. But the thing is you are going to need a model to represent the "Result" object returned by your aggregation in order to do so.
There are a couple of steps, best explained with a complete listing:
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
var employeeSchema = new Schema({
"fname": String,
"lname": String
})
var availSchema = new Schema({
"is_afternoon_scheduled": Boolean,
"employee_id": {
"type": Schema.Types.ObjectId,
"ref": "Employee"
}
});
var resultSchema = new Schema({
"_id": {
"type": Schema.Types.ObjectId,
"ref": "Employee"
},
"count": Number
});
var Employee = mongoose.model( "Employee", employeeSchema );
var Availability = mongoose.model( "Availability", availSchema );
var Result = mongoose.model( "Result", resultSchema, null );
mongoose.connect('mongodb://localhost/aggtest');
async.series(
[
function(callback) {
async.each([Employee,Availability],function(model,callback) {
model.remove({},function(err,count) {
console.log( count );
callback(err);
});
},callback);
},
function(callback) {
async.waterfall(
[
function(callback) {
var employee = new Employee({
"fname": "abc",
"lname": "xyz"
});
employee.save(function(err,employee) {
console.log(employee),
callback(err,employee);
});
},
function(employee,callback) {
var avail = new Availability({
"is_afternoon_scheduled": true,
"employee_id": employee
});
avail.save(function(err,avail) {
console.log(avail);
callback(err);
});
}
],
callback
);
},
function(callback) {
Availability.aggregate(
[
{ "$group": {
"_id": "$employee_id",
"count": { "$sum": 1 }
}}
],
function(err,results) {
results = results.map(function(result) {
return new Result( result );
});
Employee.populate(results,{ "path": "_id" },function(err,results) {
console.log(results);
callback(err);
});
}
);
}
],
function(err,result) {
if (err) throw err;
mongoose.disconnect();
}
);
That's the complete example, but taking a closer look at what happens inside the aggregate result is the main point:
function(err,results) {
results = results.map(function(result) {
return new Result( result );
});
Employee.populate(results,{ "path": "_id" },function(err,results) {
console.log(results);
callback(err);
});
}
The first thing to be aware of is that the results returned by .aggregate() are not mongoose documents as they would be in a .find() query. This is because aggregation pipelines typically alter the document in results from what the original schema looked like. Since it is just a raw object, each element is re-cast as a mongoose document for the Result model type defined earlier.
Now in order to .populate() with data from Employee, the model form of this method is called on the array of results in document object form along with the "path" argument to the field to be populated.
The end result fills is the data as it comes from the Employee model it was related to.
[ { _id:
{ _id: 54ab2e3328f21063640cf446,
fname: 'abc',
lname: 'xyz',
__v: 0 },
count: 1 } ]
Different to how you process with find, but it is necessary to "re-cast" and manually call in this way due to how the results are returned.
This is working like applied populate with aggregate using inner query.
var mongoose = require("mongoose");
var empid = mongoose.Types.ObjectId("54a0d4c5bffabd6a179834eb");
Availability.aggregate()
.match( { employee_id : empid} )
.group({_id : "$employee_id",count: { $sum: 1 }})
.exec(function (err, response) {
if (err) console.log(err);
if (response.length) {
var x = 0;
for (var i=0; i< response.length; i++) {
empID = response[i]._id;
if (x === response.length -1 ) {
User.find({_id: empID}, function(err, users){
res.json({"message": "success", "data": users, "status_code": "200"});
});
}
x++;
}
}
}
);

Using $in in MongooseJS with nested objects

I've been successfully using $in in my node webservice when my mongo arrays only held ids. Here is sample data.
{
"_id": {
"$oid": "52b1a60ce4b0f819260bc6e5"
},
"title": "Sample",
"team": [
{
"$oid": "52995b263e20c94167000001"
},
{
"$oid": "529bfa36c81735b802000001"
}
],
"tasks": [
{
"task": {
"$oid": "52af197ae4b07526a3ee6017"
},
"status": 0
},
{
"task": {
"$oid": "52af197ae4b07526a3ee6017"
},
"status": 1
}
]
}
Notice that tasks is an array, but the id is nested in "task", while in teams it is on the top level. Here is where my question is.
In my Node route, this is how I typically deal with calling a array of IDs in my project, this works fine in the team example, but obviously not for my task example.
app.get('/api/tasks/project/:id', function (req, res) {
var the_id = req.params.id;
var query = req.params.query;
models.Projects.findById(the_id, null, function (data) {
models.Tasks.findAllByIds({
ids: data._doc.tasks,
query: query
}, function(items) {
console.log(items);
res.send(items);
});
});
});
That communicates with my model which has a method called findAllByIds
module.exports = function (config, mongoose) {
var _TasksSchema = new mongoose.Schema({});
var _Tasks = mongoose.model('tasks', _TasksSchema);
/*****************
* Public API
*****************/
return {
Tasks: _Tasks,
findAllByIds: function(data, callback){
var query = data.query;
_Tasks.find({_id: { $in: data.ids} }, query, function(err, doc){
callback(doc);
});
}
}
}
In this call I have $in: data.ids which works in the simple array like the "teams" example above. Once I nest my object, as with "task" sample, this does not work anymore, and I am not sure how to specify $in to look at data.ids array, but use the "task" value.
I'd like to avoid having to iterate through the data to create an array with only id, and then repopulate the other values once the data is returned, unless that is the only option.
Update
I had a thought of setting up my mongo document like this, although I'd still like to know how to do it the other way, in the event this isn't possible in the future.
"tasks": {
"status0": [
{
"$oid": "52995b263e20c94167000001"
},
{
"$oid": "529bfa36c81735b802000001"
}
],
"status1": [
{
"$oid": "52995b263e20c94167000001"
},
{
"$oid": "529bfa36c81735b802000001"
}
]
}
You can call map on the tasks array to project it into a new array with just the ObjectId values:
models.Tasks.findAllByIds({
ids: data.tasks.map(function(value) { return value.task; }),
query: query
}, function(items) { ...
Have you try the $elemMatch option in find conditions ? http://docs.mongodb.org/manual/reference/operator/query/elemMatch/

Resources