How to synchronize requests? - node.js

I am using nodejs+express+mongoose. Assuming I have 2 schemas and models in place: "Fruits" and "Vegetables".
Assuming I have the following:
var testlist = ["Tomato", "Carrot", "Orange"];
var convertedList = [];
// Assume res is the "response" object in express
I want to be able to check each item in the array against the "fruits" and "vegetables" collections respectively, and insert them into a converted list where Tomato, Carrot, and Orange are replaced with their respective documents.
Below is some pseudocode of what I think it would look like, but I don't know how to actually do this.
for(var i = 0; i < testlist.length; i++) {
    var fruitfind = Fruit.find({"name": testlist[i]});
    var vegfind = Vegetables.find({"name": testlist[i]});
    // If fruit only
    if(fruitfind) {
        convertedList.push(fruitfindresults);
    }
    // If vegetable only
    else if(vegfind) {
        convertedList.push(vegfindresults);
    }
    // If identified as a fruit and a vegetable (assume tomato is a doc listed under both fruit and vegetable collections)
    else if (fruitfind && vegfind) {
        convertedList.push(vegfindresults);
    }
}
// Converted List should now contain the appropriate docs found.
res.send(convertedList); // Always appears to return an empty array... how do I wait for all the fruitfind and vegfind callbacks to finish?
What is the best way to do this? Or is this even possible?

Assuming there's only one of each fruit/vegetable and that you intended to push a veggie that's found in both collections twice.
var async = require("async"),
    testlist = ["Tomato", "Carrot", "Orange"];

async.map(testlist, function (plant, next) {
    async.parallel([
        function (done) {
            Fruit.findOne({"name": plant}, done);
        },
        function (done) {
            Vegetables.findOne({"name": plant}, done);
        }
    ], function (err, plants) { // the parallel callback receives a single results array, not (err, fruit, veggie)
        next(err, plants);
    });
},
function (err, result) {
    // result is an array of [fruit, veggie] pairs; flatten it before sending
    var convertedList = [].concat.apply([], result);
    res.send(convertedList);
});
Note: I haven't actually tested the code, but it should work. The async module is excellent for managing callbacks like this, by the way.
Update
To get each item only once, the async.parallel callback simply has to be rewritten like this:
function (err, plants) {
    next(err, plants[0] || plants[1]);
}
And there's no concat needed anymore in the .map callback:
function (err, result) {
    res.send(result);
}
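As an aside, if you would rather not pull in the async module, roughly the same idea can be sketched with plain promises, assuming a mongoose version whose queries return promises from .exec() (the question doesn't confirm that, so treat this as a sketch only):
// look every name up in both collections in parallel, then pick one doc per name
Promise.all(testlist.map(function (name) {
    return Promise.all([
        Fruit.findOne({ name: name }).exec(),
        Vegetables.findOne({ name: name }).exec()
    ]).then(function (plants) {
        return plants[0] || plants[1]; // prefer the fruit doc, fall back to the vegetable
    });
})).then(function (convertedList) {
    res.send(convertedList);
}).catch(function (err) {
    res.status(500).send(err.message);
});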

find is an asynchronous function, and it makes a request to the mongo database. This means two things:
The functions will not return results immediately. The find function in mongoose follows a very common async pattern. It accepts a "callback" function which it will call with either the results, or an error. By node convention, if the first argument is not null, it is an error.
// So typically you'd call find like this
SomeModel.find({your: 'conditions'}, function (err, results) {
    if (err) {
        // some error has occurred which you must handle
    } else {
        res.send(results);
    }
});
// note that if code existed on the lines following the find, it would
// be executed *before* the find completed.
As every query fires another request off to the database, you typically want to limit the number of queries if you can. In this case, instead of finding each fruit/veg by name, you could look for all the names at once by using mongo's $in.
With these two things in mind, your code might look something like this:
// here *first* we're finding fruits
Fruit.find({name: {$in: testlist}}, function (err, fruits) {
    // when the fruit request calls back with results, we find vegetables
    Vegetable.find({name: {$in: testlist}}, function (err, vegetables) {
        // finally concat and send the results
        res.send(fruits.concat(vegetables));
    });
});
To have both requests happen in parallel, a little more work is required. You could use a library like async, or write something yourself like:
var fruits
  , vegetables
  , done = function () {
        if (fruits && vegetables) {
            res.send(fruits.concat(vegetables));
        }
    };

Fruit.find({name: {$in: testlist}}, function (err, docs) {
    fruits = docs;
    done();
});
Vegetable.find({name: {$in: testlist}}, function (err, docs) {
    vegetables = docs;
    done();
});
Note that both examples here simply concat the results and send them, as it's not clear how you want the results processed. This means that if a tomato, for example, was in both collections, it would appear in the results twice, once as a Fruit document and once as a Vegetable document.
You'll also need to handle any errors coming back from mongoose.
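For example, a minimal sketch of what that error handling might look like in the parallel version above (the 500 response is just a placeholder; handle errors however suits your app):
var fruits, vegetables, firstErr;

function done(err) {
    if (err && !firstErr) {
        firstErr = err;
        return res.status(500).send('database error');
    }
    if (!firstErr && fruits && vegetables) {
        res.send(fruits.concat(vegetables));
    }
}

Fruit.find({name: {$in: testlist}}, function (err, docs) {
    fruits = docs;
    done(err);
});
Vegetable.find({name: {$in: testlist}}, function (err, docs) {
    vegetables = docs;
    done(err);
});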
Edit: Uniquely named docs
In light of your comment, this is one way you might return only one doc for Tomato (or other records that are both fruit and vegetable)
// after retrieving fruits and vegetables, create a map which will
// serve to weed out docs with duplicate names
var map = {};
fruits.forEach(function (fruit) {
    map[fruit.name] = fruit;
});
vegetables.forEach(function (vegetable) {
    map[vegetable.name] = vegetable;
});

var results = [];
// this would sort by name
Object.keys(map).sort().forEach(function (key, i) {
    results[i] = map[key];
});
res.send(results);
res.send(results);
Note that this sort of thing becomes much more complicated if you need to sort and paginate or otherwise limit the result of the two queries, and if you need that you might rather consider keeping the documents in the same collection.

Related

Node.js DNS Lookup scope error? (POST request)

I'm making a DNS Lookup API using Node.js and Express.js framework such that when it sends a POST request, it should return the addresses of different record types.
app.post('/', (req, res) => {
    // Request format
    // const l = {
    //     lookup: 'twitter.com',
    //     recordTypes: ['A', 'TXT']
    // };

    // Using destructuring to fetch properties
    const { lookup, recordTypes } = req.body;
    console.log(lookup, recordTypes);

    // For each record type
    recordTypes.forEach(function(type) {
        // setTimeout to get something async
        setTimeout(function() {
            dns.resolve(lookup.toLowerCase(), type, (err, addresses) => {
                console.log(type);
                if (err) {
                    return console.log(`\nType(${type}):\n`, err);
                }
                result = result + JSON.stringify({ type: `${type}`, response: { addresses } });
                console.log(result);
            });
        }, 2000);
    });

    res.send(result);
});
It logs the correct stuff in the console but when it comes to the response, it returns an empty string. I used setTimeout to mimic the asynchronous nature of the request but it just does not work.
Please assume that I have declared stuff like result etc. because it is working. Also, please don't redirect me to the Node.js documentation because I have already read that stuff and that's not the problem here. The problem is that I need to get every record type in an array and send that back as a response.
Here's what I have tried:
Tried to push response for each record type in the result array,
Tried to use a for of loop instead of forEach
Please help!
The way I'm reading your code is that for each item in the array you correctly use callbacks to do each individual bit of processing.
However, remember that forEach itself is not asynchronous: it sets up a bunch of tasks that will complete at some point and then returns immediately, so res.send(result) runs before any of your results have trickled in.
There are a couple of ways to do this correctly. As you are using callbacks here, I will use that style. You want a callback that fires when all items in an array have been completely processed. The async module does this very well, providing a lot of high-quality methods that act on arrays and the like and give you a callback when they are all done.
Your function will look something like:
const async = require('async');

// `result` is assumed to be declared elsewhere, as in the question
async.each(recordTypes,
    (type, done) => {
        dns.resolve(lookup.toLowerCase(), type, (err, addresses) => {
            result = result + JSON.stringify({ type: `${type}`, response: { addresses } });
            done(err);
        });
    },
    (allOverError) => {
        res.send(result);
    }
);
Notice there are two function parameters here: the first one is called for every item in the list, and the last is called when every item in the list has been completely processed.
There are other ways too, promises or the async/await keywords (confusing because of the name of the async module), but callbacks are good.
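For reference, a rough sketch of the promise-based route mentioned above, assuming Node 8+ so that util.promisify is available (variable names mirror the question):
const util = require('util');
const dns = require('dns');
const resolve = util.promisify(dns.resolve);

app.post('/', (req, res) => {
    const { lookup, recordTypes } = req.body;
    // resolve every record type in parallel and wait for all of them
    Promise.all(recordTypes.map(type =>
        resolve(lookup.toLowerCase(), type)
            .then(addresses => ({ type, response: { addresses } }))
            .catch(() => ({ type, response: { addresses: [] } })) // tolerate per-type failures
    ))
        .then(results => res.json(results))
        .catch(err => res.status(500).json({ error: err.message }));
});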

Node.js: async.map getting slower

Hello,
I use Node.js to provide an API for storing data on a MongoDB database.
I ran multiple tests on a read method, which takes ids and returns the corresponding documents. The point is that I must return these documents in the specified order. To ensure that, I use the following code:
// Sequentially fetch every element
function read(ids, callback) {
    var i = 0;
    var results = [];
    function next() {
        db.findOne(ids[i], function (err, doc) {
            results.push(err ? null : doc);
            if (ids.length > ++i) {
                return next();
            }
            callback(results);
        });
    }
    next();
}
This way, documents are fetched one-by-one, in the right order. It takes about 11s on my laptop to retrieve 27k documents.
However, I thought that it was possible to improve this method:
// Asynchronously map the whole array
var async = require('async');

function read(ids, callback) {
    async.map(ids, db.findOne.bind(db), callback);
}
After running a single test, I was quite satisfied seeing that the 27k documents were retrieved in only 8s using simpler code.
The problem happens when I repeat the same request: the response time keeps growing (proportionally to the number of elements retrieved): 9s 10s 11s 12s.... This problem does not happen in the sequential version.
I tried two versions of Node.js, v6.2.0 and v0.10.29. The problem is the same. What causes this latency and how could I suppress it?
Try async.mapLimit to prevent overload. You will need some tests to tune the limit value for your environment.
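A minimal sketch of what that looks like with the read function from the question (the limit of 10 is just a starting point to tune):
var async = require('async');

function read(ids, callback) {
    // at most 10 findOne calls in flight at any time
    async.mapLimit(ids, 10, db.findOne.bind(db), callback);
}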
But find({_id: {$in: list}}) is always better, because it makes a single database request instead of many.
I suggest you restore the original order client-side.
Something like this:
function read(ids, cb) {
    db.find(
        {_id: {$in: ids.map(id => mongoose.Types.ObjectId(id))}},
        process
    );

    function process(err, docs) {
        if (err) return cb(err);
        return cb(null, docs.sort(ordering));
    }

    function ordering(a, b) {
        // sort ascending by each doc's position in the original ids array
        return ids.indexOf(a._id.toString()) - ids.indexOf(b._id.toString());
    }
}
The find query may need adjusting; I don't know exactly which MongoDB driver you use.
This code is a first attempt; more manual sorting can improve performance a lot, since [].indexOf is heavy too (O(n) per call).
But I'm almost sure that even as-is it will work much faster.
Possible ordering replacement:
var idHash = {};
for (var i = 0; i < ids.length; i++)
    idHash[ids[i]] = i;

function ordering(a, b) {
    return idHash[a._id.toString()] - idHash[b._id.toString()];
}
A comparison sort is O(n log n) at best, but we already know the final position of each found document, so we can restore the original order in O(n):
var idHash = ids.reduce((c, id, i) => (c[id] = i, c), {});

function process(err, docs) {
    if (err) return cb(err);
    return cb(null,
        docs.reduce(
            (c, doc) => (c[idHash[doc._id.toString()]] = doc, c),
            ids.map(id => null))); // fill not-found docs with null
}
Functional style makes the code more flexible. For example, this code can easily be modified to use async.reduce to block less, as sketched below.
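A rough, untested sketch of that async.reduce variant, as a drop-in replacement for the process function above (the setImmediate call is what yields to the event loop between documents):
var async = require('async');

function process(err, docs) {
    if (err) return cb(err);
    async.reduce(docs, ids.map(() => null), (acc, doc, next) => {
        acc[idHash[doc._id.toString()]] = doc;
        setImmediate(() => next(null, acc)); // give the event loop a chance between items
    }, cb);
}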

callback in callback with waterfall in nodejs

I am using MEAN (Mongo, Express, Angular, Node.js) for a project. The problem is that I have to add one extra attribute to the data received from a query object, and build a new data array that is exactly the old data array except that each element has one extra attribute. I know how to add the attribute and pass it along in a callback using the waterfall model, but as I am using multiple callback functions and for loops I am not able to get the expected result.
code:
var fetchRevenue = function(restaurantArray, type, startDate, endDate, fn) {
    _.forEach(restaurantArray, function(rest) {
        fetchDateWiseReport(new Date('07/10/2015'), new Date('07/16/2015'), rest._id, type, function(orders) {
            var newOrders = [];
            async.waterfall([
                function(callback) {
                    if (orders && orders.length > 0) {
                        async.forEach(orders, function(order) {
                            getSellingPriceOfItems(order.orders, function(sp) {
                                order.sp = sp;
                                newOrders.push(order);
                                if (newOrders.length === orders.length)
                                    callback(null, newOrders);
                            });
                        });
                    } else {
                        newOrders.push([]);
                    }
                },
                function(newOrders, callback) {
                    var restArr = [];
                    // get sum of all orders of each restaurant and add into restArr
                    callback(null, restArr);
                },
                function(restArr, callback) {
                    callback(null, newOrders);
                }
            ], function(err, result) {
                fn(result);
            });
        });
    });
};
where my functions are:
fetchDateWiseReport fetches the restaurant records for the given date range and sends the result in a callback.
getSellingPriceOfItems queries the item model to find the price of each item and sends the selling price for the given array back in a callback.
My complete code, including all functions, is here.
Now I want orders to be equal to newOrders with the additional attribute 'sp', but I am unable to get this. Can you suggest how to proceed?
Use the Express way of handling the callback problem: chain handlers in your route.
app.get('/your/route', fetchDateWiseReport, second, finalReturningResult)
Your first handler runs the first async loop, assigns its result to something like req.body.firstResult, and passes control to the second function with next(), and so on. A rough sketch of what this could look like is below.
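This sketch reuses fetchDateWiseReport and getSellingPriceOfItems from the question; the route path, parameter handling, and the choice to stash intermediate results on res.locals (rather than req.body) are illustrative only:
var async = require('async');

app.get('/revenue/:restId', function fetchReport(req, res, next) {
    var type = req.query.type; // illustrative: take type from the query string
    fetchDateWiseReport(new Date('07/10/2015'), new Date('07/16/2015'), req.params.restId, type, function(orders) {
        res.locals.orders = orders || [];
        next();
    });
}, function addSellingPrice(req, res, next) {
    async.map(res.locals.orders, function(order, done) {
        getSellingPriceOfItems(order.orders, function(sp) {
            order.sp = sp; // attach the extra attribute
            done(null, order);
        });
    }, function(err, newOrders) {
        if (err) return next(err);
        res.locals.newOrders = newOrders;
        next();
    });
}, function sendResult(req, res) {
    res.json(res.locals.newOrders); // orders with the added 'sp' attribute
});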

node.js, express - executing mysql queries one after another within loops in a synchronous way

In my node.js + express app, I am making an ajax call with the superagent module. The call fetches database data into a complex array through quite a few database queries using the node-mysql module.
Before pasting the code, I will try to explain in words what I am trying to do, although the code largely speaks for itself; the key addition is that all the asynchronous things inside the first callback should be done in a synchronous way.
Explanation:
Inside the callback of the first query, a for loop runs the second query multiple times, and each iteration should only proceed after the callback of the second query has completed. The same applies to the nested queries that follow.
Code:
You can, however, skip the innards of the for loops (marked in comments) to keep things brief and easy if you want.
conn.query("SELECT * FROM `super_cats`", function(error, results, fields) {
    if (error) { console.log("error while fetching products for homepage " + error); }
    for (var i in results) { // FIRST FOR LOOP INSIDE THE FIRST QUERY CALLBACK
        /* Innards of for loop start */
        var elem = new Object();
        var supcat_id = results[i].id;
        elem.super_id = supcat_id;
        elem.cats = new Array();
        var cat = '';
        /* Innards of for loop end */
        conn.query("SELECT * FROM `categories` WHERE `supcat_id`=" + supcat_id, function(error_cats, results_cats, fields_cats) {
            if (error_cats) { console.log("error while fetching cats for menu " + error_cats); }
            for (var j in results_cats) {
                /* Innards of for loop start */
                cat = new Object();
                var cat_id = results_cats[j].id;
                cat.cat_id = cat_id;
                cat.cat_name = results_cats[j].cat_name;
                cat.subcats = new Array();
                /* Innards of for loop end */
                conn.query("SELECT * FROM `subcategories` WHERE `category`=" + cat_id, function(error_subcats, results_subcats, fields_subcats) {
                    if (error_subcats) { console.log("error while fetching subcats for menu " + error_subcats); }
                    for (var k in results_subcats) {
                        /* Innards of for loop start */
                        var subcat = new Object();
                        var subcat_id = results_subcats[k].id;
                        subcat.subcat_id = subcat_id;
                        subcat.subcat_name = results_subcats[k].subcategory;
                        cat.subcats.push(subcat);
                        elem.cats.push(cat);
                        /* Innards of for loop end */
                    } // end of for loop for results_subcats
                });
            } // end of for loop for result_cats
        });
        super_cats.push(elem);
    } // end of for supercat results
    res.send(super_cats);
});
I tried the async module, but in vain, as I just could not figure out which function to use in this case.
To be brief, the requirements are:
1) All the asynchronous things inside the first callback should be done in a synchronous (ordered) way.
2) The response should be sent to the ajax call only after all the calculations are done, and not before (as would probably happen with the asynchronous code as it exists now, wouldn't it?)
It may be just semantics, but it's important to understand that you cannot run this in a synchronous way. You have to run it asynchronously, and manage the order of the processing to get the desired effect. I find it useful to think about these kinds of problems more in terms of how I want to transform the data (à la functional programming) rather than the imperative code I would write in a more synchronous environment.
From what I can tell by the code, you want to end up with a data structure in super_cats that looks something like this:
[
    {
        super_id: 1,
        cats: [
            {
                cat_id: 2,
                cat_name: "Category",
                subcats: [
                    {
                        subcat_id: 3,
                        subcat_name: "Subcategory"
                    },
                    ...
                ]
            },
            ...
        ]
    },
    ...
]
Let's start by extracting this into a single function call with a single callback.
function getCategoryTree(callback) {
}
Now, then, let's take it from the top. You want to run a single asynchronous function (an SQL query), and you want to produce an array with one entry per result. That sounds like a map operation to me. However, since we want one of the values (cats) to be determined asynchronously, we need to use an asynchronous map, which the async library provides.
Let's just fill in the async.map signature for now; we want to map over our results (this is the functional equivalent of our for loop), and for each one we want to turn the result into something—the asynchronous function that does the something is called the iterator. Finally, once we have all our transformed array elements, we want to call the callback given to our function.
function getCategoryTree(callback) {
    conn.query("SELECT * FROM `super_cats`", function(error, results, fields) {
        async.map(results, iterator, callback);
    });
}
Let's create a new function for getting the top-level category information, and use its name in place of our iterator placeholder.
function getCategoryTree(callback) {
    conn.query("SELECT * FROM `super_cats`", function(error, results, fields) {
        async.map(results, getSuperCategory, callback);
    });
}

function getSuperCategory(resultRow, callback) {
}
Now we need to decide what we want to give back for each resultRow. Based on our diagram above, we want an object with super_id equal to the row's ID, and cats equal to all the categories in the top-level category. However, since cats is also determined asynchronously, we need to run the next query and transform those results before we can move on.
Similar to last time, we want each item in our cats array to be an object with some information from the query's result, but we also want a subcats array, which is again determined asynchronously, so we'll use async.map again. This time, however, we'll use an anonymous function for the callback, since we want to do something with the results before we give them to the higher-level callback.
function getSuperCategory(resultItem, callback) {
    var supcat_id = resultItem.id;
    conn.query("SELECT * FROM `categories` WHERE `supcat_id` = " + supcat_id, function(error, results, fields) {
        async.map(results, getCategory, function(err, categories) {
            callback(err, { super_id: supcat_id, cats: categories });
        });
    });
}
As you can see, once this async.map is done, it means we have all the categories under this super-category; thus, we can call our callback with the object we want to be in the array.
Now that that's done, we just need to implement getCategory. It will look very similar to getSuperCategory, because we want to do basically the same thing—for each result, return an object that has some data from the query, but also an asynchronous component.
function getCategory(resultItem, callback) {
    var cat_id = resultItem.id;
    var cat_name = resultItem.cat_name;
    conn.query("SELECT * FROM `subcategories` WHERE `category` = " + cat_id, function(error, results, fields) {
        async.map(results, getSubCategory, function(err, subcategories) {
            callback(err, { cat_id: cat_id, cat_name: cat_name, subcats: subcategories });
        });
    });
}
Now, we just need to implement getSubCategory.
function getSubCategory(resultItem, callback) {
    callback(null, {
        subcat_id: resultItem.id,
        subcat_name: resultItem.subcategory
    });
}
Oops! The data we need from getSubCategory doesn't have an asynchronous component! It turns out we didn't need that last async.map at all; we could have used a regular array map; let's change getCategory and getSubCategory to work that way.
function getCategory(resultItem, callback) {
    var cat_id = resultItem.id;
    var cat_name = resultItem.cat_name;
    conn.query("SELECT * FROM `subcategories` WHERE `category` = " + cat_id, function(error, results, fields) {
        var subcategories = results.map(getSubCategory);
        callback(error, { cat_id: cat_id, cat_name: cat_name, subcats: subcategories });
    });
}

function getSubCategory(resultItem) {
    return {
        subcat_id: resultItem.id,
        subcat_name: resultItem.subcategory
    };
}
It's worth noting that our original method worked fine; if there's a chance getSubCategory ever has an async component, you could just leave it as it was.
And that's it! Here's the code that I wrote as I was writing this answer; note that I had to fake out the SQL a bit, but I think the idea is there:
var async = require("async");

// fake out sql queries
var queryNum = 0;
var conn = {
    query: function(query, callback) {
        queryNum++;
        var results = [1, 2, 3, 4, 5].map(function(elem) {
            return {
                id: queryNum + "-" + elem,
                cat_name: "catname-" + queryNum + "-" + elem,
                subcategory: "subcategory-" + queryNum + "-" + elem
            };
        });
        callback(null, results, null);
    }
};

function getCategoryTree(callback) {
    conn.query("SELECT * FROM `super_cats`", function(error, results, fields) {
        async.map(results, getSuperCategory, callback);
    });
}

function getSuperCategory(resultItem, callback) {
    var supcat_id = resultItem.id;
    conn.query("SELECT * FROM `categories` WHERE `supcat_id` = " + supcat_id, function(error, results, fields) {
        async.map(results, getCategory, function(err, categories) {
            callback(err, { super_id: supcat_id, cats: categories });
        });
    });
}

function getCategory(resultItem, callback) {
    var cat_id = resultItem.id;
    var cat_name = resultItem.cat_name;
    conn.query("SELECT * FROM `subcategories` WHERE `category` = " + cat_id, function(error, results, fields) {
        var subcategories = results.map(getSubCategory);
        callback(error, { cat_id: cat_id, cat_name: cat_name, subcats: subcategories });
    });
}

function getSubCategory(resultItem) {
    return {
        subcat_id: resultItem.id,
        subcat_name: resultItem.subcategory
    };
}

getCategoryTree(function(err, result) {
    console.log(JSON.stringify(result, null, " "));
});
There are some inefficiencies here, but for simplicity's sake I've glossed over them. For example, rather than running the second sub-query over and over, you could query at once for all the category IDs, then query all the categories at once, etc. Then, once you have all the data, you could loop over each array synchronously to pull out the pieces you need.
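As a rough illustration of that batching idea (column names taken from the original queries, error handling kept minimal, and all rows of each table fetched for simplicity rather than filtering with IN):
// three queries total, then the tree is assembled synchronously in memory
conn.query("SELECT * FROM `super_cats`", function (err1, supers) {
    conn.query("SELECT * FROM `categories`", function (err2, cats) {
        conn.query("SELECT * FROM `subcategories`", function (err3, subs) {
            var err = err1 || err2 || err3;
            if (err) return res.status(500).send(err.message);

            var tree = supers.map(function (s) {
                return {
                    super_id: s.id,
                    cats: cats.filter(function (c) { return c.supcat_id === s.id; }).map(function (c) {
                        return {
                            cat_id: c.id,
                            cat_name: c.cat_name,
                            subcats: subs.filter(function (sc) { return sc.category === c.id; }).map(function (sc) {
                                return { subcat_id: sc.id, subcat_name: sc.subcategory };
                            })
                        };
                    })
                };
            });
            res.send(tree);
        });
    });
});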
In addition, there are better ways to store tree structures in relational databases; in particular, take a look at Modified Preorder Tree Traversal.

Node.js promises with mongoskin

I'm trying to avoid using callbacks when making mongodb queries. I'm using mongoskin to make calls like so:
req.db.collection('users').find().toArray(function (err, doc) {
    res.json(doc);
});
In many cases I need to make multiple queries so I want to use Node.js promise library but I'm not sure how to wrap these functions as promises. Most of the examples I see are trivial for things like readFile, I'm guessing in this case I would need to wrap toArray somehow? Can this be done or would have to be something implemented by mongoskin?
An example could be any set of callbacks, find/insert, find/find/insert, find/update:
req.db.collection('users').find().toArray(function (err, doc) {
    if (doc) {
        req.db.collection('users').find().toArray(function (err, doc) {
            // etc...
        });
    }
    else {
        // err
    }
});
You can promisify the entire module like so with bluebird:
var Promise = require("bluebird");
var mongoskin = require("mongoskin");

Object.keys(mongoskin).forEach(function(key) {
    var value = mongoskin[key];
    if (typeof value === "function") {
        Promise.promisifyAll(value);
        Promise.promisifyAll(value.prototype);
    }
});
Promise.promisifyAll(mongoskin);
This only needs to be done once, in one place in your application, not everywhere in your application code.
After that you just use methods normally except with the Async suffix and don't pass callbacks:
req.db.collection('users').find().toArrayAsync()
    .then(function(doc) {
        if (doc) {
            return req.db.collection('users').find().toArrayAsync();
        }
    })
    .then(function(doc) {
        if (doc) {
            return req.db.collection('users').find().toArrayAsync();
        }
    })
    .then(function(doc) {
        if (doc) {
            return req.db.collection('users').find().toArrayAsync();
        }
    });
So again, if you call a function like
foo(a, b, c, function(err, result) {
    if (err) return console.log(err);
    // Code
});
The promise-returning version is called like:
fooAsync(a, b, c).then(...)
(Uncaught errors are automatically logged, so you don't need to check for them if all you were going to do is log them.)
Just stumbled here with the same question and didn't love "promisifying" mongoskin, so I did a bit more digging and found monk. It's built on top of mongoskin, tidies up the API, and returns promises for all async calls. Probably worth a peek for anyone else who lands here.
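Usage looks roughly like this inside a route handler (from memory, so double-check against the monk docs; the connection string is illustrative):
var monk = require('monk');
var db = monk('localhost/mydb');
var users = db.get('users');

users.find({}).then(function (docs) {
    res.json(docs);
}, function (err) {
    res.status(500).json({ error: err.message });
});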
Esailija's answer may work, but it's not super efficient since you have to run db.collection on every single db call. I don't know exactly how expensive that is, but looking at the code in mongoskin, it's non-trivial. Not only that, it globally modifies prototypes, which isn't very safe.
The way I do this with fibers/futures is:
wrap the collection methods for each collection
on receiving the result, for methods that return a Cursor wrap the toArray method, call it and return the resulting future (for methods that don't return a cursor, you don't need to do anything else).
use the future as normal
like this:
var Future = require("fibers/future");

// note: when I originally wrote this answer fibers/futures didn't have a good/intuitive
// wrapping function; but as of 2014-08-18, it does have one
function futureWrap() {
    // function
    if (arguments.length === 1) {
        var fn = arguments[0];
        var object = undefined;
    // object, methodName
    } else {
        var object = arguments[0];
        var fn = object[arguments[1]];
    }

    return function() {
        var args = Array.prototype.slice.call(arguments);
        var future = new Future;
        args.push(future.resolver());
        var me = this;
        if (object) me = object;
        fn.apply(me, args);
        return future;
    };
}
var methodsYouWantToHave = ['findOne', 'find', 'update', 'insert', 'remove', 'findAndModify'];
var methods = {};
methodsYouWantToHave.forEach(function(method) {
    methods[method] = futureWrap(this.collection, method);
}.bind(this));

// use them
var document = methods.findOne({_id: 'a3jf938fj98j'}, {}).wait();
var documents = futureWrap(methods.find({x: 'whatever'}, {}).wait(), 'toArray')().wait();
If you don't want to use fibers, I'd recommend using the async-future module, which has a good wrap function built in too.
