How do I optimise this callback hierarchy in my simple express app? - node.js

I'm creating a simple RSS feed aggregator. For this I'm using a module called 'blind-parser', which parses an RSS/XML string and returns an object.
My aim was to obtain such objects from multiple RSS feeds and render them. So initially I did this:
var obj1 = {}, obj2 = {}, obj3 = {}, obj4 = {};
app.get('/', function(req, res) {
    parser.parseURL('https://www.toptal.com/blog.rss', function (err, parsed1) {
        obj1 = parsed1;
    });
    parser.parseURL('https://www.toptal.com/designers/blog.rss', function (err, parsed2) {
        obj2 = parsed2;
    });
    parser.parseURL('http://jsfeeds.com/feed', function (err, parsed3) {
        obj3 = parsed3;
    });
    parser.parseURL('http://www.mironov.com/feed/', function (err, parsed4) {
        obj4 = parsed4;
    });
    res.render('index', {'toptal': obj1, 'toptaldesign': obj2, 'jsf': obj3, 'product': obj4});
});
However, it didn't work: obj1, obj2, obj3 and obj4 turned out to be undefined, which I couldn't understand. I tried declaring the variables both inside and outside the app.get, to no effect.
So I did this instead,
app.get('/', function(req, res) {
    parser.parseURL('https://www.toptal.com/blog.rss', function (err, parsed1) {
        parser.parseURL('https://www.toptal.com/designers/blog.rss', function (err, parsed2) {
            parser.parseURL('http://jsfeeds.com/feed', function (err, parsed3) {
                parser.parseURL('http://www.mironov.com/feed/', function (err, parsed4) {
                    res.render('index', {'toptal': parsed1, 'toptaldesign': parsed2, 'jsf': parsed3, 'product': parsed4});
                });
            });
        });
    });
});
And voila, it worked! However, the major downside to this approach is that the load time roughly doubles every time I nest another RSS link. Already, the time it takes to load the page with three-tier nesting is ~20 seconds.
Can someone please help me out and offer a solution to this? I'm new to Node, so please be elaborate.
Thanks.

I'm assuming the parser.parseURL function is actually fetching the specified URL and parsing its contents. Each of your four parseURL requests is asynchronous: each function is invoked synchronously, but its callback is executed at some later point in time, once the URL has been fetched. Meanwhile, your res.render call has also run synchronously, immediately after the preceding parseURL calls but before any of their callbacks has been invoked, and so before obj1 etc. have been set.
When you nest the calls within each callback, as in your second example, you ensure that each URL result is fetched before proceeding, but because you fetch each URL sequentially (i.e. you don't fetch the second URL until the first has returned), the total execution time is the sum of all the individual fetches.
You can speed things up considerably by fetching all four URLs in parallel (assuming your requests can be handled concurrently). The simplest options are to use a library like async, or alternatively to convert your code to use promises. With async, your code could be changed to fetch the URLs in parallel like so:
const async = require('async');

async.parallel([
    (cb) => {
        parser.parseURL('https://www.toptal.com/blog.rss', cb);
    },
    (cb) => {
        parser.parseURL('https://www.toptal.com/designers/blog.rss', cb);
    },
    (cb) => {
        parser.parseURL('http://jsfeeds.com/feed', cb);
    },
    (cb) => {
        parser.parseURL('http://www.mironov.com/feed/', cb);
    }
],
(err, results) => {
    // Called once all requests have completed (or as soon as one fails).
    if (err) return res.status(500).send('Failed to fetch feeds');
    res.render('index', {'toptal': results[0], 'toptaldesign': results[1], 'jsf': results[2], 'product': results[3]});
});
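If you'd rather take the promise route mentioned above, here is a minimal sketch. It assumes parser.parseURL follows the standard Node (err, result) callback convention, so util.promisify can wrap it:

const util = require('util');
const parseURLAsync = util.promisify(parser.parseURL.bind(parser));

app.get('/', function(req, res) {
    // Promise.all starts all four fetches immediately and resolves once
    // every one has completed, so the page waits only for the slowest feed.
    Promise.all([
        parseURLAsync('https://www.toptal.com/blog.rss'),
        parseURLAsync('https://www.toptal.com/designers/blog.rss'),
        parseURLAsync('http://jsfeeds.com/feed'),
        parseURLAsync('http://www.mironov.com/feed/')
    ]).then(function([toptal, toptaldesign, jsf, product]) {
        res.render('index', { toptal: toptal, toptaldesign: toptaldesign, jsf: jsf, product: product });
    }).catch(function(err) {
        res.status(500).send('Failed to fetch feeds');
    });
});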

Related

Node.js DNS Lookup scope error? (POST request)

I'm making a DNS lookup API using Node.js and the Express.js framework, such that when it receives a POST request, it returns the addresses for the requested record types.
app.post('/', (req, res) => {
    // Request format
    // const l = {
    //   lookup: 'twitter.com',
    //   recordTypes: ['A', 'TXT']
    // };
    // Using destructuring to fetch properties
    const { lookup, recordTypes } = req.body;
    console.log(lookup, recordTypes);
    // For each record type
    recordTypes.forEach(function(type) {
        // setTimeout to get something async
        setTimeout(function() {
            dns.resolve(lookup.toLowerCase(), type, (err, addresses) => {
                console.log(type);
                if (err) {
                    return console.log(`\nType(${type}):\n`, err);
                }
                result = result + JSON.stringify({ type: `${type}`, response: { addresses } });
                console.log(result);
            });
        }, 2000);
    });
    res.send(result);
});
It logs the correct stuff in the console, but the response comes back as an empty string. I used setTimeout to mimic the asynchronous nature of the request, but it just does not work.
Please assume that I have declared stuff like result etc., because that part is working. Also, please don't redirect me to the Node.js documentation; I have already read it, and that's not the problem here. The problem is that I need to get every record type in an array and send that back as a response.
Here's what I have tried:
Tried to push the response for each record type into the result array
Tried to use a for...of loop instead of forEach
Please help!
The way I'm reading your code, for each item in the array you correctly use callbacks to do each individual bit of processing.
However, remember that forEach itself is not asynchronous. You are setting up a bunch of tasks that will complete at some later time, then immediately sending the (still empty) result... and only afterwards do your results start to trickle in.
There are a couple of ways to do this correctly. As you are using callbacks here, I will stick with that style. You want a callback to fire once all items in an array have been completely processed. The async module does this very well, providing many high-quality methods that act on arrays and give you a callback once they are all done.
Your function will look something like:
let result = '';
async.each(recordTypes,
    (type, done) => {
        dns.resolve(lookup.toLowerCase(), type, (err, addresses) => {
            result = result + JSON.stringify({ type: `${type}`, response: { addresses } });
            done(err);
        });
    },
    (allOverError) => {
        // Called once every record type has been resolved (or on the first error).
        if (allOverError) return res.status(500).send(allOverError.message);
        res.send(result);
    }
);
Notice there are two function parameters here: the first is called for every item in the list, and the last is called once every item in the list has been completely processed.
There are other ways too: promises, or the async/await keywords (confusingly named, given the async module), but callbacks work fine here.
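For comparison, here is a minimal promise-based sketch using Node's built-in dns.promises API (available since Node 10); the route body and response shape are assumptions based on the question:

const dns = require('dns').promises;

app.post('/', async (req, res) => {
    const { lookup, recordTypes } = req.body;
    try {
        // Start one resolve per record type and wait for all of them.
        const results = await Promise.all(
            recordTypes.map(async (type) => {
                const addresses = await dns.resolve(lookup.toLowerCase(), type);
                return { type, response: { addresses } };
            })
        );
        res.json(results);
    } catch (err) {
        res.status(500).json({ error: err.message });
    }
});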

Redis Node - Get from hash - Not inserting into array

My goal is to collect the values read from a Redis hash. I am using the redis package for Node.js.
My code is the following:
getFromHash(ids) {
    const resultArray = [];
    ids.forEach((id) => {
        common.redisMaster.hget('mykey', id, (err, res) => {
            resultArray.push(res);
        });
    });
    console.log(resultArray);
},
The array logged at the end of the function is empty, even though res is not empty. What could I do to fill this array, please?
You need to use some control flow, either the async library or promises (as described in the redis docs).
Put your console.log inside the callback that receives the results from the redis call; then you will see the values print out. Use one of the control-flow patterns for your .forEach as well, since the loop itself completes synchronously.
If you modify your code to something like this, it will work nicely:
var getFromHash = function getFromHash(ids) {
    const resultArray = [];
    ids.forEach((id) => {
        common.redisMaster.hget('mykey', id, (err, res) => {
            resultArray.push(res);
            if (resultArray.length === ids.length) {
                // All done.
                console.log('getFromHash complete: ', resultArray);
            }
        });
    });
};
In your original code you're printing the result array before any of the hget calls have returned.
Another approach would be to create an array of promises and then call Promise.all on it, as sketched below.
You'll see this kind of behavior a lot with Node; remember that it uses asynchronous calls for almost all I/O. When you're coming from a language where most function calls are synchronous, this kind of problem trips you up a lot!
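A minimal sketch of that promise-based approach, assuming common.redisMaster is a client whose hget follows the standard Node (err, result) callback convention, so util.promisify can wrap it:

const { promisify } = require('util');

const hgetAsync = promisify(common.redisMaster.hget).bind(common.redisMaster);

function getFromHash(ids) {
    // Build one promise per id and wait for all of them to resolve.
    return Promise.all(ids.map((id) => hgetAsync('mykey', id)))
        .then((resultArray) => {
            console.log('getFromHash complete: ', resultArray);
            return resultArray;
        });
}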

How do I achieve a synchronous requirement using asynchronous NodeJS

I am adding user validation and data modification pages to a node.js application.
In a synchronous universe, in a single function I would:
Lookup the original record in the database
Lookup the user in LDAP to see if they are the owner or admin
Do the logic and write the record.
In an asynchronous universe that won't work. To solve it I've built a series of hand-off functions:
router.post('/writeRecord', jsonParser, function(req, res) {
    post = req.post;
    var smdb = new AWS.DynamoDB.DocumentClient();
    var params = { ... }
    smdb.query(params, function(err, data) {
        if (err == null) writeRecordStep2(post, data);
    });
});
function writeRecordStep2(ru, post, data) {
    var conn = new LDAP();
    conn.search(
        'ou=groups,o=amazon.com',
        { ... },
        function(err, resp) {
            if (err == null) {
                writeRecordStep3(ru, post, data, ldap1);
            }
        }
    );
}
function writeRecordStep3(ru, post, data) {
    var conn = new LDAP();
    conn.search(
        'ou=groups,o=amazon.com',
        { ... },
        function(err, resp) {
            if (err == null) {
                writeRecordStep4(ru, post, data, ldap1, ldap2);
            }
        }
    );
}
function writeRecordStep4(ru, post, data, ldap1, ldap2) {
    // Do stuff with collected data
}
Additionally, because the LDAP and Dynamo logic are in their own source documents, these functions are scattered tragically around the code.
This strikes me as inefficient, as well as inelegant. I'm eager to find a more natural asynchronous pattern to achieve the same result.
Any promise library should sort your issue out. My preferred choice is bluebird. In summary, promises let you chain asynchronous operations instead of nesting them.
If you haven't heard of bluebird, just use it. It can convert every function of a module into one that returns a then-able promise. Simply put, it promisifies all functions.
Here is the mechanism:
Module1.someFunction()   // do your job and finally pass the return object to the next call
    .then()              // use the object returned from the first call, do your job and return the updated value
    .then()              // and so on
    .catch()             // do your job when any error occurs
Hope you understand. Here is an example:
var Promise = require("bluebird");
var readFile = Promise.promisify(require("fs").readFile);

readFile("myfile.js", "utf8").then(function(contents) {
    return eval(contents);
}).then(function(result) {
    console.log("The result of evaluating myfile.js", result);
}).catch(SyntaxError, function(e) {
    console.log("File had syntax error", e);
}).catch(function(e) {
    // Catch any other error
    console.log("Error reading file", e);
});
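Bluebird can also promisify a whole module at once. A minimal sketch (promisifyAll adds an "Async"-suffixed, promise-returning version of each function; the file name here is just an example):

var Promise = require("bluebird");
var fs = Promise.promisifyAll(require("fs"));

// readFileAsync is the promisified counterpart of fs.readFile.
fs.readFileAsync("myfile.js", "utf8").then(function(contents) {
    console.log("Read", contents.length, "characters");
}).catch(function(e) {
    console.log("Error reading file", e);
});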
I could not tell from your pseudo-code exactly which async operations depend upon the results of other ones, and knowing that is key to coding a series of asynchronous operations most efficiently. If two operations do not depend upon one another, they can run in parallel, which generally reaches the end result faster. I also can't tell exactly what data needs to be passed on to later parts of the async requests (there's too much pseudo-code and not enough real code to show us what you're really attempting to do).
So, without that level of detail, I'll show you two ways to approach this. The first runs each operation sequentially: run the first async operation, and when it's done, run the next one, accumulating all the results into an object that is passed along to the next link in the chain. This is general-purpose, since every async operation has access to all the prior results.
This makes use of the promises built into the AWS.DynamoDB interface and creates our own promise for conn.search() (though if I knew more about that interface, it might already have a promise interface).
Here's the sequential version:
// promisify the search method
const util = require('util');
LDAP.prototype.searchAsync = util.promisify(LDAP.prototype.search);

// utility function that does a search and adds the result to the object passed in;
// returns a promise that resolves to the object
function ldapSearch(data, key) {
    var conn = new LDAP();
    return conn.searchAsync('ou=groups,o=amazon.com', { ... }).then(results => {
        // put our results onto the passed-in object
        data[key] = results;
        // resolve with the original object (so we can collect data here in a promise chain)
        return data;
    });
}

router.post('/writeRecord', jsonParser, function(req, res) {
    let post = req.post;
    let smdb = new AWS.DynamoDB.DocumentClient();
    let params = { ... }
    // The latest AWS interface gets a promise with the .promise() method
    smdb.query(params).promise().then(dbresult => {
        return ldapSearch({post, dbresult}, "ldap1");
    }).then(result => {
        // result.dbresult
        // result.ldap1
        return ldapSearch(result, "ldap2");
    }).then(result => {
        // result.dbresult
        // result.ldap1
        // result.ldap2
        // doSomething with all the collected data here
    }).catch(err => {
        console.log(err);
        res.status(500).send("Internal Error");
    });
});
And here's a parallel version that runs all three async operations at once, waits for all three of them to be done, and then has all the results at once:
// if the three async operations you show can be done in parallel
// first promisify things
const util = require('util');
LDAP.prototype.searchAsync = util.promisify(LDAP.prototype.search);

function ldapSearch(params) {
    var conn = new LDAP();
    return conn.searchAsync('ou=groups,o=amazon.com', { ... });
}

router.post('/writeRecord', jsonParser, function(req, res) {
    let post = req.post;
    let smdb = new AWS.DynamoDB.DocumentClient();
    let params = { ... }
    Promise.all([
        ldapSearch(...),
        ldapSearch(...),
        smdb.query(params).promise()
    ]).then(([ldap1Result, ldap2Result, queryResult]) => {
        // process ldap1Result, ldap2Result and queryResult here
    }).catch(err => {
        console.log(err);
        res.status(500).send("Internal Error");
    });
});
Keep in mind that due to the pseudo-code nature of the code in your question, this is also pseudo-code where implementation details (exactly what parameters you're searching for, what response you're sending, etc...) have to be filled in. This should be illustrative of promise chaining to serialize operations and the use of Promise.all() for parallelizing operations and promisifying a method that didn't have promises built in.

callback in callback with waterfall in nodejs

I am using MEAN (Mongo, Express, Angular, Node.js) for a project. The problem is that I have to add one extra attribute to the data received from the query object, producing a new data array that is exactly the old one but with one extra attribute per element. I know how to add the attribute and pass it into a callback using the waterfall model, but as I am using multiple callback functions and for loops, I am not able to get the expected result.
code:
var fetchRevenue = function(restaurantArray, type, startDate, endDate, fn) {
    _.forEach(restaurantArray, function(rest) {
        fetchDateWiseReport(new Date('07/10/2015'), new Date('07/16/2015'), rest._id, type, function(orders) {
            var newOrders = [];
            async.waterfall([
                function(callback) {
                    if (orders && orders.length > 0) {
                        async.forEach(orders, function(order) {
                            getSellingPriceOfItems(order.orders, function(sp) {
                                order.sp = sp;
                                newOrders.push(order);
                                if (newOrders.length === orders.length)
                                    callback(null, newOrders);
                            });
                        });
                    } else {
                        newOrders.push([]);
                    }
                },
                function(newOrders, callback) {
                    var restArr = [];
                    // get sum of all orders of each restaurant and add into restArr
                    callback(null, restArr);
                },
                function(restArr, callback) {
                    callback(null, newOrders);
                }
            ], function(err, result) {
                fn(result);
            });
        });
    });
};
where my functions are:
fetchDateWiseReport = fetches the restaurant records for the given dates and passes the result to its callback
getSellingPriceOfItems = queries the item model to find the price of each item and passes the selling price of the given array to its callback
My complete code, including all functions, is here.
Now I want orders to equal newOrders, with the additional attribute 'sp', but I am unable to get this. Can you suggest how to proceed?
Use the Express way to handle this callback problem: chain middleware in your route.
app.get('/your/route', fetchDateWiseReport, second, finalReturningResult)
Your first middleware does the first async loop, assigns its result to something like req.body.firstResult, and passes control to the second function, and so on. A minimal sketch of this pattern follows.
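Here is one way that middleware chain could look (the route, parameter names, and res.locals usage are assumptions for illustration, built around the question's fetchDateWiseReport and getSellingPriceOfItems):

// Each middleware performs one async step, stashes its result on res.locals,
// and calls next() to hand off to the following middleware.
function fetchOrders(req, res, next) {
    fetchDateWiseReport(new Date('07/10/2015'), new Date('07/16/2015'), req.params.restId, req.query.type, function(orders) {
        res.locals.orders = orders || [];
        next();
    });
}

function addSellingPrices(req, res, next) {
    async.each(res.locals.orders, function(order, done) {
        getSellingPriceOfItems(order.orders, function(sp) {
            order.sp = sp;
            done();
        });
    }, next); // next() runs once every order has its 'sp'
}

app.get('/revenue/:restId', fetchOrders, addSellingPrices, function(req, res) {
    res.json(res.locals.orders);
});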

Patterns for async list processing in node.js

I have an application where a database query returns a number of rows (typically, less than 100). For each row, I need to make an http call to get supplemental data. I'd like to fire off all of the requests, and then when the last callback completes, move on to rendering the result page.
So far, the answers to similar questions I've looked at have either been chaining the requests by making request #2 in the callback for request #1 (advantages: simple, avoids burying the server in multiple requests), or by firing all of the requests with no tracking of whether all of the requests have completed (works well in the browser where the callback updates the UI).
My current plan is to keep a counter of requests made and have the callback decrement the counter; if it reaches zero, I can call the render function. I may also need to handle the case where responses come in faster than requests are being made (not likely, but a possible edge case).
Are there are other useful patterns for this type of problem?
When using async, the code could look roughly like this:
var async = require('async');

var results = [];

// The second argument (1) is the concurrency: raise it to fetch several rows at once.
var queue = async.queue(function(row, callback) {
    http.fetchResultForRow(row, function(data) {
        results.push(data);
        callback();
    });
}, 1);

queue.drain = function() {
    console.log("All results loaded");
    renderEverything(results);
};

database.fetch(function(rows) {
    for (var i = 0; i < rows.length; i++) {
        queue.push(rows[i]);
    }
});
If the order of execution does not matter, you could also use async.map, as sketched below.
Look around in the documentation of async; there are a lot of useful patterns.
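A minimal async.map version of the same idea (assuming, as above, that http.fetchResultForRow passes only the data to its callback):

var async = require('async');

database.fetch(function(rows) {
    async.map(rows, function(row, callback) {
        http.fetchResultForRow(row, function(data) {
            // adapt to the (err, result) signature that async.map expects
            callback(null, data);
        });
    }, function(err, results) {
        // results arrive in the same order as the input rows
        renderEverything(results);
    });
});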
You can implement this quite nicely with promises using the when library, though if you want to rate-limit the calls that fetch the extra info, you will need to do a little more work than in TheHippo's async approach, I think.
Here's an example:
var when = require('when');

// This is the function that gets the extra info.
// I've added a setTimeout to show how it is async.
function get_extra_info_for_row(x, callback) {
    setTimeout(function() { return callback(null, x + 10); }, 1);
}

var rows = [1, 2, 3, 4, 5];

var row_promises = rows.map(
    function(x) {
        var deferred = when.defer();
        get_extra_info_for_row(x, function(err, extra_info) {
            if (err) return deferred.reject(err);
            deferred.resolve([x, extra_info]);
        });
        return deferred.promise;
    });

when.all(row_promises)
    .then(
        function(augmented_rows) { console.log(augmented_rows); },
        function(err) { console.log("Error", err); }
    );
This outputs
[ [ 1, 11 ], [ 2, 12 ], [ 3, 13 ], [ 4, 14 ], [ 5, 15 ] ]
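For reference, a minimal sketch of the same pattern with native promises (no library needed on any Node version with built-in Promise support):

var row_promises = rows.map(function(x) {
    return new Promise(function(resolve, reject) {
        get_extra_info_for_row(x, function(err, extra_info) {
            if (err) return reject(err);
            resolve([x, extra_info]);
        });
    });
});

Promise.all(row_promises).then(
    function(augmented_rows) { console.log(augmented_rows); },
    function(err) { console.log("Error", err); }
);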
