NodeJS parallel running function to fetch data from an REST API - node.js

I have been working with a Node.js code where I make a call to an API. I get a paginated response of 50 objects at a time. In a query parameter I set an offset as 0, 50, 100 to keep fetching the more data. if the response has 50 objects I increase my offset query parameter. And once the data is less that 50 I stop the call. Is there any way I can split the calling to get data in a faster way? Suppose I call api?offset=0, api?offset=50, api?offset=100, api?offset=150, in parallel so that I get data in a faster way, and collect the data of all the calls made. Also if there is no data stop calling for the next offset values.
NOTE: I don't know the offset limit.

Use recursive function
Since you don't know what the offset limit would be your best guess is to great groups of requests, so you will reach your offset faster.
For async tasks I love to use async here. Here is an example :
const async = require('async')
const offsets = [0, 50, 100, 150];
async.map(offsets, function (offset, callback) {
// Do request here with 'offset'
// And return response in the callback
// Do not return an error, otherwise the main callback (of map function)
// will be called immediately
return callback (null, response);
}, function (error, results) {
// results is an array containing all responses from all requests
// iterate over results, if all contain a valid result,
// you can increase your offset group and perform this operation again
});
Of course you will need to wrap this into another function which allows for an increasing offsetgroup , repeating the requests as long as you get valid results -> a so called recursive function :
const async = require('async')
function performGroupRequest (offsets, allResults, end) {
async.map(offsets, function (offset, callback) {
// Do request here with 'offset' and return response in the callback
return callback (null, response);
}, function (error, results) {
// check results
results.map(r => {
if (hasItems(r)) {
// valid result, add to allResults
allResults.push(r);
} else {
// invalid result, let's end here
return end(allResults);
}
});
// all results are valid here, so call the function again, with increased offset
performGroupRequest(offsets.map(o => o + 200), allResults, end)
});
}
function getAllItems () {
return Promise((resolve, reject) => {
performGroupRequest([0, 50, 100, 150], [], function(all) {
return resolve(all);
});
});
}
// start the journey
getAllItems().then((allResults) => {
});
👆 Please note : I totally omitted error handling here for simplicity reasons.

Related

Multiple Callback Functions with the Node Twitter Library

I'm trying to make requests to Twitter with Twitter for Node and store responses in an array for handling later. I'm pushing returned tweets to an array with each push() happening in a callback, which seems to be working fine. My problem is that I'm having trouble accessing the full array with all pushed tweets.
The cause, of course, is that any attempt to work with that array is getting called before the results from the Twitter API have arrived, so I'm getting an empty array.
How can I (and should I) get my function working with the full array into another callback? I'm asking this from the perspective of someone still trying to get a firm grasp on asynchronous programming - especially multiple callbacks or functions that have to run asynchronously.
Again, current result is tweetHold = [], and I'd like tweetHold to contain all matched tweets for all users in the searchArray.
let searchArray = {
users: ['ByBuddha', 'thetweetofgod']
}
let tweetHold = [];
let T = new Twitter(config);
for (user of searchArray.users) {
let params = {
q: 'from:'+ user,
count: 1,
tweet_mode: 'extended',
result_type: 'recent',
lang: 'en'
}
T.get('search/tweets', params, returnedTweets);
}
function returnedTweets(err, tweets, response) {
tweetHold.push(tweets);
}
// obviously, doesn't work as the array is logged to console before T.get() is done
console.log(tweetHold);
T.get accepts a callback function that gets called once the asynchronous operation is done. But since you want to get multiple responses, and not just one, using just the callbacks by themselves would be a bit messy. For example, inside returnedTweets, you could increment a persistent counter variable, and call the next function once counter === searchArray.users.length, but it would be more elegant to use Promises instead.
Map each T.get call to a Promise that resolves with the tweets variable you're interested in, and then call Promise.all on an array of those Promises. Promise.all takes an array of Promises and returns a Promise that resolves once every Promise in the passed array has resolved.
Note that it looks like you're currently ignoring the err that might come back from T.get - that's probably not a good idea, it would be better to be able to check when errors occur, and then handle the error somehow (otherwise, the tweetHold array may sometimes contain broken data). Luckily, if you use Promises, implementing this is easy - just reject if there's an err, and catch after the Promise.all:
const T = new Twitter(config);
const searchObject = {
users: ['ByBuddha', 'thetweetofgod']
};
const searchPromises = searchArray.users.map((user) => {
return new Promise((resolve, reject) => {
const params = {
q: 'from:'+ user,
count: 1,
tweet_mode: 'extended',
result_type: 'recent',
lang: 'en'
};
T.get('search/tweets', params, (err, tweets) => {
if (err) reject(err);
else resolve(tweets);
});
});
});
Promise.all(searchPromises)
.then((tweetHold) => {
// tweetHold will be an array containing the `tweets` variable for each user searched
console.log(tweetHold);
})
.catch((err) => {
// there was an error, best to handle it somehow
// the `.then` above will not be entered
});

Node.js DNS Lookup scope error? (POST request)

I'm making a DNS Lookup API using Node.js and Express.js framework such that when it sends a POST request, it should return the addresses of different record types.
app.post('/', (req, res) => {
// Request format
// const l = {
// lookup: 'twitter.com',
// recordTypes: ['A', 'TXT']
// };
// Using destructor to fetch properties
const { lookup, recordTypes } = req.body;
console.log(lookup, recordTypes);
// For each record type
recordTypes.forEach(function(type) {
// setTimeout to get something async
setTimeout(function() {
dns.resolve(lookup.toLowerCase(), type, (err, addresses) => {
console.log(type);
if (err) {
return console.log(`\nType(${type}):\n`, err);
}
result = result + JSON.stringify({ type: `${type}`, response: { addresses } });
console.log(result);
});
}, 2000);
});
res.send(result);
});
It logs the correct stuff in the console but when it comes to the response, it returns an empty string. I used setTimeout to mimic the asynchronous nature of the request but it just does not work.
Please assume that I have declared stuff like result etc. because it is working. Also, please don't to redirect me to the Node.js documentation because I have already read that stuff and that's not the problem here. The problem is that I need to get every record type in an array and send that back as a response.
Here's what I have tried:
Tried to push response for each record type in the result array,
Tried to use a for of loop instead of forEach
Please help!
The way I'm reading your code is that for each item in the array you correctly use callbacks to do each individual bit of processing.
However, remember that forEach itself is not asynchronous. Thus you are setting up a bunch of tasks that will complete sometime, then returning undefined... then your results start to trickle in.
There's a couple ways to correctly. As you are using callbacks here I will use that style. You want to get a callback when all items in an array have been completely processed. The async module does this very well, providing a lot of high quality methods that act on arrays and such and give you a way to have a callback when they are all over.
Your function will look something like:
let res = []
async.each( recordTypes,
( type, done ) => {
dns.resolve(lookup.toLowerCase(), type, (err, addresses) => {
result = result + JSON.stringify({ type: `${type}`, response: { addresses } });
done(err)
} )
},
(allOverError) => {
res.send(result);
}
)
Notice there are two function parameters here: the first one is called for every item in the list, and the last is called when every item in the list has been completely processed.
There are other ways too, promises or the async/await keywords (confusing because of the name of the async module), but callbacks are good.

Avoid callback multi-invocation when forEach is used

I have a function that processes an array of data (first parameter) and, once the procesing is finished, it invokes only one time a callback function (second parameter). I'm using forEach to process data item by item, consisting the processing of each item in some checkings and storing the param in database. The function storeInDB() does the storing work and uses a callback (second parameter) when the item has been stored.
A first approach to the code is the following:
function doWork(data, callback) {
data.forEach(function (item) {
// Do some check on item
...
storeInDB(item, function(err) {
// check error etc.
...
callback();
});
});
}
However, it's wrong, as the the callback function will be invoked several times (as many as element in the data array).
I'd like to know how to refactor my code in order to achieve the desired behaviour, i.e. only one invocation to callback once the storing work is finished. I guess that async could help in this task, but I haven't find the right pattern yet to combine async + forEach.
Any help is appreciated!
You can use a library such as async to do this, although I would recommend using promises if possible. For your immediate problem you can use a counter to determine how many storage calls have completed and call the callback when the total number are completed.
let counter = 0;
data.forEach(function (item) {
// Do some check on item
...
storeInDB(item, function(err) {
// check error etc.
counter++
if (counter == data.length) {
callback();
}
});
});
you can also utilize the three parameters passed to the function to execute on each array method
function doWork(data, callback) {
data.forEach(function (value,idx,arr) {
// Do some check on item
...
storeInDB(arr[idx], function(err) {
// check error etc.
...
if ( (idx + 1) === arr.length ) {
callback();
}
});
});
}
If storeInDB function returns a promise, you can push all async functions to an array and use Promise.all. After all tasks run successfully, It will invokes callback function.
Hope this helps you.
function doWork(data, callback) {
let arr = [];
data.map(function(itm) {
// Do some check on item
...
arr.push(storeInDB(item));
});
Promise.all(arr)
.then(function(res) {
callback();
});
}

How do I achieve a synchronous requirement using asynchronous NodeJS

I am adding user validation an data modification page on a node.js application.
In a synchronous universe, in a single function I would:
Lookup the original record in the database
Lookup the user in LDAP to see if they are the owner or admin
Do the logic and write the record.
In an asynchronous universe that won't work. To solve it I've built a series of hand-off functions:
router.post('/writeRecord', jsonParser, function(req, res) {
post = req.post;
var smdb = new AWS.DynamoDB.DocumentClient();
var params = { ... }
smdb.query(params, function(err,data){
if( err == null ) writeRecordStep2(post,data);
}
});
function writeRecord2( ru, post, data ){
var conn = new LDAP();
conn.search(
'ou=groups,o=amazon.com',
{ ... },
function(err,resp){
if( err == null ){
writeRecordStep3( ru, post, data, ldap1 )
}
}
}
function writeRecord3( ru, post, data ){
var conn = new LDAP();
conn.search(
'ou=groups,o=amazon.com',
{ ... },
function(err,resp){
if( err == null ){
writeRecordStep4( ru, post, data, ldap1, ldap2 )
}
}
}
function writeRecordStep4( ru, post, data, ldap1, ldap2 ){
// Do stuff with collected data
}
Additionally, because the LDAP and Dynamo logic are in their own source documents, these functions are scattered tragically around the code.
This strikes me as inefficient, as well as inelegant. I'm eager to find a more natural asynchronous pattern to achieve the same result.
Any promise library should sort your issue out. My preferred choice is bluebird. In summary they help you in performing blocking operations.
If you haven't heard about bluebird then just use it. It converts all function of a module and return promise which is then-able. Simply put, it promisifies all functions.
Here is the mechanism:
Module1.someFunction() \\do your job and finally pass the return object to next call
.then() \\Use that object which is return from the first call, do your job and return the updated value
.then() \\same goes on
.catch() \\do your job when any error occurs.
Hope you understand. Here is an example:
var readFile = Promise.promisify(require("fs").readFile);
readFile("myfile.js",
"utf8").then(function(contents) {
return eval(contents);
}).then(function(result) {
console.log("The result of evaluating
myfile.js", result);
}).catch(SyntaxError, function(e) {
console.log("File had syntax error", e);
//Catch any other error
}).catch(function(e) {
console.log("Error reading file", e);
});
I could not tell from your pseudo-code exactly which async operations depend upon results from with other ones and knowing that is key to the most efficient way to code a series of asynchronous operations. If two operations do not depend upon one another, they can run in parallel which generally gets to an end result faster. I also can't tell exactly what data needs to be passed on to later parts of the async requests (too much pseudo-code and not enough real code to show us what you're really attempting to do).
So, without that level of detail, I'll show you two ways to approach this. The first runs each operation sequentially. Run the first async operation, when it's done, run the next one and accumulates all the results into an object that is passed along to the next link in the chain. This is general purpose since all async operations have access to all the prior results.
This makes use of promises built into the AWS.DynamboDB interface and makes our own promise for conn.search() (though if I knew more about that interface, it may already have a promise interface).
Here's the sequential version:
// promisify the search method
const util = require('util');
LDAP.prototype.searchAsync = util.promisify(LDAP.prototype.search);
// utility function that does a search and adds the result to the object passed in
// returns a promise that resolves to the object
function ldapSearch(data, key) {
var conn = new LDAP();
return conn.searchAsync('ou=groups,o=amazon.com', { ... }).then(results => {
// put our results onto the passed in object
data[key] = results;
// resolve with the original object (so we can collect data here in a promise chain)
return data;
});
}
router.post('/writeRecord', jsonParser, function(req, res) {
let post = req.post;
let smdb = new AWS.DynamoDB.DocumentClient();
let params = { ... }
// The latest AWS interface gets a promise with the .promise() method
smdb.query(params).promise().then(dbresult => {
return ldapSearch({post, dbresult}, "ldap1");
}).then(result => {
// result.dbresult
// result.ldap1
return ldapSearch(result, "ldap2")
}).then(result => {
// result.dbresult
// result.ldap1
// result.ldap2
// doSomething with all the collected data here
}).catch(err => {
console.log(err);
res.status(500).send("Internal Error");
});
});
And, here's a parallel version that runs all three async operations at once and then waits for all three of the to be done and then has all the results at once:
// if the three async operations you show can be done in parallel
// first promisify things
const util = require('util');
LDAP.prototype.searchAsync = util.promisify(LDAP.prototype.search);
function ldapSearch(params) {
var conn = new LDAP();
return conn.searchAsync('ou=groups,o=amazon.com', { ... });
}
router.post('/writeRecord', jsonParser, function(req, res) {
let post = req.post;
let smdb = new AWS.DynamoDB.DocumentClient();
let params = { ... }
Promise.all([
ldapSearch(...),
ldapSearch(...),
smdb.query(params).promise()
]).then(([ldap1Result, ldap2Result, queryResult]) => {
// process ldap1Result, ldap2Result and queryResult here
}).catch(err => {
console.log(err);
res.status(500).send("Internal Error");
});
});
Keep in mind that due to the pseudo-code nature of the code in your question, this is also pseudo-code where implementation details (exactly what parameters you're searching for, what response you're sending, etc...) have to be filled in. This should be illustrative of promise chaining to serialize operations and the use of Promise.all() for parallelizing operations and promisifying a method that didn't have promises built in.

Perform arbitrary set of asynchronous tasks

My input is streamed from another source, which makes it difficult to use async.forEach. I am pulling data from an API endpoint, but I have a limit of 1000 objects per request to the endpoint, and I need to get hundreds of thousands of them (basically all of them) and I will know they're finished when the response contains < 1000 objects. Now, I have tried this approach:
/* List all deposits */
var depositsAll = [];
var depositsIteration = [];
async.doWhilst(this._post(endpoint_path, function (err, response) {
// check err
/* Loop through the data and gather only the deposits */
for (var key in response) {
//do some stuff
}
depositsAll += depositsIteration;
return callback(null, depositsAll);
}, {limit: 1000, offset: 0, sort: 'desc'}),
response.length > 1000, function (err, depositsAll) {
// check for err
// return the complete result
return callback(null, depositsAll);
});
With this code I get an async internal error that iterator is not a function. But in general I am almost sure the logic is not correct as well.
If it's not clear what I'm trying to achieve - I need to perform a request multiple times, and add the response data to a result that at the end contains all the results, so I can return it. And I need to perform requests until the response contains less than 1000 objects.
I also looked into async.queue but could not get the hang of it...
Any ideas?
You should be able to do it like that, but if that example is from your real code you have misunderstood some of how async works. doWhilst takes three arguments, each of them being a function:
The function to be called by async. Gets argument callback that must be called. In your case, you need to wrap this._post inside another function.
The test function (you would give value of response.length > 1000, ie. a boolean, if response would be defined)
The final function to be called once execution is stopped
Example with each needed function separated for readability:
var depositsAll = [];
var responseLength = 1000;
var self = this;
var post = function(asyncCb) {
self._post(endpoint_path, function(err, res) {
...
responseLength = res.length;
asyncCb(err, depositsAll);
});
}
var check = function() {
return responseLength >= 1000;
};
var done = function(err, deposits) {
console.log(deposits);
};
async.doWhilst(post, check, done);

Resources