Node JS Api request in loop - node.js

I'm trying my damnedest to avoid callback hell in my Node JS app, but I need to make a large number of API requests and insert the results into my database.
My issue here (of course) is that my for-loop runs and increments i before I finish my request and database insertion.
for (var i = 0; i <= 1; i++) {
    apiRequest = data[i];
    apicall(apiRequest);
}

function apicall(urlApi) {
    request((urlApi), function(error, response, body) {
        if (error) {
            console.log("error");
        } else if (!error && response.statusCode == 200) {
            var myobj = JSON.parse(body);
            dbInsert(myobj);
        }
    });
}

function dbInsert(obj) {
    //insert into database
}
If someone else comes across this question, I can truly recommend this blog post, which I found after reading the response by joshvermaire:
http://www.sebastianseilund.com/nodejs-async-in-practice

There are a number of ways to approach this type of problem. Firstly, if you can run all the API calls in parallel (all in flight at the same time) and it doesn't matter what order they are inserted in your database, then you can get a result a lot faster by doing that (vs. serializing them in order).
In all the options below, you would use this code:
const rp = require('request-promise');

function apicall(urlApi) {
    return rp({url: urlApi, json: true}).then(function(obj) {
        return dbInsert(obj);
    });
}

function dbInsert(obj) {
    // insert into database
    // return a promise that resolves when the database insertion is done
}
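For illustration, dbInsert might look something like this; the db handle and collection name here are hypothetical stand-ins for whatever promise-returning database driver you actually use:

// Sketch only: assumes a MongoDB-style driver where insertOne()
// returns a promise that resolves when the write completes
function dbInsert(obj) {
    return db.collection('results').insertOne(obj);
}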
Parallel Using ES6 Standard Promises
let promises = [];
for (let i = 0; i < data.length; i++) {
    promises.push(apicall(data[i]));
}

Promise.all(promises).then(() => {
    // all done here
}).catch(err => {
    // error here
});
Parallel using Bluebird Promise Library
With the Bluebird Promise library, you can use Promise.map() to iterate your array, and you can pass it the concurrency option to control how many async calls are in flight at the same time, which might keep from overwhelming either the database or the target API host and might help control max memory usage.
Promise.map(data, apicall, {concurrency: 10}).then(() => {
    // all done here
}).catch(err => {
    // error here
});
In Series using Standard ES6 Promises
If you have to serialize them for some reason such as inserting into the database in order, then you can do that like this. The .reduce() pattern shown below is a classic way to serialize promise operations on an array using standard ES6:
data.reduce((p, item) => {
    return p.then(() => {
        return apicall(item);
    });
}, Promise.resolve()).then(() => {
    // all done here
}).catch(err => {
    // error here
});
In Series Using Bluebird Promises
Bluebird has a Promise.mapSeries() that iterates an array in series, calling a function that returns a promise on each item in the array, which is a little simpler than doing it manually.
Promise.mapSeries(data, apicall).then(() => {
    // all done here
}).catch(err => {
    // error here
});

I'd recommend using something like async.each. Then you could do:
async.each(data, function(apiRequest, cb) {
    apicall(apiRequest, cb);
}, function(err) {
    // do something after all api requests have been made
});

function apicall(urlApi, cb) {
    request((urlApi), function(error, response, body) {
        if (error) {
            console.log("error");
            cb(error);
        } else if (!error && response.statusCode == 200) {
            var myobj = JSON.parse(body);
            dbInsert(myobj, cb);
        }
    });
}

function dbInsert(obj, cb) {
    doDBInsert(obj, cb);
}
When the dbInsert method completes, make sure the cb callback is called. If you need to do this in a series, look at async.eachSeries.
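The serial version has the same shape; only the method name changes. A minimal sketch, reusing the apicall function above:

// Processes one request at a time, in array order
async.eachSeries(data, function(apiRequest, cb) {
    apicall(apiRequest, cb);
}, function(err) {
    if (err) return console.error(err);
    // all api requests have completed, one after another
});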


nodejs express search in request parallel single response

I need to query multiple pages from another API and return an object from a page if a specified value matches.
I guess the problem is that the loop runs asynchronously, because I always get "not found", and later I get "Cannot set headers after they are sent to the client" if the loop found the object.
I solved this by calling it recursively, but I need more speed because there are many pages with many entries. If possible the requests should run in parallel, but "not found" should only be sent after all loops have finished.
router.post('/search', function (req, res) {
    var foundObj = false;
    for (var page = 1; page < req.body.cubesize; page++) {
        request({
            method: 'GET',
            uri: 'http://localhost:8080/api/v1/getpage/json/' + page
        },
        function (error, response, body) {
            if (!error) {
                var result = JSON.parse(body);
                for (var obj in result) {
                    console.log(result[obj]);
                    if (result[obj].hasOwnProperty(req.body.field)) {
                        if (result[obj][req.body.field] == req.body.value) {
                            foundObj = true;
                            return res.status(200).send(result[obj]);
                        }
                    }
                }
            }
        });
    }
    if (!foundObj) {
        return res.status(404).send("not found");
    }
});
Does anyone have an idea how to loop over all pages with all entries quickly, but wait before sending "not found"?
As long as you have a res.send() inside a for loop and at least two matches occur, (at least) two res.send() calls will be executed and an error will be raised.
How to run in parallel?
router.post('/search', function (req, res) {
    const callApi = (page) => new Promise((resolve, reject) => {
        request({
            method: 'GET',
            uri: `http://localhost:8080/api/v1/getpage/json/${page}`,
        },
        function (error, response, body) {
            if (error) return reject(error);
            let result = JSON.parse(body);
            for (var obj in result) {
                console.log(result[obj]);
                if (result[obj].hasOwnProperty(req.body.field)) {
                    if (result[obj][req.body.field] == req.body.value)
                        return resolve(result[obj]);
                }
            }
            return reject(null); // no match on this page
        });
    });
    const promisesArr = [];
    for (let page = 1; page < req.body.cubesize; page++) {
        promisesArr.push(callApi(page));
    }
    Promise.allSettled(promisesArr).then((resArr) => {
        const resolvedArray = resArr.filter(val => val.status === 'fulfilled');
        if (resolvedArray.length === 0) return res.status(404).send("not found");
        if (resolvedArray.length === 1)
            return res.status(200).send(resolvedArray[0].value);
        if (resolvedArray.length > 1)
            return res.status(500).send("Too many matches");
        // It is not clear to me in your code what you need to do in case more than one resolves
    });
});
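For reference, Promise.allSettled() never rejects; it resolves with one descriptor object per input promise, which is why the code above filters on status rather than on the values themselves. A tiny runnable illustration:

Promise.allSettled([Promise.resolve(1), Promise.reject(new Error('no match'))])
    .then(function (resArr) {
        // Logs: [ { status: 'fulfilled', value: 1 },
        //         { status: 'rejected', reason: Error: no match } ]
        console.log(resArr);
    });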
Some explanation about the code: the idea is to promisify request and run the calls in parallel.
To run in parallel, the Promise object provides four methods: Promise.all, Promise.race, Promise.allSettled and Promise.any. The last two, Promise.allSettled and Promise.any, are not supported in older environments, so keep that in mind.
Once you have the array and run it in parallel, Promise.all and Promise.allSettled return an array of results. The array is filtered, and if some value matches, that result is sent as the response; otherwise a 404 is sent.
Further information about promises will be required to select the right one for your specific case. You can read about them at https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
Unfortunately my code is not tested, so please review it and refactor it to adapt to your specific case.

Function with async request in Node js

I have a loop, which iterates over array and in every iteration I have to do a http request, like this:
var httpsRequest = require('request')

var getData = function(id) {
    var result;
    httpsRequest({
        url: 'https://link/' + id,
    }, (error, resp, body) => {
        if (resp.statusCode == 200) {
            result = JSON.parse(body);
        }
    });
    // here I would like to wait for a result
}

var data = [];
for (row in rows) {
    data.push(getData(row.ID))
}

resp.send(JSON.stringify(data)) // I send data back to the client
I cannot do the rest of the for loop in the callback; I have to wait for the result returned from the getData function before moving on to the next iteration.
How to handle this?
PS: I know I could use a callback function, but what if, after the last iteration, the program sends the response (last line above) before the last getData execution finishes?
Regards
As stated in the answer by Johannes, the use of promises is a good idea. Since you're using request, I'd like to propose an alternative method using request-promise, which is a promisified version of request built on Bluebird.
The requests will in this case return a promise, and by using .map() you can create an array of promises that you can await using Promise.all(). When all promises are resolved, the response can be sent! This also differs from the use of .reduce(), which will only start executing the next request as soon as the previous one is done. By using an array of promises, you can start all the requests at the same time.
var httpsRequest = require('request-promise')

var getData = function(id) {
    // request-promise resolves with the response body and rejects on
    // transport errors and non-2xx status codes, so there is no need
    // for a callback or a manual status check; json: true parses the body
    return httpsRequest({
        url: 'https://link/' + id,
        json: true
    });
}
var promises = rows.map(function(row) {
    return getData(row.ID)
});

Promise.all(promises)
    .then(function(results) {
        // All requests are done!
        // The variable results will be an array of all the results in the same order as they were requested
        resp.send(JSON.stringify(results));
    })
    .catch(function(error) {
        // Handle errors from any of the requests
    });
If you need to wait for each iteration to be done before starting another one, you can use Promises and reduce. If you only want to wait for all requests to be finished, it's better to use map + Promise.all as explained in Daniel B's answer.
// I assume rows is an array, as you wrote that you iterate over one.
const results = [];

rows.reduce((previous, row) => {
    return previous.then(() =>
        getData(row.ID).then(result => results.push(result)) // do whatever you want with the result
    );
}, Promise.resolve())
.then(() => resp.send(JSON.stringify(results)));

const getData = (id) => {
    return new Promise((resolve, reject) => {
        httpsRequest({
            url: 'https://link/' + id,
        }, (error, resp, body) => {
            if (error) return reject(error);
            if (resp.statusCode == 200) {
                return resolve(JSON.parse(body));
            }
            return resolve(); // if you want to pass non 200 through. You may want to do sth different here
        });
    });
};

Node.js Parallel calls to same child rest service and aggregating response

I want to call a child REST service from a parent REST service. The number of times the child service is called depends on the parameters passed to the parent service. I call all child service instances concurrently with different parameters, and I want to combine the responses from all of them. I am using the snippet below, but I don't want to rely on a fixed timeout: the response should go out either at the timeout or when all calls to the child service have finished, whichever comes first.
for (i = 0; i < length; i++) {
    url = accountID[i] + '+' + sortcode[i] + '+' + accountHolderName[i];
    micro(url, filter[i], function(resp) {
        this.resutlObject[count] = resp;
        console.log("count" + count);
        count = count + 1;
    }.bind({resutlObject: resutlObject}));
} //end of for

setTimeout(function () {
    console.log("in time out");
    res.end(JSON.stringify(resutlObject || {}, null, 2));
}, 500);
Also, you could use promises. Suppose the service call returns a promise; then you wait until all of them are fulfilled. Node.js supports promises starting from v4. If you have an earlier version of Node, just use a library.
// Instead of
function micro(url, filter, cb) {
    var resp = "result of async job"; // do some async work
    cb(resp)
}

// Modify your service to return a promise
function micro(url, filter) {
    return new Promise(function(resolve, reject) {
        var resp = "result of async job using `url` and `filter`";
        if (resp) {
            resolve(resp);
        } else {
            reject("reason");
        }
    });
}
// Create a list of service calls.
var promises = [];
for (i = 0; i < length; i++) {
    url = accountID[i] + '+' + sortcode[i] + '+' + accountHolderName[i];
    promises.push(micro(url, filter[i]));
}

// Wait for all of them to fulfill
Promise.all(promises)
    .then(function(resultObject) {
        // Response
        res.end(JSON.stringify(resultObject || {}, null, 2));
    }, function(reason) {
        res.sendStatus(500);
        console.error(reason);
    });
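The asker also wanted "whichever comes first" between completion and a timeout. One way to get that with standard promises is to race the combined promise against a timeout promise; a rough sketch, where the 500 ms value just mirrors the original snippet:

// Resolves when all child calls finish, or rejects after 500 ms,
// whichever happens first
var timeout = new Promise(function(resolve, reject) {
    setTimeout(function() { reject(new Error("timed out")); }, 500);
});

Promise.race([Promise.all(promises), timeout])
    .then(function(resultObject) {
        res.end(JSON.stringify(resultObject || {}, null, 2));
    }, function(reason) {
        res.sendStatus(500);
        console.error(reason);
    });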
You can use the async module. It provides a parallel forEach loop.
var obj = {dev: "/dev.json", test: "/test.json", prod: "/prod.json"};
var configs = {};

async.forEachOf(obj, function (value, key, callback) {
    fs.readFile(__dirname + value, "utf8", function (err, data) {
        if (err) return callback(err);
        try {
            configs[key] = JSON.parse(data);
        } catch (e) {
            return callback(e);
        }
        callback();
    })
}, function (err) {
    if (err) console.error(err.message);
    // configs is now a map of JSON data
    doSomethingWith(configs);
})
In this example it reads the files listed in the object; you can do the same for your task.
You could use the async module. It's designed to do the stuff you're after. Something like this:
var async = require('async');
var collection = [];

for (i = 0; i < length; i++) {
    collection.push(
        (function(i) {
            return function(callback) {
                url = accountID[i] + '+' + sortcode[i] + '+' + accountHolderName[i];
                micro(url, filter[i], function(resp) {
                    callback(null, resp);
                });
            }
        })(i)
    );
} //end of for

async.parallel(collection, function(err, results) {
    console.log(results) // array of results from all requests
})
What happens
async.parallel takes an array of functions as an argument. Each function receives a callback as an argument. The callback is a function which takes an error and a result as arguments.
After all callbacks have been executed, async calls the final callback, which receives the array of results from all the other callbacks.
In the loop we are creating just that: a collection of functions. In this example the code is a bit more complex because we use a closure in order to preserve the value of i for each of these functions.
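As a side note, in ES6 you can skip the immediately-invoked wrapper entirely by declaring the loop variable with let, which gives each iteration its own binding of i. A sketch of the same loop:

for (let i = 0; i < length; i++) {
    collection.push(function(callback) {
        // `i` is block-scoped here, so no closure trick is needed
        var url = accountID[i] + '+' + sortcode[i] + '+' + accountHolderName[i];
        micro(url, filter[i], function(resp) {
            callback(null, resp);
        });
    });
}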

Use promises for multiple node requests

With the request library, is there a way to use promises to simplify this callback?
var context = {};

request.get({
    url: someURL,
}, function(err, response, body) {
    context.one = JSON.parse(body);
    request.get({
        url: anotherURL,
    }, function(err, response, body) {
        context.two = JSON.parse(body);
        // render page
        res.render('pages/myPage');
    });
});
Here's a solution using the Bluebird promises library. This serializes the two requests, accumulates the results in the context object, and rolls up error handling into one place:
var Promise = require("bluebird");
var request = Promise.promisifyAll(require("request"), {multiArgs: true});
var context = {};
request.getAsync(someURL).spread(function(response, body) {
context.one = JSON.parse(body);
return request.getAsync(anotherURL);
}).spread(response, body)
context.two = JSON.parse(body);
// render page
res.render('pages/myPage');
}).catch(function(err) {
// error here
});
And, if you have multiple URLs, you can use some of Bluebird's other features like Promise.map() to iterate an array of URLs:
var Promise = require("bluebird");
var request = Promise.promisifyAll(require("request"), {multiArgs: true});
var urlList = ["url1", "url2", "url3"];
Promise.map(urlList, function(url) {
return request.getAsync(url).spread(function(response,body) {
return [JSON.parse(body),url];
});
}).then(function(results) {
// results is an array of all the parsed bodies in order
}).catch(function(err) {
// handle error here
});
Or, you could create a helper function to do this for you:
// pass an array of URLs
function getBodies(array) {
    return Promise.map(array, function(url) {
        return request.getAsync(url).spread(function(response, body) {
            return JSON.parse(body);
        });
    });
}

// sample usage of helper function
getBodies(["url1", "url2", "url3"]).then(function(results) {
    // process results array here
}).catch(function(err) {
    // process error here
});
Here is how I would implement chained Promises.
var request = require("request");
var someURL = 'http://ip.jsontest.com/';
var anotherURL = 'http://ip.jsontest.com/';
function combinePromises(context){
return Promise.all(
[someURL, anotherURL].map((url, i)=> {
return new Promise(function(resolve, reject){
try{
request.get({
url: url,
}, function(err, response, body) {
if(err){
reject(err);
}else{
context[i+1] = JSON.parse(body);
resolve(1); //you can send back anything you want here
}
});
}catch(error){
reject(error);
}
});
})
);
}
var context = {"1": "", "2": ""};
combinePromises(context)
.then(function(response){
console.log(context);
//render page
res.render('pages/myPage');
}, function(error){
//do something with error here
});
Doing this with native Promises. It's good to understand the guts.
This here is known as the "Promise Constructor Antipattern" as pointed out by #Bergi in the comments. Don't do this. Check out the better method below.
var contextA = new Promise(function(resolve, reject) {
    request('http://someurl.com', function(err, response, body) {
        if (err) reject(err);
        else {
            resolve(JSON.parse(body));
        }
    });
});

var contextB = new Promise(function(resolve, reject) {
    request('http://contextB.com', function(err, response, contextB) {
        if (err) reject(err);
        else {
            contextA.then(function(contextA) {
                res.render('page', contextA, contextB);
            });
        }
    });
});
The nifty trick here, and I think by using raw promises you come to appreciate this, is that contextA resolves once and then we have access to its resolved result. That is, we never make the above request to someurl.com again, but still have access to contextA's JSON.
So I can conceivably create a contextC and reuse the JSON without having to make another request. Promises only ever resolve once. You would have to take that anonymous executor function out and put it in a new Promise to refresh that data.
Bonus note:
This executes contextA and contextB in parallel, but will do the final computation that needs both contexts when both A & B are resolved.
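To make the reuse point concrete: attaching a second .then() to contextA hands back the same cached result without firing another HTTP request.

// Both consumers share the single resolved value; the request
// to someurl.com is only ever made once
contextA.then(function(json) { /* first consumer */ });
contextA.then(function(json) { /* second consumer, same data */ });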
Here's my new stab at this.
The main problem with the above solution is that none of the promises are reusable and they are not chained, which is a key feature of promises.
However, I still recommend promisifying your request library yourself and abstaining from adding another dependency to your project. Another benefit of promisifying yourself is you can write your own rejection logic. This is important if you're working with a particular API that sends error messages in the body. Let's take a look:
// Function that returns a new Promise. Beats out the constructor anti-pattern.
const asyncReq = function(options) {
    return new Promise(function(resolve, reject) {
        request(options, function(err, response, body) {
            // Rejected promises can be dealt with in a `catch` block.
            if (err) {
                return reject(err);
            }
            // custom error handling logic for your application.
            else if (hasError(body)) {
                return reject(toError(body));
            }
            // typically I just `resolve` `response` since it contains `body`.
            return resolve(response);
        });
    });
};

asyncReq(urlA)
    .then(function(resA) {
        // Promise.all is the preferred method for managing nested context.
        return Promise.all([resA, asyncReq(urlB)]);
    })
    .then(function(resAB) {
        return render('page', resAB[0], resAB[1]);
    })
    .catch(function(e) {
        console.error(e);
    });
You can use the request-promise library to do this. In your case, you could have something like this, where you chain your requests.
request
    .get({ url: someURL })
    .then(body => {
        context.one = JSON.parse(body);
        // Resolves the promise
        return request.get({ url: anotherURL });
    })
    .then(body => {
        context.two = JSON.parse(body);
        res.render('pages/myPage');
    })
    .catch(e => {
        // Catch errors
        console.log('Error:', e);
    });
By far the easiest is to use the request-promise library. You can also use a promise library like Bluebird and use its promisify functions to convert the request callback API to a promise API, though you may need to write your own promisify function, as request does not use the standard callback semantics. Lastly, you can just make your own promise wrapper, using either native promises or Bluebird.
If you're starting fresh, just use request-promise. If you're refactoring existing code, I would just write a simple wrapper for request using bluebird's spread function.
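A rough sketch of such a hand-rolled wrapper (a minimal version; resolving with a [response, body] pair is what makes Bluebird's .spread() convenient on the consuming side):

var Promise = require("bluebird");
var request = require("request");

// Resolve with both the response and the body so callers can
// unpack them with .spread(function(response, body) { ... })
function requestAsync(options) {
    return new Promise(function(resolve, reject) {
        request(options, function(err, response, body) {
            if (err) return reject(err);
            resolve([response, body]);
        });
    });
}

requestAsync({ url: someURL }).spread(function(response, body) {
    // use response and body here
});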

node.js callback function at the after loop has ended

I have an array of URLs and I want to loop through them and fetch the content. After I have looped through them and fetched the content, I want a callback function to be called.
I know I can do this via async library but I want to do this without using any library.
Sample of what kind of code I want is below
['yahoo.com', 'gmail.com'].each(function(item) {
    // code to fetch URL content
}, someCallbackFunctionToBeExecutedAtTheEndOfLoop);
This is typically the type of thing you would do using promises (but you would need a library), with code like:
var ops = [];
urls.forEach(function(url) {
    ops.push(fetchUrl(url));
});

P.all(ops).then(callback);

function fetchUrl(url) {
    var defer = P.defer();
    // do stuff
    // call defer.resolve(result);
    return defer.promise;
}
If you don't want to use promises, you can use a counter of operations, like:
var ops = urls.length;
urls.forEach(function(url) {
    // do stuff; then, inside the async operation's completion callback
    // (not synchronously in the loop body), decrement the counter:
    ops--;
    if (ops === 0) {
        callback();
    }
});
If you choose promises, I advise using the p-promise module, which is far more optimized than Q.
If you want to do it without any sort of library like async, then you have to write your own counter to keep track of when all the async responses have been completed:
var request = require('request');

function loadAll(list, fn) {
    var cnt = list.length;
    var responses = [];
    list.forEach(function(url, index) {
        request(url, function(error, response, body) {
            if (error) {
                fn(error);
            } else {
                responses[index] = response;
                --cnt;
                if (cnt === 0) {
                    fn(0, responses);
                }
            }
        });
    })
}

loadAll(['http://www.yahoo.com', 'http://www.gmail.com'], function(err, results) {
    if (!err) {
        // process results array here
    }
});
If you're going to be doing many async operations in node.js, then getting a promise library like Bluebird will save you a lot of time. For example, I think you could do the above in something like this (untested):
var Promise = require("bluebird");
var requestP = Promise.promisfy(require("request"));
Promise.map(['http://www.yahoo.com', 'http://www.gmail.com'], requestP).then(function(results) {
// process the array of results here
});
