Async map with headers for fetching multiple URLs in parallel - node.js

I am learning async.map. I want to download a bunch of URLs, but I want to send a header along with each GET request.
If I didn't need to send a header, I could have just done:
var request = require('request');
var async = require('async');

var urls = ['http://myurl1.com', 'http://myurl2.com', 'http://myurl3.com'];

async.map(urls, request, function(err, results) {
    if (err) throw(err); // handle error
    console.log(results.length); // == urls.length
});
Now I need to send the header {"x-url-key":"myurlkey"} along with every GET request.
How do I modify the code above to do so?

That should be straightforward enough to do: we can create a wrapper function requestWithHeader to pass to async.map, which specifies whichever headers (or other options) you wish.
I'm also specifying json: true here; you may not want to do that in your actual code.
In this example I'm using https://httpbin.org/get as the URL. It sends back all of the request parameters, which is useful for testing since we can see which headers we populated.
var request = require('request');
var async = require('async');

var urls = ["https://httpbin.org/get?foo=bar", "https://httpbin.org/get?foo=baz"];

function requestWithHeader(uri, callback) {
    request(uri, { headers: { "x-url-key": "myurlkey" }, json: true }, callback);
}

async.map(urls, requestWithHeader, function(err, results) {
    if (err) throw(err); // handle error
    console.log("Results:", results.map(result => result.body));
});
To wait for async.map to finish you can create an asynchronous function to call it, e.g.
async function testMapWithPromise() {
    try {
        let results = await async.map(urls, requestWithHeader);
        console.log("testMapWithPromise: Results:", results.map(result => result.body));
        // Do whatever with results here...
    } catch (error) {
        console.error("testMapWithPromise: An error occurred:", error);
    }
}

testMapWithPromise();
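Another option (not from the original answer, but a pattern the request library itself supports) is request.defaults(), which returns a copy of request with the header baked in, so it can be passed straight to async.map without a wrapper. A minimal sketch:

var request = require('request');
var async = require('async');

var urls = ["https://httpbin.org/get?foo=bar", "https://httpbin.org/get?foo=baz"];

// request.defaults() returns a wrapped request with these options applied to every call.
var requestWithKey = request.defaults({
    headers: { "x-url-key": "myurlkey" },
    json: true
});

async.map(urls, requestWithKey, function (err, results) {
    if (err) throw err;
    console.log("Results:", results.map(result => result.body));
});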

Related

Returning body of a request function inside a variable

I have an endpoint in my Node backend which, for each item in my Adhoc collection in my local database, needs to return the _id along with a number value that I have to calculate from the body of a request() call, all as an array of objects. The objects will look like this:
{id: "id", sum: 3}
To do this I need to iterate through the Adhocs with a for loop and make a request for each one to get the sum value, and I need to be able to store these values until I have all of them so I can res.send() the array to the front end. I am having trouble storing the sum value in a variable. The code of the request is below.
let theSum = request(options, function (error, response, body) {
    if (error) throw new Error(error);
    console.log(
        'Response: ' + response.statusCode + ' ' + response.statusMessage
    );
    let bodyy = JSON.parse(body);
    let sum = bodyy.fields.timetracking.originalEstimateSeconds / 3600 * theRate;
    return sum;
});
I know this is wrong, as the return statement belongs to the callback inside the request function, so it won't return the sum to my variable. And adding another callback function would basically be the same scenario. Does anyone have any suggestions for how I can store the value from the request function so I can make further calls?
I found an answer that works pretty well for me. I didn't try Terry's answer above, but I suppose that works too since it also uses Promises. What I've done is wrap the request call in a Promise inside a function that returns it. Code below:
function asyncRequest(url) {
    return new Promise(function (resolve, reject) {
        var options = {
            url: 'http://localhost:8080/rest/' + url,
            auth: { username: 'username', password: 'password' },
            headers: {
                'Accept': 'application/json'
            }
        };
        request(options, function (err, response, body) {
            if (err) return reject(err);
            resolve(JSON.parse(body));
        });
    });
}
When I want to retrieve something I just do something like this (inside an async function):
let json = await asyncRequest('agile/1.0/issue/' + adhoc[u].jIssue);
And that variable holds the parsed body of the response, which I can then use.
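For completeness, here is a rough sketch of how that wrapper could feed the array the question asks for, assuming an Express handler plus the adhoc items and theRate value from the question (the adhocs name and the /sums route are illustrative, not from the original post):

app.get('/sums', async function (req, res) {
    try {
        const results = [];
        for (const adhoc of adhocs) { // `adhocs` stands in for the Adhoc collection items
            const body = await asyncRequest('agile/1.0/issue/' + adhoc.jIssue);
            const sum = body.fields.timetracking.originalEstimateSeconds / 3600 * theRate;
            results.push({ id: adhoc._id, sum: sum });
        }
        res.send(results); // only sent once every sum has been collected
    } catch (err) {
        res.status(500).send(err.message);
    }
});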
You can use async and await along with request-promise-native to loop over your objects and get the list of results you want.
You can call the readEstimates function in an express.get() handler as long as the handler is asynchronous (or you can use readEstimates().then(..)).
We will also wrap an error handler around the readEstimates call, since it could potentially throw an error.
For example:
const rp = require('request-promise-native');

async function readEstimates() {
    const sumList = [];
    for (const adhoc of adhocList) {
        // Set your options here, e.g. the url for each request.. by setting json to true we don't need to JSON.parse the body.
        let options = { url: SOME_URL, json: true, resolveWithFullResponse: true };
        let response = await rp(options);
        console.log('Response: ' + response.statusCode + ' ' + response.statusMessage);
        const sum = response.body.fields.timetracking.originalEstimateSeconds / 3600 * theRate;
        sumList.push(sum);
    }
    return sumList;
}
async function testReadEstimates() {
    try {
        const sumList = await readEstimates();
        console.log("Sumlist:", sumList);
    } catch (error) {
        console.error("testReadEstimates: An error has occurred:", error);
    }
}

testReadEstimates();
You can also use readEstimates in an Express route:
app.get('/', async (req, res) => {
    try {
        const sumList = await readEstimates();
        res.json({ sumList }); // Send the list to the client.
    } catch (error) {
        console.error("/: An error has occurred:", error);
        res.status(500).send("an error has occurred");
    }
});

Alexa API skill - nodejs get request not executing

I'm working on my first Alexa skill and, as a starting point, would like Alexa to state data retrieved from a simple GET request (see lambda function below). For some reason, however, the request does not actually seem to be executing - nothing from inside request.get() is printing to the console and speechOutput is 'Outside Request' after the handler executes. I'm also new to looking through CloudWatch logs and have not been able to find any information about the network requests to even know if this is being attempted. Any help here would be welcome!
'use strict';

// Required node packages
const alexa = require('./node_modules/alexa-sdk');
const request = require('request');
// var https = require('https')

// This is the handler; when the lambda is invoked, this is what's called
exports.handler = function (event, context, callback) {
    const skill = alexa.handler(event, context);
    skill.appId = '<app_id>';
    skill.registerHandlers(handlers);
    skill.execute();
};

// Alexa handlers
const handlers = {
    'LaunchRequest': function () {
        console.log("inside of LaunchRequest");
        const speechOutput = "Hello from NASA!";
        this.response.speak(speechOutput).listen(speechOutput);
        this.emit(':responseReady');
    },
    // Entering our main, part finding function
    'GetAPOD': function () {
        const intent_context = this;
        const speechOutput = getData();
        intent_context.response.speak(speechOutput).listen(speechOutput);
        intent_context.emit(':responseReady');
    },
    'Unhandled': function () {
        console.log("inside of unhandled");
        const speechOutput = "I didn't understand that. Please try again";
        this.response.speak(speechOutput).listen(speechOutput);
        this.emit(':responseReady');
    }
};

const getData = function () {
    const url = "https://api.nasa.gov/planetary/apod?api_key=<key>";
    console.log("inside get data");
    request.get(url, function (error, response, body) {
        console.log("inside request");
        console.log('error', error); // Print the error if one occurred
        console.log('statusCode:', response && response.statusCode); // Print the response status code if a response was received
        console.log('body:', body); // Print the APOD response body
        return "complete request";
        return body;
    });
    return "outside request";
};
I have found in the past that such API requests will get clobbered because they are not synchronous, like David stated. To resolve this, I have had to tuck the request in a promise to get it to resolve, something similar to this in your case:
Change your function to contain the promise:
const getData = function () {
    const url = "https://api.nasa.gov/planetary/apod?api_key=<key>";
    console.log("inside get data");
    return new Promise(function (resolve, reject) {
        request.get(url, function (error, response, body) {
            if (error) {
                return reject(error);
            }
            if (body) {
                resolve(JSON.parse(body));
            }
        });
    });
};
Then change your intent handler to use the promise:
// Entering our main, part finding function
'GetAPOD': function () {
    const intent_context = this;
    getData()
        .then(function (body) {
            let speechOutput = body;
            intent_context.response.speak(speechOutput).listen(speechOutput);
            intent_context.emit(':responseReady');
        });
},
Something along these lines. You would need to play with it a bit to make sure the results are produced as you intend. Hope this helps.

Function with async request in Node.js

I have a loop which iterates over an array, and in every iteration I have to make an HTTP request, like this:
var httpsRequest = require('request')

var getData = function(id) {
    var result;
    httpsRequest({
        url: 'https://link/' + id,
    }, (error, resp, body) => {
        if (resp.statusCode == 200) {
            result = JSON.parse(body);
        }
    });
    // here I would like to wait for a result
}

var data = [];
for (row in rows) {
    data.push(getData(row.ID))
}

resp.send(JSON.stringify(data)) // I send data back to the client
I cannot do the rest of the for loop inside the callback; I have to wait for the result returned from getData and then move to the next iteration.
How do I handle this?
PS: I know I could use a callback function, but what if, after the last iteration, the program sends the response (last line above) before the last getData call finishes?
Regards
As stated in the answer by Johannes, the use of promises is a good idea. Since you're using request, I'd like to propose an alternative method using request-promise, which is a promisified version of request built on Bluebird.
The requests will in this case return promises, and by using .map() you can create an array of promises that you can await with Promise.all(). When all promises are resolved, the response can be sent. This also differs from the use of .reduce(), which only starts the next request once the previous one is done. With an array of promises, you can start all the requests at the same time.
var httpsRequest = require('request-promise')

var getData = function(id) {
    // request-promise returns a promise; with json: true the body is parsed for us,
    // and a non-2xx status code rejects the promise (handled in the .catch() below).
    return httpsRequest({
        url: 'https://link/' + id,
        json: true
    });
}
var promises = rows.map(function(row) {
    return getData(row.ID);
});

Promise.all(promises)
    .then(function(results) {
        // All requests are done!
        // The variable results will be an array of all the results in the same order as they were requested
        resp.send(JSON.stringify(results));
    })
    .catch(function(error) {
        // Handle the error thrown in the 'getData' function
    });
If you need to wait for each iteration to be done before starting another one, you can use Promises and reduce. If you only want to wait for all requests to be finished, it's better to use map + Promise.all as explained in Daniel B's answer.
// I assume rows is an array, as you wrote that you iterate over one.
const results = [];

rows.reduce((previous, row) => {
    return previous.then(() =>
        getData(row.ID).then(result => results.push(result)) // do whatever you want with the result
    );
}, Promise.resolve())
.then(() => resp.send(JSON.stringify(results)));

const getData = (id) => {
    return new Promise((resolve, reject) => {
        httpsRequest({
            url: 'https://link/' + id,
        }, (error, resp, body) => {
            if (error) return reject(error);
            if (resp.statusCode == 200) {
                return resolve(JSON.parse(body));
            }
            return resolve(); // if you want to pass non-200 through. You may want to do sth different here
        });
    });
};
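The same sequential behaviour can also be written with async/await instead of reduce. This is just a sketch using the promise-based getData above, not part of the original answer:

async function getAllSequentially(rows) {
    const results = [];
    for (const row of rows) {
        // each request waits for the previous one to finish
        results.push(await getData(row.ID));
    }
    return results;
}

getAllSequentially(rows)
    .then(results => resp.send(JSON.stringify(results)))
    .catch(error => resp.status(500).send(error.message));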

Use promises for multiple node requests

With the request library, is there a way to use promises to simplify this callback?
var context = {};

request.get({
    url: someURL,
}, function(err, response, body) {
    context.one = JSON.parse(body);
    request.get({
        url: anotherURL,
    }, function(err, response, body) {
        context.two = JSON.parse(body);
        // render page
        res.render('pages/myPage');
    });
});
Here's a solution using the Bluebird promise library. It serializes the two requests, accumulates the results in the context object, and rolls all the error handling up into one place:
var Promise = require("bluebird");
var request = Promise.promisifyAll(require("request"), { multiArgs: true });

var context = {};

request.getAsync(someURL).spread(function(response, body) {
    context.one = JSON.parse(body);
    return request.getAsync(anotherURL);
}).spread(function(response, body) {
    context.two = JSON.parse(body);
    // render page
    res.render('pages/myPage');
}).catch(function(err) {
    // error here
});
And, if you have multiple URLs, you can use some of Bluebird's other features, like Promise.map(), to iterate over an array of URLs:
var Promise = require("bluebird");
var request = Promise.promisifyAll(require("request"), { multiArgs: true });

var urlList = ["url1", "url2", "url3"];

Promise.map(urlList, function(url) {
    return request.getAsync(url).spread(function(response, body) {
        return [JSON.parse(body), url];
    });
}).then(function(results) {
    // results is an array of all the parsed bodies in order
}).catch(function(err) {
    // handle error here
});
Or, you could create a helper function to do this for you:
// pass an array of URLs
function getBodies(array) {
    return Promise.map(array, function(url) {
        return request.getAsync(url).spread(function(response, body) {
            return JSON.parse(body);
        });
    });
}
// sample usage of helper function
getBodies(["url1", "url2", "url3"]).then(function(results) {
    // process results array here
}).catch(function(err) {
    // process error here
});
Here is how I would implement chained Promises.
var request = require("request");

var someURL = 'http://ip.jsontest.com/';
var anotherURL = 'http://ip.jsontest.com/';

function combinePromises(context) {
    return Promise.all(
        [someURL, anotherURL].map((url, i) => {
            return new Promise(function(resolve, reject) {
                try {
                    request.get({
                        url: url,
                    }, function(err, response, body) {
                        if (err) {
                            reject(err);
                        } else {
                            context[i + 1] = JSON.parse(body);
                            resolve(1); // you can send back anything you want here
                        }
                    });
                } catch (error) {
                    reject(error);
                }
            });
        })
    );
}

var context = {"1": "", "2": ""};

combinePromises(context)
    .then(function(response) {
        console.log(context);
        // render page
        res.render('pages/myPage');
    }, function(error) {
        // do something with error here
    });
Doing this with native Promises. It's good to understand the guts.
This here is known as the "Promise Constructor Antipattern" as pointed out by #Bergi in the comments. Don't do this. Check out the better method below.
var contextA = new Promise(function(resolve, reject) {
    request('http://someurl.com', function(err, response, body) {
        if (err) reject(err);
        else {
            resolve(JSON.parse(body));
        }
    });
});

var contextB = new Promise(function(resolve, reject) {
    request('http://contextB.com', function(err, response, contextB) {
        if (err) reject(err);
        else {
            contextA.then(function(contextA) {
                res.render('page', contextA, contextB);
            });
        }
    });
});
The nifty trick here, and I think by using raw promises you come to appreciate this, is that contextA resolves once and then we have access to its resolved result. That is, we never make the request to someurl.com a second time, but we still have access to contextA's JSON.
So I could conceivably create a contextC and reuse the JSON without having to make another request. Promises only ever resolve once. To refresh that data you would have to take that anonymous executor function out and put it in a new Promise, as sketched below.
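Something like this (not from the original answer): pulling the executor into a function means each call makes a fresh request and returns a new promise, instead of reusing the first cached result.

function fetchContext(url) {
    return new Promise(function (resolve, reject) {
        request(url, function (err, response, body) {
            if (err) return reject(err);
            resolve(JSON.parse(body));
        });
    });
}

// Each call re-fetches, unlike the cached contextA above.
fetchContext('http://someurl.com').then(function (freshContextA) {
    // use the refreshed data here
});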
Bonus note:
This executes contextA and contextB in parallel, but will do the final computation that needs both contexts when both A & B are resolved.
Here's my new stab at this.
The main problem with the above solution is that none of the promises are reusable and they are not chained, which is a key feature of Promises.
However, I still recommend promisifying your request library yourself and abstaining from adding another dependency to your project. Another benefit of promisifying it yourself is that you can write your own rejection logic. This is important if you're working with a particular API that sends error messages in the body. Let's take a look:
// Function that returns a new Promise. Beats out the constructor anti-pattern.
const asyncReq = function(options) {
    return new Promise(function (resolve, reject) {
        request(options, function(err, response, body) {
            // Rejected promises can be dealt with in a `catch` block.
            if (err) {
                return reject(err);
            }
            // custom error handling logic for your application (hasError / toError are your own helpers).
            else if (hasError(body)) {
                return reject(toError(body));
            }
            // typically I just `resolve` `response` since it contains `body`.
            return resolve(response);
        });
    });
};
asyncReq(urlA)
    .then(function(resA) {
        // Promise.all is the preferred method for managing nested context.
        return Promise.all([resA, asyncReq(urlB)]);
    })
    .then(function(resAB) {
        return render('page', resAB[0], resAB[1]);
    })
    .catch(function(e) {
        console.error(e);
    });
You can use the request-promise library to do this. In your case, you could have something like this, where you chain your requests.
request
    .get({ url: someURL })
    .then(body => {
        context.one = JSON.parse(body);
        // Resolves the promise
        return request.get({ url: anotherURL });
    })
    .then(body => {
        context.two = JSON.parse(body);
        res.render('pages/myPage');
    })
    .catch(e => {
        // Catch errors
        console.log('Error:', e);
    });
By far the easiest is to use the request-promise library. You can also use a promise library like Bluebird and use its promisify functions to convert the request callback API to a promise API, though you may need to write your own promisify function, as request does not use the standard callback signature. Lastly, you can just make your own promise wrapper, using either native promises or Bluebird.
If you're starting fresh, just use request-promise. If you're refactoring existing code, I would just write a simple wrapper for request using Bluebird's spread function.
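As a rough illustration of the "write your own wrapper" option with native promises (the get helper name is just for this sketch, and someURL/anotherURL/context/res come from the question):

var request = require('request');

// Wrap request.get in a promise that resolves with the raw body.
function get(url) {
    return new Promise(function (resolve, reject) {
        request.get(url, function (err, response, body) {
            if (err) return reject(err);
            resolve(body);
        });
    });
}

get(someURL)
    .then(body => { context.one = JSON.parse(body); return get(anotherURL); })
    .then(body => { context.two = JSON.parse(body); res.render('pages/myPage'); })
    .catch(err => console.log('Error:', err));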

Async parallel HTTP request

I'm having a control flow problem with an application loading a large array of URLs. I'm using Caolan Async and the NPM request module.
My problem is that the HTTP response starts as soon as the function is added to the queue. Ideally I want to build my queue and only start making the HTTP requests when the queue starts. Otherwise the callbacks start firing before the queue starts - causing the queue to finish prematurely.
var request = require('request') // https://www.npmjs.com/package/request
  , async = require('async'); // https://www.npmjs.com/package/async

var myLoaderQueue = []; // passed to async.parallel
var myUrls = ['http://...', 'http://...', 'http://...'] // 1000+ urls here

for (var i = 0; i < myUrls.length; i++) {
    myLoaderQueue.push(function(callback) {
        // Async http request
        request(myUrls[i], function(error, response, html) {
            // Some processing is happening here before the callback is invoked
            callback(error, html);
        });
    });
}

// The loader queue has been made, now start to process the queue
async.parallel(queue, function(err, results) {
    // Done
});
Is there a better way of attacking this?
Using for loops combined with asynchronous calls is problematic (with ES5) and may yield unexpected results (in your case, the wrong URL being retrieved).
Instead, consider using async.map():
async.map(myUrls, function(url, callback) {
    request(url, function(error, response, html) {
        // Some processing is happening here before the callback is invoked
        callback(error, html);
    });
}, function(err, results) {
    ...
});
Given that you have 1000+ URLs to retrieve, async.mapLimit() may also be worth considering.
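For example, a rough sketch capping the number of requests in flight at 10 (the limit value is arbitrary, not from the original answer):

async.mapLimit(myUrls, 10, function(url, callback) {
    request(url, function(error, response, html) {
        // Some processing is happening here before the callback is invoked
        callback(error, html);
    });
}, function(err, results) {
    // results are in the same order as myUrls
});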
If you're willing to start using Bluebird and Babel to utilize promises and ES7 async / await you can do the following:
let Promise = require('bluebird');
let request = Promise.promisify(require('request'));

let myUrls = ['http://...', 'http://...', 'http://...'] // 1000+ urls here

async function load() {
    try {
        // map the myUrls array into an array of request promises
        // and wait until all request promises in the array resolve
        let results = await Promise.all(myUrls.map(url => request(url)));

        // don't know if Babel await supports the syntax below
        // let results = await* myUrls.map(url => request(url));

        // print the array of results, or use forEach
        // to process / collect them in any other way
        console.log(results);
    } catch (e) {
        console.log(e);
    }
}
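If the 1000+ URLs are a concern, Bluebird's Promise.map also accepts a concurrency option, which gives the same kind of throttling as async.mapLimit. A sketch under the same assumptions as above (the limit of 10 is arbitrary):

let Promise = require('bluebird');
let request = Promise.promisify(require('request'));

let myUrls = ['http://...', 'http://...', 'http://...']; // 1000+ urls here

// At most 10 requests in flight at once; results keep the input order.
Promise.map(myUrls, url => request(url), { concurrency: 10 })
    .then(results => console.log(results.length))
    .catch(e => console.log(e));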
I'm pretty confident you're experiencing the results of a different error. By the time your queued functions are evaluated, i has been redefined, which might make it look like you missed the first URLs. Try a little closure when you are queueing the functions.
var request = require('request') // https://www.npmjs.com/package/request
  , async = require('async'); // https://www.npmjs.com/package/async

var myLoaderQueue = []; // passed to async.parallel
var myUrls = ['http://...', 'http://...', 'http://...'] // 1000+ urls here

for (var i = 0; i < myUrls.length; i++) {
    (function(URLIndex) {
        myLoaderQueue.push(function(callback) {
            // Async http request
            request(myUrls[URLIndex], function(error, response, html) {
                // Some processing is happening here before the callback is invoked
                callback(error, html);
            });
        });
    })(i);
}

// The loader queue has been made, now start to process the queue
async.parallel(myLoaderQueue, function(err, results) {
    // Done
});
