Chaining an arbitrary number of promises in Q - node.js

I want to send an HTTP request N times. I want to eventually have information about the results of each of those requests.
Running the request function once works great. Here's the HTTP request function using Q.defer():
function runRequest() {
    var deferred = Q.defer(),
        start = (new Date).getTime(),
        req = HTTP.request(options, function(res) {
            var end = (new Date).getTime(),
                requestDetails = {
                    reqStatus: res.statusCode,
                    reqStart: start,
                    reqEnd: end,
                    duration: end - start
                };
            deferred.resolve(requestDetails);
        });
    req.on('error', function(e) {
        deferred.reject(e.message);
    });
    req.end();
    return deferred.promise;
}
If I do this, I get back the data I expect:
runRequest().then(function(requestDetails) {
    console.log('STATUS: ' + requestDetails.reqStatus);
    console.log('Duration: ' + requestDetails.duration);
    console.log('Start: ' + requestDetails.reqStart);
    console.log('End: ' + requestDetails.reqEnd);
}, function(error) {
    console.log('Problem with request: ' + error);
})
.done();
To iterate, I tried to fit that into a for loop:
function iterateRequests() {
    var deferred = Q.defer();
    var reqResults = [];
    for (var iteration = 0; iteration < requests; iteration++) {
        runRequest()
            .then(function(requestDetails) {
                console.log('STATUS: ' + requestDetails.reqStatus);
                reqResults.push(requestDetails);
            }, function(error) {
                console.log('Problem with request: ' + error);
            });
    }
    deferred.resolve(reqResults);
    return deferred.promise;
}
Then I call it like this:
iterateRequests()
    .then(function(results) {
        console.log(results);
        console.log("in the success callback after iterateRequests");
    }, function() {
        console.log("in the failure callback after iterateRequests");
    })
    .done();
I end up in the success callback (i.e., it logs "in the success callback after iterateRequests"). However, console.log(results) prints an empty array, and it prints before I get the logs from the runRequest().then() callbacks.
Any ideas or some guidance on chaining/iterating over promise-returning functions?
Thanks!
Update
Follow-up question in response to #abject_error's answer:
Checked out Q.all. Definitely looks like what I need. And it's much simpler than what I was working with. I made a simple test case to help me figure out how it works:
var Q = require("q");

function returner(number) {
    var deferred = Q.defer();
    deferred.resolve(number);
    return deferred.promise;
}

function parent() {
    return Q.all([
        returner(1),
        returner(2),
        returner(4)
    ]);
}

parent()
    .then(function(promises) {
        // works - promises gives me [1, 2, 4]
        console.log(promises);
    });
So I see how I can use it if I know beforehand the number of times I need to call it (and which functions I'm going to call). Any tips on how to get a dynamic number of calls to returner (in this example) or runRequest (in my original example) in the array?

This answers the update part of the question:
// 'stories' is the caller's collection; myFunc(i) kicks off the work and returns a promise
var buildCalls = function() {
    var calls = [];
    for (var i in stories) {
        calls.push(myFunc(i));
    }
    return calls;
}

return Q.all(buildCalls());

Q has other functions to aid in Promise-based workflows. The method you need to use is Q#all. If you have an array of promises and you want to call a function when all of them have successfully fulfilled, you do
Q.all(array_of_promises).then(success_callback, failure_callback);
After all the request promises are fulfilled, success_callback is called. If any of them rejects, the failure_callback is called immediately.
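Applied back to the original question, the dynamic case is just a loop that collects the promises before handing them to Q.all. A minimal sketch, assuming runRequest and the requests count from the question:
function iterateRequests() {
    var promises = [];
    for (var i = 0; i < requests; i++) {
        promises.push(runRequest()); // each call starts a request and returns a promise
    }
    // resolves with an array of requestDetails objects, in request order
    return Q.all(promises);
}

iterateRequests()
    .then(function(results) {
        console.log(results); // now populated
    })
    .done();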

Related

node - how does async.map work?

After reading about async, I assumed the code below would output to the console the total of all values returned from the HTTP/API calls; but the callback seems to fire immediately after the first HTTP call returns, and it only shows a 'total' value equal to the first value returned from the API.
Where is my misunderstanding about how async.map works?
var http = require('https');
const async = require('async');
var MongoClient = require('mongodb').MongoClient;
var dbUrl = "mongodb://localhost:27017/";
var total = 0;
var tokens = [
    { name: "tron" },
    { name: 'cardano' },
    { name: 'nucleus-vision' },
    { name: 'ripple' },
    { name: 'litecoin' },
    { name: 'havven' }
];
function run() {
    doStuff();
    setInterval(doStuff, 1 * 60 * 1000);
}

function doStuff() {
    total = 0;
    async.map(tokens, httpGet, function (value) {
        console.log('async done ', total);
    });
}

function httpGet(token, callback) {
    var url = 'https://api.coinmarketcap.com/v1/ticker/' + token.name;
    http.get(url, function(res) {
        var body = '';
        res.on('data', function(chunk) {
            body += chunk;
        });
        res.on('end', function() {
            var jsonObj = JSON.parse(body);
            var price = parseFloat(jsonObj[0].price);
            total += price;
            MongoClient.connect(dbUrl, function(err, db) {
                if (err)
                    throw err;
                var dbo = db.db("crypto");
                dbo.collection("tick").insertOne(jsonObj[0],
                    function(err, res) {
                        if (err)
                            throw err;
                        db.close();
                    });
            });
            callback(price);
        });
    }).on('error', function(e) {
        console.log("Got an error: ", e);
    });
}

run();
The callback that is passed to the iteratee (httpGet) is used incorrectly: the first argument (price) is treated as an error. From the docs:
If iteratee passes an error to its callback, the main callback (for the map function) is immediately called with the error.
So
callback(price);
should rather be
callback(null, price);
With that change, async no longer halts after the first iteration.
I believe there are two separate problems here:
As you may know, we cannot use return statements in asynchronous code like we would in synchronous code, which is why we use callbacks instead. Node-style callbacks are of the form function (err, result) {}, where the first parameter is the error (if any) and the second is the result of the function (the return value). According to the docs,
Async.map(coll, iteratee, callback) will stop the execution if the
iteratee passes an error to its callback.
As your iteratee function calls its callback as callback(price), you're effectively stopping execution, because price is passed in the error parameter. What you want to do to "return" the price variable is to call the callback like so: callback(null, price).
Typically, map-functions are used for
appl[ying] a given function to each element of a list, returning a list of results in the same order.
The map function of the async library does the same, i.e. it iterates through an array and returns an array of the resulting items, just like the normal map method (below) does.
[1, 2, 3].map(function (nbr) { return nbr*2 }) // returns [2, 4, 6]
The callback (i.e. the third parameter to async.map) will be called with an array of prices, not the summed value of the prices.
async.map(tokens, httpGet, function (error, total) {
    console.log(error); // prints undefined (unless there was an error)
    console.log(total); // prints an array of prices
});
For summing the values, I would recommend the reduce function, or simply sum the values returned as a result.
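For example, a minimal sketch of the reduce approach, reusing the names from the question:
async.map(tokens, httpGet, function (err, prices) {
    if (err) return console.error(err);
    // prices is an array of numbers, one per token, in order
    var total = prices.reduce(function (sum, price) {
        return sum + price;
    }, 0);
    console.log('async done ', total);
});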

Nodejs Http request has no response

Currently using http GET to an external API. When called individually, the response is good. When put in a for loop, some requests don't seem to have a response.
This is the http GET function:
function httpGetChunk(url, callback) {
    http.get(url, function(resp) {
        var body = '';
        resp.on('data', function(chunk) {
            body += chunk; // chunk too large from this response
        });
        resp.on('end', function() {
            var data = JSON.parse(body);
            callback(data);
        });
        resp.on("error", function(e) {
            console.log("Got error: " + e.message);
        });
    });
}
When I call the GET function in a for loop for 5 different urls, I only get responses for some of them. I ran it a couple of times, and the responses would come from a different combination of the called urls, but never all of them.
Any insight?
Edit 1: To give more information, my for loop looks something like this.
for (var i = 0; i < 5; i++) {
    httpGetChunk(someUrl, function(data) {
        console.log(data);
    });
}
This would only print out some responses but not all.
Edit 2:
I've taken into account all the advice on this thread. I'm now using the async module and have increased the number of concurrent connections to 20:
http.globalAgent.maxSockets = 20;
The following code is what I'm currently testing:
getMatchStats() returns a game 'match' object with statistics (e.g. kills, deaths in the match, etc.)
matchIds is the array containing all the id keys of the matches
async.parallel([
    getMatchStats(matchIds[0], function (matchData) {
        console.log('0');
    }),
    getMatchStats(matchIds[1], function (matchData) {
        console.log('1');
    }),
    getMatchStats(matchIds[2], function (matchData) {
        console.log('2');
    }),
    getMatchStats(matchIds[3], function (matchData) {
        console.log('3');
    }),
    getMatchStats(matchIds[4], function (matchData) {
        console.log('4');
    }),
], function(err, result) {
    console.log('done');
    callback(result);
});
and getMatchStats
function getMatchStats(matchId, callback) {
    var url = getMatchStatsUrl(matchId); // gets url based on id
    httpGetChunk(url, function(data) {
        callback(data);
    });
}
Again, the async.parallel never finishes, since only some of the requests get responses. Every time I run it, the responses come from a different combination of matches. Sometimes it even completes all of the requests.
Maybe my OS has limitations on the number of connections (I'm testing on localhost)?
Each request is asynchronous. So, if you use a regular for loop, each step is executed synchronously and won't wait for the callback to be called. What you need is something like the each method from the async module:
async.each(yourArrayOfUrls, function (url, callback) {
    httpGetChunk(url, function(data) {
        console.log(data);
        callback();
    });
}, function (err) {
    // if some step produces an error, you can get it here...
});
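As an aside, the async.parallel call in Edit 2 invokes getMatchStats immediately and passes its return values (undefined) to async.parallel, so async never sees the completions. async.parallel expects an array of task functions that each take an error-first callback. A sketch of the corrected shape, assuming getMatchStats as defined above:
async.parallel(matchIds.map(function (matchId) {
    return function (done) {
        getMatchStats(matchId, function (matchData) {
            done(null, matchData); // error-first: null signals success
        });
    };
}), function (err, results) {
    console.log('done');
    callback(results);
});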

Node.js sequentially running multiple childProcess.execFile processes from q all

I am writing a program to batch print drawings. They need to be in a certain order, e.g. drawing A, B, C. The program plots the correct number of prints; it's just that the order is random. I need the first item in the array to complete before the next, and so on.
startMe(newPaths)
    .then(function(result) {
    });

function startMe(dwgPaths) {
    return q.all(buildCalls(dwgPaths));
}

var buildCalls = function(dwgPaths) {
    var calls = [];
    var scFiles = [tmpDir + "425011-fab.scr", tmpDir + "425011-pk.scr", tmpDir + "425011-sc.scr"];
    for (var sc in scFiles) {
        for (var i in dwgPaths) {
            calls.push(callAccoreConsole(dwgPaths[i], scFiles[sc]));
        }
    }
    return calls;
};

function callAccoreConsole(dwgPath, scrFile) {
    var deferred = q.defer();
    childProcess.execFile('C:/Program Files/Autodesk/AutoCAD 2015/accoreconsole.exe', ['/i', dwgPath, '/s', scrFile], function(err, data) {
        if (err)
            deferred.resolve({success: false, reason: err});
        deferred.resolve({success: true});
    });
    return deferred.promise;
}
The code below works the way I want. I print 2 files, 3 copies each: file A using script 1, then file B using script 1, and then it repeats for the other scripts. I get a total of 6 prints in "three groups", A, B, A, B, A, B, each with the appropriate script run. With the code above I may get B, A, A, B, A, A.
callAccoreConsole(newPaths[0], scFiles2[0])
    .then(function(result) {
        callAccoreConsole(newPaths[1], scFiles2[0])
            .then(function(result) {
                callAccoreConsole(newPaths[0], scFiles2[1])
                    .then(function(result) {
                        callAccoreConsole(newPaths[1], scFiles2[1])
                            .then(function(result) {
                                callAccoreConsole(newPaths[0], scFiles2[2])
                                    .then(function(result) {
                                        callAccoreConsole(newPaths[1], scFiles2[2])
                                            .then(function(result) {
                                            });
                                    });
                            });
                    });
            });
    });
I have been struggling with this for a while. I found the code below and got it to work for my application, but it doesn't seem like the most efficient way to write it. If anyone has a more compact way, please let me know. Thanks.
var itemsToProcess = [];
for (var sc in scFiles) {
    for (var i in newPaths) {
        itemsToProcess.push({file: newPaths[i], script: scFiles[sc]});
    }
}

function getDeferredResult(a) {
    return (function (items) {
        var deferred;
        if (items.length === 0) {
            return q.resolve(true);
        }
        deferred = q.defer();
        var payload = {
            file: items[0].file,
            script: items[0].script
        };
        callAccoreConsole2(payload)
            .then(function(result) {
                deferred.resolve(items.splice(1));
            });
        return deferred.promise.then(getDeferredResult);
    }(a));
}

q.resolve(itemsToProcess)
    .then(getDeferredResult)
    .then(function(result) {
        return res.send({success: true});
    });
As you are constructing your array of promises, you are simultaneously invoking the execFile method.
function callAccoreConsole(dwgPath, scrFile) {
    var deferred = q.defer();
    childProcess.execFile('C:/Program Files/Autodesk/AutoCAD 2015/accoreconsole.exe', ['/i', dwgPath, '/s', scrFile], function(err, data) {
        if (err)
            deferred.resolve({success: false, reason: err});
        deferred.resolve({success: true});
    });
    return deferred.promise;
}
So, instead of using callAccoreConsole to run the process and return a deferred, you need something that calls that method eventually -
calls.push(q.fcall(callAccoreConsole, dwgPaths[i],scFiles[sc]));
I haven't tried this specifically, but the gist is that you are calling your method at the same time you are creating the deferred for it.
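For completeness, the asker's working-but-verbose sequential solution can also be written as a fold over the items, chaining each job onto the previous promise. A sketch, assuming itemsToProcess and callAccoreConsole2 from the question:
itemsToProcess.reduce(function (chain, item) {
    return chain.then(function () {
        // each job starts only after the previous promise resolves
        return callAccoreConsole2({file: item.file, script: item.script});
    });
}, q.resolve(true))
.then(function () {
    return res.send({success: true});
});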

accessing an array outside the function in node js

I know node.js runs asynchronously, so outer functions execute earlier than inner ones. But what is the way to access the notification array outside the for loop? I would like to access all the values in the array at once; is this feasible?
var notification = [];
for (var j = 0; j < 6; j++) {
    getNotification(response[j].sender_id, function(results) { // a function called
        notification[j] = results;
        console.log(notification); // output: correct
    });
}
console.log(notification); // output: [], need notification array values here
EDIT: If you don't want to use third-party libs, this is how to do it in your own code.
/* jshint node:true */
function getNotifications(responses, callbackToMainProgramLogic) {
    'use strict';
    var results = [];
    function getNotificationAsync(response) {
        getNotification(response.sender_id, function (data) {
            results.push(data);
            if (responses.length) {
                getNotificationAsync(responses.pop()); // if there are still responses, launch another async getNotification
            } else {
                callbackToMainProgramLogic(results); // if there aren't, we're done, and we return to main program flow
            }
        });
    }
    getNotificationAsync(responses.pop());
}

getNotifications(someArrayOfResponses, function (dataFromNotifications) {
    console.log('The collected data: ' + JSON.stringify(dataFromNotifications, 0, 4));
});
If you absolutely must, you could do something ridiculous like this. Your logic in loopUntilDataReceived would be waiting for array sizes, not waiting for a non-empty string, but the idea is similar, and you shouldn't be using this anyway! :)
var fs = require('fs');

var fileData = '';
fs.readFile('blah.js', function (err, data) { // async operation, similar to your issue
    'use strict';
    fileData = data;
    console.log('The Data: ' + data);
});

function loopUntilDataReceived() {
    'use strict';
    process.nextTick(function () { // a straight while loop would block the event loop, so we do this once per turn of the event loop
        if (fileData === '') {
            console.log('No Data Yet');
            loopUntilDataReceived();
        } else {
            console.log('Finally: ' + fileData);
        }
    });
}
loopUntilDataReceived();
Did I mention this is ridiculous? Honestly, this is an awful idea, but it may help you understand what is going on, how the Node event loop works, and why what you want is not possible. AND why the other posts about callbacks and flow-control libraries are the way to go.
First off, you have a closure issue in your code (please see the details here).
Then, you simply can't have the array values right next to the loop, because the values are not ready at that point.
You need to wait until all 6 of your getNotification calls have resolved. You can do that with the async library. Something like:
var async = require('async');

var notification = [];

function createRequest(index) {
    return function (callback) {
        getNotification(response[index].sender_id, function(results) {
            notification[index] = results;
            callback(null, results); // error-first: null signals success
        });
    };
}

var requests = [];
for (var j = 0; j < 6; j++) {
    requests.push(createRequest(j));
}

async.parallel(requests, function (err, allResults) {
    // the notification array is ready at this point
    // the data should also be available in the allResults array
    console.log(notification);
});
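Since the six calls differ only by index, async.times is a slightly shorter alternative; a sketch under the same assumptions (response and getNotification as in the question):
async.times(6, function (j, next) {
    getNotification(response[j].sender_id, function (results) {
        next(null, results); // error-first: null signals success
    });
}, function (err, notification) {
    // notification[j] holds the result of the j-th call, in order
    console.log(notification);
});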
Send a callback to the notification loop like this:
var notification = [];
getNotificationArray(function() {
    console.log(notification);
});

function getNotificationArray(callback) {
    for (var j = 0; j < 6; j++) {
        getNotification(response[j].sender_id, function(results) { // a function called
            notification[j] = results;
            console.log(notification); // output: correct
        });
    }
    callback();
}

NodeJS async queue too fast (Slowing down async queue method)

I have an HTTP GET request, and I want to parse the response and save it to my database.
If I call crawl(i) independently, I get good results. But I have to call crawl() for each id from 1 to 2000.
I get good results, but some responses seem to get lost and some responses are duplicates. I don't think I understand how to call thousands of asynchronous functions. I am using the async module's queue function, but so far I am still missing some data and still have some duplicates. What am I doing wrong here? Thanks for your help.
What I am crawling:
My node functions:
function getOptions(i) {
    return {
        host: 'magicseaweed.com',
        path: '/syndicate/rss/index.php?id=' + i + '&unit=uk',
        method: 'GET'
    };
}

function crawl(i) {
    var req = http.request(getOptions(i), function(res) {
        res.on('data', function (body) {
            parseLocation(body);
        });
    });
    req.end();
}

function parseLocation(body) {
    parser.parseString(body, function(err, result) {
        if (result && typeof result.rss != 'undefined') {
            var locationTitle = result.rss.channel[0].title;
            var locationString = result.rss.channel[0].item[0].link[0];
            var location = new Location({
                id: locationString.split('/')[2],
                name: locationTitle
            });
            location.save();
        }
    });
}
N = 2; // # of simultaneous tasks
var q = async.queue(function (task, callback) {
    crawl(task.url);
    callback();
}, N);

q.drain = function() {
    console.log('Crawling done.');
}

for (var i = 0; i < 100; i++) {
    q.push({url: 'http://magicseaweed.com/syndicate/rss/index.php?id=' + i + '&unit=uk'});
}
[EDIT] Well, after a lot of testing, it seems that the service I am crawling cannot handle so many requests that fast, because when I make each request sequentially, I get all the good responses.
Is there a way to SLOW DOWN the async queue method?
You should have a look at this great module, async, which simplifies async tasks like this. You can use queue; a simple example:
N = 5; // # of simultaneous tasks
var q = async.queue(function (task, callback) {
    somehttprequestfunction(task.url, function() {
        callback();
    });
}, N);
q.drain = function() {
    console.log('all items have been processed');
}

for (var i = 0; i < 2000; i++) {
    q.push({url: "http://somewebsite.com/" + i + "/feed/"});
}
It keeps a window of ongoing actions, and a slot becomes available for a future task only when you invoke the callback function. The difference is that your code opens 2000 connections immediately, and obviously the failure rate is high. Limiting the queue to a reasonable value, 5, 10, 20 (depends on the site and connection), will result in a better success rate. If a request fails, you can always try it again, or push the task to another async queue for another trial. The key point is to invoke callback() in the queue function, so that a slot becomes available when it is done.
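For instance, a sketch of the retry idea, assuming the request function reports failure through an error argument (names hypothetical apart from the async API):
var q = async.queue(function (task, callback) {
    somehttprequestfunction(task.url, function (err) {
        if (err && (task.retries || 0) < 3) {
            task.retries = (task.retries || 0) + 1;
            q.push(task); // re-queue the failed task for another attempt
        }
        callback(); // always free the slot so the queue keeps draining
    });
}, 10);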
var q = async.queue(function (task, callback) {
    crawl(task.url);
    callback();
}, N);
You're executing the next task immediately after starting the previous one; used this way, the queue is meaningless. You should modify your code like this:
// first, modify your 'crawl' function to take a callback argument, and call this callback after the job is done
// then
var q = async.queue(function (task, next /* naming this argument 'next' is more meaningful */) {
    crawl(task.url, function () {
        // after this one is done, start the next one
        next();
    });
    // or, more simply: crawl(task.url, next);
}, N);
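The modified crawl might look like this (a sketch reusing getOptions and parseLocation from the question; it also buffers the whole body before parsing, since 'data' may deliver partial chunks, which is a likely cause of the lost and duplicate locations):
function crawl(i, done) {
    var req = http.request(getOptions(i), function (res) {
        var body = '';
        res.on('data', function (chunk) {
            body += chunk; // 'data' can fire several times per response
        });
        res.on('end', function () {
            parseLocation(body);
            done(); // signal the queue only after the full response has arrived
        });
    });
    req.on('error', done);
    req.end();
}
Note that the original queue pushes {url: ...} while crawl expects an id, so the task payload should match whichever signature you settle on.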
Another option if you want. Vanilla JS without fancy libraries.
var incrementer = 0;
var resultsArray = [];
var myInterval = setInterval(function() {
    incrementer++;
    if (incrementer == 100) {
        clearInterval(myInterval);
        // when done, parse results array
    }
    // make request here
    // push request result to array here
}, 500);
It invokes the function every half second, which is an easy way to force sequential pacing and exit after x requests.
I know I am a little late to the question; however, here is a solution I wrote to slow down the number of requests when testing an API endpoint, using Node 4 or Node 5:
var fs = require('fs');
var supertest = require('supertest');
var request = supertest("http://sometesturl.com/api/test/v1/");
var Helper = require('./check.helper');
var basicAuth = Helper.basicAuth;
var options = Helper.options;

// 'range' is not built into JS; a helper like this is assumed:
function range(start, end) {
    var arr = [];
    for (var i = start; i < end; i++) arr.push(i);
    return arr;
}

fs.readFile('test.txt', function(err, data) {
    var parsedItems = JSON.parse(data);
    var urlparts = [];
    // create a queue
    for (let year of range(1975, 2016)) {
        for (var make in parsedItems[year]) {
            console.log(year, make, '/models/' + year + '/' + make);
            urlparts.push({urlpart: '/models/' + year + '/' + make, year: year, make: make});
        }
    }
    // start dequeue
    waitDequeue();

    // This function calls itself after the makeRequest promise completes
    function waitDequeue() {
        var item = urlparts.pop();
        if (item) {
            makeRequest(item)
                .then(function() {
                    // wait this time before next dequeue
                    setTimeout(function() {
                        waitDequeue();
                    }, 3000);
                });
        } else {
            write(parsedItems);
        }
    }

    // make a request, mutate parsedItems, then resolve
    function makeRequest(item) {
        return new Promise((resolve, reject) => {
            request
                .get(item.urlpart)
                .set(options.auth[0], options.auth[1])
                .set(options.type[0], options.type[1])
                .end(function(err, res) {
                    if (err) return reject(err);
                    console.log(res.body);
                    res.body.forEach(function(model) {
                        parsedItems[item.year][item.make][model] = {};
                    });
                    resolve();
                });
        });
    }

    // write the results back to the file
    function write(parsedItems) {
        fs.writeFile('test.txt', JSON.stringify(parsedItems, null, 4), function(err) {
            console.log(err);
        });
    }
});
A little late, but I have found this works!
Using async, you can slow down the queue by using whilst inside the task handler, e.g.:
var q = async.priorityQueue(function(task, callback) {
    // your processing code here for each task
    // when ready to complete the task, delay it by calling
    var count = 0;
    async.whilst(
        function() {
            return count < 10; // wait 10 seconds
        },
        function(callback) {
            count++;
            setTimeout(function() {
                callback(null, count);
            }, 1000);
        },
        function (err, n) {
            // n seconds have passed
            callback(); // callback to q handler
        }
    ); // whilst
}, 5);
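A simpler variant of the same idea, under the same assumptions, is to delay the queue callback directly with setTimeout:
var q = async.priorityQueue(function(task, callback) {
    // process the task here, then wait before freeing the slot
    setTimeout(callback, 10000); // 10-second delay per task
}, 5);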
