Currently using http GET to an external API. When called individually, the response is good. When put in a for loop, some requests don't seem to have a response.
This is the http GET function:
function httpGetChunk(url, callback) {
http.get(url, function(resp) {
var body='';
resp.on('data', function(chunk) {
body += chunk; //chunk too large from this response
});
resp.on('end', function() {
var data = JSON.parse(body);
callback(data);
});
resp.on("error", function(e) {
console.log("Got error: " + e.message);
});
});
}
When I call the GET function in a for loop for 5 different urls, I only get responses for some of them. Ran it a couple of times and the response would be from a different combination of the called urls but never all of them.
Any insight?
Edit 1: To give more information, my for loop looks something like this.
for (var i=0;i<5; i++) {
httpGetChunk(someUrl, function(data) {
console.log(data);
});
}
This would only print out some responses but not all.
Edit 2:
I've taken into account all the advice on this thread. I'm now using the async module and have increased the number of concurrent connections to 20:
http.globalAgent.maxSockets = 20;
Following code is the one im currently testing:
getMatchStats() returns an game 'match' object with statistics (e.g kills, deaths in the match etc.)
matchIds is the array containing all the id keys of the matches
async.parallel([
getMatchStats(matchIds[0], function (matchData) {
console.log('0');
}),
getMatchStats(matchIds[1], function (matchData) {
console.log('1');
}),
getMatchStats(matchIds[2], function (matchData) {
console.log('2');
}),
getMatchStats(matchIds[3], function (matchData) {
console.log('3');
}),
getMatchStats(matchIds[4], function (matchData) {
console.log('4');
}),
], function(err, result) {
console.log('done');
callback(result);
});
and getMatchStats
function getMatchStats(matchId, callback) {
var url = getMatchStatsUrl(matchId); //gets url based on id
httpGetChunk(url, function(data) {
callback(data);
});
}
again, the async.parallel never finishes since only some of the requests have responses. Every time i run it, the responses would be from a different combination of matches. Sometimes, it even completes all of the requests.
Maybe my OS has limitations on number of connections (im testing on localhost)?
Each request is asynchronous. So, if you use a regular for loop, each step is going to be executed synchronously and won't wait for callback to be called. What do you need is something like the each method from the async module, like:
async.each(yourArrayOfUrls, function (url, callback) {
httpGetChunk(url, function(data) {
console.log(data);
callback();
});
}, function (err) {
// if some step produce an error, you can get it here...
});
Related
I want to spider some links and after all tasks done,i want to do something else.
How to track tasks done is hard for me.Hope someone can help.
here is my code:
var urlList=[];
//Ready file lines
lineReader.eachLine('url.txt', function(line) {
console.log('url is :'+line);
urlList.push(line);
}).then(function(){//After Read,begin to proceed each line
console.log('read done!begin collect');
async.each(urlList,function(line){
console.log('begin line :'+line);
//down each url
download(line,function(data,cb){
var $=cheerio.load(data);//load cheerio
var title=$('head>title').text();//get title
console.log('title is '+title);
});
});
//At here i want to track whether all urls has been download,and i can do something else
if(/* allproceed */)
{
console.log('Task all done!Begin Next');
}
});
function download(url, callback) {
http.get(url, function(res) {
var data = "";
res.on('data', function (chunk) {
data += chunk;
});
res.on("end", function() {
callback(data);
});
}).on("error", function(e) {
console.log("Got error: " + e.message);
callback(null);
});
}
Hope someone can help me.
Thanks very much.
I've made some fixes to your code, see the result below:
var urlList=[];
//Ready file lines
lineReader.eachLine('url.txt', function(line) {
console.log('url is :'+line);
urlList.push(line);
}).then(function(){//After Read,begin to proceed each line
console.log('read done!begin collect');
async.each(urlList,function(line, callback){
console.log('begin line :'+line);
//down each url
download(line,function(err, data){
if (err) {
return callback(err);
}
var $=cheerio.load(data);//load cheerio
var title=$('head>title').text();//get title
console.log('title is '+title);
callback(null, title);
});
}, function continueHere(err) {
//At here i want to track whether all urls has been download,and i can do something else
console.log('Task all done!Begin Next');
});
});
function download(url, callback) {
http.get(url, function(res) {
var data = "";
res.on('data', function (chunk) {
data += chunk;
});
res.on("end", function() {
callback(null, data);
});
}).on("error", function(e) {
console.log("Got error: " + e.message);
callback(e);
});
}
Some things to pay special attention to:
You were already very close to your answer. async.each() is a tool you can use to get the job done, but you were not using correctly yet. The iterator function you pass it, the one that gets called for each item in urlList, takes a callback that you can call if the job for that iteration is done. I added that callback.
async.each() also takes a third argument: The function that gets called when all tasks have completed. In this function you can put the code that continues the rest of your application.
With regards to using callback: a pattern repeated across node.js is that the first argument passed to a callback is always an error, if one exists. If not, that argument is undefined or null. The actual result is passed as the second argument. It's a good idea to follow this pattern. async, for instance, expects you to obey it. If any of the tasks in the async.each() fail (by passing a non-null value as the first argument to the callback), async consideres the entire series failed, and passes that error on to the series callback (in the code above the function continueHere).
One last thing. Though the code above should work, it is mixing promises (signified by the .then() statement) with callbacks. These are two different ways to manage asynchronous code. Though you're free to mix them up if you want, for readability of the code it might help to pick one patter and stick with it ;).
I need to make an HTTP call and then put the response in database. i should repeat it forever. i have been reading on async module but i didn't understood how to combine these actions along with the waiting for couple of seconds between each iteration.
Can someone help?
Thanks in advance.
Look into async.forever. Your code would look something like this:
var async = require("async");
var http = require("http");
//Delay of 5 seconds
var delay = 5000;
async.forever(
function(next) {
http.get({
host: "google.com",
path: "/"
}, function(response) {
// Continuously update stream with data
var body = "";
response.on("data", function(chunk) {
body += chunk;
});
response.on("end", function() {
//Store data in database
console.log(body);
//Repeat after the delay
setTimeout(function() {
next();
}, delay)
});
});
},
function(err) {
console.error(err);
}
);
Why using such a module only for doing this ? Why don't you just use setTimeout like:
function makeRequest() {
request(url, function(response) {
saveInDatabase(function() {
// After save is complete, use setTimeout to call again
// "makeRequest" a few seconds later (Here 1 sec)
setTimeout(makeRequest, 1000);
});
}
}
This code won't really work for the request and save part of course, it was just to give an example of what I was proposing.
I am trying to create multiple http requests in Node.js, with each of them receiving a separate response. What I want to do is to identify which event corresponds to which call:
for (var i=0; i<100; i++) {
var req = http.request(options, function(response) {
var str = "";
response.on('data', function (chunk) {
console.log(str.length);
});
response.on('end', function () {
console.log("End of response");
});
});
req.on('error', function(err) {
console.log(err.message);
});
req.end();
}
Is there any way of properly identifying which response corresponds to each iteration? I am basically creating 100 response instances, but they all emit the same event, so the event emitting/handling is done globally. Basically, could I somehow tie i and the events emitted by response?
#BenFortune was right, this was related to closures. The example in the original question was overly-simplified, but if you have a construction similar to:
for(var i=0; ... ) {
function someFunction() {
}
}
and you want to keep track of something external to the function inside the function, then you should look into closures.
I create express.js example by WebMatrix. I want to create a api to get result from myfunction. If first request case is complicated and spend many time and the second request case is simple, the second request have to wait the first request finish. Can I do something that the second request can return data faster than first request?
app.post('/getData', function(req, res) {
res.header("Access-Control-Allow-Origin", "*");
res.header("Access-Control-Allow-Headers", "X-Requested-With");
var case= req.body.case;
var json = myfunction(case);
res.json(json);
});
You can use async to achieve that:
var async = require('async');
async.waterfall([
function(callback){ // first task
// process myCase (don't use case, it's reserved word),
// then pass it to your next function
callback(null, myCase); // 1st argument: null means no error
// if error, pass error in 1st arg
// so that 2nd function won't be
// executed
},
function(myCase, callback){ // 2nd task
// use argument 'myCase' to populate your final json result (json)
// and pass the result down in the callback
callback(null, json);
}
], function (err, json) {
// the argument json is your result
res.json(json);
});
If You like, You dont have to use any external libraries. You can do for example something like this:
console.log('1');
function async(input, callback) {
setTimeout(function() {
//do stuff here
for (var i = 0; i < input; i++) {
//this takes some time
}
//call callback, it may of course return something
callback('2');
}, 0);
}
async('10000000', function(result) {
console.log(result);
});
console.log('3');
You can test it, and see, that "2" will be printer after 1 and 3.
Hope it helped.
PS You can also use setInterval, or Underscore library:
var _ = require('underscore');
console.log('1');
function async(input, callback) {
_.defer(function() {
//do stuff here, for ie - this time-consuming loop
for (var i = 0; i < input; i++) {
//this takes some time
}
//call callback, it may of course return something
callback('2');
});
}
async('10000000', function(result) {
console.log(result);
});
console.log('3');
I want to send an HTTP request N times. I want to eventually have information about the results of each of those requests.
Running the request function once works great. Here's the HTTP request function using Q.defer():
function runRequest() {
var deferred = Q.defer(),
start = (new Date).getTime(),
req = HTTP.request(options, function(res) {
var end = (new Date).getTime(),
requestDetails = {
reqStatus: res.statusCode,
reqStart: start,
reqEnd: end,
duration: end - start
}
deferred.resolve(requestDetails);
});
req.on('error', function(e) {
deferred.reject(e.message);
});
req.end();
return deferred.promise;
}
If I do this, I get back the data I expect:
runRequest().then(function(requestDetails) {
console.log('STATUS: ' + requestDetails.reqStatus);
console.log('Duration: ' + requestDetails.duration);
console.log('Start: ' + requestDetails.reqStart);
console.log('End: ' + requestDetails.reqEnd);
}, function(error) {
console.log('Problem with request: ' + error);
})
.done();
To iterate, I tried to fit that into a for loop:
function iterateRequests() {
var deferred = Q.defer();
var reqResults = [];
for (var iteration = 0; iteration < requests; iteration++) {
runRequest()
.then(function(requestDetails) {
console.log('STATUS: ' + requestDetails.reqStatus);
reqResults.push(requestDetails);
}, function(error) {
console.log('Problem with request: ' + error);
});
}
deferred.resolve(reqResults);
return deferred.promise;
}
Then I call it like this:
iterateRequests()
.then(function(results) {
console.log(results);
console.log("in the success callback after iterateRequests");
}, function() {
console.log("in the failure callback after iterateRequests");
})
.done();
I end up getting into the success callback (i.e., it logs "in the success callback after iterateRequests"). However, the console.log(results) prints before I get the logs from runRequest().then() callback and it's an empty array.
Any ideas or some guidance on chaining/iterating over promise-return functions?
Thanks!
Update
Follow up question in response to #abject_error's answer:
Checked out Q.all. Definitely looks like what I need. And it's much simpler that what I was working with. I made a simple test case to help me figure out how it works:
var Q = require("q");
function returner(number) {
var deferred = Q.defer();
deferred.resolve(number);
return deferred.promise;
}
function parent() {
return Q.all([
returner(1),
returner(2),
returner(4)
]);
}
parent()
.then(function(promises) {
// works - promises gives me [1, 2, 4]
console.log(promises);
});
So I see how I can use it if I know beforehand the number of times I need to call it (and which functions I'm going to call). Any tips on how to get a dynamic number of calls to returner (in this example) or runRequest (in my original example) in the array?
This answers the update part of the question:
var buildCalls = function() {
var calls = [];
for (var i in stories) {
calls.push(myFunc(i));
}
return calls;
}
return Q.all(buildCalls());
Q has other functions to aid in Promise based workflows. The method you need to use is Q#all. If you have an array of promises, and you want to call a function when all of them have successfully fulfilled, you do
Q.all(array_of_promises).then(success_callback, failure_callback);
After all the request promises are fulfilled, success_callback is called. If any of them rejects, the failure_callback is called immediately.