Identify event emitter from multiple - node.js

I am trying to create multiple http requests in Node.js, with each of them receiving a separate response. What I want to do is to identify which event corresponds to which call:
for (var i=0; i<100; i++) {
var req = http.request(options, function(response) {
var str = "";
response.on('data', function (chunk) {
console.log(str.length);
});
response.on('end', function () {
console.log("End of response");
});
});
req.on('error', function(err) {
console.log(err.message);
});
req.end();
}
Is there any way of properly identifying which response corresponds to each iteration? I am basically creating 100 response instances, but they all emit the same event, so the event emitting/handling is done globally. Basically, could I somehow tie i and the events emitted by response?

#BenFortune was right, this was related to closures. The example in the original question was overly-simplified, but if you have a construction similar to:
for(var i=0; ... ) {
function someFunction() {
}
}
and you want to keep track of something external to the function inside the function, then you should look into closures.

Related

Nodejs Http request has no response

Currently using http GET to an external API. When called individually, the response is good. When put in a for loop, some requests don't seem to have a response.
This is the http GET function:
function httpGetChunk(url, callback) {
http.get(url, function(resp) {
var body='';
resp.on('data', function(chunk) {
body += chunk; //chunk too large from this response
});
resp.on('end', function() {
var data = JSON.parse(body);
callback(data);
});
resp.on("error", function(e) {
console.log("Got error: " + e.message);
});
});
}
When I call the GET function in a for loop for 5 different urls, I only get responses for some of them. Ran it a couple of times and the response would be from a different combination of the called urls but never all of them.
Any insight?
Edit 1: To give more information, my for loop looks something like this.
for (var i=0;i<5; i++) {
httpGetChunk(someUrl, function(data) {
console.log(data);
});
}
This would only print out some responses but not all.
Edit 2:
I've taken into account all the advice on this thread. I'm now using the async module and have increased the number of concurrent connections to 20:
http.globalAgent.maxSockets = 20;
Following code is the one im currently testing:
getMatchStats() returns an game 'match' object with statistics (e.g kills, deaths in the match etc.)
matchIds is the array containing all the id keys of the matches
async.parallel([
getMatchStats(matchIds[0], function (matchData) {
console.log('0');
}),
getMatchStats(matchIds[1], function (matchData) {
console.log('1');
}),
getMatchStats(matchIds[2], function (matchData) {
console.log('2');
}),
getMatchStats(matchIds[3], function (matchData) {
console.log('3');
}),
getMatchStats(matchIds[4], function (matchData) {
console.log('4');
}),
], function(err, result) {
console.log('done');
callback(result);
});
and getMatchStats
function getMatchStats(matchId, callback) {
var url = getMatchStatsUrl(matchId); //gets url based on id
httpGetChunk(url, function(data) {
callback(data);
});
}
again, the async.parallel never finishes since only some of the requests have responses. Every time i run it, the responses would be from a different combination of matches. Sometimes, it even completes all of the requests.
Maybe my OS has limitations on number of connections (im testing on localhost)?
Each request is asynchronous. So, if you use a regular for loop, each step is going to be executed synchronously and won't wait for callback to be called. What do you need is something like the each method from the async module, like:
async.each(yourArrayOfUrls, function (url, callback) {
httpGetChunk(url, function(data) {
console.log(data);
callback();
});
}, function (err) {
// if some step produce an error, you can get it here...
});

How to handle callbacks in a for loop(Node.JS)

I am trying to write a code with NodeJS where I grab data from an external API and then populate them in MongoDB using Mongoose. In between that, I'll check to see if that particular already exists in Mongo or not. Below is my code.
router.route('/report') // the REST api address
.post(function(req, res) // calling a POST
{
console.log('calling report API');
var object = "report/" + reportID; // related to the API
var parameters = '&limit=100' // related to the API
var url = link + object + apiKey + parameters; // related to the API
var data = "";
https.get(url, function callback(response)
{
response.setEncoding("utf8");
response.on("data", function(chunk)
{
data += chunk.toString() + "";
});
response.on("end", function()
{
var jsonData = JSON.parse(data);
var array = jsonData['results']; // data is return in array of objects. accessing only a particular array
var length = array.length;
console.log(length);
for (var i = 0; i < length; i++)
{
var report = new Report(array.pop()); // Report is the schema model defined.
console.log('^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^');
console.log(i);
console.log('*****************************');
console.log(report);
console.log('*****************************');
// console.log(report['id']);
/*report.save(function(err)
{
if(err)
res.send(err);
});*/
Report.find({id:report['id']}).count(function(err, count) // checks if the id of that specific data already exists in Mongo
{
console.log(count);
console.log('*****************************');
if (count == 0) // if the count = 0, meaning not exist, then only save
{
report.save(function(err)
{
console.log('saved');
if(err)
res.send(err);
});
}
});
};
res.json({
message: 'Grabbed Report'
});
});
response.on("error", console.error);
});
})
My problem is that since NodeJS callbacks are parallel, it is not getting called sequentially. My end result would be something like this :
Calling report API
console.log(length) = 100
^^^^^^^^^^^^^^^^^^^^^^^^
console.log(i) = starts with 0
*******************************
console.log(report) = the data which will be stored inside Mongo
*******************************
number 3 - 7 repeats 100 times as the length is equals to 100
console.log(count) = either 0 or 1
number 9 repeats 100 times
console.log('saved')
number 11 repeats 100 times
Lastly, only the last out of 100 data is stored into Mongo
What I need is some sort of technique or method to handle these callbacks which are executing one after the other and not sequentially following the loop. I am pretty sure this is the problem as my other REST APIs are all working.
I have looked into async methods, promises, recursive functions and a couple others non which I could really understand how to solve this problem. I really hope someone can shed some light into this matter.
Feel free also to correct me if I did any mistakes in the way I'm asking the question. This is my first question posted in StackOverflow.
This problem is termed as the "callback hell".
There's lots of other approaches like using Promise and Async libraries you'll find.
I'm more excited about the native async ES7 will bring,
which you can actually start using today with transpiler library Babel.
But by far the simplest approach I've found is the following:
You take out the long callback functions and define them outside.
router.route('/report') // the REST api address
.post(calling_a_POST)
function calling_a_POST(req, res) {
...
var data = "";
https.get(url, function callback(response) {
...
response.on("end", response_on_end_callback); // --> take out
response.on("error", console.error);
});
}
function response_on_end_callback() { // <-- define here
...
for (var i = 0; i < length; i++) {
var report = new Report(array.pop());
...
Report.find({ id: report['id'] })
.count(Report_find_count_callback); // --> take out
};
res.json({
message: 'Grabbed Report'
});
}
function Report_find_count_callback(err, count) { // <-- define here
...
if (count == 0) {
report.save(function(err) { // !! report is undefined here
console.log('saved');
if (err)
res.send(err); // !! res is undefined here
});
}
}
A caveat is that you won't be able to access all the variables inside what used to be the callback,
because you've taken them out of the scope.
This could be solved with a "dependency injection" wrapper of sorts to pass the required variables.
router.route('/report') // the REST api address
.post(calling_a_POST)
function calling_a_POST(req, res) {
...
var data = "";
https.get(url, function callback(response) {
...
response.on("end", function(err, data){ // take these arguments
response_on_end(err, data, res); // plus the needed variables
});
response.on("error", console.error);
});
}
function response_on_end(err, data, res) { // and pass them to function defined outside
...
for (var i = 0; i < length; i++) {
var report = new Report(array.pop());
...
Report.find({ id: report['id'] })
.count(function(err, count){
Report_find_count(err, count, report, res); // same here
});
};
res.json({ // res is now available
message: 'Grabbed Report'
});
}
function Report_find_count(err, count, report, res) { // same here
...
if (count == 0) {
report.save(function(err) { // report is now available
console.log('saved');
if (err)
res.send(err); // res is now available
});
}
}
When I execute the response_on_end function, I am getting the undefined:1 unexpected token u error.
I am pretty much sure it has something to do with this line: var jsonData = JSON.parse(data)
My response_on_end is as below: var jsonData = JSON.parse(data); // problem here
I realize I made an error here:
function calling_a_POST(req, res) {
...
var data = "";
https.get(url, function callback(response) {
...
//sponse.on("end", function(err, data){
response.on("end", function(err){ // data shouldn't be here
response_on_end(err, data, res);
});
response.on("error", console.error);
});
}
Another problem I could forsee, which actually may not arise here but still would be better to talk about anyways.
The data variable, since it's a string which is a primitive type unlike an object, it is "passed by value".
More info
It's better to wrap the variable in an object and pass the object, because objects in javascript are always "passed by reference".
function calling_a_POST(req, res) {
...
// var data = ""; //
var data_wrapper = {};
data_wrapper.data = {}; // wrap it in an object
https.get(url, function callback(response) {
...
response.on("data", function(chunk){
data_wrapper.data += chunk.toString() + ""; // use the dot notation to reference
});
response.on("end", function(err){
response_on_end(err, data_wrapper, res); // and pass that object
});
response.on("error", console.error);
});
}
function response_on_end_callback(err, data_wrapper, res) {
var data = data_wrapper.data; // later redefine the variable
...
for (var i = 0; i < length; i++) {
var report = new Report(array.pop());
...
You can use async library for controlling your execution flows. And there are also iterators for working with arrays.

Node tries to send headers but previous response seems still opened

I have the following code:
app.get('/pull-requests', function (request) {
fetchRepos(fetchPullRequests);
app.on('pull-requests:fetched', function (pullRequestsByRepo) {
var html = "";
_.each(pullRequestsByRepo, function (pullRequests) {
html += 'There is <strong>'+ pullRequests.length +'</strong> pending pull request(s) for <strong>'+ pullRequests[0].title +'</strong>:';
html += '<ul>';
_.each(pullRequests, function (pullRequest) {
html += '<li><em>'+ pullRequest.title +'</em> ('+ pullRequest.url +')</li>';
});
html += '</ul>';
});
response.send(html);
});
});
It works fine once. Every second request ends raising an error Can't set headers after they are sent..
EDIT: More code to explicit the logic
function fetchRepos (callback) {
_options.path = '/orgs/'+ app.get('org') +'/repos?client_id='+ app.get('client_id') +'&client_secret='+ app.get('client_secret');
// Fetch the list of repos for a given organisation
var request = https.get(_options, function (res) {
data = "";
res.on('data', function (chunk) {
data += chunk;
});
res.on('end', function () {
var repos = JSON.parse(data);
return callback(repos);
});
});
request.on('error', function (error) {
console.log('Problem with request: '+ e);
});
}
function fetchPullRequests (repos) {
var pullRequests = [];
_.each(repos, function (repo, index) {
_options.path = '/repos/'+ app.get('org') +'/'+ repo.name +'/pulls?client_id='+ app.get('client_id') +'&client_secret='+ app.get('client_secret');
var request = https.get(_options, function (res) {
(function () {
var data = "";
res.on('data', function (chunk) {
data += chunk;
});
res.on('end', function () {
data = JSON.parse(data);
if (data.length > 0) {
pullRequests.push(data);
}
if (index == (repos.length - 1)) {
app.emit('pull-requests:fetched', pullRequests);
}
});
})();
});
});
}
Your problem is that whenever you call app.on('pull-requests:fetched', …), you add a new listener, meaning that when the second request arrives, it will trigger the first one again.
node then complains because you try reply twice to the first request.
You could fix your immediate issue by calling app.once, which would ensure that the only fires once, but you would still have an issue if 2 requests arrive at the same time.
The correct pattern in this case is to pass a callback to fetchRepos.

accessing an array outside the function in node js

I know node.js run asynchronously, so outer functions execute earlier than the inner. But what is the way to access the notification array outside the for loop? I would like to access all the values in array at once, is this feasible?
var notification=[];
for(var j=0;j<6; j++)
{
getNotification(response[j].sender_id,function(results) // a function called
{
notification[j] =results;
console.log(notification); // output: correct
});
}
console.log(notification); // output: [], need notification array values here
EDIT: If you don't want to use third party libs, this is how to do this in your own code.
/* jshint node:true*/
function getNotifications(responses, callbackToMainProgramLogic) {
'use strict';
var results = [];
function getNotificationAsync(response) {
getNotification(response.sender_id, function (data) {
results.push(data);
if (responses.length) {
getNotificationAsync(responses.pop());//If there are still responses, launch another async getNotification.
} else {
callbackToMainProgramLogic(results);//IF there aren't we're done, and we return to main program flow
}
});
}
getNotificationAsync(responses.pop());
}
getNotifications(someArrayOfResonses, function (dataFromNotifications) {
console.log('The collected data: ' + JSON.stringify(dataFromNotifications, 0, 4));
});
If you absolutely must, you could do something ridiculous like this. Your logic in the loopUntilDatReceived would be waiting for array sizes, not waiting for a non-empty string, but the idea is similar, and you shouldn't be using this anyway! :)
var fileData = '';
fs.readFile('blah.js', function (err, data) { //Async operation, similar to your issue.
'use strict';
fileData = data;
console.log('The Data: ' + data);
});
function loopUntilDataReceived() {
'use strict';
process.nextTick(function () {//A straight while loop would block the event loop, so we do this once per loop around the event loop.
if (fileData === '') {
console.log('No Data Yet');
loopUntilDataReceived();
} else {
console.log('Finally: ' + fileData);
}
});
}
loopUntilDataReceived();
Did I mention this is ridiculous? Honestly, this is an awful idea, but it may help you understand what is going on and how the Node event loop works, and why what you want is not possible. AND why the other posts about callbacks, and flow control libraries are the way to go.
First off, you're having a closure issue in your code (please see the details here)
Then, you simply can't have the array values just next to the loop, because the values are not ready at this point.
You need to wait until all 6 of your getNotification calls get resolved. You can do that with the async library. Something like:
var notification = [];
function createRequest (index) {
return function (callback) {
getNotification(response[index].sender_id, function(results) {
notification[index] = results;
callback(results);
});
}
}
var requests = [];
for(var j=0;j<6; j++) {
requests.push(createRequest(j));
}
async.parallel(requests, function (allResults) {
// notifications array is ready at this point
// the data should also be available in the allResults array
console.log(notifications);
});
Send a callback to the notification loop like this:
var notification=[];
getNotificationArray( function() {
console.log(notification);
});
function getNotificationArray (callback)
{
for(var j=0;j<6; j++)
{
getNotification(response[j].sender_id,function(results) // a function called
{
notification[j] =results;
console.log(notification); // output: correct
});
}
callback();
}

Chaining an arbitrary number of promises in Q

I want to send an HTTP request N times. I want to eventually have information about the results of each of those requests.
Running the request function once works great. Here's the HTTP request function using Q.defer():
function runRequest() {
var deferred = Q.defer(),
start = (new Date).getTime(),
req = HTTP.request(options, function(res) {
var end = (new Date).getTime(),
requestDetails = {
reqStatus: res.statusCode,
reqStart: start,
reqEnd: end,
duration: end - start
}
deferred.resolve(requestDetails);
});
req.on('error', function(e) {
deferred.reject(e.message);
});
req.end();
return deferred.promise;
}
If I do this, I get back the data I expect:
runRequest().then(function(requestDetails) {
console.log('STATUS: ' + requestDetails.reqStatus);
console.log('Duration: ' + requestDetails.duration);
console.log('Start: ' + requestDetails.reqStart);
console.log('End: ' + requestDetails.reqEnd);
}, function(error) {
console.log('Problem with request: ' + error);
})
.done();
To iterate, I tried to fit that into a for loop:
function iterateRequests() {
var deferred = Q.defer();
var reqResults = [];
for (var iteration = 0; iteration < requests; iteration++) {
runRequest()
.then(function(requestDetails) {
console.log('STATUS: ' + requestDetails.reqStatus);
reqResults.push(requestDetails);
}, function(error) {
console.log('Problem with request: ' + error);
});
}
deferred.resolve(reqResults);
return deferred.promise;
}
Then I call it like this:
iterateRequests()
.then(function(results) {
console.log(results);
console.log("in the success callback after iterateRequests");
}, function() {
console.log("in the failure callback after iterateRequests");
})
.done();
I end up getting into the success callback (i.e., it logs "in the success callback after iterateRequests"). However, the console.log(results) prints before I get the logs from runRequest().then() callback and it's an empty array.
Any ideas or some guidance on chaining/iterating over promise-return functions?
Thanks!
Update
Follow up question in response to #abject_error's answer:
Checked out Q.all. Definitely looks like what I need. And it's much simpler that what I was working with. I made a simple test case to help me figure out how it works:
var Q = require("q");
function returner(number) {
var deferred = Q.defer();
deferred.resolve(number);
return deferred.promise;
}
function parent() {
return Q.all([
returner(1),
returner(2),
returner(4)
]);
}
parent()
.then(function(promises) {
// works - promises gives me [1, 2, 4]
console.log(promises);
});
So I see how I can use it if I know beforehand the number of times I need to call it (and which functions I'm going to call). Any tips on how to get a dynamic number of calls to returner (in this example) or runRequest (in my original example) in the array?
This answers the update part of the question:
var buildCalls = function() {
var calls = [];
for (var i in stories) {
calls.push(myFunc(i));
}
return calls;
}
return Q.all(buildCalls());
Q has other functions to aid in Promise based workflows. The method you need to use is Q#all. If you have an array of promises, and you want to call a function when all of them have successfully fulfilled, you do
Q.all(array_of_promises).then(success_callback, failure_callback);
After all the request promises are fulfilled, success_callback is called. If any of them rejects, the failure_callback is called immediately.

Resources