I want to spider some links, and after all the tasks are done, I want to do something else. Figuring out how to track when every task has finished is the hard part for me. Here is my code:
var urlList = [];

// Read file lines
lineReader.eachLine('url.txt', function(line) {
    console.log('url is: ' + line);
    urlList.push(line);
}).then(function() { // after reading, begin to process each line
    console.log('read done! begin collect');
    async.each(urlList, function(line) {
        console.log('begin line: ' + line);
        // download each url
        download(line, function(data, cb) {
            var $ = cheerio.load(data); // load cheerio
            var title = $('head>title').text(); // get title
            console.log('title is ' + title);
        });
    });
    // Here I want to track whether all urls have been downloaded, so I can do something else
    if (/* allproceed */) {
        console.log('Task all done! Begin Next');
    }
});

function download(url, callback) {
    http.get(url, function(res) {
        var data = "";
        res.on('data', function(chunk) {
            data += chunk;
        });
        res.on("end", function() {
            callback(data);
        });
    }).on("error", function(e) {
        console.log("Got error: " + e.message);
        callback(null);
    });
}
Hope someone can help me.
Thanks very much.
I've made some fixes to your code, see the result below:
var urlList = [];

// Read file lines
lineReader.eachLine('url.txt', function(line) {
    console.log('url is: ' + line);
    urlList.push(line);
}).then(function() { // after reading, begin to process each line
    console.log('read done! begin collect');
    async.each(urlList, function(line, callback) {
        console.log('begin line: ' + line);
        // download each url
        download(line, function(err, data) {
            if (err) {
                return callback(err);
            }
            var $ = cheerio.load(data); // load cheerio
            var title = $('head>title').text(); // get title
            console.log('title is ' + title);
            callback(null, title);
        });
    }, function continueHere(err) {
        // all urls have been downloaded at this point; continue with the rest here
        console.log('Task all done! Begin Next');
    });
});

function download(url, callback) {
    http.get(url, function(res) {
        var data = "";
        res.on('data', function(chunk) {
            data += chunk;
        });
        res.on("end", function() {
            callback(null, data);
        });
    }).on("error", function(e) {
        console.log("Got error: " + e.message);
        callback(e);
    });
}
Some things to pay special attention to:
You were already very close to your answer. async.each() is the right tool for the job, but you were not using it correctly yet. The iterator function you pass it, the one that gets called for each item in urlList, takes a callback that you call when the work for that iteration is done. I added that callback.
async.each() also takes a third argument: The function that gets called when all tasks have completed. In this function you can put the code that continues the rest of your application.
With regards to using callback: a pattern repeated across node.js is that the first argument passed to a callback is always an error, if one exists. If not, that argument is undefined or null. The actual result is passed as the second argument. It's a good idea to follow this pattern; async, for instance, expects you to obey it. If any of the tasks in the async.each() fails (by passing a non-null value as the first argument to its callback), async considers the entire series failed, and passes that error on to the series callback (the function continueHere in the code above).
One last thing. Though the code above should work, it mixes promises (signified by the .then() statement) with callbacks. These are two different ways to manage asynchronous code. You're free to mix them if you want, but for readability it might help to pick one pattern and stick with it ;).
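For illustration, here is a minimal promise-only sketch of the same flow. It assumes a Node version with native Promises; downloadP is a hypothetical promisified wrapper around the download function above, and Promise.all stands in for async.each:

// Hypothetical wrapper (an assumption, not part of the original code):
// turns the callback-style download into a function that returns a promise.
function downloadP(url) {
    return new Promise(function(resolve, reject) {
        download(url, function(err, data) {
            if (err) { return reject(err); }
            resolve(data);
        });
    });
}

lineReader.eachLine('url.txt', function(line) {
    urlList.push(line);
}).then(function() {
    // start all downloads and wait until every one of them settles
    return Promise.all(urlList.map(downloadP));
}).then(function(pages) {
    pages.forEach(function(data) {
        var title = cheerio.load(data)('head>title').text();
        console.log('title is ' + title);
    });
    console.log('Task all done! Begin Next');
});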
Hi, I'm trying to use a callback to make a variable available outside the function, but something is wrong; I think my callback is not as correct as I thought:
function latitude(callback) {
    var mylat;
    const https = require('https');
    https.get('https://url_of_my_json', (resp) => {
        let data = '';
        // A chunk of data has been received.
        resp.on('data', (chunk) => {
            data += chunk;
        });
        // The whole response has been received. Print out the result.
        resp.on('end', () => {
            mylat = JSON.parse(data).results[0].geometry.location.lat;
            callback(mylat);
        });
    }).on("error", (err) => {
        console.log("Error: " + err.message);
    });
}

var mylat = latitude(); // variable I want to use for the rest of the code
Thanks
The callback usage is incorrect: latitude() does not return a value, so you need to pass it a callback and use the result inside that callback.
Here is a simple example to better understand callbacks. Just save it as newfile.js and run node newfile to try it:
function myNew(next) {
    console.log("Im the one who initiates callback");
    next("nope", "success");
}

myNew(function(err, res) {
    console.log("I got back from callback", err, res);
});
happy coding :)
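Applied to the original latitude function, the call would look like this (a sketch; the value only exists inside the callback, so the code that needs mylat has to live there):

latitude(function(mylat) {
    console.log('latitude is: ' + mylat);
    // ...put the rest of the code that needs mylat here
});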
I have several files that I need to write to a remote server. As you can see in my code, I'm reading those files inside an async scope and writing them to the remote server as they become available. Instead of all files, only the last file is successfully transferred. I put a console output statement inside the readFile callback function to check the path variable's value; it outputs the last filename three times. So I suspect that the last readFile call is overwriting the previous ones. Shouldn't async wait for the callback function call before it moves on to the next iteration?
var attachments = ['1.jpg', '2.jpg', '3.jpg'];
var request = require('request');
var option = {
    url: "http://www.xxxxxxx.xxxx",
    headers: {"content-type": "multipart/form-data"}
};

async.each(attachments, function(att, callback) {
    path = imagePath + "\\" + att;
    fstat = fs.statSync(path);
    fsize = fstat["size"];
    fs.readFile(path, function(err, data) {
        if (err) {
            return callback(err);
        } else {
            option.body = data;
            request.post(option, function(err, response, body) {
                if (err) {
                    return callback(err);
                } else {
                    console.log(option);
                    callback();
                }
            });
        }
    });
}, function(err) {
    if (err) {
        console.log(err);
    } else {
        console.log("finish");
    }
});
Shouldn't async wait for the callback function call before it moves on to the next iteration?
No, you're describing what async.eachSeries does. async.each runs all tasks in parallel.
Your problem is that there is only one option object. You perform console.log(option) inside a function run asynchronously by request.post. That callback function runs after every request.post has been run. Since you only have one option object, it will have the final body value it was assigned by the last fs.readFile callback.
The solution here is to have every iteration of the each function create its own option object:
async.each(attachments, function(att, callback){
var option = {
url: "http://www.xxxxxxx.xxxx",
headers: {"content-type": "multipart/form-data"}
};
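For completeness, a sketch of the fully corrected call; note that path is assigned without var in the original, making it a shared global with the same problem, so it should be declared locally too (the unused fstat/fsize lines are dropped here):

async.each(attachments, function(att, callback) {
    // each iteration gets its own option object and its own path
    var option = {
        url: "http://www.xxxxxxx.xxxx",
        headers: {"content-type": "multipart/form-data"}
    };
    var path = imagePath + "\\" + att;
    fs.readFile(path, function(err, data) {
        if (err) {
            return callback(err);
        }
        option.body = data;
        request.post(option, function(err, response, body) {
            if (err) {
                return callback(err);
            }
            callback();
        });
    });
}, function(err) {
    if (err) {
        console.log(err);
    } else {
        console.log("finish");
    }
});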
Currently using HTTP GET against an external API. When called individually, the response is good. When put in a for loop, some requests don't seem to get a response.
This is the http GET function:
function httpGetChunk(url, callback) {
    http.get(url, function(resp) {
        var body = '';
        resp.on('data', function(chunk) {
            body += chunk; // chunk too large from this response
        });
        resp.on('end', function() {
            var data = JSON.parse(body);
            callback(data);
        });
        resp.on("error", function(e) {
            console.log("Got error: " + e.message);
        });
    });
}
When I call the GET function in a for loop for 5 different urls, I only get responses for some of them. Ran it a couple of times and the response would be from a different combination of the called urls but never all of them.
Any insight?
Edit 1: To give more information, my for loop looks something like this.
for (var i = 0; i < 5; i++) {
    httpGetChunk(someUrl, function(data) {
        console.log(data);
    });
}
This would only print out some responses but not all.
Edit 2:
I've taken into account all the advice on this thread. I'm now using the async module and have increased the number of concurrent connections to 20:
http.globalAgent.maxSockets = 20;
The following code is the one I'm currently testing. getMatchStats() returns a game 'match' object with statistics (e.g. kills, deaths in the match etc.), and matchIds is the array containing all the id keys of the matches:
async.parallel([
    getMatchStats(matchIds[0], function(matchData) {
        console.log('0');
    }),
    getMatchStats(matchIds[1], function(matchData) {
        console.log('1');
    }),
    getMatchStats(matchIds[2], function(matchData) {
        console.log('2');
    }),
    getMatchStats(matchIds[3], function(matchData) {
        console.log('3');
    }),
    getMatchStats(matchIds[4], function(matchData) {
        console.log('4');
    }),
], function(err, result) {
    console.log('done');
    callback(result);
});
and getMatchStats
function getMatchStats(matchId, callback) {
    var url = getMatchStatsUrl(matchId); // gets url based on id
    httpGetChunk(url, function(data) {
        callback(data);
    });
}
Again, the async.parallel never finishes, since only some of the requests get responses. Every time I run it, the responses come from a different combination of matches. Sometimes it even completes all of the requests. Maybe my OS has limitations on the number of connections (I'm testing on localhost)?
Each request is asynchronous. So, if you use a regular for loop, each step is executed synchronously and won't wait for the callback to be called. What you need is something like the each method from the async module:
async.each(yourArrayOfUrls, function(url, callback) {
    httpGetChunk(url, function(data) {
        console.log(data);
        callback();
    });
}, function(err) {
    // if some step produces an error, you can get it here...
});
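As for the async.parallel attempt in Edit 2: the array elements there are the return values of getMatchStats (which returns nothing), because the calls run immediately. async.parallel expects task functions that it invokes itself. A sketch of that variant, building one task per match id:

// Build one task function per match id; async.parallel calls each task
// with its own callback instead of us invoking getMatchStats up front.
var tasks = matchIds.map(function(matchId) {
    return function(callback) {
        getMatchStats(matchId, function(matchData) {
            callback(null, matchData); // error-first convention
        });
    };
});

async.parallel(tasks, function(err, results) {
    // results holds one match object per id, in the original order
    console.log('done');
});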
I have the following script:
var email_list = ['email1#email.com', 'email2#email.com', ..., 'email100#email.com'];

for (i = 0; i < email_list.length; i++) {
    if (checkEmail(email_list[i])) {
        // do processing, save in db and email to email addresses
    }
}
This code is blocking in Node.js. How do I make it non-blocking?
You can do this without blocking the event loop at all, by using a recursive loop. This way you only launch one database worker per call at any given time. Assuming the database work you were doing was asynchronous, your code didn't really block the event loop, but the for loop still launched a bunch of workers simultaneously, which will tend to clog the event loop (not block it). And you are right in that the loop blocks the event loop while it counts from 0 to whatever the size of your array is.
The following does exactly the same thing, but you only launch one database worker at a time (good), and you never count from 0 to length. Each email is popped off the list after the work on the current one is done, and your global event loop is left to process other things, not email_list.length database requests simultaneously.
var email_list = ['email1#email.com', 'email2#email.com', 'email100#email.com'];
function checkEmailList(emails, emailCallBack, completionCallback) {
var someDataCollectdOverAllEmails = '';
function checkEmailAsync(email) {
db.doSomeDBWorkAsync(email, function (data) {
someDataCollectdOverAllEmails += data;
if (email_list.length) {
checkEmail(email_list.pop()); //If there are still emails to be checked, check the next one ine line
} else {
completionCallback(someDataCollectdOverAllEmails);//IF not, call the completionCallBack
}
emailCallBack(data);
});
}
checkEmailAsync(emails.pop());
}
function logIndividualEmailData(data) {
console.log('Sningle Email: ' + data);
}
function logGlobalEmailData(data) {
console.log('All Email Data: ' + data);
}
checkEmailList(email_list, logIndividualEmailData, logGlobalEmailData);
Process.nextTick example
process.nextTick(function() {
    'use strict';
    console.log('printed second');
    while (true);
});

process.nextTick(function() {
    'use strict';
    console.log('never printed');
});

console.log('printed first');
Note however that in the example below, despite the fact that loopForever will run forever, it still allows both of our files to be read out. If we just had while(true), it would of course block, and one of our files' data would not be printed out.
var files = ['blah.js', 'file.js'];

for (var i = 0; i < files.length; i++) {
    fs.readFile(files[i], function(err, data) {
        console.log('File data' + data);

        // asynchronously loop forever; pretty cool, but only useful for really specific situations!
        function loopForever(loop) {
            process.nextTick(function() {
                if (loop) {
                    console.log('looping');
                    loopForever(true);
                }
            });
        }
        loopForever(true);
    });
}
If I need to do stuff after all the emails have been sent, I use the async library (docs), which provides some useful functions for control flow.
You will still need to rewrite checkEmail(email) into checkEmail(email, callback) as #S.D. suggests. In checkEmail you will want to call callback after everything is completed. This probably means that you will nest callbacks, calling the second async thing (sending the email) only after the first (db query) has completed successfully.
I also suggest that you follow convention by using the first callback argument as an err parameter. If you callback(null) you are explicitly saying 'there was no error'. #S.D.'s solution suggests instead callback(ok) which is the opposite of convention.
Here is an example showing a couple nested asynchronous functions and the async library.
edit - use async.eachLimit instead of async.each so you don't execute all 100 calls simultaneously
(function main() {
    var emails = ["a#b", "c#d"];
    var async = require('async');

    async.eachLimit(
        emails,          // array to iterate across
        10,              // max simultaneous iterations
        checkEmail,      // an asynchronous iterator function
        function(err) {  // executed on any error or after every item has succeeded
            console.log('Callback of async.eachLimit');
            if (err) {
                console.log('Error: ' + err);
            } else {
                console.log('All emails succeeded');
            }
        }
    );

    console.log('Code below the async.eachLimit call will continue executing after starting the asynchronous jobs');
})();

function checkEmail(email, callback) {
    fetchFromDb(email, function(err, obj) {
        if (err) { return callback(err); }
        sendEmail(email, function(err, obj) {
            if (err) { return callback(err); }
            console.log('Both fetchFromDb and sendEmail have completed successfully for ' + email);
            callback(null);
        });
    });
}

function fetchFromDb(email, callback) {
    process.nextTick(function() { // placeholder, insert real async function here
        callback(null);
    });
}

function sendEmail(email, callback) {
    process.nextTick(function() { // placeholder, insert real async function here
        callback(null);
    });
}
I want to send an HTTP request N times. I want to eventually have information about the results of each of those requests.
Running the request function once works great. Here's the HTTP request function using Q.defer():
function runRequest() {
    var deferred = Q.defer(),
        start = (new Date).getTime(),
        req = HTTP.request(options, function(res) {
            var end = (new Date).getTime(),
                requestDetails = {
                    reqStatus: res.statusCode,
                    reqStart: start,
                    reqEnd: end,
                    duration: end - start
                };
            deferred.resolve(requestDetails);
        });
    req.on('error', function(e) {
        deferred.reject(e.message);
    });
    req.end();
    return deferred.promise;
}
If I do this, I get back the data I expect:
runRequest().then(function(requestDetails) {
    console.log('STATUS: ' + requestDetails.reqStatus);
    console.log('Duration: ' + requestDetails.duration);
    console.log('Start: ' + requestDetails.reqStart);
    console.log('End: ' + requestDetails.reqEnd);
}, function(error) {
    console.log('Problem with request: ' + error);
})
.done();
To iterate, I tried to fit that into a for loop:
function iterateRequests() {
    var deferred = Q.defer();
    var reqResults = [];
    for (var iteration = 0; iteration < requests; iteration++) {
        runRequest()
            .then(function(requestDetails) {
                console.log('STATUS: ' + requestDetails.reqStatus);
                reqResults.push(requestDetails);
            }, function(error) {
                console.log('Problem with request: ' + error);
            });
    }
    deferred.resolve(reqResults);
    return deferred.promise;
}
Then I call it like this:
iterateRequests()
    .then(function(results) {
        console.log(results);
        console.log("in the success callback after iterateRequests");
    }, function() {
        console.log("in the failure callback after iterateRequests");
    })
    .done();
I end up getting into the success callback (i.e., it logs "in the success callback after iterateRequests"). However, the console.log(results) prints before I get the logs from the runRequest().then() callbacks, and it's an empty array.
Any ideas or some guidance on chaining/iterating over promise-return functions?
Thanks!
Update
Follow up question in response to #abject_error's answer:
Checked out Q.all. Definitely looks like what I need, and it's much simpler than what I was working with. I made a simple test case to help me figure out how it works:
var Q = require("q");

function returner(number) {
    var deferred = Q.defer();
    deferred.resolve(number);
    return deferred.promise;
}

function parent() {
    return Q.all([
        returner(1),
        returner(2),
        returner(4)
    ]);
}

parent()
    .then(function(promises) {
        // works - promises gives me [1, 2, 4]
        console.log(promises);
    });
So I see how I can use it if I know beforehand the number of times I need to call it (and which functions I'm going to call). Any tips on how to get a dynamic number of calls to returner (in this example) or runRequest (in my original example) in the array?
This answers the update part of the question:
var buildCalls = function() {
    var calls = [];
    for (var i in stories) {
        calls.push(myFunc(i));
    }
    return calls;
};

return Q.all(buildCalls());
Q has other functions to aid in Promise-based workflows. The method you need to use is Q#all. If you have an array of promises, and you want to call a function when all of them have been successfully fulfilled, you do
Q.all(array_of_promises).then(success_callback, failure_callback);
After all the request promises are fulfilled, success_callback is called. If any of them rejects, the failure_callback is called immediately.
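Putting it together with the original example: a sketch of iterateRequests rebuilt on Q.all, assuming the requests variable still holds the desired number of calls. No extra deferred is needed, since Q.all already returns a promise for the array of results:

function iterateRequests() {
    var calls = [];
    for (var iteration = 0; iteration < requests; iteration++) {
        calls.push(runRequest()); // each call returns a promise
    }
    // resolves with an array of requestDetails once every request finishes,
    // or rejects as soon as any single request fails
    return Q.all(calls);
}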