Sequence of operation in Node.js

I took the example code from npmjs > jsdom. This process takes a few seconds, and only after it completes do I want to run a second action, such as a console.log, but without inserting that code into the body of the jsdom callback. Maybe this works with Node.js streams?
I want to create a chain of functions, where the next process starts as soon as the previous one ends.
Where can I read about sequencing in Node.js?
var jsdom = require("jsdom");
jsdom.env({
    url: "http://news.ycombinator.com/",
    scripts: ["http://code.jquery.com/jquery.js"],
    done: function (errors, window) {
        var $ = window.$;
        console.log("HN Links");
        $("td.title:not(:last) a").each(function() {
            console.log(" -", $(this).text());
        });
    }
});
console.log("The end");

You're looking for Async.js.
To be specific, you're looking for its series() functionality (run an array of functions in series, each one running once the previous function has completed).
Code example (based on its docs):
async.series([
    function(callback){
        jsdom.env({
            url: "http://news.ycombinator.com/",
            scripts: ["http://code.jquery.com/jquery.js"],
            done: function (errors, window) {
                var $ = window.$;
                console.log("HN Links");
                $("td.title:not(:last) a").each(function() {
                    console.log(" -", $(this).text());
                });
                callback(null, 'one');
            }
        });
    },
    function(callback){
        // do some more stuff (second task) ...
        callback(null, 'two');
    }
],
// optional callback
function(err, results){
    console.log("The end");
});

Related

why does this node.js code not execute in series?

I'm a total node noob and barely know what I'm doing. I'm trying to execute a series of functions in sequence, one after the other, using the futures library. My code:
var futures = require('futures');
var sequence = futures.sequence();
sequence
    .then(function() {
        console.log("one");
    })
    .then(function() {
        console.log("two");
    })
    .then(function() {
        console.log("three");
    });
I expect my output to be
one
two
three
but the output I get is
one
What am I doing wrong?
futures drives each step through a callback, so each anonymous function needs to accept that callback (next) and call it to make futures execute the next function:
var futures = require('futures');
var sequence = futures.sequence();
sequence
    .then(function(next) {
        console.log("one");
        next(null, 1);
    })
    .then(function(next) {
        console.log("two");
        next(null, 2);
    })
    .then(function(next) {
        console.log("three");
        next(null, 3);
    });
futures is moving and changing constantly. Why not use the more robust and popular async module? It has everything you could possibly need for this kind of operation.
What you're after is async.series: https://github.com/caolan/async#seriestasks-callback
async.series([
    function(callback){
        // do some stuff ...
        callback(null, 'one');
    },
    function(callback){
        // do some more stuff ...
        callback(null, 'two');
    }
],
// optional callback
function(err, results){
    // results is now equal to ['one', 'two']
});

Exit Node Process After Successful fs.appendFile

I'm having trouble creating processes in parallel with Node while exiting when they're done, with a simple HTTP GET request per process. I've noticed that if I fire a process.exit() inside a callback for appendFile, some files will not be created or appended in a Node cluster setup. Ideally, the way below is how I would like to fire events, since the process exits as soon as the job is done:
var rp = require("request-promise"),
    config = require("./config"),
    cluster = require("cluster"),
    os = require("os"),
    fs = require("fs");
var keywordArray = [
    'keyword1',
    'keyword2',
    ...
];
if (cluster.isMaster) {
    var numCPUs = os.cpus().length;
    var clusterDivision = Math.ceil(keywordArray.length/numCPUs);
    // Reset the json if previously set
    keywordArray.forEach(function(arrayItem) {
        fs.unlink(config.dataDirectory + arrayItem + '.json', function(err) {
            if (err) console.error(err);
            console.log('successfully unlinked ' + arrayItem + '.json from ' + config.dataDirectory);
        });
    });
    // Create a worker for each CPU
    // Separate the array out evenly for each worker
    for (var j=1;j<=numCPUs;j++) {
        var tempArray = [];
        var removed = keywordArray.splice(0, clusterDivision);
        if (removed.length > 0) {
            // The array contains something so let's do something with the keyword
            console.log('creating a worker');
            cluster.fork().send(removed);
        } else {
            // We don't need a cluster here
        }
    }
    process.on('exit', function() {
        console.log('exited');
    });
} else if (cluster.isWorker) {
    // Code to run if we're in a worker process
    // Send the object we created above from variables so they're available to the workers
    process.on('message', function(seperatedArrayItem) {
        seperatedArrayItem.forEach(function(arrayItem) {
            function radarRequest(err, response, body) {
                var responseBody = JSON.parse(body);
                console.log(arrayItem);
                fs.appendFile(config.dataDirectory + arrayItem + '.json', JSON.stringify(responseBody.results, null, '\t'), function (err) {
                    if (err) console.error(err);
                    console.log('success writing file');
                });
            }
            rp({
                url: config.radarSearchURI +
                    '?key='+ config.apiKey +
                    '&location=' + config.latitude + ',' + config.longitude +
                    '&radius=' + config.searchRadius +
                    '&keyword=' + arrayItem,
                headers: config.headers
            }, radarRequest);
        });
        setTimeout(function() {
            process.exit(0);
        }, 5000);
    });
}
The only way I can make sure all files are properly appended is by using a timeout, which is exactly what I don't want to (and shouldn't) do. Is there another way I can ensure an appendFile has happened successfully and then kill the Node process? Here's a way that works (assuming the process doesn't take longer than 5 seconds):
process.on('message', function(seperatedArrayItem) {
    seperatedArrayItem.forEach(function(arrayItem) {
        function radarRequest(err, response, body) {
            var responseBody = JSON.parse(body);
            console.log(arrayItem);
            fs.appendFile(config.dataDirectory + arrayItem + '.json', JSON.stringify(responseBody.results, null, '\t'), function (err) {
                if (err) console.error(err);
                console.log('success writing file');
            });
        }
        rp({
            url: config.radarSearchURI +
                '?key='+ config.apiKey +
                '&location=' + config.latitude + ',' + config.longitude +
                '&radius=' + config.searchRadius +
                '&keyword=' + arrayItem,
            headers: config.headers
        }, radarRequest);
    });
    setTimeout(function() {
        process.exit(0);
    }, 5000);
});
You can use an async flow-control module like async to kill the process after all files are written. I'd also recommend cluster.worker.disconnect() so that the Node process exits gracefully, but that isn't a requirement.
async.forEach(seperatedArrayItem, function(item, done){
    // append file and call 'done' when it is written.
}, function(){
    // Will be called when all item 'done' functions have been called.
    cluster.worker.disconnect();
});
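Fleshed out for the worker above, the message handler might look like this (a sketch reusing the question's rp, config, and fs variables, and assuming request-promise's default behaviour of resolving with the response body):
process.on('message', function (seperatedArrayItem) {
    async.forEach(seperatedArrayItem, function (arrayItem, done) {
        rp({
            url: config.radarSearchURI +
                '?key=' + config.apiKey +
                '&location=' + config.latitude + ',' + config.longitude +
                '&radius=' + config.searchRadius +
                '&keyword=' + arrayItem,
            headers: config.headers
        }).then(function (body) {
            var responseBody = JSON.parse(body);
            // append the results and let the 'done' callback signal completion
            fs.appendFile(config.dataDirectory + arrayItem + '.json',
                JSON.stringify(responseBody.results, null, '\t'), done);
        }).catch(done);
    }, function (err) {
        // runs once every 'done' above has been called
        if (err) console.error(err);
        cluster.worker.disconnect();
    });
});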
Node's fs.appendFile( ... ) is an asynchronous function, so it expects us to pass a callback; that is how we know it has finished its main operation or are informed of an error.
This means we need to call Node's process.exit( ... ) inside the provided callback. I've written this code to test:
'use strict';
var fs = require('fs');
function jsonValue(obj) {
    return JSON.stringify(obj, null, '\t');
}
fs.appendFile('file.json', jsonValue(['t', 'e', 's', 't']), function(error) {
    if (error) {
        throw error;
    }
    console.log('success writing file'); // no error, so log...
    process.exit(); // and exit right now
    console.log('exited?'); // this will not be printed
});
Well, it worked as defined.
Another way that works is to use the synchronous version, fs.appendFileSync( ... ), and call process.exit() sequentially:
fs.appendFileSync('file.json', jsonValue(['t', 'e', 's', 't']));
console.log('success writing file'); // no error (I hope so =), so log...
process.exit(); // and exit right now
console.log('exited?'); // this will not be printed
This is clean code and works, but you lose the robustness and convenience gained with the callback...
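If the synchronous route is preferred but some robustness is still wanted, a try/catch recovers basic error handling, since appendFileSync throws instead of passing an error to a callback (a minimal sketch, reusing fs and jsonValue from the test above):
try {
    fs.appendFileSync('file.json', jsonValue(['t', 'e', 's', 't']));
    console.log('success writing file'); // reached only if the write succeeded
    process.exit();
} catch (error) {
    console.error(error); // appendFileSync threw, so report and exit non-zero
    process.exit(1);
}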

nodeJS: running a batch of childprocess commands in a waterfall

I am trying to elegantly run five git commands one after the other, while keeping the ability to catch errors and report progress:
git status
git pull
git add .
git commit -am "commit message"
git push
Open source note: I've studied different node-git libraries and decided for different reasons to implement it on my own.
Using Q, I've created a deferred method to run child processes:
var exec = require('child_process').exec,
    path = require('path'),
    Q = require('q'),
    gitPath = path.resolve(__dirname + "/../projects/langs");
function run(command) {
    var deferred = Q.defer();
    exec(command, {cwd: gitPath}, function puts(error, stdout, stderr) {
        if (error) {
            deferred.reject(new Error(error));
        } else {
            deferred.resolve(stdout);
        }
    });
    return deferred.promise;
}
However, I want to avoid the Pyramid of doom:
function option1() {
    // Pyramid of doom
    run("git status").then(function (output) {
        console.log(output);
        run("git pull").then(function (output) {
            console.log(output);
            run("git add .").then(function (output) {
                console.log(output);
                // etc.
            });
        });
    });
}
And this flattened version doesn't feel quite elegant either (each step has to return the next run() promise for the chain to wait):
function options1a() {
    run("git status").then(function (output) {
        console.log(output);
        return run("git pull");
    }).then(function (output) {
        console.log(output);
        return run("git add .");
    }).then(function (output) {
        console.log(output);
    });
}
I saw a third option but can't seem to get it to work:
function promiseWaterfall(tasks) {
    var resolvedPromise = Q(undefined);
    var finalTaskPromise = tasks.reduce(function (prevTaskPromise, task) {
        return prevTaskPromise.then(task);
    }, resolvedPromise); // initial value
    return finalTaskPromise;
}
promiseWaterfall([
    run("git status"),
    run("git pull"),
    run("git add .")
]).then(function () {
    console.log(arguments);
});
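A likely culprit: run("git status") executes immediately, so the array passed to promiseWaterfall holds promises that are already running, not tasks for the reduce to chain. Passing functions instead should restore the ordering (a sketch using the same run helper):
promiseWaterfall([
    function () { return run("git status"); },
    function () { return run("git pull"); },
    function () { return run("git add ."); }
]).then(function (result) {
    console.log(result); // resolves with the last command's stdout
});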
And I'm playing with a fourth option of using the async library:
async.waterfall([
    function(callback){
        callback(null, 'one', 'two');
    },
    function(arg1, arg2, callback){
        callback(null, 'three');
    },
    function(arg1, callback){
        // arg1 now equals 'three'
        callback(null, 'done');
    }
], function (err, result) {
    // result now equals 'done'
});
But this seems to take me towards a non-promises path.
How do I get it to work elegantly? Any best practices?
I'm familiar with when.js promises, so I will answer your question with that promise library. It provides helper functions for this sort of thing, similar to the callback-based async lib. Check out their API documentation for more examples.
In the following code I am using the when/sequence module to perform what you are looking for. I've also modified your code organization a little to keep things somewhat modular (e.g. not embedding the git cwd inside the run function in your example).
Here is a fully working implementation. Make sure to change out the git cwd to your own git repository as it's currently pointing to one of my own.
var exec = require('child_process').exec,
    when = require('when'),
    sequence = require('when/sequence');
// simple promise wrapper for exec
function exec_p(command, options) {
    options = options || {};
    var defer = when.defer();
    exec(command, options, function(error, stdout, stderr) {
        return error
            ? defer.reject(stderr + new Error(error.stack || error))
            : defer.resolve(stdout);
    });
    return defer.promise;
}
// Some simple git wrapper
function Git(config) {
    var self = this;
    self.config = config;
    return function(gitCommand) {
        return exec_p('git ' + gitCommand, self.config);
    };
}
// create a new instance of git and specify our options
var git = new Git({ cwd: "/home/trev/git/tsenior" });
// we can now use sequence & our newly created git wrapper to easily
// run things in order, one after another
sequence([
    function() { return git('status'); },
    function() { return git('status'); },
    function() { return git('status'); },
    function() { return git('status'); }
]).then(function(results) { // handle the results here
    console.log(results);
}).otherwise(function(error) { // handle any errors here
    console.error(error.stack || error);
    process.exit(1);
});
The code provided doesn't console.log after every step (it just logs out the results at the end), but it can be easily modified to do so.
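For example, each task could log its own output before passing it along (a sketch reusing the git wrapper above; the same pattern applies to the other commands):
sequence([
    function () {
        return git('status').then(function (output) {
            console.log(output); // log this step's output as soon as it completes
            return output; // still contribute it to the final results array
        });
    },
    function () { return git('pull'); } // etc. for add, commit, push
]).then(function (results) {
    console.log(results);
});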

How can I make vows wait 60s for a callback to happen

I've written some code that connects to an FTP server and lists a very long directory. It can take 40+ seconds to get a response.
I've started writing a test for this, but I get "Errored >> callback not fired".
Is there a way to instruct Vows or Node to just chill for a bit and wait for the callbacks to fire, say, up to some configurable amount of time?
Here's my vows code:
vows.describe('FTP Downloader Suite').addBatch({
    'FTP Downloader' : {
        topic: function() {
            var promise = new(events.EventEmitter);
            var lastMomentDownloaded = moment();
            lastMomentDownloaded.subtract('minute', 1);
            ftpDownloader.getNewPathsToDownload(config, lastMomentDownloaded, function(err, res) {
                if (err) { promise.emit('error', err, res) }
                else { promise.emit('success', err, res) }
            });
            return promise;
        },
        'can be accessed': function(err, stat) {
            assert.isNull(err); // we have no error
            assert.isArray(stat); // we have a result
        },
        'is not empty': function(err, stat) {
            assert.isNotZero(stat.length);
        },
        'is shorter than 100 paths': function(err, stat) {
            assert.isTrue(stat.length < 100);
        },
        'contains paths matching the config': function(err, stat) {
            _.each(stat, function(value, key, list) {
                console.log(value);
            });
        }
    }
}).export(module);
Thanks!
Vows was obfuscating the source of the problem, which was that an error was being thrown. When I switched to nodeunit, the issue was obvious. Maybe there is a way to improve error logging with vows, but I'm done with it.
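For comparison, a minimal nodeunit port of the same check might look like this (a sketch assuming the same ftpDownloader, config, and moment setup as the vows version; nodeunit simply waits, with no default timeout, until test.done() is called):
exports.testFtpDownloader = function (test) {
    var lastMomentDownloaded = moment().subtract('minute', 1);
    ftpDownloader.getNewPathsToDownload(config, lastMomentDownloaded, function (err, stat) {
        test.ifError(err); // surfaces a thrown-back error instead of swallowing it
        test.ok(Array.isArray(stat)); // we got an array...
        test.ok(stat.length > 0); // ...and it isn't empty
        test.done(); // nodeunit waits for this, however long the FTP listing takes
    });
};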

node.io : when the job is done, do it again

I'm building a scraper with node.io.
The page I want to scrape has new content every minute, so I would like to re-run my job every minute.
(Ok I could do that with a bash script, but I would like to stay in javascript)
This is a basic job:
var nodeio = require('node.io'), options = {timeout: 10};
exports.job = new nodeio.Job(options, {
    input: ['hello', 'foobar', 'weather'],
    run: function (keyword) {
        this.getHtml('http://www.google.com/search?q=' + encodeURIComponent(keyword), function (err, $) {
            var results = $('#resultStats').text.toLowerCase();
            this.emit(keyword + ' has ' + results);
        });
    }
});
How could I do that? I'm a beginner in Node.js; I tried wrapping the job in setInterval, without success.
Try this (run with "node <myfile.js>" instead of "node.io <myfile.js>"):
var nodeio = require('node.io'), options = {timeout: 10};
var job = {
    input: ['hello', 'foobar', 'weather'],
    run: function (keyword) {
        this.getHtml('http://www.google.com/search?q=' + encodeURIComponent(keyword), function (err, $) {
            var results = 'test'; //$('#resultStats').text.toLowerCase();
            this.emit(keyword + ' has ' + results);
        });
    }
};
setInterval(function(){
    nodeio.start(new nodeio.Job(options, job), options, function(){});
}, 5000);
The problem you were running into is the following block of code in node.io, which exits the process when you don't provide a callback when running the job:
// Default behaviour is to exit once the job is complete
callback = callback || function (err) {
    if (err) {
        utils.status.error(err);
    }
    process.exit();
};
