I am trying to elegantly run five git commands one after the other, while keeping the ability to catch errors and report progress:
git status
git pull
git add .
git commit -am "commit message"
git push
Open source note: I've studied various node git libraries and decided, for several reasons, to implement this on my own.
Using Q, I've created a deferred method to run child processes:
var exec = require('child_process').exec,
    path = require('path'),
    Q = require('q'),
    gitPath = path.resolve(__dirname, "../projects/langs");

function run(command) {
    var deferred = Q.defer();
    exec(command, {cwd: gitPath}, function (error, stdout, stderr) {
        if (error) {
            deferred.reject(error); // exec already supplies an Error object
        } else {
            deferred.resolve(stdout);
        }
    });
    return deferred.promise;
}
However, I want to avoid the Pyramid of doom:
function option1() {
    // Pyramid of doom
    run("git status").then(function (output) {
        console.log(output);
        run("git pull").then(function (output) {
            console.log(output);
            run("git add .").then(function (output) {
                console.log(output);
                // etc.
            });
        });
    });
}
And this flattened variant doesn't feel quite elegant either (note that the inner run() calls aren't returned, so the steps don't actually wait for each other):
function option1a() {
    // Flat, but subtly broken
    run("git status").then(function (output) {
        console.log(output);
        run("git pull"); // missing a `return`, so the next .then fires immediately with undefined
    }).then(function (output) {
        console.log(output);
        run("git add .") // missing a `return` here too
    }).then(function (output) {
        console.log(output);
    });
}
I saw a third option but can't seem to get it to work:
function promiseWaterfall(tasks) {
    var resolvedPromise = Q(undefined);
    var finalTaskPromise = tasks.reduce(function (prevTaskPromise, task) {
        return prevTaskPromise.then(task);
    }, resolvedPromise); // initial value
    return finalTaskPromise;
}

promiseWaterfall([
    run("git status"),
    run("git pull"),
    run("git add .")
]).then(function () {
    console.log(arguments);
});
And I'm playing with a fourth option of using the async library:
async.waterfall([
    function (callback) {
        callback(null, 'one', 'two');
    },
    function (arg1, arg2, callback) {
        callback(null, 'three');
    },
    function (arg1, callback) {
        // arg1 now equals 'three'
        callback(null, 'done');
    }
], function (err, result) {
    // result now equals 'done'
});
But this seems to take me towards a non-promises path.
How do I get this to work elegantly? Any best practices?
I'm familiar with when.js promises, so I will answer your question with that promise library. It provides helper functions for this sort of thing, similar to the callback-based async lib. Check out their API documentation for more examples.
In the following code I am using the when/sequence module to perform what you are looking for. I've also modified your code organization a little to keep things somewhat modular (e.g. not embedding the git cwd inside the run function as in your example).
Here is a fully working implementation. Make sure to change out the git cwd to your own git repository as it's currently pointing to one of my own.
var exec = require('child_process').exec
  , when = require('when')
  , sequence = require('when/sequence');

// simple promise wrapper for exec
function exec_p(command, options) {
    options = options || {};
    var defer = when.defer();
    exec(command, options, function (error, stdout, stderr) {
        return error
            ? defer.reject(stderr + new Error(error.stack || error))
            : defer.resolve(stdout);
    });
    return defer.promise;
}
// a simple git wrapper
function Git(config) {
    var self = this;
    self.config = config;
    return function (gitCommand) {
        return exec_p('git ' + gitCommand, self.config);
    };
}

// create a new instance of Git and specify our options
var git = new Git({ cwd: "/home/trev/git/tsenior" });

// we can now use sequence & our newly created git wrapper to easily
// run things in order, one after another
sequence([
    function () { return git('status'); },
    function () { return git('status'); },
    function () { return git('status'); },
    function () { return git('status'); }
]).then(function (results) { // handle the results here
    console.log(results);
}).otherwise(function (error) { // handle any errors here
    console.error(error.stack || error);
    process.exit(1);
});
The code provided doesn't console.log after every step (it just logs out the results at the end), but it can be easily modified to do so.
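Incidentally, the promiseWaterfall attempt from the question was very close to working. The catch is that run("git status") executes the command immediately, so the array holds promises that are already running in parallel rather than task functions for reduce to chain. Passing functions instead, just as the sequence calls above do, makes it behave as intended; a minimal sketch against the question's own run helper:
promiseWaterfall([
    function () { return run("git status"); },
    function () { return run("git pull"); },
    function () { return run("git add ."); }
]).then(function (lastOutput) {
    console.log(lastOutput); // resolved value of the final task
});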
Related
I'm working with the node async library but I'm not able to execute things in the sequence I want, and I don't know where I'm going wrong.
Here is the code; in the comments I have labeled the intended order numbers.
Currently it executes in 2, 3, 4, 5, 1 order; I want 1, 2, 3, 4, 5 order. Kindly help.
function getAsExhibitors(req, res) {
    //getting all exhibitors against an event
    var exhibitors = [];
    var eac_app_names = [];
    async.series([function(callback){
        models.EacExhibitorsExt.find({ deleted: false, userid: req.user._id }).sort({ modified: -1 }).exec(function(err, myExhibitors) {
            exhibitors = myExhibitors;
            callback();
        });
    },function(callback){
        async.forEach(exhibitors,function(exhibitor,callback){
            models.Eac.findById(exhibitor.eventid).exec(function(err,eac){
                eac_app_names[exhibitors.indexOf(exhibitor)]=eac;
                console.log("-----------------1--------------"+eac_app_names);
            });
            console.log("-----------------2--------------"+eac_app_names);
            callback();
        },function(err) {
            console.log("-----------------3--------------"+eac_app_names);
            callback();
        });
    }],function(err) { //This function gets called after the two tasks have called their "task callbacks"
        if (err) return next(err);
        //Here locals will be populated with 'exhibitors' and 'apps'
        console.log("-------------------------4------"+eac_app_names);
        console.log("-------------------------5------"+eac_app_names.name);
        res.locals.exhibitors = exhibitors;
        res.locals.eac_app_names = eac_app_names;
        res.render('eac/eac_reg_as_exhibitor', { title: "My Event Exhibitors", asexhibitor: exhibitors, app_names: eac_app_names });
    });
};
All mongoose methods are asynchronous. In your scenario, try it this way:
function getAsExhibitors(req, res) {
    //getting all exhibitors against an event
    var exhibitors = [];
    var eac_app_names = [];
    async.series([function(callback){
        models.EacExhibitorsExt.find({ deleted: false, userid: req.user._id }).sort({ modified: -1 }).exec(function(err, myExhibitors) {
            exhibitors = myExhibitors;
            callback();
        });
    },function(callback){
        async.forEach(exhibitors,function(exhibitor,callback){
            models.Eac.findById(exhibitor.eventid).exec(function(err,eac){
                eac_app_names[exhibitors.indexOf(exhibitor)]=eac;
                console.log("-----------------1--------------"+eac_app_names);
                console.log("-----------------2--------------"+eac_app_names);
                callback(); // moved inside the query callback so it fires only after the result arrives
            });
        },function(err) {
            console.log("-----------------3--------------"+eac_app_names);
            callback();
        });
    }],function(err) { //This function gets called after the two tasks have called their "task callbacks"
        if (err) return next(err);
        //Here locals will be populated with 'exhibitors' and 'apps'
        console.log("-------------------------4------"+eac_app_names);
        console.log("-------------------------5------"+eac_app_names.name);
        res.locals.exhibitors = exhibitors;
        res.locals.eac_app_names = eac_app_names;
        res.render('eac/eac_reg_as_exhibitor', { title: "My Event Exhibitors", asexhibitor: exhibitors, app_names: eac_app_names });
    });
};
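Note that async.forEach fires all the findById lookups in parallel; the series step still waits for all of them, but their individual completion order is not guaranteed. If the lookups themselves must run strictly one at a time, async.eachSeries should be a drop-in replacement; a sketch using the same models and arrays as above:
// sequential variant: each findById completes before the next one starts
async.eachSeries(exhibitors, function (exhibitor, cb) {
    models.Eac.findById(exhibitor.eventid).exec(function (err, eac) {
        if (err) return cb(err);
        eac_app_names[exhibitors.indexOf(exhibitor)] = eac;
        cb();
    });
}, function (err) {
    // all lookups are done here, in array order
    callback(err);
});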
You are welcome to use ES6 with generators.
Try out co-foreach-series to take each array element and execute the async functions one by one.
ForEach series example:
foreach(yourArray, function(element, index) {
    // Each of these functions will be executed one after another
    co(function*() {
        // Do some async task, and wait until this task is finished
        yield yourAsyncFunc();
        yield doOtherAsyncTasks();
    })
})
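If you would rather skip the extra dependency, plain co can sequence an array by itself. A minimal sketch, assuming yourAsyncFunc returns a promise or another yieldable:
var co = require('co');

co(function* () {
    for (var i = 0; i < yourArray.length; i++) {
        // each iteration waits for the previous one to finish
        yield yourAsyncFunc(yourArray[i]);
        yield doOtherAsyncTasks();
    }
});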
My code looks similar to this:
var mongo_client = require('mongodb').MongoClient, dataStorage;

lib = {
    [...]
    find: function(res, param, callback) {
        var parentPath = param.path;
        while (parentPath !== '/') {
            collection.findOne({'paths': parentPath}, {...}, function(err, data) {
                if (data) {
                    dataStorage = data;
                    callback(data, res);
                }
            });
            if (dataStorage) {
                return;
            }
            parentPath = lib.removeLastBlockOfPath(parentPath);
        }
        if (!dataStorage) {
            callback(someDefaultData, res);
        }
    }
    [...]
}
What I want to do is find a path stored in mongo or, if there is no match, the first matching parent path.
I can't set the dataStorage value from the findOne callback; is there any way to do that? Even if I find the path, it always runs through all the path blocks.
Node is asynchronous, so your code must be written accordingly. An option is to use the async module, which has lots of tools to manage asynchronous flows.
For example, you could use the whilst function to manage your while loop:
find: function(res, param, callback) {
    var parentPath = param.path,
        dataStorage = null;
    async.whilst(
        function () { return parentPath !== '/'; },
        function (done) {
            collection.findOne({'paths': parentPath}, {...}, function(err, data) {
                if (data) {
                    dataStorage = data;
                    return callback(data, res); // found: hand off the result and abandon the loop
                }
                parentPath = lib.removeLastBlockOfPath(parentPath);
                done();
            });
        },
        function (error) {
            if (!dataStorage) return callback(someDefaultData, res);
        }
    );
}
Don't forget to install and require the async module:
var async = require('async');
Your code is written as if it were "traditional synchronous" code, which it is not. You cannot check dataStorage for validity until the results from findOne() come back, so your checks need to move all the way into the inner "if (data)" branch. This is not a mongodb issue; this is purely how nodejs works, with everything asynchronous and callback-driven.
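For illustration, here is a hedged sketch of the same lookup without the async module, using a named recursive function instead of a while loop (it assumes lib.removeLastBlockOfPath and someDefaultData from the question, and omits the projection argument for brevity):
function find(res, param, callback) {
    var parentPath = param.path;
    (function lookup() {
        // base case: no parents left, fall back to the defaults
        if (parentPath === '/') return callback(someDefaultData, res);
        collection.findOne({ paths: parentPath }, function (err, data) {
            if (data) return callback(data, res); // found a match, stop here
            parentPath = lib.removeLastBlockOfPath(parentPath);
            lookup(); // try the next parent path
        });
    })();
}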
I took the example code from npmjs > jsdom. This process takes a few seconds, and only after it finishes do I want to run a second action, such as console.log, without inserting that code into the jsdom done callback. Maybe this is a job for Node.js streams?
I want to create a chain of functions, where the next one starts as soon as the previous one ends.
Where can I read about sequencing in Node.js?
var jsdom = require("jsdom");

jsdom.env({
    url: "http://news.ycombinator.com/",
    scripts: ["http://code.jquery.com/jquery.js"],
    done: function (errors, window) {
        var $ = window.$;
        console.log("HN Links");
        $("td.title:not(:last) a").each(function() {
            console.log(" -", $(this).text());
        });
    }
});
console.log("The end");
You're looking for Async.js.
To be specific, you're looking for its series() functionality (Run an array of functions in series, each one running once the previous function has completed).
Code example (based on its docs):
async.series([
    function(callback){
        jsdom.env({
            url: "http://news.ycombinator.com/",
            scripts: ["http://code.jquery.com/jquery.js"],
            done: function (errors, window) {
                var $ = window.$;
                console.log("HN Links");
                $("td.title:not(:last) a").each(function() {
                    console.log(" -", $(this).text());
                });
                callback(null, 'one');
            }
        });
    },
    function(callback){
        // do some more stuff (second task) ...
        callback(null, 'two');
    }
],
// optional callback
function(err, results){
    console.log("The end");
});
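An alternative without the library, sketched here under the assumption that nothing else needs jsdom's callback: wrap jsdom.env in a Promise and chain the follow-up work (the scrape helper name is just for illustration):
function scrape(url) {
    return new Promise(function (resolve, reject) {
        jsdom.env({
            url: url,
            scripts: ["http://code.jquery.com/jquery.js"],
            done: function (errors, window) {
                if (errors) return reject(errors);
                resolve(window);
            }
        });
    });
}

scrape("http://news.ycombinator.com/").then(function (window) {
    var $ = window.$;
    console.log("HN Links");
    $("td.title:not(:last) a").each(function () {
        console.log(" -", $(this).text());
    });
    console.log("The end"); // runs only after the scrape completes
});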
Using mongoskin, I can do a query like this, which will return a cursor:
myCollection.find({}, function(err, resultCursor) {
    resultCursor.each(function(err, result) {
    });
});
However, I'd like to call some async functions for each document, and only move on to the next item in the cursor after these have called back (similar to the eachSeries structure in the async.js module). E.g.:
myCollection.find({}, function(err, resultCursor) {
    resultCursor.each(function(err, result) {
        externalAsyncFunction(result, function(err) {
            //externalAsyncFunction completed - now want to move to next doc
        });
    });
});
How could I do this?
Thanks
UPDATE:
I don't want to use toArray(), as this is a large batch operation and the results might not fit in memory in one go.
A more modern approach that uses async/await:
const cursor = db.collection("foo").find({});
while (await cursor.hasNext()) {
    const doc = await cursor.next();
    // process doc here
}
Notes:
This may become even simpler once async iterators arrive.
You'll probably want to add try/catch for error checking.
The containing function should be async, or the code should be wrapped in (async function() { ... })() since it uses await. Both points are reflected in the sketch below.
If you want, add await new Promise(resolve => setTimeout(resolve, 1000)); (pause for 1 second) at the end of the while loop to confirm that docs are processed one after the other.
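A minimal sketch combining those notes, with the loop wrapped in an async IIFE and basic error handling:
(async function () {
    const cursor = db.collection("foo").find({});
    try {
        while (await cursor.hasNext()) {
            const doc = await cursor.next();
            // process doc here
        }
    } catch (err) {
        console.error(err); // cursor and network errors surface here
    }
})();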
If you don't want to load all of the results into memory using toArray, you can iterate using the cursor with something like the following.
myCollection.find({}, function(err, resultCursor) {
    function processItem(err, item) {
        if (err) { return console.error(err); } // surface cursor errors
        if (item === null) {
            return; // All done!
        }
        externalAsyncFunction(item, function(err) {
            resultCursor.nextObject(processItem);
        });
    }
    resultCursor.nextObject(processItem);
});
Since Node.js v10.3 you can use an async iterator:
const cursor = db.collection('foo').find({});
for await (const doc of cursor) {
    // do your thing
    // you can even use `await myAsyncOperation()` here
}
Jake Archibald wrote a great blog post about async iterators, that I came to know after reading #user993683's answer.
This works with large datasets by using setImmediate, which yields to the event loop between items and keeps the recursion from growing the call stack:
var cursor = collection.find({filter...}).cursor();

cursor.nextObject(function fn(err, item) {
    if (err || !item) return;
    setImmediate(fnAction, item, arg1, arg2, function() {
        cursor.nextObject(fn);
    });
});

function fnAction(item, arg1, arg2, callback) {
    // Here you can do whatever you want to do with your item.
    return callback();
}
If someone is looking for a Promise way of doing this (as opposed to using callbacks of nextObject), here it is. I am using Node v4.2.2 and mongo driver v2.1.7. This is kind of an asyncSeries version of Cursor.forEach():
function forEachSeries(cursor, iterator) {
    return new Promise(function(resolve, reject) {
        var count = 0;
        function processDoc(doc) {
            if (doc != null) {
                count++;
                return iterator(doc).then(function() {
                    return cursor.next().then(processDoc);
                });
            } else {
                resolve(count);
            }
        }
        cursor.next().then(processDoc);
    });
}
To use this, pass the cursor and an iterator that operates on each document asynchronously (like you would for Cursor.forEach). The iterator needs to return a promise, like most mongodb native driver functions do.
Say, you want to update all documents in the collection test. This is how you would do it:
var theDb;
MongoClient.connect(dbUrl).then(function(db) {
    theDb = db; // save it, we'll need to close the connection when done.
    var cur = db.collection('test').find();
    return forEachSeries(cur, function(doc) { // this is the iterator
        return db.collection('test').updateOne(
            {_id: doc._id},
            {$set: {updated: true}} // or whatever else you need to change
        );
        // updateOne returns a promise, if not supplied a callback. Just return it.
    });
})
.then(function(count) {
    console.log("All Done. Processed", count, "records");
    theDb.close();
});
You can do something like this using the async lib. The key point here is to check if the current doc is null. If it is, it means you are finished.
var array = []; // `cursor` and the final `callback` come from the surrounding scope
async.series([
    function (cb) {
        cursor.each(function (err, doc) {
            if (err) {
                cb(err);
            } else if (doc === null) {
                cb();
            } else {
                console.log(doc);
                array.push(doc);
            }
        });
    }
], function (err) {
    callback(err, array);
});
You could use a Future:
var Future = require('fibers/future'); // Future comes from the fibers package

myCollection.find({}, function(err, resultCursor) {
    resultCursor.count(Meteor.bindEnvironment(function(err, count) {
        for (var i = 0; i < count; i++) {
            var itemFuture = new Future();
            resultCursor.nextObject(function(err, item) {
                itemFuture.return(item); // fibers' Future uses return() to supply the value
            });
            var item = itemFuture.wait();
            //do what you want with the item,
            //and continue with the loop if so
        }
    }));
});
You can get the results in an array and iterate over them with a recursive function, something like this.
myCollection.find({}).toArray(function (err, items) {
    var count = 0;
    var fn = function () {
        externalAsyncFunction(items[count], function () {
            count += 1;
            if (count < items.length) fn(); // recurse until every item is processed
        });
    };
    if (items.length) fn();
});
Edit:
This is only applicable for small datasets; for larger ones you should use cursors, as mentioned in the other answers.
A more modern approach that uses for await:
const cursor = db.collection("foo").find({});
for await (const doc of cursor) {
    // process doc here with await
    await processDoc(doc);
}
You could use simple setTimeout calls. This is an example in TypeScript running on Node.js (I am using promises via the 'when' module, but it can be done without them as well):
import mongodb = require("mongodb");

var dbServer = new mongodb.Server('localhost', 27017, {auto_reconnect: true}, {});
var db = new mongodb.Db('myDb', dbServer);
var util = require('util');
var when = require('when'); //npm install when

var dbDefer = when.defer();
db.open(function() {
    console.log('db opened...');
    dbDefer.resolve(db);
});

dbDefer.promise.then(function(db : mongodb.Db){
    db.collection('myCollection', function (error, dataCol){
        if(error) {
            console.error(error); return;
        }

        var doneReading = when.defer();

        var processOneRecordAsync = function(record) : When.Promise {
            var result = when.defer();
            setTimeout(function() {
                //simulate a variable-length operation
                console.log(util.inspect(record));
                result.resolve('record processed');
            }, Math.random()*5);
            return result.promise;
        }

        var runCursor = function (cursor : MongoCursor){
            cursor.next(function(error : any, record : any){
                if (error){
                    console.log('an error occurred: ' + error);
                    return;
                }
                if (record){
                    processOneRecordAsync(record).then(function(r){
                        setTimeout(function() {runCursor(cursor)}, 1);
                    });
                }
                else{
                    //cursor exhausted
                    doneReading.resolve('done reading data.');
                }
            });
        }

        dataCol.find({}, function(error, cursor : MongoCursor){
            if (!error)
            {
                setTimeout(function() {runCursor(cursor)}, 1);
            }
        });

        doneReading.promise.then(function(message : string){
            //message='done reading data'
            console.log(message);
        });
    });
});
I'm writing a simple request handler to return a pair of css files. Using fs.readFileSync this was easy. However, I'm having difficulty accomplishing the same task using the async version of readFile. Below is my code. Having my response.write() method calls split among two different callbacks seems to be problematic. Can someone point out what I've done wrong? Interestingly this code works if I put response.end() inside of the first else statement. However, that creates a problem in that the second css file does not get returned (because response.end() has already been fired).
function css(response) {
    response.writeHead(200, {"Content-Type": "text/css"});
    fs.readFile('css/bootstrap.css', function(error, content){
        if(error){
            console.log(error);
        }
        else{
            response.write(content);
        }
    });
    fs.readFile('css/bootstrap-responsive.css', function(error, content){
        if(error){
            console.log(error);
        }
        else{
            response.write(content);
        }
    });
    response.end();
}
The primary issue with what you have is that response.end() gets called right away. You need to only call it after the files have done their response.write calls.
The easiest way would be to use a control flow library. Managing multiple asynchronous callbacks is generally complicated.
https://github.com/joyent/node/wiki/modules#wiki-async-flow
I'm going to use the async library because it's the one I know best.
var fs = require('fs');
var async = require('async');

function css(response) {
    response.writeHead(200, {"Content-Type": "text/css"});
    async.eachSeries(
        // Pass items to iterate over
        ['css/bootstrap.css', 'css/bootstrap-responsive.css'],
        // Pass iterator function that is called for each item
        function(filename, cb) {
            fs.readFile(filename, function(err, content) {
                if (!err) {
                    response.write(content);
                }
                // Calling cb makes it go to the next item.
                cb(err);
            });
        },
        // Final callback after each item has been iterated over.
        function(err) {
            response.end();
        }
    );
}
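As a side note, if the two reads don't need to happen one after the other, async.map runs them in parallel while still delivering the results in input order; a hedged sketch, assuming response and its headers are set up as in css() above:
async.map(
    ['css/bootstrap.css', 'css/bootstrap-responsive.css'],
    fs.readFile, // (filename, callback) matches async's iterator signature
    function (err, contents) {
        if (err) {
            console.log(err);
            return response.end();
        }
        // contents arrive in the same order as the file list
        contents.forEach(function (c) { response.write(c); });
        response.end();
    }
);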
If you want to accomplish this without a library, or just want another way, this is how I would do it more directly. Basically you keep a count and call end once both file reads have finished.
function css(response) {
    response.writeHead(200, {"Content-Type": "text/css"});
    var count = 0;
    var handler = function(error, content){
        count++;
        if (error){
            console.log(error);
        }
        else{
            response.write(content);
        }
        if (count == 2) {
            response.end();
        }
    };
    fs.readFile('css/bootstrap.css', handler);
    fs.readFile('css/bootstrap-responsive.css', handler);
}
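One caveat with the shared handler: whichever read finishes first gets written first, so the two stylesheets can arrive in either order. A hedged variant that stores each result by index to keep the order stable:
function css(response) {
    response.writeHead(200, {"Content-Type": "text/css"});
    var files = ['css/bootstrap.css', 'css/bootstrap-responsive.css'];
    var contents = [];
    var remaining = files.length;
    files.forEach(function (file, i) {
        fs.readFile(file, function (error, content) {
            if (error) console.log(error);
            contents[i] = content || ''; // slot by index so order is stable
            if (--remaining === 0) {
                contents.forEach(function (c) { response.write(c); });
                response.end();
            }
        });
    });
}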
You can simply rely on native ES6 Promises. The code can be as simple as this:
var promises = ['file1.css', 'file2.css'].map(function(_path){
    // _path is captured by this closure, so no .bind() trickery is needed
    return new Promise(function(resolve, reject){
        fs.readFile(_path, 'utf8', function(err, data){
            if(err){
                console.log(err);
                resolve(""); //following the same code flow
            }else{
                resolve(data);
            }
        });
    });
});
Promise.all(promises).then(function(results){
    //Put your callback logic here
    response.writeHead(200, {"Content-Type": "text/css"});
    results.forEach(function(content){ response.write(content); });
    response.end();
});
There's a simple common solution to get them all with one callback.
You can place it anywhere in your project to reuse in many different cases.
var FS = require('fs');

/**
 * Abstract helper to asyncly read a bulk of files
 * Note that `cb` will receive an array of errors for each file and an array of file data
 * Keys in the resulting arrays will be the same as in `paths`
 *
 * @param {Array} paths - file paths array
 * @param {Function} cb - callback receiving (errors, data):
 *   errors - a list of file reading errors (undefined if none occurred)
 *   data - a list of file content data
 */
function FS_readFiles (paths, cb) {
    var result = [], errors = [], l = paths.length;
    paths.forEach(function (path, k) {
        FS.readFile(path, function (err, data) {
            // decrease the number of files still pending
            --l;
            // store the error or the content under the file's original index
            err && (errors[k] = err);
            !err && (result[k] = data);
            // invoke cb once every file has been read
            !l && cb(errors.length ? errors : undefined, result);
        });
    });
}
Just pass it a list of file paths and it will return each of them to you as a buffer.
Simple example:
var cssFiles = [
    'css/bootstrap.css',
    'css/bootstrap-responsive.css'
];

function css(response) {
    FS_readFiles(cssFiles, function (errors, data) {
        response.writeHead(200, {"Content-Type": "text/css"});
        data.forEach(function (v) {
            response.write(v);
        });
        response.end();
    });
}
Off-topic: by the way, responses like this are better cached on a front-end proxy server such as nginx or Varnish, since they never change.
const fs = require('fs');

function readFilePromise(fileName) {
    return new Promise(function (resolve, reject) {
        fs.readFile(fileName, 'utf-8', function(err, data){
            if (err) { reject(err); } else {
                resolve(data);
            }
        });
    });
}

Promise.all([readFilePromise("abc.txt"), readFilePromise("dec.txt")]).then(function(out){
    console.log(out);
});
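For what it's worth, on Node 10 and later the built-in fs.promises API makes the hand-rolled wrapper unnecessary; a minimal sketch:
const fsp = require('fs').promises;

Promise.all([
    fsp.readFile('abc.txt', 'utf-8'),
    fsp.readFile('dec.txt', 'utf-8')
]).then(function (out) {
    console.log(out);
}).catch(function (err) {
    console.error(err);
});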
Async is an awesome lib. However, the standard for these things is moving in the direction of promises for handling multiple asynchronous operations; in fact, in ECMAScript 6 this will be a standard part of the library. There are several libraries that implement promises, including jQuery, but for node I like to use 'q'.
Here is the same code using promises. One note: you might want to move the first writeHead call to coincide with the first successful read.
var Q = require('q');

function css(response) {
    response.writeHead(200, {"Content-Type": "text/css"});
    var defer = Q.defer();
    fs.readFile('css/bootstrap.css', function(error, content){
        if(error){
            defer.reject(error);
        }
        else{
            response.write(content);
            defer.resolve();
        }
    });
    defer.promise.then(function() { //this gets executed when the first read succeeds and is written
        var secondDefer = Q.defer();
        fs.readFile('css/bootstrap-responsive.css', function(error, content){
            if(error){
                secondDefer.reject(error);
            }
            else{
                response.write(content);
                secondDefer.resolve();
            }
        });
        return secondDefer.promise;
    },
    function(error) { //this gets called when the first read fails
        console.log(error);
        //other error handling
    }).
    done(function() {
        response.end();
    },
    function(error) { //this is the error handler if the second read fails
        console.log(error);
        response.end(); //gotta call end anyway
    });
}
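A shorter hedged variant of the same flow uses Q's nfcall helper, which adapts node-style callback functions to promises:
Q.nfcall(fs.readFile, 'css/bootstrap.css')
    .then(function (content) {
        response.write(content);
        return Q.nfcall(fs.readFile, 'css/bootstrap-responsive.css');
    })
    .then(function (content) {
        response.write(content);
    })
    .catch(function (error) {
        console.log(error); // either read failing lands here
    })
    .fin(function () {
        response.end(); // always end the response
    });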