node.js ncp (async recursive copy utility) callback not called - node.js

I am new to node.js and maybe I am doing something wrong.
There's this hugely popular async recursive copy utility npmjs.org/package/ncp.
I am trying to run it in parallel:
var ncp = require('ncp').ncp;
var dirs = [
['test/from/1', 'test/to/1'],
['test/from/2', 'test/to/2'],
['test/from/3', 'test/to/3']
];
var copyDirAsync = function (dir) {
ncp(dir[0], dir[1], function (err) {
console.log('done: ' + dir[1]);
});
}
for (var i = 0; i < dirs.length; ++i) {
copyDirAsync(dirs[i]);
}
So, all dirs copy just fine. However I get only one console.log message printed with a random directory. The other two don't arrive. The program just exists. If I add a 15 sec timeout so that node keeps running for a while, the callbacks don't arrive either. I would assume that this is a problem with ncp, however with 30K downloads per day of a 1-month old realease 0.5.0, and no issues reported so far, plus me a newcomer to node.js, I'll just assume I don't understand something about node.

First read Asynchronous iteration patterns
Now, you can use the async module especially async.series like so;
var ncp = require('ncp').ncp
, async = require('async');
var dirs = [
['test/from/1', 'test/to/1'],
['test/from/2', 'test/to/2'],
['test/from/3', 'test/to/3']
];
var copyDirAsync = function (dir, done) {
ncp(dir[0], dir[1], function (err) {
if (err) return done(err);
console.log('done: ' + dir[1]);
done();
});
}
async.each(dirs, copyDirAsync, function(err){
console.log(err);
});

Related

NodeJS - fs.stat() is ignored in a FOR-loop

I'm trying to loop over a bunch of directories and then try if a file inside that directory exists with NodeJS and fs.stat().
I've got a simple for-loop to loop over the directories and in it the fs.stat() call to check, if "project.xml" inside that particular directory exists. My code looks like this:
for (var i = 0, length = prDirs.length; i < length; i++) {
var path = Config["ProjectDirectory"] + "/" + prDirs[i];
console.log("PATH=" + path);
fs.stat(path + "/project.xml", function (err, stat) {
if (err == null) {
console.log(" => PATH=" + path);
}
})
}
NodeJS loops correctly over the directory, the console.log() outputs all the directories correctly, but the code inside the if inside fs.stat() is not called and runs only once at the end of the loop. My console shows this:
PATH=(...)/PHP
PATH=(...)/Electron
PATH=(...)/testapp
PATH=(...)/Vala
=> PATH=(...)/Vala/project.xml
But the project.xml I'm looking for is in testapp/ not in Vala/ but Vala/ is the last entry in prDirs.
The code above is my latest attempt, I've tried plenty of other variations and one (I appended an else to the if inside fs.stat()) showed me, that fs.stat() actually gets invoked, but only the code inside the if is not running and the code in the else I appended once was running.
Thanks in advance!
fs.stat is an asynchronous i/o function, so its callback will be called only after the main thread is idle, or in your case, only after the for loop is done. Instead of a for loop, I suggest iterating the folder in an asynchronous matter. You can use async.each, async.eachSeries, or implement it yourself.
As #Gil Z mention fs.stat in async. I would suggest to use promises if you want to keep for each loop and make code looks sync.
Here is the example. Works on node v 6.9
"use strict";
const fs = require('fs');
let paths = ["tasks", "aaa", "bbbb"];
//method to get stat data using promises
function checkFileExists(path) {
return new Promise((resolve, reject) => {
fs.stat(path + "/project.xml", (err, res) => {
resolve(err ? "Not found in " + path : " => PATH=" + path);
});
});
}
//create promise array with each directory
let promiseArr = [];
paths.forEach(pathPart => {
let path = process.cwd() + "/" + pathPart;
promiseArr.push(checkFileExists(path));
});
//run all promises and collect results
Promise.all(promiseArr)
.then(reslut => {
console.log(reslut);
})
.catch(e => console.log("Error in promises"));
The above code will log this
[ ' => PATH=/Users/mykolaborysiuk/Sites/circlemedia/syndic-apiManager/tasks',
'Not found in /Users/mykolaborysiuk/Sites/circlemedia/syndic-apiManager/aaa',
'Not found in /Users/mykolaborysiuk/Sites/circlemedia/syndic-apiManager/bbbb' ]
Hope this helps.

How to wait in node.js for a variable available on the cloud to have a specific value

I'm sorry if this is a basic question, but I am trying to implement a program in node.js that should wait for the value of a variable available trough a request to a cloud api (photon.variable()) to be 1. This variable should not be requested more than once per second. My first attempt is included in the sample code below. Despite knowing it does not work at all, I think it could be useful to show the functionality I would like to implement.
var photondata = 0;
while (photondata < 1)
{
setTimeout(function () {
photon.variable("witok", function(err, data) {
if (!err) {
console.log("data: ", data.result);
photondata = data.result;
}
else console.log(err);
})}, 1000);
}
Since you couldn't do async stuff in loops before, the traditional approach would be to create a function that adds itself to setTimeout for as long as needed, then calls some other function when it's done. You still need to do this in the browser if not using Babel.
These days, you can stop execution and wait for things to happen when using a generator function (which latest versions of Node now support). There are many libraries that will let you do this and I will advertise ours :)
CL.run(function* () {
var photondata = 0;
while (true) {
yield CL.try(function* () {
var data = yield photon.variable("witok", CL.cb());
console.log("data: ", data.result);
photondata = data.result;
}, function* (err) {
console.log(err.message);
});
if (photondata >= 1) break;
yield CL.sleep(1000);
}
// do whatever you need here
});

nodeJS too many child processes?

I am using node to recursively traverse a file system and make a system call for each file, by using child.exec. It works well when tested on a small structure, with a couple of folders and files, but when run on the whole home directory, it crashes after a while
child_process.js:945
throw errnoException(process._errno, 'spawn');
^
Error: spawn Unknown system errno 23
at errnoException (child_process.js:998:11)
at ChildProcess.spawn (child_process.js:945:11)
at exports.spawn (child_process.js:733:9)
at Object.exports.execFile (child_process.js:617:15)
at exports.exec (child_process.js:588:18)
Does this happen because it uses up all resources? How can I avoid this?
EDIT: Code
improvement and best practices suggestions always welcome :)
function processDir(dir, callback) {
fs.readdir(dir, function (err, files) {
if (err) {...}
if (files) {
async.each(files, function (file, cb) {
var filePath = dir + "/" + file;
var stats = fs.statSync(filePath);
if (stats) {
if (stats.isFile()) {
processFile(dir, file, function (err) {
if (err) {...}
cb();
});
} else if (stats.isDirectory()) {
processDir(filePath, function (err) {
if (err) {...}
cb();
});
}
}
}, function (err) {
if (err) {...}
callback();
}
);
}
});
}
the issue can be because of having many open files simultaneously
consider using async module to solve the issue
https://github.com/caolan/async#eachLimit
async.eachLimit(
files,
20,
function(file, callback){
//process file here and call callback
},
function(err){
//done
}
);
in current example you will process 20 files at a time
Well, I don't know the reason for the failure, but if this is what you expect (using up all of the resources) or as others say (too many files open), you could try to use multitasking for it. JXcore (fork of Node.JS) offers such thing - it allows to run a task in a separate instance, but this is done still inside one single process.
While Node.JS app as a process has its limitations - JXcore with its sub-instances multiplies those limits: single process even with one extra instance (or task, or well: we can call it sub-thread) doubles the limits!
So, let's say, that you will run each of your spawn() in a separate task. Or, since tasks are not running in a main thread any more - you can even use sync method that jxcore offers : cmdSync().
Probably the the best illustration would be given by this few lines of the code:
jxcore.tasks.setThreadCount(4);
var task = function(file) {
var your_cmd = "do something with " + file;
return jxcore.utils.cmdSync(your_cmd);
};
jxcore.tasks.addTask(task, "file1.txt", function(ret) {
console.log("the exit code:", ret.exitCode);
console.log("output:", ret.out);
});
Let me repeat: the task will not block the main thread, since it is running in a separate instance!
Multitasking API is documented here: Multitasking.
As has been established in comments, you are likely running out of file handles because you are running too many concurrent operations on your files. So, a solution is to limit the number of concurrent operations that run at once so too many files aren't in use at the same time.
Here's a somewhat different implementation that uses Bluebird promises to control both the async aspects of the operation and the concurrency aspects of the operation.
To make the management of the concurrency aspect easier, this collects the entire list of files into an array first and then processes the array of filenames rather than processing as you go. This makes it easier to use a built-in concurrency capability in Bluebird's .map() (which works on a single array) so we don't have to write that code ourselves:
var Promise = require("bluebird");
var fs = Promise.promisifyAll(require("fs"));
var path = require("path");
// recurse a directory, call a callback on each file (that returns a promise)
// run a max of numConcurrent callbacks at once
// returns a promise for when all work is done
function processDir(dir, numConcurrent, fileCallback) {
var allFiles = [];
function listDir(dir, list) {
var dirs = [];
return fs.readdirAsync(dir).map(function(file) {
var filePath = path.join(dir , file);
return fs.statAsync(filePath).then(function(stats) {
if (stats.isFile()) {
allFiles.push(filePath);
} else if (stats.isDirectory()) {
return listDir(filePath);
}
}).catch(function() {
// ignore errors on .stat - file could just be gone now
return;
});
});
}
return listDir(dir, allFiles).then(function() {
return Promise.map(allFiles, function(filename) {
return fileCallback(filename);
}, {concurrency: numConcurrent});
});
}
// example usage:
// pass the initial directory,
// the number of concurrent operations allowed at once
// and a callback function (that returns a promise) to process each file
processDir(process.cwd(), 5, function(file) {
// put your own code here to process each file
// this is code to cause each callback to take a random amount of time
// for testing purposes
var rand = Math.floor(Math.random() * 500) + 500;
return Promise.delay(rand).then(function() {
console.log(file);
});
}).catch(function(e) {
// error here
}).finally(function() {
console.log("done");
});
FYI, I think you'll find that proper error propagation and proper error handling from many async operations is much, much easier with promises than the plain callback method.

Console.time always returns 0.000ms

I'm using node-webkit to create a album manager and I'm setting up a recursive scan to find all my photos. I'm scanning some 10k files, but console.time just keeps returning 0.000ms. I know the scan is happening pretty quick, but it's not that quick. Am I doing something wrong?
var fs = require('fs');
var path = 'I:/pictures/';
console.time('read-directory');
var scanDirectory = function(path) {
fs.readdir(path,function(err,files) {
if(err) {
console.log(err);
} else {
files.forEach(function(file) {
fs.stat(path + file, function(err,stats) {
if(err) {
console.log(err);
} else {
if(stats.isDirectory()) {
scanDirectory(path + file + '/');
} else {
console.log(path + file);
}
}
});
});
}
});
}
scanDirectory(path);
console.timeEnd('read-directory');
You are using fs.readdir which is asynchronous. So your timer is not depending of your scanDirectory execution.
In fact, it's just launching your function when you call scanDirectory(path) then directly after stop the timer.
If you want you can use fs.readdirSync which will prevent to jump to the timer end as it's a synchronise function. The problems, is that will freeze your application (if your use it's directly like that) during this time and problably slow your execution.
In order to get the time of execution of your asynchrone function you can use the profiler tool of Node-webkit. But you will need to filter and sum them manually...
The other solution is to use timely (it's an npm package ) that can time synchronous or asynchronous functions.

How to wait for all async calls to finish

I'm using Mongoose with Node.js and have the following code that will call the callback after all the save() calls has finished. However, I feel that this is a very dirty way of doing it and would like to see the proper way to get this done.
function setup(callback) {
// Clear the DB and load fixtures
Account.remove({}, addFixtureData);
function addFixtureData() {
// Load the fixtures
fs.readFile('./fixtures/account.json', 'utf8', function(err, data) {
if (err) { throw err; }
var jsonData = JSON.parse(data);
var count = 0;
jsonData.forEach(function(json) {
count++;
var account = new Account(json);
account.save(function(err) {
if (err) { throw err; }
if (--count == 0 && callback) callback();
});
});
});
}
}
You can clean up the code a bit by using a library like async or Step.
Also, I've written a small module that handles loading fixtures for you, so you just do:
var fixtures = require('./mongoose-fixtures');
fixtures.load('./fixtures/account.json', function(err) {
//Fixtures loaded, you're ready to go
};
Github:
https://github.com/powmedia/mongoose-fixtures
It will also load a directory of fixture files, or objects.
I did a talk about common asyncronous patterns (serial and parallel) and ways to solve them:
https://github.com/masylum/i-love-async
I hope its useful.
I've recently created simpler abstraction called wait.for to call async functions in sync mode (based on Fibers). It's at an early stage but works. It is at:
https://github.com/luciotato/waitfor
Using wait.for, you can call any standard nodejs async function, as if it were a sync function, without blocking node's event loop. You can code sequentially when you need it.
using wait.for your code will be:
//in a fiber
function setup(callback) {
// Clear the DB and load fixtures
wait.for(Account.remove,{});
// Load the fixtures
var data = wait.for(fs.readFile,'./fixtures/account.json', 'utf8');
var jsonData = JSON.parse(data);
jsonData.forEach(function(json) {
var account = new Account(json);
wait.forMethod(account,'save');
}
callback();
}
That's actually the proper way of doing it, more or less. What you're doing there is a parallel loop. You can abstract it into it's own "async parallel foreach" function if you want (and many do), but that's really the only way of doing a parallel loop.
Depending on what you intended, one thing that could be done differently is the error handling. Because you're throwing, if there's a single error, that callback will never get executed (count won't be decremented). So it might be better to do:
account.save(function(err) {
if (err) return callback(err);
if (!--count) callback();
});
And handle the error in the callback. It's better node-convention-wise.
I would also change another thing to save you the trouble of incrementing count on every iteration:
var jsonData = JSON.parse(data)
, count = jsonData.length;
jsonData.forEach(function(json) {
var account = new Account(json);
account.save(function(err) {
if (err) return callback(err);
if (!--count) callback();
});
});
If you are already using underscore.js anywhere in your project, you can leverage the after method. You need to know how many async calls will be out there in advance, but aside from that it's a pretty elegant solution.

Resources