I'm new to Node.js. One thing that confuses me is parallel flow. I read an example that showed this snippet as a technique for controlling parallel flow:
var fs = require('fs');
var filesDir = './files';
var tasks = [];
fs.readdir(filesDir, function (err, files) {
    if (err) throw err;
    for (var index in files) {
        var task = (function (file) {
            return function () {
                fs.readFile(file, function (err, text) {
                    if (err) throw err;
                    doSomething();
                });
            };
        })(filesDir + '/' + files[index]);
        tasks.push(task);
    }
    for (var index in tasks) {
        tasks[index]();
    }
});
This code works like a charm, but when I replace it with:
for (var index in files) {
    var task = function () {
        console.log(files[index]);
        fs.readFile(filesDir + '/' + files[index], function (err, text) {
            if (err) throw err;
            doSomething();
        });
    };
    tasks.push(task);
}
for (var index in tasks) {
    tasks[index]();
}
It doesn't work as I expected: files[index] in the loop is always the last file in the directory. Could you please explain what the real flow is?
In short, the function you created holds a reference to the index variable (not its value), so by the time it executes, index has already advanced to the last file in the directory.
Some links: Understanding variable capture by closures in Javascript/Node
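To see the capture-by-reference behavior in isolation, here is a minimal, self-contained sketch (not from the original post):

var fns = [];
for (var i = 0; i < 3; i++) {
    // Each function closes over the same variable i, not its current value.
    fns.push(function () { console.log(i); });
}
fns.forEach(function (fn) { fn(); }); // prints 3, 3, 3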
It's because the index reference will point to the last file. Node.js is asynchronous: it will not wait until the read-file operation completes, it just moves on and increments index.
for (var index in files) {
    var task = function () {
        console.log(files[index]);
        fs.readFile(filesDir + '/' + files[index], function (err, text) {
            if (err) throw err;
            doSomething();
        });
    };
    tasks.push(task);
}
The first snippet works because it uses a closure: it passes the currently indexed file into an immediately invoked function, which returns a task function with that file baked in as an argument. The returned functions then execute in parallel, each with its own file.
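As a side note, in an ES6 environment the IIFE isn't needed. A minimal sketch of the same loop using let, which gives each iteration its own binding:

for (let index = 0; index < files.length; index++) {
    tasks.push(function () {
        // let-scoped index is captured per iteration, so no IIFE is required
        fs.readFile(filesDir + '/' + files[index], function (err, text) {
            if (err) throw err;
            doSomething();
        });
    });
}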
I have created a function to populate a list of objects, but the objects are never written.
if (stats.isDirectory()) {
    objlist = [];
    // do something here.. the path was correct
    fs.readdir(path, (err, files) => {
        objlist = [];
        for (file in files) {
            //console.log(files[file])
            objlist[file] = {};
            fs.stat(path + '\\' + files[file], function (err, stats) {
                if (err)
                    throw err;
                if (stats.isDirectory()) {
                    objlist[file]['file'] = 'folder';
                }
                if (stats.isFile()) {
                    objlist[file]['file'] = 'file';
                }
                objlist[file]['name'] = files[file];
                objlist[file]['size'] = stats.size;
                //console.log(objlist)
                //console.log(stats)
            });
        }
    });
    console.log(objlist);
    return objlist;
}
However, the function returns an empty objlist. Can you please suggest what I am doing wrong?
This code will be helpful. Use let file in files:
objlist = [];
// do something here.. the path was correct
fs.readdir(path, (err, files) => {
    objlist = [];
    for (let file in files) {
        objlist[file] = {};
        fs.stat(path + '\\' + files[file], function (err, stats) {
            if (err)
                throw err;
            if (stats.isDirectory()) {
                objlist[file]['file'] = 'folder';
            }
            if (stats.isFile()) {
                objlist[file]['file'] = 'file';
            }
            objlist[file]['name'] = files[file];
            objlist[file]['size'] = stats.size;
            if (file == files.length - 1) {
                console.log(objlist);
            }
        });
    }
});
You must add the objects to the array with the push method, like this:
objlist = [];
for (file in files) {
    obj = {};
    obj.file = file;
    // ... do all stuff with obj
    objlist.push(obj);
}
Also, everything inside the fs.readdir callback runs asynchronously, so you have to either build and use the populated list inside that callback, or await the result.
Otherwise you will call the function, it will execute asynchronously, and control will return to the main function without waiting for fs.readdir's response; the array will still be empty because your async callback may or may not have run yet.
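For reference, a minimal sketch of the await-based approach, assuming a Node version where fs.promises is available (the function name listDir is made up for illustration):

const fs = require('fs').promises;

async function listDir(path) {
    const files = await fs.readdir(path);
    const objlist = [];
    for (const name of files) {
        const stats = await fs.stat(path + '\\' + name);
        objlist.push({
            file: stats.isDirectory() ? 'folder' : 'file',
            name: name,
            size: stats.size
        });
    }
    return objlist; // complete only after every stat has finished
}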
There are two mistakes in your code:
1) You are using an array as an object.
2) The for loop is synchronous, but you are running an asynchronous task inside it. Either use the synchronous version of fs, or change your for loop as follows:
for (file in files) {
    (function (f) {
        // f captures the current value of file for the async callback below
        var obj = {};
        fs.stat(path + '\\' + files[f], function (err, stats) {
            if (err)
                throw err;
            if (stats.isDirectory()) {
                obj.file = 'folder';
            }
            if (stats.isFile()) {
                obj.file = 'file';
            }
            obj.name = files[f];
            obj.size = stats.size;
            objlist.push(obj);
            //console.log(objlist)
            //console.log(stats)
        });
    }(file));
}
I'm using the svn-spawn library to update/commit files to svn. The problem is that my app calls svn up/commit in a loop, and because of the async nature of the calls, svn-up is called from the next iteration of the loop before the previous svn-up can finish.
How do I handle this issue? Is there any way to prevent the next call from happening until the previous one is complete?
Figured out a way to do it using the async module.
async.series can be used to execute async tasks serially.
This is how I did it:
function commitFile(arg, callback) {
    svnClient.getStatus(filePath, function (err, data) {
        //...
        svnClient.commit(['Commit msg', filePath], callback);
        //...
    });
}
var toCommit = [];
for (var i = 0, len = requests.length; i < len; i++) {
    // Adding files to commit; async.apply enables adding arguments to the anonymous function
    toCommit.push(async.apply(function (arg, cb) {
        commitFile(arg, cb);
    }, 'arg1'));
}

async.series(toCommit, function (err, result) {
    console.log('Final callback');
    if (err) {
        console.log('error', err);
    } else {
        console.log('result of this run: ' + result);
    }
});
async.series needs an array of functions, each of which must call a callback once it is done. It uses that callback to determine that the current function has finished executing; only then will it pick the next function to execute.
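A minimal, self-contained sketch of that contract, with setTimeout standing in for the svn calls:

var async = require('async');

async.series([
    function (cb) { setTimeout(function () { cb(null, 'first'); }, 100); },
    function (cb) { setTimeout(function () { cb(null, 'second'); }, 100); }
], function (err, results) {
    // The second task only starts after the first one calls cb.
    console.log(results); // ['first', 'second']
});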
Maybe the results aren't weird, but I started using Node 1-2 months ago, so to me they are...
I have a loop which sorts out every other value of the array returned by hgetall (a Redis command), and in that loop I call a function to get all values from another table, with keys stored in the sorted array. This was more difficult to explain than I thought. Here's my code:
Pastebin: http://pastebin.com/tAVhSUV1 (or see below)
function getInfo(cn, callback) {
    var anArray = [];
    redis_client.hgetall('chat_info:' + cn, function (err, vals) {
        if (err) { throw err; }
        for (i in vals) {
            anArray.push(vals[i]);
        }
        return callback(anArray);
    });
}
redis_client.hgetall('chat_rooms:' + POST.chat_name, function (err, val) {
    if (err) { throw err; }
    var vars = [],
        rArr = [];
    for (i in val) {
        vars.push(i);
    }
    for (var i = 0; i < vars.length; i += 1) {
        if (i % 2 === 0) {
            getInfo(vars[i], function (hej) {
                rArr.push(hej);
            });
        }
    }
});
The callback passed to getInfo() executes only after the entire loop has finished. Am I missing something here? Because it can't do that, right? (When I use rArr right after the loop it's empty, but if I log it in the callback it gets logged after everything else written after the loop.)
Yes, that's probably normal.
Understand that the callbacks are executed after the hgetall call completes. When the Redis client receives something, it invokes the callback; in other words, all the callbacks run later.
Since JavaScript runs in a single thread, the hgetall calls would have to be blocking for them to execute in order within the for loop. But you're almost certainly using async I/O, so the for loop ends first, and only then does the event loop start calling each callback that was queued.
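A tiny sketch that shows this ordering:

for (var i = 0; i < 3; i++) {
    setImmediate(function () { console.log('callback'); });
}
console.log('loop done');
// Prints "loop done" first, then "callback" three times:
// the loop finishes before any queued callback runs.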
Edit
Unfortunately, to achieve what you're trying to do, you need to wrap your code in several more callbacks. You can use this project to make it easier: https://github.com/caolan/async
You should be able to install it with npm install async.
You'd have to do something like this:
function getInfo(cn) {
    return function (callback) {
        var anArray = [];
        redis_client.hgetall('chat_info:' + cn, function (err, vals) {
            if (err) { return callback(err); }
            for (i in vals) {
                anArray.push(vals[i]);
            }
            // async.series expects error-first callbacks: callback(err, result)
            return callback(null, anArray);
        });
    };
}

redis_client.hgetall('chat_rooms:' + POST.chat_name, function (err, val) {
    if (err) { throw err; }
    var vars = [],
        rArr = [],
        callbacks = [];
    for (i in val) {
        vars.push(i);
    }
    for (var i = 0; i < vars.length; i += 1) {
        if (i % 2 === 0) {
            callbacks.push(getInfo(vars[i]));
        }
    }
    async.series(callbacks, function (err, results) {
        // Final code here
    });
});
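For comparison, a sketch of the same fan-out without the library, counting completed callbacks by hand (a common pre-promise pattern, reusing the fixed getInfo above):

var pending = 0;
for (var i = 0; i < vars.length; i += 1) {
    if (i % 2 === 0) {
        pending += 1;
        getInfo(vars[i])(function (err, anArray) {
            if (err) throw err;
            rArr.push(anArray);
            pending -= 1;
            if (pending === 0) {
                // rArr is complete here; all increments happen before
                // any async callback fires, so 0 means "all done"
            }
        });
    }
}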
I am working on a small Node project which requires copying and ungzipping a number of files of various sizes. I've been trying to use async.eachSeries to take care of it, but it is not working out. The files are created, but the data piped into wr ends up spread across multiple files regardless of which file it should end up in.
fs.readdir(path, function (err, files) {
    async.eachSeries(files, function (file, callback) {
        var wr = fs.createWriteStream(file);
        fs.stat(file, function (err, stats) {
            if (err) throw err;
            var stream = fs.createReadStream(file).on('end', function () {
                callback();
            }).pipe(ungzip).pipe(wr);
        });
    }, function () {
        //res.write(concatenated);
        //res.end();
    });
});
I'm still new to Node, so any help would be appreciated.
-NQ
Looks like the solution is to use a closure.
The problem in your code is that the callback function passed to fs.stat references a variable from the outer scope, i.e. wr, which is changed by the next iteration of the loop. Closures are the way to sort that out:
fs.readdir(path, function (err, files) {
    async.eachSeries(files, function (file, callback) {
        var wr = fs.createWriteStream(file);
        fs.stat(file, function (myWr) {
            // The outer function captures wr as myWr immediately,
            // then returns the real fs.stat callback.
            return function (err, stats) {
                if (err) throw err;
                var stream = fs.createReadStream(file).on('end', function () {
                    callback();
                }).pipe(ungzip).pipe(myWr);
            };
        }(wr));
    }, function () {
        //res.write(concatenated);
        //res.end();
    });
});
Refer to Please explain the use of JavaScript closures in loops and Serving A Batch Of Dynamic Pages for more on closures.
Sorry, I'm just starting with Node. This might be a very novice question.
Let's say I have some code which reads some files from a directory in the file system:
var fs = require('fs');
fs.readdir(__dirname + '/myfiles', function (err, files) {
    if (err) throw err;
    files.forEach(function (fileName) {
        fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
            if (err) throw err;
            console.log('finished reading file ' + fileName + ': ' + data);
            module.exports.files.push(data);
        });
    });
});
Note that all of this occurs asynchronously. Let's also say I have a Mocha test which executes this code:
describe('fileProvider', function () {
    describe('#files', function () {
        it.only('files array not empty', function () {
            assert(fileProvider.files.length > 0, 'files.length is zero');
        });
    });
});
The Mocha test runs before the files have finished being read. I know this because I see the console.log statement after the little dot that indicates a Mocha test being run (at least I think that's what it indicates). Also, if I wrap the assert in a setTimeout, the assert passes.
How should I structure my code so that I can ensure the async file operations are completed? Note that this is not just a problem with testing - I need the files to be loaded fully before I can do real work in my app as well.
I don't think the right answer is to read files synchronously, because that will block the Node request / response loop, right?
Bonus question:
Even if I put the assert in a setTimeout with a 0 timeout value, the test still passes. Is this because putting it in a setTimeout kicks it to the end of the processing chain, so the filesystem work finishes first?
You can implement a completion callback that fires after all files have been read:
exports.files = [];
exports.initialize = initialize;

function initialize(callback) {
    var fs = require('fs');
    fs.readdir(__dirname + '/myfiles', function (err, files) {
        if (err) throw err;
        files.forEach(function (fileName) {
            fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
                if (err) throw err;
                console.log('finished reading file ' + fileName + ': ' + data);
                exports.files.push(data);
                // Every readFile callback checks whether it was the last one.
                if (exports.files.length == files.length) {
                    callback();
                }
            });
        });
    });
}
You can call the initialization method by doing something like:
var f = require('./files.js');
if (f.files.length < 1) {
    console.log('initializing');
    f.initialize(function () {
        console.log('After: ' + f.files.length);
        var another = require('./files.js');
        console.log('Another module: ' + another.files.length);
    });
}
EDIT: Since you only want to have to call this once, you can initialize it when the application loads. According to the Node.js documentation, modules are cached after the first time they are loaded. The two examples above have been edited accordingly.
To avoid getting caught up in nested callbacks, you might want to use async's each, which will let you run the tasks asynchronously in a non-blocking manner:
https://github.com/caolan/async#each
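A minimal sketch of the same initialization using async.each (assuming the async module is installed via npm install async):

var fs = require('fs');
var async = require('async');

exports.files = [];

exports.initialize = function (callback) {
    fs.readdir(__dirname + '/myfiles', function (err, files) {
        if (err) return callback(err);
        async.each(files, function (fileName, done) {
            fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
                if (err) return done(err);
                exports.files.push(data);
                done();
            });
        }, callback); // invoked once, after every file is read (or on first error)
    });
};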
I think that's a good test; the same thing would happen in any app that used your module, i.e. its code could run before files is set. What you need to do is create a callback like #making3 suggests, or use promises. I haven't used Mocha, but there's a section in its docs on asynchronous calls. You could export the promise itself:
module.exports.getFiles = new Promise((resolve, reject) => {
    var datas = [];
    fs.readdir(__dirname + '/myfiles', function (err, files) {
        if (err) {
            reject(err);
            return;
        }
        files.forEach(function (fileName) {
            fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
                if (err) {
                    reject(err);
                    return;
                }
                console.log('finished reading file ' + fileName + ': ' + data);
                datas.push(data);
                if (datas.length == files.length) {
                    resolve(datas);
                }
            });
        });
    });
});
chai-as-promised lets you work directly with promises using eventually, or you can use the done callback passed to your test, I think:
describe('fileProvider', function () {
    describe('#files', function () {
        it.only('files array not empty', function (done) {
            fileProvider.getFiles.then(function (value) {
                assert(value.length > 0, 'files.length is zero');
                done();
            }, function (err) {
                done(err);
            });
        });
    });
});
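And a sketch of the chai-as-promised variant mentioned above (assuming the package is installed; returning the assertion lets Mocha wait on the promise itself):

const chai = require('chai');
const chaiAsPromised = require('chai-as-promised');
chai.use(chaiAsPromised);

describe('fileProvider', function () {
    describe('#files', function () {
        it('files array not empty', function () {
            // eventually applies the assertion to the resolved value
            return chai.expect(fileProvider.getFiles).to.eventually.have.length.above(0);
        });
    });
});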