Node.js fs.readdir recursive directory search - node.js

Any ideas on an async directory search using fs.readdir? I realize that we could introduce recursion and call the read directory function with the next directory to read, but I'm a little worried about it not being async...
Any ideas? I've looked at node-walk, which is great, but it doesn't give me just the files in an array, like readdir does.
Looking for output like...
['file1.txt', 'file2.txt', 'dir/file3.txt']

There are basically two ways of accomplishing this. In an async environment you'll notice that there are two kinds of loops: serial and parallel. A serial loop waits for one iteration to complete before it moves onto the next iteration - this guarantees that every iteration of the loop completes in order. In a parallel loop, all the iterations are started at the same time, and one may complete before another, however, it is much faster than a serial loop. So in this case, it's probably better to use a parallel loop because it doesn't matter what order the walk completes in, just as long as it completes and returns the results (unless you want them in order).
A parallel loop would look like this:
var fs = require('fs');
var path = require('path');
/**
 * Recursively lists every file below `dir`, processing entries in parallel.
 *
 * @param {string} dir - Directory to walk.
 * @param {function(?Error, string[]=)} done - Called exactly once, with an
 *   error or with the absolute paths of all files found (in no fixed order).
 */
var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var pending = list.length;
    if (!pending) return done(null, results);
    var failed = false;
    // Single exit point: reports the first error once, otherwise finishes
    // when the last outstanding entry has been accounted for.
    var settle = function(err) {
      if (failed) return;
      if (err) {
        failed = true;
        return done(err);
      }
      if (!--pending) done(null, results);
    };
    list.forEach(function(file) {
      file = path.resolve(dir, file);
      fs.stat(file, function(err, stat) {
        // An entry that cannot be stat'ed is still reported as a file.
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            // On failure `res` is undefined; never merge it into the results.
            if (!err) results = results.concat(res);
            settle(err);
          });
        } else {
          results.push(file);
          settle();
        }
      });
    });
  });
};
A serial loop would look like this:
var fs = require('fs');
var path = require('path');
/**
 * Recursively lists every file below `dir`, one entry at a time, so results
 * come back in directory-listing order.
 *
 * @param {string} dir - Directory to walk.
 * @param {function(?Error, string[]=)} done - Called with an error, or with
 *   the absolute paths of all files found.
 */
var walk = function(dir, done) {
  var results = [];
  fs.readdir(dir, function(err, list) {
    if (err) return done(err);
    var i = 0;
    (function next() {
      if (i >= list.length) return done(null, results);
      var file = path.resolve(dir, list[i++]);
      fs.stat(file, function(err, stat) {
        // An entry that cannot be stat'ed is still reported as a file.
        if (stat && stat.isDirectory()) {
          walk(file, function(err, res) {
            // Stop at the first failing subdirectory instead of merging `undefined`.
            if (err) return done(err);
            results = results.concat(res);
            next();
          });
        } else {
          results.push(file);
          next();
        }
      });
    })();
  });
};
And to test it out on your home directory (WARNING: the results list will be huge if you have a lot of stuff in your home directory):
walk(process.env.HOME, function(err, results) {
if (err) throw err;
console.log(results);
});
EDIT: Improved examples.

This one uses the maximum amount of new, buzzwordy features available in node 8, including Promises, util/promisify, destructuring, async-await, map+reduce and more, making your co-workers scratch their heads as they try to figure out what is going on.
Node 8+
No external dependencies.
const { promisify } = require('util');
const { resolve } = require('path');
const fs = require('fs');
const readdir = promisify(fs.readdir);
const stat = promisify(fs.stat);
/**
 * Resolves to the absolute paths of all files below `dir`.
 * Every entry is stat'ed concurrently; directories are walked recursively.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>}
 */
async function getFiles(dir) {
  const names = await readdir(dir);
  const pending = names.map(async (name) => {
    const full = resolve(dir, name);
    const info = await stat(full);
    if (info.isDirectory()) {
      return getFiles(full);
    }
    return full;
  });
  const nested = await Promise.all(pending);
  // Each element is either one path or the array of paths from a subdirectory.
  let merged = [];
  for (const item of nested) {
    merged = merged.concat(item);
  }
  return merged;
}
Usage
getFiles(__dirname)
.then(files => console.log(files))
.catch(e => console.error(e));
Node 10.10+
Updated for node 10+ with even more whizbang:
const { resolve } = require('path');
const { readdir } = require('fs').promises;
/**
 * Resolves to the absolute paths of all files below `dir`, using the
 * directory entries' own type information instead of a separate stat call.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>}
 */
async function getFiles(dir) {
  const dirents = await readdir(dir, { withFileTypes: true });
  const files = await Promise.all(dirents.map((dirent) => {
    const res = resolve(dir, dirent.name);
    return dirent.isDirectory() ? getFiles(res) : res;
  }));
  // Flatten one level with a loop: spreading `files` into a single call
  // passes one argument per entry and overflows the engine's argument limit
  // for very large directories.
  const flat = [];
  for (const entry of files) {
    if (Array.isArray(entry)) {
      for (const nested of entry) flat.push(nested);
    } else {
      flat.push(entry);
    }
  }
  return flat;
}
Note that starting with node 11.15.0 you can use files.flat() instead of Array.prototype.concat(...files) to flatten the files array.
Node 11+
If you want to blow everybody's head up completely, you can use the following version using async iterators. In addition to being really cool, it also allows consumers to pull results one-at-a-time, making it better suited for really large directories.
const { resolve } = require('path');
const { readdir } = require('fs').promises;
/**
 * Lazily yields the absolute path of every file below `dir`, descending into
 * each subdirectory as it is encountered.
 *
 * @param {string} dir - Directory to walk.
 * @yields {string}
 */
async function* getFiles(dir) {
  for (const dirent of await readdir(dir, { withFileTypes: true })) {
    const full = resolve(dir, dirent.name);
    if (!dirent.isDirectory()) {
      yield full;
      continue;
    }
    yield* getFiles(full);
  }
}
Usage has changed because the return type is now an async iterator instead of a promise
;(async () => {
for await (const f of getFiles('.')) {
console.log(f);
}
})()
In case somebody is interested, I've written more about async iterators here: https://qwtel.com/posts/software/async-generators-in-the-wild/

Just in case anyone finds it useful, I also put together a synchronous version.
/**
 * Synchronously collects the paths of all files below `dir`.
 * Paths are built as `dir + '/' + name`.
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]}
 */
var walk = function(dir) {
  return fs.readdirSync(dir).reduce(function(found, name) {
    var full = dir + '/' + name;
    var info = fs.statSync(full);
    // Directories contribute their own listing; anything else is a file.
    var addition = info && info.isDirectory() ? walk(full) : [full];
    return found.concat(addition);
  }, []);
}
Tip: To use less resources when filtering. Filter within this function itself. E.g. Replace results.push(file); with below code. Adjust as required:
// Keep only .json files. The base name is isolated first so that a dot in a
// directory name is never mistaken for the file extension.
var file_name = file.split(/[\\/]/).pop();
var file_type = file_name.split(".").pop();
if (file_type === "json") results.push(file);

A. Have a look at the file module. It has a function called walk:
file.walk(start, callback)
Navigates a file tree, calling callback for each directory, passing in
(null, dirPath, dirs, files).
This may be for you! And yes, it is async. However, I think you would have to aggregate the full paths yourself, if you needed them.
B. An alternative, and even one of my favourites: use the unix find for that. Why do something again, that has already been programmed? Maybe not exactly what you need, but still worth checking out:
var execFile = require('child_process').execFile;
execFile('find', [ 'somepath/' ], function(err, stdout, stderr) {
  // err is set when find cannot be started, exits non-zero, or its output
  // exceeds execFile's maxBuffer; stdout is not a complete listing then.
  if (err) {
    console.error(stderr || err);
    return;
  }
  // The output ends with a newline, so drop the empty trailing entry.
  var file_list = stdout.split('\n').filter(Boolean);
  /* now you've got a list of paths, each prefixed with the start path */
});
Find has a nice built-in caching mechanism that makes subsequent searches very fast, as long as only a few folders have changed.

I recommend using node-glob to accomplish that task.
var glob = require( 'glob' );
glob( 'dirname/**/*.js', function( err, files ) {
console.log( files );
});

Another nice npm package is glob.
npm install glob
It is very powerful and should cover all your recursing needs.
Edit:
I actually wasn't perfectly happy with glob, so I created readdirp.
I'm very confident that its API makes finding files and directories recursively and applying specific filters very easy.
Read through its documentation to get a better idea of what it does and install via:
npm install readdirp

Short, Modern and Efficient:
import {readdir} from 'node:fs/promises'
import {join} from 'node:path'
/**
 * Reads `dirPath` recursively. Files appear as path strings; every
 * subdirectory appears as a nested array built the same way.
 *
 * @param {string} dirPath
 * @returns {Promise<Array>}
 */
const deepReadDir = async (dirPath) => {
  const dirents = await readdir(dirPath, {withFileTypes: true})
  const pending = dirents.map((dirent) => {
    const path = join(dirPath, dirent.name)
    return dirent.isDirectory() ? deepReadDir(path) : path
  })
  return Promise.all(pending)
}
Special thanks to Function for hinting at the use of {withFileTypes: true}.
This auto folds each nested path into a new nested array. For example if:
await deepReadDir('src')
returns something like this:
[
[
'src/client/api.js',
'src/client/http-constants.js',
'src/client/index.html',
'src/client/index.js',
[ 'src/client/res/favicon.ico' ],
'src/client/storage.js'
],
[ 'src/crypto/keygen.js' ],
'src/discover.js',
[
'src/mutations/createNewMutation.js',
'src/mutations/newAccount.js',
'src/mutations/transferCredit.js',
'src/mutations/updateApp.js'
],
[
'src/server/authentication.js',
'src/server/handlers.js',
'src/server/quick-response.js',
'src/server/server.js',
'src/server/static-resources.js'
],
[ 'src/util/prompt.js', 'src/util/safeWriteFile.js' ],
'src/util.js'
]
But you can easily flatten it, if you want:
(await deepReadDir('src')).flat(Number.POSITIVE_INFINITY)
[
'src/client/api.js',
'src/client/http-constants.js',
'src/client/index.html',
'src/client/index.js',
'src/client/res/favicon.ico',
'src/client/storage.js',
'src/crypto/keygen.js',
'src/discover.js',
'src/mutations/createNewMutation.js',
'src/mutations/newAccount.js',
'src/mutations/transferCredit.js',
'src/mutations/updateApp.js',
'src/server/authentication.js',
'src/server/handlers.js',
'src/server/quick-response.js',
'src/server/server.js',
'src/server/static-resources.js',
'src/util/prompt.js',
'src/util/safeWriteFile.js',
'src/util.js'
]

If you want to use an npm package, wrench is pretty good.
var wrench = require("wrench");
var files = wrench.readdirSyncRecursive("directory");
wrench.readdirRecursive("directory", function (error, files) {
// live your dreams
});
EDIT (2018):
Anyone reading through in recent time: The author deprecated this package in 2015:
wrench.js is deprecated, and hasn't been updated in quite some time. I heavily recommend using fs-extra to do any extra filesystem operations.

Async
const fs = require('fs')
const path = require('path')
// Collects the joined path of every entry found under `p` (directories as
// well as files) into the shared accumulator `a`, then calls done(a).
//   p    - path to list
//   done - invoked with `a` once this level and all of its children reported back
//   a    - accumulator array passed down to every recursive call
//   i    - number of children of `p` that have finished so far
// The error `e` is never inspected: when readdir delivers no listing, `d`
// falls back to [] and this level completes immediately via `|| done(a)`.
// NOTE(review): presumably this is how the recursion ends on plain files,
// where readdir fails — confirm; it also hides genuine errors.
const readdir = (p, done, a = [], i = 0) => fs.readdir(p, (e, d = []) =>
d.map(f => readdir(a[a.push(path.join(p, f)) - 1], () =>
++i == d.length && done(a), a)).length || done(a))
readdir(__dirname, console.log)
Sync
const fs = require('fs')
const path = require('path')
// Synchronously gathers the path of every entry below `p` (directories and
// files alike) into `a` and returns it. A non-directory `p` adds nothing.
const readdirSync = (p, a = []) => {
  if (!fs.statSync(p).isDirectory()) {
    return a
  }
  for (const name of fs.readdirSync(p)) {
    const child = path.join(p, name)
    a.push(child)
    readdirSync(child, a)
  }
  return a
}
console.log(readdirSync(__dirname))
Async readable
// Asynchronously gathers the path of every entry below `currentPath`
// (directories and files alike) into `allFiles`, then calls done(allFiles).
// A path that cannot be listed is treated like an empty directory.
function readdir (currentPath, done, allFiles = [], i = 0) {
  fs.readdir(currentPath, (e, directoryFiles = []) => {
    const total = directoryFiles.length
    if (!total) {
      return done(allFiles)
    }
    // Runs once per child; the last child to finish completes this level.
    const childFinished = () => {
      i = i + 1
      if (i == total) done(allFiles)
    }
    directoryFiles.forEach((file) => {
      const joinedPath = path.join(currentPath, file)
      allFiles.push(joinedPath)
      readdir(joinedPath, childFinished, allFiles)
    })
  })
}
readdir(__dirname, console.log)
Note: both versions will follow symlinks (same as the original fs.readdir)

With Recursion
var fs = require('fs')
var path = process.cwd()
var files = []
/**
 * Appends the path of every file below `path` to the caller's `files` array.
 * Uses lstat, so a symlink to a directory is listed rather than followed.
 *
 * @param {string} path - Directory to walk.
 * @param {string[]} files - Accumulator, mutated in place.
 */
var getFiles = function(path, files){
  var names = fs.readdirSync(path);
  for (var idx = 0; idx < names.length; idx++) {
    var subpath = path + '/' + names[idx];
    if (fs.lstatSync(subpath).isDirectory()) {
      getFiles(subpath, files);
      continue;
    }
    files.push(subpath);
  }
}
Calling
getFiles(path, files)
console.log(files) // will log all files in directory

I loved the answer from chjj above and would not have been able to create my version of the parallel loop without that start.
var fs = require("fs");
/**
 * Builds a nested description of `dir`: `{ path, children }`, where each
 * child is `{ path }` for a file or another such tree for a directory.
 *
 * @param {string} dir - Directory to describe.
 * @param {function(?Error, Object=)} done - Called exactly once, with an
 *   error or with the finished tree.
 */
var tree = function(dir, done) {
  var results = {
    "path": dir
    ,"children": []
  };
  fs.readdir(dir, function(err, list) {
    if (err) { return done(err); }
    var pending = list.length;
    if (!pending) { return done(null, results); }
    var failed = false;
    // Single exit point: reports the first error once, otherwise finishes
    // when the last outstanding entry has been accounted for.
    var settle = function(err) {
      if (failed) { return; }
      if (err) {
        failed = true;
        return done(err);
      }
      if (!--pending) { done(null, results); }
    };
    list.forEach(function(file) {
      var child = dir + '/' + file;
      fs.stat(child, function(err, stat) {
        // An entry that cannot be stat'ed is still listed as a file.
        if (stat && stat.isDirectory()) {
          tree(child, function(err, res) {
            // On failure `res` is undefined; never add it as a child.
            if (!err) { results.children.push(res); }
            settle(err);
          });
        } else {
          results.children.push({"path": child});
          settle();
        }
      });
    });
  });
};
module.exports = tree;
I created a Gist as well. Comments welcome. I am still starting out in the NodeJS realm so that is one way I hope to learn more.

Use node-dir to produce exactly the output you like
var dir = require('node-dir');
dir.files(__dirname, function(err, files) {
if (err) throw err;
console.log(files);
//we have an array of files now, so now we can iterate that array
files.forEach(function(path) {
action(null, path);
})
});

Here is a simple synchronous recursive solution
const fs = require('fs')
// Synchronously lists all files below `path`, returned as '/'-separated
// paths relative to `path`.
const getFiles = path => {
  const files = []
  fs.readdirSync(path).forEach(file => {
    const fullPath = path + '/' + file
    if (!fs.lstatSync(fullPath).isDirectory()) {
      files.push(file)
      return
    }
    // Prefix everything found in the subdirectory with its name.
    for (const nested of getFiles(fullPath)) files.push(file + '/' + nested)
  })
  return files
}
Usage:
const files = getFiles(process.cwd())
console.log(files)
You could write it asynchronously, but there is no need. Just make sure that the input directory exists and is accessible.

Modern promise based read dir recursive version:
const fs = require('fs');
const path = require('path');
/**
 * Resolves to the paths of all files below `filePath`.
 * Uses lstat, so a symlink to a directory is listed rather than followed.
 *
 * @param {string} filePath - Directory to walk.
 * @returns {Promise<string[]>}
 */
const readDirRecursive = async (filePath) => {
  const names = await fs.promises.readdir(filePath);
  const visit = async (name) => {
    const absolutePath = path.join(filePath, name);
    const info = await fs.promises.lstat(absolutePath);
    if (info.isDirectory()) {
      return readDirRecursive(absolutePath);
    }
    return absolutePath;
  };
  const nested = await Promise.all(names.map((name) => visit(name)));
  return nested.flat();
}

qwtel's answer variant, in TypeScript
import { resolve } from 'path';
import { readdir } from 'fs/promises';
/**
 * Lazily yields the absolute path of every file below `dir`, descending into
 * each subdirectory as it is encountered.
 */
async function* getFiles(dir: string): AsyncGenerator<string> {
  for (const entry of await readdir(dir, { withFileTypes: true })) {
    const full = resolve(dir, entry.name);
    if (!entry.isDirectory()) {
      yield full;
      continue;
    }
    yield* getFiles(full);
  }
}

Vanilla ES6 + async/await + small & readable
I didn't find the answer I was looking for in this thread; there were a few similar elements spread across different answers, but I just wanted something simple and readable.
Just in case it helps anyone in the future (i.e. myself in a couple of months), this is what I ended up using:
const { readdir } = require('fs/promises');
const { join } = require('path');
/**
 * Resolves to a flat array with the path of every file below `dir`.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>}
 */
const readdirRecursive = async dir => {
  const entries = await readdir( dir, { withFileTypes: true } );
  const nested = await Promise.all( entries.map( entry => {
    const path = join( dir, entry.name );
    return entry.isDirectory() ? readdirRecursive( path ) : path;
  } ) );
  return nested.flat( Infinity );
}
module.exports = {
readdirRecursive,
}

I've coded this recently, and thought it would make sense to share this here. The code makes use of the async library.
var fs = require('fs');
var async = require('async');
/**
 * Recursively collects the files below `dir` whose name ends with `suffix`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} suffix - Required file-name ending, e.g. '.ext'.
 * @param {function(?Error, string[])} callback - Receives the first error
 *   (if any) and the matching paths gathered so far.
 */
var scan = function(dir, suffix, callback) {
  fs.readdir(dir, function(err, files) {
    // Without a listing there is nothing to iterate; report the failure.
    if (err) {
      return callback(err, []);
    }
    var returnFiles = [];
    async.each(files, function(file, next) {
      var filePath = dir + '/' + file;
      fs.stat(filePath, function(err, stat) {
        if (err) {
          return next(err);
        }
        if (stat.isDirectory()) {
          return scan(filePath, suffix, function(err, results) {
            if (err) {
              return next(err);
            }
            returnFiles = returnFiles.concat(results);
            next();
          });
        }
        // indexOf from (length - suffix.length) only matches a true suffix.
        if (stat.isFile() && file.indexOf(suffix, file.length - suffix.length) !== -1) {
          returnFiles.push(filePath);
        }
        // Always signal completion, including for entries that are neither a
        // file nor a directory; otherwise the scan never finishes.
        next();
      });
    }, function(err) {
      callback(err, returnFiles);
    });
  });
};
You can use it like this:
scan('/some/dir', '.ext', function(err, files) {
// Do something with files that ends in '.ext'.
console.log(files);
});

A library called Filehound is another option. It will recursively search a given directory (working directory by default). It supports various filters, callbacks, promises and sync searches.
For example, search the current working directory for all files (using callbacks):
const Filehound = require('filehound');
Filehound.create()
.find((err, files) => {
if (err) {
return console.error(`error: ${err}`);
}
console.log(files); // array of files
});
Or promises and specifying a specific directory:
const Filehound = require('filehound');
Filehound.create()
.paths("/tmp")
.find()
.each(console.log);
Consult the docs for further use cases and examples of usage: https://github.com/nspragg/filehound
Disclaimer: I'm the author.

Using async/await, this should work:
const FS = require('fs');
// getFiles() below joins paths with Path, and promisify must be in scope for
// the two wrappers; Node 8+ ships promisify in util.
const Path = require('path');
const { promisify } = require('util');
const readDir = promisify(FS.readdir);
const fileStat = promisify(FS.stat);
/**
 * Resolves to the paths of all files below `dir`; every entry is stat'ed
 * concurrently and directories are walked recursively.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>}
 */
async function getFiles(dir) {
  const names = await readDir(dir);
  const pending = names.map(async (name) => {
    const full = Path.join(dir, name);
    const info = await fileStat(full);
    return info.isDirectory() ? getFiles(full) : full;
  });
  return flatten(await Promise.all(pending));
}
/**
 * Flattens `arr` by one level: array elements are expanded in place, all
 * other elements are kept as they are.
 *
 * Implemented as a loop because spreading `arr` into a single call passes
 * one argument per element and throws a RangeError for large inputs.
 *
 * @param {Array} arr
 * @returns {Array}
 */
function flatten(arr) {
  const flat = [];
  for (const item of arr) {
    if (Array.isArray(item)) {
      for (const nested of item) flat.push(nested);
    } else {
      flat.push(item);
    }
  }
  return flat;
}
You can use bluebird.Promisify or this:
/**
* Returns a function that will wrap the given `nodeFunction`. Instead of taking a callback, the returned function will return a promise whose fate is decided by the callback behavior of the given node function. The node function should conform to node.js convention of accepting a callback as last argument and calling that callback with error as the first argument and success value on the second argument.
*
* #param {Function} nodeFunction
* #returns {Function}
*/
module.exports = function promisify(nodeFunction) {
return function(...args) {
return new Promise((resolve, reject) => {
nodeFunction.call(this, ...args, (err, data) => {
if(err) {
reject(err);
} else {
resolve(data);
}
})
});
};
};
Node 8+ has Promisify built-in
See my other answer for a generator approach that can give results even faster.

Simple, Async Promise Based
const fs = require('fs/promises');
/**
 * Resolves to one descriptor per file below `dir`:
 * `{ file, path, parents }`, where `parents` is the containing directory
 * path split on "/".
 *
 * Failures reject the returned promise; they are not returned as values, so
 * a caller can never mistake an Error for a file list.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<Array<{file: string, path: string, parents: string[]}>>}
 */
const getDirRecursive = async (dir) => {
  const items = await fs.readdir(dir);
  const files = [];
  for (const item of items) {
    const itemPath = `${dir}/${item}`;
    if ((await fs.lstat(itemPath)).isDirectory()) {
      // Append in place rather than rebuilding the array on every iteration.
      for (const nested of await getDirRecursive(itemPath)) files.push(nested);
    } else {
      files.push({file: item, path: itemPath, parents: dir.split("/")});
    }
  }
  return files;
};
Usage: await getDirRecursive("./public");

Check out the final-fs library. It provides a readdirRecursive function:
ffs.readdirRecursive(dirPath, true, 'my/initial/path')
.then(function (files) {
// in the `files` variable you've got all the files
})
.otherwise(function (err) {
// something went wrong
});

Standalone promise implementation
I am using the when.js promise library in this example.
var fs = require('fs')
, path = require('path')
, when = require('when')
, nodefn = require('when/node/function');
/**
 * Resolves to the paths of all files below `directory`. When `includeDir` is
 * truthy, each directory is listed too, with a trailing path separator.
 *
 * @param {string} directory
 * @param {boolean} [includeDir]
 * @returns {Promise<string[]>} when.js promise for the collected paths
 */
function walk (directory, includeDir) {
  var results = [];
  var listing = nodefn.call(fs.readdir, directory);
  function visit (name) {
    var file = path.join(directory, name);
    return nodefn.call(fs.stat, file).then(function(stat) {
      if (stat.isFile()) { return results.push(file); }
      if (includeDir) { results.push(file + path.sep); }
      return walk(file, includeDir).then(function(filesInDir) {
        results = results.concat(filesInDir);
      });
    });
  }
  return when.map(listing, function(name) {
    return visit(name);
  }).then(function() {
    return results;
  });
};
walk(__dirname).then(function(files) {
console.log(files);
}).otherwise(function(error) {
console.error(error.stack || error);
});
I've included an optional parameter includeDir which will include directories in the file listing if set to true.

klaw and klaw-sync are worth considering for this sort of thing. These were part of node-fs-extra.

For Node 10.3+, here is a for-await solution:
#!/usr/bin/env node
const FS = require('fs');
const Util = require('util');
const readDir = Util.promisify(FS.readdir);
const Path = require('path');
/**
 * Lazily yields the path of every file below `path`, descending into each
 * subdirectory as it is encountered.
 *
 * @param {string} path - Directory to walk.
 * @yields {string}
 */
async function* readDirR(path) {
  for (const entry of await readDir(path, {withFileTypes: true})) {
    const fullPath = Path.join(path, entry.name);
    if (!entry.isDirectory()) {
      yield fullPath;
      continue;
    }
    yield* readDirR(fullPath);
  }
}
// Prints every file found by readDirR as soon as it is produced, then the
// total elapsed time in milliseconds.
async function main() {
  const startedAt = process.hrtime.bigint();
  const files = readDirR('/mnt/home/media/Unsorted');
  for await (const file of files) {
    console.log(file);
  }
  const elapsedNs = process.hrtime.bigint() - startedAt;
  console.log(elapsedNs / 1000000n);
}
main().catch(err => {
console.error(err);
});
The benefit of this solution is that you can start processing the results immediately; e.g. it takes 12 seconds to read all the files in my media directory, but if I do it this way I can get the first result within a few milliseconds.

Here's yet another implementation. None of the above solutions have any limiters, and so if your directory structure is large, they're all going to thrash and eventually run out of resources.
var async = require('async');
var fs = require('fs');
var resolve = require('path').resolve;
/**
 * Walks the tree rooted at `path` through an async.queue worker and passes
 * the collected non-directory paths to `callback(list)`.
 *
 * @param {string} path - Starting path; it is stat'ed like any other entry.
 * @param {number} concurrency - Passed to async.queue as its second argument.
 * @param {function(string[])} callback - Receives only the list. Errors are
 *   handed to the per-task callback and never reach this function.
 */
var scan = function(path, concurrency, callback) {
var list = [];
// Worker: stat one path; a directory enqueues each of its entries, anything
// else is recorded in `list`.
var walker = async.queue(function(path, callback) {
fs.stat(path, function(err, stats) {
if (err) {
return callback(err);
} else {
if (stats.isDirectory()) {
fs.readdir(path, function(err, files) {
if (err) {
callback(err);
} else {
for (var i = 0; i < files.length; i++) {
walker.push(resolve(path, files[i]));
}
callback();
}
});
} else {
list.push(path);
callback();
}
}
});
}, concurrency);
walker.push(path);
// NOTE(review): drain is assigned as a property after the first push; newer
// async releases appear to expect walker.drain(fn) instead — verify against
// the installed version.
walker.drain = function() {
callback(list);
}
};
Using a concurrency of 50 works pretty well, and is almost as fast as simpler implementations for small directory structures.

The recursive-readdir module has this functionality.

I modified Trevor Senior's Promise based answer to work with Bluebird
var fs = require('fs'),
path = require('path'),
Promise = require('bluebird');
var readdirAsync = Promise.promisify(fs.readdir);
var statAsync = Promise.promisify(fs.stat);
/**
 * Resolves to the paths of all regular files below `directory`.
 *
 * @param {string} directory
 * @returns {Promise<string[]>} Bluebird promise for the collected paths
 */
function walkFiles (directory) {
  var results = [];
  function visit (name) {
    var file = path.join(directory, name);
    return statAsync(file).then(function(stat) {
      if (!stat.isFile()) {
        // Anything that is not a regular file is walked as a directory.
        return walkFiles(file).then(function(filesInDir) {
          results = results.concat(filesInDir);
        });
      }
      return results.push(file);
    });
  }
  return readdirAsync(directory).map(function(name) {
    return visit(name);
  }).then(function() {
    return results;
  });
}
//use
// Call the function under the name it is defined with above (walkFiles).
walkFiles(__dirname).then(function(files) {
  console.log(files);
}).catch(function(e) {
  console.error(e);
});

For fun, here is a flow based version that works with highland.js streams library. It was co-authored by Victor Vu.
###
directory >---m------> dirFilesStream >---------o----> out
| |
| |
+--------< returnPipe <-----------+
legend: (m)erge (o)bserve
+ directory has the initial file
+ dirListStream does a directory listing
+ out prints out the full path of the file
+ returnPipe runs stat and filters on directories
###
_ = require('highland')
fs = require('fs')
fsPath = require('path')
directory = _(['someDirectory'])
mergePoint = _()
# Lists every directory that arrives at the merge point and emits the full
# path of each entry found in it.
dirFilesStream = mergePoint.merge().flatMap((parentPath) ->
  _.wrapCallback(fs.readdir)(parentPath).sequence().map (path) ->
    fsPath.join parentPath, path
)
out = dirFilesStream
# Create the return pipe: only directories flow back into the merge point.
returnPipe = dirFilesStream.observe().flatFilter((path) ->
  _.wrapCallback(fs.stat)(path).map (v) ->
    v.isDirectory()
)
# Connect up the merge point now that we have all of our streams.
mergePoint.write directory
mergePoint.write returnPipe
mergePoint.end()
# Release backpressure. This will print files as they are discovered.
# highland is bound to `_` in this script, so its logger is `_.log`.
out.each _.log
# Another way would be to queue them all up and then print them all out at once.
# out.toArray((files)-> console.log(files))

Using Promises (Q) to solve this in a Functional style:
var fs = require('fs'),
fsPath = require('path'),
Q = require('q');
/**
 * Resolves to the paths of all files below `dir`.
 * Uses lstat, so a symlink to a directory is listed rather than followed.
 *
 * @param {string} dir - Directory to walk.
 * @returns {Promise<string[]>} Q promise for the collected paths
 */
var walk = function (dir) {
  return Q.ninvoke(fs, 'readdir', dir).then(function (files) {
    return Q.all(files.map(function (file) {
      file = fsPath.join(dir, file);
      return Q.ninvoke(fs, 'lstat', file).then(function (stat) {
        if (stat.isDirectory()) {
          return walk(file);
        } else {
          return [file];
        }
      });
    }));
  }).then(function (files) {
    // The [] seed matters: reduce without an initial value throws on the
    // empty list produced by an empty directory.
    return files.reduce(function (pre, cur) {
      return pre.concat(cur);
    }, []);
  });
};
It returns a promise of an array, so you can use it as:
walk('/home/mypath').then(function (files) { console.log(files); });

I must add the Promise-based sander library to the list.
var sander = require('sander');
sander.lsr(directory).then( filenames => { console.log(filenames) } );

Related

How can I search a folder AND its sub-folders for a certain file and then get the file of it? (NodeJS)

I am building a npm module and I want to search the user's folder and its subfolders for a file (ex. config.json).
So basically I need a function which takes in the file name as a parameter and returns the file's path.
Can someone help me?
This will search recursively and return the found file(s) as an array.
const fs = require('fs');
const path = require('path');
/**
 * Searches `targetPath` and all of its subdirectories for files named
 * `findFile` and resolves to the paths of every match.
 *
 * The work list starts as the top-level listing and grows while it is being
 * walked: each directory appends its entries (relative to `targetPath`).
 * Entries that fail are logged and skipped.
 *
 * @param {string} targetPath
 * @param {string} findFile
 * @returns {Promise<string[]>}
 */
const search = async (targetPath, findFile) => {
  const queue = await fs.promises.readdir(targetPath);
  const result = [];
  for (let idx = 0; idx < queue.length; idx++) {
    const file = queue[idx];
    try {
      const filepath = path.join(targetPath, file);
      const stats = await fs.promises.lstat(filepath);
      if (stats.isDirectory()) {
        const childFiles = await fs.promises.readdir(filepath);
        for (const f of childFiles) {
          queue.push(path.join(file, f));
        }
      }
      const isMatch = stats.isFile() && path.basename(file) === findFile;
      if (isMatch) {
        result.push(filepath);
      }
    } catch (err) {
      console.error(err);
    }
  }
  return result;
}
const run = async () => {
try {
const found = await search('path/to/search', 'config.json');
console.log(found);
} catch (err) {
console.error(err);
}
}
run();

Path system in Windows

I want to give a path to my node project package.json script. I have a one folder called X. X has 10 folders and one of them has file Y. I don't know which folder has file Y. How do I write a path?
If you are trying to find the file Y inside one of the subfolders of X and then return/print the path, you can try the following. This will look through all the files within the passed directory and recursively continue to do so if there are subdirectories:
const path = require('path');
const fs = require('fs');
/**
 * Depth-first search for an entry named `fileName` below `currentDirectory`.
 *
 * @param {string} fileName - Exact entry name to look for.
 * @param {string} currentDirectory - Directory to start from.
 * @returns {Promise<string|undefined>} Path of the first match, or undefined
 *   when nothing matches.
 */
async function findFileRecursively(fileName, currentDirectory) {
  const entries = await fs.promises.readdir(currentDirectory);
  for (const entry of entries) {
    const filePath = path.join(currentDirectory, entry);
    const stats = await fs.promises.stat(filePath);
    if (entry === fileName) {
      return filePath;
    }
    if (!stats.isDirectory()) {
      continue;
    }
    const nested = await findFileRecursively(fileName, filePath);
    if (nested) {
      return nested;
    }
  }
}
(async () => {
try {
const result = await findFileRecursively("filename.txt", './path-to-basefolder');
if (result) console.log("File found at path: " + result)
else console.log("File could not be found");
} catch (err) {
console.log(err.message);
}
})();

Return list from asynchronous function with nodejs

I have the following code :
var fs = require("fs");
// Reads input.csv and builds {source, destination} pairs from its
// ';'-separated lines. As written, getMediaList itself returns nothing:
// media_copy_list is only filled inside the readFile callback, which runs
// after the function has already returned.
function getMediaList(){
var media_copy_list, line_list;
media_copy_list = [];
// `err` is not checked before `data` is used.
fs.readFile("input.csv", function(err, data) {
line_list = data.toString('utf-8').trim().split('\n');
// This return value belongs to the callback, not to getMediaList.
return line_list.forEach(function(file_line) {
var output_path, source_path, split_list;
if (file_line.length) {
split_list = file_line.split(';');
console.log(split_list[0]);
// Only lines with at least two fields produce an entry.
if (split_list.length >= 2) {
source_path = split_list[0].toString('utf-8').trim();
output_path = split_list[1].toString('utf-8').trim();
media_copy_list.push({
source: source_path,
destination: output_path
});
}
}
});
});
}
You can see that I'm filling a list with:
media_copy_list.push({
source: source_path,
destination: output_path
});
What I'd like to do is to return this list once I have finished reading the input.csv file.
I don't have any issues if I read the file synchronously (I just have to call return media_copy_list). But in this case, I don't know.
I heard about async.parallel but really don't know how to apply.
Example of input.csv :
FirstPart;SecondPart
Test/test2;Whatever/example
Just wrap your code inside a promise and resolve it only once you're done. Some suggest callbacks, which does pretty much the same thing, but this pattern is discouraged, now. You should really use a promise.
var fs = require("fs");
/**
 * Reads the ';'-separated file at `file` and resolves to one
 * `{source, destination}` pair per line that has at least two fields.
 * Blank lines are skipped; the first field of every other line is logged.
 *
 * @param {string} file - Path of the file to read.
 * @returns {Promise<Array<{source: string, destination: string}>>}
 */
function getMediaList(file){
  return new Promise(function (resolve, reject) {
    fs.readFile(file, 'utf-8', function(err, data) {
      if (err) {
        return reject(err);
      }
      var media_copy_list = [];
      data.split('\n').forEach(function (raw_line) {
        var file_line = raw_line.trim();
        if (!file_line.length) {
          return;
        }
        var split_list = file_line.split(';');
        console.log(split_list[0]);
        if (split_list.length < 2) {
          return;
        }
        media_copy_list.push({
          source: split_list[0].toString('utf-8').trim(),
          destination: split_list[1].toString('utf-8').trim()
        });
      });
      resolve(media_copy_list);
    });
  });
}
Then, invoke with
getMediaList('input.csv').then(function (mediaList) {
// ...
}).catch(function (err) {
console.error(err.stack);
});
Note: bluebird, Q, etc. are quite unnecessary since Node 4.2+. Unless you are using an earlier version of Node, try to avoid them. IMO.
The reason why Promises are encouraged is because Node will implement async/await, which will allow you to call this exact same function like :
var mediaList = await getMediaList('input.csv');
As noted in the comments, you don't want to return the list from the function.. what you should do is include a callback as a parameter to getMediaList and call that callback with your results. I would use async.each for looping through the lines in the file. You can read more about async.each here: https://github.com/caolan/async#each. Here is an example:
var fs = require("fs");
/**
 * Reads input.csv and passes one `{source, destination}` pair per line with
 * at least two ';'-separated fields to `callback`.
 *
 * @param {function(?Error, Array<{source: string, destination: string}>=)} callback
 */
function getMediaList(callback){
  // The snippet never loaded the async library it relies on.
  var async = require("async");
  var media_copy_list, line_list;
  media_copy_list = [];
  fs.readFile("input.csv", function(err, data) {
    if(err) {
      return callback(err);
    }
    line_list = data.toString('utf-8').trim().split('\n');
    async.each(line_list, function(file_line, next) {
      var output_path, source_path, split_list;
      if (file_line.length) {
        split_list = file_line.split(';');
        console.log(split_list[0]);
        if (split_list.length >= 2) {
          source_path = split_list[0].toString('utf-8').trim();
          output_path = split_list[1].toString('utf-8').trim();
          media_copy_list.push({
            source: source_path,
            destination: output_path
          });
        }
      }
      // Nothing in the per-line work can fail, so always report success.
      next();
    }, function (err) {
      callback(err, media_copy_list);
    });
  });
}
Or you can use promises(bluebird in the case below).
var Promise = require('bluebird'),
    fs = require("fs");
// Reads input.csv and logs one {source, destination} pair per line that has
// at least two ';'-separated fields.
fs.readFile("input.csv", function(err, data) {
  if (err) {
    return console.error(err);
  }
  var line_list = data.toString('utf-8').trim().split('\n');
  Promise.map(line_list, function(file_line) {
    var split_list = file_line.split(';');
    if (split_list.length < 2) {
      return null;
    }
    // Build a fresh object per line; a shared variable would repeat the
    // previous line's value for every line that does not qualify.
    return {
      source: split_list[0].toString('utf-8').trim(),
      destination: split_list[1].toString('utf-8').trim()
    };
  }).then(function(values){
    // Drop the placeholders left by lines that did not qualify.
    console.log(values.filter(Boolean));
  });
});

NodeJS hash files recursively in a directory

I am able to achieve recursive file traversal in a directory (i.e to explore all the subdirectories and files in a directory). For that I have used an answer from a respective post on stack overflow. The snippet of that is below:
var fs = require("fs");
/**
 * Builds a nested description of `dir`: `{ path, children }`, where each
 * child is `{ path }` for a file or another such tree for a directory.
 *
 * @param {string} dir - Directory to describe.
 * @param {function(?Error, Object=)} done - Called exactly once, with an
 *   error or with the finished tree.
 */
var tree = function(dir, done) {
  var results = {
    "path": dir,
    "children": []
  };
  fs.readdir(dir, function(err, list) {
    if (err) { return done(err); }
    var pending = list.length;
    if (!pending) { return done(null, results); }
    var failed = false;
    // Single exit point: reports the first error once, otherwise finishes
    // when the last outstanding entry has been accounted for.
    var settle = function(err) {
      if (failed) { return; }
      if (err) {
        failed = true;
        return done(err);
      }
      if (!--pending) { done(null, results); }
    };
    list.forEach(function(file) {
      var child = dir + '/' + file;
      fs.stat(child, function(err, stat) {
        // An entry that cannot be stat'ed is still listed as a file.
        if (stat && stat.isDirectory()) {
          tree(child, function(err, res) {
            // On failure `res` is undefined; never add it as a child.
            if (!err) { results.children.push(res); }
            settle(err);
          });
        } else {
          results.children.push({"path": child});
          settle();
        }
      });
    });
  });
};
module.exports = tree;
When I run:
tree(someDirectoryPath, function(err, results) {
if (err) throw err;
console.log(results);
});
I get a sample result, such as this one:
{ path: '/Users/UserName/Desktop/1',
children:
[ { path: '/Users/UserName/Desktop/1/file1' },
{ path: '/Users/UserName/Desktop/1/file2' },
{ path: '/Users/UserName/Desktop/1/file3' },
{ path: '/Users/UserName/Desktop/1/subdir1',
children: [Object] } ] }
I am also able to hash a single file in a specific location, by using the fs' module ReadStream method. The snippet for that is below:
/**
* Checking File Integrity
*/
var fs = require('fs'),
args = process.argv.splice('2'),
path = require('path'),
traverse = require('/Users/UserName/Desktop/tree.js'),
crypto = require('crypto');
//var algorithm = ['md5', 'sha1', 'sha256', 'sha512'];
var algorithm = 'sha512';
var hashTable = new Array();
// Hash the file named by the first command-line argument, chunk by chunk.
var hash = crypto.createHash(algorithm);
var fileStream = fs.createReadStream(args[0]);
fileStream.on('data', function(data) {
  hash.update(data);
});
// Registered once, next to the 'data' handler rather than inside it: a file
// read in several chunks would otherwise add one 'end' listener per chunk
// and call digest() repeatedly, and an empty file would never report.
fileStream.on('end', function() {
  var digest = hash.digest('hex');
  console.log('algorithm used: ', algorithm);
  console.log('hash for the file: ',digest);
  hashTable[args[0]] = digest;
  console.log(hashTable);
});
Where args[0] stores the location of the file to be read by the ReadStream. After hashing of a specific file, the console log returned is as follows:
node fileIntegrityChecker.js hello.txt
algorithm used: sha512
hash for the file: 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043
the hashtable is: [ 'hello.txt': '9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043' ]
My problem is that I tried to somehow integrate the tree module functionality in the hash related js file. My idea is that the program will capture the user's input, as a path to a directory and that input will be processed to traverse the whole subdirectories and files of a folder. Also, the fileStream.on method should be included in the callback from the tree module. However I am not fully initiated in the callback mechanism and I hope to get some insight from you.
This is what I've tried
/**
* Checking File Integrity
*/
var fs = require('fs'),
args = process.argv.splice('2'),
path = require('path'),
tree = require('/Users/UserName/Desktop/tree.js'),
crypto = require('crypto');
//var algorithm = ['md5', 'sha1', 'sha256', 'sha512'];
var algorithm = 'sha512';
// Array used as a path -> digest lookup; it is only ever written by string key.
var hashTable = [];
var pathString = 'Users/UserName/Desktop/1';
tree(pathString, function(err, results) {
  if (err) throw err;
  // Only the second child of the walked directory is hashed here.
  var target = results.children[1]['path'];
  var hash = crypto.createHash(algorithm);
  var fileStream = fs.createReadStream(target);

  // Feed every chunk of the file into the hash as it is read.
  fileStream.on('data', function(data) {
    hash.update(data);
  });

  // Registered once, outside the 'data' handler: nesting it there added a
  // new 'end' listener per chunk (finalizing the hash repeatedly) and left
  // empty files without any listener.
  fileStream.on('end', function() {
    var digest = hash.digest('hex');
    console.log('algorithm used: ', algorithm);
    console.log('hash for the file: ',digest);
    hashTable[target] = digest;
    console.log('The hashtable is: ', hashTable);
  });
});
Now, I've made some progress in the sense that I don't receive an error. Basically, I achieved my goal. However, I am able to extract only one result explicitly. I cannot work out how to iterate over (for instance) each child of the result JSON object. If that is solved, I think the problem will be completely solved.
Can you please show me a way how to successfully combine the module and the js file to recursively traverse all the contents of a directory and create a hash for every file in it. I need this to ultimately check if some changes in the files occurred, based on their hashes. Thank you!
The simplest thing to do would be to generate the hash while you are already walking the directory tree. This involves updating the tree.js file as follows:
} else {
var fname = dir + "/" + file;
// put your hash generation here
generateHash(fname, function (e, hash) {
  // Stop here on failure so the entry is not recorded with a missing hash.
  if (e) { return done(e); }
  results.children.push({"path": fname, "hash": hash});
  if (!--pending) {
    done(null, results);
  }
});
}
Then put your hash generation code in a function like this:
/**
 * Streams a file through SHA-512 and reports the hex digest.
 *
 * @param {string} filename - Path of the file to hash.
 * @param {function(Error|null, string=)} callback - Called with the stream's
 *   error if the file cannot be read, otherwise with `null` and the
 *   hex-encoded digest.
 */
function generateHash (filename, callback) {
  var algorithm = 'sha512';
  var hash = crypto.createHash(algorithm);
  var fileStream = fs.createReadStream(filename);

  // Without this listener a missing or unreadable file raised an unhandled
  // 'error' event and the callback was never invoked.
  fileStream.on('error', function(err) {
    callback(err);
  });
  fileStream.on('data', function(data) {
    hash.update(data);
  });
  fileStream.on('end', function() {
    var digest = hash.digest('hex');
    callback(null, digest);
  });
}
Using vinyl-fs, you could glob a directory. This will probably cut down on your code quite a bit.
Then you would pipe the files through a handler that would generate your hash.
Here's an example:
// Illustrative pipeline only: `fs` here is presumably the vinyl-fs module (not
// Node's built-in fs), and `hasher` / `concater` are stream handlers that are
// not defined in this snippet.
fs.src(['./**/*.js'])
.pipe(hasher)
.pipe(concater)
// NOTE(review): vinyl-fs normally exposes `dest` as a module-level function that
// is piped into, i.e. `.pipe(fs.dest(folder))` — confirm before relying on this call.
.dest('output.file')
import crypto from 'crypto';
import fs from 'fs';
import path from 'path';
/**
 * Lazily yields the path of every non-directory entry found beneath `dir`,
 * descending into each sub-directory as it is encountered.
 */
function* walkSync(dir: string) {
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const entryPath = path.join(dir, entry.name);
    if (!entry.isDirectory()) {
      yield entryPath;
      continue;
    }
    yield* walkSync(entryPath);
  }
}
/**
 * Computes one SHA-256 fingerprint for a directory: each file yielded by
 * walkSync is hashed on its own, the hex digests are concatenated in the
 * order they were yielded, and that string is hashed once more.
 */
function dirHash(dir: string) {
  let combined = '';
  for (const file of walkSync(dir)) {
    const contents = fs.readFileSync(file);
    combined += crypto.createHash('sha256').update(contents).digest('hex');
  }
  return crypto.createHash('sha256').update(combined).digest('hex');
}

const srcDigest = dirHash('./src');
console.log(srcDigest);

node.js glob pattern for excluding multiple files

I'm using the npm module node-glob.
This snippet returns recursively all files in the current working directory.
var glob = require('glob');
// Prints whatever the recursive match-all pattern returns.
function logMatches(err, files) {
  console.log(files);
}

glob('**/*', logMatches);
sample output:
[ 'index.html', 'js', 'js/app.js', 'js/lib.js' ]
I want to exclude index.html and js/lib.js.
I tried to exclude these files with negative pattern '!' but without luck.
Is there a way to achieve this only by using a pattern?
I suppose this is no longer relevant, but I got stuck on the same question and found an answer.
This can be done using only glob module.
We need to use options as a second parameter to glob function
glob('pattern', {options}, cb)
There is an options.ignore pattern for your needs.
var glob = require('glob');
// Only the two unwanted entries go into `ignore`; listing the whole sample
// output there would also drop 'js' and 'js/app.js', which should be kept.
glob("**/*", {"ignore": ['index.html', 'js/lib.js']}, function (err, files) {
  if (err) { throw err; }
  console.log(files);
});
Check out globby, which is pretty much glob with support for multiple patterns and a Promise API:
const globby = require('globby');
// Match everything, then subtract the two negated patterns.
const patterns = ['**/*', '!index.html', '!js/lib.js'];

globby(patterns).then(function (paths) {
  console.log(paths);
});
You can use node-globule for that:
var globule = require('globule');
// The match-all pattern comes first; the '!' patterns then remove entries from it.
var wantedPatterns = ['**/*', '!index.html', '!js/lib.js'];
var result = globule.find(wantedPatterns);

console.log(result);
Or without an external dependency:
/**
Walk a directory and list its tree,
omitting every path that matches one of the exclude regexes
*/
var fs = require('fs');
var path = require('path');
/**
 * Recursively lists the files and folders beneath `dir`, skipping every path
 * that matches one of `regExcludes`. An excluded folder is not descended into.
 *
 * @param {string} dir - Directory to walk.
 * @param {RegExp[]} regExcludes - Patterns tested against each joined path.
 * @param {function(Error|null, string[]=)} done - Called exactly once, either
 *   with an error (a failed `fs.readdir` here or anywhere below) or with
 *   `null` and the collected paths.
 */
var walk = function (dir, regExcludes, done) {
  var results = [];
  var finished = false;

  // Entries are processed in parallel, so several of them may fail; this
  // guard makes sure the caller hears back only once.
  function finish(err) {
    if (finished) { return; }
    finished = true;
    if (err) { return done(err); }
    done(null, results);
  }

  fs.readdir(dir, function (err, list) {
    if (err) { return finish(err); }
    var pending = list.length;
    if (!pending) { return finish(); }

    // Marks one entry as handled and reports once all of them are.
    function settleOne() {
      if (!--pending) { finish(); }
    }

    list.forEach(function (name) {
      var file = path.join(dir, name);
      var excluded = regExcludes.some(function (pattern) {
        return file.match(pattern);
      });
      if (excluded) { return settleOne(); }

      // Add if not in regExcludes
      results.push(file);
      // Check if it's a folder
      fs.stat(file, function (err, stat) {
        if (!(stat && stat.isDirectory())) { return settleOne(); }
        // If it is, walk again
        walk(file, regExcludes, function (err, res) {
          // Surface a failed sub-walk instead of concatenating `undefined`
          // into the results and reporting success.
          if (err) { return finish(err); }
          results = results.concat(res);
          settleOne();
        });
      });
    });
  });
};
// Paths matching any of these patterns are left out of the listing.
var regExcludes = [/index\.html/, /js\/lib\.js/, /node_modules/];

// Completion handler: rethrow a failed walk, otherwise print the collected paths.
function reportWalk(err, results) {
  if (err) {
    throw err;
  }
  console.log(results);
}

walk('.', regExcludes, reportWalk);
Here is what I wrote for my project:
var glob = require('glob');
var minimatch = require("minimatch");
/**
 * Expands one pattern or an ordered list of patterns into a de-duplicated
 * list of paths. Patterns are applied in order: a plain pattern appends its
 * glob matches, while a pattern starting with "!" removes the already
 * collected paths that minimatch rejects for it.
 *
 * @param {string|string[]} patterns - Single pattern or ordered pattern list.
 * @param {Object} [options] - Passed through to glob.sync.
 * @returns {string[]} The collected paths.
 */
function globArray(patterns, options) {
  const patternList = Array.isArray(patterns) ? patterns : [patterns];
  let list = [];

  for (const pattern of patternList) {
    if (pattern[0] === "!") {
      // Keep only the entries minimatch still accepts for the negated pattern.
      list = list.filter((entry) => minimatch(entry, pattern));
      continue;
    }
    for (const match of glob.sync(pattern, options)) {
      if (!list.includes(match)) {
        list.push(match);
      }
    }
  }

  return list;
}
And call it like this (Using an array):
// Array form: collect the .css and .js matches under srcPath, then drop anything matching **/one.js.
var paths = globArray(["**/*.css","**/*.js","!**/one.js"], {cwd: srcPath});
or this (Using a single string):
// String form: a lone pattern is wrapped into a one-element list by globArray.
var paths = globArray("**/*.js", {cwd: srcPath});
A sample example with gulp:
// Copies every script matched under ./assets into ./public/js/ and signals
// task completion (or failure) through gulp's `done` callback.
gulp.task('task_scripts', function(done){
  glob("./assets/**/*.js", function (er, files) {
    // Report a failed glob to gulp instead of carrying on with no file list.
    if (er) { return done(er); }
    gulp.src(files)
      .pipe(gulp.dest('./public/js/'))
      .on('end', done);
  });
});

Resources