JSON stringify sporadically replaces separator with closing square bracket - node.js

I am running a node.js script that culls through a directory and extracts any regex matches it finds, then pushes them onto an array. Once the query has run, I am stringifying the array into a standalone JSON file for safekeeping until it's needed.
I'm running into sporadic errors where the array in the JSON file is malformed and causes an error when I try to parse it out later on. It is sporadic, and I cannot determine a pattern, but every time (so far) the character that is causing the error is a closing square bracket - just like the one between the 2nd and 3rd elements below:
["first string","second string"]"third string"]
It's almost as if my array.push() is just adding another element onto the end of the existing array, and "forgetting" to change the closing square bracket to a comma...
I have searched, but cannot find references to a known error that would cause this. Has anyone seen this before?
Environment:
Node.js v19.0.0 | Ubuntu 22.04.1 LTS | Intel i7 processor
Here's my code for generating the JSON files:
const fs = require('fs');
const path = require('path');
const devPath = '/home/user/directory';
const activeReminder = [];
function buildArray() {
let getFiles = fs.readdirSync(devPath);
getFiles.forEach(file => {
splitMD(file);
});
}
function splitMD(file) {
if (path.extname(file) == ".md" & file != {}) {
let data = fs.readFileSync(file, 'utf8');
const line = data.split(/\r?\n/);
const match = line.find(element => {
extractions(element, file);
});
}
};
function extractions(element, file) {
if (element.includes("- [ ] reminder: ")) { //the - [ ] is markdown for Obsidian
const datedReminder = "- [ ] [["+file+"]]" + element.split('- [ ] reminder:')[1]
activeReminder.push(datedReminder);
buildReminderJSON();
}
}
function buildCcioJSON() {
const reminderJSON = JSON.stringify(activeReminder);
fs.writeFile("reminder.json", reminderJSON, 'utf8', function (err) {
if (err) {
console.log("Error while writing CCIO JSON:");
return console.log(err);
}
})
}
buildArray();
module.exports = { buildArray };

I don't know what's causing the anomaly described above, but I created a script to correct it as a workaround until I can figure out a root cause. I'm running it as a crontab a few minutes after the crontab that creates the original array.
const fs = require('fs')
const path = require('path')
const devPath = '/home/user/directory'
function cleanup() {
let getFiles = fs.readdirSync(devPath);
getFiles.forEach(file => {
splitJSON(file);
});
}
function splitJSON(file) {
if (path.extname(file) == ".json" & file != {}) {
let data = fs.readFileSync(file, 'utf8');
const line = data.split(/\r?\n/);
const match = line.find(element => {
jsonCheck(element, file);
});
}
};
function jsonCheck(element, file) {
if (element.includes("\"]\"")) {
fs.readFile(file, 'utf8', function (err,data) {
if (err) {
return console.log(err);
}
var result = data.replace(/\"\]\"/g, '","');
fs.writeFile(file, result, 'utf8', function (err) {
if (err) return console.log(err);
});
});
}
}
cleanup();
module.exports = { cleanup };

Related

Is there a way in nodejs to get all the files in a directory including files that are nested in folders [duplicate]

I have a little problem with my function. I would like to get all files in many directories. Currently, I can retrieve the files in the file passed in parameters. I would like to retrieve the html files of each folder in the folder passed as a parameter. I will explain if I put in parameter "test" I retrieve the files in "test" but I would like to retrieve "test / 1 / *. Html", "test / 2 / . /.html ":
var srcpath2 = path.join('.', 'diapo', result);
function getDirectories(srcpath2) {
return fs.readdirSync(srcpath2).filter(function (file) {
return fs.statSync(path.join(srcpath2, file)).isDirectory();
});
}
The result :
[1,2,3]
thanks !
It looks like the glob npm package would help you. Here is an example of how to use it:
File hierarchy:
test
├── one.html
└── test-nested
└── two.html
JS code:
const glob = require("glob");
var getDirectories = function (src, callback) {
glob(src + '/**/*', callback);
};
getDirectories('test', function (err, res) {
if (err) {
console.log('Error', err);
} else {
console.log(res);
}
});
which displays:
[ 'test/one.html',
'test/test-nested',
'test/test-nested/two.html' ]
I've seen many very long answers, and it's kinda a waste of memory space. Some also use packages like glob, but if you don't want to depend on any package, here's my solution.
const Path = require("path");
const FS = require("fs");
let Files = [];
function ThroughDirectory(Directory) {
FS.readdirSync(Directory).forEach(File => {
const Absolute = Path.join(Directory, File);
if (FS.statSync(Absolute).isDirectory()) return ThroughDirectory(Absolute);
else return Files.push(Absolute);
});
}
ThroughDirectory("./input/directory/");
It's pretty self-explanatory. There's an input directory, and it iterates through that. If one of the items is also a directory, go through that and so on. If it's a file, add the absolute path to the array.
Hope this helped :]
Using ES6 yield
const fs = require('fs');
const path = require('path');
function *walkSync(dir) {
const files = fs.readdirSync(dir, { withFileTypes: true });
for (const file of files) {
if (file.isDirectory()) {
yield* walkSync(path.join(dir, file.name));
} else {
yield path.join(dir, file.name);
}
}
}
for (const filePath of walkSync(__dirname)) {
console.log(filePath);
}
I really liked Smally's Solution but didn't like the Syntax.
Same solution but slightly easier to read:
const fs = require("fs");
const path = require("path");
let files = [];
const getFilesRecursively = (directory) => {
const filesInDirectory = fs.readdirSync(directory);
for (const file of filesInDirectory) {
const absolute = path.join(directory, file);
if (fs.statSync(absolute).isDirectory()) {
getFilesRecursively(absolute);
} else {
files.push(absolute);
}
}
};
Here's mine. Like all good answers it's hard to understand:
const isDirectory = path => statSync(path).isDirectory();
const getDirectories = path =>
readdirSync(path).map(name => join(path, name)).filter(isDirectory);
const isFile = path => statSync(path).isFile();
const getFiles = path =>
readdirSync(path).map(name => join(path, name)).filter(isFile);
const getFilesRecursively = (path) => {
let dirs = getDirectories(path);
let files = dirs
.map(dir => getFilesRecursively(dir)) // go through each directory
.reduce((a,b) => a.concat(b), []); // map returns a 2d array (array of file arrays) so flatten
return files.concat(getFiles(path));
};
With modern JavaScript (NodeJs 10) you can use async generator function and loop through them using for-await...of
// ES modules syntax that is included by default in NodeJS 14.
// For earlier versions, use `--experimental-modules` flag
import fs from "fs/promises"
// or, without ES modules, use this:
// const fs = require('fs').promises
async function run() {
for await (const file of getFiles()) {
console.log(file.path)
}
}
async function* getFiles(path = `./`) {
const entries = await fs.readdir(path, { withFileTypes: true })
for (let file of entries) {
if (file.isDirectory()) {
yield* getFiles(`${path}${file.name}/`)
} else {
yield { ...file, path: path + file.name }
}
}
}
run()
Packed into library:
https://www.npmjs.com/package/node-recursive-directory
https://github.com/vvmspace/node-recursive-directory
List of files:
const getFiles = require('node-recursive-directory');
(async () => {
const files = await getFiles('/home');
console.log(files);
})()
List of files with parsed data:
const getFiles = require('node-resursive-directory');
(async () => {
const files = await getFiles('/home', true); // add true
console.log(files);
})()
You will get something like that:
[
...,
{
fullpath: '/home/vvm/Downloads/images/Some/Some Image.jpg',
filepath: '/home/vvm/Downloads/images/Some/',
filename: 'Some Image.jpg',
dirname: 'Some'
},
]
You can also write your own code like below to traverse the directory as shown below :
var fs = require('fs');
function traverseDirectory(dirname, callback) {
var directory = [];
fs.readdir(dirname, function(err, list) {
dirname = fs.realpathSync(dirname);
if (err) {
return callback(err);
}
var listlength = list.length;
list.forEach(function(file) {
file = dirname + '\\' + file;
fs.stat(file, function(err, stat) {
directory.push(file);
if (stat && stat.isDirectory()) {
traverseDirectory(file, function(err, parsed) {
directory = directory.concat(parsed);
if (!--listlength) {
callback(null, directory);
}
});
} else {
if (!--listlength) {
callback(null, directory);
}
}
});
});
});
}
traverseDirectory(__dirname, function(err, result) {
if (err) {
console.log(err);
}
console.log(result);
});
You can check more information about it here : http://www.codingdefined.com/2014/09/how-to-navigate-through-directories-in.html
I needed to so something similar, in an Electron app: get all subfolders in a given base folder, using TypeScript, and came up with this:
import { readdirSync, statSync, existsSync } from "fs";
import * as path from "path";
// recursive synchronous "walk" through a folder structure, with the given base path
getAllSubFolders = (baseFolder, folderList = []) => {
let folders:string[] = readdirSync(baseFolder).filter(file => statSync(path.join(baseFolder, file)).isDirectory());
folders.forEach(folder => {
folderList.push(path.join(baseFolder,folder));
this.getAllSubFolders(path.join(baseFolder,folder), folderList);
});
}
const fs = require('fs');
const path = require('path');
var filesCollection = [];
const directoriesToSkip = ['bower_components', 'node_modules', 'www', 'platforms'];
function readDirectorySynchronously(directory) {
var currentDirectorypath = path.join(__dirname + directory);
var currentDirectory = fs.readdirSync(currentDirectorypath, 'utf8');
currentDirectory.forEach(file => {
var fileShouldBeSkipped = directoriesToSkip.indexOf(file) > -1;
var pathOfCurrentItem = path.join(__dirname + directory + '/' + file);
if (!fileShouldBeSkipped && fs.statSync(pathOfCurrentItem).isFile()) {
filesCollection.push(pathOfCurrentItem);
}
else if (!fileShouldBeSkipped) {
var directorypath = path.join(directory + '\\' + file);
readDirectorySynchronously(directorypath);
}
});
}
readDirectorySynchronously('');
This will fill filesCollection with all the files in the directory and its subdirectories (it's recursive). You have the option to skip some directory names in the directoriesToSkip array.
Speaking of npm packages - another short option is to use fs-readdir-recursive:
const read = require("fs-readdir-recursive");
const foundFiles = read("test");
console.log(foundFiles);
Output:
[ 'one.html', 'test-nested/some_text.txt', 'test-nested/two.html' ]
If you're interested only in files with specific extension (like .html mentioned in the question) you can filter them using .endsWith():
const filteredFiles = read("test").filter(item => item.endsWith(".html"));
The accepted answer needs to install a package.
If you want a native option that is ES6:
import { readdirSync } from 'fs'
import { join } from 'path'
function walk(dir) {
return readdirSync(dir, { withFileTypes: true }).flatMap((file) => file.isDirectory() ? walk(join(dir, file.name)) : join(dir, file.name))
}
This works for me.
Read root directory with readdirSync
Then map over but flatten as we go
if it's a directory, go recursive; else return the filename
If you rather work synchronously with glob, use the glob.sync() function as mentioned in their documentation. Here's the equivalent example provided by #Paul Mougel but written synchronously:
const glob = require("glob");
var getDirectories = function (src) {
return glob.sync(src + '/**/*');
};
var rest = getDirectories('test');
console.log(res);
A solution with Promises based on globby:
import { globby } from 'globby';
(async () => {
const path = '/path/to/dir';
const files = await globby([`${path}/**/*`]);
console.log(files);
// [
// '/path/to/dir/file1.txt',
// '/path/to/dir/subdir/file2.txt',
// ...
// ]
})()
Synchrone method with two option, simple and efficacy.
const path = require('path');const fs = require('fs');
function toHierarchie_files(pathDir, output_normalize=false, removeEmpty=true)
{
var result = {}, enqueue = [pathDir];
//normalize slash separator if output_normalize is true or just return val
output_normalize = output_normalize == false?val => {return val}:val => {return path.normalize(val)};
//allows absolute or relative path with extended resolution. Returns path normalize absolute to work with or 'none' string.
const path_exist = (path_test) => {var tmpTab = fs.existsSync(path.normalize(path.resolve(path_test))) == true?[path.normalize(path.resolve(path_test))]:['', '../', '../../'].map(val => path.normalize(path.resolve(__dirname, val+path_test))).filter((val, index) => fs.existsSync(path.normalize(path.resolve(__dirname, val+path_test))) == true);return tmpTab.length > 0?tmpTab[0]:'none'};
//Check if file exist and return her type or 'none' string
const getType = (path_test) => {path_test = path_exist(path_test);return path_test == 'none'?'none':fs.lstatSync(path_test).isDirectory() == true?'dir':fs.lstatSync(path_test).isFile() == true?'file':'none';};
function recursive()
{
//init new entrie
var parentDir = enqueue.pop();result[parentDir]=[];
//read dir
fs.readdirSync(path_exist(parentDir)).forEach((file, index) =>{
switch(getType(parentDir+'/'+file))
{
//if detect dir push in queue
case 'dir': enqueue.push(output_normalize(parentDir+'/'+file)); break;
//if file, add in entrie
case 'file': result[parentDir].push(file); break;
//else done
default: break;
};
});
//if optional arg remove empty is true, delete entries if not contains files
if(result[parentDir].length == 0 && removeEmpty == true){Reflect.deleteProperty(result, parentDir);}
//if queue is not empty continue processing
if(enqueue.length > 0){recursive();}
};
//if dir renseign exist, go recusive
if(getType(pathDir) == 'dir'){recursive();}
return result;
};
Result:
{
"public/assets": [
"favicon.ico"
],
"public/assets/js": [
"dede.js",
"test.js"
],
"public/assets/js/css/secure": [
"config.json",
"index.js"
],
"public/assets/css": [
"style.css"
]
}
You can use loop through all the files and directories of the root folder, if it's a directory, then get inside it and repeat the process.
Consider the code below:
const fs = require('fs');
const path = require('path');
const target = './'; // choose the directory to target
var result = []
var filePaths = []
var tempFolder = []
const targetPath = fs.readdirSync(target);
function hit(mainPath = targetPath) {
mainPath.forEach((file) => {
let check = fs.statSync(file);
if (!check.isDirectory()) {
filePaths.push(file)
}
else {
if (file[0] != '.') {
tempFolder.push(file)
}
}
});
// get files from folder
if (tempFolder.length > 0) {
tempFolder.forEach((dir) => {
getFiles(dir)
})
}
// filePaths contains path to every file
}
function getFiles(dir) {
var paths = fs.readdirSync(dir);
var files = [];
paths.forEach(function (file) {
var fullPath = dir + '/' + file;
files.push(fullPath);
});
files.forEach((tempFile) => {
let check = fs.statSync(tempFile);
if (check.isDirectory()) {
getFiles(tempFile)
} else {
filePaths.push(tempFile)
}
})
}
hit(); // main function
Although not perfect in some scenarios, it must be helpful in many.
const getAllFilePath = (path: string) => {
const addData = (_paths: string[]) => {
const newFoldersToScrape: string[] = [];
_paths.forEach(_path => {
fs.readdirSync(_path).forEach((file: string) => {
if (file.indexOf(".") === -1) {
newFoldersToScrape.push(`${_path}/${file}`);
} else {
filePaths.push(`${_path}/${file}`);
}
});
});
foldersToScrape = newFoldersToScrape;
};
const baseDirPath = `<YOUR BASE PATH HERE>/${path}`;
let foldersToScrape: string[] = [];
const filePaths: string[] = [];
addData([baseDirPath]);
while (foldersToScrape.length !== 0) {
addData(foldersToScrape);
}
return filePaths;
};
This is how I did it, I think it is similar to yet simpler than most of the other answers here.
const fs = require('fs')
let files = []
const getFiles = (path) => {
if (fs.lstatSync(path).isDirectory()) { // is this a folder?
fs.readdirSync(path).forEach(f => { // for everything in this folder
getFiles(path + '/' + f) // process it recursively
})
} else if (path.endsWith(".ts")) { // is this a file we are searching for?
files.push(path) // record it
}
}
getFiles("src")
It fills the "files" array with every .ts file under the "src/" directory.
Slightly modified version of #Stephen's response (https://stackoverflow.com/a/66083078/4421370) above that returns the files' path relative to the directory you are searching. Or any arbitrary base path you supply to the function call in-place of the default base. If you want the full path just call it as walkSync(dir, dir).
Search Path is: c:\tmp,
File path is c:\tmp\test\myfile.txt,
Result is test\myfile.txt
Hopefully helpful to some.
const fs = require('fs');
const path = require('path');
function *walkSync(dir, base="") {
const files = fs.readdirSync(dir, { withFileTypes: true })
for (const file of files) {
if (file.isDirectory()) {
yield* walkSync(path.join(dir, file.name), path.join(base, file.name));
} else {
yield path.join(base, file.name);
}
}
}
for (const filePath of walkSync(__dirname)) {
console.log(filePath);
}
Here is a compact pure function that returns all the paths (relatives) in the directory.
import path from 'path'
const getFilesPathsRecursively = (directory: string, origin?: string): string[] =>
fs.readdirSync(directory).reduce((files, file) => {
const absolute = path.join(directory, file)
return [
...files,
...(fs.statSync(absolute).isDirectory()
? getFilesPathsRecursively(absolute, origin || directory)
: [path.relative(origin || directory, absolute)]),
]
}, [])
The solution is written in TypeScript.
modern solution with async/await
No external dependencies.
Asynchronous function (non-blocking like other solutions with readdirSync and statSync)
It is extremely fast because multiple processes work in parallel (it is not waiting for a response from every file in the list).
It has also some naive error handling (if something happens with one file or folder it will not blow whole process)
import path from "path";
import fs from "fs/promises"
export default async function readDirectory(directory: string): Promise<string[]> {
const files = await fs.readdir(directory)
const filesPromises = files.map(async (file) => {
try {
const absolutePath = path.join(directory, file);
const fileStat = await fs.stat(absolutePath)
if (fileStat.isDirectory()) {
return await readDirectory(absolutePath);
} else {
return absolutePath;
}
} catch (err) {
// error handling
return [];
}
});
const filesWithArrays = await Promise.all(filesPromises)
const flatArray = filesWithArrays.reduce<string[]>((acc, fileOrArray) => acc.concat(fileOrArray), []);
return flatArray;
}
usage (if this is a separate file please remember to import)
const results = await readDirectory('some/path');
I did mine with typescript works well fairly easy to understand
import * as fs from 'fs';
import * as path from 'path';
export const getAllSubFolders = (
baseFolder: string,
folderList: string[] = []
) => {
const folders: string[] = fs
.readdirSync(baseFolder)
.filter(file => fs.statSync(path.join(baseFolder, file)).isDirectory());
folders.forEach(folder => {
folderList.push(path.join(baseFolder, folder));
getAllSubFolders(path.join(baseFolder, folder), folderList);
});
return folderList;
};
export const getFilesInFolder = (rootPath: string) => {
return fs
.readdirSync(rootPath)
.filter(
filePath => !fs.statSync(path.join(rootPath, filePath)).isDirectory()
)
.map(filePath => path.normalize(path.join(rootPath, filePath)));
};
export const getFilesRecursively = (rootPath: string) => {
const subFolders: string[] = getAllSubFolders(rootPath);
const allFiles: string[][] = subFolders.map(folder =>
getFilesInFolder(folder)
);
return [].concat.apply([], allFiles);
};

NodeJS: How to read and modify buffer data, before parsing it from file?

In NodeJS, I am having a log file, where my logs are in the format:
{"time":"2021-09-23T11:36:18.076Z","type":"info","message":"some message","data":{"id":123}},
{"time":"2021-09-23T11:36:18.076Z","type":"info","message":"some message","data":{"id":123}},
{"time":"2021-09-23T11:36:18.076Z","type":"info","message":"some message","data":{"id":123}},
These are basically objects, separated with comma. What I need to do is to read the content of this file and to convert the logs into an array of objects (that I can manipulate later).
I am trying with something like:
let fileLogs = "./data/myfile.log";
fs.readFile(fileLogs, (err, fileLogsContent) => {
if (err) {
console.log("cannot read log file");
return;
}
//I know I need to manipulate the fileLogsContent here, before doing JSON.parse
let logsContent = { ...JSON.parse(fileLogsContent) };
//do something here with the array of objects 'logsContent'
});
Since the content in the log file is not in a format that can be parsed, the above JSON.parse fails. My idea is to bring the logfile in the following format:
[
{"time":"2021-09-23T11:36:18.076Z","type":"info","message":"some message","data":{"id":123}},
{"time":"2021-09-23T11:36:18.076Z","type":"info","message":"some message","data":{"id":123}},
{"time":"2021-09-23T11:36:18.076Z","type":"info","message":"some message","data":{"id":123}}
]
Which means that on the fly I would need to prepend [ as a first character and to replace the last , with ]. I am not aware how can I do that, since the fileLogsContent is actually a buffer. So how can I read the content and do the manipulations I mentioned, in order to be able to parse it later and to bring it into array of objects format?
You can simply wrap each line in a string and then call JSON.parse on it after removing the trailing comma. Here's an example (note that it still needs error-handling ofc):
const fs = require('fs');
const readline = require('readline');
const readInterface = readline.createInterface({
input: fs.createReadStream('./input.txt'),
output: undefined,
console: false
});
(async () => {
const resultArray = await new Promise((resolve, reject) => {
const chunks = [];
readInterface.on('line', (line) => {
line = line.substr(0, line.lastIndexOf(','))
chunks.push(JSON.parse(`${line}`));
})
readInterface.on('close', () => {
resolve(chunks);
})
});
console.log(resultArray);
})();
Here is the working solution that I came up with, following the directions from the #eol's answer.
const { once } = require('events');
const { createReadStream } = require('fs');
const { createInterface } = require('readline');
(async function processLineByLine() {
try {
const rl = createInterface({
input: createReadStream('./data/myfile.log'),
crlfDelay: Infinity
});
const chunks = [];
rl.on('line', (line) => {
// Process the line.
chunks.push(JSON.parse(`${line.substr(0, line.lastIndexOf(','))}`));
});
await once(rl, 'close');
console.log('File processed. Content = ', chunks);
} catch (err) {
console.log("cannot read log file, err = ", err);
}
})();

Get all files recursively in directories NodejS

I have a little problem with my function. I would like to get all files in many directories. Currently, I can retrieve the files in the file passed in parameters. I would like to retrieve the html files of each folder in the folder passed as a parameter. I will explain if I put in parameter "test" I retrieve the files in "test" but I would like to retrieve "test / 1 / *. Html", "test / 2 / . /.html ":
var srcpath2 = path.join('.', 'diapo', result);
function getDirectories(srcpath2) {
return fs.readdirSync(srcpath2).filter(function (file) {
return fs.statSync(path.join(srcpath2, file)).isDirectory();
});
}
The result :
[1,2,3]
thanks !
It looks like the glob npm package would help you. Here is an example of how to use it:
File hierarchy:
test
├── one.html
└── test-nested
└── two.html
JS code:
const glob = require("glob");
var getDirectories = function (src, callback) {
glob(src + '/**/*', callback);
};
getDirectories('test', function (err, res) {
if (err) {
console.log('Error', err);
} else {
console.log(res);
}
});
which displays:
[ 'test/one.html',
'test/test-nested',
'test/test-nested/two.html' ]
I've seen many very long answers, and it's kinda a waste of memory space. Some also use packages like glob, but if you don't want to depend on any package, here's my solution.
const Path = require("path");
const FS = require("fs");
let Files = [];
function ThroughDirectory(Directory) {
FS.readdirSync(Directory).forEach(File => {
const Absolute = Path.join(Directory, File);
if (FS.statSync(Absolute).isDirectory()) return ThroughDirectory(Absolute);
else return Files.push(Absolute);
});
}
ThroughDirectory("./input/directory/");
It's pretty self-explanatory. There's an input directory, and it iterates through that. If one of the items is also a directory, go through that and so on. If it's a file, add the absolute path to the array.
Hope this helped :]
Using ES6 yield
const fs = require('fs');
const path = require('path');
function *walkSync(dir) {
const files = fs.readdirSync(dir, { withFileTypes: true });
for (const file of files) {
if (file.isDirectory()) {
yield* walkSync(path.join(dir, file.name));
} else {
yield path.join(dir, file.name);
}
}
}
for (const filePath of walkSync(__dirname)) {
console.log(filePath);
}
I really liked Smally's Solution but didn't like the Syntax.
Same solution but slightly easier to read:
const fs = require("fs");
const path = require("path");
let files = [];
const getFilesRecursively = (directory) => {
const filesInDirectory = fs.readdirSync(directory);
for (const file of filesInDirectory) {
const absolute = path.join(directory, file);
if (fs.statSync(absolute).isDirectory()) {
getFilesRecursively(absolute);
} else {
files.push(absolute);
}
}
};
Here's mine. Like all good answers it's hard to understand:
const isDirectory = path => statSync(path).isDirectory();
const getDirectories = path =>
readdirSync(path).map(name => join(path, name)).filter(isDirectory);
const isFile = path => statSync(path).isFile();
const getFiles = path =>
readdirSync(path).map(name => join(path, name)).filter(isFile);
const getFilesRecursively = (path) => {
let dirs = getDirectories(path);
let files = dirs
.map(dir => getFilesRecursively(dir)) // go through each directory
.reduce((a,b) => a.concat(b), []); // map returns a 2d array (array of file arrays) so flatten
return files.concat(getFiles(path));
};
With modern JavaScript (NodeJs 10) you can use async generator function and loop through them using for-await...of
// ES modules syntax that is included by default in NodeJS 14.
// For earlier versions, use `--experimental-modules` flag
import fs from "fs/promises"
// or, without ES modules, use this:
// const fs = require('fs').promises
async function run() {
for await (const file of getFiles()) {
console.log(file.path)
}
}
async function* getFiles(path = `./`) {
const entries = await fs.readdir(path, { withFileTypes: true })
for (let file of entries) {
if (file.isDirectory()) {
yield* getFiles(`${path}${file.name}/`)
} else {
yield { ...file, path: path + file.name }
}
}
}
run()
Packed into library:
https://www.npmjs.com/package/node-recursive-directory
https://github.com/vvmspace/node-recursive-directory
List of files:
const getFiles = require('node-recursive-directory');
(async () => {
const files = await getFiles('/home');
console.log(files);
})()
List of files with parsed data:
const getFiles = require('node-resursive-directory');
(async () => {
const files = await getFiles('/home', true); // add true
console.log(files);
})()
You will get something like that:
[
...,
{
fullpath: '/home/vvm/Downloads/images/Some/Some Image.jpg',
filepath: '/home/vvm/Downloads/images/Some/',
filename: 'Some Image.jpg',
dirname: 'Some'
},
]
You can also write your own code like below to traverse the directory as shown below :
var fs = require('fs');
function traverseDirectory(dirname, callback) {
var directory = [];
fs.readdir(dirname, function(err, list) {
dirname = fs.realpathSync(dirname);
if (err) {
return callback(err);
}
var listlength = list.length;
list.forEach(function(file) {
file = dirname + '\\' + file;
fs.stat(file, function(err, stat) {
directory.push(file);
if (stat && stat.isDirectory()) {
traverseDirectory(file, function(err, parsed) {
directory = directory.concat(parsed);
if (!--listlength) {
callback(null, directory);
}
});
} else {
if (!--listlength) {
callback(null, directory);
}
}
});
});
});
}
traverseDirectory(__dirname, function(err, result) {
if (err) {
console.log(err);
}
console.log(result);
});
You can check more information about it here : http://www.codingdefined.com/2014/09/how-to-navigate-through-directories-in.html
I needed to so something similar, in an Electron app: get all subfolders in a given base folder, using TypeScript, and came up with this:
import { readdirSync, statSync, existsSync } from "fs";
import * as path from "path";
// recursive synchronous "walk" through a folder structure, with the given base path
getAllSubFolders = (baseFolder, folderList = []) => {
let folders:string[] = readdirSync(baseFolder).filter(file => statSync(path.join(baseFolder, file)).isDirectory());
folders.forEach(folder => {
folderList.push(path.join(baseFolder,folder));
this.getAllSubFolders(path.join(baseFolder,folder), folderList);
});
}
const fs = require('fs');
const path = require('path');
var filesCollection = [];
const directoriesToSkip = ['bower_components', 'node_modules', 'www', 'platforms'];
function readDirectorySynchronously(directory) {
var currentDirectorypath = path.join(__dirname + directory);
var currentDirectory = fs.readdirSync(currentDirectorypath, 'utf8');
currentDirectory.forEach(file => {
var fileShouldBeSkipped = directoriesToSkip.indexOf(file) > -1;
var pathOfCurrentItem = path.join(__dirname + directory + '/' + file);
if (!fileShouldBeSkipped && fs.statSync(pathOfCurrentItem).isFile()) {
filesCollection.push(pathOfCurrentItem);
}
else if (!fileShouldBeSkipped) {
var directorypath = path.join(directory + '\\' + file);
readDirectorySynchronously(directorypath);
}
});
}
readDirectorySynchronously('');
This will fill filesCollection with all the files in the directory and its subdirectories (it's recursive). You have the option to skip some directory names in the directoriesToSkip array.
Speaking of npm packages - another short option is to use fs-readdir-recursive:
const read = require("fs-readdir-recursive");
const foundFiles = read("test");
console.log(foundFiles);
Output:
[ 'one.html', 'test-nested/some_text.txt', 'test-nested/two.html' ]
If you're interested only in files with specific extension (like .html mentioned in the question) you can filter them using .endsWith():
const filteredFiles = read("test").filter(item => item.endsWith(".html"));
The accepted answer needs to install a package.
If you want a native option that is ES6:
import { readdirSync } from 'fs'
import { join } from 'path'
function walk(dir) {
return readdirSync(dir, { withFileTypes: true }).flatMap((file) => file.isDirectory() ? walk(join(dir, file.name)) : join(dir, file.name))
}
This works for me.
Read root directory with readdirSync
Then map over but flatten as we go
if it's a directory, go recursive; else return the filename
If you rather work synchronously with glob, use the glob.sync() function as mentioned in their documentation. Here's the equivalent example provided by #Paul Mougel but written synchronously:
const glob = require("glob");
var getDirectories = function (src) {
return glob.sync(src + '/**/*');
};
var rest = getDirectories('test');
console.log(res);
A solution with Promises based on globby:
import { globby } from 'globby';
(async () => {
const path = '/path/to/dir';
const files = await globby([`${path}/**/*`]);
console.log(files);
// [
// '/path/to/dir/file1.txt',
// '/path/to/dir/subdir/file2.txt',
// ...
// ]
})()
Synchrone method with two option, simple and efficacy.
const path = require('path');const fs = require('fs');
function toHierarchie_files(pathDir, output_normalize=false, removeEmpty=true)
{
var result = {}, enqueue = [pathDir];
//normalize slash separator if output_normalize is true or just return val
output_normalize = output_normalize == false?val => {return val}:val => {return path.normalize(val)};
//allows absolute or relative path with extended resolution. Returns path normalize absolute to work with or 'none' string.
const path_exist = (path_test) => {var tmpTab = fs.existsSync(path.normalize(path.resolve(path_test))) == true?[path.normalize(path.resolve(path_test))]:['', '../', '../../'].map(val => path.normalize(path.resolve(__dirname, val+path_test))).filter((val, index) => fs.existsSync(path.normalize(path.resolve(__dirname, val+path_test))) == true);return tmpTab.length > 0?tmpTab[0]:'none'};
//Check if file exist and return her type or 'none' string
const getType = (path_test) => {path_test = path_exist(path_test);return path_test == 'none'?'none':fs.lstatSync(path_test).isDirectory() == true?'dir':fs.lstatSync(path_test).isFile() == true?'file':'none';};
function recursive()
{
//init new entrie
var parentDir = enqueue.pop();result[parentDir]=[];
//read dir
fs.readdirSync(path_exist(parentDir)).forEach((file, index) =>{
switch(getType(parentDir+'/'+file))
{
//if detect dir push in queue
case 'dir': enqueue.push(output_normalize(parentDir+'/'+file)); break;
//if file, add in entrie
case 'file': result[parentDir].push(file); break;
//else done
default: break;
};
});
//if optional arg remove empty is true, delete entries if not contains files
if(result[parentDir].length == 0 && removeEmpty == true){Reflect.deleteProperty(result, parentDir);}
//if queue is not empty continue processing
if(enqueue.length > 0){recursive();}
};
//if dir renseign exist, go recusive
if(getType(pathDir) == 'dir'){recursive();}
return result;
};
Result:
{
"public/assets": [
"favicon.ico"
],
"public/assets/js": [
"dede.js",
"test.js"
],
"public/assets/js/css/secure": [
"config.json",
"index.js"
],
"public/assets/css": [
"style.css"
]
}
You can use loop through all the files and directories of the root folder, if it's a directory, then get inside it and repeat the process.
Consider the code below:
const fs = require('fs');
const path = require('path');
const target = './'; // choose the directory to target
var result = []
var filePaths = []
var tempFolder = []
const targetPath = fs.readdirSync(target);
function hit(mainPath = targetPath) {
mainPath.forEach((file) => {
let check = fs.statSync(file);
if (!check.isDirectory()) {
filePaths.push(file)
}
else {
if (file[0] != '.') {
tempFolder.push(file)
}
}
});
// get files from folder
if (tempFolder.length > 0) {
tempFolder.forEach((dir) => {
getFiles(dir)
})
}
// filePaths contains path to every file
}
function getFiles(dir) {
var paths = fs.readdirSync(dir);
var files = [];
paths.forEach(function (file) {
var fullPath = dir + '/' + file;
files.push(fullPath);
});
files.forEach((tempFile) => {
let check = fs.statSync(tempFile);
if (check.isDirectory()) {
getFiles(tempFile)
} else {
filePaths.push(tempFile)
}
})
}
hit(); // main function
Although not perfect in some scenarios, it must be helpful in many.
const getAllFilePath = (path: string) => {
const addData = (_paths: string[]) => {
const newFoldersToScrape: string[] = [];
_paths.forEach(_path => {
fs.readdirSync(_path).forEach((file: string) => {
if (file.indexOf(".") === -1) {
newFoldersToScrape.push(`${_path}/${file}`);
} else {
filePaths.push(`${_path}/${file}`);
}
});
});
foldersToScrape = newFoldersToScrape;
};
const baseDirPath = `<YOUR BASE PATH HERE>/${path}`;
let foldersToScrape: string[] = [];
const filePaths: string[] = [];
addData([baseDirPath]);
while (foldersToScrape.length !== 0) {
addData(foldersToScrape);
}
return filePaths;
};
This is how I did it, I think it is similar to yet simpler than most of the other answers here.
const fs = require('fs')
let files = []
const getFiles = (path) => {
if (fs.lstatSync(path).isDirectory()) { // is this a folder?
fs.readdirSync(path).forEach(f => { // for everything in this folder
getFiles(path + '/' + f) // process it recursively
})
} else if (path.endsWith(".ts")) { // is this a file we are searching for?
files.push(path) // record it
}
}
getFiles("src")
It fills the "files" array with every .ts file under the "src/" directory.
Slightly modified version of #Stephen's response (https://stackoverflow.com/a/66083078/4421370) above that returns the files' path relative to the directory you are searching. Or any arbitrary base path you supply to the function call in-place of the default base. If you want the full path just call it as walkSync(dir, dir).
Search Path is: c:\tmp,
File path is c:\tmp\test\myfile.txt,
Result is test\myfile.txt
Hopefully helpful to some.
const fs = require('fs');
const path = require('path');
function *walkSync(dir, base="") {
const files = fs.readdirSync(dir, { withFileTypes: true })
for (const file of files) {
if (file.isDirectory()) {
yield* walkSync(path.join(dir, file.name), path.join(base, file.name));
} else {
yield path.join(base, file.name);
}
}
}
for (const filePath of walkSync(__dirname)) {
console.log(filePath);
}
Here is a compact pure function that returns all the paths (relatives) in the directory.
import path from 'path'
const getFilesPathsRecursively = (directory: string, origin?: string): string[] =>
fs.readdirSync(directory).reduce((files, file) => {
const absolute = path.join(directory, file)
return [
...files,
...(fs.statSync(absolute).isDirectory()
? getFilesPathsRecursively(absolute, origin || directory)
: [path.relative(origin || directory, absolute)]),
]
}, [])
The solution is written in TypeScript.
modern solution with async/await
No external dependencies.
Asynchronous function (non-blocking like other solutions with readdirSync and statSync)
It is extremely fast because multiple processes work in parallel (it is not waiting for a response from every file in the list).
It has also some naive error handling (if something happens with one file or folder it will not blow whole process)
import path from "path";
import fs from "fs/promises"
export default async function readDirectory(directory: string): Promise<string[]> {
const files = await fs.readdir(directory)
const filesPromises = files.map(async (file) => {
try {
const absolutePath = path.join(directory, file);
const fileStat = await fs.stat(absolutePath)
if (fileStat.isDirectory()) {
return await readDirectory(absolutePath);
} else {
return absolutePath;
}
} catch (err) {
// error handling
return [];
}
});
const filesWithArrays = await Promise.all(filesPromises)
const flatArray = filesWithArrays.reduce<string[]>((acc, fileOrArray) => acc.concat(fileOrArray), []);
return flatArray;
}
usage (if this is a separate file please remember to import)
const results = await readDirectory('some/path');
I did mine with typescript works well fairly easy to understand
import * as fs from 'fs';
import * as path from 'path';
export const getAllSubFolders = (
baseFolder: string,
folderList: string[] = []
) => {
const folders: string[] = fs
.readdirSync(baseFolder)
.filter(file => fs.statSync(path.join(baseFolder, file)).isDirectory());
folders.forEach(folder => {
folderList.push(path.join(baseFolder, folder));
getAllSubFolders(path.join(baseFolder, folder), folderList);
});
return folderList;
};
export const getFilesInFolder = (rootPath: string) => {
return fs
.readdirSync(rootPath)
.filter(
filePath => !fs.statSync(path.join(rootPath, filePath)).isDirectory()
)
.map(filePath => path.normalize(path.join(rootPath, filePath)));
};
export const getFilesRecursively = (rootPath: string) => {
const subFolders: string[] = getAllSubFolders(rootPath);
const allFiles: string[][] = subFolders.map(folder =>
getFilesInFolder(folder)
);
return [].concat.apply([], allFiles);
};

Need help parsing id3 tags asyncrhonously in node.js using id3-parser

I have a folder full of mp3s that I would like to parse using id3-parser npm module so that I can return an rss feed. The id3-parser documentation says to use this structure:
id3.parse(filebuffer).then(function(tag){
console.log(tag.title);
});
The id3-parser module uses the promise-a-plus module. The problem with my code is that the tag gets read at the very end of the stack. Here is my test code:
var id3 = require('id3-parser');
var fs = require('fs');
fs.readdir("podcasts", function (err, files) {
files.forEach( function (file) {
var stats = fs.statSync("podcasts/" + file);
if (stats.isFile()) {
var fbuff = fs.readFileSync("podcasts/" + file);
id3.parse(fbuff).then( function (tag) {
console.log("***"+tag.title);
});
console.log("Parsed id3 tag " + file);
}
console.log("Done file");
});
console.log("Done readdir");
});
The output is:
Parsed id3 tag test.mp3
Done file
Done readdir
***Title tag
I cannot for the life of me figure out how to properly wait for the tag to be parsed. Parsing should happen before I put it into the rss feed (have no trouble using the rss module to create that xml feed -- code not shown). I have been reading about promises and I am missing something. I am sure I just need to completely refactor my code, but not sure how to read every file (currently using readdir and forEach) and add each file to the feed.
Try this:
var id3 = require('id3-parser')
var fs = require('fs')
var path = require('path')
function promiseFromCallback (fn) {
return new Promise(function (resolve, reject) {
return fn(function (err, result) {
return err ? reject(err) : resolve(result)
})
})
}
function dirID3 (directory) {
return promiseFromCallback(function (gotDir) {
return fs.readdir(directory, gotDir)
}).then(function (files) {
return Promise.all(files.map(function (file) {
var filePath = path.join(directory, file)
return promiseFromCallback(function (gotStat) {
return fs.stat(filePath, gotStat)
}).then(function (stat) {
return stat.isFile() ? promiseFromCallback(function (gotBuffer) {
return fs.readFile(filePath, gotBuffer)
}).then(function (fbuff) {
return id3.parse(fbuff)
}).then(function (tag) {
return { path: filePath, tag: tag }
}) : null
})
}))
}).then(function (id3Items) {
var result = {}
id3Items.forEach(function (id3Item) {
if (id3Item) result[id3Item.path] = id3Item.tag
})
return result
})
}
Then use it like this:
dirID3('podcasts').then(function (dirInfo) {
console.log('ID3 info for directory', dirInfo)
})

Node.js download multiple files

I need to download multiple files from urls. I have got list of them in the file. How should I do that? I already made it, but it's not working. I need to wain until last download is done before starting next wan. How can I do that?
You want to call the download function from the callback of the file before that. I threw together something, do not consider it pretty nor production ready, please ;-)
var http = require('http-get');
var files = { 'url' : 'local-location', 'repeat-this' : 'as often as you want' };
var MultiLoader = function (files, finalcb) {
var load_next_file = function (files) {
if (Object.keys(files) == 0) {
finalcb(null);
return;
}
var nexturl = Object.keys(files)[0];
var nextfnname = files[nexturl];
console.log('will load ' + nexturl);
http.get(nexturl, nextfnname, function (err, result) {
console.log('loaded ' + nexturl);
delete files[nexturl];
load_next_file(files);
});
};
load_next_file(JSON.parse(JSON.stringify(files)));
};
MultiLoader(files, function () { console.log('finalcb'); });
http-get is not a standard node module, you can install it via npm install http-get.
I think this is what you're looking for.
const fs = require('fs')
const https = require('https')
const downloadFolderPath = 'downloads'
const urls = [
'url 1',
'url 2'
]
const downloadFile = url => {
return new Promise((resolve, reject) => {
const splitUrl = url.split('/')
const filename = splitUrl[splitUrl.length - 1]
const outputPath = `${downloadFolderPath}/${filename}`
const file = fs.createWriteStream(outputPath)
https.get(url, res => {
if (res.statusCode === 200) {
res.pipe(file).on('close', resolve)
} else {
reject(res.statusCode)
}
})
})
}
if (!fs.existsSync(downloadFolderPath)) {
fs.mkdirSync(downloadFolderPath)
}
let downloadedFiles = 0
urls.forEach(async url => {
await downloadFile(url)
downloadedFiles++
console.log(`${downloadedFiles}/${urls.length} downloaded`)
})
You can read files using fs (var fs = require('fs');)in node js
fs.readFile('<filepath>', "utf8", function (err, data) {
if (err) throw err;
console.log(data);
});

Resources