How to get totalsize of files in directory? - node.js

What is the best way to get the total size of the files in a directory?

Here is a simple solution using the core Nodejs fs libraries combined with the async library. It is fully asynchronous and should work just like the 'du' command.
var fs = require('fs'),
path = require('path'),
async = require('async');
/**
 * Recursively computes the size in bytes of a file or directory tree.
 * A directory contributes its own lstat size plus the size of everything
 * below it; symlinks are measured with lstat and therefore not followed.
 *
 * @param {string} item - Path of the file or directory to measure.
 * @param {function(?Error, number=)} cb - Receives the error, or the total size in bytes.
 */
function readSizeRecursive(item, cb) {
  fs.lstat(item, function(err, stats) {
    if (err) return cb(err);

    // Anything that is not a directory contributes just its own size.
    if (!stats.isDirectory()) return cb(null, stats.size);

    var total = stats.size;
    fs.readdir(item, function(err, list) {
      if (err) return cb(err);
      async.forEach(
        list,
        function(diritem, callback) {
          readSizeRecursive(path.join(item, diritem), function(err, size) {
            // `size` is undefined when the child failed; adding it would turn the total into NaN.
            if (!err) total += size;
            callback(err);
          });
        },
        function(err) {
          if (err) return cb(err);
          cb(null, total);
        }
      );
    });
  });
}

I tested the following code and it works perfectly fine.
Please do let me know if there is anything that you don't understand.
// Report the disk usage of a directory by delegating to the system `du` tool.
var util = require('util');
var spawn = require('child_process').spawn;
var size = spawn('du', ['-sh', '/path/to/dir']);

// Print whatever `du` writes to its standard output.
function reportSize(data) {
  console.log('size: ' + data);
}
size.stdout.on('data', reportSize);

// --- Everything below is optional ---
// Surface anything `du` writes to its standard error.
function reportStderr(data) {
  console.log('stderr: ' + data);
}
size.stderr.on('data', reportStderr);

// Log the exit code once the child process has terminated.
function reportExit(code) {
  console.log('child process exited with code ' + code);
}
size.on('exit', reportExit);
Courtesy Link
2nd method:
// Run `du -sh` through a shell and print its result once the command finishes.
var util = require('util'), exec = require('child_process').exec, child;
child = exec('du -sh /path/to/dir', function(error, stdout, stderr){
  // stdout carries the size summary, which is the whole point of the call.
  console.log('stdout: ' + stdout);
  console.log('stderr: ' + stderr);
  if (error !== null){
    console.log('exec error: ' + error);
  }
});
You might want to refer to the Node.js API documentation for child_process.

Use du : https://www.npmjs.org/package/du
// Ask the `du` package for the total size of the directory and print it.
require('du')('/home/rvagg/.npm/', function onSize(err, size) {
  console.log('The size of /home/rvagg/.npm/ is:', size, 'bytes');
});

ES6 variant:
import path_module from 'path'
import fs from 'fs'
/**
 * Computes the size in bytes of a filesystem folder (or a single file).
 * A folder contributes its own lstat size plus the size of everything below
 * it. Symlinks are measured with lstat and therefore not followed.
 *
 * @param {string} path - File or folder to measure.
 * @param {function(?Error, number=)} callback - Invoked exactly once with
 *   either an error or the total size.
 */
export function fs_size(path, callback)
{
	fs.lstat(path, function(error, stats)
	{
		if (error)
		{
			return callback(error)
		}

		if (!stats.isDirectory())
		{
			return callback(undefined, stats.size)
		}

		fs.readdir(path, function(error, names)
		{
			if (error)
			{
				return callback(error)
			}

			let total = stats.size
			let left = names.length
			// Children complete in parallel; this guard guarantees a single callback
			// even when several of them fail or one fails before the others finish.
			let finished = false

			if (left === 0)
			{
				return callback(undefined, total)
			}

			function done(error, size)
			{
				if (finished)
				{
					return
				}

				if (error)
				{
					finished = true
					return callback(error)
				}

				total += size
				left--

				if (left === 0)
				{
					finished = true
					callback(undefined, total)
				}
			}

			for (const name of names)
			{
				fs_size(path_module.join(path, name), done)
			}
		})
	})
}

Review the node.js File System functions. It looks like you can use a combination of fs.readdir(path, [cb]), and fs.stat(file, [cb]) to list the files in a directory and sum their sizes.
Something like this (totally untested):
// Sums the sizes of the plain files stored directly in one directory
// (no recursion) and prints the total once every stat call has completed.
var fs = require('fs');
var path = require('path');
var dir = '/path/to/dir';
fs.readdir(dir, function(err, files) {
  var totalSizeBytes = 0;
  var pending;
  if (err) throw err;
  // Countdown latch: one stat call is outstanding per directory entry.
  pending = files.length;
  if (pending === 0) {
    console.log(totalSizeBytes);
    return;
  }
  files.forEach(function(file) {
    // readdir yields bare names, so resolve them against the directory
    // rather than against the current working directory.
    fs.stat(path.join(dir, file), function(err, stats) {
      if (err) { throw err; }
      if (stats.isFile()) { totalSizeBytes += stats.size; }
      pending -= 1;
      if (pending === 0) {
        console.log(totalSizeBytes);
      }
    });
  });
});
Note that this solution only considers the plain files stored directly in the target directory and performs no recursion. A recursive solution would come naturally by checking stats.isDirectory() and entering, although it likely complicates the "wait for completion" step.

'use strict';
const async = require('async');
const fs = require('fs');
const path = require('path')
/**
 * Recursively sums the sizes of all non-directory entries below `item`.
 * Directories themselves contribute nothing; symlinks are measured with
 * lstat and therefore not followed.
 *
 * @param {string} item - File or directory to measure.
 * @param {function(?Error, number=)} callback - Receives the error, or the total in bytes.
 */
const getSize = (item, callback) => {
  fs.lstat(item, (err, stats) => {
    if (err) return callback(err);

    if (!stats.isDirectory()) {
      // Ensure fully asynchronous API
      process.nextTick(() => callback(null, stats.size));
      return;
    }

    fs.readdir(item, (err, list) => {
      if (err) return callback(err);

      let totalSize = 0;
      async.each(list, (listItem, cb) => {
        getSize(path.join(item, listItem), (err, size) => {
          // Propagate the failure; adding an undefined size would yield NaN.
          if (err) return cb(err);
          totalSize += size;
          cb();
        });
      },
      (err) => {
        if (err) return callback(err);
        callback(null, totalSize);
      });
    });
  });
};
// Example: print the total size of /Applications; failures are ignored.
getSize('/Applications', (err, totalSize) => {
  if (err) return;
  console.log(totalSize);
});

I know I'm a bit late to the party, but I thought I'd include my solution, which uses promises and is based on #maerics answer:
// Sums the sizes of the plain files stored directly in ./storage (no
// recursion), stat-ing one entry at a time, and prints the total.
const fs = require('fs');
const Promise = require('bluebird');
var totalSizeBytes=0;
fs.readdir('storage', function(err, files) {
  if (err) throw err;
  Promise.mapSeries(files, function(file){
    return new Promise((resolve, reject) => {
      fs.stat('storage/' + file, function(err, stats) {
        // Throwing here would escape the promise; reject so the chain sees it.
        if (err) { return reject(err); }
        if (stats.isFile()) { totalSizeBytes += stats.size; }
        // Always settle: leaving non-files pending would stall mapSeries forever.
        resolve();
      });
    });
  }).then(()=>{
    console.log(totalSizeBytes);
  }).catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
});

/**
 * Computes the size of a project folder located under `../projects/`
 * (relative to this file) and resolves with the result formatted by
 * `convertBytes`.
 *
 * The folder's own lstat size and the lstat size of every direct entry are
 * counted. When `nested` is 0, direct sub-directories are additionally
 * descended into by one level; deeper directories only contribute their own
 * entry size.
 *
 * @param {string} folder - Folder name relative to `../projects/`.
 * @param {number} [nested=0] - Pass a non-zero value to skip descending into sub-directories.
 * @returns {Promise<*>} Resolves with `convertBytes(totalBytes)`; rejects if any entry cannot be read.
 */
function readSizeRecursive(folder, nested = 0) {
  const base = path.resolve(__dirname, '../projects/');

  // Works on raw byte counts only, so the unit conversion happens exactly
  // once on the final total instead of being mixed into the sum.
  const sumBytes = (relative, depth) => {
    const dir = path.resolve(base, relative);
    let total = fs.lstatSync(dir).size;
    for (const name of fs.readdirSync(dir)) {
      const stat = fs.lstatSync(path.join(dir, name));
      if (stat.isDirectory() && depth == 0) {
        // Descend into the sub-directory itself, not into `folder` again.
        total += sumBytes(path.join(relative, name), 1);
      } else {
        total += stat.size;
      }
    }
    return total;
  };

  // A synchronous throw inside the executor becomes a rejection.
  return new Promise(function(resolve) {
    resolve(convertBytes(sumBytes(folder, nested)));
  });
}
/**
 * Formats a byte count as a human-readable string using 1024-based units.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} "n/a" for zero, negative or non-numeric input; the plain
 *   count for values below 1 KB (e.g. "512 Bytes"); otherwise one decimal
 *   place plus unit (e.g. "1.5 KB"). Values beyond the largest unit are
 *   expressed in TB.
 */
const convertBytes = function(bytes) {
  const sizes = ["Bytes", "KB", "MB", "GB", "TB"];
  if (!(bytes > 0)) {
    return "n/a";
  }
  // Repeated division avoids the floating-point error of log(bytes)/log(1024)
  // at exact unit boundaries and naturally clamps at the last unit.
  let value = Number(bytes);
  let unit = 0;
  while (value >= 1024 && unit < sizes.length - 1) {
    value /= 1024;
    unit += 1;
  }
  if (unit === 0) {
    return bytes + " " + sizes[0];
  }
  // Keep the unit: a bare "1.5" cannot be told apart between KB, MB and GB.
  return value.toFixed(1) + " " + sizes[unit];
};

This combines async/await and the fs Promises API introduced in Node.js v14.0.0 for a clean, readable implementation:
const { readdir, stat } = require('fs/promises');
/**
 * Sums the `size` reported by stat for every direct entry of a directory
 * (no recursion).
 *
 * @param {string} directory - Directory whose entries are measured.
 * @returns {Promise<number>} Total of the entries' sizes in bytes.
 */
const dirSize = async directory => {
  const files = await readdir( directory );
  // Await all stat calls together: iterating the pending promises one by one
  // would leave a later promise without a handler if it rejects early.
  const stats = await Promise.all( files.map( file => stat( path.join( directory, file ) ) ) );
  let size = 0;
  for ( const entry of stats ) size += entry.size;
  return size;
};
Usage:
// NOTE: `await` at the top level is only valid inside an ES module; in a
// CommonJS script this call has to be wrapped in an async function.
const size = await dirSize( '/path/to/directory' );
console.log( size );
A shorter-but-less-readable alternative to the dirSize function would be:
// Sums the stat-reported size of every direct entry of `directory`.
const dirSize = async directory => {
  const entries = await readdir( directory );
  const pending = entries.map( entry => stat( path.join( directory, entry ) ) );
  const sizes = ( await Promise.all( pending ) ).map( info => info.size );
  return sizes.reduce( ( sum, bytes ) => sum + bytes, 0 );
}

A very simple synchronous solution that I implemented.
const fs = require("fs");
/**
 * Synchronously computes the total size in bytes of a file, or of every
 * non-directory entry below a folder.
 *
 * The path passed in is resolved with stat (so a symlink to a directory is
 * traversed), but entries found while walking are inspected with lstat:
 * symlinks inside the tree count with their own size and are not followed,
 * which keeps cyclic links from causing unbounded recursion.
 *
 * @param {string} path - File or folder to measure.
 * @returns {number} Total size in bytes.
 */
function getSize(path){
    // Each entry is stat-ed exactly once and the result is reused for both the
    // directory check and the size.
    const measure = (target, stats) => {
        if (!stats.isDirectory()) {
            return stats.size;
        }
        let size = 0;
        for (const file of fs.readdirSync(target)) {
            const child = target + "/" + file;
            size += measure(child, fs.lstatSync(child));
        }
        return size;
    };
    return measure(path, fs.statSync(path));
}

Related

Modify the value of a variable outside callback with the callback inside loop

I am new to Node.js and I am facing a problem: modifying the value of a variable declared outside a callback from inside that callback, when the callback is registered inside a loop.
I am coding an online-judge project; this is my function to check the output of a program against the answer from the database. I created a result object to store the number of correct test cases.
// Runs the submitted executable and compares its output with the expected
// outputs stored in the "sample" collection, reporting the counts via `callback`.
// NOTE(review): the closing brace of this function is missing from this snippet.
function compareResult(fileName, problem, timeLimit, callback) {
const cp = require('child_process');
const exePath = 'submit\\' + fileName + '.exe';
// A single child process is spawned up front and shared by every test case below.
const child = cp.spawn(exePath, ['--from=markdown', '--to=html'], {timeout: timeLimit});
MongoClient.connect(uri, function(err, db) {
if (err) throw err;
var dbo = db.db(dbName);
var query = { id_problem: problem, is_eg: "false" };
var proj = { projection: {input: 1, output: 1} };
dbo.collection("sample").find(query, proj).toArray(function(err, arr) {
if (err) throw err;
if (arr != null) {
var result = {
correct: 0,
total: arr.length
};
for (const json of arr) {
const answer = json['output'];
child.stdin.write(json['input']);
// This handler only runs later, when the child emits output; it does not run
// during the loop iteration that registers it.
child.stdout.on('data', function(data) {
if (data == answer) {
result.correct += 1; // I want to modify result object here.
}
});
child.stdin.end();
};
// These two statements run synchronously right after the loop, i.e. before any
// 'data' handler has fired, so `result.correct` has not been incremented yet.
console.log(result);
callback(result);
}
});
});
I want to modify result object in that place. How will I do it?
/**
 * Runs the submitted executable once per stored test case and reports how
 * many runs produced exactly the expected output.
 *
 * Each test case gets its own child process, so its input can be written and
 * closed independently and its complete output can be matched against its
 * own expected answer.
 *
 * @param {string} fileName - Base name of the executable inside `submit\`.
 * @param {*} problem - Problem id used to look up the test cases.
 * @param {number} timeLimit - Passed to spawn as the `timeout` option.
 * @param {function({correct: number, total: number})} callback - Called once,
 *   after every test case has finished (immediately if there are none).
 */
function compareResult(fileName, problem, timeLimit, callback) {
  const cp = require('child_process');
  const exePath = 'submit\\' + fileName + '.exe';
  MongoClient.connect(uri, function(err, db) {
    if (err) throw err;
    var dbo = db.db(dbName);
    var query = { id_problem: problem, is_eg: "false" };
    var proj = { projection: {input: 1, output: 1} };
    dbo.collection("sample").find(query, proj).toArray(function(err, arr) {
      if (err) throw err;
      if (arr == null) return;

      var result = {
        correct: 0,
        total: arr.length
      };
      // Separate countdown so that `result.total` keeps its meaning.
      var pending = arr.length;
      if (pending === 0) return callback(result);

      arr.forEach(function(json) {
        const answer = json['output'];
        const child = cp.spawn(exePath, ['--from=markdown', '--to=html'], {timeout: timeLimit});
        let output = '';
        let finished = false;

        // Records the verdict for this test case exactly once.
        const finish = function(passed) {
          if (finished) return;
          finished = true;
          if (passed) result.correct += 1;
          pending -= 1;
          if (pending === 0) callback(result);
        };

        // Output may arrive in several chunks; compare only the full text.
        child.stdout.on('data', function(data) {
          output += data;
        });
        // Spawn failures count as a failed test case.
        child.on('error', function() {
          finish(false);
        });
        child.on('close', function() {
          finish(output == answer);
        });
        // The program may exit without reading its input; the verdict still
        // comes from 'close', so a broken pipe here is not fatal.
        child.stdin.on('error', function() {});
        child.stdin.end(json['input']);
      });
    });
  });
}

Node.js split file lines

I want to write a script that divides the lines read from a file into batches of 25. Unfortunately, the first batch ends up with 40 codes instead. For example, if I have 60 codes, I expect two batches of 25 and one batch of 10. Unfortunately, I can't get it to work.
const fs = require('fs');
// Reads code.txt and tries to send its lines to DynamoDB in batches.
fs.readFile('code.txt', function (err, data) {
if (err) throw err;
const array = data.toString().split("\n");
let count = 0;
// `items` keeps growing across iterations until it is reassigned in the
// batchWrite callback below.
let items = [];
for (let i in array) {
items.push({
PutRequest: {
Item: {
code: array[i]
}
}
});
let params = {
RequestItems: {
'TABLE_NAME': items
}
};
// `count` is incremented after the push, so when it equals 25 `items` already
// holds 26 entries.
if (count === 25) {
dynamoDB.batchWrite(params, function (err, data) {
if (err) {
console.log(err);
} else {
// The reset only happens when this callback is invoked; until then the loop
// keeps running with the old `count` and `items`.
count = 0;
items = [];
}
});
}else{
count++;
}
}
});
code.txt content
https://0bin.net/paste/NA8-4hkq#1Ohwt5uUkQqE0YscwnxTX2gxEqlvAUVKp1JRipBCsZg
Any idea what I do wrong?
Your dynamoDB.batchWrite() is asynchronous. Thus its callback is executed only after the loop has completed, so items and count are never reset while the loop is running.
The easiest solution would be to switch to a promise-based approach like the following:
const BATCHSIZE = 25;
const fs = require('fs').promises;
/**
 * Uploads the lines of code.txt to DynamoDB in consecutive batches of at
 * most BATCHSIZE items, waiting for each batch before sending the next.
 * Rejects with the first error reported by batchWrite.
 */
async function batchLoad() {
  const content = await fs.readFile("code.txt", "utf-8");
  const remaining = content.split("\n");

  const toPutRequest = code => ({ PutRequest: { Item: { code: code } } });
  const writeBatch = requests => new Promise((resolve, reject) => {
    dynamoDb.batchWrite({ RequestItems: { TABLE_NAME: requests } }, (failure) => {
      if (failure) {
        reject(failure);
        return;
      }
      resolve();
    });
  });

  // splice() removes the batch from the front, so the loop ends once every
  // line has been handed to a batch.
  while (remaining.length > 0) {
    const batch = remaining.splice(0, BATCHSIZE).map(line => toPutRequest(line));
    await writeBatch(batch);
  }
}
A callback based approach could look like this
const BATCHSIZE = 25;
// Uploads the lines of code.txt in batches of BATCHSIZE, starting the next
// batch only after the previous one has been written successfully.
fs.readFile("code.txt", "utf-8", (err, data) => {
  if (err) return console.error(err);
  const lines = data.split("\n");

  // Takes the next BATCHSIZE lines off the front of `lines` and writes them.
  function writeBatch() {
    if (!lines.length) return;
    const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
    const params = { RequestItems: { TABLE_NAME: items}};
    dynamoDb.batchWrite(params, err => {
      // Stop on the first failed batch and report it.
      if (err) return console.error(err);
      writeBatch();
    });
  }

  writeBatch();
});
The function writeBatch takes a certain number of lines from your original array and writes them into the database. Only after the write into the DB was successful does it recursively call itself to handle the next batch. But be aware that this approach may exceed the maximum call stack size and throw an error.
You can also make either of these approaches avoid manipulating the lines array (which may be quite expensive) and instead just take the current slice:
const BATCHSIZE = 25;
const fs = require('fs').promises;
/**
 * Uploads the lines of code.txt to DynamoDB in consecutive batches of at
 * most BATCHSIZE items without modifying the array of lines. Each batch is
 * awaited before the next one is sent; the first batchWrite error rejects.
 */
async function batchLoad() {
  const text = await fs.readFile("code.txt", "utf-8");
  const lines = text.split("\n");

  for (let start = 0; start < lines.length; start += BATCHSIZE) {
    const requests = lines
      .slice(start, start + BATCHSIZE)
      .map(line => ({ PutRequest: { Item: { code: line } } }));
    const payload = { RequestItems: { TABLE_NAME: requests } };

    await new Promise((resolve, reject) => {
      dynamoDb.batchWrite(payload, (failure) => {
        if (failure) {
          reject(failure);
          return;
        }
        resolve();
      });
    });
  }
}
and
const BATCHSIZE = 25;
// Uploads the lines of code.txt in batches of BATCHSIZE without modifying
// the array of lines; the next batch starts only after the previous one has
// been written successfully.
fs.readFile("code.txt", "utf-8", (err, data) => {
  if (err) return console.error(err);
  const lines = data.split("\n");

  // Writes the batch that starts at `currentIndex`, then continues with the next one.
  function writeBatch(currentIndex) {
    if (currentIndex >= lines.length) return;
    const items = lines.slice(currentIndex, currentIndex + BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
    const params = { RequestItems: { TABLE_NAME: items}};
    dynamoDb.batchWrite(params, err => {
      // Stop on the first failed batch and report it.
      if (err) return console.error(err);
      writeBatch(currentIndex + BATCHSIZE);
    });
  }

  writeBatch(0);
});
To prevent stumbling into a maximum callstack exception you may also add the next batch to the eventloop and not call it recursively. Ie
dynamoDb.batchWrite(params, err => {
  // Stop on the first failed batch and report it.
  if (err) return console.error(err);
  // Schedule the next batch as a separate timer task instead of calling it
  // directly from this callback.
  setTimeout(()=> { writeBatch(currentIndex + BATCHSIZE);}, 0);
});
This way you won't build up a massive callstack from recursive calls.
To keep track of how many records are already saved to the db you could simply store the current counter in a file. When you restart the process, load that file and check how many lines to skip. Don't forget to remove the file, once all records have been saved ... For example with the first approach:
const BATCHSIZE = 25;
const fs = require('fs').promises;
/**
 * Uploads the lines of code.txt to DynamoDB in batches of at most BATCHSIZE
 * items and can resume after an interruption.
 *
 * After every successful batch the number of lines written so far is stored
 * in skip.txt. On start, that many lines are skipped; once everything has
 * been written the checkpoint file is removed.
 */
async function batchLoad() {
  const lines = (await fs.readFile("code.txt", "utf-8")).split("\n");

  // Must be reassignable: it is loaded from the checkpoint and advanced per batch.
  let skipLines = 0;
  try {
    const saved = Number.parseInt(await fs.readFile("skip.txt", "utf-8"), 10);
    if (Number.isInteger(saved) && saved > 0) skipLines = saved;
  } catch (e) {
    // Best effort: without a readable checkpoint, start from the first line.
    skipLines = 0;
  }
  lines.splice(0, skipLines);

  while (lines.length > 0) {
    const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
    const params = { RequestItems: { TABLE_NAME: items}};
    await new Promise((resolve, reject) => {
      dynamoDb.batchWrite(params, (err) => {
        if (err) return reject(err);
        resolve();
      });
    });
    // Count what was actually written; the last batch may be shorter.
    skipLines += items.length;
    await fs.writeFile("skip.txt", `${skipLines}`);
  }

  try {
    await fs.unlink("skip.txt");
  } catch (e) {
    // A missing checkpoint is fine. Any other failure must surface, because a
    // stale skip.txt would make the next run skip lines it never wrote.
    if (e.code !== "ENOENT") throw e;
  }
}

How to convert all djvu files to pdf

Here is my answer: just use Node.js and the ddjvu tool from the DjView library.
The script is shown below.
imports
const fs = require('fs');
const os = require('os');
const {spawn} = require('child_process');
const path = require('path');
// Leave one core for the rest of the system, but always allow at least one
// conversion so that single-core machines still make progress.
const maxProcess = Math.max(1, os.cpus().length - 1);
// Number of conversions currently running.
let nowPlayed = 0;
A function that converts one file and deletes the original once it has been converted:
/**
 * Converts one .djvu file to PDF with `ddjvu` and deletes the source file
 * after a successful conversion.
 *
 * @param {Array<string>} args - Regex match for the file: args[0] is the
 *   path of the .djvu file, args[1] the same path without the extension.
 */
function chpoc(args) {
    const source = args[0];
    const target = args[1] + ".pdf";

    // Frees this conversion's slot exactly once, whatever the outcome, so a
    // failure cannot permanently reduce the number of parallel conversions.
    let released = false;
    const release = () => {
        if (released) return;
        released = true;
        nowPlayed--;
    };

    console.log(args[1] + " start converting");
    spawn(`ddjvu`, ["-format=pdf", source, target])
        .on('error', (err) => {
            // Emitted e.g. when ddjvu cannot be started at all.
            console.error(err);
            release();
        })
        .on('close', (code) => {
            if (released) return;
            // Never delete the source unless ddjvu reported success.
            if (code !== 0) {
                console.error(target + " conversion failed with exit code " + code);
                release();
                return;
            }
            console.log(target + " converted");
            fs.unlink(source, (err) => {
                release();
                if (err) {
                    console.error(err);
                    return;
                }
                console.log(source + ' successfully deleted!');
            });
        });
}
queue for optimize max convertions at one time
// Pending conversion jobs; each entry is a function that starts one conversion.
let queue = [];

// Starts at most one queued job per call, provided a conversion slot is free.
function startQueue() {
    const hasFreeSlot = nowPlayed < maxProcess;
    if (!hasFreeSlot || queue.length === 0) {
        return;
    }
    nowPlayed++;
    const job = queue.pop();
    job();
}

// Poll the queue twice a second.
setInterval(startQueue, 500)
Fill the queue and start it:
/**
 * Queues a conversion job for `filepath` if it names a .djvu file.
 *
 * @param {string} filepath - Path of a candidate file.
 */
function workWithFile(filepath) {
    // Anchored on both ends so that args[0] is always the complete path and
    // names that merely contain ".djvu" (e.g. "x.djvu.bak") are ignored.
    const args = /^(.*)\.djvu$/s.exec(filepath);
    if (args === null) {
        return;
    }
    queue.push(() => {
        chpoc(args);
    });
}
show errors
// Logs the given error, if there is one; falsy values are ignored.
const eachCallback = function (err) {
    if (err) {
        console.error(err);
    }
}
Walk the directory tree and find the .djvu files:
// Every .djvu path found so far.
let filePaths = [];

/**
 * Recursively walks `dirPath` and reports every regular file whose name ends
 * in ".djvu".
 *
 * @param {string} dirPath - Directory to scan.
 * @param {function(string)} callback - Called once per .djvu file with its
 *   path. It only ever receives file paths; errors go to `eachCallback`.
 */
function getFiles(dirPath, callback) {
    fs.readdir(dirPath, function (err, files) {
        // Report unreadable directories the same way as failed stat calls,
        // instead of handing the error to the per-file callback.
        if (err) return eachCallback(err);
        files.forEach((fileName) => {
            const filePath = path.join(dirPath, fileName);
            fs.stat(filePath, function (err, stat) {
                if (err) return eachCallback(err);
                if (stat.isDirectory()) {
                    getFiles(filePath, callback);
                } else if (stat.isFile() && /\.djvu$/.test(filePath)) {
                    filePaths.push(filePath);
                    callback(filePath);
                }
            });
        });
    });
}
init from started dir
// Start scanning at the directory that contains this script and queue every
// .djvu file that is found.
getFiles(__dirname, (djvuPath) => {
    workWithFile(djvuPath);
});
imports
const fs = require('fs');
const os = require('os');
const {spawn} = require('child_process');
const path = require('path');
// Leave one core for the rest of the system, but always allow at least one
// conversion so that single-core machines still make progress.
const maxProcess = Math.max(1, os.cpus().length - 1);
// Number of conversions currently running.
let nowPlayed = 0;
method for convert file, and delete when converted.
/**
 * Converts one .djvu file to PDF with `ddjvu` and deletes the source file
 * after a successful conversion.
 *
 * @param {Array<string>} args - Regex match for the file: args[0] is the
 *   path of the .djvu file, args[1] the same path without the extension.
 */
function chpoc(args) {
    const source = args[0];
    const target = args[1] + ".pdf";

    // Frees this conversion's slot exactly once, whatever the outcome, so a
    // failure cannot permanently reduce the number of parallel conversions.
    let released = false;
    const release = () => {
        if (released) return;
        released = true;
        nowPlayed--;
    };

    console.log(args[1] + " start converting");
    spawn(`ddjvu`, ["-format=pdf", source, target])
        .on('error', (err) => {
            // Emitted e.g. when ddjvu cannot be started at all.
            console.error(err);
            release();
        })
        .on('close', (code) => {
            if (released) return;
            // Never delete the source unless ddjvu reported success.
            if (code !== 0) {
                console.error(target + " conversion failed with exit code " + code);
                release();
                return;
            }
            console.log(target + " converted");
            fs.unlink(source, (err) => {
                release();
                if (err) {
                    console.error(err);
                    return;
                }
                console.log(source + ' successfully deleted!');
            });
        });
}
A queue that limits the number of conversions running at one time:
let queue = [];
// Starts at most one queued job per call, provided a conversion slot is free.
function startQueue() {
    const hasFreeSlot = nowPlayed < maxProcess;
    if (!hasFreeSlot || queue.length === 0) {
        return;
    }
    nowPlayed++;
    const job = queue.pop();
    job();
}

// Poll the queue twice a second.
setInterval(startQueue, 500)
Fill the queue and start it:
/**
 * Queues a conversion job for `filepath` if it names a .djvu file.
 *
 * @param {string} filepath - Path of a candidate file.
 */
function workWithFile(filepath) {
    // Anchored on both ends so that args[0] is always the complete path and
    // names that merely contain ".djvu" (e.g. "x.djvu.bak") are ignored.
    const args = /^(.*)\.djvu$/s.exec(filepath);
    if (args === null) {
        return;
    }
    queue.push(() => {
        chpoc(args);
    });
}
show errors
// Logs the given error, if there is one; falsy values are ignored.
const eachCallback = function (err) {
    if (err) {
        console.error(err);
    }
}
Walk the directory tree and find the .djvu files:
// Every .djvu path found so far.
let filePaths = [];

/**
 * Recursively walks `dirPath` and reports every regular file whose name ends
 * in ".djvu".
 *
 * @param {string} dirPath - Directory to scan.
 * @param {function(string)} callback - Called once per .djvu file with its
 *   path. It only ever receives file paths; errors go to `eachCallback`.
 */
function getFiles(dirPath, callback) {
    fs.readdir(dirPath, function (err, files) {
        // Report unreadable directories the same way as failed stat calls,
        // instead of handing the error to the per-file callback.
        if (err) return eachCallback(err);
        files.forEach((fileName) => {
            const filePath = path.join(dirPath, fileName);
            fs.stat(filePath, function (err, stat) {
                if (err) return eachCallback(err);
                if (stat.isDirectory()) {
                    getFiles(filePath, callback);
                } else if (stat.isFile() && /\.djvu$/.test(filePath)) {
                    filePaths.push(filePath);
                    callback(filePath);
                }
            });
        });
    });
}
init from started dir
// Start scanning at the directory that contains this script and queue every
// .djvu file that is found.
getFiles(__dirname, (djvuPath) => {
    workWithFile(djvuPath);
});

Node js Promises with recursive function

I want to read all the (text) files from a specific directory and all of its subdirectories, recursively. I am able to read the files and append the results to a global variable, but I want to access the variable after all operations have finished. I am trying to do this with promises, but I am unable to access it. Please help.
var file_path = `C:\\Users\\HP\\Desktop\\test_folder`;
const fs = require('fs');
// Shared accumulator that every (recursive) call appends file contents to.
var final_array = [];
// Reads every entry of `file_path`: entries whose name ends in "txt" are read
// and appended to `final_array`; every other entry is treated as a directory
// and scanned recursively.
let getFolderTree = function(file_path) {
return new Promise(function(resolve, reject) {
fs.readdir(file_path, function(err, folders) {
if (err) {
// The promise is neither resolved nor rejected on this path.
console.log("error reading folder :: " + err);
} else {
if (folders.length !== 0) {
for (let i = 0; i < folders.length; i++) {
if (folders[i].endsWith("txt")) {
let text_file_path = file_path + `\\` + folders[i];
fs.readFile(text_file_path, function(error_read, data) {
if (error_read) {
console.log("error reading " + error_read);
} else {
// The first readFile to finish resolves the promise, and the resolved value is
// the return value of push() (the new array length), not the array itself.
return resolve(final_array.push(data));// want to access final_array at the end of all operations
}
});
} else {
let current_path = file_path + `\\` + folders[i];
// The promise returned by the recursive call is discarded, so nothing waits
// for the sub-directory scan to finish.
getFolderTree(current_path);
}
}
}
}
});
});
}
getFolderTree(file_path).then(function() {
console.log(final_array); // this is not working
});
I think I have found the solution, but I am still confused about how it works.
I took some other code as a reference and was able to figure it out somehow.
var fs = require('fs');
var path = require('path');
let root_path = "C:\\Users\\HP\\Desktop\\test_folder";
/**
 * Recursively reads every file below `current_path`.
 *
 * @param {string} current_path - Directory to scan.
 * @returns {Promise<string[]>} Resolves with the contents of all files in the
 *   tree (order not guaranteed); rejects with the first readdir, stat or
 *   readFile error.
 */
function getAllDirectoriesPath(current_path) {
    return new Promise(function (resolve, reject) {
        fs.readdir(current_path, function (readdirErr, entries) {
            if (readdirErr) {
                return reject(readdirErr);
            }

            var results = [];
            // Countdown of entries that still have to report back.
            var pending = entries.length;
            if (pending === 0) {
                return resolve(results);
            }
            var finishOne = function () {
                pending -= 1;
                if (pending === 0) {
                    resolve(results);
                }
            };

            entries.forEach(function (name) {
                var entryPath = path.resolve(current_path, name);
                fs.stat(entryPath, function (statErr, stat) {
                    if (statErr) {
                        return reject(statErr);
                    }
                    if (stat.isDirectory()) {
                        // Failures in a sub-directory reject this promise as well.
                        getAllDirectoriesPath(entryPath).then(function (res) {
                            results = results.concat(res);
                            finishOne();
                        }, reject);
                    } else {
                        fs.readFile(entryPath, function (readErr, data) {
                            if (readErr) {
                                return reject(readErr);
                            }
                            results.push(data.toString());
                            finishOne();
                        });
                    }
                });
            });
        });
    });
}
// Read every file below root_path and print the collected contents.
getAllDirectoriesPath(root_path).then((contents) => {
    console.log(contents);
});

How to know non blocking Recursive job is complete in nodejs

I have written this sample non-blocking, recursive file search in Node.js. The problem is that I am unable to figure out when the task is complete, which I need, for example, to calculate the time taken by the task.
fs = require('fs');
// Recursively scans `dirToScan` and prints the path of every entry whose name
// contains `stringToSearch`.
searchApp = function() {
var dirToScan = 'D:/';
var stringToSearch = 'test';
// `done` is passed down through every recursive call but is never invoked
// anywhere in this function.
var scan = function(dir, done) {
fs.readdir(dir, function(err, files) {
files.forEach(function (file) {
var abPath = dir + '/' + file;
// This try/catch only covers a synchronous throw from the fs.lstat call
// itself, not anything that happens later inside its callback.
try {
fs.lstat(abPath, function(err, stat) {
if(!err && stat.isDirectory()) {
scan(abPath, done);;
}
});
}
catch (e) {
console.log(abPath);
console.log(e);
}
// Runs for every directory entry, without waiting for the lstat result.
matchString(file,abPath);
});
});
}
// Prints the full path when the entry name contains the search string.
var matchString = function (fileName, fullPath) {
if(fileName.indexOf(stringToSearch) != -1) {
console.log(fullPath);
}
}
var onComplte = function () {
console.log('Task is completed');
}
scan(dirToScan,onComplte);
}
searchApp();
Above code do the search perfectly, but I am unable to figure out when the recursion will end.
It's not that straightforward; I guess you have to rely on a timer and promises.
fs = require('fs');
var Q = require('q');
/**
 * Recursively scans `dirToScan` and prints the path of every entry whose
 * name contains `stringToSearch`.
 *
 * Completion is detected heuristically: every directory listing re-arms a
 * 5 second idle timer. When the timer finally fires, 'Task is completed' is
 * printed as soon as all lstat calls started so far have finished.
 */
searchApp = function() {
    var dirToScan = 'D:/';
    var stringToSearch = 'test';
    // One promise per directory entry; settled when its lstat call has finished.
    var promises = [ ];
    var traverseWait = 0;
    var onTraverseComplete = function() {
        Q.allSettled(promises).then(function(){
            console.log('Task is completed');
        });
    }
    // (Re)starts the idle timer; called whenever the traversal makes progress.
    var waitForTraverse = function(){
        if(traverseWait){
            clearTimeout(traverseWait);
        }
        traverseWait = setTimeout(onTraverseComplete, 5000);
    }
    var scan = function(dir) {
        fs.readdir(dir, function(err, files) {
            // Arm the timer for every listing, so that empty or unreadable
            // directories (including the root) still lead to completion.
            waitForTraverse();
            if (err) {
                // Unreadable directory: report it and keep scanning the rest.
                console.log(dir);
                console.log(err);
                return;
            }
            files.forEach(function (file) {
                var abPath = dir + '/' + file;
                var future = Q.defer();
                // Track the promise itself (not the deferred) so that allSettled
                // really waits for the lstat call below.
                promises.push(future.promise);
                try {
                    fs.lstat(abPath, function(err, stat) {
                        if(!err && stat.isDirectory()) {
                            scan(abPath);
                        }
                        future.resolve(abPath);
                    });
                }
                catch (e) {
                    console.log(abPath);
                    console.log(e);
                    future.resolve(abPath);
                }
                matchString(file,abPath);
                waitForTraverse();
            });
        });
    }
    // Prints the full path when the entry name contains the search string.
    var matchString = function (fileName, fullPath) {
        if(fileName.indexOf(stringToSearch) != -1) {
            console.log(fullPath);
        }
    }
    scan(dirToScan);
}
searchApp();

Resources