How to know non blocking Recursive job is complete in nodejs - node.js

I have written this non-blocking nodejs sample recursive file search code, the problem is I am unable to figure out when the task is complete. Like to calculate the time taken for the task.
fs = require('fs');
searchApp = function() {
var dirToScan = 'D:/';
var stringToSearch = 'test';
var scan = function(dir, done) {
fs.readdir(dir, function(err, files) {
files.forEach(function (file) {
var abPath = dir + '/' + file;
try {
fs.lstat(abPath, function(err, stat) {
if(!err && stat.isDirectory()) {
scan(abPath, done);;
}
});
}
catch (e) {
console.log(abPath);
console.log(e);
}
matchString(file,abPath);
});
});
}
var matchString = function (fileName, fullPath) {
if(fileName.indexOf(stringToSearch) != -1) {
console.log(fullPath);
}
}
var onComplte = function () {
console.log('Task is completed');
}
scan(dirToScan,onComplte);
}
searchApp();
Above code do the search perfectly, but I am unable to figure out when the recursion will end.

Its not that straight forward, i guess you have to rely on timer and promise.
fs = require('fs');
var Q = require('q');
searchApp = function() {
var dirToScan = 'D:/';
var stringToSearch = 'test';
var promises = [ ];
var traverseWait = 0;
var onTraverseComplete = function() {
Q.allSettled(promises).then(function(){
console.log('Task is completed');
});
}
var waitForTraverse = function(){
if(traverseWait){
clearTimeout(traverseWait);
}
traverseWait = setTimeout(onTraverseComplete, 5000);
}
var scan = function(dir) {
fs.readdir(dir, function(err, files) {
files.forEach(function (file) {
var abPath = dir + '/' + file;
var future = Q.defer();
try {
fs.lstat(abPath, function(err, stat) {
if(!err && stat.isDirectory()) {
scan(abPath);
}
});
}
catch (e) {
console.log(abPath);
console.log(e);
}
matchString(file,abPath);
future.resolve(abPath);
promises.push(future);
waitForTraverse();
});
});
}
var matchString = function (fileName, fullPath) {
if(fileName.indexOf(stringToSearch) != -1) {
console.log(fullPath);
}
}
scan(dirToScan);
}
searchApp();

Related

Parent function is not waiting for child to finish

Inside getMetadata(), Object.keys function is not waiting for httprequest to finish. How can I make object.keys function to wait till httprequest function manipluates the result variable?
I'm using node. I tried to make promise but failed.
function fetchAirportPageIDsListWithMetaJSON(faa, cb){
logger.info('[airportcms-data-processor] fetching airport pages list with Metadata');
var faa = faa
async.waterfall([
getAirportPageIDsList,
getMetadata,
], function (err, result) {
cb(null, result);
});
function getAirportPageIDsList(callback) {
httpRequests.fetchData('//s3 url to fetch data', function (err, data) {
var idsMap={};
data["page-ids"].forEach( (obj) => {
obj.list.forEach((item) => idsMap[item] = obj.id);
});
callback(null, idsMap);
})
}
function getMetadata(data,callback) {
var result=[];
Object.keys(data).sort().forEach( function (t) {
var tempJson={};
var urlcheck = verifySpecialPageId(t);
if (urlcheck){
var url = config.urls.s3_airports_base_url+'/'+faa+'/'+urlcheck;
}else{
var url = config.urls.s3_airports_base_url+'/'+faa+'/'+t;
}
tempJson["sectionId"]= t;
tempJson["page"]= data[t];
httpRequests.makeHeadRequestWithCallerId(url, function (err, metax) {
if (metax){
let z = metax.split('|')[0];
tempJson["SummaryRange"]= getSummaryRangeAirportPageList(z);
tempJson["timestamp"]= new Date(parseInt(z)).toLocaleDateString();
tempJson["callerId"]= metax.split('|')[1];
}else{
tempJson["timestamp"]='';
tempJson["callerId"]='';
tempJson["SummaryRange"]='';
}
})
result.push(tempJson);
});
logger.info("Final result: ", result);
callback(null, result);
}
}
http request function:
function makeHeadRequestWithCallerId (url, cb) {
httpRequest.head(url, function (err, res) {
if (err) {
logger.error('Error ' + err);
return cb(err, null);
}
if(res.code === 200) {
if (res.headers['x-amz-meta-x-amz-meta-lastmodified'] || res.headers['x-amz-meta-x-amz-meta-callerid']) {
var dateModified = res.headers['x-amz-meta-x-amz-meta-lastmodified'];
var timeStamp = Date.parse(dateModified);
var callerid = res.headers['x-amz-meta-x-amz-meta-callerid'];
if(timeStamp && callerid) {
return cb(null, timeStamp+'|'+callerid);
} else if (callerid){
return cb(null, '|'+callerid);
}else if(timeStamp){
return cb(null, timeStamp+'|');
}else{
return cb(null, null);
}
}else{
return cb(null, null);
}
}
});
}
Current log=> Final result:
[{ sectionId: 'internet-wifi', page: 'internet-wifi' },
{ sectionId: 'layover-ideas', page: 'layover-ideas' }]
Expected log=> Final result:
{ sectionId: 'internet-wifi',
page: 'internet-wifi',
SummaryRange: '12-99',
timestamp: '1/29/2018',
callerId: '' },
{ sectionId: 'layover-ideas',
page: 'layover-ideas',
SummaryRange: '12-99',
timestamp: '1/26/2017',
callerId: '' },
function getMetadata(data, callback) {
var result = [];
var count = Object.keys(data).length;
var i = 0;
Object.keys(data).sort().forEach(function (t) {
var tempJson = {};
var urlcheck = verifySpecialPageId(t);
if (urlcheck) {
var url = config.urls.s3_airports_base_url + '/' + faa + '/' + urlcheck;
} else {
var url = config.urls.s3_airports_base_url + '/' + faa + '/' + t;
}
tempJson["sectionId"] = t;
tempJson["page"] = data[t];
httpRequests.makeHeadRequestWithCallerId(url, function (err, metax) {
if (metax) {
let z = metax.split('|')[0];
tempJson["SummaryRange"] = getSummaryRangeAirportPageList(z);
tempJson["timestamp"] = new Date(parseInt(z)).toLocaleDateString();
tempJson["callerId"] = metax.split('|')[1];
} else {
tempJson["timestamp"] = '';
tempJson["callerId"] = '';
tempJson["SummaryRange"] = '';
}
result.push(tempJson);
i++;
if(count === i){
logger.info("Final result: ", result);
callback(null, result);
}
})
});
}

How to convert all djvu files to pdf

it's answer. Just use nodejs and ddjvu from DJView lib.
There
imports
const fs = require('fs');
const os = require('os');
const {spawn} = require('child_process');
const path = require('path');
const maxProcess = os.cpus().length - 1;// count of procces - 1 for system needs
let nowPlayed = 0;
method for convert file, and delete when converted.
function chpoc(args) {
console.log(args[1] + " start converting");
spawn(`ddjvu`, ["-format=pdf", args[0], args[1] + ".pdf"]).on('close', (data) => {
console.log(args[1] + ".pdf converted");
fs.unlink(args[0], (err) => {
if (err) throw err;
console.log(args[0] + ' successfully deleted!');
nowPlayed--;
})
});
}
queue for optimize max convertions at one time
let queue = [];
function startQueue() {
if (nowPlayed < maxProcess && queue.length) {
nowPlayed++;
queue.pop()();
}
}
setInterval(startQueue, 500)
fillthe queue and start it
function workWithFile(filepath) {
const args = filepath.match(/(.*)\.djvu/)
if (args && args.length) {
queue.push(() => {
chpoc(args);
});
}
}
show errors
const eachCallback = function (err) {
err && console.error(err);
}
catalog three and finde the djvus
let filePaths = [];
function getFiles(dirPath, callback) {
fs.readdir(dirPath, function (err, files) {
if (err) return callback(err);
files.forEach((fileName) => {
setTimeout(() => {
let filePath = path.join(dirPath, fileName);
if (filePath) {
fs.stat(filePath, function (err, stat) {
if (err) return eachCallback(err);
if (stat.isDirectory()) {
getFiles(filePath, callback);
} else if (stat.isFile() && /\.djvu$/.test(filePath)) {
filePaths.push(filePath);
callback(filePath)
}
})
}
});
});
});
}
init from started dir
getFiles(__dirname, function (file) {
workWithFile(file);
});
imports
const fs = require('fs');
const os = require('os');
const {spawn} = require('child_process');
const path = require('path');
const maxProcess = os.cpus().length - 1;// count of procces - 1 for system needs
let nowPlayed = 0;
method for convert file, and delete when converted.
function chpoc(args) {
console.log(args[1] + " start converting");
spawn(`ddjvu`, ["-format=pdf", args[0], args[1] + ".pdf"]).on('close', (data) => {
console.log(args[1] + ".pdf converted");
fs.unlink(args[0], (err) => {
if (err) throw err;
console.log(args[0] + ' successfully deleted!');
nowPlayed--;
})
});
}
queue for optimize max convertions at one time let queue = [];
function startQueue() {
if (nowPlayed < maxProcess && queue.length) {
nowPlayed++;
queue.pop()();
}
}
setInterval(startQueue, 500)
fill the queue and start it
function workWithFile(filepath) {
const args = filepath.match(/(.*)\.djvu/)
if (args && args.length) {
queue.push(() => {
chpoc(args);
});
}
}
show errors
const eachCallback = function (err) {
err && console.error(err);
}
catalog three and finde the djvus
let filePaths = [];
function getFiles(dirPath, callback) {
fs.readdir(dirPath, function (err, files) {
if (err) return callback(err);
files.forEach((fileName) => {
setTimeout(() => {
let filePath = path.join(dirPath, fileName);
if (filePath) {
fs.stat(filePath, function (err, stat) {
if (err) return eachCallback(err);
if (stat.isDirectory()) {
getFiles(filePath, callback);
} else if (stat.isFile() && /\.djvu$/.test(filePath)) {
filePaths.push(filePath);
callback(filePath)
}
})
}
});
});
});
}
init from started dir
getFiles(__dirname, function (file) {
workWithFile(file);
});

Node js Promises with recursive function

I want to read the all (text) files from a specific directory and it's all subdirecoty recursively.. I am able to read the file and append the result to a global variable. but i want to access the variable at the end of all operation. I am trying with promises but i am unable to access it. please help
var file_path = `C:\\Users\\HP\\Desktop\\test_folder`;
const fs = require('fs');
var final_array = [];
let getFolderTree = function(file_path) {
return new Promise(function(resolve, reject) {
fs.readdir(file_path, function(err, folders) {
if (err) {
console.log("error reading folder :: " + err);
} else {
if (folders.length !== 0) {
for (let i = 0; i < folders.length; i++) {
if (folders[i].endsWith("txt")) {
let text_file_path = file_path + `\\` + folders[i];
fs.readFile(text_file_path, function(error_read, data) {
if (error_read) {
console.log("error reading " + error_read);
} else {
return resolve(final_array.push(data));// want to access final_array at the end of all operations
}
});
} else {
let current_path = file_path + `\\` + folders[i];
getFolderTree(current_path);
}
}
}
}
});
});
}
getFolderTree(file_path).then(function() {
console.log(final_array); // this is not working
});
I think i have found the solution but I am still confused about how it works.
I took reference from another code and able to figure out some how.
var fs = require('fs');
var path = require('path');
let root_path = "C:\\Users\\HP\\Desktop\\test_folder";
function getAllDirectoriesPath(current_path) {
var results = [];
return new Promise(function (resolve, reject) {
fs.readdir(current_path, function (erro, sub_dirs) {
if (erro) {
console.log(error);
} else {
let no_of_subdir = sub_dirs.length;
if (!no_of_subdir) {
return resolve(results);
} else {
sub_dirs.forEach(function (dir) {
dir = path.resolve(current_path, dir);
fs.stat(dir, function (err, stat) {
if (stat && stat.isDirectory()) {
getAllDirectoriesPath(dir).then(function (res) {
results = results.concat(res);
if (!--no_of_subdir) {
resolve(results);
}
});
} else {
fs.readFile(dir, function (err, data) {
results.push(data.toString());
if (!--no_of_subdir) {
resolve(results);
}
});
}
});
});
}
}
});
});
}
getAllDirectoriesPath(root_path).then(function (results) {
console.log(results);
});

node phantomjs seo running via node write the file

I'm wondering if this is the write way:
node.js
var fs = require('fs');
var path = require('path');
var childProcess = require('child_process');
var phantomjs = require('phantomjs');
var binPath = phantomjs.path;
var childArgs = [
path.join(__dirname, 'phantomjs-runner.js'),
'http://localhost:3000'
]
childProcess.execFile(binPath, childArgs, function(err, stdout, stderr) {
if(err){
}
if(stderr){
}
fs.writeFile(__dirname+'/public/snapshots/index.html', stdout, function(err) {
if(err) {
console.log(err);
}
else {
console.log("The file was saved!");
}
});
});
phantomjs-runner.js
var system = require('system');
var url = system.args[1] || '';
if(url.length > 0) {
var page = require('webpage').create();
page.open(url, function (status) {
if (status == 'success') {
var delay, checker = (function() {
var html = page.evaluate(function () {
var body = document.getElementsByTagName('body')[0];
if(body.getAttribute('data-status') == 'ready') {
return document.getElementsByTagName('html')[0].outerHTML;
}
});
if(html) {
clearTimeout(delay);
console.log(html);
phantom.exit();
}
});
delay = setInterval(checker, 100);
}
});
}
may be could be a better way like
clearTimeout(delay);
fs.writeFile
phantom.exit();
How can I manage ie
different urls and different files
I mean
http://localhost:3000 index.html
http://localhost:3000/blog blog.html
http://localhost:3000/blog/postid postid.html
ENDED UP
'use strict';
var fs = require('fs'),
path = require('path'),
childProcess = require('child_process'),
phantomjs = require('phantomjs'),
binPath = phantomjs.path,
config = require('../config/config');
var env = (process.env.NODE_ENV === 'production') ? 'production' : null,
currentPath = (env) ? config.proot + '/build/default/snapshots' : config.proot + '/default/snapshots';
function normalizeUrl(url){
if( (typeof url === 'undefined') || !url){
return '';
}
if ( url.charAt( 0 ) === '/' ){
url = url.substring(1);
}
return '/'+url.replace(/\/$/, "");
}
function normalizePage(route){
if(!route){
return 'index';
}
var chunks = route.substring(1).split('/');
var len = chunks.length;
if(len===1){
return chunks[0];
}
chunks.shift();
//I get the id
return chunks[0];
}
module.exports = function (url) {
var route = normalizeUrl(url);
var page = normalizePage(route);
var childArgs = [
path.join(__dirname, './phantomjs-runner.js'),
config.url+route
];
childProcess.execFile(binPath, childArgs, function(err, stdout, stderr) {
if(err){
}
if(stderr){
}
fs.writeFile(currentPath + '/' + page + '.html', stdout, function(err) {
if(err) {
console.log(err);
}
else {
console.log("The file was saved!");
}
});
});
};
So I worked out for the parameters I'm still
waiting for way to get the output ^^

How to get totalsize of files in directory?

How to get totalsize of files in directory ? Best way ?
Here is a simple solution using the core Nodejs fs libraries combined with the async library. It is fully asynchronous and should work just like the 'du' command.
var fs = require('fs'),
path = require('path'),
async = require('async');
function readSizeRecursive(item, cb) {
fs.lstat(item, function(err, stats) {
if (!err && stats.isDirectory()) {
var total = stats.size;
fs.readdir(item, function(err, list) {
if (err) return cb(err);
async.forEach(
list,
function(diritem, callback) {
readSizeRecursive(path.join(item, diritem), function(err, size) {
total += size;
callback(err);
});
},
function(err) {
cb(err, total);
}
);
});
}
else {
cb(err);
}
});
}
I tested the following code and it works perfectly fine.
Please do let me know if there is anything that you don't understand.
var util = require('util'),
spawn = require('child_process').spawn,
size = spawn('du', ['-sh', '/path/to/dir']);
size.stdout.on('data', function (data) {
console.log('size: ' + data);
});
// --- Everything below is optional ---
size.stderr.on('data', function (data) {
console.log('stderr: ' + data);
});
size.on('exit', function (code) {
console.log('child process exited with code ' + code);
});
Courtesy Link
2nd method:
var util = require('util'), exec = require('child_process').exec, child;
child = exec('du -sh /path/to/dir', function(error, stdout, stderr){
console.log('stderr: ' + stderr);
if (error !== null){
console.log('exec error: ' + error);
}
});
You might want to refer the Node.js API for child_process
Use du : https://www.npmjs.org/package/du
require('du')('/home/rvagg/.npm/', function (err, size) {
console.log('The size of /home/rvagg/.npm/ is:', size, 'bytes')
})
ES6 variant:
import path_module from 'path'
import fs from 'fs'
// computes a size of a filesystem folder (or a file)
export function fs_size(path, callback)
{
fs.lstat(path, function(error, stats)
{
if (error)
{
return callback(error)
}
if (!stats.isDirectory())
{
return callback(undefined, stats.size)
}
let total = stats.size
fs.readdir(path, function(error, names)
{
if (error)
{
return callback(error)
}
let left = names.length
if (left === 0)
{
return callback(undefined, total)
}
function done(size)
{
total += size
left--
if (left === 0)
{
callback(undefined, total)
}
}
for (let name of names)
{
fs_size(path_module.join(path, name), function(error, size)
{
if (error)
{
return callback(error)
}
done(size)
})
}
})
})
}
Review the node.js File System functions. It looks like you can use a combination of fs.readdir(path, [cb]), and fs.stat(file, [cb]) to list the files in a directory and sum their sizes.
Something like this (totally untested):
var fs = require('fs');
fs.readdir('/path/to/dir', function(err, files) {
var i, totalSizeBytes=0;
if (err) throw err;
for (i=0; i<files.length; i++) {
fs.stat(files[i], function(err, stats) {
if (err) { throw err; }
if (stats.isFile()) { totalSizeBytes += stats.size; }
});
}
});
// Figure out how to wait for all callbacks to complete
// e.g. by using a countdown latch, and yield total size
// via a callback.
Note that this solution only considers the plain files stored directly in the target directory and performs no recursion. A recursive solution would come naturally by checking stats.isDirectory() and entering, although it likely complicates the "wait for completion" step.
'use strict';
const async = require('async');
const fs = require('fs');
const path = require('path')
const getSize = (item, callback) => {
let totalSize = 0;
fs.lstat(item, (err, stats) => {
if (err) return callback(err);
if (stats.isDirectory()) {
fs.readdir(item, (err, list) => {
if (err) return callback(err);
async.each(list, (listItem, cb) => {
getSize(path.join(item, listItem), (err, size) => {
totalSize += size;
cb();
});
},
(err) => {
if (err) return callback(err);
callback(null, totalSize);
});
});
} else {
// Ensure fully asynchronous API
process.nextTick(function() {
callback(null, (totalSize += stats.size))
});
}
});
}
getSize('/Applications', (err, totalSize) => { if (!err) console.log(totalSize); });
I know I'm a bit late to the part but I though I'd include my solution which uses promises based on #maerics answer:
const fs = require('fs');
const Promise = require('bluebird');
var totalSizeBytes=0;
fs.readdir('storage', function(err, files) {
if (err) throw err;
Promise.mapSeries(files, function(file){
return new Promise((resolve, reject) => {
fs.stat('storage/' + file,function(err, stats) {
if (err) { throw err; }
if (stats.isFile()) { totalSizeBytes += stats.size; resolve(); }
});
})
}).then(()=>{
console.log(totalSizeBytes);
});
});
function readSizeRecursive(folder, nested = 0) {
return new Promise(function(resolve, reject) {
const stats = fs.lstatSync(path.resolve(__dirname, '../projects/', folder));
var total = stats.size;
const list = fs.readdirSync(path.resolve(__dirname, '../projects/', folder));
if(list.length > 0){
Promise.all(list.map(async li => {
const stat = await fs.lstatSync(path.resolve(__dirname, '../projects/', folder, li));
if(stat.isDirectory() && nested == 0){
const tt = await readSizeRecursive(folder, 1);
total += tt;
} else {
total += stat.size;
}
})).then(() => resolve(convertBytes(total)));
} else {
resolve(convertBytes(total));
}
});
}
const convertBytes = function(bytes) {
const sizes = ["Bytes", "KB", "MB", "GB", "TB"]
if (bytes == 0) {
return "n/a"
}
const i = parseInt(Math.floor(Math.log(bytes) / Math.log(1024)))
if (i == 0) {
return bytes + " " + sizes[i]
}
// return (bytes / Math.pow(1024, i)).toFixed(1) + " " + sizes[i]
return parseFloat((bytes / Math.pow(1024, i)).toFixed(1));
}
This combines async/await and the fs Promises API introduced in Node.js v14.0.0 for a clean, readable implementation:
const { readdir, stat } = require('fs/promises');
const dirSize = async directory => {
const files = await readdir( directory );
const stats = files.map( file => stat( path.join( directory, file ) ) );
let size = 0;
for await ( const stat of stats ) size += stat.size;
return size;
};
Usage:
const size = await dirSize( '/path/to/directory' );
console.log( size );
An shorter-but-less-readable alternative of the dirSize function would be:
const dirSize = async directory => {
const files = await readdir( directory );
const stats = files.map( file => stat( path.join( directory, file ) ) );
return ( await Promise.all( stats ) ).reduce( ( accumulator, { size } ) => accumulator + size, 0 );
}
A very simple synchronous solution that I implemented.
const fs = require("fs");
function getSize(path){
// Get the size of a file or folder recursively
let size = 0;
if(fs.statSync(path).isDirectory()){
const files = fs.readdirSync(path);
files.forEach(file => {
size += getSize(path + "/" + file);
});
}
else{
size += fs.statSync(path).size;
}
return size;
}

Resources