I want to remove some files in a directory after reaching a certain limit (for example, remove files if the number of files is more than 20).
It would be great if this removal could be automated.
In detail:
In my case there is an uploads directory where I'm uploading images. For each new image, a directory is created and the image resides in that directory. I want to keep some of the newly created or recently used directories and remove the others once a limit is reached (for example, after reaching 20 directories). While creating a new image, it should check the limit and, if the maximum number of directories is exceeded, remove the unused ones.
Note: The directories are not empty.
How can I do that using Node.js?
Any help would be appreciated.
The most widely used technique would be to have an API that can delete files in your folder. Take a look at
fs.unlink
You can get more details in the Node.js fs documentation.
Once you have this API, it is preferable to have a cron job call it every month or so. Take a look at
crontab -e
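For example, here is a minimal sketch of a cleanup script that cron could run; the uploads path and the limit of 20 are assumptions taken from the question:
// cleanup.js - minimal sketch; adjust the directory and the limit to your setup
var fs = require('fs');
var path = require('path');

var uploadsDir = path.join(__dirname, 'uploads'); // assumption: your uploads folder
var maxEntries = 20;                              // assumption: the limit from the question

fs.readdir(uploadsDir, function (err, names) {
  if (err) throw err;
  if (names.length <= maxEntries) return;
  // sort by modification time, newest first, and remove everything past the limit
  var entries = names.map(function (name) {
    var full = path.join(uploadsDir, name);
    return { name: full, time: fs.statSync(full).mtime.getTime() };
  }).sort(function (a, b) { return b.time - a.time; });
  entries.slice(maxEntries).forEach(function (entry) {
    // note: fs.unlink only removes files; per-image directories would need recursive removal
    fs.unlink(entry.name, function (err) {
      if (err) console.log('Could not remove ' + entry.name + ': ' + err);
    });
  });
});
A crontab entry such as 0 0 1 * * /usr/bin/node /path/to/cleanup.js would run it once a month.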
If you're running Node on a Linux server, you can use child_process.exec to execute Linux commands. For example, here is a snippet I use to move old log files:
var exec = require('child_process').exec;
exec('mv ' + __dirname + '/log/*.log ' + __dirname + '/log/archive',
  function (err, stdout, stderr) {
    if (err) {
      console.log('Error archiving log files: ' + stderr);
    } else {
      console.log('Log files archived to ' + __dirname + '/log/archive');
    }
  });
You can use any Linux command - so you could use this approach to remove files as well.
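For instance, a rough sketch of keeping only the 20 newest entries in an uploads folder (the path and the limit are assumptions; this relies on GNU coreutils and will misbehave with filenames containing spaces):
var exec = require('child_process').exec;
var uploadsDir = __dirname + '/uploads'; // assumption: your uploads folder

// list entries newest first, skip the first 20, and remove the rest
// (rm -rf also handles non-empty directories)
exec('ls -t ' + uploadsDir + ' | tail -n +21 | xargs -r -I {} rm -rf ' + uploadsDir + '/{}',
  function (err, stdout, stderr) {
    if (err) {
      console.log('Error cleaning uploads: ' + stderr);
    } else {
      console.log('Old uploads removed');
    }
  });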
I created a "cron job"-style function in Node.js to remove files in a folder (note: child folders will be ignored).
USAGE:
// keep only the 5 newest files in the `logs` folder
watchAndRemoveOldFiles('logs', 5, function (err, removedFiles) {
  console.log('These files have been removed:', removedFiles);
});
Full code (you need to npm install async to run it):
var fs = require('fs');
var path = require('path');
var async = require('async');
function findAndRemoveOldFiles(inputDir, keepCount, callback) {
  if (!callback) {
    callback = function (err, removedFiles) {
      // default callback: do nothing
    };
  }
  fs.readdir(inputDir, function (err, files) {
    if (err) {
      return callback(err);
    }
    var fileNames = files.map(function (fileName) {
      return path.join(inputDir, fileName);
    });
    async.map(fileNames, function (fileName, cb) {
      fs.stat(fileName, function (err, stat) {
        if (err) {
          return cb(err);
        }
        cb(null, {
          name: fileName,
          isFile: stat.isFile(),
          time: stat.mtime,
        });
      });
    }, function (err, files) {
      if (err) {
        return callback(err);
      }
      files = files.filter(function (file) {
        return file.isFile;
      });
      // sort newest first so that slice(keepCount) drops the oldest files
      files.sort(function (fileA, fileB) {
        return fileB.time - fileA.time;
      });
      files = files.slice(keepCount);
      async.map(files, function (file, cb) {
        fs.unlink(file.name, function (err) {
          if (err) {
            return cb(err);
          }
          cb(null, file.name);
        });
      }, function (err, removedFiles) {
        if (err) {
          return callback(err);
        }
        callback(null, removedFiles);
      });
    });
  });
}

function watchAndRemoveOldFiles(inputDir, keepCount, callback) {
  findAndRemoveOldFiles(inputDir, keepCount, callback);
  fs.watch(inputDir, function () {
    findAndRemoveOldFiles(inputDir, keepCount, callback);
  });
}
// USAGE: watch and remove old files, keeping only the 5 newest
watchAndRemoveOldFiles('log', 5, function (err, removedFiles) {
  console.log('These files have been removed:', removedFiles);
});
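Since the question is about non-empty directories rather than plain files, here is a rough sketch of how the same idea could be adapted: keep directory entries (stat.isDirectory()) and remove them recursively. This assumes Node 14.14+ where fs.rm supports recursive removal; on older versions you could shell out to rm -rf or use the rimraf module:
function findAndRemoveOldDirs(inputDir, keepCount, callback) {
  fs.readdir(inputDir, function (err, names) {
    if (err) return callback(err);
    var fullNames = names.map(function (name) { return path.join(inputDir, name); });
    async.map(fullNames, function (name, cb) {
      fs.stat(name, function (err, stat) {
        if (err) return cb(err);
        cb(null, { name: name, isDir: stat.isDirectory(), time: stat.mtime });
      });
    }, function (err, entries) {
      if (err) return callback(err);
      var oldDirs = entries.filter(function (e) { return e.isDir; })
                           .sort(function (a, b) { return b.time - a.time; })
                           .slice(keepCount);
      async.map(oldDirs, function (dir, cb) {
        // recursive removal of a non-empty directory (Node 14.14+)
        fs.rm(dir.name, { recursive: true, force: true }, function (err) {
          cb(err, dir.name);
        });
      }, callback);
    });
  });
}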
You might consider setting up a kue task:
https://github.com/learnboost/kue
Kue (or a slight wrapper/mod on top of it) is likely to be what makes it into core for our scheduled jobs down the road.
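For example, a rough sketch of a kue-based cleanup job might look like this (the job type name and the removeOldUploads helper are hypothetical placeholders; removeOldUploads could be something like the findAndRemoveOldFiles function above):
var kue = require('kue');
var queue = kue.createQueue();

// worker: runs whenever a 'cleanup-uploads' job is processed
queue.process('cleanup-uploads', function (job, done) {
  // removeOldUploads is a hypothetical helper that deletes everything beyond the limit
  removeOldUploads(job.data.dir, job.data.keep, done);
});

// enqueue a job, e.g. after each upload or from a timer
queue.create('cleanup-uploads', { dir: 'uploads', keep: 20 }).save();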
Related
I am using the following code to create directories in a sync way. It checks the existence of the directory, deletes it if it exists, and creates it. All operations are synchronous. I am looping this operation 5 times and getting different results each time: sometimes it creates only 4 directories, sometimes all 5. What is the reason for this instability in the code?
fs.readdir(dir, function (err, filenames) {
  if (err) {
    onError(err);
    return;
  }
  filenames.forEach(function (filename) {
    fs.readFile(dir + filename, 'utf-8', function (err, content) {
      if (err) {
        onError(err);
        return;
      }
      AsyncFunc(content, ....)
        .then(newContent => {
          filenames.forEach(function (filename) {
            if (fs.existsSync(currentDirName)) {
              fs.rmdirSync(currentDirName);
            }
            fs.mkdirSync(currentDirName, '0766');
          });
        });
    });
  });
});
If you are using sync functions, you cannot use callbacks. Also, if you want to remove a folder you need to use rmdirSync(filename):
var fs = require('fs');

var filenames = ['1', '2', '3', '4'];
filenames.forEach(function (filename) {
  if (fs.existsSync(filename)) {
    fs.rmdirSync(filename);
  }
  fs.mkdirSync(filename, '0766');
});
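Note that rmdirSync only removes empty directories. If the folders can contain files (as in the original question), a sketch using the recursive removal added in newer Node versions (fs.rmSync, Node 14.14+) would be:
var fs = require('fs');

var filenames = ['1', '2', '3', '4'];
filenames.forEach(function (filename) {
  // removes the directory and anything inside it; force ignores a missing path
  fs.rmSync(filename, { recursive: true, force: true });
  fs.mkdirSync(filename, '0766');
});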
I am trying to delete an array of files, but only one or two of them actually get deleted. Please check the code; I am sure I am doing something wrong.
I am searching for each file in the directory and, if found, I first remove it from the db; after the response I remove the file from the directory.
if (files.length > 0) {
  files.forEach(function (filename) {
    fileDir = path.join(__dirname, '/uploads/' + decodeURI(filename));
    fs.stat(fileDir, function (err, stats) {
      if (stats.isFile()) {
        // fs.unlink(fileDir, function(err) {
        form_op.deleteImg(url.resolve('http://localhost/uploads/', filename), query._id, function (err, result) {
          if (err) {
            throw console.log(err)
          }
          fs.unlink(fileDir);
        });
        // });
      }
    });
    done++;
  });
}
Through debugging I found that the filename and fileDir variables move on to the next file in the array without waiting for fs.stat(fileDir, function(err, stats) { to complete. Is there another way I can do the check and remove?
My files variable is an array of just the file names.
fileDir = path.join(__dirname, '/uploads/' + decodeURI(filename));
You are missing a var declaration here, therefore the fileDir variable is global and shared by all of your callbacks.
Use var fileDir = ... to have the variable scoped to your function.
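In other words, the loop body would become something like this (only the var is added; everything else in your code stays as it is):
files.forEach(function (filename) {
  // scoped to this iteration, so each fs.stat callback sees its own value
  var fileDir = path.join(__dirname, '/uploads/' + decodeURI(filename));
  fs.stat(fileDir, function (err, stats) {
    // ... rest of the original code, using fileDir ...
  });
  done++;
});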
I'm using the findit walker, documentation here -> https://github.com/substack/node-findit
With this package I'm listing all the directories and files of my application, and I'm trying to send them to my bucket on Amazon S3 (with my own code).
I'm not sure if the code is right, and I don't know what I need to put in the Body, inside the params object.
Code below. This part lists all the directories of my app:
finder.on('directory', function (dir, stat, stop) {
  var base = path.basename(dir);
  if (base === '.git' || base === 'node_modules' || base === 'bower_components') {
    stop();
  } else {
    console.log(dir + '/');
  }
});
And this one lists all the files of my app:
finder.on('file', function (file, stat) {
  console.log(file);
});
I updated it to send data to my bucket, like this:
finder.on('file', function (file, stat) {
  console.log(file);
  var params = {
    Bucket: BUCKET_NAME,
    Key: file,
    //Body:
  };
  //console.log(params.body);
  s3.putObject(params, function (err) {
    if (err) {
      console.log(err);
    } else {
      console.log("Success!");
    }
  });
});
I really don't know what I need to put inside the Body, and I don't know if the code is right. Could anyone help me?
Thanks.
To help, here is all the code:
var fs = require('fs');
var finder = require('findit')(process.argv[2] || '.');
var path = require('path');
var aws = require('aws-sdk');
var s3 = new aws.S3();
aws.config.loadFromPath('./AwsConfig.json');
var BUCKET_NAME = 'test-dev-2';

finder.on('directory', function (dir, stat, stop) {
  var base = path.basename(dir);
  if (base === '.git' || base === 'node_modules' || base === 'bower_components') {
    stop();
  } else {
    console.log(dir + '/');
  }
});

finder.on('file', function (file, stat) {
  console.log(file);
  var params = {
    Bucket: BUCKET_NAME,
    Key: file,
    //Body:
  };
  //console.log(params.body);
  s3.putObject(params, function (err) {
    if (err) {
      console.log(err);
    } else {
      console.log("Success");
    }
  });
});

finder.on('error', function (err) {
  console.log(err);
});

finder.on('end', function () {
  console.log('Done!');
});
Based on the documentation, the Body parameter of s3.putObject can take a Buffer, Typed Array, Blob, String, or ReadableStream. The best one of those to use in most cases would be a ReadableStream. You can create a ReadableStream from any file using the createReadStream() function in the fs module.
So, that part of your code would look something like:
finder.on('file', function (file, stat) {
  console.log(file);
  var params = {
    Bucket: BUCKET_NAME,
    Key: file,
    Body: fs.createReadStream(file) // NOTE: You might need to adjust "file" so that it's either an absolute path, or relative to your code's directory.
  };
  s3.putObject(params, function (err) {
    if (err) {
      console.log(err);
    } else {
      console.log("Success!");
    }
  });
});
I also want to point out that you might run into a problem with this code if you pass it a directory with a lot of files. putObject is an asynchronous function, which means it'll be called and then the code will move on to something else while it's doing its thing (ok, that's a gross simplification, but you can think of it that way). What that means in terms of this code is that you'll essentially be uploading all the files it finds at the same time; that's not good.
What I'd suggest is to use something like the async module to queue your file uploads so that only a few of them happen at a time.
Essentially you'd move the code you have in your file event handler to the queue's worker method, like so:
var async = require('async');

var uploadQueue = async.queue(function (file, callback) {
  var params = {
    Bucket: BUCKET_NAME,
    Key: file,
    Body: fs.createReadStream(file) // NOTE: You might need to adjust "file" so that it's either an absolute path, or relative to your code's directory.
  };
  s3.putObject(params, function (err) {
    if (err) {
      console.log(err);
    } else {
      console.log("Success!");
    }
    callback(err); // <-- Don't forget the callback call here so that the queue knows this item is done
  });
}, 2); // <-- This "2" is the maximum number of files to upload at once
Note the 2 at the end there; that specifies your concurrency, which in this case is how many files to upload at once.
Then, your file event handler simply becomes:
finder.on('file', function (file, stat) {
  uploadQueue.push(file);
});
That will queue up all the files it finds and upload them 2 at a time until it goes through all of them.
An easier and arguably more efficient solution may be to just tar up the directory and upload that single tar file (also gzipped if you want). There are tar modules on npm, but you could also just spawn a child process for it too.
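For example, a rough sketch of the child-process variant, reusing the s3 client and BUCKET_NAME from your code (the archive path and the excluded folders are assumptions):
var spawn = require('child_process').spawn;
var fs = require('fs');

var archive = '/tmp/app.tar.gz'; // assumption: where to write the archive
var tar = spawn('tar', ['-czf', archive, '--exclude=node_modules', '--exclude=.git', '.']);

tar.on('close', function (code) {
  if (code !== 0) {
    return console.log('tar exited with code ' + code);
  }
  s3.putObject({
    Bucket: BUCKET_NAME,
    Key: 'app.tar.gz',
    Body: fs.createReadStream(archive)
  }, function (err) {
    console.log(err ? err : 'Archive uploaded');
  });
});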
I am working on a small Node project which requires copying and un-gzipping a number of files of various sizes. I've been trying to use async.eachSeries to take care of it, but it is not working out: the files are created, but the piped output ends up being written to multiple different files regardless of which file it should end up in.
fs.readdir(path, function (err, files) {
  async.eachSeries(files, function (file, callback) {
    var wr = fs.createWriteStream(file);
    fs.stat(file, function (err, stats) {
      if (err) throw err;
      var stream = fs.createReadStream(file).on('end', function () {
        callback();
      }).pipe(ungzip).pipe(wr);
    });
  }, function () {
    //res.write(concatenated);
    //res.end();
  });
});
I'm still new to node so any help would be appreciated.
-NQ
Looks like the solution is to use a closure.
The problem in your code is that the callback function passed to fs.stat references a variable from the outer scope, i.e. wr, which is changed in the next iteration of the loop. Closures are good for sorting this out.
fs.readdir(path, function (err, files) {
  async.eachSeries(files, function (file, callback) {
    var wr = fs.createWriteStream(file);
    fs.stat(file, (function (myWr) {
      return function (err, stats) {
        if (err) throw err;
        var stream = fs.createReadStream(file).on('end', function () {
          callback();
        }).pipe(ungzip).pipe(myWr);
      };
    })(wr));
  }, function () {
    //res.write(concatenated);
    //res.end();
  });
});
Refer to "Please explain the use of JavaScript closures in loops" and "Serving A Batch Of Dynamic Pages" for more on closures.
Sorry, just starting with node. This might be a very novice question.
Let's say I have some code which reads some files from a directory in the file system:
var fs = require('fs');

fs.readdir(__dirname + '/myfiles', function (err, files) {
  if (err) throw err;
  files.forEach(function (fileName) {
    fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
      if (err) throw err;
      console.log('finished reading file ' + fileName + ': ' + data);
      module.exports.files.push(data);
    });
  });
});
Note that all of this occurs asynchronously. Let's also say I have a Mocha test which executes this code:
describe('fileProvider', function () {
  describe('#files', function () {
    it.only('files array not empty', function () {
      assert(fileProvider.files.length > 0, 'files.length is zero');
    });
  });
});
The mocha test runs before the files are finished being read. I know this because I see the console.log statement after I see the little dot that indicates a mocha test being run (at least I think that is what is being indicated). Also, if I surround the assert with a setTimeout, the assert passes.
How should I structure my code so that I can ensure the async file operations are completed? Note that this is not just a problem with testing - I need the files to be loaded fully before I can do real work in my app as well.
I don't think the right answer is to read files synchronously, because that will block the Node request / response loop, right?
Bonus question:
Even if I put the assert in a setTimeout with a 0 timeout value, the test still passes. Is this because just putting it in a setTimeout kicks it to the end of the processing chain or something so the filesystem work finishes first?
You can implement a completion callback that fires after all the files have been read.
exports.files = [];
exports.initialize = initialize;

function initialize(callback) {
  var fs = require('fs');
  fs.readdir(__dirname + '/myfiles', function (err, files) {
    if (err) throw err;
    files.forEach(function (fileName) {
      fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
        if (err) throw err;
        console.log('finished reading file ' + fileName + ': ' + data);
        exports.files.push(data);
        if (exports.files.length == files.length) {
          callback();
        }
      });
    });
  });
}
You can call the file operation method by doing something like:
var f = require('./files.js');

if (f.files.length < 1) {
  console.log('initializing');
  f.initialize(function () {
    console.log('After: ' + f.files.length);
    var another = require('./files.js');
    console.log('Another module: ' + another.files.length);
  });
}
EDIT: Since you want to only have to call this once, you could initialize it once when the application loads. According to Node.js documentation, modules are cached after the first time they are loaded. The two above examples have been edited as well.
To avoid getting caught up in nested callbacks, you might want to use async's each, which will allow you to perform the tasks asynchronously in a non-blocking manner:
https://github.com/caolan/async#each
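A rough sketch of what that could look like for the file-reading part (the paths and names follow the question's code):
var async = require('async');
var fs = require('fs');

exports.files = [];

exports.initialize = function (callback) {
  fs.readdir(__dirname + '/myfiles', function (err, fileNames) {
    if (err) return callback(err);
    // read every file; callback fires once, after all reads finish (or on the first error)
    async.each(fileNames, function (fileName, done) {
      fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
        if (err) return done(err);
        exports.files.push(data);
        done();
      });
    }, callback);
  });
};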
I think that's a good test; the same thing would happen in any app that used your module, i.e. its code could run before files is set. What you need to do is create a callback like #making3 suggests, or use promises. I haven't used mocha, but its documentation has a section on asynchronous calls. You could export the promise itself:
module.exports.getFiles = new Promise((resolve, reject) => {
  var datas = [];
  fs.readdir(__dirname + '/myfiles', function (err, files) {
    if (err) {
      reject(err);
      return;
    }
    files.forEach(function (fileName) {
      fs.readFile(__dirname + '/myfiles/' + fileName, function (err, data) {
        if (err) {
          reject(err);
          return;
        }
        console.log('finished reading file ' + fileName + ': ' + data);
        datas.push(data);
        if (datas.length == files.length) {
          resolve(datas);
        }
      });
    });
  });
});
chai-as-promised lets you work directly with promises using eventually, or you can use the callback passed to your test, I think:
describe('fileProvider', function () {
  describe('#files', function () {
    it.only('files array not empty', function (done) {
      fileProvider.getFiles.then(function (value) {
        assert(value.length > 0, 'files.length is zero');
        done();
      }, function (err) {
        done(err);
      });
    });
  });
});
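If you go the chai-as-promised route instead, the test could be reduced to something like this (a sketch, assuming chai and chai-as-promised are installed and getFiles is the promise exported above):
var chai = require('chai');
chai.use(require('chai-as-promised'));
var expect = chai.expect;

describe('fileProvider', function () {
  describe('#files', function () {
    it('files array not empty', function () {
      // returning the promise lets mocha wait for it; eventually unwraps the resolved value
      return expect(fileProvider.getFiles).to.eventually.have.length.above(0);
    });
  });
});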