Stop function from being invoked multiple times - node.js

I'm in the process of building a file upload component that allows you to pause/resume file uploads.
The standard way to achieve this seems to be to break the file into chunks on the client machine, then send the chunks along with book-keeping information up to the server which can store the chunks into a staging directory, then merge them together when it has received all of the chunks. So, this is what I am doing.
I am using node/express and I'm able to get the files fine, but I'm running into an issue because my merge_chunks function is being invoked multiple times.
Here's my call stack:
router.post('/api/videos',
upload.single('file'),
validate_params,
rename_uploaded_chunk,
check_completion_status,
merge_chunks,
record_upload_date,
videos.update,
send_completion_notice
);
the check_completion_status function is implemented as follows:
/* Recursively check to see if we have every chunk of a file */
var check_completion_status = function (req, res, next) {
var current_chunk = 1;
var see_if_chunks_exist = function () {
fs.exists(get_chunk_file_name(current_chunk, req.file_id), function (exists) {
if (current_chunk > req.total_chunks) {
next();
} else if (exists) {
current_chunk ++;
see_if_chunks_exist();
} else {
res.sendStatus(202);
}
});
};
see_if_chunks_exist();
};
The file names in the staging directory have the chunk numbers embedded in them, so the idea is to see if we have a file for every chunk number. The function should only next() one time for a given (complete) file.
However, my merge_chunks function is being invoked multiple times. (usually between 1 and 4) Logging does reveal that it's only invoked after I've received all of the chunks.
With this in mind, my assumption here is that it's the async nature of the fs.exists function that's causing the issue.
Even though the n'th invocation of check_completion_status may occur before I have all of the chunks, by the time we get to the nth call to fs.exists(), x more chunks may have arrived and been processed concurrently, so the function can keep going and in some cases get to the end and next(). However those chunks that arrived concurrently are also going to correspond to invocations of check_completion_status, which are also going to next() because we obviously have all of the files at this point.
This is causing issues because I didn't account for this when I wrote merge_chunks.
For completeness, here's the merge_chunks function:
var merge_chunks = (function () {
var pipe_chunks = function (args) {
args.chunk_number = args.chunk_number || 1;
if (args.chunk_number > args.total_chunks) {
args.write_stream.end();
args.next();
} else {
var file_name = get_chunk_file_name(args.chunk_number, args.file_id)
var read_stream = fs.createReadStream(file_name);
read_stream.pipe(args.write_stream, {end: false});
read_stream.on('end', function () {
//once we're done with the chunk we can delete it and move on to the next one.
fs.unlink(file_name);
args.chunk_number += 1;
pipe_chunks(args);
});
}
};
return function (req, res, next) {
var out = path.resolve('videos', req.video_id);
var write_stream = fs.createWriteStream(out);
pipe_chunks({
write_stream: write_stream,
file_id: req.file_id,
total_chunks: req.total_chunks,
next: next
});
};
}());
Currently, I'm receiving an error because the second invocation of the function is trying to read the chunks that have already been deleted by the first invocation.
What is the typical pattern for handling this type of situation? I'd like to avoid a stateful architecture if possible. Is it possible to cancel pending handlers right before calling next() in check_completion_status?

If you just want to make it work ASAP, I would use a lock (much like a db lock) to lock the resource so that only one of the requests processes the chunks. Simply create a unique id on the client, and send it along with the chunks. Then just store that unique id in some sort of a data structure, and look that id up prior to processing. The example below is by far not optimal (in fact this map will keep growing, which is bad), but it should demonstrate the concept
// Create a map (an array would work too) and keep track of the video ids that were processed. This map will persist through each request.
var processedVideos = {};
var check_completion_status = function (req, res, next) {
var current_chunk = 1;
var see_if_chunks_exist = function () {
fs.exists(get_chunk_file_name(current_chunk, req.file_id), function (exists) {
if (processedVideos[req.query.uniqueVideoId]){
res.sendStatus(202);
} else if (current_chunk > req.total_chunks) {
processedVideos[req.query.uniqueVideoId] = true;
next();
} else if (exists) {
current_chunk ++;
see_if_chunks_exist();
} else {
res.sendStatus(202);
}
});
};
see_if_chunks_exist();
};

Related

Replay a log file with NodeJS as if it were happening in real-time

I have a log file with about 14.000 aircraft position datapoints captured from a system called Flarm, it looks like this:
{"addr":"A","time":1531919658.578100,"dist":902.98,"alt":385,"vs":-8}
{"addr":"A","time":1531919658.987861,"dist":914.47,"alt":384,"vs":-7}
{"addr":"A","time":1531919660.217471,"dist":925.26,"alt":383,"vs":-7}
{"addr":"A","time":1531919660.623466,"dist":925.26,"alt":383,"vs":-7}
What I need to do is find a way to 'play' this file back in real-time (as if it were occuring right now, even though it's pre-recorded), and emit an event whenever a log entry 'occurs'. The file is not being added to, it's pre-recorded and the playing back would occur at a later stage.
The reason for doing this is that I don't have access to the receiving equipment when I'm developing.
The only way I can think to do it is to set a timeout for every log entry, but that doesn't seem like the right way to do it. Also, this process would have to scale to longer recordings (this one was only an hour long).
Are there other ways of doing this?
If you want to "play them back" with the actual time difference, a setTimeout is pretty much what you have to do.
const processEntry = (entry, index) => {
index++;
const nextEntry = getEntry(index);
if (nextEntry == null) return;
const timeDiff = nextEntry.time - entry.time;
emitEntryEvent(entry);
setTimeout(processEntry, timeDiff, nextEntry, index);
};
processEntry(getEntry(0), 0);
This emits the current entry and then sets a timeout based on the difference until the next entry.
getEntry could either fetch lines from a prefilled array or fetch lines individually based on the index. In the latter case only two lines of data would only be in memory at the same time.
Got it working in the end! setTimeout turned out to be the answer, and combined with the input of Lucas S. this is what I ended up with:
const EventEmitter = require('events');
const fs = require('fs');
const readable = fs.createReadStream("./data/2018-07-18_1509log.json", {
encoding: 'utf8',
fd: null
});
function read_next_line() {
var chunk;
var line = '';
// While this is a thing we can do, assign chunk
while ((chunk = readable.read(1)) !== null) {
// If chunk is a newline character, return the line
if (chunk === '\n'){
return JSON.parse(line);
} else {
line += chunk;
}
}
return false;
}
var lines = [];
var nextline;
const processEntry = () => {
// If lines is empty, read a line
if (lines.length === 0) lines.push(read_next_line());
// Quit here if we've reached the last line
if ((nextline = read_next_line()) == false) return true;
// Else push the just read line into our array
lines.push(nextline);
// Get the time difference in milliseconds
var delay = Number(lines[1].time - lines[0].time) * 1000;
// Remove the first line
lines.shift();
module.exports.emit('data', lines[0]);
// Repeat after the calculated delay
setTimeout(processEntry, delay);
}
var ready_to_start = false;
// When the stream becomes readable, allow starting
readable.on('readable', function() {
ready_to_start = true;
});
module.exports = new EventEmitter;
module.exports.start = function() {
if (ready_to_start) processEntry();
if (!ready_to_start) return false;
}
Assuming you want to visualize the flight logs, you can use fs watch as below, to watch the log file for changes:
fs.watch('somefile', function (event, filename) {
console.log('event is: ' + event);
if (filename) {
console.log('filename provided: ' + filename);
} else {
console.log('filename not provided');
}
});
Code excerpt is from here. For more information on fs.watch() check out here
Then, for seamless update on frontend, you can setup a Websocket to your server where you watch the log file and send newly added row via that socket to frontend.
After you get the data in frontend you can visualize it there. While I haven't done any flight visualization project before, I've used D3js to visualize other stuff (sound, numerical data, metric analysis and etc.) couple of times and it did the job every time.

using async Node.JS to serve HTTP requests

I've successfully written a few nodejs HTTP handlers to serve data in response to an HTTP request. However, everything I've written has been using *Sync version of functions. I'm now quickly running into limitations of this approach.
I cannot figure out, however, how to properly use asynchronous functions in the HTTP request context. If I try an async call, processing quickly falls through and returns without giving the code a chance to process data.
What's the correct approach? I haven't been able to find any good examples, so any pointers to literature are appreciated. Short of that, what's an example of a handler for a GET request that scans a local directory and, say, returns a json list of file names and corresponding number of lines (or really any stub code of the above that shows the proper technique).
Here's a simple sample:
var http = require('http')
var fs = require('fs')
function dir (req, res) {
fs.readdir('.', function (error, files) {
if (error) {
res.writeHead(500)
res.end(error.message)
return
}
files.forEach(function (file) {
res.write(file + '\n')
})
res.end()
})
}
var server = http.createServer(dir)
server.listen(7000)
Run with node server.js and test it with curl :7000.
Yes the request handler returns before the readdir callback is executed. That is by design. That's how async programming works. It's OK. when the filesystem IO is done, the callback will execute and the response will be sent.
Peter Lyons' answer is great/correct. I'm going to expand on it a bit and suggest a different method of synchronization using promises and co as well as nested/looping asynchronicity.
/* Script to count all lines of a file */
const co = require("co");
// Promisifed fs -- eventually node will support this on its own
const fs = require("mz/fs");
const rootDir = 'files/';
// Recursivey count the lines of all files in the given directory and sum them
function countLines(directory) {
// We can only use `yield` inside a generator block
// `co` allows us to do this and walks through the generator on its own
// `yield` will not move to the next line until the promise resolves
//
// This is still asynchronous code but it is written in a way
// that makes it look synchronized. This entire block is asynchronous, so we
// can `countLines` of multiple directories simultaneously
return co(function* () {
// `files` will be an array of files in the given directory
const files = yield fs.readdir(directory);
// `.map` will create an array of promises. `yield` only completes when
// *all* promises in the array have resolved
const lines = yield files.map(file => countFileLines(file, directory));
// Sum the lines of all files in this directory
return lines.reduce((a, b) => a + b, 0);
});
}
function countFileLines(file, directory) {
// We need the full path to read the file
const fullPath = `${directory}/${file}`;
// `co` returns a promise, so `co` itself can be yielded
// This entire block is asynchronous so we should be able to count lines
// of files without waiting for each file to be read
return co(function* () {
// Used to check whether this file is a directory
const stats = yield fs.stat(fullPath);
if (stats.isDirectory()) {
// If it is, recursively count lines of this directory
return countLines(fullPath);
}
// Otherwise just get the line count of the file
const contents = yield fs.readFile(fullPath, "utf8");
return contents.split("\n").length - 1;
});
}
co(function* () {
console.log(yield countLines(rootDir));
})
// All errors propagate here
.catch(err => console.error(err.stack));
Note that this is just an example. There are probably already libraries to count lines of files in a directory and there are definitely libraries that simplify recursive reading/globbing of files.

Perform arbitrary set of asynchronous tasks

My input is streamed from another source, which makes it difficult to use async.forEach. I am pulling data from an API endpoint, but I have a limit of 1000 objects per request to the endpoint, and I need to get hundreds of thousands of them (basically all of them) and I will know they're finished when the response contains < 1000 objects. Now, I have tried this approach:
/* List all deposits */
var depositsAll = [];
var depositsIteration = [];
async.doWhilst(this._post(endpoint_path, function (err, response) {
// check err
/* Loop through the data and gather only the deposits */
for (var key in response) {
//do some stuff
}
depositsAll += depositsIteration;
return callback(null, depositsAll);
}, {limit: 1000, offset: 0, sort: 'desc'}),
response.length > 1000, function (err, depositsAll) {
// check for err
// return the complete result
return callback(null, depositsAll);
});
With this code I get an async internal error that iterator is not a function. But in general I am almost sure the logic is not correct as well.
If it's not clear what I'm trying to achieve - I need to perform a request multiple times, and add the response data to a result that at the end contains all the results, so I can return it. And I need to perform requests until the response contains less than 1000 objects.
I also looked into async.queue but could not get the hang of it...
Any ideas?
You should be able to do it like that, but if that example is from your real code you have misunderstood some of how async works. doWhilst takes three arguments, each of them being a function:
The function to be called by async. Gets argument callback that must be called. In your case, you need to wrap this._post inside another function.
The test function (you would give value of response.length > 1000, ie. a boolean, if response would be defined)
The final function to be called once execution is stopped
Example with each needed function separated for readability:
var depositsAll = [];
var responseLength = 1000;
var self = this;
var post = function(asyncCb) {
self._post(endpoint_path, function(err, res) {
...
responseLength = res.length;
asyncCb(err, depositsAll);
});
}
var check = function() {
return responseLength >= 1000;
};
var done = function(err, deposits) {
console.log(deposits);
};
async.doWhilst(post, check, done);

nodeJS too many child processes?

I am using node to recursively traverse a file system and make a system call for each file, by using child.exec. It works well when tested on a small structure, with a couple of folders and files, but when run on the whole home directory, it crashes after a while
child_process.js:945
throw errnoException(process._errno, 'spawn');
^
Error: spawn Unknown system errno 23
at errnoException (child_process.js:998:11)
at ChildProcess.spawn (child_process.js:945:11)
at exports.spawn (child_process.js:733:9)
at Object.exports.execFile (child_process.js:617:15)
at exports.exec (child_process.js:588:18)
Does this happen because it uses up all resources? How can I avoid this?
EDIT: Code
improvement and best practices suggestions always welcome :)
function processDir(dir, callback) {
fs.readdir(dir, function (err, files) {
if (err) {...}
if (files) {
async.each(files, function (file, cb) {
var filePath = dir + "/" + file;
var stats = fs.statSync(filePath);
if (stats) {
if (stats.isFile()) {
processFile(dir, file, function (err) {
if (err) {...}
cb();
});
} else if (stats.isDirectory()) {
processDir(filePath, function (err) {
if (err) {...}
cb();
});
}
}
}, function (err) {
if (err) {...}
callback();
}
);
}
});
}
the issue can be because of having many open files simultaneously
consider using async module to solve the issue
https://github.com/caolan/async#eachLimit
async.eachLimit(
files,
20,
function(file, callback){
//process file here and call callback
},
function(err){
//done
}
);
in current example you will process 20 files at a time
Well, I don't know the reason for the failure, but if this is what you expect (using up all of the resources) or as others say (too many files open), you could try to use multitasking for it. JXcore (fork of Node.JS) offers such thing - it allows to run a task in a separate instance, but this is done still inside one single process.
While Node.JS app as a process has its limitations - JXcore with its sub-instances multiplies those limits: single process even with one extra instance (or task, or well: we can call it sub-thread) doubles the limits!
So, let's say, that you will run each of your spawn() in a separate task. Or, since tasks are not running in a main thread any more - you can even use sync method that jxcore offers : cmdSync().
Probably the the best illustration would be given by this few lines of the code:
jxcore.tasks.setThreadCount(4);
var task = function(file) {
var your_cmd = "do something with " + file;
return jxcore.utils.cmdSync(your_cmd);
};
jxcore.tasks.addTask(task, "file1.txt", function(ret) {
console.log("the exit code:", ret.exitCode);
console.log("output:", ret.out);
});
Let me repeat: the task will not block the main thread, since it is running in a separate instance!
Multitasking API is documented here: Multitasking.
As has been established in comments, you are likely running out of file handles because you are running too many concurrent operations on your files. So, a solution is to limit the number of concurrent operations that run at once so too many files aren't in use at the same time.
Here's a somewhat different implementation that uses Bluebird promises to control both the async aspects of the operation and the concurrency aspects of the operation.
To make the management of the concurrency aspect easier, this collects the entire list of files into an array first and then processes the array of filenames rather than processing as you go. This makes it easier to use a built-in concurrency capability in Bluebird's .map() (which works on a single array) so we don't have to write that code ourselves:
var Promise = require("bluebird");
var fs = Promise.promisifyAll(require("fs"));
var path = require("path");
// recurse a directory, call a callback on each file (that returns a promise)
// run a max of numConcurrent callbacks at once
// returns a promise for when all work is done
function processDir(dir, numConcurrent, fileCallback) {
var allFiles = [];
function listDir(dir, list) {
var dirs = [];
return fs.readdirAsync(dir).map(function(file) {
var filePath = path.join(dir , file);
return fs.statAsync(filePath).then(function(stats) {
if (stats.isFile()) {
allFiles.push(filePath);
} else if (stats.isDirectory()) {
return listDir(filePath);
}
}).catch(function() {
// ignore errors on .stat - file could just be gone now
return;
});
});
}
return listDir(dir, allFiles).then(function() {
return Promise.map(allFiles, function(filename) {
return fileCallback(filename);
}, {concurrency: numConcurrent});
});
}
// example usage:
// pass the initial directory,
// the number of concurrent operations allowed at once
// and a callback function (that returns a promise) to process each file
processDir(process.cwd(), 5, function(file) {
// put your own code here to process each file
// this is code to cause each callback to take a random amount of time
// for testing purposes
var rand = Math.floor(Math.random() * 500) + 500;
return Promise.delay(rand).then(function() {
console.log(file);
});
}).catch(function(e) {
// error here
}).finally(function() {
console.log("done");
});
FYI, I think you'll find that proper error propagation and proper error handling from many async operations is much, much easier with promises than the plain callback method.

fs.watch fired twice when I change the watched file

fs.watch( 'example.xml', function ( curr, prev ) {
// on file change we can read the new xml
fs.readFile( 'example.xml','utf8', function ( err, data ) {
if ( err ) throw err;
console.dir(data);
console.log('Done');
});
});
OUTPUT:
some data
Done X 1
some data
Done X 2
It is my usage fault or ..?
The fs.watch api:
is unstable
has known "behaviour" with regards repeated notifications. Specifically, the windows case being a result of windows design, where a single file modification can be multiple calls to the windows API
I make allowance for this by doing the following:
var fsTimeout
fs.watch('file.js', function(e) {
if (!fsTimeout) {
console.log('file.js %s event', e)
fsTimeout = setTimeout(function() { fsTimeout=null }, 5000) // give 5 seconds for multiple events
}
}
I suggest to work with chokidar (https://github.com/paulmillr/chokidar) which is much better than fs.watch:
Commenting its README.md:
Node.js fs.watch:
Doesn't report filenames on OS X.
Doesn't report events at all when using editors like Sublime on OS X.
Often reports events twice.
Emits most changes as rename.
Has a lot of other issues
Does not provide an easy way to recursively watch file trees.
Node.js fs.watchFile:
Almost as bad at event handling.
Also does not provide any recursive watching.
Results in high CPU utilization.
If you need to watch your file for changes then you can check out my small library on-file-change. It checks file sha1 hash between fired change events.
Explanation of why we have multiple fired events:
You may notice in certain situations that a single creation event generates multiple Created events that are handled by your component. For example, if you use a FileSystemWatcher component to monitor the creation of new files in a directory, and then test it by using Notepad to create a file, you may see two Created events generated even though only a single file was created. This is because Notepad performs multiple file system actions during the writing process. Notepad writes to the disk in batches that create the content of the file and then the file attributes. Other applications may perform in the same manner. Because FileSystemWatcher monitors the operating system activities, all events that these applications fire will be picked up.
Source
My custom solution
I personally like using return to prevent a block of code to run when checking something, so, here is my method:
var watching = false;
fs.watch('./file.txt', () => {
if(watching) return;
watching = true;
// do something
// the timeout is to prevent the script to run twice with short functions
// the delay can be longer to disable the function for a set time
setTimeout(() => {
watching = false;
}, 100);
};
Feel free to use this example to simplify your code. It may NOT be better than using a module from others, but it works pretty well!
Similar/same problem. I needed to do some stuff with images when they were added to a directory. Here's how I dealt with the double firing:
var fs = require('fs');
var working = false;
fs.watch('directory', function (event, filename) {
if (filename && event == 'change' && active == false) {
active = true;
//do stuff to the new file added
active = false;
});
It will ignore the second firing until if finishes what it has to do with the new file.
I'm dealing with this issue for the first time, so all of the answers so far are probably better than my solution, however none of them were 100% suitable for my case so I came up with something slightly different – I used a XOR operation to flip an integer between 0 and 1, effectively keeping track of and ignoring every second event on the file:
var targetFile = "./watchThis.txt";
var flippyBit = 0;
fs.watch(targetFile, {persistent: true}, function(event, filename) {
if (event == 'change'){
if (!flippyBit) {
var data = fs.readFile(targetFile, "utf8", function(error, data) {
gotUpdate(data);
})
} else {
console.log("Doing nothing thanks to flippybit.");
}
flipBit(); // call flipBit() function
}
});
// Whatever we want to do when we see a change
function gotUpdate(data) {
console.log("Got some fresh data:");
console.log(data);
}
// Toggling this gives us the "every second update" functionality
function flipBit() {
flippyBit = flippyBit ^ 1;
}
I didn't want to use a time-related function (like jwymanm's answer) because the file I'm watching could hypothetically get legitimate updates very frequently. And I didn't want to use a list of watched files like Erik P suggests, because I'm only watching one file. Jan Święcki's solution seemed like overkill, as I'm working on extremely short and simple files in a low-power environment. Lastly, Bernado's answer made me a little nervous – it would only ignore the second update if it arrived before I'd finished processing the first, and I can't handle that kind of uncertainty. If anyone were to find themselves in this very specific scenario, there might be some merit to the approach I used? If there's anything massively wrong with it please do let me know/edit this answer, but so far it seems to work well?
NOTE: Obviously this strongly assumes that you'll get exactly 2 events per real change. I carefully tested this assumption, obviously, and learned its limitations. So far I've confirmed that:
Modifying a file in Atom editor and saving triggers 2 updates
touch triggers 2 updates
Output redirection via > (overwriting file contents) triggers 2 updates
Appending via >> sometimes triggers 1 update!*
I can think of perfectly good reasons for the differing behaviours but we don't need to know why something is happening to plan for it – I just wanted to stress that you'll want to check for yourself in your own environment and in the context of your own use cases (duh) and not trust a self-confessed idiot on the internet. That being said, with precautions taken I haven't had any weirdness so far.
* Full disclosure, I don't actually know why this is happening, but we're already dealing with unpredictable behaviour with the watch() function so what's a little more uncertainty? For anyone following along at home, more rapid appends to a file seem to cause it to stop double-updating but honestly, I don't really know, and I'm comfortable with the behaviour of this solution in the actual case it'll be used, which is a one-line file that will be updated (contents replaced) like twice per second at the fastest.
first is change and the second is rename
we can make a difference from the listener function
function(event, filename) {
}
The listener callback gets two arguments (event, filename). event is either 'rename' or 'change', and filename is the name of the file which triggered the event.
// rm sourcefile targetfile
fs.watch( sourcefile_dir , function(event, targetfile)){
console.log( targetfile, 'is', event)
}
as a sourcefile is renamed as targetfile, it's will call three event as fact
null is rename // sourcefile not exist again
targetfile is rename
targetfile is change
notice that , if you want catch all these three evnet, watch the dir of sourcefile
I somtimes get multible registrations of the Watch event causing the Watch event to fire several times.
I solved it by keeping a list of watching files and avoid registering the event if the file allready is in the list:
var watchfiles = {};
function initwatch(fn, callback) {
if watchlist[fn] {
watchlist[fn] = true;
fs.watch(fn).on('change', callback);
}
}
......
Like others answers says... This got a lot of troubles, but i can deal with this in this way:
var folder = "/folder/path/";
var active = true; // flag control
fs.watch(folder, function (event, filename) {
if(event === 'rename' && active) { //you can remove this "check" event
active = false;
// ... its just an example
for (var i = 0; i < 100; i++) {
console.log(i);
}
// ... other stuffs and delete the file
if(!active){
try {
fs.unlinkSync(folder + filename);
} catch(err) {
console.log(err);
}
active = true
}
}
});
Hope can i help you...
Easiest solution:
const watch = (path, opt, fn) => {
var lock = false
fs.watch(path, opt, function () {
if (!lock) {
lock = true
fn()
setTimeout(() => lock = false, 1000)
}
})
}
watch('/path', { interval: 500 }, function () {
// ...
})
I was downloading file with puppeteer and once a file saved, I was sending automatic emails. Due to problem above, I noticed, I was sending 2 emails. I solved by stopping my application using process.exit() and auto-start with pm2. Using flags in code didn't saved me.
If anyone has this problem in future, one can use this solution as well. Exit from program and restart with monitor tools automatically.
Here's my simple solution. It works well every time.
// Update obj as file updates
obj = JSON.parse(fs.readFileSync('./file.json', 'utf-8'));
fs.watch('./file.json', () => {
const data = JSON.parse(fs.readFileSync('./file.json', 'utf-8') || '{}');
if(Object.entries(data).length > 0) { // This checks fs.watch() isn't false-firing
obj = data;
console.log('File actually changed: ', obj)
}
});
I came across the same issue. If you don't want to trigger multiple times, you can use a debounce function.
fs.watch( 'example.xml', _.debounce(function ( curr, prev ) {
// on file change we can read the new xml
fs.readFile( 'example.xml','utf8', function ( err, data ) {
if ( err ) throw err;
console.dir(data);
console.log('Done');
});
}, 100));
Debouncing The Observer
A solution I arrived at was that (a) there needs to be a workaround for the problem in question and, (b), there needs to be a solution to ensure multiple rapid Ctrl+s actions do not cause Race Conditions. Here's what I have...
./**/utilities.js (somewhere)
export default {
...
debounce(fn, delay) { // #thxRemySharp https://remysharp.com/2010/07/21/throttling-function-calls/
var timer = null;
return function execute(...args) {
var context = this;
clearTimeout(timer);
timer = setTimeout(fn.bind(context, ...args), delay);
};
},
...
};
./**/file.js (elsewhere)
import utilities from './**/utilities.js'; // somewhere
...
function watch(server) {
const debounced = utilities.debounce(observeFilesystem.bind(this, server), 1000 * 0.25);
const observers = new Set()
.add( fs.watch('./src', debounced) )
.add( fs.watch('./index.html', debounced) )
;
console.log(`watching... (${observers.size})`);
return observers;
}
function observeFilesystem(server, type, filename) {
if (!filename) console.warn(`Tranfer Dev Therver: filesystem observation made without filename for type ${type}`);
console.log(`Filesystem event occurred:`, type, filename);
server.close(handleClose);
}
...
This way, the observation-handler that we pass into fs.watch is [in this case a bound bunction] which gets debounced if multiple calls are made less than 1000 * 0.25 seconds (250ms) apart from one another.
It may be worth noting that I have also devised a pipeline of Promises to help avoid other types of Race Conditions as the code also leverages other callbacks. Please also note the attribution to Remy Sharp whose debounce function has repeatedly proven very useful over the years.
watcher = fs.watch( 'example.xml', function ( curr, prev ) {
watcher.close();
fs.readFile( 'example.xml','utf8', function ( err, data ) {
if ( err ) throw err;
console.dir(data);
console.log('Done');
});
});
I had similar similar problem but I was also reading the file in the callback which caused a loop.
This is where I found how to close watcher:
How to close fs.watch listener for a folder
NodeJS does not fire multiple events for a single change, it is the editor you are using updating the file multiple times.
Editors use stream API for efficiency, they read and write data in chunks which causes multiple updates depending on the chunks size and the amount of content. Here is a snippet to test if fs.watch fires multiple events:
const http = require('http');
const fs = require('fs');
const path = require('path');
const host = 'localhost';
const port = 3000;
const file = path.join(__dirname, 'config.json');
const requestListener = function (req, res) {
const data = new Date().toString();
fs.writeFileSync(file, data, { encoding: 'utf-8' });
res.end(data);
};
const server = http.createServer(requestListener);
server.listen(port, host, () => {
fs.watch(file, (eventType, filename) => {
console.log({ eventType });
});
console.log(`Server is running on http://${host}:${port}`);
});
I believe a simple solution would be checking for the last modified timestamp:
let lastModified;
fs.watch(file, (eventType, filename) => {
stat(file).then(({ mtimeMs }) => {
if (lastModified !== mtimeMs) {
lastModified = mtimeMs;
console.log({ eventType, filename });
}
});
});
Please note that you need to use all-sync or all-async methods otherwise you will have issues:
Update the file in a editor, you will see only single event is logged:
const http = require('http');
const host = 'localhost';
const port = 3000;
const fs = require('fs');
const path = require('path');
const file = path.join(__dirname, 'config.json');
let lastModified;
const requestListener = function (req, res) {
const data = Date.now().toString();
fs.writeFileSync(file, data, { encoding: 'utf-8' });
lastModified = fs.statSync(file).mtimeMs;
res.end(data);
};
const server = http.createServer(requestListener);
server.listen(port, host, () => {
fs.watch(file, (eventType, filename) => {
const mtimeMs = fs.statSync(file).mtimeMs;
if (lastModified !== mtimeMs) {
lastModified = mtimeMs;
console.log({ eventType });
}
});
console.log(`Server is running on http://${host}:${port}`);
});
Few notes on the alternative solutions: Storing files for comparison will be memory inefficient especially if you have large files, taking file hashes will be expensive, custom flags are hard to keep track of, especially if you are going to detect changes made by other applications, and lastly unsubscribing and re-subscribing requires unnecessary juggling.
If you don't need an instant result, you can use setTimout to debounce successive events:
let timeoutId;
fs.watch(file, (eventType, filename) => {
clearTimeout(timeoutId);
timeoutId = setTimeout(() => {
console.log({ eventType });
}, 100);
});

Resources