Node.js: write multiple dynamically changing files with fs.writeFile - node.js

I need to write multiple dynamically changing files based on an array of objects passed to a custom writeData() function. Each object contains the file name and the data to write, as shown below:
[
  {
    file_name: "example.json",
    dataObj,
  },
  {
    file_name: "example2.json",
    dataObj,
  },
  {
    file_name: "example3.json",
    dataObj,
  },
  {
    file_name: "example4.json",
    dataObj,
  },
];
My current method is to map this array and read + write new data to each file:
array.map((entry) => {
  fs.readFile(entry.file_name, "utf8", (err, unparsedData) => {
    if (err) console.log(err);
    else {
      var parsedData = JSON.parse(unparsedData);
      parsedData.data.push(entry.dataObj);
      const parsedDataJSON = JSON.stringify(parsedData, null, 2);
      fs.writeFile(entry.file_name, parsedDataJSON, "utf8", (err) => {
        if (err) console.log(err);
      });
    }
  });
});
This, however, does not work. Only a small percentage of the data is written to these files, and often the file ends up with invalid JSON (I think this is because two writeFile processes write to the same file at once and corrupt it). Obviously this does not work the way I expected it to.
I have tried several ways to resolve this, such as making the write synchronous (delaying the map loop so each write finishes before moving to the next entry), but that is bad practice since synchronous calls block the entire app. I have also looked into promises, but to no avail. I am new to Node.js, so apologies for any missing details. Any help is appreciated!

The same file is often listed multiple times in the array if that changes anything.
Well, that changes everything. You should have shown that in the original question. If that is the case, then you have to sequence each individual file in the loop so one finishes before advancing to the next. To prevent conflicts between writes to the same file, you have to assure yourself of two things:
You sequence each of the files in the loop so the next one doesn't start until the previous one is done.
You don't call this code again while it's still in operation.
You can assure yourself of the first item like this:
async function processFiles(array) {
  for (let entry of array) {
    const unparsedData = await fs.promises.readFile(entry.file_name, "utf8");
    const parsedData = JSON.parse(unparsedData);
    parsedData.data.push(entry.dataObj);
    const json = JSON.stringify(parsedData, null, 2);
    await fs.promises.writeFile(entry.file_name, json, "utf8");
  }
}
This will abort the loop if it gets an error on any of them. If you want it to continue to write the others, you can add a try/catch internally:
async function processFiles(array) {
  let firstError;
  for (let entry of array) {
    try {
      const unparsedData = await fs.promises.readFile(entry.file_name, "utf8");
      const parsedData = JSON.parse(unparsedData);
      parsedData.data.push(entry.dataObj);
      const json = JSON.stringify(parsedData, null, 2);
      await fs.promises.writeFile(entry.file_name, json, "utf8");
    } catch (e) {
      // log the error and continue with the rest of the loop
      if (!firstError) {
        firstError = e;
      }
      console.log(e);
    }
  }
  // make sure we communicate back any error that happened
  if (firstError) {
    throw firstError;
  }
}
To assure yourself of the second point above, you will have to either not use setInterval() (replace it with a setTimeout() that you schedule when the promise returned by processFiles() resolves), or make absolutely sure that the setInterval() interval is long enough that it will never fire before processFiles() is done.
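As a minimal sketch of that first option (scheduleProcessing and the 10-second interval are hypothetical, not from the original), scheduling the next run only after the current one finishes guarantees the runs never overlap:
// Re-schedules itself only after processFiles() settles, so two runs
// can never overlap (unlike setInterval, which fires on a fixed clock).
function scheduleProcessing(array, intervalMs) {
  setTimeout(async () => {
    try {
      await processFiles(array);
    } catch (e) {
      console.log(e); // processFiles() rethrows the first error it saw
    }
    scheduleProcessing(array, intervalMs); // arm the next run only now
  }, intervalMs);
}

scheduleProcessing(array, 10000); // example: run at most once every 10s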
Also, make absolutely sure that you are not modifying the array used in this function while that function is running.

Related

Why await within async function doesn't work for fs modules?

I am trying to read a Sample.json file through my JS code. First my program checks for Sample.json within every folder in the specified path, then reads it if available and fetches the data. But the await does not work as expected and simply passes the empty object to the calling function before the async function completes its execution. I have attached an image of the issue.
async function getAvailableJson(filesPath) {
  let detectedJson = {};
  let folders = await fs.promises.readdir(filesPath);
  folders.forEach(async function (folder) {
    await fs.promises.access(path.join(filesPath, folder, "Sample.json")).then(async function () {
      jsonData = await fs.promises.readFile(path.join(filesPath, folder, "Sample.json"));
      const directory = JSON.parse(jsonData);
      const hashvalue = Hash.MD5(jsonData);
      detectedJson[directory["dirName"]] = {
        name: directory["dirName"],
        version: directory["dirVersion"],
        hash: hashvalue
      };
      console.log(detectedJson);
    }).catch(function (err) {
      if (err.code === "ENOENT") {}
    });
  });
  return detectedJson;
}
I don't want to use any sync functions since they create unnecessary blocking. I have also tried the fs.readdir, fs.access and fs.readFile callback variants. Could someone point out what I am doing wrong here, since I am new to Node.js? Thanks in advance.
Change your .forEach() to use for/of instead and generally simplify by not mixing await and .then().
async function getAvailableJson(filesPath) {
  let detectedJson = {};
  let folders = await fs.promises.readdir(filesPath);
  for (let folder of folders) {
    let file = path.join(filesPath, folder, "Sample.json");
    try {
      let jsonData = await fs.promises.readFile(file);
      const directory = JSON.parse(jsonData);
      const hashvalue = Hash.MD5(jsonData);
      detectedJson[directory["dirName"]] = {
        name: directory["dirName"],
        version: directory["dirVersion"],
        hash: hashvalue
      };
    } catch (err) {
      // silently skip any directories that don't have Sample.json in them;
      // otherwise, throw the error to stop further processing
      if (err.code !== "ENOENT") {
        console.log(`Error on file ${file}`, err);
        throw err;
      }
    }
    console.log(detectedJson);
  }
  return detectedJson;
}
Summary of Changes:
Replace .forEach() with for/of.
Remove .then() and use only await.
Remove .catch() and use only try/catch.
Remove the call to fs.promises.access(), since the error can just be handled on fs.promises.readFile().
Add logging if the error is not ENOENT so you can see what the error is and what file it's on. You pretty much never want to silently eat an error with no logging. Though you may want to skip some particular errors, others must be logged. Rethrow errors that are not ENOENT so the caller will see them.
Declare and initialize all variables in use here as local variables.
.forEach() is not promise-aware so using await inside it does not pause the outer function at all. Instead, use a for/of loop which doesn't create the extra function scope and will allow await to pause the parent function.
Also, I consider .forEach() to be pretty much obsolete these days. It's not promise-aware. for/of is a more efficient and more generic way to iterate. And, there's no longer a need to create a new function scope using the .forEach() callback because we have block-scoped variables with let and const. I don't use it any more.
Also, I see no reason why you're preflighting things with fs.promises.access(). That just creates a race condition and you may as well just handle whatever error you get from fs.promises.readFile() as that will do the same thing without the race condition.
See also a related answer on a similar issue.

Correct way to organise this process in Node

I need some advice on how to structure this function, as at the moment things do not happen in the correct order due to Node being asynchronous.
This is the flow I want to achieve; I don't need help with the code itself, but with the order of operations to achieve the end result, plus any suggestions on how to make it efficient:
1. Node routes a GET request to my controller.
2. Controller reads a .csv file on the local system and opens a read stream using the fs module.
3. Then use the csv-parse module to convert that to an array line by line (many 100,000's of lines).
4. Start a try/catch block.
5. With the current row from the csv, take a value and try to find it in a MongoDB.
6. If found, take the ID and store the line from the CSV and this id as a foreign ID in a separate database.
7. If not found, create an entry into the DB, take the new ID and then do 6.
8. Print out to the terminal the row number being worked on (ideally at some point I would like to be able to send this value to the page and have it update like a progress bar as the rows are completed).
Here is a small part of the code structure that I am currently using;
const fs = require('fs');
const parse = require('csv-parse');

function addDataOne(req, id) {
  const modelOneInstance = new InstanceOne({ ...code });
  const resultOne = modelOneInstance.save();
  return resultOne;
}

function addDataTwo(req, id) {
  const modelTwoInstance = new InstanceTwo({ ...code });
  const resultTwo = modelTwoInstance.save();
  return resultTwo;
}

exports.add_data = (req, res) => {
  const fileSys = 'public/data/';
  const parsedData = [];
  let i = 0;
  fs.createReadStream(`${fileSys}${req.query.file}`)
    .pipe(parse({}))
    .on('data', (dataRow) => {
      let RowObj = {
        one: dataRow[0],
        two: dataRow[1],
        three: dataRow[2],
        etc,
        etc
      };
      try {
        ModelOne.find(
          { propertyone: RowObj.one, propertytwo: RowObj.two },
          '_id, foreign_id'
        ).exec((err, searchProp) => {
          if (err) {
            console.log(err);
          } else {
            if (searchProp.length > 1) {
              console.log('too many returned from find function');
            }
            if (searchProp.length === 1) {
              addDataOne(RowObj, searchProp[0]).then((result) => {
                searchProp[0].foreign_id.push(result._id);
                searchProp[0].save();
              });
            }
            if (searchProp.length === 0) {
              let resultAddProp = null;
              addDataTwo(RowObj).then((result) => {
                resultAddProp = result;
                addDataOne(req, resultAddProp._id).then((result) => {
                  resultAddProp.foreign_id.push(result._id);
                  resultAddProp.save();
                });
              });
            }
          }
        });
      } catch (error) {
        console.log(error);
      }
      i++;
      let iString = i.toString();
      process.stdout.clearLine();
      process.stdout.cursorTo(0);
      process.stdout.write(iString);
    })
    .on('end', () => {
      res.send('added');
    });
};
I have tried to make the functions use async/await, but it seems to conflict with the fs.createReadStream or csv-parse functionality, probably due to my inexperience and incorrect use of the code...
I appreciate that this is a long question about the fundamentals of the code, but just some tips/advice/pointers on how to get this going would be appreciated. I had it working when the data was sent one at a time via a POST request from Postman, but I can't implement the next stage, which is to read from the CSV file that contains many records.
First of all, you can combine the following two checks into one query:
if (searchProp.length === 1) {
if (searchProp.length === 0) {
Use the upsert option in MongoDB's findOneAndUpdate query to update or insert in a single operation.
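A minimal sketch of that idea, assuming a Mongoose model named ModelOne and the RowObj fields from the question (the exact update payload is hypothetical):
const doc = await ModelOne.findOneAndUpdate(
  { propertyone: RowObj.one, propertytwo: RowObj.two }, // filter on the row's values
  { $setOnInsert: { propertyone: RowObj.one, propertytwo: RowObj.two } }, // only set on insert
  { upsert: true, new: true } // insert if missing, return the resulting document
);
// `doc` is now either the pre-existing or the freshly inserted document,
// so the length === 1 and length === 0 branches collapse into one path.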
Secondly, don't do this on the main request path. Use a queue mechanism; it will be much more efficient.
The queue I personally use is Bull:
https://github.com/OptimalBits/bull#basic-usage
This also provides the progress-reporting functionality you need.
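As a rough sketch of how that might look with Bull (the queue name and job payload here are hypothetical, and Bull requires a running Redis instance):
const Queue = require('bull');

// Bull is backed by Redis; the queue name is arbitrary.
const csvQueue = new Queue('csv-import');

// The worker runs outside the request/response cycle.
csvQueue.process(async (job) => {
  const { file } = job.data;
  // ... stream + parse the CSV here, row by row ...
  await job.progress(50); // report progress; Bull exposes this to listeners
});

// The controller only enqueues the work and responds immediately.
exports.add_data = (req, res) => {
  csvQueue.add({ file: req.query.file });
  res.send('queued');
};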
Also, regarding using async/await with a read stream, a lot of examples can be found on the net, such as: https://humanwhocodes.com/snippets/2019/05/nodejs-read-stream-promise/
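For instance, since Node streams are async-iterable, one way to sequence the per-row database work is sketched below (handleRow() is a hypothetical stand-in for the find/upsert logic):
const fs = require('fs');
const parse = require('csv-parse');

async function importCsv(filePath) {
  // The parser stream is async-iterable, so for await...of pulls one
  // parsed row at a time and pauses while we await the DB work.
  const parser = fs.createReadStream(filePath).pipe(parse({}));
  let row = 0;
  for await (const dataRow of parser) {
    await handleRow(dataRow); // hypothetical: the per-row find/upsert logic
    row++;
  }
  return row; // total rows processed
}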

Converting Promise.all to gradual promise resolution (every 3 promises, for example) does not work

I have a list of promises and currently I am using Promise.all to resolve them.
Here is my code for now:
const pageFutures = myQuery.pages.map(async (pageNumber: number) => {
  const urlObject: any = await this._service.getResultURL(searchRecord.details.id, authorization, pageNumber);
  if (!urlObject.url) {
    // throw error
  }
  const data = await rp.get({
    gzip: true,
    headers: {
      "Accept-Encoding": "gzip,deflate",
    },
    json: true,
    uri: `${urlObject.url}`,
  });
  const objects = data.objects.filter((object: any) => object.type === "observed-data" && object.created);
  return new Promise((resolve, reject) => {
    this._resultsDatastore.bulkInsert(
      databaseName,
      objects
    ).then(succ => {
      resolve(succ);
    }, err => {
      reject(err);
    });
  });
});
const all: any = await Promise.all(pageFutures).catch(e => {
  console.log(e);
});
So as you see here I use Promise.all and it works:
const all: any = await Promise.all(pageFutures).catch(e => {
  console.log(e);
});
However, I noticed it affects the database performance-wise, so I decided to resolve every 3 of them at a time.
For that I was considering different approaches like cwait, async-pool, or writing my own iterator,
but I am confused about how to do that.
For example, when I use cwait:
let promiseQueue = new TaskQueue(Promise, 3);
const all = new Promise.map(pageFutures, promiseQueue.wrap(() => {}));
I do not know what to pass inside the wrap, so I pass () => {} for now, plus I get:
Property 'map' does not exist on type 'PromiseConstructor'.
So I am OK with whatever way gets it working (my own iterator or any library), as long as I end up with a good understanding of it.
I would appreciate it if anyone could shed light on this and help me out of my confusion!
First some remarks:
Indeed, in your current setup, the database may have to process several bulk inserts concurrently. But that concurrency is not caused by using Promise.all. Even if you had left Promise.all out of your code, it would still behave that way, because the promises were already created, and so the database requests will execute anyway.
Not related to your issue, but don't use the promise constructor antipattern: there is no need to create a promise with new Promise when you already have a promise in your hands: bulkInsert() returns a promise, so return that one.
As your concern is about the database load, I would limit the work initiated by the pageFutures promises to the non-database aspects: they don't have to wait for each other's resolution, so that code can stay like it was.
Let those promises resolve with what you currently store in objects: the data you want to have inserted. Then concatenate all those arrays together to one big array, and feed that to one database bulkInsert() call.
Here is how that could look:
const pageFutures = myQuery.pages.map(async (pageNumber: number) => {
  const urlObject: any = await this._service.getResultURL(searchRecord.details.id,
    authorization, pageNumber);
  if (!urlObject.url) {
    // throw error
  }
  const data = await rp.get({
    gzip: true,
    headers: { "Accept-Encoding": "gzip,deflate" },
    json: true,
    uri: `${urlObject.url}`,
  });
  // Return here, don't access the database yet...
  return data.objects.filter((object: any) => object.type === "observed-data"
    && object.created);
});
const all: any = (await Promise.all(pageFutures).catch(e => {
  console.log(e);
  return []; // in case of error, still return an array
})).flat(); // flatten it, so all data chunks are concatenated in one long array
// Don't create a new Promise with `new`, only to wrap another promise.
// It is an antipattern. Use the promise returned by `bulkInsert`.
return this._resultsDatastore.bulkInsert(databaseName, all);
This uses .flat(), which is rather new. In case you have no support for it, look at the alternatives provided on MDN.
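One such alternative, as a sketch: a single level of flattening can be emulated with concat and the spread operator:
// Equivalent of arrayOfArrays.flat() for one level of nesting:
const flattened = [].concat(...arrayOfArrays);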
First, you asked a question about a failing solution attempt. That is called an X/Y problem.
So in fact, as I understand your question, you want to delay some DB requests.
You don't want to delay the resolving of a promise created by a DB request. No, don't try that! The promise will resolve when the DB returns a result; it's a bad idea to interfere with that process.
I banged my head a while with the library you tried, but I could not solve your issue with it. So I came up with the idea of just looping over the data and setting some timeouts.
I made a runnable demo here: Delaying DB request in small batch
Here is the code. Notice that I simulated some data and a DB request; you will have to adapt it. You will also have to adjust the timeout delay, since a full second certainly is too long.
// That part is to simulate some data you would like to save.
// Let's make it a random amount for fun.
let howMuch = Math.ceil(Math.random() * 20);

// A fake data array...
let someData = [];
for (let i = 0; i < howMuch; i++) {
  someData.push("Data #" + i);
}
console.log("Some fake data");
console.log(someData);
console.log("");

// So we have some data that looks real. (lol)
// We want to save it in small groups.
// And that is to simulate your DB request.
let saveToDB = (data, dataIterator) => {
  console.log("Requesting DB...");
  return new Promise(function (resolve, reject) {
    resolve("Request #" + dataIterator + " complete.");
  });
};

// Ok, we have everything. Let's proceed!
let batchSize = 3; // The amount of requests to do at once.
let delay = 1000; // The delay between each batch.

// Loop through all the data you have.
for (let i = 0; i < someData.length; i++) {
  if (i % batchSize == 0) {
    console.log("Splitting in batch...");
    // Process a batch on one timeout.
    let timeout = setTimeout(() => {
      // An empty line to clarify the console.
      console.log("");
      // Grouping the requests by "batchSize", or less if we're almost done.
      for (let j = 0; j < batchSize; j++) {
        // If there still is data to process.
        if (i + j < someData.length) {
          // Your real database request goes here.
          saveToDB(someData[i + j], i + j).then(result => {
            console.log(result);
            // Do something with the result.
            // ...
          });
        } // END if there is still data.
      } // END sending requests for that batch.
    }, delay * i); // Timeout delay.
  } // END splitting in batch.
} // END for each data.
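Alternatively, if you want each batch of 3 to actually finish before the next one starts (rather than firing on a fixed timer), a small sketch without any library could chunk the work and await each chunk; the fetchPage() helper here is hypothetical:
// Run the tasks at most `batchSize` at a time, awaiting each chunk.
// `tasks` is an array of functions that each start their work (and return
// a promise) only when called, so no request begins before its chunk is up.
async function runInBatches(tasks, batchSize = 3) {
  const results = [];
  for (let i = 0; i < tasks.length; i += batchSize) {
    const chunk = tasks.slice(i, i + batchSize).map(fn => fn());
    results.push(...await Promise.all(chunk)); // wait for the whole chunk
  }
  return results;
}

// Usage sketch: wrap each page in a thunk instead of starting it eagerly.
// const tasks = myQuery.pages.map(pageNumber => () => fetchPage(pageNumber));
// const all = await runInBatches(tasks, 3);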

Fs.writeFile callback not called

Node version: 8.11.2
I have a simple CSV export function that takes an array of objects and generates the headers of the file based on the properties of those objects.
const exportCsv = (list, fileName) => {
  if (list.length > 0) {
    let headers = Object.keys(list[0]);
    let opts = { headers };
    let parser = new Parser(opts);
    let csv = parser.parse(list);
    fs.writeFile(`./output/${fileName}.csv`, csv, err => {
      if (err) {
        console.error(err);
      }
      console.log(`Wrote ${fileName} to disk.`);
    });
  } else {
    console.log('List is Empty. Nothing to export.');
  }
};
It was working great, but now the callback in the fs.writeFile call isn't firing, and there are no errors or exceptions from VS Code's debugger.
What would cause it to not run?
If the process dies before the write is done, your callback will not be called, because the write is asynchronous.
So you have a couple of options (a sketch of the first follows below):
make sure your process does not exit before the write is done (you can use async/await or a promise)
use writeFileSync instead (less efficient but less confusing)
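A minimal sketch of the first option, using the promise-based API so the caller can await completion before the process is allowed to exit (Parser is assumed to be set up as in the question):
const fs = require('fs');

const exportCsv = async (list, fileName) => {
  if (list.length === 0) {
    console.log('List is Empty. Nothing to export.');
    return;
  }
  const headers = Object.keys(list[0]);
  const parser = new Parser({ headers }); // Parser as in the question
  const csv = parser.parse(list);
  // Awaiting keeps the process alive until the write actually finishes.
  await fs.promises.writeFile(`./output/${fileName}.csv`, csv);
  console.log(`Wrote ${fileName} to disk.`);
};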

fs.watch fired twice when I change the watched file

fs.watch('example.xml', function (curr, prev) {
  // on file change we can read the new xml
  fs.readFile('example.xml', 'utf8', function (err, data) {
    if (err) throw err;
    console.dir(data);
    console.log('Done');
  });
});
OUTPUT:
some data
Done X 1
some data
Done X 2
Is this my usage fault, or ..?
The fs.watch api:
is unstable
has known "behaviour" with regard to repeated notifications. Specifically, the Windows case is a result of Windows' design, where a single file modification can produce multiple calls to the Windows API
I make allowance for this by doing the following:
var fsTimeout;

fs.watch('file.js', function (e) {
  if (!fsTimeout) {
    console.log('file.js %s event', e);
    fsTimeout = setTimeout(function () { fsTimeout = null }, 5000); // give 5 seconds for multiple events
  }
});
I suggest working with chokidar (https://github.com/paulmillr/chokidar), which is much better than fs.watch; a minimal usage sketch follows the comparison below.
Quoting its README.md:
Node.js fs.watch:
Doesn't report filenames on OS X.
Doesn't report events at all when using editors like Sublime on OS X.
Often reports events twice.
Emits most changes as rename.
Has a lot of other issues
Does not provide an easy way to recursively watch file trees.
Node.js fs.watchFile:
Almost as bad at event handling.
Also does not provide any recursive watching.
Results in high CPU utilization.
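A minimal chokidar usage sketch (the watched path is just an example):
const chokidar = require('chokidar');

// chokidar consolidates the noisy fs.watch events into clean per-file events.
chokidar.watch('example.xml').on('change', (path) => {
  console.log(`${path} changed`);
});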
If you need to watch your file for changes, then you can check out my small library on-file-change. It checks the file's sha1 hash between fired change events.
Explanation of why we get multiple fired events:
You may notice in certain situations that a single creation event generates multiple Created events that are handled by your component. For example, if you use a FileSystemWatcher component to monitor the creation of new files in a directory, and then test it by using Notepad to create a file, you may see two Created events generated even though only a single file was created. This is because Notepad performs multiple file system actions during the writing process. Notepad writes to the disk in batches that create the content of the file and then the file attributes. Other applications may perform in the same manner. Because FileSystemWatcher monitors the operating system activities, all events that these applications fire will be picked up.
Source
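The hash-comparison idea behind that library can be sketched in a few lines with Node's built-in crypto module (this illustrates the approach, not the library's actual code):
const fs = require('fs');
const crypto = require('crypto');

let lastHash = null;

fs.watch('example.xml', () => {
  // Hash the current contents; only react when the hash actually changes.
  const content = fs.readFileSync('example.xml');
  const hash = crypto.createHash('sha1').update(content).digest('hex');
  if (hash !== lastHash) {
    lastHash = hash;
    console.log('real change detected');
  }
});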
My custom solution
I personally like using return to prevent a block of code from running when checking something, so here is my method:
var watching = false;

fs.watch('./file.txt', () => {
  if (watching) return;
  watching = true;

  // do something

  // the timeout is to prevent the script from running twice with short functions
  // (the delay can be longer to disable the function for a set time)
  setTimeout(() => {
    watching = false;
  }, 100);
});
Feel free to use this example to simplify your code. It may NOT be better than using a module from others, but it works pretty well!
Similar/same problem. I needed to do some stuff with images when they were added to a directory. Here's how I dealt with the double firing:
var fs = require('fs');
var working = false;

fs.watch('directory', function (event, filename) {
  if (filename && event == 'change' && working == false) {
    working = true;
    // do stuff to the new file added
    working = false;
  }
});
It will ignore the second firing until it finishes what it has to do with the new file.
I'm dealing with this issue for the first time, so all of the answers so far are probably better than my solution, however none of them were 100% suitable for my case so I came up with something slightly different – I used a XOR operation to flip an integer between 0 and 1, effectively keeping track of and ignoring every second event on the file:
var targetFile = "./watchThis.txt";
var flippyBit = 0;

fs.watch(targetFile, { persistent: true }, function (event, filename) {
  if (event == 'change') {
    if (!flippyBit) {
      fs.readFile(targetFile, "utf8", function (error, data) {
        gotUpdate(data);
      });
    } else {
      console.log("Doing nothing thanks to flippybit.");
    }
    flipBit(); // toggle flippyBit for the next event
  }
});

// Whatever we want to do when we see a change
function gotUpdate(data) {
  console.log("Got some fresh data:");
  console.log(data);
}

// Toggling this gives us the "every second update" functionality
function flipBit() {
  flippyBit = flippyBit ^ 1;
}
I didn't want to use a time-related function (like jwymanm's answer) because the file I'm watching could hypothetically get legitimate updates very frequently. And I didn't want to use a list of watched files like Erik P suggests, because I'm only watching one file. Jan Święcki's solution seemed like overkill, as I'm working on extremely short and simple files in a low-power environment. Lastly, Bernado's answer made me a little nervous – it would only ignore the second update if it arrived before I'd finished processing the first, and I can't handle that kind of uncertainty. If anyone were to find themselves in this very specific scenario, there might be some merit to the approach I used? If there's anything massively wrong with it please do let me know/edit this answer, but so far it seems to work well?
NOTE: Obviously this strongly assumes that you'll get exactly 2 events per real change. I carefully tested this assumption, obviously, and learned its limitations. So far I've confirmed that:
Modifying a file in Atom editor and saving triggers 2 updates
touch triggers 2 updates
Output redirection via > (overwriting file contents) triggers 2 updates
Appending via >> sometimes triggers 1 update!*
I can think of perfectly good reasons for the differing behaviours but we don't need to know why something is happening to plan for it – I just wanted to stress that you'll want to check for yourself in your own environment and in the context of your own use cases (duh) and not trust a self-confessed idiot on the internet. That being said, with precautions taken I haven't had any weirdness so far.
* Full disclosure, I don't actually know why this is happening, but we're already dealing with unpredictable behaviour with the watch() function so what's a little more uncertainty? For anyone following along at home, more rapid appends to a file seem to cause it to stop double-updating but honestly, I don't really know, and I'm comfortable with the behaviour of this solution in the actual case it'll be used, which is a one-line file that will be updated (contents replaced) like twice per second at the fastest.
The first event is 'change' and the second is 'rename'; we can tell them apart inside the listener function:
function (event, filename) {
}
The listener callback gets two arguments (event, filename). event is either 'rename' or 'change', and filename is the name of the file which triggered the event.
// rm sourcefile targetfile
fs.watch(sourcefile_dir, function (event, targetfile) {
  console.log(targetfile, 'is', event);
});
When sourcefile is renamed to targetfile, it will in fact fire three events:
null is rename // sourcefile does not exist any more
targetfile is rename
targetfile is change
Notice that if you want to catch all three events, you should watch the directory of sourcefile.
I sometimes get multiple registrations of the watch event, causing it to fire several times.
I solved it by keeping a list of watched files and avoiding registering the event if the file is already in the list:
var watchlist = {};

function initwatch(fn, callback) {
  if (!watchlist[fn]) {
    watchlist[fn] = true;
    fs.watch(fn).on('change', callback);
  }
}
......
Like other answers say, this has a lot of quirks, but I deal with it in this way:
var folder = "/folder/path/";
var active = true; // flag control

fs.watch(folder, function (event, filename) {
  if (event === 'rename' && active) { // you can remove this "event" check
    active = false;
    // ... it's just an example
    for (var i = 0; i < 100; i++) {
      console.log(i);
    }
    // ... other stuff, then delete the file
    if (!active) {
      try {
        fs.unlinkSync(folder + filename);
      } catch (err) {
        console.log(err);
      }
      active = true;
    }
  }
});
Hope this helps...
Easiest solution:
const watch = (path, opt, fn) => {
  var lock = false;
  fs.watch(path, opt, function () {
    if (!lock) {
      lock = true;
      fn();
      setTimeout(() => lock = false, 1000);
    }
  });
};

watch('/path', { interval: 500 }, function () {
  // ...
});
I was downloading a file with Puppeteer, and once the file was saved I sent an automatic email. Due to the problem above, I noticed I was sending 2 emails. I solved it by stopping my application with process.exit() and auto-restarting with pm2. Using flags in the code didn't save me.
If anyone has this problem in the future, they can use this solution as well: exit the program and restart it automatically with monitoring tools.
Here's my simple solution. It works well every time.
// Update obj as the file updates
obj = JSON.parse(fs.readFileSync('./file.json', 'utf-8'));

fs.watch('./file.json', () => {
  const data = JSON.parse(fs.readFileSync('./file.json', 'utf-8') || '{}');
  if (Object.entries(data).length > 0) { // This checks fs.watch() isn't false-firing
    obj = data;
    console.log('File actually changed: ', obj);
  }
});
I came across the same issue. If you don't want to trigger multiple times, you can use a debounce function.
fs.watch('example.xml', _.debounce(function (curr, prev) {
  // on file change we can read the new xml
  fs.readFile('example.xml', 'utf8', function (err, data) {
    if (err) throw err;
    console.dir(data);
    console.log('Done');
  });
}, 100));
Debouncing The Observer
A solution I arrived at was that (a) there needs to be a workaround for the problem in question, and (b) there needs to be a way to ensure multiple rapid Ctrl+S actions do not cause race conditions. Here's what I have...
./**/utilities.js (somewhere)
export default {
  ...
  debounce(fn, delay) { // #thxRemySharp https://remysharp.com/2010/07/21/throttling-function-calls/
    var timer = null;
    return function execute(...args) {
      var context = this;
      clearTimeout(timer);
      timer = setTimeout(fn.bind(context, ...args), delay);
    };
  },
  ...
};
./**/file.js (elsewhere)
import utilities from './**/utilities.js'; // somewhere

function watch(server) {
  const debounced = utilities.debounce(observeFilesystem.bind(this, server), 1000 * 0.25);
  const observers = new Set()
    .add(fs.watch('./src', debounced))
    .add(fs.watch('./index.html', debounced));
  console.log(`watching... (${observers.size})`);
  return observers;
}

function observeFilesystem(server, type, filename) {
  if (!filename) console.warn(`Transfer Dev Server: filesystem observation made without filename for type ${type}`);
  console.log(`Filesystem event occurred:`, type, filename);
  server.close(handleClose);
}
...
This way, the observation handler that we pass into fs.watch is (in this case) a bound function that gets debounced if multiple calls are made less than 1000 * 0.25 seconds (250ms) apart from one another.
It may be worth noting that I have also devised a pipeline of Promises to help avoid other types of Race Conditions as the code also leverages other callbacks. Please also note the attribution to Remy Sharp whose debounce function has repeatedly proven very useful over the years.
watcher = fs.watch('example.xml', function (curr, prev) {
  watcher.close();
  fs.readFile('example.xml', 'utf8', function (err, data) {
    if (err) throw err;
    console.dir(data);
    console.log('Done');
  });
});
I had a similar problem, but I was also reading the file in the callback, which caused a loop.
This is where I found how to close watcher:
How to close fs.watch listener for a folder
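One caveat with this approach is that after close() the file is no longer watched at all, so a sketch of re-arming the watcher once the read finishes might look like this (watchOnce is a hypothetical helper name):
const fs = require('fs');

function watchOnce(file, onData) {
  const watcher = fs.watch(file, () => {
    watcher.close(); // stop listening, so the duplicate event is ignored
    fs.readFile(file, 'utf8', (err, data) => {
      if (err) throw err;
      onData(data);
      watchOnce(file, onData); // re-arm the watcher for the next change
    });
  });
}

watchOnce('example.xml', (data) => console.dir(data));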
Node.js does not fire multiple events for a single change; it is the editor you are using that updates the file multiple times.
Editors use the stream API for efficiency: they read and write data in chunks, which causes multiple updates depending on the chunk size and the amount of content. Here is a snippet to test whether fs.watch fires multiple events:
const http = require('http');
const fs = require('fs');
const path = require('path');

const host = 'localhost';
const port = 3000;
const file = path.join(__dirname, 'config.json');

const requestListener = function (req, res) {
  const data = new Date().toString();
  fs.writeFileSync(file, data, { encoding: 'utf-8' });
  res.end(data);
};

const server = http.createServer(requestListener);
server.listen(port, host, () => {
  fs.watch(file, (eventType, filename) => {
    console.log({ eventType });
  });
  console.log(`Server is running on http://${host}:${port}`);
});
I believe a simple solution would be checking for the last modified timestamp:
const { stat } = require('fs/promises'); // promise-based stat

let lastModified;

fs.watch(file, (eventType, filename) => {
  stat(file).then(({ mtimeMs }) => {
    if (lastModified !== mtimeMs) {
      lastModified = mtimeMs;
      console.log({ eventType, filename });
    }
  });
});
Please note that you need to use all-sync or all-async methods, otherwise you will have issues.
Update the file in an editor; you will see that only a single event is logged:
const http = require('http');
const fs = require('fs');
const path = require('path');

const host = 'localhost';
const port = 3000;
const file = path.join(__dirname, 'config.json');

let lastModified;

const requestListener = function (req, res) {
  const data = Date.now().toString();
  fs.writeFileSync(file, data, { encoding: 'utf-8' });
  lastModified = fs.statSync(file).mtimeMs;
  res.end(data);
};

const server = http.createServer(requestListener);
server.listen(port, host, () => {
  fs.watch(file, (eventType, filename) => {
    const mtimeMs = fs.statSync(file).mtimeMs;
    if (lastModified !== mtimeMs) {
      lastModified = mtimeMs;
      console.log({ eventType });
    }
  });
  console.log(`Server is running on http://${host}:${port}`);
});
A few notes on the alternative solutions: storing files for comparison will be memory-inefficient, especially if you have large files; taking file hashes will be expensive; custom flags are hard to keep track of, especially if you want to detect changes made by other applications; and lastly, unsubscribing and re-subscribing requires unnecessary juggling.
If you don't need an instant result, you can use setTimeout to debounce successive events:
let timeoutId;
fs.watch(file, (eventType, filename) => {
  clearTimeout(timeoutId);
  timeoutId = setTimeout(() => {
    console.log({ eventType });
  }, 100);
});
