I have a log file with about 14,000 aircraft position datapoints captured from a system called Flarm; it looks like this:
{"addr":"A","time":1531919658.578100,"dist":902.98,"alt":385,"vs":-8}
{"addr":"A","time":1531919658.987861,"dist":914.47,"alt":384,"vs":-7}
{"addr":"A","time":1531919660.217471,"dist":925.26,"alt":383,"vs":-7}
{"addr":"A","time":1531919660.623466,"dist":925.26,"alt":383,"vs":-7}
What I need to do is find a way to 'play' this file back in real time (as if it were occurring right now, even though it's pre-recorded), and emit an event whenever a log entry 'occurs'. The file is not being added to; it's pre-recorded, and the playback would happen at a later stage.
The reason for doing this is that I don't have access to the receiving equipment when I'm developing.
The only way I can think to do it is to set a timeout for every log entry, but that doesn't seem like the right way to do it. Also, this process would have to scale to longer recordings (this one was only an hour long).
Are there other ways of doing this?
If you want to "play them back" with the actual time difference, a setTimeout is pretty much what you have to do.
const processEntry = (entry, index) => {
index++;
const nextEntry = getEntry(index);
if (nextEntry == null) return;
const timeDiffMs = (nextEntry.time - entry.time) * 1000; // times are in seconds, setTimeout expects milliseconds
emitEntryEvent(entry);
setTimeout(processEntry, timeDiffMs, nextEntry, index);
};
processEntry(getEntry(0), 0);
This emits the current entry and then sets a timeout based on the difference until the next entry.
getEntry could either fetch lines from a prefilled array or fetch lines individually based on the index. In the latter case, only two lines of data would be in memory at the same time.
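A minimal sketch of the prefilled-array variant might look like this (the file path is illustrative):

const fs = require('fs');

// Read the whole log once and parse each line into an object.
const logLines = fs.readFileSync('./data/log.json', 'utf8')
  .split('\n')
  .filter(line => line.trim().length > 0)
  .map(line => JSON.parse(line));

// getEntry simply indexes into the array; returns null past the end.
const getEntry = (index) => (index < logLines.length ? logLines[index] : null);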
Got it working in the end! setTimeout turned out to be the answer, and combined with the input of Lucas S. this is what I ended up with:
const EventEmitter = require('events');
const fs = require('fs');
const readable = fs.createReadStream("./data/2018-07-18_1509log.json", {
encoding: 'utf8',
fd: null
});
function read_next_line() {
var chunk;
var line = '';
// While this is a thing we can do, assign chunk
while ((chunk = readable.read(1)) !== null) {
// If chunk is a newline character, return the line
if (chunk === '\n'){
return JSON.parse(line);
} else {
line += chunk;
}
}
return false;
}
var lines = [];
var nextline;
const processEntry = () => {
// If lines is empty, read a line
if (lines.length === 0) lines.push(read_next_line());
// Quit here if we've reached the last line
if ((nextline = read_next_line()) == false) return true;
// Else push the just read line into our array
lines.push(nextline);
// Get the time difference in milliseconds
var delay = Number(lines[1].time - lines[0].time) * 1000;
// Remove the first line
lines.shift();
module.exports.emit('data', lines[0]);
// Repeat after the calculated delay
setTimeout(processEntry, delay);
}
var ready_to_start = false;
// When the stream becomes readable, allow starting
readable.on('readable', function() {
ready_to_start = true;
});
module.exports = new EventEmitter;
module.exports.start = function() {
if (ready_to_start) processEntry();
if (!ready_to_start) return false;
}
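For completeness, a consumer of this module might look roughly like this (assuming the code above is saved as player.js; the retry is only needed because start() returns false until the stream has emitted 'readable'):

const player = require('./player'); // the module above; emits a 'data' event per log entry

player.on('data', (entry) => {
  console.log('entry occurred:', entry);
});

// start() returns false until the underlying stream is readable, so retry briefly.
const tryStart = () => {
  if (player.start() === false) setTimeout(tryStart, 100);
};
tryStart();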
Assuming you want to visualize the flight logs, you can use fs.watch as below, to watch the log file for changes:
fs.watch('somefile', function (event, filename) {
console.log('event is: ' + event);
if (filename) {
console.log('filename provided: ' + filename);
} else {
console.log('filename not provided');
}
});
The code excerpt is from here. For more information on fs.watch(), check out here.
Then, for seamless updates on the frontend, you can set up a WebSocket to your server, where you watch the log file and send each newly added row over that socket to the frontend.
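A minimal sketch of that push, assuming the ws package (the file name and the way the newly added row is read are placeholders):

const fs = require('fs');
const WebSocket = require('ws');

const wss = new WebSocket.Server({ port: 8080 });

fs.watch('somefile', function (event, filename) {
  // Placeholder: re-read the file and take its last line as the newly added row.
  const latestRow = fs.readFileSync('somefile', 'utf8').trim().split('\n').pop();

  // Push the new row to every connected frontend client.
  wss.clients.forEach(function (client) {
    if (client.readyState === WebSocket.OPEN) {
      client.send(latestRow);
    }
  });
});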
After you get the data in the frontend you can visualize it there. While I haven't done any flight visualization project before, I've used D3.js to visualize other stuff (sound, numerical data, metric analysis, etc.) a couple of times and it did the job every time.
I'm making a fun open sourced sample for doing Edge Compute Computer Vision using the Raspberry Pi as my hardware.
The current SDK I have to access hardware is nodejs based (I'll release a second with python when it is available). Warning: I am a node novice.
The issue I am facing is that I want to take pictures using the stock camera in a loop without saving a file. I just want to get access to the buffer, extract the pixels, pass to my second edge module.
Taking pictures without saving a file, inside a while(true) loop, appears to never execute.
Here is my sample:
'use strict';
var sleep = require('sleep');
const Raspistill = require('node-raspistill').Raspistill;
var pixel_getter = require('pixel-getter');
while(true) {
const camera = new Raspistill({ verticalFlip: true,
horizontalFlip: true,
width: 500,
height: 500,
encoding: 'jpg',
noFileSave: true,
time: 1 });
camera.takePhoto().then((photo) => {
console.log('got photo');
pixel_getter.get(photo,
function(err, pixels) {
console.log('got pixels');
console.log(String(pixels));
});
});
sleep.sleep(5);
}
console.log('picture taken');
In the above code, none of the console.log calls ever actually log, leading me to believe that photos are never taken and therefore pixels cannot be extracted.
Any assistance would be greatly appreciated.
UPDATE:
It looks like the looping mechanic is the problem: the synchronous while(true) loop (plus sleep.sleep) never yields back to the event loop, so the takePhoto() promise callbacks never get a chance to run. I guess I don't really care whether it takes pictures in a literal loop, as long as it takes a picture, I pass it off, it takes another picture, and so on indefinitely.
I decided to approach the problem with a recursive loop instead which worked brilliantly.
'use strict';
const sleep = require('sleep');
const Raspistill = require('node-raspistill').Raspistill;
const pixel_getter = require('pixel-getter')
const camera = new Raspistill({ verticalFlip: true,
horizontalFlip: true,
width: 500,
height: 500,
encoding: 'jpg',
noFileSave: true,
time: 5 });
function TakePictureLoop() {
console.log('taking picture');
camera.takePhoto().then((photo) => {
console.log('got photo');
pixel_getter.get(photo, function(err, pixels) {
console.log('got pixels');
TakePictureLoop();
});
});
}
TakePictureLoop();
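As a side note, an async/await version of the same loop also works, because each await yields back to the event loop between pictures (a sketch reusing the camera defined above; wrapping pixel-getter with promisify is my own addition):

const { promisify } = require('util');
const getPixels = promisify(pixel_getter.get);

async function takePictureLoop() {
  while (true) { // fine here: every await hands control back to the event loop
    try {
      const photo = await camera.takePhoto();
      console.log('got photo');
      const pixels = await getPixels(photo);
      console.log('got pixels');
      // hand the pixels off to the second edge module here
    } catch (err) {
      console.error('failed to take or decode a picture:', err);
    }
  }
}

takePictureLoop();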
I am dipping my toe into using different npm modules my own way whereas before I just executed already created gulpfiles. The npm module penthouse loads a webpage and determines the above the fold CSS for that page. I am trying to take that module and use it with a site crawler so I can get the above the fold css for all pages, and store that CSS in a table.
So essentially I am:
Crawling a site to get all the urls
capturing the page id from each url
storing pages & their id's in a CSV
load the CSV and pass each URL to penthouse
take penthouse output and store it in a table
So I am fine up until the last two steps. When I am reading the CSV, I get the error possible EventEmitter memory leak detected. 11 exit listeners added. Use emitter.setMaxListeners() to increase limit.
The stack trace points here at line 134. After reading about the error, it makes sense because I see a bunch of event listeners being added, but I don't see penthouse ever really executing and closing the event listeners.
It works just fine standalone as expected (Running penthouse against a single page then exiting). But when I execute the code below to try and loop through all URLs in a csv, it spits out the memory leak error twice, and just hangs. None of my console.log statements in the following script are executed.
However, I added a console.log to the end of the penthouse index.js file, and it is executed multiple times (where it adds event listeners), but it never times out or exits.
So it's clear I am not integrating this properly, but not sure how to proceed. What would be the best way to force it to read one line in the CSV at a time, process the URL, then take the output and store it in the DB before moving onto the next line?
const fs = require('fs');
var csv = require('fast-csv');
var penthouse = require('penthouse'),
path = require('path');
var readUrlCsv = function() {
var stream = fs.createReadStream("/home/vagrant/urls.csv");
var csvStream = csv()
//returns single line from CSV
.on("data", function(data) {
// data[0]: table id, data[1]: page type, data[2]: url
penthouse({
url : data[2],
css : './dist/styles/main.css'
}, function(err, criticalCss) {
if (err) {
console.log(err);
}
console.log('do we ever get here?'); //answer is no
if (data[1] === 'post') {
wp.posts().id( data[0] ).post({
inline_css: criticalCss
}).then(function( response ) {
console.log('saved to db');
});
} else {
wp.pages().id( data[0] ).page({
inline_css: criticalCss
}).then(function( response ) {
console.log('saved to db');
});
}
});
})
.on("end", function(){
console.log("done");
});
return stream.pipe(csvStream);
};
UPDATE
Changed my method to look like below so it processes all rows first, but still throws the same error. Writes "done" to the console, and immediately spits out the memory warning twice.
var readUrlCsv = function() {
var stream = fs.createReadStream("/home/vagrant/urls.csv");
var urls = [];
var csvStream = csv()
.on("data", function(data) {
// data[0]: table id, data[1]: page type, data[2]: url
urls.push(data);
})
.on("end", function(){
console.log("done");
buildCriticalCss(urls);
});
return stream.pipe(csvStream);
};
var buildCriticalCss = function(urls) {
//console.log(urls);
urls.forEach(function(data, idx) {
//console.log(data);
penthouse({
url : data[2],
css : './dist/styles/main.css',
// OPTIONAL params
width : 1300, // viewport width
height : 900, // viewport height
timeout: 30000, // ms; abort critical css generation after this timeout
strict: false, // set to true to throw on css errors (will run faster if no errors)
maxEmbeddedBase64Length: 1000 // characters; strip out inline base64 encoded resources larger than this
}, function(err, criticalCss) {
if (err) {
console.log(err);
}
console.log('do we ever finish one?');
if (data[1] === 'post') {
console.log('saving post ' + data[0]);
wp.posts().id( data[0] ).post({
inline_css: criticalCss
}).then(function( response ) {
console.log('saved post to db');
});
} else {
console.log('saving page ' + data[0]);
wp.pages().id( data[0] ).page({
inline_css: criticalCss
}).then(function( response ) {
console.log('saved page to db');
});
}
});
});
};
Update 2
I took the simple approach of controlling the number of concurrent processes spawned.
var readUrlCsv = function() {
var stream = fs.createReadStream("/home/vagrant/urls.csv");
var urls = [];
var csvStream = csv()
.on("data", function(data) {
// data[0]: table id, data[1]: page type, data[2]: url
urls.push(data);
})
.on("end", function(){
console.log("done");
//console.log(urls);
buildCriticalCss(urls);
});
return stream.pipe(csvStream);
};
function buildCriticalCss(data) {
var row = data.shift();
console.log(row);
penthouse({
url : row[2],
css : './dist/styles/main.css',
// OPTIONAL params
width : 1300, // viewport width
height : 900, // viewport height
timeout: 30000, // ms; abort critical css generation after this timeout
strict: false, // set to true to throw on css errors (will run faster if no errors)
maxEmbeddedBase64Length: 1000 // characters; strip out inline base64 encoded resources larger than this
}, function(err, criticalCss) {
if (err) {
console.log(err);
}
// handle your criticalCSS
console.log('finished');
console.log(row[2]);
// now start next job, if we have more urls
if (data.length !== 0) {
buildCriticalCss(data);
}
});
}
The error message you're seeing is a default printed to the console by node's event library if more than the allowed number of event listeners are defined for an instance of EventEmitter. It does not indicate an actual memory leak. Rather it is displayed to make sure you're aware of the possibility of a leak.
You can see this by checking the events.EventEmitter source code at lines 20 and 244.
To stop EventEmitter from displaying this message and since penthouse does not expose its specific EventEmitter, you'll need to set the default allowed event emitter listeners to something larger than its default value of 10 using:
var EventEmitter=require('events').EventEmitter;
EventEmitter.defaultMaxListeners=20;
Note that according to Node's documentation for EventEmitter.defaultMaxListeners, this will change the maximum number of listeners for all instances of EventEmitter, including those that have already been defined previous to the change.
Or you could simply ignore the message.
Further to the hanging of your code, I'd advise gathering all the results from the parsing of your CSV into an array, and then processing the array contents separately from the parsing process.
This would accomplish two things: it would allow you to
be assured the entire CSV file was valid before you started processing, and
instrument the processing of each element with debugging messages, which would give you deeper insight into how each element of the array was handled.
UPDATE
As noted below, depending on how many URLs you're processing, you're probably overwhelming Node's ability to handle all of your requests in parallel.
One easy way to proceed would be to use eventing to marshall your processing so your URLs are processed sequentially, as in:
var assert=require('assert'),
    events=require('events'),
    fs=require('fs'),
    csv=require('fast-csv'),
    penthouse=require('penthouse');
var emitter=new events.EventEmitter();
/** Container for URL records read from CSV file.
*
* @type {Array}
*/
var urls=[];
/** Reads urls from file and triggers processing
*
* @emits processUrl
*/
var readUrlCsv = function() {
var stream = fs.createReadStream("/home/vagrant/urls.csv");
stream.on('error',function(e){ // always handle errors!!
console.error('failed to createReadStream: %s',e);
process.exit(-1);
});
var csvStream = csv()
.on("data", function(data) {
// data[0]: table id, data[1]: page type, data[2]: url
urls.push(data);
})
.on("end", function(){
console.log("done reading csv");
//console.log(urls);
emitter.emit('processUrl'); // start processing URLs
})
.on('error',function(e){
console.error('failed to parse CSV: %s',e);
process.exit(-1);
});
// no return required since we don't do anything with the result
stream.pipe(csvStream);
};
/** Event handler to process a single URL
*
* @emits processUrl
*/
var onProcessUrl=function(){
// always check your assumptions
assert(Array.isArray(urls),'urls must be an array');
var urlRecord=urls.shift();
if(urlRecord){
assert(Array.isArray(urlRecord),'urlRecord must be an array');
assert(urlRecord.length>2,'urlRecord must have at least three elements');
penthouse(
{
// ...
},
function(e,criticalCss){
if(e){
console.error('failed to process record %s: %s',urlRecord,e);
return; // IMPORTANT! do not drop through to rest of func!
}
// do what you need with the result here
if(urls.length===0){ // ok, we're done
console.log('completed processing URLs');
return;
}
emitter.emit('processUrl');
}
);
}
}
/**
* processUrl event - triggers processing of next URL
*
* @event processUrl
*/
emitter.on('processUrl',onProcessUrl); // assign handler
// start everything going...
readUrlCsv();
The benefit of using events here rather than your solution is the lack of recursion which can easily overwhelm your stack.
Hint: You can use events to handle all program flow issues normally addressed by Promises or modules like async.
And since events are at the very heart of Node (the "event loop"), it's really the best, most efficient way to solve such problems.
It's both elegant and "The Node Way"!
Here is a gist that illustrates the technique, without relying on streams or penthouse, the output of which is:
url: url1
RESULT: RESULT FOR url1
url: url2
RESULT: RESULT FOR url2
url: url3
RESULT: RESULT FOR url3
completed processing URLs
Besides using console.logs which usually is enough, you can also use the built in debugger: https://nodejs.org/api/debugger.html
Another thing you can do is go into the node_modules/penthouse directory and add your console.logs or debugger statement into the code for that module. That way you can debug your program there rather than the module just being a black box.
Also make sure there isn't some kind of race condition where, for example, the CSV doesn't always get written before you try to read it in.
I think that the memory leak issue is probably a red herring as far as making your code function.
From your comment it sounds like you want to do something like the following with async.mapSeries: http://promise-nuggets.github.io/articles/15-map-in-series.html. You could also use promises as it shows, or even, after getting promises set up, use async/await with a regular for loop after compiling with babel. In the long run I recommend doing that sort of thing with async/await and babel, but that might be overkill just to get this working.
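For illustration, a sequential version with async/await might look roughly like this (a sketch only; it wraps the callback-style penthouse call shown above with promisify and assumes urls is the array built from the CSV):

const { promisify } = require('util');
const penthouseAsync = promisify(penthouse); // wrap the callback-style API used above

async function buildCriticalCssSequentially(urls) {
  for (const data of urls) {
    // data[0]: table id, data[1]: page type, data[2]: url
    try {
      const criticalCss = await penthouseAsync({
        url: data[2],
        css: './dist/styles/main.css'
      });
      console.log('finished', data[2]);
      // store criticalCss via the wp client here before the next iteration starts
    } catch (err) {
      console.log(err);
    }
  }
  console.log('completed processing URLs');
}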
How can I prevent someone from simply doing
while(true){client.emit('i am spammer', true)};
This sure proves to be a problem when someone has the urge to crash my node server!
Like tsrurzl said you need to implement a rate limiter (throttling sockets).
Following code example only works reliably if your socket returns a Buffer (instead of a string). The code example assumes that you will first call addRatingEntry(), and then call evalRating() immediately afterwards. Otherwise you risk a memory leak in the case where evalRating() doesn't get called at all or too late.
var rating, limit, interval;
rating = []; // rating: [*{'timestamp', 'size'}]
limit = 1048576; // limit: maximum number of bytes/characters.
interval = 1000; // interval: interval in milliseconds.
// Describes a rate limit of 1mb/s
function addRatingEntry (size) {
// Returns entry object.
return rating[(rating.push({
'timestamp': Date.now(),
'size': size
}) - 1)];
}
function evalRating () {
// Removes outdated entries, computes combined size, and compares with limit variable.
// Returns true if your connection is NOT flooding, returns false if you need to disconnect.
var i, newRating, totalSize;
// totalSize in bytes in case of underlying Buffer value, in number of characters for strings. Actual byte size in case of strings might be variable => not reliable.
newRating = [];
for (i = rating.length - 1; i >= 0; i -= 1) {
if ((Date.now() - rating[i].timestamp) < interval) {
newRating.push(rating[i]);
}
}
rating = newRating;
totalSize = 0;
for (i = newRating.length - 1; i >= 0; i -= 1) {
totalSize += newRating[i].size;
}
return (totalSize > limit ? false : true);
}
// Assume connection variable already exists and has a readable stream interface
connection.on('data', function (chunk) {
addRatingEntry(chunk.length);
if (evalRating()) {
// Continue processing chunk.
} else {
// Disconnect due to flooding.
}
});
You can add extra checks, like checking whether or not the size parameter really is a number etc.
Addendum: Make sure the rating, limit and interval variables are enclosed (in a closure) per connection, and that they don't define a global rate (where each connection manipulates the same rating).
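A rough sketch of that per-connection enclosure (createRater and the server/connection wiring are illustrative, not part of the code above):

function createRater(limit, interval) {
  // Each call creates fresh state, so connections never share a rating array.
  var rating = [];
  return function (size) {
    var now = Date.now();
    rating.push({ 'timestamp': now, 'size': size });
    // Drop outdated entries, then sum up what is left.
    rating = rating.filter(function (entry) {
      return (now - entry.timestamp) < interval;
    });
    var totalSize = rating.reduce(function (sum, entry) {
      return sum + entry.size;
    }, 0);
    return totalSize <= limit; // true: keep processing, false: disconnect
  };
}

// Per connection:
server.on('connection', function (connection) {
  var rate = createRater(1048576, 1000); // 1 MB per second, per connection
  connection.on('data', function (chunk) {
    if (!rate(chunk.length)) {
      connection.destroy(); // disconnect due to flooding
    }
  });
});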
I implemented a little flood function, not perfect (see improvements below), but it will disconnect a user when they make too many requests.
// Not more than 100 requests in 10 seconds
let FLOOD_TIME = 10000;
let FLOOD_MAX = 100;
let flood = {
floods: {},
lastFloodClear: new Date(),
protect: (io, socket) => {
// Reset flood protection
if( Math.abs( new Date() - flood.lastFloodClear) > FLOOD_TIME ){
flood.floods = {};
flood.lastFloodClear = new Date();
}
flood.floods[socket.id] == undefined ? flood.floods[socket.id] = {} : flood.floods[socket.id];
flood.floods[socket.id].count == undefined ? flood.floods[socket.id].count = 0 : flood.floods[socket.id].count;
flood.floods[socket.id].count++;
//Disconnect the socket if he went over FLOOD_MAX in FLOOD_TIME
if( flood.floods[socket.id].count > FLOOD_MAX){
console.log('FLOODPROTECTION ', socket.id)
io.sockets.connected[socket.id].disconnect();
return false;
}
return true;
}
}
exports = module.exports = flood;
And then use it like this:
let flood = require('../modules/flood')
// ... init socket io...
socket.on('message', function () {
if(flood.protect(io, socket)){
//do stuff
}
});
Improvements would be to add another value next to the count for how often they got disconnected, and then create a ban list and not let them connect anymore. Also, when a user refreshes the page they get a new socket.id, so maybe use a unique cookie value here instead of the socket.id.
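A rough sketch of that ban-list idea (the names and the threshold below are made up for illustration):

let BAN_AFTER_DISCONNECTS = 3;
let bans = {};          // key: a stable client identifier (e.g. a cookie value)
let disconnects = {};   // how often each client was disconnected for flooding

function registerFlood(clientId) {
  disconnects[clientId] = (disconnects[clientId] || 0) + 1;
  if (disconnects[clientId] >= BAN_AFTER_DISCONNECTS) {
    bans[clientId] = true;
  }
}

function isBanned(clientId) {
  return bans[clientId] === true;
}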
Here is a simple rate-limiter-flexible package example.
const app = require('http').createServer();
const io = require('socket.io')(app);
const { RateLimiterMemory } = require('rate-limiter-flexible');
app.listen(3000);
const rateLimiter = new RateLimiterMemory(
{
points: 5, // 5 points
duration: 1, // per second
});
io.on('connection', (socket) => {
socket.on('bcast', async (data) => {
try {
await rateLimiter.consume(socket.handshake.address); // consume 1 point per event from IP
socket.emit('news', { 'data': data });
socket.broadcast.emit('news', { 'data': data });
} catch(rejRes) {
// no available points to consume
// emit error or warning message
socket.emit('blocked', { 'retry-ms': rejRes.msBeforeNext });
}
});
});
Read more in official docs
fs.watch( 'example.xml', function ( curr, prev ) {
// on file change we can read the new xml
fs.readFile( 'example.xml','utf8', function ( err, data ) {
if ( err ) throw err;
console.dir(data);
console.log('Done');
});
});
OUTPUT:
some data
Done X 1
some data
Done X 2
Is it my usage fault or ..?
The fs.watch api:
is unstable
has known "behaviour" with regards repeated notifications. Specifically, the windows case being a result of windows design, where a single file modification can be multiple calls to the windows API
I make allowance for this by doing the following:
var fsTimeout
fs.watch('file.js', function(e) {
if (!fsTimeout) {
console.log('file.js %s event', e)
fsTimeout = setTimeout(function() { fsTimeout=null }, 5000) // give 5 seconds for multiple events
}
})
I suggest working with chokidar (https://github.com/paulmillr/chokidar), which is much better than fs.watch:
Quoting its README.md:
Node.js fs.watch:
Doesn't report filenames on OS X.
Doesn't report events at all when using editors like Sublime on OS X.
Often reports events twice.
Emits most changes as rename.
Has a lot of other issues
Does not provide an easy way to recursively watch file trees.
Node.js fs.watchFile:
Almost as bad at event handling.
Also does not provide any recursive watching.
Results in high CPU utilization.
If you need to watch your file for changes then you can check out my small library on-file-change. It checks file sha1 hash between fired change events.
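The idea behind that hash check can be sketched in a few lines (this is not the library's actual code; the file name is a placeholder):

const fs = require('fs');
const crypto = require('crypto');

let lastHash = null;

fs.watch('./file.txt', () => {
  const hash = crypto.createHash('sha1')
    .update(fs.readFileSync('./file.txt'))
    .digest('hex');
  if (hash !== lastHash) { // only react when the contents actually changed
    lastHash = hash;
    console.log('file really changed');
  }
});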
Explanation of why we have multiple fired events:
You may notice in certain situations that a single creation event generates multiple Created events that are handled by your component. For example, if you use a FileSystemWatcher component to monitor the creation of new files in a directory, and then test it by using Notepad to create a file, you may see two Created events generated even though only a single file was created. This is because Notepad performs multiple file system actions during the writing process. Notepad writes to the disk in batches that create the content of the file and then the file attributes. Other applications may perform in the same manner. Because FileSystemWatcher monitors the operating system activities, all events that these applications fire will be picked up.
Source
My custom solution
I personally like using return to prevent a block of code from running when checking something, so here is my method:
var watching = false;
fs.watch('./file.txt', () => {
if(watching) return;
watching = true;
// do something
// the timeout is to prevent the script to run twice with short functions
// the delay can be longer to disable the function for a set time
setTimeout(() => {
watching = false;
}, 100);
});
Feel free to use this example to simplify your code. It may NOT be better than using a module from others, but it works pretty well!
Similar/same problem. I needed to do some stuff with images when they were added to a directory. Here's how I dealt with the double firing:
var fs = require('fs');
var active = false;
fs.watch('directory', function (event, filename) {
    if (filename && event == 'change' && active == false) {
        active = true;
        //do stuff to the new file added
        active = false;
    }
});
It will ignore the second firing until it finishes what it has to do with the new file.
I'm dealing with this issue for the first time, so all of the answers so far are probably better than my solution, however none of them were 100% suitable for my case so I came up with something slightly different – I used an XOR operation to flip an integer between 0 and 1, effectively keeping track of and ignoring every second event on the file:
var targetFile = "./watchThis.txt";
var flippyBit = 0;
fs.watch(targetFile, {persistent: true}, function(event, filename) {
if (event == 'change'){
if (!flippyBit) {
fs.readFile(targetFile, "utf8", function(error, data) {
gotUpdate(data);
})
} else {
console.log("Doing nothing thanks to flippybit.");
}
flipBit(); // call flipBit() function
}
});
// Whatever we want to do when we see a change
function gotUpdate(data) {
console.log("Got some fresh data:");
console.log(data);
}
// Toggling this gives us the "every second update" functionality
function flipBit() {
flippyBit = flippyBit ^ 1;
}
I didn't want to use a time-related function (like jwymanm's answer) because the file I'm watching could hypothetically get legitimate updates very frequently. And I didn't want to use a list of watched files like Erik P suggests, because I'm only watching one file. Jan Święcki's solution seemed like overkill, as I'm working on extremely short and simple files in a low-power environment. Lastly, Bernado's answer made me a little nervous – it would only ignore the second update if it arrived before I'd finished processing the first, and I can't handle that kind of uncertainty. If anyone were to find themselves in this very specific scenario, there might be some merit to the approach I used? If there's anything massively wrong with it please do let me know/edit this answer, but so far it seems to work well?
NOTE: Obviously this strongly assumes that you'll get exactly 2 events per real change. I carefully tested this assumption, obviously, and learned its limitations. So far I've confirmed that:
Modifying a file in Atom editor and saving triggers 2 updates
touch triggers 2 updates
Output redirection via > (overwriting file contents) triggers 2 updates
Appending via >> sometimes triggers 1 update!*
I can think of perfectly good reasons for the differing behaviours but we don't need to know why something is happening to plan for it – I just wanted to stress that you'll want to check for yourself in your own environment and in the context of your own use cases (duh) and not trust a self-confessed idiot on the internet. That being said, with precautions taken I haven't had any weirdness so far.
* Full disclosure, I don't actually know why this is happening, but we're already dealing with unpredictable behaviour with the watch() function so what's a little more uncertainty? For anyone following along at home, more rapid appends to a file seem to cause it to stop double-updating but honestly, I don't really know, and I'm comfortable with the behaviour of this solution in the actual case it'll be used, which is a one-line file that will be updated (contents replaced) like twice per second at the fastest.
The first is 'change' and the second is 'rename'.
We can tell the difference from the listener function:
function(event, filename) {
}
The listener callback gets two arguments (event, filename). event is either 'rename' or 'change', and filename is the name of the file which triggered the event.
// rm sourcefile targetfile
fs.watch( sourcefile_dir , function(event, targetfile) {
    console.log( targetfile, 'is', event)
})
as the sourcefile is renamed to targetfile, it will in fact fire three events:
null is rename // sourcefile not exist again
targetfile is rename
targetfile is change
notice that, if you want to catch all three of these events, watch the directory of the sourcefile
I sometimes get multiple registrations of the watch event, causing it to fire several times.
I solved it by keeping a list of watched files and avoiding registering the event if the file is already in the list:
var watchlist = {};
function initwatch(fn, callback) {
    if (!watchlist[fn]) {
        watchlist[fn] = true;
        fs.watch(fn).on('change', callback);
    }
}
......
Like other answers say... this has a lot of problems, but I can deal with it in this way:
var folder = "/folder/path/";
var active = true; // flag control
fs.watch(folder, function (event, filename) {
if(event === 'rename' && active) { //you can remove this "check" event
active = false;
// ... its just an example
for (var i = 0; i < 100; i++) {
console.log(i);
}
// ... other stuffs and delete the file
if(!active){
try {
fs.unlinkSync(folder + filename);
} catch(err) {
console.log(err);
}
active = true
}
}
});
Hope I can help you...
Easiest solution:
const watch = (path, opt, fn) => {
var lock = false
fs.watch(path, opt, function () {
if (!lock) {
lock = true
fn()
setTimeout(() => lock = false, 1000)
}
})
}
watch('/path', { interval: 500 }, function () {
// ...
})
I was downloading a file with puppeteer and once the file was saved, I was sending automatic emails. Due to the problem above, I noticed I was sending 2 emails. I solved it by stopping my application using process.exit() and auto-starting it with pm2. Using flags in the code didn't save me.
If anyone has this problem in the future, they can use this solution as well: exit the program and restart it automatically with monitoring tools.
Here's my simple solution. It works well every time.
// Update obj as file updates
obj = JSON.parse(fs.readFileSync('./file.json', 'utf-8'));
fs.watch('./file.json', () => {
const data = JSON.parse(fs.readFileSync('./file.json', 'utf-8') || '{}');
if(Object.entries(data).length > 0) { // This checks fs.watch() isn't false-firing
obj = data;
console.log('File actually changed: ', obj)
}
});
I came across the same issue. If you don't want to trigger multiple times, you can use a debounce function (here from lodash).
const _ = require('lodash');
fs.watch( 'example.xml', _.debounce(function ( curr, prev ) {
// on file change we can read the new xml
fs.readFile( 'example.xml','utf8', function ( err, data ) {
if ( err ) throw err;
console.dir(data);
console.log('Done');
});
}, 100));
Debouncing The Observer
A solution I arrived at was that (a) there needs to be a workaround for the problem in question and (b) there needs to be a way to ensure multiple rapid Ctrl+S actions do not cause race conditions. Here's what I have...
./**/utilities.js (somewhere)
export default {
...
debounce(fn, delay) { // #thxRemySharp https://remysharp.com/2010/07/21/throttling-function-calls/
var timer = null;
return function execute(...args) {
var context = this;
clearTimeout(timer);
timer = setTimeout(fn.bind(context, ...args), delay);
};
},
...
};
./**/file.js (elsewhere)
import utilities from './**/utilities.js'; // somewhere
...
function watch(server) {
const debounced = utilities.debounce(observeFilesystem.bind(this, server), 1000 * 0.25);
const observers = new Set()
.add( fs.watch('./src', debounced) )
.add( fs.watch('./index.html', debounced) )
;
console.log(`watching... (${observers.size})`);
return observers;
}
function observeFilesystem(server, type, filename) {
if (!filename) console.warn(`Tranfer Dev Therver: filesystem observation made without filename for type ${type}`);
console.log(`Filesystem event occurred:`, type, filename);
server.close(handleClose);
}
...
This way, the observation-handler that we pass into fs.watch is (in this case) a bound function which gets debounced if multiple calls are made less than 1000 * 0.25 milliseconds (250 ms) apart from one another.
It may be worth noting that I have also devised a pipeline of Promises to help avoid other types of Race Conditions as the code also leverages other callbacks. Please also note the attribution to Remy Sharp whose debounce function has repeatedly proven very useful over the years.
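That promise pipeline can be sketched as a simple chain that serialises the handlers, so a second change event waits for the previous one to finish (a sketch of the idea, not the author's actual code; doTheActualWork is a placeholder):

let pipeline = Promise.resolve();

function enqueue(task) {
  // Each task starts only after the previous one has settled.
  pipeline = pipeline.then(task).catch((err) => console.error(err));
  return pipeline;
}

// Usage inside a (debounced) watch handler:
// enqueue(() => doTheActualWork(type, filename));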
watcher = fs.watch( 'example.xml', function ( curr, prev ) {
watcher.close();
fs.readFile( 'example.xml','utf8', function ( err, data ) {
if ( err ) throw err;
console.dir(data);
console.log('Done');
});
});
I had a similar problem, but I was also reading the file in the callback, which caused a loop.
This is where I found how to close watcher:
How to close fs.watch listener for a folder
NodeJS does not fire multiple events for a single change; it is the editor you are using that updates the file multiple times.
Editors use the stream API for efficiency: they read and write data in chunks, which causes multiple updates depending on the chunk size and the amount of content. Here is a snippet to test whether fs.watch fires multiple events:
const http = require('http');
const fs = require('fs');
const path = require('path');
const host = 'localhost';
const port = 3000;
const file = path.join(__dirname, 'config.json');
const requestListener = function (req, res) {
const data = new Date().toString();
fs.writeFileSync(file, data, { encoding: 'utf-8' });
res.end(data);
};
const server = http.createServer(requestListener);
server.listen(port, host, () => {
fs.watch(file, (eventType, filename) => {
console.log({ eventType });
});
console.log(`Server is running on http://${host}:${port}`);
});
I believe a simple solution would be checking for the last modified timestamp:
const { stat } = require('fs').promises; // promise-based stat, needed for the .then() below
let lastModified;
fs.watch(file, (eventType, filename) => {
stat(file).then(({ mtimeMs }) => {
if (lastModified !== mtimeMs) {
lastModified = mtimeMs;
console.log({ eventType, filename });
}
});
});
Please note that you need to use all-sync or all-async methods otherwise you will have issues:
Update the file in an editor and you will see that only a single event is logged:
const http = require('http');
const host = 'localhost';
const port = 3000;
const fs = require('fs');
const path = require('path');
const file = path.join(__dirname, 'config.json');
let lastModified;
const requestListener = function (req, res) {
const data = Date.now().toString();
fs.writeFileSync(file, data, { encoding: 'utf-8' });
lastModified = fs.statSync(file).mtimeMs;
res.end(data);
};
const server = http.createServer(requestListener);
server.listen(port, host, () => {
fs.watch(file, (eventType, filename) => {
const mtimeMs = fs.statSync(file).mtimeMs;
if (lastModified !== mtimeMs) {
lastModified = mtimeMs;
console.log({ eventType });
}
});
console.log(`Server is running on http://${host}:${port}`);
});
A few notes on the alternative solutions: storing files for comparison will be memory inefficient, especially if you have large files; taking file hashes will be expensive; custom flags are hard to keep track of, especially if you need to detect changes made by other applications; and lastly, unsubscribing and re-subscribing requires unnecessary juggling.
If you don't need an instant result, you can use setTimeout to debounce successive events:
let timeoutId;
fs.watch(file, (eventType, filename) => {
clearTimeout(timeoutId);
timeoutId = setTimeout(() => {
console.log({ eventType });
}, 100);
});