How to force sequential statement execution in Node? - node.js

I am new to Node, so please forgive me if my question is too simple. I fully appreciate the async paradigm and why it is useful in a single-threaded environment. But some logical operations are synchronous by nature.
I have found many posts about the async/sync issue, and have been reading for two whole days about callbacks, promises, async/await, etc.
But I still cannot figure out what should be a straightforward and simple thing to do. Am I missing something?
Basically for the code below:
const fs = require('fs');
var readline = require('readline');

function getfile (aprodfile) {
  var prodlines = [];
  var prodfile = readline.createInterface({input: fs.createReadStream(aprodfile)});
  prodfile.on('line', (line) => {prodlines.push(line)});
  prodfile.on('close', () => console.log('Loaded ' + prodlines.length));
  // the above block is repeated several times to read several other
  // files, but those are omitted here for simplicity
  console.log(prodlines.length);
  // then 200+ lines that assume prodlines is already filled
};
the output I get is:
0
Loaded 11167
whereas the output I expect is:
Loaded 11167
11167
This is because the console.log statement executes before the prodfile.on events are completed.
Is there a nice clean way to tell Node to execute commands sequentially, even if blocking? Or better still, to tell console.log (and the 200+ lines of code that follow it) to wait until prodlines is fully populated?

Here's the execution order of what you wrote:
prodfile.on('line', line => {  // (1) subscribing to the 'line' event
  prodlines.push(line)         // (4) runs whenever the 'line' event is triggered
});
prodfile.on('close', () => {   // (2) subscribing to the 'close' event
  console.log('Loaded ' + prodlines.length)  // (5) runs whenever the 'close' event is triggered
});
console.log(prodlines.length); // (3) --> so it logs 0, nothing has happened yet
What you can do is this:
function getfile (aprodfile) {
  var prodlines = [];
  var prodfile = readline.createInterface({input: fs.createReadStream(aprodfile)});
  prodfile.on('line', line => { prodlines.push(line) });
  prodfile.on('close', () => {
    console.log('Loaded ' + prodlines.length)
    finishedFetching(prodlines);
  });
};

function finishedFetching (prodlines) {
  console.log(prodlines.length) // 11167
}
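For completeness, a minimal sketch of the same idea with a Promise, so the 200+ dependent lines can simply await the loaded lines (the 'prodfile.txt' filename and the error handling here are illustrative assumptions, not part of the original):
const fs = require('fs');
const readline = require('readline');

function getfile(aprodfile) {
  return new Promise((resolve, reject) => {
    const prodlines = [];
    const input = fs.createReadStream(aprodfile);
    input.on('error', reject); // errors come from the file stream, not the interface
    const prodfile = readline.createInterface({ input });
    prodfile.on('line', line => prodlines.push(line));
    prodfile.on('close', () => resolve(prodlines));
  });
}

(async () => {
  const prodlines = await getfile('prodfile.txt');
  console.log('Loaded ' + prodlines.length);
  // the 200+ lines that assume prodlines is filled go here
})();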

Related

Socket.io async/await for .on()

I'm building a socket.io Node.js application, and my socket.io server will be listening for data from many socket.io clients. I need to save data to an API via my socket.io server as quickly as possible, and I figure that async/await is the best way forward.
Right now I've got a function inside my .on('connection') handler, but is there a way I can make this an async function rather than have a nested function inside?
io.use((socket, next) => {
  if (!socket.handshake.query || !socket.handshake.query.token) {
    console.log('Authentication Error 1')
    return
  }
  jwt.verify(socket.handshake.query.token, process.env.AGENT_SIGNING_SECRET, (err, decoded) => {
    if (err) {
      console.log('Authentication Error 2')
      return
    }
    socket.decoded = decoded
    next()
  })
}).on('connection', socket => {
  socket.on('agent-performance', data => {
    async function savePerformance () {
      const saved = await db.saveToDb('http://127.0.0.1:8000/api/profiler/save', data)
      console.log(saved)
    }
    savePerformance()
  })
})
Sort of, but you'll probably want to keep your current code if there can be multiple agent-performance events. You could modify it as shown below, but it'd be messy and less readable. Event emitters still exist for a reason; they weren't made obsolete by the introduction of promises. If it's performance you're after, your current code is probably faster, more resistant to backpressure, and easier to error-handle.
events.on is a utility function that takes an event emitter (like socket) and returns an iterator that yields promises. You can await those with for await of.
events.once is a utility function that takes an event emitter (like socket) and returns a promise that resolves when the specified event is emitted.
const { on, once } = require('events');

(async function () {
  // This is an iterator that can emit an infinite number of times.
  const iterator = on(io, 'connection');
  // Yield a promise, await it, run what is between `{ }`, and repeat.
  // Note: both on() and once() resolve with an *array* of the emitted
  // arguments, hence the destructuring below.
  for await (const [socket] of iterator) {
    const [data] = await once(socket, 'agent-performance');
    const saved = await db.saveToDb(/* etc */);
  }
})();
As the names imply, on is similar to socket.on and once is similar to socket.once. In the above example:
connected user 1, first agent-performance event: OK
connected user 1, second agent-performance event: not handled, there's no more event handler, since once is "used up".
connected user 2, first agent-performance event: OK
The documentation for on has a note about concurrency when using for await (x of on(...)), but I don't know if that would be a problem in your usecase.
// The execution of this inner block is synchronous and it
// processes one event at a time (even with await). Do not use
// if concurrent execution is required.
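If that caveat applies to you, one possible workaround (a sketch assuming the same io and db objects as the question) is to keep the for await loop but spin each connection off into its own async task:
const { on, once } = require('events');

(async function () {
  for await (const [socket] of on(io, 'connection')) {
    // Handle each connection concurrently so one slow client
    // doesn't delay processing of the next 'connection' event.
    (async () => {
      const [data] = await once(socket, 'agent-performance');
      const saved = await db.saveToDb('http://127.0.0.1:8000/api/profiler/save', data);
      console.log(saved);
    })().catch(console.error);
  }
})();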

Execute when both(!) events fire .on('end')

I have a node app that reads two files as streams. I use event.on('end') to then work with the results. The problem is I don't really know how I can wait for BOTH events to trigger 'end'.
What I have now is:
reader1.on('end', function() {
  reader2.on('end', function() {
    doSomething();
  });
});
With small files this works, but if one of the files is very large the app aborts.
Your execution logic is somewhat flawed. You ought to do something like this instead:
var checklist = [];
// checklist acts as a counter of readers that have finished

function reader_end() {
  // doSomething only once both readers have been added to the checklist
  if (checklist.length == 2)
    doSomething();
}

reader1.on('end', function() {
  checklist.push('reader1'); // increment the counter
  reader_end();
});

reader2.on('end', function() {
  checklist.push('reader2');
  reader_end();
});
There are also libraries that handle this sort of thing better, such as Async and promise libraries like Bluebird.
With Async you'll need to use compose:
var r12_done = async.compose(reader1.on, reader2.on);
r12_done('end', function() {
  doSomething();
});
Edit: I just noticed that since reader1.on is presumably a stream 'end' listener, which doesn't have the standard callback argument signature of (err, results), this probably won't work. In that case you should just go with Promise.
With Promise (Bluebird here) you'll need to promisify first and then join. Note that .on must be bound to its reader, and the second promise should come from reader2:
var reader1Promise = Promise.promisify(reader1.on.bind(reader1))('end');
var reader2Promise = Promise.promisify(reader2.on.bind(reader2))('end');

var reader12Promise = Promise.join(reader1Promise, reader2Promise);
reader12Promise.then(function() {
  doSomething();
});
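On current Node versions the built-in events.once helper makes this shorter still; a minimal sketch, assuming reader1 and reader2 are the streams from the question:
const { once } = require('events');

Promise.all([once(reader1, 'end'), once(reader2, 'end')])
  .then(() => doSomething())
  .catch(err => console.error(err)); // once() rejects if a stream emits 'error' first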

How do I ensure results of streamed commands are output in the same order?

In a Node.js program that asynchronously handles lines of input constantly coming in from stdin, how can I ensure the asynchronous handlers print their results in the same order the inputs came in?
SSCCE program.js (dependency: npm install split):
var executeCommand = function(line) {
  setTimeout(function() { console.log(line); }, 1000 * Math.random());
};

var split = require("split");
process.stdin.pipe(split("\n")).on("data", function(line) {
  executeCommand(line);
});
Running printf "A\nB\nC\nD\nE\nF" | node program.js produces
B
E
A
D
C
F
This is because the handler (executeCommand) has an unpredictable delay, modelled here as a random setTimeout. The "processing" (the setTimeouts) should happen concurrently, but their outputs (console.logs) should be in the same order as the constantly incoming inputs.
How can I make that happen?
I'd usually just exclaim "It's Async.js time!", but this time I can't see an appropriate existing helper: Since tasks are constantly coming in, anything that operates on a fixed collection of inputs won't cut it.
I figured it out.
As #Peter and #jfriend pointed out, handler results must be pushed onto a queue from which only completed tasks may be dequeued, in order. A good time to check for finished tasks is whenever a handler completes.
Turns out a transform stream is a nice way to model that. ("Stuff comes in and eventually stuff related to the incoming stuff comes out" is pretty much the description of a transform stream.) Whenever results finish, completed tasks are pushed.
Here's the question's example, modified to work:
var Transform = require("stream").Transform;
var split = require("split");

var orderedParallel = function(worker) {
  var s = new Transform({ objectMode: true });
  var resultsQueue = [];
  var flushCallback = null;
  var sendFinishedFromQueue = function() {
    while (resultsQueue[0] && resultsQueue[0].done) {
      s.push(resultsQueue.shift().data);
    }
    // If _flush has been called and every queued task has drained,
    // signal that the stream can finish.
    if (flushCallback && resultsQueue.length === 0) {
      flushCallback();
    }
  };
  s._transform = function(chunk, encoding, callback) {
    var resultObject = { done: false, data: null };
    resultsQueue.push(resultObject);
    worker(chunk, function(result) {
      resultObject.data = result;
      resultObject.done = true;
      sendFinishedFromQueue();
    });
    callback();
  };
  s._flush = function(callback) {
    // We don't push anything here ourselves: as workers complete, they
    // send any and all messages we're allowed to send right now. We
    // just record the callback so the stream ends once the queue drains.
    flushCallback = callback;
    sendFinishedFromQueue();
  };
  return s;
};
var executeCommand = function(line, cb) {
  setTimeout(function() { cb(line); }, 1000 * Math.random());
};

process.stdin.pipe(split("\n")).pipe(orderedParallel(executeCommand))
  .on("data", function(x) { console.log(x); });
To convince yourself it works, try a hundred parallel tasks:
for (( i=0; i<=100; i++ ))
do
echo "$i"
done | node program.js
They should complete in parallel (within 1 second at random), but come out of the orderedParallel transform stream in order regardless.
async.queue with a concurrency limit of 1, where the worker function both executes the command and prints the result, will do it. You won't have optimal concurrency, but it will behave correctly, so I suggest coding that even if it's just a stepping stone (see the sketch below). Keeping the correct behavior while adding concurrency will require not only queueing the main work function but also buffering output, in case output 2 is ready before output 1 arrives.
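Here is a sketch of that stepping-stone approach, assuming the same split/stdin setup as the question (runCommand stands in for the real handler):
const async = require("async");
const split = require("split");

// Stand-in for the real asynchronous handler.
function runCommand(line, cb) {
  setTimeout(function () { cb(line); }, 1000 * Math.random());
}

// Concurrency of 1: each line is fully processed and printed before
// the next starts, so output order matches input order.
const queue = async.queue(function (line, done) {
  runCommand(line, function (output) {
    console.log(output);
    done();
  });
}, 1);

process.stdin.pipe(split("\n")).on("data", function (line) {
  queue.push(line);
});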

Block for stdin in Node.js

Short explanation:
I'm attempting to write a simple game in Node.js that needs to wait for user input every turn. How do I avoid callback hell (e.g. messy code) internal to a turn loop where each turn loop iteration needs to block and wait for input from stdin?
Long explanation:
All the explanations I have read on StackOverflow when someone asks about blocking for stdin input seem to be "that's not what Node.js is about!"
I understand that Node.js is designed to be non-blocking and I also understand why. However I feel that it has me stuck between a rock and a hard place on how to solve this. I feel like I have three options:
1. Find a way to block for stdin and retain my while loop.
2. Ditch the while loop and instead recursively call a method (like nextTurn) whenever the previous turn ends.
3. Ditch the while loop and instead use setTimeout(0, ...) or something similar to call a method (like nextTurn) whenever a turn ends.
With option (1) I am going against Node.js principles of non-blocking IO.
With option (2) I will eventually reach a stack overflow as each call adds another turn to the call stack.
With option (3) my code ends up being a mess to follow.
Node.js itself has functions suffixed with Sync (e.g. see the fs library), and I'm wondering why there is no Sync method for getting user input. And if I were to write something similar to fs.readSync, how would I go about doing it while still following best practices?
Just found this:
https://www.npmjs.com/package/readline-sync
Example code (after doing an npm install readline-sync)
var readlineSync = require('readline-sync');

while (true) {
  var yn = readlineSync.question("Do you like having tools that let you code how you want, rather than how their authors wanted?");
  if (yn === 'y') {
    console.log("Hooray!");
  } else {
    console.log("Back to callback world, I guess...");
    process.exit();
  }
}
Only problem so far is the wailing of the "That's not how node is meant to be used!" chorus, but I have earplugs :)
I agree with the comment about moving towards an event based system and would ditch the loops. I've thrown together a quick example of text based processing which can be used for simple text games.
var fs = require('fs'),
    es = require('event-stream');

process.stdin
  .pipe(es.split())
  .on('data', parseCommand);

var actionHandlers = {};

function parseCommand(command) {
  var words = command.split(' '),
      action = '';
  if (words.length > 1) {
    action = words.shift();
  }
  if (actionHandlers[action]) {
    actionHandlers[action](words);
  } else {
    invalidAction(action);
  }
}

function invalidAction(action) {
  console.log('Unknown Action:', action);
}

actionHandlers['move'] = function(words) {
  console.log('You move', words);
}

actionHandlers['attack'] = function(words) {
  console.log('You attack', words);
}
You can now break up your actions into discrete functions which you can register with a central actionHandlers variable. This makes adding new commands almost trivial. If you can add some details on why the above approach wouldn't work well for you, let me know and I'll revise the answer.
ArtHare's solution, at least for my use case, blocked background execution, including tasks started by a promise. While this code isn't elegant, it did block execution of the current function until the read from stdin completed.
This code must run from inside an async function. Keep in mind that calling an async function from a top-level context (directly from a script, not from within any other function) does not block the rest of the event loop: only the async function itself is suspended at each await.
Below is a full .js script demonstrating usage, tested with node v8.12.0:
const readline = require('readline');

const sleep = (waitTimeInMs) => new Promise(resolve => setTimeout(resolve, waitTimeInMs));

async function blockReadLine() {
  var rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
    terminal: false
  });

  let result = undefined;
  rl.on('line', function(line) {
    result = line;
  });

  // Poll until a line has arrived (note: an empty input line is falsy
  // and would keep us waiting).
  while (!result) await sleep(100);
  return result;
}
async function run() {
  new Promise(async () => {
    while (true) {
      console.log("Won't be silenced! Won't be censored!");
      await sleep(1000);
    }
  });

  let result = await blockReadLine();
  console.log("The result was:" + result);
  process.exit(0);
}

run();
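For what it's worth, newer Node versions (v17+) ship a promise-based readline API that gives the same turn-loop structure without the polling; a minimal sketch (the prompt strings are illustrative):
const readline = require('readline/promises');

async function main() {
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  while (true) {
    // Only this function is suspended while waiting; the event loop keeps running.
    const answer = await rl.question('Your move? ');
    if (answer === 'quit') break;
    console.log('You entered:', answer);
  }
  rl.close();
}

main();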

fs.watch fired twice when I change the watched file

fs.watch('example.xml', function (curr, prev) {
  // on file change we can read the new xml
  fs.readFile('example.xml', 'utf8', function (err, data) {
    if (err) throw err;
    console.dir(data);
    console.log('Done');
  });
});
OUTPUT:
some data
Done × 1
some data
Done × 2
Is this a fault in my usage, or ..?
The fs.watch api:
is unstable
has known "behaviour" with regard to repeated notifications. Specifically, the Windows case is a result of Windows' design, where a single file modification can result in multiple calls to the Windows API
I make allowance for this by doing the following:
var fsTimeout

fs.watch('file.js', function(e) {
  if (!fsTimeout) {
    console.log('file.js %s event', e)
    fsTimeout = setTimeout(function() { fsTimeout = null }, 5000) // give 5 seconds for multiple events
  }
})
I suggest working with chokidar (https://github.com/paulmillr/chokidar), which is much better than fs.watch.
Quoting its README.md:
Node.js fs.watch:
Doesn't report filenames on OS X.
Doesn't report events at all when using editors like Sublime on OS X.
Often reports events twice.
Emits most changes as rename.
Has a lot of other issues
Does not provide an easy way to recursively watch file trees.
Node.js fs.watchFile:
Almost as bad at event handling.
Also does not provide any recursive watching.
Results in high CPU utilization.
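For reference, a minimal chokidar sketch (assuming npm install chokidar); the awaitWriteFinish option is what collapses an editor's multiple writes into a single event:
const chokidar = require('chokidar');

const watcher = chokidar.watch('example.xml', {
  // Emit 'change' only once the file size has been stable for a while,
  // merging the rapid-fire events fs.watch would report separately.
  awaitWriteFinish: { stabilityThreshold: 200, pollInterval: 100 }
});

watcher.on('change', path => console.log(`${path} changed`));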
If you need to watch your file for changes then you can check out my small library on-file-change. It checks file sha1 hash between fired change events.
Explanation of why we have multiple fired events:
You may notice in certain situations that a single creation event generates multiple Created events that are handled by your component. For example, if you use a FileSystemWatcher component to monitor the creation of new files in a directory, and then test it by using Notepad to create a file, you may see two Created events generated even though only a single file was created. This is because Notepad performs multiple file system actions during the writing process. Notepad writes to the disk in batches that create the content of the file and then the file attributes. Other applications may perform in the same manner. Because FileSystemWatcher monitors the operating system activities, all events that these applications fire will be picked up.
Source
My custom solution
I personally like using return to prevent a block of code from running when checking something, so here is my method:
var watching = false;
fs.watch('./file.txt', () => {
  if (watching) return;
  watching = true;

  // do something

  // the timeout prevents the script from running twice on short functions;
  // the delay can be longer to disable the function for a set time
  setTimeout(() => {
    watching = false;
  }, 100);
});
Feel free to use this example to simplify your code. It may NOT be better than using a module from others, but it works pretty well!
Similar/same problem. I needed to do some stuff with images when they were added to a directory. Here's how I dealt with the double firing:
var fs = require('fs');
var active = false;

fs.watch('directory', function (event, filename) {
  if (filename && event == 'change' && active == false) {
    active = true;
    // do stuff to the new file added
    active = false;
  }
});
It will ignore the second firing until it finishes what it has to do with the new file.
I'm dealing with this issue for the first time, so all of the answers so far are probably better than my solution. None of them were 100% suitable for my case, though, so I came up with something slightly different: I used an XOR operation to flip an integer between 0 and 1, effectively keeping track of and ignoring every second event on the file:
var targetFile = "./watchThis.txt";
var flippyBit = 0;

fs.watch(targetFile, { persistent: true }, function (event, filename) {
  if (event == 'change') {
    if (!flippyBit) {
      fs.readFile(targetFile, "utf8", function (error, data) {
        gotUpdate(data);
      });
    } else {
      console.log("Doing nothing thanks to flippybit.");
    }
    flipBit(); // toggle flippyBit
  }
});

// Whatever we want to do when we see a change
function gotUpdate(data) {
  console.log("Got some fresh data:");
  console.log(data);
}

// Toggling this gives us the "every second update" functionality
function flipBit() {
  flippyBit = flippyBit ^ 1;
}
I didn't want to use a time-related function (like jwymanm's answer) because the file I'm watching could hypothetically get legitimate updates very frequently. And I didn't want to use a list of watched files like Erik P suggests, because I'm only watching one file. Jan Święcki's solution seemed like overkill, as I'm working on extremely short and simple files in a low-power environment. Lastly, Bernado's answer made me a little nervous – it would only ignore the second update if it arrived before I'd finished processing the first, and I can't handle that kind of uncertainty. If anyone were to find themselves in this very specific scenario, there might be some merit to the approach I used? If there's anything massively wrong with it please do let me know/edit this answer, but so far it seems to work well?
NOTE: Obviously this strongly assumes that you'll get exactly 2 events per real change. I carefully tested this assumption, obviously, and learned its limitations. So far I've confirmed that:
Modifying a file in Atom editor and saving triggers 2 updates
touch triggers 2 updates
Output redirection via > (overwriting file contents) triggers 2 updates
Appending via >> sometimes triggers 1 update!*
I can think of perfectly good reasons for the differing behaviours but we don't need to know why something is happening to plan for it – I just wanted to stress that you'll want to check for yourself in your own environment and in the context of your own use cases (duh) and not trust a self-confessed idiot on the internet. That being said, with precautions taken I haven't had any weirdness so far.
* Full disclosure, I don't actually know why this is happening, but we're already dealing with unpredictable behaviour with the watch() function so what's a little more uncertainty? For anyone following along at home, more rapid appends to a file seem to cause it to stop double-updating but honestly, I don't really know, and I'm comfortable with the behaviour of this solution in the actual case it'll be used, which is a one-line file that will be updated (contents replaced) like twice per second at the fastest.
The first event is 'change' and the second is 'rename'.
We can tell them apart in the listener function:
function(event, filename) {
}
The listener callback gets two arguments (event, filename). event is either 'rename' or 'change', and filename is the name of the file which triggered the event.
// rm sourcefile targetfile
fs.watch(sourcefile_dir, function(event, targetfile) {
  console.log(targetfile, 'is', event)
})
When sourcefile is renamed to targetfile, it will in fact fire three events:
null is rename // sourcefile does not exist any more
targetfile is rename
targetfile is change
Notice that if you want to catch all three of these events, watch the directory of sourcefile.
I sometimes get multiple registrations of the watch event, causing it to fire several times.
I solved it by keeping a list of watched files and avoiding registering the event if the file is already in the list:
var watchlist = {};

function initwatch(fn, callback) {
  if (!watchlist[fn]) {
    watchlist[fn] = true;
    fs.watch(fn).on('change', callback);
  }
}
......
As other answers say, this has a lot of problems, but I can deal with it this way:
var folder = "/folder/path/";
var active = true; // flag control

fs.watch(folder, function (event, filename) {
  if (event === 'rename' && active) { // you can remove this "event" check
    active = false;
    // ... it's just an example
    for (var i = 0; i < 100; i++) {
      console.log(i);
    }
    // ... other stuff, then delete the file
    if (!active) {
      try {
        fs.unlinkSync(folder + filename);
      } catch (err) {
        console.log(err);
      }
      active = true
    }
  }
});
Hope this helps...
Easiest solution:
const watch = (path, opt, fn) => {
  var lock = false
  fs.watch(path, opt, function () {
    if (!lock) {
      lock = true
      fn()
      setTimeout(() => lock = false, 1000)
    }
  })
}

watch('/path', { interval: 500 }, function () {
  // ...
})
I was downloading a file with puppeteer, and once the file was saved I was sending automatic emails. Due to the problem above, I noticed I was sending 2 emails. I solved it by stopping my application using process.exit() and auto-restarting with pm2. Using flags in code didn't save me.
If anyone has this problem in the future, you can use this solution as well: exit the program and restart it automatically with monitoring tools.
Here's my simple solution. It works well every time.
// Update obj as file updates
obj = JSON.parse(fs.readFileSync('./file.json', 'utf-8'));

fs.watch('./file.json', () => {
  const data = JSON.parse(fs.readFileSync('./file.json', 'utf-8') || '{}');
  if (Object.entries(data).length > 0) { // This checks fs.watch() isn't false-firing
    obj = data;
    console.log('File actually changed: ', obj)
  }
});
I came across the same issue. If you don't want to trigger multiple times, you can use a debounce function.
const _ = require('lodash'); // _.debounce comes from lodash

fs.watch('example.xml', _.debounce(function (curr, prev) {
  // on file change we can read the new xml
  fs.readFile('example.xml', 'utf8', function (err, data) {
    if (err) throw err;
    console.dir(data);
    console.log('Done');
  });
}, 100));
Debouncing The Observer
A solution I arrived at was that (a) there needs to be a workaround for the problem in question, and (b) there needs to be a way to ensure multiple rapid Ctrl+S actions do not cause race conditions. Here's what I have...
./**/utilities.js (somewhere)
export default {
  ...
  debounce(fn, delay) { // #thxRemySharp https://remysharp.com/2010/07/21/throttling-function-calls/
    var timer = null;
    return function execute(...args) {
      var context = this;
      clearTimeout(timer);
      timer = setTimeout(fn.bind(context, ...args), delay);
    };
  },
  ...
};
./**/file.js (elsewhere)
import utilities from './**/utilities.js'; // somewhere
...

function watch(server) {
  const debounced = utilities.debounce(observeFilesystem.bind(this, server), 1000 * 0.25);
  const observers = new Set()
    .add(fs.watch('./src', debounced))
    .add(fs.watch('./index.html', debounced))
    ;
  console.log(`watching... (${observers.size})`);
  return observers;
}

function observeFilesystem(server, type, filename) {
  if (!filename) console.warn(`Transfer Dev Server: filesystem observation made without filename for type ${type}`);
  console.log(`Filesystem event occurred:`, type, filename);
  server.close(handleClose);
}
...
This way, the observation handler that we pass into fs.watch is (in this case) a bound function which gets debounced if multiple calls are made less than 1000 * 0.25 seconds (250 ms) apart from one another.
It may be worth noting that I have also devised a pipeline of Promises to help avoid other types of Race Conditions as the code also leverages other callbacks. Please also note the attribution to Remy Sharp whose debounce function has repeatedly proven very useful over the years.
watcher = fs.watch('example.xml', function (curr, prev) {
  watcher.close();
  fs.readFile('example.xml', 'utf8', function (err, data) {
    if (err) throw err;
    console.dir(data);
    console.log('Done');
  });
});
I had a similar problem, but I was also reading the file in the callback, which caused a loop.
This is where I found how to close watcher:
How to close fs.watch listener for a folder
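Note that closing the watcher means later changes go unnoticed. A hedged sketch of a close-and-rewatch pattern (watchOnce is an illustrative name, not a Node API):
const fs = require('fs');

// Re-register after each change so the duplicate events from the change
// we just handled are dropped, but future changes still fire.
function watchOnce(file, onChange) {
  const watcher = fs.watch(file, function () {
    watcher.close(); // ignore the extra events for this change
    onChange(function () {
      watchOnce(file, onChange); // resume watching once the handler is done
    });
  });
}

watchOnce('example.xml', function (done) {
  fs.readFile('example.xml', 'utf8', function (err, data) {
    if (err) throw err;
    console.dir(data);
    done();
  });
});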
NodeJS does not fire multiple events for a single change; it is the editor you are using that updates the file multiple times.
Editors use the stream API for efficiency: they read and write data in chunks, which causes multiple updates depending on the chunk size and the amount of content. Here is a snippet to test whether fs.watch fires multiple events:
const http = require('http');
const fs = require('fs');
const path = require('path');

const host = 'localhost';
const port = 3000;
const file = path.join(__dirname, 'config.json');

const requestListener = function (req, res) {
  const data = new Date().toString();
  fs.writeFileSync(file, data, { encoding: 'utf-8' });
  res.end(data);
};

const server = http.createServer(requestListener);
server.listen(port, host, () => {
  fs.watch(file, (eventType, filename) => {
    console.log({ eventType });
  });
  console.log(`Server is running on http://${host}:${port}`);
});
I believe a simple solution would be checking the last modified timestamp:
const { stat } = require('fs/promises'); // assuming the promise-based stat

let lastModified;

fs.watch(file, (eventType, filename) => {
  stat(file).then(({ mtimeMs }) => {
    if (lastModified !== mtimeMs) {
      lastModified = mtimeMs;
      console.log({ eventType, filename });
    }
  });
});
Please note that you need to use either all-sync or all-async methods, otherwise you will have issues.
Update the file in an editor; you will see only a single event logged:
const http = require('http');
const fs = require('fs');
const path = require('path');

const host = 'localhost';
const port = 3000;
const file = path.join(__dirname, 'config.json');

let lastModified;

const requestListener = function (req, res) {
  const data = Date.now().toString();
  fs.writeFileSync(file, data, { encoding: 'utf-8' });
  lastModified = fs.statSync(file).mtimeMs;
  res.end(data);
};

const server = http.createServer(requestListener);
server.listen(port, host, () => {
  fs.watch(file, (eventType, filename) => {
    const mtimeMs = fs.statSync(file).mtimeMs;
    if (lastModified !== mtimeMs) {
      lastModified = mtimeMs;
      console.log({ eventType });
    }
  });
  console.log(`Server is running on http://${host}:${port}`);
});
A few notes on the alternative solutions: storing files for comparison will be memory-inefficient, especially for large files; taking file hashes will be expensive; custom flags are hard to keep track of, especially if you also want to detect changes made by other applications; and lastly, unsubscribing and re-subscribing requires unnecessary juggling.
If you don't need an instant result, you can use setTimeout to debounce successive events:
let timeoutId;

fs.watch(file, (eventType, filename) => {
  clearTimeout(timeoutId);
  timeoutId = setTimeout(() => {
    console.log({ eventType });
  }, 100);
});
