I wrote and ran the following script on a Linux system and expected a growing list of numbers that stops after one second.
const { Worker, isMainThread } = require('node:worker_threads');
if (isMainThread) {
new Worker(__filename);
setTimeout(process.exit, 1000);
} else {
let i = 0;
while (true) {
console.error(i++);
}
}
However, the actual output is
0
I've read that a blocked parent thread can block the output of worker threads. I tried
const { Worker, isMainThread } = require('node:worker_threads');
if (isMainThread) {
new Worker(__filename);
setTimeout(process.exit, 1000);
} else {
let i = 0;
while (i < 10) {
console.error(i++);
}
}
instead and got
0
1
2
3
4
5
6
7
8
9
It seems like the infinite loop inside the worker thread blocks the output, but I can't find the reason.
I've read https://nodejs.org/api/worker_threads.html#new-workerfilename-options and can't find anything that describes the behavior. I would understand that an infinite loop in the parent thread could block the output, but not an infinite loop in the worker thread.
I wrote another script to check whether process.stderr or the parent thread is blocked, but
const { Worker, isMainThread, parentPort } = require('node:worker_threads');
if (isMainThread) {
const worker = new Worker(__filename);
worker.on('message', console.error);
setTimeout(() => {
process.exit();
}, 1000);
} else {
let i = 0;
while (true) {
parentPort.postMessage(i++);
}
}
produces the expected output:
0
1
2
3
.
.
.
139108
139109
139110
139111
139112
Why is the output blocked? In addition, if the infinite loop were blocking the output, why would I see the first output 0?
I found this note and I think it's related to my question, but I don't fully understand what it means:
https://nodejs.org/dist/latest-v18.x/docs/api/process.html#a-note-on-process-io
TTYs are synchronous on Linux, while pipes and sockets are asynchronous on Linux.
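One way to check whether the asynchronous stream plumbing is what's blocking is to bypass it and write to the file descriptor synchronously. A minimal sketch (assuming fs.writeSync on fd 2 goes straight to the underlying stderr descriptor instead of through the worker's piped stream):
const { Worker, isMainThread } = require('node:worker_threads');
const fs = require('node:fs');
if (isMainThread) {
  new Worker(__filename);
  setTimeout(process.exit, 1000);
} else {
  let i = 0;
  while (true) {
    // fs.writeSync is synchronous, so it does not need the worker's
    // event loop to flush anything
    fs.writeSync(2, i++ + '\n');
  }
}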
Related
I am currently trying to find a way to end all worker threads spawned from some master thread after any of them finishes. I've looked at various schemes for communication between worker threads (e.g. cluster.worker.send(), process.send(), etc.), but none of them seem to work.
The code that I have so far is quite simple:
const cluster = require('cluster')
const os = require('os')
if ( cluster.isMaster ) {
let children = []
for ( let i = 0; i < os.cpus().length; i++ ) {
children.push(cluster.fork())
}
process.on('message', (msg) => {
if ( msg.cmd === 'shutdown' ) {
console.log('shutting down')
children.forEach(child => {
child.kill()
})
}
})
} else {
console.log('I am worker #' + cluster.worker.id)
let time = Date.now()
while ( Date.now() < time + cluster.worker.id * 1000 ) {
// do nothing for some time
}
console.log('I am worker #' + cluster.worker.id + ' and I am done')
process.send({cmd: 'shutdown'})
}
The first part creates a child process for each CPU using the cluster module. Next, the master process expects some kind of shutdown message from any of the workers. Each worker runs some process, and, after a certain amount of time, sends a shutdown message to the main thread. At this point, the master thread should trigger all the other threads to shut down, but instead the message is never received. This seems to be the problem but I haven't been able to find a way to get this message sent.
When the code runs, each process sends an 'I am done' message but the threads are not killed.
There are a couple of things you need to change:
Use cluster.on instead of process.on, as process.on is for the child process, not the parent process.
Use child.process.kill() instead of child.kill().
const cluster = require('cluster')
const os = require('os')
if ( cluster.isMaster ) {
let children = []
for ( let i = 0; i < os.cpus().length; i++ ) {
children.push(cluster.fork())
}
cluster.on('message', (worker, msg) => {
if (msg.command === 'shutdown') {
// Kill every cluster worker
children.forEach(child => {
child.process.kill()
})
}
})
} else {
console.log('I am worker #' + cluster.worker.id)
let time = Date.now()
while ( Date.now() < time + cluster.worker.id * 1000 ) {
// do nothing for some time
}
console.log('I am worker #' + cluster.worker.id + ' and I am done')
process.send({command: 'shutdown'})
}
Is there a way of ensuring that a child process has been killed?
I currently have the following code:
let p = child_process.spawn(app, args);
...
try{
p.kill('SIGKILL');
} catch(e) {
console.error("Killing process exception:", e);
}
job = setInterval( () => {
if(p.killed || timeout === true){
clearInterval(job);
callback();
}
}, 100);
setTimeout( () => {
console.log("Killing process timeout!");
timeout = true;
}, 1000);
I check periodically (with a 100 ms period) whether the kill signal has been properly sent to the process and, at that moment, I assume that the process has been killed; but, to ensure that the process is not locked, I set a timeout of 1 second.
Many times the timeout fires, regardless of whether I wait 1 second or 10 seconds.
The code is executed on Linux; when running in WSL, everything seems to work properly.
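Note that p.killed only records that kill() sent the signal successfully; it does not guarantee that the process has actually terminated. A sketch of an alternative that waits for the child's 'exit' event instead of polling (reusing app, args, and callback from the snippet above):
const { spawn } = require('child_process');

const p = spawn(app, args);

// 'exit' fires once the child has really terminated
p.once('exit', (code, signal) => {
  console.log('child exited with', code, signal);
  callback();
});

p.kill('SIGKILL');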
I have a process which passes messages to a child process for processing in the middle of a loop and then should continue depending on the response from the child.
I however don't know how to pause the loop to allow the child process to complete and proceed once we get a message.
Here is what the function looks like:
const { fork } = require('child_process')
const chalk = require('chalk')
function runTests()
{
const allTests = getTests()
allTests.forEach(test => {
console.log(chalk.blue.bold.underline('Running: '+ test.title))
for(let i = 0;i < test.questions.length;i++)
{
//Fork child process.
const poll = fork('poll.js',['999999', new Date()])
poll.send(test.questions[i])
poll.on('message', msg => {
if(assert("msg.includes('error')",msg))
{
console.log(chalk.blue.bold.underline('Child response: '+ msg))
}
});
}
})
}
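One way to make the loop wait is to wrap each exchange in a Promise and await it. This is only a sketch; it assumes poll.js replies exactly once per question it is sent:
const { fork } = require('child_process');

function askChild(question) {
  return new Promise((resolve, reject) => {
    const poll = fork('poll.js', ['999999', String(new Date())]);
    poll.once('message', msg => {
      poll.kill();
      resolve(msg);
    });
    poll.once('error', reject);
    poll.send(question);
  });
}

async function runTests() {
  const allTests = getTests();
  for (const test of allTests) {
    console.log(chalk.blue.bold.underline('Running: ' + test.title));
    for (const question of test.questions) {
      // the loop pauses here until the child responds
      const msg = await askChild(question);
      console.log(chalk.blue.bold.underline('Child response: ' + msg));
    }
  }
}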
var port = 8080;
var Worker = require('webworker-threads').Worker;
require('http').createServer(function (req,res) {
var fibo = new Worker(function() {
function fibo (n) {
return n > 1 ? fibo(n - 1) + fibo(n - 2) : 1;
}
// which onmessage does this this refer to?
onmessage = function (event) { //reference 1
postMessage(fibo(event.data));
}
});
fibo.onmessage = function (event) { //reference 2
res.end('fib(40) = ' + event.data);
};
fibo.postMessage(40);
}).listen(port);
This is the code found as an example for the webworker class.
I was looking at the API and don't seem to understand what reference 1 in the above code is referring to. Why does the postMessage(40) hit the inner onmessage function and not the fibo.onmessage function?
The main point to note here is that onmessage() and postMessage() are used as message bearers between the main thread and the worker thread. This can be confusing initially. So the flow goes like this:
Create a worker thread.
var fibo= new Worker...
This will spawn another JavaScript thread in Node. It can run in parallel in the background and use the available CPU cores. Otherwise, due to Node's single-threaded model, taking advantage of multiple CPU cores is not possible (hence worker threads are a good approach for handling CPU-bound tasks in Node).
In the worker thread we define
a) how to process the request/work it receives - onmessage() does this job. It listens for any incoming work request and acts on it.
onmessage= function (event) { //reference 1
postMessage(fibo(event.data));
}
b) how to communicate back to the main thread once the work is done - postMessage does this job.
postMessage(fibo(event.data));
In the main thread:
a. Call the worker thread and give it a task to execute, i.e. using postMessage (by now you get the dance):
fibo.postMessage(40);
b. Define a listener for the action to take once the worker thread finishes its job and responds, i.e. using onmessage:
fibo.onmessage = function (event) { //reference 2
res.end('fib(40) = ' + event.data);
};
try this code:
var port=8080;
var Worker = require('webworker-threads').Worker;
var fibo = new Worker(function() {
function fibo (n) {
return n > 1 ? fibo(n - 1) + fibo(n - 2) : 1;
}
// which onmessage does this this refer to?
onmessage = function (event) { //reference 1
console.log("fibo.onmessage inside");
postMessage(fibo(event.data));
}
});
fibo.onmessage = function (event) { //reference 2
console.log("fibo.onmessage outside")
console.log('fib(40) = ' + event.data);
};
fibo.postMessage(40);
it gives
fibo.onmessage inside
fibo.onmessage outside
fib(40) = 165580141
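As an aside, the same inside/outside split maps directly onto the built-in node:worker_threads module; here is a rough equivalent of the example above (a sketch, not webworker-threads itself):
const { Worker, isMainThread, parentPort } = require('node:worker_threads');

function fibo(n) {
  return n > 1 ? fibo(n - 1) + fibo(n - 2) : 1;
}

if (isMainThread) {
  const worker = new Worker(__filename);
  worker.on('message', data => {        // plays the role of fibo.onmessage (reference 2)
    console.log('fib(40) = ' + data);
    worker.terminate();
  });
  worker.postMessage(40);               // plays the role of fibo.postMessage(40)
} else {
  parentPort.on('message', n => {       // plays the role of the inner onmessage (reference 1)
    parentPort.postMessage(fibo(n));
  });
}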
The key thing is that it has to run in another thread, so this implies that at some point it is doing something like the following (not even close to the real implementation, but the idea still applies):
class worker {
constructor(fnToRun){
child_process.spawn('thread-creator-process', [
'-fn', escape(fnToRun.toString())
]);
}
}
So now the other side could do something like:
var result = new Function('context',
'with(context){' +
' ' + fnAsString +
'}'
);
result(thisThreadContext);
Now the context holds the onmessage reference once it is assigned, and it can already hold references to postMessage and the like; with a little bit of parsing you have a new thread to work with.
At least that's how I think of it.
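A runnable toy version of that idea, purely to illustrate how a with(context) wrapper lets the bare onmessage/postMessage names inside the function string resolve to properties of the context object (the names here are hypothetical; the real webworker-threads internals differ):
const fnAsString = `
  onmessage = function (event) {
    postMessage(event.data * 2);
  };
`;

const thisThreadContext = {
  onmessage: null,                                   // the injected code assigns this
  postMessage(data) { console.log('worker sent back:', data); }
};

// Evaluate the function body so its free names resolve against the context
const install = new Function('context', 'with (context) {' + fnAsString + '}');
install(thisThreadContext);

// Simulate the thread receiving a message
thisThreadContext.onmessage({ data: 21 });           // logs "worker sent back: 42"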
I'm trying to write a small node application that will search through and parse a large number of files on the file system.
In order to speed up the search, we are attempting to use some sort of map reduce. The plan would be the following simplified scenario:
Web request comes in with a search query
3 processes are started that each get assigned 1000 (different) files
once a process completes, it would 'return' its results back to the main thread
once all processes complete, the main thread would continue by returning the combined result as a JSON result
The questions I have with this are:
Is this doable in Node?
What is the recommended way of doing it?
I've been fiddling, but have come no further than the following example using child processes:
initiator:
var child_process = require('child_process');
function Worker() {
return child_process.fork("myProcess.js");
}
for(var i = 0; i < require('os').cpus().length; i++){
var process = new Worker();
process.send(workItems.slice(i * itemsPerProcess, (i+1) * itemsPerProcess));
}
myProcess.js
process.on('message', function(msg) {
var valuesToReturn = [];
// Do file reading here
//How would I return valuesToReturn?
process.exit(0);
});
A few sidenotes:
I'm aware the number of processes should depend on the number of CPUs on the server
I'm also aware of speed restrictions in a file system. Consider it a proof of concept before we move this to a database or Lucene instance :-)
Should be doable. As a simple example:
// parent.js
var child_process = require('child_process');
var numchild = require('os').cpus().length;
var done = 0;
for (var i = 0; i < numchild; i++){
var child = child_process.fork('./child');
child.send((i + 1) * 1000);
child.on('message', function(message) {
console.log('[parent] received message from child:', message);
done++;
if (done === numchild) {
console.log('[parent] received all results');
...
}
});
}
// child.js
process.on('message', function(message) {
console.log('[child] received message from server:', message);
setTimeout(function() {
process.send({
child : process.pid,
result : message + 1
});
process.disconnect();
}, (0.5 + Math.random()) * 5000);
});
So the parent process spawns X child processes and passes each of them a message. It also installs an event handler to listen for any messages sent back from the child (with the result, for instance).
The child process waits for messages from the parent, and starts processing (in this case, it just starts a timer with a random timeout to simulate some work being done). Once it's done, it sends the result back to the parent process and uses process.disconnect() to disconnect itself from the parent (basically stopping the child process).
The parent process keeps track of the number of child processes started, and the number of them that have sent back a result. When those numbers are equal, the parent received all results from the child processes so it can combine all results and return the JSON result.
For a distributed problem like this, I've used zmq and it has worked really well. I'll describe a similar problem that I ran into and attempted to solve via processes (but failed), and then turned towards zmq.
Using bcrypt, or an expensive hashing algorithm, is wise, but it blocks the node process for around 0.5 seconds. We had to offload this to a different server, and as a quick fix, I used essentially exactly what you did: run a child process, send messages to it, and get it to respond. The only issue we found is that, for whatever reason, our child process would pin an entire core when it was doing absolutely no work. (I still haven't figured out why this happened; we ran a trace and it appeared that epoll was failing on stdout/stdin streams. It would also only happen on our Linux boxes and would work fine on OS X.)
edit:
The pinning of the core was fixed in https://github.com/joyent/libuv/commit/12210fe and was related to https://github.com/joyent/node/issues/5504, so if you run into the issue and you're using centos + kernel v2.6.32: update node, or update your kernel!
Regardless of the issues I had with child_process.fork(), here's a nifty pattern I always use:
client:
var child_process = require('child_process');
function FileParser() {
this.__callbackById = [];
this.__callbackIdIncrement = 0;
this.__process = child_process.fork('./child');
this.__process.on('message', this.handleMessage.bind(this));
}
FileParser.prototype.handleMessage = function handleMessage(message) {
var error = message.error;
var result = message.result;
var callbackId = message.callbackId;
var callback = this.__callbackById[callbackId];
if (! callback) {
return;
}
callback(error, result);
delete this.__callbackById[callbackId];
};
FileParser.prototype.parse = function parse(data, callback) {
this.__callbackIdIncrement = (this.__callbackIdIncrement + 1) % 10000000;
this.__callbackById[this.__callbackIdIncrement] = callback;
this.__process.send({
data: data, // optionally you could pass in the path of the file, and open it in the child process.
callbackId: this.__callbackIdIncrement
});
};
module.exports = FileParser;
child process:
process.on('message', function(message) {
var callbackId = message.callbackId;
var data = message.data;
function respond(error, response) {
process.send({
callbackId: callbackId,
error: error,
result: response
});
}
// parse data..
respond(undefined, "computed data");
});
We also need a pattern to synchronize the different processes: when each process finishes its task it will respond to us, we'll increment a count for each process that finishes, and then call the Semaphore's callback when we've hit the count we want.
function Semaphore(wait, callback) {
this.callback = callback;
this.wait = wait;
this.counted = 0;
}
Semaphore.prototype.signal = function signal() {
this.counted++;
if (this.counted >= this.wait) {
this.callback();
}
}
module.exports = Semaphore;
here's a use case that ties all the above patterns together:
var FileParser = require('./FileParser');
var Semaphore = require('./Semaphore');
var arrFileParsers = [];
for(var i = 0; i < require('os').cpus().length; i++){
var fileParser = new FileParser();
arrFileParsers.push(fileParser);
}
function getFiles() {
return ["file", "file"];
}
var arrResults = [];
function onAllFilesParsed() {
console.log('all results completed', JSON.stringify(arrResults));
}
var lock = new Semaphore(arrFileParsers.length, onAllFilesParsed);
arrFileParsers.forEach(function(fileParser) {
var arrFiles = getFiles(); // you need to decide how to split the files into 1k chunks
fileParser.parse(arrFiles, function (error, result) {
arrResults.push(result);
lock.signal();
});
});
Eventually I used http://zguide.zeromq.org/page:all#The-Load-Balancing-Pattern, where the client was using the nodejs zmq client, and the workers/broker were written in C. This allowed us to scale this across multiple machines, instead of just a local machine with sub processes.
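For reference, the client side of that setup can be very small. A minimal sketch assuming the legacy zmq npm package and a broker listening on tcp://localhost:5559 (both are assumptions, not details from the original setup):
var zmq = require('zmq');

var requester = zmq.socket('req');
requester.connect('tcp://localhost:5559');

requester.on('message', function (reply) {
  console.log('received result:', reply.toString());
  requester.close();
});

// send one unit of work; the broker load-balances it to an available worker
requester.send(JSON.stringify({ files: ['a.txt', 'b.txt'] }));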