Restarting child process - node.js

I have a main process that spawns child processes. When the child process is killed, it is restarted, but when it is killed again, it will not restart.
test.js
const fork = require('child_process').fork;
const path = require('path');

const test = path.resolve('test2.js');
let test_child = fork(test);

test_child.on("close", () => {
  console.log("child gone");
  setTimeout(() => {
    test_child = fork(test);
  }, 2500);
});
test2.js
setInterval(() => {
  console.log("test");
}, 250);
I want the main process (test.js) to continually restart the child (test2.js) if it ever crashes or stops for some reason.

const fork = require('child_process').fork;
const path = require('path');

const test = path.resolve('test2.js');

function repeat() {
  let test_child = fork(test);
  test_child.on("close", () => {
    console.log("child gone");
    setTimeout(() => {
      repeat();
    }, 25);
  });
}

repeat();
You can try the above code in your test.js. Unlike your original version, repeat() attaches a fresh "close" listener to every newly forked child, so the restart keeps working after the second and subsequent kills.

Alternatively, if you can check the status of your child process, you can poll it with a setInterval: check whether it is still running and, if not, fork it again, as in the sketch below.
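A minimal sketch of that polling approach (not from the thread; it relies only on the documented exitCode and signalCode properties of the ChildProcess object) could look like this:

const fork = require('child_process').fork;
const path = require('path');

const test = path.resolve('test2.js');
let test_child = fork(test);

// Poll every few seconds; exitCode and signalCode stay null while the child is alive.
setInterval(() => {
  if (test_child.exitCode !== null || test_child.signalCode !== null) {
    console.log('child gone, restarting');
    test_child = fork(test);
  }
}, 5000);

The event-based repeat() version above is usually preferable, though, because it reacts to the "close" event immediately instead of waiting for the next poll.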

Related

Forked child processes does not exit

I have a script that downloads PDF files. The files come in batches of 5, so I thought of using fork to download 5 files simultaneously and then, when they are finished, start the next 5. I made a small test file before jumping to the full-scale project and found 2 problems:
the child process does not seem to exit after downloading
fork is not async, meaning I have to use some other method to wait before sending the next batch
main
const fork = require('child_process').fork;

var url_s = ["https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/NASDAQ_AAON_2020.pdf", "https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/NASDAQ_AAON_2019.pdf"];
var year_s = ["2020", "2019"];
name = 'a';
type = 'a';

const ls = fork('a.js');

ls.on('exit', (code) => {
  console.log(`child_process exited with code ${code}`);
});
ls.on('close', (code) => {
  console.log(`child_process exited with code ${code}`);
});
ls.on('message', (msg) => {
  ls.send([url_s[0], name, type, year_s[0]]);
  ls.send([url_s[1], name, type, year_s[1]]);
});
child
const down_path = 'test/';
const https = require('https');
const fs = require('fs');

process.on('message', async (arr) => {
  console.log("CHILD: url received from parent process", arr);
  url = arr[0];
  name = arr[1];
  type = arr[2];
  year = arr[3];
  await download(url, name, type, year);
});

process.send('Executed');

async function download(url, name, type, year) {
  https.get(url, res => {
    const stream = fs.createWriteStream(down_path + name + '_' + type + '_' + year + '.pdf');
    res.pipe(stream);
    stream.on('finish', () => {
      console.log('done : ' + year);
      stream.close();
    });
  });
}
I tried to kill child processes manually using process.kill() and process.exit(),
stream.on('finish', () => {
  console.log('done : ' + year);
  stream.close();
  process.exit();
});
but it kills all the child processes, not the one I want.
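For what it is worth, one way to address both points is sketched below (hypothetical file names dl-parent.js and dl-child.js, not the asker's exact setup): make download return a Promise that only resolves when the write stream finishes, fork one child per file, let each child exit on its own after its single download, and have the parent wait for every child's exit before starting the next batch of 5.

// dl-child.js (hypothetical name): download one file, then exit this child only.
const https = require('https');
const fs = require('fs');
const down_path = 'test/';

process.on('message', ([url, name, type, year]) => {
  download(url, name, type, year)
    .then(() => process.exit(0))
    .catch(() => process.exit(1));
});

function download(url, name, type, year) {
  // Resolve only once the file has actually been written to disk.
  return new Promise((resolve, reject) => {
    https.get(url, res => {
      const stream = fs.createWriteStream(down_path + name + '_' + type + '_' + year + '.pdf');
      res.pipe(stream);
      stream.on('finish', () => { stream.close(); resolve(); });
      stream.on('error', reject);
    }).on('error', reject);
  });
}

// dl-parent.js (hypothetical name): one child per file, wait for the whole batch.
const { fork } = require('child_process');

function runOne(job) {
  return new Promise((resolve, reject) => {
    const child = fork('dl-child.js');
    child.send(job);  // [url, name, type, year]
    child.on('exit', code => code === 0 ? resolve() : reject(new Error('exit code ' + code)));
  });
}

async function runBatches(jobs, batchSize = 5) {
  for (let i = 0; i < jobs.length; i += batchSize) {
    await Promise.all(jobs.slice(i, i + batchSize).map(runOne));
  }
}

Because every child exits on its own once its download is done, nothing has to be killed from the parent, which sidesteps the "it kills all the child processes" problem.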

Node.js on multi-core machines for file I/O operations

I'm a bit confused, because all the examples I've read about the Node cluster module only seem to apply to webservers and concurrent requests, while for CPU-intensive applications the worker_threads module is recommended.
And what about file I/O operations? Imagine I have an array with 1 million filenames: ['1.txt', '2.txt', ..., '1000000.txt'], and I need to do heavy processing on each and then write the resulting file content.
What would be the right way to use all the CPU cores efficiently, spreading the processing of the different filenames across the cores?
Normally I would use this:
const fs = require('fs')
const async = require('async')
const heavyProcessing = require('./heavyProcessing.js')

const files = ['1.txt', '2.txt', ..., '1000000.txt']

async.each(files, function (file, cb) {
  fs.writeFile(file, heavyProcessing(file), function (err) {
    if (!err) cb()
  })
})
Should I now use cluster or worker_threads? And how should I use it?
Does this work?
const fs = require('fs')
const async = require('async')
const heavyProcessing = require('./heavyProcessing.js')

const cluster = require('node:cluster');
const http = require('node:http');
const numCPUs = require('node:os').cpus().length;
const process = require('node:process');

if (cluster.isPrimary) {
  console.log(`Primary ${process.pid} is running`);

  // Fork workers.
  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  cluster.on('exit', (worker, code, signal) => {
    console.log(`worker ${worker.process.pid} died`);
  });
} else {
  const files = ['1.txt', '2.txt', ..., '1000000.txt']

  async.each(files, function (file, cb) {
    fs.writeFile(file, heavyProcessing(file), function (err) {
      if (!err) cb()
    })
  })
}
Just so everyone knows, if they are interested: you need to use the npm module piscina.
In this gist I explain everything. NodeJS is a powerful tool for backend developers, but you must be aware of multi-core processing in order to get the most out of your CPU.
Multi-core processing in NodeJS is mostly used for webservers, and NodeJS already ships the cluster module for that out of the box.
Although NodeJS also ships the worker_threads module, it is not so easy to deal with directly.
Let's create a project that tests single-threaded and multi-threaded CPU-intensive processing and writes some random data to files.
Create the project:
mkdir test-threads && cd test-threads
npm init -y
Install the dependencies and create the dist/ directory
npm install async progress piscina command-line-args
mkdir dist
Create the file index.js at the root of the project directory
const path = require('path')
const async = require('async')
const ProgressBar = require('progress')
const Piscina = require('piscina')
const commandLineArgs = require('command-line-args')

console.time('main')

const worker = require(path.resolve(__dirname, 'worker.js'))

const piscina = new Piscina({
  filename: path.resolve(__dirname, 'worker.js')
})

const argvOptions = commandLineArgs([
  { name: 'multi-thread', type: Boolean },
  { name: 'iterations', alias: 'i', type: Number }
])

const files = []
for (let i = 0; i < (argvOptions.iterations || 1000); i++) {
  files.push(path.join(__dirname, 'dist', i + '.txt'))
}

var bar = new ProgressBar(':bar', { total: files.length, width: 80 });

async.each(files, function (file, cb) {
  (async function () {
    try {
      const err = argvOptions['multi-thread'] ? (await piscina.run(file)) : worker(file)
      bar.tick()
      if (err) cb(Error(err)); else cb()
    } catch (err) {
      cb(Error(err))
    }
  })();
}, (err) => {
  if (err) {
    console.error('There was an error: ', err)
    process.exitCode = 1
  } else {
    bar.terminate()
    console.log('Success')
    console.timeEnd('main')
    process.exitCode = 0
  }
})
Now create worker.js, also at the root of the project directory
const fs = require('fs')

// Some CPU-intensive work; the higher baseNumber is, the longer it takes.
function mySlowFunction(baseNumber) {
  let result = 0
  for (var i = Math.pow(baseNumber, 7); i >= 0; i--) {
    result += Math.atan(i) * Math.tan(i)
  }
}

module.exports = (file) => {
  try {
    mySlowFunction(parseInt(Math.random() * 10 + 1))
    fs.writeFileSync(file, Math.random().toString())
    return null
  } catch (e) {
    return Error(e)
  }
}
Now run it single-threaded and check the elapsed time, for 1000 and 10000 iterations (one iteration equals one round of data processing plus one file creation):
node index.js -i 1000
node index.js -i 10000
Now compare that with the multi-threaded runs:
node index.js --multi-thread -i 1000
node index.js --multi-thread -i 10000
In the test I did (16-core CPU) the difference is huge: with 1000 iterations it went from 1:27.061 (m:ss.mmm) single-threaded to 8.884 s multi-threaded. Also check the files inside dist/ to make sure they were created correctly.

NodeJS child_process and express: terminate previous process and run a new one when a new POST request is made

I have this code that runs the nfc-emulate-forum-tag4 command on a file that the code also creates, containing an NDEF text record of the data received in the POST request. However, nfc-emulate-forum-tag4 runs indefinitely until the tag is scanned or the process is killed. Every time I get a POST request, I want to kill the previous process and run a new one. With the code below, execSync blocks the POST request until the emulation process completes, but when I use exec instead of execSync, the command doesn't actually run. (Terrible) code:
const { execSync } = require('child_process')
const express = require('express')
const fs = require('fs')

const app = express()

function buildNFC(text) {
  const hex = Buffer.from(text)
  const length = text.length + 3
  const initial = Buffer.from([0xd1, 0x01, length, 0x54, 0x02, 0x65, 0x6e])
  return Buffer.concat([initial, hex])
}

var child;

app.post("/", function (req, res) {
  const nfcbuffer = buildNFC(req.query.code)
  fs.writeFileSync("/home/pi/nfc", nfcbuffer)
  res.send("Sent " + nfcbuffer.toString('hex'))
  try {
    if (child) {
      console.log(`pid: ${child.pid}`)
      execSync(`killall ${child.pid}`, { stdio: 'inherit' })
      child = null
    } else {
      console.log(`no child running`)
    }
    child = execSync(`cd /home/pi && nfc-emulate-forum-tag4 ./nfc`, { stdio: 'inherit' })
  } catch {}
})

app.listen(3000, () => console.log("Listening on port 3000..."))
Thanks in advance!
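A sketch of one way to get that behaviour is to use spawn instead of execSync: spawn returns immediately with a ChildProcess handle, so the request is not blocked, and that handle can be killed when the next request arrives. Apart from the command name and the /home/pi/nfc path taken from the question, everything here is an assumption:

const { spawn } = require('child_process')
const express = require('express')
const fs = require('fs')

const app = express()
let child = null  // handle of the currently running emulation process, if any

app.post('/', (req, res) => {
  const nfcbuffer = buildNFC(req.query.code)  // buildNFC as defined in the question
  fs.writeFileSync('/home/pi/nfc', nfcbuffer)

  if (child) {
    console.log(`killing previous pid: ${child.pid}`)
    child.kill()  // sends SIGTERM to the previous emulation process
  } else {
    console.log('no child running')
  }

  // spawn does not wait for the command to finish, so the request returns right away
  const proc = spawn('nfc-emulate-forum-tag4', ['./nfc'], {
    cwd: '/home/pi',
    stdio: 'inherit'
  })
  proc.on('exit', () => { if (child === proc) child = null })
  child = proc

  res.send('Sent ' + nfcbuffer.toString('hex'))
})

app.listen(3000, () => console.log('Listening on port 3000...'))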

Nodejs cluster unable to kill worker running infinite loop

Is it possible to kill a cluster worker that is running an infinite loop? I've tried, but I'm unable to kill the worker. I guess the kill command cannot get onto the worker's event loop. Any ideas on how else I can do this? When the master receives a "start" message, I want it to fork a worker; when it receives a "stop" message, I want it to kill that worker.
const cluster = require('cluster');
const numCPUs = require('os').cpus().length;
const process = require('process');
const { nrpin, nrpout } = require("./helpers/PubSub");
const chalk = require('chalk');

//https://leanpub.com/thenodejsclustermodule/read
//https://gist.github.com/jpoehls/2232358

const arrWorkers = [];

if (cluster.isMaster) {
  masterProcess();
} else {
  childProcess();
}

function masterProcess() {
  console.log(chalk.blueBright(`Master ${process.pid} is running`));

  nrpin.on("start", async (bot) => {
    console.log("Start:", bot._id);
    if (arrWorkers.length == numCPUs) {
      console.log(chalk.yellowBright("No CPUs available to create worker!"));
    }
    const worker = cluster.fork();
    arrWorkers.push({
      workerId: worker.id,
      botId: bot._id
    })
  })

  nrpin.on("stop", async (bot) => {
    console.log("Stop:", bot._id);
    const worker = arrWorkers.find(x => x.botId == bot._id);
    if (worker) {
      console.log("killing worker:", worker.workerId);
      cluster.workers[worker.workerId].kill();
    }
  })

  // Be notified when workers die
  cluster.on('exit', function (worker, code, signal) {
    if (worker.isDead()) {
      console.info(`${chalk.redBright('worker dead pid')}: ${worker.process.pid}`);
    }
  });
}

function childProcess() {
  console.log(chalk.green(`Worker ${process.pid} started...`));
  while (true) {
    console.log(Date.now());
  }
}
Never mind, I solved it myself using process.kill:
let process_id = cluster.workers[worker.workerId].process.pid;
process.kill(process_id);
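In context, the "stop" handler might then look roughly like this (a sketch based on the code above):

nrpin.on("stop", async (bot) => {
  console.log("Stop:", bot._id);
  const worker = arrWorkers.find(x => x.botId == bot._id);
  if (worker) {
    console.log("killing worker:", worker.workerId);
    // worker.kill() appears to try a graceful disconnect over IPC first, which a
    // worker stuck in while(true) never handles; signalling the OS process
    // directly does not need the worker's cooperation.
    const pid = cluster.workers[worker.workerId].process.pid;
    process.kill(pid);
  }
});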

Worker stopped sending data to master

I have an app (master) that distributes work to n workers. Inside the worker JS I have hooked the console output as follows:
console._log = console.log;
console._error = console.error;

console.log = (...args) => {
  process.send({
    cmd: 'log',
    channel: 'out',
    data: args.join(' ')
  });
};

console.error = (...args) => {
  process.send({
    cmd: 'log',
    channel: 'err',
    data: args.join(' ')
  });
};
The master is then responsible for logging all incoming messages to a file, in addition to stdout/stderr. This is accomplished with the following code and the intercept-stdout module:
const intercept = require('intercept-stdout');
const stripAnsi = require('strip-ansi');

const unhook_intercept = intercept(function (str) {
  // stdout
  fs.appendFileSync(lib.logOutFile(), stripAnsi(str));
}, function (str) {
  // stderr
  fs.appendFileSync(lib.logErrFile(), stripAnsi(str));
});
I noticed in the logs that one worker stopped sending messages after about 1.5 days. In the master I have worker exit detection:
cluster.on('exit', (worker, code, signal) => {
  if (signal) {
    console.log(`${lib.dateTimeStamp()} - ${chalk.magenta('[')}${chalk.cyan(worker.process.pid)}${chalk.magenta(']')}\tWorker: ${chalk.yellow(`was killed by signal: ${signal}`)}`);
  } else if (code !== 0) {
    console.error(`${lib.dateTimeStamp()} - ${chalk.magenta('[')}${chalk.cyan(worker.process.pid)}${chalk.magenta(']')}\tWorker: ${chalk.red(`exited with error code: ${code}`)}`);
    let newWorker = cluster.fork();
    let data = work[worker.process.pid];
    let d = new Date();
    status[worker.process.pid].status = 'dead';
    status[newWorker.process.pid] = {
      started: `${d.toLocaleDateString()} ${d.toLocaleTimeString()}`,
      status: 'alive'
    };
    delete work[worker.process.pid];
    work[newWorker.process.pid] = data;
    newWorker.send({
      options: cfg.options,
      websites: work[newWorker.process.pid]
    });
  } else {
    delete work[worker.process.pid];
    delete status[worker.process.pid]
    console.log(`${lib.dateTimeStamp()} - ${chalk.magenta('[')}${chalk.cyan(worker.process.pid)}${chalk.magenta(']')}\tWorker: ${chalk.green('exited successfully')}`);
  }
});
The exit event was not triggered, as far as I can tell from the logs. At the moment I only have assumptions and I would like your opinions. Could it be because of:
The synchronous file logging?
A worker disconnecting on its own?
A worker exiting while the exit event was missed?
Something else? Your opinion...
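One way to turn the last two possibilities into something observable (a sketch, not part of the original setup) is a heartbeat: the master pings every worker periodically and force-kills any worker that stops answering, whether or not an 'exit' event ever fires on its own:

// In the master, next to the existing cluster.on('exit') handler.
const PING_INTERVAL = 60 * 1000;
const lastSeen = {};  // worker.id -> timestamp of the last pong received

cluster.on('message', (worker, msg) => {
  if (msg && msg.cmd === 'pong') lastSeen[worker.id] = Date.now();
});

setInterval(() => {
  for (const id of Object.keys(cluster.workers)) {
    const worker = cluster.workers[id];
    worker.send({ cmd: 'ping' });
    if (lastSeen[id] && Date.now() - lastSeen[id] > 3 * PING_INTERVAL) {
      console.error(`worker ${worker.process.pid} unresponsive, killing it`);
      worker.process.kill();  // the 'exit' event will then fire with a signal
    }
  }
}, PING_INTERVAL);

// In the worker, next to its existing message handling.
process.on('message', (msg) => {
  if (msg && msg.cmd === 'ping') process.send({ cmd: 'pong' });
});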
