Forked child processes do not exit - node.js

I have a script downloading PDF files. These files come in batches of 5, so I thought of using fork to download 5 files simultaneously and then, when finished, start the next 5. I made a small test file before jumping to the full-scale project and found 2 problems:
the child process does not exit (it seems) after downloading
fork is not async, meaning I have to use some other method to wait before sending the next batch
main:
const fork = require('child_process').fork;

var url_s = ["https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/NASDAQ_AAON_2020.pdf", "https://www.responsibilityreports.com/HostedData/ResponsibilityReportArchive/a/NASDAQ_AAON_2019.pdf"];
var year_s = ["2020", "2019"];
var name = 'a';
var type = 'a';

const ls = fork('a.js');

ls.on('exit', (code) => {
    console.log(`child_process exited with code ${code}`);
});
ls.on('close', (code) => {
    console.log(`child_process exited with code ${code}`);
});
ls.on('message', (msg) => {
    ls.send([url_s[0], name, type, year_s[0]]);
    ls.send([url_s[1], name, type, year_s[1]]);
});
child (a.js):
const down_path = 'test/';
const https = require('https');
const fs = require('fs');

process.on('message', async (arr) => {
    console.log("CHILD: url received from parent process", arr);
    url = arr[0];
    name = arr[1];
    type = arr[2];
    year = arr[3];
    await download(url, name, type, year);
});

process.send('Executed');

async function download(url, name, type, year) {
    https.get(url, res => {
        const stream = fs.createWriteStream(down_path + name + '_' + type + '_' + year + '.pdf');
        res.pipe(stream);
        stream.on('finish', () => {
            console.log('done : ' + year);
            stream.close();
        });
    });
}
I tried to kill the child processes manually using process.kill() and process.exit():
stream.on('finish', () => {
    console.log('done : ' + year);
    stream.close();
    process.exit();
});
but it kills all the child processes, not the one I want.
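
A minimal sketch of one possible way to address both points, assuming the same a.js child and the same [url, name, type, year] message shape as above (runBatch and batches are illustrative names, not the asker's code): wrap the download in a Promise so each child can exit once its file is written, and have the parent fork one child per file and wait for every 'exit' event before starting the next batch.
child (a.js):
const https = require('https');
const fs = require('fs');
const down_path = 'test/';

function download(url, name, type, year) {
    return new Promise((resolve, reject) => {
        https.get(url, res => {
            const stream = fs.createWriteStream(down_path + name + '_' + type + '_' + year + '.pdf');
            res.pipe(stream);
            stream.on('finish', () => { stream.close(); resolve(year); });
            stream.on('error', reject);
        }).on('error', reject);
    });
}

process.on('message', async ([url, name, type, year]) => {
    await download(url, name, type, year);
    process.exit(0); // only this child exits; the parent keeps running
});

process.send('Executed');
parent:
const { fork } = require('child_process');

function runBatch(batch) {
    // batch: an array of [url, name, type, year] tuples
    return Promise.all(batch.map(job => new Promise(resolve => {
        const child = fork('a.js');
        child.on('message', () => child.send(job)); // wait for 'Executed', as above
        child.on('exit', resolve);
    })));
}

(async () => {
    for (const batch of batches) { // batches is assumed to hold groups of 5 jobs
        await runBatch(batch);     // wait for the whole group before starting the next
    }
})();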

Related

How to read video frames directly into memory with Node.js?

What I am trying to do is take a video, divide it into frames, and pass those frames to a model to detect objects in each frame, but the extraction process costs so much time and I don't need the frames on my disk.
fmpeg-stream offers stream capabilities, so there is no need to write to a file.
It is also possible to use ffmpeg directly and spawn a new child process. Its .stdout property is a readable stream; on the data event, each chunk can be read.
const fs = require("fs");
const tf = require("@tensorflow/tfjs-node");
const logStream = fs.createWriteStream('./logFile.log');
const spawnProcess = require('child_process').spawn,
    ffmpeg = spawnProcess('ffmpeg', [
        '-i', 'videofile.mp4',
        '-vcodec', 'png',
        '-f', 'rawvideo',
        '-s', 'h*w', // size of one frame
        'pipe:1'
    ]);
ffmpeg.stderr.pipe(logStream); // for debugging

let i = 0;
ffmpeg.stdout.on('data', (data) => {
    try {
        console.log(tf.node.decodeImage(data).shape);
        console.log(`${++i} frames read`);
        // dispose all tensors
    } catch (e) {
        console.log(e);
    }
});

ffmpeg.on('close', function (code) {
    console.log('child process exited with code ' + code);
});
Decoding the image is wrapped in a try/catch block to prevent the error raised when a chunk does not correspond to a complete frame.
More robust code that avoids decoding chunks that do not correspond to images would be the following:
const { Transform } = require("stream");

class ExtractFrames extends Transform {
    constructor(delimiter) {
        super({ readableObjectMode: true });
        this.delimiter = Buffer.from(delimiter, "hex");
        this.buffer = Buffer.alloc(0);
    }

    _transform(data, enc, cb) {
        // Add new data to buffer
        this.buffer = Buffer.concat([this.buffer, data]);
        const start = this.buffer.indexOf(this.delimiter);
        if (start < 0) return cb(); // there's no frame data at all
        const end = this.buffer.indexOf(
            this.delimiter,
            start + this.delimiter.length,
        );
        if (end < 0) return cb(); // we haven't got the whole frame yet
        this.push(this.buffer.slice(start, end)); // emit a frame
        this.buffer = this.buffer.slice(end); // remove frame data from buffer
        if (start > 0) console.error(`Discarded ${start} bytes of invalid data`);
        cb();
    }

    _flush(callback) {
        // push remaining buffer to readable stream
        callback(null, this.buffer);
    }
}
const fs = require("fs");
const tf = require("@tensorflow/tfjs-node");
const logStream = fs.createWriteStream('./logFile.log');
const spawnProcess = require('child_process').spawn,
    ffmpeg = spawnProcess('ffmpeg', [
        '-i', 'generique.mp4',
        '-vcodec', 'mjpeg',
        '-f', 'rawvideo',
        '-s', '420x360', // size of one frame
        'pipe:1'
    ]);
ffmpeg.stderr.pipe(logStream); // for debugging

let i = 0;
ffmpeg.stdout
    .pipe(new ExtractFrames("FFD8FF")) // JPEG frames start with the bytes FF D8 FF
    .on('data', (data) => {
        try {
            console.log(tf.node.decodeImage(data).shape);
            console.log(`${++i} frames read`);
            // dispose all tensors
        } catch (e) {
            console.log(e);
        }
    });

ffmpeg.on('close', function (code) {
    console.log('child process exited with code ' + code);
});
Though the above code works, it will still fill up memory quickly. Separating the frame extraction from the data processing itself will help.
async function* frames() {
    let resolve;
    let promise = new Promise(r => resolve = r);
    let bool = true;

    ffmpeg.stdout.pipe(new ExtractFrames("FFD8FF")).on('data', data => {
        resolve(data);
        promise = new Promise(r => resolve = r);
    });

    ffmpeg.on('close', function (code) {
        bool = false;
        console.log('child process exited with code ' + code);
    });

    while (bool) {
        const data = await promise;
        yield data;
    }
}

(async () => {
    // data processing
    // possibly create tf.dataset for training
    for await (const data of frames()) {
        console.log(tf.node.decodeImage(data).shape);
        console.log(data);
    }
})();
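As for the "dispose all tensors" comment above, one possible approach (a sketch, not part of the original answer) is to decode each frame inside tf.tidy, so intermediate tensors are released after every iteration:
for await (const data of frames()) {
    tf.tidy(() => {
        const image = tf.node.decodeImage(data);
        console.log(image.shape);
        // run the model on `image` here; tensors created inside tf.tidy
        // are disposed automatically when the callback returns
    });
}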

Restarting child process

I have a main process that spawns child processes. When the child process is killed, it is restarted, but when it is killed again, it will not restart.
test.js
const fork = require('child_process').fork;
const path = require('path');
const test = path.resolve('test2.js');

let test_child = fork(test);
test_child.on("close", () => {
    console.log("child gone");
    setTimeout(() => {
        test_child = fork(test);
    }, 2500);
});
test2.js
setInterval(() => {
    console.log("test");
}, 250);
I want the main process (test.js) to continually start up (test2.js) if it ever crashes or stops for some reason.
const fork = require('child_process').fork;
const path = require('path');
const test = path.resolve('child.js');

function repeat() {
    let test_child = fork(test);
    test_child.on("close", () => {
        console.log("child gone");
        setTimeout(() => {
            repeat();
        }, 25);
    });
}

repeat();
You can try the above code in your main file (test.js in your case); each call to repeat() attaches a fresh "close" handler to the newly forked child, which is what the original code was missing after the first restart.
If you can check the status of your child process, you can also add a setInterval that checks whether it is running and, if not, forks it again, as sketched below.
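A minimal sketch of that polling approach, assuming the same test2.js child (the running flag and start() helper are illustrative, not part of the original answer):
const { fork } = require('child_process');
const path = require('path');

const script = path.resolve('test2.js');
let child;
let running = false;

function start() {
    child = fork(script);
    running = true;
    child.on('close', () => { running = false; });
}

start();
setInterval(() => {
    if (!running) start(); // the child stopped since the last check, fork it again
}, 1000); // check once per second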

How do I close fs.createWriteStream?

I am trying to use the node module vtt2srt to convert a VTT string to an SRT file and save the output. It works once, and my subtitles are saved correctly, but if I hit the endpoint a second time node crashes with this error:
Error: write after end
at writeAfterEnd
I have tried all combinations of .close and .on('close').
I send a unique vid and the VTT data from the frontend:
router.post('/downloadsubs', function (req, res, next) {
    var vttObj = webvtt.compile(req.body.data);
    fs.unlink(__dirname + '/../static/videos/' + req.body.vid + '/subtitles.srt', function () {
        srtStream.write(vttObj);
        var writestream = fs.createWriteStream(__dirname + '/../static/videos/' + req.body.vid + '/subtitles.srt');
        srtStream.end();
        srtStream.pipe(writestream);
        res.send(req.body.vid);
    });
});
I worked out what my problem was; I hope it can be useful to someone else one day.
Previously I was requiring my module at the head of my router file:
const vtt2srt = require('node-vtt-to-srt');
const srtStream = vtt2srt();

router.post('/downloadsubs', function (req, res, next) {
    var vttObj = webvtt.compile(req.body.data);
    fs.unlink(__dirname + '/../static/videos/' + req.body.vid + '/subtitles.srt', function () {
        srtStream.write(vttObj);
        srtStream.end();
        var writestream = fs.createWriteStream(__dirname + '/../static/videos/' + req.body.vid + '/subtitles.srt');
        srtStream.pipe(writestream);
        writestream.on('finish', function () { res.send(req.body.vid); });
    });
});
Now, instead, I am creating a new srtStream in the router method:
const vtt2srt = require('node-vtt-to-srt');

router.post('/downloadsubs', function (req, res, next) {
    var srtStream = vtt2srt();
    var vttObj = webvtt.compile(req.body.data);
    fs.unlink(__dirname + '/../static/videos/' + req.body.vid + '/subtitles.srt', function () {
        srtStream.write(vttObj);
        srtStream.end();
        var writestream = fs.createWriteStream(__dirname + '/../static/videos/' + req.body.vid + '/subtitles.srt');
        srtStream.pipe(writestream);
        writestream.on('finish', function () { res.send(req.body.vid); });
    });
});
And it works: each request now gets a fresh srtStream instead of writing to the module-level stream that was already ended by the first request, which is what caused the "write after end" error.

Stop Node.js child_process with browser API call

I have Vue (axios) making a GET call to an Express route which triggers a child_process of ffmpeg in an infinite loop. ffmpeg streams one file over UDP; on close it calls itself again and streams another file.
I'd like to be able to kill this process from a button on a web page, but can't seem to work it out.
This is my Express route code:
router.get('/test', function (req, res) {
    const childProcess = require('child_process');
    const fs = require('fs');
    const path = require('path');

    // Grabs a random index between 0 and length
    function randomIndex(length) {
        return Math.floor(Math.random() * (length));
    }

    function Stream() {
        const FILE_SRC = '/path/to/file';
        // Read the directory and get the files
        const dirs = fs.readdirSync(FILE_SRC)
            .map(file => {
                return path.join(FILE_SRC, file);
            });

        const srcs_dup = [];
        const hashCheck = {}; // used to check if the file was already added to srcs_dup
        var numberOfFiles = dirs.length - 1; // OR whatever # you want
        console.log(numberOfFiles);

        // While we haven't got the number of files we want. Loop.
        while (srcs_dup.length < numberOfFiles) {
            var fileIndex = randomIndex(dirs.length - 1);

            // Check if the file was already added to the array
            if (hashCheck[fileIndex] == true) {
                continue; // Already have that file. Skip it
            }

            // Add the file to the array and object
            srcs_dup.push(dirs[fileIndex]);
            hashCheck[fileIndex] = true;
        }

        var chosen = "'" + srcs_dup[0] + "'";
        var call = "ffmpeg -re -i " + chosen + " -content_type audio/mpeg -f mp3 udp://224.1.2.3:1234";
        const stop = childProcess.exec(call, { shell: true });

        stop.stdout.on('data', function (data) {
            console.log('stdout: ' + data.toString());
        });
        stop.stderr.on('data', (data) => {
            console.log(`stderr: ${data}`);
        });
        stop.on('close', (code) => {
            console.log('child exited with code ' + code);
            Stream();
        });
        stop.on('error', function (err) {
            console.log('sh error' + err);
        });
    }

    Stream();
});
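One possible approach (a sketch; the keepStreaming flag and the /stop route are assumed names, not code from the original post) is to keep the current ffmpeg child at module scope so another route can kill it, and to guard the restart inside the 'close' handler with a flag:
const express = require('express');
const childProcess = require('child_process');
const router = express.Router();

let currentChild = null;   // the ffmpeg process currently streaming
let keepStreaming = true;  // guards the restart inside the 'close' handler

function stream() {
    // build the ffmpeg command as in the question; the file path is illustrative
    const call = "ffmpeg -re -i '/path/to/file.mp3' -content_type audio/mpeg -f mp3 udp://224.1.2.3:1234";
    currentChild = childProcess.exec(call, { shell: true });
    currentChild.on('close', () => {
        if (keepStreaming) stream(); // only loop while streaming is still wanted
    });
}

router.get('/test', function (req, res) {
    keepStreaming = true;
    stream();
    res.send('streaming started');
});

router.get('/stop', function (req, res) {
    keepStreaming = false;
    if (currentChild) currentChild.kill(); // SIGTERM the current ffmpeg process
    res.send('streaming stopped');
});

module.exports = router;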

Node.js how to iterate with async callback?

What I am doing is using fs to stitch 5 html-page parts (html, head, chead, topaside, main, footer) together. The file name is htmlpage.js, so you can just run node htmlpage.js file1 file2 file3 ... on the command line, and it will stitch those html-page parts together, then spit out file1.html, file2.html, file3.html .... I don't want to use a template engine/library/framework or whatever, especially while I am learning.
Here is the source code:
'use strict';

const fs = require('fs'),
    head = fs.createReadStream('./html-parts/head.html', 'utf8'),
    topaside = fs.createReadStream('./html-parts/topaside.html', 'utf8'),
    footer = fs.createReadStream('./html-parts/footer.html', 'utf8');

let name = process.argv.slice(2),
    htmlray = [],
    ni = 0,
    nl = name.length;

for (ni; ni < nl; ni++) {
    let cheadP = './html-parts/' + name[ni] + '-head.html',
        mainP = './html-parts/' + name[ni] + '-main.html',
        htmlP = name[ni] + '.html',
        chead = fs.createReadStream(cheadP, 'utf8'),
        main = fs.createReadStream(mainP, 'utf8'),
        html = fs.createWriteStream(htmlP, 'utf8');

    // let those parts form an array
    htmlray = [html, head, chead, topaside, main, footer];

    openendPipe(htmlray[1], htmlray[0]);
    htmlray[1].on('end', () => {
        openendPipe(htmlray[2], htmlray[0]);
        htmlray[2].on('end', () => {
            openendPipe(htmlray[3], htmlray[0]);
            htmlray[3].on('end', () => {
                openendPipe(htmlray[4], htmlray[0]);
                htmlray[4].on('end', () => {
                    htmlray[5].pipe(htmlray[0]);
                    htmlray[5].on('end', () => {
                        console.log(name + '.html' + ' created');
                    });
                });
            });
        });
    });
}

function openendPipe(src, dst) {
    return src.pipe(dst, {end: false});
}
But what if htmlray had 100 parts? I want to be able to replace this code with an iteration; let's call it pipeblock:
openendPipe(htmlray[1], htmlray[0]);
htmlray[1].on('end', () => {
    openendPipe(htmlray[2], htmlray[0]);
    htmlray[2].on('end', () => {
        openendPipe(htmlray[3], htmlray[0]);
        htmlray[3].on('end', () => {
            openendPipe(htmlray[4], htmlray[0]);
            htmlray[4].on('end', () => {
                htmlray[5].pipe(htmlray[0]);
                htmlray[5].on('end', () => {
                    console.log(name + '.html' + ' created');
                });
            });
        });
    });
});
I tried these solutions, but they didn't work:
Solution 1:
(function () {
    let i = 0, count = 1;
    function nextpipe() {
        let arr = arguments[0];
        i++;
        if (count > 5) return;
        openendPipe(arr[i], arr[0]);
        count++;
        arr[i].on('end', nextpipe);
    }
    return nextpipe;
})();
//then replace 'pipeblock' with 'nextpipe(htmlray)';
//console.log: nextpipe is undefined.
Solution 2:
//replace 'pipeblock' with these code
let pi = 1,
pl = htmlray.length - 1;
htmlray[pi].pipe(htmlray[0], {end: false});
htmlray[pi].on('end', nextpipe);
function nextpipe() {
if (pi > pl) return console.log(name + '.html' + ' created');;
pi ++;
htmlray[pi].pipe(htmlray[0], {end: false});
htmlray[pi].on('end', nextpipe);
}
//cosole.log:
//htmlray[pi].pipe(htmlray[0], {end: false});
//TypeError: Cannot read property 'pipe' of undefined
This is what's called "callback hell"; you should either use a library to handle async calls, such as async.js, or (better) use promises.
Just promisify fs.readFile like this (or write your own promise wrapper for fs.createReadStream if you want to keep using it):
const readFile = Promise.promisify(require('fs').readFile);
and then combine your request promises using Promise.all().
Here are examples for fs with the Bluebird promise library: http://bluebirdjs.com/docs/api/promise.promisify.html, http://bluebirdjs.com/docs/api/promise.all.html
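A minimal sketch of that suggestion, assuming the same './html-parts/' layout as the question (buildPage is an illustrative name, not part of the original answer): read every part in parallel with the promisified readFile, then concatenate them in order and write the result once.
const Promise = require('bluebird');
const fs = require('fs');
const readFile = Promise.promisify(fs.readFile);
const writeFile = Promise.promisify(fs.writeFile);

function buildPage(name) {
    const parts = [
        './html-parts/head.html',
        './html-parts/' + name + '-head.html',
        './html-parts/topaside.html',
        './html-parts/' + name + '-main.html',
        './html-parts/footer.html'
    ];
    // Promise.all preserves the order of the input array,
    // so the parts are concatenated in the right sequence.
    return Promise.all(parts.map(p => readFile(p, 'utf8')))
        .then(contents => writeFile(name + '.html', contents.join('')))
        .then(() => console.log(name + '.html created'));
}

process.argv.slice(2).forEach(buildPage);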
While reading through the async and promise documentation, I got to the part about parallel, series, and waterfall. My problem is about creating an html page, so it can't be done in parallel. But the documentation starts with and talks a lot about parallel, which only added to my confusion. Grasping all of it would probably take a long time. Anyway, while experimenting, I came up with an easy solution of my own.
In my problem, the repetitive part is checking whether the previous html part has finished writing, using readingStream.on('end', <do next writing>);, so I made it into a function:
function ifend(src, src2, dst) {
    return src.on('end', () => { return openendPipe(src2, dst); });
}
Then I can turn pipeblock into:
openendPipe(htmlray[0], html);
ifend(htmlray[0], htmlray[1], html);
ifend(htmlray[1], htmlray[2], html);
ifend(htmlray[2], htmlray[3], html);
ifend(htmlray[3], htmlray[4], html);
Then I can do an iteration in a function:
function createHtml(src, dst) {
    let l = src.length - 1,
        i = 0;
    openendPipe(src[0], dst);
    for (i; i < l; i++) {
        // iterate the ifend function
        ifend(src[i], src[i + 1], dst);
    }
    return dst;
}
Now I can replace pipeblock with this:
createHtml(htmlray, html);
