ioSocket not emitting to browser under heavy server load - node.js

Some requests made to my nodejs server require heavy processing (for example: 5000 files). Since such a request takes a while to complete, I want to display progress in the browser. For that I'm using socket.io: at regular intervals the server sends the current progress to the client, for example with ioSocket.emit("log", "progress 24%").
However, while sending progress to the client usually works, it doesn't when there is a large number of files; nothing is sent to the browser.
I'm sure the processing itself is going fine, since I also log the progress to the node terminal, where it appears as expected.
I'm wondering what I can do to make the ioSocket.emit events work under heavy load, because that's exactly when seeing progress is most useful.
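For context, a minimal sketch of the browser side (assuming a standard socket.io client setup; the progress element id is hypothetical):
var socket = io(); // connect back to the server that serves the page

// display every "log" event the server emits
socket.on("log", function (msg) {
    document.getElementById("progress").textContent = msg;
});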
The files processing function looks like this:
var child_process = require("child_process");

function processFiles(ioSocket) {
    ioSocket.emit("log", "start");
    var ratingCounts = 0;
    var listOfFilesRatings = [];
    // _ is lodash/underscore; listOfFilesPaths is the array of file paths, defined elsewhere
    _.each(listOfFilesPaths, function(path, i) {
        child_process.exec("exiftool -b -Rating " + path, function(err, stdout) {
            if (err) console.log(err);
            else {
                listOfFilesRatings.push(stdout);
                ratingCounts++;
                ioSocket.emit("log", "rating test progress " + ratingCounts);
            }
        });
        ioSocket.emit("log", "each progress " + i);
    });
}
In this example, only the first "start" emit will be fired to the browser.
However, if I do the following:
function processFiles(ioSocket) {
    ioSocket.emit("log", "start");
    for (var i = 0; i < 1000; i++) {
        ioSocket.emit("log", "each progress " + i);
    }
}
everything works fine, and I get the "start" and all "each progress" sent to browser.

If you are processing 5000 files, your scheme with _.each() and child_process.exec() will launch 5000 exiftool processes at once. That will likely bring any computer, except perhaps some big iron, to its knees. You should probably launch no more than N of those at a time, where N is determined by running some performance tests on your particular hardware (it's probably under 10).
Here's one way to do that:
var child_process = require("child_process");

function processFiles(ioSocket) {
    return new Promise((resolve, reject) => {
        ioSocket.emit("log", "start");
        let ratingCounts = 0;
        let listOfFilesRatings = [];
        const maxInFlight = 10;
        let inFlightCntr = 0;
        let fileIndex = 0;

        function run() {
            // while there is room to run more, run them
            while (inFlightCntr < maxInFlight && fileIndex < listOfFilesPaths.length) {
                let index = fileIndex++;
                ++inFlightCntr;
                ioSocket.emit("log", "each progress " + index);
                child_process.exec("exiftool -b -Rating " + listOfFilesPaths[index], function(err, stdout) {
                    ++ratingCounts;
                    --inFlightCntr;
                    if (err) {
                        console.log(err);
                        listOfFilesRatings[index] = 0;
                    } else {
                        listOfFilesRatings[index] = stdout;
                        ioSocket.emit("log", "rating test progress " + ratingCounts);
                    }
                    // a slot has freed up, start the next file
                    run();
                });
            }
            if (inFlightCntr === 0 && fileIndex >= listOfFilesPaths.length) {
                // all done here
                resolve(listOfFilesRatings);
            }
        }
        run();
    });
}

processFiles(ioSocket).then(results => {
    console.log(results);
});

Related

Expressjs main loop blocked during intense operation

I have an expressjs server running in which an endpoint init performs some intense operation with an average completion time of 10 seconds. During these 10 seconds the main loop is "stuck", making it impossible to send other requests to the expressjs server. I've been googling for a while now but found nothing that would enable expressjs to handle requests concurrently. It would seem silly if this were not possible. I'm very thankful for any hints or help.
Example code:
routes.js
app.route('/v1/cv/random').get(init);
features/init.js
module.exports = async function init(req, res) {
    try {
        // perform some time consuming operation here
        res.status(201).send(someVar);
    } catch (err) {
        res.status(500).send(`failed to init`).end();
    }
};
It is possible to implement algorithms with long running time in a synchronous manner, for example the Tower of Hanoi:
function move(from, to, via, n) {
    if (n > 1)
        move(from, via, to, n - 1);
    to.unshift(from.shift());
    if (n > 1)
        move(via, to, from, n - 1);
}

app.get("/tower", function(req, res) {
    var a = [];
    for (var i = 0; i < Number(req.query.n); i++) a.push(i);
    var b = [];
    var c = [];
    move(a, b, c, a.length);
    res.end("Done");
});
Invoking GET /tower?n=<N> with large enough <N> will indeed block the main loop of express.
This blocking can be avoided by introducing asynchrony into the algorithm, for example with setTimeout(nextAlgorithmicStep) calls. This puts the nextAlgorithmicStep function in a queue, but the same queue also has room for functions that process concurrent requests:
function tick(from, to, via, n) {
    return new Promise(function(resolve, reject) {
        setTimeout(function() {
            move(from, to, via, n, resolve);
        });
    });
}

async function move(from, to, via, n, resolve) {
    if (n > 1)
        await tick(from, via, to, n - 1);
    to.unshift(from.shift());
    if (n > 1)
        await tick(via, to, from, n - 1);
    resolve();
}

app.get("/tower", async function(req, res) {
    var a = [];
    for (var i = 0; i < Number(req.query.n); i++) a.push(i);
    var b = [];
    var c = [];
    await tick(a, b, c, a.length);
    res.end("Done");
});
With this, you can wait (forever) for the request GET /tower?n=64 to come back, but you can at least still make concurrent requests to the same server. (Using simply Promise or process.nextTick instead of setTimeout is not "asynchronous enough" to allow concurrent requests to be processed in between.)
However, the execution of GET /tower?n=10, which finished "immediately" in the first version, now takes much longer. It would be better to use the setTimeout not on all n levels of recursion, but only on every tenth level or so. You have to find similarly good points for asynchrony in your own long-running algorithm.
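A minimal sketch of that idea (a hypothetical moveAsync helper, not part of the answer above; yielding on every tenth level is an arbitrary choice):
// Schedule a macrotask only on every tenth recursion level; the other
// levels recurse directly, apart from the promise overhead.
function moveAsync(from, to, via, n, depth) {
    return new Promise(function (resolve) {
        var step = (depth % 10 === 0)
            ? function (f) { setTimeout(f); } // yield to the event loop
            : function (f) { f(); };          // run immediately
        step(async function () {
            if (n > 1) await moveAsync(from, via, to, n - 1, depth + 1);
            to.unshift(from.shift());
            if (n > 1) await moveAsync(via, to, from, n - 1, depth + 1);
            resolve();
        });
    });
}

// usage inside the route handler: await moveAsync(a, b, c, a.length, 0);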
That's what you can do with a single-threaded Node.js program. But there is an alternative that uses multiple Node.js processes.
app.get("/tower", function(req, res) {
spawn("node", ["tower.js", req.query.n]).stdout.pipe(res);
});
where tower.js is an additional JavaScript program:
function move(from, to, via, n) {
    if (n > 1)
        move(from, via, to, n - 1);
    to.unshift(from.shift());
    if (n > 1)
        move(via, to, from, n - 1);
}

var a = [];
for (var i = 0; i < Number(process.argv[2]); i++) a.push(i);
var b = [];
var c = [];
move(a, b, c, a.length);
process.stdout.write("Done");
I found an answer shortly before @Heiko Theißen updated his answer; it is (I think) a similar approach.
I've found a way to use child_process and with it execute everything that a certain script file contains, by using:
const { fork } = require('child_process');
...
module.exports = async function init(req, res) {
    try {
        const childProcess = fork('./path/to/the/script.js');
        childProcess.send({ 'body': req.body });
        childProcess.on('message', (message) => {
            res.status(201).json({ someVar: message }).end();
        });
    } catch (err) {
        res.status(500).send(`failed to init`).end();
    }
};
The script.js looks like this:
process.on('message', async (message) => {
    // perform a time consuming operation here
    process.send(someVar);
    process.exit();
});

child_process.fork() in Electron

Is it possible to fork a child_process from an electron render process? I found some posts across the net, but none of them helped me get my code working.
I created a module that forks child processes. The code works when I run it from cmd under node, but when I try to integrate it into my electron app, I cannot communicate with the child via the child.send() method.
// create fork
const fork = require('child_process').fork;
const fs = require('fs');

const img_path = [
    'path/to/an/image1.jpg',
    'path/to/an/image2.jpg',
    'path/to/an/image3.jpg'
];
const cp = [];
const temp_path = img_path.map((item) => item);

createAndResize(2);

function createAndResize(num) {
    return childResize(createChildProcess(num));
}

function createChildProcess(num) {
    if (num <= 0) {
        return cp;
    } else {
        let cf = fork('./child.js');
        cp.push(cf);
        num -= 1;
        return createChildProcess(num);
    }
}

function childResize(list) {
    if (list.length <= 0) {
        return true;
    } else {
        // a child_process has been created
        let child = list.shift();
        child.on('message', function (data) {
            if (!temp_path.length) {
                process.kill(data);
            } else {
                child.send(temp_path.shift());
            }
        });
        child.send(temp_path.shift());
        setTimeout(function() {
            childResize(list);
        }, 1000);
    }
}

// child.js
process.on('message', function(msg) {
    console.log(msg); // this is never reached
});
EDIT: Based on the comment below, I now fork the child processes on the main process. The communication seems to work, with a few exceptions. But first, my new code:
// myView.js
const { remote } = require('electron');
const mainProcess = remote.require('./main.js');
const { forkChildfromMain } = mainProcess;

forkChildfromMain();

// main.js
const fork = require('child_process').fork;
let cp = [];

function forkChildfromMain() {
    createAndResize(4);
}

function createAndResize(num) {
    return childResize(createChildProcess(num));
}

function createChildProcess(num) {
    if (num <= 0) {
        return cp;
    } else {
        let cf = fork('./resize.js');
        cp.push(cf);
        num -= 1;
        return createChildProcess(num);
    }
}

function childResize(list) {
    if (list.length <= 0) {
        return true;
    } else {
        let child = list.shift();
        child.on('message', function (msg) {
            // logs 'Hello World' to the cmd console
            console.log(msg);
        });
        child.send('Hello World');
        setTimeout(function() {
            childResize(list);
        }, 1000);
    }
}

exports.forkChildfromMain = forkChildfromMain;

// child.js
process.on('message', function(msg) {
    // this console statement never gets logged
    // I think I must integrate an ipcModule
    console.log(msg);
    // process sends msg back to main.js
    process.send(msg);
});
OUTDATED: The main problem now is that I think electron 'spawns' new child processes and does not fork them, because when I look at my task manager I see only one electron instance, whereas when I run the code in a node environment, I see multiple forked node instances.
The reason I prefer forking my child processes into multiple node instances is that I want to do a lot of image manipulation. When I fork children, every child has its own node instance with its own memory and so on. I think that would be more performant than a single instance sharing memory and resources across all of the children.
The second unexpected behavior is that the console.log statement in the child is never printed to my cmd console. But this is the smaller issue :)
EDIT: After analysing my task manager a little more in depth, I saw that electron does spawn multiple child processes like it should.
Electron's renderer process is not the right place for forking child processes, you should think about moving this to the main process.
Nonetheless, it should work the way you describe. If you'd make a minimal example available somewhere I could take a closer look.
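As a side note on the missing console.log output from the child: one possible workaround (a sketch assuming Node's documented silent option for fork) is to pipe the child's stdout back and log it from the main process yourself:
const { fork } = require('child_process');

// with silent: true the child's stdout/stderr are piped to the parent
// instead of being inherited, so they can be forwarded explicitly
const child = fork('./resize.js', [], { silent: true });

child.stdout.on('data', (chunk) => {
    console.log('[child]', chunk.toString());
});

child.on('message', (msg) => {
    console.log('message from child:', msg);
});

child.send('Hello World');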

Mongoose promise built in but not working?

Or quite possibly I am doing it wrong; in fact, more than likely I am doing it wrong.
I have a table which contains a "tree" of skills, starting at the root level, that may be as deep as ten levels (only two so far). I want to return it as one big fat JSON structure, so I want to ask the database for each level of data, build my structure, then ask for the next level.
Of course, if I just send off my requests using mongoose, they will come back at any time, as they are all nice asynchronous calls. Normally a good thing.
Looking at the documentation for Mongoose (using 4.1.1), it seems like it has promises built in, but whenever I try to use one the API call throws a hissy fit and I get a 500 back.
Here is my simple function:
exports.getSkills = function(req, res) {
    console.log("Will return tree of all skills");
    for (var i = 0; i < 10; i++) {
        var returnData = [];
        console.log("Lets get level " + i);
        var query = Skill.find({level: i}); // the query function
        var promise = query.exec; // the promise?
        promise.then(function(doc) { // totally blows up at this point
            console.log("Something came back");
            return "OK";
        });
    }
}
The Mongoose documentation on the subject can be found here
http://mongoosejs.com/docs/api.html#promise_Promise
var promise = query.exec;
// should be
var promise = query.exec();
With that fixed, you can chain the levels so each query runs after the previous one:
exports.getSkills = function(req, res) {
    console.log("Will return tree of all skills");
    var p = Promise.resolve();
    for (let i = 0; i < 10; i++) {
        // chain the next level onto the previous one
        p = p.then(function () {
            return Skill.find({level: i}).exec();
        }).then(function (data) {
            // deal with your data
        });
    }
    p.then(function () {
        // deal with response
    });
}

Maximum call stack size exceeded on insert 10000 documents

This is the code I am running, which returns the RangeError: Maximum call stack size exceeded.
// to insert 10000 values into mongodb using node.js
var MongoClient = require('mongodb').MongoClient;
var mongoServer = require('mongodb').Server;
var async = require('async');

var serverOptions = {
    'auto_reconnect': true,
    'poolSize': 100
};

var i = 0;
var mongoClient = new MongoClient(new mongoServer('localhost', 27017, serverOptions));
var db = mongoClient.db('test');
var collection = db.collection('new_file_test');

mongoClient.open(function (err, mongoClient) {
    if (err) { console.log(err); }
    function start(i, call) {
        if (i < 10000) {
            call(start);
        }
    }
    function pass(callback) {
        Insert(save);
        i++;
        callback(i, pass);
    }
    start(i, pass);
});

function Insert(callback) {
    console.log("Inserting");
    var doc = {
        'trip_paramid': i,
        'tripid': '116',
        'lattitude': '12.8929183',
        'longitude': '77.63627',
        'speed': '2',
        'heading': '0',
        'altitude': '80469',
        'address': 'qwertyasdfgxcvbn',
        'engine_status': 'Normal',
        'oil_pressure': '83.12',
        'water_temp': '28',
        'fuel_content': '0',
        'brake': 'Normal',
        'creation_time': '2013-08-31 23:22:17',
        'brakelight_status': 'Normal',
        'battery_status': '12.68',
        'event_code': '8',
        'dbinsert_time': '2013-08-31 23:24:59',
        'gsm_status': '-51',
        'cell_id': '45',
        'vehicle_id': '123456',
        'distance': '0'
    };
    callback(doc);
}

function save(doc) {
    collection.insert(doc, function(err) {
        if (err) {
            console.log('Error occured');
        } else {
            console.log("Saved");
        }
    });
}
Inserting 1000 rows works fine; the error is thrown only when the count goes up to 10000.
Looping 10000 times and performing an insert on each iteration is really a bad idea, but you can still make it work with the async library, which might help you fix the issue. I came across this situation before and used async.queue from the Async.js module to overcome it.
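A minimal sketch of that approach (assuming the classic async.queue API; the concurrency of 10 and the buildDoc helper are illustrative choices, with buildDoc building one document much like Insert above):
var async = require('async');

// a queue whose worker inserts one document, running at most 10 inserts at a time
var queue = async.queue(function (doc, done) {
    collection.insert(doc, done);
}, 10);

queue.drain = function () {
    console.log('all documents inserted');
};

for (var i = 0; i < 10000; i++) {
    queue.push(buildDoc(i));
}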
The problem comes from the recursive loop you made:
function start(i, call) {
    if (i < 10000) {
        call(start);
    }
}

function pass(callback) {
    Insert(save);
    i++;
    callback(i, pass);
}

start(i, pass);
You should change it to something like this:
for (var i = 0; i < 10000; i++) {
    Insert(save);
}
Simplifying your code, you have this:
var i = 0;
function pass() {
    if (i < 10000) {
        Insert(save);
        i++;
        pass(); // recurse for the next insert
    }
}
pass();
The problem is that you are calling this function recursively, and since JavaScript doesn't have tail call elimination, the call stack keeps growing. V8 (node's JavaScript engine) has its limits: once the call stack reaches its maximum size, this error is thrown.
You can also have look at the following questions for more information:
Maximum call stack size exceeded error
JavaScript recursion: Maximum call stack size exceeded
That is all about fixing the Maximum call stack size exceeded error. But 10000 is a huge number; I just ran the loop and it took about 3 seconds on my machine using monk, and about 1 second using the mongo shell. While the loop is running, a server application is unresponsive.
I suggest instead inserting in batches and using node's setImmediate function to schedule the next batch to run after pending I/O events (like handling new web requests):
function insert10000(i) {
    insert100();
    i++;
    if (i < 100) {
        setImmediate(insert10000, i);
    }
}

function insert100() {
    for (var i = 0; i < 100; i++) {
        Insert(save);
    }
}
And since we have come to the topic of batching insert calls: the collection.insert method supports an array of documents instead of just one.
So where we currently have something like the following:
collection.insert(doc1);
collection.insert(doc2);
It can be changed to this:
collection.insert([doc1, doc2]);
And that actually is faster. So you can change the code to this:
function insert10000(batch) {
    insert100(batch * 100);
    batch++;
    if (batch < 100) {
        setImmediate(insert10000, batch);
    }
}

function insert100(start) {
    var docs = [];
    // build one batch of 100 documents with ids start .. start + 99
    for (var i = start, l = start + 100; i < l; i++) {
        docs.push({
            'trip_paramid': i,
            'tripid': '116',
            'lattitude': '12.8929183',
            'longitude': '77.63627',
            'speed': '2',
            'heading': '0',
            'altitude': '80469',
            'address': 'qwertyasdfgxcvbn',
            'engine_status': 'Normal',
            'oil_pressure': '83.12',
            'water_temp': '28',
            'fuel_content': '0',
            'brake': 'Normal',
            'creation_time': '2013-08-31 23:22:17',
            'brakelight_status': 'Normal',
            'battery_status': '12.68',
            'event_code': '8',
            'dbinsert_time': '2013-08-31 23:24:59',
            'gsm_status': '-51',
            'cell_id': '45',
            'vehicle_id': '123456',
            'distance': '0'
        });
    }
    collection.insert(docs, function(err) {
        if (err) {
            console.log('Error occurred', err);
        }
    });
}
I measured this; it was about twice as fast as the original version.

How to get rid of the asynchronous code here

I have been trying to retrieve data using the MongoJS driver for node.js. The code I am using is as follows:
req.on('end', function() {
    var decodedBody = querystring.parse(fullBody);
    story = decodedBody.name;
    var z = new Array();
    console.log(story);
    res.writeHead(200, {'Content-Type': 'text/html'});
    res.write('<html><body>');
    db.frames.find({str_id: story}).toArray(function(err, doc) {
        console.log(doc);
        for (var t = 0; t < doc.length; t++) {
            var picid = doc[t].pic_id;
            console.log(picid);
            db.pictures.find({_id: picid}).toArray(function(err, pic) {
                res.write('<img src="' + pic[0].name + '"/>');
            });
        }
    });
    res.end('</body></html>');
});
The problem here is that, because of the asynchronous nature of the code, the response is ended first and the code inside the database callback gets executed afterwards, so nothing gets displayed in the browser (an image in this case). Thanks in advance.
Don't fight the asynchronous nature of node.js, embrace it!
So you should fire off all your requests, marking each one as completed when the response arrives. When all requests are completed, render your images and body/html closing tags.
I don't usually work with node.js, so I can make some mistakes, but it may look like this:
res.write('<html><body>');
db.frames.find({str_id: story}).toArray(function(err, doc) {
    console.log(doc);
    var completed = {};
    doc.forEach(function(frame) {
        var picid = frame.pic_id;
        completed[picid] = false; // bracket notation, so each pic gets its own key
        console.log(picid);
        db.pictures.find({_id: picid}).toArray(function(err, pic) {
            // mark this request as completed
            completed[picid] = pic;
            // check if all requests completed
            var all_finished = true;
            for (var k in completed) {
                if (completed[k] === false) {
                    all_finished = false;
                    break;
                }
            }
            // render final markup
            if (all_finished) {
                for (var k in completed) {
                    var p = completed[k];
                    res.write('<img src="' + p[0].name + '"/>');
                }
                res.end('</body></html>');
            }
        });
    });
});
})
Alternatively, just put the res.end('</body></html>'); inside your db.frames.find callback: check when you have reached doc.length - 1 and then send the end command.
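A minimal sketch of that idea (counting completed picture queries instead of relying on their order; it reuses db, story and res from the question, and uses forEach so each callback captures its own pic_id):
db.frames.find({str_id: story}).toArray(function(err, doc) {
    var finished = 0;
    doc.forEach(function(frame) {
        db.pictures.find({_id: frame.pic_id}).toArray(function(err, pic) {
            res.write('<img src="' + pic[0].name + '"/>');
            finished++;
            // only after the last picture has been written, close the document
            if (finished === doc.length) {
                res.end('</body></html>');
            }
        });
    });
});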
