Expressjs main loop blocked during intense operation - node.js

I'm having an expressjs server running in which an endpoint init performs some intense operation that has an average completion time of 10 seconds. During these 10 seconds, the main loop is "stuck", making it impossible to send requests to the expressjs server. I've been googling for a while now but found nothing which would enable expressjs to handle requests concurrently. It would seem silly if this is not possible. For any hints or help, I'm very thankful.
Example code:
routes.js
app.route('/v1/cv/random').get(init);
features/init.js
// Express handler for GET /v1/cv/random. NOTE(review): the time-consuming
// work below runs directly on the main event loop; for its ~10s duration the
// process cannot service any other request — the problem this question asks about.
module.exports = async function init(req, res) {
try {
// perform some time consuming operation here
res.status(201).send(someVar); // someVar: placeholder for the operation's result
} catch (err) {
res.status(500).send(`failed to init`).end();
}
};

It is possible to implement algorithms with long running time in a synchronous manner, for example the Tower of Hanoi:
// Classic Tower of Hanoi: transfer the top n discs from `from` onto `to`,
// using `via` as the auxiliary peg. Discs live at the FRONT of each array
// (index 0 = topmost disc), so a single move is a shift()/unshift() pair.
function move(from, to, via, n) {
    if (n > 1) move(from, via, to, n - 1); // park the n-1 smaller discs on the spare peg
    to.unshift(from.shift());              // move the nth (largest of this subtree) disc
    if (n > 1) move(via, to, from, n - 1); // bring the parked discs back on top of it
}
// GET /tower?n=<N>: builds an N-disc tower and solves it SYNCHRONOUSLY.
// The entire 2^N-1 move recursion runs in a single event-loop turn, so a
// large N blocks every other request until it finishes.
app.get("/tower", function(req, res) {
var a = [];
// discs 0..N-1, smallest at index 0 (a missing/invalid ?n gives NaN, so no discs)
for (var i = 0; i < Number(req.query.n); i++) a.push(i);
var b = [];
var c = [];
move(a, b, c, a.length);
res.end("Done");
});
Invoking GET /tower?n=<N> with large enough <N> will indeed block the main loop of express.
This blocking can be avoided by introducing asynchronousness into the algorithm, for example with setTimeout(nextAlgorithmicStep) commands. This puts the nextAlgorithmicStep function in a queue, but the same queue also has room for functions that process concurrent requests:
// One deferred Hanoi step: schedules `move` on a later macrotask via
// setTimeout, so pending HTTP requests can be serviced between recursion
// levels. The returned promise settles when that step's `resolve` fires.
function tick(from, to, via, n) {
    return new Promise(function (resolve) {
        setTimeout(function () {
            move(from, to, via, n, resolve);
        });
    });
}
// Asynchronous Tower of Hanoi: identical disc logic to the synchronous
// version, but every recursive descent goes through `tick`, yielding to
// the event loop. Invokes `resolve` once this subtree of moves is done.
async function move(from, to, via, n, resolve) {
    if (n > 1) await tick(from, via, to, n - 1); // park n-1 discs (deferred)
    to.unshift(from.shift());                    // move the nth disc
    if (n > 1) await tick(via, to, from, n - 1); // restack the parked discs
    resolve();
}
// GET /tower?n=<N>, non-blocking variant: awaits the tick/move pair above,
// so the event loop can interleave other requests between Hanoi steps.
app.get("/tower", async function(req, res) {
// discs 0..N-1 from the query string (NaN yields an empty tower)
var a = [];
for (var i = 0; i < Number(req.query.n); i++) a.push(i);
var b = [];
var c = [];
await tick(a, b, c, a.length);
res.end("Done");
});
With this, you may wait (forever) for the request GET /tower?n=64 to come back, but you can at least still make concurrent requests to the same server. (Using simply Promise or process.nextTick instead of setTimeout is not "asynchronous enough" to allow concurrent requests to be processed in between.)
However, the execution of GET /tower?n=10, which finished "immediately" in the first version, now takes much longer. It would be better to use the setTimeout not on all n levels of recursion, but only on every tenth level or so. You have to find similar good points for asynchronousness in your RSA algorithm.
That's what you can do with a single-threaded Node.js program. But there is an alternative that uses multiple Node.js processes.
// Offloading variant: run the whole computation in a separate Node.js
// process and stream its stdout straight into the HTTP response. The
// parent's event loop stays free while tower.js crunches.
// NOTE(review): assumes `spawn` from require('child_process') is in scope.
app.get("/tower", function(req, res) {
spawn("node", ["tower.js", req.query.n]).stdout.pipe(res);
});
where tower.js is an additional Javascript program:
// tower.js — standalone worker: solves Tower of Hanoi synchronously.
// Blocking is harmless here because this runs in its own child process.
function move(from, to, via, n) {
if (n > 1)
move(from, via, to, n - 1);
to.unshift(from.shift());
if (n > 1)
move(via, to, from, n - 1);
}
// disc count comes from the command line: node tower.js <N>
var a = [];
for (var i = 0; i < Number(process.argv[2]); i++) a.push(i);
var b = [];
var c = [];
move(a, b, c, a.length);
process.stdout.write("Done");

I found an answer shortly before @Heiko Theißen updated his answer. It is (I think) a similar approach.
I've found a way to use child_process and with that execute everything that a certain file has by using
const {fork} = require('child_process');
...
module.exports = async function init(req, res) {
try {
const childProcess = fork('./path/to/the/script.js');
childProcess.send({'body': req.body});
childProcess.on('message', (message) => {
res.status(201).json({someVar: message}).end();
});
} catch (err) {
res.status(500).send(`failed to init`).end();
}
};
The script.js looks like
process.on('message', async (message) => {
// perform a time consuming operation here
process.send(someVar);
process.exit();
});

Related

node js non blocking for loop

Please check if my understanding about the following for loop is correct.
// BUG FIX: the original read `for(let i=0; i<1000; i)` — the update clause
// never incremented i, so the loop would spin forever. It must be i++.
for (let i = 0; i < 1000; i++) {
sample_function(i, function (result) {});
}
The moment the for loop is invoked, 1000 events of sample_function will be queued in the event loop. After about 5 seconds a user sends an HTTP request, which is queued after those "1000 events".
Usually this would not be a problem because the loop is asynchronous.
But lets say that this sample_function is a CPU intensive function. Therefore the "1000 events" are completed consecutively and each take about 1 second.
As a result, the for loop will block for about 1000 seconds.
Would there be a way to solve such a problem? For example, would it be possible to let the thread take a "break" every 10 loops, and allow other newly queued tasks to run in between? If so, how would I do it?
Try this:
// Defer every call with setTimeout so each invocation becomes its own
// macrotask; other queued work (e.g. incoming requests) can interleave.
// Arguments after the delay (0) are passed through to sample_function.
for(let i=0; i<1000; i++)
{
setTimeout(sample_function, 0, i, function(result){});
}
or
function sample_function(elem, index) { /* ... */ }
// BUG FIX: `Array(1000)` creates 1000 *holes*, and forEach skips holes
// entirely — sample_function would never be called even once. Array.from
// materialises 1000 real (undefined) elements that forEach does visit.
const arr = Array.from({ length: 1000 });
arr.forEach(sample_function);
There is a technique called partitioning which you can read about in the Node.js documentation, but as the documentation states:
If you need to do something more complex, partitioning is not a good option. This is because partitioning uses only the Event Loop, and you won't benefit from multiple cores almost certainly available on your machine.
So you can also use another technique called offloading, e.g. using worker threads or child processes which also have certain downsides like having to serialize and deserialize any objects that you wish to share between the event loop (current thread) and a worker thread or a child process
Following is an example of partitioning that I came up with which is in the context of an express application.
const express = require('express');
const crypto = require('crypto');
const randomstring = require('randomstring');
const app = express();
const port = 80;
// trivially fast endpoint, used to probe whether the event loop is free
app.get('/', async (req, res) => {
res.send('ok');
})
// runs the CPU-heavy `block` ten times; thanks to the setImmediate inside
// `block`, other requests can be serviced between the ten iterations
app.get('/block', async (req, res) => {
let result = [];
for (let i = 0; i < 10; ++i) {
result.push(await block());
}
res.send({result});
})
app.listen(port, () => {
console.log(`Listening on port ${port}`);
console.log(`http://localhost:${port}`);
})
/* takes around 5 seconds to run(varies depending on your processor) */
const block = () => {
//promisifying just to get the result back to the caller in an async way, this is not part of the partitioning technique
return new Promise((resolve, reject) => {
/**
* https://nodejs.org/en/docs/guides/dont-block-the-event-loop/#partitioning
* using partitioning techinique(using setImmediate/setTimeout) to prevent a long running operation
* to block the eventloop completely
* there will be a breathing period between each time block is called
*/
setImmediate(() => {
let hash = crypto.createHash("sha256");
const numberOfHasUpdates = 10e5;
for (let iter = 0; iter < numberOfHasUpdates; iter++) {
hash.update(randomstring.generate());
}
resolve(hash);
})
});
}
There are two endpoints / and /block, if you hit /block and then hit / endpoint, what happens is that the / endpoint will take around 5 seconds to give back response(during the breathing space(the thing that you call it a "break"))
If setImmediate was not used, then the / endpoint would respond to a request after approximately 10 * 5 seconds(10 being the number of times block function is called in the for-loop)
Also you can do partitioning using a recursive approach like this:
/**
 *
 * @param items array we need to process
 * @param chunk a number indicating number of items to be processed on each iteration of the event loop before the breathing space
 */
/**
 * Fire-and-forget partitioned processing: handles `chunk` items per
 * event-loop turn, then yields via setImmediate so other queued work
 * (e.g. incoming requests) can run before the next slice.
 *
 * @param items array we need to process
 * @param chunk number of items to be processed per event-loop iteration
 */
function processItems(items, chunk) {
let i = 0;
// FIX: the inner function no longer shadows Node's global `process`, and
// its dead `done` parameter is gone — the original declared it but never
// supplied it, and `setImmediate(process)` dropped it anyway.
const processSlice = () => {
let currentChunk = chunk;
while (currentChunk > 0 && i < items?.length) {
--currentChunk;
syncBlock();
++i;
}
if (i < items?.length) {
// schedule (not invoke) the next slice — this yield is what gives the
// event loop its breathing space
setImmediate(processSlice);
}
}
processSlice();
}
And if you need to get back the data processed you can promisify it like this:
/**
 * Partitioned processing that reports its results: processes `chunk` items
 * per event-loop turn, collects each syncBlock() return value, and resolves
 * the returned promise with all of them once the array is exhausted.
 *
 * @param items array we need to process
 * @param chunk number of items processed per event-loop iteration
 */
function processItems(items, chunk) {
  let index = 0;
  const collected = [];
  const step = (done) => {
    // consume at most `chunk` items in this event-loop turn
    for (let budget = chunk; budget > 0 && index < items?.length; --budget) {
      collected.push(syncBlock());
      ++index;
    }
    if (index < items?.length) {
      // schedule the next recursive call instead of invoking it directly —
      // that deferral is what lets other queued work run in between
      setImmediate(() => step(done));
    } else {
      done && done(collected);
    }
  };
  return new Promise((resolve) => step(resolve));
}
And you can test it by adding this route handler to the other route handlers provided above:
// Exercises the promisified processItems: builds [0..9] and processes it
// one item per event-loop turn, so '/' stays responsive in between.
app.get('/block2', async (req, res) => {
let result = [];
let arr = [];
for (let i = 0; i < 10; ++i) {
arr.push(i);
}
result = await processItems(arr, 1);
res.send({ result });
})

Mongoose promise built in but not working?

Or quite possibly I am doing it wrong, in fact, more than likely I am doing it wrong.
Have a table which contains a "tree" of skill, starting at the root level and may be as deep as ten levels (only two so far), but I want to return it as one big fat JSON structure, so I want to ask the database for each set of data, build my structure then ask for the next level.
Of course, if I just send off my requests using Mongoose, they will come back at any time, as they are all nice asynchronous calls. Normally a good thing.
Looking at the documentation for Mongoose(using 4.1.1) it seems like it has a promise built in, but whenever I try to use it the api call throws a hissy fit and I get a 500 back.
Here is my simple function:
// The question's failing code: tries to fetch skill levels 0..9 with
// Mongoose promises. Kept as-is to illustrate the bug the answer fixes.
exports.getSkills = function(req,res) {
console.log("Will return tree of all skills");
for (var i = 0; i<10; i++){
var returnData = [];
console.log("Lets get level " + i );
var query = Skill.find({level: i });//The query function
// BUG (fixed in the answer below): `query.exec` without parentheses is just
// a function reference, not a promise — calling .then() on it throws.
var promise = query.exec; //The promise?
promise.then(function(doc) { //Totally blows up at this point
console.log("Something came back")
return "OK";
});
}
}
The Mongoose documentation on the subject can be found here
http://mongoosejs.com/docs/api.html#promise_Promise
var promise = query.exec;
// =>
var promise = query.exec()
exports.getSkills = function(req,res) {
console.log("Will return tree of all skills");
var p;
for (var i = 0; i < 10; i ++) {
if (i == 0 ) {
p = Skill.find({level:i}).exec();
} else {
p.then(function (){
return Skill.find({level:i}).exec()
})
}
p.then(function (data) {
//deal with your data
})
}
p.then(function () {
// deal with response
})
}

nodejs event loop, how to use nextTick correctly

I'm trying to follow exercises from Node School. There is an exercise where one needs to collect three streams and only print the output when all three streams are done, without using any 3rd-party module.
Can somebody please point out why my approach is not working? It gets stuck in an infinite loop:
var http = require('http');
var concat = require('concat-stream');
// number of streams still outstanding; each concat callback decrements it
var count = 3;
var str1, str2, str3;
http.get(process.argv[2], function (response) {
response.pipe(concat(function(data) {
str1 = data.toString();
--count;
}));
});
http.get(process.argv[3], function (response) {
response.pipe(concat(function(data) {
str2 = data.toString();
--count;
}));
});
http.get(process.argv[4], function (response) {
response.pipe(concat(function(data) {
str3 = data.toString();
--count;
}));
});
// BUG (explained in the answer below): process.nextTick re-queues foo ahead
// of all I/O callbacks, so the http.get callbacks never get a chance to run,
// count never reaches 0, and this busy-loops forever.
function foo() {
if (count > 0) {
process.nextTick(foo);
} else {
console.log(str1);
console.log(str2);
console.log(str3);
}
};
foo();
http.get() callbacks can't run until the next tick of the event loop or later. process.nextTick() puts something right at the front of the event loop, ahead of the callbacks that are already there.
Your recursive routine never stops recursing because it's waiting for those callbacks to decrement the counter but they never fire.
It might work if you swap out process.nextTick() for setImmediate(). (I didn't test that, and if you do, hey, let me know if it works or not.)
But I would say just get rid of the recursion altogether. It's not needed. You can (for example) do something like this instead:
// counts completed requests; results are printed when the third one lands
var count = 0;
var httpGet = function (index) {
http.get(process.argv[2 + index], function (response) {
// Do stuff here
// This next bit will probably end up inside the callback provided to concat
count++;
if (count === 3) {
// Print results here
}
})
};
// no recursion/polling needed: whichever callback arrives last does the printing
for (var i = 0; i < 3; i++) {
httpGet(i);
}

meteor observe array server side

I have a recursive function that builds asynchronously a tree on the server side and I would like to 'observe' it and have the calling method in Meteor rerun every time there is a change.
I have made a simplified example that builds a tree with a recursive readdir call (in the real application there is a computation that may take several minutes per node and its results depend on the nodes already explored)
in server/methods.js
var fs = Meteor.npmRequire('fs')
var path = Meteor.npmRequire('path')
// Recursively lists `dir` into the caller-supplied array `r` as
// { name, children } nodes. Fully asynchronous and fire-and-forget: nothing
// signals when the whole tree is complete — which is exactly why the Meteor
// method below returns before the traversal has finished.
var tree = function (dir, r) {
try
{
fs.readdir (dir, function (error, files) {
if (files && files.length)
for (var i = 0; i < files.length; i++)
{
r[i] = { name : files[i], children : [] }
// NOTE(review): readdir errors on non-directories are silently ignored here
tree(path.resolve(dir, files[i]), r[i].children)
}
})
} catch (e) { console.log("exception", e)}
}
Meteor.methods({
'build_tree' : function () {
var r = []
tree("/tmp/", r)
// Wrong because tree() only *starts* the async traversal — r is still
// (mostly) empty at the moment it is returned to the client.
return r // Wrong !
}
})
in client/client.js
// Client side: invoke the method and log whatever comes back.
// FIX: the original snippet ended with a bare `}` — the closing parenthesis
// of the Meteor.call(...) invocation was missing (a syntax error).
Meteor.call('build_tree', function (error, result) {
console.log(error, result)
});
I have already used futures in other parts of the code based on https://www.discovermeteor.com/patterns/5828399.
But in this case I am somehow lost due to
the recursive nature of the server-side code
the fact I want the client-side to update automatically every time the server-side data structure is updated
The only workaround that comes to my mind is to insert progressively the asynchronous results in a 'flat' Mongo collection and reactively rebuild it as a tree on the client side.
I managed to do this by
counting the number of times an asynchronous computation was started
or finished
resolving the future only when those numbers are equal
relaunching the function every time an asynchronous computation ends
(in case it returned to launch more asynchronous computations or resolve the future)
[line to close list markup or code doesn't format properly]
// Strategy: count how many readdir calls have STARTED vs ENDED and resolve
// the future only when the work queue is empty and the counters match —
// i.e. no traversal callback is still in flight.
// NOTE(review): Future/FS/Path are assigned without var/let/const and so
// become globals; `dir` below must be supplied by the enclosing scope.
Future = Meteor.npmRequire('fibers/future')
FS = Meteor.npmRequire('fs')
Path = Meteor.npmRequire('path')
const all_files = []
const future = new Future()
const to_process = [dir]
let started = 0
let ended = 0
const tree = function () {
// drain the current queue, issuing one async readdir per entry
while (to_process.length) {
let dir = to_process.pop()
started++
FS.readdir (dir, function (error, files) {
if (error) {
// ENOTDIR means `dir` is actually a file: record it as a leaf
if (error.code == 'ENOTDIR') all_files.push(dir)
}
else if (files && files.length)
{
for (let i = 0, leni = files.length; i < leni; i++)
{
let f = Path.resolve(dir, files[i])
to_process.push(f)
}
}
ended++
// re-run: either process the entries this callback queued, or resolve
tree()
})
}
// done only when nothing is queued AND every issued callback has returned
if (!to_process.length && started == ended)
future['return']()
}
tree()
future.wait()
It doesn't have the "progressive update" feeling that you get by updating the database and letting the reactivity manage it since all computations are waiting for that final Future['return']() but the code is simpler and self-contained.
That would be indeed very complicated. First of all, as your tree code runs async you need to either provide a success callback/resolve a promise/return a future or something else, so that you can control when the Meteor method returns. Then you need to use Futures to defer the return of the method util you have your result.
But even then I don't see how the server is supposed to know that something has changed.
The only workaround that comes to my mind is to insert progressively the asynchronous results in a 'flat' Mongo collection and reactively rebuild it as a tree on the client side.
This is actually a workable straightforward solution.

async in for loop in node.js without using async library helper classes [duplicate]

This question already has answers here:
JavaScript closure inside loops – simple practical example
(44 answers)
Closed 7 years ago.
I am a beginner to node.js. I was trying out the examples from the 'learnyounode' tutorial. I am trying to write a program that takes three url parameters and fetches some data from those urls and displays the returned data in the order in which the urls were provided.
var http = require('http');
var bl = require('bl');
// the three URLs to fetch, taken from the command line
var url = [];
url[0] = process.argv[2];
url[1] = process.argv[3];
url[2] = process.argv[4];
var data = [];
var remaining = url.length;
for(var i = 0; i < url.length; i++){
http.get(url[i], function (response){
response.setEncoding('utf8');
response.pipe(bl(function (err, chunk){
if(err){
console.log(err);
}
else{
// BUG (explained in the answer below): `var i` is shared by every
// callback; by the time any response arrives the loop has finished and
// i === url.length, so this writes data[3] while data[0..2] stay undefined.
data[i] = chunk.toString();
console.log(data[i]);
remaining -= 1;
if(remaining == 0) {
for(var j = 0; j < url.length; j++){
console.log(data[j]);
}
}
}
}));
});
}
I have two console.log statements in the program. The output i get is as follows:
It'll be chunder where lets throw a ford. We're going durry where mad as a cooee
.
Shazza got us some apples with come a strides. Mad as a swag when get a dog up y
a roo. It'll be rapt piece of piss as cunning as a trackie dacks.
As cross as a bogged with watch out for the boardies. As cunning as a digger fla
min lets get some roo bar. As dry as a piker piece of piss he hasn't got a joey.
Lets throw a strides mate we're going digger.
undefined
undefined
undefined
It seems like the data is correctly fetched and stored in the 'data' array but it still displays undefined.
Any idea why this is happening?
Thanks in advance!
This is a very common issue in async programming in node.js or even in the browser. A main issue you have is that the loop variable i will not be what you want it to be some time later when the async callback is called. By then, the for loop will have run to the end of its loop and i will be at the end value for all response callbacks.
There are numerous ways to solve this. You can use a closure to close over the i value and make it uniquely available to each callback.
var http = require('http');
var bl = require('bl');
var url = [];
url[0] = process.argv[2];
url[1] = process.argv[3];
url[2] = process.argv[4];
var data = [];
var remaining = url.length;
for(var i = 0; i < url.length; i++){
// create closure here to uniquely capture the loop index
// for each separate http request
(function(index) {
http.get(url[index], function (response){
response.setEncoding('utf8');
response.pipe(bl(function (err, chunk){
if(err){
console.log(err);
}
else{
// `index` is frozen per-request by the IIFE, so each response lands
// in its own slot regardless of arrival order
data[index] = chunk.toString();
console.log(data[index]);
remaining -= 1;
// the last response to arrive prints everything in original order
if(remaining == 0) {
for(var j = 0; j < url.length; j++){
console.log(data[j]);
}
}
}
}));
});
})(i);
}
If you do much node.js programming, you will find that you probably want to learn how to use promises because they are very, very handy for controlling the flow and sequence of async operations.

Resources