node js non blocking for loop - node.js

Please check if my understanding about the following for loop is correct.
// Calls sample_function once for every index from 0 to 999.
// The update clause must advance `i`; a bare `i` leaves it at 0 and the loop never ends.
for (let i = 0; i < 1000; i++) {
  sample_function(i, function (result) {});
}
The moment the for loop is invoked, 1000 events of sample_function will be queued in the event loop. After about 5 seconds a user sends an HTTP request, which is queued after those "1000 events".
Usually this would not be a problem because the loop is asynchronous.
But lets say that this sample_function is a CPU intensive function. Therefore the "1000 events" are completed consecutively and each take about 1 second.
As a result, the for loop will block for about 1000 seconds.
Would there be a way to solve such a problem? For example, would it be possible to let the thread take a "break" every 10 loops and allow other newly queued events to run in between? If so, how would I do it?

Try this:
// Queues 1000 zero-delay timers; each one later runs sample_function(index, callback).
for (const index of Array(1000).keys()) {
  setTimeout(sample_function, 0, index, function (result) {});
}
or
/**
 * Placeholder for the per-element work.
 * @param {*} elem - Current array element (undefined here, because the slots are filled with undefined).
 * @param {number} index - Position of the element in the array.
 */
function sample_function(elem, index) {
  // ...
}

// Array(1000) on its own only sets `length` and leaves every slot empty, and
// forEach skips empty slots, so the callback would never run. fill() creates
// real (undefined) elements so that all 1000 indices are visited.
// Note that forEach calls the callback synchronously, one element after another.
var arr = Array(1000).fill();
arr.forEach(sample_function);

There is a technique called partitioning, which you can read about in the Node.js documentation. But as the documentation states:
If you need to do something more complex, partitioning is not a good option. This is because partitioning uses only the Event Loop, and you won't benefit from multiple cores almost certainly available on your machine.
So you can also use another technique called offloading, e.g. using worker threads or child processes which also have certain downsides like having to serialize and deserialize any objects that you wish to share between the event loop (current thread) and a worker thread or a child process
Following is an example of partitioning that I came up with which is in the context of an express application.
const express = require('express');
const crypto = require('crypto');
const randomstring = require('randomstring');
const app = express();
const port = 80;
// Cheap endpoint: answers straight away with 'ok'.
app.get('/', async (req, res) => {
  res.send('ok');
});

// Expensive endpoint: awaits block() ten times in sequence and returns the collected values.
app.get('/block', async (req, res) => {
  const result = [];
  let remaining = 10;
  while (remaining > 0) {
    result.push(await block());
    remaining -= 1;
  }
  res.send({ result });
});

// Start listening and print where the server can be reached.
app.listen(port, function onListening() {
  console.log(`Listening on port ${port}`);
  console.log(`http://localhost:${port}`);
});
/* takes around 5 seconds to run (varies depending on your processor) */
/**
 * Runs one long hashing job on a later turn of the event loop.
 *
 * The promise wrapper only exists to hand the result back to the caller
 * asynchronously; it is not part of the partitioning technique itself.
 *
 * https://nodejs.org/en/docs/guides/dont-block-the-event-loop/#partitioning
 * Deferring the work with setImmediate gives the event loop a breathing period
 * between successive calls to block(). The hashing loop itself still runs to
 * completion synchronously inside one callback.
 *
 * @returns {Promise<object>} resolves with the hash object after all updates;
 *   rejects if creating or updating the hash throws.
 */
const block = () => {
  return new Promise((resolve, reject) => {
    setImmediate(() => {
      // An exception thrown in this callback is outside the promise executor,
      // so it has to be forwarded to reject() explicitly.
      try {
        const hash = crypto.createHash("sha256");
        const numberOfHashUpdates = 10e5;
        for (let iter = 0; iter < numberOfHashUpdates; iter++) {
          hash.update(randomstring.generate());
        }
        resolve(hash);
      } catch (err) {
        reject(err);
      }
    });
  });
};
There are two endpoints, / and /block. If you hit /block and then hit the / endpoint, the / endpoint will take around 5 seconds to respond: its request is served during the breathing space (the thing that you call a "break") between two calls to block.
If setImmediate was not used, then the / endpoint would respond to a request after approximately 10 * 5 seconds(10 being the number of times block function is called in the for-loop)
Also you can do partitioning using a recursive approach like this:
/**
 * Processes `items` in slices, giving the event loop a breathing space between slices.
 * syncBlock() is called once per item; nothing is returned.
 *
 * @param {Array} items array we need to process
 * @param {number} chunk number of items to be processed on each iteration of the
 *   event loop before the breathing space; a value that is not greater than 0
 *   falls back to 1
 */
function processItems(items, chunk) {
  // With a chunk that is not > 0 the inner loop would never advance `i`,
  // and the function would keep rescheduling itself forever.
  const chunkSize = chunk > 0 ? chunk : 1;
  let i = 0;
  // Named runChunk so that it does not shadow Node's global `process`.
  const runChunk = () => {
    let remaining = chunkSize;
    while (remaining > 0 && i < items?.length) {
      --remaining;
      syncBlock();
      ++i;
    }
    if (i < items?.length) {
      // The key is to schedule the next step through setImmediate instead of
      // calling it directly, so other queued work can run in between.
      setImmediate(runChunk);
    }
  };
  runChunk();
}
And if you need to get back the data processed you can promisify it like this:
/**
 * Processes `items` in slices, giving the event loop a breathing space between
 * slices, and collects what syncBlock() returns for each item.
 *
 * @param {Array} items array we need to process
 * @param {number} chunk number of items handled per event-loop iteration; a
 *   value that is not greater than 0 falls back to 1
 * @returns {Promise<Array>} resolves with the collected syncBlock() results in
 *   processing order; rejects if syncBlock() throws in any slice
 */
function processItems(items, chunk) {
  // With a chunk that is not > 0 the inner loop would never advance `i`.
  const chunkSize = chunk > 0 ? chunk : 1;
  const result = [];
  let i = 0;
  return new Promise((resolve, reject) => {
    // Named runChunk so that it does not shadow Node's global `process`.
    const runChunk = () => {
      try {
        let remaining = chunkSize;
        while (remaining > 0 && i < items?.length) {
          --remaining;
          result.push(syncBlock());
          ++i;
        }
      } catch (err) {
        // Slices after the first run from setImmediate, outside the promise
        // executor, so a throw must be turned into a rejection by hand.
        reject(err);
        return;
      }
      if (i < items?.length) {
        setImmediate(runChunk);
      } else {
        resolve(result);
      }
    };
    runChunk();
  });
}
And you can test it by adding this route handler to the other route handlers provided above:
// Test route: runs processItems over the numbers 0..9, one item per slice,
// and replies with the collected results.
app.get('/block2', async (req, res) => {
  const arr = Array.from({ length: 10 }, (_, index) => index);
  const result = await processItems(arr, 1);
  res.send({ result });
});

Related

Expressjs main loop blocked during intense operation

I'm having an expressjs server running in which an endpoint init performs some intense operation that has an average completion time of 10 seconds. During these 10 seconds, the main loop is "stuck", making it impossible to send requests to the expressjs server. I've been googling for a while now but found nothing which would enable expressjs to handle requests concurrently. It would seem silly if this is not possible. For any hints or help, I'm very thankful.
Example code:
routes.js
app.route('/v1/cv/random').get(init);
features/init.js
module.exports = async function init(req, res) {
try {
// perform some time consuming operation here
res.status(201).send(someVar);
} catch (err) {
res.status(500).send(`failed to init`).end();
}
};
It is possible to implement algorithms with long running time in a synchronous manner, for example the Tower of Hanoi:
/**
 * Tower of Hanoi step: moves the top `n` disks from `from` to `to`, using `via`
 * as the spare peg. Index 0 of each array is the top of the peg.
 */
function move(from, to, via, n) {
  const hasDisksAbove = n > 1;
  if (hasDisksAbove) {
    // Clear the n - 1 smaller disks out of the way first.
    move(from, via, to, n - 1);
  }
  const disk = from.shift();
  to.unshift(disk);
  if (hasDisksAbove) {
    // Put the smaller disks back on top of the disk just moved.
    move(via, to, from, n - 1);
  }
}
// Solves a tower of ?n= disks synchronously, then replies "Done".
app.get("/tower", function (req, res) {
  const a = [];
  const b = [];
  const c = [];
  let i = 0;
  while (i < Number(req.query.n)) {
    a.push(i);
    i += 1;
  }
  move(a, b, c, a.length);
  res.end("Done");
});
Invoking GET /tower?n=<N> with large enough <N> will indeed block the main loop of express.
This blocking can be avoided by introducing asynchronousness into the algorithm, for example with setTimeout(nextAlgorithmicStep) commands. This puts the nextAlgorithmicStep function in a queue, but the same queue also has room for functions that process concurrent requests:
/**
 * Schedules one move() step through setTimeout, so that other queued work can
 * run before the step starts.
 *
 * @returns {Promise<void>} resolves when move() calls its `resolve` argument;
 *   rejects if move() fails
 */
function tick(from, to, via, n) {
  return new Promise(function (resolve, reject) {
    setTimeout(function () {
      // move() is async: without this catch a failure would leave this promise
      // pending forever and surface only as an unhandled rejection.
      move(from, to, via, n, resolve).catch(reject);
    });
  });
}

/**
 * Asynchronous Tower of Hanoi step: moves the top `n` disks from `from` to `to`
 * using `via` as the spare peg. Every recursive sub-step goes through tick().
 *
 * @param {Function} resolve - called with no arguments once this step is complete
 */
async function move(from, to, via, n, resolve) {
  if (n > 1) {
    await tick(from, via, to, n - 1);
  }
  to.unshift(from.shift());
  if (n > 1) {
    await tick(via, to, from, n - 1);
  }
  resolve();
}
// Solves a tower of ?n= disks through tick(), then replies "Done".
app.get("/tower", async function (req, res) {
  const a = [];
  const b = [];
  const c = [];
  let i = 0;
  while (i < Number(req.query.n)) {
    a.push(i);
    i += 1;
  }
  await tick(a, b, c, a.length);
  res.end("Done");
});
With this, you can wait (forever) for the request GET /tower?n=64 to come back, but you can at least still make concurrent requests to the same server. (Using simply Promise or process.nextTick instead of setTimeout is not "asynchronous enough" to allow concurrent requests to be processed in between.)
However, the execution of GET /tower?n=10, which finished "immediately" in the first version, now takes much longer. It would be better to use the setTimeout not on all n levels of recursion, but only on every tenth level or so. You have to find similar good points for asynchronousness in your RSA algorithm.
That's what you can do with a single-threaded Node.js program. But there is an alternative that uses multiple Node.js processes.
// Runs tower.js in a separate node process and streams its stdout back as the response.
app.get("/tower", function (req, res) {
  const child = spawn("node", ["tower.js", req.query.n]);
  // A child process emits 'error' when it cannot be spawned; with no listener
  // that event is thrown as an exception in the server process.
  child.on("error", function (err) {
    console.error(err);
    if (!res.headersSent) {
      res.statusCode = 500;
    }
    res.end();
  });
  // stdout can be missing when the spawn itself failed.
  child.stdout?.pipe(res);
});
where tower.js is an additional Javascript program:
/**
 * Tower of Hanoi: moves the top `n` disks from `from` to `to` with `via` as the
 * spare peg. Index 0 of each array is the top of the peg.
 */
function move(from, to, via, n) {
  if (!(n > 1)) {
    // Nothing sits on top of this disk: move it directly.
    to.unshift(from.shift());
    return;
  }
  move(from, via, to, n - 1);
  to.unshift(from.shift());
  move(via, to, from, n - 1);
}

// Script entry: the disk count comes from the first command-line argument.
var a = [];
var b = [];
var c = [];
var i = 0;
while (i < Number(process.argv[2])) {
  a.push(i);
  i += 1;
}
move(a, b, c, a.length);
process.stdout.write("Done");
I found an answer shortly before @Heiko Theißen updated his answer. It is (I think) a similar approach.
I've found a way to use child_process and with that execute everything that a certain file has by using
const {fork} = require('child_process');
...
module.exports = async function init(req, res) {
try {
const childProcess = fork('./path/to/the/script.js');
childProcess.send({'body': req.body});
childProcess.on('message', (message) => {
res.status(201).json({someVar: message}).end();
});
} catch (err) {
res.status(500).send(`failed to init`).end();
}
};
The script.js looks like
// Child-process side: does the heavy work when the parent sends a message,
// reports the result back, then exits.
process.on('message', async (message) => {
  // perform a time consuming operation here
  // process.send() is asynchronous; exit from its callback so the process is
  // not terminated before the message has been handed off.
  process.send(someVar, () => {
    process.exit();
  });
});

How to loop many http requests with axios in node.js

I have an array of users where each user has an IP address.
I have an API to which I send an IP address as a request, and it returns the country code that belongs to this IP.
In order to get a country code to each user I need to send separate request to each user.
In my code I do async await but it takes about 10 seconds until I get all the responses, if I don't do the async await, I don’t get the country codes at all.
My code:
/**
 * Loads all users and looks up each user's country code, one request at a time:
 * the next request starts only after the previous response has been stored.
 *
 * @returns {Promise<Array>} the user records, each with `countryCode` set
 */
async function getAllusers() {
  const allUsersData = await usersDao.getAllusers();
  for (const user of allUsersData) {
    const body = new URLSearchParams({ ip: user.ip });
    const res = await axios.post("http://myAPI", body);
    user.countryCode = res.data.countryCode;
  }
  return allUsersData;
}
You can use Promise.all to make all your requests once instead of making them one by one.
// Start one request per user up front; axios.post returns a Promise.
const requests = allUsersData.map((user) => {
  const body = new URLSearchParams({ ip: user.ip });
  return axios.post("http://myAPI", body);
});

try {
  const results = await Promise.all(requests);
  // results now contains each request result in the same order
  // Your logic here...
} catch (e) {
  // Handles errors
}
If you're just trying to get all the results faster, you can request them in parallel and know when they are all done with Promise.all():
/**
 * Loads all users and looks up every country code in parallel, returning once
 * all lookups have finished.
 *
 * @returns {Promise<Array>} the user records, each with `countryCode` set
 */
async function getAllusers() {
  const allUsersData = await usersDao.getAllusers();
  const lookups = allUsersData.map(async (userData, index) => {
    const body = new URLSearchParams({ ip: userData.ip });
    const res = await axios.post("http://myAPI", body);
    allUsersData[index].countryCode = res.data.countryCode;
  });
  await Promise.all(lookups);
  return allUsersData;
}
Note, I would not recommend doing it this way if the allUsersData array is large (like more than 20 long) because you'll be raining a lot of requests on the target server and it may either impede its performance or you may get rate limited or even refused service. In that case, you'd need to send N requests at a time (like perhaps 5) using code like this pMap() here or mapConcurrent() here.

Limit number of tasks processed simultaneously in loop

I define a function named fn which I then run in the background as part of a bigger service.
The function fn retrieves messages from a message queue one by one, and processes each message. Processing of each message can take between 1 and 10 minutes (longProcess()).
Using the following code, the for await loop waits for new messages and processes each one, and then fetches a new message once the processing is complete.
// Pulls and processes messages strictly one at a time: the next pullOne() is
// issued only after longProcess() for the current message has resolved.
const fn = async (subscription: AsyncIterable) => {
// Request the first message before starting to iterate.
subscription.pullOne();
for await (const msg of subscription) {
await longProcess(msg);
// Ask for the next message only now that the current one is finished.
subscription.pullOne();
}
// Reached once the async iteration over `subscription` ends.
subscription.close();
};
fn(subscription).then(() => {});
If I remove the await from before longProcess(msg), messages are sent to be processed as soon as they are received, which is what I want, but I only want a maximum of 5 messages to be processed simultaneously.
I don't want any more messages to be pulled before the current messages are done processing (so that other subscribers may pull and process them).
This question deals with a very similar case, but I can't seem to find a solution that actually works and provides an elegant solution.
I tried using the bottleneck library by defining a concurrency limit, but I can't figure out how to stop the loop from fetching more messages before the active processing is finished.
// Limiter created with maxConcurrent: 5.
const limiter = new Bottleneck({
maxConcurrent: 5,
});
// NOTE(review): the promise returned by limiter.schedule() is not awaited, so
// the loop calls pullOne() again right away instead of waiting for one of the
// scheduled longProcess() calls to finish. Nothing here holds back pulling.
const fn = async (subscription: AsyncIterable) => {
subscription.pullOne();
for await (const msg of subscription) {
// Hands the work to the limiter and moves on immediately.
limiter.schedule(() => longProcess(msg));
subscription.pullOne();
}
subscription.close();
};
You can try processing them in batches/chunks of <= 5 items at a time
// helper function
/**
 * Reads up to `count` values from an async iterator.
 *
 * @param {{next: Function}} aIterable - object exposing an async `next()` method
 * @param {number} count - maximum number of values to read
 * @returns {Promise<Array>} a pair `[values, done]`: the values read, and the
 *   `done` flag reported by the last `next()` call (false if none was made)
 */
async function take(aIterable, count) {
  const res = [];
  let done = false;
  // Stop as soon as the iterator reports completion: calling next() again on a
  // finished source is wasted work and may wait for data that never comes.
  for (let i = 0; i < count && !done; i++) {
    const next = await aIterable.next();
    done = next.done;
    if (!done) {
      res.push(next.value);
    }
  }
  return [res, done];
}
// Processes the subscription in batches of at most 5 messages: a batch is read
// with take(), all of its messages are processed concurrently, and the next
// batch is read only after every one of them has settled.
const fn = async (subscription) => {
  subscription.pullOne();
  for (;;) {
    const [messages, exhausted] = await take(subscription, 5);
    const running = messages.map((message) => longProcess(message));
    await Promise.allSettled(running);
    // pullOne() is not called again here (it is left disabled in this example).
    if (exhausted) {
      break;
    }
  }
  subscription.close();
};

Nodejs - Fire multiple API calls while limiting the rate and wait until they are all done

My issues
Launch 1000+ calls to an online API that limits the number of API calls to 10 calls/sec.
Wait for all the API calls to give back a result (or retry); it can take 5 sec before the API sends its data
Use the combined data in the rest of my app
What I have tried while looking at a lot of different questions and answers here on the site
Use promise to wait for one API request
const https = require("https");
/**
 * Sends one GET request to the API and parses the JSON response body.
 *
 * @param {string} param - value appended to "/custom/path/"
 * @returns {Promise<*>} resolves with the parsed JSON body; rejects when the
 *   request fails, the response stream errors, or the body is not valid JSON
 */
function myRequest(param) {
  const options = {
    host: "api.xxx.io",
    port: 443,
    path: `/custom/path/${param}`,
    method: "GET",
  };
  return new Promise(function (resolve, reject) {
    const request = https.request(options, function (result) {
      let str = "";
      // Decode as UTF-8 text so a character split across chunks is not corrupted.
      result.setEncoding("utf8");
      result.on("data", function (chunk) {
        str += chunk;
      });
      result.on("end", function () {
        // JSON.parse throws on a malformed body; turn that into a rejection
        // instead of an uncaught exception inside the event handler.
        try {
          resolve(JSON.parse(str));
        } catch (err) {
          reject(err);
        }
      });
      result.on("error", reject);
    });
    // Connection-level failures are reported on the request object; without
    // this listener the promise would never settle.
    request.on("error", reject);
    request.end();
  });
}
Use Promise.all to do all the requests and wait for them to finish
const params = [{item: "param0"}, ... , {item: "param1000+"}]; // imagine 1000+ items
const promises = [];
// NOTE(review): `base` is not defined in this snippet; presumably `params` was meant. Confirm.
// map() is used only for its side effect of filling `promises`; its return value is discarded.
// Each myRequest() call is made here, so all requests are started by this loop.
base.map(function(params){
promises.push(myRequest(params.item));
});
// Promise.all() only waits for the promises collected above.
// `result` is assigned without a declaration in this snippet.
result = Promise.all(promises).then(function(data) {
// doing some funky stuff with data
});
So far so good, sort of
It works when I limit the number of API requests to a maximum of 10 because then the rate limiter kicks in. When I console.log(promises), it gives back an array of 'request'.
I have tried to add setTimeout in different places, like:
...
// What gets pushed is the return value of setTimeout() (a timer handle), not
// the promise from myRequest(); the value myRequest() returns inside the timer
// callback is dropped. Every timer uses the same 100 ms delay.
base.map(function(params){
promises.push(setTimeout(function() {
myRequest(params.item);
}, 100));
});
...
But that does not seem to work. When I console.log(promises), it gives back an array of 'function'
My questions
Now I am stuck ... any ideas?
How do I build in retries when the API gives an error
Thank you for reading up to here, you are already a hero in my book!
When you have a complicated control-flow using async/await helps a lot to clarify the logic of the flow.
Let's start with the following simple algorithm to limit everything to 10 requests per second:
make 10 requests
wait 1 second
repeat until no more requests
For this the following simple implementation will work:
/**
 * Sends the requests in batches of up to 10, waiting one second after each batch.
 *
 * Items are taken from the end of the list, so results come back in reverse
 * input order (take from the front instead to keep the original order).
 * Falsy entries are skipped. The caller's array is left untouched.
 *
 * @param {Array<{item: *}>} params - one entry per request; `item` is passed to myRequest()
 * @returns {Promise<Array>} the resolved values of all requests
 */
async function rateLimitedRequests (params) {
  // Work on a copy so that the caller's array is not emptied.
  const pending = [...params];
  let results = [];
  while (pending.length > 0) {
    const batch = [];
    // `i` must be declared: an undeclared loop counter is an implicit global
    // and a ReferenceError in strict mode and ES modules.
    for (let i = 0; i < 10; i++) {
      const thisParam = pending.pop();
      if (thisParam) {
        batch.push(myRequest(thisParam.item));
      }
    }
    results = results.concat(await Promise.all(batch));
    await delayOneSecond();
  }
  return results;
}
Now we just need to implement the one second delay. We can simply promisify setTimeout for this:
/**
 * Promise-based one second pause.
 * @returns {Promise<undefined>} resolves 1000 ms after the call
 */
function delayOneSecond() {
  return new Promise(function (resolve) {
    setTimeout(resolve, 1000);
  });
}
This will definitely give you a rate limiter of just 10 requests each second. In fact it performs somewhat slower than that because each batch will execute in request time + one second. This is perfectly fine and already meets your original intent, but we can improve this to squeeze in a few more requests and get as close as possible to exactly 10 requests per second.
We can try the following algorithm:
remember the start time
make 10 requests
compare end time with start time
delay one second minus request time
repeat until no more requests
Again, we can use almost exactly the same logic as the simple code above but just tweak it to do time calculations:
const ONE_SECOND = 1000;

/**
 * Sends the requests in batches of up to 10 and starts at most one batch per
 * second: after a batch completes, it waits only for whatever is left of that
 * second.
 *
 * Items are taken from the end of the list, so results come back in reverse
 * input order. Falsy entries are skipped. The caller's array is left untouched.
 *
 * @param {Array<{item: *}>} params - one entry per request; `item` is passed to myRequest()
 * @returns {Promise<Array>} the resolved values of all requests
 */
async function rateLimitedRequests (params) {
  // Work on a copy so that the caller's array is not emptied.
  const pending = [...params];
  let results = [];
  while (pending.length > 0) {
    const batch = [];
    const startTime = Date.now();
    // `i` must be declared: an undeclared loop counter is an implicit global
    // and a ReferenceError in strict mode and ES modules.
    for (let i = 0; i < 10; i++) {
      const thisParam = pending.pop();
      if (thisParam) {
        batch.push(myRequest(thisParam.item));
      }
    }
    results = results.concat(await Promise.all(batch));
    const requestTime = Date.now() - startTime;
    const delayTime = ONE_SECOND - requestTime;
    // No pause is needed when the batch itself took a second or longer.
    if (delayTime > 0) {
      await delay(delayTime);
    }
  }
  return results;
}
Now instead of hardcoding the one second delay function we can write one that accept a delay period:
/**
 * Promise-based pause.
 * @param {number} milliseconds - how long to wait
 * @returns {Promise<undefined>} resolves once the timer fires
 */
function delay(milliseconds) {
  return new Promise(function (resolve) {
    setTimeout(resolve, milliseconds);
  });
}
We have here a simple, easy to understand function that will rate limit as close as possible to 10 requests per second. It is rather bursty in that it makes 10 parallel requests at the beginning of each one second period but it works. We can of course keep implementing more complicated algorithms to smooth out the request pattern etc. but I leave that to your creativity and as homework for the reader.

Node socket.io, anything to prevent flooding?

How can I prevent someone from simply doing
while(true){client.emit('i am spammer', true)};
This sure proves to be a problem when someone has the urge to crash my node server!
Like tsrurzl said you need to implement a rate limiter (throttling sockets).
Following code example only works reliably if your socket returns a Buffer (instead of a string). The code example assumes that you will first call addRatingEntry(), and then call evalRating() immediately afterwards. Otherwise you risk a memory leak in the case where evalRating() doesn't get called at all or too late.
// rating: list of entries of the form {timestamp, size}
let rating = [];
// limit: maximum number of bytes/characters per interval.
const limit = 1048576;
// interval: interval in milliseconds.
const interval = 1000;
// Together these describe a rate limit of 1mb/s

/**
 * Records one received chunk.
 *
 * @param {number} size - size of the chunk
 * @returns {{timestamp: number, size: number}} the entry that was appended to `rating`
 */
function addRatingEntry (size) {
  const entry = {
    'timestamp': Date.now(),
    'size': size
  };
  rating.push(entry);
  return entry;
}
/**
 * Drops entries older than `interval`, adds up the sizes of the remaining
 * entries and compares the total with `limit`.
 *
 * The total is in bytes when the recorded sizes come from Buffers and in
 * characters when they come from strings; the byte size of a string can
 * differ from its character count, so strings are not a reliable measure.
 *
 * @returns {boolean} true if the connection is NOT flooding, false if it
 *   should be disconnected
 */
function evalRating () {
  const now = Date.now();
  // Keep only the entries that still fall inside the current interval.
  rating = rating.filter(function (entry) {
    return (now - entry.timestamp) < interval;
  });
  // Sum the recorded sizes (not the timestamps).
  const totalSize = rating.reduce(function (sum, entry) {
    return sum + entry.size;
  }, 0);
  return !(totalSize > limit);
}
// Assume connection variable already exists and has a readable stream interface
// For every incoming chunk: record its length first, then evaluate the rate.
connection.on('data', function (chunk) {
addRatingEntry(chunk.length);
// evalRating() returns true while the combined size is within the limit.
if (evalRating()) {
// Continue processing chunk.
} else {
// Disconnect due to flooding.
}
});
You can add extra checks, like checking whether or not the size parameter really is a number etc.
Addendum: Make sure the rating, limit and interval variables are enclosed (in a closure) per connection, and that they don't define a global rate (where each connection manipulates the same rating).
I implemented a little flood function; it is not perfect (see improvements below), but it will disconnect a user when he makes too many requests.
// Not more than 100 requests in 10 seconds
const FLOOD_TIME = 10000;
const FLOOD_MAX = 100;

/**
 * Simple per-socket flood protection.
 * `floods` maps a socket id to `{count}`; all counters are cleared together
 * once more than FLOOD_TIME milliseconds have passed since the last clear.
 */
const flood = {
  floods: {},
  lastFloodClear: new Date(),
  /**
   * Counts one event for `socket` and disconnects it when it exceeds FLOOD_MAX.
   *
   * @param {object} io - socket.io server (kept for compatibility; not used)
   * @param {object} socket - the socket the event came from
   * @returns {boolean} true if the event may be processed, false if the socket
   *   was disconnected for flooding
   */
  protect: (io, socket) => {
    // Reset flood protection
    if (Math.abs(new Date() - flood.lastFloodClear) > FLOOD_TIME) {
      flood.floods = {};
      flood.lastFloodClear = new Date();
    }

    if (flood.floods[socket.id] == undefined) {
      flood.floods[socket.id] = {};
    }
    const entry = flood.floods[socket.id];
    if (entry.count == undefined) {
      entry.count = 0;
    }
    entry.count++;

    // Disconnect the socket if it went over FLOOD_MAX in FLOOD_TIME
    if (entry.count > FLOOD_MAX) {
      console.log('FLOODPROTECTION ', socket.id);
      // Disconnect through the socket that was passed in rather than looking
      // it up again by id on the server object.
      socket.disconnect();
      return false;
    }
    return true;
  },
};
exports = module.exports = flood;
And then use it like this:
let flood = require('../modules/flood')
// ... init socket io...
// Guard every incoming 'message' event with the flood check.
socket.on('message', function () {
// protect() returns false after it has disconnected the socket for flooding.
if(flood.protect(io, socket)){
//do stuff
}
});
Improvements would be to add another value next to the count that records how often he got disconnected, and then create a ban list and not let him connect anymore. Also, when a user refreshes the page he gets a new socket.id, so maybe use a unique cookie value here instead of the socket.id.
Here is simple rate-limiter-flexible package example.
const app = require('http').createServer();
const io = require('socket.io')(app);
const { RateLimiterMemory } = require('rate-limiter-flexible');
app.listen(3000);
// Limiter shared by all connections; points are consumed per client address (see below).
const rateLimiter = new RateLimiterMemory(
{
points: 5, // 5 points
duration: 1, // per second
});
io.on('connection', (socket) => {
// Each 'bcast' event consumes one point before it is relayed.
socket.on('bcast', async (data) => {
try {
await rateLimiter.consume(socket.handshake.address); // consume 1 point per event from IP
// Relay to the sender and to every other connected client.
socket.emit('news', { 'data': data });
socket.broadcast.emit('news', { 'data': data });
} catch(rejRes) {
// no available points to consume
// emit error or warning message
// NOTE(review): this catch also receives anything thrown by the two emit calls
// above; presumably rejRes.msBeforeNext is only set for limiter rejections. Confirm.
socket.emit('blocked', { 'retry-ms': rejRes.msBeforeNext });
}
});
});
Read more in official docs

Resources