Node.js : How to pause/resume a for loop - node.js

I am running a for loop to generate all possible IPv4 addresses and then looking them up in MaxMind's mmdb file. The problem is that while the for loop is very fast, the MaxMind lookup is relatively slow, so the process slows down and eventually my system freezes after some 400k iterations. If I were reading from a stream, I would pause the stream after reading every 10k IP addresses and resume only when all of them had been looked up in the mmdb file. But how can I have such control over a for loop?
// Enumerates every IPv4 address (256^4 combinations) and starts a lookup for each one.
// `count` and `ispDB` are not defined in this snippet; presumably they are declared elsewhere in the file.
connectToMaxMindDatabases().then(function (done){
for(var i=0; i<256; i++){
for(var j=0; j<256; j++){
for(var k=0; k<256; k++){
for(var l=0; l<256; l++){
count++;
// Progress output every 1000 generated addresses.
if(count % 1000 == 0){
console.log("count", count);
}
var newIP = getIPV4([i,j,k,l])
// The lookup is started but never waited for, so the loops keep creating
// new pending lookups without any limit on how many are in flight.
ispDB.getGeoDataAsync(newIP).then(function (result){
if(result){
// `count` and `newIP` are function-scoped (`var`/outer variable), so by the time
// this callback runs they hold the latest values, not the ones for this lookup.
console.log(count, newIP, result);
// process.exit();
}
});
}
}
}
}
})
/**
 * Builds the dotted textual form of an address from its parts.
 *
 * @param {Array} bytes - the address parts, in order.
 * @returns {string} the parts joined with "." between them.
 */
function getIPV4(bytes){
  var dottedQuad = bytes.join(".");
  return dottedQuad;
}

I'm not sure, but process.nextTick might help
https://nodejs.org/api/process.html#process_process_nexttick_callback_arg
I'm really not sure, but the following code might be used:
// Enumerates every IPv4 address, deferring each loop level with process.nextTick.
//
// NOTE: callbacks queued with process.nextTick are all run before the event loop
// moves on to pending I/O, so deferring this way does not by itself give the
// queued lookups a chance to complete; setImmediate is the call that yields to I/O.
// `count` and `ispDB` are not defined in this snippet; presumably they are declared elsewhere.
connectToMaxMindDatabases().then(function (done){
  // `let` gives each deferred callback its own copy of the loop index. With `var`
  // every callback would see the final value (256), because the callbacks only run
  // after the loop that scheduled them has finished.
  for(let i=0; i<256; i++){
    process.nextTick(function(){
      for(let j=0; j<256; j++){
        process.nextTick(function(){
          for(let k=0; k<256; k++){
            process.nextTick(function(){
              for(let l=0; l<256; l++){
                process.nextTick(function(){
                  count++;
                  if(count % 1000 === 0){
                    console.log("count", count);
                  }
                  // Block-scoped so the lookup callback below logs the address it was started for.
                  const newIP = getIPV4([i,j,k,l]);
                  ispDB.getGeoDataAsync(newIP).then(function (result){
                    if(result){
                      console.log(count, newIP, result);
                      // process.exit();
                    }
                  });
                });
              }
            });
          }
        });
      }
    });
  }
});
As far as I know, here's what's happening:
all the for loops are running synchronously, and the async ispDB.getGeoDataAsync(newIP) call is getting queued, but its callback(.then function) is not able to execute because the event loop was blocked by the 4 for loops.

The root of the problem is definitely in resource management of the underlying OS/Node server. A for loop with such iteration number will freeze on that particular hardware while it probably would only freeze later or none at all on a much stronger computer. The solution is to collect all IP addresses in an array first, then process them in a way that only a segment is processed at once, wait until that is finished and move to the next segment.
A possible solution can be achieved with this kind of segmentation and Bluebird's Promise.each.
If the iterator function returns a promise or a thenable, then the
result of the promise is awaited, before continuing with next
iteration.
var Promise = require('bluebird'),
_ = require('lodash'),
allIPs = [];
// Step 1: generate every address up front.
// `count` and `ispDB` are not defined in this snippet; presumably they are declared elsewhere.
connectToMaxMindDatabases().then(function (done){
  for(var i=0; i<256; i++){
    for(var j=0; j<256; j++){
      for(var k=0; k<256; k++){
        for(var l=0; l<256; l++){
          count++;
          if(count % 1000 == 0){
            console.log("count", count);
          }
          var newIP = getIPV4([i,j,k,l])
          allIPs.push(newIP);
        }
      }
    }
  }
  return allIPs;
})
.then(function(allIPs) {
  // Step 2: look the addresses up one segment at a time.
  // Number of lookups that are allowed to be in flight at once.
  var segmentSize = 1000;
  // _.chunk splits the list into arrays of `segmentSize` items; the last
  // segment holds whatever remains, so no trailing addresses are dropped.
  var segments = _.chunk(allIPs, segmentSize);
  // Promise.each waits for the promise returned for one segment before it
  // calls the iterator for the next one. Returning the chain lets callers
  // wait for (and handle failures of) the whole run.
  return Promise.each(segments, function(segmentArr) {
    var promises = segmentArr.map(function(IP) {
      return ispDB.getGeoDataAsync(IP)
        .then(function(result) {
          // save the result
        });
    });
    return Promise.all(promises);
  });
});
Of course if the resources in your case are limited in a way the allIPs array cannot even be produced because it starts freezing before that, this will not solve the problem, but chances are the resources are eaten up by all those getGeoDataAsync calls in which case this should help.

Related

Why break statement does not work in else loop, is there any alternative to this?

Inside nested for loop I am using if/else if else condition. When if and else if condition are false, final else condition is running as expected but as it is in for loop instead of just running once it is running multiple times. What changes do i need to make to make else condition work only once?
Here is my code
// Filter lists consulted below; the enclosing class/function they belong to is not shown in this snippet.
productCodes: string[] = [],
vehicleType: string[] = [],
// NOTE: the `return` on the next statement leaves the enclosing function during the
// first iteration, so as written only urls[0] is ever requested.
for (var i = 0; i < urls.length; i++) {
return https.get(urls[i], res => {
// Accumulate the response body; `json` is declared outside this snippet.
res.on('data', function(chunk) {
json += chunk;
});
res.on('end', function() {
var result = JSON.parse(json);
// This `i` lives in the 'end' callback's own function scope and shadows the outer loop index.
for (var i = 0; i < result.length; i++) {
for (var j = 0; j < result[i].products.length; j++) {
// NOTE: `.length` is read before the `!== undefined` test, so that test cannot guard against undefined.
if (productCodes.length !== 0 && productCodes !== undefined) {
for (var k = 0; k < productCodes.length; k++) {
if (result[i].products[j].productCode == productCodes[k]) {
console.log(
'Product Codes: ' +
result[i].products[j].productCode
);
}
}
} else if (
vehicleType.length !== 0 &&
vehicleType !== undefined
) {
for (var k = 0; k < vehicleType.length; k++) {
if (result[i].products[j].productType == vehicleType[k]) {
console.log(
'Product Codes: ' +
result[i].products[j].productCode
);
}
}
} else {
// Neither filter list has entries. The bare `break` below leaves only the `for j` loop;
// the surrounding `for i` loop continues, so this branch runs again for each entry of `result`.
console.log('No data');
break; ------------------------> HERE
}
}
}
});
});
}
```
If the loops are to cease after you encounter your last "else" statement I would recommend breaking them, if necessary using labels.
Here is a related question! It's for java, but the syntax is similar.
If it is just about only displaying the message once, consider just setting a boolean, then asking for it after the loops have concluded, and if true log the message.
EDIT: To expand my answer for your edit, I am not sure what exactly your desired behaviour would be, but chances are you want to break out of a loop further out than the innermost one. A bare break; only exits the innermost loop that encloses it; in your case I believe that would be
for (var j = 0; j < result[i].products.length; j++)
But not the loop directly above that, iterating over this loop all over again.
Try assigning a label to the loop you want to break further down, and then break that one specifically.
EDIT 2:
I've modified your code to include the example of labels. That way, you can break whichever loop you actually want to end. Just comment the appropriate break back in. Hope this is helpful!
// Same snippet as in the question, with labels added so a specific loop can be exited.
// The enclosing class/function is not shown; `json` and `urls` are declared outside this snippet.
productCodes: string[] = [],
vehicleType: string[] = [],
// Label for the loop over the URLs.
outer1:
for (var i = 0; i < urls.length; i++)
{
// NOTE: this `return` leaves the enclosing function during the first iteration.
return https.get(urls[i], res =>
{
res.on('data', function(chunk)
{
json += chunk;
});
res.on('end', function()
{
var result = JSON.parse(json);
// Label for the loop over the parsed result entries; this is the loop that
// keeps re-entering the `for j` loop after a bare `break`.
middle1:
for (var i = 0; i < result.length; i++)
{
for (var j = 0; j < result[i].products.length; j++)
{
if (productCodes.length !== 0 && productCodes !== undefined)
{
for (var k = 0; k < productCodes.length; k++)
{
if (result[i].products[j].productCode == productCodes[k])
{
console.log(
'Product Codes: ' +
result[i].products[j].productCode
);
}
}
}
else if (
vehicleType.length !== 0 &&
vehicleType !== undefined
)
{
for (var k = 0; k < vehicleType.length; k++)
{
if (result[i].products[j].productType == vehicleType[k])
{
console.log(
'Product Codes: ' +
result[i].products[j].productCode
);
}
}
}
else
{
console.log('No data');
// Enable exactly one of the following lines:
//break outer1;  // NOTE: not valid here - a label cannot be targeted from inside a nested function, and outer1 is outside the 'end' callback (SyntaxError)
//break middle1; // leaves the loop over `result` entirely, so 'No data' is logged once per response
//break;         // leaves only the innermost `for j` loop; the `for i` loop then continues
-- -- -- -- -- -- -- -- -- -- -- -- > HERE
}
}
}
});
});
}
As an additional note, nested labeled loops or blocks like these are fairly uncommon, because loops can usually be moved into their own individual functions, which are then simply called. I'd advise you to take a look at the Java-based answer I linked above if you want to look into that.

NodeJS - Throw inside nested loop in Sails.js

I'm new in NodeJS and Sails.js. I get stuck when run API script like here :
// Fragment of an API handler; the `await` and `return` imply an enclosing async function that is not shown.
try{
for(let i = 0 ; i < 10 ; i++ ) {
for(let j = 0; j < 10; j++) {
await doSomething(); // asume the API using async
// Thrown on the very first inner iteration, so neither loop ever advances.
throw 'a';
}
}
} catch (e) {
// The thrown value ('a', a plain string rather than an Error) is returned to the caller.
return e;
}
When I run the first time the script it's ok (running well), but when I run the second time, it is stuck (nothing return). When I check the console, no error appears.
It will work again if I restart the API.
What happens to my script ? can anyone explain? Why it just run 1 time (return a) but cannot started again when i call again the API without restart it first
You should throw an Error, not a string, so your code should look like this:
throw new Error('a')
also the script won't continue after first loop cause you are throwing.
I hope its not stuck. It will return the string 'a'. You can check that by adding console.log
// Same fragment without the awaited call, with logging added to show that the catch
// block is reached. The `return` implies an enclosing function that is not shown.
try {
for (let i = 0; i < 10; i++) {
for (let j = 0; j < 10; j++) {
// Thrown on the first inner iteration; control jumps straight to the catch block.
throw 'a';
}
}
} catch (e) {
console.log('Error: ', e);
return e;
}
This will print Error: a all the time and return 'a'
Note: As mentioned you should throw new Error instead of string 'a'.
If you want to break the loop on condition. Just set i = 11; j = 11;

Custom module data returning successfully but not displaying

I have a custom module in file help.js, the function name there is getfriends(req,res,next,user). The user is whose friends I want to get.
// Collects into `friends` every user whose username appears as user1 or user2 of a doc,
// skipping docs where that name equals req.body.user. `users`, `docs` and `friends` are
// defined outside this fragment. A user is pushed once per matching doc (no de-duplication).
for (var i = 0; i < users.length; i++) {
for (var j = 0; j < docs.length; j++) {
if (docs[j].user1 == users[i].username) {
if (docs[j].user1 != req.body.user) {
friends.push(users[i]);
}
} else if (docs[j].user2 == users[i].username) {
if (docs[j].user2 != req.body.user) {
friends.push(users[i]);
}
}
}
// Return once the last user has been processed. If `users` is empty the loop body never
// runs and nothing is returned here.
// NOTE(review): if this loop sits inside an asynchronous callback (not visible here), this
// `return` hands the value to that callback, not to the caller of getfriends - confirm.
if (i == users.length-1) {
console.log("friends",friends); //it displays my desired result and so I think the return is successfull
return(friends);
}
}
Now where I receive the data, I do this and data is not being displayed.
console.log(Help.getfriends(req,res,next,req.session.user));
I have tried doing :-
somevar = Help.getfriends(req,res,next,req.session.user);
console.log(somevar);
The module is being called, it is displaying perfect result. Please guide me how to get the data properly from the custom module.
Also, above I have done,
var Help = require('./help');
Your function is asynchronous.
When you console.log(...) <== there is no result yet.
So it's expected that console.log prints undefined.
More info about Node.js's asynchronous nature.

How to fix jslint warning Don't make functions within a loop

Getting this warning on the following code:
// Calls workflow.removeZSet for precisions 1 through 9 and invokes `fn` once all nine
// calls have reported back.
workflow.removeZSets = function(fn) {
var processed = 0;
for (var c = 1; c < 10; c++) {
// A new callback function is created on every iteration; this is what the
// "Don't make functions within a loop" warning refers to.
workflow.removeZSet(c, function() {
processed++;
// 9 matches the number of iterations of the loop above (c = 1..9).
if (processed === 9) {
return fn(null, "finished removing");
}
});
}
}
// Calls rc.zrem on the key "userloc:<precision>" and, when that call reports back,
// invokes fn(null, 'done'). Arguments passed to the zrem callback are ignored.
// `rc` is defined outside this snippet.
workflow.removeZSet = function(precision, fn) {
  rc.zrem("userloc:" + precision, function() {
    return fn(null, 'done');
  });
};
Does anyone have a suggestion how to accomplish this without triggering the warning?
I have some ideas like using the async library to run them all in parallel but this is a fairly common thing I do throughout this code base so interested in feedback on the best way.
The warning appears because you have defined a function within your for loop.
You could try something like this, defining the function outside of the loop:
// Calls workflow.removeZSet for every precision from 1 through 9 and invokes `fn`
// exactly once: with (null, "finished removing") after all calls have reported back,
// or with the first error any of them reports.
workflow.removeZSets = function(fn) {
  var firstPrecision = 1;
  var lastPrecision = 9;
  // Derived from the bounds so the completion check cannot drift from the loop.
  var pending = lastPrecision - firstPrecision + 1;
  var finished = false;

  // Defined once, outside the loop, so no function is created per iteration.
  function removeZ(precision) {
    workflow.removeZSet(precision, function(err) {
      // `fn` has already been called (after an earlier error); ignore late results.
      if (finished) {
        return;
      }
      if (err) {
        finished = true;
        return fn(err);
      }
      pending--;
      if (pending === 0) {
        finished = true;
        return fn(null, "finished removing");
      }
    });
  }

  for (var c = firstPrecision; c <= lastPrecision; c++) {
    removeZ(c);
  }
};
Using a library like async to do the looping would help clean up your code, it would allow you to avoid checking if all the items have processed (processed ===9) because it is handled by async.

Concurrency in Node.js using webworker-threads

This question is regarding the webworker-threads node.js module.
The documentation for the webworker-threads module is very limited and I can't find any newb-friendly examples of how to achieve my goal.
To put it simply, I have two CPU intensive functions that need to be run at the same time and must return a result and print it as soon as they finish. Because these synchronous methods are blocking, my current model is this:
What I'm trying to achieve is this:
However I'm no expert and I can't get my head around translating the information supplied in the Readme.md for the webworker-threads module into my own situation.
My current hack-job code is such:
// Creates two workers that simply echo back whatever message they receive, then close.
var Worker = require('webworker-threads').Worker;
//console.time("worker1");
var worker = new Worker(function(){
// Echo the incoming payload back to the creator and shut the worker down.
onmessage = function(event) {
postMessage(event.data);
self.close();
};
});
worker.onmessage = function(event) {
console.log("Worker1 said : " + event.data);
};
// NOTE: work() is evaluated here, on the calling thread, before postMessage runs;
// the worker only receives the finished number.
worker.postMessage(work());
//console.timeEnd("worker1")
//console.time("worker2");
var worker2 = new Worker(function(){
onmessage = function(event) {
postMessage(event.data);
self.close();
};
});
worker2.onmessage = function(event) {
console.log("Worker2 said : " + event.data);
};
// Same as above: work2() runs on the calling thread, after work() has already completed.
worker2.postMessage(work2());
//console.timeEnd("worker2")
// CPU-bound sample task: returns the sum of i*i*i for i from 0 up to (not including) 430046534.
// The terms grow far beyond 2^53, so the total is a double-precision approximation.
function work(){
var total = 0;
for (var i=0; i<430046534; i++){
total += (i*i*i);
}
return total;
}
// CPU-bound sample task: returns the sum of i*i*i for i from 0 up to (not including) 630746533.
// Identical to work() apart from the larger iteration count.
function work2(){
var total = 0;
for (var i=0; i<630746533; i++){
total += (i*i*i);
}
return total;
}
But alas, the results are only printed to the console when the second worker has finished, and I'm not even sure I'm using this module the right way.
You're posting the result of your work function to the worker. Put that function inside the web worker and then call it during onmessage.
// Worker that does the summation itself: post it the iteration count
// (e.g. worker.postMessage(430046534)) and it posts back the total, then closes.
var worker = new Worker(function(){
  // Returns the sum of i*i*i for i from 0 up to (not including) n.
  function work(n){
    // Must start at 0: an uninitialised `var` is undefined, and undefined + number is NaN.
    var res = 0;
    for (var i = 0; i < n; i++){
      res += i * i * i;
    }
    return res;
  }
  onmessage = function(event) {
    // event.data carries the iteration count sent by the creator of the worker.
    var result = work(event.data);
    postMessage(result);
    self.close();
  };
});
You are doing all your heavy lifting on the server side, you want to put the work in the workers. You're blocking the entire server thread with having your work on the main thread.

Resources