Nodejs step through array and finish each step before moving on

I'm having trouble processing a queue that I've got stored in Redis.
Basically the queue in Redis is a simple array of IDs that I want to step through one by one.
My current code:
async.forEach(data, function(n, done) {
    redisClient.hgetall("visitor:" + n, function(err, visitor) {
        if (visitor != null) {
            agentOnlineCheck(visitor['userID'], function(online) {
                if (online == true) {
                    console.log("We are done with this item, move on to the next");
                } else {
                    console.log("We are done with this item, move on to the next");
                }
            });
        } else {
            console.log("We are done with this item, move on to the next");
        }
    });
}, function() {
    console.log("I want this to fire when all items in data are finished");
});
I use the async library, and the variable data above represents an array such as:
['232323', '232423', '23232']
I want to loop through the array one ID at a time, and not move on to the next ID until the previous one has run through all of its callbacks.
Is this somehow possible?

You can use async.eachSeries instead of async.forEach.
cf. https://github.com/caolan/async#eachSeries
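For illustration, a minimal sketch of the original loop rewritten with eachSeries. Note that the done callback must be called exactly once per item (the snippet in the question never calls it, so even forEach's final callback would never fire):
async.eachSeries(data, function(n, done) {
    redisClient.hgetall("visitor:" + n, function(err, visitor) {
        if (err) return done(err);
        if (visitor == null) return done(); // nothing to check, move on
        agentOnlineCheck(visitor['userID'], function(online) {
            // online or not, we are done with this item;
            // calling done() lets eachSeries start the next ID
            done();
        });
    });
}, function(err) {
    console.log("This fires once every item in data is finished");
});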

Related

Avoid callback multi-invocation when forEach is used

I have a function that processes an array of data (first parameter) and, once the processing is finished, invokes a callback function (second parameter) exactly once. I'm using forEach to process the data item by item; processing each item consists of some checks plus storing it in the database. The function storeInDB() does the storing work and invokes a callback (its second parameter) when the item has been stored.
A first approach to the code is the following:
function doWork(data, callback) {
    data.forEach(function (item) {
        // Do some check on item
        ...
        storeInDB(item, function(err) {
            // check error etc.
            ...
            callback();
        });
    });
}
However, it's wrong, as the callback function will be invoked several times (as many times as there are elements in the data array).
I'd like to know how to refactor my code in order to achieve the desired behaviour, i.e. only one invocation of callback once the storing work is finished. I guess that async could help with this task, but I haven't found the right pattern yet to combine async + forEach.
Any help is appreciated!
You can use a library such as async to do this, although I would recommend using promises if possible. For your immediate problem you can use a counter to determine how many storage calls have completed and call the callback when the total number are completed.
let counter = 0;
data.forEach(function (item) {
    // Do some check on item
    ...
    storeInDB(item, function(err) {
        // check error etc.
        counter++;
        if (counter == data.length) {
            callback();
        }
    });
});
You can also utilize the three parameters passed to the callback that forEach executes on each array element (the value, its index, and the array itself):
function doWork(data, callback) {
    data.forEach(function (value, idx, arr) {
        // Do some check on item
        ...
        storeInDB(arr[idx], function(err) {
            // check error etc.
            ...
            if ((idx + 1) === arr.length) {
                callback();
            }
        });
    });
}
If the storeInDB function returns a promise, you can push all of the pending promises into an array and use Promise.all. Once all of the tasks resolve successfully, it will invoke the callback function.
Hope this helps you.
function doWork(data, callback) {
    let arr = [];
    data.map(function(item) {
        // Do some check on item
        ...
        arr.push(storeInDB(item));
    });
    Promise.all(arr)
        .then(function(res) {
            callback();
        });
}

NodeJs item variable in array only takes the first value in a for loop

I am using expressJs to route some POST requests.
From the client side I pass an object of objects and in the server I iterate over each of them with a for loop.
My problem: the variable cantidad inside the pool.query callback only ever takes the first value, instead of being refreshed on each iteration; before the pool.query call it takes the right value.
So, the line below is ok.
console.log("cantidad before query: " + cantidad);
But the line below is bad. It has the first value.
console.log("cantidad in query: " + cantidad);
This is part of my code.
for (var key in objects) {
    if (objects.hasOwnProperty(key)) {
        ...
        console.log("cantidad before query: " + cantidad);
        pool.query(qProducto, idProducto, function (error, results, fields) {
            if (error) {
                ...
            } else {
                console.log("cantidad in query: " + cantidad);
                ...
This is the full POST in ExpressJs.
app.post("/commanda", function (req, res) {
    var idCuenta = req.body.idCuenta;
    var idEmpleado = req.body.idEmpleado;
    var fechaRegistro = req.body.fechaRegistro;
    var cuenta_mesero = "C:" + idCuenta + ":E:" + idEmpleado;
    var objects = req.body.objects;
    var element = {};
    for (var key in objects) {
        if (objects.hasOwnProperty(key)) {
            var qProducto = "SELECT descripcionProducto FROM PRODUCTO WHERE idProducto = ? ;";
            var descProducto = '';
            console.log("cantidad in commanda2 : " + objects[key].cantidad);
            try {
                pool.query(qProducto, objects[key].idProducto, function (error, results, fields) {
                    if (error) {
                        console.error(error);
                        console.error("Failed with query: " + qProducto);
                        res.status(500).end();
                        throw error;
                    } else {
                        console.log("cantidad in commanda4 : " + objects[key].cantidad);
                        descProducto = JSON.stringify(results[0].descripcionProducto);
                        element = {
                            idProducto: objects[key].idProducto,
                            cantidad: objects[key].cantidad,
                            descProducto: descProducto,
                            cuenta_mesero: cuenta_mesero,
                            fechaRegistro: fechaRegistro
                        };
                        imprimirOrden(element);
                    }
                });
            } catch (error) {
                callback(error);
            }
        }
    }
    printer.printVerticalTab();
    res.status(200).end();
});
This is what the objects parameter looks like:
{
  '0': {
    idProducto: '28',
    cantidad: '3',
    descProducto: 'Product1',
    precioProducto: '3500',
    precioTotal: 10500,
    '$$hashKey': 'object:345'
  },
  '1': {
    idProducto: '29',
    cantidad: '2',
    descProducto: 'Product2',
    precioProducto: '4500',
    precioTotal: 9000,
    '$$hashKey': 'object:346'
  }
}
This happens because the for loop is synchronous but pool.query is asynchronous. What this means is that the for loop is essentially just queuing up queries; you are not executing them one by one, so the loop finishes before even one result is returned from a query. If you want to use data from a query in the next iteration, you should start using async.js, an npm module that helps you avoid these problems.
TL;DR: the console.log that you think runs inside the query actually runs before even one query has finished. More information is needed on where you declare the variable cantidad and when you change it in order to accurately understand the problem.
UPDATE:
What I told you at first was quite wrong, because I misread the indentation of the else {}. But what I told you already is in fact the problem; it was just well obfuscated. The for loop finishes before even one query has finished: the queries are just queued. So the second console.log will see the key of the last iteration of the loop. If you need logic that requires knowing which iteration you are in, you should implement an async function so that you actually know the current iteration. If you don't want to use the async library, you can use something like this.
First, add this function at the bottom of your js file:
https://pastebin.com/4tR0xaTY
This essentially creates an async for loop in which you can always know which iteration you are on, using loop.iteration(). Then replace your POST code with the code below (to include the async loop).
https://pastebin.com/YzZU7bqp
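(Aside from the pastebin code, a minimal sketch of the underlying fix: give each iteration its own scope, so every query callback closes over its own key rather than the shared loop variable. Iterating with forEach over Object.keys is one way to do that.)
Object.keys(objects).forEach(function (key) {
    var item = objects[key]; // each callback closes over this iteration's item
    pool.query(qProducto, item.idProducto, function (error, results, fields) {
        if (error) { return console.error(error); }
        // item.cantidad now refers to this iteration's value,
        // not the value from the last iteration
        console.log("cantidad in query: " + item.cantidad);
    });
});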

Nested asynchronous mongoDB calls in node js

I have quite a simple problem, but I can't find an elegant solution to fix this.
In the following code, I have two nested calls to a mongo DB. I use Monk to manage my calls.
The problem is: the for loop (1) moves on before the nested insertion can happen. So the next find (2) instruction does not find the last inserted action.
The call order is 1-2-2-2-3-3-3 (for an actionList of size 3). So all my data is inserted.
My objective is to have the call order 1-2-3-2-3-2-3
Do you have any clue how to manage such a problem, without doing one big find on my database and managing my list server-side? (Getting all the data, doing the search myself, inserting the elements I want, then pushing it all back to the db... that would be quite horrible.)
for (var action of actionList) //(1)
{
    collectionActions.find( //(2)
        {eventid: action.eventid},
        function(e, actionsFound)
        {
            if (actionsFound.length == 0)
            {
                collectionActions.insert(action, function(err, result) //(3)
                {
                    console.log("insert action : " + action._id);
                })
            }
        }
    )
}
The native Promise object has an all method that could be leveraged to help.
Assuming find returns a compliant promise, the following code maps over actionList to queue up all of the actions, each returning a promise that eventually resolves to a message collected by the final then.
A couple of notes: your code as it stands swallows any errors that might occur (I'm not sure that is what you want), and this also assumes that insert returns a promise.
Promise.all(
    // Iterate over actionList
    actionList.map(function(action) {
        // returns a promise with a then already attached
        return collectionActions.find({
            eventid: action.eventid
        }).then(function(actionsFound) {
            if (actionsFound.length == 0) {
                // returns another promise that will resolve up to outer promises
                return collectionActions.insert(action).then(function(result) {
                    // Finally resolve a value for outer promises
                    return 'insert action : ' + action._id;
                });
            } else {
                // A different value to resolve with if the above promise
                // is not required
                return 'some other message for ' + action._id;
            }
        });
    })
).then(function(results) {
    // Log out all values resolved by promises
    console.log(results);
});
UPDATE: After the clarification of the question it sounds like you just need to chain the promises together rather than run them in parallel.
// Iterate over actionList
actionList.reduce(function(promise, action) {
    // Chain promises together
    return promise.then(function(results) {
        return collectionActions.find({
            eventid: action.eventid
        }).then(function(actionsFound) {
            if (actionsFound.length == 0) {
                // returns another promise that will resolve up to outer promises
                return collectionActions.insert(action).then(function(result) {
                    // Finally resolve a value for outer promises
                    results.push('insert action : ' + action.sourceName);
                    return results;
                });
            } else {
                // A different value to resolve with if the above promise
                // is not required
                results.push('some other message for ' + action.sourceName);
                return results;
            }
        });
    });
}, Promise.resolve([])).then(function(results) {
    // Log out all values resolved by promises
    console.log(results);
});
I finally got my solution by using a recursive function.
var currentIndex = 0;
var searchAndInsert = function(actionList)
{
    var action = actionList[currentIndex];
    if (typeof actionList[currentIndex] != "undefined")
    {
        collectionActions.find(
            {eventid: action.eventid},
            function(e, actions)
            {
                console.log("find ended");
                if (actions.length == 0)
                {
                    collectionActions.insert(action, function(err, result)
                    {
                        console.log("insert action : " + action.sourceName);
                        currentIndex++;
                        if (typeof actionList[currentIndex] != "undefined")
                            searchAndInsert(actionList);
                    })
                }
                else
                {
                    currentIndex++;
                    if (typeof actionList[currentIndex] != "undefined")
                        searchAndInsert(actionList);
                }
            }
        )
    }
};

Node+MongoDB: coll.find().toArray(cb) works for collection A, but never fires cb for collection B?

UPDATE: I've narrowed this down to what appears to be a different issue, and as such have asked a separate question here.
=======
I have a mongoDB instance running on localhost with two collections, "mydocs" (which has ~12,000 documents in it) and "mydoctypes" (which has only 7 documents in it).
I have a standalone NodeJS script which gets a connection to the database and then fires off the following:
myDb.collection('mydoctypes').find().toArray(function(err, results) {
    console.log("Got results.");
    if (err) {
        console.log("err: " + err);
    } else {
        console.log("Got doctypes: " + results.length);
    }
});
The output of that script is:
Got results.
Got doctypes: 7
If I modify the same script to access the 'mydocs' collection instead:
myDb.collection('mydocs').find().toArray(function(err, results) {
    console.log("Got results.");
    if (err) {
        console.log("err: " + err);
    } else {
        console.log("Got docs: " + results.length);
    }
});
I get no output at all. The callback, apparently, never gets fired.
== UPDATE ==
So it looks like the problem was likely too many documents causing toArray() to run out of RAM.
Now, I'm using .each() to iterate, but having a different issue: each() is only running through the first batch (whatever I set batchSize to), and never loading any more documents. The code is this:
myDb.collection('mydocs').find().batchSize(50).each(function(err, item) {
    if (item != null) {
        process.stdout.write(".");
    }
});
Indeed, as seen in the comments, the default MongoDB driver for Node.js returns a cursor. By default a cursor fetches about 101 documents, or around 1 MB, per batch; you can modify this number using the batchSize function. But in order to iterate your collection you should stream it as follows:
MongoClient.connect('mongodb://localhost:27017/mydb', function(err, db) {
    var cursor = db.collection('mycollection').find();
    cursor.forEach(
        function(doc) {
            console.log(doc);
        },
        function(err) {
            if (err) {
                console.error(err);
            } else {
                // cursor has been exhausted, no more docs to iterate; exit
                return db.close();
            }
        });
});
The forEach method applied to the cursor is not the default JavaScript one from Arrays; it takes two callbacks: a cb(doc) which is invoked for each document, and a cb(err) which catches an error or fires once the cursor is exhausted.
You can also use a projection to lower the amount of data returned, e.g. cursor.project({title: 1, name: 1}), and this will significantly reduce the amount of RAM consumed.

how to make this function async in node.js

Here is the situation:
I am new to node.js. I have a 40 MB file containing a multilevel JSON structure like:
[{},{},{}] This is an array of objects (~7000 objects). Each object has properties, and one of those properties is also an array of objects.
I wrote a function to read the content of the file and iterate over it. I succeeded in getting what I wanted in terms of content, but not usability. I thought I had written an async function that would allow Node to serve other web requests while iterating the array, but that is not the case. I would be very thankful if anyone could point out what I've done wrong and how to rewrite it so I get a non-blocking iteration. Here's the function that handles the situation:
function getContents(callback) {
    fs.readFile(file, 'utf8', function (err, data) {
        if (err) {
            console.log('Error: ' + err);
            return;
        }
        js = JSON.parse(data);
        callback();
        return;
    });
}
getContents(iterateGlobalArr);
var count = 0;
function iterateGlobalArr() {
    if (count < js.length) {
        innerArr = js.nestedProp;
        // iterate nutrients
        innerArr.forEach(function(e, index) {
            // some simple if condition here
        });
        var schema = {
            //.....get props from forEach iteration
        };
        Model.create(schema, function(err, post) {
            if (err) {
                console.log('\ncreation error\n', err);
                return;
            }
            if (!post) {
                console.log('\nfailed to create post for schema:\n' + schema);
                return;
            }
        });
        count++;
        process.nextTick(iterateGlobalArr);
    } else {
        console.log("\nIteration finished");
        next();
    }
}
Just so it is clear how I've tested the above situation: I open two tabs, one loading this iteration (which takes some time), and a second with another Node route, which does not load until the iteration is over. So essentially I've written blocking code, but I'm not sure how to refactor it! I suspect that because everything happens in the callback I am unable to release the event loop to handle another request...
Your code is almost correct. What you are doing is inadvertently adding ALL the items to the very next tick... which still blocks.
The important piece of code is here:
Model.create(schema, function(err, post) {
    if (err) {
        console.log('\ncreation error\n', err);
        return;
    }
    if (!post) {
        console.log('\nfailed to create post for schema:\n' + schema);
        return;
    }
});
// add EVERYTHING to the very same next tick!
count++;
process.nextTick(iterateGlobalArr);
Let's say you are in tick A of the event loop when getContents() runs and count is 0. You enter iterateGlobalArr and you call Model.create. Because Model.create is async, it is returning immediately, causing process.nextTick() to add processing of item 1 to the next tick, let's say B. Then it calls iterateGlobalArr, which does the same thing, adding item 2 to the next tick, which is still B. Then item 3, and so on.
What you need to do is move the count increment and process.nextTick() into the callback of Model.create(). This will make sure the current item is processed before nextTick is invoked... which means next item is actually added to the next tick AFTER the model item has been created... which will give your app time to handle other things in between. The fixed version of iterateGlobalArr is here:
function iterateGlobalArr() {
    if (count < js.length) {
        innerArr = js.nestedProp;
        // iterate nutrients
        innerArr.forEach(function(e, index) {
            // some simple if condition here
        });
        var schema = {
            //.....get props from forEach iteration
        };
        Model.create(schema, function(err, post) {
            // schedule our next item to be processed immediately.
            count++;
            process.nextTick(iterateGlobalArr);
            // then move on to handling this result.
            if (err) {
                console.log('\ncreation error\n', err);
                return;
            }
            if (!post) {
                console.log('\nfailed to create post for schema:\n' + schema);
                return;
            }
        });
    } else {
        console.log("\nIteration finished");
        next();
    }
}
Note also that I would strongly suggest passing your js and counter in with each call to iterateGlobalArr, as that will make iterateGlobalArr a lot easier to debug, among other things, but that's another story.
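For illustration, a hypothetical sketch of that parameterized form, with the schema-building elided exactly as in the original:
function iterateGlobalArr(js, count) {
    if (count >= js.length) {
        console.log("\nIteration finished");
        return next();
    }
    var schema = {
        //.....get props from js[count] as before
    };
    Model.create(schema, function(err, post) {
        // schedule the next item, passing the state along explicitly
        process.nextTick(function() {
            iterateGlobalArr(js, count + 1);
        });
        // ... handle err / !post as before ...
    });
}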
Cheers!
Node is single-threaded so async will only help you if you are relying on another system/subsystem to do the work (a shell script, external database, web service etc). If you have to do the work in Node you are going to block while you do it.
It is possible to create one Node process per core. This solution would result in blocking only one of the Node processes, leaving the rest to service your requests, but the feature is still listed as experimental: http://nodejs.org/api/cluster.html (a small sketch follows the quoted docs below).
A single instance of Node runs in a single thread. To take advantage of multi-core systems the user will sometimes want to launch a cluster of Node processes to handle the load.
The cluster module allows you to easily create child processes that all share server ports.
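For reference, a minimal sketch of the cluster pattern described above (the port and response body are placeholders):
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
    // Fork one worker per core; blocking work in one worker
    // leaves the others free to service requests.
    for (var i = 0; i < numCPUs; i++) {
        cluster.fork();
    }
} else {
    // Workers can all share the same server port.
    http.createServer(function (req, res) {
        res.end("handled by worker " + process.pid);
    }).listen(8000);
}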
