nodejs multi threading async parallel - node.js

Currently I have a dashboard that lists a bunch of records in a table. Users can select one record and hit execute, and I send an AJAX POST request to my route middleware, which executes three functions inside async.waterfall and returns a 200 response back to my client if everything works correctly. This async.waterfall usually takes about 40-55 seconds to finish executing (fn_1, fn_2 and fn_3) and works perfectly fine.
// Handles the "execute" request for a single record: runs the three steps
// one after another with async.waterfall and reports the outcome as JSON.
router.post('/url', function (req, res, next) {
  try {
    async.waterfall([
      fn_1,
      fn_2,
      fn_3
    ], function (err, body) {
      // Final handler: receives the first error passed by any step, or the
      // value produced by the last step.
      res.writeHead(200, {'Content-Type': 'application/json'});
      res.end(JSON.stringify({"error": err, "result": body}));
    });

    // The three steps need distinct names: the waterfall above refers to
    // fn_1, fn_2 and fn_3, and a repeated declaration would overwrite the
    // earlier one.

    // Step 1: takes no input.
    // NOTE(review): `response` is a placeholder from the abbreviated
    // snippet — presumably the result of this step's real work.
    function fn_1(callback) {
      callback(null, response);
    }

    // Step 2: receives step 1's result.
    // NOTE(review): `err` is a placeholder from the abbreviated snippet —
    // presumably whatever error this step produced (null on success).
    function fn_2(result, callback) {
      callback(err, result);
    }

    // Step 3: receives step 2's result and hands it to the final handler.
    function fn_3(result, callback) {
      callback(null, result);
    }
  }
  catch (err) {
    console.log(err);
  }
});
But if I were to allow users to select MULTIPLE records and send them as an array to my route middleware, how can I execute multiple async.waterfall calls, one for each item in the array, in parallel?
I can run a loop and execute the waterfall inside the loop, but it will again wait for each item to complete and only then start the next iteration. This is not what I want.
Is this doable in Node / Express? What's the easiest way to achieve this? Or are there modules/plugins that can help solve this case?

Here is an abbreviated version of your code and how it could be changed to suit your needs. If none of your calls need data from any of the other calls, you can just run them in parallel with promises and use Promise.all to capture the result.
// Promise-based versions of the three steps. Each step returns a promise, so
// all of them can be started together and collected with Promise.all.
// The steps need distinct names (fn_1, fn_2, fn_3); a repeated declaration
// would overwrite the earlier one and leave fn_3 undefined.
function fn_1(callback) {
  // See function fn_3 for structure
}
function fn_2(result, callback) {
  // See function fn_3 for structure
}
function fn_3(result, callback) {
  // The executor is ONE function taking (resolve, reject); the parameter
  // list must be parenthesised.
  return new Promise((resolve, reject) => {
    resolve(result);
  })
    .then(d => {
      // Instead of callbacks, use a "then"
      // block/statement.
      //
      // Do something with d here, then return it so the value reaches
      // whoever consumes this promise.
      return d;
    });
}

// Start every step at once and wait for all of them.
Promise.all([fn_1(), fn_2(), fn_3()])
  .then(v => {
    // Do something with v (an array with one entry per step).
  })
  .catch(e => {
    // Do something with e
  });

I tend to advocate the use of native Promises over libraries like async; however, since you're already using async...
You can use parallel and map each item in the array to a waterfall handler e.g.
// Build one task per selected item; each task runs its own waterfall.
// async.parallel starts all tasks together and calls the final handler once
// every task has called back, or as soon as one of them reports an error.
async.parallel(
  // async.waterfall expects its steps as an ARRAY, followed by the callback.
  // NOTE(review): `val` is not forwarded to the steps here — presumably
  // fn_1 needs to receive it; confirm against the real step signatures.
  myArray.map(val => cb => async.waterfall([fn_1, fn_2, fn_3], cb)),
  (err, results) => {
    // return consolidated response
  }
);
You would need to rework your waterfall handlers to not send a response but instead just propagate any errors.
It should also be noted that parallel is only useful if you are in fact running I/O-bound code; if the code is anything like your example, then you won't really gain anything from using parallel over something like async.each.

Related

Asynchronous CRUD operations with express

I have a basic CRUD application using HTML forms, Node.js/Express and MongoDB. I have been learning about synchronous vs. asynchronous code via callbacks, promises, and async/await, and to my understanding, for a CRUD application you would want the operations to be asynchronous so multiple users can perform them at the same time. I am trying to implement async/await with my Express CRUD operations and am not sure if they are executing synchronously or asynchronously.
Here is my update function, which allows a user to type in the _id of the blog they want to change, then type in a new title and new body for the blog and submit it. In its current state, to my knowledge it is executing synchronously:
// Updates the title and body of the blog whose _id was submitted, then
// redirects back to the index page once the database has answered.
app.post('/update', (req, res) => {
  const { body } = req;
  const filter = { _id: new mongodb.ObjectId(String(body.previousValue)) };
  const changes = { $set: { blogTitle: body.newValue, blogBody: body.newValue2 } };

  // Invoked by the driver with (err, result) when the update has finished.
  const onUpdated = (err, result) => {
    if (err) throw err;
    console.log("1 document updated");
    res.redirect('/');
  };

  db.collection("miscData").updateOne(filter, changes, onUpdated);
});
The way in which I was going to change this to asynchronous was this way:
// Second attempt at the update route: the handler is declared `async` and the
// updateOne call is awaited, while the (err, result) callback is still passed
// as the third argument.
app.post('/update', async (req, res) => {
const oldValue = { _id: new mongodb.ObjectId(String(req.body.previousValue)) }
const newValues = { $set: { blogTitle: req.body.newValue, blogBody: req.body.newValue2 } }
// NOTE(review): `await` and a callback are both used on this one call;
// presumably only one of the two styles is honoured by the driver — confirm.
await db.collection("miscData").updateOne(oldValue, newValues, function (err, result) {
if (err) throw err;
console.log("1 document updated");
res.redirect('/')
});
})
Both blocks of code work, however I am not sure if the second block of code is doing what I am intending it to do, which is allow a user to update a blog without blocking the call stack, or if the second block of code would only make sense if I was running more functions after the await. Does this achieve the intended purpose, if not how could/should I do that?
db.collection(...).updateOne is always asynchronous, so you need not worry that a long-running database operation might block your application. There are two ways how you can obtain the asynchronous result:
With a callback function
// Callback style: the function passed as the last argument receives (err, result).
db.collection(...).updateOne(oldValues, newValues, function(err, result) {...});
// This statement follows the updateOne call directly; it does not wait for the callback.
console.log("This happens synchronously");
The callback function with the two parameters (err, result) will be called asynchronously, after the database operation has completed (and after the console.log). Either err contains a database error message or result contains the database result.
With promises
// Promise style: no callback is passed, and the call is awaited inside try/catch.
try {
var result = await db.collection(...).updateOne(oldValues, newValues);
// Do something with result
} catch(err) {
// Do something with err
}
// Placed after the try/catch, so it runs only once the awaited call has settled.
console.log("This happens asynchronously");
The updateOne function without a callback function as third parameter returns a promise that must be awaited. The statements that do something with result will be executed asynchronously, after the database operation has successfully completed. If a database error occurs, the statements in the catch block are executed instead. In either case (success or error), the console.log is only executed afterwards.
(If updateOne does not have a two-parameter version, you can write
var result = await util.promisify(db.collection(...).updateOne)(oldValues, newValues);
using util.promisify.)
Your second code snippet contains a mixture of both ways (third parameter plus await), which does not make sense.

Do too many requests cause errors in Node.js?

// List of URLs to request. This has to be an array (square brackets), not an
// object literal, because it is iterated with forEach.
var urlArr = [
//ex url_for_site0 = 'https://www.google.com'
url_for_site0,
url_for_site1,
url_for_site2,
url_for_site3,
...
url_for_site50
];
// Issue one request per URL and log, tagged with the URL's index, whether
// that request failed or succeeded.
urlArr.forEach((url, index) => {
  request(url, (err, res, body) => {
    if (!err) {
      console.log(index+" success");
      return;
    }
    console.log(index+err);
  });
});
I got different unordered results and errors everytime I execute my app.
Example:
1 error : socket hang out
21 error : socket hang out
17 error : socket hang out
1 error : socket hang out
19 error : socket hang out
...(omission)
5 success
15 success
45 success
50 success
11 success
37 success
Every time I get the results, they are in a different order.
Is this because I sent too many requests simultaneously?
When I send the requests one by one, there's no error.
Example:
request(url_for_site0)
and restart program
request(url_for_site1)
and restart program
request(url_for_site2)
...
NodeJS events are all handled in a single pool and has a non-blocking nature. You can refer to the illustration below.
It happened to me once when I try to call multiple SQL queries. When I did it using C#, there is no problem at all. However, NodeJS gave me a similar behaviour to yours.
I am not sure if this is the best solution for the problem. However, here is how I fixed my problem with my SQL calls. I used the async waterfall function so that the whole process runs sequentially (the calls are still asynchronous, but each one starts only after the previous one has finished). Each function is run one by one, with its result passed to the next function, so you can even do more work between the steps. The usage of this library is not very straightforward; you can refer to this link to better understand how async waterfall works, then adapt it to fit your solution.
https://gist.github.com/dineshsprabu/e6c1cf8f2ca100a8f5ae
Here is how I visualize your solution will roughly looks like:
var async = require('async');

// Run the requests one after another: each task starts only after the
// previous task has called its callback, and receives that task's value.
async.waterfall(
  [
    function (callback) {
      function_urlArr(url, index, function (returnVal) {
        //Do something with the returnVal
        callback(null, returnVal);
      });
    },
    function (returnVal, callback) {
      //the returnVal from the first function is passed here once that function has called back
      function_urlArr(url2, index2, function (returnVal) {
        //Do something with the returnVal
        callback(null, returnVal);
      });
    },
    function (returnVal, callback) {
      //and so on ...
      // Every task must finish by calling its callback; otherwise the
      // final handler below is never reached.
      callback(null, returnVal);
    }
  ],
  function (err) {
    // Report a failure from any task instead of dropping it silently.
    if (err) console.log(err);
  }
);

//define your function and enable callback
//you will need to include an extra third argument to receive the callback
// Named function_urlArr to match the calls above.
// NOTE(review): url, index, url2, index2 and returnValue are placeholders
// from the sketch — fill them in with the real request logic.
function function_urlArr(url, index, callback) {
  //your code
  return callback(returnValue);
}
This is happening because of the non-blocking nature of JavaScript.
If you want the requests to happen one by one, in order, you can use async functions.
The "socket hang up" error may occur because the URL you hit did not send any response after accepting the request.
You might also have an issue with the non-blocking nature of the forEach loop: it starts every request without waiting for the previous one.
You can combine Promise and async/await to make the requests run sequentially. Here is one way of handling it.
const request = require('request');
let urlArr = [
'https://localhost:9090',
'https://www.google.com',
'https://www.ebay.com',
'https://www.amazon.com',
];
// Wraps the callback-based request() call in a promise: resolves with the
// response body, rejects with an Error naming the URL when the request fails.
let fetchPromise = function (url) {
  const executor = (resolve, reject) => {
    const onResponse = (err, res, body) => {
      if (!err) {
        resolve(body);
        return;
      }
      reject(Error(url + ' cannot be fetched'));
    };
    request(url, onResponse);
  };
  return new Promise(executor);
};
// Fetches the given URLs one at a time, in order. The next request is started
// only after the previous one has settled; a failed request is logged and the
// loop carries on with the remaining URLs.
let fetchAllData = async function (urls) {
  // `url` and `data` are declared locally: assigning to undeclared names
  // creates implicit globals and throws a ReferenceError in strict mode.
  for (const url of urls) { // for...of, so that await pauses the loop
    try {
      const data = await fetchPromise(url); // waiting until promise is resolved.
      console.log('Recieved :' + data.length + 'bytes from ' + url);
    } catch (e) {
      console.log('Error :' + e); // catching error in case promise is rejected
    }
  }
};
//calling the function
fetchAllData(urlArr);
/*
// In case you want to wait until all promises are resolved.
// Then use Promise.all, however it will fail if any of the promise is rejected.
// One way to handle it would be to modify function fetchPromise such that it
// always resolves.
Promise
.all(urlArr.map(url => fetchPromise(url)))
.then(data => console.log(data))
.catch(err => console.log(err));
*/
I hope it helps.

ExpressJS: Why does this output [] on the first GET, and then the next GET returns the data from the previous?

I am experimenting with Express and MongoDB, and have a functional API server that can add, update, delete, and retrieve a single post. The issue I have run into is returning all of the documents from Mongo.
I have a GET route that outputs the results, except it does not behave as I imagined it would. When you run this, the first GET request to /notes returns and empty array, i.e. []
// Declared outside the route handler, so the same array is reused by every
// request and is never emptied in this snippet.
let notes =[];
app.get('/notes', (req, res) => {
// Collects the documents of the 'notes' collection into `notes`, then sends
// the array as the response.
async function getNotes() {
try {
// Each document yielded by the cursor is appended to the shared array.
await db.collection('notes').find().forEach(function (myDoc) {
notes.push(myDoc);
})
} catch(err) {
console.log(err)
}
console.log(notes);
res.send((notes));
}
getNotes();
});
On the second GET to /notes, however, the data that was pushed into notes[] is returned, and it is then overwritten by the newly pushed data.
Can anyone help me fill in the blank spot in my understanding of this? I imagine there is something that I just didn't understand along the way.
Edit***
I have experimented with this a bit, and am still running into the same issues.
// Declared outside getNotes and the route, so both share this one variable.
let array= [];
// Loads the 'notes' documents with toArray(), stores them in `array` and
// returns `array`.
async function getNotes() {
try {
// NOTE(review): a callback is passed AND the call is awaited; presumably
// only one of the two styles takes effect — confirm against the driver docs.
await db.collection('notes').find().toArray(function (err, notesArray) {
array = notesArray;
})
} catch (err) {
console.log(err)
}
console.log(array);
return array;
}
app.get('/notes', (req, res) => {
// getNotes() is neither awaited nor chained, so the next line sends whatever
// `array` holds at that moment.
getNotes();
res.send(array);
});
MongoDB's .toArray() won't both invoke a callback and return a Promise.
Returns:
Promise if no callback passed
And, await depends on the Promise, so you won't be able to use the callback as well.
Though, one isn't really necessary. When the promise resolves, await will return the array of documents, allowing you to assign that to array or any other variable.
try {
  // No callback is passed, so toArray() returns a promise; await yields the
  // array of documents, which can be assigned directly.
  array = await db.collection('notes').find().toArray();
} catch (err) {
  // A try block needs a catch (or finally) clause to be valid syntax.
  console.log(err);
}

What is cb() in Node?

Where are people getting cb() from, is this a Node thing or vanilla JS thing?
For example:
Managing Node.js Callback Hell with Promises, Generators and Other Approaches
they're using cb() to I guess callback and return an error or a value or both in some cases depending on what the callback function sig is?
cb in the context you're describing it is how a vanilla callback function is passed into a (typically) asynchronous function, which is a common pattern in node.js (it's sometimes labelled next, but you can call it bananas if you so desire - it's just an argument).
Typically the first argument is an error object (often false - if all went as planned) and subsequent arguments are data of some form.
For example:
// Example of the error-first callback pattern: reports "no error" (false)
// as the first argument and the payload as the second.
function myAsyncFunction(arg1, arg2, cb) {
  // async things
  const payload = { data: 123 };
  const noError = false;
  cb(noError, payload);
}
then using this function:
// Calls myAsyncFunction and branches on the first (error) argument that the
// callback receives.
myAsyncFunction(10, 99, function onComplete(error, data) {
  if (error) {
    // disaster - retry / respond with an error etc
    return;
  }
  // hooray, everything went as planned
});
Promises are an alternative to this design pattern where you would return a Promise object from myAsyncFunction
For example:
// Promise-returning counterpart of myAsyncFunction: instead of taking a
// callback, it returns a promise that resolves with the payload.
function myAsyncFunction2(arg1, arg2) {
  // The executor's parameter list must be closed before its body opens:
  // (resolve, reject) { ... }
  return new Promise(function resolution(resolve, reject) {
    // async things
    resolve({ data: 123 });
  });
}
then using this function:
// Consumes the promise from myAsyncFunction2: one handler for the resolved
// value, one for a rejection.
const pending = myAsyncFunction2(10, 99);
pending
  .then((data) => {
    // success - send a 200 code etc
  })
  .catch((error) => {
    // oh noes - 500
  });
They're basically the same thing, just written slightly differently. Promises aren't supported especially widely in a native form, but if put through a transpiler (I'd recommend babel) during a build step they should perform reliably enough in a browser too.
Callbacks will always work in a browser with no shimming / transpilation.
node.js has lots of asynchronous operations that take a completion callback as an argument. This is very common in various node.js APIs.
The node.js convention for this callback is that the first argument passed to the callback is an error code. A falsey value for this first argument means that there is no error.
For example:
// Reads test.txt and prints its contents; nothing is printed when the read
// reports an error.
fs.readFile("test.txt", (err, data) => {
  if (err) {
    return;
  }
  console.log("file data is: " + data);
});
A function you create yourself may also define its own callback in order to communicate the end of one or more asynchronous operations.
/**
 * Reads "datafile-<id>.txt" and hands its text to the callback.
 *
 * @param {number|string} id - Identifier used to build the file name.
 * @param {function(?Error, string=)} cb - Error-first callback. Receives the
 *   read error, or an "Invalid header" Error when the file does not start
 *   with "Header", or (null, data) on success.
 */
function getData(id, cb) {
  var fname = "datafile-" + id + ".txt";
  // Ask for text explicitly: without an encoding readFile yields a Buffer,
  // and a Buffer slice never compares equal to the string "Header".
  fs.readFile(fname, "utf8", function (err, data) {
    if (err) {
      cb(err);
    } else if (data.slice(0, 6) !== "Header") {
      // proper header not found at beginning of file data
      cb(new Error("Invalid header"));
    } else {
      // null is the conventional "no error" value for error-first callbacks.
      cb(null, data);
    }
  });
}
// usage:
// Requests data file 29 and prints its contents when no error is reported.
getData(29, (err, data) => {
  if (err) {
    return;
  }
  console.log(data);
});
In vanilla JS, you can declare a function and pass another function to it as a parameter; that function can then be called asynchronously.
https://developer.mozilla.org/en-US/docs/Glossary/Callback_function

Testing asynchronous middleware functionality with Mongoose

I'm using a save middleware in Mongoose to create a log of activity in the DB whenever some action is taken. Something like
// After a user document is saved, record an Activity entry that mentions the
// saved document's _id.
UserSchema.post("save", function (doc) {
  var Activity = mongoose.model("Activity");
  Activity.create({activity: "User created: " + doc._id});
});
This appears to work fine, but the problem is that I can't test it because there is no way to pass a callback to post (which probably would not make sense). I test this out using mocha with:
// Creates a user, then looks up the activities and expects the first one to
// mention the new user's _id before signalling completion.
User.create({name: "foo"}, (err, user) => {
  const query = Activity.find();
  query.exec((findErr, act) => {
    const firstActivity = act[0];
    firstActivity.activity.should.match(new RegExp(user._id));
    done();
  });
});
The problem is that the Activity.create apparently does not finish before .find is called. I can get around this by wrapping .find in setTimeout, but this seems hacky to me. Is there any way to test asynchronous mongoose middleware operations?
Unfortunately, there's not a way to reliably interleave these two asynchronous functions in the way you'd like (as there aren't threads, you can't "pause" execution). They can complete in an inconsistent order, which leaves you to solutions like a timeout.
I'd suggest you wire up an event handler to the Activity class so that when an Activity is written/fails, it looks at a list of queued (hashed?) Activities that should be logged. So, when an activity is created, add to list ("onactivitycreated"). Then, it will eventually be written ("onactivitywritten"), compare and remove successes maybe (not sure what makes sense with mocha). When your tests are complete you could see if the list is empty.
You can use util.inherits(Activity, EventEmitter) for example to extend the Activity class with event functionality.
Now, you'll still need to wait/timeout on the list, if there were failures that weren't handled through events, you'd need to handle that too.
Edit -- Ignore the suggestion below as an interesting demo of async that won't work for you. :)
If you'd like to test them, I'd have a look at a library like async where you can execute your code in a series (or waterfall in this case) so that you can first create a User, and then, once it completes, verify that the correct Activity has been recorded. I've used waterfall here so that values can be passed from one task to the next.
// Create a user, then check that a matching Activity exists. waterfall passes
// the value given to one task's callback on to the next task.
async.waterfall([
  function (done) {
    User.create({name: "foo"}, function (err, user) {
      if (err) { done(err); return; }
      done(null, user._id); // 2nd param sent to next task as 1st param
    });
  },
  function (id, done) { // got the _id from above
    // substitute efficient method for finding
    // the corresponding activity document (maybe it's another field?)
    Activity.findById(id, function (err, act) {
      if (err) { done(err); return; }
      // Call done exactly once: true when a document was found, false otherwise.
      done(null, Boolean(act));
    });
  }
], function (err, result) {
  // Surface a failure from either task instead of printing "undefined".
  if (err) { console.log(err); return; }
  console.log("Success? " + result);
});
Async post-save middleware will apparently be available in Mongoose 4.0.0:
https://github.com/LearnBoost/mongoose/issues/787
https://github.com/LearnBoost/mongoose/issues/2124
For now, you can work around this by monkey-patching the save method on the document so that it supports async post-save middleware. The following code is working for me in a similar scenario to yours.
// put any functions here that you would like to run post-save
// Example hook: prints the _id of whatever object it is invoked on (`this`)
// and then passes control on through next().
function logSavedId(next) {
  console.log(this._id);
  return next();
}

// Functions to run after a save; each one takes a `next` callback.
var postSave = [logSavedId];
var Model = require('mongoose/lib/model');

// Replacement save(): performs the stock Model save first and, when that
// succeeds, runs every postSave function in series with the saved result as
// `this`, before finally calling done(err, result).
FooSchema.methods.save = function (done) {
  var doc = this;
  return Model.prototype.save.call(doc, function afterSave(err, result) {
    if (err) {
      return done(err, result);
    }
    // Bind each hook to the saved model so `this` inside it is the result.
    var boundHooks = postSave.map(function (hook) {
      return hook.bind(result);
    });
    return async.series(boundHooks, function (seriesErr) {
      done(seriesErr, result);
    });
  });
};

Resources