How to run asynchronous tasks synchronously? - node.js

I'm developing an app with the following node.js stack: Express/Socket.IO + React. In React I have DataTables, where you can search and with every keystroke the data gets dynamically updated! :)
I use Socket.IO for data fetching, so on every keystroke the client socket emits some parameters and the server then calls the callback to return data. This works like a charm, but it is not guaranteed that the returned data comes back in the same order the client sent the requests.
To simulate this: when I type 'a', the server responds with that same 'a', and so on for every character.
I found the async module for node.js and tried to use its queue to return tasks in the same order it received them. For simplicity I delayed the second incoming task with setTimeout to simulate a slow database query:
Declaration:
const async = require('async');

var queue = async.queue(function(task, callback) {
    if (task.count == 1) {
        setTimeout(function() {
            callback();
        }, 3000);
    } else {
        callback();
    }
}, 10);
Usage:
socket.on('result', function(data, fn) {
    var filter = data.filter;
    if (filter.length === 1) { // TEST SYNCHRONOUSLY
        queue.push({name: filter, count: 1}, function(err) {
            fn(filter);
            // console.log('finished processing slow');
        });
    } else {
        // add some items to the queue
        queue.push({name: filter, count: filter.length}, function(err) {
            fn(data.filter);
            // console.log('finished processing fast');
        });
    }
});
But the way I receive it in the client console when I search for 'abc' is as follows:
ab -> abc -> a(after 3 sec)
I want it to return it like this: a(after 3sec) -> ab -> abc
My thought is that the queue starts the setTimeout, then moves on, and the setTimeout eventually fires later on the event loop. This results in later search filters being returned earlier than the slow-performing one.
How can I solve this problem?

First a few comments, which might help clear up your understanding of async calls:
Using "timeout" to try and align async calls is a bad idea, that is not the idea about async calls. You will never know how long an async call will take, so you can never set the appropriate timeout.
I believe you are misunderstanding the usage of queue from async library you described. The documentation for the queue can be found here.
Copy pasting the documentation in here, in-case things are changed or down:
Creates a queue object with the specified concurrency. Tasks added to the queue are processed in parallel (up to the concurrency limit). If all workers are in progress, the task is queued until one becomes available. Once a worker completes a task, that task's callback is called.
The above means that the queue can simply be used to priorities the async task a given worker can perform. The different async tasks can still be finished at different times.
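To illustrate (a minimal standalone sketch, not from the original post): with a concurrency above 1, the queue only controls when tasks start; their callbacks can still fire out of order.

const async = require('async');

// Tasks start in queue order, but finish whenever their own delay elapses.
var demo = async.queue(function(task, callback) {
    setTimeout(function() {
        console.log('finished', task.name);
        callback();
    }, task.delay);
}, 10);

demo.push({name: 'a', delay: 3000});  // slow task, queued first
demo.push({name: 'ab', delay: 10});   // fast tasks, queued later...
demo.push({name: 'abc', delay: 10});  // ...but their callbacks fire first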
Potential solutions
There are a few solutions to your problem, depending on your requirements:
1. Only send one async call at a time and wait for it to finish before sending the next one.
2. Store the results and only display them to the user when all calls have finished.
3. Disregard all calls except for the latest async call.
In your case I would pick solution 3, as you are searching for something. Why would you care about the results for "a" if the user is already searching for "abc" before the response for "a" arrives?
This can be done by giving each request a timestamp (or an incrementing counter) and then only acting on the response that carries the latest one.

SOLUTION:
Server:
exports = module.exports = function(io) {
    io.sockets.on('connection', function (socket) {
        socket.on('result', function(data, fn) {
            var filter = data.filter;
            var counter = data.counter;
            if (filter.length === 1 || filter.length === 5) { // TEST SYNCHRONOUSLY
                setTimeout(function() {
                    fn({ filter: filter, counter: counter }); // return to client
                }, 3000);
            } else {
                fn({ filter: filter, counter: counter }); // return to client
            }
        });
    });
};
Client:
export class FilterableDataTable extends Component {
    constructor(props) {
        super(props);
        this.state = {
            endpoint: "http://localhost:3001",
            filters: {},
            counter: 0
        };
        this.onLazyLoad = this.onLazyLoad.bind(this);
    }

    onLazyLoad(event) {
        var offset = event.first;
        if (offset === null) {
            offset = 0;
        }
        var filter = ''; // filter is the search character
        if (event.filters.result2 != undefined) {
            filter = event.filters.result2.value;
        }
        // note: mutating state directly is a React anti-pattern; a plain
        // instance field would do here, since the counter never drives rendering
        this.state.counter++;
        this.socket.emit('result', {
            offset: offset,
            limit: 20,
            filter: filter,
            counter: this.state.counter
        }, (data) => { // arrow function keeps `this` bound to the component
            // only use the response if it belongs to the latest request
            if (data.counter === this.state.counter) {
                console.log('DATA: ' + JSON.stringify(data));
            }
        });
    }
}
This does, however, send unneeded data to the client, which in turn ignores it. Does anybody have ideas for further optimizing this kind of communication? For example, a method to keep the old data at the server and only send the latest?
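One possible optimization (a sketch of my own, not from the original post) is to have the server remember the highest counter seen per socket and skip replying to requests that are already stale by the time their work finishes; runQuery below is a hypothetical stand-in for the real database call:

io.sockets.on('connection', function (socket) {
    var latestCounter = 0; // highest request counter seen on this socket
    socket.on('result', function(data, fn) {
        latestCounter = Math.max(latestCounter, data.counter);
        runQuery(data.filter, function(rows) {
            if (data.counter === latestCounter) {
                fn({ filter: data.filter, counter: data.counter, rows: rows });
            }
            // otherwise a newer request superseded this one: send nothing
        });
    });
});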

Related

Grouping redis.get for 2ms and then executing by mget

My application makes about 50 redis.get calls to serve a single HTTP request; it serves millions of requests daily and the application runs on about 30 pods.
When monitoring with New Relic I am seeing a 200ms average redis.get time. To optimize this I wrote a simple pipelining system in node.js which is simply a wrapper over redis.get: it pushes all the requests into a queue and then executes the queue using redis.mget (fetching all the keys in bulk).
Following is the code snippet:
class RedisBulk {
    constructor() {
        this.queue = [];
        this.processingQueue = {};
        this.intervalId = setInterval(() => {
            this._processQueue();
        }, 5);
    }

    clear() {
        clearInterval(this.intervalId);
    }

    get(key, cb) {
        this.queue.push({cb, key});
    }

    _processQueue() {
        if (this.queue.length > 0) {
            let queueLength = this.queue.length;
            logger.debug('Processing Queue of length', queueLength);
            let time = (new Date).getTime();
            this.processingQueue[time] = this.queue;
            this.queue = []; // empty the queue
            let keys = [];
            this.processingQueue[time].forEach((item) => {
                keys.push(item.key);
            });
            global.redisClient.mget(keys, (err, replies) => {
                if (err) {
                    captureException(err);
                    console.error(err);
                    // propagate the error to the callers too,
                    // otherwise their callbacks never fire
                    this.processingQueue[time].forEach((item) => {
                        item.cb(err);
                    });
                } else {
                    this.processingQueue[time].forEach((item, index) => {
                        item.cb(err, replies[index]);
                    });
                }
                delete this.processingQueue[time];
            });
        }
    }
}
let redis_bulk = new RedisBulk();
// each call must pass a callback, otherwise item.cb is undefined
redis_bulk.get('a', (err, value) => console.log('a =', value));
redis_bulk.get('b', (err, value) => console.log('b =', value));
redis_bulk.get('c', (err, value) => console.log('c =', value));
redis_bulk.get('d', (err, value) => console.log('d =', value));
My question is: is this a good approach? Will it help in optimizing the redis get time? Is there any other solution to the above problem?
Thanks
I'm not a redis expert, but judging by the documentation:
MGET has the time complexity of
O(N) where N is the number of keys to retrieve.
And GET has the time complexity of
O(1)
Which brings both scenarios to the same end result in terms of time complexity. Having a bulk request with MGET can bring you some improvement in IO, but apart from that it looks like you have the same bottleneck.
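To make the IO point concrete (illustrative numbers only, not from the question): if one network round trip costs ~2ms, then 50 sequential GETs pay that cost 50 times (~100ms of pure network time), while a single MGET pays it once, plus the O(N) server-side work. The win comes from fewer round trips, not from lower algorithmic complexity.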
I'd ideally split my data into chunks, responding via multiple http requests in async fashion if that's an option.
Alternatively, you can try calling GET with Promise.all() to run the GET requests in parallel, for all the GET calls you need.
Something like:
const asyncRedis = require("async-redis");
const client = asyncRedis.createClient();
function bulk() {
const keys = [];
return Promise.all(keys.map(client.get))
}
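Hypothetical usage, assuming the keys gathered for one request are collected into an array:

bulk(['key1', 'key2', 'key3']).then((values) => {
    console.log(values); // values come back in the same order as the keys
}).catch(console.error);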

Websocket - Waiting for a http request callback to execute before next pusher event

So I'm working with websockets to process data from a website's API. For every new event I also send some HTTP requests back to the website in order to obtain more data. Up until now everything has worked fine, but now that I started using async requests to speed things up, things got a bit different. My code used to process one event and then move on to the next one (these events come in extremely quickly, around 10 per second), but now it just seems to ignore the async (non-blocking) part, moves on to the next event, and thereby skips over half of the code. Note that the code works fine outside the Pusher. I'm using the 'pusher-client' module. My code looks like this:
var Request = require("request");
var requestSync = require('sync-request');
var Pusher = require('pusher-client');
var events_channel = pusher.subscribe('inventory_changes');
events_channel1.bind('listed', function(data)
{
var var2;
//Async request (to speed up the code)
function myFunction(callback){
request("url", function(error, response, body) {
if (!error && response.statusCode == 200)
{
result = JSON.stringify(JSON.parse(body));
return callback(null, result);
}
else
{
return callback(error, null);
}
});
}
myFunction(function(err, data){
if(!err)
{
var2 = data
return(data);
}
else
{
return(err);
}
});
//The part of the code below waits for the callback and the executes some code
var var1 = var2;
check();
function check()
{
if(var2 === var1)
{
setTimeout(check, 10);
return;
}
var1 = var2;
//A CHUNK OF CODE EXECUTES HERE (connected to the data from the callback)
}
});
In conclusion, the code works, but not inside the pusher handler, because the pusher moves on without waiting for the asynchronous request. How would I make the pusher wait for my async request to finish before processing the next event (I have no idea)? If you happen to know, please let me know :)
You need to implement a queue to handle events one after another. I'm curious how it worked before; even without Pusher you'd have to implement some queue mechanism for it.
const eventsQueue = []

events_channel1.bind('listed', function(data) {
    eventsQueue.push(data)
    handleNewEvent()
})

let processingEvent = false

function handleNewEvent() {
    if (processingEvent) return // do nothing if already processing an event
    const eventData = eventsQueue.shift() // pick the first element from the array
    if (!eventData) return // all events are handled at the moment
    processingEvent = true // set the flag only once we actually have an event,
                           // so it can never get stuck on an empty queue
    ... // handle event data here; if this work is asynchronous, move the
        // two lines below into its completion callback instead
    processingEvent = false
    handleNewEvent() // handle next event
}
Also, you should call the clearTimeout method to clear your timeout when you don't need it anymore.
And it's better to use promises or async/await instead of callbacks. Your code will be much easier to read and maintain.
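For example, a minimal async/await version of the same queue (a sketch; fetchMoreData is a hypothetical stand-in for the HTTP request the original code makes, and this version also removes the need for the setTimeout polling):

const eventsQueue = []
let processingEvent = false

events_channel1.bind('listed', (data) => {
    eventsQueue.push(data)
    handleNewEvent()
})

async function handleNewEvent() {
    if (processingEvent) return
    const eventData = eventsQueue.shift()
    if (!eventData) return
    processingEvent = true
    try {
        // await pauses this handler without blocking the event loop
        const moreData = await fetchMoreData(eventData)
        // ...a chunk of code executes here, using moreData...
    } finally {
        processingEvent = false
        handleNewEvent() // handle the next queued event
    }
}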

node asynchronous with response

function getResultsForOneDev(devID, res) {
    var Contribution = require('../db/Contribution.js').model;
    var SurveyState = require('../db/SurveyState.js').model;
    var SurveyAnswer = require('../db/SurveyAnswer.js').model;
    var contributionList = {
        "dev": [ {
            "contribs" : [ {
                "surveyStates" : [ {
                    "surveyAnswers" : [ { } ]
                } ]
            } ]
        } ]
    };
    Contribution.find({dev:devID}).exec(function (error, contribs) {
        // console.log("contribs:"+contribs);
        contributionList = contribs;
        console.log("contribs length:"+contribs.length);
        for (var i = 0; i < contribs.length; i++) {
            (function(oneContrib) {
                //console.log('contribs ID '+oneContrib._id);
                SurveyState.find({contrib:oneContrib._id}).exec(function (error, surveyStates) {
                    // console.log("surveyStates:"+surveyStates);
                    oneContrib.surveyStates = surveyStates;
                    console.log("surveyStates length:"+surveyStates.length);
                    for (var j = 0; j < surveyStates.length; j++) {
                        (function(oneSurveyState) {
                            SurveyAnswer.find({surveyState:oneSurveyState._id}).exec(function (error, surveyAnswers) {
                                // console.log("surveyAnswers:"+surveyAnswers);
                                oneSurveyState.surveyAnswers = surveyAnswers;
                                console.log("surveyAnswers length:"+surveyAnswers.length);
                            });
                        })(surveyStates[j]);
                    }
                });
            })(contribs[i]);
        }
    });
    res.jsonp(contributionList);
}
This program does not run as I want; res.jsonp returns an empty contributionList.
I already tried with async (https://github.com/caolan/async). What is the good practice to fill contributionList before sending res.jsonp?
.find() is asynchronous. It returns immediately, before the callback has populated values into contributionList.
Move your res.jsonp() to the end of the callback code where contributionList is populated rather than outside the callback.
Since you seem to have multiple find() calls inside loops and whatnot, and you cannot guarantee the order the callbacks will run, you can use async (as you mention) to create a workflow to ensure they all finish, and then run a final callback (executed by async) to invoke res.jsonp().
Because your database queries are asynchronous (they finish sometime later) and the rest of your code does not wait for them, your two for loops will finish long before the actual async responses will. As such, you have to actually keep track (somehow) of when the last async response is done and thus all the data in now in the contributionList data structure so you can now send your response.
My preference would be to use promises for this and Promise.all() to trigger an action when an arbitrary number of asynchronous operations are complete, but I don't know the database interfaces you're using to know which ones are promisified, so here's a generic method that simply uses a manual counter to keep track of how many async operations are still in flight and when the counter gets to zero, you have all the data now and you can send the response.
The additions to this code are the lines of code that use the variable remaining.
function getResultsForOneDev(devID, res) {
    var Contribution = require('../db/Contribution.js').model;
    var SurveyState = require('../db/SurveyState.js').model;
    var SurveyAnswer = require('../db/SurveyAnswer.js').model;
    var contributionList = {
        "dev": [ {
            "contribs" : [ {
                "surveyStates" : [ {
                    "surveyAnswers" : [ { } ]
                } ]
            } ]
        } ]
    };
    Contribution.find({dev:devID}).exec(function (error, contribs) {
        // console.log("contribs:"+contribs);
        contributionList = contribs;
        console.log("contribs length:"+contribs.length);
        // keep track of how many async responses are left to be processed
        // in a variable at a higher scope
        // (caveat: if no contrib has any surveyStates, remaining never moves
        // and no response is ever sent; guard for that case if it can occur)
        var remaining = 0;
        for (var i = 0; i < contribs.length; i++) {
            (function(oneContrib) {
                //console.log('contribs ID '+oneContrib._id);
                SurveyState.find({contrib:oneContrib._id}).exec(function (error, surveyStates) {
                    // console.log("surveyStates:"+surveyStates);
                    oneContrib.surveyStates = surveyStates;
                    console.log("surveyStates length:"+surveyStates.length);
                    // add how many more responses are pending
                    remaining += surveyStates.length;
                    for (var j = 0; j < surveyStates.length; j++) {
                        (function(oneSurveyState) {
                            SurveyAnswer.find({surveyState:oneSurveyState._id}).exec(function (error, surveyAnswers) {
                                // console.log("surveyAnswers:"+surveyAnswers);
                                oneSurveyState.surveyAnswers = surveyAnswers;
                                console.log("surveyAnswers length:"+surveyAnswers.length);
                                // mark one more processed and see if all remaining ones are done
                                --remaining;
                                if (remaining === 0) {
                                    res.jsonp(contributionList);
                                }
                            });
                        })(surveyStates[j]);
                    }
                });
            })(contribs[i]);
        }
    });
}
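For comparison, here is roughly what the Promise.all() approach mentioned above could look like (a sketch assuming the models' .exec() returns a promise when called without a callback, as Mongoose-style models do):

function getResultsForOneDev(devID, res) {
    var Contribution = require('../db/Contribution.js').model;
    var SurveyState = require('../db/SurveyState.js').model;
    var SurveyAnswer = require('../db/SurveyAnswer.js').model;
    Contribution.find({dev: devID}).exec().then(function (contribs) {
        return Promise.all(contribs.map(function (oneContrib) {
            return SurveyState.find({contrib: oneContrib._id}).exec().then(function (surveyStates) {
                oneContrib.surveyStates = surveyStates;
                return Promise.all(surveyStates.map(function (oneSurveyState) {
                    return SurveyAnswer.find({surveyState: oneSurveyState._id}).exec().then(function (surveyAnswers) {
                        oneSurveyState.surveyAnswers = surveyAnswers;
                    });
                }));
            });
        })).then(function () {
            // every nested query has settled; now it is safe to respond
            res.jsonp(contribs);
        });
    }).catch(function (err) {
        res.status(500).send(err.message);
    });
}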
P.S. You should realize that you are somewhat flooding your database with a whole bunch of requests all at once (all attempting to run in parallel) and then sometime later the database will actually finish all of them. Depending upon the structure of the database and its ability to handle this flood of requests efficiently or share load with other users also using the database, this is sometimes not a best practice. So, sometimes it is better to send some small number of requests at once (e.g. 3-5) and each time one completes, you launch the next waiting request. The async library can do that type of management for you or you can fairly simply build your own little queue of requests and each time one finishes, you send another.
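A minimal hand-rolled limiter of that kind might look like the sketch below (my own illustration, not part of the original answer; runTask is a hypothetical function that starts one database request and returns a promise):

function runLimited(tasks, limit) {
    var results = [];
    var next = 0;
    var active = 0;
    return new Promise(function (resolve, reject) {
        if (tasks.length === 0) return resolve(results);
        function launch() {
            // refill the pool until we hit the concurrency limit
            while (active < limit && next < tasks.length) {
                (function (index) {
                    active++;
                    runTask(tasks[index]).then(function (result) {
                        results[index] = result;
                        active--;
                        if (next === tasks.length && active === 0) {
                            resolve(results); // everything launched and finished
                        } else {
                            launch(); // one slot freed; send the next request
                        }
                    }, reject);
                })(next++);
            }
        }
        launch();
    });
}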

Express.js - while loop before sending response

I'm trying to implement an existing solution in node.js, specifically using the express.js framework. The existing solution works as follows:
server exposes a GET service that clients can connect to
when a client calls the GET service, the client count (a global variable) increments and then the number of clients is checked;
if there are not at least 3 clients connected, the service sits in an endless loop, waiting for other clients to connect
if (or rather, when) the remaining two clients connect, the service sends a response to everyone indicating that enough clients are connected (a 'true' value).
So what basically happens is: the client connects and the connection stays active (in a loop) until enough clients connect; then and only then is there a response (to all clients at the same time).
Now, I'm no expert in these architectures, but from what I can tell, this is not a correct or good solution. My initial thought was: this must be solved with sockets. However, since the existing solution works like that (it's not written in node.js), I tried to emulate the behaviour:
var number = (function() {
    var count = 0;
    return {
        increase: function() {
            count++;
        },
        get: function() {
            return count;
        }
    };
})();

app.get('/test', function(req, res) {
    number.increase();
    while (number.get() < 3) {
        // hold it here, until enough clients connect
    }
    res.json(number.get());
});
Now while I think that this is not a correct solution, I have a couple of questions:
Is there any alternative to solving this issue, besides using sockets?
Why does this "logic" work in C#, but not in express.js? The code above hangs, no other request is processed.
I know node.js is single-threaded, but what if we have a more conventional service that responds immediately, and there are 20 requests all at the same time?
I would probably use an event emitter for this:
var EventEmitter = require('events').EventEmitter;
var emitter = new EventEmitter();

app.get('/', function(req, res) {
    // Increase the number
    number.increase();
    // Get the current value
    var current = number.get();
    // If it's less than 3, wait for the event emitter to trigger.
    if (current < 3) {
        return emitter.once('got3', function() {
            return res.json(number.get());
        });
    }
    // If it's exactly 3, emit the event so we wake up other listeners.
    if (current === 3) {
        emitter.emit('got3');
    }
    // Fall through.
    return res.json(current);
});
I would like to stress that @Plato is correct in stating that browsers may time out when a response takes too long to complete.
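To guard against that, one option (my own hedged addition, not part of the original answer) is to pair the listener with a timeout that cleans up after itself:

if (current < 3) {
    var onGot3 = function() {
        clearTimeout(timer);
        res.json(number.get());
    };
    // give up after 30 seconds so the browser never times out on its own
    var timer = setTimeout(function() {
        emitter.removeListener('got3', onGot3);
        res.status(503).json({ error: 'not enough clients connected' });
    }, 30000);
    return emitter.once('got3', onGot3);
}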
EDIT: as an aside, some explanation on the return emitter.once(...).
The code above can be rewritten like so:
if (current < 3) {
    emitter.once('got3', function() {
        res.json(number.get());
    });
} else if (current === 3) {
    emitter.emit('got3');
    res.json(number.get());
} else {
    res.json(number.get());
}
But instead of using those if/else statements, I return from the request handler after creating the event listener. Since request handlers are asynchronous, their return value is discarded, so you can return anything (or nothing). As an alternative, I could also have used this:
if (current < 3) {
    emitter.once(...);
    return;
}
if (current === 3) {
    ...etc...
Also, even though you return from the request handler function, the event listener is still referencing the res variable, so the request handler scope is maintained by Node until res.json() in the event listener callback is called.
Your HTTP approach should work.
You are blocking the event loop, so node refuses to do any other work while it is in the while loop.
You're really close; you just need to check every now and then instead of constantly. I do this below with setImmediate(), but setTimeout() would also work:
var number = (function() {
    var count = 0;
    return {
        increase: function() {
            count++;
        },
        get: function() {
            return count;
        }
    };
})();

function waitFor3(callback) {
    var n = number.get();
    if (n < 3) {
        setImmediate(function() {
            waitFor3(callback);
        });
    } else {
        callback(n);
    }
}

function bump() {
    number.increase();
    console.log('waiting');
    waitFor3(function() {
        console.log('done');
    });
}

setInterval(bump, 2000);

/*
app.get('/test', function(req, res) {
    number.increase();
    waitFor3(function() {
        res.json(number.get());
    });
});
*/

How can I stop async.queue after the first failure?

I want to stop executing my async.queue after the first task error occurs. I need to perform several similar actions in parallel with a concurrency restriction, but stop all the actions after the first error. How can I do that, or what should I use instead?
Assume you fired 5 parallel functions, each taking 5 seconds, and in the 3rd second function 1 fails. How can you then stop the execution of the rest?
It depends on what those functions do; you may poll using setInterval. However, if your question is how to stop further tasks from being pushed to the queue, you may do this:
q.push(tasks, function (err) {
    if (err && !called) {
        // This prevents async from pushing more tasks to the queue; however,
        // note that whatever was already pushed to the queue
        // will be processed anyway.
        q.kill();
        // This prevents double calling of the final callback
        called = true;
        // This is the main process callback, the final callback
        main(err, results);
    }
});
Here is a full working example:
var async = require('async');

/*
 This function is the actual work you are trying to do.
 Please note, for example, if you are running child processes
 here, by doing q.kill you will not stop the execution
 of those processes, so you actually need to keep track of the
 spawned processes and then kill them when you call q.kill
 in the 'pushCb' function. In case of just a long-running function,
 you may poll using setInterval.
*/
function worker(task, wcb) {
    setTimeout(function workerTimeout() {
        if (task === 11 || task === 12 || task === 3) {
            return wcb('error in processing ' + task);
        }
        wcb(null, task + ' got processed');
    }, Math.floor(Math.random() * 100));
}

/*
 This function pushes the tasks to async.queue
 and then hands them to your worker function.
 (Note: its name shadows Node's global `process` object.)
*/
function process(tasks, concurrency, pcb) {
    var results = [], called = false;
    var q = async.queue(function qWorker(task, qcb) {
        worker(task, function wcb(err, data) {
            if (err) {
                return qcb(err); // Here is how we propagate the error to qcb
            }
            results.push(data);
            qcb();
        });
    }, concurrency);

    /*
     The trick is in this function. Note that checking q.tasks.length
     does not work. q.kill, introduced in async 0.7.0, just sets
     the drain function to null and the tasks length to zero.
    */
    q.push(tasks, function qcb(err) {
        if (err && !called) {
            q.kill();
            called = true;
            pcb(err, results);
        }
    });

    q.drain = function drainCb() {
        pcb(null, results);
    };
}

var tasks = [];
var concurrency = 10;
for (var i = 1; i <= 20; i += 1) {
    tasks.push(i);
}

process(tasks, concurrency, function pcb(err, results) {
    console.log(results);
    if (err) {
        return console.log(err);
    }
    console.log('done');
});
The async documentation on the GitHub page is either outdated or incorrect: when inspecting the queue object returned by the async.queue() method, I do not see a kill() method.
Nevertheless, there is a way around it. The queue object has a tasks property, which is an array; simply assigning it a reference to an empty array did the trick for me.
queue.push(someTasks, function (err) {
    if (err) queue.tasks = [];
});
