function getResultsForOneDev(devID, res) {
var Contribution = require('../db/Contribution.js').model;
var SurveyState = require('../db/SurveyState.js').model;
var SurveyAnswer = require('../db/SurveyAnswer.js').model;
var contributionList = {
"dev": [ {
"contribs" : [ {
"surveyStates" : [ {
"surveyAnswers" : [ { } ]
} ]
} ]
} ]
};
Contribution.find({dev:devID}).exec(function (error, contribs){
// console.log("contribs:"+contribs);
contributionList = contribs;
console.log("contribs length:"+contribs.length);
for (var i = 0 ; i<contribs.length ; i++) {
(function(oneContrib) {
//console.log('contribs ID '+oneContrib._id);
SurveyState.find({contrib:oneContrib._id}).exec(function (error, surveyStates){
// console.log("surveyStates:"+surveyStates);
oneContrib.surveyStates = surveyStates;
console.log("surveyStates length:"+surveyStates.length);
for (var j = 0 ; j<surveyStates.length ; j++) {
(function(oneSurveyState) {
SurveyAnswer.find({surveyState:oneSurveyState._id}).exec(function (error, surveyAnswers){
// console.log("surveyAnswers:"+surveyAnswers);
oneSurveyState.surveyAnswers = surveyAnswers;
console.log("surveyAnswers length:"+surveyAnswers.length);
});
})(surveyStates[j]);
}
});
})(contribs[i]);
};
});
res.jsonp(contributionList);
}
This program does not run as I want: res.jsonp returns an empty contributionList.
I have already tried async (https://github.com/caolan/async). What is the good practice for filling contributionList before calling res.jsonp?
.find() is asynchronous. It returns immediately, before the callback has populated values into contributionList.
Move your res.jsonp() to the end of the callback code where contributionList is populated rather than outside the callback.
Since you seem to have multiple find() calls inside loops, and you cannot guarantee the order in which the callbacks will run, you can use async (as you mention) to create a workflow that ensures they all finish, and then run a final callback (executed by async) to invoke res.jsonp().
Because your database queries are asynchronous (they finish sometime later) and the rest of your code does not wait for them, your two for loops will finish long before the actual async responses arrive. As such, you have to keep track (somehow) of when the last async response is done, and thus when all the data is in the contributionList data structure, so that you can then send your response.
My preference would be to use promises for this and Promise.all() to trigger an action when an arbitrary number of asynchronous operations are complete, but I don't know the database interfaces you're using to know which ones are promisified, so here's a generic method that simply uses a manual counter to keep track of how many async operations are still in flight and when the counter gets to zero, you have all the data now and you can send the response.
The additions to this code are the lines of code that use the variable remaining.
function getResultsForOneDev(devID, res) {
var Contribution = require('../db/Contribution.js').model;
var SurveyState = require('../db/SurveyState.js').model;
var SurveyAnswer = require('../db/SurveyAnswer.js').model;
var contributionList = {
"dev": [ {
"contribs" : [ {
"surveyStates" : [ {
"surveyAnswers" : [ { } ]
} ]
} ]
} ]
};
Contribution.find({dev:devID}).exec(function (error, contribs){
// console.log("contribs:"+contribs);
contributionList = contribs;
console.log("contribs length:"+contribs.length);
// keep track of how many async responses are left to be processed
// in a variable at a higher scope
var remaining = 0;
for (var i = 0 ; i<contribs.length ; i++) {
(function(oneContrib) {
//console.log('contribs ID '+oneContrib._id);
SurveyState.find({contrib:oneContrib._id}).exec(function (error, surveyStates){
// console.log("surveyStates:"+surveyStates);
oneContrib.surveyStates = surveyStates;
console.log("surveyStates length:"+surveyStates.length);
// add how many more responses are pending
remaining += surveyStates.length;
for (var j = 0 ; j<surveyStates.length ; j++) {
(function(oneSurveyState) {
SurveyAnswer.find({surveyState:oneSurveyState._id}).exec(function (error, surveyAnswers){
// console.log("surveyAnswers:"+surveyAnswers);
oneSurveyState.surveyAnswers = surveyAnswers;
console.log("surveyAnswers length:"+surveyAnswers.length);
// mark one more processed and see if all remaining ones are done
--remaining;
if (remaining === 0) {
res.jsonp(contributionList);
}
});
})(surveyStates[j]);
}
});
})(contribs[i]);
};
});
}
P.S. You should realize that you are somewhat flooding your database with a whole bunch of requests all at once (all attempting to run in parallel) and then sometime later the database will actually finish all of them. Depending upon the structure of the database and its ability to handle this flood of requests efficiently or share load with other users also using the database, this is sometimes not a best practice. So, sometimes it is better to send some small number of requests at once (e.g. 3-5) and each time one completes, you launch the next waiting request. The async library can do that type of management for you or you can fairly simply build your own little queue of requests and each time one finishes, you send another.
Related
My application makes about 50 redis.get call to serve a single http request, it serves millions of request daily and application runs on about 30 pods.
When monitoring with New Relic I am seeing a 200 ms average redis.get time. To optimize this I wrote a simple pipeline system in Node.js that is a wrapper over redis.get: it pushes every request onto a queue and then executes the queue using redis.mget (getting all the keys in bulk).
Following is the code snippet:
/**
 * Batches individual key lookups into periodic `MGET` calls.
 *
 * `get(key, cb)` only enqueues the request; every 5 ms the pending requests
 * are sent as a single `global.redisClient.mget(keys, ...)` call and each
 * callback receives `(err, value)` for its own key.
 *
 * Relies on names defined outside this class: `logger.debug`,
 * `captureException` and `global.redisClient`.
 */
class RedisBulk {
  constructor() {
    // Requests waiting for the next flush: [{ key, cb }, ...].
    this.queue = [];
    // Batches whose MGET reply has not arrived yet, keyed by batch id.
    this.processingQueue = {};
    this.intervalId = setInterval(() => {
      this._processQueue();
    }, 5);
  }

  /** Stops the periodic flush. Requests still queued are not sent. */
  clear() {
    clearInterval(this.intervalId);
  }

  /**
   * Enqueues a lookup for `key`.
   * @param {string} key - Redis key to fetch.
   * @param {function(?Error, *=)} [cb] - Called with `(err)` when the batched
   *   MGET fails, or `(err, value)` with a falsy `err` on success. Optional.
   */
  get(key, cb) {
    this.queue.push({cb, key});
  }

  /** Sends everything currently queued as one MGET and fans out the replies. */
  _processQueue() {
    if (this.queue.length === 0) {
      return;
    }

    // Take ownership of the current queue; new requests go to a fresh array.
    const batch = this.queue;
    this.queue = [];
    logger.debug('Processing Queue of length', batch.length);

    // Timestamp-based id, bumped if another in-flight batch was started in the
    // same millisecond so the two batches cannot overwrite each other.
    let batchId = Date.now();
    while (batchId in this.processingQueue) {
      batchId += 1;
    }
    this.processingQueue[batchId] = batch;

    const keys = batch.map((item) => item.key);
    global.redisClient.mget(keys, (err, replies) => {
      // Remove the bookkeeping entry first so a throwing callback cannot leak it.
      delete this.processingQueue[batchId];

      if (err) {
        captureException(err);
        console.error(err);
      }

      // The closure's own `batch` is used (not a lookup by id), and callers
      // are notified on failure too, so nobody waits forever.
      batch.forEach((item, index) => {
        if (typeof item.cb !== 'function') {
          return;
        }
        if (err) {
          item.cb(err);
        } else {
          item.cb(err, replies[index]);
        }
      });
    });
  }
}
// Example usage: enqueue four lookups (no callbacks) on one shared batcher.
let redis_bulk = new RedisBulk();
['a', 'b', 'c', 'd'].forEach((key) => {
  redis_bulk.get(key);
});
My question is: is this a good approach? Will it help optimize the redis get time? Is there any other solution to the above problem?
Thanks
I'm not a redis expert but judging by the documentation ;
MGET has the time complexity of
O(N) where N is the number of keys to retrieve.
And GET has the time complexity of
O(1)
Which brings both scenarios to the same end result in terms of time complexity in your scenario. Having a bulk request with MGET can bring you some improvements for the IO but apart from that looks like you have the same bottleneck.
I'd ideally split my data into chunks, responding via multiple http requests in async fashion if that's an option.
Alternatively, you can try calling GET with promise.all() to run GET requests in parallel, for all the GET calls you need.
Something like;
const asyncRedis = require("async-redis");
const client = asyncRedis.createClient();
/**
 * Fetches several keys in parallel through the module-level `client`.
 *
 * @param {string[]} [keys=[]] - Keys to fetch; defaults to an empty list.
 * @returns {Promise<Array>} Resolves with the values in the same order as
 *   `keys`; rejects as soon as any single `client.get` rejects.
 */
function bulk(keys = []) {
  // Call through an arrow function rather than passing `client.get` itself:
  // that keeps `this` bound to the client and stops Array#map from forwarding
  // its extra (index, array) arguments to `get`.
  return Promise.all(keys.map((key) => client.get(key)));
}
I'm developing an app with the following node.js stack: Express/Socket.IO + React. In React I have DataTables, wherein you can search and with every keystroke the data gets dynamically updated! :)
I use Socket.IO for data-fetching, so on every keystroke the client socket emits some parameters and the server then calls the callback to return data. This works like a charm, but it is not guaranteed that the returned data comes back in the same order as the client sent it.
To simulate: So when I type in 'a', the server responds with this same 'a' and so for every character.
I found the async module for node.js and tried to use the queue to return tasks in the same order it received it. For simplicity I delayed the second incoming task with setTimeout to simulate a slow performing database-query:
Declaration:
const async = require('async');
// Worker queue with a concurrency of 10. A task whose count equals 1 is
// completed after a 3 second delay (stand-in for a slow query); every other
// task is completed immediately.
var queue = async.queue(function (job, done) {
  var finish = function () {
    done();
  };

  // Loose equality is intentional here: it matches the original comparison.
  var isSlowJob = job.count == 1;
  if (!isSlowJob) {
    finish();
    return;
  }

  setTimeout(finish, 3000);
}, 10);
Usage:
// For every 'result' request, push a task onto the queue and acknowledge the
// request with its filter once the queue reports that task as done.
// A one-character filter yields count 1, which the worker treats as slow.
socket.on('result', function (data, fn) {
  var filter = data.filter;
  var job = {name: filter, count: filter.length};

  queue.push(job, function (err) {
    fn(filter);
  });
});
But the way I receive it in the client console, when I search for abc is as follows:
ab -> abc -> a(after 3 sec)
I want it to return it like this: a(after 3sec) -> ab -> abc
My thought is that the queue runs the setTimeout and then moves on, and the setTimeout eventually fires somewhere later on the event loop. This results in later search filters being returned earlier than the slow-performing one.
How can i solve this problem?
First a few comments, which might help clear up your understanding of async calls:
Using "timeout" to try and align async calls is a bad idea, that is not the idea about async calls. You will never know how long an async call will take, so you can never set the appropriate timeout.
I believe you are misunderstanding the usage of queue from async library you described. The documentation for the queue can be found here.
Copy pasting the documentation in here, in-case things are changed or down:
Creates a queue object with the specified concurrency. Tasks added to the queue are processed in parallel (up to the concurrency limit). If all workers are in progress, the task is queued until one becomes available. Once a worker completes a task, that task's callback is called.
The above means that the queue can simply be used to prioritize the async tasks a given worker can perform. The different async tasks can still finish at different times.
Potential solutions
There are a few solutions to your problem, depending on your requirements.
You can only send one async call at a time and wait for the first one to finish before sending the next one
You store the results and only display the results to the user when all calls have finished
You disregard all calls except for the latest async call
In your case I would pick solution 3, as you are searching for something. Why would you care about the results for "a" if the user is already searching for "abc" before the response for "a" arrives?
This can be done by giving each request a timestamp and then sort based on the timestamp taking the latest.
SOLUTION:
Server:
// Registers the 'result' handler on every new socket connection.
// Each request is acknowledged with the filter and counter it carried, so the
// client can tell which request a reply belongs to. Filters of length 1 or 5
// are answered after 3 seconds to simulate a slow query.
exports = module.exports = function (io) {
  var SLOW_REPLY_DELAY_MS = 3000;

  function onResult(request, ack) {
    var text = request.filter;
    var sequence = request.counter;

    var reply = function () {
      ack({ filter: text, counter: sequence }); // return to client
    };

    var simulateSlowQuery = text.length === 1 || text.length === 5;
    if (simulateSlowQuery) {
      setTimeout(reply, SLOW_REPLY_DELAY_MS);
    } else {
      reply();
    }
  }

  io.sockets.on('connection', function (socket) {
    socket.on('result', onResult);
  });
};
Client:
// Component subclass (React, per the surrounding text) that requests data over
// a socket and tags every request with an increasing counter, so that only the
// reply to the most recent request is treated as current.
// NOTE(review): the snippet is cut off — the closing of the emit call, of
// onLazyLoad and of the class are not part of the visible source.
export class FilterableDataTable extends Component {
// Sets the initial state and binds onLazyLoad to this instance.
// NOTE(review): `props` is received but not forwarded to super() — confirm intended.
constructor(props) {
super();
this.state = {
endpoint: "http://localhost:3001",
filters: {},
counter: 0
};
this.onLazyLoad = this.onLazyLoad.bind(this);
}
// Emits a 'result' request built from `event` (offset from event.first,
// filter from event.filters.result2.value, fixed limit of 20) together with
// the current request counter.
onLazyLoad(event) {
var offset = event.first;
// A null offset is treated as the first row.
if(offset === null) {
offset = 0;
}
var filter = ''; // filter is the search character
// Loose comparison: skips both undefined and null.
if(event.filters.result2 != undefined) {
filter = event.filters.result2.value;
}
var returnedData = null;
// Bump the counter so this request is the newest one.
// NOTE(review): this mutates this.state directly instead of going through
// setState — confirm this is intended.
this.state.counter++;
// NOTE(review): this.socket is not assigned anywhere in the visible code.
this.socket.emit('result', {
offset: offset,
limit: 20,
filter: filter,
counter: this.state.counter
}, function(data) {
// Acknowledgement callback: `data` is whatever the server passed back.
returnedData = data;
console.log(returnedData);
// Only a reply whose counter equals the latest counter is reported as data.
// NOTE(review): this callback is a plain `function`, so `this` here is
// presumably not the component instance — an arrow function or bind looks
// necessary; confirm.
if(returnedData.counter === this.state.counter) {
console.log('DATA: ' + JSON.stringify(returnedData));
}
}
This, however, sends unneeded data to the client, which then ignores it. Does anybody have ideas for further optimizing this kind of communication? For example, a way to keep old data on the server and only send the latest?
My input is streamed from another source, which makes it difficult to use async.forEach. I am pulling data from an API endpoint, but I have a limit of 1000 objects per request to the endpoint, and I need to get hundreds of thousands of them (basically all of them) and I will know they're finished when the response contains < 1000 objects. Now, I have tried this approach:
/* List all deposits */
// Attempt to page through an endpoint with async.doWhilst until a response
// holds fewer than 1000 objects. Kept exactly as posted: this is the code
// that raises the "iterator is not a function" error discussed below.
var depositsAll = [];
var depositsIteration = [];
// NOTE(review): the first argument is the RETURN VALUE of this._post(...),
// which is called immediately, rather than a function for doWhilst to call.
async.doWhilst(this._post(endpoint_path, function (err, response) {
// check err
/* Loop through the data and gather only the deposits */
for (var key in response) {
//do some stuff
}
// NOTE(review): `+=` on two arrays converts both to strings and concatenates
// them; it does not append the elements of depositsIteration.
depositsAll += depositsIteration;
// NOTE(review): `callback` is not defined anywhere in the visible snippet.
return callback(null, depositsAll);
}, {limit: 1000, offset: 0, sort: 'desc'}),
// NOTE(review): the second argument is an expression evaluated once, right
// here, not a test function; `response` is not in scope at this point.
response.length > 1000, function (err, depositsAll) {
// check for err
// return the complete result
return callback(null, depositsAll);
});
With this code I get an async internal error that iterator is not a function. But in general I am almost sure the logic is not correct as well.
If it's not clear what I'm trying to achieve - I need to perform a request multiple times, and add the response data to a result that at the end contains all the results, so I can return it. And I need to perform requests until the response contains less than 1000 objects.
I also looked into async.queue but could not get the hang of it...
Any ideas?
You should be able to do it like that, but if that example is from your real code you have misunderstood some of how async works. doWhilst takes three arguments, each of them being a function:
The function to be called by async. Gets argument callback that must be called. In your case, you need to wrap this._post inside another function.
The test function (you passed the value of response.length > 1000, i.e. a boolean — or rather it would be one if response were defined)
The final function to be called once execution is stopped
Example with each needed function separated for readability:
// Pages through the endpoint with async.doWhilst: `post` performs one request,
// `check` decides whether another one is needed, `done` receives the result.
var depositsAll = [];
// Size of the most recent response; a full page (1000) means more may follow.
var responseLength = 1000;
var self = this;

// Iteratee: performs one request and reports back through asyncCb.
var post = function(asyncCb) {
  self._post(endpoint_path, function(err, res) {
    // Stop on failure before touching `res`, which may not be set in that case.
    if (err) {
      return asyncCb(err);
    }
    // ... (elided in the original answer: gather the deposits from `res`
    // into depositsAll)
    responseLength = res.length;
    asyncCb(null, depositsAll);
  });
};

// Test: keep going while the last response was a full page.
// NOTE(review): a synchronous test function matches the async version this
// answer was written for; newer major versions pass a callback to the test
// function instead — confirm against the installed version.
var check = function() {
  return responseLength >= 1000;
};

// Final callback: runs once, after the loop stops or a request fails.
var done = function(err, deposits) {
  if (err) {
    console.error(err);
    return;
  }
  console.log(deposits);
};

async.doWhilst(post, check, done);
In a Node.js program that asynchronously handles lines of input constantly coming in from stdin, how can I ensure the asynchronous handlers print their results in the same order the inputs came in?
SSCCE program.js (dependency: npm install split):
// Stand-in for an asynchronous handler: prints `line` after a random delay of
// less than one second.
var executeCommand = function (line) {
  var delayMs = Math.random() * 1000;
  var printLine = function () {
    console.log(line);
  };
  setTimeout(printLine, delayMs);
};
var split = require("split");
// Split stdin into lines and hand every line to executeCommand as it arrives.
var stdinLines = process.stdin.pipe(split("\n"));
stdinLines.on("data", function (line) {
  executeCommand(line);
});
Running printf "A\nB\nC\nD\nE\nF" | node program.js produces
B
E
A
D
C
F
This is because the handler (executeCommand) has an unpredictable delay, modelled here as a random setTimeout. The "processing" (the setTimeouts) should happen concurrently, but their outputs (console.logs) should be in the same order as the constantly incoming inputs.
How can I make that happen?
I'd usually just exclaim "It's Async.js time!", but this time I can't see an appropriate existing helper: Since tasks are constantly coming in, anything that operates on a fixed collection of inputs won't cut it.
I figured it out.
As @Peter and @jfriend pointed out, handler results must be put on a queue that only allows completed tasks to be dequeued. A good time to check for finished tasks is whenever a handler completes.
A picture might clarify how it works:
Turns out a transform stream is a nice way to model that. ("Stuff comes in and eventually stuff related to the incoming stuff comes out" is pretty much the description of a transform stream.) Whenever results finish, completed tasks are pushed.
Here's the question's example, modified to work:
var Transform = require("stream").Transform;
var split = require("split");
/**
 * Creates an object-mode Transform stream that runs `worker` on every chunk
 * concurrently but emits the results in the order the chunks arrived.
 *
 * @param {function(*, function(*))} worker - Called as `worker(chunk, done)`;
 *   it must call `done(result)` exactly once. Results must not be `null`,
 *   because pushing `null` into a stream signals end-of-stream.
 * @returns {Transform} The stream: write inputs to it, read ordered results.
 */
var orderedParallel = function(worker) {
  var s = new Transform({ objectMode : true });

  // One entry per chunk, in arrival order; filled in when its worker finishes.
  var resultsQueue = [];
  // Set once the writable side has ended and we are waiting for stragglers.
  var pendingFlush = null;

  // Emits every finished result sitting at the head of the queue, and
  // completes a pending flush once nothing is left in flight.
  var sendFinishedFromQueue = function() {
    while (resultsQueue.length > 0 && resultsQueue[0].done) {
      s.push(resultsQueue.shift().data);
    }
    if (pendingFlush && resultsQueue.length === 0) {
      var finishFlush = pendingFlush;
      pendingFlush = null;
      finishFlush();
    }
  };

  s._transform = function(chunk, encoding, callback) {
    var resultObject = { done : false, data : null };
    resultsQueue.push(resultObject);
    worker(chunk, function(result) {
      resultObject.data = result;
      resultObject.done = true;
      sendFinishedFromQueue();
    });
    // Accept the next chunk right away; this is what lets workers overlap.
    callback();
  };

  s._flush = function(callback) {
    // The flush callback must be called or the stream never emits 'end'.
    // Workers may still be running here, so remember the callback and let the
    // last worker to finish invoke it (or invoke it now if all are done).
    pendingFlush = callback;
    sendFinishedFromQueue();
  };

  return s;
};
// Stand-in for an asynchronous handler: hands `line` back through `cb` after
// a random delay of less than one second.
var executeCommand = function (line, cb) {
  var delayMs = Math.random() * 1000;
  var reportResult = function () {
    cb(line);
  };
  setTimeout(reportResult, delayMs);
};
// stdin -> lines -> concurrent executeCommand with ordered output -> console.
var inputLines = process.stdin.pipe(split("\n"));
var orderedResults = inputLines.pipe(orderedParallel(executeCommand));
orderedResults.on("data", function (result) {
  console.log(result);
});
To convince yourself it works, try a hundred parallel tasks:
# Feed the integers 0 through 100, one per line, to the program.
printf '%s\n' {0..100} | node program.js
They should complete in parallel (within 1 second at random), but come out of the orderedParallel transform stream in order regardless.
async.queue with limit of 1 and where the worker function both executes the command and prints the results will do it. You won't have optimal concurrency, but it will behave correctly, so I suggest coding that even if it's a stepping stone. Keeping the correct behavior but adding some concurrency will require both queueing the main work function but also some buffering of output in the case of output2 being ready before output1 arrives.
I have an application where a database query returns a number of rows (typically, less than 100). For each row, I need to make an http call to get supplemental data. I'd like to fire off all of the requests, and then when the last callback completes, move on to rendering the result page.
So far, the answers to similar questions I've looked at have either been chaining the requests by making request #2 in the callback for request #1 (advantages: simple, avoids burying the server in multiple requests), or by firing all of the requests with no tracking of whether all of the requests have completed (works well in the browser where the callback updates the UI).
My current plan is to keep a counter of requests made and have the callback decrement the counter; if it reaches zero, I can call the render function. I may also need to handle the case where responses come in faster than requests are being made (not likely, but a possible edge case).
Are there other useful patterns for this type of problem?
When using async code could roughly look like this:
var async = require('async');
// Collected supplemental data, one entry per processed row.
var results = [];

// Fetch the supplemental data one row at a time (concurrency 1).
var queue = async.queue(function(row, callback) {
  http.fetchResultForRow(row, function(data){
    results.push(data);
    callback();
  });
}, 1);

// Runs once every row has been processed.
var onAllLoaded = function() {
  console.log("All results loaded");
  renderEverything(results);
};

// NOTE(review): assigning `drain` matches the async version this answer was
// written for; newer major versions register the handler by calling
// queue.drain(fn) instead — confirm against the installed version.
queue.drain = onAllLoaded;

database.fetch(function(rows) {
  // With nothing to push, the queue never drains, so render right away.
  if (rows.length === 0) {
    onAllLoaded();
    return;
  }
  for (var i=0; i < rows.length; i++) {
    queue.push(rows[i]);
  }
});
If the order does not matter you also could use: map
Look around in the documentation of async; there are a lot of useful patterns.
You can implement this quite nicely with promises using the when library. Though if you want to rate limit the calls to getting the extra info you will need to do a little more work than in the async approach of TheHippo I think.
Here's an example:
when = require('when')
// Example lookup of the extra info for one row.
// The result (x + 10) is delivered through a Node-style callback after a
// 1 ms timer, to show that the lookup is asynchronous.
function get_extra_info_for_row(x, callback) {
  var deliver = function () {
    var extraInfo = x + 10;
    return callback(null, extraInfo);
  };
  setTimeout(deliver, 1);
}
// Sample rows to augment. Declared with `var` so that the script does not
// depend on implicit globals (which throw in strict mode and ES modules).
var rows = [1,2,3,4,5];

// One promise per row; each resolves to [row, extra_info] or rejects with the
// error reported by get_extra_info_for_row.
var row_promises = rows.map(
  function(x) {
    var defered = when.defer();
    get_extra_info_for_row(x, function(err, extra_info) {
      if (err) return defered.reject(err);
      defered.resolve([x, extra_info]);
    });
    return defered.promise;
  });

// Wait for every row; results keep the order of `rows`.
when.all( row_promises )
  .then(
    function(augmented_rows) { console.log( augmented_rows ); },
    function(err) { console.log("Error", err ); }
  );
This outputs
[ [ 1, 11 ], [ 2, 12 ], [ 3, 13 ], [ 4, 14 ], [ 5, 15 ] ]