Performance of an individual findOne query is abnormally slow (upwards of 60-85 ms). Is there something fundamentally wrong with the design below? What steps should I take to make this operation faster?
Goal (a fast count of the items within a range, under 10-20 ms):
Input a max and min time
Query the database for the documents with the closest time to max and min
Return the "number" field of both query results
Take the difference of the "number" fields to get the document count
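For example, if the document closest to min has number 12 and the document closest to max has number 857, the range contains 857 - 12 = 845 documents.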
Setup
MongoDB database
3000 documents, compound ascending index on the time_axis, latency_axis, and number fields
[ { time_axis:1397888153982,latency_axis:5679,number:1},
{ time_axis:1397888156339,latency_axis:89 ,number:2},
...
{ time_axis:1398036817121,latency_axis:122407,number:2999},
{ time_axis:1398036817122,latency_axis:7149560,number:3000} ]
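For reference, a shell one-liner that would build the compound index described above (the collection name here is a placeholder, not from the original post):

db.measurements.createIndex({ time_axis: 1, latency_axis: 1, number: 1 })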
Node.js
exports.getCount = function (uri, collection_name, min, max, callback) {
    var low, high;
    var start = now(); // now() is assumed to be a millisecond timer, e.g. Date.now
    MongoClient.connect(uri, function (err, db) {
        if (err) {
            return callback(err, null);
        }
        var collection = db.collection(collection_name);
        async.parallel([
            function findLow(callback) {
                var query = { time_axis: { $gte: min } };
                var projection = { _id: 0, number: 1 };
                collection.findOne(query, projection, function (err, result) {
                    low = result.number;
                    console.log("min query time: " + (now() - start));
                    callback();
                });
            },
            function findHigh(callback) {
                var query = { time_axis: { $gte: max } };
                var projection = { _id: 0, number: 1 };
                collection.findOne(query, projection, function (err, result) {
                    high = result.number;
                    console.log("max query time: " + (now() - start));
                    callback();
                });
            }
        ],
        function calculateCount(err) {
            var count = high - low;
            db.close();
            console.log("total query time: " + (now() - start));
            callback(null, count);
        });
    });
};
Note: Thanks to Adio for the answer. It turns out the MongoDB connection only needs to be initialized once, and the driver handles connection pooling automatically. :)
Looking at your source code, I can see that you create a new connection every time you query MongoDB. Try providing an already created connection, and thus reuse the connection you create. Coming from the Java world, I think you should create some connection pooling.
You can also check this question and its answer.
" You open do MongoClient.connect once when your app boots up and reuse the db object. It's not a singleton connection pool each .connect creates a new connection pool. "
Try using the --prof option in Node.js to generate profiling results, so you can find out where the time is spent, e.g. node --prof app.js.
You will get a v8.log file containing the profiling results. The tool for interpreting v8.log is linux-tick-processor, which can be found in the v8 project under v8/tools/linux-tick-processor.
To obtain linux-tick-processor:
git clone https://github.com/v8/v8.git
cd v8
make -j8 ia32.release
export D8_PATH=$PWD/out/ia32.release
export PATH=$PATH:$PWD/tools
linux-tick-processor /path/to/v8.log | vim -
I am working on a project that deals with a large amount of data. Fetching all the data at once from MongoDB is not an option, since it results in a bad user experience. I am building an infinite-loading setup: on each scroll I want to fetch a fixed number of records from MongoDB and concatenate the newly fetched data with the previously fetched data to show results on my webpage.
How do I do pagination in MongoDB using Node.js?
The MongoDB Node.js driver supports pagination through the limit and skip options.
// First count the number of entities in your collection,
// then walk through it one page at a time.
collection.countDocuments()
    .then((count) => {
        var step = 1000;
        var offset = 0;
        var limit = step;
        // limit the number of results you get in each query (a page of results)
        while (offset < count) {
            process(offset, limit);
            offset += step;
        }
    })
    .catch((error) => console.error(error));

async function process(offset, limit) {
    var entities = collection.find(
        {},
        {
            limit: limit,
            skip: offset,
        }
    );
    for await (const entity of entities) {
        // do what you want with each entity
    }
}
You can find more details on the MongoDB documentation page.
https://www.mongodb.com/docs/drivers/node/current/fundamentals/crud/read-operations/limit/
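One caveat, as an aside: skip still makes the server walk past every skipped document, so deep pages get progressively slower. A common alternative, sketched here (not part of the answer above), is to paginate by range on an indexed field such as _id:

// keyset pagination sketch: remember the last _id of the previous page
// and fetch the documents that come after it
async function nextPage(lastId, pageSize) {
    var filter = lastId ? { _id: { $gt: lastId } } : {};
    return collection.find(filter)
        .sort({ _id: 1 })
        .limit(pageSize)
        .toArray();
}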
I have a basic Node.js Couchbase script taken straight from their documentation. It just inserts a document and immediately N1QL-queries the inserted document.
var couchbase = require('couchbase');
var cluster = new couchbase.Cluster('couchbase://localhost/');
cluster.authenticate('admin', 'admini');
var bucket = cluster.openBucket('application');
var N1qlQuery = couchbase.N1qlQuery;

bucket.manager().createPrimaryIndex(function () {
    bucket.upsert('user:king_arthur', {
        'email': 'kingarthur@couchbase.com',
        'interests': ['Holy Grail', 'African Swallows']
    },
    function (err, result) {
        bucket.get('user:king_arthur', function (err, result) {
            console.log('Got result: %j', result.value);
            bucket.query(
                N1qlQuery.fromString('SELECT * FROM application WHERE $1 in interests LIMIT 1'),
                ['African Swallows'],
                function (err, rows) {
                    console.log("Got rows: %j", rows);
                });
        });
    });
});
This returns:
bash-3.2$ node nodejsTest.js
Got result: {"email":"kingarthur@couchbase.com","interests":["Holy Grail","African Swallows"]}
Got rows: []
I was expecting the inserted document in the "rows" array.
Any idea why this very basic Node.js starter script is not working?
Key/value reads and writes are always consistent (that is, if you write a document and then retrieve it by ID, you will always get back what you just wrote). However, updating the index used by N1QL queries takes time and can affect performance.
As of version 5.0, you can control your consistency requirements to balance the trade-off between performance and consistency. By default, Couchbase uses the not_bounded mode. In this mode, queries execute immediately, without waiting for indexing to catch up. That is the issue you're seeing: your query executes before the index has been updated with the mutation you made.
You can read about this further here: https://developer.couchbase.com/documentation/server/current/indexes/performance-consistency.html
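For illustration, a sketch with the 2.x Node SDK used above: request_plus consistency makes the query wait until the index has caught up with your own mutation (slower, but it sees the fresh write).

// build the query with request_plus scan consistency
var query = N1qlQuery
    .fromString('SELECT * FROM application WHERE $1 in interests LIMIT 1')
    .consistency(couchbase.N1qlQuery.Consistency.REQUEST_PLUS);

bucket.query(query, ['African Swallows'], function (err, rows) {
    console.log("Got rows: %j", rows); // now includes the just-upserted document
});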
In a POST handler, I am trying to retrieve the nth activity of a user (since I have a dropdown that returns the index number of the activity). When I run the query
collection.find({'local.email':req.user.local.email},
{'local.activities':{$slice : [currActivity,1]}});
I receive the correct activity object in Robo3T.
But when I call the same query in Node inside a POST handler, it returns undefined.
app.post('/addlog',function(req,res){
var currActivity = req.body.curAct;
var score = req.body.score;
var comment = req.body.reason;
mongoose.connect('mongodb://****:****#ds044907.mlab.com:44907/intraspect',function (err, database) {
if (err)
throw err
else
{
db = database;
var collection = db.collection('users');
var retrievedAct = collection.find({'local.email':req.user.local.email},
{'local.activities':{$slice : [currActivity,1]}}).toArray().then(console.log(retrievedAct));
if (retrievedAct.length > 0) { printjson (retrievedAct[0]); }
console.log(currActivity);
console.log(retrievedAct[0]);
// console.log(req.body.newAct);
collection.update({'local.activities.name':retrievedAct[0]},
{$push: {'local.activities.log' : {
comments: comment,
score: score,
log_time: Date.now()
}}})
.then(function(){
res.redirect('/homepage');
})
.catch(function() {
console.log('Error');
});
}
});
});
I checked that the currActivity variable does in fact contain the integer value for the nth activity.
If you want the result of collection.find().toArray(), as specified in the docs, you have two options:
Passing a callback to .toArray(), like you did with mongoose.connect()
Using the Promise that it returns if you don't pass a callback
Right now you are doing neither.
Also, you are mixing callback style and Promises in your code. I recommend you unify your code. If you are using a Node.js version newer than 8, async/await makes this much simpler.
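For illustration, a minimal async/await sketch of the handler, assuming the connection is opened once at startup and db is shared (names taken from the question; error handling collapsed into one catch):

app.post('/addlog', async function (req, res) {
    try {
        var currActivity = parseInt(req.body.curAct, 10);
        var collection = db.collection('users');

        // await the Promise that toArray() returns
        var retrievedAct = await collection.find(
            { 'local.email': req.user.local.email },
            { 'local.activities': { $slice: [currActivity, 1] } }
        ).toArray();
        console.log(retrievedAct[0]);

        await collection.update(
            { 'local.activities.name': retrievedAct[0] },
            { $push: { 'local.activities.log': {
                comments: req.body.reason,
                score: req.body.score,
                log_time: Date.now()
            } } }
        );
        res.redirect('/homepage');
    } catch (err) {
        console.log('Error', err);
    }
});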
I'm wondering if it's a good idea (performance-wise) to store query results in variables and refresh them only every few minutes, since my Node application runs multiple MongoDB queries that don't need to be up to date, and some of them are a bit complex.
I'm thinking about something like this :
var queryResults = [];
myModel.find().exec(function(err, results) {
queryResults = results;
});
Then:
var interval = 10 * 60 * 1000;
setInterval(function() {
myModel.find().exec(function(err, results) {
queryResults = results;
});
}, interval);
And when I need to send the query results to my view engine:
app.get('/', function(req, res) {
res.render('index.ejs', {entries : queryResults});
});
Is this a good way to cache and display the same query results to multiple clients?
You can use this module instead of building your own cache layer:
https://www.npmjs.com/package/memory-cache
Just be careful not to put a huge amount of data into memory. If you want to push millions of results in there, it is probably not a good idea.
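A small sketch of that with the question's route (the cache key and the 10-minute TTL are assumptions):

var cache = require('memory-cache');

app.get('/', function (req, res) {
    var entries = cache.get('entries');
    if (entries) {
        // serve the cached copy until the TTL expires
        return res.render('index.ejs', { entries: entries });
    }
    myModel.find().exec(function (err, results) {
        cache.put('entries', results, 10 * 60 * 1000); // TTL in ms
        res.render('index.ejs', { entries: results });
    });
});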
I have a MongoDB Relationships collection that stores the user_id and the followee_id (the person the user is following). If I query against the user_id, I can find all the individuals the user is following. Next I need to query the Users collection against all of the returned followee ids to get their personal information. This is where I am confused. How would I accomplish this?
NOTE: I know I could embed the followees in the individual user's document and use an $in operator, but I do not want to go this route. I want to maintain as much flexibility as I can.
You can use an $in query without denormalizing the followees on the user. You just need to do a little bit of data manipulation:
Relationship.find({user_id: user_id}, function(error, relationships) {
    var followee_ids = relationships.map(function(relationship) {
        return relationship.followee_id;
    });
    User.find({_id: { $in: followee_ids}}, function(error, users) {
        // voila
    });
});
If I got your problem right (I think so), you need to query each of the "individuals the user is following".
That means running multiple queries against the database, one per followee, and collecting the data.
Because queries in Node.js (I assume you are using Mongoose) are asynchronous, you need to structure your code asynchronously for this task.
If you are not familiar with the async module in Node.js, it's about time to get to know it.
See npm async for docs.
I made you a sample of how the code for your query needs to look.
/* followee_id_arr: array of followee_id values from the previous query */
function query(followee_id_arr, callback) {
    var async = require('async');
    var allResults = [];
    async.eachSeries(followee_id_arr, function (f_id, done) {
        db.userCollection.findOne({ _id: f_id }, { _id: 1, personalData: 1 }, function (err, data) {
            if (err) { return done(err); /* handle error */ }
            allResults.push(data);
            done();
        });
    }, function (err) {
        // called once every followee has been fetched
        callback(err, allResults);
    });
}
You can even run all the queries in parallel (for better performance) by using async.map.
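A sketch of the parallel variant (same assumed collection and projection as above):

// async.map fires the findOne calls in parallel and collects the
// results in input order
function queryParallel(followee_id_arr, callback) {
    var async = require('async');
    async.map(followee_id_arr, function (f_id, done) {
        db.userCollection.findOne({ _id: f_id }, { _id: 1, personalData: 1 }, done);
    }, callback); // callback(err, arrayOfUsers)
}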