Query live data from a large MongoDB collection - Node.js

I want to get the last 10 records plus the newest record as soon as it is added.
I tried to use a tailable cursor, but it took too much time because it has to scan the entire collection before reaching the end, where it waits for new data.
{
    "_id" : ObjectId("56fe349d0ef0edb520f0ca29"),
    "topic" : "IoTeam/messages/",
    "payload" : "20:15:04:01:12:75,-127.00,679",
    "qos" : 0,
    "retain" : false,
    "_msgid" : "45975d0d.ba68a4",
    "mac" : "20:15:04:01:12:75",
    "temp" : "-127.00",
    "hum" : "679",
    "time" : "01/04/2016 15:43:09"
}
Thanks for your help.

It is still difficult to say what the best solution is without knowing more, but here is one suggestion you could try (all using the mongo shell).
Create an index on the time key.
db.your_collection_name.createIndex({time:-1})
After you have created the index, type the following to ensure it was done correctly.
db.your_collection_name.getIndexes()
This will list your indexes, and you should see that a new one was added for the time key.
Caution: Although this will reduce the time it takes to query on the time key, it will increase the time it takes to insert new records into your database, because every new insert must also be indexed. Take that into consideration when scaling your app; down the road you may want to handle this in a different way.

First of all, create an index on the time field.
db.collection('nameOfYourCollection').createIndex(
    { "time": -1 },
    null,
    function(err, results) {
        if (err) console.log("Error: " + JSON.stringify(err));
        console.log(results);
    });
This will create an index on the time field of your collection. This might take some time. But once the index is created, the queries will be much faster.
After this, in your query, just do this:
var cursor = db.collection('nameOfYourCollection').find().sort([["time", -1]]).limit(10);
cursor.forEach(function(doc) {
    if (doc) console.log("Got the document as : " + JSON.stringify(doc));
}, function(err) {
    if (err) console.log("Error: " + JSON.stringify(err));
});
This will give you the last 10 records that were inserted in the collection.
You can also call toArray instead of forEach on the cursor. Something like this:
var cursor = db.collection('nameOfYourCollection').find().sort([["time", -1]]).limit(10);
cursor.toArray(function(err, docs) {
    if (err) console.log("Error: " + JSON.stringify(err));
    if (docs) {
        console.log("Got the documents as : " + JSON.stringify(docs));
        console.log("This is the latest record that was inserted : " + JSON.stringify(docs[0]));
    }
});
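To also catch the newest record as it arrives without a tailable cursor, you could poll for documents newer than the last one you have seen. A rough sketch (the lastTime bookkeeping and the one-second interval are assumptions; also note that time here is a string like "01/04/2016 15:43:09", which does not sort chronologically, so storing it as a Date would make this reliable):
var lastTime = docs[0].time; // taken from the query above (assumption)
setInterval(function() {
    db.collection('nameOfYourCollection')
        .find({ "time": { $gt: lastTime } })
        .sort([["time", -1]])
        .toArray(function(err, newDocs) {
            if (err) return console.log("Error: " + JSON.stringify(err));
            if (newDocs.length > 0) {
                lastTime = newDocs[0].time; // remember the newest record
                console.log("New documents: " + JSON.stringify(newDocs));
            }
        });
}, 1000); // poll every second (interval is an arbitrary choice)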
Hope this helps.

Related

Getting "pipeline element 3 is not an object" error while trying to find & update using aggregate

I am using the Node.js MongoDB driver & trying to update an object array nested inside an object array in a document.
The schema of the document collection is this:
What I Want:
For the order with orderno = 1 & items.qty = 2 & taxrate = 25, update the tax to "cst" & the taxratetype to "flat".
What I Tried:
db.OrderInfo.aggregate(
    {$match: {"orderno": "1"}},
    {$unwind: '$items'},
    {$match: {'items.qty': 2}},
    function(err, result1) {
        if (err) {
            throw(err);
        } else {
            indexes = result1[0].items.taxes.map(function(obj, index) {
                if (obj.taxrate == 25) {
                    return index;
                }
            }).filter(isFinite);
            var updateData = {};
            updateData["items.$.taxes." + indexes[0] + ".tax"] = "cst";
            updateData["items.$.taxes." + indexes[0] + ".taxratetype"] = "flat";
            db.OrderInfo.update({"orderno": "1", 'items.qty': 2, 'items.taxes.taxrate': 25}, {$set: updateData}, function(err, result2) {
                console.log(result2);
            });
        }
    });
Currently I am using db.eval to run this script from Node, but I will change that later once I get it working.
Getting this error:
{"name":"MongoError","message":"Error: command failed: {\n\t\"ok\" :
0,\n\t\"errmsg\" : \"pipeline element 3 is not an
object\",\n\t\"code\" : 15942\n} : aggregate failed
:\n_getErrorWithCode#src/mongo/shell/utils.js:25:13\ndoassert#src/mongo/shell/assert.js:13:14\nassert.commandWorked#src/mongo/shell/assert.js:267:5\nDBCollection.prototype.aggregate#src/mongo/shell/collection.js:1312:5\n_funcs1#:1:31\n","ok":0,"errmsg":"Error:
command failed: {\n\t\"ok\" : 0,\n\t\"errmsg\" : \"pipeline element 3
is not an object\",\n\t\"code\" : 15942\n} : aggregate failed
:\n_getErrorWithCode#src/mongo/shell/utils.js:25:13\ndoassert#src/mongo/shell/assert.js:13:14\nassert.commandWorked#src/mongo/shell/assert.js:267:5\nDBCollection.prototype.aggregate#src/mongo/shell/collection.js:1312:5\n_funcs1#:1:31\n","code":139}
I know from this issue https://jira.mongodb.org/browse/SERVER-831 that I cannot use a direct update command & hence am trying this workaround.
Any other approach for such updates is also fine with me.
EDIT:
As per the answer given by #titi23, I had also tried using [] inside the function.
It did not give me any error, but my values did not get updated either.
Two problems in the query:
1) You are missing [] in the aggregate query.
2) The update method does not need the tax rate clause. It will find the nested document, & the index obtained from the aggregate serves the purpose in the update.
Refer to the aggregate documentation for more info on how to use it.
Syntax - db.collection.aggregate(pipeline, options)
pipeline - array - A sequence of data aggregation operations or stages.
Try the following:
db.OrderInfo.aggregate([
    {$match: {"orderno": "1"}},
    {$unwind: '$items'},
    {$match: {'items.qty': 2}}
]).toArray(function(err, result1) {
    if (err) {
        throw(err);
    } else {
        console.log(result1[0]); // see if there is any record here
        indexes = result1[0].items.taxes.map(function(obj, index) {
            if (obj.taxrate == 25) {
                return index;
            }
        }).filter(isFinite);
        var updateData = {};
        updateData["items.$.taxes." + indexes[0] + ".tax"] = "cst";
        updateData["items.$.taxes." + indexes[0] + ".taxratetype"] = "flat";
        db.OrderInfo.update(
            {"orderno": "1", 'items.qty': 2}, // the tax rate clause is removed here
            {$set: updateData},
            function(err, result2) {
                console.log(result2);
            });
    }
});
It should not throw the error.
EDIT: Do toArray() with the aggregate and see if it helps. The query above has already been updated.

Very slow update performance

I am parsing a CSV file; for each row I want to check whether a corresponding entry exists in the database. If it does I want to update it, and if it doesn't I want to insert a new entry.
It is very slow - only around 30 entries per second.
Am I doing something incorrectly?
Using node, mongodb, monk
function loadShopsCSV(ShopsName) {
    var filename = 'test.csv';
    // textDateM, weekdayNum, AreaUSArray, stateArray and ShopsDBname
    // come from surrounding code not shown here
    csv
        .fromPath(filename)
        .on("data", function(data) {
            var entry = {
                PeriodEST: Date.parse(data[0]),
                TextDate: textDateM,
                ShopId: parseInt(data[1]),
                ShopName: data[2],
                State: data[3],
                AreaUS: parseInt(data[4]),
                AreaUSX: AreaUSArray[stateArray.indexOf(data[3])],
                ProductClass: data[5],
                Type: data[6],
                SumNetVolume: parseInt(data[7]),
                Weekday: weekdayNum,
                WeightedAvgPrice: parseFloat(data[8])
            };
            db.get(ShopsDBname).update(
                {
                    "PeriodEST": entry.PeriodEST,
                    "ShopName": entry.ShopName,
                    "State": entry.State,
                    "AreaUS": entry.AreaUS,
                    "ProductClass": entry.ProductClass,
                    "Type": entry.Type
                },
                {$set: entry},
                function(err, result) {
                }
            );
        })
        .on("end", function() {
            console.log('finished loading: ' + ShopsName);
        })
        .on("error", function(err) {
            console.error(err);
        });
}
First I would suggest localizing the problem:
Replace .on("data", function(data) {...}) with a dummy .on("data", function() { return; }) and confirm the speed of the CSV parsing alone.
Turn on the mongo profiler with db.setProfilingLevel(1) and check the slow log for any query slower than 100 ms.
If neither shows a problem, the bottleneck is in one of the Node.js libraries you are using to prepare and send the queries.
Assuming the problem is slow MongoDB queries, use explain on the update's query for details. It may be that the update does not use any index and runs a collection scan for every row.
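For example, in the mongo shell you could explain the find equivalent of the update's selector (the collection name and sample values here are stand-ins) and check whether the winning plan is an IXSCAN or a full COLLSCAN:
db.your_shops_collection.find({
    "PeriodEST": ISODate("2016-01-04T00:00:00Z"), // sample values for illustration
    "ShopName": "Some Shop",
    "State": "NY",
    "AreaUS": 1000,
    "ProductClass": "A",
    "Type": "retail"
}).explain("executionStats")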
Finally, it is recommended to use bulk operations, which were designed for exactly this use case.
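A minimal sketch of batching the writes with bulkWrite, assuming your version of monk passes bulkWrite through to the underlying driver; the batch size of 1000 is arbitrary, and upsert: true matches the insert-or-update intent from the question:
var ops = [];

// Inside the "data" handler, queue the operation instead of sending it immediately:
ops.push({
    updateOne: {
        filter: {
            "PeriodEST": entry.PeriodEST,
            "ShopName": entry.ShopName,
            "State": entry.State,
            "AreaUS": entry.AreaUS,
            "ProductClass": entry.ProductClass,
            "Type": entry.Type
        },
        update: {$set: entry},
        upsert: true
    }
});
if (ops.length === 1000) { // flush a full batch
    db.get(ShopsDBname).bulkWrite(ops, {ordered: false}, function(err) {
        if (err) console.error(err);
    });
    ops = [];
}

// In the "end" handler, flush whatever is left:
if (ops.length > 0) {
    db.get(ShopsDBname).bulkWrite(ops, {ordered: false}, function(err) {
        if (err) console.error(err);
    });
}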
Have you tried updating with no write concern? MongoDB blocks until the whole update is successful and the DB sends back an acknowledgement. Are you on a cluster or something? (You might want to write to the primary node if so.)
After your {$set : entry}, add:
{writeConcern: {w: 0}}
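In context, the call would look something like this (a sketch; on older drivers the per-operation option is spelled {w: 0} rather than {writeConcern: {w: 0}}):
db.get(ShopsDBname).update(
    {
        "PeriodEST": entry.PeriodEST,
        "ShopName": entry.ShopName,
        "State": entry.State,
        "AreaUS": entry.AreaUS,
        "ProductClass": entry.ProductClass,
        "Type": entry.Type
    },
    {$set: entry},
    {w: 0} // fire-and-forget: no acknowledgement, so errors go unreported
);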

MongoDB, Updates and Controlling Document Expiration

I'm working on a Node.js project and I'm trying to understand how MongoDB works. I'm obtaining data hourly via a cron file. I'd like the data to be unique, so I'm using update instead of insert. That works fine. I'd like to add the option that the data expires after three days. It's not clear to me how to do that.
In pseudo code:
Set up vars, URLs, a couple of global variables, lineNr = 1, end_index = #, including databaseUrl.
MongoClient.connect(databaseUrl, function(err, db) {
    assert.equal(null, err, "Database Connection Troubles: " + err);
    // **** (update) ****
    db.collection('XYZ_Collection').createIndex({"createdAt": 1},
        {expireAfterSeconds: 120}, function() {});
    s = fs.createReadStream(text_file_directory + 'master_index.txt')
        .pipe(es.split())
        .pipe(es.mapSync(function(line) {
            s.pause(); // pause the readstream
            lineNr += 1;
            getContentFunction(line, s);
            if (lineNr > end_index) {
                s.end();
            }
        })
        .on('error', function() {
            console.log('Error while reading file.');
        })
        .on('end', function() {
            console.log('All done!');
        })
        );
    function getContentFunction(line, stream) {
        // (get content, format it, store it as flat JSON CleanedUpContent)
        var go = InsertContentToDB(db, CleanedUpContent, function() {
            stream.resume();
        });
    }
    function InsertContentToDB(db, data, callback) {
        // (expiration TTL code placed here generates errors too...)
        db.collection('XYZ_collection').update({
            'ABC': data.abc,
            'DEF': data.def
        }, {
            "createdAt": new Date(),
            'ABC': data.abc,
            'DEF': data.def,
            'Content': data.blah_blah
        }, {
            upsert: true
        },
        function(err, results) {
            assert.equal(null, err, "MongoDB Troubles: " + err);
            callback();
        });
    }
});
So the db.collection('XYZ_collection').update() call with two fields in the selector acts like a compound key to ensure the data is unique, and upsert: true allows for insertion or updates as appropriate. My data varies greatly: some content is unique, other content is an update of a prior submission. I think I have this unique insert-or-update function working correctly.
What I'd really like to add is an automatic expiration to the documents within the collection. I see lots of content, but I'm at a loss as to how to implement it.
If I try
db.collection('XYZ_collection')
    .ensureIndex({ "createdAt": 1 },
        { expireAfterSeconds: 259200 }); // three days
Error
/opt/rh/nodejs010/root/usr/lib/node_modules/mongodb/lib/mongodb/mongo_client.js:390
throw err
^
Error: Cannot use a writeConcern without a provided callback
at Db.ensureIndex (/opt/rh/nodejs010/root/usr/lib/node_modules/mongodb/lib/mongodb/db.js:1237:11)
at Collection.ensureIndex (/opt/rh/nodejs010/root/usr/lib/node_modules/mongodb/lib/mongodb/collection.js:1037:11)
at tempPrice (/var/lib/openshift/56d567467628e1717b000023/app-root/runtime/repo/get_options_prices.js:57:37)
at /opt/rh/nodejs010/root/usr/lib/node_modules/mongodb/lib/mongodb/mongo_client.js:387:15
at process._tickCallback (node.js:442:13)
If I try to use createIndex I get this error...
`TypeError: Cannot call method 'createIndex' of undefined`
Note the database is totally empty, via db.XYZ_collection.drop(). So yeah, I'm new to the Mongo stuff. Does anybody understand what I need to do? One note: I'm confused by something I read about how you can't create a TTL index if the indexed field is already in use by another index. I think I'm okay, but it's not clear to me.
There are some restrictions on choosing a TTL index:
- you can't create a TTL index if the indexed field is already used in another index
- the index can't have multiple fields
- the indexed field should be a Date BSON type
As always, many thanks for your help.
Update: I've added the createIndex code above. With an empty callback, it runs without error, but the TTL system fails to remove entries at all, sigh.
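For what it's worth, a minimal sketch of the usual TTL pattern (assumptions: the collection name must be spelled identically everywhere, and the code above mixes XYZ_Collection and XYZ_collection; $setOnInsert keeps createdAt from being reset by the hourly upserts, so documents expire three days after first insert; the TTL monitor only sweeps about once a minute):
// Create the TTL index once, with a real callback (three days = 259200 seconds):
db.collection('XYZ_collection').createIndex(
    { "createdAt": 1 },
    { expireAfterSeconds: 259200 },
    function(err, indexName) {
        assert.equal(null, err, "Index Troubles: " + err);
    });

// Upsert without resetting the TTL clock on later updates:
db.collection('XYZ_collection').update(
    { 'ABC': data.abc, 'DEF': data.def },
    {
        $set: { 'Content': data.blah_blah },
        $setOnInsert: { "createdAt": new Date() } // only written on first insert
    },
    { upsert: true },
    function(err, results) {
        assert.equal(null, err, "MongoDB Troubles: " + err);
    });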

Why does my query take 13 sec with Mongoose and 0.7 sec with Robomongo?

I need to execute this simple query on a collection of ~100K documents:
db.foobar.find().sort({ score: -1 })
My documents are small:
{
    "_id" : ObjectId("566acb77ddf99fd8989fafed"),
    "score" : 123,
    "username" : "John Smith"
}
I use Robomongo to test some queries on my database, and I execute the mentioned query with an average response time of 0.7 sec.
I tried to execute the same query in a static function of mongoose.
Here is my code:
leaderboardSchema.statics.getRank = function (callback) {
    var query = this.find().sort({ score: -1 });
    query.exec(function (err, records) {
        if (err) {
            return callback(err);
        }
        callback(null, records);
    });
};
But when I execute the query I have to wait 13 seconds to get the result!
What am I missing? Why is it so slow with Mongoose and not with Robomongo?
There are two factors here:
With Mongoose, it's important to use the lean() option for queries with large result sets when all you need is the plain JavaScript object representation of the documents and not full Mongoose document instances.
Robomongo only reads the first 50 results (by default) from the query, while Mongoose reads all 100K.
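A minimal sketch combining both fixes (the limit of 50 mirrors Robomongo's default page size and is an assumption; use whatever your leaderboard actually displays):
leaderboardSchema.statics.getRank = function (callback) {
    this.find()
        .sort({ score: -1 })
        .limit(50) // fetch only what you display, like Robomongo's default page
        .lean()    // plain objects instead of full Mongoose documents
        .exec(callback);
};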

$inc with mongoose ignored

I am using the following code to push some content onto an array and increment two different counters.
The item is properly pushed onto the array, and pendingSize is properly incremented, but unRead is never incremented. It used to increment; then today it stopped. The value of the unRead field in my MongoDB collection (hosted on MongoHQ) is set to 0 (numerical, not string).
When I look in my console, I see 'update success'.
Any clue why it stopped working?
Thanks
Notifications.update({ "id": theid }, { $push: { pending: notification }, $inc: { unRead: 1 }, $inc: { pendingSize: 1 } }, function(err) {
    if (err) {
        console.log('update failed');
        callback("Error in pushing." + result.members[i]);
    } else {
        console.log('update success');
        callback("Success");
    }
});
Combine the arguments of $inc into a single nested object, like this:
$inc: { unRead: 1, pendingSize: 1 }
JavaScript object keys must be unique, so when you specify $inc twice the second one silently overwrites the first - which is why pendingSize keeps incrementing while unRead never does.
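In context, the corrected call looks like this (same names as in the question):
Notifications.update(
    { "id": theid },
    { $push: { pending: notification }, $inc: { unRead: 1, pendingSize: 1 } },
    function(err) {
        if (err) {
            console.log('update failed');
            callback("Error in pushing." + result.members[i]);
        } else {
            console.log('update success');
            callback("Success");
        }
    });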
