Insertion order of array elements in MongoDB - node.js
I am having trouble preserving insertion order of a bulk insert into mongodb.
My application requires posting data continuously (via HTTP POST, about once a second) to a server. On the server side, the HTTP POST is handled and this data is stored in a capped collection in MongoDB v2.4. The size of this capped collection is large (50MB). The data is in JSON format and contains arrays, like this:
{"Data":[{"Timestamp":"2014-08-02 13:38:18:852","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0021321268286556005,"b":-0.0010663296561688185}],"Monkec":[{"a":17.511783599853516,"c":-0.42092469334602356,"b":-0.42092469334602356}]},{"Timestamp":"2014-08-02 13:38:18:858","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.892329216003418,"c":-0.2339634746313095,"b":-0.2342628538608551}]},{"Timestamp":"2014-08-02 13:38:18:863","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0021321268286556005,"b":0.0021315941121429205}],"Monkec":[{"a":9.702523231506348,"c":-0.24264541268348694,"b":-0.2148033082485199}]},{"Timestamp":"2014-08-02 13:38:18:866","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.665101051330566,"c":-0.23366409540176392,"b":-0.2197430431842804}]},{"Timestamp":"2014-08-02 13:38:18:868","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.693991661071777,"c":-0.2936892807483673,"b":-0.22857467830181122}]},{"Timestamp":"2014-08-02 13:38:18:872","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.684710502624512,"c":-0.2296224981546402,"b":-0.13786330819129944}]},{"Timestamp":"2014-08-02 13:38:18:873","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.67707633972168,"c":-0.31255003809928894,"b":-0.1902543604373932}]},{"Timestamp":"2014-08-02 13:38:18:875","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0}],"Monkec":[{"a":9.739496231079102,"c":-0.1899549812078476,"b":-0.18845809996128082}]},{"Timestamp":"2014-08-02 13:38:18:878","Rabbit":[{"a":-0.003197923768311739,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.721234321594238,"c":-0.19205063581466675,"b":-0.17318984866142273}]},{"Timestamp":"2014-08-02 
13:38:18:881","Rabbit":[{"a":-0.003197923768311739,"c":-0.003197923768311739,"b":0.0010657970560714602}],"Monkec":[{"a":9.78545093536377,"c":-0.2501298487186432,"b":-0.1953437775373459}]},{"Timestamp":"2014-08-02 13:38:18:882","Rabbit":[{"a":0,"c":-0.0010663296561688185,"b":0.0021315941121429205}],"Monkec":[{"a":9.686058044433594,"c":-0.21630020439624786,"b":-0.18247054517269135}]},{"Timestamp":"2014-08-02 13:38:18:884","Rabbit":[{"a":-0.0010663296561688185,"c":0,"b":0.0010657970560714602}],"Monkec":[{"a":9.67198657989502,"c":-0.18546432256698608,"b":-0.23156845569610596}]},{"Timestamp":"2014-08-02 13:38:18:887","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.640103340148926,"c":-0.23276595771312714,"b":-0.25686585903167725}]},{"Timestamp":"2014-08-02 13:38:18:889","Rabbit":[{"a":-0.0010663296561688185,"c":0,"b":0}],"Monkec":[{"a":9.739346504211426,"c":-0.19130218029022217,"b":-0.22602996230125427}]},{"Timestamp":"2014-08-02 13:38:18:891","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0}],"Monkec":[{"a":9.716594696044922,"c":-0.22543121874332428,"b":-0.19728973507881165}]},{"Timestamp":"2014-08-02 13:38:18:898","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.682914733886719,"c":-0.28680360317230225,"b":-0.1740879863500595}]},{"Timestamp":"2014-08-02 13:38:18:904","Rabbit":[{"a":-0.0010663296561688185,"c":0,"b":0.0021315941121429205}],"Monkec":[{"a":9.693093299865723,"c":-0.20866607129573822,"b":-0.2586621046066284}]},{"Timestamp":"2014-08-02 13:38:18:907","Rabbit":[{"a":-0.0021321268286556005,"c":-0.0010663296561688185,"b":0}],"Monkec":[{"a":9.690997123718262,"c":-0.18681152164936066,"b":-0.23216719925403595}]},{"Timestamp":"2014-08-02 
13:38:18:910","Rabbit":[{"a":-0.003197923768311739,"c":-0.0010663296561688185,"b":0.0010657970560714602}],"Monkec":[{"a":9.671688079833984,"c":-0.15388000011444092,"b":-0.2588118016719818}]},{"Timestamp":"2014-08-02 13:38:19:055","Rabbit":[{"a":-0.0010663296561688185,"c":-0.0010663296561688185,"b":0}],"Monkec":[{"a":9.689650535583496,"c":-0.23605911433696747,"b":-0.1989363133907318}]}],"Serial":"35689"}
I am inserting this in mongodb (using NodeJs MongoClient driver) using a bulk insert command:
var length = 20; // only doing 20 inserts for testing purposes
for (var i = 0; i <length;i++) {
var bulk = col.initializeUnorderedBulkOp();
bulk.insert(data["Data"][i]); // data is my JSON data of interest
bulk.execute(function(err) {
if (err) {
return cb(err);
}
if (++inserted == length) {
cb(); // callback (not seen in this code snippet)
}
}); // end function
} // end of for loop
However, when I examine the entries in the database, they are not inserted in the order in which the data resides in the originating JSON array. My source data is in ascending Timestamp order, but a few entries in the mongodb capped collection are out of order. For instance, I see this:
{ "Timestamp" : "2014-08-02 13:38:18:910", "Rabbit" : [ { "a" : -0.003197923768311739, "c" : -0.0010663296561688185, "b" : 0.0010657970560714602 } ], "Monkec" : [ { "a" : 9.671688079833984, "c" : -0.15388000011444092, "b" : -0.2588118016719818 } ], "_id" : ObjectId("548e67a683946a5d25bc6d1a") }
{ "Timestamp" : "2014-08-02 13:38:18:884", "Rabbit" : [ { "a" : -0.0010663296561688185, "c" : 0, "b" : 0.0010657970560714602 } ], "Monkec" : [ { "a" : 9.67198657989502, "c" : -0.18546432256698608, "b" : -0.23156845569610596 } ], "_id" : ObjectId("548e67a683946a5d25bc6d13") }
{ "Timestamp" : "2014-08-02 13:38:18:904", "Rabbit" : [ { "a" : -0.0010663296561688185, "c" : 0, "b" : 0.0021315941121429205 } ], "Monkec" : [ { "a" : 9.693093299865723, "c" : -0.20866607129573822, "b" : -0.2586621046066284 } ], "_id" : ObjectId("548e67a683946a5d25bc6d18") }
So "Timestamp" : "2014-08-02 13:38:18:910" is stored before "Timestamp" : "2014-08-02 13:38:18:884", even though it is the other way around in the source JSON.
How to ensure mongodb inserts data in the correct order? I also tried non bulk inserts (db.col.insert or db.col.insertOne) but still get this inconsistency. Thank You
If your queries aren't asking for any specific sorting/ordering, MongoDB makes no guarantees as to in which order they'll be returned.
How you insert your data is irrelevant. What you need to do is write your find query like this:
// Sort by ascending timestamp
db.my_collection.find({ ... }).sort({"Timestamp": 1})
See http://docs.mongodb.org/manual/reference/method/cursor.sort/#cursor.sort for more information on how sorting works.
Of course, if you want to do that, you'll greatly benefit from adding an index on Timestamp to your collection (see http://docs.mongodb.org/manual/core/indexes/).
Related
paginating using angularfire2
Suppose I have a data structure in the Firebase Realtime Database like { "donors" : { "uid1" : { "name" : "x", "bloodGroup" : "A+", "location" : "some Place"}, "uid2" : { "name" : "y", "bloodGroup" : "A-", "location" : "some place"}, ... ... } } and I have millions of donor records like this. How could I filter them based on bloodGroup and location, fetching, say, 100 records from the server at a time using angularfire2?
I have found this page which was really helpful to me when using queries to query my firebase data: https://howtofirebase.com/collection-queries-with-firebase-b95a0193745d A very simple example would be along the lines of: this.donorsData = af.database.list('/donors', { query: { orderByChild: 'bloodGroup', equalTo: 'A+', } }); Not entirely sure how to fetch 100 records, then another 100, I am using datatables in my app, which fetches all my data and using the datatables for pagination.
Check mongo data key present or not
I have some product data where some products don't have the key "images.cover". When I try to print all the data, it shows the error Cannot read property 'cover' of undefined. So I want to do this: if the images.cover key is not present, just set var cover = ''; otherwise use the images.cover value. I'm using nodejs and mongodb.
From the error message Cannot read property 'cover' of undefined, you can narrow the source of the error in the offending product document down to one of three cases: the document doesn't have an images field (hence the undefined object), the images field is null, or the cover key is not present. Let's consider a minimal test case where a sample collection has documents with the above three cases plus one with the cover key set (note that the sample documents below name the field image rather than images): db.product.insert([ /* 0 */ { "_id" : 1, "image" : { "cover" : "test1", "url" : "url1" } }, /* 1 */ { "_id" : 2, "image" : { "url" : "url2" } }, /* 2 */ { "_id" : 3 }, /* 3 */ { "_id" : 4, "image" : { "cover" : null, "url" : "url4" } } ]); In the mongo shell you can check whether a key is present either by using native JavaScript methods or by using MongoDB's $exists operator. For the former, you could try: var cover = "", covers = []; db.product.find().forEach(function (doc){ var cover = ""; if ((doc.image !== undefined) && (typeof(doc.image.cover) !== "undefined") && (doc.image.cover !== undefined)){ cover = doc["image"].cover; } covers.push(cover); }); print(covers); /* will print to mongo shell: { "0" : "test1", "1" : "", "2" : "", "3" : null } */ Using the $exists operator with its value set to true searches for documents that contain the field, including documents where the field value is null. So using this route is probably not going to work in your example, since you would like to assign the covers variable for unmatched documents as well: var cover = "", covers = []; db.product.find({ "image.cover": {$exists : true} }).forEach( function(doc) { covers.push(doc["image"].cover); }); print(covers); /* this will print to mongo shell: { "0" : "test1", "1" : null } */
Mongoose: find mixed schema type documents with multiple entries
My data model looks roughly like this: data = { ... parameters: [{type:Schema.Types.mixed}], ... } If I now insert a document into the database, doc = { ... parameters:[{"foo":"bar"}], ... } I can query it via the "parameters" key: db.dataset.find({"parameters":[{"foo":"bar"}]},function(doc){ ... }) and get back the expected document. However, if the object in "parameters" contains more than one key, for example doc = { ... parameters:[{"foo":"bar","ding":"dong"}] ... } I can't find it anymore. Why?
It is because the query cannot match any documents where the array field parameters has the exact array object as its value [{"foo": "bar", "ding": "dong"}]. To demonstrate this, let's insert a couple of sample documents in a collection: /* 0 */ { "_id" : ObjectId("551d777fcfd33f4e2a61e48f"), "parameters" : [ { "foo" : "bar" } ] } /* 1 */ { "_id" : ObjectId("551d777fcfd33f4e2a61e490"), "parameters" : [ { "foo" : "bar", "ding" : "dong" } ] } Querying this collection for parameters array with this object array [{"foo":"bar"}] will bring the document with "_id" : ObjectId("551d777fcfd33f4e2a61e48f"). However, if you change your query object to use $elemMatch then it will bring both documents: db.collection.find({"parameters": { "$elemMatch": { "foo": "bar" } }});
Remove duplicate array objects mongodb
I have an array that contains entries with duplicate values in BOTH fields (roundId and money); is there a way to remove one of the duplicate array items? userName: "abc", _id: 10239201141, rounds: [{ "roundId": "foo", "money": "123" }, /* Keep one of these */ { /* Keep one of these */ "roundId": "foo", "money": "123" }, { "roundId": "foo", "money": "321" /* Not a duplicate. */ }] I'd like to remove one of the first two, and keep the third because the id and money are not duplicated in the array. Thank you in advance! Edit I found: db.users.ensureIndex({'rounds.roundId':1, 'rounds.money':1}, {unique:true, dropDups:true}) This doesn't help me. Can someone help me? I spent hours trying to figure this out. The thing is, I ran my node.js website on two machines so it was pushing the same data twice. Knowing this, the duplicate data should be 1 index away. I made a simple for loop that can detect if there is duplicate data in my situation; how could I implement this with mongodb so it removes an array object AT that array index? for (var i in data){ var tempRounds = data[i]['rounds']; for (var ii in data[i]['rounds']){ var currentArrayItem = data[i]['rounds'][ii - 1]; if (tempRounds[ii - 1]) { if (currentArrayItem.roundId == tempRounds[ii - 1].roundId && currentArrayItem.money == tempRounds[ii - 1].money) { console.log("Found a match"); } } } }
Use the aggregation framework to compute a deduplicated version of each document: db.test.aggregate([ { "$unwind" : "$stats" }, { "$group" : { "_id" : "$_id", "stats" : { "$addToSet" : "$stats" } } }, // use $first to add in other document fields here { "$out" : "some_other_collection_name" } ]) Use $out to put the results in another collection, since aggregation cannot update documents. You can use db.collection.renameCollection with dropTarget to replace the old collection with the new deduplicated one. Be sure you're doing the right thing before you scrap the old data, though. Warnings: 1: This does not preserve the order of elements in the stats array. If you need to preserve order, you will have to retrieve each document from the database, manually deduplicate the array client-side, then update the document in the database. 2: The following two objects won't be considered duplicates of each other: { "id" : "foo", "price" : 123 } { "price" : 123, "id" : "foo" } If you think you have mixed key orders, use a $project to enforce a key order between the $unwind stage and the $group stage: { "$project" : { "stats" : { "id_" : "$stats.id", "price_" : "$stats.price" } } } Make sure to change id -> id_ and price -> price_ in the rest of the pipeline and rename them back to id and price at the end, or rename them in another $project after the swap. I discovered that, if you do not give different names to the fields in the project, it doesn't reorder them, even though key order is meaningful in an object in MongoDB: > db.test.drop() > db.test.insert({ "a" : { "x" : 1, "y" : 2 } }) > db.test.aggregate([ { "$project" : { "_id" : 0, "a" : { "y" : "$a.y", "x" : "$a.x" } } } ]) { "a" : { "x" : 1, "y" : 2 } } > db.test.aggregate([ { "$project" : { "_id" : 0, "a" : { "y_" : "$a.y", "x_" : "$a.x" } } } ]) { "a" : { "y_" : 2, "x_" : 1 } } Since the key order is meaningful, I'd consider this a bug, but it's easy to work around.
selecting an nested attribute to update in mongo with node driver
In the following code using the Node Js Driver for MongoDB, the console log in the callback would log the number of drivers for a particular vehicle. My problem is with trying to increment the number of drivers by one with this code $inc:{vehicles[vehicle_number].drivers:1} It's giving me unexpected token errors with the [ and also I'm not even sure if by starting the selector with vehicles it would be acting on the family that's been queried. Can you explain how I might change the code to make the increment not break the function. families.findAndModify({_id:family_id}, [], {$inc:{vehicles[vehicle_number].drivers:1}} , function(err, doc) { console.log(doc.vehicles[vehicle_number].drivers) }) Update the vehicles key contains an array of vehicles. In the code above, vehicle_number represents the index in the array. I'm trying to increment the number of drivers in the findAndModify above. { "_id" : ObjectId("5143ddf5bcf1bf4ab37da054"), "name" : "Jones", "vehicles" : [ { "make" : "Ford", "year" : "2001", "color" : "blue", "registration" : "xdklde", "drivers" : 3 }, { "make" : "Dodge", "year" : "1992", "color" : "green", "registration" : "klrv7z", "drivers" : 2 },
var selector = {}; selector['vehicles.' + vehicle_number + '.drivers'] = 1; then you use it in your query: families.update({'_id':family_id}, {'$inc':selector} , function(err, doc) { console.log(doc); }); This should work.