I am new to MongoDB. I am trying to update fields on each of around 10 to 15k records. While the update is running, the query blocks the whole database: it will not allow any other read or write operations until the query execution completes. Is there any way to improve the performance of this kind of query?
Here is My Code:
// Sample of the ~10-15k records to reconcile against the collection.
var ExisitingData = [
    {
        "isActive": true,
        "barcode": "8908001921015",
        "mrp": 2000
    },
    {
        "isActive": true,
        "barcode": "7808001921019",
        "mrp": 1000
    }
    // ... ~15k entries in total
];
var updatedResult = [];
// Bug fix: these counters were previously implicit globals; declare them.
var totalRecordsUpdated = 0;
var totalIgnoredRecords = 0;
// NOTE(review): async.forEach started populateData for EVERY item at once,
// flooding MongoDB with 10-15k concurrent updates and starving other
// operations — the blocking the question complains about. eachLimit bounds
// the number of in-flight updates instead.
async.eachLimit(ExisitingData, 20, function (item, innerCallback) {
    // Bug fix: the original iterated "ExisistingData" (typo) and, on error,
    // fell through and invoked innerCallback twice.
    exports.populateData(item, function (err, populatedResult) {
        if (err) {
            return innerCallback(err);
        }
        if (populatedResult === true) {
            totalRecordsUpdated++;
        } else {
            totalIgnoredRecords++;
        }
        innerCallback();
    });
}, function (err) {
    console.log("FinalDone");
    // Bug fix: the original declared "updatedRsult" but pushed onto
    // "updatedResult"; the names now agree.
    updatedResult.push({
        totalRecordsUpdated: totalRecordsUpdated,
        totalIgnoredRecords: totalIgnoredRecords
    });
    // Propagate the iteration error instead of always passing null.
    updateCheck(err || null, updatedResult);
});
exports.populateData=function(item, mainCallback) {
var updated = false;
async.parallel([
function (callback1) {
Test.update({
$and: [
{'barcode': item['barcode']},
{'mrp': {$lt: parseInt(item['mrp'])}}
]
}, {$set: {'mrp': parseInt(item['mrp'])}}, function (err, result) {
if (err) {
console.log(err);
callback1();
}
else {
if (result['nModified'] == 1) {
console.log("Its Updated");
console.log(item);
updated=true;
callback1()
}
else {
callback1()
}
}
});
}
], function done(err) {
mainCallback(null,updated);
});
};
Related
I have a requirement where I need to query a table, then loop through the result and query another table inside the loop. I tried using the synchronize Node.js module, but sometimes the API ends with a timeout error with that approach. Now I am trying my code the way shown below, but with no luck.
// Query the user's notifications, then attach cafe details to each row.
var notification = {
    TableName: "Tablename",
    KeyConditionExpression: "#userId =:userid and #datetime <=:datetime",
    ScanIndexForward: false,
    ExpressionAttributeNames: {
        "#userId": "userId",
        "#datetime": "datetime"
    },
    ExpressionAttributeValues: {
        ":userid": userId,
        ":datetime": datetime
    }
};
docClient.query(notification, function (err, result) {
    if (err) {
        context.succeed({success: false, message: JSON.stringify(err, null, 2), method: "notification", type: type});
    } else {
        var items = result.Items;
        if (items.length > 0) {
            // Count completions rather than keying off the last index: the
            // detail queries finish out of order, so the original could
            // respond before every row had its CafeDetail attached.
            var pending = items.length;
            items.forEach(function (item) {
                // Bug fix: the original indexed with an undefined `k` and
                // assigned the (undefined) return value of the asynchronous
                // getDetail call; the result must be read inside the callback.
                getDetail(item.CafeID, function (data) {
                    item['CafeDetail'] = data;
                    pending--;
                    if (pending === 0) {
                        context.succeed({success: true, data: items, message: "Notification list", method: "notification", type: type});
                    }
                });
            });
        } else {
            // Bug fix: the original never responded at all for an empty
            // result set, leaving the Lambda to time out.
            context.succeed({success: true, data: items, message: "Notification list", method: "notification", type: type});
        }
    }
});
// Fetches CafeName/Cafe_MainImage for one cafe from DynamoDB.
// NOTE(review): the callback convention is inconsistent — on error it is
// invoked as callback(err) but on success as callback(items), so the caller
// cannot distinguish an error object from a result in the same first
// argument. Consider the Node style callback(err, items) — TODO confirm
// with callers before changing.
function getDetail(CafeID,callback) {
var param = {
TableName: "Tablename",
ProjectionExpression:"CafeName,Cafe_MainImage",
KeyConditionExpression:"CafeID =:cafeID",
ExpressionAttributeValues: {
":cafeID" :CafeID
}
};
// Asynchronous: getDetail itself returns undefined immediately; the result
// is only available inside the callback.
docClient.query(param, function(err, CafeDetail) {
if (err) {
console.log (err)
callback(err);
} else {
callback(CafeDetail.Items);
console.log(CafeDetail.Items);
}
});
}
In the output, the variable (result[k]['CafeDetail']) always comes out as undefined, because getDetail is asynchronous and returns before its query completes. However, the console.log inside the getDetail function does print the result.
I am a newbie to AWS Lambda with Node.js. Any help will be appreciated.
Thanks in advance.
I just found the solution to my question, so I am posting it; maybe it will help someone else facing the same issue.
// Query the 12 most recent notifications for the user, then enrich each row
// with its cafe's name and main image before responding.
var notification = {
    TableName: "Tablename",
    KeyConditionExpression: "#userId =:userid",
    Limit: 12,
    ScanIndexForward: false,
    ExpressionAttributeNames: {
        "#userId": "userId",
    },
    ExpressionAttributeValues: {
        ":userid": userId
    }
};
docClient.query(notification, function (err, result) {
    if (err) {
        context.succeed({success: false, message: JSON.stringify(err, null, 2), method: "notification", type: type});
    } else {
        var results = result.Items;
        if (results.length > 0) {
            // Bug fix: the original fired context.succeed when the callback
            // for the LAST INDEX returned, but the detail queries complete
            // out of order, so earlier rows could still be missing their
            // CafeDetail. Counting completions closes that race. (The unused
            // `ret` array was also removed.)
            var completed = 0;
            results.forEach(function (row) {
                var param = {
                    TableName: "Tablename",
                    ProjectionExpression: "CafeName,Cafe_MainImage",
                    KeyConditionExpression: "CafeID =:cafeID",
                    ExpressionAttributeValues: {
                        ":cafeID": row.CafeID
                    }
                };
                docClient.query(param, function (err, CafeDetail) {
                    if (err) {
                        context.succeed({success: false, message: JSON.stringify(err, null, 2), method: "notification", type: type});
                        return;
                    }
                    console.log(CafeDetail.Items);
                    row['CafeDetail'] = CafeDetail.Items;
                    completed++;
                    if (completed === results.length) { // every row enriched
                        context.succeed({success: true, data: results, message: "Notification list", method: "notification", type: type});
                    }
                });
            });
        } else {
            context.succeed({success: true, data: result, message: "Notification list", method: "notification", type: type});
        }
    }
});
I need to perform a complicated aggregation on a MongoDB collection, and I need to go through the results twice. After the first pass, I execute rewind() on the cursor; then, when I try to perform the second pass, I get the error. What is strange is that if I replace each() with a couple of next() calls, everything works as expected.
Is it a bug in each(), which I should submit to MongoDB bugtracker? Or it is some sort of my error?
Much simplified sample, but still reproduce the error:
// Minimal reproduction: an AggregationCursor consumed via each(), rewound,
// and then read again with next() — which raises "Cursor is closed".
// NOTE(review): this script intentionally preserves the failing behaviour
// from the bug report; do not "fix" it without re-checking the repro.
var MongoClient = require('mongodb').MongoClient;
var ObjectId = require('mongodb').ObjectId;
MongoClient.connect('mongodb://localhost:27017/test', function(err, db) {
// Create a collection
let collection = db.collection('aggregation_each_example');
// collection.drop();
// Some docs for insertion
function createData(callback) {
let docs = [
{
"oid" : new ObjectId("59883f2e7d8c6325200b81e4"),
"tod" : new Date("2017-08-07T10:21:34.877Z")
},
{
"veryImportanta" : "newOne",
"oid" : new ObjectId("59883f2e7d8c6325200b81e4")
}
];
// Insert the docs
collection.insertMany(docs, callback);
}
// Returns the insertMany callback: runs the aggregation, drains it with
// each(), then rewinds and hands the cursor to `callback`.
function getData(callback) {
return function(err) {
if (err) {
console.error(err);
} else {
// Only the second document matches ("tod" absent). batchSize 1 forces
// the driver to exhaust the batch, which is relevant to the repro.
let cursor = collection.aggregate([
{
"$match": {
"oid": new ObjectId("59883f2e7d8c6325200b81e4"),
"tod": {
"$exists": 0
}
}
}
], {cursor: {batchSize: 1}});
let count = 0;
// each() invokes the callback once per doc, then once with doc === null
// when the cursor is exhausted.
cursor.each(function(err, doc) {
if(doc) {
console.log(doc);
count++;
} else {
console.log(cursor.isClosed());
cursor.rewind();
console.log(cursor.isClosed());
callback(count, cursor);
}
});
}
}
}
// Insert, aggregate, then attempt the second pass — next() here is where
// the "Cursor is closed" MongoError is thrown.
createData(getData(function(count, cursor) {
console.log("Count: "+ count);
console.log("Cursor is closed: " + cursor.isClosed());
cursor.next(function(err, doc) {
if (err) console.log(err);
else console.log(doc);
// db.dropDatabase();
db.close();
});
}));
});
Output:
{ _id: 598851ad48a1841c18b50bcf,
veryImportanta: 'newOne',
oid: 59883f2e7d8c6325200b81e4 }
true
false
Count: 1
Cursor is closed: false
{ MongoError: Cursor is closed
at Function.MongoError.create (error.js:31:11)
at nextObject (node_modules\mongodb\lib\cursor.js:669:112)
at AggregationCursor.Cursor.next (node_modules\mongodb\lib\cursor.js:269:12)
at error.js:61:12
at error.js:50:13
at handleCallback (node_modules\mongodb\lib\utils.js:120:56)
at node_modules\mongodb\lib\cursor.js:748:16
at handleCallback (node_modules\mongodb\lib\utils.js:120:56)
at node_modules\mongodb\lib\cursor.js:682:5
at handleCallback (node_modules\mongodb-core\lib\cursor.js:171:5) name: 'MongoError', message: 'Cursor is closed', driver: true }
Environment:
nodejs: v6.6.0
mongodb: 2.2.30
os: windows 10
mongodb engine: 3.4
I have a record in such a way like below
1)
{
"name":"A",
"parents":[ADMIN],
"childrens":[B,C,D]
}
2)
{
"name":"B",
"parents":[A],
"childrens":[D,K,L]
}
3)
{
"name":"C",
"parents":[B],
"childrens":[K,L]
}
4)
{
"name":"D",
"parents":[C],
"childrens":[L]
}
Here, if I add a new record 'E' and make 'C' its parent, then the logic is that 'E' should be added as a child to the parent of 'C' (i.e. to 'B'), and at the same time 'E' should also be added to the parent of 'B'. This logic gets quite confusing and complex when I start to write the code, but I have achieved it up to the point where I can make 'E' a child of 'C' and also of the parent of 'C' — but no further.
My Code:
function (callback) {
var item = {'employee' : employee.manager };
Employeehierarchy.find(item).exec(function (err, employeeparent) {
if (employeeparent && employeeparent.length > 0) {
Employeehierarchy.update(
{ _id: employeeparent[0]._id},
{"$push": { "childrens": employee._id } }
).exec(function (err, managerparent) {
});
callback(err,employeeparent);
} else{
callback(err,employeeparent);
}
}
});
},
//Finding the parent record of the manager in hierarchy
// Waterfall step: given the employee's parent record, push the new employee
// onto the grandparent's children list as well.
// NOTE(review): callback(err, managerparent) fires BEFORE the update's exec
// callback returns, so the next waterfall step can run while the $push is
// still in flight, and any update error is silently dropped.
// NOTE(review): the trailing "}else { callback(); }" below is dangling — its
// matching "if" appears to belong to code elided from this excerpt; the
// fragment does not parse standalone. TODO reconcile with the full source.
function (employeeparent, callback) {
var item = {'employee' : employeeparent[0].parents };
Employeehierarchy.find(item).exec(function (err, managerparent) {
if (err) {
return res.status(400).send({ message: errorHandler.getErrorMessage(err) });
} else {
if (managerparent && managerparent.length > 0) {console.log(managerparent+'managerparent')
Employeehierarchy.update(
{ _id: managerparent[0]._id},
{"$push": { "childrens": employee._id } }
).exec(function (err, managerparent) {
});
callback(err,managerparent);
} else{
callback(err,managerparent);
}
}
});
}else {
callback();
}
The following code updates an array of specified objects or insert if the object is not in the database. It works fine but I'm new in mongodb and I'm not sure if this is a safe or fast way to do it.
Maybe I should use updateMany? I tried to use it but I couldn't get the same behaviour as the following code.
// Open the connection, run the upserts, then close the client.
mongodb.connect(mongo_url, function (err, db) {
    if (err) {
        console.log(err);
        return;
    }
    var productsCollection = db.collection("products");
    mongoUpsert(productsCollection, data_products, function () {
        db.close();
    });
});
// Fires one upsert per product, keyed on product_id, without tracking
// completion or errors (fire-and-forget, as in the original), then
// immediately invokes cb(false).
function mongoUpsert(collection, data_array, cb) {
    data_array.forEach(function (product) {
        collection.update(
            { product_id: product.product_id },
            product,
            { upsert: true }
        );
    });
    return cb(false);
}
Using the bulkWrite API to carry out the updates handles this better
// Connect, delegate the bulk upsert to mongoUpsert, and close when done.
mongodb.connect(mongo_url, function (err, db) {
    if (err) {
        console.log(err);
    } else {
        var products = db.collection("products");
        mongoUpsert(products, data_products, function () {
            db.close();
        });
    }
});
/**
 * Upserts every product in a single bulkWrite round-trip. Each operation
 * matches on product_id and skips documents whose post_modified is already
 * equal to the incoming value (no-op writes are filtered out server-side).
 *
 * @param {Object} collection - MongoDB collection.
 * @param {Array} data_array - product documents, each with product_id.
 * @param {Function} cb - invoked once the bulk write has completed, as
 *   cb(err || false). The original always received `false`, so existing
 *   callers that ignore the argument keep working.
 */
function mongoUpsert(collection, data_array, cb) {
    var bulkUpdateOps = data_array.map(function (data) {
        return {
            "updateOne": {
                "filter": {
                    "product_id": data.product_id,
                    "post_modified": { "$ne": data.post_modified }
                },
                "update": { "$set": data },
                "upsert": true
            }
        };
    });
    // bulkWrite rejects an empty operations array, so short-circuit.
    if (bulkUpdateOps.length === 0) {
        return cb(false);
    }
    collection.bulkWrite(bulkUpdateOps, function (err, r) {
        // Bug fix: the original called cb(false) synchronously BEFORE the
        // bulk write completed (so db.close() in the caller could race the
        // writes) and dropped the error entirely.
        cb(err || false);
    });
}
If you're dealing with larger arrays i.e. > 1000 then consider sending the writes to the server in batches of 500 which gives you a better performance as you are not sending every request to the server, just once in every 500 requests.
For bulk operations MongoDB imposes a default internal limit of 1000 operations per batch and so the choice of 500 documents is good in the sense that you have some control over the batch size rather than let MongoDB impose the default, i.e. for larger operations in the magnitude of > 1000 documents. So for the above case in the first approach one could just write all the array at once as this is small but the 500 choice is for larger arrays.
// Send the upserts to the server in batches of 500 so no single bulkWrite
// carries an oversized payload (MongoDB's own internal limit is 1000 ops
// per batch). `ops.length` replaces the separate `counter` variable — they
// were always equal modulo the flushes.
var ops = [];
data_array.forEach(function (data) {
    ops.push({
        "updateOne": {
            "filter": {
                "product_id": data.product_id,
                "post_modified": { "$ne": data.post_modified }
            },
            "update": { "$set": data },
            "upsert": true
        }
    });
    if (ops.length === 500) {
        collection.bulkWrite(ops, function (err, r) {
            // do something with result
        });
        ops = [];
    }
});
// Flush the final partial batch, if any.
// Bug fix: the original's trailing flush was missing the closing
// parenthesis of the bulkWrite call — a syntax error.
if (ops.length > 0) {
    collection.bulkWrite(ops, function (err, r) {
        // do something with result
    });
}
I want to have exactly auto-increment field like relational or objective databases, so i need an integer _id field with automatically set field value, value should be one more last record _id value like this:
data:
{_id:1,name"foo"}
{_id:2,name"bar"}
remove last record:
{_id:1,name"foo"}
add new record:
{_id:1,name"foo"}
{_id:3,name"newbar"}
I added a function to my datastore and calculate maximum of _id and plus 1 max(_id)+1 and set as field value, but there is problem here:
When we use an auto-increment field in relational databases, it works as I said: after you remove the last record, its number stays reserved, and newly inserted records continue the increment. But my approach reuses the _id of the removed record for the new record.
My code is:
var Datastore = require('nedb'),
    localDb = new Datastore({
        filename: __dirname + '/dbFilePath.db',
        autoload: true
    });
/**
 * Looks up the highest _id currently stored and passes it to onFind as
 * (err, maxId); maxId is undefined when the datastore is empty.
 * Callable as getMax(onFind) or getMax(fieldName, onFind) — fieldName is
 * accepted for backward compatibility but the sort is always on _id.
 */
localDb.getMax = function (fieldName, onFind) {
    if (typeof fieldName === 'function') {
        onFind = fieldName; // single-argument call style (used by insertAutoId)
    }
    // Bug fixes: the original referenced an undefined `db` instead of
    // `localDb`, and read docs['_id'] from the result ARRAY rather than
    // from its first element.
    localDb.find({}).sort({ _id: -1 }).limit(1).exec(function (err, docs) {
        var max = (docs && docs.length > 0) ? docs[0]._id : undefined;
        onFind && onFind(err, max);
    });
    return localDb;
};
/**
 * Inserts `data` with _id = (current max _id) + 1, unless the caller already
 * supplied an _id. onAdd is invoked as (err, newDoc).
 * NOTE(review): max+1 reuses the id of a deleted last record — exactly the
 * behaviour the question complains about; a persisted counter avoids it.
 */
localDb.insertAutoId = function (data, onAdd) {
    localDb.getMax(function (err, maxValue) {
        if (err) {
            // Bug fix: the original ignored lookup errors entirely.
            onAdd && onAdd(err);
            return;
        }
        // Bug fix: on an empty datastore maxValue is undefined and the
        // original computed NaN + 1; start from 0 instead.
        var newIndex = (typeof maxValue === 'number' ? maxValue : 0) + 1;
        if (!data["_id"]) {
            data["_id"] = newIndex;
        }
        localDb.insert(data, function (err, newDoc) {
            onAdd && onAdd(err, newDoc);
        });
    });
    return localDb;
};
An improved answer for nedb would be:
// Atomically increments and returns the persisted sequence counter stored
// under _id '__autoid__' (created on first use via upsert).
// cb is invoked as (err, seq).
db.getAutoincrementId = function (cb) {
    this.update(
        { _id: '__autoid__' },
        { $inc: { seq: 1 } },
        { upsert: true, returnUpdatedDocs: true },
        function (err, affected, autoid) {
            // Bug fix: on error `autoid` is undefined and the original
            // dereferenced autoid.seq unconditionally, masking the real
            // error with a TypeError.
            cb && cb(err, autoid && autoid.seq);
        }
    );
    return this;
};
Which is equivalent to the mongodb way:
// MongoDB equivalent: findAndModify with $inc and new:true returns the
// post-increment document. cb is invoked as (err, seq).
// NOTE(review): unlike the nedb version above there is no upsert option
// here, so the '__autoid__' document must already exist — TODO confirm.
db.getAutoincrementId = function (cb) {
    this.findAndModify({
        query: { _id: '__autoid__' },
        update: { $inc: { seq: 1 } },
        new: true
    }, // Bug fix: the original was missing this comma — a syntax error.
    function (err, autoid) {
        // Guard the dereference: autoid is undefined when err is set.
        cb && cb(err, autoid && autoid.seq);
    });
    return this;
};
You can store the last value of the index in the database. Something like this:
// Persisted-counter approach: the next id lives in the document with
// _id '__autoid__', read with findOne and bumped with $set.
var Datastore = require('nedb');
var db = new Datastore({
filename: __dirname + '/dbFilePath.db',
autoload: true
});
// Initialize the initial index value
// (if it already exists in the database, it is not overwritten)
// NOTE(review): when the document already exists this insert presumably
// fails with a duplicate-_id error, and no callback is supplied here to
// observe it — confirm that nedb tolerates the fire-and-forget insert.
db.insert({_id: '__autoid__', value: -1});
// Returns the next id via onFind(err, value).
// NOTE(review): findOne-then-update is not atomic — two overlapping
// getAutoId calls can read the same value and hand out duplicate ids; the
// single $inc + upsert variant above avoids that race.
db.getAutoId = function(onFind) {
db.findOne( { _id: '__autoid__' }, function(err, doc) {
if (err) {
onFind && onFind(err)
} else {
// Update and returns the index value
db.update({ _id: '__autoid__'}, { $set: {value: ++doc.value} }, {},
function(err, count) {
onFind && onFind(err, doc.value);
});
}
});
return db;
}
I do not know if it will still be useful to you, but I use a database to store the next ids, inspired by the MySQL system, which always reserves the next id.
So I created a function that verifies if there is an id to the db, if it does not, it add with the value "1", and when it updates it looks for and if it exists and it performs the sequence.
This gave me full control over my ids.
The schema would be:
{
name: nameDb,
nextId: itemID
}
If you want you can create functions for updating documents, versioning, etc.
example:
// Dedicated datastore holding one {name, nextId} counter document per
// logical collection.
db.autoincrement = new Datastore({filename: 'data/autoincrement.db', autoload: true});
// Reserves and returns (via cb) the next id for the collection `nameDb`.
// First call for a name hands out 1 and persists nextId = 2; later calls
// hand out doc.nextId and persist nextId + 1.
// NOTE(review): `throw err` inside these async callbacks cannot be caught
// by the caller and will crash the process — consider cb-based error
// reporting. The findOne-then-update pair is also not atomic, so two
// concurrent calls for the same name can presumably receive the same id —
// TODO confirm whether callers serialize access.
function getUniqueId(nameDb, cb) {
db.autoincrement.findOne({name: nameDb}, function (err, doc) {
if (err) {
throw err;
} else {
if (doc) {
// Counter exists: reserve doc.nextId and persist the incremented value.
const itemID = doc.nextId + 1;
db.autoincrement.update({name: nameDb}, {
name: nameDb,
nextId: itemID
}, {}, function (err, numReplaced) {
// Compact immediately so the datafile does not grow one line per update.
db.autoincrement.persistence.compactDatafile();
if (err) {
throw err;
} else {
// console.log(numReplaced);
}
cb(doc.nextId);
});
} else {
// First use of this name: hand out 1 and store 2 as the next id.
const data = {
name: nameDb,
nextId: 2
};
db.autoincrement.insert(data, function (err, newDoc) {
if (err) {
throw err;
} else {
// console.log(newDoc);
}
cb(1);
});
}
}
});
}
insert new document example:
// Example caller: reserve a fresh auto-increment id, stamp it on the
// document, then insert. cb receives a {error, message} result object.
function insert(req, cb) {
    // Bug fix: the original referenced an undefined `data`; presumably the
    // request itself is the document payload — TODO confirm with callers.
    var data = req;
    getUniqueId("testdb", function (uniqueId) {
        data.itemId = uniqueId;
        db.testdb.insert(data, function (err, newDoc) {
            if (err) {
                cb({error: '1', message: 'error#2'});
                // Bug fix: the original threw AFTER invoking cb, which both
                // double-signaled the failure and, being inside an async
                // callback, would crash the process; log and return instead.
                console.error(err);
                return;
            }
            cb({error: '0', message: 'Item add'});
        });
    });
}