I am using Node.js with the native MongoDB driver to iterate over a large product collection. Everything works, but I want to write a footer line to a file after all documents have been processed. How can I accomplish this?
var MongoClient = require('mongodb').MongoClient
var assert = require('assert');
// Writes the feed header to /tmp/<outputFilename>, then sends every product in
// the "electronics" catalog to a randomly chosen worker. `fs`, `feed` and
// `workers` are defined outside this snippet.
var filename = '/tmp/' + feed.outputFilename;
// NOTE(review): writeFileSync is synchronous and reports failure by throwing;
// it takes no callback, so the function passed as the 4th argument is never called.
fs.writeFileSync(filename, feed.header, feed.encoding, function(err) {
if(err) throw err;
});
var url = process.env.DB_HOST;
MongoClient.connect(url, function(err, db) {
assert.equal(null, err);
var collection = db.collection('products');
// NOTE(review): `cursor` receives the return value of .forEach(), not the cursor
// returned by find(); the error quoted further down shows it ends up undefined.
var cursor = collection.find({ "catalog": "electronics"}, { "batchSize": 1,fields: {} }).forEach(function(product) {
if(product != null) {
// Pick one worker at random and hand it the product together with the feed settings.
var child = workers[Math.floor(Math.random()*workers.length)];
var data = {};
data.product = product;
data.feed = feed;
child.send(data);
}
}, function(err) {
// Second argument to forEach: presumably invoked once iteration has ended
// (TODO confirm against the driver docs). Closes the connection.
assert.equal(null, err);
db.close();
});
// This doesn't work for me (Error: Cannot read property 'on' of undefined)
// NOTE(review): `filename` already starts with '/tmp/', so '/tmp/' + filename
// in the disabled code below would double the prefix.
/*cursor.on('end', function() {
fs.appendFile('/tmp/' + filename, feed.footer, function(err) {
if(err) throw err;
});
db.close();
})*/
});
Possibly what could be happening here is that the value returned from your call to forEach is being assigned into the cursor var.
Try assigning the value returned from the find into the cursor var and calling your forEach as cursor.forEach and cursor.on later.
Related
I am using the following to insert into MongoDB.
// For each incoming tag, find the logger parameter whose key list equals the
// tag id, insert a raw document and then, once that insert has called back,
// a history document. `data`, `loggerParams`, `database`, `log` and `util`
// come from outside this snippet.
var tagData = JSON.parse(data);
var allTags = tagData.tags;
for (var j = 0; j < allTags.length; j++) {
var p = allTags[j].tagId.toString();
for (var k = 0; k < loggerParams.length; k++) {
// Comma-joined key names of this logger parameter, compared against the tag id.
var q = Object.keys(loggerParams[k]).toString();
if (p === q) {
// Prepare raw data tag
// NOTE(review): `var` is function-scoped, so every iteration reassigns the same
// `tagRawDoc` binding; callbacks that run later read whatever it holds by then.
var tagRawDoc = {};
// Simple key-value assignment here
// Document prepared; ready to insert into MongoDB
database.addDocument('tagraw', tagRawDoc, function (err) {
if (err) {
log.info(util.format('Error adding document to tagrawdatas. %s', err.message));
throw err;
} else {
// Prepare history tag
var historyTagDoc = {};
// Reads the shared `tagRawDoc` binding at the time this callback runs, not
// the value from the iteration that started the insert.
historyTagDoc.tagNameAlias = tagRawDoc.tagNameAlias;
// Simple key-value assignment here
// Document prepared; ready to insert into MongoDB
database.addDocument('taghistory', historyTagDoc, function (err) {
if (err) {
// NOTE(review): the message says "tagrawdatas" although this insert targets 'taghistory'.
log.info(util.format('Error adding document to tagrawdatas. %s', err.message));
throw err;
}
});
}
});
// Match found; exit loop
break;
}
}
}
loggerParams is a simple JSON document read from a file elsewhere. It serves as a look-up in this code to build the document to be inserted. There will be 12 values in the allTags array. These 12 values are inserted successfully into the tagraw collection. However, in the taghistory collection, the values from the last (most recent) entry made into the tagraw collection are repeated 12 times. Why does this happen?
The database.addDocument is shown below. It is a part of this article I am trying to replicate.
var MongoClient = require('mongodb').MongoClient;
var assert = require('assert');
var logger = require('../../util/logger');
var util = require('util');
/**
 * Thin wrapper around a single MongoDB connection.
 *
 * `db` holds the sentinel string "empty" until connect() succeeds and again
 * after close(). `log` is obtained from the project logger factory.
 */
function DB() {
  this.db = "empty";
  this.log = logger().getLogger('mongoMange-DB');
}

/**
 * Connects to `uri` unless a connection is already held.
 *
 * @param {string} uri - MongoDB connection string.
 * @param {function(Error=)} callback - Called with no argument on success
 *   (or when already connected), or with the driver error on failure.
 */
DB.prototype.connect = function(uri, callback) {
  var _this = this;
  this.log.info(util.format('About to connect to DB'));
  if (this.db !== "empty") {
    this.log.info('Already connected to database.');
    callback();
    return;
  }
  MongoClient.connect(uri, function(err, database) {
    if (err) {
      _this.log.info(util.format('Error connecting to DB: %s', err.message));
      callback(err);
      return;
    }
    _this.db = database;
    _this.log.info(util.format('Connected to database.'));
    callback();
  });
};

/**
 * Closes the held connection, if any, and resets the sentinel so that a later
 * connect() opens a fresh connection instead of reporting "already connected".
 *
 * @param {function()} callback - Always called once close has been requested.
 */
DB.prototype.close = function(callback) {
  // Nothing to close when connect() never succeeded; calling .close() on the
  // sentinel string would throw a TypeError.
  if (this.db === "empty") {
    this.log.info('Database is not connected; nothing to close.');
    callback();
    return;
  }
  // Use the instance logger: there is no free-standing `log` in this module.
  this.log.info('Closing database');
  this.db.close();
  this.db = "empty";
  this.log.info('Closed database');
  callback();
};

/**
 * Inserts one document into the named collection.
 *
 * @param {string} coll - Collection name.
 * @param {Object} doc - Document to insert.
 * @param {function(Error=)} callback - Called with no argument on success, or
 *   with the driver's Error object on failure (same convention as connect()),
 *   so callers can read `err.message` and rethrow a real Error.
 */
DB.prototype.addDocument = function(coll, doc, callback) {
  var collection = this.db.collection(coll);
  var _this = this;
  collection.insertOne(doc, function(err, result) {
    if (err) {
      _this.log.info(util.format('Error inserting document: %s', err.message));
      callback(err);
      return;
    }
    _this.log.info(util.format('Inserted document into %s collection.', coll));
    callback();
  });
};
module.exports = DB;
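That's because you are mixing a synchronous for loop with asynchronous code (database.addDocument), which causes problems with function scope in Node.js: a variable declared with var is shared by every iteration, and by the time the insert callbacks run the loop has already finished, so they all see the value from the last iteration.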
A simple example of this kind of thing:
// `var i` is a single function-scoped binding shared by all ten arrow functions.
// The timers fire only after the loop has finished, so this logs 10 ten times.
for(var i = 0; i < 10; i++){
setTimeout(() => console.log(i), 0);
}
You should use a package like async to handle flow control when iterating arrays/object asynchronously.
Simple example of your code refactored to use async:
var async = require('async');
// Inserts a raw document and then a history document for every tag that has a
// matching logger parameter. Each tag is handled in its own function call, so
// `tagRawDoc` is a separate binding per tag and the history insert always
// reads the raw document it belongs to.
var tagData = JSON.parse(data);
var allTags = tagData.tags;
async.each(allTags, function(tag, done){
  var tagId = tag.tagId.toString();
  var loggerParam = loggerParams.find(function(candidate){
    return Object.keys(candidate).toString() === tagId;
  });
  // The original nested loop inserted nothing for tags without a matching
  // logger parameter; keep that behaviour instead of inserting empty documents.
  if (!loggerParam) return done();

  var tagRawDoc = {};
  // Simple key-value assignment here
  // Document prepared; ready to insert into MongoDB
  return database.addDocument('tagraw', tagRawDoc, function (err){
    if (err) return done(err);
    // Prepare history tag
    var historyTagDoc = {};
    historyTagDoc.tagNameAlias = tagRawDoc.tagNameAlias;
    // Simple key-value assignment here
    // Document prepared; ready to insert into MongoDB
    return database.addDocument('taghistory', historyTagDoc, done);
  });
}, (err) => {
  // Called once, either with the first error or after every tag has finished.
  if(err) throw err;
  console.log('All done');
});
I need to run several queries against MongoDB in parallel using the node.js driver.
Currently I am using a counter which gets decreased any time a query completes. When the counter reaches 0 it means that all queries have completed, and then I close the db connection.
In a simple case with 2 queries running in parallel the code is the following
var mongodb = require('mongodb');
var MongoClient = require('mongodb').MongoClient;
var db;
// Runs two queries against "resumes" at the same time and closes the
// connection once both have reported back. Results are collected in
// completion order.
MongoClient.connect("mongodb://localhost:27017/company", function(connectErr, database) {
  if (connectErr) throw connectErr;
  db = database;

  const collectionName = "resumes";
  const result = [];
  let pending = 2;

  // Shared completion handler: records one response and, when nothing is
  // pending any more, prints everything and closes the connection.
  function finalCallback(err, resp) {
    if (err) throw err;
    pending -= 1;
    result.push(resp);
    if (pending !== 0) return;
    console.log(result);
    db.close();
  }

  const elemMatchQuery = {"jobs": {$elemMatch: {"company": "CNA", position: "director"}}};
  const andQuery = {$and: [{"jobs.company": "CNA"}, {"jobs.position": "director"}]};

  db.collection(collectionName).find(elemMatchQuery).toArray(finalCallback);
  db.collection(collectionName).find(andQuery).toArray(finalCallback);
});
My question is whether there is a more elegant solution. I am thinking of something along the lines of the forkJoin() method of Observable.
Thanks in advance
That's what Promises are for:
var mongodb = require('mongodb');
var MongoClient = require('mongodb').MongoClient;
var db;
// Runs both queries in parallel via Promise.all and closes the connection
// whether they succeed or fail.
MongoClient.connect("mongodb://localhost:27017/company", function(connectErr, database) {
  if (connectErr) throw connectErr;
  db = database;

  // Adapts the callback-style toArray() to a promise: resolves with the
  // matching documents, rejects with the error passed to the callback.
  const queryPromise = (collectionName, query) =>
    new Promise((resolve, reject) => {
      db.collection(collectionName).find(query).toArray((queryErr, resp) => {
        if (queryErr) {
          reject(queryErr);
          return;
        }
        resolve(resp);
      });
    });

  const collection = "resumes";
  const elemMatchQuery = {"jobs": {$elemMatch: {"company": "CNA", position: "director"}}};
  const andQuery = {$and: [{"jobs.company": "CNA"}, {"jobs.position": "director"}]};

  const allQueries = Promise.all([
    queryPromise(collection, elemMatchQuery),
    queryPromise(collection, andQuery)
  ]);

  allQueries
    .then((result) => {
      // result is an array of responses here
      db.close();
    })
    .catch((err) => {
      console.log(err);
      db.close();
    });
});
Using async/await is simpler:
var mongodb = require('mongodb');
var MongoClient = require('mongodb').MongoClient;
var db;
// Runs both queries in parallel with async/await and always closes the
// connection afterwards.
MongoClient.connect("mongodb://localhost:27017/company", function(err, database) {
  if(err) throw err;
  db = database;
  let collection = "resumes";

  async function runQueries() {
    try {
      // toArray() is called without a callback so that it returns a promise.
      // Promise.all must receive the promises themselves: handing it
      // un-invoked async functions resolves immediately with those functions
      // and never runs a query.
      const result = await Promise.all([
        db.collection(collection).find({"jobs": {$elemMatch: {"company": "CNA", position: "director"}}}).toArray(),
        db.collection(collection).find({$and: [{"jobs.company": "CNA"}, {"jobs.position": "director"}]}).toArray()
      ]);
      // result is an array of arrays of queries responses here
    } catch (queryErr) {
      console.log(queryErr);
    } finally {
      db.close();
    }
  }

  // Errors are handled inside runQueries, so the returned promise is not awaited.
  runQueries();
});
I'm having a weird issue with MongoDB. My database collection is closing, which I suppose is what it's supposed to do (I'm following along from the mongo boilerplate) BUT I see no reason why the docs would be null value. I've checked this every way I can think of, but I don't quite understand the cursor object.
Console.logging it seems to give me a bunch of native mongo properties ( which look like functions ie each, toArray, etc) so it seems right, but it's not a regular object with a data field that I can see.
After it hits that if block with the if(docs==null), the connection gets closed and it will not execute the each block in the else if.
Ideally if there was a way to help troubleshoot or figure out how to make this execute that would be great.
More background:
in the mongo shell I can ask for
use weather // no issues
and get the results of the data object which is 3000 records with an empty find();
var MongoClient = require('mongodb').MongoClient;
// Walks the 'data' collection and sets month_high on the first document seen
// for each State value.
MongoClient.connect('mongodb://localhost:27017/weather', function(err, db) {
// NOTE(review): a connection error is only logged; execution continues and uses `db` anyway.
if(err){
console.log("line 7" + err);
}
var query = {};
var projection = { 'State' : 1, 'Temperature' : 1 };
// NOTE(review): no sort is applied here, yet the logic below assumes documents
// arrive grouped by State with the highest temperature first - confirm.
var cursor = db.collection('data').find(query, projection);
console.log("cursor" + cursor); // [object Object]
var state = '';
var operator = {'$set' : {'month_high' : true } };
cursor.each(function(err, doc) {
if (err) throw err;
// A null doc is treated as "cursor exhausted", not as a failed lookup.
if (doc == null) {
console.log("docs have value:" + doc); // null here, so the connection is closed on the next line
// NOTE(review): updates started below may not have called back yet when this
// runs; the "Connection Closed By Application" errors shown after this snippet
// are consistent with that.
return db.close();
} else if (doc.State !== state) {
// first record for each state is the high temp one
state = doc.State;
db.collection('data').update( {'_id':doc._id}, operator, function(err, updated) {
if (err) console.log(err);
// return db.close(); ?
});
}
});
});
{ [MongoError: Connection Closed By Application] name: 'MongoError' } //doh
{ [MongoError: Connection Closed By Application] name: 'MongoError' } //doh
{ [MongoError: Connection Closed By Application] name: 'MongoError' } //doh
Figuring out when to call db.close() can be a bit messy. Here it is rewritten with find().toArray() plus some logic to test when you're updating the last matched doc. This works for me.
var MongoClient = require('mongodb').MongoClient;
var assert = require('assert');
var Q = require('q');
// Loads all documents, flags the first document of each State with
// month_high, and closes the connection only after every update has called
// back.
// NOTE(review): like the question's code, this assumes documents arrive
// grouped by State with the highest temperature first; no sort is applied.
MongoClient.connect('mongodb://localhost:27017/weather', function(err, db) {
  assert.equal(null, err);
  var query = {};
  var projection = { 'State' : 1, 'Temperature' : 1 };
  var state = '';
  var operator = {'$set' : {'month_high' : true } };
  var promises = [];
  db.collection('data').find(query, projection).toArray(function(err, docs) {
    assert.equal(null, err);
    docs.forEach(function(doc) {
      // Only the first document of each run of equal State values is updated.
      if (null === doc || state === doc.State) {
        return;
      }
      // Remember the state just handled; without this assignment every
      // document would differ from '' and all of them would be updated.
      state = doc.State;
      var deferred = Q.defer();
      promises.push(deferred.promise);
      db.collection('data').update( {'_id':doc._id}, operator, function(err, updated) {
        assert.equal(null, err);
        console.log("Updated "+updated+" documents.");
        deferred.resolve();
      });
    });
    // Resolves once every started update has called back (immediately when
    // there were none), so close() cannot cut an update off.
    Q.all(promises).done(function() {
      console.log("closing");
      db.close()
    });
  });
});
EDIT: Added Q since db.close() was still called prematurely in some cases.
I'm trying to update the states with the highest temperature by adding a field, but when I run the loop, the connection gets closed! It only does the first update; for the next one it says
MongoError: Connection Closed By Application
Why? I'm not closing the connection.
var MongoClient = require('mongodb').MongoClient;
// Iterates the 'data' collection sorted by State ascending and Temperature
// descending, and sets month_high on the first document of each state.
MongoClient.connect('mongodb://localhost:27017/weather', function(err, db) {
var state = "";
// Reused for every update; its _id is overwritten per document below.
var query = {};
if(err) throw err;
var grades = db.collection('data');
var options = { 'sort' : [['State', 1], ['Temperature', -1]] };
var cursor = grades.find({}, {}, options);
cursor.each(function(err, doc) {
if(err) throw err;
// A null doc is treated as "cursor exhausted".
// NOTE(review): this close can run while updates started below are still
// pending, which matches the "Connection Closed By Application" error reported.
if(doc == null) {
return db.close();
}
if(state != doc.State){
state = doc.State;
console.dir(state);
query['_id'] = doc['_id'];
grades.update(query, {$set: {"month_high": true}}, function(err, updated){
if(err) throw err;
console.dir("Se han modificado " + updated + " Elementos!");
// return db.close(); Commenting this line out does not help: the connection still closes.
});
}
});
});
See this post for the solution Removing documents from a mongodb
As someone wrote: "You should not use throw for callback, throw is good for function stack"
I have a MongoDB collection to which I want to add a field at random locations using $set; at least I am pretty sure it's a $set. Correct me if I am wrong.
I am including code. Around the middle I include remarks of what I am trying to do:
var MongoClient = require('mongodb').MongoClient;
// Question code: iterates the 'data' collection sorted by State ascending and
// Temperature descending, and tries to attach a $set update to the first
// document of each state. The update part is the unresolved problem.
MongoClient.connect('mongodb://localhost:27017/weather', function(err, db) {
// Also, what is the best way to handle err in this code?
//if(err) throw err;
var query = { };
var sortorder = {"State":1, "Temperature":-1}
// Last State value seen; null until the first document arrives.
var xx = null;
//var cursor = db.collection('data').find();
var cursor = db.collection('data').find().sort(sortorder);
cursor.each(function(err, doc) {
//if(err) throw err;
// A null doc is treated as "cursor exhausted".
if(doc == null) {
return db.close();
}
if (xx == doc.State){
}else{
console.dir("New state -----------" + doc.State);
console.dir("Tempurature -----------" + doc.Temperature);
// !!!!!!!!!!!!!!!!!!!!!!!!!! this is the problem area.
//--- this is the part I am trying to figure out...
// NOTE(review): the next line is not valid JavaScript - the object literal is
// missing its surrounding braces - and `update_routine` is never declared.
update_routine = $set:{"month_high---test001":true};
// NOTE(review): this only sets a property on the in-memory `doc`; nothing in
// this snippet sends it to the database.
doc.update = update_routine;
// How do I do a $set operation on a mongoDB cursor. which I have here.
xx = doc.State;
// add the field
//doc.update();
}
// NOTE(review): unreachable as written - the identical check at the top of the
// callback has already returned when doc is null.
if(doc == null) {
return db.close();
}
//app.error(function(err, req, res, next){
// console.error(err);
// res.send('Fail Whale, yo.');
//});
//console.dir(doc.State + " is a state!");
});
});
~~
Your code looks a little chaotic, but here is what you can do.
Also take a look at the MongoDB documentation for $set: http://docs.mongodb.org/manual/reference/operator/update/set/
// Applies a $set update to every document the cursor yields and closes the
// connection only after the cursor is exhausted AND every started update has
// called back. `db` and `sortorder` come from the surrounding code.
var cursor = db.collection('data').find().sort(sortorder);
var pendingUpdates = 0;
var cursorExhausted = false;

// Closing as soon as the cursor ends would cut off updates still in flight
// ("Connection Closed By Application"), so wait until nothing is pending.
function closeWhenIdle() {
  if (cursorExhausted && pendingUpdates === 0) {
    db.close();
  }
}

cursor.each(function(err, doc) {
  if(err) throw err;
  // A null doc is treated as "cursor exhausted".
  if(doc == null) {
    cursorExhausted = true;
    return closeWhenIdle();
  }
  // Up to here your code makes sense: you have a cursor, you checked for
  // errors and you have the current document.
  // Now you want to update a record; you can do it like this:
  var myquery = {};
  myquery['_id'] = doc['_id'];
  // you were missing the surrounding {}
  var myupdate = { $set: { field1: "whatever value", field2: 500 } };
  // Obviously you want to replace field1 and field2 with your actual field names.
  // Instead of creating a new update object and using $set you could also
  // modify the 'doc' variable and pass it to the update call below in place
  // of myupdate.
  pendingUpdates += 1;
  db.collection('data').update(myquery, myupdate, function (err, updatedDoc) {
    pendingUpdates -= 1;
    if (err) throw err;
    console.log("successfully updated document", updatedDoc);
    closeWhenIdle();
  });
});