I am trying to write a simple function to grab the id of a specific instance based on matching criteria from mongodb using the official node package 'mongodb'.
My function works, in that I can console.log the data, but I am unable to return the data and use it as I intended, as you can see below.
const mongo = require('mongodb').MongoClient;
const url = 'mongodb://localhost:27017';
/**
 * Looks up a device by `deviceKey` and logs the `_id` of the first match.
 * The database ('telcos') and the collection ('device') are hard-coded in
 * the body under the consts `db` and `collection`.
 *
 * NOTE(review): the only `return` is inside the toArray() callback, so it
 * returns from that callback, not from fetchId. fetchId itself has no return
 * statement, which is why the promise it hands to callers resolves to
 * `undefined`.
 *
 * @param deviceKey - value matched against the `deviceKey` field.
 */
async function fetchId(deviceKey) {
const client = await mongo.connect(url, { useNewUrlParser: true });
const db = client.db('telcos');
const collection = db.collection('device');
try {
// NOTE(review): a callback is passed to toArray() and the call is also
// awaited; presumably the await does not wait for the callback to run -
// confirm against the driver documentation.
await collection.find({"deviceKey": deviceKey}).toArray((err, response) => {
if (err) throw err;
console.log(response[0]._id); // << works logs _id
return response[0]._id; // << does nothing... ?
})
} finally {
// Runs whether or not the try block threw.
client.close();
}
}
// # fetchId() USAGE EXAMPLE
//
// fetchId(112233); < include deviceKey to extract id
//
// returns database id of device with deviceKey 112233
// Run test on fetchId() to see if it works
fetchId("112233")
.then(function(id) {
console.dir(id); // << undefined
})
.catch(function(error) {
console.log(error);
});
Why does my test return undefined but my console.log() inside the function works?
It looks like you're combining callback code with async/await code in an odd way. Your function fetchId isn't returning anything at all, which is why you don't see id after fetching.
try {
const response = await collection.find(...).toArray()
return response[0]._id
}...
If we weren't able to await collection.find(...).toArray() and needed to manually convert this from using callbacks to promises, we'd have to do something like:
function fetchId (id) {
// this function returns a promise
return new Promise((resolve, reject) => {
...
collection.find(...).toArray((err, response) => {
// within the callback, returning values doesn't do anything
if (err) return reject(err);
return resolve(response[0]._id);
})
});
}
You are returning a value from inside the callback, but the caller handles fetchId as if it returned a promise for that value. Please try this code. I have not tested it.
const mongo = require('mongodb').MongoClient;
const url = 'mongodb://localhost:27017';
/**
 * Finds the database id of the device whose `deviceKey` matches.
 *
 * The database ('telcos') and the collection ('device') are hard-coded below.
 *
 * @param {string} deviceKey - Device key to look up.
 * @returns {Promise<*>} Resolves to the `_id` of the first matching document,
 *   or `undefined` when no document matches.
 * @throws Rejects with the underlying error if connecting or querying fails.
 */
async function fetchId(deviceKey) {
  const client = await mongo.connect(url, { useNewUrlParser: true });
  try {
    const collection = client.db('telcos').collection('device');
    // Called without a callback, toArray() hands back a promise, so the
    // documents can be awaited here and the id returned from fetchId itself
    // (a `return` inside a callback would only return from the callback).
    const response = await collection.find({ deviceKey: deviceKey }).toArray();
    return response.length > 0 ? response[0]._id : undefined;
  } finally {
    // Reached only after the query has settled, on success or failure, so
    // the connection is never closed underneath a pending query.
    await client.close();
  }
}
// # fetchId() USAGE EXAMPLE
//
// fetchId(112233); < include deviceKey to extract id
//
// returns database id of device with deviceKey 112233
// Run test on fetchId() to see if it works
fetchId("112233")
.then(function(id) {
console.dir(id); // << undefined
})
.catch(function(error) {
console.log(error);
});
This question already has answers here:
batchSize field name ignored in Field Projection
(2 answers)
Closed 4 years ago.
I have an express app that's supposed to query my MongoDB database, then return only certain fields of the result. My code for this currently looks like this:
app.get('/stored', (req, res) => {
let pollId = req.query.id;
let pollCursor = db.collection(collName).find({"_id": ObjectId(pollId)}, {"_id": false, "pollName": false}).toArray((err, data) => {
if(err) return err;
let dataObj = data[0];
console.log(dataObj);
});
});
I expect to receive a data object that contains all fields except the '_id' and 'pollName' fields. However, the object I get still contains both of these fields, as well as all the other fields. What am I doing wrong?
First, test the route like this:
app.get('/stored', (req, res) => {
let pollId = req.query.id;
res.send(pollId)
return;
});
If the pollId echoed back matches the pollId the client sent, that part is fine. You should then create a query like this:
let query = {_id: pollId}
This is my code; I hope it helps you.
// https://tudiendanhngon.herokuapp.com/author/detail?id=5adc963c7b76157c65d3b1d9
app.get("/author/detail", function (req, res) {
var id = req.query.id;
var query = {_id: id};
db.collection(COLLECTION_AUTHORS).find(query).exec(function (err, author) {
if (err) {
res.send("Error")
} else {
res.json(author);
}
});
});
I know this has been asked before, but none of the answers worked for me.
In my App, I have a users collection in MongoDb. These users collection have an array field named 'segActuacions' that can be modified within a form. This form sends this Object to router:
{
"segActuacions.0.date": "27/09/2016",
"segActuacions.0.body": "first item",
"segActuacions.1.date": "27/09/2016",
"segActuacions.1.body": "second item"
}
router has this line to go to controller:
router.delete('/seg_act_upd_EE/:id/actDel/:i',
sessionController.loginRequired,
userController.actuaDelete
);
where ':id' is User-id, and ':i' is the index of segActuacions
Controller has this code:
exports.actuaDelete = function (req, res) {
var userId = req.params.id;
var userI = req.params.i;
var usr = req.body;
delete usr['segActuacions.userI.date'];
delete usr['segActuacions.userI.body'];
models.User.findByIdAndUpdate(userId, usr, function (error, user){
if (error) res.json(error);
res.redirect('/list_EE');
});
}
I want to delete the fields 'i' of the Object (for instance: if i=0, i'd like to delete "segActuacions.0.date" and "segActuacions.0.body", ...but nothing happens.
If you are simply looking to delete the keys from the req.body object and subsequently update your collection with the remaining keys then follow this example:
exports.actuaDelete = function (req, res) {
var userId = req.params.id;
var userI = req.params.i;
var usr = req.body;
delete usr["segActuacions."+ userI +".date"];
delete usr["segActuacions."+ userI +".body"];
models.User.findByIdAndUpdate(userId, usr, function (error, user){
if (error) res.json(error);
res.redirect('/list_EE');
});
}
To remove the fields from the collection, the best way is to use $unset operator because delete removes the keys from an object and updating the collection with the updated object will not remove the actual fields from the collection.
In essence you use the dot notation to correctly identify the field to remove. For example, you would want to achieve the following mongo shell update operation:
db.users.update(
{ "_id" : userId },
{
"$unset": {
"segActuacions.1.date" : "",
"segActuacions.1.body" : ""
}
}
)
This will remove the fields from the element in position with index 1 in the segActuacions array.
Tying all this with your controller method, you can construct an update object that is similar to
{
"$unset": {
"segActuacions.1.date" : "",
"segActuacions.1.body" : ""
}
}
such as
exports.actuaDelete = function (req, res) {
var userId = req.params.id;
var userI = req.params.i;
var usr = {};
usr["segActuacions."+ userI +".date"] = "";
usr["segActuacions."+ userI +".body"] = "";
models.User.findByIdAndUpdate(userId, { "$unset": usr }, function (error, user){
if (error) res.json(error);
res.redirect('/list_EE');
});
}
Note your delete wouldn't work because you are using userI as a string and not using the variable value. Also update will just update the fields that are in the object.
But I think you should use $unset, like the following:
// Build the $unset document using dot-notation paths. A nested object such
// as { segActuacions: { <i>: { body: '', date: '' } } } would put the
// top-level key 'segActuacions' under $unset and remove the whole array,
// because $unset looks only at the field names and ignores the values.
var updateObj = {};
updateObj['segActuacions.' + userI + '.body'] = '';
updateObj['segActuacions.' + userI + '.date'] = '';
var update = { $unset: updateObj };
models.User.findByIdAndUpdate(userId, update, function (error, user){
  // Return here so the redirect below is not attempted after the error
  // response has already been sent.
  if (error) return res.json(error);
  res.redirect('/list_EE');
});
(This code can be improved and wasn't tested)
I have a huge collection of documents in my DB and I'm wondering how can I run through all the documents and update them, each document with a different value.
The answer depends on the driver you're using. All MongoDB drivers I know have cursor.forEach() implemented one way or another.
Here are some examples:
node-mongodb-native
collection.find(query).forEach(function(doc) {
// handle
}, function(err) {
// done or error
});
mongojs
db.collection.find(query).forEach(function(err, doc) {
// handle
});
monk
collection.find(query, { stream: true })
.each(function(doc){
// handle doc
})
.error(function(err){
// handle error
})
.success(function(){
// final callback
});
mongoose
collection.find(query).stream()
.on('data', function(doc){
// handle doc
})
.on('error', function(err){
// handle error
})
.on('end', function(){
// final callback
});
Updating documents inside of .forEach callback
The only problem with updating documents inside of .forEach callback is that you have no idea when all documents are updated.
To solve this problem you should use some asynchronous control flow solution. Here are some options:
async
promises (when.js, bluebird)
Here is an example of using async, using its queue feature:
var q = async.queue(function (doc, callback) {
// code for your update
collection.update({
_id: doc._id
}, {
$set: {hi: 'there'}
}, {
w: 1
}, callback);
}, Infinity);
var cursor = collection.find(query);
cursor.each(function(err, doc) {
if (err) throw err;
if (doc) q.push(doc); // dispatching doc to async.queue
});
q.drain = function() {
if (cursor.isClosed()) {
console.log('all items have been processed');
db.close();
}
}
Using the mongodb driver, and modern NodeJS with async/await, a good solution is to use next():
const collection = db.collection('things')
const cursor = collection.find({
bla: 42 // find all things where bla is 42
});
let document;
while ((document = await cursor.next())) {
await collection.findOneAndUpdate({
_id: document._id
}, {
$set: {
blu: 43
}
});
}
This results in only one document at a time being required in memory, as opposed to e.g. the accepted answer, where many documents get sucked into memory, before processing of the documents starts. In cases of "huge collections" (as per the question) this may be important.
If documents are large, this can be improved further by using a projection, so that only those fields of documents that are required are fetched from the database.
var MongoClient = require('mongodb').MongoClient,
assert = require('assert');
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
assert.equal(err, null);
console.log("Successfully connected to MongoDB.");
var query = {
"category_code": "biotech"
};
db.collection('companies').find(query).toArray(function(err, docs) {
assert.equal(err, null);
assert.notEqual(docs.length, 0);
docs.forEach(function(doc) {
console.log(doc.name + " is a " + doc.category_code + " company.");
});
db.close();
});
});
Notice that the call to .toArray makes the application fetch the entire dataset.
var MongoClient = require('mongodb').MongoClient,
assert = require('assert');
// Connects to the 'crunchbase' database and prints every biotech company,
// consuming the result through the cursor instead of building an array.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {
  assert.equal(err, null);
  console.log("Successfully connected to MongoDB.");

  var query = {
    "category_code": "biotech"
  };

  // find() only hands back a cursor describing the query.
  var cursor = db.collection('companies').find(query);

  cursor.forEach(
    // 1st argument: invoked once per document.
    function(doc) {
      console.log(doc.name + " is a " + doc.category_code + " company.");
    },
    // 2nd argument: invoked when iteration ends or an error occurs.
    function(err) {
      assert.equal(err, null);
      return db.close();
    }
  );
});
Notice that the cursor returned by find() is assigned to var cursor. With this approach, instead of fetching all the data into memory and consuming it at once, we're streaming the data to our application. find() can create a cursor immediately because it doesn't actually make a request to the database until we try to use some of the documents it will provide. The point of the cursor is to describe our query. The 2nd parameter to cursor.forEach specifies what to do when the cursor is exhausted or an error occurs.
In the initial version of the above code, it was toArray() which forced the database call. It meant we needed ALL the documents and wanted them to be in an array.
Also, MongoDB returns data in batch format. The image below shows, requests from cursors (from application) to MongoDB
forEach is better than toArray because we can process documents as they come in until we reach the end. Contrast it with toArray - where we wait for ALL the documents to be retrieved and the entire array is built. This means we're not getting any advantage from the fact that the driver and the database system are working together to batch results to your application. Batching is meant to provide efficiency in terms of memory overhead and the execution time. Take advantage of it, if you can in your application.
None of the previous answers mentions batching the updates. That makes them extremely slow 🐌 - tens or hundreds of times slower than a solution using bulkWrite.
Let's say you want to double the value of a field in each document. Here's how to do that fast 💨 and with fixed memory consumption:
// Double the value of the 'foo' field in all documents
let bulkWrites = [];
const bulkDocumentsSize = 100; // how many documents to write at once
let i = 0;
db.collection.find({ ... }).forEach(doc => {
i++;
// Update the document...
doc.foo = doc.foo * 2;
// Add the update to an array of bulk operations to execute later
bulkWrites.push({
replaceOne: {
filter: { _id: doc._id },
replacement: doc,
},
});
// Update the documents and log progress every `bulkDocumentsSize` documents
if (i % bulkDocumentsSize === 0) {
db.collection.bulkWrite(bulkWrites);
bulkWrites = [];
print(`Updated ${i} documents`);
}
});
// Flush the last <100 bulk writes
db.collection.bulkWrite(bulkWrites);
And here is an example of using a Mongoose cursor async with promises:
new Promise(function (resolve, reject) {
collection.find(query).cursor()
.on('data', function(doc) {
// ...
})
.on('error', reject)
.on('end', resolve);
})
.then(function () {
// ...
});
Reference:
Mongoose cursors
Streams and promises
Leonid's answer is great, but I want to reinforce the importance of using async/promises and to give a different solution with a promises example.
The simplest solution to this problem is to loop over the documents with forEach and call an update for each one. Usually, you don't need to close the db connection after each request, but if you do need to close the connection, be careful. You must only close it once you are sure that all updates have finished executing.
A common mistake here is to call db.close() after all updates are dispatched without knowing if they have completed. If you do that, you'll get errors.
Wrong implementation:
collection.find(query).each(function(err, doc) {
if (err) throw err;
if (doc) {
collection.update(query, update, function(err, updated) {
// handle
});
}
else {
db.close(); // if there is any pending update, it will throw an error there
}
});
However, as db.close() is also an async operation (its signature has an optional callback), you may be lucky and this code can finish without errors. It may work only when you need to update just a few docs in a small collection (so, don't try).
Correct solution:
As a solution with async was already proposed by Leonid, below follows a solution using Q promises.
var Q = require('q');
var client = require('mongodb').MongoClient;
var url = 'mongodb://localhost:27017/test';
client.connect(url, function(err, db) {
if (err) throw err;
var promises = [];
var query = {}; // select all docs
var collection = db.collection('demo');
var cursor = collection.find(query);
// read all docs
cursor.each(function(err, doc) {
if (err) throw err;
if (doc) {
// create a promise to update the doc
var query = doc;
var update = { $set: {hi: 'there'} };
var promise =
Q.npost(collection, 'update', [query, update])
.then(function(updated){
console.log('Updated: ' + updated);
});
promises.push(promise);
} else {
// close the connection after executing all promises
Q.all(promises)
.then(function() {
if (cursor.isClosed()) {
console.log('all items have been processed');
db.close();
}
})
.fail(console.error);
}
});
});
The node-mongodb-native driver now supports an endCallback parameter to cursor.forEach, which lets you handle the event AFTER the whole iteration; refer to the official documentation for details: http://mongodb.github.io/node-mongodb-native/2.2/api/Cursor.html#forEach.
Also note that .each is deprecated in the nodejs native driver now.
You can now use (in an async function, of course):
for await (let doc of collection.find(query)) {
await updateDoc(doc);
}
// all done
which nicely serializes all updates.
let's assume that we have the below MongoDB data in place.
Database name: users
Collection name: jobs
===========================
Documents
{ "_id" : ObjectId("1"), "job" : "Security", "name" : "Jack", "age" : 35 }
{ "_id" : ObjectId("2"), "job" : "Development", "name" : "Tito" }
{ "_id" : ObjectId("3"), "job" : "Design", "name" : "Ben", "age" : 45}
{ "_id" : ObjectId("4"), "job" : "Programming", "name" : "John", "age" : 25 }
{ "_id" : ObjectId("5"), "job" : "IT", "name" : "ricko", "age" : 45 }
==========================
This code:
var MongoClient = require('mongodb').MongoClient;
var dbURL = 'mongodb://localhost/users';
// Connects using dbURL and logs one line per document in the 'jobs'
// collection.
MongoClient.connect(dbURL, (err, db) => {
  if (err) {
    throw err;
  }
  console.log('Connection successful');
  // NOTE(review): db.db() is called without a name; presumably it selects
  // the database given in the connection string - confirm for the driver
  // version in use.
  var dataBase = db.db();
  // loop forEach
  dataBase.collection('jobs').find().forEach(function(myDoc){
    console.log('There is a job called :'+ myDoc.job +'in Database');
  });
});
I looked for a solution with good performance and ended up creating a mix of what I found, which I think works well:
/**
* This method will read the documents from the cursor in batches and invoke the callback
* for each batch in parallel.
* IT IS VERY RECOMMENDED TO CREATE THE CURSOR TO AN OPTION OF BATCH SIZE THAT WILL MATCH
* THE VALUE OF batchSize. This way the performance benefits are maxed out since
* the mongo instance will send into our process memory the same number of documents
* that we handle in concurrent each time, so no memory space is wasted
* and also the memory usage is limited.
*
* Example of usage:
* const cursor = await collection.aggregate([
{...}, ...],
{
cursor: {batchSize: BATCH_SIZE} // Limiting memory use
});
DbUtil.concurrentCursorBatchProcessing(cursor, BATCH_SIZE, async (doc) => ...)
* #param cursor - A cursor to batch process on.
* We can get this from our collection.js API by either using aggregateCursor/findCursor
* #param batchSize - The batch size, should match the batchSize of the cursor option.
* #param callback - Callback that should be async, will be called in parallel for each batch.
* #return {Promise<void>}
*/
static async concurrentCursorBatchProcessing(cursor, batchSize, callback) {
let doc;
const docsBatch = [];
while ((doc = await cursor.next())) {
docsBatch.push(doc);
if (docsBatch.length >= batchSize) {
await PromiseUtils.concurrentPromiseAll(docsBatch, async (currDoc) => {
return callback(currDoc);
});
// Emptying the batch array
docsBatch.splice(0, docsBatch.length);
}
}
// Checking if there is a last batch remaining since it was small than batchSize
if (docsBatch.length > 0) {
await PromiseUtils.concurrentPromiseAll(docsBatch, async (currDoc) => {
return callback(currDoc);
});
}
}
An example of usage for reading many big documents and updating them:
const cursor = await collection.aggregate([
{
...
}
], {
cursor: {batchSize: BATCH_SIZE}, // Limiting memory use
allowDiskUse: true
});
const bulkUpdates = [];
await DbUtil.concurrentCursorBatchProcessing(cursor, BATCH_SIZE, async (doc: any) => {
const update: any = {
updateOne: {
filter: {
...
},
update: {
...
}
}
};
bulkUpdates.push(update);
// Updating if we read too many docs to clear space in memory
await this.bulkWriteIfNeeded(bulkUpdates, collection);
});
// Making sure we updated everything
await this.bulkWriteIfNeeded(bulkUpdates, collection, true);
...
/**
 * Writes the accumulated bulk operations to `collection` once at least
 * `flushBatchSize` of them are queued, or unconditionally when
 * `forceUpdate` is true, and then empties `bulkUpdates` in place.
 *
 * When `flushBatchSize` is omitted the size comparison is never satisfied,
 * so only `forceUpdate` triggers a write.
 *
 * NOTE(review): the usage example in this file calls `bulkWriteIfNeeded`;
 * confirm which method name is intended.
 *
 * @param bulkUpdates - Queue of bulk operations; emptied after a write.
 * @param collection - Object whose `bulkWrite` receives each chunk.
 * @param forceUpdate - Write regardless of how many operations are queued.
 * @param flushBatchSize - Queue length at which a write is triggered.
 */
private async bulkWriteParametersIfNeeded(
    bulkUpdates: any[], collection: any,
    forceUpdate = false, flushBatchSize) {
  if (bulkUpdates.length >= flushBatchSize || forceUpdate) {
    // concurrentPromiseChunked is a method that loops over an array in a concurrent way using lodash.chunk and Promise.map
    await PromiseUtils.concurrentPromiseChunked(bulkUpdates, (updateChunk: any) => {
      return collection.bulkWrite(updateChunk);
    });
    // Empty the same array the caller keeps pushing into.
    bulkUpdates.splice(0, bulkUpdates.length);
  }
}
In a node.js / Mongoose project, I have a schema which contains references to external image files.
var PageSchema = new Schema({
title: String
, media: {
digest: String
, name: String
}
});
Those files have additional properties which are stored in the file itself: url, width, height, exif fields, etc. Those fields will need to be populated before the model being sent to res.render().
For some fields, things are synchronous and a virtual just does the job:
PageSchema.virtual('media.url').get(function () {
return appPaths.fileUrl(this.media);
});
However, width / height, or exif fields require async calls. I thought of using middleware to populate them, but this does not seem to work:
PageSchema.post('init', function(next) {
var media = this.media;
var fileName = filedb.absoluteFilePath(media);
im.identify(fileName, function(err, features) {
if (err) {
media.width = 0;
media.height = 0;
} else {
media.width = features.width;
media.height = features.height;
}
next();
});
});
What am I doing wrong? Is there a common design pattern for solving this kind of problem? (Other than duplicating this information in the database itself?)
The real problem here is that mongoose currently seems to have a wonky implementation of post callbacks. While pre('init',function(next){ ... }); works as you expect, post('init',function(next){ ... }); does not actually get passed a next function. In fact, the post init callback does not receive any arguments whatsoever when it is called.
As such, I usually write a wrapper for my query callbacks to make a sort of DIY middleware:
/**
 * Wraps a query callback so that every returned document gets
 * `media.width` and `media.height` filled in before the callback runs.
 *
 * For each document, im.identify() is called on the path produced by
 * getFilename(); on an identify error both dimensions are set to 0. The
 * wrapped callback is invoked exactly once: immediately for an empty result,
 * otherwise after the last identify call has reported back.
 *
 * `im` and `getFilename` are expected to be provided by the surrounding
 * code. The document is passed to getFilename so the helper can derive the
 * path from it; a helper that takes no arguments simply ignores it.
 *
 * @param {function(Error|null, Array=)} callback - Receives the query error,
 *   or `null` and the augmented documents.
 * @returns {function(Error|null, Array)} Callback to pass to the query.
 */
var setAsyncVirtuals = function(callback){
  return function(err, docs){
    if (err) return callback(err);

    // Kept local: an undeclared counter would leak into the global scope and
    // be shared by overlapping queries.
    var pending = docs.length;
    if (pending === 0) {
      callback(null, docs);
      return;
    }

    docs.forEach(function(doc){
      im.identify(getFilename(doc), function(identifyErr, features){
        if (identifyErr) {
          doc.media.width = 0;
          doc.media.height = 0;
        } else {
          doc.media.width = features.width;
          doc.media.height = features.height;
        }
        pending--;
        if (pending === 0) callback(null, docs);
      });
    });
  };
};
then
Page.find({}, setAsyncVirtuals(function(err,docs){
res.send(docs); // these have media.width & media.height assigned
}));