MongoDB Insert Many, Update Many - node.js

I'm using MongoDB in node.js. I'm trying to update or insert many documents based on different conditions; however, MongoDB update (with upsert) only works with a single document or many documents with the same condition. Currently, I have an array containing the objects that I want to insert (Or update if the unique index exists) and I'm looping through the array and calling the updateOnce; however, I believe this method is not very efficient for a large number of objects that I'm going to have.
What is a better way to achieve this?
var mongoUtil = require( './database.js' );
var db = mongoUtil.getDb();
//array of objects to insert:
//NOTE: First + Last name is a unique index
var users = [
{firstName:"John", lastName:"Doe", points:300},
{firstName:"Mickey", lastName:"Mouse", points:200}
];
var collection = db.collection( 'points' );
for(var i = 0; i < users.length; i++) {
//If firstName and lastName already exists, update points. Otherwise insert new object
collection.updateOne(
{firstName: users[i].firstName, lastName: users[i].lastName},
{$set: {points: users[i].points}},
{upsert: true},
function(err,docs) {
if(err)
console.log(err);
}
)
}

I solved this issue using .bulkWrite():
var mongoUtil = require( './database.js' );
var db = mongoUtil.getDb();
var collection = db.collection( 'points' );
var users = [
{firstName:"John", lastName:"Doe", points:300},
{firstName:"Mickey", lastName:"Mouse", points:200}
];
let userUpdate = users.map(user => ({
updateOne: {
filter: {firstName: user.firstName, lastName: user.lastName},
update: {$set: user},
upsert: true
}
}));
collection.bulkWrite(userUpdate).catch(e => {
console.log(e);
});

Related

Mongoose: After finding document, iterate over a value in the document and run a new query on each

I have one schema which contains an array of references to another schema (among other fields):
const RecipeIngredient = new Schema({
ingredientId: { // store id ref so I can populate later
type: Schema.Types.ObjectId,
ref: 'ingredients',
required: true
},
// there are a couple other fields but not relevant here
});
const Recipe = new Schema({
ingredients: [RecipeIngredient]
});
I'm trying to write a route which will first find a recipe by _id, populate the ingredients array (already have this working), and finally iterate over each ingredient in that array.
router.get('/:recipeId/testing', async (req, res) => {
const { recipeId } = req.params
let recipe = await Recipe
.findById(recipeId)
.populate({
path: 'ingredients.ingredientId',
model: 'Ingredient',
select: '_id ......' //I'm selecting other fields too
})
.lean()
.exec();
if (recipe) {
const { ingredients } = recipe;
const newIngredients = [];
await ingredients.forEach(async (ingr) => {
// here I'd like to be able to run a new query
// and append the result to an array outside of the forEach
// I do need information about the ingr in order to run the new query
newIngredients.push(resultOfNewQuery);
});
return res.json(newIngredients)
};
return res.status(404).json({ noRecipeFound: 'No recipe found.'});
})
I've tried approaching this in a few different ways, and the closest I've gotten was executing the new query within each iteration, but because the query is async, I return the response before I've actually collected the documents from the inner query.
I also attempted to use .cursor() in the initial query, but that won't work for me because I do need to access the ingredients field on the recipe once it is resolved before I can iterate and run the new queries.
Any ideas would be appreciated! I'm definitely opening to restructuring this whole route if my approach is not ideal.
I was able to make this work by using a for loop:
const newIngredients = [];
for (let idx = 0; idx < ingredients.length; idx++) {
const { fieldsImInterestedIn } = ingredients[idx];
const matchingIngredients = await Ingredient
.find(fieldsImInterestedIn)
.lean()
.exec()
.catch(err => res.status(404).json({ noIngredientsFound: 'No ingredients found' }));
newIngredients.push(ingredientsToChooseFrom[randomIndex]);
};
return res.json(newIngredients);
still a little perplexed as to why this was able to work while forEach wasn't, but I'll happily move on...

Update Entire Collection In MongoDB Node Js

I have a program that receive a New file from FTP server after some specific time , I insert the new updated file's data into my database MongoDB the fields remain same , only data changes in new file.. Now the problem is that EVERY TIME I have to insert whole new collection into database, and the collection increases accordingly.Forexample- first time data is of 20 records , second time 40 and then 60 and so on .The thing I want to do is I want to check which field's data is updated in New file before inserting new FILE's data , and should only update these field's data in database instead of inserting whole new document.Does MONGOOSE or MONGODB provide solution for this , means IF I PASS A DATA AS PARAMETER , IT SOULD COMPARE MY EXISTING COLLECTION WITH MY NEW DATA and then Update Only UPDATED FIELDS ..Please help me i,m stuck , thanks :) . I,m using NODE JS ...
var c = new Client();
var connectionProperties = {
host: 'ABC',
user: 'ABC',
port: 'ABC',
password: 'ABC',
};
c.connect(connectionProperties);
c.on('ready', function () {
c.get('path-to-excel-file', function (err, stream) {
if (err) throw err;
stream.once('close', function () {
const workBook = XLSX.readFile('Convertedfile.xlsx');
XLSX.writeFile(workBook, 'Convertedfile', { bookType: "csv" });
csv()
.fromFile("Convertedfile")
.then((jsonObj) => {
Model.collection.insert(jsonObj, function (err, docs) {
if (err) {
return console.error(err);
} else {
console.log("All Documents insterted");
}
});
})
c.end()
});
stream.pipe(fs.createWriteStream('ConvertedFile.xlsx'))
})
})
It looks like you need upsert means update if document/record exists or insert/create it.
So this can be done either 1 document at a moment or in bulk, but it would need a query to find the matching document(s) first.
Since you've not provided any sample data so I can't write a sample code snippet for you but here is the link to get you started, for bulk: Bulk.find.upsert and for single document this thread is good : how-do-i-update-upsert-a-document-in-mongoose
Update: here is the mongodb bulk upsert in action:
const mongo = require('mongodb');
const MongoClient = mongo.MongoClient;
const client = new MongoClient('mongodb://127.0.0.1:27017', { useUnifiedTopology: true });
client.connect(async err => {
if (err) {
console.log('DB Connection Error ', err);
} else {
const db = client.db('node-cheat-db');
// lets assume you've read all file contents and data is now ready for db operation
let records = [
{first: 'john', last: 'doe', email: 'johen#doe.com'},
{first: 'jack', last: 'doe', email: 'jack#doe.com'},
{first: 'jill', last: 'doe_updated', email: 'jill#doe.com'}
];
// prepare bulk upsert so that new records are created and existing are updated
let bulk = db.collection('users').initializeOrderedBulkOp();
for (var i = 0; i < records.length; i++) {
bulk.find({
"email": records[i].email // at least 1 key should be treated as PK; in this example email is PK
}).upsert(records[i]).replaceOne(records[i]);
}
bulk.execute(function (err,updateResult) {
if (updateResult.result.ok != 1) {
console.log('Bulk Upsert Error');
} else {
console.log(`Inserted: ${updateResult.result.nUpserted} and Updated: ${updateResult.result.nModified}`);
}
});
}
});
sample output looks like:
Inserted: 0 and Updated: 3
Further Details:
Clone node-cheat bulk-update, run node bulk-update.js followed by npm install mongodb.

How do I keep mongo's array index from changing during an update?

I am trying to update an array within my object. However, every time I send the post call, the index in the array changes.
I have tried using $set and manually updating the array... but the index on the array keeps changing.
Here is the model:
const MappedImageSchema = new Schema({
imageUrl: {type: String, required: true},
name: {type: String, required: true},
areas:[
{
name: {type: String},
shape: {type: String},
coords:[{type: Number}],
}
]
});
module.exports = MappedImage = mongoose.model('mappedImages', MappedImageSchema)
Here is the code that performs the update:
// #route POST api/maps/:id/areas
// #desc add an area to a map (by map id)
// #access Private
router.post('/:id/areas/:id_area', passport.authenticate('jwt', { session: false }),
(req, res) => {
MappedImage.findById(req.params.id)
.then(map => {
// get all of the areas from the map...
var allAreas = map.areas;
// now get the index of the area we are going to update
const areaIndex = map.areas.map(item => item._id.toString()).indexOf(req.params.id_area);
// update the information
var coords = req.body.coords.split(',');
const updatedArea = {
name: req.body.name,
shape: req.body.shape,
coords: coords,
};
// set the updated information in the correct map area
allAreas[areaIndex] = updatedArea;
var query = {_id: req.params.id}; // this is the MAP id
var update = {$set: {areas:allAreas}}; // update the areas
var options = {new: true};
MappedImage.findOneAndUpdate(query, update, options)
.then(map => res.json(map))
.catch(err => res.status(404).json({ mapnotfound: err }));
})
.catch(err => res.status(404).json({ mapnotfound: 'Map not found while updating area' }));
}
);
Here is the data BEFORE the call
{
"_id": "5c5c69dda40e872258b4531d",
"imageUrl": "url test",
"name": "name test",
"areas": [
{
"coords": [1,2,3,4,5,6],
"_id": "5c5c8db2f904932dd8d4c560", <---- _id changes every time !
"name": "area name",
"shape": "poly"
}
],
"__v": 3
}
Here is the Postman call I make:
The result of the call is the name gets changed... but so does the index... making the next call fail with "no area found with that index".
What is perplexing about this problem is the _id for the map does not get updated when I run this code:
router.post('/:id', passport.authenticate('jwt', { session: false }),
(req, res) => {
var query = {_id: req.params.id};
var update = {imageUrl: req.body.imageUrl, name: req.body.name};
var options = {new: true};
MappedImage.findOneAndUpdate(query, update, options)
.then(map => res.json(map))
.catch(err => res.status(404).json({ mapnotfound: err }));
});
Update 1
I tried using the areas index and updating just that area... but the _id changes with this code as well:
... same code all the way down to here
allAreas[areaIndex] = updatedArea;
// but instead of calling 'findOneAndUpdate'... call map save
map.save().then(map => res.json(map));
Update 2
I can't get this code to work as areas._id and areas.$ are undefined ?
var query = {_id: req.params.id, areas._id: id_area}; // this is the MAP id
var update = {$set: {areas.$: updatedArea}}; // update the area
Update 3
So, putting the _id in the updatedArea fixes this issue... but it "feels" wrong to do so: ( per eol answer )
const updatedArea = {
_id: req.params.id_area,
name: req.body.name,
shape: req.body.shape,
coords: coords,
};
Update 4
eol - thanks for the verification on the mongoDB side... If that solves the DB id problem... I just need to know why my query is failing. I tried this and all I see in the terminal output is "creating query"... I never see the "query" and it's definition... so something is wrong and I don't know how to figure out what. Here is what I have now:
console.log('creating query');
var query = {"_id": req.params.id, "areas._id": id_area};
console.log('query');
console.log(query);
Update 5
Figured it out why the query not being output, id_area is not defined... but req.params.id_area is !
console.log('creating query');
var query = {"_id": req.params.id, "areas._id": req.params.id_area};
console.log('query');
Update 6
Code is in... but it is still not working. A picture is worth a 1000 words... so here are two:
This one shows the areas ID is still changing:
Here is the code I have now:
console.log('Update area');
console.log('changing area ' + req.params.id_area);
//console.log(req.body);
const { errors, isValid } = mapValidators.validateAreaInput(req.body);
// Check Validation
if(!isValid){
return res.status(400).json(errors);
}
MappedImage.findById(req.params.id)
.then(map => {
// Check to see if area exists
if (
map.areas.filter(
area => area._id.toString() === req.params.id_area
).length === 0
) {
return res.status(404).json({ areanotfound: 'Area does not exist' });
}
console.log('area exists');
// get all of the areas from the map...
var allAreas = map.areas;
console.log('all areas');
console.log(allAreas);
// now get the index of the area we are going to update
const areaIndex = map.areas.map(item => item._id.toString()).indexOf(req.params.id_area);
console.log('area index');
console.log(areaIndex);
// update the information
var coords = req.body.coords.split(',');
const updatedArea = {
name: req.body.name,
shape: req.body.shape,
preFillColor: req.body.preFillColor,
fillColor: req.body.fillColor,
coords: coords,
};
console.log('updated area');
console.log(updatedArea);
// set the updated information in the maps areas
allAreas[areaIndex] = updatedArea;
console.log('creating query');
var query = {"_id": req.params.id, "areas._id": req.params.id_area};
console.log('query');
console.log(query);
var update = {$set: {"areas.$": updatedArea}};
console.log('update');
console.log(update);
var options = {new: true};
MappedImage.findOneAndUpdate(query, update, options)
.then(map => res.json(map))
.catch(err => res.status(404).json({ mapnotfound: err }));
})
.catch(err => res.status(404).json({ mapnotfound: 'Map not found while updating area' }));
Here is the terminal output:
You could try setting the _id property in the updatedArea object with the value of the area that you'd like to update. This would prevent creating a new id while using the $set operator. Something like this:
// now get the index of the area we are going to update
const areaIndex = map.areas.map(item => item._id.toString()).indexOf(req.params.id_area);
// update the information
var coords = req.body.coords.split(',');
const updatedArea = {
_id: id_area,
name: req.body.name,
shape: req.body.shape,
coords: coords,
};
...
Note that with the above solution you're always setting a new array, which is why new id's are generated.
You could also try updating the specific element in the array using the $ operator:
var query = {"_id": req.params.id, "areas._id": id_area}; // this is the MAP id
var update = {$set: {"areas.$": updatedArea}}; // update the area
See the screenshots below for an example (executing the commands in the mongodb-shell) where I'm trying to only update the second array element (i.e. with _id 5c5c8db2f904932dd8d4c561)

Mongoose - find() with multiple ids that are the same

If I were to perform this query with mongoose;
Schema.find({
_id: {
$in: ['abcd1234', 'abcd1234', 'abcd1234']
}
});
The query will only return something like:
[{
'property1': 'key1',
'property2': 'key2'
}]
With the array only having one object, obviously because I passed in all the same id's. However, I actually want duplicate objects returned. How can I do this?
Mongo itself will only return objects with no duplicates. But you can then build an array of objects with duplicates from that.
For example, if array is the array of objects returned my Mongo - in this case:
var array = [{
_id: 'abcd1234',
property1: 'key1',
property2: 'key2'
}];
and ids is your list of IDs that you want with duplicates - in your case:
var ids = ['abcd1234', 'abcd1234', 'abcd1234'];
then you can do:
var objects = {};
array.forEach(o => objects[o._id] = o);
var dupArray = ids.map(id => objects[id]);
Now dupArray should contain the objects with duplicates.
Full example:
var ids = ['abcd1234', 'abcd1234', 'abcd1234'];
Schema.find({_id: {$in: ids}}, function (err, array) {
if (err) {
// handle error
} else {
var objects = {};
array.forEach(o => objects[o._id] = o);
var dupArray = ids.map(id => objects[id]);
// here you have objects with duplicates in dupArray:
console.log(dupArray);
}
});

How to Make Mongoose's .populate() work with an embedded schema/subdocument?

I read up that you can make Mongoose auto pouplate ObjectId fields. However I am having trouble structuring a query to populate fields in a subdoc.
My models:
var QuestionSchema = new Schema({
question_text: String,
type: String,
comment_field: Boolean,
core_question: Boolean,
identifier: String
});
var SurveyQuestionSchema = new Schema({
question_number: Number,
question: {type: Schema.Types.ObjectId, ref: 'Question', required: true} //want this popuplated
});
var SurveySchema = new Schema({
start_date: Date,
end_date: Date,
title: String,
survey_questions: [SurveyQuestionSchema]
});
Right now I achieve the effect by doing:
Survey.findById(req.params.id, function(err, data){
if(err || !data) { return handleError(err, res, data); }
var len = data.survey_questions.length;
var counter = 0;
var data = data.toJSON();
_.each(data.survey_questions, function(sq){
Question.findById(sq.question, function(err, q){
sq.question = q;
if(++counter == len) {
res.send(data);
}
});
});
});
Which obviously is a very error-prone way of doing it...
As I noted in the comments above, this is an issue currently under scrutiny by the mongoose team (not yet implemented).
Also, looking at your problem from an outsider's perpsective, my first thought would be to change the schema to eliminate SurveyQuestion, as it has a very relational db "join" model feel. Mongoose embedded collections have a static sort order, eliminating the need for keeping a positional field, and if you could handle question options on the Survey itself, it would reduce the schema complexity so you wouldn't need to do the double-populate.
That being said, you could probably reduce the queries down to 2, by querying for all the questions at once, something like:
Survey.findById(req.params.id, function(err, data){
if(err || !data) { return handleError(err, res, data); }
var data = data.toJSON();
var ids = _.pluck(data.survey_questions, 'question');
Question.find({_id: { $in: ids } }, function(err, questions) {
_.each(data.survey_questions, function(sq) {
sq.question = _.find(questions, function(q) {
var id = q._id.toString();
return id == sq.question;
});
});
res.send(data);
});
});

Resources