Memory leak in migrating to new mongoose collection from existing collection - node.js

I have multiple unnecessarily large, poorly organized collections in MongoDB, and I am trying to migrate a subset of that data to a few new collections that have a mongoose schema. However, my approach seems to have a memory leak: the migration slowly grinds to a halt and the node process eats up all the RAM after a while, resulting in this error:
FATAL ERROR: JS Allocation failed - process out of memory
I start off with a list of the documents I want to migrate over, people. Initially, I used a forEach over people, but that resulted in a very large memory leak, with many cursors being created before being closed. I then tried using async to run each query in series, which is much better than my initial attempt, but there still seems to be a slight memory leak, and eventually the error occurs after only a small part of the data has been migrated. What am I missing?
var PersonSchema = new mongoose.Schema({
    _id: String,
    records: [{
        id: String,
        date: Date
    }]
});
var Person = mongoose.model('Person', PersonSchema);

var db = mongoose.connection.db;

// Collections I want to use in populating the new schema
var members = db.collection('members');
var records = db.collection('records');

// Populate members
members.find().each(function (err, p) {
    if (err) {
        winston.error(err);
    }
    if (p) {
        var id = p['Member ID - Consist'];
        var person = new Person({
            _id: id,
            records: []
        });
        person.save(function (err) {
            if (err) {
                winston.error(err);
            } else {
                winston.info('Saved %s', id);
            }
        });
    } else {
        winston.info('Done importing members');
    }
});
// Loop through people again and add all their records
Person.find().exec(function (err, people) {
    async.eachSeries(people, function (p, callback) {
        var cursor = records.find({
            'memberId': p.id
        });
        cursor.each(function (err, doc) {
            if (err) {
                throw new Error(err);
            }
            if (doc) {
                p.records.push({
                    id: doc['recordId'],
                    date: new Date(doc['date'])
                });
            } else {
                p.save(function (err) {
                    if (err) {
                        winston.error(err);
                    } else {
                        cursor.close(function () {
                            callback();
                        });
                        winston.info('Data saved for %s', p.id);
                    }
                });
            }
        });
    }, function (err) {
        console.log(err);
    });
});
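One likely culprit, for what it's worth: Person.find().exec() loads every person into memory at once before the series even starts, and every open records cursor pins more memory on top of that. A sketch of a lower-memory alternative using mongoose's query stream (same models and collections as above; .stream() is the old mongoose QueryStream API, later replaced by .cursor()):

// Sketch only: stream people one at a time, so a single Person document
// and one records lookup are in memory at any moment. toArray() also
// closes the underlying cursor for us.
var stream = Person.find().stream();

stream.on('data', function (p) {
    stream.pause(); // process one person at a time
    records.find({ memberId: p._id }).toArray(function (err, docs) {
        if (err) {
            winston.error(err);
            return stream.resume();
        }
        docs.forEach(function (doc) {
            p.records.push({
                id: doc['recordId'],
                date: new Date(doc['date'])
            });
        });
        p.save(function (err) {
            if (err) winston.error(err);
            stream.resume();
        });
    });
});

stream.on('error', function (err) {
    winston.error(err);
});

stream.on('close', function () {
    winston.info('Done adding records');
});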


mongoose creating null document after using module export in express

I was recently using a function to upload files to a mongodb database successfully. However, after moving those functions into a separate file (to clean things up), exporting them, and requiring that file in my route, the database now creates documents with only null values, like so:
_id: ObjectId("xxxxxxxxxxxxxxx"),
name: null,
value: null,
image: null,
desc: null
I don't know what might be causing this. I am logging the object that I'm trying to insert from inside the 'insertProducts' function, and item.name is not null. Sorry, I'm new to mongodb :/
Maybe someone can point me in the right direction?
CRUD.js
const mongodb = require('mongodb');
const MongoClient = mongodb.MongoClient;
const removeProducts = function (req, res) {
    MongoClient.connect('mongodb://localhost', (err, client) => {
        if (err) {
            throw err;
        }
        let db = client.db('account-app');
        let products = db.collection('products');
        let users = db.collection('users');
        try {
            products.remove({ _id: req.body.id }, function (err) {
                if (!err) {
                    console.log('removed item')
                }
            });
        } catch (err) {
            console.log('Error while removing', err)
        }
        client.close()
        res.redirect('/addItems')
    })
}

const insertProducts = function (item, res) {
    console.log("item name", item.name)
    MongoClient.connect('mongodb://localhost', (err, client) => {
        if (err) {
            throw err;
        }
        let db = client.db('account-app');
        let products = db.collection('products');
        try {
            products.insertOne(item)
            console.log('item inserted')
        } catch (err) {
            console.log('Error while inserting', err)
        }
        client.close()
        res.redirect('/admin/addItems')
    })
}

module.exports = { removeProducts: removeProducts, insertProducts: insertProducts }
My admin route that requires the CRUD functions:
const crudOps = require('../utils/admin/CRUD') //require CRUD functions
// Adding new items
// --------------------------------------------------
router.post('/addNewItems', (req, res, next) => {
    console.log(req.body.name)
    let item = {
        name: req.body.name,
        file: binary(req.files.image.data),
        value: req.body.value,
        desc: req.body.desc
    }
    crudOps.insertProducts(item, res)
});
That connection URL looks wrong. Generally, it has the format:
mongodb://localhost:27017/mydatabase
Try replacing the connection string with the appropriate one for your database and see if that works. Also, the docs normally show insertOne with a callback, like this, so maybe that is the issue?
products.insertOne(item, function (err, r) {
    console.log('item inserted')
    res.redirect('/admin/addItems')
    client.close();
});
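Putting both suggestions together, a sketch of insertProducts with the full connection string and with the redirect and close deferred until insertOne's callback fires (the 'account-app' database name is taken from the question; this is a sketch, not a verified fix):

// Sketch only: same structure as the question's insertProducts, but the
// connection string includes host, port, and database, and client.close()
// plus the redirect wait for insertOne to finish.
const insertProducts = function (item, res) {
    MongoClient.connect('mongodb://localhost:27017/account-app', (err, client) => {
        if (err) throw err;
        const products = client.db('account-app').collection('products');
        products.insertOne(item, (err, r) => {
            if (err) console.log('Error while inserting', err);
            client.close();
            res.redirect('/admin/addItems');
        });
    });
};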

How to get data from one collection and insert into another collection in Nodejs?

I am using Node.js and MongoDB, and I am new to Node.js. I need to know how to get data from one collection, append some additional data, and insert it into another collection.
db.collection('collection1').find({ "Id": 12345 }).toArray(function (err, result) {
    db.collection('collection2', function (err, collection) {
        collection.insert({
            //some data
        })
    })
})
When I try this code it's not working; it gives me the error "insert is not defined".
Thanks,
John.
db.collection('collection1').find({ "Id": 12345 }).toArray(function (err, result) {
    //do the modification here
    db.collection('collection2').insert(modifiedResult, function (err, result) {
        if (err) {
            //log error
        } else {
            //log result
        }
    })
})
One more thing: if the result array's length is more than one and you want to insert the documents separately, use promises:
db.collection('collection1').find({ "Id": 12345 }).toArray(function (err, result) {
    //do the modification here
    Promise.all(modifiedResult.map((eachModifiedResult) => {
        return db.collection('collection2').insert(eachModifiedResult);
    })).then((result) => {
        //result of the insert
    }).catch((err) => {
        //err if any happen
    });
})
But if you have a very large number of documents, do it as Neil said: read the collection document by document using a cursor, modify each one, and insert it into the other collection.
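A minimal cursor-based sketch of that approach (the query and the added field are placeholders, not from the original post):

// Stream collection1 document by document; only one doc is in memory at a time.
var cursor = db.collection('collection1').find({ "Id": 12345 });
cursor.each(function (err, doc) {
    if (err) throw err;
    if (doc === null) return; // cursor exhausted
    doc.extra = 'some additional data'; // modify here (hypothetical field)
    db.collection('collection2').insert(doc, function (err) {
        if (err) console.log('insert error', err);
    });
});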
You can also use a callback library like async, or promises (e.g. Q), to sequence the steps:
var collectionData = null;
var modifiedResult = null;

// here I am using the async library to avoid callback hell
async.series([
    // get the data from collection1
    function (cb) {
        var criteria = {
            "Id": 12345
        };
        db.collection('collection1').find(criteria).toArray(function (dbErr, dbResult) {
            if (dbErr) {
                cb(dbErr)
            } else {
                collectionData = dbResult;
                cb()
            }
        })
    },
    // append data to collectionData
    function (cb) {
        // do your work here to append data to collectionData
        modifiedResult = extendedData; // this is just an example, you need to work on it
        cb();
    },
    // insert into collection2 here
    function (cb) {
        db.collection('collection2').insert(modifiedResult, function (err, result) {
            if (err) {
                cb(err)
            } else {
                cb()
            }
        });
    }
]);

Using AWS Lambda to query a database, and push the results to Geckoboard

I am trying to set up an AWS Lambda function that will query a MySQL database and upload the results to Geckoboard for analysis. However, it will always time out. Here's my code:
'use strict';
var API_KEY = [API KEY];
var gb = require('geckoboard')(API_KEY);
var AWS = require('aws-sdk');
var mysql = require('mysql');
var connection = mysql.createConnection({
[DATABASE DETAILS]
});
var mysqlQuery =
    'SELECT DATE(created_at) as date, COUNT(DATE(created_at)) as number_of_entries ' +
    'FROM table ' +
    'WHERE updated_at IS NOT NULL ' +
    'GROUP BY date';
var schema = {
    id: 'geckoboard_target',
    fields: {
        date: {
            type: 'datetime',
            name: 'date'
        },
        number_of_entries: {
            type: 'number',
            name: 'number_of_entries'
        }
    }
};

function uploadToGeckoboard(schema, data, context) {
    gb.datasets.findOrCreate(schema, function (err, dataset) {
        if (err) {
            console.error('Error connecting to Geckoboard:', err);
            return context.fail('Failed');
        }
        dataset.put(data, function (err) {
            if (err) {
                console.error('Error uploading to Geckoboard', err);
                return context.fail('Failed');
            }
            console.log('Dataset created and data added');
            context.succeed('Success');
        });
    });
}

exports.handler = (event, context) => {
    connection.connect(function (err) {
        if (!err) {
            connection.query(mysqlQuery, function (err, data) {
                if (!err) {
                    console.log("Results:", JSON.stringify(data));
                    uploadToGeckoboard(schema, data, context);
                    connection.end();
                } else {
                    console.log("Query error:", err);
                    context.fail();
                }
            });
        } else {
            console.log("Error connecting database:", err.message);
            context.fail();
        }
    });
};
It succeeds up to the point where the data returns and the uploadToGeckoboard function is called. After that, it just times out. I've tried the same code with the lambda handler and context removed, and it runs fine from my local machine.
Any help would be greatly appreciated!
As it turns out, the code is just fine. The problem was that, during development, we had enabled a VPC in the Lambda configuration, in the hopes of enabling what we thought were necessary RDS read abilities.
Turning it off solved the perpetual time-outs, and revealed that no VPC was needed to make RDS queries. A Lambda function inside a VPC has no route to the public internet unless the VPC provides one (such as a NAT gateway), so the outbound call to Geckoboard's API could never complete. Oops!

Fetch data from multiple collections in mongo

I have some collections, shown below, that hold relationships; the relation between testMaster and testDoc is held inside testDocMaster.
For example:
testMaster {
    _id: Schema.ObjectId,
    name: String // master name
}

testDoc {
    _id: Schema.ObjectId,
    name: String, // doc name
    // other data
}

testDocMaster {
    masterId: ObjectId, // _id of the testMaster document
    docId: ObjectId // _id of the testDoc document
}
For each master entry we are expecting many relations. What would be the best way to fetch the data from the testDoc collection, given that I have the masterId?
I got it working using this:
// GLOBAL ARRAYS FOR STORING COLLECTION DATA
var collectionOne = [];
var collectionTwo = [];

app.get('/', function (req, res) {
    MongoClient.connect("mongodb://localhost:27017/michael", function (err, db) {
        if (!err) {
            console.log("We are connected");
        }
        db.collection("collectionOne", function (err, collection) {
            collection.find().sort({ order_num: 1 }).toArray(function (err, result) {
                if (err) {
                    throw err;
                } else {
                    for (var i = 0; i < result.length; i++) {
                        collectionOne[i] = result[i];
                    }
                }
            });
            db.collection("collectionTwo", function (err, collection) {
                collection.find().sort({ order_num: 1 }).toArray(function (err, result) {
                    if (err) {
                        throw err;
                    } else {
                        for (var i = 0; i < result.length; i++) {
                            collectionTwo[i] = result[i];
                        }
                    }
                });
            });
            // Thank you aesede!
            res.render('index.html', {
                collectionOne: collectionOne,
                collectionTwo: collectionTwo
            });
        });
    });
});
Now, for some reason, when Node restarts and I hit refresh, it doesn't render the HTML on the front-end; any subsequent refresh renders the page correctly.
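That first-refresh behavior is a race in the code above: res.render runs before either toArray callback has filled the global arrays, so the first request after a restart renders empty data and later requests see the leftovers. A minimal sketch of one fix, nesting the render inside both callbacks and dropping the shared globals (same database and collection names as above):

app.get('/', function (req, res) {
    MongoClient.connect("mongodb://localhost:27017/michael", function (err, db) {
        if (err) throw err;
        db.collection("collectionOne").find().sort({ order_num: 1 }).toArray(function (err, one) {
            if (err) throw err;
            db.collection("collectionTwo").find().sort({ order_num: 1 }).toArray(function (err, two) {
                if (err) throw err;
                // Render only after both result sets are in hand.
                res.render('index.html', {
                    collectionOne: one,
                    collectionTwo: two
                });
            });
        });
    });
});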
Assuming your testDocMaster schema uses ObjectId types that ref the other two collections, you can use Mongoose's query population support to help with this:
TestDocMaster.findOne({ masterId: masterId })
    .populate('docId')
    .exec(function (err, testDocMaster) {
        // testDocMaster.docId is populated with the full testDoc for the
        // matching _id
    });
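For that to work, the schema behind TestDocMaster needs ObjectId fields with refs, along these lines (a sketch; the model names 'TestMaster' and 'TestDoc' are assumptions):

// Hypothetical schema sketch: populate('docId') requires a ref on docId.
var TestDocMasterSchema = new mongoose.Schema({
    masterId: { type: mongoose.Schema.Types.ObjectId, ref: 'TestMaster' },
    docId: { type: mongoose.Schema.Types.ObjectId, ref: 'TestDoc' }
});
var TestDocMaster = mongoose.model('TestDocMaster', TestDocMasterSchema);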

how to populate a schema ref in mongoose using callbacks?

I have a list of items retrieved from mongoose, each with a referenced list object, but I also need to populate the item.list.user object associated with each list so I can use it in my template as item.list.user.username.
Item.find().populate('list').exec(function (err, items) {
    items.forEach(function (item) {
        User.findById(item.list.user, function (err, user) {
            item.list.user = user;
        });
    });
    //how do I get back here from User.findById() so I can render?
    res.render('index', { items: items });
});
There are a few ways to go about this. The main issue is that you are assuming the data will be populated when you render the template. That is not always the case: whenever you call asynchronous functions, you cannot treat the work as done until every callback has completed.
Here is a naive way to make sure the data is available for render.
Item.find().populate('list').exec(function (err, items) {
    var len = items.length
      , populatedItems = [];
    items.forEach(function (item, i) {
        User.findById(item.list.user, function (err, user) {
            item.list = item.list.toObject();
            item.list.user = user;
            populatedItems.push(item);
            // naive: assumes the callback for the last item fires last
            if (i + 1 === len) {
                res.render('index', { items: items });
            }
        });
    });
});
That is not very efficient, though: it makes unnecessary database calls, and in my opinion it is also harder to reason about.
Item.find().populate('list').exec(function (err, items) {
    var itemMap = {};
    items.forEach(function (item, i) {
        // Map each user id to its item positions in the array
        if (!itemMap[item.list.user]) {
            itemMap[item.list.user] = [];
        }
        itemMap[item.list.user].push(i);
        item.list = item.list.toObject();
    });
    // Pull the array of user ids from the itemMap object
    User.find({ _id: { $in: Object.keys(itemMap) } }, function (err, users) {
        users.forEach(function (user) {
            itemMap[user._id].forEach(function (id) {
                // Assign the user object to the appropriate item
                items[id].list.user = user;
            });
        });
        res.render('index', { items: items });
    });
});
After further discussion with you on IRC and some troubleshooting, the following is a working example for your particular case.
Item.find().populate('list').exec(function (err, items) {
    var itemIds = [];
    items.forEach(function (item) {
        itemIds.push(item.list.user);
    });
    // Query users by the ids gathered above
    User.find({ _id: { $in: itemIds } }, function (err, users) {
        var userMap = {};
        users.forEach(function (user) {
            userMap[user._id] = user;
        });
        res.render('index', { items: items, userMap: userMap });
    });
});
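With that userMap in hand, the template looks each user up by id instead of reading a nested property: userMap[item.list.user].username rather than item.list.user.username (exact syntax depends on your template engine).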
