Node RAM usage keeps increasing - node.js

I have a page built with Node that receives 15 requests/second. My handler looks like this:
var somepage = function(req, res) {
  res.send(200);
  call_mongo_to_save_some_data(req.somedata);
};

var call_mongo_to_save_some_data = function(data) {
  var needToSave = {};
  needToSave.val1 = data.val1;
  needToSave.val2 = data.val2;
  needToSave.val3 = data.val3;
  needToSave.val4 = data.val4;
  needToSave.val5 = data.val5;
  var db = mongoskin();
  db.collection.insert(needToSave).success(function() {
    db.close();
  }).fail(function(err) {
    throw err;
  });
};
So you can see I do something after I send the response. I do this to reduce the response time, so the client isn't kept waiting while I save something to mongo.

But after I launched the page, I found that the RAM usage keeps increasing. I did some research: some sources say res.write clears the output buffer, and in my code I do something after the res.write (res.send). I'm not sure whether that is the reason, or whether it's some other issue.
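For what it's worth, here is a variant I am considering that keeps the same fire-and-forget pattern but opens a single shared connection at startup (via mongoskin's db() helper) instead of a new one per request, and logs errors instead of throwing from the async callback. The URL and collection name are placeholders, not my real ones:

var mongoskin = require('mongoskin');
// open one connection at startup and reuse it for every request
var db = mongoskin.db('mongodb://localhost:27017/mydb');

var call_mongo_to_save_some_data = function(data) {
  var needToSave = {
    val1: data.val1, val2: data.val2, val3: data.val3,
    val4: data.val4, val5: data.val5
  };
  db.collection('somecollection').insert(needToSave, function(err) {
    // log instead of throwing: an exception thrown from an async
    // callback cannot be caught by the request handler
    if (err) console.error(err);
  });
};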

Related

Opening Maxmind db in Nodejs

I am trying to open the MaxMind open-source database in my Node.js application. My application receives a list of IP addresses from a Java application and returns the latitude and longitude corresponding to each IP. I have successfully done this synchronously, but I want to do it asynchronously to make things a little faster. I have written code for this, but the application gets killed every time. I am guessing that the reason might be the simultaneous opening of the same database (I might be wrong :D). I am posting the code below. Please take a look at it and make some suggestions on where I am going wrong. Thanks!!!
app.post('/endPoint', function(req, res) {
  var obj = req.body;
  var list = [];
  var ipList = obj.ipList;
  for (var i = 0; i < ipList.length; i++) {
    var ip = ipList[i];
    //console.log(i);
    maxmind.open('./GeoLite2-City.mmdb', function(err, cityLookup) {
      if (err) throw err;
      console.log("open database");
      var city = cityLookup.get(ip);
      if (city != null) {
        var cordinates = { 'latitude': city.location.latitude, 'longitude': city.location.longitude };
        //console.log(cordinates);
        list.push(cordinates);
      }
      if (list.length == ipList.length) {
        res.json({ finalMap: list });
      }
    });
  }
});
You should open the database only once, and reuse it.
The easiest solution would be to synchronously open the database at the top of your file:
const maxmind = require('maxmind');
const cityLookup = maxmind.openSync('./GeoLite2-City.mmdb');
Reading it asynchronously wouldn't speed things up a whole lot, and because loading the database is done only once (during app startup), I don't think it's a big deal that it may temporarily block the event loop for a few seconds.
And use the cityLookup function in your request handler:
app.post('/endPoint', function(req, res) {
  ...
  let city = cityLookup.get(ip);
  ...
});
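Putting the two pieces together, a fuller sketch of the handler might look like this (the loop body and response shape follow the question; the body-parsing setup via express.json() is an assumption and needs Express 4.16+):

const express = require('express');
const maxmind = require('maxmind');

const app = express();
app.use(express.json()); // assumed setup for reading req.body

// open the database once, at startup
const cityLookup = maxmind.openSync('./GeoLite2-City.mmdb');

app.post('/endPoint', function(req, res) {
  const list = [];
  for (const ip of req.body.ipList) {
    const city = cityLookup.get(ip); // the lookup itself is synchronous
    if (city != null) {
      list.push({
        latitude: city.location.latitude,
        longitude: city.location.longitude
      });
    }
  }
  res.json({ finalMap: list });
});

app.listen(3000);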

How to insert millions of documents into mongo with nodejs

MongoClient.connect(url, function(err, db) {
  var batch = db.collection("chatmessage").initializeOrderedBulkOp();
  //var batch = db.collection("chatmessage").initializeUnorderedBulkOp();
  var messageNum = 0;
  var chatmessage = null;
  var count = 0;
  for (var i = 0; i < 300; i++) {
    messageNum = getMessage(); // value from 1~500000
    for (var j = 0; j < messageNum; j++) {
      count++;
      chatmessage = generateChatMessage();
      batch.insert(chatmessage);
      if (count >= 1000) {
        count = 0;
        batch.execute(function(err) {
          console.log(err);
          batch = db.collection("chatmessage").initializeOrderedBulkOp();
          console.log("batch execute" + util.inspect(process.memoryUsage()));
          //db.close();
        });
      }
    }
    console.log("execute one chatroom" + util.inspect(process.memoryUsage()));
  }
  if (count > 0) {
    batch.execute(function(err) {
      console.log(err);
      batch = db.collection("chatmessage").initializeOrderedBulkOp();
    });
  }
});
I need to populate millions of messages into mongo with nodejs, using the Bulk API to batch the inserts. But I have some questions about this code.

The bulk execute method runs asynchronously. When inserting around 100,000 documents, I never see any bulk execute finish until all of the synchronous code has finished executing; only then is "batch execute" printed.

When the variable messageNum is large, around 50,000, the process runs out of memory:

FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory

The variables are all defined outside of the loop, and batch.execute is called, so I don't understand why this happens.

When the record count is large, the RSS increases rapidly and never decreases, as if the memory is not managed by the V8 engine; it grows until it reaches my computer's memory size. This is related to the DB: when I remove the DB operations, there is no problem. I guess the batch.execute() method takes this memory but can't release it, even with db.close():

{ rss: 1449750528, heapTotal: 1091999056, heapUsed: 922237384 }
UPDATE 1:
I have captured several heap dump snapshots with the heapdump package.

The root cause is that the batch.execute method is called asynchronously: it never runs until all the synchronous code has executed, as I mentioned in my first question. (At first I doubted this: even though batch.execute() is asynchronous, it should run independently of the main loop. But I have not found the documents written to the db, and the log line in the callback method is never printed.)

So all the documents waiting to be inserted into mongo stay in memory, and that causes the issue.

@joeytwiddle, I have found that you share this opinion on the problem, from this bulk-upsert-in-mongodb.

I have not found a way to make the bulk.execute() method run synchronously. Does anyone have an idea how to solve this problem?
I was also getting this error, but with the code below it works fine now. It is an example of a simple insertMany operation using a generator and the co module:
var MongoClient = require('mongodb').MongoClient,
  co = require('co'),
  test = require('assert');

co(function*() {
  var db = yield MongoClient.connect('mongodb://localhost:27017/test');
  // Get the collection
  var col = db.collection('insert_many_with_generators');
  var r = yield col.insertMany([{ a: 1 }, { a: 2 }]);
  test.equal(2, r.insertedCount);
  // Finish up test
  db.close();
});
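For the memory problem specifically, the same generator style can also bound memory during the bulk inserts: wait for each execute() to finish before building the next batch, so the queued documents can be garbage collected. A sketch reusing getMessage and generateChatMessage from the question, and assuming (as with insertMany above) that the 2.x driver's execute() returns a promise when no callback is passed:

var MongoClient = require('mongodb').MongoClient,
  co = require('co');

co(function*() {
  var db = yield MongoClient.connect('mongodb://localhost:27017/test');
  var col = db.collection('chatmessage');
  var batch = col.initializeOrderedBulkOp();
  var count = 0;
  for (var i = 0; i < 300; i++) {
    var messageNum = getMessage();
    for (var j = 0; j < messageNum; j++) {
      batch.insert(generateChatMessage());
      if (++count >= 1000) {
        yield batch.execute(); // wait here, so queued documents can be garbage collected
        batch = col.initializeOrderedBulkOp();
        count = 0;
      }
    }
  }
  if (count > 0) yield batch.execute();
  db.close();
});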

ExpressJS and matching delayed responses with original requests

I am looking for a design pattern that can match a stream of answers with the original request and response objects.
Suppose I receive a web request for all dog pictures. I want to submit that request to a message queue so that a worker process can eventually handle it. When a worker machine grabs the dog picture request, it performs the work and submits the response to an answer queue which is being monitored by Express. As I process the incoming queue, I want to match up the dog picture response with the original request and response objects so I can return the dog list or process it further.
Two solutions occur to me, but each seems inelegant. I could keep a global reference to the original context, find it, then delete it from the global list.
Or I could create a subscription to the response queue for each request and look for my answer among all the answers. This would work, but it is brutally inefficient and its complexity rises geometrically (10x10, 100x100, 1000x1000).
var express = require('express');
var app = express();

app.get('/doglist.txt', function(req, res) {
  putReqIntoQueue(req, res, "dogs");
});

var theRequests = {};
var i = 0;
var giveUpSecs = 60;

var putReqIntoQueue = function(req, res, payload) {
  var index = 'index_' + i;
  i++;
  var obj = { req: req, res: res, payload: payload, index: index };
  theRequests[index] = obj;
  // pass a function reference to setTimeout (not an immediate call)
  // and close over index
  var timeoutId = setTimeout(function() {
    theRequests[index].res.send('timeout error');
    delete theRequests[index];
  }, giveUpSecs * 1000);
  // insertIntoQueue(index, payload, timeoutId)
};

var onNewQueueResponse = function(index, payload, answer, timeoutId) {
  clearTimeout(timeoutId);
  if (index in theRequests) {
    var obj = theRequests[index];
    obj.res.send(answer); // send the worker's answer back on the saved response
    delete theRequests[index];
  } else {
    // must have already timed out
  }
};

// Queue("onNewMessage", onNewQueueResponse)

app.listen(3000);
console.log('Listening on port 3000');
This answer assumes some kind of queue that accepts work (insertIntoQueue) and then returns data when it is done through an "onNewMessage" event. It times out after 60 seconds.
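To exercise the matching logic without a real queue, the two commented-out hooks can be simulated with a plain EventEmitter (purely illustrative; the one-second fake worker below stands in for whatever queue you actually use):

var EventEmitter = require('events').EventEmitter;
var queue = new EventEmitter();

// fake worker: answers every job one second after it is enqueued
var insertIntoQueue = function(index, payload, timeoutId) {
  setTimeout(function() {
    queue.emit('onNewMessage', index, payload, 'a list of ' + payload, timeoutId);
  }, 1000);
};

// deliver answers to the matcher, as Queue("onNewMessage", ...) would
queue.on('onNewMessage', onNewQueueResponse);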

No blocking operation in MongoJS

I have to do an operation that calculates something, but I can't use its result, because my program always stays in a wait state: it keeps running in my terminal until I press Ctrl+C.

I have a main in nodejs for my program where I need to use the result calculated in a module.
var myJSONClient = {
  "nombre": "<nombre_cliente>",
  "intervalo": [0, 0]
};

var intervalo = gestionar.gestion(myJSONClient, vector_intervalo);
console.log("intervalo: " + intervalo); // returns undefined
And this is the module
var gestion = function(myJSON, vector_intervalo) {
  var dburl = 'localhost/mongoapp';
  var collection = ['clientes'];
  var db = require('mongojs').connect(dburl, collection);
  var intervalo_final;

  function cliente(nombre, intervalo) {
    this.nombre = nombre;
    this.intervalo = intervalo;
  }

  var cliente1 = new cliente(myJSON.nombre, myJSON.intervalo);

  db.clientes.save(cliente1, function(err, saveCliente) {
    if (err || !saveCliente) console.log("Client " + cliente1.nombre + " not saved. Error: " + err);
    else console.log("Client " + saveCliente.nombre + " saved");
    intervalo_final = calculate(vector_intervalo);
    console.log(intervalo_final); // here I can see the right content of the variable intervalo_final
  });

  console.log(intervalo_final); // this is not executed
  return intervalo_final;
}
exports.gestion = gestion;
Welcome to the async world! :)
First of all, you aren't doing blocking operations in Node. Actually, networking in Node is fully asynchronous.
The console.log you say works is the one inside the callback function of the db.clientes.save call; that callback fires when your mongo save has finished.

What does asynchronous networking mean?

It means that your save will be processed sometime in the future. The script does not wait for the response before continuing with the code; the console.log right after the save call is executed as soon as it is reached, which is why it prints undefined.

As for the "wait state" of your script, the fact that it never ends: you should take a look at this question. There's the answer.
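A common way to restructure the module is to accept a callback and deliver the result once the save has finished. A sketch (calculate and the mongojs setup are as in the question):

// module: accept a callback instead of returning a value
var gestion = function(myJSON, vector_intervalo, callback) {
  var db = require('mongojs').connect('localhost/mongoapp', ['clientes']);
  var cliente1 = { nombre: myJSON.nombre, intervalo: myJSON.intervalo };
  db.clientes.save(cliente1, function(err, saveCliente) {
    if (err || !saveCliente) return callback(err);
    // the result only exists once the save has completed
    callback(null, calculate(vector_intervalo));
  });
};
exports.gestion = gestion;

// main: the result is only available inside the callback
gestionar.gestion(myJSONClient, vector_intervalo, function(err, intervalo) {
  if (err) return console.log("Error: " + err);
  console.log("intervalo: " + intervalo);
});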

How to use filesystem's createReadStream with Meteor router (NodeJS)

I need to allow the user of my app to download a file with Meteor. Currently, when the user requests a download, I insert into a "fileRequests" collection in Mongo a document with the file location and a timestamp of the request, and return the ID of the newly created request. When the client gets the new ID, it immediately goes to mydomain.com/uploads/:id. I then use something like this to intercept the request before Meteor does:
var connect = Npm.require("connect");
var Fiber = Npm.require("fibers");
var path = Npm.require('path');
var fs = Npm.require("fs");
var mime = Npm.require("mime");

__meteor_bootstrap__.app
  .use(connect.query())
  .use(connect.bodyParser()) // I add this for file uploading
  .use(function(req, res, next) {
    Fiber(function() {
      if (req.method == "GET") {
        // get the id here, and stream the file using fs.createReadStream();
      }
      next();
    }).run();
  });
I check to make sure the file request was made less than 5 seconds ago, and I immediately delete the request document after I've queried it.

This works, and is secure (enough), I think. No one can make a request without being logged in, and 5 seconds is a pretty small window for someone to hijack the created request URL, but I just don't feel right with my solution. It feels dirty!

So I attempted to use Meteor-Router to accomplish the same thing, so that I can check whether they're logged in correctly without the 5-seconds-open-to-the-world trickery.
So here's the code I wrote for that:
Meteor.Router.add('/uploads/:id', function(id) {
  var path = Npm.require('path');
  var fs = Npm.require("fs");
  var mime = Npm.require("mime");
  var res = this.response;

  var file = FileSystem.findOne({ _id: id });
  if (typeof file !== "undefined") {
    var filename = path.basename(file.filePath);
    var filePath = '/var/MeteorDMS/uploads/' + filename;
    var stat = fs.statSync(filePath);

    res.setHeader('Content-Disposition', 'attachment; filename=' + filename);
    res.setHeader('Content-Type', mime.lookup(filePath));
    res.setHeader('Content-Length', stat.size);

    var filestream = fs.createReadStream(filePath);
    filestream.pipe(res);
    return;
  }
});
This looks great, fits right in with the rest of the code, and is easy to read, no hacking involved, BUT! It doesn't work! The browser spins and spins and never quite knows what to do. I have ZERO error messages coming up. I can keep using the app in other tabs. I don't know what it's doing; it never stops "loading". If I restart the server, I get a 0-byte file with all the correct headers, but I don't get the data.
Any help is greatly appreciated!!
EDIT:
After digging around a bit more, I noticed that trying to turn the response object into a JSON object results in a circular structure error.
Now the interesting thing about this is that when I listen to the filestream for the "data" event and attempt to stringify the response object, I don't get that error. But if I attempt to do the same thing in my first solution (listen to "data" and stringify the response), I get the error again.

So using the Meteor-Router solution, something is happening to the response object. I also noticed that on the "data" event, response.finished is flagged as true.
filestream.on('data', function(data) {
  fs.writeFile('/var/MeteorDMS/afterData', JSON.stringify(res));
});
The Meteor router installs a middleware to do the routing. All Connect middleware either MUST call next() (exactly once) to indicate that the response is not yet settled, or MUST settle the response by calling res.end() or by piping to the response. It is not allowed to do both.
I studied the source code of the middleware (see below). We see that we can return false to tell the middleware to call next(). This means we declare that this route did not settle the response and we would like to let other middleware do their work.
Or we can return a template name, a text, an array [status, text] or an array [status, headers, text], and the middleware will settle the response on our behalf by calling res.end() using the data we returned.
However, by piping to the response, we have already settled it. The Meteor router should call neither next() nor res.end().
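For instance, a minimal Connect middleware respecting that contract might look like this (illustrative only; the /ping path is made up):

// settle the response for one path; defer to later middleware otherwise
__meteor_bootstrap__.app.use(function(req, res, next) {
  if (req.url === '/ping') {
    res.end('pong'); // settled: do NOT also call next()
  } else {
    next();          // not settled: let other middleware handle it
  }
});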
We solved the problem by forking the Meteor router and making a small change: we replaced the else on line 87 (after if (output === false)) with:
else if (typeof(output) != "undefined") {
See the commit with sha 8d8fc23d9c in my fork.
This way return; in the route method will tell the router to do nothing. Of course you already settled the response by piping to it.
Source code of the middleware as in the commit with sha f910a090ae:
// hook up the serving
__meteor_bootstrap__.app
  .use(connect.query()) // <- XXX: we can probably assume accounts did this
  .use(this._config.requestParser(this._config.bodyParser))
  .use(function(req, res, next) {
    // need to wrap in a fiber in case they do something async
    // (e.g. in the database)
    if (typeof(Fiber) == "undefined") Fiber = Npm.require('fibers');

    Fiber(function() {
      var output = Meteor.Router.match(req, res);

      if (output === false) {
        return next();
      } else {
        // parse out the various type of response we can have

        // array can be
        // [content], [status, content], [status, headers, content]
        if (_.isArray(output)) {
          // copy the array so we aren't actually modifying it!
          output = output.slice(0);

          if (output.length === 3) {
            var headers = output.splice(1, 1)[0];
            _.each(headers, function(value, key) {
              res.setHeader(key, value);
            });
          }

          if (output.length === 2) {
            res.statusCode = output.shift();
          }

          output = output[0];
        }

        if (_.isNumber(output)) {
          res.statusCode = output;
          output = '';
        }

        return res.end(output);
      }
    }).run();
  });
