Express JS way to clear memory without shutting down the server? - node.js

I currently have an Express.js server running. The machine has 4 GB of memory, not all of which is allocated to Express; it shares the box with other processes.
server.js code is:
// server.js
var webshot = require('./lib/webshot');
var fs = require('fs');
var http = require('http');
var bodyParser = require('body-parser');
var express = require('express');
var app = express();

app.use(bodyParser.urlencoded());
// your express configuration here

var httpServer = http.createServer(app);
// For http
httpServer.listen(8080);

app.post('/', function (req, res) {
  console.log(req.body);
  var firstLine = req.body.firstLine;
  var secondLine = req.body.secondLine;
  var previewID = req.body.previewId;
  var productPlate = req.body.prodName;
  res.header('Access-Control-Allow-Origin', 'https://citylocs.com');
  res.header('Access-Control-Allow-Methods', 'GET, POST, PUT');
  res.header('Access-Control-Allow-Headers', 'X-Requested-With, Content-Type');
  takeWebshot(firstLine, secondLine, previewID, productPlate)
    .then(() => console.log("a process exit here kills the server"));
  res.end();
});

function takeWebshot(fLine, sLine, pID, prodPlate) {
  var options = {
    takeShotOnCallback: true,
    captureSelector: '#img_preview_fancybox',
    licensePlate: 'Guzman Plate'
  };
  const used = process.memoryUsage();
  console.log(used);
  return new Promise((resolve, reject) => {
    webshot('example.com/preview/productpreview/testy.html?prod=' + prodPlate, '../screenshot/' + pID + '.png', options, function(err) {
      if (!err) {
        resolve();
      } else {
        reject(err);
      }
    });
  });
}
In the code above, webshot visits a URL, takes a screenshot, and writes it to a file on my server.
I get the following results:
{ rss: 29876224,
heapTotal: 18694144,
heapUsed: 12250200,
external: 390493 }
memoryUsage() usually returns roughly the same figures across multiple runs.
The server takes a request, runs takeWebshot(), and completes. My problem is that after about 20 minutes of this, the server runs out of memory, crashes, and has to be restarted. Is there anything I can add to "clear memory" or the cache? Quitting the process is not a solution, because then I have to restart server.js again, and I would like to avoid that.
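Since memoryUsage() looks flat between manual checks, one thing I can do is log it on an interval to see whether rss or heapUsed creep upward under load. A diagnostic sketch using the same process.memoryUsage() call as above (the 30-second period is arbitrary; this only observes the leak, it doesn't fix it):

// log heap figures periodically to see whether rss/heapUsed grow under load
setInterval(function () {
  var m = process.memoryUsage();
  console.log('rss:', m.rss, 'heapUsed:', m.heapUsed, 'external:', m.external);
}, 30000);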

Related

Can't kill a specific worker in Node JS cluster

I'm using Node + Express as my web server, with a cluster of 4 workers.
I tried several ways to deliberately kill a worker:
process.exit() in a controller, triggered by a browser action. I thought this would affect just a single worker process, but it turned out all the workers were killed.
Again in a controller, I let a worker send a suicide announcement to the master:
process.send('suicide');
and here goes my master process:
if (cluster.isMaster) {
  console.log(`Master cluster setting up ${numWorkers} workers...`);
  for (let i = 0; i < numWorkers; i++) {
    const worker = cluster.fork();
    worker.on('message', msg => {
      console.log(worker.process.pid + ' wants to suicide');
      worker.kill();
      process.kill(worker.process.pid);
    });
  }
}
It turned out that worker.kill() had no effect at all, and process.kill(worker.process.pid) again killed all 4 workers. Also, the console.log appeared 4 times, which I don't understand: I used a browser to trigger the action that sends the suicide announcement, so shouldn't this be a single worker's behavior?
I'm also using WebSockets in the project and keep a connection open; I don't know if this matters. Any help is appreciated!
EDIT:
Thanks to @Mia I found the reason: when I put process.exit() in the else branch (when cluster.isWorker) it works fine, but when it's in a specific controller it affects all the workers. I don't know how to solve this yet. Shouldn't the controller affect only one specific worker?
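One pattern I'm considering (a sketch, not a verified fix) is to have the master answer the suicide message by telling only the announcing worker to shut down, and to let the worker call process.exit() on itself:

// in the master, inside the fork loop: reply only to the worker that announced
worker.on('message', msg => {
  if (msg === 'suicide') {
    worker.send('shutdown');
  }
});

// in the worker branch: exit only when this process is told to
process.on('message', msg => {
  if (msg === 'shutdown') {
    process.exit(0);
  }
});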
I'm sorry to write this as an answer; I'm unable to comment due to my reputation.
I have written it as follows and it works fine.
First the cluster workers are created and open servers; whenever a worker gets a request on app.get("/"), it exits 2 seconds later via the process.exit method.
You can confirm that this works by printing out the remaining workers.
'use strict';
const cluster = require('cluster');
const os = require('os');

if (cluster.isMaster) {
  const cpus = os.cpus().length;
  for (let i = 0; i < cpus; i++) {
    cluster.fork();
  }
  cluster.on('exit', (worker, code) => {
    console.log(`${worker.process.pid} is killed`);
    console.log("remaining workers");
    const workers = Object.keys(cluster.workers);
    for (let worker of workers) {
      console.log(cluster.workers[worker].process.pid);
    }
  });
} else {
  console.log("process id ", process.pid);
  const express = require("express");
  const app = express();
  const path = require("path");
  const fs = require("fs");
  const http = require('http').Server(app);
  const io = require('socket.io')(http);
  const bodyParser = require('body-parser');

  app.use(bodyParser.urlencoded({ extended: false }));
  app.use(bodyParser.json());
  app.use(express.static('public'));

  app.get('/', function (req, res) {
    console.log("connected via", process.pid);
    setTimeout(() => {
      process.exit();
    }, 2000);
    res.sendFile(path.join(__dirname + "/public/view/index.html"));
  });

  http.listen(3000);
}
----------------------------code separated into modules---------------
Root (server.js)
'use strict';
const cluster = require('cluster');
const os = require('os');

if (cluster.isMaster) {
  const cpus = os.cpus().length;
  for (let i = 0; i < cpus; i++) {
    cluster.fork();
  }
  cluster.on('exit', (worker, code) => {
    console.log(`${worker.process.pid} is killed`);
    console.log("remaining workers");
    const workers = Object.keys(cluster.workers);
    for (let worker of workers) {
      console.log(cluster.workers[worker].process.pid);
    }
  });
} else {
  console.log("process id ", process.pid);
  const express = require('./modules/express');
  const http = require('http').Server(express);
  http.listen(3000);
}
Express
const express = require("express");
const app = express();
const bodyParser = require('body-parser');
const route = require('../route');

app.use(bodyParser.urlencoded({ extended: false }));
app.use(bodyParser.json());
app.use(express.static('public'));

route(app);

module.exports = app;
Route (index.js)
const HomeController = require('../controllers/HomeController');

module.exports = function (app) {
  app.get('/', HomeController.renderIndex);
  app.get('/killProcess', HomeController.killProcess);
};
HomeController
const path = require('path');

exports.renderIndex = function (req, res) {
  console.log("connected via", process.pid);
  res.sendFile(path.join(__dirname + "/../public/view/index.html"));
};

exports.killProcess = function (req, res) {
  res.write(`current process ${process.pid} is killed`);
  res.end();
  process.kill(process.pid);
};
Client side (browser)
$("#kill").on("click",function(e){
$.ajax({
url:"/killProcess"
}).success(function(data){
console.log(data);
})
})
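If the cluster should stay at full strength after a worker exits, the master's existing 'exit' handler can also fork a replacement. A minimal sketch of that variant (an addition to the code above, not something the original setup requires):

cluster.on('exit', (worker, code) => {
  console.log(`${worker.process.pid} is killed, forking a replacement`);
  cluster.fork(); // spawn a new worker so the pool size stays constant
});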

Insert a large csv file, 200,000+ rows, into MongoDB in NodeJS

I'm trying to parse and insert a big CSV file into MongoDB, but when the file exceeds 100,000 rows I get a bad response from the server, and the files I need to insert are usually above 200,000 rows.
I've tried both bulk insert (insertMany) and a Babyparse (Papaparse) streaming approach that inserts the file row by row, both with poor results.
Node api:
router.post('/csv-upload/:id', multipartMiddleware, function(req, res) {
  // POST variables
  var fileId = req.params.id;
  var csv = req.files.files.path;

  // create a queue object with concurrency 5
  var q = async.queue(function(row, callback) {
    var entry = new Entry(row);
    entry.save();
    callback();
  }, 5);

  baby.parseFiles(csv, {
    header: true, // Includes header in JSON
    skipEmptyLines: true,
    fastMode: true,
    step: function(results, parser) {
      results.data[0].id = fileId;
      q.push(results.data[0], function (err) {
        if (err) { throw err; }
      });
    },
    complete: function(results, file) {
      console.log("Parsing complete:", results, file);
      q.drain = function() {
        console.log('All items have been processed');
        res.send("Completed!");
      };
    }
  });
});
This streaming approach results in: POST SERVER net::ERR_EMPTY_RESPONSE
I'm not sure I'm using async.queue correctly, though.
Is there a better and more efficient way to do this, or am I doing something wrong?
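One suspicion about the snippet above: callback() fires as soon as entry.save() is called, not when the insert finishes, so the queue can drain while writes are still in flight (and q.drain is only assigned after parsing completes). A sketch of the worker with the callback tied to the save, assuming the callback-style mongoose save:

// queue worker that signals completion only once the insert has finished
var q = async.queue(function (row, callback) {
  var entry = new Entry(row);
  entry.save(function (err) {
    callback(err); // surfaces save errors to the q.push handler
  });
}, 5);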
Express Server:
// Dependencies
var express = require('express');
var path = require('path');
var bodyParser = require('body-parser');
var routes = require('./server/routes');
var mongoose = require("mongoose");
var babel = require("babel-core/register");
var compression = require('compression');
var PORT = process.env.PORT || 3000;
// Include the cluster module
var cluster = require('cluster');

mongoose.connect(process.env.MONGOLAB_URI || 'mongodb://localhost/routes');

// Code to run if we're in the master process
if (cluster.isMaster) {
  // Count the machine's CPUs
  var cpuCount = require('os').cpus().length;
  // Create a worker for each CPU
  for (var i = 0; i < cpuCount; i += 1) {
    cluster.fork();
  }
// Code to run if we're in a worker process
} else {
  // Express
  var app = express();
  app.use(bodyParser.json({limit: '50mb'}));
  app.use(bodyParser.urlencoded({limit: '50mb', extended: true}));
  // Compress responses
  app.use(compression());
  // Used for production build
  app.use(express.static(path.join(__dirname, 'public')));
  routes(app);
  // Routes
  app.use('/api', require('./server/routes/api'));
  app.all('/*', function(req, res) {
    res.sendFile(path.join(__dirname, 'public/index.html'));
  });
  // Start server
  app.listen(PORT, function() {
    console.log('Server ' + cluster.worker.id + ' running on ' + PORT);
  });
}
Handling the import:
Great question. From my experience, by far the fastest way to insert a CSV into Mongo is via the command line:
mongoimport -d db_name -c collection_name --type csv --file file.csv --headerline
I don't believe mongoose has a way of calling mongoimport (someone correct me if I'm wrong), but it's simple enough to call it from Node directly:
var exec = require('child_process').exec;
var cmd = 'mongoimport -d db_name -c collection_name --type csv --file file.csv --headerline';

exec(cmd, function(error, stdout, stderr) {
  // do whatever you need during the callback
});
The above will have to be modified to be dynamic, but it should be self-explanatory.
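One way to make it dynamic (a sketch; the database/collection names and file path are placeholders, and using spawn with an argument array avoids shell-quoting problems with untrusted filenames):

var spawn = require('child_process').spawn;

function importCsv(dbName, collectionName, filePath, done) {
  // build the argument list instead of interpolating a shell string
  var args = ['-d', dbName, '-c', collectionName, '--type', 'csv', '--file', filePath, '--headerline'];
  var child = spawn('mongoimport', args);
  child.on('close', function (code) {
    done(code === 0 ? null : new Error('mongoimport exited with code ' + code));
  });
}

// usage: importCsv('db_name', 'collection_name', csv, function (err) { ... });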
Handling the upload:
Uploading the file from a front-end client is another challenge.
Most browsers will time out if you make a request to a server and don't get a response within 60 seconds (probably what you are seeing above).
One solution would be to open a socket connection (see socket.io on npm for details). This creates a persistent connection to the server and isn't subject to the timeout restrictions.
If uploading is not an issue and the timeout is due to slow parsing/inserting, then you may not have to worry about this once you implement the above.
Other considerations:
I'm not sure exactly what you need to send back to the user or what parsing needs to take place, but that can either be done outside the normal request/response cycle, or handled over a socket connection if it's needed within a single request/response cycle.
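If you do go the socket route, a minimal sketch of pushing import progress to the client (this assumes a socket.io server is already attached to your HTTP server; the event names and the runImport helper are made up for illustration):

// server side: emit progress as rows are processed
io.on('connection', function (socket) {
  socket.on('start-import', function (fileId) {
    // hypothetical: runImport reports row counts via a progress callback
    runImport(fileId, function (rowsDone) {
      socket.emit('import-progress', { rows: rowsDone });
    });
  });
});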

Express.js/Node.js Responses Before Query Finishes

I have a local server that is fully functional when POSTs and GETs are sent from Postman, Chrome REST apps, etc. However, whenever I send a request from an ESP8266 it fails as follows:
This is my server side code:
router.get('/', function (req, res) {
  var person = req.query.id;
  console.log("1");
  Person.find({person: person}, function (err, station) {
    if (err) console.log(err.message);
    console.log("3");
    res.send('test');
    console.log("4");
  });
  console.log("2");
});
The lines below are from the console when the request is sent from Chrome:
1
2
3
4
GET /api?id=ABKWTltz6Ug 200 21.829 ms - -
And these are from the console when the request is sent from the ESP8266:
1
2
GET /api?id=ABKWTltz6Ug - - ms - -
3
4
As you may notice, logs 3 and 4 appear after the GET response line. My ESP8266 TCP connection lines match the HTTP message format:
GET /api HTTP/1.1
Host: *SERVER_IP_ADDRESS*
The problem arose when we switched from a PHP to a Node.js backend server; the ESP8266 used to connect and operate without problems back then. And FYI, everything also works fine if I remove the Person.find code block and move res.send('test'); outside of it.
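To check whether the ESP8266 drops the connection before the query callback runs, I could listen for the request's close event. A diagnostic sketch only, not a confirmed explanation:

router.get('/', function (req, res) {
  // fires if the client closes the socket before we respond
  req.on('close', function () {
    console.log('client closed the connection early');
  });
  // ... existing Person.find logic ...
});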
Below is another freshly written backend, in the hope of spotting the error. This is the whole code, and it shows the same behavior I mentioned above:
var express = require('express');
var app = express();
var router = express.Router();
var mongoose = require('mongoose');
var morgan = require('morgan');

app.use(morgan('dev'));
mongoose.connect('mongodb://localhost/test');
//mongoose.Promise = Promise;

var Test = mongoose.model('Test', { name: String });

router.get('/test', function (req, res) {
  console.log("1");
  var test1 = new Test({
    name: "Name"
  });
  var query = test1.save();
  query.then(function (doc) {
    res.send('1').end();
    console.log('3');
  });
  console.log("2");
});

app.use('/api', router);

var port = 9000;
app.listen(port, function () {
  console.log('server started on', port);
});

Nodejs - BrowserSync running on express server

I want to pull a URL from the DB and use it as the proxied URL. However, the setup I've come up with initializes a new BrowserSync server for each URL, using incrementing port numbers.
Is there a way to accomplish this without initializing a new BrowserSync server every time, or should I be using another approach?
var bs = require("browser-sync");
var express = require("express");
var router = express.Router();
var app = express();
router.get("/", function(req, res){
var proxyUrl = getUrl() //get url from db (www.example.com)
bs.create("bs1").init({
notify: false,
open: false,
ui: false,
port: 10000,
proxy: proxyUrl
});
res.send();
});
app.use(router);
app.listen(8080, function(){
console.log('listening on *:8080');
});
The above is fine(ish), but is it good practice to be initializing a new server for every URL (potentially thousands)?
And is it safe to be exposing a new port number to every user of the system? (Can I mask this with a subdomain?)
Update
My end goal is to use a unique subdomain to refer to each proxy url.
For example:
sub1.mysite.com proxies www.example.com,
sub2.mysite.com proxies www.example2.com
Browser-sync will not work here, as its proxy is tied to the server setup.
I use the following packages:
express
express-http-proxy
vhost (express vhost)
const port = 8080;
var app = require('express')();
var proxy = require('express-http-proxy');
var url = require('url');
var vhost = require('vhost');

app.listen(port);

/* Assuming getUrl() will return an array of sites */
// var sites = getUrl();
// DO NOT put '/' at the end of site
var sites = [
  'http://www.bing.com',
  'http://samanthagooden.com',
  'http://www.courtleigh.com'
];

var i = 0;
sites.forEach(site => {
  i++;
  var subDomain = 'sub' + i + '.mysite.com';
  app.use(vhost(subDomain, proxy(site, {
    forwardPath: (req, res) => url.parse(req.url).path,
    intercept: (rsp, data, req, res, callback) => {
      if (res._headers['content-type']) {
        var contentType = res._headers['content-type'];
        if (
          contentType.indexOf('text') !== -1 ||
          contentType.indexOf('javascript') !== -1
        ) {
          // Replace link if content-type = text or javascript
          var reg = new RegExp(site, 'g');
          res.send(data.toString().replace(reg, ''));
        } else {
          res.send(data);
        }
      } else {
        res.send(data);
      }
    }
  })));
  console.log(subDomain + ':' + port + ' proxy: ' + site);
});
The above example will create the following proxies:
sub1.mysite.com:8080 proxy: http://www.bing.com
sub2.mysite.com:8080 proxy: http://samanthagooden.com
sub3.mysite.com:8080 proxy: http://www.courtleigh.com
Maybe I'm misunderstanding what you are trying to do, but BrowserSync and Express seem a bit overkill in this case. Why not just use node-http-proxy with the native http module?
var http = require('http');
var httpProxy = require('http-proxy');

var options = ...;
var proxy = httpProxy.createProxyServer(options);

var server = http.createServer(function (req, res) {
  var proxyUrl = getUrl();
  proxy.web(req, res, { target: proxyUrl });
});

server.listen(8080, function () {
  console.log('listening on *:8080');
});
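To tie this to the subdomain goal in the update, getUrl() could key off the request's Host header instead of taking no arguments. A sketch (the mapping table and the req parameter are illustrative assumptions, not part of the snippet above):

// map each subdomain to its proxy target
var targets = {
  'sub1.mysite.com': 'http://www.example.com',
  'sub2.mysite.com': 'http://www.example2.com'
};

function getUrl(req) {
  // pick the target by the request's Host header, ignoring any port suffix
  var host = (req.headers.host || '').split(':')[0];
  return targets[host];
}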
In my opinion, if you want to offer this as a SaaS, going with a proxy per client is not a good idea: each client's proxy would spawn a process on a new port. My solution is to create a single Node server listening on localhost and map *.domain.com to that server.
If you are using an individual database for each client, get the subdomain from the request host in your Node logic and use it to connect to the right database.
The final controller code would be:
var express = require('express');
var router = express.Router();
var MongoClient = require('mongodb').MongoClient;

/* GET home page. */
router.get('/', function(req, res, next) {
  var client = req.subdomains[0];
  console.log(client);
  MongoClient.connect('mongodb://localhost:27017/' + client, function(err, db) {
    if (err) {
      throw err;
    }
    db.collection('app1').find().toArray(function(err, result) {
      if (err) {
        throw err;
      }
      console.log('data');
      console.log(result);
    });
  });
  res.render('index', { title: 'Express' });
});

module.exports = router;
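One caveat with the controller above: it opens a new Mongo connection on every request. A sketch of caching one connection per tenant subdomain (the connections map and getDb helper are illustrative additions, using the same MongoClient as above):

// cache one db handle per tenant instead of reconnecting per request
var connections = {};

function getDb(tenant, callback) {
  if (connections[tenant]) {
    return callback(null, connections[tenant]);
  }
  MongoClient.connect('mongodb://localhost:27017/' + tenant, function (err, db) {
    if (err) return callback(err);
    connections[tenant] = db;
    callback(null, db);
  });
}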
In the future, if you get more clients, you can implement a Node cluster or a standard Ubuntu cluster in front of the web service.

Not able to export a variable (var app = express()) in nodejs using express

I need help solving this scenario.
I have a file web.js, where I have:
var express = require("express");
var app = express();
var web2 = require("./web2");
/* Code the start the server on the required port*/
app.get('/param1', function(req, res){
console.log("INSIDE GET METHOD OF WEB.JS");
});
module.exports.app = app
I have another file, web2.js, where I have:
var web = require("./web");
app = web.app;
app.get('/param2', function(req, res){
console.log("INSIDE GET METHOD OF WEB2.JS");
});
While starting, I get an error:
TypeError: Cannot call method 'post' of undefined
If I remove line 3 from web.js, I am able to start the server, but a request to http:///param2 gives a 404.
Updated scenario:
I am using a pg database, and in web.js I create a client and keep an instance of it. I then pass this to the other file (web2.js), where the client is always null.
In web.js I have the following code:
var pg = require("pg");
var pgclient;
app.get('*', function(req,res,next){
pg.connect(process.env.DATABASE_URL, function(err, client, done) {
if(client != null){
pgclient = client;
console.log("Client connection with Postgres DB is established");
next();
}
}
}
require("./web2.js")(app, pgclient);
In web2.js I have the following code:
module.exports = function(app, pgclient){
  app.get('/param1', function(req, res){
    if (pgclient != null) {
    }
    else {
      res.send(500, "pgclient is NULL");
    }
  });
};
The code never reaches the if branch (pgclient != null) in web2.js.
The problem is the cyclic dependency between web.js and web2.js. When web2.js requires web.js, web.js's module.exports hasn't been set yet. I would rather do something like this:
web.js
var express = require("express");
var app = express();
app.get("/param1", function (req, res) {
// ...
});
require("./web2")(app);
app.listen(/* port number */);
web2.js
module.exports = function (app) {
  app.get("/param2", function (req, res) {
    // ...
  });
};
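The updated pg scenario also has a timing problem that this restructuring alone doesn't fix: require("./web2.js")(app, pgclient) runs once at startup, before any request has assigned pgclient, so web2.js captures undefined forever. One sketch of a workaround (under the same structure as above) is to pass a function that returns the current client rather than the value itself:
web.js
var pgclient = null;

app.get('*', function (req, res, next) {
  pg.connect(process.env.DATABASE_URL, function (err, client, done) {
    if (err) return next(err);
    pgclient = client;
    next();
  });
});

// pass an accessor, not the (still-null) value
require("./web2")(app, function () { return pgclient; });
web2.js
module.exports = function (app, getClient) {
  app.get('/param1', function (req, res) {
    var client = getClient(); // read the current client at request time
    if (client != null) {
      res.send("pgclient is ready");
    } else {
      res.status(500).send("pgclient is NULL");
    }
  });
};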
