Node.js app continuously consumes memory without freeing it - node.js

I'm facing a weird situation. I wrote an application that performs an HTTP GET request every five minutes. Something like this:
// pingy
'use strict';
var https = require('https'),
url = require('url');
exports.ping = function ping (callback) {
var options = url.parse('https://host.tld/ping');
callback = callback || function () {};
https.get(options, function handler (response) {
var body = '';
response
.on('readable', function onReadable() {
body = body + response.read();
})
.on('end', function onEnd() {
return callback(null, body);
})
.on('error', function onError (err) {
return callback(err);
});
});
};
// in other module
var pingy = require('./lib/pingy');

// Fire one ping every five minutes (300 000 ms).
var FIVE_MINUTES_MS = 5 * 60 * 1000;
setInterval(pingy.ping, FIVE_MINUTES_MS);
Pretty straightforward. The thing is that after some time, the "rss" value from process.memoryUsage() climbs and climbs. It looks like the created ClientRequest objects are never garbage-collected. Although I'm using https in this example, the same happens when using the http module.
Do you have any idea what is wrong here?
EDIT:
I've solved the problem above (see my comment below). Now I'm facing a different problem, which is really, really hard to track down (I used node-webkit-agent to analyze the memory usage, but found nothing special — the heap looks stable to me). The scenario is also nothing special: I'm copying roughly 200 images from source to destination via streams (see code below). What happens is that the "rss" value increases as well. I'm pretty sure there is something wrong with my code regarding how I pipe the files. Don't get me wrong, I have no problem with high memory usage; my problem is that the memory is never freed. To verify that the memory would be cleared at some point in the future, I start an http.createServer once all the files have been copied. Even after a couple of hours the "rss" value stays the same.
So, well, again, do you have any idea what is wrong here? Thanks in advance for every hint! :)
'use strict';
// Copies every file from `directory` into the script's own directory,
// one file every two seconds, while logging process memory usage.
var http = require('http'),
fs = require('fs'),
path = require('path'),
// NOTE(review): util is required but never used in this listing.
util = require('util'),
directory = '/path/to/your/files/',
// Flat work queue of [outfile, infile] path pairs consumed by pipeFile().
jobs = [];
// Log the current process memory usage as one JSON line,
// with rss/heapTotal/heapUsed converted from bytes to KiB.
function printMemoryUsage () {
    var memory = process.memoryUsage();
    ['rss', 'heapTotal', 'heapUsed'].forEach(function (key) {
        // `| 0` truncates to an integer, same as the original.
        memory[key] = (memory[key] / 1024) | 0;
    });
    console.log(JSON.stringify(memory));
}
// Copy the next queued file. jobs holds [outfile, infile] pairs, so pop()
// yields the infile first, then its matching outfile.
function pipeFile() {
    var infile = jobs.pop();
    var outfile = jobs.pop();
    var instream = fs.createReadStream(infile);
    var outstream = fs.createWriteStream(outfile);

    // pipe() already calls outstream.end() when instream ends. The original
    // additionally registered `instream.on('close', outstream.end.bind(...))`
    // and destroy()ed both streams after 'finish' -- the extra end() can fire
    // twice and the destroys tear down streams that are already done.
    instream.pipe(outstream);

    // Without these handlers a single bad file would crash the process
    // (stream 'error' events throw when unhandled).
    instream.on('error', function onReadError (err) {
        console.error('Error reading %s: %s', infile, err.message);
        next();
    });
    outstream.on('error', function onWriteError (err) {
        console.error('Error writing %s: %s', outfile, err.message);
        next();
    });

    // 'finish' fires once all data has been flushed to the destination.
    outstream.on('finish', next);
}
// Schedule the next copy job, or -- once the queue is drained -- start a
// dummy HTTP server so the process stays alive for memory observation.
function next() {
    if (!jobs.length) {
        http.createServer(function (req, res) {
            res.writeHead(200, {'Content-Type': 'text/plain'});
            res.end('Fooboot\n');
        }).listen(1337, '127.0.0.1');
        return;
    }
    setTimeout(pipeFile, 2000);
}
// Enumerate the source directory and queue an [outfile, infile] pair per file.
fs.readdir(directory, function (err, files) {
    if (err) {
        // The original silently ignored readdir failures and then iterated
        // over `undefined`, crashing with an unrelated TypeError.
        console.error('Cannot read directory %s: %s', directory, err.message);
        return;
    }
    files.forEach(function onIteration (file) {
        jobs.push(path.join(__dirname, file)); // outfile
        jobs.push(path.join(directory, file)); // infile
    });
    next();
});
// Log memory usage every 3 seconds for the lifetime of the process.
setInterval(printMemoryUsage, 3000);
These are the memory footprints:
{"rss":13904,"heapTotal":6963,"heapUsed":1758}
{"rss":16012,"heapTotal":6963,"heapUsed":2016}
{"rss":26040,"heapTotal":6963,"heapUsed":2265}
{"rss":31468,"heapTotal":6963,"heapUsed":2453}
{"rss":41080,"heapTotal":6963,"heapUsed":2712}
{"rss":46620,"heapTotal":6963,"heapUsed":2844}
{"rss":49260,"heapTotal":6963,"heapUsed":1999}
{"rss":49524,"heapTotal":6963,"heapUsed":2249}
{"rss":49524,"heapTotal":6963,"heapUsed":2362}
{"rss":49788,"heapTotal":6963,"heapUsed":2621}
{"rss":49788,"heapTotal":6963,"heapUsed":2755}
{"rss":52692,"heapTotal":6963,"heapUsed":2001}
{"rss":52692,"heapTotal":6963,"heapUsed":2138}
{"rss":52692,"heapTotal":6963,"heapUsed":2270}
{"rss":52692,"heapTotal":6963,"heapUsed":2483}
{"rss":52692,"heapTotal":6963,"heapUsed":2600}
{"rss":52692,"heapTotal":6963,"heapUsed":2796}
{"rss":52956,"heapTotal":6963,"heapUsed":1951}
{"rss":52956,"heapTotal":6963,"heapUsed":2079}
{"rss":52956,"heapTotal":6963,"heapUsed":2343}
{"rss":52956,"heapTotal":6963,"heapUsed":2462}
{"rss":52956,"heapTotal":6963,"heapUsed":2689}
{"rss":52956,"heapTotal":6963,"heapUsed":2831}
{"rss":53136,"heapTotal":9011,"heapUsed":1927}
{"rss":53136,"heapTotal":9011,"heapUsed":2176}
{"rss":53136,"heapTotal":9011,"heapUsed":2273}
{"rss":53136,"heapTotal":9011,"heapUsed":2447}
{"rss":53136,"heapTotal":9011,"heapUsed":2545}
{"rss":53136,"heapTotal":9011,"heapUsed":2627}
{"rss":53136,"heapTotal":9011,"heapUsed":2804}
{"rss":53136,"heapTotal":9011,"heapUsed":2890}
{"rss":59732,"heapTotal":9011,"heapUsed":3100}
{"rss":65012,"heapTotal":9011,"heapUsed":3211}
{"rss":73496,"heapTotal":9011,"heapUsed":3409}
{"rss":79304,"heapTotal":9011,"heapUsed":3536}
{"rss":83792,"heapTotal":9011,"heapUsed":3633}
{"rss":95408,"heapTotal":9011,"heapUsed":3865}
{"rss":98840,"heapTotal":9011,"heapUsed":1824}
{"rss":98840,"heapTotal":9011,"heapUsed":2003}
{"rss":98840,"heapTotal":9011,"heapUsed":2205}
{"rss":98840,"heapTotal":9011,"heapUsed":2297}
{"rss":98840,"heapTotal":9011,"heapUsed":2491}
{"rss":98840,"heapTotal":9011,"heapUsed":2608}
{"rss":98840,"heapTotal":9011,"heapUsed":2717}
{"rss":98840,"heapTotal":9011,"heapUsed":2919}
{"rss":99368,"heapTotal":9011,"heapUsed":3036}
{"rss":99368,"heapTotal":9011,"heapUsed":3247}
{"rss":99632,"heapTotal":9011,"heapUsed":3351}
{"rss":99632,"heapTotal":9011,"heapUsed":3452}
{"rss":99896,"heapTotal":9011,"heapUsed":3606}
{"rss":99896,"heapTotal":9011,"heapUsed":3686}
{"rss":105968,"heapTotal":9011,"heapUsed":3824}
{"rss":106760,"heapTotal":9011,"heapUsed":1936}
{"rss":106760,"heapTotal":9011,"heapUsed":2022}
{"rss":106760,"heapTotal":9011,"heapUsed":2187}
{"rss":106760,"heapTotal":9011,"heapUsed":2279}
{"rss":106760,"heapTotal":9011,"heapUsed":2474}
{"rss":106760,"heapTotal":9011,"heapUsed":2614}
{"rss":106760,"heapTotal":9011,"heapUsed":2690}
{"rss":106760,"heapTotal":9011,"heapUsed":2854}
{"rss":106760,"heapTotal":9011,"heapUsed":2953}
{"rss":106760,"heapTotal":9011,"heapUsed":3241}
{"rss":106760,"heapTotal":9011,"heapUsed":3391}
{"rss":106760,"heapTotal":9011,"heapUsed":3535}
{"rss":107288,"heapTotal":9011,"heapUsed":3797}
{"rss":108248,"heapTotal":9011,"heapUsed":1908}

Related

How to disconnect a socket after streaming data?

I am making use of "socket.io-client" and "socket.io stream" to make a request and then stream some data. I have the following code that handles this logic
Client Server Logic
// Express route: ask the name node (over socket.io) where each block lives,
// then stream every block from S3 to its data node.
router.get('/writeData', function(req, res) {
var io = req.app.get('socketio');
var nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
var nameNodeData = {};
// Run the two steps in order; whichever outcome arrives ends the HTTP response.
async.waterfall([
checkForDataNodes,
readFileFromS3
], function(err, result) {
if (err !== null) {
res.json(err);
}else{
res.json("Finished Writing to DN's");
}
});
// Step 1: wait for the name node to push its metadata, then pass it on.
function checkForDataNodes(cb) {
nameNodeSocket.on('nameNodeData', function(data) {
nameNodeData = data;
console.log(nameNodeData);
cb(null, nameNodeData);
});
// NOTE(review): nameNodeData is still {} at this point, so numDataNodes is
// undefined and this early-exit branch can never fire.
if (nameNodeData.numDataNodes === 0) {
cb("No datanodes found");
}
}
// Step 2: stream every block to its data node. All iterations run in
// parallel, and paramsWithRange (declared elsewhere) is shared and mutated
// by each one -- see the answer below for a serialized rewrite.
function readFileFromS3(nameNodeData, cb) {
for (var i in nameNodeData['blockToDataNodes']) {
var IP = nameNodeData['blockToDataNodes'][i]['ipValue'];
var dataNodeSocket = io.connect('http://'+ IP +":5000");
var ss = require("socket.io-stream");
var stream = ss.createStream();
var byteStartRange = nameNodeData['blockToDataNodes'][i]['byteStart'];
var byteStopRange = nameNodeData['blockToDataNodes'][i]['byteStop'];
paramsWithRange['Range'] = "bytes=" + byteStartRange.toString() + "-" + byteStopRange.toString();
//var file = require('fs').createWriteStream('testFile' + i + '.txt');
var getFileName = nameNodeData['blockToDataNodes'][i]['key'].split('/');
var fileData = {
'mainFile': paramsWithRange['Key'].split('/')[1],
'blockName': getFileName[1]
};
ss(dataNodeSocket).emit('sendData', stream, fileData);
s3.getObject(paramsWithRange).createReadStream().pipe(stream);
// NOTE(review): disconnecting here would cut the socket off before the
// pipe above finishes writing -- which is why the line is commented out.
//dataNodeSocket.disconnect();
}
// cb fires immediately; the streams started above are still in flight.
cb(null);
}
});
Server Logic (that gets the data)
// socket.io server side: receives streamed blocks from clients and writes
// each one to <mainFile>/<blockName> on disk.
var dataNodeIO = require('socket.io')(server);
var ss = require("socket.io-stream");

dataNodeIO.on('connection', function (socket) {
    console.log("Succesfully connected!");
    ss(socket).on('sendData', function (stream, data) {
        var IP = data.ipValue;
        var mainFile = data.mainFile;
        var blockName = data.blockName;

        // Ensure the target directory exists, record the block in the
        // report, then stream the payload straight to disk.
        dataNode.makeDir(mainFile);
        dataNode.addToReport(mainFile, blockName);
        var sink = fs.createWriteStream(mainFile + '/' + blockName);
        stream.pipe(sink);
    });
});
How can I properly disconnect the connections in function readFileFromS3. I have noticed using dataNodeSocket.disconnect() at the end does not work as I cannot verify the data was received on the 2nd server. But if I comment it out, I can see the data being streamed to the second server.
My objective is to close the connections in Client Server side
It appears that the main problem with closing the socket is that you weren't waiting for the stream to be done writing before trying to close the socket. So, because the writing is all asynchronous and finishes sometime later, you were trying to close the socket before the data had been written.
Also because you were putting asynchronous operations inside a for loop, you were also running all your operations in parallel which may not be exactly what you want as it makes error handling more difficult and server load more difficult.
Here's the code I would suggest that does the following:
Create a function streamFileFromS3() that streams a single file and returns a promise that will notify when it's done.
Use await in a for loop with that streamFileFromS3() to serialize the operations. You don't have to serialize them, but then you would have to change your error handling to figure out what to do if one errors while the others are already running and you'd have to be more careful about concurrency issues.
Use try/catch to catch any errors from streamFileFromS3().
Add error handling on the stream.
Change all occurrences of data['propertyName'] to data.propertyName. The only time you need to use brackets is if the property name contains a character that is not allowed in a Javascript identifier or if the property name is in a variable. Otherwise, the dot notation is preferred.
Add socket.io connection error handling logic for both socket.io connections.
Set returned status to 500 when there's an error processing the request
So, here's the code for that:
const ss = require("socket.io-stream");

// Express route: fetch block locations from the name node, then stream each
// block from S3 to its data node, one at a time.
router.get('/writeData', function(req, res) {
    const io = req.app.get('socketio');

    // Streams one S3 block to the data node at `ip`. Resolves once the
    // socket.io stream has been fully consumed and the socket disconnected;
    // rejects on connect, stream, or S3 read errors.
    function streamFileFromS3(ip, data) {
        return new Promise((resolve, reject) => {
            const dataNodeSocket = io.connect(`http://${ip}:5000`);
            dataNodeSocket.on('connect_error', reject);
            // FIX: the original was missing `=>` here (syntax error).
            dataNodeSocket.on('connect_timeout', () => {
                reject(new Error(`timeout connecting to http://${ip}:5000`));
            });
            // FIX: a socket.io *client* emits 'connect'; 'connection' is the
            // server-side event, so the original listener never fired.
            dataNodeSocket.on('connect', () => {
                // dataNodeSocket connected now
                const stream = ss.createStream().on('error', reject);
                paramsWithRange.Range = `bytes=${data.byteStart}-${data.byteStop}`;
                const filename = data.key.split('/')[1];
                const fileData = {
                    'mainFile': paramsWithRange.Key.split('/')[1],
                    'blockName': filename
                };
                ss(dataNodeSocket).emit('sendData', stream, fileData);
                // get S3 data and pipe it to the socket.io stream
                s3.getObject(paramsWithRange).createReadStream().on('error', reject).pipe(stream);
                stream.on('close', () => {
                    dataNodeSocket.disconnect();
                    resolve();
                });
            });
        });
    }

    function connectError(msg) {
        res.status(500).send(`Error connecting to ${NAMENODE_ADDRESS}`);
    }

    const nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
    nameNodeSocket.on('connect_error', connectError).on('connect_timeout', connectError);

    // Serialize the downloads with await so one try/catch handles all errors.
    nameNodeSocket.on('nameNodeData', async (nameNodeData) => {
        try {
            for (let item of nameNodeData.blockToDataNodes) {
                await streamFileFromS3(item.ipValue, item);
            }
            res.json("Finished Writing to DN's");
        } catch(e) {
            res.status(500).json(e);
        }
    });
});
Other notes:
I don't know what paramsWithRange is as it is not declared here and when you were doing everything in parallel, it was getting shared among all the connections which is asking for a concurrency issue. In my serialized implementation, it's probably safe to share it, but the way it is now bothers me as it's a concurrency issue waiting to happen.

Node.JS redirect eval output to string

Can I redirect nodejs eval output to string or any other way?
edit: I need to return it as string as response for my web application (post data)
var http = require('http');

// Evaluates a POSTed script and returns String(result) to the client.
// WARNING: eval of an untrusted request body executes arbitrary code inside
// the server process -- never expose this outside a trusted environment.
var server = http.createServer(function(req, res) {
    var script = '';
    if (req.method === 'POST' && req.url === '/doeval') {
        req.on('data', function(chunk) {
            script += chunk;
        });
        req.on('end', function() {
            // script = 'console.log("aaaa")';
            var result = eval(script);
            res.write(String(result));
            res.end();
        });
    }
// FIX: the original had mismatched braces here (an extra `}` and a missing
// `);` to close the createServer call), so it did not parse.
});
server.listen(80);
output:
result: undefined
expected:
result: aaaa
Note: Running untrusted code is extremely dangerous and you should be extremely careful with whatever solution you are choosing. The one proposed here has flaws as well. To be safer, consider using a container, such as docker or lxc.
Don't use eval in this case. It's the wrong tool for the job. Do you really want arbitrary code to be evaled in your webserver context? What if the sent code is throw new Error('haha') or process.exit()? It's a huge security risk! Write the code to a temporary file, spawn a node process that executes the file and read its output. See https://nodejs.org/api/child_process.html .
Example:
var http = require('http');
var fs = require('fs');
var childProcess = require('child_process');

// Runs a POSTed script in a separate node process (instead of eval'ing it
// in the server) and returns the child's stdout to the client.
var server = http.createServer(function(req, res) {
    var script = '';
    if (req.method === 'POST' && req.url === '/doeval') {
        req.on('data', function(chunk) {
            script += chunk;
        });
        req.on('end', function() {
            // Write the script to a throwaway file the child can execute.
            var tempFileName = Date.now() + '.js';
            fs.writeFile(tempFileName, script, function(err) {
                if (err) {
                    res.statusCode = 500;
                    res.end('could not write temp file');
                    return;
                }
                // FIX: the module was required as `childProcess`, but the
                // original called the undefined name `child_process`.
                childProcess.execFile('node', [tempFileName], function(err, stdout, stderr) {
                    // Clean up the temp file whether the child succeeded or not.
                    fs.unlink(tempFileName, function () {});
                    if (err) {
                        res.statusCode = 500;
                        res.end('script failed: ' + err.message);
                        return;
                    }
                    res.write(stdout);
                    res.end();
                });
            });
        });
    }
// FIX: the original had an extra `}` and a missing `);` here.
});
server.listen(80);
There are existing npm packages that help creating and cleaning up temporary files. Also have a look at other methods of child_process.
But, this is not secure yet because the subprocess will run with the same privileges as your server (which by the looks of it runs as root :-/ ).
You should set the owner (and the group) of the file and the subprocess to nobody or some other system user that basically doesn't have any rights to access anything. Or chroot the subprocess.
Note: It was downvoted but read the entire answer before jumping to conclusions
First of all this looks like a completely insecure functionality that can potentially open your system to countless vulnerabilities. But it's an interesting question so I'll answer how you can do what you ask for, but I strongly suggest to reconsider your requirements nonetheless.
That having been said, you could pass a fake console object to you evaluated script, by wrapping it in a closure in a similar way like modules are wrapped when they are required.
Instead of eval you can use the vm module to run the script in a separate V8 context with no access file system or even require().
Note that I don't recommend saving it in a file and running it as a child process because that way the script will have access to your file system which is a serious vulnerability. The only option I would ever consider running untrusted code as a standalone process would be inside of a container that has no access to the network or any shared storage outside of that container.
With eval
For example:
const util = require('util');
// Script to evaluate; its console.log output should be captured as a string.
const script = 'console.log("aaaa");';
let result = '';
// Fake console whose log() appends formatted output to `result`
// instead of writing to stdout.
const cons = {
log: (...args) => result += (util.format(...args) + '\n'),
};
// Wrap the script in an arrow function that takes `console` as a parameter,
// so the script's console.log calls resolve to the fake console.
eval(`((console) => { ${script} })`)(cons);
console.log('result:', result);
This will work if everything is synchronous. If the console.log happens asynchronously then you will have to add some way of waiting for the changes.
So this will not work:
const util = require('util');
// Asynchronous script: it only logs after a 1 second timeout.
const script = 'setTimeout(() => console.log("aaaa"), 1000);';
let result = '';
const cons = {
log: (...args) => result += (util.format(...args) + '\n'),
};
eval(`((console) => { ${script} })`)(cons);
// This runs before the setTimeout inside the script fires,
// so `result` is still the empty string here.
console.log('result:', result);
but this will:
const util = require('util');
// Same asynchronous script as above: logs after a 1 second timeout.
const script = 'setTimeout(() => console.log("aaaa"), 1000);';
let result = '';
const cons = {
log: (...args) => result += (util.format(...args) + '\n'),
};
eval(`((console) => { ${script} })`)(cons);
// Wait 1500 ms -- longer than the script's 1000 ms timeout -- so the
// output has been collected by the time `result` is read.
setTimeout(() => console.log('result:', result), 1500);
because it waits before inspecting the collected output longer than it takes the evaluated code to create that output.
Without eval
You can run that code in a separate V8 context that has no access to other modules, file system, etc. For example:
const vm = require('vm');
const util = require('util');
const script = 'console.log("aaaa");';
let result = '';
// Fake console that accumulates formatted output into `result`.
const cons = {
log: (...args) => result += (util.format(...args) + '\n'),
};
// The sandbox context exposes only the fake console -- no require, no file
// system access, no other globals from this process.
const context = vm.createContext({console: cons});
vm.runInContext(script, context);
console.log('result:', result);
Handling syntax errors
You can handle syntax errors to make sure that this script will not crash your application like this:
const vm = require('vm');
const util = require('util');
// Deliberate typo (console.lo) to demonstrate error handling below.
const script = 'console.lo("aaaa");';
let result = '';
const cons = {
log: (...args) => result += (util.format(...args) + '\n'),
};
const context = vm.createContext({console: cons});
// Errors thrown by the sandboxed script (including syntax errors) surface
// here as regular exceptions, so they cannot crash the host application.
try {
vm.runInContext(script, context);
console.log('result:', result);
} catch (err) {
console.log('error:', err.message);
}
Now instead of crashing it will print:
error: console.lo is not a function

Writing buffer response from resemble.js to file

I'm using node-resemble-js to compare two PNG images.
The comparison happens without issue and I get a successful/relevant response however I'm having trouble outputting the image diff.
var express = require('express');
var fs = require('fs');
var resemble = require('node-resemble-js');
var router = express.Router();

// GET /compare -- diff 1.png against 2.png and render the result page.
router.get('/compare', function(req, res, next) {
compareImages(res);
});

var compareImages = function (res) {
resemble.outputSettings({
largeImageThreshold: 0
});
var diff = resemble('1.png')
.compareTo('2.png')
.ignoreColors()
.onComplete(function(data){
console.log(data);
var png = data.getDiffImage();
// NOTE(review): png.data appears to be the raw pixel buffer, not an
// encoded PNG -- it needs png.pack() before writing (see the answer
// below), which is why diff.png comes out invalid.
fs.writeFile('diff.png', png.data, null, function (err) {
if (err) {
throw 'error writing file: ' + err;
}
console.log('file written');
});
// NOTE(review): render is started before the file write completes.
res.render('compare');
});
};
module.exports = router;
It writes to diff.png as expected however it's not creating a valid image.
Any ideas where I'm going wrong? Feel like I'm pretty close but just unsure of final piece.
Thanks
Looks like there is a pack() method that needs to be called, which does some work and then streamifies the data. In that case you can buffer the stream and then call writeFile like this:
var png = data.getDiffImage();
// Collect the packed PNG stream into chunks and concatenate once at the end.
// This replaces the deprecated `new Buffer([])` constructor and avoids the
// O(n^2) cost of re-running Buffer.concat on every 'data' event.
var chunks = [];
var strm = png.pack();
strm.on('data', function (dat) {
    chunks.push(dat);
});
strm.on('end', function() {
    fs.writeFile('diff.png', Buffer.concat(chunks), null, function (err) {
        if (err) {
            // Throw a real Error; throwing a bare string loses the stack trace.
            throw new Error('error writing file: ' + err);
        }
        console.log('file written');
    });
});
or you can just pipe it like this, which is a little simpler:
png.pack().pipe(fs.createWriteStream('diff.png'))
Honestly, your approach made sense to me (grab the Buffer and write it) but I guess that data Buffer attached to what comes back from getDiffImage isn't really the final png. Seems like the docs are a bit thin but there's some info here: https://github.com/lksv/node-resemble.js/issues/4

Fetch 100 zip files in node

So I'm trying to fetch a bunch of files from a server. The current code is basically as follows.
var http = require('http');
var fs = require('fs');

// FIX: the original listing had a malformed literal here
// (`[{id:'fileOne', id:'fileTwo', id:'fileThree',....]`), which does not
// parse; one object per id is presumably what was meant.
var arr = [{id: 'fileOne'}, {id: 'fileTwo'}, {id: 'fileThree'}];

// Kick off one download per entry. NOTE(review): as written, `file` is
// function-scoped (var), so every response created in this loop pipes into
// the stream from the last iteration, all requests run in parallel, and
// there is no error handling -- exactly the problems the question asks about.
function fetchData() {
    for (var i = 0; i < arr.length; i++) {
        var file = fs.createWriteStream("../path/file.zip");
        var request = http.get("url/AFG_adm.zip", function(response) {
            response.pipe(file);
        });
    }
}
I don't think this is the best approach, trying to figure out how to handle errors, how to make sure that a file gets loaded before the next iteration... Any help is much appreciated.
You should use the async module for handling the async part, also the request module will save you a lot of effort.
You can handle this in many ways using either async.cargo or async.map.
The theory is to group up things or a series of things, and then take action according to what you want it to do, but in async way.
so a basic .map of an array of files to download would be like this.
// required modules
var async = require('async');
var request = require('request');
// FIX: fs is used below but was never required in the original.
var fs = require('fs');

// array of urls
var URLs = ['hxxp://...ZipFile1.zip', 'hxxp://...ZipFile2.zip'];

// destination directory
var destinationDirectory = 'downloads';

// Downloads one url into destinationDirectory and calls back with the path
// of the file it wrote (or an error).
function asyncDownload(url, callback) {
    // FIX: take everything after the last "/" to get the file name; the
    // original used lastIndexOf(".") + 1, which yields only the extension
    // ("zip"), making every download overwrite the same file.
    var filename = url.substring(url.lastIndexOf("/") + 1);
    // create write stream
    var stream = fs.createWriteStream(destinationDirectory + "/" + filename);
    // listen for open event to start request and pipe
    stream.on('open', function () {
        request(url).pipe(stream);
    });
    // propagate stream failures instead of leaving the map hanging
    stream.on('error', function (err) {
        callback(err);
    });
    // when finished, report the written path
    stream.on('finish', function () {
        callback(null, destinationDirectory + "/" + filename);
    });
}

async.map(
    URLs, asyncDownload, function (err, results) {
        console.log(results);
    });

RxJS + node.js HTTP server implementation?

I 've been well with node.js until RxJS implementation.
Here is my trial code studying-
Reactive-Extensions / rxjs-node
https://github.com/Reactive-Extensions/rxjs-node
rx_http.js
(RxJS wrapper of the http lib of node.js)
var Rx = require("./rx.min");
var http = require("http");
// Re-export everything from the plain http module.
for(var k in http)
{
exports[k] = http[k];
}
// Returns an observable of { request, response } pairs with the underlying
// http.Server attached as `.server`.
exports.createServer = function ()
{
// NOTE(review): AsyncSubject replays only the LAST value, and only after
// completion -- and onCompleted() below ends the sequence on the very first
// request. That is why the server answers once and then stops reacting.
// Use Rx.Subject and drop the onCompleted() call (see the answer below).
var subject = new Rx.AsyncSubject();
var observable = subject.asObservable();
observable.server = http.createServer(function (request, response)
{
subject.onNext({ request:request, response:response });
subject.onCompleted();
});
return observable;
};
server.js
var http = require('./rx_http');

// rxServer: start listening, then handle each request/response pair that
// the observable pushes.
var port = 3000;
var serverObservable = http.createServer();
serverObservable.server.listen(port);
console.log("Server listening on port: "+port);

// HTTP request handling: one subscription serves every incoming request.
serverObservable.subscribe(function (data) {
    console.log(data.request.headers);
    var response = data.response;
    response.writeHead(200, {'Content-Type':"text/html"});
    response.end("hello world");
    console.log("res content out");
});

// Last-resort safety net: log uncaught exceptions instead of crashing.
process.on('uncaughtException', function (err) {
    console.log(['Caught exception:', err.message].join(" "));
});
The code ends up producing a one-time 'hello world' output in the browser, and the Rx server stops reacting to further requests (browser reload etc.).
I'm on my way to learning RxJS, but there is little documentation to be found on the web.
Tell me what's wrong with the code, and if you know better implementations, please share.
Thank you.
Use Rx.Subject instead of Rx.AsyncSubject in rx_http.js.
AsyncSubject caches the last value passed to onNext() and propagates it to all observers only when the sequence completes (see the AsyncSubject documentation).
exports.createServer = function ()
{
var subject = new Rx.Subject();
var observable = subject.asObservable();
observable.server = http.createServer(function (request, response)
{
subject.onNext({ request:request, response:response });
});
return observable;
};
Calling onCompleted on the subject when the first request arrives ends the observable sequence. Could you please remove that line and try again?
I hope it helps.
Ahmet Ali Akkas

Resources