I have a node.js script that continuously requests a page, sort of like a cron job.
However, after a few minutes Node starts to use a lot of CPU (up to 70%) and memory (up to 200mb).
What is wrong with my script?
function cron(path)
{
var http = require('http');
var site = http.createClient(443, 'www.website.com', true);
var request = site.request('GET', path, {'host': 'www.website.com'});
request.end();
request.on('response', function (response) {
setTimeout(function(){cron(path)},15000);
});
}
cron('/path/to/page');
request.on('response', function (response) {
setTimeout(function(){cron(path)},15000);
});
For every response you create a new cron job. Log your responses. If your getting more then 1 from your request then your exponantially creating more cron jobs.
Your creating a function() {} with a reference to path. So the entire scope state is kept. you want to free memory by adding this:
var site = null;
var request = null;
Your calling require("http") inside a function rather then outside in module scope. You only need to get http once so place at the top of your file in module scope.
var http = require('http');
var site = http.createClient(443, 'www.website.com', true);
function cron(path)
{
var request = site.request('GET', path, {'host': 'www.website.com'});
request.end();
var once = true;
request.on('response', doIt);
function doIt(response) {
if (!once) {
once = null;
doIt = function() {};
setTimeout(function(){cron(path)},15000);
}
});
site = null;
request = null;
}
cron('/path/to/page');
In addition to the tips from #Raynos, here's another. I find that recursive calls like this in long running processes make me a bit nervous so I'd err on the side of using setInterval instead. I'd maybe split the cron and the http behaviour apart in case you want to try and re-use that logic, although that'll depend on your context:
e.g. in node 0.4.7:
var https = require('https');
function poll(path)
{
https.get({
host: 'www.website.com',
port: 443,
path: path
}, function(res) {
console.log("Got response: " + res.statusCode);
}).on('error', function(e) {
console.log("Got error: " + e.message);
});
}
function cron(path)
{
return setInterval(function(){
poll(path);
},15000);
}
var intervalId = cron('/path/to/page'); // keep in case you need to use clearInterval
Related
I am trying to implement the ._read function of a readable stream, a problem happens when ._read is called and there isn't data, the documentation says that I can push('') until more data comes, and I should only return false when the stream will never have more data.
https://nodejs.org/api/stream.html#stream_readable_read_size_1
But it also says that if I need to do that then something is terribly wrong with my design.
https://nodejs.org/api/stream.html#stream_stream_push
But I can't find an alternative to that.
code:
var http = require('http');
var https = require('https');
var Readable = require('stream').Readable;
var router = require('express').Router();
var buffer = [];
router.post('/', function(clientRequest, clientResponse) {
var delayedMSStream = new Readable;
delayedMSStream._read = function() {
var a=buffer.shift();
if(typeof a === 'undefined'){
this.push('');
return true;
}
else {
this.push(a);
if(a===null) {
return false;
}
return true;
}
};
//I need to get a url from example.com
https.request({hostname:'example.com'}, function(exampleResponse){
data='';
exampleResponse.on('data',function(chunk){data+=chunk});
exampleResponse.on('end',function(){
var MSRequestOptions = {hostname: data, method: 'POST'};
var MSRequest = https.request(MSRequestOptions, function(MSResponse){
MSResponse.on('end', function () {
console.log("MSResponse.on(end)");//>>>
});//end MSResponse.on(end)
}); //end MSRequest
delayedMSStream.pipe(MSRequest);
});
});
clientRequest.on('data', function (chunk) {
buffer.push(chunk);
});
clientRequest.on('end', function () {//when done streaming audio
buffer.push(null);
});
});//end router.post('/')
explanation:
client sends a POST request streaming audio to my server, my server requests a url from example.com, when example.com responds with the url, my server streams the audio to it.
What's a smarter way to do it?
So if I undertstand the code correctly, you:
receive a request,
make your own request to a remote endpoint and fetch a URL
make a new request to that URL and pipe that to original response.
There are ways to do this other then yours, and even your way would look cleaner to me if you just improve the naming a bit. Also, splitting the huge request into a few functions with smaller responsibility scopes might help.
I would make the endpoint this way:
let http = require('http');
let https = require('https');
let Readable = require('stream').Readable;
let router = require('express').Router();
let buffer = [];
/**
* Gets some data from a remote host. Calls back when done.
* We cannot pipe this directly into your stream chain as we need the complete data to get the end result.
*/
function getHostname(cb) {
https.request({
hostname: 'example.com'
}, function(response) {
let data = '';
response.on('error', err => cb(err)); // shortened for brewity
response.on('data', function(chunk) {
data = data + chunk;
});
response.on('end', function() {
// we're done here.
cb(null, data.toString());
});
});
}
router.post('/', function(request, response) {
// first let's get that url.
getHostname(function(err, hostname) {
if (err) { return response.status(500).end(); }
// now make that other request which we can stream.
https.request({
hostname: hostname,
method: 'POST'
}, function(dataStream) {
dataStream.pipe(response);
});
});
});
Now, as said in the comments, with streams2, you don't have to manage your streams. With node versions pre 0.10 you have had to listen to 'read', 'data' etc events, with newer node versions, it's handled. Furthermore, you don't even need it here, streams are smart enough to handle backpressure on their own.
I'm beginning work on my first Node.js application and running into memory leak issues that i cannot pin down. I want the app to act as service in a way that continually runs and polls and endpoint. I think I may be missing something here. The idea of the project is to have a node application continuously make http requests to an Arduino board i have connected to my network with a web server on it. The networked board responds to the requests with some JSON or XML representing the state of the sensors attached to it. The idea of the node app is to log then emit sensor changes that will eventually be consumed by another electron project.
The node app is currently broken into a couple modules:
proxy: make the http calls to the different endpoints on arduino web server:
var http = require('http'),
KeepAliveAgent = require('keep-alive-agent');
var controllerRequestOpts = {};
function send(path, cb){
var response = '';
//build the request
var request = controllerRequestOpts;
request.path = path;
//make the call to the controller
http.get(request, function(res){
res.on('data', function(chunk){
response += chunk;
});
res.on('end', function(){
cb(null, response);
});
})
.on('error',function(e){
cb(e, null);
});
}
module.exports = function(controllerOptions){
controllerOptions.port = controllerOptions.port || 2222;
controllerRequestOpts = controllerOptions;
controllerRequestOpts.agent = new KeepAliveAgent();
return{
//JSON
queryJson: function(cb){
send('/json', cb);
},
//XML
queryXml: function(cb){
send('/xml', cb);
}
//Additional endpoints
}
}
runner: Loop forever with the interval provided making the proxy calls ot the arduino
var proxy = require('proxy');
var Watcher = require('./watcher');
var timer;
var toUpdate;
function initProxy(controllerParams){
proxy = proxy(controllerParams);
Watcher = new Watcher();
}
function startListening(startOptions){
var query;
//determine the query and callback functions based off configuration
query = startOptions.queryType === 'json'
? proxy.queryJson
: proxy.queryXml;
toUpdate = startOptions.queryType === 'json'
? Watcher.updateLatestJson
: Watcher.updateLatestXml;
//Start running and making requests every 15 seconds
timer = setInterval(function(){
query(handleResponse);
},startOptions.queryInterval);
}
function handleResponse(err, resp){
if(err){
console.log('ERROR: ' + err);
}
else{
toUpdate.call(Watcher, resp);
}
}
function stopListening(){
clearInterval(timer);
process.exit();
}
var runner = {
connect: function(controllerParams){
initProxy(controllerParams);
},
start: function(startOptions){
startListening(startOptions);
return Watcher;
},
stop: function(){
stopListening();
}
};
module.exports = runner;
I have a "Watcher" module which is just a constructor function that emits the changes back to the calling app which looks like:
var runner = require('./index');
var controllerSettings = {
hostname: '192.168.1.150',
port:2222
}
var startOptions = {
queryType: 'json',
queryInterval: 15000
}
runner.connect(controllerSettings);
var watcher = runner.start(startOptions);
watcher.on('P1Changed', printParams)
Everything is working as expected but as the app runs over time the memory usage for node process constantly increases. I'm wondering if i'm using either the http module incorrectly or if the runner shouldn't be doing a setInterval perhaps. Is there a standard way to run this kind of app as a 'service' and not so much as a 'server'
Continuously send multiple HTTP requests will cause node to create huge TLSWrap objects that the GC will not be able to clear for several minutes.
If you wish to send data to the same host(s) over and over again, you need to open a TCP connection (stream) rather than use HTTP requests that have huge overhead.
I need to connect to a web page and return the status code of the page, which I've been able to achieve using http.request however the pages I need to request can take a long time, sometimes several minutes, so I'm always getting a socket hang up error.
I'm using the following code so far:
var reqPage = function(urlString, cb) {
// Resolve the URL
var path = url.parse(urlString);
var req = http.request({
host: path.hostname,
path: path.pathname,
port: 80,
method: 'GET'
});
req.on('end', function() {
cb.call(this, res);
});
req.on('error', function(e) {
winston.error(e.message);
});
};
What do I need to do to ensure that my application still attempts to connect to the page even if it's going to take a few minutes?
Use the request module and set the timeout option to an appropriate value (in milliseconds)
var request = require('request')
var url = 'http://www.google.com' // input your url here
// use a timeout value of 10 seconds
var timeoutInMilliseconds = 10*1000
var opts = {
url: url,
timeout: timeoutInMilliseconds
}
request(opts, function (err, res, body) {
if (err) {
console.dir(err)
return
}
var statusCode = res.statusCode
console.log('status code: ' + statusCode)
})
Add this if you don't want to use a higher level http client like request or superagent , then add this...
req.on("connection", function(socket){
socket.setTimeout((1000*60*5)); //5 mins
});
Here is the thing :
I have a client which sends data to a server. This server has to contact an external A.P.I. and send back its response to the client. I just can't figure out how and where I can contact the external A.P.I once the server has got the client data.
I route client data like this :
app.post('/getAutoComplete', routes.read);
routes.read retrieves the data within req.body. With my nodejs version (without express framework), I then request the api this way :
var http = require('http'), options = {
host : "192.168.1.38",
port : 8080,
path : "/myURL",
method : 'POST'
};
var webservice_data = "";
var webservice_request = http.request(options, function(webservice_response)
{
webservice_response.on('error', function(e){ console.log(e.message); });
webservice_response.on('data', function(chunk){ webservice_data += chunk;});
webservice_response.on('end', function(){res.send(webservice_data);});
});
webservice_request.write(req.body);
webservice_request.end();
The problem is that i'd like to use native expressJS method like app.post but I don't know how because :
Express (app) object is not available here (declared in app.js but not in the route file)
I don't know how to send POST data with app.post
Any suggestion ?
app.post('/getAutoComplete', routes.read);
// assuming routes.read lookes something like this
routes.read = function read(req, res) {
var http = require('http'), options = {
host : "192.168.1.38",
port : 8080,
path : "/myURL",
method : 'POST'
};
var webservice_data = "";
var webservice_request = http.request(options, function(webservice_response)
{
webservice_response.on('error', function(e){ console.log(e.message); });
webservice_response.on('data', function(chunk){ webservice_data += chunk;});
webservice_response.on('end', function(){res.send(webservice_data);});
});
webservice_request.write(req.body);
webservice_request.end();
};
Also check out https://github.com/mikeal/request It's the de-facto module for doing web requests in node.
routes.read is a function. You can call it with extra parameters, so for example
app.post('/getAutoComplete', function(req,res) {
var q = req.query.q; // or whatever data you need
routes.read(q, function(err, response) {
if (err) throw err;
return res.json(response);
});
});
Now make the routes.read function use the first parameter as the query and when it's gathered the response from the remote API, call the second parameter with any error as the first parameter and the response as the second one.
Update This answer has already been picked as an answer, but it'd be more helpful if I showed an example of routes.read, too:
routes.read = function(q, cb) {
// pretend we calculate the result
var result = q * 10;
if (result > 100) {
// call the callback with error set
return cb("q value too high");
}
// all is well, use setTimeout to demonstrate
// an asynchronous return
setTimeout(function() { cb(null, result) }, 2000);
};
app.get('/', function(req, res){
var options = {
host: 'www.google.com'
};
http.get(options, function(http_res) {
http_res.on('data', function (chunk) {
res.send('BODY: ' + chunk);
});
res.end("");
});
});
I am trying to download google.com homepage, and reprint it, but I get an "Can't use mutable header APIs after sent." error
Anyone know why? or how to make http call?
Check out the example here on the node.js doc.
The method http.get is a convenience method, it handles a lot of basic stuff for a GET request, which usually has no body to it. Below is a sample of how to make a simple HTTP GET request.
var http = require("http");
var options = {
host: 'www.google.com'
};
http.get(options, function (http_res) {
// initialize the container for our data
var data = "";
// this event fires many times, each time collecting another piece of the response
http_res.on("data", function (chunk) {
// append this chunk to our growing `data` var
data += chunk;
});
// this event fires *one* time, after all the `data` events/chunks have been gathered
http_res.on("end", function () {
// you can use res.send instead of console.log to output via express
console.log(data);
});
});