Why are my requests being paused when they're in a loop? - node.js

I am trying to test some examples from the book I'm reading, "Learning Node" (2012). My application tests the server by making 2000 requests, but it keeps pausing: the tester sends 5 requests, pauses, then sends another 5 after a certain interval. Why is it pausing? How can I fix this?
The server code:
var http = require('http');
var fs = require('fs');

// write out numbers
var counter = 0;
function writeNumbers(res)
{
    for (var i = 0; i < 100; i++)
    {
        counter++;
        res.write(counter.toString() + '\n');
    }
}

// create the http server
http.createServer( function(req, res) {
    var query = require('url').parse(req.url).query;
    var app = require('querystring').parse(query).file + ".txt";

    // content header
    res.writeHead(200, { 'Content-Type': 'text/plain' } );

    // write out numbers
    writeNumbers(res);

    // timer to open file and read contents
    setTimeout(function() {
        console.log('opening ' + app);
        // open and read in file contents
        fs.readFile(app, 'utf8', function(err, data) {
            if (err)
                res.write('Could not find or open file for reading\n');
            else
                res.write(data);
            res.end();
        });
    }, 2000);
}).listen(3000);

console.log('Server is running on port 3000');
The spam test code:
var http = require('http');
// the url we want, plus the path and options we need
var options = {
    host: 'localhost',
    port: 3000,
    path: '/?file=secondary',
    method: 'GET'
};

var processPublicTimeline = function(response) {
    // finished? ok, write the data to a file
    console.log('finished request');
};

for (var i = 0; i < 2000; i++)
{
    // make the request, and then end it, to close the connection
    http.request(options, processPublicTimeline).end();
}

While this definitely has some relation to "Why is node.js only processing six requests at a time?", it is also because you are using a timeout to call res.end(), which closes the connection/response and lets the server move on to the next connection in the queue.
You should instead think about these types of things asynchronously: not with timeouts, but with callbacks.
So your two main blocks could look more like this:
var counter = 0;
function writeNumbers(res, callBack){
    // notice the callBack argument
    for (var i = 0; i < 100; i++){
        counter++;
        res.write(counter.toString() + '\n');
    }
    // execute callBack (if it exists and is a function)
    if (callBack && typeof callBack === "function") callBack();
}
http.createServer( function (req, res){
    var query = require('url').parse(req.url).query;
    var app = require('querystring').parse(query).file + ".txt";

    res.writeHead(200, { 'Content-Type': 'text/plain' } );

    writeNumbers(res, function(){
        // This function is passed as the callBack argument for writeNumbers to evaluate.
        // It executes when the main writeNumbers portion finishes.
        console.log('opening ' + app);
        fs.readFile(app, 'utf8', function(err, data) {
            if (err)
                res.write('Could not find or open file for reading\n');
            else
                res.write(data);
            res.end();
        });
    });
}).listen(3000);
Notice that your writeNumbers function now takes a callBack argument to execute when it is done, and that when you call it in your request handler, you pass a function as that argument. This is one of the core patterns used very frequently in node.js/javascript applications.
This means that you aren't waiting for a timeout to fire before ending your response; instead, the response is ended as soon as it has been processed, and the server moves on to the next connection immediately. That is likely to happen far quicker than the 2 seconds of your timeout, so you should see your connections being processed much faster.
Because (as someone pointed out in your comments) your system can only keep a few TCP connections open at a time, you want to move through your connections as quickly as possible. Leveraging a callback chain can help you do that when you want to do things in a certain order, or when you need to wait for certain processes to finish before executing others, without guessing with a timeout. See the note on the client-side socket limit below.
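As for why the tester moves in batches of five: in the Node versions contemporary with that book, the default HTTP agent capped concurrent sockets per host at 5. A minimal sketch of raising that cap in the spam tester, assuming a Node 0.x-era runtime (the exact default depends on your version):
var http = require('http');

// Node 0.x defaulted http.globalAgent.maxSockets to 5 per host,
// which is why the tester proceeds five requests at a time.
http.globalAgent.maxSockets = 100;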
Hope this helps!

Related

Long running Node.js service to continuously make http requests

I'm beginning work on my first Node.js application and running into memory leak issues that I cannot pin down. I want the app to act as a service that continually runs and polls an endpoint. I think I may be missing something here. The idea of the project is to have a node application continuously make http requests to an Arduino board I have connected to my network with a web server on it. The networked board responds to the requests with some JSON or XML representing the state of the sensors attached to it. The idea of the node app is to log and then emit sensor changes that will eventually be consumed by another electron project.
The node app is currently broken into a couple of modules:
proxy: makes the http calls to the different endpoints on the Arduino web server:
var http = require('http'),
    KeepAliveAgent = require('keep-alive-agent');

var controllerRequestOpts = {};

function send(path, cb){
    var response = '';
    // build the request
    var request = controllerRequestOpts;
    request.path = path;
    // make the call to the controller
    http.get(request, function(res){
        res.on('data', function(chunk){
            response += chunk;
        });
        res.on('end', function(){
            cb(null, response);
        });
    })
    .on('error', function(e){
        cb(e, null);
    });
}

module.exports = function(controllerOptions){
    controllerOptions.port = controllerOptions.port || 2222;
    controllerRequestOpts = controllerOptions;
    controllerRequestOpts.agent = new KeepAliveAgent();
    return {
        // JSON
        queryJson: function(cb){
            send('/json', cb);
        },
        // XML
        queryXml: function(cb){
            send('/xml', cb);
        }
        // Additional endpoints
    };
};
runner: loops forever with the interval provided, making the proxy calls to the Arduino:
var proxy = require('proxy');
var Watcher = require('./watcher');

var timer;
var toUpdate;

function initProxy(controllerParams){
    proxy = proxy(controllerParams);
    Watcher = new Watcher();
}

function startListening(startOptions){
    var query;
    // determine the query and callback functions based off configuration
    query = startOptions.queryType === 'json'
        ? proxy.queryJson
        : proxy.queryXml;
    toUpdate = startOptions.queryType === 'json'
        ? Watcher.updateLatestJson
        : Watcher.updateLatestXml;
    // start running and making requests every 15 seconds
    timer = setInterval(function(){
        query(handleResponse);
    }, startOptions.queryInterval);
}

function handleResponse(err, resp){
    if(err){
        console.log('ERROR: ' + err);
    }
    else{
        toUpdate.call(Watcher, resp);
    }
}

function stopListening(){
    clearInterval(timer);
    process.exit();
}

var runner = {
    connect: function(controllerParams){
        initProxy(controllerParams);
    },
    start: function(startOptions){
        startListening(startOptions);
        return Watcher;
    },
    stop: function(){
        stopListening();
    }
};

module.exports = runner;
I have a "Watcher" module which is just a constructor function that emits the changes back to the calling app which looks like:
var runner = require('./index');

var controllerSettings = {
    hostname: '192.168.1.150',
    port: 2222
};

var startOptions = {
    queryType: 'json',
    queryInterval: 15000
};

runner.connect(controllerSettings);
var watcher = runner.start(startOptions);
watcher.on('P1Changed', printParams);
Everything is working as expected, but as the app runs over time, the memory usage of the node process constantly increases. I'm wondering if I'm using the http module incorrectly, or if perhaps the runner shouldn't be doing a setInterval. Is there a standard way to run this kind of app as a 'service' and not so much as a 'server'?
Continuously sending multiple HTTP requests will cause node to create huge TLSWrap objects that the GC will not be able to clear for several minutes.
If you wish to send data to the same host(s) over and over again, you should open a TCP connection (stream) rather than use HTTP requests, which carry a lot of overhead.
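For illustration, here is a minimal sketch of polling over one long-lived TCP socket with the built-in net module. The host, port, and path come from the question; the raw HTTP request line is an assumption about what the board accepts, so treat this as a starting point rather than a drop-in replacement:
var net = require('net');

// one persistent socket to the board instead of a new HTTP request per poll
var socket = net.connect({ host: '192.168.1.150', port: 2222 });
socket.setEncoding('utf8');

socket.on('data', function (chunk) {
    // hand the raw response off to the watcher/parser
    console.log('received:', chunk);
});

socket.on('error', function (err) {
    console.log('socket error: ' + err.message);
});

// reuse the same connection for every poll
setInterval(function () {
    socket.write('GET /json HTTP/1.1\r\nHost: 192.168.1.150\r\nConnection: keep-alive\r\n\r\n');
}, 15000);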

nodejs/express - stream stdout instantly to the client

I spawned the following child: var spw = spawn('ping', ['-n','10', '127.0.0.1']) and I would like to receive the ping results on the client side (browser) one by one, not as a whole.
So far I tried this:
app.get('/path', function(req, res) {
    ...
    spw.stdout.on('data', function (data) {
        var str = data.toString();
        res.write(str + "\n");
    });
    ...
}
and that:
...
spw.stdout.pipe(res);
...
In both cases the browser waits for all 10 pings to complete and then prints the result as a whole. I would like to have them one by one; how do I accomplish that?
(Client is just making a call to .../path and console.logs the result)
EDIT: Although I do believe that websockets are necessary to implement this, I just want to know whether there are any other ways. I saw several confusing SO answers and blog posts (in this post, at step one, the OP streams the logs to the browser) which didn't help, so I decided to offer a bounty for some attention.
Here's a complete example using SSE (Server sent events). This works in Firefox and probably Chrome too:
var cp = require("child_process"),
express = require("express"),
app = express();
app.configure(function(){
app.use(express.static(__dirname));
});
app.get('/msg', function(req, res){
res.writeHead(200, { "Content-Type": "text/event-stream",
"Cache-control": "no-cache" });
var spw = cp.spawn('ping', ['-c', '100', '127.0.0.1']),
str = "";
spw.stdout.on('data', function (data) {
str += data.toString();
// just so we can see the server is doing something
console.log("data");
// Flush out line by line.
var lines = str.split("\n");
for(var i in lines) {
if(i == lines.length - 1) {
str = lines[i];
} else{
// Note: The double-newline is *required*
res.write('data: ' + lines[i] + "\n\n");
}
}
});
spw.on('close', function (code) {
res.end(str);
});
spw.stderr.on('data', function (data) {
res.end('stderr: ' + data);
});
});
app.listen(4000);
And the client HTML:
<!DOCTYPE html>
<html>
<body>
    <ul id="eventlist"> </ul>
    <script>
        var eventList = document.getElementById("eventlist");
        var evtSource = new EventSource("http://localhost:4000/msg");

        var newElement = document.createElement("li");
        newElement.innerHTML = "Messages:";
        eventList.appendChild(newElement);

        evtSource.onmessage = function(e) {
            console.log("received event");
            console.log(e);
            var newElement = document.createElement("li");
            newElement.innerHTML = "message: " + e.data;
            eventList.appendChild(newElement);
        };

        evtSource.onerror = function(e) {
            console.log("EventSource failed.");
        };

        console.log(evtSource);
    </script>
</body>
</html>
Run node index.js and point your browser at http://localhost:4000/client.html.
Note that I had to use the "-c" option rather than "-n" since I'm running OS X.
If you are using Google Chrome, changing the content type to "text/event-stream" does what you're looking for.
res.writeHead(200, { "Content-Type": "text/event-stream" });
See my gist for complete example: https://gist.github.com/sfarthin/9139500
This cannot be achieved with the standard HTTP request/response cycle. Basically what you are trying to do is make a "push" or "realtime" server. This can only be achieved with xhr-polling or websockets.
Code Example 1:
app.get('/path', function(req, res) {
    ...
    spw.stdout.on('data', function (data) {
        var str = data.toString();
        res.write(str + "\n");
    });
    ...
}
This code never sends an end signal and therefore will never finish responding. If you were to add a call to res.end() within that event handler, you would only get the first ping – which is the expected behavior, because you would be ending the response stream after the first chunk of data from stdout.
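For reference, a sketch of how that first attempt could at least terminate cleanly: write each chunk as it arrives and end the response when ping exits (the -c flag and the inline spawn are assumptions; the browser may still buffer chunks before rendering them):
app.get('/path', function (req, res) {
    var spw = require('child_process').spawn('ping', ['-c', '10', '127.0.0.1']);
    spw.stdout.on('data', function (data) {
        res.write(data.toString());
    });
    // end the response once, after the child process has finished
    spw.on('close', function () {
        res.end();
    });
});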
Code Sample 2:
spw.stdout.pipe(res);
Here stdout is flushing the packets to the browser, but the browser will not render the data chunks until all packets are received. That is why it waits for all ten pings and then renders the entirety of stdout. The major benefit of this method is not buffering the response in memory before sending it, which keeps your memory footprint lightweight.
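For completeness, here is a minimal sketch of the websocket route mentioned above, using the third-party ws module (the module choice, port, and ping flags are assumptions, not from the original answers):
var WebSocket = require('ws');                 // npm install ws
var spawn = require('child_process').spawn;

var wss = new WebSocket.Server({ port: 8080 });

wss.on('connection', function (ws) {
    var spw = spawn('ping', ['-c', '10', '127.0.0.1']);
    // push each chunk of ping output to the browser as it arrives
    spw.stdout.on('data', function (data) {
        ws.send(data.toString());
    });
    spw.on('close', function () {
        ws.close();
    });
});

On the browser side, new WebSocket('ws://localhost:8080') with an onmessage handler takes the place of the EventSource from the SSE example.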

Incr appears to run twice--why?

I'm having a hard time getting node, redis, and async to do what I want. I'm trying very basic things to grasp the patterns of redirecting control flow. Here, I hold a counter variable "success" that increases by one if a comparison key0 > key1 is true. They are static for now so it's always true; the only thing I wish to change is to increment success. I refresh the browser to re-run the comparison and increment success again.
My trouble is: when the page is refreshed, success jumps by 2. I tried putting a callback with incr, but it looks like only get-type commands have callbacks. I have a client.end(); in my script, but it prevented me from reloading the page so I commented it out. I suspect this is the source of my problem. If so, where does client.end belong?
var http = require("http");
var redis = require("redis");
var async = require("async");
client = redis.createClient();
http.createServer(function(request, response) {
// key "success" set to 0 externally, through redis-cli;
client.set("key0", "19");
client.set("key1", "11");
response.writeHead(200, {"Content-Type": "text/plain"});
async.series([
shortcut("key0"),
shortcut("key1"),
shortcut("success")
],
function(err, results){
if (results[0] > results[1]) {
client.incr("success", function(err, reply) {
response.write("incr done");
});
response.write(results[0] + "\n\n");
response.write(results[1] + "\n\n");
response.write(results[2]);
}
response.end();
// client.quit();
});
}).listen(8000);
function shortcut(key) {
return function(callback) {
client.get(key, function(err, reply) {
callback(null, reply);
}
);
}
}
Your browser most likely requests favicon.ico and thus generates the extra request which runs your code a second time.
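A minimal sketch of guarding against that extra request (one of several ways to handle it):
http.createServer(function(request, response) {
    // short-circuit the favicon request so the counter logic runs once per reload
    if (request.url === '/favicon.ico') {
        response.writeHead(204);
        response.end();
        return;
    }
    // ... the rest of the handler from above
}).listen(8000);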

How to make web service calls in Expressjs?

app.get('/', function(req, res){
    var options = {
        host: 'www.google.com'
    };
    http.get(options, function(http_res) {
        http_res.on('data', function (chunk) {
            res.send('BODY: ' + chunk);
        });
        res.end("");
    });
});
I am trying to download the google.com homepage and reprint it, but I get a "Can't use mutable header APIs after sent" error.
Anyone know why? Or how to make the HTTP call correctly?
Check out the example in the node.js docs.
http.get is a convenience method; it handles a lot of the basic stuff for a GET request, which usually has no body. Below is a sample of how to make a simple HTTP GET request.
var http = require("http");
var options = {
host: 'www.google.com'
};
http.get(options, function (http_res) {
// initialize the container for our data
var data = "";
// this event fires many times, each time collecting another piece of the response
http_res.on("data", function (chunk) {
// append this chunk to our growing `data` var
data += chunk;
});
// this event fires *one* time, after all the `data` events/chunks have been gathered
http_res.on("end", function () {
// you can use res.send instead of console.log to output via express
console.log(data);
});
});
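Applied to the Express route from the question, the same pattern avoids the "mutable header APIs" error by sending the response exactly once (a sketch, reusing the route and options from the question):
app.get('/', function (req, res) {
    http.get({ host: 'www.google.com' }, function (http_res) {
        var data = '';
        http_res.on('data', function (chunk) {
            data += chunk;
        });
        http_res.on('end', function () {
            // one res.send per request; calling it once per chunk caused the error
            res.send('BODY: ' + data);
        });
    });
});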

Node.js http request memory and cpu hog

I have a node.js script that continuously requests a page, sort of like a cron job.
However, after a few minutes Node starts to use a lot of CPU (up to 70%) and memory (up to 200mb).
What is wrong with my script?
function cron(path)
{
    var http = require('http');
    var site = http.createClient(443, 'www.website.com', true);
    var request = site.request('GET', path, {'host': 'www.website.com'});
    request.end();
    request.on('response', function (response) {
        setTimeout(function(){cron(path)},15000);
    });
}
cron('/path/to/page');
request.on('response', function (response) {
    setTimeout(function(){cron(path)},15000);
});
For every response you create a new cron job. Log your responses: if you're getting more than one per request, you're exponentially creating more cron jobs.
You're creating a function () {} with a reference to path, so the entire scope's state is kept alive. You can free that memory by clearing the references:
var site = null;
var request = null;
You're calling require("http") inside a function rather than outside in module scope. You only need to get http once, so place it at the top of your file in module scope.
var http = require('http');
var site = http.createClient(443, 'www.website.com', true);

function cron(path)
{
    var request = site.request('GET', path, {'host': 'www.website.com'});
    request.end();

    var once = true;
    request.on('response', function (response) {
        // schedule the next run only once, even if 'response' were to fire again
        if (once) {
            once = false;
            setTimeout(function(){ cron(path); }, 15000);
        }
    });

    // drop the local reference so the closure state can be collected
    // (site stays in module scope because it is reused on every run)
    request = null;
}
cron('/path/to/page');
In addition to the tips from @Raynos, here's another. I find that recursive calls like this in long-running processes make me a bit nervous, so I'd err on the side of using setInterval instead. I'd also split the cron and the http behaviour apart in case you want to reuse that logic, although that will depend on your context:
e.g. in node 0.4.7:
var https = require('https');

function poll(path)
{
    https.get({
        host: 'www.website.com',
        port: 443,
        path: path
    }, function(res) {
        console.log("Got response: " + res.statusCode);
    }).on('error', function(e) {
        console.log("Got error: " + e.message);
    });
}

function cron(path)
{
    return setInterval(function(){
        poll(path);
    }, 15000);
}

var intervalId = cron('/path/to/page'); // keep in case you need to use clearInterval
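Since the interval id is returned, stopping the poll later is straightforward (a usage sketch):
// e.g. stop polling after an hour
setTimeout(function () {
    clearInterval(intervalId);
}, 60 * 60 * 1000);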
