This is probably an already-answered question, but somehow the solutions I found did not work, so it is better to describe my situation.
I have an embedded Linux device running a C based application, with node.js running as a web server. I use a TCP socket for inter-process communication.
The main problem is that the C application is designed to run synchronously, but node.js is not.
The web page sends requests to node.js, node.js forwards them to the C application and waits for the application's result before sending the response back to the web page.
Usually this is not a problem, since everything runs very fast. But on the embedded side there are now some slow processes, and they make node.js wait.
var counterVal = 0;

app.post('/ajax.html', jsonParser, async function (req, res) {
    var q = url.parse(req.url, true);
    var formData = req.body;
    var cMessage = { counter: counterVal };
    var message = { ...q.query, ...formData, ...cMessage };
    await remoteSocket.write(JSON.stringify(message), function () {
        remoteSocket.on('data', function (e) {
            try {
                remoteSocket._events.data = function (data) { };
                var ret = JSON.parse(e);
                if (message.counter == ret.counter) {
                    res.send(e);
                } else {
                    logError("received error : " + message.counter + ": " + ret.counter);
                }
            } catch (err) {
                logError(err);
            }
        });
    });
});
I use this code to handle web requests. The counter value is used as a handshake. Some of the requests end up in logError because of a mismatch.
But when I look at the Wireshark capture packet by packet, there is no mismatch; every request has the correct response.
app.post is async, so the data listener gets registered twice. A response for one packet then triggers two listeners, but one of those listeners belongs to the next packet. That is what produces the mismatch.
What is a way to make app.post run in sync mode?
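For what it's worth, one way to avoid the duplicate-listener race, rather than forcing app.post to be synchronous, is to register a single persistent 'data' handler and match each reply to its pending request by counter. The following is only a sketch: it assumes each 'data' event carries exactly one complete JSON reply (real TCP does not guarantee this, so production code needs message framing), and it increments counterVal per request, which the original snippet never shows.

var pending = {};

// One listener for the lifetime of the socket, instead of one per request.
remoteSocket.on('data', function (raw) {
    try {
        var ret = JSON.parse(raw);
        var handler = pending[ret.counter];
        if (handler) {
            delete pending[ret.counter];
            handler(ret);
        } else {
            logError('unexpected counter: ' + ret.counter);
        }
    } catch (err) {
        logError(err);
    }
});

app.post('/ajax.html', jsonParser, function (req, res) {
    var q = url.parse(req.url, true);
    var message = { ...q.query, ...req.body, counter: counterVal++ };
    // Remember how to answer this request, then send it to the C side.
    pending[message.counter] = function (ret) {
        res.send(JSON.stringify(ret));
    };
    remoteSocket.write(JSON.stringify(message));
});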
Related
I have a REST service in Node.js with one specific request running a bunch of DB commands and other file processing that could take 10-15 seconds to run. Since I didn't want to hold up my browser request thread, I wrote a separate .js script to do the needful, called the script using child_process.spawn() in my Node.js code and immediately returned OK back to the client. This works fine, but then so does calling the same script (as a local function) by just using a simple setTimeout.
router.post("/longRequest", function(req, res) {
    console.log("Started long request with id: " + req.body.id);

    var longRunningFunction = function() {
        // Usually runs a bunch of things that take time.
        // Simulating a 10 sec delay for sample code.
        setTimeout(function() {
            console.log("Done processing for 10 seconds")
        }, 10000);
    }

    // Below line used to be
    // child_process.spawn('longRunningFunction.js'
    setTimeout(longRunningFunction, 0);

    res.json({status: "OK"})
})
So, this works for my purpose. But what's the downside? I probably can't monitor the offline process as easily as with child_process.spawn(), which would give me a process id. But does this cause problems in the long run? Will it hold up Node.js processing if the 10 second processing grows to a lot more in the future?
The actual longRunningFunction is something that reads an Excel file, parses it and does a bulk load using tedious to a MS SQL Server.
var XLSX = require('xlsx');
var FileAPI = require('file-api'), File = FileAPI.File, FileList = FileAPI.FileList, FileReader = FileAPI.FileReader;
var Connection = require('tedious').Connection;
var Request = require('tedious').Request;
var TYPES = require('tedious').TYPES;

var importFile = function() {
    var file = new File(fileName);
    if (file) {
        var reader = new FileReader();
        reader.onload = function (evt) {
            var data = evt.target.result;
            var workbook = XLSX.read(data, {type: 'binary'});
            var ws = workbook.Sheets[workbook.SheetNames[0]];
            var headerNames = XLSX.utils.sheet_to_json(ws, { header: 1 })[0];
            var rows = XLSX.utils.sheet_to_json(ws); // renamed from "data" to avoid reusing the variable above

            var bulkLoad = connection.newBulkLoad(tableName, function (error, rowCount) {
                if (error) {
                    console.log("bulk upload error: " + error);
                } else {
                    console.log('inserted %d rows', rowCount);
                }
                connection.close();
            });

            // set up your columns - always indicate whether the column is nullable
            Object.keys(columnsAndDataTypes).forEach(function(columnName) {
                bulkLoad.addColumn(columnName, columnsAndDataTypes[columnName].dataType, { length: columnsAndDataTypes[columnName].len, nullable: true });
            })

            rows.forEach(function(row) {
                var addRow = {}
                Object.keys(columnsAndDataTypes).forEach(function(columnName) {
                    addRow[columnName] = row[columnName];
                })
                bulkLoad.addRow(addRow);
            })

            // execute
            connection.execBulkLoad(bulkLoad);
        };
        reader.readAsBinaryString(file);
    } else {
        console.log("No file!!");
    }
};
So, this works for my purpose. But what's the downside ?
If you actually have a long running task capable of blocking the event loop, then putting it on a setTimeout() is not stopping it from blocking the event loop at all. That's the downside. It's just moving the event loop blocking from right now until the next tick of the event loop. The event loop will be blocked the same amount of time either way.
If you just did res.json({status: "OK"}) before running your code, you'd get the exact same result.
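You can see this with a toy example. This sketch (a busy-wait standing in for any CPU-bound work) shows that deferring blocking work with setTimeout() only postpones the blockage; a second timer due in 100 ms cannot fire until the busy-wait has finished:

var start = Date.now();

function blockFor(ms) {
    // Busy-wait: stands in for CPU-bound parsing/processing.
    var end = Date.now() + ms;
    while (Date.now() < end) {}
}

// "Deferred" blocking work still runs on the one and only thread.
setTimeout(function () { blockFor(10000); }, 0);

// Due in 100 ms, but it fires roughly 10 seconds late, because the
// busy-wait above holds the event loop the whole time.
setTimeout(function () {
    console.log('100ms timer fired after ' + (Date.now() - start) + ' ms');
}, 100);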
If your long running code (which you describe as file and database operations) is actually blocking the event loop even though it is properly written using async I/O operations (i.e., what remains is CPU-consuming work), then the only way to stop blocking the event loop is to move that CPU-consuming work out of the node.js thread.
That is typically done by clustering, moving the work to worker processes or moving the work to some other server. You have to have this work done by another process or another server in order to get it out of the way of the event loop. A setTimeout() by itself won't accomplish that.
child_process.spawn() will accomplish that. So, if you have an actual event loop blocking problem to solve and the I/O is already as async optimized as possible, then moving it to a worker process is a typical node.js solution. You can communicate with that child process in a number of ways, but one possibility would be via stdin and stdout.
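Here is a minimal sketch of that worker-process variant, reusing the longRunningFunction.js name from the question; the JSON-over-stdin message format is only an assumption for illustration:

var child_process = require('child_process');

router.post('/longRequest', function (req, res) {
    // Run the heavy work in a separate node process so this one's
    // event loop stays free.
    var worker = child_process.spawn('node', ['longRunningFunction.js']);

    // Send the job description over stdin, one JSON message per line.
    worker.stdin.write(JSON.stringify({ id: req.body.id }) + '\n');
    worker.stdin.end();

    worker.stdout.on('data', function (chunk) {
        console.log('worker output: ' + chunk);
    });
    worker.on('exit', function (code) {
        console.log('worker exited with code ' + code);
    });

    // Respond immediately; the worker keeps running on its own.
    res.json({ status: 'OK' });
});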
I work with node-red and am currently developing a custom node that uses websockets to connect to a device and request data from it.
function query(node, msg, callback) {
    var uri = 'ws://' + node.config.host + ':' + node.config.port;
    var protocol = 'Lux_WS';
    node.ws = new WebSocket(uri, protocol);
    var login = "LOGIN;" + node.config.password;

    node.ws.on('open', function open() {
        node.status({fill:"green", shape:"dot", text:"connected"});
        node.ws.send(login);
        node.ws.send("REFRESH");
    });

    node.ws.on('message', function (data, flags) {
        processResponse(data, node);
    });

    node.ws.on('close', function(code, reason) {
        node.status({fill:"grey", shape:"dot", text:"disconnected"});
    });

    node.ws.on('error', function(error) {
        node.status({fill:"red", shape:"dot", text:"Error " + error});
    });
}
In the processResponse function I need to process the first response. It gives me an XML document with several ids for which I need to request further data.
I plan to set up a structure that holds all the data from the first request, and populate it further with the data that results from the id requests.
And that's where my problem starts: whenever I send a query from within the processResponse function, I trigger an event that results in the same function getting called again, but by then my structure is empty.
I know that this is due to the async nature of node.js and its event system, but I simply don't see how to work around this behaviour or structure my code the right way.
If anybody can recommend examples on how to deal with situations like this, or even better could give one, that would be great!
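One common shape for this kind of multi-step protocol (only a sketch of the idea, not code for the actual device protocol; parseIds and the "GET;<id>" message format are made up) is to keep the conversation state on the node, so the single message handler can tell which phase it is in:

function processResponse(data, node) {
    if (!node.pending) {
        // First response: extract the ids from the XML (parseIds is hypothetical).
        node.pending = parseIds(data);
        node.results = {};
    } else {
        // Follow-up response: file it under the id we asked about.
        node.results[node.currentId] = data;
    }

    if (node.pending.length > 0) {
        // Request the next id; the reply re-enters this function
        // with the state above still intact.
        node.currentId = node.pending.shift();
        node.ws.send('GET;' + node.currentId);
    } else {
        // All ids answered: emit the assembled structure downstream.
        node.send({ payload: node.results });
    }
}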
I'm trying to implement an existing solution in node.js, specifically using the express.js framework. The existing solution works as follows:
server exposes a GET service that clients can connect to
when a client calls the GET service, the client number (a global variable) is incremented and then the number of connected clients is checked;
if there are not at least 3 clients connected, the service sits in an endless loop, waiting for the other clients to connect
if (or rather, when) the remaining two clients connect, the service responds to everyone that enough clients are connected (a 'true' value).
So what basically happens is: a client connects and the connection stays active (in a loop) until enough clients connect; then, and only then, is there a response (to all clients at the same time).
Now I'm no expert in these architectures, but from what I can tell, this is not a correct or good solution. My initial thought was: this must be solved with sockets. However, since the existing solution works like that (it's not written in node.js), I tried to emulate the behaviour:
var number = (function(){
    var count = 0;
    return {
        increase: function() {
            count++;
        },
        get: function(){
            return count;
        }
    };
})();

app.get('/test', function(req, res){
    number.increase();
    while (number.get() < 3) {
        // hold it here, until enough clients connect
    }
    res.json(number.get());
});
Now while I think that this is not a correct solution, I have a couple of questions:
Is there any alternative to solving this issue, besides using sockets?
Why does this "logic" work in C#, but not in express.js? The code above hangs, no other request is processed.
I know node.js is single-threaded, but what if we have a more conventional service that responds immediately, and there are 20 requests all at the same time?
I would probably use an event emitter for this:
var EventEmitter = require('events').EventEmitter;
var emitter = new EventEmitter();

app.get('/', function(req, res) {
    // Increase the number
    number.increase();

    // Get the current value
    var current = number.get();

    // If it's less than 3, wait for the event emitter to trigger.
    if (current < 3) {
        return emitter.once('got3', function() {
            return res.json(number.get());
        });
    }

    // If it's exactly 3, emit the event so we wake up other listeners.
    if (current === 3) {
        emitter.emit('got3');
    }

    // Fall through.
    return res.json(current);
});
I would like to stress that @Plato is correct in stating that browsers may time out when a response takes too long to complete.
EDIT: as an aside, some explanation on the return emitter.once(...).
The code above can be rewritten like so:
if (current < 3) {
    emitter.once('got3', function() {
        res.json(number.get());
    });
} else if (current === 3) {
    emitter.emit('got3');
    res.json(number.get());
} else {
    res.json(number.get());
}
But instead of using those if/else statements, I return from the request handler after creating the event listener. Since request handlers are asynchronous, their return value is discarded, so you can return anything (or nothing). As an alternative, I could also have used this:
if (current < 3) {
    emitter.once(...);
    return;
}
if (current === 3) {
    ...etc...
Also, even though you return from the request handler function, the event listener still references the res variable, so the request handler's scope is kept alive by Node until res.json() in the event listener callback has been called.
Your http approach should work.
You are blocking the event loop, so node refuses to do any other work while it is stuck in the while loop.
You're really close; you just need to check every now and then instead of constantly. I do this below with setImmediate(), but setTimeout() would also work:
var number = (function(){
    var count = 0;
    return {
        increase: function() {
            count++;
        },
        get: function(){
            return count;
        }
    };
})();

function waitFor3(callback){
    var n = number.get();
    if (n < 3) {
        setImmediate(function(){
            waitFor3(callback)
        })
    } else {
        callback(n)
    }
}

function bump(){
    number.increase();
    console.log('waiting');
    waitFor3(function(){
        console.log('done');
    })
}

setInterval(bump, 2000);

/*
app.get('/test', function(req, res){
    number.increase();
    waitFor3(function(){
        res.json(number.get());
    })
});
*/
I'm trying to write a simple asynchronous TCP/IP client that runs alongside a Meteor server for communicating to a remote server and posting data to MongoDB. I got it working using net.on callbacks, but the code was messy and it was failing at random times. I decided to try writing it using fibers/Futures to clean it up so I could focus on the failures. The code currently looks like:
var Future = Npm.require('fibers/future'), wait = Future.wait;

var coreComm = function(coreClient) {
    console.log('coreCommm started')
    try {
        var running = true
        while (running) {
            console.log('calling onF.wait()')
            var ev = onF.wait();
            console.log('ev received', ev)
            switch (ev[0]) {
            default:
                console.log('unknown event from coreClient: ', ev)
                break;
            case 'readable':
                console.log('read', ev)
                break;
            }
        }
    } catch(err) {
        console.log('comm error: ', err)
    }
}.future()

function tryConnect(options) {
    var connect = new Future
    onF = new Future
    coreClient = net.connect(options, function() {
        console.log('connected,')
        connect.return()
    })
    connect.wait()
    coreClient.on('readable',
        function() { console.log('readable event,'); onF.return(['readable']) })
    console.log('coreClient connected to core');
    coreComm(coreClient)
}

Meteor.startup(function () {
    tryConnect({port: 9987});
}); // note: the closing ")" was missing here
The output when a message is sent looks like:
=> Meteor server running on: http://localhost:3000/
I2038-10:42:18.160(-5)? starting
I2038-10:42:18.392(-5)? connected,
I2038-10:42:18.398(-5)? coreClient connected to core
I2038-10:42:18.402(-5)? coreCommm started
I2038-10:42:18.409(-5)? calling onF.wait()
I2038-10:42:18.413(-5)? readable event,
As far as I can tell, the message is received from the remote server, the readable event fires, I call onF.return(...), and nothing happens except that Meteor goes to 100% CPU.
Any suggestions as to why the onF.wait() call isn't returning like it's supposed to?
I can't figure out why my node.js server runs out of memory. It makes a remote http request for each http request it receives, so I've tried to replicate the problem with the sample script below, which also runs out of memory.
This only happens if the number of iterations in the for loop is very high.
From my point of view, the problem is related to the fact that node.js queues the remote http requests. How can this be avoided?
This is the sample script:
(function() {
    var http, i, mypost, post_data;
    http = require('http');
    post_data = 'signature=XXX%7CPSFA%7Cxxxxx_value%7CMyclass%7CMysubclass%7CMxxxxx&schedule=schedule_name_6569&company=XXXX';

    mypost = function(post_data, cb) {
        var post_options, req;
        post_options = {
            host: 'myhost.com',
            port: 8000,
            path: '/set_xxxx',
            method: 'POST',
            headers: {
                'Content-Length': post_data.length
            }
        };
        req = http.request(post_options, function(res) {
            var res_data;
            res.setEncoding('utf-8');
            res_data = '';
            res.on('data', function(chunk) {
                return res_data += chunk;
            });
            return res.on('end', function() {
                return cb();
            });
        });
        req.on('error', function(e) {
            return console.debug('TM problem with request: ' + e.message);
        });
        req.write(post_data);
        return req.end(); // req.end() must be called; the original "return req.end;" only referenced it
    };

    for (i = 1; i <= 1000000; i++) {
        mypost(post_data, function() {});
    }
}).call(this);
$ node -v
v0.4.9
$ node sample.js
FATAL ERROR: CALL_AND_RETRY_2 Allocation failed - process out of memory
Tks in advance
gulden PT
Constraining the flow of requests into the server
It's possible to prevent overload of the built-in Server and its HTTP/HTTPS variants by setting the maxConnections property on the instance. Setting this property will cause node to stop accept()ing connections and force the operating system to drop requests when the listen() backlog is full and the application is already handling maxConnections requests.
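For instance (a sketch; the limit of 100 is arbitrary):

var http = require('http');

var server = http.createServer(function (req, res) {
    res.end('ok');
});

// Once 100 sockets are open, node stops accept()ing new ones; further
// clients wait in the OS listen() backlog and are eventually dropped.
server.maxConnections = 100;
server.listen(8000);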
Throttling outgoing requests
Sometimes, it's necessary to throttle outgoing requests, as in the example script from the question.
Using node directly or using a generic pool
As the question demonstrates, unchecked use of the node network subsystem directly can result in out of memory errors. Something like node-pool makes the active pool management attractive, but it doesn't solve the fundamental problem of unconstrained queuing. The reason for this is that node-pool doesn't provide any feedback about the state of the client pool.
UPDATE: As of v1.0.7 node-pool includes a patch inspired by this post to add a boolean return value to acquire(). The code in the following section is no longer necessary and the example with the streams pattern is working code with node-pool.
Cracking open the abstraction
As demonstrated by Andrey Sidorov, a solution can be reached by tracking the queue size explicitly and mingling the queuing code with the requesting code:
var useExplicitThrottling = function () {
    var active = 0
    var remaining = 10

    var queueRequests = function () {
        while (active < 2 && --remaining >= 0) {
            active++;
            pool.acquire(function (err, client) {
                if (err) {
                    console.log("Error acquiring from pool")
                    if (--active < 2) queueRequests()
                    return
                }
                console.log("Handling request with client " + client)
                setTimeout(function () {
                    pool.release(client)
                    if (--active < 2) {
                        queueRequests()
                    }
                }, 1000)
            })
        }
    }

    queueRequests(10)
    console.log("Finished!")
}
Borrowing the streams pattern
The streams pattern is a solution which is idiomatic in node. Streams have a write operation which returns false when the stream cannot buffer more data. The same pattern can be applied to a pool object with acquire() returning false when the maximum number of clients have been acquired. A drain event is emitted when the number of active clients drops below the maximum. The pool abstraction is closed again and it's possible to omit explicit references to the pool size.
var useStreams = function () {
    var queueRequests = function (remaining) {
        var full = false
        pool.once('drain', function() {
            if (remaining) queueRequests(remaining)
        })
        while (!full && --remaining >= 0) {
            console.log("Sending request...")
            full = !pool.acquire(function (err, client) {
                if (err) {
                    console.log("Error acquiring from pool")
                    return
                }
                console.log("Handling request with client " + client)
                setTimeout(pool.release, 1000, client)
            })
        }
    }

    queueRequests(10)
    console.log("Finished!")
}
Fibers
An alternative solution can be obtained by providing a blocking abstraction on top of the queue. The fibers module exposes coroutines that are implemented in C++. By using fibers, it's possible to block an execution context without blocking the node event loop. While I find this approach to be quite elegant, it is often overlooked in the node community because of a curious aversion to all things synchronous-looking. Notice that, excluding the callcc utility, the actual loop logic is wonderfully concise.
var Fiber = require('fibers')

/* This is the call-with-current-continuation found in Scheme and other
 * Lisps. It captures the current call context and passes a callback to
 * resume it as an argument to the function. Here, I've modified it to fit
 * JavaScript and node.js paradigms by making it a method on Function
 * objects and using function (err, result) style callbacks.
 */
Function.prototype.callcc = function(context /* args... */) {
    var that = this,
        caller = Fiber.current,
        fiber = Fiber(function () {
            that.apply(context, Array.prototype.slice.call(arguments, 1).concat(
                function (err, result) {
                    if (err)
                        caller.throwInto(err)
                    else
                        caller.run(result)
                }
            ))
        })
    process.nextTick(fiber.run.bind(fiber))
    return Fiber.yield()
}
var useFibers = function () {
    var remaining = 10
    while (--remaining >= 0) {
        console.log("Sending request...")
        try {
            var client = pool.acquire.callcc(this) // "var" added; the original leaked a global
            console.log("Handling request with client " + client);
            setTimeout(pool.release, 1000, client)
        } catch (x) {
            console.log("Error acquiring from pool")
        }
    }
    console.log("Finished!")
}
Conclusion
There are a number of correct ways to approach the problem. However, for library authors or applications that require a single pool to be shared in many contexts it is best to properly encapsulate the pool. Doing so helps prevent errors and produces cleaner, more modular code. Preventing unconstrained queuing then becomes an evented dance or a coroutine pattern. I hope this answer dispels a lot of FUD and confusion around blocking-style code and asynchronous behavior and encourages you to write code which makes you happy.
Yes, you are trying to queue 1000000 requests before even starting any of them. This version keeps the number of active requests limited (to 100):
function do_1000000_req(cb)
{
    var num_active = 0;
    var num_finished = 0;
    var num_scheduled = 0;

    function schedule()
    {
        while (num_active < 100 && num_scheduled < 1000000) {
            num_active++;
            num_scheduled++;
            mypost(post_data, function() { // mypost takes (post_data, cb)
                num_active--;
                num_finished++;
                if (num_finished == 1000000)
                {
                    cb();
                    return;
                } else if (num_scheduled < 1000000)
                    schedule();
            });
        }
    }

    schedule(); // kick off the first batch; completions keep the pipeline full
}
do_1000000_req(function() {
    console.log('done!');
});
The node-pool module can help you. For more details, see this post (in French): http://blog.touv.fr/2011/08/http-request-loop-in-nodejs.html