How to separate heartbeat to its own thread for RabbitMQ connection - node.js

I have a process that uses RabbitMQ and NodeJS to do image processing. Due to the intensive task, I think I have the same issue as the link here https://github.com/squaremo/amqp.node/issues/261
I am trying to figure out how to implement the last comment on that issue.
"Yep. NodeJS is single-threaded, and if you use that thread to do something for a long time, nothing else will happen. As #michaelklishin suggests, the known solution for this general problem is using a child process, or the cluster module."
EDIT:
I Updated the code below with a sample of how I think I can do this with the amqp-connection-manager module. Right now I use a global variable to hold the actual message to be able to ack. I am guessing there is a better way to do this.
//Used to be an example for how to keep the connection thread and the working thread separate
//in order to fix the issue of missing heartbeat intervals due to processing on the same thread
const cluster = require('cluster');
var amqp = require('amqp-connection-manager');
var config = require('./config.json');
var working_queue = "Test_Queue";
//DONT REALLY DO THIS
var rabbit_msg_data;
//******* CLUSTER SETUP *******
// This will spawn off the number of worker for this process.
if (cluster.isMaster) {
console.log("Master "+process.pid+" is running");
worker = cluster.fork();
cluster.on('exit', (worker, code, signal) => {
if(signal)
{
console.log("worker was killed by signal: "+signal);
console.log(worker);
}
else if (code !== 0)
{
console.log("worker exited with error code: "+code);
console.log(worker);
}
else
{
console.log("Worker "+worker.process.pid+" exited successfully");
console.log(worker);
//Not sure if this works this way or if I need to put this worker into variables
}
});
//testing sending a message back and forth
// setTimeout(function() {
// worker.send("I got a request!");
// }, 1000);
//******** RABBIT MQ CONNECTION **********
// Create a connection manager to rabbitmq
var connection = amqp.connect(config.rabbit_connections_arr, {json: true, heartbeatIntervalInSeconds: 2});
connection.on('connect', function() {
console.log('Connected to rabbitmq');
});
connection.on('disconnect', function(params) {
console.log('Disconnected from rabbitmq:', params.err.stack);
});
// Set up a channel listening for messages in the queue.
var channelWrapper_listening = connection.createChannel({
setup: function(channel) {
// `channel` here is a regular amqplib `ConfirmChannel`.
return Promise.all([
channel.assertQueue(working_queue, {durable: true}),
channel.prefetch(1),
channel.consume(working_queue, function(data){
rabbit_msg_data = data;
worker.send(data.content.toString());
}, requeue = false)
]);
}
});
worker.on('message', function(msg){
// console.log("Worker to Master (ack): ", msg.content.toString());
console.log("Worker to Master (ack): ", msg);
//console.log(msg.content.toString());
channelWrapper_listening.ack(rabbit_msg_data);
});
}
else //All worker processes (MAIN LOGIC)
{
console.log("Worker "+process.pid+" started");
process.on('message',function(msg){
console.log("Master to Worker (working): ", msg);
//send msg back when done working on it.
setTimeout(function() {
process.send(msg);
}, 5000);
});
}

Related

RabbitMq program is hanging while handling 100k messages. is this the right way to do it?

The code is attached below. my system hangs. is this the right way to create those many channels?
i wanted to compare the cpu usage of rabbitMQ and redis pub/sub for 100k messages and i am running the rabbitMQ server on windows.
var amqp = require("amqplib/callback_api");
var time = 0;
var limit = 100000;
amqp.connect("amqp://localhost",function(err,conn){
if(err){
console.log('Connection closed-----------error connect');
return;
}
var timer = setInterval(() => {
time+=1;
if(time>=limit){
clearInterval(timer);
}
conn.createChannel(function(err,ch){
if(err){
console.log('Connection closed-----------error createChannel');
return;
}
var q = "queue_name"+time.toString();
// console.log(q);
var msg = "this is the message string!!!";
ch.assertQueue(q,{durable: false});
ch.sendToQueue(q,new Buffer(msg),{persistent: false});
// console.log("time = "+time);
});
}, 10);
});

Difference between inbound and outbound sockets in node

I am writing a function that will make sure a node instance is gracefully shut down.
To do that, I make sure I unref() all sockets. This is what I am doing:
function tidyUp(){
console.log("Closing the server...");
server.close();
console.log("Ordering a hotplate shutdown...");
process.emit( 'hotplateShutdown');
// This will give time to server.close() to actually work..
setTimeout( function() {
console.log("Calling unref() for all Sockets and for the Server:");
wtf.dump();
var handles = Array.prototype.slice.call( process._getActiveHandles() );
handles.forEach( ( h, i ) => {
var name = h.constructor.name;
if( h.unref && typeof h.unref == 'function' & ( name == 'Socket' || name == 'Server' ) ){
console.log("Unreffing:", i );
h.unref();
}
});
console.log("After unreffing:");
wtf.dump();
setTimeout( function(){
console.log("This process should soon close");
console.log("Here is the event queue keeping it alive:");
wtf.dump( true );
}, 1000);
}, 1000 );
};
I am concerned because the server also sends email, and I want to make absolute sure that any that is being sent is indeed sent.
Basically:
"Give a Socket object, how do you tell if it's an INBOUND socket (one receiving connections, as node's HTTP server would open) or an OUTBOUND socket (one open by nodemailer to send an email)."
I will want to unref() all inbound sockets, and leave the outbound ones in peace, till all email has been sent.
Hints?
I ended up keeping track of the inbound sockets opened:
var server = http.createServer(app);
// Keep track of all inbound sockets. They will be
// unref()ed at cleanup time
var inboundSockets = {};
var socketId = 0;
server.on('connection',function( socket ){
socket.__id = socketId ++;
inboundSockets[ socket.__id ] = socket;
socket.on('close',function(){
delete inboundSockets[ socket.__id ];
});
});
process.on('uncaughtException', function (err) {
console.error( "Error caught: ");
console.error(err);
tidyUp();
})
process.on('SIGTERM', function(){
console.log("TERMINATING THIS INSTANCE!");
tidyUp();
});
function tidyUp(){
console.log("Closing the server...");
server.close();
// The next line is important AS IS as it will tell naps that the
// server is no longer functional
console.log("THE SERVER HAS STOPPED");
console.log("Ordering a hotplate shutdown...");
process.emit( 'hotplateShutdown');
// This will give time to server.close() to actually work..
setTimeout( function() {
console.log("Calling unref() for inbound sockets:");
Object.keys( inboundSockets ).forEach( ( __id ) => {
var s = inboundSockets[ __id ];
console.log("Unreffing inbound socket:", s.__id );
s.unref();
});
db.unref();
console.log("After unreffing:");
wtf.dump();
setTimeout( function(){
console.log("This process should soon close");
console.log("Here is the event queue keeping it alive:");
wtf.dump( true );
}, 5000);
}, 1000 );
};
Note that I assign an ID to the socket at creation time (there doesn't seem to be a clear universal way to get a unique ID) and then unref them when the server goes down...

Is there a better way to terminate the childProcess then just doing a childProcess.kill()

I have a node.js server, which will print out some message in the console and then start the server.
I am creating a automation test by using tap to check the message in the console.log and check if server is started, i.e. there is a PID generated.
I tried 2 different methods -child_process.exec and child_process.spawn
1. Use child_process.exec with a call back function.
This does not work as the server is long running and will not even
go to the call back, so I cannot even check for any stdout.
Then I use child_process.exec without call back, this solves the
first issue where I can now get the message back from stdout.
The second issue is that the test will hang since the server is long running and will not terminate by itself.
code snippet:
var exec = require('child_process').exec;
tap.test('test server start', function(t) {
childProcess= exec('node',['server']);
console.log('[exec] childProcess.pid: ', childProcess.pid);
t.notEqual(childProcess.pid, undefined);
childProcess.stdout.on('data', function (data) {
console.log('[exec] stdout: ', data.toString());
t.match(data.toString(), "Example app listening at http://:::3000");
t.end();
childProcess.kill('SIGTERM');
});
childProcess.stderr.on('data', function (data) {
console.log('[exec] stderr: ', data.toString());
});
childProcess.on('close', function (code) {
if (code!=null)
console.log('child process exited with code '+ code);
});
});
use child_process.spawn -code snippet
var spawn = require('child_process').spawn;
tap.test('test server start', function(t) {
childProcess= spawn('node',['server']);
console.log('[spawn] childProcess.pid: ', childProcess.pid);
t.notEqual(childProcess.pid, undefined);
childProcess.stdout.on('data', function (data) {
console.log('[spawn] stdout: ', data.toString());
t.match(data.toString(), "Example app listening at http://:::3000");
t.end();
childProcess.kill('SIGTERM');
});
childProcess.stderr.on('data', function (data) {
console.log('[spawn] stderr: ', data.toString());
});
childProcess.on('close', function (code) {
if (code!=null)
console.log('child process exited with code '+ code);
});
});
In both 1 & 2, the test will hang since the server is long running,
I need to use child_process.kill() to terminate the test
Is there a better method to achieve this?
Thanks in advance for any improvements.
Well, I think that you can check if the server is alive in a different way (without spawning a new process).
For example, you can start your server waiting for connections:
const net = require('net');
var connections = {};
var server = net.createServer(function(conn) { });
server.listen(3333);
server.on('connection',function(conn) {
var key = conn.remoteAddress + ':' + conn.remotePort;
connections[key] = conn;
conn.on('close',function() {
delete connections[key];
});
});
Then, connect some clients (or just one) to the server:
var connected = 0;
for (var i = 0;i < 10;i++) {
var client = net.connect(3333);
client.on('connect',function() {
connected++;
console.log(connected);
});
}
So, if you are be able to connect to the server, then your server is alive.
And finally, when you want to close the server, just create a new function like this one:
var destroy = function ()
{
server.close(function() {
console.log('ok');
});
for (var key in connections) {
connections[key].destroy();
}
}
Call it for example after 10 successful connections to the server. Inside the for loop:
for (var i = 0; i < 10; i++) {
var client = net.connect(3333);
client.on('connect',function() {
connected++;
if (connected === 10) {
destroy();
}
});
}
This is a very basic example, but I think that it's enough to understand another way to do what you want to do.
I.

How to run Cron Job in Node.js application that uses cluster module?

I'm using node-cron module for scheduling tasks in Node.js application. I also want run the application in several processes using core cluster module.
Running application in several processes ends up in scheduled tasks execution in each process (e.g. if task was to send an email the email would be sent multiple times).
What are the best practices/possible ways of running cron job along with cluster module? Should I create some separate process which will handle only cron job and do not accept any requests. If yes, how can I do that in a right way?
If are using PM2,
You can use an environment variable provided by PM2 itself called NODE_APP_INSTANCE which requires PM2 2.5 or greater.
NODE_APP_INSTANCE environment variable can be used to determine difference between process, for example you may want to run a cronjob only on one process, you can just do this
if(process.env.NODE_APP_INSTANCE == 0) {
//schedule your cron job here since this part will be executed for only one cluster
}
,
Since two processes can never have the same number.
More Info on PM2 official doc here.
After some research I ended up with "Distributed locks using Redis" solution.
There is node module for that: node-redis-warlock.
Hope this answer will be useful for someone else.
UPDATE. Minimal sample code:
var Warlock = require('node-redis-warlock'),
redis = require('redis');
// Establish a redis client
redis = redis.createClient();
// and pass it to warlock
var warlock = new Warlock(redis);
function executeOnce (key, callback) {
warlock.lock(key, 20000, function(err, unlock){
if (err) {
// Something went wrong and we weren't able to set a lock
return;
}
if (typeof unlock === 'function') {
setTimeout(function() {
callback(unlock);
}, 1000);
}
});
}
// Executes call back only once
executeOnce('every-three-hours-lock', function(unlock) {
// Do here any stuff that should be done only once...
unlock();
});
UPDATE 2. More detailed example:
const CronJob = require('cron').CronJob;
const Warlock = require('node-redis-warlock');
const redis = require('redis').createClient();
const warlock = new Warlock(redis);
const async = require('async');
function executeOnce (key, callback) {
warlock.lock(key, 20000, function(err, unlock) {
if (err) {
// Something went wrong and we weren't able to set a lock
return;
}
if (typeof unlock === 'function') {
setTimeout(function() {
callback(unlock);
}, 1000);
}
});
}
function everyMinuteJobTasks (unlock) {
async.parallel([
sendEmailNotifications,
updateSomething,
// etc...
],
(err) => {
if (err) {
logger.error(err);
}
unlock();
});
}
let everyMinuteJob = new CronJob({
cronTime: '*/1 * * * *',
onTick: function () {
executeOnce('every-minute-lock', everyMinuteJobTasks);
},
start: true,
runOnInit: true
});
/* Actual tasks */
let sendEmailNotifications = function(done) {
// Do stuff here
// Call done() when finished or call done(err) if error occurred
}
let updateSomething = function(done) {
// Do stuff here
// Call done() when finished or call done(err) if error occurred
}
// etc...
I think you can use the node cluster module, and there you can write your code to run in the master cluster only
const cluster = require('cluster');
if (cluster.isMaster) {
// Write your code which you want to execute in the master cluster only
}
This is a node way to handle cluster, of course, you can use any tool like pm2 to handle this.
I actually do not like the redis approach that is also used in the cron-cluster npm plugin, because I do not want to have that redis server running on my maschine and maintain it, too.
I would like to discuss this approach with you:
Pro: we do not need to use redis
Con: cron jobs are always running on the same worker
I use the message passing only for this, if you use it for other things, you want to pass the information that
if (cluster.isMaster) {
// Count the machine's CPUs
var cpuCount = require('os').cpus().length;;
// Create a worker for each CPU
for (var i = 0; i < cpuCount; i += 1) {
cluster.fork();
}
cluster.on('fork', (worker) => {
console.log("cluster forking new worker", worker.id);
});
// have a mainWorker that does the cron jobs.
var mainWorkerId = null;
cluster.on('listening', (worker, address) => {
console.log("cluster listening new worker", worker.id);
if(null === mainWorkerId) {
console.log("Making worker " + worker.id + " to main worker");
mainWorkerId = worker.id;
worker.send({order: "startCron"});
}
});
// Listen for dying workers if the mainWorker dies, make a new mainWorker
cluster.on('exit', function (worker, code, signal) {
console.log('Worker %d died :(', worker.id);
if(worker.id === mainWorkerId) {
console.log("Main Worker is dead...");
mainWorkerId = null;
}
console.trace("I am here");
console.log(worker);
console.log(code);
console.log(signal);
cluster.fork();
});
// Code to run if we're in a worker process
} else {
// other code like setup app and stuff
var doCron = function() {
// setup cron jobs...
}
// Receive messages from the master process.
process.on('message', function(msg) {
console.log('Worker ' + process.pid + ' received message from master.', message);
if(message.order == "startCron") {
doCron();
}
});
}
I also have a problem with cluster module and finally i found sample way to solve problem.
Let master cluster execute cronJob.
My project use Kue to manage jobs. When cronJob run i get a list of jobs.
index.js
global.cluster = require('cluster');
if (cluster.isMaster) {
const cpuCount = require('os').cpus().length;
for (let i = 0; i < cpuCount; i += 1) {
cluster.fork();
}
} else {
// start your express server here
require('./server')
}
cluster.on('exit', worker => {
logger.warn('Worker %d died :(', worker.id);
cluster.fork();
});
cron.js
const cron = require('cron').CronJob;
const job = new cron('* * * * *', async () => {
if (cluster.isMaster) {
console.log('cron trigger');
}
});
job.start();
Hope this help.

Execute Grunt task using NodeJS cluster

I have a Grunt task and currently I am utilising AsyncJS to run it. AsyncJS worked well but still I feel like it can be more powerful if I can utilise NodeJS cluster to run it. I have checked out Grunt Parallel and Grunt Concurrent and it is not much different to what I doing in my Grunt task. Any suggestions on utilising NodeJS cluster module to speed up Task execution.
Currently I am doing like this
var queue = async.queue(task, function(task, cb){
// Process task with PhantomJS and then
cb();
}, require('os').cpus().length);
async.each(htmlPages, function(val, cb) {
queue.push(val, function() {
cb();
});
}, function() {
console.log('Completed');
done();
});
How can I make this work with NodeJS cluster?
One way to do it is to spawn the number of workers that you want using the cluster module. Then send messages to them when you want to start them working on something.
Below is code that initialises os.cpus().length workers and a queue that sends the work to them. It then pushes everything in htmlPages to that queue, waits for it to finish and then finally kills all the workers.
var os = require('os');
var async = require('async');
var cluster = require('cluster');
if (cluster.isWorker) {
process.on('message', function(msg) {
// Do the Phantom JS stuff
process.send(theResult);
});
}
if (cluster.isMaster) {
var workers = os.cpus().map(function () {
return cluster.fork();
});
var queue = async.queue(function (msg, cb) {
var worker = workers.pop();
worker.once('message', function (msg) {
workers.push(worker);
cb(null, msg);
});
worker.send(msg);
}, workers.length);
async.each(htmlPages, queue.push.bind(queue), function (err) {
if (err) { throw err; }
workers.forEach(function (worker) {
worker.kill();
});
console.log('Completed');
});
}

Resources