The problem I am facing is that the project has already programmed with cluster to distribute task.
if (cluster.isMaster) {
// Fork workers.
for (var i = 0; i < numCPUs; i++) {
cluster.fork();
}
cluster.on('exit', (worker, code, signal) => {
});
} else {
var server = http.createServer(app);
var usernames = {};
var showusernames = {};
var usersmessages = [];
require('../server/controllers/communication/chat.js').chatConfig(io, usernames);
/**
* Listen on provided port, on all network interfaces.
*/
server.listen(port);
server.on('error', onError);
server.on('listening', onListening);
}
I have a basic idea that this code is for distributing task among cpu and keeping server live in case one cpu fails.
The question is:
Everything was working fine till I need started working with node to schedule the cron job (which will be send email).
The cron now is ran by all workers simultaneously and the email is send to worker depending on how many cpu are there on server.
The I worker my way out by scheduling job as:
if(cluster.isWorker)
if(cluster.worker.id == 1){
cron.schedule('*/1 * * * *', function() {
//CRON JOB
})
}
This worked very fine within local system but failed in staging server maybe because of CPU aligned to this very project.
Is there any way to get only the first free worker and assign task to
him.
Now i tried this
var wokerArr = []
wokerArr.push(cluster.worker.id)
if(cluster.worker.id == wokerArr[0])
cron.schedule('*/1 * * * *', function() {
//CRON JOB
})
I did that using the crontab.
Making a separate cron file and schedule the job using crontab in command prompt to schedule job. Thanks for support.
You can schedule the cron in master process itself. Cron needs to be handled in an idempotent way.
if (cluster.isMaster) {
cron.schedule('*/1 * * * *', function() {
//CRON JOB
})
// continue initializing workers here
}
Related
I'm working on a node.js web server using express.js that should offer a dashboard to monitor database servers.
The architecture is quite simple:
a gatherer retrieves the information in a predefined interval and stores the data
express.js listens to user requests and shows a dashboard based on the stored data
I'm now wondering how to best implement the gatherer to make sure that it does not block the main loop and the simplest solution seems be to just use a setTimeout based approach but I was wondering what the "proper way" to architecture this would be?
Your concern is your information-gathering step. It probably is not as CPU-intensive as it seems. Because it's a monitoring app, it probably gathers information by contacting other machines, something like this.
async function gather () {
const results = []
let result
result = await getOracleMetrics ('server1')
results.push(result)
result = await getMySQLMetrics ('server2')
results.push(result)
result = await getMySQLMetrics ('server3')
results.push(result)
await storeMetrics(results)
}
This is not a cpu-intensive function. (If you were doing a fast Fourier transform on an image, that would be a cpu-intensive function.)
It spends most of its time awaiting results, and then a little time storing them. Using async / await gives you the illusion it runs synchronously. But, each await yields the main loop to other things.
You might invoke it every minute something like this. The .then().catch() stuff invokes it asynchronously.
setInterval (
function go () {
gather()
.then()
.catch(console.error)
}, 1000 * 60 * 60)
If you do actually have some cpu-intensive computation to do, you have a few choices.
offload it to a worker thread.
break it up into short chunks, with sleeps between them.
sleep = function sleep (howLong) {
return new Promise(function (resolve) {
setTimeout(() => {resolve()}, howLong)
})
}
async function gather () {
for (let chunkNo = 0; chunkNo < 100; chunkNo++) {
doComputationChunk(chunkNo)
await sleep(1)
}
}
That sleep() function yields to the main loop by waiting for a timeout to expire.
None of this is debugged, sorry to say.
For recurring tasks I prefer to use node-scheduler and shedule the jobs on app start-up.
In case you don't want to run CPU-expensive tasks in the main-thread, you can always run the code below in a worker-thread in parallel instead of the main thread - see info here
Here are two examples, one with a recurrence rule and one with interval in minutes using a cron expression:
app.js
let mySheduler = require('./mysheduler.js');
mySheduler.sheduleRecurrence();
// And/Or
mySheduler.sheduleInterval();
mysheduler.js
/* INFO: Require node-schedule for starting jobs of sheduled-tasks */
var schedule = require('node-schedule');
/* INFO: Helper for constructing a cron-expression */
function getCronExpression(minutes) {
if (minutes < 60) {
return `*/${minutes} * * * *`;
}
else {
let hours = (minutes - minutes % 60) / 60;
let minutesRemainder = minutes % 60;
return `*/${minutesRemainder} */${hours} * * *`;
}
}
module.exports = {
sheduleRecurrence: () => {
// Schedule a job # 01:00 AM every day (Mo-Su)
var rule = new schedule.RecurrenceRule();
rule.hour = 01;
rule.minute = 00;
rule.second = 00;
rule.dayOfWeek = new schedule.Range(0,6);
var dailyJob = schedule.scheduleJob(rule, function(){
/* INFO: Put your database-ops or other routines here */
// ...
// ..
// .
});
// INFO: Verbose output to check if job was scheduled:
console.log(`JOB:\n${dailyJob}\n HAS BEEN SCHEDULED..`);
},
sheduleInterval: () => {
let intervalInMinutes = 60;
let cronExpressions = getCronExpression(intervalInMinutes);
// INFO: Define unique job-name in case you want to cancel it
let uniqueJobName = "myIntervalJob"; // should be unique
// INFO: Schedule the job
var job = schedule.scheduleJob(uniqueJobName,cronExpressions, function() {
/* INFO: Put your database-ops or other routines here */
// ...
// ..
// .
})
// INFO: Verbose output to check if job was scheduled:
console.log(`JOB:\n${job}\n HAS BEEN SCHEDULED..`);
}
}
In case you want to cancel a job, you can use its unique job-name:
function cancelCronJob(uniqueJobName) {
/* INFO: Get job-instance for canceling scheduled task/job */
let current_job = schedule.scheduledJobs[uniqueJobName];
if (!current_job || current_job == 'undefinded') {
/* INFO: Cron-job not found (already cancelled or unknown) */
console.log(`CRON JOB WITH UNIQUE NAME: '${uniqueJobName}' UNDEFINED OR ALREADY CANCELLED..`);
}
else {
/* INFO: Cron-job found and cancelled */
console.log(`CANCELLING CRON JOB WITH UNIQUE NAME: '${uniqueJobName}`)
current_job.cancel();
}
};
In my example the recurrence and the interval are hardcoded, obviously you can also pass the recurrence-rules or the interval as argument to the respective function..
As per your comment:
'When looking at the implementation of node-schedule it feels like a this layer on top of setTimeout..'
Actually, node-schedule is using long-timeout -> https://www.npmjs.com/package/long-timeout so you are right, it's basically a convenient layer on top of timeOuts
I have some scheduled tasks in my node app. These are scheduled using node-cron. I'd like run job in parallel because I noticed when a cron job is not finished yet that others don't run.
Here an example
let cron = require('node-cron');
cron.schedule('*/2 * * * * *', ()=>
{
console.log("Every minute", new Date());
});
cron.schedule('*/2 * * * * *', ()=>
{
let a = 0;
for(i=0; i< 10000000000; i++)
a = a + 1;
console.log("Hello..... from scheduler");
});
How to solve this problem to run job in parallel ?
Thanks
iam programming a prototype application with the following components:
webinterface for admins
doing cron jobs(statistic generation, ..)
interact with other webservices over http
I started programming with nodejs(typescript) and i got the connection to the other services. Now i got a problem with cron-jobs in nodejs.
Iam using node-cron for executing the cronjob.
Inside one job i need to obtain the status of much pc's and make a summary of it. If i would do this, this would block the main thread.
So i think I need to this in a separate thread.
How can i do this in nodejs? Should i use webworker-threads?
Am I on the proper way?
Should i better use Java(Grails/Spring) for this?
I really like the simplicity of nodejs (for http clients, ..)
Hope someone can give me hope that iam on the proper way.
I will just use Node Cluster. Using cluster, a master can create a multiple workers, which means your cron wont block incoming request. Just make sure that only 1 worker doing the Cron.
I have never working with node-cron before, but I have experience with the SyncedCron. But should be the same.
For the http client there are a lot libraries doing this, you can check Request or httpclient.
Your code should look something like this :
var cluster = require('cluster');
var http = require('http');
var numWorkers = require('os').cpus().length-1; // just give 1 cpu for OS to use, or maybe 2
if (cluster.isMaster) {
console.log('Master cluster setting up ' + numWorkers + ' workers...');
var cronPID=null;
for(var i = 0; i < numWorkers; i++) {
var worker=cluster.fork();
if(i==0){
//instructing the first worker to assume role of SyncedCron.
worker.send('you do it!');
cronPID=worker.process.pid;
console.log("worker "+cronPID+" choosed as Cron worker!");
}
}
cluster.on('online', function(worker) {
console.log('Worker ' + worker.process.pid + ' is online');
});
cluster.on('exit', function(worker, code, signal) {
// have to revive the worker
console.log('Worker ' + worker.process.pid + ' died with code: ' + code + ', and signal: ' + signal);
console.log('Starting a new worker');
var newWorker=cluster.fork();
if(cronPID==worker.process.pid)
{ // need to re-elect a new cron worker!
newWorker.send('you do it!');
cronPID=newWorker.process.pid;
console.log("worker "+cronPID+" choosed as Cron worker!");
}
});
}else
{ // worker sides
process.on('message', (msg) => {
// validate your message that you get
// if validated, create a cron job here
});
// create your express below, I assume you use express instead of node's http library
var express = require('express');
var app = express();
app.post...........
}
Note :
To revive the master, use something like "forever"
Your server should have multiple core, at least 4 but I recommend more (8 maybe?).
var crypto = require('crypto');
var sha = crypto.createHash('sha512').update(String(s));
var result = sha.digest('hex');
That's my current code.
How do I do this async? I'm planning to do the sha512 100,000 times.
Node's crypto module does not provide asynchronous SHA512 hashing at present, and although the createHash() stream interface looks asynchronous it will also execute in the main thread and block the event loop.
There is an issue open for this: https://github.com/nodejs/node/issues/678
In the interim, you can use #ronomon/crypto-async to do SHA512 asynchronously and concurrently in the threadpool without blocking the event loop, for multi-core throughput.
If you can not find any better solutions, this trick may help you:
You can create a standalone SHA-512 generator application, which receives your String "s" as standard input, generates the hash, and writes it out to the standard output.
From within your app, you can exec it via the child_process module, and catch the response with an event handler. There is an other stackoverflow thread, which may come handy about child_process:
Is it possible to execute an external program from within node js?
This way you can encapsulate the sync function into an async context. :)
Node.js runs in a single thread, so if you want to do asynchronous processing, you have to either:
use a module that implements threading natively;
spawn multiple Node.js processes.
The method I present below uses the latter approach.
Node.js API provides a module called cluster that allows you to fork your process as you would do if you were programming in C.
My approach breaks the input data (the strings you want to hash) into chunks, where each chunk is passed to a child worker process. When the worker finishes work on its chunk, it signals the master process, passing the results back.
The master node keeps running while the workers do their job, so it can do any unrelated asynchronous work without being blocked. When all workers finish, the master is signaled and it is free to further process the final results.
To run my test, you can simply do:
node parhash
My tests ran on an Intel Core i5 4670 with 8 GB RAM DDR3.
For your need of 100'000 strings, 1 worker completed in 450 ms, while 10 workers took 350 ms.
In a test with a million strings, 1 worker did the job in 4.5 seconds, while 10 workers did in 3.5 seconds.
Here is the code:
parhash.js
var
crypto = require('crypto'),
cluster = require('cluster');
var
STRING_COUNT = 1000000,
STRINGS_PER_WORKER = 100000,
WORKER_COUNT = Math.ceil(STRING_COUNT / STRINGS_PER_WORKER),
chunks = [],
nextChunkId = 0,
results = [],
startTime,
pendingWorkers = WORKER_COUNT;
/**
* Generates strings partitioned in WORKER_COUNT chunks.
* Each of these chunks will later be passed to a child process to be parsed asynchronously.
*
* You should replace this with your working data.
*/
function generateDemoStringChunks() {
var
si, wi,
chunk;
for (wi = 0; wi < WORKER_COUNT; wi++) {
chunk = [];
for (si = STRINGS_PER_WORKER * wi; (si < STRINGS_PER_WORKER * (wi + 1)) && (si < STRING_COUNT); si++) {
chunk.push(si.toString());
}
chunks.push(chunk);
}
}
/**
* After all workers finish processing, this will be executed.
*
* Here you should do whatever you want to process the resulting hashes.
*/
function mergeResults() {
results.sort(function compare(a, b) {
return a.id - b.id;
});
console.info('Summary:');
results.forEach(function (result) {
console.info('\tChunk %d: %d hashes (here is the first hash: "%s")', result.id, result.data.length, result.data[0]);
});
}
/**
* This will be called on the master side every time a worker finishes working.
*
* #param {object} worker the Worker that finished
* #param {{id: number, data: [string]}} result the result
*/
function processWorkerResult(worker, result) {
console.info('Worker %d finished computing %d hashes.', worker.id, result.data.length);
results.push(result);
worker.kill();
if (--pendingWorkers == 0) {
console.info('Work is done. Whole process took %d seconds.', process.hrtime(startTime)[0]);
mergeResults();
}
}
/**
* Gets a chunk of data available for processing.
*
* #returns {{id: number, data: [string]}} the chunk to be passed to the worker
*/
function getNextAvailableChunk() {
var chunk = {
id: nextChunkId,
data: chunks[nextChunkId]
};
nextChunkId++;
return chunk;
}
/**
* The master node will send a chunk of data every time a worker node
* signals it's ready to work.
*/
function waitForWorkers() {
cluster.on('online', function (worker) {
console.info('Worker %d is online.', worker.id);
worker.on('message', processWorkerResult.bind(null, worker));
worker.send(getNextAvailableChunk());
});
}
/**
* Start workers.
*/
function spawnWorkers() {
var wi;
for (wi = 0; wi < WORKER_COUNT; wi++) {
cluster.fork();
}
}
/**
* The hash function.
*
* #param {string} s a string to be hashed
* #returns {string} the hash string
*/
function hashString(s) {
return crypto.createHash('sha512').update(s).digest('hex');
}
/**
* A worker will wait for the master to send a chunk of data and will
* start processing as soon as it arrives.
*/
function processChunk() {
cluster.worker.on('message', function(chunk) {
var
result = [];
console.info('Worker %d received chunk %d with a load of %d strings.', cluster.worker.id, chunk.id, chunk.data.length);
chunk.data.forEach(function processChunk(s) {
result.push(hashString(s));
});
cluster.worker.send({
id: chunk.id,
data: result
});
});
}
function main() {
if (cluster.isMaster) {
/*
The master node will instantiate all required workers
and then pass a chunk of data for each one.
It will then wait for all of them to finish so it can
merge the results.
*/
startTime = process.hrtime();
generateDemoStringChunks();
spawnWorkers();
waitForWorkers();
} else {
/*
A worker node will wait for a chunk to arrive and
then will start processing it. When finished, it will
send a message back to the master node with the
resulting hashes.
*/
console.info('Worker %d is starting.', cluster.worker.id);
processChunk();
}
}
main();
I can't tell how well it would perform if it were implemented using threads because I haven't tested it. You could try WebWorker Threads if you want to do a benchmark (note: I haven't tried the WebWorkers module yet and I don't guarantee it works - you are on your own here).
I am running a node.js script on EC2 to monitor and run a node HTTP server as a child process.
Unfortunately this child server slows down slowly, requests that take 0.2 seconds start dragging out, after days the same requests take over 2 seconds.
As part of debugging this, I implemented a 2 hour restart to kill the child server and start another one. This has no effect! The HTTP server child process is restarted, but it is still slow! Only restarting this parent script makes the child faster.
Why is the HTTP server slowing down, even when killed and restarted?
Environment is 0.4.9 Node.js on EC2 Ubuntu server. Parent script is below.
var http = require('http');
var server,
firstOperated = null;
lastOperated = null;
function operating(str) {
return (str.toString().substring(0, 13) != 'SERVER ONLINE') ? log(str) :
lastOperated = new Date();
}
function log(str) {
str = str.toString('utf8');
if (str.length) console.log(str.replace(/\n+$/gim, ''));
}
function createServer() {
if (server) {
server.kill('SIGKILL');
return console.log('KILLED NON RESPONSIVE SERVER');
}
server = require('child_process').spawn('node', [__dirname + '/http.js', 80]);
firstOperated = new Date();
server.stdout.on('data', operating);
server.stderr.on('data', log);
server.on('exit', function(code) {
lastOperated = null;
server = null;
console.log("SERVER EXITED: " + code);
});
}
createServer();
setInterval(function() {
if (new Date() - firstOperated > 1000 * 60 * 60 * 2) return createServer();
if (new Date() - lastOperated < 5 * 1000) return; // server seems to be operating ok
createServer();
}, 5 * 1000);
If the EC2 instance is a Micro instance, and you are running at high cpu for more than about 15 seconds usage then you will be throttled (severely). This would explain the symptoms. The solution would be to scale up to a small instance. (It would not have to be the Node process that consumes the cpu cycles).