Thanks in advance to anyone who reads this.
I need to send GCM messages (push notifications) to a list of client IDs at a certain time.
I am trying to use Agenda.js, since it has a persistence layer.
The following code seems to work fine initially, executing exactly when it is supposed to. But after the server has been idle for a while, the job starts executing in a loop. It also logs:
"WARNING: Date in past. Will never be fired."
Here is the relevant code:
var Agenda = require('agenda');   // the constructor must be capitalized
var gcm = require('node-gcm');

var agenda = new Agenda({db: {address: configParams.db}});

var schedule_notifications = function(req) {
    // define an agenda task named notify
    agenda.define('notify', function(job, done) {
        // create a gcm message
        var message = new gcm.Message({
            notification: { "body": 'test' }
        });
        var sender = new gcm.Sender('server id');
        var regTokens = ['phone id'];
        // send the message
        sender.send(message, { registrationTokens: regTokens }, function(err, response) {
            if (err) console.error(err);
            else console.log(response);
            done();
        });
    });

    // get the object from the request
    var req_json = JSON.parse(req.body.data),
        keys = Object.keys(req_json),
        key_string = keys[0],
        start_obj = new Date(req_json[key_string][0].start);

    // schedule the job with the date object found in the request
    // start_obj, for example, could be made using
    // start_obj = new Date();
    // notify is the name of the job to run
    agenda.schedule(start_obj, 'notify');
    agenda.start();

    // you can comment out agenda.schedule and uncomment the following line
    // to delete the unfinished jobs in the db
    // agenda.purge(function(err, numRemoved) {});
}
Does anyone have any idea why this could be happening? Any tips on how to debug this issue?
Thanks!
I fixed the problem. I added a job.remove() call and the job no longer fires repeatedly.
var Agenda = require('agenda');
var gcm = require('node-gcm');

var agenda = new Agenda({db: {address: configParams.db}});

var schedule_notifications = function(req) {
    // define an agenda task named notify
    agenda.define('notify', function(job, done) {
        // create a gcm message
        var message = new gcm.Message({
            notification: { "body": 'test' }
        });
        var sender = new gcm.Sender('server id');
        var regTokens = ['phone id'];
        // send the message
        sender.send(message, { registrationTokens: regTokens }, function(err, response) {
            if (err) console.error(err);
            else console.log(response);
            done();
        });
        // remove the job document once it has run
        job.remove(function(err) {
            if (!err) console.log("Successfully removed job from collection");
        });
    });

    // get the object from the request
    var req_json = JSON.parse(req.body.data),
        keys = Object.keys(req_json),
        key_string = keys[0],
        start_obj = new Date(req_json[key_string][0].start);

    // schedule the job with the date object found in the request
    // start_obj, for example, could be made using
    // start_obj = new Date();
    // notify is the name of the job to run
    agenda.schedule(start_obj, 'notify');
    agenda.start();

    // you can comment out agenda.schedule and uncomment the following line
    // to delete the unfinished jobs in the db
    // agenda.purge(function(err, numRemoved) {});
}
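For what it's worth, the looping is often caused by stale 'notify' job documents left in the collection from previous runs: when Agenda starts and finds a one-off job whose run date is now in the past, you get the "Date in past" warning. As an alternative to removing each job inside its own handler, a minimal sketch (an assumption, using Agenda's cancel() API, which in older versions takes a MongoDB-style query and a callback) clears leftover jobs before starting the scheduler:

// Sketch: clear any leftover 'notify' jobs before scheduling new ones
agenda.cancel({ name: 'notify' }, function(err, numRemoved) {
    if (err) return console.error(err);
    console.log('removed ' + numRemoved + ' stale notify jobs');
    agenda.start();
});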
I have an API in Node with many endpoints. It works well, but one endpoint can receive large requests that take up to an hour of processing, and it often breaks. I'm thinking of instead returning just a URL from such a request, where the client can check the status of the job and then download the result once it's ready. What would be the best way to handle a queue of jobs like this in Node?
Sample code for the current endpoint is below.
const express = require('express');
const joi = require('joi');

const router = express.Router();
const schema = joi.object().keys({ /* ... */ }); // This is a schema to validate the JSON input

router.post('/', async (req, res) => {
    let conn = await connect(); // Util method that connects to a Q/KDB server
    let request = req.body;
    joi.validate(request, schema, (err, _result) => {
        if (err) {
            res.status(400).send({ error: err['details'][0]['message'] });
        } else {
            let qRequest = buildRequest(request); // Util function to build request
            // Connect to Q/KDB server with node-q package and process request
            conn.k('api.process', qRequest, function(err, resp) {
                if (err) {
                    // write to log here
                    res.status(400).send({ error: err['details'][0]['message'] });
                } else {
                    res.status(200).send(resp);
                }
            });
        }
    });
});
EDIT:
I have found that I basically just need to build a job queue with job IDs attached to each job. The package Bull seems good, but I don't want to add another dependency such as Redis.
Conceptually, there are a couple of ways to approach a problem like this:
You can return a jobID and let the client query that jobID on some recurring basis, using a URL that contains the jobID, until they get a result (this sounds like what you envisioned).
You can have the client make a webSocket or socket.io connection to the server, and when the result is done, the server can send it directly over that connection.
You can use Server-Sent Events (SSE) to "push" the result to the client when it's done (a sketch of this option follows below).
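For illustration, here is a minimal sketch of the SSE approach (the route path and the whenFinished() helper are hypothetical placeholders for however you track job completion):

// Hypothetical SSE endpoint: the client opens an EventSource to /jobs/:jobID/events
router.get('/jobs/:jobID/events', (req, res) => {
    res.set({
        'Content-Type': 'text/event-stream',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive'
    });
    res.flushHeaders();
    // whenFinished() is a placeholder: it invokes the callback when the job is done
    whenFinished(req.params.jobID, (result) => {
        res.write('data: ' + JSON.stringify(result) + '\n\n');
        res.end();
    });
});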
Here's the outline of a scheme for the first option above:
Coins a unique jobID for each incoming job to process
Creates a route for querying the status of a jobID
Has a Map object that contains a list of jobs in process that is indexed by jobID
Has a setInterval() that sweeps through the jobs in the job Map to remove any expired jobs (jobs where the client never came back to get them). You can set the frequency of that sweep and the amount of time that you keep the job.
When a request comes in, it coins a new jobID, adds a "pending" job to the Map and returns back to the client a URL which they can query the job status on.
When you eventually finish processing the job, the result is added to the job object and its status is changed to "complete".
A route is added to query job status that includes the jobID.
If, when queried, the job status is "complete", then the result is returned and the job is removed from the Map.
If, when queried, the job status is "error", then the error is returned and the job is removed from the Map.
If, when queried, the jobID is not present, 404 status is returned
If, when queried, the job status is anything other than "complete" or "error", then job.status and the optional job.progress are returned. This lets your long-running process report progress, and you can use multiple status values if needed.
Here's code to illustrate the concept:
// A map of objects,
// the key is the jobID
// data is an object {status: "complete", result: someResult, timeStarted: someTime}
// If the job is not yet complete, status will be something other than "complete"
// and result will not yet exist
const jobs = new Map();

// check for expired jobs
const expirationInterval = 60 * 60 * 1000;    // run expiration check once an hour
const expirationTime = 12 * 60 * 60 * 1000;   // let jobs stay here for 12 hours
setInterval(() => {
    // accumulate an array of items to remove so we aren't modifying while iterating
    const expired = [];
    const now = Date.now();
    for (let [key, job] of jobs) {
        if (now - job.timeStarted > expirationTime) {
            expired.push(key);
        }
    }
    // now remove all expired jobs
    for (let key of expired) {
        jobs.delete(key);
    }
}, expirationInterval);

// make a job id that consists of current time (in ms) plus random number
// jobs can then be sorted or aged by time also
function makeJobID() {
    const base = Date.now().toString();
    const random = Math.random().toFixed(6).slice(2);  // toFixed() already returns a string
    return base + "_" + random;
}

// fetch data for a jobID
// The job may either not exist any more,
// may still be "pending" (or have some other status)
// or may be "complete"
// Note: if this router is not at the top level, you will have to make
// this path line up with the URL you sent back to the client
router.get("/jobstatus/:jobID", (req, res) => {
    let job = jobs.get(req.params.jobID);
    if (!job) {
        res.sendStatus(404);
        return;
    }
    if (job.status === "complete") {
        // remove it from the jobs Map and send the data
        jobs.delete(req.params.jobID);
        res.send({
            status: "complete",
            result: job.result
        });
    } else if (job.status === "error") {
        // remove it from the jobs Map and send the data
        jobs.delete(req.params.jobID);
        res.send({
            status: "error",
            error: job.error
        });
    } else {
        // optional job.progress can also be communicated back. This can be
        // a number, a string or an object of other data
        if (job.progress) {
            res.send({ status: job.status, progress: job.progress });
        } else {
            res.send({ status: job.status });
        }
    }
});
router.post('/', async (req, res) => {
    let conn;
    try {
        conn = await connect(); // Util method that connects to a Q/KDB server
    } catch (e) {
        console.log(e);
        res.sendStatus(500);
        return;                 // don't continue without a connection
    }
    let request = req.body;
    joi.validate(request, schema, (err, _result) => {
        if (err) {
            res.status(400).send({ error: err['details'][0]['message'] });
        } else {
            // coin job id and add a job object to the jobs map
            const jobID = makeJobID();
            const job = {
                timeStarted: Date.now(),
                status: "pending"
            };
            jobs.set(jobID, job);
            // send response now that gives them a URL to query
            res.status(202).send({
                status: "Job submitted",
                url: `https://yourdomain.com/jobstatus/${jobID}` // pick whatever URL you want here
            });
            let qRequest = buildRequest(request); // Util function to build request
            // Connect to Q/KDB server with node-q package and process request
            conn.k('api.process', qRequest, function(err, resp) {
                if (err) {
                    // set job status to "error"
                    job.status = "error";
                    job.timeCompleted = Date.now();
                    try {
                        job.error = err['details'][0]['message'];
                    } catch (e) {
                        console.log(e);
                        job.error = "unknown";
                    }
                } else {
                    // job has finished, update the job
                    // we can update the job object directly because the job Map
                    // points at this same object
                    job.status = "complete";
                    job.timeCompleted = Date.now();
                    job.result = resp;
                }
            });
        }
    });
});
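On the client side, polling could look something like this (a sketch; waitForJob() and the interval are illustrative, and the URL is the one returned by the POST above):

// Poll the status URL until the job completes or errors
async function waitForJob(url, intervalMs = 2000) {
    while (true) {
        const res = await fetch(url);
        if (res.status === 404) throw new Error('job expired or unknown');
        const data = await res.json();
        if (data.status === 'complete') return data.result;
        if (data.status === 'error') throw new Error(data.error);
        // still pending; wait and try again
        await new Promise(resolve => setTimeout(resolve, intervalMs));
    }
}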
This may be the wrong way to use a Bull queue, but here is what I want to do:
var Promise = require('bluebird');
var redis = require('redis');
var Queue = require('bull');

var redisClient = redis.createClient(6379);
var pdfQueue = new Queue('msg');

function check(resolve, reject, i) {
    console.log('check called');
    // Or, if this is in a router, I want to send request/response into the queue
    // so that I can use them in the 'completed' handler
    pdfQueue.add('msg', { 'msg': 'Hello', 'resolve': resolve, 'reject': reject }).then(job => {
        console.log('added to the pdf');
    });
}

pdfQueue.on('completed', function(job, result) {
    // Here I want to call request.send('some msg');
    // and resolve('complete');
    resolve('final callback');
});

pdfQueue.process('msg', 100, function(job, done) {
    console.log('process');
    done(null, 'job done');
});

function check2() {
    return new Promise(function(resolve, reject) {
        check(resolve, reject);
    });
}

check2().then(data => {
    console.log('got the value ', data);
});
In my real project I want to implement a queue from which I send a PDF to the user, e.g. res.download(pdfPath); but that call would have to live in pdfQueue.on('completed', () => { res.download(pdfPath); }); or in resolve(pdfPath). I can't find any way to send the PDF to the user through the queue, because I don't know how to reach the response (or resolve) from other functions when using queue jobs.
Please help me. Thank you.
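One common pattern (a sketch, not a drop-in fix) is to keep the response object on the web-server side instead of putting resolve/reject into the job data (functions can't be serialized into Redis), and to wait on Bull's job.finished() promise, assuming the processor returns the PDF path as the job result:

var express = require('express');
var Queue = require('bull');

var app = express();
var pdfQueue = new Queue('pdf');

// Processor: generate the PDF and return its path as the job result
pdfQueue.process(function(job, done) {
    // ... generate the PDF for job.data here ...
    done(null, { pdfPath: '/tmp/example.pdf' }); // hypothetical path
});

// Route: add a job, wait for it to finish, then send the file
app.post('/pdf', function(req, res) {
    pdfQueue.add({ msg: 'Hello' })
        .then(function(job) { return job.finished(); }) // resolves with the job's result
        .then(function(result) { res.download(result.pdfPath); })
        .catch(function(err) { res.status(500).send(err.message); });
});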
I have a weird problem where my callback is never published and the message times out, even though the method runs in the queue. This happens on some specific queues, and after it happens once, I cannot make any other requests from the client, even ones that previously worked; they all time out. I have to restart the client and server to make it work again.
This is the code where it's happening; I can't seem to figure out what's wrong.
This is the server.js file where I am creating the queues. I have several such queues; this is one of them.
var amqp = require('amqp');
var util = require('util');
var cnn = amqp.createConnection({ host: '127.0.0.1' });
var getCart = require('./services/getCart');

cnn.on('ready', function() {
    cnn.queue('getCart_queue', function(q) {
        q.subscribe(function(message, headers, deliveryInfo, m) {
            // util.log(util.format(deliveryInfo.routingKey, message));
            // util.log("Message: " + JSON.stringify(message));
            // util.log("DeliveryInfo: " + JSON.stringify(deliveryInfo));
            getCart.handle_request(message, function(err, res) {
                cnn.publish(m.replyTo, res, {
                    contentType: 'application/json',
                    contentEncoding: 'utf-8',
                    correlationId: m.correlationId
                });
            });
        });
    });
});
Here, the handle_request function completes successfully, but the callback never goes through and it always times out on the other end.
var cart = require('../models/cart');

function handle_request(msg, callback) {
    var user_id = msg.id;
    cart
        .find({ id: user_id })
        .populate('users ads')
        .exec(function(err, results) {
            // This works, just the callback doesn't
            if (!err) {
                console.log(results);
                callback(null, results);
            } else {
                console.log(err);
                callback(err, null);
            }
        });
}

exports.handle_request = handle_request;
This is how I am calling the request:
var msg_payload = { "id": id };
mq_client.make_request('getCart_queue', msg_payload, function(err, results) {
    console.log(results); // never prints
    // stuff that is never reached
});
These are my RPC files. I don't think there should be anything wrong with these, as some other queues work fine.
And this is the error shown on the client:
GET /getCart - - ms - -
Error: timeout 6ee0bd2a4b2ba1d8286e068b0f674d8f
at Timeout.<anonymous> (E:\Ebay_client\rpc\amqprpc.js:32:18)
at Timeout.ontimeout [as _onTimeout] (timers.js:341:34)
at tryOnTimeout (timers.js:232:11)
at Timer.listOnTimeout (timers.js:202:5)
I hope the information is not too vague; if you need more, please let me know. Thanks!
I think the error is in this file. I tried debugging, and on the RabbitMQ server side the callback is being called with the correlation id as well as the replyTo variable, so the reply is not getting picked up here.
var amqp = require('amqp'),
    crypto = require('crypto');

var TIMEOUT = 8000;
var CONTENT_TYPE = 'application/json';
var CONTENT_ENCODING = 'utf-8';
var self;

exports = module.exports = AmqpRpc;

function AmqpRpc(connection) {
    self = this;
    this.connection = connection;
    this.requests = {};
    this.response_queue = false;
}

AmqpRpc.prototype.makeRequest = function(queue_name, content, callback) {
    self = this;
    var correlationId = crypto.randomBytes(16).toString('hex');
    var tId = setTimeout(function(corr_id) {
        callback(new Error("timeout " + corr_id));
        delete self.requests[corr_id];
    }, TIMEOUT, correlationId);
    var entry = {
        callback: callback,
        timeout: tId
    };
    self.requests[correlationId] = entry;
    self.setupResponseQueue(function() {
        self.connection.publish(queue_name, content, {
            correlationId: correlationId,
            contentType: CONTENT_TYPE,
            contentEncoding: CONTENT_ENCODING,
            replyTo: self.response_queue
        });
    });
};

AmqpRpc.prototype.setupResponseQueue = function(next) {
    if (this.response_queue) return next();
    self = this;
    self.connection.queue('', { exclusive: true }, function(q) {
        self.response_queue = q.name;
        q.subscribe(function(message, headers, deliveryInfo, m) {
            var correlationId = m.correlationId;
            if (correlationId in self.requests) {
                var entry = self.requests[correlationId];
                clearTimeout(entry.timeout);
                delete self.requests[correlationId];
                entry.callback(null, message);
            }
        });
        return next();
    });
};
This is the code for your make_request() in the client.js file:
var amqp = require('amqp');
var connection = amqp.createConnection({ host: '127.0.0.1' });
var rpc = new (require('./amqprpc'))(connection);

function make_request(queue_name, msg_payload, callback) {
    rpc.makeRequest(queue_name, msg_payload, function(err, response) {
        if (err)
            console.error(err);
        else {
            console.log("response", response);
            callback(null, response);
        }
    });
}

exports.make_request = make_request;
Look at what happens when you have an err on rpc.makeRequest():
rpc.makeRequest(queue_name, msg_payload, function(err, response) {
    if (err)
        console.error(err);
        //
        // HERE: there should be a callback call here.
        //
    else {
        console.log("response", response);
        callback(null, response);
    }
});
This could be why you are getting a timeout. I hope it helps.
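In other words, a sketch of the fix would propagate the error instead of swallowing it:

rpc.makeRequest(queue_name, msg_payload, function(err, response) {
    if (err) {
        console.error(err);
        return callback(err); // propagate the timeout to the caller
    }
    console.log("response", response);
    callback(null, response);
});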
It turned out there wasn't a problem with RabbitMQ, but with my queries in the handle_request and after responding to the request.
For others coming to this problem: check and double-check every statement, as the error will not show in the console; you will only see a timeout.
I'd like to listen on a MongoDB capped collection, using it as a logging facility.
I use Node, Express.js, and MongoDB (with Mongoose).
This is the (simplified) code I have come up with so far:
var mongoose = require('mongoose');
mongoose.connect('mongodb://localhost/mydb');

var logSchema = new mongoose.Schema({
    date: Date,
    message: String
}, {
    capped: {
        size: 1024
    }
});
var Log = mongoose.model('Log', logSchema);

var filter = { "date": { "$gte": Date.now() } };
var stream = Log.find(filter).tailable().stream();

stream.on('data', function(doc) {
    console.log('log stream data - new doc:', doc.message);
}).on('error', function(error) {
    console.log('status stream data - error:', error.message);
}).on('close', function() {
    console.log('status stream data - closed');
});

// ...

var log = new Log();
var logger = function(message) {
    log.date = new Date();
    log.message = message;
    log.save(function(err) {
        if (err) {
            return console.error('error saving log');
        }
        console.log('log message "' + message + '" added');
    });
};

// ...

var myRoutingMethod = function(req, res) {
    logger('my routing method started');
    // ...
    res.json('done');
};
My problem is, before myRoutingMethod() is called, I get:
database connection opened
log message "my new message" added
status stream data - error: No more documents in tailed cursor
status stream data - closed
So, I never get
log stream data - new doc: my new message
I am probably missing something about integrating stream() on the capped Log collection with express.js...
Any clue?
It is hard to spot what went wrong from the code alone.
However, based on other answers here on StackOverflow, the following may help you out:
First, check the version of Mongoose in your environment and make sure it is 2.7 or later.
If you had that collection in non-capped mode and switched it to capped mode after a couple of iterations, try dropping the collection and starting again from scratch. (You may want to back up the collection and re-initialize from the backup.)
Based on initializations found in the docs and here on StackOverflow, I would suggest configuring the capped collection schema as follows:
// new Schema declaration
var logSchema = mongoose.Schema({...}, { capped: { size: 1024, max: 1000, autoIndexId: true } });

// Export your model as follows
module.exports = mongoose.model('Log', logSchema);
To initialize and use your Mongoose model:
var Log = require('path/to/log/schema');
var query = { /** query parameters here */ };

// Initialize the stream
var stream = Log.find(query).tailable().stream();

// Process data
stream.on('data', function(doc) {});
stream.on('error', function(error) {});
stream.on('close', function(status) {});
To save (or edit), you can use the same approach as before:
new Log(params).save(function(error, log) {
    // Do something with the error or the new log
});
You may find more information in this StackOverflow answer as well: https://stackoverflow.com/a/18045399/132610
I hope this helps.
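One more thing worth checking (an assumption based on the exact error message in the question): a tailable cursor opened on an empty capped collection can close immediately with "No more documents in tailed cursor". A common workaround is to insert at least one document first, and/or to reopen the stream whenever it closes, e.g.:

function openLogStream() {
    var stream = Log.find({ date: { $gte: Date.now() } }).tailable().stream();
    stream.on('data', function(doc) {
        console.log('log stream data - new doc:', doc.message);
    });
    stream.on('error', function(error) {
        console.log('log stream error:', error.message);
    });
    stream.on('close', function() {
        // the cursor dies if there is nothing to tail; retry after a short delay
        setTimeout(openLogStream, 1000);
    });
}
openLogStream();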
I aim to import a large amount of data with Mongoose. As a newbie, I have failed to set up the flow control properly with the various mechanisms provided by async. Glad if someone could point me to an appropriate solution. Thanks.
var async = require('async'),
    mongoose = require('mongoose');

mongoose.connect('mongodb://localhost/test');
var Cat = mongoose.model('Cat', { name: String });

// Imagine this is a huge array with a million items.
var content = ['aaa', 'bbb', 'ccc'];

var queries = [];
content.forEach(function(name) {
    queries.push(function(cb) {
        var obj = new Cat({ name: name });
        obj.save(function(err) {
            console.log("SAVED: " + name);
            console.log(err);
        });
        return true;
    });
});

// FAILED: async.parallel adds all content to db,
// but it would exhaust the resources with too many parallel tasks.
async.parallel(queries, function(err, result) {
    if (err)
        return console.log(err);
    console.log(result);
});

// FAILED: saves the first item but not the rest
async.waterfall(queries, function(err, result) {
    if (err)
        return console.log(err);
    console.log(result);
});

// FAILED: same as async.waterfall, async.queue saves the first item only
var q = async.queue(function(name, cb) {
    var obj = new Cat({ name: name });
    obj.save(function(err) {
        console.log("SAVED: " + name);
        console.log(err);
    });
});

q.push(content, function(err) {
    console.log('finished processing queue');
});
I think eachLimit or eachSeries fit your situation best:
var content = ['aaa', 'bbb', 'ccc'];
async.eachLimit(content, 10, function(name, done) {
    var obj = new Cat({ name: name });
    obj.save(done);
    // if you want to print some status info, use this instead:
    //
    // obj.save(function(err) {
    //     console.log("SAVED: " + name);
    //     console.log(err);
    //     done(err);
    // });
    //
}, function(err) {
    // handle any errors
});
With eachLimit, you can run X queries 'in parallel' (10 in the example above) to speed things up without exhausting resources. eachSeries will wait for the previous save before continuing with the next, effectively saving one object at a time.
Notice that with each*, you won't get a list with (saved) objects back (it's a bit of a fire-and-forget mechanism where you're not interested in the outcome, bar any errors). If you do want a list of saved objects in the end, you can use the equivalent map* functions: mapLimit and mapSeries.
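For completeness, a sketch of the mapLimit variant, which collects the saved documents (Mongoose's save() invokes its callback with (err, doc), which is the shape mapLimit expects):

async.mapLimit(content, 10, function(name, done) {
    new Cat({ name: name }).save(done); // done(err, savedDoc)
}, function(err, savedCats) {
    if (err) return console.error(err);
    console.log('saved ' + savedCats.length + ' cats');
});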