How to make Bull jobs concurrent - node.js

For context, I'm new to Bull (https://github.com/OptimalBits/bull) and am trying to execute a lot of jobs concurrently. But Bull seems to wait for each job to complete before starting the next one.
Here is my code
const Bull = require("bull");
// Create the queue object named 'jobs'; no further options are passed.
const jobs = new Bull('jobs');
// Register the job handler. Only the handler is passed to process() here.
// NOTE(review): Bull's process() takes an optional concurrency argument ahead
// of the handler; with none given it presumably handles one job at a time,
// which would match the 30-second run described — confirm against the Bull docs.
jobs.process(async (job) => {
// Promise-based delay: resolves after `ms` milliseconds.
const wait = ms => new Promise(resolve => setTimeout(resolve, ms));
console.log(`start job.id = ${job.id}`);
await wait(3000); // Wait 3 seconds.
console.log(`end job.id = ${job.id}`);
});
// Immediately-invoked async function that enqueues ten jobs with an empty
// payload. The value returned by jobs.add() is not awaited.
void async function main () {
for (let i = 0; i < 10; i++) {
jobs.add({});
}
}();
When I run this code, it takes 30 seconds to execute, one job after another. What am I doing wrong, and how can I make this concurrent?

Related

Node Schedule - Cancelling and scheduling a job makes the job run many times (instead of just once)

I'm running into issues with the node-schedule package.
I'm trying to run it with GCP cloud functions. The case where a job is scheduled and fired normally works fine. Cancelling a job also works.
However, when I try to reschedule it (by cancelling the job and scheduling a new one with the same id), it fires many times instead of just once.
E.g., it is rescheduled three times, but fires 15 times.
This is the code I have in my GCP function:
const functions = require('firebase-functions');
var schedule = require('node-schedule');
const {stringify} = require('flatted');
// HTTP-triggered function that lists, schedules, reschedules or cancels
// node-schedule jobs depending on `req.body.type`.
// Reads `id`, `type` and `date` from the request body.
// NOTE(review): jobs are looked up on `schedule.scheduledJobs`, a property of
// the required module object, so the registry is presumably per-process state;
// if the function runs in more than one instance each would hold its own —
// confirm against the hosting environment.
exports.jobScheduler = functions.https.onRequest(async (req, res) => {
try {
// `id` may be absent; optional chaining leaves jobId undefined in that case.
const jobId = req.body.id?.toString();
const type = req.body.type;
const date = new Date(req.body.date);
if (type === 'list_jobs') {
// Serialize the job registry with flatted's stringify and return it.
const scheduledJobs = schedule.scheduledJobs;
res.status(200).send(stringify(scheduledJobs));
} else {
if(type === 'schedule_job') {
// Schedule a job named jobId for `date`; the callback only logs.
const scheduledJob = schedule.scheduleJob(jobId, date, () => {
// API CALL
console.log(`SCHEDULED JOB IS RUNNING FOR ${jobId}`);
}
);
functions.logger.log(`Job scheduled for ${jobId}.`);
}
if (type === 'reschedule_job') {
// Cancel the job currently registered under jobId (if any), then schedule
// a new one under the same name for the new date.
const jobToReschedule = schedule.scheduledJobs[jobId];
if (jobToReschedule) {
jobToReschedule.cancel();
functions.logger.log(`Job canceled for a reschedule for ${jobId}.`);
const rescheduledJob = schedule.scheduleJob(jobId, date, () => {
//API CALL
console.log(`Rescheduled job is running for ${jobId}`);
}
);
functions.logger.log(`[Job rescheduled for ${jobId}.`);
} else {
functions.logger.log(`No pending job to reschedule for ${jobId}.`);
}
}
if (type === 'cancel') {
// Cancel the job registered under jobId, if one is found.
const jobToCancel = schedule.scheduledJobs[jobId];
if (jobToCancel) {
jobToCancel.cancel();
functions.logger.log(`Job canceled for ${jobId}.`);
} else {
functions.logger.log(`No pending job for ${jobId}.`);
}
}
// Sent for every non-list request type, including when no job was found
// and when `type` matched none of the branches above.
res.status(200).send(`Job to ${type} notification for ${jobId} was successful.`);
}
} catch (error) {
// Any thrown error is logged and returned to the caller with status 400.
functions.logger.log(error);
res.status(400).json(error);
}
});
I tried schedule.reschedule(), cancelling and creating a new schedule. I expect the job to be canceled and rescheduled for the new date, and for the job to fire only once (at the rescheduled date). There are some cases where a job might be rescheduled a lot of times. I expect it to only fire once (the last reschedule). However, the job fires more than once. In fact, it fires more than the number of times it was rescheduled.
I appreciate the help!

How to properly write cloud functions that automatically update firestore documents

I am trying to write a Firebase cloud function that runs a simple while loop every time a new user creates an account. For some reason, the update function only runs once and stops. The code I use is pasted below.
const functions = require("firebase-functions");
const admin = require('firebase-admin');
admin.initializeApp();
const firestore = admin.firestore();
// Module-level state shared by the trigger below and updateCounter().
var data;
var counter = 0;
// Firestore onCreate trigger for documents under testCollection.
exports.onUserCreate = functions.firestore.document('testCollection/{docID}').onCreate(async(snapshot, context) =>{
// Keep the new document's fields; updateCounter() reads data['username'].
data = snapshot.data();
// NOTE(review): nothing in this loop body changes the module-level `counter`
// synchronously, so the condition appears never to become false — confirm.
while (counter < 5) {
// updateCounter(counter) is invoked right here; its return value (a promise,
// since updateCounter is async) is what setInterval receives, not a function.
setInterval(updateCounter(counter), 5000);
}
})
// Increments the `counter` field of the testCollection document whose id is
// data['username'] (module-level `data`) by one.
// The `counter` parameter shadows the module-level `counter` variable, so the
// `counter++` below changes only the local parameter.
async function updateCounter(counter){
await firestore.collection('testCollection').doc(data['username']).update({
counter: admin.firestore.FieldValue.increment(1)
});
counter++;
}
Cloud Functions stops running your code when it hits the final } of your function, as otherwise it'd be billing you indefinitely for it.
If you want your code to continue running, you'll need to return a promise that resolves when your code is done with its work (up to 9 minutes).
// Intermediate version: returns a promise so the trigger is kept alive until
// the work is reported as done.
exports.onUserCreate = functions.firestore.document('testCollection/{docID}').onCreate(async(snapshot, context) =>{
data = snapshot.data();
// NOTE(review): Promise is called here without `new`; the Promise constructor
// throws a TypeError when invoked as a plain function.
return Promise((resolve, reject) => {
// Same loop as in the question: updateCounter(counter) is called immediately
// and its result, not a function, is handed to setInterval.
while (counter < 5) {
setInterval(updateCounter(counter), 5000);
}
// Poll every 5 seconds and resolve once the module-level `counter` is at
// least 5. The interval handle is not kept, so it is never cleared.
setInterval(() => {
if (counter >= 5) {
resolve()
}
}, 5000)
})
})
Note that the while (counter < 5) loop in the code still won't do what you expect it to do, but at least now the function will continue to run for a few moments and the counter will be incremented.
This is probably what you want:
exports.onUserCreate = functions.firestore.document('testCollection/{docID}').onCreate(async(snapshot, context) =>{
data = snapshot.data();
return Promise((resolve, reject) => {
setTimeout(updateCounter, 5000);
setTimeout(updateCounter, 10000);
setTimeout(updateCounter, 15000);
setTimeout(updateCounter, 20000);
setTimeout(updateCounter, 25000);
setInterval(() => {
if (counter >= 5) {
resolve()
}
}, 5000)
})
})
This calls updateCounter 5 times, each 5 seconds after the previous call. There is a chance that the final database update won't be completed before the call to resolve, so I strongly recommend learning more about asynchronous behavior by reading the documentation on sync, async, and promises and watching Doug's excellent promises and async behavior in Cloud Functions series.
If the requirement is to run this function 5 times, once every 5 seconds, this could work:
// Fragment meant to run inside an async function: five sequential rounds of
// "update, then wait 5 seconds". Each round starts only after the previous
// update and pause have finished.
for (let i =0;i<5;i++){
await updateCounter();
await sleep (5000);
}
/**
 * Pause helper for async code.
 * @param {number} ms - delay in milliseconds
 * @returns {Promise<void>} promise fulfilled once the delay has elapsed
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}

Trigger the execution of a function if any condition is met

I'm writing an HTTP API with expressjs in Node.js and here is what I'm trying to achieve:
I have a regular task that I would like to run regularly, approx every minute. This task is implemented with an async function named task.
In reaction to a call in my API I would like to have that task called immediately as well
Two executions of the task function must not be concurrent. Each execution should run to completion before another execution is started.
The code looks like this:
// only a single execution of this function is allowed at a time
// which is not the case with the current code
// Simulated unit of work: logs the reason, waits one second, logs "done".
// Nothing here prevents two calls from overlapping.
async function task(reason: string) {
console.log("do thing because %s...", reason);
await sleep(1000);
console.log("done");
}
// call task regularly
setIntervalAsync(async () => {
await task("ticker");
}, 5000) // normally 1min
// call task immediately
// The response is sent only after this call to task() has finished.
app.get("/task", async (req, res) => {
await task("trigger");
res.send("ok");
});
I've put a full working sample project at https://github.com/piec/question.js
If I were in go I would do it like this and it would be easy, but I don't know how to do that with Node.js.
Ideas I have considered or tried:
I could apparently put task in a critical section using a mutex from the async-mutex library. But I'm not too fond of adding mutexes in js code.
Many people seem to be using message queue libraries with worker processes (bee-queue, bullmq, ...) but this adds a dependency to an external service like redis usually. Also if I'm correct the code would be a bit more complex because I need a main entrypoint and an entrypoint for worker processes. Also you can't share objects with the workers as easily as in a "normal" single process situation.
I have tried RxJs subject in order to make a producer consumer channel. But I was not able to limit the execution of task to one at a time (task is async).
Thank you!
You can make your own serialized asynchronous queue and run the tasks through that.
This queue uses a flag to keep track of whether it's in the middle of running an asynchronous operation already. If so, it just adds the task to the queue and will run it when the current operation is done. If not, it runs it now. Adding it to the queue returns a promise so the caller can know when the task finally got to run.
If the tasks are asynchronous, they are required to return a promise that is linked to the asynchronous activity. You can mix in non-asynchronous tasks too and they will also be serialized.
/**
 * Runs queued functions strictly one at a time, in the order they were added.
 *
 * A task may be synchronous or may return a promise; the next task is not
 * started until the current one has returned, thrown, or its promise has
 * settled.
 */
class SerializedAsyncQueue {
  constructor() {
    // Pending entries: { fn, args, deferred }.
    this.tasks = [];
    // True while a task has been started and has not yet settled.
    this.inProcess = false;
  }

  /**
   * Queues `fn` to be called as `fn(...args)`.
   *
   * @param {Function} fn - task to run; may return a value or a promise
   * @param {...*} args - arguments forwarded to `fn`
   * @returns {Promise<*>} settles with the task's own outcome: fulfilled with
   *   its result, or rejected with whatever it threw or rejected with
   */
  add(fn, ...args) {
    const d = new Deferred();
    // `args` is already an array here; it is spread again when the task runs.
    this.tasks.push({ fn, args, deferred: d });
    this.check();
    return d.promise;
  }

  /**
   * Starts the next queued task if nothing is running. Called after every
   * add() and again whenever a task settles.
   */
  check() {
    if (this.inProcess || this.tasks.length === 0) {
      return;
    }

    this.inProcess = true;
    const nextTask = this.tasks.shift();

    // A task that throws synchronously must be treated like a rejection;
    // otherwise the error escapes from add() and `inProcess` stays true,
    // stalling the queue for good.
    let outcome;
    try {
      outcome = Promise.resolve(nextTask.fn(...nextTask.args));
    } catch (err) {
      outcome = Promise.reject(err);
    }

    // Two-argument then(): the failure path runs only for the task's own
    // failure, never as a second pass after the success path.
    outcome.then(
      (val) => {
        this.inProcess = false;
        nextTask.deferred.resolve(val);
        this.check();
      },
      (err) => {
        console.log(err);
        this.inProcess = false;
        nextTask.deferred.reject(err);
        this.check();
      }
    );
  }
}
/**
 * A promise bundled with its own settle functions.
 *
 * Usable with or without `new`. The instance exposes `promise`, `resolve`,
 * `reject`, and `then` / `catch` / `finally` bound to that promise
 * (`finally` only where the promise provides it).
 */
const Deferred = function() {
  const constructed = this instanceof Deferred;
  if (!constructed) {
    return new Deferred();
  }

  const self = this;
  // The executor runs synchronously, so resolve/reject are attached before
  // `promise` itself is stored on the instance.
  const pending = new Promise(function(resolve, reject) {
    self.resolve = resolve;
    self.reject = reject;
  });
  self.promise = pending;

  for (const method of ['then', 'catch']) {
    self[method] = pending[method].bind(pending);
  }
  if (pending.finally) {
    self.finally = pending.finally.bind(pending);
  }
}
let queue = new SerializedAsyncQueue();
/**
 * Utility: promise-based delay.
 * @param {number} t - delay in milliseconds
 * @returns {Promise<void>} promise fulfilled after `t` milliseconds
 */
const sleep = (t) => new Promise((wake) => setTimeout(wake, t));
// only a single execution of this function is allowed at a time
// so it is run only via the queue that makes sure it is serialized
/**
 * Runs the simulated work through `queue`, so executions never overlap.
 *
 * @param reason - label included in the log line
 * @returns the promise handed back by queue.add() for this execution
 */
async function task(reason: string) {
  // Must be async: `await` is a syntax error inside a plain function, and the
  // returned promise is what lets the queue know when this run has finished.
  async function runIt() {
    console.log("do thing because %s...", reason);
    await sleep(1000);
    console.log("done");
  }
  return queue.add(runIt);
}
// call task regularly
// Periodic caller: each tick waits for its task() call to finish.
setIntervalAsync(async () => {
await task("ticker");
}, 5000) // normally 1min
// call task immediately
// The response is sent only after this request's task() call has completed.
app.get("/task", async (req, res) => {
await task("trigger");
res.send("ok");
});
Here's a version using RxJS#Subject that is almost working. How to finish it depends on your use-case.
// Simulated unit of work: logs the reason, waits one second, logs "done".
async function task(reason: string) {
console.log("do thing because %s...", reason);
await sleep(1000);
console.log("done");
}
// Every value pushed into `run` is a request to execute task() with that reason.
const run = new Subject<string>();
const effect$ = run.pipe(
// Limit one task at a time
concatMap(task),
// share() lets the subscriptions below observe the same pipeline.
share()
);
// Subscribe once up front; the subscription object is kept in effectSub.
const effectSub = effect$.subscribe();
// Periodic trigger: push "ticker" every 5 seconds.
interval(5000).subscribe(_ =>
run.next("ticker")
);
// call task immediately
app.get("/task", async (req, res) => {
// Respond on the next emission of effect$ after this subscription is made.
// That emission is not tied to the run.next("trigger") call below.
effect$.pipe(
take(1)
).subscribe(_ =>
res.send("ok")
);
run.next("trigger");
});
The issue here is that res.send("ok") is linked to the effect$ stream's next emission. This may not be the one generated by the run.next call you're about to make.
There are many ways to fix this. For example, you can tag each emission with an ID and then wait for the corresponding emission before using res.send("ok").
There are better ways too if calls distinguish themselves naturally.
A Clunky ID Version
Generating an ID randomly is a bad idea, but it gets the general thrust across. You can generate unique IDs however you like. They can be integrated directly into the task somehow or can be kept 100% separate the way they are here (task itself has no knowledge that it's been assigned an ID before being run).
// A task request tagged with an identifier chosen by the caller.
interface IdTask {
taskId: number,
reason: string
}
// The result of a task, tagged with the identifier of the request that produced it.
interface IdResponse {
taskId: number,
response: any
}
// Simulated unit of work; it receives only the reason and never sees the ID.
async function task(reason: string) {
console.log("do thing because %s...", reason);
await sleep(1000);
console.log("done");
}
const run = new Subject<IdTask>();
const effect$: Observable<IdResponse> = run.pipe(
// concatMap only allows one observable at a time to run
concatMap((eTask: IdTask) => from(task(eTask.reason)).pipe(
// Pair the task's result with the ID of the request that started it.
map((response:any) => ({
taskId: eTask.taskId,
response
})as IdResponse)
)),
share()
);
// Subscribe once up front and log every emission.
const effectSub = effect$.subscribe({
next: v => console.log("This is a shared task emission: ", v)
});
// Periodic trigger: the value emitted by interval() is used as the taskId.
interval(5000).subscribe(num =>
run.next({
taskId: num,
reason: "ticker"
})
);
// call task immediately
app.get("/task", async (req, res) => {
// ID for this request; used below to recognise the matching emission.
const randomId = Math.random();
// Subscribe before pushing the request, then respond on the first emission
// whose taskId equals this request's ID.
effect$.pipe(
filter(({taskId}) => taskId == randomId),
take(1)
).subscribe(_ =>
res.send("ok")
);
run.next({
taskId: randomId,
reason: "trigger"
});
});

Run node.js functions in parallel

I am new to Node.js and I wanted some functions to run simultaneously.
I have seen several articles and as far as I understood I can use Promise.all and Promise.allSettled.
I don't understand why my functions run sequentially, here's the code I arranged.
// Class method (the enclosing class is not shown). Starts four analyses, waits
// for all of them with Promise.all, logs the elapsed milliseconds and returns
// dataWords.
// NOTE(review): `propositionSplitted` is not defined in this snippet and the
// `proposition` parameter is not used; presumably the split was elided — confirm.
async processPropositions(proposition) {
const dataWords = [];
const start = Date.now()
//async functions that return promises
// All four calls are made before anything is awaited.
const invariableData = this.invariables(propositionSplitted);
const nounsData = this.nouns(propositionSplitted);
const adjectivesData = this.adjectives(propositionSplitted);
const verbsData = this.verbs(propositionSplitted);
//here the code should stop until every promise is resolved
const [invariables, nouns, adjectives, verbs] = await Promise.all([invariableData, nounsData, adjectivesData, verbsData]);
//I find this time == to the sum of the time printed in the single functions above
const finish = Date.now()
const time = finish - start
console.log(time)
//here in the original function I append results to dataWords
return dataWords;
}
I have printed the time of the single async functions (i.e. this.invariables(propositionSplitted);, this.nouns(propositionSplitted);, this.adjectives(propositionSplitted);, this.verbs(propositionSplitted);) and their sum is equal to the time I'm printing with this function.
I have the same problem when I try to run the main function for every proposition of the array propositions with a for loop. Since they're independent, I'd like to run them simultaneously and then collect the results when every promise is resolved.
I tried this but obviously I'm missing a fundamental concept of asynchronous coding:
// Fragment: `results`, `dataWords`, `propositions` and `conjunctions` are
// defined outside this snippet.
for (let proposition of propositions) {
// Start processing this proposition and keep its promise.
results.push(this.processPropositions(proposition));
// This loop is nested inside the outer one, so on every outer iteration it
// awaits each promise collected so far and pushes its value to dataWords again.
for (let result of results) {
dataWords.push(await result);
// After a result, push the conjunction at that result's index, when there is
// more than one proposition and the index is within `conjunctions`.
if (propositions.length > 1 && results.indexOf(result) < conjunctions.length) dataWords.push(Conjunctions.getConjunction(conjunctions[results.indexOf(result)]));
}
}
If I don't await, the for loop finishes before it receives the promise's result, while if I keep the await it runs sequentially.
The inner loop isn't waiting for the promises to resolve. Load up all the promises like you are doing in the outer loop, but don't check for resolutions until after they are fulfilled. Then you can loop through the results.
/**
 * Demo timer: a promise that fulfils with `duration` after `duration` ms.
 * @param {number} duration - delay in milliseconds, also the fulfilment value
 * @returns {Promise<number>}
 */
function myPromise(duration) {
  // setTimeout forwards its extra argument to the callback, so the promise
  // is fulfilled with `duration` when the timer fires.
  return new Promise((resolve) => setTimeout(resolve, duration, duration));
}
// Durations, in milliseconds, of the demo timers.
const propositions = [ 200, 500, 4000 ];

// Start every timer before waiting on any of them.
const promises = propositions.map((proposition) => myPromise(proposition));

const start = Date.now();

// Report once all timers have fulfilled: total elapsed time first, then each
// timer's own duration in input order.
Promise.all(promises).then((results) => {
  const totalTime = Date.now() - start;
  console.log(`all timers finished in ${totalTime}`);
  for (const [i, t] of results.entries()) {
    console.log(` timer ${i} took ${t}`);
  }
});

Why are my promises (running in parallel and running in serial) completing at the same time

I am trying to see the difference in processing time between running promises in parallel and in serial order. But in the code below, I get the output from both functions at the same time. Ideally, the parallel function's result should come up much quicker. Is there anything I am doing wrong here?
// Delay, in milliseconds, used by both timers below.
const timeout = 10000
// Despite the name, function1 is a promise, not a function: the executor (and
// therefore the timer) starts as soon as this line runs.
const function1 = new Promise((resolve, reject) => {
setTimeout(() => {
resolve("hello1")
}, timeout);
})
// Same for function2: its timer also starts immediately.
const function2 = new Promise((resolve, reject) => {
setTimeout(() => {
resolve("hello2")
}, timeout);
})
// Waits for both already-created promises together and logs the result array.
const parallel = async () => {
const result1 = function1
const result2 = function2
const result = await Promise.all([result1, result2])
console.log(result)
}
// Waits for the same two promises one after the other and logs both values.
const serial = async () => {
const result1 = await function1
const result2 = await function2
console.log("result", result1 + " : "+result2)
}
parallel()
serial()
There are two key things here:
Promises don't "run" at all. A promise is a way of observing the completion of something that's running, they don't run anything. You aren't the only one who's confused by this. :-) It's a very common misunderstanding.
Your function1 and function2 aren't functions, they're constants containing promises. As soon as you call new Promise, your code in the promise executor runs, which starts your timer. The promise executor is called synchronously by the Promise constructor to start whatever async operation the promise is going to report the completion of.
This is why you're seeing the result you're seeing: You're starting all of your timers at the same time, so they all fire at the same time. It doesn't matter whether you're observing those completions in parallel or in series.
If you want to see the difference, wait to start your operation:
// Corrected demo: function1 and function2 are now functions that create a new
// promise (and start a new timer) each time they are called.
const timeout = 1000; // <== Changed to 1s
const function1 = () => new Promise((resolve, reject) => {
// Note the −−−−−−^^^^^−−− change, I've made `function1` actually a function
setTimeout(() => {
resolve("hello1");
}, timeout);
});
// Note the change, I've made `function2` actually a function
const function2 = () => new Promise((resolve, reject) => {
// Note the −−−−−−^^^^^−−− change, I've made `function2` actually a function
setTimeout(() => {
resolve("hello2");
}, timeout);
});
// Both calls are made before anything is awaited, so the two timers overlap.
const parallel = async () => {
const result1 = function1(); // <== Calling the function starts the timer
const result2 = function2(); // <== Calling the function starts the timer
const result = await Promise.all([result1, result2]);
console.log(result);
};
// function2() is not called until function1()'s promise has been awaited, so
// the second timer starts only after the first has finished.
const serial = async () => {
const result1 = await function1(); // <== Calling the function starts the timer
const result2 = await function2(); // <== Calling the function starts the timer
console.log("result", result1 + " : " + result2);
};
parallel();
serial();

Resources