I am trying to execute the following AWS lambda:
index.js:
const test = require("test");
exports.handler = async (event) => {
await test.main();
};
test.js:
const {Worker} = require("worker_threads");
const main = () => {
let num1 = 10;
let num2 = 20;
const worker = new Worker("./worker.js", {workerData: {num1, num2}});
worker.on("message", result => {
console.log(`${num1} + ${num2} is ${result}`);
});
worker.on("error", error => {
console.log(error);
});
worker.on("exit", exitCode => {
console.log(exitCode);
});
console.log("I am here!");
}
exports.main = main;
worker.js:
const {parentPort, workerData} = require("worker_threads");
const getSum = (num1, num2) => {
return num1 + num2;
}
parentPort.postMessage(getSum(workerData.num1, workerData.num2));
When I run the same program on my laptop it is working fine. I see the output of the worker thread consistently.
Output on my laptop:
❯ node index.js
I am here!
10 + 20 is 30
0
Output on the lambda:
START RequestId: c178d74b-da57-4765-9fa7-77d3fc83d645 Version: $LATEST
2021-08-31T14:33:37.353Z c178d74b-da57-4765-9fa7-77d3fc83d645 INFO I am here!
END RequestId: c178d74b-da57-4765-9fa7-77d3fc83d645
REPORT RequestId: c178d74b-da57-4765-9fa7-77d3fc83d645 Duration: 2.12 ms Billed Duration: 3 ms Memory Size: 10240 MB Max Memory Used: 108 MB
When I run the lambda, the output is very random. Sometimes I see the output of the worker thread and other times I don't.
Why is there a difference in execution of the program on AWS lambda and on my laptop?
You don't await for the worker async operation to complete in the test.js file. Try adding a promise that resolves when worker finishes. Like this:
const { Worker } = require("worker_threads");
const main = async () => {
let num1 = 10;
let num2 = 20;
const worker = new Worker("./worker.js", { workerData: { num1, num2 } });
worker.on("message", (result) => {
console.log(`${num1} + ${num2} is ${result}`);
});
worker.on("error", (error) => {
console.log(error);
});
console.log("I am here!");
// Awaiting for the worker to finish here
return new Promise((resolve) => {
worker.on("exit", (exitCode) => {
console.log(exitCode);
resolve();
});
});
};
exports.main = main;
Alternatively, you can set context.doNotWaitForEmptyEventLoop = false but it's not recommended as it's error-prone and hard to debug.
Related
I am using a pool of workers to complete some CPU intensive tasks in Node.
However, I have a problem in my code below. When a task is first run, everything goes as expected, the pool is created, then is called which sends a message to a thread that runs the task successfully and responds with a message. However, on a second call, it seems to run the callback function of the worker (via the parentPort.postMessage) before the work in the threaded file is done. This seems to be an issue with using promises here? I see the "This run was complete in X ms" log before the "done" message shows. Why is this happening? Is something wrong with my workerPool class?
Am I not able to use any async/promise logic within the worker?
The version of Node I'm using is 14.
I followed largely the example set in this doc: https://nodejs.org/api/async_context.html#class-asyncresource
workerPool.js:
const { AsyncResource } = require("async_hooks");
const { EventEmitter } = require("events");
const path = require("path");
const { Worker } = require("worker_threads");
const kTaskInfo = Symbol("kTaskInfo");
const kWorkerFreedEvent = Symbol("kWorkerFreedEvent");
const { MONGODB_URI } = process.env;
class WorkerPoolTaskInfo extends AsyncResource {
constructor(callback) {
super("WorkerPoolTaskInfo");
this.callback = callback;
}
done(err, result) {
console.log("<<<<<<<<<<<");
this.runInAsyncScope(this.callback, null, err, result);
this.emitDestroy(); // TaskInfos are used only once.
}
}
class WorkerPool extends EventEmitter {
constructor(numThreads, workerFile) {
super();
this.numThreads = numThreads;
this.workerFile = workerFile;
this.workers = [];
this.freeWorkers = [];
for (let i = 0; i < numThreads; i++) this.addNewWorker();
}
addNewWorker() {
const worker = new Worker(path.resolve(__dirname, this.workerFile), {
workerData: { MONGODB_URI },
});
worker.on("message", (result) => {
// In case of success: Call the callback that was passed to `runTest`,
// remove the `TaskInfo` associated with the Worker, and mark it as free
// again.
worker[kTaskInfo].done(null, result);
worker[kTaskInfo] = null;
this.freeWorkers.push(worker);
this.emit(kWorkerFreedEvent);
});
worker.on("error", (err) => {
// In case of an uncaught exception: Call the callback that was passed to
// `runTest` with the error.
if (worker[kTaskInfo]) worker[kTaskInfo].done(err, null);
else this.emit("error", err);
// Remove the worker from the list and start a new Worker to replace the
// current one.
this.workers.splice(this.workers.indexOf(worker), 1);
this.addNewWorker();
});
this.workers.push(worker);
this.freeWorkers.push(worker);
this.emit(kWorkerFreedEvent);
}
runTest(data, callback) {
if (this.freeWorkers.length === 0) {
// No free threads, wait until a worker thread becomes free.
console.log("No free threads. Process queued.");
this.once(kWorkerFreedEvent, () => this.runTest(data, callback));
return;
}
const worker = this.freeWorkers.pop();
worker[kTaskInfo] = new WorkerPoolTaskInfo(callback);
worker.postMessage(data);
}
close() {
for (const worker of this.workers) worker.terminate();
}
}
module.exports = WorkerPool;
The index.js file where workers are called:
return new Promise(async (resolve, reject) => {
threadPool.runTest(
{ ...some data },
(err, result) => {
if (err) {
console.error("Bad error from test:", err);
reject(err)
}
const endTime = new Date();
// this callback is fired before the message is supposed to be posted...
console.log("Run Test Complete");
console.log(`This run took ${endTime - startTime} ms`);
resolve(true);
}
);
});
};
The code that actually runs in the worker:
const { parentPort, threadId, workerData } = require("worker_threads");
parentPort.on("message", async (data) => {
// await func1(...)
// await func2(...)
console.log("Done");
parentPort.postMessage('Done stuff);
});
I have a setInterval function that's been called in another function, and I need to stop it when the proccess is done. I tried to set this setInterval function as a variable and call clearInterval, but the interval keeps running
const createInterval = (visibilityTimeout, startDateTime, message) => {
setInterval(() => {
const currentDateTime = moment().valueOf();
const timeDifference = (visibilityTimeout * 1000) - (currentDateTime - startDateTime);
if (timeDifference >= 600000) {
return;
}
if (timeDifference < 494983) {
const params = {
QueueUrl: 'http://localhost:4566/000000000000/test-queue2',
ReceiptHandle: message.ReceiptHandle,
VisibilityTimeout: visibilityTimeout,
};
sqs.changeMessageVisibility(params, (err, data) => {
if (err) logger.error(err, err.stack);
else logger.info(data);
});
// eslint-disable-next-line no-param-reassign
visibilityTimeout += 300;
}
}, 5000);
};
module.exports = async (message) => {
const startDateTime = moment().valueOf();
const {
noteId,
} = JSON.parse(message.Body);
logger.info(`Processing message [noteId=${noteId}]`);
try {
const note = await TestSessionNote.findById(noteId);
const testSession = await TestSession.findById(note.test_session_id);
logger.info(`Downloading video [key=${testSession.video_key}]`);
const isProcessing = true;
const interval = createInterval(500, startDateTime, message, isProcessing);
await sleep(20000);
clearInterval(interval);
logger.info(`Finished processing message [noteId=${noteId}]`);
} catch (ex) {
await TestSessionNote.update(noteId, { status: 'transcribe_error' });
logger.error(`Error processing message [noteId=${noteId}]`, ex);
}
};
I know that if i create a var test = setInterval(() => {console.log('blabla')}, 500) and call clearInterval(test) it works, but i don't know how can i do this calling a function
I think that you have to return from createInterval function the intervalId and after that it should work.
Can you check what value has your intervalId right now, with your current implementation?
https://developer.mozilla.org/en-US/docs/Web/API/setInterval
"The returned intervalID is a numeric, non-zero value which identifies the timer created by the call to setInterval(); this value can be passed to clearInterval() to cancel the interval."
I'm learning how to use Puppeteer cluster and I have a question.
How can I interrupt a puppeteer cluster execution running in an infinite loop, by using a key press?
The code would be something like this:
const { Cluster } = require('puppeteer-cluster');
const fs = require('fs').promises;
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
async function run() {
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_CONTEXT,
maxConcurrency: 2,
monitor: true,
});
await cluster.task(async ({ page, data: acc }) => {
// Do task ~2 minutes
});
// In case of problems, log them
cluster.on('taskerror', (err, data) => {
console.log(` Error crawling ${data}: ${err.message}`);
});
// Read the accs.csv file from the current directory
const csvFile = await fs.readFile(__dirname + '/accs.csv', 'utf8');
const lines = csvFile.split('\n');
while(true){
//for each account in the file
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
cluster.queue(line);
}
// sleep for a moment...
await sleep(60000);
}
await cluster.idle();
await cluster.close();
};
try{
run();
} catch(e) {
console.log(e.message());
}
I manage to do it as I usually do, using readline. I thought it didn't work because of the monitor shown on the terminal.
If anyone need an example on how it's done, check this: Break infinite loop user input nodejs
I'm having an issue with my worker code, currently i have some code that scans through two database tables and finds some matches and then adds some data from one table to the other creating a new table. This is a large set of data so i'm using worker threads to speed this up.
This is all working fine however once the worker threads are complete no other code runs i've tried adding the function LogData everywhere i can and it will not run i've even add the console.log("Finished building merge table") and that doesn't run either. Even the parentResolve does happen as i don't see the console.log("parentResolve") message.
if anyone can help me I would really appreciate it.
const calculateFactorialwithWorker = async () => {
const SCCM = await ProgramDev.find({ "program name": { $not: { $regex: ".*[(]KB*[)]*" } } }).limit(8000)
const sccmLength = SCCM.length
mongoose.connection.close()
return new Promise(async (parentResolve, parentReject) => {
const numbers = [...new Array(sccmLength)].map((_, i) => i);
const segmentSize = Math.ceil(sccmLength / userCPUCount);
const segments = [];
for (let segmentIndex = 0; segmentIndex < userCPUCount; segmentIndex++) {
const start = segmentIndex * segmentSize;
const end = start + segmentSize;
const segment = numbers.slice(start, end)
segments.push(segment);
}
try {
const results = await Promise.all(
segments.map(
segment =>
new Promise((resolve, reject) => {
const worker = new Worker(workerPath, {
workerData: segment,
});
worker.on('message', resolve);
worker.on('error', reject);
worker.on('exit', (code) => {
if (code !== 0)
reject(new Error(`Worker stopped with exit code ${code}`));
});
})
));
parentResolve(() => {
console.log("parentResolve")
})
} catch (e) {
parentReject(e)
}
});
};
calculateFactorialwithWorker().then(() => {
console.log("Finished building merge table")
LogData
})
Add if else block in worker exit event. When exit fired with code === 0 , there is no resolve/reject to handle it. The promises will not be resolved/rejected.
Ref. https://nodejs.org/api/worker_threads.html#worker_threads_event_exit
Also, I rewrite your codes a bit because some promises wrapper is unnecessary.
const calculateFactorialwithWorker = async () => {
try {
const SCCM = await ProgramDev.find({
"program name": { $not: { $regex: ".*[(]KB*[)]*" } },
}).limit(8000);
const sccmLength = SCCM.length;
const numbers = [...new Array(sccmLength)].map((_, i) => i);
const segmentSize = Math.ceil(sccmLength / userCPUCount);
const segments = [];
for (let segmentIndex = 0; segmentIndex < userCPUCount; segmentIndex++) {
const start = segmentIndex * segmentSize;
const end = start + segmentSize;
const segment = numbers.slice(start, end);
segments.push(segment);
}
const promises = segments.map(
segment =>
new Promise((resolve, reject) => {
const worker = new Worker(workerPath, {
workerData: segment,
});
worker.on("message", resolve);
worker.on("error", reject);
worker.on("exit", code => {
if (code !== 0) {
reject(new Error(`Worker stopped with exit code ${code}`));
} else {
resolve();
}
});
})
);
await Promise.all(promises);
} catch (err) {
throw new Error(err);
}
};
calculateFactorialwithWorker()
.then(() => {
console.log("Finished building merge table");
LogData();
})
.catch(console.log)
.finally(() => {
mongoose.connection.close();
});
I tried something like this in the forked child process:
WriterPlugin.js (child process)
function sendCursor(){
try {
let cursor = await getQueryCursor(reqBody, db);
cursor.on('data', (data) => {
process.send(data);
})
} catch (error) {
process.send({
message: error.toString(),
status: 400
})
}
}
controller.js (parent process)
const childProcess = fork(fileToExec);
childProcess.send(objectToProcess);
childProcess.on('message', (data) => {
reply.send(data);
})
This one printed just last data of cursor and i faced with a fastifyError:
"code":"FST_ERR_REP_ALREADY_SENT","statusCode":500},"msg":"Reply already sent"}
How can i properly handle the cursor.stream() from a forked child process using fastify?
You need to push the data into a stream to accomplish this task:
const { fork } = require('child_process')
const { Readable } = require('stream')
const fastify = require('fastify')()
fastify.get('/', (request, reply) => {
const stream = new Readable({
read (size) {}
})
const childProcess = fork('./external-file.js')
childProcess.on('message', (data) => {
stream.push(JSON.stringify(data)) // it must be a string
})
childProcess.on('close', (data) => {
stream.push(null)
})
reply.send(stream)
})
fastify.listen(8080)
Where external-file.js is:
let iteration = 0
const timer = setInterval(() => {
const data = { iteration: iteration++ }
process.send(data)
if (iteration === 3) {
clearInterval(timer)
}
}, 1000)