I am using a pool of workers to complete some CPU-intensive tasks in Node.
However, I have a problem in my code below. When a task is first run, everything goes as expected: the pool is created, then runTest is called, which sends a message to a thread that runs the task successfully and responds with a message. However, on a second call, it seems to run the worker's callback function (via the parentPort.postMessage) before the work in the threaded file is done. Could this be an issue with using promises here? I see the "This run took X ms" log before the worker's "Done" log shows. Why is this happening? Is something wrong with my WorkerPool class?
Am I not able to use any async/promise logic within the worker?
The version of Node I'm using is 14.
I followed largely the example set in this doc: https://nodejs.org/api/async_context.html#class-asyncresource
workerPool.js:
const { AsyncResource } = require("async_hooks");
const { EventEmitter } = require("events");
const path = require("path");
const { Worker } = require("worker_threads");
const kTaskInfo = Symbol("kTaskInfo");
const kWorkerFreedEvent = Symbol("kWorkerFreedEvent");
const { MONGODB_URI } = process.env;
class WorkerPoolTaskInfo extends AsyncResource {
  constructor(callback) {
    super("WorkerPoolTaskInfo");
    this.callback = callback;
  }

  done(err, result) {
    console.log("<<<<<<<<<<<");
    this.runInAsyncScope(this.callback, null, err, result);
    this.emitDestroy(); // TaskInfos are used only once.
  }
}
class WorkerPool extends EventEmitter {
  constructor(numThreads, workerFile) {
    super();
    this.numThreads = numThreads;
    this.workerFile = workerFile;
    this.workers = [];
    this.freeWorkers = [];

    for (let i = 0; i < numThreads; i++) this.addNewWorker();
  }

  addNewWorker() {
    const worker = new Worker(path.resolve(__dirname, this.workerFile), {
      workerData: { MONGODB_URI },
    });
    worker.on("message", (result) => {
      // In case of success: Call the callback that was passed to `runTest`,
      // remove the `TaskInfo` associated with the Worker, and mark it as free
      // again.
      worker[kTaskInfo].done(null, result);
      worker[kTaskInfo] = null;
      this.freeWorkers.push(worker);
      this.emit(kWorkerFreedEvent);
    });
    worker.on("error", (err) => {
      // In case of an uncaught exception: Call the callback that was passed to
      // `runTest` with the error.
      if (worker[kTaskInfo]) worker[kTaskInfo].done(err, null);
      else this.emit("error", err);
      // Remove the worker from the list and start a new Worker to replace the
      // current one.
      this.workers.splice(this.workers.indexOf(worker), 1);
      this.addNewWorker();
    });
    this.workers.push(worker);
    this.freeWorkers.push(worker);
    this.emit(kWorkerFreedEvent);
  }

  runTest(data, callback) {
    if (this.freeWorkers.length === 0) {
      // No free threads, wait until a worker thread becomes free.
      console.log("No free threads. Process queued.");
      this.once(kWorkerFreedEvent, () => this.runTest(data, callback));
      return;
    }

    const worker = this.freeWorkers.pop();
    worker[kTaskInfo] = new WorkerPoolTaskInfo(callback);
    worker.postMessage(data);
  }

  close() {
    for (const worker of this.workers) worker.terminate();
  }
}
module.exports = WorkerPool;
The index.js file where workers are called:
return new Promise(async (resolve, reject) => {
  threadPool.runTest(
    { ...some data },
    (err, result) => {
      if (err) {
        console.error("Bad error from test:", err);
        reject(err)
      }
      const endTime = new Date();
      // this callback is fired before the message is supposed to be posted...
      console.log("Run Test Complete");
      console.log(`This run took ${endTime - startTime} ms`);
      resolve(true);
    }
  );
});
};
The code that actually runs in the worker:
const { parentPort, threadId, workerData } = require("worker_threads");
parentPort.on("message", async (data) => {
// await func1(...)
// await func2(...)
console.log("Done");
parentPort.postMessage('Done stuff);
});
Related
I have a setInterval that's being called in another function, and I need to stop it when the process is done. I tried assigning the setInterval call to a variable and calling clearInterval on it, but the interval keeps running.
const createInterval = (visibilityTimeout, startDateTime, message) => {
  setInterval(() => {
    const currentDateTime = moment().valueOf();
    const timeDifference = (visibilityTimeout * 1000) - (currentDateTime - startDateTime);
    if (timeDifference >= 600000) {
      return;
    }
    if (timeDifference < 494983) {
      const params = {
        QueueUrl: 'http://localhost:4566/000000000000/test-queue2',
        ReceiptHandle: message.ReceiptHandle,
        VisibilityTimeout: visibilityTimeout,
      };
      sqs.changeMessageVisibility(params, (err, data) => {
        if (err) logger.error(err, err.stack);
        else logger.info(data);
      });
      // eslint-disable-next-line no-param-reassign
      visibilityTimeout += 300;
    }
  }, 5000);
};

module.exports = async (message) => {
  const startDateTime = moment().valueOf();
  const {
    noteId,
  } = JSON.parse(message.Body);
  logger.info(`Processing message [noteId=${noteId}]`);
  try {
    const note = await TestSessionNote.findById(noteId);
    const testSession = await TestSession.findById(note.test_session_id);
    logger.info(`Downloading video [key=${testSession.video_key}]`);
    const isProcessing = true;
    const interval = createInterval(500, startDateTime, message, isProcessing);
    await sleep(20000);
    clearInterval(interval);
    logger.info(`Finished processing message [noteId=${noteId}]`);
  } catch (ex) {
    await TestSessionNote.update(noteId, { status: 'transcribe_error' });
    logger.error(`Error processing message [noteId=${noteId}]`, ex);
  }
};
I know that if I create var test = setInterval(() => { console.log('blabla') }, 500) and call clearInterval(test) it works, but I don't know how to do this when the interval is created inside a function.
I think you have to return the interval ID from the createInterval function; after that it should work.
Can you check what value your intervalId has right now with your current implementation?
https://developer.mozilla.org/en-US/docs/Web/API/setInterval
"The returned intervalID is a numeric, non-zero value which identifies the timer created by the call to setInterval(); this value can be passed to clearInterval() to cancel the interval."
I'm learning how to use puppeteer-cluster and I have a question.
How can I interrupt a puppeteer-cluster execution running in an infinite loop by using a key press?
The code would be something like this:
const { Cluster } = require('puppeteer-cluster');
const fs = require('fs').promises;
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

async function run() {
  const cluster = await Cluster.launch({
    concurrency: Cluster.CONCURRENCY_CONTEXT,
    maxConcurrency: 2,
    monitor: true,
  });

  await cluster.task(async ({ page, data: acc }) => {
    // Do task ~2 minutes
  });

  // In case of problems, log them
  cluster.on('taskerror', (err, data) => {
    console.log(` Error crawling ${data}: ${err.message}`);
  });

  // Read the accs.csv file from the current directory
  const csvFile = await fs.readFile(__dirname + '/accs.csv', 'utf8');
  const lines = csvFile.split('\n');

  while (true) {
    //for each account in the file
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      cluster.queue(line);
    }
    // sleep for a moment...
    await sleep(60000);
  }

  await cluster.idle();
  await cluster.close();
}
try {
  run();
} catch (e) {
  console.log(e.message);
}
I managed to do it the way I usually do, using readline. I had thought it wouldn't work because of the monitor shown on the terminal.
If anyone needs an example of how it's done, check this: Break infinite loop user input nodejs
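For anyone who wants the shape of it, here is a rough sketch of the readline approach; the choice of the "q" key is arbitrary, and the stop flag is only checked once per loop iteration, i.e. after the current sleep finishes:

const readline = require('readline');

let stop = false;
readline.emitKeypressEvents(process.stdin);
if (process.stdin.isTTY) process.stdin.setRawMode(true);
process.stdin.on('keypress', (str, key) => {
  // Press "q" (or Ctrl+C, which raw mode no longer handles for you) to stop.
  if (key.name === 'q' || (key.ctrl && key.name === 'c')) {
    stop = true;
  }
});

// ...and inside run(), replace while (true) with:
while (!stop) {
  for (let i = 0; i < lines.length; i++) {
    cluster.queue(lines[i]);
  }
  await sleep(60000);
}
await cluster.idle();
await cluster.close();
process.exit(0); // stdin in raw mode keeps the event loop alive, so exit explicitly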
Please, someone help; I just can't get this to work.
Can you please help me make this work with async/await or a promise (around doneCb), so the script waits for the first vlc_snmp(..) call to finish and only then calls the next one?
Example:
function doneCb(error) {
  console.log(final_result);
  final_result = [];
  if (error)
    console.error(error.toString());
}

function feedCb(varbinds) {
  for (var i = 0; i < varbinds.length; i++) {
    if (snmp.isVarbindError(varbinds[i]))
      console.error(snmp.varbindError(varbinds[i]));
    else {
      var snmp_rez = {
        oid: (varbinds[i].oid).toString(),
        value: (varbinds[i].value).toString()
      };
      final_result.push(snmp_rez);
    }
  }
}

var session = snmp.createSession(VLC_IP, "public", options);
var maxRepetitions = 20;

function vlc_snmp(OID) {
  session.subtree(OID_, maxRepetitions, feedCb, doneCb);
}
vlc_snmp(OID_SERIAL_NUMBER);
//wait OID_SERIAL_NUMBER to finish and then call next
vlc_snmp(OID_DEVICE_NAME);
You should be able to use the async / await statements to wait for your done callback to be called. We wrap this callback in the vlc_snmp function, and return a Promise. This allows us to use the await statement.
I've mocked out some of the code that I don't have access to, it should behave somewhat similarly to the real code.
The key point here is that when a function returns a Promise, we can await the result in an async function; that gives you the behaviour you want.
final_result = [];
const VLC_IP = "";
const options = {};
const OID_ = "OID_";
const OID_SERIAL_NUMBER = "OID_SERIAL_NUMBER"
const OID_DEVICE_NAME = "OID_DEVICE_NAME"
// Mock out snmp to call feedcb and donecb
const snmp = {
  createSession(...args) {
    return {
      subtree(oid, maxRepetitions, feedCb, doneCb) {
        setTimeout(feedCb, 500, [{ oid, value: 42 }])
        setTimeout(doneCb, 1000);
      }
    }
  },
  isVarbindError(input) {
    return false;
  }
}

function doneCb(error) {
  console.log("doneCb: final_result:", final_result);
  final_result = [];
  if (error)
    console.error("doneCb: Error:", error.toString());
}

function feedCb(varbinds) {
  for (var i = 0; i < varbinds.length; i++) {
    if (snmp.isVarbindError(varbinds[i]))
      console.error(snmp.varbindError(varbinds[i]));
    else {
      var snmp_rez = {
        oid: (varbinds[i].oid).toString(),
        value: (varbinds[i].value).toString()
      };
      final_result.push(snmp_rez);
    }
  }
}
var session = snmp.createSession(VLC_IP, "public", options);
var maxRepetitions = 20;
function vlc_snmp(OID) {
  return new Promise((resolve, reject) => {
    session.subtree(OID, maxRepetitions, feedCb, (error) => {
      // This is a wrapper callback on doneCb
      // Always call the doneCb
      doneCb(error);
      if (error) {
        reject(error);
      } else {
        resolve();
      }
    });
  });
}

async function run_vls_snmp() {
  console.log("run_vls_snmp: Calling vlc_snmp(OID_SERIAL_NUMBER)...");
  await vlc_snmp(OID_SERIAL_NUMBER);
  console.log("run_vls_snmp: Calling vlc_snmp(OID_DEVICE_NAME)...");
  //wait OID_SERIAL_NUMBER to finish and then call next
  await vlc_snmp(OID_DEVICE_NAME);
}
run_vls_snmp();
I am performing the useMutation operation in the innermost loop and want to check the remaining cost after every mutation. But the check only runs after all the mutations, which is a problem: even when all the mutations do complete (and the cost stays under the limit), the .then() part that checks the cost and waits still fires, for a reason I can't work out.
Edit: I also noticed that even though the program keeps waiting again and again, the Chrome network tab shows that all the mutations have completed and only the query from handleDiscountMore, i.e. fetchMore, is pending.
const { loading, error, data, fetchMore, extensions, refetch } = useQuery(GET_COLLECTION, {
  variables: { "id": coll.collection.id }
});
const [updatePrice] = useMutation(UPDATE_PRICE);

const redirectToModify = async (data, totalProducts) => {
  wait(20000);
  var cursor, fetchCount;
  fetchCount = data.collection.products.edges.length;
  totalProducts -= fetchCount;
  data.collection.products.edges.map(async (product) => {
    const results = await Promise.all(product.node.variants.edges.map(variant => {
      if (selected == 'curr_price') {
        //do stuff
      }
      else {
        //do stuff
      }
      const productVariableInput = {
        //Object
      };
      updatePrice({ variables: { input: productVariableInput } }).then(({ data, extensions }) => {
        console.log("Remaining", extensions.cost.throttleStatus.currentlyAvailable)
        console.log(data)
        if (extensions.cost.throttleStatus.currentlyAvailable < 100) {
          console.log("WAITING")
          wait(18000);
        }
      }).catch(e => {
        console.log(e)
      })
      console.log("AFTER")
      return 0;
    }))
  })
  if (totalProducts > 0) {
    console.log("Calling")
    wait(15000);
    handleDiscountMore(data, cursor, totalProducts)
  }
};

//Below function is Just for reference. It gets called before checking the throttleStatus above. afaik there's no problem with this
const handleDiscountMore = (data, cursor, pc) => {
  console.log("Call received")
  fetchMore({
    variables: {
      "id": data.collection.id,
      "cursor": cursor
    },
    updateQuery: (
      previousResult,
      { fetchMoreResult }
    ) => {
      console.log("adding", fetchMoreResult);
      redirectToModify(fetchMoreResult, pc);
      // return fetchMoreResult;
    }
  })
}
Your map of maps is evaluating all the promises at exactly the same time. Here's a cleaned-up example that uses nested for loops instead, which will wait for each request to finish before starting the next (note: I couldn't run it to test, so there are probably some bugs, but the idea is there):
const id = coll.collection.id;
const { loading, error, data, fetchMore, extensions, refetch } = useQuery(GET_COLLECTION, {
  variables: { id }
});
const [updatePrice] = useMutation(UPDATE_PRICE);
// Given a product, returns a promise that resolves when all variants are processed
async function process_product(product) {
  const variants = product.node.variants.edges;
  for (let i = 0; i < variants.length; i++) {
    await process_variant(variants[i]);
  }
}

// Given a variant, returns a promise after the product is processed
async function process_variant(variant) {
  if (variant) {
    console.log('doing stuff')
  }
  else {
    console.log('doing other stuff')
  }
  const productVariableInput = {};
  const variables = { input: productVariableInput };
  try {
    const { data, extensions } = await updatePrice({ variables });
    const remaining_throttle = extensions.cost.throttleStatus.currentlyAvailable;
    console.log("Remaining", remaining_throttle)
    console.log(data)
    // Change to a while loop to make sure you actually wait until resources are available
    if (remaining_throttle < 100) {
      console.log("WAITING")
      await wait(18000);
    }
  } catch (e) {
    console.log('error:', e);
  }
  console.log("AFTER")
  return 0;
}
const redirectToModify = async (data, totalProducts) => {
  await wait(20000);
  let cursor;
  const products = data.collection.products.edges;
  totalProducts = totalProducts - products.length;
  // Wait for all products to finish processing
  for (var i = 0; i < products.length; i++) {
    await process_product(products[i]);
  }
  if (totalProducts > 0) {
    console.log("Calling")
    await wait(15000);
    handleDiscountMore(data, cursor, totalProducts)
  }
};
function updateQuery(previousResult, { fetchMoreResult }) {
  console.log("adding", fetchMoreResult);
  redirectToModify(fetchMoreResult, pc);
  return fetchMoreResult;
}

//Below function is Just for reference. It gets called before checking the throttleStatus above. afaik there's no problem with this
function handleDiscountMore(data, cursor, pc) {
  console.log("Call received")
  const variables = { id: data.collection.id, cursor };
  fetchMore({ variables, updateQuery })
}
I have this monitor object that raises an event every time data is found, which I currently consume with an event handler:
monitor.on("data", data => { /* do something */ })
I would like to replace this pattern by using a generator:
for await(const data of monitor.iterate()) { /* do something */ }
I know I can do this:
async function* monitorWrapper() {
  const allData = await new Promise(resolve => {
    const _allData = []
    monitor.on("data", d => _allData.push(d))
    monitor.on("end", () => resolve(_allData))
  })
  yield* allData
}
and call it this way:
for await (const data of monitorWrapper()) { /* do something */ }
But this negates the whole point of using yield, since I have to wait for all the elements to be available before processing.
In this context, my question is: is there any pattern that allows me to yield as the data events are triggered?
Just ran into this problem myself.
const stream = new class { // mock event stream
  async start() {
    while (true) {
      const ms = Math.random() * 100;
      await new Promise(resolve => setTimeout(resolve, ms));
      if (this.handler) this.handler(ms);
    }
  }
  on(handler) { this.handler = handler; }
}

class Monitor {
  events = []; // store events
  resolve = () => {}; // resolves current outstanding promise
  constructor(handler) {
    handler(event => { // binds event listener
      this.events.push(event); // add event
      this.resolve(); // call resolve
    });
  }
  async *stream() { // generator
    while (true) {
      let event;
      while (event = this.events.shift()) yield event; // remove and yield
      await new Promise(r => this.resolve = r); // wait for next
    }
  }
}
const monitor = new Monitor(handler => stream.on(handler)); // bind events
stream.start();
for await (const event of monitor.stream()) { console.log({ event }); }
I am not sure, but I think you can try this and let me know what happens.
//start is a thunk creator
function start(cb) {
  var c = cb();
  c.next(); //start up!
  return function (data) {
    c.next(data);
  };
}

monitor.on('data', start(function* (message) {
  var m = yield message;
  console.log(m);
}));