Calling matchTemplate in Promise.all gives no performance improvement? - node.js

I am using opencv4nodejs-prebuilt in my project to do template matching.
I created two files, index.js and matchTemplate.js.
In index.js I call matchTemplate:
const { matchTemplate } = require("./matchTemplate");
...
let a = async function () {
  let tm = performance.now();
  try {
    await Promise.all([
      matchTemplate(baseImage, templateR),
      matchTemplate(baseImage, templateL)
    ]).then(result => {
      const c = result.map((ob) => (ob.C)) // confidence
      top = c[0] > c[1] ? result[0].Y + 8 : result[1].Y + 11
    })
  } catch (error) {
    console.log(error)
  }
  tm = performance.now() - tm;
  console.log(tm)
}
And this is matchTemplate.js:
const cv = require('opencv4nodejs-prebuilt')

exports.matchTemplate = async function (inputFile, templateImage) {
  // eslint-disable-next-line no-unused-expressions
  const matS = await cv.imdecodeAsync(templateImage)
  console.time('templateMatching')
  const matched = inputFile.matchTemplate(matS, 3)
  console.timeEnd('templateMatching')
  const minMax = matched.minMaxLoc()
  return ({ Y: minMax.maxLoc.y, C: minMax.maxVal })
}
The log output of matchTemplate.js is:
templateMatching: 892.648ms
templateMatching: 890.387ms
and the log output of index.js is:
TemplateMatching: 1824.8019220000133
Why is there no improvement in speed? If the execution happens in parallel, why does it still take as long as both calls combined?
I tried the Promise.all approach to call ghostscript via the gs4fb npm package to convert a PDF to an image, and there the time improvement was clear.
By time improvement I mean the difference between the total time taken with Promise.all and the total time taken when calling the functions one by one.
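A minimal sketch of what I mean by that comparison (task() is a hypothetical stand-in for any async call, e.g. the ghostscript conversion):

const { performance } = require('perf_hooks');

async function compareTimings(task) {
  let t = performance.now();
  await task(); // one by one
  await task();
  const sequential = performance.now() - t;

  t = performance.now();
  await Promise.all([task(), task()]); // "in parallel"
  const withPromiseAll = performance.now() - t;

  console.log({ sequential, withPromiseAll });
}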

Related

nodejs - Async generator/iterator with or without awaiting long operation

I'm trying to understand which setup is best for doing the following operations:
Read a CSV file line by line
Use the row data as input to a complex function that outputs a file (one file per row)
When the entire process is finished, zip all the files generated during step 2
My goal: a fast and scalable solution able to handle huge files
I've implemented step 2 using two approaches and I'd like to know which is the best and why (or if there are other, better ways)
Step 1
This is simple: I rely on the csv-parse async iterator API:
const fs = require("fs");
const { parse } = require("csv-parse"); // or: const parse = require("csv-parse") on older versions

async function* loadCsvFile(filepath, params = {}) {
  try {
    const parameters = {
      ...csvParametersDefault,
      ...params,
    };
    const inputStream = fs.createReadStream(filepath);
    const csvParser = parse(parameters);
    const parser = inputStream.pipe(csvParser)
    for await (const line of parser) {
      yield line;
    }
  } catch (err) {
    throw new Error("error while reading csv file: " + err.message);
  }
}
Step 2
Option 1
Await the long operation handleCsvLine for each line:
// step 1
const csvIterator = loadCsvFile(filePath, options);
// step 2
let counter = 0;
for await (const row of csvIterator) {
  await handleCvsLine(row);
  counter++;
  if (counter % 50 === 0) {
    logger.debug(`Processed label ${counter}`);
  }
}
// step 3
zipFolder(folderPath);
Pro
nice to see the files being generated one after the other
since it waits for each operation to end, I can show the progress nicely
Cons
it waits for each operation; could it be faster?
Option 2
Push the long operation handleCsvLine into an array and then, after the loop, run Promise.all:
// step 1
const csvIterator = loadCsvFile(filePath, options);
// step 2
let counter = 0;
const promises = [];
for await (const row of csvIterator) {
  promises.push(handleCvsLine(row));
  counter++;
  if (counter % 50 === 0) {
    logger.debug(`Processed label ${counter}`);
  }
}
await Promise.all(promises);
// step 3
zipFolder(folderPath);
Pro
I do not wait, so it should be faster, shouldn't it?
Cons
since it does not wait, the for loop is very fast, but then there is a long wait at the end (aka a bad progress experience)
Step 3
A simple step in which I use the archiver library to create a zip of the folder in which I saved the files from step 2:
function zipFolder(folderPath, globPath, outputFolder, outputName, logger) {
  return new Promise((resolve, reject) => {
    // create a file to stream archive data to
    const stream = fs.createWriteStream(path.join(outputFolder, outputName));
    const archive = archiver("zip", {
      zlib: { level: 9 }, // sets the compression level
    });
    archive.glob(globPath, { cwd: folderPath });
    // good practice to catch warnings (i.e. stat failures and other non-blocking errors)
    archive.on("warning", function (err) {
      if (err.code === "ENOENT") {
        logger.warning(err);
      } else {
        logger.error(err);
        reject(err);
      }
    });
    // good practice to catch this error explicitly
    archive.on("error", function (err) {
      logger.error(err);
      reject(err);
    });
    // pipe archive data to the file
    archive.pipe(stream);
    // listen for all archive data to be written
    // the 'close' event is fired only when a file descriptor is involved
    stream.on("close", function () {
      resolve();
    });
    archive.finalize();
  });
}
Not using await does not make the operations themselves faster. It just means the loop does not wait for a response and moves on to the next operation; the operations are added to the event queue either way, with or without await.
You should use child_process instead to get real parallel processing. Node.js is not multithreaded, but with child_process you can run work on separate CPU cores. This way you can generate multiple files at a time, based on the number of CPU cores available on the system.
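A minimal sketch of that idea, assuming a hypothetical worker.js that does the per-row work (this is not the asker's code, just an illustration of the child_process approach):

const { fork } = require('child_process');
const os = require('os');

// worker.js (sketch): handles one row per message
// process.on('message', async (row) => {
//   await handleCsvLine(row); // the long operation from the question
//   process.send('done');
// });

function processInParallel(rows) {
  // one child process per CPU core
  const workers = os.cpus().map(() => fork('./worker.js'));
  // naive round-robin distribution of the rows
  rows.forEach((row, i) => workers[i % workers.length].send(row));
}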

Why does a function with a return give me undefined when I use it inside module.exports? - node.js

I am practicing some module export exercises: a function with a return statement is exported via module.exports and then imported in a new file, but it tells me total is not defined. Why is this happening?
Code:
file 1:
// Using Async & Await with MODULE EXPORT.
const googleDat = require('../store/google-sheets-api.js');
//passing a let var from a function to another file
let total = googleDat.addVat(100);
console.log(total);
File 2:
function addVat(price) {
  let total = price*1.2
  return total
};
module.exports = {
  total
};
Result: ReferenceError: total is not defined
That's because you export a variable that hasn't been initialized, AND you didn't export your function:
function addVat(price) {
  // defining a variable with let works only in this scope
  let total = price*1.2
  return total
};
// In this scope, total doesn't exist, but addVat does.
module.exports = {
  total // So this is undefined and will throw an error.
};
What you want is to export the function itself, not the result computed inside it.
function addVat(price) {
  return price * 1.2;
};
module.exports = {
  addVat
};
In file 2, you should be exporting the addVat() function itself and not just its return value. Try this one:
exports.addVat = (price) => {
  let total = price*1.2
  return total
};
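For completeness, file 1 then consumes the exported function unchanged (the path below is the one from the question):

const googleDat = require('../store/google-sheets-api.js');
let total = googleDat.addVat(100);
console.log(total); // 120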

Correct way to organise this process in Node

I need some advice on how to structure this function, as at the moment things are not happening in the correct order due to Node being asynchronous.
This is the flow I want to achieve; I don't need help with the code itself, but with the order needed to achieve the end result, plus any suggestions on how to make it efficient:
1. Node routes a GET request to my controller.
2. The controller reads a .csv file on the local system and opens a read stream using the fs module.
3. Then it uses the csv-parse module to convert that to an array, line by line (many hundreds of thousands of lines).
4. Start a try/catch block.
5. With the current row from the CSV, take a value and try to find it in MongoDB.
6. If found, take the ID and store the line from the CSV together with this ID as a foreign ID in a separate database.
7. If not found, create an entry in the DB, take the new ID and then do 6.
8. Print to the terminal the row number being worked on (ideally, at some point I would like to be able to send this value to the page and have it update like a progress bar as the rows are completed).
Here is a small part of the code structure that I am currently using:
const fs = require('fs');
const parse = require('csv-parse');

function addDataOne(req, id) {
  const modelOneInstance = new InstanceOne({ ...code });
  const resultOne = modelOneInstance.save();
  return resultOne;
}

function addDataTwo(req, id) {
  const modelTwoInstance = new InstanceTwo({ ...code });
  const resultTwo = modelTwoInstance.save();
  return resultTwo;
}

exports.add_data = (req, res) => {
  const fileSys = 'public/data/';
  const parsedData = [];
  let i = 0;
  fs.createReadStream(`${fileSys}${req.query.file}`)
    .pipe(parse({}))
    .on('data', (dataRow) => {
      let RowObj = {
        one: dataRow[0],
        two: dataRow[1],
        three: dataRow[2],
        etc,
        etc
      };
      try {
        ModelOne.find(
          { propertyone: RowObj.one, propertytwo: RowObj.two },
          '_id, foreign_id'
        ).exec((err, searchProp) => {
          if (err) {
            console.log(err);
          } else {
            if (searchProp.length > 1) {
              console.log('too many returned from find function');
            }
            if (searchProp.length === 1) {
              addDataOne(RowObj, searchProp[0]).then((result) => {
                searchProp[0].foreign_id.push(result._id);
                searchProp[0].save();
              });
            }
            if (searchProp.length === 0) {
              let resultAddProp = null;
              addDataTwo(RowObj).then((result) => {
                resultAddProp = result;
                addDataOne(req, resultAddProp._id).then((result) => {
                  resultAddProp.foreign_id.push(result._id);
                  resultAddProp.save();
                });
              });
            }
          }
        });
      } catch (error) {
        console.log(error);
      }
      i++;
      let iString = i.toString();
      process.stdout.clearLine();
      process.stdout.cursorTo(0);
      process.stdout.write(iString);
    })
    .on('end', () => {
      res.send('added');
    });
};
I have tried to make the functions use async/await, but it seems to conflict with fs.createReadStream or the csv-parse functionality, probably due to my inexperience and incorrect use of the code...
I appreciate that this is a long question about the fundamentals of the code, but some tips/advice/pointers on how to get this going would be appreciated. I had it working when the data was sent one record at a time via a POST request from Postman, but I can't implement the next stage, which is to read from the CSV file containing many records.
First of all, you can turn the following two checks into one query:
if (searchProp.length === 1) {
if (searchProp.length === 0) {
Use the upsert option of MongoDB's findOneAndUpdate query to update or insert in a single step.
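As a hedged sketch (Mongoose syntax; the filter fields come from the question, the $setOnInsert payload is only illustrative), the two branches collapse into something like:

const searchProp = await ModelOne.findOneAndUpdate(
  { propertyone: RowObj.one, propertytwo: RowObj.two }, // same filter as the original find()
  { $setOnInsert: { propertyone: RowObj.one, propertytwo: RowObj.two, foreign_id: [] } },
  { upsert: true, new: true } // create the doc if it is missing, return the resulting doc
).exec();
// searchProp is now always exactly one document, so the length checks go away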
Secondly, don't do this in the main thread. Use a queue mechanism instead; it will be much more efficient.
The queue I personally use is Bull Queue:
https://github.com/OptimalBits/bull#basic-usage
It also provides the progress-reporting functionality you need.
Also, regarding using async/await with a ReadStream, plenty of examples can be found on the net, such as: https://humanwhocodes.com/snippets/2019/05/nodejs-read-stream-promise/
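A minimal Bull sketch of that idea (queue name and Redis URL are placeholders; handleCsvLine and loadCsvFile are the asker's functions):

const Queue = require('bull');

const rowQueue = new Queue('csv-rows', 'redis://127.0.0.1:6379'); // placeholder Redis URL

// worker: one job per CSV row, with progress reporting
rowQueue.process(async (job) => {
  await handleCsvLine(job.data.row); // the long operation from the question
  job.progress(100);
});

// producer: enqueue every row of the CSV
async function enqueueRows(filePath, options) {
  for await (const row of loadCsvFile(filePath, options)) {
    await rowQueue.add({ row });
  }
}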

Using batch to recursively update documents only works on small collection

I have a collection of teams containing around 80,000 documents. Every Monday I would like to reset the score of every team using Firebase Cloud Functions. This is my function:
exports.resetOrgScore = functions
  .runWith(runtimeOpts)
  .pubsub.schedule("every monday 00:00")
  .timeZone("Europe/Oslo")
  .onRun(async (context) => {
    let batch = admin.firestore().batch();
    let count = 0;
    let overallCount = 0;
    const orgDocs = await admin.firestore().collection("teams").get();
    orgDocs.forEach(async (doc) => {
      batch.update(doc.ref, { score: 0.0 });
      if (++count >= 500 || ++overallCount >= orgDocs.docs.length) {
        await batch.commit();
        batch = admin.firestore().batch();
        count = 0;
      }
    });
  });
I tried running the function on a smaller collection of 10 documents and it works fine, but when running it on the "teams" collection it returns "Cannot modify a WriteBatch that has been committed". I tried returning the promise like this (code below), but that doesn't fix the problem. Thanks in advance :)
return await batch.commit().then(function () {
  batch = admin.firestore().batch();
  count = 0;
  return null;
});
There are three problems in your code:
You use async/await with forEach(), which is not recommended: the callback passed to forEach() is not awaited (see the short illustration after this list, and more explanations here or here).
As the error says, you "Cannot modify a WriteBatch that has been committed", and with await batch.commit(); batch = admin.firestore().batch(); that is exactly what you end up doing.
Just as important, you don't return the promise returned by the asynchronous methods. See here for more details.
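To illustrate the first point with a generic async doWork() (not the asker's code), inside an async function:

// forEach fires all its callbacks and ignores the promises they return,
// so nothing here waits for them to finish:
docs.forEach(async (doc) => {
  await doWork(doc);
});
console.log('reached before any doWork() call has finished');

// a plain for...of loop, by contrast, awaits each iteration:
for (const doc of docs) {
  await doWork(doc);
}
console.log('reached after every doWork() call has finished');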
You'll find in the docs (see the Node.js tab) some code which deletes all the docs of a collection by recursively using a batch. It is easy to adapt it to update the docs instead, as follows. Note that we use a dateUpdated flag to select the docs for each new batch: with the original code the docs were deleted, so no flag was needed...
const runtimeOpts = {
  timeoutSeconds: 540,
  memory: '1GB',
};

exports.resetOrgScore = functions
  .runWith(runtimeOpts)
  .pubsub
  .schedule("every monday 00:00")
  .timeZone("Europe/Oslo")
  .onRun((context) => {
    return new Promise((resolve, reject) => {
      deleteQueryBatch(resolve).catch(reject);
    });
  });

async function deleteQueryBatch(resolve) {
  const db = admin.firestore();
  const snapshot = await db
    .collection('teams')
    .where('dateUpdated', '==', "20210302")
    .orderBy('__name__')
    .limit(499)
    .get();
  const batchSize = snapshot.size;
  if (batchSize === 0) {
    // When there are no documents left, we are done
    resolve();
    return;
  }
  // Update the documents in a batch
  const batch = db.batch();
  snapshot.docs.forEach((doc) => {
    batch.update(doc.ref, { score: 0.0, dateUpdated: "20210303" });
  });
  await batch.commit();
  // Recurse on the next process tick, to avoid
  // exploding the stack.
  process.nextTick(() => {
    deleteQueryBatch(resolve);
  });
}
Note that the above Cloud Function is configured with the maximum value for the timeout, i.e. 9 minutes.
If it turns out that all your docs cannot be updated within 9 minutes, you will need to find another approach, for example using the Admin SDK from one of your servers, or cutting the work into pieces and running the CF several times.

How do I chain a set of functions together using promises and q in node.js?

I have some dynamic data that needs to have work performed on it. The work must happen sequentially. Using the Q Library, I'd like to create an array of functions and execute the code sequentially using sequences. I can't seem to quite figure out the syntax to achieve this.
const fruits = ["apple", "cherry", "blueberry"]
function makeFruitPie (fruit) {
return Q.Promise((resolve, reject) => {
// Do some stuff here
resolve(fruit+" pie")
// Error handling here
reject(new Error(""))
})
}
const fruitFuncs = new Array(fruits.length)
for(var i = 0; i < fruits.length; i++) {
fruitFuncs[i] = makeFruitPie(fruits[i])
}
// Stole this example from the github docs but can't quite get it right.
i = 0
var result = Q(fruits[i++])
fruitFuncs.forEach((f) => {
  result = result(fruits[i++]).then(f)
})
With these lines
for (var i = 0; i < fruits.length; i++) {
  fruitFuncs[i] = makeFruitPie(fruits[i])
}
you already run the functions and, hence, their processing will begin.
Assuming you want the execution of the functions in sequence, the following would be more appropriate:
// construct the pipeline
const start = Q.defer();
let result = start.promise; // we need something to set the pipeline off
fruits.forEach((fruit) => {
  result = result.then(() => makeFruitPie(fruit));
});

// start the pipeline
start.resolve();
Sidenote: there is a native Promise implementation supported by almost all environments. Maybe consider switching from the library-backed version.
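For example, a minimal sketch of the same sequential pipeline with native Promises, chaining over the fruits array with reduce (assuming makeFruitPie returns a promise as above):

// each makeFruitPie() starts only after the previous one has resolved
const done = fruits.reduce(
  (chain, fruit) => chain.then(() => makeFruitPie(fruit)),
  Promise.resolve()
);
done.then(() => console.log('all pies made'));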
You can use Promise.all:
Promise.all(fruits.map(fruit => makeFruitPie(fruit).then(res => res)))
  .then(final_res => console.log(final_res))
final_res will give you an array of the results.
You could use for..of and do things sequentially, something like this:
const Q = require("q");

const fruits = ["apple", "cherry", "blueberry"];

function makeFruitPie(fruit) {
  return Q.Promise((resolve, reject) => {
    // Do some stuff here
    resolve(`${fruit} pie`);
    // Error handling here
    reject(new Error(""));
  });
}

(async () => {
  for (const fruit of fruits) {
    const result = await makeFruitPie(fruit);
    console.log(result);
  }
})();
By the way, it's also worth considering native Promise instead of using q.
