Making Node wait for a DB call to complete - node.js

I just started writing Node.js code.
I'm writing a program that extracts data from a PDF file, cleans it up, and stores it in a database (CouchDB, accessed through the nano library).
The problem is that the calls are made asynchronously, so the database get calls (I make a few get calls to fetch affiliation files during the clean-up) only complete after the rest of the program has run, leaving variables undefined. Is there any way around this?
I've reproduced my code below:
const fs = require('fs');
const os = require('os');
var couchDB = require('couch-db').CouchDB;
var pdf_table_extractor = require('pdf-table-extractor');
const filename = "PQ-PRI-0005-1806-01-0000_quoteSlipForLIBVIDGI1.pdf"
var nano = require('nano')('https://couchadmin:difficulttoguessmypassword#dbdev.perilwise.com');
var server = new couchDB('https://db.url.com');
server.auth("admin","admin");
var db = nano.db.use('pwfb');
var temp = [];

//New callView function
async function callView(){
  try{
    const doc = await view('liabilitymdm','pi');
    for (var i =0; i<doc.rows.length;i++){
      tmp.push(doc.rows[i]);
    };
    return doc;
  } catch(e){
    console.log(e);
  };
};

function suc(result){
  let ttmp = [];
  console.log(result);
  var pageTables = result.pageTables;
  var firstPageTables = pageTables[0].tables;
  ttmp = callView();
  //this console log shows Promise { <pending> }
  console.log(ttmp)
  for (var k = 0; k < firstPageTables.length; k++) {
    var temp = firstPageTables[k];
    if (temp.length > 0) {
      dump.push(temp);
    }
  }
  // console.log(dump);
  var insurer = filename.substr(37,8);
  read_quote_slip(insurer,dump);
}

var read_quote_slip = (insurer,data) => {
  console.log("read_quote_slip correctly entered");
  var finOut = {};
  if (insurer === "LIBVIDGI"){
    finOut.insurer = insurer;
    finOut.policyType = data[2][0].replace(/Quotation for/g,"");
    finOut.natureOfWork = data[13][3];
    let dedpos = indexGetter(data, "Deductible")[0];
    finOut.deductible = data[dedpos+1][0];
    let cov = indexGetter(data, "Coverage Territory and Jurisdiction")[0];
    finOut.coverageTerritory = data[cov+1][0].replace(/Territory/g,"");
    finOut.coverageJurisdiction = data[cov+2][0].replace(/Jurisdiction/g,"");
    let ext = indexGetter(data,"Extensions")[0];
    finOut.coverage = data[ext+1][0].split(/\r?\n/);
    let majexc = indexGetter(data,"Major Exclusions")[0];
    finOut.exclusions = data[majexc+1][0].split(/\r?\n/);
    let prdtl = indexGetter(data,"Description")[0];
    let prm = premiumcompute(data,prdtl,dedpos);
    finOut.premium = prm;
    finCleaned = libvidgi_cleaned(finOut);
    // console.log(finCleaned);
  }
}

var indexGetter = (words,toFind) => {
  var finindex = [];
  for (var i = 0; i < words.length; i++){
    for (var j = 0; j < words[i].length; j++){
      if(words[i][j].indexOf(toFind) >=0 ){
        finindex.push(i);
      }
    }
  }
  return finindex;
}

var premiumcompute = (data, from, to) => {
  let finprem = [];
  let numbop = to - from - 2;
  let incr = 0;
  for (var i = from+2; i < to; i++){
    let pr = {};
    pr.option = incr+1;
    pr.sumInsured = data[i][2].replace(/ /g,"");
    pr.premium = data[i][data[i].length - 1].replace(/ /g,"");
    finprem.push(pr);
    incr +=1;
  }
  return finprem;
}

var libvidgi_cleaned = (finOut) => {
  return finOut;
}

var fal = (result) => {
  console.log(result);
  console.log("there was an error");
}

var readPDFFile = function(filename){
  //Decide which insurer from the filename
  // console.log(filename);
  console.log(filename.substr(37,8)+"Printed on line 38");
  insurer = filename.substr(37,8)
  pdf_table_extractor(filename, (result) => {suc(result)} , fal);
}

var libvidgi_data_extract = (data) => {
  console.log(data);
  let arr = data.pageTables.tables;
  for (var i = 0; i <= arr.length; i++ ){
    console.log(arr[i]);
  }
}

readPDFFile(filename);

This answer assumes you are using Node.js > v7.6
Since db.view accepts a callback and you wish to wait for it to finish, one solution is to promisify it - that is, turn it into a function that returns a promise which can be awaited. You can use a library like Bluebird, or you can use Node's built-in promisify util. Then you can rewrite callView:
const {promisify} = require('util');
const view = promisify(db.view);

async function callView() {
  try {
    const doc = await view('liabilitymdm', 'pi');
    // the async operation is now guaranteed to be done
    // (if there is an error it will be caught by the catch clause)
    for (var i = 0; i < doc.rows.length; i++) {
      temp.push(doc.rows[i]);
    }
    console.log(temp);
  } catch (e) {
    // handle or at least log the error here
  }
}
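Since callView is async it returns a promise, so suc has to await it (or chain .then on it) rather than assigning the pending promise to ttmp - that is why the original console.log prints Promise { <pending> }. A minimal sketch of how suc could consume it, assuming the rest of the function stays the same:
// sketch: make suc async so it can wait for callView to finish
async function suc(result) {
  const doc = await callView();       // resolved view result, not Promise { <pending> }
  const ttmp = doc ? doc.rows : [];   // callView returns undefined if the view call failed
  console.log(ttmp);
  // ... continue processing result.pageTables as before ...
}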
If you are not using Node.js > v7.6 (and cannot use async/await), you can still use promises via their then method:
const {promisify} = require('util');
const view = promisify(db.view);

function callView() {
  view('liabilitymdm', 'pi')
    .then(doc => {
      for (var i = 0; i < doc.rows.length; i++) {
        temp.push(doc.rows[i]);
      }
      console.log(temp);
      return temp;
    })
    .then(temp => {
      console.log(temp);
    })
    .catch(e => {});
}
Notice how the first then is returning something which is used in a later then.
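Note that as written this version of callView does not return the chain, so callers cannot wait on it. A hedged sketch of returning the chain and consuming it from suc:
function callView() {
  return view('liabilitymdm', 'pi')
    .then(doc => {
      for (var i = 0; i < doc.rows.length; i++) {
        temp.push(doc.rows[i]);
      }
      return temp;
    })
    .catch(e => console.log(e));
}

// in suc:
callView().then(rows => {
  console.log(rows); // the actual rows, not a pending promise
});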

To wait for an asynchronous operation before continuing in Node, you can use the keywords async and await.
They work like this:
async function doSomething () {
  const formattedData = formatData();
  const result = await db.postToDatabase(formattedData);
  // the below will not happen until the above line is finished
  doSomethingElse(result);
}
It's pretty simple to get this behaviour in Node: put the async keyword at the beginning of the function definition and put await in front of any promise-returning call you want to finish before the next line runs. Note that await only suspends the surrounding async function; it does not block the rest of the program.
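For completeness, a minimal sketch of error handling around an awaited call, reusing the placeholder names from the example above (formatData, db.postToDatabase, and doSomethingElse are illustrative, not a real API):
async function doSomethingSafely() {
  try {
    const formattedData = formatData();
    const result = await db.postToDatabase(formattedData);
    doSomethingElse(result);
  } catch (err) {
    // a rejected promise surfaces here as a thrown error
    console.error('postToDatabase failed:', err);
  }
}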

Related

.push is not a function in web crawler

I am writing a Node.js web crawler class, and I have encountered the following error: this.textInvertedIndex[word].push is not a function. Upon further inspection I realised that for some reason this.textInvertedIndex[word] was a native function, function Object() { [native code] }. For the first few iterations, console logging this.textInvertedIndex looked fine: it was an object of arrays. But then this error suddenly occurred. Is there any part of the code where I am implicitly rewriting textInvertedIndex?
Here is the relevant class:
function Crawler(queue, maxIndexSize) {
  this.queue = queue;
  this.maxIndexSize = maxIndexSize;

  this.findChunks = () => {
    let currentChunk;
    let minimumDistance = Infinity;
    for (i = 1; i <= this.maxIndexSize; i++) {
      if (this.maxIndexSize % i === 0) {
        const newDistance = Math.abs(i - 30);
        if (newDistance < minimumDistance) {
          minimumDistance = newDistance;
          currentChunk = i;
        } else {
          return currentChunk
        };
      };
    };
  };

  this.chunks = this.findChunks();
  this.chunkSize = this.maxIndexSize / this.chunks;
  this.totalWordOccurances = {};
  this.imageInvertedIndex = {};
  this.textInvertedIndex = {};
  this.images = [];
  this.sites = [];
  this.seen = {};

  this.write = (url, html) => {
    const documentId = this.sites.length;
    const website = new Website(url, html);
    const title = website.title();
    const content = website.content(title);
    const words = content.filter(item => typeof item !== "object");
    const wordsLength = words.length;
    const query = new Query(words);
    const individualWords = query.individualize(words);
    this.seen[url] = true;
    this.sites.push({
      url,
      title,
      description: website.description()
    });
    for (word of individualWords) {
      const normalizedTf = query.count(word) / wordsLength;
      const textInvertedIndexEntry = {
        documentId,
        normalizedTf
      };
      if (this.textInvertedIndex[word]) {
        this.textInvertedIndex[word].push(textInvertedIndexEntry);
      } else {
        this.textInvertedIndex[word] = [textInvertedIndexEntry];
      };
      if (this.totalWordOccurances[word]) {
        this.totalWordOccurances[word] += 1;
      } else {
        this.totalWordOccurances[word] = 1;
      };
    };
    for (i = 0; i < content.length; i++) {
      const item = content[i];
      if (typeof item === "object") {
        const imageId = this.images.length;
        this.images.push(item);
        for (word of individualWords) {
          const imageScore = getImageScore(i, word, content);
          const imageInvertedIndexEntry = {
            imageId,
            imageScore
          };
          if (this.imageInvertedIndex[word]) {
            this.imageInvertedIndex[word].push(imageInvertedIndexEntry);
          } else {
            this.imageInvertedIndex[word] = [imageInvertedIndexEntry];
          };
        };
      };
    };
  };

  this.crawl = async () => {
    while (this.sites.length !== this.maxIndexSize) {
      let nextQueue = [];
      const websitesUnfiltered = await Promise.all(this.queue.map((url) => {
        const website = new Website(url);
        return website.request();
      }));
      const websitesToAdd = this.maxIndexSize - this.sites.length;
      let websites = websitesUnfiltered.filter(message => message !== "Failure")
        .slice(0, websitesToAdd);
      for (site of websites) {
        const url = site.url;
        const htmlCode = site.htmlCode;
        const website = new Website(url, htmlCode);
        this.write(url, htmlCode);
        nextQueue = nextQueue.concat(website.urls());
      };
      nextQueue = new Query(nextQueue.filter(url => !this.seen[url]))
        .individualize();
      this.queue = nextQueue;
    };
  };
};
Called like this
const crawler = new Crawler(["https://stanford.edu/"], 25000000);
crawler.crawl();
this.textInvertedIndex = {}; defines an Object, on which push is not a valid function. You can change it to an array by defining it as this.textInvertedIndex = []; otherwise you can add key/value entries to the object as it is defined, like this: this.textInvertedIndex[key] = value;
Turns out the key I was accessing in this.textInvertedIndex[word] was constructor. constructor is an inherited property of every plain object (from Object.prototype), so the check if (this.textInvertedIndex[word]) is truthy even though no array was ever assigned for that key, and the code tries to .push onto the built-in constructor function. To solve this problem, I made all object keys upper case, so constructor becomes CONSTRUCTOR, ensuring the code never collides with an already existing object property.
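A minimal sketch of why this happens, plus a couple of commonly used alternatives to capitalizing keys (these are suggestions, not part of the original answer):
const index = {};

// "constructor" is inherited from Object.prototype, so the truthy check passes
// even though no array was ever assigned for this key
console.log(Boolean(index["constructor"])); // true
// index["constructor"].push(entry);        // TypeError: ... .push is not a function

// alternative 1: an object with no prototype, so nothing is inherited
const nullProtoIndex = Object.create(null);

// alternative 2: a Map, whose keys never collide with prototype members
const mapIndex = new Map();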

Batch 500 writes into firestore loop from json file

Using some inspiration I got from this thread and reply, I tried to get my loop working to write to Firestore in batches. But somehow I can only update 1 document, even though I can see that I iterate through different values from my array. I load the data into an array and work from there.
const db = admin.firestore();
const jsonStream = StreamArray.withParser();
let arr = []

jsonStream.on('data', ({ key, value }) => {
  arr.push(value);
});

jsonStream.on('end', () => {
  var counter = 0;
  var commitCounter = 0;
  var batches = [];
  arr.forEach((a, ind) => {
    batches[commitCounter] = db.batch();
    if (counter <= 498) {
      var thisRef = db.collection('Testing').doc(a.id);
      console.log("id")
      console.log(a.id);
      batches[commitCounter].set(thisRef, { ...a });
      counter = counter + 1;
    } else {
      counter = 0;
      commitCounter = commitCounter + 1;
      batches[commitCounter] = db.batch();
    }
  })
  for (var i = 0; i < batches.length; i++) {
    if (i == 0) {
      console.log(batches[0])
    }
    batches[i].commit().then(function () {
      console.count('wrote batch');
    });
  }
});

const filename = path.join(__dirname, 'mydata.json');
fs.createReadStream(filename).pipe(jsonStream.input);
The following line gets executed on each iteration, which essentially "resets" your batch on each round:
batches[commitCounter] = db.batch();
So at the end each of your batches will only contain one document write.
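One possible fix (a hedged sketch keeping the original variable names): add every document to the current batch, and only start a new batch once roughly 500 writes have been queued, since a Firestore batch is limited to 500 operations.
jsonStream.on('end', () => {
  let counter = 0;
  let commitCounter = 0;
  const batches = [db.batch()];        // start with a single batch

  arr.forEach((a) => {
    if (counter === 499) {             // current batch is full, start a new one
      counter = 0;
      commitCounter++;
      batches[commitCounter] = db.batch();
    }
    const thisRef = db.collection('Testing').doc(a.id);
    batches[commitCounter].set(thisRef, { ...a });
    counter++;
  });

  batches.forEach((batch) => {
    batch.commit().then(() => console.count('wrote batch'));
  });
});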

How to handle a long request with Express

I'm working on a simple function for a specific GET request triggered in the browser. The objective of this request is to make multiple queries to a MongoDB (Mongoose) database, then perform some calculations and formatting on the results before sending them back to the browser.
The only problem is that everything takes too long and it results in an error in the browser:
net::ERR_EMPTY_RESPONSE
To give an example, here is part of the function I'm trying to build:
async function getPriceByMake(makes, id) {
  return new Promise(async (resolve, reject) => {
    let pMakes = {};
    const makesArr = Object.keys(makes);
    for (let i = 0; i < makesArr.length; i++) {
      console.log('Getting the docs ... ' + Math.round(i/makesArr.length*100) + '%')
      const currMake = makesArr[i];
      pMakes[currMake] = {};
      const modelsArr = Object.keys(makes[currMake]);
      for (let j = 0; j < modelsArr.length; j++) {
        const currModel = modelsArr[j];
        await Listing.find({ catFrom: id, model: currModel }, 'year asking', (err, docs) => {
          if (docs.length > 1) {
            pMakes[currMake][currModel] = [docs];
          } else {
            pMakes[currMake][currModel] = {};
          }
        });
      }
    }
    resolve(pMakes);
  });
}
In this function, if I leave the async/await out, I get an empty {} on the other end, which is obviously not the objective.
I've been searching the web a little and found an article pointing to this scheme:
Browser:
  Initiates request
  Displays progress
  Shows result
WebServer:
  Submits event
  Checks for completion
  Returns result
BackEndApp:
  Picks up event
  Runs task
  Returns results
My question is the following:
How can I do that with NodeJS and Express?
In this code:
for (let j = 0; j < modelsArr.length; j++) {
  const currModel = modelsArr[j];
  await Listing.find({ catFrom: id, model: currModel }, 'year asking', (err, docs) => {
    if (docs.length > 1) {
      pMakes[currMake][currModel] = [docs];
    } else {
      pMakes[currMake][currModel] = {};
    }
  });
}
Your await isn't working because you're passing a callback to Listing.find(). When you do that, it does NOT return a promise and therefore the await does nothing useful. You get the empty response because the await doesn't work and thus you call resolve() before there's any actual data there.
Change the code to this:
for (let j = 0; j < modelsArr.length; j++) {
  const currModel = modelsArr[j];
  let docs = await Listing.find({ catFrom: id, model: currModel }, 'year asking');
  if (docs.length > 1) {
    pMakes[currMake][currModel] = [docs];
  } else {
    pMakes[currMake][currModel] = {};
  }
}
And, then the await will work properly.
You should also remove the return new Promise() wrapper; you don't want that. Just make the function async and use await, and it will already return a promise.
Here's your function with the unnecessary promise wrapper removed:
async function getPriceByMake(makes, id) {
  let pMakes = {};
  const makesArr = Object.keys(makes);
  for (let i = 0; i < makesArr.length; i++) {
    console.log('Getting the docs ... ' + Math.round(i/makesArr.length*100) + '%')
    const currMake = makesArr[i];
    pMakes[currMake] = {};
    const modelsArr = Object.keys(makes[currMake]);
    for (let j = 0; j < modelsArr.length; j++) {
      const currModel = modelsArr[j];
      let docs = await Listing.find({ catFrom: id, model: currModel }, 'year asking');
      if (docs.length > 1) {
        pMakes[currMake][currModel] = [docs];
      } else {
        pMakes[currMake][currModel] = {};
      }
    }
  }
  return pMakes;
}
Then, keep in mind that whatever code sends your actual response needs to use .then() or await when calling this async function in order to get the final result.
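For example, a hedged sketch of what the Express route handler might look like (the route path and the loadMakes helper are hypothetical, not from the original question):
// hypothetical route: where `makes` comes from depends on your app
app.get('/prices/:id', async (req, res) => {
  try {
    const makes = await loadMakes();                          // hypothetical helper
    const pMakes = await getPriceByMake(makes, req.params.id);
    res.json(pMakes);
  } catch (err) {
    console.error(err);
    res.status(500).json({ error: 'failed to compute prices' });
  }
});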
Your best bet to speed up this code would be to refactor either your queries or your database structure or both to not have to do N * M separate queries to get your final result. That's likely where your slowness is coming from. The biggest performance gains will probably come from reducing the number of queries you have to run here to far fewer.
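For instance, if the schema allows it, one hedged option is a single query per make using $in over that make's models, then grouping in memory (note the projection now needs the model field as well):
async function getPriceByMakeFewerQueries(makes, id) {
  const pMakes = {};
  for (const currMake of Object.keys(makes)) {
    pMakes[currMake] = {};
    const models = Object.keys(makes[currMake]);
    // one query per make instead of one per model
    const docs = await Listing.find(
      { catFrom: id, model: { $in: models } },
      'year asking model'
    );
    for (const currModel of models) {
      const matching = docs.filter(d => d.model === currModel);
      pMakes[currMake][currModel] = matching.length > 1 ? [matching] : {};
    }
  }
  return pMakes;
}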
Depending upon your database configuration and capabilities, it might speed things up to run the inner loop queries in parallel as shown here:
async function getPriceByMake(makes, id) {
  let pMakes = {};
  const makesArr = Object.keys(makes);
  for (let i = 0; i < makesArr.length; i++) {
    console.log('Getting the docs ... ' + Math.round(i/makesArr.length*100) + '%')
    const currMake = makesArr[i];
    pMakes[currMake] = {};
    const modelsArr = Object.keys(makes[currMake]);
    await Promise.all(modelsArr.map(async currModel => {
      let docs = await Listing.find({ catFrom: id, model: currModel }, 'year asking');
      if (docs.length > 1) {
        pMakes[currMake][currModel] = [docs];
      } else {
        pMakes[currMake][currModel] = {};
      }
    }));
  }
  return pMakes;
}

NodeJS: How to wait for the HTTP Get request is complete in For Loop?

I have a for loop in Node.js. I would like to wait until the result of the HTTP GET request is complete before the loop moves on to the next iteration. How do I achieve that?
for (let k=0; k<fd.length; k++) {
  url = fd[k].nct_id;
  HttpSearch({condition: url}).then(trials => {
    //Get the result first before execute the next iteration
    console.log(trials);
  });
}
You should make the for-loop async:
const main = async () => {
  for (let k = 0; k < fd.length; k++) {
    const url = fd[k].nct_id;
    const trials = await HttpSearch({ condition: url });
    console.log(trials);
  }
};
main().catch(console.error);
This will cause the loop to "pause" at each HttpSearch.
I would do it like this (note that this must run inside an async function for await to work):
let k = 0;
let len = fd.length;
for (; k < len;) {
  let url = fd[k].nct_id;
  let subs = await HttpSearch({condition: url});
  console.log(subs);
  k++;
}
or like this with a promise chain:
let url;
let promiseChain = Promise.resolve();
for (let i = 0; i < fd.length; i++) {
  url = fd[i].nct_id;
  // you need to pass the current value of `url`
  // into the chain manually, to avoid having its value
  // changed before the .then code accesses it.
  const makeNextPromise = (url) => () => {
    // return the promise here so the chain waits for it
    return HttpSearch({condition: url})
      .then((result) => {
        return result;
      });
  };
  promiseChain = promiseChain.then(makeNextPromise(url));
}
This uses recursion, which fires the next request once the previous one has finished:
var limit = fd.length;
var counter = 1; // fd[0] is requested below, so start at the next index
HttpSearch({condition: fd[0].nct_id}).then(yourCallBack);

function yourCallBack(trials) {
  console.log(trials);
  if (counter === limit)
    return console.log('Done');
  HttpSearch({condition: fd[counter].nct_id}).then(yourCallBack);
  counter++;
}

node wait for iteration to complete before callback

I have a Lambda function in Node.js that sends a push notification.
In that function I need to iterate through my users, sending a notification to each one before invoking the callback.
Ideally I would like the iterations to run in parallel.
What would be the best way to do this?
My code is currently as follows, but it does not work as expected because the last user is not always the last one to be handled:
var apnProvider = new apn.Provider(options);
var iterationComplete = false;

for (var j = 0; j < users.length; j++) {
  if (j === (users.length - 1)) {
    iterationComplete = true;
  }
  var deviceToken = users[j].user_device_token;
  var deviceBadge = users[j].user_badge_count;
  var notification = new apn.Notification();
  notification.alert = message;
  notification.contentAvailable = 1;
  notification.topic = "com.example.Example";
  apnProvider.send(notification, [deviceToken]).then((response) => {
    if (iterationComplete) {
      context.succeed(event);
    }
  });
}
Use Promise.all instead - map each user's associated apnProvider.send call to a Promise in an array, and when all Promises in the array are resolved, call the callback:
const apnProvider = new apn.Provider(options);

const userPromises = users.map((user) => {
  const deviceToken = user.user_device_token;
  const deviceBadge = user.user_badge_count;
  const notification = new apn.Notification();
  notification.alert = message;
  notification.contentAvailable = 1;
  notification.topic = "com.example.Example";
  return apnProvider.send(notification, [deviceToken]);
});

Promise.all(userPromises)
  .then(() => {
    context.succeed(event);
  })
  .catch(() => {
    // handle errors
  });
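One design note: Promise.all rejects as soon as any one of the send promises rejects, so the .catch branch can run before the remaining sends have settled. If every user should be attempted regardless of individual failures, Promise.allSettled (available since Node 12.9) is a hedged alternative:
Promise.allSettled(userPromises)
  .then((results) => {
    const failed = results.filter(r => r.status === 'rejected');
    if (failed.length > 0) {
      console.error(failed.length + ' notification(s) failed');
    }
    context.succeed(event);
  });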
