Batch 500 writes into firestore loop from json file - node.js

Using some inspiration I got from this thread and its reply, I tried to get my loop working, which is supposed to write into Firestore in batches. But somehow each batch only ends up updating 1 document, even though I can see the loop iterating through different values from my array. I load the data into an array and work from there.
const db = admin.firestore();
const jsonStream = StreamArray.withParser();
let arr = [];

jsonStream.on('data', ({ key, value }) => {
    arr.push(value);
});

jsonStream.on('end', () => {
    var counter = 0;
    var commitCounter = 0;
    var batches = [];
    arr.forEach((a, ind) => {
        batches[commitCounter] = db.batch();
        if (counter <= 498) {
            var thisRef = db.collection('Testing').doc(a.id);
            console.log("id");
            console.log(a.id);
            batches[commitCounter].set(thisRef, { ...a });
            counter = counter + 1;
        } else {
            counter = 0;
            commitCounter = commitCounter + 1;
            batches[commitCounter] = db.batch();
        }
    });

    for (var i = 0; i < batches.length; i++) {
        if (i == 0) {
            console.log(batches[0]);
        }
        batches[i].commit().then(function () {
            console.count('wrote batch');
        });
    }
});

const filename = path.join(__dirname, 'mydata.json');
fs.createReadStream(filename).pipe(jsonStream.input);

The following line gets executed on every iteration, which essentially "resets" your batch on each round:
batches[commitCounter] = db.batch();
So at the end, each of your batches will only contain one document write.
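One way to fix it (a minimal sketch, reusing the arr and db from the question and Firestore's 500-writes-per-batch limit): start a new batch only once the current one is full, then commit all of them afterwards.
var counter = 0;
var commitCounter = 0;
var batches = [db.batch()];

arr.forEach((a) => {
    // open a fresh batch only when the current one has hit the 500-write limit
    if (counter === 500) {
        counter = 0;
        commitCounter++;
        batches[commitCounter] = db.batch();
    }
    var thisRef = db.collection('Testing').doc(a.id);
    batches[commitCounter].set(thisRef, { ...a });
    counter++;
});

batches.forEach((batch) => {
    batch.commit().then(function () {
        console.count('wrote batch');
    });
});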

Related

How to use worker threads in nodejs?

I have the below code, where the array passed in is very large and processing each element takes about one second. How can I use worker threads in this situation?
function processData(arr) {
    var result = [];
    for (var i = 0; i < arr.length; i++) {
        result.push(process(arr[i]));
    }
    return result;
}

function process(item) {
    // some code here takes 1 second to execute
}

processData(arr);
You can use the workerpool npm package to do your long processing job in a worker thread, something like this:
const workerpool = require('workerpool');
const pool = workerpool.pool();

const arr = [...] // Large data set which is to be processed

new Promise((resolve, reject) => {
    pool.exec(
        // this function is serialized and run inside a worker thread,
        // so it must be self-contained (no references to closure variables)
        arr => {
            var result = [];
            for (var i = 0; i < arr.length; i++) {
                /*
                    Do the long processing on the data
                */
                const processedData = arr[i];
                result.push(processedData);
            }
            return result;
        },
        [arr]
    )
        .then(result => resolve(result))
        .catch(err => reject(err));
});
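Note that pool.exec as written hands the entire array to a single worker, so the work is off the main thread but not parallel. A minimal sketch of one way to parallelize it by splitting the array into chunks (the 4-way split is an assumption, not part of the answer above):
const chunkSize = Math.ceil(arr.length / 4);
const chunks = [];
for (let i = 0; i < arr.length; i += chunkSize) {
    chunks.push(arr.slice(i, i + chunkSize));
}

Promise.all(
    // each chunk becomes a separate task, so the pool can run them on several workers at once
    chunks.map(chunk =>
        pool.exec(items => items.map(item => item /* long processing here */), [chunk])
    )
).then(results => {
    const combined = [].concat(...results); // reassemble in the original order
    console.log(combined.length);
});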

NodeJS: How to wait for an HTTP GET request to complete in a for loop?

I have a for loop in NodeJS. I would like to wait until the HTTP GET request in each iteration completes before the next iteration executes. How do I achieve that?
for (let k = 0; k < fd.length; k++) {
    url = fd[k].nct_id;
    HttpSearch({condition: url}).then(trials => {
        // Get the result first before executing the next iteration
        console.log(trials);
    });
}
You should make the for-loop async:
const main = async () => {
    for (let k = 0; k < fd.length; k++) {
        const url = fd[k].nct_id;
        const trials = await HttpSearch({ condition: url });
        console.log(trials);
    }
};

main().catch(console.error);
This will cause the loop to "pause" at each HttpSearch.
I would do it like this (note that this must run inside an async function for await to be valid):
let k = 0;
let len = fd.length;
for (; k < len;) {
    let url = fd[k].nct_id;
    let subs = await HttpSearch({condition: url});
    console.log(subs);
    k++;
}
or like this with promises:
let url;
let promiseChain = Promise.resolve();
for (let i = 0; i < fd.length; i++) {
    url = fd[i].nct_id;
    // you need to pass the current value of `url`
    // into the chain manually, to avoid having its value
    // changed before the .then code accesses it.
    const makeNextPromise = (url) => () => {
        // return the promise here so the chain waits for it
        return HttpSearch({condition: url})
            .then((result) => {
                return result;
            });
    };
    promiseChain = promiseChain.then(makeNextPromise(url));
}
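Note that the loop above only builds the chain; to observe when the last request has finished, hang one more then off the end of it:
promiseChain.then(() => console.log('all searches done'));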
This uses recursion, which calls the next request once the previous one has finished:
var limit = fd.length;
var counter = 0;

HttpSearch({condition: fd[0].nct_id}).then(yourCallBack);

function yourCallBack(trials) {
    console.log(trials);
    counter++;
    if (counter === limit)
        return console.log('Done');
    HttpSearch({condition: fd[counter].nct_id}).then(yourCallBack);
}

node wait for iteration to complete before callback

I have a lambda function in node.js to send a push notification.
In that function I need to iterate through my users sending a notification for each one prior to the callback.
Ideally I would like the iterations to run in parallel.
What would be the best way to do this?
My code is currently as follows but it does not work as expected because the last user is not always the last to be handled:
var apnProvider = new apn.Provider(options);
var iterationComplete = false;

for (var j = 0; j < users.length; j++) {
    if (j === (users.length - 1)) {
        iterationComplete = true;
    }

    var deviceToken = users[j].user_device_token;
    var deviceBadge = users[j].user_badge_count;
    var notification = new apn.Notification();

    notification.alert = message;
    notification.contentAvailable = 1;
    notification.topic = "com.example.Example";

    apnProvider.send(notification, [deviceToken]).then((response) => {
        if (iterationComplete) {
            context.succeed(event);
        }
    });
}
Use Promise.all instead - map each user's associated apnProvider.send call to a Promise in an array, and when all Promises in the array are resolved, call the callback:
const apnProvider = new apn.Provider(options);

const userPromises = users.map((user) => {
    const deviceToken = user.user_device_token;
    const deviceBadge = user.user_badge_count;
    const notification = new apn.Notification();

    notification.alert = message;
    notification.contentAvailable = 1;
    notification.topic = "com.example.Example";

    return apnProvider.send(notification, [deviceToken]);
});

Promise.all(userPromises)
    .then(() => {
        context.succeed(event);
    })
    .catch(() => {
        // handle errors
    });
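One caveat: Promise.all rejects as soon as any single send fails, so one bad token stops context.succeed from ever being called. If every send should be attempted regardless of individual failures, Promise.allSettled (available since Node 12.9) is an option; a minimal sketch:
Promise.allSettled(userPromises)
    .then((results) => {
        // each entry is { status: 'fulfilled', value } or { status: 'rejected', reason }
        const failed = results.filter(r => r.status === 'rejected');
        console.log(failed.length + ' sends failed');
        context.succeed(event);
    });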

Callback not returning data

I am trying to make a convolutional neural network and run it via a single function which returns the data. I am having trouble with the asynchronous part: when I try to pass the data back through a callback and log it, I get an empty object.
const convolute = (callback) => {
    let inputData = {};
    for (let p = 0; p < files.length; p++) {
        let fileData = [];
        let pixels = [];
        let dimensions = [];
        let image, iterationArr;
        let FinalVector = [];
        fs.readFile("./imgs/" + files[p], "utf8", (err, data) => {
            if (err) console.log(err);
            fileData = data.split("\n");
            dimensions = fileData[0].split(" ").map(l => parseInt(l));
            pixels = fileData[1].split(" ").map(p => parseInt(p));
            pixels.splice(pixels[pixels.length - 1], 1);
            pixels = MM.convertToMatrix(pixels, dimensions[0]);
            image = new ImageProcess(pixels);
            for (let i = 0; i < edges.length; i++) {
                iterationArr = image.pixels;
                let iteration = 0;
                while (iterationArr.length > 30) {
                    iteration++;
                    iterationArr = image.start(iterationArr, edges[i]);
                }
                let iterationVector = MM.convertToVector(iterationArr);
                for (let i = 0; i < iterationVector.length; i++) {
                    FinalVector.push(iterationVector);
                }
            } // end of each file
            let fileName = files[p].substring(0, files[p].length - 4);
            inputData[fileName] = FinalVector;
        }); // end of reading file
    } // new file reading starts here
    callback(inputData);
}; // takes roughly around 5 seconds to execute

convolute((data) => {
    console.log(data);
});
files is an array that stores all the file names. FinalVector is the array that contains 2000+ items. I've done some testing and the code works: when I log each file's vector separately it returns the data, but there is trouble when I use a callback to return the data from the function.
PS. I've only included the parts of my code that may contribute to my problem.
inputData is empty because readFile is asynchronous: callback(inputData) is called before the callbacks of each readFile, which fill inputData, have run.
So callback(inputData) should be called after all the (asynchronous) readFile calls are processed: inside the callback of readFile, once all the files have been read:
const convolute = (callback) => {
    let inputData = {};
    let remainings = files.length;
    if (!remainings) {
        callback(inputData);
        return;
    }
    for (let p = 0; p < files.length; p++) {
        fs.readFile("./imgs/" + files[p], "utf8", (err, data) => {
            if (err) {
                console.log(err);
            }
            else {
                /* process data simplified */
                inputData[files[p]] = data;
            }
            if (--remainings <= 0) {
                callback(inputData);
            }
        });
    }
};
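For what it's worth, with the promise-based fs API (Node 10+) the manual counting can be avoided entirely; a minimal sketch, assuming the same files array and with the per-file processing simplified as above:
const fs = require('fs');

async function convolute() {
    const inputData = {};
    // start all reads in parallel; Promise.all resolves only after every one has finished
    await Promise.all(files.map(async (file) => {
        const data = await fs.promises.readFile("./imgs/" + file, "utf8");
        inputData[file] = data; // process data simplified
    }));
    return inputData;
}

convolute().then(data => console.log(data));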

Making node wait for a db call to complete

I just started writing Node.js code.
I'm writing code that extracts data from a PDF file, cleans it up, and stores it in a database (using CouchDB, accessed through the nano library).
The problem is that the calls are being made asynchronously... so the database get calls (I make some get calls to fetch a few affiliation files during the clean-up) complete only after the rest of the program has run, resulting in variables being undefined. Is there any way around this?
I've reproduced my code below.
const fs = require('fs');
const os = require('os');
var couchDB = require('couch-db').CouchDB;
var pdf_table_extractor = require('pdf-table-extractor');
const filename = "PQ-PRI-0005-1806-01-0000_quoteSlipForLIBVIDGI1.pdf";
var nano = require('nano')('https://couchadmin:difficulttoguessmypassword#dbdev.perilwise.com');
var server = new couchDB('https://db.url.com');
server.auth("admin", "admin");
var db = nano.db.use('pwfb');
var temp = [];

// New callView function
async function callView() {
    try {
        const doc = await view('liabilitymdm', 'pi');
        for (var i = 0; i < doc.rows.length; i++) {
            tmp.push(doc.rows[i]);
        }
        return doc;
    } catch (e) {
        console.log(e);
    }
}

function suc(result) {
    let ttmp = [];
    console.log(result);
    var pageTables = result.pageTables;
    var firstPageTables = pageTables[0].tables;
    ttmp = callView();
    // this console log shows Promise { <pending> }
    console.log(ttmp);
    for (var k = 0; k < firstPageTables.length; k++) {
        var temp = firstPageTables[k];
        if (temp.length > 0) {
            dump.push(temp);
        }
    }
    // console.log(dump);
    var insurer = filename.substr(37, 8);
    read_quote_slip(insurer, dump);
}

var read_quote_slip = (insurer, data) => {
    console.log("read_quote_slip correctly entered");
    var finOut = {};
    if (insurer === "LIBVIDGI") {
        finOut.insurer = insurer;
        finOut.policyType = data[2][0].replace(/Quotation for/g, "");
        finOut.natureOfWork = data[13][3];
        let dedpos = indexGetter(data, "Deductible")[0];
        finOut.deductible = data[dedpos + 1][0];
        let cov = indexGetter(data, "Coverage Territory and Jurisdiction")[0];
        finOut.coverageTerritory = data[cov + 1][0].replace(/Territory/g, "");
        finOut.coverageJurisdiction = data[cov + 2][0].replace(/Jurisdiction/g, "");
        let ext = indexGetter(data, "Extensions")[0];
        finOut.coverage = data[ext + 1][0].split(/\r?\n/);
        let majexc = indexGetter(data, "Major Exclusions")[0];
        finOut.exclusions = data[majexc + 1][0].split(/\r?\n/);
        let prdtl = indexGetter(data, "Description")[0];
        let prm = premiumcompute(data, prdtl, dedpos);
        finOut.premium = prm;
        finCleaned = libvidgi_cleaned(finOut);
        // console.log(finCleaned);
    }
}

var indexGetter = (words, toFind) => {
    var finindex = [];
    for (var i = 0; i < words.length; i++) {
        for (var j = 0; j < words[i].length; j++) {
            if (words[i][j].indexOf(toFind) >= 0) {
                finindex.push(i);
            }
        }
    }
    return finindex;
}

var premiumcompute = (data, from, to) => {
    let finprem = [];
    let numbop = to - from - 2;
    let incr = 0;
    for (var i = from + 2; i < to; i++) {
        let pr = {};
        pr.option = incr + 1;
        pr.sumInsured = data[i][2].replace(/ /g, "");
        pr.premium = data[i][data[i].length - 1].replace(/ /g, "");
        finprem.push(pr);
        incr += 1;
    }
    return finprem;
}

var libvidgi_cleaned = (finOut) => {
    return finOut;
}

var fal = (result) => {
    console.log(result);
    console.log("there was an error");
}

var readPDFFile = function (filename) {
    // Decide which insurer from the filename
    // console.log(filename);
    console.log(filename.substr(37, 8) + "Printed on line 38");
    insurer = filename.substr(37, 8);
    pdf_table_extractor(filename, (result) => { suc(result) }, fal);
}

var libvidgi_data_extract = (data) => {
    console.log(data);
    let arr = data.pageTables.tables;
    for (var i = 0; i <= arr.length; i++) {
        console.log(arr[i]);
    }
}

readPDFFile(filename);
This answer assumes you are using Node.js > v7.6.
Since db.view accepts a callback, and you wish to wait for it to finish, one solution is to promisify it - meaning to turn it into a promise which can be awaited. You can use a library like Bluebird or you can even use Node's built-in promisify util. Then you can rewrite callView:
const {promisify} = require('util');
// bind db so promisify doesn't lose the method's `this` context
const view = promisify(db.view.bind(db));

async function callView() {
    try {
        const doc = await view('liabilitymdm', 'pi');
        // the async operation is now guaranteed to be done
        // (if there is an error it will be caught by the catch clause)
        for (var i = 0; i < doc.rows.length; i++) {
            temp.push(doc.rows[i]);
        }
        console.log(temp);
    } catch (e) {
    }
}
If you are not using Node.js > v7.6 (and cannot use async/await), you can still utilize promises by using their then method:
const {promisify} = require('util');
const view = promisify(db.view.bind(db));

function callView() {
    view('liabilitymdm', 'pi')
        .then(doc => {
            for (var i = 0; i < doc.rows.length; i++) {
                temp.push(doc.rows[i]);
            }
            console.log(temp);
            return temp;
        })
        .then(temp => {
            console.log(temp);
        })
        .catch(e => {});
}
Notice how the first then is returning something which is used in a later then.
To make Node wait for asynchronous work to finish, you can use the keywords async and await.
They work like this:
async function doSomething() {
    const formattedData = formatData();
    const result = await db.postToDatabase(formattedData);
    // the below will not happen until the above line is finished
    doSomethingElse(result);
}
It's pretty simple in Node to make a function wait for asynchronous work. Just put the async keyword at the beginning of the function definition, and then put await in front of any promise-returning call whose result you need before execution continues.
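Applied to the question's code, that means making suc itself async and awaiting callView instead of assigning its pending promise; a minimal sketch, keeping the names from the question:
async function suc(result) {
    var pageTables = result.pageTables;
    var firstPageTables = pageTables[0].tables;
    // await the promise instead of logging it while still pending
    const ttmp = await callView();
    console.log(ttmp); // the resolved rows, no longer Promise { <pending> }
    // ... rest of the processing as in the question
}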
