.push is not a function in web crawler - node.js

I am writing a node JS web crawler class, and I have encountered the following error, this.textInvertedIndex[word].push is not a function. Upon further inspection I realised that for some reason this.textInvertedIndex[word] was written as a native object, function Object({ [native code] }). For the first few iterations, by console logging this.textInvertedIndex everything seemed fine as it was an object of arrays. But then suddenly this error occurred. Is there any part of the code where I am implicitly rewriting textInvertedIndex?
Here is the relevant class:
function Crawler(queue, maxIndexSize) {
this.queue = queue;
this.maxIndexSize = maxIndexSize;
this.findChunks = () => {
let currentChunk;
let minimumDistance = Infinity;
for (i = 1; i <= this.maxIndexSize; i++) {
if (this.maxIndexSize % i === 0) {
const newDistance = Math.abs(i - 30);
if (newDistance < minimumDistance) {
minimumDistance = newDistance;
currentChunk = i;
} else {
return currentChunk
};
};
};
};
this.chunks = this.findChunks();
this.chunkSize = this.maxIndexSize / this.chunks;
this.totalWordOccurances = {};
this.imageInvertedIndex = {};
this.textInvertedIndex = {};
this.images = [];
this.sites = [];
this.seen = {};
this.write = (url, html) => {
const documentId = this.sites.length;
const website = new Website(url, html);
const title = website.title();
const content = website.content(title);
const words = content.filter(item => typeof item !== "object");
const wordsLength = words.length;
const query = new Query(words);
const individualWords = query.individualize(words);
this.seen[url] = true;
this.sites.push({
url,
title,
description: website.description()
});
for (word of individualWords) {
const normalizedTf = query.count(word) / wordsLength;
const textInvertedIndexEntry = {
documentId,
normalizedTf
};
if (this.textInvertedIndex[word]) {
this.textInvertedIndex[word].push(textInvertedIndexEntry);
} else {
this.textInvertedIndex[word] = [textInvertedIndexEntry];
};
if (this.totalWordOccurances[word]) {
this.totalWordOccurances[word] += 1;
} else {
this.totalWordOccurances[word] = 1;
};
};
for (i = 0; i < content.length; i++) {
const item = content[i];
if (typeof item === "object") {
const imageId = this.images.length;
this.images.push(item);
for (word of individualWords) {
const imageScore = getImageScore(i, word, content);
const imageInvertedIndexEntry = {
imageId,
imageScore
};
if (this.imageInvertedIndex[word]) {
this.imageInvertedIndex[word].push(imageInvertedIndexEntry);
} else {
this.imageInvertedIndex[word] = [imageInvertedIndexEntry];
};
};
};
};
};
this.crawl = async () => {
while (this.sites.length !== this.maxIndexSize) {
let nextQueue = [];
const websitesUnfiltered = await Promise.all(this.queue.map((url) => {
const website = new Website(url);
return website.request();
}));
const websitesToAdd = this.maxIndexSize - this.sites.length;
let websites = websitesUnfiltered.filter(message => message !== "Failure")
.slice(0, websitesToAdd);
for (site of websites) {
const url = site.url;
const htmlCode = site.htmlCode;
const website = new Website(url, htmlCode);
this.write(url, htmlCode);
nextQueue = nextQueue.concat(website.urls());
};
nextQueue = new Query(nextQueue.filter(url => !this.seen[url]))
.individualize();
this.queue = nextQueue;
};
};
};
Called like this
const crawler = new Crawler(["https://stanford.edu/"], 25000000);
crawler.crawl();

this.textInvertedIndex = {}; is defining an Object of which push is not a valid function. you can change it to an array by defining it as this.textInvertedIndex = []; otherwise you can add key/value entries to the object as it is defined like this: this.textInvertedIndex[key] = value;

Turns out, my key was accessing this.textInvertedIndex[word]. And word was constructor. constructor is already a built in object property so it can never be rewritten as an array with .push defined. To solve this problem, make all object keys capital, so constructor will become CONSTRUCTOR, thus making sure that already existing object properties are never called.

Related

How to make the function wait till I get the response in node.js

Async and Await are not working as expected. Please correct me where I am doing wrong in code.
I am reading data (url, pagelimit, company)from excel and by using switch(), I am navigating to the service.
I have to wait till I get the response from this function cnbservice.GetcnbOpenings(url, pageLimit,company), store the response to global array and call this function mdsservice.GetMdsOpenings(url, pageLimit,company), append the results to the global array.
const readexcel = async (request, response) => {
const workbook = XLSX.readFile('file.xlsx');
const sheetnamelist = workbook.SheetNames;
var xldata = XLSX.utils.sheet_to_json(workbook.Sheets[sheetnamelist[0]]);
dataarray =[];
for (i = 0; i < xldata.length; i++) {
company = xldata[i].company;
url = xldata[i].careers_link_url;
pageLimit = xldata[i].pagelimit;
switch(company){
case process.env.cnb_company_name:
const arr = await cnbservice.GetcnbOpenings(url, pageLimit,company)
if(arr !== undefined){
dataarray.push(arr);
}
break;
case process.env.mds_company_name:
const arr1 = await mdsservice.GetMdsOpenings(url, pageLimit,company)
if(arr1 !== undefined){
dataarray.push(arr1);
}
break;
case "default":
console.log("Company Name not matching with any of the services")
}
}
}
You are running await code inside standard for loop which will not work synchronously. to run async/await inside a for loop you should use for...of loop.
for(let element of array){
//await call
}
after making following changes your code will work as expected.
const readexcel = async (request, response) => {
const workbook = XLSX.readFile('file.xlsx');
const sheetnamelist = workbook.SheetNames;
var xldata = XLSX.utils.sheet_to_json(workbook.Sheets[sheetnamelist[0]]);
dataarray = [];
for (let element of xldata) {
company = element.company;
url = element.careers_link_url;
pageLimit = element.pagelimit;
switch (company) {
case process.env.cnb_company_name:
const arr = await cnbservice.GetcnbOpenings(url, pageLimit, company)
if (arr !== undefined) {
dataarray.push(arr);
}
break;
case process.env.mds_company_name:
const arr1 = await mdsservice.GetMdsOpenings(url, pageLimit, company)
if (arr1 !== undefined) {
dataarray.push(arr1);
}
break;
case "default":
console.log("Company Name not matching with any of the services")
}
}
}

node wait for iteration to complete before callback

I have a lambda function in node.js to send a push notification.
In that function I need to iterate through my users sending a notification for each one prior to the callback.
Ideally I would like the iteration to perform in parallel.
What would be the best way to do this?
My code is currently as follows but it does not work as expected because the last user is not always the last to be handled:
var apnProvider = new apn.Provider(options);
var iterationComplete = false;
for (var j = 0; j < users.length; j++) {
if (j === (users.length - 1)) {
iterationComplete = true;
}
var deviceToken = users[j].user_device_token;
var deviceBadge = users[j].user_badge_count;
var notification = new apn.Notification();
notification.alert = message;
notification.contentAvailable = 1;
notification.topic = "com.example.Example";
apnProvider.send(notification, [deviceToken]).then((response) => {
if (iterationComplete) {
context.succeed(event);
}
});
}
Use Promise.all instead - map each user's associated apnProvider.send call to a Promise in an array, and when all Promises in the array are resolved, call the callback:
const apnProvider = new apn.Provider(options);
const userPromises = users.map((user) => {
const deviceToken = user.user_device_token;
const deviceBadge = user.user_badge_count;
const notification = new apn.Notification();
notification.alert = message;
notification.contentAvailable = 1;
notification.topic = "com.example.Example";
return apnProvider.send(notification, [deviceToken]);
})
Promise.all(userPromises)
.then(() => {
context.succeed(event);
})
.catch(() => {
// handle errors
});

How to use fs.watchFile to reload Json File

I have couple functions that re-structure the JSON file. They write and manipulate one another. They are in the same js file. Every function depends on the function before to run properly. But since every one of them manipulates the file synchronously after reading it, I have encountered a problem which is hard to explain for me because I am not sure if I understand it well. When the first function finishes the task second function can't read it(It reads but can't see the updated object and its property). If I run all the functions one by one and restart the program afterward, it reads very well. Here is what I have tried until now;
I tried to make my functions asynchronous
I tried to use setTimeout function and give it up until 10 seconds
Both did not work for me. I read in StackOverflow that it fs.watch could help me but couldn't find a proper explanation of how to implement that. Any help regarding this issue I am having would be much appreciated.
-I know I can use another database but I just want to know this one because I am regularly using this. Please don't be rude if I am stupid :P
const fs = require('fs');
const ReadRawData= fs.readFileSync('Data.json');
const ReadParsed = JSON.parse(ReadRawData);
fs.watchFile('Data.json', (curr, prev) => {
console.log(`the current mtime is: ${curr.mtime}`);
console.log(`the previous mtime was: ${prev.mtime}`);
});
I just want to know how can I reload this ReadParsed everytime a file changed and can reach the reloaded version from functions.
Here is my full code; This is testing2.js
const request = require('request');
const cherio= require('cheerio');
const fs = require('fs');
const cheerioTableparser = require('cheerio-tableparser');
async function htmlCollector () {
const ReadTransactionBase = await fs.readFileSync('transactionDatabase.json');
const ReadTransaction = JSON.parse(ReadTransactionBase);
ReadTransaction.pageInformation = "";
let editedDataBase = JSON.stringify(ReadTransaction, null, 2);
fs.writeFileSync('transactionDatabase.json', editedDataBase);
for (let counter = 1; counter <= ReadTransaction.maxPage; counter++) {
request(ReadTransaction.link + counter,
(error, response, html) => {
if (!error && response.statusCode === 200) {
const $ = cherio.load(html);
const items = [];
$('#maindiv').each(function (i, elem) {
items[i] = $(this).html();
});
let newPage = {['PageNumber'+counter] : items};
ReadTransaction.pageInformation = Object.assign(newPage,ReadTransaction.pageInformation);
let editedDataBase = JSON.stringify(ReadTransaction, null, 2);
fs.writeFileSync('transactionDatabase.json', editedDataBase);
}
})
}
}
async function databaseModifier () {
const ReadTransactionBase = await fs.readFileSync('transactionDatabase.json');
const ReadTransaction = JSON.parse(ReadTransactionBase);
delete ReadTransaction.from;
delete ReadTransaction.quantities;
delete ReadTransaction.orangeBox;
let editedDataBase = JSON.stringify(ReadTransaction, null, 2);
console.log('test');
fs.writeFileSync('transactionDatabase.json', editedDataBase);
for (let i=1; i<=Object.keys(ReadTransaction.pageInformation).length;i++) {
let nums = 'PageNumber' + i;
let newObject = {[nums]: {}};
ReadTransaction.from = Object.assign(newObject,ReadTransaction.from);
ReadTransaction.orangeBox = Object.assign(newObject,ReadTransaction.orangeBox);
ReadTransaction.quantities = Object.assign(newObject,ReadTransaction.quantities);
let editedDataBase = JSON.stringify(ReadTransaction, null, 2);
fs.writeFileSync('transactionDatabase.json', editedDataBase);
}
}
async function fromOrangeBoxQuantities () {
const ReadTransactionBase = await fs.readFileSync('transactionDatabase.json');
const ReadTransaction = JSON.parse(ReadTransactionBase);
for (let counter = 1; counter <= ReadTransaction.maxPage ; counter++) {
let HTMLPageNumber = 'PageNumber' + counter;
let $ = cherio.load(ReadTransaction.pageInformation[HTMLPageNumber][0]);
cheerioTableparser($);
let data = $('.table').parsetable(true,true,true);
for (let sCounter = 1; sCounter <= data[2].length; sCounter++) {
let fromTable = {[sCounter] : [data[2][sCounter-1]]};
let orangeBoxTable = {[sCounter] : [data[3][sCounter-1]]};
let quantityTable = {[sCounter] : [data[5][sCounter-1]]};
ReadTransaction.from[HTMLPageNumber] = Object.assign(fromTable,ReadTransaction.from[HTMLPageNumber]);
ReadTransaction.orangeBox[HTMLPageNumber] = Object.assign(orangeBoxTable,ReadTransaction.orangeBox[HTMLPageNumber]);
ReadTransaction.quantities[HTMLPageNumber] = Object.assign(quantityTable,ReadTransaction.quantities[HTMLPageNumber]);
let editedDataBase = JSON.stringify(ReadTransaction, null, 2);
fs.writeFileSync('transactionDatabase.json', editedDataBase);
}
}
}
async function validatorOfTransactions (){
const ReadTransactionBase = await fs.readFileSync('transactionDatabase.json');
const ReadTransaction = JSON.parse(ReadTransactionBase);
ReadTransaction.validQuantities = [];
let editedDataBase = JSON.stringify(ReadTransaction, null, 2);
fs.writeFileSync('transactionDatabase.json', editedDataBase);
for (let counter = 1; counter <= ReadTransaction.maxPage ; counter++) {
let HTMLPageNumber = 'PageNumber' + counter;
let length = Object.keys(ReadTransaction.from[HTMLPageNumber]).length;
for (let sCounter = 1; sCounter <= length; sCounter++) {
let a = ReadTransaction.from[HTMLPageNumber][sCounter].toString();
let b = ReadTransaction.fromAddress;
let c = ReadTransaction.orangeBox[HTMLPageNumber][sCounter].toString();
let d = ReadTransaction.OrangeBox;
if (a === b && c === d) {
console.log('yay');
ReadTransaction.validQuantities.push(parseFloat(ReadTransaction.quantities[HTMLPageNumber][sCounter].toString().replace(/,/g,'')));
let editedDataBase = JSON.stringify(ReadTransaction, null, 2);
fs.writeFileSync('transactionDatabase.json', editedDataBase);
}
}
}
}
async function finalizeCalculation () {
const ReadTransactionBase = await fs.readFileSync('transactionDatabase.json');
const ReadTransaction = JSON.parse(ReadTransactionBase);
let NewArray = ReadTransaction.validQuantities;
return NewArray.reduce((a,b) => a +b, 0);
}
module.exports = {
htmlCollector : htmlCollector,
databaseModifier : databaseModifier,
fromOrangeBoxQuantities : fromOrangeBoxQuantities,
validatorOfTransactions : validatorOfTransactions,
finalizeCalculation : finalizeCalculation
};
This is transactionDatabase.json;
{
"link": "https://etherscan.io/token/generic-tokentxns2?contractAddress=0xaf2de5cb07d8f5de2369ff104150fef9dc0e604b&mode=&a=0x11209cbc2ea8cf829aa5aa1cdc5a4e5962e70655&p=",
"maxPage": 12,
"fromAddress": "0x11209cbc2ea8cf829aa5aa1cdc5a4e5962e70655",
"OrangeBox": "OUT",
"validQuantities": [],
"pageInformation": {}
}
this is newtest.js;
let testing2 = require('./testing2');
testing2.htmlCollector().then(() => testing2.databaseModifier().then(()=> testing2.fromOrangeBoxQuantities().then(()=> testing2.validatorOfTransactions().then())));
Here is the error I am getting;
(node:21760) UnhandledPromiseRejectionWarning: TypeError: Cannot read property '0' of undefined

making node wait for db call to get completed

I just started writing node.js code.
I'm writing a code that extracts data from a pdf file, cleans it up and stores it in a database (using couchdb and accessing that using nano library).
The problem is that the calls are being made asynchronously... so the database get calls (i make some get calls to get a few affiliation files during the clean up) get completed only after the program runs resulting in variables being undefined. is there any way around this?
I've reproduced my code below
const fs = require('fs');
const os = require('os');
var couchDB = require('couch-db').CouchDB;
var pdf_table_extractor = require('pdf-table-extractor');
const filename = "PQ-PRI-0005-1806-01-0000_quoteSlipForLIBVIDGI1.pdf"
var nano = require('nano')('https://couchadmin:difficulttoguessmypassword#dbdev.perilwise.com');
var server = new couchDB('https://db.url.com');
server.auth("admin","admin");
var db = nano.db.use('pwfb');
var temp = [];
//New callView function
async function callView(){
try{
const doc = await view('liabilitymdm','pi');
for (var i =0; i<doc.rows.length;i++){
tmp.push(doc.rows[i]);
};
return doc;
} catch(e){
console.log(e);
};
};
function suc(result){
let ttmp = [];
console.log(result);
var pageTables = result.pageTables;
var firstPageTables = pageTables[0].tables;
ttmp = callView();
//this console log shows Promise { <pending> }
console.log(ttmp)
for (var k = 0; k < firstPageTables.length; k++) {
var temp = firstPageTables[k];
if (temp.length > 0) {
dump.push(temp);
}
}
// console.log(dump);
var insurer = filename.substr(37,8);
read_quote_slip(insurer,dump);
}
var read_quote_slip = (insurer,data) => {
console.log("read_quote_slip correctly entered");
var finOut = {};
if (insurer === "LIBVIDGI"){
finOut.insurer = insurer;
finOut.policyType = data[2][0].replace(/Quotation for/g,"");
finOut.natureOfWork = data[13][3];
let dedpos = indexGetter(data, "Deductible")[0];
finOut.deductible = data[dedpos+1][0];
let cov = indexGetter(data, "Coverage Territory and Jurisdiction")[0];
finOut.coverageTerritory = data[cov+1][0].replace(/Territory/g,"");
finOut.coverageJurisdiction = data[cov+2][0].replace(/Jurisdiction/g,"");
let ext = indexGetter(data,"Extensions")[0];
finOut.coverage = data[ext+1][0].split(/\r?\n/);
let majexc = indexGetter(data,"Major Exclusions")[0];
finOut.exclusions = data[majexc+1][0].split(/\r?\n/);
let prdtl = indexGetter(data,"Description")[0];
let prm = premiumcompute(data,prdtl,dedpos);
finOut.premium = prm;
finCleaned = libvidgi_cleaned(finOut);
// console.log(finCleaned);
}
}
var indexGetter = (words,toFind) => {
var finindex = [];
for (var i = 0; i < words.length; i++){
for (var j = 0; j < words[i].length; j++){
if(words[i][j].indexOf(toFind) >=0 ){
finindex.push(i);
}
}
}
return finindex;
}
var premiumcompute = (data, from, to) => {
let finprem = [];
let numbop = to - from - 2;
let incr = 0;
for (var i = from+2; i < to; i++){
let pr = {};
pr.option = incr+1;
pr.sumInsured = data[i][2].replace(/ /g,"");
pr.premium = data[i][data[i].length - 1].replace(/ /g,"");
finprem.push(pr);
incr +=1;
}
return finprem;
}
var libvidgi_cleaned = (finOut) => {
return finOut;
}
var fal = (result) => {
console.log(result);
console.log("there was an error");
}
var readPDFFile = function(filename){
//Decide which insurer from the filename
// console.log(filename);
console.log(filename.substr(37,8)+"Printed on line 38");
insurer = filename.substr(37,8)
pdf_table_extractor(filename, (result) => {suc(result)} , fal);
}
var libvidgi_data_extract = (data) => {
console.log(data);
let arr = data.pageTables.tables;
for (var i = 0; i <= arr.length; i++ ){
console.log(arr[i]);
}
}
readPDFFile(filename);
This answer assumes you are using Node.js > v7.6
Since db.view accepts a callback, and you wish to wait for it to finish, one solution will be to promisify it - meaning to turn it into a promise which can be awaited. You can use a library like Bluebird or you can even use Node's builtin promisify util. Then you can rewrite callViews:
const {promisify} = require('util');
const view = promisify(db.view);
async function callView() {
try {
const doc = await view('liabilitymdm', 'pi');
// the async operation is now guaranteed to be done
// (if there is an error it will be caught by the catch clause)
for (var i = 0; i < doc.rows.length; i++) {
temp.push(doc.rows[i]);
}
console.log(temp);
} catch (e) {
}
}
If you are not using Node.js > v7.6 (and cannot use async\await you can still utilize promises, by using their then method:
const {promisify} = require('util');
const view = promisify(db.view);
function callView() {
view('liabilitymdm', 'pi')
.then(doc => {
for (var i = 0; i < doc.rows.length; i++) {
temp.push(doc.rows[i]);
}
console.log(temp);
return temp;
})
.then(temp => {
console.log(temp);
})
.catch(e => {});
}
Notice how the first then is returning something which is used in a later then.
To make Node run asynchronously, you can use the keywords async and await.
They work like this:
async function doSomething () {
const formattedData = formatData();
const result = await db.postToDatabase(formattedData);
// the below will not happen until the above line is finished
doSomethingElse(result);
}
It's pretty simple in Node to get functions to execute asynchronously. Just put the async keyword at the beginning of the function definition and then put await in front of anything that you want to block execution until completed.

Jest test init conditions

Lets say I have a file file1.js which contains:
const a = true;
let b;
if (a) {
b = c;
}
if (!a) {
b = d;
}
Now when I run test case on this file my first condition gets covered. Is there any way that I can cover second condition by setting a to false or should I change my code in a way that I can call a method with different values to test for each case kind of like:
const a = true;
getBVal(a) {
return a ? c : d;
}
let b = getBVal(a);
Update:
Below is my code for requestAnimationFrame with fallbacks for older browsers:
let lastTime = 0;
const vendors = ["ms", "moz", "webkit", "o"];
let rAF = window.requestAnimationFrame;
if (!rAF) {
rAF = vendors.find(prefix => window[`${prefix}RequestAnimationFrame`]);
}
if (!rAF) {
rAF = cB => {
const currTime = new Date().getTime();
const timeToCall = Math.max(0, 16 - (currTime - lastTime));
const id = setTimeout(() => {
cB(currTime + timeToCall);
}, timeToCall);
lastTime = currTime + timeToCall;
return id;
};
}
function requestAnimationFrame(callback) {
return rAF(callback);
}
export default requestAnimationFrame;
I am using jsdom in setup for window object. Now if I have to test case for window.requestAnimationFrame = null its not possible in the way I have written my code
Now after changing it to:
import { requestAnimationFrameVendor } from "../constants";
let lastTime = 0;
const customFn = cB => {
const currTime = new Date().getTime();
const timeToCall = Math.max(0, 16 - (currTime - lastTime));
const id = setTimeout(() => {
cB(currTime + timeToCall);
}, timeToCall);
lastTime = currTime + timeToCall;
return id;
};
function requestAnimationFrame(callback) {
const rAF = window.requestAnimationFrame;
return rAF && rAF(callback) || requestAnimationFrameVendor && requestAnimationFrameVendor(callback) || customFn(callback);
}
export default requestAnimationFrame;
And then if I write test like:
import * as constants from "../../constants";
describe("animationFrame", () => {
let requestAnimationFrame;
let cancelAnimationFrame;
beforeAll(() => {
requestAnimationFrame = global.window.requestAnimationFrame;
cancelAnimationFrame = global.window.cancelAnimationFrame;
});
test("requestAnimationFrame", done => {
global.window.requestAnimationFrame = null;
global.window.cancelAnimationFrame = null;
const requestId1 = Utils.requestAnimationFrame(jest.fn());
constants.requestAnimationFrameVendor = jest.fn(() => {
return requestAnimationFrame;
});
const requestId2 = Utils.requestAnimationFrame(jest.fn());
setTimeout(() => {
Utils.cancelAnimationFrame(requestId1);
Utils.cancelAnimationFrame(requestId2);
done();
}, 300);
});
afterEach(() => {
global.window.webkitRequestAnimationFrame = null;
global.window.webkitCancelAnimationFrame = null;
});
});
Then it covers all the conditions.
I would go the second rout (getBVal()) because it makes your interface more testable.
In general, removing global state (like your const a or let b) will make your code and interfaces more testable. If you cannot remove the global state completely, you can introduce abstractions so that your test does not need to know about the global state (like your suggested getBVal()).
Maybe you can go even further and remove the global b: Instead always call getBVal() instead? The performance impact will be negligible in most cases, and your code becomes even more testable and less coupled...

Resources