How to avoid require avalanches with node.js? - node.js

I want to cache some data in node server:
rpc data provider like this:
// Number of times the simulated RPC has actually been executed.
let cnt = 0;

/**
 * Simulated slow RPC: after a 100 ms delay it reports how often it has been
 * called and resolves with the ten consecutive ids starting at `areaId * 10`.
 *
 * @param {number} areaId - Parent area id.
 * @returns {Promise<number[]>} Promise for the ten child ids.
 */
function rpcDataProvider(areaId) {
  return Q.delay(100).then(() => {
    cnt += 1;
    console.log("I am rpcDataProvider and I am not so fast. I am requested " + cnt + " times.");
    const firstId = areaId * 10;
    return Array.from({ length: 10 }, (_, offset) => firstId + offset);
  });
}
http server
// cache data
// Each slot stores the resolved list, so it is only filled once the RPC has
// answered; callers that arrive before that each start their own RPC.
var provinceList;
var cityList;
var countyList;

/**
 * Resolves with the province list, serving it from `provinceList` once a
 * non-empty result has been stored there.
 *
 * @returns {Promise<number[]>} Promise for the province ids.
 */
function getProvinceList() {
  const isCached = Boolean(provinceList) && provinceList.length > 0;
  if (!isCached) {
    return rpcDataProvider(1).then((provinces) => {
      provinceList = provinces;
      return provinces;
    });
  }
  return Q.resolve(provinceList);
}
/**
 * Resolves with the cities of every province, concatenated in province order.
 * Served from `cityList` once a non-empty result has been stored there.
 *
 * @returns {Promise<number[]>} Promise for the flattened city ids.
 */
function getCityList() {
  const hasCachedCities = Boolean(cityList) && cityList.length > 0;
  if (hasCachedCities) {
    return Q.resolve(cityList);
  }

  const storeCities = (chunks) => {
    cityList = chunks.reduce((merged, chunk) => merged.concat(chunk));
    return cityList;
  };
  return getProvinceList().then((provinces) => {
    const pending = provinces.map((provinceId) => rpcDataProvider(provinceId));
    return Q.all(pending).then(storeCities);
  });
}
/**
 * Resolves with the counties of every city, concatenated in city order.
 * Served from `countyList` once a non-empty result has been stored there.
 *
 * @returns {Promise<number[]>} Promise for the flattened county ids.
 */
function getCountyList() {
  const hasCachedCounties = Boolean(countyList) && countyList.length > 0;
  if (hasCachedCounties) {
    return Q.resolve(countyList);
  }

  const storeCounties = (chunks) => {
    countyList = chunks.reduce((merged, chunk) => merged.concat(chunk));
    return countyList;
  };
  return getCityList().then((cities) => {
    const pending = cities.map((cityId) => rpcDataProvider(cityId));
    return Q.all(pending).then(storeCounties);
  });
}
// Simulated HTTP endpoints: each one announces itself on stdout and then
// answers with the list produced by the matching getter.

/** Logs `banner`, then returns whatever `load` returns. */
function announceAndLoad(banner, load) {
  console.log(banner);
  return load();
}

/** Province endpoint. */
function api1() {
  return announceAndLoad("I am http api1", () => getProvinceList());
}

/** City endpoint. */
function api2() {
  return announceAndLoad("I am http api2", () => getCityList());
}

/** County endpoint. */
function api3() {
  return announceAndLoad("I am http api3", () => getCountyList());
}

/** County endpoint (second consumer). */
function api4() {
  return announceAndLoad("I am http api4", () => getCountyList());
}

/** County endpoint (third consumer). */
function api5() {
  return announceAndLoad("I am http api5", () => getCountyList());
}
client request:
/**
 * Simulated client: fires all five API calls at the same time.
 *
 * @returns {Promise<Array>} Promise for the five results, in call order. It is
 *   returned (rather than dropped) so callers can await completion and handle
 *   a failing endpoint instead of leaving the rejection unobserved.
 */
function httpRequest() {
  console.log("I am client.");
  return Q.all([api1(), api2(), api3(), api4(), api5()]);
}
httpRequest();
The problem is that with parallel requests the cache does not work!

Don't cache the promise value. Cache the promise.
// cache data
// Each slot holds the promise for the list (not the resolved list), so every
// caller that arrives while a request is in flight shares that one request.
var provinceList;
var cityList;
var countyList;

/**
 * Calls rpcDataProvider for every id in `list` in parallel and flattens the
 * per-id results into one array, preserving the order of `list`.
 *
 * @param {number[]} list - Parent ids to expand.
 * @returns {Promise<number[]>} Promise for the concatenated child ids
 *   (an empty array when `list` is empty).
 */
function rpcReduceAndConcat(list) {
  // Wrap the call so map's extra (index, array) arguments are not forwarded.
  return Q.all(list.map((id) => rpcDataProvider(id))).then((rvs) => {
    // Seed with [] so an empty list yields [] instead of a TypeError.
    return rvs.reduce((prev, cur) => prev.concat(cur), []);
  });
}
/**
 * Returns the shared promise for the province list, starting the RPC on the
 * first call only.
 *
 * @returns {Promise<number[]>} Promise for the province ids.
 */
function getProvinceList() {
  if (!provinceList) {
    const pending = rpcDataProvider(1);
    provinceList = pending;
    // A failed request must not stay cached forever: drop it so the next
    // caller retries. Callers holding `pending` still observe the rejection.
    pending.then(null, () => {
      if (provinceList === pending) provinceList = undefined;
    });
  }
  return provinceList;
}
/**
 * Returns the shared promise for the city list, deriving it from the province
 * list on the first call only.
 *
 * @returns {Promise<number[]>} Promise for the flattened city ids.
 */
function getCityList() {
  if (!cityList) {
    const pending = getProvinceList().then(rpcReduceAndConcat);
    cityList = pending;
    // A failed request must not stay cached forever: drop it so the next
    // caller retries. Callers holding `pending` still observe the rejection.
    pending.then(null, () => {
      if (cityList === pending) cityList = undefined;
    });
  }
  return cityList;
}
/**
 * Returns the shared promise for the county list, deriving it from the city
 * list on the first call only.
 *
 * @returns {Promise<number[]>} Promise for the flattened county ids.
 */
function getCountyList() {
  if (!countyList) {
    const pending = getCityList().then(rpcReduceAndConcat);
    countyList = pending;
    // A failed request must not stay cached forever: drop it so the next
    // caller retries. Callers holding `pending` still observe the rejection.
    pending.then(null, () => {
      if (countyList === pending) countyList = undefined;
    });
  }
  return countyList;
}

Related

reset request queue in node.js vanilla server

I have a program that accepts POST requests whose body contains numbers. My program checks whether the numbers in the body are prime. Everything works the first time, but on every later request the program still remembers the earlier POST bodies and checks them as well.
How can I reset the request queue so that my program only checks the newest POST request?
here is the code:
import { createServer } from "http";
import { parse } from "querystring";
import { url } from "inspector";
import { port } from "./config.js";
let body = "";
let checkNumber = true;
const PORT = process.env.PORT || port;
/**
 * HTTP server for prime validation.
 *
 * A form-urlencoded POST to /api/numbers/prime/validate is answered with the
 * text "true" or "false" (the value of `checkNumber` after
 * checkValuesInObject has run on the parsed body). Every other request gets an
 * empty 200 response; a failure while handling the body yields an empty 500.
 */
const server = createServer(async (req, res) => {
  const FORM_URLENCODED = "application/x-www-form-urlencoded";
  const POST_PATH = "api/numbers/prime/validate";

  res.statusCode = 200;
  res.setHeader("Content-Type", "text/html");

  const isValidationRequest =
    req.method === "POST" &&
    req.url === `/${POST_PATH}` &&
    req.headers["content-type"] === FORM_URLENCODED;
  if (!isValidationRequest) {
    res.end();
    return;
  }

  try {
    // Collect the body in a request-local buffer: shared state would make
    // every request re-check the numbers sent by all earlier requests.
    let requestBody = "";
    req.setEncoding("utf8");
    // Wait for the whole body; parsing before the stream has ended would see
    // an empty or partial payload.
    for await (const chunk of req) {
      requestBody += chunk;
    }
    await checkValuesInObject(parse(requestBody));
    console.log(checkNumber);
    res.end(`${checkNumber}`);
  } catch (err) {
    console.error(err);
    res.statusCode = 500;
    res.end();
  }
});
server.listen(PORT, () => {
console.log(`listening on port ${PORT}`);
});
/**
 * Tells whether `number` is a prime number.
 *
 * @param {number|string} number - Candidate; numeric strings (as produced by
 *   form parsing) are converted with Number().
 * @returns {Promise<boolean>} true only for integers >= 2 that have no
 *   divisor other than 1 and themselves; false for everything else, including
 *   0, 1, negatives, fractions and non-numeric input.
 */
async function checkIfFirstNumber(number) {
  const candidate = Number(number);
  if (!Number.isInteger(candidate) || candidate < 2) {
    return false;
  }
  // A composite number always has a divisor no larger than its square root.
  for (let divisor = 2; divisor * divisor <= candidate; divisor++) {
    if (candidate % divisor === 0) {
      return false;
    }
  }
  return true;
}
/**
 * Checks every value of a parsed form body for primality.
 *
 * @param {Object<string, string|string[]>} obj - Parsed body; a key that was
 *   sent once maps to a string, a repeated key maps to an array of strings.
 * @returns {Promise<boolean>} true when every submitted value is prime (also
 *   for an empty object). The same result is stored in `checkNumber`.
 */
async function checkValuesInObject(obj) {
  const candidates = [];
  for (const value of Object.values(obj)) {
    // A single value arrives as a plain string; indexing it would test each
    // character separately, so only arrays are spread.
    if (Array.isArray(value)) {
      candidates.push(...value);
    } else {
      candidates.push(value);
    }
  }

  const answers = await Promise.all(
    candidates.map((candidate) => checkIfFirstNumber(candidate))
  );
  for (const answer of answers) {
    console.log(`answer: ${answer}`);
  }

  // Set the flag once, after all checks have finished, so the caller never
  // reads a half-updated value.
  checkNumber = answers.every(Boolean);
  return checkNumber;
}
/**
 * Lists the primes from 2 up to and including `number`.
 *
 * @param {number} number - Inclusive upper bound.
 * @returns {Promise<string>} The primes in ascending order, each followed by
 *   a single space ("" when there are none).
 */
async function printPrime(number) {
  const found = [];
  let candidate = 2;
  while (candidate <= number) {
    if (await checkIfFirstNumber(candidate)) {
      found.push(candidate + " ");
    }
    candidate += 1;
  }
  return found.join("");
}

Why on my NodeJS+Express REST API a promise calling my function fails while the same promise with setTimeout works?

I have a NodeJS+Express REST API method executing reverse geocoding (using Google's Maps API).
I'm trying to solve it with Promises but the 'then' is getting executed before my function returns with the answers from Google.
When testing the same code just calling a setTimeout, it works as expected. Please see comments in the code (simplify version).
// GET /api/v1/events (behind verifyToken): runs stored procedure 'sp' with the
// caller's user id and the dateFrom/dateTo query values, then answers
// 403 (UserIdAuthorized output not set), 404 (empty recordset), 500 (the
// promise chain rejected) or JSON containing the geocoded recordset.
app.get('/api/v1/events', verifyToken, async (req, res) => {
await db.poolPromise.then(pool => {
return pool.request()
.input('UserId', db.sql.UniqueIdentifier, res.authData.userId)
.input('DateFrom', db.sql.DateTime2(7), req.query.dateFrom)
.input('DateTill', db.sql.DateTime2(7), req.query.dateTo)
.output('UserIdAuthorized', db.sql.Bit)
.execute('sp')
}).then(result => {
// result.output may be missing, hence the {} fallback.
let output = (result.output || {})
if (!output.UserIdAuthorized) {
res.sendStatus(403)
}
else if (result.recordset.length > 0) {
// This inner promise is not returned from the surrounding then() callback,
// so a rejection inside it does not reach the .catch() at the bottom.
(new Promise( (resolve) => {
//resolve(123) // this one works as expected
//setTimeout(resolve, 3000, 'temp success') // this one works as expected
// *** this one get passed and the following then is being executed before it answers ***
resolve( getAddress_TEST(result.recordset) )
// **************************************************************************************
})).then(function (value) {
res.json(
{
meta: { count: 10 }, //this is just a sample
result: value // *** this one fails with undefined ***
})
})
} else {
res.sendStatus(404)
}
}).catch(err => {
res.sendStatus(500)
console.error(err)
})
});
const nodeGeocoder_options = {
provider: 'google',
apiKey: process.env.GOOGLE_API_KEY
}
// Reverse-geocodes every record and stores the address in `locationAddress`,
// reusing the previous record's address when Lat/Lon are unchanged; Lat and
// Lon are then deleted from each record.
// NOTE: this function has no return statement of its own. `return recordset`
// below belongs to the last then() callback, so the promise returned by
// getAddress_TEST fulfils with undefined.
async function getAddress_TEST(recordset) {
//sample recordset for debugging - as you dont have my database
recordset = [{'eventId':14205556,'Lat':54.57767,'Lon':-2.4920483},{'eventId':14205558,'Lat':54.57767,'Lon':-2.492048},{'eventId':14205579,'Lat':53.416908,'Lon':-2.952071},{'eventId':14205588,'Lat':52.644448,'Lon':-1.153185},{'eventId':14205601,'Lat':52.29174,'Lon':-1.532283},{'eventId':14205645,'Lat':52.644448,'Lon':-1.153185},{'eventId':14205801,'Lat':53.68687,'Lon':-1.498708},{'eventId':14206041,'Lat':51.471521,'Lon':-0.2038033},{'eventId':14206049,'Lat':51.471521,'Lon':-0.2038033},{'eventId':14206072,'Lat':51.471521,'Lon':-0.2038033}]
let geocoder = nodeGeocoder(nodeGeocoder_options)
// One entry per record: a lookup promise, or '-' meaning "same position as
// the previous record".
let ps = []
for (var i = 0, length = recordset.length; i < length; i++) {
if (i == 0 || !(i > 0
&& recordset[i - 1].Lat == recordset[i].Lat
&& recordset[i - 1].Lon == recordset[i].Lon)) {
ps.push(new Promise(function (resolve) {
resolve(reverseGeocode(geocoder, recordset[i].Lat, recordset[i].Lon))
}))
} else {
ps.push('-')
}
}
await Promise.all(ps)
.then(function (values) {
for (var i = 0, length = values.length; i < length; i++) {
if (values[i] != '-') {
recordset[i].locationAddress = values[i]
} else {
// '-' placeholder: copy the address resolved for the previous record.
recordset[i].locationAddress = recordset[i - 1].locationAddress
}
}
}).then(function () {
recordset.forEach(function (v) {
delete v.Lat
delete v.Lon
});
console.log(recordset)
return recordset
})
};
/**
 * Looks up the formatted address for a coordinate pair.
 *
 * @param geocoder - Object exposing reverse({ lat, lon }) that returns a promise.
 * @param lat - Latitude.
 * @param lon - Longitude.
 * @returns {Promise<string>} The first result's formattedAddress, or '+' when
 *   either coordinate equals 0 or the lookup fails (the failure is logged).
 */
async function reverseGeocode(geocoder, lat, lon) {
  if (lat == 0 || lon == 0) {
    return '+'
  }
  const lookup = geocoder.reverse({ lat: lat, lon: lon })
  try {
    const matches = await lookup
    return matches[0].formattedAddress
  } catch (err) {
    console.error(err)
    return '+'
  }
};
I'm sure it is something simple that I'm missing here...
The basic problem is that your getAddress_TEST function returns a promise that fulfills with nothing (undefined), because it does not contain a return statement. The return recordset is in a then() callback, from where it affects the promise resolution of the awaited promise, but that result is thrown away.
If you want to use async/await, you should get rid of any new Promise and then calls:
// GET /api/v1/events (behind verifyToken): runs stored procedure 'sp' for the
// caller and date range, then answers 403 when the procedure does not flag the
// user as authorized, 404 for an empty recordset, 500 on any thrown error, or
// JSON with the geocoded recordset.
app.get('/api/v1/events', verifyToken, async (req, res) => {
  try {
    const pool = await db.poolPromise
    const procedure = pool.request()
      .input('UserId', db.sql.UniqueIdentifier, res.authData.userId)
      .input('DateFrom', db.sql.DateTime2(7), req.query.dateFrom)
      .input('DateTill', db.sql.DateTime2(7), req.query.dateTo)
      .output('UserIdAuthorized', db.sql.Bit)
    const result = await procedure.execute('sp')

    const { UserIdAuthorized } = result.output || {}
    if (!UserIdAuthorized) {
      res.sendStatus(403)
      return
    }
    if (!(result.recordset.length > 0)) {
      res.sendStatus(404)
      return
    }

    const value = await getAddress_TEST(result.recordset)
    res.json({
      meta: { count: 10 }, //this is just a sample
      result: value
    })
  } catch(err) {
    res.sendStatus(500)
    console.error(err)
  }
});
const nodeGeocoder_options = {
provider: 'google',
apiKey: process.env.GOOGLE_API_KEY
}
/**
 * Adds a `locationAddress` to every record and removes its `Lat`/`Lon`.
 *
 * Consecutive records with the same position share one geocoder lookup.
 * The records are modified in place.
 *
 * @param {Array<{Lat: number, Lon: number}>} recordset - Records to annotate.
 * @returns {Promise<Array>} The same `recordset` array, once every lookup has
 *   finished. Rejects if any lookup rejects.
 */
async function getAddress_TEST(recordset) {
  const geocoder = nodeGeocoder(nodeGeocoder_options)
  const lookups = []
  recordset.forEach((record, i) => {
    const previous = recordset[i - 1]
    const samePosition = i > 0
      && previous.Lat == record.Lat
      && previous.Lon == record.Lon
    // Reuse the previous record's pending lookup instead of asking the
    // geocoder again for the same coordinates.
    lookups.push(samePosition
      ? lookups[i - 1]
      : reverseGeocode(geocoder, record.Lat, record.Lon))
  })

  const addresses = await Promise.all(lookups)
  recordset.forEach((record, i) => {
    record.locationAddress = addresses[i]
    delete record.Lat
    delete record.Lon
  })
  console.log(recordset)
  return recordset
}
/**
 * Looks up the formatted address for a coordinate pair.
 *
 * @param geocoder - Object exposing reverse({ lat, lon }) that returns a promise.
 * @param lat - Latitude.
 * @param lon - Longitude.
 * @returns {Promise<string>} The first result's formattedAddress, or the
 *   placeholder '+' when either coordinate equals 0 or the geocoder finds
 *   nothing. Rejects when the geocoder call itself fails.
 */
async function reverseGeocode(geocoder, lat, lon) {
  if (lat == 0 || lon == 0) {
    return '+'
  }
  const res = await geocoder.reverse({ lat: lat, lon: lon })
  // No match for these coordinates: fall back to the placeholder rather than
  // failing on res[0] being undefined.
  if (!Array.isArray(res) || res.length === 0) {
    return '+'
  }
  return res[0].formattedAddress
}

How to get code to execute in order in node.js

I am trying to finish my script, but for some reason i don't know, it refuses to execute in the order i put it in.
I've tried placing a 'wait' function between the JoinRequest update function and the following code, but when run, it acts as if the function call and wait function were the other way round, countering the point of the wait().
const Roblox = require('noblox.js')
var fs = require('fs');
var joinRequests = []
...
// Blocks the calling thread in a busy loop. Despite the parameter name, the
// argument is multiplied by 1000, so wait(1) spins for one second.
function wait(ms) {
  const deadline = Date.now() + ms * 1000;
  while (Date.now() < deadline) {
    // spin
  }
};
...
// Starts fetching the join requests of group 4745601 and, once they arrive,
// stores their usernames in `joinRequests`. Returns nothing, so callers cannot
// tell when the update has finished.
function updateJReqs() {
  Roblox.getJoinRequests(4745601).then((array) => {
    let final = Array.from(array, (request) => request.username);
    // Compares the array itself with '', which never matches.
    if (final === '') {
      final = '-None';
    }
    joinRequests = final
    console.log('Updated join requests.')
  });
}
function check() {
setTimeout(() => {
fs.readFile('Request.txt',encoding = 'utf-8', function(err, data) {
if (err) {
check();
} else {
updateJReqs(); //for some reason this function is executed alongside the below, not before it.
// Tried putting wait(x) in here.
console.log('Request received: ' + data)
var solution = joinRequests
console.log('Fuffiling request with ' + solution)
fufillRequest(solution)
fs.unlink('Request.txt', function(err) {
if(err) throw err;
});
check();
}
});
}, 400)
}
check();
The script is supposed to wait until a file is created (accomplished), update the list of join requests (accomplished) and then create a new file with the list of join requests in(not accomplished).
If I understand your code correctly, you are working with asynchronous code: you need to return a promise from updateJReqs and wait for it before using joinRequests. You also need an exit condition in check, because as written it re-arms itself forever.
/**
 * Refreshes `joinRequests` with the usernames of the pending join requests of
 * group 4745601, or with the string '-None' when there are none.
 *
 * @returns {Promise<void>} Settles once `joinRequests` has been updated;
 *   rejects if the join requests cannot be fetched.
 */
function updateJReqs() {
  // Return the library's promise directly: wrapping it in `new Promise`
  // would never settle when the fetch fails.
  return Roblox.getJoinRequests(4745601).then((array) => {
    const usernames = array.map((request) => request.username);
    joinRequests = usernames.length > 0 ? usernames : '-None';
    console.log('Updated join requests.')
  });
}
/**
 * Polls every 400 ms until Request.txt exists, then refreshes the join
 * requests, fulfils the request with them and deletes the file.
 *
 * @returns {Promise<string>} Resolves with 'Success' after one request has
 *   been handled. Rejects when reading fails for a reason other than the file
 *   not existing yet, or when the update or the delete fails.
 */
async function check() {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  for (;;) {
    await pause(400);
    let data;
    try {
      data = await fs.promises.readFile('Request.txt', 'utf-8');
    } catch (err) {
      // The file simply has not been created yet: keep polling.
      if (err.code === 'ENOENT') continue;
      throw err;
    }
    // Wait for the refresh so `joinRequests` is current before it is used.
    await updateJReqs();
    console.log('Request received: ' + data)
    const solution = joinRequests
    console.log('Fuffiling request with ' + solution)
    fufillRequest(solution)
    await fs.promises.unlink('Request.txt');
    return 'Success';
  }
}
check().then(res => console.log(res));

Node js Scraper

I have written a scraper in typescript, Running on node:10.12.0,
Issue: The code randomly stops making progress after a few hours, and I have to restart it. My best guess is that it gets stuck on a URL request.
Tools/Packages Using:
Puppeteer
Cheerio
Typescript
Code:
import * as cheerio from "cheerio";
import * as request from "request";
import * as fs from "fs";
import * as shell from "shelljs";
import pup = require("puppeteer");
/**
 * Scraper that walks paginated listing pages with Puppeteer (step1) and
 * stores each linked detail page, fetched with request + cheerio, as JSON
 * (step2). `browser` must be assigned before step1 is called.
 */
class App {
    // @ts-ignore
    public browser: pup.Browser;

    /**
     * Appends `content` to `file` synchronously.
     * Resolves with "DONE", or rejects with the error thrown by the write.
     */
    public appendToFile(file: string, content: string): Promise<string> {
        return new Promise<string>((resolve, reject) => {
            try {
                fs.appendFileSync(file, content);
                resolve("DONE");
            } catch (e) {
                reject(e);
            }
        });
    }

    /**
     * Downloads `url` and resolves with the response body.
     *
     * @param url       Address to fetch.
     * @param timeoutMs Passed as the request's `timeout` option so a stalled
     *                  request fails instead of blocking the scraper forever.
     * Rejects with the transport error, or with an Error naming the status
     * code when the response is not 200.
     */
    public loadPage(url: string, timeoutMs: number = 1500): Promise<any> {
        return new Promise<any>((resolve, reject) => {
            request.get(url, { timeout: timeoutMs }, (err, res, html) => {
                if (err) {
                    reject(err);
                    return;
                }
                if (res.statusCode !== 200) {
                    reject(new Error("Unexpected status " + res.statusCode + " for " + url));
                    return;
                }
                resolve(html);
            });
        });
    }

    /**
     * Scrapes every page of the listing at `url`: each product link on a page
     * is handed to step2, then "#pagination-next" is clicked until that
     * element carries a non-empty style attribute (or is absent).
     *
     * Links that fail in step2 are logged to ./error-2.txt and skipped.
     * Resolves with "page all scrapped"; rejects when the browser work fails.
     * The opened page is always closed.
     */
    public async step1(url: string): Promise<string> {
        let page: pup.Page | undefined;
        try {
            const segments = url.split("/");
            const folder = segments[segments.length - 3] || "unknown";
            shell.mkdir("-p", "data/" + folder);

            page = await this.browser.newPage();
            // NOTE(review): timeout 0 disables Puppeteer's navigation timeout,
            // so a page that never finishes loading blocks here — confirm
            // whether a finite limit is wanted.
            await page.goto(url, {
                timeout: 0
            });

            let count = 1;
            let next = false;
            do {
                next = false;
                const links = await page.evaluate(() => {
                    let e = document.querySelectorAll(".ch-product-view-list-container.list-view li ul > li > h6 > a");
                    let p: string[] = [];
                    e.forEach((v) => {
                        p.push(("https://www.link.com") + (v.getAttribute("href") as string));
                    });
                    return p;
                });

                // Detail pages are fetched one at a time; `c` numbers the
                // output file of each link on the current listing page.
                let c = 1;
                for (const link of links) {
                    try {
                        await this.step2(link, folder, c.toString());
                    } catch (_e) {
                        console.log(_e);
                        fs.appendFileSync("./error-2.txt", url + " ### " + link + "\n");
                    }
                    c++;
                }

                // Progress logging is best effort: a failed write must not
                // abort the scrape, but it should not vanish silently either.
                await this.appendToFile("./processed.txt", url + ":" + count.toString() + "\n")
                    .catch((e) => console.log(e));
                count++;
                console.log(url + ":" + count);

                const nextStyle = await page.evaluate(() => {
                    const ele = document.querySelector("#pagination-next");
                    // A missing button is reported as null so it is treated
                    // as "no further page" instead of throwing.
                    return ele ? (ele.getAttribute("style") || "") : null;
                });
                if (nextStyle === "") {
                    next = true;
                    await page.click("#pagination-next");
                    await page.waitFor(1000);
                }
            } while (next);

            return "page all scrapped";
        } finally {
            if (page !== undefined) {
                await page.close().catch(e => e);
            }
        }
    }

    /**
     * Downloads the detail page at `url`, extracts its title and question body
     * and appends them as one JSON object (followed by ",\n") to
     * ./data/<folder>/data<file>.json.
     *
     * Resolves with appendToFile's result; rejects when the download, the
     * parsing or the write fails.
     */
    public async step2(url: string, folder: string, file: string): Promise<string> {
        const html = await this.loadPage(url);
        const $ = cheerio.load(html);
        const ress: any = {};

        const t = $(".qal_title_heading").text();
        if (t) {
            ress.header = t.replace(/"/g, "'").replace(/\n|\r|\t/g, "");
        }
        const d = $("div.ch_formatted_text.qal_thread-content_text.asker").html();
        if (d) {
            ress.body = d.replace(/"/g, "'").replace(/\n|\r|\t/g, "");
        }

        const filename = "data" + file + ".json";
        const data = JSON.stringify(ress);
        return this.appendToFile("./data/" + folder + "/" + filename, data + ",\n");
    }
}
/**
 * Entry point: reads one listing URL per line from the file named by the
 * first command-line argument and scrapes them one after another with a
 * single headless browser. Each URL is appended to ./processed.txt on
 * success or ./error.txt on failure, one per line.
 */
async function main() {
    process.on("SIGTERM", () => {
        console.log("SigTerm received");
        process.exit(1);
    });
    process.on("SIGINT", () => {
        console.log("SigInt received");
        process.exit(1);
    });

    shell.mkdir("-p", "data/unknown");
    const c = new App();

    console.log(process.argv[2]);
    // Skip blank lines (such as the one after a trailing newline) so they are
    // not treated as URLs.
    const list: string[] = fs.readFileSync(process.argv[2], "utf-8")
        .split(/\r?\n/)
        .filter((line: string) => line.trim() !== "");
    console.log("total links->" + list.length);

    c.browser = await pup.launch({
        headless: true
    });
    try {
        for (const l of list) {
            try {
                await c.step1(l);
                // Terminate each entry so consecutive URLs stay separable.
                fs.appendFileSync("./processed.txt", l + "\n");
            } catch (e) {
                console.log(e);
                fs.appendFileSync("./error.txt", l + "\n");
            }
        }
    } finally {
        // Close the browser even when the loop throws, so the process can exit.
        await c.browser.close();
    }
}
main();
Let me know if you need something else from me. Also this is all the code.
So, I found two problems.
1. Chrome (under Puppeteer) consumes a lot of CPU, with a trend like this:
it starts at moderate usage and gradually increases. In my case it started at 4% usage and reached 100% after a day. I've submitted an issue on their GitHub repository.
2. I did not specify a timeout for the request.
was:
request.get(url, async (err, res, html) => {
should be:
request.get(url,{timeout: 1500} async (err, res, html) => {
So far my code has been running fine for more than a day. The only remaining issue is the high CPU usage, but that is not my concern for now.

How to traverse all files, and support pause and continue

I have created a NodeJS (electron) code for read all the files in a specific directory and subdirectories.
I don't want to use too much HD resources, that why I use a delay of 5ms between folders.
Now my question: if my Node process stops, I want to be able to continue from where it stopped. How should I do that?
In other words: how can I keep track of the current position while walking through all files and folders, so that I can continue the traversal from where it stopped?
Thank you
My Code:
/**
 * Walks `options.dir` recursively, one entry at a time, pausing 5 ms before
 * descending into each sub directory. Progress is logged after every 1000th
 * file as (seconds since start, files seen so far, current path).
 *
 * Directories whose path contains "Recycle", ".git" or "node_modules" are not
 * scanned; entries that cannot be stat'ed and directories that cannot be
 * listed are skipped.
 *
 * @param {{dir: string}} options - `dir` is the directory to start from.
 */
var walkAll=function(options){
  var skipPatterns=[/Recycle/,/\.git/,/node_modules/]
  // Reference point for the elapsed time shown in the progress log.
  var start=new Date().valueOf()
  var x=0
  walk(options.dir,function(){})

  function walk(dir,callback) {
    // Every entry of a skipped directory would be ignored, so do not read it.
    var skipped=skipPatterns.some(function(pattern){ return pattern.test(dir) })
    if (skipped) return callback()

    var files
    try {
      files=fs.readdirSync(dir)
    } catch (err) {
      // An unreadable directory must not abort the whole walk.
      console.error(err)
      return callback()
    }

    async.eachSeries(files,function(name,next){
      var file=dir +'/' + name
      fs.lstat(file,function(err,stat){
        if(err) return next()
        // 41398 is 0o120666: symbolic-link type bits with rw-rw-rw- permissions.
        // NOTE(review): presumably filters Windows junctions/links — confirm.
        if(stat.mode==41398) return next()
        if (stat.isDirectory()) {
          setTimeout(function(){
            walk(file,next)
          },5)
        }
        else{
          x++
          if(x % 1000===0) console.log((new Date().valueOf()-start)/1000,x,file)
          next()
        }
      })
    },function(){
      callback()
    })
  }
}
walkAll({
dir:'c:/',
delay:1000
});
Keep a list of sub directories to be visited, and update the list every iteration.
The walk function in the following example takes a previous state, and returns files of next sub directory with next state.
You can save the state before stopping the process, then load the saved state to continue the traversal when restarting.
/**
 * Performs one traversal step.
 *
 * Entries are taken from the front of `state` (which is modified) until one
 * can be listed with `readdir`; entries for which `readdir` throws are dropped.
 *
 * @param {string[]} state - Paths still to be visited.
 * @param {function(string): string[]} readdir - Lists the names in a directory.
 * @returns {[string[], string[]]} `[next, files]`: the remaining state followed
 *   by the newly found paths, and the newly found paths on their own. Both are
 *   empty when nothing in `state` could be listed.
 */
function walk(state, readdir) {
  let listed = []
  let upcoming = []
  let done = false
  while (!done && state.length > 0) {
    try {
      const current = state.shift()
      listed = readdir(current).map((name) => current + '/' + name)
      upcoming = state.concat(listed)
      done = true
    } catch (e) {
      // Not a listable directory: drop it and try the next entry.
    }
  }
  return [upcoming, listed]
}
/**
 * Runs one traversal step: loads the saved state from ./walk.json (starting
 * at '.' when there is none), prints the files found by walk() and writes the
 * next state back to the same file.
 */
function main() {
  const {writeFileSync: writeFile, readFileSync: readFile, readdirSync: readdir} = require('fs')
  const save = './walk.json'
  let state
  try {
    // Read through fs so the path resolves against the working directory,
    // exactly like the write below, and is re-read on every call.
    state = JSON.parse(readFile(save, 'utf8'))
  } catch(e) {
    // A missing file just means a fresh start; anything else is reported.
    if (e.code !== 'ENOENT') console.warn('Ignoring unreadable ' + save + ': ' + e.message)
  }
  if (!Array.isArray(state) || state.length < 1) state = ['.']
  const [nextState, files] = walk(state, readdir)
  console.log(files)
  writeFile(save, JSON.stringify(nextState, null, 2))
}
main()
an alternate idea,
var miss = require('mississippi')
var fs = require("fs")
var through2 = require("through2")
var path = require("path")
/**
 * Creates an object stream that emits every entry found below `dirPath`.
 *
 * Each read hands out one queued entry; when the queue is empty, pending
 * directories are listed with readADir (most recently found first) until
 * entries turn up. The stream ends (null) once nothing is left, and reports a
 * listing failure through the read callback.
 *
 * @param {string} dirPath - Directory to start from.
 * @returns The stream produced by miss.from.obj.
 */
function traverseDir(dirPath) {
  const pendingDirs = [path.resolve(dirPath)]
  const queued = []
  return miss.from.obj(function (size, next) {
    if (queued.length) {
      return next(null, queued.shift())
    }
    try {
      while (pendingDirs.length && !queued.length) {
        for (const entry of readADir(pendingDirs.pop())) {
          if (entry.t == "d") {
            pendingDirs.push(entry.p)
          }
          queued.push(entry)
        }
      }
      return next(null, queued.length ? queued.shift() : null)
    } catch (ex) {
      return next(ex)
    }
  })
}
/**
 * Lists the entries of `dir` that should be traversed.
 *
 * Paths containing ".git", "Recycle" or "node_modules" are left out, as are
 * entries that cannot be stat'ed and entries whose mode equals 41398.
 *
 * @param {string} dir - Directory to list.
 * @returns {Array<{t: string, p: string}>} One object per kept entry: `p` is
 *   the joined path, `t` is "d" for a directory and "f" otherwise.
 */
function readADir (dir) {
  const excluded = [/\.git/, /Recycle/, /node_modules/]
  const kept = []
  for (const name of fs.readdirSync(dir)) {
    const p = path.join(dir, name)
    if (excluded.some(function (pattern) { return p.match(pattern) })) continue
    try {
      const stat = fs.lstatSync(p)
      if (stat.mode == 41398) continue
      kept.push({ t: stat.isDirectory() ? "d" : "f", p: p })
    } catch (ex) {}
  }
  return kept
}
/**
 * Returns the traversal state stored in state.json (in the working directory).
 * When that file does not exist, a fresh state for `base` is created, saved
 * with saveState and returned.
 *
 * @param {string} base - Directory to traverse; only used for a fresh state.
 * @returns {{base: string, last: (string|null)}} The loaded or fresh state.
 */
function loadState(base){
  const resolvedBase = path.resolve(base)
  if (fs.existsSync("state.json")) {
    return JSON.parse(fs.readFileSync("state.json"))
  }
  const fresh = {base: resolvedBase, last: null}
  saveState(fresh)
  return fresh
}
/**
 * Persists `state` as JSON in state.json (in the working directory).
 *
 * The JSON is written to a temporary file first and then renamed over
 * state.json, so a process that is stopped in the middle of a save leaves the
 * previous state intact rather than a truncated file.
 *
 * @param {Object} state - Traversal state to store.
 */
function saveState(state){
  var tmpFile = "state.json.tmp"
  fs.writeFileSync(tmpFile, JSON.stringify(state))
  fs.renameSync(tmpFile, "state.json")
}
// Wiring: resume from the saved state, skip entries up to and including the
// last processed path, and let the 'c' / 'p' keys attach or detach the
// consumer that records progress.
var state = loadState("..")
var sincePath = state.last;
var filesStream = traverseDir(state.base)
.on('end', function () {
console.log("end")
})
.pipe(through2.obj(function (chunk, enc, next) {
// While sincePath is set, entries are dropped; the entry equal to sincePath
// is dropped too and clears the marker, so forwarding starts after it.
if(!sincePath) this.push(chunk)
if(chunk.p===sincePath) {
sincePath=null
}
next()
}))
// Consumer: records each entry as the new resume point before passing it on,
// and takes 500 ms per entry.
var tr = through2.obj(function (chunk, enc, next) {
state.last = chunk.p
saveState(state)
// %s prints the entry type; "%v" is not a console.log format specifier.
console.log("data %s %j", chunk.t, chunk.p)
this.push(chunk)
setTimeout(next, 500)
}).resume()
require('keypress')(process.stdin);
process.stdin.on('keypress', function (ch, key) {
if(!key) return
if (key.name == "c") {
console.log("continue")
filesStream.pipe(tr)
} else if (key.name=="p") {
console.log("pause")
filesStream.unpipe(tr)
}
});
console.log("Press 'c' to start")

Resources