Detect cycle in directed Graph - node.js

Recently had to detect recursions in a directed graph in code logic. My nodejs implementation feels complex and I am now wondering:
Are there any problems with the code?
Can we simplify it / make it more readable?
const checkCyclic = (graph) => {
const nodes = new Set(Object.keys(graph));
const searchCycle = (trace, node) => {
const cycleStartIdx = trace.indexOf(node);
if (cycleStartIdx !== -1) {
throw new Error(`Cycle detected: ${trace
.slice(cycleStartIdx).concat(node).join(' <- ')}`);
}
if (nodes.delete(node) === true) {
const nextTrace = trace.concat(node);
graph[node].forEach((nextNode) => searchCycle(nextTrace, nextNode));
}
};
while (nodes.size !== 0) {
searchCycle([], nodes.values().next().value);
}
};
checkCyclic({
p1: ['p3'],
p2: ['p1'],
p3: ['p2']
});
// => Recursion detected: p1 <- p3 <- p2 <- p1
checkCyclic({
p0: ['p1'],
p1: ['p3'],
p2: ['p1'],
p3: ['p2']
});
// => Recursion detected: p1 <- p3 <- p2 <- p1
checkCyclic({
p0: ['p0']
});
// => Cycle detected: p0 <- p0
For the curious, this is being used in promise-pool-ext, which also contains tests.
Thank you very much for your feedback!
Edit: Played around and did the iterative implementation (looks even uglier!)
module.exports = (G) => {
const pending = new Set(Object.keys(G));
while (pending.size !== 0) {
const trace = [pending.values().next().value];
const parentIdx = [0];
pending.delete(trace[0]);
while (trace.length !== 0) {
const c = trace.length - 1;
const parent = G[trace[c]][parentIdx[c]];
if (parent !== undefined) {
if (trace.includes(parent)) {
throw new Error(`Cycle detected: ${trace
.slice(trace.indexOf(parent)).concat(parent).join(' <- ')}`);
}
parentIdx[c] += 1;
if (pending.delete(parent)) {
trace.push(parent);
parentIdx.push(0);
}
} else {
trace.pop();
parentIdx.pop();
}
}
}
};
I usually prefer iterative to recursive, but in this case it might not worth the readability trade-off. Any idea how to improve this implementation?

We may shorten it a bit:
function getCycle (G, n, path) {
if (path.includes(n)) {
throw `cycle ${path.slice(path.indexOf(n)).concat(n).join('<-')}`
}
path.push(n)
return G[n].forEach(next => getCycle(G, next, path.slice(0)))
}
function validate (G) {
Object.keys(G).forEach(n => getCycle(G, n, []))
}
validate({
p1:['p2','p3','p4'],
p2:['p3'],
p3:['p0'],
p0:[],
p4:[]
})
console.log('ok')
validate({
p1:['p2','p3','p4'],
p2:['p3'],
p3:['p0'],
p0:[],
p4:['p1']
})
Now this is not the most efficient since we:
find on path which is an array and not a set (idem O(k) instead of O(1))
do revisit vertices even though they have already been visited
Below a slightly more optimized version at the sake of readability?
function getCycle (G, n, path, visited) {
if (path.has(n)) {
const v = [...path]
throw `cycle ${v.slice(v.indexOf(n)).concat(n).join('<-')}`
}
visited.add(n)
path.add(n)
return G[n].forEach(next => getCycle(G, next, new Set(path), visited))
}
function validate (G) {
const visited = new Set()
Object.keys(G).forEach(n => {
if (visited.has(n)) return
getCycle(G, n, new Set(), visited)
})
}
validate({
p1:['p2','p3','p4'],
p2:['p3'],
p3:['p0'],
p0:[],
p4:[]
})
console.log('ok')
validate({
p1:['p2','p3','p4'],
p2:['p3'],
p3:['p0'],
p0:[],
p4:['p1']
})
Regarding perfs, I have (cheaply) tried to reproduce and compare algos on the same graph G (generated by random-dag) with 50 nodes.
They seem to be equivalent.
function checkCyclic (G) {
const pending = new Set(Object.keys(G));
while (pending.size !== 0) {
const trace = [pending.values().next().value];
const parentIdx = [0];
pending.delete(trace[0]);
while (trace.length !== 0) {
const lastIdx = trace.length - 1;
const parent = G[trace[lastIdx]][parentIdx[lastIdx]];
if (parent === undefined) {
trace.pop();
parentIdx.pop();
} else {
if (trace.includes(parent)) {
throw new Error(`cycle ${trace
.slice(trace.indexOf(parent)).concat(parent).join('<-')}`);
}
parentIdx[lastIdx] += 1;
if (pending.delete(parent)) {
trace.push(parent);
parentIdx.push(0);
}
}
}
}
};
function grodzi1(G) {
function getCycle (G, n, path) {
if (path.includes(n)) {
throw `cycle ${path.slice(path.indexOf(n)).concat(n).join('<-')}`
}
path.push(n)
return G[n].forEach(next => getCycle(G, next, path.slice(0)))
}
Object.keys(G).forEach(n => getCycle(G, n, []))
}
function grodzi2(G) {
function getCycle (G, n, path, visited) {
if (path.has(n)) {
const v = [...path]
throw `cycle ${v.slice(v.indexOf(n)).concat(n).join('<-')}`
}
visited.add(n)
path.add(n)
return G[n].forEach(next => getCycle(G, next, new Set(path), visited))
}
const visited = new Set()
Object.keys(G).forEach(n => {
if (visited.has(n)) return
getCycle(G, n, new Set(), visited)
})
}
// avoid copying the set
function grodziNoCopy(G) {
function getCycle (G, n, path, visited) {
if (path.has(n)) {
const v = [...path]
throw `cycle ${v.slice(v.indexOf(n)).concat(n).join('<-')}`
}
visited.add(n)
path.add(n)
return G[n].forEach(next => {
getCycle(G, next, path, visited)
path.delete(next)
})
}
const visited = new Set()
Object.keys(G).forEach(n => {
if (visited.has(n)) return
getCycle(G, n, new Set(), visited)
})
}
// avoid visiting the already visited set of nodes
function grodziStopVisit(G) {
function getCycle (G, n, path, visited) {
if (path.has(n)) {
const v = [...path]
throw `cycle ${v.slice(v.indexOf(n)).concat(n).join('<-')}`
}
if (visited.has(n)) return
visited.add(n)
path.add(n)
return G[n].forEach(next => {
getCycle(G, next, path, visited)
path.delete(next)
})
}
const visited = new Set()
Object.keys(G).forEach(n => {
if (visited.has(n)) return
getCycle(G, n, new Set(), visited)
})
}
// same but iterative
function grodziIter(G) {
function dfs (G, n, visited) {
let stack = [{ path: [], n }]
let x
while (x = stack.pop()) {
const {n, path} = x
if (path.includes(n)) {
const v = [...path]
throw `cycle ${v.slice(v.indexOf(n)).concat(n).join('<-')}`
}
if (visited.has(n)) continue
visited.add(n)
path.push(n)
G[n].forEach(next => stack.push({ path: path.slice(0), n: next }))
}
}
const visited = new Set()
Object.keys(G).forEach(n => visited.has(n) || dfs(G, n, visited))
}
const G = {"0":["5","6","12","15","18","30","31","32","33","35","39","41","52","54"],"1":["12","17","29","30","34","35","38","39","40","43","53"],"2":["5","7","12","13","14","15","16","19","21","31","35","36","37","40","41","53"],"3":["14","16","15","30","32","40","52","55"],"4":["5","6","13","15","17","18","32","35","40","41","42","51"],"5":["16","15","30","33","52","53","55"],"6":["11","16","18","33","36","37","42","51","53"],"7":["14","15","16","22","30","33","35","36","39","41","43","49","53","54","55"],"8":["31","36","41","51"],"9":["18","30","36","37","39","40","50","52"],"10":["15","17","18","19","31","32","33","35","37","40","41","48","54","55"],"11":["15","17","19","31","32","35","38","41","40","43","48","52"],"12":["17","21","32","33","35","52","54","55"],"13":["18","19","20","29","33","35","36","38","41","43","52"],"14":["16","17","19","35","39","55"],"15":["20","22","30","33","35","38","39","41","42","43","49","50","54"],"16":["20","32","34","36","37","39","40","42","44","53"],"17":["28","31","36","35","38","41","43","44","48"],"18":["19","31","34","36","35","38","41","49","52","53","55"],"19":["29","36","48","51"],"20":["29","32","33","36","37","49"],"21":["30","31","33","34","35","36","39","48"],"22":["30","31","32","34","36","37","41","43","48"],"23":["33","34","35","36","37","40","44","50"],"24":["28","34","35","36","38","41","42","48","52"],"25":["28","29","31","32","36","41","43","53"],"26":["29","35","37","38","39","41","43","50"],"27":["31","35","36","37","41","42","48","51","53"],"28":["35","37","38","40","41","50","55"],"29":["38","39","40","42","44","51","54"],"30":["37","38","40","41","42","43","49","50","53"],"31":["36","39","40","50","52","54"],"32":["37","38","39","41","44","48","49","52","55"],"33":["41","40","42","44","52","53"],"34":["35","36","41","42","49","52","54"],"35":["44","55"],"36":["41","50","52","53","54"],"37":["52","55"],"38":["55"],"39":["40","41","51"],"40":["48","49","52"],"41":["49","52","53"],"42":["53"],"43":["48","50","52","55"],"44":["48","52","54"],"45":["49","53","54"],"46":["49","50","52"],"47":["48","50","52","53","55"],"48":[],"49":[],"50":[],"51":[],"52":[],"53":[],"54":[],"55":[]}
function bench (fn, label) {
console.time(label)
for (let idx = 0; idx < 50; idx += 1) { fn(G) }
console.timeEnd(label)
}
function shouldThrow (...fns) {
const cyc = {"p1":["p2","p3","p4"],"p2":["p3"],"p3":["p0"],"p0":[],"p4":["p1"]}
fns.forEach(fn => {
let ok = false
try { fn(cyc) } catch (e) {
ok = e.toString().includes('cycle p1<-p4<-p1')
if(!ok){
throw new Error('failzed ', e)
}
}
if (!ok){ throw 'should have thrown' }
})
}
shouldThrow(checkCyclic, grodzi1, grodzi2, grodziNoCopy, grodziStopVisit, grodziIter)
for(let i = 0; i < 3; ++i) {
bench(checkCyclic, 'cyclic')
bench(grodzi1, 'grodzi1')
bench(grodzi2, 'grodzi2')
bench(grodziNoCopy, 'grodziNoCopy')
bench(grodziStopVisit, 'grodziStopVisit')
bench(grodziIter, 'grodziIter')
console.log('next')
}

Related

How do i restrict the user without passing the middleware

const restricted = async (x, myID) => {
try {
const i = x.posts.forEach((a, b, c, d) => {
let lol = x.PrivacySettings.restricted.some(x => { return x; })
let lole = a.comments.some(x => { return x; }).postedBy;
if (lol === lole && myID !== lol || a.postBy) {
const FU = a.comments.filter((a, b, c, d) => {
return a.postedBy === lol;
});
return FU;
} else {
return;
}
});
return i;
} catch (ex) {
console.log(ex.message);
res.status(200).json('something went wrong');
}
}
So i have this function above, now how do i check if the user is restricted or not?
i have to implement this function in all the comment routes so how can i do it other than
making it a middleware

why nested async/await doesn't work as intended?

I'm learning NodeJs and having some problems using async/ await. I'm using Firebase database to read/write data. Here what i'm doing. (full function in case you need it).
async getImport(reqData: any): Promise<any> {
const username = 'kdat0310';
const db = admin.database();
const userRef = db.ref('/user');
const importRef = db.ref('/import');
const refAuthentication = db.ref('/Authentication');
const keyList = [];
const providerKey = [];
const khoList = [];
let index = 0;
const providerList = [];
const isValid = await refAuthentication.once('value', function (snapshot) {
for (const val of Object.values(snapshot.val())) {
if (
Object(val).username === Object(reqData).username &&
Object(val).token === Object(reqData).token
) {
return true;
}
}
return false;
});
if (isValid) {
await userRef.once('value', function (snapshot) {
for (const value of Object.values(snapshot.val())) {
if (value) {
if (Object(value).username == username) {
for (const val of Object(value).workAt) {
if (val) khoList.push(val.khoId);
}
}
}
}
});
const typeAndColorKey = [];
const typeAndColorValue = [];
const typeAndColorRef = db.ref('/TypeAndColor');
await typeAndColorRef.once('value', function (snapshot) {
let count = 0;
for (const key in snapshot.val()) {
typeAndColorKey.push(key);
}
for (const value of snapshot.val()) {
if (value !== undefined && value != null) {
typeAndColorValue.push({
id: typeAndColorKey[count],
type: value.type,
color: value.color,
});
count = count + 1;
}
}
});
const findTypeAndColor = (id: any) => {
for (const value of typeAndColorValue) {
if (id == value.id) {
return { type: value.type, color: value.color };
}
}
};
const userKey = [];
const userList = [];
await userRef.once('value', function (snapshot) {
let count = 0;
for (const key in snapshot.val()) {
userKey.push(key);
}
for (const value of Object(snapshot.val())) {
if (value != undefined && value != null) {
userList.push({
id: userKey[count],
name: Object(value).name,
});
count++;
}
}
});
const findUserName = (userId: any) => {
const returnValue = '';
for (const value of userList) {
if (userId == Object(value).id) {
return Object(value).name;
}
}
};
const importList = [];
await importRef.once('value', async function (snapshot) {
const importKey = [];
const cloneArr = snapshot.val().map((item: any) => {
return item;
});
for (const key in snapshot.val()) {
importKey.push(key);
}
let countTemp = 0;
for (const value of Object.values(cloneArr)) {
const goodsKeyList = [];
let count = 0;
if (khoList.indexOf(Object(value).warehouseId) !== -1) {
const listGoodsList = [];
if (Object(value).listGoods) {
for (const key in Object(value).listGoods) {
goodsKeyList.push(key);
}
const refListGoods = db.ref(
'/import/' + importKey[countTemp] + '/listGoods',
);
await refListGoods.once('value', function (snapshot) {
let item: any;
for (item of Object.values(snapshot.val())) {
if (item) {
const tempItem = item.filter((n: any) => n);
listGoodsList.push({
typeAndColor: findTypeAndColor(goodsKeyList[count]),
listGoods: tempItem,
number: tempItem.length,
});
}
count++;
}
});
}
console.log('test 1', listGoodsList);
if (listGoodsList !== []) {
importList.push({
listGoods: listGoodsList,
driver: Object(value).driver,
userId: Object(value).importEmployee,
name: findUserName(Object(value).importEmployee),
orderId: Object(value).orderId,
warehouseId: Object(value).warehouseId,
time: Object(value).time,
});
}
}
countTemp++;
}
console.log('test 2', importList);
});
return importList;
}
return 'Invalid';
}
The problem show up when it came to await importRef.once When I tried to handle some data and add the Firebase once function "async" and await inside to push the data I need to the array. Then return importList; return nothing. I figure that the await refListGoods.once cause this problems. As i thought, the await inside had done its duty and I can console.log importList inside very well. But I thought that await importRef.once will finish before return too. when I delete await refListGoods.once, the return is fine but I dont get the data I need. Do I need to refactor all code as I do to findTypeAndColor and findUserName above or there's a better way to solve this problem?
If you want to use await on the Promise returned by once, you should not pass a callback function to it.
So instead of:
const isValid = await refAuthentication.once('value', function (snapshot) {
for (const val of Object.values(snapshot.val())) {
if (
Object(val).username === Object(reqData).username &&
Object(val).token === Object(reqData).token
) {
return true;
}
}
return false;
});
Do:
const snapshot = await refAuthentication.once('value');
let isValid = false;
snapshot.forEach((child) => {
const val = child.val();
if (val.username === Object(reqData).username &&
val.token === Object(reqData).token
) {
isValid = true;
}
})

Why on my NodeJS+Express REST API a promise calling my function fails while the same promise with setTimeout works?

I have a NodeJS+Express REST API method executing reverse geocoding (using Google's Maps API).
I'm trying to solve it with Promises but the 'then' is getting executed before my function returns with the answers from Google.
When testing the same code just calling a setTimeout, it works as expected. Please see comments in the code (simplify version).
app.get('/api/v1/events', verifyToken, async (req, res) => {
await db.poolPromise.then(pool => {
return pool.request()
.input('UserId', db.sql.UniqueIdentifier, res.authData.userId)
.input('DateFrom', db.sql.DateTime2(7), req.query.dateFrom)
.input('DateTill', db.sql.DateTime2(7), req.query.dateTo)
.output('UserIdAuthorized', db.sql.Bit)
.execute('sp')
}).then(result => {
let output = (result.output || {})
if (!output.UserIdAuthorized) {
res.sendStatus(403)
}
else if (result.recordset.length > 0) {
(new Promise( (resolve) => {
//resolve(123) // this one works as expected
//setTimeout(resolve, 3000, 'temp success') // this one works as expected
// *** this one get passed and the following then is being executed before it answers ***
resolve( getAddress_TEST(result.recordset) )
// **************************************************************************************
})).then(function (value) {
res.json(
{
meta: { count: 10 }, //this is just a sample
result: value // *** this one fails with undefined ***
})
})
} else {
res.sendStatus(404)
}
}).catch(err => {
res.sendStatus(500)
console.error(err)
})
});
const nodeGeocoder_options = {
provider: 'google',
apiKey: process.env.GOOGLE_API_KEY
}
async function getAddress_TEST(recordset) {
//sample recordset for debugging - as you dont have my database
recordset = [{'eventId':14205556,'Lat':54.57767,'Lon':-2.4920483},{'eventId':14205558,'Lat':54.57767,'Lon':-2.492048},{'eventId':14205579,'Lat':53.416908,'Lon':-2.952071},{'eventId':14205588,'Lat':52.644448,'Lon':-1.153185},{'eventId':14205601,'Lat':52.29174,'Lon':-1.532283},{'eventId':14205645,'Lat':52.644448,'Lon':-1.153185},{'eventId':14205801,'Lat':53.68687,'Lon':-1.498708},{'eventId':14206041,'Lat':51.471521,'Lon':-0.2038033},{'eventId':14206049,'Lat':51.471521,'Lon':-0.2038033},{'eventId':14206072,'Lat':51.471521,'Lon':-0.2038033}]
let geocoder = nodeGeocoder(nodeGeocoder_options)
let ps = []
for (var i = 0, length = recordset.length; i < length; i++) {
if (i == 0 || !(i > 0
&& recordset[i - 1].Lat == recordset[i].Lat
&& recordset[i - 1].Lon == recordset[i].Lon)) {
ps.push(new Promise(function (resolve) {
resolve(reverseGeocode(geocoder, recordset[i].Lat, recordset[i].Lon))
}))
} else {
ps.push('-')
}
}
await Promise.all(ps)
.then(function (values) {
for (var i = 0, length = values.length; i < length; i++) {
if (values[i] != '-') {
recordset[i].locationAddress = values[i]
} else {
recordset[i].locationAddress = recordset[i - 1].locationAddress
}
}
}).then(function () {
recordset.forEach(function (v) {
delete v.Lat
delete v.Lon
});
console.log(recordset)
return recordset
})
};
async function reverseGeocode(geocoder, lat, lon) {
let address = '+'
if (lat != 0 && lon != 0) {
await geocoder.reverse({ lat: lat, lon: lon })
.then(res => {
address = res[0].formattedAddress
})
.catch(err => {
console.error(err)
});
}
return address
};
I'm sure it is something simple that I'm missing here...
The basic problem is that your getAddress_TEST function returns a promise that fulfills with nothing (undefined), because it does not contain a return statement. The return recordset is in a then() callback, from where it affects the promise resolution of the awaited promise, but that result is thrown away.
If you want to use async/await, you should get rid of any new Promise and then calls:
app.get('/api/v1/events', verifyToken, async (req, res) => {
try {
const pool = await db.poolPromise
const result = await pool.request()
.input('UserId', db.sql.UniqueIdentifier, res.authData.userId)
.input('DateFrom', db.sql.DateTime2(7), req.query.dateFrom)
.input('DateTill', db.sql.DateTime2(7), req.query.dateTo)
.output('UserIdAuthorized', db.sql.Bit)
.execute('sp')
let output = (result.output || {})
if (!output.UserIdAuthorized) {
res.sendStatus(403)
} else if (result.recordset.length > 0) {
const value = await getAddress_TEST(result.recordset)
res.json({
meta: { count: 10 }, //this is just a sample
result: value
})
} else {
res.sendStatus(404)
}
} catch(err) {
res.sendStatus(500)
console.error(err)
}
});
const nodeGeocoder_options = {
provider: 'google',
apiKey: process.env.GOOGLE_API_KEY
}
async function getAddress_TEST(recordset) {
const geocoder = nodeGeocoder(nodeGeocoder_options)
const ps = recordset.map((record, i) => {
if (i == 0 || !(i > 0
&& recordset[i - 1].Lat == record.Lat
&& recordset[i - 1].Lon == recordLon)) {
return reverseGeocode(geocoder, recordset[i].Lat, recordset[i].Lon))
} else {
return '-'
}
});
const values = await Promise.all(ps)
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
for (var i = 0, length = values.length; i < length; i++) {
if (values[i] != '-') {
recordset[i].locationAddress = values[i]
} else {
recordset[i].locationAddress = recordset[i - 1].locationAddress
}
}
recordset.forEach(function (v) {
delete v.Lat
delete v.Lon
});
console.log(recordset)
return recordset
// ^^^^^^^^^^^^^^^^
}
async function reverseGeocode(geocoder, lat, lon) {
if (lat != 0 && lon != 0) {
const res = await geocoder.reverse({ lat: lat, lon: lon })
return res[0].formattedAddress
}
return '+'
}

Node js Scraper

I have written a scraper in typescript, Running on node:10.12.0,
Issue: The code goes on sleep after few hours, randomly. And I had to restart it. My best guess is it stucks on url request
Tools/Packages Using:
Puppeteer
Cheerio
Typescript
Code:
import * as cheerio from "cheerio";
import * as request from "request";
import * as fs from "fs";
import * as shell from "shelljs";
import pup = require("puppeteer");
class App {
// #ts-ignore
public browser: pup.Browser;
public appendToFile(file: string, content: string): Promise < string > {
return new Promise < string > ((resolve, reject) => {
try {
fs.appendFileSync(file, content);
resolve("DONE");
} catch (e) {
reject(e);
}
});
}
public loadPage(url: string): Promise < any > {
return new Promise < any > ((resolve, reject) => {
request.get(url, async (err, res, html) => {
if (!err && res.statusCode === 200) {
resolve(html);
} else {
if (err) {
reject(err);
} else {
reject(res);
}
}
});
});
}
public step1(url: string): Promise < string > {
return new Promise < string > (async (resolve, reject) => {
let page: pup.Page | undefined;
try {
let next = false;
let urlLink = url;
let first = true;
let header = "unknown";
let f = url.split("/");
let folder = f[f.length - 3];
folder = folder || header;
let path = "data/" + folder;
shell.mkdir("-p", path);
page = await this.browser.newPage();
await page.goto(url, {
timeout: 0
});
let count = 1;
do {
next = false;
let res = await page.evaluate(() => {
let e = document.querySelectorAll(".ch-product-view-list-container.list-view li ul > li > h6 > a");
let p: string[] = [];
e.forEach((v) => {
p.push(("https://www.link.com") + (v.getAttribute("href") as string));
});
return p;
});
// for(const l of res) {
// try {
// await this.step2(l, "" , "")
// } catch(er) {
// this.appendToFile("./error.txt", l + "::" + url + "\n").catch(e=>e)
// }
// }
let p = [];
let c = 1;
for (const d of res) {
p.push(await this.step2(d, folder, c.toString()).catch((_e) => {
console.log(_e);
fs.appendFileSync("./error-2.txt", urlLink + " ### " + d + "\n");
}));
c++;
}
await Promise.all(p);
await this.appendToFile("./processed.txt", urlLink + ":" + count.toString() + "\n").catch(e => e);
count++;
console.log(urlLink + ":" + count);
let e = await page.evaluate(() => {
let ele = document.querySelector("#pagination-next") as Element;
let r = ele.getAttribute("style");
return r || "";
});
if (e === "") {
next = true;
await page.click("#pagination-next");
// console.log('waitng')
await page.waitFor(1000);
// console.log('done wait')
// await page.waitForNavigation({waitUntil: 'load'}).catch(e=> console.log(e));
// await Promise.all([
// page.click("#pagination-next"),
// page.waitForNavigation({ waitUntil: 'networkidle0'}), // ]);
}
} while (next);
// await page.close();
resolve("page all scrapped");
} catch (errrr) {
reject(errrr);
} finally {
if (page !== undefined) {
await page.close().catch(e => e);
}
}
});
}
public step2(url: string, folder: string, file: string): Promise < string > {
return new Promise < string > (async (resolve, reject) => {
try {
let html = await this.loadPage(url).catch(e => reject(e));
let $ = cheerio.load(html);
let ress: any = {};
let t = $(".qal_title_heading").text();
if (t) {
ress.header = t.replace(/"/g, "'").replace(/\n|\r|\t/g, "");
}
let d = $("div.ch_formatted_text.qal_thread-content_text.asker").html();
if (d) {
ress.body = d.replace(/"/g, "'").replace(/\n|\r|\t/g, "");
}
// let sprit = "-------------------------------";
let filename = "data" + file + ".json"; // ((t.replace(/[^\w\s]/gi, "")).substring(0,250)+".txt")
let data = JSON.stringify(ress) // t +sprit + d + "\n---end---\n"; await this.appendToFile("./data/"+ folder + "/" +filename, data+",\n")
.then((r) => {
resolve(r);
});
} catch (err) {
reject(err);
}
});
}
}
async function main() {
process.on("SIGTERM", () => {
console.log("SigTerm received");
process.exit(1);
});
process.on("SIGINT", () => {
console.log("SigInt received");
process.exit(1);
});
let path = "data/unknown";
shell.mkdir("-p", path);
let c = new App();
let list: string[] = [];
console.log(process.argv[2]);
require("fs").readFileSync(process.argv[2], "utf-8").split(/\r?\n/).forEach((line: string) => {
list.push(line);
});
console.log("total links->" + list.length);
c.browser = await pup.launch({
headless: true
});
for (const l of list) {
await c.step1(l).then(e => {
fs.appendFileSync("./processed.txt", l);
}).catch(e => {
fs.appendFileSync("./error.txt", l);
});
}
}
main();
Let me know if you need something else from me. Also this is all the code.
So , I figured two problems.
The chrome (under puppeteer) consumes high CPU, which gives the trend like this:
at start it's on moderate usage. and it gradually increases. My trend was it started off with 4% usage and after a day, it reached 100%. I've submitted an issue on their git
I did not specify the timeout in request
was:
request.get(url, async (err, res, html) => {
should be:
request.get(url,{timeout: 1500} async (err, res, html) => {
So far my code is running fine for more than a day now. only issue is high cpu usage. But it's none of my concern as for now.

Is it possible to bind this in bluebird map?

So I've tried to write up an example as best I could of what I'm trying to do, this isn't a very practical example but I tried to simplify it, but I feel like I may have complicated this in trying to make an example.
class add {
constructor(baseValue) {
this.base = baseValue;
return new Promise((resolve, reject) => {
resolve(this);
});
}
addBase(num) {
return new Promise((resolve, reject) => {
resolve(this.base + num);
});
}
}
const values = [1,2,3,4,5];
Promise.try(() => {
return new add(5);
}).then((add) => {
// we want to find if a 5 exists in the results
const addPromise = Promise.resolve(values).map(add.addBase, {concurrency: 1});
return Promise.try(() => {
return addPromise;
}).then((results) => {
for(let i = 0; i < results.length; i++) {
if(results[i] === 10) {
return i;
}
}
// doesn't exist
return null;
});
}).then((result) => {
if(result === null) {
console.log('10 does not exist');
} else {
console.log('10 is at position ' + result);
}
})
<script src="https://cdn.jsdelivr.net/bluebird/latest/bluebird.min.js"></script>
If you run this you'll get an error that you can't get base of undefined, this is because of the mapping in bluebird. const addPromise = Promise.resolve(values).map(add.addBase, {concurrency: 1}); Is there a way on this line to bind the add object to this when making these calls?
This is actually a little simpler than you're making it I think. You are passing the raw function into map(), but you should probably be passing an arrow function instead. Consider this simple class and code that tries to use map() by passing add():
class Test{
constructor(n) {
this.n = n
}
add(k) {
return this.n + k
}
}
let t = new Test(10)
let arr = [1, 2, 3]
// error TypeError: undefined is not an object (evaluating 'this.n')
arr.map(t.add)
This throws an error because map isn't calling add() from the object, it just thinks it's a function. An easy fix is to call map like this:
class Test {
constructor(n) {
this.n = n
}
add(k) {
return this.n + k
}
}
let t = new Test(10)
let arr = [1, 2, 3]
let mapped = arr.map((n) => t.add(n))
console.log(mapped)
You could also use:
let mapped = arr.map(t.add.bind(t))
but to me that's harder to read and understand quickly. I'm not sure what's going on with all the immediately resolved promises in your code, but changing the way you call map() makes that error go away. (there's another error later where you reference i that's not in scope.)
You've pretty much answered your own question…
class add {
constructor(baseValue) {
this.base = baseValue;
return new Promise((resolve, reject) => {
resolve(this);
});
}
addBase(num) {
return new Promise((resolve, reject) => {
resolve(this.base + num);
});
}
}
const values = [1,2,3,4,5];
Promise.try(() => {
return new add(5);
}).then((add) => {
// we want to find if a 5 exists in the results
const addPromise = Promise.resolve(values).map(add.addBase.bind(add), {concurrency: 1});
return Promise.try(() => {
return addPromise;
}).then((results) => {
for(let i = 0; i < results.length; i++) {
if(results[i] === 10) {
return i;
}
}
// doesn't exist
return null;
});
}).then((result) => {
if(result === null) {
console.log('10 does not exist');
} else {
console.log('10 is at position ' + result);
}
})
<script src="https://cdn.jsdelivr.net/bluebird/latest/bluebird.min.js"></script>

Resources