Puppeteer - ExpressJS infinite loop a function - node.js

I have a problem with my Express JS app: when I try to call a function, it is called endlessly... It opens a lot of Chromium browsers and causes performance issues...
I just want to call this function one time.
I've found a solution to make it work (And called just one time), but in this situation I can't pass any parameters ...
const farm = (async () => {
const browser = await puppeteer.launch({headless: true});
const page = await browser.newPage();
await page.goto("https://www.example.com/?s=" + term);
await page.waitForSelector("div");
const postLinks = await page.evaluate(() => {
let postLinks = [];
let elements = document.querySelectorAll('div.article');
for (element of elements) {
postLinks.push({
title: element.querySelector('div.meta-info > h3 > a')?.textContent,
url: element.querySelector('div.meta-info > h3 > a')?.href
})
}
return postLinks;
});
console.log(postLinks);
await browser.close();
})();
app.get('/', (req, res) => {
var term = "Drake";
res.send(farm);
});
With the code below, I can pass parameters but I can't return the result in "res.send", and the function is called endlessly :
const farm = async (term) => {
const browser = await puppeteer.launch({headless: true});
const page = await browser.newPage();
await page.goto("https://www.example.com/?s=" + term);
await page.waitForSelector("div");
const postLinks = await page.evaluate(() => {
let postLinks = [];
let elements = document.querySelectorAll('div.article');
for (element of elements) {
postLinks.push({
title: element.querySelector('div.meta-info > h3 > a')?.textContent,
url: element.querySelector('div.meta-info > h3 > a')?.href
})
}
return postLinks;
});
console.log(postLinks);
await browser.close();
}
app.get('/', (req, res) => {
var term = "Drake";
var results = farm(term);
res.send(results);
});
Did I miss something ?
Thanks !

It's not an infinite loop, but an unresolved promise. farm returns a promise, and you're not waiting for it: you send the pending promise before it resolves, i.e. before Puppeteer is done.
You need to wait for farm's promise to resolve: make the route handler async and add await to the farm call. Also note that farm must end with return postLinks; otherwise the awaited result is undefined and there is nothing to send:
// Route handler: runs the scraper for a fixed search term and sends back its result.
// The callback is async so it can await the promise returned by farm() instead of
// handing the still-pending promise to res.send().
app.get('/', async (req, res, next) => {
  const term = "Drake";
  try {
    // farm() returns a promise; wait for it to settle before responding.
    const results = await farm(term);
    res.send(results);
  } catch (err) {
    // Express 4 does not catch rejections from async handlers on its own,
    // so forward the failure to the error-handling middleware explicitly.
    next(err);
  }
});

Related

manage stored variables in Chrome extension

I am trying to get the hang of managing persistent variables to be used across my first chrome extension. If I get it correctly the proper way is to use storage (i am using storage.local in my case).
In my background.js script I set up the code as below, with 3 functions: one to initialize a variable (to 0), one to update it (increasing it by 1 every time the updateApiCalls() function is called), and one to print it to the console to check whether the process worked:
const resetApiCalls = () => {
chrome.storage.local.set({"apiCalls": 0})
};
const updateApiCalls = () => {
chrome.storage.local.get(["apiCalls"]).then((items) => {
chrome.storage.local.set({"apiCalls": items.apiCalls + 1})
});
};
const printApiCalls = () => {
chrome.storage.local.get(["apiCalls"]).then((items) => {
console.log("apicalls: " + items.apiCalls);
});
};
resetApiCalls();
updateApiCalls();
printApiCalls();
The result is that the printApicalls() still logs 0 to console as if the variable didn't get changed by updateApiCalls().
What I am doing wrong in my code?
Or am I completely off track and should use a completely different approach to perform this task?
const resetApiCalls = () => {
return chrome.storage.local.set({"apiCalls": 0})
};
const updateApiCalls = async () => {
var items = await chrome.storage.local.get(["apiCalls"]);
return chrome.storage.local.set({"apiCalls": items.apiCalls + 1})
};
const printApiCalls = () => {
chrome.storage.local.get(["apiCalls"]).then((items) => {
console.log("apicalls: " + items.apiCalls);
});
};
(async _ => {
await resetApiCalls();
await updateApiCalls();
printApiCalls();
})()
/*
//you can also write this way
resetApiCalls().then(updateApiCalls).then(printApiCalls);
*/
The 3 functions are asynchronous, so you have to execute them one after the other (sequentially); otherwise the result is unpredictable.
I changed the first 2 functions so that they return a promise, and then waited for each of these promises to be fulfilled before executing the next function.
On the surface you're doing everything correctly.
However there is a caveat with chrome storage... quote from their api.
Storage and throttling limits
Don't think of adding to the Storage API as putting things in a big truck. Think of adding to storage as being like putting something in a pipe. The pipe may have material in it already, and it may even be filled. Always assume a delay between when you add to storage and when it is actually recorded.
Two examples with working results.
Using async/await
const resetApiCalls = async () => {
await chrome.storage.local.set({"apiCalls": 0})
};
const updateApiCalls = async () => {
const result = await chrome.storage.local.get(["apiCalls"]);
const incremented = result.apiCalls + 1;
await chrome.storage.local.set({"apiCalls": incremented})
};
const printApiCalls = async () => {
const result = await chrome.storage.local.get(["apiCalls"]);
console.log("result: ", result);
};
(async function () {
await resetApiCalls();
await updateApiCalls();
await printApiCalls(); //console.log prints {"apiCalls": 1}
})();
using a setTimeout as a test with your code.
const resetApiCalls = () => {
chrome.storage.local.set({"apiCalls": 0})
};
const updateApiCalls = () => {
chrome.storage.local.get(["apiCalls"]).then((items) => {
chrome.storage.local.set({"apiCalls": items.apiCalls + 1})
});
};
const printApiCalls = () => {
chrome.storage.local.get(["apiCalls"]).then((items) => {
console.log("apicalls: " + items.apiCalls);
});
};
resetApiCalls();
updateApiCalls();
// printApiCalls();
setTimeout(function() {
printApiCalls(); //console.log prints "apicalls: 1"
}, 5000);
Hope this clears up the issue.

Foreach and push() with async await doesn't push the values

Here's my code:
app.get("/bots/:id", async (req, res)=>{
var bot = await Bots.findOne({id: req.params.id});
if(!bot) return await res.send("not there");
bot.desc = await marked(bot.desc);
var user = req.user;
bot.owner = [];
await bot.owners.forEach(async (id)=>{
await fetch(`${process.env.DOMAIN}/api/client/users/${id}`).then(r=>r.json()).then(async d=>{
await bot.owner.push(d.tag);
});
});
await console.log(bot.owner);
await res.render('botpage.ejs', {user, bot});
})
App is express, fetch is node-fetch and I'm using ejs to render.
When I do console.log(bot.owner), it logs [] and not the array of d.tags that I fetch.
Everything is fine except this, including my api and ejs page.
process.env.DOMAIN = https://discord.rovelstars.com
A forEach loop does not wait for async callbacks to finish. Use a for loop instead of forEach.
// Index-based loop: unlike forEach, an `await` inside this body pauses the
// loop until the awaited promise settles, so iterations run one at a time.
for (let i = 0; i < bot.owners.length; i++) {
  // ...your task (e.g. await the fetch for bot.owners[i] here)
}
or
for(var obj of bot.owners){
...your task
}
visit this for more details

Exposed function querySelector not working in Puppeteer

document.querySelectorAll('.summary').innerText;
This throws an error in the below snippet saying "document.querySelector is not a function" in my Puppeteer page's exposed function docTest.
I want to pass a specific node to each method and get the result inside evaluate.
Same with document.getElementById.
const puppeteer = require('puppeteer');
//var querySelectorAll = require('query-selector');
let docTest = (document) => {
var summary = document.querySelectorAll(.summary).innerText;
console.log(summary);
return summary;
}
let scrape = async () => {
const browser = await puppeteer.launch({
headless: false
});
const page = await browser.newPage();
await page.goto('http://localhost.com:80/static.html');
await page.waitFor(5000)
await page.exposeFunction('docTest', docTest);
var result = await page.evaluate(() => {
var resultworking = document.querySelector("tr");
console.log(resultworking);
var summary = docTest(document);
console.log(resultworking);
return summary;
});
console.log(result);
await page.waitFor(7000);
browser.close();
return {
result
}
};
scrape().then((value) => {
console.log(value); // Success!
});
I just had the same question. The problem is that the page.evaluate() function callback has to be an async function and your function docTest() will return a Promise when called inside the page.evaluate(). To fix it, just add the async and await keywords to your code:
await page.exposeFunction('docTest', docTest);
var result = await page.evaluate(async () => {
var summary = await docTest(document);
console.log(summary);
return summary;
});
Just remember that page.exposeFunction() will make your function return a Promise, then, you need to use async and await. This happens because your function will not be running inside your browser, but inside your nodejs application.
exposeFunction() does not work after goto()
Why can't I access 'window' in an exposeFunction() function with Puppeteer?
How to use evaluateOnNewDocument and exposeFunction?
exposeFunction remains in memory?
Puppeteer: pass variable in .evaluate()
Puppeteer evaluate function
allow to pass a parameterized funciton as a string to page.evaluate
Functions bound with page.exposeFunction() produce unhandled promise rejections
How to pass a function in Puppeteers .evaluate() method?
How can I dynamically inject functions to evaluate using Puppeteer?

Simple web scraping with puppeteer / cheerio not working with params

I am trying to scrape https://www.premierleague.com/clubs/38/Wolverhampton-Wanderers/stats?se=274
The results being returned are for the page minus the ?se=274
This is applied by using the filter dropdown on the page and selecting 2019/20 season. I can navigate directly to the page and it works fine, but through code it does not work.
I have tried in cheerio and puppeteer. I was going to try nightmare too but this seems overkill I think. I am clearly not an expert! ;)
function getStats(callback){
var url = "https://www.premierleague.com/clubs/38/Wolverhampton-Wanderers/stats?se=274";
request(url, function (error, response, html) {
//console.log(html);
var $ = cheerio.load(html);
if(!error){
$('.allStatContainer.statontarget_scoring_att').filter(function(){
var data = $(this);
var vSOT = data.text();
//console.log(data);
console.log(vSOT);
});
}
});
callback;
}
This will return 564 instead of 2
It seems like you're calling callback before request returns. Move the callback call into the internal block, where the task you need is completed (in your case, it looks like the filter block).
It also looks like you're missing the () on the callback call.
Also, a recommendation: return the value you need through the callback.
So this code works....$10 from a rent-a-coder did the trick. Easy when you know how!
const puppeteer = require('puppeteer');
// Launches a browser, opens the club stats page for the requested season,
// pauses, then reads and logs the "shots on target" stat from the page.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://www.premierleague.com/clubs/4/Chelsea/stats?se=274');
    // Fixed pause kept from the original; presumably it gives the page time to
    // apply the ?se= season filter before the stat is read (TODO confirm).
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    await sleep(4000);
    const element = await page.$(".allStatContainer.statontarget_scoring_att");
    if (element === null) {
      // page.$ resolves to null when nothing matches; fail with a clear message
      // instead of a TypeError raised inside the page context.
      throw new Error('Stat element not found: .allStatContainer.statontarget_scoring_att');
    }
    const text = await page.evaluate((el) => el.textContent, element);
    console.log("Shots on Target:"+text);
  } finally {
    // Always close the browser, even when scraping throws, so no Chromium
    // process is left running.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});

How to pass dynamic page automation commands to puppeteer from external file?

I'm trying to pass dynamic page automation commands to puppeteer from an external file. I'm new to puppeteer and node so I apologize in advance.
// app.js
// ========
app.get('/test', (req, res) =>
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto('http://testurl.com');
var events = require('./events.json');
for(var i=0;i<events.length;i++){
var tmp = events[i];
await page.evaluate((tmp) => { return Promise.resolve(tmp.event); }, tmp);
}
await browser.close();
})());
My events json file looks like:
// events.json
// ========
[
{
"event":"page.waitFor(4000)"
},
{
"event":"page.click('#aLogin')"
},
{
"event":"page.waitFor(1000)"
}
]
I've tried several variations of the above as well as importing a module that passes the page object to one of the module function, but nothing has worked. Can anyone tell me if this is possible and, if so, how to better achieve this?
The solution is actually very simple and straightforward. You just have to understand how this works.
First of all, you cannot pass page elements like that to evaluate. Instead you can do the following,
In a separate file,
/**
 * Runs the scripted page commands one after another.
 *
 * @param {object} page - Puppeteer page the commands are issued on.
 * @returns {Promise<Array>} The resolved value of each command, in order.
 */
async function getCommands(page) {
  // Each command is awaited before the next one starts, so they run strictly
  // in sequence; the array simply collects their results.
  return [
    await page.waitFor(4000),
    await page.click("#aLogin"),
    await page.waitFor(1000)
  ];
}

// Export the function itself (so require('./events.js')(page) keeps working)
// and also as a named property, so require('./events.js').getCommands(page)
// resolves to the same function.
module.exports = getCommands;
module.exports.getCommands = getCommands;
Now on your main file,
await require('./events.js').getCommands(page);
There, it's done! It'll execute all commands for you one by one just as you wanted.
Here is a complete code with some adjustments,
const puppeteer = require("puppeteer");
/**
 * Reads the page title, then waits one second, strictly in that order.
 *
 * @param {object} page - Puppeteer page the commands are issued on.
 * @returns {Promise<Array>} Two-element array: the title, then the value
 *   resolved by the wait call.
 */
async function getCommands(page) {
  const pageTitle = await page.title();
  const waitResult = await page.waitFor(1000);
  return [pageTitle, waitResult];
}
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto("https://example.com");
let data = await getCommands(page);
console.log(data);
await page.close();
await browser.close();
})();

Resources