Loop not working in nodejs puppeteer script - node.js

New to nodejs and puppeteer. I'm trying to loop through some street names in a public county property value search. However, I cannot see what I'm doing wrong. I had this working for an individual street name before I attempted to loop over several street names. I've replaced the street names for protection.
// Question script: for every street name it is meant to launch Chromium, run a
// property search, export the results and take screenshots along the way.
const puppeteer = require('puppeteer');
var street_names = ["street1","street2","street3"]
for (var i = 0; i < street_names.length;i++) {
// console.log(street_names[i]); // Used to test if the loop works.
// NOTE: the async arrow function below is only defined; its closing line is
// `});` with no call parentheses, so its body never runs.
(async () => {
const browser = await puppeteer.launch({executablePath: '/usr/bin/chromium-browser'});
const page = await browser.newPage();
await page.setViewport({ width: 1920, height: 1080 });
await page.setDefaultNavigationTimeout(0);
// Property Search Page
await page.goto('http://propaccess.traviscad.org/clientdb/PropertySearch.aspx?cid=1', {waitUntil: 'domcontentloaded'});
//type the enter street
await page.select('select[name="propertySearchOptions:recordsPerPage"]', '250'); // Select 250 results per page
await page.screenshot({path: 'screenshot.jpg', fullPage: true});
await page.focus('#propertySearchOptions_streetName');
await page.type('input[name="propertySearchOptions:streetName"]',street_names[i]);
//await page.keyboard.type('street_names[i]');
await page.click('#propertySearchOptions_searchAdv');
// Enter Results Page
// NOTE: the path is a plain string literal, so the text street_names[i] is
// part of the file name rather than the current street name.
await page.screenshot({path: 'street_names[i]_screenshot.jpg', fullPage: true});
// Allow downloads and store them under ./results.
await page._client.send('Page.setDownloadBehavior', {behavior: 'allow', downloadPath: './results'});
await page.waitForSelector('#footer');
await page.click('#propertySearchResults_exportResults');
// Fixed 3.5 s pause after clicking export (presumably to let the download finish).
await page.waitForTimeout (3500);
await page.screenshot({path: 'screenshot.jpg', fullPage: true});
await browser.close();
// NOTE: this ends the whole process with exit code 1 on the first pass.
process.exit(1);
});
}

You forgot to call the async function you defined: add () after its closing parenthesis so that it is actually invoked.
It would be more efficient to open the browser once and then reuse it with its page. To do so, you can place the loop inside the async function.
const puppeteer = require('puppeteer');

const SEARCH_URL = 'http://propaccess.traviscad.org/clientdb/PropertySearch.aspx?cid=1';

// The trailing semicolon is required here: without it the parenthesised async
// function that follows would be parsed as a call on this array literal.
const street_names = ["street1", "street2", "street3"];

/**
 * Launches one browser, reuses a single page for every street name, exports
 * the search results of each street and always closes the browser afterwards.
 */
(async () => {
  const browser = await puppeteer.launch({executablePath: '/usr/bin/chromium-browser'});
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    page.setDefaultNavigationTimeout(0);
    // Loop-invariant, so configured once. `_client` is a private Puppeteer
    // member and may change between Puppeteer versions.
    await page._client.send('Page.setDownloadBehavior', {behavior: 'allow', downloadPath: './results'});

    for (const streetName of street_names) {
      // Property Search Page
      await page.goto(SEARCH_URL, {waitUntil: 'domcontentloaded'});
      await page.select('select[name="propertySearchOptions:recordsPerPage"]', '250'); // Select 250 results per page
      await page.screenshot({path: 'screenshot.jpg', fullPage: true});
      await page.type('input[name="propertySearchOptions:streetName"]', streetName);
      await page.click('#propertySearchOptions_searchAdv');

      // Enter Results Page
      // Template literal so that every street gets its own screenshot file.
      await page.screenshot({path: `${streetName}_screenshot.jpg`, fullPage: true});
      await page.waitForSelector('#footer');
      await page.click('#propertySearchResults_exportResults');
      // Fixed pause to give the export download time to complete.
      await page.waitForTimeout(3500);
      await page.screenshot({path: 'screenshot.jpg', fullPage: true});
    }
  } finally {
    // Runs on success and on failure, so no Chromium process is left behind.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  // A non-zero exit status is reported only when something actually failed.
  process.exitCode = 1;
});

I see that you defined the function inside the loop but you do not call the function

Related

Puppeteer - How to wait img after change innerHTML?

I would like to add an image before a screenshot with puppeteer.
The following code works but instead of waiting like this, I would like to wait until the img is here :
element.innerHTML = "<img id=\"logo_website\" src=\"http://random.com/logo.jpg\">";
await page.waitFor(2000)
I tried with the following "waitFor" but it doesn't work.
await page.waitFor("#logo_website")
You can try page.waitForResponse() in this way:
'use strict';
const puppeteer = require('puppeteer');

const LOGO_URL = 'https://www.iana.org/_img/2013.1/iana-logo-header.svg';

/**
 * Injects an <img> into the page and takes the screenshot only after the
 * response for that image has been received.
 */
(async function main() {
  let browser;
  try {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();
    await page.goto('https://example.org/');
    await Promise.all([
      // Start waiting before the image is inserted so the response cannot be missed.
      page.waitForResponse(LOGO_URL),
      page.evaluate((src) => {
        document.body.innerHTML = `<img id="logo_website" src="${src}">`;
      }, LOGO_URL),
    ]);
    await page.screenshot({ path: 'scr.png' });
  } catch (err) {
    console.error(err);
  } finally {
    // Close the browser even when a step above failed.
    if (browser) {
      await browser.close();
    }
  }
})();

Ensure an element exists on the page, to confirm a successful login or any other action

I'm using Puppeteer with Node.js. I'm writing a script to log in, and it logs in successfully, but I need to verify that the login succeeded by checking that a certain element is present: if the element is present, focus on it; if it is not present, print "element is not present".
This the script I've written:
// Question script: opens a visible browser, dismisses a fancybox item, fills in
// the user name and password, submits, and then tries to report whether an
// element is present.
// Notes for the reader (all selectors are the placeholder text selector):
// - URL is presumably a placeholder for the login page address; it is not
//   defined in this snippet.
// - Two page.click() calls are not awaited, so the script moves on without
//   waiting for them to finish.
// - The final check uses the result of page.waitForSelector() as an
//   if-condition; per the Puppeteer documentation that call rejects on
//   timeout instead of resolving to a falsy value (verify for your version).
const puppeteer = require('puppeteer');
async function log_in() {
const browser = await puppeteer.launch({
headless: false,
args: ['--Window-size=1929,1170', '--Window-position=0,0']
});
const page = await browser.newPage();
await page.setViewport({ 'width': 1366, 'height': 768 });
await page.goto(URL, { waitUntil: 'networkidle2' });
await page.click('.fancybox-item');
await delay(1000);
// fun for waiting
// Returns a promise that resolves after `time` milliseconds.
function delay(time) {
return new Promise(function (resolve) {
setTimeout(resolve, time)
});
}
const UserName = 'xxxxxxxxxx';
const Password = '222222';
// NOTE: not awaited.
page.click('selector');
await delay(1000);
//Focus on user name
await page.focus('selector');
await delay(2000);
await page.type('selector', UserName);
await delay(2000);
//Focus on password
await page.focus('selector');
await page.type('selector'', Password);
// Clicking the link will indirectly cause a navigation
page.click('selector');
await delay(5000);
if (await page.waitForSelector('selector')) {
console.log("found")
} else console.log("not found");
await delay(5000);
} log_in();
Note: the code runs successfully, but it doesn't print anything in the console output.
You can use page.$(selector). It returns the matching element, or null if nothing matches the selector. You probably also need to wait for the navigation after you click the submit button, using page.waitForNavigation([options]).
...
// Clicking the link will indirectly cause a navigation.
// Start waiting for the navigation before the click is issued; if the wait
// were started only after the click resolved, a fast navigation could already
// be over and the wait would never be satisfied.
await Promise.all([
  page.waitForNavigation(),
  page.click('selector'),
]);
// page.$ resolves to null when nothing matches the selector.
const element = await page.$('selector');
if (element) {
  console.log ('found');
} else {
  console.log ('not found');
}
...
First of all, I'm afraid you lack experience in working with asynchronous JavaScript. It's crucial to understand this concept, since most of Puppeteer's functions are async. The same applies to other libraries and tools such as Selenium.
Try to learn more about that https://developer.mozilla.org/en-US/docs/Learn/JavaScript/Asynchronous
Answering your problem: I think it'd be better to use waitForSelector() instead of document.querySelector (page.$) as @drobnikj suggested. First of all, waitForSelector() waits for the element, so you can handle elements that might load asynchronously. That might not be the case on your page, but in general I believe it's the better approach. All you need to do is handle a potential exception.
My proposition is:
const puppeteer = require('puppeteer');

// Tab shared with logIn(); assigned once the browser has been launched.
let page;

/**
 * Fills in and submits the login form, then reports whether the element that
 * is expected after a successful login appeared.
 *
 * @param {string} loginPageUrl - Address of the page hosting the login form.
 * @param {string} userName - Text typed into the login input.
 * @param {string} password - Text typed into the password input.
 * @returns {Promise<boolean>} true when the expected element was found,
 *   false when waiting for it failed (for example on timeout).
 */
async function logIn(loginPageUrl, userName, password) {
  const fancyBoxItemSelector = '.fancybox-item';
  // Placeholders: replace the empty strings with the real selectors.
  const loginInputSelector = '';
  const passwordInputSelector = '';
  const loginRedirectionLink = '';
  const expectedSelectorAfterLogin = '';

  await page.goto(loginPageUrl, { waitUntil: 'networkidle2' });
  await page.waitForSelector(fancyBoxItemSelector);
  await page.click(fancyBoxItemSelector);

  await page.waitForSelector(loginInputSelector);
  await page.click(loginInputSelector);
  await page.type(loginInputSelector, userName);

  await page.waitForSelector(passwordInputSelector);
  await page.click(passwordInputSelector);
  await page.type(passwordInputSelector, password);

  await page.waitForSelector(loginRedirectionLink);
  // The navigation wait must be registered before the click that triggers it.
  await Promise.all([
    page.waitForNavigation({ waitUntil: 'networkidle2' }),
    page.click(loginRedirectionLink),
  ]);

  try {
    await page.waitForSelector(expectedSelectorAfterLogin);
    console.log('Selector found!');
    return true;
  } catch (err) {
    console.log('Selector not found!');
    return false;
  }
}

(async () => {
  // `await` is only valid inside an async function in a CommonJS script, so
  // the browser and page are created here rather than at the top level.
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--Window-size=1929,1170', '--Window-position=0,0']
  });
  try {
    // newPage() resolves to a single Page, not an array.
    page = await browser.newPage();
    await logIn('', '', '');
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
});
I allowed myself to polish your code. I hope it's clear what I have done here.

Wait until function is complete and then render the page

I have a function which scrapes an element and return the element value. This is the code of reale-scraper.js:
// Question version of the scraper: returns a Promise that is meant to resolve
// with the text of a table cell read after logging in.
// `ptr` and `config` are defined outside this snippet (presumably the
// puppeteer module and the credentials configuration).
module.exports.RealeScraper = function() {
return new Promise((res, rej) => {
var url = 'example.com';
var compagnia;
//Start Puppeteer and scrape element
ptr.launch().then(async browser => {
const page = await browser.newPage();
await page.setViewport({ width: 1280, height: 800 });
await page.goto(url, {waitUntil: "networkidle0"});
await page.type('input[name="username"]', config.utente);
await page.type('input[name="password"]', config.pass);
// Click and navigation wait are started together.
await Promise.all([
page.click('input[type="SUBMIT"]'),
page.waitForNavigation({waitUntil: 'networkidle2'})
]);
await page.waitForSelector('#tableSceltaProfilo > tbody > tr:nth-child(1) > td:nth-child(2)');
const element = await page.$("#tableSceltaProfilo > tbody > tr:nth-child(1) > td:nth-child(2)");
compagnia = await page.evaluate(element => element.textContent, element);
await page.screenshot({path: 'screenshot.png'});
await browser.close();
});
// NOTE: this line runs right after the .then() callback is registered, i.e.
// before that callback has assigned `compagnia`, so the promise resolves
// with undefined. `rej` is never called.
res(compagnia);
});
}
Then i call that function and try to send data to my ejs template in home.js:
// Question version of the /home route: renders the template with the value
// produced by the scraper.
// NOTE: the module is bound to `scraper`, while the handler calls
// `RealeScraper` without that prefix.
var scraper = require('../scrapers/reale-scraper');
router.get('/home', function(req, res, next) {
RealeScraper().then((compagnia) => {
res.render('nuovo-sinistro', {
titolo: 'Manager Perizie',
compagnia: compagnia
});
}).catch((error) => {
// NOTE: the error is only logged; no response is sent on this path.
console.log(error);
});
});
I want to wait until 'RealeScraper' is finished and returned me a value so that I can pass it to res.render. I've tried using Promise but it doesn't work. It gives me no errors but when I load the page, the function doesn't start and so is rendered without the variable.
I've also tried different methods but ended up having the page loading forever.
Any help would be really appreciated, thanks!
You start the scraping (the part under //Start Puppeteer and scrape element) and call res(compagnia); at the same time, so the promise is resolved while compagnia is still empty.
Just call res when scrape element finished.
...
await browser.close();
res(compagnia);
...
I think it will be better if you only use async\await like this:
/**
 * Logs in, reads the text of the first profile cell and returns it.
 * `ptr` and `config` come from the surrounding module.
 *
 * @returns {Promise<string>} Text content of the selected table cell.
 */
module.exports.RealeScraper = async function () {
  const url = 'example.com';
  const profileCellSelector = '#tableSceltaProfilo > tbody > tr:nth-child(1) > td:nth-child(2)';

  //Start Puppeteer and scrape element
  const browser = await ptr.launch();
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1280, height: 800 });
    await page.goto(url, { waitUntil: "networkidle0" });
    await page.type('input[name="username"]', config.utente);
    await page.type('input[name="password"]', config.pass);
    // The click and the navigation wait must run together: if the wait were
    // started only after the click resolved, the navigation could already be
    // finished and the wait would time out.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'networkidle2' }),
      page.click('input[type="SUBMIT"]'),
    ]);
    await page.waitForSelector(profileCellSelector);
    const element = await page.$(profileCellSelector);
    const compagnia = await page.evaluate((el) => el.textContent, element);
    await page.screenshot({ path: 'screenshot.png' });
    return compagnia;
  } finally {
    // Close the browser whether scraping succeeded or threw.
    await browser.close();
  }
};
// ...
const scraper = require('../scrapers/reale-scraper');

// Renders the page only after the scraper has produced its value.
router.get('/home', async function (req, res, next) {
  try {
    // RealeScraper is an export of the scraper module, so it has to be
    // reached through the `scraper` binding.
    const compagnia = await scraper.RealeScraper();
    res.render('nuovo-sinistro', {
      titolo: 'Manager Perizie',
      compagnia: compagnia
    });
  } catch (error) {
    console.log(error);
    // Hand the error to the framework's error handling so the request does
    // not hang without a response (assumes an Express-style router).
    next(error);
  }
});

I would like to know how to use $$eval from Puppeteer

I cannot get $$eval to work properly.
// Reference snippet from the question: collects the text content of every <p>
// element with page.evaluate and prints the resulting array.
// `puppeteer`, `chrome`, `chromeArgs` and `util` are defined outside this snippet.
(async() => {
const browser = await puppeteer.launch({ executablePath: chrome ,args: [chromeArgs]});
const page = await browser.newPage();
await page.goto('https://www.example.com/', {waitUntil: "domcontentloaded"});
var links = await page.evaluate(() => {
var hreflist = [];
var tags = document.querySelectorAll("p");
// querySelectorAll returns a NodeList, hence the borrowed Array forEach.
Array.prototype.forEach.call(tags, (tag)=>{
hreflist.push(tag.textContent);
});
return hreflist;
});
console.log(util.inspect(links, false, null));
// NOTE: close() is not awaited here.
browser.close();
})();
I would like to do the same thing as the source code written above.
// Question snippet: attempts the same paragraph-text collection with
// page.$$eval.
(async() => {
const browser = await puppeteer.launch({ executablePath: chrome ,args: [chromeArgs]});
const page = await browser.newPage();
await page.goto('https://www.example.com/', {waitUntil: "domcontentloaded"});
// NOTE: both arrow functions below use a braced body without a `return`
// statement, so each of them yields undefined.
var links = await page.$$eval('p', list => {
list.map(data => {
data.textContent
})
});
console.log(util.inspect(links, false, null));
browser.close();
})();
The execution result of $$eval() is undefined.
https://pptr.dev/#?product=Puppeteer&version=v1.10.0&show=api-pageevalselector-pagefunction-args
I saw the official document.
However, I cannot identify the problem.
You forgot to return the value. this will work
var links = await page.$$eval('p', list => list.map(data => data.textContent));

use same browser instance?

Hi I am trying to make a screenshot service
// Question snippet: module-level settings and an attempt to launch one shared
// browser for the screenshot service.
const puppeteer = require('puppeteer');
var resWidth = 1366;
var resHeight = 1000;
var browser;
// NOTE: the async function below ends with `});` and is never called, so
// `browser` keeps its initial undefined value.
(async () => {
browser = await puppeteer.launch({ignoreHTTPSErrors: true});
});
and when I receive a work I try to do
// Question snippet: for every job in `data` (defined outside this snippet),
// opens a tab in the shared browser and stores a full-page screenshot.
data.forEach(function(d){
try {
console.log(d["url"]);
// Each job starts its own async function; nothing awaits the returned promise.
(async () => {
var page = await browser.newPage();
await page.setViewport({width: resWidth, height: resHeight});
await page.goto(d["url"], {timeout: 90000, waitUntil: 'networkidle'});
await page.screenshot({path: './picdata/' + d['id'] + '.png' ,fullPage: true});
await page.close();
})();
// NOTE: this try/catch only covers the synchronous start of the async
// function; a rejection inside it is not caught here, and the catch body is empty.
} catch(e) {}
});
but I can't... here is the error:
(node:40596) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 7): TypeError: Cannot read property 'newPage' of undefined
I don't want to open a new browser for each screenshot launching browser takes time and requires more memory?
what should I do?
The problem:
(async () => {
browser = await puppeteer.launch({ignoreHTTPSErrors: true});
});
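This code never gets executed. Why? Because the async function is only defined and never invoked: it is missing the trailing () that would make it an immediately invoked function expression.
More on closures, here.
That being said, invoking it alone won't work for your given scenario, because these are async tasks: the forEach could still run before the browser has finished launching.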
My try with your example:
'use strict';
const puppeteer = require('puppeteer');

const resWidth = 1366;
const resHeight = 1000;
let browser;

/**
 * Launches the single browser instance shared by all screenshot jobs.
 * headless: true keeps any browser window from being shown.
 */
async function launchBrowser() {
  browser = await puppeteer.launch({ headless: true });
}

/**
 * Takes a full-page screenshot for one job in its own tab.
 *
 * @param {{url: string, id: (string|number)}} d - Job holding the page address
 *   and the id used for the output file name.
 */
async function takeScreenshot(d) {
  // A local page per job: a shared variable would be overwritten when several
  // jobs overlap.
  const page = await browser.newPage();
  try {
    await page.setViewport({ width: resWidth, height: resHeight });
    // NOTE(review): newer Puppeteer releases use 'networkidle0' / 'networkidle2'
    // instead of 'networkidle' - adjust to the installed version.
    await page.goto(d['url'], { timeout: 90000, waitUntil: 'networkidle' });
    await page.screenshot({ path: `./picdata/${d['id']}.png`, fullPage: true });
  } finally {
    // Always release the tab so pages do not pile up in the shared browser.
    await page.close();
  }
}

// `data` is the list of jobs provided by the surrounding service code.
(async () => {
  await launchBrowser(); // wait until the browser has launched
  try {
    // for...of with await processes the jobs one after another; async
    // callbacks passed to forEach would not be awaited, and the browser would
    // be closed while screenshots were still in progress.
    for (const d of data) {
      try {
        await takeScreenshot(d);
      } catch (e) {
        // One failing job must not stop the remaining ones.
        console.log(e);
      }
    }
  } finally {
    await browser.close(); // close browser finally.
  }
})().catch((e) => {
  console.log(e);
});
Pro Tip: Start using let, const instead of var.
There is a great article on this, here

Resources