Puppeteer taking so long to load the page - node.js

I am trying to load a URL using Puppeteer and take the screenshot, somehow the page loading is taking too long. Here is my code
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({
// headless: false,
args: [
`--window-size=42280,39960`,
// `--shm-size=1gb`,
// `--disable-dev-shm-usage`
]
});
const page = await browser.newPage();
await page.setViewport({
height: 39960,
width: 42280,
});
console.log('Page created');
await page.goto('https://www.google.com');
console.log('page loaded');
await page.screenshot({
path: 'example.png'
});
await browser.close();
})();
Did anyone face the same problem?

The reason of the failure is this:
await page.setViewport({
height: 39960,
width: 42280,
});
Error running your code. Error: Protocol error
(Page.captureScreenshot): Target closed.
Puppeteer can't take a screenshot with that resolution.

Related

Puppeteer - Unable to launch more than 2 browsers

Hi I have a simple puppeteer script that uses a different userDataDir per instance.
I'm unable to spawn more than 2 instances of puppeteer.
Here's the code:
ipcMain.on('request-mainprocess-action', (event, arg) => {
var taskData = arg[0];
var url = taskData[5];
var headlessChoice = arg[0][11];
var taskName = taskData[21];
var browserArgs = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--window-size=1920x1080',
'--lang=en-US,en',
'--disable-infobars',
'--no-zygote',
'--renderer-process-limit=1',
'--no-first-run',
'--ignore-certificate-errors',
'--ignore-certificate-errors-spki-list',
'--disable-dev-shm-usage',
'--disable-extensions',
'--allow-insecure-localhost',
'--disable-blink-features=AutomationControlled',
'--remote-debugging-port=9222'
];
(async () => {
const browser = await puppeteer.launch({
userDataDir: tasksDataDirPath+'\\'+taskName,
headless: headlessChoice,
ignoreHTTPSErrors: true,
executablePath : arg[0][23],
args: browserArgs,
timeout: 3600000
});
const page = await browser.newPage();
const pagesArray = await browser.pages();
pagesArray[0].close();
await page.emulateTimezone("Asia/Singapore");
if(headlessChoice == true){
await page.setViewport({width: (width/2)-21, height: height-111});
}
if(headlessChoice == true){
await page.goto(url, { waitUntil: 'load', timeout: 3600000 });
}
else{
await page.goto("https://bot.sannysoft.com/", { waitUntil: 'load', timeout: 3600000 });
}
await new Promise(resolve => { });
})();
});
I'm able to only start 2 Instances, on the 3rd instance it just loads a blank page.
I'm also aware of puppeteer-cluster however I realize that with puppeteer-cluster i'm unable to set the userDataDir, so i'll have to use normal puppeteer :(
Anyone able to help?

Diagnosing errors on Puppeteer running on Heroku with screenshot

I can't figure out why Puppeteer is unable to locate elements when running on Heroku that it can locate while running locally.
I'd like to try and diagnose my issue by having Puppeteer take a screenshot of the page, but I can't figure out how to retrieve the screenshot so I can see it.
Where do I go on Heroku to view the screenshot which is generated?
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
let url =
"https://www.realtor.ca/map#ZoomLevel=14&Center=43.744046%2C-79.406789&LatitudeMax=43.76832&LongitudeMax=-79.36641&LatitudeMin=43.71977&LongitudeMin=-79.44717&view=list&Sort=6-D&PropertyTypeGroupID=1&PropertySearchTypeId=1&TransactionTypeId=2&PriceMax=1500000&BuildingTypeId=1&Currency=CAD";
await page.goto(url, { timeout: 60000, waitUntil: "domcontentloaded" });
await page.setViewport({ width: 1024, height: 800 });
await page.screenshot({
path: "./screenshot.jpg",
type: "jpeg",
fullPage: true
});
await page.close();
await browser.close();
})();

Click checkbox with puppeteer via element ID

I have this puppeteer code:
(async () => {
const browser = await puppeteer.launch({ args: ['--no-sandbox'] });
const page = await browser.newPage();
await page.goto("https://myurl.com/page");
await page.waitForSelector("#select-all-checkbox");
var bodyHTML = await page.content();
console.log(bodyHTML + "\n\n");
await page.click("#select-all-checkbox");
await page.close();
await browser.close();
})();
Logging the HTML to the console, I have verified the page I am accessing has this HTML:
<label><input type="checkbox" name="" id="select-all-checkbox" value="" checked=""><span class="ifaFs"><span data-testid="icon-checkbox-someselected" class="hdDWuD"></span></span></label>
I am receiving this error on the page.click line:
(node:3827) UnhandledPromiseRejectionWarning: Error: Node is either
not visible or not an HTMLElement
at ElementHandle._clickablePoint (/path/to/node_modules/puppeteer/lib/JSHandle.js:217:13)
at process._tickCallback (internal/process/next_tick.js:68:7)
-- ASYNC --
at ElementHandle. (/path/to/node_modules/puppeteer/lib/helper.js:111:15)
at DOMWorld.click (/path/to/node_modules/puppeteer/lib/DOMWorld.js:367:18)
at process._tickCallback (internal/process/next_tick.js:68:7)
-- ASYNC --
at Frame. (/path/to/node_modules/puppeteer/lib/helper.js:111:15)
at Page.click (/path/to/node_modules/puppeteer/lib/Page.js:1037:29)
With my code example above, this was how I resolved the problem.
(async () => {
const browser = await puppeteer.launch({ args: ['--no-sandbox'] });
const page = await browser.newPage();
await page.goto("https://myurl.com/page");
await page.waitForSelector("#select-all-checkbox");
await page.evaluate(() => {
document.querySelector("#select-all-checkbox").parentElement.click();
});
await page.close();
await browser.close();
})();
This approach worked for me:
(async () => {
const browser = await puppeteer.launch({ args: ['--no-sandbox'] });
const page = await browser.newPage();
await page.goto("https://myurl.com/page");
const checkboxEl = await page.waitForSelector("#select-all-checkbox");
checkboxEl.click();
await page.close();
await browser.close();
})();

Running a loop within an instance of Puppeteer

Just getting started with Puppeteer and i can launch the browser, go to a url, run a bunch of actions and then close the browser. What i am looking to see if i can do though is open the browser and loop over a set of actions in the same session.
I have a JSON object with urls i want to visit, so want to loop over that
// teams.js
module.exports = {
premier_league: [
{ team_name: "Team 1", url: "https://url-of-site/team_1"},
{ team_name: "Team 2", url: "https://url-of-site/team_2"}
]
}
My script to launch puppeteer is as follows
// index.js
const TEAM = require('./teams');
const puppeteer = require('puppeteer');
(async () => {
// Initialise Browser
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
await page.setViewport({
width: 1280,
height: 800
});
await page.goto('login page');
await page.click('login_box');
await page.keyboard.type('username');
await page.click('login_password');
await page.keyboard.type('password');
await page.click('login_button');
await page.waitForNavigation();
// Go To Team URL
await page.goto('Team URL')
await browser.close();
})();
So to loop over my JSON object I can use
Object.keys(TEAM['premier_league']).forEach(function(key) {
// Output url of each team
console.log(TEAM['premier_league'][key]['url'])
});
If i wrap my go to url with my loop, then page is no longer accessible
// index.js
const TEAM = require('./teams');
const puppeteer = require('puppeteer');
(async () => {
// Initialise Browser
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
await page.setViewport({
width: 1280,
height: 800
});
await page.goto('login page');
await page.click('login_box');
await page.keyboard.type('username');
await page.click('login_password');
await page.keyboard.type('password');
await page.click('login_button');
await page.waitForNavigation();
Object.keys(TEAM['premier_league']).forEach(function(key) {
// Go To Team URL
await page.goto(TEAM['premier_league'][key]['url'])
});
await browser.close();
})();
The actual error is
await page.goto(TEAM[args][key]['url'])
^^^^
SyntaxError: Unexpected identifier
Your Object.keys callback function need to use async as well in order to use await inside. Try to change as below
Object.keys(TEAM['premier_league']).forEach( async function(key) {
// Go To Team URL
await page.goto(TEAM['premier_league'][key]['url'])
});
Hope it helps

Node puppeteer take screenshot full page SPA

I have a single page application with scrolling. I am trying to take a screenshot of the whole page, but it only gives me the visible part. How can I make it shoot the whole page?
const browser = await puppeteer.launch(options);
const page = await browser.newPage();
await page.goto(url);
await page.screenshot({ path: 'page.png', fullPage: true })
await browser.close();
Actually what is happening here that your page might took a while to load in full. So we have to increase the timeout. And before taking screen shot take a short break of 500ms and then it will take full page screenshot. Try below code.
const puppeteer = require('puppeteer');
async function runTest() {
const browser = await puppeteer.launch({
headless: false,
timeout: 100000
});
const page = await browser.newPage();
const url = 'https://stackoverflow.com/questions/47616985/node-puppeteer-take-screenshot-full-page-spa';
await page.goto(url, {
waitUntil: 'networkidle2'
});
await page.waitFor(500);
await page.screenshot({ path: 'fullpage.png', fullPage: true });
browser.close();
}
runTest();
Your code looks correct. What are you passing in options?
Is the page actually launching? Try turning off headless browser to check:
const browser = await puppeteer.launch({
headless: false
});
The following works for me to take a full page screenshot:
const puppeteer = require('puppeteer');
async function run() {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto('https://yahoo.com');
await page.screenshot({ path: 'yahooPage.png', fullPage: true });
browser.close();
}
run();

Resources