How to store console output as a variable Puppeteer - node.js

I'm having an issue storing the console output in a variable. I've created a short demo (fewer than 20 lines of code) which, when executed, outputs "hello how are you" to my Node console. I'd like to store this text in a variable. Thank you.
const puppeteer = require('puppeteer');

/**
 * Demo: opens a page, mirrors the page's console messages into the Node
 * console, then logs "123" from inside the page context.
 */
(async function main() {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: null
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.google.com/');
    // `on` registers the listener synchronously, so there is nothing to await.
    page.on('console', (msg) => console.log(msg.text()));
    // The callback runs inside the page, so this console.log is the page's
    // console and reaches Node only through the listener above.
    await page.evaluate(() => {
      const b = "123";
      console.log(b);
    });
  } finally {
    // Release the browser process even if one of the steps above throws.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
});

You could use a promise, which will be resolved on the event, and then await for that.
// Fragment: meant to run inside an async function (or an ES module, where
// top-level await is allowed) with `puppeteer` already required.
const browser = await puppeteer.launch({
  headless: false,
  defaultViewport: null
});
const page = await browser.newPage();
await page.goto('https://www.google.com/');

// Function executed in the page; its console.log is what gets captured.
const test = () => {
  console.log("123");
};

// Settles with the text of the first console message the page emits.
// `once` detaches the listener after that first message.
const consoleLogPromise = new Promise((resolve) => {
  page.once('console', (msg) => resolve(msg.text()));
});

await page.evaluate(test);
// The console text is now an ordinary variable.
const output = await consoleLogPromise;
console.log(output);
await browser.close();

Related

Ensure an element exists in the page, to confirm a successful login or any other action

I'm using Puppeteer with Node.js. I'm writing a script to log in, and the login itself succeeds, but I need code that confirms the login succeeded by checking that some element is present: if the element is present, focus on it; if it is not present, print "element is not present".
This the script I've written:
const puppeteer = require('puppeteer');

/**
 * Opens the login page, types the credentials, submits the form and prints
 * whether the post-login element was found.
 *
 * 'selector' and URL are placeholders left by the author; replace them with
 * the real selectors and the login page address.
 */
async function log_in() {
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--Window-size=1929,1170', '--Window-position=0,0']
  });
  const page = await browser.newPage();
  await page.setViewport({ 'width': 1366, 'height': 768 });
  await page.goto(URL, { waitUntil: 'networkidle2' });
  await page.click('.fancybox-item');
  await delay(1000);
  // fun for waiting: resolves after `time` milliseconds
  // (a function declaration, so it is hoisted and usable above this line)
  function delay(time) {
    return new Promise(function (resolve) {
      setTimeout(resolve, time)
    });
  }
  const UserName = 'xxxxxxxxxx';
  const Password = '222222';
  // NOTE(review): this click is not awaited; only the fixed delay below
  // separates it from the next step.
  page.click('selector');
  await delay(1000);
  //Focus on user name
  await page.focus('selector');
  await delay(2000);
  await page.type('selector', UserName);
  await delay(2000);
  //Focus on password
  await page.focus('selector');
  // Fixed: a stray extra quote after 'selector' made this line a syntax error.
  await page.type('selector', Password);
  // Clicking the link will indirectly cause a navigation
  // NOTE(review): not awaited either; the 5 s delay stands in for the navigation.
  page.click('selector');
  await delay(5000);
  // NOTE(review): per the Puppeteer docs waitForSelector rejects on timeout
  // instead of resolving to a falsy value, so the else branch is presumably
  // never reached this way - confirm.
  if (await page.waitForSelector('selector')) {
    console.log("found")
  } else console.log("not found");
  await delay(5000);
}
log_in();
Note: the code runs successfully, but it doesn't print anything to the console.
You can use page.$(selector). It returns the element, or null if nothing matches the selector. You probably also need to wait for the navigation after you click the submit button, using page.waitForNavigation([options]).
...
// Clicking the link will indirectly cause a navigation.
// Start waiting for that navigation together with the click: if the click is
// awaited first, the navigation can finish before waitForNavigation() begins
// listening, and the wait then runs into its timeout.
await Promise.all([
  page.waitForNavigation(),
  page.click('selector'),
]);
// page.$ resolves to null when nothing matches the selector.
const element = await page.$('selector');
if (element) {
  console.log('found');
} else {
  console.log('not found');
}
...
First of all, I'm afraid you lack experience working with asynchronous JavaScript. It's crucial to understand this concept, since most of Puppeteer's functions are async. The same goes for other libraries and tools, such as Selenium.
Try to learn more about that https://developer.mozilla.org/en-US/docs/Learn/JavaScript/Asynchronous
Answering your problem: I think it'd be better to use waitForSelector() instead of document.querySelector (page.$), as @drobnikj suggested. First of all, waitForSelector() waits for the element, so you can handle elements that might load asynchronously. That might not be the case on your page, but in general I believe it's a better approach. All you need to do is handle a potential exception.
My proposition is:
const puppeteer = require('puppeteer');

// Page driven by logIn(); assigned once the browser has been launched below.
let page;

/**
 * Logs in through the site's login form and prints whether the element
 * expected after a successful login showed up.
 *
 * The empty selector strings are placeholders to be filled in.
 *
 * @param {string} loginPageUrl - address of the login page
 * @param {string} userName - value typed into the login input
 * @param {string} password - value typed into the password input
 * @returns {Promise<void>}
 */
async function logIn(loginPageUrl, userName, password) {
  const fancyBoxItemSelector = '.fancybox-item';
  const loginInputSelector = '';
  const passwordInputSelector = '';
  const loginRedirectionLink = '';
  const expectedSelectorAfterLogin = '';

  // Navigate to the URL that was passed in (the original used the global URL).
  await page.goto(loginPageUrl, { waitUntil: 'networkidle2' });
  await page.waitForSelector(fancyBoxItemSelector);
  await page.click(fancyBoxItemSelector);

  await page.waitForSelector(loginInputSelector);
  await page.click(loginInputSelector);
  await page.type(loginInputSelector, userName);

  await page.waitForSelector(passwordInputSelector);
  await page.click(passwordInputSelector);
  await page.type(passwordInputSelector, password);

  await page.waitForSelector(loginRedirectionLink);
  // Begin waiting for the navigation before the click triggers it, and await
  // both together so a failing click cannot leave a dangling rejection.
  await Promise.all([
    page.waitForNavigation({ waitUntil: 'networkidle2' }),
    page.click(loginRedirectionLink),
  ]);

  try {
    // waitForSelector rejects when the element does not appear in time.
    await page.waitForSelector(expectedSelectorAfterLogin);
    console.log('Selector found!');
  } catch (err) {
    console.log('Selector not found!');
  }
}

(async () => {
  // NOTE(review): Chromium switches are normally lowercase (--window-size,
  // --window-position); confirm these capitalised forms are honoured.
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--Window-size=1929,1170', '--Window-position=0,0']
  });
  try {
    // newPage() resolves to the page itself; indexing the pending promise
    // with [0] produced undefined.
    page = await browser.newPage();
    await logIn('', '', '');
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
});
I allowed myself to polish your code. I hope it's clear what I have done here.

How to use multiple link in .goto(url) puppeteer?

const puppeteer = require("puppeteer");

// Loads a single chapter page and prints the text of its .box-chap element.
(async () => {
  let browser;
  try {
    browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();
    await page.goto('url/c-0');
    await page.waitForSelector('.box-chap');
    const element = await page.$(".box-chap");
    // Read the text inside the page context and send it back to Node.
    const content = await page.evaluate(element => element.textContent, element);
    console.log(content + "chapter");
  } catch (error) {
    // Report the failure; an empty catch made every error invisible.
    console.error(error);
  } finally {
    // Close the browser so the Node process can exit.
    if (browser) {
      await browser.close();
    }
  }
})();
Hi all, I currently want to loop over these URLs:
url/c-0
url/c-1
url/c-2
.....
Please suggest a solution. Thanks, all.
Just loop your job. You can use a for loop over all the chapters you want to crawl (provided your chapter URLs share the same format).
const puppeteer = require("puppeteer");

// Visits url/c-0 ... url/c-<endOfChapterNumber> one after another in a single
// tab and prints the text of each page's .box-chap element.
(async () => {
  let browser;
  try {
    browser = await puppeteer.launch({ headless: true });
    const page = await browser.newPage();
    const endOfChapterNumber = 10; // index of the last chapter (inclusive)
    // The counter must be `let`: incrementing a `const` throws a TypeError.
    for (let c = 0; c <= endOfChapterNumber; c++) {
      const chapterUrl = 'url/c-' + c;
      await page.goto(chapterUrl);
      await page.waitForSelector('.box-chap');
      const element = await page.$(".box-chap");
      const content = await page.evaluate(element => element.textContent, element);
      console.log(content + " chapter: " + c);
    }
  } catch (error) {
    // Report the failure; an empty catch made every error invisible.
    console.error(error);
  } finally {
    // Close the browser so the Node process can exit.
    if (browser) {
      await browser.close();
    }
  }
})();

can we scrape data for a website which requires authentication in Node.js?

I am trying to web scraping using node.js for a website that requires authentication. Is there any way to perform this in node.js?
You can try puppeteer:
'use strict';
const puppeteer = require('puppeteer');

/**
 * Fills in and submits the httpbin demo form, then prints the e-mail address
 * echoed back in the JSON response.
 */
(async function main() {
  let browser;
  try {
    browser = await puppeteer.launch({ headless: false });
    // Reuse the tab the browser opens on start-up.
    const [page] = await browser.pages();
    await page.goto('https://httpbin.org/forms/post');
    await page.type('input[name="custname"]', 'user');
    // Must be a syntactically valid address ('@', not '#'); presumably the
    // field is an e-mail input whose validation would otherwise block the
    // submit - confirm against the form markup.
    await page.type('input[name="custemail"]', 'user@example.com');
    // Click and wait for the resulting navigation together.
    await Promise.all([
      page.click('button'),
      page.waitForNavigation(),
    ]);
    await page.waitForSelector('pre');
    const data = await page.evaluate(() => {
      return document.querySelector('pre').innerText;
    });
    console.log(JSON.parse(data).form.custemail);
  } catch (err) {
    console.error(err);
  } finally {
    // Close the browser on the error path too, not only on success.
    if (browser) {
      await browser.close();
    }
  }
})();
===============================
For the site from the comment:
'use strict';
const puppeteer = require('puppeteer');

// Signs in on the trxade login form. The browser is left open afterwards
// (the original snippet had its close call commented out).
(async () => {
  const DEA_INPUT = 'input[name="deaNumber"]';
  const PASSWORD_INPUT = 'input[name="password"]';
  const SUBMIT_BUTTON = 'input[name="form_login_proceed"]';

  try {
    const browser = await puppeteer.launch({ headless: false });
    const openPages = await browser.pages();
    const page = openPages[0];
    // 0 turns the default timeouts off.
    page.setDefaultTimeout(0);

    await page.goto('https://www.trxade.com/market/login');
    await page.waitForSelector(DEA_INPUT);
    await page.type(DEA_INPUT, '...');
    await page.type(PASSWORD_INPUT, '...');

    // Kick off the click and the navigation wait, then await both.
    const clicked = page.click(SUBMIT_BUTTON);
    const navigated = page.waitForNavigation();
    await Promise.all([clicked, navigated]);
  } catch (failure) {
    console.error(failure);
  }
})();

I would like to know how to use $$eval from Puppeteer

I cannot get $$eval to work properly.
// Collects the text content of every <p> on the page via page.evaluate and
// prints the resulting array.
(async () => {
  const launchOptions = { executablePath: chrome, args: [chromeArgs] };
  const browser = await puppeteer.launch(launchOptions);
  const page = await browser.newPage();
  await page.goto('https://www.example.com/', { waitUntil: "domcontentloaded" });

  // Runs in the page: map each paragraph node to its text.
  const links = await page.evaluate(() =>
    Array.from(document.querySelectorAll("p"), (paragraph) => paragraph.textContent)
  );

  console.log(util.inspect(links, false, null));
  browser.close();
})();
I would like to do the same thing as the source code written above.
// Attempt to collect the text of every <p> with page.$$eval instead of
// page.evaluate. Kept exactly as posted: it is the code the question is about.
(async() => {
const browser = await puppeteer.launch({ executablePath: chrome ,args: [chromeArgs]});
const page = await browser.newPage();
await page.goto('https://www.example.com/', {waitUntil: "domcontentloaded"});
// NOTE: both arrow functions below have block bodies without a `return`,
// so neither the map callback nor the $$eval callback hands a value back.
var links = await page.$$eval('p', list => {
list.map(data => {
data.textContent
})
});
console.log(util.inspect(links, false, null));
browser.close();
})();
The execution result of $$eval() is undefined.
https://pptr.dev/#?product=Puppeteer&version=v1.10.0&show=api-pageevalselector-pagefunction-args
I saw the official document.
However, I cannot identify the problem.
You forgot to return the value. This will work:
// Expression-bodied arrows return implicitly: one text string per <p>.
const links = await page.$$eval('p', (paragraphs) => paragraphs.map(({ textContent }) => textContent));

use same browser instance?

Hi I am trying to make a screenshot service
const puppeteer = require('puppeteer');
// Viewport dimensions (passed to page.setViewport in the screenshot loop).
var resWidth = 1366;
var resHeight = 1000;
// Meant to hold the one browser instance shared by all screenshot jobs.
var browser;
// NOTE: this async arrow function is wrapped in parentheses but has no
// trailing `()`, so it is only defined here, not called.
(async () => {
browser = await puppeteer.launch({ignoreHTTPSErrors: true});
});
and when I receive a job I try to do:
// For each job in `data`: open a tab, load d["url"] and save a full-page PNG
// named after d["id"] under ./picdata/.
data.forEach(function(d){
// NOTE: this try/catch only covers the synchronous part. The async function
// below returns a promise, so a rejection inside it is not caught here.
try {
console.log(d["url"]);
(async () => {
var page = await browser.newPage();
await page.setViewport({width: resWidth, height: resHeight});
await page.goto(d["url"], {timeout: 90000, waitUntil: 'networkidle'});
await page.screenshot({path: './picdata/' + d['id'] + '.png' ,fullPage: true});
await page.close();
})();
} catch(e) {}
});
but I can't... here is the error:
(node:40596) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 7): TypeError: Cannot read property 'newPage' of undefined
I don't want to open a new browser for each screenshot, because launching a browser takes time and requires more memory.
what should I do?
The problem:
// Quoted from the question: the parenthesised async function is defined, but
// there is no trailing `()` to call it.
(async () => {
browser = await puppeteer.launch({ignoreHTTPSErrors: true});
});
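This code never gets executed. Why? Because it's not a true closure: more precisely, the async function is only defined and never invoked (the trailing `()` is missing).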
More on closures, here.
That being said, even that won't work for your given scenario, as these are async tasks.
My try with your example:
'use strict';
const puppeteer = require('puppeteer');
const resWidth = 1366;
const resHeight = 1000;
// The single browser instance shared by every screenshot job.
let browser;

/**
 * Launches the shared browser. `headless: true` keeps any browser window
 * from opening.
 */
async function launchBrowser() {
  browser = await puppeteer.launch({ headless: true });
}

/**
 * Captures one full-page screenshot in its own tab of the shared browser.
 * Errors are logged per job so one failing URL does not stop the others.
 *
 * @param {{url: string, id: (string|number)}} d - job; fields as read below
 * @returns {Promise<void>}
 */
async function takeScreenshot(d) {
  // Local to the job: a shared `page` variable would be overwritten by the
  // other jobs running concurrently.
  let page;
  try {
    page = await browser.newPage();
    await page.setViewport({ width: resWidth, height: resHeight });
    // 'networkidle' was replaced by 'networkidle0' / 'networkidle2' in
    // Puppeteer 1.x; 'networkidle2' matches the other snippets here.
    await page.goto(d['url'], { timeout: 90000, waitUntil: 'networkidle2' });
    await page.screenshot({ path: './picdata/' + d['id'] + '.png', fullPage: true });
  } catch (e) {
    console.log(e);
  } finally {
    // Close the tab either way so finished jobs do not pile up in memory.
    if (page) {
      await page.close();
    }
  }
}

// `data` (the list of jobs) is expected to be supplied by the surrounding code.
launchBrowser()
  // forEach ignores the promises of an async callback; map + Promise.all
  // waits until every job has finished.
  .then(() => Promise.all(data.map((d) => takeScreenshot(d))))
  .catch((e) => {
    console.log(e);
  })
  .then(() => {
    // Close the browser only after all jobs are done (or the launch failed).
    if (browser) {
      return browser.close();
    }
  });
Pro Tip: Start using let, const instead of var.
There is a great article on this, here

Resources