How to run latest chromium/puppeteer on firebase - node.js

I do have a working application on firebase, that renders code and delivers a png image as output using puppeteer. However I would like to have a newer chromium version running (new features).
There are newer puppeteer versions, but no newer version of chrome-aws-lambda then 10.1.0
and that uses chromium 92.0.4512.0.
Is this project dead (last update 9 month ago), or is there a other package that I can use?
For reference, this is the code that runs on firebase.
const functions = require("firebase-functions");
const chromium = require("chrome-aws-lambda");
const puppeteer = chromium.puppeteer;
const admin = require("firebase-admin");
admin.initializeApp();
exports.preview = functions
.runWith({ memory: "512MB", timeoutSeconds: 10 })
.https.onRequest(async (req, res) => {
const browser = await puppeteer.launch({
args: chromium.args,
defaultViewport: { width: 400, height: 300 },
executablePath: await chromium.executablePath,
headless: true,
});
const {
query: { q = "" },
} = req;
const page = await browser.newPage();
await page.setContent(q);
const screenshot = await page.screenshot();
await browser.close();
res.header({ "Content-Type": "image/png" });
res.end(screenshot, "binary");
});

I have worked around that by using puppeteer directly without -core. So there is no chrome-aws-lambda dependency needed anymore and I can use the latest puppeteer version.
So this is how it works now if anyone else needs that:
const functions = require("firebase-functions");
const puppeteer = require('puppeteer');
// admin seems to be necessary in order
// to run the function via `firebase emulators`
const admin = require("firebase-admin");
admin.initializeApp();
exports.preview = functions
.runWith({ memory: "512MB", timeoutSeconds: 10 })
.https.onRequest(async (req, res) => {
const browser = await puppeteer.launch({
defaultViewport: { width: 400, height: 300 },
headless: true
});
const {
query: { q = "" },
} = req;
const page = await browser.newPage();
await page.setContent(q);
const screenshot = await page.screenshot();
await browser.close();
res.header({ "Content-Type": "image/png" });
res.end(screenshot, "binary");
});

Related

how to trigger chrome extension function from puppeteer Cluster

I need to trigger a function in background.js in extension from puppeteer Cluster
here is my code :
const wait = (ms) => new Promise(resolve => setTimeout(resolve, ms));
(async () => {
const puppeteer = addExtra(puppeteerStream);
const pathToExtension = "C:/Users/Proj/test-extension";
const extensionId = 'lpnlgnlkloegindjk443sfdbjipo';
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_PAGE,
maxConcurrency: 50,
timeout: 86400000,
retryLimit: 10,
retryDelay: 1000,
puppeteer: puppeteer,
executablePath: executablePath(),
puppeteerOptions: {
executablePath: "C:/Program Files/Google/Chrome/Application/chrome.exe",
timeout: 120000,
headless: false,
pipe: true,
defaultViewport:null,
ignoreDefaultArgs: [
],
args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-notifications", "--allow-http-screen-capture", "--autoplay-policy=no-user-gesture-required",
'--disable-gpu',
'--disable-dev-shm-usage',
'--no-first-run',
'--enable-usermedia-screen-capturing',
'--auto-select-desktop-capture-source=pickme',
'--no-zygote',
`--whitelisted-extension-id=${extensionId}`,
'--enable-remote-extensions',
'--start-maximized',
`--disable-extensions-except=${pathToExtension}`,
`--load-extension=${pathToExtension}`,
]
}
});
// setup the function to be executed for each request
cluster.task(async ({ page, data: url }) => {
return start(page, url);
});
app.get('/', async function (req, res) {
res.send("index page");
});
app.get('/start', async function (req, res) { // expects URL to be given by ?url=...
try {
// run the task function for the URL
const resp = cluster.queue(req.query.url);
res.send(resp);
} catch (err) {
res.end('Error: ' + err.message);
}
});
async function start(page, Url) {
const context = page.browser().defaultBrowserContext();
await page.goto(Url);
}
})();
now I can run mt puppeteer and click a button in my extension manually but what I need is call the function (same as button click in extension) from puppeteer cluster automatically after await page.goto(Url);
I can do that when I'm using puppeteer without cluster like :
const browser = await puppeteer.launch({
headless: false,
args: [
`--whitelisted-extension-id=${extensionId}`,
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
`--disable-extensions-except=${pathToExtension}`,
`--load-extension=${pathToExtension}`,
],
});
const page = await browser.newPage();
await page.goto('https://www.youtube.com/', { waitUntil: 'networkidle2' });
const targets = await browser.targets();
const backgroundPageTarget = targets.find(target => target.type() === 'background_page' && target.url().startsWith(`chrome-extension://${extensionId}/`));
const backgroundPage = await backgroundPageTarget.page();
backgroundPage.on('console', msg => {
for (let i = 0; i < msg.args().length; i++) {
console.log(`${i}: ${msg.args()[i]}`);
}
});
await backgroundPage.evaluate(() => {
startFunction();
return Promise.resolve(42);
});
But I cannot get the targets in cluster any suggestion

Puppeteer - Unable to launch more than 2 browsers

Hi I have a simple puppeteer script that uses a different userDataDir per instance.
I'm unable to spawn more than 2 instances of puppeteer.
Here's the code:
ipcMain.on('request-mainprocess-action', (event, arg) => {
var taskData = arg[0];
var url = taskData[5];
var headlessChoice = arg[0][11];
var taskName = taskData[21];
var browserArgs = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--window-size=1920x1080',
'--lang=en-US,en',
'--disable-infobars',
'--no-zygote',
'--renderer-process-limit=1',
'--no-first-run',
'--ignore-certificate-errors',
'--ignore-certificate-errors-spki-list',
'--disable-dev-shm-usage',
'--disable-extensions',
'--allow-insecure-localhost',
'--disable-blink-features=AutomationControlled',
'--remote-debugging-port=9222'
];
(async () => {
const browser = await puppeteer.launch({
userDataDir: tasksDataDirPath+'\\'+taskName,
headless: headlessChoice,
ignoreHTTPSErrors: true,
executablePath : arg[0][23],
args: browserArgs,
timeout: 3600000
});
const page = await browser.newPage();
const pagesArray = await browser.pages();
pagesArray[0].close();
await page.emulateTimezone("Asia/Singapore");
if(headlessChoice == true){
await page.setViewport({width: (width/2)-21, height: height-111});
}
if(headlessChoice == true){
await page.goto(url, { waitUntil: 'load', timeout: 3600000 });
}
else{
await page.goto("https://bot.sannysoft.com/", { waitUntil: 'load', timeout: 3600000 });
}
await new Promise(resolve => { });
})();
});
I'm able to only start 2 Instances, on the 3rd instance it just loads a blank page.
I'm also aware of puppeteer-cluster however I realize that with puppeteer-cluster i'm unable to set the userDataDir, so i'll have to use normal puppeteer :(
Anyone able to help?

How to download PDF blob using puppeteer?

When the download button is clicked, a new tab is opened where the user can view a PDF statement.
This new tab has a URL starting with blob:, e.g.: blob:https://some-domain.com/statement-id.
How could I download this PDF statement to the file system?
Note: I'm using { headless: false } mode.
Trying to simulate the case:
import puppeteer from 'puppeteer';
import { writeFileSync } from 'fs';
// Minimal PDF from https://github.com/mathiasbynens/small#documents
const minimalPdf = `%PDF-1.
1 0 obj<</Pages 2 0 R>>endobj
2 0 obj<</Kids[3 0 R]/Count 1>>endobj
3 0 obj<</Parent 2 0 R>>endobj
trailer <</Root 1 0 R>>`;
const browser = await puppeteer.launch({ headless: false, defaultViewport: null });
try {
const [page] = await browser.pages();
await page.goto('http://example.com/');
await page.evaluate((pdf) => {
const url = URL.createObjectURL(new Blob([pdf], {type: 'application/pdf'}));
window.open(url);
}, minimalPdf);
const newTarget = await page.browserContext().waitForTarget(
target => target.url().startsWith('blob:')
);
const newPage = await newTarget.page();
const blobUrl = newPage.url();
page.once('response', async (response) => {
console.log(response.url());
const pdfBuffer = await response.buffer();
console.log(pdfBuffer.toString());
console.log('same:', pdfBuffer.toString() === minimalPdf);
writeFileSync('minimal.pdf', pdfBuffer);
});
await page.evaluate((url) => { fetch(url); }, blobUrl);
} catch(err) { console.error(err); } finally { /* await browser.close(); */ }

How to login to google account with playwright?

I have following source code and run it in headful mode.
I can input email address.
But, after that, there is message that saying "Couldn't sign you in.For your protection, you can't sign in from this device. Try again later, or sign in from another device.".
Do I need to set additional header or something else?
Here is my source code.
const playwright = require('playwright');
const cookiePath = '/home/ubuntu/.config/chromium/Default';
browser['chromium'] = await playwright['chromium'].launchPersistentContext(cookiePath,{
headless: false,
args: [
`--disable-extensions-except=${pathToExtension}`,
`--load-extension=${pathToExtension}`,
],
});
const page = await browser['chromium'].newPage();
const login_url = "https://accounts.google.com/signin/v2/identifier?hl=ja&flowName=GlifWebSignIn&flowEntry=ServiceLogin";
await page.goto(login_url);
await page.fill('#identifierId',userinfo['id']);
await page.click("#identifierNext");
await page.fill('[name=password]',userinfo['password']);
await page.click("#passwordNext");
My solution:
const { chromium } = require("playwright");
(async () => {
const browser = await chromium.launch({
headless: false,
args: ["--disable-dev-shm-usage"],
});
const context = await browser.newContext({});
const page = await context.newPage();
const navigationPromise = page.waitForNavigation({
waitUntil: "domcontentloaded",
});
await page.setDefaultNavigationTimeout(0);
await page.goto(
"https://accounts.google.com/signin/v2/identifier?hl=en&flowName=GlifWebSignIn&flowEntry=ServiceLogin"
);
await navigationPromise;
await page.waitForSelector('input[type="email"]');
await page.type('input[type="email"]', "youremail");
await page.click("#identifierNext");
await page.waitForSelector('input[type="password"]', { visible: true });
await page.type('input[type="password"]', "yourpassword");
await page.waitForSelector("#passwordNext", { visible: true });
await page.click("#passwordNext");
await navigationPromise;
//you are in
I think you can search for login to google with Puppeteer also.
This works for me:
add --disable-blink-features=AutomationControlled to your args.
This works for me:
const browser = await playwright.chromium.launch({
ignoreDefaultArgs: ['--disable-component-extensions-with-background-pages']
})

Running a loop within an instance of Puppeteer

Just getting started with Puppeteer and i can launch the browser, go to a url, run a bunch of actions and then close the browser. What i am looking to see if i can do though is open the browser and loop over a set of actions in the same session.
I have a JSON object with urls i want to visit, so want to loop over that
// teams.js
module.exports = {
premier_league: [
{ team_name: "Team 1", url: "https://url-of-site/team_1"},
{ team_name: "Team 2", url: "https://url-of-site/team_2"}
]
}
My script to launch puppeteer is as follows
// index.js
const TEAM = require('./teams');
const puppeteer = require('puppeteer');
(async () => {
// Initialise Browser
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
await page.setViewport({
width: 1280,
height: 800
});
await page.goto('login page');
await page.click('login_box');
await page.keyboard.type('username');
await page.click('login_password');
await page.keyboard.type('password');
await page.click('login_button');
await page.waitForNavigation();
// Go To Team URL
await page.goto('Team URL')
await browser.close();
})();
So to loop over my JSON object I can use
Object.keys(TEAM['premier_league']).forEach(function(key) {
// Output url of each team
console.log(TEAM['premier_league'][key]['url'])
});
If i wrap my go to url with my loop, then page is no longer accessible
// index.js
const TEAM = require('./teams');
const puppeteer = require('puppeteer');
(async () => {
// Initialise Browser
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
await page.setViewport({
width: 1280,
height: 800
});
await page.goto('login page');
await page.click('login_box');
await page.keyboard.type('username');
await page.click('login_password');
await page.keyboard.type('password');
await page.click('login_button');
await page.waitForNavigation();
Object.keys(TEAM['premier_league']).forEach(function(key) {
// Go To Team URL
await page.goto(TEAM['premier_league'][key]['url'])
});
await browser.close();
})();
The actual error is
await page.goto(TEAM[args][key]['url'])
^^^^
SyntaxError: Unexpected identifier
Your Object.keys callback function need to use async as well in order to use await inside. Try to change as below
Object.keys(TEAM['premier_league']).forEach( async function(key) {
// Go To Team URL
await page.goto(TEAM['premier_league'][key]['url'])
});
Hope it helps

Resources