Puppeteer - Unable to launch more than 2 browsers - node.js

Hi I have a simple puppeteer script that uses a different userDataDir per instance.
I'm unable to spawn more than 2 instances of puppeteer.
Here's the code:
/**
 * Launches one dedicated Chromium instance for each task requested over IPC.
 *
 * The first element of `arg` is the task record; this handler reads index 5
 * (target URL), 11 (headless flag), 21 (task name, used as the profile
 * directory name) and 23 (browser executable path) from it.
 *
 * @param {object} event - IPC event object (unused).
 * @param {Array} arg - IPC payload; `arg[0]` is the task record described above.
 */
ipcMain.on('request-mainprocess-action', (event, arg) => {
  const taskData = arg[0];
  const url = taskData[5];
  const headlessChoice = taskData[11];
  const taskName = taskData[21];
  const chromePath = taskData[23];

  // No fixed `--remote-debugging-port` here on purpose: pinning every instance
  // to the same port (e.g. 9222) makes the instances compete for it, so only
  // the first ones get a usable DevTools endpoint and later ones show a blank
  // page. Without the flag each launch gets its own free port.
  const browserArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--window-size=1920x1080',
    '--lang=en-US,en',
    '--disable-infobars',
    '--no-zygote',
    '--renderer-process-limit=1',
    '--no-first-run',
    '--ignore-certificate-errors',
    '--ignore-certificate-errors-spki-list',
    '--disable-dev-shm-usage',
    '--disable-extensions',
    '--allow-insecure-localhost',
    '--disable-blink-features=AutomationControlled',
  ];

  (async () => {
    const browser = await puppeteer.launch({
      // One profile directory per task so instances never share a profile.
      userDataDir: `${tasksDataDirPath}\\${taskName}`,
      headless: headlessChoice,
      ignoreHTTPSErrors: true,
      executablePath: chromePath,
      args: browserArgs,
      timeout: 3600000,
    });

    const page = await browser.newPage();

    // Close the initial tab that the browser opens on start-up.
    const pagesArray = await browser.pages();
    await pagesArray[0].close();

    await page.emulateTimezone("Asia/Singapore");

    // Loose comparison kept deliberately: the exact type of the headless flag
    // sent by the renderer is not visible here.
    if (headlessChoice == true) {
      await page.setViewport({ width: (width / 2) - 21, height: height - 111 });
      await page.goto(url, { waitUntil: 'load', timeout: 3600000 });
    } else {
      await page.goto("https://bot.sannysoft.com/", { waitUntil: 'load', timeout: 3600000 });
    }

    // Never settles: keeps this async function suspended after navigation.
    await new Promise(resolve => { });
  })().catch((err) => {
    // Surface launch/navigation failures instead of leaving an unhandled rejection.
    console.error(err);
  });
});
I'm only able to start 2 instances; the 3rd instance just loads a blank page.
I'm also aware of puppeteer-cluster, but I realized that with puppeteer-cluster I'm unable to set the userDataDir, so I'll have to use plain puppeteer :(
Is anyone able to help?

Related

puppeteer-cluster error page.solveRecaptchas is not a function

I'm converting my puppeteer code to puppeteer-cluster. It was working just fine, but now I'm facing the error "page.solveRecaptchas is not a function" when trying to use 2captcha to solve hCaptcha.
This is the complete code that I wrote; it just takes data from an Excel file and then fills it in on the website.
The number of pages depends on the number of rows in the file.
`
const xlsx = require('xlsx')
const puppeteer = require('puppeteer-extra')
const StealthPlugin = require('puppeteer-extra-plugin-stealth')
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha')
puppeteer.use(StealthPlugin())
puppeteer.use(
RecaptchaPlugin({
provider: {
id: '2captcha',
token: 'xxxxxxxxxxxx'
},
visualFeedback: true
})
)
const {executablePath} = require('puppeteer')
const { Cluster } = require('puppeteer-cluster');
/**
 * Reads client rows from an Excel sheet and fills the booking form once per
 * row, running the rows concurrently through puppeteer-cluster.
 */
(async () => {
  const cluster = await Cluster.launch({
    // Hand the plugin-enabled puppeteer-extra instance to the cluster. Without
    // this the cluster launches its own plain puppeteer, whose pages have no
    // `solveRecaptchas()` method.
    puppeteer,
    concurrency: Cluster.CONCURRENCY_PAGE,
    maxConcurrency: 10,
    timeout: 150 * 1000,
    puppeteerOptions: {
      headless: false,
      args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-web-security"],
      defaultViewport: null,
      executablePath: executablePath(),
    },
  });

  // Fired for jobs added with `cluster.queue()` whose task function throws.
  cluster.on('taskerror', (err, url) => {
    console.error((new Date()).toJSON() + ` Error crawling ${url}: ${err.message}`);
  });

  // Load the client rows from the first worksheet.
  const fileURL = 'C:/xxxx/xxxx/xxxxx/clients2.xlsx';
  const workbook = xlsx.readFile(fileURL);
  const sheet_name_list = workbook.SheetNames;
  const clientsArr = xlsx.utils.sheet_to_json(workbook.Sheets[sheet_name_list[0]]);
  console.log(clientsArr);

  await cluster.task(async ({ page, data: [email, password, appiontment, firstName, lastName, region] }) => {
    await page.goto('https://website.com/');
    await page.waitForTimeout(1000);

    // Close popup 1.
    await page.waitForSelector('#IDBodyPanelapp > div.popup-appCloseIcon');
    await page.click('#IDBodyPanelapp > div.popup-appCloseIcon');

    // Choose region: 'ALGER' maps to one option value, anything else to the other.
    await page.waitForSelector('#juridiction');
    const regionValue = region == 'ALGER' ? "15#Al#10" : "14#Ora#9";
    await page.select('#juridiction', regionValue);

    // Click to get the 2nd OTP; awaited so a failure rejects this task.
    await page.$eval(`#verification_code`, (element) => element.click());

    // Close popup 2.
    await page.waitForTimeout(1500);
    await page.waitForSelector('#IDBodyPanelapp > div.popup-appCloseIcon');
    await page.click('#IDBodyPanelapp > div.popup-appCloseIcon');

    // Pick the appointment category: anything other than 'Normal' selects "Premuim".
    await page.waitForTimeout(2000);
    await page.waitForSelector('#category');
    const category = appiontment == 'Normal' ? "Normal" : "Premuim";
    await page.select('#category', category);

    // Solve the captcha, then submit and wait for the resulting navigation.
    await page.waitForTimeout(15000);
    await page.solveRecaptchas();
    await Promise.all([
      page.waitForNavigation(),
      page.click(`#em_tr > div.col-sm-6 > input`),
    ]);
    await page.screenshot({ path: 'screenshot.png', fullPage: true });
  });

  // `queue` rather than un-awaited `execute`: failures are then reported through
  // the 'taskerror' handler above instead of becoming unhandled rejections.
  for (const client of clientsArr) {
    // NOTE(review): the original task read an undeclared `region` variable; it is
    // assumed to come from a `region` column of the sheet - confirm the column name.
    cluster.queue([
      client.email,
      client.password,
      client.appiontment,
      client.firstname,
      client.lastPrenom,
      client.region,
    ]);
  }

  // The browser is left open once the queue drains. To shut down instead:
  // await cluster.idle(); await cluster.close();
})();
`
I have already searched, but I could not find a solution.
I need help, and thank you.

how to trigger chrome extension function from puppeteer Cluster

I need to trigger a function in background.js in extension from puppeteer Cluster
here is my code :
const wait = (ms) => new Promise(resolve => setTimeout(resolve, ms));

/**
 * Starts a puppeteer-cluster with a Chrome extension loaded and exposes it
 * through two HTTP routes: `/` (index) and `/start?url=...` (open a URL in a
 * cluster page).
 */
(async () => {
  const puppeteer = addExtra(puppeteerStream);
  const pathToExtension = "C:/Users/Proj/test-extension";
  const extensionId = 'lpnlgnlkloegindjk443sfdbjipo';

  const cluster = await Cluster.launch({
    concurrency: Cluster.CONCURRENCY_PAGE,
    maxConcurrency: 50,
    timeout: 86400000,
    retryLimit: 10,
    retryDelay: 1000,
    puppeteer,
    // NOTE(review): this top-level key looks redundant; the browser path that
    // is actually passed to the launch call is puppeteerOptions.executablePath.
    executablePath: executablePath(),
    puppeteerOptions: {
      executablePath: "C:/Program Files/Google/Chrome/Application/chrome.exe",
      timeout: 120000,
      headless: false,
      pipe: true,
      defaultViewport: null,
      ignoreDefaultArgs: [
      ],
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-notifications",
        "--allow-http-screen-capture",
        "--autoplay-policy=no-user-gesture-required",
        '--disable-gpu',
        '--disable-dev-shm-usage',
        '--no-first-run',
        '--enable-usermedia-screen-capturing',
        '--auto-select-desktop-capture-source=pickme',
        '--no-zygote',
        `--whitelisted-extension-id=${extensionId}`,
        '--enable-remote-extensions',
        '--start-maximized',
        `--disable-extensions-except=${pathToExtension}`,
        `--load-extension=${pathToExtension}`,
      ],
    },
  });

  // Task executed for each queued URL.
  cluster.task(async ({ page, data: url }) => {
    return start(page, url);
  });

  app.get('/', async function (req, res) {
    res.send("index page");
  });

  // Expects the URL to be given by ?url=...
  app.get('/start', async function (req, res) {
    try {
      // `execute` resolves with the task's result (or rejects with its error),
      // so the response is sent only after the task has run. The previous
      // `cluster.queue()` call handed a pending promise to `res.send`.
      const resp = await cluster.execute(req.query.url);
      res.send(resp);
    } catch (err) {
      res.end('Error: ' + err.message);
    }
  });

  /**
   * Navigates the given cluster page to `Url`.
   *
   * NOTE(review): the browser that owns this page is reachable through
   * `page.browser()`, which is where the extension's background target would
   * have to be looked up from inside a cluster task.
   *
   * @param {object} page - Page supplied by the cluster for this job.
   * @param {string} Url - Address to open.
   */
  async function start(page, Url) {
    await page.goto(Url);
  }
})();
Now I can run my puppeteer script and click a button in my extension manually, but what I need is to call the function (the same one the extension's button click triggers) from puppeteer-cluster automatically after await page.goto(Url);
I can do that when I'm using puppeteer without the cluster, like this:
// Launch a headful browser with only the target extension loaded.
const browser = await puppeteer.launch({
  headless: false,
  args: [
    `--whitelisted-extension-id=${extensionId}`,
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    `--disable-extensions-except=${pathToExtension}`,
    `--load-extension=${pathToExtension}`,
  ],
});
const page = await browser.newPage();
await page.goto('https://www.youtube.com/', { waitUntil: 'networkidle2' });

// Wait for the extension's background page instead of searching a one-off
// snapshot of targets: if the target is missing from the snapshot, `find`
// returns undefined and the following `.page()` call throws a TypeError.
const extensionOrigin = `chrome-extension://${extensionId}/`;
const backgroundPageTarget = await browser.waitForTarget(
  (target) => target.type() === 'background_page' && target.url().startsWith(extensionOrigin)
);
const backgroundPage = await backgroundPageTarget.page();

// Mirror the background page's console output, one line per argument.
backgroundPage.on('console', (msg) => {
  msg.args().forEach((consoleArg, i) => {
    console.log(`${i}: ${consoleArg}`);
  });
});

// Run the extension's own entry point inside its background page.
await backgroundPage.evaluate(() => {
  startFunction();
  return Promise.resolve(42);
});
But I cannot get the targets when using the cluster. Any suggestions?

Puppeteer Chromium can't browse

I have a node app to evaluate a web page. It was working fine until I changed the computer. After reinstalling, Puppeteer can't find any page, getting this message:
RESULT_CODE_NORMAL_EXIT_EXP3
/**
 * Opens a headful browser through Apify's main wrapper and loads a test page.
 */
async function Prueba() {
  const Apify = require('apify');
  Apify.main(async () => {
    // Declared with `const` so it no longer leaks as an implicit global.
    const puppeteer = require('puppeteer');
    const browser = await puppeteer.launch({
      ignoreDefaultArgs: ['--disable-extensions'],
      headless: false,
    });
    const page = await browser.newPage();
    await page.goto("https://www.smoreno.com.ar");
  });
}
Use the --no-sandbox argument:
// Same launch call as in the question, with `--no-sandbox` added to the
// browser's command-line arguments.
const browser = await puppeteer.launch({
ignoreDefaultArgs: ['--disable-extensions'],
headless: false,
// Extra switch passed to the browser process on start-up.
args: ['--no-sandbox'],
});

How to login to google account with playwright?

I have following source code and run it in headful mode.
I can input email address.
But after that, there is a message saying "Couldn't sign you in. For your protection, you can't sign in from this device. Try again later, or sign in from another device.".
Do I need to set additional header or something else?
Here is my source code.
const playwright = require('playwright');

// Profile directory reused as the persistent context's user data dir.
const cookiePath = '/home/ubuntu/.config/chromium/Default';

// Headful launch with only the given extension enabled.
const persistentContextOptions = {
  headless: false,
  args: [
    `--disable-extensions-except=${pathToExtension}`,
    `--load-extension=${pathToExtension}`,
  ],
};
browser.chromium = await playwright.chromium.launchPersistentContext(cookiePath, persistentContextOptions);

const page = await browser.chromium.newPage();
const login_url = "https://accounts.google.com/signin/v2/identifier?hl=ja&flowName=GlifWebSignIn&flowEntry=ServiceLogin";
await page.goto(login_url);

// Step 1: e-mail address.
await page.fill('#identifierId', userinfo.id);
await page.click("#identifierNext");

// Step 2: password.
await page.fill('[name=password]', userinfo.password);
await page.click("#passwordNext");
My solution:
const { chromium } = require("playwright");

/**
 * Signs in to a Google account in a headful Chromium window by typing the
 * e-mail address and password into the standard sign-in flow.
 */
(async () => {
  const browser = await chromium.launch({
    headless: false,
    args: ["--disable-dev-shm-usage"],
  });
  const context = await browser.newContext({});
  const page = await context.newPage();

  // Disable the navigation timeout before any navigation starts.
  page.setDefaultNavigationTimeout(0);

  await page.goto(
    "https://accounts.google.com/signin/v2/identifier?hl=en&flowName=GlifWebSignIn&flowEntry=ServiceLogin"
  );

  await page.waitForSelector('input[type="email"]');
  await page.type('input[type="email"]', "youremail");
  await page.click("#identifierNext");

  // Playwright's option for this is `state`; `{ visible: true }` is Puppeteer's.
  await page.waitForSelector('input[type="password"]', { state: "visible" });
  await page.type('input[type="password"]', "yourpassword");
  await page.waitForSelector("#passwordNext", { state: "visible" });

  // Start waiting for the post-login navigation before clicking. The original
  // re-awaited a promise that had already resolved on the first page load, so
  // it never actually waited for the sign-in to complete.
  // NOTE(review): assumes submitting the password triggers a navigation; with
  // the timeout disabled above this waits indefinitely if it does not.
  await Promise.all([
    page.waitForNavigation({ waitUntil: "domcontentloaded" }),
    page.click("#passwordNext"),
  ]);
  //you are in
})().catch((err) => {
  // Report failures instead of leaving an unhandled rejection.
  console.error(err);
});
I think you can search for login to google with Puppeteer also.
This works for me:
add --disable-blink-features=AutomationControlled to your args.
This works for me:
// Launch Chromium while asking Playwright to leave the listed switch out of
// the arguments it passes to the browser.
const browser = await playwright.chromium.launch({
ignoreDefaultArgs: ['--disable-component-extensions-with-background-pages']
})

Error: Protocol error (Performance.enable): Target closed

I am running an application for auto-login into Amazon and Walmart but sometimes the script is throwing protocol error. I mentioned the code for launching the browser below -
// Launch a headful browser with background throttling disabled and a
// persistent profile stored in `dir`.
var browser = await puppeteer.launch({
  headless: false,
  ignoreHTTPSErrors: true,
  args: [
    '--disable-background-timer-throttling',
    '--disable-backgrounding-occluded-windows',
    '--disable-renderer-backgrounding',
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--ignore-ssl-errors',
  ],
  // Random delay of 0-10 ms added to each Puppeteer operation.
  slowMo: Math.round(Math.random() * 10),
  userDataDir: dir,
});

var page = await browser.newPage();
await page.setViewport({ width: 1280, height: 1024, deviceScaleFactor: 1 });

// Forward the page's `console.log` output to the Node console. Uses the public
// ConsoleMessage accessors; the private `_type` / `_text` fields are not part
// of the API and are not guaranteed to exist.
page.on('console', (msg) => {
  if (msg.type() === 'log') {
    console.log(msg.text());
  }
});

// Shut the browser down when the page itself crashes.
page.on('error', async (error) => {
  if (/Page crashed/i.test(error.toString())) {
    console.log("<--------- Page crashed ------------->");
    await browser.close();
  }
});

Resources