puppeteer-cluster error page.solveRecaptchas is not a function - bots

i'm converting my puppeteer code to puppeteer cluster it was working just fine now i'm facing this error "page.solveRecaptchas is not a function" when trying to 2captcha to solve hcaptcha
this is the complete code that i wrote, it just takes data from an excel file and then filled them on the website
number of the pages depends
`
const xlsx = require('xlsx')
const puppeteer = require('puppeteer-extra')
const StealthPlugin = require('puppeteer-extra-plugin-stealth')
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha')
puppeteer.use(StealthPlugin())
puppeteer.use(
RecaptchaPlugin({
provider: {
id: '2captcha',
token: 'xxxxxxxxxxxx'
},
visualFeedback: true
})
)
const {executablePath} = require('puppeteer')
const { Cluster } = require('puppeteer-cluster');
(async () => {
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_PAGE,
maxConcurrency: 10,
timeout: 150 * 1000 ,
puppeteerOptions: {
headless: false,
args: ["--no-sandbox", "--disable-setuid-sandbox","--disable-web-security"],
defaultViewport: null,
executablePath: executablePath()
},
});
cluster.on('taskerror', (err, url) => {
console.error((new Date()).toJSON() + ` Error crawling ${url}: ${err.message}`);
});
//get excele data
let fileURL = 'C:/xxxx/xxxx/xxxxx/clients2.xlsx'
let workbook = xlsx.readFile(fileURL)
const sheet_name_list = workbook.SheetNames;
let clientsArr = xlsx.utils.sheet_to_json(workbook.Sheets[sheet_name_list[0]])
console.log(clientsArr);
await cluster.task(async ({ page, data: [email , password,appiontment, firstName , lastName ] }) => {
await page.goto('https://website.com/')
await page.waitForTimeout(1000)
// close popup 1
await page.waitForSelector('#IDBodyPanelapp > div.popup-appCloseIcon');
await page.click('#IDBodyPanelapp > div.popup-appCloseIcon')
//choose region
await page.waitForSelector('#juridiction');
if(region == 'ALGER'){
region = "15#Al#10"
await page.select('#juridiction', region);
}
else{
region = "14#Ora#9"
await page.select('#juridiction', region);
}
// click to get 2nd otp
page.$eval(`#verification_code`, element =>
element.click()
)
// close popup 2
await page.waitForTimeout(1500)
await page.waitForSelector('#IDBodyPanelapp > div.popup-appCloseIcon');
await page.click('#IDBodyPanelapp > div.popup-appCloseIcon')
//solve hcaptcha and submit form
await page.waitForTimeout(2000)
await page.waitForSelector('#category');
if(appiontment == 'Normal'){
appiontment = "Normal"
await page.select('#category', appiontment);
}
else{
appiontment = "Premuim"
await page.select('#category', appiontment);
}
await page.waitForTimeout(15000)
await page.solveRecaptchas()
await Promise.all([
page.waitForNavigation(),
//click submit
page.click(`#em_tr > div.col-sm-6 > input`)
])
await page.screenshot({ path: 'screenshot.png', fullPage: true })
});
clientsArr.map((data)=>{
cluster.execute([data.email, data.password , data.appiontment, data.firstname , data.lastPrenom ]);
})
// await cluster.idle();
// await cluster.close();
})();
`
i have already searched but there are no solutions
need help and thank you

Related

how to trigger chrome extension function from puppeteer Cluster

I need to trigger a function in background.js in extension from puppeteer Cluster
here is my code :
const wait = (ms) => new Promise(resolve => setTimeout(resolve, ms));
(async () => {
const puppeteer = addExtra(puppeteerStream);
const pathToExtension = "C:/Users/Proj/test-extension";
const extensionId = 'lpnlgnlkloegindjk443sfdbjipo';
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_PAGE,
maxConcurrency: 50,
timeout: 86400000,
retryLimit: 10,
retryDelay: 1000,
puppeteer: puppeteer,
executablePath: executablePath(),
puppeteerOptions: {
executablePath: "C:/Program Files/Google/Chrome/Application/chrome.exe",
timeout: 120000,
headless: false,
pipe: true,
defaultViewport:null,
ignoreDefaultArgs: [
],
args: ["--no-sandbox", "--disable-setuid-sandbox", "--disable-notifications", "--allow-http-screen-capture", "--autoplay-policy=no-user-gesture-required",
'--disable-gpu',
'--disable-dev-shm-usage',
'--no-first-run',
'--enable-usermedia-screen-capturing',
'--auto-select-desktop-capture-source=pickme',
'--no-zygote',
`--whitelisted-extension-id=${extensionId}`,
'--enable-remote-extensions',
'--start-maximized',
`--disable-extensions-except=${pathToExtension}`,
`--load-extension=${pathToExtension}`,
]
}
});
// setup the function to be executed for each request
cluster.task(async ({ page, data: url }) => {
return start(page, url);
});
app.get('/', async function (req, res) {
res.send("index page");
});
app.get('/start', async function (req, res) { // expects URL to be given by ?url=...
try {
// run the task function for the URL
const resp = cluster.queue(req.query.url);
res.send(resp);
} catch (err) {
res.end('Error: ' + err.message);
}
});
async function start(page, Url) {
const context = page.browser().defaultBrowserContext();
await page.goto(Url);
}
})();
now I can run mt puppeteer and click a button in my extension manually but what I need is call the function (same as button click in extension) from puppeteer cluster automatically after await page.goto(Url);
I can do that when I'm using puppeteer without cluster like :
const browser = await puppeteer.launch({
headless: false,
args: [
`--whitelisted-extension-id=${extensionId}`,
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
`--disable-extensions-except=${pathToExtension}`,
`--load-extension=${pathToExtension}`,
],
});
const page = await browser.newPage();
await page.goto('https://www.youtube.com/', { waitUntil: 'networkidle2' });
const targets = await browser.targets();
const backgroundPageTarget = targets.find(target => target.type() === 'background_page' && target.url().startsWith(`chrome-extension://${extensionId}/`));
const backgroundPage = await backgroundPageTarget.page();
backgroundPage.on('console', msg => {
for (let i = 0; i < msg.args().length; i++) {
console.log(`${i}: ${msg.args()[i]}`);
}
});
await backgroundPage.evaluate(() => {
startFunction();
return Promise.resolve(42);
});
But I cannot get the targets in cluster any suggestion

Puppeteer - Unable to launch more than 2 browsers

Hi I have a simple puppeteer script that uses a different userDataDir per instance.
I'm unable to spawn more than 2 instances of puppeteer.
Here's the code:
ipcMain.on('request-mainprocess-action', (event, arg) => {
var taskData = arg[0];
var url = taskData[5];
var headlessChoice = arg[0][11];
var taskName = taskData[21];
var browserArgs = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--window-size=1920x1080',
'--lang=en-US,en',
'--disable-infobars',
'--no-zygote',
'--renderer-process-limit=1',
'--no-first-run',
'--ignore-certificate-errors',
'--ignore-certificate-errors-spki-list',
'--disable-dev-shm-usage',
'--disable-extensions',
'--allow-insecure-localhost',
'--disable-blink-features=AutomationControlled',
'--remote-debugging-port=9222'
];
(async () => {
const browser = await puppeteer.launch({
userDataDir: tasksDataDirPath+'\\'+taskName,
headless: headlessChoice,
ignoreHTTPSErrors: true,
executablePath : arg[0][23],
args: browserArgs,
timeout: 3600000
});
const page = await browser.newPage();
const pagesArray = await browser.pages();
pagesArray[0].close();
await page.emulateTimezone("Asia/Singapore");
if(headlessChoice == true){
await page.setViewport({width: (width/2)-21, height: height-111});
}
if(headlessChoice == true){
await page.goto(url, { waitUntil: 'load', timeout: 3600000 });
}
else{
await page.goto("https://bot.sannysoft.com/", { waitUntil: 'load', timeout: 3600000 });
}
await new Promise(resolve => { });
})();
});
I'm able to only start 2 Instances, on the 3rd instance it just loads a blank page.
I'm also aware of puppeteer-cluster however I realize that with puppeteer-cluster i'm unable to set the userDataDir, so i'll have to use normal puppeteer :(
Anyone able to help?

How to download PDF blob using puppeteer?

When the download button is clicked, a new tab is opened where the user can view a PDF statement.
This new tab has a URL starting with blob:, e.g.: blob:https://some-domain.com/statement-id.
How could I download this PDF statement to the file system?
Note: I'm using { headless: false } mode.
Trying to simulate the case:
import puppeteer from 'puppeteer';
import { writeFileSync } from 'fs';
// Minimal PDF from https://github.com/mathiasbynens/small#documents
const minimalPdf = `%PDF-1.
1 0 obj<</Pages 2 0 R>>endobj
2 0 obj<</Kids[3 0 R]/Count 1>>endobj
3 0 obj<</Parent 2 0 R>>endobj
trailer <</Root 1 0 R>>`;
const browser = await puppeteer.launch({ headless: false, defaultViewport: null });
try {
const [page] = await browser.pages();
await page.goto('http://example.com/');
await page.evaluate((pdf) => {
const url = URL.createObjectURL(new Blob([pdf], {type: 'application/pdf'}));
window.open(url);
}, minimalPdf);
const newTarget = await page.browserContext().waitForTarget(
target => target.url().startsWith('blob:')
);
const newPage = await newTarget.page();
const blobUrl = newPage.url();
page.once('response', async (response) => {
console.log(response.url());
const pdfBuffer = await response.buffer();
console.log(pdfBuffer.toString());
console.log('same:', pdfBuffer.toString() === minimalPdf);
writeFileSync('minimal.pdf', pdfBuffer);
});
await page.evaluate((url) => { fetch(url); }, blobUrl);
} catch(err) { console.error(err); } finally { /* await browser.close(); */ }

How to login to google account with playwright?

I have following source code and run it in headful mode.
I can input email address.
But, after that, there is message that saying "Couldn't sign you in.For your protection, you can't sign in from this device. Try again later, or sign in from another device.".
Do I need to set additional header or something else?
Here is my source code.
const playwright = require('playwright');
const cookiePath = '/home/ubuntu/.config/chromium/Default';
browser['chromium'] = await playwright['chromium'].launchPersistentContext(cookiePath,{
headless: false,
args: [
`--disable-extensions-except=${pathToExtension}`,
`--load-extension=${pathToExtension}`,
],
});
const page = await browser['chromium'].newPage();
const login_url = "https://accounts.google.com/signin/v2/identifier?hl=ja&flowName=GlifWebSignIn&flowEntry=ServiceLogin";
await page.goto(login_url);
await page.fill('#identifierId',userinfo['id']);
await page.click("#identifierNext");
await page.fill('[name=password]',userinfo['password']);
await page.click("#passwordNext");
My solution:
const { chromium } = require("playwright");
(async () => {
const browser = await chromium.launch({
headless: false,
args: ["--disable-dev-shm-usage"],
});
const context = await browser.newContext({});
const page = await context.newPage();
const navigationPromise = page.waitForNavigation({
waitUntil: "domcontentloaded",
});
await page.setDefaultNavigationTimeout(0);
await page.goto(
"https://accounts.google.com/signin/v2/identifier?hl=en&flowName=GlifWebSignIn&flowEntry=ServiceLogin"
);
await navigationPromise;
await page.waitForSelector('input[type="email"]');
await page.type('input[type="email"]', "youremail");
await page.click("#identifierNext");
await page.waitForSelector('input[type="password"]', { visible: true });
await page.type('input[type="password"]', "yourpassword");
await page.waitForSelector("#passwordNext", { visible: true });
await page.click("#passwordNext");
await navigationPromise;
//you are in
I think you can search for login to google with Puppeteer also.
This works for me:
add --disable-blink-features=AutomationControlled to your args.
This works for me:
const browser = await playwright.chromium.launch({
ignoreDefaultArgs: ['--disable-component-extensions-with-background-pages']
})

How to trigger "Followers" modal on Instagram?

I can't get my puppet to click on the "Followers" link in Instagram.
I assume that Instagram has done some anti-bot magic, but maybe I'm just too conspiratory about it.
How could I get the "Followers" modal to show?
This is my code:
const puppeteer = require('puppeteer');
var instagram = {
username: 'username',
password: 'password'
}
var run = ()=>{
(async () => {
const browser = await puppeteer.launch({headless: false, args: ['--lang=en-EN,en']});
const page = await browser.newPage();
await page.setExtraHTTPHeaders({ 'Accept-Language': 'en' });
await page.goto('https://www.instagram.com/accounts/login');
await page.waitFor(1500);
await page.click("input[type=\"text\"]")
await page.keyboard.type(instagram.username);
await page.click("input[type=\"password\"]")
await page.keyboard.type(instagram.password);
await page.click("button[type=\"submit\"]");
await page.waitForNavigation();
await page.click(".HoLwm");
await page.click(".glyphsSpriteUser__outline__24__grey_9")
await page.waitForNavigation();
await page.waitFor(2500);
await page.waitForSelector('a.-nal3');
await page.evaluate(() => document.querySelector('a.-nal3')) //does not work
//await page.click(".-nal3") //does not work
await page.waitFor(3000);
await page.waitFor(1000);
//await browser.close();
})();
}
run()
While reviewing your script I noticed that not all of your selectors are the same in my Instagram so I fixed it trying not to use exact selectors since they may change tomorrow.
But this works today (see comments on what changed in the script):
var run = ()=>{
(async () => {
const browser = await puppeteer.launch({headless: false, args: ['--lang=en-EN,en']});
const rand = function(){ return Math.floor(1000 + Math.random() * 2000) }
const page = await browser.newPage();
await page.setExtraHTTPHeaders({ 'Accept-Language': 'en' });
await page.goto('https://www.instagram.com/accounts/login');
// When you can try not to `waitFor` set periods of time
// Wait for selectors, wait for random periods of time
await page.waitForSelector('button[type=\"submit\"]');
await page.click("input[type=\"text\"]")
await page.keyboard.type(instagram.username);
await page.waitFor(rand())
await page.click("input[type=\"password\"]")
await page.keyboard.type(instagram.password);
await page.waitFor(rand())
await page.click("button[type=\"submit\"]");
await page.waitForNavigation();
await page.waitFor(rand())
// After login we're back on the main page
// Wait till React starts and paints the interface
// We're waiting for "Profile" icon to be visible
await page.waitForSelector("span[aria-label='Profile']");
// Then we click on the link inside of which the icon is located
// That is link to the profile
await page.evaluate(() => document.querySelector("span[aria-label='Profile']").parentNode.click() );
await page.waitForNavigation();
await page.waitFor(rand())
// Do not rely on a selector
// Find a link that contains "followers" in its href
await page.waitForSelector("a[href*='followers']");
const followers = await page.evaluate(() => document.querySelector("a[href*='followers']").textContent)
console.log("Total followers: " + followers);
// Click on the followers link
await page.evaluate( () => document.querySelector("a[href*='followers']").click() )
// Wait for the followers modal and profiles
await page.waitFor("div[role='presentation'] div[role='dialog'] div:nth-child(2) ul li");
// Get followers that are in the list in the second div of that modal
const people = await page.evaluate(() => {
return [...document.querySelectorAll("div[role='presentation'] div[role='dialog'] div:nth-child(2) ul li")]
.map(user => {
const profLink = user.querySelector("a[title]")
return {
"name" : profLink.textContent,
"url" : profLink.href
};
})
})
console.log(people)
// await browser.close();
})();
}

Resources