Node JS, Captcha Solving - node.js

This is my code!
const https = require('https');
const puppeteer = require('puppeteer-extra')
// add recaptcha plugin and provide it your 2captcha token (= their apiKey)
// 2captcha is the builtin solution provider but others would work as well.
// Please note: You need to add funds to your 2captcha account for this to work
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha')
// Register the reCAPTCHA plugin on puppeteer-extra, using 2captcha as the
// solving provider. The token below is a truncated placeholder for the
// account's API key.
puppeteer.use(
  RecaptchaPlugin({
    provider: {
      id: '2captcha',
      token: '93...',
    },
    visualFeedback: true, // colorize reCAPTCHAs (violet = detected, green = solved)
  }),
);
async function login() {
global.browser = await puppeteer.launch({
headless: false,
slowMo: 10,
userDataDir: 'C:\\userData',
});
global.page = await browser.pages();
const setUserAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36';
var userAgent = require('user-agents');
await page[0].setUserAgent(userAgent.toString());
console.log("Going to Website");
await page[0].goto("https://www.google.com/recaptcha/api2/demo", {
waitUntil: 'networkidle2'
});
console.log("Solving Captcha");
await page[0].solveRecaptchas()
await Promise.all([
page[0].waitForNavigation()
])
}
This is roughly my code; I have a 2captcha balance.
The code above detects the captcha and turns it purple, but it doesn't solve it. I need help.

After you get the response from 2captcha, you have to click the submit button manually using the following code.
// Wait until the demo form's submit button is present, then click it to
// submit the form.
// NOTE(review): `page` must be a single Page object here, not the array
// returned by browser.pages() — confirm against the calling code.
await page.waitForSelector('#recaptcha-demo-submit')
await page.click('#recaptcha-demo-submit')

Related

Does type input and click not support multiple lines?

I am trying to send a DM on Instagram, but each new line is sent as one message.
Does type input and click not support multiple lines?
If so, does anyone have any insight into alternative solutions, such as fixing this area of the puppeteer source code?
The simplified code is as follows. It may not work as it is because of the different language area. Sorry.
import pp from 'puppeteer';
import puppeteer from 'puppeteer-extra';
/**
 * Types `text` into the element matching `selector` as ONE multi-line entry.
 *
 * Puppeteer's keyboard typing turns a "\n" character into an Enter key press,
 * which a chat box treats as "send". Each line break is therefore entered as
 * Shift+Enter instead (presumably the newline shortcut of the message box —
 * verify against the target page).
 *
 * @param {object} page - Puppeteer page.
 * @param {string} selector - CSS selector of the text box.
 * @param {string} text - Message text; may contain "\n".
 * @returns {Promise<void>}
 */
const typeMultiline = async (page, selector, text) => {
  await page.focus(selector);
  for (const [index, line] of text.split('\n').entries()) {
    if (index > 0) {
      await page.keyboard.down('Shift');
      await page.keyboard.press('Enter');
      await page.keyboard.up('Shift');
    }
    await page.keyboard.type(line);
  }
};

/**
 * Logs in to Instagram, opens a direct-message thread and sends one
 * multi-line message. The browser is always closed, even when a step fails.
 *
 * @returns {Promise<void>}
 */
export const instagram = async () => {
  console.log('START!!!');
  const browser = await puppeteer.launch({
    executablePath: pp.executablePath(),
    args: [
      '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
    ],
    headless: true,
    slowMo: 100,
    timeout: 30000,
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({
      height: 900,
      width: 1366,
    });
    await page.goto('https://www.instagram.com/accounts/login/');
    await page.waitForSelector('input[name="username"]');
    await page.type('input[name=username]', '___your_id___');
    await page.type('input[name=password]', '___your_password___');
    // Start waiting before clicking so a fast navigation is not missed.
    await Promise.all([
      page.waitForNavigation({ waitUntil: 'domcontentloaded' }),
      page.click('button[type=submit]'),
    ]);
    await page.goto('https://www.instagram.com/direct/__your_frend_dm_direct_url__');
    const messageBox = 'textarea[placeholder="メッセージ..."]'; // japanese
    await page.waitForSelector(messageBox);
    await typeMultiline(page, messageBox, 'this is\nmultiple line');
    // A plain Enter sends the whole multi-line text as a single message.
    await page.keyboard.press('Enter');
  } finally {
    await browser.close();
  }
};
enter image description here
I want to send a multi-line message in a single transmission.

How to fix "please wait a few minutes before you try again" error instagram?

I'm using puppeteer for the first time running it on locally hosted firebase cloud functions.
I've been trying with different accounts, and I waited hours so that the error may resolves, but no luck. The error I'm getting:
I can't interact with the site, and even if I switch routes this is the only thing that pops up.
What I did/tried:
I followed this tutorial and coded the exact same app: https://www.youtube.com/watch?v=dXjKh66BR2U
Searched for hours on google if there is anything like my problem, still no solution that worked for me.
Edit:
The code I'm using is basically from fireship.io:
const puppeteer = require('puppeteer');
/**
 * Logs in to Instagram, opens the profile of `username` and collects the
 * `src` of every <img> element on that page.
 *
 * Screenshots are written to 1.png, 2.png and 3.png in the working directory.
 * The browser is always closed, even when a step fails.
 *
 * @param {string} username - Profile name appended to the Instagram URL.
 * @param {{settleMs?: number}} [options] - `settleMs` is the pause (ms) after
 *   submitting the login form; defaults to 5000.
 * @returns {Promise<string[]>} Image URLs found on the profile page.
 */
const scrapeImages = async (username, { settleMs = 5000 } = {}) => {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.instagram.com/accounts/login/');

    // Login form: wait for it to exist before typing into it.
    await page.waitForSelector('[name=username]');
    await page.screenshot({ path: '1.png' });
    await page.type('[name=username]', 'fireship_dev');
    await page.type('[name=password]', 'some-pa$$word');
    await page.screenshot({ path: '2.png' });
    await page.click('[type=submit]');

    // Social Page: plain timer instead of page.waitFor(), which newer
    // Puppeteer releases no longer provide.
    await new Promise((resolve) => setTimeout(resolve, settleMs));
    await page.goto(`https://www.instagram.com/${username}`);
    await page.waitForSelector('img ', {
      visible: true,
    });
    await page.screenshot({ path: '3.png' });

    // Execute code in the DOM
    const data = await page.evaluate(() => {
      const images = document.querySelectorAll('img');
      return Array.from(images).map((v) => v.src);
    });
    console.log(data);
    return data;
  } finally {
    await browser.close();
  }
}
The error I'm getting on console:
UnhandledPromiseRejectionWarning: TimeoutError: waiting for selector `input[name="username"]` failed: timeout 30000ms exceeded
Try to add additional headers, before your page.goto(), like this:
// Attach browser-like request headers to the page; call this before
// page.goto() so the first navigation already carries them.
await page.setExtraHTTPHeaders({
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
'upgrade-insecure-requests': '1',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9,en;q=0.8'
})
It adds headers to make puppeteer look like a normal browser used from a normal OS

Cannot download file while headless: true, works when headless: false [Puppeteer]

I'm running a script that logs into an authenticated session on a website and clicks a button to download an Excel file. I'm able to run it with no problems with headless: false, but with headless: true the file does not download.
My research suggests that the browser may be closing before the download completes. I've added a wait of about 15 seconds, which is much longer than the download should need, but I'm still not getting anything. Another solution I tried was manually removing the HeadlessChrome substring from the userAgent in case the site was blocking it, but that didn't work either. Is it okay to use headless: false in a script that is used in a production web application deployed on Heroku?
/**
 * Logs in to the site, opens the merchandising page and clicks the
 * "export as Excel" button, then waits 15 seconds for the download.
 *
 * Errors from any step are logged and swallowed (best-effort, as before); the
 * browser is now closed in every case so no Chrome process is left behind
 * when a step throws.
 *
 * @returns {Promise<void>}
 */
async function getData () {
  const wait = (ms) => new Promise(resolve => setTimeout(resolve, ms))
  let browser;
  try {
    browser = await puppeteer.launch();
    const page = await browser.newPage();
    await page.goto('<url>');
    //login
    await page.type('#username', username);
    await page.click('#signIn');
    await wait(4000)
    await page.type('#password', password);
    await page.click('#signIn');
    await page.waitForNavigation();
    await page.keyboard.press('Enter'); //click out of any pop up
    //go to merchandising page
    await page.click('#m_69-link');
    await page.waitForSelector('#ExcelReportButton', { visible: true })
    //click on export as excel icon
    await wait(4000)
    await page.click('#ExcelReportButton');
    // Give the download time to finish before the browser goes away.
    await wait(15000)
  } catch (error) {
    console.log(error)
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        // Keep the "never rejects" contract: log close failures as well.
        console.log(closeError)
      }
    }
  }
};
try by adding additional headers, it worked for me:
// Send an explicit Accept-Language header with the page's requests.
await page.setExtraHTTPHeaders({
'Accept-Language': 'en-US,en;q=0.9'
});
// Replace the page's user agent with a regular desktop Chrome string.
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36');

Playwright not able to intercept all requests

const { webkit } = require('playwright');
// Launch a visible WebKit browser, log every request the page issues and open
// the site. browser.close() is not called, so the window stays open.
(async () => {
  const browser = await webkit.launch({ headless: false });
  const context = await browser.newContext();
  const page = await context.newPage();
  // Log and continue all network requests.
  // route() is asynchronous: await it so the handler is installed before the
  // navigation below starts.
  await page.route('**', (route) => {
    console.log(route.request());
    return route.continue();
  });
  await page.goto('https://www.sahibinden.com');
})().catch((error) => {
  // Surface failures instead of leaving an unhandled rejection.
  console.error(error);
  process.exitCode = 1;
});
When I open it in incognito window and intercept using fiddler I am able to get these headers:
X-Client-SrcPort: 49784
Accept-Language: en-US,en;q=0.9
X-J3PopQvX-a: eRELp_ivj7vKEvBmbMRSG-kfU55GIHd5Wc4a06qtEa8-W4hUa90nd0GhKDAoOzTOE6nbdU270qt3pEYVVmFKgVTToiLUqAr9yF684URRjFEdbaPsZXZSQYAJBYyvVsSi_K44SKCEGPl3aAksgQ35gw27vJvvBuscZzVhLbW5PQgfeiMKVbRrWpbR5xHUuVtZ2WWJm7HboV5kXXpkiMPA5vOktA-g4mGZm_zh6ag-OMFmLSfnepSm4PEc=xUFop6_-RYgRLI020e8S8uXXWsuQR95QLXnPCQHvZ3yRFRBtHAG6j0natnjXAxodDcm2Z_tiq7H36uRojXmBVE6A=BwWr0j-cXmsXEuSiKPairOZ6lOskntyNSoE0mao=ge0YrWgZnvEWQIb8FPadYSSufxYmxuGVur0v-5RBRcw=VCk6e6d2sbMDmuvr6zR-F_T_A4K-q-8LlnXywUUfpqG-GInbAyExoE5QdJoRiI8IdTX5NHY0uzddRbfuV9ItOwwY-0iYSMgQ=Aqr=XLeqozjSV7r_Enq0FPcQjNklgG6unf=I3VMURNTQb42pvqWK5RSa-FAD3bTCp4a_OAV=Zx3cIV9JTsSpKKMmFo5q4PMPbj9boGSLSqORhvP95PwMAxyEeMUNbnorRlVOlKH3h2h2sH9rf768CgsXo-GMzCWwQXfoEuKGyl2vRQeP_rddkJ3RHuawS=Z38p69xNcjXr9J_zX4YVoAdXxN56Jm=Qo5K8Xsz3ndwSuvBJYnPXfXNr9Et80GYIjcx-TOkr5ZmPAbRIHkZVKacYvNG_utrkSQmn63IYYydikScedTdnUW5ky=gW3sliikzmZh-cRGaGIaA7FD5Un0Gc8TER6x_mKq-vL5nSuCOnHmzRTNBXZteNk8Gghe6V=ZnRNfCWFeiijdqPP0iPohDEhnjtA32h5vvyCgkx6UWBhAqSsU4IJWGPx8v9S3NtcBuYgTdexPHjnpYVF2CjufPFq0YVL5-di_ATRDfQktSh9gnBdYtIP9ejYLrq6kW6bBm5dxJkgGFD2aEz4bO2QEvoMJ=kuQGc34MBpAG2=PmtA9TErE0WfFftgec-sQfDFn2v4kerFl88C6lGdR=QQu3RBYOdW3V2ZDOONEmzXKAWQDpEZtBrID0Sb8mkQFLcOlbN28tcR8kpha_2E=DgBgLETJE_NqLs=60RdznfPKTcbGmrpBpIZGUKYGoqYzu7AHG-U9MtUAzPP-PVrgA=uOHqN_iFtPl_SCREmnCCLmihX=SsiMCGtaTqvw3RHLYhcUociEskbQh8a4wpniDSp5OM82AHGyrRJK0b29J_KzEgzeGlNn-CLTa=8aaJRfXIIBxHs9Jr6o9Q2DKuwKxvLJpN9kkSMLNbn9Q8R7NeeEeR9oECc9D8h8WjCGkCYJXIPtp-0hpRrlkvvF5ezZhFE0D=GewHdENPEj7CTjqfClVQ=gtzPQF6lG8UX9SKYKxOdyH-jKXJNp2KYWNFTWXS6ZHc56hsw9apvtbxuMciLZYjd4ZGFmLOpHsd7liyRk3QalSm0zxCnnOAL88S=VKDvdWm_qHkslUSEYOOznKY6wqfMlUUxIvRQISzWKY56ajlfyx0qEY6VHuiWbph2kwLGyuVZ6zwmO7RtVCiwp3poGfEnaA2ILv2IwjYm_BIcw8W2joV5G5LwkyEoeUquQWGg9EI_NEW4TO=wKOnmnwFI_f43k595lTHhiYCAwamNnFuiTXA4ZjE5J3S_O45altEdQCYJyJey2K5_iUa--qTmB3OLVZcDA05jN9TTer=-8BCZm=B9F=L=oHYSUlUC=TuzgBRH52I9BnRHDhLW8-U5yy-zYuoq55W-pboEVmlpIocbkE5A6v4gZkgxkLMKk9OzhOGNe7IgIZ=rzP7ocA0CfIKUj80anZlQj8FpVapjmpjWfJlxiZQ9oVcxZWWeSZ9ZKbrGBbx4kja_kjZeVhXOtH0MQLdyqtjIuPKK9pFk
vpAOBRv4xrDPpZPOzi59F97zhc2IPhNlgEq9-Dvnl2w40xKRnjBS2cihGy_s2HI2Rf8HsrtHqrWuVtZ7clf2pEwHmdEzIr3aIHSwRzIma_ymwUZ3y4mPqs_d=JtpbqFqd6xykIT_=6Ftz_tqDC4xgTsNUL7-f57F45qpTw5QUNi7CpNUR=wvGhEOskiY2A60ID8IzGlz5hAu785vhseZHnv2fIl5KeH=I=2TH7wJGsUQ-=XMMzg5O8f940hyGJ-y4cHvWppYDK6J6LRvU-AR_Ojb4YgGj9DXgWIWCFrbiS0XY53O-_MvUfsLiURAmsfy5ReKw6-i=rTJ-kN_tf7xcOEbHNCVjFR=ZpJV6LIsI2S5loedjIRHWPhqVI5rJe-NXLs9bo53qnYsWFHbMonxaERtC2ZTYAhbWZglUGHAjv-hqvq22kCGb5fCte6Zb5brvE9CPI8Y8PZ7BCFfcbTpLOAesvHNAxJMaBUVhRGOoYVAL7tq5E-FVmH9L=bdbBgOAXR9s5lHwMRleEOIpqHuyC32z5CC75FAJVly8IP7vLjCjKrDHhNu6yc_5MOvbOQO8CCr0TwLj_XsoSkshH9Q5ICGARL64Ts8F7pmDKC9EDyKR7lZ-tkAJXcCgJd_KCIGnHfULSlwoFrj5QPcVYvGMcaY3Jl4rjyIhctEKXkUiGDwzpjnYbhzVBpsIR=LW6VLVxQXinIFkyBob7lFlyX_y=Gm=5garfnBd29OZWjfym0qh0Em24hJXlbWE7ufXNFUAA8dPNfzjsFk=hOtnpqAokP56vsnOnK0Jl_fOl=5ycCZG6rqGeIDXbRbM_AEy=CPJbE_wbbB2S2Mp9g49Bv9tn_S3LPG_aA_aAWGVDSIVs-3=kwdIybN8y
X-J3PopQvX-d: AAaCxAiBBKCMgcGASZAQgICAACIqGSI6QDSXAaoclOq4s6T2AkpRq7ZjyDmvHZIRbYeFd-E
X-Forwarded-For: 39.41.162.210
X-Forwarded-Proto: https
X-J3PopQvX-f: A3r8AMZyAQAABWpomSNWTAA_aOd-ML0d_DenMFOwBhK_GRxSWvW5FLJRxZ2sAScpotKucnW8wH8AAEB3AAAAAA==
X-J3PopQvX-c: ACCn_sVyAQAAdkviREb3CdA4iVdSrR4-Gc4Y3RxJWrLhnUx8XioZIjpANJcB
X-TLS-Version: 771
X-J3PopQvX-z: q
But I'm unable to get these headers by executing the above code. How do I get them?
Edit:
I used the answer and it works fine. There is only one missing part.
page.route('**', route => {
const request = route.request()
//console.log(request.url(), JSON.stringify(request.headers()));
return route.continue();
});
let pageRes = await page.goto(url);
await page.waitForTimeout(3 * 1000)
//await page.unroute("**");
return pageRes;
When I print pageRes.request().headers(), it doesn't give the j3pop headers. It simply gives:
{
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.2 Safari/605.1.15'
}
Edit:
This is what I tried in interactive Playwright
const { webkit } = require('playwright');
// Open the listing page in WebKit, log the headers of every request and, after
// a 3-second grace period, print the headers of the most recent request.
(async () => {
  const browser = await webkit.launch();
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    // Declared locally: assigning to an undeclared name creates an implicit
    // global (and throws in strict mode / ES modules).
    let headers = {};
    // Log and continue all network requests; await so the handler is
    // installed before navigation starts.
    await page.route('**/*', (route) => {
      const request = route.request();
      console.log(request.url(), JSON.stringify(request.headers()));
      headers = JSON.stringify(request.headers());
      return route.continue();
    });
    await page.goto('https://www.sahibinden.com/en/cars?sorting=date_desc');
    // Requests issued by page scripts after load only show up during this
    // wait, so print the captured headers afterwards, not before.
    await page.waitForTimeout(3 * 1000);
    console.log("headerw");
    console.log(headers);
  } finally {
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
`headers` isn't giving me the j3pop headers.
The issue is that these requests are made after the page has fully loaded from the browser's perspective. So you have to either use something like waitForTimeout, which waits X milliseconds, or use waitForSelector if there is an element that only appears after your request has been made.
See this slightly modified example, which waits additional 3 seconds and logs then your request with the special headers to the console.
const { webkit } = require('playwright');
// Open the site in WebKit, log URL and headers of every request, keep
// listening for 3 more seconds after load, then close the browser.
(async () => {
  const browser = await webkit.launch();
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    // Log and continue all network requests
    await page.route('**', (route) => {
      const request = route.request();
      console.log(request.url(), JSON.stringify(request.headers()));
      return route.continue();
    });
    await page.goto('https://www.sahibinden.com');
    // Extra time for requests that page scripts issue after the load event.
    await page.waitForTimeout(3 * 1000);
  } finally {
    // Close the browser even if navigation or routing failed.
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Or interactive here: https://try.playwright.tech/?s=17v5y
You need to add await before page.route(...), or else routing may not be set up yet when page.goto runs.
ref: https://playwright.dev/docs/api/class-page#page-route

puppeteer not able to log into google account

In puppeteer when opening regular chrome and trying to access a GMAIL account, get message:
Couldn't sign you in This browser or app may not be secure. Learn more
Try using a different browser. If you’re already using a supported
browser, you can refresh your screen and try again to sign in.
Code:
var p = require("puppeteer");
var a = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36"
// Launch the locally installed Chrome, open the Google sign-in page with a
// custom user agent and click the ".signUpLink" element once it is visible.
// The steps run strictly one after another; previously they were all started
// at once inside Promise.all, so the click could fire before the page loaded.
p.launch({
  headless: false,
  args: [],
  executablePath: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
}).then(async (b) => {
  console.log("ASD");
  const g = await b.newPage();
  // The user agent must be set before navigating for it to apply to the request.
  await g.setUserAgent(a);
  await g.goto(
    "https://accounts.google.com/signin/v2/identifier?service=grandcentral&passive=1209600&continue=https%3A%2F%2Fvoice.google.com%2Fsignup&followup=https%3A%2F%2Fvoice.google.com%2Fsignup&flowName=GlifWebSignIn&flowEntry=ServiceLogin",
    { waitUntil: 'networkidle0' }
  );
  await g.waitForSelector(".signUpLink", {
    visible: true
  });
  console.log("hi");
  // Runs inside the page: logs the link element to the browser console.
  await g.evaluate(() => {
    var v = document.querySelector(".signUpLink")
    console.log("hi", v)
  });
  await g.click(".signUpLink");
}).catch((error) => {
  // Report launch/navigation failures instead of an unhandled rejection.
  console.error(error);
});
fixes?
Google login doesn't allow automated login.
It was just recently they came up with this approach
I was able to get it to work with the below code.
await page.setBypassCSP(true) does the magic.
// Launch Chromium through puppeteer-extra with the stealth plugin enabled,
// using the binary, flags and viewport supplied by chrome-aws-lambda, then
// open a page with Content-Security-Policy bypassing turned on.
const puppeteer = require('puppeteer-extra');
// const chromium = require('#sparticuz/chrome-aws-lambda');
const chromium = require('chrome-aws-lambda');
const pluginStealth = require('puppeteer-extra-plugin-stealth')();
// Print the evasion names the stealth plugin instance exposes.
console.log(pluginStealth.availableEvasions); // => Set { 'user-agent', 'console.debug' }
puppeteer.use(pluginStealth);
// Replaces setMaxListeners on the plugin instance with a no-op.
// NOTE(review): presumably a workaround for a listener-limit warning — confirm.
pluginStealth.setMaxListeners = () => {};
const browser = await puppeteer.launch({
args: chromium.args,
defaultViewport: chromium.defaultViewport,
// Pipe the browser process output to this process only when STAGE is 'dev'.
dumpio: process.env.STAGE === 'dev',
executablePath: await chromium.executablePath,
headless: chromium.headless,
ignoreHTTPSErrors: true,
});
const page = await browser.newPage();
// Must be called before navigating for the bypass to apply to the loaded page.
await page.setBypassCSP(true)

Resources