Playwright not able to intercept all requests - node.js

const { webkit } = require('playwright');

(async () => {
  const browser = await webkit.launch({ headless: false });
  const context = await browser.newContext();
  const page = await context.newPage();

  // Log and continue all network requests.
  // FIX: page.route() returns a promise and must be awaited — otherwise the
  // route may not be installed yet when page.goto() starts navigating, and
  // some requests slip through uninspected.
  await page.route('**', (route) => {
    console.log(route.request());
    return route.continue();
  });

  await page.goto('https://www.sahibinden.com');
  //await browser.close();
})();
When I open the site in an incognito window and intercept the traffic with Fiddler, I can see these headers:
X-Client-SrcPort: 49784
Accept-Language: en-US,en;q=0.9
X-J3PopQvX-a: eRELp_ivj7vKEvBmbMRSG-kfU55GIHd5Wc4a06qtEa8-W4hUa90nd0GhKDAoOzTOE6nbdU270qt3pEYVVmFKgVTToiLUqAr9yF684URRjFEdbaPsZXZSQYAJBYyvVsSi_K44SKCEGPl3aAksgQ35gw27vJvvBuscZzVhLbW5PQgfeiMKVbRrWpbR5xHUuVtZ2WWJm7HboV5kXXpkiMPA5vOktA-g4mGZm_zh6ag-OMFmLSfnepSm4PEc=xUFop6_-RYgRLI020e8S8uXXWsuQR95QLXnPCQHvZ3yRFRBtHAG6j0natnjXAxodDcm2Z_tiq7H36uRojXmBVE6A=BwWr0j-cXmsXEuSiKPairOZ6lOskntyNSoE0mao=ge0YrWgZnvEWQIb8FPadYSSufxYmxuGVur0v-5RBRcw=VCk6e6d2sbMDmuvr6zR-F_T_A4K-q-8LlnXywUUfpqG-GInbAyExoE5QdJoRiI8IdTX5NHY0uzddRbfuV9ItOwwY-0iYSMgQ=Aqr=XLeqozjSV7r_Enq0FPcQjNklgG6unf=I3VMURNTQb42pvqWK5RSa-FAD3bTCp4a_OAV=Zx3cIV9JTsSpKKMmFo5q4PMPbj9boGSLSqORhvP95PwMAxyEeMUNbnorRlVOlKH3h2h2sH9rf768CgsXo-GMzCWwQXfoEuKGyl2vRQeP_rddkJ3RHuawS=Z38p69xNcjXr9J_zX4YVoAdXxN56Jm=Qo5K8Xsz3ndwSuvBJYnPXfXNr9Et80GYIjcx-TOkr5ZmPAbRIHkZVKacYvNG_utrkSQmn63IYYydikScedTdnUW5ky=gW3sliikzmZh-cRGaGIaA7FD5Un0Gc8TER6x_mKq-vL5nSuCOnHmzRTNBXZteNk8Gghe6V=ZnRNfCWFeiijdqPP0iPohDEhnjtA32h5vvyCgkx6UWBhAqSsU4IJWGPx8v9S3NtcBuYgTdexPHjnpYVF2CjufPFq0YVL5-di_ATRDfQktSh9gnBdYtIP9ejYLrq6kW6bBm5dxJkgGFD2aEz4bO2QEvoMJ=kuQGc34MBpAG2=PmtA9TErE0WfFftgec-sQfDFn2v4kerFl88C6lGdR=QQu3RBYOdW3V2ZDOONEmzXKAWQDpEZtBrID0Sb8mkQFLcOlbN28tcR8kpha_2E=DgBgLETJE_NqLs=60RdznfPKTcbGmrpBpIZGUKYGoqYzu7AHG-U9MtUAzPP-PVrgA=uOHqN_iFtPl_SCREmnCCLmihX=SsiMCGtaTqvw3RHLYhcUociEskbQh8a4wpniDSp5OM82AHGyrRJK0b29J_KzEgzeGlNn-CLTa=8aaJRfXIIBxHs9Jr6o9Q2DKuwKxvLJpN9kkSMLNbn9Q8R7NeeEeR9oECc9D8h8WjCGkCYJXIPtp-0hpRrlkvvF5ezZhFE0D=GewHdENPEj7CTjqfClVQ=gtzPQF6lG8UX9SKYKxOdyH-jKXJNp2KYWNFTWXS6ZHc56hsw9apvtbxuMciLZYjd4ZGFmLOpHsd7liyRk3QalSm0zxCnnOAL88S=VKDvdWm_qHkslUSEYOOznKY6wqfMlUUxIvRQISzWKY56ajlfyx0qEY6VHuiWbph2kwLGyuVZ6zwmO7RtVCiwp3poGfEnaA2ILv2IwjYm_BIcw8W2joV5G5LwkyEoeUquQWGg9EI_NEW4TO=wKOnmnwFI_f43k595lTHhiYCAwamNnFuiTXA4ZjE5J3S_O45altEdQCYJyJey2K5_iUa--qTmB3OLVZcDA05jN9TTer=-8BCZm=B9F=L=oHYSUlUC=TuzgBRH52I9BnRHDhLW8-U5yy-zYuoq55W-pboEVmlpIocbkE5A6v4gZkgxkLMKk9OzhOGNe7IgIZ=rzP7ocA0CfIKUj80anZlQj8FpVapjmpjWfJlxiZQ9oVcxZWWeSZ9ZKbrGBbx4kja_kjZeVhXOtH0MQLdyqtjIuPKK9pFk
vpAOBRv4xrDPpZPOzi59F97zhc2IPhNlgEq9-Dvnl2w40xKRnjBS2cihGy_s2HI2Rf8HsrtHqrWuVtZ7clf2pEwHmdEzIr3aIHSwRzIma_ymwUZ3y4mPqs_d=JtpbqFqd6xykIT_=6Ftz_tqDC4xgTsNUL7-f57F45qpTw5QUNi7CpNUR=wvGhEOskiY2A60ID8IzGlz5hAu785vhseZHnv2fIl5KeH=I=2TH7wJGsUQ-=XMMzg5O8f940hyGJ-y4cHvWppYDK6J6LRvU-AR_Ojb4YgGj9DXgWIWCFrbiS0XY53O-_MvUfsLiURAmsfy5ReKw6-i=rTJ-kN_tf7xcOEbHNCVjFR=ZpJV6LIsI2S5loedjIRHWPhqVI5rJe-NXLs9bo53qnYsWFHbMonxaERtC2ZTYAhbWZglUGHAjv-hqvq22kCGb5fCte6Zb5brvE9CPI8Y8PZ7BCFfcbTpLOAesvHNAxJMaBUVhRGOoYVAL7tq5E-FVmH9L=bdbBgOAXR9s5lHwMRleEOIpqHuyC32z5CC75FAJVly8IP7vLjCjKrDHhNu6yc_5MOvbOQO8CCr0TwLj_XsoSkshH9Q5ICGARL64Ts8F7pmDKC9EDyKR7lZ-tkAJXcCgJd_KCIGnHfULSlwoFrj5QPcVYvGMcaY3Jl4rjyIhctEKXkUiGDwzpjnYbhzVBpsIR=LW6VLVxQXinIFkyBob7lFlyX_y=Gm=5garfnBd29OZWjfym0qh0Em24hJXlbWE7ufXNFUAA8dPNfzjsFk=hOtnpqAokP56vsnOnK0Jl_fOl=5ycCZG6rqGeIDXbRbM_AEy=CPJbE_wbbB2S2Mp9g49Bv9tn_S3LPG_aA_aAWGVDSIVs-3=kwdIybN8y
X-J3PopQvX-d: AAaCxAiBBKCMgcGASZAQgICAACIqGSI6QDSXAaoclOq4s6T2AkpRq7ZjyDmvHZIRbYeFd-E
X-Forwarded-For: 39.41.162.210
X-Forwarded-Proto: https
X-J3PopQvX-f: A3r8AMZyAQAABWpomSNWTAA_aOd-ML0d_DenMFOwBhK_GRxSWvW5FLJRxZ2sAScpotKucnW8wH8AAEB3AAAAAA==
X-J3PopQvX-c: ACCn_sVyAQAAdkviREb3CdA4iVdSrR4-Gc4Y3RxJWrLhnUx8XioZIjpANJcB
X-TLS-Version: 771
X-J3PopQvX-z: q
But I'm unable to get these headers by executing the above code. How do I get them?
Edit:
I used the answer and it works fine. There is only one missing part.
// Intercept every request; logging is disabled, each request is continued.
// NOTE(review): fragment — assumes `page` and `url` are defined by the
// enclosing function, and that this runs inside an async function.
page.route('**', route => {
const request = route.request()
//console.log(request.url(), JSON.stringify(request.headers()));
return route.continue();
});
// Keep the navigation response so the caller can inspect it.
let pageRes = await page.goto(url);
// Give post-load XHRs (which carry the extra anti-bot headers) time to fire.
await page.waitForTimeout(3 * 1000)
//await page.unroute("**");
return pageRes;
When I print pageRes.request().headers(), it doesn't give the j3pop headers. It simply gives:
{
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.2 Safari/605.1.15'
}
Edit:
This is what I tried in interactive Playwright
const { webkit } = require('playwright');

(async () => {
  const browser = await webkit.launch();
  const context = await browser.newContext();
  const page = await context.newPage();

  // Holds the stringified headers of the most recent intercepted request.
  // FIX: the original assigned an implicit global; declare with `let`.
  let headers = {};

  // FIX: await page.route() so interception is active before goto() runs.
  await page.route('**/*', (route) => {
    const request = route.request();
    console.log(request.url(), JSON.stringify(request.headers()));
    headers = JSON.stringify(request.headers());
    return route.continue();
  });

  let url = await page.goto('https://www.sahibinden.com/en/cars?sorting=date_desc');
  console.log("headerw");
  console.log(headers);

  // The special headers are sent by requests that fire after load — wait for them.
  await page.waitForTimeout(3 * 1000);
  await browser.close();
})();
headers isn't giving me the j3pop headers

The issue is, that the requests will be made, after the page is fully loaded from the browser perspective. So you have to either use something like waitForTimeout which waits X milliseconds or use waitForSelector if you have an element which will in the end appear after your request was made.
See this slightly modified example, which waits additional 3 seconds and logs then your request with the special headers to the console.
const { webkit } = require('playwright');

(async () => {
  // Spin up WebKit with an isolated context and a fresh page.
  const browser = await webkit.launch();
  const context = await browser.newContext();
  const page = await context.newPage();

  // Install the interceptor before navigating; log URL + headers of every
  // request and let it proceed unchanged.
  const logAndContinue = (route) => {
    const req = route.request();
    console.log(req.url(), JSON.stringify(req.headers()));
    return route.continue();
  };
  await page.route('**', logAndContinue);

  await page.goto('https://www.sahibinden.com');

  // Requests made after the load event carry the interesting headers —
  // give them three seconds to fire before shutting down.
  await page.waitForTimeout(3 * 1000);
  await browser.close();
})();
Or interactive here: https://try.playwright.tech/?s=17v5y

You need to add await before page.route(... or else routing may not be setup yet when page.goto runs.
ref: https://playwright.dev/docs/api/class-page#page-route

Related

Node JS, Captcha Solving

This is my code!
const https = require('https');
const puppeteer = require('puppeteer-extra');

// add recaptcha plugin and provide it your 2captcha token (= their apiKey)
// 2captcha is the builtin solution provider but others would work as well.
// Please note: You need to add funds to your 2captcha account for this to work
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha');
puppeteer.use(
  RecaptchaPlugin({
    provider: {
      id: '2captcha', token: '93...' },
    visualFeedback: true // colorize reCAPTCHAs (violet = detected, green = solved)
  })
); // FIX: the original never closed the puppeteer.use( call — a syntax error.

// Launches Chrome with a persistent profile, spoofs the user agent, opens the
// reCAPTCHA demo page and asks the plugin to solve the captcha.
async function login() {
  global.browser = await puppeteer.launch({
    headless: false,
    slowMo: 10,
    userDataDir: 'C:\\userData',
  });
  // browser.pages() returns an ARRAY of pages, hence page[0] below.
  global.page = await browser.pages();
  const setUserAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.75 Safari/537.36';
  var userAgent = require('user-agents');
  // NOTE(review): user-agents exports a class; calling toString() on the
  // constructor yields function source, not a UA string. This probably should
  // be `new userAgent().toString()` — confirm against the package README.
  await page[0].setUserAgent(userAgent.toString());
  console.log("Going to Website");
  await page[0].goto("https://www.google.com/recaptcha/api2/demo", {
    waitUntil: 'networkidle2'
  });
  console.log("Solving Captcha");
  // Fills in the captcha token; the form must still be submitted manually
  // (click '#recaptcha-demo-submit' afterwards — see the accepted answer).
  await page[0].solveRecaptchas();
  await Promise.all([
    page[0].waitForNavigation()
  ]);
}
This is roughly my code, and I have a 2captcha balance.
The code above doesn't solve the captcha — it detects the captcha and turns it purple, but it doesn't solve it. I need help.
After you get the response from 2captcha, you have to click the submit button manually using the following code:
await page.waitForSelector('#recaptcha-demo-submit')
await page.click('#recaptcha-demo-submit')

How to fix "please wait a few minutes before you try again" error instagram?

I'm using puppeteer for the first time running it on locally hosted firebase cloud functions.
I've been trying with different accounts, and I waited hours so that the error may resolves, but no luck. The error I'm getting:
I can't interact with the site, and even if I switch routs this is the only thing popping up.
What I did/tried:
I followed this tutorial and coded the exact same app: https://www.youtube.com/watch?v=dXjKh66BR2U
Searched for hours on google if there is anything like my problem, still no solution that worked for me.
Edit:
The code I'm using is basically from fireship.io:
const puppeteer = require('puppeteer');

/**
 * Logs into Instagram, navigates to `username`'s profile and returns the
 * src URLs of every <img> element found on the profile page.
 * @param {string} username - Instagram handle to scrape.
 * @returns {Promise<string[]>} image URLs found on the profile page.
 */
const scrapeImages = async (username) => {
  const browser = await puppeteer.launch({ headless: true });
  const page = await browser.newPage();
  await page.goto('https://www.instagram.com/accounts/login/');

  // Login form
  await page.screenshot({ path: '1.png' });
  await page.type('[name=username]', 'fireship_dev');
  await page.type('[name=password]', 'some-pa$$word');
  await page.screenshot({ path: '2.png' });
  await page.click('[type=submit]');

  // Social Page
  // FIX: page.waitFor() was deprecated and removed from Puppeteer;
  // page.waitForTimeout() is the direct replacement.
  await page.waitForTimeout(5000);
  await page.goto(`https://www.instagram.com/${username}`);
  await page.waitForSelector('img', {
    visible: true,
  });
  await page.screenshot({ path: '3.png' });

  // Execute code in the DOM
  const data = await page.evaluate(() => {
    const images = document.querySelectorAll('img');
    const urls = Array.from(images).map((v) => v.src);
    return urls;
  });

  await browser.close();
  console.log(data);
  return data;
};
The error I'm getting on console:
UnhandledPromiseRejectionWarning: TimeoutError: waiting for selector `input[name="username"]` failed: timeout 30000ms exceeded
Try to add additional headers, before your page.goto(), like this:
// Send browser-like headers on every request so the site treats the
// automated session as a normal browser. Call this BEFORE page.goto().
// NOTE(review): fragment — assumes an async context and an existing `page`.
await page.setExtraHTTPHeaders({
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
'upgrade-insecure-requests': '1',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9,en;q=0.8'
})
It adds headers to make puppeteer look like a normal browser used from a normal OS

puppeteer not able to log into google account

In puppeteer when opening regular chrome and trying to access a GMAIL account, get message:
Couldn't sign you in This browser or app may not be secure. Learn more
Try using a different browser. If you’re already using a supported
browser, you can refresh your screen and try again to sign in.
Code:
var p = require("puppeteer");
var a = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36";

// Launch the locally installed Chrome (not bundled Chromium), spoof the UA,
// open the Google account chooser and click the Voice sign-up link.
p.launch({
  headless: false,
  args: [
  ],
  executablePath: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
}).then(async (b) => {
  console.log("ASD");
  const g = await b.newPage();

  // FIX: the original fired goto/waitForNavigation/waitForSelector/click/
  // evaluate together inside one Promise.all — order-dependent operations
  // raced each other (e.g. the click could run before navigation finished),
  // and none of the promises were awaited. Sequence them instead.
  // Set the UA before navigating so the first request already carries it.
  await g.setUserAgent(a);

  await g.goto("https://accounts.google.com/signin/v2/identifier?service=grandcentral&passive=1209600&continue=https%3A%2F%2Fvoice.google.com%2Fsignup&followup=https%3A%2F%2Fvoice.google.com%2Fsignup&flowName=GlifWebSignIn&flowEntry=ServiceLogin", { waitUntil: 'networkidle0' });

  await g.waitForSelector(".signUpLink", {
    visible: true
  });
  await g.click(".signUpLink");

  // Runs inside the page; its console.log appears in the BROWSER console.
  await g.evaluate(() => {
    var v = document.querySelector(".signUpLink");
    console.log("hi", v);
  });
});
fixes?
Google login doesn't allow automated sign-in; this restriction was introduced fairly recently.
I was able to get it to work with the below code.
await page.setBypassCSP(true) does the magic.
// Stealth-enabled Puppeteer setup using the AWS-Lambda chromium build.
const puppeteer = require('puppeteer-extra');
// const chromium = require('#sparticuz/chrome-aws-lambda');
const chromium = require('chrome-aws-lambda');
const pluginStealth = require('puppeteer-extra-plugin-stealth')();
console.log(pluginStealth.availableEvasions); // => Set { 'user-agent', 'console.debug' }
puppeteer.use(pluginStealth);
// Stub setMaxListeners to silence listener-limit warnings from the plugin.
pluginStealth.setMaxListeners = () => {};
// NOTE(review): top-level await — this snippet must run inside an async
// function or an ES module to be valid.
const browser = await puppeteer.launch({
args: chromium.args,
defaultViewport: chromium.defaultViewport,
dumpio: process.env.STAGE === 'dev',
executablePath: await chromium.executablePath,
headless: chromium.headless,
ignoreHTTPSErrors: true,
});
const page = await browser.newPage();
// Per the answer above, bypassing CSP is what makes the login work.
await page.setBypassCSP(true)

puppeteer: Access JSON response of a specific request as in the network tab of DevTools

I'd like to directly get the response of the last HTTP request shown in the screenshot.
The current puppeteer code is shown below. Could anybody show me how to modify it so that it will get the JSON response directly from the browser? Thanks.
const puppeteer = require('puppeteer');

(async () => {
  // const browser = await puppeteer.launch();
  const browser = await puppeteer.launch({
    headless: false
    , args: ['--user-agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3312.0 Safari/537.36"']
  });
  const page = await browser.newPage();

  // FIX: the original navigated to the same URL twice back-to-back.
  await page.goto('https://www.genecards.org/cgi-bin/carddisp.pl?gene=BSCL2');

  // FIX: XPath attribute tests use '@', not '#' — "//div[#id=...]" is an
  // invalid XPath expression and page.$x would throw.
  const linkHandlers = await page.$x("//div[@id='enhancers']//a[@data-track-event='Table See-All']");
  if (linkHandlers.length > 0) {
    await linkHandlers[0].click();
  } else {
    throw new Error("Link not found");
  }

  const html = await page.content();
  //await browser.close();
  console.log(html);
})();
You can use page.waitForResponse to wait for the response and response.json to parse the response as JSON.
Code
Replace the await linkHandlers[0].click(); part by this:
const [response] = await Promise.all([
page.waitForResponse(response => response.url().includes('/gene/api/data/Enhancers')),
linkHandlers[0].click()
]);
const dataObj = await response.json();
console.log(dataObj);
This will first wait for the response (while in parallel making the click). After the response is detected the response is parsed as JSON. To get the response result as plain text (instead of parsing it), you can use response.text()

Proper request with async/await in Node.JS

In my program I make async call for my function from another API module:
var info = await api.MyRequest(value);
Module code:
var request = require("request");

/**
 * GET http://some_url?key=<value> and return the parsed JSON body.
 *
 * FIX: the `request` library is callback-based and does NOT return a promise,
 * so `await request(options)` resolved immediately with the request object —
 * exactly the symptom described in the question. Wrap the callback in a
 * Promise (or switch to request-promise / got / axios).
 *
 * @param {*} value - value for the `key` query-string parameter.
 * @returns {Promise<*>} parsed response body, or undefined on error
 *   (error-swallowing behavior of the original is preserved).
 */
module.exports.MyRequest = async function MyRequest(value) {
  var options = {
    uri: "http://some_url",
    method: "GET",
    qs: { // Query string like ?key=value&...
      key: value
    },
    json: true
  };
  try {
    var result = await new Promise(function (resolve, reject) {
      request(options, function (err, response, body) {
        if (err) return reject(err);
        resolve(body);
      });
    });
    return result;
  } catch (err) {
    console.error(err);
  }
};
Execution returns immediately; however, result (and therefore info) contains the request object and the request body — info.body looks like key=value&..., not the response body I need.
What am I doing wrong? How do I fix it? What is the proper usage of request with async, or does it only work correctly with promises, as mentioned here: Why await is not working for node request module? The following article mentions it is possible: Mastering Async Await in Node.js.
You need to use the request-promise module, not the request module or http.request().
await works on functions that return a promise, not on functions that return the request object and expect you to use callbacks or event listeners to know when things are done.
The request-promise module supports the same features as the request module, but asynchronous functions in it return promises so you can use either .then() or await with them rather than the callbacks that the request module expects.
So, install the request-promise module and then change this:
var request = require("request");
to this:
const request = require("request-promise");
Then, you can do:
var result = await request(options);
EDIT Jan, 2020 - request() module in maintenance mode
FYI, the request module and its derivatives like request-promise are now in maintenance mode and will not be actively developed to add new features. You can read more about the reasoning here. There is a list of alternatives in this table with some discussion of each one.
I have been using got() myself and it's built from the beginning to use promises, supports many of the same options as the request() module and is simple to program.
Pretty sure you can also do the following. If what you need does not return Promise by default you can provide it via new Promise method. Above answer is less verbose though.
/**
 * Promise wrapper around the callback-based request.get so callers can
 * simply `await getBody(url)` to receive the response body.
 * @param {string} url - address to GET.
 * @returns {Promise<*>} resolves with the body, rejects with the error.
 */
async function getBody(url) {
  const options = { url: url, method: 'GET' };
  // Adapt the callback API to a promise: reject on error, resolve with body.
  return new Promise(function (resolve, reject) {
    request.get(options, function (err, resp, body) {
      if (err) {
        reject(err);
        return;
      }
      resolve(body);
    });
  });
}
I just managed to get it to work with async/await. I wrapped it inside a function promisifiedRequest to return a promise that runs the request callback and resolves or rejects it based on error and response.
const request = require('request');

/**
 * Promisified request(): resolves with the full response object, rejects
 * with the error.
 *
 * FIX: the original checked `response` before `error` and settled NOTHING
 * when both were falsy, leaving the promise pending forever. Check the
 * error first and add a defensive fallback so the promise always settles.
 */
const promisifiedRequest = function (options) {
  return new Promise((resolve, reject) => {
    request(options, (error, response, body) => {
      if (error) {
        return reject(error);
      }
      if (response) {
        return resolve(response);
      }
      // Defensive: never leave the caller hanging.
      reject(new Error('request returned neither error nor response'));
    });
  });
};

(async function () {
  const options = {
    url: 'https://www.google.com',
    method: 'GET',
    gzip: true,
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36'
    }
  };
  let response = await promisifiedRequest(options);
  console.log(response.headers);
  console.log(response.body);
})();
Since request-promise has been deprecated, here are other options that don't depend on the NPM request package. got has been mentioned already, but it depends on 11 other packages. axios, in contrast, only has 1 dependency (for redirects). Everything else is natively implemented and built on top of the native NodeJS packages.
Here is the same example using axios:
// Same request using axios; the parsed body is available on `.data`.
// NOTE(review): fragment — assumes `url` exists and this runs in an async
// context (top-level await otherwise requires an ES module).
const axios = require('axios')
const response = await axios.get(url)
const result = response.data
or, as a one-liner in JavaScript
const result = (await axios.get(url)).data
One-liner in TypeScript:
const {data} = await axios.get(url)
For simple cases where you don't need advanced features like cookies, following redirects and retrying, you can use native http/https module to make requests:
const https = require('https')
async function fetch(url) {
return new Promise((resolve, reject) => {
const request = https.get(url, { timeout: 1000 }, (res) => {
if (res.statusCode < 200 || res.statusCode > 299) {
return reject(new Error(`HTTP status code ${res.statusCode}`))
}
const body = []
res.on('data', (chunk) => body.push(chunk))
res.on('end', () => {
const resString = Buffer.concat(body).toString()
resolve(resString)
})
})
request.on('error', (err) => {
reject(err)
})
request.on('timeout', () => {
request.destroy()
reject(new Error('timed out'))
})
})
}
const res = await fetch('https://...')

Resources