const phantom = require('phantom');

// The same user-agent string is used for every attempt below.
const CHROME_UA = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36';

console.dir(phantom);

phantom.create((ph) => {
  ph.createPage((page) => {
    // Attempt 1: send the UA as a custom header.
    page.customHeaders = {
      HTTP_USER_AGENT: CHROME_UA,
    };

    // Reported to print undefined.
    console.dir(page.settings);

    // Attempts 2 and 3: assign the UA on a freshly created settings object.
    page.settings = {};
    page.settings.userAgent = CHROME_UA;
    page.settings.HTTP_USER_AGENT = CHROME_UA;
    console.dir(page.settings);

    page.open('http://example.com/req.php', () => {
      setTimeout(() => {
        // Kept as a classic function: this callback is handed to PhantomJS.
        const result = page.evaluate(function() {
          return document;
        });
        // Reported to print undefined.
        console.dir(result);
      }, 1000);
    });
  });
});
When I use PhantomJS, I try to set the user-agent header in three different ways, but when I visit the page and save the PHP $_SERVER object to a text file, I still see PhantomJS:
HTTP_USER_AGENT: Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.0.1-development Safari/538.1
not only that but the output of the page is also undefined.
It seems that the docs have changed, or I can't find the correct ones. I am looking at:
http://phantomjs.org/api/webpage/property/settings.html
https://www.npmjs.com/package/phantom
How is this used correctly?
According to the Functional Details in the docs, you have to set the user agent through page.set():
// Set the user agent through the bridge's page.set() rather than assigning to page.settings directly.
page.set('settings.userAgent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36');
It has to be done this way because the bridge has to communicate with the PhantomJS process, and it cannot do so synchronously. This could probably have been implemented with Object.defineProperty.
If you want to set multiple settings at once, you can do (ref):
// Passes several settings in a single page.set() call.
page.set('settings', {
userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.97 Safari/537.11",
javascriptEnabled: false,
loadImages: false
});
You can find a list of settings that you can set in page.settings.
Currently [ 27.01.2018 ], with these requirements:
phantom: ^4.0.12,
webpage: ^0.3.0
I use this method to set this property:
// e.g. key = 'userAgent', value = the user-agent string to send.
page.setting(key, value);
I checked it out with php in $_SERVER array. It works correctly.
The complete code looks like this:
const phantom = require('phantom');

// Opens a page with a custom user agent and logs every outgoing request so
// the effect of the setting can be inspected.
(async function() {
  const instance = await phantom.create();
  try {
    const page = await instance.createPage();

    // page.setting() returns a promise: await it so a failure surfaces here
    // and the user agent is in place before the first request is made.
    await page.setting('userAgent', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.97 Safari/537.11");

    await page.on('onResourceRequested', function(requestData) {
      // Dump request settings to view the result of our changes:
      console.info('Requesting', requestData);
    });

    const status = await page.open('https://stackoverflow.com');
    if (status !== 'success') {
      throw new Error(`Failed to open page: ${status}`);
    }

    const content = await page.property('content');
    //console.log(content);
  } finally {
    // Always shut the PhantomJS process down, even when a step above failed.
    await instance.exit();
  }
})().catch(function(err) {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Related
I am trying to make a request to "https://www.walmart.com/search/?page=1&query=" using request function or using scrapy module but getting the response code 444.
See below my snippet:
# Browser-like request headers used for the first request.
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36',
'Accept': 'application/json', 'Content-Type': 'application/json'
}
res = requests.get('https://www.walmart.com/', headers=headers)
cookie = res.cookies
# NOTE(review): headers=res.headers sends the first *response's* headers as request headers, not the `headers` dict above.
res1 = requests.get('https://www.walmart.com/search/?page=1&query=',headers=res.headers,cookies=cookie)
But I'm getting res1.status_code as 444. I would appreciate any help here.
This is how you should reuse request elements:
import requests

# A Session keeps headers and cookies across requests, so the second request
# reuses whatever the first one established.
with requests.Session() as connection:
    connection.headers.update(
        {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/89.0.4389.86 YaBrowser/21.3.0.740 "
            "Yowser/2.5 Safari/537.36",
        }
    )
    # First request only primes the session's cookies; its response is discarded.
    _ = connection.get("https://www.walmart.com/")
    response = connection.get('https://www.walmart.com/search/?page=1&query=')
    print(response.status_code)
Output (status code):
200
headers["user-agent"] = fakeUa();
console.log(fakeUa());
let firstReq = true;

// Intercept every request and decide whether to forward it with the custom headers.
page.route('**/*', (route) => {
  const req = route.request();

  // Requests without the marker header are forwarded with the prepared headers.
  if (!("x-j3popqvx-a" in req.headers())) {
    console.log("in");
    return route.continue({ headers });
  }

  // Marker header present: remember this request's headers and return
  // without continuing the route.
  headers = req.headers();
  console.log("exiting");
  return;
});

let pageRes = await page.goto(url, { waitUntil: 'load', timeout: 0 });
I want to add a fake user agent when sending a request to the URL, but the fake user agent is not added; the request goes out with the default one instead.
While in puppeteer it was possible with the page.setUserAgent() method to apply a custom UA and page.setExtraHTTPHeaders() to set any custom headers, in playwright you can set custom user agent (userAgent) and headers (extraHTTPHeaders) as options of browser.newPage() or browser.newContext() like:
// Option 1: custom user agent passed as a newPage() option.
const page = await browser.newPage({ userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' })
// Option 2: extra HTTP headers passed as a newPage() option.
const page = await browser.newPage({
extraHTTPHeaders: {
'Cache-Control': 'no-cache'
}
})
Edit: In case you are using it with newContext() usage looks like this (make sure to set userAgent in the settings of newContext and not in newPage!):
// The user agent is set on the context; the page is then created from that context.
const context = await browser.newContext({ userAgent: 'hello' })
const page = await context.newPage()
// to check the UA:
console.log(await page.evaluate(() => navigator.userAgent))
If you're using @playwright/test, you can set a user agent as follows:
import {expect, test} from "@playwright/test"; // ^1.30.0

const userAgent =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36";

test.describe("with user agent", () => {
  // Applies the custom user agent to the tests in this describe block.
  test.use({userAgent});

  test("does stuff", async ({page}) => {
    await page.goto("https://example.com/");
    await expect(page.locator("h1")).toHaveText("Example Domain");
  });
});
I am struggling to successfully make a request using request-promise npm on a site that requires a cookie to view or for the request to be successful.
So I have looked into cookie jars in order to store all the cookies that are given in the response after the request has been made.
const rp = require("request-promise");

// Created here, but not passed to the request below.
const cookieJar = rp.jar();

// Issues a GET for the storefront page and logs the full response or the error.
function grabcfToken() {
  let token = "";
  const requestOptions = {
    url: 'https://www.off---white.com/en/GB',
    method: "GET",
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    resolveWithFullResponse: true,
  };

  rp(requestOptions)
    .then((res) => {
      console.log(res);
    })
    .catch((err) => {
      console.log(err);
    });
}
Can someone tell me why the request isn't successfully going through? How do I apply the cookies that I initially get before being timed out.
const rp = require("request-promise");

// Cookie jar passed to the request below via the `jar` option.
const cookieJar = rp.jar();

/**
 * Issues a GET for the storefront page using the shared cookie jar and logs
 * the full response, or the error if the request fails.
 *
 * @returns {Promise<void>} Settles once the response or error has been logged.
 */
function grabcfToken() {
  const options = {
    url: 'https://www.off---white.com/en/GB',
    method: "GET",
    // Request headers belong under `headers`; a top-level 'user-agent' key
    // is not sent with the request.
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    },
    resolveWithFullResponse: true,
    jar: cookieJar,
  };

  // Return the chain so callers can wait for completion if they want to.
  return rp(options)
    .then((response) => {
      console.log(response);
    })
    .catch((error) => {
      console.log(error);
    });
}
If you're asking how to send the jar that you filled with cookies from an earlier request, you have to add jar: cookieJar as part of your options object before sending the request.
I have created a local Node server, and I'm printing the User-Agent from req of a GET request, like:
// Echoes the caller's User-Agent back; falls back to a greeting when the header is absent.
router.get('**', (req, res, next) => {
  const userAgent = req.header('User-Agent');

  if (!userAgent) {
    res.send('Hello World!!!');
    return;
  }

  console.log('user-agent = ', userAgent);
  res.end(userAgent);
});
Then it prints a different User-Agent for the / path and the /favicon.ico path on my OnePlus device.
result:-
/ = Mozilla/5.0 (Linux; Android 8.0.0; ONEPLUS A3003 Build/OPR6.170623.013) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.109 Mobile Safari/537.36
/favicon.ico = Mozilla/5.0 (Linux; Android 8.0.0; Build/OPR6.170623.013) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.109 Mobile Safari/537.36
Why are these two different User-Agents coming from the same browser?
I'm testing this on Chrome browser.
I am having trouble setting a custom user-agent for my phantom page. I have searched for possible solutions but I seem to be missing some fundamental part of how this should be working because when I try to set my settings, my phantom just hangs and doesn't complete the request or move into the page.open method. Here is my code:
// Creates a page, tries to set the user agent via page.set(), then sends the
// rendered content of the requested site back to the client.
phantom.create().then((ph) => {
  ph.createPage().then((page) => {
    page.set('settings.userAgent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.256');

    page.open(req.cookies.website).then((status) => {
      page.property('content').then((html) => {
        res.send(html);
        page.close();
        ph.exit();
      });
    });
  });
});
In case anyone is wondering, I solved it... I just needed to look more closely at the phantom npm documentation. Here is the solution if anyone else has the same problem:
// Renders the requested site with a custom user agent and sends the page
// content back to the client. Every step is chained so failures propagate and
// the PhantomJS process is shut down on both success and failure.
phantom.create().then(function(ph) {
  return ph.createPage()
    .then(function(page) {
      // Wait for the setting to be applied before navigating.
      return page.setting('userAgent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.256')
        .then(function() {
          return page.open(req.cookies.website);
        })
        .then(function() {
          return page.property('content');
        })
        .then(function(content) {
          res.send(content);
          return page.close();
        });
    })
    .then(
      function() {
        return ph.exit();
      },
      function(err) {
        // Shut PhantomJS down on failure too, then let the error propagate.
        ph.exit();
        throw err;
      }
    );
}).catch(function(err) {
  // NOTE(review): the enclosing request handler is not visible here; if it has
  // an error path (e.g. next(err)), forward the error there so the client
  // still gets a response.
  console.error(err);
});