twitter.com Fetch NodeJS 404 Reply - node.js

I'm always getting a 404 reply when querying twitter.com (not the API, the homepage), I've used the exact same query than Edge is using.
const http2 = require('node:http2');
const fs = require('node:fs');
const client = http2.connect('https://twitter.com', {});
client.on('error', (err) => console.error(err));
const req = client.request({
':path': '/',
':method': 'GET',
':authority': `twitter.com`,
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US',
'cache-control': 'no-cache',
dnt: '1',
pragma: 'no-cache',
referer: 'https://twitter.com/',
"sec-ch-ua": "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"101\", \"Microsoft Edge\";v=\"101\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Windows\"",
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "same-origin",
"sec-fetch-user": "?1",
"sec-gpc": "1",
"upgrade-insecure-requests": "1",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32"
});
req.on('response', (headers, flags) => {
for (const name in headers) {
console.log(`${name}: ${headers[name]}`);
}
});
req.setEncoding('utf8');
let data = '';
req.on('data', (chunk) => { data += chunk; });
req.on('end', () => {
//console.log(`\n${data}`);
client.close();
});
req.end();
Sorry to ask but I'm stuck on this for hours and I simply want to retrieve the homepage of twitter.com (or other websites) but I keep getting this 404 while the browser returns 200.
If I do a simple wget https://twitter.com then it's all good, no 404 message, so it has to do with NodeJS way I'm doing I guess, I do not wish to use a NPM package but simply understand what is the problem here.
Thank you

Related

Web scraping using fetch - promise doesn't resolve

I am trying to fetch a particular website, and I already mimic all the request headers that Chrome sends and I am still getting a pending promise that never resolves.
Here is my current code and headers:
const fetch = require('node-fetch');
(async () => {
console.log('Starting fetch');
const fetchResponse = await fetch('https://www.g2a.com/rocket-league-pc-steam-key-global-i10000003107015', {
method: 'GET',
headers: {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
'Accept-Language': 'en-US;q=0.7,en;q=0.3',
'Accept-Encoding': 'gzip, deflate, br'
}
})
console.log('I never see this console.log: ', fetchResponse);
if(fetchResponse.ok){
console.log('ok');
}else {
console.log('not ok');
}
console.log('Leaving...');
})();
This is the console logs I can read:
Starting fetch
This is a pending promise: Promise { <pending> }
not ok
Leaving...
Is there something I can do here? I notice on similar questions that for this specific website, I only need to use Accept-Language header, I already tried that, but still the promise never gets resolved.
Also read on another question that they have security against Node.js requests, maybe I need to use another language?
You'll have a better time using async functions and await instead of then here.
I'm assuming your Node.js doesn't support top-level await, hence the last .then.
const fetch = require("node-fetch");
const headers = {
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
"Accept-Language": "en-US;q=0.7,en;q=0.3",
"Accept-Encoding": "gzip, deflate, br",
};
async function doFetch(url) {
console.log("Starting fetch");
const fetchResponse = await fetch(url, {
method: "GET",
headers,
});
console.log(fetchResponse);
if (!fetchResponse.ok) {
throw new Error("Response not OK");
}
const data = await fetchResponse.json();
return data;
}
doFetch("https://www.g2a.com/rocket-league-pc-steam-key-global-i10000003107015").then((data) => {
console.log("All done", data);
});

How do I set multiple custom HTTP headers in puppeteer?

I am trying to login using puppeteer at https://kith.com/account/login?return_url=%2Faccount
When I login and solve the captcha with audio, it detects me as a bot, so I am trying to change the request headers to see if that helps but cannot find anything on how to change them.
I found this, but it only shows 1 header:
await page.setRequestInterception(true)
page.on('request', (request) => {
const headers = request.headers();
headers['X-Just-Must-Be-Request-In-All-Requests'] = '1';
request.continue({
headers
});
});
You are able to set multiple HTTP headers with the dedicated puppeteer method: page.setExtraHTTPHeaders as well.
E.g.:
await page.setExtraHTTPHeaders({
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
'upgrade-insecure-requests': '1',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
'accept-encoding': 'gzip, deflate, br',
'accept-language': 'en-US,en;q=0.9,en;q=0.8'
})
await page.goto('...')
header is array you can add many as you want
page.on('request', (request) => {
const headers = request.headers();
headers['X-Just-Must-Be-Request-In-All-Requests'] = '1';
headers['foo'] = 'bar';
headers['foo2'] = 'bar2';
request.continue({
headers
});
});

log negociated protocol and cipher suite when using https module in nodeJs

Introduction
I use the native https module of node.js to make a get request on a website.
The website only give the good document when you use some specific cipher and tls version.
A quick draft of my code is like this
var ciphers = ["RSA-PSK-AES256-CBC-SHA384","RSA-PSK-AES256-CBC-SHA"].join(":");
const options = {
host:'www.example.mg',
port:'443',
minVersion: 'TLSv1.2',
ciphers:ciphers,
path : '/',
agent: agent,
headers: {
'Host': 'www.example.mg',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
'Accept-Encoding': 'gzip, deflate, br',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
}
}
https.get(options,(res) =>{
console.log('statusCode:', res.statusCode);
console.log('headers:', res.headers);
res.on('data', (d) => {
process.stdout.write(res.headers["content-encoding"] == "gzip" ? zlib.gunzipSync(d) : d);
});
}).on('error', (e) => {
console.error(e);
});
The point of the question
I want to know what protocol version and cipher were finally negociated during the handshake. I want to log those informations like in the screenshoot but that was done in Java HttpClient. Now i need to do it in Node.js.
I already tried using NODE_DEBUG='tls,https' but the informations logged were not enough

NodeJs request.get() function not working while the url is accessible from the browser

I am using the request npm module.I want to retrieve an image from a url. The request.get(url) function is returning me a '400 Bad Request', whereas the image is accessible from the browser.
The url i am hitting is : http://indiatribune.com/wp-content/uploads/2017/09/health.jpg
You could try to add some headers:
const request = require('request');
request.get({
url: 'http://indiatribune.com/wp-content/uploads/2017/09/health.jpg',
headers: {
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'en-GB,en;q=0.8,en-US;q=0.6,hu;q=0.4',
'Cache-Control': 'max-age=0',
Connection: 'keep-alive',
Host: 'indiatribune.com',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
},
}, (err, response, data) => {
console.log(response, data);
});
The User-Agent seems to be enough.
Use download module . It's pretty simple.
const fs = require('fs');
const download = require('download');
download('http://indiatribune.com/wp-content/uploads/2017/09/health.jpg').pipe(fs.createWriteStream('foo.jpg'));

Aurelia http-client/http-fetch not sending headers

Tried multiple approaches to send custom-headers via Aurelia-http-client and Aurelia-Fetch-client to pass Headers in the get/post requests that I am making, but in the actual request, the headers are not being passed
approach 1
var client = new HttpClient()
client.createRequest('/api/information/save')
.asPost()
.withBaseUrl('http://10.0.0.13:3000')
.withHeader("X-auth-code", "abc")
.send()
approach 2
var client = new HttpClient()
.configure(x => {
x.withBaseUrl('http://10.0.0.13:3000');
x.withCredentials(true);
x.withHeader('Content-Type', 'application/json; charset=utf-8');
x.withHeader('x-client-code', 'abc');
});
Approach 3
this.http.configure(config => {
config
.withDefaults({
credentials: 'same-origin',
headers: {
"Content-Type": "application/json",
"x-client-code": "abc",
}
})
.useStandardConfiguration()
.withInterceptor({
request(request) {
request.headers.append("x-client-code","abc");
console.log(`${request.headers}`);
return request; // you can return a modified Request, or you can short-circuit the request by returning a Response
},
response(response) {
console.log(`Received ${response.status} ${response.url}`);
return response; // you can return a modified Response
}
});
})
But all of them lead to the same error
{ host: '10.0.0.13:3000',
connection: 'keep-alive',
'access-control-request-method': 'POST',
origin: 'http://localhost:9000',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36',
'access-control-request-headers': 'content-type',
accept: '*/*',
referer: 'http://localhost:9000/',
'accept-encoding': 'gzip, deflate',
'accept-language': 'en-GB,en-US;q=0.8,en;q=0.6' }
At the end we are unbable to pass the headers.
it's a security against cross-site scripting (and it's super annoying) #see : Cors Access-Control-Allow-Headers wildcard being ignored?

Resources