I'm brand new to Node.js (v10.9.0) and wanted to make a simple web scraping tool that gets statistics and ranks for players on this page. No matter what I do, I can't make it work with this website. I tried multiple request methods, including http.request and https.request, and have gotten every method working with 'http://www.google.com'. However, every attempt for this specific website gives me either a 301 redirect or a socket hang up error. The location the 301 response gives me is the same link but with a '/' on the end, and requesting that results in a socket hang up. I know the site runs on port 443. Do some sites just block Node.js? Why are browsers able to connect but not stuff like this?
Please don't link me to any other threads; I've seen every single one, and none of them have helped.
var request = require('request');
// Options for a plain GET of the site's landing page.
// Note that no User-Agent header is sent with these options.
var options = {
  method: "GET",
  uri: 'https://www.smashboards.com',
  rejectUnauthorized: false,
  port: '443'
};

// Issue the request and dump whatever comes back.
request(options, function (error, response, body) {
  console.log('error:', error); // Print the error if one occurred
  // `response` is undefined when the request fails, so guard the property access.
  console.log('statusCode:', response && response.statusCode); // Print the response status code if a response was received
  console.log('body:', body); // Print the HTML of the requested page.
});
Error:
error: { Error: socket hang up
at createHangUpError (_http_client.js:322:15)
at TLSSocket.socketOnEnd (_http_client.js:425:23)
at TLSSocket.emit (events.js:187:15)
at endReadableNT (_stream_readable.js:1085:12)
at process._tickCallback (internal/process/next_tick.js:63:19) code: 'ECONNRESET' }
EDIT:
Adding this to my options object fixed my problem
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}
OP Here
All I did was add:
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}
To my options Object and it's working perfectly.
New code:
var request = require('request');
// Options for a GET of the site's landing page, sent with a browser-like
// User-Agent header (the change that made the request succeed).
var options = {
  method: 'GET',
  uri: 'https://www.smashboards.com',
  // NOTE(review): this disables TLS certificate verification; it was not part of
  // the fix and should be dropped unless the target really needs it.
  rejectUnauthorized: false,
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
  }
};

// Issue the request and dump whatever comes back.
request(options, function (error, response, body) {
  console.log('error:', error); // Print the error if one occurred
  // `response` is undefined when the request fails, so guard the property access.
  console.log('statusCode:', response && response.statusCode); // Print the response status code if a response was received
  console.log('body:', body); // Print the HTML of the requested page.
});
That's 12+ hours I'm never getting back.
Related
Using the following code in an async function and with the got library:
// Request the track page with got and destructure the result: the response's
// `body` is bound to `html`, and its `url` to `url`.
var { body: html, url } = await got('https://open.spotify.com/track/1iFwvtvFNbQVcIXKBiBRqD')
I was previously able to get a song page from the Spotify website in order to scrape its metadata with metascraper. However, it stopped working recently, now giving an error:
HTTPError: Response code 406 (Not Acceptable)
at Request.<anonymous> (/app/node_modules/got/dist/source/as-promise/index.js:117:42)
at processTicksAndRejections (node:internal/process/task_queues:96:5) {
code: 'ERR_NON_2XX_3XX_RESPONSE',
timings: {
start: 1641245342392,
socket: 1641245342394,
lookup: 1641245342396,
connect: 1641245342398,
secureConnect: 1641245342407,
upload: 1641245342408,
response: 1641245342538,
end: 1641245342542,
error: undefined,
abort: undefined,
phases: {
wait: 2,
dns: 2,
tcp: 2,
tls: 9,
request: 1,
firstByte: 130,
download: 4,
total: 150
Since I am not aware of any changes I made that would have impacted this, what changed and how do I fix it? It seems like the code still works for other URLs I've tried, so maybe Spotify itself changed something? Thanks.
I think you just need to add options in which to write headers. Although your code works for me without them.
// Browser-like request headers for fetching the HTML track page.
// - Accept asks for HTML, because the page is scraped as markup; asking for
//   JSON would contradict that and can itself provoke a 406.
// - No Content-Type: it describes a request body, and this GET has none.
const options = {
  headers: {
    Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36",
  },
};

// `body` is bound to `html`; `url` is taken from the response as-is.
var { body: html, url } = await got("https://open.spotify.com/track/1iFwvtvFNbQVcIXKBiBRqD", options);
I am trying to fetch a particular website, and I already mimic all the request headers that Chrome sends and I am still getting a pending promise that never resolves.
Here is my current code and headers:
const fetch = require('node-fetch');
// Fetch the product page with browser-like headers and report whether the
// response was OK. Runs immediately.
(async function main() {
  console.log('Starting fetch');

  const requestInit = {
    method: 'GET',
    headers: {
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
      'Accept-Language': 'en-US;q=0.7,en;q=0.3',
      'Accept-Encoding': 'gzip, deflate, br'
    }
  };
  const fetchResponse = await fetch(
    'https://www.g2a.com/rocket-league-pc-steam-key-global-i10000003107015',
    requestInit
  );

  console.log('I never see this console.log: ', fetchResponse);
  console.log(fetchResponse.ok ? 'ok' : 'not ok');
  console.log('Leaving...');
})();
This is the console logs I can read:
Starting fetch
This is a pending promise: Promise { <pending> }
not ok
Leaving...
Is there something I can do here? I notice on similar questions that for this specific website, I only need to use Accept-Language header, I already tried that, but still the promise never gets resolved.
Also read on another question that they have security against Node.js requests, maybe I need to use another language?
You'll have a better time using async functions and await instead of then here.
I'm assuming your Node.js doesn't support top-level await, hence the last .then.
const fetch = require("node-fetch");
// Browser-like request headers; doFetch passes this object as the `headers`
// option of every fetch call.
const headers = {
"User-Agent":
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
"Accept-Language": "en-US;q=0.7,en;q=0.3",
"Accept-Encoding": "gzip, deflate, br",
};
/**
 * GET `url` with the shared `headers` and parse the response body as JSON.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<*>} The parsed JSON body.
 * @throws {Error} When the response is not OK; the message carries the HTTP
 *   status so the failure can be diagnosed from the log.
 */
async function doFetch(url) {
  console.log("Starting fetch");
  const fetchResponse = await fetch(url, {
    method: "GET",
    headers,
  });
  console.log(fetchResponse);
  if (!fetchResponse.ok) {
    throw new Error(
      `Response not OK: ${fetchResponse.status} ${fetchResponse.statusText}`
    );
  }
  // Rejects if the body is not valid JSON; the caller sees that rejection.
  return fetchResponse.json();
}
// Run the fetch. doFetch rejects on network errors, non-OK responses and
// invalid JSON, so the chain ends in a catch instead of leaving the rejection
// unhandled.
doFetch("https://www.g2a.com/rocket-league-pc-steam-key-global-i10000003107015")
  .then((data) => {
    console.log("All done", data);
  })
  .catch((error) => {
    console.error("Fetch failed:", error);
  });
Here is my code:
// Walk the paginated comments endpoint `step` items at a time, starting at
// `offset`, and send `comments` as the JSON response once the last page has
// been fetched. Pages are requested 2-4 seconds apart.
// NOTE(review): nothing visible here adds items to `comments`; presumably that
// part was omitted from the snippet — confirm.
(function getComments(offset) {
  var options = {
    url: path + songId + '?limit=' + step + '&offset=' + offset,
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
      'Content-Type': 'application/x-www-form-urlencoded',
      'Accept': '*/*',
      'Accept-Encoding': 'gzip, deflate',
      'Accept-Language': 'zh-CN,zh;q=0.8'
    },
    // Accept-Encoding above invites a compressed response; `request` only
    // decodes it when `gzip` is true, otherwise JSON.parse gets raw bytes.
    gzip: true,
    // NOTE(review): `request` parses this value as a URL, so it presumably
    // needs a scheme (e.g. 'http://host:port'); without one the host is read
    // as the protocol — confirm against the "Invalid protocol" error.
    proxy: '***.***.***.***:****',
  };
  Request.get(options, function (error, res, body) {
    if (error || res.statusCode !== 200) {
      // Report the failure instead of dropping it silently.
      console.error('getComments failed at offset ' + offset + ':', error || res.statusCode);
      return;
    }
    var data = JSON.parse(body);
    // Advance to the next page; re-requesting the same offset would loop forever.
    // Assumes `step` is a number — TODO confirm.
    var nextOffset = offset + step;
    if (nextOffset < data.total) {
      setTimeout(function () {
        console.log(nextOffset);
        getComments(nextOffset);
      }, Math.random() * 2000 + 2000);
    } else {
      response.json(comments);
    }
  });
})(offset);
But my proxy didn't work, and I get an error when using Request.get() like message: "Invalid protocol: 125.123.143.186:"
Can anybody tell me how did that happen, and do I have a decent way to send a proxy request?
In this case, the most likely reason for the "Invalid protocol" error is that a URL you set in the options object looks like this: 125.123.143.186:/some/path. Check the value you are passing; I'm pretty sure it is malformed, as it appears to have no protocol in front of the IP address (and possibly no port after it). Because URLs begin with a protocol followed by ://, the URL parser of Request treats that IP address as the protocol, fails to validate it, and then you get this error message. The same applies to the proxy option: it must be a full URL including the protocol, e.g. http://125.123.143.186:8080.
I am struggling to successfully make a request using request-promise npm on a site that requires a cookie to view or for the request to be successful.
Therefore, I have looked into cookie jars in order to store all the cookies that are given in the response after the request has been made.
const rp = require("request-promise")
var cookieJar = rp.jar()
// Request the landing page with request-promise and log either the full
// response or the error. Returns nothing.
function grabcfToken() {
  const requestOptions = {
    url: 'https://www.off---white.com/en/GB',
    method: "GET",
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    resolveWithFullResponse: true
  };

  const logResponse = function (response) {
    console.log(response);
  };
  const logError = function (error) {
    console.log(error);
  };

  rp(requestOptions).then(logResponse).catch(logError);
}
Can someone tell me why the request isn't successfully going through? How do I apply the cookies that I initially get before being timed out.
const rp = require("request-promise")
var cookieJar = rp.jar()
/**
 * Request the landing page with request-promise, storing any cookies the
 * server sets in the shared `cookieJar`, and log the full response or the error.
 *
 * @returns {Promise<void>} Settles once the response (or error) has been
 *   logged; errors are logged, not rethrown.
 */
function grabcfToken() {
  const options = {
    url: 'https://www.off---white.com/en/GB',
    method: "GET",
    // Request headers must live under `headers`; a top-level 'user-agent'
    // option is not sent to the server.
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    },
    resolveWithFullResponse: true,
    // Cookies set by responses are kept here and sent back on later requests
    // that use the same jar.
    jar: cookieJar
  };

  // Return the chain so callers can wait for the request to finish.
  return rp(options)
    .then((response) => {
      console.log(response);
    })
    .catch((error) => {
      console.log(error);
    });
}
If you're asking how to send the cookies collected in your jar along with the request, you have to add jar: cookieJar as part of your options object before sending it.
I send a request through a proxy and always receive such a response
tunneling socket could not be established, cause=read ECONNRESET
or
tunneling socket could not be established, cause= socket hang up
My code
// Options for a POST sent through an HTTP proxy.
let settings = {
// Template literal with no interpolation, i.e. the literal string "url";
// presumably a placeholder for the real target — confirm.
url: `url`,
headers: {
'Connection': 'keep-alive',
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
},
method: 'POST',
// Proxy address built from the `ip` and `port` variables defined elsewhere.
proxy: `http://${ip}:${port}`,
// NOTE(review): in the `request` library this turns off TLS certificate
// validation — confirm it is intended.
strictSSL: false
}
// Send the request; the callback receives the error, response and body.
request.request(settings, (err, response, body) => {
// err here
})
what am I doing wrong ?
Now this error : Error: Tunnel creation failed. Socket error: Error: read ECONNRESET
My code:
const request = require('request'),
proxyingAgent = require('proxying-agent');
;
// Options for a POST routed through a proxy by passing a custom agent
// instead of the `proxy` option.
let settings = {
url: url,
headers: {
'Connection': 'keep-alive',
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
},
method: 'POST',
// proxy: `http://${obj.proxy[obj.proxyIdx]}`,
// Agent created by proxying-agent from the proxy entry at index
// `obj.proxyIdx` of `obj.proxy` and the target `url`.
agent: proxyingAgent.create(`http://${obj.proxy[obj.proxyIdx]}`, url),
}
As for your code, the problem possibly lies in your settings object.
You need to use syntax like this:
// Options for a POST sent through an HTTP proxy.
let settings = {
// ES6 shorthand for `url: url`; requires a `url` variable in scope.
url,
headers: {
'Connection': 'keep-alive',
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
},
method: 'POST',
// Proxy address built from the `ip` and `port` variables defined elsewhere.
proxy: `http://${ip}:${port}`,
// NOTE(review): in the `request` library this turns off TLS certificate
// validation — confirm it is intended.
strictSSL: false
}
Here we use the ES6 shorthand property syntax (url instead of url: url) to make the object shorter.
But also, you can establish proxy connection with npm package proxying agent.
Your code should look something like this:
const proxyingAgent = require('proxying-agent');
const fetch = require('node-fetch');
// Proxy server address. Replace the placeholder strings with real values.
// `port` is declared exactly once: a second `const port` is a SyntaxError.
const host = '<your host>';
const port = '<proxy port>';

// Proxy credentials; only needed when the proxy requires authentication.
const creds = {
  login: 'username',
  password: 'pass'
};
/**
 * Build the fetch options that route a request for `url` through the
 * authenticated proxy at `host`:`port`.
 *
 * @param {string} url - Target URL the agent is created for.
 * @returns {{agent: *}} Options object carrying the proxying agent.
 */
const buildProxy = (url) => {
  // Credentials are separated from the host by '@' (a '#' would start the URL
  // fragment). They are percent-encoded so characters such as '@', ':' or '/'
  // cannot break the proxy URL.
  const login = encodeURIComponent(creds.login);
  const password = encodeURIComponent(creds.password);
  return {
    agent: proxyingAgent.create(`http://${login}:${password}@${host}:${port}`, url)
  };
};
//If you don't have credentials for proxy, you can rewrite function
// Build the fetch options that route a request for `url` through the proxy at
// `host`:`port` without sending any credentials.
const buildProxyWithoutCreds = (url) => {
  const proxyAddress = `http://${host}:${port}`;
  const agent = proxyingAgent.create(proxyAddress, url);
  return { agent };
};
And then you can use it with your URL and credentials. We'll use the node-fetch package.
/**
 * Fetch `url` through the credential-less proxy and read the body with the
 * response method named by `type` (for example 'json' or 'text').
 *
 * @param {string} url - Address to fetch.
 * @param {string} type - Name of the body-reading method to call on the response.
 * @returns {Promise<*|false>} The decoded body, or `false` when the status is not 200.
 * @throws {Error} Wraps any failure as "Error during request: <message>".
 */
const proxyGetData = async (url, type) => {
  try {
    // node-fetch is promise based, so the proxied request can simply be awaited.
    const response = await fetch(url, buildProxyWithoutCreds(url));
    return response.status === 200 ? await response[type]() : false;
  } catch (e) {
    throw new Error(`Error during request: ${e.message}`);
  }
};