Fetch working in HTML page but not in JS one [duplicate] - node.js

This question already has answers here:
CORS error even after setting Access-Control-Allow-Origin or other Access-Control-Allow-* headers on client side
(2 answers)
Closed 1 year ago.
A very simple HTML page fetches the JSON I want with a 200 status code:
<body>
<button id="login" name="login">login</button>
<script type="text/javascript">
document.getElementById('login').onclick = function () {
fetch('https://api.contonso.com/search/company',
{ method: 'GET' }).then(_ => console.log(_.status));
};
</script>
</body>
Which I serve this way:
// index.js
const fs = require('fs');
var http = require('http');
fs.readFile('./index.html', function (err, html) {
if (err) {
throw err;
}
http.createServer(function (request, response) {
response.writeHeader(200, { "Content-Type": "text/html" });
response.write(html);
response.end();
}).listen(8000);
});
So clicking in the HTML pages, fetch works with console.log printing 200 (OK).
Now consider I'm modifying code in index.js with the following one :
// index.js (modified)
const fetch = require('node-fetch');
fetch('https://api.contonso.com/search/company',
{ method: 'GET' }).then(_ => console.log(_.status));
Here console.log prints me 403 (Forbidden)
Could you please explain what I'm doing wrong? Why does it work in the HTML page but not in the JS one?
I'm developing a bot that does not use any frontend, I only need JS files.
Thanks,
In JS only I added the following headers (seen in the browser missing in JS), still the same error
headers: {
'Origin': 'https://localhost:8000',
'Accept': '*/*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'en-US,en;q=0.9',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
}

The request from the browser is sending some headers, like the user agent (which node-fetch sets to "node-fetch" by default).
You have to inspect your request response for the reason of the 403, and check the API documentation for further instructions.

Related

Axios POST data does not send in correct format to Express Server

Hi I'm running an express server that has this .post routed on / and using Formidable and express.json() as middleware.
Express Server
const formidable = require('express-formidable');
app.use(express.json());
app.use(formidable());
app.post('/test', function(req, res){
console.log(req.fields);
})
Using AJAX (No Issues)
When I send a POST request using AJAX like so:
$.ajax({
url:'http://localhost:3000/test',
type: "POST",
crossDomain: true,
dataType: "json",
data: {
"file" : "background.js"
},
success: async function (response) {
}
})
The server outputs:
{ file: 'background.js' }
The Problem
However, when I send the same POST request using AXIOS
var fUrl = 'http://localhost:3000/test';
var fHeader = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'
};
var req = await axios({
method: "POST",
url: fUrl,
withCredentials: true,
data: {"file" : 'background.js'},
headers: fHeader
});
The server outputs in the wrong format:
{ '{"file":"background.js"}': '' }
I suspect that the issue may be because of the content-type header; however, when I change it to application/json, the request doesn't complete — it times out and waits for an apparently infinite amount of time.
app.use(express.json());
app.use(formidable());
never use both at the same time.
Also that is not the way to send a file, but that would be another Q&A

Node axios returns empty status and curl return 200 for tiktok

Axios
I am sending the request to this image URL using Axios. It returns a status of 200
But for TikTok image URL, it returns empty status.
Curl
When I send the request from curl to the TikTok image URL, I received 200 status
Problem
Why does Axios not show any status for TikTok when the request is made?
Trying
I also set the headers but it's not working. You can send the request one by one both links in
the code
Code
'use strict';
const axios = require('axios');
const rp = require('request');
async function getStatus() {
try {
//tiktok link status empty
var link = encodeURI('https://p16-sign-sg.tiktokcdn.com/obj/tos-alisg-p-0037/213536d641064ff7a8c160164f7d289c_1610544424?x-expires=1610730000&x-signature=wdpLWkCwl%2BW4rA2BYYu2yLUVCi0%3D');
//other image link status 200
// var link = encodeURI('https://images.thebetwaygroup.com/umbraco/umbracobetway/media/6882235/logoheader-betway.png');
var result = await axios.get(link,{
headers: {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.80 Safari/537.36',
Referer: 'https://www.tiktok.com/',
Accept: 'application/json, text/plain, */*',
'Content-type' : 'image/png',
}
});
console.log(result)
} catch (error) {
console.error(error);
}
}
getStatus()

request nodejs gets unreadable data

I'm trying to scrape the HTML using the request library on Node.js. The response code is 200, but the data I get is unreadable. Here is my code:
var request = require("request");
const options = {
uri: 'https://www.wikipedia.org',
encoding: 'utf-8',
headers: {
"Accept": "text/html,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"charset": "utf-8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.9",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/78.0.3904.108 Chrome/78.0.3904.108 Safari/537.36"
}
};
request(options, function(error, response, body) {
console.log(body);
});
As you can see, I sent the request for html and utf-8 but got a large string like f��j���+���x��,�G�Y�l
My node version is v8.10.0 and the request version is 2.88.0.
Is something wrong with the code, or am I missing something?
Any hint to overcome this problem would be appreciated.
Updated Answer:
In response to your latest post:
The reason it is not working for Amazon is because the response is gzipped.. In order to decompress the gzip response, you simply need to add gzip: true to the options object you are using. This will work for both Amazon and Wikipedia:
const request = require('request');
const options = {
uri: "https://www.amazon.com",
gzip: true
}
request(options, function(error, response, body) {
if (error) throw error;
console.log(body);
});
Lastly, if you are wanting to scrape webpages like this, it is probably best to use a web scraping framework, like Puppeteer, since it is built for web scraping.
See here for Puppeteer GitHub.
Original Answer:
Since you are just grabbing the HTML from the main page, you do not have to specify charset, encoding, or Accept-Encoding..
const request = require('request');
const options = {
uri: 'https://www.wikipedia.org',
//encoding: 'utf-8',
headers: {
"Accept": "text/html,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
//"charset": "utf-8",
//"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.9",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/78.0.3904.108 Chrome/78.0.3904.108 Safari/537.36"
}
};
request(options, function (error, response, body) {
if (error) throw error
console.log(body);
});
To take it a bit further... in this scenario, you don't need to specify headers at all...
const request = require('request');
request('https://www.wikipedia.org', function (error, response, body) {
if (error) throw error
console.log(body);
});
Thank you for the reply. When I used that on the Wikipedia page it works properly, but when I use it to scrape another website, like Amazon, I get the same bad result:
const request = require('request');
request('https://www.amazon.com', function (error, response, body) {
if (error) throw error
console.log(body);
});

Node server unable to retrieve the value of a React Axios request header parameter

I need to access axios header authorization token in server side(Node), showing undefined. Please help..
Client side(React) request:
var config = {
headers: {
'cache-control':'no-cache',
'content-type': 'application/x-www-form-urlencoded',
'authorization' :'bearer '+Auth.getToken()
}
};
axios.get(ApiConfig.API_BASE+'api/admin/profile/', config).then(function(response) {
this.setState({status:'success', profile: response.data.data});
}).catch(function(response) {
console.log(response);
});
Server side(Node):
module.exports = (req, res, next) => {
console.log(req.headers.authorization);
if(!req.headers.authorization) {
return res.status(401).end();
}
};
The log shows undefined. I also logged the entire header; its output is:
{ host: 'localhost:8027',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0',
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'accept-language': 'en-US,en;q=0.5',
'accept-encoding': 'gzip, deflate',
'access-control-request-method': 'GET',
'access-control-request-headers': 'authorization,cache-control',
origin: 'http://localhost:3001',
connection: 'keep-alive' }
How do I retrieve the authorization token value?
Thank you.
I'm assuming you are using express. If so, instead of getting the header value as req.headers.authorization, try req.get('authorization').
http://expressjs.com/en/api.html#req.get
If you are making a cross-origin HTTP request, please make sure CORS has been enabled in your server. If you are using express cors middleware can be used.
I guess your problem here is that since CORS has not been enabled, your server will receive an OPTIONS (preflight) request first, so the entire header you logged is from the OPTIONS request, not the GET request as you expected. You can use console.log(req.method) to verify. BTW, req.headers.authorization is a fine way to read the header.

Scraping Nodejs

I want to scrape page "https://www.ukr.net/ua/news/sport.html" with Nodejs.
I'm trying to make a basic GET request with the 'request' npm module; here is an example:
const inspect = require('eyespect').inspector();
const request = require('request');
const url = 'https://www.ukr.net/news/dat/sport/2/';
const options = {
method: 'get',
json: true,
url: url
};
request(options, (err, res, body) => {
if (err) {
inspect(err, 'error posting json');
return
}
const headers = res.headers;
const statusCode = res.statusCode;
inspect(headers, 'headers');
inspect(statusCode, 'statusCode');
inspect(body, 'body');
});
But in response body I only get
body: '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01
Transitional//EN">\n<html>\n<head>\n<META HTTP-EQUIV="expires"
CONTENT="Wed, 26 Feb 1997 08:21:57 GMT">\n<META HTTP-EQUIV=Refresh
CONTENT="10">\n<meta HTTP-EQUIV="Content-type" CONTENT="text/html;
charset=utf-8">\n<title>www.ukr.net</title>\n</head>\n<body>\n
Идет загрузка, подождите .....\n</body>\n</html>'
If I make get request from Postman, I get exactly what I need:
Please help me guys.
You might have been blocked by bot protection - this can be checked with curl.
curl -vL https://www.ukr.net/news/dat/sport/2/
curl seem to get the result and if curl is working then there is probably something missing in the request from node, a solution could be to mimic a browser of your choice.
For example - Here is an example of Chrome-like request taken from developer-tools:
deriving the following options for the request:
const options = {
method: 'get',
json: true,
url: url,
gzip: true,
headers: {
"Host": "www.ukr.net",
"Pragma": "no-cache",
"Cache-Control": "no-cache",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, sdch, br",
"Accept-Language": "en-US,en;q=0.8"
}
};
If you have experience in jquery, there a library to access of the HTML, for example.
Markup example we'll be using:
<ul id="fruits">
<li class="apple">Apple</li>
<li class="orange">Orange</li>
<li class="pear">Pear</li>
</ul>
First you need to load in the HTML. This step in jQuery is implicit, since jQuery operates on the one, baked-in DOM. With Cheerio, we need to pass in the HTML document.
var cheerio = require('cheerio');
$ = cheerio.load('<ul id="fruits">...</ul>');
Selectors
$('ul .pear').attr('class')
probably you can make something like this.
request(options, (err, res, body) => {
var $ = cheerio.load(html);
})
https://github.com/cheeriojs/cheerio

Resources