Handling redirecting request in NodeJs - node.js

I am trying to get the data from the url: autotrader_url
Unfortunately I am not able to handle the redirect.
Here is my code so far.
var request = require('request');
var cheerio = require('cheerio');
// Fetch the showcase page, then print its <title> and the markup of every
// listing heading found in it.
request({followAllRedirects: true,url:"http://www.autotrader.com/cars-for-sale/showcase.xhtml?zip=94536&endYear=2017&Log=0&modelCode1=LEGACY&sortBy=derivedpriceDESC&startYear=1981&makeCode1=SUB&numRecords=25&searchRadius=25&mmt=%5BSUB%5BLEGACY%5B%5D%5D%5B%5D%5D&makeCodes=SUB"},
function (error, response, html) {
  console.log(response)
  if (error) {
    // Report the failure instead of finishing silently.
    return console.error(error);
  }
  if (response.statusCode != 200) {
    return console.error('Unexpected status code', response.statusCode);
  }
  console.log("yo");
  var $ = cheerio.load(html);
  console.log($("title").text());
  $('div.listing-title h2').each(function(i, element){
    // A cheerio selection is not a DOM node and has no innerHTML property;
    // .html() returns the element's inner markup.
    console.log($(element).html());
  });
});
What am I missing?

The followAllRedirects: true option will follow HTTP redirects sent back from the server. It appears that they are not using an HTTP redirect, because when you visit that page in the browser it loads a page that says "We're searching for the car that you want", and that page does a client-side redirect in the browser using JavaScript. To follow that kind of redirect you probably have to use something like PhantomJS.
Alternatively, using cheerio (maybe) or some combination of regex, you could get the redirect URL directly from the page source and then make a second request to that URL yourself.
<script type="text/javascript">
$(function() {
atc.create(atc.showcaseRedirect, {
destinationUrl: '/cars-for-sale/Subaru/Legacy/Fremont+CA-94536?endYear=2017&firstRecord=0&makeCode1=SUB&mmt=%5BSUB%5BLEGACY%5B%5D%5D%5B%5D%5D&modelCode1=LEGACY&searchRadius=25&showcaseOwnerId=68619541&startYear=1981&Log=0',
queryString: '',
....
}).init();
});
</script>
You just have to grab that destinationUrl. Now with all that said, this assumes you aren't breaking any of their terms of use so you should definitely look into that before you move forward.
I'm not sure if it's a bug on their end or if they are trying to prevent people from scraping, but you need to set a User-Agent header to get them to respond, so add a headers: {'User-Agent': ...} entry to your request options.
Here is a full working example:
var request = require('request');
var cheerio = require('cheerio');
var firstUrl = "http://www.autotrader.com/cars-for-sale/showcase.xhtml?zip=94536&endYear=2017&Log=0&modelCode1=LEGACY&sortBy=derivedpriceDESC&startYear=1981&makeCode1=SUB&numRecords=25&searchRadius=25&mmt=%5BSUB%5BLEGACY%5B%5D%5D%5B%5D%5D&makeCodes=SUB";
// Fetch the interstitial page, read the client-side redirect target out of its
// inline script, then request that target and process the resulting markup.
makeRequest(firstUrl, function(err, html) {
  if (err) {
    return console.log('There was a problem');
  }
  // get "redirect" url from page source
  // The page contains:  destinationUrl: '/path?query',
  // Capturing only what sits between the quotes keeps both the opening and the
  // closing quote out of the URL.
  var match = /destinationUrl\s*:\s*['"]([^'"]+)['"]/.exec(html);
  if (!match) {
    // Bail out with a message rather than throwing on a null match.
    return console.log('Could not find destinationUrl in the page source');
  }
  var redirectUrl = 'http://www.autotrader.com' + match[1];
  console.log('redirectUrl', redirectUrl);
  // make the second request and process the markup with cheerio
  makeRequest(redirectUrl, processFinalMarkup);
});
/**
 * Print the page title and the inner markup of every listing heading.
 *
 * @param {Error|null} err  Error handed over by the caller, if any.
 * @param {string} html     Markup of the results page.
 */
function processFinalMarkup(err, html) {
  if (err) {
    // Nothing to parse when the request failed.
    return console.error('There was a problem', err);
  }
  var $ = cheerio.load(html);
  console.log($("title").text());
  $('div.listing-title h2').each(function(i, element){
    // A cheerio selection is not a DOM node and has no innerHTML property;
    // .html() returns the element's inner markup.
    console.log($(element).html());
  });
}
/**
 * Request a URL and hand the response body to a Node-style callback.
 *
 * The callback is always invoked exactly once: with an Error when the request
 * fails or the status is not 200, otherwise with (null, html).
 *
 * @param {string} url                            Address to fetch.
 * @param {function(?Error, string=)} callback    Receives the error or the body.
 */
function makeRequest(url, callback) {
  request({
    // Their page requires a User-Agent to be set.
    headers: {
      'User-Agent': 'express'
    },
    followAllRedirects: true,
    url: url
  },
  function (error, response, html) {
    if (error) {
      // response is not available here, so do not touch it.
      return callback(error);
    }
    console.log(response.headers, response.statusCode);
    if (response.statusCode !== 200) {
      return callback(new Error('Unexpected status code ' + response.statusCode));
    }
    callback(null, html);
  });
}

Related

How to always add prefix to url in Node.js request

I'm using the package "request" (https://www.npmjs.com/package/request) in Node.js. What I want to have is somehow like the "defaults" api, that I give a default url prefix to the request object, the url of any request which sent from my request object will be prefixed before sending. Is that possible? Thank you!
You can use request.defaults
const request = require('request');
// Wrapper around request whose calls all carry the baseUrl option.
const baseRequest = request.defaults({baseUrl : 'https://example.com/api/'});
now use baseRequest object
Documentation Link-
Request.defaults
You can create a default request instance and set baseUrl in its config:
var request = require('request');
// Request wrapper preconfigured with the API's base URL.
var r = request.defaults({ baseUrl: 'https://example.com/api/' });

// GET a path below the base URL and print the body of a successful response.
r({ url: '/end/point?test=true', method: "GET" }, function (error, response, body) {
  var succeeded = !error && response.statusCode == 200;
  if (!succeeded) {
    return;
  }
  console.log(body);
}); // will fetch https://example.com/api/end/point?test=true

Node.js: make multiple POST requests until the response is empty

I'm making a POST request to API that has limited number of characters in response. In order to get full response I need to make multiple POST requests and then append them all to one file. I'm not that familiar with asynchronous programming so I can't think of a solution for my problem. Here is snippet of my code:
var request = require('request');
var fs = require('fs');
// Options for the POST request. A block comment is used because a line comment
// placed here would also comment out the closing brace of the literal.
var options = {/* options for POST request body */};
// Send one request, append its body to ./response.json and store on `options`
// the offset the following request should use.
request(options, function (error, response, body) {
  if (!error && response.statusCode == 200) {
    fs.appendFileSync("./response.json", JSON.stringify(body), 'utf8');
    var resp = JSON.parse(fs.readFileSync('./response.json', 'utf8'));
    options.offset = resp.parameters.length; //this is length of my data so far, next request will have this number in it and response will be new data offseted by this number
  }
});
And after this request I need to make another one, until body.length is zero. So I guess what I need is to call the request function from its own callback. How do I achieve this? Thanks!
Like JM-AGMS said, wrap the request function call in another function to have the callback of one request trigger the next request.
A recursive solution would look somewhat like this:
var request = require('request');
var fs = require('fs');
var options = {/*options for POST request body*/};
/**
 * Request one chunk, append it to ./response.json, and request the next chunk
 * from inside the callback until a body of length zero comes back.
 * Start the chain with loop(options).
 *
 * @param {object} options  Options passed to request(); every recursive call
 *                          receives a copy with `offset` advanced.
 */
function loop(options) {
  request(options, function (error, response, body) {
    if (error) {
      // Surface the failure instead of stopping silently.
      return console.error(error);
    }
    if (response.statusCode !== 200 || body.length === 0) {
      return;
    }
    fs.appendFileSync("./response.json", JSON.stringify(body), 'utf8');
    // Advance the offset from this chunk alone. Reading response.json back and
    // parsing it fails from the second chunk on, because the file then holds
    // several JSON documents written back to back.
    // NOTE(review): assumes body is an already-parsed object exposing a
    // `parameters` array (e.g. request's json option) — confirm against the API.
    var previousOffset = options.offset || 0;
    loop({ ...options, offset: previousOffset + body.parameters.length });
  });
}

Consuming a service API using nodejs + jsonwebtoken(jwt)

Hello guys i want to use node.js to retrieve the user authenticated from an api URL.
I retrieved the codeToken(jsonwebtoken) and put it in the header of my request in postman it works.
But when i go to the code i can't find a way to use node.js+express request, to retrieve the json response of the user that's logged in using all that.
I can't share the URL of the API, for the client's sake.
here is my code guys but it's not the right one please advise
var request = require('request');
// Options handed to request(): the target URL plus a custom `tokenCode`
// request header holding the token.
var options = {
url: '**********',// real URL withheld for the client's sake
headers: {
tokenCode: '15288648455b20a04d5463e'
}
};
/**
 * Handle the API response: print the JSON-decoded body for a 200 response,
 * the status code for any other response, and the error when the request
 * itself failed.
 *
 * @param {?Error} error     Set when the request could not be completed.
 * @param {object} response  Response object; undefined when `error` is set.
 * @param {string} body      Raw response body.
 */
function callback(error, response, body) {
  if (error) {
    // There is no response to inspect here, so reading statusCode would throw.
    return console.error(error);
  }
  if (response.statusCode !== 200) {
    return console.log(response.statusCode);
  }
  var info = JSON.parse(body);
  console.log(info);
}
request(options, callback);
Headers in my POSTMAN (only tokenCode)

Custom HTTP header does not yield the expected value

I am trying to write a server app to which a client sends an HTTP request with two custom headers, targeturl and setheaders, and the server reads their values.
The server then creates a new request with the URL and headers it got from the client.
var http = require('http')
var request = require('request')
const express = require('express')
var app = express();
// For every GET /, read the target URL and the header set from the incoming
// request's custom headers, then call getData twice: once with the
// client-supplied value and once with a hard-coded header object.
app.get("/",(req,res)=>{
  let targeturl = req.headers.targeturl
  // NOTE(review): values in req.headers arrive as strings, so headers1 is
  // presumably the text of the header rather than an object like headers2 —
  // confirm, and decode it into an object before passing it on if so.
  let headers1 = req.headers.setheaders
  let headers2 = { 'x-shard': 'shopid=1354933;loc=138.252924,36.20482399999999' }
  // Log headers1; the previously logged name `headers` is not defined anywhere
  // in this handler.
  console.log(headers1)
  console.log(headers2)
  getData(targeturl,headers1)//wrong response
  getData(targeturl,headers2)//right response
  res.send("ok")
})
/**
 * Request `url` with the given headers and log the JSON-decoded body.
 * Request failures, non-200 statuses and undecodable bodies are reported on
 * stderr instead of being dropped or thrown from the callback.
 *
 * @param {string} url     Address to request.
 * @param {object} header  Header name/value pairs sent with the request.
 */
function getData(url,header){
  var options = {
    url:url,
    headers:header
  }
  function callback(error, response, body) {
    if (error) {
      return console.error(error);
    }
    if (response.statusCode !== 200) {
      return console.error('Unexpected status code', response.statusCode);
    }
    try {
      console.log(JSON.parse(body));
    } catch (parseError) {
      // The body was not JSON; report it rather than crashing the process.
      console.error('Response body is not valid JSON', parseError);
    }
  }
  request(options, callback);
}
When I create a request from the client, this strange thing happened.
this is output:
{ 'x-shard': 'shopid=1354933;loc=138.252924,36.20482399999999' }
{ 'x-shard': 'shopid=1354933;loc=138.252924,36.20482399999999' }
//a right response
//anther wrong response
In my terminal, headers1 and headers2 print as exactly the same characters. That confuses me a lot; I really cannot find the difference between them. Is there some difference that the terminal output does not show?

Insert data through SPARQL over HTTP POST request from node js

I am using 'request' module in my node app to POST data in ontology model which resides in a fuseki server. I am using the following code:
var request = require('request');
var querystring = require('querystring');
var myquery = querystring.stringify({update: "PREFIX test:<http://www.semanticweb.org/muhammad/ontologies/2017/2/untitled-ontology-14#> INSERT { ?KPIs test:hasValue 2009} WHERE { ?KPIs test:hasValue ?Newvalue}"});
// POST to the dataset's sparql service with the update passed as a URL query
// string, then report the outcome.
request.post('http://localhost:3030/DS-1/sparql?'+myquery, function (error, response, body) {
  if (error) {
    // No response exists when the request itself failed, so only the error
    // can be reported.
    return console.warn(error);
  }
  if (response.statusCode == 200) {
    console.log('successful update');
    console.log(body);
  } else {
    console.log(response.statusCode);
  }
});
PS: When I use POSTMAN to send the Post request to insert data it works fine but from my node app, it doesn't. it shows error 'bad request 400'.
P.S: GET methods work fine from both POSTMAN and node app.
Problem Solved:
I was making mistake in the format of post request. The corrected format is given below.
var request = require('request');
var querystring = require('querystring');
var myquery2 = querystring.stringify({update: "PREFIX test:<http://www.semanticweb.org/muhammad/ontologies/2017/2/untitled-ontology-14#> INSERT { ?KPI_Variables test:hasValue_ROB1 2000} WHERE { ?KPI_Variables test:hasValue_ROB1 ?Newvalue FILTER(?KPI_Variables= test:Actual_Production_Time)}"});
// POST the update to the dataset URL with a form-urlencoded content type, the
// update itself travelling in the query string, then report the outcome.
request.post({headers: {'content-type' : 'application/x-www-form-urlencoded'},url:'http://localhost:3030/DS-1/?'+myquery2 }, function (error, response, body) {
  if (error) {
    // No response exists when the request itself failed, so only the error
    // can be reported.
    return console.warn(error);
  }
  if (response.statusCode == 200) {
    console.log('successful update');
    console.log(body);
  }
  else
  {
    console.log(response.statusCode)
  }
});
I was missing the 'headers' and 'url' elements in my request.post.
/DS-1/sparql is the query service.
INSERT is an update operation.
Try /DS-1/update
It is better to POST the update in the body of the request with a Content-type. ?update= may not work.

Resources