I want to get the flights table for every city from a page, but before that I have to set the search fields and hit the button, and before that I have to enter a captcha on the first page. The website is built with .NET. I enter the captcha myself, but I want to do the rest with Node.js.
This is what I'm doing: first I enter the captcha; then, from the loaded page and by looking at Firebug, I set the request headers and request URL and send them using http.request with the GET method to scrape the page with Node.js again. After that I get the viewstate of the page and, again using Firebug, set another set of request headers, the "post data" and the request URL, and send them using http.request to scrape the final page. The information that I take from Firebug is fixed; I mean the URLs, header options and post data. What I have to change is the city name in the post data.
But I get a page with an empty table. If it is possible, what should I do? (Sorry for my bad English :) )
This is the url (in persian): http://sepehr.iranhrc.ir.
Thanks in advance.
var http = require('follow-redirects').http;
var querystring = require('querystring');
var cheerio = require('cheerio');
// Form fields sent as the body of the search POST. According to the
// description above, the values were copied from Firebug and only the city
// is meant to vary between runs. __VIEWSTATE is left empty in this literal.
// Key order is kept as-is because it determines the order of the encoded body.
var dataToAttach = {
  ScriptManager1: 'uplFlightSearch|btnSubmit37756070715319',
  __ASYNCPOST: true,
  __EVENTARGUMENT: '',
  __EVENTTARGET: '',
  __LASTFOCUS: '',
  __VIEWSTATE: '',
  __VIEWSTATEGENERATOR: 'E4CF65F9',
  btnSubmit37756070715319: '?????',
  // Search criteria
  dplFrom: 'THR',
  dplReservationRouteType: 'RoundTrip_FixedDate',
  dplTo: '0',
  dplFlightAdults: '1',
  dplFlightChilds: '0',
  dplFlightInfants: '0',
  txtCountUp: '00:26',
  txtDepartureDate: '1394/04/02',
  txtReturningDate: '1394/04/04'
};
// Headers sent with the initial GET of the flight-search page.
var flightsSearchHeaders = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.5',
  'Connection': 'keep-alive',
  'Cache-Control': 'no-cache',
  'Cookie': 'ASP.NET_SessionId=2iexj4pfxld4mdilfwttka2q;',
  'Content-Type': 'text/html; charset=utf-8',
  'Host': 'sepehr.iranhrc.ir',
  'Referer': 'sepehr.iranhrc.ir'
};

// Request options (http.request format) for fetching the search page.
var flightsSearchPageROs = {
  hostname: 'sepehr.iranhrc.ir',
  path: '/Systems/FA/Reservation/Flight_NewReservation_Search.aspx?qry=sbV7wBdq4B7yEk1yv0OpvmofQkdkbWH49Wjk6UIMGiw95zDjDGo0/ssWJH8Wjv1D',
  method: 'GET',
  headers: flightsSearchHeaders
};
// Request options (http.request format) for the search POST that should
// return the flights table. 'Content-Length' is a placeholder here and is
// filled in once the body is known.
var resultsPageROs = {
  hostname: 'sepehr.iranhrc.ir',
  path: '/Systems/FA/Reservation/Flight_NewReservation_Search.aspx?action=display&rnd=2378726045210585',
  method: 'POST',
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    // Reuse the session cookie of the GET request. The view state posted here
    // is scraped from that GET, so both requests have to run in the same
    // ASP.NET session; previously a different session id was hard-coded.
    'Cookie': flightsSearchPageROs.headers.Cookie,
    'Content-Length': '',
    'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
    'Referer': 'http://sepehr.iranhrc.ir/Systems/FA/Reservation/Flight_NewReservation_Search.aspx',
    'Host': 'sepehr.iranhrc.ir',
    'Pragma': 'no-cache',
    'X-MicrosoftAjax': 'Delta=true',
    'X-Requested-With': 'XMLHttpRequest'
  }
};
// Step 1: GET the search page (flightsSearchPageROs), scrape its __VIEWSTATE
// value into dataToAttach, set the POST's Content-Length, then start the
// search POST via resultsPageRequest().
var flightsSearchPage = http.request(flightsSearchPageROs, function (response) {
  // Decode as UTF-8 up front so a multi-byte character split across two
  // chunks is not corrupted by the string concatenation below.
  response.setEncoding('utf8');
  var dataStream = '';
  response.on('data', function (chunk) {
    dataStream += chunk;
  });
  response.on('end', function () {
    var htmlCode = cheerio.load(dataStream);
    // Select the form field by its name attribute; a bare "__VIEWSTATE"
    // selector would look for a <__VIEWSTATE> tag and never match.
    var viewState = htmlCode('input[name="__VIEWSTATE"]').val();
    if (!viewState) {
      // Posting without a view state cannot succeed, so stop here.
      console.log("Error0: __VIEWSTATE not found in the search page");
      return;
    }
    // Store the value on the object that is actually sent as the POST body.
    dataToAttach.__VIEWSTATE = viewState;
    // Content-Length is a byte count, not a character count.
    resultsPageROs.headers['Content-Length'] = Buffer.byteLength(querystring.stringify(dataToAttach));
    resultsPageRequest();
  });
});
flightsSearchPage.on('error', function (e) { console.log("Error0: " + e.message); });
flightsSearchPage.end();
/**
 * Sends the request described by resultsPageROs with dataToAttach as its
 * url-encoded body. Logs the response status code as soon as the response
 * starts, and logs the body (passed through cheerio) once it has ended.
 */
function resultsPageRequest() {
  function onResponse(response) {
    var body = '';
    response.setEncoding('utf8');
    response.on('data', function (piece) {
      body += piece;
    });
    response.on('end', function () {
      var page = cheerio.load(body);
      console.log(page.html());
    });
    console.log('STATUS: ' + response.statusCode);
  }

  function onError(e) {
    console.log("Error1: " + e.message);
  }

  var changingCitiesBoxResponse = http.request(resultsPageROs, onResponse);
  changingCitiesBoxResponse.on('error', onError);
  changingCitiesBoxResponse.end(querystring.stringify(dataToAttach));
}
EDIT
One thing that I forgot to mention is that this is already done in PHP with cURL; I looked at the cURL part of that code and did exactly the same in Node with http.request. The PHP version returns the correct answer, but mine does not.
And I used phantom as well.This is the code:
// PhantomJS variant: open the results URL with a POST carrying the same
// headers and form body, then print the rendered page body.
var url = "http://sepehr.iranhrc.ir/Systems/FA/Reservation/Flight_NewReservation_Search.aspx?action=display&rnd=4565721642440773";
var settings = {
  operation: "POST",
  encoding: "utf8",
  weak: false,
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Cookie': 'ASP.NET_SessionId=2iexj4pfxld4mdilfwttka2q;',
    // NOTE(review): an empty Content-Length is sent as-is; confirm whether
    // the browser engine overrides it with the real body length.
    'Content-Length': '',
    'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
    'Referer': 'http://sepehr.iranhrc.ir/Systems/FA/Reservation/Flight_NewReservation_Search.aspx?qry=sbV7wBdq4B7yEk1yv0OpvmofQkdkbWH49Wjk6UIMGiw95zDjDGo0/ssWJH8Wjv1D',
    'Host': 'sepehr.iranhrc.ir',
    'Pragma': 'no-cache',
    'X-MicrosoftAjax': 'Delta=true',
    'X-Requested-With': 'XMLHttpRequest'
  },
  // NOTE(review): the original referenced `secondDataToAttach`, which is not
  // defined anywhere in view; `dataToAttach` is the form object that is.
  data: querystring.stringify(dataToAttach)
};
phantom.create(function (ph) {
  ph.createPage(function (page) {
    // Open the URL declared above (the original passed an undefined `url2`).
    page.open(url, settings, function (status) {
      console.log(status);
      page.evaluate(function () { return document.body.innerHTML; }, function (result) {
        console.log('Content ' + result);
        ph.exit();
      });
    });
  });
}, {
  dnodeOpts: { weak: false }
});
But i didn't get the answer.
And yes, it uses AJAX to send the request. According to Firebug:
The first request sends the post data; the second one, I think, is a redirect that gives the link to the result page (I used that link, but no luck); and the third is the result page with the flight details, which I can't get.
Maybe I used them wrong. How can I use these three requests to get the flights table?
I think what's happening is that the page you want the results from is loading them through a separate AJAX request, and a regular get request isn't going to get that. You'll need to either figure out what that separate request is and do it yourself, or scrape the page with something like PhantomJS that can execute the javascript for the page.
First though, go to the page on your browser regularly and watch the network tab and locate that request that loads in the data.
Related
I'm facing an issue trying to connect a Flutter application to my Node.js backend, which uses express-session. In Postman the response headers include a "Set-Cookie" header, but the response headers Flutter receives from http.post(...) do not: headers: {content-length: 113, content-type: application/json; charset=utf-8}.
I need a cookie to keep the authenticated session with passport. Any ideas how to fix it?
Flutter headers:
host: '127.0.0.1:3000', connection: 'keep-alive', 'content-length': '57', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36', 'content-type': 'application/json; charset=utf-8', accept: '*/*', origin: 'http://localhost:51879', 'sec-fetch-site': 'cross-site', 'sec-fetch-mode': 'cors', 'sec-fetch-dest': 'empty', referer: 'http://localhost:51879/', 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7'
Postman Headers: 'content-type': 'application/json', accept: '*/*', 'postman-token': '7c79280d-****-****-a985-c01395e50e08', host: 'localhost:3000', 'accept-encoding': 'gzip, deflate, br', connection: 'keep-alive', 'content-length': '66'
Recommend you to use dio library for HTTP calls.
import 'package:dio/dio.dart';
/// Singleton wrapper around a [Dio] client that remembers a cookie received
/// from `/login` and attaches it to every subsequent request.
class ApiProvider {
  Dio _dio;

  /// Cookie value sent in the `cookie` request header; empty until [login]
  /// has stored one.
  String aToken = '';

  final BaseOptions options = new BaseOptions(
    // base url to backend server
    baseUrl: 'http://a.b.c.d:port/',
    connectTimeout: 15000,
    receiveTimeout: 13000,
  );

  static final ApiProvider _instance = ApiProvider._internal();

  factory ApiProvider() => _instance;

  ApiProvider._internal() {
    _dio = Dio(options);
    _dio.interceptors.add(InterceptorsWrapper(
      onRequest: (Options options) async {
        // Hold the request lock while the cookie header is being set.
        _dio.interceptors.requestLock.lock();
        // set the cookie to headers
        options.headers["cookie"] = aToken;
        _dio.interceptors.requestLock.unlock();
        return options; // continue
      }
    ));
  }

  /// Posts the credentials to `/login` and stores the cookie from the
  /// response in [aToken].
  Future login() async {
    final request = {
      "userName": "",
      "password": "",
      "token": ""
    };
    final response = await _dio.post('/login', data: request, options: Options(
      followRedirects: false,
      validateStatus: (status) { return status < 500; }
    ));
    // get the cookie from the response
    final cookies = response.headers.map['set-cookie'];
    // The map lookup yields null when the response carries no Set-Cookie
    // header, so check for null before touching the list.
    if (cookies != null && cookies.length == 2) {
      // it depends on how your server sends the cookie
      aToken = cookies[1].split(';')[0];
    }
  }

  /// if we call this function without cookie then it will throw 500 err.
  Future getSomething() async {
    final response = await _dio.post('/something');
  }
}
Introduction
I use the native https module of node.js to make a get request on a website.
The website only give the good document when you use some specific cipher and tls version.
A quick draft of my code is like this
// GET a page over TLS with a restricted cipher list, log the status, the
// response headers and the TLS parameters that were actually negotiated,
// and write the (decompressed) body to stdout.
var ciphers = ["RSA-PSK-AES256-CBC-SHA384","RSA-PSK-AES256-CBC-SHA"].join(":");
const options = {
  host: 'www.example.mg',
  port: '443',
  minVersion: 'TLSv1.2',
  ciphers: ciphers,
  path: '/',
  agent: agent,
  headers: {
    'Host': 'www.example.mg',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
  }
};
https.get(options, (res) => {
  console.log('statusCode:', res.statusCode);
  console.log('headers:', res.headers);
  // The response socket is the TLS socket of the connection; it reports the
  // protocol version and cipher chosen during the handshake.
  console.log('protocol:', res.socket.getProtocol());
  console.log('cipher:', res.socket.getCipher());

  // Decompress as a stream: a compressed body usually spans several 'data'
  // chunks, and a single chunk is not a complete archive, so decoding each
  // chunk on its own fails. All encodings advertised in Accept-Encoding are
  // handled.
  let body = res;
  switch (res.headers['content-encoding']) {
    case 'gzip':
      body = res.pipe(zlib.createGunzip());
      break;
    case 'deflate':
      body = res.pipe(zlib.createInflate());
      break;
    case 'br':
      body = res.pipe(zlib.createBrotliDecompress());
      break;
    default:
      break;
  }
  body.on('data', (d) => {
    process.stdout.write(d);
  });
  body.on('error', (e) => {
    console.error(e);
  });
}).on('error', (e) => {
  console.error(e);
});
The point of the question
I want to know which protocol version and cipher were finally negotiated during the handshake. I want to log this information like in the screenshot, which was produced with the Java HttpClient. Now I need to do it in Node.js.
I already tried using NODE_DEBUG='tls,https', but the information logged was not enough.
Tried multiple approaches to send custom-headers via Aurelia-http-client and Aurelia-Fetch-client to pass Headers in the get/post requests that I am making, but in the actual request, the headers are not being passed
approach 1
// Approach 1: build a single POST request and attach the custom header to
// that request only.
var client = new HttpClient();
client
  .createRequest('/api/information/save')
  .asPost()
  .withBaseUrl('http://10.0.0.13:3000')
  .withHeader("X-auth-code", "abc")
  .send();
approach 2
// Approach 2: configure the client itself so the base URL, credentials flag
// and headers are set through its configuration callback.
var client = new HttpClient().configure((builder) => {
  builder.withBaseUrl('http://10.0.0.13:3000');
  builder.withCredentials(true);
  builder.withHeader('Content-Type', 'application/json; charset=utf-8');
  builder.withHeader('x-client-code', 'abc');
});
Approach 3
// Approach 3: set default headers on the client and additionally append the
// header in a request interceptor; both hooks log what passes through them.
this.http.configure((config) => {
  const defaults = {
    credentials: 'same-origin',
    headers: {
      "Content-Type": "application/json",
      "x-client-code": "abc",
    }
  };

  const interceptor = {
    // May return a modified Request, or short-circuit by returning a Response.
    request(request) {
      request.headers.append("x-client-code","abc");
      console.log(`${request.headers}`);
      return request;
    },
    // May return a modified Response.
    response(response) {
      console.log(`Received ${response.status} ${response.url}`);
      return response;
    }
  };

  config
    .withDefaults(defaults)
    .useStandardConfiguration()
    .withInterceptor(interceptor);
});
But all of them lead to the same error
{ host: '10.0.0.13:3000',
connection: 'keep-alive',
'access-control-request-method': 'POST',
origin: 'http://localhost:9000',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36',
'access-control-request-headers': 'content-type',
accept: '*/*',
referer: 'http://localhost:9000/',
'accept-encoding': 'gzip, deflate',
'accept-language': 'en-GB,en-US;q=0.8,en;q=0.6' }
In the end we are unable to pass the headers.
it's a security against cross-site scripting (and it's super annoying) #see : Cors Access-Control-Allow-Headers wildcard being ignored?
I used to send requests using Postman Interceptor. This is how I handled the headers and body of the request:
You can try it by yourself. You can see that once you turn on "interception mode", you get a different response than "without" it.
Now, I want to send the same request, but by using 'HTTPS module in Node.js.
I followed the following pattern:
var https = require('https');
var querystring = require('querystring');
// Form fields of the party-name search, in the order they are encoded.
var search_fields = {
  hid_last: "SMITH",
  hid_first: "JOHN",
  __RequestVerificationToken: "EiO369xBXRY9sHV/x26RNwlMzWjM9sR/mNlO9p9tor0PcY0j3dRItKH8XeljXmTfFWT0vQ1DYBzlGpLtnBBqEcOB51E9lh6wrEQbtMLUNOXpKKR3RzFqGc9inDP+OBIyD7s9fh9aMAypCHFCNFatUkx666nf7NOMHHKfiJKhfxc=",
  hid_max_rows: 20,
  hid_page: 1,
  hid_SearchType: 'PARTYNAME'
};

// URL-encoded request body.
var post_data = querystring.stringify(search_fields);

// Where to post to, and with which headers. Content-Length is the byte
// length of the encoded body.
var post_options = {
  host: 'a836-acris.nyc.gov',
  path: '/DS/DocumentSearch/PartyNameResult',
  method: 'POST',
  headers: {
    'Content-Type': 'application/x-www-form-urlencoded',
    'Origin': 'https://a836-acris.nyc.gov',
    'Referer': "https://a836-acris.nyc.gov/DS/DocumentSearch/PartyName",
    'Upgrade-Insecure-Requests': 1,
    'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
    'Content-Length': Buffer.byteLength(post_data),
    'Cookie': '_ga=GA1.2.1526584332.1483281720; WT_FPC=id=2fb6833e-6ae6-4529-b84a-4a1c61f24978:lv=1483256520738:ss=1483256520738'
  }
};
// Set up the request; each chunk of the response body is logged as it arrives.
var post_req = https.request(post_options, function (res) {
  res.setEncoding('utf8');
  res.on('data', function (chunk) {
    console.log('Response: ' + chunk);
  });
});
// Without an 'error' listener a connection or TLS failure is emitted as an
// unhandled 'error' event and terminates the process.
post_req.on('error', function (err) {
  console.error('Request failed: ' + err.message);
});
// post the data
post_req.write(post_data);
post_req.end();
The only thing missing is the "interceptor" issue. When I use this code now, I get the same response I used to get without using 'interceptor' mode in Postman.
My question is how to "convert" the "interceptor mode" in Postman to 'HTTPS module in node.js?
Edit: Code is typescript, but very similar to JS, so I hope thats not a problem ;)
I am trying to make an HTTPS request, but I only get "read ECONNRESET" in the error event after some timeout.
To encode post data I use require('querystring'); and it works fine, data is exactly what the browser sends.
// URL-encoded login form body.
var postData = querystring.stringify(data);
// Options for the HTTPS login POST; the headers mirror a browser request.
var postOptions = {
  host: 'my.host.com',
  port: 443,
  path: '/openid/loginsubmit',
  method: 'POST',
  headers: {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.5',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0',
    'Content-Type': 'application/x-www-form-urlencoded',
    // Header value without the stray leading space.
    'Referer': 'https://my.host.com/openid/login',
    // Content-Length counts bytes, not UTF-16 code units.
    'Content-Length': Buffer.byteLength(postData) // 157 in web
  },
  // NOTE(review): this turns off TLS certificate verification; keep it only
  // for debugging against a trusted host.
  rejectUnauthorized: false
};
I took all the headers I tracked with Fiddler to be sure there is no reason to reject my request on serverside.
Then I start try to send the request using:
// Accumulates the response body so it can be forwarded in one piece.
var resString = '';

// Sends the login POST. The status and headers are logged when the response
// starts; the collected body is forwarded through res.send when it ends.
var postReq:any = https.request(postOptions, (httpRes:any) => {
  const collectChunk = (chunk) => {
    resString += chunk;
    console.log('Response: ' + chunk);
  };
  const forwardBody = () => {
    res.send(resString);
  };

  console.log("statusCode: ", httpRes.statusCode);
  console.log("headers: ", httpRes.headers);
  httpRes.on('data', collectChunk);
  httpRes.on('end', forwardBody);
});

// On a request failure, log it and forward the error message instead.
postReq.on('error', (err) => {
  console.error('ERROR failed to login into website');
  res.send(err.message);
});

postReq.write(postData); // even tried to add 'utf8' as second parameter
postReq.end();
Now all I get is an error after some timeout, with the message "read ECONNRESET".
If someone could help me with an example (maybe against some arbitrary HTTPS host) where an HTTPS POST request works, it would help a lot.
Or are there some mistakes in it? Maybe I have to provide a server certificate somewhere to encrypt my post data?