I need to connect to a web page and return the status code of the page, which I've been able to achieve using http.request however the pages I need to request can take a long time, sometimes several minutes, so I'm always getting a socket hang up error.
I'm using the following code so far:
var reqPage = function(urlString, cb) {
// Resolve the URL
var path = url.parse(urlString);
var req = http.request({
host: path.hostname,
path: path.pathname,
port: 80,
method: 'GET'
});
req.on('end', function() {
cb.call(this, res);
});
req.on('error', function(e) {
winston.error(e.message);
});
};
What do I need to do to ensure that my application still attempts to connect to the page even if it's going to take a few minutes?
Use the request module and set the timeout option to an appropriate value (in milliseconds)
var request = require('request')
var url = 'http://www.google.com' // input your url here
// use a timeout value of 10 seconds
var timeoutInMilliseconds = 10*1000
var opts = {
url: url,
timeout: timeoutInMilliseconds
}
request(opts, function (err, res, body) {
if (err) {
console.dir(err)
return
}
var statusCode = res.statusCode
console.log('status code: ' + statusCode)
})
Add this if you don't want to use a higher level http client like request or superagent , then add this...
req.on("connection", function(socket){
socket.setTimeout((1000*60*5)); //5 mins
});
Related
So I want to use this: (taken from their API site -> node.js documentation)
https://github.com/blockchain/api-v1-client-node
Recieving payments:
https://github.com/blockchain/api-v1-client-node/blob/master/docs/Receive.md
var blockchain = require('blockchain.info');
var identifier = 'myidentifier';
var password = 'mypassword';
var myWallet = new blockchain.MyWallet(identifier, password);
var myBTCadress = '14Q3ufL1BUHtWskBKtsshVDATRY65TaJMB';
Ok, so the recieving part:
var receive = new blockchain.Receive( [confirmations: 1], ? ); // What do I need to put here?
Documentation says:
callbackURL: the url to which the callback should be sent (string)
I don't understand what URL it should go to?!
The callback URL should be the one that redirects back to your site. So setup a callback url with blockchain like...
https://www.yoursite.com/callback/blockchain
Assuming you are using something like express in your app make a route like so.
app.get('/callback/blockchain', function (req, res) {
// Stuff here
});
you will prob need to include
var https = require('https');
That way then you can set up your logic inside for example...
// Stuff here
var options = {
host : 'api.blockchain.info',
path : '/some/path/',
port : 443,
method : 'GET'
}
var request = https.request(options, function(response){
var body = ""
response.on('data', function(data) {
body += data;
});
response.on('end', function() {
res.send(JSON.parse(body));
});
});
request.on('error', function(e) {
console.log('Problem with request: ' + e.message);
});
request.end();
That will for example output you request in json on whatever page you have your app.get('whateverurl') set to.
I am trying to process Wikipedia articles, and want to receive a list of all Wikipedia articles. In order to do this I am frequently sending http requests to the Wikipedia API, which allows you to receive 500 titles at time and also returns an apcontinue string, which, when used in the following request, returns title starting from that string.
In order to do this, I am using the agentkeepalive module:
var http = require('http');
var Agent = require('agentkeepalive');
var keepaliveAgent = new Agent({
keepAlive: true,
maxSockets: 5,
timeout: 5000,
keepAliveTimeout: 3000
});
To send an http request to Wikipedia, I use the following code:
function wikipediaApiCall(params, callback) {
var options = {
host: 'en.wikipedia.org',
path: '/w/api.php?' + createParamString(params),
method: 'GET',
agent: keepaliveAgent
};
var callbackFunc = function(response) {
var err;
var str = '';
if (('' + response.statusCode).match(/^5\d\d$/)) {
err = new Error('Server error');
}
//another chunk of data has been recieved, so append it to `str`
response.on('data', function (chunk) {
str += chunk;
});
response.on('error', function (e) {
err = new Error('Request error');
});
response.on('timeout', function () {
err = new Error('Timeout');
response.abort();
callback(err);
});
response.on('end', function () {
var obj = JSON.parse(str);
if (obj.warnings) {
err = new Error('Request error');
}
callback(err, obj);
});
}
var req = http.request(options, callbackFunc);
req.setTimeout(5000);
req.on('error', function(err) {
callback(err, null);
return;
});
req.on('timeout', function () {
err = new Error('Timeout');
response.abort();
callback(err);
});
req.on('finish', function(){
console.log('ended');
});
req.end();
}
However, after sending between 16 and 20 request, I am not getting any response, but my request also does not time out.
Any ideas why this is happening?
Update
The request I send to Wikipedia contains the following parameters:
var params = {
list: 'allpages',
aplimit: limit,
apfrom: from,
continue: cont,
// apfilterredir: 'nonredirects'
};
Interestingly, after leaving out the nonredirects setting, I was able to send and receive up to 330 requests, but no more than that.
Update 2
I was able to register a finished event. It appears to be fired for the request that is failing as well. I modified the code accordingly.
Perhaps you need a bot flag to have higher API limits. Maybe there are too many requests in parallel; WMF recommendation is to make requests serially in case of such big tasks. Also, you should use the maxlag parameter with low values, per WMF API Etiquette.
I'm trying to catch ECONNREFUSED errors when using a HTTP client in node.js. I'm making requests like this:
var http = require('http');
var options = { host: 'localhost', port: '3301', path: '/', method: 'GET' };
http.request(options).on('response', function (res) {
// do some stuff
});
I can't figure out how to catch this error:
Error: connect ECONNREFUSED
at errnoException (net.js:614:11)
at Object.afterConnect [as oncomplete] (net.js:605:18)
If I do request.on('error', function () {});, it doesn't catch it. If I do it like this:
var req = request.on(etc)
req.on('error', function blah () {});
Then I get TypeError: Object false has no method 'on'.
Do I really have to do a top-level uncaught error thing to deal with this? At the moment whatever I do my whole process quits out.
Edit: I found some blog posts on how to do it by creating a connection object, calling request on that, and then binding to errors on the connection object, but doesn't that make the entire http.request() shortcut useless?
Any reason you're not using http://nodejs.org/docs/v0.6.5/api/http.html#http.request as your base? Try this:
var req = http.request(options, function(res) {
// Bind 'data', 'end' events here
});
req.on('error', function(error) {
// Error handling here
});
req.end();
Each call to http.request() returns its self.
So try it like this...
http.request(options.function(){}).on('error',function(){}).end();
I've got a solution for this, having tried all the suggestions on this (and many other) pages.
My client needs to detect a turnkey product that runs embedded windows. The client is served from a different machine to the turnkey.
The turnkey can be in 3 states:
turned off
booted into windows, but not running the turnkey app
running the turnkey app
My client sends a 'find the turnkey product' GET message to my nodejs/express service, which then tries to find the turnkey product via http.request. The behavior for each of the 3 use cases are;
timeout
ECONNREFUSED - because the windows embedded phase of the turnkey is
refusing connections.
normal response to request (happy day scenario)
The code below handles all 3 scenarios. The trick to catching the ECONNREFUSED event was learning that its handler binds to the socket event.
var http = require('http');
var express = require('express');
var url = require('url');
function find (req, res) {
var queryObj = url.parse(req.url, true).query;
var options = {
host: queryObj.ip, // client attaches ip address of turnkey to url.
port: 1234,
path: '/some/path',
}; // http get options
var badNews = function (e) {
console.log (e.name + ' error: ', e.message);
res.send({'ok': false, 'msg': e.message});
}; // sends failure messages to log and client
// instantiate http request object and fire it
var msg = http.request(options, function (response) {
var body = '';
response.on ('data', function(d) {
body += d;
}); // accumulate response chunks
response.on ('end', function () {
res.send({'ok': true, 'msg': body});
console.log('sent ok');
}); // done receiving, send reply to client
response.on('error', function (e) {
badNews(e);
}); // uh oh, send bad news to client
});
msg.on('socket', function(socket) {
socket.setTimeout(2000, function () { // set short timeout so discovery fails fast
var e = new Error ('Timeout connecting to ' + queryObj.ip));
e.name = 'Timeout';
badNews(e);
msg.abort(); // kill socket
});
socket.on('error', function (err) { // this catches ECONNREFUSED events
badNews(err);
msg.abort(); // kill socket
});
}); // handle connection events and errors
msg.on('error', function (e) { // happens when we abort
console.log(e);
});
msg.end();
}
For those not using DNS (you can also use request instead of get by simply replacing get with request like so: http.request({ ... })):
http.get({
host: '127.0.0.1',
port: 443,
path: '/books?author=spongebob',
auth: 'user:p#ssword#'
}, resp => {
let data;
resp.on('data', chunk => {
data += chunk;
});
resp.on('end', () => console.log(data));
}).on('error', err => console.log(err));
app.get('/', function(req, res){
var options = {
host: 'www.google.com'
};
http.get(options, function(http_res) {
http_res.on('data', function (chunk) {
res.send('BODY: ' + chunk);
});
res.end("");
});
});
I am trying to download google.com homepage, and reprint it, but I get an "Can't use mutable header APIs after sent." error
Anyone know why? or how to make http call?
Check out the example here on the node.js doc.
The method http.get is a convenience method, it handles a lot of basic stuff for a GET request, which usually has no body to it. Below is a sample of how to make a simple HTTP GET request.
var http = require("http");
var options = {
host: 'www.google.com'
};
http.get(options, function (http_res) {
// initialize the container for our data
var data = "";
// this event fires many times, each time collecting another piece of the response
http_res.on("data", function (chunk) {
// append this chunk to our growing `data` var
data += chunk;
});
// this event fires *one* time, after all the `data` events/chunks have been gathered
http_res.on("end", function () {
// you can use res.send instead of console.log to output via express
console.log(data);
});
});
I have a node.js script that continuously requests a page, sort of like a cron job.
However, after a few minutes Node starts to use a lot of CPU (up to 70%) and memory (up to 200mb).
What is wrong with my script?
function cron(path)
{
var http = require('http');
var site = http.createClient(443, 'www.website.com', true);
var request = site.request('GET', path, {'host': 'www.website.com'});
request.end();
request.on('response', function (response) {
setTimeout(function(){cron(path)},15000);
});
}
cron('/path/to/page');
request.on('response', function (response) {
setTimeout(function(){cron(path)},15000);
});
For every response you create a new cron job. Log your responses. If your getting more then 1 from your request then your exponantially creating more cron jobs.
Your creating a function() {} with a reference to path. So the entire scope state is kept. you want to free memory by adding this:
var site = null;
var request = null;
Your calling require("http") inside a function rather then outside in module scope. You only need to get http once so place at the top of your file in module scope.
var http = require('http');
var site = http.createClient(443, 'www.website.com', true);
function cron(path)
{
var request = site.request('GET', path, {'host': 'www.website.com'});
request.end();
var once = true;
request.on('response', doIt);
function doIt(response) {
if (!once) {
once = null;
doIt = function() {};
setTimeout(function(){cron(path)},15000);
}
});
site = null;
request = null;
}
cron('/path/to/page');
In addition to the tips from #Raynos, here's another. I find that recursive calls like this in long running processes make me a bit nervous so I'd err on the side of using setInterval instead. I'd maybe split the cron and the http behaviour apart in case you want to try and re-use that logic, although that'll depend on your context:
e.g. in node 0.4.7:
var https = require('https');
function poll(path)
{
https.get({
host: 'www.website.com',
port: 443,
path: path
}, function(res) {
console.log("Got response: " + res.statusCode);
}).on('error', function(e) {
console.log("Got error: " + e.message);
});
}
function cron(path)
{
return setInterval(function(){
poll(path);
},15000);
}
var intervalId = cron('/path/to/page'); // keep in case you need to use clearInterval