I'm having a problem making a lot of requests with the 'HEAD' method.
I've set the async.queue concurrency to 20 and the timeout to 3000 ms.
Anyway, when I run it:
I see 10-15 successes, then some timeouts with a few more successes, and then it hangs... nothing happens after that.
If I remove the timeout, I get about 10 successes and then it hangs...
And I don't get the error message either.
The code of the request:
function getHeader(link)
{
var correctUrl = url.parse(link);
var options = {method: 'HEAD', host: correctUrl.hostname, port: 80, path: correctUrl.pathname};
var req = http.request(options, function(res) {
if(res.statusCode == 404 || res.statusCode == 500) return;
var x = {
loc : link
};
if(typeof(res.headers['last-modified']) != "undefined")
{
x.lastmod = dateConverter(res.headers['last-modified']);
console.log("Added lastmodify: " + x.lastmod);
}
console.log(res.headers);
parser.allObjects.push(x);
});
req.setTimeout(3000, function() {
console.log("Timeout reached. Link:" + link);
req.abort();
});
req.on('error', function (e) {
console.log('problem with request: ' + e.message);
});
req.end();
}
And the queue is here:
var queue = async.queue(function (href, callback) {
getHeader(href,function(err){
if(err) return callback(err);
return callback();
});
}, parser.serverMight); // this is set to 20 at the moment (decreased from 50)
queue.drain = function() {
formXml(null, parser.allObjects);
};
queue.push(toRequest, function(err) {
if(err) console.log(err);
});
Any help is highly appreciated, thanks.
Heh, found it myself. Maybe this will help someone.
So the mistake was very simple:
I didn't call the callback from the getHeader function, I just used return. That's why the queue couldn't start the next round.
httpreq takes less code, so I'll let it stay.
Here is how the correct code looks:
function getHeader(link, callback)
{
httpreq.get(link, function(err, res) {
if(err) return callback(err);
if(res.statusCode == 404 || res.statusCode == 500)
{
parser.allHrefs.remove(parser.allHrefs.indexOf(link));
console.log("Faced status code 404 || 500. url deleted: " + link);
return callback(null);
}
//collect header-info
var x = { loc : link };
if(typeof(res.headers['last-modified']) != "undefined")
x.lastmod = dateConverter(res.headers['last-modified']);
console.log("Success adding header:" + x.loc);
parser.allObjects.push(x);
return callback(null);
});
}
P.S.: for some reason 'httpreq' (which fetches the full response body) makes this faster than 'http' (requesting just the HEAD)...
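For reference, a minimal sketch of what the original http-based HEAD version might look like once the callback is wired through. It reuses parser and dateConverter from the question above; the once-only guard is my own addition, not part of the original code:
var http = require('http');
var url = require('url');
function getHeader(link, callback) {
    var correctUrl = url.parse(link);
    var options = { method: 'HEAD', host: correctUrl.hostname, port: 80, path: correctUrl.pathname };
    var finished = false;
    // Make sure the queue callback fires exactly once, whichever path we take.
    function done(err) {
        if (finished) return;
        finished = true;
        callback(err || null);
    }
    var req = http.request(options, function (res) {
        res.resume(); // HEAD has no body, but flow the stream so the socket is released
        if (res.statusCode == 404 || res.statusCode == 500) return done();
        var x = { loc: link };
        if (typeof res.headers['last-modified'] != "undefined")
            x.lastmod = dateConverter(res.headers['last-modified']);
        parser.allObjects.push(x);
        done();
    });
    req.setTimeout(3000, function () {
        console.log("Timeout reached. Link: " + link);
        req.abort();
        done(new Error('Timeout reached for ' + link));
    });
    req.on('error', function (e) {
        console.log('problem with request: ' + e.message);
        done(e);
    });
    req.end();
}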
Request: https://www.npmjs.com/package/request
I am having trouble requesting a huge number of sites (10,000 at once) with the request module in a queue. It works for a list of, say, 250 or a bit more. However, when I provide a CSV of anything close to 300 or more, the script just hangs and q.drain never fires.
Code:
program.command('sanitizedata <file> <outfile>').description('Sanitize Data').action(( file, outfile ) => {
if(file !== '' && outfile != '') {
var request = require("request");
var parse = require('url-parse');
csv({noheader:false, trim:true})
.fromFile(file)
.on('end_parsed', function(SitesArray) {
var possibleUrls = [];
var q = async.queue(function (task, done) {
var parsed = parse(task.url);
if(parsed.protocol == '') {
task.url = 'http://' + task.url;
task.host = parsed.pathname;
}
var options = {
url: `${task.url}`,
headers: {
'User-Agent': 'request',
'Host': `${task.host}`
}
};
request(options , function(err, res, body) {
if (err) return done(err);
if (res.statusCode != 200) return done(res.statusCode);
done(res);
});
}, 5);
SitesArray.map(function( site, index ) {
q.push(site, function( result ) {
if( result.statusCode == 200 ) {
delete site['host'];
console.log('\x1b[42m\x1b[37m%s\x1b[0m \x1b[46m\x1b[37m%s\x1b[0m', `Assert Success:${site.url}`, `${index}`);
possibleUrls.insert( site.index, site );
} else {
console.log( '\n\r' + result )
return false;
}
});
});
q.drain = function() {
var csvOutput = toCSV( possibleUrls );
console.log('draining')
fs.outputFile(`./data/sanitizedata/${outfile}`, csvOutput, function(err) {
if(err) {
return console.log(err);
}
console.log(`The file ${outfile} was saved!`);
process.exit();
});
console.log('all items have been processed');
}
});
}}
);
Somewhere near the last request it shows an ETIMEDOUT (connection timed out) error. I have data in CSV format:
index,url
...
...
9993,supercircusspectacular.com
9994,theleadershipnetwork.com
9995,wizardofozthemusical.com
9996,allnews365.com
9997,blog.vendhq.com
9998,businesspropertynetwork.co.uk
9999,dashboardjunkie.com
I was able to use a return in front of done(res); in order to handle the error case.
request(options , function(err, res, body) {
if (err) return done(err);
if (res.statusCode != 200) return done(res.statusCode);
return done(res);
});
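Worth noting (a side observation, not a change to the fix above): async.queue treats any truthy first argument to the worker callback as an error, so done(res) signals a failure even for a 200 response. A sketch of the error-first variant, keeping the rest of the worker as above:
request(options, function (err, res, body) {
    if (err) return done(err);                                  // network-level failure
    if (res.statusCode != 200) return done(new Error('HTTP ' + res.statusCode));
    return done(null, res);                                     // success: error slot stays falsy
});
With this shape, the per-task callback passed to q.push would receive (err, res) instead of a single result argument.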
Here's the code:
app.get('/vklogin', function(request, response) {
console.log('Authorization via the "VKontakte" social network'.green);
var url_parts = url.parse(request.url, true);
var query = url_parts.query;
var data = querystring.stringify({
client_id: '4836170',
client_secret: 'cPkR53zhon0lU7TAiz9f',
code: query.code,
redirect_uri: 'http://' + request.headers.host + '/vklogin'
});
var options = {
host: 'oauth.vk.com',
port: 443,
path: '/access_token?' + data,
method: 'GET'
};
var httpsreq = https.request(options, function(response) {
response.setEncoding('utf8');
response.on('data', function(chunk) {
var chunk = JSON.parse(chunk);
pg.connect(dbconfig, function(err, client, done) {
if (err) {
return console.error('Database connection error', err);
}
client.query('select * from users where vk = $1', [chunk.user_id], function(err, result) {
done();
if (err) {
console.error('Error fetching data', err);
} else {
if (result.rows[0]) {
console.log(result.rows[0]);
request.session.authorized = true;
request.session.userid = result.rows[0].id;
} else {
console.log('Attempting to create a new user.');
client.query("insert into users (email, vk) values ('" + chunk.email + "', " + chunk.user_id + ") returning id", function(err, result) {
done();
if (err) {
console.error('Error writing data to the database', err);
} else {
request.session.authorized = true;
request.session.userid = result.rows[0].id;
console.log('Added new user # ' + result.rows[0].id);
}
});
}
}
client.end();
});
console.log("№ пользователья: " + request.session.userid);
});
});
});
httpsreq.end();
if (request.session.authorized) {
response.writeHead(301, {
Location: 'http://' + request.headers.host + '/cabinet'
});
} else {
response.writeHead(301, {
Location: 'http://' + request.headers.host
});
}
response.end();
});
Why is the session not saved outside the callback functions? What is wrong in my code?
Inside the callback everything is fine; outside of it the session values are undefined.
After this, the session state should be kept and be available everywhere, shouldn't it?
I tried declaring a variable for the session, but that did not work either, and it gives no error, so I don't even know where to dig.
var sess;
app.get('/vklogin', function(request, response) {
sess = request.session;
// other code...
});
UPD:
My problem comes from a lack of understanding of how to control asynchronous flow. I can't work out how to do things in the right order: first the database queries, then saving the information in the session, and only then checking the session variables and redirecting to the desired page.
If you know how to get the execution order right for me, please write an answer.
OK, I found that I need an async pattern. Look here: http://book.mixu.net/node/ch7.html
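For reference, a rough sketch of one possible ordering, assuming express-session is in use; exchangeCodeForToken and findOrCreateUser are hypothetical helpers wrapping the https call and the pg queries from the code above, not functions from the original post:
app.get('/vklogin', function (request, response) {
    // hypothetical helper: performs the oauth.vk.com request and parses the JSON
    exchangeCodeForToken(request.query.code, function (err, vkData) {
        if (err) { response.writeHead(500); return response.end(); }
        // hypothetical helper: select-or-insert the user and call back with its id
        findOrCreateUser(vkData, function (err, userId) {
            if (err) { response.writeHead(500); return response.end(); }
            request.session.authorized = true;
            request.session.userid = userId;
            // Redirect only after the session has been populated.
            response.writeHead(301, { Location: 'http://' + request.headers.host + '/cabinet' });
            response.end();
        });
    });
});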
I have a feeling I have some kind of leak somewhere, but I'm not sure how to identify or troubleshoot it. I'm using Express and the request module for Node.js. Under load, HTTPS calls made by request to the Facebook Graph API start experiencing long delays or time out altogether. At first I thought it was a throttling issue on the receiving side (Facebook), but if I make a simple C# console application that calls the same URLs several hundred times, none of the response times are greater than 150 ms. However, the same code in Node varies from 50 ms up to 10 s. If I set the timeout property when using request, I start getting ESOCKETTIMEDOUT errors. If I set pool.maxSockets on the request options to 100, then I get ETIMEDOUT errors instead.
How can I figure out where my hang-up is occurring?
Here's a sample of my use of the request module. I also tried adding this to my app:
require('http').globalAgent.maxSockets = Infinity;
require('https').globalAgent.maxSockets = Infinity;
var executeGetUrl = function getUrl(url, cacheKey, parseJson, accessToken, callback) {
if (accessToken) {
url = url + '?access_token=' + accessToken;
}
try {
request({url: url, timeout: 10000, 'pool.maxSockets' : 100}, function (err, res, body) {
if (err) {
logger.error('Made facebook api call to ' + url + ' with error ' + err);
callback(err, null);
return;
}
if (parseJson) {
try {
body = JSON.parse(body);
} catch (err) {
callback(new Error('Could not parse ' + res.body + ': ' + err), null);
return;
}
if (body.error) {
err = new Error('Error calling ' + url + '. ' + body.error.message);
err.code = body.error.code;
callback(err, null);
}
else {
callback(null, body || {});
}
} else {
if (res.statusCode != 200) {
callback(new Error(res.body));
} else {
callback(null, res.body || {});
}
}
});
} catch (err) {
callback(null, err);
}
};
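One thing that stands out (a hedged observation, not a confirmed fix): the 'pool.maxSockets' string key is most likely ignored by request; its documented option is a nested pool object, roughly:
request({
    url: url,
    timeout: 10000,
    pool: { maxSockets: 100 }   // nested object rather than a 'pool.maxSockets' string key
}, function (err, res, body) {
    // same handling as in executeGetUrl above
});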
I'm memoizing the results for 60 seconds, but that doesn't seem to have any relation to the problem.
var getUrl = memoize(executeGetUrl, {async: true, maxAge: 60000, length: 2});
And using the function in a promise chain
var deferred = q.defer();
getUrl(photosUrl, null, false, null, function (err, body) {
if (err) {
deferred.reject(err);
return;
}
deferred.resolve(body);
});
return deferred.promise;
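As a side note, assuming q here is kriskowal's Q library, the deferred boilerplate can be collapsed with Q's node-callback adapter:
// Equivalent to the deferred version above: nfcall turns the
// node-style (err, body) callback into a promise.
return q.nfcall(getUrl, photosUrl, null, false, null);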
For some reason, when the status code is not 200 and I return after calling the callback, the script just hangs there and never exits. Why is that?
var http = require('http');
var qs = require('querystring');
var args = {
q: 'dfdfdf'
};
var opts = {
hostname: 'api.openweathermap.org',
path: '/data/2.5/weather?' + qs.stringify(args)
};
function cb(err, result) {
console.log(err, result);
}
http.get(opts, function(res) {
var buffer = new Buffer(0);
if (res.statusCode !== 200) return cb(new Error('Unable to fulfill request.'));
res.on('readable', function() {
return buffer = Buffer.concat([buffer, this.read()]);
});
res.on('end', function() {
return cb(null, JSON.parse(buffer.toString('utf8')));
});
});
Command line:
$ node plugins/weather.js
[Error: Unable to fulfill request.] undefined
# I have to ctrl+c at this point
You still need to consume the stream until it emits the 'end' event:
http.get(opts, function(res) {
var buffer = new Buffer(0);
res.on('readable', function() {
return buffer = Buffer.concat([buffer, this.read()]);
});
res.on('end', function() {
if (res.statusCode !== 200) return cb(new Error('Unable to fulfill request.'))
return cb(null, buffer.toString());
});
});
You never call res.end(). You should also do some res.write calls before res.end().
I am trying to periodically load some data from an external API like this:
setInterval(function() {
getData();
}, 60000);
function getData() {
if (typeof someObject.data === 'object') {
for (var prop in someObject.data) {
if (prop === 1 || prop === 2) {
var options = {
host: 'somehost.com',
path: '/somepath?param=' + prop
};
var req = http.request(options, function(res) {
// EXECUTION NEVER REACHES THIS POINT ?!?!
req.on('end', function() { alert('ended'); });
});
req.end();
}
}
}
}
If I do not use any intervals and loops, such a request to the same host works perfectly. However, if I try to do something like shown above, the request never calls its callback function.
What am I doing wrong here?
I think one of your conditions is bad; the following works fine for me.
var http = require('http');
setInterval(function() {
getData();
}, 1000);
function getData() {
console.log('get');
//if (typeof someObject.data === 'object') {
console.log('get 1');
//for (var prop in someObject.data) {
console.log('get 2');
//if (prop === 1 || prop === 2) {
console.log('get 3');
var options = {
host: 'google.com',
path: '/'
};
var req = http.request(options, function(res) {
console.log('http request', res.statusCode);
//req.on('end', function() {
// console.log('ended', req);
//});
});
req.end();
//}
//}
//}
}
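A guess at which condition is the bad one: for...in yields property names as strings, so a strict comparison against a number never matches. A tiny illustration:
var someObject = { data: { '1': 'a', '2': 'b' } };
for (var prop in someObject.data) {
    console.log(typeof prop);   // 'string'
    console.log(prop === 1);    // false: strict comparison with a number
    console.log(prop === '1');  // true for the first key
}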
Also, if I'm right, you don't need req.on('end'); the request callback is called when the request completes. You can also use http.get, so you don't need to call req.end:
var req = http.get( options, function(res) {
console.log('http request', res.statusCode);
//req.on('end', function() {
// console.log('ended', req);
//});
}).on('error', function( e ) {
console.error( 'error', e );
})
See more info in the docs.
Hope I could help.