I need to make an HTTP call and then put the response in a database, and I should repeat this forever. I have been reading about the async module, but I didn't understand how to combine these actions with waiting a couple of seconds between each iteration.
Can someone help?
Thanks in advance.
Look into async.forever. Your code would look something like this:
var async = require("async");
var http = require("http");
//Delay of 5 seconds
var delay = 5000;
async.forever(
    function(next) {
        http.get({
            host: "google.com",
            path: "/"
        }, function(response) {
            // Continuously update stream with data
            var body = "";
            response.on("data", function(chunk) {
                body += chunk;
            });
            response.on("end", function() {
                // Store data in database
                console.log(body);
                // Repeat after the delay
                setTimeout(function() {
                    next();
                }, delay);
            });
        });
    },
    function(err) {
        console.error(err);
    }
);
Why use such a module just for this? Why not simply use setTimeout, like:
function makeRequest() {
    request(url, function(response) {
        saveInDatabase(function() {
            // After the save is complete, use setTimeout to call
            // "makeRequest" again a few seconds later (here 1 sec)
            setTimeout(makeRequest, 1000);
        });
    });
}
This code won't really work as written for the request and save parts, of course; it was just to give an example of what I was proposing.
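To make that concrete, here is a minimal runnable sketch of the same pattern using only the core http module; the google.com URL, the saveInDatabase stub, and the error handling are placeholders I've added for illustration, not part of the original suggestion.
var http = require("http");

function saveInDatabase(body, done) {
    // Placeholder: replace with your real database insert; done() signals completion.
    console.log("saved " + body.length + " bytes");
    done();
}

function makeRequest() {
    http.get({ host: "google.com", path: "/" }, function(response) {
        var body = "";
        response.on("data", function(chunk) { body += chunk; });
        response.on("end", function() {
            saveInDatabase(body, function() {
                // Schedule the next iteration only after the save completes
                setTimeout(makeRequest, 1000);
            });
        });
    }).on("error", function(err) {
        console.error(err);
        setTimeout(makeRequest, 1000); // keep looping even if a request fails
    });
}

makeRequest();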
I'm trying to send off interleaved GET and POST requests to a server, but the POST request is sending data from a file, which seems to throw off the timing.
var async = require('async');
var http = require('http');
var request = require('request');
var fs = require('fs');
var arr = [];
for (var i = 1; i <= 50; i++) {
    arr.push(i);
}
var limitedAgent = new http.Agent({maxSockets: 6});
function processThenSendRequest(data, onfinish) {
    request.get({
        url: 'http://www.google.com',
        pool: limitedAgent
    }, (function(j) {
        return function(err, res) {
            console.log("GET: response from " + j);
        };
    })(data)).on('socket', (function(j) {
        return function(socket) {
            console.log("GET: socket assigned for " + j);
        };
    })(data));
    var source = fs.createReadStream('README.md');
    var postReq = request.post({
        url: 'http://www.google.com',
        pool: limitedAgent
    }, (function(j) {
        return function(err, res) {
            console.log("POST: response from " + j);
        };
    })(data)).on('socket', (function(j) {
        return function(socket) {
            console.log("POST: socket assigned for " + j);
        };
    })(data));
    // source.pipe(postReq);
    setTimeout(function() {
        onfinish(null, data);
    }, 10000);
}
async.map(arr, processThenSendRequest, function(err, results) {
    if (err) console.error(err);
    console.log("finished");
});
The code as written above runs fine, with the GET and POST requests being sent out in alternating order, but if I uncomment the source.pipe(postReq) line, then all the GET requests are sent before all the POST requests.
Is there a solution to this issue? I could use async.mapLimit, but that feels like a hack; I have the impression the solution should come through the request library, though that impression may be based on a misunderstanding.
Per my comment:
Because Node is entirely non-blocking (at least when written this way), you can't be sure anything will occur in order unless you run it in series; async.series or async.eachSeries can do this for you.
Further to that, since Node doesn't wait for asynchronous activities to finish, each task gets queued up immediately, while the callbacks fire on a first-come-first-served basis. In your case, GET requests take far less time to complete than POST requests, which is why they finish first.
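To illustrate, here is a minimal sketch (not the asker's exact code) using async.eachSeries so each item finishes its GET before its POST starts and the two stay interleaved; the URL, the README.md path, and the item list are placeholders.
var async = require('async');
var fs = require('fs');
var request = require('request');

var items = [1, 2, 3];

async.eachSeries(items, function(i, done) {
    request.get('http://www.google.com', function(err) {
        if (err) return done(err);
        console.log('GET: response from ' + i);
        var postReq = request.post('http://www.google.com', function(err) {
            if (err) return done(err);
            console.log('POST: response from ' + i);
            done(); // only now does the next item start
        });
        fs.createReadStream('README.md').pipe(postReq);
    });
}, function(err) {
    if (err) console.error(err);
    console.log('finished');
});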
I'm currently using HTTP GET against an external API. When called individually, the response is good. When put in a for loop, some requests don't seem to get a response.
This is the http GET function:
function httpGetChunk(url, callback) {
    http.get(url, function(resp) {
        var body = '';
        resp.on('data', function(chunk) {
            body += chunk; // chunk too large from this response
        });
        resp.on('end', function() {
            var data = JSON.parse(body);
            callback(data);
        });
        resp.on("error", function(e) {
            console.log("Got error: " + e.message);
        });
    });
}
When I call the GET function in a for loop for 5 different URLs, I only get responses for some of them. I ran it a couple of times, and the responses came from a different combination of the called URLs each time, but never all of them.
Any insight?
Edit 1: To give more information, my for loop looks something like this.
for (var i = 0; i < 5; i++) {
    httpGetChunk(someUrl, function(data) {
        console.log(data);
    });
}
This would only print out some responses but not all.
Edit 2:
I've taken into account all the advice on this thread. I'm now using the async module and have increased the number of concurrent connections to 20:
http.globalAgent.maxSockets = 20;
The following code is the one I'm currently testing:
getMatchStats() returns a game 'match' object with statistics (e.g. kills, deaths in the match, etc.)
matchIds is the array containing all the id keys of the matches
async.parallel([
    getMatchStats(matchIds[0], function (matchData) {
        console.log('0');
    }),
    getMatchStats(matchIds[1], function (matchData) {
        console.log('1');
    }),
    getMatchStats(matchIds[2], function (matchData) {
        console.log('2');
    }),
    getMatchStats(matchIds[3], function (matchData) {
        console.log('3');
    }),
    getMatchStats(matchIds[4], function (matchData) {
        console.log('4');
    }),
], function(err, result) {
    console.log('done');
    callback(result);
});
and getMatchStats
function getMatchStats(matchId, callback) {
    var url = getMatchStatsUrl(matchId); // gets url based on id
    httpGetChunk(url, function(data) {
        callback(data);
    });
}
Again, the async.parallel never finishes, since only some of the requests get responses. Every time I run it, the responses come from a different combination of matches. Sometimes it even completes all of the requests.
Maybe my OS has limitations on the number of connections (I'm testing on localhost)?
Each request is asynchronous. So, if you use a regular for loop, each step is executed synchronously and won't wait for the callback to be called. What you need is something like the each method from the async module:
async.each(yourArrayOfUrls, function (url, callback) {
    httpGetChunk(url, function(data) {
        console.log(data);
        callback();
    });
}, function (err) {
    // if some step produces an error, you can get it here...
});
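If you also want to cap how many requests are in flight at once (which can help when some responses never arrive), a hedged variant is async.eachLimit; the limit of 5 below is just an example value, not something from the original answer.
// Same idea, but at most 5 requests are in flight at any moment.
async.eachLimit(yourArrayOfUrls, 5, function (url, callback) {
    httpGetChunk(url, function(data) {
        console.log(data);
        callback();
    });
}, function (err) {
    // handle any error from the steps here...
});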
I have an HTTP GET request and I want to parse the response and save it to my database.
If I call crawl(i) independently, I get good results. But I have to call crawl() for ids 1 to 2000.
I get good results, but some responses seem to get lost and some responses are duplicated. I don't think I understand how to call thousands of asynchronous functions. I am using the async module's queue function, but so far I am still missing some data and still have some duplicates. What am I doing wrong here? Thanks for your help.
What I am crawling
My Node functions:
function getOptions(i) {
    return {
        host: 'magicseaweed.com',
        path: '/syndicate/rss/index.php?id=' + i + '&unit=uk',
        method: 'GET'
    };
}
function crawl(i) {
    var req = http.request(getOptions(i), function(res) {
        res.on('data', function (body) {
            parseLocation(body);
        });
    });
    req.end();
}
function parseLocation(body) {
    parser.parseString(body, function(err, result) {
        if (result && typeof result.rss != 'undefined') {
            var locationTitle = result.rss.channel[0].title;
            var locationString = result.rss.channel[0].item[0].link[0];
            var location = new Location({
                id: locationString.split('/')[2],
                name: locationTitle
            });
            location.save();
        }
    });
}
var N = 2; // # of simultaneous tasks
var q = async.queue(function (task, callback) {
    crawl(task.url);
    callback();
}, N);
q.drain = function() {
    console.log('Crawling done.');
};
for (var i = 0; i < 100; i++) {
    q.push({url: 'http://magicseaweed.com/syndicate/rss/index.php?id=' + i + '&unit=uk'});
}
[EDIT] Well, after a lot of testing it seems that the service I am crawling cannot handle so many requests that fast, because when I make each request sequentially, I get all the good responses.
Is there a way to SLOW DOWN the async queue method?
You should have a look at this great module, async, which simplifies tasks like this. You can use queue; here is a simple example:
var N = 5; // # of simultaneous tasks
var q = async.queue(function (task, callback) {
    somehttprequestfunction(task.url, function() {
        callback();
    });
}, N);
q.drain = function() {
    console.log('all items have been processed');
};
for (var i = 0; i < 2000; i++) {
    q.push({url: "http://somewebsite.com/" + i + "/feed/"});
}
It will keep a window of ongoing actions, and room for a future task only becomes available when you invoke the callback function. The difference is that your current code opens 2000 connections immediately, and obviously the failure rate is high. Limiting it to a reasonable value (5, 10, 20, depending on the site and connection) will result in a better success rate. If a request fails, you can always try it again, or push the task onto another async queue for another trial. The key point is to invoke callback() in the queue function, so that a slot becomes available when the task is done.
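For example, a simple retry could just push the failed task back onto the same queue before freeing the slot. This sketch assumes somehttprequestfunction reports failures through the first callback argument; in real code you would also want to cap the number of retries.
// Sketch of a retry: re-queue the task on failure before freeing the slot.
var q = async.queue(function (task, callback) {
    somehttprequestfunction(task.url, function (err) {
        if (err) {
            q.push(task); // try this URL again later (cap retries in real code)
        }
        callback(); // free the slot either way
    });
}, N);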
var q = async.queue(function (task, callback) {
    crawl(task.url);
    callback();
}, N);
You're executing the next task immediately after starting the previous one; this way, the queue is just meaningless. You should modify your code like this:
// first, modify your 'crawl' function to take a callback argument, and call this callback after the job is done.
// then
var q = async.queue(function (task, next/* naming this argument 'next' is more meaningful */) {
    crawl(task.url, function () {
        // after this one is done, start the next one.
        next();
    });
    // or, more simply: crawl(task.url, next);
}, N);
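For reference, here is a possible shape of the modified crawl, kept close to the asker's original function; the error handler and the buffering of the full body before parsing are additions of mine, not part of the original answer.
// Possible shape of the modified crawl: call 'done' once the response has
// ended, so the queue knows the slot is free.
function crawl(i, done) {
    var req = http.request(getOptions(i), function(res) {
        var body = '';
        res.on('data', function (chunk) {
            body += chunk; // buffer the whole response before parsing
        });
        res.on('end', function () {
            parseLocation(body);
            done();
        });
    });
    req.on('error', function (err) {
        console.error(err);
        done(); // don't stall the queue on errors
    });
    req.end();
}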
Another option if you want. Vanilla JS without fancy libraries.
var incrementer = 0;
var resultsArray = [];
var myInterval = setInterval(function() {
    incrementer++;
    if (incrementer == 100) {
        clearInterval(myInterval);
        // when done, parse results array
    }
    // make request here
    // push request result to array here
}, 500);
This invokes the function every half second. It's an easy way to force a steady pace and exit after x requests.
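Filled in with a concrete request, the skeleton might look like the sketch below; the http.get call, the example URL, and the final log line are assumptions added for illustration, not part of the original answer.
// The same skeleton with the request filled in; the URL is a placeholder.
var http = require('http');
var incrementer = 0;
var resultsArray = [];

var myInterval = setInterval(function() {
    incrementer++;
    if (incrementer == 100) {
        clearInterval(myInterval);
        console.log(resultsArray.length + ' results collected');
        return;
    }
    http.get('http://example.com/', function(res) {
        var body = '';
        res.on('data', function(chunk) { body += chunk; });
        res.on('end', function() {
            resultsArray.push(body); // collect each response
        });
    }).on('error', console.error);
}, 500);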
I know I am a little late to the question; however, here is a solution I wrote to slow down the number of requests when testing an API endpoint, using Node 4 or Node 5:
var fs = require('fs');
var supertest = require('supertest');
var request = supertest("http://sometesturl.com/api/test/v1/")
var Helper = require('./check.helper');
var basicAuth = Helper.basicAuth;
var options = Helper.options;
fs.readFile('test.txt', function(err, data){
    var parsedItems = JSON.parse(data);
    var urlparts = [];
    // create a queue
    for (let year of range(1975, 2016)) {
        for (var make in parsedItems[year]){
            console.log(year, make, '/models/' + year + '/' + make);
            urlparts.push({urlpart: '/models/' + year + '/' + make, year: year, make: make});
        }
    }
    // start dequeue
    waitDequeue();
    // This function calls itself after the makeRequest promise completes
    function waitDequeue(){
        var item = urlparts.pop();
        if (item){
            makeRequest(item)
                .then(function(){
                    // wait this time before next dequeue
                    setTimeout(function() {
                        waitDequeue();
                    }, 3000);
                });
        } else {
            write(parsedItems);
        }
    }
    // make a request, mutate parsedItems then resolve
    function makeRequest(item){
        return new Promise((resolve, reject) => {
            request
                .get(item.urlpart)
                .set(options.auth[0], options.auth[1])
                .set(options.type[0], options.type[1])
                .end(function(err, res) {
                    if (err) return reject(err);
                    console.log(res.body);
                    res.body.forEach(function(model){
                        parsedItems[item.year][item.make][model] = {};
                    });
                    resolve();
                });
        });
    }
    // write the results back to the file
    function write(parsedItems){
        fs.writeFile('test.txt', JSON.stringify(parsedItems, null, 4), function(err){
            console.log(err);
        });
    }
});
A little late, but I have found this works! Using async, you can slow down the queue by using whilst inside the task handler, e.g.:
var q = async.priorityQueue(function(task, callback) {
    // your code to process each task here
    // when ready to complete the task, delay it by calling async.whilst
    var count = 0;
    async.whilst( // wait 10 seconds (10 x 1000 ms)
        function() {
            return count < 10;
        },
        function(callback) {
            count++;
            setTimeout(function() {
                callback(null, count);
            }, 1000);
        },
        function (err, n) {
            // n seconds have passed
            callback(); // callback to q handler
        }
    ); // whilst
}, 5);
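For completeness, tasks are then pushed with an explicit priority (lower numbers run first); the URLs and priority values below are placeholder examples, not part of the original answer.
// Example usage of the priority queue defined above.
q.push({ url: 'http://example.com/important' }, 1); // handled first
q.push({ url: 'http://example.com/normal' }, 5);
q.drain = function () {
    console.log('all tasks handled');
};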
There is a post: How do I set a timeout for client http connections in node.js,
but none of the answers there work.
So, I have code like this:
var remote_client = http.createClient(myPort, myHost);
var path = '/getData?';
var param = { };
var request = remote_client.request("POST", path);
// error case
remote_client.addListener('error', function(connectionException){
    console.log("Nucleus Error: " + connectionException);
    next(connectionException);
});
request.addListener('response', function (response) {
    response.setEncoding('utf-8');
    var body = '';
    response.addListener('data', function (chunk) {
        // get the result!
    });
});
request.end();
The biggest problem is that the url that I'm connection to may timeout. Therefore, I would like to set a timeout, like 15 secs. If so, trigger a listener.
However, I haven't seen any timeout features in the documentation for http.createClient. Please advise. Thanks. :)
var foo = setTimeout(function() {
    request.emit("timeout-foo");
}, 15000);
// listen to timeout
request.on("timeout-foo", function() { });
request.addListener('response', function (response) {
    // bla
    // clear counter
    clearTimeout(foo);
});
Just run the counter yourself.
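If you also want to stop the hung request rather than just react to it, one option is to abort it when the timer fires; request.abort() should be available on the client request object, but treat this as a sketch rather than part of the original answer.
// Variant: also abort the hung request when the timer fires.
var timedOut = false;
var foo = setTimeout(function() {
    timedOut = true;
    request.abort(); // stop waiting on the socket
    // handle the timeout here (e.g. respond with an error)
}, 15000);

request.addListener('response', function (response) {
    if (timedOut) return; // already gave up on this request
    clearTimeout(foo);
    // handle the response as usual
});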
I'm trying to write a simple app with Node.js to pull some JSON from Twitter.
The JSON loads OK the first time; however, my 'tweets' event isn't getting triggered correctly.
Does anyone see where I'm going wrong? Any advice hugely appreciated!
var twitterClient = http.createClient(80, 'api.twitter.com');
var tweetEmitter = new events.EventEmitter();
var request = twitterClient.request("GET", "/1/statuses/public_timeline.json", {"host": "api.twitter.com"});
function getTweats() {
    request.addListener("response", function (response) {
        var body = "";
        response.addListener("data", function (data) {
            body += data;
        });
        response.addListener("end", function (end) {
            var tweets = JSON.parse(body);
            if (tweets.length > 0) {
                tweetEmitter.emit("tweets", tweets);
                console.log(tweets, 'tweets loaded');
            }
        });
    });
    request.end();
}
setInterval(getTweats(), 1000);
http.createServer(function (request, response) {
    var uri = url.parse(request.url).pathname;
    console.log(uri);
    if (uri === '/stream') {
        var cb = function (tweets) {
            console.log('tweet'); // never happens!
            response.writeHead(200, {"Content-Type": "text/plain"});
            response.write(JSON.stringify(tweets));
            response.end();
            clearTimeout(timeout);
        };
        tweetEmitter.addListener("tweets", cb);
        // timeout to kill requests that take longer than 10 secs
        var timeout = setTimeout(function () {
            response.writeHead(200, {"Content-Type": "text/plain"});
            response.write(JSON.stringify([]));
            response.end();
            tweetEmitter.removeListener("tweets", cb);
        }, 10000);
    } else {
        loadStaticFile(uri, response);
    }
}).listen(port);
console.log("Server running at http://localhost:" + port + "/");
See the full file at https://gist.github.com/770810
You have two problems here:
httpClient.request is just that: a one-time request.
You're not passing the function getTweats to the interval, but its return value:
setInterval(getTweats() /* <-- the parentheses execute the function */, 1000);
In order to fix it, create a new request for each call of getTweats:
function getTweats() {
    // There's no need for request to be global; just make it local to getTweats
    var request = twitterClient.request("GET", "/1/statuses/public_timeline.json", {"host": "api.twitter.com"});
And pass the function correctly to setTimeout:
setInterval(getTweats, 1000);
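Putting both fixes together, the polling function might look like the following sketch, based on the snippets above; nothing new is added, the request is simply created inside the function on every call.
// Sketch combining both fixes: a fresh request per call, and the function
// itself (not its return value) passed to setInterval.
function getTweats() {
    var request = twitterClient.request("GET", "/1/statuses/public_timeline.json",
        {"host": "api.twitter.com"});
    request.addListener("response", function (response) {
        var body = "";
        response.addListener("data", function (data) {
            body += data;
        });
        response.addListener("end", function () {
            var tweets = JSON.parse(body);
            if (tweets.length > 0) {
                tweetEmitter.emit("tweets", tweets);
            }
        });
    });
    request.end();
}

setInterval(getTweats, 1000);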
PS: Thx for the gist! Took me like 15 seconds to figure it out from there, always great when people post more than just 5 lines of out of context code :)