Node.js dialling back API requests

I have a list of 125,000+ ID numbers.
I am making a request to an API to get more information for each one.
But my problem is that the API will stop giving me a response if I request more than 6 per second.
I need a way to control the speed of the requests.

Just use a function called by setInterval to do the actual API querying?
Simple example:
var ids = [ /* big array here */ ];

function queryAllIds(ids, callback) {
    var processedIds = [];
    var lastProcessedId = 0;
    var processedIdCount = 0;
    var intervalId;

    function queryApi() {
        // Stop the interval once every id has been dispatched.
        if (lastProcessedId === ids.length) {
            clearInterval(intervalId);
            return;
        }
        var idToProcess = lastProcessedId++;
        doActualQuery(ids[idToProcess], function (result) {
            processedIds[idToProcess] = result;
            processedIdCount++;
            // All responses are in: hand the results to the caller.
            if (processedIdCount === ids.length) {
                process.nextTick(function () {
                    callback(processedIds);
                });
            }
        });
    }

    // Fire at most 6 queries per second.
    intervalId = setInterval(queryApi, 1000 / 6);
}

queryAllIds(ids, function (processedIds) {
    // Do stuff here
});

We ended up using the limiter package, which provided the rate limiting we needed right out of the box: https://www.npmjs.com/package/limiter
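For reference, a minimal sketch of how that might look, assuming the package's classic callback-style token-bucket API (doActualQuery is the hypothetical per-id request function from the example above):

var RateLimiter = require('limiter').RateLimiter;
var limiter = new RateLimiter(6, 'second'); // hand out at most 6 tokens per second

ids.forEach(function (id) {
    // removeTokens waits until a token is available, so the
    // queries are automatically spread out to the allowed rate.
    limiter.removeTokens(1, function (err, remainingRequests) {
        if (err) return console.error(err);
        doActualQuery(id, function (result) {
            // collect the result here
        });
    });
});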

Related

Node.js: given array of URLs, determine which are valid

I am a total scrub with the node http module and having some trouble.
The ultimate goal here is to take a huge list of URLs, figure out which are valid, and then scrape those pages for certain data. So step one is figuring out whether a URL is valid, and this simple exercise is baffling me.
Say we have an array allURLs:
["www.yahoo.com", "www.stackoverflow.com", "www.sdfhksdjfksjdhg.net"]
The goal is to iterate over this array, make a GET request to each, and if a response comes in, add the link to a list of workingURLs (for now just another array); otherwise it goes to a list of brokenURLs.
var workingURLs = [];
var brokenURLs = [];
for (var i = 0; i < allURLs.length; i++) {
    var url = allURLs[i];
    var req = http.get(url, function (res) {
        if (res) {
            workingURLs.push(?????); // How to derive URL from response?
        }
    });
    req.on('error', function (e) {
        brokenURLs.push(e.host);
    });
}
What I don't know is how to properly obtain the URL from the request/response object itself, or really how to structure this kind of async code - because again, I am a nodejs scrub :(
For most websites res.headers.location works, but there are times when the headers do not have this property, and that will cause problems for me later on. Also, I've tried console logging the response object itself, and that was a messy and fruitless endeavor.
I have tried pushing the url variable to workingURLs, but by the time any response comes back to trigger the push, the for loop is already over and url is forever pointing to the final element of the allURLs array.
Thanks to anyone who can help
You need to close over the url value to keep access to it and protect it from changes on the next loop iteration.
For example:
(function (url) {
    // use url here
})(allURLs[i]);
The simplest solution for this is to use forEach instead of for:
allURLs.forEach(function (url) {
    //....
});
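Filling in that body, a minimal sketch of what the question's loop could become (assuming the URLs carry an http:// prefix, which http.get needs; url is now captured per iteration, so each callback pushes the right value):

allURLs.forEach(function (url) {
    http.get(url, function (res) {
        res.resume(); // drain the response so the socket is freed
        // url comes from this iteration's closure, not a shared loop variable
        workingURLs.push(url);
    }).on('error', function (e) {
        brokenURLs.push(url);
    });
});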
A promisified solution also lets you know the moment when all the work is done:
var http = require('http');

var allURLs = [
    "http://www.yahoo.com/",
    "http://www.stackoverflow.com/",
    "http://www.sdfhksdjfksjdhg.net/"
];

var workingURLs = [];
var brokenURLs = [];

var promises = allURLs.map(url => validateUrl(url)
    .then(res => (res ? workingURLs : brokenURLs).push(url)));

Promise.all(promises).then(() => {
    console.log(workingURLs, brokenURLs);
});

// ----

function validateUrl(url) {
    return new Promise((ok, fail) => {
        http.get(url, res => ok(res.statusCode == 200))
            .on('error', e => ok(false));
    });
}

// Prevent node from exiting; not needed if any server is listening.
var t = setTimeout(() => { console.log('Time is over'); }, 1000).ref();
You can use something like this (Not tested):
const arr = ["", "/a", "", ""];

Promise.all(arr.map(url => fetch(url)))
    .then(responses => responses.filter(res => res.ok).map(res => res.url))
    .then(workingUrls => {
        console.log(workingUrls);
        console.log(arr.filter(url => workingUrls.indexOf(url) == -1));
    });
EDITED
Working fiddle (note that you can't make a request to another site from the browser because of cross-domain restrictions).
UPDATED with @vp_arth's suggestions
const arr = ["/", "/a", "/", "/"];

let working = [], notWorking = [],
    find = url => fetch(url)
        .then(res => res.ok ?
            working.push(res.url) && res : notWorking.push(res.url) && res);

Promise.all(arr.map(find))
    .then(responses => {
        console.log('working', working, 'notWorking', notWorking);
        /* Do whatever with the responses if needed */
    });

Perform arbitrary set of asynchronous tasks

My input is streamed from another source, which makes it difficult to use async.forEach. I am pulling data from an API endpoint, but I have a limit of 1000 objects per request, and I need to get hundreds of thousands of them (basically all of them). I will know they're finished when a response contains fewer than 1000 objects. Now, I have tried this approach:
/* List all deposits */
var depositsAll = [];
var depositsIteration = [];
async.doWhilst(this._post(endpoint_path, function (err, response) {
    // check err
    /* Loop through the data and gather only the deposits */
    for (var key in response) {
        //do some stuff
    }
    depositsAll += depositsIteration;
    return callback(null, depositsAll);
}, {limit: 1000, offset: 0, sort: 'desc'}),
response.length > 1000, function (err, depositsAll) {
    // check for err
    // return the complete result
    return callback(null, depositsAll);
});
With this code I get an internal async error saying that iterator is not a function. But in general I am almost sure the logic is not correct as well.
If it's not clear what I'm trying to achieve: I need to perform a request multiple times, adding the response data to a result that at the end contains all the results, so I can return it. And I need to keep performing requests until a response contains fewer than 1000 objects.
I also looked into async.queue but could not get the hang of it...
Any ideas?
You should be able to do it like that, but if that example is from your real code, you have misunderstood some of how async works. doWhilst takes three arguments, each of them being a function:
1. The function to be called by async. It gets a callback argument that must be called. In your case, you need to wrap this._post inside another function.
2. The test function. (You passed the value of response.length > 1000, i.e. a boolean, if response were even defined, rather than a function.)
3. The final function to be called once execution is stopped.
Example with each needed function separated for readability:
var depositsAll = [];
var responseLength = 1000;
var self = this;

var post = function (asyncCb) {
    self._post(endpoint_path, function (err, res) {
        ...
        responseLength = res.length;
        asyncCb(err, depositsAll);
    });
};

var check = function () {
    return responseLength >= 1000;
};

var done = function (err, deposits) {
    console.log(deposits);
};

async.doWhilst(post, check, done);

Nodejs run promises sequentially

Dears,
How can I run promises in Node.js sequentially? In the following example, I am looping through an array of hours and, for each fetched hour, getting a result from the database. The issue here: I am getting results, but I want them sequentially, in the same order that I got the hours.
angular.forEach(SharedVar.getCategories(), function (h) {
    t = h.split('-', 2);
    t = t[0];
    RESTApi.getAnswerdCallsByHour(t).then(function (answerdTotal) {
        $scope.answerdCallsTotalByHour.push(answerdTotal);
        var d = SharedVar.getDataS();
        d[count] = answerdTotal;
        SharedVar.setDataS(d);
        count++;
    });
});
Thanks,
var promise = Promise.resolve(); // make an empty promise in the way you do it with your promise library
angular.forEach(SharedVar.getCategories(), function (h) {
    promise.then(function () {
        return RESTApi.getAnswerdCallsByHour(t).then(function (answerdTotal) {});
    });
});
The way to do it sequentially is to make one request and issue the next request inside that request's promise.
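For illustration only, a minimal sketch of that chaining expressed with Array.prototype.reduce, reusing RESTApi and $scope from the question; each request starts only after the previous promise resolves:

var hours = SharedVar.getCategories().map(function (h) {
    return h.split('-', 2)[0];
});

hours.reduce(function (chain, hour) {
    // each link of the chain waits for the previous one to resolve
    return chain.then(function () {
        return RESTApi.getAnswerdCallsByHour(hour).then(function (answerdTotal) {
            $scope.answerdCallsTotalByHour.push(answerdTotal);
        });
    });
}, Promise.resolve());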
I think the better approach by far is to extend your SharedVar.setDataS(d) function so that it does not depend on receiving the data sequentially, e.g. by having a SharedVar.setDataS(d, index) and using the config object of your $http.get (or whatever) call inside RESTApi to carry that index all the way to the promise.
If your RESTApi looks like this:
var RESTApi = {
    getAnswerdCallsByHour: function (hour) {
        var url = "bla.com/myservice?hour=" + hour;
        return $http.get(url).data;
    }
    // Some other methods...
};
Then you need a way to pass along something to "reorder" your data when it arrives asynchronously. This could be an index you count up, or in your case the hour variable:
var RESTApi = {
    getAnswerdCallsByHour: function (hour) {
        var url = "bla.com/myservice?hour=" + hour;
        var config = {};
        config.hour = hour;
        return $http.get(url, config); // Return the promise, not just data or a specific field
    }
    // Some other methods...
};
Now when your promise is fulfilled (i.e. inside its .then callback, which receives the $http response object) you can access your hour variable like so:
var d = SharedVar.getDataS();
d[response.config.hour] = response.data;
SharedVar.setDataS(d);
Now you know which piece of data correlates to which request, and you do not need to receive the data in order. This last piece only works properly when hours run sequentially from 0 to 23; if that isn't the case, you need to:
var RESTApi = {
    getAnswerdCallsByHour: function (hour, index) {
        var url = "bla.com/myservice?hour=" + hour;
        var config = {};
        config.index = index;
        return $http.get(url, config);
    }
    // Some other methods...
};
...
...
var d = SharedVar.getDataS();
d[response.config.index] = response.data;
SharedVar.setDataS(d);
Safari's answer is how I typically handle this. (Sorry, I don't have enough rep to comment yet...) You were experiencing problems with it because the example provided does not capture and use the new promise in subsequent loops. See my comments on the slightly modified version here:
var promise = Promise.resolve();
angular.forEach(SharedVar.getCategories(), function (h) {
    var t = h.split('-', 2)[0]; // keep t local so each chained call sees its own hour
    // You must capture the new promise here; the next loop will wait
    // for the promise returned from getAnswerdCallsByHour to resolve.
    promise = promise.then(function () {
        // Halt downstream promises until this returned promise finishes
        return RESTApi.getAnswerdCallsByHour(t).then(function (answerdTotal) {
            $scope.answerdCallsTotalByHour.push(answerdTotal);
            var d = SharedVar.getDataS();
            d[count] = answerdTotal;
            SharedVar.setDataS(d);
            count++;
        });
    });
});

Mongoose promise built in but not working?

Or quite possibly I am doing it wrong; in fact, more than likely I am doing it wrong.
I have a table which contains a "tree" of skills, starting at the root level, that may be as deep as ten levels (only two so far), but I want to return it as one big fat JSON structure. So I want to ask the database for each set of data, build my structure, then ask for the next level.
Of course if I just send off my requests using Mongoose, they will come back at any time, as they are all nice asynchronous calls. Normally a good thing.
Looking at the documentation for Mongoose (using 4.1.1), it seems like it has a promise built in, but whenever I try to use it the API call throws a hissy fit and I get a 500 back.
Here is my simple function:
exports.getSkills = function (req, res) {
    console.log("Will return tree of all skills");
    for (var i = 0; i < 10; i++) {
        var returnData = [];
        console.log("Lets get level " + i);
        var query = Skill.find({level: i}); // The query function
        var promise = query.exec; // The promise?
        promise.then(function (doc) { // Totally blows up at this point
            console.log("Something came back");
            return "OK";
        });
    }
}
The Mongoose documentation on the subject can be found here
http://mongoosejs.com/docs/api.html#promise_Promise
You assigned the function itself instead of calling it:
var promise = query.exec;
// should be
var promise = query.exec();
exports.getSkills = function (req, res) {
    console.log("Will return tree of all skills");
    var p = Promise.resolve();
    for (let i = 0; i < 10; i++) { // let gives each level its own i
        // Reassign p so each query starts only after the previous one resolves
        p = p.then(function () {
            return Skill.find({level: i}).exec();
        }).then(function (data) {
            // deal with this level's data
        });
    }
    p.then(function () {
        // all levels fetched; deal with response
    });
}

How to get rid of the asynchronous code here

I have been trying to retrieve data using the MongoJS driver for Node.js. The code I am using is as follows:
req.on('end', function () {
    var decodedBody = querystring.parse(fullBody);
    story = decodedBody.name;
    var z = new Array();
    console.log(story);
    res.writeHead(200, {'Content-Type': 'text/html'});
    res.write('<html><body>');
    db.frames.find({str_id: story}).toArray(function (err, doc) {
        console.log(doc);
        for (var t = 0; t < doc.length; t++) {
            var picid = doc[t].pic_id;
            console.log(picid);
            db.pictures.find({_id: picid}).toArray(function (err, pic) {
                res.write('<img src="' + pic[0].name + '"/>');
            });
        }
    });
    res.end('</body></html>');
});
The problem here is that, because of the asynchronous nature of the code, the response ends first and the code inside the database callback executes afterwards, so nothing gets displayed in the browser (an image in this case). Thanks in advance.
Don't fight the asynchronous nature of node.js, embrace it!
So you should fire off all your requests, marking each one as completed when the response arrives. When all requests are completed, render your images and body/html closing tags.
I don't usually work with node.js, so I may make some mistakes, but it could look like this:
res.write('<html><body>');
db.frames.find({str_id: story}).toArray(function (err, doc) {
    console.log(doc);
    var completed = {};
    // forEach gives each callback its own picid, unlike var in a for loop
    doc.forEach(function (frame) {
        var picid = frame.pic_id;
        completed[picid] = false;
        console.log(picid);
        db.pictures.find({_id: picid}).toArray(function (err, pic) {
            // mark this request as completed
            completed[picid] = pic;
            // check if all requests completed
            var all_finished = true;
            for (var k in completed) {
                if (completed[k] === false) {
                    all_finished = false;
                    break;
                }
            }
            // render final markup
            if (all_finished) {
                for (var k in completed) {
                    var p = completed[k];
                    res.write('<img src="' + p[0].name + '"/>');
                }
                res.end('</body></html>');
            }
        });
    });
});
Just put the res.end('</body></html>'); inside your db.frames.find callback. Check when you have reached doc.length - 1 and then send the end command.
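For illustration, a minimal sketch of that counting approach, reusing the variables from the question (note the images are written in whatever order the queries happen to return):

db.frames.find({str_id: story}).toArray(function (err, doc) {
    var pending = doc.length; // picture lookups still outstanding
    if (pending === 0) return res.end('</body></html>'); // nothing to fetch
    doc.forEach(function (frame) {
        db.pictures.find({_id: frame.pic_id}).toArray(function (err, pic) {
            res.write('<img src="' + pic[0].name + '"/>');
            if (--pending === 0) {
                // last response arrived; now it is safe to end
                res.end('</body></html>');
            }
        });
    });
});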
