Throttle and queue up API requests due to per second cap

Throttle and queue up API requests due to per second cap - node.js

I use mikeal/request to make API calls. One of the APIs I use most frequently (the Shopify API) recently put out a new call limit, and I'm seeing errors like:
Exceeded 6.0 calls per second for api client. Slow your requests or contact support for higher limits.
I've already gotten an upgrade, but regardless of how much bandwidth I get I have to account for this. A large majority of the requests to the Shopify API are within async.map() functions, which loop asynchronous requests, and gather the bodies.
I'm looking for any help, perhaps a library that already exists, that would wrap around the request module and actually block, sleep, throttle, allocate, manage, the many simultaneous requests that are firing off asynchronously and limit them to say 6 requests at a time. I have no problem with working on such a project if it doesn't exist. I just don't know how to handle this kind of situation, and I'm hoping for some kind of standard.
I made a ticket with mikeal/request.

For an alternative solution, I used the node-rate-limiter to wrap the request function like this:
var request = require('request');
var RateLimiter = require('limiter').RateLimiter;
var limiter = new RateLimiter(1, 100); // at most 1 request every 100 ms
/**
 * Drop-in wrapper around `request` that waits for one token from `limiter`
 * before forwarding the call with the exact arguments it was given.
 * Returns nothing; the request is issued from the limiter's callback.
 */
var throttledRequest = function () {
  var forwardedArgs = arguments;

  // Invoked by the limiter once a token is available; `this` is whatever the
  // limiter supplies and is passed straight through to `request`.
  function sendWhenAllowed() {
    request.apply(this, forwardedArgs);
  }

  limiter.removeTokens(1, sendWhenAllowed);
};

The npm package simple-rate-limiter seems to be a very good solution to this problem.
Moreover, it is easier to use than node-rate-limiter and async.queue.
Here's a snippet that shows how to limit all requests to ten per second.
var limit = require("simple-rate-limiter");
var request = limit(require("request")).to(10).per(1000);

I've run into the same issue with various APIs. AWS is famous for throttling as well.
A couple of approaches can be used. You mentioned async.map() function. Have you tried async.queue()? The queue method should allow you to set a solid limit (like 6) and anything over that amount will be placed in the queue.
Another helpful tool is oibackoff. That library will allow you to backoff your request if you get an error back from the server and try again.
It can be useful to wrap the two libraries to make sure both your bases are covered: async.queue to ensure you don't go over the limit, and oibackoff to ensure you get another shot at getting your request in if the server tells you there was an error.

My solution using modern vanilla JS:
/**
 * Wraps `fn` so that successive invocations start at least `wait`
 * milliseconds apart, in the order the wrapper was called.
 *
 * Each call reserves the next free time slot up front, so a call needs at
 * most one timer and a later caller can never overtake one that is already
 * waiting.
 *
 * @param {Function} fn - Function to throttle; may be sync or async.
 * @param {number} wait - Minimum spacing between starts, in milliseconds.
 * @returns {Function} Async wrapper that resolves/rejects with `fn`'s result.
 *   The wrapper's `this` and arguments are forwarded to `fn`.
 */
function throttleAsync(fn, wait) {
  // Earliest timestamp at which the next invocation is allowed to start.
  let nextSlot = 0;

  async function throttled(...args) {
    const now = Date.now();
    const startAt = Math.max(now, nextSlot);
    // Reserve the slot synchronously so concurrent callers queue up behind it.
    nextSlot = startAt + wait;

    if (startAt > now) {
      await new Promise((resolve) => {
        setTimeout(resolve, startAt - now);
      });
    }
    return fn.apply(this, args);
  }

  return throttled;
}
// Usage:
const run = console.log.bind(console);
const throttledRun = throttleAsync(run, 1000);
throttledRun(1); // Will execute immediately.
throttledRun(2); // Will be delayed by 1 second.
throttledRun(3); // Will be delayed by 2 second.

In async module, this requested feature is closed as "wont fix"
Reason given in 2016 is "managing that kind of construct properly is
a hard problem." See right side of here:
https://github.com/caolan/async/issues/1314
Reason given in 2013 is "wouldn't scale to multiple processes" See:
https://github.com/caolan/async/issues/37#issuecomment-14336237
There is a solution using the leaky bucket or token bucket model; it is implemented in the "limiter" npm module as RateLimiter.
RateLimiter, see example here: https://github.com/caolan/async/issues/1314#issuecomment-263715550
Another way is using PromiseThrottle, I used this, working example is below:
// Demo script: queue NUMBER_OF_REQUESTS promise-returning calls through a
// PromiseThrottle instance and log when each one actually starts.
var PromiseThrottle = require('promise-throttle');
let RATE_PER_SECOND = 5; // 5 = 5 per second, 0.5 = 1 per every 2 seconds
var pto = new PromiseThrottle({
requestsPerSecond: RATE_PER_SECOND, // rate cap handed to the throttle (5 per second here)
promiseImplementation: Promise // the Promise library you are using
});
// Reference point for the elapsed-time figures printed below.
let timeStart = Date.now();
// Stand-in for a real request: logs its argument with the elapsed seconds
// and resolves immediately with that same argument.
var myPromiseFunction = function (arg) {
return new Promise(function (resolve, reject) {
console.log("myPromiseFunction: " + arg + ", " + (Date.now() - timeStart) / 1000);
let response = arg;
return resolve(response);
});
};
let NUMBER_OF_REQUESTS = 15;
let promiseArray = [];
for (let i = 1; i <= NUMBER_OF_REQUESTS; i++) {
promiseArray.push(
pto
.add(myPromiseFunction.bind(this, i)) // passing an argument using bind()
);
}
Promise
.all(promiseArray)
.then(function (allResponsesArray) { // [1 .. NUMBER_OF_REQUESTS]
console.log("All results: " + allResponsesArray);
});
Output:
myPromiseFunction: 1, 0.031
myPromiseFunction: 2, 0.201
myPromiseFunction: 3, 0.401
myPromiseFunction: 4, 0.602
myPromiseFunction: 5, 0.803
myPromiseFunction: 6, 1.003
myPromiseFunction: 7, 1.204
myPromiseFunction: 8, 1.404
myPromiseFunction: 9, 1.605
myPromiseFunction: 10, 1.806
myPromiseFunction: 11, 2.007
myPromiseFunction: 12, 2.208
myPromiseFunction: 13, 2.409
myPromiseFunction: 14, 2.61
myPromiseFunction: 15, 2.811
All results: 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
We can clearly see the rate from output, i.e. 5 calls for every second.

The other solutions were not up to my tastes. Researching further, I found promise-ratelimit which gives you an api that you can simply await:
var rate = 2000 // in milliseconds
var throttle = require('promise-ratelimit')(rate)
/**
 * Waits for the rate limiter, then fetches the example endpoint.
 *
 * @returns {Promise<*>} The `things` field of the response body.
 */
async function queryExampleApi() {
  // Resolves once the limiter allows another call.
  await throttle();

  const { body } = await get('https://api.example.com/stuff');
  return body.things;
}
The above example will ensure you only make queries to api.example.com every 2000ms at most. In other words, the very first request will not wait 2000ms.

Here's my solution: use a library such as request-promise or axios and wrap the call in this promise.
var Promise = require("bluebird")
// http://stackoverflow.com/questions/28459812/way-to-provide-this-to-the-global-scope#28459875
// http://stackoverflow.com/questions/27561158/timed-promise-queue-throttle
module.exports = promiseDebounce
/**
 * Rate-limits `fn`: at most `count` invocations are started within any
 * `delay`-millisecond window; surplus calls wait in a FIFO queue.
 *
 * @param {Function} fn - Function to invoke; may return a value or a promise.
 * @param {number} delay - How long (ms) a started call keeps occupying a slot.
 * @param {number} [count=1] - Number of slots, i.e. calls allowed per window.
 * @returns {Function} Wrapper returning a promise for `fn`'s result. The
 *   wrapper's `this` and arguments are forwarded to `fn`; a synchronous throw
 *   from `fn` rejects the returned promise.
 */
function promiseDebounce(fn, delay, count = 1) {
  let working = 0;
  const queue = [];

  function work() {
    if (queue.length === 0 || working >= count) return;
    working++;
    // Free the slot after `delay` ms and immediately try to start the next
    // queued call. Only standard timers are used, so this works with any
    // Promise implementation.
    setTimeout(() => {
      working--;
      work();
    }, delay);

    const { context, args, resolve, reject } = queue.shift();
    try {
      resolve(fn.apply(context, args));
    } catch (err) {
      // Without this, a throw on the timer-driven path would leave the
      // caller's promise pending forever.
      reject(err);
    }
  }

  return function debounced(...args) {
    return new Promise((resolve, reject) => {
      queue.push({ context: this, args, resolve, reject });
      work();
    });
  };
}

I use the async-sema module to throttle HTTP requests, which means it allows you to send HTTP requests with a rate limit.
Here is an example:
A simple Node.js server, add express-rate-limit middleware to API so that the API has rate-limit feature. Let's say this is the Shopify API for your case.
server.ts:
import express from 'express';
import rateLimit from 'express-rate-limit';
import http from 'http';
const port = 3000;
const limiter = new rateLimit({
windowMs: 1000,
max: 3,
message: 'Max RPS = 3',
});
/**
 * Creates the demo Express app with a single rate-limited `GET /place` route
 * and starts listening on `port`.
 *
 * @returns The HTTP server returned by `app.listen`.
 */
async function createServer(): Promise<http.Server> {
  const app = express();

  // `limiter` runs before the handler on every request to this route.
  app.get('/place', limiter, (_req, res) => {
    res.end('Query place success.');
  });

  const server = app.listen(port, () => {
    console.log(`Server is listening on http://localhost:${port}`);
  });
  return server;
}
if (require.main === module) {
createServer();
}
export { createServer };
On client-side, we want to send HTTP requests with concurrency = 3 and per second cap between them. I put the client-side code inside a test case. So don't feel weird.
server.test.ts:
import { RateLimit } from 'async-sema';
import rp from 'request-promise';
import { expect } from 'chai';
import { createServer } from './server';
import http from 'http';
// Starts a fresh server for each test, sends ten sequential requests gated by
// an async-sema RateLimit, and checks that every response is the success body.
describe('20253425', () => {
  let server: http.Server;

  beforeEach(async () => {
    server = await createServer();
  });

  afterEach((done) => {
    server.close(done);
  });

  it('should throttle http request per second', async () => {
    const endpoint = 'http://localhost:3000/place';
    const total = 10;
    const waitForSlot = RateLimit(3, { timeUnit: 1000 });
    const bodies: string[] = [];

    for (let sent = 0; sent < total; sent++) {
      // Wait for the limiter before each request.
      await waitForSlot();
      const body = await rp(endpoint);
      bodies.push(body);
      console.log(`[${new Date().toLocaleTimeString()}] request ${sent + 1}, response: ${body}`);
    }

    expect(bodies).to.have.lengthOf(total);
    for (const body of bodies) {
      expect(body).to.be.eq('Query place success.');
    }
  });
});
Test results, Pay attention to the time of the request
20253425
Server is listening on http://localhost:3000
[8:08:17 PM] request 1, response: Query place success.
[8:08:17 PM] request 2, response: Query place success.
[8:08:17 PM] request 3, response: Query place success.
[8:08:18 PM] request 4, response: Query place success.
[8:08:18 PM] request 5, response: Query place success.
[8:08:18 PM] request 6, response: Query place success.
[8:08:19 PM] request 7, response: Query place success.
[8:08:19 PM] request 8, response: Query place success.
[8:08:19 PM] request 9, response: Query place success.
[8:08:20 PM] request 10, response: Query place success.
✓ should throttle http request per second (3017ms)
1 passing (3s)

So many great options here, also here is the one that i am using in one of my projects.
axios-request-throttle
Usage:
import axios from 'axios';
import axiosThrottle from 'axios-request-throttle';
axiosThrottle.use(axios, { requestsPerSecond: 5 });

Related

How to loop many http requests with axios in node.js

I have an array of users where each user has an IP address.
I have an API to which I send an IP as a request, and it returns the country code that belongs to this IP.
In order to get a country code to each user I need to send separate request to each user.
In my code I do async await but it takes about 10 seconds until I get all the responses, if I don't do the async await, I don’t get the country codes at all.
My code:
/**
 * Loads every user and fills in `countryCode` by posting each user's IP to
 * the lookup API, one request at a time (the next request starts only after
 * the previous response arrived).
 *
 * @returns {Promise<Array>} The user records, each with `countryCode` set.
 */
async function getAllusers() {
  const allUsersData = await usersDao.getAllusers();

  for (let i = 0; i < allUsersData.length; i++) {
    const body = new URLSearchParams({ ip: allUsersData[i].ip });
    const res = await axios.post("http://myAPI", body);
    allUsersData[i].countryCode = res.data.countryCode;
  }

  return allUsersData;
}

You can use Promise.all to make all your requests once instead of making them one by one.
// Fire one POST per user right away and keep the pending promises in order.
let requests = Array.from(allUsersData, (user) => {
  const body = new URLSearchParams({ ip: user.ip });
  return axios.post("http://myAPI", body); // axios.post returns a Promise
});

try {
  const results = await Promise.all(requests);
  // results holds one response per user, in the same order as allUsersData
  // Your logic here...
}
catch (e) {
  // Handles errors
}

If you're just trying to get all the results faster, you can request them in parallel and know when they are all done with Promise.all():
/**
 * Loads every user and fills in `countryCode` by posting all IPs to the
 * lookup API at once, then waiting for every response.
 *
 * @returns {Promise<Array>} The user records, each with `countryCode` set.
 */
async function getAllusers() {
  const allUsersData = await usersDao.getAllusers();

  // One in-flight lookup per user; each writes its result back by index.
  const lookups = allUsersData.map(async (userData, index) => {
    const body = new URLSearchParams({ ip: userData.ip });
    const res = await axios.post("http://myAPI", body);
    allUsersData[index].countryCode = res.data.countryCode;
  });

  await Promise.all(lookups);
  return allUsersData;
}
Note, I would not recommend doing it this way if the allUsersData array is large (like more than 20 long) because you'll be raining a lot of requests on the target server and it may either impede its performance or you may get rate limited or even refused service. In that case, you'd need to send N requests at a time (like perhaps 5) using code like this pMap() here or mapConcurrent() here.

Nodejs - Fire multiple API calls while limiting the rate and wait until they are all done

My issues
Launch 1000+ online API that limits the number of API calls to 10 calls/sec.
Wait for all the API calls to give back a result (or retry), it can take 5 sec before the API sends it data
Use the combined data in the rest of my app
What I have tried while looking at a lot of different questions and answers here on the site
Use promise to wait for one API request
const https = require("https");
/**
 * Performs a GET against `/custom/path/<param>` on the API host and parses
 * the response body as JSON.
 *
 * @param {string} param - Path segment appended to `/custom/path/`.
 * @returns {Promise<*>} Resolves with the parsed JSON body. Rejects when the
 *   request fails, the response stream errors, or the body is not valid JSON,
 *   so callers can catch the failure and retry.
 */
function myRequest(param) {
  const options = {
    host: "api.xxx.io",
    port: 443,
    path: "/custom/path/" + param,
    method: "GET",
  };

  return new Promise(function (resolve, reject) {
    const req = https.request(options, function (result) {
      let str = "";
      result.on("data", function (chunk) {
        str += chunk;
      });
      result.on("end", function () {
        // A malformed body must reject the promise rather than throw inside
        // the event handler, where nobody can catch it.
        try {
          resolve(JSON.parse(str));
        } catch (err) {
          reject(err);
        }
      });
      result.on("error", reject);
    });

    // Connection-level failures (DNS, refused, reset) are emitted on the
    // request object, not on the response.
    req.on("error", reject);
    req.end();
  });
}
Use Promise.all to do all the requests and wait for them to finish
const params = [{item: "param0"}, ... , {item: "param1000+"}]; // imagine 1000+ items
const promises = [];
base.map(function(params){
promises.push(myRequest(params.item));
});
result = Promise.all(promises).then(function(data) {
// doing some funky stuff with dat
});
So far so good, sort of
It works when I limit the number of API requests to a maximum of 10 because then the rate limiter kicks in. When I console.log(promises), it gives back an array of 'request'.
I have tried to add setTimeout in different places, like:
...
base.map(function(params){
promises.push(setTimeout(function() {
myRequest(params.item);
}, 100));
});
...
But that does not seem to work. When I console.log(promises), it gives back an array of 'function'
My questions
Now I am stuck ... any ideas?
How do I build in retries when the API gives an error
Thank you for reading up to here, you are already a hero in my book!

When you have a complicated control-flow using async/await helps a lot to clarify the logic of the flow.
Let's start with the following simple algorithm to limit everything to 10 requests per second:
make 10 requests
wait 1 second
repeat until no more requests
For this the following simple implementation will work:
/**
 * Sends one `myRequest` per entry of `params` in batches, pausing one second
 * between batches so that no more than `batchSize` requests start per second.
 *
 * Entries are taken from the END of the list (as with `Array#pop`), so the
 * results come back in reverse input order. The caller's array is not
 * modified; a private copy is consumed instead.
 *
 * @param {Array<{item: *}>} params - Request descriptors; falsy entries are skipped.
 * @param {number} [batchSize=10] - Requests started per batch.
 * @returns {Promise<Array>} Results of all requests, batch after batch.
 * @throws {RangeError} (as a rejection) when `batchSize` is not a positive integer.
 */
async function rateLimitedRequests(params, batchSize = 10) {
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError('batchSize must be a positive integer');
  }

  const pending = [...params];
  let results = [];

  while (pending.length > 0) {
    const batch = [];
    // `let i` keeps the counter local; an undeclared `i` is a ReferenceError
    // in strict mode / ES modules and a leaked global otherwise.
    for (let i = 0; i < batchSize; i++) {
      const thisParam = pending.pop();
      if (thisParam) {
        batch.push(myRequest(thisParam.item));
      }
    }
    results = results.concat(await Promise.all(batch));

    // Nothing left to send: no reason to make the caller wait another second.
    if (pending.length > 0) {
      await delayOneSecond();
    }
  }

  return results;
}
Now we just need to implement the one second delay. We can simply promisify setTimeout for this:
/**
 * Promise-based one second pause.
 *
 * @returns {Promise<void>} Resolves (with no value) after 1000 ms.
 */
function delayOneSecond() {
  return new Promise((resolve) => {
    setTimeout(resolve, 1000);
  });
}
This will definitely give you a rate limiter of just 10 requests each second. In fact it performs somewhat slower than that because each batch will execute in request time + one second. This is perfectly fine and already meets your original intent, but we can improve this to squeeze in a few more requests and get as close as possible to exactly 10 requests per second.
We can try the following algorithm:
remember the start time
make 10 requests
compare end time with start time
delay one second minus request time
repeat until no more requests
Again, we can use almost exactly the same logic as the simple code above but just tweak it to do time calculations:
const ONE_SECOND = 1000;

/**
 * Sends one `myRequest` per entry of `params` in batches of `batchSize`,
 * starting a new batch no sooner than one second after the previous batch
 * started (the time the batch itself took is subtracted from the pause).
 *
 * Entries are taken from the END of the list (as with `Array#pop`), so the
 * results come back in reverse input order. The caller's array is not
 * modified; a private copy is consumed instead.
 *
 * @param {Array<{item: *}>} params - Request descriptors; falsy entries are skipped.
 * @param {number} [batchSize=10] - Requests started per one-second window.
 * @returns {Promise<Array>} Results of all requests, batch after batch.
 * @throws {RangeError} (as a rejection) when `batchSize` is not a positive integer.
 */
async function rateLimitedRequests(params, batchSize = 10) {
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError('batchSize must be a positive integer');
  }

  const pending = [...params];
  let results = [];

  while (pending.length > 0) {
    const batch = [];
    const startTime = Date.now();
    // `let i` keeps the counter local; an undeclared `i` is a ReferenceError
    // in strict mode / ES modules and a leaked global otherwise.
    for (let i = 0; i < batchSize; i++) {
      const thisParam = pending.pop();
      if (thisParam) {
        batch.push(myRequest(thisParam.item));
      }
    }
    results = results.concat(await Promise.all(batch));

    // Only wait out the remainder of the window when another batch follows.
    const delayTime = ONE_SECOND - (Date.now() - startTime);
    if (pending.length > 0 && delayTime > 0) {
      await delay(delayTime);
    }
  }

  return results;
}
Now instead of hardcoding the one second delay function we can write one that accept a delay period:
/**
 * Promise-based pause.
 *
 * @param {number} milliseconds - How long to wait.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function delay(milliseconds) {
  return new Promise((resolve) => {
    setTimeout(resolve, milliseconds);
  });
}
We have here a simple, easy to understand function that will rate limit as close as possible to 10 requests per second. It is rather bursty in that it makes 10 parallel requests at the beginning of each one second period but it works. We can of course keep implementing more complicated algorithms to smooth out the request pattern etc. but I leave that to your creativity and as homework for the reader.

Grouping redis.get for 2ms and then executing by mget

My application makes about 50 redis.get call to serve a single http request, it serves millions of request daily and application runs on about 30 pods.
When monitoring on newrelic i am getting 200MS average redis.get time, To Optimize this i wrote a simple pipeline system in nodejs which is simply a wrapper over redis.get and it pushes all the request in queue, and then execute the queue using redis.mget (getting all the keys in bulk).
Following is the code snippet:
/**
 * Collects individual `get` requests and, every 5 ms, fetches everything that
 * accumulated with a single `mget` on `global.redisClient`.
 *
 * Call `clear()` when done; the polling interval otherwise keeps running.
 */
class RedisBulk {
  constructor() {
    this.queue = [];
    // Batches currently waiting for their mget reply, keyed by a unique id.
    this.processingQueue = {};
    this.batchSeq = 0;
    this.intervalId = setInterval(() => {
      this._processQueue();
    }, 5);
  }

  /** Stops the periodic flush. Already queued requests are not flushed. */
  clear() {
    clearInterval(this.intervalId);
  }

  /**
   * Queues a lookup for `key`.
   *
   * @param {string} key - Redis key to fetch.
   * @param {function(Error|null, *)} [cb] - Node-style callback. When omitted,
   *   a promise for the value is returned instead.
   * @returns {Promise<*>|undefined} A promise only when no callback was given.
   */
  get(key, cb) {
    if (typeof cb === 'function') {
      this.queue.push({ cb, key });
      return undefined;
    }
    return new Promise((resolve, reject) => {
      this.queue.push({
        key,
        cb: (err, reply) => (err ? reject(err) : resolve(reply)),
      });
    });
  }

  /** Sends everything queued so far as one mget and fans the replies out. */
  _processQueue() {
    if (this.queue.length === 0) {
      return;
    }

    const batch = this.queue;
    this.queue = []; // start collecting the next batch
    logger.debug('Processing Queue of length', batch.length);

    // A timestamp alone is not unique if two flushes land in the same
    // millisecond, so a sequence number is appended.
    const batchId = `${Date.now()}-${this.batchSeq++}`;
    this.processingQueue[batchId] = batch;

    const keys = batch.map((item) => item.key);
    global.redisClient.mget(keys, (err, replies) => {
      delete this.processingQueue[batchId];

      if (err) {
        captureException(err);
        console.error(err);
        // Every waiting caller must hear about the failure; otherwise their
        // requests would hang forever.
        batch.forEach((item) => item.cb(err));
        return;
      }

      batch.forEach((item, index) => {
        item.cb(err, replies[index]);
      });
    });
  }
}
let redis_bulk = new RedisBulk();
redis_bulk.get('a');
redis_bulk.get('b');
redis_bulk.get('c');
redis_bulk.get('d');
My Question is: is this a good approach? will it help in optimizing redis get time? is there any other solution for above problem?
Thanks

I'm not a redis expert but judging by the documentation ;
MGET has the time complexity of
O(N) where N is the number of keys to retrieve.
And GET has the time complexity of
O(1)
Which brings both scenarios to the same end result in terms of time complexity in your scenario. Having a bulk request with MGET can bring you some improvements for the IO but apart from that looks like you have the same bottleneck.
I'd ideally split my data into chunks, responding via multiple http requests in async fashion if that's an option.
Alternatively, you can try calling GET with promise.all() to run GET requests in parallel, for all the GET calls you need.
Something like;
const asyncRedis = require("async-redis");
const client = asyncRedis.createClient();
/**
 * Fetches several keys in parallel with individual GET calls.
 *
 * @param {string[]} [keys=[]] - Keys to fetch.
 * @returns {Promise<Array>} Values in the same order as `keys`.
 */
function bulk(keys = []) {
  // Call through `client` so `get` keeps its `this`, and pass only the key;
  // `map` would otherwise also hand over the index and the whole array.
  return Promise.all(keys.map((key) => client.get(key)));
}

How to run asynchronous tasks synchronous?

I'm developing an app with the following node.js stack: Express/Socket.IO + React. In React I have DataTables, wherein you can search and with every keystroke the data gets dynamically updated! :)
I use Socket.IO for data-fetching, so on every keystroke the client socket emits some parameters and the server calls then the callback to return data. This works like a charm, but it is not garanteed that the returned data comes back in the same order as the client sent it.
To simulate: So when I type in 'a', the server responds with this same 'a' and so for every character.
I found the async module for node.js and tried to use the queue to return tasks in the same order it received it. For simplicity I delayed the second incoming task with setTimeout to simulate a slow performing database-query:
Declaration:
const async = require('async');
// Queue with up to 10 concurrent workers. A task whose `count` equals 1 is
// held for three seconds to imitate a slow query; every other task completes
// immediately.
var queue = async.queue(function (job, done) {
  if (job.count != 1) {
    done();
    return;
  }
  setTimeout(function () {
    done();
  }, 3000);
}, 10);
Usage:
// For each 'result' message, push a task onto the queue and acknowledge the
// client with the filter once the queue reports the task as finished.
socket.on('result', function (data, fn) {
  var filter = data.filter;
  // TEST SYNCHRONOUSLY: a one-character filter becomes the "slow" task.
  var isSingleChar = filter.length === 1;
  var task = { name: filter, count: isSingleChar ? 1 : filter.length };

  queue.push(task, function (err) {
    fn(isSingleChar ? filter : data.filter);
  });
});
But the way I receive it in the client console, when I search for abc is as follows:
ab -> abc -> a(after 3 sec)
I want it to return it like this: a(after 3sec) -> ab -> abc
My thought is that the queue runs the setTimeout and then goes further and eventually the setTimeout gets fired somewhere on the event loop later on. This resulting in returning later search filters earlier then the slow performing one.
How can i solve this problem?

First a few comments, which might help clear up your understanding of async calls:
Using "timeout" to try and align async calls is a bad idea, that is not the idea about async calls. You will never know how long an async call will take, so you can never set the appropriate timeout.
I believe you are misunderstanding the usage of queue from async library you described. The documentation for the queue can be found here.
Copy pasting the documentation in here, in-case things are changed or down:
Creates a queue object with the specified concurrency. Tasks added to the queue are processed in parallel (up to the concurrency limit). If all workers are in progress, the task is queued until one becomes available. Once a worker completes a task, that task's callback is called.
The above means that the queue can simply be used to prioritize the async tasks a given worker can perform. The different async tasks can still finish at different times.
Potential solutions
There are a few solutions to your problem, depending on your requirements.
You can only send one async call at a time and wait for the first one to finish before sending the next one
You store the results and only display the results to the user when all calls have finished
You disregard all calls except for the latest async call
In your case I would pick solution 3, as you are searching for something. Why would you care about the results for "a" if the user is already searching for "abc" before the response for "a" arrives?
This can be done by giving each request a timestamp and then sort based on the timestamp taking the latest.

SOLUTION:
Server:
// Registers the 'result' handler on every new socket. The reply echoes the
// request's filter and counter; filters of length 1 or 5 are answered three
// seconds late to imitate a slow query.
exports = module.exports = function (io) {
  io.sockets.on('connection', function (socket) {
    socket.on('result', function (data, fn) {
      var filter = data.filter;
      var counter = data.counter;
      var isSlow = filter.length === 1 || filter.length === 5; // TEST SYNCHRONOUSLY

      var reply = function () {
        fn({ filter: filter, counter: counter }); // return to client
      };

      if (isSlow) {
        setTimeout(reply, 3000);
      } else {
        reply();
      }
    });
  });
}
Client:
// Component that tags every lazy-load request with an increasing counter and
// only treats a reply as current when it carries the latest counter value.
// NOTE(review): the snippet stops before the emit(...) call, the method and
// the class are closed, so it does not parse as shown.
export class FilterableDataTable extends Component {
constructor(props) {
// NOTE(review): props are not forwarded to super() — confirm this is intended.
super();
this.state = {
endpoint: "http://localhost:3001",
filters: {},
counter: 0
};
this.onLazyLoad = this.onLazyLoad.bind(this);
}
// Builds the request (offset, fixed page size of 20, current filter text) and
// emits it on this.socket together with the request counter.
// NOTE(review): this.socket is not assigned anywhere in the visible code.
onLazyLoad(event) {
var offset = event.first;
if(offset === null) {
offset = 0;
}
var filter = ''; // search text; stays empty when no result2 filter is set
if(event.filters.result2 != undefined) {
filter = event.filters.result2.value;
}
var returnedData = null;
// NOTE(review): state is mutated in place here rather than through setState.
this.state.counter++;
this.socket.emit('result', {
offset: offset,
limit: 20,
filter: filter,
counter: this.state.counter
}, function(data) {
returnedData = data;
console.log(returnedData);
// NOTE(review): this callback is a plain function, so `this` is presumably
// not the component here — confirm, or switch to an arrow function.
// Replies from superseded requests (older counter) are ignored.
if(returnedData.counter === this.state.counter) {
console.log('DATA: ' + JSON.stringify(returnedData));
}
}
This however does send unneeded data to the client, which in turn ignores it. Does anybody have any ideas for further optimizing this kind of communication? For example, a method to keep old data at the server and only send the latest?

Bytes sent/received for Node.js HTTP request

Once an HTTP request has been served, I would like to log the number of bytes sent/received.
A simple source for this data is req.connection.bytesRead/.bytesWritten. However, this is problematic for HTTP 1.1 keep-alive connections, as the same socket can be used for multiple requests. I need to log per-request, not per-connection.
The solution must lie on the HTTP side of things, but I see no methods documented for getting the data I need.
What is the proper way to calculate bytes read/written for HTTP requests served by Node.js's http.Server?

Unfortunately, I never found a proper way to do this. I've resorted to some fairly terrible duck punching, but it works for my particular use case. In case anyone else stumbles along with this problem, you can start with this and refine from there.
Module #1: "Extra Events"
All this module does is make the response object emit a finishBeforeSocketDestroy event. Since I needed this event in a few places in my application, I effectively made a separate module just for this duck punch. app.use() it before Module #2.
module.exports = function (req, res, next) {
var end = res.end;
res.end = function () {
res.end = end;
res.emit('finishBeforeSocketDestroy');
res.end.apply(this, arguments);
}
next();
}
Module #2: "Stats"
This module creates a req.stats object, containing all sorts of useful goodies for tracking bandwidth usage during usage of the connection, and after it is finished.
var pollTime = 1000;
module.exports = function (req, res, next) {
var pollInterval;
function pollStats () {
if (typeof req.stats._lastMeasuredTime === 'object') {
var secondsSinceLastMeasurement = ((new Date() - req.stats._lastMeasuredTime) / 1000);
req.stats.averageRate = {
read: (req.socket.bytesRead - req.stats.bytesRead) / secondsSinceLastMeasurement,
write: (req.socket.bytesWritten - req.stats.bytesWritten) / secondsSinceLastMeasurement
};
}
req.stats._lastMeasuredTime = new Date();
req.stats.bytesRead = req.socket.bytesRead;
req.stats.bytesWritten = req.socket.bytesWritten;
}
req.stats = {
startTime: new Date(),
endTime: null,
averageRate: {read: null, write: null},
bytesRead: req.socket.bytesRead,
bytesWritten: req.socket.bytesWritten,
_lastMeasuredTime: new Date()
};
pollInterval = setInterval(pollStats, pollTime);
res.on('finishBeforeSocketDestroy', function () {
clearInterval(pollInterval);
pollStats();
req.stats.endTime = new Date();
});
next();
}
Like I said... messy. I'm only posting it as duck punching may be your only option. Also beware that socket may get re-used for multiple HTTP requests, which could cause you to double-count some bytes if you're not careful.

Just store traffic value after each response and calculate difference in 'finish' or 'end' handler:
// server.onRequest:
...
req._prevBytesWritten = 0;
// response.onFinish/onEnd:
...
responseLen = req.socket.bytesWritten - req._prevBytesWritten;
req._prevBytesWritten = req.socket.bytesWritten;

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string