Throttling Axios Requests - node.js

I'm using axios to make requests to the Deezer API. Unfortunately, with Deezer's API when you request an artist's albums it does not include album tracks. So, I am working around this by requesting the artist's albums and then performing a subsequent axios request for each album. The problem I'm running into is that the API limits requests to 50 per 5 seconds. If an artist has more than 50 albums I usually get a "quota exceeded" error. Is there a way to throttle axios requests to 50 per 5 seconds, specifically when using axios.all?
var axios = require('axios');

function getAlbums(artistID) {
  axios.get(`https://api.deezer.com/artist/${artistID}/albums`)
    .then((albums) => {
      const urls = albums.data.data.map((album) => {
        return axios.get(`https://api.deezer.com/album/${album.id}`)
          .then(albumInfo => albumInfo.data);
      });
      axios.all(urls)
        .then((allAlbums) => {
          console.log(allAlbums);
        });
    }).catch((err) => {
      console.log(err);
    });
}
getAlbums(413);

First of all, let's look at what you actually need. Your goal is to make at most one request every 100 milliseconds when an artist has a large number of albums. (Using axios.all for this is no different from using Promise.all; you just want to wait for all of the requests to complete.)
Now, axios has an interception API that lets you plug your own logic in before a request is sent. So you can use an interceptor like this:
function scheduleRequests(axiosInstance, intervalMs) {
  let lastInvocationTime = undefined;

  const scheduler = (config) => {
    const now = Date.now();
    if (lastInvocationTime) {
      lastInvocationTime += intervalMs;
      const waitPeriodForThisRequest = lastInvocationTime - now;
      if (waitPeriodForThisRequest > 0) {
        return new Promise((resolve) => {
          setTimeout(() => resolve(config), waitPeriodForThisRequest);
        });
      }
    }
    lastInvocationTime = now;
    return config;
  };

  axiosInstance.interceptors.request.use(scheduler);
}
It times the requests so they are performed at intervals of intervalMs milliseconds.
In your code:
function getAlbums(artistID) {
  const deezerService = axios.create({ baseURL: 'https://api.deezer.com' });
  scheduleRequests(deezerService, 100);

  deezerService.get(`/artist/${artistID}/albums`)
    .then((albums) => {
      const urlRequests = albums.data.data.map(
        (album) => deezerService
          .get(`/album/${album.id}`)
          .then(albumInfo => albumInfo.data));
      // You need to 'return' here, otherwise any error in the album
      // requests will not propagate to the final 'catch':
      return axios.all(urlRequests).then(console.log);
    })
    .catch(console.log);
}
This is, however, a simplistic approach; in your case you would probably like to receive the results as quickly as possible when there are fewer than 50 requests. For that, you have to add some kind of counter inside the scheduler that tracks the number of requests and delays their execution based on both the interval and the counter.
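For illustration, here is one possible sketch of that counter idea (it is not part of the original answer; the function name and parameters are mine). It records the planned send time of the last maxRequests requests and only starts delaying once that window is full, so bursts of up to 50 go out immediately:

function scheduleWindowedRequests(axiosInstance, maxRequests, windowMs) {
  // Planned send times of the most recent `maxRequests` requests.
  const scheduled = [];

  axiosInstance.interceptors.request.use((config) => {
    const now = Date.now();
    // A request may be sent windowMs after the request that came
    // maxRequests before it; until the window fills up, send at once.
    const earliest = scheduled.length === maxRequests
      ? Math.max(now, scheduled[0] + windowMs)
      : now;
    scheduled.push(earliest);
    if (scheduled.length > maxRequests) {
      scheduled.shift();
    }
    if (earliest <= now) {
      return config;
    }
    return new Promise((resolve) => {
      setTimeout(() => resolve(config), earliest - now);
    });
  });
}

// For the Deezer limit of 50 requests per 5 seconds:
// scheduleWindowedRequests(deezerService, 50, 5000);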

Here is my solution using a simple async setTimeout / ES6 sleep helper. You can set the delay via the sleep function's delay parameter:
const sleep = (delay) => {
  return new Promise(function (resolve) {
    setTimeout(resolve, delay);
  });
};

axios.interceptors.response.use(async function (response) {
  await sleep(3000);
  return response;
}, function (error) {
  // Do something with the response error
  console.error(error);
  return Promise.reject(error);
});
There is also this npm package you can use:
https://www.npmjs.com/package/axios-request-throttle

There is a module that worked for me out of the box with a NestJS application:
Throttle axios request-per-second rate with 3 lines of code. The main
difference between this module and others like axios-throttled is that
you don't have to create a new axios instance, and by extension don't
have to fix imports project-wide. Apply it once and every axios.get,
post, put, delete etc. is throttled.
axios-request-throttle
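If I remember the package's README correctly (do check the linked page, as the API may have changed), wiring it up looks roughly like this:

const axios = require('axios');
const axiosThrottle = require('axios-request-throttle');

// Applied once; every subsequent axios.get/post/put/delete
// on the default instance is rate-limited.
axiosThrottle.use(axios, { requestsPerSecond: 5 });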

Related

Koa API server - Wait until previous request is processed before processing a new request

I'm building an API in Node with Koa which uses another API to process some information. A request comes in to my API from the client, and my API makes several requests to the other API. The problem is that the other API is fragile and slow, so to guarantee data integrity I have to check that no previous incoming request is still being processed before starting a new process. My first idea was to use promises and a global boolean to check if there's ongoing processing and await until the process has finished. Somehow this prevents concurrent requests, but even if 3-4 requests come in during the process, only the first one is handled and that is it. Why are the rest of the incoming requests forgotten?
Edit: As a side note, I do not need to respond to the incoming request with processed information. I could send the response right after the request is received. I need to do the operations with the 3rd-party API.
My solution so far:
The entry point:
router.get('/update', (ctx, next) => {
  ctx.body = 'Updating...';
  update();
  next();
});
And the update function:
let updateInProgress = false;

const update = async () => {
  const updateProcess = () => {
    return new Promise((resolve, reject) => {
      if (!updateInProgress) {
        return resolve();
      } else {
        // Bug: this schedules a *new* call to updateProcess whose
        // promise is never chained to this one, so this promise
        // never resolves.
        setTimeout(updateProcess, 5000);
      }
    });
  };
  await updateProcess();
  updateInProgress = true;
  // Process the request
  updateInProgress = false;
};
OK, I found a working solution; I'm not sure how elegant it is, though...
I'm guessing the problem was that a new Promise was created by the timeout function, and then another one, and another one, until one of them was resolved. That did not resolve the first Promise, though, and the code got stuck. The solution was to create an interval which checks whether the condition is met and then resolves the Promise. If someone smarter could comment, I'd appreciate it. (One possible alternative is sketched after the code below.)
let updateInProgress = false;

const update = async () => {
  const updateProcess = () => {
    return new Promise((resolve, reject) => {
      if (!updateInProgress) {
        return resolve();
      } else {
        const processCheck = setInterval(() => {
          if (!updateInProgress) {
            clearInterval(processCheck);
            return resolve();
          }
        }, 5000);
      }
    });
  };
  await updateProcess();
  updateInProgress = true;
  // Process the request
  updateInProgress = false;
};
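As for that invitation: one common alternative is a promise queue, where every incoming request is chained onto a shared tail promise, so each runs strictly after the previous one with no polling interval at all. A minimal sketch (processRequest is a hypothetical stand-in for the actual work):

let queue = Promise.resolve();

const update = () => {
  // Chain this run after whatever is already queued; the catch
  // keeps one failed run from blocking all later ones.
  queue = queue
    .then(() => processRequest())
    .catch((err) => console.error('update failed:', err));
  return queue;
};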

Chunking axios.get requests with a 1 second delay per chunk - presently getting 429 error

I have a script using axios that hits an API with a limit of 5 requests per second. At present my request array length is 72 and it will grow over time. I receive an abundance of 429 errors. The responses per endpoint change with each run of the script; e.g. url1 on iteration 1 returns 429, then url1 on iteration 2 returns 200, url1 on iteration 3 returns 200, url1 on iteration 4 returns 429.
Admittedly, my understanding of async/await and promises is spotty at best.
What I understand:
I can have multiple axios.get calls running because of async. The variable I set in my main function that uses the async function can include await to ensure all requests have been processed before continuing the script.
Promise.all can run multiple axios.get calls, but if a single request fails, the chain breaks and no more requests will run.
Because the API will only accept 5 requests per second, I have to chunk my axios.get requests into groups of 5 endpoints and wait for those to finish processing before sending the next chunk of 5.
setTimeout will assign a time limit to a single request; once the time is up, the request is done and will not be sent again, even if the response was something other than 200.
setInterval will assign a time limit, but it will send the request again after the time is up and keep requesting until it receives a 200.
async function main() {
  var endpoints = makeEndpoints(boards, whiteList); // returns an array of API endpoint strings: ['www.url1.com', 'www.url2.com', ...]
  var events = await getData(endpoints);
  ...
}
The getData() function has seen many iterations in my attempts to correct the 429's. Here are a few:
// Will return the 200's sometimes and not others; I believe it's the timeout,
// but that won't attempt to hit a failed url again (as I understand it).
async function getData(endpoints) {
  let events = [];
  for (let x = 0; x < endpoints.length; x++) {
    try {
      let response = await axios.get(endpoints[x], { timeout: 2000 });
      if (response.status == 200 &&
          response.data.hasOwnProperty('_embedded') &&
          response.data._embedded.hasOwnProperty('events')
      ) {
        let eventsArr = response.data._embedded.events;
        eventsArr.forEach(event => {
          events.push(event);
        });
      }
    } catch (error) {
      console.log(error);
    }
  }
  return events;
}
// Returns a great many 429 errors via the setInterval; as I understand it,
// this sets a delay of N seconds before attempting the next call.
async function getData(endpoints) {
  let data = [];
  let promises = [];
  endpoints.forEach((url) => {
    promises.push(axios.get(url));
  });
  setInterval(function () {
    for (let i = 0; i < promises.length; i += 5) {
      let requestArr = promises.slice(i, i + 5);
      axios.all(requestArr)
        .then(axios.spread((...res) => {
          console.log(res);
        }))
        .catch(err => {
          console.log(err);
        });
    }
  }, 2000);
}
// Here I hoped Promise.all would allow each request to do its thing and
// return the data, but after further reading I found that if a single
// request fails, the rest will fail in the Promise.all.
async function getData(endpoints) {
  try {
    const res = await Promise.all(endpoints.map(url => axios.get(url))).catch(err => {});
    return res;
  } catch {
    throw Error('Promise failed');
  }
}
// Returns many 429's and only 3/4 of the data I know to expect.
async function getData(endpoints) {
  const someFunction = () => {
    return new Promise(resolve => {
      setTimeout(() => resolve('222'), 100);
    });
  };
  const requestArr = endpoints.map(async data => {
    let waitForThisData = await someFunction(data);
    return axios.get(data)
      .then(response => { console.log(response.data); })
      .catch(error => console.log(error.toString()));
  });
  Promise.all(requestArr).then(() => {
    console.log('resolved promise.all');
  });
}
// Seems to get close to solving it, but once an error is hit,
// Promise.all stops processing the endpoints.
async function getData(endpoints) {
  (async () => {
    try {
      const allResponses = await Promise.all(
        endpoints.map(url => axios.get(url).then(res => console.log(res.data)))
      );
      console.log(allResponses[0]);
    } catch (e) {
      console.log(e);
      // handle errors
    }
  })();
}
It seems like I have so many relevant pieces, but I cannot connect them into an efficient, working model. Perhaps axios has something completely unknown to me? I've also tried using Bluebird's concurrency option to limit the requests to 5 per attempt, but that still returned 429's from axios.
I've been staring at this for days, and with so much new information swirling in my head I'm at a loss as to how to send 5 requests per second, await the responses, then send another set of 5 requests to the API.
Guidance/links/ways to improve upon the question would be much appreciated.
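For what it's worth, here is one sketch that connects the pieces described above (an illustration, not a tested solution): chunk the endpoints into groups of 5, await each chunk with Promise.allSettled (Node 12.9+) so a single 429 doesn't discard the rest of the chunk, and sleep roughly a second between chunks. The response shape (_embedded.events) is taken from the question; everything else is an assumption:

const axios = require('axios');

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function getData(endpoints) {
  const events = [];
  for (let i = 0; i < endpoints.length; i += 5) {
    const chunk = endpoints.slice(i, i + 5);
    // allSettled never rejects, so one failing request in the
    // chunk does not break the others.
    const results = await Promise.allSettled(chunk.map((url) => axios.get(url)));
    for (const result of results) {
      if (result.status === 'fulfilled' &&
          result.value.data._embedded &&
          result.value.data._embedded.events) {
        events.push(...result.value.data._embedded.events);
      } else if (result.status === 'rejected') {
        console.log(result.reason.toString());
      }
    }
    // Pause before the next chunk to stay under 5 requests/second.
    if (i + 5 < endpoints.length) {
      await sleep(1000);
    }
  }
  return events;
}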

Node.js/Vuetify- Is there a way to get data from server based on time?

I have a node.js server set up for a Vuetify project. On my server, I am parsing a CSV file that has scheduling and time information in it. In my Vuetify project, is there a way to get data from the CSV based on the time at which the client is being used?
OK, let's go with an example.
From what I understand, you have the following information in your CSV file:
Time,Activity
07:00,Breakfast
08:00,Go to work
12:00,Lunch break
Since you didn't specify, I will use an example parser, which will push all rows, as objects, into an array:
[
  { Time: '07:00', Activity: 'Breakfast' },
  { Time: '08:00', Activity: 'Go to work' },
  { Time: '12:00', Activity: 'Lunch break' }
]
You need to send that information to your clients, so assuming you are using Express, you could go with something along the lines of:
const csv = require('csv-parser');
const fs = require('fs');
const express = require('express');
const app = express();

const timeSchedule = [];

// Note: each call re-reads the CSV and appends to timeSchedule;
// fine for a demo, but repeated requests will duplicate rows.
function parseCsv() {
  return new Promise((resolve, reject) => {
    fs.createReadStream('data.csv')
      .pipe(csv())
      .on('data', (data) => timeSchedule.push(data))
      .on('error', (err) => reject(err))
      .on('end', () => resolve());
  });
}
app.get('/scheduled-for/:hour', function (req, res) {
  // You need to come up with the logic for your case.
  // As an example, I will assume that if I make a request at any time
  // between 7 and 8, I will get "Breakfast",
  // between 8 and 9 - "Go to work",
  // between 12 and 13 - "Lunch break".
  parseCsv().then(() => {
    res.json(timeSchedule.find(row => row.Time.startsWith(req.params.hour)));
  });
});
Please note, all of the above happens on the Node.js server.
From the client, you will have to call the scheduled-for GET handler with the hour param. Another option is to let the back-end determine the hour of the request using the Date object, but the above is more flexible for the client. It also avoids timezone issues when your client requests come from timezones different from the one your server is on.
Assuming you are using axios in your Vue application, the simplest way to get the schedule is to call your API in your component:
new Vue({
  el: '#app',
  data () {
    return {
      activity: null
    };
  },
  mounted () {
    axios
      .get('https://urlToYourApi/v1/scheduled-for/' + new Date().getHours())
      .then(response => {
        // axios resolves with a response object; the payload is in .data
        const schedule = response.data;
        if (schedule) {
          this.activity = schedule.Activity;
        } else {
          console.log("No activity found for this hour!");
        }
      });
  }
});
This code is NOT for production! You need to handle many cases, such as new Date().getHours() returning single-digit hours, the CSV parsing itself, and of course the domain logic, which depends on your specific case. This is just a simple example; I hope it helps guide you in the right direction!
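For instance, one way to handle the single-digit-hours case would be to pad the hour on the client before calling the API (a small sketch, assuming the URL from the example above):

// getHours() returns 7, not '07', so pad it to match the
// 'Time' values in the CSV (e.g. '07:00'):
const hour = String(new Date().getHours()).padStart(2, '0');
axios.get('https://urlToYourApi/v1/scheduled-for/' + hour);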

How to make multiple http requests from a Google Cloud Function (Cheerio, Node.js)

MY PROBLEM:
I'm building a web-scraper with Cheerio, Node.js, and Google Cloud Functions.
The problem is I need to make multiple requests, then write data from each request to a Firestore database before calling response.send() and thereby terminating the function.
My code requires two loops: the first loop is with urls from my db, with each one making a separate request. The second loop is with Cheerio using .each to scrape multiple rows of table data from the DOM and make a separate write for each row.
WHAT I'VE TRIED:
I've tried pushing each request to an array of promises and then waiting for all the promises to resolve with Promise.all() before calling res.send(), but I'm still a little shaky on promises and not sure that is the right approach. (I have gotten the code to work for smaller datasets that way, but still inconsistently.)
I also tried creating each request as a new promise and using async/await to await each function call from the forEach loop, to allow time for each request and write to fully finish so I could call res.send() afterward, but I found out that forEach doesn't support async/await.
I tried to get around that with the p-iteration module, but because it's not actually forEach but rather a method on the query (doc.forEach()), I don't think it works like that.
So here's my code.
NOTE:
As mentioned, this is not everything I tried (I removed my promise attempts), but this should show what I am trying to accomplish.
export const getCurrentLogs = functions.https.onRequest((req, response) => {
  // First, I make a query to my db to get the urls
  // that I want the web scraper to loop through.
  const ref = scheduleRef.get()
    .then((snapshot) => {
      snapshot.docs.forEach((doc) => {
        const scheduleGame = doc.data();
        const boxScoreUrl = scheduleGame.boxScoreURL;
        // Inside the forEach I call the request
        // as a function with the url passed in.
        updatePlayerLogs("https://" + boxScoreUrl + "/");
      });
    })
    .catch(err => {
      console.log('Error getting schedule', err);
    });

  function updatePlayerLogs(url) {
    // Here I'm not sure how to set these options
    // to make sure the request stays open, but I have
    // tried lots of different things.
    const options = {
      uri: url,
      Connection: 'keep-alive',
      transform: function (body) {
        return cheerio.load(body);
      }
    };
    request(options)
      .then(($) => {
        // Below I loop through some table data
        // in the DOM with cheerio. Every loop
        // in here needs to be written to Firebase individually.
        $('.stats-rows').find('tbody').children('tr').each(function (i, element) {
          const playerPage = $(element).children('td').eq(0).find('a').attr('href');
          const pts = replaceDash($(element).children('td').eq(1).text());
          const reb = replaceDash($(element).children('td').eq(2).text());
          const ast = replaceDash($(element).children('td').eq(3).text());
          const fg = replaceDash($(element).children('td').eq(4).text());
          const _3pt = replaceDash($(element).children('td').eq(5).text());
          const stl = replaceDash($(element).children('td').eq(9).text());
          const blk = replaceDash($(element).children('td').eq(10).text());
          const to = replaceDash($(element).children('td').eq(11).text());
          const currentLog = {
            'pts': + pts,
            'reb': + reb,
            'ast': + ast,
            'fg': fg,
            '3pt': + _3pt,
            'stl': + stl,
            'blk': + blk,
            'to': + to
          };
          // Here is the write.
          playersRef.doc(playerPage).update({
            'currentLog': currentLog
          })
          .catch(error =>
            console.error("Error adding document: ", error + " : " + url)
          );
        });
      })
      .catch((err) => {
        console.log(err);
      });
  }

  // Here I call response.send() to finish the function.
  // I have tried doing this lots of different ways, but
  // whatever I try, the response is sent before all
  // docs are written.
  response.send("finished writing logs");
});
Everything I have tried either results in a deadline-exceeded error (possibly because of quota limits, which I have looked into, but I don't think I should be exceeding them), or some unexplained error where the code doesn't finish executing but shows me nothing in the logs.
Please help: is there a way to use async/await in this scenario that I am not understanding? Is there a way to use promises to make this elegant?
Many thanks,
Maybe have a look at something like this. It uses Bluebird promises and the request-promise library:
const Promise = require('bluebird');
var rp = require('request-promise');

const urlList = ['http://www.google.com', 'http://example.com'];

async function getList() {
  await Promise.map(urlList, (url, index, length) => {
    return rp(url)
      .then((response) => {
        console.log(`${'\n\n\n'}${url}:${'\n'}${response}`);
        return;
      }).catch(async (err) => {
        console.log(err);
        return;
      });
  }, {
    concurrency: 10
  }); // end Promise.map
}

getList();
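Applied to the question, the same idea might look roughly like the sketch below. It assumes updatePlayerLogs is changed to return its request-and-write promise (as posted, it returns nothing), and the concurrency of 10 is arbitrary:

export const getCurrentLogs = functions.https.onRequest(async (req, response) => {
  try {
    const snapshot = await scheduleRef.get();
    const urls = snapshot.docs.map((doc) => 'https://' + doc.data().boxScoreURL + '/');
    // Wait for every scrape-and-write to finish before responding.
    await Promise.map(urls, (url) => updatePlayerLogs(url), { concurrency: 10 });
    response.send('finished writing logs');
  } catch (err) {
    console.error(err);
    response.status(500).send('error writing logs');
  }
});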

Delaying promises: .delay() doesn't seem to work

This is an extended question from Throttling requests to 3rd party APIs with Node. You can see my full code in that post if you want to see the sub-functions and a little backstory of what I'm trying to do.
What I need to do is delay API requests; I am hitting the server's per-second maximum. I am trying to use Promise.delay, but it doesn't seem to hold anything. This is how I'm using it:
function getAllRegions(req, res) {
  getAllRegionsHrefs().then(function (hrefs) {
    var chunks = _.chunk(hrefs, 25);
    return Promise.map(chunks, function (chunk) {
      return Promise.map(chunk, getRegion).then(function (getRegionResults) {
        for (var item in getRegionResults) {
          Promise.map(getRegionResults[item].constellations, getConstellationInfo).then(function (constellationInfo) {
            var chunks = _.chunk(constellationInfo, 150);
            return Promise.map(chunks, function (chunk) {
              return Promise.map(chunk, getSystem).delay(20000);
            });
          }).delay(20000);
        }
      }).delay(200000);
    });
  });
}
Now this will make just about 9k API requests, and it builds a universe based off regions, constellations and systems. Is there a better way of throttling the nested requests? The number of calls grows exponentially at each level of nesting.
Update
Still doesn't seem to want to wait. Did I mess up the nesting again? I added one more function in there that was needed for the flow. Also, the chunking seemed to wrap a double array, so I took it out.
function getAllRegions(req, res) {
  getAllRegionsHrefs().then(function (hrefs) {
    return Promise.mapSeries(hrefs, function (href) {
      getRegion(href).then(function (regions) {
        return Promise.mapSeries(regions.constellations, function (constellation) {
          getConstellationInfo(constellation).then(function (constellationInfo) {
            return Promise.map(constellationInfo, getSystem).delay(15000);
          });
        });
      });
    });
  });
}
Error (per request):
Unhandled rejection RequestError: Error: connect ETIMEDOUT
Promise.map starts all requests in parallel, so your code runs like:
requests -> delay
requests -> delay
requests -> delay
....
But the desired behavior should be sequential:
requests -> delay -> requests -> delay -> requests -> delay ...
Each chunk of requests should wait for the previous chunk's delay.
You could achieve this by chaining promises:
startRequests(chunks[0])
  .then(function () {
    return startRequests(chunks[1]);
  }).then(function () {
    return startRequests(chunks[2]);
  }); // ...
Or use mapSeries:
function getAllRegions(req, res) {
  getAllRegionsHrefs().then(function (hrefs) {
    return Promise.mapSeries(hrefs, function (href) {
      return getRegion(href).then(function (region) {
        var chunks = _.chunk(region.constellations, 150);
        return Promise.mapSeries(chunks, function (chunk) {
          return Promise.map(chunk, getSystem).delay(10000);
        });
      });
    });
  });
}
Only the innermost requests are chunked; the others are started one by one.
Edit: as Bergi said, returning the result promise is required for mapSeries (and the other promise utilities) to work, which is why getRegion(href) is returned above. The outermost promise can also be returned, for error handling or for chaining more promises:
function getAllRegions(req, res) {
  return getAllRegionsHrefs().then(function (hrefs) {
    //...

getAllRegions().then(systems => {
  //next step
}).catch(err => {
  //handle errors
});
