Rate limit API requests in node.js

First off, I'm new here, so please...be gentle...I've been teaching myself node.js over the last few months, mostly with the desire to scrape a bunch of data from the FlightAware website API.
I am trying to request a list of flights for a set of aircraft from their site; here is what I have so far.
var aircraft = [array, of, aircraft, tail, numbers];
for (var i = 0; i < aircraft.length; i++) {
    faGetFlight(aircraft[i], function doneLookup(data) {
        dbUpdateFlight(collectionName, data);
    });
}
This code works, but as soon as there are more than 10 aircraft in the list it fails, because it sends more than 10 API requests in a minute. What are some easy/straightforward ways to slow this down a little? I would like to send about 50-60 API requests total each time this runs, so I need it spaced over 5-6 minutes. The faGetFlight() function uses the 'request' module. I've tried the request-rate-limiter module instead of the request module, but I can't seem to make it work. I don't think the authorization works properly with the request-rate-limiter module; I'm getting an error about an anonymous user. For what it's worth, it works with just the request module instead, but then I run into the rate limit issues.
Here is my faGetFlight() code.
var RateLimiter = require('request-rate-limiter');
const REQS_PER_MIN = 10; // the API only allows 10 requests per minute
var limiter = new RateLimiter(REQS_PER_MIN);

function faGetFlight(acIdent, callback) {
    // API variables
    var apiUrl = 'url';
    var apiEndpoint = 'endpoint';
    var apiAuth = 'apikey';
    var apiExtData = 0;
    var apiHowMany = 15; // number of results to request
    var options = {
        method: 'GET',
        url: apiUrl + apiEndpoint,
        qs: { ident: acIdent },
        headers: { Authorization: apiAuth }
    };
    limiter.request(options, function doneDownload(error, response, body) {
        if (error) throw new Error(error);
        callback(body);
    });
}
Sorry if this isn't clear...it's my first post!

You can do a naive implementation using functions and a simple setTimeout.
See:
var aircrafts = [array, of, aircraft, tail, numbers];
var REQS_PER_MIN = 10;
var MAX_AMOUNT_REQUESTS = 100;
var timeout = (1 / REQS_PER_MIN) * 60 * 1000;

processAircraft(0);

function processAircraft(index) {
    // stop when the list is exhausted or the request cap is reached
    if (index >= aircrafts.length || index >= MAX_AMOUNT_REQUESTS)
        return console.log("All done!");

    // on start of function, schedule next processing in "timeout" ms
    setTimeout(function () {
        processAircraft(index + 1);
    }, timeout);

    faGetFlight(aircrafts[index], function doneLookup(data) {
        dbUpdateFlight(collectionName, data);
    });
}
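If you prefer promises, roughly the same spacing can be done with async/await and a small delay helper. This is only a minimal sketch; it assumes faGetFlight keeps the callback signature from the question and that REQS_PER_MIN, dbUpdateFlight and collectionName exist as above:
// Minimal sketch: space requests out with async/await instead of recursive setTimeout.
const DELAY_MS = (1 / REQS_PER_MIN) * 60 * 1000; // e.g. 6000 ms for 10 requests per minute

function delay(ms) {
    return new Promise(function (resolve) { setTimeout(resolve, ms); });
}

function faGetFlightAsync(ident) {
    // wrap the callback-style faGetFlight in a Promise
    return new Promise(function (resolve) {
        faGetFlight(ident, resolve);
    });
}

async function processAll(aircrafts) {
    for (const ident of aircrafts) {
        const data = await faGetFlightAsync(ident);
        dbUpdateFlight(collectionName, data);
        await delay(DELAY_MS); // wait before firing the next request
    }
    console.log("All done!");
}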

Related

send multiple data from backend to front end

I am using the fetch API to get data from the backend. The data I am getting is dynamic, and more and more data keeps being produced. What I want is to send the data to the front end as soon as I get it in the backend. How can I achieve this? I have coded a sample example of my scenario below. Thanks in advance.
fetch('/api/blah', {
    method: 'POST',
    headers: {
        'Content-Type': 'application/json'
    },
    body: JSON.stringify({
        request: `{"requestType": "numbers"}`
    })
})
    .then((res) => res.json())
    .then(data => {
        if (data.status == 'success') {
            const numbers = data.numbers
            console.log(numbers)
        }
    });
const distribution = async (req, res) => {
    const request = JSON.parse(req.body.request)
    if (request.requestType == 'numbers') {
        var ceiling = 100;
        var floor = 1;
        var x = 1;
        var step = 1;
        setInterval(function () {
            res.send({
                status: 'success',
                numbers: x
            })
            x += step;
            if (x === ceiling || x === floor) {
                step = -step;
            }
        }, 500);
    }
}
You can use sockets to get your desired output. You can follow this link for more info: Send message to specific client with socket.io and node.js
const { Server } = require("socket.io"),
    server = new Server(8000);

var ceiling = 100;
var floor = 1;
var x = 1;
var step = 1;

let sequenceNumberByClient = new Map();

// event fired every time a new client connects:
server.on("connection", (socket) => {
    console.info(`Client connected [id=${socket.id}]`);
    // initialize this client's sequence number
    sequenceNumberByClient.set(socket, 1);
    // when socket disconnects, remove it from the list:
    socket.on("disconnect", () => {
        sequenceNumberByClient.delete(socket);
        console.info(`Client gone [id=${socket.id}]`);
    });
});

// emit your data to each connected client
setInterval(function () {
    for (const [client, sequenceNumber] of sequenceNumberByClient.entries()) {
        client.emit("numbers", x);
        x += step;
        if (x === ceiling || x === floor) {
            step = -step;
        }
    }
}, 500);
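On the front end, a minimal matching listener might look like this. It is only a sketch; it assumes the socket.io client library is loaded and the server above is listening on port 8000:
// Minimal browser-side sketch for the server above.
const socket = io("http://localhost:8000");

socket.on("connect", () => {
    console.log("connected", socket.id);
});

// receive each value as the server emits it
socket.on("numbers", (x) => {
    console.log("number from server:", x);
});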
For passing data from the back-end to the front-end dynamically, you can use jQuery AJAX
https://www.w3schools.com/js/js_ajax_intro.asp
In your case, you can call the AJAX endpoint once every few minutes or so to get the new data.
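For example, a minimal polling sketch against the /api/blah endpoint from the question could look like this (the once-a-minute interval is just an example; adjust it to "every few minutes" as needed):
// Minimal polling sketch, assuming the /api/blah endpoint from the question
// responds with { status: 'success', numbers: ... } on each call.
setInterval(function () {
    $.ajax({
        url: '/api/blah',
        method: 'POST',
        contentType: 'application/json',
        data: JSON.stringify({ request: '{"requestType": "numbers"}' }),
        success: function (data) {
            if (data.status == 'success') {
                console.log(data.numbers);
            }
        }
    });
}, 60 * 1000);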
In a REST API you can send a response to a request only once. You cannot use setInterval to generate multiple responses.
Instead, you should add all the possible data to an array and put that array in your response once.
const distribution = async (req, res) => {
    const request = JSON.parse(req.body.request);
    if (request.requestType == "numbers") {
        var ceiling = 100;
        var step = 1;
        var return_arr = [];
        for (let i = 1; i <= ceiling; i += step) {
            return_arr.push(i);
        }
        res.send({
            status: "success",
            numbers: return_arr,
        });
    }
};
If I understand correctly, you want to send events from your server to your frontend as soon as data changes on the server. In order to achieve this you will need a websocket connection from the frontend to the server. Websockets are a bidirectional connection between your server and client. While clients are connected to the server via sockets, the server can send events to the frontend without receiving a request first. I suggest using socket.io for this.
You can find more info about socket.io here:
https://socket.io/docs/v4/
There are alternatives such as the WebSocket API. You can find more info about the WebSocket API in the Mozilla web docs here:
https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API
I also found this article on how to implement websockets without socket.io, since socket.io is more like a framework on top of plain websockets:
https://www.piesocket.com/blog/nodejs-websocket
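For reference, a bare-bones push server without socket.io might look like this. This is only a sketch using the 'ws' package; the port and the payload shape are placeholders:
// Minimal sketch of a plain websocket server using the 'ws' package (npm install ws).
// Port 8080 and the message payload are placeholders.
const { WebSocketServer } = require('ws');

const wss = new WebSocketServer({ port: 8080 });

wss.on('connection', (ws) => {
    // the server can push data whenever it wants, no client request needed
    const timer = setInterval(() => {
        ws.send(JSON.stringify({ numbers: Date.now() % 100 }));
    }, 500);

    ws.on('close', () => clearInterval(timer));
});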

Add delay response in nodejs

I am a beginner at nodejs. My problem is that I have 16 clients sending requests to the server. The server must aggregate the 16 requests and send the combined result back to each client (each client will receive data from all 16 requests, of which 15 are from other clients).
How can I add a delay on the server (nodejs) so it waits for the requests to be aggregated and then sends the result to each client?
Can you help me?
You have to ensure that all the clients send a parameter called batchId. You can simply:
Keep a cache
Put the request body & response in there, keyed by batchId
On every client request, push the body & response using the batchId
On every client request, check if the batch size is 16
If it is 16, aggregate and respond to all the requests at once
Delete the local cache entry for the batch
const requestsCache = {};

function aggregateAndRespond(batchId) {
    const arrivedRequests = requestsCache[batchId];
    // process "body" property in the "arrivedRequests" elements
    const result = processRequests(arrivedRequests);
    for (let arrivedRequest of arrivedRequests) {
        arrivedRequest.response.json(result);
    }
    delete requestsCache[batchId];
}

app.post('/client-requests', function (request, response) {
    const body = request.body;
    const batchId = body.batchId;
    const arrivedRequests = requestsCache[batchId] || [];
    requestsCache[batchId] = arrivedRequests; // make sure the batch is stored in the cache
    arrivedRequests.push({ response, body });
    if (arrivedRequests.length === 16) {
        aggregateAndRespond(batchId);
    }
});
Update:
const requestsCache = {};

function aggregateAndRespond(batchId) {
    const hours = 2;
    const batchDetails = requestsCache[batchId];
    const arrivedRequests = batchDetails.requests;
    // process "body" property in the "arrivedRequests" elements
    const result = processRequests(arrivedRequests);
    for (let arrivedRequest of arrivedRequests) {
        arrivedRequest.response.json(result);
    }
    batchDetails.processed = true;
    // batch details cannot be deleted immediately because,
    // if a request for the same batch arrives after the timeout,
    // it would end up creating a new batch details entry.
    // if that is desired, replace the timer lines below with "delete requestsCache[batchId];"
    // otherwise, as the batch details are marked as "processed", remaining requests of the same batch
    // will simply be dropped.
    // The batch details will also be dropped after 2 hours.
    // If you think a shorter duration would suffice, update accordingly.
    setTimeout(function someMoreTimeLater() {
        delete requestsCache[batchId];
    }, hours * 60 * 60 * 1000);
}

function tryAggregateAndRespond(batchId, timeoutInMinutes, requestData) {
    const defaultBatchDetails = {
        processed: false,
        requests: [],
        aggregateTimerReference: null
    };
    const batchDetails = requestsCache[batchId] || defaultBatchDetails;
    requestsCache[batchId] = batchDetails; // make sure the batch is stored in the cache
    if (batchDetails.processed === true) {
        return true;
    }
    batchDetails.requests.push(requestData);
    // the timer is reset every time a request arrives.
    // if no request arrives for the configured time after the last request,
    // aggregation will kick in.
    // if you want the timer to be set only at the first request,
    // delete the next line and uncomment the "if" block after that.
    clearTimeout(batchDetails.aggregateTimerReference);
    //if(batchDetails.aggregateTimerReference !== null) {
    //    return false;
    //}
    batchDetails.aggregateTimerReference = setTimeout(function someTimeLater() {
        aggregateAndRespond(batchId);
    }, timeoutInMinutes * 60 * 1000);
    return false;
}

app.post('/client-requests', function (request, response) {
    const timeoutInMinutes = 2;
    const body = request.body;
    const batchId = body.batchId;
    const endRequest = tryAggregateAndRespond(batchId, timeoutInMinutes, { response, body });
    if (endRequest === true) {
        response.json({ message: 'Batch already processed', batchId });
    }
});
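On the client side, each of the 16 clients would then send the same batchId in its request body, for example as in this minimal sketch (clientId and payload are hypothetical fields, and it assumes a JSON body parser is enabled on the express app):
// Minimal client sketch: every client in the same batch sends the same batchId.
fetch('/client-requests', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ batchId: 'batch-1', clientId: 7, payload: 'some data' })
})
    .then((res) => res.json())
    .then((aggregated) => console.log('aggregated result:', aggregated));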

Node JS - Increasing latency for get requests inside a map method

I have a fairly straightforward application which 1) accepts an array of URLs, 2) iterates over those URLs, and 3) makes a GET request to stream media (video/audio). Below is a code snippet illustrating what I'm doing:
import request from 'request';
import { promisify } from 'util';

const tasks = urlsArray.map(async (url: string) => {
    const startTime = process.hrtime();
    let body = '';
    let headers = {};
    try {
        const response = await promisify(request).call(request, {
            url, method: 'GET',
            encoding: null, timeout: 10000
        });
    } catch (e) {
        logger.warn('failed to make request');
    }
    const [seconds] = process.hrtime(startTime);
    logger.info(`Took ${seconds} seconds to make request`);
    return body;
});

(await Promise.all(tasks)).forEach((body) => {
    // processing body...
});
What I'm currently experiencing is that the time to make the request keeps rising as I make more requests and I'm struggling to understand why that is the case.

limit maxsockets request package

I am trying to limit the request package's maxSockets.
I use Fiddler to test how many concurrent connections it uses.
As far as I can see, it doesn't apply the limit of 10 that I try to set.
I don't want the request module to use more than 10 concurrent connections.
What might I be doing wrong?
process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
var loglar = 'idler2.txt';
var url = require('url');
var util = require('util');
var http = require('http');
var https = require('https');
var request = require('request');
var fs = require('fs');
var linkler = [];
var starttime = Math.round(new Date().getTime() / 1000);

http.globalAgent.maxSockets = 10;
https.globalAgent.maxSockets = 10;

var timeoutsure = 30 * 1000;
var success = 0, fail = 0;

process.on('exit', onExit);

function onExit() {
    console.log('\n%d secs,\n%d suc ,\n%d fail\n---------------------\n',
        (Math.round(new Date().getTime() / 1000)) - starttime, success, fail);
}

function logla(data) {
    var fd = fs.openSync(loglar, 'a+');
    fs.writeSync(fd, data);
    fs.closeSync(fd);
}

// the defaulted client must be defined before the loop below uses it
var r = request.defaults({ 'proxy': 'http://127.0.0.1:8888', pool: { maxSockets: 10 } });

for (var i = 0; i < 1000; i++) {
    sorgutest();
}

function sorgutest() {
    r.get({ url: 'https://freegeoip.net/json/', pool: { maxSockets: 10 } }, function optionalCallback(err, httpResponse, body) {
        if (err) {
            fail++;
            return console.error('failed: 49', err);
        }
        else {
            try {
                bodyjson = JSON.parse(body);
                logla(body);
                success++;
            }
            catch (e) { console.log("hamina 54"); }
        }
    });
}
As stated in the documentation:
Note that if you are sending multiple requests in a loop and creating multiple new pool objects, maxSockets will not work as intended. To work around this, either use request.defaults with your pool options or create the pool object with the maxSockets property outside of the loop.
Try changing the pool option of request.defaults, or create a pool object and use it for all request calls.
Edit:
I've noticed that you already use request.defaults; the fix should be to simply remove the pool option from the r.get call.
from:
r.get({url:'https://freegeoip.net/json/',pool: {maxSockets: 10}}, function optionalCallback(err, httpResponse, body) {
to:
r.get({url:'https://freegeoip.net/json/'}, function optionalCallback(err, httpResponse, body) {
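As the quoted documentation mentions, the other option is to create one pool object outside the loop and pass that same object to every call. A minimal sketch of that approach:
// Minimal sketch of the second approach from the docs: one shared pool object,
// created once and reused by every request call.
var sharedPool = { maxSockets: 10 };

function sorgutest() {
    request.get({
        url: 'https://freegeoip.net/json/',
        proxy: 'http://127.0.0.1:8888',
        pool: sharedPool // same object for every call, so the socket limit is shared
    }, function (err, httpResponse, body) {
        // handle the response as before
    });
}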

How to concurrent download files using cheerio and nodejs?

I have a website with multiple pages; each page lists download links which I want to scrape and download.
I have a few issues with it:
My script only downloads about 4-5 files and then gets stuck.
I would like to concurrently download as many files as my CPU can handle.
I got stuck with the maximum event emitters warning; I don't understand why that happens, so I just raised the listener limit.
How do I follow redirects purely using the request module (without follow-redirects)?
How do I download the file the way the browser does, without specifying its name? There is no content-disposition header, but I think the browser follows the redirects and the redirected URL has the filename in its path.
My current code looks like so:
var request = require('request');
var cheerio = require('cheerio');
var https = require('follow-redirects').https;
var fs = require('fs');
require('events').EventEmitter.prototype._maxListeners = 1000;

for (var i = 1; i <= 10000; i++) {
    (function (i) {
        url = 'http://mywebsite.com/files?page=' + i;
        request(url, gotHTML)
    })(i);
}

function gotHTML(err, resp, html) {
    var $ = cheerio.load(html);
    $('.file-header').each(function () {
        var data = $(this);
        var fileLink = data.children().first().children().first().attr('href');
        var fileName = fileLink.substring(10);
        var downloadLink = 'https://mywebsite.com/api/download/' + fileName;
        download(downloadLink, function () {
            console.log('downloaded');
        })
    })
}

function download(url, cb) {
    var request = https.get(url, function (response) {
        var location = request.res.headers.location;
        console.log(location);
        location = location.split('/').pop();
        console.log(location);
        var file = fs.createWriteStream(location);
        response.pipe(file);
        file.on('finish', function () {
            file.close(cb);
        });
    });
}
The default HTTP/HTTPS Agent only uses a maximum of 5 sockets (maxSockets) for requests to the same origin. So this could be causing some issues for you.
Try changing this:
var request = https.get(url, function(response) {
to this:
var options = require('url').parse(url);
options.agent = false; // or use a custom https.Agent with a higher `maxSockets`
var request = https.get(options, function(response) {
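If you would rather keep pooling but with a higher ceiling, you can also create one agent up front and reuse it for every download. This is only a sketch; the maxSockets value of 20 is just an example, and it assumes the follow-redirects https in your code passes the agent option through to the underlying request:
// Minimal sketch: a dedicated agent with a higher socket limit, shared by all downloads.
var httpsCore = require('https'); // node core https, used only to create the agent
var agent = new httpsCore.Agent({ keepAlive: true, maxSockets: 20 });

function download(url, cb) {
    var options = require('url').parse(url);
    options.agent = agent; // every download reuses this agent and its socket pool
    https.get(options, function (response) {
        // ...pipe the response to a file and call cb, as in the original download()
    });
}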
