How to handle an HTTP request that stops sending data - node.js

I'm trying to download videos from some URLs, and for some reason the downloads will sometimes stop before they're complete: res.on('data', ...) stops being called (using https.get) before the response has finished. I would like to detect this and retry the request after a given time, but I'm having a hard time getting working code for it. I'm hoping there is a built-in way to deal with something like this that I'm missing. I've already tried setting a timeout on the request and a timeout on the response, and neither seems to do the trick.
Current attempt:
async function downloadVideo(send) {
  // downloadUrl, name, index, filePath, fileSize and throttle are defined elsewhere
  try {
    https.get(downloadUrl, res => {
      let timerId;
      res.on(`data`, (c) => {
        // Reset the watchdog on every chunk; if no chunk arrives for 2s, retry
        clearTimeout(timerId);
        timerId = setTimeout(() => {
          res.destroy();
          console.log(`retrying`, name);
          downloadVideo(send);
        }, 2000);
        if (!throttle) {
          send(`PercentDownloaded`, [
            index,
            100 * fileSize(filePath) / res.headers[`content-length`]
          ]);
          throttle = true;
          setTimeout(() => throttle = false, 500);
        }
      });
      res
        .pipe(fs.createWriteStream(filePath))
        .on(`finish`, () => {
          clearTimeout(timerId);
          send(`Done`, index);
        });
    });
  } catch (error) {
    console.log(name, error);
    send(`DownloadError`, index);
  }
}
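
For reference, here is a minimal sketch of the stall-detection pattern the question describes, with a bounded retry count. Note that network errors from https.get are emitted on the request object, so a try/catch like the one above never sees them. The names url, dest, stallMs and maxRetries are placeholders, not from the original code:

const https = require('https');
const fs = require('fs');

function download(url, dest, stallMs = 2000, maxRetries = 3) {
  return new Promise((resolve, reject) => {
    const attempt = retriesLeft => {
      let settled = false; // guard against retrying twice for one attempt
      const retryOrFail = err => {
        if (settled) return;
        settled = true;
        if (retriesLeft > 0) attempt(retriesLeft - 1);
        else reject(err);
      };
      const req = https.get(url, res => {
        let watchdog;
        const reset = () => {
          // restart the watchdog on every chunk; if it ever fires,
          // the response has stalled
          clearTimeout(watchdog);
          watchdog = setTimeout(() => {
            res.destroy();
            retryOrFail(new Error('download stalled'));
          }, stallMs);
        };
        reset();
        res.on('data', reset);
        res.pipe(fs.createWriteStream(dest)).on('finish', () => {
          clearTimeout(watchdog);
          settled = true;
          resolve();
        });
      });
      // network errors surface here, not in a try/catch around https.get
      req.on('error', retryOrFail);
    };
    attempt(maxRetries);
  });
}

Usage would be something like download(downloadUrl, filePath).then(...), with the progress reporting from the original layered back on top.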

Related

Chunking axios.get requests with a 1 second delay per chunk - presently getting 429 error

I have a script using axios that hits an API with a limit of 5 requests per second. At present my request array length is 72, and it will grow over time. I receive an abundance of 429 errors. The responses per endpoint change with each run of the script; e.g. url1 on iteration 1 returns 429, then url1 on iteration 2 returns 200, url1 on iteration 3 returns 200, url1 on iteration 4 returns 429.
Admittedly, my understanding of async/await and promises is spotty at best.
What I understand:
- I can have multiple axios.get calls running because of async. The variable I set in my main that uses the async function can include await to ensure all requests have processed before continuing the script.
- Promise.all can run multiple axios.get calls, but if a single request fails the chain breaks and no more requests will run.
- Because the API will only accept 5 requests per second, I have to chunk my axios.get requests into groups of 5 endpoints and wait for those to finish processing before sending the next chunk of 5.
- setTimeout will assign a time limit to a single request; once the time is up the request is done and will not be sent again, no matter that the return is other than 200.
- setInterval will assign a time limit, but it will send the request again after the time is up and keep requesting until it receives a 200.
async function main() {
  // returns an array of string API endpoints ['www.url1.com', 'www.url2.com', ...]
  var endpoints = makeEndpoints(boards, whiteList);
  var events = await getData(endpoints);
  ...
}
getData() has seen many iterations in an attempt to correct the 429s. Here are a few:
// will return the 200s sometimes and not others; I believe it's the timeout,
// but that won't attempt to hit a failed url again (as I understand it)
async function getData(endpoints) {
  let events = [];
  for (let x = 0; x < endpoints.length; x++) {
    try {
      let response = await axios.get(endpoints[x], { timeout: 2000 });
      if (response.status == 200 &&
          response.data.hasOwnProperty('_embedded') &&
          response.data._embedded.hasOwnProperty('events')
      ) {
        let eventsArr = response.data._embedded.events;
        eventsArr.forEach(event => {
          events.push(event)
        });
      }
    } catch (error) {
      console.log(error);
    }
  }
  return events;
}
// returns a great many 429 errors via the setInterval; as I understand it,
// this function sets a delay of N seconds before attempting the next call
async function getData(endpoints) {
  let data = [];
  let promises = [];
  endpoints.forEach((url) => {
    promises.push(
      axios.get(url)
    )
  })
  setInterval(function() {
    for (let i = 0; i < promises.length; i += 5) {
      let requestArr = promises.slice(i, i + 5);
      axios.all(requestArr)
        .then(axios.spread((...res) => {
          console.log(res);
        }))
        .catch(err => {
          console.log(err);
        })
    }
  }, 2000)
}
// Here I hoped Promise.all would allow each request to do its thing and return
// the data, but after further reading I found that if a single request fails
// the rest will fail in the Promise.all
async function getData(endpoints) {
  try {
    const res = await Promise.all(endpoints.map(url => axios.get(url))).catch(err => {});
    return res;
  } catch {
    throw Error("Promise failed");
  }
}
// Returns so many 429s, and only 3/4 of the data I know to expect
async function getData(endpoints) {
  const someFunction = () => {
    return new Promise(resolve => {
      setTimeout(() => resolve('222'), 100)
    })
  }
  const requestArr = endpoints.map(async data => {
    let waitForThisData = await someFunction(data);
    return axios.get(data)
      .then(response => { console.log(response.data) })
      .catch(error => console.log(error.toString()))
  });
  Promise.all(requestArr).then(() => {
    console.log('resolved promise.all')
  })
}
// Seems to get close to solving it, but once an error is hit,
// Promise.all stops processing endpoints
async function getData(endpoints) {
  (async () => {
    try {
      const allResponses = await Promise.all(
        endpoints.map(url => axios.get(url).then(res => console.log(res.data)))
      );
      console.log(allResponses[0]);
    } catch (e) {
      console.log(e);
      // handle errors
    }
  })();
}
It seems like I have so many relevant pieces, but I cannot connect them in an efficient and working model. Perhaps axios has something completely unknown to me? I've also tried using bluebird's concurrency option to limit the requests to 5 per attempt, but that still returned 429s from axios.
I've been staring at this for days, and with so much new information swirling in my head I'm at a loss as to how to send 5 requests per second, await the responses, then send another set of 5 requests to the API.
Guidance/links/ways to improve upon the question would be much appreciated.
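
For what it's worth, here is a minimal sketch of the shape the question describes: chunk the endpoints into groups of five, run each group with Promise.allSettled so a single 429 cannot reject the whole batch (unlike Promise.all), and wait at least a second between groups. It assumes Node 12.9+ for allSettled; the one-second delay is an assumption based on the stated 5-requests-per-second limit:

const axios = require('axios');

const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

async function getData(endpoints) {
  const events = [];
  for (let i = 0; i < endpoints.length; i += 5) {
    const chunk = endpoints.slice(i, i + 5);
    // allSettled resolves with a status per request instead of
    // rejecting everything when one request fails
    const results = await Promise.allSettled(chunk.map(url => axios.get(url)));
    for (const r of results) {
      if (r.status === 'fulfilled' &&
          r.value.data.hasOwnProperty('_embedded') &&
          r.value.data._embedded.hasOwnProperty('events')) {
        events.push(...r.value.data._embedded.events);
      }
      // rejected entries (e.g. 429s) could be collected here for a retry pass
    }
    await sleep(1000); // stay under 5 requests per second
  }
  return events;
}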

Problem getting puppeteer-cluster to wait on a page event before closing

I'm currently setting up a CI environment to automate the e2e tests our team runs in a test harness. I am setting this up on GitLab and currently using Puppeteer. I have an event that fires from our test harness that designates when the test is complete. Now I am trying to "pool" the execution so I don't use up all resources or run out of listeners. I decided to try out "puppeteer-cluster" for this task. I am close to having things working, however I can't seem to get it to wait for the event on the page before closing the browser. Prior to using puppeteer-cluster, I was passing a callback into my function, and when the custom event fired (injected via exposeFunction), I would call it. That callback function is now being passed in as data, though, and is therefore not awaited. I can't seem to find a way to make the execution wait and was hoping someone might have an idea here. If anyone has any recommendations, I'd love to hear them.
test('Should launch the browser and run e2e tests', async (done) => {
  try {
    const cluster = await Cluster.launch({
      concurrency: Cluster.CONCURRENCY_CONTEXT,
      maxConcurrency: 10,
      monitor: false,
      timeout: 1200000,
      puppeteerOptions: browserConfig
    });
    // Print errors to console
    cluster.on("taskerror", (err, data) => {
      console.log(`Error crawling ${data}: ${err.message}`);
    });
    // Set up our task to be run
    await cluster.task(async ({ page, data: { testUrl, isLastIndex, cb }, worker }) => {
      console.log(`Test starting at url: ${testUrl} - isLastIndex: ${isLastIndex}`);
      await page.goto(testUrl);
      await page.waitForSelector('#testHarness');
      await page.exposeFunction('onCustomEvent', async (e) => {
        if (isLastIndex === true) {
          // Make a call to our callback, finalizing that tests are complete
          cb();
        }
        console.log(`Completed test at url: ${testUrl}`);
      });
      await page.evaluate(() => {
        document.addEventListener('TEST_COMPLETE', (e) => {
          window.onCustomEvent('TEST_COMPLETE');
          console.log("TEST COMPLETE");
        });
      });
    });
    // Perform the assignment of all of our xml tests to an array
    let arrOfTests = await buildTestArray();
    const arrOfTestsLen = arrOfTests.length;
    for (let i = 0; i < arrOfTestsLen; ++i) {
      // push our tests onto the task queue
      await cluster.queue({ testUrl: arrOfTests[i], isLastIndex: (i === arrOfTestsLen - 1), cb: done });
    }
    await cluster.idle();
    await cluster.close();
  } catch (error) {
    console.log('ERROR:', error);
    done();
    throw error;
  }
});
So I got something working, but it really feels hacky to me and I'm not sure it is the right approach. Should anyone have the proper way of doing this, or a more recommended way, don't hesitate to respond. I am posting it here should anyone else deal with something similar. I was able to get this working with a bool and setInterval. I have pasted the working result below.
await cluster.task(async ({ page, data: { testUrl, isLastIndex, cb }, worker }) => {
  let complete = false;
  console.log(`Test starting at url: ${testUrl} - isLastIndex: ${isLastIndex}`);
  await page.goto(testUrl);
  await page.waitForSelector('#testHarness');
  await page.focus('#testHarness');
  await page.exposeFunction('onCustomEvent', async (e) => {
    console.log("Custom event fired");
    if (isLastIndex === true) {
      // Make a call to our callback, finalizing that tests are complete
      cb();
      complete = true;
      //console.log(`VAL IS ${complete}`);
    }
    console.log(`Completed test at url: ${testUrl}`);
  });
  // This will run on the actual page itself. So set up an event listener for
  // the TEST_COMPLETE event sent from the test harness itself
  await page.evaluate(() => {
    document.addEventListener('TEST_COMPLETE', (e) => {
      window.onCustomEvent('TEST_COMPLETE');
    });
  });
  // Poll the flag until the exposed function flips it
  await new Promise(resolve => {
    try {
      let timerId = setInterval(() => {
        if (complete === true) {
          resolve();
          clearInterval(timerId);
        }
      }, 1000);
    } catch (e) {
      console.log('ERROR ', e);
    }
  });
});
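
One way to avoid the polling, sketched against the same task shape as above: create a promise up front, resolve it from inside the exposed function, and await it at the end of the task. This is only a sketch; error handling is left to the cluster's own timeout:

await cluster.task(async ({ page, data: { testUrl, isLastIndex, cb }, worker }) => {
  await page.goto(testUrl);
  await page.waitForSelector('#testHarness');

  // Capture the resolver so the exposed function can settle the promise directly
  let markComplete;
  const testComplete = new Promise(resolve => { markComplete = resolve; });

  await page.exposeFunction('onCustomEvent', () => {
    if (isLastIndex === true) {
      cb();
    }
    markComplete();
  });

  await page.evaluate(() => {
    document.addEventListener('TEST_COMPLETE', () => {
      window.onCustomEvent('TEST_COMPLETE');
    });
  });

  // Wait here until the page fires TEST_COMPLETE, no setInterval needed
  await testComplete;
});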

Watch an API for Updates

I am trying to watch the API at https://api.exchangeratesapi.io/latest and detect any changes so I can update my clients, just like a webhook notification. But what I am doing is:
while (true) {
  fetch('https://api.exchangeratesapi.io/latest')
    .then(res => ......
}
I am caching the result and always checking whether there are any changes; if there are, I send a request to the client.
I am looking for a better way to pull the data instead of using a while loop.
If you use a while loop, you would potentially send many requests before one returns. And even when one returns, the responses are not necessarily going to arrive in order. Here is a quick example of what might happen if there is a network spike, for example:
const sleep = ms => new Promise(res => setTimeout(res, ms));

async function mockNetworkCall(num) {
  const timings = [50, 150, 50];
  console.log(`sending request ${num}`);
  await sleep(timings[num]);
  console.log(`request ${num} finished`);
}

for (let i = 0; i < 3; i++) {
  mockNetworkCall(i);
}
You could avoid the while loop if you instead take the approach to only do a new request when the last one finishes. In that case, you will only have a single request active at any one time and you know that you're getting the result in order.
You can wrap the logic for that in a simple function to watch a URL and only re-initiate a request when the previous one is finished. This is the skeleton for such a function - it might need tweaking according to your needs:
function watch({ url, retryCount, lastResponse = null }) {
  fetch(url)
    .then(res => {
      /* do something */
      return res;
    })
    .then(res => watch({ url, retryCount, lastResponse: res })) // launch again
    .catch(err => {
      /* handle error */
      console.error("error getting URL", err);
      console.warn("retries left: ", retryCount);
      if (retryCount-- > 0) {
        watch({ url, retryCount, lastResponse });
      }
    });
}

watch({ url: "google.com", retryCount: 3 });
Or the same using async/await
async function watch({ url, retryCount, lastResponse = null }) {
  try {
    const res = await fetch(url);
    /* do something */
    watch({ url, retryCount, lastResponse: res }); // launch again
  } catch (err) {
    /* handle error */
    console.error("error getting URL", err);
    console.warn("retries left: ", retryCount);
    if (retryCount-- > 0) {
      watch({ url, retryCount, lastResponse });
    }
  }
}

watch({ url: "google.com", retryCount: 3 });
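If hitting the endpoint again the instant a response arrives is too aggressive, the async/await version can be paced with a small sleep between polls. The interval length and the sleep helper are assumptions, not part of the original answer:

const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

async function watch({ url, retryCount, intervalMs = 5000, lastResponse = null }) {
  try {
    const res = await fetch(url);
    /* compare res with lastResponse and notify clients on change */
    await sleep(intervalMs); // breathe between polls
    watch({ url, retryCount, intervalMs, lastResponse: res });
  } catch (err) {
    console.error("error getting URL", err);
    if (retryCount-- > 0) {
      await sleep(intervalMs);
      watch({ url, retryCount, intervalMs, lastResponse });
    }
  }
}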

Multiple and fast external POST requests

I have a node.js server that makes POST requests to an external API. Each time I have to make ~10k requests (don't worry, I'm not abusing the API) and I need it to take around 2-3 minutes.
I'm using the request-promise library to make the requests, along with Promise.all() to wait for all the requests to resolve.
My problem is that the requests seem stuck and are not running in parallel. I know that a promise executes as soon as it's created, but it seems that the resolve event can only listen to about 10 events at one time.
I tried updating maxListeners and also using es6-promise-pool (with a pool of 500), but no luck.
My next solution will probably be to use child_process with fork; does this seem like the best solution for my problem?
Thanks!
code:
async function send_msg(msg) {
  return new Promise(function (resolve, reject) {
    request.post(options, function (err, res, body) {
      if (err) {
        logger.error('error sending msg ' + err);
        resolve(null);
      } else {
        resolve(body);
      }
    })
  });
}

async function send_msgs() {
  let msgs = await OutgoingMessage.findAll();
  let promises = [];
  for (let i = 0; i < msgs.length; i++) {
    promises.push(send_msg(msgs[i]).then(async (result) => {
      if (result != null) {
        try {
          let sid = result['MessageSid'];
          let status = result['Status'];
          msgs[i].update({sid: sid, status: status});
        } catch (e) {
          logger.error(e + JSON.stringify(result));
          msgs[i].update({status: 'failed'});
        }
      }
    }));
  }
  return Promise.all(promises);
}
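
As a point of comparison, a minimal sketch of batching the sends so only a fixed number of requests are in flight at once. BATCH_SIZE is an assumption; tune it against what the external API tolerates:

async function send_msgs_batched() {
  const msgs = await OutgoingMessage.findAll();
  const BATCH_SIZE = 100; // assumed; tune to the API
  const results = [];
  for (let i = 0; i < msgs.length; i += BATCH_SIZE) {
    const batch = msgs.slice(i, i + BATCH_SIZE);
    // Each batch completes before the next starts, so at most
    // BATCH_SIZE requests are in flight at any one time
    results.push(...(await Promise.all(batch.map(msg => send_msg(msg)))));
  }
  return results;
}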

Looping a perpetual UDP packet ping in Node.js (3s interval)

I am trying to build a loop that my server will run, pinging an Arduino which will return data when it receives a packet from my IP. I already have the MongoDB collection working for the data; I'm just stuck at the point where I would like my server to begin pinging the Arduino every few seconds from start-up.
Note: this is really my first crack at understanding async JS, and this is likely just a noob mistake.
Code:
const dgram = require('dgram');
const message = Buffer.from('Some bytes');
const clientPing = dgram.createSocket('udp4');
const pinging = true;

function ping() {
  return new Promise((resolve, reject) => {
    // This chunk
    clientPing.send("Hi Uno", 8888, 'xxx.xx.xx.xxx', (err) => {
      clientPing.close();
    });
    // When run on its own, successfully pings the arduino
    const error = false;
    if (error == true) {
      reject('Error');
    } else {
      resolve('Pinged');
    }
  });
}

// Working ping
ping().then().catch(err => console.log(err));
All of the above was meant to act as simple proof that the promise works and successfully emits a UDP packet to the target.
function loop() {
  while (pinging == true) {
    setTimeout(() => {
      ping().then().catch(err => console.log(err));
      console.log('Pinged');
    }, 3000);
  }
}
The rest of these are just different combinations of how I've tried to solve the problem. As far as I can tell I don't have a memory leak, as the loop runs just fine without increasing resource consumption.
function loop() {
  console.log("entered loop")
  while (pinging == true) {
    setTimeout(() => {
      clientPing.send("Hi Uno", 8888, 'xxx.xx.xx.xxx', (err) => {
        clientPing.close();
      });
      console.log('Pinged');
    }, 3000);
  }
}
This one was intended to run just the code for the ping without using the promise, but it still doesn't yield the desired result. The console.log above is printed, but the timeout never seems to trigger.
loop();
This just runs one of the two loop() functions.
do {
  setTimeout(() => {
    ping().then().catch(err => console.log(err));
    console.log("pinged");
  }, 2000)
} while (pinging == true)
Lastly I thought I'd try do...while, but also without success.
If all of these are pasted into a document sequentially, it builds my file. Simply uncomment/comment out each section as needed.
Am I just missing something obvious or is there something fairly complex here that's stopping this from working?
Thanks!
Your ping function is written incorrectly. It resolves the promise just after sending the ping, not in the send callback. Please change it like this:
function ping() {
  return new Promise((resolve, reject) => {
    clientPing.send("Hi Uno", 8888, 'xxx.xx.xx.xxx', (err) => {
      clientPing.close();
      // Settle the promise from inside the callback, based on the actual error
      if (err) {
        reject('Error');
      } else {
        resolve('Pinged');
      }
    });
  });
}
As for looping: you shouldn't use a synchronous loop, since it won't wait for the setTimeout or for your function to finish executing.
For async looping you can use async/await. For example:
function wait() {
  return new Promise((resolve, reject) => {
    setTimeout(() => {
      resolve()
    }, 3000)
  })
}

// awaits are only legal inside an async function, so wrap the loop;
// n is the number of pings you want to send
async function loop(n) {
  for (let i = 0; i < n; i++) {
    await ping();
    await wait();
  }
}
