Node.js promises not working on local server - node.js

I created "small" server that shoud use NodeJs and promises to copy and compres files of given type from given directory. It had to be separeted, each in promise - reading files and packing them. First I created just those promises to check if they would work, and they did:
function packFile(file){
    var fileName = path.basename(file);
    console.log(' (+) Add to the archive : ' + fileName + ' [' + file + ']');
    archive.append(fs.createReadStream(file), { name: fileName });
}

Q.nfcall(recursive, pathGiven, filterGiven)
    .then(function(data) {
        var chain = Q.when();
        for (var i = 0; i < data.length; i++) {
            console.log(' > ' + data[i] + ' > adding to chain ...');
            chain = chain.then(packFile.bind(this, data[i]));
        }
        chain.then(function() {
            archive.finalize();
        });
    });
Of course, I added the following before the previously presented code:
fs = require('fs');
archiver = require('archiver');
Q = require('q');
recursive = require('recursive-readdir');
path = require('path');
All of those were installed and working fine. "archive" is also correctly initialized at the beginning.
Since everything worked, it was time to get on with the server:
// Let's require/import the HTTP module
var http = require('http');

// Let's define a port we want to listen on
const PORT = 8080;

// Create a server
var server = http.createServer(handleRequest);

// Let's start our server
server.listen(PORT, function(){
    console.log("Server listening on: http://localhost:%s", PORT);
});
Function "handleRequest" is quite big, so to get just the gist of it i will place part where is shoud handle those promises:
switch(request.url) {
    case '/formhandler':
        if (request.method == 'POST') {
            // (...) Got my "Sour" (path to the source directory) and "Mask"
            // Here I tried to place that promise chain, but the server just skipped it and continued
            Q.nfcall(recursive, Sour, Mask)
                .then(function(data) {
                    var chain = Q.when();
                    for (var i = 0; i < data.length; i++) {
                        console.log(' > ' + data[i] + ' > adding to chain ...');
                        chain = chain.then(packFile.bind(this, data[i]));
                    }
                    chain.then(function() {
                        console.log("whole chain resolved");
                        console.log('FINISHING');
                        archive.finalize();
                    });
                });
            // (...) Finishing this request
        }
        break;
}
Nothing I do seems to work as long as I use those promises (calling them like that or from some function), and I have no idea why.
EDIT 1
What I mean by "skipped": I placed two console.log calls before and after the promises in that handler. I should see:
LOG1
TEXT_FROM_PROMISES //Something those promises send to console.log
LOG2
But there was only:
LOG1
LOG2
And the server was ready to take another request.
EDIT 2
Some people suggested in the comments changing packFile so it would return a promise, so I did:
function packFile(file){
    return new Promise(function(resolve, reject){
        var fileName = path.basename(file);
        console.log(' (+) Adding to the archive : ' + fileName + ' [' + file + ']');
        archive.append(fs.createReadStream(file), { name: fileName });
    });
}
The rest of the code is unchanged. The result: now ONLY the FIRST promise in the chain is performed (when I call it "alone" in a script, without the server), and it still won't work on the server.
EDIT 3
I changed 'packFile' again:
function packFile(file){
    return new Promise(function(resolve, reject){
        var fileName = path.basename(file);
        console.log(' (+) Adding to the archive : ' + fileName + ' [' + file + ']');
        archive.append(fs.createReadStream(file), { name: fileName });
        resolve("Success!");
    });
}
Now the whole chain works for the no-server version, but still nothing happens on the server.
EDIT 4
I changed
Q.nfcall(recursive, pathGiven, filterGiven)
    .then(function(data) {
        var chain = Q.when();
        for (var i = 0; i < data.length; i++) {
            console.log(' > ' + data[i] + ' > adding to chain ...');
            chain = chain.then(packFile.bind(this, data[i]));
        }
        chain.then(function() {
            console.log("whole chain resolved");
            archive.finalize();
        });
    }, function(reason) {
        console.log("ERROR:" + reason); // Error!
    });
and got a new error in the console:
ERROR:TypeError: ignores.map is not a function
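That TypeError suggests recursive-readdir calls .map on its second argument, i.e. it expects an array of ignore patterns rather than a single string. A hypothetical fix would be to wrap the mask in an array:
// hypothetical: recursive-readdir's second ("ignores") argument appears
// to need to be an array, so wrap the single mask string
Q.nfcall(recursive, Sour, [Mask])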
EDIT 5
The code I use for the archive, just to create and open a .zip to add files to with my packFile:
archive = archiver.create('zip', {});
var output = fs.createWriteStream('./output.zip');
archive.pipe(output);
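As a side note, to know when the zip has actually been written, archiver's usual pattern is to listen for the 'close' event on the output stream and the 'error' event on the archive. A minimal sketch, with illustrative log messages of my own:
archive = archiver.create('zip', {});
var output = fs.createWriteStream('./output.zip');
output.on('close', function() {
    // fires after archive.finalize() completes and all data is flushed
    console.log(archive.pointer() + ' total bytes written');
});
archive.on('error', function(err) {
    console.error('archiver error:', err);
});
archive.pipe(output);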
EDIT 6
Since there seems to be a need for more code, here is the function I call on my server whenever I receive a proper request (see the beginning of this post):
function handleWithPromises(pathGiven, filterGiven){
    console.log(' > Promising ...');
    archive = archiver.create('zip', {});
    var output = fs.createWriteStream('./output.zip');
    archive.pipe(output);

    function packFile(file){
        return new Promise(function(resolve, reject){
            var fileName = path.basename(file);
            console.log(' (+) Adding to the archive : ' + fileName + ' [' + file + ']');
            archive.append(fs.createReadStream(file), { name: fileName });
            resolve("Success!");
        });
    }

    Q.nfcall(recursive, pathGiven, filterGiven)
        .then(function(data) {
            var chain = Q.when();
            for (var i = 0; i < data.length; i++) {
                console.log(' > ' + data[i] + ' > adding to chain ...');
                chain = chain.then(packFile.bind(this, data[i]));
            }
            chain.then(function() {
                console.log('FINISHING');
                archive.finalize();
            });
        });
}
and the code that is placed at the very beginning of the script:
fs = require('fs');
archiver = require('archiver');
Q = require('q');
recursive = require('recursive-readdir');
path = require('path');

Go back to Edit 1 where you expected to see
LOG1
TEXT_FROM_PROMISES //Something those promises send to console.log
LOG2
but got
LOG1
LOG2
An explanation is that A+ compliant promises interface with the event loop scheduler: functions supplied to then are executed in a later turn of the event loop, after the code that called then has run to completion.
Chained promises execute asynchronously to the code that defines them.
On a very simplistic level, you will need to postpone ending the request until all files have been packed, by making sure response is in the function scope of the last anonymous function and ending the response there:
chain.then(function() {
    console.log('FINISHING');
    archive.finalize();
    response.write(...) // are you sending the archive back?
    response.end(); // now end the request
});

Related

How to disconnect a socket after streaming data?

I am making use of "socket.io-client" and "socket.io-stream" to make a request and then stream some data. I have the following code that handles this logic:
Client Server Logic
router.get('/writeData', function(req, res) {
    var io = req.app.get('socketio');
    var nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
    var nameNodeData = {};

    async.waterfall([
        checkForDataNodes,
        readFileFromS3
    ], function(err, result) {
        if (err !== null) {
            res.json(err);
        } else {
            res.json("Finished Writing to DN's");
        }
    });

    function checkForDataNodes(cb) {
        nameNodeSocket.on('nameNodeData', function(data) {
            nameNodeData = data;
            console.log(nameNodeData);
            cb(null, nameNodeData);
        });
        if (nameNodeData.numDataNodes === 0) {
            cb("No datanodes found");
        }
    }

    function readFileFromS3(nameNodeData, cb) {
        for (var i in nameNodeData['blockToDataNodes']) {
            var IP = nameNodeData['blockToDataNodes'][i]['ipValue'];
            var dataNodeSocket = io.connect('http://' + IP + ":5000");
            var ss = require("socket.io-stream");
            var stream = ss.createStream();
            var byteStartRange = nameNodeData['blockToDataNodes'][i]['byteStart'];
            var byteStopRange = nameNodeData['blockToDataNodes'][i]['byteStop'];
            paramsWithRange['Range'] = "bytes=" + byteStartRange.toString() + "-" + byteStopRange.toString();
            //var file = require('fs').createWriteStream('testFile' + i + '.txt');
            var getFileName = nameNodeData['blockToDataNodes'][i]['key'].split('/');
            var fileData = {
                'mainFile': paramsWithRange['Key'].split('/')[1],
                'blockName': getFileName[1]
            };
            ss(dataNodeSocket).emit('sendData', stream, fileData);
            s3.getObject(paramsWithRange).createReadStream().pipe(stream);
            //dataNodeSocket.disconnect();
        }
        cb(null);
    }
});
Server Logic (that gets the data)
var dataNodeIO = require('socket.io')(server);
var ss = require("socket.io-stream");

dataNodeIO.on('connection', function(socket) {
    console.log("Successfully connected!");
    ss(socket).on('sendData', function(stream, data) {
        var IP = data['ipValue'];
        var blockName = data['blockName'];
        var mainFile = data['mainFile'];
        dataNode.makeDir(mainFile);
        dataNode.addToReport(mainFile, blockName);
        stream.pipe(fs.createWriteStream(mainFile + '/' + blockName));
    });
});
How can I properly disconnect the connections in the function readFileFromS3? I have noticed that calling dataNodeSocket.disconnect() at the end does not work, as I cannot verify the data was received on the second server. But if I comment it out, I can see the data being streamed to the second server.
My objective is to close the connections on the client server side.
It appears that the main problem with closing the socket is that you weren't waiting for the stream to finish writing before trying to close the socket. Because the writing is all asynchronous and finishes some time later, you were trying to close the socket before the data had been written.
Also, because you were putting asynchronous operations inside a for loop, you were running all your operations in parallel, which may not be exactly what you want: it makes error handling more difficult and increases server load.
Here's the code I would suggest that does the following:
Create a function streamFileFromS3() that streams a single file and returns a promise that will notify when it's done.
Use await in a for loop with that streamFileFromS3() to serialize the operations. You don't have to serialize them, but then you would have to change your error handling to figure out what to do if one fails while the others are already running, and you'd have to be more careful about concurrency issues.
Use try/catch to catch any errors from streamFileFromS3().
Add error handling on the stream.
Change all occurrences of data['propertyName'] to data.propertyName. The only time you need to use brackets is if the property name contains a character that is not allowed in a JavaScript identifier, or if the property name is in a variable. Otherwise, dot notation is preferred.
Add socket.io connection error handling logic for both socket.io connections.
Set the returned status to 500 when there's an error processing the request.
So, here's the code for that:
const ss = require("socket.io-stream");

router.get('/writeData', function(req, res) {
    const io = req.app.get('socketio');

    function streamFileFromS3(ip, data) {
        return new Promise((resolve, reject) => {
            const dataNodeSocket = io.connect(`http://${ip}:5000`);
            dataNodeSocket.on('connect_error', reject);
            dataNodeSocket.on('connect_timeout', () => {
                reject(new Error(`timeout connecting to http://${ip}:5000`));
            });
            dataNodeSocket.on('connection', () => {
                // dataNodeSocket connected now
                const stream = ss.createStream().on('error', reject);
                paramsWithRange.Range = `bytes=${data.byteStart}-${data.byteStop}`;
                const filename = data.key.split('/')[1];
                const fileData = {
                    'mainFile': paramsWithRange.Key.split('/')[1],
                    'blockName': filename
                };
                ss(dataNodeSocket).emit('sendData', stream, fileData);
                // get S3 data and pipe it to the socket.io stream
                s3.getObject(paramsWithRange).createReadStream().on('error', reject).pipe(stream);
                stream.on('close', () => {
                    dataNodeSocket.disconnect();
                    resolve();
                });
            });
        });
    }

    function connectError(msg) {
        res.status(500).send(`Error connecting to ${NAMENODE_ADDRESS}`);
    }

    const nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
    nameNodeSocket.on('connect_error', connectError).on('connect_timeout', connectError);

    nameNodeSocket.on('nameNodeData', async (nameNodeData) => {
        try {
            for (let item of nameNodeData.blockToDataNodes) {
                await streamFileFromS3(item.ipValue, item);
            }
            res.json("Finished Writing to DN's");
        } catch (e) {
            res.status(500).json(e);
        }
    });
});
Other notes:
I don't know what paramsWithRange is, as it is not declared here. When you were doing everything in parallel, it was shared among all the connections, which is asking for a concurrency issue. In my serialized implementation it's probably safe to share, but the way it is now bothers me: it's a concurrency issue waiting to happen.
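One way to sidestep that, sketched on the assumption that paramsWithRange is a plain S3 getObject params object: build a fresh copy per request instead of mutating the shared one.
// hypothetical helper: clone the shared base params and set the Range
// per request, so concurrent calls never mutate the same object
function makeParams(baseParams, byteStart, byteStop) {
    return Object.assign({}, baseParams, {
        Range: `bytes=${byteStart}-${byteStop}`
    });
}

// inside streamFileFromS3:
// const params = makeParams(paramsWithRange, data.byteStart, data.byteStop);
// s3.getObject(params).createReadStream().on('error', reject).pipe(stream);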

How to abort a request?

I'm writing an upload script. If there's an error while writing to disk I want to abort the request and return a 500 status code.
My code (below) sends a 500 as expected, but the upload doesn't stop. I keep getting "data" events until the upload is complete (even though my write pipe is already broken), and then the req.on('end') event fires, which tries to send a 204 even though I've already sent a 500.
How can I abort the request so that I stop getting event notifications?
var filename = req.headers['wx-filename'];
var docType = req.headers['wx-doc-type'];
var clientId = req.headers['wx-client-id'];
var dir = Path.join(config.storage_path, docType, clientId);
var filePath = Path.join(dir, filename);

mkdirp.sync(dir);

var destFile = FileSystem.createWriteStream(filePath, {
    flags: 'wx' // don't overwrite files
});

if (config.env === 'dev') {
    (function () {
        var fileSize = req.headers['content-length'];
        var uploadedBytes = 0;
        req.on('data', function (data) {
            uploadedBytes += data.length;
            var p = uploadedBytes / fileSize * 100;
            var fn = Path.relative(config.storage_path, filePath);
            util.log(fn + ': ' + p.toFixed(1) + '%');
        });
    })();
}

req.on('end', function () {
    util.log("END");
    resp.writeHead(204);
    resp.end();
});

destFile.on('error', function (err) {
    util.log("ERROR", err);
    resp.writeHead(500, err.code);
    resp.end();
    // HOW TO STOP REQUEST?
});

req.pipe(destFile);
You need to remove the listeners for data and end, then send a Connection: close header, and finally send the 500 error.
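A minimal sketch of what that could look like inside the existing error handler; the unpipe call is my addition, to stop feeding the broken write stream:
destFile.on('error', function (err) {
    util.log("ERROR", err);
    // stop reacting to the rest of the upload
    req.removeAllListeners('data');
    req.removeAllListeners('end');
    req.unpipe(destFile);
    resp.writeHead(500, { 'Connection': 'close' });
    resp.end();
});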

Fetch 100 zip files in node

So I'm trying to fetch a bunch of files from a server. The current code is basically as follows.
var http = require('http');
var fs = require('fs');

var arr = [{id: 'fileOne'}, {id: 'fileTwo'}, {id: 'fileThree'}, ...];

function fetchData() {
    for (var i = 0; i < arr.length; i++) {
        var file = fs.createWriteStream("../path/file.zip");
        var request = http.get("url/AFG_adm.zip", function(response) {
            response.pipe(file);
        });
    }
}
I don't think this is the best approach; I'm trying to figure out how to handle errors and how to make sure a file finishes loading before the next iteration. Any help is much appreciated.
You should use the async module for handling the async part; the request module will also save you a lot of effort.
You can handle this in many ways, using either async.cargo or async.map.
The idea is to group a series of tasks together and then act on them according to what you want to do, but in an async way.
So a basic .map over an array of files to download would look like this:
// required modules
var async = require('async');
var request = require('request');
var fs = require('fs'); // needed for createWriteStream

// array of urls
var URLs = ['hxxp://...ZipFile1.zip', 'hxxp://...ZipFile2.zip'];

// destination directory
var destinationDirectory = 'downloads';

// asyncDownload function
function asyncDownload(url, callback) {
    // get filename (everything after the last '/')
    var filename = url.substring(url.lastIndexOf("/") + 1);

    // create write stream
    var stream = fs.createWriteStream(destinationDirectory + "/" + filename);

    // listen for the open event to start the request and pipe
    stream.on('open', function () {
        request(url).pipe(stream);
    });

    // when finished, call the callback
    stream.on('finish', function () {
        callback(null, destinationDirectory + "/" + filename);
    });
}

async.map(URLs, asyncDownload, function (err, results) {
    console.log(results);
});
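If you want the files one at a time, as the question asks, async.mapSeries has the same signature as async.map but runs the tasks sequentially; forwarding stream and request errors to the callback also gives basic error handling. A sketch along those lines (the asyncDownloadSafe name is mine):
// same worker, with errors forwarded to the callback
function asyncDownloadSafe(url, callback) {
    var filename = url.substring(url.lastIndexOf("/") + 1);
    var stream = fs.createWriteStream(destinationDirectory + "/" + filename);
    stream.on('error', callback); // surface write errors
    stream.on('open', function () {
        request(url).on('error', callback).pipe(stream);
    });
    stream.on('finish', function () {
        callback(null, destinationDirectory + "/" + filename);
    });
}

// sequential: each file finishes before the next one starts
async.mapSeries(URLs, asyncDownloadSafe, function (err, results) {
    if (err) return console.error(err);
    console.log(results);
});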

Node.js app continuously consume memory without freeing it

I'm facing a weird situation. I wrote an application that performs an HTTP GET request every five minutes. Something like this:
// pingy
'use strict';
var https = require('https'),
url = require('url');
exports.ping = function ping (callback) {
var options = url.parse('https://host.tld/ping');
callback = callback || function () {};
https.get(options, function handler (response) {
var body = '';
response
.on('readable', function onReadable() {
body = body + response.read();
})
.on('end', function onEnd() {
return callback(null, body);
})
.on('error', function onError (err) {
return callback(err);
});
});
};
// in other module
var pingy = require('./lib/pingy');
setInterval(pingy.ping, 300000);
Pretty straightforward. The thing is that after some time, the "rss" value from process.memoryUsage() climbs and climbs. It looks like the created ClientRequest objects are never garbage-collected. Although I'm using https in this example, the same happens when using the http module.
Do you have any idea what is wrong here?
EDIT:
I've solved the problem above (see my comment below). Now I'm facing a different problem, which is really, really hard to track down (I used node-webkit-agent to analyze the memory usage, but nothing looks special; the heap seems stable to me). The scenario is also nothing special: I'm copying roughly 200 images from source to dest via streams (see the code below). What happens is that the "RSS" increases as well. I'm pretty sure there is something wrong with my code regarding how I pipe the files. Don't get me wrong, I have no problem with high memory usage; my problem is that the memory is never freed. In order to verify that the memory will be cleared at some point in the future, I start an http.createServer after every single file has been copied. Even after a couple of hours the "rss" value stays the same.
So, well, again, do you have any idea what is wrong here? Thanks in advance for every hint! :)
'use strict';

var http = require('http'),
    fs = require('fs'),
    path = require('path'),
    util = require('util'),
    directory = '/path/to/your/files/',
    jobs = [];

function printMemoryUsage () {
    var memory = process.memoryUsage();
    memory.rss = (memory.rss / 1024) | 0;
    memory.heapTotal = (memory.heapTotal / 1024) | 0;
    memory.heapUsed = (memory.heapUsed / 1024) | 0;
    console.log(JSON.stringify(memory));
}

function pipeFile() {
    var infile = jobs.pop(),
        outfile = jobs.pop(),
        instream = fs.createReadStream(infile),
        outstream = fs.createWriteStream(outfile);

    instream.pipe(outstream);
    instream.on('close', outstream.end.bind(outstream));
    outstream.on('finish', function onFinish () {
        // console.log('Finished %s -> %s', infile, outfile);
        instream.destroy();
        outstream.destroy();
        next();
    });
}

function next() {
    if (jobs.length) {
        setTimeout(pipeFile, 2000);
    } else {
        http.createServer(function (req, res) {
            res.writeHead(200, {'Content-Type': 'text/plain'});
            res.end('Fooboot\n');
        }).listen(1337, '127.0.0.1');
    }
}

fs.readdir(directory, function (err, files) {
    files.forEach(function onIteration (file) {
        jobs.push(path.join(__dirname, file)); // outfile
        jobs.push(path.join(directory, file)); // infile
    });
    next();
});

setInterval(printMemoryUsage, 3000);
These are the memory footprints:
{"rss":13904,"heapTotal":6963,"heapUsed":1758}
{"rss":16012,"heapTotal":6963,"heapUsed":2016}
{"rss":26040,"heapTotal":6963,"heapUsed":2265}
{"rss":31468,"heapTotal":6963,"heapUsed":2453}
{"rss":41080,"heapTotal":6963,"heapUsed":2712}
{"rss":46620,"heapTotal":6963,"heapUsed":2844}
{"rss":49260,"heapTotal":6963,"heapUsed":1999}
{"rss":49524,"heapTotal":6963,"heapUsed":2249}
{"rss":49524,"heapTotal":6963,"heapUsed":2362}
{"rss":49788,"heapTotal":6963,"heapUsed":2621}
{"rss":49788,"heapTotal":6963,"heapUsed":2755}
{"rss":52692,"heapTotal":6963,"heapUsed":2001}
{"rss":52692,"heapTotal":6963,"heapUsed":2138}
{"rss":52692,"heapTotal":6963,"heapUsed":2270}
{"rss":52692,"heapTotal":6963,"heapUsed":2483}
{"rss":52692,"heapTotal":6963,"heapUsed":2600}
{"rss":52692,"heapTotal":6963,"heapUsed":2796}
{"rss":52956,"heapTotal":6963,"heapUsed":1951}
{"rss":52956,"heapTotal":6963,"heapUsed":2079}
{"rss":52956,"heapTotal":6963,"heapUsed":2343}
{"rss":52956,"heapTotal":6963,"heapUsed":2462}
{"rss":52956,"heapTotal":6963,"heapUsed":2689}
{"rss":52956,"heapTotal":6963,"heapUsed":2831}
{"rss":53136,"heapTotal":9011,"heapUsed":1927}
{"rss":53136,"heapTotal":9011,"heapUsed":2176}
{"rss":53136,"heapTotal":9011,"heapUsed":2273}
{"rss":53136,"heapTotal":9011,"heapUsed":2447}
{"rss":53136,"heapTotal":9011,"heapUsed":2545}
{"rss":53136,"heapTotal":9011,"heapUsed":2627}
{"rss":53136,"heapTotal":9011,"heapUsed":2804}
{"rss":53136,"heapTotal":9011,"heapUsed":2890}
{"rss":59732,"heapTotal":9011,"heapUsed":3100}
{"rss":65012,"heapTotal":9011,"heapUsed":3211}
{"rss":73496,"heapTotal":9011,"heapUsed":3409}
{"rss":79304,"heapTotal":9011,"heapUsed":3536}
{"rss":83792,"heapTotal":9011,"heapUsed":3633}
{"rss":95408,"heapTotal":9011,"heapUsed":3865}
{"rss":98840,"heapTotal":9011,"heapUsed":1824}
{"rss":98840,"heapTotal":9011,"heapUsed":2003}
{"rss":98840,"heapTotal":9011,"heapUsed":2205}
{"rss":98840,"heapTotal":9011,"heapUsed":2297}
{"rss":98840,"heapTotal":9011,"heapUsed":2491}
{"rss":98840,"heapTotal":9011,"heapUsed":2608}
{"rss":98840,"heapTotal":9011,"heapUsed":2717}
{"rss":98840,"heapTotal":9011,"heapUsed":2919}
{"rss":99368,"heapTotal":9011,"heapUsed":3036}
{"rss":99368,"heapTotal":9011,"heapUsed":3247}
{"rss":99632,"heapTotal":9011,"heapUsed":3351}
{"rss":99632,"heapTotal":9011,"heapUsed":3452}
{"rss":99896,"heapTotal":9011,"heapUsed":3606}
{"rss":99896,"heapTotal":9011,"heapUsed":3686}
{"rss":105968,"heapTotal":9011,"heapUsed":3824}
{"rss":106760,"heapTotal":9011,"heapUsed":1936}
{"rss":106760,"heapTotal":9011,"heapUsed":2022}
{"rss":106760,"heapTotal":9011,"heapUsed":2187}
{"rss":106760,"heapTotal":9011,"heapUsed":2279}
{"rss":106760,"heapTotal":9011,"heapUsed":2474}
{"rss":106760,"heapTotal":9011,"heapUsed":2614}
{"rss":106760,"heapTotal":9011,"heapUsed":2690}
{"rss":106760,"heapTotal":9011,"heapUsed":2854}
{"rss":106760,"heapTotal":9011,"heapUsed":2953}
{"rss":106760,"heapTotal":9011,"heapUsed":3241}
{"rss":106760,"heapTotal":9011,"heapUsed":3391}
{"rss":106760,"heapTotal":9011,"heapUsed":3535}
{"rss":107288,"heapTotal":9011,"heapUsed":3797}
{"rss":108248,"heapTotal":9011,"heapUsed":1908}

NodeJS async queue too fast (Slowing down async queue method)

I have an HTTP GET request and I want to parse the response and save it to my database.
If I call crawl(i) independently, I get good results. But I have to call crawl() from 1 to 2000.
I get good results, but some responses seem to get lost and some responses are duplicates. I don't think I understand how to call thousands of asynchronous functions. I am using the async module's queue function, but so far I am still missing some data and still have some duplicates. What am I doing wrong here? Thanks for your help.
What I am crawling
My node functions:
function getOptions(i) {
    return {
        host: 'magicseaweed.com',
        path: '/syndicate/rss/index.php?id=' + i + '&unit=uk',
        method: 'GET'
    };
}

function crawl(i){
    var req = http.request(getOptions(i), function(res) {
        res.on('data', function (body) {
            parseLocation(body);
        });
    });
    req.end();
}

function parseLocation(body){
    parser.parseString(body, function(err, result) {
        if (result && typeof result.rss != 'undefined') {
            var locationTitle = result.rss.channel[0].title;
            var locationString = result.rss.channel[0].item[0].link[0];
            var location = new Location({
                id: locationString.split('/')[2],
                name: locationTitle
            });
            location.save();
        }
    });
}

var N = 2; // # of simultaneous tasks
var q = async.queue(function (task, callback) {
    crawl(task.url);
    callback();
}, N);

q.drain = function() {
    console.log('Crawling done.');
}

for (var i = 0; i < 100; i++){
    q.push({url: 'http://magicseaweed.com/syndicate/rss/index.php?id=' + i + '&unit=uk'});
}
[EDIT] Well, after a lot of testing, it seems that the service I am crawling cannot handle so many requests that fast, because when I do the requests sequentially, I get all the good responses.
Is there a way to SLOW DOWN the async queue method?
You should have a look at this great module, async, which simplifies tasks like this. You can use queue; a simple example:
var N = 5; // number of simultaneous tasks (e.g. 5, 10, 20)
var q = async.queue(function (task, callback) {
    somehttprequestfunction(task.url, function(){
        callback();
    });
}, N);

q.drain = function() {
    console.log('all items have been processed');
}

for (var i = 0; i < 2000; i++){
    q.push({url: "http://somewebsite.com/" + i + "/feed/"});
}
It will have a window of ongoing actions, and room becomes available for a future task only when you invoke the callback function. The difference is that your code now opens 2000 connections immediately, and obviously the failure rate is high. Limiting it to a reasonable value (5, 10, 20; it depends on the site and connection) will result in a better success rate. If a request fails, you can always retry it, or push the task to another async queue for another trial. The key point is to invoke callback() in the queue function, so that a slot becomes available when a task is done.
var q = async.queue(function (task, callback) {
    crawl(task.url);
    callback();
}, N);
You're executing the next task immediately after starting the previous one; this way, the queue is just meaningless. You should modify your code like this:
// First, modify your 'crawl' function to take a callback argument,
// and call this callback after the job is done. Then:
var q = async.queue(function (task, next /* naming this argument 'next' is more meaningful */) {
    crawl(task.url, function () {
        // after this one is done, start the next one
        next();
    });
    // or, more simply: crawl(task.url, next);
}, N);
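Building on that, a small sketch of the retry idea mentioned above, assuming crawl now reports an error to its callback on failure (the retries counter is my addition):
var q = async.queue(function (task, next) {
    crawl(task.url, function (err) {
        if (err) {
            task.retries = (task.retries || 0) + 1;
            if (task.retries <= 3) {
                q.push(task); // re-queue the failed task for another attempt
            }
        }
        next(); // free the queue slot either way
    });
}, N);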
Another option, if you want: vanilla JS without fancy libraries.
var incrementer = 0;
var resultsArray = [];

var myInterval = setInterval(function() {
    incrementer++;
    if (incrementer == 100){
        clearInterval(myInterval);
        // when done, parse the results array
    }
    // make request here
    // push request result to array here
}, 500);
This invokes the function every half second: an easy way to throttle the requests and exit after x of them.
I know I am a little late to the question; however, here is a solution I wrote to slow down the number of requests when testing an API endpoint, using Node 4 or Node 5:
var fs = require('fs');
var supertest = require('supertest');
var request = supertest("http://sometesturl.com/api/test/v1/");
var Helper = require('./check.helper');
var basicAuth = Helper.basicAuth;
var options = Helper.options;

fs.readFile('test.txt', function(err, data){
    var parsedItems = JSON.parse(data);
    var urlparts = [];

    // create a queue
    for (let year of range(1975, 2016)) {
        for (var make in parsedItems[year]){
            console.log(year, make, '/models/' + year + '/' + make);
            urlparts.push({urlpart: '/models/' + year + '/' + make, year: year, make: make});
        }
    }

    // start dequeue
    waitDequeue();

    // This function calls itself after the makeRequest promise completes
    function waitDequeue(){
        var item = urlparts.pop();
        if (item){
            makeRequest(item)
                .then(function(){
                    // wait this long before the next dequeue
                    setTimeout(function() {
                        waitDequeue();
                    }, 3000);
                });
        } else {
            write(parsedItems);
        }
    }

    // make a request, mutate parsedItems, then resolve
    function makeRequest(item){
        return new Promise((resolve, reject) => {
            request
                .get(item.urlpart)
                .set(options.auth[0], options.auth[1])
                .set(options.type[0], options.type[1])
                .end(function(err, res) {
                    if (err) return reject(err);
                    console.log(res.body);
                    res.body.forEach(function(model){
                        parsedItems[item.year][item.make][model] = {};
                    });
                    resolve();
                });
        });
    }

    // write the results back to the file
    function write(parsedItems){
        fs.writeFile('test.txt', JSON.stringify(parsedItems, null, 4), function(err){
            console.log(err);
        });
    }
});
A little late, but I have found this works!
Using async, you can slow down the queue by using whilst inside the task handler, e.g.:
var q = async.priorityQueue(function(task, callback) {
    // your processing code here for each task
    // when ready to complete the task, delay it by calling
    var count = 0;
    async.whilst( // wait 10 seconds (10 x 1000 ms)
        function() {
            return count < 10;
        },
        function(callback) {
            count++;
            setTimeout(function() {
                callback(null, count);
            }, 1000);
        },
        function (err, n) {
            // n seconds have passed
            callback(); // callback to the q handler
        }
    ); // whilst
}, 5);
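If all you need is a fixed pause between tasks, a plain setTimeout around the queue callback achieves the same with less machinery; a minimal sketch, assuming crawl takes a completion callback as suggested earlier:
var q = async.queue(function (task, callback) {
    crawl(task.url, function () {
        // hold the queue slot for one extra second before releasing it
        setTimeout(callback, 1000);
    });
}, 5);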
