Async.queue crashes after processing 3 initial elements of the queue with concurrency = 3 - node.js

Async.queue() initially runs as expected but crashes after processing the first N elements (N = 3).
When adding callback() after running getAddress(), concurrency is totally ignored. Subsequently getAddress() runs for all tasks passed to the queue via the stream.
The problem arose when attempting to build upon this tutorial.
Trying to determine the root cause and a solution. Seems possible that this is related to promise chaining?
Have attempted to refactor async.queue() following the async docs, but appears that the syntax is out of date and can't find a working example with chained promises.
// Script setup: load dependencies, read the title numbers (one per line) from
// the CSV file, and expose them as a stream that feeds the queue below.
const { csvFormat } = require('d3-dsv');
const Nightmare = require('nightmare');
const { readFileSync, writeFileSync } = require('fs');
// Read the whole file synchronously, strip surrounding whitespace, split on newlines.
const numbers = readFileSync('./tesco-title-numbers.csv',
{encoding: 'utf8'}).trim().split('\n');
// Page that every lookup starts from.
const START = 'https://eservices.landregistry.gov.uk/wps/portal/Property_Search';
var async = require("async")
console.log(numbers)
// create a read stream
var ArrayStream = require('arraystream')
var stream = ArrayStream.create(numbers)
// set concurrency
// NOTE(review): N is assigned without const/let/var, so it becomes an implicit
// global (and would throw a ReferenceError in strict mode).
N = 3
// Queue whose worker starts getAddress(task) for each queued item, with N as
// the concurrency argument.
// NOTE(review): `callback` is never invoked here (the only call is commented
// out) and the promise returned by getAddress() is neither awaited nor
// chained, so the worker never reports completion. That is presumably why
// the queue stops after the first N tasks; confirm against the async.queue docs.
var q = async.queue(function (task, callback) {
// `data` holds the pending promise from getAddress(), not the resolved value.
let data = getAddress(task)
// , function(){
// callback();
},
// },
N);
// Handlers that couple the queue to the input stream: the stream is paused
// while the queue is saturated and resumed once the queue has drained.
// NOTE(review): handlers are assigned as properties (q.drain = fn); newer
// versions of async presumably expect q.drain(fn) instead; verify against
// the installed version.
q.drain = function() {
stream.resume()
console.log('all items have been processed');
// NOTE(review): `resolve` is not defined anywhere in this snippet; unless it
// is declared elsewhere, this call throws a ReferenceError.
resolve()
}
// or await the end
// await q.drain()
// Stop reading from the stream while the queue reports it is saturated.
q.saturated = function() {
stream.pause();
}
// assign an error callback
// Logs a fixed message; the `err` and `task` arguments are not used.
q.error = function(err, task) {
console.error('task experienced an error');
}
// Every item emitted by the stream becomes one queue task.
stream.on("data", function(data) {
// console.log(data);
q.push(data)
})
// Looks up a single title number `id` in a fresh Nightmare browser instance.
// Returns a promise; on every path visible here it resolves with undefined,
// because nothing is returned after the second try block. The real work is
// elided ("does some work").
// NOTE(review): the Nightmare instance is never ended in the visible code;
// presumably it needs nightmare.end() once the work is done (confirm).
var getAddress = async id => {console.log(`Now checking ${id}`);
const nightmare = new Nightmare({ show: true });
// Go to initial start page, navigate to Detail search
try {
await nightmare
.goto(START)
.wait('.bodylinkcopy:first-child')
.click('.bodylinkcopy:first-child');
} catch(e) {
// Navigation errors are logged and execution continues with the next step.
console.error(e);
}
// Type the title number into the appropriate box; click submit
try {
// Declared but not used in the visible code.
let SOMEGLOBALVAR;
await nightmare
// does some work
} catch(e) {
console.error(e);
return undefined;
}
};

Determined the cause of the problem: the getAddress() promise needs to be returned from the worker, and callback() must be called once that promise has settled.
// Responses collected from every getAddress() call, in completion order.
const dataArray = [];
// Maximum number of getAddress() calls allowed to run at the same time.
const N = 4;
// The worker is a plain (non-async) function so that async.queue passes it
// `callback`. The callback is invoked exactly once per task, on success or on
// failure; that is what frees the worker slot and lets the queue honour the
// concurrency limit.
const q = async.queue((task, callback) => {
  return getAddress(task).then(
    (response) => {
      console.log(response);
      dataArray.push(response);
      callback();
    },
    // Report a rejection to the queue instead of leaving the slot occupied forever.
    (err) => callback(err),
  );
}, N);

Related

How do I wait for a promise in loop to finish before do some other stuff?

I am still confused about how to use promises. I have a for loop that calls an asynchronous method which returns a value. I push this value into an array, but when I print the array it is empty. Here is what I did:
/**
 * Opens `link` in a headless browser and collects the `data-image` attribute
 * of the first <a> inside every `#gallery_01 .item` element on the page.
 *
 * The in-page script relies on the page exposing jQuery as `window.$`.
 *
 * @param {string} link - URL of the page to scrape.
 * @returns {Promise<Array>} The collected `data-image` values, in document order.
 * @throws Rejects if launching, navigating or evaluating fails; the browser
 *   is closed in every case.
 */
async function getLink(link) {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    // Navigate to the URL that was passed in (previously an undefined `LINK`).
    await page.goto(link);
    return await page.evaluate(() => {
      const data = [];
      const $ = window.$;
      $('#gallery_01 .item').each(function (index, product) {
        data.push($(product).find('a').attr('data-image'));
      });
      return data;
    });
  } finally {
    // Always release the browser, even when navigation or evaluation throws.
    await browser.close();
  }
}
// Starts ten getLink() calls and tries to wait for all of them.
// NOTE(review): nothing is pushed into `final` until a getLink() promise
// resolves, so Promise.all() below is handed an empty array and its callback
// runs before any result has arrived. The promises themselves (stored in
// `data`) are never passed to Promise.all().
var final = [];
for (var i = 0; i < 10; i++) {
// `data` here is the promise returned by .then(), not the scraped result.
var data = getLink(value[i].url).then(function(data) {
console.log(data); // urls show here
final.push(data);
});
}
Promise.all(final).then(() => {
console.log(final) // empty
})
The final array shows up empty. What did I do wrong with the promises? Please help!
I can't see what value is, but it looks like it's supposed to be an array of objects with a url property?
Assuming the getLink() function is okay, try this for your loop:
// Kick off getLink() for the first ten entries of `value` and keep the
// resulting promises, so Promise.all() can wait for every one of them.
const final = Array.from({ length: 10 }, (_, index) => getLink(value[index].url));
// Runs once every lookup has resolved; `data` holds the results in input order.
Promise.all(final).then((data) => {
  console.log(data);
});
Or a slightly more compact way of accomplishing the same thing:
// One getLink() promise per entry of `value`.
const promises = value.map(({ url }) => getLink(url));
// Log all results together once every promise has resolved.
Promise.all(promises).then((links) => {
  console.log(links);
});
Update: My bad, I got a bit confused. The following code only works if the `() =>` in front of the getLink(...) call is removed, so that the array holds promises rather than functions.
You are very close. Try this:
// Promises for the in-flight getLink() calls; Promise.all() needs promises,
// not functions, in order to wait for anything.
const final = [];
// Values the promises resolved with, in completion order.
const results = []; // you need a separate array for results
for (let i = 0; i < 10; i++) {
  // Call getLink() right away (no `() =>` wrapper) so that a promise is stored.
  const pending = getLink(value[i].url).then((data) => {
    console.log(data); // urls show here
    results.push(data);
  });
  final.push(pending);
}
// Runs only after every getLink() call above has resolved.
Promise.all(final).then(() => {
  console.log(results);
});
Promise.all accepts an array of promises. You have an array 'final', but you seem to be trying to store the result of the function execution as well as the function itself.
To do this correctly - first get an array of promises. Then pass them to Promise.all().
P.S. Assuming your function actually works, haven't looked at it, since the question was about promises.

How to add delay in nodejs

i am calling a 3rd party API in a loop in my nodejs application. Basically I have a list, am iterating through the list and calling the 3rd party API.
The 3rd party API is very slow and cannot handle more than 3 requests. I have been advised to add some delay.
Please can someone advise how to add delay in this scenario.
// Start one getServiceDetailsApi() call per rid. All calls are issued at once,
// with no limit on how many run concurrently.
const promises = rids.map((rid) => {
  // 3rd party API
  // getServiceDetailsApi is wrapper around 3rd party API
  return getServiceDetailsApi(rid);
});
// console.log(promises);
Promise.all(promises)
  .then((res) => {
    // console.log('promise complete..' + res.length)
    let responses = [];
    // forEach, not map: the loop is run for its side effect on `responses`.
    res.forEach((response) => {
      const serviceAttributesDetail = {};
      // console.log(response);
      // Declared locally; this used to be an assignment to an undeclared
      // name, which creates an implicit global.
      const serviceAttributesDetails = response.data.serviceAttributesDetails;
      serviceAttributesDetail.rid = serviceAttributesDetails.rid;
      responses = responses.concat(serviceAttributesDetail);
    });
    // Add more logic above
    return Promise.all(responses);
  });
If one request at a time is enough, you can try this way:
'use strict';
// Calls the API for one rid at a time: the next request starts only after the
// previous one has resolved. Any failure stops the loop and is logged.
(async function main() {
  const responses = [];
  for (const rid of rids) {
    const { data } = await getServiceDetailsApi(rid);
    responses.push({ rid: data.serviceAttributesDetails.rid });
  }
  console.log(responses);
})().catch((err) => {
  console.error(err);
});
If your restriction is about having a maximum of 3 concurrent requests to that API, here is a possibility (untested though, there might be typos, and I didn't think about the rejection handling):
const cfgMaxApiCalls = 3;
...
function getServiceDetailsApi() {...}
...
const rids = ...
...
// Resolves with every API response, in the same order as `rids`, while never
// running more than `cfgMaxApiCalls` requests at the same time. Rejects with
// the first error reported by getServiceDetailsApi().
const promisedResponses = new Promise((generalResolve, generalReject) => {
  const pendingRids = [...rids]; // work on a copy so `rids` itself is left untouched
  const total = pendingRids.length;
  const responses = new Array(total); // the output of the general promise
  let nextIndex = 0; // position of the next rid to request
  let currentCalls = 0; // to know how many calls are in progress
  let failed = false; // set once a call rejected; stops further scheduling

  // Starts the next call, or resolves when nothing is left and nothing is running.
  const launch = () => {
    if (failed) {
      return;
    }
    if (nextIndex >= total) {
      // stop condition: nothing more to process, and all current calls have resolved
      if (currentCalls === 0) {
        generalResolve(responses);
      }
      return;
    }
    const index = nextIndex;
    nextIndex += 1;
    currentCalls += 1;
    getServiceDetailsApi(pendingRids[index]).then(
      (response) => {
        responses[index] = response; // keep input order, not completion order
        currentCalls -= 1;
        launch(); // this call is done, so a slot is free for the next one
      },
      (err) => {
        failed = true;
        generalReject(err);
      },
    );
  };

  // Nothing to request: resolve immediately instead of staying pending forever.
  if (total === 0) {
    generalResolve(responses);
    return;
  }
  // start the process for maximum `cfgMaxApiCalls` concurrent calls
  for (let started = 0; started < cfgMaxApiCalls && started < total; started++) {
    launch();
  }
});
promisedResponses.then((res) => {
// here `res` === your code's `res`
// and by the way, Array.prototype.concat is not asynchronous,
// so no need to Promise.all(responses) at the end ;)
});

kafka-node asynchronous consumer handler

That's how my consumer is initialised:
// Create the client from config.ZK_HOST (presumably the ZooKeeper host; confirm).
const client = new kafka.Client(config.ZK_HOST)
// Consume config.KAFKA_TOPIC starting at offset 0, with auto-commit turned off.
const consumer = new kafka.Consumer(client, [{ topic: config.KAFKA_TOPIC, offset: 0}],
{
autoCommit: false
})
Now the consumer consumer.on('message', message => applyMessage(message))
The thing is applyMessage talks to the database using knex, the code looks something like:
// Handles one Kafka message: counts the rows in `users`, then inserts the
// user derived from the message. Returns a promise because it is async.
// NOTE(review): `usersCount` is read but not used in the visible code; per
// the inline comment it stands in for logic that depends on previous state.
async function applyMessage(message: kafka.Message) {
const usersCount = await db('users').count()
// just assume we ABSOLUTELY need to calculate a number of users,
// so we need previous state
await db('users').insert(inferUserFromMessage(message))
}
The code above makes applyMessage execute in parallel for all the messages in kafka, so given that there are no users in the database yet, usersCount will ALWAYS be 0, even for the second message from kafka, where it should already be 1 since the first call to applyMessage inserts a user.
How do I "synchronise" the code in a way that all the applyMessage functions run sequentially?
You'll need to implement some sort of Mutex. Basically a class which queues up things to execute synchronously. Example
/**
 * Minimal async mutex: tasks passed to enqueue() run strictly one after
 * another, in the order they were queued.
 *
 * A task is a function that may return a promise; the next task starts only
 * after that promise has settled. A failing task is logged and does not stop
 * the tasks queued behind it.
 */
class Mutex {
  constructor() {
    // Tasks waiting for their turn, oldest first.
    this.queue = [];
    // True while a task is running (or about to be started).
    this.locked = false;
  }

  /**
   * Adds a task and starts it immediately if nothing else is running.
   * @param {Function} task - Function to run; may return a promise.
   */
  enqueue(task) {
    this.queue.push(task);
    if (!this.locked) {
      this.dequeue();
    }
  }

  /** Starts the next queued task, or releases the lock when the queue is empty. */
  dequeue() {
    // Decide on the queue length rather than on the truthiness of the entry,
    // so a bogus falsy entry cannot release the lock while tasks remain.
    if (this.queue.length === 0) {
      this.locked = false;
      return;
    }
    this.locked = true;
    const task = this.queue.shift();
    this.execute(task);
  }

  /**
   * Runs one task to completion and then moves on to the next one.
   * @param {Function} task - The task to run.
   * @returns {Promise<void>}
   */
  async execute(task) {
    try {
      await task();
    } catch (err) {
      // Keep the queue going, but do not hide the failure.
      console.error('Mutex task failed:', err);
    }
    this.dequeue();
  }
}
In order for this to work, your applyMessage function (whichever handles Kafka messages) needs to return a Promise - notice also the async has moved from the parent function to the returned Promise function:
// Handles one Kafka message and returns a promise that resolves once the
// user derived from the message has been inserted, or rejects with the
// database error.
// NOTE(review): an async function already returns a promise, so the explicit
// `new Promise(async ...)` wrapper looks redundant; the try/catch is what
// keeps errors from the async executor from being lost.
function applyMessage(message: kafka.Message) {
return new Promise(async function(resolve,reject) {
try {
// NOTE(review): `usersCount` is not used in the visible code.
const usersCount = await db('users').count()
// just assume we ABSOLUTELY need to calculate a number of users,
// so we need previous state
await db('users').insert(inferUserFromMessage(message))
resolve();
} catch (err) {
reject(err);
}
});
}
Finally, each invocation of applyMessage needs to be added to the Mutex queue instead of called directly:
// Route every Kafka message through one Mutex so the applyMessage() calls run
// one after another instead of concurrently.
var mutex = new Mutex();
consumer.on('message', (message) => {
  mutex.enqueue(() => applyMessage(message));
});

For loop in promise.then()?

I need to iterate between two values and create/touch files (I/O) on each iteration.
I'm using the fs-promise module to do so asynchronously:
const path = require('path');
const fsp = require('fs-promise');
// Reads templates/body.html, then starts writing an empty style file for each
// page number from startAt up to (but not including) startAt + pages.
// NOTE(review): neither the readFile chain nor the individual writeFile
// promises are returned, so a caller cannot wait for the writes to finish.
// The function body is also never closed in this snippet.
function addPages(startAt, pages, mode) {
// Read but not used in the active code (only in the commented-out block).
let htmlExt = mode.HTML;
let cssExt = mode.CSS;
fsp.readFile(path.join('.', 'templates', 'body.html'), { encoding: 'utf-8' })
.then((content) => {
// return Promise.all(() => {}).then().catch(); // Do this.
for (let i = startAt, endAt = startAt + pages; i < endAt; i++) {
console.log(i);
// Each write is started without waiting for the previous one; failures
// are only logged.
fsp.writeFile(path.join('.', 'manuscript', `page-${i}`, `style.${cssExt}`), '')
.then(() => { console.log('Yay!') })
.catch(console.log.bind(console));
// fsp.writeFile(path.join('.', 'manuscript', `page-${i}`, `style.${cssExt}`), '')
// .then((i, templateHTML) => {
// fsp.writeFile(path.join('.', 'manuscript', `page-${i}`, `body.${htmlExt}`), content);
// })
// .catch((err) => {
// console.log.bind(console);
// });
}
})
.catch((err) => {
// NOTE(review): `error` is not defined anywhere in this snippet.
if (err) return error('Couldn\'t create pages', err);
});
Now I did read that Promises.all([Array of promises]) is the way to go for looping inside the then() scope, but the question is why/how?
I'm unable to wrap my head around why the for-loop doesn't execute before the context moves out of the promised then() scope, and then how should I get to the expected outcome.
const path = require('path');
const fsp = require('fs-promise');
/**
 * Reads templates/body.html and then creates an empty `style.<CSS ext>` file
 * in manuscript/page-<n> for every n from startAt up to, but not including,
 * startAt + pages.
 *
 * @param {number} startAt - First page number.
 * @param {number} pages - How many pages to create.
 * @param {{HTML: string, CSS: string}} mode - File extensions to use.
 * @returns {Promise<Array>} Settles once every write has finished.
 * @throws Rejects with the original error after logging it.
 */
function addPages(startAt, pages, mode) {
  // HTML is read alongside CSS but is not used yet.
  const { HTML: htmlExt, CSS: cssExt } = mode;
  const templatePath = path.join('.', 'templates', 'body.html');

  // Start one write per page and hand back a promise that covers all of them.
  const writeStylesheets = (content) => {
    const pendingWrites = [];
    const endAt = startAt + pages;
    let page = startAt;
    while (page < endAt) {
      const target = path.join('.', 'manuscript', `page-${page}`, `style.${cssExt}`);
      pendingWrites.push(fsp.writeFile(target, ''));
      page++;
    }
    return Promise.all(pendingWrites);
  };

  // Log and rethrow so callers further down the chain still see the failure.
  const reportAndRethrow = (err) => {
    console.log("Could not add pages: ", err);
    throw err;
  };

  return fsp
    .readFile(templatePath, { encoding: 'utf-8' })
    .then(writeStylesheets)
    .catch(reportAndRethrow);
}
As elaborated in the comments, resist the temptation to introduce non-functional .catch() handlers into your promise chain.
Non-functional means in this case: It does not recover from the error and does not rethrow the error. A catch handler that does not throw marks an error as handled, i.e. it returns a resolved promise, not a rejected one. This makes proper error handling later in the promise chain impossible. It's bad practice and unhelpful.
If you want to log the error, log it and rethrow it. If you have fully recovered from the error and subsequent code is unimpeded, don't rethrow.

Using promises with download module

I am using bluebird for promises.
I am trying to promisify the download module.
Here is my implementation:
Promise = require('bluebird'),
download = require('download');
var methodNameToPromisify = ["download"];
// Custom promisifier: wraps `originalMethod` (which returns an event emitter)
// in a function that returns a promise tied to the emitter's events.
// NOTE(review): resolve() is called from several handlers, but a promise
// settles only once, so whichever event fires first decides the result and
// the later calls have no effect.
function EventEmitterPromisifier(originalMethod) {
// return a function
return function promisified() {
var args = [].slice.call(arguments);
// Needed so that the original method can be called with the correct receiver
var self = this;
// which returns a promise
return new Promise(function(resolve, reject) {
// We call the originalMethod here because if it throws,
// it will reject the returned promise with the thrown error
var emitter = originalMethod.apply(self, args);
emitter
.on("response", function(data) {
resolve(data);
})
// NOTE(review): the event name is "data " with a trailing space, so this
// handler presumably never fires for a regular "data" event.
.on("data ", function(data) {
resolve(data);
})
.on("error", function(err) {
reject(err);
})
.on("close", function() {
resolve();
});
});
};
};
// Wrap the download function in an object so promisifyAll() has a method to
// work on, then promisify only the names listed in methodNameToPromisify,
// using the custom promisifier above.
download = { download: download };
Promise.promisifyAll(download, {
// Keep only the method names listed in methodNameToPromisify.
filter: function(name) {
return methodNameToPromisify.indexOf(name) > -1;
},
promisifier: EventEmitterPromisifier
});
Then using it:
return download.downloadAsync(fileURL, copyTo, {});
My problem is that it doesn't download all of the files (I have a list sent to this function), what am I doing wrong?
An emitter emits multiple data events, one for every chunk it receives. However, a promise represents only one future value; in your case you want that to be the complete response.
resolve is supposed to be called only once, to fulfill the promise with the passed value, which is then settled. Further calls will have no effect - and that's why you get only the first parts of your list.
Instead, you will need to accumulate all the data, and when the stream ends you can fulfill the promise with all of it.
var Promise = require('bluebird'),
download = require('download'),
Buffer = require('buffer'); // should be global anyway
exports = {
downloadAsync: function promisifiedDownload() {
var args = arguments, self = this;
return new Promise(function(resolve, reject) {
// We call the originalMethod here because if it throws,
// it will reject the returned promise with the thrown error
var emitter = download.apply(self, args);
var buffers = [];
emitter.on("data", function(data) {
buffers.push(data);
}).on("error", function(err) {
reject(err);
}).on("close", function() {
resolve(Buffer.concat(buffers));
});
});
};
};
Notice it's quite nonsensical to use promisifyAll when you only want to promisify a single method. I've omitted it for simplicity
You might also listen for the incoming response object, and attach the data listener directly to it. You can then use the end event instead of close.

Resources