How the Node.js async eachLimit works in this situation? - node.js

I wrote a little async script to batch insert a lot of JSON files into a MongoDB sharded cluster. This is my first time with this module (and I'm still learning Node.js). I don't know if I'm doing it right.
The code is the last part of a waterfall (1): previous functions end
up with an object with db, coll and files properties.
files array contains hundreds of file paths and the function to
apply to each element of the array is, again, a waterfall (2).
Waterfall (2) is made of the following: read, parse, insert. When this waterfall ends (3) I call complete to finalize the processing of a single item in the array, passing the error (if any).
So far so good, correct?
What I can't understand is what happens inside the async.eachLimit callback (4). From the documentation:
A callback which is called after all the iterator functions have
finished, or an error has occurred.
That is, when all functions have finished, the next() call (5) ends the script. But the same callback (4) is invoked when a single error occurs, as per the documentation. That is, my script stops as soon as a single file fails.
How can I avoid this?
// Question code (1): an earlier waterfall step produced `obj` with db, coll
// and files; this step batch-processes obj.files with at most 1000
// concurrent inner waterfalls (2).
async.waterfall([ // 1
// ...
function (obj, next) {
// NOTE(review): the inner waterfall steps below declare their own `next`
// parameters, shadowing this outer `next` — easy to misread.
async.eachLimit(obj.files, 1000,
function (file, complete) {
async.waterfall([ // 2
function (next) {
fs.readFile(file, {}, function (err, data) {
next(err, data);
});
},
function (data, next) { // Parse (assuming all well formed)
next(null, JSON.parse(data));
},
function (doc, next) { // Insert
obj.coll.insert(doc, {w: 1}, function (err, doc) {
next(err);
});
}
], function (err, result) { // 3
// Any error from read/parse/insert lands here and is forwarded to
// eachLimit. Per the documentation quoted above, a truthy error makes
// eachLimit invoke (4) immediately and stop — the asker's problem.
complete(err);
});
},
function (err) { // 4
// Reached either after ALL files finished, or as soon as ONE
// complete(err) reported a truthy error.
if (err) console.error(err);
next(null, obj); // 5
}
);
}
], function (err, obj) { // Waterfall end
if (err) console.error(err);
obj.db.close(); // Always close the connection
});

If you don't want it to break in case of an error you should just invoke the callback with a falsy first parameter, like so (look after // 3).
Is this ok with you / did I understand correctly?
// Answer code: identical structure to the question, except that the inner
// waterfall's final callback (3) now swallows per-file errors (logging
// them) and always calls complete() with NO argument — so eachLimit never
// sees an error and keeps processing the remaining files.
async.waterfall([ // 1
// ...
function (obj, next) {
async.eachLimit(obj.files, 1000,
function (file, complete) {
async.waterfall([ // 2
function (next) {
fs.readFile(file, {}, function (err, data) {
next(err, data);
});
},
function (data, next) { // Parse (assuming all well formed)
next(null, JSON.parse(data));
},
function (doc, next) { // Insert
obj.coll.insert(doc, {w: 1}, function (err, doc) {
next(err);
});
}
], function (err, result) { // 3
if (err) {
// Per-file failure: report it and continue with the other files.
console.log(file + ' threw an error');
console.log(err);
console.log('proceeding with execution');
}
complete();
});
},
function (err) { // 4
// err is always falsy here, because complete() above never passes one.
next(null, obj); // 5
}
);
}
], function (err, obj) { // Waterfall end
if (err) console.error(err);
obj.db.close(); // Always close the connection
});

Related

The final callback function of async.each is never called. What am I doing wrong?

This function is a part of async.waterfall
The arr contains 2 objects with foldername, filename, width, height, etc.
I want to perform file operations on each of those files and put those 2 files in another array photoArr.
Then I want to pass that photoArr array to the next function in async.waterfall.
The issue is:
I am unable to reach the callback function, which is the 3rd argument to async.each. The console logs calling next but never logs oops error or here hi.
// Question code: third step of an async.waterfall. Reads and transforms the
// photos described by `arr` (2 entries, per the prose) into photoArr.
function(arr, image, callback) {
console.log("function3");
var photoArr = [];
async.each(arr, function(value, next) {
Jimp.read(`${photosDirectory}/${value["Folder"]}/${value["Photo"]}.jpg`, (err, photo) => {
if(err) next(err);
else {
photo.resize(value["Size-X(cm)"] * 37.8, value["Size Y(cm)"] * 37.8).rotate(-90);
photoArr.push(photo);
// NOTE(review): next(null) is guarded by photoArr.length == 2, so the
// FIRST successful read never signals completion. async.each expects
// its per-item callback to be invoked exactly once per item; because
// one invocation is missing, the final callback below never fires —
// which is exactly the symptom described ("here hi" never logged).
if(photoArr.length == 2) {
console.log('calling next');
next(null);
}
}
});
}, function(err) {
if(err) {
console.log('oops error');
console.log(err);
} else {
console.log("here hi");
console.log(photoArr.length);
callback(err, arr, image, photoArr);
}
});
}

Node.js - synchronous operations: file read followed by updates

Probably this is a promise implementation but would like to check with experts before doing so.
Need to do:
Read entire file line-by-line into MongoDB collection A.
Upon completion of step 1, Insert/Update/Delete documents from collection B based on state in collection A. If document not present in A delete from B.
Problem: Even before completion of step 1 above, step 2 starts execution and starts deleting records from B.
Tried so far: Async.series does not work. Below given is my code.
// Question code: load a fixed-width file into MongoDB (series step 1), then
// process it (series step 2). The asker reports step 2 starts too early.
MongoClient.connect(config.mongodb.uri, function (err, db) {
if (err) {
logger.error('Unable to connect to the mongoDB server. Error:', err);
reject(err); // presumably this runs inside a Promise executor — confirm
} else {
let startTime = new Date();
async.series([
function(callback) {
console.log('First in series');
db.collection('eligibilityStage').drop({}, function (err, oldObject) {
debugger;
var lr = new LineByLineReader(config.eligibiltyFile.fileRemoteLocation + '/' + latestEligibilityfileName);
lr.on('error', function (err) {
console.log(err);
});
// NOTE(review): lineCount is never initialized, so lineCount++ below
// yields NaN on every increment.
var lineCount;
lr.on('line', function (line) { //** --> Jumps from here to second function in series, line#43**
if (line.length == config.eligibiltyFile.detailRecordlineWidth) {
var document = require('fixy').parse({
map: mapData, options: {
skiplines: null, fullwidth: config.eligibiltyFile.detailRecordlineWidth
}
}, line);
db.collection('eligibilityStage').insertOne(document[0], function (err, records) {
lineCount++;
if (err) {
console.log(err);
}
});
}
});
lr.on('end', function () {
console.log('File is closed, read lines:'+lineCount);
// NOTE(review): 'eigibilityStage' is misspelled, and this is not how
// the Node driver exposes collections — likely a runtime error; verify.
console.log('File is closed, rowcount:'+db.eigibilityStage.Count());
});
// BUG (the asker's actual problem): this runs synchronously, right after
// the listeners are attached and BEFORE a single line has been read, so
// async.series immediately starts the second function. It belongs inside
// the 'end' handler above.
callback(null, 'loadStage');
});
},
function(callback) {
// Deletes
// NOTE(review): lineCount was declared inside the first series function
// and is not in scope here — this line would throw a ReferenceError.
console.log('Series 2 function, read lines:'+lineCount);
console.log('Series 2 function, rowcount:'+db.eigibilityStage.Count());
callback(null, 'processStage');
}
],
function(err, results){
});
}
})
Am I doing it wrong? Or is this a standard problem to be solved using promise?

How to perform async.each in async.series

In my function I have to call async.series inside async.forEach to compute the final result and create my JSON. Is that possible?
// Question code, as posted. NOTE(review): the snippet is syntactically
// broken — the first series function closes with "});" instead of "},",
// leaving the array unbalanced.
async.series([
function(callback) {
});
},
function(callback) {
// NOTE(review): the inner iterator's `callback` parameter shadows the
// series `callback` declared just above.
async.forEachSeries(temp,function(quest,callback) {
}, function(err) {
if (err) return next(err);
});
// NOTE(review): called synchronously — it does not wait for
// forEachSeries to finish; it belongs inside the function(err) handler.
callback();
}
],
function(err) {
if (err) return next(err);
res.json(output);
});
You should be able to nest as many async functions into each other as you like; however, it is better to use a naming convention so you can easily track which callbacks are passed where and to avoid collisions due to hoisting. So basically this should work as you'd expect:
// Answer code: the same nesting, with each level's completion callback
// given a distinct name (seriesCallback / eachCallback) so it is always
// clear which one is being invoked.
async.series([
function first(seriesCallback) {
seriesCallback();
},
// other functions in series
// ...
function (seriesCallback) {
var someArray = [];
async.each(someArray, function (value, eachCallback) {
// process the value
// return an error if there is one
// (`err` here is a placeholder for whatever error you produce)
eachCallback(err);
}, function(err) {
// add any additional processing you might need
// pass the control to the parent async method and handle the errors
// in a more central place if there is an error here it will
// be processed in onSeriesDone as well as all other errors
seriesCallback(err);
});
}], function onSeriesDone(err) {
next(err);
});

How to implement Async with Mongoose method

I've got following code now:
// Question code: evaluateFight() is fired for every attack, but (per the
// asker) it runs asynchronously, so res.json may serialize the attacks
// before any evaluation has finished.
exports.listByUser = function(req, res) {
Attack.find({user: req.user._id}, function(err, attacks) {
if(err)
return next(err); // NOTE(review): `next` is not a parameter here — presumably Express middleware; confirm
for(var i in attacks) {
attacks[i].evaluateFight();
}
res.json(attacks);
});
};
the main problem is that attacks[i].evaluateFight() is called asynchronously. I want to transform it to make sure that the [i-1] iteration is done ... and finally call res.json(attacks). I think it can be done with async, but I don't know how :( Something like this should work, but how can I call the attacks method?
// Asker's sketch. NOTE(review): eachSeries invokes its iterator with
// (item, callback); with a single parameter named `callback`, that
// parameter actually receives the attack object itself — the signature
// should be function (attack, callback).
async.eachSeries(attacks, function (callback) {
//something??
callback();
}, function (err) {
if (err) { throw err; }
res.json(attacks);
});
You can leverage async's whilst method to implement the same. However, there is a question I have about the callback of evaluateFight: if it is executed asynchronously then there has to be some callback associated with it which will notify when the previous call has succeeded.
The example code can be as follows assuming evaluateFight returns a callback when completed -
exports.listByUser = function(req, res) {
Attack.find({user: req.user._id}, function(err, attacks) {
if(err)
return next(err);
var attacksLength = attacks.length;
var count = 0;
async.whilst(function () {
return count < attacksLength;
},
function (callback) {
attacks[count].evaluateFight(function(err, result){
count++;
callback();
}); // assuming it returns a callback on success
},
function (err) {
// all the iterations have been successfully called
// return the response
res.json(attacks);
});
};

Call same function many times and process combined result set

I have a requirement to make several API requests and then do some processing on the combined result sets. In the example below, you can see that 3 requests are made (to /create) by duplicating the same request code; however, I would like to be able to specify how many to make. For example, I may wish to run the same API call 50 times.
How can I make n calls without duplicating the API call function n times?
// Question code: three near-identical tasks run in parallel. NOTE(review):
// the third URL is '/api/store/create' while the first two are '/create' —
// an inconsistency in the original post.
async.parallel([
function(callback){
request.post('http://localhost:3000/create')
.send(conf)
.end(function (err, res) {
if (err) {
// NOTE(review): missing `return` — on error, callback runs here AND
// again below, and res.body is read even though the request failed.
// (The same pattern repeats in the two tasks below.)
callback(err, null);
}
callback(null, res.body.id);
});
},
function(callback){
request.post('http://localhost:3000/create')
.send(conf)
.end(function (err, res) {
if (err) {
callback(err, null);
}
callback(null, res.body.id);
});
},
function(callback){
request.post('http://localhost:3000/api/store/create')
.send(conf)
.end(function (err, res) {
if (err) {
callback(err, null);
}
callback(null, res.body.id);
});
}
],
function(err, results){
if (err) {
console.log(err);
}
// do stuff with results
});
First, wrap the code that you want to call many times in a function:
// Wrap the single API call so it can be scheduled any number of times.
// Invokes callback exactly once: with (err) on failure, or (null, id).
var doRequest = function (callback) {
  request.post('http://localhost:3000/create')
    .send(conf)
    .end(function (err, res) {
      if (err) {
        // FIX: `return` here so the callback is not invoked a second time
        // below — and so res.body is not read on a failed request, where
        // res may be undefined.
        return callback(err);
      }
      callback(null, res.body.id);
    });
}
Then, use the async.times function:
// Run doRequest 50 times in parallel; `results` preserves call order.
async.times(50, function (n, next) {
  doRequest(function (err, result) {
    next(err, result);
  });
}, function (error, results) {
  // do something with your results
}); // FIX: the original snippet was missing this closing ")".
Create an array with as many references to the function as you need tasks in your workload. Then pass them to async.parallel. For example:
// Demo: run 100 parallel copies of the same slow task via async.parallel.
var async = require("async");
// Completes after 1 second with the value 1.
var slowone = function (callback) {
setTimeout(function () {
callback(null, 1);
}, 1000);
};
async.parallel(
// dd builds an array of 100 references to the single slowone function.
dd(slowone, 100),
function (err, r) {
console.log(JSON.stringify(r));
}
);
// Returns an array with count instances of value.
// Note: every slot holds the SAME reference — no copies are made.
// @param {*} value - the item to repeat
// @param {number} count - desired array length
// @returns {Array} array of length count, each element === value
function dd(value, count) {
  // Idiomatic replacement for the manual push loop.
  return new Array(count).fill(value);
}
Note again that there is only one instance of the slow running function, in spite of there being many references to it.

Resources