Node.js - synchronous operations: file read followed by updates - node.js

Probably this is a promise implementation but would like to check with experts before doing so.
Need to do:
Read entire file line-by-line into MongoDB collection A.
Upon completion of step 1, Insert/Update/Delete documents from collection B based on state in collection A. If document not present in A delete from B.
Problem: Even before completion of step 1 above, step 2 starts execution and starts deleting records from B.
Tried so far: Async.series does not work. Below given is my code.
MongoClient.connect(config.mongodb.uri, function (err, db) {
if (err) {
logger.error('Unable to connect to the mongoDB server. Error:', err);
reject(err);
} else {
let startTime = new Date();
async.series([
function(callback) {
console.log('First in series');
db.collection('eligibilityStage').drop({}, function (err, oldObject) {
debugger;
var lr = new LineByLineReader(config.eligibiltyFile.fileRemoteLocation + '/' + latestEligibilityfileName);
lr.on('error', function (err) {
console.log(err);
});
var lineCount;
lr.on('line', function (line) { //** --> Jumps from here to second function in series, line#43**
if (line.length == config.eligibiltyFile.detailRecordlineWidth) {
var document = require('fixy').parse({
map: mapData, options: {
skiplines: null, fullwidth: config.eligibiltyFile.detailRecordlineWidth
}
}, line);
db.collection('eligibilityStage').insertOne(document[0], function (err, records) {
lineCount++;
if (err) {
console.log(err);
}
});
}
});
lr.on('end', function () {
console.log('File is closed, read lines:'+lineCount);
console.log('File is closed, rowcount:'+db.eigibilityStage.Count());
});
callback(null, 'loadStage');
});
},
function(callback) {
// Deletes
console.log('Series 2 function, read lines:'+lineCount);
console.log('Series 2 function, rowcount:'+db.eigibilityStage.Count());
callback(null, 'processStage');
}
],
function(err, results){
});
}
})
Am I doing it wrong? Or is this a standard problem to be solved using promise?

Related

Passing arguments through predefined function in Node.js

I'm struggling with passing data concept in Node.js.
Let's take SQL tedious as example. Here is code from examples:
//acquire a connection
pool.acquire(function poolAcquire(err, connection) {
if (err) { console.error(err); return; }
//use the connection as normal
var request = new Request('select 1;select 2;select 3', function requestCallback (err, rowCount) {
if (err) { console.error(err); return;}
console.log('rowCount: ' + rowCount);
//release the connection back to the pool when finished
connection.release();
});
request.on('row', function onRequestRow(columns) {
console.log('value: ' + columns[0].value);
});
connection.execSql(request);
});
pool.acguire takes function as the argument and this function has particular signature (err,connection)
My question is - how do I pass SQL statement inside this function?
I cannot change signature because different function signature is not called.
Also I cannot use global scope because variable may be changed outside.
In other words I need to find way to bypass wrappers calls and still pass some data.
Something like
var mySQLs = ['select 1;select 2;select 3','select 4;select 5;'];
async.forEach(mySQLs,WorkWithOneItem, AllIsDone);
function WorkWithOneItem(item, callback){
pool.acquire(?????(item));
callback(); // tell async that the iterator has completed
}
function AllIsDone (err) {
console.log('All done');
}
By wrapping it in another function:
function aquire(sql, callback) {
pool.acquire(function poolAcquire(err, connection) {
if (err) { console.error(err); return callback(); }
//use the connection as normal
var request = new Request(sql, function requestCallback (err, rowCount) {
if (err) { console.error(err); return;}
console.log('rowCount: ' + rowCount);
//release the connection back to the pool when finished
connection.release();
callback();
});
request.on('row', function onRequestRow(columns) {
console.log('value: ' + columns[0].value);
});
connection.execSql(request);
});
}
function WorkWithOneItem(item, callback){
acquire(item, () => {
callback(); // tell async that the iterator has completed
});
}
Do you need the results out as well?
var mySQLs = ['select 1;select 2;select 3','select 4;select 5;'];
async.forEach(mySQLs, WorkWithOneItem, AllIsDone);
function WorkWithOneItem(sql, callback){
pool.acquire(function poolAcquire(err, connection) {
if (err) return callback(err);
//use the connection as normal
var request = new Request(sql, function requestCallback (err, rowCount) {
if (err) return callback(err);
console.log('rowCount: ' + rowCount);
//release the connection back to the pool when finished
connection.release();
});
var rows = [];
var count = 0;
request.on('row', function onRequestRow(columns) {
console.log('value: ' + columns[0].value);
rows.push(columns[0].value); // or whatever you actually want out of these.
});
request.on('done', function onRequestDone() {
callback(null, rows);
});
connection.execSql(request);
});
callback(); // tell async that the iterator has completed
}
function AllIsDone (err) {
console.log('All done');
// you probably want async.map, so you can get the result back
// as the second argument for a function like this
}

nodejs: Async and find issues

I have an array, and for each row I need to do findIfExist and save into mongodb. The code is here:
router.post('/namespaceUpload', function(req, res,next) {
var data=req.body;
var totalRows=data.allRows.length;
var conceptObject ={};
var existingConcept;
for (var i=0;i<totalRows;i++){
async.series([
conceptPrepare,
conceptFind,
conceptSave,
], function (err, result) {
console.log('kraj');
res.json('Ok');
});
}
function conceptPrepare(callback){
conceptObject.name= data.allRows[i].name;
conceptObject.user= data.userId;
callback();
}
function conceptFind(callback){
namespaces.find({name: conceptObject.name}, function(err, result) {
if (err)
next(err);
else {
if (result.length==0){
console.log('0');
existingConcept='';
} else {
console.log(result.length);
existingConcept=result[0];
}
}
callback();
});
}
function conceptSave(callback){
var namespace = new namespaces();
if (existingConcept==''){
namespace.name=conceptObject.name;
namespace.description=conceptObject.description;
namespace.lastUpdate.user=conceptObject.user;
namespace.save(function(err) {
if (err)
return next(err);
callback();
})
}
}
So I Used async.series, but only last record is written in database as much times as many array members i have. Also, I get an error " Can't set headers after they are sent." Any idea?
You're getting the Can't set headers after they are sent error message because you 're not allowed to return smtg eg : res.send,res.render more than one time but in your for loop, it goes totalRows times
try to return one value at the end of the loop

nodejs loop async callback function

I am running a cron job with node with mongodb as the database. I am trying to close db connection and exit the process once the curr_1 each loop has executed completely.
However the exit() is called while function_2 is being executed. I understand this is due to the callback and is async in nature.
How do I make sure exit is called only once the curr_1.each is complete?
Any solution without promises?
function function_1(obj){
var curr_1 = coll_1.find({})
curr_1.each(function(err, doc) {
function_2(doc)
});
exit(obj)
}
function function_2(obj) {
coll_2.findOne({}, function(err, document) {
dosomeprocess(obj)
})
}
function exit(obj) {
// Close connection
console.log('closing connection')
obj.db.close();
process.exit();
}
It's a job for Node async....
For example:
async.each(
curr_1, // the collection to iterate over
function(doc, callback) { // the function, which is passed each
// document of the collection, and a
// callback to call when doc handling
// is complete (or an error occurs)
function_2(doc);
},
function(err) { // callback called when all iteratee functions
// have finished, or an error occurs
if (err) {
// handle errors...
}
exit(obj); // called when all documents have been processed
}
);
Without using any library:
function function_1(obj, callback) {
var curr_1 = coll_1.find({})
curr_1.each(function(err, doc) {
callback(err, doc);
});
}
function function_2(err, obj) {
coll_2.findOne({}, function(err, document) {
dosomeprocess(obj)
exit(err, obj);
})
}
function exit(err, obj) {
// Close connection
console.log('closing connection')
obj.db.close();
process.exit();
}
function_1(obj, function_2);
Using async module
var async = require('async');
async.waterfall([
function function_1(callback) {
var curr_1 = coll_1.find({})
curr_1.each(function(err, doc) {
if (err) {
callback(err, null)
} else {
allback(null, doc)
}
});
},
function function_2(obj, callback) {
coll_2.findOne({}, function(err, document) {
if (err) {
callback(err, null);
} else {
dosomeprocess(obj)
callback(null, obj);
}
})
}
], function done() {
obj.db.close();
process.exit();
});
Simply give a condition in your loop using counter.
function function_1(obj){
var curr_1 = coll_1.find({})
var curr_1Length = curr_1.length;
var counter = 0;
curr_1.each(function(err, doc) {
++counter;
//Check condition everytime for the last occurance of loop
if(counter == curr_1Length - 1){
exit(obj)
}
function_2(doc)
});
}
Hope it helps :)

How the Node.js async eachLimit works in this situation?

I wrote a little async script to batch insert a lot of JSON files into a MongoDB sharded cluster. This is my first time with this module (and I'm still learning Node.js). I don't know if I'm doing it right.
The code is the last part of a waterfall (1): previous functions end
up with an object with db, coll and files properties.
files array contains hundred of file paths and the function to
apply to each element of the array is, again, a waterfall (2).
Waterfall (2) is made of the following: read, parse, insert. When this waterfall ends (3) I call complete to finalize the processing of a single item in the array, passing the error (if any).
So far so good, correct?
What I can't understand is what happens inside the async.eachLimit callback (4). From the documentation:
A callback which is called after all the iterator functions have
finished, or an error has occurred.
That is, when all functions have finished, the next() call (5) ends the script. But the same callback (4) is invoked when a single error occurred, as per documentation. That is my script stops when a fail with a single file happens.
How can I avoid this?
async.waterfall([ // 1
// ...
function (obj, next) {
async.eachLimit(obj.files, 1000,
function (file, complete) {
async.waterfall([ // 2
function (next) {
fs.readFile(file, {}, function (err, data) {
next(err, data);
});
},
function (data, next) { // Parse (assuming all well formed)
next(null, JSON.parse(data));
},
function (doc, next) { // Insert
obj.coll.insert(doc, {w: 1}, function (err, doc) {
next(err);
});
}
], function (err, result) { // 3
complete(err);
});
},
function (err) { // 4
if (err) console.error(err);
next(null, obj); // 5
}
);
}
], function (err, obj) { // Waterfall end
if (err) console.error(err);
obj.db.close(); // Always close the connection
});
If you don't want it to break in case of an error you should just invoke the callback with a falsy first parameter, like so (look after // 3).
Is this ok with you / did I understand correctly?
async.waterfall([ // 1
// ...
function (obj, next) {
async.eachLimit(obj.files, 1000,
function (file, complete) {
async.waterfall([ // 2
function (next) {
fs.readFile(file, {}, function (err, data) {
next(err, data);
});
},
function (data, next) { // Parse (assuming all well formed)
next(null, JSON.parse(data));
},
function (doc, next) { // Insert
obj.coll.insert(doc, {w: 1}, function (err, doc) {
next(err);
});
}
], function (err, result) { // 3
if (err) {
console.log(file + ' threw an error');
console.log(err);
console.log('proceeding with execution');
}
complete();
});
},
function (err) { // 4
next(null, obj); // 5
}
);
}
], function (err, obj) { // Waterfall end
if (err) console.error(err);
obj.db.close(); // Always close the connection
});

How to create callback post saving an array of objects?

Assuming I have the following in a function:
exports.addnames = function(req, res) {
var names = ["Kelley", "Amy", "Mark"];
for(var i = 0; i < names.length; i++) {
(function (name_now) {
Person.findOne({ name: name_now},
function(err, doc) {
if(!err && !doc) {
var personDoc = new PersonDoc();
personDoc.name = name_now;
console.log(personDoc.name);
personDoc.save(function(err) {});
} else if(!err) {
console.log("Person is in the system");
} else {
console.log("ERROR: " + err);
}
}
);
)(names[i]);
}
My issue is after I save the names, I want to return the results:
Person.find({}, function(err, doc) {
res.json(200, doc);
})
Though I have a callback for names, it appears that the last block of code (Persons.find({})) gets executed before the calls to save all the names is complete... thusly when the user goes to the url in the browser, "doc" is empty... Is there some way I can ensure that the Persons.find({}) is called after the for loop completes?
The easiest way to do things like this is to use an async library like the aptly named async which can be found at https://github.com/caolan/async.
If you have a list of names that you want to save and then return when complete, it would look like:
// save each of the names asynchronously
async.forEach(names, function(name, done) {
Person.findOne({name: name},
function(err, doc) {
// return immediately if there was an error
if(err) return done(err);
// save the person if it doesn't already exist
if(!doc) {
var personDoc = new PersonDoc();
personDoc.name = name;
console.log(personDoc.name);
// the async call is complete after the save completes
return personDoc.save(done);
}
// or if the name is already there, just return successfully
console.log("Person is in the system");
done();
}
);
},
// this function is called after all of the names have been saved
// or as soon as an error occurs
function(err) {
if(err) return console.log('ERROR: ' + err);
Person.find({}, function(err, doc) {
res.json(200, doc);
})
});

Resources