Mongoose schema does not save inside async.forEach loop - node.js

First post here, so apologies if I get things wrong...
I'm creating some standalone code to read a folder structure and return all .mp3 files in an array. Once this has been returned, I loop through the array, and for each item I create a Mongoose object, populate its fields, and save it with .save().
I am looping through the array using async.forEach; while it does iterate over every item in the array, none of them are saved, and no error is produced to help me identify what is wrong.
If I move the logic of the loop elsewhere, the MP3s are stored in the MongoDB database; with the example shown below, nothing is saved.
var saveMP3s = function(MP3Files, callback) {
    console.log('start loop -> saving MP3s');
    async.forEach(MP3Files, function(mp3file, callback) {
        newTrack = new MP3Track();
        newTrack.title = mp3file.title;
        newTrack.track = mp3file.track;
        newTrack.disk = mp3file.disk;
        newTrack.metadata = mp3file.metadata;
        newTrack.path = mp3file.path;
        console.log('....:> Song Start: ');
        console.log(newTrack.title);
        console.log(newTrack.track);
        console.log(newTrack.disk);
        console.log(newTrack.metadata);
        console.log(newTrack.path);
        console.log('....:> Song End: ');
        newTrack.save(function (err) {
            if (err) {
                console.log(err);
            } else {
                console.log('saving Track: ' + newTrack.title);
                callback();
            }
        });
    }, function(err) {
        if (err) { console.log(err); }
    });
    console.log('end loop -> finished saving MP3s');
};
The trouble I have is that when the code is NOT in the async loop it works and the MP3 is saved in the MongoDB database; inside the async loop nothing is saved and no errors are given as to why.
I did try (in an earlier incarnation of the code) to create the objects as soon as I had read the metadata of each MP3 file, but for some reason it would not save the last 2 objects in the list (of 12)... So I've rewritten it to scan all the items first and then populate MongoDB from an array via Mongoose, just to split things up. But I'm having no luck finding out why nothing happens and why there are no errors on the .save().
Any help will be greatly appreciated.
Regards,
Mark

var saveMP3s = function(MP3Files, callback) {
    console.log('start loop -> saving MP3s');
    async.forEach(MP3Files, function(mp3file, callback) {
        var newTrack = new MP3Track(); // <--- USE VAR HERE
        newTrack.title = mp3file.title;
        newTrack.track = mp3file.track;
        newTrack.disk = mp3file.disk;
        newTrack.metadata = mp3file.metadata;
        newTrack.path = mp3file.path;
        console.log('....:> Song Start: ');
        console.log(newTrack.title);
        console.log(newTrack.track);
        console.log(newTrack.disk);
        console.log(newTrack.metadata);
        console.log(newTrack.path);
        console.log('....:> Song End: ');
        newTrack.save(function (err) {
            if (err) {
                console.log(err);
            } else {
                console.log('saving Track: ' + newTrack.title);
                callback();
            }
        });
    }, function(err) {
        if (err) { console.log(err); }
    });
    console.log('end loop -> finished saving MP3s');
};
I suspect that the missing var keyword is declaring a global variable and you are overwriting it on each iteration of the loop. Meaning, before the first newTrack can complete its async save operation, you've already moved on in the loop and overwritten that variable with the next instance.
Also, in async.forEach you MUST invoke the callback when the operation is complete. You are only calling it if the record saves successfully; you should also call it when an error occurs, passing the error along.
Finally, the callback argument to your saveMP3s function never gets called at all. The call to callback() inside the newTrack.save handler only refers to the callback passed into the anonymous iterator by async.forEach. Putting all three fixes together looks like the sketch below.
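A minimal corrected sketch (the inner callback is renamed to done here to avoid shadowing the outer callback; the error handling shown is one reasonable choice, not the only one):
var saveMP3s = function(MP3Files, callback) {
    async.forEach(MP3Files, function(mp3file, done) {
        var newTrack = new MP3Track(); // local variable: one instance per iteration
        newTrack.title = mp3file.title;
        newTrack.track = mp3file.track;
        newTrack.disk = mp3file.disk;
        newTrack.metadata = mp3file.metadata;
        newTrack.path = mp3file.path;
        newTrack.save(function (err) {
            done(err); // always signal completion, passing any error to async
        });
    }, function(err) {
        // runs once every iterator has called done(); only now is it safe
        // to tell the caller of saveMP3s that saving has finished
        callback(err);
    });
};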

SOLVED
Hi,
I ended up solving it by moving the async loop higher up in the logic, so the saveMP3s function was called from within the async loop itself.
Thank you all for your thoughts and suggestions.
Enjoy your day.
mark
(not allowed to Accept my answer until tomorrow, so will update the question status then)

Related

Model.findOne() function not working when passed a variable as the value

I am new to node.js.
I am trying to create a function where a randomly generated String is queried to check whether it already exists. If it does, a new String is randomly generated until it is unique.
let validID = false;
console.log(temp); // temp is the randomly generated String.
while (!validID) {
    Website.findOne({ shortcut: temp }, function(err, docs) {
        if (docs == null) {
            validID = true;
            console.log("The shortcut for the url is" + temp);
        } else {
            console.log("FOUND");
            temp = generateId();
        }
    });
}
When run, the code is stuck in an infinite while loop.
I tried to see whether the code works with a String literal (not a variable) passed in as the query value inside findOne(). It worked. I am assuming that the fact that temp is a variable is causing the problem. Can variables be passed in as a value in a query? If so, what is the correct way?
Website.findOne operates asynchronously, i.e. the callback function you pass to it will be run once the results from MongoDB are fetched. However, node will never actually get to process this callback, since your call stack never empties thanks to your while loop. If you're interested, you can find out more about this here.
One way to solve this is to wrap your MongoDB call in a promise, wait for it to resolve, then return if the ID is unique and otherwise continue by calling it recursively (note that this can be greatly simplified using async/await, but for understanding how it works, promises are beneficial imo):
function findIdPromise(temp) {
    return new Promise((resolve, reject) => {
        Website.findOne({
            shortcut: temp
        }, function (err, docs) {
            if (err) {
                return reject(err);
            }
            resolve(docs);
        });
    });
}

function getNextIsUniqueIdPromise(shortcut) {
    return findIdPromise(shortcut)
        .then(docs => {
            if (docs == null) {
                return shortcut;
            }
            return getNextIsUniqueIdPromise(generateId());
        });
}

// call it initially with
getNextIsUniqueIdPromise(firstShortcutToCheck)
    .then(shortcut => {
        console.log("The shortcut for the url is" + shortcut);
    })
    .catch(err => {
        console.log("an error occured", err);
    });

Error when using moment.max with the results of a mongoDB query

I am working on a nodeJS application that inserts documents into a mongoDB collection. The document has a createdAt field that records the time of creation. I also have a function that logs the most recent match of a query to the console.
Code:
function getResult(player) {
    let timeList = [];
    MongoClient.connect(url, (err, db) => {
        if (err) {
            console.log(err);
        } else {
            let count = db.collection('PokeBook').count({ 'player1': player }); // count function returns a promise
            count.then((val) => {
                db.collection('PokeBook').find({ 'player1': player }).forEach((doc) => {
                    timeList.push(doc.createdAt);
                    console.log(doc.createdAt);
                    if (val === timeList.length) {
                        myEmitter.emit('full', timeList);
                    }
                });
            });
        }
    });
}
//code for the emitter:
myEmitter.on('full', (arr) => {
    console.log('full emitted');
    console.log(moment.max(arr));
});
The code returns an error saying moments[i].isValid is not a function. Commenting out the moment.max line results in "full emitted" being logged successfully.
Any advice on why this happens and how to fix this will be much appreciated. :)
moment.max() expects an array of Moment objects, but doc.createdAt is (probably) a regular Date object. You can try to replace:
timeList.push(doc.createdAt);
with:
timeList.push(moment(doc.createdAt));
so arr would be an array of Moment objects.
Alternatively, you can implement max yourself without using moment, assuming doc.createdAt is either a Date or a Number. Note the numeric comparator: the default sort compares string representations, which is not chronological:
console.log(arr.sort(function (a, b) { return a - b; })[arr.length - 1]);
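For illustration, a minimal standalone check of the moment-wrapping approach (the dates here are invented):
const moment = require('moment');

// wrap plain Dates in moment objects before calling moment.max
const dates = [new Date('2020-01-01'), new Date('2021-06-15')];
const moments = dates.map(d => moment(d));
console.log(moment.max(moments).format()); // logs the 2021 date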

nodejs and mongoskin, callback after all items have been saved

I have the following piece of code where I iterate through a collection and do another db query and construct an object within its callback. Finally I save that object to another collection.
I wish to call another function after all items have been saved, but can't figure out how. I tried using the async library, specifically async.whilst looping while the item is not null, but that just threw me into an infinite loop.
Is there a way to identify when all items have been saved?
Thanks!
var cursor = db.collection('user_apps').find({}, { timeout: false });
cursor.each(function (err, item) {
    if (err) {
        throw err;
    }
    if (item) {
        var appList = item.appList;
        var uuid = item.uuid;
        db.collection('app_categories').find({ schema_name: { $in: appList } }).toArray(function (err, result) {
            if (err) throw err;
            var catCount = _.countBy(result, function (obj) {
                return obj.category;
            });
            catObj['_id'] = uuid;
            catObj['total_app_num'] = result.length;
            catObj['app_breakdown'] = catCount;
            db.collection('audiences').insert(catObj, function (err) {
                if (err) console.log(err);
            });
        });
    }
    else {
        // do something here after all items have been saved
    }
});
The key here is to use something that is going to respect the callback signal when performing the "loop" operation. The .each() as implemented here will not do that, so you need an "async" loop control that signals when each iteration has completed, with its own callback within the callback.
Provided your underlying MongoDB driver is at least version 2, there is a .forEach() with a callback that is called when the loop is complete. This is better than .each(), but it does not solve the problem of knowing when the inner "async" .insert() operations have completed.
So a better approach is to use the stream interface returned by .find(), where more flow control is allowed. There is a .stream() method for backwards compatibility, but modern drivers just return the interface by default:
var stream = db.collection('user_apps').find({});

stream.on("error", function(err) {
    throw err;
});

stream.on("data", function(item) {
    stream.pause(); // pause processing of stream

    var appList = item.appList;
    var uuid = item.uuid;

    db.collection('app_categories').find({ schema_name: { "$in": appList } }).toArray(function (err, result) {
        if (err) throw err;
        var catCount = _.countBy(result, function (obj) {
            return obj.category;
        });
        var catObj = {}; // always re-init
        catObj['_id'] = uuid;
        catObj['total_app_num'] = result.length;
        catObj['app_breakdown'] = catCount;
        db.collection('audiences').insert(catObj, function (err) {
            if (err) console.log(err);
            stream.resume(); // resume stream processing
        });
    });
});

stream.on("end", function() {
    // stream complete and processing done
});
The .pause() method on the stream stops further events from being emitted, so each object result is processed one at a time. When the callback from the .insert() fires, the .resume() method is called, signifying that processing of that item is complete and the next one can be emitted.
When the stream is complete everything is done, so the "end" event hook is called to continue your code.
That way, each loop iteration signals its end before moving to the next, and there is a defined "end" event for the complete end of processing. Because the control sits "inside" the .insert() callback, those operations are respected for completion as well.
As a side note, you might consider including your "category" information in the source collection, since it seems likely your results could be returned more efficiently using .aggregate() if all the required data were in a single collection.
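Purely to illustrate that side note, a hypothetical pipeline, assuming each appList entry were stored together with its category in user_apps:
db.collection('user_apps').aggregate([
    { "$unwind": "$appList" },
    { "$group": {
        "_id": { "uuid": "$uuid", "category": "$appList.category" },
        "count": { "$sum": 1 }
    }}
], function (err, result) {
    if (err) throw err;
    // one document per (uuid, category) pair with its app count
    console.log(result);
});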

how to make this function async in node.js

Here is the situation:
I am new to node.js. I have a 40MB file containing a multilevel JSON structure like:
[{},{},{}] This is an array of objects (~7000 objects). Each object has properties, and one of those properties is itself an array of objects.
I wrote a function to read the content of the file and iterate over it. I succeeded in getting what I wanted in terms of content, but not usability. I thought I had written an async function that would allow node to serve other web requests while iterating the array, but that is not the case. I would be very thankful if anyone can point out what I've done wrong and how to rewrite it as a non-blocking iteration. Here's the function that handles the situation:
function getContents(callback) {
    fs.readFile(file, 'utf8', function (err, data) {
        if (err) {
            console.log('Error: ' + err);
            return;
        }
        js = JSON.parse(data);
        callback();
        return;
    });
}

getContents(iterateGlobalArr);

var count = 0;

function iterateGlobalArr() {
    if (count < js.length) {
        innerArr = js.nestedProp;
        //iterate nutrients
        innerArr.forEach(function(e, index) {
            //some simple if condition here
        });
        var schema = {
            //.....get props from forEach iteration
        };
        Model.create(schema, function(err, post) {
            if (err) {
                console.log('\ncreation error\n', err);
                return;
            }
            if (!post) {
                console.log('\nfailed to create post for schema:\n' + schema);
                return;
            }
        });
        count++;
        process.nextTick(iterateGlobalArr);
    }
    else {
        console.log("\nIteration finished");
        next();
    }
}
Just so it is clear how I've tested the above situation: I open two tabs, one loading this iteration (which takes some time) and a second hitting another node route, which does not load until the iteration is over. So essentially I've written blocking code, but I'm not sure how to refactor it! I suspect that because everything happens in the callback I am unable to release the event loop to handle another request...
Your code is almost correct. What you are doing is inadvertently adding ALL the items to the very next tick... which still blocks.
The important piece of code is here:
Model.create(schema, function(err, post) {
    if (err) {
        console.log('\ncreation error\n', err);
        return;
    }
    if (!post) {
        console.log('\nfailed to create post for schema:\n' + schema);
        return;
    }
});
// add EVERYTHING to the very same next tick!
count++;
process.nextTick(iterateGlobalArr);
Let's say you are in tick A of the event loop when getContents() runs and count is 0. You enter iterateGlobalArr and call Model.create. Because Model.create is async, it returns immediately, and process.nextTick() adds processing of item 1 to the next tick, let's say B. Then it calls iterateGlobalArr again, which does the same thing, adding item 2 to the next tick, which is still B. Then item 3, and so on.
What you need to do is move the count increment and process.nextTick() into the callback of Model.create(). This ensures the current item is processed before nextTick is invoked, which means the next item is actually added to the next tick AFTER the model item has been created, giving your app time to handle other things in between. The fixed version of iterateGlobalArr is here:
function iterateGlobalArr() {
    if (count < js.length) {
        innerArr = js.nestedProp;
        //iterate nutrients
        innerArr.forEach(function(e, index) {
            //some simple if condition here
        });
        var schema = {
            //.....get props from forEach iteration
        };
        Model.create(schema, function(err, post) {
            // schedule our next item to be processed immediately.
            count++;
            process.nextTick(iterateGlobalArr);
            // then move on to handling this result.
            if (err) {
                console.log('\ncreation error\n', err);
                return;
            }
            if (!post) {
                console.log('\nfailed to create post for schema:\n' + schema);
                return;
            }
        });
    }
    else {
        console.log("\nIteration finished");
        next();
    }
}
Note also that I would strongly suggest passing your js and counter in with each call to iterateGlobalArr, as it will make iterateGlobalArr a lot easier to debug, among other things, but that's another story.
Cheers!
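For what it's worth, a minimal sketch of that parameterized shape (same hypothetical js, Model and schema-building as above):
function iterateGlobalArr(js, count) {
    if (count >= js.length) {
        console.log("\nIteration finished");
        return;
    }
    var schema = { /* ...build from js[count]... */ };
    Model.create(schema, function(err, post) {
        if (err) console.log('\ncreation error\n', err);
        // state travels through arguments instead of globals
        process.nextTick(function() {
            iterateGlobalArr(js, count + 1);
        });
    });
}

// kick it off once the file has been parsed
iterateGlobalArr(js, 0);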
Node is single-threaded, so async will only help you if you are relying on another system/subsystem to do the work (a shell script, an external database, a web service, etc.). If you have to do the work in Node, you are going to block while you do it.
It is possible to create one node process per core. This would result in blocking only one of the node processes, leaving the rest to service your requests, but this feature is still listed as experimental: http://nodejs.org/api/cluster.html
A single instance of Node runs in a single thread. To take advantage
of multi-core systems the user will sometimes want to launch a cluster
of Node processes to handle the load.
The cluster module allows you to easily create child processes that
all share server ports.
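A minimal sketch along those lines (the port and response text are arbitrary):
var cluster = require('cluster');
var http = require('http');
var numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
    // fork one worker per core; the master only supervises
    for (var i = 0; i < numCPUs; i++) {
        cluster.fork();
    }
} else {
    // each worker shares the same server port
    http.createServer(function (req, res) {
        res.end('handled by worker ' + process.pid);
    }).listen(8000);
}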

nodejs express fs iterating files into array or object failing

So I'm trying to use the node.js fs module in my Express app to iterate over a directory, store each filename in an array, and pass that array to my view to iterate through the list, but I'm struggling to do so. When I do a console.log within the files.forEach loop, it prints each filename just fine, but as soon as I try to do anything outside it, such as:
var myfiles = [];
var fs = require('fs');

fs.readdir('./myfiles/', function (err, files) {
    if (err) throw err;
    files.forEach(function (file) {
        myfiles.push(file);
    });
});

console.log(myfiles);
it fails and just logs an empty array. So I'm not sure exactly what is going on; I think it has to do with callback functions, but if someone could walk me through what I'm doing wrong, why it's not working (and how to make it work), it would be much appreciated.
The myfiles array is empty because the callback hasn't been called yet by the time you call console.log().
You'll need to do something like:
var fs = require('fs');

fs.readdir('./myfiles/', function(err, files) {
    if (err) throw err;
    files.forEach(function(file) {
        // do something with each file HERE!
    });
});
// because trying to do something with files here won't work because
// the callback hasn't fired yet.
Remember, almost everything in node happens asynchronously, in the sense that, unless you're doing your processing inside your callbacks, you cannot guarantee that asynchronous functions have completed yet.
One way around this problem for you would be to use an EventEmitter:
var fs = require('fs'),
    EventEmitter = require('events').EventEmitter,
    filesEE = new EventEmitter(),
    myfiles = [];

// this event will be called when all files have been added to myfiles
filesEE.on('files_ready', function() {
    console.dir(myfiles);
});

// read all files from current directory
fs.readdir('.', function(err, files) {
    if (err) throw err;
    files.forEach(function(file) {
        myfiles.push(file);
    });
    filesEE.emit('files_ready'); // trigger files_ready event
});
As several have mentioned, you are using an async method, so you have a nondeterministic execution path.
However, there is an easy way around this. Simply use the Sync version of the method:
var myfiles = [];
var fs = require('fs');

var arrayOfFiles = fs.readdirSync('./myfiles/');

// Yes, the following is not super-smart, but you might want to process the files. This is how:
arrayOfFiles.forEach(function (file) {
    myfiles.push(file);
});

console.log(myfiles);
That should work as you want. However, using sync statements is not good, so you should not do it unless it is vitally important for it to be sync.
Read more here: fs.readdirSync
fs.readdir is asynchronous (as with many operations in node.js). This means that the console.log line is going to run before readdir has a chance to call the function passed to it.
You need to either:
Put the console.log line within the callback function given to readdir, i.e:
fs.readdir('./myfiles/', function (err, files) {
    if (err) throw err;
    files.forEach(function (file) {
        myfiles.push(file);
    });
    console.log(myfiles);
});
Or simply perform some action with each file inside the forEach.
I think it has to do with callback functions,
Exactly.
fs.readdir makes an asynchronous request to the file system for that information, and calls the callback at some later time with the results.
So function (err, files) { ... } doesn't run immediately, but console.log(myfiles) does.
At some later point in time, myfiles will contain the desired information.
You should note, BTW, that files is already an array, so there is really no point in manually appending each element to some other blank array. If the idea is to put together the results from several calls, then use .concat; if you just want the data once, you can assign myfiles = files directly.
Overall, you really ought to read up on "Continuation-passing style".
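As a quick sketch of that last point, reusing the myfiles variable from the question:
var fs = require('fs');
var myfiles = [];

fs.readdir('./myfiles/', function (err, files) {
    if (err) throw err;
    myfiles = files;      // the callback already receives a complete array
    console.log(myfiles); // safe here: the data has arrived
});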
I faced the same problem, and based on the answers given in this post I've solved it with Promises, which seem to be a perfect fit in this situation:
router.get('/', (req, res) => {
    var viewBag = {}; // It's just my little habit from .NET MVC ;)

    var readFiles = new Promise((resolve, reject) => {
        fs.readdir('./myfiles/', (err, files) => {
            if (err) {
                reject(err);
            } else {
                resolve(files);
            }
        });
    });

    // showcase just in case you will need to implement more async operations before the route responds
    var anotherPromise = new Promise((resolve, reject) => {
        doAsyncStuff((err, anotherResult) => {
            if (err) {
                reject(err);
            } else {
                resolve(anotherResult);
            }
        });
    });

    Promise.all([readFiles, anotherPromise]).then((values) => {
        viewBag.files = values[0];
        viewBag.otherStuff = values[1];
        console.log(viewBag.files); // logs e.g. [ 'file.txt' ]
        res.render('your_view', viewBag);
    }).catch((errors) => {
        res.render('your_view', { errors: errors }); // you can use the 'errors' property to render errors in the view or implement a different error handling schema
    });
});
Note: you don't have to push the found files into a new array because you already get an array from fs.readdir()'s callback. According to the node docs:
The callback gets two arguments (err, files) where files is an array
of the names of the files in the directory excluding '.' and '..'.
I believe this is a very elegant and handy solution, and best of all, it doesn't require you to bring in and handle new modules in your script.
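On newer Node versions (10+), the same route can be sketched more compactly with fs.promises and async/await (keeping the hypothetical view name from above):
const fsp = require('fs').promises;

router.get('/', async (req, res) => {
    try {
        const files = await fsp.readdir('./myfiles/');
        res.render('your_view', { files: files });
    } catch (errors) {
        res.render('your_view', { errors: errors });
    }
});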
