How does node.js implement async callbacks with a single process? - node.js

I don't know how node implements its amazing idea, and I have a question about using it.
I have to read four files, file1.js, file2.js, file3.js, and file4.js, and concatenate them into one big JavaScript file, result.js. It's important to keep their order.
So the natural choice for me is readFileSync instead of readFile.
I know it's a bad solution. Does anyone have a better idea?
Q: Is it possible for node.js to read four files at the same time?
I hope someone can explain the principles behind node.js and when process.nextTick will be fired.

A: Yes, it is possible for node to read four files at the same time.
Whether to read the files synchronously or asynchronously depends on your situation. If it's configuration data, or the files can be cached, I would suggest just doing it synchronously: it's easy, and it's only done once, so you won't be waiting around very much. Long operations during initialization are typical and can make things more efficient in the long run. That said, reading four files in order asynchronously, so that your program can do other things in the background, isn't that hard. Here are sync and async examples of each.
/* jshint node:true */
var fs = require('fs');

function readFilesSync(fileNames) {
    'use strict';
    var results = '';
    for (var i = 0; i < fileNames.length; i++) {
        results += fs.readFileSync(fileNames[i]);
    }
    return results;
}
function readFiles(fileNames, callback) {
    'use strict';
    var results = '';
    function readFile(index) {
        if (index < fileNames.length) {
            // Read one file, then recurse to the next, preserving order.
            fs.readFile(fileNames[index], function (err, data) {
                // Note: production code should check err here.
                results += data;
                readFile(index + 1);
            });
        } else {
            callback(results);
        }
    }
    readFile(0);
}
function readAllFilesAtOnce(fileNames, callback) {
    'use strict';
    var results = {};
    var numFiles = fileNames.length;
    function callBackWrapper() {
        // Reassemble the results in the original order of fileNames.
        var resultsOrdered = '';
        for (var i = 0; i < fileNames.length; i++) {
            resultsOrdered += results[fileNames[i]];
        }
        callback(resultsOrdered);
    }
    function readFileAsync(fileName) {
        fs.readFile(fileName, function (err, data) {
            results[fileName] = data;
            numFiles--;
            if (numFiles === 0) {
                callBackWrapper();
            }
        });
    }
    // Launch all the reads at once; they complete in any order.
    for (var i = 0; i < fileNames.length; i++) {
        readFileAsync(fileNames[i]);
    }
}
function doSomethingWithTheData(data) {
    'use strict';
    console.log('Results async: ' + data);
}

function doSomethingWithTheData2(data) {
    'use strict';
    console.log('Results async all at once: ' + data);
}

var fileNamesArray = ['blah.js', 'file.js', 'hello.txt'];
console.log('The results sync: ' + readFilesSync(fileNamesArray));
readFiles(fileNamesArray, doSomethingWithTheData);
readAllFilesAtOnce(fileNamesArray, doSomethingWithTheData2);
EDIT: I've added a method above to read all of the files at once.
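To produce the asker's result.js, the concatenated output just needs to be written back out once the reads finish; a minimal sketch using the readFiles function above and the asker's file names:
readFiles(['file1.js', 'file2.js', 'file3.js', 'file4.js'], function (data) {
    'use strict';
    fs.writeFile('result.js', data, function (err) {
        // the files were concatenated in order, so result.js preserves their order
        if (err) {
            console.log('Write failed: ' + err);
        }
    });
});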
process.nextTick does no more than schedule the given function to run once the current operation completes, before the event loop continues. For example:
process.nextTick(function () {
    console.log('never printed out');
});
while (true);
Example 2:
process.nextTick(function () {
    console.log('printed last');
});
console.log('printed first');
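To make the timing concrete, here is one more small sketch contrasting process.nextTick with setTimeout; the labels describe the guaranteed order:
setTimeout(function () {
    console.log('printed third: timers run on a later turn of the event loop');
}, 0);
process.nextTick(function () {
    console.log('printed second: nextTick runs before the event loop continues');
});
console.log('printed first: synchronous code finishes before any callbacks');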

Related

How to make a synchronous call in Node.js

I have the following code snippet:
var array = [1, 2, 3];
var data = 0;
for (var i = 0; i < array.length; i++) {
    asyncFunction(data++);
}
console.log(data);
executeOtherFunction(data);
I am expecting the value of data to be 3, but I see 0 because of asyncFunction. How do I call executeOtherFunction when all the asyncFunction calls are done?
Use async.each:
var async = require('async');
var data = 0;
var array = [1, 2, 3];
async.each(array, function (item, done) {
    asyncFunction(data++, done);
}, function (err) {
    if (err) { /* handle the error */ }
    console.log(data);
    executeOtherFunction(data);
});
(assuming that asyncFunction takes two arguments, a number (data) and a callback)
If asyncFunction is implemented like the following:
function asyncFunction(n) {
    process.nextTick(function () { /* do some operations */ });
}
then you'll have no way of knowing when asyncFunction is actually done executing, because it has already left the call stack. So it will need to notify the caller when execution is complete:
function asyncFunction(n, callback) {
    process.nextTick(function () {
        /* do some operations */
        callback();
    });
}
This uses the simple callback mechanism. If you want to use one of the freakishly many modules that handle this for you, go ahead. But implementing something like this with basic callbacks may not be pretty, but it isn't difficult either:
var array = [1, 2, 3];
var data = 0;
var cntr = 0;

function countnExecute() {
    // Invoke executeOtherFunction only after all the callbacks have fired.
    if (++cntr === array.length) {
        executeOtherFunction(data);
    }
}

for (var i = 0; i < array.length; i++) {
    asyncFunction(data++, countnExecute);
}
Take a look at this module; I think it's what you're looking for:
https://github.com/caolan/async

accessing an array outside the function in node js

I know node.js runs asynchronously, so outer functions execute earlier than inner ones. But what is the way to access the notification array outside the for loop? I would like to access all the values in the array at once; is this feasible?
var notification = [];
for (var j = 0; j < 6; j++) {
    getNotification(response[j].sender_id, function (results) { // a function called
        notification[j] = results;
        console.log(notification); // output: correct
    });
}
console.log(notification); // output: [], need notification array values here
EDIT: If you don't want to use third-party libs, here is how to do this in your own code.
/* jshint node:true */
function getNotifications(responses, callbackToMainProgramLogic) {
    'use strict';
    var results = [];
    function getNotificationAsync(response) {
        getNotification(response.sender_id, function (data) {
            results.push(data);
            if (responses.length) {
                // If there are still responses, launch another async getNotification.
                getNotificationAsync(responses.pop());
            } else {
                // If there aren't, we're done, and we return to the main program flow.
                callbackToMainProgramLogic(results);
            }
        });
    }
    // Note: pop() consumes the array from the end, so results arrive in reverse order.
    getNotificationAsync(responses.pop());
}

getNotifications(someArrayOfResponses, function (dataFromNotifications) {
    console.log('The collected data: ' + JSON.stringify(dataFromNotifications, null, 4));
});
If you absolutely must, you could do something ridiculous like this. Your logic in loopUntilDataReceived would be waiting for array sizes, not for a non-empty string, but the idea is similar, and you shouldn't be using this anyway! :)
var fileData = '';
fs.readFile('blah.js', function (err, data) { // Async operation, similar to your issue.
    'use strict';
    fileData = data;
    console.log('The Data: ' + data);
});

function loopUntilDataReceived() {
    'use strict';
    // A straight while loop would block the event loop, so we check once per turn instead.
    process.nextTick(function () {
        if (fileData === '') {
            console.log('No Data Yet');
            loopUntilDataReceived();
        } else {
            console.log('Finally: ' + fileData);
        }
    });
}
loopUntilDataReceived();
Did I mention this is ridiculous? Honestly, this is an awful idea, but it may help you understand what is going on, how the Node event loop works, and why what you want is not possible. AND why the other posts about callbacks and flow-control libraries are the way to go.
First off, you have a closure issue in your code (please see the details here).
Second, you simply can't have the array values right after the loop, because the values are not ready at that point.
You need to wait until all 6 of your getNotification calls have resolved. You can do that with the async library. Something like:
var async = require('async');
var notification = [];

function createRequest(index) {
    return function (callback) {
        getNotification(response[index].sender_id, function (results) {
            notification[index] = results;
            callback(null, results); // node convention: the first callback argument is the error
        });
    };
}

var requests = [];
for (var j = 0; j < 6; j++) {
    requests.push(createRequest(j));
}

async.parallel(requests, function (err, allResults) {
    // the notification array is ready at this point
    // the data is also available in the allResults array
    console.log(notification);
});
Send a callback into the notification loop and invoke it after the last getNotification call completes, like this:
var notification = [];
getNotificationArray(function () {
    console.log(notification);
});

function getNotificationArray(callback) {
    var finished = 0;
    for (var j = 0; j < 6; j++) {
        getNotification(response[j].sender_id, function (results) { // a function called
            notification[j] = results; // note: the closure issue with j described above still applies here
            console.log(notification);
            if (++finished === 6) {
                callback(); // all six calls have completed
            }
        });
    }
}

NodeJS async queue too fast (Slowing down async queue method)

I have an HTTP GET request, and I want to parse the response and save it to my database.
If I call crawl(i) independently, I get good results. But I have to call crawl() from 1 to 2000.
I get good results, but some responses seem to get lost and some responses are duplicates. I don't think I understand how to call thousands of asynchronous functions. I am using the async module's queue function, but so far I am still missing some data and still have some duplicates. What am I doing wrong here? Thanks for your help.
What I am crawling
My node functions:
function getOptions(i) {
    return {
        host: 'magicseaweed.com',
        path: '/syndicate/rss/index.php?id=' + i + '&unit=uk',
        method: 'GET'
    };
}

function crawl(i) {
    var req = http.request(getOptions(i), function (res) {
        // Note: 'data' fires once per chunk, so body may be only part of the response.
        res.on('data', function (body) {
            parseLocation(body);
        });
    });
    req.end();
}
function parseLocation(body) {
    parser.parseString(body, function (err, result) {
        if (result && typeof result.rss != 'undefined') {
            var locationTitle = result.rss.channel[0].title;
            var locationString = result.rss.channel[0].item[0].link[0];
            var location = new Location({
                id: locationString.split('/')[2],
                name: locationTitle
            });
            location.save();
        }
    });
}
var N = 2; // number of simultaneous tasks
var q = async.queue(function (task, callback) {
    crawl(task.url);
    callback();
}, N);

q.drain = function () {
    console.log('Crawling done.');
};

for (var i = 0; i < 100; i++) {
    q.push({ url: 'http://magicseaweed.com/syndicate/rss/index.php?id=' + i + '&unit=uk' });
}
[EDIT] WELL, after a lot of testing, it seems that the service I am crawling cannot handle so many requests that fast, because when I make each request sequentially, I get all the good responses.
Is there a way to SLOW DOWN the async queue method?
You should have a look at the great async module, which simplifies async tasks like this. You can use queue; simple example:
var N = 5; // number of simultaneous tasks
var q = async.queue(function (task, callback) {
    somehttprequestfunction(task.url, function () {
        callback();
    });
}, N);

q.drain = function () {
    console.log('all items have been processed');
};

for (var i = 0; i < 2000; i++) {
    q.push({ url: 'http://somewebsite.com/' + i + '/feed/' });
}
It keeps a window of ongoing actions, and a slot becomes available for a future task only when you invoke the callback. The difference is that your code currently opens 2000 connections immediately, and obviously the failure rate is high. Limiting it to a reasonable value (5, 10, 20, depending on the site and connection) will result in a better success rate. If a request fails, you can always try it again, or push the task back onto the queue for another trial, as sketched below. The key point is to invoke callback() in the queue function, so that a slot becomes available when a task is done.
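If a request fails, pushing the task back onto the queue is enough for a retry. A rough sketch along the lines of the example above (somehttprequestfunction and the retry limit of 3 are placeholders, not a fixed API):
var q = async.queue(function (task, callback) {
    somehttprequestfunction(task.url, function (err) {
        if (err && task.tries < 3) {
            task.tries++;
            q.push(task); // re-queue the failed task for another attempt
        }
        callback(); // always invoke the callback so the slot frees up
    });
}, 5);
q.push({ url: 'http://somewebsite.com/1/feed/', tries: 0 });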
var q = async.queue(function (task, callback) {
    crawl(task.url);
    callback();
}, N);
With this original code, you're executing the next task immediately after starting the previous one, so the queue is effectively meaningless. You should modify your code like this:
// First, modify your 'crawl' function to take a callback argument,
// and call this callback after the job is done.
// Then:
var q = async.queue(function (task, next /* naming this argument 'next' is more meaningful */) {
    crawl(task.url, function () {
        // after this one is done, start the next one.
        next();
    });
    // or, more simply: crawl(task.url, next);
}, N);
Another option, if you want it: vanilla JS without fancy libraries.
var incrementer = 0;
var resultsArray = [];
var myInterval = setInterval(function () {
    incrementer++;
    if (incrementer == 100) {
        clearInterval(myInterval);
        // when done, parse the results array
    }
    // make request here
    // push request result to array here
}, 500);
This invokes the function every half second. It's an easy way to force the requests into sequence and exit after x of them.
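Filling in the placeholders with a plain http.get against the asker's feed URL; the parsing step is left out, so treat this as a sketch rather than a drop-in solution:
var http = require('http');
var incrementer = 0;
var resultsArray = [];
var myInterval = setInterval(function () {
    incrementer++;
    if (incrementer > 100) {
        clearInterval(myInterval);
        console.log('Collected ' + resultsArray.length + ' responses');
        return;
    }
    // one request per tick; buffer the whole body before using it
    http.get('http://magicseaweed.com/syndicate/rss/index.php?id=' + incrementer + '&unit=uk', function (res) {
        var body = '';
        res.on('data', function (chunk) { body += chunk; });
        res.on('end', function () { resultsArray.push(body); });
    });
}, 500);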
I know I am a little late to the question, but here is a solution I wrote to slow down the number of requests when testing an API endpoint, using node 4 or node 5:
var fs = require('fs');
var supertest = require('supertest');
var request = supertest("http://sometesturl.com/api/test/v1/");
var Helper = require('./check.helper');
var basicAuth = Helper.basicAuth;
var options = Helper.options;

fs.readFile('test.txt', function (err, data) {
    var parsedItems = JSON.parse(data);
    var urlparts = [];
    // create a queue ('range' is assumed to be a helper that yields the years 1975..2016)
    for (let year of range(1975, 2016)) {
        for (var make in parsedItems[year]) {
            console.log(year, make, '/models/' + year + '/' + make);
            urlparts.push({ urlpart: '/models/' + year + '/' + make, year: year, make: make });
        }
    }
    // start dequeue
    waitDequeue();

    // This function calls itself after the makeRequest promise completes
    function waitDequeue() {
        var item = urlparts.pop();
        if (item) {
            makeRequest(item)
                .then(function () {
                    // wait this long before the next dequeue
                    setTimeout(function () {
                        waitDequeue();
                    }, 3000);
                });
        } else {
            write(parsedItems);
        }
    }

    // make a request, mutate parsedItems, then resolve
    function makeRequest(item) {
        return new Promise((resolve, reject) => {
            request
                .get(item.urlpart)
                .set(options.auth[0], options.auth[1])
                .set(options.type[0], options.type[1])
                .end(function (err, res) {
                    if (err) return reject(err);
                    console.log(res.body);
                    res.body.forEach(function (model) {
                        parsedItems[item.year][item.make][model] = {};
                    });
                    resolve();
                });
        });
    }

    // write the results back to the file
    function write(parsedItems) {
        fs.writeFile('test.txt', JSON.stringify(parsedItems, null, 4), function (err) {
            if (err) console.log(err);
        });
    }
});
A little late, but I have found this works!
Using async, you can slow down the queue by using whilst inside the task handler, e.g.:
var q = async.priorityQueue(function (task, callback) {
    // your processing code here for each task
    // when ready to complete the task, delay it by calling
    var count = 0;
    async.whilst( // wait 10 seconds (10 iterations of 1 second each)
        function () {
            return count < 10;
        },
        function (cb) {
            count++;
            setTimeout(function () {
                cb(null, count);
            }, 1000);
        },
        function (err, n) {
            // n seconds have passed
            callback(); // callback to the q handler
        }
    ); // whilst
}, 5);

Handing data over to callbacks

Hi,
I'm experimenting with node.js and its callback mechanism, and now I wonder how to hand data over to such an anonymous callback:
var fs = require('fs');
for (var i = 0; i < 4; i++) {
    console.log('Outer: ' + i);
    fs.readFile('/etc/hosts', 'ascii', function (err, data) {
        console.log('Inner: ' + i);
    });
}
I do understand why the inner i always comes out as 4. But how can I supply some variables to that specific readFile callback function (so that the inner i will have the value of the outer i)? I could imagine some queue mechanism where the callback function acts as a consumer, but as a node beginner I'd like to ask for best practices. Thanks a lot,
mechko
With pure JavaScript (that is, not using libraries such as async):
var fs = require('fs');
var i,
    reader = function (i) {
        // Returning a new function captures the current value of i in a closure.
        return function (err, data) {
            console.log('Inner: ' + i);
        };
    };
for (i = 0; i < 4; i++) {
    console.log('Outer: ' + i);
    fs.readFile('/etc/hosts', 'ascii', reader(i));
}
Or, alternatively,
var fs = require('fs');
var i;
for (i = 0; i < 4; i++) {
    console.log('Outer: ' + i);
    fs.readFile('/etc/hosts', 'ascii', function (i, err, data) {
        console.log('Inner: ' + i);
    }.bind(null, i)); // bind prepends the current i to the callback's arguments
}
(Moving the var i; declaration out of the for loop does not really change anything in this specific example; it is just a code style that acknowledges that JavaScript, unlike languages such as Java, has only function scope for var declarations, so in your original example i is declared for the entire module, not just for the for loop.)
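(For what it's worth, on Node versions with ES6 support a let declaration in the for loop gives each iteration its own binding, which sidesteps the closure issue without a helper; a minimal sketch:)
var fs = require('fs');
for (let i = 0; i < 4; i++) {
    console.log('Outer: ' + i);
    fs.readFile('/etc/hosts', 'ascii', function (err, data) {
        console.log('Inner: ' + i); // let scopes i to each loop iteration
    });
}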
An alternative approach would be to use libraries such as async and underscore, like this:
var _ = require('underscore'),
    async = require('async'),
    fs = require('fs');

async.parallel(_.map(_.range(0, 4), function (i) {
    return async.waterfall.bind(null, [
        fs.readFile.bind(null, '/etc/hosts', 'ascii'),
        function (data, callback) {
            console.log("Inner: " + i);
            callback();
        }
    ]);
}), function (err) {
    if (err) {
        console.log("Some read attempt failed");
    } else {
        console.log("Done reading");
    }
});

Asynchronously reading and caching multiple files in nodejs

I have an array which keeps the URLs of several files. For example:
var files = ['1.html', '2.html', '3.html'];
I need to read them asynchronously and save them in an object named cache (cache = {}).
To do this I used this code:
for (var i = 0; i < files.length; i++) {
    require('fs').readFile(files[i], 'utf8', function (error, data) {
        cache[files[i]] = data;
    });
}
In the end I have this result:
cache = { undefined: 'File 3 content' }
I do understand that readFile acts after the loop has ended and it loses its scope. Is there a way to fix this, or another method to read files from an array and cache them?
When your callback to readFile executes, the for loop will already have finished. So i will be files.length and files[i] will be undefined. To mitigate this, you need to wrap the variables in a closure. The simplest way to do this is to create a function which does your readFile call, and call that in the loop:
function read(file) {
    require('fs').readFile(file, 'utf8', function (error, data) {
        cache[file] = data;
    });
}

for (var i = 0; i < files.length; i++) {
    read(files[i]);
}
For even better execution control, you might want to look into async:
function readAsync(file, callback) {
    fs.readFile(file, 'utf8', callback);
}

async.map(files, readAsync, function (err, results) {
    // results = ['file 1 content', 'file 2 content', ...]
});
Edit: Made use of a helper function for the async example.
The existing answer didn't work for me. I did find an NPM package that did the job: https://www.npmjs.com/package/read-multiple-files. After running npm install read-multiple-files at the command line, here's the code I used:
var readMultipleFiles = require('read-multiple-files');
var files = ['1.html', '2.html', '3.html'];
console.log("\n");

readMultipleFiles(files, 'utf8', function (err, inputFiles) {
    if (err) {
        console.log("Read Error: " + err);
    }
    var fileOne = inputFiles[0];
    var fileTwo = inputFiles[1];
    ...
    console.log(fileOne);
    console.log(fileTwo);
});
