Node.js - execute the anonymous function that was passed to another function - node.js

I understand that Node.js has the concept of event-driven, asynchronous callbacks, by utilizing an event loop.
database.query("SELECT * FROM hugetable", function(rows) { var result = rows; });
console.log("Hello World");
Here, instead of expecting database.query() to directly return a result to us, we pass it a second parameter, an anonymous function.
Now, Node.js can handle the database request asynchronously. Provided that database.query() is part of an asynchronous library, this is what Node.js does: just as before, it takes the query and sends it to the database. But instead of waiting for it to be finished, it makes a mental note that says "When at some point in the future the database server is done and sends the result of the query, then I have to execute the anonymous function that was passed to database.query()."
I am trying the same with a sample code (as I am a newbie and not reached till Node.js DB interactions):
[root#example]# cat server8.js
function myfun(noparm , afterend)
{
for ( var i =0; i < 10; i ++)
console.log("The valus is " + i);
}
function mynextfn()
{
console.log("Hello World");
}
function afterend()
{
console.log("Hello afterend");
}
myfun(0, afterend);
mynextfn();
[root#idc-bldtool01 example]# node server8.js
The valus is 0
The valus is 1
The valus is 2
The valus is 3
The valus is 4
The valus is 5
The valus is 6
The valus is 7
The valus is 8
The valus is 9
Hello World
[root#iexample]#
As such I do not see the " concept of event-driven, asynchronous callbacks, by utilizing an event loop" ?
Can anyone please help me in implementing some basic examples?

You hand the afterend function over as a parameter, but you never call it.
Your function myfun must be:
function myfun(noparm , afterend)
{
for (var i = 0; i < 10; i++) {
console.log("The valus is " + i);
}
afterend();
}
Then that what you expect to will happen ;-)
And: Second thing is that your myfun function is completely synchronous. So there is no chance for Node.js to run mynextfn before the content of the myfun function.
Potentially afterend will be run before myfun, that depends on timing issues as both do not do any heavy lifting.

Related

How to write non-blocking async function in Express request handler [duplicate]

This question already has answers here:
How node.js server serve next request, if current request have huge computation?
(2 answers)
NodeJs how to create a non-blocking computation
(5 answers)
Closed 5 years ago.
All:
I am pretty new to Node async programming, I wonder how can I write some Express request handler which can handle time consuming heavy calculation task without block Express handling following request?
I thought setTimeout can do that to put the job in a event loop, but it still block other requests:
var express = require('express');
var router = express.Router();
function heavy(callback){
setTimeout(callback, 1);
}
router.get('/', function(req, res, next) {
var callback = function(req, res){
var loop = +req.query.loop;
for(var i=0; i<loop; i++){
for(var j=0; j<loop; j++){}
}
res.send("finished task: "+Date.now());
}.bind(null, req, res);
heavy(callback)
});
I guess I did not understand how setTimeout works(my understanding about setTimeout is after that 1ms delay it will fire up the callback in a seperated thread/process without blocking other call of heavy), could any one show me how to do this without blocking other request to heavy()?
Thanks
Instead of setTimeout it's better to use process.nextTick or setImmediate (depending od when you want your callback to be run). But it is not enough to put a long running code into a function because it will still block your thread, just a millisecond later.
You need to break your code and run setImmediate or process.nextTick multiple times - like in every iteration and then schedule a new iteration from that. Otherwise you will not gain anything.
Example
Instead of a code like this:
var a = 0, b = 10000000;
function numbers() {
while (a < b) {
console.log("Number " + a++);
}
}
numbers();
you can use code like this:
var a = 0, b = 10000000;
function numbers() {
var i = 0;
while (a < b && i++ < 100) {
console.log("Number " + a++);
}
if (a < b) setImmediate(numbers);
}
numbers();
The first one will block your thread (and likely overflow your call stack) and the second one will not block (or, more precisely, it will block your thread 10000000 times for a very brief moment, letting other stuff to run in between those moments).
You can also consider spawning an external process or writing a native add on in C/C++ where you can use threads.
For more info see:
How node.js server serve next request, if current request have huge computation?
Maximum call stack size exceeded in nodejs
Node; Q Promise delay
How to avoid jimp blocking the code node.js
NodeJS, Promises and performance

NODEJS: Uncork() method on writable stream doesn't really flush the data

I am writing quite simple application to transform data - read one file and write to another. Files are relatively large - 2 gb. However, what I found is that flush to the file system is not happening, on cork-uncork cycle, it only happens on end(), so the end() basically hangs the system until it's fully flashed.
I simplified the example so it just writes a line to the stream a lot of times.
var PREFIX = 'E:\\TEST\\';
var line = 'AA 11 999999999 20160101 123456 20160101 AAA 00 00 00 0 0 0 2 2 0 0 20160101 0 00';
var fileSystem = require('fs');
function writeStrings() {
var stringsCount = 0;
var stream = fileSystem.createWriteStream(PREFIX +'output.txt');
stream.once('drain', function () {
console.log("drained");
});
stream.once('open', function (fileDescriptor) {
var started = false;
console.log('writing file ');
stream.cork();
for (i = 0; i < 2000000; i++) {
stream.write(line + i);
if (i % 10000 == 0) {
// console.log('passed ',i);
}
if (i % 100000 == 0) {
console.log('uncorcked ',i,stream._writableState.writing);
stream.uncork();
stream.cork();
}
}
stream.end();
});
stream.once('finish', function () {
console.log("done");
});
}
writeStrings();
going inside the node _stream_writable.js, I found that it flushes the buffer only on this condition:
if (!state.writing &&
!state.corked &&
!state.finished &&
!state.bufferProcessing &&
state.buffer.length)
clearBuffer(this, state);
and, as you can see from example, the writing flag doesn't set back after first uncork(), which prevents the uncork to flush.
Also, I don't see drain events evoking at all. Playing with highWaterMark doesn't help (actually doesn't seems to have effect on anything). Manually setting the writing to false (+ some other flags) indeed helped but this is surely wrong.
Am I am misunderstanding the concept of this?
From the node.js documentation I found that number of uncork() should match the number of cork() call, I am not seeing matching stream.uncork() call for stream.cork(), which is called before the for loop. That might be the issue.
Looking at a guide on nodejs.org, you aren't supposed to call stream.uncork() twice in the same event loop. Here is an excerpt:
// Using .uncork() twice here makes two calls on the C++ layer, rendering the
// cork/uncork technique useless.
ws.cork();
ws.write('hello ');
ws.write('world ');
ws.uncork();
ws.cork();
ws.write('from ');
ws.write('Matteo');
ws.uncork();
// The correct way to write this is to utilize process.nextTick(), which fires
// on the next event loop.
ws.cork();
ws.write('hello ');
ws.write('world ');
process.nextTick(doUncork, ws);
ws.cork();
ws.write('from ');
ws.write('Matteo');
process.nextTick(doUncork, ws);
// as a global function
function doUncork(stream) {
stream.uncork();
}
.cork() can be called as many times we want, we just need to be careful to call .uncork() the same amount of times to make it flow again.

phantomJS scraping with breaks not working

I'm trying to scrape some URLS from a webservice, its working perfect but I need to scrape something like 10,000 pages from the same web servicve.
I do this by creating multiple phantomJS processes and they each open and evaluate a different URL (Its the same service, all I change is one parameter in the URL of the website).
Problem is I don't want to open 10,000 pages at once, since I don't want their service to crash, and I don't want my server to crash either.
I'm trying to make some logic of opening/evaluating/insertingToDB ~10 pages, and then sleeping for 1 minute or so.
Let's say this is what I have now:
var numOfRequests = 10,000; //Total requests
for (var dataIndex = 0; dataIndex < numOfRequests; dataIndex++) {
phantom.create({'port' : freeport}, function(ph) {
ph.createPage(function(page) {
page.open("http://..." + data[dataIncFirstPage], function(status) {
I want to insert somewhere in the middle something like:
if(dataIndex % 10 == 0){
sleep(60); //I can use the sleep module
}
Every where I try to place sleepJS the program crashes/freezes/loops forever...
Any idea what I should try?
I've tried placing the above code as the first line after the for loop, but this doesn't work (maybe because of the callback functions that are waiting to fire..)
If I place it inside the phantom.create() callback also doesn't work..
Realize that NodeJS runs asynchronously and in your for-loop, each method call is being executing one after the other. That phantom.create call finishes near immediately, and then the next cycle of the for-loop kicks in.
To answer your question, you want the sleep command at the end of the phantom.create block, still in side the for-loop. Like this:
var numOfRequests = 10000; // Total requests
for( var dataIndex = 0; dataIndex < numOfRequests; dataIndex++ ) {
phantom.create( { 'port' : freeport }, function( ph ) {
// ..whatever in here
} );
if(dataIndex % 10 == 0){
sleep(60); //I can use the sleep module
}
}
Also, consider using a package to help with these control flow issues. Async is a good one, and has a method, eachLimit that will concurrently run a number of processes, up to a limit. Handy! You will need to create an input object array for each iteration you wish to run, like this:
var dataInputs = [ { id: 0, data: "/abc"}, { id : 1, data : "/def"} ];
function processPhantom( dataItem, callback ){
console.log("Starting processing for " + JSON.stringify( dataItem ) );
phantom.create( { 'port' : freeport }, function( ph ) {
// ..whatever in here.
//When done, in inner-most callback, call:
//callback(null); //let the next parallel items into the queue
//or
//callback( new Error("Something went wrong") ); //break the processing
} );
}
async.eachLimit( dataInputs, 10, processPhantom, function( err ){
//Can check for err.
//It is here that everything is finished.
console.log("Finished with async.eachLimit");
});
Sleeping for a minute isn't a bad idea, but in groups of 10, that will take you 1000 minutes, which is over 16 hours! Would be more convenient for you to only call when there is space in your queue - and be sure to log what requests are in process, and have completed.

Node.js function with callback dos't run async

I am trying to run a function in a async way, but I always recive the result in the order that i call them instead of receive the quickest function.
This is an example. The expected result is to print first 100 even it is called last.
function f1(callback){
i = 0;
for (i;i< 100;i++){
}
if (i == 100){
return callback(i);
}
}
function f2(callback){
i = 0;
for (i;i< 99999999999;i++){
}
if (i == 99999999999){
return callback(i);
}
}
f2(function(i){
console.log(i);
})
f1(function(i){
console.log(i);
});
In node (and in javascript in general), functions are not asynchronous by default.
In your code above, at no point are you yielding control to the javascript's event loop. As you call each function, it runs through its for loop until finished and then calls back which is why you are seeing the output the way you are.
The asynchronous nature of node kicks in when you start doing IO or purposely yield control to the VM and give it a chance to process other events off the event loop.
To see the code above run in an asynchronous fashion for the purposes of your test:
function f1(callback){
setTimeout(callback('f1'), 100);
}
function f2(callback){
setTimeout(callback('f2'), 500);
}
f2(function(i){
console.log(i);
})
f1(function(i){
console.log(i);
});
The setTimeout call basically just says: call back to the provided function as close to n milliseconds as you can (but not before). Basically acting as a sleep statement here and yielding control back to the currently executing code.

How to setTimeout in node.js?

I need to be able to make retries in node.js in the event of failure inside a function. I've setup a while loop as shown below, but I am getting slightly confused about how I should wrap the function call to not make sure that it won't block my whole server.
What should I do?
while(retryCount < 10 && !success){
// Alternative one
while(new Date().getTime() < now + 1000) {
myFunction();
}
// Or:
setTimeout( myFunction(), 1000);
}
You can store number of tryes in function object. It's will be fine for cronjob. If you need same behaviour in request context you must store attempts counter in request scope (not in function object).
var fnc = function() {
console.log('try');
if (true) { // Error condition
// Error here
if (!fnc.tryes) fnc.tryes = 0;
fnc.tryes++;
console.log(fnc.tryes);
if (fnc.tryes <= 10) {
setTimeout(fnc, 1000);
} else {
fnc.tryes = 0;
}
// Something wrong
} else {
// We hame result
}
};
fnc();
I'd say use the setTimeout method, that way the client won't be stuck inside the while loop that checks the time.
That outer while loop is going to block, you'd have to refactor using only setTimeout. However, the fact that you want this sort of thing indicates to me that your code structure is really terrible and needs more reworking. What is it that you are retrying? How are you detecting an error condition? Does doing it 10 times really make the chances of success higher?
I have a gist containing a generic function that will do this sort of thing for you, but I'm reluctant to share if this is an XY problem.

Resources