HTTP request stream not firing readable when reading fixed sizes - node.js

I am trying to work with the new Streams API in Node.js, but I am having trouble when specifying a fixed read buffer size.
var http = require('http');
var req = http.get('http://143.226.75.100/waug_mp3_128k', function (res) {
  res.on('readable', function () {
    var receiveBuffer = res.read(1024);
    console.log(receiveBuffer.length);
  });
});
This code will receive a few buffers and then exit. However, if I add this line after the console.log() line:
res.read(0);
... all is well again. My program continues to stream as predicted.
Why is this happening? How can I fix it?

It's explained here.
As far as I understand it, by reading only 1024 bytes on each readable event, Node is left to assume that you're not interested in the rest of the data sitting in the stream's buffers, and it discards it. Issuing the read(0) (in the same event loop iteration) 'resets' this behaviour. I'm not sure why the process exits after reading a couple of 1024-byte buffers, though; I can recreate it, but I don't understand it yet :)
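If you do want to keep the fixed-size reads, one common pattern (a sketch based on the code above, not part of the original answer) is to guard against the null that res.read(1024) returns when fewer than 1024 bytes are buffered, drain what you can in a loop, and then issue the read(0) as described:
// Sketch: same request as above, but reading fixed 1024-byte slices safely.
res.on('readable', function () {
  var receiveBuffer;
  // read(1024) returns null once fewer than 1024 bytes remain buffered
  while ((receiveBuffer = res.read(1024)) !== null) {
    console.log(receiveBuffer.length);
  }
  // per the explanation above, this keeps the leftover bytes from being dropped
  res.read(0);
});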
If you don't have a specific reason to use the 1024-byte reads, just read the entire buffer for each event:
var receiveBuffer = res.read();
Or, instead of using non-flowing mode, switch to flowing mode by listening for the data/end events:
var http = require('http');
var req = http.get('http://143.226.75.100/waug_mp3_128k', function (res) {
  var chunks = [];
  res.on('data', function (chunk) {
    chunks.push(chunk);
    console.log('chunk:', chunk.length);
  });
  res.on('end', function () {
    var result = Buffer.concat(chunks);
    console.log('final result:', result.length);
  });
});

Related

Read chunk from Gridfs and convert to Buffer

I have a question about buffers. Here is my code:
var Grid = require('gridfs-stream');
var mongodb = require('mongodb');
var Q = require('q');

var gfs = Grid(db, mongodb);
var deferred = Q.defer();
var image_buf = new Buffer('buffer');
var readableStream = gfs.createReadStream(name);
readableStream.on('data', function (chunk) {
  console.log(chunk);
  image_buf = Buffer.concat([image_buf, chunk]);
  console.log(image_buf); // differs from the chunk above
});
readableStream.on('end', function () {
  db.close();
  deferred.resolve(image_buf);
});
return deferred.promise;
What I'm doing is reading an image from MongoDB through gridfs-stream. I want to collect all of the chunks in the stream into another variable so that I can reuse them to draw the image in another API, which is why I use image_buf and Buffer. However, I get a completely different buffer string. As you can see in the code above, I logged both the chunk and the resulting image_buf, and they are totally different. Can anyone tell me the reason for this and how I can correctly collect all the chunks? Thanks a lot!
UPDATE: OK, I figured it out. I'll append my code below for anyone who is struggling with the same problem:
readableStream.on('data', function (chunk) {
  console.log("writing!!!");
  if (!image_buf)
    image_buf = chunk;
  else
    image_buf = Buffer.concat([image_buf, chunk]);
});
The update provided by the question poster does not work, so I am going to provide an answer of my own. The reason the original code produces a different buffer is that new Buffer('buffer') creates a six-byte buffer containing the literal string "buffer", so every chunk gets concatenated onto that garbage. Instead of starting from new Buffer('buffer'), it is better to use a simple array, push the chunks into it, and call Buffer.concat(bufferArray) at the end to get the buffer for the whole stream, like this:
var readableStream = gfs.createReadStream(name);
var bufferArray = [];
readableStream.on('data', function (chunk) {
  bufferArray.push(chunk);
});
readableStream.on('end', function () {
  var buffer = Buffer.concat(bufferArray);
  deferred.resolve(buffer);
});
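For completeness, a hypothetical caller of the code above could consume the resolved buffer like this (readImage is an assumed wrapper around the code above that returns deferred.promise; it is not part of the original post):
// Hypothetical usage; readImage is an assumed wrapper that returns deferred.promise.
readImage(name).then(function (buffer) {
  console.log('image size in bytes:', buffer.length);
}, function (err) {
  console.error('failed to read image:', err);
});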

request.on in http.createServer(function(request,response) {});

var http = require('http');
var map = require('through2-map');

var uc = map(function (ch) {
  return ch.toString().toUpperCase();
});

var server = http.createServer(function (request, response) {
  request.on('data', function (chunk) {
    if (request.method == 'POST') {
      // change the data from request to uppercase letters and
      // pipe to response.
    }
  });
});

server.listen(8000);
I have two questions about the code above. First, I read the documentation for request, which said that request is an instance of IncomingMessage and implements the Readable Stream interface. However, I couldn't find the .on method in the Stream documentation, so I don't know what chunk in the callback passed to request.on does. Secondly, I want to do some manipulation of the data from request and pipe it to response. Should I pipe from chunk or from request? Thank you for your consideration!
Is chunk a stream?
Nope. The stream is the flow along which the chunks of the whole data are sent.
A simple example: if you read a 1 GB file, a stream might read it in chunks of 10 KB; each chunk passes through your stream, from beginning to end, in the right order.
I use a file as an example, but a socket, a request, or any other stream is based on the same idea.
Also, whenever someone sends a request to this server, would that entire thing be a chunk?
In the particular case of HTTP requests, only the request body is a stream; it can be the posted files/data. The same is true of the response body. Headers are treated as objects and applied to the request before the body is written to the socket.
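To make that split concrete (a minimal sketch, not part of the original answer): the headers arrive as a plain object, while the body is what you consume as a stream:
var http = require('http');

http.createServer(function (req, res) {
  console.log(req.headers['content-type']); // headers: already a plain object
  req.pipe(res);                            // body: a readable stream you can pipe
}).listen(8000);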
A slightly bigger example with some concrete code, to show chunks flowing through pipes:
var through2 = require('through2');
var Readable = require('stream').Readable;

var s1 = through2(function transform(chunk, enc, cb) {
  console.log("s1 chunk %s", chunk.toString());
  cb(null, chunk.toString() + chunk.toString());
});
var s2 = through2(function transform(chunk, enc, cb) {
  console.log("s2 chunk %s", chunk.toString());
  cb(null, chunk);
});
s2.on('data', function (data) {
  console.log("s2 data %s", data.toString());
});
s1.on('end', function () {
  console.log("s1 end");
});
s2.on('end', function () {
  console.log("s2 end");
});

var rs = new Readable;
rs.push('beep '); // this is a chunk
rs.push('boop');  // this is a chunk
rs.push(null);    // this is a signal to end the stream
rs.on('end', function () {
  console.log("rs end");
});

console.log(
  ".pipe always returns the piped stream: %s", rs.pipe(s1) === s1
);
s1.pipe(s2);
I would suggest reading more here:
https://github.com/substack/stream-handbook
http://maxogden.com/node-streams.html
https://github.com/maxogden/mississippi
All streams are instances of EventEmitter (docs); that is where the .on method comes from.
Regarding the second question, you MUST pipe from the Stream object (request in this case). The "data" event emits data as a Buffer or a String (the "chunk" argument in the event listener), not a stream.
Manipulating streams is usually done by implementing a Transform stream (docs). There are many npm packages that make this process simpler (like through2-map), but in reality they all produce Transform streams.
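For comparison, here is a minimal hand-rolled Transform built directly on the core stream module (a sketch, equivalent in spirit to what through2-map generates for you):
var Transform = require('stream').Transform;
var util = require('util');

// A Transform stream that uppercases every chunk passing through it.
function Upper(options) {
  Transform.call(this, options);
}
util.inherits(Upper, Transform);

Upper.prototype._transform = function (chunk, enc, cb) {
  cb(null, chunk.toString().toUpperCase());
};

// Usage: request.pipe(new Upper()).pipe(response);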
Consider the following:
var http = require('http');
var map = require('through2-map');

// Transform stream to uppercase
var uc = map(function (ch) {
  return ch.toString().toUpperCase();
});

var server = http.createServer(function (request, response) {
  // pipe from the request to our transform stream
  request
    .pipe(uc)
    // pipe from the transform stream to the response
    .pipe(response);
});

server.listen(8000);
You can test it by running curl:
$ curl -X POST -d 'foo=bar' http://localhost:8000
# responds with FOO=BAR

Does the new way to use streams (streams2) in node create blocking?

I realize that Node is non-blocking; however, I also realize that because Node has only one thread, putting a three-second while loop in the middle of your event loop will cause blocking. For example:
var start = new Date();
console.log('Test 1');

function sleep(time, words) {
  while (new Date().getTime() < start.getTime() + time);
  console.log(words);
}

sleep(3000, 'Test 2'); // This will block
console.log('Test 3'); // Logs Test 1, Test 2, Test 3
Many of the examples I have seen dealing with the new "Streams2" interface look like they would cause this same blocking. For instance this one, borrowed from here:
var crypto = require('crypto');
var fs = require('fs');

var readStream = fs.createReadStream('myfile.txt');
var hash = crypto.createHash('sha1');

readStream
  .on('readable', function () {
    var chunk;
    while (null !== (chunk = readStream.read())) {
      hash.update(chunk); // DOESN'T this cause blocking?
    }
  })
  .on('end', function () {
    console.log(hash.digest('hex'));
  });
If I am following this correctly, the readStream will emit the readable event when there is data in the buffer. So it seems that once the readable event is emitted, the entire event loop would be stopped until readStream.read() returns null. This seems less desirable than the old way (because the old way would not block). Can somebody please tell me why I am wrong? Thanks.
You don't have to read until the internal stream buffer is empty. You could just read once if you wanted and then read another chunk some time later.
readStream.read() itself is not blocking, but hash.update(chunk) is (for a brief amount of time) because the hashing is done on the main thread (there is a github issue about adding an async interface that would execute crypto functions in the thread pool though).
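Here is a sketch of the "read once, come back later" idea (not from the original answer), using the same readStream and hash as in the question and yielding to the event loop between chunks:
// Read one chunk at a time and defer the next read with setImmediate,
// so other events can run in between hash updates.
function step() {
  var chunk = readStream.read();
  if (chunk !== null) {
    hash.update(chunk);
    setImmediate(step); // yield, then continue draining
  }
  // when read() returns null, simply wait for the next 'readable' event
}

readStream.on('readable', step);
readStream.on('end', function () {
  console.log(hash.digest('hex'));
});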
Also, you can simplify your code by using the crypto stream interface:
var crypto = require('crypto'),
    fs = require('fs');

var readStream = fs.createReadStream('myfile.txt'),
    hasher = crypto.createHash('sha1');

readStream.pipe(hasher).on('readable', function () {
  // the hash stream automatically pushes the digest
  // to the readable side once the writable side is ended
  console.log(this.read());
}).setEncoding('hex');
All JS code is single-threaded, so a loop will block, but you are misunderstanding how long that loop will run for. Calling .read() takes a readable item from the stream, just as a 'data' handler would be called with that item. The loop stops executing and unblocks as soon as there are no more items. 'readable' is triggered whenever there is data; the handler then empties the buffer and waits for another 'readable'. So whereas your first while loop relies on the clock advancing, which could take an unbounded amount of time, the other loop is basically doing:
while (items.length > 0) items.pop()
which is pretty much the minimum amount of work you need to do to process items from a stream.

Node Streaming, Writing, and Memory

I'm attempting to dynamically concatenate files prior to serving their content. The following very simplified code shows an approach:
var http = require('http');
var fs = require('fs');

var start = '<!doctype html><html lang="en"><head><script>';
var funcsA = fs.readFileSync('functionsA.js', 'utf8');
var funcsB = fs.readFileSync('functionsB.js', 'utf8');
var funcsC = fs.readFileSync('functionsC.js', 'utf8');
var finish = '</script></head><body>some stuff here</body></html>';

var output = start + funcsA + funcsB + funcsC + finish;

http.createServer(function (req, res) {
  res.writeHead(200, {'Content-Type': 'text/html'});
  res.end(output);
}).listen(9000);
In reality, how I concatenate might depend on clues from the userAgent. My markup and scripts could be several hundred kilobytes combined.
I like this approach because there is no file system I/O happening within createServer. I seem to have read somewhere that this response.write(...) approach is not as efficient/low-overhead as streaming the data with fs.createReadStream; I seem to recall this had something to do with what happens when the client cannot receive data as fast as Node can send it (?). We seem to be able to create a readable stream from a file system object, but not from memory. Is it possible to do what I have coded above with a streaming approach, with the file I/O happening initially, outside of the createServer function?
Or, on the other hand, are my concerns not that critical, and does the approach above offer roughly the same efficiency as a streaming approach?
Thanks.
res.write(start)

var A = fs.createReadStream()
var B = fs.createReadStream()
var C = fs.createReadStream()

A.pipe(res, { end: false })
A.on('end', function () {
  B.pipe(res, { end: false })
})
B.on('end', function () {
  C.pipe(res, { end: false })
})
C.on('end', function () {
  res.write(finish)
  res.end()
})
Defining the streams prior to (and not inside) the createServer callback won't typically work; see here.
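Putting the pieces together, here is a sketch of the full server (not part of the original answer, assuming the same files and port as in the question), with the read streams created per request inside the callback:
var http = require('http');
var fs = require('fs');

var start = '<!doctype html><html lang="en"><head><script>';
var finish = '</script></head><body>some stuff here</body></html>';

http.createServer(function (req, res) {
  res.writeHead(200, {'Content-Type': 'text/html'});
  res.write(start);

  // create fresh streams for every request, inside the callback
  var A = fs.createReadStream('functionsA.js');
  var B = fs.createReadStream('functionsB.js');
  var C = fs.createReadStream('functionsC.js');

  A.pipe(res, { end: false });
  A.on('end', function () { B.pipe(res, { end: false }); });
  B.on('end', function () { C.pipe(res, { end: false }); });
  C.on('end', function () {
    res.write(finish);
    res.end();
  });
}).listen(9000);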

Abort ReadStream on client connection close

I am trying to send a (huge) file with a limited amount of data passing every second (using TooTallNate/node-throttle):
var fs = require('fs');
var Throttle = require('throttle');

var throttle = new Throttle(64);

throttle.on('data', function (data) {
  console.log('send', data.length);
  res.write(data);
});
throttle.on('end', function () {
  console.log('error', arguments);
  res.end();
});

var stream = fs.createReadStream(filePath).pipe(throttle);
If I cancel the download in the client's browser, the stream just continues until it has been completely transferred.
I also tested the scenario above with the npm package node-throttled-stream; same behaviour.
How can I cancel the stream if the browser closes its request?
Edit:
I am able to obtain the connection's close event by using
req.connection.on('close', function () {});
But the stream has neither a destroy nor an end/stop method which I could use to stop it from reading any further.
It does provide the pause property (Doc), but I would rather stop Node from reading the whole file than merely stop receiving its contents (as described in the doc).
I ended up using the following dirty workaround:
var aborted = false;

stream.on('data', function (chunk) {
  if (aborted) return res.end();
  // stream contents
});

req.connection.on('close', function () {
  aborted = true;
  res.end();
});
As mentioned above, this isn't really a nice solution, but it works.
Any other solution would be highly appreciated!
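One less dirty possibility (a sketch, not from the original post) is to unhook the throttle and destroy the underlying file stream when the connection closes; fs read streams have had a destroy() method for a long time, although it went undocumented in older releases:
var fileStream = fs.createReadStream(filePath);
var stream = fileStream.pipe(throttle);

req.connection.on('close', function () {
  fileStream.unpipe(throttle); // stop feeding the throttle
  fileStream.destroy();        // stop reading the file and release the fd
  res.end();
});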
