Read chunk from Gridfs and convert to Buffer - node.js

I have a question about buffers. Here is my code:
var Grid = require('gridfs-stream');
var mongodb = require('mongodb');
var gfs = Grid(db, mongodb);
var deferred = Q.defer();
var image_buf = new Buffer('buffer');
var readableStream = gfs.createReadStream(name);
readableStream.on('data', function (chunk) {
    console.log(chunk);
    image_buf = Buffer.concat([image_buf, chunk]);
    console.log(image_buf); // differs from the chunk above
});
readableStream.on('end', function () {
    db.close();
    deferred.resolve(image_buf);
});
return deferred.promise;
What I'm doing is reading an image from MongoDB through gridfs-stream. I want to collect all the chunks from the stream and pass them to another variable so that I can reuse them to draw the image in another API, which is why I use image_buf and Buffer. However, I get a completely different buffer string. As you can see in the code above, I logged both the chunk and the image_buf I got, and they are totally different. Can anyone tell me why this happens and how I can correctly collect all the chunks? Thanks a lot!!!
UPDATE: OK, I figured it out. I'll append my code below for anyone who is struggling with the same problem:
readableStream.on('data', function (chunk) {
    console.log("writing!!!");
    if (!image_buf)
        image_buf = chunk;
    else
        image_buf = Buffer.concat([image_buf, chunk]);
});

The update provided by the question poster does not work, so I am going to provide an answer of my own. Instead of using new Buffer('buffer'), it is better to use a simple array, push the chunks into it, and call Buffer.concat(bufferArray) at the end to get the buffer of the stream, like this:
var readableStream = gfs.createReadStream(name);
var bufferArray = [];
readableStream.on('data', function (chunk) {
    bufferArray.push(chunk);
});
readableStream.on('end', function () {
    var buffer = Buffer.concat(bufferArray);
    deferred.resolve(buffer);
});
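For completeness, here is a hedged sketch (the function name and the error handling are my own additions, not part of the answer) that wraps the array-of-chunks pattern above in a promise, mirroring the question's use of Q and its gfs.createReadStream(name) call:
var Q = require('q');

// Collect a GridFS file into a single Buffer; 'gfs' and 'name' are the same
// objects as in the question above. This is only a sketch of the pattern.
function readImageBuffer(gfs, name) {
    var deferred = Q.defer();
    var bufferArray = [];
    var readableStream = gfs.createReadStream(name); // called as in the question
    readableStream.on('data', function (chunk) {
        bufferArray.push(chunk);
    });
    readableStream.on('error', function (err) {
        deferred.reject(err);
    });
    readableStream.on('end', function () {
        deferred.resolve(Buffer.concat(bufferArray));
    });
    return deferred.promise;
}

// usage: readImageBuffer(gfs, name).then(function (buffer) { /* draw the image */ });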

Related

How to print the number of spaces in a file after the stream pipe has completed in Node.js

I have some simple code which creates a readable stream and pipes it to a writable stream while counting the spaces in the file.
My logic seems to be right. The problem is that the count is printed before the spaces have actually been counted.
var fs = require('fs');
var inStream = fs.createReadStream('data.txt');
var outStream = fs.createWriteStream('out.txt');
var printStream = process.stdout;
var space = 0;
var upperStream = new Transform({
    transform(chunck, enc, cb) {
        var text = chunck.toString().toUpperCase();
        for (var i = 0; i < text.length; i++)
            if (text[i] == ' ')
                space++;
        this.push(text);
        cb();
    }
});
inStream.pipe(upperStream).pipe(printStream);
console.log("Number of space in file : ", space);
data.txt contains 'Standard error'
This happens because the code that counts the spaces runs asynchronously. You should handle the inStream 'end' event, which is emitted when the data has been read completely from inStream. Only once the data has been fully consumed from inStream, the spaces counted, and the output written to printStream will you get the actual space count.
Also, you have missed requiring the Transform stream.
var { Transform } = require('stream');
var fs = require('fs');
var inStream = fs.createReadStream('data.txt');
var outStream = fs.createWriteStream('out.txt');
var printStream = process.stdout;
var space = 0;
var upperStream = new Transform({
    transform(chunck, enc, cb) {
        var text = chunck.toString().toUpperCase();
        for (var i = 0; i < text.length; i++)
            if (text[i] == ' ')
                space++;
        this.push(text);
        cb();
    }
});
inStream.pipe(upperStream).pipe(printStream);
inStream.on('end', () => console.log(`Number of space in file : ${space}`));
Read more about stream events here: https://nodejs.org/api/stream.html
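Alternatively, here is a hedged sketch (assuming Node.js 10 or later, where stream.pipeline is available) that writes the uppercased text to out.txt from the question and logs the count only once every stream in the chain has finished:
var { pipeline, Transform } = require('stream');
var fs = require('fs');

var space = 0;
var upperStream = new Transform({
    transform(chunk, enc, cb) {
        var text = chunk.toString().toUpperCase();
        for (var i = 0; i < text.length; i++)
            if (text[i] == ' ')
                space++;
        cb(null, text); // push the transformed text and signal completion
    }
});

// pipeline invokes the callback only after all three streams have finished (or errored)
pipeline(fs.createReadStream('data.txt'), upperStream, fs.createWriteStream('out.txt'), function (err) {
    if (err) return console.error(err);
    console.log("Number of space in file : ", space);
});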

Is it possible to register multiple listeners to a child process's stdout data event? [duplicate]

I need to run two commands in series that both need to read data from the same stream.
After piping one stream into another, the buffer is emptied, so I can't read data from that stream again; this means the following doesn't work:
var spawn = require('child_process').spawn;
var fs = require('fs');
var request = require('request');
var inputStream = request('http://placehold.it/640x360');
var identify = spawn('identify', ['-']);
inputStream.pipe(identify.stdin);
var chunks = [];
identify.stdout.on('data', function (chunk) {
    chunks.push(chunk);
});
identify.stdout.on('end', function () {
    var size = getSize(Buffer.concat(chunks)); // width
    var convert = spawn('convert', ['-', '-scale', size * 0.5, 'png:-']);
    inputStream.pipe(convert.stdin);
    convert.stdout.pipe(fs.createWriteStream('half.png'));
});
function getSize(buffer) {
    return parseInt(buffer.toString().split(' ')[2].split('x')[0]);
}
Request complains about this
Error: You cannot pipe after data has been emitted from the response.
and changing the inputStream to fs.createReadStream yields the same issue, of course.
I don't want to write into a file; I want to reuse, in some way, the stream that request produces (or any other, for that matter).
Is there a way to reuse a readable stream once it finishes piping?
What would be the best way to accomplish something like the above example?
You have to create a duplicate of the stream by piping it into two streams. You can create a simple duplicate with a PassThrough stream; it simply passes the input through to the output.
const spawn = require('child_process').spawn;
const PassThrough = require('stream').PassThrough;
const a = spawn('echo', ['hi user']);
const b = new PassThrough();
const c = new PassThrough();
a.stdout.pipe(b);
a.stdout.pipe(c);
let count = 0;
b.on('data', function (chunk) {
    count += chunk.length;
});
b.on('end', function () {
    console.log(count);
    c.pipe(process.stdout);
});
Output:
8
hi user
The first answer only works if the streams take roughly the same amount of time to process the data. If one takes significantly longer, the faster one will request new data, consequently overwriting the data still being used by the slower one (I had this problem after trying to solve it using a duplicate stream).
The following pattern worked very well for me. It uses a library based on Streams2, Streamz, together with Promises to synchronize async streams via a callback. Using the familiar example from the first answer:
var spawn = require('child_process').spawn;
var pass = require('stream').PassThrough;
var streamz = require('streamz').PassThrough;
var Promise = require('bluebird');
var a = spawn('echo', ['hi user']);
var b = new pass();
var c = new pass();
a.stdout.pipe(streamz(combineStreamOperations));
function combineStreamOperations(data, next) {
    Promise.join(b, c, function (b, c) { // perform n operations on the same data
        next(); // request more
    });
}
var count = 0;
b.on('data', function (chunk) { count += chunk.length; });
b.on('end', function () { console.log(count); c.pipe(process.stdout); });
You can use this small npm package I created:
readable-stream-clone
With it you can reuse readable streams as many times as you need.
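A hedged usage sketch, based on my reading of the package README (the export name and constructor call are assumptions, so check the package documentation before relying on them):
var fs = require('fs');
var ReadableStreamClone = require('readable-stream-clone'); // assumed export

var source = fs.createReadStream('text.txt'); // illustrative input file

// each clone can be piped/consumed independently of the others
var clone1 = new ReadableStreamClone(source);
var clone2 = new ReadableStreamClone(source);

clone1.pipe(fs.createWriteStream('copy1.txt'));
clone2.pipe(fs.createWriteStream('copy2.txt'));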
For the general problem, the following code works fine:
var PassThrough = require('stream').PassThrough;
var a = new PassThrough();
var b1 = new PassThrough();
var b2 = new PassThrough();
a.pipe(b1);
a.pipe(b2);
b1.on('data', function (data) {
    console.log('b1:', data.toString());
});
b2.on('data', function (data) {
    console.log('b2:', data.toString());
});
a.write('text');
I have a different solution for writing to two streams simultaneously. Naturally, the time to write will be the sum of the two times, but I use it to respond to a download request where I want to keep a copy of the downloaded file on my server (actually, I use an S3 backup, so I cache the most used files locally to avoid multiple file transfers).
/**
* A utility class made to write to a file while answering a file download request
*/
class TwoOutputStreams {
constructor(streamOne, streamTwo) {
this.streamOne = streamOne
this.streamTwo = streamTwo
}
setHeader(header, value) {
if (this.streamOne.setHeader)
this.streamOne.setHeader(header, value)
if (this.streamTwo.setHeader)
this.streamTwo.setHeader(header, value)
}
write(chunk) {
this.streamOne.write(chunk)
this.streamTwo.write(chunk)
}
end() {
this.streamOne.end()
this.streamTwo.end()
}
}
You can then use this as a regular OutputStream
const twoStreamsOut = new TwoOutputStreams(fileOut, responseStream)
and pass it to your method as if it were a response or a fileOutputStream.
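For instance, here is a minimal hedged sketch (the file names are illustrative) that duplicates one source into two files with the helper above; since TwoOutputStreams is not a real Writable, you call write/end on it yourself instead of piping into it:
const fs = require('fs');

const source = fs.createReadStream('data.txt'); // illustrative input
const twoStreamsOut = new TwoOutputStreams(
    fs.createWriteStream('copy1.txt'),
    fs.createWriteStream('copy2.txt')
);

// forward every chunk to both outputs, then close both
source.on('data', (chunk) => twoStreamsOut.write(chunk));
source.on('end', () => twoStreamsOut.end());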
If you have async operations on the PassThrough streams, the answers posted here won't work.
A solution that works for async operations includes buffering the stream content and then creating streams from the buffered result.
To buffer the result you can use concat-stream
const Promise = require('bluebird');
const concat = require('concat-stream');
const getBuffer = function (stream) {
    return new Promise(function (resolve, reject) {
        var gotBuffer = function (buffer) {
            resolve(buffer);
        };
        var concatStream = concat(gotBuffer);
        stream.on('error', reject);
        stream.pipe(concatStream);
    });
};
To create streams from the buffer you can use:
const { Readable } = require('stream');
const getBufferStream = function (buffer) {
    const stream = new Readable();
    stream.push(buffer);
    stream.push(null);
    return Promise.resolve(stream);
};
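Putting the two helpers together, here is a hedged usage sketch (the file names are illustrative) in which each consumer gets its own independent Readable built from the buffered content:
const fs = require('fs');

getBuffer(fs.createReadStream('image.png')) // illustrative input file
    .then(function (buffer) {
        return Promise.all([
            // one copy of the data is written to disk...
            getBufferStream(buffer).then(function (s) {
                s.pipe(fs.createWriteStream('copy.png'));
            }),
            // ...while another copy is piped elsewhere, e.g. to stdout
            getBufferStream(buffer).then(function (s) {
                s.pipe(process.stdout);
            })
        ]);
    });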
What about piping into two or more streams, but not at the same time?
For example:
var PassThrough = require('stream').PassThrough;
var mybinaryStream = stream.start(); // never-ending audio stream
var file1 = fs.createWriteStream('file1.wav', { encoding: 'binary' });
var file2 = fs.createWriteStream('file2.wav', { encoding: 'binary' });
var mypass = new PassThrough();
mybinaryStream.pipe(mypass);
mypass.pipe(file1);
setTimeout(function () {
    mypass.pipe(file2);
}, 2000);
The above code does not produce any errors, but file2 is empty.

write base64 to file using stream

I am sending a base64 string to my server. On the server I want to create a readable stream onto which I push the base64 chunks, which then goes to a writable stream and is written to a file. My problem is that only the first chunk is written to the file. My guess is that creating a new buffer with each chunk is what causes the problem, but if I push the string chunks without creating the buffer, the image file is corrupt.
var readable = new stream.Readable();
readable._read = function () {};
req.on('data', function (data) {
    var dataText = data.toString();
    var dataMatch = dataText.match(/^data:([A-Za-z-+\/]+);base64,(.+)$/);
    var bufferData = null;
    if (dataMatch) {
        bufferData = new Buffer(dataMatch[2], 'base64');
    }
    else {
        bufferData = new Buffer(dataText, 'base64');
    }
    readable.push(bufferData);
});
req.on('end', function () {
    readable.push(null);
});
This is not as trivial as you might think:
1. Use Transform, not Readable. You can pipe the request stream into the transform, thus handling backpressure.
2. You can't use regular expressions, because the text you are expecting can be broken across two or more chunks. You could try to accumulate the chunks and run the regular expression each time, but if the format of the stream is incorrect (that is, not a data URI) you will end up buffering the whole request and running the regular expression many times over a megabytes-long string.
3. You can't take an arbitrary chunk and do new Buffer(chunk, 'base64'), because it may not be valid by itself. Example: new Buffer('AQID', 'base64') yields new Buffer([1, 2, 3]), but Buffer.concat([new Buffer('AQ', 'base64'), new Buffer('ID', 'base64')]) yields new Buffer([1, 32]).
For the third problem you can use one of the available modules (like base64-stream). Here is an example:
var base64 = require('base64-stream');
var stream = require('stream');
var decoder = base64.decode();
var input = new stream.PassThrough();
var output = new stream.PassThrough();
input.pipe(decoder).pipe(output);
output.on('data', function (data) {
    console.log(data);
});
input.write('AQ');
input.write('ID');
You can see that it buffers the input and emits data as soon as enough of it has arrived.
As for the second problem, you need to implement a simple stream parser. As an idea: wait for the data: string, then buffer chunks (if you need them) until ;base64, is found, then pipe the rest to base64-stream, as in the sketch below.
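Here is a hedged sketch of that idea (the function name and the header-size limit are my own illustrative choices): a Transform buffers the incoming text until it sees ;base64,, then passes everything after it downstream, where base64-stream does the actual decoding:
var { Transform } = require('stream');
var base64 = require('base64-stream');
var fs = require('fs');

function createDataUriPayloadExtractor() {
    var header = '';
    var headerDone = false;
    return new Transform({
        transform: function (chunk, enc, cb) {
            if (headerDone) return cb(null, chunk); // header already stripped, pass through
            header += chunk.toString();
            var marker = header.indexOf(';base64,');
            if (marker !== -1) {
                headerDone = true;
                // everything after ';base64,' is the base64 payload
                var payload = header.slice(marker + ';base64,'.length);
                if (payload.length) this.push(payload);
                header = '';
            } else if (header.length > 64 * 1024) {
                return cb(new Error('data URI header not found')); // illustrative limit
            }
            cb();
        }
    });
}

// usage sketch: strip the header, decode, and write the decoded bytes to a file
// req.pipe(createDataUriPayloadExtractor()).pipe(base64.decode()).pipe(fs.createWriteStream('upload.png'));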

NodeJS: convert pngjs stream to base64

I have a PNG object which I created using node-png, and according to the docs it's a "readable and writable Stream".
I would like to convert the PNG object to base64 and send it to the client via socket.io where I'll place the string in an image src.
I've tried many things but it seems that it's not trivial to convert a stream into a string.
Note that the data is created inside Node and not from the file system.
How can I achieve this?
Here is what I did for future readers (this helped too):
png.pack();
var chunks = [];
png.on('data', function (chunk) {
    chunks.push(chunk);
    console.log('chunk:', chunk.length);
});
png.on('end', function () {
    var result = Buffer.concat(chunks);
    console.log('final result:', result.length);
    io.sockets.emit('image', result.toString('base64'));
});
You do not want to convert the stream itself into a string, but each readable chunk:
stream.on('readable', function () {
    var string = stream.read().toString('base64');
    // send through websocket
});
You can also do this for the complete data running through the stream:
var data = '';
stream.on('readable', function () {
    data += stream.read().toString('base64');
});
stream.on('end', function () {
    console.log(data);
});
It depends on whether the client requires the complete PNG picture to be available, or whether it is okay to receive single chunks.
However, if you are interested in how this could look in a practical example (with the image being uploaded via HTML5 drag & drop), you can check out messenger.

HTTP request stream not firing readable when reading fixed sizes

I am trying to work with the new Streams API in Node.js, but am having trouble when specifying a fixed read buffer size.
var http = require('http');
var req = http.get('http://143.226.75.100/waug_mp3_128k', function (res) {
    res.on('readable', function () {
        var receiveBuffer = res.read(1024);
        console.log(receiveBuffer.length);
    });
});
This code will receive a few buffers and then exit. However, if I add this line after the console.log() line:
res.read(0);
... all is well again. My program continues to stream as predicted.
Why is this happening? How can I fix it?
It's explained here.
As far as I understand it, by reading only 1024 bytes with each readable event, Node is left to assume that you're not interested in the rest of the data that's in the stream buffers, and discards it. Issuing the read(0) (in the same event loop iteration) 'resets' this behaviour. I'm not sure why the process exits after reading a couple of 1024-byte buffers though; I can recreate it, but I don't understand it yet :)
If you don't have a specific reason to use the 1024-byte reads, just read the entire buffer for each event:
var receiveBuffer = res.read();
Or instead of using non-flowing mode, use flowing mode by using the data/end events instead:
var http = require('http');
var req = http.get('http://143.226.75.100/waug_mp3_128k', function (res) {
    var chunks = [];
    res.on('data', function (chunk) {
        chunks.push(chunk);
        console.log('chunk:', chunk.length);
    });
    res.on('end', function () {
        var result = Buffer.concat(chunks);
        console.log('final result:', result.length);
    });
});
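If you do want fixed-size reads, a hedged alternative sketch is to loop inside the readable handler and keep calling read(1024) until it returns null, following the pattern from the Node.js stream docs, so that whatever remains buffered is picked up on the next readable event:
var http = require('http');
http.get('http://143.226.75.100/waug_mp3_128k', function (res) {
    res.on('readable', function () {
        var receiveBuffer;
        // read(1024) returns null once fewer than 1024 bytes are buffered;
        // the leftover bytes stay buffered until the next 'readable' event
        while ((receiveBuffer = res.read(1024)) !== null) {
            console.log(receiveBuffer.length);
        }
    });
    res.on('end', function () {
        console.log('done');
    });
});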
