Convert stream into buffer? - node.js

How do I convert a stream into a buffer in Node.js? Here is my code to parse a file in a POST request in Express.
app.post('/upload', express.multipart({
defer: true
}), function(req, res) {
req.form.on('part', function(part) {
// Here I want to convert the streaming part into a buffer
// and do some buffer-specific task with it.
var out = fs.createWriteStream('image/' + part.filename);
part.pipe(out);
});
req.form.on('close', function() {
res.send('uploaded!');
});
});

Instead of piping, you can attach 'readable' and 'end' event handlers to the part stream to read it:
var buffers = [];
part.on('readable', function() {
for (;;) {
var buffer = part.read();
if (!buffer) { break; }
buffers.push(buffer);
}
});
part.on('end', function() {
var buffer = Buffer.concat(buffers);
// ...do your stuff...
// write to file:
fs.writeFile('image/' + part.filename, buffer, function(err) {
// handle error, return response, etc...
});
});
Note: whether you use 'readable' or 'data' events, this approach buffers the entire uploaded part in memory.
You could also create a custom transform stream to transform the incoming data, but that might not be trivial.
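For illustration, here is a minimal sketch of such a transform using the simplified constructor available in newer Node versions; the per-chunk logic is just a placeholder, and it assumes it sits inside the 'part' handler from the question:
var stream = require('stream');

var transformer = new stream.Transform({
  transform: function (chunk, encoding, callback) {
    // inspect or modify each chunk here before passing it along
    this.push(chunk);
    callback();
  }
});

// stream the (possibly modified) part straight to disk
part.pipe(transformer).pipe(fs.createWriteStream('image/' + part.filename));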

You can use the stream-to module, which can convert a readable stream's data into an array or a buffer:
var streamTo = require('stream-to');
req.form.on('part', function (part) {
streamTo.buffer(part, function (err, buffer) {
// Insert your business logic here
});
});
If you want a better understanding of what's happening behind the scenes, you can implement the logic yourself, using a Writable stream. As a writable stream implementor, you only have to define one function: the _write method, that will be called every time some data is written to the stream. When the input stream is finished emitting data, the end event will be emitted: we'll then create a buffer using the Buffer.concat method.
var stream = require('stream');
var converter = new stream.Writable();
// We'll store all the data inside this array
converter.data = [];
converter._write = function (chunk, encoding, done) {
converter.data.push(chunk);
done();
};
// Will be emitted when the input stream has ended,
// i.e. no more data will be provided
converter.on('finish', function() {
// Create a buffer from all the received chunks
var b = Buffer.concat(this.data);
// Insert your business logic here
});
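To feed it, pipe the part stream from the question into the converter (this line would go inside the 'part' handler):
part.pipe(converter);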

Related

How to pipe data modified from a gunzip stream into a gzip stream?

I need to trigger, through an HTTP request, a process where I download some data from S3, gunzip it, modify the stream, gzip it, and send it to another bucket in S3.
So far I was able to either:
Download
Gunzip
Modify (filter) the data
Return the data
Or:
Download
Gunzip
Gzip
Upload the unmodified data and retrieve the URL of the object
My first attempt at this consisted of using the on('data') event from the gunzip stream to modify the data; then, when the 'end' event fires, I can return it to the browser making the request.
var accumulator = [];
gunzip.on('data', chunk=>{
var lines = chunk.toString('utf-8').split('\n');
lines.forEach(line=>{
if(shouldBeFiltered(line)){
accumulator.push(line);
}
})
})
gunzip.on('end', ()=>{
res.send(accumulator);
})
getS3.pipe(gunzip)
If instead of returning the result (res.send) I try to pipe gunzip to gzip, the filter is ignored. It makes sense, as I have an accumulator array that I return (in the previous case) when the 'end' event fires.
Then, after some digging, I found a reference suggesting that the data should be pushed back onto the stream, and I tried the following, which did not work:
gunzip.on('data', chunk=>{
var lines = chunk.toString('utf-8').split('\n');
lines.forEach(line=>{
if(shouldBeFiltered(line)){
gunzip.push(line);
}
})
})
// the end event no longer mattered
// gunzip.on('end', ()=>{
// res.send(accumulator);
// })
getS3.pipe(gunzip).pipe(gzip).pipe(putS3(putS3param.Key, putS3param.Bucket));
Then I tried to create a transform stream (this is extremely simplified, as I was just trying the concept), but then I got an internal error:
const stream = require('stream');
const Transform = stream.Transform;
function filter(pipeline) {
var the_filter = new Transform({
transform(chunk, encoding, next) {
console.log();
chunk += Buffer('Modified', 'utf-8');
this.push(chunk);
next();
}
});
pipeline.pipe(the_filter);
}
Other than creating a file, gzipping it, and uploading it, I have no more ideas.
Thanks for any help!
After much digging around, I finally found the answer on this page.
It seems that what was missing was setting the Transform to objectMode; other than that, I see nothing else relevant.
var stream = require('stream')
var liner = new stream.Transform( { objectMode: true } )
liner._transform = function (chunk, encoding, done) {
var data = chunk.toString()
if (this._lastLineData) data = this._lastLineData + data
var lines = data.split('\n')
this._lastLineData = lines.splice(lines.length-1,1)[0]
lines.forEach(this.push.bind(this))
done()
}
liner._flush = function (done) {
if (this._lastLineData) this.push(this._lastLineData)
this._lastLineData = null
done()
}
module.exports = liner
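With liner splitting the gunzipped data into lines (in objectMode), the filtering can then happen in a second objectMode transform before re-gzipping. A rough sketch of how the pieces could fit together, reusing the question's shouldBeFiltered, getS3 and putS3 placeholders:
var zlib = require('zlib')
var stream = require('stream')

var filterLines = new stream.Transform({ objectMode: true })
filterLines._transform = function (line, encoding, done) {
  if (shouldBeFiltered(line)) {
    this.push(line + '\n') // liner strips newlines, so add them back
  }
  done()
}

getS3
  .pipe(zlib.createGunzip())
  .pipe(liner)
  .pipe(filterLines)
  .pipe(zlib.createGzip())
  .pipe(putS3(putS3param.Key, putS3param.Bucket))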

Mongoose Stream JSON Data

Hi, I have the following code:
var mq = DeviceData.find().lean().cursor();
mq.on('data', function(data) {
//code to write to a stream
}).on('end', function() {
//code to write to a stream
});
What should I put in the commented blocks? I think I should create a write stream. The idea is to send a stream of data to the client.
Actually, you can use the pipe method directly, as follows:
var mq = DeviceData.find().lean().cursor().pipe(yourWritableStream);
or from my recipes
const { Writable } = require('stream');

DeviceData.find({})
.lean()
.cursor()
.pipe(new Writable({
objectMode: true,
write(data, encoding, next) {
// do something with your data
// then call next() to ask for the next document
next();
}
}))
.on('finish', () => {
// you can do something when the stream ends
})
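If the goal is to send that stream of data to the client, one hedged option (a sketch, assuming res is the HTTP/Express response) is to serialize each document in an objectMode Transform and pipe it into the response:
const { Transform } = require('stream');

DeviceData.find({})
.lean()
.cursor()
.pipe(new Transform({
objectMode: true,
transform(doc, encoding, next) {
// emit each document as one line of JSON
next(null, JSON.stringify(doc) + '\n');
}
}))
.pipe(res);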

When does a Node Readable Stream's _read() get called?

I created my own read stream, but I want to know when _read() will be called. If I don't add an on('data') listener, _read() will not be called. Why?
var data = [{"id":0,"name":"object 0","value":3}],
Readable = require('stream').Readable,
util = require('util');
var ReadStream = function() {
Readable.call(this, {objectMode: true});
this.data = data;
this.curIndex = 0;
};
util.inherits(ReadStream, Readable);
ReadStream.prototype._read = function() {
if (this.curIndex === this.data.length)
return this.push(null);
var data = this.data[this.curIndex++];
//console.log('read: ' + JSON.stringify(data));
this.push(data);
};
var stream = new ReadStream();
stream.on('data', function(record) {
console.log('received: ' + JSON.stringify(record));
});
stream.on('end', function() {
console.log('done111');
});
If I don't add an on('data') listener, _read() will not be called. Why?
The stream is paused (assuming you are using a recent version of Node).
https://nodejs.org/api/stream.html#stream_two_modes
All Readable streams begin in paused mode but can be switched to flowing mode in one of the following ways:
Adding a 'data' event handler.
Calling the stream.resume() method.
Calling the stream.pipe() method to send the data to a Writable.
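For example, with the ReadStream from the question, any one of these kicks off the flow and gets _read() called (a minimal sketch):
var stream = new ReadStream();

// 1. attach a 'data' handler
stream.on('data', function (record) { console.log(record); });

// 2. or resume explicitly
// stream.resume();

// 3. or pipe it to a writable
// stream.pipe(someWritable); // someWritable is any writable stream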
BTW, to create a readable, check noms or mississippi.from

How can I resume after an error event in a piped stream in Node.js?

After I emit an error event in MyWritableStream, data transmission stops. What do I need to do to resume the data transfer?
var readable = fs.createReadStream('test.txt');
var writable = new MyWritableStream();
writable.on('error', function(error) {
console.log('error', error);
// How can I resume?
});
writable.on('finish', function(){
console.log('finished');
})
readable.pipe(writable);
I know this question is old, but you might want to check out https://github.com/miraclx/xresilient
I built it for this exact reason (it works best with seekable streams).
You define a function that returns a readable stream, and the library measures the number of bytes that have passed through until an error is encountered.
Once the readable stream hits an error event, the library calls the defined function again with the number of bytes read so far, so you can index into the stream source.
Example:
const fs = require('fs');
const xresilient = require('xresilient');
const readable = xresilient(({bytesRead}) => {
return generateSeekableStreamSomehow({start: bytesRead});
}, {retries: 5});
const writable = fs.createWriteStream('file.test');
readable.pipe(writable);
File streams are indexable with the start option of the fs.createReadStream() function.
HTTP Requests are indexable with the Range HTTP Header.
Check it out.
https://www.npmjs.com/package/xresilient
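For reference, the two indexing mechanisms it relies on look roughly like this (a minimal sketch, not xresilient's actual API; bytesAlreadyRead is a placeholder for your own byte counter):
const fs = require('fs');
const https = require('https');

// File streams: resume from a byte offset with the `start` option.
const fileTail = fs.createReadStream('file.test', { start: bytesAlreadyRead });

// HTTP requests: resume a download with the Range header.
https.get('https://example.com/data.bin', {
  headers: { Range: 'bytes=' + bytesAlreadyRead + '-' }
}, (res) => {
  // res now starts at byte offset bytesAlreadyRead
});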
I am not sure if this is normal practice, but I can't see another solution for now, and it works for me. If you can suggest a more accurate solution, please do.
We can track the readable stream instance using the 'pipe' event on the writable one:
var util = require('util');
var Writable = require('stream').Writable;

function WriteableStream(options) {
Writable.call(this, options);
this.source = null;
var instance = this;
this.on('pipe', function(source){
instance.source = source;
});
}
util.inherits(WriteableStream, Writable);
So, when we emit an error event and the readable stream is unpiped automatically, we can re-pipe it ourselves:
WriteableStream.prototype._write = function(chunk, encoding, done) {
this.emit('error', new Error('test')); // unpipes readable
done();
};
WriteableStream.prototype.resume = function() {
this.source.pipe(this); // re-pipes readable
}
Finally, we will use it the following way:
var readable = fs.createReadStream(file);
var writeable = new WriteableStream();
writeable.on('error', function(error) {
console.log('error', error);
writeable.resume();
});
readable.pipe(writeable);

How to force Node.js Transform stream to finish?

Consider the following scenario. I have two Node Transform streams:
Transform stream 1
function T1(options) {
if (! (this instanceof T1)) {
return new T1(options);
}
Transform.call(this, options);
}
util.inherits(T1, Transform);
T1.prototype._transform = function(chunk, encoding, done) {
console.log("### Transforming in t1");
this.push(chunk);
done();
};
T1.prototype._flush = function(done) {
console.log("### Done in t1");
done();
};
Transform stream 2
function T2(options) {
if (! (this instanceof T2)) {
return new T2(options);
}
Transform.call(this, options);
}
util.inherits(T2, Transform);
T2.prototype._transform = function(chunk, encoding, done) {
console.log("### Transforming in t2");
this.push(chunk);
done();
};
T2.prototype._flush = function(done) {
console.log("### Done in t2");
done();
};
And I want to apply these transform streams before returning a response. I have a simple HTTP server; on each request, I fetch a resource, would like these transformations applied to it, and then want to send the result of the second transformation back in the original response:
var options = require('url').parse('http://localhost:1234/data.json');
options.method = 'GET';
http.createServer(function(req, res) {
var req = http.request(options, function(httpRes) {
var t1 = new T1({});
var t2 = new T2({});
httpRes
.pipe(t1)
.pipe(t2)
.on('finish', function() {
// Do other stuff in here before sending request back
t2.pipe(res, { end : true });
});
});
req.end();
}).listen(3001);
Ultimately, the finish event never gets called, and the request hangs and times out because the response is never resolved. I've noticed that if I just pipe t2 into res, it seems to work fine:
.pipe(t1)
.pipe(t2)
.pipe(res, { end : true });
But, this scenario doesn't seem feasible because I need to do some extra work before returning the response.
This happens because you need to let Node know that the stream is being consumed somewhere; otherwise the last stream will just fill up its internal buffer (once your data exceeds the highWaterMark option: 16 KB by default, or 16 objects in object mode) and halt, waiting for the data to be consumed.
There are three ways of consuming a stream in full:
piping it to a writable stream (what you did in the second part of your question)
reading consecutive chunks by calling the read method of a stream
listening on "data" events (essentially stream.on("data", someFunc)).
The last option is the quickest, but it will consume the stream as fast as possible, without regard for memory usage.
I'd also note that using the "finish" event might be a little misleading: it is emitted when all the input has been written to the transform, but not necessarily when the transformed output has been read back out. Since a Transform stream is also readable, it's better to use the "end" event here.
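Applied to the code in the question, one hedged way to keep the "do other stuff" step is to consume t2 yourself and send the response once its 'end' event fires (a sketch reusing the question's variable names):
httpRes.pipe(t1).pipe(t2);

var chunks = [];
t2.on('data', function (chunk) {
  chunks.push(chunk);
});
t2.on('end', function () {
  // Do other stuff in here before sending the response back
  res.end(Buffer.concat(chunks));
});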
