I have created my own read stream, but I want to know when _read() gets called. If I don't add an on('data') listener, _read() is never called. Why?
var data = [{"id":0,"name":"object 0","value":3}],
    Readable = require('stream').Readable,
    util = require('util');

var ReadStream = function() {
    Readable.call(this, {objectMode: true});
    this.data = data;
    this.curIndex = 0;
};
util.inherits(ReadStream, Readable);

ReadStream.prototype._read = function() {
    if (this.curIndex === this.data.length)
        return this.push(null);
    var data = this.data[this.curIndex++];
    //console.log('read: ' + JSON.stringify(data));
    this.push(data);
};

var stream = new ReadStream();

stream.on('data', function(record) {
    console.log('received: ' + JSON.stringify(record));
});

stream.on('end', function() {
    console.log('done111');
});
If I don't add an on('data') listener, _read() is never called. Why?
The stream is paused (assuming you are using a recent version of Node).
https://nodejs.org/api/stream.html#stream_two_modes
All Readable streams begin in paused mode but can be switched to flowing mode in one of the following ways:
Adding a 'data' event handler.
Calling the stream.resume() method.
Calling the stream.pipe() method to send the data to a Writable.
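For example, you can watch _read() run without attaching a 'data' handler by switching the stream into flowing mode yourself. A minimal sketch reusing the ReadStream class from the question (resume() simply discards the data; the point is that _read() now gets called):

var stream = new ReadStream();

stream.on('end', function() {
    console.log('done'); // only fires after _read() has pushed null
});

// No 'data' handler here: resume() switches the stream into flowing mode,
// so _read() starts being called (piping to a Writable would do the same).
stream.resume();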
BTW, to create a readable, check noms or mississippi.from
How can I stream a response using an in memory DB?
I'm using LokiJS as an in-memory DB. There is a particular resource where I must return the entire contents of a table (it cannot be paginated), and that table can grow to around 500,000 items, which is about 300 MB.
In other cases, I have used fs.createReadStream to get a file and stream it back to the user:
fs.createReadStream('zips.json')
    .on('data', function(chunk) {
        res.write(chunk);
    })
    .on('end', function() {
        res.end();
    });
This has worked great for large files, but how can I do something equivalent using an in memory DB?
const items = lokiDb.addCollection('items');
items.insert('a bunch of items ...');
// I would now like to stream items via res.write
res.write(items)
Currently, res.write(items) will cause memory problems as Node is trying to return the entire response at once.
As far as I can tell, there is no native stream provider in Loki, though I may have missed it. What you may want to do instead is listen to the 'insert' event on the collection and write that, like so:
const items = lokiDb.addCollection('items');
items.on('insert', (results) => {
res.write(results);
});
items.insert('a bunch of items ...');
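If you also need to stream what is already in the collection (not just new inserts), one option is to wrap the existing documents in an object-mode Readable and serialize them as they are pulled. A minimal sketch, assuming collection.find() returns a plain array of documents (as it does in LokiJS) and that res is the HTTP response:

const { Readable } = require('stream');

function collectionToStream(collection) {
  const docs = collection.find(); // snapshot of the current contents
  let i = 0;
  return new Readable({
    objectMode: true,
    read() {
      // push one document per _read() call, then signal the end
      this.push(i < docs.length ? docs[i++] : null);
    }
  });
}

collectionToStream(items)
  .on('data', (doc) => res.write(JSON.stringify(doc) + '\n'))
  .on('end', () => res.end());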
If I understand correctly, your problem is essentially that fs.createReadStream only reads from files, while you want to read from an in-memory data structure. A solution might be to define your own read stream class, overriding the stream.Readable._read() method:
"use strict";
var util = require('util');
var stream = require('stream');

var begin = 0, end = 0;
var options = {
    highWaterMark: 16384,
    encoding: null,
    objectMode: false
};

util.inherits(InMemoryStream, stream.Readable);

function InMemoryStream(userDefinedOptions, resource){
    if (userDefinedOptions){
        for (var key in userDefinedOptions){
            options[key] = userDefinedOptions[key];
        }
    }
    this.resource = resource;
    stream.Readable.call(this, options);
}

InMemoryStream.prototype._read = function(size){
    if (begin >= this.resource.length) return this.push(null); // signal end-of-stream
    end += size;
    this.push(this.resource.slice(begin, end));
    begin += size;
};

exports.InMemoryStream = InMemoryStream;

exports.readStream = function(UserDefinedOptions, resource){
    return new InMemoryStream(UserDefinedOptions, resource);
};
You convert your in-memory data structure (in the example below, a Buffer read from a file) to a readable stream and pipe it into a writable stream, as follows:
"use strict";
var fs = require('fs');
var InMemoryStream = require('/home/regular/javascript/poc/inmemorystream.js');

var stored = [], writestream, config = {};
config = {
    encoding: null,
    fileToRead: 'raphael.js',
    fileToWrite: 'secondraphael.js'
};

fs.readFile(config.fileToRead, function(err, data){
    if (err) return console.log('Error when opening file', err);
    stored = data;

    var inMemoryStream = InMemoryStream.readStream({encoding: config.encoding}, stored);
    writestream = fs.createWriteStream(config.fileToWrite);

    inMemoryStream.pipe(writestream);
    inMemoryStream.on('error', function(err){
        console.log('in memory stream error', err);
    });
});
After I emit an error event in MyWritableStream, data transmission stops. What do I need to do to resume the data transfer?
var readable = fs.createReadStream('test.txt');
var writable = new MyWritableStream();
writable.on('error', function(error) {
console.log('error', error);
// How i can resume?
});
writable.on('finish', function(){
console.log('finished');
})
readable.pipe(writable);
I know this question is old, but you might want to check out https://github.com/miraclx/xresilient
I built this for this exact reason (it works best with seekable streams).
You define a function that returns a readable stream, and the library measures the number of bytes that have passed through until an error is encountered.
Once the readable stream encounters an error event, it calls the defined function again with the number of bytes read so far, so you can index into the stream source.
Example:
const fs = require('fs');
const xresilient = require('xresilient');
const readable = xresilient(({bytesRead}) => {
return generateSeekableStreamSomehow({start: bytesRead});
}, {retries: 5});
const writable = fs.createWriteStream('file.test');
readable.pipe(writable);
File streams are indexable with the start option of the fs.createReadStream() function.
HTTP Requests are indexable with the Range HTTP Header.
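For the file case, a sketch of what that factory could look like, using the start option of fs.createReadStream() (the source file name here is just a placeholder):

const fs = require('fs');
const xresilient = require('xresilient');

// Each retry resumes the source from the number of bytes already read.
const readable = xresilient(({bytesRead}) => {
  return fs.createReadStream('file.src', {start: bytesRead});
}, {retries: 5});

readable.pipe(fs.createWriteStream('file.test'));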
Check it out.
https://www.npmjs.com/package/xresilient
I am not sure if this is normal practice, but I can't see another solution for now and it works for me. If you can advise a more accurate solution, please do.
We can track the readable stream instance using the 'pipe' event in the writable one:
function WriteableStream(options) {
Writable.call(this, options);
this.source = null;
var instance = this;
this.on('pipe', function(source){
instance.source = source;
});
}
util.inherits(WriteableStream, Writable);
So, when we emit an error event and the readable stream is automatically unpiped, we can re-pipe it ourselves:
WriteableStream.prototype._write = function(chunk, encoding, done) {
this.emit('error', new Error('test')); // unpipes readable
done();
};
WriteableStream.prototype.resume = function() {
this.source.pipe(this); // re-pipes readable
}
Finally, we use it in the following way:
var readable = fs.createReadStream(file);
var writeable = new WriteableStream();
writeable.on('error', function(error) {
console.log('error', error);
writeable.resume();
});
readable.pipe(writeable);
I am trying to register an event listener for the end of the data in a pipe transformation. I tried registering the event on all streams in the pipe:
a) my custom transform stream (StreamToBuffer)
b) the standard file read stream
c) the standard gunzip stream
But unfortunately, none of them works (see the code below). As far as I can tell, only the 'data' event fires, but it does not help.
What I need is to continue with processing of the tailBuffer in the StreamToBuffer class after the transformation is finished.
Can you suggest how to achieve this?
The code (simplified for brevity):
function samplePipe() {
    var streamToBuffer = new StreamToBuffer();
    var readStream = fs.createReadStream(bgzFile1, {
        flags: 'r',
        encoding: null,
        fd: null,
        mode: '0666',
        autoClose: true
    });
    var gunzipTransform = zlib.createGunzip();

    readStream.on('end', function() {
        //not fired
        console.log('end event readStream');
    });
    streamToBuffer.on('end', function() {
        //not fired
        console.log('end event streamBuffer');
    });
    gunzipTransform.on('end', function() {
        //not fired
        console.log('end event gunzipTransform');
    });

    readStream
        .pipe(gunzipTransform)
        .pipe(streamToBuffer)
    ;
}
StreamToBuffer:
function StreamToBuffer() {
stream.Transform.call(this);
this.tailBuffer = new Buffer(0);
}
util.inherits(StreamToBuffer, stream.Transform);
StreamToBuffer.prototype._transform = function(chunk, encoding, callback) {
this.tailBuffer = Buffer.concat([this.tailBuffer, chunk]);
console.log('streamToBuffer');
}
StreamToBuffer.prototype._flush = function(callback) {
callback();
}
module.exports = StreamToBuffer;
EDITED:
After playing a little with passing a callback function to the StreamToBuffer constructor, I discovered the mistake: the missing callback(); in the _transform() method. After adding it, the 'end' event listener works, at least on the standard read stream.
StreamToBuffer.prototype._transform = function(chunk, encoding, callback) {
this.tailBuffer = Buffer.concat([this.tailBuffer, chunk]);
console.log('streamToBuffer');
callback();
}
Another way is to pass a callback function to the StreamToBuffer constructor and then call it in the _flush() method. This has the advantage that we can be sure the transformation is complete.
function samplePipe() {
var streamToBuffer = new StreamToBuffer(processBuffer);
.....
}
function processBuffer(buffer) {
console.log('processBuffer');
}
StreamToBuffer:
function StreamToBuffer(callback) {
stream.Transform.call(this);
this.tailBuffer = new Buffer(0);
this.finishCallback = callback;
}
util.inherits(StreamToBuffer, stream.Transform);
StreamToBuffer.prototype._transform = function(chunk, encoding, callback) {
this.tailBuffer = Buffer.concat([this.tailBuffer, chunk]);
console.log('streamToBuffer');
callback();
}
StreamToBuffer.prototype._flush = function(callback) {
console.log('flushed');
callback();
this.finishCallback(this.tailBuffer);
}
module.exports = StreamToBuffer;
Although I have not received any answer yet (thanks for the other comments, anyway), I think this question can be useful for people like me who are learning Node. If you know a better solution, please answer. Thank you.
For a writable stream, try the finish event instead of the end event. A Transform sitting at the end of a pipeline has nothing consuming its readable side, so its 'end' event never fires; 'finish' is emitted once the writable side has processed all of the input:
streamToBuffer.on('finish', function() {
// finish event fired
});
var stream = require('stream');
var util = require('util');

function SourceReader(source){
    stream.Readable.call(this);
    this._buffer = '';
    this._source = source;

    source.on('readable', function(){
        console.log('got here');
        this.read();
    }.bind(this));
}
util.inherits(SourceReader, stream.Readable);

SourceReader.prototype._read = function(){
    var chunk = this._source.read();
    console.log('chunk:' + chunk);
    if(chunk !== null){
        this.push(chunk);
    }
    else
        this.push(null);
};

var fs = require('fs');
var fileStream = fs.createReadStream('my.txt'); //some data
var sourceReader = new SourceReader(fileStream);
The above code works as expected and outputs:
got here
chunk:some data
chunk:null
But when I add a listener for the 'readable' event, no data gets read:
sourceReader.on('readable', function(){
// it doesn't even get here
var data = sourceReader.read();
console.log(data);
});
outputs:
chunk:null
got here
It seems like SourceReader._read() gets called before the source's 'readable' event has fired, and presumably reads nothing. Then the source's 'readable' event fires and calls SourceReader.read(), but this time it doesn't go into SourceReader._read() and finishes.
Why is that? And how do I handle 'readable' events if the reader has a source that also fires 'readable' events?
Thanks for answering.
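For reference, one way to avoid the premature end described above is to only push null when the source itself ends, and to forward chunks whenever the source becomes readable. A minimal sketch along those lines, reusing the SourceReader name from the question (the _forward helper is just illustrative):

var stream = require('stream');
var util = require('util');

function SourceReader(source) {
    stream.Readable.call(this);
    this._source = source;
    var self = this;
    // Forward data whenever the source has some, instead of reading eagerly.
    source.on('readable', function() {
        self._forward();
    });
    // Only end the wrapper when the source itself ends.
    source.on('end', function() {
        self.push(null);
    });
}
util.inherits(SourceReader, stream.Readable);

SourceReader.prototype._read = function() {
    this._forward();
};

SourceReader.prototype._forward = function() {
    var chunk;
    while ((chunk = this._source.read()) !== null) {
        if (!this.push(chunk)) break; // respect backpressure
    }
};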
How do I convert a stream into a buffer in Node.js? Here is my code to parse a file in a POST request in Express.
app.post('/upload', express.multipart({
    defer: true
}), function(req, res) {
    req.form.on('part', function(part) {
        //Here I want to convert the streaming part into a buffer.
        //do something buffer-specific task

        var out = fs.createWriteStream('image/' + part.filename);
        part.pipe(out);
    });

    req.form.on('close', function() {
        res.send('uploaded!');
    });
});
Instead of piping, you can attach readable and end event handlers to the part stream to read it:
var buffers = [];
part.on('readable', function() {
    for (;;) {
        let buffer = part.read();
        if (!buffer) { break; }
        buffers.push(buffer);
    }
});
part.on('end', function() {
    var buffer = Buffer.concat(buffers);
    ...do your stuff...

    // write to file:
    fs.writeFile('image/' + part.filename, buffer, function(err) {
        // handle error, return response, etc...
    });
});
Note: if you use 'data' events instead, the entire upload will still end up buffered in memory.
You could also create a custom transform stream to transform the incoming data, but that might not be trivial.
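For completeness, a rough sketch of that idea: a pass-through Transform that also keeps a copy of each chunk, so you can pipe the part onward and still assemble a Buffer at the end (the CollectingStream name is just illustrative):

var stream = require('stream');
var util = require('util');

function CollectingStream() {
    stream.Transform.call(this);
    this.chunks = [];
}
util.inherits(CollectingStream, stream.Transform);

CollectingStream.prototype._transform = function(chunk, encoding, callback) {
    this.chunks.push(chunk);   // keep a copy for later
    callback(null, chunk);     // pass the chunk through unchanged
};

// usage sketch:
// var collector = new CollectingStream();
// part.pipe(collector).pipe(fs.createWriteStream('image/' + part.filename));
// collector.on('finish', function() {
//     var buffer = Buffer.concat(collector.chunks);
// });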
You can use the stream-to module, which can convert a readable stream's data into an array or a buffer:
var streamTo = require('stream-to');
req.form.on('part', function (part) {
streamTo.buffer(part, function (err, buffer) {
// Insert your business logic here
});
});
If you want a better understanding of what's happening behind the scenes, you can implement the logic yourself using a Writable stream. As a writable stream implementor, you only have to define one function: the _write method, which will be called every time some data is written to the stream. When the input stream has finished emitting data, the writable's finish event will be emitted: we'll then create a buffer using the Buffer.concat method.
var stream = require('stream');
var converter = new stream.Writable();
// We'll store all the data inside this array
converter.data = [];
converter._write = function (chunk, encoding, callback) {
    converter.data.push(chunk);
    callback(); // signal that this chunk has been handled
};
// Will be emitted when the input stream has ended,
// i.e. no more data will be provided
converter.on('finish', function() {
// Create a buffer from all the received chunks
var b = Buffer.concat(this.data);
// Insert your business logic here
});
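To connect this to the upload example from the question, you would create one such converter per part and pipe the part into it; a sketch, where makeConverter() is a hypothetical helper wrapping the setup above:

req.form.on('part', function (part) {
    var converter = makeConverter(); // hypothetical helper: builds a converter as shown above
    part.pipe(converter);            // its 'finish' handler fires once the part ends
});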