I'm hacking on a Node program that uses smtp-protocol to capture SMTP emails and act on the mail data. The library provides the mail data as a stream, and I don't know how to get that into a string.
I'm currently writing it to stdout with stream.pipe(process.stdout, { end: false }), but as I said, I need the stream data in a string instead, which I can use once the stream has ended.
How do I collect all the data from a Node.js stream into a string?
Another way would be to convert the stream to a promise (refer to the example below) and use then (or await) to assign the resolved value to a variable.
function streamToString (stream) {
  const chunks = [];
  return new Promise((resolve, reject) => {
    stream.on('data', (chunk) => chunks.push(Buffer.from(chunk)));
    stream.on('error', (err) => reject(err));
    stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
  });
}
const result = await streamToString(stream)
What do you think about this?
async function streamToString(stream) {
  // assuming `stream` is a readable stream
  const chunks = [];
  for await (const chunk of stream) {
    chunks.push(Buffer.from(chunk));
  }
  return Buffer.concat(chunks).toString("utf-8");
}
None of the above worked for me. I needed to use the Buffer object:
const chunks = [];
readStream.on("data", function (chunk) {
  chunks.push(chunk);
});

// Send the buffer or you can put it into a var
readStream.on("end", function () {
  res.send(Buffer.concat(chunks));
});
Hope this is more useful than the above answer:
var string = '';
stream.on('data', function(data) {
  string += data.toString();
  console.log('stream data ' + data);
});
stream.on('end', function() {
  console.log('final output ' + string);
});
Note that string concatenation is not the most efficient way to collect the string parts, but it is used for simplicity (and perhaps your code does not care about efficiency).
Also, this code may produce unpredictable failures for non-ASCII text (a multi-byte character can be split across chunk boundaries), but perhaps you do not care about that, either.
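If the non-ASCII caveat does matter to you, one small tweak (a sketch, not part of the original answer) is to set the stream's encoding, so Node decodes multi-byte characters correctly across chunk boundaries:
var string = '';
stream.setEncoding('utf8'); // chunks now arrive as already-decoded strings
stream.on('data', function(data) {
  string += data;
});
stream.on('end', function() {
  console.log('final output ' + string);
});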
(This answer is from years ago, when it was the best answer. There is now a better answer below this. I haven't kept up with Node.js, and I cannot delete this answer because it is marked as correct on this question. If you are thinking of downvoting, what do you want me to do?)
The key is to use the data and end events of a Readable Stream. Listen to these events:
stream.on('data', (chunk) => { ... });
stream.on('end', () => { ... });
When you receive the data event, add the new chunk of data to a Buffer created to collect the data.
When you receive the end event, convert the completed Buffer into a string, if necessary. Then do what you need to do with it.
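Putting those two listeners together, a minimal sketch (variable names are just illustrative) could look like this:
const chunks = [];
stream.on('data', (chunk) => {
  // collect each chunk; chunks are Buffers unless an encoding was set
  chunks.push(chunk);
});
stream.on('end', () => {
  // concatenate everything and convert to a string once the stream is done
  const result = Buffer.concat(chunks).toString('utf8');
  console.log(result);
});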
I usually use this simple function to transform a stream into a string:
function streamToString(stream, cb) {
  const chunks = [];
  stream.on('data', (chunk) => {
    chunks.push(chunk.toString());
  });
  stream.on('end', () => {
    cb(chunks.join(''));
  });
}
Usage example:
let stream = fs.createReadStream('./myFile.foo');
streamToString(stream, (data) => {
  console.log(data); // data is now my string variable
});
And yet another one for strings using promises:
function getStream(stream) {
  return new Promise(resolve => {
    const chunks = [];
    // Buffer.from is required if chunk is a String, see comments
    stream.on("data", chunk => chunks.push(Buffer.from(chunk)));
    stream.on("end", () => resolve(Buffer.concat(chunks).toString()));
  });
}
Usage:
const stream = fs.createReadStream(__filename);
getStream(stream).then(r => console.log(r));
Remove the .toString() call if you need the binary data instead.
Update: @AndreiLED correctly pointed out that this has problems with string chunks. I couldn't get a stream that returns strings with the version of Node I have, but the API notes that this is possible.
From the Node.js documentation, this is what you should do - always remember that a string without a known encoding is just a bunch of bytes:
var readable = getReadableStreamSomehow();
readable.setEncoding('utf8');
readable.on('data', function(chunk) {
  assert.equal(typeof chunk, 'string');
  console.log('got %d characters of string data', chunk.length);
});
Easy way with the popular (over 5m weekly downloads) and lightweight get-stream library:
https://www.npmjs.com/package/get-stream
const fs = require('fs');
const getStream = require('get-stream');
(async () => {
  const stream = fs.createReadStream('unicorn.txt');
  console.log(await getStream(stream)); // output is a string
})();
Streams don't have a simple .toString() function (which I understand) nor something like a .toStringAsync(cb) function (which I don't understand).
So I created my own helper function:
var streamToString = function(stream, callback) {
  var str = '';
  stream.on('data', function(chunk) {
    str += chunk;
  });
  stream.on('end', function() {
    callback(str);
  });
}

// how to use:
streamToString(myStream, function(myStr) {
  console.log(myStr);
});
I had more luck using it like this:
let string = '';
readstream
  .on('data', (buf) => string += buf.toString())
  .on('end', () => console.log(string));
I use Node v9.11.1, and the readstream is the response from an http.get callback.
The cleanest solution may be to use the "string-stream" package, which converts a stream to a string with a promise.
const streamString = require('stream-string')
streamString(myStream).then(string_variable => {
  // myStream was converted to a string, and that string is stored in string_variable
  console.log(string_variable)
}).catch(err => {
  // myStream emitted an error event (err), so the promise from stream-string was rejected
  throw err
})
What about something like a stream reducer?
Here is an example using ES6 classes that shows how to use one.
var stream = require('stream');

class StreamReducer extends stream.Writable {
  constructor(chunkReducer, initialvalue, cb) {
    super();
    this.reducer = chunkReducer;
    this.accumulator = initialvalue;
    this.cb = cb;
  }
  _write(chunk, enc, next) {
    this.accumulator = this.reducer(this.accumulator, chunk);
    next();
  }
  // called once the stream has ended; hand back the accumulated value
  _final(done) {
    this.cb(null, this.accumulator);
    done();
  }
}
// just a test stream
class EmitterStream extends stream.Readable {
  constructor(chunks) {
    super();
    this.chunks = chunks;
  }
  _read() {
    this.chunks.forEach(function (chunk) {
      this.push(chunk);
    }.bind(this));
    this.push(null);
  }
}
// just transform the strings into buffers, as we would get from an fs stream or http request stream
(new EmitterStream(
  ["hello ", "world !"].map(function(str) {
    return Buffer.from(str, 'utf8');
  })
)).pipe(new StreamReducer(
  function (acc, v) {
    acc.push(v);
    return acc;
  },
  [],
  function(err, chunks) {
    console.log(Buffer.concat(chunks).toString('utf8'));
  }
));
All the answers listed appear to open the readable stream in flowing mode, which is not the default in Node.js and can have limitations, since it lacks the backpressure support that Node.js provides in paused readable stream mode.
Here is an implementation using just buffers, native streams and native stream transforms, with support for object mode:
import {Transform} from 'stream';

let buffer = null;

function objectifyStream() {
  return new Transform({
    objectMode: true,
    transform: function(chunk, encoding, next) {
      if (!buffer) {
        buffer = Buffer.from([...chunk]);
      } else {
        buffer = Buffer.from([...buffer, ...chunk]);
      }
      next(null, buffer);
    }
  });
}

process.stdin.pipe(objectifyStream()).pipe(process.stdout);
This worked for me and is based on Node v6.7.0 docs:
let output = '';
stream.on('readable', function() {
  let read = stream.read();
  if (read !== null) {
    // New stream data is available
    output += read.toString();
  } else {
    // Stream is now finished when read is null.
    // You can callback here e.g.:
    callback(null, output);
  }
});
stream.on('error', function(err) {
  callback(err, null);
});
Using the quite popular stream-buffers package which you probably already have in your project dependencies, this is pretty straightforward:
// imports
const { WritableStreamBuffer } = require('stream-buffers');
const { promisify } = require('util');
const { createReadStream } = require('fs');
const pipeline = promisify(require('stream').pipeline);
// sample stream
let stream = createReadStream('/etc/hosts');
// pipeline the stream into a buffer, and print the contents when done
let buf = new WritableStreamBuffer();
pipeline(stream, buf).then(() => console.log(buf.getContents().toString()));
setEncoding('utf8');
Well done Sebastian J above.
I had the "buffer problem" with a few lines of test code I had, and added the encoding information and it solved it, see below.
Demonstrate the problem
software
// process.stdin.setEncoding('utf8');
process.stdin.on('data', (data) => {
  console.log(typeof(data), data);
});
input
hello world
output
object <Buffer 68 65 6c 6c 6f 20 77 6f 72 6c 64 0d 0a>
Demonstrate the solution
software
process.stdin.setEncoding('utf8'); // <- Activate!
process.stdin.on('data', (data) => {
  console.log(typeof(data), data);
});
input
hello world
output
string hello world
In my case, the Content-Type response header was text/plain, so I read the data from the Buffer like this:
let data = [];
stream.on('data', (chunk) => {
  console.log(Buffer.from(chunk).toString());
  data.push(Buffer.from(chunk).toString());
});
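To get the final string once the stream finishes, you could then join the collected pieces on the 'end' event (a minimal completion of the snippet above, not part of the original answer):
stream.on('end', () => {
  const text = data.join('');
  console.log(text);
});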
I have code similar to the following stripped-down example:
const req = request('http://www.my.url.here.com/file.bin'); // 80 MB file
const decipher = .... // decipher from nodejs's crypto module
const output = fs.createWriteStream('result.zip');
const archive = archiver('zip', { zlib: { level: 9 } });
archive.pipe(output);

const stream = req
  .pipe(decipher)
  .on('error', (error) => {
    console.error(`Error deciphering file`);
    req.abort(); // Does nothing
    decipher.unpipe(); // Unpiping to prevent the next step producing a [ERR_STREAM_WRITE_AFTER_END] error
    stream.end(); // End the stream as an error does not end it automatically
  });

archive.append(stream, { name: 'file.bin' });
Once an error occurs deciphering the file I don't want to download any more data. But I've noticed that in these scenarios a req.abort() does nothing.
In the end I have a partially decrypted file in the archive, but it's still ~80 MB. In other words, the entire file has been downloaded despite the error (which I set up to fire near the start of the file).
Why would this occur? How can I prevent the entire file from downloading?
You can destroy the underlying socket. You can get the socket from the 'socket' or 'response' event.
const req = request(options);
req.on('response', function(response) {
  ....
  response.socket.end(); // or response.socket.destroy();
  ....
});
req.pipe(...);
In your case you might modify it a bit. This is basically just a theory, but you could do:
const req = request(options);
let sock = null;
req.on('socket', function(socket) {
  sock = socket;
}).on('error', () => {
  sock.destroy(); // or end();
});
req.pipe(...);
I use this piece of code to download a torrent:
var torrentStream = require('torrent-stream');

var engine = torrentStream('magnet:?xt=urn:btih:44A91362AFFF802F9058993B109C544ACC6B4813');
engine.on('ready', function(e) {
  engine.files.forEach(function(file) {
    console.log('filename:', file.name);
    var stream = file.createReadStream();
    // stream is a readable stream containing the file content
  });
});
This torrent is downloaded correctly by uTorrent, but it doesn't work in Node.js (nothing happens). Any ideas why? Maybe the p2p network was not bootstrapped? How can I do this?
Thanks
Of course nothing happens, because you don't do anything with the stream in the example.
If you want to save it to a file you can create a write stream:
var writeStream = fs.createWriteStream('file2.txt')
stream.pipe(writeStream)
or you can use the stream with events:
var data = ''
stream.on('data', chunk => {
  data += chunk
})
stream.on('end', () => {
  console.log(data)
})
After I emit an error event in MyWritableStream, data transmission stops. What do I need to do to resume the data transfer?
var readable = fs.createReadStream('test.txt');
var writable = new MyWritableStream();

writable.on('error', function(error) {
  console.log('error', error);
  // How can I resume?
});

writable.on('finish', function() {
  console.log('finished');
});

readable.pipe(writable);
I know this question is old, but you might wanna check out https://github.com/miraclx/xresilient
I built this for this exact same reason (works best with seekable streams).
You define a function that returns a readable stream, the library measures the number of bytes that have passed through until an error is met.
Once the readable stream encounters an error event, it recalls the defined function with the number of bytes read so you can index the stream source.
Example:
const fs = require('fs');
const xresilient = require('xresilient');

const readable = xresilient(({bytesRead}) => {
  return generateSeekableStreamSomehow({start: bytesRead});
}, {retries: 5});

const writable = fs.createWriteStream('file.test');
readable.pipe(writable);
File streams are indexable with the start option of the fs.createReadStream() function.
HTTP Requests are indexable with the Range HTTP Header.
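For illustration, resuming from a byte offset could look something like this (a hedged sketch; the file name, URL, and offset are placeholders):
const fs = require('fs');
const https = require('https');

// file stream resumed from a byte offset via the `start` option
const fileTail = fs.createReadStream('file.bin', { start: 1024 });

// HTTP request resumed from the same offset via the Range header
https.get('https://example.com/file.bin', {
  headers: { Range: 'bytes=1024-' }
}, (res) => {
  res.pipe(process.stdout);
});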
Check it out.
https://www.npmjs.com/package/xresilient
I am not sure if this is normal practice, but I can't see another solution for now, and it works for me. If you can suggest a more accurate solution, please do.
We can track the readable stream instance using the 'pipe' event on the writable one:
var util = require('util');
var Writable = require('stream').Writable;

function WriteableStream(options) {
  Writable.call(this, options);
  this.source = null;
  var instance = this;

  this.on('pipe', function(source) {
    instance.source = source;
  });
}
util.inherits(WriteableStream, Writable);
So, when we emit an error event and the readable stream is unpiped automatically, we can re-pipe it ourselves:
WriteableStream.prototype._write = function(chunk, encoding, done) {
  this.emit('error', new Error('test')); // unpipes readable
  done();
};

WriteableStream.prototype.resume = function() {
  this.source.pipe(this); // re-pipes readable
};
Finally, we will use it the following way:
var readable = fs.createReadStream(file);
var writeable = new WriteableStream();

writeable.on('error', function(error) {
  console.log('error', error);
  writeable.resume();
});

readable.pipe(writeable);
The following line will download an image file from a specified url variable:
var filename = path.join(__dirname, url.replace(/^.*[\\\/]/, ''));
request(url).pipe(fs.createWriteStream(filename));
And these lines will take that image and save to MongoDB GridFS:
var gfs = Grid(mongoose.connection.db, mongoose.mongo);
var writestream = gfs.createWriteStream({ filename: filename });
fs.createReadStream(filename).pipe(writestream);
Chaining pipe like this throws Error: 500 Cannot Pipe. Not Pipeable.
request(url).pipe(fs.createWriteStream(filename)).pipe(writestream);
This happens because the image file is not ready to be read yet, right? What should I do to get around this problem?
Using the following: Node.js 0.10.10, mongoose, request and gridfs-stream libraries.
request(url).pipe(fs.createWriteStream(filename)).pipe(writestream);
is the same as this:
var fileStream = fs.createWriteStream(filename);
request(url).pipe(fileStream);
fileStream.pipe(writestream);
So the issue is that you are attempting to .pipe one WriteStream into another WriteStream.
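One way around this (a minimal sketch reusing the variables from the question) is to wait until the file has been fully written before reading it back and piping it into GridFS:
request(url)
  .pipe(fs.createWriteStream(filename))
  .on('close', function () {
    // the file is fully written, so it is safe to read it back now
    fs.createReadStream(filename).pipe(writestream);
  });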
// create 'fs' module variable
var fs = require("fs");
// open the streams
var readerStream = fs.createReadStream('inputfile.txt');
var writerStream = fs.createWriteStream('outputfile.txt');
// pipe the read and write operations
// read input file and write data to output file
readerStream.pipe(writerStream);
I think the confusion in chaining the pipes is caused by the fact that the pipe method implicitly "makes choices" on its own about what to return. That is:
readableStream.pipe(writableStream) // Returns writable stream
readableStream.pipe(duplexStream) // Returns readable stream
But the general rule says that "You can only pipe a Readable Stream into a Writable Stream." In other words, only Readable Streams have the pipe() method.
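To make that concrete, here is a small sketch (the file names are placeholders) that chains through zlib's Gzip transform, which only works because a transform stream is both writable and readable:
const fs = require('fs');
const zlib = require('zlib');

// pipe() returns its destination, so the chain continues only because
// the Gzip stream is duplex (readable as well as writable)
fs.createReadStream('input.txt')               // readable
  .pipe(zlib.createGzip())                     // duplex, so it can be piped onward
  .pipe(fs.createWriteStream('input.txt.gz')); // writable: end of the chain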
You cannot chain another stream after the WriteStream because it is not duplex; therefore, for a gzipped archive, you would do:
const fs = require('fs');
const zlib = require('zlib');

const gunzip = zlib.createGunzip(); // transform used to unzip the downloaded archive

request.get(url, {
  gzip: true,
  encoding: null
})
  .pipe(fs.createWriteStream(tmpPath))
  .on('close', function() {
    console.info("downloaded %s", tmpPath);
    fs.createReadStream(tmpPath)
      .pipe(gunzip)
      .pipe(fs.createWriteStream(destPath))
      .on('close', function() {
        console.info("unarchived %s", destPath);
      })
      .on('error', (error) => {
        console.warn("gzip error:%#", error);
      });
  })
  .on('error', (error) => {
    console.warn("download error:%#", error);
  });