TDD/ testing with streams in NodeJS - node.js

I've been trying to find a reasonable way to test code that uses streams. Has anyone found a reasonable way/ framework to help testing code that uses streams in nodejs?
For example:
var fs = require('fs'),
request = require('request');
module.exports = function (url, path, callback) {
request(url)
.pipe(fs.createWriteStream(path))
.on('finish', function () {
callback();
});
};
My current way of testing this type of code either involves simplifying the code with streams so much that I can abstract it out to a non-tested chunk of code or by writing something like this:
var rewire = require('rewire'),
download = rewire('../lib/download'),
stream = require('stream'),
util = require('util');
describe('download', function () {
it('should download a url', function (done) {
var fakeRequest, fakeFs, FakeStream;
FakeStream = function () {
stream.Writable.call(this);
};
util.inherits(FakeStream, stream.Writable);
FakeStream.prototype._write = function (data, encoding, cb) {
expect(data.toString()).toEqual("hello world")
cb();
};
fakeRequest = function (url) {
var output = new stream.Readable();
output.push("hello world");
output.push(null);
expect(url).toEqual('http://hello');
return output;
};
fakeFs = {
createWriteStream: function (path) {
expect(path).toEqual('hello.txt');
return new FakeStream();
}
};
download.__set__('fs', fakeFs);
download.__set__('request', fakeRequest);
download('http://hello', 'hello.txt', function () {
done();
});
});
});
Has anyone come up with more elegant ways of testing streams?

Made streamtest for that purpose. It not only make streams tests cleaner but also allows to test V1 and V2 streams https://www.npmjs.com/package/streamtest

I've also been using memorystream, but then putting my assertions into the finish event. That way it looks more like a real use of the stream being tested:
require('chai').should();
var fs = require('fs');
var path = require('path');
var MemoryStream = require('memorystream');
var memStream = MemoryStream.createWriteStream();
/**
* This is the Transform that we want to test:
*/
var Parser = require('../lib/parser');
var parser = new Parser();
describe('Parser', function(){
it('something', function(done){
fs.createReadStream(path.join(__dirname, 'something.txt'))
.pipe(parser)
.pipe(memStream)
.on('finish', function() {
/**
* Check that our parser has created the right output:
*/
memStream
.toString()
.should.eql('something');
done();
});
});
});
Checking objects can be done like this:
var memStream = MemoryStream.createWriteStream(null, {objectMode: true});
.
.
.
.on('finish', function() {
memStream
.queue[0]
.should.eql({ some: 'thing' });
done();
});
.
.
.

Read the Stream into memory and compare it with the expected Buffer.
it('should output a valid Stream', (done) => {
const stream = getStreamToTest();
const expectedBuffer = Buffer.from(...);
let bytes = new Buffer('');
stream.on('data', (chunk) => {
bytes = Buffer.concat([bytes, chunk]);
});
stream.on('end', () => {
try {
expect(bytes).to.deep.equal(expectedBuffer);
done();
} catch (err) {
done(err);
}
});
});

I feel you pain.
I don't know any framework to help out testing with streams, but if take a look here,
where I'm developing a stream library, you can see how I approach this problem.
here is a idea of what I'm doing.
var chai = require("chai")
, sinon = require("sinon")
, chai.use(require("sinon-chai"))
, expect = chai.expect
, through2 = require('through2')
;
chai.config.showDiff = false
function spy (stream) {
var agent, fn
;
if (spy.free.length === 0) {
agent = sinon.spy();
} else {
agent = spy.free.pop();
agent.reset();
}
spy.used.push(agent);
fn = stream._transform;
stream.spy = agent;
stream._transform = function(c) {
agent(c);
return fn.apply(this, arguments);
};
stream._transform = transform;
return agent;
};
spy.free = [];
spy.used = [];
describe('basic through2 stream', function(){
beforeEach(function(){
this.streamA = through2()
this.StreamB = through2.obj()
// other kind of streams...
spy(this.streamA)
spy(this.StreamB)
})
afterEach(function(){
spy.used.map(function(agent){
spy.free.push(spy.used.pop())
})
})
it("must call transform with the data", function(){
var ctx = this
, dataA = new Buffer('some data')
, dataB = 'some data'
;
this.streamA.pipe(through2(function(chunk, enc, next){
expect(ctx.streamA.spy).to.have.been.calledOnce.and.calledWidth(dataA)
}))
this.streamB.pipe(through2(function(chunk, enc, next){
expect(ctx.streamB.spy).to.have.been.calledOnce.and.calledWidth(dataB)
}))
this.streamA.write(dataA)
this.streamB.write(dataB)
})
})
Note that my spy function wraps the _transform method and call my spy and call the original _transform
Also, The afterEach function is recycling the spies, because you can end up creating hundreds of them.
The problem gets hard is when you want to test async code. Then promises your best friend. The link I gave above have some sample that.

I haven't used this, and it's quite old, but https://github.com/dominictarr/stream-spec might help.

You can test streams using MemoryStream and sinon by using spies. Here is how I tested some of my code.
describe('some spec', function() {
it('some test', function(done) {
var outputStream = new MemoryStream();
var spyCB = sinon.spy();
outputStream.on('data', spyCB);
doSomething(param, param2, outputStream, function() {
sinon.assert.calledWith(spyCB, 'blah');
done();
});
});
});

Best way I have found is to use events
const byline = require('byline');
const fs = require('fs');
it('should process all lines in file', function(done){
//arrange
let lines = 0;
//file with 1000 lines
let reader = fs.readFileStream('./input.txt');
let writer = fs.writeFileStream('./output.txt');
//act
reader.pipe(byline).pipe(writer);
byline.on('line', function() {
lines++;
});
//assert
writer.on('close', function() {
expect(lines).to.equal(1000);
done();
});
});
by passing done as a callback, mocha waits until it is called before moving on.

Related

promise flow - bluebird

I'm trying to use ffmpeg to cut a few seconds of a directory with mp3s.
But my actual problems comes with using promises.
Instead of starting one ffmpeg process after the other it starts up one for each file immediately.
My guess is the promise isn't waiting for the resolve and I didn't understand it properly.
var P = require('bluebird');
var fs = P.promisifyAll(require("fs"));
function transcode(filename) {
return P.delay(1000).then(function() {
console.log(filename);
});
}
var in_dir = "./somedir/";
var getFiles = function(){
return fs.readdirAsync(in_dir);
};
getFiles().mapSeries(function(filename){
transcode(filename);
});
I've created a simplified version of your code. The only missing thing was the return statement for the final closure:
var P = require('bluebird');
var fs = P.promisifyAll(require("fs"));
function transcode(filename) {
return P.delay(1000).then(function() {
console.log(filename);
});
}
var in_dir = "./somedir/";
var getFiles = function(){
return fs.readdirAsync(in_dir);
};
getFiles().mapSeries(function(filename){
return transcode(filename);
});

How to Synchronize the file writes in Node.Js

I am using the EJS compile to create notification templates and I would like to know how to write the file to the file system in parallel and send the notification once all the files are saved.
Please see the below code snippet which I used
var fs = require('fs');
var ejs = require('ejs');
var arrayOfData = [someData]; //Prepare data from database
//Iterate through the data
for (var i = 0; i < arrayOfData.length; i++) {
generateFileFromTemplate(arrayOfData[i],function(){});
}
function generateFileFromTemplate(templateData,callback)
{
var outputFile = fileData.Id + ".html";
var compiled = ejs.compile(fs.readFileSync('email-template.ejs', 'utf8'));
var html = compiled(templateData);
fs.writeFile(outputFile, html, callback);
}
Please help.
Use async.each for your use case
async.each(arrayOfData,
function(ele, next){
generateFileFromTemplate(ele,function(){});
},
function(err){
if(err) console.log('err', err);
sendNotification();
}
);
You can use a great utility library called Async, particularly its parallel method: https://github.com/caolan/async#parallel.
Here's an example:
var async = require('async');
/*-------------*/
var tasks = arrayOfData.map(function(data) {
return function(cb) {
generateFileFromTemplate(data,function(){});
cb(null);
}
});
async.parallel(tasks, function(err) {
console.log('My job is done');
})

Using sinon and mocha to test node.js http.get

Let's say I have the following function
'use strict';
var http = require('http');
var getLikes = function(graphId, callback) {
// request to get the # of likes
var req = http.get('http://graph.facebook.com/' + graphId, function(response) {
var str = '';
// while data is incoming, concatenate it
response.on('data', function (chunk) {
str += chunk;
});
// data is fully recieved, and now parsable
response.on('end', function () {
var likes = JSON.parse(str).likes;
var data = {
_id: 'likes',
value: likes
};
callback(null, data);
});
}).on('error', function(err) {
callback(err, null);
});
};
module.exports = getLikes;
I would like to test it with mocha AND sinon, but I don't get how to stub the http.get.
For now I'm doing a real http.get to facebook, but I would like to avoid it.
Here is my current test:
'use strict';
/*jshint expr: true*/
var should = require('chai').should(),
getLikes = require('getLikes');
describe('getLikes', function() {
it('shoud return likes', function(done) {
getLikes(function(err, likes) {
should.not.exist(err);
likes._id.should.equal('likes');
likes.value.should.exist();
done();
});
});
});
How can I achieve what I want, without relying on something else than sinon? (I don't want to use the request module to perform the get, or using another testing lib)
Thanks!
You should be able to do this with just sinon.stub(http, 'get').yields(fakeStream); but you might be better served by looking at nock and/or rewire. nock would let you fake the facebook response without mucking too much in the getLikes implementation details. rewire would let you swap in a mock http variable into the getLikes scope without monkey patching the http.get function globally.
Do do it with just sinon as above, you'll need to create a mock response that will properly resemble the stream. Something like:
var fakeLikes = {_id: 'likes', value: 'foo'};
var resumer = require('resumer');
var stream = resumer().queue(JSON.stringify(fakeLikes)).end()

Piping multiple file streams using Node.js

I want to stream multiple files, one after each other, to the browser. To illustrate, think of having multiple CSS files which shall be delivered concatenated as one.
The code I am using is:
var directory = path.join(__dirname, 'css');
fs.readdir(directory, function (err, files) {
async.eachSeries(files, function (file, callback) {
if (!endsWith(file, '.css')) { return callback(); } // (1)
var currentFile = path.join(directory, file);
fs.stat(currentFile, function (err, stats) {
if (stats.isDirectory()) { return callback(); } // (2)
var stream = fs.createReadStream(currentFile).on('end', function () {
callback(); // (3)
});
stream.pipe(res, { end: false }); // (4)
});
}, function () {
res.end(); // (5)
});
});
The idea is that I
filter out all files that do not have the file extension .css.
filter out all directories.
proceed with the next file once a file has been read completely.
pipe each file to the response stream without closing it.
end the response stream once all files have been piped.
The problem is that only the first .css file gets piped, and all remaining files are missing. It's as if (3) would directly jump to (5) after the first (4).
The interesting thing is that if I replace line (4) with
stream.on('data', function (data) {
console.log(data.toString('utf8'));
});
everything works as expected: I see multiple files. If I then change this code to
stream.on('data', function (data) {
res.write(data.toString('utf8'));
});
all files expect the first are missing again.
What am I doing wrong?
PS: The error happens using Node.js 0.8.7 as well as using 0.8.22.
UPDATE
Okay, it works if you change the code as follows:
var directory = path.join(__dirname, 'css');
fs.readdir(directory, function (err, files) {
var concatenated = '';
async.eachSeries(files, function (file, callback) {
if (!endsWith(file, '.css')) { return callback(); }
var currentFile = path.join(directory, file);
fs.stat(currentFile, function (err, stats) {
if (stats.isDirectory()) { return callback(); }
var stream = fs.createReadStream(currentFile).on('end', function () {
callback();
}).on('data', function (data) { concatenated += data.toString('utf8'); });
});
}, function () {
res.write(concatenated);
res.end();
});
});
But: Why? Why can't I call res.write multiple times instead of first summing up all the chunks, and then write them all at once?
Consider also using multistream, that allows you to combine and emit multiple streams one after another.
The code was perfectly fine, it was the unit test that was wrong ...
Fixed that, and now it works like a charme :-)
May help someone else:
const fs = require("fs");
const pth = require("path");
let readerStream1 = fs.createReadStream(pth.join(__dirname, "a.txt"));
let readerStream2 = fs.createReadStream(pth.join(__dirname, "b.txt"));
let writerStream = fs.createWriteStream(pth.join(__dirname, "c.txt"));
//only readable streams have "pipe" method
readerStream1.pipe(writerStream);
readerStream2.pipe(writerStream);
I also checked Rocco's answer and its working like a charm:
//npm i --save multistream
const multi = require('multistream');
const fs = require('fs');
const pth = require("path");
let streams = [
fs.createReadStream(pth.join(__dirname, "a.txt")),
fs.createReadStream(pth.join(__dirname, "b.txt"))
];
let writerStream = fs.createWriteStream(pth.join(__dirname, "c.txt"));
//new multi(streams).pipe(process.stdout);
new multi(streams).pipe(writerStream);
and to send the results to client:
const multi = require('multistream');
const fs = require('fs');
const pth = require("path");
const exp = require("express");
const app = exp();
app.listen(3000);
app.get("/stream", (q, r) => {
new multi([
fs.createReadStream(pth.join(__dirname, "a.txt")),
fs.createReadStream(pth.join(__dirname, "b.txt"))
]).pipe(r);
});

Node.js: How to read a stream into a buffer?

I wrote a pretty simple function that downloads an image from a given URL, resize it and upload to S3 (using 'gm' and 'knox'), I have no idea if I'm doing the reading of a stream to a buffer correctly. (everything is working, but is it the correct way?)
also, I want to understand something about the event loop, how do I know that one invocation of the function won't leak anything or change the 'buf' variable to another already running invocation (or this scenario is impossible because the callbacks are anonymous functions?)
var http = require('http');
var https = require('https');
var s3 = require('./s3');
var gm = require('gm');
module.exports.processImageUrl = function(imageUrl, filename, callback) {
var client = http;
if (imageUrl.substr(0, 5) == 'https') { client = https; }
client.get(imageUrl, function(res) {
if (res.statusCode != 200) {
return callback(new Error('HTTP Response code ' + res.statusCode));
}
gm(res)
.geometry(1024, 768, '>')
.stream('jpg', function(err, stdout, stderr) {
if (!err) {
var buf = new Buffer(0);
stdout.on('data', function(d) {
buf = Buffer.concat([buf, d]);
});
stdout.on('end', function() {
var headers = {
'Content-Length': buf.length
, 'Content-Type': 'Image/jpeg'
, 'x-amz-acl': 'public-read'
};
s3.putBuffer(buf, '/img/d/' + filename + '.jpg', headers, function(err, res) {
if(err) {
return callback(err);
} else {
return callback(null, res.client._httpMessage.url);
}
});
});
} else {
callback(err);
}
});
}).on('error', function(err) {
callback(err);
});
};
Overall I don't see anything that would break in your code.
Two suggestions:
The way you are combining Buffer objects is a suboptimal because it has to copy all the pre-existing data on every 'data' event. It would be better to put the chunks in an array and concat them all at the end.
var bufs = [];
stdout.on('data', function(d){ bufs.push(d); });
stdout.on('end', function(){
var buf = Buffer.concat(bufs);
})
For performance, I would look into if the S3 library you are using supports streams. Ideally you wouldn't need to create one large buffer at all, and instead just pass the stdout stream directly to the S3 library.
As for the second part of your question, that isn't possible. When a function is called, it is allocated its own private context, and everything defined inside of that will only be accessible from other items defined inside that function.
Update
Dumping the file to the filesystem would probably mean less memory usage per request, but file IO can be pretty slow so it might not be worth it. I'd say that you shouldn't optimize too much until you can profile and stress-test this function. If the garbage collector is doing its job you may be overoptimizing.
With all that said, there are better ways anyway, so don't use files. Since all you want is the length, you can calculate that without needing to append all of the buffers together, so then you don't need to allocate a new Buffer at all.
var pause_stream = require('pause-stream');
// Your other code.
var bufs = [];
stdout.on('data', function(d){ bufs.push(d); });
stdout.on('end', function(){
var contentLength = bufs.reduce(function(sum, buf){
return sum + buf.length;
}, 0);
// Create a stream that will emit your chunks when resumed.
var stream = pause_stream();
stream.pause();
while (bufs.length) stream.write(bufs.shift());
stream.end();
var headers = {
'Content-Length': contentLength,
// ...
};
s3.putStream(stream, ....);
Javascript snippet
function stream2buffer(stream) {
return new Promise((resolve, reject) => {
const _buf = [];
stream.on("data", (chunk) => _buf.push(chunk));
stream.on("end", () => resolve(Buffer.concat(_buf)));
stream.on("error", (err) => reject(err));
});
}
Typescript snippet
async function stream2buffer(stream: Stream): Promise<Buffer> {
return new Promise < Buffer > ((resolve, reject) => {
const _buf = Array < any > ();
stream.on("data", chunk => _buf.push(chunk));
stream.on("end", () => resolve(Buffer.concat(_buf)));
stream.on("error", err => reject(`error converting stream - ${err}`));
});
}
You can easily do this using node-fetch if you are pulling from http(s) URIs.
From the readme:
fetch('https://assets-cdn.github.com/images/modules/logos_page/Octocat.png')
.then(res => res.buffer())
.then(buffer => console.log)
Note: this solely answers "How to read a stream into a buffer?" and ignores the context of the original question.
ES2018 Answer
Since Node 11.14.0, readable streams support async iterators.
const buffers = [];
// node.js readable streams implement the async iterator protocol
for await (const data of readableStream) {
buffers.push(data);
}
const finalBuffer = Buffer.concat(buffers);
Bonus: In the future, this could get better with the stage 2 Array.fromAsync proposal.
// 🛑 DOES NOT WORK (yet!)
const finalBuffer = Buffer.concat(await Array.fromAsync(readableStream));
You can convert your readable stream to a buffer and integrate it in your code in an asynchronous way like this.
async streamToBuffer (stream) {
return new Promise((resolve, reject) => {
const data = [];
stream.on('data', (chunk) => {
data.push(chunk);
});
stream.on('end', () => {
resolve(Buffer.concat(data))
})
stream.on('error', (err) => {
reject(err)
})
})
}
the usage would be as simple as:
// usage
const myStream // your stream
const buffer = await streamToBuffer(myStream) // this is a buffer
I suggest loganfsmyths method, using an array to hold the data.
var bufs = [];
stdout.on('data', function(d){ bufs.push(d); });
stdout.on('end', function(){
var buf = Buffer.concat(bufs);
}
IN my current working example, i am working with GRIDfs and npm's Jimp.
var bucket = new GridFSBucket(getDBReference(), { bucketName: 'images' } );
var dwnldStream = bucket.openDownloadStream(info[0]._id);// original size
dwnldStream.on('data', function(chunk) {
data.push(chunk);
});
dwnldStream.on('end', function() {
var buff =Buffer.concat(data);
console.log("buffer: ", buff);
jimp.read(buff)
.then(image => {
console.log("read the image!");
IMAGE_SIZES.forEach( (size)=>{
resize(image,size);
});
});
I did some other research
with a string method but that did not work, per haps because i was reading from an image file, but the array method did work.
const DISCLAIMER = "DONT DO THIS";
var data = "";
stdout.on('data', function(d){
bufs+=d;
});
stdout.on('end', function(){
var buf = Buffer.from(bufs);
//// do work with the buffer here
});
When i did the string method i got this error from npm jimp
buffer: <Buffer 00 00 00 00 00>
{ Error: Could not find MIME for Buffer <null>
basically i think the type coersion from binary to string didnt work so well.
I suggest to have array of buffers and concat to resulting buffer only once at the end. Its easy to do manually, or one could use node-buffers
I just want to post my solution. Previous answers was pretty helpful for my research. I use length-stream to get the size of the stream, but the problem here is that the callback is fired near the end of the stream, so i also use stream-cache to cache the stream and pipe it to res object once i know the content-length. In case on an error,
var StreamCache = require('stream-cache');
var lengthStream = require('length-stream');
var _streamFile = function(res , stream , cb){
var cache = new StreamCache();
var lstream = lengthStream(function(length) {
res.header("Content-Length", length);
cache.pipe(res);
});
stream.on('error', function(err){
return cb(err);
});
stream.on('end', function(){
return cb(null , true);
});
return stream.pipe(lstream).pipe(cache);
}
in ts, [].push(bufferPart) is not compatible;
so:
getBufferFromStream(stream: Part | null): Promise<Buffer> {
if (!stream) {
throw 'FILE_STREAM_EMPTY';
}
return new Promise(
(r, j) => {
let buffer = Buffer.from([]);
stream.on('data', buf => {
buffer = Buffer.concat([buffer, buf]);
});
stream.on('end', () => r(buffer));
stream.on('error', j);
}
);
}
You can do this by:
async function toBuffer(stream: ReadableStream<Uint8Array>) {
const list = []
const reader = stream.getReader()
while (true) {
const { value, done } = await reader.read()
if (value)
list.push(value)
if (done)
break
}
return Buffer.concat(list)
}
or using buffer consumer
const buf = buffer(stream)
You can check the "content-length" header at res.headers. It will give you the length of the content you will receive (how many bytes of data it will send)

Resources