Piping multiple file streams using Node.js - node.js

I want to stream multiple files, one after each other, to the browser. To illustrate, think of having multiple CSS files which shall be delivered concatenated as one.
The code I am using is:
var directory = path.join(__dirname, 'css');
fs.readdir(directory, function (err, files) {
async.eachSeries(files, function (file, callback) {
if (!endsWith(file, '.css')) { return callback(); } // (1)
var currentFile = path.join(directory, file);
fs.stat(currentFile, function (err, stats) {
if (stats.isDirectory()) { return callback(); } // (2)
var stream = fs.createReadStream(currentFile).on('end', function () {
callback(); // (3)
});
stream.pipe(res, { end: false }); // (4)
});
}, function () {
res.end(); // (5)
});
});
The idea is that I
filter out all files that do not have the file extension .css.
filter out all directories.
proceed with the next file once a file has been read completely.
pipe each file to the response stream without closing it.
end the response stream once all files have been piped.
The problem is that only the first .css file gets piped, and all remaining files are missing. It's as if (3) would directly jump to (5) after the first (4).
The interesting thing is that if I replace line (4) with
stream.on('data', function (data) {
console.log(data.toString('utf8'));
});
everything works as expected: I see multiple files. If I then change this code to
stream.on('data', function (data) {
res.write(data.toString('utf8'));
});
all files expect the first are missing again.
What am I doing wrong?
PS: The error happens using Node.js 0.8.7 as well as using 0.8.22.
UPDATE
Okay, it works if you change the code as follows:
var directory = path.join(__dirname, 'css');
fs.readdir(directory, function (err, files) {
var concatenated = '';
async.eachSeries(files, function (file, callback) {
if (!endsWith(file, '.css')) { return callback(); }
var currentFile = path.join(directory, file);
fs.stat(currentFile, function (err, stats) {
if (stats.isDirectory()) { return callback(); }
var stream = fs.createReadStream(currentFile).on('end', function () {
callback();
}).on('data', function (data) { concatenated += data.toString('utf8'); });
});
}, function () {
res.write(concatenated);
res.end();
});
});
But: Why? Why can't I call res.write multiple times instead of first summing up all the chunks, and then write them all at once?

Consider also using multistream, that allows you to combine and emit multiple streams one after another.

The code was perfectly fine, it was the unit test that was wrong ...
Fixed that, and now it works like a charme :-)

May help someone else:
const fs = require("fs");
const pth = require("path");
let readerStream1 = fs.createReadStream(pth.join(__dirname, "a.txt"));
let readerStream2 = fs.createReadStream(pth.join(__dirname, "b.txt"));
let writerStream = fs.createWriteStream(pth.join(__dirname, "c.txt"));
//only readable streams have "pipe" method
readerStream1.pipe(writerStream);
readerStream2.pipe(writerStream);
I also checked Rocco's answer and its working like a charm:
//npm i --save multistream
const multi = require('multistream');
const fs = require('fs');
const pth = require("path");
let streams = [
fs.createReadStream(pth.join(__dirname, "a.txt")),
fs.createReadStream(pth.join(__dirname, "b.txt"))
];
let writerStream = fs.createWriteStream(pth.join(__dirname, "c.txt"));
//new multi(streams).pipe(process.stdout);
new multi(streams).pipe(writerStream);
and to send the results to client:
const multi = require('multistream');
const fs = require('fs');
const pth = require("path");
const exp = require("express");
const app = exp();
app.listen(3000);
app.get("/stream", (q, r) => {
new multi([
fs.createReadStream(pth.join(__dirname, "a.txt")),
fs.createReadStream(pth.join(__dirname, "b.txt"))
]).pipe(r);
});

Related

Does csv-parse allow you to read from file?

I'm learning how to use the csv-parse module for nodejs. I wrote this code and it works perfectly:
var fs = require('fs');
const fileName = './spreadsheet.csv';
const assert = require('assert');
const { parse } = require('csv-parse');
const records = [];
// Initialize the parser
const parser = parse({
delimiter: ','
});
// Use the readable stream api to consume records
parser.on('readable', function(){
let record;
while ((record = parser.read()) !== null) {
records.push(record);
}
});
// Catch any error
parser.on('error', function(err){
console.error(err.message);
});
fs.readFile(fileName, 'utf8', function (err, f) {
if (err) {
return console.error(err);
}
const rows = f.split("\r\n");
for(let x in rows) {
parser.write(rows[x]+"\n");
}
parser.end();
console.log(records);
});
But right now, I depend on the fs module and fs.readFile to consume my csv file. Does the csv-parse have an option to read ffrom file? I ask because as you can see in my code, I ahve to specify my own line-break characters, which could differ between csv files. I thought maybe the csv-parse module would have something that can more readily address such a situation?
The parser object will do most of the work for you. It is expecting the data to arrive on its stream interface and it will do everything else. All you have to do is open a stream and the pipe it to the parser like this:
fs.createReadStream(fileName).pipe(parser);
And, here it is combined with your code:
const fs = require('fs');
const fileName = './spreadsheet.csv';
const { parse } = require('csv-parse');
const records = [];
// Initialize the parser
const parser = parse({
delimiter: ','
});
// Use the readable stream api to consume records
parser.on('readable', function(){
let record;
while ((record = parser.read()) !== null) {
records.push(record);
}
});
// Catch any error
parser.on('error', function(err){
console.error(err.message);
});
parser.on('end', function() {
console.log(records);
});
// open the file and pipe it into the parser
fs.createReadStream(fileName).pipe(parser);
P.S. It's amazing that such a simple example of getting the CSV data from a file is not shown in the documentation (at least not anywhere I could find it). I'm also surprised, they don't offer an option where they will automatically read the data from the stream, instead requiring you to implement the readable event handler. Odd, for such an otherwise complete package.

How do I save a file to my nodejs server from web service call

My issue is this:
I have made a call to someones web service. I get back the file name, extension and the "bytes". Bytes actually come in as an array and at position 0 "Bytes[0]" is the following string:
JVBERi0xLjYKJeLjz9MKMSAwIG9iago8PC9EZWNvZGVQYXJtczw8L0sgLTEvQ29sdW1ucyAyNTUwL1Jvd3MgMzMwMD4+L1R5cGUvWE9iamVjdC9CaXRzUGVyQ29tcG9uZW50IDEvU3VidHlwZS9JbWFnZS9XaWR0aCAyNTUwL0NvbG9yU3BhY2UvRGV2aWNlR3JheS9GaWx0ZXIvQ0NJVFRGYXhEZWNvZGUvTGVuZ3RoIDI4Mzc0L0hlaWdodCAzMzAwPj5zdHJlYW0K////////y2IZ+M8+zOPM/HzLhzkT1NAjCCoEY0CMJNAjCR4c8HigRhBAi1iZ0eGth61tHhraTFbraRaYgQ8zMFyGyGM8ZQZDI8MjMI8M6enp9W6enp+sadIMIIEYwy/ggU0wwgwjWzSBUmwWOt/rY63fraTVNu6C7R7pN6+v///20v6I70vdBaPjptK8HUQfX9/17D/TMet+l06T//0v3/S9v+r98V0nH///7Ff+Ed3/v16X9XX/S/KP0vSb//W88ksdW18lzBEJVpPXT0k9b71///...
The string example above has been cut off for readability.
How do I take that string and save it as a readable file?
This case it's a pdf.
let pdfBytes = '{String shown above in example}'
You can use the Node.js File System Module to save the received buffer.
Assuming the encoding of your data is base64:
const fs = require('fs');
let pdfBytes = 'JVBERi0xLjYKJeLjz9...'
let writeStream = fs.createWriteStream('filename.pdf');
writeStream.write(pdfBytes, 'base64');
writeStream.on('finish', () => {
console.log('saved');
});
writeStream.end();
I am using the fs file system here to create and save the file. I use a lot of try catch in case anything goes wrong. This example shows how you could pass the data to a function that could then create the file for you.
const util = require('util');
const fs = require('fs');
const fsOpen = util.promisify(fs.open);
const fsWriteFile = util.promisify(fs.writeFile);
const fsClose = util.promisify(fs.close);
function saveNewFile(path, data) {
return new Promise((async (resolve, reject) => {
let fileToCreate;
// Open the file for writing
try {
fileToCreate = await fsOpen(path, 'wx');
} catch (err) {
reject('Could not create new file, it may already exist');
return;
}
// Write the new data to the file
try {
await fsWriteFile(fileToCreate, data);
} catch (err) {
reject('Error writing to new file');
return;
}
// Close the file
try {
await fsClose(fileToCreate);
} catch (err) {
reject('Error closing new file');
return;
}
resolve('File created');
}));
};
// Data we want to use to create the file.
let pdfBytes = 'JVBERi0xLjYKJeLj...'
saveNewFile('./filename.pdf', pdfBytes);

Deleting file in node.js not working

I am using Node.js with Express.
I am trying to delete a file after sending it to client with express js.
function deleteFile (file) {
fs.unlink(file, function (err) {
if (err) {
logger.error(err);
}
});
}
app.get("/deleteFileAfterDownload", function (req, res){
var fileName = "a.pdf"
var stream = fs.createReadStream(fileName);
var streamClosed = false;
req.on('end',function(){
if (!streamClosed){
stream.emit('close');
// I tried stream.destroy() but that is also not working
}
});
stream.on('close', function () {
streamClosed = true;
deleteFile(fileName);
});
req.on('data', function(){});
stream.pipe(res);
});
But the file is not getting deleted. it seems the process is still using file because just after I end the process, the file is getting deleted.
Can anybody tell me why?
If I am doing it wrong, please tell me a good way.
Please add a log in deleteFile, makesure it is called.
Try simplify it:
var fileName = "a.pdf"
var stream = fs.createReadStream(fileName);
stream.pipe(res);
res.once("finish", function () {
deleteFile(fileName);
});
The previous example only delete file if download finished,
if you want delete file unconditionly, try the following:
var fileName = "a.pdf";
var stream = fs.createReadStream(fileName);
stream.pipe(res).once("close", function () {
stream.close();
deleteFile(fileName);
});
stream.close() is important here, because stream not close if pipe aborted.

TDD/ testing with streams in NodeJS

I've been trying to find a reasonable way to test code that uses streams. Has anyone found a reasonable way/ framework to help testing code that uses streams in nodejs?
For example:
var fs = require('fs'),
request = require('request');
module.exports = function (url, path, callback) {
request(url)
.pipe(fs.createWriteStream(path))
.on('finish', function () {
callback();
});
};
My current way of testing this type of code either involves simplifying the code with streams so much that I can abstract it out to a non-tested chunk of code or by writing something like this:
var rewire = require('rewire'),
download = rewire('../lib/download'),
stream = require('stream'),
util = require('util');
describe('download', function () {
it('should download a url', function (done) {
var fakeRequest, fakeFs, FakeStream;
FakeStream = function () {
stream.Writable.call(this);
};
util.inherits(FakeStream, stream.Writable);
FakeStream.prototype._write = function (data, encoding, cb) {
expect(data.toString()).toEqual("hello world")
cb();
};
fakeRequest = function (url) {
var output = new stream.Readable();
output.push("hello world");
output.push(null);
expect(url).toEqual('http://hello');
return output;
};
fakeFs = {
createWriteStream: function (path) {
expect(path).toEqual('hello.txt');
return new FakeStream();
}
};
download.__set__('fs', fakeFs);
download.__set__('request', fakeRequest);
download('http://hello', 'hello.txt', function () {
done();
});
});
});
Has anyone come up with more elegant ways of testing streams?
Made streamtest for that purpose. It not only make streams tests cleaner but also allows to test V1 and V2 streams https://www.npmjs.com/package/streamtest
I've also been using memorystream, but then putting my assertions into the finish event. That way it looks more like a real use of the stream being tested:
require('chai').should();
var fs = require('fs');
var path = require('path');
var MemoryStream = require('memorystream');
var memStream = MemoryStream.createWriteStream();
/**
* This is the Transform that we want to test:
*/
var Parser = require('../lib/parser');
var parser = new Parser();
describe('Parser', function(){
it('something', function(done){
fs.createReadStream(path.join(__dirname, 'something.txt'))
.pipe(parser)
.pipe(memStream)
.on('finish', function() {
/**
* Check that our parser has created the right output:
*/
memStream
.toString()
.should.eql('something');
done();
});
});
});
Checking objects can be done like this:
var memStream = MemoryStream.createWriteStream(null, {objectMode: true});
.
.
.
.on('finish', function() {
memStream
.queue[0]
.should.eql({ some: 'thing' });
done();
});
.
.
.
Read the Stream into memory and compare it with the expected Buffer.
it('should output a valid Stream', (done) => {
const stream = getStreamToTest();
const expectedBuffer = Buffer.from(...);
let bytes = new Buffer('');
stream.on('data', (chunk) => {
bytes = Buffer.concat([bytes, chunk]);
});
stream.on('end', () => {
try {
expect(bytes).to.deep.equal(expectedBuffer);
done();
} catch (err) {
done(err);
}
});
});
I feel you pain.
I don't know any framework to help out testing with streams, but if take a look here,
where I'm developing a stream library, you can see how I approach this problem.
here is a idea of what I'm doing.
var chai = require("chai")
, sinon = require("sinon")
, chai.use(require("sinon-chai"))
, expect = chai.expect
, through2 = require('through2')
;
chai.config.showDiff = false
function spy (stream) {
var agent, fn
;
if (spy.free.length === 0) {
agent = sinon.spy();
} else {
agent = spy.free.pop();
agent.reset();
}
spy.used.push(agent);
fn = stream._transform;
stream.spy = agent;
stream._transform = function(c) {
agent(c);
return fn.apply(this, arguments);
};
stream._transform = transform;
return agent;
};
spy.free = [];
spy.used = [];
describe('basic through2 stream', function(){
beforeEach(function(){
this.streamA = through2()
this.StreamB = through2.obj()
// other kind of streams...
spy(this.streamA)
spy(this.StreamB)
})
afterEach(function(){
spy.used.map(function(agent){
spy.free.push(spy.used.pop())
})
})
it("must call transform with the data", function(){
var ctx = this
, dataA = new Buffer('some data')
, dataB = 'some data'
;
this.streamA.pipe(through2(function(chunk, enc, next){
expect(ctx.streamA.spy).to.have.been.calledOnce.and.calledWidth(dataA)
}))
this.streamB.pipe(through2(function(chunk, enc, next){
expect(ctx.streamB.spy).to.have.been.calledOnce.and.calledWidth(dataB)
}))
this.streamA.write(dataA)
this.streamB.write(dataB)
})
})
Note that my spy function wraps the _transform method and call my spy and call the original _transform
Also, The afterEach function is recycling the spies, because you can end up creating hundreds of them.
The problem gets hard is when you want to test async code. Then promises your best friend. The link I gave above have some sample that.
I haven't used this, and it's quite old, but https://github.com/dominictarr/stream-spec might help.
You can test streams using MemoryStream and sinon by using spies. Here is how I tested some of my code.
describe('some spec', function() {
it('some test', function(done) {
var outputStream = new MemoryStream();
var spyCB = sinon.spy();
outputStream.on('data', spyCB);
doSomething(param, param2, outputStream, function() {
sinon.assert.calledWith(spyCB, 'blah');
done();
});
});
});
Best way I have found is to use events
const byline = require('byline');
const fs = require('fs');
it('should process all lines in file', function(done){
//arrange
let lines = 0;
//file with 1000 lines
let reader = fs.readFileStream('./input.txt');
let writer = fs.writeFileStream('./output.txt');
//act
reader.pipe(byline).pipe(writer);
byline.on('line', function() {
lines++;
});
//assert
writer.on('close', function() {
expect(lines).to.equal(1000);
done();
});
});
by passing done as a callback, mocha waits until it is called before moving on.

Fetch 100 zip files in node

So I'm trying to fetch a bunch of files from a server. The current code is basically as follows.
var http = require('http');
var fs = require('fs');
var arr = [{id:'fileOne', id:'fileTwo', id:'fileThree',....];
function fetchData() {
for (var i = 0; i < arr.length; i++) {
var file = fs.createWriteStream("../path/file.zip");
var request = http.get("url/AFG_adm.zip", function(response) {
response.pipe(file);
});
}
}
I don't think this is the best approach, trying to figure out how to handle errors, how to make sure that a file gets loaded before the next iteration... Any help is much appreciated.
You should use the async module for handling the async part, also the request module will save you a lot of effort.
You can handle this in many ways using either async.cargo or async.map.
The theory is to group up things or a series of things, and then take action according to what you want it to do, but in async way.
so a basic .map of an array of files to download would be like this.
// required modules
var async = require('async');
var request = require('request');
// array of urls
var URLs = ['hxxp://...ZipFile1.zip', 'hxxp://...ZipFile2.zip'];
// destination directory
var destinationDirectory = 'downloads';
// asyncDownload function
function asyncDownload(url, callback) {
// get filename
var filename = url.substring(url.lastIndexOf(".") + 1);
// create write stream
var stream = fs.createWriteStream(destinationDirectory + "/" + filename);
// listen for open event to start request and pipe
stream.on('open', function () {
request(url).pipe(stream);
})
// when finish , call callback
stream.on('finish', function () {
callback(null, destinationDirectory + "/" + filename);
})
}
async.map(
URLs, asyncDownload, function (err, results) {
console.log(results);
});

Resources