Write multiple files to http response with streams in nodejs - node.js

I have an array of files that I have to pack into a gzip archive and send them through http response on the fly. That means I can't store the whole file in the memory yet I have to synchronously pipe them into tar.entry or everything is going to break.
const tar = require('tar-stream'); //lib for tar stream
const { createGzip } = require('zlib'); //lib for gzip stream
//large list of huge files.
const files = [ 'file1', 'file2', 'file3', ..., 'file99999' ];
...
//http request handler:
const pack = tar.pack(); //tar stream, creates .tar
const gzipStream = createGzip(); //gzip stream so we could reduce the size
//pipe archive data trough gzip stream
//and send it to the client on the fly
pack.pipe(gzipStream).pipe(response);
//The issue comes here, when I need to pass multiple files to pack.entry
files.forEach(name => {
const src = fs.createReadStream(name); //create stream from file
const size = fs.statSync(name).size; //determine it's size
const entry = pack.entry({ name, size }); //create tar entry
//and this ruins everything because if two different streams
//writes smth into entry, it'll fail and throw an error
src.pipe(entry);
});
Basically I need for the pipe to complete sending data (smth like await src.pipe(entry);), but pipes in nodejs don't do that. So is there any way I could get around it?

Nevermind, just don't use forEach in this case

Related

nodejs/fs: writing a tar to memory buffer

I need to be able to tar a directory, and send this to a remote endpoint via HTTP PUT.
I could of course create the tar, save it to disk, then read it again and send it.
But I'd rather like to create the tar, then pipe it to some buffer and send it immediately. I haven't been able to achieve this.
Code so far:
var tar = require('tar');
var fs = require("fs");
var path = "/home/me/uploaddir";
function getTar(path, cb) {
var buf = new Buffer('');
var wbuf = fs.createWriteStream(buf);
wbuf.on("finish", function() {
cb(buf);
});
tar.c({file:""},[path]).
pipe(wbuf);
}
getTar(path, function(tar) {
//send the tar over http
});
This code results in:
fs.js:575
binding.open(pathModule._makeLong(path),
^
TypeError: path must be a string
at TypeError (native)
at Object.fs.open (fs.js:575:11)
I've also tried using an array as buffer, no joy.
The following solution
creates the tar, then pipes it to some buffer and sends it immediately
and with great speed thanks to the tar-fs library:
First install the libraries request for simplified requests and tar-fs, which provides filesystem bindings for tar-stream: npm i -S tar-fs request
var tar = require('tar-fs')
var request = require('request')
var fs = require('fs')
// pack specific files in the directory
function packTar (folderName, pathsArr) {
return tar.pack(folderName, {
entries: pathsArr
})
}
// return put stream
function makePutReq (url) {
return request.put(url)
}
packTar('./testFolder', ['test.txt', 'test1.txt'])
.pipe(makePutReq('https://www.example.com/put'))
I have renamed the function names to be super verbose.

Compress an uncompressed xlsx file using node.js (Electron)

I have an unzipped xlsx file, in it I edit some files to be able to generate a new xlsx file containing new data.
In linux to recompress the file in xlsx I just need to go into the terminal and type
find . -type f | xargs zip ../newfile.xlsx
into the folder where the xlsx files are.
The question now is how can I do this using node.js?
The solution is to compress a direct list of files contained in xlsx, for some reason if we try to compress the folder the file has corrupted.
The code looks like this if you use JSZIP
var fs = require('fs');
var JSZip = require("jszip");
var zip = new JSZip();
var file = [];
file.push("_rels/.rels");
file.push("docProps/core.xml");
file.push("docProps/app.xml");
file.push("docProps/custom.xml");
file.push("[Content_Types].xml");
file.push("xl/_rels/workbook.xml.rels");
file.push("xl/styles.xml");
file.push("xl/pivotTables/_rels/pivotTable3.xml.rels");
file.push("xl/pivotTables/_rels/pivotTable1.xml.rels");
file.push("xl/pivotTables/_rels/pivotTable2.xml.rels");
file.push("xl/pivotTables/pivotTable3.xml");
file.push("xl/pivotTables/pivotTable1.xml");
file.push("xl/pivotTables/pivotTable2.xml");
file.push("xl/workbook.xml");
file.push("xl/worksheets/_rels/sheet2.xml.rels");
file.push("xl/worksheets/_rels/sheet1.xml.rels");
file.push("xl/worksheets/_rels/sheet3.xml.rels");
file.push("xl/worksheets/sheet4.xml");
file.push("xl/worksheets/sheet1.xml");
file.push("xl/worksheets/sheet3.xml");
file.push("xl/worksheets/sheet2.xml");
file.push("xl/sharedStrings.xml");
file.push("xl/pivotCache/_rels/pivotCacheDefinition1.xml.rels");
file.push("xl/pivotCache/pivotCacheDefinition1.xml");
file.push("xl/pivotCache/pivotCacheRecords1.xml");
for (var i = 0; i < file.length; i++) {
zip.file(file[i], fs.readFileSync("/home/user/xlsx_FILES/"+file[i]));
}
zip.generateAsync({type:"blob"}).then(function(content) {
// see FileSaver.js
saveAs(content, "yourfile.xlsx");
});
Take a look at archiver, a compression library for nodejs. The docs for the library look like they are comprehensive. The library also allows you to append archives and take advantage of streaming api's for appending and creating new archives.
Here is an example snippet from their docs which shows how to use the library.
// require modules
var fs = require('fs');
var archiver = require('archiver');
// create a file to stream archive data to.
var output = fs.createWriteStream(__dirname + '/example.zip');
var archive = archiver('zip', {
store: true // Sets the compression method to STORE.
});
// listen for all archive data to be written
output.on('close', function() {
console.log(archive.pointer() + ' total bytes');
console.log('archiver has been finalized and the output file descriptor has closed.');
});
// good practice to catch this error explicitly
archive.on('error', function(err) {
throw err;
});
// pipe archive data to the file
archive.pipe(output);

gunzip partials read from read-stream

I use Node.JS to fetch files from my S3 bucket.
The files over there are gzipped (gz).
I know that the contents of each file is composed by lines, where each line is a JSON of some record that failed to be put on Kinesis.
Each file consists of ~12K such records. and I would like to be able to process the records while the file is being downloaded.
If the file was not gzipped, that could be easily done using streams and readline module.
So, the only thing that stopping me from doing this is the gunzip process which, to my knowledge, needs to be executed on the whole file.
Is there any way of gunzipping a partial of a file?
Thanks.
EDIT 1: (bad example)
Trying what #Mark Adler suggested:
const fileStream = s3.getObject(params).createReadStream();
const lineReader = readline.createInterface({input: fileStream});
lineReader.on('line', line => {
const gunzipped = zlib.gunzipSync(line);
console.log(gunzipped);
})
I get the following error:
Error: incorrect header check
at Zlib._handle.onerror (zlib.js:363:17)
Yes. node.js has a complete interface to zlib, which allows you to decompress as much of a gzip file at a time as you like.
A working example that solves the above problem
The following solves the problem in the above code:
const fileStream = s3.getObject(params).createReadStream().pipe(zlib.createGunzip());
const lineReader = readline.createInterface({input: fileStream});
lineReader.on('line', gunzippedLine => {
console.log(gunzippedLine);
})

node.js zlib returning 'Check Headers' error when gunzipping stream

So I am working on the nodeschool.io stream-adventure tutorial track and I'm having trouble with the last problem. The instructions say:
An encrypted, gzipped tar file will be piped in on process.stdin. To beat this
challenge, for each file in the tar input, print a hex-encoded md5 hash of the
file contents followed by a single space followed by the filename, then a
newline.
You will receive the cipher name as process.argv[2] and the cipher passphrase as
process.argv[3]. You can pass these arguments directly through to
`crypto.createDecipher()`.
The built-in zlib library you get when you `require('zlib')` has a
`zlib.createGunzip()` that returns a stream for gunzipping.
The `tar` module from npm has a `tar.Parse()` function that emits `'entry'`
events for each file in the tar input. Each `entry` object is a readable stream
of the file contents from the archive and:
`entry.type` is the kind of file ('File', 'Directory', etc)
`entry.path` is the file path
Using the tar module looks like:
var tar = require('tar');
var parser = tar.Parse();
parser.on('entry', function (e) {
console.dir(e);
});
var fs = require('fs');
fs.createReadStream('file.tar').pipe(parser);
Use `crypto.createHash('md5', { encoding: 'hex' })` to generate a stream that
outputs a hex md5 hash for the content written to it.
This is my attempt so far to work on it:
var tar = require('tar');
var crypto = require('crypto');
var zlib = require('zlib');
var map = require('through2-map');
var cipherAlg = process.argv[2];
var passphrase = process.argv[3];
var cryptoStream = crypto.createDecipher(cipherAlg, passphrase);
var parser = tar.Parse(); //emits 'entry' events per file in tar input
var gunzip = zlib.createGunzip();
parser.on('entry', function(e) {
e.pipe(cryptoStream).pipe(map(function(chunk) {
console.log(chunk.toString());
}));
});
process.stdin
.pipe(gunzip)
.pipe(parser);
I know it's not complete yet, but my issue is that when I try to run this, the input never gets piped to the tar file parsing part. It seems to hang up on the piping to gunzip. This is my exact error:
events.js:72
throw er; // Unhandled 'error' event
^
Error: incorrect header check
at Zlib._binding.onerror (zlib.js:295:17)
I'm totally stumped because the node documentation for Zlib has no mention of headers except for when it has examples with the http/request modules. There are a number of other questions regarding this error with node, but most use buffers rather than streams, so I couldn't find a relevant answer to my problem. All help is greatly appreciated
I actually figured it out, I was supposed to decrypt the stream before unzipping it.
So instead of:
process.stdin
.pipe(gunzip)
.pipe(parser);
it should be:
process.stdin
.pipe(cryptoStream)
.pipe(gunzip)
.pipe(parser);

How to pipe one readable stream into two writable streams at once in Node.js?

The goal is to:
Create a file read stream.
Pipe it to gzip (zlib.createGzip())
Then pipe the read stream of zlib output to:
1) HTTP response object
2) and writable file stream to save the gzipped output.
Now I can do down to 3.1:
var gzip = zlib.createGzip(),
sourceFileStream = fs.createReadStream(sourceFilePath),
targetFileStream = fs.createWriteStream(targetFilePath);
response.setHeader('Content-Encoding', 'gzip');
sourceFileStream.pipe(gzip).pipe(response);
... which works fine, but I need to also save the gzipped data to a file so that I don't need to regzip every time and be able to directly stream the gzipped data as a response.
So how do I pipe one readable stream into two writable streams at once in Node?
Would sourceFileStream.pipe(gzip).pipe(response).pipe(targetFileStream); work in Node 0.8.x?
Pipe chaining/splitting doesn't work like you're trying to do here, sending the first to two different subsequent steps:
sourceFileStream.pipe(gzip).pipe(response);
However, you can pipe the same readable stream into two writeable streams, eg:
var fs = require('fs');
var source = fs.createReadStream('source.txt');
var dest1 = fs.createWriteStream('dest1.txt');
var dest2 = fs.createWriteStream('dest2.txt');
source.pipe(dest1);
source.pipe(dest2);
I found that zlib returns a readable stream which can be later piped into multiple other streams. So I did the following to solve the above problem:
var sourceFileStream = fs.createReadStream(sourceFile);
// Even though we could chain like
// sourceFileStream.pipe(zlib.createGzip()).pipe(response);
// we need a stream with a gzipped data to pipe to two
// other streams.
var gzip = sourceFileStream.pipe(zlib.createGzip());
// This will pipe the gzipped data to response object
// and automatically close the response object.
gzip.pipe(response);
// Then I can pipe the gzipped data to a file.
gzip.pipe(fs.createWriteStream(targetFilePath));
you can use "readable-stream-clone" package
const fs = require("fs");
const ReadableStreamClone = require("readable-stream-clone");
const readStream = fs.createReadStream('text.txt');
const readStream1 = new ReadableStreamClone(readStream);
const readStream2 = new ReadableStreamClone(readStream);
const writeStream1 = fs.createWriteStream('sample1.txt');
const writeStream2 = fs.createWriteStream('sample2.txt');
readStream1.pipe(writeStream1)
readStream2.pipe(writeStream2)

Resources