node.js zlib returning 'Check Headers' error when gunzipping stream

So I am working on the nodeschool.io stream-adventure tutorial track and I'm having trouble with the last problem. The instructions say:
An encrypted, gzipped tar file will be piped in on process.stdin. To beat this
challenge, for each file in the tar input, print a hex-encoded md5 hash of the
file contents followed by a single space followed by the filename, then a
newline.
You will receive the cipher name as process.argv[2] and the cipher passphrase as
process.argv[3]. You can pass these arguments directly through to
`crypto.createDecipher()`.
The built-in zlib library you get when you `require('zlib')` has a
`zlib.createGunzip()` that returns a stream for gunzipping.
The `tar` module from npm has a `tar.Parse()` function that emits `'entry'`
events for each file in the tar input. Each `entry` object is a readable stream
of the file contents from the archive and:
`entry.type` is the kind of file ('File', 'Directory', etc)
`entry.path` is the file path
Using the tar module looks like:
var tar = require('tar');
var parser = tar.Parse();
parser.on('entry', function (e) {
  console.dir(e);
});
var fs = require('fs');
fs.createReadStream('file.tar').pipe(parser);
Use `crypto.createHash('md5', { encoding: 'hex' })` to generate a stream that
outputs a hex md5 hash for the content written to it.
This is my attempt so far to work on it:
var tar = require('tar');
var crypto = require('crypto');
var zlib = require('zlib');
var map = require('through2-map');
var cipherAlg = process.argv[2];
var passphrase = process.argv[3];
var cryptoStream = crypto.createDecipher(cipherAlg, passphrase);
var parser = tar.Parse(); //emits 'entry' events per file in tar input
var gunzip = zlib.createGunzip();
parser.on('entry', function(e) {
  e.pipe(cryptoStream).pipe(map(function(chunk) {
    console.log(chunk.toString());
  }));
});
process.stdin
.pipe(gunzip)
.pipe(parser);
I know it's not complete yet, but my issue is that when I try to run this, the input never gets piped through to the tar-parsing part. It seems to fail at the pipe into gunzip. This is my exact error:
events.js:72
throw er; // Unhandled 'error' event
^
Error: incorrect header check
at Zlib._binding.onerror (zlib.js:295:17)
I'm totally stumped because the Node documentation for zlib makes no mention of headers except in its examples with the http/request modules. There are a number of other questions regarding this error with Node, but most use buffers rather than streams, so I couldn't find a relevant answer to my problem. All help is greatly appreciated.

I actually figured it out: I was supposed to decrypt the stream before gunzipping it. The archive was gzipped first and then encrypted, so the transforms have to be undone in the reverse order.
So instead of:
process.stdin
.pipe(gunzip)
.pipe(parser);
it should be:
process.stdin
.pipe(cryptoStream)
.pipe(gunzip)
.pipe(parser);
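For completeness, here is a sketch of the full pipeline with per-file hashing. This is a minimal sketch, not the official solution; it assumes each 'entry' gets its own md5 hash stream, which emits the hex digest as a single 'data' chunk once the entry has been fully piped in:
var tar = require('tar');
var crypto = require('crypto');
var zlib = require('zlib');

var cryptoStream = crypto.createDecipher(process.argv[2], process.argv[3]);
var gunzip = zlib.createGunzip();
var parser = tar.Parse();

parser.on('entry', function (entry) {
  if (entry.type !== 'File') return;
  // a fresh hash stream per entry; reusing one would error with 'write after end'
  var md5 = crypto.createHash('md5', { encoding: 'hex' });
  entry.pipe(md5).on('data', function (digest) {
    // digest is the hex md5 of the whole entry
    console.log(digest + ' ' + entry.path);
  });
});

process.stdin
  .pipe(cryptoStream)
  .pipe(gunzip)
  .pipe(parser);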

Related

Redirect Readable object stdout process to file in node

I use an NPM library to parse markdown to HTML like this:
var Markdown = require('markdown-to-html').Markdown;
var md = new Markdown();
...
md.render('./test', opts, function(err) {
  md.pipe(process.stdout);
});
This outputs the result to my terminal as intended.
However, I need the result inside the execution of my node program. I thought about writing the output stream to a file and then reading it back in later, but I can't figure out how to write the output to a file in the first place.
I tried playing around with var file = fs.createWriteStream('./test.html');, but Node.js streams give me more headaches than results.
I've also looked into the library's repo and Markdown inherits from Readable via util like this:
var util = require('util');
var Readable = require('stream').Readable;
util.inherits(Markdown, Readable);
Any resources or advice would be highly appreciated. (I would also take another library for parsing the markdown, but this gave me the best results so far)
Actually, creating a writable file-stream and piping the markdown to this stream should work just fine. Try it with:
const fs = require('fs');

const writeStream = fs.createWriteStream('./output.html');
md.render('./test', opts, function(err) {
  if (err) return console.error(err);
  md.pipe(writeStream);
});
// in case of errors on the write stream, you should handle them
writeStream.on('error', function (err) {
  console.log(err);
});
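If you need the HTML in memory rather than on disk, you can also collect the readable stream's chunks yourself instead of piping to a file. A minimal sketch, assuming md is the same Markdown readable stream as above:
var chunks = [];
md.render('./test', opts, function (err) {
  if (err) return console.error(err);
  md.on('data', function (chunk) {
    // chunk may be a Buffer or a string; Buffer.from accepts both
    chunks.push(Buffer.from(chunk));
  });
  md.on('end', function () {
    var html = Buffer.concat(chunks).toString('utf8');
    // html now holds the rendered markup for further use in the program
  });
});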

nodejs/fs: writing a tar to memory buffer

I need to be able to tar a directory, and send this to a remote endpoint via HTTP PUT.
I could of course create the tar, save it to disk, then read it again and send it.
But I'd rather like to create the tar, then pipe it to some buffer and send it immediately. I haven't been able to achieve this.
Code so far:
var tar = require('tar');
var fs = require("fs");
var path = "/home/me/uploaddir";
function getTar(path, cb) {
  var buf = new Buffer('');
  var wbuf = fs.createWriteStream(buf);
  wbuf.on("finish", function() {
    cb(buf);
  });
  tar.c({file: ""}, [path]).pipe(wbuf);
}
getTar(path, function(tar) {
//send the tar over http
});
This code results in:
fs.js:575
binding.open(pathModule._makeLong(path),
^
TypeError: path must be a string
at TypeError (native)
at Object.fs.open (fs.js:575:11)
I've also tried using an array as buffer, no joy.
The following solution creates the tar, then pipes it to a buffer and sends it immediately, with great speed, thanks to the tar-fs library.
First install the request library (for simplified requests) and tar-fs (which provides filesystem bindings for tar-stream): npm i -S tar-fs request
var tar = require('tar-fs')
var request = require('request')

// pack specific files in the directory
function packTar (folderName, pathsArr) {
  return tar.pack(folderName, {
    entries: pathsArr
  })
}

// return put stream
function makePutReq (url) {
  return request.put(url)
}

packTar('./testFolder', ['test.txt', 'test1.txt'])
  .pipe(makePutReq('https://www.example.com/put'))
I have made the function names deliberately verbose.
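If you really do need the tar bytes in memory first (for example, to compute a Content-Length before the PUT), you can collect the pack stream's chunks into a single Buffer instead of piping straight to the request. A minimal sketch, using the same packTar from above:
var chunks = [];
var pack = packTar('./testFolder', ['test.txt', 'test1.txt']);
pack.on('data', function (chunk) {
  chunks.push(chunk);
});
pack.on('end', function () {
  // tar-fs emits Buffer chunks, so they can be concatenated directly
  var tarBuffer = Buffer.concat(chunks);
  // send tarBuffer with request.put(...) or any other HTTP client
});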

gunzip partials read from read-stream

I use Node.JS to fetch files from my S3 bucket.
The files over there are gzipped (gz).
I know that the contents of each file is composed of lines, where each line is a JSON of some record that failed to be put on Kinesis.
Each file consists of ~12K such records, and I would like to be able to process the records while the file is being downloaded.
If the file was not gzipped, that could be easily done using streams and readline module.
So, the only thing that stopping me from doing this is the gunzip process which, to my knowledge, needs to be executed on the whole file.
Is there any way of gunzipping a partial of a file?
Thanks.
EDIT 1: (bad example)
Trying what Mark Adler suggested:
const fileStream = s3.getObject(params).createReadStream();
const lineReader = readline.createInterface({input: fileStream});
lineReader.on('line', line => {
  const gunzipped = zlib.gunzipSync(line);
  console.log(gunzipped);
});
I get the following error:
Error: incorrect header check
at Zlib._handle.onerror (zlib.js:363:17)
Yes. node.js has a complete interface to zlib, which allows you to decompress as much of a gzip file at a time as you like.
A working example that solves the problem in the code above:
const zlib = require('zlib');
const readline = require('readline');

const fileStream = s3.getObject(params).createReadStream().pipe(zlib.createGunzip());
const lineReader = readline.createInterface({input: fileStream});
lineReader.on('line', gunzippedLine => {
  console.log(gunzippedLine);
});
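One caveat: as elsewhere on this page, an 'error' event on the gunzip stream is unhandled by default and will crash the process, so it is worth attaching a handler. A minimal sketch:
const gunzip = zlib.createGunzip();
gunzip.on('error', err => {
  // a truncated or corrupt gzip body surfaces here instead of throwing
  console.error('gunzip failed:', err.message);
});
const fileStream = s3.getObject(params).createReadStream().pipe(gunzip);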

Unzipping with zlib in Node.js results in incorrect header error

In short, I'm trying to read a .zip file from my file system, inflate the zip file, and then stream it with xml-stream to do some things with the contents of the file.
I thought this would be fairly simple and started with this:
var fs = require('fs')
  , XmlStream = require('xml-stream')
  , zlib = require('zlib');

//- read the file and buffer it.
var path = '../path/to/some.zip';
var fileBuffer = fs.readFileSync(path, { encoding: 'utf8' });

//- use zlib to unzip it
zlib.gunzip(fileBuffer, function(err, buffer) {
  if (!err) {
    console.log(buffer.toString());
  }
  console.log(err);
});
But this results in a
{ [Error: incorrect header check] errno: -3, code: 'Z_DATA_ERROR' }
Changing the encoding or the method (.unzip, .gunzip or .inflate) isn't working either.
What am I missing here?
Gzip is not zip. They're different compression formats, just like RAR is. The error indicates that what you're trying to read is not a gzipped file.
You can use a different library, such as JSZip.
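For example, with JSZip 3.x you can read the archive entry by entry. A minimal sketch, assuming the entries are text files; loadAsync, forEach, and async('string') are part of JSZip's documented API:
var fs = require('fs');
var JSZip = require('jszip');

// read the zip as raw bytes; no encoding option, so we get a Buffer
var data = fs.readFileSync('../path/to/some.zip');

JSZip.loadAsync(data).then(function (zip) {
  zip.forEach(function (relativePath, file) {
    if (file.dir) return;
    // extract each entry's contents as a string
    file.async('string').then(function (content) {
      console.log(relativePath, content);
    });
  });
});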
I'm using zlib.unzip instead of zlib.gunzip.

node.js tar module, 'entry' readable stream

How should I use the 'entry' readable streams from the tar module to pipe their contents without getting a stream error in the pipe?
This is to get a hint for the last stream-adventure exercise.
I'm not looking for an answer, but a hint or advice.
Here is my code:
var zlib = require('zlib');
var tar = require('tar');
var crypto = require('crypto');
var through = require('through');
var unzip = zlib.createGunzip();
var parser = tar.Parse();
var stream = process.stdin
  .pipe(crypto.createDecipher(process.argv[2], process.argv[3]))
  .pipe(unzip);
var md5 = crypto.createHash('md5', { encoding: 'hex' });
parser.on('entry', function(entry) {
  if (entry.type === 'File') {
    entry.pipe(md5).pipe(process.stdout);
    console.log(entry.path);
  }
});
unzip.pipe(parser);
Here is the output:
$> stream-adventure run app
97911dcc607865d621029f6f927c7851
stream.js:94
throw er; // Unhandled stream error in pipe.
^
Error: write after end
at writeAfterEnd (_stream_writable.js:130:12)
at Hash.Writable.write (_stream_writable.js:178:5)
at Entry.ondata (stream.js:51:26)
at Entry.EventEmitter.emit (events.js:117:20)
at Entry._read (/home/n0t/stream-adventure/secretz/node_modules/tar/lib/entry.js:111:10)
at Entry.write (/home/n0t/stream-adventure/secretz/node_modules/tar/lib/entry.js:68:8)
at Parse._process (/home/n0t/stream-adventure/secretz/node_modules/tar/lib/parse.js:104:11)
at BlockStream.<anonymous> (/home/n0t/stream-adventure/secretz/node_modules/tar/lib/parse.js:46:8)
at BlockStream.EventEmitter.emit (events.js:95:17)
at BlockStream._emitChunk (/home/n0t/stream-adventure/secretz/node_modules/tar/node_modules/block-stream/block-stream.js:145:10)
And with the verify:
$> stream-adventure verify app
ACTUAL: "97911dcc607865d621029f6f927c7851"
EXPECTED: "97911dcc607865d621029f6f927c7851 secretz/METADATA.TXT"
ACTUAL: null
EXPECTED: "2cdcfa9f8bbefb82fb7a894964b5c199 secretz/SPYING.TXT"
ACTUAL: null
EXPECTED: ""
# FAIL
You get this error because entry writes into the md5 stream after it has been closed. Once a stream is closed, you can't write into it again: for md5 this is easy to understand, because its internal buffers would have to be reset, otherwise the hash would be skewed.
In your example, for each file in the tar archive, you pipe the file stream into the same md5 stream. You just have to pipe each file stream into a new md5 stream; here is how you can do it properly:
parser.on('entry', function(entry) {
  if (entry.type === 'File') {
    var md5 = crypto.createHash('md5', { encoding: 'hex' });
    entry.pipe(md5).pipe(process.stdout);
    console.log(entry.path);
  }
});
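One last detail: piping the hash into process.stdout and logging the path separately will not produce the hash filename lines the verifier expects. Since the hash stream emits its hex digest as a single 'data' chunk once the entry ends, you can combine the two in that handler. A sketch of the change inside the 'entry' callback:
entry.pipe(md5).on('data', function (digest) {
  // digest is the hex md5 of the whole entry
  console.log(digest + ' ' + entry.path);
});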
