node.js tar module, 'entry' readable stream

How should I use the 'entry' readable stream from the tar module (npmjs.org) to pipe file contents without getting a stream error in the pipe?
This is for a hint on the last stream-adventure (github) exercise.
I'm not looking for an answer, but a hint or advice.
Here is my code:
var zlib = require('zlib');
var tar = require('tar');
var crypto = require('crypto');
var through = require('through');

var unzip = zlib.createGunzip();
var parser = tar.Parse();
var stream = process.stdin
  .pipe(crypto.createDecipher(process.argv[2], process.argv[3]))
  .pipe(unzip);
var md5 = crypto.createHash('md5', { encoding: 'hex' });

parser.on('entry', function(entry) {
  if (entry.type === 'File') {
    entry.pipe(md5).pipe(process.stdout);
    console.log(entry.path);
  }
});
unzip.pipe(parser);
Here is the output:
$> stream-adventure run app
97911dcc607865d621029f6f927c7851
stream.js:94
throw er; // Unhandled stream error in pipe.
^
Error: write after end
at writeAfterEnd (_stream_writable.js:130:12)
at Hash.Writable.write (_stream_writable.js:178:5)
at Entry.ondata (stream.js:51:26)
at Entry.EventEmitter.emit (events.js:117:20)
at Entry._read (/home/n0t/stream-adventure/secretz/node_modules/tar/lib/entry.js:111:10)
at Entry.write (/home/n0t/stream-adventure/secretz/node_modules/tar/lib/entry.js:68:8)
at Parse._process (/home/n0t/stream-adventure/secretz/node_modules/tar/lib/parse.js:104:11)
at BlockStream.<anonymous> (/home/n0t/stream-adventure/secretz/node_modules/tar/lib/parse.js:46:8)
at BlockStream.EventEmitter.emit (events.js:95:17)
at BlockStream._emitChunk (/home/n0t/stream-adventure/secretz/node_modules/tar/node_modules/block-stream/block-stream.js:145:10)
And with the verify step:
$> stream-adventure verify app
ACTUAL: "97911dcc607865d621029f6f927c7851"
EXPECTED: "97911dcc607865d621029f6f927c7851 secretz/METADATA.TXT"
ACTUAL: null
EXPECTED: "2cdcfa9f8bbefb82fb7a894964b5c199 secretz/SPYING.TXT"
ACTUAL: null
EXPECTED: ""
# FAIL

You get this error because entry writes into the md5 stream after it has been closed. Once a stream is closed, you can't write into it again: for md5 this is easy to understand, because the internal buffers would have to be reset, otherwise the hash would be skewed.
In your example, for each file in the tar archive you pipe the file stream into the same md5 stream. You just have to pipe each file stream into a new md5 stream; here is how you can do it properly:
parser.on('entry', function(entry) {
  if (entry.type === 'File') {
    // a fresh hash stream per entry, so nothing is written after end
    var md5 = crypto.createHash('md5', { encoding: 'hex' });
    entry.pipe(md5).pipe(process.stdout);
    console.log(entry.path);
  }
});
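If you also want the exercise's exact output format (hash, space, path, newline), one variant is to wait for the complete digest and print both together. This is only a sketch: it assumes that, with { encoding: 'hex' }, the hash stream emits the full hex digest as a single 'data' event once the entry has ended.
parser.on('entry', function(entry) {
  if (entry.type === 'File') {
    var md5 = crypto.createHash('md5', { encoding: 'hex' });
    entry.pipe(md5).on('data', function(hex) {
      // hex holds the whole digest, emitted after the entry ends
      console.log(hex + ' ' + entry.path);
    });
  }
});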

Related

nodejs/fs: writing a tar to memory buffer

I need to be able to tar a directory, and send this to a remote endpoint via HTTP PUT.
I could of course create the tar, save it to disk, then read it again and send it.
But I'd rather like to create the tar, then pipe it to some buffer and send it immediately. I haven't been able to achieve this.
Code so far:
var tar = require('tar');
var fs = require("fs");
var path = "/home/me/uploaddir";
function getTar(path, cb) {
  var buf = new Buffer('');
  // this is the problem: createWriteStream expects a file path, not a Buffer
  var wbuf = fs.createWriteStream(buf);
  wbuf.on("finish", function() {
    cb(buf);
  });
  tar.c({ file: "" }, [path]).pipe(wbuf);
}
getTar(path, function(tar) {
//send the tar over http
});
This code results in:
fs.js:575
binding.open(pathModule._makeLong(path),
^
TypeError: path must be a string
at TypeError (native)
at Object.fs.open (fs.js:575:11)
I've also tried using an array as buffer, no joy.
The following solution "creates the tar, then pipes it to some buffer and sends it immediately", and with great speed, thanks to the tar-fs library.
First install the libraries: request, for simplified HTTP requests, and tar-fs, which provides filesystem bindings for tar-stream:
npm i -S tar-fs request
var tar = require('tar-fs')
var request = require('request')
var fs = require('fs')

// pack specific files in the directory
function packTar (folderName, pathsArr) {
  return tar.pack(folderName, {
    entries: pathsArr
  })
}

// return put stream
function makePutReq (url) {
  return request.put(url)
}

packTar('./testFolder', ['test.txt', 'test1.txt'])
  .pipe(makePutReq('https://www.example.com/put'))
I have made the function names deliberately verbose.
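If you really do need the whole tar in an in-memory Buffer first (what the original getTar attempted), a minimal sketch is to collect the stream's chunks and concatenate them. getTarBuffer is a hypothetical helper, and this assumes the archive fits comfortably in memory:
var tar = require('tar-fs');

// hypothetical helper: collect a packed tar stream into one Buffer
function getTarBuffer (folderName, cb) {
  var chunks = [];
  tar.pack(folderName)
    .on('data', function (chunk) { chunks.push(chunk); })
    .on('end', function () { cb(Buffer.concat(chunks)); });
}

getTarBuffer('./testFolder', function (buf) {
  // buf now holds the whole archive, ready to send over HTTP
});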

Unhandled 'error' event when extracting zip file with nodejs

I want to download a zip file and extract it with nodejs. This is what I have done so far:
var fs = require('fs');
var wget = require('wget-improved');
var filesizeHumanReadable = require('filesize');
var unzip = require('unzip');

var downloadCSS = function() {
  var src = 'http://7-zip.org/a/7za920.zip';
  var output = '/tmp/7z.zip';
  var options = {};
  var download = wget.download(src, output, options);
  download.on('error', function(err) {
    console.log(err);
  });
  download.on('start', function(fileSize) {
    console.log(filesizeHumanReadable(fileSize));
  });
  download.on('end', function(outputMessage) {
    console.log(outputMessage);
    console.log(output);
    fs.createReadStream(output).pipe(unzip.Extract({ path: '/tmp/' }));
  });
  download.on('progress', function(progress) {
    // code to show progress bar
  });
}
The error message I get when running it:
mles-MacBook-Pro:test-api mles$ node index.js
375.83 KB
Finished writing to disk
/tmp/7z.zip
events.js:85
throw er; // Unhandled 'error' event
^
Error: EPERM, unlink '/tmp'
at Error (native)
Now I'm a bit baffled: how do I handle the error event, and what is my actual error?
Does the process have enough permission to write to /tmp? Does /tmp already have some files in it?
unlink is the Node.js call for deleting a file; invoked on a directory (in your case /tmp) it fails, with EPERM on OS X. Apparently unzip.Extract calls it on the extraction path.
Setting the unzip location to a dedicated subdirectory fixes it:
fs.createReadStream(output).pipe(unzip.Extract({ path: '/tmp/7zip' }));
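To actually handle the 'error' events instead of crashing (the other half of the question), one sketch is to attach listeners to both ends of the pipe; the log messages here are only illustrative:
fs.createReadStream(output)
  .on('error', function(err) {
    console.error('read failed:', err); // e.g. the zip was never written
  })
  .pipe(unzip.Extract({ path: '/tmp/7zip' }))
  .on('error', function(err) {
    console.error('extract failed:', err); // e.g. EPERM on the target path
  });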
I'm marking mostruash's answer as correct since he brought me on the right track.

node.js zlib returning 'Check Headers' error when gunzipping stream

So I am working on the nodeschool.io stream-adventure tutorial track and I'm having trouble with the last problem. The instructions say:
An encrypted, gzipped tar file will be piped in on process.stdin. To beat this
challenge, for each file in the tar input, print a hex-encoded md5 hash of the
file contents followed by a single space followed by the filename, then a
newline.
You will receive the cipher name as process.argv[2] and the cipher passphrase as
process.argv[3]. You can pass these arguments directly through to
`crypto.createDecipher()`.
The built-in zlib library you get when you `require('zlib')` has a
`zlib.createGunzip()` that returns a stream for gunzipping.
The `tar` module from npm has a `tar.Parse()` function that emits `'entry'`
events for each file in the tar input. Each `entry` object is a readable stream
of the file contents from the archive and:
`entry.type` is the kind of file ('File', 'Directory', etc)
`entry.path` is the file path
Using the tar module looks like:
var tar = require('tar');
var parser = tar.Parse();
parser.on('entry', function (e) {
  console.dir(e);
});
var fs = require('fs');
fs.createReadStream('file.tar').pipe(parser);
Use `crypto.createHash('md5', { encoding: 'hex' })` to generate a stream that
outputs a hex md5 hash for the content written to it.
This is my attempt so far:
var tar = require('tar');
var crypto = require('crypto');
var zlib = require('zlib');
var map = require('through2-map');

var cipherAlg = process.argv[2];
var passphrase = process.argv[3];
var cryptoStream = crypto.createDecipher(cipherAlg, passphrase);
var parser = tar.Parse(); // emits 'entry' events per file in tar input
var gunzip = zlib.createGunzip();

parser.on('entry', function(e) {
  e.pipe(cryptoStream).pipe(map(function(chunk) {
    console.log(chunk.toString());
  }));
});

process.stdin
  .pipe(gunzip)
  .pipe(parser);
I know it's not complete yet, but my issue is that the input never gets piped to the tar-parsing step; it seems to hang on the pipe into gunzip. This is my exact error:
events.js:72
throw er; // Unhandled 'error' event
^
Error: incorrect header check
at Zlib._binding.onerror (zlib.js:295:17)
I'm totally stumped, because the node documentation for zlib has no mention of headers except in its examples with the http/request modules. There are a number of other questions about this error with node, but most use buffers rather than streams, so I couldn't find a relevant answer to my problem. All help is greatly appreciated.
I actually figured it out: I was supposed to decrypt the stream before gunzipping it.
So instead of:
process.stdin
.pipe(gunzip)
.pipe(parser);
it should be:
process.stdin
.pipe(cryptoStream)
.pipe(gunzip)
.pipe(parser);
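For completeness, a sketch of the whole flow once the order is fixed: decipher, then gunzip, then the tar parser, with a fresh md5 hash per entry (see the 'write after end' answer above). The per-entry handler below replaces the chunk-printing one from the question, and assumes the hex digest arrives as a single 'data' event:
process.stdin
  .pipe(cryptoStream)
  .pipe(gunzip)
  .pipe(parser);

parser.on('entry', function(e) {
  if (e.type === 'File') {
    var md5 = crypto.createHash('md5', { encoding: 'hex' });
    e.pipe(md5).on('data', function(hex) {
      console.log(hex + ' ' + e.path);
    });
  }
});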

untarring files to S3 fails, not sure why

(new information below)
I am trying to set up a lambda function that reacts to uploaded tgz files by uncompressing them and writing the results back to S3. The unzip and untar work fine, but uploading to S3 fails:
/Users/russell/lambda/gzip/node_modules/aws-sdk/lib/s3/managed_upload.js:350
var buf = self.body.read(self.partSize - self.partBuffer.length) ||
^
TypeError: undefined is not a function
at ManagedUpload.fillStream (/Users/russell/lambda/gzip/node_modules/aws-sdk/lib/s3/managed_upload.js:350:25)
at Entry.<anonymous> (/Users/russell/lambda/gzip/node_modules/aws-sdk/lib/s3/managed_upload.js:167:28)
at Entry.emit (events.js:104:17)
at Entry._read (/Users/russell/lambda/gzip/node_modules/tar/lib/entry.js:123:12)
at Entry.end (/Users/russell/lambda/gzip/node_modules/tar/lib/entry.js:82:8)
at Parse._process (/Users/russell/lambda/gzip/node_modules/tar/lib/parse.js:107:13)
at BlockStream.<anonymous> (/Users/russell/lambda/gzip/node_modules/tar/lib/parse.js:47:8)
at BlockStream.emit (events.js:107:17)
at BlockStream._emitChunk (/Users/russell/lambda/gzip/node_modules/tar/node_modules/block-stream/block-stream.js:145:10)
at BlockStream.write (/Users/russell/lambda/gzip/node_modules/tar/node_modules/block-stream/block-stream.js:45:10)
This error occurs when I write to S3, but if instead I write the files locally to disk it works, so the pipeline is correct.
Here is code that demonstrates the problem:
var aws = require('aws-sdk');
var s3 = new aws.S3({apiVersion: '2006-03-01'});
var zlib = require('zlib');
var tar = require('tar');
var fstream = require('fstream');

fstream.Reader({'path': 'testdata.tar.gz'})
  .pipe(zlib.Unzip())
  .pipe(tar.Parse())
  .on('entry', function(entry) {
    var filename = entry.path;
    console.log('got ' + entry.type + ' ' + filename);
    if (entry.type == 'File') {
      if (1) { // switch between working and nonworking cases
        s3.upload({Bucket: 'my_bucket', Key: 'gunzip-test/' + filename, Body: entry}, {},
          function(err, data) {
            if (err)
              console.log('ERROR!');
            else
              console.log('OK');
          });
      } else {
        entry.pipe(fstream.Writer({ 'path': '/tmp/mytest/' + filename }));
      }
    }
  });
If the code is set to write to S3 it fails with the above error; if it writes the extracted files locally it succeeds. entry is a stream, and according to the docs should be accepted as the upload Body parameter. I put a print statement in ManagedUpload, where the failure occurs, and confirmed that self.body is a stream:
var stream = require('stream');
console.log('is it a stream? ' + ((self.body instanceof stream) ? 'yes' : 'no'));
console.log('self.body.read is ' + self.body.read);
returns
$ got File gunzip.js
is it a stream? yes
self.body.read is undefined
I'm pretty new to AWS and node.js, so there could be a basic problem with this, but I've spent a day and haven't found it. I did the upload call with unzip instead of gzip and it worked (using lambda functions to unzip archives in S3 is really sloooooow). Can anyone point me at something I am doing wrong in this code?
Thanks
I think I understand this a little better now. I broke the pipeline up into pieces and looked at each one. The problem is that tar.Parse uses fstream, not stream. The return value of the .pipe(tar.Parse()) statement is a stream, but it is neither a stream.Readable nor a stream.Writable. fstream does not define a read() method (its reader is based on Stream, not stream.Readable), so tar.Parse, which is also based on Stream, doesn't have one either.
So a refinement of the question: is this a bug in fstream, or is fstream simply not intended to be a stream? I think it is a bug; from the README:
"Like FS streams, but with stat on them, and supporting directories and
symbolic links, as well as normal files. Also, you can use this to set
the stats on a file, even if you don't change its contents, or to create
a symlink, etc."
In my case, running the stream through stream.PassThrough helped:
var PassThrough = require('stream').PassThrough;

var stream = getStreamSomeHow();
var passthrough = new PassThrough();

stream.pipe(passthrough);
s3.upload({ ..., Body: passthrough });
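Applied to the entry handler from the question, a minimal sketch could look like this (reusing the question's s3 client and bucket/key naming; parser stands in for the tar.Parse() stream):
var PassThrough = require('stream').PassThrough;

parser.on('entry', function(entry) {
  if (entry.type === 'File') {
    // wrap the fstream-based entry in a real stream.Readable for the SDK
    var passthrough = new PassThrough();
    entry.pipe(passthrough);
    s3.upload({Bucket: 'my_bucket', Key: 'gunzip-test/' + entry.path, Body: passthrough},
      function(err, data) {
        console.log(err ? 'ERROR!' : 'OK');
      });
  }
});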
Your body variable is a Stream object, in which case you will need to use .toString()
var aws = require('aws-sdk');
var s3 = new aws.S3({apiVersion: '2006-03-01'});
var zlib = require('zlib');
var tar = require('tar');
var fstream = require('fstream');

fstream.Reader({'path': 'testdata.tar.gz'})
  .pipe(zlib.Unzip())
  .pipe(tar.Parse())
  .on('entry', function(entry) {
    var filename = entry.path;
    console.log('got ' + entry.type + ' ' + filename);
    if (entry.type == 'File') {
      if (1) { // switch between working and nonworking cases
        s3.upload({Bucket: 'my_bucket', Key: 'gunzip-test/' + filename, Body: entry.toString()}, {},
          function(err, data) {
            if (err)
              console.log('ERROR!');
            else
              console.log('OK');
          });
      } else {
        entry.pipe(fstream.Writer({ 'path': '/tmp/mytest/' + filename }));
      }
    }
  });

Reading file with Node.js

I have a problem reading the stats of a file. I have this code:
var fs = require('fs');
process.stdin.setEncoding('utf8');

process.stdin.on('readable', function() {
  var chunk = process.stdin.read();
  if (chunk !== null) {
    var stats = fs.statSync(chunk);
    length = stats.size;
    console.log(length);
  }
});
When I execute this code I get this error:
return binding.stat(pathModule._makeLong(path));
^
Error: ENOENT, no such file or directory 'hello.txt
But the problem is that "hello.txt" actually exists in the same directory!
I have tried with other files and I always get the same error.
Any ideas?
Thanks!
The chunk read from standard input contains a newline at the end, which conflicts with your call to fs.statSync. Try this:
process.stdin.on('readable', function() {
  var chunk = process.stdin.read();
  if (chunk !== null && chunk !== '') {
    var stats = fs.statSync(chunk.trim()); // trim the input
    length = stats.size;
    console.log(length);
  }
});
Also note that the handler will keep running for as long as 'readable' events are triggered. You may wish to terminate the program at some point, for example when the input ends.
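One way to do that, as a sketch, is to listen for stdin's 'end' event, which fires once the input is exhausted (the explicit exit here is illustrative):
process.stdin.on('end', function() {
  // no more input coming; exit explicitly
  process.exit(0);
});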
