Why append rather than write when using knox / node.js to grab file from Amazon s3 - node.js

I'm experimenting with the knox module for node.js as a way of managing some small files in an Amazon S3 bucket. Everything works fine stand-alone: I can upload a file, download a file, etc. However, I want to be able to download a file on a recurring schedule. When I modify the code to run on an interval, the downloaded file is appended to the previous download instead of overwriting it.
I'm not sure if I've made a mistake in the file-write code or in the knox handling code. I've tried several different write approaches (writeFile, writeStream, etc.) and I've looked at the knox source code. Nothing stands out to me as an obvious problem. Here's the code I'm using:
knox = require('knox');
fs = require('fs');

var downFile = DOWNFILE;
var downTxt = '';
var timer = INTERVAL;
var path = S3PATH + downFile;

setInterval(function()
{
    var s3client = knox.createClient(
    {
        key: '********************',
        secret: '**********************************',
        bucket: '********'
    });
    s3client.get(path).on('response', function(response)
    {
        response.setEncoding('ascii');
        response.on('data', function(chunk)
        {
            downTxt += chunk;
        });
        response.on('end', function()
        {
            fs.writeFileSync(downFile, downTxt, 'ascii');
        });
    }).end();
},
timer);

The problem is the placement of var downTxt = '';. That is the only place downTxt is set to blank, so every new request's data is appended to the data from the previous request, because the buffer is never cleared. The simplest fix is to move that line to just before the setEncoding line.
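In other words, a minimal sketch of that fix: drop the top-level var downTxt = ''; and declare it inside the response handler instead, so the buffer starts empty on every run:
s3client.get(path).on('response', function(response)
{
    var downTxt = '';                 // moved here: reset for every download
    response.setEncoding('ascii');
    response.on('data', function(chunk)
    {
        downTxt += chunk;
    });
    response.on('end', function()
    {
        fs.writeFileSync(downFile, downTxt, 'ascii');
    });
}).end();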
However, the way you are processing the data is unnecessarily complicated. Try something like this instead. You don't need to recreate the client every time, and setting the encoding will just break things if you download non-text files while making no difference for text files. Next, you shouldn't collect the data manually; you can start writing it to the file as soon as you receive it. Lastly, since the request is a standard stream, you don't need to monitor the 'data' event, because you can just use pipe.
var knox = require('knox'),
    fs = require('fs'),
    downFile = DOWNFILE,
    timer = INTERVAL,
    path = S3PATH + downFile,
    s3client = knox.createClient({
        key: '********************',
        secret: '**********************************',
        bucket: '********'
    });

(function downloadFile() {
    var str = fs.createWriteStream(downFile);
    s3client.get(path).pipe(str);
    str.on('close', function() {
        setTimeout(downloadFile, timer);
    });
})();
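If piping the knox request object directly doesn't stream the response body in your version of knox, an equivalent sketch that keeps the 'response'/end() pattern already used in the question still pipes straight to disk:
(function downloadFile() {
    s3client.get(path).on('response', function(response) {
        var str = fs.createWriteStream(downFile);   // opened fresh each run, so the file is overwritten
        response.pipe(str);
        str.on('close', function() {
            setTimeout(downloadFile, timer);        // schedule the next run only after this one finishes
        });
    }).end();
})();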

Related

wasn't able to write the buffer stream into a newly created directory in nodeJs

I had to create a directory named stuff and write the stream into a text file inside the "stuff" directory. But because the code in Node.js doesn't execute in a serial fashion, I'm getting an error which says the directory doesn't exist. Any help would be appreciated. The code is pasted below.
var http = require("http");
var fs = require("fs");

var readStream = fs.createReadStream(__dirname + "/readMe.txt", "UTF-8");
var writeStream;

fs.mkdir("stuff", function() {
    writeStream = fs.createWriteStream(__dirname + "/stuff/writeMe.txt");
});

readStream.on("data", function(chunk) {
    writeStream.write(chunk);
});
When you call the folder creation function, it returns instantly. But the callback function is executed only when the folder has been created (or creation has failed).
fs.mkdir("stuff", function() {
    // executed after folder creation
    writeStream = fs.createWriteStream(__dirname + "/stuff/writeMe.txt");
});
// executed instantly
In your current code, when you try to write to the folder, it has not been created yet, and writeStream is undefined.
So you need to write to the folder after it's been created:
fs.mkdir("stuff", function() {
    writeStream = fs.createWriteStream(__dirname + "/stuff/writeMe.txt");
    readStream.on("data", function(chunk) {
        writeStream.write(chunk);
    });
});
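As a side note (not part of the original answer), checking the callback's error argument keeps a pre-existing folder from breaking things silently, and pipe can replace the manual data handler:
fs.mkdir("stuff", function(err) {
    if (err && err.code !== "EEXIST") throw err;   // ignore "folder already exists", fail on anything else
    var writeStream = fs.createWriteStream(__dirname + "/stuff/writeMe.txt");
    readStream.pipe(writeStream);                  // pipe wires up the data/end handling for you
});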

Node writeFileSync encoding options for images

I'm using fs.writeFileSync(file, data[, options]) to save a file returned from http.get(options[, callback])
This works fine for text files, but images, PDFs, etc. end up corrupted. From the searching around that I've done, it's apparently because fs.writeFileSync(file, data[, options]) defaults to UTF-8.
I've tried setting the encoding to 'binary', the MIME type and the extension, all to no avail. It feels like something really obvious that I'm overlooking; can anyone point me in the right direction?
Thank you in advance
Update
I'm running this through Electron. I didn't think it was worth mentioning, as Electron is just running Node, but I'm not a Node or Electron expert so I'm not sure.
Create a Buffer from the image data and set its encoding to binary. Then pass that data into a stream.PassThrough and pipe that into a stream.Writable.
var fs = require('fs');
var stream = require('stream');

var imgStream = new stream.PassThrough();
imgStream.end(Buffer.from(data, 'binary'));

var wStream = fs.createWriteStream('./<dest>.<ext>');

imgStream.once('end', () => {
    console.log('Image Written');
});
imgStream.once('error', (err) => {
    console.log(err);
});
imgStream.pipe(wStream);
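For comparison, a minimal sketch (not from the answer; the URL and filename are placeholders) that sidesteps encodings entirely by streaming the HTTP response straight to disk:
var http = require('http');
var fs = require('fs');

// no string encoding is ever applied, so binary data such as images arrives intact
http.get('http://example.com/image.png', function(res) {
    res.pipe(fs.createWriteStream('./image.png'));
});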

untarring files to S3 fails, not sure why

(new information below)
I am trying to set up a lambda function that reacts to uploaded tgz files by uncompressing them and writing the results back to S3. The unzip and untar work fine, but uploading to S3 fails:
/Users/russell/lambda/gzip/node_modules/aws-sdk/lib/s3/managed_upload.js:350
var buf = self.body.read(self.partSize - self.partBuffer.length) ||
^
TypeError: undefined is not a function
at ManagedUpload.fillStream (/Users/russell/lambda/gzip/node_modules/aws-sdk/lib/s3/managed_upload.js:350:25)
at Entry.<anonymous> (/Users/russell/lambda/gzip/node_modules/aws-sdk/lib/s3/managed_upload.js:167:28)
at Entry.emit (events.js:104:17)
at Entry._read (/Users/russell/lambda/gzip/node_modules/tar/lib/entry.js:123:12)
at Entry.end (/Users/russell/lambda/gzip/node_modules/tar/lib/entry.js:82:8)
at Parse._process (/Users/russell/lambda/gzip/node_modules/tar/lib/parse.js:107:13)
at BlockStream.<anonymous> (/Users/russell/lambda/gzip/node_modules/tar/lib/parse.js:47:8)
at BlockStream.emit (events.js:107:17)
at BlockStream._emitChunk (/Users/russell/lambda/gzip/node_modules/tar/node_modules/block-stream/block-stream.js:145:10)
at BlockStream.write (/Users/russell/lambda/gzip/node_modules/tar/node_modules/block-stream/block-stream.js:45:10)
This error occurs when I write to S3, but if instead I write the files locally to disk it works, so the pipeline is correct.
Here is code that demonstrates the problem:
var aws = require('aws-sdk');
var s3 = new aws.S3({apiVersion: '2006-03-01'});
var zlib = require('zlib');
var tar = require('tar');
var fstream = require('fstream');

fstream.Reader({'path': 'testdata.tar.gz'})
    .pipe(zlib.Unzip())
    .pipe(tar.Parse())
    .on('entry', function(entry) {
        var filename = entry.path;
        console.log('got ' + entry.type + ' ' + filename);
        if (entry.type == 'File') {
            if (1) { // switch between working and nonworking cases
                s3.upload({Bucket: 'my_bucket', Key: 'gunzip-test/' + filename, Body: entry}, {},
                    function(err, data) {
                        if (err)
                            console.log('ERROR!');
                        else
                            console.log('OK');
                    });
            }
            else {
                entry.pipe(fstream.Writer({ 'path': '/tmp/mytest/' + filename }));
            }
        }
    });
If the code is set to write to S3 it fails with the above error; if it writes the extracted files locally it succeeds. entry is a stream, and according to the docs it should be accepted as the upload Body parameter. I put a print statement in ManagedUpload, where the failure occurs, and confirmed that self.body is a stream:
var stream = require('stream');
console.log('is it a stream? ' + ((self.body instanceof stream) ? 'yes' : 'no'));
console.log('self.body.read is ' + self.body.read);
returns
$ got File gunzip.js
is it a stream? yes
self.body.read is undefined
I'm pretty new to AWS and node.js, so there could be a basic problem with this, but I've spent a day and haven't found it. I did the upload call with unzip instead of gzip and it worked (using Lambda functions to unzip archives in S3 is really slow). Can anyone point me at something I am doing wrong in this code?
Thanks
I think I understand this a little better. I broke the pipeline up into pieces and looked at each one. The problem is that tar.Parse uses fstream and not stream. If I look at the return of the .pipe(tar.Parse()) statement it is a stream, but it is not a stream.Readable or a stream.Writable. fstream does not define a read() method (its reader is based on Stream, it is not a stream.Readable), so tar.Parse, which is based on Stream, does not have one either.
So a refinement of the question is: is this a bug in fstream, or is fstream not intended to be a stream? I think it is a bug. From the README:
"Like FS streams, but with stat on them, and supporting directories and symbolic links, as well as normal files. Also, you can use this to set the stats on a file, even if you don't change its contents, or to create a symlink, etc."
In my case running the stream through stream.PassThrough helped.
var PassThrough = require('stream').PassThrough;

var stream = getStreamSomeHow();
var passthrough = new PassThrough();

stream.pipe(passthrough);
s3.upload({ ..., Body: passthrough });
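Applied to the pipeline from the question, that workaround would look roughly like this (a sketch that reuses the question's bucket and key names and assumes the same requires as above):
var PassThrough = require('stream').PassThrough;

fstream.Reader({'path': 'testdata.tar.gz'})
    .pipe(zlib.Unzip())
    .pipe(tar.Parse())
    .on('entry', function(entry) {
        if (entry.type == 'File') {
            var passthrough = new PassThrough();   // gives s3.upload a real stream.Readable to read from
            entry.pipe(passthrough);
            s3.upload({Bucket: 'my_bucket', Key: 'gunzip-test/' + entry.path, Body: passthrough},
                function(err, data) {
                    console.log(err ? 'ERROR!' : 'OK');
                });
        }
    });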
Your body variable is a Stream object, in which case you will need to use .toString()
var aws = require('aws-sdk');
var s3 = new aws.S3({apiVersion: '2006-03-01'});
var zlib = require('zlib');
var tar = require('tar');
var fstream = require('fstream');

fstream.Reader({'path': 'testdata.tar.gz'})
    .pipe(zlib.Unzip())
    .pipe(tar.Parse())
    .on('entry', function(entry) {
        var filename = entry.path;
        console.log('got ' + entry.type + ' ' + filename);
        if (entry.type == 'File') {
            if (1) { // switch between working and nonworking cases
                s3.upload({Bucket: 'my_bucket', Key: 'gunzip-test/' + filename, Body: entry.toString()}, {},
                    function(err, data) {
                        if (err)
                            console.log('ERROR!');
                        else
                            console.log('OK');
                    });
            }
            else {
                entry.pipe(fstream.Writer({ 'path': '/tmp/mytest/' + filename }));
            }
        }
    });

I want to pipe a readable css file to the http response

I have an issue with outputting the readable stream to the http response.
Behind the scenes there are regular request and response streams coming from the generic http createServer. I check whether req.url ends in .css, and if so I create a readable stream for that file. I can see the CSS contents in console.log, with the right CSS code I expect. Then I try to pipe the readable CSS file stream to the response, but in Chrome the response body is blank when I inspect it, even though it is a 200 response. Any thoughts at first glance? I've tried different variations of where I have code commented out.
router.addRoute("[a-aA-z0-9]{1,50}.css$", function(matches){
    var cssFile = matches[0];
    var pathToCss = process.cwd() + "/" + cssFile;
    // takes care of os diffs regarding path delimiters and such
    pathToCss = path.normalize(pathToCss);
    console.log(matches);
    console.log("PATH TO CSS");
    console.log(pathToCss);

    var readable = fs.createReadStream(pathToCss);

    var write = function(chunk){
        this.queue(chunk.toString());
        console.log(chunk.toString());
    };
    var end = function(){
        this.queue(null);
    };
    var thru = through(write, end);

    //req.on("end",function(){
    res.pipe(readable.pipe(thru)).pipe(res);
    //res.end();
    //});
});
You need to pipe your readable stream into your through-stream, and then pipe it into the response:
readable.pipe(thru).pipe(res);
Edit: for preparing your CSS path, just use path.join instead of concatenating the path and normalizing it:
var pathToCss = path.join(process.cwd(), cssFile);
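Putting both suggestions together, the whole route handler shrinks to something like this sketch (assuming req and res are in scope inside the route callback, as in the question's custom router, and dropping the through-stream since it only passes chunks along unchanged):
router.addRoute("[a-zA-Z0-9]{1,50}\\.css$", function(matches) {
    var pathToCss = path.join(process.cwd(), matches[0]);
    res.writeHead(200, {"Content-Type": "text/css"});
    fs.createReadStream(pathToCss).pipe(res);    // stream the file straight into the response
});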
I separated this route (CSS) out from my normal HTML-producing routes. The problem I had was that my normal routes in my router object returned strings, like res.end(compiled_html_str), and the CSS file's readable stream was going through that same routing function. I made it separate by isolating it from my router.
var cssMatch = [];
if (cssMatch = req.url.match(/.+\/(.+\.css$)/)) {
    res.writeHead(200, {"Content-Type": "text/css"});   // writeHead takes the status code first
    var cssFile = cssMatch[1];
    var pathToCss = process.cwd() + "/" + cssFile;
    // takes care of os diffs regarding path delimiters and such
    pathToCss = path.normalize(pathToCss);
    console.log(cssMatch);
    console.log("PATH TO CSS");
    console.log(pathToCss);

    var readable = fs.createReadStream(pathToCss);
    var cssStr = "";
    readable.on("data", function(chunk) {
        cssStr += chunk.toString();
    });
    readable.on("end", function() {
        res.end(cssStr);
    });
}

Downloading a Gzip'd file

I'm attempting to download a file using the http module in node. While the file seems to download successfully, the resultant file cannot be opened using gzip. I've tried downloading the file through other methods, and that works, and I've tried multiple ways of opening the resultant gzip'd file, but all of those produce the same error.
I did attempt to use the request module, but there seemed to be no way of accessing the returned HTTP headers before the file was finished downloading, which I need because I'd like to offer some sort of visual indicator as to how long this file is going to take to download.
This is (roughly) the code that I've got so far.
var http = require('http');
var fs = require('fs');

var progress = 0;

var downloadFile = function() {
    http.get(FILE_URL, function(response) {
        var maxBytes = parseInt(response.headers['content-length'], 10);
        var dumpFile = fs.createWriteStream(FILENAME + '.dl');

        response.pipe(dumpFile);
        response
            .on('data', function(chunk) {
                progress += chunk.length;
                // progressbar-type code here
            })
            .on('end', function() {
                // pass
            });

        dumpFile.on('finish', function() {
            dumpFile.close();
            fs.rename(FILENAME + '.dl', FILENAME);
        });
    });
};
So my question: How would you advise I download a file, bearing in mind it's a large file and I need some sort of visual indicator for download progress? Should I give up on http? Or am I doing something monumentally stupid?
Thanks!
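As an aside (not from this thread), the request package does emit a 'response' event with the headers before the body has finished downloading, so a sketch along these lines would still allow a progress indicator while piping the raw bytes straight to disk:
var fs = require('fs');
var request = require('request');   // assumes the classic request package

var r = request.get(FILE_URL);
r.on('response', function(response) {
    // headers are available here, before the body has been downloaded
    var maxBytes = parseInt(response.headers['content-length'], 10);
    var progress = 0;
    response.on('data', function(chunk) {
        progress += chunk.length;
        // progressbar-type code here, e.g. progress / maxBytes
    });
});
r.pipe(fs.createWriteStream(FILENAME));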
