Node transform stream not waiting - node.js

I'm getting a file stream from Busboy and then I'm piping it to a custom transform stream to validate and clean it. It works with small files, but as they get bigger my custom stream doesn't wait for the Busboy stream to finish and the output gets truncated.
Here is the busboy code:
busboy
  .on("file", function (fieldname, file, filename, encoding, mimetype) {
    // Creating a mongo doc first
    Dataset.create(dataset, function (err, ds) {
      if (err) {...}
      else {
        file.pipe(validateCSV);
      }
    });

    validateCSV
      .on("finish", function () {
        // Send to Data Import
        datasetService.import(validateCSV, dataset, function (err, result) {
          ...
        });
      });
  });
And my transform stream:
var Transform = require("stream").Transform;
var util = require("util");

module.exports.ValidateCSV = ValidateCSV;

function ValidateCSV(options) {
  if (!(this instanceof ValidateCSV)) return new ValidateCSV(options);
  if (!options) options = {};
  options.objectMode = true;
  Transform.call(this, options);
}

util.inherits(ValidateCSV, Transform);

ValidateCSV.prototype._transform = function (chunk, encoding, done) {
  if (this._checked) {
    this.push(chunk);
  } else {
    // Do some validation
    var data = chunk.toString();
    var lines = data.match(/[^\r\n]+/g);
    var headerline = lines[0] || "";
    var header = headerline.split(",");
    ...
    this._checked = true;
    this.push(chunk);
  }
  done();
};

It turned out it was a backpressure issue, and setting the highWaterMark option on the transform stream fixed it. Ideally it would be set according to the file size of the upload, but this fixed it for me:
function ValidateCSV(options) {
  if (!(this instanceof ValidateCSV)) return new ValidateCSV(options);
  if (!options) options = {};
  options.objectMode = true;
  options.highWaterMark = 100000;
  Transform.call(this, options);
}
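To size the buffer from the upload instead of hard-coding it, one option is to let callers pass their own highWaterMark. A minimal sketch, assuming the request's Content-Length header is available where the stream is constructed (the header lookup, the fallback value, and the req variable are assumptions, not part of the original fix):

function ValidateCSV(options) {
  if (!(this instanceof ValidateCSV)) return new ValidateCSV(options);
  if (!options) options = {};
  options.objectMode = true;
  // Keep the fixed default only when the caller didn't supply a value.
  if (options.highWaterMark == null) options.highWaterMark = 100000;
  Transform.call(this, options);
}

// Hypothetical usage inside the request handler:
var size = parseInt(req.headers["content-length"], 10);
var validateCSV = new ValidateCSV({ highWaterMark: isNaN(size) ? 100000 : size });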

Related

File written to createWriteStream gets corrupted when listening to 'data' event on createReadStream in NodeJS

I have an Electron application and I have written a service to copy files using Node's fs module. The copy works fine when I'm not listening to the data event of fs.createReadStream, but when I add a readStream.on('data', ...) listener, the output file gets corrupted (it is always smaller than the original file). This is my function to copy files:
copyFile(sourcePath: string, targetPath: string): Observable<FileCopyResponseModel> {
  const copyResponse = new Subject<FileCopyResponseModel>();
  const fileSize = this.node.fs.statSync(sourcePath).size;
  const readStream = this.node.fs.createReadStream(sourcePath);
  let bytesCopied = 0;

  readStream.once("error", (err) => {
    const response = new FileCopyResponseModel();
    response.is_error = true;
    response.error = err;
    copyResponse.next(response);
  });

  readStream.on('data', (buffer) => {
    bytesCopied += buffer.length;
    const response = new FileCopyResponseModel();
    response.is_error = false;
    response.is_done = false;
    response.size = fileSize;
    response.size_copied = bytesCopied;
    copyResponse.next(response);
  });

  this.node.mkdirp(this.node.path.dirname(targetPath), (err) => {
    if (err) {
      const response = new FileCopyResponseModel();
      response.is_error = true;
      response.error = err;
      copyResponse.next(response);
    } else {
      const writeStream = this.node.fs.createWriteStream(targetPath);
      writeStream.once("error", (err) => {
        const response = new FileCopyResponseModel();
        response.is_error = true;
        response.error = err;
        copyResponse.next(response);
      });
      writeStream.once("close", (ex) => {
        const response = new FileCopyResponseModel();
        response.is_error = false;
        response.is_done = true;
        response.size = fileSize;
        copyResponse.next(response);
      });
      readStream.pipe(writeStream);
    }
  });

  return copyResponse;
}
File gets copied correctly if I just comment out this section of code:
readStream.on('data', (buffer) => {
  bytesCopied += buffer.length;
  const response = new FileCopyResponseModel();
  response.is_error = false;
  response.is_done = false;
  response.size = fileSize;
  response.size_copied = bytesCopied;
  copyResponse.next(response);
});
Any idea what is wrong here?
On a side note, copy progress is reported correctly when listening to the data event.
I think you are trying to consume the readable stream twice: once in the on('data') handler and once in readStream.pipe(writeStream). The on('data') listener will consume the readable stream before you can pipe it to the write stream.
You could try replacing the line:
readStream.pipe(writeStream);
with
readStream.on('data', (buffer) => {
  bytesCopied += buffer.length;
  const response = new FileCopyResponseModel();
  response.is_error = false;
  response.is_done = false;
  response.size = fileSize;
  response.size_copied = bytesCopied;
  copyResponse.next(response);
  // new bit
  writeStream.write(buffer);
});
and removing the original on('data') handler from above.
You can also end the writeStream when the read stream ends.
readStream.on('end', function () {
  console.log('end');
  writeStream.end();
});
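If you do take the manual-write route, it is also worth respecting the write stream's backpressure. A sketch reusing the names from the snippet above (not from the original answer):

readStream.on('data', (buffer) => {
  bytesCopied += buffer.length;
  // ... report progress as before ...
  // write() returns false when the internal buffer is full;
  // pause the reader and resume once the writer has drained.
  if (!writeStream.write(buffer)) {
    readStream.pause();
    writeStream.once('drain', () => readStream.resume());
  }
});

readStream.on('end', () => writeStream.end());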

Node stream hangs when emitting error

I have a stream that's checking a CSV. It works fine except that when it emits an error it hangs, even after I send the response back.
export function ValidateCSV(options) {
  let opt = options;
  if (!(this instanceof ValidateCSV)) return new ValidateCSV(opt);
  if (!opt) opt = {};
  opt.objectMode = true;
  opt.highWaterMark = 1000000;
  Transform.call(this, opt);
}

util.inherits(ValidateCSV, Transform);

ValidateCSV.prototype.destroy = function () {
  this.readable = false;
  this.writable = false;
  this.emit('end');
};

ValidateCSV.prototype._transform = function (chunk, encoding, done) {
  // Do some stuff to the chunk

  // Emit error
  if (required.length > 0) {
    this.emit('error', `The following columns are required: ${required.join(', ')}`);
  }
  done();
};
I was able to fix it by adding a destroy method, but it still hangs for a few seconds. Is there a better way to end/destroy a Transform stream?
ValidateCSV.prototype.destroy = function () {
  this.readable = false;
  this.writable = false;
  this.emit('end');
};
EDIT:
Here is how I'm using the stream with busboy:
function processMultipart(req, res) {
  const userId = req.query._userId;
  const busboy = new Busboy({ headers: req.headers, limits: { files: 1 } });
  const updateId = req.params.id;

  // Transform stream to validate the csv
  const validateCSV = new ValidateCSV();
  validateCSV
    .on('finish', () => {
      // Process the csv
    })
    .on('error', (er) => {
      // Do some logging
      res.status(500).json(er).end();
    });

  // Multipart upload handler
  busboy
    .on('file', (fieldname, file, filename) => {
      dataset.name = fieldname.length > 0 ?
        fieldname : filename.substr(0, filename.indexOf('.csv'));
      file
        .on('error', (er) => {
          // Send Error
        })
        .on('end', () => {
          // Save dataset to mongo
          if (dataset._update) {
            res.status(200).json(dataset).end();
          } else {
            Dataset.create(dataset, (er) => {
              if (er) {
                res.status(500).json(er).end();
              } else {
                res.status(200).json(dataset).end();
              }
            });
          }
        }).pipe(validateCSV);
    });

  req.pipe(busboy);
}
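One conventional alternative (a sketch, not from the original thread) is to report the failure through the _transform callback instead of emitting 'error' manually; the stream then errors out and ends on its own, without a custom destroy:

ValidateCSV.prototype._transform = function (chunk, encoding, done) {
  // Do some stuff to the chunk

  if (required.length > 0) {
    // Passing an Error to the callback emits 'error' and stops the stream.
    return done(new Error(`The following columns are required: ${required.join(', ')}`));
  }
  done();
};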

Streaming the results of multiple fs.readfiles

I'm a newbie at Node.js streams, and what I want to achieve is streaming the results of readFile calls in a module that I have. I then want to invoke this readable stream in my main app and listen to data events, so that every time readFile returns a result a data event is triggered and the object is passed as a chunk. This is what I've got so far, and it's throwing an error...
function streamObjects(type, dirname) {
  var Readable = require('stream').Readable;
  var rs = new Readable({ objectMode: true });
  fs.readdir(dirname, function (err, files) {
    if (err)
      console.log(err);
    for (var i = 0; i < 10; i++) {
      fs.readFile(path.resolve(dirname, files[i]), function (err, data) {
        if (err)
          console.log(err);
        rs.push(JSON.parse(data));
      }); //end readFile
    } //end for loop
    return rs;
  });
}
You want to use an EventEmitter. Your readFile calls are asynchronous, so they have not run yet at the point where you return the rs Readable.
var util = require('util');
var EventEmitter = require('events').EventEmitter;
var fs = require('fs');
var path = require('path');

function streamObjects(type, dirname) {
  var Readable = require('stream').Readable;
  var rs = new Readable({ objectMode: true });
  var self = this;
  fs.readdir(dirname, function (err, files) {
    if (err)
      console.log(err);
    // Use `let` so each readFile callback sees its own value of i.
    for (let i = 0; i < 10; i++) {
      fs.readFile(path.resolve(dirname, files[i]), function (err, data) {
        if (err)
          console.log(err);
        self.emit('data', files[i], data);
        rs.push(JSON.parse(data));
      }); //end readFile
    } //end for loop
  });
}

util.inherits(streamObjects, EventEmitter);

module.exports = streamObjects;
From another file
var streamObjects = require('streamObjects');
var streamObjectInstance = new streamObjects(type, dirName);
streamObjectInstance.on('data', yourFunctionHere);
I did not add an error emit, but you can emit one when an error happens.
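If you would rather keep a Readable like in the question, here is a sketch under the same assumptions (the first ten files contain JSON, and a reasonably recent Node where the Readable constructor accepts a read option and streams have destroy()):

const fs = require('fs');
const path = require('path');
const { Readable } = require('stream');

function streamObjects(type, dirname) {
  // No-op read(): we push data as the readFile callbacks come back.
  const rs = new Readable({ objectMode: true, read() {} });
  fs.readdir(dirname, (err, files) => {
    if (err) return rs.destroy(err);
    let pending = Math.min(10, files.length);
    files.slice(0, 10).forEach((file) => {
      fs.readFile(path.resolve(dirname, file), (err, data) => {
        if (err) return rs.destroy(err);
        rs.push(JSON.parse(data));          // each push fires a 'data' event
        if (--pending === 0) rs.push(null); // signal end-of-stream
      });
    });
  });
  return rs; // returned synchronously, before readdir completes
}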

unzip the file and parse it to js

Hi, I tried to unzip a file from my C drive and parse it into a JavaScript object.
Here is the code:
var AdmZip = require('adm-zip');
var fs = require('fs'), xml2js = require('xml2js');

var parser = new xml2js.Parser();
var paramdata = 'c:/sample/kusuma.zip';
console.log(paramdata);

var zip = new AdmZip(paramdata);
var zipEntries = zip.getEntries();
var obj = [];
var count = 0;

zipEntries.forEach(function (zipEntry) {
  var len = zipEntries.length;
  console.log(zipEntry.toString());
  console.log(zipEntry.entryName);
  fs.readFile("", function (err, data) {
    console.log(data);
    parser.parseString(data, function (err, result) {
      count++;
      console.log(count);
      obj.push(result);
      if (count === len) {
        console.log(obj);
        res.send(obj);
      }
    });
  });
});
Please check the code and give me some more examples.
Well, fs.readFile() is for reading files that are themselves directly on disk, which these aren't.
However, adm-zip is already reading in the contents of the .zip, so you shouldn't need fs. Each zipEntry has getData() and getDataAsync() methods that can be used to retrieve contents.
zipEntries.forEach(function (zipEntry) {
  zipEntry.getDataAsync(function (data) {
    parser.parseString(data, function (err, result) {
      console.log(result);
    });
  });
});
Also, as zipEntries is an Array, you can use .filter() to reduce it to only XML files.
var zipEntries = zip.getEntries().filter(function (zipEntry) {
  return !zipEntry.isDirectory && /\.xml$/.test(zipEntry.entryName);
});
You'll also want to determine len once from the collection rather than from each entry, and you can test against obj.length instead of keeping a separate count:
var len = zipEntries.length;
var obj = [];

zipEntries.forEach(function (zipEntry) {
  zipEntry.getDataAsync(function (data) {
    parser.parseString(data, function (err, result) {
      obj.push(result);
      if (obj.length === len) {
        res.send(obj);
      }
    });
  });
});

Node.js & Amazon S3: How to iterate through all files in a bucket?

Is there any Amazon S3 client library for Node.js that allows listing of all files in S3 bucket?
The best-known ones, aws2js and knox, don't seem to have this functionality.
Using the official aws-sdk:
var allKeys = [];
function listAllKeys(marker, cb) {
  s3.listObjects({ Bucket: s3bucket, Marker: marker }, function (err, data) {
    allKeys.push(data.Contents);
    if (data.IsTruncated)
      listAllKeys(data.NextMarker, cb);
    else
      cb();
  });
}
see s3.listObjects
Edit 2017:
Same basic idea, but listObjectsV2( ... ) is now recommended and uses a ContinuationToken (see s3.listObjectsV2):
var allKeys = [];
function listAllKeys(token, cb) {
  var opts = { Bucket: s3bucket };
  if (token) opts.ContinuationToken = token;
  s3.listObjectsV2(opts, function (err, data) {
    allKeys = allKeys.concat(data.Contents);
    if (data.IsTruncated)
      listAllKeys(data.NextContinuationToken, cb);
    else
      cb();
  });
}
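A usage sketch for either variant (the callback just reads the accumulated allKeys array):

listAllKeys(null, function () {
  console.log(allKeys.length + ' entries collected');
});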
Using AWS-SDK v3 and Typescript
import {
  paginateListObjectsV2,
  S3Client,
  S3ClientConfig,
} from '@aws-sdk/client-s3';

/* // For Deno
import {
  paginateListObjectsV2,
  S3Client,
  S3ClientConfig,
} from "https://deno.land/x/aws_sdk@v3.32.0-1/client-s3/mod.ts"; */

const s3Config: S3ClientConfig = {
  credentials: {
    accessKeyId: 'accessKeyId',
    secretAccessKey: 'secretAccessKey',
  },
  region: 'us-east-1',
};

const getAllS3Files = async (client: S3Client, s3Opts) => {
  const totalFiles = [];
  for await (const data of paginateListObjectsV2({ client }, s3Opts)) {
    totalFiles.push(...(data.Contents ?? []));
  }
  return totalFiles;
};

const main = async () => {
  const client = new S3Client(s3Config);
  const s3Opts = { Bucket: 'bucket-xyz' };
  console.log(await getAllS3Files(client, s3Opts));
};

main();
For AWS-SDK v2 Using Async Generator
Import S3
const { S3 } = require('aws-sdk');
const s3 = new S3();
Create a generator function to retrieve the full list of files:
async function* listAllKeys(opts) {
  opts = { ...opts };
  do {
    const data = await s3.listObjectsV2(opts).promise();
    opts.ContinuationToken = data.NextContinuationToken;
    yield data;
  } while (opts.ContinuationToken);
}
Prepare the AWS parameters, based on the API docs:
const opts = {
  Bucket: 'bucket-xyz' /* required */,
  // ContinuationToken: 'STRING_VALUE',
  // Delimiter: 'STRING_VALUE',
  // EncodingType: url,
  // FetchOwner: true || false,
  // MaxKeys: 'NUMBER_VALUE',
  // Prefix: 'STRING_VALUE',
  // RequestPayer: requester,
  // StartAfter: 'STRING_VALUE'
};
Use generator
async function main() {
  // using a for await...of loop
  for await (const data of listAllKeys(opts)) {
    console.log(data.Contents);
  }
}
main();
That's it.
Or Lazy Load
async function main() {
  const keys = listAllKeys(opts);
  console.log(await keys.next());
  // {value: {…}, done: false}
  console.log(await keys.next());
  // {value: {…}, done: false}
  console.log(await keys.next());
  // {value: undefined, done: true}
}
main();
Or Use generator to make Observable function
const lister = (opts) => (o$) => {
  let needMore = true;
  const process = async () => {
    for await (const data of listAllKeys(opts)) {
      o$.next(data);
      if (!needMore) break;
    }
    o$.complete();
  };
  process();
  return () => (needMore = false);
};
Use this observable function with RxJS:
// Using Rxjs
const { Observable } = require('rxjs');
const { flatMap } = require('rxjs/operators');
function listAll() {
return Observable.create(lister(opts))
.pipe(flatMap((v) => v.Contents))
.subscribe(console.log);
}
listAll();
Or use this observable function with the Node.js EventEmitter:
const EventEmitter = require('events');
const _eve = new EventEmitter();

async function onData(data) {
  // will be called for each set of data
  console.log(data);
}
async function onError(error) {
  // will be called if any error
  console.log(error);
}
async function onComplete() {
  // will be called when data completely received
}

_eve.on('next', onData);
_eve.on('error', onError);
_eve.on('complete', onComplete);

const stop = lister(opts)({
  next: (v) => _eve.emit('next', v),
  error: (e) => _eve.emit('error', e),
  complete: (v) => _eve.emit('complete', v),
});
Here's Node code I wrote to assemble the S3 objects from truncated lists.
var params = {
  Bucket: <yourbucket>,
  Prefix: <yourprefix>,
};

var s3DataContents = []; // Single array of all combined S3 data.Contents

function s3Print() {
  if (program.al) {
    // --al: Print all objects
    console.log(JSON.stringify(s3DataContents, null, " "));
  } else {
    // --b: Print key only, otherwise also print index
    var i;
    for (i = 0; i < s3DataContents.length; i++) {
      var head = !program.b ? (i + 1) + ': ' : '';
      console.log(head + s3DataContents[i].Key);
    }
  }
}

function s3ListObjects(params, cb) {
  s3.listObjects(params, function (err, data) {
    if (err) {
      console.log("listS3Objects Error:", err);
    } else {
      var contents = data.Contents;
      s3DataContents = s3DataContents.concat(contents);
      if (data.IsTruncated) {
        // Set Marker to last returned key
        params.Marker = contents[contents.length - 1].Key;
        s3ListObjects(params, cb);
      } else {
        cb();
      }
    }
  });
}

s3ListObjects(params, s3Print);
Pay attention to listObjects' documentation of NextMarker, which is NOT always present in the returned data object, so I don't use it at all in the above code ...
NextMarker — (String) When the response is truncated (the IsTruncated element value in the response is true), you can use the key name in this field as a marker in the subsequent request to get the next set of objects. Amazon S3 lists objects in alphabetical order. Note: This element is returned only if you have the delimiter request parameter specified. If the response does not include the NextMarker and it is truncated, you can use the value of the last Key in the response as the marker in the subsequent request to get the next set of object keys.
The entire program has now been pushed to https://github.com/kenklin/s3list.
In fact, aws2js supports listing objects in a bucket at a low level via the s3.get() method call. To do it, one has to pass the prefix parameter, which is documented on the Amazon S3 REST API page:
var s3 = require('aws2js').load('s3', awsAccessKeyId, awsSecretAccessKey);
s3.setBucket(bucketName);

var folder = encodeURI('some/path/to/S3/folder');
var url = '?prefix=' + folder;

s3.get(url, 'xml', function (error, data) {
  console.log(error);
  console.log(data);
});
The data variable in the above snippet contains a list of all objects in the bucketName bucket.
I published knox-copy when I couldn't find a good existing solution. It wraps all the pagination details of the REST API into a familiar Node stream:
var knoxCopy = require('knox-copy');

var client = knoxCopy.createClient({
  key: '<api-key-here>',
  secret: '<secret-here>',
  bucket: 'mrbucket'
});

client.streamKeys({
  // omit the prefix to list the whole bucket
  prefix: 'buckets/of/fun'
}).on('data', function (key) {
  console.log(key);
});
If you're listing fewer than 1000 files a single page will work:
client.listPageOfKeys({
  prefix: 'smaller/bucket/o/fun'
}, function (err, page) {
  console.log(page.Contents); // <- Here's your list of files
});
Meekohi provided a very good answer, but the (new) documentation states that NextMarker can be undefined. When this is the case, you should use the last key as the marker.
So his code sample can be changed into:
var allKeys = [];
function listAllKeys(marker, cb) {
  s3.listObjects({ Bucket: s3bucket, Marker: marker }, function (err, data) {
    allKeys.push(data.Contents);
    if (data.IsTruncated)
      listAllKeys(data.NextMarker || data.Contents[data.Contents.length - 1].Key, cb);
    else
      cb();
  });
}
Couldn't comment on the original answer since I don't have the required reputation. Apologies for the bad mark-up btw.
I am using this version with async/await.
This function will return the content in an array.
I'm also using the NextContinuationToken instead of the Marker.
async function getFilesRecursivelySub(param) {
  // Call the function to get list of items from S3.
  let result = await s3.listObjectsV2(param).promise();
  if (!result.IsTruncated) {
    // Recursive terminating condition.
    return result.Contents;
  } else {
    // Recurse it if results are truncated.
    param.ContinuationToken = result.NextContinuationToken;
    return result.Contents.concat(await getFilesRecursivelySub(param));
  }
}

async function getFilesRecursively() {
  let param = {
    Bucket: 'YOUR_BUCKET_NAME'
    // Can add more parameters here.
  };
  return await getFilesRecursivelySub(param);
}
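A usage sketch:

getFilesRecursively()
  .then((contents) => console.log(contents.length, 'objects found'))
  .catch(console.error);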
This is an old question and I guess the AWS JS SDK has changed a lot since it was asked. Here's yet another way to do it these days:
s3.listObjects({ Bucket: 'mybucket', Prefix: 'some-pfx' }).
  on('success', function handlePage(r) {
    //... handle page of contents r.data.Contents

    if (r.hasNextPage()) {
      // There's another page; handle it
      r.nextPage().on('success', handlePage).send();
    } else {
      // Finished!
    }
  }).
  on('error', function (r) {
    // Error!
  }).
  send();
If you want to get the list of keys only within a specific folder inside an S3 bucket, then this will be useful.
Basically, the listObjects function starts searching from the Marker we set and searches until it hits the MaxKeys: 1000 limit, so it walks folder by folder and returns the first 1000 keys it finds across folders in the bucket.
Consider that I have many folders inside my bucket with the prefix prod/<some date>/, e.g. prod/2017/05/12/, prod/2017/05/13/, etc.
I want to fetch the list of objects (file names) only within the prod/2017/05/12/ folder, so I specify prod/2017/05/12/ as my start and prod/2017/05/13/ [your next folder name] as my end, and in the code I break the loop when I encounter the end.
Each Key in data.Contents will look like this:
{
  Key: 'prod/2017/05/13/4bf2c675-a417-4c1f-a0b4-22fc45f99207.jpg',
  LastModified: 2017-05-13T00:59:02.000Z,
  ETag: '"630b2sdfsdfs49ef392bcc16c833004f94ae850"',
  Size: 134236366,
  StorageClass: 'STANDARD',
  Owner: { }
}
Code:
var list = [];

function listAllKeys(s3bucket, start, end) {
  s3.listObjects({
    Bucket: s3bucket,
    Marker: start,
    MaxKeys: 1000,
  }, function (err, data) {
    if (data.Contents) {
      for (var i = 0; i < data.Contents.length; i++) {
        var key = data.Contents[i].Key; // See above code for the structure of data.Contents
        // Compare the leading part of the key against the end prefix.
        if (key.substring(0, end.length) != end) {
          list.push(key);
        } else {
          break; // break the loop if end arrived
        }
      }
      console.log(list);
      console.log('Total - ', list.length);
    }
  });
}

listAllKeys('BucketName', 'prod/2017/05/12/', 'prod/2017/05/13/');
Output:
[ 'prod/2017/05/12/05/4bf2c675-a417-4c1f-a0b4-22fc45f99207.jpg',
'prod/2017/05/12/05/a36528b9-e071-4b83-a7e6-9b32d6bce6d8.jpg',
'prod/2017/05/12/05/bc4d6d4b-4455-48b3-a548-7a714c489060.jpg',
'prod/2017/05/12/05/f4b8d599-80d0-46fa-a996-e73b8fd0cd6d.jpg',
... 689 more items ]
Total - 692
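A simpler variant of the same idea, if you only need the keys under one date "folder", is to let S3 do the filtering with the Prefix parameter (a sketch; paginate with Marker/IsTruncated as in the other answers if the folder can hold more than 1000 keys):

s3.listObjects({
  Bucket: 'BucketName',
  Prefix: 'prod/2017/05/12/', // only keys under this prefix are returned
  MaxKeys: 1000,
}, function (err, data) {
  if (err) return console.error(err);
  var keys = data.Contents.map(function (obj) { return obj.Key; });
  console.log(keys);
  console.log('Total - ', keys.length);
});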
I ended up building a wrapper function around listObjectsV2. It works the same way and takes the same parameters, but recurses until IsTruncated is false and returns all the keys found as an array in the second parameter of the callback function.
const AWS = require('aws-sdk')
const s3 = new AWS.S3()

function listAllKeys(params, cb)
{
  var keys = []
  if (params.data) {
    keys = keys.concat(params.data)
  }
  delete params['data']
  s3.listObjectsV2(params, function (err, data) {
    if (err) {
      cb(err)
    } else if (data.IsTruncated) {
      params['ContinuationToken'] = data.NextContinuationToken
      // carry forward everything collected so far, not just the latest page
      params['data'] = keys.concat(data.Contents)
      listAllKeys(params, cb)
    } else {
      keys = keys.concat(data.Contents)
      cb(null, keys)
    }
  })
}
Here's what I came up with based on the other answers.
You can await listAllKeys() without having to use callbacks.
const listAllKeys = () =>
  new Promise((resolve, reject) => {
    let allKeys = [];
    const list = marker => {
      s3.listObjects({ Marker: marker }, (err, data) => {
        if (err) {
          reject(err);
        } else if (data.IsTruncated) {
          allKeys.push(data.Contents);
          list(data.NextMarker || data.Contents[data.Contents.length - 1].Key);
        } else {
          allKeys.push(data.Contents);
          resolve(allKeys);
        }
      });
    };
    list();
  });
This assumes you've initialized the s3 variable like so
const s3 = new aws.S3({
  apiVersion: API_VERSION,
  params: { Bucket: BUCKET_NAME }
});
I made it as simple as possible. You can iterate over the objects to upload using a for loop; it is quite simple, neat, and easy to understand.
Packages required: fs, express-fileupload.
server.js :-
router.post('/upload', function (req, res) {
  if (req.files) {
    var file = req.files.filename;
    test(file);
    res.render('test');
  }
});
test function () :-
function test(file) {
  // upload all
  if (file.length) {
    for (var i = 0; i < file.length; i++) {
      fileUP(file[i]);
    }
  } else {
    fileUP(file);
  }

  // call fileUP() to upload 1 at once
  function fileUP(fyl) {
    var filename = fyl.name;
    var tempPath = './temp' + filename;
    fyl.mv(tempPath, function (err) {
      fs.readFile(tempPath, function (err, data) {
        var params = {
          Bucket: 'BUCKET_NAME',
          Body: data,
          Key: Date.now() + filename
        };
        s3.upload(params, function (err, data) {
          if (data) {
            fs.unlink(tempPath, (err) => {
              if (err) {
                console.error(err)
                return
              } else {
                console.log("file removed from temp location");
              }
            });
            console.log("Uploaded in:", data.Location);
          }
        });
      });
    });
  }
}
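As a side note, s3.upload also accepts a readable stream as Body, so the temp file can be streamed instead of being read fully into memory. A sketch of the same fileUP step under that assumption (not from the original answer):

function fileUP(fyl) {
  var tempPath = './temp' + fyl.name;
  fyl.mv(tempPath, function (err) {
    if (err) return console.error(err);
    var params = {
      Bucket: 'BUCKET_NAME',
      Key: Date.now() + fyl.name,
      Body: fs.createReadStream(tempPath), // stream rather than buffering the whole file
    };
    s3.upload(params, function (err, data) {
      if (err) return console.error(err);
      console.log('Uploaded in:', data.Location);
      fs.unlink(tempPath, function (err) {
        if (err) console.error(err);
      });
    });
  });
}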
This should work,
// `params` is assumed to be defined elsewhere, e.g. { Bucket: 'bucket-xyz' }.
var listAllKeys = async function (token) {
  if (token) params.ContinuationToken = token;
  return new Promise((resolve, reject) => {
    s3.listObjectsV2(params, function (err, data) {
      if (err) {
        reject(err)
      }
      resolve(data)
    });
  });
}

var collect_all_files = async function () {
  var allkeys = []
  let conti = true
  let token = null
  while (conti) {
    let data = await listAllKeys(token)
    allkeys = allkeys.concat(data.Contents);
    token = data.NextContinuationToken
    conti = data.IsTruncated
  }
  return allkeys
};
Using the new s3.listObjectsV2 API, the recursive solution will be:
S3Dataset.prototype.listFiles = function (params, callback) {
  var self = this;
  var options = {};
  for (var attrname in params) { options[attrname] = params[attrname]; }
  var results = [];
  var s3 = self.s3Store.GetInstance();

  function listAllKeys(token, callback) {
    var opt = { Bucket: self._options.s3.Bucket, Prefix: self._options.s3.Key, MaxKeys: 1000 };
    if (token) opt.ContinuationToken = token;
    s3.listObjectsV2(opt, (error, data) => {
      if (error) {
        if (self.logger) self.logger.error("listFiles error:", error);
        return callback(error);
      } else {
        for (var index in data.Contents) {
          var bucket = data.Contents[index];
          if (self.logger) self.logger.debug("listFiles Key: %s LastModified: %s Size: %s", bucket.Key, bucket.LastModified, bucket.Size);
          if (bucket.Size > 0) {
            var Bucket = self._options.s3.Bucket;
            var Key = bucket.Key;
            var components = bucket.Key.split('/');
            var name = components[components.length - 1];
            results.push({
              name: name,
              path: bucket.Key,
              mtime: bucket.LastModified,
              size: bucket.Size,
              sizehr: formatSizeUnits(bucket.Size)
            });
          }
        }
        if (data.IsTruncated) { // truncated page
          return listAllKeys(data.NextContinuationToken, callback);
        } else {
          return callback(null, results);
        }
      }
    });
  }

  return listAllKeys.apply(this, ['', callback]);
};
where
function formatSizeUnits(bytes) {
  if (bytes >= 1099511627776) { bytes = (bytes / 1099511627776).toFixed(4) + ' TB'; }
  else if (bytes >= 1073741824) { bytes = (bytes / 1073741824).toFixed(4) + ' GB'; }
  else if (bytes >= 1048576) { bytes = (bytes / 1048576).toFixed(4) + ' MB'; }
  else if (bytes >= 1024) { bytes = (bytes / 1024).toFixed(4) + ' KB'; }
  else if (bytes > 1) { bytes = bytes + ' bytes'; }
  else if (bytes == 1) { bytes = bytes + ' byte'; }
  else { bytes = '0 bytes'; }
  return bytes;
} // formatSizeUnits
Although @Meekohi's answer does technically work, I've had enough heartache with the S3 portion of the AWS SDK for Node.js. After all the previous struggling with modules such as aws-sdk, s3, and knox, I decided to install s3cmd via the OS package manager and shell out to it using child_process.
Something like:
var s3cmd = new cmd_exec('s3cmd', ['ls', filepath, 's3://' + inputBucket],
  function (me, data) { me.stdout += data.toString(); },
  function (me) { me.exit = 1; }
);

response.send(s3cmd.stdout);
(Using the cmd_exec implementation from this question)
This approach just works really well - including for other problematic things like file upload.
The cleanest way to do it for me was through execution of s3cmd from my node script like this (The example here is to delete files recursively):
var exec = require('child_process').exec;
var child;
var bucket = "myBucket";
var prefix = "myPrefix"; // this parameter is optional
var command = "s3cmd del -r s3://" + bucket + "/" + prefix;

child = exec(command, { maxBuffer: 5000 * 1024 }, function (error, stdout, stderr) { // the maxBuffer is here to avoid the maxBuffer node process error
  console.log('stdout: ' + stdout);
  if (error !== null) {
    console.log('exec error: ' + error);
  }
});

Resources