Decrypting multiple environment variables in AWS Lambda - Node.js

I've got a number of encrypted environment variables I need to decrypt in an AWS Lambda function. AWS provides an example snippet, but I'd rather not run a huge chunk of code for each value I need to decrypt:
const AWS = require('aws-sdk');

const encrypted = process.env['my_password'];
let decrypted;

function processEvent(event, context, callback) {
  // TODO: handle the event here
}

exports.handler = (event, context, callback) => {
  if (decrypted) {
    processEvent(event, context, callback);
  } else {
    // Decrypt code should run once, with the values stored outside of the
    // function handler so that these are decrypted once per container
    const kms = new AWS.KMS();
    kms.decrypt({ CiphertextBlob: Buffer.from(encrypted, 'base64') }, (err, data) => {
      if (err) {
        console.log('Decrypt error:', err);
        return callback(err);
      }
      decrypted = data.Plaintext.toString('ascii');
      processEvent(event, context, callback);
    });
  }
};
I'm wondering if the AWS SDK includes a function that lets me decrypt multiple values at once. Failing that, is there a way to elegantly chain these calls together so they don't take up ~75 lines of my otherwise simple function?

You can use promises to achieve this. See the example below for decrypting both a username and password via KMS. You can add as many additional decryption promises to the decryptPromises array as you'd like:
const AWS = require('aws-sdk');

const encrypted = {
  username: process.env.username,
  password: process.env.password
};
let decrypted = {};

function processEvent(event, context, callback) {
  // do work
}

exports.handler = (event, context, callback) => {
  if (decrypted.username && decrypted.password) {
    processEvent(event, context, callback);
  } else {
    const kms = new AWS.KMS();
    const decryptPromises = [
      kms.decrypt({ CiphertextBlob: Buffer.from(encrypted.username, 'base64') }).promise(),
      kms.decrypt({ CiphertextBlob: Buffer.from(encrypted.password, 'base64') }).promise()
    ];

    Promise.all(decryptPromises).then(data => {
      decrypted.username = data[0].Plaintext.toString('ascii');
      decrypted.password = data[1].Plaintext.toString('ascii');
      processEvent(event, context, callback);
    }).catch(err => {
      console.log('Decrypt error:', err);
      return callback(err);
    });
  }
};
You can find more information on how promises have been implemented in the AWS SDK in the Support for Promises in the SDK documentation.
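If you have many variables, you can generalize the same pattern by mapping over a list of names instead of hand-writing one promise per variable. A minimal sketch of that idea (the variable names here are placeholders, not from the question):

const AWS = require('aws-sdk');

const kms = new AWS.KMS();
const names = ['username', 'password']; // placeholder env var names
const decrypted = {};

// Decrypt every named variable in parallel and cache the results by name.
const decryptAll = () =>
  Promise.all(
    names.map(name =>
      kms.decrypt({ CiphertextBlob: Buffer.from(process.env[name], 'base64') })
        .promise()
        .then(data => { decrypted[name] = data.Plaintext.toString('ascii'); })
    )
  );

The handler can then call decryptAll() once per container, exactly as in the answer above.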

I created a class to decrypt variables in AWS Lambda. It uses async/await instead of Promise.all. It pulls in the lodash library, but you don't have to: you can modify the class below to drop the dependency (for example, use a plain loop or Array.prototype.every instead).
var _ = require('lodash/core');
const AWS = require('aws-sdk');

class EnvVarsDecryptor {
  constructor(encryptedVariables) {
    this.encryptedVariables = encryptedVariables;
    this.decrypted = {};
  }

  isDecrypted() {
    return _.every(this.encryptedVariables, (e) => this.decrypted[e] !== undefined && this.decrypted[e] !== null);
  }

  async decryptVars() {
    const kms = new AWS.KMS();
    try {
      for (let index = 0; index < this.encryptedVariables.length; index++) {
        const encrypted = this.encryptedVariables[index];
        const data = await kms.decrypt({ CiphertextBlob: Buffer.from(process.env[encrypted], 'base64') }).promise();
        this.decrypted[encrypted] = data.Plaintext.toString('ascii');
      }
    } catch (e) {
      console.error(e);
    }
    return this.decrypted;
  }
}

module.exports = EnvVarsDecryptor;
This is a sample illustrating how to use the class (note the decryptor must be instantiated outside the handler so it survives across invocations in the same container):

const decryptor = new EnvVarsDecryptor(['username', 'password']); // your env var names

exports.handler = async (event) => {
  if (!decryptor.isDecrypted()) {
    await decryptor.decryptVars();
  }
  console.log(decryptor.decrypted);
  return `Successfully processed ${event.Records.length} messages.`;
};
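As noted above, dropping the lodash dependency is straightforward. A sketch of the same check using only the built-in Array.prototype.every, as a drop-in replacement for the method in the class above:

// Drop-in replacement for the lodash-based isDecrypted()
isDecrypted() {
  return this.encryptedVariables.every(
    (e) => this.decrypted[e] !== undefined && this.decrypted[e] !== null
  );
}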


How to read JSON from S3 with the AWS Lambda Node.js 18.x runtime?

@TBA gave the solution.
The root cause was not the runtime; it came from the SDK v3 migration.
Point: don't update mixed things at once (like both the runtime and the SDK version together 🥲).
Thanks again, TBA.
I was using a Node.js 14.x runtime Lambda to read a JSON file from S3.
The brief code is below:
const AWS = require("aws-sdk");
const s3 = new AWS.S3();
exports.handler = (event) => {
const { bucketName, objKey } = event
const params = {
Bucket: bucketName,
Key: objKey
};
return new Promise((resolve) => {
s3.getObject(params, async (err, data) =>{
if (err) console.log(err, err.stack);
else {
const contents = JSON.parse(data.Body)
resolve(contents);
}
});
})
};
and it returned the JSON data as I expected.
Today I tried to create a new Lambda with the Node.js 18.x runtime, but it returned null or various errors...
Q) Could you give me some advice to solve this 🥲?
+) I used the same JSON file for each Lambda.
+) Not sure why, but in my case data.Body.toString() didn't work (I saw some Stack Overflow answers suggesting it and tried, but no luck).
Thanks in advance!
Case A (returns null)
import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";

const s3Client = new S3Client({ region: "ap-northeast-2" });

export const handler = (event) => {
  const { objKey, bucketName } = event;
  const params = {
    Bucket: bucketName,
    Key: objKey
  };
  const getObjCommand = new GetObjectCommand(params);
  return new Promise((resolve) => {
    s3Client.send(getObjCommand, (err, data) => {
      if (err) console.log(err, err.stack);
      else {
        const list = JSON.parse(data.Body);
        resolve(list);
      }
    });
  });
};
Case B (returns "Unexpected token o in JSON at position 1")
export const handler = async (event) => {
  const { objKey, bucketName } = event;
  const params = {
    Bucket: bucketName,
    Key: objKey
  };
  const getObjCommand = new GetObjectCommand(params);
  const response = await s3Client.send(getObjCommand);
  console.log("JSON.parse(response.Body)", JSON.parse(response.Body));
};
Case C (returns "TypeError: Converting circular structure to JSON")
export const handler = async (event) => {
  const { objKey, bucketName } = event;
  const params = {
    Bucket: bucketName,
    Key: objKey
  };
  const getObjCommand = new GetObjectCommand(params);
  try {
    const response = await s3Client.send(getObjCommand);
    return JSON.stringify(response.Body);
  } catch (err) {
    console.log("error", err);
    return err;
  }
};
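For reference, the note at the top points at SDK v3 as the root cause, and that matches the symptoms: in v3, response.Body is a stream rather than a Buffer, so JSON.parse(response.Body) parses the stream object's string form (hence the "Unexpected token o"), and JSON.stringify(response.Body) trips over the stream's circular references. A minimal sketch of a working v3 handler, assuming an SDK version where Body exposes transformToString():

import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";

const s3Client = new S3Client({ region: "ap-northeast-2" });

export const handler = async (event) => {
  const { objKey, bucketName } = event;
  const response = await s3Client.send(
    new GetObjectCommand({ Bucket: bucketName, Key: objKey })
  );
  // Body is a readable stream in v3: consume it to a string before parsing.
  const bodyString = await response.Body.transformToString();
  return JSON.parse(bodyString);
};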

DynamoDB batchWrite doesn't work in Lambda with async

batchWrite doesn't work with async in Lambda. The code is supposed to insert one record, but it can't. However, when I remove async, it works.
const AWS = require("aws-sdk");
const documentClient = new AWS.DynamoDB.DocumentClient();
AWS.config.update({ region: "us-west-2" });
const tableName = "BlrSession-56pfbzohnvdqpac6asb627z2wu-dev";
exports.handler = async (event, context, callback) => {
try {
let games = [];
games.push({
PutRequest: {
Item: {
id: Math.random().toString(36).substring(2) + Date.now().toString(36),
},
},
});
let params = {
RequestItems: {
[tableName]: games,
},
};
documentClient.batchWrite(params, function (err, data) {
if (err) {
callback(err);
} else {
callback(null, data);
}
});
} catch (err) {
return err;
}
};
The result is below. There is no error.
Ensuring latest function changes are built...
Starting execution...
Result:
null
Finished execution.
Have you guys got the same behavior?
You can't combine the callback style with async/await. With an async handler, Lambda completes the invocation as soon as the returned promise resolves, and since nothing awaits the batchWrite callback, the handler returns before the write finishes (hence the null result). The easiest thing to do here is to make it all async/await (and don't forget the .promise() on the call).
const AWS = require("aws-sdk");
const documentClient = new AWS.DynamoDB.DocumentClient();
AWS.config.update({ region: "us-west-2" });
const tableName = "BlrSession-56pfbzohnvdqpac6asb627z2wu-dev";
exports.handler = async (event, context, callback) => {
try {
let games = [];
games.push({
PutRequest: {
Item: {
id: Math.random().toString(36).substring(2) + Date.now().toString(36),
},
},
});
let params = {
RequestItems: {
[tableName]: games,
},
};
return await documentClient.batchWrite(params).promise();
} catch (err) {
return err;
}
};
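If you'd rather keep the callback-style call, a sketch of the alternative: wrap batchWrite in a promise the async handler can await (equivalent behavior, same params as above):

// Wrap the callback API in a promise so the async handler can await it.
const batchWriteAsync = (params) =>
  new Promise((resolve, reject) => {
    documentClient.batchWrite(params, (err, data) => {
      if (err) reject(err);
      else resolve(data);
    });
  });

// Inside the async handler:
// return await batchWriteAsync(params);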

Why does control reach the end of a non-async handler function and then process the rest in Node.js?

Kindly excuse my Node.js knowledge, as I'm still learning. I have the following Lambda function, which is meant to list S3 bucket objects. In the function I return allKeys, but I noticed its length is always zero. That's because execution doesn't wait for the listing to complete: it reaches the keys.length evaluation first, and only then returns to the callback that is still in progress.
Code:
const AWS = require('aws-sdk');
const awsOptions = {
  region: "us-east-1"
};
const s3 = new AWS.S3(awsOptions);

exports.handler = (event, context, callback) => {
  var totalKeys = 0;
  const allKeys = [];
  const SrcBucket = event.Records[0].s3.bucket.name;
  var bucketParams = {
    Bucket: SrcBucket,
    Delimiter: '/',
    Prefix: 'Temp/'
  };

  function ListS3ObjectsAsync() {
    s3.listObjects(bucketParams, function (err, data) {
      if (err) {
        console.log(err, err.stack); // an error occurred
      }
      else {
        var contents = data.Contents;
        totalKeys = totalKeys + contents.length;
        for (var index in contents) {
          allKeys.push(contents[index].Key);
        }
        console.log(allKeys.length);
        if (data.IsTruncated) {
          bucketParams.Marker = data.NextMarker;
          ListS3ObjectsAsync();
        }
      }
    });
    return allKeys;
  }

  var keys = ListS3ObjectsAsync();
  console.log(`Total keys: ${keys.length}`);
};
It's a callback problem. Your code is expecting callbacks to work synchronously, which they don't. The code inside of the function (err, data) callback will always execute after the console.log. It's the nature of the event loop in JavaScript (see here and here for more detail on what the event loop is, why it's important, and how it impacts callbacks and promises).
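A minimal illustration of that ordering, independent of AWS (a sketch you can run in any Node.js REPL):

console.log('one');
setTimeout(() => console.log('three'), 0);
console.log('two');
// Prints: one, two, three. The callback runs only after the synchronous code finishes.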
Changing to an async methodology using promises should help things work:
const AWS = require('aws-sdk');
const awsOptions = { region: 'us-east-1' };
const s3 = new AWS.S3(awsOptions);

exports.handler = async (event, context, callback) => {
  let totalKeys = 0;
  const keys = [];
  const SrcBucket = event.Records[0].s3.bucket.name;
  const bucketParams = {
    Bucket: SrcBucket,
    Delimiter: '/',
    Prefix: 'Temp/'
  };

  let data;
  do {
    bucketParams.Marker = (data && data.NextMarker) ? data.NextMarker : undefined;
    data = await s3.listObjects(bucketParams).promise();
    const contents = data.Contents;
    totalKeys = totalKeys + contents.length;
    keys.push(...contents.map(x => x.Key));
  } while (data.IsTruncated);

  console.log(`Total keys: ${keys.length}`);
  return keys;
};
Explanation:
Making exports.handler = async ... marks the function as async, which means it returns a promise. When calling it, you need to either use the same async/await syntax or use a .then(keys => { /*...*/ }) so that you get access to the keys.
Since we're in an async function, the await keyword on s3.listObjects tells it to wait for that asynchronous call to complete before going on. Keeps the code simpler.
EDIT: since the OP is having problems with the SDK's .promise(), I'm including a promisified version of listObjects.
const listObjects = req => new Promise((resolve, reject) =>
  s3.listObjects(req, (err, data) => {
    if (err) reject(err);
    else resolve(data);
  }));
This would be included at the top with the require statements, and then the only modification in the exports.handler code would be:
data = await listObjects(bucketParams);

Decrypt text with AWS KMS in Node.js

I am trying to decrypt some text encrypted with AWS KMS, using aws-sdk and Node.js. I started playing with Node.js today, so I am a newbie with it.
I have this problem solved in Java, but I am trying to migrate an existing Alexa skill from Java to Node.js.
The code to decrypt is:
function decrypt(buffer) {
  const kms = new aws.KMS({
    accessKeyId: 'accessKeyId',
    secretAccessKey: 'secretAccessKey',
    region: 'eu-west-1'
  });
  return new Promise((resolve, reject) => {
    let params = {
      "CiphertextBlob": buffer,
    };
    kms.decrypt(params, (err, data) => {
      if (err) {
        reject(err);
      } else {
        resolve(data.Plaintext);
      }
    });
  });
};
When I run this code with a correct CiphertextBlob, I get this error:
Promise {
<rejected> { MissingRequiredParameter: Missing required key 'CiphertextBlob' in params
at ParamValidator.fail (D:\Developing\abono-transportes-js\node_modules\aws-sdk\lib\param_validator.js:50:37)
at ParamValidator.validateStructure (D:\Developing\abono-transportes-js\node_modules\aws-sdk\lib\param_validator.js:61:14)
at ParamValidator.validateMember (D:\Developing\abono-transportes-js\node_modules\aws-sdk\lib\param_validator.js:88:21)
at ParamValidator.validate (D:\Developing\abono-transportes-js\node_modules\aws-sdk\lib\param_validator.js:34:10)
at Request.VALIDATE_PARAMETERS (D:\Developing\abono-transportes-js\node_modules\aws-sdk\lib\event_listeners.js:126:42)
at Request.callListeners (D:\Developing\abono-transportes-js\node_modules\aws-sdk\lib\sequential_executor.js:106:20)
at callNextListener (D:\Developing\abono-transportes-js\node_modules\aws-sdk\lib\sequential_executor.js:96:12)
at D:\Developing\abono-transportes-js\node_modules\aws-sdk\lib\event_listeners.js:86:9
at finish (D:\Developing\abono-transportes-js\node_modules\aws-sdk\lib\config.js:349:7)
at D:\Developing\abono-transportes-js\node_modules\aws-sdk\lib\config.js:367:9
message: 'Missing required key \'CiphertextBlob\' in params',
code: 'MissingRequiredParameter',
time: 2019-06-30T20:29:18.890Z } }
I don't understand why I am receiving this when CiphertextBlob is in the params variable.
Does anyone know?
Thanks in advance!
EDIT 01/07
Here is the code for the feature I'm testing:
First function:
const CheckExpirationDateHandler = {
  canHandle(handlerInput) {
    return handlerInput.requestEnvelope.request.type === 'IntentRequest'
      && handlerInput.requestEnvelope.request.intent.name === 'TtpConsultaIntent';
  },
  handle(handlerInput) {
    var fecha = "";
    var speech = "";
    userData = handlerInput.attributesManager.getSessionAttributes();
    if (Object.keys(userData).length === 0) {
      speech = consts.No_Card_Registered;
    } else {
      console.log("Retrieving expiration date from 3rd API");
      fecha = crtm.expirationDate(cipher.decrypt(userData.code.toString()));
      speech = "Tu abono caducará el " + fecha;
    }
    return handlerInput.responseBuilder
      .speak(speech)
      .shouldEndSession(true)
      .getResponse();
  }
}
The decrypt function, with a log added:

// source is a base64-encoded ciphertext string
async function decrypt(source) {
  console.log("Decrypt func INPUT: " + source);
  const params = {
    CiphertextBlob: Buffer.from(source, 'base64'),
  };
  const { Plaintext } = await kms.decrypt(params).promise();
  return Plaintext.toString();
};
Output:
2019-07-01T19:01:12.814Z 38b45272-809d-4c84-b155-928bee61a4f8 INFO Retrieving expiration date from 3rd API
2019-07-01T19:01:12.814Z 38b45272-809d-4c84-b155-928bee61a4f8 INFO Decrypt func INPUT:
AYADeHK9xoVE19u/3vBTiug3LuYAewACABVhd3MtY3J5cHRvLXB1YmxpYy1rZXkAREF4UW0rcW5PSElnY1ZnZ2l1bHQ2bzc3ZnFLZWZMM2J6YWJEdnFCNVNGNzEyZGVQZ1dXTDB3RkxsdDJ2dFlRaEY4UT09AA10dHBDYXJkTnVtYmVyAAt0aXRsZU51bWJlcgABAAdhd3Mta21zAEthcm46YXdzOmttczpldS13ZXN0LTE6MjQwMTE3MzU1MTg4OmtleS81YTRkNmFmZS03MzkxLTRkMDQtYmUwYi0zZDJlMWRhZTRkMmIAuAECAQB4sE8Iv75TZ0A9b/ila9Yi/3vTSja3wM7mN/B0ThqiHZEBxYsoWpX7jCqHMoeoYOkVtAAAAH4wfAYJKoZIhvcNAQcGoG8wbQIBADBoBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDNnGIwghz+b42E07KAIBEIA76sV3Gmp5ib99S9H4MnY0d1l............
2019-07-01T19:01:12.925Z 38b45272-809d-4c84-b155-928bee61a4f8 INFO Error
handled: handlerInput.responseBuilder.speak(...).shouldEndSession is
not a function
2019-07-01T19:01:13.018Z 38b45272-809d-4c84-b155-928bee61a4f8 ERROR Unhandled Promise
Rejection {"errorType":"Runtime.UnhandledPromiseRejection","errorMessage":"InvalidCiphertextException:
null","stack":["Runtime.UnhandledPromiseRejection:
InvalidCiphertextException: null","...
That either means you're missing the key 'CiphertextBlob' or its value is undefined.
Please check the value you're passing in as the buffer.
For reference, I've also added the working code example that I use.
import { KMS } from 'aws-sdk';
import config from '../config';

const kms = new KMS({
  accessKeyId: config.aws.accessKeyId,
  secretAccessKey: config.aws.secretAccessKey,
  region: config.aws.region,
});

// source is plaintext
async function encrypt(source) {
  const params = {
    KeyId: config.aws.kmsKeyId,
    Plaintext: source,
  };
  const { CiphertextBlob } = await kms.encrypt(params).promise();
  // store encrypted data as a base64 encoded string
  return CiphertextBlob.toString('base64');
}

// source is a base64-encoded ciphertext string
async function decrypt(source) {
  const params = {
    CiphertextBlob: Buffer.from(source, 'base64'),
  };
  const { Plaintext } = await kms.decrypt(params).promise();
  return Plaintext.toString();
}

export default {
  encrypt,
  decrypt,
};
----- ADDED -----
I was able to reproduce your issue.
decrypt("this text has never been encrypted before!");
This code throws the same error.
So if you pass plain text that has never been encrypted, or text encrypted with a different key, it throws InvalidCiphertextException: null.
Now I'll give you one usage example.
encrypt("hello world!") // this will return base64 encoded string
.then(decrypt) // this one accepts encrypted string
.then(decoded => console.log(decoded)); // hello world!
I kept getting this error in my AWS Lambda when trying the accepted solution, using AWS KMS on an environment variable I had encrypted through the AWS console.
What worked for me was this code, adapted from the official AWS solution:
decrypt.js
const AWS = require('aws-sdk');
AWS.config.update({ region: 'us-east-1' });

module.exports = async (env) => {
  const functionName = process.env.AWS_LAMBDA_FUNCTION_NAME;
  const encrypted = process.env[env];
  if (!encrypted) {
    throw Error(`Environment variable ${env} not found`);
  }
  const kms = new AWS.KMS();
  try {
    const data = await kms.decrypt({
      CiphertextBlob: Buffer.from(encrypted, 'base64'),
      EncryptionContext: { LambdaFunctionName: functionName },
    }).promise();
    console.info(`Environment variable ${env} decrypted`);
    return data.Plaintext.toString('ascii');
  } catch (err) {
    console.log('Decryption error:', err);
    throw err;
  }
};
To be used like this:
index.js
const decrypt = require("./decrypt.js");

exports.handler = async (event, context, callback) => {
  console.log(await decrypt("MY_CRYPTED_ENVIRONMENT_VARIABLE"));
};
EncryptionContext is a must for this to work.
Let's say the name of the environment variable is Secret.
The code below reads the environment variable called Secret and returns the decrypted secret as plain text in the response body.
Please see the function code posted below:
'use strict';
const aws = require('aws-sdk');
var kms = new aws.KMS();

exports.handler = (event, context, callback) => {
  const functionName = process.env.AWS_LAMBDA_FUNCTION_NAME;
  const encryptedSecret = process.env.Secret;
  kms.decrypt({
    CiphertextBlob: Buffer.from(encryptedSecret, 'base64'),
    EncryptionContext: {
      LambdaFunctionName: functionName /* Providing the name of the function as the EncryptionContext is a must */
    },
  },
  (err, data) => {
    if (err) {
      /* Handle the error, please */
    }
    var decryptedSecret = data.Plaintext.toString('ascii');
    callback(null, {
      statusCode: 200,
      body: decryptedSecret,
      headers: {
        'Content-Type': 'application/json',
      },
    });
  });
};
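For context: KMS binds the EncryptionContext into the ciphertext, so a decrypt call must pass exactly the context used at encrypt time, or it fails with InvalidCiphertextException. A sketch of the matching encrypt side (the key alias and function name are placeholders, not values from the answers above):

const AWS = require('aws-sdk');
const kms = new AWS.KMS();

// The same EncryptionContext must be supplied again at decrypt time.
const encryptSecret = async (plaintext) => {
  const { CiphertextBlob } = await kms.encrypt({
    KeyId: 'alias/my-key', // placeholder key alias
    Plaintext: plaintext,
    EncryptionContext: { LambdaFunctionName: 'my-function' }, // placeholder
  }).promise();
  return CiphertextBlob.toString('base64');
};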

Node.js & Amazon S3: How to iterate through all files in a bucket?

Is there any Amazon S3 client library for Node.js that allows listing all files in an S3 bucket?
The best-known ones, aws2js and knox, don't seem to have this functionality.
Using the official aws-sdk:

var allKeys = [];
function listAllKeys(marker, cb) {
  s3.listObjects({ Bucket: s3bucket, Marker: marker }, function(err, data) {
    allKeys.push(data.Contents);
    if (data.IsTruncated)
      listAllKeys(data.NextMarker, cb);
    else
      cb();
  });
}
see s3.listObjects
Edit 2017:
Same basic idea, but listObjectsV2(...) is now recommended and uses a ContinuationToken (see s3.listObjectsV2):

var allKeys = [];
function listAllKeys(token, cb) {
  var opts = { Bucket: s3bucket };
  if (token) opts.ContinuationToken = token;
  s3.listObjectsV2(opts, function(err, data) {
    allKeys = allKeys.concat(data.Contents);
    if (data.IsTruncated)
      listAllKeys(data.NextContinuationToken, cb);
    else
      cb();
  });
}
Using AWS SDK v3 and TypeScript:

import {
  paginateListObjectsV2,
  S3Client,
  S3ClientConfig,
} from '@aws-sdk/client-s3';

/* // For Deno
import {
  paginateListObjectsV2,
  S3Client,
  S3ClientConfig,
} from "https://deno.land/x/aws_sdk@v3.32.0-1/client-s3/mod.ts"; */

const s3Config: S3ClientConfig = {
  credentials: {
    accessKeyId: 'accessKeyId',
    secretAccessKey: 'secretAccessKey',
  },
  region: 'us-east-1',
};

const getAllS3Files = async (client: S3Client, s3Opts) => {
  const totalFiles = [];
  for await (const data of paginateListObjectsV2({ client }, s3Opts)) {
    totalFiles.push(...(data.Contents ?? []));
  }
  return totalFiles;
};

const main = async () => {
  const client = new S3Client(s3Config);
  const s3Opts = { Bucket: 'bucket-xyz' };
  console.log(await getAllS3Files(client, s3Opts));
};

main();
For AWS SDK v2, using an async generator.
Import S3:

const { S3 } = require('aws-sdk');
const s3 = new S3();

Create a generator function to retrieve all the file listings:

async function* listAllKeys(opts) {
  opts = { ...opts };
  do {
    const data = await s3.listObjectsV2(opts).promise();
    opts.ContinuationToken = data.NextContinuationToken;
    yield data;
  } while (opts.ContinuationToken);
}
Prepare the AWS parameters, based on the API docs:

const opts = {
  Bucket: 'bucket-xyz' /* required */,
  // ContinuationToken: 'STRING_VALUE',
  // Delimiter: 'STRING_VALUE',
  // EncodingType: url,
  // FetchOwner: true || false,
  // MaxKeys: 'NUMBER_VALUE',
  // Prefix: 'STRING_VALUE',
  // RequestPayer: requester,
  // StartAfter: 'STRING_VALUE'
};
Use the generator:

async function main() {
  // using a for await...of loop
  for await (const data of listAllKeys(opts)) {
    console.log(data.Contents);
  }
}
main();

That's it. Or, lazy load:

async function main() {
  const keys = listAllKeys(opts);
  console.log(await keys.next());
  // { value: {…}, done: false }
  console.log(await keys.next());
  // { value: {…}, done: false }
  console.log(await keys.next());
  // { value: undefined, done: true }
}
main();
Or use the generator to make an observable function:

const lister = (opts) => (o$) => {
  let needMore = true;
  const process = async () => {
    for await (const data of listAllKeys(opts)) {
      o$.next(data);
      if (!needMore) break;
    }
    o$.complete();
  };
  process();
  return () => (needMore = false);
};
Use this observable function with RxJS:

// Using RxJS
const { Observable } = require('rxjs');
const { flatMap } = require('rxjs/operators');

function listAll() {
  return Observable.create(lister(opts))
    .pipe(flatMap((v) => v.Contents))
    .subscribe(console.log);
}
listAll();
Or use this observable function with the Node.js EventEmitter:

const EventEmitter = require('events');
const _eve = new EventEmitter();

async function onData(data) {
  // will be called for each set of data
  console.log(data);
}
async function onError(error) {
  // will be called if any error occurs
  console.log(error);
}
async function onComplete() {
  // will be called when the data has been completely received
}

_eve.on('next', onData);
_eve.on('error', onError);
_eve.on('complete', onComplete);

const stop = lister(opts)({
  next: (v) => _eve.emit('next', v),
  error: (e) => _eve.emit('error', e),
  complete: (v) => _eve.emit('complete', v),
});
Here's Node code I wrote to assemble the S3 objects from truncated lists.

var params = {
  Bucket: <yourbucket>,
  Prefix: <yourprefix>,
};
var s3DataContents = []; // Single array of all combined S3 data.Contents

function s3Print() {
  if (program.al) {
    // --al: Print all objects
    console.log(JSON.stringify(s3DataContents, null, " "));
  } else {
    // --b: Print key only, otherwise also print index
    var i;
    for (i = 0; i < s3DataContents.length; i++) {
      var head = !program.b ? (i + 1) + ': ' : '';
      console.log(head + s3DataContents[i].Key);
    }
  }
}

function s3ListObjects(params, cb) {
  s3.listObjects(params, function(err, data) {
    if (err) {
      console.log("listS3Objects Error:", err);
    } else {
      var contents = data.Contents;
      s3DataContents = s3DataContents.concat(contents);
      if (data.IsTruncated) {
        // Set Marker to last returned key
        params.Marker = contents[contents.length - 1].Key;
        s3ListObjects(params, cb);
      } else {
        cb();
      }
    }
  });
}

s3ListObjects(params, s3Print);
Pay attention to the listObjects documentation of NextMarker, which is NOT always present in the returned data object, so I don't use it at all in the above code...
NextMarker — (String) When response is truncated (the IsTruncated element value in the response is true), you can use the key name in this field as marker in the subsequent request to get next set of objects. Amazon S3 lists objects in alphabetical order. Note: This element is returned only if you have delimiter request parameter specified. If response does not include the NextMarker and it is truncated, you can use the value of the last Key in the response as the marker in the subsequent request to get the next set of object keys.
The entire program has now been pushed to https://github.com/kenklin/s3list.
In fact, aws2js supports listing the objects in a bucket at a low level via the s3.get() method call. To do it, one has to pass the prefix parameter, which is documented on the Amazon S3 REST API page:

var s3 = require('aws2js').load('s3', awsAccessKeyId, awsSecretAccessKey);
s3.setBucket(bucketName);

var folder = encodeURI('some/path/to/S3/folder');
var url = '?prefix=' + folder;

s3.get(url, 'xml', function (error, data) {
  console.log(error);
  console.log(data);
});

The data variable in the above snippet contains a list of all objects in the bucketName bucket.
I published knox-copy when I couldn't find a good existing solution. It wraps all the pagination details of the REST API in a familiar Node stream:

var knoxCopy = require('knox-copy');

var client = knoxCopy.createClient({
  key: '<api-key-here>',
  secret: '<secret-here>',
  bucket: 'mrbucket'
});

client.streamKeys({
  // omit the prefix to list the whole bucket
  prefix: 'buckets/of/fun'
}).on('data', function(key) {
  console.log(key);
});

If you're listing fewer than 1000 files, a single page will work:

client.listPageOfKeys({
  prefix: 'smaller/bucket/o/fun'
}, function(err, page) {
  console.log(page.Contents); // <- Here's your list of files
});
Meekohi provided a very good answer, but the (newer) documentation states that NextMarker can be undefined. When this is the case, you should use the last key as the marker.
So his code sample can be changed to:

var allKeys = [];
function listAllKeys(marker, cb) {
  s3.listObjects({ Bucket: s3bucket, Marker: marker }, function(err, data) {
    allKeys.push(data.Contents);
    if (data.IsTruncated)
      listAllKeys(data.NextMarker || data.Contents[data.Contents.length - 1].Key, cb);
    else
      cb();
  });
}

I couldn't comment on the original answer since I don't have the required reputation. Apologies for the bad markup, btw.
I am using this version with async/await.
This function will return the contents in an array.
I'm also using NextContinuationToken instead of Marker.

async function getFilesRecursivelySub(param) {
  // Call the function to get a list of items from S3.
  let result = await s3.listObjectsV2(param).promise();
  if (!result.IsTruncated) {
    // Recursive terminating condition.
    return result.Contents;
  } else {
    // Recurse if the results are truncated.
    param.ContinuationToken = result.NextContinuationToken;
    return result.Contents.concat(await getFilesRecursivelySub(param));
  }
}

async function getFilesRecursively() {
  let param = {
    Bucket: 'YOUR_BUCKET_NAME'
    // Can add more parameters here.
  };
  return await getFilesRecursivelySub(param);
}
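A usage sketch, assuming the s3 client has been constructed as in the other answers:

getFilesRecursively()
  .then(contents => console.log(contents.map(c => c.Key)))
  .catch(console.error);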
This is an old question, and I guess the AWS JS SDK has changed a lot since it was asked. Here's yet another way to do it these days:

s3.listObjects({ Bucket: 'mybucket', Prefix: 'some-pfx' }).
  on('success', function handlePage(r) {
    // ... handle page of contents r.data.Contents
    if (r.hasNextPage()) {
      // There's another page; handle it
      r.nextPage().on('success', handlePage).send();
    } else {
      // Finished!
    }
  }).
  on('error', function(r) {
    // Error!
  }).
  send();
If you want to get a list of keys only within a specific folder inside an S3 bucket, then this will be useful.
Basically, the listObjects function will start searching from the Marker we set, and it will search up to the maxKeys: 1000 limit. So it will search folder by folder and get you the first 1000 keys it finds from different folders in the bucket.
Consider I have many folders inside my bucket with prefixes like prod/some date/, e.g.: prod/2017/05/12/, prod/2017/05/13/, etc.
I want to fetch the list of objects (file names) only within the prod/2017/05/12/ folder. Then I will specify prod/2017/05/12/ as my start and prod/2017/05/13/ [your next folder name] as my end, and in the code I'm breaking the loop when I encounter the end.
Each Key in data.Contents will look like this:

{
  Key: 'prod/2017/05/13/4bf2c675-a417-4c1f-a0b4-22fc45f99207.jpg',
  LastModified: 2017-05-13T00:59:02.000Z,
  ETag: '"630b2sdfsdfs49ef392bcc16c833004f94ae850"',
  Size: 134236366,
  StorageClass: 'STANDARD',
  Owner: { }
}
Code:

var list = [];

function listAllKeys(s3bucket, start, end) {
  s3.listObjects({
    Bucket: s3bucket,
    Marker: start,
    MaxKeys: 1000,
  }, function(err, data) {
    if (data.Contents) {
      for (var i = 0; i < data.Contents.length; i++) {
        var key = data.Contents[i].Key; // See above for the structure of data.Contents
        if (key.substring(0, end.length) != end) {
          list.push(key);
        } else {
          break; // break the loop when the end folder is reached
        }
      }
      console.log(list);
      console.log('Total - ', list.length);
    }
  });
}

listAllKeys('BucketName', 'prod/2017/05/12/', 'prod/2017/05/13/');
Output:
[ 'prod/2017/05/12/05/4bf2c675-a417-4c1f-a0b4-22fc45f99207.jpg',
'prod/2017/05/12/05/a36528b9-e071-4b83-a7e6-9b32d6bce6d8.jpg',
'prod/2017/05/12/05/bc4d6d4b-4455-48b3-a548-7a714c489060.jpg',
'prod/2017/05/12/05/f4b8d599-80d0-46fa-a996-e73b8fd0cd6d.jpg',
... 689 more items ]
Total - 692
I ended up building a wrapper function around listObjectsV2. It works the same way and takes the same parameters, but works recursively until IsTruncated=false, and returns all the keys found as an array in the second parameter of the callback function:

const AWS = require('aws-sdk');
const s3 = new AWS.S3();

function listAllKeys(params, cb) {
  var keys = [];
  if (params.data) {
    keys = keys.concat(params.data);
  }
  delete params['data'];
  s3.listObjectsV2(params, function(err, data) {
    if (err) {
      cb(err);
    } else if (data.IsTruncated) {
      params['ContinuationToken'] = data.NextContinuationToken;
      // carry all keys accumulated so far into the next call
      params['data'] = keys.concat(data.Contents);
      listAllKeys(params, cb);
    } else {
      keys = keys.concat(data.Contents);
      cb(null, keys);
    }
  });
}
Here's what I came up with, based on the other answers.
You can await listAllKeys() without having to use callbacks.

const listAllKeys = () =>
  new Promise((resolve, reject) => {
    let allKeys = [];
    const list = marker => {
      s3.listObjects({ Marker: marker }, (err, data) => {
        if (err) {
          reject(err);
        } else if (data.IsTruncated) {
          allKeys.push(data.Contents);
          list(data.NextMarker || data.Contents[data.Contents.length - 1].Key);
        } else {
          allKeys.push(data.Contents);
          resolve(allKeys);
        }
      });
    };
    list();
  });

This assumes you've initialized the s3 variable like so:

const s3 = new aws.S3({
  apiVersion: API_VERSION,
  params: { Bucket: BUCKET_NAME }
});
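A usage sketch: note the helper resolves with an array of Contents arrays (one per page), so flatten before use:

(async () => {
  try {
    const pages = await listAllKeys();
    const keys = pages.flat().map(obj => obj.Key);
    console.log(`Found ${keys.length} keys`);
  } catch (err) {
    console.error(err);
  }
})();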
I made it as simple as possible. You can iterate over the uploaded objects using a for loop; it is quite simple, neat, and easy to understand.
Packages required: fs, express-fileupload

server.js:

router.post('/upload', function(req, res) {
  if (req.files) {
    var file = req.files.filename;
    test(file);
    res.render('test');
  }
});

test function:

function test(file) {
  // upload all
  if (file.length) {
    for (var i = 0; i < file.length; i++) {
      fileUP(file[i]);
    }
  } else {
    fileUP(file);
  }

  // call fileUP() to upload one at a time
  function fileUP(fyl) {
    var filename = fyl.name;
    var tempPath = './temp' + filename;
    fyl.mv(tempPath, function(err) {
      fs.readFile(tempPath, function(err, data) {
        var params = {
          Bucket: 'BUCKET_NAME',
          Body: data,
          Key: Date.now() + filename
        };
        s3.upload(params, function (err, data) {
          if (data) {
            fs.unlink(tempPath, (err) => {
              if (err) {
                console.error(err);
                return;
              } else {
                console.log("file removed from temp location");
              }
            });
            console.log("Uploaded in:", data.Location);
          }
        });
      });
    });
  }
}
This should work:

var params = { Bucket: 'YOUR_BUCKET_NAME' }; // add Prefix etc. as needed

var listAllKeys = async function (token) {
  if (token) params.ContinuationToken = token;
  return new Promise((resolve, reject) => {
    s3.listObjectsV2(params, function (err, data) {
      if (err) {
        reject(err);
      }
      resolve(data);
    });
  });
};

var collect_all_files = async function () {
  var allkeys = [];
  var conti = true;
  var token = null;
  while (conti) {
    var data = await listAllKeys(token);
    allkeys = allkeys.concat(data.Contents);
    token = data.NextContinuationToken;
    conti = data.IsTruncated;
  }
  return allkeys;
};
Using the new API s3.listObjectsV2, the recursive solution will be:

S3Dataset.prototype.listFiles = function(params, callback) {
  var self = this;
  var options = {};
  for (var attrname in params) { options[attrname] = params[attrname]; }
  var results = [];
  var s3 = self.s3Store.GetInstance();

  function listAllKeys(token, callback) {
    var opt = { Bucket: self._options.s3.Bucket, Prefix: self._options.s3.Key, MaxKeys: 1000 };
    if (token) opt.ContinuationToken = token;
    s3.listObjectsV2(opt, (error, data) => {
      if (error) {
        if (self.logger) self.logger.error("listFiles error:", error);
        return callback(error);
      } else {
        for (var index in data.Contents) {
          var bucket = data.Contents[index];
          if (self.logger) self.logger.debug("listFiles Key: %s LastModified: %s Size: %s", bucket.Key, bucket.LastModified, bucket.Size);
          if (bucket.Size > 0) {
            var Bucket = self._options.s3.Bucket;
            var Key = bucket.Key;
            var components = bucket.Key.split('/');
            var name = components[components.length - 1];
            results.push({
              name: name,
              path: bucket.Key,
              mtime: bucket.LastModified,
              size: bucket.Size,
              sizehr: formatSizeUnits(bucket.Size)
            });
          }
        }
        if (data.IsTruncated) { // truncated page
          return listAllKeys(data.NextContinuationToken, callback);
        } else {
          return callback(null, results);
        }
      }
    });
  }

  return listAllKeys.apply(this, ['', callback]);
};
where

function formatSizeUnits(bytes) {
  if (bytes >= 1099511627776) { bytes = (bytes / 1099511627776).toFixed(4) + ' TB'; }
  else if (bytes >= 1073741824) { bytes = (bytes / 1073741824).toFixed(4) + ' GB'; }
  else if (bytes >= 1048576) { bytes = (bytes / 1048576).toFixed(4) + ' MB'; }
  else if (bytes >= 1024) { bytes = (bytes / 1024).toFixed(4) + ' KB'; }
  else if (bytes > 1) { bytes = bytes + ' bytes'; }
  else if (bytes == 1) { bytes = bytes + ' byte'; }
  else { bytes = '0 bytes'; }
  return bytes;
} // formatSizeUnits
Although @Meekohi's answer does technically work, I've had enough heartache with the S3 portion of the AWS SDK for Node.js. After all the previous struggles with modules such as aws-sdk, s3, and knox, I decided to install s3cmd via the OS package manager and shell out to it using child_process.
Something like:

var s3cmd = new cmd_exec('s3cmd', ['ls', filepath, 's3://' + inputBucket],
  function (me, data) { me.stdout += data.toString(); },
  function (me) { me.exit = 1; }
);
response.send(s3cmd.stdout);

(Using the cmd_exec implementation from this question)
This approach just works really well, including for other problematic things like file upload.
The cleanest way for me was to execute s3cmd from my Node script like this (the example here deletes files recursively):

var exec = require('child_process').exec;
var child;
var bucket = "myBucket";
var prefix = "myPrefix"; // this parameter is optional
var command = "s3cmd del -r s3://" + bucket + "/" + prefix;

child = exec(command, { maxBuffer: 5000 * 1024 }, function (error, stdout, stderr) { // maxBuffer is raised here to avoid the node maxBuffer exceeded error
  console.log('stdout: ' + stdout);
  if (error !== null) {
    console.log('exec error: ' + error);
  }
});
