Node.js execution order - node.js

I am quite new to Node.js and I am trying to load JSON config files stored either in Amazon S3 or in a local repository. Below is my code so far:
var cfg = process.env.CONFIG_FILE_NAME;
log.info("Loading config '%s'", cfg);
if(cfg.indexOf("s3") !== -1 || cfg.indexOf("S3") !== -1) {
log.info("S3 path detected");
var s3 = new aws.S3();
var myRegex = /\/\/(\w*)\/(.*)/g;
var matched = myRegex.exec(cfg);
var bucket = matched[1];
log.info("Extracted bucket: ", bucket);
var key = matched[2];
log.info("Extracted key: ", key);
var params = {
Bucket: bucket,
Key: key
};
s3.getObject(params, function(err, data) {
if (err) log.warn(err, err.stack);
else {
log.info("Loaded config from S3");
cfg = JSON.parse(data.Body);
log.info("Config content: "cfg);
}
});
}
else {
try {
//some code here
} catch (e) {
//some code here
}
}
subscriptions = cfg.subscriptions;
log.info("This supposes to contain json content from S3: ", cfg);
The idea is that the code will check if there is a path to S3 in the message sent to AWS Lambda (the CONFIG_FILE_NAME field). If it exists, the code loads the config file from S3; otherwise, it loads it locally. However, when I try to run the code, it returns something like this:
4 Jan 11:37:34 - [INFO] Loading config 'Path-to-S3'
4 Jan 11:37:34 - [INFO] S3 path detected
4 Jan 11:37:34 - [INFO] Extracted bucket: mybucket
4 Jan 11:37:34 - [INFO] Extracted key: mykey.cfg.json
4 Jan 11:37:34 - [INFO] "This suppose to contain json content from S3: Path-to-S3'
4 Jan 11:37:34 - [INFO] Loaded config from S3
4 Jan 11:37:34 - [INFO] Config content: my-config-content
So the problem is that the code executes the line subscriptions = cfg.subscriptions; before the config file is loaded from S3. At this point the variable cfg only contains the path to the config, not the config content I want to load from S3. My later code depends on the subscriptions field from the config file, so it gets stuck right here.

You can use the async module to make your code work (npm install --save async).
Async is a utility module which provides straight-forward, powerful functions for working with asynchronous JavaScript.
var async = require("async");
//or you can just use var waterfall = require("async/waterfall");
var cfg = process.env.CONFIG_FILE_NAME;
log.info("Loading config '%s'", cfg);
async.waterfall([
function (callback) {
if (cfg.indexOf("s3") !== -1 || cfg.indexOf("S3") !== -1) {
log.info("S3 path detected");
var s3 = new aws.S3();
var myRegex = /\/\/(\w*)\/(.*)/g;
var matched = myRegex.exec(cfg);
var bucket = matched[1];
log.info("Extracted bucket: ", bucket);
var key = matched[2];
log.info("Extracted key: ", key);
var params = {
Bucket: bucket,
Key : key
};
s3.getObject(params, function (err, data) {
if (err) {
log.warn(err, err.stack);
callback(null, cfg);
}
else {
log.info("Loaded config from S3");
cfg = JSON.parse(data.Body);
log.info("Config content: ", cfg);
callback(null, cfg);
}
});
}
else {
callback(null, cfg);
}
},
function (cfg, callback) {
try {
//some code here
}
catch (e) {
//some code here
}
var subscriptions = cfg.subscriptions;
log.info("This supposes to contain json content from S3: ", cfg);
callback(null, 'done');
}
], function (err, result) {
// result now equals 'done'
});
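If you would rather not add a dependency, the aws-sdk v2 client can also return promises via .promise(), and the same ordering can be enforced with async/await. A minimal sketch, assuming the same aws and log objects as above (the local-load branch is left as a placeholder):
async function loadConfig() {
    let cfg = process.env.CONFIG_FILE_NAME;
    log.info("Loading config '%s'", cfg);
    if (cfg.toLowerCase().indexOf("s3") !== -1) {
        log.info("S3 path detected");
        const s3 = new aws.S3();
        const matched = /\/\/(\w*)\/(.*)/.exec(cfg);
        // await resolves only after the object has been fetched from S3
        const data = await s3.getObject({ Bucket: matched[1], Key: matched[2] }).promise();
        cfg = JSON.parse(data.Body);
        log.info("Config content: ", cfg);
    } else {
        // load the local config here instead
    }
    return cfg;
}

loadConfig().then(function (cfg) {
    // runs only after the config has actually been loaded
    var subscriptions = cfg.subscriptions;
    log.info("Loaded subscriptions: ", subscriptions);
});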

Related

Read data from .xlsx file on S3 using Nodejs Lambda

I'm still new to Node.js and AWS, so forgive me if this is a noob question.
I am trying to read the data from an Excel file (.xlsx). The Lambda function receives the extension of the file type.
Here is my code:
exports.handler = async (event, context, callback) => {
console.log('Received event:', JSON.stringify(event, null, 2));
if (event.fileExt === undefined) {
callback("400 Invalid Input");
}
let returnData = "";
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();
switch(event.fileExt)
{
case "plain":
case "txt":
// Extract text
const params = {Bucket: 'filestation', Key: 'MyTXT.'+event.fileExt};
try {
await s3.getObject(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else{ // successful response
returnData = data.Body.toString('utf-8');
context.done(null, returnData);
}
}).promise();
} catch (error) {
console.log(error);
return;
}
break;
case "xls":
case "xlsx":
returnData = "Excel";
// Extract text
const params2 = {Bucket: 'filestation', Key: 'MyExcel.'+event.fileExt};
const readXlsxFile = require("read-excel-file/node");
try {
const doc = await s3.getObject(params2);
const parsedDoc = await readXlsxFile(doc);
console.log(parsedDoc)
} catch (err) {
console.log(err);
const message = `Error getting object.`;
console.log(message);
throw new Error(message);
}
break;
case "docx":
returnData = "Word doc";
// Extract text
break;
default:
callback("400 Invalid Operator");
break;
}
callback(null, returnData);
};
The text file part works, but the xlsx part makes the function time out.
I did install the read-excel-file dependency and uploaded the zip so that I have access to it.
But the function times out with this message:
"errorMessage": "2020-11-02T13:06:50.948Z 120bfb48-f29c-4e3f-9507-fc88125515fd Task timed out after 3.01 seconds"
Any help would be appreciated! Thanks for your time.
Using the xlsx npm library, here's how we did it, assuming the file is under the root project path.
const xlsx = require('xlsx');
// read your excel file
let readFile = xlsx.readFile('file_example_XLSX_5000.xlsx')
// get first-sheet's name
let sheetName = readFile.SheetNames[0];
// convert sheets to JSON. Best if sheet has a headers specified.
console.log(xlsx.utils.sheet_to_json(readFile.Sheets[sheetName]));
You need to install xlsx (SheetJs) library into the project:
npm install xlsx
and then import the "read" function into the lambda, get the s3 object's body and send to xlsx like this:
const { read } = require('sheetjs-style');
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
exports.handler = async (event) => {
const bucketName = 'excel-files';
const fileKey = 'Demo Data.xlsx';
// Simple GetObject
let file = await s3.getObject({Bucket: bucketName, Key: fileKey}).promise();
const wb = read(file.Body);
const response = {
statusCode: 200,
body: JSON.stringify({
read: wb.Sheets,
}),
};
return response;
};
(of course, you can receive the bucket and filekey from parameters if you send them...)
Very important: use the read (not the readFile) function and send the Body property (with a capital "B") as a parameter.
I changed the timeout to 20 seconds and it works. Only one issue remains: const parsedDoc = await readXlsxFile(doc); wants to receive a string (filepath) and not a file.
Solved by using the xlsx npm library, using a stream and giving it buffers.
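For reference, a rough sketch of that buffer approach (names are illustrative, assuming the aws-sdk v2 client); xlsx.read accepts a Buffer directly, so no temporary file path is needed:
const xlsx = require('xlsx');
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();

async function readExcelFromS3(bucket, key) {
    // getObject resolves with the whole object; data.Body is a Buffer in Node.js
    const data = await s3.getObject({ Bucket: bucket, Key: key }).promise();
    const workbook = xlsx.read(data.Body, { type: 'buffer' });
    const sheetName = workbook.SheetNames[0];
    // convert the first sheet to JSON rows
    return xlsx.utils.sheet_to_json(workbook.Sheets[sheetName]);
}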

Download pdf files from external url's - Heroku, NodeJS, Angular 7

I am trying to download multiple PDF files from external sources to my Node.js server (on Heroku) temporarily and upload them to an AWS S3 bucket.
I have tried multiple methods, all of which work fine on my local machine but not on the Heroku dyno. I am unable to even create a folder on Heroku, I guess due to limited permissions.
In Node
1) using var download = require('download-file') (using this currently in below code)
2) axios
3) res.download()
Download Files Code
const downloadFiles = async (unique_files) => {
for (let index = 0; index < unique_files.length; index++) {
let file_ext = unique_files[index].substr(unique_files[index].length - 4);
if(file_ext == ".pdf") {
await downloadzz(unique_files[index])
}
}
}
function downloadzz(link) {
download(link, function(err){
if (err) throw err
console.log("DOWNLOAD Complete");
});
}
Upload Files Code
const uploadFiles = async (unique_files) => {
for (let index = 0; index < unique_files.length; index++) {
let file_ext = unique_files[index].substr(unique_files[index].length - 4);
if(file_ext == ".pdf") {
await uploadzz(unique_files[index])
}
}
}
function uploadzz(link) {
fs.readFile(require('path').resolve(__dirname+'/../external-pdfs/', link.slice(link.lastIndexOf('/') + 1)), function (err, data) {
params = {Bucket: pdfBucket, Key: link.slice(link.lastIndexOf('/') + 1), Body: data, ACL: "public-read" };
s3.putObject(params, function(err, data) {
if (err) {
console.log("Failed Upload", err);
} else {
console.log("Successfully uploaded data to bucket", data);
}
});
});
}
I don't get any error, but no folder named external-pdfs seems to exist on the Heroku server.
I am open to better solutions: for example, directly uploading the file from the external URL to S3...
How can I read a file from an external URL and directly upload it to an AWS S3 bucket?
You can use axios. By setting the responseType to stream, you can get the file data and pass it as the body. Here is example code that gets the PDF from a URL and uploads it directly to S3:
const AWS = require('aws-sdk');
const axios = require('axios');
AWS.config.loadFromPath('./config.json');
const s3 = new AWS.S3({apiVersion: '2006-03-01'});
const URL = "<YOUR_URL>";
const uploadPdfToS3 = async () => {
try{
const {data, headers} = await axios.get(URL, {responseType: 'stream'});
// Create params for putObject call
const objectParams = {
Bucket: "<YOUR_BUCKET>",
Key: "<YOUR_KEY>",
ContentLength: headers['content-length'],
Body: data
};
// Create object upload promise
await s3.putObject(objectParams).promise();
} catch(err){
console.log("ERROR --->" + err)
}
}
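A call site for this sketch is straightforward; errors are already caught and logged inside uploadPdfToS3:
uploadPdfToS3().then(() => console.log("Upload finished"));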
In Angular, we can use the FileSaver library to save the PDF file on the client side.

JSON files do not contain all the results in AWS Lambda using NodeJS

I'm currently working on a project using AWS S3, Rekognition and Lambda. I'm writing in NodeJS and created a working solution to what I want to achieve. The workflow in short is: an image of a face is loaded onto a S3 bucket, then the 'searchFacesByImage' API is called to see if that face has been indexed to the Master collection in the past. If it is a new face, the result will be false, and the 'indexFaces' API is called to index that face to the Master collection. Once that is done, I write the output to 3 separate JSON files that is in the same S3 bucket, called: 'metadata.json', 'indexing.json', 'rekognition.json'.
The 'metadata.json' file only contains the ExternalImageID (that I create myself), the date and time of indexing, the filename that was indexed, and a count that counts how many times that face has been indexed in the past.
The 'indexing.json' file contains the same ExternalImageID, the same data and time of indexing, and the response from the 'searchFacesByImage' API.
The 'rekognition.json' file contains the same ExternalImageID and date and time, as well as the response from the 'indexFaces' API.
The problem is that when I load one image at a time, the 3 JSON files populate accordingly, but as soon as I load more than a few images at the same time (I've tested it with 7), all 7 images run through the workflow and the response data is written out to each file according to the CloudWatch logs; yet when I actually go to view the JSON files, not all the response data is there for all 7 images. Sometimes the data of 5 images is in the JSON, other times it's 4. The data doesn't have to be in any specific order, it just has to be there. I've also tested uploading 18 images at once, and only the response of 10 images was in the JSON.
I believe the problem is that I'm calling the 'getObject' API on the JSON files, then appending the response data to them, and then calling the 'putObject' API to put them back into the S3 bucket; but while the first image is going through this process, the next image wants to do the same, and there is no file to use 'getObject' on because it is busy with the previous image, so it just skips over the image, although the CloudWatch logs said it had been added to the files.
I have no idea how to work around this. I believe the answer lies in asynchronous JavaScript (which I don't know much about, so I have no idea where to begin).
My apologies for the long post. Here is my code below:
const AWS = require('aws-sdk');
const s3 = new AWS.S3({apiVersion: "2006-03-01"});
const rekognition = new AWS.Rekognition();
//const docClient = new AWS.DynamoDB.DocumentClient();
const uuidv4 = require('uuid/v4');
let bucket, key;
let dataSaveDate = new Date();
console.log('Loading function');
//-----------------------------------Exports Function---------------------------
exports.handler = function(event, context) {
bucket = event.Records[0].s3.bucket.name;
key = event.Records[0].s3.object.key;
console.log(bucket);
console.log(key);
searchingFacesByImage(bucket, key);
};
//---------------------------------------------------------------------------
// Search for a face in an input image
function searchingFacesByImage(bucket, key) {
let params = {
CollectionId: "allFaces",
FaceMatchThreshold: 95,
Image: {
S3Object: {
Bucket: bucket,
Name: key
}
},
MaxFaces: 5
};
const searchingFace = rekognition.searchFacesByImage(params, function(err, searchdata) {
if (err) {
console.log(err, err.stack); // an error occurred
} else {
// console.log(JSON.stringify(searchdata, null, '\t'));
// if data.FaceMatches > 0 : There that face in the image exists in the collection
if (searchdata.FaceMatches.length > 0) {
console.log("Face is a match");
} else {
console.log("Face is not a match");
let mapping_id = createRandomId();
console.log(`Created mapping_id: ${mapping_id}`);
console.log("Start indexing face to 'allFaces'");
indexToAllFaces(mapping_id, searchdata, bucket, key);
}
}
});
return searchingFace;
}
//---------------------------------------------------------------------------
// If face is not a match in 'allFaces', index face to 'allFaces' using mapping_id
function indexToAllFaces(mapping_id, searchData, bucket, key) {
let params = {
CollectionId: "allFaces",
DetectionAttributes: ['ALL'],
ExternalImageId: mapping_id,
Image: {
S3Object: {
Bucket: bucket,
Name: key
}
}
};
const indexFace = rekognition.indexFaces(params, function(err, data) {
if (err) {
console.log(err, err.stack); // an error occurred
} else {
console.log("INDEXING TO 'allFaces'");
//console.log(JSON.stringify(data, null, '\t'));
logAllData(mapping_id, bucket, key, searchData, data);
}
});
return indexFace;
}
//---------------------------------------------------------------------------
// Counting how many times a face has been indexed and logging ALL data in a single log
function logAllData(mapping_id, bucket, key, searchData, data) {
let params = {
CollectionId: mapping_id,
MaxResults: 20
};
const faceDetails = rekognition.listFaces(params, function(err, facedata) {
if (err) {
console.log(err, err.stack); // an error occurred
} else {
//console.log(JSON.stringify(facedata, null, '\t'));
metadata(mapping_id, bucket, key, facedata);
indexing(mapping_id, bucket, searchData);
rekognitionData(mapping_id, bucket, data);
}
});
return faceDetails;
}
//-----------------------------------------------------------------------------
function metadata(mapping_id, bucket, key, faceData) {
let body = [
{
"mapping_id": mapping_id,
"time": dataSaveDate,
"image_name": key,
"indexing_count": faceData.Faces.length - 1
}
];
//console.log(JSON.stringify(body, null, '\t'));
logData("metadata.json", bucket, body);
}
//------------------------------------------------------------------------------
function indexing(mapping_id, bucket, searchData) {
let body = [
{
"mapping_id": mapping_id,
"time": dataSaveDate,
"IndexingData": searchData
}
];
logData("indexing.json", bucket, body);
}
//------------------------------------------------------------------------------
function rekognitionData(mapping_id, bucket, data) {
let body = [
{
"mapping_id": mapping_id,
"time": dataSaveDate,
"rekognition": data
}
];
logData("rekognition.json", bucket, body);
}
//------------------------------------------------------------------------------
// Function to log all data to JSON files
function logData(jsonFileName, bucket, body) {
let params = {
Bucket: bucket,
Key: jsonFileName
};
const readFile = s3.getObject(params, function(err, filedata) {
if (err) {
console.log(err, err.stack); // an error occurred
} else {
console.log(`READING ${jsonFileName} CONTENTS`);
// Read data from 'jsonFileName'
let raw_content = filedata.Body.toString();
let content = JSON.parse(raw_content);
// Add new data to 'jsonFileName'
content.push(...body);
// Put new data back into jsonFileName
s3.putObject(
{
Bucket: bucket,
Key: jsonFileName,
Body: JSON.stringify(content, null, '\t'),
ContentType: "application/json"
},
function(err, res) {
if (err) {
console.log(err);
} else {
console.log(`DATA SAVED TO ${jsonFileName}`);
}
}
);
}
});
return readFile;
}
//----------------------------------SCRIPT ENDS---------------------------------
When a Node.js Lambda handler returns and reaches the end of the main thread, any asynchronous work that is still pending can be cut off.
To make sure the Lambda does not prematurely terminate that pending work, wait until each Promise is complete by using await.
The functions s3.getObject and s3.putObject can be made into a Promise like this:
await s3.getObject(params).promise()
await s3.putObject(params).promise()
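Applied to the code in the question, a minimal sketch of what an awaited logData could look like (error handling trimmed; the intermediate Rekognition calls would need the same promise-based treatment so the handler's await covers the whole chain):
async function logData(jsonFileName, bucket, body) {
    // read the current JSON log, append the new entries, write it back
    const filedata = await s3.getObject({ Bucket: bucket, Key: jsonFileName }).promise();
    const content = JSON.parse(filedata.Body.toString());
    content.push(...body);
    await s3.putObject({
        Bucket: bucket,
        Key: jsonFileName,
        Body: JSON.stringify(content, null, '\t'),
        ContentType: "application/json"
    }).promise();
}

exports.handler = async function(event) {
    const bucket = event.Records[0].s3.bucket.name;
    const key = event.Records[0].s3.object.key;
    // only works if searchingFacesByImage (and the functions it calls) return promises
    await searchingFacesByImage(bucket, key);
};
Note that even with await, two concurrent Lambda invocations can still read the same version of a JSON file and overwrite each other's append, since S3 has no atomic append; that is worth keeping in mind when many images arrive at once.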

Copying AWS S3 Bucket root contents to same bucket within subfolder

I want to be able to copy files within the same bucket, from the root directory to a subfolder within a subfolder, while excluding that subfolder, using the aws-sdk.
i.e:
I want to use this AWS-CLI command in a gulp file task:
aws s3 cp s3://bucketName s3://bucketName/last_good/YYYYMMDD --recursive --exclude "last_good/*"
I've used the copy examples used from How to copy/move all objects in Amazon S3 from one prefix to other using the AWS SDK for Node.js
I am just not sure how to specify the folder to exclude. In my above example it would be the last_good folder.
var gulp = require('gulp');
var AWS = require('aws-sdk');
var async = require('async');
var bucketName = 'bucketname';
var oldPrefix = '';
var newPrefix = 'last_good/20190817/';
var s3 = new AWS.S3({params: {Bucket: bucketName}, region: 'us-west-2'});
gulp.task('publish', function() {
CopyToLastGood();
});
function CopyToLastGood() {
var done = function(err, data) {
if (err) console.log(err);
else console.log(data);
};
s3.listObjects({Prefix: oldPrefix}, function(err, data) {
if (data.Contents.length) {
async.each(data.Contents, function(file, cb) {
var params = {
CopySource: bucketName + '/' + file.Key,
Key: file.Key.replace(oldPrefix, newPrefix)
};
s3.copyObject(params, function(copyErr, copyData){
if (copyErr) { // an error occurred
console.log(copyErr);
}
else {
console.log('Copied: ', params.Key); //successful response
cb();
}
});
}, done);
}
});
}
I expect the contents of the root to be copied to last_good/20190817/, however without copying the last_good folder itself.
I solved this using the Delimiter option on the s3.listObjects params.
i.e:
s3.listObjects({Prefix: oldPrefix, Delimiter: '/'}, ...)
This only lists files within the root.
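In the gulp task above, only the listObjects call changes; a sketch using the same variables as before:
// Delimiter '/' limits the listing to keys at the bucket root,
// so objects already under last_good/ are never copied again.
s3.listObjects({Prefix: oldPrefix, Delimiter: '/'}, function(err, data) {
    if (err) return console.log(err);
    async.each(data.Contents, function(file, cb) {
        var params = {
            CopySource: bucketName + '/' + file.Key,
            Key: file.Key.replace(oldPrefix, newPrefix)
        };
        s3.copyObject(params, function(copyErr) {
            if (copyErr) return cb(copyErr);
            console.log('Copied: ', params.Key);
            cb();
        });
    }, done);
});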

How can I delete folder on s3 with node.js?

Yes, I know there is no folder concept in S3 storage, but I really want to delete a specific folder from S3 with Node.js. I tried two solutions, but neither worked.
My code is below:
Solution 1:
Deleting folder directly.
var key='level/folder1/folder2/';
var strReturn;
var params = {Bucket: MyBucket};
var s3 = new AWS.S3(params);
s3.client.listObjects({
Bucket: MyBucket,
Key: key
}, function (err, data) {
if(err){
strReturn="{\"status\":\"1\"}";
}else{
strReturn="{\"status\":\"0\"}";
}
res.send(strReturn);
console.log('error:'+err+' data:'+JSON.stringify(data));
});
Actually, I have a lot of files under folder2. I can delete a single file from folder2 if I define the key like this:
var key='level/folder1/folder2/file1.txt', but it didn't work when I tried to delete a folder (key='level/folder1/folder2/').
Solution 2:
I tried to set an expiration on the object when I uploaded the file or folder to S3. The code is below:
s3.client.putObject({
Bucket: Camera_Bucket,
Key: key,
ACL:'public-read',
Expires: 60
}
But that didn't work either. After the upload finished, I checked the properties of that file; it showed no value for the expiry date:
Expiry Date:none
Expiration Rule:N/A
How can I delete folder on s3 with node.js?
Here is an implementation in ES7 with an async function and using listObjectsV2 (the revised List Objects API):
async function emptyS3Directory(bucket, dir) {
const listParams = {
Bucket: bucket,
Prefix: dir
};
const listedObjects = await s3.listObjectsV2(listParams).promise();
if (listedObjects.Contents.length === 0) return;
const deleteParams = {
Bucket: bucket,
Delete: { Objects: [] }
};
listedObjects.Contents.forEach(({ Key }) => {
deleteParams.Delete.Objects.push({ Key });
});
await s3.deleteObjects(deleteParams).promise();
if (listedObjects.IsTruncated) await emptyS3Directory(bucket, dir);
}
To call it:
await emptyS3Directory(process.env.S3_BUCKET, 'images/')
You can use the aws-sdk module to delete a folder. Because you can only delete a folder when it is empty, you should first delete the files in it. I'm doing it like this:
function emptyBucket(bucketName,callback){
var params = {
Bucket: bucketName,
Prefix: 'folder/'
};
s3.listObjects(params, function(err, data) {
if (err) return callback(err);
if (data.Contents.length == 0) return callback();
params = {Bucket: bucketName};
params.Delete = {Objects:[]};
data.Contents.forEach(function(content) {
params.Delete.Objects.push({Key: content.Key});
});
s3.deleteObjects(params, function(err, data) {
if (err) return callback(err);
if (data.IsTruncated) {
emptyBucket(bucketName, callback);
} else {
callback();
}
});
});
}
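A call site for this callback-style helper could look like this (the bucket name is illustrative): empty the prefix first, then remove the zero-byte folder placeholder if one exists:
emptyBucket('my-bucket', function(err) {
    if (err) return console.log(err);
    // optional: remove the zero-byte "folder/" placeholder object itself
    s3.deleteObject({ Bucket: 'my-bucket', Key: 'folder/' }, function(err) {
        if (err) console.log(err);
        else console.log('Folder deleted');
    });
});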
A much simpler way is to fetch all objects (keys) at that path and delete them. Each call fetches up to 1000 keys, and s3.deleteObjects can delete up to 1000 keys per request too. Do that recursively to achieve the goal.
Written in TypeScript:
/**
* delete a folder recursively
* @param bucket
* @param path - without end /
*/
deleteFolder(bucket: string, path: string) {
return new Promise((resolve, reject) => {
// get all keys and delete objects
const getAndDelete = (ct: string = null) => {
this.s3
.listObjectsV2({
Bucket: bucket,
MaxKeys: 1000,
ContinuationToken: ct,
Prefix: path + "/",
Delimiter: "",
})
.promise()
.then(async (data) => {
// params for delete operation
let params = {
Bucket: bucket,
Delete: { Objects: [] },
};
// add keys to Delete Object
data.Contents.forEach((content) => {
params.Delete.Objects.push({ Key: content.Key });
});
// delete all keys
await this.s3.deleteObjects(params).promise();
// check if ct is present
if (data.NextContinuationToken) getAndDelete(data.NextContinuationToken);
else resolve(true);
})
.catch((err) => reject(err));
};
// init call
getAndDelete();
});
}
According to the doc at https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjects.html:
A response can contain CommonPrefixes only if you specify a delimiter.
CommonPrefixes contains all (if there are any) keys between Prefix and the next occurrence of the string specified by the delimiter.
Omitting the Delimiter parameter will make ListObjects return all keys starting with the Prefix parameter.
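A small sketch of that difference, using illustrative bucket and prefix names (inside an async function, aws-sdk v2):
// With a delimiter: immediate objects come back in Contents,
// immediate "subfolders" come back in CommonPrefixes.
const shallow = await s3.listObjectsV2({
    Bucket: 'my-bucket',
    Prefix: 'level/folder1/',
    Delimiter: '/'
}).promise();
console.log((shallow.Contents || []).map(o => o.Key),
            (shallow.CommonPrefixes || []).map(p => p.Prefix));

// Without a delimiter: every key under the prefix is returned, subfolders included.
const deep = await s3.listObjectsV2({
    Bucket: 'my-bucket',
    Prefix: 'level/folder1/'
}).promise();
console.log((deep.Contents || []).map(o => o.Key));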
Based on the accepted answer, I created a promise-returning function, so you can chain it.
function emptyBucket(bucketName){
let currentData;
let params = {
Bucket: bucketName,
Prefix: 'folder/'
};
return S3.listObjects(params).promise().then(data => {
if (data.Contents.length === 0) {
throw new Error('List of objects empty.');
}
currentData = data;
params = {Bucket: bucketName};
params.Delete = {Objects:[]};
currentData.Contents.forEach(content => {
params.Delete.Objects.push({Key: content.Key});
});
return S3.deleteObjects(params).promise();
}).then(() => {
if (currentData.Contents.length === 1000) {
return emptyBucket(bucketName);
} else {
return true;
}
});
}
The accepted answer throws an error when used in typescript. I made it work by modifying the code in the following way. I'm very new to Typescript but at least it is working now.
async function emptyS3Directory(prefix: string) {
const listParams = {
Bucket: "bucketName",
Prefix: prefix, // ex. path/to/folder
};
const listedObjects = await s3.listObjectsV2(listParams).promise();
if (listedObjects.Contents.length === 0) return;
const deleteParams = {
Bucket: "bucketName",
Delete: { Objects: [] as any },
};
listedObjects.Contents.forEach((content: any) => {
deleteParams.Delete.Objects.push({ Key: content.Key });
});
await s3.deleteObjects(deleteParams).promise();
if (listedObjects.IsTruncated) await emptyS3Directory(prefix);
}
Better solution with @aws-sdk/client-s3 module:
private async _deleteFolder(key: string, bucketName: string): Promise<void> {
const DeletePromises: Promise<DeleteObjectCommandOutput>[] = [];
const { Contents } = await this.client.send(
new ListObjectsCommand({
Bucket: bucketName,
Prefix: key,
}),
);
if (!Contents) return;
Contents.forEach(({ Key }) => {
DeletePromises.push(
this.client.send(
new DeleteObjectCommand({
Bucket: bucketName,
Key,
}),
),
);
});
await Promise.all(DeletePromises);
}
ListObjectsCommand returns the keys of files in the folder, even within subfolders.
listObjectsV2 lists files only under the current dir Prefix, not under subfolder Prefixes. If you want to delete a folder with subfolders recursively, this is the source code: https://github.com/tagspaces/tagspaces-common/blob/develop/packages/common-aws/io-objectstore.js#L1060
deleteDirectoryPromise = async (path: string): Promise<Object> => {
const prefixes = await this.getDirectoryPrefixes(path);
if (prefixes.length > 0) {
const deleteParams = {
Bucket: this.config.bucketName,
Delete: { Objects: prefixes }
};
return this.objectStore.deleteObjects(deleteParams).promise();
}
return this.objectStore
.deleteObject({
Bucket: this.config.bucketName,
Key: path
})
.promise();
};
/**
* get recursively all aws directory prefixes
* @param path
*/
getDirectoryPrefixes = async (path: string): Promise<any[]> => {
const prefixes = [];
const promises = [];
const listParams = {
Bucket: this.config.bucketName,
Prefix: path,
Delimiter: '/'
};
const listedObjects = await this.objectStore
.listObjectsV2(listParams)
.promise();
if (
listedObjects.Contents.length > 0 ||
listedObjects.CommonPrefixes.length > 0
) {
listedObjects.Contents.forEach(({ Key }) => {
prefixes.push({ Key });
});
listedObjects.CommonPrefixes.forEach(({ Prefix }) => {
prefixes.push({ Key: Prefix });
promises.push(this.getDirectoryPrefixes(Prefix));
});
// if (listedObjects.IsTruncated) await this.deleteDirectoryPromise(path);
}
const subPrefixes = await Promise.all(promises);
subPrefixes.map(arrPrefixes => {
arrPrefixes.map(prefix => {
prefixes.push(prefix);
});
});
return prefixes;
};
You can try this:
import { s3DeleteDir } from '@zvs001/s3-utils'
import { S3 } from 'aws-sdk'
const s3Client = new S3()
await s3DeleteDir(s3Client, {
Bucket: 'my-bucket',
Prefix: `folder/`,
})
I like the list-objects-then-delete approach, which is what the AWS command line does behind the scenes, by the way. But I didn't want to await the list (a few seconds) before deleting the objects, so I use this one-step (background) process, which I found slightly faster. You can await the child process if you really want to confirm deletion, but I found that took around 10 seconds, so I don't bother; I just fire and forget and check the logs instead. The entire API call with other stuff now takes 1.5 s, which is fine for my situation.
var CHILD = require("child_process").exec;
function removeImagesAndTheFolder(folder_name_str, callback){
var cmd_str = "aws s3 rm s3://"
+ IMAGE_BUCKET_STR
+ "/" + folder_name_str
+ "/ --recursive";
if(process.env.NODE_ENV === "development"){
//When not on an EC2 with a role I use my profile
cmd_str += " " + "--profile " + LOCAL_CONFIG.PROFILE_STR;
}
// In my situation I return early for the user. You could make them wait tho'.
callback(null, {"msg_str": "Check later that these images were actually removed."});
//do not return yet still stuff to do
CHILD(cmd_str, function(error, stdout, stderr){
if(error || stderr){
console.log("Problem removing this folder with a child process:" + stderr);
}else{
console.log("Child process completed, here are the results", stdout);
}
});
}
I suggest you do it in 2 steps, so you can "follow" what's happening (with a progress bar, etc.):
Get all keys to remove
Remove keys
Of course, step 1 is a recursive function, such as:
https://gist.github.com/ebuildy/7ac807fd017452dfaf3b9c9b10ff3b52#file-my-s3-client-ts
import { DeleteObjectsCommand, ListObjectsV2Command, S3Client, S3ClientConfig } from "@aws-sdk/client-s3"
/**
* Get all keys recursively
* @param Prefix
* @returns
*/
public async listObjectsRecursive(Prefix: string, ContinuationToken?: string): Promise<
any[]
> {
// Get objects for current prefix
const listObjects = await this.client.send(
new ListObjectsV2Command({
Delimiter: "/",
Bucket: this.bucket.name,
Prefix,
ContinuationToken
})
);
let deepFiles, nextFiles
// Recursive call to get sub prefixes
if (listObjects.CommonPrefixes) {
const deepFilesPromises = listObjects.CommonPrefixes.flatMap(({Prefix}) => {
return this.listObjectsRecursive(Prefix)
})
deepFiles = (await Promise.all(deepFilesPromises)).flatMap(t => t)
}
// If we must paginate
if (listObjects.IsTruncated) {
nextFiles = await this.listObjectsRecursive(Prefix, listObjects.NextContinuationToken)
}
return [
...(listObjects.Contents || []),
...(deepFiles || []),
...(nextFiles || [])
]
}
Then, delete all objects:
public async deleteKeys(keys: string[]): Promise<any[]> {
function spliceIntoChunks(arr: any[], chunkSize: number) {
const res = [];
while (arr.length > 0) {
const chunk = arr.splice(0, chunkSize);
res.push(chunk);
}
return res;
}
const allKeysToRemovePromises = keys.map(k => this.listObjectsRecursive(k))
const allKeysToRemove = (await Promise.all(allKeysToRemovePromises)).flatMap(k => k)
const allKeysToRemoveGroups = spliceIntoChunks(allKeysToRemove, 3)
const deletePromises = allKeysToRemoveGroups.map(group => {
return this.client.send(
new DeleteObjectsCommand({
Bucket: this.bucket.name,
Delete: {
Objects: group.map(({Key}) => {
return {
Key
}
})
}
})
)
})
const results = await Promise.all(deletePromises)
return results.flatMap(({$metadata, Deleted}) => {
return Deleted.map(({Key}) => {
return {
status: $metadata.httpStatusCode,
key: Key
}
})
})
}
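Assuming both methods live on the same client wrapper (the client name here is illustrative), usage could look like this, inside an async function:
// Expands each prefix recursively, then deletes the keys in small batches.
const results = await client.deleteKeys(['tmp/exports/', 'tmp/uploads/']);
console.log(`${results.length} objects deleted`);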
Based on Emi's answer, I made an npm package so you don't need to write the code yourself. Also, the code is written in TypeScript.
See https://github.com/bingtimren/s3-commons/blob/master/src/lib/deleteRecursive.ts
You can delete an empty folder the same way you delete a file. In order to delete a non-empty folder on AWS S3, you'll need to empty it first by deleting all files and folders inside. Once the folder is empty, you can delete it as a regular file. The same applies to the bucket deletion. We've implemented it in this app called Commandeer so you can do it from a GUI.
