Copying AWS S3 Bucket root contents to same bucket within subfolder - node.js

I want to copy files within the same bucket from the root directory into a subfolder nested inside another subfolder, while excluding that subfolder itself, using the aws-sdk.
i.e. I want to reproduce this AWS CLI command in a gulp task:
aws s3 cp s3://bucketName s3://bucketName/last_good/YYYYMMDD --recursive --exclude "last_good/*"
I've used the copy examples from How to copy/move all objects in Amazon S3 from one prefix to other using the AWS SDK for Node.js.
I am just not sure how to specify the folder to exclude. In my above example it would be the last_good folder.
var gulp = require('gulp');
var AWS = require('aws-sdk');
var async = require('async');

var bucketName = 'bucketname';
var oldPrefix = '';
var newPrefix = 'last_good/20190817/';
var s3 = new AWS.S3({params: {Bucket: bucketName}, region: 'us-west-2'});

gulp.task('publish', function() {
    CopyToLastGood();
});

function CopyToLastGood() {
    var done = function(err, data) {
        if (err) console.log(err);
        else console.log(data);
    };

    s3.listObjects({Prefix: oldPrefix}, function(err, data) {
        if (data.Contents.length) {
            async.each(data.Contents, function(file, cb) {
                var params = {
                    CopySource: bucketName + '/' + file.Key,
                    Key: file.Key.replace(oldPrefix, newPrefix)
                };
                s3.copyObject(params, function(copyErr, copyData) {
                    if (copyErr) { // an error occurred
                        console.log(copyErr);
                        cb(copyErr);
                    } else {
                        console.log('Copied: ', params.Key); // successful response
                        cb();
                    }
                });
            }, done);
        }
    });
}
I expect the contents of the root to be copied into last_good/20190817/, without copying the last_good folder itself.

I've since solved this by using the Delimiter option in the s3.listObjects params, i.e.:
s3.listObjects({Prefix: oldPrefix, Delimiter: '/'}, ...)
This only lists files within the root.
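For reference, here is a minimal sketch of how that Delimiter option slots into the listing-and-copy loop above (it reuses the variable names from the original snippet). With Delimiter set to '/', keys under last_good/ (or any other "folder") are grouped into CommonPrefixes rather than Contents, so only root-level files get copied:

// Sketch: list only root-level objects and copy each one under the new prefix.
s3.listObjects({Prefix: oldPrefix, Delimiter: '/'}, function(err, data) {
    if (err) return console.log(err);
    async.each(data.Contents, function(file, cb) {
        var params = {
            CopySource: bucketName + '/' + file.Key,
            Key: file.Key.replace(oldPrefix, newPrefix)
        };
        s3.copyObject(params, cb); // passing cb directly aborts the loop on a copy error
    }, done);
});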

Related

How to upload videos to aws using dynamic path in node js

I have two videos in a local folder that I upload to AWS when I call the uploadDir function.
Each video is named after the path of its already existing file, because I am re-uploading it to replace the same file in its existing directory on AWS.
Example.
Video 1 name is: https://myBucketName.s3.amazonaws.com/Movies/video/comedy/93dcf22e84918d0efcb90ba2/Tom-41560712aa0f84f49ed130f2/Home Alone.mp4
Video 2 name is:
https://myBucketName.s3.amazonaws.com/Movies/video/action/26wjf31g64918d0efcb90bw6/Ben-71560712aa0f84f49ed130f4/Mission Impossible.mp4
But because I can't save a filename containing special characters, I replace those characters in the video name in my local directory using the code below:
var str1 = videoName.replace(/:/g, 'X');         // every ':' becomes 'X'
var str2 = str1.replace(/[&\/\\#,<>{}]/g, 'Q');  // every '/', '\', '&', '#', ',', '<', '>', '{', '}' becomes 'Q'
So I replaced all the ':' with 'X', and all the '/' with 'Q'.
The video names in my directory that I want to upload back to AWS now look like this:
Replaced Video 1 name is: httpsXQQmyBucketName.s3.amazonaws.comQMoviesQvideoQcomedyQ93dcf22e84918d0efcb90ba2QTom-41560712aa0f84f49ed130f2QHome Alone.mp4
Replaced Video 2 name is: httpsXQQmyBucketName.s3.amazonaws.comQMoviesQvideoQactionQ26wjf31g64918d0efcb90bw6QBen-71560712aa0f84f49ed130f4QMission Impossible.mp4
Now I want to upload the videos back to their AWS paths, which are the original video names.
To achieve this I believe I must first restore the original special characters, so every 'X' becomes ':' and every 'Q' becomes '/', and then upload to the resulting path.
How can I achieve this?
I already have a function uploading the videos to the AWS bucket, but not to the directory I want them in.
This is the code uploading the videos to AWS:
var AWS = require('aws-sdk');
var path = require("path");
var fs = require('fs');

const uploadDir = function(s3Path, bucketName) {
    let s3 = new AWS.S3();

    function walkSync(currentDirPath, callback) {
        fs.readdirSync(currentDirPath).forEach(function (name) {
            var filePath = path.join(currentDirPath, name);
            var stat = fs.statSync(filePath);
            if (stat.isFile()) {
                callback(filePath, stat);
            } else if (stat.isDirectory()) {
                walkSync(filePath, callback);
            }
        });
    }

    walkSync(s3Path, function(filePath, stat) {
        let bucketPath = filePath.substring(s3Path.length + 1);
        let params = {Bucket: bucketName, Key: bucketPath, Body: fs.readFileSync(filePath)};
        s3.putObject(params, function(err, data) {
            if (err) {
                console.log(err)
            } else {
                console.log('Successfully uploaded ' + bucketPath + ' to ' + bucketName);
            }
        });
    });
};

uploadDir(path.resolve('path-containing-videos'), 'bucket-name');
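One possible approach is to reverse the substitutions before uploading and use the remainder of the URL as the S3 key. Below is a minimal sketch; toS3Key and urlPrefix are hypothetical names introduced for illustration, and the mapping only works if the original names never contain a literal 'X' or 'Q'.

// Sketch: turn a sanitized local filename back into its original S3 key.
function toS3Key(localName) {
    var originalUrl = localName.replace(/X/g, ':').replace(/Q/g, '/');
    // Hypothetical prefix; replace with your real bucket URL.
    var urlPrefix = 'https://myBucketName.s3.amazonaws.com/';
    return originalUrl.slice(urlPrefix.length);
}

// Example: toS3Key('httpsXQQmyBucketName.s3.amazonaws.comQMoviesQvideoQcomedyQ...QHome Alone.mp4')
// would yield 'Movies/video/comedy/.../Home Alone.mp4'.

Inside the walkSync callback you could then pass toS3Key(path.basename(filePath)) as the Key instead of bucketPath, so each video lands back at its original path on S3.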

Download pdf files from external url's - Heroku, NodeJS, Angular 7

I am trying to download multiple PDF files from external sources to my Node.js server (on Heroku) temporarily and then upload them to an AWS S3 bucket.
I have tried multiple methods, all of which work fine on my local machine but not on a Heroku dyno. I am unable to even create a folder on Heroku, I guess due to limited permissions.
In Node I have tried:
1) var download = require('download-file') (currently used in the code below)
2) axios
3) res.download()
Download Files Code
const downloadFiles = async (unique_files) => {
    for (let index = 0; index < unique_files.length; index++) {
        let file_ext = unique_files[index].substr(unique_files[index].length - 4);
        if (file_ext == ".pdf") {
            await downloadzz(unique_files[index])
        }
    }
}

function downloadzz(link) {
    download(link, function(err) {
        if (err) throw err
        console.log("DOWNLOAD Complete");
    });
}
Upload Files Code
const uploadFiles = async (unique_files) => {
    for (let index = 0; index < unique_files.length; index++) {
        let file_ext = unique_files[index].substr(unique_files[index].length - 4);
        if (file_ext == ".pdf") {
            await uploadzz(unique_files[index])
        }
    }
}

function uploadzz(link) {
    fs.readFile(require('path').resolve(__dirname + '/../external-pdfs/', link.slice(link.lastIndexOf('/') + 1)), function (err, data) {
        const params = {Bucket: pdfBucket, Key: link.slice(link.lastIndexOf('/') + 1), Body: data, ACL: "public-read"};
        s3.putObject(params, function(err, data) {
            if (err) {
                console.log("Failed Upload", err);
            } else {
                console.log("Successfully uploaded data to bucket", data);
            }
        });
    });
}
I don't get any errors, but no folder named external-pdfs seems to exist on the Heroku server.
I am open to better solutions: for example, directly uploading the file from the external URL to S3.
How can I read a file from an external URL and directly upload it to an AWS S3 bucket?
You can use axios. By setting the responseType to stream, you can get the file data as a stream and pass it as the body. Here is example code that gets the PDF from a URL and uploads it directly to S3:
const AWS = require('aws-sdk');
const axios = require('axios');

AWS.config.loadFromPath('./config.json');
const s3 = new AWS.S3({apiVersion: '2006-03-01'});

const URL = "<YOUR_URL>";

const uploadPdfToS3 = async () => {
    try {
        const {data, headers} = await axios.get(URL, {responseType: 'stream'});
        // Create params for putObject call
        const objectParams = {
            Bucket: "<YOUR_BUCKET>",
            Key: "<YOUR_KEY>",
            ContentLength: headers['content-length'],
            Body: data
        };
        // Create object upload promise
        await s3.putObject(objectParams).promise();
    } catch (err) {
        console.log("ERROR --->" + err)
    }
}
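If you parameterize that function (a small, hypothetical adaptation of the snippet above), your existing loop can stream each PDF straight from its URL to S3 without ever touching the Heroku filesystem. pdfBucket and s3 are the names already used in the code above; uploadPdfFromUrl and uploadAll are illustrative names:

// Sketch: same axios/putObject flow, taking the URL and key per file.
const uploadPdfFromUrl = async (url, key) => {
    const {data, headers} = await axios.get(url, {responseType: 'stream'});
    await s3.putObject({
        Bucket: pdfBucket, // bucket name from the question's code
        Key: key,
        ContentLength: headers['content-length'],
        Body: data
    }).promise();
};

const uploadAll = async (unique_files) => {
    for (const link of unique_files) {
        if (link.endsWith('.pdf')) {
            await uploadPdfFromUrl(link, link.slice(link.lastIndexOf('/') + 1));
        }
    }
};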
On the Angular side, you can use the FileSaver library to save the PDF file in the browser.

Node.js execution order

I am quite new to Node.js and I am trying to load JSON config files stored either in Amazon S3 or in a local repository. Below is my code so far:
var cfg = process.env.CONFIG_FILE_NAME;
log.info("Loading config '%s'", cfg);

if (cfg.indexOf("s3") !== -1 || cfg.indexOf("S3") !== -1) {
    log.info("S3 path detected");
    var s3 = new aws.S3();
    var myRegex = /\/\/(\w*)\/(.*)/g;
    var matched = myRegex.exec(cfg);
    var bucket = matched[1];
    log.info("Extracted bucket: ", bucket);
    var key = matched[2];
    log.info("Extracted key: ", key);
    var params = {
        Bucket: bucket,
        Key: key
    };
    s3.getObject(params, function(err, data) {
        if (err) log.warn(err, err.stack);
        else {
            log.info("Loaded config from S3");
            cfg = JSON.parse(data.Body);
            log.info("Config content: ", cfg);
        }
    });
}
else {
    try {
        //some code here
    } catch (e) {
        //some code here
    }
}

subscriptions = cfg.subscriptions;
log.info("This supposes to contain json content from S3: ", cfg);
The idea is that the code checks whether the message sent to Amazon Lambda contains a path to S3 (the CONFIG_FILE_NAME field). If it does, the code loads the config file from S3; otherwise it loads it locally. However, when I run the code, it outputs something like this:
4 Jan 11:37:34 - [INFO] Loading config 'Path-to-S3'
4 Jan 11:37:34 - [INFO] S3 path detected
4 Jan 11:37:34 - [INFO] Extracted bucket: mybucket
4 Jan 11:37:34 - [INFO] Extracted key: mykey.cfg.json
4 Jan 11:37:34 - [INFO] "This suppose to contain json content from S3: Path-to-S3'
4 Jan 11:37:34 - [INFO] Loaded config from S3
4 Jan 11:37:34 - [INFO] Config content: my-config-content
So the problem is that the line subscriptions = cfg.subscriptions; executes before the config file is loaded from S3. At that point cfg only contains the path to the config, not the config content I want to load from S3. My later code depends on the subscriptions field of the config, so it gets stuck right here.
You can use the async module to make your code work (npm install --save async).
Async is a utility module which provides straightforward, powerful functions for working with asynchronous JavaScript.
var async = require("async");
//or you can just use var waterfall = require("async/waterfall");
var cfg = process.env.CONFIG_FILE_NAME;
log.info("Loading config '%s'", cfg);
async.waterfall([
function (callback) {
if (cfg.indexOf("s3") !== -1 || cfg.indexOf("S3") !== -1) {
log.info("S3 path detected");
var s3 = new aws.S3();
var myRegex = /\/\/(\w*)\/(.*)/g;
var matched = myRegex.exec(cfg);
var bucket = matched[1];
log.info("Extracted bucket: ", bucket);
var key = matched[2];
log.info("Extracted key: ", key);
var params = {
Bucket: bucket,
Key : key
};
s3.getObject(params, function (err, data) {
if (err) {
log.warn(err, err.stack);
callback();
}
else {
log.info("Loaded config from S3");
cfg = JSON.parse(data.Body);
log.info("Config content: ", cfg);
callback(null, cfg);
}
});
}
else {
callback();
}
},
function (cfg, callback) {
try {
//some code here
}
catch (e) {
//some code here
}
var subscriptions = cfg.subscriptions;
log.info("This supposes to contain json content from S3: ", cfg);
callback(null, 'done');
}
], function (err, result) {
// result now equals 'done'
});
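As an alternative sketch, the aws-sdk's .promise() helper lets you await the S3 read before touching cfg, which avoids the callback plumbing entirely. Here aws is assumed to be the same aws-sdk module used in the question, and the local-load branch is left as a stub just as in the question:

// Sketch: same flow with async/await instead of async.waterfall.
const loadSubscriptions = async () => {
    let cfg = process.env.CONFIG_FILE_NAME;
    if (/s3/i.test(cfg)) {
        const matched = /\/\/(\w*)\/(.*)/g.exec(cfg);
        const data = await new aws.S3()
            .getObject({Bucket: matched[1], Key: matched[2]})
            .promise();
        cfg = JSON.parse(data.Body);
    } else {
        // load the local config here
    }
    return cfg.subscriptions; // cfg is guaranteed to be loaded at this point
};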

Concat MP3/media audio files on amazon S3 server

I want to concatenate files that have been uploaded to the Amazon S3 server. How can I do this?
On a local machine I can concatenate them using the following code:
var fs = require('fs'),
    files = fs.readdirSync('./files'),
    clips = [],
    stream,
    currentfile,
    dhh = fs.createWriteStream('./concatfile.mp3');

files.forEach(function (file) {
    clips.push(file.substring(0, 6));
});

function main() {
    if (!clips.length) {
        dhh.end("Done");
        return;
    }
    currentfile = './files/' + clips.shift() + '.mp3';
    stream = fs.createReadStream(currentfile);
    stream.pipe(dhh, {end: false});
    stream.on("end", function() {
        main();
    });
}

main();
You can achieve what you want by breaking it into two steps:
Manipulating files on S3
Since S3 is remote file storage, you can't run code on the S3 server to do the operation in place (as @Andrey mentioned).
What you will need to do in your code is fetch each input file, process them locally and upload the results back to S3. Check out the code examples from Amazon:
var s3 = new AWS.S3();
var params = {Bucket: 'myBucket', Key: 'mp3-input1.mp3'};
var file = require('fs').createWriteStream('/path/to/input.mp3');
s3.getObject(params).createReadStream().pipe(file);
At this stage you'll run your concatenation code and upload the results back:
var fs = require('fs');
var zlib = require('zlib');

var body = fs.createReadStream('bigfile.mp3').pipe(zlib.createGzip());
var s3obj = new AWS.S3({params: {Bucket: 'myBucket', Key: 'myKey'}});
s3obj.upload({Body: body})
    .on('httpUploadProgress', function(evt) { console.log(evt); })
    .send(function(err, data) { console.log(err, data); });
Merging two (or more) mp3 files
Since MP3 files include a header that specifies information such as the bitrate, simply concatenating them together might introduce playback issues.
See: https://stackoverflow.com/a/5364985/1265980
What you want is a tool for that. One approach is saving your input MP3 files in a tmp folder and executing an external program such as ffmpeg to change the bitrate, concatenate the files and fix the header.
Alternatively you can use a library (such as fluent-ffmpeg) that allows you to use ffmpeg within node.js.
In its code example, shown below, you can see how to merge files together within the Node API:
ffmpeg('/path/to/part1.avi')
    .input('/path/to/part2.avi')
    .input('/path/to/part3.avi')
    .on('error', function(err) {
        console.log('An error occurred: ' + err.message);
    })
    .on('end', function() {
        console.log('Merging finished !');
    })
    .mergeToFile('/path/to/merged.avi', '/path/to/tempDir');
Here's my quick take on the problem of downloading and processing S3 objects. My example is focused mostly on getting the data local and then processing it once it's all downloaded. I suggest you use one of the ffmpeg approaches mentioned above.
var RSVP = require('rsvp');

var s3 = new AWS.S3();
var bucket = '<your bucket name>';

var getFile = function(key, filePath) {
    return new RSVP.Promise(function(resolve, reject) {
        var file = require('fs').createWriteStream(filePath);
        if (!file) {
            reject('unable to open file');
        }
        s3.getObject({
            Bucket: bucket,
            Key: key
        }).on('httpData', function(chunk) {
            file.write(chunk);
        }).on('httpDone', function() {
            file.end();
            resolve(filePath);
        }).on('error', function(err) {
            reject(err);
        }).send(); // the request is not dispatched until send() is called
    });
};

var tempFiles = ['<local temp filename 1>', '<local temp filename 2>'];
var keys = ['<s3 object key 1>', '<s3 object key 2>'];
var promises = [];

for (var i = 0; i < keys.length; ++i) {
    var promise = getFile(keys[i], tempFiles[i]);
    promises.push(promise);
}

RSVP.all(promises).then(function(data) {
    // do something with your files
}).catch(function(error) {
    // handle errors
});
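For completeness, here is a sketch of what "do something with your files" could look like, reusing the pipe-based concatenation from the question and the managed upload call from earlier. concatFiles is an illustrative helper, the file names are placeholders, and the MP3 header caveat above still applies: an ffmpeg-based merge is more robust than raw byte concatenation.

var fs = require('fs');

// Sketch: concatenate the downloaded temp files into one local file,
// then upload the result back to S3.
function concatFiles(inputPaths, outputPath, done) {
    var out = fs.createWriteStream(outputPath);
    function next() {
        if (!inputPaths.length) {
            out.end();
            return done(outputPath);
        }
        var stream = fs.createReadStream(inputPaths.shift());
        stream.pipe(out, {end: false});
        stream.on('end', next);
    }
    next();
}

RSVP.all(promises).then(function(files) {
    concatFiles(files, 'merged.mp3', function(mergedPath) {
        var s3obj = new AWS.S3({params: {Bucket: bucket, Key: 'merged.mp3'}});
        s3obj.upload({Body: fs.createReadStream(mergedPath)})
            .send(function(err, data) { console.log(err, data); });
    });
}).catch(function(error) {
    // handle errors
});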

nodejs aws s3 replace files

I'm trying to upload a folder from a local directory to an AWS S3 bucket.
I have the following code:
var s3 = require('s3');
var AWS = require('aws-sdk'); // needed for AWS.S3 below

var awsS3Client = new AWS.S3({
    accessKeyId: 'XXXXXXX',
    secretAccessKey: 'XXXXXXX'
});

var options = {
    s3Client: awsS3Client
};

var client = s3.createClient(options);

var params = {
    localDir: "./zips",
    deleteRemoved: true, // default false, whether to remove s3 objects
                         // that have no corresponding local file
    s3Params: {
        Bucket: "node-files",
        Prefix: "test/unzip/"
    },
};

var uploader = client.uploadDir(params);

uploader.on('error', function (err) {
    console.error("unable to sync:", err.stack);
});
uploader.on('progress', function () {
    console.log("progress", uploader.progressAmount, uploader.progressTotal);
});
uploader.on('end', function () {
    console.log("done uploading");
});
Everything works fine when uploading for the first time: the directory and all of its files are intact and in the bucket.
However, when I try a second time, the upload just gets stuck and times out.
I'm assuming I need to set some kind of option to overwrite the existing files?
