Download entire S3 bucket recursively, node wrapper - node.js

I am trying to sync a local folder with the files from an S3 bucket (the full bucket directory structure). I tried using the node-s3-client npm package, but the connection is failing: the process just exits without any output.
Here is the code snippet:
const s3Client = require('s3');

let client = s3Client.createClient({
  s3Options: {
    accessKeyId: config.accessKeyId,
    secretAccessKey: config.secretAccessKey,
    region: config.region,
  },
});

let params = {
  localDir: localdirName,
  deleteRemoved: true,
  s3Params: {
    Bucket: Bname,
  },
};

let downloader = client.downloadDir(params);
downloader.on('error', (err) => {
  throw err;
});
downloader.on('progress', () =>
  console.log('progress: ', downloader.progressAmount, downloader.progressTotal),
);
downloader.on('end', () => console.log('Download completed!'));
If this can't be resolved, please point me towards a workaround. Thanks!
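If the node-s3-client wrapper keeps failing silently, one workaround is to use the official aws-sdk directly. Below is a minimal sketch, assuming the same config object, bucket name and local target directory as in the snippet above: it pages through listObjectsV2 and streams each object to disk.

const AWS = require('aws-sdk');
const fs = require('fs');
const path = require('path');

const s3 = new AWS.S3({
  accessKeyId: config.accessKeyId,        // same config object as above (assumed)
  secretAccessKey: config.secretAccessKey,
  region: config.region,
});

async function downloadBucket(Bucket, localDir) {
  let ContinuationToken;
  do {
    const page = await s3.listObjectsV2({ Bucket, ContinuationToken }).promise();
    for (const { Key } of page.Contents) {
      if (Key.endsWith('/')) continue; // skip folder placeholder keys
      const dest = path.join(localDir, Key);
      fs.mkdirSync(path.dirname(dest), { recursive: true });
      // Stream each object to disk to avoid buffering large files in memory.
      await new Promise((resolve, reject) => {
        s3.getObject({ Bucket, Key })
          .createReadStream()
          .on('error', reject)
          .pipe(fs.createWriteStream(dest))
          .on('error', reject)
          .on('close', resolve);
      });
    }
    ContinuationToken = page.NextContinuationToken;
  } while (ContinuationToken);
}

This does not delete local files that no longer exist in the bucket, so it is not a full replacement for deleteRemoved, but it covers the recursive download.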

Related

Missing region in AWS Rekognition in Node.js

// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT-0 (For details, see https://github.com/awsdocs/amazon-rekognition-developer-guide/blob/master/LICENSE-SAMPLECODE.)
const AWS = require('aws-sdk')

const bucket = 'bucket' // the bucket name without s3://
const photo_source = 'source.jpg'
const photo_target = 'target.jpg'

const config = new AWS.Config({
  accessKeyId: process.env.AWS_ACCESS_KEY_ID,
  secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
  region: process.env.AWS_REGION
})

const client = new AWS.Rekognition();

const params = {
  SourceImage: {
    S3Object: {
      Bucket: bucket,
      Name: photo_source
    },
  },
  TargetImage: {
    S3Object: {
      Bucket: bucket,
      Name: photo_target
    },
  },
  SimilarityThreshold: 70
}

client.compareFaces(params, function(err, response) {
  if (err) {
    console.log(err, err.stack); // an error occurred
  } else {
    response.FaceMatches.forEach(data => {
      let position = data.Face.BoundingBox
      let similarity = data.Similarity
      console.log(`The face at: ${position.Left}, ${position.Top} matches with ${similarity} % confidence`)
    }) // for response.FaceMatches
  } // if
});
The above code is from the official AWS documentation (https://docs.aws.amazon.com/rekognition/latest/dg/faces-comparefaces.html). It implements face comparison between two images using Node.js. When I compare images with this code, I get an error saying the region is missing from the config. When I checked the code, I noticed that even though a config object is created, it is not used anywhere. Can someone tell me where I should use the config object? If the error has some other cause, please tell me what it is.
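The config object in that sample is indeed never used, which is why the SDK reports a missing region. A minimal fix, keeping the rest of the sample unchanged, is to hand the configuration to the Rekognition constructor, or to apply it globally before the client is created; a sketch of both options:

const AWS = require('aws-sdk');

// Option 1: pass the config straight into the service client
// (replaces the bare `new AWS.Rekognition()` in the sample):
// const client = new AWS.Rekognition(config);

// Option 2: apply the settings globally so every client picks them up:
AWS.config.update({
  accessKeyId: process.env.AWS_ACCESS_KEY_ID,
  secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
  region: process.env.AWS_REGION
});
const client = new AWS.Rekognition();

Either way, make sure AWS_REGION is actually set in the environment, otherwise the same error will come back.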

How to download large files from AWS S3 with restart on network loss

I am trying to implement a "network safe" downloader from an AWS S3 bucket.
The downloader should be able to download a single .zip file from S3 and write it to a local .zip file.
My current approach is using Node with a read stream and a write stream, as follows:
const AWS = require('aws-sdk');
const fs = require('fs');
const path = require('path');

const download = async () => {
  AWS.config.update({
    accessKeyId: "",
    secretAccessKey: "",
    region: ""
  });
  const s3 = new AWS.S3();
  const params = {
    Bucket: '',
    Key: ''
  };
  const { ContentLength: contentLength } = await s3.headObject(params).promise();
  const rs = s3.getObject(params).createReadStream();
  const ws = fs.createWriteStream(path.join('./', 'file.zip'));
  let progress = 0;
  rs.on('data', function (chunk) {
    progress += chunk.length;
    console.log(`Progress: ${progress / contentLength * 100}%`);
  });
  rs.pipe(ws);
}
What I need is a way to catch, or create, an event for network errors that will allow me to pause and restart the download when the network is back, or, even better, auto-restart the download when the network is restored.
Currently I couldn't find any events for network errors, and it seems that losing the network while a download is in progress does not trigger the 'error' event.
Any solutions in Node or Python would be very appreciated.
Short update: I found a workaround using wget to download a pre-signed URL to the object I want to download. It's still not the native experience I wanted to have; share your thoughts.
import { spawn } from 'child_process';
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';

export class Downloader {
  constructor(dest_dir, file_name_to_save) {
    this.dest_dir = dest_dir;
    this.file_name = file_name_to_save;
    this.progress = "61%"
  }

  async download_file(on_progress, on_done, socket, io) {
    let client = new S3Client({
      region: 'eu-west-1',
      credentials: {
        accessKeyId: "",
        secretAccessKey: "",
      }
    });
    const command = new GetObjectCommand({ Bucket: "s", Key: "" });
    const url = await getSignedUrl(client, command, { expiresIn: 3600 });
    // wget -c continues a partially downloaded file, which gives resume for free.
    let child = spawn('wget', ['-c', '-O', this.dest_dir + this.file_name, url]);
    socket.on('cancel', () => {
      child.kill()
      console.log("killed child")
    })
    child.stderr.on('data', (e) => {
      let stdout = e.toString();
      let p = stdout.match(/([0-9]+?\%)+/g); // wget prints progress percentages on stderr
      if (p && p.length > 0 && p[0] != this.progress) {
        on_progress && on_progress(p[0])
        console.log(p[0])
        this.progress = p[0]
      }
    });
    child.stdout.on('end', function (data) {
      on_done && on_done("end");
      console.log("end: ", data)
      child.kill();
    });
    child.on('exit', function (code) {
      if (code != 0) console.log('Failed: ' + code);
      else console.log("success!", code)
      on_done && on_done("exit");
    });
  }
}
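For a more native resume without wget, one option is to lean on S3's Range support: keep whatever made it to disk, and when the stream drops, issue a new getObject for the remaining byte range. A rough sketch with aws-sdk v2 follows; it assumes the dropped connection eventually surfaces as a stream error or timeout (lowering httpOptions.timeout on the S3 client makes that happen faster), and the retry loop is deliberately simple.

const AWS = require('aws-sdk');
const fs = require('fs');

async function downloadWithResume(s3, Bucket, Key, dest) {
  const { ContentLength } = await s3.headObject({ Bucket, Key }).promise();
  let written = fs.existsSync(dest) ? fs.statSync(dest).size : 0;
  while (written < ContentLength) {
    try {
      await new Promise((resolve, reject) => {
        // Ask S3 only for the bytes we do not have yet.
        const rs = s3.getObject({ Bucket, Key, Range: `bytes=${written}-` }).createReadStream();
        const ws = fs.createWriteStream(dest, { flags: 'a' }); // append to the partial file
        rs.on('error', reject);
        ws.on('error', reject);
        ws.on('close', resolve);
        rs.pipe(ws);
      });
    } catch (err) {
      console.log('stream dropped, retrying:', err.code || err.message);
      await new Promise((r) => setTimeout(r, 5000)); // back off before resuming
    }
    written = fs.existsSync(dest) ? fs.statSync(dest).size : 0; // whatever made it to disk so far
  }
}

The progress report then simply becomes written / ContentLength, and a crash or reboot can resume from the existing partial file.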

Reading a ZIP archive from S3, and writing uncompressed version to new bucket

I have an app where users can upload a ZIP archive of resources. My app handles the upload and saves it to S3. At some point I want to run a transformation that will read this S3 object, unzip it, and write the contents to a new S3 bucket. This all happens in a Node service.
I am using the unzipper library to handle unzipping. Here is my initial code.
const AWS = require('aws-sdk');
const stream = require('stream');
const unzipper = require('unzipper');

async function downloadFromS3() {
  let s3 = new AWS.S3();
  try {
    const object = s3
      .getObject({
        Bucket: "zip-bucket",
        Key: "Archive.zip"
      })
      .createReadStream();
    object.on("error", err => {
      console.log(err);
    });
    await streaming_unzipper(object, s3);
  } catch (e) {
    console.log(e);
  }
}

async function streaming_unzipper(s3ObjectStream, s3) {
  await s3.createBucket({ Bucket: "unzip-bucket" }).promise();
  const unzipStream = s3ObjectStream.pipe(unzipper.Parse());
  unzipStream.pipe(
    stream.Transform({
      objectMode: true,
      transform: function(entry, e, next) {
        const fileName = entry.path;
        const type = entry.type; // 'Directory' or 'File'
        const size = entry.vars.uncompressedSize; // There is also compressedSize;
        if (type === "File") {
          s3.upload(
            { Bucket: "unzip-bucket", Body: entry, Key: entry.path },
            {},
            function(err, data) {
              if (err) console.error(err);
              console.log(data);
              entry.autodrain();
            }
          );
          next();
        } else {
          entry.autodrain();
          next();
        }
      }
    })
  );
}
This code works, but I feel like it could be optimized. Ideally I would like to pipe the download stream -> unzipper stream -> uploader stream, so that chunks are uploaded to S3 as they get unzipped, instead of waiting for the full unzip to finish and then uploading.
The problem I am running into is that I need the file name (to set as the S3 key), which I only have after unzipping, before I can start the upload.
Is there a good way to create a streaming upload to S3 that is initiated with a temporary id and gets rewritten with the final file name after the full stream is finished?
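For what it's worth, unzipper.Parse() emits each entry with its path set before any of that entry's bytes are consumed, so the final key is already known at the moment the per-file upload starts; no temporary id should be needed. Below is a sketch of the download -> unzip -> upload pipeline, using the same bucket names as above and awaiting each upload inside the transform so entries are processed with backpressure.

const AWS = require('aws-sdk');
const stream = require('stream');
const unzipper = require('unzipper');

async function streamingUnzipToS3(s3) {
  await s3.createBucket({ Bucket: 'unzip-bucket' }).promise();
  await new Promise((resolve, reject) => {
    s3.getObject({ Bucket: 'zip-bucket', Key: 'Archive.zip' })
      .createReadStream()
      .on('error', reject)
      .pipe(unzipper.Parse())
      .on('error', reject)
      .pipe(stream.Transform({
        objectMode: true,
        transform: async function (entry, _, next) {
          try {
            if (entry.type === 'File') {
              // entry.path is known here, before any bytes are read, so each
              // file streams straight into its final key via a multipart upload.
              await s3.upload({ Bucket: 'unzip-bucket', Key: entry.path, Body: entry }).promise();
            } else {
              entry.autodrain();
            }
            next();
          } catch (err) {
            next(err);
          }
        },
      }))
      .on('finish', resolve)
      .on('error', reject);
  });
}

Awaiting the upload before calling next() also avoids calling autodrain() on an entry that is already being uploaded, which the callback version above does.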

Serverless Lambda trigger to read a JSON file

I have a Lambda (Node) with a trigger that fires when a new JSON file is added to our S3 bucket. Here is my Lambda code:
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

module.exports.bookInfo = (event, context) => {
  console.log('Events ', JSON.stringify(event));
  event.Records.forEach((record) => {
    const filename = record.s3.object.key;
    const bucketname = record.s3.bucket.name;
    let logMsg = [];
    const s3File = `BucketName: [${bucketname}] FileName: [${filename}]`;
    console.log(s3File)
    logMsg.push(`Lambda execution started for ${s3File}, Trying to download file from S3`);
    try {
      s3.getObject({
        Bucket: bucketname,
        Key: filename
      }, function(err, data) {
        if (err) {
          logMsg.push('Generate Error :', err);
          console.log(logMsg)
          return null;
        }
        logMsg.push(`File downloaded successfully. Processing started for ${s3File}`);
        logMsg.push('Data is ', JSON.stringify(data.Body))
      });
    } catch (e) {
      console.log(e)
    }
  });
}
When I run this, I don't get the file content, and I suspect that the Lambda finishes execution before the file read operation completes. I tried async/await without success. What am I missing here? I was able to read a small file of 1 KB, but when my file grows to something like 100 MB, it causes the issue.
Thanks in advance.
I was able to do it through async/await. Here is my code:
module.exports.bookInfo = (event, context) => {
  event.Records.forEach(async (record) => {
    const filename = record.s3.object.key;
    const bucketname = record.s3.bucket.name;
    const s3File = `BucketName: [${bucketname}] FileName: [${filename}]`;
    logMsg.push(`Lambda execution started for ${s3File}, Trying to download file from S3`);
    let response = await s3.getObject({
      Bucket: bucketname,
      Key: filename
    }).promise();
  })
}
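One caveat with this version: Array.prototype.forEach does not wait for the async callbacks it starts, so the handler can still return before every getObject has resolved. A sketch that keeps the Lambda alive until all records are processed, using the same event shape:

const AWS = require('aws-sdk');
const s3 = new AWS.S3(); // created once, outside the handler

module.exports.bookInfo = async (event) => {
  await Promise.all(event.Records.map(async (record) => {
    const bucketname = record.s3.bucket.name;
    // Keys in S3 event notifications arrive URL-encoded.
    const filename = decodeURIComponent(record.s3.object.key.replace(/\+/g, ' '));
    const response = await s3.getObject({ Bucket: bucketname, Key: filename }).promise();
    // response.Body is a Buffer holding the whole object.
    const json = JSON.parse(response.Body.toString('utf-8'));
    console.log(`Processed ${filename} from ${bucketname}:`, Object.keys(json).length, 'top-level keys');
  }));
};

Because the handler itself is async and awaits the Promise.all, Lambda waits for the returned promise before freezing or ending the invocation, which is likely why the larger files were getting cut off before.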

Node.js AWS S3 replace files

I'm trying to upload a folder from a local directory to an AWS S3 bucket.
I have the following code.
var AWS = require('aws-sdk');
var s3 = require('s3');

var awsS3Client = new AWS.S3({
  accessKeyId: 'XXXXXXX',
  secretAccessKey: 'XXXXXXX'
});

var options = {
  s3Client: awsS3Client
};
var client = s3.createClient(options);

var params = {
  localDir: "./zips",
  deleteRemoved: true, // default false, whether to remove s3 objects
                       // that have no corresponding local file.
  s3Params: {
    Bucket: "node-files",
    Prefix: "test/unzip/"
  },
};

var uploader = client.uploadDir(params);
uploader.on('error', function (err) {
  console.error("unable to sync:", err.stack);
});
uploader.on('progress', function () {
  console.log("progress", uploader.progressAmount, uploader.progressTotal);
});
uploader.on('end', function () {
  console.log("done uploading");
});
All works fine when uploading for the first time: the directory and all of its files are intact and in the bucket.
However, when I try a second time, the buffer just gets stuck and times out.
I'm assuming I need to set some kind of option to overwrite the existing files?
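S3 itself overwrites objects on PUT, so no overwrite flag should be needed; the hang is more likely an issue inside the (no longer maintained) s3 wrapper. If it keeps stalling, here is a fallback sketch that walks the folder and uploads each file with the official aws-sdk, using the same bucket, prefix and local directory as above:

const AWS = require('aws-sdk');
const fs = require('fs');
const path = require('path');

const s3sdk = new AWS.S3({ accessKeyId: 'XXXXXXX', secretAccessKey: 'XXXXXXX' });

// Recursively upload a directory; re-running it simply replaces existing keys.
async function uploadDir(localDir, Bucket, Prefix) {
  for (const name of fs.readdirSync(localDir)) {
    const full = path.join(localDir, name);
    if (fs.statSync(full).isDirectory()) {
      await uploadDir(full, Bucket, Prefix + name + '/'); // recurse into subfolders
    } else {
      await s3sdk.upload({ Bucket, Key: Prefix + name, Body: fs.createReadStream(full) }).promise();
      console.log('uploaded', Prefix + name);
    }
  }
}

uploadDir('./zips', 'node-files', 'test/unzip/').catch(console.error);

This does not implement deleteRemoved, but it sidesteps the stuck buffer on repeat runs.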
