AWS S3 Loading many files - node.js

I need to upload a lot of files (about 65.000) splitted in subdirectory.
I tried to iterate and load every single file like this:
const fs = require("fs");
const path = require("path");
const async = require("async");
const AWS = require("aws-sdk");
const readdir = require("recursive-readdir");
const slash = require("slash");
const { BUCKET, KEY, SECRET } = process.env;
const rootFolder = path.resolve(__dirname, "./");
const uploadFolder = "./test_files/15";
const s3 = new AWS.S3({
signatureVersion: "v4",
accessKeyId: KEY,
secretAccessKey: SECRET,
});
function getFiles(dirPath) {
return fs.existsSync(dirPath) ? readdir(dirPath) : [];
}
async function deploy(upload) {
if (!BUCKET || !KEY || !SECRET) {
throw new Error("you must provide env. variables: [BUCKET, KEY, SECRET]");
}
const filesToUpload = await getFiles(path.resolve(__dirname, upload));
return new Promise((resolve, reject) => {
async.eachOfLimit(
filesToUpload,
10,
async.asyncify(async (file) => {
const Key = file.replace(rootFolder + path.sep, "");
console.log(`uploading: [${slash(Key)}]`);
var options = { partSize: 5 * 1024 * 1024, queueSize: 4 };
return new Promise((res, rej) => {
s3.upload(
{
Key: slash(Key),
Bucket: BUCKET,
Body: fs.readFileSync(file),
},
(err) => {
if (err) {
return rej(new Error(err));
}
res({ result: true });
}
);
});
}),
(err) => {
if (err) {
return reject(new Error(err));
}
resolve({ result: true });
}
);
});
}
deploy(uploadFolder)
.then(() => {
console.log("task complete");
process.exit(0);
})
.catch((err) => {
console.error(err);
process.exit(1);
});
but after a considerable number of uploads i have this:
Error: Error: NetworkingError: connect ETIMEDOUT IP_S3_AWS
I need to upload this set of files from ec2 instance (because its a result of a image processing). I have this behavior from my pc, i don't know if from ec2 have the same problem.
I have considered the way of zip all and upload but i need to keep the original directory structure.
I accept also new way to resolve the problem.
Sorry for my bad english.

It would probably be much simpler to use the AWS CLI aws s3 sync command instead of building this yourself.

Related

How to read JSON from S3 by AWS Lambda Node.js 18.x runtime?

#TBA gave the solution.
The root cause is not by runtime. It came from SDK v3.
Point: Do not update the code with mixed things (like both of runtime & SDK version together 🥲)
Thanks again, TBA.
I was using Node.js 14.x version runtime Lambda to read some json file from S3.
Brief code is below
const AWS = require("aws-sdk");
const s3 = new AWS.S3();
exports.handler = (event) => {
const { bucketName, objKey } = event
const params = {
Bucket: bucketName,
Key: objKey
};
return new Promise((resolve) => {
s3.getObject(params, async (err, data) =>{
if (err) console.log(err, err.stack);
else {
const contents = JSON.parse(data.Body)
resolve(contents);
}
});
})
};
and it returned the json data as I expected.
And today I tried to create a new lambda with runtime Node.js 18.x but it returned null or some errors...
Q) Could you give me some advice to solve this 🥲 ?
+) I used same json file for each lambda
+) Not sure why, but in my case, data.Body.toString() didn't work (I saw some answers in stackoverflow provide that and tried but no lucks)
Thanks in advance!
Case A (returns null)
import { S3Client, GetObjectCommand } from "#aws-sdk/client-s3";
const s3Client = new S3Client({ region: "ap-northeast-2" });
export const handler = (event) => {
const { objKey, bucketName } = event;
const params={
Bucket: bucketName,
Key: objKey
};
const getObjCommand = new GetObjectCommand(params);
return new Promise((resolve) => {
s3Client.send(getObjCommand, async (err, data) =>{
if (err) console.log(err, err.stack);
else {
const list = JSON.parse(data.Body)
resolve(list);
}
});
})
};
Case B (returns "Unexpected token o in JSON at position 1")
export const handler = async (event) => {
const { objKey, bucketName } = event;
const params={
Bucket: bucketName,
Key: objKey
};
const getObjCommand = new GetObjectCommand(params);
const response = await s3Client.send(getObjCommand)
console.log("JSON.parse(response.Body)", JSON.parse(response.Body))
};
Case C (returns "TypeError: Converting circular structure to JSON")
export const handler = async (event) => {
const { objKey, bucketName } = event;
const params={
Bucket: bucketName,
Key: objKey
};
const getObjCommand = new GetObjectCommand(params);
try {
const response = await s3Client.send(getObjCommand)
return JSON.stringify(response.Body)
} catch(err) {
console.log("error", err)
return err
}
};

Serving zip file from s3 in aws lamba node.js - unable to extract after download

I'm trying to create a lambda function to read a zip file from s3 and to serve it. But after downloading this file in the browser I can't unzip it, getting the error "Unable to extract, it is in an unsupported format". What can be a problem?
const file = await s3.getObject({
Bucket: 'mybucket',
Key: `file.zip`
}).promise();
return {
statusCode: 200,
isBase64Encoded: true,
body: Buffer.from(file.Body).toString('base64'),
headers: {
'Content-Type': 'application/zip',
'Content-Disposition': `attachment; filename="file.zip"`,
},
}
Your file.Body should already be a Buffer, so Buffer.from(file.Body) should be unnecessary but unharmful.
I think your problem is that you're doing toString('base64') there. The documentation says:
If body is a binary blob, you can encode it as a Base64-encoded string by setting isBase64Encoded to true and configuring / as a Binary Media Type.
This makes me believe that it actually means that AWS will automatically convert your (non-base64) body into base64 in the response body. If that's the case, due to you doing .toString('base64'), your body is being base64'd twice. You could un-base64 your resulting file.zip and see what it gives.
The solution for me was to set 'Content-Encoding': 'base64' response header.
you can follow this code below
"use strict";
const AWS = require("aws-sdk");
const awsOptions = {
region: "us-east-1",
httpOptions: {
timeout: 300000 // Matching Lambda function timeout
}
};
const s3 = new AWS.S3(awsOptions);
const archiver = require("archiver");
const stream = require("stream");
const request = require("request");
const streamTo = (bucket, key) => {
var passthrough = new stream.PassThrough();
s3.upload(
{
Bucket: bucket,
Key: key,
Body: passthrough,
ContentType: "application/zip",
ServerSideEncryption: "AES256"
},
(err, data) => {
if (err) throw err;
}
);
return passthrough;
};
// Kudos to this person on GitHub for this getStream solution
// https://github.com/aws/aws-sdk-js/issues/2087#issuecomment-474722151
const getStream = (bucket, key) => {
let streamCreated = false;
const passThroughStream = new stream.PassThrough();
passThroughStream.on("newListener", event => {
if (!streamCreated && event == "data") {
const s3Stream = s3
.getObject({ Bucket: bucket, Key: key })
.createReadStream();
s3Stream
.on("error", err => passThroughStream.emit("error", err))
.pipe(passThroughStream);
streamCreated = true;
}
});
return passThroughStream;
};
exports.handler = async (event, context, callback) => {
var bucket = event["bucket"];
var destinationKey = event["destination_key"];
var files = event["files"];
await new Promise(async (resolve, reject) => {
var zipStream = streamTo(bucket, destinationKey);
zipStream.on("close", resolve);
zipStream.on("end", resolve);
zipStream.on("error", reject);
var archive = archiver("zip");
archive.on("error", err => {
throw new Error(err);
});
archive.pipe(zipStream);
for (const file of files) {
if (file["type"] == "file") {
archive.append(getStream(bucket, file["uri"]), {
name: file["filename"]
});
} else if (file["type"] == "url") {
archive.append(request(file["uri"]), { name: file["filename"] });
}
}
archive.finalize();
}).catch(err => {
throw new Error(err);
});
callback(null, {
statusCode: 200,
body: { final_destination: destinationKey }
});
};
If you're not restricted to using the same URI as the URI that is serving your API, you could also create a pre-signed URL and return it as a redirection result. However, this will redirect to a different domain (S3 domain) so won't work out-of-the-box if you have to serve from the same domain name (e.g., because of firewall restrictions).

AWS Lambda Custom Nodejs Container Shows Runtime Error

I have built a AWS Lambda function with custom container image. I am trying to convert an excel file to pdf with Libreoffice - getting the file from S3 and saving it to a file and converting it to pdf and then uploading it back to S3.
Here the code.
const fs = require('fs');
const getStream = require('get-stream');
const { Readable } = require('stream')
const { S3Client, GetObjectCommand, PutObjectCommand } = require("#aws-sdk/client-s3");
const libre = require('libreoffice-convert');
const path = require('path');
exports.handler = async (event) => {
const bucket = event.queryStringParameters.bucket;
const file = event.queryStringParameters.file;
const convertedFile = event.queryStringParameters.convertedFile;
if (event.queryStringParameters['warmup'] !== undefined) {
return {
result: true,
message: 'warmed up'
}
}
const client = new S3Client({ region: "ap-south-1" });
const command = new GetObjectCommand({ Bucket: bucket, Key: file });
const response = await client.send(command);
const objectData = response.Body;
const writeStream = fs.createWriteStream("/tmp/sample.xlsx");
objectData.pipe(writeStream);
var end = new Promise((resolve, reject) => {
objectData.on('close', resolve(true));
objectData.on('end', resolve(true));
objectData.on('error', reject(false));
});
let completed = await end;
if (completed) {
const extend = '.pdf'
const outputPath = `/tmp/sample${extend}`;
const enterPath = '/tmp/sample.xlsx';
var readingFile = new Promise((resolve, reject) => {
fs.readFile(enterPath, (err, data)=>{
if (err) {
reject(false);
}
resolve(data);
});
});
var fileData = await readingFile;
var converting = new Promise((resolve, reject) => {
libre.convert(fileData, extend, undefined, (err, done) => {
if (err) {
reject(false)
}
fs.writeFileSync(outputPath, done);
resolve(true)
});
})
var converted = await converting;
if (converted) {
var convertedFileStream = fs.createReadStream(outputPath);
const uploadCommand = new PutObjectCommand({ Bucket: bucket, Key: convertedFile, Body: convertedFileStream });
const lastResponse = await client.send(uploadCommand);
const returnResponse = {
result: true,
message: 'success',
bucket: event.queryStringParameters.bucket,
file: event.queryStringParameters.file,
convertedFile: event.queryStringParameters.convertedFile
};
if (event.queryStringParameters['returnEvent'] !== undefined) {
returnResponse['returnEvent'] = event;
}
return returnResponse;
}
}
return completed;
};
However, I am getting this error at time. Sometimes, it is success, but, sometimes it throws this error.
{
"errorType": "Error",
"errorMessage": "false",
"stack": [
"Error: false",
" at _homogeneousError (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:56:16)",
" at postError (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:72:34)",
" at done (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:99:13)",
" at fail (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:113:13)",
" at /function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:148:24",
" at processTicksAndRejections (internal/process/task_queues.js:97:5)"
]
}
I dont know Nodejs on a great deal so I think if the code is not written the correct way. Any ideas what I am doing wrong here ?
Like #hoangdv when I logged errors I came to know that the file saving to the disk was not correct. So, I changed the area of the code where it saves to like this and then it worked.
const client = new S3Client({ region: "ap-south-1" });
const command = new GetObjectCommand({ Bucket: bucket, Key: file });
const { Body } = await client.send(command);
await new Promise((resolve, reject) => {
Body.pipe(fs.createWriteStream(filePath))
.on('error', err => reject(err))
.on('close', () => resolve())
})
const excelFile = fs.readFileSync(filePath);

Uploading Multiple Files To S3 Node

I am trying to upload multiple files to S3 from my Sails.jsapplication. I've a Controller to handle the Request and a Service to upload should in case multiple controllers wants to utilize the function. The promise in the controller doesn't get resolved. And also, I keep getting this error Unhandled rejection TimeoutError: Missing credentials in config
Controller
multiSizeCreate: (req, res) => {
UploadService.uploadFile(req).then((data, failure) => {
if (failure) return [null];
return [Promise.resolve(AWSService.s3awsupload(data))];<-- <--The promise doesn't get resolved
})
.spread(uploadedData => {
if (!uploadedData) return ResponseService.json(403, res, 'File upload failed');
console.log(uploadedData); <--It logs an unresolved promise
return ResponseService.json(201, res, 'Size(s) created successfully');
})
.catch(err => ValidationService.jsonResolveError(err, Size, res))
}
AWSService
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const fs = require('fs');
const key = sails.config.settings.aws.key;
const secret = sails.config.settings.aws.secret;
const bucket = sails.config.settings.aws.bucket;
_ = require('lodash');
AWS.config.update({accessKeyId: key, secretAccessKey: secret});
const Promise = require('bluebird');
module.exports = {
s3awsupload: (files) => {
let toUpload = []
if (files.length > 0) {
_.forEach(files, file => {
toUpload.push(awsmainUpload(file));
});
return [toUpload];
} else {
return null;
}
}
}
const awsmainUpload = (fileDirectory) => {
const uploadImage = fs.createReadStream(fileDirectory.fd);
const fileName = Math.floor(Date.now() / 1000);
const params = {Bucket: bucket, Key: fileName + " " + fileDirectory.fileName, Body: uploadImage};
return new Promise((resolve, reject) => {
s3.upload(params, (error, outpout) => {
return error ? reject(error) : resolve(outpout);
})
})
}

Create a zip file on S3 from files on S3 using Lambda Node

I need to create a Zip file that consists of a selection of files (videos and images) located in my s3 bucket.
The problem at the moment using my code below is that I quickly hit the memory limit on Lambda.
async.eachLimit(files, 10, function(file, next) {
var params = {
Bucket: bucket, // bucket name
Key: file.key
};
s3.getObject(params, function(err, data) {
if (err) {
console.log('file', file.key);
console.log('get image files err',err, err.stack); // an error occurred
} else {
console.log('file', file.key);
zip.file(file.key, data.Body);
next();
}
});
},
function(err) {
if (err) {
console.log('err', err);
} else {
console.log('zip', zip);
content = zip.generateNodeStream({
type: 'nodebuffer',
streamFiles:true
});
var params = {
Bucket: bucket, // name of dest bucket
Key: 'zipped/images.zip',
Body: content
};
s3.upload(params, function(err, data) {
if (err) {
console.log('upload zip to s3 err',err, err.stack); // an error occurred
} else {
console.log(data); // successful response
}
});
}
});
Is this possible using Lambda, or should I look at a different
approach?
Is it possible to write to a compressed zip file on the fly, therefore eliminating the memory issue somewhat, or do I need to have the files collected before compression?
Any help would be much appreciated.
Okay, I got to do this today and it works. Direct Buffer to Stream, no disk involved. So memory or disk limitation won't be an issue here:
'use strict';
const AWS = require("aws-sdk");
AWS.config.update( { region: "eu-west-1" } );
const s3 = new AWS.S3( { apiVersion: '2006-03-01'} );
const _archiver = require('archiver');
//This returns us a stream.. consider it as a real pipe sending fluid to S3 bucket.. Don't forget it
const streamTo = (_bucket, _key) => {
var stream = require('stream');
var _pass = new stream.PassThrough();
s3.upload( { Bucket: _bucket, Key: _key, Body: _pass }, (_err, _data) => { /*...Handle Errors Here*/ } );
return _pass;
};
exports.handler = async (_req, _ctx, _cb) => {
var _keys = ['list of your file keys in s3'];
var _list = await Promise.all(_keys.map(_key => new Promise((_resolve, _reject) => {
s3.getObject({Bucket:'bucket-name', Key:_key})
.then(_data => _resolve( { data: _data.Body, name: `${_key.split('/').pop()}` } ));
}
))).catch(_err => { throw new Error(_err) } );
await new Promise((_resolve, _reject) => {
var _myStream = streamTo('bucket-name', 'fileName.zip'); //Now we instantiate that pipe...
var _archive = _archiver('zip');
_archive.on('error', err => { throw new Error(err); } );
//Your promise gets resolved when the fluid stops running... so that's when you get to close and resolve
_myStream.on('close', _resolve);
_myStream.on('end', _resolve);
_myStream.on('error', _reject);
_archive.pipe(_myStream); //Pass that pipe to _archive so it can push the fluid straigh down to S3 bucket
_list.forEach(_itm => _archive.append(_itm.data, { name: _itm.name } ) ); //And then we start adding files to it
_archive.finalize(); //Tell is, that's all we want to add. Then when it finishes, the promise will resolve in one of those events up there
}).catch(_err => { throw new Error(_err) } );
_cb(null, { } ); //Handle response back to server
};
I formated the code according to #iocoker.
main entry
// index.js
'use strict';
const S3Zip = require('./s3-zip')
const params = {
files: [
{
fileName: '1.jpg',
key: 'key1.JPG'
},
{
fileName: '2.jpg',
key: 'key2.JPG'
}
],
zippedFileKey: 'zipped-file-key.zip'
}
exports.handler = async event => {
const s3Zip = new S3Zip(params);
await s3Zip.process();
return {
statusCode: 200,
body: JSON.stringify(
{
message: 'Zip file successfully!'
}
)
};
}
Zip file util
// s3-zip.js
'use strict';
const fs = require('fs');
const AWS = require("aws-sdk");
const Archiver = require('archiver');
const Stream = require('stream');
const https = require('https');
const sslAgent = new https.Agent({
KeepAlive: true,
rejectUnauthorized: true
});
sslAgent.setMaxListeners(0);
AWS.config.update({
httpOptions: {
agent: sslAgent,
},
region: 'us-east-1'
});
module.exports = class S3Zip {
constructor(params, bucketName = 'default-bucket') {
this.params = params;
this.BucketName = bucketName;
}
async process() {
const { params, BucketName } = this;
const s3 = new AWS.S3({ apiVersion: '2006-03-01', params: { Bucket: BucketName } });
// create readstreams for all the output files and store them
const createReadStream = fs.createReadStream;
const s3FileDwnldStreams = params.files.map(item => {
const stream = s3.getObject({ Key: item.key }).createReadStream();
return {
stream,
fileName: item.fileName
}
});
const streamPassThrough = new Stream.PassThrough();
// Create a zip archive using streamPassThrough style for the linking request in s3bucket
const uploadParams = {
ACL: 'private',
Body: streamPassThrough,
ContentType: 'application/zip',
Key: params.zippedFileKey
};
const s3Upload = s3.upload(uploadParams, (err, data) => {
if (err) {
console.error('upload err', err)
} else {
console.log('upload data', data);
}
});
s3Upload.on('httpUploadProgress', progress => {
// console.log(progress); // { loaded: 4915, total: 192915, part: 1, key: 'foo.jpg' }
});
// create the archiver
const archive = Archiver('zip', {
zlib: { level: 0 }
});
archive.on('error', (error) => {
throw new Error(`${error.name} ${error.code} ${error.message} ${error.path} ${error.stack}`);
});
// connect the archiver to upload streamPassThrough and pipe all the download streams to it
await new Promise((resolve, reject) => {
console.log("Starting upload of the output Files Zip Archive");
streamPassThrough.on('close', resolve());
streamPassThrough.on('end', resolve());
streamPassThrough.on('error', reject());
archive.pipe(streamPassThrough);
s3FileDwnldStreams.forEach((s3FileDwnldStream) => {
archive.append(s3FileDwnldStream.stream, { name: s3FileDwnldStream.fileName })
});
archive.finalize();
}).catch((error) => {
throw new Error(`${error.code} ${error.message} ${error.data}`);
});
// Finally wait for the uploader to finish
await s3Upload.promise();
}
}
The other solutions are great for not so many files (less than ~60). If they handle more files, they just quit into nothing with no errors. This is because they open too many streams.
This solution is inspired by https://gist.github.com/amiantos/16bacc9ed742c91151fcf1a41012445e
It is a working solution, which works well even with many files (+300) and returns a presigned URL to the zip which contains the files.
Main Lambda:
const AWS = require('aws-sdk');
const S3 = new AWS.S3({
apiVersion: '2006-03-01',
signatureVersion: 'v4',
httpOptions: {
timeout: 300000 // 5min Should Match Lambda function timeout
}
});
const archiver = require('archiver');
import stream from 'stream';
const UPLOAD_BUCKET_NAME = "my-s3-bucket";
const URL_EXPIRE_TIME = 5*60;
export async function getZipSignedUrl(event) {
const prefix = `uploads/id123123/}`; //replace this with your S3 prefix
let files = ["12314123.png", "56787567.png"] //replace this with your files
if (files.length == 0) {
console.log("No files to zip");
return result(404, "No pictures to download");
}
console.log("Files to zip: ", files);
try {
files = files.map(file => {
return {
fileName: file,
key: prefix + '/' + file,
type: "file"
};
});
const destinationKey = prefix + '/' + 'uploads.zip'
console.log("files: ", files);
console.log("destinationKey: ", destinationKey);
await streamToZipInS3(files, destinationKey);
const presignedUrl = await getSignedUrl(UPLOAD_BUCKET_NAME, destinationKey, URL_EXPIRE_TIME, "uploads.zip");
console.log("presignedUrl: ", presignedUrl);
if (!presignedUrl) {
return result(500, null);
}
return result(200, presignedUrl);
}
catch(error) {
console.error(`Error: ${error}`);
return result(500, null);
}
}
Helper functions:
export function result(code, message) {
return {
statusCode: code,
body: JSON.stringify(
{
message: message
}
)
}
}
export async function streamToZipInS3(files, destinationKey) {
await new Promise(async (resolve, reject) => {
var zipStream = streamTo(UPLOAD_BUCKET_NAME, destinationKey, resolve);
zipStream.on("error", reject);
var archive = archiver("zip");
archive.on("error", err => {
throw new Error(err);
});
archive.pipe(zipStream);
for (const file of files) {
if (file["type"] == "file") {
archive.append(getStream(UPLOAD_BUCKET_NAME, file["key"]), {
name: file["fileName"]
});
}
}
archive.finalize();
})
.catch(err => {
console.log(err);
throw new Error(err);
});
}
function streamTo(bucket, key, resolve) {
var passthrough = new stream.PassThrough();
S3.upload(
{
Bucket: bucket,
Key: key,
Body: passthrough,
ContentType: "application/zip",
ServerSideEncryption: "AES256"
},
(err, data) => {
if (err) {
console.error('Error while uploading zip')
throw new Error(err);
reject(err)
return
}
console.log('Zip uploaded')
resolve()
}
).on("httpUploadProgress", progress => {
console.log(progress)
});
return passthrough;
}
function getStream(bucket, key) {
let streamCreated = false;
const passThroughStream = new stream.PassThrough();
passThroughStream.on("newListener", event => {
if (!streamCreated && event == "data") {
const s3Stream = S3
.getObject({ Bucket: bucket, Key: key })
.createReadStream();
s3Stream
.on("error", err => passThroughStream.emit("error", err))
.pipe(passThroughStream);
streamCreated = true;
}
});
return passThroughStream;
}
export async function getSignedUrl(bucket: string, key: string, expires: number, downloadFilename?: string): Promise<string> {
const exists = await objectExists(bucket, key);
if (!exists) {
console.info(`Object ${bucket}/${key} does not exists`);
return null
}
let params = {
Bucket: bucket,
Key: key,
Expires: expires,
};
if (downloadFilename) {
params['ResponseContentDisposition'] = `inline; filename="${encodeURIComponent(downloadFilename)}"`;
}
try {
const url = s3.getSignedUrl('getObject', params);
return url;
} catch (err) {
console.error(`Unable to get URL for ${bucket}/${key}`, err);
return null;
}
};
Using streams may be tricky as I'm not sure how you could pipe multiple streams into an object. I've done this several times using standard file object. It's a multistep process and it's quite fast. Remember that Lambda operates in Linux so you have all Linux resources at hand including the system /tmp directory.
Create a sub-directory in /tmp call "transient" or whatever works for you
Use s3.getObject() and write file objects to /tmp/transient
Use the GLOB package to generate an array[] of paths from /tmp/transient
Loop the array and zip.addLocalFile(array[i]);
zip.writeZip('tmp/files.zip');
I've used a similar approach, but I'm facing the issue that some of the files in the generated ZIP file don't have the correct size (and corresponding data). Is there any limitation on the size of the files this code can manage? In my case I'm zipping large files (a few larger than 1GB) and the overall amount of data may reach 10GB.
I do not get any error/warning message, so it seems it all works fine.
Any idea what may be hapenning?

Resources