Kuzzle / Minio example usage - node.js

Do the Kuzzle or Minio development teams have a working example of using the Kuzzle S3 plugin with Minio? I have the following, but my file isn't being uploaded, and the pre-signed URL refers to https://your-s3-bucket.s3.eu-west-3.amazonaws.com/
const fs = require("fs");
const fsPromises = require('fs').promises;
// Create a JS File object instance from a local path using Node.js
const fileObject = require("get-file-object-from-local-path");
// Promise based HTTP client for the browser and node.js
const axios = require('axios');
// Loads the Kuzzle SDK modules
const {
Kuzzle,
WebSocket
} = require('kuzzle-sdk');
var start = new Date();
const webSocketOptionsObject = {
"autoReconnect": true,
"ssl": true,
"port": 443
};
const kuzzle = new Kuzzle(new WebSocket('myurl.com', webSocketOptionsObject));
const credentials = { username: 'xyz123', password: 'fithenmgjtkj' };
const path = __dirname + "\\" + "yellow_taxi_data.csv"; // the "\\" is for Windows path
var fileData = {};
// check file exists
fs.access(path, fs.F_OK, (err) => {
if (err) {
console.error(err)
return
}
fileData = new fileObject.LocalFileData(path);
// Adds a listener to detect connection problems
kuzzle.on('networkError', error => {
console.error('Network Error:', error);
});
});
const connectToKuzzle = async () => {
// Connects to the Kuzzle server
await kuzzle.connect();
return await kuzzle.auth.login('local', credentials);
// console.log('jwt auth token: ', jwt);
}
const disConnectFromKuzzle = async () => {
console.log('Disconnected from Kuzzle');
kuzzle.disconnect();
var time = new Date() - start;
// sec = Math.floor((time/1000) % 60);
console.log('Execution time in milliseconds: ', time);
}
const presignedURL = async () => {
// Get a Presigned URL
const result = await kuzzle.query({
controller: 's3/upload',
action: 'getUrl',
uploadDir: 'proxybucket', // directory name inside the Bucket specified in the s3 plugin bucket name
filename: fileData.name
});
console.log("result: ", result);
return result;
}
const loadFileStream = async () => {
console.log('getting file: ', path);
targetFile = null;
await fs.promises.readFile(path)
.then(function (result) {
console.log("file loaded------", result.length);
targetFile = result;
})
.catch(function (error) {
console.log(error);
return;
});
return targetFile;
}
const kuzzleValidate = async (kuzzleResource) => {
// console.log("kuzzleResource: ", kuzzleResource.result.fileKey);
// validate
// Validate and persist a previsously uploaded file.
// https://docs.kuzzle.io/official-plugins/s3/2/controllers/upload/validate/
const Presult = await kuzzle.query({
// Kuzzle API params
"controller": "s3/upload",
"action": "validate",
// File key in S3 bucket
"fileKey": kuzzleResource.result.fileKey
});
console.log('validate: ', Presult.result.fileUrl);
}
const uploadFile = async (fileBuffer, kuzzleResource, jwt) => {
// options at https://github.com/axios/axios
const axiosOptions = {
headers: {
'Content-Type': fileData.type
},
maxBodyLength: 200000000 // 200,000,000 bytes 200 Mb
};
// PUT the fileBuffer to the Kuzzle S3 endpoint
// https://github.com/axios/axios
axios.defaults.headers.common['Authorization'] = jwt;
const response = await axios.put(kuzzleResource.result.uploadUrl, fileBuffer, axiosOptions)
.then((response) => {
console.log('file uploaded......');
})
.catch(function (error) {
console.log("File upload error: ", error);
return;
});
return "Upload successful";
}
if (fileData) {
connectToKuzzle().then((jwt) => {
console.log(jwt);
// upload(jwt);
presignedURL().then((kuzzleResource) => {
loadFileStream().then((fileBuffer) => {
uploadFile(fileBuffer, kuzzleResource, jwt).then((doneMessage) => {
console.log("doneMessage: ", doneMessage);
}).then(() => {
kuzzleValidate(kuzzleResource).then(() => {
disConnectFromKuzzle();
});
});
});
});
});
}
I'm looking to upload to a Minio bucket and obtain a pre-signed URL that I can store in a document later.

You can change the endpoint configuration to point at a different S3-compatible endpoint, which can be a Minio one.
This configuration can be changed under the plugins.s3.endpoint key. You should also force path-style access, which Minio requires.
Example:
app.config.set('plugins.s3.endpoint', 'https://minio.local');
app.config.set('plugins.s3.s3ClientOptions.s3ForcePathStyle', true);
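For context, path-style matters because Minio serves buckets as https://minio.local/bucket/key rather than https://bucket.minio.local/key. As an illustration only (a raw aws-sdk client, not the plugin itself; endpoint and credentials are placeholder values):

// Illustration: the same two options on a plain aws-sdk v2 S3 client.
// The endpoint and credentials below are placeholders, not real values.
const AWS = require('aws-sdk');

const s3 = new AWS.S3({
  endpoint: 'https://minio.local',
  s3ForcePathStyle: true, // Minio addresses buckets by path, not by subdomain
  accessKeyId: 'minioadmin',
  secretAccessKey: 'minioadmin'
});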

Related

Get progress of firebase admin file upload

I'm trying to get the progress of a 1 minute video uploading to firebase bucket storage using the admin sdk. I've seen a lot about using firebase.storage().ref.child..... but I'm unable to do that with the admin sdk since they don't have the same functions. This is my file upload:
exports.uploadMedia = (req, res) => {
  const BusBoy = require('busboy');
  const path = require('path');
  const os = require('os');
  const fs = require('fs');
  const busboy = new BusBoy({ headers: req.headers, limits: { files: 1, fileSize: 200000000 } });
  let mediaFileName;
  let mediaToBeUploaded = {};
  busboy.on('file', (fieldname, file, filename, encoding, mimetype) => {
    if (mimetype !== 'image/jpeg' && mimetype !== 'image/png' && mimetype !== 'video/quicktime' && mimetype !== 'video/mp4') {
      console.log(mimetype);
      return res.status(400).json({ error: 'Wrong file type submitted, only .png, .jpeg, .mov, and .mp4 files allowed' });
    }
    // my.image.png
    const imageExtension = filename.split('.')[filename.split('.').length - 1];
    // 43523451452345231234.png
    mediaFileName = `${Math.round(Math.random() * 100000000000)}.${imageExtension}`;
    const filepath = path.join(os.tmpdir(), mediaFileName);
    mediaToBeUploaded = { filepath, mimetype };
    file.pipe(fs.createWriteStream(filepath));
    file.on('limit', function () {
      fs.unlink(filepath, function () {
        return res.json({ 'Error': 'Max file size is 200 Mb, file size too large' });
      });
    });
  });
  busboy.on('finish', () => {
    admin
      .storage()
      .bucket()
      .upload(mediaToBeUploaded.filepath, {
        resumable: false,
        metadata: {
          metadata: {
            contentType: mediaToBeUploaded.mimetype
          }
        }
      })
      .then(() => {
        const mediaUrl = `https://firebasestorage.googleapis.com/v0/b/${config.storageBucket}/o/${mediaFileName}?alt=media`;
        return res.json({ mediaUrl: mediaUrl });
      })
      .catch((err) => {
        console.error(err);
        return res.json({ 'Error': 'Error uploading media' });
      });
  });
  req.pipe(busboy);
}
This works okay right now, but the only problem is that the user can't see where their 1- or 2-minute video upload is at. Currently it's just an activity indicator, and the user sits there waiting without any notice. I'm using React Native on the frontend if that helps with anything. Would appreciate any help!
I was able to implement this a lot more easily on the client side, and it works perfectly for image and video upload progress. On the backend I was using the admin SDK, but on the frontend I was originally using the Firebase SDK.
this.uploadingMedia = true;
const imageExtension = this.mediaFile.split('.')[this.mediaFile.split('.').length - 1];
const mediaFileName = `${Math.round(Math.random() * 100000000000)}.${imageExtension}`;
const response = await fetch(this.mediaFile);
const blob = await response.blob();
const storageRef = storage.ref(`${mediaFileName}`).put(blob);
storageRef.on(`state_changed`, snapshot => {
  this.uploadProgress = (snapshot.bytesTransferred / snapshot.totalBytes);
}, error => {
  this.error = error.message;
  this.submitting = false;
  this.uploadingMedia = false;
  return;
},
async () => {
  storageRef.snapshot.ref.getDownloadURL().then(async (url) => {
    imageUrl = [];
    videoUrl = [url];
    this.uploadingMedia = false;
    this.submitPost(imageUrl, videoUrl);
  });
});
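The same pattern can also be wrapped in a reusable, promise-based helper: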
export const uploadFile = (
  folderPath,
  fileName,
  file,
  generateDownloadURL = true,
  updateInformationUploadProgress
) => {
  return new Promise((resolve, reject) => {
    try {
      const storageRef = firebaseApp.storage().ref(`${folderPath}/${fileName}`)
      const uploadTask = storageRef.put(file)
      uploadTask.on(
        'state_changed',
        snapshot => {
          if (updateInformationUploadProgress) {
            const progress =
              (snapshot.bytesTransferred / snapshot.totalBytes) * 100
            updateInformationUploadProgress({
              name: fileName,
              progress: progress,
            })
          }
        },
        error => {
          console.log('upload error: ', error)
          reject(error)
        },
        () => {
          if (generateDownloadURL) {
            uploadTask.snapshot.ref
              .getDownloadURL()
              .then(url => {
                resolve(url)
              })
              .catch(error => {
                console.log('url error: ', error.message)
                reject(error)
              })
          } else {
            resolve(uploadTask.snapshot.metadata.fullPath)
          }
        }
      )
    } catch (error) {
      reject(error)
    }
  })
}
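A hypothetical call site for that helper (assuming a browser file input and the firebaseApp instance from the snippet; the element id and folder name are placeholders):

// Hypothetical usage: upload a picked file and log progress as it goes.
const file = document.querySelector('#picker').files[0];

uploadFile('uploads', file.name, file, true, ({ name, progress }) => {
  console.log(`${name}: ${progress.toFixed(0)}%`);
})
  .then(urlOrPath => console.log('done:', urlOrPath))
  .catch(err => console.error(err));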

How to download all files in aws S3folder in local machine using node js

I have an S3 bucket with a folder containing some files, and I want to download all the files in that folder to a folder on my local machine. I tried it for a single file and it works; how do I download multiple files?
As per the code below, the key folderA has 10 files. I want to download all ten into the localfolder directory that I pass to s3.getObject(params).createReadStream().pipe(ws);
My code :
const downloadObject = () => {
  var params = { Bucket: "Sample", Key: "folderA/" };
  const ws = fs.createWriteStream(`${__dirname}/localfolder/`);
  const s3Stream = s3.getObject(params).createReadStream().pipe(ws);
  s3Stream.on("error", (err) => {
    ws.end();
  });
  s3Stream.on("close", () => {
    console.log(`downloaded successfully from s3 at ${new Date()}`);
    ws.end();
  });
};
expected output:
s3 -> bucket/folderA/<10 files>
localmachine -> localfolder/<need all 10 files in local>
There is quite a lot to it.
First you would need to list all buckets, then loop over them (or just the one you want), creating a local directory for each if not found.
Then find out all the files in each bucket, loop over them, and for each path get the object and store it.
Here is how I would do it with the Minio JS client (the calls are much the same); tweak it to your needs, since your folder paths will obviously be different.
/**
 * S3 images pull script
 */
const fs = require('fs')
const path = require('path')
const util = require('util')
const writeFile = util.promisify(fs.writeFile)
//
const rootPath = path.join(__dirname, '..')
const publicPath = path.join(rootPath, 'public', 'images')
//
require('dotenv').config({
  path: path.join(rootPath, '.env')
})
// minio client S3
const s3 = new (require('minio')).Client({
  endPoint: process.env.S3_HOST,
  port: parseInt(process.env.S3_PORT, 10),
  useSSL: process.env.S3_USE_SSL === 'true',
  accessKey: process.env.S3_ACCESS_KEY,
  secretKey: process.env.S3_ACCESS_SECRET,
  region: process.env.S3_REGION
})
/**
 * Functions
 */
// recursively create a directory path
const mkdir = dirPath => {
  dirPath.split(path.sep).reduce((prevPath, folder) => {
    const currentPath = path.join(prevPath, folder, path.sep);
    if (!fs.existsSync(currentPath)) {
      fs.mkdirSync(currentPath);
    }
    return currentPath
  }, '')
}
// list objects in a bucket
const listObjects = bucket => new Promise((resolve, reject) => {
  //
  bucket.objects = []
  bucket.total_objects = 0
  bucket.total_size = 0
  // listObjectsV2 returns a stream, not a promise
  let stream = s3.listObjectsV2(bucket.name, '', true)
  //
  stream.on('data', obj => {
    if (obj && (obj.name || obj.prefix)) {
      bucket.objects.push(obj)
      bucket.total_objects++
      bucket.total_size = bucket.total_size + obj.size
    }
  })
  //
  stream.on('end', () => resolve(bucket))
  stream.on('error', e => reject(e))
})
// get an object's data
const getObject = (bucket, name) => new Promise((resolve, reject) => {
  s3.getObject(bucket, name, (err, stream) => {
    if (err) return reject(err)
    //
    let chunks = []
    stream.on('data', chunk => chunks.push(chunk))
    stream.on('end', () => resolve(Buffer.concat(chunks)))
    stream.on('error', e => reject(e))
  })
})
/**
 *
 */
async function main() {
  // get buckets
  console.log(`Fetching buckets from: ${process.env.S3_HOST}`)
  let buckets = []
  try {
    buckets = await s3.listBuckets()
    console.log(buckets.length + ' buckets found')
  } catch (e) {
    return console.error(e)
  }
  // create local folders if they don't exist
  console.log(`Creating local folders in ./api/public/images/ if not exists`)
  try {
    for (let bucket of buckets) {
      //
      bucket.local = path.join(publicPath, bucket.name)
      try {
        await fs.promises.access(bucket.local)
      } catch (e) {
        if (e.code === 'ENOENT') {
          console.log(`Creating local folder: ${bucket.local}`)
          await fs.promises.mkdir(bucket.local)
        } else
          bucket.error = e.message
      }
    }
  } catch (e) {
    return console.error(e)
  }
  // fetch all bucket objects
  console.log(`Populating bucket objects`)
  try {
    for (let bucket of buckets) {
      bucket = await listObjects(bucket)
    }
  } catch (e) {
    console.log(e)
  }
  // loop over buckets and download all objects
  try {
    for (let bucket of buckets) {
      console.log(`Downloading bucket: ${bucket.name}`)
      // loop over and download
      for (let object of bucket.objects) {
        // if the object name has a prefix, make sure the local subfolder exists
        let dir = path.dirname(object.name)
        if (dir !== '.') {
          try {
            await fs.promises.access(path.join(bucket.local, dir))
          } catch (e) {
            if (e.code === 'ENOENT') {
              console.log(`Creating local folder: ${path.join(bucket.local, dir)}`)
              mkdir(path.join(bucket.local, dir))
            }
          }
        }
        //
        console.log(`Downloading object[${bucket.name}]: ${object.name}`)
        await writeFile(path.join(bucket.local, object.name), await getObject(bucket.name, object.name))
      }
    }
    console.log(`Completed!`)
  } catch (e) {
    console.log(e)
  }
}
main()
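If you'd rather stay with the aws-sdk client the question uses, a compact version of the same list-then-download loop might look like this (a sketch only, assuming the "Sample" bucket and "folderA/" prefix from the question):

// Sketch: download every object under a prefix with aws-sdk v2.
const AWS = require('aws-sdk');
const fs = require('fs');
const path = require('path');

const s3 = new AWS.S3();

async function downloadFolder(bucket, prefix, localDir) {
  fs.mkdirSync(localDir, { recursive: true });
  // List every key under the prefix, following pagination
  let ContinuationToken;
  do {
    const page = await s3.listObjectsV2({
      Bucket: bucket,
      Prefix: prefix,
      ContinuationToken
    }).promise();
    for (const { Key } of page.Contents) {
      if (Key.endsWith('/')) continue; // skip folder placeholder keys
      const dest = path.join(localDir, path.basename(Key));
      const { Body } = await s3.getObject({ Bucket: bucket, Key }).promise();
      fs.writeFileSync(dest, Body);
      console.log(`downloaded ${Key} -> ${dest}`);
    }
    ContinuationToken = page.NextContinuationToken;
  } while (ContinuationToken);
}

downloadFolder('Sample', 'folderA/', path.join(__dirname, 'localfolder'))
  .catch(console.error);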

Google Cloud Function Node JS SFTP To Cloud Storage Transfer File Error

I'm trying to run the following code using the node library ssh2-sftp-client, but I don't get any errors in the GCP Console.
What I'm trying to do here is pipe the data that I receive from the SFTP server into an output file on GCS.
Here is the code:
/**
 * Generic background Cloud Function to be triggered by Cloud Storage.
 *
 * @param {object} event The Cloud Functions event.
 * @param {function} callback The callback function.
 */
let request = require("request");
let Storage = require('@google-cloud/storage');
let Client = require('ssh2-sftp-client');
let sftp = new Client();
const fs = require('fs');
const path = require('path');
const storage = new Storage({ projectId: process.env.PROJECT_ID });
exports.retrieveFilesFromSFTP = (event, callback) => {
  let pubsubMessage = event.data;
  let fileName = pubsubMessage.data
    ? Buffer.from(pubsubMessage.data, 'base64').toString()
    : 'No Files in pubsub';
  console.log(fileName, 'File name from Pub/Sub Message was received');
  fs.readdir(__dirname, (err, files) => {
    if (err) {
      console.error(err);
      res.sendStatus(500); // note: `res` is not defined in a background function
    } else {
      console.log('Files', files);
      res.sendStatus(200);
    }
  });
  getFilesFromSFTP(fileName);
  callback();
};
function getFilesFromSFTP(fileName) {
  const bucket = storage.bucket('dataflowdirectory');
  const outFile = bucket.file("relex_output/" + fileName);
  // TODO create a file on the bucket, get the correct file path, and then pipe it to the file
  sftp.connect({
    host: process.env.SFTP_SERVER,
    port: process.env.SFTP_PORT,
    username: process.env.SFTP_USER,
    password: process.env.SFTP_PASSWORD
  }).then(() => {
    sftp.list(process.env.SFTP_OUTBOUND_PATH)
      .then((data) => {
        sftp.get(process.env.SFTP_OUTBOUND_PATH + "/" + fileName).then((data) => {
          data.pipe(outFile.createWriteStream({ gzip: true })
            .on('error', function (err) {
              console.error("error", err);
            })
            .on('finish', function () {
              // The file upload is complete.
              outFile.close();
            }));
        });
      }).catch((err) => {
        console.log(err);
      });
  })
}
The only thing that I see from the log is the following:
Ignoring exception from a finished function
Any ideas how to solve this?
Thanks,
I was able to solve it with the following code; I added the full path of the file: relex_output.
It is still very, very slow, so any performance improvement would be appreciated:
/**
 * Generic background Cloud Function to be triggered by Cloud Storage.
 *
 * @param {object} event The Cloud Functions event.
 * @param {function} callback The callback function.
 */
let request = require("request");
let Storage = require('@google-cloud/storage');
let Client = require('ssh2-sftp-client');
let sftp = new Client();
const fs = require('fs');
const storage = new Storage({ projectId: process.env.PROJECT_ID });
exports.retrieveFilesFromSFTP = (event, callback) => {
  let pubsubMessage = event.data;
  let fileName = pubsubMessage.data
    ? Buffer.from(pubsubMessage.data, 'base64').toString()
    : 'No Files in pubsub';
  console.log(fileName, 'File name from Pub/Sub Message was received');
  fs.readdir(__dirname, (err, files) => {
    if (err) {
      console.error(err);
    } else {
      console.log('Files', files);
    }
  });
  getFilesFromSFTP(fileName);
  callback();
};
function getFilesFromSFTP(fileName) {
  const bucket = storage.bucket('dataflowdirectory');
  const outFile = bucket.file("relex_output/" + fileName);
  sftp.connect({
    host: process.env.SFTP_SERVER,
    port: process.env.SFTP_PORT,
    username: process.env.SFTP_USER,
    password: process.env.SFTP_PASSWORD
  }).then(() => {
    sftp.list(process.env.SFTP_OUTBOUND_PATH)
      .then((data) => {
        sftp.get(process.env.SFTP_OUTBOUND_PATH + "/" + fileName).then((data) => {
          data.pipe(outFile.createWriteStream({ gzip: true })
            .on('error', function (err) {
              console.error("error", err);
            })
            .on('finish', function () {
              // The file upload is complete.
              sftp.end();
            }));
        });
      }).catch((err) => {
        console.log(err);
      });
  });
}
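As a possible performance tweak (a sketch only, not tested against this setup): ssh2-sftp-client's get() accepts a destination stream as its second argument, so the GCS write stream can be handed to it directly and the promise chain flattened with async/await. Names reuse the ones from the answer above.

// Hypothetical variant: let ssh2-sftp-client write straight into the
// GCS write stream, with no manual piping of the returned stream.
async function getFileFromSFTP(fileName) {
  const bucket = storage.bucket('dataflowdirectory');
  const outFile = bucket.file('relex_output/' + fileName);
  await sftp.connect({
    host: process.env.SFTP_SERVER,
    port: process.env.SFTP_PORT,
    username: process.env.SFTP_USER,
    password: process.env.SFTP_PASSWORD
  });
  // get(remotePath, dst): dst may be a writable stream
  await sftp.get(
    process.env.SFTP_OUTBOUND_PATH + '/' + fileName,
    outFile.createWriteStream({ gzip: true })
  );
  await sftp.end();
}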

Streaming image data from node server results in corrupted file (gridfs-stream)

I decided to post this after extensive searching here (1, 2, 3) and here (1, 2) and many, many other related posts. I am losing hope, but will not give up that easily :)
I'm using multer to upload a PNG image to a Mongo database:
const storage = new GridFsStorage({
  url: 'mongodb://my_database:thisIsfake@hostName/my_database',
  file: (req, file) => {
    return new Promise((resolve, reject) => {
      crypto.randomBytes(16, (err, buf) => { // generate unique names to avoid duplicates
        if (err) {
          return reject(err);
        }
        const filename = buf.toString('hex') + path.extname(file.originalname);
        const fileInfo = {
          filename: filename,
          bucketName: 'media',
          metadata: {
            clientId: req.body.client_id // reference to the client to whom the image belongs
          }
        };
        resolve(fileInfo);
      });
    });
  }
});
const upload = multer({ storage }).single('image');
Then I create a stream and pipe it to response:
loader: function (req, res) {
  var conn = mongoose.createConnection('mongodb://my_database:thisIsfake@hostName/my_database');
  conn.once('open', function () {
    var gfs = Grid(conn.db, mongoose.mongo);
    gfs.collection('media');
    gfs.files.find({ metadata: { clientId: req.body.id } }).toArray(
      (err, files) => {
        if (err) throw err;
        if (files) {
          const readStream = gfs.createReadStream(files[0].filename); // testing only with the first file in the array
          console.log(readStream);
          res.set('Content-Type', files[0].contentType)
          readStream.pipe(res);
        }
      });
  });
}
A Postman POST request to the endpoint results in the response body being displayed as an image file:
On the front end I pass the response into a File object, read it, and save the result in the src attribute of an img:
findAfile() {
  let Data = {
    id: this.$store.state.StorePatient._id,
  };
  console.log(this.$store.state.StorePatient._id);
  visitAxios.post('http://localhost:3000/client/visits/findfile', Data)
    .then(res => {
      const reader = new FileReader();
      let file = new File([res.data], "image.png", { type: "image/png" });
      console.log('this is file: ', file);
      reader.readAsDataURL(file); // encode as a data-URL string
      reader.onload = function () {
        const img = new Image();
        img.src = reader.result;
        document.getElementById('imgContainer').appendChild(img);
      };
    })
    .catch(err => console.error(err));
}
My File object is similar to the one I get when using the input field, only bigger:
This is the original file:
When inspecting the element I see this:
It looks like the data URI is where it should be, but it's different from the original image from the file input:
Again, when I want to display it through the input element:
onFileSelected(event) {
  this.file = event.target.files[0];
  this.fileName = event.target.files[0].name;
  const reader = new FileReader();
  console.log(this.file);
  reader.onload = function () {
    const img = new Image();
    img.src = reader.result;
    document.getElementById('imageContainer').appendChild(img);
  };
  reader.readAsDataURL(this.file);
}
I get this:
But when reading it from the response, it is corrupted:
Postman gets it right, so there must be something wrong with my front-end code, right? How do I pass this GridFS stream to my HTML?
I managed to make a POST request to fetch an image from MongoDB and save it in the server dir:
const readStream = gfs.createReadStream(files[0].filename);
const wstream = fs.createWriteStream(path.join(__dirname,"uploads", "fileToGet.jpg"));
readStream.pipe(wstream);
Then I just made a simple GET request, adding an absolute path to the file, and finally deleting the file after a successful response:
app.get('/image', function (req, res) {
  var file = path.join(dir, 'fileToGet.jpg');
  if (file.indexOf(dir + path.sep) !== 0) {
    return res.status(403).end('Forbidden');
  }
  var type = mime[path.extname(file).slice(1)] || 'text/plain'; // `mime` is an extension-to-type lookup map defined elsewhere
  var s = fs.createReadStream(file);
  s.on('open', function () {
    res.set('Content-Type', type);
    s.pipe(res);
  });
  s.on('end', function () {
    fs.unlink(file, () => {
      console.log("file deleted");
    });
  });
  s.on('error', function () {
    res.set('Content-Type', 'text/plain');
    res.status(404).end('Not found');
  });
});
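For what it's worth, the corruption in the front-end code above is typically caused by axios decoding the binary response body as text. A possible alternative to the save-then-GET workaround (a sketch, assuming the same endpoint and payload) is to ask axios for a Blob:

// Sketch: request the body as a Blob so the bytes arrive unmodified.
visitAxios.post('http://localhost:3000/client/visits/findfile', Data, {
  responseType: 'blob' // keep the body as raw binary instead of a decoded string
})
  .then(res => {
    const img = new Image();
    img.src = URL.createObjectURL(res.data); // res.data is already a Blob
    document.getElementById('imgContainer').appendChild(img);
  })
  .catch(err => console.error(err));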

Create a zip file on S3 from files on S3 using Lambda Node

I need to create a Zip file that consists of a selection of files (videos and images) located in my s3 bucket.
The problem at the moment using my code below is that I quickly hit the memory limit on Lambda.
async.eachLimit(files, 10, function (file, next) {
  var params = {
    Bucket: bucket, // bucket name
    Key: file.key
  };
  s3.getObject(params, function (err, data) {
    if (err) {
      console.log('file', file.key);
      console.log('get image files err', err, err.stack); // an error occurred
    } else {
      console.log('file', file.key);
      zip.file(file.key, data.Body);
      next();
    }
  });
},
function (err) {
  if (err) {
    console.log('err', err);
  } else {
    console.log('zip', zip);
    content = zip.generateNodeStream({
      type: 'nodebuffer',
      streamFiles: true
    });
    var params = {
      Bucket: bucket, // name of dest bucket
      Key: 'zipped/images.zip',
      Body: content
    };
    s3.upload(params, function (err, data) {
      if (err) {
        console.log('upload zip to s3 err', err, err.stack); // an error occurred
      } else {
        console.log(data); // successful response
      }
    });
  }
});
Is this possible using Lambda, or should I look at a different approach?
Is it possible to write to a compressed zip file on the fly, thereby eliminating the memory issue somewhat, or do I need to have the files collected before compression?
Any help would be much appreciated.
Okay, I got to do this today and it works. Direct Buffer to Stream, no disk involved. So memory or disk limitation won't be an issue here:
'use strict';
const AWS = require("aws-sdk");
AWS.config.update({ region: "eu-west-1" });
const s3 = new AWS.S3({ apiVersion: '2006-03-01' });
const _archiver = require('archiver');

// This returns us a stream... consider it a real pipe sending fluid to the S3 bucket. Don't forget it.
const streamTo = (_bucket, _key) => {
  var stream = require('stream');
  var _pass = new stream.PassThrough();
  s3.upload({ Bucket: _bucket, Key: _key, Body: _pass }, (_err, _data) => { /* ...Handle Errors Here */ });
  return _pass;
};

exports.handler = async (_req, _ctx, _cb) => {
  var _keys = ['list of your file keys in s3'];
  var _list = await Promise.all(_keys.map(_key => new Promise((_resolve, _reject) => {
    s3.getObject({ Bucket: 'bucket-name', Key: _key })
      .promise() // getObject() returns a Request; .promise() makes it thenable
      .then(_data => _resolve({ data: _data.Body, name: `${_key.split('/').pop()}` }));
  }
  ))).catch(_err => { throw new Error(_err) });

  await new Promise((_resolve, _reject) => {
    var _myStream = streamTo('bucket-name', 'fileName.zip'); // Now we instantiate that pipe...
    var _archive = _archiver('zip');
    _archive.on('error', err => { throw new Error(err); });
    // Your promise gets resolved when the fluid stops running... so that's when you get to close and resolve
    _myStream.on('close', _resolve);
    _myStream.on('end', _resolve);
    _myStream.on('error', _reject);
    _archive.pipe(_myStream); // Pass that pipe to _archive so it can push the fluid straight down to the S3 bucket
    _list.forEach(_itm => _archive.append(_itm.data, { name: _itm.name })); // And then we start adding files to it
    _archive.finalize(); // Tell it that's all we want to add. When it finishes, the promise resolves via one of those events up there
  }).catch(_err => { throw new Error(_err) });

  _cb(null, {}); // Hand the response back to the server
};
I formatted the code according to @iocoker.
main entry
// index.js
'use strict';
const S3Zip = require('./s3-zip')

const params = {
  files: [
    {
      fileName: '1.jpg',
      key: 'key1.JPG'
    },
    {
      fileName: '2.jpg',
      key: 'key2.JPG'
    }
  ],
  zippedFileKey: 'zipped-file-key.zip'
}

exports.handler = async event => {
  const s3Zip = new S3Zip(params);
  await s3Zip.process();

  return {
    statusCode: 200,
    body: JSON.stringify(
      {
        message: 'Zip file successfully!'
      }
    )
  };
}
Zip file util
// s3-zip.js
'use strict';
const AWS = require("aws-sdk");
const Archiver = require('archiver');
const Stream = require('stream');
const https = require('https');

const sslAgent = new https.Agent({
  keepAlive: true,
  rejectUnauthorized: true
});
sslAgent.setMaxListeners(0);

AWS.config.update({
  httpOptions: {
    agent: sslAgent,
  },
  region: 'us-east-1'
});

module.exports = class S3Zip {
  constructor(params, bucketName = 'default-bucket') {
    this.params = params;
    this.BucketName = bucketName;
  }

  async process() {
    const { params, BucketName } = this;
    const s3 = new AWS.S3({ apiVersion: '2006-03-01', params: { Bucket: BucketName } });

    // create read streams for all the output files and store them
    const s3FileDwnldStreams = params.files.map(item => {
      const stream = s3.getObject({ Key: item.key }).createReadStream();
      return {
        stream,
        fileName: item.fileName
      }
    });

    const streamPassThrough = new Stream.PassThrough();
    // Upload the zip archive through the pass-through stream
    const uploadParams = {
      ACL: 'private',
      Body: streamPassThrough,
      ContentType: 'application/zip',
      Key: params.zippedFileKey
    };
    const s3Upload = s3.upload(uploadParams, (err, data) => {
      if (err) {
        console.error('upload err', err)
      } else {
        console.log('upload data', data);
      }
    });
    s3Upload.on('httpUploadProgress', progress => {
      // console.log(progress); // { loaded: 4915, total: 192915, part: 1, key: 'foo.jpg' }
    });

    // create the archiver
    const archive = Archiver('zip', {
      zlib: { level: 0 }
    });
    archive.on('error', (error) => {
      throw new Error(`${error.name} ${error.code} ${error.message} ${error.path} ${error.stack}`);
    });

    // connect the archiver to the upload streamPassThrough and pipe all the download streams to it
    await new Promise((resolve, reject) => {
      console.log("Starting upload of the output Files Zip Archive");
      // pass the handlers themselves; calling resolve()/reject() here would fire immediately
      streamPassThrough.on('close', resolve);
      streamPassThrough.on('end', resolve);
      streamPassThrough.on('error', reject);
      archive.pipe(streamPassThrough);
      s3FileDwnldStreams.forEach((s3FileDwnldStream) => {
        archive.append(s3FileDwnldStream.stream, { name: s3FileDwnldStream.fileName })
      });
      archive.finalize();
    }).catch((error) => {
      throw new Error(`${error.code} ${error.message} ${error.data}`);
    });

    // Finally wait for the uploader to finish
    await s3Upload.promise();
  }
}
The other solutions are great when there are not too many files (fewer than ~60). If they have to handle more, they just exit silently with no errors, because they open too many streams at once.
This solution is inspired by https://gist.github.com/amiantos/16bacc9ed742c91151fcf1a41012445e
It is a working solution that handles many files (300+) and returns a presigned URL to the zip containing them.
Main Lambda:
import AWS from 'aws-sdk';
import archiver from 'archiver';
import stream from 'stream';

const S3 = new AWS.S3({
  apiVersion: '2006-03-01',
  signatureVersion: 'v4',
  httpOptions: {
    timeout: 300000 // 5 min; should match the Lambda function timeout
  }
});

const UPLOAD_BUCKET_NAME = "my-s3-bucket";
const URL_EXPIRE_TIME = 5 * 60;

export async function getZipSignedUrl(event) {
  const prefix = `uploads/id123123`; // replace this with your S3 prefix
  let files = ["12314123.png", "56787567.png"]; // replace this with your files
  if (files.length == 0) {
    console.log("No files to zip");
    return result(404, "No pictures to download");
  }
  console.log("Files to zip: ", files);

  try {
    files = files.map(file => {
      return {
        fileName: file,
        key: prefix + '/' + file,
        type: "file"
      };
    });
    const destinationKey = prefix + '/' + 'uploads.zip'
    console.log("files: ", files);
    console.log("destinationKey: ", destinationKey);

    await streamToZipInS3(files, destinationKey);
    const presignedUrl = await getSignedUrl(UPLOAD_BUCKET_NAME, destinationKey, URL_EXPIRE_TIME, "uploads.zip");
    console.log("presignedUrl: ", presignedUrl);

    if (!presignedUrl) {
      return result(500, null);
    }
    return result(200, presignedUrl);
  } catch (error) {
    console.error(`Error: ${error}`);
    return result(500, null);
  }
}
Helper functions:
export function result(code, message) {
  return {
    statusCode: code,
    body: JSON.stringify(
      {
        message: message
      }
    )
  }
}

export async function streamToZipInS3(files, destinationKey) {
  await new Promise((resolve, reject) => {
    var zipStream = streamTo(UPLOAD_BUCKET_NAME, destinationKey, resolve, reject);
    zipStream.on("error", reject);

    var archive = archiver("zip");
    archive.on("error", err => {
      throw new Error(err);
    });
    archive.pipe(zipStream);

    for (const file of files) {
      if (file["type"] == "file") {
        archive.append(getStream(UPLOAD_BUCKET_NAME, file["key"]), {
          name: file["fileName"]
        });
      }
    }
    archive.finalize();
  })
    .catch(err => {
      console.log(err);
      throw new Error(err);
    });
}

function streamTo(bucket, key, resolve, reject) {
  var passthrough = new stream.PassThrough();
  S3.upload(
    {
      Bucket: bucket,
      Key: key,
      Body: passthrough,
      ContentType: "application/zip",
      ServerSideEncryption: "AES256"
    },
    (err, data) => {
      if (err) {
        console.error('Error while uploading zip')
        reject(err)
        return
      }
      console.log('Zip uploaded')
      resolve()
    }
  ).on("httpUploadProgress", progress => {
    console.log(progress)
  });
  return passthrough;
}

function getStream(bucket, key) {
  let streamCreated = false;
  const passThroughStream = new stream.PassThrough();

  // defer opening the S3 read stream until someone actually consumes it
  passThroughStream.on("newListener", event => {
    if (!streamCreated && event == "data") {
      const s3Stream = S3
        .getObject({ Bucket: bucket, Key: key })
        .createReadStream();
      s3Stream
        .on("error", err => passThroughStream.emit("error", err))
        .pipe(passThroughStream);
      streamCreated = true;
    }
  });
  return passThroughStream;
}

export async function getSignedUrl(bucket: string, key: string, expires: number, downloadFilename?: string): Promise<string> {
  // objectExists() is a helper (not shown in the answer) that checks the key, e.g. via headObject
  const exists = await objectExists(bucket, key);
  if (!exists) {
    console.info(`Object ${bucket}/${key} does not exist`);
    return null
  }

  let params = {
    Bucket: bucket,
    Key: key,
    Expires: expires,
  };
  if (downloadFilename) {
    params['ResponseContentDisposition'] = `inline; filename="${encodeURIComponent(downloadFilename)}"`;
  }

  try {
    const url = S3.getSignedUrl('getObject', params);
    return url;
  } catch (err) {
    console.error(`Unable to get URL for ${bucket}/${key}`, err);
    return null;
  }
}
Using streams may be tricky, as I'm not sure how you could pipe multiple streams into one object. I've done this several times using standard file objects instead. It's a multi-step process, and it's quite fast. Remember that Lambda operates in Linux, so you have all Linux resources at hand, including the system /tmp directory.
1. Create a sub-directory in /tmp, call it "transient" or whatever works for you
2. Use s3.getObject() and write the file objects to /tmp/transient
3. Use the GLOB package to generate an array[] of paths from /tmp/transient
4. Loop the array and zip.addLocalFile(array[i]);
5. zip.writeZip('/tmp/files.zip'); (the full flow is sketched below)
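A sketch of those five steps, assuming adm-zip (which matches the zip.addLocalFile()/writeZip() calls above) plus the glob package; the bucket and key names are placeholders:

// Sketch of the /tmp-based approach with adm-zip and aws-sdk v2.
const fs = require('fs');
const path = require('path');
const glob = require('glob');
const AdmZip = require('adm-zip');
const AWS = require('aws-sdk');

const s3 = new AWS.S3();

exports.handler = async () => {
  const workDir = '/tmp/transient';
  fs.mkdirSync(workDir, { recursive: true }); // step 1

  // steps 2: fetch each object into /tmp/transient
  const keys = ['images/a.png', 'images/b.png']; // placeholder keys
  for (const key of keys) {
    const { Body } = await s3.getObject({ Bucket: 'my-bucket', Key: key }).promise();
    fs.writeFileSync(path.join(workDir, path.basename(key)), Body);
  }

  // steps 3-4: glob the files and add each one to the archive
  const zip = new AdmZip();
  for (const file of glob.sync(`${workDir}/*`)) {
    zip.addLocalFile(file);
  }

  // step 5: write the zip, then push it back to S3
  const zipPath = '/tmp/files.zip';
  zip.writeZip(zipPath);
  await s3.upload({
    Bucket: 'my-bucket',
    Key: 'zipped/files.zip',
    Body: fs.createReadStream(zipPath)
  }).promise();
};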
I've used a similar approach, but I'm facing the issue that some of the files in the generated ZIP don't have the correct size (and corresponding data). Is there any limitation on the size of the files this code can manage? In my case I'm zipping large files (a few larger than 1 GB), and the overall amount of data may reach 10 GB.
I don't get any error or warning message, so it all seems to work fine.
Any idea what may be happening?
