Upgrade aws-sdk to version 3 - streaming S3 upload - node.js

I am trying to upgrade a program to aws-sdk version 3, but I am getting the error:
NotImplemented: A header you provided implies functionality that is not implemented
The function get_s3_stream needs to return a stream to the main program.
SDK version 2, this works:
var AWS = require("aws-sdk");
var s3 = new AWS.S3({apiVersion: '2006-03-01'});
const stream = require('stream');
function get_s3_stream() {
  var pass = new stream.PassThrough();
  var params = {Bucket: "bucketname", Key: "testfile1.txt", Body: pass};
  s3.upload(params, function(err, data) {
    console.log(err, data);
  });
  return pass;
}
const inputStream = stream.Readable.from(["input string1"])
const outStream = get_s3_stream()
inputStream.pipe(outStream);
SDK version 3, doesn't work:
const { S3Client, PutObjectCommand } = require("@aws-sdk/client-s3");
const s3Client = new S3Client({ region: "us-east-1" });
const stream = require('stream');

function get_s3_stream() {
  const pass = new stream.PassThrough();
  var params = {Bucket: "bucketname", Key: "testfile2.txt", Body: pass};
  s3Client.send(new PutObjectCommand(params, function(err, data) {
    console.log(err, data);
  }));
  return pass;
};
const inputStream = stream.Readable.from(["input string2"])
const outStream = get_s3_stream()
inputStream.pipe(outStream);
How can a stream be sent to S3 with the new version?

Apparently this is a known issue: https://github.com/aws/aws-sdk-js-v3/issues/1920.
A workaround that keeps the pass-through stream approach is to use Upload from @aws-sdk/lib-storage:
const { S3Client } = require("@aws-sdk/client-s3");
const { Upload } = require('@aws-sdk/lib-storage');
const stream = require('stream');
const s3Client = new S3Client({ region: "us-east-1" });

function get_s3_stream() {
  const pass = new stream.PassThrough();
  const upload = new Upload({
    client: s3Client,
    params: {
      Bucket: 'bucketname',
      Key: 'testfile2.txt',
      Body: pass,
      ContentType: 'text/plain',
    },
  });
  upload.done()
    .then((res) => {
      console.log(res);
    })
    .catch((err) => {
      console.error(err);
    });
  return pass;
}
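The calling code from the question works unchanged with this workaround; for reference, a minimal way to drive it (same placeholder bucket and key as above):

// Same usage as in the question: pipe any readable source into the
// pass-through stream returned by get_s3_stream().
const inputStream = stream.Readable.from(["input string2"]);
const outStream = get_s3_stream();
inputStream.pipe(outStream);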

Related

How can I upload multiple images to an s3 bucket in a lambda function using node.js?

I am not very familiar with Node, and I am trying to upload an array of media objects to an S3 bucket from an AWS Lambda Node function.
The payload has an album, which is an array of key/data dictionaries. My code is below, but I'm certain it is wrong.
const awsServerlessExpress = require('aws-serverless-express');
const app = require('./app');
const server = awsServerlessExpress.createServer(app);
const AWS = require("aws-sdk");
const docClient = new AWS.DynamoDB.DocumentClient();
var s3 = new AWS.S3();
var s3Params = {
  Bucket: 'bucketid',
  ContentEncoding: 'base64',
  ContentType: 'image/jpeg'
};

exports.handler = async (event, context) => {
  console.log(event);
  var body = JSON.parse(event.body);
  if (typeof body.album !== 'undefined' && body.album) {
    body.album.forEach(function (value) {
      var data = body.album.mediaString;
      let mediaData = new Buffer(data, 'base64');
      var mediaKey = body.album.mediaKey;
      try {
        s3Params = {
          Bucket: 'bucketID',
          Key: mediaKey,
          Body: mediaData
        };
        try {
          const stored = await s3.upload(s3Params).promise();
          console.log("stored successfully");
          return { body: JSON.stringify(data) };
        } catch (err) {
          console.log("error storing");
          console.log(err);
          return { error: err };
        }
      } catch (err) {
        return { error: err };
      }
    });
    return { body: JSON.stringify(data) };
  } else {
    return { error: 'error'};
  }
};
I get an error that s3 is not found. Am I going about this all wrong? (A sketch of one possible fix follows the working single-image example below.)
When I only upload one image, with the following code, everything works fine:
const awsServerlessExpress = require('aws-serverless-express');
const app = require('./app');
const server = awsServerlessExpress.createServer(app);
const AWS = require("aws-sdk");
const docClient = new AWS.DynamoDB.DocumentClient();
var s3 = new AWS.S3();
var s3Params = {
  Bucket: 'bucketID',
  ContentEncoding: 'base64',
  ContentType: 'image/jpeg'
};

exports.handler = async (event, context) => {
  var body = JSON.parse(event.body);
  var data = body.mediaString;
  let mediaData = new Buffer(data, 'base64');
  var mediaKey = body.mediaKey;
  try {
    s3Params = {
      Bucket: 'bucketID',
      Key: mediaKey,
      Body: mediaData
    };
    try {
      const stored = await s3.upload(s3Params).promise();
      console.log("stored successfully");
      return { body: JSON.stringify(data) };
    } catch (err) {
      console.log("error storing");
      console.log(err);
      return { error: err };
    }
  } catch (err) {
    return { error: err };
  }
};
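One thing that stands out in the failing version: await is used inside a plain (non-async) forEach callback, which is invalid, and even with an async callback forEach would not wait for the uploads before the handler returns; it also reads body.album.mediaString rather than the fields of each entry. A minimal, untested sketch of the same handler using Promise.all, assuming each album entry carries mediaKey and mediaString fields, and reusing the s3 client from the snippet above:

// Hedged sketch, not the original code: start one upload per album entry
// and wait for all of them before returning.
exports.handler = async (event) => {
  const body = JSON.parse(event.body);
  if (!body.album) {
    return { error: 'no album in payload' };
  }
  const uploads = body.album.map((entry) => {
    // Buffer.from replaces the deprecated new Buffer(); field names are assumed.
    const mediaData = Buffer.from(entry.mediaString, 'base64');
    return s3.upload({
      Bucket: 'bucketID',          // placeholder name from the question
      Key: entry.mediaKey,
      Body: mediaData,
      ContentType: 'image/jpeg'
    }).promise();
  });
  try {
    const stored = await Promise.all(uploads);
    console.log("stored successfully");
    return { body: JSON.stringify(stored.map((s) => s.Key)) };
  } catch (err) {
    console.log("error storing", err);
    return { error: err };
  }
};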

Can't upload files from Lambda to S3

I tested on localhost, then checked S3 and saw that a new file had been created.
But when testing on Lambda, although there is no error, no file appears on S3, and the log from s3.upload(params).promise() is never printed.
var fs = require('fs');
var AWS = require('aws-sdk');

exports.handler = async (event, context, callback) => {
  context.callbackWaitsForEmptyEventLoop = false;
  try {
    AWS.config.update({
      accessKeyId: accessKeyId,
      secretAccessKey: secretAccessKey
    });
    var s3 = new AWS.S3();
    var path = 'myfile.txt';
    var file_buffer = fs.readFileSync(path);
    console.log(file_buffer);
    var params = {
      Bucket: 'bucket-dev',
      Key: '2222.txt',
      Body: file_buffer
    };
    console.log("1111");
    s3.upload(params).promise()
      .then(function(data) {
        console.log("Successfully uploaded to");
        callback(null, "All Good");
      })
      .catch(function(err) {
        console.error(err, err.stack);
        callback(err);
      });
    console.log("2222");
    return context.logStreamName;
  } catch (err) {
    console.log(err);
    callback(err);
  }
}
Thanks
Try not to mix and match async and callback styles. Because the handler is async and returns context.logStreamName right away, the invocation can end before the upload promise resolves, so the .then/.catch logging never runs and nothing lands in S3. Something like this might be closer to what you want:
var fs = require("fs");
var AWS = require("aws-sdk");
exports.handler = async (event, context) => {
AWS.config.update({
accessKeyId,
secretAccessKey,
});
const s3 = new AWS.S3();
const path = "myfile.txt";
const file_buffer = fs.readFileSync(path);
const params = {
Bucket: "bucket-dev",
Key: "2222.txt",
Body: file_buffer,
};
console.log("1111");
const res = await s3.upload(params).promise();
console.log("Successfully uploaded", res);
return "All good";
};

Is there any way to upload fluent-ffmpeg converted videos directly to s3 without storing them on local?

Is it possible to store ffmpeg output directly to S3 without writing it to local disk or any other intermediate storage?
Below is my code so far, which uses ffmpeg to convert the format of a video. The conversion part works, but I need to send its output directly to an S3 bucket. Does anyone have an idea how to approach this? (A sketch of one approach follows the code below.)
const AWS = require('aws-sdk');
const fs = require('fs');
const ffmpeg = require('fluent-ffmpeg');
const axios = require('axios');

const s3 = new AWS.S3({
  endpoint: 's3-ap-south-1.amazonaws.com', // Put your region's endpoint
  accessKeyId: S3_ACCESS_KEY_ID,           // Put your accessKeyId
  secretAccessKey: S3_ACCESS_SECRET_KEY,   // Put your secretAccessKey
  Bucket: S3_BUCKET_NAME,                  // Put your bucket name
  signatureVersion: 'v4',
  region: 'ap-south-1'                     // Put your region
});

var params = {
  Bucket: S3_BUCKET_NAME,
  Delimiter: '',
  Prefix: S3_STORE_PATH
};

s3.listObjects(params, function (err, data) {
  if (err) throw err;
  console.log(data);
  data.Contents.forEach(function (obj, index) {
    const file_name = obj.Key;
    const type = "mp4";
    console.log(obj.Key);
    const url = s3.getSignedUrl('getObject', {
      Bucket: S3_BUCKET_NAME,
      Key: obj.Key,
      Expires: signedUrlExpireSeconds
    });
    console.log("SIGNED URL= ", url);
    const filename = file_name.split('.').slice(0, -1).join('.');
    const localFileOutput = `${filename}.${type}`;
    // const localFileOutput = `${bucket_url}${filename}.${type}`;
    console.log(localFileOutput);
    const key = `${filename}.${type}`;
    const convert_video = async (req, res) => {
      await new Promise((resolve, reject) => {
        ffmpeg().input(url)
          .toFormat('mp4')
          .output(localFileOutput)
          .on('end', async () => {
            const params = {
              Bucket: S3_BUCKET_NAME,
              Key: key,
              Body: localFileOutput // note: this uploads the file name string, not the converted file's contents
            };
            // const fileContent = await fs.readFileSync(localFileOutput);
            await s3.putObject(params).promise();
            resolve();
          }).run();
      });
      // res.send("success")
    };
    convert_video();
  });
});
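One way to avoid the intermediate file entirely is to pipe the fluent-ffmpeg output into a PassThrough stream and hand that stream to s3.upload, the same pattern used in the first question on this page. A minimal, untested sketch; it assumes the ffmpeg, s3 and S3_BUCKET_NAME values from the snippet above, and the fragmented-MP4 flags are there because a plain MP4 container cannot be written to a non-seekable pipe:

// Hedged sketch: stream the converted video straight to S3 via a PassThrough
// instead of writing a local file first.
const { PassThrough } = require('stream');

function streamConvertToS3(url, key) {
  const pass = new PassThrough();

  // Start the upload first; s3.upload accepts a stream of unknown length.
  const uploadPromise = s3.upload({
    Bucket: S3_BUCKET_NAME,
    Key: key,
    Body: pass,
    ContentType: 'video/mp4'
  }).promise();

  ffmpeg(url)
    .outputOptions(['-movflags', 'frag_keyframe+empty_moov']) // make the MP4 output pipeable
    .toFormat('mp4')
    .on('error', (err) => pass.destroy(err))
    .pipe(pass, { end: true });

  return uploadPromise; // resolves when the upload has finished
}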

untar/decompress to a stream in node

I am trying to write an AWS Lambda that will take a tar.gz from an S3 bucket, inflate it, and then unpack it while streaming the files back to another S3 bucket.
I have this code:
var AWS = require('aws-sdk');
var fs = require('fs');
var zlib = require('zlib');
var uuid = require('uuid/v4');
var tar = require('tar-stream');
var pack = tar.pack();
var s3 = new AWS.S3();

exports.handler = (event, context, callback) => {
  var bucket = event.Records[0].s3.bucket.name;
  var key = event.Records[0].s3.object.key;
  var file = 'S3://' + bucket + '/' + key;
  console.log(bucket);
  console.log(key);
  var readParams = {
    Bucket: bucket,
    Key: key
  };
  var dataStream = s3.getObject(readParams).createReadStream();
  var extract = tar.extract();
  extract.on('entry', function(header, stream, next) {
    console.log(header.name);
    var writeParams = {
      Bucket: process.env.JOB_PROCESSING_BUCKET,
      Key: uuid() + '-' + header.name,
      Body: stream
    };
    s3.upload(writeParams)
      .on('httpUploadProgress', function(evt) {
        console.log('Progress:', evt.loaded, '/', evt.total);
      })
      .send(function(err, data) {
        if (err) console.log("An error occurred", err);
        console.log("Uploaded the file at", data.Location);
      });
    stream.on('end', function() {
      next(); // ready for next entry
    });
    stream.resume(); // just auto drain the stream
  });
  extract.on('finish', function() {
    // all entries read
  });
  dataStream.pipe(zlib.createGunzip()).pipe(extract);
  callback(null, 'Gunzip Lambda Function');
};
It pulls the file and handles the gunzipping, and I can see each file being extracted on entry. The code then tries to stream each file to S3, which creates a 0 KB file, hangs around as if it's reading the stream, then continues on to the next one.
Why can't it read/process the stream body?
Is there a better way of doing this?
Thanks
I don't know if it's the best solution but the following code works for me.
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const tar = require('tar-stream');
const zlib = require('zlib');
const stream = require('stream');
const uuid = require('uuid');

exports.get = (event, context) => {
  var params = {
    Bucket: event.Records[0].s3.bucket.name,
    Key: event.Records[0].s3.object.key
  };
  var dataStream = s3.getObject(params).createReadStream();
  var extract = tar.extract();
  extract.on('entry', function(header, inputStream, next) {
    inputStream.pipe(uploadFromStream(s3, header));
    inputStream.on('end', function() {
      next(); // ready for next entry
    });
    inputStream.resume(); // just auto drain the stream
  });
  extract.on('finish', function() {
    // all entries read
  });
  dataStream.pipe(zlib.createGunzip()).pipe(extract);
}

function uploadFromStream(s3, header) {
  var pass = new stream.PassThrough();
  var writeParams = {
    Bucket: process.env.JOB_PROCESSING_BUCKET,
    Key: uuid.v1() + '-' + header.name,
    Body: pass
  };
  s3.upload(writeParams, function(err, data) {
    context.done(err, data);
  });
  return pass;
}
Tried for a couple of hours to get this to work; it turns out the 'end' event has been replaced with 'finish'. So the answer above works great, just one small change -
inputStream.on('end', function() {
next(); // ready for next entry
});
- Should be -
inputStream.on('finish', function() {
next(); // ready for next entry
});

stream the contents of an S3 object into hash algorithm node.js

I'm new to node.js and I'm trying to write an AWS Lambda function that streams the contents of an S3 object into Node's crypto module to create an MD5 checksum of the object. Not sure why, but every time I run the code it generates a different hash value in console.log. Can anyone point me in the right direction to fix my code? Appreciate the help!
var crypto = require('crypto');
var fs = require('fs');
var AWS = require('aws-sdk');
var s3 = new AWS.S3();

exports.handler = (event, context, callback) => {
  var params = {
    Bucket: 'bucket_name',
    Key: 'key',
  };
  var hash = crypto.createHash('md5');
  var stream = s3.getObject(params, function(err, data) {
    if (err) {
      console.log(err);
      return;
    }
  }).createReadStream();
  stream.on('data', function (data) {
    hash.update(data, 'utf-8');
  });
  stream.on('end', function () {
    console.log(hash.digest('hex'));
  });
};
You were close. You are mixing the "callback" style method signature with a "createReadStream" signature. Try this:
const crypto = require('crypto');
const fs = require('fs');
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

exports.handler = (event, context, callback) => {
  let params = {
    Bucket: 'bucket_name',
    Key: 'key',
  };
  let hash = crypto.createHash('md5');
  let stream = s3.getObject(params).createReadStream();
  stream.on('data', (data) => {
    hash.update(data);
  });
  stream.on('end', () => {
    let digest = hash.digest('hex');
    console.log(digest);
    callback(null, digest);
  });
};
Not directly an answer, but you can also add the MD5 hash as the ETag when uploading a file to S3.
const crypt = require('crypto');
const fs = require('fs').promises;
const aws = require('aws-sdk');

async function uploadFileToS3WithMd5Hash(bucket, filename, s3Key = null) {
  const data = await fs.readFile(filename);
  const md5Base64 = crypt.createHash("md5").update(data).digest('base64');
  if (!s3Key) {
    s3Key = filename;
  }
  /** Should you want to get the MD5 in hex format: */
  // const md5Hex = Buffer.from(md5Base64, 'base64').toString('hex');
  return new Promise((res, rej) => {
    const s3 = new aws.S3();
    s3.putObject({
      Bucket: bucket,
      Key: s3Key,
      Body: data,
      ContentMD5: md5Base64,
    }, (err, resp) => err ? rej(err) : res(resp));
  });
}

uploadFileToS3WithMd5Hash('your-own-bucket', 'file.txt')
  .then(console.log)
  .catch(console.error);
So by checking the ETag for an object on S3, you get the hex string of the file's MD5 hash.
In some cases (see this post by Dennis), MD5 checksum is computed automatically upon upload.
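A small, hedged sketch of checking this after an upload, reusing the aws import from the snippet above (the ETag comes back wrapped in double quotes, and it only equals the MD5 for single-part, non-multipart uploads):

// Hedged sketch: compare the object's ETag with a locally computed MD5 hex string.
async function verifyMd5(bucket, key, localMd5Hex) {
  const s3 = new aws.S3();
  const head = await s3.headObject({ Bucket: bucket, Key: key }).promise();
  const etag = head.ETag.replace(/"/g, ''); // strip the surrounding quotes
  return etag === localMd5Hex;
}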
