Getting s3 object metadata then creating stream - node.js

I'm downloading an object from s3 and creating a read stream object from it to process a video:
s3.getObject(params).createReadStream()
However, I also need the object's metadata, which is available through the 'metadata' property when I fetch the object directly:
s3.getObject()
How would I either:
Get the object via s3.getObject(), grab the metadata from its metadata property, and then turn it into a read stream?
var stream = fs.createReadStream(response); isn't working - input must be a string
-- OR --
Get the stream via s3.getObject().createReadStream(), and extract the metadata from the stream?
To my knowledge metadata isn't passed within streams.
Tell me if my assumptions are wrong, but I am currently stuck with these two needs:
Getting the meta data
Making it a stream

You can get the metadata via the request's httpHeaders event.
// Stream an S3 object to disk while also reading its user-defined metadata.
// The metadata arrives as `x-amz-meta-*` response headers, which the request
// exposes through its 'httpHeaders' event.
let fs = require('fs')
let aws = require('aws-sdk')
let s3 = new aws.S3()
let request = s3.getObject({
  Bucket: 'my-bucket',
  Key: 'my-key'
})
let stream
request.on('httpHeaders', (statusCode, httpHeaders) => {
  // A non-2xx response carries an error document, not the object, so do not
  // create the output file for it.
  // NOTE(review): the failure is expected to surface on the stream's 'error'
  // event handled below — confirm against the SDK version in use.
  if (statusCode >= 300) {
    return
  }
  // object metadata is represented by any header in httpHeaders starting with 'x-amz-meta-'
  const prefix = 'x-amz-meta-'
  const metadata = {}
  Object.keys(httpHeaders).forEach((name) => {
    if (name.toLowerCase().startsWith(prefix)) {
      metadata[name.slice(prefix.length)] = httpHeaders[name]
    }
  })
  console.log('metadata', metadata)
  // you can use the stream object at this point
  const file = fs.createWriteStream('./somepath')
  // Without an 'error' listener a failed write would throw as an uncaught exception.
  file.on('error', (err) => {
    console.error('write failed', err)
  })
  stream.pipe(file)
  stream.on('end', () => {
    console.log('were done')
  })
})
stream = request.createReadStream()
// Attached right after creation so failures that happen before any headers
// arrive (missing key, network error) are reported instead of crashing.
stream.on('error', (err) => {
  console.error('download failed', err)
})
Alternatively you can also call s3.headObject to get the metadata without downloading the object and then download the object using s3.getObject

So I kind of found a solution. This works for most files under 10 MB. If they are larger than that, the buffer stream ends before the file has finished being written. I've tried putting bufferStream.end inside the 'finish' handler, but then my callback never fires...
/**
 * Fetch the object named by `srcKey` from the bucket recorded in the S3 event.
 *
 * @param {object} s3Event - the `s3` section of an event record; only `bucket.name` is read.
 * @param {string} srcKey - key of the object to fetch.
 * @param {Function} cb - passed straight to `s3.getObject` as its completion callback.
 */
function download(s3Event, srcKey, cb){
  console.log('Starting download');
  var request = {
    Bucket: s3Event.bucket.name,
    Key: srcKey
  };
  s3.getObject(request, cb);
}
/**
 * Write the downloaded object body to `<tempDir>/download`.
 *
 * The whole Buffer is written with a single `fs.writeFile` call instead of
 * being pushed through a shared pass-through stream: a stream that has been
 * ended cannot be reused by a later call, and large bodies were cut short.
 *
 * @param {object} data - getObject response; `data.Body` holds the bytes to write.
 * @param {Function} cb - node-style callback, called as `cb(err)` on failure
 *   or `cb(null, data)` once the file has been written.
 */
function writeToFile(data, cb){
  var dlFile = path.join(tempDir, 'download');
  console.log('data = ', data);
  console.log('data.Body = ', data.Body);
  fs.writeFile(dlFile, data.Body, function (err) {
    if (err) {
      // Surface write failures to the caller instead of never calling back.
      cb(err);
      return;
    }
    console.log('finished writing stream');
    cb(null, data);
  });
}
exports.handler = function(event, context) {
// Read options from the event.
console.log("Reading options from event:\n", util.inspect(event, {depth: 5}));
var s3Event = event.Records[0].s3;
var srcKey = decodeURIComponent(s3Event.object.key);
var keyPrefix = srcKey.replace(/\.[^/.]+$/, '');
var dstBucket = "jump-lambda";
async.waterfall([
function (cb){
download(s3Event, srcKey, cb);
},
function (data, cb){
writeToFile(data, cb);
},
function (data, cb){
fluentffmpegProcess(data, cb);
},
function (data, cb){
transform(data, cb);
},
function (data, buffer, cb){
thumbnailUpload(data, buffer, dstBucket, keyPrefix, cb);
},
function (data, cb){
updateParse(data, srcKey, keyPrefix, cb);
},
],
function (err) {
if (err) {
console.error(
'Unable to convert video to scene object, with error code: ' + err.description
);
} else {
console.log(
'Successfully created scene object, updated venue, and created thumbnail'
);
}
}
);
};

Related

Read file from S3 into a buffer

I am trying to write a function in Lambda that requires a file from S3 to be read into a buffer. I have seen multiple examples of them being read into streams but none with buffers. My current code for getting the object is
exports.handler = async (event, context, callback) => {
//console.log("Reading options from event:\n", util.inspect(event, {depth: 5}));
const srcBucket = event.Records[0].s3.bucket.name;
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
const params =
{
Bucket: srcBucket,
Key: srcKey
};
try
{
var slippiGame = s3.getObject(params, function(error, data)
{
if (error)
{
console.log(error);
}
else
{
const game = new SlippiGame(slippiGame);
}
});
}
catch (e)
{
console.log("fail");
console.log(e);
}
console.log(slippiGame);
return("success");
};
I know that this approach is wrong since my slippiGame variable isn't a buffer and the SlippiGame constructor only takes a local file or a buffer. Is there a way to either store the file from S3 temporarily in the Lambda directory or create a buffer to hold the S3 file in?
The Body attribute of data is already a Buffer.
If you prefer file download, create a read stream from the response and pipe it to a write stream. See here for more.
// Download an S3 object straight to a local file by piping the response
// stream into a file write stream.
const s3 = new AWS.S3({ apiVersion: '2006-03-01' });
const params = {
  Bucket: 'myBucket',
  Key: 'myImageFile.jpg',
};
const file = require('fs').createWriteStream('/path/to/file.jpg');
const objectStream = s3.getObject(params).createReadStream();
objectStream.pipe(file);
One additional problem with your current code: you are mixing the async and the callback variants of the Lambda function handler -- it's better to just use the async variant and get rid of callback

Adding async method to Mongoose find

I am trying to simplify the process of taking a documents file attribute and converting to the temp S3 link on the fly. Normally I have been grabbing all documents in the controller and then looping through and replacing the links before passing to the view. This works but can be a little messy if the controller logic is complex. I am trying to write a custom method for my schema where these links are replaced more easily. The following prints out the link in the console but shows undefined in the view because of the wonderful async nature of javascript. Is there a similar way to get this to work?
Maybe create a method similar to the way populate works?
ExampleSchema.js:
/**
 * Question version: asks S3 for a signed GET URL for `file_name`.
 *
 * The URL is only logged; the value returned from the inner handler goes
 * back to the SDK, not to the caller, and `callback` is never invoked.
 */
exampleSchema.methods.getS3Link = function(file_name, callback) {
  const client = new aws.S3();
  const onSigned = function (err, data) {
    console.log(data); // prints out the correct link
    return data; // shows undefined in view
  };
  client.getSignedUrl('getObject', {
    Bucket: process.env.S3_BUCKET,
    Key: file_name,
    Expires: 6000
  }, onSigned);
}
There is a callback argument for getS3Link function. You can pass the data in this callback function to retrieve it in view.
/**
 * Produce a signed GET URL for `file_name` and hand it to `callback`.
 *
 * @param {string} file_name - object key inside the bucket named by S3_BUCKET.
 * @param {Function} callback - node-style callback: `callback(err)` when
 *   signing fails, `callback(null, url)` on success.
 */
exampleSchema.methods.getS3Link = function(file_name, callback) {
  const s3 = new aws.S3();
  const s3Params = {
    Bucket: process.env.S3_BUCKET,
    Key: file_name,
    Expires: 6000
  };
  s3.getSignedUrl('getObject', s3Params, function (err, data) {
    if (err) {
      // Forward the failure instead of reporting success with no URL.
      callback(err);
      return;
    }
    console.log(data); //prints out the correct link
    callback(null, data);
  })
}
Simpler:
/**
 * Request a signed GET URL for `file_name`; `callback` is passed directly
 * to `getSignedUrl`, so it receives that call's (err, url) arguments.
 */
exampleSchema.methods.getS3Link = function(file_name, callback) {
  const client = new aws.S3();
  client.getSignedUrl('getObject', {
    Bucket: process.env.S3_BUCKET,
    Key: file_name,
    Expires: 6000
  }, callback);
}

convert pdf pages into images using AWS s3 and Lambda

I am using node 8.1,
I want to convert PDF into images using S3 and lambda but the CloudWatch repeatedly giving the following error:
"Unable to import module 'index': Error"
The below file is named as index.js in the main project folder:
const util = require('util');
const AWS = require('aws-sdk');
const gm = require('gm').subClass({ imageMagick: true });
const s3 = new AWS.S3();
exports.handler = (event, context) => {
const srcBucket = event.Records[0].s3.bucket.name;
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
const dstBucket = srcBucket;
const dstKey = srcKey.replace('.pdf', '.png');
s3.getObject({Bucket: srcBucket, Key: srcKey}, (err, response) => {
if (err) {
context.done('S3 get object error:', err);
context.fail(err);
}
// conversion start
gm(response.Body)
.setFormat("png")
.resize(200) // you can configure
.quality(100) // you can configure
.stream((err, stdout, stderr) => {
if(err) {
console.log("gm conversion process error: ");
console.log(err,stdout,stderr);
context.fail(err);
}
const chunks = [];
stdout.on('data', (chunk) => {
chunks.push(chunk);
});
stdout.on('end', () => {
console.log('gm process finished');
const buffer = Buffer.concat(chunks);
// Upload start
const params = {
Bucket: dstBucket,
Key: dstKey,
ContentType: 'image/png',
Body: buffer
};
s3.putObject(params, (err, data) => {
if (err) {
console.log("S3 upload error: " + err);
context.fail(err);
}
console.log('S3 upload finished!');
console.log('Bucket: ' + dstBucket);
console.log('key: ' + dstKey);
context.succeed({
"error":false
});
});
});
stderr.on('data', (data) => {
console.log('stderr data: ' + data);
});
});
});
};
I am not very familiar with AWS Lambda or with file conversion, and this is the one area where I am stuck and cannot find a way to solve the problem.
I think there might be a problem with how the function is exposed as index.handler, but I don't know where the fault is.
Problem sounds like you didn't configure the .yml file properly.
https://docs.aws.amazon.com/lambda/latest/dg/serverless-deploy-wt.html#serv-deploy (for the cli to deploy)
https://serverless.com/framework/docs/providers/aws/guide/serverless.yml/ (the config file that you need to write so aws knows what to import)
Have you tried running that code locally? It sounds like an error in Syntax.
Anyway, I’ve built exactly this functionality and it’s available on GitHub here:
https://github.com/rcastoro/PDFImagine
You can see from the video below it takes PDFs in an s3 bucket, and using an aws event, notifies the lambda function to convert new PDFs into images.
https://youtu.be/yU-jA2_5Tvs

Append string to a text file Nodejs in AWS Lambda

The scenario: A text file snapshot-ids.txt is located in a S3 bucket. I'm trying to create a Lambda function that run daily (Cron) that would use AWS CLI to take snapshot of a volume, then save that snapshotId to a text file in S3. On the next time another snapshot is created, the new snapshotId will be saved to the same text file on S3. The text file is a place holder for snapshotIds and when it reaches a threshold, it will delete the top snapshotIds and add the new one at the end (FIFO pipe).
For people who don't use AWS lambda, my question is what's the quickest way to append text to a variable and return the new variable with new lines in it.
For people who know Lambda, this is the basic code from AWS Lambda I have, I use fs.appendFile, but how do I use the file I got from s3.getObject() and eventually pass it to s3.putObject()?
EDIT: this is my progress:
console.log('Loading function');
var aws = require('aws-sdk');
var s3 = new aws.S3({ apiVersion: '2006-03-01' });
var fs = require('fs');
exports.handler = function(event, context) {
//console.log('Received event:', JSON.stringify(event, null, 2));
// Get the object from the event and show its content type
var bucket = event.Records[0].s3.bucket.name;
var key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
var params = {
Bucket: bucket,
Key: key
};
s3.getObject(params, function(err, data) {
if (err) {
console.log(err);
var message = "Error getting object " + key + " from bucket " + bucket +
". Make sure they exist and your bucket is in the same region as this function.";
console.log(message);
context.fail(message);
} else {
// fs.appendFile('snapshot-ids.txt', 'snap-001', function (err) {
// if (err) throw err;
// console.log('The "data to append" was appended to file!');
// });
console.log(params_new);
console.log('CONTENT TYPE getObject:', data.ContentType);
// context.succeed(data.Body.toString('ascii'));
}
});
var params_new = {
Bucket: bucket,
Key: key,
Body: 'snap-002'
};
s3.putObject(params_new, function(err, data) {
console.log('put here');
if (err) {
console.log(err);
var message = "Error getting object " + key + " from bucket " + bucket +
". Make sure they exist and your bucket is in the same region as this function.";
console.log(message);
context.fail(message);
} else {
console.log('CONTENT TYPE putObject:', data.ContentType);
context.succeed(data.ContentType);
}
});
};
A couple of things I noticed with your code so far...
You can't call s3.putObject until s3.getObject is finished and you have the file from s3.
You aren't dealing with the file system since you get the data from s3.getObject.
With those things in mind I modified your code(I haven't tried this but it should get you going in the right direction):
console.log('Loading function');
var aws = require('aws-sdk');
var s3 = new aws.S3({ apiVersion: '2006-03-01' });
exports.handler = function(event, context) {
//console.log('Received event:', JSON.stringify(event, null, 2));
// Get the object from the event and show its content type
var bucket = event.Records[0].s3.bucket.name;
var key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
var params = {
Bucket: bucket,
Key: key
};
s3.getObject(params, function(err, data) {
if (err) {
console.log(err);
var message = "Error getting object " + key + " from bucket " + bucket +
". Make sure they exist and your bucket is in the same region as this function.";
console.log(message);
context.fail(message);
} else {
console.log(params_new);
console.log('CONTENT TYPE getObject:', data.ContentType);
// convert body(file contents) to a string so we can append
var body = data.Body.toString('utf-8');
// append data
body += 'snap-001\n';
var params_new = {
Bucket: bucket,
Key: key,
Body: body
};
//NOTE this call is now nested in the s3.getObject call so it doesn't happen until the response comes back
s3.putObject(params_new, function(err, data) {
console.log('put here');
if (err) {
console.log(err);
var message = "Error getting object " + key + " from bucket " + bucket +
". Make sure they exist and your bucket is in the same region as this function.";
console.log(message);
context.fail(message);
} else {
console.log('CONTENT TYPE putObject:', data.ContentType);
context.succeed(data.ContentType);
}
});
}
});
};
Something else to keep in mind is if you have more than 1 of this Lambda running at the same time it is likely they will stomp on each others changes. Sounds like you will just be scheduling it once a day so it shouldn't be a big deal but its worth noting.

Saving an image stored on s3 using node.js?

I'm trying to write an image server that uses node.js to store images on s3. Uploading the image works fine, and I can download and view it correctly using an s3 browser client (I'm using dragondisk, specifically, but I've successfully downloaded it with other ones too), but when I download it with node and try to write it to disk, I'm unable to open the file (it says it may be damaged or use a file format that Preview does not recognize). I'm using the amazon sdk for node and fs to write the file. I know that you can pass an optional encoding to fs.writeFile, but I've tried them all and it doesn't work. I've also tried setting ContentType on putObject and ResponseContentType on getObject, as well as ContentEncoding and ResponseContentEncoding (and all of these things in various combinations). Same result. Here's some code:
// Module setup: SDK, image helper, id generator and filesystem access,
// followed by SDK configuration from a local JSON file.
var AWS = require('aws-sdk');
var gm = require('../lib/gm');
// Module name corrected from 'node-uui'; `uuid.v4()` is called in upload.
var uuid = require('node-uuid');
var fs = require('fs');
AWS.config.loadFromPath('./amazonConfig.json');
var s3 = new AWS.S3();
var bucket = 'myBucketName'; // There's other logic here to set the bucket name.
exports.upload = function(req, res) {
var id = uuid.v4();
gm.format("/path/to/some/image.jpg", function(format){
var key = req.params.dir + "/" + id + "/default." + format;
fs.readFile('/path/to/some/image.jpg', function(err, data){
if (err) { console.warn(err); }
else {
s3.client.putObject({
Bucket: bucket,
Key: key,
Body: data,
ContentType: 'image/jpeg'
// I've also tried adding ContentEncoding (in various formats) here.
}).done(function(response){
res.status(200).end(JSON.stringify({ok:1, id: id}));
}).fail(function(response){
res.status(response.httpResponse.statusCode).end(JSON.stringify(({err: response})));
});
}
});
});
};
exports.get = function(req, res) {
var key = req.params.dir + "/" + req.params.id + "/default.JPEG";
s3.client.getObject({
Bucket: bucket,
Key: key,
ResponseContentType: 'image/jpeg'
// Tried ResponseContentEncoding here in base64, binary, and utf8
}).done(function(response){
res.status(200).end(JSON.stringify({ok:1, response: response}));
var filename = '/path/to/new/image/default.JPEG';
fs.writeFile(filename, response.data.Body, function(err){
if (err) console.warn(err);
// This DOES write the file, just not as an image that can be opened.
// I've tried pretty much every encoding as the optional third parameter
// and I've matched the encodings to the ResponseContentEncoding and
// ContentEncoding above (in case it needs to be the same)
});
}).fail(function(response){
res.status(response.httpResponse.statusCode).end(JSON.stringify({err: response}));
});
};
Incidentally, I'm using express for routing, so that's where req.params comes from.
For people who are still struggling with this issue. Here is the approach I used with native aws-sdk.
// Configure the SDK from a local JSON file, then create an S3 client with
// a `Bucket` value supplied through the constructor's `params` option.
var AWS = require('aws-sdk');
AWS.config.loadFromPath('./s3_config.json');
var s3Bucket = new AWS.S3({
  params: {
    Bucket: 'myBucket'
  }
});
inside your router method :-
ContentType should be set to the content type of the image file
// Decode a base64 data-URL image from the request body and upload it under
// the user's id. The data-URL prefix is stripped before decoding.
// Declared with const (it was an implicit global) and built with
// Buffer.from, since `new Buffer(...)` is deprecated.
const buf = Buffer.from(req.body.imageBinary.replace(/^data:image\/\w+;base64,/, ""), 'base64');
var data = {
  Key: req.body.userId,
  Body: buf,
  ContentEncoding: 'base64',
  ContentType: 'image/jpeg'
};
s3Bucket.putObject(data, function(err, result){
  if (err) {
    console.log(err);
    // Log the request that failed; the response argument is empty on error.
    console.log('Error uploading data: ', data.Key);
  } else {
    console.log('succesfully uploaded the image!');
  }
});
s3_config.json file is:-
{
"accessKeyId":"xxxxxxxxxxxxxxxx",
"secretAccessKey":"xxxxxxxxxxxxxx",
"region":"us-east-1"
}
Ok, after significant trial and error, I've figured out how to do this. I ended up switching to knox, but presumably, you could use a similar strategy with aws-sdk. This is the kind of solution that makes me say, "There has to be a better way than this," but I'm satisfied with anything that works, at this point.
// Fetch the object with knox and send it to the client as one Buffer.
// Chunks are collected as Buffers and joined with Buffer.concat, which
// avoids converting each chunk to a binary string and back again.
client.getFile(key, function(err, fileRes){
  if (err) {
    // Without this guard a failed request would dereference an undefined response.
    console.warn(err);
    res.status(500).end();
    return;
  }
  var chunks = [];
  fileRes.on('data', function(chunk){
    chunks.push(chunk);
  }).on('end', function(){
    res.set('Content-Type', pic.mime);
    res.set('Content-Length', fileRes.headers['content-length']);
    res.send(Buffer.concat(chunks));
  });
});
getFile() returns data chunks as buffers. One would think you could just pipe the results straight to front end, but for whatever reason, this was the ONLY way I could get the service to return an image correctly. It feels redundant to write a buffer to a binary string, only to write it back into a buffer, but hey, if it works, it works. If anyone finds a more efficient solution, I would love to hear it.
uploadfile(file, filename, folder) {
const bucket = new S3(
{
accessKeyId: 'enter your access key id here',
secretAccessKey: 'enter your secret key here.',
region: 'us-east-2'
});
const params = {
Bucket: 'enter your bucket here.',
Key: folder + '/' + filename + ".jpg",
ACL: 'public-read',
ContentEncoding : 'base64,',
Body: new Buffer(file.replace(/^data:image\/\w+;base64,/, ""),'base64'),
ContentType: 'image/jpeg'
};
bucket.upload(params, function (err, data) {
if (err) {
console.log('There was an error uploading your file: ', err);
return false;
}
console.log('Successfully uploaded file.', data);
return true;
});
}
As another solution. I fixed mine by using Body: fs.createReadStream instead and it worked like a charm.
/**
 * Upload the request's file to S3 by streaming it from disk.
 *
 * The file is passed to `s3.upload` as a read stream only; the earlier
 * extra `fs.readFile` loaded the whole file into memory and never used
 * the result.
 */
const uploadFile = () => {
  const params = {
    Bucket: `${process.env.S3_Bucket}/ProfilePics`, // pass your bucket name
    Key: `${decoded.id}-pic.${filetypeabbrv}`, // object is saved as <id>-pic.<extension>
    Body: fs.createReadStream(req.file.path),
    ContentType: filetype,
  };
  s3.upload(params, function (s3Err, data) {
    if (s3Err) throw s3Err;
    console.log(`File uploaded successfully at ${data.Location}`);
  });
};

Resources