I am trying to write a function in Lambda that requires a file from S3 to be read into a buffer. I have seen multiple examples of them being read into streams but none with buffers. My current code for getting the object is:
exports.handler = async (event, context, callback) => {
//console.log("Reading options from event:\n", util.inspect(event, {depth: 5}));
const srcBucket = event.Records[0].s3.bucket.name;
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
const params =
{
Bucket: srcBucket,
Key: srcKey
};
try
{
var slippiGame = s3.getObject(params, function(error, data)
{
if (error)
{
console.log(error);
}
else
{
const game = new SlippiGame(slippiGame);
}
});
}
catch (e)
{
console.log("fail");
console.log(e);
}
console.log(slippiGame);
return("success");
};
I know that this approach is wrong since my slippiGame variable isn't a buffer and the SlippiGame constructor only takes a local file or a buffer. Is there a way to either store the file from S3 temporarily in the Lambda directory or create a buffer to hold the S3 file in?
The Body attribute of data is already a Buffer, so you can pass it straight to your SlippiGame constructor.
If you prefer downloading to a file, create a read stream from the response and pipe it into a write stream. See here for more.
const s3 = new AWS.S3({apiVersion: '2006-03-01'});
const params = {Bucket: 'myBucket', Key: 'myImageFile.jpg'};
const file = require('fs').createWriteStream('/path/to/file.jpg');
s3.getObject(params).createReadStream().pipe(file);
One additional problem with your current code: you are mixing the async and the callback variants of the Lambda function handler; it's better to just use the async variant and get rid of the callback.
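Putting those two points together, a minimal sketch of the handler (assuming the AWS SDK v2 promise API; the slippi-js require path is a guess, so adjust the import to whatever you already use):
const AWS = require('aws-sdk');
const { SlippiGame } = require('@slippi/slippi-js'); // adjust to however you import SlippiGame
const s3 = new AWS.S3();

exports.handler = async (event) => {
  const srcBucket = event.Records[0].s3.bucket.name;
  const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));

  // .promise() resolves with the full response; Body is already a Buffer
  const { Body } = await s3.getObject({ Bucket: srcBucket, Key: srcKey }).promise();

  const game = new SlippiGame(Body); // the constructor accepts a Buffer, per your description
  console.log(game);
  return "success";
};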
I am trying to simplify the process of taking a document's file attribute and converting it to the temporary S3 link on the fly. Normally I have been grabbing all documents in the controller, then looping through and replacing the links before passing them to the view. This works but can get a little messy if the controller logic is complex. I am trying to write a custom method for my schema where these links are replaced more easily. The following prints out the link in the console but shows undefined in the view because of the wonderful async nature of JavaScript. Is there a similar way to get this to work?
Maybe create a method similar to the way populate works?
ExampleSchema.js:
exampleSchema.methods.getS3Link = function(file_name, callback) {
const s3 = new aws.S3();
const s3Params = {
Bucket: process.env.S3_BUCKET,
Key: file_name,
Expires: 6000
};
s3.getSignedUrl('getObject', s3Params, function (err, data) {
console.log(data); //prints out the correct link
return data; //shows undefined in view
})
}
The getS3Link function already takes a callback argument. You can pass the data to that callback and retrieve it in the view.
exampleSchema.methods.getS3Link = function(file_name, callback) {
const s3 = new aws.S3();
const s3Params = {
Bucket: process.env.S3_BUCKET,
Key: file_name,
Expires: 6000
};
s3.getSignedUrl('getObject', s3Params, function (err, data) {
console.log(data); //prints out the correct link
callback(null, data);
})
}
Simpler:
exampleSchema.methods.getS3Link = function(file_name, callback) {
const s3 = new aws.S3();
const s3Params = {
Bucket: process.env.S3_BUCKET,
Key: file_name,
Expires: 6000
};
s3.getSignedUrl('getObject', s3Params, callback);
}
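Either version can then be called from a controller along these lines (a sketch; doc, next, res, and the 'show' view are placeholders assuming an Express-style setup):
// hypothetical controller code
doc.getS3Link(doc.file_name, function (err, url) {
  if (err) return next(err);
  res.render('show', { doc: doc, fileUrl: url }); // the view now receives the signed URL
});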
Let me just apologize for this abysmal code ahead of time. I have almost zero node experience, and write all of my JS with React apps and Elixir on the back end. I am struggling to write a correct Lambda function in NodeJS, and have basically cobbled something together from Googling/SO/trial and error, etc.
What I'm doing is the following:
User wants to upload a file so they send some info to back end.
Back end generates a presigned key.
Front end sends file to S3.
S3 fires event and Lambda executes
Lambda now checks the mimetype and, if it's a bad file, deletes it from the S3 bucket and makes a DELETE API call to my backend to tell it to delete the row the photo upload belongs to.
Where I'm struggling: when I make the API call to my backend inside of the s3.deleteObject call, I get wildly inconsistent results. A lot of the time it sends two delete requests back to back in the same Lambda execution. Sometimes it seems like it never even calls the backend and just runs and shows complete without really logging anything to CloudWatch.
My code is as follows:
const aws = require('aws-sdk');
const s3 = new aws.S3({apiVersion: '2006-03-01'});
const fileType = require('file-type');
const imageTypes = ['image/gif', 'image/jpeg', 'image/png'];
const request = require('request-promise');
exports.handler = async (event, context) => {
// Get the object from the event and show its content type
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(
event.Records[0].s3.object.key.replace(/\+/g, ' ')
);
const params = {
Bucket: bucket,
Key: key,
};
try {
const {Body} = await s3.getObject(params).promise();
const fileBuffer = new Buffer(Body, 'base64');
const fileTypeInfo = fileType(fileBuffer);
if (
typeof fileTypeInfo !== 'undefined' &&
fileTypeInfo &&
imageTypes.includes(fileTypeInfo.mime)
) {
console.log('FILE IS OKAY.');
} else {
await s3
.deleteObject(params, function(err, data) {
console.log('FILE IS NOT AN IMAGE.');
if (err) {
console.log('FAILED TO DELETE.');
} else {
console.log('DELETED ON S3. ATTEMPTING TO DELETE ON SERVER.');
const url =
`http://MYSERVERHERE:4000/api/event/${params.Key.split('.')[0]}`;
const options = {
method: 'DELETE',
uri: url,
};
request(options)
.then(function(response) {
console.log('RESPONSE: ', response);
})
.catch(function(err) {
console.log('ERROR: ', err);
});
}
})
.promise();
}
return Body;
} catch (err) {
const message = `Error getting object ${key} from bucket ${bucket}. Make sure they exist and your bucket is in the same region as this function.`;
console.log(message);
throw new Error(message);
}
};
This has been driving me mad for days. Any help explaining why I would be getting unexpected results from a Lambda function like this is appreciated.
Please update your else block to use await properly, then check again. Try the code below.
exports.handler = async (event, context) => {
// Get the object from the event and show its content type
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(
event.Records[0].s3.object.key.replace(/\+/g, ' ')
);
const params = {
Bucket: bucket,
Key: key,
};
try {
const {Body} = await s3.getObject(params).promise();
const fileBuffer = new Buffer(Body, 'base64');
const fileTypeInfo = fileType(fileBuffer);
if (
typeof fileTypeInfo !== 'undefined' &&
fileTypeInfo &&
imageTypes.includes(fileTypeInfo.mime)
) {
console.log('FILE IS OKAY.');
} else {
await s3.deleteObject(params).promise(); //fail then catch block execute
console.log('DELETED ON S3. ATTEMPTING TO DELETE ON SERVER.');
const url =
`http://MYSERVERHERE:4000/api/event/${params.Key.split('.')[0]}`;
const options = {
method: 'DELETE',
uri: url,
};
let response = await request(options); ////fail then catch block execute
console.log(response);
}
return Body;
} catch (err) {
console.log(err);
const message = `Error getting object ${key} from bucket ${bucket}. Make sure they exist and your bucket is in the same region as this function.`;
console.log(message);
throw new Error(message);
}
};
The S3 delete operation is eventually consistent in all regions.
Hence, as per AWS (relevant info captured below):
A process deletes an existing object and immediately attempts to read it. Until the deletion is fully propagated, Amazon S3 might return the deleted data.
A process deletes an existing object and immediately lists keys within its bucket. Until the deletion is fully propagated, Amazon S3 might list the deleted object.
Ref: https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#ConsistencyModel
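If that propagation delay turns out to matter for your flow, one option (a sketch building on the answer code above; s3, params, and options come from there) is to use the SDK's built-in waiter before calling your backend:
await s3.deleteObject(params).promise();
// the waiter polls a HEAD request on the key until it returns 404 (or times out and throws)
await s3.waitFor('objectNotExists', { Bucket: params.Bucket, Key: params.Key }).promise();
// only now tell the backend to delete its row
let response = await request(options);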
I am using Node 8.1.
I want to convert a PDF into images using S3 and Lambda, but CloudWatch repeatedly gives the following error:
"Unable to import module 'index': Error"
The file below is named index.js and lives in the main project folder:
const util = require('util');
const AWS = require('aws-sdk');
const gm = require('gm').subClass({ imageMagick: true });
const s3 = new AWS.S3();
exports.handler = (event, context) => {
const srcBucket = event.Records[0].s3.bucket.name;
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
const dstBucket = srcBucket;
const dstKey = srcKey.replace('.pdf', '.png');
s3.getObject({Bucket: srcBucket, Key: srcKey}, (err, response) => {
if (err) {
context.done('S3 get object error:', err);
context.fail(err);
}
// conversion start
gm(response.Body)
.setFormat("png")
.resize(200) // you can configure
.quality(100) // you can configure
.stream((err, stdout, stderr) => {
if(err) {
console.log("gm conversion process error: ");
console.log(err,stdout,stderr);
context.fail(err);
}
const chunks = [];
stdout.on('data', (chunk) => {
chunks.push(chunk);
});
stdout.on('end', () => {
console.log('gm process finished');
const buffer = Buffer.concat(chunks);
// Upload start
const params = {
Bucket: dstBucket,
Key: dstKey,
ContentType: 'image/png',
Body: buffer
};
s3.putObject(params, (err, data) => {
if (err) {
console.log("S3 upload error: " + err);
context.fail(err);
}
console.log('S3 upload finished!');
console.log('Bucket: ' + dstBucket);
console.log('key: ' + dstKey);
context.succeed({
"error":false
});
});
});
stderr.on('data', (data) => {
console.log('stderr data: ' + data);
});
});
});
};
I am not very familiar with AWS Lambda or with file conversion, and this is the only area where I have got stuck and can't find a way to solve the problem.
I think there might be some problem with how index.handler is wired up, but I don't know where the fault is.
It sounds like you didn't configure the .yml file properly.
https://docs.aws.amazon.com/lambda/latest/dg/serverless-deploy-wt.html#serv-deploy (for the cli to deploy)
https://serverless.com/framework/docs/providers/aws/guide/serverless.yml/ (the config file that you need to write so aws knows what to import)
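For reference, a minimal serverless.yml sketch for this kind of setup (service name, runtime, and bucket are placeholders; the important part is that handler matches index.js and its exported handler function):
service: pdf-to-image              # placeholder
provider:
  name: aws
  runtime: nodejs8.10
functions:
  convertPdf:
    handler: index.handler         # file index.js, exported function "handler"
    events:
      - s3:
          bucket: my-source-bucket # placeholder
          event: s3:ObjectCreated:*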
Have you tried running that code locally? It sounds like a syntax error.
Anyway, I've created just this functionality and it's available on GitHub here:
https://github.com/rcastoro/PDFImagine
You can see from the video below that it takes PDFs in an S3 bucket and, using an AWS event, notifies the Lambda function to convert new PDFs into images.
https://youtu.be/yU-jA2_5Tvs
I'm downloading an object from s3 and creating a read stream object from it to process a video:
s3.getObject(params).createReadStream()
However, I need to get the metadata from it, which is possible when I just get the object and access its 'Metadata' property:
s3.getObject()
How would I either:
Get the object via s3.getObject(), grab the metadata from its metadata property, and then turn it into a read stream?
var stream = fs.createReadStream(response); isn't working - input must be a string
-- OR --
Get the stream via s3.getObject().createReadStream(), and extract the metadata from the stream?
To my knowledge metadata isn't passed within streams.
Tell me if my assumptions are wrong, but I am currently stuck with these two needs:
Getting the meta data
Making it a stream
You can get the metadata via the request's httpHeaders event.
let fs = require('fs')
let aws = require('aws-sdk')
let s3 = new aws.S3()
let request = s3.getObject({
Bucket: 'my-bucket',
Key: 'my-key'
})
let stream
request.on('httpHeaders', (statusCode, httpHeaders) => {
// object metadata is represented by any header in httpHeaders starting with 'x-amz-meta-'
// you can use the stream object at this point
stream.pipe(fs.createWriteStream('./somepath'))
stream.on('end', () => {
console.log('were done')
})
})
stream = request.createReadStream()
Alternatively, you can also call s3.headObject to get the metadata without downloading the object, and then download the object using s3.getObject.
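A sketch of that headObject-then-getObject approach (bucket, key, and output path are placeholders):
let fs = require('fs')
let aws = require('aws-sdk')
let s3 = new aws.S3()

let params = { Bucket: 'my-bucket', Key: 'my-key' }

// headObject returns only the headers; user metadata ends up in head.Metadata
s3.headObject(params).promise()
  .then((head) => {
    console.log('metadata:', head.Metadata)
    // now download the object itself as a stream
    s3.getObject(params).createReadStream()
      .pipe(fs.createWriteStream('./somepath'))
      .on('finish', () => console.log('done'))
  })
  .catch((err) => console.error(err))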
So I kind of found a solution. This works for most files under 10 MB. If they are larger than that, the buffer stream ends before the file is done being written. I've tried putting the bufferStream.end inside of the on('finish') function, but then my callback doesn't go through....
function download(s3Event, srcKey, cb){
console.log('Starting download');
s3.getObject({
Bucket: s3Event.bucket.name,
Key: srcKey
}, cb);
}
function writeToFile(data, cb){
var dlFile = path.join(tempDir, 'download');
console.log('data = ', data);
console.log('data.Body = ', data.Body);
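// NOTE: bufferStream is assumed to be a stream.PassThrough (or similar) created elsewhere in this module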
var stream = bufferStream.pipe(fs.createWriteStream(dlFile)).on('finish', function () {
console.log('finished writing stream');
cb(null, data);
});
bufferStream.end(data.Body);
}
exports.handler = function(event, context) {
// Read options from the event.
console.log("Reading options from event:\n", util.inspect(event, {depth: 5}));
var s3Event = event.Records[0].s3;
var srcKey = decodeURIComponent(s3Event.object.key);
var keyPrefix = srcKey.replace(/\.[^/.]+$/, '');
var dstBucket = "jump-lambda";
async.waterfall([
function (cb){
download(s3Event, srcKey, cb);
},
function (data, cb){
writeToFile(data, cb);
},
function (data, cb){
fluentffmpegProcess(data, cb);
},
function (data, cb){
transform(data, cb);
},
function (data, buffer, cb){
thumbnailUpload(data, buffer, dstBucket, keyPrefix, cb);
},
function (data, cb){
updateParse(data, srcKey, keyPrefix, cb);
},
],
function (err) {
if (err) {
console.error(
'Unable to convert video to scene object, with error code: ' + err.description
);
} else {
console.log(
'Successfully created scene object, updated venue, and created thumbnail'
);
}
}
);
};
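One way to avoid buffering the whole Body at all (a sketch, untested against the rest of this waterfall; it reuses s3, fs, path, and tempDir from the code above and hands the next step a file path rather than the S3 response) is to pipe the object's read stream straight into the file:
function downloadToFile(s3Event, srcKey, cb) {
  var dlFile = path.join(tempDir, 'download');
  var read = s3.getObject({ Bucket: s3Event.bucket.name, Key: srcKey }).createReadStream();
  var write = fs.createWriteStream(dlFile);
  read.on('error', cb);
  write.on('error', cb);
  write.on('finish', function () {
    console.log('finished writing stream');
    cb(null, dlFile); // the next waterfall step now receives the local path
  });
  read.pipe(write);
}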
The node.js API for S3 gives the following description for the data returned in the callback of getObject. From http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#getObject-property :
Body — (Buffer, Typed Array, Blob, String, ReadableStream) Object data.
Is this for real? Is there no way to control which of these things it is?
I don't know if you can control in advance the type of the data.Body field provided in the getObject() callback. If all you want to do is determine if you've received a buffer, you can try Node's Buffer.isBuffer(data.Body) class method.
Alternatively, you might want to avoid the issue altogether and use this approach from Amazon's S3 documentation:
var s3 = new AWS.S3();
var params = {Bucket: 'myBucket', Key: 'myImageFile.jpg'};
var file = require('fs').createWriteStream('/path/to/file.jpg');
s3.getObject(params).createReadStream().pipe(file);
Presuming you'll be using this code in a typical node.js async callback environment, it might make more sense to see the code like so:
var fs = require('fs');
function downloadFile(key, localPath, callback) {
var s3 = new AWS.S3();
var params = {Bucket: 'myBucket', Key: key};
var file = fs.createWriteStream(localPath);
file.on('close', function() {
callback();
});
file.on('error', function(err) {
callback(err);
});
s3.getObject(params).createReadStream().pipe(file);
}
I couldn't find any way to change the Body type either; however, after noticing the Body was a buffer, I transformed the buffer into a ReadableStream with this handy and pretty straightforward function: AWS.util.buffer.toStream (or perhaps you might want to use another lib like streamifier).
I was looking for something where I could validate errors before doing anything else; in Amazon's example this translates to "create the write stream only if there were no errors".
s3.getObject(params, function(err, data) {
if (err) {
console.log(err);
return;
}
var file = require('fs').createWriteStream(name);
var read = AWS.util.buffer.toStream(data.Body);
read.pipe(file);
read.on('data', function(chunk) {
console.log('got %d bytes of data', chunk.length);
});
});
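If you'd rather not rely on the undocumented AWS.util helper, Node's own stream module can do the same conversion (a sketch; requires a Node version that has stream.Readable.from):
var Readable = require('stream').Readable;

s3.getObject(params, function(err, data) {
  if (err) {
    console.log(err);
    return;
  }
  // wrap the Buffer in an array so it is emitted as a single chunk
  var read = Readable.from([data.Body]);
  read.pipe(require('fs').createWriteStream(name));
});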