Why is the file created from S3 stream empty? - node.js

I am trying to access a file in a private S3 bucket from a lambda function identified by Cognito.
Reading the stream works outside a lambda but not inside a lambda
Creating a pre-signed url works inside a lambda
Waiting for the the content to be ready as a string works inside a lambda
I've managed to get a pre-signed url to download the file. Using the same parameters, I've tried to write the read stream to a local file. A file gets created but it's empty. I couldn't catch any error in the process.
const s3 = new AWS.S3({ apiVersion: 'latest' });
const file = 's3Filename.csv'
const userId = event.requestContext.identity.cognitoIdentityId;
const s3Params = {
Bucket: 'MY_BUCKET',
Key: `private/${userId}/${file}`,
};
var fileStream = require('fs').createWriteStream('/path/to/my/file.csv');
var s3Stream = s3.getObject(s3Params).createReadStream();
// Try to print s3 stream errors
s3Stream
.on('error', function (err) {
console.error(err); // prints nothing
});
// Try to print fs errors
s3Stream
.pipe(fileStream)
.on('error', function (err) {
console.error('File Stream:', err); // prints nothing
})
.on('data', function (chunk) {
console.log(chunk); // prints nothing
})
.on('end', function () {
console.log('All the data in the file has been read'); // prints nothing
})
.on('close', function (err) {
console.log('Stream has been Closed'); // prints nothing
});
I am quite confident that my parameters are correct because I can get a pre-signed url that allows me to download the file.
console.log(s3.getSignedUrl('getObject', s3Params));
I can also read the file content using getObject().promise(). This could work but I'm parsing a CSV file and I'd rather go easy on the memory and parse the stream.
try
{
const s3Response = await s3.getObject(s3Params).promise();
let objectData = s3Response.Body.toString('utf-8');
console.log(objectData);
}
catch (ex)
{
console.error(ex);
}
Why is the file created from S3 stream empty? And why is there nothing that prints?
Could it be an access policy issue? If that's the case, why didn't I get any error when executing?

Related

Saving file to /tmp in lambda from inside of a folder

I have a task of downloading and uploading files to s3 using lambda, the scenerio is like
Download a file from s3 bucket1(request folder) to lambda
Upload the same file to s3 bucket2(request folder) from lambda
Both the downloadFiles and uploadFiles fn are inside utils/s3.js inside the root directory(var/task/) in lambda
Here is my utils/s3.js downloadFiles fn
exports.downloadFiles = async () => {
try{
const location = path.join( __dirname , `../tmp/text.txt`);
console.log(location); // prints /var/task/tmp/text.txt
console.log(__dirname); // prints /var/task/utils
const params = {
Bucket: 'bucket1',
Key: `request/text.txt`
};
const { Body } = await s3.getObject(params).promise();
fs.writeFileSync(location, Body);
return;
}catch(e){
throw new Error(e.message);
}
};
Now there are two cases,
If I create a folder in the root directory tmp, it gives this error
"EROFS: read-only file system, open '/var/task/tmp/text.txt'"
If I don't then
"ENOENT: no such file or directory, open '/var/task/tmp/text.txt'"
Now I have read most of the answeres on stackoverflow, I know I am supposed to save files to /tmp/filename, but how come I do the same and it doesn't work, where am I going so wrong?
As one commenter already stated, if you do not do anything with the file itself, it would be much better to just use the S3 API to copy the object instead of downloading and re-uploading it.
The relevant documentation can be found here: https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#copyObject-property
Example:
var params = {
CopySource: "/<source-bucket>/<source-key>",
Bucket: "<destination-bucket>",
Key: "<destination-key>"
};
s3.copyObject(params, function(err, data) {
if (err) console.log(err, err.stack);
else console.log(data);
});
Or if you want to use a promise, this should work as well:
var params = {
CopySource: "/<source-bucket>/<source-key>",
Bucket: "<destination-bucket>",
Key: "<destination-key>"
};
try {
const result = await s3.copyObject(params).promise();
} catch (error) {
console.log(error);
}

How to write multiple streams for one file in Node?

Learning how to do large file manipulation with Node and streams I'm stuck in the middle of a file change when passing down the results to a module and I think the process is still in memory when it reaches another module.
I get a zip from an s3 bucket locally and unzip the contents:
try {
const stream = fs.createReadStream(zipFile).pipe(unzipper.Extract({ path }))
stream.on('error', err => console.error(err))
stream.on('close', async () => {
fs.removeSync(zipFile)
try {
const neededFile = await dir(path) // delete files not needed from zip, rename and return named file
await mod1(neededFile) // review file, edit and return info
await mod2(neededFile, data) // pass down data for further changes
return
} catch (err) {
console.log('error')
}
})
} catch (err) {
console.log('stream error')
}
Initial unzip I learned that there is a difference between stream on close and finish because I could pass the file to the first module and start the manipulation but the file, I guess due to the size, output and file never matched. After cleaning the files I dont need I pass the renamed file to mod1 for changes and run a write file sync:
mod1.js:
const fs = require('fs-extra')
module.exports = file => {
fs.readFile(file, 'utf8', (err, data) => {
if (err) return console.log(err)
try {
const result = data.replace(/: /gm, `:`).replace(/(?<=location:")foobar(?=")/gm, '')
fs.writeFileSync(file, result)
} catch (err) {
console.log(err)
return err
}
})
}
when I tried to do the above with:
const readStream = fs.createReadStream(file)
const writeStream = fs.createWriteStream(file)
readStream.on('data', chunk => {
const data = chunk.toString().replace(/: /gm, `:`).replace(/(?<=location:")foobar(?=")/gm, '')
writeStream.write(data)
})
readStream.on('end', () => {
writeStream.close()
})
the file would always be blank. After writeFileSync I proceed with the next module to search for a line ref:
mod2.js:
const fs = require('fs-extra')
module.exports = (file, data) => {
const parseFile = fs.readFileSync(file, 'utf8')
parseFile.split(/\r?\n/).map((line, idx) => {
if (line.includes(data)) console.log(idx + 1)
})
}
but the line number returned is that of the initial unzipped file not the file that was modded from the first module. Because I thought the sync process would be for the file it would appear the file being referenced is in memory? My search results for streams when learning about them:
Working with Node.js Stream API
Stream
How to use stream.pipe
Understanding Streams in Node.js
Node.js Streams: Everything you need to know
Streams, Piping, and Their Error Handling in Node.js
Writing to Files in Node.js
Error handling with node.js streams
Node.js Readable file stream not getting data
Node.js stream 'end' event not firing
NodeJS streams not awaiting async
stream-handbook
How should a file be manipulated after an unzip stream and why does the second module reference the file after it was unzipped and not when it was already manipulated? Is it possible to write multiple streams synchronously?

Inconsistent results making API call from AWS Lambda

Let me just apologize for this abysmal code ahead of time. I have almost zero node experience, and write all of my JS with React apps and Elixir on the back end. I am struggling to write a correct Lambda function in NodeJS, and have basically cobbled something together from Googling/SO/trial and error, etc.
What I'm doing is the following:
User wants to upload a file so they send some info to back end.
Back end generates a presigned key.
Front end sends file to S3.
S3 fires event and Lambda executes
Lambda now checks for mimetype and if it's a bad file, will delete the file from the S3 bucket and make a DELETE API call to my backend to tell it to delete the row the photo upload belongs to.
Where I'm struggling is when I make the API call to my backend inside of the s3.deleteObject call, I am getting wildly inconsistent results. A lot of time it's sending two delete requests back to back in the same Lambda execution. Sometimes it's like it never even calls the backend and just runs and shows complete without really logging anything to Cloudwatch.
My code is as follows:
const aws = require('aws-sdk');
const s3 = new aws.S3({apiVersion: '2006-03-01'});
const fileType = require('file-type');
const imageTypes = ['image/gif', 'image/jpeg', 'image/png'];
const request = require('request-promise');
exports.handler = async (event, context) => {
// Get the object from the event and show its content type
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(
event.Records[0].s3.object.key.replace(/\+/g, ' ')
);
const params = {
Bucket: bucket,
Key: key,
};
try {
const {Body} = await s3.getObject(params).promise();
const fileBuffer = new Buffer(Body, 'base64');
const fileTypeInfo = fileType(fileBuffer);
if (
typeof fileTypeInfo !== 'undefined' &&
fileTypeInfo &&
imageTypes.includes(fileTypeInfo.mime)
) {
console.log('FILE IS OKAY.');
} else {
await s3
.deleteObject(params, function(err, data) {
console.log('FILE IS NOT AN IMAGE.');
if (err) {
console.log('FAILED TO DELETE.');
} else {
console.log('DELETED ON S3. ATTEMPTING TO DELETE ON SERVER.');
const url =
`http://MYSERVERHERE:4000/api/event/${params.Key.split('.')[0]}`;
const options = {
method: 'DELETE',
uri: url,
};
request(options)
.then(function(response) {
console.log('RESPONSE: ', response);
})
.catch(function(err) {
console.log('ERROR: ', err);
});
}
})
.promise();
}
return Body;
} catch (err) {
const message = `Error getting object ${key} from bucket ${bucket}. Make sure they exist and your bucket is in the same region as this function.`;
console.log(message);
throw new Error(message);
}
};
This has been driving me mad for days. Any help is appreciated to explain why I would be getting unexpected results from a Lambda function like this.
Please check after update your else part with proper await use
Please try below code.
exports.handler = async (event, context) => {
// Get the object from the event and show its content type
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(
event.Records[0].s3.object.key.replace(/\+/g, ' ')
);
const params = {
Bucket: bucket,
Key: key,
};
try {
const {Body} = await s3.getObject(params).promise();
const fileBuffer = new Buffer(Body, 'base64');
const fileTypeInfo = fileType(fileBuffer);
if (
typeof fileTypeInfo !== 'undefined' &&
fileTypeInfo &&
imageTypes.includes(fileTypeInfo.mime)
) {
console.log('FILE IS OKAY.');
} else {
await s3.deleteObject(params).promise(); //fail then catch block execute
console.log('DELETED ON S3. ATTEMPTING TO DELETE ON SERVER.');
const url =
`http://MYSERVERHERE:4000/api/event/${params.Key.split('.')[0]}`;
const options = {
method: 'DELETE',
uri: url,
};
let response = await request(options); ////fail then catch block execute
console.log(response);
}
return Body;
} catch (err) {
console.log(err);
const message = `Error getting object ${key} from bucket ${bucket}. Make sure they exist and your bucket is in the same region as this function.`;
console.log(message);
throw new Error(message);
}
};
S3 delete operation is eventual consistent in all regions.
Hence as par AWS (captured relevant info),
A process deletes an existing object and immediately attempts to read it. Until the deletion is fully propagated, Amazon S3 might return the deleted data.
A process deletes an existing object and immediately lists keys within its bucket. Until the deletion is fully propagated, Amazon S3 might list the deleted object.
Ref: https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#ConsistencyModel

convert pdf pages into images using AWS s3 and Lambda

I am using node 8.1,
I want to convert PDF into images using S3 and lambda but the CloudWatch repeatedly giving the following error:
"Unable to import module 'index': Error"
The below file is named as index.js in the main project folder:
const util = require('util');
const AWS = require('aws-sdk');
const gm = require('gm').subClass({ imageMagick: true });
const s3 = new AWS.S3();
exports.handler = (event, context) => {
const srcBucket = event.Records[0].s3.bucket.name;
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
const dstBucket = srcBucket;
const dstKey = srcKey.replace('.pdf', '.png');
s3.getObject({Bucket: srcBucket, Key: srcKey}, (err, response) => {
if (err) {
context.done('S3 get object error:', err);
context.fail(err);
}
// conversion start
gm(response.Body)
.setFormat("png")
.resize(200) // you can configure
.quality(100) // you can configure
.stream((err, stdout, stderr) => {
if(err) {
console.log("gm conversion process error: ");
console.log(err,stdout,stderr);
context.fail(err);
}
const chunks = [];
stdout.on('data', (chunk) => {
chunks.push(chunk);
});
stdout.on('end', () => {
console.log('gm process finished');
const buffer = Buffer.concat(chunks);
// Upload start
const params = {
Bucket: dstBucket,
Key: dstKey,
ContentType: 'image/png',
Body: buffer
};
s3.putObject(params, (err, data) => {
if (err) {
console.log("S3 upload error: " + err);
context.fail(err);
}
console.log('S3 upload finished!');
console.log('Bucket: ' + dstBucket);
console.log('key: ' + dstKey);
context.succeed({
"error":false
});
});
});
stderr.on('data', (data) => {
console.log('stderr data: ' + data);
});
});
});
};
I am not much aware about the aws-lambda as well as the file conversion and this is the only area where I have got stuck and can't find any way to solve the problem.
I think there might be some problem in the index.handler way of passing the functions but I don't know where the minor fault is situated at.
Problem sounds like you didn't configure the .yml file properly.
https://docs.aws.amazon.com/lambda/latest/dg/serverless-deploy-wt.html#serv-deploy (for the cli to deploy)
https://serverless.com/framework/docs/providers/aws/guide/serverless.yml/ (the config file that you need to write so aws knows what to import)
Have you tried running that code locally? It sounds like an error in Syntax.
Anyways, I’ve created just this functionality and it’s available on fit hub here:
https://github.com/rcastoro/PDFImagine
You can see from the video below it takes PDFs in an s3 bucket, and using an aws event, notifies the lambda function to convert new PDFs into images.
https://youtu.be/yU-jA2_5Tvs

Accessing the raw file stream from a node-formidable file upload

I am creating an application that takes some file uploads and send them straight up to S3. I would prefer not to even have the tmp file on my server, so I am using the Knox module and would like to take the raw stream from Formidable and send it over Knox to S3. I have done something similar using Knox to download a file using this code:
knox.downloads.get(widget.download).on('response',function(sres){
res.writeHead(200, {
'Content-Type':'application/zip',
'Content-Length': sres.headers['content-length'],
'Content-Disposition':'attachment; filename=' + widget.download
});
util.pump(sres, res);
}).end();
Now I would like to do something similar in the oposite direction (File upload from the browser to S3).
So far I have written an event handler to capture each piece of data from the file as it's being uploaded:
var form = new formidable.IncomingForm();
form.onPart = function(part){
if(!part.filename){
form.handlePart(part);
}else{
if(part.name == 'download'){
// Upload to download bucket
controller.putDownload(part);
}else{
// Upload to the image bucket
controller.putImage(part);
}
//res.send(sys.inspect(part));
}
}
form.parse(req, function(err, fields, files){
if(err){
res.json(err);
}else{
res.send(sys.inspect({fields:fields, files:files}), {'content-type':'text/plain'});
//controller.createWidget(res,fields,files);
}
});
controller.putDownload = function(part){
part.addListener('data', function(buffer){
knox.download.putStream(data,part.filename, function(err,s3res){
if(err)throwError(err);
else{
console.log(s3res);
}
});
})
knox.downloads.putStream(part, part.filename, function(err,s3res){
if(err)throwError(err);
else{
console.log(s3res);
}
});
}
But the data event only give me the buffer. So is it possible to capture the stream itself and push it to S3?
What you want to do is override the Form.onPart method:
IncomingForm.prototype.onPart = function(part) {
// this method can be overwritten by the user
this.handlePart(part);
};
Formidable's default behavior is to write the part to a file. You don't want that. You want to handle the 'part' events to write to the knox download. Start with this:
form.onPart = function(part) {
if (!part.filename) {
// let formidable handle all non-file parts
form.handlePart(part);
return;
}
Then open the knox request and handle the raw part events yourself:
part.on('data', function(data) {
req.write(data);
});
part.on('end', function() {
req.end();
});
part.on('error', function(err) {
// handle this too
});
As a bonus, if the req.write(data) return false that means the send buffer is full. You should pause the Formidable parser. When you get a drain event from the Knox stream you should resume Formidable.
Use multiparty instead. It supports this kind of streaming like you want. It even has an example of streaming directly to s3: https://github.com/superjoe30/node-multiparty/blob/master/examples/s3.js
In an Express middleware, I use formidable together with PassThrough to stream-upload a file to S3 (in my case, to Minio which is S3 compatible through Minio SDK; and I believe it works for AWS S3 too with the same Minio SDK)
Here is the sample code.
const formidable = require('formidable')
const { PassThrough } = require('stream')
const form = new formidable.IncomingForm()
const pass = new PassThrough()
const fileMeta = {}
form.onPart = part => {
if (!part.filename) {
form.handlePart(part)
return
}
fileMeta.name = part.filename
fileMeta.type = part.mime
part.on('data', function (buffer) {
pass.write(buffer)
})
part.on('end', function () {
pass.end()
})
}
form.parse(req, err => {
if (err) {
req.minio = { error: err }
next()
} else {
handlePostStream(req, next, fileMeta, pass)
}
})
And handlePostStream looks like below, for your reference:
const uuidv1 = require('uuid/v1')
const handlePostStream = async (req, next, fileMeta, fileStream) => {
let filename = uuidv1()
try {
const metaData = {
'content-type': fileMeta.type,
'file-name': Buffer.from(fileMeta.name).toString('base64')
}
const minioClient = /* Get Minio Client*/
await minioClient.putObject(MINIO_BUCKET, filename, fileStream, metaData)
req.minio = { post: { filename: `${filename}` } }
} catch (error) {
req.minio = { error }
}
next()
}
You can find the source code on GitHub, and its unit tests too.
There is no way for you to capture the stream, because the data has to be translated by Formidable. The buffer you're given is the file contents in chunks of buffer.length: this might be a problem because looking at Formidable's docs it appears that until the file is completely uploaded it can't reliably report the file size and Knox's put method might need that.
Never used Knox this way before, but you might have some luck with something like this:
controller.putDownload = function(part){
var req = knox.download.put(part.filename, {
'Content-Type': 'text/plain'
});
part.addListener('data', function(buffer){
req.write(buffer);
});
req.on('response', function(res){
// error checking
});
req.end();
}
A little unsure about the response checking bits, but....see if you can whip that into shape. Also, Streaming an octet stream from request to S3 with knox on node.js also has a writeup that may be useful to you.

Resources