I have the following Lambda function. It receives an XML payload, looks through it, finds a base64-encoded PDF file, and tries to upload it to S3.
index.js
const AWS = require('aws-sdk');
const xml2js = require('xml2js');
const pdfUpload = require('./upload_pdf');
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
let attachment;
xml2js.parseString(event.body, function(err, result) {
attachment =
result.Attachment[0].Data[0];
if (attachment) {
pdfUpload(attachment);
}
});
return {
statusCode: 200
}
};
upload_pdf.js
/**
*
 * @param {string} base64 Data
 * @return {string} Image url
*/
const pdfUpload = async (base64) => {
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const base64Data = new Buffer.from(base64, 'base64');
// With this setup, the object will be overwritten each time a file is uploaded,
// because the Key is hard-coded. To prevent this, use a different Key each time.
const params = {
Bucket: 'mu-bucket',
Key: `123.pdf`,
Body: base64Data,
ACL: 'public-read',
ContentEncoding: 'base64',
ContentType: `application/pdf`
}
let location = '';
let key = '';
try {
const { Location, Key } = await s3.upload(params).promise();
location = Location;
key = Key;
} catch (error) {
// console.log(error)
}
console.log(location, key);
return location;
}
module.exports = pdfUpload;
No matter what I do, the file does not get uploaded. I have checked the permissions, and the Lambda has access to the bucket. Running the Lambda, I'm not receiving any errors either. Can anybody see what might be wrong here?
First, as a piece of advice, I think you should add more logs to see at which step the function is stuck or failing.
The second thing you can try is to await the upload:
await pdfUpload(attachment);
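One thing to keep in mind: since xml2js.parseString is callback-based, the handler can still return before the upload finishes even with that await inside the callback. Here is a minimal sketch of a fully awaited handler, using xml2js's promise API and assuming the same Attachment/Data structure as in the question:
const xml2js = require('xml2js');
const pdfUpload = require('./upload_pdf');
exports.handler = async (event) => {
  // Parse with the promise API so the handler itself can await the result.
  const result = await xml2js.parseStringPromise(event.body);
  const attachment = result.Attachment[0].Data[0];
  if (attachment) {
    // Await the upload so the Lambda doesn't finish before S3 does.
    await pdfUpload(attachment);
  }
  return { statusCode: 200 };
};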
I have implemented an S3 uploader per these instructions: https://aws.amazon.com/blogs/compute/uploading-to-amazon-s3-directly-from-a-web-or-mobile-application/
This is the Lambda function code
AWS.config.update({ region: process.env.AWS_REGION })
const s3 = new AWS.S3()
const URL_EXPIRATION_SECONDS = 300
// Main Lambda entry point
exports.handler = async (event) => {
return await getUploadURL(event)
}
const getUploadURL = async function(event) {
const randomID = parseInt(Math.random() * 10000000)
const Key = `${randomID}.jpg`
// Get signed URL from S3
const s3Params = {
Bucket: process.env.UploadBucket,
Key,
Expires: URL_EXPIRATION_SECONDS,
Currently the filename (key) is generated using a random ID.
I would like to change that to use the original filename of the uploaded file.
I tried a couple of approaches, such as using fs.readFile() to get the filename, but have not had any luck.
There is a webpage with a form that works in conjunction with the Lambda to upload the file to S3.
How do I get the filename?
If you want to save the file with the original filename, you have to pass that filename as part of the key you use to request the signed URL. You don't show how you're getting the file to upload, but if it is part of a website, you get this from the client.
On the client side you have the user identify the file to upload and pass that to your code that calls getUploadURL(). Maybe in your code it is part of event? Then you send the signed URL back to the client and then the client can send the file to the signed URL.
Therefore to upload a file, your client has to send two requests to your server -- one to get the URL and one to upload the file.
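For illustration, here is a minimal client-side sketch of those two requests; the endpoint path, query parameter, and response field name are assumptions, not taken from your code:
// Sketch: request a signed URL first, then PUT the file to it.
async function uploadFile(file) {
  const res = await fetch('/uploads?filename=' + encodeURIComponent(file.name)); // endpoint assumed
  const { uploadURL } = await res.json(); // response field name assumed
  await fetch(uploadURL, { method: 'PUT', body: file });
}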
You do mention that you're using fs.readFile(). If you're able to get the file with this call, then you already have the file name. All you have to do is pass the same name to getUploadURL() as an additional parameter or as part of event. You may have to parse the filename first, or within getUploadURL(), if it includes a path to someplace other than your current working directory.
The code above looks like it may be a Lambda that's getting called with some event. If that event is a trigger of some sort in which you can include a file name, then you can pull it from that variable. For example:
const getUploadURL = async function(event) {
const randomID = parseInt(Math.random() * 10000000)
const Key = `${event.fileNameFromTrigger}`
// Get signed URL from S3
const s3Params = {
Bucket: process.env.UploadBucket,
Key,
Expires: URL_EXPIRATION_SECONDS,
...
}
If the file name includes the extension, then you don't need to append that as you were with the random name.
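Putting that together, here is a minimal sketch of the modified function, assuming the filename arrives as a query string parameter on the API Gateway event and that the response shape follows the linked blog post:
const getUploadURL = async function(event) {
  // Assumption: the client appends ?filename=<original name> to the GET request.
  const Key = event.queryStringParameters.filename
  const s3Params = {
    Bucket: process.env.UploadBucket,
    Key,
    Expires: URL_EXPIRATION_SECONDS
  }
  // Signed URL for a PUT to that key.
  const uploadURL = await s3.getSignedUrlPromise('putObject', s3Params)
  return JSON.stringify({ uploadURL, Key })
}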
I modified the Lambda
changed this
const randomID = parseInt(Math.random() * 10000000)
const Key = `${randomID}.jpg`
to this
const Key = event.queryStringParameters.filename
And this is the frontend code, with my endpoint redacted. Note the query ?filename= appended to the endpoint, and how I used this.filename = file.name.
<script>
const MAX_IMAGE_SIZE = 1000000
/* ENTER YOUR ENDPOINT HERE */
const API_ENDPOINT = '{api-endpoint}/uploads?filename=' // e.g. https://ab1234ab123.execute-api.us-east-1.amazonaws.com/uploads
new Vue({
el: "#app",
data: {
image: '',
uploadURL: '',
filename: ''
},
methods: {
onFileChange (e) {
let files = e.target.files || e.dataTransfer.files
//let filename = files[0].name
if (!files.length) return
this.createImage(files[0])
},
createImage (file) {
// var image = new Image()
let reader = new FileReader()
reader.onload = (e) => {
//console.log(file.name)
console.log('length: ', e.target.result.includes('data:image/jpeg'))
if (!e.target.result.includes('data:image/jpeg')) {
return alert('Wrong file type - JPG only.')
}
if (e.target.result.length > MAX_IMAGE_SIZE) {
return alert('Image is too large.')
}
this.image = e.target.result
this.filename = file.name
}
reader.readAsDataURL(file)
},
removeImage: function (e) {
console.log('Remove clicked')
this.image = ''
this.filename = ''
},
uploadImage: async function (e) {
console.log('Upload clicked')
// Get the presigned URL
const response = await axios({
method: 'GET',
url: API_ENDPOINT + this.filename
})
console.log('Response: ', response)
console.log('Uploading: ', this.image)
let binary = atob(this.image.split(',')[1])
let array = []
for (var i = 0; i < binary.length; i++) {
array.push(binary.charCodeAt(i))
}
let blobData = new Blob([new Uint8Array(array)], {type: 'image/jpeg'})
console.log('Uploading to: ', response.uploadURL)
const result = await fetch(response.uploadURL, {
method: 'PUT',
body: blobData
})
console.log('Result: ', result)
// Final URL for the user doesn't need the query string params
this.uploadURL = response.uploadURL.split('?')[0]
}
}
})
</script>
I'm using the @google-cloud/storage package and generating a signed URL to upload a file like this:
const path = require("path");
const { Storage } = require("@google-cloud/storage");
const GOOGLE_CLOUD_KEYFILE = path.resolve(
__dirname + "/../gcloud_media_access.json"
);
const storage = new Storage({
keyFilename: GOOGLE_CLOUD_KEYFILE,
});
exports.uploadUrlGCloud = async (bucketName, key, isPrivate = false) => {
let bucket = storage.bucket(bucketName);
let file = bucket.file(key);
const options = {
version: "v4",
action: "write",
expires: Date.now() + 15 * 60 * 1000 // 15 minutes
};
let signedUrl = (await file.getSignedUrl(options))[0];
if(isPrivate){
await file.makePrivate({strict: true});
}
return signedUrl;
};
However, when I call this function like this:
const url = await uploadUrlGCloud(bucket, key, true);
I'm getting a 404 API error like this:
ApiError: No such object: testbucket/account/upload/4aac0fb0-92dd-11eb-8723-6b3ad09f80fa_demo.jpg
What I want to ask is: is there a way to generate the signed URL as private? Before the file is uploaded, I want to mark it as private and prevent public access.
Edit:
I uploaded a file to the created signed URL, and called makePrivate again on the uploaded file. This time I didn't get any errors. However, when I checked the file again, I realized that it is still public.
This is the function I tried to make file private:
const makeFilePrivate = async (bucketName, key) => {
return new Promise((resolve, reject) => {
let bucket = storage.bucket(bucketName);
let file = bucket.file(key);
try {
file.makePrivate({strict: true}, err => {
if(!err) {
resolve(file.isPublic());
} else
reject(err);
})
} catch (err) {
reject(err);
}
})
};
console.log(await makeFilePrivate(bucket, remotePath));
// True
You can't make the objects of a public bucket private, due to the way IAM and ACLs interact with one another.
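As a sketch of the workaround, assuming the bucket itself has no public IAM bindings (no allUsers or allAuthenticatedUsers access), objects uploaded through a signed URL stay private by default and the makePrivate() call can simply be dropped:
// Sketch: reuse the storage client configured above; generate a V4 signed
// upload URL and skip makePrivate(), since with a non-public bucket the
// uploaded object is not publicly readable by default.
exports.uploadUrlGCloud = async (bucketName, key) => {
  const options = {
    version: "v4",
    action: "write",
    expires: Date.now() + 15 * 60 * 1000 // 15 minutes
  };
  const [signedUrl] = await storage.bucket(bucketName).file(key).getSignedUrl(options);
  return signedUrl;
};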
I'm still new to Node.js and AWS, so forgive me if this is a noob question.
I am trying to read the data from an excel file (.xlsx). The lambda function receives the extension of the file type.
Here is my code:
exports.handler = async (event, context, callback) => {
console.log('Received event:', JSON.stringify(event, null, 2));
if (event.fileExt === undefined) {
callback("400 Invalid Input");
}
let returnData = "";
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();
switch(event.fileExt)
{
case "plain":
case "txt":
// Extract text
const params = {Bucket: 'filestation', Key: 'MyTXT.'+event.fileExt};
try {
await s3.getObject(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else{ // successful response
returnData = data.Body.toString('utf-8');
context.done(null, returnData);
}
}).promise();
} catch (error) {
console.log(error);
return;
}
break;
case "xls":
case "xlsx":
returnData = "Excel";
// Extract text
const params2 = {Bucket: 'filestation', Key: 'MyExcel.'+event.fileExt};
const readXlsxFile = require("read-excel-file/node");
try {
const doc = await s3.getObject(params2);
const parsedDoc = await readXlsxFile(doc);
console.log(parsedDoc)
} catch (err) {
console.log(err);
const message = `Error getting object.`;
console.log(message);
throw new Error(message);
}
break;
case "docx":
returnData = "Word doc";
// Extract text
break;
default:
callback("400 Invalid Operator");
break;
}
callback(null, returnData);
};
The text file part works, but the xlsx part makes the function time out.
I did install the read-excel-file dependency and uploaded the zip so that I have access to it.
But the function times out with this message:
"errorMessage": "2020-11-02T13:06:50.948Z 120bfb48-f29c-4e3f-9507-fc88125515fd Task timed out after 3.01 seconds"
Any help would be appreciated! Thanks for your time.
Using the xlsx npm library, here's how we did it.
Assuming the file is under the root project path:
const xlsx = require('xlsx');
// read your excel file
let readFile = xlsx.readFile('file_example_XLSX_5000.xlsx')
// get first-sheet's name
let sheetName = readFile.SheetNames[0];
// convert sheets to JSON. Best if sheet has a headers specified.
console.log(xlsx.utils.sheet_to_json(readFile.Sheets[sheetName]));
You need to install the xlsx (SheetJS) library into the project:
npm install xlsx
and then import the read function into the Lambda, get the S3 object's body, and pass it to xlsx like this:
const { read } = require('xlsx');
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
exports.handler = async (event) => {
const bucketName = 'excel-files';
const fileKey = 'Demo Data.xlsx';
// Simple GetObject
let file = await s3.getObject({Bucket: bucketName, Key: fileKey}).promise();
const wb = read(file.Body);
const response = {
statusCode: 200,
body: JSON.stringify({
read: wb.Sheets,
}),
};
return response;
};
(of course, you can receive the bucket and filekey from parameters if you send them...)
Very important: use the read function (not readFile) and pass the Body property (with a capital "B") as the parameter.
I changed the timeout to 20 seconds and it works. Only one issue remains: const parsedDoc = await readXlsxFile(doc); wants to receive a string (a file path) and not a file.
Solved by using the xlsx npm library, using a stream and giving it buffers.
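For reference, a minimal sketch of that approach, assuming aws-sdk v2 and the xlsx package; the bucket and key names are placeholders:
const xlsx = require('xlsx');
const S3 = require('aws-sdk/clients/s3');
const s3 = new S3();
const readExcelFromS3 = async (bucket, key) => {
  // Body comes back as a Buffer, which xlsx.read() accepts directly.
  const obj = await s3.getObject({ Bucket: bucket, Key: key }).promise();
  const workbook = xlsx.read(obj.Body);
  const firstSheet = workbook.Sheets[workbook.SheetNames[0]];
  // Convert the first sheet to an array of row objects.
  return xlsx.utils.sheet_to_json(firstSheet);
};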
I would like to read the content of a .txt file stored within an S3 bucket.
I tried:
var s3 = new AWS.S3({apiVersion: '2006-03-01'});
var params = {Bucket: 'My-Bucket', Key: 'MyFile.txt'};
var s3file = s3.getObject(params)
But the s3file object that I get does not contain the content of the file.
Do you have an idea of what to do?
Agree with zishone and here is the code with exception handling:
var s3 = new AWS.S3({apiVersion: '2006-03-01'});
var params = {Bucket: 'My-Bucket', Key: 'MyFile.txt'};
s3.getObject(params , function (err, data) {
if (err) {
console.log(err);
} else {
console.log(data.Body.toString());
}
})
According to the docs, the contents of your file will be in the Body field of the result, and it will be a Buffer.
Another problem is that s3.getObject() should be given a callback:
s3.getObject(params, (err, s3file) => {
const text = s3file.Body.toString();
})
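Alternatively, a small sketch using the promise form of the same call (aws-sdk v2), inside an async function:
// Sketch: the same read, awaiting .promise() instead of passing a callback.
const data = await s3.getObject(params).promise();
const text = data.Body.toString('utf-8');
console.log(text);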
I want to extract text from an image using Node.js, so I created a Lambda in AWS. Please find the code snippet below. The issue is that the Textract method detectDocumentText is not getting invoked.
As far as permissions go, I have given the Lambda full access to S3 and Textract. Am I missing anything?
var AWS = require("aws-sdk");
var base64 = require("base-64");
var fs = require("fs");
exports.handler = async (event, context, callback) => {
// Input for textract can be byte array or S3 object
AWS.config.region = "us-east-1";
//AWS.config.update({ region: 'us-east-1' });
var textract = new AWS.Textract({ apiVersion: "2018-06-27" });
//var textract = new AWS.Textract();
console.log(textract);
var params = {
Document: {
/* required */
//'Bytes': imageBase64
S3Object: {
Bucket: "717577",
Name: "Picture2.png"
}
}
};
textract.detectDocumentText(params, function(err, data) {
if (err) {
console.log(err); // an error occurred
} else {
console.log(data); // successful response
callback(null, data);
}
});
};
I also don't see any error logs in CloudWatch Logs.
The problem is that you have marked your handler as async, which means it returns a promise. That promise resolves before the detectDocumentText callback has run, so Lambda ends the invocation without waiting for it. You have two choices here:
Remove async and complete the invocation through the callback (a sketch of this is shown after the code below).
Or, the more recommended way: convert your callback style to promises. The aws-sdk supports a .promise() method on all request objects, so you can leverage that. The code will look like this:
var AWS = require("aws-sdk");
var base64 = require("base-64");
var fs = require("fs");
exports.handler = async (event, context) => {
// Input for textract can be byte array or S3 object
AWS.config.region = "us-east-1";
//AWS.config.update({ region: 'us-east-1' });
var textract = new AWS.Textract({ apiVersion: "2018-06-27" });
//var textract = new AWS.Textract();
console.log(textract);
var params = {
Document: {
/* required */
//'Bytes': imageBase64
S3Object: {
Bucket: "717577",
Name: "Picture2.png"
}
}
};
const data = await textract.detectDocumentText(params).promise();
return data;
};
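For completeness, here is a minimal sketch of the first choice (removing async and completing through the callback); it reuses the same Textract parameters as the question:
var AWS = require("aws-sdk");
// Sketch: non-async handler; Lambda waits until callback is invoked.
exports.handler = (event, context, callback) => {
  AWS.config.region = "us-east-1";
  var textract = new AWS.Textract({ apiVersion: "2018-06-27" });
  var params = {
    Document: {
      S3Object: { Bucket: "717577", Name: "Picture2.png" } // same object as in the question
    }
  };
  textract.detectDocumentText(params, function(err, data) {
    if (err) callback(err); // an error occurred
    else callback(null, data); // successful response
  });
};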
Hope this helps.