Using PDFKit to store a PDF in S3 on the fly - node.js

I'm trying to create a PDF from some images (in data URI format) in Node.js and have the PDF stored in my S3. The function is expected to return the S3 URL of the file.
I'm using parse-server here for the server, node-canvas to create the canvas of the images, and then PDFKit to create the PDF from the canvas elements (jsPdf didn't work out). Now I want this PDF to be sent to my S3 using the AWS SDK and finally to return the URL of the file. Below is my code, which works up to canvas generation. I don't know whether the PDF is created at all in the first place, even before being sent to S3. And oh! The entire thing is running on Heroku.
Parse.Cloud.define('getBulkMeta',async (req)=>{
const PDFDocument = require('pdfkit'),
{Canvas,loadImage} = require('canvas');
try {
let baseImg = await loadImage('data:image/png;base64,'+req.params.labels[0]);
let labels = req.params.labels,
allCanvas = [],
rowH = baseImg.naturalHeight,
rowW = baseImg.naturalWidth,
perpage = req.params.size[1],
pages = Math.ceil(labels.length/perpage),
imgInd = 0,
g = 10;
size = req.params.size[0];
for(var p=0;p<pages;p++){
let canvas = new Canvas(rowW*((size=='A4')?2:1),rowH*((size=='A4')?2:1)),
ctx = canvas.getContext("2d");
ctx.beginPath();
ctx.rect(0,0,canvas.width,canvas.height)
ctx.fillStyle = "#fff";
ctx.fill();
if(perpage == 1){
let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
ctx.drawImage(img,g,g,rowW-(2*g),rowH-(2*g));
} else {
var thisImgInd = 0;
for (var r=0;r<2;r++){
for(var c=0;c<2;c++){
let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
ctx.drawImage(img,g+(c*(rowW-g/2)),g+(r*(rowH-g/2)),rowW-(1.5*g),rowH-(1.5*g));
thisImgInd++
if(thisImgInd>=perpage||imgInd>=labels.length){break;}
}
if(thisImgInd>=perpage||imgInd>=labels.length){break;}
}
}
allCanvas.push(canvas)
}
var thisPDF = new PDFDocument({layout: 'landscape',size:size});
var bcoded;
thisPDF.pipe(()=>{bcoded = new Buffer.from(thisPDF).toString('base64')});
allCanvas.forEach((c,i)=>{
if(i){thisPDF.addPage();}
thisPDF.image(c.toDataURL(),0,0,thisPDF.page.width,thisPDF.page.width);
})
thisPDF.end();
const S3_BUCKET = process.env.S3_BUCKET;
aws.config.region = process.env.AWS_REGION;
aws.config.signatureVersion = 'v4';
let s3 = new aws.S3();
let fileName = req.params.name;
let s3Params = {
Bucket: S3_BUCKET,
Body: bcoded,
Key: fileName,
ContentType : 'application/pdf',
ACL: 'public-read'
};
s3.putObject(s3Params, (err, data) => {
if(err){
console.log('\n\n\n\n\n\n\n'+err+'\n\n\n\n\n\n\n');
throw 'Error: '+ (err);
}
let returnData = {
signedRequest: data,
url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileName}`
};
return (returnData);
})
} catch (e) {throw e;}
})
Update: I have got it to save the PDF file in S3 with the code below:
Parse.Cloud.define('getBulkMeta',async (req)=>{
const PDFDocument = require('pdfkit'),
{Canvas,loadImage} = require('canvas');
try {
let baseImg = await loadImage('data:image/png;base64,'+req.params.labels[0]);
let labels = req.params.labels,
allCanvas = [],
rowH = baseImg.naturalHeight,
rowW = baseImg.naturalWidth,
perpage = req.params.size[1],
pages = Math.ceil(labels.length/perpage),
imgInd = 0,
g = 10;
size = req.params.size[0];
for(var p=0;p<pages;p++){
let canvas = new Canvas(),
ctx = canvas.getContext("2d");
canvas.height = rowH*((size=='A4')?2:1);
canvas.width = rowW*((size=='A4')?2:1);
ctx.beginPath();
ctx.rect(0,0,canvas.width,canvas.height)
ctx.fillStyle = "#fff";
ctx.fill();
if(perpage == 1){
let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
ctx.drawImage(img,g,g,rowW-(2*g),rowH-(2*g));
} else {
var thisImgInd = 0;
for (var r=0;r<2;r++){
for(var c=0;c<2;c++){
let img = await loadImage('data:image/png;base64,'+labels[imgInd++]);
ctx.drawImage(img,g+(c*(rowW-g/2)),g+(r*(rowH-g/2)),rowW-(1.5*g),rowH-(1.5*g));
thisImgInd++
if(thisImgInd>=perpage||imgInd>=labels.length){break;}
}
if(thisImgInd>=perpage||imgInd>=labels.length){break;}
}
}
allCanvas.push(canvas)
}
var thisPDF = new PDFDocument({layout: 'landscape',size:size});
let buffers = [],pdfData,returnData='Hi';
thisPDF.on('data', buffers.push.bind(buffers));
thisPDF.on('end',() => {
pdfData = Buffer.concat(buffers);
const S3_BUCKET = process.env.S3_BUCKET;
aws.config.region = process.env.AWS_REGION;
aws.config.signatureVersion = 'v4';
let s3 = new aws.S3();
let fileName = req.params.name;
let s3Params = {
Bucket: S3_BUCKET,
Body: pdfData,
Key: (+new Date())+'-'+fileName,
ContentType : 'application/pdf',
ACL: 'public-read'
};
s3.putObject(s3Params,(err, data) => {
delete pdfData,thisPDF;
pdfData = null;thisPDF = null;
if(err){ throw 'Error: '+ (err); }
returnData = { signedRequest: data, url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileName}` };
})
})
allCanvas.forEach((c,i)=>{
if(i){thisPDF.addPage();}
thisPDF.image(c.toDataURL(),0,0,{fit:[thisPDF.page.width,thisPDF.page.height]});
})
thisPDF.end();
return returnData;
} catch (e) {throw e;}
})
However, returnData always gives "Hi" as the output, and it also appears the function isn't closing - Heroku throws a memory exceeded error every time.

Since you are using async callbacks to create the PDF and to send it to S3, your cloud function returns before these operations have actually completed. That's why you always have Hi in your returnData var. You need to create a promise and await it, resolving it at the end of these two operations. It should be something like this:
await (new Promise((resolve, reject) => {
  var thisPDF = new PDFDocument({layout: 'landscape', size: size});
  let buffers = [];
  thisPDF.on('data', buffers.push.bind(buffers));
  thisPDF.on('end', () => {
    pdfData = Buffer.concat(buffers);
    const S3_BUCKET = process.env.S3_BUCKET;
    aws.config.region = process.env.AWS_REGION;
    aws.config.signatureVersion = 'v4';
    let s3 = new aws.S3();
    let fileName = req.params.name;
    let s3Params = {
      Bucket: S3_BUCKET,
      Body: pdfData,
      Key: (+new Date()) + '-' + fileName,
      ContentType: 'application/pdf',
      ACL: 'public-read'
    };
    s3.putObject(s3Params, (err, data) => {
      pdfData = null;
      thisPDF = null;
      if (err) { return reject(err); }
      returnData = { signedRequest: data, url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileName}` };
      resolve();
    });
  });
  // Add the pages and call end() inside the promise, otherwise 'end' never fires.
  allCanvas.forEach((c, i) => {
    if (i) { thisPDF.addPage(); }
    thisPDF.image(c.toDataURL(), 0, 0, {fit: [thisPDF.page.width, thisPDF.page.height]});
  });
  thisPDF.end();
}));
BTW, instead of using the AWS SDK, you could use the Parse S3 Adapter and save the PDF as a regular Parse file.
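For example, a minimal sketch of that approach (assuming the same pdfData Buffer built in the 'end' handler above, and a server configured with the S3 adapter) could be:
  // Store the generated PDF as a Parse.File instead of calling S3 directly;
  // with the Parse S3 Adapter configured, the file ends up in your bucket.
  const pdfFile = new Parse.File(
    (+new Date()) + '-' + req.params.name,
    { base64: pdfData.toString('base64') },
    'application/pdf'
  );
  await pdfFile.save({ useMasterKey: true });
  return { url: pdfFile.url() };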

Related

Save HTML file from S3 and set it as content in puppeteer

I have a Lambda function in AWS which fetches an HTML file from S3 after a PUT event in the bucket. The Lambda function fetches it, saves it in /tmp/tml, and then loads it again so it can be set as content in puppeteer and produce a PDF. My code is the following:
const chromium = require("#sparticuz/chrome-aws-lambda");
const AWS = require('aws-sdk');
const fs = require('fs');
const path = require('path');
const IAM_USER_KEY = "asdfadsfasd";
const IAM_USER_SECRET = "asdfdsafasdfds";
const s3bucket = new AWS.S3({
accessKeyId: IAM_USER_KEY,
secretAccessKey: IAM_USER_SECRET
});
const copyRecursiveSync = function (src, dest) {
const exists = fs.existsSync(src);
const stats = exists && fs.statSync(src);
const isDirectory = exists && stats.isDirectory();
if (isDirectory) {
if (!fs.existsSync(dest)) {
fs.mkdirSync(dest);
}
fs.readdirSync(src).forEach(function (childItemName) {
copyRecursiveSync(path.join(src, childItemName), path.join(dest, childItemName));
});
} else {
fs.copyFileSync(src, dest);
}
};
function uploadObjectToS3Bucket(objectName, objectData) {
const params = {
Bucket: 'asdfasdfsadf',
Key: objectName,
Body: objectData,
ContentType: 'application/pdf'
};
s3bucket.upload(params, function(err, data) {
if (err) throw err;
console.log('File uploaded successfully');
});
}
function downloadFromS3(bucket, key, location){
const params = {
Bucket: bucket,
Key: key,
};
const rs = s3bucket.getObject(params).createReadStream();
const ws = fs.createWriteStream(location);
rs.pipe(ws);
return true;
}
exports.handler = async (event, context, callback) => {
try {
copyRecursiveSync('mylayerfiles/tml/', '/tmp/tml/');
console.log('Assets copied to /tmp/tml \n');
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
const html_file_name = key.split('/').pop();
console.log('Launching browser');
const browser = await chromium.puppeteer.launch({ headless: true, executablePath: await chromium.executablePath, args: ['--no-sandbox', '--disable-setuid-sandbox','--disable-web-security',
'--disable-dev-shm-usage',
'--single-process']});
console.log("Browser launched");
const page = await browser.newPage();
console.log(`Saving in /tmp/tml/${html_file_name}`);
downloadFromS3(bucket, key, `/tmp/tml/${html_file_name}`);
const bufferFile = async (relPath) => {
const data = fs.readFileSync(relPath, { encoding: 'utf8' }, function (err) {
if (err) {
console.log("readfile failed: " + err);
return 400;
} else {
console.log("readfile succeeded");
}
});
return data;};
const BUFFER = await bufferFile(`/tmp/tml/${html_file_name}`);
console.log('html file read from /tmp');
await page.setContent(content);
console.log('html set as content');
const pdfConfig = {
printBackground: true,
landscape: false,
width: "338.63mm",
height: "190.5mm"
};
await page.emulateMediaType('screen');
const pdf = await page.pdf(pdfConfig);
console.log('Uploading to S3 bucket');
const key_to_save = key.replace(/\.[^/.]+$/, ".pdf");
console.log(key_to_save);
uploadObjectToS3Bucket(key_to_save, pdf);
console.log('Uploaded to S3 bucket');
await browser.close();
console.log('Browser closed');
return 200;
} catch (err) {
console.log(err);
return 500;
}
};
However, I'm facing two problems:
The file sometimes is not written to /tmp/tml for some reason(!)
If written, it is not read correctly, and subsequently the setContent() function does not produce the proper PDF.
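One likely cause, for what it's worth: downloadFromS3 returns before the stream has finished piping, so the subsequent read can run against a file that is not fully written yet. A minimal sketch of an awaitable version (reusing the s3bucket client and names from the snippet above) might look like this:
  // Resolve only once the write stream has been fully flushed to /tmp.
  function downloadFromS3(bucket, key, location) {
    return new Promise((resolve, reject) => {
      const rs = s3bucket.getObject({ Bucket: bucket, Key: key }).createReadStream();
      const ws = fs.createWriteStream(location);
      rs.on('error', reject);
      ws.on('error', reject);
      ws.on('close', resolve);
      rs.pipe(ws);
    });
  }
  // ...and in the handler, await the download before reading the file back:
  await downloadFromS3(bucket, key, `/tmp/tml/${html_file_name}`);
  const BUFFER = fs.readFileSync(`/tmp/tml/${html_file_name}`, { encoding: 'utf8' });
  await page.setContent(BUFFER);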

How can I upload multiple images to an s3 bucket in a lambda function using node.js?

I am not very familiar with Node and am trying to upload an array of media objects to an S3 bucket using an AWS Lambda Node function.
The payload has an album, which is an array of key/data dictionaries. My code is below, but I'm certain this is wrong.
const awsServerlessExpress = require('aws-serverless-express');
const app = require('./app');
const server = awsServerlessExpress.createServer(app);
const AWS = require("aws-sdk");
const docClient = new AWS.DynamoDB.DocumentClient();
var s3 = new AWS.S3();
var s3Params = {
Bucket: 'bucketid',
ContentEncoding: 'base64',
ContentType: 'image/jpeg'
};
exports.handler = async (event, context) => {
console.log(event);
var body = JSON.parse(event.body);
if (typeof body.album !== 'undefined' && body.album) {
body.album.forEach(function (value) {
var data = body.album.mediaString;
let mediaData = new Buffer(data, 'base64');
var mediaKey = body.album.mediaKey;
try {
s3Params = {
Bucket: 'bucketID',
Key: mediaKey,
Body: mediaData
};
try {
const stored = await s3.upload(s3Params).promise();
console.log("stored successfully");
return { body: JSON.stringify(data) };
} catch (err) {
console.log("error storing");
console.log(err);
return { error: err };
}
} catch (err) {
return { error: err };
}
});
return { body: JSON.stringify(data) };
} else {
return { error: 'error'};
}
};
I get an error that s3 is not found. Just wondering if I'm going about this all wrong.
When I only upload one image with the following code, everything works fine:
const awsServerlessExpress = require('aws-serverless-express');
const app = require('./app');
const server = awsServerlessExpress.createServer(app);
const AWS = require("aws-sdk");
const docClient = new AWS.DynamoDB.DocumentClient();
var s3 = new AWS.S3();
var s3Params = {
Bucket: 'bucketID',
ContentEncoding: 'base64',
ContentType: 'image/jpeg'
};
exports.handler = async (event, context) => {
var body = JSON.parse(event.body);
var data = body.mediaString;
let mediaData = new Buffer(data, 'base64');
var mediaKey = body.mediaKey;
try {
s3Params = {
Bucket: 'bucketID',
Key: mediaKey,
Body: mediaData
};
try {
const stored = await s3.upload(s3Params).promise();
console.log("stored successfully");
return { body: JSON.stringify(data) };
} catch (err) {
console.log("error storing");
console.log(err);
return { error: err };
}
} catch (err) {
return { error: err };
}
};
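For what it's worth, one likely culprit in the first snippet is the await inside a non-async forEach callback (and the references to body.album.mediaString instead of the per-item value). A minimal sketch using map plus Promise.all, assuming each album item carries mediaString and mediaKey fields as in the single-image version, might look like this:
  exports.handler = async (event) => {
    const body = JSON.parse(event.body);
    if (!body.album) {
      return { error: 'error' };
    }
    // Build one upload promise per album item and wait for all of them.
    const uploads = body.album.map((item) => {
      const mediaData = Buffer.from(item.mediaString, 'base64');
      return s3.upload({
        Bucket: 'bucketID', // bucket name taken from the question's snippet
        Key: item.mediaKey,
        Body: mediaData,
        ContentEncoding: 'base64',
        ContentType: 'image/jpeg'
      }).promise();
    });
    try {
      const stored = await Promise.all(uploads);
      return { body: JSON.stringify(stored.map((s) => s.Key)) };
    } catch (err) {
      console.log('error storing', err);
      return { error: err };
    }
  };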

AWS Lambda Custom Nodejs Container Shows Runtime Error

I have built an AWS Lambda function with a custom container image. I am trying to convert an Excel file to PDF with LibreOffice - getting the file from S3, saving it to a file, converting it to PDF, and then uploading it back to S3.
Here is the code.
const fs = require('fs');
const getStream = require('get-stream');
const { Readable } = require('stream')
const { S3Client, GetObjectCommand, PutObjectCommand } = require("@aws-sdk/client-s3");
const libre = require('libreoffice-convert');
const path = require('path');
exports.handler = async (event) => {
const bucket = event.queryStringParameters.bucket;
const file = event.queryStringParameters.file;
const convertedFile = event.queryStringParameters.convertedFile;
if (event.queryStringParameters['warmup'] !== undefined) {
return {
result: true,
message: 'warmed up'
}
}
const client = new S3Client({ region: "ap-south-1" });
const command = new GetObjectCommand({ Bucket: bucket, Key: file });
const response = await client.send(command);
const objectData = response.Body;
const writeStream = fs.createWriteStream("/tmp/sample.xlsx");
objectData.pipe(writeStream);
var end = new Promise((resolve, reject) => {
objectData.on('close', resolve(true));
objectData.on('end', resolve(true));
objectData.on('error', reject(false));
});
let completed = await end;
if (completed) {
const extend = '.pdf'
const outputPath = `/tmp/sample${extend}`;
const enterPath = '/tmp/sample.xlsx';
var readingFile = new Promise((resolve, reject) => {
fs.readFile(enterPath, (err, data)=>{
if (err) {
reject(false);
}
resolve(data);
});
});
var fileData = await readingFile;
var converting = new Promise((resolve, reject) => {
libre.convert(fileData, extend, undefined, (err, done) => {
if (err) {
reject(false)
}
fs.writeFileSync(outputPath, done);
resolve(true)
});
})
var converted = await converting;
if (converted) {
var convertedFileStream = fs.createReadStream(outputPath);
const uploadCommand = new PutObjectCommand({ Bucket: bucket, Key: convertedFile, Body: convertedFileStream });
const lastResponse = await client.send(uploadCommand);
const returnResponse = {
result: true,
message: 'success',
bucket: event.queryStringParameters.bucket,
file: event.queryStringParameters.file,
convertedFile: event.queryStringParameters.convertedFile
};
if (event.queryStringParameters['returnEvent'] !== undefined) {
returnResponse['returnEvent'] = event;
}
return returnResponse;
}
}
return completed;
};
However, I am getting this error at times. Sometimes it succeeds, but sometimes it throws this error:
{
"errorType": "Error",
"errorMessage": "false",
"stack": [
"Error: false",
" at _homogeneousError (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:56:16)",
" at postError (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:72:34)",
" at done (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:99:13)",
" at fail (/function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:113:13)",
" at /function/node_modules/aws-lambda-ric/lib/Runtime/CallbackContext.js:148:24",
" at processTicksAndRejections (internal/process/task_queues.js:97:5)"
]
}
I don't know Node.js in great detail, so I suspect the code is not written the correct way. Any ideas what I am doing wrong here?
Like @hoangdv, when I logged errors I came to know that saving the file to disk was not correct. So I changed that area of the code to the following, and then it worked.
const client = new S3Client({ region: "ap-south-1" });
const command = new GetObjectCommand({ Bucket: bucket, Key: file });
const { Body } = await client.send(command);
await new Promise((resolve, reject) => {
Body.pipe(fs.createWriteStream(filePath))
.on('error', err => reject(err))
.on('close', () => resolve())
})
const excelFile = fs.readFileSync(filePath);

Unresolved Promise Assistance Node.js

I have the following code below, which is a Lambda function to get content from an S3 object zip file. I know for a fact that I am not resolving the list of promises and need a little direction on how to resolve them. I have read several examples on here but am having a hard time applying them to my code. Any assistance would be greatly appreciated.
// dependencies
const AWS = require('aws-sdk');
var JSZip = require('jszip');
// get reference to S3 client
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
// Read options from the event parameter.
const srcBucket = event.Records[0].s3.bucket.name;
// Object key may have spaces or unicode non-ASCII characters.
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
// Download the file from the S3 source bucket.
try {
const params = {
Bucket: srcBucket,
Key: srcKey
};
const data = await s3.getObject(params).promise();
var zip = JSZip.loadAsync(data.Body).then(function (content){
return content;
});
zip.then(function(result){
var entries = Object.keys(result.files).map(function (name) {
if(name.indexOf("TestStatus") != -1){
return name;
}
}).filter(notUndefined => notUndefined !== undefined);
var listOfPromises = entries.map(function(entry) {
return result.file(entry).async("text").then(function(fileContent){
return fileContent;
});
});
Promise.all(listOfPromises).then((values) =>{
values.forEach(function(value){
console.log(value);
});
});
});
} catch (error) {
context.fail(error);
return;
}
};
Modified/Corrected code
// dependencies
const AWS = require('aws-sdk');
var JSZip = require('jszip');
// get reference to S3 client
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
// Read options from the event parameter.
const srcBucket = event.Records[0].s3.bucket.name;
// Object key may have spaces or unicode non-ASCII characters.
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
// Download the file from the S3 source bucket.
try {
const params = {
Bucket: srcBucket,
Key: srcKey
};
const data = await s3.getObject(params).promise();
var zip = JSZip.loadAsync(data.Body);
return zip.then(function(result){
var entries = Object.keys(result.files).map((name) =>{
if(name.indexOf("TestStatus") != -1){
return result.files[name];
}
}).filter(notUndefined => notUndefined !== undefined);
var listOfPromises = entries.map((entry) => {
return entry.async("text")
.then((u8) => {
return [entry.name, u8];
}).catch(error => console.error(error));
});
var promiseOfList = Promise.all(listOfPromises);
promiseOfList.then(function (list) {
console.log(list.toString());
});
});
} catch (error) {
context.fail(error);
return;
}
};
If you look closely, you are not returning anything; that is why it stays Pending.
const AWS = require('aws-sdk');
var JSZip = require('jszip');
// get reference to S3 client
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
// Read options from the event parameter.
const srcBucket = event.Records[0].s3.bucket.name;
// Object key may have spaces or unicode non-ASCII characters.
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
// Download the file from the S3 source bucket.
try {
const params = {
Bucket: srcBucket,
Key: srcKey
};
const data = await s3.getObject(params).promise();
// here is the problem: the promise from loadAsync was never returned
// var zip = JSZip.loadAsync(data.Body).then(function (content){
//   return content;
// });
const result = await JSZip.loadAsync(data.Body);
var entries = Object.keys(result.files).map(function (name) {
if(name.indexOf("TestStatus") != -1){
return name;
}
}).filter(notUndefined => notUndefined !== undefined);
var listOfPromises = entries.map(function(entry) {
return result.file(entry).async("text").then(function(fileContent){
return fileContent;
});
});
// return the Promise.all so the handler waits for (and returns) the file contents
return Promise.all(listOfPromises).then((values) => {
values.forEach(function(value){
console.log(value);
});
return values;
});
} catch (error) {
context.fail(error);
return;
}
};

node.js renaming s3 object via aws-sdk module

Is it possible to rename an object on S3 via aws-sdk? I couldn't find a method for that; maybe there is a provisional solution...
I will answer, I guess, since no one has - this one should work:
// create a new s3 object
var s3 = new AWS.S3();
var BUCKET_NAME = 'your-bucket-name';
var OLD_KEY = '/original-file.js';
var NEW_KEY = '/new-file.js';
// Copy the object to a new location
s3.copyObject({
Bucket: BUCKET_NAME,
CopySource: `${BUCKET_NAME}${OLD_KEY}`,
Key: NEW_KEY
})
.promise()
.then(() =>
// Delete the old object
s3.deleteObject({
Bucket: BUCKET_NAME,
Key: OLD_KEY
}).promise()
)
// Error handling is left up to reader
.catch((e) => console.error(e))
This is just a follow-on from @nf071590's answer, which is awesome.
The code below gets the entire listing of a bucket and then changes the name of every image that isn't a .jpg to .jpg.
Hope this helps someone. :)
const start = new Date()
const AWS = require('aws-sdk')
const state = {}
AWS.config.update({ region: 'ADD_REGION_HERE' })
try {
var s3 = new AWS.S3();
var BUCKET_NAME = 'ADD_BUCKET_NAME_HERE';
var params = {
Bucket: BUCKET_NAME,
MaxKeys: 1000
};
s3.listObjects(params, function (err, data) {
if (err) {
console.log(err, err.stack); // an error occurred
} else {
console.log(data);
data.Contents.forEach(image => {
var OLD_KEY = image.Key
var NEW_KEY = ''
// split key
var keyArray = image.Key.split('.')
var keyArrayLength = keyArray.length
console.log(keyArrayLength);
var ext = keyArray[keyArrayLength - 1]
// console.log(ext);
if(ext != 'jpg') {
console.log('Change this ext FROM: ', OLD_KEY)
ext = 'jpg'
if (keyArrayLength == 2) {
NEW_KEY = `${keyArray[0]}.${ext}`
} else if (keyArrayLength == 3) {
NEW_KEY = `${keyArray[0]}.${keyArray[1]}.${ext}`
} else if (keyArrayLength == 4) {
NEW_KEY = `${keyArray[0]}.${keyArray[1]}.${keyArray[2]}.${ext}`
}
console.log('TO:: ', NEW_KEY);
// Copy the object to a new location
try {
s3.copyObject({
Bucket: BUCKET_NAME,
CopySource: `${BUCKET_NAME}/${OLD_KEY}`,
Key: NEW_KEY
}).promise()
.then((response) => {
console.log('Seemed to have worked??');
console.log(response);
// Delete the old object
s3.deleteObject({
Bucket: BUCKET_NAME,
Key: OLD_KEY
}).promise()
})
// Error handling is left up to reader
.catch((e) => console.error(e))
} catch (error) {
console.log('error::', error);
}
}
});
}
});
} catch (err) {
const end = new Date() - start
let seconds = end / 1000
state.seconds = seconds
state.error = err
state.status = "error"
state.message = err.message
console.log(err)
console.log(state);
return
}
