Download PDF files from external URLs - Heroku, NodeJS, Angular 7 - node.js

I am trying to download multiple PDF files from external sources to my Node.js server (on Heroku) temporarily and then upload them to an AWS S3 bucket.
I have tried multiple methods, all of which work fine on my local machine but not on the Heroku dyno. I am unable even to create a folder on Heroku, which I assume is due to limited permissions.
In Node I have tried:
1) var download = require('download-file') (used in the code below)
2) axios
3) res.download()
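For reference, a minimal sketch of what downloading to a writable location on a Heroku dyno can look like (the dyno filesystem is ephemeral but writable; the folder just has to be created first). This uses axios, and the folder name is illustrative:
const axios = require('axios');
const fs = require('fs');
const os = require('os');
const path = require('path');

// Download one PDF into a temp folder created at runtime; os.tmpdir() is writable on Heroku.
async function downloadToTmp(url) {
  const dir = path.join(os.tmpdir(), 'external-pdfs');   // illustrative folder name
  fs.mkdirSync(dir, { recursive: true });                // create it explicitly before writing
  const filePath = path.join(dir, path.basename(url));
  const response = await axios.get(url, { responseType: 'arraybuffer' });
  fs.writeFileSync(filePath, response.data);
  return filePath;
}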
Download Files Code
const downloadFiles = async (unique_files) => {
  for (let index = 0; index < unique_files.length; index++) {
    let file_ext = unique_files[index].substr(unique_files[index].length - 4);
    if (file_ext == ".pdf") {
      await downloadzz(unique_files[index])
    }
  }
}
function downloadzz(link) {
  download(link, function(err) {
    if (err) throw err
    console.log("DOWNLOAD Complete");
  });
}
Upload Files Code
const uploadFiles = async (unique_files) => {
  for (let index = 0; index < unique_files.length; index++) {
    let file_ext = unique_files[index].substr(unique_files[index].length - 4);
    if (file_ext == ".pdf") {
      await uploadzz(unique_files[index])
    }
  }
}
function uploadzz(link) {
  fs.readFile(require('path').resolve(__dirname + '/../external-pdfs/', link.slice(link.lastIndexOf('/') + 1)), function (err, data) {
    params = { Bucket: pdfBucket, Key: link.slice(link.lastIndexOf('/') + 1), Body: data, ACL: "public-read" };
    s3.putObject(params, function(err, data) {
      if (err) {
        console.log("Failed Upload", err);
      } else {
        console.log("Successfully uploaded data to bucket", data);
      }
    });
  });
}
I don't get any error, but no folder named external-pdfs seems to exist on the Heroku server.
I am open to better solutions, for example uploading the file from the external URL directly to S3.
How can I read a file from an external URL and upload it directly to an AWS S3 bucket?

You can use axios. By setting responseType to stream, you can get the file data and pass it as the body. Here is example code that gets the PDF from a URL and uploads it directly to S3:
const AWS = require('aws-sdk');
const axios = require('axios');

AWS.config.loadFromPath('./config.json');
const s3 = new AWS.S3({ apiVersion: '2006-03-01' });
const URL = "<YOUR_URL>";

const uploadPdfToS3 = async () => {
  try {
    const { data, headers } = await axios.get(URL, { responseType: 'stream' });
    // Create params for putObject call
    const objectParams = {
      Bucket: "<YOUR_BUCKET>",
      Key: "<YOUR_KEY>",
      ContentLength: headers['content-length'],
      Body: data
    };
    // Create object upload promise
    await s3.putObject(objectParams).promise();
  } catch (err) {
    console.log("ERROR --->" + err)
  }
}
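To connect this to the loop in the question, the same idea can take the URL and S3 key as parameters and be awaited per PDF (a sketch; pdfBucket stands in for the bucket variable already used in the question's upload code):
const uploadPdfToS3 = async (url, key) => {
  const { data, headers } = await axios.get(url, { responseType: 'stream' });
  await s3.putObject({
    Bucket: pdfBucket,                        // same bucket as in the question
    Key: key,
    ContentLength: headers['content-length'],
    Body: data
  }).promise();
};

const uploadAllPdfs = async (unique_files) => {
  for (const link of unique_files) {
    if (link.endsWith('.pdf')) {
      // Stream each PDF straight from its URL into S3; nothing is written to the dyno's disk.
      await uploadPdfToS3(link, link.slice(link.lastIndexOf('/') + 1));
    }
  }
};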

In Angular, we can use the FileSaver library to save the PDF file on the client; see the sample below.
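The original sample was attached as an image; as a stand-in, here is a minimal sketch of the FileSaver call, assuming the PDF is fetched as a Blob (the URL and file name are placeholders):
import { saveAs } from 'file-saver';

// Fetch the PDF as a Blob and let FileSaver trigger the browser download.
fetch('https://example.com/files/report.pdf')
  .then(response => response.blob())
  .then(blob => saveAs(blob, 'report.pdf'))
  .catch(err => console.error('Download failed', err));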

Related

Delivering image from S3 to React client via Context API and Express server

I'm trying to download a photo from an AWS S3 bucket via an express server to serve to a react app but I'm not having much luck. Here are my (unsuccessful) attempts so far.
The Workflow is as follows:
Client requests photo after retrieving key from database via Context API
Request sent to express server route (important so as to hide the true location from the client)
Express server route requests blob file from AWS S3 bucket
Express server parses image to base64 and serves to client
Client updates state with new image
React Client
const [profilePic, setProfilePic] = useState('');

useEffect(() => {
  await actions.getMediaSource(tempPhoto.key)
    .then(resp => {
      console.log('server resp: ', resp.data.data.newTest) // returns ����\u0000�\u0000\b\u0006\
      const url = window.URL || window.webkitURL;
      const blobUrl = url.createObjectURL(resp.data.data.newTest);
      console.log("blob ", blobUrl);
      setProfilePic({ ...profilePic, image: resp.data.data.newTest });
    })
    .catch(err => errors.push(err));
});
Context API - just axios wrapped into its own library
getMediaContents = async (key) => {
  return await this.API.call(`http://localhost:5000/${MEDIA}/mediaitem/${key}`, "GET", null, true, this.state.accessToken, null);
}
Express server route
router.get("/mediaitem/:key", async (req, res, next) => {
  try {
    const { key } = req.params;
    // Attempt 1 was to try with s3.getObject(downloadParams).createReadStream();
    const readStream = getFileStream(key);
    readStream.pipe(res);
    // Attempt 2 - attempt to convert response to base 64 encoding
    var data = await getFileStream(key);
    var test = data.Body.toString("utf-8");
    var container = '';
    if (data.Body) {
      container = data.Body.toString("utf-8");
    } else {
      container = undefined;
    }
    var buffer = (new Buffer.from(container));
    var test = buffer.toString("base64");
    require('fs').writeFileSync('../uploads', test); // it never wrote to this directory
    console.log('conversion: ', test); // prints: 77+977+977+977+9AO+/vQAIBgYH - this doesn't look like base64 to me.
    delete buffer;
    res.status(201).json({ newTest: test });
  } catch (err) {
    next(ApiError.internal(`Unexpected error > mediaData/:id GET -> Error: ${err.message}`));
    return;
  }
});
AWS S3 Library - I made my own library for using the s3 bucket as I'll need to use more functionality later.
const getFileStream = async (fileKey) => {
  const downloadParams = {
    Key: fileKey,
    Bucket: bucketName
  }
  // This was attempt 1's return without async in the parameter
  return s3.getObject(downloadParams).createReadStream();
  // Attempt 2's intention was just to wait for the promise to be fulfilled.
  return await s3.getObject(downloadParams).promise();
}
exports.getFileStream = getFileStream;
If you've gotten this far you may have realised that I've tried a couple of things from different sources and documentation but I'm not getting any further. I would really appreciate some pointers and advice on what I'm doing wrong and what I could improve on.
If any further information is needed then just let me know.
Thanks in advance for your time!
Maybe this will be useful for you; this is how I get an image from S3 and process it on the server.
Create a temporary directory
createTmpDir(): Promise<string> {
  return mkdtemp(path.join(os.tmpdir(), 'tmp-'));
}
Gets the file
readStream(path: string) {
  return this.s3
    .getObject({
      Bucket: this.awsConfig.bucketName,
      Key: path,
    })
    .createReadStream();
}
How I process the file
async MainMethod(fileName) {
  const dir = await this.createTmpDir();
  const serverPath = path.join(dir, fileName);
  await pipeline(
    this.readStream(fileName),
    fs.createWriteStream(serverPath + '.jpg')
  );
  const createFile = await sharp(serverPath + '.jpg')
    .jpeg()
    .resize({
      width: 640,
      fit: sharp.fit.inside,
    })
    .toFile(serverPath + '.jpeg');
  const imageBuffer = fs.readFileSync(serverPath + '.jpeg');
  // my manipulations
  fs.rmSync(dir, { recursive: true, force: true }); // delete the temporary folder
}
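For the base64 route the question describes, note that the corruption comes from data.Body.toString("utf-8"): the Body returned by getObject is already a Buffer, so it can be base64-encoded directly without going through a UTF-8 string. A minimal sketch, reusing the question's route shape and assuming the same s3 client and bucketName:
router.get("/mediaitem/:key", async (req, res, next) => {
  try {
    const { key } = req.params;
    const data = await s3.getObject({ Bucket: bucketName, Key: key }).promise();
    // Encode the binary Buffer directly; no intermediate utf-8 conversion.
    const base64Image = data.Body.toString("base64");
    res.status(200).json({
      newTest: `data:${data.ContentType};base64,${base64Image}`
    });
  } catch (err) {
    next(err);
  }
});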

Corrupt or damaged file when uploading to S3 (AWS) with Node.js

When I upload a file from my local environment, I have no problem with S3 and I can download the file from AWS without issues. But when I upload the file from the development environment in AWS, the file is corrupted, does not open, or opens empty. The parameters are the same in both cases.
module.exports.handler = async event => {
  const body = multipart.parse(event, true)
  const { loan_id, document } = body
  const lang = event.headers?.Lang ? event.headers.Lang : event.headers?.lang
  try {
    const date = new Date()
    const stringDate = `${date.getFullYear()}${
      date.getMonth() + 1 < 10 ? `0${date.getMonth() + 1}` : date.getMonth() + 1
    }${date.getDay() < 10 ? `0${date.getDay()}` : date.getDay()}-${date.getHours()}${date.getMinutes()}`
    // Upload the file to S3
    const fullFileName = `${document.type}/${loan_id}-${stringDate}-${document.filename}`
    try {
      console.log('Uploading the file in S3 with key', fullFileName)
      //console.log(buf);
      var data = {
        Bucket: DOCUMENTS_BUCKET_NAME,
        Key: fullFileName,
        Body: document.content,
        ContentType: document.ContentType
      }
      await s3
        .putObject(data)
        .promise()
    } catch (err) {
      console.error(err)
      return serverlessResponse(HTTP_CODES.BAD_REQUEST, 'The file cannot be uploaded', err)
    }
  } catch (err) {
    console.error(err)
  }
}
(The request headers and form-data body from Postman were attached as screenshots.)

Read data from .xlsx file on S3 using Nodejs Lambda

I'm still new to Node.js and AWS, so forgive me if this is a noob question.
I am trying to read the data from an excel file (.xlsx). The lambda function receives the extension of the file type.
Here is my code:
exports.handler = async (event, context, callback) => {
  console.log('Received event:', JSON.stringify(event, null, 2));
  if (event.fileExt === undefined) {
    callback("400 Invalid Input");
  }

  let returnData = "";
  const S3 = require('aws-sdk/clients/s3');
  const s3 = new S3();

  switch (event.fileExt) {
    case "plain":
    case "txt":
      // Extract text
      const params = { Bucket: 'filestation', Key: 'MyTXT.' + event.fileExt };
      try {
        await s3.getObject(params, function(err, data) {
          if (err) console.log(err, err.stack); // an error occurred
          else { // successful response
            returnData = data.Body.toString('utf-8');
            context.done(null, returnData);
          }
        }).promise();
      } catch (error) {
        console.log(error);
        return;
      }
      break;
    case "xls":
    case "xlsx":
      returnData = "Excel";
      // Extract text
      const params2 = { Bucket: 'filestation', Key: 'MyExcel.' + event.fileExt };
      const readXlsxFile = require("read-excel-file/node");
      try {
        const doc = await s3.getObject(params2);
        const parsedDoc = await readXlsxFile(doc);
        console.log(parsedDoc)
      } catch (err) {
        console.log(err);
        const message = `Error getting object.`;
        console.log(message);
        throw new Error(message);
      }
      break;
    case "docx":
      returnData = "Word doc";
      // Extract text
      break;
    default:
      callback("400 Invalid Operator");
      break;
  }
  callback(null, returnData);
};
The textfile part works. But the xlsx part makes the function time out.
I did install the read-excel-file dependency and uploaded the zip so that I have access to it.
But the function times out with this message:
"errorMessage": "2020-11-02T13:06:50.948Z 120bfb48-f29c-4e3f-9507-fc88125515fd Task timed out after 3.01 seconds"
Any help would be appreciated! Thanks for your time.
Using the xlsx npm library, here's how we did it, assuming the file is under the root project path:
const xlsx = require('xlsx');
// read your excel file
let readFile = xlsx.readFile('file_example_XLSX_5000.xlsx')
// get first-sheet's name
let sheetName = readFile.SheetNames[0];
// convert sheets to JSON. Best if sheet has a headers specified.
console.log(xlsx.utils.sheet_to_json(readFile.Sheets[sheetName]));
You need to install the xlsx (SheetJS) library in the project:
npm install xlsx
and then import the read function into the lambda, get the S3 object's body and pass it to xlsx like this:
const { read } = require('sheetjs-style');
const aws = require('aws-sdk');
const s3 = new aws.S3({ apiVersion: '2006-03-01' });
exports.handler = async (event) => {
  const bucketName = 'excel-files';
  const fileKey = 'Demo Data.xlsx';
  // Simple GetObject
  let file = await s3.getObject({ Bucket: bucketName, Key: fileKey }).promise();
  const wb = read(file.Body);
  const response = {
    statusCode: 200,
    body: JSON.stringify({
      read: wb.Sheets,
    }),
  };
  return response;
};
(of course, you can receive the bucket and filekey from parameters if you send them...)
Very important: use the read function (not readFile) and pass the Body property (with a capital "B") as the parameter.
I changed the timeout to 20 seconds and it works. Only one issue remains: const parsedDoc = await readXlsxFile(doc); expects a string (file path), not a file object.
Solved by using the xlsx npm library, reading the object as a stream and passing it buffers.
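A minimal sketch of that approach: collect the S3 read stream into a single Buffer, then let xlsx parse it in memory (the bucket and key mirror the question; the helper names are illustrative):
const xlsx = require('xlsx');

// Gather a readable stream's chunks into one Buffer.
const streamToBuffer = (stream) =>
  new Promise((resolve, reject) => {
    const chunks = [];
    stream.on('data', (chunk) => chunks.push(chunk));
    stream.on('error', reject);
    stream.on('end', () => resolve(Buffer.concat(chunks)));
  });

const readExcelFromS3 = async () => {
  const stream = s3.getObject({ Bucket: 'filestation', Key: 'MyExcel.xlsx' }).createReadStream();
  const buffer = await streamToBuffer(stream);
  const workbook = xlsx.read(buffer); // xlsx.read accepts a Buffer
  return xlsx.utils.sheet_to_json(workbook.Sheets[workbook.SheetNames[0]]);
};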

Unable to upload multiple images to AWS S3 if I don't first upload one image through an AWS NodeJS Lambda endpoint using Promises

I have the code below on AWS Lambda as an endpoint exposed through API Gateway. The point of this endpoint is to upload images to an S3 bucket. I've been experiencing an interesting bug and could use some help. This code is unable to upload multiple images to S3 if it does not first upload one image. I've listed the scenarios below. The reason I want to use Promises is that I intend to insert data into a MySQL table in the same endpoint. Any advice or feedback will be greatly appreciated!
The code successfully uploads multiple images when I:
Pass one image to the endpoint to upload to S3 first
Pass several images to the endpoint to upload to S3 after uploading one image first
The code fails to upload images when I:
Pass several images to the endpoint to upload to S3 first. A random number of images might be uploaded, but it consistently fails to upload all of them. A 502 error code is returned because it failed to upload all images.
Code
const AWS = require('aws-sdk');
const s3 = new AWS.S3({});

function uploadAllImagesToS3(imageMap) {
  console.log('in uploadAllImagesToS3')
  return new Promise((resolve, reject) => {
    awaitAll(imageMap, uploadToS3)
      .then(results => {
        console.log('awaitAllFinished. results: ' + results)
        resolve(results)
      })
      .catch(e => {
        console.log("awaitAllFinished error: " + e)
        reject(e)
      })
  })
}

function awaitAll(imageMap, asyncFn) {
  const promises = [];
  imageMap.forEach((value, key) => {
    promises.push(asyncFn(key, value));
  })
  console.log('promises length: ' + promises.length)
  return Promise.all(promises)
}

function uploadToS3(key, value) {
  return new Promise((resolve, reject) => {
    console.log('Promise uploadToS3 | key: ' + key)
    // [key, value] = [filePath, Image]
    var params = {
      "Body": value,
      "Bucket": "userpicturebucket",
      "Key": key
    };
    s3.upload(params, function (err, data) {
      console.log('uploadToS3. s3.upload. data: ' + JSON.stringify(data))
      if (err) {
        console.log('error when uploading to s3 | error: ' + err)
        reject(JSON.stringify(["Error when uploading data to S3", err]))
      } else {
        let response = {
          "statusCode": 200,
          "headers": {
            "Access-Control-Allow-Origin": "http://localhost:3000"
          },
          "body": JSON.stringify(data),
          "isBase64Encoded": false
        };
        resolve(JSON.stringify(["Successfully Uploaded data to S3", response]))
      }
    });
  })
}

exports.handler = (event, context, callback) => {
  if (event !== undefined) {
    let jsonObject = JSON.parse(event.body)
    let pictures = jsonObject.pictures
    let location = jsonObject.pictureLocation
    let imageMap = new Map()
    for (let i = 0; i < pictures.length; i++) {
      let base64Image = pictures[i].split('base64,', 2)
      let decodedImage = Buffer.from(base64Image[1], 'base64'); // image string is after 'base64'
      let base64Metadata = base64Image[0].split(';', 3) // data:image/jpeg,name=coffee.jpg,
      let imageNameData = base64Metadata[1].split('=', 2)
      let imageName = imageNameData[1]
      var filePath = "test/" + imageName
      imageMap.set(filePath, decodedImage)
    }
    const promises = [uploadAllImagesToS3(imageMap)]
    Promise.all(promises)
      .then(([uploadS3Response]) => {
        console.log('return promise!! | uploadS3Response: ' + JSON.stringify([uploadS3Response]))
        let res = {
          body: JSON.stringify(uploadS3Response),
          headers: {
            "Access-Control-Allow-Origin": "http://localhost:3000"
          }
        };
        callback(null, res);
      })
      .catch((err) => {
        callback(err);
      });
  } else {
    callback("No pictures were uploaded")
  }
};
Reason for the problem and solution:
After several hours of debugging this issue I realized what the error was! My Lambda endpoint was timing out early. The reason I was able to upload multiple images after first uploading one image was that the Lambda endpoint was being executed from a warm start, as it was already up and running. The scenario where I was unable to upload multiple images only occurred when I tried to do so after not executing the endpoint for 10+ minutes, and therefore from a cold start. The solution was to increase the Timeout from the default of 3 seconds. I increased it to 20 seconds, but might need to play around with that time.
How to increase the lambda timeout?
Open Lambda function
Scroll down to Basic Settings and select Edit
Increase time in Timeout
TLDR
This error occurred because the Lambda function would time out. The solution is to increase the Lambda timeout.
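As a side note unrelated to the timeout, the explicit Promise wrapper around s3.upload in the question isn't strictly needed, since the SDK's .promise() already returns a promise that Promise.all can collect; a possible simplification:
// uploadToS3 without the manual Promise wrapper.
const uploadToS3 = (key, value) =>
  s3.upload({ Body: value, Bucket: "userpicturebucket", Key: key }).promise();

// Fan out all uploads and wait for every one of them.
const uploadAllImagesToS3 = (imageMap) =>
  Promise.all([...imageMap].map(([key, value]) => uploadToS3(key, value)));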

Returning result of an async operation with a Node.js web server

I'm using Express to build a web API. In the following example, SVG data is converted to PNG and uploaded to S3.
const svg2png = require("svg2png");
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

app.post('/svg_to_png', function (req, res) {
  let params = req.body
  // STEP 1: Convert SVG to PNG:
  var outputBuffer = svg2png.sync(params.svg_data, {});
  // STEP 2: Upload to S3:
  let s3_params = {
    Bucket: params.bucket,
    Key: params.key,
    Body: outputBuffer,
    ContentType: 'image/png',
    ContentDisposition: 'inline',
    ACL: 'public-read'
  }
  result = s3.putObject(s3_params, function(err, data) {
    if (err) {
      return err;
    }
    return 'success';
  });
  // Return Image URL:
  let image_url = 'https://s3.amazonaws.com/' + params.bucket + '/' + params.key
  res.send(image_url)
})
I want the API to respond with the URL of the converted image, which the requesting client can then immediately download. The problem is, the S3 upload operation is async, and so when the response is delivered, the image does not yet exist at the URL location, forcing the client to poll for its existence.
Is there a way to get the web server to respond only once the S3 upload has completed?
What about something like this:
const putObjPromise = s3.putObject(params).promise();
putObjPromise
  .then(data => {
    // Return the URL here.
  })
  .catch(err => console.log(err))
AWS has this doc for Promises : https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/using-promises.html
Hope this helps.
As @Brandon mentioned, you can return the response once the S3 callback has completed. You can also use s3.putObject(params).promise(). I prefer this since it improves readability.
app.post('/svg_to_png', async function (req, res) {
  let params = req.body
  ...
  // STEP 2: Upload to S3:
  let s3Params = {
    ...
  }
  try {
    const result = await s3.putObject(s3Params).promise();
    // Return Image URL:
    // image_url = "https://s3.amazonaws.com/' + params.bucket + '/' + params.key
    // res.body(....).end()
  } catch (err) {
    // return error response
  }
})
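Putting the pieces together, a minimal end-to-end sketch of the route, assuming the same svg2png conversion and request body fields as the question:
app.post('/svg_to_png', async function (req, res) {
  const params = req.body;
  try {
    // STEP 1: Convert SVG to PNG
    const outputBuffer = svg2png.sync(params.svg_data, {});
    // STEP 2: Upload to S3 and wait until it finishes
    await s3.putObject({
      Bucket: params.bucket,
      Key: params.key,
      Body: outputBuffer,
      ContentType: 'image/png',
      ContentDisposition: 'inline',
      ACL: 'public-read'
    }).promise();
    // STEP 3: The object exists now, so the client can download it immediately
    res.send('https://s3.amazonaws.com/' + params.bucket + '/' + params.key);
  } catch (err) {
    res.status(500).send(err.message);
  }
});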
