Download file from third party server and upload to S3 - node.js

I have a Lambda Node function which is called by a webhook from a thirdparty server. The TP server sends a file download URL and some other data.
The download URL is temporary, so I need to push the file to an S3 for long term storage.
The rudimentary function below, downloads the file and then tries to upload to the S3.
This works when the file is a plain text, but images/pdfs etcs are corrupted when they reach the S3.
const AWS = require("aws-sdk");
const https = require('https');
const path = require('path');
const s3 = new AWS.S3({apiVersion: '2006-03-01'});
exports.handler = async (event, context, callback) => {
var payload = event.body;
const url_host = payload.host;
const url_path = payload.path; //URL of file which needs to be downloaded
const get_params = {
host: url_host,
path: url_path,
port: 443,
method: 'GET',
headers: { }
};
var resp = await https_get_processor(get_params); //File downloaded here
var uploadParams = {
Bucket: "bucket_name",
Key: '',
Body: resp //Preparing to upload the received file
};
uploadParams.Key = path.basename(url_path); //Generating filename
s3.upload (uploadParams, function (err, data) {
if (err) {
console.log("Error", err);
} if (data) {
console.log("Upload Success", data.Location);
}
});
response = {...} //Generic Response
return response;
};
async function https_get_processor(get_params)
{
return await new Promise((resolve, reject) =>
{
var data = "";
const req = https.request(get_params, res => {
res.on('data', chunk => { data += chunk })
res.on('end', () =>
{
resolve(data);
})
});
req.on('error', (err) => {
reject(err);
});
req.end();
});
}

Response is a Buffer in such case, so try changing request processing by pushing each chunk into an array, and then merge Buffer chunks and pass them.
Try this:
var data = [];
const req = https.request(get_params, res => {
res.on('data', chunk => data.push(chunk))
res.on('end', () =>
{
resolve(Buffer.concat(data));
})

Related

node.js save http response as pdf file

I have AWS Lambda function that return pdf file like arr.
I want to call function and save pdf file, but after saving I can't open it, it brocken. I cant undestand why, i tried differente ways to create pdf, by the way i can get arr and using online converter convert arr to file, and its work, but when i create pdf file using node code its always broken. I tried internal node moduls like fs, and external like pdfkit.
`const https = require('https');
const fs = require('fs');
const PDFDocument = require('pdfkit');
const options = {
host: 'uek9w0hztc.execute-api.eu-north-1.amazonaws.com',
path: '/pdfmaker',
method: 'POST',
headers: {
url: 'https://www.linkedin.com'
}
}
const req = https.request(options, res => {
let data = [];
const headerDate = res.headers && res.headers.date ? res.headers.date : 'no response date';
console.log('Status Code:', res.statusCode);
console.log('Date in Response header:', headerDate);
res.on('data', chunk => {
data.push(chunk);
});
res.on('end', () => {
console.log('Response ended: ');
// fs.writeFileSync('index.pdf', Buffer.from(data));
// fs.writeFileSync("index_v2.pdf", Buffer.from(data), 'binary', (err) => {
// if(err) {
// return console.log(err);
// }
// console.log("The file was saved!");
// });
// const doc = new PDFDocument();
// doc.pipe(fs.createWriteStream('output.pdf'));
let writeStream = fs.createWriteStream('pdf123.pdf')
writeStream.once('open', (fd) =>{
writeStream.write(new Buffer.from(data, 'binary'))
writeStream.on('finish', () => {
console.log('wrote all data to file');
});
writeStream.end()
})
});
}).on('error', err => {
console.log('Error: ', err.message);
});
req.end();`
I tried internal node moduls like fs, and external like pdfkit.
I expect someone give me a hint where the problem is.

Node.js upload Image Stream.Readable to S3

My lambda is triggered by a request from the browser. The browser sends an image as multipart/form-data.
The lambda uses busboy to parse the request:
function parseForm(event: IHttpEvent) {
return new Promise(
(resolve, reject) => {
const busboy = new Busboy({
headers: event.headers,
limits: { files: 10 },
});
const imageResponse = new Map<string, IImageParseResponse>();
busboy.on("file", (id, file, filename, encoding, mimeType) => {
imageResponse.set(id, { file, filename, mimeType });
});
busboy.on("error", (error) => reject(`Parse error: ${error}`));
busboy.on("finish", () => resolve(imageResponse));
busboy.write(event.body, event.isBase64Encoded ? "base64" : "binary");
busboy.end();
}
);
}
When I parsed the request I want to upload the file to AWS S3.
export async function handler(event: IHttpEvent) {
var res = await parseForm(event);
const s3 = new S3Client({ region: "eu-central-1" });
for (const [k, v] of res) {
console.log(`File ${v.filename} ${v.mimeType} streaming`);
const stream = new Readable().wrap(v.file);
const upload = new Upload({
client: s3,
params: {
Key: v.filename,
Bucket: "my-image-bucket",
Body: stream,
ContentType: v.mimeType,
},
});
upload.on("httpUploadProgress", (p) => console.log(p));
const result = await upload.done();
console.log(result);
return result;
}
}
This does not work. However the Browser will receive a 200 OK with a null body response. What confuses me even more is that console.log(result); does not log anything to console.
Where is my mistake? I dont't fully understand the mechanics of streams. But as far as I understand it will be more memory-efficient. In the future I plan to upload multiple images at once. And in order to save cost I want my method to be as efficient as possible.
In general I did 2 mistakes.
Tried to upload the stream when it was already read to the end by busboy
I did not properly wait for the completion of the upload to s3 before terminating the function.
In the end i ended up with the following:
const s3 = new S3Client({ region: "eu-central-1" });
const { BUCKET_NAME, MAX_IMAGE_SIZE } = process.env;
export async function handler(event: IHttpEvent) {
const results = await parseForm(event);
const response = [];
for (const r of results) {
if (r.status === "fulfilled") {
const value: any = r.value.result;
response.push({
id: r.value.id,
key: value.Key,
url: value.Location,
});
}
if (r.status === "rejected")
response.push({ id: r.reason.id, reason: r.reason.error });
}
return response;
}
async function doneHandler(
id: string,
uploadMap: Map<string, Upload>
): Promise<{ id: string; result: ServiceOutputTypes }> {
try {
var result = await uploadMap.get(id).done();
} catch (e: any) {
var error = e;
} finally {
uploadMap.delete(id);
if (error) throw { id, error };
return { id, result };
}
}
function parseForm(event: IHttpEvent) {
return new Promise( (resolve, reject) => {
const busboy = new Busboy({
headers: event.headers,
limits: { files: 1, fileSize: parseInt(MAX_IMAGE_SIZE) },
});
const responses: Promise<{
id: string;
result: ServiceOutputTypes;
}>[] = [];
const uploads = new Map<string, Upload>();
busboy.on("file", (id, file, filename, encoding, mimeType) => {
uploads.set(
id,
new Upload({
client: s3,
params: {
Bucket: BUCKET_NAME,
Body: new Readable().wrap(file),
Key: filename,
ContentType: mimeType,
ContentEncoding: encoding,
},
})
);
responses.push(doneHandler(id, uploads));
file.on("limit", async () => {
const aborts = [];
for (const [k, upload] of uploads) {
aborts.push(upload.abort());
}
await Promise.all(aborts);
return reject(new Error("File is too big."));
});
});
busboy.on("error", (error: any) => {
reject(new Error(`Parse error: ${error}`));
});
busboy.on("finish", async () => {
const res = await Promise.allSettled(responses);
resolve(res);
});
busboy.write(event.body, event.isBase64Encoded ? "base64" : "binary");
busboy.end();
}
);
}
This solution also handles file-limits and tries to abort all pending uploads to S3

cloud function reads file but it's not returning the content on the client side

I am using the code below to read a json file in google firebase storage and then return the content of the file. The code works but all I am getting on the client side is null
exports.updateProductCatalogue = functions.https.onCall(async (data, context) => {
const filepath = data.filepath
const bucketname = data.bucket
const remoteFile = bucket.file("storeid.json");
let buffer = '';
remoteFile.createReadStream()
.on('error', function(err) {console.log(err)})
.on('data', function(response) {
buffer += response
console.log(buffer)
})
.on('end', function() {
//console.log(buffer);
console.log("FINISHED!!")
})
return buffer
})
this is my client side js call
function getUpdatedCatalogue(){
var getStorageData = firebase.functions().httpsCallable('updateProductCatalogue');
var callData = {
"bucket":"test"
}
getStorageData(callData).then(function(result){
console.log(result)
}).catch(function(error){
console.log(error)
})
}
The cloud console.log is showing that the content is read and shown in log but client side console.log is returning null. Here is the file file i am reading.
Why am I not getting the file content returned and displayed on client side? how can I fix this?
The problem is that you're returning the buffer before the stream finishes reading the file.
Try this (not tested),
exports.updateProductCatalogue = functions.https.onCall(async (data, context) => {
const filepath = data.filepath;
const bucketname = data.bucket;
const remoteFile = bucket.file("storeid.json");
return new Promise(resolve, reject) => {
let buffer = '';
remoteFile.createReadStream()
.on('error', function(err) {
console.log(err);
reject(err);
})
.on('data', function(response) {
buffer += response;
console.log(buffer);
})
.on('end', function() {
console.log("FINISHED!!")
resolve(buffer);
});
});
});

nodejs Lambda with S3 upload via API Gateway

I've been trying to get a simple serverless API Gateway -> NodeJS Lambda -> S3 working however it appears that the Lambda just uploads corrupt files.
This code would download the file from a URL then straight upload to S3.
I've tried both putObject & upload (with the different params) with no success. Looking at the file sizes when I download the original is is 24KB and the downloaded (corrupt) image from S3 is 44KB.
I simply test the application by doing a POST to the API Gateway URL.
Any ideas?
var url =
"https://upload.wikimedia.org/wikipedia/commons/thumb/1/1d/AmazonWebservices_Logo.svg/500px-AmazonWebservices_Logo.svg.png"
module.exports.upload = function(event, context, callback) {
https.get(url, function(res) {
var body = ""
res.on("data", function(chunk) {
// Agregates chunks
body += chunk
})
res.on("end", function() {
console.log(body)
// Once you received all chunks, send to S3 - putObject only
var params = {
Bucket: S3_BUCKET_NAME,
Key: "aws-logo.png",
Body: body
}
var s3Params = {
Bucket: S3_BUCKET_NAME,
Key: "aws-logo-upload.png",
Body: body,
ContentType: "image/png"
}
s3.upload(s3Params, function(err, data) {
// s3.putObject(params, function(err, data) {
if (err) {
console.log("error")
console.error(err, err.stack)
callback(null, { statusCode: 404, error })
} else {
console.log("ok")
console.log(data)
let response = {
statusCode: 200
}
callback(null, response)
}
})
})
})
}
The following code works for me outside of API Gateway/Lambda. It yields a PNG in S3 that's downloadable as a valid 23.7 KB image. I'd expect the equivalent to work in Lambda.
const AWS = require('aws-sdk');
const https = require('https');
const s3 = new AWS.S3();
const logourl =
'https://upload.wikimedia.org/wikipedia/commons/thumb/1/1d/AmazonWebservices_Logo.svg/500px-AmazonWebservices_Logo.svg.png';
const getThenUpload = (url, callback) => {
https.get(url, (res) => {
const data = [];
res.on('data', (chunk) => {
data.push(chunk);
});
res.on('end', () => {
const params = {
Bucket: S3_BUCKET_NAME,
Key: 'aws-logo-upload.png',
Body: Buffer.concat(data),
ContentType: 'image/png',
};
s3.upload(params, (err, rsp) => {
if (err) {
console.error(err, err.stack);
callback(err, { statusCode: 404, err });
} else {
console.log(rsp);
callback(null, { statusCode: 200 });
}
});
});
});
};
getThenUpload(logourl, (err, data) => {
if (err) {
console.error(`Error: ${err}`);
} else {
console.log(`Data: ${JSON.stringify(data)}`);
}
});

aws lambda how to store an image retrieved via https in S3

I am trying to write a lambda script that can pull an image from a site and store it in S3. The problem I'm having is what kind of object to pass as the Body attribute into the S3.putObject method. In the documentation here it says it should be either new Buffer('...') || 'STRING_VALUE' || streamObject, but I'm not sure how to convert the https response into one of those. Here is what I've tried:
var AWS = require('aws-sdk');
var https = require('https');
var Readable = require('stream').Readable;
var s3 = new AWS.S3();
var fs = require('fs');
var url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/1d/AmazonWebservices_Logo.svg/500px-AmazonWebservices_Logo.svg.png';
exports.handler = function(event, context) {
https.get(url, function(response) {
var params = {
Bucket: 'example',
Key: 'aws-logo.png',
Body: response // fs.createReadStream(response); doesn't work, arg should be a path to a file...
// just putting response errors out with "Cannot determine length of [object Object]"
};
s3.putObject(params, function(err, data) {
if (err) {
console.error(err, err.stack);
} else {
console.log(data);
}
});
});
};
As indicated in the comments, Lambda allows to save files in /tmp. But you don't really need it...
response does not contain the content of the file, but the http response (with its status code and headers).
You could try something like this:
var AWS = require('aws-sdk');
var https = require('https');
var s3 = new AWS.S3();
var url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/1d/AmazonWebservices_Logo.svg/500px-AmazonWebservices_Logo.svg.png';
exports.handler = function(event, context) {
https.get(url, function(res) {
var body = '';
res.on('data', function(chunk) {
// Agregates chunks
body += chunk;
});
res.on('end', function() {
// Once you received all chunks, send to S3
var params = {
Bucket: 'example',
Key: 'aws-logo.png',
Body: body
};
s3.putObject(params, function(err, data) {
if (err) {
console.error(err, err.stack);
} else {
console.log(data);
}
});
});
});
};
try this package https://www.npmjs.com/package/request
var request = require('request');
exports.handler = function (event, context) {
s3.putObject({
Bucket: 'example',
Key: 'aws-logo.png',
Body: request.get(url, {followRedirect: false})
}, function (err, data) {
if (err) console.error(err, err.stack);
else console.log(data);
})
}

Resources