Is uploading a file to S3 in NodeJS blocking?

I've been seeing performance issues in our application, and I'm unsure whether uploading a file to S3 could be blocking NodeJS.
I'm using express, formidable and aws-sdk.
Here's a middleware using formidable. It stores the uploaded files on req.files and continues to the next middleware, which performs the upload to S3.
var formidable = require("formidable");

module.exports = function() {
  return function(req, res, next) {
    var form = new formidable.IncomingForm({
      "keepExtensions": true,
      "uploadDir": config.tempDir
    });
    form.parse(req, function(error, fields, files) {
      if (error) {
        return res.sendError("Error while parsing multipart-form " + error, 500);
      }
      req.files = files;
      req.fields = fields;
      next();
    });
  };
};
Here's the middleware that actually makes the request to S3 using the AWS SDK:
const AWS = require("aws-sdk");
const fs = require("fs");
const s3 = new AWS.S3(s3config.s3options);
const logger = require("logger");

function uploadFile(req, res, next) {
  const requestId = getRequestIdFromRequestHeaders(req);
  const file = options.file(req);
  const contentType = options.contentType && options.contentType(req) || file.type;
  const destinationPath = options.destinationPath(req);
  req.s3uploaded = false;
  logger.debug(requestId, "invoking uploadFile", contentType, destinationPath);
  req.s3FilePath = "https://" + s3config.bucket + ".s3.amazonaws.com/" + destinationPath;
  if (!options.writeConcern) {
    logger.debug(requestId, "write concern not expected. Calling next");
    next();
  }
  const stream = fs.createReadStream(file.path);
  s3.upload({
    "Bucket": s3config.bucket,
    "ContentLength": file.size,
    "Key": destinationPath,
    "Body": stream,
    "ContentType": contentType
  }, s3config.s3options.uploadOptions, function(error) {
    fs.unlink(file.path, error => {
      if (error) {
        logger.error(requestId, "Unable to remove file", file.path);
      }
    });
    if (error) {
      return next(error);
    }
    if (options.writeConcern) {
      if (!req.s3uploaded) {
        req.s3uploaded = true;
        next();
      }
    }
  }).on("httpUploadProgress", progress => {
    logger.debug(requestId, "progress", progress.loaded, "of", progress.total);
    if (progress.total !== undefined && progress.loaded === progress.total) {
      logger.debug(requestId, "upload done, invoking next from httpUploadProgress");
      if (!req.s3uploaded) {
        req.s3uploaded = true;
        next();
      }
    }
  });
}

The documentation for the AWS SDK for JavaScript (v2) includes this statement on their Calling Services Asynchronously page (emphasis mine):
All requests made through the SDK are asynchronous. This is important to keep in mind when writing browser scripts. JavaScript running in a web browser typically has just a single execution thread. After making an asynchronous call to an AWS service, the browser script continues running and in the process can try to execute code that depends on that asynchronous result before it returns.
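In other words, s3.upload() does not block the event loop: the call returns immediately, the HTTP transfer happens in the background, and the callback fires when it completes. A minimal sketch to convince yourself of this (bucket, key and file path are hypothetical; aws-sdk v2 as in the question):
const AWS = require("aws-sdk");
const fs = require("fs");
const s3 = new AWS.S3();
// A timer that keeps ticking while the upload is in flight shows the event loop stays free.
const tick = setInterval(() => console.log("event loop still running"), 100);
s3.upload({
  Bucket: "my-example-bucket", // hypothetical
  Key: "uploads/example.bin",  // hypothetical
  Body: fs.createReadStream("/tmp/example.bin") // hypothetical file
}, (err, data) => {
  clearInterval(tick);
  if (err) return console.error("upload failed", err);
  console.log("upload finished", data.Location);
});
console.log("s3.upload() returned immediately; waiting for the callback");
If the application still stalls, the cause is more likely synchronous work elsewhere in the request than the SDK call itself.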

Related

How do I upload a large audio file longer than 30 seconds directly from the browser to AWS S3?

I would like to save audio recording to S3. I am using the functions below to load direct to awsS3 direct from the browser. It works for short audio recordings of up to around 25 seconds but fails for larger files.
Currently the flow is as follows: I speak into the microphone using recorder.js. Once the recording is complete, I press stop, which then saves the file to AWS.
From the browser:
getSignedRequest(file,fileLoc);

function getFetchSignedRequest(file,fileLoc){
  const fetchUrl = `/xxxxxxxxx?file-name=${file.name}&file-type=${file.type}&fileLoc=${fileLoc}`;
  fetch(fetchUrl)
    .then((response) => {
      console.log('response',response)
      if(!response.ok){
        console.log('Network response was not OK',response.ok)
      } else {
        putAudioFetchFile(file, response.signedRequest, response.url)
      }
    })
    .catch((error) => {
      console.error('Could not get signed URL:', error);
    })
}
This sends a GET request to the NodeJS server, which calls this:
const aws = require('aws-sdk');
const fs = require('fs');

aws.config.region = 'xxxxxx';
const S3_BUCKET = process.env.AWS_S3_BUCKET

this.uploadToAWSDrive =
  async function uploadToAWSDrive(req,res){
    const s3 = new aws.S3();
    const URL_EXPIRATION_SECONDS = 3000;
    const subFolderName = req.query['fileLoc'];
    const fileName = req.query['file-name'];
    const fileType = req.query['file-type'];
    const fileLocName = subFolderName + fileName;
    const s3Params = {
      Bucket: S3_BUCKET,
      Key: fileLocName,
      Expires: URL_EXPIRATION_SECONDS,
      ContentType: fileType,
      ACL: 'public-read'
    };
    await s3.getSignedUrl('putObject', s3Params, (err, data) => {
      if(err){
        console.log(err);
        return res.end();
      }
      const returnData = {
        signedRequest: data,
        url: `https://${S3_BUCKET}.s3.amazonaws.com/${fileLocName}`
      };
      console.log('audio uploaded',returnData)
      res.write(JSON.stringify(returnData));
      res.end();
    });
  }
Which then calls this:
function uploadFile(file, signedRequest, url){
  const xhr = new XMLHttpRequest();
  xhr.open('PUT', signedRequest);
  xhr.onreadystatechange = () => {
    if(xhr.readyState === 4){
      if(xhr.status === 200){
        console.log('destination url= ', url,xhr.readyState,xhr.status)
      }
      else{
        alert('Could not upload file.');
      }
    }
  };
  xhr.send(file);
}
This then sends the file to the AWS S3 server. It is fine for audio of less than 30 seconds but fails for longer audio files.
What do I need to do to make this work with audio files longer than 20 seconds and up to 3 minutes?
Any help is most appreciated.
Not very elegant, but the issue was resolved by adding a timer to the original function call. A function that followed also needed to be delayed, I think to allow processing time. I am sure there are better ways to do this.
setTimeout(() => getSignedRequest(myAudioFile, fileLoc), processTime);
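A less timing-dependent alternative (just a sketch, reusing the question's endpoint and its putAudioFetchFile helper) is to wait for the JSON body of the signed-URL response before starting the PUT, since fetch() resolves as soon as the headers arrive and the body still has to be parsed:
async function getSignedRequest(file, fileLoc) {
  const fetchUrl = `/xxxxxxxxx?file-name=${file.name}&file-type=${file.type}&fileLoc=${fileLoc}`;
  const response = await fetch(fetchUrl);
  if (!response.ok) {
    throw new Error('Could not get signed URL: ' + response.status);
  }
  // Parse the body before using it; signedRequest is not a property of the raw Response.
  const { signedRequest, url } = await response.json();
  return putAudioFetchFile(file, signedRequest, url);
}
With the body awaited there is nothing to guess a delay for, regardless of how long the recording is.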

Delivering image from S3 to React client via Context API and Express server

I'm trying to download a photo from an AWS S3 bucket via an express server to serve to a react app but I'm not having much luck. Here are my (unsuccessful) attempts so far.
The Workflow is as follows:
Client requests photo after retrieving key from database via Context API
Request sent to express server route (important so as to hide the true location from the client)
Express server route requests blob file from AWS S3 bucket
Express server parses image to base64 and serves to client
Client updates state with new image
React Client
const [profilePic, setProfilePic] = useState('');

useEffect(() => {
  await actions.getMediaSource(tempPhoto.key)
    .then(resp => {
      console.log('server resp: ', resp.data.data.newTest) // returns ����\u0000�\u0000\b\u0006\
      const url = window.URL || window.webkitURL;
      const blobUrl = url.createObjectURL(resp.data.data.newTest);
      console.log("blob ", blobUrl);
      setProfilePic({ ...profilePic, image : resp.data.data.newTest });
    })
    .catch(err => errors.push(err));
}
Context API - just axios wrapped into its own library
getMediaContents = async ( key ) => {
  return await this.API.call(`http://localhost:5000/${MEDIA}/mediaitem/${key}`, "GET", null, true, this.state.accessToken, null);
}
Express server route
router.get("/mediaitem/:key", async (req, res, next) => {
try{
const { key } = req.params;
// Attempt 1 was to try with s3.getObject(downloadParams).createReadStream();
const readStream = getFileStream(key);
readStream.pipe(res);
// Attempt 2 - attempt to convert response to base 64 encoding
var data = await getFileStream(key);
var test = data.Body.toString("utf-8");
var container = '';
if ( data.Body ) {
container = data.Body.toString("utf-8");
} else {
container = undefined;
}
var buffer = (new Buffer.from(container));
var test = buffer.toString("base64");
require('fs').writeFileSync('../uploads', test); // it never wrote to this directory
console.log('conversion: ', test); // prints: 77+977+977+977+9AO+/vQAIBgYH - this doesn't look like base64 to me.
delete buffer;
res.status(201).json({ newTest: test });
} catch (err){
next(ApiError.internal(`Unexpected error > mediaData/:id GET -> Error: ${err.message}`));
return;
}
});
AWS S3 Library - I made my own library for using the s3 bucket as I'll need to use more functionality later.
const getFileStream = async (fileKey) => {
  const downloadParams = {
    Key: fileKey,
    Bucket: bucketName
  }

  // This was attempt 1's return without async in the parameter
  return s3.getObject(downloadParams).createReadStream();

  // Attempt 2's intention was just to wait for the promise to be fulfilled.
  return await s3.getObject(downloadParams).promise();
}
exports.getFileStream = getFileStream;
If you've gotten this far you may have realised that I've tried a couple of things from different sources and documentation but I'm not getting any further. I would really appreciate some pointers and advice on what I'm doing wrong and what I could improve on.
If any further information is needed then just let me know.
Thanks in advance for your time!
Maybe this will be useful for you; this is how I get an image from S3 and process it on the server.
Create a temporary directory:
createTmpDir(): Promise<string> {
  return mkdtemp(path.join(os.tmpdir(), 'tmp-'));
}
Gets the file
readStream(path: string) {
  return this.s3
    .getObject({
      Bucket: this.awsConfig.bucketName,
      Key: path,
    })
    .createReadStream();
}
How I process the file:
async MainMethod(fileName){
  const dir = await this.createTmpDir();
  const serverPath = path.join(
    dir,
    fileName
  );
  await pipeline(
    this.readStream(fileName),
    fs.createWriteStream(serverPath + '.jpg')
  );
  const createFile = await sharp(serverPath + '.jpg')
    .jpeg()
    .resize({
      width: 640,
      fit: sharp.fit.inside,
    })
    .toFile(serverPath + '.jpeg');
  const imageBuffer = fs.readFileSync(serverPath + '.jpeg');
  //my manipulations
  fs.rmSync(dir, { recursive: true, force: true }); //delete temporary folder
}
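One note on the route in the question: converting the binary Body to a UTF-8 string before base64-encoding it corrupts the bytes, which is why the output did not look like valid base64. A minimal sketch, assuming getFileStream resolves with the s3.getObject(...).promise() result as in attempt 2:
router.get("/mediaitem/:key", async (req, res, next) => {
  try {
    const { key } = req.params;
    const data = await getFileStream(key); // s3.getObject(downloadParams).promise()
    // data.Body is already a Buffer; encode it directly, without toString("utf-8") in between.
    const base64Image = data.Body.toString("base64");
    res.status(200).json({ image: base64Image, contentType: data.ContentType });
  } catch (err) {
    next(ApiError.internal(`Unexpected error > mediaData/:id GET -> Error: ${err.message}`));
  }
});
The client can then build a data URL from contentType and image, or the original streaming attempt can be kept and the base64 step dropped entirely.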

NodeJS Amazon AWS SDK S3 client stops working intermittently

I have a NodeJS express web server that serves files from AWS S3. Most of the time this exact code works correctly and serves files for a wide variety of applications with large numbers of requests in Production. The NodeJS web server is running across multiple nodes on a Docker swarm server.
After about 2-3 weeks this stops working. There is no response from the S3Client GetObjectCommand, and no error returned or anything. This starts working again only after restarting the NodeJS Docker container.
I read in the S3 SDK docs that the SDK will retry automatically:
Each AWS SDK implements automatic retry logic.
Questions:
How can we make this code more resilient and not need a restart?
Is the error handling correct? I'm wondering why there is seemingly no response or error returned at all in this situation.
Is it necessary to configure the retry settings?
NodeJS version: node:lts-alpine
Module: @aws-sdk/client-s3
Controllers
AWS Controller
const consoleLogger = require('../logger/logger.js').console;
const { S3Client, GetObjectCommand } = require('@aws-sdk/client-s3');

const config = {
  "credentials": {
    "accessKeyId": "example",
    "secretAccessKey": "example"
  },
  "endpoint": "example",
  "sslEnabled": true,
  "forcePathStyle": true
}

const s3client = new S3Client(config);
const awsCtrl = {};

awsCtrl.getObject = async (key) => {
  // Get object from Amazon S3 bucket
  let data;
  try {
    // Data is returned as a ReadableStream
    data = await s3client.send(new GetObjectCommand({ Bucket: "example", Key: key }));
    console.log("Success", data);
  } catch (e) {
    consoleLogger.error("AWS S3 error: ", e);
    const awsS3Error = {
      name: e.name || null,
      status: e.$metadata.httpStatusCode || 500
    };
    throw awsS3Error;
  }
  return data;
}

module.exports = awsCtrl;
Files Controller
const queryString = require('query-string');
const consoleLogger = require('../logger/logger.js').console;
const httpCtrl = require('./http.ctrl');
const jwtCtrl = require('./jwt.ctrl');
const awsCtrl = require('./aws.ctrl');

filesCtrl.deliverFile = async (req, res) => {
  /* Get object from AWS S3 */
  let fileObjectStream;
  try {
    fileObjectStream = await awsCtrl.getObject(filePath);
  } catch (e) {
    consoleLogger.error(`Unable to get object from AWS S3`, e);
    if (e.status && e.status === 404) {
      result.error = `Not found`;
      result.status = 404;
      return res.status(result.status).json(result);
    }
    return res.status(e.status || 500).json(result);
  }
  const filename = lookupResponse.data.filename;

  // Set response header: Content-Disposition
  res.attachment(filename);

  // API response object stream download to client
  return fileObjectStream.Body.pipe(res);
}
API
const express = require('express');
const router = express.Router();
const filesCtrl = require('../../controllers/files.ctrl');
const filesValidation = require('../validation/files');

router.get('/:fileId', [filesValidation.getFile], (req, res, next) => {
  return filesCtrl.deliverFile(req, res);
});
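On the retry question: maxAttempts and request timeouts can be set directly on the v3 client, and an unconsumed or abandoned GetObjectCommand Body stream keeps its socket occupied, which is one common way requests end up hanging with no error once the connection pool is exhausted. A rough sketch under those assumptions (timeout values are arbitrary; NodeHttpHandler comes from the @aws-sdk/node-http-handler package):
const { S3Client } = require('@aws-sdk/client-s3');
const { NodeHttpHandler } = require('@aws-sdk/node-http-handler');
const s3client = new S3Client({
  credentials: { accessKeyId: "example", secretAccessKey: "example" },
  endpoint: "example",
  forcePathStyle: true,
  maxAttempts: 3, // automatic retries on retryable errors
  requestHandler: new NodeHttpHandler({
    connectionTimeout: 5000, // ms allowed to establish the connection
    socketTimeout: 30000     // ms of socket inactivity before the request is aborted
  })
});
// In deliverFile, release the socket if the client disconnects mid-download:
// res.on('close', () => fileObjectStream.Body.destroy());
Whether this removes the need for restarts depends on what is actually holding the sockets, but it at least turns silent hangs into visible timeout errors.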

How to read a txt file stored in a Google Storage bucket from a Cloud Function in Node

I've written some node.js code which is sitting in the source of a Cloud Function. When it runs I want to read a text file from a google storage bucket, and process it.
The code runs fine when running locally, but for some reason doesn't work when running in the Cloud Function. I would expect something to be written out by the console logs.
I can't see any errors; I thought it might be a permissions problem (I might be looking in the wrong place though).
Any ideas?
The awaits and asyncs are just because I wanted it to wait for the response before continuing, but that seems to have no effect either.
const fileName = 'testData.txt';

const {Storage} = require('@google-cloud/storage');
const storage = new Storage();
const bucket = storage.bucket('my_bucket_name');
const remoteFile = bucket.file(fileName);

await remoteFile.download(async function(err, contents) {
  console.log("file err: "+err);
  console.log("file data: "+contents);
});
What you can do is verify that the runtime account for the function has the necessary permissions to access the bucket. In general the runtime account is PROJECT_ID@appspot.gserviceaccount.com; grant it at least the Storage Object Viewer role (you can check more roles here).
Then test the function again. If something still goes wrong, please check the logs of the function.
EDIT
Not sure, but it may be something in the code. I've used the following to test the function and it works perfectly:
index.js:
const {Storage} = require('@google-cloud/storage');
const storage = new Storage();
const bucket = storage.bucket('bucket_name');
const fileName = 'test.txt';
const remoteFile = bucket.file(fileName);

exports.helloWorld = (req, res) => {
  console.log('Reading File');
  var archivo = remoteFile.createReadStream();
  console.log('Concat Data');
  var buf = '';
  archivo.on('data', function(d) {
    buf += d;
  }).on('end', function() {
    console.log(buf);
    console.log("End");
    res.send(buf);
  });
};
package.json:
{
  "name": "sample-http",
  "version": "0.0.1",
  "dependencies": {
    "@google-cloud/storage": "^4.7.0"
  }
}
async function readStorageFile(obj){
  try{
    obj.Result = ''
    if (obj.filename===undefined) return
    bucket = gcs.bucket('gs:yourinfo');
    // Get file
    await bucket.file(obj.filename).download()
      .then(async data => {
        obj.data = data
        obj.Result = 'OK'
        return
      })
      .catch(err => {
        return
      })
    return
  }
  catch (err){
    return
  }
}

obj = {filename:'TEST1.json'}
await readStorageFile(obj,'testing')
if (obj.Result==='OK') { console.log('obj.data='+obj.data) }
else { console.log('Not Found')}
return
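For completeness, file.download() also returns a promise that resolves to an array whose first element is the file contents, so the callback in the question can be dropped entirely. A sketch using the bucket and file names from the question (the exported function name is made up):
const { Storage } = require('@google-cloud/storage');
const storage = new Storage();
exports.readTestData = async (req, res) => {
  try {
    // download() resolves to [Buffer] when no destination is given.
    const [contents] = await storage
      .bucket('my_bucket_name')
      .file('testData.txt')
      .download();
    console.log('file data: ' + contents.toString('utf8'));
    res.send(contents.toString('utf8'));
  } catch (err) {
    console.error('file err: ' + err);
    res.status(500).send(String(err));
  }
};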

Returning result of an async operation with a Node.js web server

I'm using Express to build a web API. In the following example, SVG data is converted to PNG and uploaded to S3.
const svg2png = require("svg2png");
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

app.post('/svg_to_png', function (req, res) {
  let params = req.body

  // STEP 1: Convert SVG to PNG:
  var outputBuffer = svg2png.sync(params.svg_data, {});

  // STEP 2: Upload to S3:
  let s3_params = {
    Bucket:params.bucket,
    Key:params.key,
    Body:outputBuffer,
    ContentType:'image/png',
    ContentDisposition:'inline',
    ACL: 'public-read'
  }

  result = s3.putObject(s3_params,function(err,data){
    if (err){
      return err;
    }
    return 'success';
  });

  // Return Image URL:
  let image_url = 'https://s3.amazonaws.com/' + params.bucket + '/' + params.key
  res.send(image_url)
})
I want the API to respond with the URL of the converted image, which the requesting client can then immediately download. The problem is, the S3 upload operation is async, and so when the response is delivered, the image does not yet exist at the URL location, forcing the client to poll for its existence.
Is there a way to get the web server to respond only once the S3 upload has completed?
What about something like this:
const putObjPromise = s3.putObject(params).promise();
putObjPromise
  .then(data => {
    // Return the URL here.
  })
  .catch(err => console.log(err))
AWS has this doc for Promises : https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/using-promises.html
Hope this helps.
As @Brandon mentioned, you can return the response once the S3 callback has completed. You can also use s3.putObject(params).promise(). I prefer this since it improves readability.
app.post('/svg_to_png', async function (req, res) {
  let params = req.body
  ...

  // STEP 2: Upload to S3:
  let s3_params = {
    ...
  }

  try {
    const result = await s3.putObject(s3_params).promise();
    // Return Image URL:
    // image_url = 'https://s3.amazonaws.com/' + params.bucket + '/' + params.key
    // res.body(....).end()
  } catch(err) {
    // return error response
  }
})
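Putting the pieces together with the fields from the question's own handler, a minimal end-to-end sketch would be:
const svg2png = require("svg2png");
const AWS = require("aws-sdk");
const s3 = new AWS.S3();
app.post('/svg_to_png', async function (req, res) {
  const params = req.body;
  try {
    // STEP 1: Convert SVG to PNG (synchronous, as in the question):
    const outputBuffer = svg2png.sync(params.svg_data, {});
    // STEP 2: Upload to S3 and wait for it to finish:
    await s3.putObject({
      Bucket: params.bucket,
      Key: params.key,
      Body: outputBuffer,
      ContentType: 'image/png',
      ContentDisposition: 'inline',
      ACL: 'public-read'
    }).promise();
    // STEP 3: The object now exists, so the URL is safe to return:
    res.send('https://s3.amazonaws.com/' + params.bucket + '/' + params.key);
  } catch (err) {
    res.status(500).send(err.message);
  }
});
The response is only sent after putObject resolves, so the client can download the image immediately without polling.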
