Node.js - AWS - Program Terminates Before Upload to S3 Bucket Completes

I've written a program that creates HTML files. I then attempt to upload the files to my S3 bucket at the end of the program. It seems the problem is that my program terminates before the upload function can complete or its callback can fire.
Here is the gist of my code:
let aws = require('aws-sdk');

aws.config.update({
    // Censored keys for security
    accessKeyId: '*****',
    secretAccessKey: '*****',
    region: 'us-west-2'
});

let s3 = new aws.S3({
    apiVersion: "2006-03-01",
});

function upload(folder, platform, browser, title, data) {
    s3.upload({
        Bucket: 'html',
        Key: folder + platform + '/' + browser + '/' + title + '.html',
        Body: data
    }, function (err, data) {
        if (err) {
            console.log("Error: ", err);
        }
        if (data) {
            console.log("Success: ", data.Location);
        }
    });
}
/*
*
* Here is where the program generates HTML files
*
*/
upload(folder, platform, browser, title, data);
If I call the upload() function (configured with test/dummy data) before the HTML generation section of my code, the upload succeeds. The test file successfully uploads to S3. However, when the function is called at the end of my code, I do not receive an error or success response. Rather, the program simply terminates and the file isn't uploaded to S3.
Is there a way to wait for the callback from my upload() function before continuing the program? How can I prevent the program from terminating before uploading my files to S3? Thank you!
Edit: After implementing Deiv's answer, I found that the program is still not uploading my files. I still am not receiving a success or error message of any kind. In fact, it seems like the program just skips over the upload() function. To test this, I added a console.log("test") after calling upload() to see if it would execute. Sure enough, the log prints successfully.
Here's some more information about the project: I'm utilizing WebdriverIO v4 to create HTML reports of various tests passing/failing. I gather the results of the tests via multiple event listeners (e.g. this.on('test:start'), this.on('suite:end'), etc.). The final event is this.on('end'), which is called when all of the tests have completed execution. It is here where the test results are sorted by operating system, browser, and so on.
I'm now noticing that my program won't do anything S3-related in the this.on('end') event handler, even if I put it at the very beginning of the handler. I'm still convinced this is because it isn't given enough time to execute, since the handler processes the results and creates the HTML files very quickly. I have this bit of code that lists all buckets in my S3:
s3.listBuckets(function (err, data) {
    if (err) {
        console.log("Error: ", err);
    } else {
        console.log("Success: ", data.Buckets);
    }
});
Even this doesn't return a result of any kind when run at the beginning of this.on('end'). Does anyone have any ideas? I'm really stumped here.
Edit: Here is my new code, which implements Naveen's suggestion:
this.on('end', async (end) => {
    /*
     * Program sorts results and creates variable 'data', the contents of the HTML file.
     */
    await s3.upload({
        Bucket: 'html',
        Key: key,
        Body: data
    }, function (err, data) {
        if (err) {
            console.log("Error: ", err);
        }
        if (data) {
            console.log("Success: ", data.Location);
        }
    }).on('httpUploadProgress', event => {
        console.log(`Uploaded ${event.loaded} out of ${event.total}`);
    });
});
The logic seems sound, but I still get no success or error message, and I do not see the upload progress. The HTML file does not get uploaded to S3.
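For what it's worth, in the v2 aws-sdk the callback form of s3.upload() returns a ManagedUpload object rather than a promise, so the await above does not actually wait for the upload to finish. Below is a minimal sketch of the same handler using .promise() instead (bucket, key, and data as in the question; whether the WebdriverIO v4 reporter waits for an async end handler is a separate issue):
this.on('end', async () => {
    // ...sort results and build `data`, the HTML contents, as before...
    try {
        // .promise() returns a real Promise, so await genuinely waits here
        const result = await s3.upload({
            Bucket: 'html',
            Key: key,
            Body: data
        }).promise();
        console.log("Success: ", result.Location);
    } catch (err) {
        console.log("Error: ", err);
    }
});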

You can use promises to wait for your upload function to finish. Here's what it will look like:
function upload(folder, platform, browser, title, data) {
    return new Promise((resolve, reject) => {
        s3.upload({
            Bucket: 'html',
            Key: folder + platform + '/' + browser + '/' + title + '.html',
            Body: data
        }, function (err, data) {
            if (err) {
                console.log("Error: ", err);
                return reject(err);
            }
            if (data) {
                console.log("Success: ", data.Location);
                return resolve(); // potentially return resolve(data) if you need the data
            }
        });
    });
}

/*
 *
 * Here is where the program generates HTML files
 *
 */

upload(folder, platform, browser, title, data)
    .then(data => { // if you don't care about the returned data, you can also do .then(() => {
        // handle success, do whatever else you want, such as calling a callback to end the function
    })
    .catch(error => {
        // handle error
    });
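If the surrounding code can be made async, the same promise-returning upload() can also be consumed with await; a brief sketch under that assumption:
async function run() {
    try {
        await upload(folder, platform, browser, title, data);
        // continue only after the upload has finished
    } catch (error) {
        // handle the failure
    }
}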

Related

when file is 100% uploaded to GCS then run

Script seems to be running before file is fully uploaded to GCS.
I have a blobStream.on function that is meant to run only when the data has finished uploading to GCS. The issue is that it works sometimes, and other times it runs too soon and forgets that another file is also uploading (normally it's the audio file that is still uploading).
I am wondering how I can improve the script below.
index is the count of files uploading (0, 1, 2); it counts up.
fileLength is the number of the file it is uploading (0, 1, 2, etc.).
What seems to happen is that this part triggers too soon, as index and fileLength equal the same amount. It isn't taking the data into account, which could be because, as far as I can tell, console.log(data) returns nothing; it's undefined.
There seem to be a few errors here.
I am wondering whether there is any way to watch the data in this function and run the correct script once it has finished. There are also clearly way too many delays; that's because I am trying to slow the script down so it runs at the correct time.
What I can say is that the delay just after the blobStream.on('finish') does seem to help it a little.
blobStream.on("finish", async (data) => {
const delay = ms => new Promise(resolve => setTimeout(resolve, ms))
await delay(10000);
const publicUrl = format(
`https://storage.googleapis.com/${bucket.name}/${blob.name}`
);
try {
await bucket.file(newfileName).makePublic();
} catch {
message.push({
message:
`Uploaded the file successfully: ${newfileName}, but public access is denied!`,
url: publicUrl,
});
}
console.log(index);
if(index == fileLength){
await delay(10000);
message.push({
originalname: file.originalname,
mimeType: file.mimetype,
message: "Uploaded the file successfully: " + newfileName,
url: publicUrl,
});
console.log(JSON.stringify(message))
await delay(10000)
console.log(JSON.stringify(message))
await delay(10000)
submitToDB(req, res, message);
//res.status(200).send(message);
}
else{
console.log("this is the first index."+ index +"file name "+ file.originalname);
const delay = ms => new Promise(resolve => setTimeout(resolve, ms))
message.push({
originalname: file.originalname,
mimeType: file.mimetype,
message: "Uploaded the file successfully: " + newfileName,
url: publicUrl,
})
await delay(1000)
console.log(JSON.stringify(message));
}
});
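One way to avoid counting indexes and sprinkling delays is to wrap each upload's 'finish'/'error' events in a promise and wait for all of them with Promise.all. This is only a sketch, not the poster's code: it assumes multer-style file objects (file.buffer, file.originalname, file.mimetype) and the bucket, req, res, and submitToDB from the question.
const uploadOne = (file) => new Promise((resolve, reject) => {
    const blob = bucket.file(file.originalname);
    const blobStream = blob.createWriteStream({ resumable: false });
    blobStream.on('error', reject);
    blobStream.on('finish', () => resolve({
        originalname: file.originalname,
        mimeType: file.mimetype,
        message: "Uploaded the file successfully: " + file.originalname,
        url: `https://storage.googleapis.com/${bucket.name}/${blob.name}`,
    }));
    blobStream.end(file.buffer);
});

// Continue only once every file has finished uploading
Promise.all(files.map(uploadOne))
    .then((message) => submitToDB(req, res, message))
    .catch((err) => res.status(500).send(err.message));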

Get audio duration in aws lambda nodejs function

I am currently trying to get the duration/length of an audio file read from S3. I have tried a bunch of different ways, but can't seem to find an efficient one that works. Currently, I am storing the audio files in the /tmp/ folder and then trying to read them, but that doesn't seem to work. I am also working with .mp4a, not .mp3, but was initially testing the code below with URL strings from S3 and got 0:00 returned when the audio was read. I also tried getAudioDurationInSeconds, but that tells me "error locating ffprobe". Any pointers would be greatly appreciated.
s3.getObject({ Bucket: bucketName, Key: `audio/file` }, function (err, data) {
    if (err) {
        console.error(err.code, "-", err.message);
    }
    fs.writeFile(`/tmp/file`, data.Body)
        .then(() => { // This ensures that your mp3Duration function gets called after the file has been written
            // getAudioDurationInSeconds(`/tmp/file`).then((duration) => {
            //     console.log(duration);
            // });
            mp3Duration(`/tmp/$file`, function (err, duration) {
                if (err) return console.log(err.message);
                console.log('Your file is ' + duration + ' seconds long');
            }).catch(console.error);
        });
});
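One likely issue in the snippet is that the callback form of fs.writeFile does not return a promise, so the .then() chained onto it never runs. Here is a sketch of the same flow using fs/promises, the v2 SDK's .promise(), and util.promisify around mp3Duration; this only addresses the promise mismatch and does not make mp3-duration understand .mp4a files.
const fs = require('fs/promises');
const { promisify } = require('util');
const mp3DurationAsync = promisify(mp3Duration);

async function getDuration() {
    // Fetch the object, write it to /tmp, then measure it, all awaited in order
    const { Body } = await s3.getObject({ Bucket: bucketName, Key: 'audio/file' }).promise();
    await fs.writeFile('/tmp/file', Body);
    const duration = await mp3DurationAsync('/tmp/file');
    console.log('Your file is ' + duration + ' seconds long');
    return duration;
}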

aws-sdk crashing nodejs program

I am using aws-sdk for JavaScript.
The code below works fine when used in a stand-alone program:
//program.js
const AWS = require('aws-sdk');

const firehose = new AWS.Firehose({
    accessKeyId: "XXX",
    secretAccessKey: "YY"
});

const params = {
    DeliveryStreamName: 'demo1',
    Record: {
        Data: new Buffer("Hello World")
    }
};

firehose.putRecord(params, function (err, data) {
    if (err) {
        console.log(err);
        return;
    }
    console.log(data); // successful response
});
Again, the above code works fine as a stand-alone file. Data gets pushed into Firehose and then further down to Redshift.
So if I execute
node program.js
I am able to see my data in Redshift. Yay!!
=============================
However, what I really want to achieve is to push data to Firehose when a certain route gets hit in my Express application. So I take the exact same code as above and stick it in my route:
// router.js
const AWS = require('aws-sdk');

const firehose = new AWS.Firehose({
    accessKeyId: "XXX",
    secretAccessKey: "YY"
});

router
    .get('/v1/locations/:id?', (req, res) => {
        const params = {
            DeliveryStreamName: 'demo1',
            Record: {
                Data: new Buffer("Hello World")
            }
        };
        firehose.putRecord(params, function (err, data) {
            if (err) {
                console.log(err);
                return;
            }
            console.log(data);
        });
        // do the work that needs to be done for this route and send a response
        res.send("some data");
    });
The minute firehose.putRecord is executed, it crashes my program with the following error:
TypeError: doneCallback.cal is not a function
at Request.callListeners (/api-project/node_modules/aws-sdk/lib/sequential_executor.js:115:18)
at callNextListener (/api-project/node_modules/aws-sdk/lib/sequential_executor.js:95:12)
at /api-project/node_modules/aws-sdk/lib/event_listeners.js:74:9
at finish (/api-project/node_modules/aws-sdk/lib/config.js:315:7)
at /api-project/node_modules/aws-sdk/lib/config.js:333:9
at Credentials.get (/api-project/node_modules/aws-sdk/lib/credentials.js:126:7)
at getAsyncCredentials (/api-project/node_modules/aws-sdk/lib/config.js:327:24)
at Config.getCredentials (/api-project/node_modules/aws-sdk/lib/config.js:347:9)
at Request.VALIDATE_CREDENTIALS (/api-project/node_modules/aws-sdk/lib/event_listeners.js:69:26)
at Request.callListeners (/api-project/node_modules/aws-sdk/lib/sequential_executor.js:101:18)
I can't understand why this code crashes my Express program. Is this a bug in the aws-sdk library, or am I doing something wrong?
You should be sending the express response inside your success callback.
firehose.putRecord(params, function (err, data) {
    if (err) {
        console.log(err);
        return;
    }
    console.log(data);
    res.send("some data");
});
FYI, your res.send(data) effectively ends the response and sends the data before the upload has completed; the putRecord callback is where that should happen. In Node, things do not happen in sequence from top to bottom of the code; instead, execution follows the order of the callback events. So the execution flow for your code is:
the file executes
some operation is performed
the callback for that operation occurs
Then, if there is additional code outside of the operation, it will continue; otherwise the code will exit in that callback. Hence, put the res.send in your putRecord callback.
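As an alternative, here is a sketch of the same route using the v2 SDK's .promise() with an async handler, so the response is only sent after Firehose acknowledges the record (this is a variation under that assumption, not the answer's code):
router.get('/v1/locations/:id?', async (req, res) => {
    const params = {
        DeliveryStreamName: 'demo1',
        Record: {
            Data: Buffer.from("Hello World")
        }
    };
    try {
        // await the acknowledged record before responding
        const data = await firehose.putRecord(params).promise();
        console.log(data);
        res.send("some data");
    } catch (err) {
        console.log(err);
        res.status(500).send("firehose error");
    }
});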

Node.js Streaming/Piping Error Handling (Change Response Status on Error)

I have millions of rows in my Cassandra db that I want to stream to the client in a zip file (I don't want a potentially huge zip file in memory). I am using the stream() function from the Cassandra-Node driver, piping to a Transformer which extracts the one field from each row that I care about and appends a newline, and then piping to archiver, which pipes to the Express Response object. This seems to work fine, but I can't figure out how to properly handle errors during streaming. I have to set the appropriate headers/status before streaming for the client, but if there is an error during the streaming (on the dbStream, for example), I want to clean up all of the pipes and reset the response status to something like 404. But if I try to reset the status after the headers are set and the streaming starts, I get "Can't set headers after they are sent." I've looked all over and can't find how to properly handle errors in Node when piping/streaming to the Response object. How can the client tell whether valid data was actually streamed if I can't send a proper response code on error? Can anyone help?
function streamNamesToWriteStream(query, res, options) {
    return new Promise((resolve, reject) => {
        let success = true;
        const dbStream = db.client.stream(query);
        const rowTransformer = new Transform({
            objectMode: true,
            transform(row, encoding, callback) {
                try {
                    const vote = row.name + '\n';
                    callback(null, vote);
                } catch (err) {
                    callback(null, err.message + '\n');
                }
            }
        });
        // Handle res events
        res.on('error', (err) => {
            logger.error(`res ${res} error`);
            return reject(err);
        });
        dbStream.on('error', function (err) {
            res.status(404).send(); // Can't set headers after they are sent.
            logger.debug(`dbStream error: ${err}`);
            success = false;
            //res.end();
            //return reject(err);
        });
        res.writeHead(200, {
            'Content-Type': 'application/zip',
            'Content-disposition': 'attachment; filename=myFile.zip'
        });
        const archive = archiver.create('zip');
        archive.on('error', function (err) { throw err; });
        archive.on('end', function (err) {
            logger.debug(`Archive done`);
            //res.status(404).end()
        });
        archive.pipe(res, {
            //end:false
        });
        archive.append(dbStream.pipe(rowTransformer), { name: 'file1.txt' });
        archive.append(dbStream.pipe(rowTransformer), { name: 'file1.txt' });
        archive.finalize();
    });
}
Obviously it's too late to change the headers, so there's going to have to be application logic to detect a problem. Here are some ideas:
Write an unambiguous sentinel of some kind at the end of the stream when an error occurs. The consumer of the zip file will then need to look for that value to check for a problem.
Perhaps more simply, have the consumer execute a verification on the integrity of the zip archive. Presumably if the stream fails the zip will be corrupted.
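A minimal server-side sketch of that second idea: abort the archive and destroy the response on a mid-stream error, so the client receives a truncated zip that will fail any integrity check (the names follow the question's code):
dbStream.on('error', (err) => {
    logger.debug(`dbStream error: ${err}`);
    archive.abort();   // stop archiver from producing further output
    res.destroy(err);  // cut the connection; the partial zip will not verify
});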

How to await fields before processing the file stream in multipart form

I'm using SendGrid for receiving files via email. SendGrid parses the incoming emails and sends the files in a multipart form to an endpoint I have set up.
I don't want the files on my local disk, so I stream them straight to Amazon S3. This works perfectly.
But before I can stream to S3, I need to get hold of the destination mail address so I can work out the correct S3 folder. This is sent in a field named "to" in the form post. Unfortunately this field sometimes arrives after the files, hence I need a way to await the to-field before I'm ready to take the stream.
I thought I could wrap the onField in a promise and await the to-field from within the onFile. But this concept seems to lock itself up when the field arrives after the file.
I'm new to both streams and promises. I would really appreciate it if someone could tell me how to do this.
This is the non-working, pseudo-ish code:
function sendGridUpload(req, res, next) {
    var busboy = new Busboy({ headers: req.headers });

    var awaitEmailAddress = new Promise(function (resolve, reject) {
        busboy.on('field', function (fieldname, val, fieldnameTruncated, valTruncated) {
            if (fieldname === 'to') {
                resolve(val);
            } else {
                return;
            }
        });
    });

    busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
        function findInbox(emailAddress) {
            console.log('Got email address: ' + emailAddress);
            // ...find the inbox and generate an s3Key
            return s3Key;
        }

        function saveFileStream(s3Key) {
            // ...pipe the file directly to S3
        }

        awaitEmailAddress.then(findInbox)
            .then(saveFileStream)
            .catch(function (err) {
                log.error(err);
            });
    });

    req.pipe(busboy);
}
I finally got this working. The solution is not very pretty, and I have actually switched to another concept (described at the end of the post).
To buffer the incoming data until the "to"-field arrives, I used stream-buffers by samcday. When I get hold of the to-field, I release the readable stream to the pipes lined up for the data.
Here is the code (some parts omitted, but the essential parts are there).
var streamBuffers = require('stream-buffers');

function postInboundMail(req, res, next) {
    var busboy = new Busboy({ headers: req.headers });

    // Sometimes the fields arrive after the files are streamed.
    // We need the "to"-field before we are ready for the files,
    // therefore the onField is wrapped in a promise which gets
    // resolved when the to-field arrives.
    var awaitEmailAddress = new Promise(function (resolve, reject) {
        busboy.on('field', function (fieldname, val, fieldnameTruncated, valTruncated) {
            var emailAddress;
            if (fieldname === 'to') {
                try {
                    emailAddress = emailRegexp.exec(val)[1];
                    resolve(emailAddress);
                } catch (err) {
                    return reject(err);
                }
            } else {
                return;
            }
        });
    });

    busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
        var inbox;

        // I'm using ReadableStreamBuffer to accumulate the data before
        // I get the email field, so I can send the stream through to S3.
        var readBuf = new streamBuffers.ReadableStreamBuffer();

        // I have to pause readBuf immediately. Otherwise stream-buffers starts
        // sending as soon as I put data in with put().
        readBuf.pause();

        function getInbox(emailAddress) {
            return model.inbox.findOne({ email: emailAddress })
                .then(function (result) {
                    if (!result) return Promise.reject(new Error(`Inbox not found for ${emailAddress}`));
                    inbox = result;
                    return Promise.resolve();
                });
        }

        function saveFileStream() {
            console.log('=========== starting stream to S3 ========= ' + filename);
            // Have to resume readBuf since we paused it before.
            readBuf.resume();
            // file.save will approximately do the following:
            //   readBuf.pipe(gzip).pipe(encrypt).pipe(S3)
            return model.file.save({
                inbox: inbox,
                fileStream: readBuf
            });
        }

        awaitEmailAddress.then(getInbox)
            .then(saveFileStream)
            .catch(function (err) {
                log.error(err);
            });

        file.on('data', function (data) {
            // Fill readBuf with data as it arrives.
            readBuf.put(data);
        });

        file.on('end', function () {
            // This was the only way I found to get the S3 streaming finished.
            // destroySoon() will let the pipes finish the reading, but no more writes are allowed.
            readBuf.destroySoon();
        });
    });

    busboy.on('finish', function () {
        res.writeHead(202, { Connection: 'close', Location: '/' });
        res.end();
    });

    req.pipe(busboy);
}
I would very much like feedback on this solution, even though I'm not using it. I have a feeling this can be done in a much simpler and more elegant way.
New solution:
Instead of waiting for the to-field, I send the stream directly to S3. I figured that the more stuff I put between the incoming stream and the S3 saving, the higher the risk of losing the incoming file due to a bug in my code. (SendGrid will eventually resend the file if I'm not responding with 200, but it will take some time.)
This is how I do it:
Save a placeholder for the file in the database
Pipe the stream to S3
Update the placeholder with more information as it arrives
This solution also gives me the opportunity to easily get hold of unsuccessful uploads since the placeholders for unsuccessful uploads will be incomplete.
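A rough sketch of that flow, with hypothetical model helpers (model.file.createPlaceholder / model.file.updatePlaceholder) standing in for whatever the real database layer provides, and the file stream sent to S3 with the v2 SDK's upload():
busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
    // 1. Save a placeholder for the file in the database right away
    var placeholderSaved = model.file.createPlaceholder({ filename: filename, mimeType: mimetype });

    // 2. Pipe the stream straight to S3 (nothing buffered in between)
    var uploadDone = s3.upload({
        Bucket: 'inbound-mail',        // assumed bucket name
        Key: 'unsorted/' + filename,   // sorted into the right inbox later
        Body: file
    }).promise();

    // 3. Update the placeholder with more information as it arrives;
    //    incomplete placeholders mark uploads that never finished
    Promise.all([placeholderSaved, uploadDone])
        .then(function (results) {
            return model.file.updatePlaceholder(results[0].id, {
                location: results[1].Location,
                complete: true
            });
        })
        .catch(function (err) {
            log.error(err);
        });
});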
//Michael
