AWS S3 performance using Node.js SDK

I am trying to push the upload (and download) performance of my program to its limits.
I get about 1000 Mbps when uploading 256 MB files using the AWS command line interface.
But I get stuck at about 600 Mbps upload with the following program:
if (process.argv.length < 7) {
console.log ("usage: " + process.argv [0] + " " + process.argv[1] + " <config> <region> <bucket> <key> <file>")
return -1
}
var config = process.argv[2]
var region = process.argv[3]
var bucketName = process.argv[4]
var key = process.argv[5]
var file = process.argv[6]
var multipartMap = { Parts: [] }
var uploadStartTime // = new Date()
var partSize = 1024 * 1024 * 8 // at least 5MB, specified by amazon
var partNum
var multipartParams = {
Bucket: bucketName,
Key: key,
ContentType: "binary",
StorageClass: "REDUCED_REDUNDANCY",
}
var part = 0
var maxRetry = 3
var fs = require ('fs')
var aws = require ('aws-sdk')
function upload (bucket, multipart, partParams, trial) {
var trial = trial || 1;
bucket.uploadPart (partParams, function (err, data) {
if (err) {
console.log ("failed: ", err)
if (trial < maxRetry) {
console.log ("retrying part: ", partParams.PartNumber)
upload (bucket, multipart, partParams, trial + 1)
} else {
console.log ("failed: ", err, " unable to upload part: ", partParams.PartNumber)
}
return;
}
multipartMap.Parts[this.request.params.PartNumber - 1] = {
ETag: data.ETag,
PartNumber: Number (this.request.params.PartNumber)
}
if (--partNum > 0) return;
var doneParams = {
Bucket: bucketName,
Key: key,
MultipartUpload: multipartMap,
UploadId: multipart.UploadId
}
console.log ("success")
bucket.completeMultipartUpload (doneParams, function (err, data){
if (err) {
console.log("An error occurred while completing the multipart upload");
console.log(err);
} else {
var delta = (new Date() - uploadStartTime) / 1000;
console.log('Completed upload in', delta, 'seconds');
console.log('Final upload data:', data);
}
})
})
}
var kickoffTime = new Date ()
aws.config.loadFromPath (config)
aws.config.region = region
var bucket = new aws.S3 ({params: {Bucket: bucketName}})
console.log ("filename: ", file)
var buffer = fs.readFileSync (file) // read the whole file into memory
partNum = Math.ceil (buffer.length / partSize) // number of parts
var totalPart = partNum
uploadStartTime = new Date ()
bucket.createMultipartUpload (multipartParams, function (err, multipart) {
if (err) {
console.log ("cannot create multipart upload: ", err)
return -1
}
for (var i = 0; i < buffer.length; i += partSize) {
++part
var end = Math.min (i + partSize, buffer.length)
var body = buffer.slice (i, end)
var partParams = {
Body: body,
Bucket: bucketName,
Key: key,
PartNumber: String (part),
UploadId: multipart.UploadId,
ContentLength: end - i
}
upload (bucket, multipart, partParams);
}
})
var kickoffTimeDelta = (new Date () - kickoffTime) / 1000
console.log ("Kickoff time: ", kickoffTimeDelta)
This program will not work for empty files, but please ignore this case. The above program was written with reference to this.
As for downloading, the speed is also stuck at about 600 Mbps. Here is the code:
if (process.argv.length < 7) {
console.log ("usage: " + process.argv [0] + " " + process.argv1 + " ")
return -1
}
var config = process.argv[2]
var region = process.argv[3]
var bucketName = process.argv[4]
var key = process.argv[5]
var file = process.argv[6]
var fs = require ('fs')
var aws = require ('aws-sdk')
fs.readFile (config, "utf8", function (err, configFile) {
if (err) {
console.log ("Config file cannot be read: ", err)
return -1
}
aws.config = JSON.parse (configFile)
aws.config.region = region
var bucket = new aws.S3 ({params: {Bucket: bucketName}})
bucket.createBucket (function () {
var data = {Key: key}
bucket.getObject (data, function (err, fileData) {
if (err) {
console.log ("Error downloading data: ", err)
} else {
fs.writeFile (file, fileData.Body, function (err) {
if (err) {
console.log ("Error writing data: ", err)
} else {
console.log ("Successfully downloaded!")
}
})
}
})
})
})
I am new to Node.js and the AWS SDK. Is there anything I am missing to achieve better throughput?
Thanks

Hmm...had a clarifying question but don't have the reputation to post as such.
How many requests per second are you seeing on both ends? If you're regularly hitting S3 with more than 100 requests per second, you'll get better performance by randomizing the start of your key name.
See this article for an explanation and some suggestions:
http://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html
Basically, if you have a bunch of files with a key (subdirectory) that starts with the same characters, you can overwhelm the index partition, so for high-volume read/write activity, random key names speed up performance.
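For illustration only, one minimal way to randomize the start of a key is to prepend a short hash of the file name; the prefix length and hashing scheme here are assumptions, not something from the original post:
var crypto = require ('crypto')
// hypothetical helper: prepend a short hex hash so keys spread across S3's index partitions
function randomizedKey (key) {
    var prefix = crypto.createHash ('md5').update (key).digest ('hex').slice (0, 4)
    return prefix + '-' + key
}
// e.g. "video.bin" becomes something like "9f3a-video.bin";
// in the upload program above, Key: key would become Key: randomizedKey (key)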

Related

When piping a movie from S3 the file isn't seekable

We have an application that sometimes serves an MP4 file which is stored on S3. Since only specific people should be able to see each file, the file is private, and inside our service we only show it to authorised people.
The movie starts playing correctly (in the browser's built-in video tag), however if we seek to a point in the movie that hasn't been buffered yet, the player will buffer for a bit, then stop playing. Afterwards, clicking Play will cause the movie to start from the beginning. If I make the file public and access it directly from S3, seeking to an unbuffered point works correctly.
I created a standalone node program that reproduces this problem. I tried to make the response headers identical to those that S3 sends but the problem remains.
const http = require("http");
const AWS = require("aws-sdk");
const proxy = require("proxy-agent");
Object.assign(process.env, {
AWS_ACCESS_KEY_ID: "REDACTED",
AWS_SECRET_ACCESS_KEY: "REDACTED",
AWS_EC2_REGION: "us-west-2"
});
const s3 = new AWS.S3({
s3ForcePathStyle: 'true',
signatureVersion: 'v4',
httpOptions: { timeout: 300000 },
endpoint: 'https://s3.us-west-2.amazonaws.com',
region: 'us-west-2'
});
const objectParams = {
Bucket: 'REDACTED',
Key: 'some-movie.mp4'
};
let request = 0;
function serve(req, res) {
console.log("Handling request", ++request, req.url);
s3.headObject(objectParams, (err, data) => {
if (err)
throw err;
const { ContentType: type, ContentLength: length} = data;
console.log("Got", data);
if (data.ETag)
res.setHeader("ETag", data.ETag);
const range = req.headers.range;
if (range) {
console.log("Serving range", range);
const parts = range.replace("bytes=", "").split("-");
const start = parseInt(parts[0], 10);
const end = parts[1]? parseInt(parts[1], 10): length -1;
let headers = {
"Content-Range": `bytes ${start}-${end}/${length}`,
"Accept-Ranges": "bytes",
"Content-Type": type,
"Content-Length": end - start + 1,
"Last-Modified": data.LastModified,
};
if (req.headers["if-range"]) {
console.log("Setting if-range to", req.headers["if-range"]);
headers["If-Range"] = req.headers["if-range"];
}
res.writeHead(206, headers);
}
else {
console.log("Whole file");
res.setHeader("Accept-Ranges", "bytes");
res.setHeader("Content-Type", type);
res.setHeader("Content-Length", length);
res.setHeader("Last-Modified", data.LastModified);
}
const stream = s3.getObject(objectParams).createReadStream();
stream.on("error", err => console.error("stream error:", err));
stream.pipe(res).on("finish", data => {
console.log("Finished streaming");
});
});
}
http.createServer(serve).listen(1234);
What am I missing?
Here is code with the seek bar working just fine. You can test it by integrating the code below and just opening the API URL in the browser.
import mime from 'mime-types';
const key = 'S3_BUCKET KEY';
const params = { Key: key, Bucket: AWS_BUCKET };
//s3 here refers to AWS.S3 object.
s3.headObject(params, function (err, data) {
if (err) {
console.error(err);
return next(err);
}
if (req.headers.range) {
const range = req.headers.range;
const bytes = range.replace(/bytes=/, '').split('-');
const start = parseInt(bytes[0], 10);
const total = data.ContentLength;
const end = bytes[1] ? parseInt(bytes[1], 10) : total - 1;
const chunkSize = end - start + 1;
res.set('Content-Range', 'bytes ' + start + '-' + end + '/' + total);
res.set('Accept-Ranges', 'bytes');
res.set('Content-Length', chunkSize.toString());
params['Range'] = range;
console.log('video buffering - range, total, start, end, params', range, total, start, end, params);
res.status(206); // partial content: the body will match the Content-Range set above
} else {
res.set('Content-Length', data.ContentLength.toString());
res.status(200); // no Range header, so return the whole object
console.log('video buffering - params', params);
}
res.set('Content-Type', mime.lookup(key));
res.set('Last-Modified', data.LastModified.toString());
res.set('ETag', data.ETag);
const stream = s3.getObject(params).createReadStream();
stream.on('error', function error(err) {
return next(err);
});
stream.on('end', () => {
console.log('Served by Amazon S3: ' + key);
});
stream.pipe(res);
});
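The important difference from the code in the question is that the incoming Range header is forwarded to S3 (params['Range'] = range), so S3 returns only the requested bytes and the streamed body actually matches the Content-Range header; the question's version always streams the whole object from byte 0. As a rough, untested sketch, the same idea applied to the question's serve() handler could look like this (assuming start and end are parsed from req.headers.range as in the question):
// inside the range branch of serve(), instead of streaming the full object:
const rangeParams = Object.assign({}, objectParams, {
    Range: `bytes=${start}-${end}` // ask S3 for exactly the requested byte range
});
res.writeHead(206, headers);
s3.getObject(rangeParams).createReadStream().pipe(res);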

Amazon Polly Character Limit Increase

According to a recent AWS announcement, the new character limit for Polly is 3,000 -- double the previous limit of 1,500.
Previously, my application had a character limit of 1,450 to account for characters I'm adding programmatically to the user's input. In view of the above announcement, I thought increasing my character limit to 2,950 would be safe. However, my audio files are empty (0 KB) any time I surpass about 2,450 characters. I'm baffled by this, and of course I would like to use the extra 500 characters if possible.
Here is my code:
var AWS = require('aws-sdk'),
fs = require('fs');
const Fs = require('fs')
const path = require('path');
AWS.config.loadFromPath(path.join(__dirname, 'config.json'));
var mysql = require('mysql');
var localfile = path.join(__dirname, 'myverse.mp3');
var connection = mysql.createConnection({
connectionLimit : 10,
host : '...',
user : '...',
password : '...',
database: '...',
});
[some irrelevant code omitted here.]
connection.query('SELECT versetext, book, mp3, id, reference, userid FROM publicverses where mp3 = "empty" limit 1',
function (error, results, fields) {
console.log(error);
var scripture = results[0].versetext + ".";
var userid = results[0].userid;
var book = results[0].book;
var reference = results[0].reference.replace(":", " verse ").replace(",", " and ");
if (reference.includes("-")){
var reference = reference.replace("verse", "verses");
}
console.log(scripture + " " + book.replace("1", "first").replace("2", "second").replace("3", "third") + " " + reference);
var myverse = "<speak><prosody volume='x-loud'><break time='1s'/>" + scripture + " " + book.replace("1", "first").replace("2", "second").replace("3", "third") + " " + reference + "<break time='1s'/></prosody></speak>";
var link = "https://s3.amazonaws.com/publicverses/" + book.replace(/ /g, "")+reference.replace(/ /g, "")+"_user"+userid+"_"+randomnumber+".mp3";
writeit();
fs.createWriteStream(localfile);
var myvalue = fs.createReadStream(localfile);
setTimeout(uploadit, 2000)
function linkit(){
'use strict';
connection.query('update publicverses set mp3 = ? where mp3 = "empty" limit 1', [link],
function (error, results, fields) {
console.log(error)
})
}
function writeit() {
'use strict';
const Polly = new AWS.Polly({
signatureVersion: 'v4',
region: 'us-east-1'
})
let params = {
'Text': myverse.replace(" Job ", " Jobe "),
'LexiconNames': [ 'kjv' ],
'TextType': 'ssml',
'OutputFormat': 'mp3',
'VoiceId': 'Matthew'
}
Polly.synthesizeSpeech(params, (err, data) => {
if (err) {
console.log(err.code)
} else if (data) {
if (data.AudioStream instanceof Buffer) {
Fs.writeFile("./myverse.mp3", data.AudioStream, function(err) {
if (err) {
return console.log(err)
}
console.log("Verse recorded successfully!")
})
}
}
})
}
function uploadit () {
'use strict';
console.log('Preparing to upload the verse.')
var s3 = new AWS.S3({apiVersion: '2006-03-01'});
var uploadParams = {Bucket: 'publicverses', key: '/test.mp3', Body: myvalue, ACL: 'public-read'};
var file = 'MyVerse.mp3';
var fs = require('fs');
var fileStream = fs.createReadStream(file);
fileStream.on('error', function(err) {
console.log('File Error', err);
});
uploadParams.Body = fileStream;
var path = require('path');
uploadParams.Key = book.replace(/ /g, "")+reference.replace(/ /g, "")+"_user"+userid+"_"+randomnumber+".mp3";
// call S3 to retrieve upload file to specified bucket
s3.upload (uploadParams, function (err, data) {
if (err) {
console.log("Error", err);
} if (data) {
console.log("Upload Success", data.Location);
linkit();
addanother();
}
});
}
});
}
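One thing worth checking before adjusting the limit is how long the final SSML payload actually is, since the added tags, book name and reference all count toward what gets sent to Polly. A minimal, hypothetical check (the 2,950 figure is just the limit discussed above):
// log the real size of the string handed to synthesizeSpeech
console.log('SSML payload length:', myverse.length, 'characters');
if (myverse.length > 2950) {
    console.log('Warning: payload is longer than the intended 2,950-character limit');
}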

Read JSON from S3 file and insert records into DynamoDB using Lambda with Node.js runtime

My DynamoDB table has the following partition key:
UserId String
This is my Lambda function:
'use strict';
console.log('Loading function');
const doc = require('dynamodb-doc');
const dynamo = new doc.DynamoDB();
var AWS = require('aws-sdk');
var S3 = new AWS.S3({
maxRetries: 0,
region: 'us-east-1',
});
var insertSuccess = 0;
var insertErrors = 0;
function dynamoResultCallback(err, data) {
if (err) {
insertErrors++;
console.log("Insert Error: \n");
console.log(err, err.stack); // an error occurred
} else {
insertSuccess++;
}
}
exports.handler = (event, context, callback) => {
console.log('Received event:', JSON.stringify(event, null, 2));
console.log("Init complete, running.. \n")
var srcBucket = event.Records[0].s3.bucket.name;
var srcKey = event.Records[0].s3.object.key;
console.log("Params: srcBucket: " + srcBucket + " srcKey: " + srcKey + "\n")
S3.getObject({
Bucket: srcBucket,
Key: srcKey,
}, function(err, data) {
if (err !== null) {
return callback(err, null);
}
var fileData = data.Body.toString('utf-8');
var recordsArray = fileData.split("\n");
for (var i = 0; i < recordsArray.length; i++) {
var record = recordsArray[i];
console.log("Inserting record: " + record);
var params = {
Item: record,
ReturnConsumedCapacity: "TOTAL",
TableName: "PacketData"
};
dynamo.putItem(params, dynamoResultCallback);
}
console.log("Insert Result -- successCount: " + insertSuccess + " errorCount: " + insertErrors)
return callback(null, data);
});
};
What's happening currently is that it reads the S3 file as expected, but I'm not able to pass the resulting records into DynamoDB; the CloudWatch log error is that it didn't get passed a UserId (the required partition key), despite it being in the data. The records print fine in CloudWatch, and actually if I replace the record variable on this line:
Item: record,
With the JSON string from CloudWatch it works as expected. Any suggestions?
P.S. I'm a JavaScript novice, if that wasn't already apparent, but I believe the problem is due to how the variable record is being interpreted at runtime, as it works fine if I replace that variable with its value as a hard-coded string.
Use JSON.parse(record).
According to the dynamodb-doc documentation, the putItem method expects an object rather than a string.
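Applied to the loop in the question, the change would look roughly like this (a sketch; the empty-line guard is an addition, since splitting the file on "\n" can leave a trailing empty string):
for (var i = 0; i < recordsArray.length; i++) {
    var record = recordsArray[i];
    if (!record) continue; // skip empty lines left over from the split
    var params = {
        Item: JSON.parse(record), // putItem expects an object, not a JSON string
        ReturnConsumedCapacity: "TOTAL",
        TableName: "PacketData"
    };
    dynamo.putItem(params, dynamoResultCallback);
}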

Promises or Async with Node.js

I have this large amount of code which gets an image from an S3 bucket, saves it to a temporary file on Lambda, resizes it to 4 different sizes, saves them into different folders according to size and then puts the images back into the S3 bucket, also into different folders.
However, when running on Lambda, I have to call context.done() at the end of the whole process, otherwise the context will remain alive until Lambda times out.
So I need to call context.done() when upload returns for the last time.
Looking into the two options, async and promises, which would likely need less refactoring of my code to work?
// dependencies
var AWS = require('aws-sdk');
var gm = require('gm').subClass({ imageMagick: true });
var fs = require("fs");
// get reference to S3 client
var s3 = new AWS.S3();
var _800px = {
width: 800,
destinationPath: "large"
};
var _500px = {
width: 500,
destinationPath: "medium"
};
var _200px = {
width: 200,
destinationPath: "small"
};
var _45px = {
width: 45,
destinationPath: "thumbnail"
};
var _sizesArray = [_800px, _500px, _200px, _45px];
var len = _sizesArray.length;
// module to be exported when in production
exports.AwsHandler = function(event, context) {
// Read options from the event.
var srcBucket = event.Records[0].s3.bucket.name;
var srcKey = event.Records[0].s3.object.key;
var dstnFolder = "/tmp";
// function to determine paths
function _filePath (directory, i) {
if ( directory === false ) {
return "dst/" + _sizesArray[i].destinationPath + "/" + srcKey;
} else if ( directory === true ) {
return dstnFolder + "/" + _sizesArray[i].destinationPath + "/" + srcKey;
}
};
for ( var i = 0; i<len; i++) {
fs.mkdir("/tmp" + "/" + _sizesArray[i].destinationPath, function (err) {
if (err) {
console.log(err);
}
});
};
// Infer the image type.
var typeMatch = srcKey.match(/\.([^.]*)$/);
if (!typeMatch) {
console.error('unable to infer image type for key ' + srcKey);
return;
};
var imageType = typeMatch[1];
if (imageType != "jpg" && imageType != "png") {
console.log('skipping non-image ' + srcKey);
return;
};
function download () {
s3.getObject({
Bucket: srcBucket,
Key: srcKey
},
function (err, response) {
if (err) {
console.error(err);
}
fs.writeFile("/tmp" + "/" + srcKey, response.Body, function (err) {
transform();
})
}
);
};
function transform () {
var _Key,
_Size;
for ( var i = 0; i<len; i++ ) {
// define path for image write
_Key = _filePath (true, i);
// define sizes to resize to
_Size = _sizesArray[i].width;
// resize images
gm("/tmp/" + srcKey)
.resize(_Size)
.write(_Key, function (err) {
if (err) {
return handle(err);
}
if (!err) {
// get the result of write
var readPath = this.outname;
var iniPath = this.outname.slice(4);
var writePath = "dst".concat(iniPath);
read(err, readPath, writePath, upload);
}
});
};
};
function read (err, readPath, writePath, callback) {
// read file from temp directory
fs.readFile(readPath, function (err, data) {
if (err) {
console.log("NO READY FILE FOR YOU!!!");
console.error(err);
}
callback(data, writePath);
});
};
function upload (data, path) {
// upload images to s3 bucket
s3.putObject({
Bucket: srcBucket,
Key: path,
Body: data,
ContentType: data.type
},
function (err) {
if (err) {
console.error(err);
}
console.log("Uploaded with success!");
});
}
download();
Take a look at how they use Q in this example.
Your code will end up looking very similar to:
download()
.then(transform)
.then(read)
.then(upload)
.catch(function (error) {
// Handle any error from all above steps
console.error(error);
})
.done(function() {
console.log('Finished processing image');
context.done();
});
You could also take a look at async and use it as they show in this other example.
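For that chain to work, each step has to return a promise. As a minimal sketch, download from the question could be wrapped like this (shown with native Promises; with Q the equivalent would be Q.Promise or a deferred, and transform, read and upload would be wrapped the same way):
function download() {
    return new Promise(function (resolve, reject) {
        s3.getObject({ Bucket: srcBucket, Key: srcKey }, function (err, response) {
            if (err) return reject(err);
            fs.writeFile("/tmp/" + srcKey, response.Body, function (err) {
                if (err) return reject(err);
                resolve(); // file is on disk, ready for transform()
            });
        });
    });
}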

How to upload base64 data as an image to S3 using Node.js?

I am sending the base64 data of a canvas to a Node.js script. I need the base64 data to be stored as an image in the S3 bucket. Is there any way to achieve this?
Store your data URI in a variable.
Create a function which decodes your data URI (the base64-encoded string) and returns the decoded data; here I have created a dataURItoBlob() function.
Pass that result as the Body of the S3 upload call.
var myDataUri = "data:image/jpg;base64,JVBERi0xLjMKMyAwIG9iago8PC9UeXBlIC9QYW..."
var myFile=dataURItoBlob(myDataUri);
function dataURItoBlob(dataURI) {
var binary = atob(dataURI.split(',')[1]);
var array = [];
for (var i = 0; i < binary.length; i++) {
array.push(binary.charCodeAt(i));
}
return new Blob([new Uint8Array(array)], {
type: 'image/jpg'
});
}
if (myFile) {
results.innerHTML = '';
var params = {
Key: fileName+'.jpg',
ContentType: 'image/jpg',
Body: myFile
};
bucket.upload(params, function(err, data) {
results.innerHTML = err ? 'ERROR!' : 'UPLOADED: ' + fileName;
});
} else {
results.innerHTML = 'Nothing to upload.';
}
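Note that atob and Blob are browser APIs, so the snippet above targets the browser-side SDK. In a Node.js script the same decode step would use a Buffer instead; a rough equivalent, assuming the same data URI variable:
// strip the "data:image/jpg;base64," prefix and decode the rest
var base64Body = myDataUri.split(',')[1];
var myFile = Buffer.from(base64Body, 'base64');
// myFile can then be passed as Body in the upload params exactly as above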
You can send base64 data with the AWS putObject method as follows:
var AWS = require('aws-sdk');
AWS.config.loadFromPath('./s3_config.json');
var s3Bucket = new AWS.S3( { params: {Bucket: 'myBucket'} } );
var imageBase64Data = 'Your base64 code '
// strip any data URI prefix and decode the base64 string into a Buffer
var imageBuffer = Buffer.from(imageBase64Data.replace(/^data:image\/\w+;base64,/, ''), 'base64');
var params = {
Key: 'myImage.jpg', // hypothetical key name, use your own
Body: imageBuffer,
ContentType: 'image/jpeg'
};
s3Bucket.putObject(params, function(err, data){
if (err) {
console.log('Error uploading data: ', err);
} else {
console.log('Successfully uploaded the image!');
}
});
