When piping a movie from S3 the file isn't seekable - node.js

We have an application that sometimes serves an MP4 file stored on S3. Since only specific people should be able to see each file, the file is private, and our service only shows it to authorised people.
The movie starts playing correctly (in the browser's built-in video tag). However, if we seek to a point in the movie that hasn't been buffered yet, the player buffers for a bit, then stops playing; clicking Play afterwards causes the movie to start from the beginning. If I make the file public and access it directly from S3, seeking to an unbuffered point works correctly.
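For reference, when seeking to an unbuffered point the browser sends a range request and expects a partial-content response; the exchange looks roughly like this (the header values are illustrative, not captured from our setup):
GET /some-movie.mp4 HTTP/1.1
Range: bytes=1048576-

HTTP/1.1 206 Partial Content
Content-Range: bytes 1048576-10485759/10485760
Accept-Ranges: bytes
Content-Type: video/mp4
Content-Length: 9437184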
I created a standalone node program that reproduces the problem. I tried to make the response headers identical to those that S3 sends, but the problem remains.
const http = require("http");
const AWS = require("aws-sdk");
const proxy = require("proxy-agent");
Object.assign(process.env, {
AWS_ACCESS_KEY_ID: "REDACTED",
AWS_SECRET_ACCESS_KEY: "REDACTED",
AWS_EC2_REGION: "us-west-2"
});
const s3 = new AWS.S3({
s3ForcePathStyle: 'true',
signatureVersion: 'v4',
httpOptions: { timeout: 300000 },
endpoint: 'https://s3.us-west-2.amazonaws.com',
region: 'us-west-2'
});
const objectParams = {
Bucket: 'REDACTED',
Key: 'some-movie.mp4'
};
let request = 0;
function serve(req, res) {
console.log("Handling request", ++request, req.url);
s3.headObject(objectParams, (err, data) => {
if (err)
throw err;
const { ContentType: type, ContentLength: length} = data;
console.log("Got", data);
if (data.ETag)
res.setHeader("ETag", data.ETag);
const range = req.headers.range;
if (range) {
console.log("Serving range", range);
const parts = range.replace("bytes=", "").split("-");
const start = parseInt(parts[0], 10);
const end = parts[1] ? parseInt(parts[1], 10) : length - 1;
let headers = {
"Content-Range": `bytes ${start}-${end}/${length}`,
"Accept-Ranges": "bytes",
"Content-Type": type,
"Content-Length": end - start + 1,
"Last-Modified": data.LastModified,
};
if (req.headers["if-range"]) {
console.log("Setting if-range to", req.headers["if-range"]);
headers["If-Range"] = req.headers["if-range"];
}
res.writeHead(206, headers);
}
else {
console.log("Whole file");
res.setHeader("Accept-Ranges", "bytes");
res.setHeader("Content-Type", type);
res.setHeader("Content-Length", length);
res.setHeader("Last-Modified", data.LastModified);
}
const stream = s3.getObject(objectParams).createReadStream();
stream.on("error", err => console.error("stream error:", err));
stream.pipe(res).on("finish", data => {
console.log("Finished streaming");
});
});
}
http.createServer(serve).listen(1234);
What am I missing?

Here is code with the seek bar working just fine. You can test it by integrating the code below and opening the API URL in the browser.
import mime from 'mime-types';
const key = 'S3_BUCKET KEY';
const params = { Key: key, Bucket: AWS_BUCKET };
//s3 here refers to AWS.S3 object.
s3.headObject(params, function (err, data) {
if (err) {
console.error(err);
return next(err);
}
if (req.headers.range) {
const range = req.headers.range;
const bytes = range.replace(/bytes=/, '').split('-');
const start = parseInt(bytes[0], 10);
const total = data.ContentLength;
const end = bytes[1] ? parseInt(bytes[1], 10) : total - 1;
const chunkSize = end - start + 1;
res.set('Content-Range', 'bytes ' + start + '-' + end + '/' + total);
res.set('Accept-Ranges', 'bytes');
res.set('Content-Length', chunkSize.toString());
params['Range'] = range;
console.log('video buffering - range, total, start, end ,params', range, total, start, end, params);
} else {
res.set('Content-Length', data.ContentLength.toString());
console.log('video buffering - ,params', params);
}
// 206 Partial Content for range requests, 200 for the whole file
res.status(req.headers.range ? 206 : 200);
res.set('Content-Type', mime.lookup(key));
res.set('Last-Modified', data.LastModified.toString());
res.set('ETag', data.ETag);
const stream = s3.getObject(params).createReadStream();
stream.on('error', function error(err) {
return next(err);
});
stream.on('end', () => {
console.log('Served by Amazon S3: ' + key);
});
stream.pipe(res);
});
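The essential difference from the first program is the line params['Range'] = range: the browser's Range header is forwarded to S3, so the bytes streamed back actually match the Content-Range and Content-Length being advertised. Applied to the standalone reproduction above, the change would look roughly like this (a sketch, not tested against our setup):
// Forward the browser's Range header so S3 only returns the requested bytes
const getParams = req.headers.range
  ? Object.assign({ Range: req.headers.range }, objectParams)
  : objectParams;
const stream = s3.getObject(getParams).createReadStream();
stream.pipe(res);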

Related

Stream video from Gridfs mongoDB

I'm trying to stream video in chunks; the video is stored in MongoDB using GridFS. Here is the API:
videoRoute.get('/:id', (req, res) => {
GFS.findById(req.params.id, (err: any, file: any) => {
// Check if file
if (!file || file.length === 0) {
return res.status(404).json({
err: 'No file exists',
});
}
// Read output to browser
const range = req.headers.range;
if (!range) {
res.status(400).send('Requires Range header');
} else {
// Create response headers
const start = Number(range.replace(/\D/g, ''));
const end = file.length - 1;
const contentLength = end - start + 1;
const headers = {
'Content-Range': `bytes ${start}-${end}/${file.length}`,
'Accept-Ranges': 'bytes',
'Content-Length': contentLength,
};
// HTTP Status 206 for Partial Content
res.writeHead(206, headers);
const bucket = new mongoose.mongo.GridFSBucket(mongoose.connection.db, {
bucketName: 'uploads',
});
const readstream = bucket.openDownloadStream(file._id, {
start,
end,
});
readstream.pipe(res);
}
});
});
When I use only the final part (the read stream piped to the response), the video works fine, but it only starts playing once it has fully loaded. To implement loading by chunks I added the rest, but it doesn't work; here's what I get (screenshot):
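A common pattern for range responses over GridFS is to cap each response at a fixed chunk and let the driver stream only that window. Below is a sketch of how the else branch could look; it assumes openDownloadStream's end option is exclusive, hard-codes the content type, and uses an illustrative 1 MB chunk size:
const CHUNK_SIZE = 1024 * 1024; // 1 MB per response, illustrative
const fileSize = file.length;
const start = Number(range.replace(/\D/g, ''));
const end = Math.min(start + CHUNK_SIZE, fileSize) - 1; // last byte of this chunk
res.writeHead(206, {
  'Content-Range': `bytes ${start}-${end}/${fileSize}`,
  'Accept-Ranges': 'bytes',
  'Content-Length': end - start + 1,
  'Content-Type': 'video/mp4', // assumption: adjust to the stored file's type
});
const bucket = new mongoose.mongo.GridFSBucket(mongoose.connection.db, {
  bucketName: 'uploads',
});
// the end offset is exclusive, so pass one past the last byte promised above
bucket.openDownloadStream(file._id, { start, end: end + 1 }).pipe(res);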

Not going into AWS HttpClient.handleRequest to elasticsearch in lambda, Nodejs

I know this same question was basically asked and answered, however, trying to implement the answer did not get it to work. Here is the original question: AWS.HttpClient handleRequest is not working in AWS lambda
I tried putting async/await on multiple different portions of the request, but none of them worked as mentioned in one of the comments in the referred to link.
The situation is that I have a Lambda function that listens for events on the S3 bucket; when an event happens it is supposed to index the documents in the Elasticsearch service. The issue happens when the PUT request is sent to ES.
I have tested with an S3 test event and it WORKS, but for some reason it hangs and never goes into the handleRequest function when an actual event fires on my S3 bucket.
Here is my code:
Index.js
const AWS = require('aws-sdk');
const s3 = new AWS.S3()
const elastic_client = require('elastic.js');
exports.handler = async (event, context) => {
const Bucket = event.Records[0].s3.bucket.name;
const Key = event.Records[0].s3.object.key;
const data = await s3.getObject({ Bucket, Key }).promise();
for (const quote_doc of data.Body) {
elastic_client.indexQuote(quote_doc);
}
}
elastic.js
var AWS = require('aws-sdk');
require('dotenv').config();
var region = process.env.AWS_REGION;
var domain = process.env.AWS_ELASTIC_DOMAIN;
function indexQuote(quote) {
var endpoint = new AWS.Endpoint(domain);
var request = new AWS.HttpRequest(endpoint, region);
var index = 'quotes';
var type = '_doc';
var id = quote.QuoteId;
request.method = 'PUT';
request.path += index + '/' + type + '/' + id;
request.body = JSON.stringify(quote);
request.headers['host'] = domain;
request.headers['Content-Type'] = 'application/json';
request.headers['Content-Length'] = Buffer.byteLength(request.body);
var credentials = new AWS.EnvironmentCredentials('AWS');
credentials.accessKeyId = process.env.AWS_ACCESS_KEY_ID;
credentials.secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY;
var signer = new AWS.Signers.V4(request, 'es');
signer.addAuthorization(credentials, new Date());
var client = new AWS.HttpClient();
client.handleRequest(request, null, function(response) { // Here is where it gets hung up
console.log(response.statusCode + ' ' + response.statusMessage); // Never outputs this
var responseBody = '';
response.on('data', function (chunk) {
responseBody += chunk;
});
response.on('end', function (chunk) {
console.log('Response body: ' + responseBody);
});
}, function(error) {
console.log('Error: ' + error);
});
}
The confusing part for me is that it works fine when I do a test event, and it works fine when I index it locally on my own computer, but it just doesn't go into handleRequest. Any help/direction is appreciated, thank you.
Edit:
package.json
{
"dependencies": {
"aws-sdk": "*",
"aws-xray-sdk": "^3.2.0",
"dotenv": "^8.2.0"
}
}
Try wrapping the handleRequest function inside a Promise. Your indexQuote() function would look almost the same, but at the end it would return a Promise:
function indexQuote(quote) {
...
return new Promise((resolve, reject) => {
client.handleRequest(request, null,
response => {
const { statusCode, statusMessage, headers } = response;
let body = '';
response.on('data', chunk => {
body += chunk;
});
response.on('end', () => {
const data = {
statusCode,
statusMessage,
headers
};
if (body) {
data.body = body;
}
resolve(data);
});
},
err => {
reject(err);
});
});
}
And then you can await and inspect the result:
const result = await indexQuote(quote);
console.log("Index result: " + result);

Promises or Async with Node js

I have this large amount of code which gets an image from an S3 bucket, saves it to a temporary file on Lambda, resizes it to 4 different sizes, saves the resized copies into different folders according to size, and then puts the images back into the S3 bucket, also into different folders.
However, when running on Lambda I have to call context.done() at the end of the whole process, otherwise the context stays alive until Lambda times out.
So I need to call context.done() when upload returns for the last time.
Looking at the two options, async and promises, which would likely need less refactoring of my code to work?
// dependencies
var AWS = require('aws-sdk');
var gm = require('gm').subClass({ imageMagick: true });
var fs = require("fs");
// get reference to S3 client
var s3 = new AWS.S3();
var _800px = {
width: 800,
destinationPath: "large"
};
var _500px = {
width: 500,
destinationPath: "medium"
};
var _200px = {
width: 200,
destinationPath: "small"
};
var _45px = {
width: 45,
destinationPath: "thumbnail"
};
var _sizesArray = [_800px, _500px, _200px, _45px];
var len = _sizesArray.length;
// module to be exported when in production
exports.AwsHandler = function(event, context) {
// Read options from the event.
var srcBucket = event.Records[0].s3.bucket.name;
var srcKey = event.Records[0].s3.object.key;
var dstnFolder = "/tmp";
// function to determine paths
function _filePath (directory, i) {
if ( directory === false ) {
return "dst/" + _sizesArray[i].destinationPath + "/" + srcKey;
} else if ( directory === true ) {
return dstnFolder + "/" + _sizesArray[i].destinationPath + "/" + srcKey;
}
};
for ( var i = 0; i<len; i++) {
fs.mkdir("/tmp" + "/" + _sizesArray[i].destinationPath, function (err) {
if (err) {
console.log(err);
}
});
};
// Infer the image type.
var typeMatch = srcKey.match(/\.([^.]*)$/);
if (!typeMatch) {
console.error('unable to infer image type for key ' + srcKey);
return;
};
var imageType = typeMatch[1];
if (imageType != "jpg" && imageType != "png") {
console.log('skipping non-image ' + srcKey);
return;
};
function download () {
s3.getObject({
Bucket: srcBucket,
Key: srcKey
},
function (err, response) {
if (err) {
console.error(err);
}
fs.writeFile("/tmp" + "/" + srcKey, response.Body, function (err) {
transform();
})
}
);
};
function transform () {
var _Key,
_Size;
for ( var i = 0; i<len; i++ ) {
// define path for image write
_Key = _filePath (true, i);
// define sizes to resize to
_Size = _sizesArray[i].width;
// resize images
gm("/tmp/" + srcKey)
.resize(_Size)
.write(_Key, function (err) {
if (err) {
return handle(err);
}
if (!err) {
// get the result of write
var readPath = this.outname;
var iniPath = this.outname.slice(4);
var writePath = "dst".concat(iniPath);
read(err, readPath, writePath, upload);
}
});
};
};
function read (err, readPath, writePath, callback) {
// read file from temp directory
fs.readFile(readPath, function (err, data) {
if (err) {
console.log("NO READY FILE FOR YOU!!!");
console.error(err);
}
callback(data, writePath);
});
};
function upload (data, path) {
// upload images to s3 bucket
s3.putObject({
Bucket: srcBucket,
Key: path,
Body: data,
ContentType: data.type
},
function (err) {
if (err) {
console.error(err);
}
console.log("Uploaded with success!");
});
}
download();
Take a look at how they use Q in this example.
Your code will end up looking very similar to this:
download()
.then(transform)
.then(read)
.then(upload)
.catch(function (error) {
// Handle any error from all above steps
console.error(error);
})
.done(function() {
console.log('Finished processing image');
context.done();
});
You could also take a look at async and use it as shown in this other example.
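For the chain shown above to work, each step has to return a promise. As an illustration, here is download() from the question rewritten with a plain Promise (using the srcBucket, srcKey, s3 and fs already defined in that code; transform, read and upload would be wrapped the same way):
function download() {
  return new Promise(function (resolve, reject) {
    s3.getObject({ Bucket: srcBucket, Key: srcKey }, function (err, response) {
      if (err) return reject(err);
      // write the original image to /tmp and resolve once it is on disk
      fs.writeFile("/tmp/" + srcKey, response.Body, function (err) {
        if (err) return reject(err);
        resolve();
      });
    });
  });
}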

Nodejs - testing AWS with Mocha

I'm having trouble writing tests for the following Node.js code, which uses AWS and GraphicsMagick. I have also searched for examples of how to write tests for async's waterfall method, but without any definite results.
// dependencies
var async = require('async');
var AWS = require('aws-sdk');
var gm = require('gm').subClass({ imageMagick: true });
var util = require('util');
// get reference to S3 client
var s3 = new AWS.S3();
exports.AwsHandler = function(event, context) {
// Read options from the event.
console.log("Reading options from event:\n", util.inspect(event, {depth: 5}));
var srcBucket = event.Records[0].s3.bucket.name;
var srcKey = event.Records[0].s3.object.key;
var dstnKey = srcKey;
// Infer the image type.
var typeMatch = srcKey.match(/\.([^.]*)$/);
if (!typeMatch) {
console.error('unable to infer image type for key ' + srcKey);
return;
}
var imageType = typeMatch[1];
if (imageType != "jpg" && imageType != "png") {
console.log('skipping non-image ' + srcKey);
return;
}
//Download the image from S3, transform, and upload to same S3 bucket but different folders.
async.waterfall([
function download(next) {
// Download the image from S3 into a buffer.
s3.getObject({
Bucket: srcBucket,
Key: srcKey
},
next);
},
function transformSave(response, next) {
var _buffer = null;
for (var i = 0; i<len; i++) {
// Transform the image buffer in memory.
gm(response.Body, srcKey)
.resize(_sizesArray[i].width)
.toBuffer(imageType, function(err, buffer) {
if (err) {
next(err);
} else {
console.log(buffer);
_buffer = buffer;
}
});
// put newly resized image into respective folder
s3.putObject({
Bucket: srcBucket,
Key: "dst/" + _sizesArray[i].destinationPath + "/" + dstnKey,
Body: _buffer,
ContentType: response.ContentType
}, next);
}
},
], function (err) {
if (err) {
console.error(
'---->Unable to resize ' + srcBucket + '/' + srcKey +
' and upload to ' + srcBucket + '/dst' +
' due to an error: ' + err
);
} else {
console.log(
'---->Successfully resized ' + srcBucket +
' and uploaded to ' + srcBucket + "/dst"
);
}
context.done();
}
);
};
My tests for this module so far:
require('blanket')({
pattern: function (filename) {
return !/node_modules/.test(filename);
}
});
// in terminal, type the following command to get code coverage: mocha -R html-cov > coverage.html
var chai = require('chai');
var sinonChai = require("sinon-chai");
var expect = chai.expect;
var sinon = require('sinon');
chai.use(sinonChai);
var sync = require("async");
var proxyquire = require('proxyquire');
describe('Image Resizing module', function () {
var gmSubclassStub = sinon.stub();
var getObjectStub = sinon.stub();
var putObjectSpy = sinon.spy();
var testedModule = proxyquire('../index', {
'gm': {subClass: sinon.stub().returns(gmSubclassStub)},
'AWS': {
"s3": {
getObject: sinon.stub().returns(getObjectStub),
putObject: putObjectSpy
}
}
});
describe('AwsHandler', function () {
var event = {
"Records": [
{
"s3": {
"bucket": {
"name": "testbucket"
},
"object": {
"key": "test.jpg"
}
}
}
]
};
it("should call gm write with correct files", function () {
// Arrange
// Spies are the methods you expect were actually called
var buffer800Spy = sinon.spy();
var buffer500Spy = sinon.spy();
var buffer200Spy = sinon.spy();
var buffer45Spy = sinon.spy();
// This is a stub that will return the correct spy for each iteration of the for loop
var resizeStub = sinon.stub();
resizeStub.withArgs(800).returns({toBuffer: buffer800Spy});
resizeStub.withArgs(500).returns({toBuffer: buffer500Spy});
resizeStub.withArgs(200).returns({toBuffer: buffer200Spy});
resizeStub.withArgs(45).returns({toBuffer: buffer45Spy});
// Stub is used when you just want to simulate a returned value
var nameStub = sinon.stub().yields({"name": "testbucket"});
var keyStub = sinon.stub().yields({"key": "test.jpg"});
gmSubclassStub.withArgs(event).returns({resize:resizeStub});
getObjectStub.withArgs(event).yields({name: nameStub}, {key: keyStub});
// Act - this calls the tested method
testedModule.AwsHandler(event);
// Assert
});
});
});
It's hard to respond to this kind of question here; it is not very specific, and it's not an open question that can be answered with opinions, thoughts, etc.
Hence, I've created a similar implementation which solves the async.waterfall issue, and I provide a test that exercises AwsHandler with 100% coverage.
The code is in this gist, because it's more handy and readable to be there than here.
I've also written a blog post related to this implementation.
There are a few things that need to be changed:
You want to test the operation of the unit, without testing the implementation. That's why you should ignore the async in your tests (as you did).
It is just a way of implementing the method, the inner workings of the unit.
What you should test is that in given conditions, the unit gives the end result expected, in this case it's calling s3.putObject.
So you should stub everything that is external (gm and aws), and spy on the s3.putObject method, because that is the expected end result.
In your stubs you used "yields", which calls the first callback argument the stub finds.
If you need to target a different parameter, as in our case, use "callsArgWith(index, ...)" with the index of the parameter that is the callback.
The proxyquire call has to be given the injected modules under exactly the same names used in the code that requires them; I changed 'AWS' to 'aws-sdk'.
A way of checking whether the stubs were injected correctly is to put a watch on the "s3" variable in the debugger and verify that it is "function proxy()" and not "function()". You can also print it to the console if you're not using a debugger.
Your module is calling next in the for loop, which causes the waterfall to split into a tree with 36 calls to done(!).
Maybe you should use a different async model like map reduce. I fixed it by adding a silly condition, but that's not good code.
As a side note, you can see that the test is becoming awfully complicated.
This can be an indication that the tested code could use some separation of concerns.
For example, moving the gm operations, and the s3 operations to two separate modules can help separate things, and also make it easier to test.
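To illustrate the yields/callsArgWith point with an s3.getObject-style signature (the values are made up):
const getObject = sinon.stub();
// target the argument at index 1, i.e. the (err, data) callback
getObject.callsArgWith(1, null, { Body: 'response body' });
getObject({ Bucket: 'testbucket', Key: 'test.jpg' }, function (err, data) {
  console.log(data.Body); // 'response body'
});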
Changes in the module itself, to prevent calling next 4*4 times:
function transform(response, next) {
for (var i = 0; i<len; i++) {
// Transform the image buffer in memory.
gm(response.Body, srcKey)
.resize(_sizesArray[i].width)
.toBuffer(imageType, function(err, buffer) {
if (err) {
next(err);
} else {
next(null, response.ContentType, buffer, i);
}
});
}
},
function upload(contentType, data, i, next) {
// Stream the transformed image to a different folder.
s3.putObject({
Bucket: srcBucket,
Key: "dst/" + _sizesArray[i].destinationPath + "/" + dstnKey,
Body: data,
ContentType: contentType
},
function(err) {
if (i==3) next(err);
});
}
And the test:
describe.only('Image Resizing module', function () {
var gmSubclassStub = sinon.stub();
var s3Stub = {};
var proxyquire = require('proxyquire');
var testedModule = proxyquire('../index', {
'gm': {subClass: sinon.stub().returns(gmSubclassStub)},
'aws-sdk': {"S3": sinon.stub().returns(s3Stub)}
});
describe('AwsHandler', function () {
var event = {};
// The done callback is used for async testing
it("should call gm write with correct files", function (done) {
// Arrange
var resizeStub = sinon.stub();
var buffer800Spy = sinon.stub().withArgs("jpg").callsArgWith(1, null, "800 buffer");
var buffer500Spy = sinon.stub().withArgs("jpg").callsArgWith(1, null, "500 buffer");
var buffer200Spy = sinon.stub().withArgs("jpg").callsArgWith(1, null, "200 buffer");
var buffer45Spy = sinon.stub().withArgs("jpg").callsArgWith(1, null, "45 buffer");
resizeStub.withArgs(800).returns({toBuffer: buffer800Spy});
resizeStub.withArgs(500).returns({toBuffer: buffer500Spy});
resizeStub.withArgs(200).returns({toBuffer: buffer200Spy});
resizeStub.withArgs(45).returns({toBuffer: buffer45Spy});
gmSubclassStub.withArgs("response body", "test.jpg").returns({resize: resizeStub});
s3Stub.getObject = sinon.stub()
.withArgs({name: "testbucket", key: "test.jpg"})
.callsArgWith(1, null, {
Body: "response body",
ContentType: "response content type"
});
var putObjectMock = sinon.mock();
s3Stub.putObject = putObjectMock;
putObjectMock.callsArgWith(1, null, {}); // return behaviour of the mock
putObjectMock.exactly(4); // sets expectation that it is called 4 times
// Act - this calls the tested method
testedModule.AwsHandler(event, {
done: function () {
// Assertions need to be inside callback because it is async
assert.deepEqual(putObjectMock.getCall(0).args[0], {
Bucket: "testbucket",
Key: "dst/large/test.jpg",
Body: "800 buffer",
ContentType: "response content type"
});
assert.deepEqual(putObjectMock.getCall(1).args[0], {
Bucket: "testbucket",
Key: "dst/medium/test.jpg",
Body: "500 buffer",
ContentType: "response content type"
});
assert.deepEqual(putObjectMock.getCall(2).args[0], {
Bucket: "testbucket",
Key: "dst/small/test.jpg",
Body: "200 buffer",
ContentType: "response content type"
});
assert.deepEqual(putObjectMock.getCall(3).args[0], {
Bucket: "testbucket",
Key: "dst/thumbnail/test.jpg",
Body: "45 buffer",
ContentType: "response content type"
});
// This ends the async test
done();
}
});
});
});
});

AWS S3 performance using Node.js SDK

I am trying to push the upload (and download) performance of my program to its limits.
I am getting about 1000 Mbps when uploading 256 MB files using AWS's command-line interface,
but I get stuck at about 600 Mbps upload with the following program:
if (process.argv.length < 7) {
console.log ("usage: " + process.argv [0] + " " + process.argv[1] + " <config> <region> <bucket> <key> <file>")
return -1
}
var config = process.argv[2]
var region = process.argv[3]
var bucketName = process.argv[4]
var key = process.argv[5]
var file = process.argv[6]
var multipartMap = { Parts: [] }
var uploadStartTime // = new Date()
var partSize = 1024 * 1024 * 8 // at least 5MB, specified by amazon
var partNum
var multipartParams = {
Bucket: bucketName,
Key: key,
ContentType: "binary",
StorageClass: "REDUCED_REDUNDANCY",
}
var part = 0
var maxRetry = 3
var fs = require ('fs')
var aws = require ('aws-sdk')
function upload (bucket, multipart, partParams, trial) {
var trial = trial || 1;
bucket.uploadPart (partParams, function (err, data) {
if (err) {
console.log ("failed: ", err)
if (trial < maxRetry) {
console.log ("retrying part: ", partParams.PartNumber)
upload (bucket, multipart, partParams, trial + 1)
} else {
console.log ("failed: ", err, " unable to upload part: ", partParams.PartNumber)
}
return;
}
multipartMap.Parts[this.request.params.PartNumber - 1] = {
ETag: data.ETag,
PartNumber: Number (this.request.params.PartNumber)
}
if (--partNum > 0) return;
var doneParams = {
Bucket: bucketName,
Key: key,
MultipartUpload: multipartMap,
UploadId: multipart.UploadId
}
console.log ("success")
bucket.completeMultipartUpload (doneParams, function (err, data){
if (err) {
console.log("An error occurred while completing the multipart upload");
console.log(err);
} else {
var delta = (new Date() - uploadStartTime) / 1000;
console.log('Completed upload in', delta, 'seconds');
console.log('Final upload data:', data);
}
})
})
}
var kickoffTime = new Date ()
aws.config.loadFromPath (config)
aws.config.region = region
var bucket = new aws.S3 ({params: {Bucket: bucketName}})
console.log ("filename: ", file)
buffer = fs.readFileSync (file)
partNum = Math.ceil (buffer.length / partSize) // number of parts
var totalPart = partNum
uploadStartTime = new Date ()
bucket.createMultipartUpload (multipartParams, function (err, multipart) {
if (err) {
console.log ("cannot create multipart upload: ", err)
return -1
}
for (var i = 0; i < buffer.length; i += partSize) {
++part
var end = Math.min (i + partSize, buffer.length)
var body = buffer.slice (i, end)
var partParams = {
Body: body,
Bucket: bucketName,
Key: key,
PartNumber: String (part),
UploadId: multipart.UploadId,
ContentLength: end - i
}
upload (bucket, multipart, partParams);
}
})
var kickoffTimeDelta = (new Date () - kickoffTime) / 1000
console.log ("Kickoff time: ", kickoffTimeDelta)
This program will not work for empty files, but please ignore that case. The above program is coded with reference to this.
As for downloading, the speed is also stuck at about 600 Mbps; here is the code:
if (process.argv.length < 7) {
console.log ("usage: " + process.argv [0] + " " + process.argv1 + " ")
return -1
}
var config = process.argv[2]
var region = process.argv[3]
var bucketName = process.argv[4]
var key = process.argv[5]
var file = process.argv[6]
var fs = require ('fs')
var aws = require ('aws-sdk')
fs.readFile (config, "utf8", function (err, configFile) {
if (err) {
console.log ("Config file cannot be read: ", err)
return -1
}
aws.config = JSON.parse (configFile)
aws.config.region = region
var bucket = new aws.S3 ({params: {Bucket: bucketName}})
bucket.createBucket (function () {
var data = {Key: key}
bucket.getObject (data, function (err, fileData) {
if (err) {
console.log ("Error downloading data: ", err)
} else {
fs.writeFile (file, fileData.Body, function (err) {
if (err) {
console.log ("Error writing data: ", err)
} else {
console.log ("Successfully downloaded!")
}
})
}
})
})
})
I am new to Node.js and the AWS SDK; is there anything I'm missing to achieve better throughput?
Thanks
Hmm...had a clarifying question but don't have the reputation to post as such.
How many requests per second are you seeing on both ends? If you're regularly hitting S3 with more than 100 requests per second, you'll get better performance by randomizing the start of your key name.
See this article for an explanation and some suggestions:
http://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html
Basically, if you have a bunch of files whose keys (subdirectories) start with the same characters, you can overwhelm an index partition, so for high-volume read/write activity, random key names speed up performance.
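If the request rate really is the limiting factor, the usual trick from that article is to prepend a short hash to each key so objects spread across partitions; a minimal sketch (prefix length and naming are illustrative):
var crypto = require('crypto')
function hashPrefixedKey (originalKey) {
  // a short hash prefix spreads keys across S3's index partitions
  var prefix = crypto.createHash('md5').update(originalKey).digest('hex').slice(0, 4)
  return prefix + '-' + originalKey
}
console.log(hashPrefixedKey('videos/2015/movie.mp4')) // -> something like 'ab12-videos/2015/movie.mp4'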
