AWS Lambda using s3 getObject function nothing happening - node.js

This is the node.js code using the inline editor:
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
console.log('Loading function');
exports.handler = async (event) => {
// TODO implement
var responseMsg = '';
var bucket = '';
var key = '';
if ('Records' in event) {
var s3Data = event.Records[0].s3;
console.log('s3Data: ' + JSON.stringify(s3Data));
bucket = s3Data.bucket.name;
key = s3Data.object.key;
}
console.log('Bucket:' + bucket);
console.log('Key:' + key);
var params = {
Bucket: bucket,
Key: key
};
console.log('Params:' + JSON.stringify(params));
s3.getObject(params, function (err, data) {
console.log('getObject');
if (err) {
console.log(err, err.stack);
return err;
}
responseMsg = data;
});
const response = {
statusCode: 200,
body: JSON.stringify(responseMsg),
};
return response;
};
I know that the key and bucket I'm testing with exists in my S3 console. I know that I can access the them using C# in LINQPad.
When I run this, I'm not getting any errors. I'm getting an empty string in the body of response, rather than the content of the object. I'm also not getting any log messages from within the s3.getObject.

The call to s3.getObject is an asynchronous call. The execution of the code continues while the s3 code is run. You need to explicitly await for the call's promise to resolve.
This is how you would do that (note the change in the s3.getObject call):
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
exports.handler = async (event) => {
var params = {
Bucket: <bucket>,
Key: <key>,
};
const data = await s3.getObject(params).promise();
const response = {
statusCode: 200,
body: JSON.stringify(data),
};
return response;
};
You can use a try/catch block for error handling.
The important thing to understand here is the timing of the execution. The mainline code in your function is called sequentially when the lambda is invoked. The callback function that you pass to the s3.getObject call is called when a response from S3 arrives, long after your lambda had finished its execution.
Your return call is executed before the callback runs, and hence you see the result of JSON.strigify(responseMsg) where responseMsg holds the initial value you gave it, which is the empty string ''.

Related

Lambda function not pushing data into DynamoDB table

I'm running a NodeJS lambda function which is triggered by API Gateway.
My goal is to push the data and then send a status response. I think the lambda stops running before the insertData function finishes its execution, because sometimes it works but in most requests it doesn't.
Could someone lend a hand on this?
Here is my code:
// Set a table name that we can use later on
const tableName = "InterestRates"
// Load the AWS SDK for Node.js
var AWS = require('aws-sdk');
// Set the region
AWS.config.update({region: 'us-east-1'});
// Create the DynamoDB service object
var ddb = new AWS.DynamoDB({apiVersion: '2012-08-10'});
exports.handler = async (event) => {
// TODO implement
console.log(">>> Running LoanInterestRates")
if(event['resource'] == '/rates'){
if(event.httpMethod === 'POST'){
return newRate(event);
}
}
};
function insertData(params){
let status;
// Call DynamoDB to add the item to the table
ddb.putItem(params, function(err, data) {
if (err) {
status = "Error";
} else {
status = "Success";
}
});
return status
}
function newRate (event){
const newRate = JSON.parse(event.body);
var params = {
TableName: 'InterestRates',
Item: {
'termlength' : {N: newRate["length"]},
'InterestRate': {S: newRate["rate"]}
}
};
let addNewRate = insertData(params);
return {
statusCode: 200,
body: JSON.stringify({
response: addNewRate
})
}
}
I also tried using Async/Await but it didn't work.
You lambda function is Async but your code is not. You need to await the completion of your function newRate which in turn should also await the function inserData which should also await your DDB request.
I would advise you to do one of two things:
Learn how JS Async nature works and ensure you understand when you need to await.
Use a synchronous programming language like Python/Boto3 where you will not run into such issues.

NodeJS: upload image file to s3 bucket using streams [duplicate]

I'm currently making use of a node.js plugin called s3-upload-stream to stream very large files to Amazon S3. It uses the multipart API and for the most part it works very well.
However, this module is showing its age and I've already had to make modifications to it (the author has deprecated it as well). Today I ran into another issue with Amazon, and I would really like to take the author's recommendation and start using the official aws-sdk to accomplish my uploads.
BUT.
The official SDK does not seem to support piping to s3.upload(). The nature of s3.upload is that you have to pass the readable stream as an argument to the S3 constructor.
I have roughly 120+ user code modules that do various file processing, and they are agnostic to the final destination of their output. The engine hands them a pipeable writeable output stream, and they pipe to it. I cannot hand them an AWS.S3 object and ask them to call upload() on it without adding code to all the modules. The reason I used s3-upload-stream was because it supported piping.
Is there a way to make aws-sdk s3.upload() something I can pipe the stream to?
Wrap the S3 upload() function with the node.js stream.PassThrough() stream.
Here's an example:
inputStream
.pipe(uploadFromStream(s3));
function uploadFromStream(s3) {
var pass = new stream.PassThrough();
var params = {Bucket: BUCKET, Key: KEY, Body: pass};
s3.upload(params, function(err, data) {
console.log(err, data);
});
return pass;
}
A bit late answer, it might help someone else hopefully. You can return both writeable stream and the promise, so you can get response data when the upload finishes.
const AWS = require('aws-sdk');
const stream = require('stream');
const uploadStream = ({ Bucket, Key }) => {
const s3 = new AWS.S3();
const pass = new stream.PassThrough();
return {
writeStream: pass,
promise: s3.upload({ Bucket, Key, Body: pass }).promise(),
};
}
And you can use the function as follows:
const { writeStream, promise } = uploadStream({Bucket: 'yourbucket', Key: 'yourfile.mp4'});
const readStream = fs.createReadStream('/path/to/yourfile.mp4');
const pipeline = readStream.pipe(writeStream);
Now you can either check promise:
promise.then(() => {
console.log('upload completed successfully');
}).catch((err) => {
console.log('upload failed.', err.message);
});
Or using async/await:
try {
await promise;
console.log('upload completed successfully');
} catch (error) {
console.log('upload failed.', error.message);
}
Or as stream.pipe() returns stream.Writable, the destination (writeStream variable above), allowing for a chain of pipes, we can also use its events:
pipeline.on('close', () => {
console.log('upload successful');
});
pipeline.on('error', (err) => {
console.log('upload failed', err.message)
});
In the accepted answer, the function ends before the upload is complete, and thus, it's incorrect. The code below pipes correctly from a readable stream.
Upload reference
async function uploadReadableStream(stream) {
const params = {Bucket: bucket, Key: key, Body: stream};
return s3.upload(params).promise();
}
async function upload() {
const readable = getSomeReadableStream();
const results = await uploadReadableStream(readable);
console.log('upload complete', results);
}
You can also go a step further and output progress info using ManagedUpload as such:
const manager = s3.upload(params);
manager.on('httpUploadProgress', (progress) => {
console.log('progress', progress) // { loaded: 4915, total: 192915, part: 1, key: 'foo.jpg' }
});
ManagedUpload reference
A list of available events
I think it's worth updating the answer for AWS SDK v3 :).
S3 Client doesn't have upload function anymore and the #aws-sdk/lib-storage package is suggested instead as per https://github.com/aws/aws-sdk-js-v3/blob/main/lib/lib-storage/README.md
Hence the resulting snippet should look like this:
import { S3Client } from '#aws-sdk/client-s3';
import { Upload } from '#aws-sdk/lib-storage';
const stream = require('stream');
...
const client = new S3Client({
credentials: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
},
region: process.env.AWS_DEFAULT_REGION,
});
...
async function uploadStream(readableStream) {
const Key = 'filename.pdf';
const Bucket = 'bucket-name';
const passThroughStream = new stream.PassThrough();
let res;
try {
const parallelUploads3 = new Upload({
client,
params: {
Bucket,
Key,
Body: passThroughStream,
ACL:'public-read',
},
queueSize: 4,
partSize: 1024 * 1024 * 5,
leavePartsOnError: false,
});
readableStream.pipe(passThroughStream);
res = await parallelUploads3.done();
} catch (e) {
console.log(e);
}
return res;
}
None of the answers worked for me because I wanted to:
Pipe into s3.upload()
Pipe the result of s3.upload() into another stream
The accepted answer doesn't do the latter. The others rely on the promise api, which is cumbersome to work when working with stream pipes.
This is my modification of the accepted answer.
const s3 = new S3();
function writeToS3({Key, Bucket}) {
const Body = new stream.PassThrough();
s3.upload({
Body,
Key,
Bucket: process.env.adpBucket
})
.on('httpUploadProgress', progress => {
console.log('progress', progress);
})
.send((err, data) => {
if (err) {
Body.destroy(err);
} else {
console.log(`File uploaded and available at ${data.Location}`);
Body.destroy();
}
});
return Body;
}
const pipeline = myReadableStream.pipe(writeToS3({Key, Bucket});
pipeline.on('close', () => {
// upload finished, do something else
})
pipeline.on('error', () => {
// upload wasn't successful. Handle it
})
Type Script solution:
This example uses:
import * as AWS from "aws-sdk";
import * as fsExtra from "fs-extra";
import * as zlib from "zlib";
import * as stream from "stream";
And async function:
public async saveFile(filePath: string, s3Bucket: AWS.S3, key: string, bucketName: string): Promise<boolean> {
const uploadStream = (S3: AWS.S3, Bucket: string, Key: string) => {
const passT = new stream.PassThrough();
return {
writeStream: passT,
promise: S3.upload({ Bucket, Key, Body: passT }).promise(),
};
};
const { writeStream, promise } = uploadStream(s3Bucket, bucketName, key);
fsExtra.createReadStream(filePath).pipe(writeStream); // NOTE: Addition You can compress to zip by .pipe(zlib.createGzip()).pipe(writeStream)
let output = true;
await promise.catch((reason)=> { output = false; console.log(reason);});
return output;
}
Call this method somewhere like:
let result = await saveFileToS3(testFilePath, someS3Bucket, someKey, someBucketName);
The thing here to note in the most accepted answer above is that:
You need to return the pass in the function if you are using pipe like,
fs.createReadStream(<filePath>).pipe(anyUploadFunction())
function anyUploadFunction () {
let pass = new stream.PassThrough();
return pass // <- Returning this pass is important for the stream to understand where it needs to write to.
}
Otherwise it will silently move onto next without throwing an error or will throw an error of TypeError: dest.on is not a function depending upon how you have written the function
Following the other answers and using the latest AWS SDK for Node.js, there's a much cleaner and simpler solution since the s3 upload() function accepts a stream, using await syntax and S3's promise:
var model = await s3Client.upload({
Bucket : bucket,
Key : key,
ContentType : yourContentType,
Body : fs.createReadStream(path-to-file)
}).promise();
For those complaining that the when they use the s3 api upload function and a zero byte file ends up on s3 (#Radar155 and #gabo) - I also had this problem.
Create a second PassThrough stream and just pipe all data from the first to the second and pass the reference to that second to s3. You can do this in a couple of different ways - possibly a dirty way is to listen for the "data" event on the first stream and then write that same data to the second stream - the similarly for the "end" event - just call the end function on the second stream. I've no idea whether this is a bug in the aws api, the version of node or some other issue - but it worked around the issue for me.
Here is how it might look:
var PassThroughStream = require('stream').PassThrough;
var srcStream = new PassThroughStream();
var rstream = fs.createReadStream('Learning/stocktest.json');
var sameStream = rstream.pipe(srcStream);
// interesting note: (srcStream == sameStream) at this point
var destStream = new PassThroughStream();
// call your s3.upload function here - passing in the destStream as the Body parameter
srcStream.on('data', function (chunk) {
destStream.write(chunk);
});
srcStream.on('end', function () {
dataStream.end();
});
If it helps anyone I was able to stream from the client to s3 successfully:
https://gist.github.com/mattlockyer/532291b6194f6d9ca40cb82564db9d2a
The serverside code assumes req is a stream object, in my case it was sent from the client with file info set in the headers.
const fileUploadStream = (req, res) => {
//get "body" args from header
const { id, fn } = JSON.parse(req.get('body'));
const Key = id + '/' + fn; //upload to s3 folder "id" with filename === fn
const params = {
Key,
Bucket: bucketName, //set somewhere
Body: req, //req is a stream
};
s3.upload(params, (err, data) => {
if (err) {
res.send('Error Uploading Data: ' + JSON.stringify(err) + '\n' + JSON.stringify(err.stack));
} else {
res.send(Key);
}
});
};
Yes it breaks convention but if you look at the gist it's much cleaner than anything else I found using multer, busboy etc...
+1 for pragmatism and thanks to #SalehenRahman for his help.
If you're using AWS node SDK v3 there is dedicated module for uploading streams/blobs/buffers.
https://www.npmjs.com/package/#aws-sdk/lib-storage
I'm using KnexJS and had a problem using their streaming API. I finally fixed it, hopefully the following will help someone.
const knexStream = knex.select('*').from('my_table').stream();
const passThroughStream = new stream.PassThrough();
knexStream.on('data', (chunk) => passThroughStream.write(JSON.stringify(chunk) + '\n'));
knexStream.on('end', () => passThroughStream.end());
const uploadResult = await s3
.upload({
Bucket: 'my-bucket',
Key: 'stream-test.txt',
Body: passThroughStream
})
.promise();
Create a new stream.PassThrough() and pipe the input stream to it, then pass the passthrough instance to the body.
Check the following example:
function upload(s3, inputStream) {
const pass = new PassThrough();
inputStream.pipe(pass);
return s3.upload(
{
Bucket: 'bucket name',
Key: 'unique file name',
Body: pass,
},
{
queueSize: 4, // default concurrency
},
).promise()
.then((data) => console.log(data))
.catch((error) => console.error(error));
}
If you know the size of the stream you can use minio-js to upload the stream like this:
s3Client.putObject('my-bucketname', 'my-objectname.ogg', stream, size, 'audio/ogg', function(e) {
if (e) {
return console.log(e)
}
console.log("Successfully uploaded the stream")
})

Response from AWS lambda function called from another lambda function is always null

I have two lambda functions and I need to call a function named sendHealthData from a function named receiveHealthData. I'm using Node.JS 8.10 and the Serverless framework.
Here's the code to receiveHealthData:
const env = process.env;
const aws = require("aws-sdk");
const Lambda = new aws.Lambda();
const S3 = new aws.S3();
exports.main = (event, context, callback) => {
const params = {
FunctionName: "sendHealthData",
InvocationType: "RequestResponse",
Payload: JSON.stringify(event)
}
Lambda.invoke(params, function(error, remainingHealthData) {
if (error) {
reject(error);
}
else {
console.log("Remaining: " + remainingHealthData["Payload"]);
if (!remainingHealthData["Payload"]) {
reject(new Error("Payload is null"));
}
else {
resolve(remainingHealthData);
}
}
});
}
And this is sendHealthData:
exports.main = async (event, context, callback) => {
callback(null, "Sent Health Data!");
}
remainingHealthData["Payload"] is null everytime.
The output of console.log(JSON.stringify(remainingHealthData)) is:
{"StatusCode":200,"Payload":'null'}
When I invoke sendHealthData through serverless invoke --function sendHealthData I get the expected result: "Sent Health Data!"
I got the expected response only once: when I changed the timeout of the sendHealthData function. But the strange thing is that I changed it to a smaller value. It was 10 and I changed it to 6.
The issue is that you are using RequestResponse as InvocationType but your sendHealthData AWS Lambda doesn't return a valid JSON (just a string).
A small quote out of the documentation says:
Payload — (Buffer, Typed Array, Blob, String)
It is the JSON representation of the object returned by the Lambda function. This is present only if the invocation type is RequestResponse.
So as soon as you change the return value of your sendHealthData AWS Lambda to the following it should work as you expect:
exports.main = async (event, context, callback) => {
callback(null, {
"message": "Sent Health Data!"
});
}

Upload from writestream to S3 in node.js [duplicate]

I'm currently making use of a node.js plugin called s3-upload-stream to stream very large files to Amazon S3. It uses the multipart API and for the most part it works very well.
However, this module is showing its age and I've already had to make modifications to it (the author has deprecated it as well). Today I ran into another issue with Amazon, and I would really like to take the author's recommendation and start using the official aws-sdk to accomplish my uploads.
BUT.
The official SDK does not seem to support piping to s3.upload(). The nature of s3.upload is that you have to pass the readable stream as an argument to the S3 constructor.
I have roughly 120+ user code modules that do various file processing, and they are agnostic to the final destination of their output. The engine hands them a pipeable writeable output stream, and they pipe to it. I cannot hand them an AWS.S3 object and ask them to call upload() on it without adding code to all the modules. The reason I used s3-upload-stream was because it supported piping.
Is there a way to make aws-sdk s3.upload() something I can pipe the stream to?
Wrap the S3 upload() function with the node.js stream.PassThrough() stream.
Here's an example:
inputStream
.pipe(uploadFromStream(s3));
function uploadFromStream(s3) {
var pass = new stream.PassThrough();
var params = {Bucket: BUCKET, Key: KEY, Body: pass};
s3.upload(params, function(err, data) {
console.log(err, data);
});
return pass;
}
A bit late answer, it might help someone else hopefully. You can return both writeable stream and the promise, so you can get response data when the upload finishes.
const AWS = require('aws-sdk');
const stream = require('stream');
const uploadStream = ({ Bucket, Key }) => {
const s3 = new AWS.S3();
const pass = new stream.PassThrough();
return {
writeStream: pass,
promise: s3.upload({ Bucket, Key, Body: pass }).promise(),
};
}
And you can use the function as follows:
const { writeStream, promise } = uploadStream({Bucket: 'yourbucket', Key: 'yourfile.mp4'});
const readStream = fs.createReadStream('/path/to/yourfile.mp4');
const pipeline = readStream.pipe(writeStream);
Now you can either check promise:
promise.then(() => {
console.log('upload completed successfully');
}).catch((err) => {
console.log('upload failed.', err.message);
});
Or using async/await:
try {
await promise;
console.log('upload completed successfully');
} catch (error) {
console.log('upload failed.', error.message);
}
Or as stream.pipe() returns stream.Writable, the destination (writeStream variable above), allowing for a chain of pipes, we can also use its events:
pipeline.on('close', () => {
console.log('upload successful');
});
pipeline.on('error', (err) => {
console.log('upload failed', err.message)
});
In the accepted answer, the function ends before the upload is complete, and thus, it's incorrect. The code below pipes correctly from a readable stream.
Upload reference
async function uploadReadableStream(stream) {
const params = {Bucket: bucket, Key: key, Body: stream};
return s3.upload(params).promise();
}
async function upload() {
const readable = getSomeReadableStream();
const results = await uploadReadableStream(readable);
console.log('upload complete', results);
}
You can also go a step further and output progress info using ManagedUpload as such:
const manager = s3.upload(params);
manager.on('httpUploadProgress', (progress) => {
console.log('progress', progress) // { loaded: 4915, total: 192915, part: 1, key: 'foo.jpg' }
});
ManagedUpload reference
A list of available events
I think it's worth updating the answer for AWS SDK v3 :).
S3 Client doesn't have upload function anymore and the #aws-sdk/lib-storage package is suggested instead as per https://github.com/aws/aws-sdk-js-v3/blob/main/lib/lib-storage/README.md
Hence the resulting snippet should look like this:
import { S3Client } from '#aws-sdk/client-s3';
import { Upload } from '#aws-sdk/lib-storage';
const stream = require('stream');
...
const client = new S3Client({
credentials: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
},
region: process.env.AWS_DEFAULT_REGION,
});
...
async function uploadStream(readableStream) {
const Key = 'filename.pdf';
const Bucket = 'bucket-name';
const passThroughStream = new stream.PassThrough();
let res;
try {
const parallelUploads3 = new Upload({
client,
params: {
Bucket,
Key,
Body: passThroughStream,
ACL:'public-read',
},
queueSize: 4,
partSize: 1024 * 1024 * 5,
leavePartsOnError: false,
});
readableStream.pipe(passThroughStream);
res = await parallelUploads3.done();
} catch (e) {
console.log(e);
}
return res;
}
None of the answers worked for me because I wanted to:
Pipe into s3.upload()
Pipe the result of s3.upload() into another stream
The accepted answer doesn't do the latter. The others rely on the promise api, which is cumbersome to work when working with stream pipes.
This is my modification of the accepted answer.
const s3 = new S3();
function writeToS3({Key, Bucket}) {
const Body = new stream.PassThrough();
s3.upload({
Body,
Key,
Bucket: process.env.adpBucket
})
.on('httpUploadProgress', progress => {
console.log('progress', progress);
})
.send((err, data) => {
if (err) {
Body.destroy(err);
} else {
console.log(`File uploaded and available at ${data.Location}`);
Body.destroy();
}
});
return Body;
}
const pipeline = myReadableStream.pipe(writeToS3({Key, Bucket});
pipeline.on('close', () => {
// upload finished, do something else
})
pipeline.on('error', () => {
// upload wasn't successful. Handle it
})
Type Script solution:
This example uses:
import * as AWS from "aws-sdk";
import * as fsExtra from "fs-extra";
import * as zlib from "zlib";
import * as stream from "stream";
And async function:
public async saveFile(filePath: string, s3Bucket: AWS.S3, key: string, bucketName: string): Promise<boolean> {
const uploadStream = (S3: AWS.S3, Bucket: string, Key: string) => {
const passT = new stream.PassThrough();
return {
writeStream: passT,
promise: S3.upload({ Bucket, Key, Body: passT }).promise(),
};
};
const { writeStream, promise } = uploadStream(s3Bucket, bucketName, key);
fsExtra.createReadStream(filePath).pipe(writeStream); // NOTE: Addition You can compress to zip by .pipe(zlib.createGzip()).pipe(writeStream)
let output = true;
await promise.catch((reason)=> { output = false; console.log(reason);});
return output;
}
Call this method somewhere like:
let result = await saveFileToS3(testFilePath, someS3Bucket, someKey, someBucketName);
The thing here to note in the most accepted answer above is that:
You need to return the pass in the function if you are using pipe like,
fs.createReadStream(<filePath>).pipe(anyUploadFunction())
function anyUploadFunction () {
let pass = new stream.PassThrough();
return pass // <- Returning this pass is important for the stream to understand where it needs to write to.
}
Otherwise it will silently move onto next without throwing an error or will throw an error of TypeError: dest.on is not a function depending upon how you have written the function
Following the other answers and using the latest AWS SDK for Node.js, there's a much cleaner and simpler solution since the s3 upload() function accepts a stream, using await syntax and S3's promise:
var model = await s3Client.upload({
Bucket : bucket,
Key : key,
ContentType : yourContentType,
Body : fs.createReadStream(path-to-file)
}).promise();
For those complaining that the when they use the s3 api upload function and a zero byte file ends up on s3 (#Radar155 and #gabo) - I also had this problem.
Create a second PassThrough stream and just pipe all data from the first to the second and pass the reference to that second to s3. You can do this in a couple of different ways - possibly a dirty way is to listen for the "data" event on the first stream and then write that same data to the second stream - the similarly for the "end" event - just call the end function on the second stream. I've no idea whether this is a bug in the aws api, the version of node or some other issue - but it worked around the issue for me.
Here is how it might look:
var PassThroughStream = require('stream').PassThrough;
var srcStream = new PassThroughStream();
var rstream = fs.createReadStream('Learning/stocktest.json');
var sameStream = rstream.pipe(srcStream);
// interesting note: (srcStream == sameStream) at this point
var destStream = new PassThroughStream();
// call your s3.upload function here - passing in the destStream as the Body parameter
srcStream.on('data', function (chunk) {
destStream.write(chunk);
});
srcStream.on('end', function () {
dataStream.end();
});
If it helps anyone I was able to stream from the client to s3 successfully:
https://gist.github.com/mattlockyer/532291b6194f6d9ca40cb82564db9d2a
The serverside code assumes req is a stream object, in my case it was sent from the client with file info set in the headers.
const fileUploadStream = (req, res) => {
//get "body" args from header
const { id, fn } = JSON.parse(req.get('body'));
const Key = id + '/' + fn; //upload to s3 folder "id" with filename === fn
const params = {
Key,
Bucket: bucketName, //set somewhere
Body: req, //req is a stream
};
s3.upload(params, (err, data) => {
if (err) {
res.send('Error Uploading Data: ' + JSON.stringify(err) + '\n' + JSON.stringify(err.stack));
} else {
res.send(Key);
}
});
};
Yes it breaks convention but if you look at the gist it's much cleaner than anything else I found using multer, busboy etc...
+1 for pragmatism and thanks to #SalehenRahman for his help.
If you're using AWS node SDK v3 there is dedicated module for uploading streams/blobs/buffers.
https://www.npmjs.com/package/#aws-sdk/lib-storage
I'm using KnexJS and had a problem using their streaming API. I finally fixed it, hopefully the following will help someone.
const knexStream = knex.select('*').from('my_table').stream();
const passThroughStream = new stream.PassThrough();
knexStream.on('data', (chunk) => passThroughStream.write(JSON.stringify(chunk) + '\n'));
knexStream.on('end', () => passThroughStream.end());
const uploadResult = await s3
.upload({
Bucket: 'my-bucket',
Key: 'stream-test.txt',
Body: passThroughStream
})
.promise();
Create a new stream.PassThrough() and pipe the input stream to it, then pass the passthrough instance to the body.
Check the following example:
function upload(s3, inputStream) {
const pass = new PassThrough();
inputStream.pipe(pass);
return s3.upload(
{
Bucket: 'bucket name',
Key: 'unique file name',
Body: pass,
},
{
queueSize: 4, // default concurrency
},
).promise()
.then((data) => console.log(data))
.catch((error) => console.error(error));
}
If you know the size of the stream you can use minio-js to upload the stream like this:
s3Client.putObject('my-bucketname', 'my-objectname.ogg', stream, size, 'audio/ogg', function(e) {
if (e) {
return console.log(e)
}
console.log("Successfully uploaded the stream")
})

Pipe a stream to s3.upload()

I'm currently making use of a node.js plugin called s3-upload-stream to stream very large files to Amazon S3. It uses the multipart API and for the most part it works very well.
However, this module is showing its age and I've already had to make modifications to it (the author has deprecated it as well). Today I ran into another issue with Amazon, and I would really like to take the author's recommendation and start using the official aws-sdk to accomplish my uploads.
BUT.
The official SDK does not seem to support piping to s3.upload(). The nature of s3.upload is that you have to pass the readable stream as an argument to the S3 constructor.
I have roughly 120+ user code modules that do various file processing, and they are agnostic to the final destination of their output. The engine hands them a pipeable writeable output stream, and they pipe to it. I cannot hand them an AWS.S3 object and ask them to call upload() on it without adding code to all the modules. The reason I used s3-upload-stream was because it supported piping.
Is there a way to make aws-sdk s3.upload() something I can pipe the stream to?
Wrap the S3 upload() function with the node.js stream.PassThrough() stream.
Here's an example:
inputStream
.pipe(uploadFromStream(s3));
function uploadFromStream(s3) {
var pass = new stream.PassThrough();
var params = {Bucket: BUCKET, Key: KEY, Body: pass};
s3.upload(params, function(err, data) {
console.log(err, data);
});
return pass;
}
A bit late answer, it might help someone else hopefully. You can return both writeable stream and the promise, so you can get response data when the upload finishes.
const AWS = require('aws-sdk');
const stream = require('stream');
const uploadStream = ({ Bucket, Key }) => {
const s3 = new AWS.S3();
const pass = new stream.PassThrough();
return {
writeStream: pass,
promise: s3.upload({ Bucket, Key, Body: pass }).promise(),
};
}
And you can use the function as follows:
const { writeStream, promise } = uploadStream({Bucket: 'yourbucket', Key: 'yourfile.mp4'});
const readStream = fs.createReadStream('/path/to/yourfile.mp4');
const pipeline = readStream.pipe(writeStream);
Now you can either check promise:
promise.then(() => {
console.log('upload completed successfully');
}).catch((err) => {
console.log('upload failed.', err.message);
});
Or using async/await:
try {
await promise;
console.log('upload completed successfully');
} catch (error) {
console.log('upload failed.', error.message);
}
Or as stream.pipe() returns stream.Writable, the destination (writeStream variable above), allowing for a chain of pipes, we can also use its events:
pipeline.on('close', () => {
console.log('upload successful');
});
pipeline.on('error', (err) => {
console.log('upload failed', err.message)
});
In the accepted answer, the function ends before the upload is complete, and thus, it's incorrect. The code below pipes correctly from a readable stream.
Upload reference
async function uploadReadableStream(stream) {
const params = {Bucket: bucket, Key: key, Body: stream};
return s3.upload(params).promise();
}
async function upload() {
const readable = getSomeReadableStream();
const results = await uploadReadableStream(readable);
console.log('upload complete', results);
}
You can also go a step further and output progress info using ManagedUpload as such:
const manager = s3.upload(params);
manager.on('httpUploadProgress', (progress) => {
console.log('progress', progress) // { loaded: 4915, total: 192915, part: 1, key: 'foo.jpg' }
});
ManagedUpload reference
A list of available events
I think it's worth updating the answer for AWS SDK v3 :).
S3 Client doesn't have upload function anymore and the #aws-sdk/lib-storage package is suggested instead as per https://github.com/aws/aws-sdk-js-v3/blob/main/lib/lib-storage/README.md
Hence the resulting snippet should look like this:
import { S3Client } from '#aws-sdk/client-s3';
import { Upload } from '#aws-sdk/lib-storage';
const stream = require('stream');
...
const client = new S3Client({
credentials: {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
},
region: process.env.AWS_DEFAULT_REGION,
});
...
async function uploadStream(readableStream) {
const Key = 'filename.pdf';
const Bucket = 'bucket-name';
const passThroughStream = new stream.PassThrough();
let res;
try {
const parallelUploads3 = new Upload({
client,
params: {
Bucket,
Key,
Body: passThroughStream,
ACL:'public-read',
},
queueSize: 4,
partSize: 1024 * 1024 * 5,
leavePartsOnError: false,
});
readableStream.pipe(passThroughStream);
res = await parallelUploads3.done();
} catch (e) {
console.log(e);
}
return res;
}
None of the answers worked for me because I wanted to:
Pipe into s3.upload()
Pipe the result of s3.upload() into another stream
The accepted answer doesn't do the latter. The others rely on the promise api, which is cumbersome to work when working with stream pipes.
This is my modification of the accepted answer.
const s3 = new S3();
function writeToS3({Key, Bucket}) {
const Body = new stream.PassThrough();
s3.upload({
Body,
Key,
Bucket: process.env.adpBucket
})
.on('httpUploadProgress', progress => {
console.log('progress', progress);
})
.send((err, data) => {
if (err) {
Body.destroy(err);
} else {
console.log(`File uploaded and available at ${data.Location}`);
Body.destroy();
}
});
return Body;
}
const pipeline = myReadableStream.pipe(writeToS3({Key, Bucket});
pipeline.on('close', () => {
// upload finished, do something else
})
pipeline.on('error', () => {
// upload wasn't successful. Handle it
})
Type Script solution:
This example uses:
import * as AWS from "aws-sdk";
import * as fsExtra from "fs-extra";
import * as zlib from "zlib";
import * as stream from "stream";
And async function:
public async saveFile(filePath: string, s3Bucket: AWS.S3, key: string, bucketName: string): Promise<boolean> {
const uploadStream = (S3: AWS.S3, Bucket: string, Key: string) => {
const passT = new stream.PassThrough();
return {
writeStream: passT,
promise: S3.upload({ Bucket, Key, Body: passT }).promise(),
};
};
const { writeStream, promise } = uploadStream(s3Bucket, bucketName, key);
fsExtra.createReadStream(filePath).pipe(writeStream); // NOTE: Addition You can compress to zip by .pipe(zlib.createGzip()).pipe(writeStream)
let output = true;
await promise.catch((reason)=> { output = false; console.log(reason);});
return output;
}
Call this method somewhere like:
let result = await saveFileToS3(testFilePath, someS3Bucket, someKey, someBucketName);
The thing here to note in the most accepted answer above is that:
You need to return the pass in the function if you are using pipe like,
fs.createReadStream(<filePath>).pipe(anyUploadFunction())
function anyUploadFunction () {
let pass = new stream.PassThrough();
return pass // <- Returning this pass is important for the stream to understand where it needs to write to.
}
Otherwise it will silently move onto next without throwing an error or will throw an error of TypeError: dest.on is not a function depending upon how you have written the function
Following the other answers and using the latest AWS SDK for Node.js, there's a much cleaner and simpler solution since the s3 upload() function accepts a stream, using await syntax and S3's promise:
var model = await s3Client.upload({
Bucket : bucket,
Key : key,
ContentType : yourContentType,
Body : fs.createReadStream(path-to-file)
}).promise();
For those complaining that the when they use the s3 api upload function and a zero byte file ends up on s3 (#Radar155 and #gabo) - I also had this problem.
Create a second PassThrough stream and just pipe all data from the first to the second and pass the reference to that second to s3. You can do this in a couple of different ways - possibly a dirty way is to listen for the "data" event on the first stream and then write that same data to the second stream - the similarly for the "end" event - just call the end function on the second stream. I've no idea whether this is a bug in the aws api, the version of node or some other issue - but it worked around the issue for me.
Here is how it might look:
var PassThroughStream = require('stream').PassThrough;
var srcStream = new PassThroughStream();
var rstream = fs.createReadStream('Learning/stocktest.json');
var sameStream = rstream.pipe(srcStream);
// interesting note: (srcStream == sameStream) at this point
var destStream = new PassThroughStream();
// call your s3.upload function here - passing in the destStream as the Body parameter
srcStream.on('data', function (chunk) {
destStream.write(chunk);
});
srcStream.on('end', function () {
dataStream.end();
});
If it helps anyone I was able to stream from the client to s3 successfully:
https://gist.github.com/mattlockyer/532291b6194f6d9ca40cb82564db9d2a
The serverside code assumes req is a stream object, in my case it was sent from the client with file info set in the headers.
const fileUploadStream = (req, res) => {
//get "body" args from header
const { id, fn } = JSON.parse(req.get('body'));
const Key = id + '/' + fn; //upload to s3 folder "id" with filename === fn
const params = {
Key,
Bucket: bucketName, //set somewhere
Body: req, //req is a stream
};
s3.upload(params, (err, data) => {
if (err) {
res.send('Error Uploading Data: ' + JSON.stringify(err) + '\n' + JSON.stringify(err.stack));
} else {
res.send(Key);
}
});
};
Yes it breaks convention but if you look at the gist it's much cleaner than anything else I found using multer, busboy etc...
+1 for pragmatism and thanks to #SalehenRahman for his help.
If you're using AWS node SDK v3 there is dedicated module for uploading streams/blobs/buffers.
https://www.npmjs.com/package/#aws-sdk/lib-storage
I'm using KnexJS and had a problem using their streaming API. I finally fixed it, hopefully the following will help someone.
const knexStream = knex.select('*').from('my_table').stream();
const passThroughStream = new stream.PassThrough();
knexStream.on('data', (chunk) => passThroughStream.write(JSON.stringify(chunk) + '\n'));
knexStream.on('end', () => passThroughStream.end());
const uploadResult = await s3
.upload({
Bucket: 'my-bucket',
Key: 'stream-test.txt',
Body: passThroughStream
})
.promise();
Create a new stream.PassThrough() and pipe the input stream to it, then pass the passthrough instance to the body.
Check the following example:
function upload(s3, inputStream) {
const pass = new PassThrough();
inputStream.pipe(pass);
return s3.upload(
{
Bucket: 'bucket name',
Key: 'unique file name',
Body: pass,
},
{
queueSize: 4, // default concurrency
},
).promise()
.then((data) => console.log(data))
.catch((error) => console.error(error));
}
If you know the size of the stream you can use minio-js to upload the stream like this:
s3Client.putObject('my-bucketname', 'my-objectname.ogg', stream, size, 'audio/ogg', function(e) {
if (e) {
return console.log(e)
}
console.log("Successfully uploaded the stream")
})

Resources