Stream uploading file to S3 on Node.js using formidable and (knox or aws-sdk) - node.js

I'm trying to stream upload a file submitted via a form directly to an Amazon S3 bucket, using aws-sdk or knox. Form handling is done with formidable.
My question is: how do I properly use formidable with aws-sdk (or knox) using each of these libraries' latest features for handling streams?
I'm aware that this topic has already been asked here in different flavors, e.g.:
How to receive an uploaded file using node.js formidable library and save it to Amazon S3 using knox?
node application stream file upload directly to amazon s3
Accessing the raw file stream from a node-formidable file upload (and its very useful accepted answer on overriding form.onPart())
However, I believe the answers are a bit outdated and/or off topic (e.g. CORS support, which I don't wish to use for now for various reasons) and/or, most importantly, make no reference to the latest features of either aws-sdk (see: https://github.com/aws/aws-sdk-js/issues/13#issuecomment-16085442) or knox (notably putStream() or its readableStream.pipe(req) variant, both explained in the docs).
After hours of struggling, I came to the conclusion that I needed some help (disclaimer: I'm quite a newbie with streams).
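For reference, the knox pipe variant mentioned above looks roughly like this (adapted from the knox README; the keys, bucket, source URL and paths are placeholders):
var http = require('http');
var knox = require('knox');
var client = knox.createClient({ key: '<key>', secret: '<secret>', bucket: 'mybucket' });
// Pipe any readable stream into a PUT request; the length must be known up front.
http.get('http://example.com/some-image.png', function (res) {
  var put = client.put('/some-image.png', {
    'Content-Length': res.headers['content-length'],
    'Content-Type': res.headers['content-type']
  });
  put.on('response', function (s3res) {
    console.log('S3 responded with', s3res.statusCode);
  });
  res.pipe(put);
});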
HTML form:
<form action="/uploadPicture" method="post" enctype="multipart/form-data">
<input name="picture" type="file" accept="image/*">
<input type="submit">
</form>
Express bodyParser middleware is configured this way:
app.use(express.bodyParser({defer: true}))
POST request handler:
uploadPicture = (req, res, next) ->
form = new formidable.IncomingForm()
form.parse(req)
form.onPart = (part) ->
if not part.filename
# Let formidable handle all non-file parts (fields)
form.handlePart(part)
else
handlePart(part, form.bytesExpected)
handlePart = (part, fileSize) ->
# aws-sdk version
params =
Bucket: "mybucket"
Key: part.filename
ContentLength: fileSize
Body: part # passing stream object as body parameter
awsS3client.putObject(params, (err, data) ->
if err
console.log err
else
console.log data
)
However, I'm getting the following error:
{ [RequestTimeout: Your socket connection to the server was not read from or written to within the timeout period. Idle connections will be closed.]
message: 'Your socket connection to the server was not read from or written to within the timeout period. Idle connections will be closed.',
code: 'RequestTimeout',
name: 'RequestTimeout',
statusCode: 400,
retryable: false }
A knox version of the handlePart() function, tailored this way, also fails miserably:
handlePart = (part, fileSize) ->
headers =
"Content-Length": fileSize
"Content-Type": part.mime
knoxS3client.putStream(part, part.filename, headers, (err, res) ->
if err
console.log err
else
console.log res
)
I also get a big res object with a 400 statusCode somewhere.
Region is configured to eu-west-1 in both cases.
Additional notes:
node 0.10.12
latest formidable from npm (1.0.14)
latest aws-sdk from npm (1.3.1)
latest knox from npm (0.8.3)

Using AWS S3's multipart upload (with s3-upload-stream as a working module) and node-formidable's readable stream, you can pipe the stream to the upload like this:
var formidable = require('formidable');
var http = require('http');
var util = require('util');
var AWS = require('aws-sdk');
var config = require('./config');
var s3 = new AWS.S3({
accessKeyId: config.get('S3_ACCESS_KEY'),
secretAccessKey: config.get('S3_SECRET_KEY'),
apiVersion: '2006-03-01'
});
var s3Stream = require('s3-upload-stream')(s3);
var bucket = 'bucket-name';
var key = 'abcdefgh';
http.createServer(function(req, res) {
if (req.url == '/upload' && req.method.toLowerCase() == 'post') {
var form = new formidable.IncomingForm();
form.on('progress', function(bytesReceived, bytesExpected) {
//console.log('onprogress', parseInt( 100 * bytesReceived / bytesExpected ), '%');
});
form.on('error', function(err) {
console.log('err',err);
});
// This 'end' is for the client to finish uploading
// upload.on('uploaded') is when the uploading is
// done on AWS S3
form.on('end', function() {
console.log('ended!!!!', arguments);
});
form.on('aborted', function() {
console.log('aborted', arguments);
});
form.onPart = function(part) {
console.log('part',part);
// part looks like this
// {
// readable: true,
// headers:
// {
// 'content-disposition': 'form-data; name="upload"; filename="00video38.mp4"',
// 'content-type': 'video/mp4'
// },
// name: 'upload',
// filename: '00video38.mp4',
// mime: 'video/mp4',
// transferEncoding: 'binary',
// transferBuffer: ''
// }
var start = new Date().getTime();
var upload = s3Stream.upload({
"Bucket": bucket,
"Key": part.filename
});
// Optional configuration
//upload.maxPartSize(20971520); // 20 MB
upload.concurrentParts(5);
// Handle errors.
upload.on('error', function (error) {
console.log('errr',error);
});
upload.on('part', function (details) {
console.log('part',details);
});
upload.on('uploaded', function (details) {
var end = new Date().getTime();
console.log('it took',end-start);
console.log('uploaded',details);
});
// Maybe you could add compress like
// part.pipe(compress).pipe(upload)
part.pipe(upload);
};
form.parse(req, function(err, fields, files) {
res.writeHead(200, {'content-type': 'text/plain'});
res.write('received upload:\n\n');
res.end(util.inspect({fields: fields, files: files}));
});
return;
}
// show a file upload form
res.writeHead(200, {'content-type': 'text/html'});
res.end(
'<form action="/upload" enctype="multipart/form-data" method="post">'+
'<input type="text" name="title"><br>'+
'<input type="file" name="upload" multiple="multiple"><br>'+
'<input type="submit" value="Upload">'+
'</form>'
);
}).listen(8080);

Well, according to the creator of Formidable, direct streaming to Amazon S3 is impossible:
The S3 API requires you to provide the size of new files when creating them. This information is not available for multipart/form-data files until they have been fully received. This means streaming is impossible.
Indeed, form.bytesExpected refers to the size of the whole form, not the size of a single file.
The data must therefore hit either memory or disk on the server before being uploaded to S3.
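For reference, a minimal sketch of that disk-first approach (it assumes formidable's default behavior of writing the upload to a temp file, the awsS3client from the question, and the same "picture" field and "mybucket" bucket):
var fs = require('fs');
var formidable = require('formidable');
// Let formidable buffer the upload to a temp file, then stream that file to S3
// with a ContentLength that is now known from the file on disk.
function uploadPicture(req, res, next) {
  var form = new formidable.IncomingForm();
  form.parse(req, function (err, fields, files) {
    if (err) return next(err);
    var file = files.picture; // matches the form field name
    awsS3client.putObject({
      Bucket: 'mybucket',
      Key: file.name,
      ContentLength: file.size, // known once the file is fully received
      Body: fs.createReadStream(file.path)
    }, function (err, data) {
      if (err) return next(err);
      res.end('uploaded');
    });
  });
}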

Since this post is so old, and I believe streaming directly is now supported, I spent a lot of time reading out-of-date answers on this topic...
If it helps anyone: I was able to stream from the client to S3 directly without needing to install any extra packages:
https://gist.github.com/mattlockyer/532291b6194f6d9ca40cb82564db9d2a
The server assumes req is a stream object; in my case, a File object was passed to xhr.send(), which sends binary data in modern browsers.
const fileUploadStream = (req, res) => {
//get "body" args from header
const { id, fn } = JSON.parse(req.get('body'));
const Key = id + '/' + fn; //upload to s3 folder "id" with filename === fn
const params = {
Key,
Bucket: bucketName, //set somewhere
Body: req, //req is a stream
};
s3.upload(params, (err, data) => {
if (err) {
res.send('Error Uploading Data: ' + JSON.stringify(err) + '\n' + JSON.stringify(err.stack));
} else {
res.send(Key);
}
});
};
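On the client side, this roughly corresponds to the following sketch (hypothetical: it assumes a File object from an <input type="file">, a /upload route wired to fileUploadStream, and the same 'body' header convention the handler reads):
// Send the raw File as the request body and pass the metadata the server
// expects ({ id, fn }) in a custom 'body' header.
function uploadFile(file, id) {
  var xhr = new XMLHttpRequest();
  xhr.open('POST', '/upload', true);
  xhr.setRequestHeader('body', JSON.stringify({ id: id, fn: file.name }));
  xhr.onload = function () {
    console.log('S3 key:', xhr.responseText); // the handler responds with the Key
  };
  xhr.send(file); // sends the file's binary data as the request body
}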
Yes, it breaks convention, but if you look at the gist it's much cleaner than anything else I found that relies on other packages.
+1 for pragmatism, and thanks to @SalehenRahman for his help.

Try adding 'ContentType' to the upload params (https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#upload-property):
...
const params = {
Key,
Bucket: bucketName,
Body: req,
ContentType: 'image/jpg'
};
s3.upload(params, (err, data) => {
if (err) return err;
console.log(data);
});
...

Related

How do I set my images uploaded to S3 with nodejs script to display instead of download?

I have a Node.js script that uploads files to AWS S3 through the command line. The problem I'm having is that when I try to view the file in the browser, it automatically downloads instead.
I have done some research, and most other posts point to the headers, but I have verified that the headers are correct (image/png).
Additionally, when I upload the same file through the AWS console (log into AWS), I am able to view the file within the browser.
var fs = require('fs');
var path = require('path');
var AWS = require('aws-sdk'); // needed for AWS.config and AWS.S3 below
AWS.config.update({region: myRegion});
s3 = new AWS.S3({apiVersion: '2006-03-01'});
var uploadParams = {
Bucket: process.argv[2],
Key: '', // Key set below
Body: '', // Body set below after createReadStream
ContentType: 'image/jpeg',
ACL: 'public-read',
ContentDisposition: 'inline'
};
var file = process.argv[3];
var fileStream = fs.createReadStream(file);
fileStream.on('error', function(err) {
console.log('File Error', err);
});
uploadParams.Body = fileStream;
uploadParams.Key = path.basename(file);
s3.putObject(uploadParams, function(errBucket, dataBucket) {
if (errBucket) {
console.log("Error uploading data: ", errBucket);
} else {
console.log(dataBucket);
}
});
The upload succeeds, but I'm unable to view the file in the browser because it auto-downloads.
You have to specify the Content-Disposition as part of the request headers; you cannot specify it as part of the request parameters. Specify it explicitly in the headers as below.
var params = {Bucket : "bucketname" , Key : "keyName" , Body : "actualData"};
s3.putObject(params).
on('build',function(req){
req.httpRequest.headers['Content-Type'] = 'application/pdf'; // Whatever you want
req.httpRequest.headers['Content-Disposition'] = 'inline';
}).
send( function(err,data){
if(err){
console.log(err);
return res.status(400).json({success: false});
}else{
console.log(data);
return res.status(200).json({success: true});
}
});
Code to upload objects/images to S3
module.exports = function(app, models) {
var fs = require('fs');
var AWS = require('aws-sdk');
var accessKeyId = "ACESS KEY HERE";
var secretAccessKey = "SECRET KEY HERE";
AWS.config.update({
accessKeyId: accessKeyId,
secretAccessKey: secretAccessKey
});
var s3 = new AWS.S3();
app.post('/upload', function(req, res){
var params = {
Bucket: 'bucketname',
Key: 'keyname.png',
Body: "GiveSomeRandomWordOraProperBodyIfYouHave"
};
s3.putObject(params, function (perr, pres) {
if (perr) {
console.log("Error uploading data: ", perr);
} else {
console.log("Successfully uploaded data to myBucket/myKey");
}
});
});
}
The above code will make sure the object has been uploaded to S3. You can see it listed in the S3 bucket in the browser, but you can't view its contents there.
You cannot view items within S3 itself; S3 is a storage box, so you can only download and upload elements in it. If you need to view the contents, you have to download the file and view it in the browser or any explorer of your choice. If you simply need to list the objects in S3, use the code below.
Code to list objects of s3
var AWS = require('aws-sdk');
AWS.config.update({accessKeyId: 'mykey', secretAccessKey: 'mysecret', region: 'myregion'});
var s3 = new AWS.S3();
var params = {
Bucket: 'bucketName',
Delimiter: '/',
Prefix: 's/prefix/objectPath/'
}
s3.listObjects(params, function (err, data) {
if(err)throw err;
console.log(data);
});
Use listObjects to list the elements of S3; this way you can view them. Create a hyperlink for each listed item and point it at an S3 download URL, so you can view the file in the browser and also download it if you need to.
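If the bucket isn't public, one way to build those links is with presigned URLs; a sketch with aws-sdk v2 (bucket, prefix and expiry are placeholders):
var AWS = require('aws-sdk');
var s3 = new AWS.S3();
// Turn every listed key into a time-limited download URL a browser can open.
s3.listObjects({ Bucket: 'bucketName', Prefix: 's/prefix/objectPath/' }, function (err, data) {
  if (err) throw err;
  data.Contents.forEach(function (obj) {
    var url = s3.getSignedUrl('getObject', {
      Bucket: 'bucketName',
      Key: obj.Key,
      Expires: 300 // link valid for 5 minutes
    });
    console.log(obj.Key, '->', url);
  });
});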
If you need to view the contents via Node.js, use the code below to load the image as if you were loading it from a remote URL.
Code to Download contents:
var fs = require('fs'),
request = require('request');
var download = function(uri, filename, callback){
request.head(uri, function(err, res, body){
console.log('content-type:', res.headers['content-type']);
console.log('content-length:', res.headers['content-length']);
request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
});
};
download('http://s3/URL', 'name.png', function(){
console.log('done');
});
Code to load image into a buffer :
const request = require('request');
let url = 'http://s3url/image.png';
request({ url, encoding: null }, (err, resp, buffer) => {
// typeof buffer === 'object'
// Use the buffer
// This buffer will now contains the image data
});
Use the above to load the image into a buffer. Once it's in a buffer, you can manipulate it the way you need. This code won't download the image to a file, but it helps you manipulate the image from S3 using a buffer.
The linked documentation contains specific Node.js code examples for uploading and manipulating S3 objects; use it for reference.

node.js - streaming upload to cloud storage (busboy, request)

I'm new to node.js. What I'm trying to do is to stream the upload of a file from web browser to a cloud storage through my node.js server.
I'm using 'express', 'request' and 'busboy' modules.
var express = require("express");
var request = require("request");
var BusBoy = require("busboy");
var router = express.Router();
router.post("/upload", function(req, res, next) {
var busboy = new BusBoy({ headers: req.headers });
var json = {};
busboy.on("file", function (fieldname, file, filename, encoding, mimetype) {
file.on("data", function(data) {
console.log(`streamed ${data.length}`);
});
file.on("end", function() {
console.log(`finished streaming ${filename}`);
});
var r = request({
url: "http://<my_cloud_storage_api_url>",
method: "POST",
headers: {
"CUSTOM-HEADER": "Hello",
},
formData: {
"upload": file
}
}, function(err, httpResponse, body) {
console.log("uploaded");
json.response = body;
});
});
busboy.on("field", function(name, val) {
console.log(`name: ${name}, value: ${val}`);
});
busboy.on("finish", function() {
res.send(json);
});
req.pipe(busboy);
});
module.exports = router;
But I keep getting the following error on the server. What am I doing wrong here? Any help is appreciated.
Error: Part terminated early due to unexpected end of multipart data
at node_modules\busboy\node_modules\dicer\lib\Dicer.js:65:36
at nextTickCallbackWith0Args (node.js:420:9)
at process._tickCallback (node.js:349:13)
I realize this question is some 7 months old, but I shall answer it here in an attempt to help anyone else currently banging their head against this.
You have two options, really: Add the file size, or use something other than Request.
Note: I edited this shortly after first posting it to hopefully provide a bit more context.
Using Something Else
There are some alternatives you can use instead of Request if you don't need all the baked in features it has.
form-data can be used by itself in simple cases (see the sketch after this list), or it can be used with, say, got. Request uses it internally.
bhttp advertises Streams2+ support, although in my experience Streams2+ support has not been an issue for me. It has no built-in https support; you have to specify a custom agent.
got: another slimmed-down one. It doesn't have any special handling of form data like Request does, but it's trivially used with form-data or form-data2. I had trouble getting it working over a corporate proxy, though that's likely because I'm a networking newb.
needle: seems pretty lightweight, but I haven't actually tried it.
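As a rough illustration of the first option, a sketch using form-data on its own (fileStream and meta stand in for the busboy file stream and the parsed metadata discussed below; the upload URL is a placeholder):
var FormData = require('form-data');
// Build the multipart body ourselves and tell form-data the file's size,
// so the total Content-Length can be computed without buffering the file.
var form = new FormData();
form.append('upload', fileStream, {
  filename: meta.name,
  knownLength: meta.size // size sent by the client alongside the file
});
form.submit('http://<my_cloud_storage_api_url>', function (err, uploadRes) {
  if (err) return console.log('upload error', err);
  console.log('uploaded, status', uploadRes.statusCode);
  uploadRes.resume(); // drain the response
});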
Using Request: Add the File Size
Request does not (as of writing) have any support for using transfer-encoding: chunked, so to upload files with it you need to add the file's size along with the file, which, if you're uploading from a web client, means that client needs to send the file size to your server in addition to the file itself.
The way I came up with to do this is to send the file metadata in its own field before the file field.
I modified your example with comments describing what I did. Note that I did not include any validation of the data received, but I recommend you do add that.
var express = require("express");
var request = require("request");
var BusBoy = require("busboy");
var router = express.Router();
router.post("/upload", function(req, res, next) {
var busboy = new BusBoy({ headers: req.headers });
var json = {};
// Use this to cache any fields which are file metadata.
var fileMetas = {};
busboy.on("file", function (fieldname, file, filename, encoding, mimetype) {
// Be sure to match this prop name here with the pattern you use to detect meta fields.
var meta = fileMetas[fieldname + '.meta'];
if (!meta) {
// Make sure to dump the file.
file.resume();
// Then, do some sort of error handling here, because you cannot upload a file
// without knowing its length.
return;
}
file.on("data", function(data) {
console.log(`streamed ${data.length}`);
});
file.on("end", function() {
console.log(`finished streaming ${filename}`);
});
var r = request({
url: "http://<my_cloud_storage_api_url>",
method: "POST",
headers: {
"CUSTOM-HEADER": "Hello",
},
formData: {
// value + options form of a formData field.
"upload": {
value: file,
options: {
filename: meta.name,
knownLength: meta.size
}
}
}
}, function(err, httpResponse, body) {
console.log("uploaded");
json.response = body;
});
});
busboy.on("field", function(name, val) {
// Use whatever pattern you want. I used (fileFieldName + ".meta").
// Another good one might be ("meta:" + fileFieldName).
if (/\.meta$/.test(name)) {
// I send an object with { name, size, type, lastModified },
// which are just the public props pulled off a File object.
// Note: Should probably add error handling if val is somehow not parsable.
fileMetas[name] = JSON.parse(val);
console.log(`file metadata: name: ${name}, value: ${val}`);
return;
}
// Otherwise, process field as normal.
console.log(`name: ${name}, value: ${val}`);
});
busboy.on("finish", function() {
res.send(json);
});
req.pipe(busboy);
});
module.exports = router;
On the client, you need to then send the metadata on the so-named field before the file itself. This can be done by ordering an <input type="hidden"> control before the file and updating its value onchange. The order of values sent is guaranteed to follow the order of inputs in appearance. If you're building the request body yourself using FormData, you can do this by appending the appropriate metadata before appending the File.
Example with <form>
<script>
function extractFileMeta(file) {
return JSON.stringify({
size: file.size,
name: file.name,
type: file.type,
lastModified: file.lastModified
});
}
function onFileUploadChange(event) {
// change this to use arrays if using the multiple attribute on the file input.
var file = event.target.files[0];
var fileMetaInput = document.querySelector('input[name="fileUpload.meta"]');
if (fileMetaInput) {
fileMetaInput.value = extractFileMeta(file);
}
}
</script>
<form action="/upload-to-cloud" method="post" enctype="multipart/form-data">
<input type="hidden" name="fileUpload.meta">
<input type="file" name="fileUpload" onchange="onFileUploadChange(event)">
</form>
Example with FormData:
function onSubmit(event) {
event.preventDefault();
var form = document.getElementById('my-upload-form');
var formData = new FormData();
var fileUpload = form.elements['fileUpload'].files[0]; // the File object, not the input element
var fileUploadMeta = JSON.stringify({
size: fileUpload.size,
name: fileUpload.name,
type: fileUpload.type,
lastModified: fileUpload.lastModified
});
// Append fileUploadMeta BEFORE fileUpload.
formData.append('fileUpload.meta', fileUploadMeta);
formData.append('fileUpload', fileUpload);
// Do whatever you do to POST here.
}

Streaming file from S3 with Express including information on length and filetype

Using the aws-sdk module and Express 4.13, it's possible to proxy a file from S3 in a number of ways.
This callback version will return the file body as a buffer, plus other relevant headers like Content-Length:
function(req,res){
var s3 = new AWS.S3();
s3.getObject({Bucket: myBucket, Key: myFile},function(err,data){
if (err) {
return res.status(500).send("Error!");
}
// Headers
res.set("Content-Length",data.ContentLength)
.set("Content-Type",data.ContentType);
res.send(data.Body); // data.Body is a buffer
});
}
The problem with this version is that you have to get the entire file before sending it, which is not great, especially if it's something large like a video.
This version will directly stream the file:
function(req,res){
var s3 = new AWS.S3();
s3.getObject({Bucket: myBucket, Key: myFile})
.createReadStream()
.pipe(res);
}
But unlike the first one, it won't do anything about the headers, which a browser might need to properly handle the file.
Is there a way to get the best of both worlds, passing through the correct headers from S3 but sending the file as a stream? It could be done by first making a HEAD request to S3 to get the metadata, but can it be done with one API call?
One approach is listening for the httpHeaders event and creating a stream within it.
s3.getObject(params)
.on('httpHeaders', function (statusCode, headers) {
res.set('Content-Length', headers['content-length']);
res.set('Content-Type', headers['content-type']);
this.response.httpResponse.createUnbufferedStream()
.pipe(res);
})
.send();
For my project, I simply do a headObject in order to retrieve only the object metadata (it's really fast and avoids downloading the object). Then I add to the response all the headers I need to propagate for the piping:
var AWS = require('aws-sdk');
var mime = require('mime-types'); // assumed: any module exposing mime.lookup(key)
var s3 = new AWS.S3();
var params = {
Bucket: bucket,
Key: key
};
s3.headObject(params, function (err, data) {
if (err) {
// an error occurred
console.error(err);
return next();
}
var stream = s3.getObject(params).createReadStream();
// forward errors
stream.on('error', function error(err) {
//continue to the next middlewares
return next();
});
//Add the content type to the response (it's not propagated from the S3 SDK)
res.set('Content-Type', mime.lookup(key));
res.set('Content-Length', data.ContentLength);
res.set('Last-Modified', data.LastModified);
res.set('ETag', data.ETag);
stream.on('end', () => {
console.log('Served by Amazon S3: ' + key);
});
//Pipe the s3 object to the response
stream.pipe(res);
});
Building on André Werlang's answer, we have done the following to augment AWS Request objects with a forwardToExpress method:
const _ = require('lodash');
const AWS = require('aws-sdk');
AWS.Request.prototype.forwardToExpress = function forwardToExpress(res, next) {
this
.on('httpHeaders', function (code, headers) {
if (code < 300) {
res.set(_.pick(headers, 'content-type', 'content-length', 'last-modified'));
}
})
.createReadStream()
.on('error', next)
.pipe(res);
};
Then, in our route handlers, we can do something like this:
s3.getObject({Bucket: myBucket, Key: myFile}).forwardToExpress(res, next);
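For instance, a minimal Express route wired up this way might look like (the route path and key lookup are assumptions):
// Stream any S3 object back to the client, forwarding content-type,
// content-length and last-modified from S3.
app.get('/files/:key', function (req, res, next) {
  s3.getObject({ Bucket: myBucket, Key: req.params.key }).forwardToExpress(res, next);
});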
Here is a 2022 solution using the AWS JS SDK v3 client.
This solution uses the S3 client to stream the object data to the response stream 'res'. A plain GetObjectCommand returns only the raw data of the object, not the Content-Length or the tags, so before streaming the object data, one call is made to fetch metadata such as "ETag" and "Content-Type", and one optional call is made to fetch the object tags and forward them to the UI.
It's up to you to customize it for your project.
const { S3, GetObjectCommand, HeadObjectCommand, GetObjectTaggingCommand } = require("@aws-sdk/client-s3");
const s3Client = new S3({}); // region and credentials resolved from the environment
/**
*
* @param {*} res
* @param {string} bucketName Bucket Name
* @param {string} key Object key
* @param {number} cacheExpiration Cache expiration in ms
* @param {boolean} streamTags Forward object tags in http-headers
*/
async function streamGetObject(res, bucketName, key, cacheExpiration, streamTags) {
try {
const params = {
Bucket: bucketName,
Key: key,
};
// Head the object to get the bare minimum http-header information
const headResponse = await s3Client.send(new HeadObjectCommand(params));
res.set({
"Content-Length": headResponse.ContentLength,
"Content-Type": headResponse.ContentType,
"ETag": headResponse.ETag,
});
// Get the object taggings (optional)
if (streamTags === true) {
const taggingResponse = await s3Client.send(new GetObjectTaggingCommand(params));
taggingResponse.TagSet.forEach((tag) => {
res.set("X-TAG-" + tag.Key, tag.Value);
});
}
// Prepare cache headers
if (typeof cacheExpiration === "number") {
res.setHeader("Cache-Control", "public, max-age=" + cacheExpiration / 1000);
res.setHeader("Expires", new Date(Date.now() + cacheExpiration).toUTCString());
} else {
res.setHeader("Pragma", "no-cache");
res.setHeader("Cache-Control", "no-cache");
res.setHeader("Expires", 0);
}
// Now get the object data and stream it
const response = await s3Client.send(new GetObjectCommand(params));
const stream = response.Body;
stream.on("data", (chunk) => res.write(chunk));
stream.once("end", () => {
res.end();
});
stream.once("error", () => {
res.end();
});
} catch (err) {
console.log("Error", err);
throw err;
}
}
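A hypothetical Express usage of the function above (route path, bucket name and cache duration are placeholders):
// Stream an object to the browser, cache it for an hour, don't forward tags.
app.get('/assets/:key', function (req, res) {
  streamGetObject(res, 'my-bucket', req.params.key, 60 * 60 * 1000, false)
    .catch(function () {
      res.status(500).end('Error streaming object');
    });
});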

Uploading images to S3 using NodeJS and Multer. How to upload whole file onFileUploadComplete

I'm using NodeJS and Multer to upload files to S3.
On the surface, everything appears to be working: the files get uploaded, and I can see them in the bucket when I log into the AWS console. However, most of the time when I follow the link to the file, the file is broken, and often the file size is much smaller than that of the original file.
When the file reaches the server, the file size is correct if I log it, but on S3 it is much smaller. For example I just uploaded a file which is 151kb. The post request logs the file size correctly, but on S3 the file says it's 81kb.
Client side:
uploadFile = (file) ->
formData = new FormData()
formData.append 'file', file
xhr = new XMLHttpRequest()
xhr.open "POST", "/upload-image", true
# xhr.setRequestHeader("Content-Type","multipart/form-data");
console.log 'uploadFile'
xhr.onerror = ->
alert 'Error uploading file'
xhr.onreadystatechange = ->
if xhr.readyState is 4
console.log xhr.responseText
xhr.send formData
Server:
app.use(multer({ // https://github.com/expressjs/multer
inMemory: true,
limits : { fileSize:3000000 },
rename: function (fieldname, filename) {
var time = new Date().getTime();
return filename.replace(/\W+/g, '-').toLowerCase() + '_' + time;
},
onFileUploadData: function (file, data, req, res) {
var params = {
Bucket: creds.awsBucket,
Key: file.name,
Body: data,
ACL: 'public-read'
};
var s3 = new aws.S3();
s3.putObject(params, function (perr, pres) {
if (perr) {
console.log("Error uploading data: ", perr);
} else {
console.log("Successfully uploaded data", pres);
}
});
}
}));
app.post('/upload-image', function(req, res){
if (req.files.file === undefined){
res.end("error, no file chosen");
} else if (req.files.file.truncated) {
res.end("file too large");
} else {
console.log(req.files.file.size); //logs the correct file size
var path = creds.awsPath + req.files.file.name;
res.type('text/plain');
res.write(path);
res.end();
};
});
EDIT:
Setting file.buffer as the Body param in onFileUploadComplete seems to work, but I have a feeling that this isn't the proper way of doing things, and it may come back to bite me later. Is this approach okay, or are there issues I should be aware of when doing this?
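For what it's worth, a sketch of the same buffer-based idea with the current multer API (memoryStorage keeps the whole file in req.file.buffer, so the Body length is known; the field name matches the client code above):
var aws = require('aws-sdk');
var multer = require('multer');
// Keep the upload in memory as a Buffer, then hand that Buffer to S3;
// a Buffer has a known length, so nothing gets truncated on the way to S3.
var upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 3000000 } });
var s3 = new aws.S3();
app.post('/upload-image', upload.single('file'), function (req, res) {
  if (!req.file) return res.end('error, no file chosen');
  s3.putObject({
    Bucket: creds.awsBucket,
    Key: req.file.originalname,
    Body: req.file.buffer, // the complete file contents
    ACL: 'public-read'
  }, function (err) {
    if (err) return res.status(500).end('Error uploading data');
    res.type('text/plain');
    res.end(creds.awsPath + req.file.originalname);
  });
});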

