Upload file using Node.js and Busboy

I am uploading a file using Node.js. My requirement is to read the stream into a variable so that I can store it in AWS SQS. I do not want to store the file on disk. Is this possible? I only need the uploaded file as a stream. The code I am using is (upload.js):
var http = require('http');
var Busboy = require('busboy');

module.exports.UploadImage = function (req, res, next) {
  var busboy = new Busboy({ headers: req.headers });

  // Listen for event when Busboy finds a file to stream.
  busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
    // We are streaming! Handle chunks.
    file.on('data', function (data) {
      // Here we can act on the data chunks streamed.
    });
    // Completed streaming the file. The 'end' event carries no arguments.
    file.on('end', function () {
      // Here I need to get the stream to send to SQS.
    });
  });

  // Listen for event when Busboy finds a non-file field.
  busboy.on('field', function (fieldname, val) {
    // Do something with non-file field.
  });

  // Listen for event when Busboy is finished parsing the form.
  busboy.on('finish', function () {
    res.statusCode = 200;
    res.end();
  });

  // Pipe the HTTP request into Busboy.
  req.pipe(busboy);
};
How do I get the uploaded stream?

In busboy's 'file' event handler you receive a parameter named file, which is itself a readable stream, so you can pipe it.
For example
busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
  file.pipe(streamToSQS);
});

I hope that will help you.
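For what it's worth, streamToSQS above is only a placeholder: SQS has no streaming API and caps messages at 256 KB, so a real destination has to buffer the file first. A minimal sketch of such a writable, assuming aws-sdk v2 and a hypothetical queue URL:

var Writable = require('stream').Writable;
var AWS = require('aws-sdk');
var sqs = new AWS.SQS({ region: 'us-east-1' }); // assumed region

function makeSqsWritable(queueUrl) {
  var chunks = [];
  return new Writable({
    write: function (chunk, encoding, callback) {
      chunks.push(chunk); // accumulate the file in memory
      callback();
    },
    final: function (callback) {
      // Send the whole file as one base64-encoded SQS message.
      sqs.sendMessage({
        QueueUrl: queueUrl, // hypothetical queue URL
        MessageBody: Buffer.concat(chunks).toString('base64')
      }, function (err) { callback(err); });
    }
  });
}

With that in place, file.pipe(makeSqsWritable(queueUrl)) behaves like the streamToSQS placeholder.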
busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
  var filename = "filename";
  s3Helper.pdfUploadToS3(file, filename);
});
busboy.on('finish', function () {
  res.status(200).json({ 'message': "File uploaded successfully." });
});
req.pipe(busboy);

While the answers above assume you can simply hand the stream (file) off to something that accepts a stream, the actual chunks arrive in the callbacks you attach to that stream.
From the docs (https://www.npmjs.com/package/busboy):
file.on('data', function(data) {
  // data is one chunk; data.length is its size in bytes
  console.log('File [' + fieldname + '] got ' + data.length + ' bytes');
});
file.on('end', function() {
  console.log('File [' + fieldname + '] Finished');
});
Update:
Found the constructor docs; the second argument is a readable stream.
file(< string >fieldname, < ReadableStream >stream, < string >filename, < string >transferEncoding, < string >mimeType) - Emitted for each new file form field found. transferEncoding contains the 'Content-Transfer-Encoding' value for the file stream. mimeType contains the 'Content-Type' value for the file stream.
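Putting that docs excerpt to use: accumulating the chunks is how you end up with the file "in a variable", which is what the question asked for. A minimal sketch:

var chunks = [];
file.on('data', function (data) {
  chunks.push(data); // collect each chunk as it arrives
});
file.on('end', function () {
  // The 'end' event itself carries no stream argument; by now the
  // chunks array holds the entire file.
  var fileBuffer = Buffer.concat(chunks);
  // fileBuffer can now be sent on, e.g. base64-encoded to SQS.
});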

Related

What does calling busboy.end(req.rawBody) do?

I'm working on an image upload using express and busboy. It currently works, but if I remove the last line, busboy.end(req.rawBody), the code eventually times out. Is this the same as Node's Writable .end() method, and if so, how does this call pass the data to busboy and actually kick off its work?
const busboy = new Busboy({ headers: req.headers });

// Define busboy event listeners
busboy.on("field", (fieldname, value) => {
  // Process fields
});
busboy.on("file", (fieldname, file, filename, encoding, mimetype) => {
  // Process file
});

// Call busboy with rawBody of request
busboy.end(req.rawBody);
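For context: Busboy is a Writable stream, so .end(data) is exactly Node's Writable#end(). It writes a final chunk and signals that no more input is coming. In environments that pre-read the request body (Cloud Functions, for example, expose req.rawBody), there is nothing left to pipe, which leaves two equivalent ways to feed Busboy:

// 1) Streaming: pipe the live request into busboy; pipe() calls
//    busboy.end() internally when the request stream ends.
req.pipe(busboy);

// 2) Pre-buffered: write the whole body and signal end-of-input at once.
//    Without this call busboy never sees the end of its input, 'finish'
//    never fires, and the request eventually times out.
busboy.end(req.rawBody);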

Node: Read large zip file from URL and then unzip it

I am reading a large zip file (500 MB) from a URL using request.get(url). The archive contains one CSV file which is pretty huge. I read the response and write the file stream into a zip file using fs.createWriteStream(zipFile). On the close event of the write stream I have tried adm-zip, which gave me "error invalid or unsupported zip format. no end header found", and the unzipper npm package, which gave me "invalid signature unzip". Below is the code:
const request = require('superagent');
const fs = require('fs');
const unzip = require('unzipper');

request.get(url)
  .on('error', function(err) { console.log(err); })
  .pipe(fs.createWriteStream(zipFile))
  .on('close', function() {
    const admZip = require('adm-zip');
    console.log('start unzip');
    var zip = new admZip(zipFile);
    console.log('unzipping to ' + uploadDir);
    zip.extractAllTo(uploadDir, true);
    console.log('finished unzip');
  });
With unzipper:
request.get(url)
  .on('error', function(err) { console.log(err); })
  .pipe(fs.createWriteStream(zipFile))
  .on('close', function() {
    fs.createReadStream(zipFile)
      .pipe(unzip.Extract({ path: uploadDir }));
  });
The error is resolved.
The first step is to pipe the incoming readable response stream into a write stream:
const writeStream = fs.createWriteStream(zipFile);
request.get(urlData)
  .pipe(writeStream);
Once the download finishes, the pipe ends the write stream. I then trigger the unzipping from the write stream's close event:
writeStream.on('close', function() {
  fs.createReadStream(zipFile)
    .pipe(unzip.Extract({ path: uploadDir }))
    .on('close', function() {
      console.log('finished unzip');
    });
});
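A possible refinement: unzipper can also extract straight from the download stream, skipping the temporary file entirely. This is a sketch, assuming the archive is compatible with streamed parsing (unzipper's docs recommend the Open methods when it is not):

const request = require('superagent');
const unzip = require('unzipper');

request.get(url)
  .on('error', function (err) { console.log(err); })
  .pipe(unzip.Extract({ path: uploadDir }))
  .on('close', function () { console.log('finished unzip'); });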

Avoiding further processing on busboy file upload size limit

I'm trying to set an upload limit in busboy. Without an upload limit I'm able to upload the file successfully.
But when I want to redirect if the uploaded file size exceeds the limit, I find that the code is asynchronous and the writing of the file happens anyway.
What I want is this: if the upload reaches the configured limit, redirect to the page without uploading the file. I have tried using a JavaScript Promise, but it doesn't help.
My busboy code is like this.
var express = require('express');
var router = express.Router();
var inspect = require('util').inspect;
var Busboy = require('busboy');

router.all('/', function(req, res, next) {
  if (req.method === 'POST') {
    var busboy = new Busboy({ headers: req.headers, limits: { files: 1, fileSize: 30000000 } });
    busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
      file.on('limit', function(data) {
        console.log("limit...");
        console.log("ONEEE");
      });
      console.log("TWOOO");
      console.log('File [' + fieldname + ']: filename: ' + filename + ', encoding: ' + encoding + ', mimetype: ' + mimetype);
      file.on('data', function(data) {
        console.log('File [' + fieldname + '] got ' + data.length + ' bytes');
      });
      file.on('end', function() {
        console.log('File [' + fieldname + '] Finished');
      });
    });
    busboy.on('finish', function() {
      console.log('Done parsing form!');
      res.writeHead(303, { Connection: 'close', Location: '/test_upload' });
      res.end();
    });
    req.pipe(busboy);
  }
});

module.exports = router;
Here I have specified the fileSize limit as 30 MB. But when I upload a file of, say, 40 MB, I still get "TWOOO" in the console first and then "ONEEE". This is obviously because everything happens asynchronously, so what is the solution?
Basically, if the limit is reached I want to log "ONEEE" and redirect, without logging "TWOOO" or processing the file.
Also, if I try to check inside file.on('data', ...) of busboy.on('file', ...), I get irregular file sizes on the filesystem when using file.pipe.
For example, this is not working consistently (it writes the file as 0 bytes when it is really 354 bytes or similar):
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
  file.on('data', function(data) {
    fstream = fs.createWriteStream("/home/vibs/temp_01.txt");
    file.pipe(fstream);
  });
});
If I remove file.on('data', ...) from inside busboy.on('file', ...), the stream is written correctly to disk, but then there is no way to check whether the file size was exceeded.
So this writes the file to the filesystem correctly, but I can't check whether the file exceeded the allowed limit:
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
  fstream = fs.createWriteStream("/home/vibs/temp_01.txt");
  file.pipe(fstream);
});
So my question is: how can I check for the file size limit and redirect without running the upload procedure? file.on('data', ...) fails because the stream gets corrupted inside the handler, and file.on('limit', ...) is called asynchronously, so there seems to be no way to avoid the file write running first.
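One clarification before the answer: a readable stream can be piped and observed at the same time; the corrupted writes above come from creating a new write stream on every 'data' chunk. A minimal sketch that counts bytes while piping (reusing the asker's path):

busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
  var written = 0;
  var fstream = fs.createWriteStream('/home/vibs/temp_01.txt'); // created once, not per chunk
  file.on('data', function (data) {
    written += data.length; // observing chunks does not disturb the pipe
  });
  file.pipe(fstream);
});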
This will stop uploading a file if it hits the max size.
var fstream;
req.pipe(req.busboy);
req.busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
  var path = __dirname + "/../public/" + filename; // full file path, not just the directory
  var limit_reach = false;
  var limit_reach_err = "File is too large!";

  // Create a write stream using the fs module
  fstream = fs.createWriteStream(path);
  file.pipe(fstream);

  // If the file is larger than the set limit,
  // delete the partially uploaded file
  file.on('limit', function(){
    fs.unlink(path, function(){
      limit_reach = true;
      res.status(455).send(limit_reach_err);
    });
  });

  // Despite being asynchronous, limit_reach will be seen here as true
  // when the max size is hit: once the limit is reached the stream
  // stops and busboy's 'finish' event fires afterwards.
  req.busboy.on('finish', function() {
    if (!limit_reach) {
      res.send("Image saved successfully!");
    }
  });
});
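Worth noting: busboy only emits 'limit' when a fileSize limit is actually configured (the default is Infinity), so the answer above assumes the connect-busboy middleware was set up with limits along the lines of the question's constructor call:

var busboy = new Busboy({ headers: req.headers, limits: { files: 1, fileSize: 30000000 } }); // ~30 MB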

How to await fields before processing the file stream in multipart form

I'm using SendGrid for receiving files via email. SendGrid parses the incoming emails and posts the files in a multipart form to an endpoint I have set up.
I don't want the files on my local disk, so I stream them straight to Amazon S3. This works perfectly.
But before I can stream to S3 I need to get hold of the destination mail address so I can work out the correct S3 folder. It is sent in a field named "to" in the form post. Unfortunately this field sometimes arrives after the files, so I need a way to await the to-field before I'm ready to consume the stream.
I thought I could wrap onField in a promise and await the to-field from within onFile, but this concept seems to lock itself up when the field arrives after the file.
I'm new to both streams and promises and would really appreciate it if someone could tell me how to do this.
This is the non-working pseudo-ish code:
function sendGridUpload(req, res, next) {
  var busboy = new Busboy({ headers: req.headers });

  var awaitEmailAddress = new Promise(function(resolve, reject) {
    busboy.on('field', function(fieldname, val, fieldnameTruncated, valTruncated) {
      if (fieldname === 'to') {
        resolve(val);
      } else {
        return;
      }
    });
  });

  busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
    function findInbox(emailAddress) {
      console.log('Got email address: ' + emailAddress);
      // ...find the inbox and generate an s3Key
      return s3Key;
    }

    function saveFileStream(s3Key) {
      // ...pipe the file directly to S3
    }

    awaitEmailAddress.then(findInbox)
      .then(saveFileStream)
      .catch(function(err) {
        log.error(err);
      });
  });

  req.pipe(busboy);
}
I finally got this working. The solution is not very pretty, and I have actually switched to another concept (described at the end of the post).
To buffer the incoming data until the "to"-field arrives I used stream-buffers by @samcday. When I get hold of the to-field, I release the readable stream to the pipes lined up for the data.
Here is the code (some parts omitted, but the essential parts are there).
var streamBuffers = require('stream-buffers');

function postInboundMail(req, res, next) {
  var busboy = new Busboy({ headers: req.headers });

  // Sometimes the fields arrive after the files are streamed.
  // We need the "to"-field before we are ready for the files,
  // therefore the onField handler is wrapped in a promise which gets
  // resolved when the to-field arrives.
  var awaitEmailAddress = new Promise(function(resolve, reject) {
    busboy.on('field', function(fieldname, val, fieldnameTruncated, valTruncated) {
      var emailAddress;
      if (fieldname === 'to') {
        try {
          emailAddress = emailRegexp.exec(val)[1];
          resolve(emailAddress);
        } catch (err) {
          return reject(err);
        }
      } else {
        return;
      }
    });
  });

  busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
    var inbox;

    // I'm using ReadableStreamBuffer to accumulate the data before
    // I get the email field, so I can send the stream through to S3.
    var readBuf = new streamBuffers.ReadableStreamBuffer();

    // I have to pause readBuf immediately. Otherwise stream-buffers starts
    // sending as soon as I put data in with put().
    readBuf.pause();

    function getInbox(emailAddress) {
      return model.inbox.findOne({ email: emailAddress })
        .then(function(result) {
          if (!result) return Promise.reject(new Error(`Inbox not found for ${emailAddress}`));
          inbox = result;
          return Promise.resolve();
        });
    }

    function saveFileStream() {
      console.log('=========== starting stream to S3 ========= ' + filename);
      // Have to resume readBuf since we paused it before.
      readBuf.resume();
      // file.save will approximately do the following:
      //   readBuf.pipe(gzip).pipe(encrypt).pipe(S3)
      return model.file.save({
        inbox: inbox,
        fileStream: readBuf
      });
    }

    awaitEmailAddress.then(getInbox)
      .then(saveFileStream)
      .catch(function(err) {
        log.error(err);
      });

    file.on('data', function(data) {
      // Fill readBuf with data as it arrives.
      readBuf.put(data);
    });

    file.on('end', function() {
      // This was the only way I found to get the S3 streaming finished.
      // destroySoon() will let the pipes finish reading, but no more
      // writes are allowed.
      readBuf.destroySoon();
    });
  });

  busboy.on('finish', function() {
    res.writeHead(202, { Connection: 'close', Location: '/' });
    res.end();
  });

  req.pipe(busboy);
}
I would very much like feedback on this solution, even though I'm no longer using it. I have a feeling it can be done much more simply and elegantly.
New solution:
Instead of waiting for the to-field I send the stream directly to S3. I figured that the more stuff I put between the incoming stream and the S3 save, the higher the risk of losing the incoming file due to a bug in my code. (SendGrid will eventually resend the file if I don't respond with 200, but it takes some time.)
This is how I do it:
1. Save a placeholder for the file in the database.
2. Pipe the stream to S3.
3. Update the placeholder with more information as it arrives.
This solution also makes it easy to find unsuccessful uploads, since their placeholders will be incomplete.
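A minimal sketch of that placeholder pattern (an illustration, not the author's actual code; model.file and the bucket name are hypothetical, and aws-sdk v2's s3.upload() is assumed, which accepts a readable stream as Body):

var AWS = require('aws-sdk');
var s3 = new AWS.S3();

busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
  // 1) Save a placeholder first, so failed uploads remain discoverable.
  model.file.create({ filename: filename, status: 'uploading' }) // hypothetical model
    .then(function (placeholder) {
      // 2) s3.upload() accepts a readable stream as Body, so the file
      //    goes straight to S3 without touching local disk.
      s3.upload({
        Bucket: 'inbound-mail-files', // hypothetical bucket
        Key: placeholder.id,
        Body: file
      }, function (err, data) {
        // 3) Update the placeholder once the upload settles.
        var update = err ? { status: 'failed' }
                         : { status: 'done', location: data.Location };
        model.file.update(placeholder.id, update); // hypothetical model
      });
    });
});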
//Michael

busboy - is there a way to send the response when all files have been uploaded?

I'm trying to upload files to a server using node.js as the backend and angular.js as the frontend, with express 4 + busboy. I have a table in the frontend where I display all the files I'm uploading. So if I have three files and click upload, angular should post these files to node.js and, after getting the response back, refresh the table with those three files.
This is the function I'm using in angular:
function uploadFiles(files){
  var fd = new FormData();
  for (var i = 0; i < files.length; i++) {
    fd.append("file", files[i]);
  }
  $http.post('http://localhost:3000/upload', fd, {
    withCredentials: false,
    headers: { 'Content-Type': undefined },
    transformRequest: angular.identity
  }).success(refreshTable) // pass the function; invoking it here would refresh immediately
    .error(function(){
      console.log("error uploading");
    });
}
and this is from node.js:
app.post('/upload', function(req, res) {
  var busboy = new Busboy({ headers: req.headers });
  busboy.on('file', function (fieldname, file, filename) {
    console.log("Uploading: " + filename);
    var fstream = fs.createWriteStream('./files/' + filename);
    file.pipe(fstream);
  });
  busboy.on('finish', function(){
    res.writeHead(200, { 'Connection': 'close' });
    res.end("");
  });
  return req.pipe(busboy);
});
The problem is that if I upload three files, node.js sends the response as soon as the first file has been uploaded, so the table is updated with only the first file; if I refresh the page, the rest of the files appear.
I think the problem is with this line in node: return req.pipe(busboy); if I remove that line, the post request stays pending for a long time and nothing happens. I think this is an async problem. Does anybody know a way to send the response back only when all files have been uploaded?
Thanks
A simple and common solution to this particular problem is to use a counter variable and listen for the finish event on the fs Writable stream. For example:
app.post('/upload', function(req, res) {
  var busboy = new Busboy({ headers: req.headers });
  var files = 0, finished = false;
  busboy.on('file', function (fieldname, file, filename) {
    console.log("Uploading: " + filename);
    ++files;
    var fstream = fs.createWriteStream('./files/' + filename);
    fstream.on('finish', function() {
      if (--files === 0 && finished) {
        res.writeHead(200, { 'Connection': 'close' });
        res.end("");
      }
    });
    file.pipe(fstream);
  });
  busboy.on('finish', function() {
    finished = true;
  });
  return req.pipe(busboy);
});
The reason for this is that busboy's finish event is emitted once the entire request has been fully parsed, files included. However, there is some delay between the moment no more data remains to be written to a particular file and the moment the OS/node has flushed its internal buffers to disk. Listening for the finish event on the fs Writable stream tells you that all data has been flushed and no more writes are going to occur.
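As a side note, not part of the original answer: on Node 10+ the stream module's pipeline() helper gives the same per-file guarantee with built-in error handling, although the counter bookkeeping for multiple files stays the same. A sketch:

const { pipeline } = require('stream');

busboy.on('file', function (fieldname, file, filename) {
  pipeline(file, fs.createWriteStream('./files/' + filename), function (err) {
    if (err) return console.error(err);
    // The file is fully flushed to disk at this point.
  });
});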
