I'm trying to upload a large (8.3 GB) video to my Node.js (Express) server by chunking it with busboy. How do I receive each chunk (busboy is doing this part) and piece the chunks together into one whole video?
I have been looking into readable and writable streams, but I never end up with the whole video. I keep overwriting parts of it, ending up with only about 1 GB.
Here's my code:
req.busboy.on('file', (fieldname, file, filename) => {
  logger.info(`Upload of '${filename}' started`);

  const video = fs.createReadStream(path.join(`${process.cwd()}/uploads`, filename));
  const fstream = fs.createWriteStream(path.join(`${process.cwd()}/uploads`, filename));
  if (video) {
    video.pipe(fstream);
  }

  file.pipe(fstream);

  fstream.on('close', () => {
    logger.info(`Upload of '${filename}' finished`);
    res.status(200).send(`Upload of '${filename}' finished`);
  });
});
After 12+ hours, I got it figured out using pieces from this article that was given to me. I came up with this code:
//busboy is middleware on my index.js
const path = require('path');
const fs = require('fs-extra');
const streamToBuffer = require('fast-stream-to-buffer');

//API function called first, once per chunk
uploadVideoChunks(req, res) {
  req.pipe(req.busboy);

  req.busboy.on('file', (fieldname, file, filename, encoding, mimetype) => {
    const fileNameBase = filename.replace(/\.[^/.]+$/, '');

    //save each chunk to a temp folder with a .tmp extension
    streamToBuffer(file, function (error, buffer) {
      const chunkDir = `${process.cwd()}/uploads/${fileNameBase}`;
      fs.outputFileSync(path.join(chunkDir, `${Date.now()}-${fileNameBase}.tmp`), buffer);
    });
  });

  req.busboy.on('finish', () => {
    res.status(200).send(`Finished uploading chunk`);
  });
}
//API function called once all chunks are uploaded
saveToFile(req, res) {
  const { filename, profileId, movieId } = req.body;
  const uploadDir = `${process.cwd()}/uploads`;
  const fileNameBase = filename.replace(/\.[^/.]+$/, '');
  const chunkDir = `${uploadDir}/${fileNameBase}`;
  let outputFile = fs.createWriteStream(path.join(uploadDir, filename));

  fs.readdir(chunkDir, function (error, filenames) {
    if (error) {
      throw new Error('Cannot get upload chunks!');
    }

    //sort so the chunks are appended in the order they were written
    filenames.sort();

    //loop through the temp dir and write to the stream to create a new file
    filenames.forEach(function (tempName) {
      const data = fs.readFileSync(`${chunkDir}/${tempName}`);
      outputFile.write(data);

      //delete the chunk we just handled
      fs.removeSync(`${chunkDir}/${tempName}`);
    });

    outputFile.end();
  });

  outputFile.on('finish', async function () {
    //delete the temp folder once the file is written
    fs.removeSync(chunkDir);
  });
}
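For completeness, here is a rough sketch of the client side I'd pair with this (my addition, not from the original answer): slicing the file in the browser and POSTing each slice to the chunk endpoint, then calling the save endpoint. The route paths, field name and chunk size are assumptions.

// Hypothetical browser-side uploader, assuming /upload/chunk and /upload/save routes
async function uploadInChunks(file) {
  const chunkSize = 10 * 1024 * 1024; // 10 MB per chunk

  for (let start = 0; start < file.size; start += chunkSize) {
    const form = new FormData();
    // keep the original filename so the server can group chunks by name
    form.append('file', file.slice(start, start + chunkSize), file.name);
    await fetch('/upload/chunk', { method: 'POST', body: form });
  }

  // tell the server all chunks are in so it can assemble the final file
  await fetch('/upload/save', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ filename: file.name })
  });
}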
Use streams
multer allows you to easily handle file uploads as part of an Express route. This works great for small files that don't leave a significant memory footprint.
The problem with loading a large file into memory is that you can actually run out of memory and cause your application to crash.
On the client side, use a multipart/form-data request and assign a read stream to the file field in your request options instead of reading the whole file first (see the sketch below).
Streams are extremely valuable for optimizing performance.
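As a rough illustration of that client-side idea (mine, not the original answer's), the form-data package lets you attach a read stream to a multipart field so the file is streamed rather than buffered; the URL, file path and field name here are assumptions.

const fs = require('fs');
const FormData = require('form-data');

const form = new FormData();
// the file is read and sent as a stream, never fully loaded into memory
form.append('video', fs.createReadStream('./big-video.mp4'));

// hypothetical upload endpoint
form.submit('http://localhost:3000/upload', (err, res) => {
  if (err) return console.error(err);
  console.log('Upload finished with status', res.statusCode);
  res.resume();
});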
Try this code sample; I think it will work for you.
busboy.on("file", function(fieldName, file, filename, encoding, mimetype){
const writeStream = fs.createWriteStream(writePath);
file.pipe(writeStream);
file.on("data", data => {
totalSize += data.length;
cb(totalSize);
});
file.on("end", () => {
console.log("File "+ fieldName +" finished");
});
});
You can also refer to this link to help resolve the problem:
https://github.com/mscdex/busboy/issues/143
I think multer is a good fit for this, did you try multer? A minimal setup is sketched below.
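For reference, a minimal multer setup (my sketch, not part of the original answer) that writes the upload straight to disk instead of buffering it in memory; the route and field name are assumptions.

const express = require('express');
const multer = require('multer');

const app = express();
// disk storage keeps memory usage low even for very large files
const upload = multer({ dest: 'uploads/' });

// 'video' must match the field name used by the client
app.post('/upload', upload.single('video'), (req, res) => {
  res.status(200).send(`Upload of '${req.file.originalname}' finished`);
});

app.listen(3000);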
I'm trying to download a .xlsx file from a website through web scraping. I've done the entire process up to accessing the temporary URL that the website generates for the download.
When I open the file URL in the browser, the download starts automatically (as shown in the image below).
[image: excel file download]
The problem is that I need to parse this file to send it to my front-end later. When I try to create the file using fs.createWriteStream('result.xlsx') and then populate it with res.pipe(fileStream), the file is always generated empty.
Here's my full code:
const https = require("https");
const fs = require("fs");
const path = require("path");
const xlsx = require("node-xlsx");
function download(url, callback) {
const filename = path.basename(url);
const req = https.get(url, function (res) {
const fileStream = fs.createWriteStream("result.xlsx");
res.pipe(fileStream);
const obj = xlsx.parse('result.xlsx');
callback(obj[0]);
fileStream.on("error", function (err) {
console.log("Error writting to the stream.");
console.log(err);
});
fileStream.on("close", function () {
callback(filename);
});
fileStream.on("finish", function () {
fileStream.close();
});
});
req.on("error", function (err) {
console.log("error downloading the file");
console.log(err);
});
}
module.exports.download = download;
My questions are:
Is it possible to parse this data into an array WITHOUT needing to save to a physical file? If yes, how?
If I can't parse the data without populating a physical file, how can I download the spreadsheet and then read and parse the data later?
NOTE: I have already tested the rest of my download function with a valid file entered manually, and everything works perfectly. The only part that isn't working is downloading and reading the spreadsheet data.
Is it possible to parse this data into an array WITHOUT needing to save to a physical file? Basically no, since the file comes from a remote server, unless that server allows you to read it live.
Your code is nearly right; only the order is wrong. You must invoke the callback after the writing is done, and that will fix your empty-file issue.
Here is how:
const https = require("https");
const fs = require("fs");
const path = require("path");
const xlsx = require("node-xlsx");
function download(url, callback) {
const filename = path.basename(url);
const req = https.get(url, function (res) {
const fileStream = fs.createWriteStream("result.xlsx");
res.pipe(fileStream);
fileStream.on("error", function (err) {
console.log("Error writting to the stream.");
console.log(err);
});
fileStream.on("close", function () {
const obj = xlsx.parse('result.xlsx');// or whatever you named it
callback(obj[0]);
});
fileStream.on("finish", function () {
fileStream.close();
});
});
req.on("error", function (err) {
console.log("error downloading the file");
console.log(err);
});
}
module.exports.download = download;
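A quick usage sketch (mine, not the answerer's); the URL and module path are placeholders, and the callback receives the first worksheet object that node-xlsx produces ({ name, data }).

const { download } = require('./download'); // hypothetical module path

download('https://example.com/export/report.xlsx', function (sheet) {
  // sheet.data is an array of rows, each row an array of cell values
  console.log(sheet.name, sheet.data.length, 'rows');
});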
I've found a few articles explaining the process, but most of them are out of date.
How do you handle image upload in Node.js?
I'm using multer and it works perfectly. It stores your image locally, and you can also send it to MongoDB if you want. This is how I am doing it:
var multer = require('multer');
var done = false;

//define the model you are working with
var Slides = require('./models/work');

//note: this is multer's older (pre-1.x) options API
app.use(multer({
  dest: './public/img',
  rename: function (fieldname, filename) {
    return filename + Date.now();
  },
  onFileUploadStart: function (file) {
    console.log(file.originalname + ' is starting ...');
  },
  onFileUploadComplete: function (file) {
    console.log(file.fieldname + ' uploaded to ' + file.path);
    done = true;

    var id = file.fieldname;
    var str = file.path;
    var image = str.replace('public', '');

    var slidegegevens = {
      "id": id,
      "img": image
    };

    var s = new Slides(slidegegevens);
    s.save(function (err, slidegegevens) {
      console.log(err);
      console.log('slidegegevens: ' + slidegegevens);
    });
  }
}));
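Since the question complains about out-of-date articles, here is my rough equivalent using multer's current (1.x+) storage-engine API; the destination and naming scheme mirror the options above, and the route and field name are assumptions.

var multer = require('multer');

// diskStorage replaces the old dest/rename options
var storage = multer.diskStorage({
  destination: './public/img',
  filename: function (req, file, cb) {
    cb(null, file.originalname + Date.now());
  }
});
var upload = multer({ storage: storage });

// per-route middleware instead of a global app.use(multer(...))
app.post('/slides', upload.single('slide'), function (req, res) {
  res.json({ img: req.file.path.replace('public', '') });
});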
I use busboy middleware in Express to parse out images in a multipart/form-data request, and it works pretty nicely.
My code looks something like:
const busboy = require('connect-busboy');

//...

app.use(busboy());

app.use(function parseUploadMW(req, res, next) {
  req.busboy.on('file', function onFile(fieldname, file, filename, encoding, mimetype) {
    file.fileRead = [];

    file.on('data', function onData(chunk) {
      this.fileRead.push(chunk);
    });

    file.on('error', function onError(err) {
      console.log('Error while buffering the stream: ', err);
      //handle error
    });

    file.on('end', function onEnd() {
      var finalBuffer = Buffer.concat(this.fileRead);

      req.files = req.files || {};
      req.files[fieldname] = {
        buffer: finalBuffer,
        size: finalBuffer.length,
        filename: filename,
        mimetype: mimetype.toLowerCase()
      };
    });
  });

  req.busboy.on('finish', function onFinish() {
    next();
  });

  req.pipe(req.busboy);
});
Then the files will be available to you on the req object at req.files in your Express routes.
This technique works fine for small images. If you are doing some hardcore uploading, you may want to consider streaming the files to their destination (such as S3 or similar) to save memory, which can also be achieved with busboy, as in the sketch below.
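A minimal sketch of that streaming idea (my addition, assuming the aws-sdk v2 S3 client, a bucket of your own, and the same connect-busboy setup as above); the busboy file stream is handed to s3.upload as the body, so nothing is buffered in memory.

const AWS = require('aws-sdk');
const busboy = require('connect-busboy');

const s3 = new AWS.S3();

app.use(busboy());
app.post('/upload', (req, res) => {
  req.busboy.on('file', (fieldname, file, filename) => {
    // s3.upload accepts a readable stream and handles multipart uploads internally
    s3.upload({ Bucket: 'my-bucket', Key: filename, Body: file }, (err, data) => {
      if (err) return res.status(500).send(err.message);
      res.json({ location: data.Location });
    });
  });
  req.pipe(req.busboy);
});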
Another package that is popular and also decent is: https://github.com/andrewrk/node-multiparty.
I think it is better to use formidable to handle incoming images.
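For illustration (my sketch, not the answerer's code), a bare-bones Express route using formidable's classic IncomingForm API, which several answers here rely on; the upload directory and field name are assumptions.

const formidable = require('formidable');

app.post('/images', (req, res) => {
  const form = new formidable.IncomingForm();
  form.uploadDir = './public/img';   // where formidable writes the files
  form.keepExtensions = true;

  form.parse(req, (err, fields, files) => {
    if (err) return res.status(500).send(err.message);
    // files.image is the uploaded file (field name 'image' is an assumption)
    res.json({ path: files.image.path });
  });
});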
I am working with Node.js and MongoDB and want to store multiple files at one time. I am using GridFS for the files.
My code works fine for a single file upload.
var filename = req.files.file.name;
var tmpFilepath = "./upload/" + guid();
console.log(tmpFilepath);

fs.rename(req.files.file.path, tmpFilepath);

var writestream = gfs.createWriteStream({ filename: filename, metadata: project._id });

fs.createReadStream(tmpFilepath)
  .on('end', function () {
    console.log("file Saved");
  })
  .on('error', function () {
    console.log("error encountered");
    // res.send('ERR');
  })
  // and pipe it to gfs
  .pipe(writestream);

writestream.on('close', function (file) {
  fs.unlink(tmpFilepath);
});
How can I make it work for uploading multiple files?
Wrap it in an async.each call; async is an npm module you can install.
var async = require('async');

var files = [];

async.each(req.files, function (item, callback) {
  /* Apply your gridfs logic here */
  files.push('somedetails');
  callback();
}, function (err) {
  /* finished processing all the files... do something with them */
  console.log(files);
});
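To make that concrete, here is a rough sketch of my own (async, gfs, fs and guid are assumed to be set up as in the snippets above, and req.files.file is assumed to be an array when several files are posted) that runs the question's single-file logic once per file.

// normalize to an array so single and multiple uploads are handled the same way
var uploads = [].concat(req.files.file);

async.each(uploads, function (upload, done) {
  var tmpFilepath = "./upload/" + guid();

  fs.rename(upload.path, tmpFilepath, function (err) {
    if (err) return done(err);

    var writestream = gfs.createWriteStream({ filename: upload.name, metadata: project._id });

    fs.createReadStream(tmpFilepath)
      .on('error', done)
      .pipe(writestream);

    writestream.on('close', function () {
      fs.unlink(tmpFilepath, done); // remove the temp file, then signal this upload is finished
    });
  });
}, function (err) {
  if (err) return res.send('ERR');
  res.send('All files saved');
});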
I am working on an application that sends an audio file to a server powered by Sails.js.
I need to convert this audio file on the fly and send the converted data to Amazon S3 using Node streams.
I don't want to store data on the server; I want to stream the uploaded file directly to S3 after it has been converted.
Do you know a way to do that?
I tried using formidable, but I couldn't get it working. Did anyone succeed in implementing such a thing?
Thanks
EDIT
As jibsales noticed, it will probably be better if I show you a piece of what I've tried so far. So basically my strategy is to use formidable, fluent-ffmpeg and knox with streams.
I plan to receive the file stream with formidable and write the chunks of received data into a first stream (stream1) that will be the entry point for the conversion with fluent-ffmpeg. Then fluent-ffmpeg writes the output into stream2, which is the entry point for knox.
The first problem I face is that formidable doesn't seem to work. However, I'm not sure my strategy is good either...
The code so far looks like this:
upload: function (req, res) {
  //two streams to transfer file data
  var stream1 = new stream.Stream(); //stream for the incoming file data
  var stream2 = new stream.Stream(); //stream for the converted file data

  var client = knox.createClient({
    key: 'APIKEY',
    secret: 'SECRET',
    bucket: 'bucket'
  });

  //Using formidable to access data chunks
  var form = new formidable.IncomingForm();

  form.parse(req, function (err, fields, files) { //form.parse is not called
    if (err) {
      return res.json(err);
    } else {
      return res.send('ok');
    }
  });

  //overriding form.onPart to get the file data chunk
  form.onPart = function (part) {
    sails.log('getting part...');

    if (!part.filename) {
      form.handlePart(part);
      return;
    }

    //we put the data chunk in stream1 to convert it
    part.on('data', function (chunk) {
      stream1.write(chunk[1]);
    });
  };

  form.on('error', function (err) {
    return sails.log(err);
  });

  form.on('progress', function (bytesReceived, bytesExpected) {
    sails.log(bytesReceived);
  });

  //conversion process
  var proc = new ffmpeg({ source: stream1 })
    .withAudioCodec('libfdk_aac')
    .toFormat('mp3')
    .writeToStream(stream2, { end: true }, function (retcode, error) {
      console.log('file has been converted successfully');
    });

  client.putStream(stream2, '/file.mp3', headers, function (err, response) {
    return res.send(response);
  });
},
The reason formidable isn't working is that the default Sails body parser parses the request before formidable can get to it. To get this working, you'll have to bypass the Sails body parser for multipart form uploads. So, in config/express.js:
var express = require('sails/node_modules/express');

module.exports.express = {
  bodyParser: function (options) {
    return function (req, res, next) {
      if (!req.headers['content-type'] || req.headers['content-type'].indexOf('multipart/form-data') === -1) {
        return express.bodyParser()(req, res, next);
      } else {
        return next();
      }
    };
  }
};
This just skips the body parser entirely if the content-type header includes multipart/form-data; otherwise, it runs the default Express body parser. Note that the default Sails body parser is slightly fancier than the one that comes with Express (if it can't parse the request, it fakes an application/json header and retries), so if you want that extra goodness you'll have to copy/paste the code from the core into the bodyParser function above. But in most cases you won't miss it ;-)
We're working on a better file parser for Sails that will hopefully take care of some of this for you, but in the meantime this is your best bet!
I've figured out a way to convert files on the fly with fluent-ffmpeg and formidable. However, it seems impossible for the moment to stream the converted chunks coming out of the ffmpeg conversion directly to Amazon, as you must specify the "Content-Length" header, which is unknown during the conversion...
For the first part (the client upload), I first had to disable the Express bodyParser on the upload route in config/express.js:
var express = require('sails/node_modules/express');

module.exports.express = {
  bodyParser: function () {
    return function (req, res, next) {
      console.log(req.path);
      if (!(req.path === '/upload' && req.method === 'POST')) {
        return express.bodyParser()(req, res, next);
      } else {
        return next();
      }
    };
  }
};
For the implementation I used a transform stream that does basically nothing: it just passes through the right parts of the uploaded data (the ones containing file data), linking the formidable parser to fluent-ffmpeg. For now I can only save the converted file to disk before sending it to Amazon...
upload: function (req, res) {
  var Transform = Stream.Transform;

  var client = knox.createClient({
    key: 'KEY',
    secret: 'SECRET',
    bucket: 'BUCKET',
    region: 'eu-west-1' //don't forget the region (my bucket is in Europe)
  });

  //pass-through transform stream for the incoming file data
  function InputStream(options) {
    if (!(this instanceof InputStream)) {
      return new InputStream(options);
    }
    Transform.call(this, options);
    return;
  }
  util.inherits(InputStream, Transform);

  InputStream.prototype._transform = function (chunk, enc, cb) {
    this.push(chunk);
    cb();
  };

  var inputDataStream = new InputStream();

  var form = new formidable.IncomingForm();

  form.parse(req, function (err, fields, files) {
    if (err) {
      return res.send(err);
    } else {
      return;
    }
  });

  form.onPart = function (part) {
    if (!part.filename) {
      form.handlePart(part);
      return;
    }

    //we put the data chunk in the transform stream to convert it,
    //pausing formidable whenever the stream's buffer is full
    part.on('data', function (chunk) {
      if (!inputDataStream.write(chunk)) {
        form.pause();
        inputDataStream.once('drain', function () { form.resume(); });
      }
    });

    part.on('end', function (chunk) {
      inputDataStream.end(chunk);
    });
  };

  var proc = new ffmpeg({ source: inputDataStream })
    .withAudioBitrate('64k')
    .withAudioCodec('libmp3lame')
    .toFormat('mp3')
    .saveToFile('file.mp3', function (retcode, error) {
      console.log('file has been converted successfully');
      res.send('ok');

      var upload = new MultiPartUpload({
        client: client,
        objectName: 'file.mp3',
        file: 'file.mp3'
      }, function (err, body) {
        if (err) {
          console.log(err);
          return;
        }
        console.log(body);
        return;
      });
    });
},
EDIT
Using knox-mpu you can actually stream data to Amazon S3 directly! You just have to create another transform stream to act as the source of your upload, and knox-mpu does the magic (rough sketch below). Thank you all!
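Here is my rough sketch of that final setup (not the poster's actual code), under the assumption that knox-mpu accepts a readable stream via its stream option as the upload source; it reuses the InputStream pass-through class, inputDataStream, client, ffmpeg and res from the code above, and the converted output is piped through a second pass-through stream straight into the multipart upload.

var outputDataStream = new InputStream(); // second pass-through stream for the converted data

// ffmpeg now writes the converted audio into the stream instead of a file on disk
new ffmpeg({ source: inputDataStream })
  .withAudioBitrate('64k')
  .withAudioCodec('libmp3lame')
  .toFormat('mp3')
  .writeToStream(outputDataStream, { end: true });

// knox-mpu splits the stream into parts, so no Content-Length is needed up front
var upload = new MultiPartUpload({
  client: client,
  objectName: 'file.mp3',
  stream: outputDataStream
}, function (err, body) {
  if (err) return console.log(err);
  console.log(body);
  res.send('ok');
});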
I am creating an application that takes some file uploads and sends them straight up to S3. I would prefer not to even have the tmp file on my server, so I am using the Knox module and would like to take the raw stream from Formidable and send it over Knox to S3. I have done something similar using Knox to download a file with this code:
knox.downloads.get(widget.download).on('response', function (sres) {
  res.writeHead(200, {
    'Content-Type': 'application/zip',
    'Content-Length': sres.headers['content-length'],
    'Content-Disposition': 'attachment; filename=' + widget.download
  });
  util.pump(sres, res);
}).end();
Now I would like to do something similar in the opposite direction (file upload from the browser to S3).
So far I have written an event handler to capture each piece of data from the file as it's being uploaded:
var form = new formidable.IncomingForm();

form.onPart = function (part) {
  if (!part.filename) {
    form.handlePart(part);
  } else {
    if (part.name == 'download') {
      // Upload to download bucket
      controller.putDownload(part);
    } else {
      // Upload to the image bucket
      controller.putImage(part);
    }
    //res.send(sys.inspect(part));
  }
};

form.parse(req, function (err, fields, files) {
  if (err) {
    res.json(err);
  } else {
    res.send(sys.inspect({ fields: fields, files: files }), { 'content-type': 'text/plain' });
    //controller.createWidget(res,fields,files);
  }
});

controller.putDownload = function (part) {
  part.addListener('data', function (buffer) {
    knox.download.putStream(data, part.filename, function (err, s3res) {
      if (err) throwError(err);
      else {
        console.log(s3res);
      }
    });
  });

  knox.downloads.putStream(part, part.filename, function (err, s3res) {
    if (err) throwError(err);
    else {
      console.log(s3res);
    }
  });
};
But the data event only gives me the buffer. So is it possible to capture the stream itself and push it to S3?
What you want to do is override the Form.onPart method:
IncomingForm.prototype.onPart = function (part) {
  // this method can be overwritten by the user
  this.handlePart(part);
};
Formidable's default behavior is to write the part to a file. You don't want that. You want to handle the 'part' events to write to the knox download. Start with this:
form.onPart = function (part) {
  if (!part.filename) {
    // let formidable handle all non-file parts
    form.handlePart(part);
    return;
  }
Then open the knox request and handle the raw part events yourself:
  part.on('data', function (data) {
    req.write(data);
  });

  part.on('end', function () {
    req.end();
  });

  part.on('error', function (err) {
    // handle this too
  });
As a bonus, if req.write(data) returns false, that means the send buffer is full. You should pause the Formidable parser, and when you get a drain event from the Knox stream you should resume it. A sketch of the whole thing is below.
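Here is my rough end-to-end sketch of what that answer describes (not the answerer's code). It assumes Knox's lower-level put() request, which needs a Content-Length up front; since a formidable part doesn't expose its size directly, the size here is assumed to be supplied by the caller (for example, sent by the client as a separate field). `form` is the formidable IncomingForm from the snippet above.

controller.putDownload = function (part, size) {
  // size must be known for knox's put(); it is assumed to come from the caller
  var s3req = knox.downloads.put('/' + part.filename, {
    'Content-Length': size,
    'Content-Type': 'application/octet-stream'
  });

  part.on('data', function (data) {
    if (!s3req.write(data)) {
      form.pause();                       // S3 send buffer is full
      s3req.once('drain', function () {   // resume once it has drained
        form.resume();
      });
    }
  });

  part.on('end', function () {
    s3req.end();
  });

  s3req.on('response', function (s3res) {
    console.log('S3 responded with', s3res.statusCode);
  });
};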
Use multiparty instead; it supports the kind of streaming you want. It even has an example of streaming directly to S3: https://github.com/superjoe30/node-multiparty/blob/master/examples/s3.js
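The gist of that example, as I understand it (a sketch, not a verbatim copy): multiparty exposes part.byteCount, which gives Knox the Content-Length it needs, so each part can be piped straight to S3. The route, credentials and bucket are placeholders.

var multiparty = require('multiparty');
var knox = require('knox');

var client = knox.createClient({ key: 'KEY', secret: 'SECRET', bucket: 'BUCKET' });

app.post('/upload', function (req, res) {
  var form = new multiparty.Form();

  form.on('part', function (part) {
    if (!part.filename) {
      part.resume(); // ignore non-file fields
      return;
    }

    var headers = {
      'Content-Length': part.byteCount, // size of the part, known up front
      'Content-Type': part.headers['content-type']
    };

    client.putStream(part, '/' + part.filename, headers, function (err, s3res) {
      if (err) return res.status(500).end();
      res.status(s3res.statusCode).end();
    });
  });

  form.on('error', function (err) {
    res.status(500).end();
  });

  form.parse(req);
});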
In an Express middleware, I use formidable together with PassThrough to stream-upload a file to S3 (in my case, to Minio, which is S3-compatible, through the Minio SDK; I believe it works for AWS S3 too with the same Minio SDK).
Here is the sample code.
const formidable = require('formidable')
const { PassThrough } = require('stream')

const form = new formidable.IncomingForm()
const pass = new PassThrough()
const fileMeta = {}

form.onPart = part => {
  if (!part.filename) {
    form.handlePart(part)
    return
  }

  fileMeta.name = part.filename
  fileMeta.type = part.mime

  part.on('data', function (buffer) {
    pass.write(buffer)
  })
  part.on('end', function () {
    pass.end()
  })
}

form.parse(req, err => {
  if (err) {
    req.minio = { error: err }
    next()
  } else {
    handlePostStream(req, next, fileMeta, pass)
  }
})
And handlePostStream looks like the following, for your reference:
const uuidv1 = require('uuid/v1')

const handlePostStream = async (req, next, fileMeta, fileStream) => {
  let filename = uuidv1()
  try {
    const metaData = {
      'content-type': fileMeta.type,
      'file-name': Buffer.from(fileMeta.name).toString('base64')
    }
    const minioClient = /* Get Minio Client */
    await minioClient.putObject(MINIO_BUCKET, filename, fileStream, metaData)
    req.minio = { post: { filename: `${filename}` } }
  } catch (error) {
    req.minio = { error }
  }
  next()
}
You can find the source code on GitHub, and its unit tests too.
There is no way for you to capture the stream, because the data has to be translated by Formidable. The buffer you're given is the file contents in chunks of buffer.length. This might be a problem because, looking at Formidable's docs, it appears that until the file is completely uploaded it can't reliably report the file size, and Knox's put method might need that.
I've never used Knox this way before, but you might have some luck with something like this:
controller.putDownload = function (part) {
  var req = knox.download.put(part.filename, {
    'Content-Type': 'text/plain'
  });

  part.addListener('data', function (buffer) {
    req.write(buffer);
  });

  req.on('response', function (res) {
    // error checking
  });

  req.end();
};
I'm a little unsure about the response-checking bits, but see if you can whip that into shape. Also, "Streaming an octet stream from request to S3 with knox on node.js" has a writeup that may be useful to you.