How to clear a readable stream while uploading an image in Node.js

I am trying to upload multiple images with the help of the multiparty module. I only want to accept a particular kind of image, say one whose name is 'image.jpg'. But when an image doesn't meet that criterion, nothing happens and I don't get any response. Here is my code.
req.form.on('part', function(part) {
  if (part.filename === 'image.jpg') {
    var out = fs.createWriteStream('image/' + part.filename);
    part.pipe(out);
  } else {
    // Here I want to clear the stream so the next 'part' can be processed
  }
});
req.form.on('close', function() {
  res.send('uploaded!');
});
I think the problem is that I'm not draining the readable stream that contains the 'part'. If I write that stream to disk and then delete the file, it works. But I don't want to write any image to my file system if it doesn't meet the criteria. How can I achieve that?

To complement robertklep's answer, here is another possibility: a custom Writable stream that blackholes the data. When you pipe() the part stream into the blackhole, it exhausts its source. This keeps your code consistent by using streams everywhere instead of calling read() directly.
var stream = require('stream'); // core module, needed for stream.Writable

req.form.on('part', function(part) {
  var out;
  if (part.filename === 'image.jpg') {
    out = fs.createWriteStream('image/' + part.filename);
  } else {
    out = new stream.Writable();
    out._write = function (chunk, encoding, done) {
      done(); // Don't do anything with the data
    };
  }
  part.pipe(out);
});

Not entirely sure if this is going to work, but I think you can read the entire part stream until it's exhausted:
req.form.on('part', function(part) {
  if (part.filename === 'image.jpg') {
    var out = fs.createWriteStream('image/' + part.filename);
    part.pipe(out);
  } else {
    while (part.read() !== null);
  }
});
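For completeness, a plain-streams alternative (not from either answer above, just standard Readable behavior): calling resume() on a readable stream that has no 'data' listeners switches it into flowing mode and simply discards the data, which also lets the parser move on to the next part. A minimal sketch:
req.form.on('part', function(part) {
  if (part.filename === 'image.jpg') {
    part.pipe(fs.createWriteStream('image/' + part.filename));
  } else {
    part.resume(); // drain and discard this part
  }
});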

Related

How can I resume after an error event in a piped stream in Node.js?

After I emit an error event in MyWritableStream, data transmission stops. What do I need to do to resume the data transfer?
var readable = fs.createReadStream('test.txt');
var writable = new MyWritableStream();

writable.on('error', function(error) {
  console.log('error', error);
  // How can I resume?
});
writable.on('finish', function() {
  console.log('finished');
});

readable.pipe(writable);
I know this question is old, but you might want to check out https://github.com/miraclx/xresilient
I built it for this exact reason (it works best with seekable streams).
You define a function that returns a readable stream, and the library tracks the number of bytes that have passed through before an error occurs.
Once the readable stream emits an 'error' event, the library calls your function again with the number of bytes already read, so you can index into the stream source.
Example:
const fs = require('fs');
const xresilient = require('xresilient');

const readable = xresilient(({bytesRead}) => {
  return generateSeekableStreamSomehow({start: bytesRead});
}, {retries: 5});

const writable = fs.createWriteStream('file.test');
readable.pipe(writable);
File streams are indexable with the start option of fs.createReadStream().
HTTP requests are indexable with the Range HTTP header.
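For example, the file-stream case could fill in generateSeekableStreamSomehow() like this (a minimal sketch; 'test.txt' and 'copy.txt' are placeholder paths):
const fs = require('fs');
const xresilient = require('xresilient');

// Resume reading the local file from the last good byte offset after each error.
const readable = xresilient(({bytesRead}) => {
  return fs.createReadStream('test.txt', {start: bytesRead});
}, {retries: 5});

readable.pipe(fs.createWriteStream('copy.txt'));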
Check it out.
https://www.npmjs.com/package/xresilient
I am not sure if this is normal practice, but I can't see another solution for now and it works for me. If you can suggest a more accurate solution, please do.
We can keep track of the readable stream instance by listening for the 'pipe' event on the writable one:
var util = require('util');
var Writable = require('stream').Writable;

function WriteableStream(options) {
  Writable.call(this, options);
  this.source = null;
  var instance = this;
  this.on('pipe', function(source) {
    instance.source = source;
  });
}
util.inherits(WriteableStream, Writable);
So when we emit an 'error' event and the readable stream is unpiped automatically, we can re-pipe it ourselves:
WriteableStream.prototype._write = function(chunk, encoding, done) {
  this.emit('error', new Error('test')); // unpipes the readable
  done();
};

WriteableStream.prototype.resume = function() {
  this.source.pipe(this); // re-pipes the readable
};
Finally, we use it in the following way:
var readable = fs.createReadStream(file);
var writeable = new WriteableStream();

writeable.on('error', function(error) {
  console.log('error', error);
  writeable.resume();
});

readable.pipe(writeable);

How can I convert an uploaded file on the fly and stream it directly to Amazon S3 within Sails?

I am working on an application that sends an audio file to a server powered by Sails.js.
I need to convert this audio file on the fly and send the converted data to Amazon S3 using Node streams.
I don't want to store data on the server; instead, I want to stream the uploaded file directly to S3 after it has been converted.
Do you know a way to do that?
I tried using formidable, but I couldn't get it working. Did anyone succeed in implementing such a thing?
Thanks
EDIT
As jibsales noticed, it will probably be better if I show you a piece of what I've tried so far. So basically my strategy is to use formidable, fluent-ffmpeg and knox with streams.
I plan to receive the file stream with formidable and write the chunks of received data into a first stream (stream1) that will be the entry point for the conversion with fluent-ffmpeg. Then fluent-ffmpeg writes the output into stream2, which is the entry point of Knox.
The first problem I have to face is that formidable doesn't seem to work. However, I'm not sure my strategy is good...
The code so far looks like this:
upload : function(req, res) {
  // two streams to transfer file data
  var stream1 = new stream.Stream(); // stream for the incoming file data
  var stream2 = new stream.Stream(); // stream for the converted file data

  var client = knox.createClient({
      key: 'APIKEY'
    , secret: 'SECRET'
    , bucket: 'bucket'
  });

  // Using formidable to access data chunks
  var form = new formidable.IncomingForm();
  form.parse(req, function(err, fields, files) { // form.parse is not called
    if (err) {
      return res.json(err);
    } else {
      return res.send('ok');
    }
  });

  // overriding form.onPart to get the file data chunks
  form.onPart = function(part) {
    sails.log('getting part...');
    if (!part.filename) {
      form.handlePart(part);
      return;
    }
    // we put the data chunk in stream1 to convert it
    part.on('data', function(chunk) {
      stream1.write(chunk[1]);
    });
  };

  form.on('error', function(err) {
    return sails.log(err);
  });
  form.on('progress', function(bytesReceived, bytesExpected) {
    sails.log(bytesReceived);
  });

  // conversion process
  var proc = new ffmpeg({ source: stream1 })
    .withAudioCodec('libfdk_aac')
    .toFormat('mp3')
    .writeToStream(stream2, { end: true }, function(retcode, error) {
      console.log('file has been converted successfully');
    });

  client.putStream(stream2, '/file.mp3', headers, function(err, response) {
    return res.send(response);
  });
},
The reason formidable isn't working is that the default Sails body parser parses the request before formidable can get to it. To get this working, you'll have to bypass the Sails body parser for multipart form uploads. So, in config/express.js:
var express = require('sails/node_modules/express');

module.exports.express = {
  bodyParser: function(options) {
    return function(req, res, next) {
      if (!req.headers['content-type'] || req.headers['content-type'].indexOf('multipart/form-data') === -1) {
        return express.bodyParser()(req, res, next);
      } else {
        return next();
      }
    };
  }
};
This just skips the body parser entirely if the content-type header includes multipart/form-data; otherwise, it executes the default Express body parser. Note that the default Sails body parser is slightly fancier than what comes with Express (if it can't parse the request, it fakes an application/json header and retries), so if you want that extra goodness you'll have to copy/paste the code from the core into the bodyParser function above. But in most cases you won't miss it ;-)
We're working on a better file parser for Sails that will hopefully take care of some of this for you, but in the meantime this is your best bet!
I've figured out a way to convert files on the fly with fluent-ffmpeg and formidable. However, it seems impossible for the moment to stream the converted chunks coming out of the ffmpeg conversion directly to Amazon, as you must specify the "Content-Length" header, which is unknown during the conversion...
For the first part (the client upload), I first had to disable the Express bodyParser on the upload route in config/express.js:
var express = require('sails/node_modules/express');

module.exports.express = {
  bodyParser: function() {
    return function(req, res, next) {
      console.log(req.path);
      if (!(req.path === '/upload' && req.method === 'POST')) {
        return express.bodyParser()(req, res, next);
      } else {
        return next();
      }
    };
  }
};
For the implementation I used a transform stream that does basically nothing: it just passes through the right parts of the uploaded data (the ones related to file data) and links the formidable parser to fluent-ffmpeg. For now I can only save the converted file to disk before sending it to Amazon...
upload : function(req, res) {
  var Transform = Stream.Transform; // stream for the incoming file data

  var client = knox.createClient({
      key: 'KEY'
    , secret: 'SECRET'
    , bucket: 'BUCKET'
    , region: 'eu-west-1' // don't forget the region (my bucket is in Europe)
  });

  function InputStream(options) {
    if (!(this instanceof InputStream)) {
      return new InputStream(options);
    }
    Transform.call(this, options);
    return;
  }
  util.inherits(InputStream, Transform);

  InputStream.prototype._transform = function(chunk, enc, cb) {
    this.push(chunk);
    cb();
  };

  var inputDataStream = new InputStream();

  var form = new formidable.IncomingForm();
  form.parse(req, function(err, fields, files) {
    if (err) {
      return res.send(err);
    } else {
      return;
    }
  });

  form.onPart = function(part) {
    if (!part.filename) {
      form.handlePart(part);
      return;
    }
    // we put the data chunk in inputDataStream to convert it,
    // pausing the form whenever the stream's buffer is full
    part.on('data', function(chunk) {
      if (!inputDataStream.write(chunk)) {
        form.pause();
        inputDataStream.once('drain', function() { form.resume(); });
      }
    });
    part.on('end', function(chunk) {
      inputDataStream.end(chunk);
    });
  };

  var proc = new ffmpeg({ source: inputDataStream })
    .withAudioBitrate('64k')
    .withAudioCodec('libmp3lame')
    .toFormat('mp3')
    .saveToFile('file.mp3', function(retcode, error) {
      console.log('file has been converted successfully');
      res.send('ok');
      var upload = new MultiPartUpload({
        client: client,
        objectName: 'file.mp3',
        file: 'file.mp3'
      }, function(err, body) {
        if (err) {
          console.log(err);
          return;
        }
        console.log(body);
        return;
      });
    });
},
EDIT
Using knox-mpu you can actually stream data to Amazon S3 directly! You just have to create another transform stream that will be the source of your upload, and knox-mpu does the magic. Thank you all!
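For reference, here is a minimal sketch of what that final setup might look like. Two assumptions: that knox-mpu's MultiPartUpload accepts a readable stream via a stream option (check the knox-mpu README to confirm), and that inputDataStream and client are the transform stream and Knox client from the code above.
var PassThrough = require('stream').PassThrough;
var outputStream = new PassThrough(); // carries the converted audio chunks

// Write the conversion result into the pass-through instead of a file
new ffmpeg({ source: inputDataStream })
  .withAudioBitrate('64k')
  .withAudioCodec('libmp3lame')
  .toFormat('mp3')
  .writeToStream(outputStream, { end: true }, function(retcode, error) {
    console.log('conversion finished');
  });

// knox-mpu uploads in parts, so no Content-Length is needed up front
var upload = new MultiPartUpload({
  client: client,
  objectName: 'file.mp3',
  stream: outputStream // assumption: knox-mpu's stream option
}, function(err, body) {
  if (err) return console.log(err);
  console.log(body);
});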

Convert stream into buffer?

How do I convert a stream into a buffer in Node.js? Here is my code to parse a file in a POST request in Express.
app.post('/upload', express.multipart({
  defer: true
}), function(req, res) {
  req.form.on('part', function(part) {
    // Here I want to convert the streaming part into a buffer
    // and do some buffer-specific task
    var out = fs.createWriteStream('image/' + part.filename);
    part.pipe(out);
  });
  req.form.on('close', function() {
    res.send('uploaded!');
  });
});
Instead of piping, you can attach readable and end event handlers to the part stream to read it:
var buffers = [];
part.on('readable', function() {
  for (;;) {
    var buffer = part.read();
    if (!buffer) { break; }
    buffers.push(buffer);
  }
});
part.on('end', function() {
  var buffer = Buffer.concat(buffers);
  // ...do your stuff...

  // write to file:
  fs.writeFile('image/' + part.filename, buffer, function(err) {
    // handle error, return response, etc...
  });
});
Note: this buffers the entire part in memory; if you use 'data' events instead, the whole upload is read into memory just the same (see the sketch below).
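For completeness, a minimal sketch of the 'data'-event (flowing mode) variant; it accumulates the same chunks:
var buffers = [];
part.on('data', function(chunk) {
  buffers.push(chunk);
});
part.on('end', function() {
  var buffer = Buffer.concat(buffers);
  // ...do your stuff...
});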
You could also create a custom transform stream to transform the incoming data, but that might not be trivial.
You can use the stream-to module, which can convert a readable stream's data into an array or a buffer:
var streamTo = require('stream-to');

req.form.on('part', function (part) {
  streamTo.buffer(part, function (err, buffer) {
    // Insert your business logic here
  });
});
If you want a better understanding of what's happening behind the scenes, you can implement the logic yourself using a Writable stream. As a writable stream implementor, you only have to define one function: the _write method, which is called every time data is written to the stream. Once the input stream has finished and the writable has been ended, the 'finish' event is emitted; we then create a single buffer using Buffer.concat.
var stream = require('stream');

var converter = new stream.Writable();

// We'll store all the received chunks inside this array
converter.data = [];

converter._write = function (chunk, encoding, done) {
  converter.data.push(chunk);
  done(); // signal that we're ready for the next chunk
};

// Will be emitted once the input stream has ended,
// i.e. no more data will be provided
converter.on('finish', function() {
  // Create a single buffer from all the received chunks
  var b = Buffer.concat(this.data);
  // Insert your business logic here
});
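To plug this into the upload code from the question, pipe the part into the converter (a sketch assuming a single uploaded part; for multiple parts you would create one converter per part):
req.form.on('part', function(part) {
  part.pipe(converter); // 'finish' fires on converter once the part has been fully read
});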

streams with percentage complete

I need to stream a file in base64 to an http endpoint using something like request or superagent. What is the best way to figure out what percentage of the file has been uploaded?
I assume I can create the read stream using something like:
fs.createReadStream('/tmp/cats.jpg', {encoding: 'base64'})
Any examples using one of the above libraries would be greatly appreciated.
I think you can use progress-stream.
Here is an example from the package:
var progress = require('progress-stream');
var fs = require('fs');

var stat = fs.statSync(filename);

var str = progress({
  length: stat.size,
  time: 100 /* ms */
});

str.on('progress', function(progress) {
  console.log(progress);
  /*
  {
    percentage: 9.05,
    transferred: 949624,
    length: 10485760,
    remaining: 9536136,
    eta: 42,
    runtime: 3,
    delta: 295396,
    speed: 949624
  }
  */
});

fs.createReadStream(filename)
  .pipe(str)
  .pipe(fs.createWriteStream(output));
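To tie this back to the question (uploading rather than copying to disk), here is a minimal sketch that pipes the same progress stream into an HTTP upload with the request library; the endpoint URL is a placeholder. Note that if you read the file with encoding: 'base64', the length passed to progress() should be the base64 length (roughly 4/3 of stat.size), not the raw file size.
var request = require('request');

fs.createReadStream(filename)
  .pipe(str)                                         // emits 'progress' events as data flows through
  .pipe(request.post('https://example.com/upload')); // streams the body to the endpoint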
I was looking for an answer to a similar issue, and thanks to Alberto Zaccagni's answer I was able to get some code working.
So for the people who don't want to piece the puzzle together themselves, here is the code (edited for Stack Overflow):
var zipfile = "my_large_archive.zip";

// Get the size of the file
fs.stat(zipfile, function (err, stats) {
  var zipSize = stats.size;
  var uploadedSize = 0; // incremented by on('data') to keep track of the amount of data we've uploaded

  // Create a new read stream so we can plug events on it, and get the upload progress
  var zipReadStream = fs.createReadStream(zipfile);

  zipReadStream.on('data', function(buffer) {
    var segmentLength = buffer.length;
    // Increment the uploaded data counter
    uploadedSize += segmentLength;
    // Display the upload percentage
    console.log("Progress:\t", ((uploadedSize / zipSize * 100).toFixed(2) + "%"));
  });

  // Some other events you might want for your code
  zipReadStream.on('end', function() {
    console.log("Event: end");
  });
  zipReadStream.on('close', function() {
    console.log("Event: close");
  });

  var FormData = require('form-data');
  var form = new FormData();
  form.append('apikey', 'f4sd5f4sdf6ds456'); // just some POST parameters I need to send to the upload endpoint
  form.append('file', zipReadStream); // the zip file, passed as an fs.createReadStream instance

  // Submit the form and the file
  form.submit('http://www.someserver.com/upload', function(err, res) {
    if (err) {
      console.log("Oops! We encountered an error :(\n\n", err);
      return false;
    }
    console.log("Your file has been uploaded.");
    res.resume(); // needed if you use this code in a CLI, so that execution finishes and users can enter new commands
  });
});
In Node.js we have the Readable stream, which emits a 'data' event when it receives a chunk of data. Knowing the file size, you can easily keep track of how much data passes through the 'data' event handler and update the percentage from there.
Get the file size with:
var fs = require('fs');

fs.watchFile('yourfile', function () {
  fs.stat('yourfile', function (err, stats) {
    console.log(stats.size);
  });
});

Accessing the raw file stream from a node-formidable file upload

I am creating an application that takes some file uploads and sends them straight up to S3. I would prefer not to even have the tmp file on my server, so I am using the Knox module and would like to take the raw stream from formidable and send it over Knox to S3. I have done something similar using Knox to download a file with this code:
knox.downloads.get(widget.download).on('response', function(sres) {
  res.writeHead(200, {
    'Content-Type': 'application/zip',
    'Content-Length': sres.headers['content-length'],
    'Content-Disposition': 'attachment; filename=' + widget.download
  });
  util.pump(sres, res);
}).end();
Now I would like to do something similar in the opposite direction (a file upload from the browser to S3).
So far I have written an event handler to capture each piece of data from the file as it's being uploaded:
var form = new formidable.IncomingForm();

form.onPart = function(part) {
  if (!part.filename) {
    form.handlePart(part);
  } else {
    if (part.name == 'download') {
      // Upload to the download bucket
      controller.putDownload(part);
    } else {
      // Upload to the image bucket
      controller.putImage(part);
    }
    //res.send(sys.inspect(part));
  }
};

form.parse(req, function(err, fields, files) {
  if (err) {
    res.json(err);
  } else {
    res.send(sys.inspect({fields: fields, files: files}), {'content-type': 'text/plain'});
    //controller.createWidget(res, fields, files);
  }
});

controller.putDownload = function(part) {
  part.addListener('data', function(buffer) {
    knox.download.putStream(data, part.filename, function(err, s3res) {
      if (err) throwError(err);
      else {
        console.log(s3res);
      }
    });
  });
  knox.downloads.putStream(part, part.filename, function(err, s3res) {
    if (err) throwError(err);
    else {
      console.log(s3res);
    }
  });
};
But the 'data' event only gives me the buffer. So is it possible to capture the stream itself and push it to S3?
What you want to do is override the Form.onPart method:
IncomingForm.prototype.onPart = function(part) {
// this method can be overwritten by the user
this.handlePart(part);
};
Formidable's default behavior is to write the part to a file. You don't want that; you want to handle the part's events yourself and write to the Knox request. Start with this:
form.onPart = function(part) {
  if (!part.filename) {
    // let formidable handle all non-file parts
    form.handlePart(part);
    return;
  }
Then open the knox request and handle the raw part events yourself:
  part.on('data', function(data) {
    req.write(data);
  });
  part.on('end', function() {
    req.end();
  });
  part.on('error', function(err) {
    // handle this too
  });
As a bonus, if req.write(data) returns false, that means the send buffer is full and you should pause the Formidable parser. When you get a 'drain' event from the Knox stream, you should resume Formidable; see the sketch below.
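A minimal sketch of that pause/drain pattern, assuming s3req is the writable Knox request you opened for this part (a hypothetical name; adapt it to however you actually create the request) and form is the formidable.IncomingForm instance:
part.on('data', function(data) {
  if (!s3req.write(data)) {
    form.pause(); // Knox's send buffer is full: stop parsing the upload
  }
});
s3req.on('drain', function() {
  form.resume(); // buffer flushed: keep parsing
});
part.on('end', function() {
  s3req.end();
});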
Use multiparty instead. It supports the kind of streaming you want. It even has an example of streaming directly to S3: https://github.com/superjoe30/node-multiparty/blob/master/examples/s3.js
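The core of that linked example boils down to something like the sketch below (not a verbatim copy; check the example for the actual S3 wiring): each part arrives as a readable stream, and part.byteCount gives you the Content-Length that S3's PUT needs.
var multiparty = require('multiparty');

var form = new multiparty.Form();
form.on('part', function(part) {
  if (!part.filename) return part.resume(); // skip non-file fields
  // part is a readable stream; part.byteCount is its size in bytes,
  // which you can pass as the Content-Length header of the S3 request
  // and then simply part.pipe(s3Request)
});
form.parse(req);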
In an Express middleware, I use formidable together with PassThrough to stream-upload a file to S3 (in my case, to Minio, which is S3-compatible, through the Minio SDK; I believe it works for AWS S3 too with the same Minio SDK).
Here is the sample code.
const formidable = require('formidable')
const { PassThrough } = require('stream')

const form = new formidable.IncomingForm()
const pass = new PassThrough()

const fileMeta = {}
form.onPart = part => {
  if (!part.filename) {
    form.handlePart(part)
    return
  }
  fileMeta.name = part.filename
  fileMeta.type = part.mime
  part.on('data', function (buffer) {
    pass.write(buffer)
  })
  part.on('end', function () {
    pass.end()
  })
}
form.parse(req, err => {
  if (err) {
    req.minio = { error: err }
    next()
  } else {
    handlePostStream(req, next, fileMeta, pass)
  }
})
And handlePostStream looks like this, for your reference:
const uuidv1 = require('uuid/v1')

const handlePostStream = async (req, next, fileMeta, fileStream) => {
  let filename = uuidv1()
  try {
    const metaData = {
      'content-type': fileMeta.type,
      'file-name': Buffer.from(fileMeta.name).toString('base64')
    }
    const minioClient = /* Get Minio Client */
    await minioClient.putObject(MINIO_BUCKET, filename, fileStream, metaData)
    req.minio = { post: { filename: `${filename}` } }
  } catch (error) {
    req.minio = { error }
  }
  next()
}
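For context, a sketch of how this middleware might be wired into a route; uploadToMinio is a hypothetical name for the middleware function wrapping the form.parse() code above.
app.post('/upload', uploadToMinio, (req, res) => {
  if (req.minio.error) return res.status(500).json(req.minio.error)
  res.json(req.minio.post) // e.g. { filename: '...' }
})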
You can find the source code on GitHub, and its unit tests too.
There is no way for you to capture the stream, because the data has to be translated by Formidable. The buffer you're given is the file's contents in chunks of buffer.length. This might be a problem, because looking at Formidable's docs it appears that until the file is completely uploaded it can't reliably report the file size, and Knox's put method might need that.
Never used Knox this way before, but you might have some luck with something like this:
controller.putDownload = function(part) {
  var req = knox.download.put(part.filename, {
    'Content-Type': 'text/plain'
  });
  part.addListener('data', function(buffer) {
    req.write(buffer);
  });
  req.on('response', function(res) {
    // error checking
  });
  req.end();
};
A little unsure about the response-checking bits, but see if you can whip that into shape. Also, "Streaming an octet stream from request to S3 with knox on node.js" has a writeup that may be useful to you.
