streams with percentage complete - node.js

I need to stream a file in base64 to an http endpoint using something like request or superagent. What is the best way to figure out what percentage of the file has been uploaded?
I assume I can create the read stream using something like:
fs.createReadStream('/tmp/cats.jpg', {encoding: 'base64'})
Any examples using one of the above libraries would be greatly appreciated.

I think you can use progress-stream.
Here is an example from the package:
var progress = require('progress-stream');
var fs = require('fs');

var stat = fs.statSync(filename);

var str = progress({
    length: stat.size,
    time: 100 /* ms */
});

str.on('progress', function(progress) {
    console.log(progress);
    /*
    {
        percentage: 9.05,
        transferred: 949624,
        length: 10485760,
        remaining: 9536136,
        eta: 42,
        runtime: 3,
        delta: 295396,
        speed: 949624
    }
    */
});

fs.createReadStream(filename)
    .pipe(str)
    .pipe(fs.createWriteStream(output));
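
Since the question asks about uploading with request or superagent, here is a minimal sketch (not from the progress-stream docs) that pipes the read stream through progress-stream and into request.post; the file path and endpoint URL are placeholders, and it assumes the endpoint accepts a raw request body. Note that if you read the file with {encoding: 'base64'}, the number of bytes flowing through the pipe will be roughly 4/3 of stat.size, so the length option would need adjusting accordingly.
var progress = require('progress-stream');
var request = require('request');
var fs = require('fs');

var filename = '/tmp/cats.jpg'; // placeholder path
var stat = fs.statSync(filename);

var str = progress({ length: stat.size, time: 100 });
str.on('progress', function(p) {
    console.log(p.percentage.toFixed(2) + '% uploaded');
});

fs.createReadStream(filename)
    .pipe(str)
    .pipe(request.post('http://example.com/upload', function(err, res, body) {
        // hypothetical endpoint; handle the server's response here
        if (err) return console.error(err);
        console.log('Upload finished with status ' + res.statusCode);
    }));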

I was looking for an answer to a similar issue and, thanks to Alberto Zaccagni's answer, I was able to get some code working.
So for the people who don't want to piece the puzzle together themselves, here is the code (edited for Stack Overflow):
var fs = require('fs');
var formData = require('form-data');

var zipfile = "my_large_archive.zip";

// Get the size of the file
fs.stat(zipfile, function (err, stats) {
    var zipSize = stats.size;
    var uploadedSize = 0; // Incremented by on('data') to keep track of the amount of data we've uploaded

    // Create a new read stream so we can plug events on it, and get the upload progress
    var zipReadStream = fs.createReadStream(zipfile);

    zipReadStream.on('data', function(buffer) {
        var segmentLength = buffer.length;

        // Increment the uploaded data counter
        uploadedSize += segmentLength;

        // Display the upload percentage
        console.log("Progress:\t", ((uploadedSize / zipSize * 100).toFixed(2) + "%"));
    });

    // Some other events you might want for your code
    zipReadStream.on('end', function() {
        console.log("Event: end");
    });
    zipReadStream.on('close', function() {
        console.log("Event: close");
    });

    var form = new formData();
    form.append('apikey', 'f4sd5f4sdf6ds456'); // Just some POST parameters I need to send to the upload endpoint
    form.append('file', zipReadStream);        // The zip file, passed as a fs.createReadStream instance

    // Submit the form and the file
    form.submit('http://www.someserver.com/upload', function(err, res) {
        if (err) {
            console.log("Oops! We encountered an error :(\n\n", err);
            return false;
        }
        console.log("Your file has been uploaded.");
        res.resume(); // Needed if you use this code in a CLI, so execution can finish and users can enter new commands
    });
});

In Node.js we have the Readable stream; it emits the data event whenever it receives a chunk of data. If you know the file size, you can easily keep track of how much data passes through the data event handler and update the percentage from there.
Get the file size with:
var fs = require('fs');
fs.watchFile('yourfile', function () {
    fs.stat('yourfile', function (err, stats) {
        console.log(stats.size);
    });
});
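Putting the two pieces together, a minimal sketch of the idea described above (the file name is a placeholder):
var fs = require('fs');

fs.stat('yourfile', function (err, stats) {
    if (err) throw err;
    var total = stats.size;
    var uploaded = 0;

    fs.createReadStream('yourfile').on('data', function (chunk) {
        uploaded += chunk.length;
        console.log((uploaded / total * 100).toFixed(2) + '%');
    });
});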

How can I stream a response using an in memory DB?

I'm using LokiJS as an in-memory DB. There is a particular resource where I must return the entire contents of a table (it cannot be paginated), and that table can grow to 500,000 items or so, which is about 300 MB.
In other cases, I have used fs.createReadStream to get a file and stream it back to the user:
fs.createReadStream('zips.json')
    .on('data', function() {
        res.write(...)
    })
    .on('end', function() {
        res.end();
    })
This has worked great for large files, but how can I do something equivalent using an in memory DB?
const items = lokiDb.addCollection('items');
items.insert('a bunch of items ...');
// I would now like to stream items via res.write
res.write(items)
Currently, res.write(items) will cause memory problems as Node is trying to return the entire response at once.
As far as I can tell, there is no native stream provider in Loki, though I may have missed it. What you may want to do instead is listen to the 'insert' event on the collection and write that, like so:
const items = lokiDb.addCollection('items');

items.on('insert', (results) => {
    res.write(results);
});

items.insert('a bunch of items ...');
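One thing to watch out for: res.write expects a string or a Buffer, so in practice each inserted document needs to be serialized first. A minimal sketch of the same idea with explicit serialization (newline-delimited JSON and the sample document are assumptions, not part of the original answer):
const items = lokiDb.addCollection('items');

items.on('insert', (doc) => {
    // Serialize each document before writing it to the response
    res.write(JSON.stringify(doc) + '\n');
});

items.insert({ name: 'example item' }); // hypothetical document
res.end();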
If I'm correct, basically your problem is that readStreams only read from files, and that you want to read from an in-memory data structure. A solution might be to define your own readStream class, slightly modifying the prototype stream.Readable._read method:
"use strict";
var util = require('util');
var stream = require('stream');

util.inherits(InMemoryStream, stream.Readable);

function InMemoryStream(userDefinedOptions, resource){
    // Default options, overridden by whatever the caller passes in
    var options = {
        highWaterMark: 16384,
        encoding: null,
        objectMode: false
    };
    if (userDefinedOptions){
        for (var key in userDefinedOptions){
            options[key] = userDefinedOptions[key];
        }
    }
    this.resource = resource;
    // Per-instance read cursors
    this.begin = 0;
    this.end = 0;
    stream.Readable.call(this, options);
}

InMemoryStream.prototype._read = function(size){
    if (this.begin >= this.resource.length){
        // Nothing left to read: signal end-of-stream
        this.push(null);
        return;
    }
    this.end += size;
    this.push(this.resource.slice(this.begin, this.end));
    this.begin += size;
};

exports.InMemoryStream = InMemoryStream;
exports.readStream = function(userDefinedOptions, resource){
    return new InMemoryStream(userDefinedOptions, resource);
};
You convert your in-memory data structure (in the following example, the contents of a file read into memory) to a readStream, and pipe it into a writeStream, as follows:
"use strict";
var fs = require('fs');
var InMemoryStream = require('/home/regular/javascript/poc/inmemorystream.js');
var stored=[], writestream, config={};
config = {
encoding: null,
fileToRead: 'raphael.js',
fileToWrite: 'secondraphael.js'
}
fs.readFile(config.fileToRead, function(err, data){
if (err) return console.log('Error when opening file', err);
stored = data;
var inMemoryStream = InMemoryStream.readStream({encoding: config.encoding}, stored);
writestream = fs.createWriteStream(config.fileToWrite);
inMemoryStream.pipe(writestream);
inMemoryStream.on('error', function(err){
console.log('in memory stream error', err);
});
});
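Applied back to the original LokiJS question, a hedged sketch might look like the following: serialize the collection's documents once (Loki collections expose them via the .data array), wrap the result in the InMemoryStream above, and pipe it to the HTTP response so the data goes out in chunks with back pressure instead of one giant res.write call. The route, app and lokiDb names are assumptions taken from the question's context.
var InMemoryStream = require('./inmemorystream.js'); // the module defined above

app.get('/items', function(req, res) {
    var items = lokiDb.getCollection('items');
    // Still serialized in memory once, but streamed out in chunks
    var payload = Buffer.from(JSON.stringify(items.data));

    res.setHeader('Content-Type', 'application/json');
    InMemoryStream.readStream({encoding: null}, payload).pipe(res);
});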

Piping transformed read stream into request.post()

I'd like to parse a log file and POST what is read to a request endpoint. I've managed to build a solution that generates a request for every log line read. However, it doesn't create any back pressure, so it just floods the server, and I'd like to slow it down.
This led me to investigate using stream pipes to see if I could route data from a file directly into request.post(). I can't get the post call to post a body object, though.
// liner.js
var stream = require('stream');
var request = require('request');

var liner = new stream.Transform( { objectMode: true } );

liner._transform = function (chunk, encoding, done) {
    var data = chunk.toString()
    if (this._lastLineData) data = this._lastLineData + data

    var lines = data.split('\n')
    this._lastLineData = lines.splice(lines.length-1,1)[0]

    var that = this;

    lines.forEach(function(line) {
        var line_obj = JSON.parse(line);
        if( line_obj.url === "/api/usages" && line_obj.method === 'POST' ) {
            var req_body = line_obj.body.body;
            that.push.bind(req_body);
        }
    });
    done();
}

// main script (requires the transform above as ./liner)
var file_name = process.argv[2];
console.log('Reading from ' + file_name);

var fs = require('fs')
var liner = require('./liner')
var source = fs.createReadStream(file_name)

source.pipe(liner).pipe(request
    .post("http://localhost:8081/api/usages")
    .on('response', function(response) {
        console.log(response.statusCode) // 200
    })
    .on('error', function(err) {
        console.log(err);
    }));
The push call in the transform function is working correctly, but it's not posting that object via the body in request.post().
What am I missing?
Will this provide the back pressure I'm looking for to throttle the POST calls before all of the file reads are completed?
I've discovered that you cannot pipe a stream to an HTTP request this way because you would need to know the Content-Length beforehand (as per the spec). The less pleasant alternative is a multipart upload: as chunks are read from your transform, they would be marshalled as parts to the receiving API. This also means the receiving API needs to be able to accept multipart uploads and reassemble the whole file after all parts have been received and confirmed. AWS S3 has multipart uploads and might be a nice example: http://docs.aws.amazon.com/AmazonS3/latest/dev/mpuoverview.html
I wanted to pipe my transform data to another API that I manage but it seems the effort is not likely worth it considering my files really aren't that big. I'll update this answer if I change my mind :)
Although I wasn't able to find a solution to the streaming question, I found a simple solution to the back pressure question.
I used async.queue to push work into a simple task queue.
// build the send queue
var http = require('http');
var async = require('async');
var request = require('request');

var pool = new http.Agent({keepAlive: true, keepAliveMsecs: 10000, maxSockets: Math.floor(send_queue_concurrency * 1.5)});

var q = async.queue(function(task, callback){
    request({
        url    : 'http://localhost:8081/xxxxxx',
        method : 'POST',
        json   : task.req_body,
        gzip   : true,
        pool   : pool,
        timeout: 30000
    }, function(error, response, body){
        if(error) {
            console.log('request error : ' + error);
            post_status.fail++;
        } else {
            if( response.statusCode === 400 ) {
                console.dir(body);
            }
        }
        callback();
    });
}, send_queue_concurrency);

q.drain = done;
send_queue_concurrency is the primary lever for controlling request pressure.
I'm pushing work into the queue from a file-parsing routine:
rl.on('line', function(line) {
    line_count++;
    try {
        var line_object = JSON.parse(line);
        var req_body = line_object.body.body;

        q.push({req_body: req_body, line_object: line_object}, function(err){
            if (err){
                console.log('queue error! ' + JSON.stringify(err));
            }
        });
    } catch( e ) {
        console.dir(e);
    }
});

var done = function() {
    // print out some reporting stats...
    // console.log('xxxxxx');
    console.log('\ndone.');
    process.exit(0);
};
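For context, the rl above is a readline interface over the log file; the original answer omits its setup, so the following sketch is an assumption about how it might be created, along with the counters the snippets above refer to:
var readline = require('readline');
var fs = require('fs');

// counters referenced in the snippets above (assumed definitions)
var line_count = 0;
var post_status = { fail: 0 };

var rl = readline.createInterface({
    input: fs.createReadStream(file_name) // file_name is the log file path
});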

How to clear a readable stream while uploading image in node.js

I am trying to upload multiple images with the help of the multiparty module. I want to upload only a particular kind of image, say those whose name is 'image.jpg'. But when an image does not meet the criteria, it doesn't work and I don't get any response. Here is my code.
req.form.on('part', function(part) {
    if (part.filename === 'image.jpg') {
        var out = fs.createWriteStream('image/' + part.filename);
        part.pipe(out);
    } else {
        // Here I want to clear the stream for the next 'part'
    }
});

req.form.on('close', function() {
    res.send('uploaded!');
});
I think I'm not able to clear the readable stream that contains 'part'. If I write that stream to disk and then delete the file, it works. But I don't want to write any image to my file system if it doesn't meet the criteria. How can I achieve that?
To complete robertklep's answer, here is another possibility using a custom Writable stream that will blackhole data. When you pipe() the part stream to the blackhole, it will exhaust its source. This just keeps your code consistent by using streams everywhere instead of using raw read() function calls.
var stream = require('stream');

req.form.on('part', function(part) {
    var out;
    if (part.filename === 'image.jpg') {
        out = fs.createWriteStream('image/' + part.filename);
    } else {
        out = new stream.Writable();
        out._write = function (chunk, encoding, done) {
            done(); // Don't do anything with the data
        };
    }
    part.pipe(out);
});
Not entirely sure if this is going to work, but I think you can read the entire part stream until it's exhausted:
req.form.on('part', function(part) {
    if (part.filename === 'image.jpg') {
        var out = fs.createWriteStream('image/' + part.filename);
        part.pipe(out);
    } else {
        while (part.read() !== null);
    }
});
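Another possibility (a sketch, not from the original answers): since part is a standard Readable stream, you can put it into flowing mode with resume() and let the unwanted data be discarded, which also drains the part so the form can move on:
req.form.on('part', function(part) {
    if (part.filename === 'image.jpg') {
        part.pipe(fs.createWriteStream('image/' + part.filename));
    } else {
        part.resume(); // discard the data for parts we don't want
    }
});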

Convert stream into buffer?

How do I convert a stream into a buffer in Node.js? Here is my code that parses a file in a POST request in Express.
app.post('/upload', express.multipart({
    defer: true
}), function(req, res) {
    req.form.on('part', function(part) {
        // Here I want to convert the streaming part into a buffer,
        // then do some buffer-specific task
        var out = fs.createWriteStream('image/' + part.filename);
        part.pipe(out);
    });
    req.form.on('close', function() {
        res.send('uploaded!');
    });
});
Instead of piping, you can attach readable and end event handlers to the part stream to read it:
var buffers = [];

part.on('readable', function() {
    for (;;) {
        var buffer = part.read();
        if (!buffer) { break; }
        buffers.push(buffer);
    }
});

part.on('end', function() {
    var buffer = Buffer.concat(buffers);
    // ...do your stuff...

    // write to file:
    fs.writeFile('image/' + part.filename, buffer, function(err) {
        // handle error, return response, etc...
    });
});
Note: If you instead use the data event, it will read the entire upload into memory.
You could also create a custom transform stream to transform the incoming data, but that might not be trivial.
You can use the stream-to module, which can convert a readable stream's data into an array or a buffer:
var streamTo = require('stream-to');

req.form.on('part', function (part) {
    streamTo.buffer(part, function (err, buffer) {
        // Insert your business logic here
    });
});
If you want a better understanding of what's happening behind the scenes, you can implement the logic yourself, using a Writable stream. As a writable stream implementor, you only have to define one function: the _write method, which will be called every time some data is written to the stream. When the input stream is finished emitting data, the finish event will be emitted: we'll then create a buffer using the Buffer.concat method.
var stream = require('stream');

var converter = new stream.Writable();

// We'll store all the data inside this array
converter.data = [];

converter._write = function (chunk, encoding, done) {
    converter.data.push(chunk);
    done(); // signal that this chunk has been handled
};

// Will be emitted when the input stream has ended,
// i.e. no more data will be provided
converter.on('finish', function() {
    // Create a buffer from all the received chunks
    var b = Buffer.concat(this.data);
    // Insert your business logic here
});
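To use it, pipe the incoming part into the converter (a one-line sketch, assuming the same part variable as in the question); piping calls end() on the writable when the part is exhausted, which in turn fires the finish event above:
part.pipe(converter);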

Save a image using nodejs, expressjs and socket.io

I've tried to save an image to a specified directory with Node.js using Express.js and socket.io, but it doesn't work.
On the client-side:
var reader = new FileReader();

function drop(e) {
    e.stopPropagation();
    e.preventDefault();

    var dt = e.dataTransfer;
    var files = dt.files;

    jQuery.each(files, function(){
        reader.onload = function(e) {
            socket.emit('sendfile', e.target.result);
        };
    });
    return false;
}
The image should be uploaded by a drag and drop function.
Then on the server-side:
io.sockets.on('connection', function (socket) {
    [...]
    socket.on('sendfile', function (data) {
        var fs = require('fs');
        app.use(express.bodyParser({ keepExtensions: true, uploadDir: '/uploaded' }));
        io.sockets.emit('updatechat', socket.username, data); //test
    });
I have also tried
socket.on('sendfile', function (data) {
    var fs = require('fs');
    fs.writeFile('/uploaded/test.png', data, "binary", function (err) {
        if (err) throw err;
        console.log('It\'s saved!');
    });
    io.sockets.emit('updatechat', socket.username, data); //data test
});
but it doesn't save anything.
The "data test" shows me that the data has already arrived on the server, so I don't think the problem comes from the client side, but on the server side I have no idea what I'm doing wrong.
I made a simple example to illustrate file upload via socket.io.
The steps are as follows:
Create the send-file socket.io event in app.js to receive the file. The file received is binary;
In the Jade/HTML page, put a file input and a button to send it. NOTE: you don't have to use a multipart form here, since we are sending files over the socket rather than as an HTTP request/response;
Initialize HTML5 File API support and prepare the listeners watching your file input component;
The remaining routines read the file and send its content onward.
Now first step (app.js):
var io = require('socket.io').listen(8000, {log: false});

io.sockets.on('connection', function(socket) {
    socket.on('send-file', function(name, buffer) {
        var fs = require('fs');
        // path to store uploaded files (NOTE: presumed you have created the folders)
        var fileName = __dirname + '/tmp/uploads/' + name;
        fs.open(fileName, 'a', 0755, function(err, fd) {
            if (err) throw err;
            fs.write(fd, buffer, null, 'Binary', function(err, written, buff) {
                fs.close(fd, function() {
                    console.log('File saved successfully!');
                });
            });
        });
    });
});
Second step (in my case I've used Jade rather than HTML):
extends layout

block content
  h1 Tiny Uploader
  p Save an Image to the Server
  input#input-files(type='file', name='files[]', data-url='/upload', multiple)
  button#send-file(onclick='javascript:sendFile();') Send
  script(src='http://127.0.0.1:8000/socket.io/socket.io.js')
  script(src='/javascripts/uploader.js')
Third and Fourth steps (coding uploader.js to send the file to server)
// variable declaration
var filesUpload = null;
var file = null;
var socket = io.connect('http://localhost:8000');
var send = false;

if (window.File && window.FileReader && window.FileList) {
    // HTML5 File API ready
    init();
} else {
    // browser has no support for HTML5 File API
    // send an error message or something like that
    // TODO
}

/**
 * Initialize the listeners and send the file if we have one.
 */
function init() {
    filesUpload = document.getElementById('input-files');
    filesUpload.addEventListener('change', fileHandler, false);
}

/**
 * Handle the file change event to send its content.
 * @param e
 */
function fileHandler(e) {
    var files = e.target.files || e.dataTransfer.files;
    if (files) {
        // send only the first one
        file = files[0];
    }
}

function sendFile() {
    if (file) {
        // read the file content and prepare to send it
        var reader = new FileReader();
        reader.onload = function(e) {
            console.log('Sending file...');
            // get all content
            var buffer = e.target.result;
            // send the content via socket
            socket.emit('send-file', file.name, buffer);
        };
        reader.readAsBinaryString(file);
    }
}
Some important considerations:
This is a tiny sample of a socket file uploader. It leaves out a few important things: splitting the file into chunks instead of sending all of its content in one go, and reporting upload status (error messages, success messages, a progress bar or percentage, etc.). So this is just a starting point for coding your own file uploader. In this case, we don't need a form to send files; it is a completely asynchronous transaction via socket.io.
I hope this post is helpful.
This tutorial goes a little further because you can pause/resume your upload, but you will also find how to upload a file through socket.io :)
