Stream from a mongodb cursor to Express response in node.js

I am toying around with all the fancy node.js/mongodb/express platforms, and stumbled across a problem:
app.get('/tag/:tag', function(req, res){
    var tag = req.params.tag;
    console.log('got tag ' + tag + '.');
    catalog.byTag(tag, function(err, cursor) {
        if (err) {
            console.dir(err);
            res.end(err);
        } else {
            res.writeHead(200, { 'Content-Type': 'application/json' });
            //this crashes
            cursor.stream().pipe(res);
        }
    });
});
As you probably guessed, catalog.byTag(tag, callback) runs a find() query against MongoDB and passes the resulting cursor to the callback.
This leads to an error:
TypeError: first argument must be a string or Buffer
Following the MongoDB driver docs, I tried passing this converter to stream():
function(obj) { return JSON.stringify(obj); }
but that does not help.
Can anybody tell me how to correctly stream something to a response?
Or is the only solution boilerplate that manually pumps the data using the 'data' and 'end' events?

Use the cursor stream in combination with JSONStream to pipe it to your response object.
cursor.stream().pipe(JSONStream.stringify()).pipe(res);
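As a fuller sketch in the context of the original route (assuming the JSONStream package is installed and catalog.byTag() hands back a driver cursor, as in the question):
var JSONStream = require('JSONStream');

app.get('/tag/:tag', function(req, res) {
    catalog.byTag(req.params.tag, function(err, cursor) {
        if (err) {
            return res.status(500).json({ error: String(err) });
        }
        res.type('json');                    // sets Content-Type: application/json
        cursor.stream()                      // object stream of documents
            .pipe(JSONStream.stringify())    // wraps them with '[', ',' and ']'
            .pipe(res);
    });
});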

A working combination of the other answers here:
app.get('/comments', (req, res) => {
    Comment.find()
        .cursor()
        .pipe(JSONStream.stringify())
        .pipe(res.type('json'))
})
http://mongoosejs.com/docs/api.html#query_Query-cursor
cursor() returns a Node streams3-compatible stream and is preferred over the deprecated query.stream() interface.
Piping through JSONStream.stringify() combines the documents into a single JSON array instead of emitting bare objects.
Piping into res.type('json') works because res.type() sets the HTTP Content-Type header to application/json and returns the response object itself, so it can be used as the pipe destination.

Simple: cursor.stream({ transform: JSON.stringify });
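In the context of the question's handler this would look roughly as follows (a sketch; note that, unlike JSONStream, this writes the stringified documents back to back without array brackets or separators):
res.writeHead(200, { 'Content-Type': 'application/json' });
cursor.stream({ transform: JSON.stringify }).pipe(res);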

Your mongo stream is dumping objects into the res stream, which can only handle strings or buffers (hence the error).
Luckily, streams are easy to pipe together, so it's not too hard to make a transform stream to stringify your data.
In node v0.10.21:
var util = require('util')
var stream = require('stream')
var Transform = stream.Transform

util.inherits(Stringer, Transform)

function Stringer() {
    Transform.call(this, { objectMode: true })
    // object mode allows us to consume one object at a time
}

Stringer.prototype._transform = function(chunk, encoding, cb) {
    var pretty = JSON.stringify(chunk, null, 2)
    this.push(pretty) // 'push' sends the data downstream
    cb()              // the callback tells the incoming stream we're done processing
}

var ss = new Stringer()

db.createObjectStreamSomehow()
    .pipe(ss)
    .pipe(res)
hope that helps

Using mongoose and express:
function(req, res) {
    var stream = database.tracks.find({}).stream();
    stream.on('data', function(doc) {
        res.write(JSON.stringify(doc));
    });
    stream.on('end', function() {
        res.end();
    });
}

A better and more recent solution to this problem:
const { JsonStreamStringify } = require("json-stream-stringify");

app.get("/", (req, res) => {
    const cursor = db.collection("hello").find({}).limit(10).stream();
    new JsonStreamStringify(cursor).pipe(res); // serializes the cursor into one JSON array and streams it out
});
The solution above uses the https://www.npmjs.com/package/json-stream-stringify package.

Related

Mongoose Stream JSON Data

Hi, I have the following code:
var mq = DeviceData.find().lean().cursor();

mq.on('data', function(data) {
    //code to write to a stream
}).on('end', function() {
    //code to write to a stream
});
What should I put in the commented blocks? I think I should create a write stream. The idea is to send a stream of data to the client.
Actually, you can use the pipe method directly, as follows:
var mq = DeviceData.find().lean().cursor().pipe(yourWritableStream);
or, from my recipes (Writable comes from Node's stream module):
const { Writable } = require('stream');

DeviceData.find({})
    .lean()
    .cursor()
    .pipe(new Writable({
        objectMode: true,
        write(data, encoding, next) {
            // do something with your data
            // then call next()
        }
    }))
    .on('finish', () => {
        // you can do something when the stream ends
    })
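If the goal is to send the documents to an HTTP client, a minimal sketch (assuming an Express res and the JSONStream package, neither of which appears in the answer above; the /devices route is just an example) would be:
var JSONStream = require('JSONStream');

app.get('/devices', function(req, res) {
    DeviceData.find({})
        .lean()
        .cursor()
        .pipe(JSONStream.stringify())   // combine the documents into one JSON array
        .pipe(res.type('json'));        // res.type() sets Content-Type and returns res
});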

Nodejs: How to send a readable stream to the browser

If I query the Box REST API and get back a readable stream, what is the best way to handle it? How do you send it to the browser? (DISCLAIMER: I'm new to streams and buffers, so some of this code is pretty theoretical.)
Can you pass the read stream to the response and let the browser handle it? Or do you have to read the chunks into a buffer and then send the buffer?
export function getFileStream(req, res) {
    const fileId = req.params.fileId;
    console.log('fileId', fileId);

    req.sdk.files.getReadStream(fileId, null, (err, stream) => {
        if (err) {
            console.log('error', err);
            return res.status(500).send(err);
        }
        res.type('application/octet-stream');
        console.log('stream', stream);
        return res.status(200).send(stream);
    });
}
Will ^^ work, or do you need to do something like:
export function downloadFile(req, res) {
    const fileId = req.params.fileId;
    console.log('fileId', fileId);

    req.sdk.files.getReadStream(fileId, null, (err, stream) => {
        if (err) {
            console.log('error', err);
            return res.status(500).send(err);
        }
        const buffers = [];
        console.log('stream', stream);
        stream.on('data', (chunk) => {
            buffers.push(chunk);
        })
        .on('end', function() {
            const finalBuffer = Buffer.concat(buffers);
            return res.status(200).send(finalBuffer);
        });
    });
}
The first example would work if you changed your theoretical line to:
- return res.status(200).send(stream);
+ res.writeHead(200, {header: here})
+ stream.pipe(res);
That's the nicest thing about Node streams. The other case would (in essence) work too, but it would accumulate lots of unnecessary memory.
If you'd like to check a working example, here's one I wrote based on scramjet, express and browserify:
https://github.com/MichalCz/scramjet/blob/master/samples/browser/browser.js
Where your streams go from the server to the browser. With minor mods it'll fit your problem.
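Put together, a minimal sketch of the first handler with that change applied (the Content-Type here is an assumption; use whatever the SDK reports for the file):
export function getFileStream(req, res) {
    const fileId = req.params.fileId;

    req.sdk.files.getReadStream(fileId, null, (err, stream) => {
        if (err) {
            console.log('error', err);
            return res.status(500).send(err);
        }
        res.writeHead(200, { 'Content-Type': 'application/octet-stream' });
        stream.pipe(res); // stream the chunks straight to the browser, no buffering
    });
}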

Parsing piped response sometimes gets chunked data

I have a Node.js proxy for calling a service. On the response, the request is piped to the service URL (I guess that's the right way to do it if you want to parse the response before returning it). The problem is that the parser sometimes fails on JSON.parse(data) with 'Unexpected end of input'. From what I saw while debugging, the issue is that the data being parsed is not complete (even though the service returns it properly).
I don't have too much experience with pipe and stream so I'm not sure why this is failing sometimes.
//Request setup
r.on('response', function(resp) {
    if (resp.statusCode === 200) {
        r.pipe(responseParser(config.get('service:url'))).pipe(res);
    } else {
        r.pipe(res);
    }
});

//Parser module
var _ = require('lodash'),
    stream = require('stream');

module.exports = function responseParser(url) {
    var data = '',
        parser = new stream.Transform({
            objectMode: true
        });

    parser._transform = function(chunk, encoding, done) {
        data += chunk.toString();
        done();
    };

    parser._flush = function(done) {
        if (data) {
            var obj = mapValues(JSON.parse(data));
            this.push(JSON.stringify(obj));
        }
        done();
    };

    function mapValues(data) {
        ...
    }

    return parser;
}
I still don't know why flush sometimes gets called before all the chunks of data have arrived, but what I did to avoid that was to parse the chunks as they arrive, making sure that a chunk doesn't carry partial data for the values I need to map. If a chunk contains only partial information for a targeted value, I remove that part and prepend it to the next chunk. This way the data is parsed as it comes in, so I don't have to rely on flush being called only after all the data has been returned.
I would disable objectMode as it's not necessary in this case. Also, you'll want to wrap the JSON parsing in a try-catch in case of malformed input:
module.exports = function responseParser(url) {
    var data = '';
    var parser = new stream.Transform();

    parser._transform = function(chunk, encoding, done) {
        data += chunk;
        done();
    };

    parser._flush = function(done) {
        var err;
        if (data) {
            try {
                var obj = mapValues(JSON.parse(data));
                this.push(JSON.stringify(obj));
                this.push(null);
            } catch (ex) {
                err = ex;
            }
        }
        done(err);
    };

    function mapValues(data) {
        // ...
    }

    return parser;
};
You may also want to check that resp.headers['content-type'] contains application/json before trying to parse the body as JSON. You may also want to make a custom Transform subclass and instantiate that, instead of creating new _transform() and _flush() functions on every request.
Rather than writing this yourself, why don't you use a streaming JSON parser that knows how to parse a stream? JSONStream for example.
The other option to make your life easier would be to use stream-to-promise to just convert the read stream into a Promise that will resolve to a Buffer of the JSON, which you can then parse.
Also, why is your proxy parsing the JSON?
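As a sketch of the stream-to-promise suggestion (assuming the same r request stream, res response, and mapValues helper from the question):
const streamToPromise = require('stream-to-promise');

r.on('response', function(resp) {
    if (resp.statusCode === 200) {
        streamToPromise(r)                   // resolves with a Buffer of the whole body
            .then(function(body) {
                res.json(mapValues(JSON.parse(body.toString())));
            })
            .catch(function(err) {
                res.status(500).send(err.message);
            });
    } else {
        r.pipe(res);
    }
});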

How to force Node.js Transform stream to finish?

Consider the following scenario. I have two Node Transform streams:
Transform stream 1
var util = require('util');
var Transform = require('stream').Transform;

function T1(options) {
    if (!(this instanceof T1)) {
        return new T1(options);
    }
    Transform.call(this, options);
}

util.inherits(T1, Transform);

T1.prototype._transform = function(chunk, encoding, done) {
    console.log("### Transforming in t1");
    this.push(chunk);
    done();
};

T1.prototype._flush = function(done) {
    console.log("### Done in t1");
    done();
};
Transform stream 2
function T2(options) {
    if (!(this instanceof T2)) {
        return new T2(options);
    }
    Transform.call(this, options);
}

util.inherits(T2, Transform);

T2.prototype._transform = function(chunk, encoding, done) {
    console.log("### Transforming in t2");
    this.push(chunk);
    done();
};

T2.prototype._flush = function(done) {
    console.log("### Done in t2");
    done();
};
I want to apply these transform streams before returning a response. I have a simple HTTP server, and on each request I fetch a resource, apply the transformations to it, and then send the result of the second transformation back in the original response:
var http = require('http');

var options = require('url').parse('http://localhost:1234/data.json');
options.method = 'GET';

http.createServer(function(req, res) {
    var backendReq = http.request(options, function(httpRes) {
        var t1 = new T1({});
        var t2 = new T2({});

        httpRes
            .pipe(t1)
            .pipe(t2)
            .on('finish', function() {
                // Do other stuff in here before sending the response back
                t2.pipe(res, { end: true });
            });
    });
    backendReq.end();
}).listen(3001);
Ultimately, the finish event never gets called, and the request hangs and times out because the response is never resolved. I've noticed that if I just pipe t2 into res, it seems to work fine:
.pipe(t1)
.pipe(t2)
.pipe(res, { end : true });
But, this scenario doesn't seem feasible because I need to do some extra work before returning the response.
This happens because you need to let Node know that the stream is being consumed somewhere; otherwise the last stream will just fill up its buffer (once your data exceeds the highWaterMark option, which defaults to 16 in object mode) and halt, waiting for the data to be consumed.
There are three ways of consuming a stream in full:
piping it to a writable stream (what you did in the second part of your question),
reading consecutive chunks by calling the stream's read() method, or
listening for "data" events (essentially stream.on("data", someFunc)).
The last option is the quickest, but it consumes the stream without regard to memory usage.
I'd also note that using the "finish" event might be a little misleading: it fires when the incoming data has all been written into the transform, not necessarily when the transformed data has been emitted and consumed. On a Transform stream, which is also readable, it's better to use the "end" event.
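A minimal sketch of the third option, assuming the same t1, t2, httpRes and res as in the question: attaching a 'data' listener keeps the pipeline flowing, and the extra work can happen once 'end' fires.
var chunks = [];

httpRes
    .pipe(t1)
    .pipe(t2)
    .on('data', function(chunk) {
        chunks.push(chunk);        // consuming the data keeps the stream flowing
    })
    .on('end', function() {
        // do other stuff here before sending the response back
        res.end(Buffer.concat(chunks));
    });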

How can I convert uploaded file on-the-fly and stream it directly to amazon S3 within sails

I am working on an application that sends an audio file to a server powered by sails.js.
I need to convert this audio file on the fly and send the converted data to Amazon S3 using Node streams.
I don't want to store data on the server, but directly stream the uploaded file to S3 after it has been converted.
Do you know a way to do that?
I tried using formidable, but I couldn't get it working. Did anyone succeed implementing such a thing?
Thanks
EDIT
As jibsales noticed, it will probably be better if I show you a piece of what I've tried so far. So basically my strategy is to use formidable, fluent-ffmpeg and knox with streams.
I plan to receive the file stream with formidable and write the chunks of received data into a first stream (stream1) that will be the entry point for the conversion with fluent-ffmpeg. Then fluent-ffmpeg writes the output stream into stream2, which is the entry point of knox.
The first problem I have to face is that formidable doesn't seem to work. However, I'm not sure my strategy is good...
The code so far looks like this:
upload: function(req, res) {
    //two streams to transfer file data
    var stream1 = new stream.Stream(); //stream for the incoming file data
    var stream2 = new stream.Stream(); //stream for the converted file data

    var client = knox.createClient({
        key: 'APIKEY'
      , secret: 'SECRET'
      , bucket: 'bucket'
    });

    //Using formidable to access data chunks
    var form = new formidable.IncomingForm();

    form.parse(req, function(err, fields, files) { //form.parse is not called
        if (err) {
            return res.json(err);
        } else {
            return res.send('ok');
        }
    });

    //overriding form.onPart to get the file data chunk
    form.onPart = function(part) {
        sails.log('getting part...');
        if (!part.filename) {
            form.handlePart(part);
            return;
        }
        //we put the data chunk in stream1 to convert it
        part.on('data', function(chunk) {
            stream1.write(chunk[1]);
        });
    }

    form.on('error', function(err) {
        return sails.log(err);
    });

    form.on('progress', function(bytesReceived, bytesExpected) {
        sails.log(bytesReceived);
    });

    //conversion process
    var proc = new ffmpeg({ source: stream1 })
        .withAudioCodec('libfdk_aac')
        .toFormat('mp3')
        .writeToStream(stream2, { end: true }, function(retcode, error) {
            console.log('file has been converted successfully');
        });

    client.putStream(stream2, '/file.mp3', headers, function(err, response) {
        return res.send(response);
    });
},
The reason formidable isn't working is that the default Sails body parser is parsing the request before formidable can get to it. In order to get this to work, you'll have to bypass the Sails body parser for multi-part form uploads. So, in config/express.js:
var express = require('sails/node_modules/express');

module.exports.express = {
    bodyParser: function(options) {
        return function(req, res, next) {
            if (!req.headers['content-type'] || req.headers['content-type'].indexOf('multipart/form-data') === -1) {
                return express.bodyParser()(req, res, next);
            } else {
                return next();
            }
        }
    }
}
This just skips the body parser entirely if the Content-Type header includes multipart/form-data. Otherwise, it executes the default Express body parser. Note that the default Sails body parser is slightly fancier than what comes with Express (if it can't parse the request, it fakes an application/json header and retries), so if you want the extra goodness you'll have to copy/paste the code from the core into the bodyParser function above. But in most cases you won't miss it ;-)
We're working on a better file parser for Sails that will hopefully take care of some of this for you, but in the meantime this is your best bet!
I've figured out a way to convert files on the fly with fluent-ffmpeg and formidable. However, it seems impossible for the moment to stream the converted chunks coming out of the ffmpeg conversion directly to Amazon, as you must specify the "Content-Length" header, which is unknown during the conversion...
For the first part (client upload) I first had to disable the Express bodyParser on the upload route in a config/express.js file:
var express = require('sails/node_modules/express');

module.exports.express = {
    bodyParser: function() {
        return function(req, res, next) {
            console.log(req.path);
            if (!(req.path === '/upload' && req.method === 'POST')) {
                return express.bodyParser()(req, res, next);
            } else {
                return next();
            }
        }
    }
}
For the implementation I used a transform stream that does basically nothing. It just passes through the right parts of the uploaded data (the ones containing file data) and links the formidable parser to fluent-ffmpeg. For now I can only save the converted file to disk before sending it to Amazon...
upload: function(req, res) {
    var Transform = Stream.Transform; //the Transform base class from Node's stream module

    var client = knox.createClient({
        key: 'KEY'
      , secret: 'SECRET'
      , bucket: 'BUCKET',
        region: 'eu-west-1' //don't forget the region (my bucket is in Europe)
    });

    function InputStream(options) {
        if (!(this instanceof InputStream)) {
            return new InputStream(options);
        }
        Transform.call(this, options);
        return;
    };

    util.inherits(InputStream, Transform);

    var inputDataStream = new InputStream();

    var form = new formidable.IncomingForm();

    form.parse(req, function(err, fields, files) {
        if (err) {
            return res.send(err);
        } else {
            return;
        }
    });

    form.onPart = function(part) {
        if (!part.filename) {
            form.handlePart(part);
            return;
        }
        //we put the data chunks into inputDataStream to convert them
        part.on('data', function(chunk) {
            //pause the form while the transform stream's internal buffer is full
            if (!inputDataStream.write(chunk)) {
                form.pause();
                inputDataStream.once('drain', function() { form.resume(); });
            }
        });
        part.on('end', function(chunk) {
            inputDataStream.end(chunk);
        });
    }

    InputStream.prototype._transform = function(chunk, enc, cb) {
        this.push(chunk);
        cb();
    }

    var proc = new ffmpeg({ source: inputDataStream })
        .withAudioBitrate('64k')
        .withAudioCodec('libmp3lame')
        .toFormat('mp3')
        .saveToFile('file.mp3', function(retcode, error) {
            console.log('file has been converted successfully');
            res.send('ok');
            var upload = new MultiPartUpload({
                client: client,
                objectName: 'file.mp3',
                file: 'file.mp3'
            }, function(err, body) {
                if (err) {
                    console.log(err);
                    return;
                }
                console.log(body);
                return;
            });
        });
},
EDIT
Using knox-mpu you can actually stream data to Amazon S3 directly! You just have to create another transform stream that will be the source of your upload, and knox-mpu does the magic. Thank you all!
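A sketch of that final setup, under the assumption that knox-mpu accepts a stream option in place of file (check its README), reusing the inputDataStream and client from above; a PassThrough carries ffmpeg's output straight into the multipart upload:
var outputStream = new Stream.PassThrough(); //converted data flows through here

new ffmpeg({ source: inputDataStream })
    .withAudioBitrate('64k')
    .withAudioCodec('libmp3lame')
    .toFormat('mp3')
    .writeToStream(outputStream, { end: true }, function(retcode, error) {
        console.log('conversion finished');
    });

var upload = new MultiPartUpload({
    client: client,
    objectName: 'file.mp3',
    stream: outputStream //knox-mpu uploads the parts as they arrive, no Content-Length needed
}, function(err, body) {
    if (err) {
        return console.log(err);
    }
    console.log(body);
});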
