I'm trying to write Node.js code that reads (audio) files and streams their content to a remote service (Dialogflow). I'm having trouble ensuring the order of the chunks sent to the stream.
Most of the time everything arrives in the right order, but once in a while the data seems to be sent out of order.
Pseudo code:
for (var i = 0; i < numFiles; ++i) {
  await sendData(fs.createReadStream(filenames[i]), i);
}
...
async function sendData(inputDataStream, chunkIndex) {
  await inputDataStream
    .pipe(new Transform({
      objectMode: true,
      transform: (obj, _, next) => {
        console.log('Sending chunk ' + chunkIndex);
        next(null, <some data>);
      }
    }), {end: false})
    .pipe(outputStream, {end: false});
}
I can see that 'Sending chunk ...' is printed out of order sometimes.
Q: is there a way to avoid this problem?
Another issue is that, while each chunk is usually sent contiguously, occasionally some chunks are split and sent as smaller sub-chunks (even though each file is not large).
[I repeated this experiment many times on the same set of files]
Q: Is there a way I can control the chunk size? (what did I do wrong here?)
Q: Is this because the remote service cannot handle the rate of transmission? If so, how should I properly react to that?
[I have also tried using pump(), but still observed the same behavior]
Thanks in advance.
For Dialogflow, I have used the following pump method, and it is working fine.
await pump(
  fs.createReadStream(filename),
  new Transform({
    objectMode: true,
    transform: (obj, _, next) => {
      next(null, {inputAudio: obj});
    },
  }),
  detectStream
);
Ref: link
I haven't faced any issues with pump so far.
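On the original ordering question: .pipe() returns the destination stream, not a promise, so the await in sendData resolves immediately and the next file can start before the previous one has been flushed, which is why chunks can interleave. If you need to send several files in order into one long-lived output stream (without ending it between files), here is a minimal sketch on reasonably recent Node, assuming outputStream accepts {inputAudio: ...} request objects like the detect stream above; it skips pipe and writes chunks manually while honouring backpressure:
const fs = require('fs');
const { once } = require('events');

// Sketch: write each file's chunks to outputStream strictly in order,
// pausing for 'drain' whenever write() reports backpressure.
async function sendFilesInOrder(filenames, outputStream) {
  for (const filename of filenames) {
    for await (const chunk of fs.createReadStream(filename)) {
      console.log('Sending chunk of ' + filename);
      if (!outputStream.write({ inputAudio: chunk })) {
        await once(outputStream, 'drain');
      }
    }
  }
  outputStream.end();
}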
Also, I have come across one more use case, in which a WebSocket connection is used to receive audio from a streaming endpoint, and that audio is then used for intent detection. (I have used this with both Dialogflow ES and CX.)
Example for ES:
function getDialogflowStream() {
  let sessionClient = new dialogflow.SessionsClient();
  let sessionPath = sessionClient.projectAgentSessionPath(
    projectId,
    sessionID,
  );

  // First request
  let initialStreamRequest = {
    session: sessionPath,
    queryInput: {
      audioConfig: {
        audioEncoding: encoding,
        sampleRateHertz: sampleRateHertz,
        languageCode: languageCode,
      },
      singleUtterance: true,
    },
  };

  const detectStream = sessionClient
    .streamingDetectIntent()
    .on('error', error => {
      console.error(error);
      writeFlag = false;
      detectStream.end();
    })
    .on('data', data => {
      if (data.recognitionResult) {
        console.log(
          `Intermediate transcript: ${data.recognitionResult.transcript}`
        );
      } else {
        console.log(
          `Query results: ${data.queryResult}`
        );
      }
    });

  // Write the initial stream request to config for audio input.
  detectStream.write(initialStreamRequest);

  return detectStream;
}

const wss = new WebSocket.Server({
  port,
  handleProtocols: (protocols, req) => {
    return 'dialogflow.stream';
  }
});

wss.on('connection', (ws, req) => {
  console.log(`received connection from ${req.connection.remoteAddress}`);

  let dialogflowStreamer = getDialogflowStream();

  ws.on('message', (message) => {
    if (typeof message === 'string') {
      console.log(`received message: ${message}`);
      console.log(`UUID: ${calluuid}`);
    } else if (message instanceof Buffer) {
      // Transform message and write to detect stream
      dialogflowStreamer.write({ inputAudio: message });
    }
  });

  ws.on('close', (code, reason) => {
    console.log(`socket closed ${code}:${reason}`);
    dialogflowStreamer.end();
    sessionID = uuid.v4();
  });
});
One more thing: make sure the sample rate and encoding in your input configuration match the audio file, because I have faced issues when they are different.
I am trying to send binary content from a Node.js server. To do this, I allocate a buffer, fill it with my content, and call response.write() on it. Once it returns, I reuse the buffer with new content. However, it doesn't seem to work correctly for some reason.
Here is the server code:
const http = require('http');

async function sendChunk( response, outbuf )
{
    console.log( "Sending buffer: %s", outbuf );

    // Send the buffer out. If it returns false,
    // this means the kernel buffers are full,
    // and we have to wait until they are available.
    if ( await response.write( outbuf ) === false )
    {
        await new Promise(resolve => response.once('drain', ()=>{
            resolve();
        }));
    }
}

async function sendData( response )
{
    let outbuf = Buffer.alloc( 20 );

    for ( let count = 0x45; count < 0x50; count++ )
    {
        for ( let i = 0; i < outbuf.length; i++ )
        {
            outbuf[i] = count;
        }

        await sendChunk( response, outbuf );
    }
}

function webRequestHandler( request, response )
{
    let body = [];

    request.on('error', (err) => {
        console.error(err);
        return;
    });

    request.on('data', (chunk) => {
        body.push(chunk);
    });

    response.on('error', (err) => {
        console.error( "Error sending response: %s", err);
        return;
    });

    // A whole body collected - process it
    request.on('end', async () => {
        // Handle the update; can return an error message
        response.setHeader('Content-Type', 'text/plain');
        await sendData( response );
        response.end();
    });
}

const webserver = http.createServer( webRequestHandler );

// Create the web service
webserver.on('error', function (err) {
    console.log("[" + process.pid + "] " + JSON.stringify(err));
    process.exit();
});

webserver.listen( { "host" : "127.0.0.1", "port" : 5252 }, () => {
    console.log( "Server running" );
});
When tested via curl http://localhost:5252/ the server prints the following:
Sending buffer: EEEEEEEEEEEEEEEEEEEE
Sending buffer: FFFFFFFFFFFFFFFFFFFF
Sending buffer: GGGGGGGGGGGGGGGGGGGG
Sending buffer: HHHHHHHHHHHHHHHHHHHH
Sending buffer: IIIIIIIIIIIIIIIIIIII
Sending buffer: JJJJJJJJJJJJJJJJJJJJ
Sending buffer: KKKKKKKKKKKKKKKKKKKK
Sending buffer: LLLLLLLLLLLLLLLLLLLL
Sending buffer: MMMMMMMMMMMMMMMMMMMM
Sending buffer: NNNNNNNNNNNNNNNNNNNN
Sending buffer: OOOOOOOOOOOOOOOOOOOO
however the client receives something totally different:
> curl http://localhost:5252/
EEEEEEEEEEEEEEEEEEEEOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
What's going on here? It does work if I create a new buffer in sendChunk that is a copy of outbuf. However, this seems like a waste of RAM, and it doesn't really make sense to someone coming from a C background: there, once you call send() on a socket, the data has been copied and you can reuse the source buffer as you wish.
Does Node.js work differently? Do I need to create a dedicated buffer for response.write() that can no longer be touched once write is called, even if write has returned and I have waited for the drain event?
I've posted the bug report, and it got closed with an important comment:
You should be passing a callback to .write() to know when node is
finished with that chunk of memory instead of relying on the 'drain'
event.
Once you make that change, you will see the output on the client that
you're expecting.
And indeed, once the sendChunk function is changed as follows:
async function sendChunk( response, outbuf )
{
    return new Promise( function( resolve, reject) {
        if ( response.write( outbuf, ()=>{ resolve(); } ) === false )
        {
            console.log( "Draining buffer" );
            response.once('drain', ()=>{
                resolve();
            });
        }
    });
}
so that we resolve only in the write callback, the issue goes away. The core issue here is that response.write is not awaitable and returns before the callback is called.
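For reference, the same idea can be wrapped in a small helper (just a sketch of the pattern above, not part of the original bug report), so that any write is awaited via its callback and the buffer can be refilled safely afterwards:
// Sketch: resolve once node is finished with the chunk (write()'s callback),
// after which outbuf can be reused or refilled.
function writeAsync( stream, chunk )
{
    return new Promise( ( resolve, reject ) => {
        stream.write( chunk, ( err ) => err ? reject( err ) : resolve() );
    });
}

// usage inside sendData():
//   await writeAsync( response, outbuf );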
Should have read the documentation more carefully.
I've been using node-oracledb for a few months and I've managed to achieve what I've needed so far.
I'm currently working on a search app that could potentially return about 2m rows of data from a single call. To ensure I don't get a disconnect between the browser and the server, I thought I would try queryStream so that there is a constant flow of data back to the client.
I implemented the queryStream example as-is, and this worked fine for a few hundred thousand rows. However, when the number of returned rows is greater than one million, Node runs out of memory. By logging and watching both client and server log events, I can see that the client is way behind the server in terms of rows sent and received. So it looks like Node is falling over because it's buffering so much data.
It's worth noting that at this point, my selectstream implementation is within a req/res function called via Express.
To return the data, I do something like....
stream.on('data', function (data) {
  rowcount++;
  let obj = new myObjectConstructor(data);
  res.write(JSON.stringify(obj.getJson()));
});
I've been reading about how streams and pipe can help with flow, so what I'd like to do is pipe the results from the query to a) help with flow and b) be able to pipe the results to other functions before sending them back to the client.
E.g.
function getData(req, res) {
  var stream = myQueryStream(connection, query);

  stream
    .pipe(toSomeOtherFunction)
    .pipe(yetAnotherFunction)
    .pipe(res);
}
I've spent a few hours trying to find a solution or example that allows me to pipe results, but I'm stuck and need some help.
Apologies if I'm missing something obvious, but I'm still getting to grips with Node and especially streams.
Thanks in advance.
There's a bit of an impedance mismatch here. The queryStream API emits rows of JavaScript objects, but what you want to stream to the client is a JSON array. You basically have to add an open bracket at the beginning, a comma between rows, and a close bracket at the end.
I'll show you how to do this in a controller that uses the driver directly as you have done, instead of using separate database modules as I advocate in this series.
const oracledb = require('oracledb');

async function get(req, res, next) {
  try {
    const conn = await oracledb.getConnection();
    const stream = await conn.queryStream('select * from employees', [], {outFormat: oracledb.OBJECT});

    res.writeHead(200, {'Content-Type': 'application/json'});
    res.write('[');

    // Write the comma before each row except the first so the array stays valid JSON.
    let firstRow = true;

    stream.on('data', (row) => {
      if (!firstRow) {
        res.write(',');
      }
      firstRow = false;

      res.write(JSON.stringify(row));
    });

    stream.on('end', () => {
      res.end(']');
    });

    stream.on('close', async () => {
      try {
        await conn.close();
      } catch (err) {
        console.log(err);
      }
    });

    stream.on('error', async (err) => {
      next(err);

      try {
        await conn.close();
      } catch (err) {
        console.log(err);
      }
    });
  } catch (err) {
    next(err);
  }
}

module.exports.get = get;
Once you get the concepts, you can simplify things a bit with a reusable Transform class which allows you to use pipe in the controller logic:
const oracledb = require('oracledb');
const { Transform } = require('stream');

class ToJSONArray extends Transform {
  constructor() {
    super({objectMode: true});

    this.push('[');
  }

  _transform (row, encoding, callback) {
    if (this._prevRow) {
      this.push(JSON.stringify(this._prevRow));
      this.push(',');
    }

    this._prevRow = row;

    callback(null);
  }

  _flush (done) {
    if (this._prevRow) {
      this.push(JSON.stringify(this._prevRow));
    }

    this.push(']');

    delete this._prevRow;

    done();
  }
}

async function get(req, res, next) {
  try {
    const toJSONArray = new ToJSONArray();
    const conn = await oracledb.getConnection();
    const stream = await conn.queryStream('select * from employees', [], {outFormat: oracledb.OBJECT});

    res.writeHead(200, {'Content-Type': 'application/json'});

    stream.pipe(toJSONArray).pipe(res);

    stream.on('close', async () => {
      try {
        await conn.close();
      } catch (err) {
        console.log(err);
      }
    });

    stream.on('error', async (err) => {
      next(err);

      try {
        await conn.close();
      } catch (err) {
        console.log(err);
      }
    });
  } catch (err) {
    next(err);
  }
}

module.exports.get = get;
Rather than writing your own logic to create a JSON stream, you can use JSONStream to convert an object stream to (stringified) JSON before piping it to its destination (res, process.stdout, etc.). This saves the need to muck around with .on('data', ...) events.
In the example below, I've used pipeline from Node's stream module rather than the .pipe method: the effect is similar (with better error handling, I think). To get objects from oracledb.queryStream, you can specify the option {outFormat: oracledb.OUT_FORMAT_OBJECT} (docs). Then you can make arbitrary modifications to the stream of objects produced. This can be done using a transform stream, made perhaps using through2-map, or, if you need to drop or split rows, through2. Below, the stream is sent to process.stdout after being stringified as JSON, but you could equally send it to express's res.
require('dotenv').config() // config from .env file
const JSONStream = require('JSONStream')
const oracledb = require('oracledb')
const { pipeline } = require('stream')
const map = require('through2-map') // see https://www.npmjs.com/package/through2-map

oracledb.getConnection({
  user: process.env.DB_USER,
  password: process.env.DB_PASSWORD,
  connectString: process.env.CONNECT_STRING
}).then(connection => {
  pipeline(
    connection.queryStream(`
      select dual.*,'test' as col1 from dual
      union select dual.*, :someboundvalue as col1 from dual
      `
      ,{"someboundvalue":"test5"} // binds
      ,{
        prefetchRows: 150, // for tuning
        fetchArraySize: 150, // for tuning
        outFormat: oracledb.OUT_FORMAT_OBJECT
      }
    )
    ,map.obj((row,index) => {
      row.arbitraryModification = index
      return row
    })
    ,JSONStream.stringify() // false gives ndjson
    ,process.stdout // or send to express's res
    ,(err) => { if(err) console.error(err) }
  )
})
// [
// {"DUMMY":"X","COL1":"test","arbitraryModification":0}
// ,
// {"DUMMY":"X","COL1":"test5","arbitraryModification":1}
// ]
I have a problem using socket.io-stream when I send a file from the client to the server. I push the file path to MongoDB and then save the file to a files folder. The problem is that while the disk is writing the file, the file path is returned to the client before the writing has completed, so the img element's path is correct but the image does not render on the page.
The code below works as expected, but I'm manually waiting 100ms and then returning the path to the client, which does not guarantee that 100ms will be enough if the image is too big. Any other solutions?
Server side:
ss(socket).on('fileUpload', function(stream, data) {
  Chat.findByIdAndUpdate(data.roomId, {
    $push: {
      "messages": {
        author: data.author,
        date: Date.now(),
        isFile: true,
        extension: data.extension
      }
    }
  }, {safe: true, new: true, upsert: true}, (err, message) => {
    if (err) {
      console.log(err);
    }

    let msg = message.messages[message.messages.length - 1];
    let filename = path.join(__dirname, 'public/files', `${msg._id}.${msg.extension}`);

    stream.pipe(fs.createWriteStream(filename));

    setTimeout(() => {
      io.sockets.to(data.roomId).emit('chat-connection', msg);
    }, 100);
  });
});
Client Side:
function fileUpload(file) {
  let fileExtension = file.name.split('.').pop();
  let stream = ss.createStream();

  // upload a file to the server.
  ss(vm.socket).emit('fileUpload', stream, {
    size: file.size,
    extension: fileExtension,
    roomId: vm.chatInstance._id,
    author: $scope.user.fullName,
  });

  ss.createBlobReadStream(file).pipe(stream);
}
You can check for the finish event on the stream you're writing to.
const writeStream = fs.createWriteStream(filename);

writeStream.on('finish', () => {
  io.sockets.to(data.roomId).emit('chat-connection', msg);
});

stream.pipe(writeStream);
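Alternatively, if you're on Node 10+ where stream.pipeline is available, a sketch of the same idea; pipeline only calls its callback once writing has finished (or errored):
const { pipeline } = require('stream');

// Sketch: emit the message only after the file has been fully written.
pipeline(stream, fs.createWriteStream(filename), (err) => {
  if (err) {
    return console.log(err);
  }

  io.sockets.to(data.roomId).emit('chat-connection', msg);
});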
If I query the Box REST API and get back a readable stream, what is the best way to handle it? How do you send it to the browser? (DISCLAIMER: I'm new to streams and buffers, so some of this code is pretty theoretical.)
Can you pass the readStream in the response and let the browser handle it? Or do you have to stream the chunks into a buffer and then send the buffer??
export function getFileStream(req, res) {
  const fileId = req.params.fileId;
  console.log('fileId', fileId);

  req.sdk.files.getReadStream(fileId, null, (err, stream) => {
    if (err) {
      console.log('error', err);
      return res.status(500).send(err);
    }

    res.type('application/octet-stream');
    console.log('stream', stream);
    return res.status(200).send(stream);
  });
}
Will ^^ work, or do you need to do something like:
export function downloadFile(req, res) {
  const fileId = req.params.fileId;
  console.log('fileId', fileId);

  req.sdk.files.getReadStream(fileId, null, (err, stream) => {
    if (err) {
      console.log('error', err);
      return res.status(500).send(err);
    }

    const buffers = [];
    console.log('stream', stream);

    stream.on('data', (chunk) => {
      buffers.push(chunk);
    })
    .on('end', function() {
      const finalBuffer = Buffer.concat(buffers);
      return res.status(200).send(finalBuffer);
    });
  });
}
The first example would work if you changed your theoretical line to:
- return res.status(200).send(stream);
+ res.writeHead(200, {header: here})
+ stream.pipe(res);
That's the nicest thing about Node streams. The other case would (in essence) work too, but it would accumulate lots of unnecessary memory.
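For completeness, here's a minimal sketch of the first handler with that change applied (still using the req.sdk.files.getReadStream callback API from the question; the octet-stream header mirrors the original res.type call):
export function getFileStream(req, res) {
  const fileId = req.params.fileId;

  req.sdk.files.getReadStream(fileId, null, (err, stream) => {
    if (err) {
      console.log('error', err);
      return res.status(500).send(err);
    }

    // Let pipe handle backpressure and chunking between the SDK stream and the response.
    res.writeHead(200, { 'Content-Type': 'application/octet-stream' });
    stream.pipe(res);
  });
}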
If you'd like to check a working example, here's one I wrote based on scramjet, express and browserify:
https://github.com/MichalCz/scramjet/blob/master/samples/browser/browser.js
That's where your streams go from the server to the browser. With minor mods it'll fit your problem.
I want to receive a message after a certain amount of time in one of my workers. I decided to go with Node and RabbitMQ after discovering so-called dead letter exchanges.
The message seems to get sent to the queue in DeadExchange, but the consumer never receives the message in the WorkQueue in the WorkExchange after the elapsed time. Either the bindQueue is off, or the dead-letter'ing doesn't work?
I've tried a lot of different values now. Can someone please point out what I'm missing?
var amqp = require('amqplib');

var url = 'amqp://dev.rabbitmq.com';

amqp.connect(url).then(function(conn) {
  // Subscribe to the WorkQueue in WorkExchange to which the "delayed" messages get dead-letter'ed (is that a verb?) to.
  return conn.createChannel().then(function(ch) {
    return ch.assertExchange('WorkExchange', 'direct').then(function() {
      return ch.assertQueue('WorkQueue', {
        autoDelete: false,
        durable: true
      })
    }).then(function() {
      return ch.bindQueue('WorkQueue', 'WorkExchange', '');
    }).then(function() {
      console.log('Waiting for consume.');
      return ch.consume('WorkQueue', function(msg) {
        console.log('Received message.');
        console.log(msg.content.toString());
        ch.ack(msg);
      });
    });
  })
}).then(function() {
  // Now send a test message to DeadExchange to a random (unique) queue.
  return amqp.connect(url).then(function(conn) {
    return conn.createChannel();
  }).then(function(ch) {
    return ch.assertExchange('DeadExchange', 'direct').then(function() {
      return ch.assertQueue('', {
        arguments: {
          'x-dead-letter-exchange': 'WorkExchange',
          'x-message-ttl': 2000,
          'x-expires': 10000
        }
      })
    }).then(function(ok) {
      console.log('Sending delayed message');
      return ch.sendToQueue(ok.queue, new Buffer(':)'));
    });
  })
}).then(null, function(error) {
  console.log('error\'ed')
  console.log(error);
  console.log(error.stack);
});
I'm using amqp.node (https://github.com/squaremo/amqp.node), which is amqplib on npm. Although node-amqp (https://github.com/postwait/node-amqp) seems to be much more popular, it doesn't implement the full protocol and there are quite a few outstanding issues regarding reconnecting.
dev.rabbitmq.com is running RabbitMQ 3.1.3.
This is working code. When a message spends more than its TTL in DeadExchange, it is pushed to WorkExchange. The key to success is defining the right routing key: the exchange/queue to which you want the message delivered after the TTL should be bound with a routing key (note: not the default), and the 'x-dead-letter-routing-key' attribute's value should match that routing key.
var amqp = require('amqplib');

var url = 'amqp://localhost';

amqp.connect(url).then(function(conn) {
  // Subscribe to the WorkQueue in WorkExchange to which the "delayed" messages get dead-letter'ed (is that a verb?) to.
  return conn.createChannel().then(function(ch) {
    return ch.assertExchange('WorkExchange', 'direct').then(function() {
      return ch.assertQueue('WorkQueue', {
        autoDelete: false,
        durable: true
      })
    }).then(function() {
      return ch.bindQueue('WorkQueue', 'WorkExchange', 'rk1');
    }).then(function() {
      console.log('Waiting for consume.');
      return ch.consume('WorkQueue', function(msg) {
        console.log('Received message.');
        console.log(msg.content.toString());
        ch.ack(msg);
      });
    });
  })
}).then(function() {
  // Now send a test message to DeadExchange to the DEQ queue.
  return amqp.connect(url).then(function(conn) {
    return conn.createChannel();
  }).then(function(ch) {
    return ch.assertExchange('DeadExchange', 'direct').then(function() {
      return ch.assertQueue('DEQ', {
        arguments: {
          'x-dead-letter-exchange': 'WorkExchange',
          'x-dead-letter-routing-key': 'rk1',
          'x-message-ttl': 15000,
          'x-expires': 100000
        }
      })
    }).then(function() {
      return ch.bindQueue('DEQ', 'DeadExchange', '');
    }).then(function() {
      console.log('Sending delayed message');
      return ch.publish('DeadExchange', '', new Buffer("Over the Hills and Far Away!"));
    });
  })
}).then(null, function(error) {
  console.log('error\'ed')
  console.log(error);
  console.log(error.stack);
});
Here's an example using AMQP Connection Manager for Node. I noticed no examples seemed to match what we were doing in our code, so I made a repo with a simple example and one with retry counts via republishing back to the main exchange: https://github.com/PritchardAlexander/node-amqp-dead-letter-queue
Here's the simple example:
const amqp = require('amqp-connection-manager');
const username = encodeURIComponent('queue');
const password = encodeURIComponent('pass');
const port = '5672';
const host = 'localhost';
const connectionString = `amqp://${username}:${password}@${host}:${port}`;
// Ask the connection manager for a ChannelWrapper. Specify a setup function to
// run every time we reconnect to the broker.
connection = amqp.connect([connectionString]);

// A channel is your ongoing connection to RabbitMQ.
// All commands go through your channel.
connection.createChannel({
  json: true,
  setup: function (channel) {
    channel.prefetch(100);

    // Setup EXCHANGES - which are hubs you PUBLISH to that dispatch MESSAGES to QUEUES
    return Promise.all([
      channel.assertExchange('Test_MainExchange', 'topic', {
        durable: false,
        autoDelete: true,
        noAck: false
      }),
      channel.assertExchange('Test_DeadLetterExchange', 'topic', {
        durable: false,
        autoDelete: true,
        maxLength: 1000,
        noAck: true // This means dead letter messages will not need an explicit acknowledgement or rejection
      })
    ])
    // Setup QUEUES - which are delegated MESSAGES by EXCHANGES.
    // The MESSAGES then need to be CONSUMED.
    .then(() => {
      return Promise.all([
        channel.assertQueue(
          'Test_MainQueue',
          options = {
            durable: true,
            autoDelete: true,
            exclusive: false,
            messageTtl: 1000*60*60*1,
            deadLetterExchange: 'Test_DeadLetterExchange'
          }
        ),
        channel.assertQueue('Test_DeadLetterQueue',
          options = {
            durable: false,
            autoDelete: true,
            exclusive: false
          }
        )
      ]);
    })
    // This glues the QUEUES and EXCHANGES together
    // The last parameter is a routing key. A hash/pound just means: give me all messages in the exchange.
    .then(() => {
      return Promise.all([
        channel.bindQueue('Test_MainQueue', 'Test_MainExchange', '#'),
        channel.bindQueue('Test_DeadLetterQueue', 'Test_DeadLetterExchange', '#')
      ]);
    })
    // Setup our CONSUMERS
    // They pick MESSAGES off of QUEUES and do something with them (either ack or nack them)
    .then(() => {
      return Promise.all([
        channel.consume('Test_MainQueue', (msg) => {
          const stringifiedContent = msg.content ? msg.content.toString() : '{}';
          console.log('Test_MainQueue::CONSUME ' + stringifiedContent);

          const messageData = JSON.parse(stringifiedContent);
          if (messageData.value === 0) {
            console.log('Test_MainQueue::REJECT ' + stringifiedContent);
            // the 'false' param at the very end means: don't retry! dead letter this instead!
            return channel.nack(msg, true, false);
          }

          return channel.ack(msg);
        }),
        channel.consume('Test_DeadLetterQueue', (msg) => {
          const stringifiedContent = msg.content ? msg.content.toString() : '{}';
          console.log('');
          console.log('Test_DeadLetterQueue::CONSUME ' + stringifiedContent);
          console.log('');
        })
      ]);
    })
    .then(() => {
      setInterval(function () {
        const messageData = {
          text: 'Dead letter if 0',
          value: Math.floor(Math.random()*5)
        };
        const stringifiedMessage = JSON.stringify(messageData);

        // Publish message to exchange
        if (channel.publish('Test_MainExchange', '', new Buffer(stringifiedMessage))) {
          console.log(`Sent ${stringifiedMessage}`);
        } else {
          console.log(`Failed to send ${stringifiedMessage}`);
        };
      }, 300);
    });
  }
});
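As a sketch of the retry-count idea mentioned above (this is not the code from the linked repo): RabbitMQ records each dead-lettering in an x-death header whose entries carry a count, so the dead-letter consumer could use it to decide whether to republish to the main exchange or give up. The retry limit of 3 here is an arbitrary assumption.
// Sketch only: inspect RabbitMQ's x-death header inside the dead-letter consumer.
channel.consume('Test_DeadLetterQueue', (msg) => {
  const deaths = (msg.properties.headers && msg.properties.headers['x-death']) || [];
  const attempts = deaths.length > 0 ? deaths[0].count : 0;

  if (attempts < 3) {
    // Republish back to the main exchange for another try, keeping the headers
    // so the broker continues to accumulate the x-death count.
    channel.publish('Test_MainExchange', '', msg.content, { headers: msg.properties.headers });
  } else {
    console.log('Giving up after ' + attempts + ' attempts');
  }
});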
There was a bug in Channel#assertQueue in AMQP.Node which just got fixed, see https://github.com/squaremo/amqp.node/commit/3749c66b448875d2df374e6a89946c0bdd0cb918. The fix is on GitHub but not in npm just yet.