How to 'pipe' oracle-db data from 'on data' event - node.js

I've been using node-oracledb for a few months and I've managed to achieve what I have needed to so far.
I'm currently working on a search app that could potentially return about 2m rows of data from a single call. To ensure I don't get a disconnect from the browser and the server, I thought I would try queryStream so that there is a constant flow of data back to the client.
I implemented the queryStream example as-is, and this worked fine for a few hundred thousand rows. However, when the returned rows is greater than one million, Node runs out of memory. By logging and watching both client and server log events, I can see that client is way behind the server in terms of rows sent and received. So, it looks like Node is falling over because it's buffering so much data.
It's worth noting that at this point, my selectstream implementation is within a req/res function called via Express.
To return the data, I do something like....
stream.on('data', function (data) {
rowcount++;
let obj = new myObjectConstructor(data);
res.write(JSON.stringify(obj.getJson());
});
I've been reading about how streams and pipe can help with flow, so what I'd like to be able to do is to be able to pipe the results from the query to a) help with flow and b) to be able to pipe the results to other functions before sending back to the client.
E.g.
function getData(req, res){
var stream = myQueryStream(connection, query);
stream
.pipe(toSomeOtherFunction)
.pipe(yetAnotherFunction)
.pipe(res);
}
I'm spent a few hours trying to find a solution or example that allows me to pipe results, but I'm stuck and need some help.
Apologies if I'm missing something obvious, but I'm still getting to grips with Node and especially streams.
Thanks in advance.

There's a bit of an impedance mismatch here. The queryStream API emits rows of JavaScript objects, but what you want to stream to the client is a JSON array. You basically have to add an open bracket to the beginning, a comma after each row, and a close bracket to the end.
I'll show you how to do this in a controller that uses the driver directly as you have done, instead of using separate database modules as I advocate in this series.
const oracledb = require('oracledb');
async function get(req, res, next) {
try {
const conn = await oracledb.getConnection();
const stream = await conn.queryStream('select * from employees', [], {outFormat: oracledb.OBJECT});
res.writeHead(200, {'Content-Type': 'application/json'});
res.write('[');
stream.on('data', (row) => {
res.write(JSON.stringify(row));
res.write(',');
});
stream.on('end', () => {
res.end(']');
});
stream.on('close', async () => {
try {
await conn.close();
} catch (err) {
console.log(err);
}
});
stream.on('error', async (err) => {
next(err);
try {
await conn.close();
} catch (err) {
console.log(err);
}
});
} catch (err) {
next(err);
}
}
module.exports.get = get;
Once you get the concepts, you can simplify things a bit with a reusable Transform class which allows you to use pipe in the controller logic:
const oracledb = require('oracledb');
const { Transform } = require('stream');
class ToJSONArray extends Transform {
constructor() {
super({objectMode: true});
this.push('[');
}
_transform (row, encoding, callback) {
if (this._prevRow) {
this.push(JSON.stringify(this._prevRow));
this.push(',');
}
this._prevRow = row;
callback(null);
}
_flush (done) {
if (this._prevRow) {
this.push(JSON.stringify(this._prevRow));
}
this.push(']');
delete this._prevRow;
done();
}
}
async function get(req, res, next) {
try {
const toJSONArray = new ToJSONArray();
const conn = await oracledb.getConnection();
const stream = await conn.queryStream('select * from employees', [], {outFormat: oracledb.OBJECT});
res.writeHead(200, {'Content-Type': 'application/json'});
stream.pipe(toJSONArray).pipe(res);
stream.on('close', async () => {
try {
await conn.close();
} catch (err) {
console.log(err);
}
});
stream.on('error', async (err) => {
next(err);
try {
await conn.close();
} catch (err) {
console.log(err);
}
});
} catch (err) {
next(err);
}
}
module.exports.get = get;

Rather than writing your own logic to create a JSON stream, you can use JSONStream to convert an object stream to (stringified) JSON, before piping it to its destination (res, process.stdout etc) This saves the need to muck around with .on('data',...) events.
In the example below, I've used pipeline from node's stream module rather than the .pipe method: the effect is similar (with better error handling I think). To get objects from oracledb.queryStream, you can specify option {outFormat: oracledb.OUT_FORMAT_OBJECT} (docs). Then you can make arbitrary modifications to the stream of objects produced. This can be done using a transform stream, made perhaps using through2-map, or if you need to drop or split rows, through2. Below the stream is sent to process.stdout after being stringified as JSON, but you could equally send to it express's res.
require('dotenv').config() // config from .env file
const JSONStream = require('JSONStream')
const oracledb = require('oracledb')
const { pipeline } = require('stream')
const map = require('through2-map') // see https://www.npmjs.com/package/through2-map
oracledb.getConnection({
user: process.env.DB_USER,
password: process.env.DB_PASSWORD,
connectString: process.env.CONNECT_STRING
}).then(connection => {
pipeline(
connection.queryStream(`
select dual.*,'test' as col1 from dual
union select dual.*, :someboundvalue as col1 from dual
`
,{"someboundvalue":"test5"} // binds
,{
prefetchRows: 150, // for tuning
fetchArraySize: 150, // for tuning
outFormat: oracledb.OUT_FORMAT_OBJECT
}
)
,map.obj((row,index) => {
row.arbitraryModification = index
return row
})
,JSONStream.stringify() // false gives ndjson
,process.stdout // or send to express's res
,(err) => { if(err) console.error(err) }
)
})
// [
// {"DUMMY":"X","COL1":"test","arbitraryModification":0}
// ,
// {"DUMMY":"X","COL1":"test5","arbitraryModification":1}
// ]

Related

NodeJS MSSQL results to JSON array

I am trying to create an array of JSON objects from an SQL Server query using NodeJS but using just JSON.Stringify on each row I get the results I am looking for as all individual JSON objects but not in an array. Ideally I am just trying to write these results to a file as a JSON array. Any ideas appreciated.
Tried the following
const sql=require('mssql');
const fs=require('fs');
const config = {
// Creds removed
},
};
sql.connect(config, err => {
console.log(err);
const request = new sql.Request()
request.stream = true // You can set streaming differently for each request
request.query(fs.readFileSync('./new-query.sql').toString()); // or request.execute(procedure)
request.on('row', row => {
console.log(JSON.stringify(row));
})
request.on('error', err => {
console.log(err);
// May be emitted multiple times
})
request.on('done', result => {
// console.log('done emitted', result);
sql.close();
})
})
As always, reading the library notes in more detail led me to a solution. Using the MSSQL library you can actually work on the entire returned recordset as opposed to every column. Using this code works as I was wanting it to.
const sql = require('mssql')
const fs=require('fs');
const config = {
// cred removed
};
sql.connect(config).then(() => {
return sql.query(fs.readFileSync('./new-query.sql').toString())
}).then(result => {
console.log(JSON.stringify(result.recordsets[0]))
}).catch(err => {
// ... error checks
})
sql.on('error', err => {
// ... error handler
})

MongoError: pool destroyed when fetching all data without conditions

I am new to mongoDb, as I am trying to query from different collection and in order to do that, when I am fetching data from category collection I mean when I am running select * from collection it is throwing error, MongoError: pool destroyed.
As per my understanding it is because of some find({}) is creating a pool and that is being destroyed.
The code which I am using inside model is below,
const MongoClient = require('mongodb').MongoClient;
const dbConfig = require('../configurations/database.config.js');
export const getAllCategoriesApi = (req, res, next) => {
return new Promise((resolve, reject ) => {
let finalCategory = []
const client = new MongoClient(dbConfig.url, { useNewUrlParser: true });
client.connect(err => {
const collection = client.db(dbConfig.db).collection("categories");
debugger
if (err) throw err;
let query = { CAT_PARENT: { $eq: '0' } };
collection.find(query).toArray(function(err, data) {
if(err) return next(err);
finalCategory.push(data);
resolve(finalCategory);
// db.close();
});
client.close();
});
});
}
When my finding here is when I am using
let query = { CAT_PARENT: { $eq: '0' } };
collection.find(query).toArray(function(err, data) {})
When I am using find(query) it is returning data but with {} or $gte/gt it is throwing Pool error.
The code which I have written in controller is below,
import { getAllCategoriesListApi } from '../models/fetchAllCategory';
const redis = require("redis");
const client = redis.createClient(process.env.REDIS_PORT);
export const getAllCategoriesListData = (req, res, next, query) => {
// Try fetching the result from Redis first in case we have it cached
return client.get(`allstorescategory:${query}`, (err, result) => {
// If that key exist in Redis store
if (false) {
res.send(result)
} else {
// Key does not exist in Redis store
getAllCategoriesListApi(req, res, next).then( function ( data ) {
const responseJSON = data;
// Save the Wikipedia API response in Redis store
client.setex(`allstorescategory:${query}`, 3600, JSON.stringify({ source: 'Redis Cache', responseJSON }));
res.send(responseJSON)
}).catch(function (err) {
console.log(err)
})
}
});
}
Can any one tell me what mistake I am doing here. How I can fix pool issue.
Thanking you in advance.
I assume that toArray is asynchronous (i.e. it invokes the callback passed in as results become available, i.e. read from the network).
If this is true the client.close(); call is going to get executed prior to results having been read, hence likely yielding your error.
The close call needs to be done after you have finished iterating the results.
Separately from this, you should probably not be creating the client instance in the request handler like this. Client instances are expensive to create (they must talk to all of the servers in the deployment before they can actually perform queries) and generally should be created per running process rather than per request.

page renders before getting all the values sorted

I think the rendering takes place before the searching of the string on the files, i have tried different methods but don't seems to get this working. any help will be appreciated. im a noob on to the nodejs. im trying to get the id of the user and query and get all the data and there after see if he is in any of the lists given and finally render the page.
const j = [];
let name = '';
const filename = [];
var ext = '';
module.exports = function(app, express) {
app.use(bodyParser.urlencoded({ extended: false }));
app.use(bodyParser.json());
app.post('/cusdetails', isLoggedIn, function (req, res) {
var cusid=req.body.cusid;
var insertQuerys = "SELECT * FROM customer WHERE cusid=? ORDER BY rowid DESC LIMIT 1";
connection.query(insertQuerys,[cusid],
function(err, rows){
rows.forEach( (row) => {
name=row.fncus;
});
fs.readdir('./views/iplist', function(err, files) {
if (err)
throw err;
for (var index in files) {
j.push(files[index])
}
j.forEach(function(value) {
var k = require('path').resolve(__dirname, '../views/iplist/',value);
fs.exists(k, function(fileok){
if(fileok) {
fs.readFile(k, function(err, content) {
if (err) throw err;
if (content.indexOf(name) > -1) {
ext = path.extname(k);
filename.push(path.basename(k, ext));
}
});
}
else {
console.log(" FileNotExist ");
}
});
});
});
console.log(filename);
res.render('cusdetails.ejs', {rows: rows, user:req.user , aml: filename });
});
})
You can create simple Promise wrapper and then use it inside async/await function to pause execution until resolved.
// use mysql2 package as it provides promise, less work to write promise wrappers
const mysql = require('mysql2/promise');
// create the connection to database
const connection = mysql.createConnection({
host: 'localhost',
user: 'root',
database: 'test'
});
// sample wrapper
function some(k) {
// more advisable to have local variables, why do you need this to be array?
var filename = [];
return new Promise((resolve, reject) => {
// doing this is also not recommended check nodejs documentation **fs.exists** for more info
fs.exists(k, function(fileok){
if(fileok) {
fs.readFile(k, function(err, content) {
if (err) reject(err);
if (content.indexOf(name) > -1) {
ext = path.extname(k);
filename.push(path.basename(k, ext));
resolve(filename)
}
});
}
else {
// reject(new Error("FileNotExist"))
console.log(" FileNotExist ");
}
});
})
}
// note the use of async
app.post('/cusdetails', isLoggedIn, async function (req, res) {
var cusid=req.body.cusid;
var insertQuerys = "SELECT * FROM customer WHERE cusid=? ORDER BY rowid DESC LIMIT 1";
// using await to pause excution, waits till query is finished
const [rows] = await connection.query(insertQuerys,[cusid])
rows.forEach( (row) => {
name=row.fncus;
});
// then you can
var result = await some(k)
...
Note however this way you loose the advantage of concurrent execution, as it's kindoff blocking. If the result of one call is not used in another, you can execute in parallel and await for result to achieve sequencing like
const [rows] = connection.query(insertQuerys,[cusid])
var result = some(k)
console.log(await rows) // do something
console.log(await result) // do something
JavaScript is asynchronous. This means that if you have a function with a callback (i.e. your query), the callback will be called asynchronously, at an unknown time, while the other code executes.
You need to look up some tutorials how to deal with callbacks, to get a proper understanding of it. Another method is using async/await and/or promises.
Basically, if you take the following code:
console.log("this will print first");
setTimeout(function () {
console.log("this will print last");
}, 1000);
console.log("this will print second");
If you run the code above, the top level is executed synchronously, so, it first calls console.log, then it executes setTimeout, which is synchronous. It sets a timeout, then says "I'm ready", and the code continues to the other console.log. After 1 second (1000 milliseconds), the callback in the setTimeout function is executed, and only then that console.log is called. You can not make the rest of the code wait this way, you need to restructure your code or read into promises.

Nodejs: How to send a readable stream to the browser

If I query the box REST API and get back a readable stream, what is the best way to handle it? How do you send it to the browser?? (DISCLAIMER: I'm new to streams and buffers, so some of this code is pretty theoretical)
Can you pass the readStream in the response and let the browser handle it? Or do you have to stream the chunks into a buffer and then send the buffer??
export function getFileStream(req, res) {
const fileId = req.params.fileId;
console.log('fileId', fileId);
req.sdk.files.getReadStream(fileId, null, (err, stream) => {
if (err) {
console.log('error', err);
return res.status(500).send(err);
}
res.type('application/octet-stream');
console.log('stream', stream);
return res.status(200).send(stream);
});
}
Will ^^ work, or do you need to do something like:
export function downloadFile(req, res) {
const fileId = req.params.fileId;
console.log('fileId', fileId);
req.sdk.files.getReadStream(fileId, null, (err, stream) => {
if (err) {
console.log('error', err);
return res.status(500).send(err);
}
const buffers = [];
const document = new Buffer();
console.log('stream', stream);
stream.on('data', (chunk) => {
buffers.push(buffer);
})
.on('end', function(){
const finalBuffer = Buffer.concat(buffers);
return res.status(200).send(finalBuffer);
});
});
}
The first example would work if you changed you theoretical line to:
- return res.status(200).send(stream);
+ res.writeHead(200, {header: here})
+ stream.pipe(res);
That's the nicest thing about node stream. The other case would (in essence) work too, but it would accumulate lots of unnecessary memory.
If you'd like to check a working example, here's one I wrote based on scramjet, express and browserify:
https://github.com/MichalCz/scramjet/blob/master/samples/browser/browser.js
Where your streams go from the server to the browser. With minor mods it'll fit your problem.

nodejs pg transactions without nesting

I would like to know if it's possible to run a series of SQL statements and have them all committed in a single transaction.
The scenario I am looking at is where an array has a series of values that I wish to insert into a table, not individually but as a unit.
I was looking at the following item which provides a framework for transactions in node using pg. The individual transactions appear to be nested within one another so I am unsure of how this would work with an array containing a variable number of elements.
https://github.com/brianc/node-postgres/wiki/Transactions
var pg = require('pg');
var rollback = function(client, done) {
client.query('ROLLBACK', function(err) {
//if there was a problem rolling back the query
//something is seriously messed up. Return the error
//to the done function to close & remove this client from
//the pool. If you leave a client in the pool with an unaborted
//transaction weird, hard to diagnose problems might happen.
return done(err);
});
};
pg.connect(function(err, client, done) {
if(err) throw err;
client.query('BEGIN', function(err) {
if(err) return rollback(client, done);
//as long as we do not call the `done` callback we can do
//whatever we want...the client is ours until we call `done`
//on the flip side, if you do call `done` before either COMMIT or ROLLBACK
//what you are doing is returning a client back to the pool while it
//is in the middle of a transaction.
//Returning a client while its in the middle of a transaction
//will lead to weird & hard to diagnose errors.
process.nextTick(function() {
var text = 'INSERT INTO account(money) VALUES($1) WHERE id = $2';
client.query(text, [100, 1], function(err) {
if(err) return rollback(client, done);
client.query(text, [-100, 2], function(err) {
if(err) return rollback(client, done);
client.query('COMMIT', done);
});
});
});
});
});
My array logic is:
banking.forEach(function(batch){
client.query(text, [batch.amount, batch.id], function(err, result);
}
pg-promise offers a very flexible support for transactions. See Transactions.
It also supports partial nested transactions, aka savepoints.
The library implements transactions automatically, which is what should be used these days, because too many things can go wrong, if you try organizing a transaction manually as you do in your example.
See a related question: Optional INSERT statement in a transaction
Here's a simple TypeScript solution to avoid pg-promise
import { PoolClient } from "pg"
import { pool } from "../database"
const tx = async (callback: (client: PoolClient) => void) => {
const client = await pool.connect();
try {
await client.query('BEGIN')
try {
await callback(client)
await client.query('COMMIT')
} catch (e) {
await client.query('ROLLBACK')
}
} finally {
client.release()
}
}
export { tx }
Usage:
...
let result;
await tx(async client => {
const { rows } = await client.query<{ cnt: string }>('SELECT COUNT(*) AS cnt FROM users WHERE username = $1', [username]);
result = parseInt(rows[0].cnt) > 0;
});
return result;

Resources