I built an event API in PHP using Cassandra and phpcassa.
Recently, I wrote a node.js + helenus replacement for the same API. After I finished, I started benchmarking the node.js code with the ab tool. However, after 1000+ inserts into Cassandra, the connection gets lost and the failover code kicks in. I insert into Cassandra asynchronously as each request occurs.
I instantiate the ConnectionPool object when the node app starts. However, I call the pool.connect() function in each request.
Does pool.connect() start a new connection, or does it bind to the existing connection from the ConnectionPool object?
Notes:
In phpcassa I didn't have such problems, probably because I was inserting into Cassandra synchronously.
I'm using 5 Cassandra nodes.
Yes, pool.connect() creates a new connection to the pool every time it's called, so you usually only have to call it once when the application starts, and pass that connection to any module or method that needs it.
I usually use a module to do this:
module.exports = function(app, cb) {
  var domain = require('domain'),
      d = domain.create(),
      helenus = require('helenus'); // note: semicolon here, not a comma

  d.run(function() {
    var pool = new helenus.ConnectionPool({
      hosts      : ['localhost:9160'],
      user       : "",
      password   : "",
      keyspace   : 'something',
      cqlVersion : '3.0.0'
    });

    // Connect once at startup and hand the pool back to the caller.
    pool.connect(function(err, keyspace) {
      if (err) {
        cb(err, null);
      } else {
        cb(null, pool);
      }
    });

    pool.on('error', function(err) {
      cb(err, null);
    });
  });

  d.on('error', function(err) {
    console.log('error', err.stack);
    cb(err, null);
  });
};
and call it in my app with something like this (really simplified):
var express = require('express');
var app = express(); // was this.express(), which throws
var database = require('./db');

database(app, function(err, conn) {
  if (err == null) {
    // connected, do stuff
    conn.cql("SELECT * FROM table WHERE KEY = ?", [what], function(err, result) {
      if (err == null) {
        // use the result
      }
    });
  }
});
I've used this with millions of records, with continuous inserts and a large number of lookups, without any issue. Cassandra performs better for me than any other DB I've tried.
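To tie this back to the original question: once connected, every request handler reuses the same pool instead of calling pool.connect() again. A minimal sketch, where the route, table, and column names are made up for illustration:

// Hypothetical insert route reusing the conn/pool from the callback above.
// No pool.connect() per request; the pool manages its own connections.
app.post('/event', function(req, res) {
  conn.cql("INSERT INTO events (key, payload) VALUES (?, ?)",
           [req.body.key, req.body.payload],
           function(err) {
    if (err) return res.status(500).send(err.message);
    res.send('ok');
  });
});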
Related
I'm currently working on a project for work where I have an Oracle 10 database table with about 310K rows, give or take 10-30K.
The goal is to display those rows in an Angular frontend; however, returning all of them through NodeJS is taking a lot of time.
Given that I'm using both NodeJS and oracledb for the first time, I'm assuming I must be missing something?
var oracledb = require('oracledb');
var config = require(__dirname + '/../db.js');

function get(req, res, next) {
  var table = req.query.table;
  var connection; // hoisted so the catch block can see it

  oracledb.getConnection(config.oracle)
    .then(function(conn) {
      connection = conn;

      var stream = connection.queryStream('SELECT * FROM ' + table);

      stream.on('error', function (error) {
        console.error(error);
        return next(error); // was next(err), which is undefined here
      });

      stream.on('metadata', function (metadata) {
        console.log(metadata);
      });

      stream.on('data', function (data) {
        console.log(data);
      });

      stream.on('end', function () {
        connection.release(function(err) {
          if (err) {
            console.error(err.message);
            return next(err);
          }
        });
      });
    })
    .catch(function(err) {
      if (connection) { // connection was out of scope in the original catch
        connection.close(function(err) {
          if (err) {
            console.error(err.message);
            return next(err);
          }
        });
      }
    });
}
module.exports.get = get;
30 MB is a lot of data to load into the front end. It can work in some cases, such as desktop web apps where the benefits of "caching" the data offset the time needed to load it (and somewhat stale data is okay). But it will not work well in other cases, such as mobile.
Keep in mind that the 30 MB must be moved from the DB to Node.js and then from Node.js to the client. The network connections between these will greatly impact performance.
I'll point out a few things that can help performance, though not all are exactly related to this question.
First, if you're using a web server, you should be using a connection pool, not dedicated/one-off connections. Generally, you'd create the connection pool in your index/main/app.js and start the web server after that's done and ready.
Here's an example:
const oracledb = require('oracledb');
const express = require('express');
const config = require('./db-config.js');
const thingController = require('./things-controller.js');

// Node.js uses 4 background threads by default; increase this to handle the
// max DB pool size. It must be set before any other call that uses the
// libuv threadpool.
process.env.UV_THREADPOOL_SIZE = config.poolMax + 4;

// This setting can be used to reduce the number of round trips between
// Node.js and the database.
oracledb.prefetchRows = 10000;

function initDBConnectionPool() {
  console.log('Initializing database connection pool');

  return oracledb.createPool(config);
}

function initWebServer() {
  console.log('Initializing webserver');

  const app = express(); // was an implicit global in the original

  let router = new express.Router();

  router.route('/things')
    .get(thingController.get);

  app.use('/api', router);

  app.listen(3000, () => {
    console.log('Webserver listening on localhost:3000');
  });
}

initDBConnectionPool()
  .then(() => {
    initWebServer();
  })
  .catch(err => {
    console.log(err);
  });
That will create a pool which is added to the internal pool cache in the driver. This allows you to easily access it from other modules (example later).
Note that when using connection pools, it's generally a good idea to increase the thread pool available to Node.js to allow each connection in the pool to work concurrently. An example of this is included above.
In addition, I'm increasing the value of oracledb.prefetchRows. This setting is directly related to your question. Network round trips are used to move the data between the DB and Node.js, and this setting controls the number of rows fetched with each round trip. As prefetchRows goes higher, fewer round trips are needed and performance increases. Just be careful you don't go too high for the memory you have in your Node.js server.
I ran a generic test that mocked the 30 MB dataset size. When oracledb.prefetchRows was left at the default of 100, the test finished in 1 minute 6 seconds. When I bumped this up to 10,000, it finished in 27 seconds.
Okay, moving on to "things-controller.js" which is based on your code. I've updated the code to do the following:
Assert that table is a valid table name. Your current code is vulnerable to SQL injection.
Use a promise chain that emulates a try/catch/finally block to close the connection just once and return the first error encountered (if needed).
Work so I could run the test.
Here's the result:
const oracledb = require('oracledb');

function get(req, res, next) {
  const table = req.query.table;
  const rows = [];
  let conn;
  let err; // Will store the first error encountered

  // You need something like this to prevent SQL injection. The current code
  // is wide open.
  if (!isSimpleSqlName(table)) {
    next(new Error('Not simple SQL name'));
    return;
  }

  // If you don't pass a config, the connection is pulled from the 'default'
  // pool in the cache.
  oracledb.getConnection()
    .then(c => {
      return new Promise((resolve, reject) => {
        conn = c;

        const stream = conn.queryStream('SELECT * FROM ' + table);

        stream.on('error', err => {
          reject(err);
        });

        stream.on('data', data => {
          rows.push(data);
        });

        stream.on('end', function () {
          resolve();
        });
      });
    })
    .catch(e => {
      err = err || e;
    })
    .then(() => {
      if (conn) { // conn assignment worked, need to close/release conn
        return conn.close();
      }
    })
    .catch(e => {
      console.log(e); // Just log; an error during release doesn't affect other work
    })
    .then(() => {
      if (err) {
        next(err);
        return;
      }

      res.status(200).json(rows);
    });
}

module.exports.get = get;

function isSimpleSqlName(name) {
  if (name.length > 30) {
    return false;
  }

  // Fairly generic, but effective. Would need to be adjusted to accommodate
  // quoted identifiers, schemas, etc.
  if (!/^[a-zA-Z0-9#_$]+$/.test(name)) {
    return false;
  }

  return true;
}
I hope that helps. Let me know if you have questions.
In my code below I wish to receive an "order" from the feed and store it in the database.
I understand that the class method marketEvent will be called whenever data is received and accordingly I would need my insert statement within this function.
It would be inefficient to open and close the db connection each time the method is called, so I want to open the connection once and pass the db object through to marketEvent.
I am new to nodejs and web sockets, so I can't work out how to put it together.
var pg = require("pg");
var conString = "postgres://myusername:mypassword@localhost/poloniex";
var client = new pg.Client(conString);

var autobahn = require('autobahn');
var wsuri = "wss://api.poloniex.com";
var connection = new autobahn.Connection({
  url: wsuri,
  realm: "realm1"
});

connection.onopen = function (session) {
  function marketEvent (args, kwargs) {
    // "order" is a reserved word in SQL, so it has to be quoted
    client.query('INSERT INTO orderbook("order") VALUES ($1)', [args]);
  }
  session.subscribe('BTC_XMR', marketEvent);
};

connection.onclose = function () {
  console.log("Websocket connection closed");
};

client.connect();
connection.open();
I'm not familiar with either "pg" or "autobahn", but from the documentation of the "pg" package, may I offer a hint at a solution.
client.connect() is not meant to be an isolated call; it expects a callback to carry out the operation once the connection is established.
connection.onopen = function (session) {
  function marketEvent (args, kwargs) {
    client.connect(function(err) {
      if (err) throw err;
      client.query("INSERT ...", [args]);
    });
  }
  session.subscribe('BTC_XMR', marketEvent);
};
As for using the connection efficiently, you may be asking about pooling, I guess.
var pool = new pg.Pool(config);

pool.connect(function(err, client, done) {
  if (err) throw err;
  client.query('INSERT ...', function(err, result) {
    // call `done()` to release the client back to the pool
    done();
  });
});
Please refer to the original source of this suggestion for the exact usage:
https://www.npmjs.com/package/pg
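Putting the two together for this case, a rough sketch (the pool settings are assumptions based on the question's connection string) would create the pool once at startup and use it inside marketEvent:

var pg = require('pg');

// One pool for the life of the process.
var pool = new pg.Pool({
  user     : 'myusername',
  password : 'mypassword',
  host     : 'localhost',
  database : 'poloniex'
});

connection.onopen = function (session) {
  function marketEvent (args, kwargs) {
    // pool.query checks out a client, runs the query, and returns the
    // client to the pool automatically.
    pool.query('INSERT INTO orderbook("order") VALUES ($1)', [args], function (err) {
      if (err) console.error(err);
    });
  }
  session.subscribe('BTC_XMR', marketEvent);
};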
configuration.js
var mysql = require('mysql');

var pool = mysql.createPool({
  connectionLimit : 10,
  host            : 'localhost',
  user            : 'root',
  password        : '',
  database        : 'test'
});

pool.getConnection(function(err, connection) {
  if (err) {              // without braces, the original `return` always ran
    console.error('error connecting: ' + err.stack);
    return;
  }
  connection.release();   // give the test connection back to the pool
});

module.exports = pool;
user.js model file
var async = require("async");

var homePage = function HomePage(user_id) {
  this.user_id = user_id;
  this.db = require('../config/configuration');
};

homePage.prototype.getCourse = function(callback) {
  var self = this;
  self.db.query(' my sql query ', [self.user_id], function (error, results, fields) {
    self.db.release(); // this is the line that throws
    callback(error, results); // was callback(err, results); the parameter is named `error`
  });
};
I have exported my db connection here, and I require that module (configuration.js) in every model file. I assume it will load the same db connection every time, per the Node.js docs:
every call to require('foo') will get exactly the same object returned, if it would resolve to the same file.
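That caching behavior is easy to verify with a quick sketch (hypothetical file names):

// counter.js
module.exports = { count: 0 };

// app.js
var a = require('./counter');
var b = require('./counter');
a.count++;
console.log(b.count); // 1 -- both requires return the same cached object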
As this is my first time coding in JS and asynchronous programming, I am confused about a few things. Will the same configuration file be loaded each time, since I want a singleton-like pattern for my db file? Is my approach correct?
When I do self.db.release() I assume I am releasing a connection back to the pool. But it's giving an error, and I am unable to figure it out as everything else works fine. The error is:
self.db.release() is not a function
If I comment out this line, everything works. Is there a way to see what this release is returning, as that would help in debugging?
Moreover, is my approach correct?
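For reference, in node-mysql release() is a method on an individual pooled connection, not on the pool itself; a minimal sketch of the documented checkout/release pattern:

// Check a connection out of the pool explicitly...
pool.getConnection(function(err, connection) {
  if (err) return console.error(err);
  connection.query('SELECT 1', function (error, results, fields) {
    connection.release(); // release the connection, not the pool
    // ... use results
  });
});

// ...or let pool.query() do the checkout and release for you.
pool.query('SELECT 1', function (error, results, fields) {
  // the connection has already been returned to the pool here
});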
My problem is that I can't retrieve data from my mongodb database... and I don't know why.
I'm probably doing something wrong; here is a little sample which doesn't work.
var Db = require('mongodb').Db,
    Server = require('mongodb').Server;

var db = new Db('akemichat', new Server('localhost', 27017), {w:1});

db.open(function (err, p_db) {
  db = p_db;
});

db.collection('rooms', function (err, collection) {
  if (!err) {
    collection.find().toArray(function(err, items) {
      items.forEach(function(room) {
        console.log('hello'); // Never called...
      });
    });
  } else {
    console.log(err);
  }
});
Notice that I do have data in my database, as the following shows:
➜ akemichat git:(master) ✗ mongo
MongoDB shell version: 2.4.7
connecting to: test
> use akemichat
switched to db akemichat
> db.rooms.find()
{ "name" : "home", "_id" : ObjectId("527008e850305d1b7d000001") }
Thanks for the help!
Notice: the example program never ends, and I don't know why... maybe because the connection is never closed. But if I call db.close() in the toArray callback, it will never run because the callback never happens.
So many things in node are asynchronous. Your connection is opened after you try to read from your collection.
You should query the collection only after you know for sure you are connected. Down and dirty:
var Db = require('mongodb').Db,
    Server = require('mongodb').Server;

var db = new Db('akemichat', new Server('localhost', 27017), {w:1});

db.open(function (err, p_db) {
  db = p_db;
  // The connection is open now, so it's safe to query.
  db.collection('rooms', function (err, collection) {
    if (!err) {
      collection.find().toArray(function(err, items) {
        items.forEach(function(room) {
          console.log('hello'); // Now this gets called
        });
      });
    } else {
      console.log(err);
    }
  });
});
I ran this locally and received back the "hello" message. Also, your script never finishes because the node process will run until it is closed or crashes; this is by design. It also means you don't have to keep opening and closing your mongo connections: you can open a connection when your application starts and close it when your application shuts down.
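A minimal sketch of that open-once pattern, using the same driver API as above (the SIGINT handler is just one way to hook shutdown):

var Db = require('mongodb').Db,
    Server = require('mongodb').Server;

var db = null;

// Open a single connection at startup...
new Db('akemichat', new Server('localhost', 27017), {w:1}).open(function (err, p_db) {
  if (err) throw err;
  db = p_db;
  // ...start the rest of the app from here, once db is ready.
});

// ...and close it only when the application shuts down.
process.on('SIGINT', function () {
  if (db) db.close();
  process.exit();
});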
I am using express as my webserver for node, and everything seems to be working correctly. The only problem I encounter is when I load a specific page (the '/learn' route) 10 times in a row. Once I do this, express seems to stop working: no error is logged to the console, nothing wrong is displayed on the page, and the browser just keeps waiting for the host. What is weird is that the problem doesn't occur if I go from the problem page to another page and then back again; I can repeat that as much as I want without error. Here is my route with the problem:
var bcrypt = require('bcrypt');
var pool = require('../database.js').pool;

module.exports = function(app) {
  app.get('/learn', function(req, res, next) {
    var query = 'SELECT * FROM questions INNER JOIN answers ON questions.questionID = answers.questionID';
    pool.getConnection(function(err, connection) {
      connection.query(query, function(err, rows) {
        if (err) {
          throw err;
        }
        var data = {
          name: req.session.name,
          problems: rows,
        };
        res.render('learn.html', data);
      });
    });
  });

  app.post('/learn/checkAnswer', function(req, res) {
    // get posted form data
    var questionID = req.body.questionID;
    var selectedAnswer = req.body.selectedAnswer;

    // query database
    pool.getConnection(function(err, connection) {
      var query = connection.query('SELECT correctAnswer FROM questions WHERE questionID = ?', questionID, function(err, rows) {
        res.send({
          correctAnswer: rows[0].correctAnswer
        });
      });
    });
  });
};
I'm not sure if this makes a difference, but I am using handlebars as my rendering engine instead of jade, as well as node-mysql for my database.
10 is the default size of the node-mysql pool. And since you're not ending the connections retrieved with pool.getConnection, the 11th request will wait indefinitely for a free connection.
Easy to fix:
connection.query(query, function(err, rows) {
  connection.end(); // end the connection as soon as possible,
                    // so it's returned to the pool and can be reused
  if (err) ...
});
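Applied to the '/learn' route above, the fix would look something like this (in newer versions of node-mysql the call is connection.release() rather than connection.end()):

app.get('/learn', function(req, res, next) {
  var query = 'SELECT * FROM questions INNER JOIN answers ON questions.questionID = answers.questionID';
  pool.getConnection(function(err, connection) {
    if (err) return next(err);
    connection.query(query, function(err, rows) {
      connection.release(); // return the connection to the pool right away
      if (err) return next(err);
      res.render('learn.html', {
        name: req.session.name,
        problems: rows
      });
    });
  });
});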