Weird result on Node.js scan list query for Aerospike?

Is there something wrong here?
I have this snippet and it returns every record in the set on my stream.
The statement filters seem to be ignored.
I already have the secondary index set up.
// bin type for uid is text
var statement = {
  concurrent: true,
  nobins: false
};
statement.filters = [aerospike.filter.equal("idx_mynamespace_myset_uid", "639085555553")];

var query = client.query('mynamespace', 'myset', statement);
var stream = query.execute();
var count = 0;

stream.on('data', function(rec) {
  // process the scanned record here
  count++;
  console.log(rec);
});
stream.on('error', function(err) {
  // console.log(err);
});
stream.on('end', function() {
  console.log('TOTAL SCANNED:', count);
  process.exit(0);
});

The new version of Aerospike's Node.js client (1.0.31) has fixes for equal and range queries. However, please note that range queries on strings are not supported or recommended; using them may result in unexpected behavior.
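If it helps, here is a minimal sketch of what the equality query could look like after upgrading. Note that filter.equal() takes the name of the indexed bin rather than the name of the index; the bin name uid, the host address, and the connection handling below are my assumptions, not taken from the question.

var aerospike = require('aerospike');

// assumes a reachable local Aerospike node; adjust the host as needed
var client = aerospike.client({ hosts: [{ addr: '127.0.0.1', port: 3000 }] });

client.connect(function(err) {
  var statement = {
    concurrent: true,
    nobins: false,
    // pass the bin name ("uid"), not "idx_mynamespace_myset_uid"
    filters: [aerospike.filter.equal('uid', '639085555553')]
  };

  var query = client.query('mynamespace', 'myset', statement);
  var stream = query.execute();
  var count = 0;

  stream.on('data', function(rec) {
    count++; // only records whose uid equals the filter value should arrive here
  });
  stream.on('end', function() {
    console.log('TOTAL MATCHED:', count);
    client.close();
  });
});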

Related

node.js - sqlite3: read all records in a table and return them

I'm trying to read all records in a sqlite3 table and return them via a callback. But it seems that, despite using serialize, these calls are still asynchronous. Here is my code:
var readRecordsFromMediaTable = function(callback) {
  var db = new sqlite3.Database(file, sqlite3.OPEN_READWRITE | sqlite3.OPEN_CREATE);
  var allRecords = [];
  db.serialize(function() {
    db.each("SELECT * FROM MediaTable", function(err, row) {
      myLib.generateLog(levelDebug, util.inspect(row));
      allRecords.push(row);
    });
    callback(allRecords);
    db.close();
  });
}
When the callback gets fired, the array prints '[]'.
Is there another call I can make (instead of db.each) that will give me all rows in one shot? I have no need to iterate through each row here.
If there isn't, how do I read all records and only then call the callback with the results?
I was able to find the answer to this question. Here it is for anyone who is looking:
var sqlite3 = require("sqlite3").verbose();

var readRecordsFromMediaTable = function(callback) {
  var db = new sqlite3.Database(file, sqlite3.OPEN_READONLY);
  db.serialize(function() {
    db.all("SELECT * FROM MediaTable", function(err, allRows) {
      if (err != null) {
        console.log(err);
        return callback(err);
      }
      console.log(util.inspect(allRows));
      callback(allRows);
      db.close();
    });
  });
}
A promise-based method:
var readRecordsFromMediaTable = function() {
  return new Promise(function(resolve, reject) {
    var responseObj;
    db.all("SELECT * FROM MediaTable", null, function cb(err, rows) {
      if (err) {
        responseObj = {
          'error': err
        };
        reject(responseObj);
      } else {
        responseObj = {
          statement: this,
          rows: rows
        };
        resolve(responseObj);
      }
      db.close();
    });
  });
}
The accepted answer using db.all with a callback is correct since db.each wasn't actually needed. However, if db.each was needed, the solution is provided in the node-sqlite3 API documentation, https://github.com/mapbox/node-sqlite3/wiki/API#databaseeachsql-param--callback-complete:
Database#each(sql, [param, ...], [callback], [complete])
...
After all row callbacks were called, the completion callback will be called if present. The first argument is an error object, and the second argument is the number of retrieved rows.
So, where you end the first callback, instead of just } put }, function() {...}. Something like this:
var readRecordsFromMediaTable = function(callback) {
  var db = new sqlite3.Database(file, sqlite3.OPEN_READWRITE | sqlite3.OPEN_CREATE);
  var allRecords = [];
  db.serialize(function() {
    db.each("SELECT * FROM MediaTable", function(err, row) {
      myLib.generateLog(levelDebug, util.inspect(row));
      allRecords.push(row);
    }, function(err, count) {
      callback(allRecords);
      db.close();
    });
  });
}
I know I'm kinda late, but since you're here, please consider this:
Note that it first retrieves all result rows and stores them in memory. For queries that have potentially large result sets, use the Database#each function to retrieve all rows or Database#prepare followed by multiple Statement#get calls to retrieve a previously unknown amount of rows.
As described in the node-sqlite3 docs, you should use .each() if you're after a very large or unknown number of rows, since .all() will store the whole result set in memory before handing it back.
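For reference, here is a minimal sketch of my own (not from the answers above) that keeps the memory-friendly db.each but still gives the caller a single completion point, by combining the completion callback quoted earlier with a Promise. The function name forEachMediaRecord is made up; it assumes the same MediaTable and file as the question.

var sqlite3 = require("sqlite3").verbose();
var db = new sqlite3.Database(file, sqlite3.OPEN_READONLY);

// Streams rows one at a time through onRow and resolves with the row count
// once db.each's completion callback fires; nothing is buffered in memory.
var forEachMediaRecord = function(onRow) {
  return new Promise(function(resolve, reject) {
    db.each("SELECT * FROM MediaTable", function(err, row) {
      if (err) { return reject(err); }
      onRow(row);
    }, function(err, count) {
      if (err) { return reject(err); }
      resolve(count);
    });
  });
};

forEachMediaRecord(function(row) {
  console.log(row);
}).then(function(count) {
  console.log('processed', count, 'rows');
  db.close();
});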
That being said, take a look at Colin Keenan's answer.
I tackled this differently. Since these calls are asynchronous, you need to wait until they complete before returning their data. I did it with a setInterval(), kind of like throwing pizza dough up into the air and waiting for it to come back down.
var reply = '';
db.all(query, [], function(err, rows) {
  if (err != null) {
    reply = err;
  } else {
    reply = rows;
  }
});

var callbacker = setInterval(function() {
  // check whether our reply has been modified yet
  if (reply !== '') {
    // clear the interval
    clearInterval(callbacker);
    // do work
  }
}, 10); // every ten milliseconds
Old question, but I came across the same issue and solved it with a different approach. The Promise option works, though it is a little too verbose for my taste in the case of a db.all(...) call.
Instead, I am using Node's event mechanism:
var EventEmitter = require('events')
var eventHandler = new EventEmitter()
In your SQLite query function:
function queryWhatever(eventHandler) {
  db.serialize(() => {
    db.all('SELECT * FROM myTable', (err, rows) => {
      // At this point, the query is completed
      // You can emit a signal
      eventHandler.emit('done', 'The query is completed')
    })
  })
}
Then give the eventHandler a callback function that "reacts" to the 'done' event:
eventHandler.on('done', () => {
  // Do something
})
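If the listener actually needs the query results, the rows can be passed straight through the event instead of a status message. This is my extension of the sketch above, not part of the original answer; it assumes the same db handle and myTable:

const EventEmitter = require('events')
const eventHandler = new EventEmitter()

function queryWhatever(eventHandler) {
  db.serialize(() => {
    db.all('SELECT * FROM myTable', (err, rows) => {
      if (err) {
        eventHandler.emit('error', err)  // surface failures as their own event
        return
      }
      eventHandler.emit('done', rows)    // hand the rows to whoever is listening
    })
  })
}

eventHandler.on('done', (rows) => {
  console.log('query finished with', rows.length, 'rows')
})
eventHandler.on('error', (err) => {
  console.error('query failed:', err)
})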

Node.js + socket.io + MySQL correction of syntax

My server.js looks roughly like this; I'm only sending the relevant part. I don't receive anything from the query, even though I do have data in the database, and "sendNotification" is triggered by a jQuery function on the client. Everything else works, but var notis = []; stays empty and that empty value is what shows up as the response. I know I have to debug the SQL and that's what I'm going to do, but I also want to be sure about these other things. So my questions are:
1) Is this the right syntax for node.js, considering its async behavior (which I still don't fully understand)?
2) Should the query always be inside the "io.sockets.on('connection')" part?
connection = mysql.createConnection({
  host: 'localhost',
  user: '',
  password: "",
  database: 'table' //put your database name
}),
...
connection.connect(function(err) {
  // connected! (unless `err` is set)
  console.log(err);
});
…
var sqlquery = function(uID, vs) {
  var notis = [];
  connection.query("SELECT * FROM notification WHERE kid = ? AND v = ? ORDER BY id DESC", [uID, vs])
    .on("result", function(data) {
      return notis.push(data);
    });
};
io.sockets.on('connection', function(socket) {
  ...
  socket.on("sendNotification", function(data) {
    var roomBName = data.room_name.replace("room-", ""),
        found = [];
    var roomSelected = _.find(rooms, function(room) { return room.id == roomBName });
    for (var person in people) {
      for (var i = 0, numAttending = roomSelected.peopleAttending.length; i < numAttending; i++) {
        if (people[person].name == roomSelected.peopleAttending[i]) {
          found.push(person);
        }
      }
    }
    for (var i = 0, numFound = found.length; i < numFound; i++) {
      result = sqlquery(9, 2); // <-- the problematic line
      io.to(found[i]).emit('notification', result);
    };
  });
Your sqlquery() function will not accomplish anything useful. Because connection.query() is asynchronous, it delivers its result sometime LATER, after sqlquery() has already returned.
The only way in node.js to use an async result is to actually use it in the callback that provides it. You don't just stuff it into some other variable and expect the result to be there for you in other code. Instead, you use it inside that callback or you call some other function from the callback and pass it the data.
Here's one way you could change your sqlquery() function:
var sqlquery = function(uID, vs, callback) {
  connection.query("SELECT * FROM notification WHERE kid = ? AND v = ? ORDER BY id DESC", [uID, vs])
    .on("result", function(data) {
      callback(null, data);
    });
  // need to add error handling here if the query returns an error
  // by calling callback(err)
};
Then, you could use the sqlquery function like this:
found.forEach(function(person, index) {
  sqlquery(..., function(err, result) {
    if (err) {
      // handle an error here
    } else {
      io.to(person).emit('notification', result);
    }
  });
});
And it looks like you probably have similar async issues in other places too, such as in connection.connect().
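For the connection.connect() part, the same principle applies: only start work that depends on the connection from inside its callback. A minimal sketch (my own, not from the original answer) might look like this:

connection.connect(function(err) {
  if (err) {
    console.error('could not connect to MySQL:', err);
    return;
  }
  // Only wire up socket.io once the database connection is ready,
  // so no handler can run a query against an unconnected client.
  io.sockets.on('connection', function(socket) {
    socket.on('sendNotification', function(data) {
      // ... same handler body as before ...
    });
  });
});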
In addition to jfriend00's answer, this could be done with the ES6 Promise feature:
var sqlquery = function(uID, vs) {
  return new Promise(function(resolve, reject) {
    connection.query("SELECT * FROM notification WHERE kid = ? AND v = ? ORDER BY id DESC", [uID, vs])
      .on("result", function(data) {
        resolve(data);
      });
  });
};
Now you can use it like this:
found.forEach(function(person, index) {
  sqlquery(...)
    .then(function(result) {
      io.to(person).emit('notification', result);
    });
});
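And if you need to know when every notification has gone out (not something the original answer covers), the per-person promises can be collected with Promise.all. The arguments 9 and 2 are the sample values from the question:

var pending = found.map(function(person) {
  return sqlquery(9, 2).then(function(result) {
    io.to(person).emit('notification', result);
  });
});

Promise.all(pending).then(function() {
  console.log('all notifications emitted');
}).catch(function(err) {
  // handle a rejected promise here
  console.error(err);
});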

Mongoose: how to get data from a capped collection in an express.js app?

I'd like to listen on a MongoDB capped collection, using it as a logging facility.
I use node, express.js, mongo (with mongoose).
This is the (simplified) code I have come up with so far:
var mongoose = require('mongoose');
mongoose.connect('mongodb://localhost/mydb');

var logSchema = new mongoose.Schema({
  date: Date,
  message: String
}, {
  capped: {
    size: 1024
  }
});
var Log = mongoose.model('Log', logSchema);

var filter = { "date": { "$gte": Date.now() } };
var stream = Log.find(filter).tailable().stream();

stream.on('data', function(doc) {
  console.log('log stream data - new doc:', doc.message);
}).on('error', function(error) {
  console.log('status stream data - error:', error.message);
}).on('close', function() {
  console.log('status stream data - closed');
});

// ...

var log = new Log();
logger = function(message) {
  log.date = new Date();
  log.message = message;
  log.save(function(err) {
    if (err) {
      return console.error('error saving log');
    }
    console.log('log message "' + message + '" added');
  });
};

// ...

myRoutingMethod = function(req, res) {
  logger('my routing method started');
  // ...
  res.json('done');
};
My problem is, before myRoutingMethod() is called, I get:
database connection opened
log message "my new message" added
status stream data - error: No more documents in tailed cursor
status stream data - closed
So, I never get
log stream data - new doc: my new message
I am probably missing something about integrating stream() on the capped Log collection with express.js...
Any clue?
It is hard to spot what exactly went wrong in your code.
However, based on other answers here on Stack Overflow, the following may help you out:
First, check the version of Mongoose in your environment and make sure it is 2.7 or later.
If the collection started out non-capped and you added the capped option after a couple of iterations, try dropping the collection and retrying from scratch. You may need to back up the collection and re-initialize it from the backup.
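If you want to confirm whether the existing collection is actually capped before dropping it (a check I'm adding here, assuming Mongoose's default pluralized collection name logs), the mongo shell can tell you, and can even convert the collection in place:

// in the mongo shell
db.logs.isCapped()
// false means the collection was created without the capped option
db.runCommand({ convertToCapped: "logs", size: 1024 })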
Based on initializations found in other answers here on Stack Overflow, I would suggest configuring the capped collection schema as follows:
// new Schema declaration
var logSchema = mongoose.Schema({...}, { capped: { size: 1024, max: 1000, autoIndexId: true } });

// Export your model as follows
module.exports = mongoose.model('Log', logSchema);
To initialize and use your model:
var Log = require('path/to/log/schema');
var query = { /** query parameters here */ };

// Initialize the stream
var stream = Log.find(query).tailable().stream();

// Process data
stream.on('data', function(doc) {});
stream.on('error', function(error) {});
stream.on('close', function(status) {});
For the save (or edit) operation, you can use the same approach as before:
new Log(params).save(function(error, log) {
  // Do something with the error or the new log
});
You may find more information in this Stack Overflow answer as well: https://stackoverflow.com/a/18045399/132610
I hope this helps.

Closing mongodb connection in node.js after inserting many documents

This question has been asked before, BUT the answer that the OP accepted did not address my particular needs.
closing mongodb connection in node.js while inserting lot of data
I have a utility script that adds a lot of records to multiple collections. Really it is just an import that uses byline to read the VERY LARGE text files and then inserts the data into a collection:
var MongoClient = require("mongodb").MongoClient;
var fs = require("fs");
var byline = require("byline");

var inStream = fs.createReadStream("data.txt", { encoding: "utf8" });
var byLineStream = byline.createStream(inStream);

MongoClient.connect("mongodb://localhost:27017/test", { native_parser: true }, function(err, db) {
  var collection = db.collection("Data");
  db.dropCollection("Data", function(err, result) {
    byLineStream.on("data", function(line) {
      var o = parseLineToObject(line);
      collection.insert(o);
    });
  });
});
The answer suggested there was to push all the data into an array and then use a single write and a callback to close the database when it is done. This is not a good answer for me, as the files I am working with are very large and would consume large amounts of memory.
Another solution presented to a similar question was to use the async package to create an array of functions and then run them in parallel. Another bust but at least it doesn't create a huge single insert.
So the question: How do I close MongoDB connection once all the inserts are complete so that my script exits and does not hang?
I should add that I have tried the counting method, where I increment a counter variable in the insert callback. It doesn't work because at some point the callbacks execute and complete faster than the inserts are issued, causing the counter to hit 0 while inserts are still in progress, and thus closing the db too early.
You should set a flag when all lines have been read:
var readAllLines = false;
byLineStream.on("end", function() {
  readAllLines = true;
});
Next, you check that flag after inserting each record. However, you also need to keep track of the number of lines that have been read and how many have been inserted, so you only close the database once all lines have been inserted (even if the callbacks complete out of order).
Putting everything together:
db.dropCollection("Data", function(err, result) {
  var lineCount = 0;
  var readAllLines = false;

  byLineStream.on("end", function() {
    readAllLines = true;
  });

  byLineStream.on("data", function(line) {
    lineCount++;
    var o = parseLineToObject(line);
    collection.insert(o, { w: 1 }, function() {
      if (--lineCount === 0 && readAllLines) {
        // we've read and inserted all lines
        db.close();
      }
    });
  });
});
However, I do believe that passing a callback to insert ('safe mode') is slower than your current solution, where you call insert but don't wait for its result. To speed things up, instead of writing each line separately, you can buffer a certain number of lines before inserting them in one statement.
Something similar to this (without the line counting):
var buffer = [];
byLineStream.on("data", function(line) {
  buffer.push(parseLineToObject(line));
  if (buffer.length > 100 || readAllLines) {
    collection.insert(buffer, { w: 1 }, function() {
      if (readAllLines) {
        db.close();
      }
    });
    buffer = [];
  }
});
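One thing this sketch leaves open is the final partial buffer: if the file ends before the 100-line threshold is reached again, nothing flushes the remainder. A small end handler along these lines (my addition, reusing the names from the snippets above) could take care of that:

byLineStream.on("end", function() {
  readAllLines = true;
  if (buffer.length > 0) {
    // flush whatever is left over from the last chunk, then close
    collection.insert(buffer, { w: 1 }, function() {
      db.close();
    });
    buffer = [];
  } else {
    db.close();
  }
  // for brevity this ignores bulk inserts that may still be in flight
});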
var MongoClient = require("mongodb").MongoClient;
var fs = require("fs");
var byline = require("byline");

var inStream = fs.createReadStream("data.txt", { encoding: "utf8" });
var byLineStream = byline.createStream(inStream);

MongoClient.connect("mongodb://localhost:27017/test", { native_parser: true }, function(err, db) {
  var collection = db.collection("Data");
  db.dropCollection("Data", function(err, result) { // I am completely replacing the collection
    var insertCount = 0;
    var doneReadingFile = false;

    byLineStream.on("end", function(line) {
      doneReadingFile = true;
    });

    byLineStream.on("data", function(line) {
      insertCount++; // count the insert as soon as it is issued
      var o = parseLineToObject(line);
      collection.insert(o, function(err, result) {
        insertCount--;
        if (insertCount === 0 && doneReadingFile) {
          db.close();
        }
      });
    });
  });
});

Reading CSV file and sending data in intervals with websockets (Node, Socket.io)

I'm relatively new to Node and Express.js. I'm trying to create a websocket server that pushes CSV data line after line, at irregular intervals that are stored in the file itself.
The CSV structure is something like this:
[timeout [ms], data1, data2, data3 ...]
I've successfully created a websocket server which communicates with the client.
I'm looking for the best solution to effectively do something like this:
1. Read a line of the CSV file
2. Send a line with WebSockets
3. Pause the reading for a period of time stored in the first value of the row
4. Resume the reading after the interval has passed, and back to step 1.
This is what I've got so far (please feel free to trash my code completely, as it might be very wrong; as I said, I'm new to this). It seems like pause() doesn't do anything.
var $ = require('jquery'),
    csv = require('csv');

exports.index = function(server) {
  var io = require('socket.io').listen(server);
  io.sockets.on('connection', function(socket) {
    socket.on('startTransmission', function(msg) {
      csv()
        .from.path('C:/dev/node_express/csv/test.csv', { delimiter: ',', escape: '"' })
        .on('record', function(row, index) {
          var rowArray = $.parseJSON(JSON.stringify(row));
          var json = {},
              that = this;
          $.each(rowArray, function(i, value) {
            json[keys[i]] = value;
          });
          socket.emit('transmitDataData', json);
          // this.pause(); //I guess around here is where I'd like to pause
          // setTimeout(function(){
          //   that.resume(); //and resume here after the timeout, stored in the first value (rowArray[0])
          // }, rowArray[0]);
        });
    });
  });
};
The commented-out code unfortunately does not work: all data is sent immediately, row after row, and the function doesn't pause.
I ran into the same sort of thing with another use case. The issue is that calling pause() on the stream pauses the underlying stream reading but not the csv record parsing, so the record event can get called with the remainder of the records that made up the last read stream chunk. I synchronized them, in my case, like this:
var rows = 0, actions = 0;
stream.on('record', function(row, index) {
  rows++;
  // pause here, but expect more record events until the raw read stream is exhausted
  stream.pause();
  runner.do(row, function(err, result) {
    actions++;
    // when actions have caught up to rows read, read more rows.
    if (actions == rows) {
      stream.resume();
    }
  });
});
In your case, I'd buffer the rows and release them with a timer. Here's an untested refactoring just to give you an idea of what I mean:
var $ = require('jquery'),
    csv = require('csv');

exports.index = function(server) {
  var io = require('socket.io').listen(server);
  io.sockets.on('connection', function(socket) {
    socket.on('startTransmission', function(msg) {
      var timer = null,
          buffered = [],
          stream = csv().from.path('C:/dev/node_express/csv/test.csv', { delimiter: ',', escape: '"' });

      function transmit(row) {
        socket.emit('transmitDataData', row);
      }

      function drain(timeout) {
        if (!timer) {
          timer = setTimeout(function() {
            timer = null;
            if (buffered.length <= 1) { // get more rows ahead of time so we don't run out. otherwise, we could skip a beat.
              stream.resume(); // get more rows
            } else {
              var row = buffered.shift();
              transmit(row);
              drain(row[0]);
            }
          }, timeout);
        }
      }

      stream.on('record', function(row, index) {
        stream.pause();
        if (index == 0) {
          transmit(row);
        } else {
          buffered.push(row);
        }
        drain(row[0]); // assuming row[0] contains a timeout value.
      });

      stream.on('end', function() {
        // no more rows. wait for buffer to empty, then cleanup.
      });

      stream.on('error', function() {
        // handle error.
      });
    });
  });
};
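For completeness (not part of the original answer), the browser side that consumes these rows with the standard socket.io client could be as simple as:

// Client-side sketch: assumes the socket.io client script is loaded
// and that the server above is serving on the same origin.
var socket = io.connect();
socket.emit('startTransmission'); // ask the server to start streaming the CSV
socket.on('transmitDataData', function(row) {
  console.log('received row:', row); // rows arrive on the schedule encoded in the CSV
});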
