Random selection of rows in MongoDB async - node.js

So I have read the other posts on how to get a selection of random rows but none work for me.
Random record from MongoDB
and How to find random records in mongodb both suggest dated solutions that don't compile anymore.
Adding an extra random field to my database is not an option.
I've come up with the following code, but it also doesn't work.
// Question code: fetch `x` random rows by issuing x independent skip()
// queries. NOTE(review): req.params.x is a string, so the strict
// comparison `arr.length === x` below is never true and res.send is
// never called — see the answer that follows.
exports.randomX = function(req, res)
{
var x = req.params.x;
db.collection('table', function(err, collection) {
collection.count(function(err, count) {
var r;
var arr = [];
// Fire one asynchronous single-document query per desired row.
for(var i=1; i<=x; i++)
{
r = Math.floor(Math.random() * count);
collection.find().limit(1).skip(r).toArray(function(err, items)
{
arr.push(items[0]);
// Strict equality between a number and a string is always false.
if(arr.length === x)
res.send(arr);
});
}
});
});
}

Not sure if it's your only problem, but x will be a string, so if you want to compare it to arr.length you should either parse it into a number or just use == instead of ===.
// Sends `x` random documents from the 'table' collection as a JSON array.
// Route params are always strings, so `x` is parsed to a number up front;
// that keeps the completion check a clean strict comparison instead of
// relying on loose `==` coercion.
exports.randomX = function(req, res)
{
var x = parseInt(req.params.x, 10);
db.collection('table', function(err, collection) {
collection.count(function(err, count) {
var arr = [];
for(var i=1; i<=x; i++)
{
// Pick a random offset in [0, count) and fetch one document there.
var r = Math.floor(Math.random() * count);
collection.find().limit(1).skip(r).toArray(function(err, items)
{
arr.push(items[0]);
// All x async fetches have completed; respond exactly once.
if(arr.length === x)
res.send(arr);
});
}
});
});
}

Related

Node array of function using variables from enclosing scope

I am trying to do dns.reverse() on a list of ip using async.parallel().
The code is as follows:
// Question code: build one dns.reverse task per entry and run them in
// parallel. NOTE(review): `ip` is declared with `var`, so every closure
// shares ONE binding; by the time dns.reverse actually runs, the loop has
// finished and all tasks see the last ip in `data`.
var functions = [];
for (var i = 0; i < data.length; i++) {
var ip = data[i].ip;
var x = function(callback) {
dns.reverse(ip, (err, hostnames) => {
if (err) {
log.error("Error resolving hostname for [" + ip + '] ' + err);
// Errors are reported as results (err slot kept null) so one failed
// lookup does not abort the whole parallel batch.
return callback(null, err);
}
callback(null, hostnames);
});
};
functions.push(x);
}
async.parallel(functions, (err, results) => {
// results[i] corresponds to functions[i], i.e. to data[i].
for(var i = 0; i < data.length; i++) {
data[i]['hostnames'] = results[i];
}
handler(null, data);
});
What is happening is dns.reverse() is getting called with the same ip (the last one in data array) for all the calls. May be I am doing something wrong. Can somebody explain what is my mistake?
The first callback is executed after the entire for loop finished, because it's async.
The value of ip will be the one in the last iteration of the loop.
You could put some console.log to realize what's really happening.
The correct way of doing it might be:
// Build one task per entry; each task just delegates to dns.reverse, which
// already follows the (err, result) callback convention async expects.
const tasks = data.map((entry) => (done) => {
dns.reverse(entry.ip, done)
});
async.parallel(tasks, (err, results) => {
// Attach each resolved hostname list to its originating entry, in order.
data.forEach((entry, index) => {
entry['hostnames'] = results[index];
});
handler(null, data);
})
Create a new array of functions based on each ip.
Every function will call its callback via dns.reverse.
Also, it might be better to return a new data array, not changing data inside the loop:
(err, results) => {
const result = data.map((data, index) => ({
...data,
hostnames: results[index]
})
handler(null, result);
})
Thanks to #apokryfos I got a hint. To get the code working I just need to use let instead of var while declaring ip.
// Working version: `let` is block-scoped, so each loop iteration gets its
// OWN `ip` binding and each closure captures the ip of its own iteration.
var functions = [];
for (var i = 0; i < data.length; i++) {
let ip = data[i].ip; // one binding per iteration — this is the whole fix
var x = function(callback) {
dns.reverse(ip, (err, hostnames) => {
if (err) {
log.error("Error resolving hostname for [" + ip + '] ' + err);
// Report the error as a result so the whole batch still completes.
return callback(null, err);
}
callback(null, hostnames);
});
};
functions.push(x);
}
async.parallel(functions, (err, results) => {
// results are in the same order as functions, i.e. as data.
for(var i = 0; i < data.length; i++) {
data[i]['hostnames'] = results[i];
}
handler(null, data);
});
For anybody interested in understanding following might be helpful: How do JavaScript closures work?

DocumentDB, How to work with continuationToken in a SP

The following SP is supposed to run over the collection and keep querying for the next batch of documents (10 docs per batch), but instead it returns the same 10 documents every time.
// Question code (broken): intends to fetch 10 successive pages of 10 docs.
// NOTE(review): queryDocuments only QUEUES a query; all 10 are queued
// synchronously here while queryOptions.continuation is still null, so
// every query starts from the beginning and yields the same first page.
// The continuation written in processMultiUsers arrives too late.
function sample(prefix) {
var continuations = [],
ids = [],
context = getContext(),
collection = context.getCollection(),
response = context.getResponse();
var queryOptions = { pageSize: 10, continuation: null };
for (i = 0; i < 10; i++) { // NOTE(review): `i` is an implicit global
// get all user wish list actions
var query = "select * from w",
accept = collection.queryDocuments(collection.getSelfLink(), query, queryOptions, processMultiUsers);
if (!accept) throw "Unable to read user's sessions";
}
// Runs synchronously, before any processMultiUsers callback has fired.
getContext().getResponse().setBody(ids);
function processMultiUsers(err, docs, options) {
if (err) throw new Error("Error: " + err.message);
if (docs == undefined || docs.length == 0) throw new Error("Warning: Users not exists");
for (j = 0; j < docs.length; j++) {
ids.push(docs[j].UserId);
}
// Too late: all 10 queries were already queued with a null continuation.
queryOptions.continuation = options.continuation;
continuations.push(options.continuation);
}}
In the script that you wrote, the execution of the queries are done synchronously and they are queued up with the same initial continuation token, which is null. Instead, we need to take the token from the first query and then queue the next and continue.
The below sample should help achieve what you are looking for
// Stored procedure: pages through the collection (pageSize = maxResult)
// starting from `continuationToken`, collecting UserId values, and returns
// { continuationToken, documentsProcessed, ids } so the caller can resume
// from where this invocation stopped.
function sample(continuationToken) {
var collection = getContext().getCollection();
var maxResult = 10; // page size and per-invocation processing budget
var documentsProcessed = 0; // running total across all pages read here
var ids = [];
var filterQuery = "select * from w";
tryQuery(continuationToken);
// Issue the next page, or stop and report the resume token when the
// budget is exhausted or the request is not accepted (throttled).
function tryQuery(nextContinuationToken) {
var responseOptions = { continuation: nextContinuationToken, pageSize: maxResult };
if (documentsProcessed >= maxResult || !query(responseOptions)) {
setBody(nextContinuationToken);
}
}
function query(responseOptions) {
return (filterQuery && filterQuery.length) ?
collection.queryDocuments(collection.getSelfLink(), filterQuery, responseOptions, onReadDocuments) :
collection.readDocuments(collection.getSelfLink(), responseOptions, onReadDocuments);
}
function onReadDocuments(err, docFeed, responseOptions) {
if (err) {
throw 'Error while reading document: ' + err;
}
documentsProcessed += docFeed.length;
// BUG FIX: iterate only the current page (docFeed.length). The original
// looped up to the cumulative documentsProcessed count, which runs past
// the end of docFeed on every page after the first.
for (var i = 0; i < docFeed.length; i++) {
ids.push(docFeed[i].UserId);
}
if (responseOptions.continuation) {
tryQuery(responseOptions.continuation);
} else {
setBody(null); // collection exhausted; nothing left to resume
}
}
function setBody(continuationToken) {
var body = { continuationToken: continuationToken, documentsProcessed: documentsProcessed, ids: ids };
getContext().getResponse().setBody(body);
}
}

Can't change result from MongoDB/MongooseJS in route before sending it to the view (Node js)

In my route I am trying to replace a value in whatever i get from the database before sending it to my view.
In this case I am trying to replace a reference to a user, with the users username instead. But my console.log's just keep returning the value unchanged.
// Question code: replace each inbox message's `from` ObjectId with the
// sender's username before logging. NOTE(review): find() returns mongoose
// Documents whose fields are governed by the schema — assigning a username
// string to `from` is silently discarded, which is why the final
// console.log still shows the ObjectId.
app.get('/message/inbox', function(req, res) {
var _id = req.user._id;
var inbox = null;
var users = null;
// Run both queries concurrently; results arrive in task order.
async.parallel([
function(callback){
Inbox.find({to: _id}, callback);
},
function(callback){
User.find({}, callback);
}
],
function(err, result){
inbox = result[0];
users = result[1];
for(var i = 0; i < inbox.length; i++) {
for(var j = 0; j < users.length; j++) {
if(String(inbox[i].from) == String(users[j]._id)) {
// This assignment does not stick on a mongoose Document.
inbox[i].from = users[j].local.username;
console.log(users[j].local.username);
console.log(inbox[i].from);
break;
}
}
}
console.log(inbox);
});
});
This is what my console returns:
some#email.com
540468daeb56d5081ade600d
[ { _id: 54084cacf212815422aabe94,
from: 540468daeb56d5081ade600d,
to: 5406bf4c8a8acc88120922dc,
headline: 'Some Headline',
text: 'Some text',
__v: 0 } ]
What gets returned from the find requests is a "mongoose document" which is actually quite complex and contains all the rules and methods from the associated schema. You cannot modify this outside of the constraints set on the schema.
So despite the simple serialized form, the object is quite complex. What you want is just a basic object, so you can modify it however you want. So put simply, just call .toObject() to get the "raw" JavaScript object without the rest of the sugar:
// result[0] is the inbox query, result[1] the users query — the original
// read result[0] twice. find() yields an ARRAY of documents, so convert
// each element rather than calling .toObject() on the array itself.
inbox = result[0].map(function (doc) { return doc.toObject(); });
users = result[1].map(function (doc) { return doc.toObject(); });
The basic objects have no methods and no rules. Modify away.
Solution by OP.
I just have to add a .lean() after the .find. This tells mongoose to pass the data as a plain JavaScript object instead of a MongooseDocument - Source
// Inbox route: fetch the user's messages and all users in parallel, then
// replace each message's `from` ObjectId with the sender's username.
app.get('/message/inbox', function(req, res) {
var _id = req.user._id;
var inbox = null;
var users = null;
async.parallel([
function(callback){
// .lean() must be applied BEFORE the query executes. Passing the
// callback directly to find() runs the query immediately, so a
// .lean() chained afterwards is ignored; chain it, then exec().
Inbox.find({to: _id}).lean().exec(callback);
},
function(callback){
User.find({}).lean().exec(callback);
}
],
function(err, result){
inbox = result[0];
users = result[1];
// For each message, find its sender and swap in the username; plain
// objects from .lean() accept arbitrary property writes.
for(var i = 0; i < inbox.length; i++) {
for(var j = 0; j < users.length; j++) {
if(String(inbox[i].from) == String(users[j]._id)) {
inbox[i].from = users[j].local.username;
console.log(users[j].local.username);
console.log(inbox[i].from);
break; // sender found; stop scanning users
}
}
}
console.log(inbox);
});
});
/*
you just need to add .lean() at the end of mongoose query where you want to change the object later stage.
*/
// Demonstrates why .lean() matters: without it mongoose documents silently
// drop writes to properties outside the schema; with it you get plain
// objects you can reshape freely. Fixes in this version: `aysnc` typo,
// the duplicate `let foundUser` redeclaration (a SyntaxError), the no-op
// `delete elem`, and the missing closing brace.
let forExample = async (req, res, next ) => {
let foundUser = await user.find({ organisationId: 10 }); // without lean
foundUser.forEach(elem => {
// Silently discarded: `elem` is a mongoose Document here.
elem.userName = elem.name;
});
console.log(foundUser[0]); // it will return { name:'something'};
// but if you try:
foundUser = await user.find({ organisationId: 10 }).lean(); // with lean
foundUser.forEach(elem => {
// Sticks: `elem` is now a plain JavaScript object.
elem.userName = elem.name;
});
console.log(foundUser[0]); // it will return { userName:'something'};
};

Node.js + Mongoose consume all memory on inserts

I need to fill Mongo collection with about 80 million of records like
//Example
{
"_id" : "4gtvCPATZ",
"isActivated" : false
}
Where _id is randomly generated. I do this using Node.js Express and Mongoose:
// Question code: inserts 100000 documents per request. NOTE(review):
// every code.save() is fired without waiting for the previous one, so up
// to 100000 pending writes (plus their mongoose documents) accumulate in
// memory at once — this is the source of the memory growth described
// below.
app.get('/api/codes', function(req, res) {
for (var i = 0; i < 100000; i++) {
var code = new Code({
_id: randomStr(9),
isActivated: 0
});
code.save();
code = null; // does not help: the pending save still references the doc
}
// Renders immediately; the inserts are still in flight.
res.render('index');
});
// Returns a random alphanumeric string of length m (default 9), drawn from
// an alphabet that deliberately omits the ambiguous characters O, o and 0.
// NOTE: Math.random() is not cryptographically secure — fine for bulk test
// codes, not for secrets.
function randomStr(m) {
var length = m || 9;
// Declare all locals with var: the original leaked `s` and `r` into the
// global scope, which is an easy source of cross-request corruption.
var alphabet = 'ABCDEFGHIJKLMNPQRSTUVWXYZabcdefghijklmnpqrstuvwxyz123456789';
var s = '';
for (var i = 0; i < length; i++) {
s += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
}
return s;
};
On 'index' jade layout has JS code that reloads the page to generate next 100000 records:
script(type='text/javascript').
location.href='http://localhost:3000/api/codes'
node.js process starts to consume memory and after 4-5 page reloads hangs up having 1GB of memory.
What am I doing wrong?
UPDATE:
Considering the robertklep's comment I've updated the code and it works properly:
// Insert 100000 randomly-keyed Code documents strictly one at a time, then
// render. async.whilst keeps exactly one save in flight, so memory stays
// flat (unlike the fire-and-forget loop in the original question).
app.get('/api/codes', function(req, res) {
var inserted = 0;
async.whilst(
function () { return inserted < 100000; },
function (next) {
inserted++;
var doc = new Code({
_id: randomStr(9),
isActivated: 0
});
// Save errors are deliberately ignored here; just advance the loop.
doc.save(function () {
next();
});
},
function (err) {
res.render('index');
}
);
});
UPDATE 2: I've tested the freakish's advice and made my app work faster:
var MongoDB = require("mongodb");
// Bulk-insert 1M codes as 10 batches of 100000 using the native driver's
// collection.insert — far faster than saving mongoose documents one at a
// time. The unused `format = require('util').format` local was removed.
app.get('/api/codes', function(req, res) {
var MongoClient = require('mongodb').MongoClient;
MongoClient.connect('mongodb://127.0.0.1:27017/code', function(err, db) {
var collection = db.collection('codes');
var count = 0;
async.whilst(
function () { return count < 10; },
function (callback) {
count++;
// Build one batch of 100000 plain documents in memory...
var docs = [];
for (var i = 0; i < 100000; i++) {
docs.push({
_id: randomStr(9),
isActivated: 0
});
}
// ...and hand the whole array to the driver in a single insert.
collection.insert(docs, function(err, docs) {
callback();
});
},
function (err) {
// NOTE(review): db is never closed here — consider db.close().
res.render('index');
}
);
})
});
Now it writes about 1M records in 60-70 seconds.
Thanks!

Loop synchronous multiple async.whilst

I want to use many included loops in node.js in synchronous mode.
Example :
// Desired completion order for three nested loops: the innermost ("3")
// finishes first, then the middle ("2"), then the outermost ("1").
for (var i = 0; i < length1; i++) {
for (var j = 0; j < length2; j++) {
for (var k = 0; k < length3; k++) {
//completed 3
}
//completed 2
}
//do completed 1
}
How to do this with async? I tried this :
// Question code: three nested async.whilst loops meant to mirror three
// nested for loops. NOTE(review): the inner loops' callbacks (callback2,
// callback1) are invoked SYNCHRONOUSLY right after queueing the async
// work, so the outer loops race ahead of the inner ones — hence the
// out-of-order "COMPLETED" trace. They should be called from each whilst's
// final callback instead. Also note the `AS[i]` typo (should be `As[i]`).
exports.myFunction = function (callback) {
var finalListA = new Array();
var pos = 0;
Model_A.find().populate('listOfItems')
.lean().exec(function (err, As) {
if (err) {
console.log(err);
return callback(err, null);
} else {
//For each A
var i = 0;
async.whilst(
function () {
return i < As.length;
},
function (callback1) {
var isActive = false;
//For each B into the A
var j = 0;
async.whilst(
function () {
return j < As[i].Bs.length;
},
function (callback2) {
// NOTE(review): `AS` is undefined — this should be `As`.
Model_B.findById(AS[i].Bs[j]._id, function (err, B) {
if (err) {} else {
var k = 0;
// For each C in the B
async.whilst(
function () {
return k < B.Cs.length;
},
function (callback3) {
if (B.Cs[k].dateEnd >= Date.now()) {
isActive = true;
}
k++;
callback3();
},
function (err) {
console.log("3 COMPLETED");
}
);
}
});
// NOTE(review): advances the loop before findById finishes.
j++;
callback2();
},
function (err) {
console.log("2 COMPLETED");
if (err) {} else {
if (isActive == true) {
finalListA[pos] = As[i];
pos = pos + 1;
}
}
}
);
// NOTE(review): same problem — runs before the inner loop completes.
i++;
callback1();
},
function (err) {
console.log("1 COMPLETED");
if (err) {} else {
return callback(null, finalListA);
}
}
);
}
});
}
The trace shows me :
COMPLETED 2
COMPLETED 2
COMPLETED 1
COMPLETED 3
COMPLETED 3
The order expected is :
COMPLETED 3
COMPLETED 3
COMPLETED 2
COMPLETED 2
COMPLETED 1
You must call the callbacks of the higher loops from the end callback of your whilst loop (like you did with the outermost callback), instead of calling them synchronously from the whilst body in which you just started the next level iteration.
Btw, I don't know what you actually want to do, but whilst does not seem the best choice for iterating arrays. Use the parallel each or the serial eachSeries (or their map or reduce equivalents).
I've recently created simpler abstraction called wait.for to call async functions in sync mode (based on Fibers). It's at an early stage but works. It is at:
https://github.com/luciotato/waitfor
Using wait.for, you can call any standard nodejs async function, as if it were a sync function.
I do not understand exactly what you are trying to do in your code. Maybe you can explain your code a little more, or give some example data.
I dont' know what Model_A or Model_B are... I'm guessing most of your code, but...
using wait.for, your code might be:
var wait=require('wait.for');
// Entry point: every blocking wait.forMethod call must run inside a Fiber,
// so the actual work is delegated to inAFiber below.
exports.myFunction = function(callback) {
//launchs a Fiber
wait.launchFiber(inAFiber,callback);
}
// Fiber body: wait.forMethod suspends this Fiber on each async call while
// the node event loop keeps running, so the nested loops read like plain
// synchronous code. Collects every A that has at least one B whose C list
// contains a not-yet-expired entry.
function inAFiber(callback) {
var finalListA = new Array();
var pos = 0;
var x= Model_A.find().populate('listOfItems').lean();
// Declared with var — the original leaked `As` as an implicit global.
var As = wait.forMethod(x,"exec");
//For each A
for(var i=0;i<As.length;i++){
var isActive = false;
//For each B into the A
for(var j=0; j < As[i].Bs.length;j++){
// BUG FIX: the original wrote AS[i], an undefined identifier.
var B=wait.forMethod(Model_B,"findById",As[i].Bs[j]._id);
// For each C in the B
for(var k=0; k < B.Cs.length;k++){
if(B.Cs[k].dateEnd >= Date.now()) {
isActive = true;
}
}
console.log("3 COMPLETED");
}
console.log("2 COMPLETED");
if(isActive == true) {
finalListA[pos] = As[i];
pos = pos + 1;
}
};
console.log("1 COMPLETED");
return callback(null,finalListA);
}
Also, for what I see, you should break the loops as soon as you find one item (isActive), and you don't need the var pos. Doing that your code will be:
var wait=require('wait.for');
// Same entry point as above: launch the Fiber that hosts the blocking
// wait.forMethod calls.
exports.myFunction = function(callback) {
//launchs a Fiber
wait.launchFiber(inAFiber,callback);
}
// Short-circuiting Fiber body: stops scanning Bs/Cs for an A as soon as one
// active C is found, and pushes matches directly instead of tracking `pos`.
function inAFiber(callback) {
var finalListA = [];
var x= Model_A.find().populate('listOfItems').lean();
// Declared with var — the original leaked `As` as an implicit global.
var As = wait.forMethod(x,"exec");
var isActive;
//For each A
for(var i=0;i<As.length;i++){
isActive = false;
//For each B into the A
for(var j=0; j < As[i].Bs.length;j++){
// BUG FIX: the original wrote AS[i], an undefined identifier.
var B=wait.forMethod(Model_B,"findById",As[i].Bs[j]._id);
// For each C in the B
for(var k=0; k < B.Cs.length;k++){
if(B.Cs[k].dateEnd >= Date.now()) {
isActive = true;
break;//for each C
}
} //loop for each C
console.log("3 COMPLETED");
if (isActive) break;//for each B
} //loop for each B
if (isActive) finalListA.push(As[i]);
console.log("2 COMPLETED");
} //loop for each A
console.log("1 COMPLETED");
return callback(null,finalListA);
}

Resources