MongoDB Query Optimize Search Text - node.js

I have an application developed in NodeJS, which works as a REST API and consumes data from MongoDB
In MongoDB I have a collection called 'ftp' with more than 10 million documents with the following structure
{
"_id" : ObjectId("59e7c66911506bd1725cf145"),
"ip" : "72.32.177.76",
"timestamp" : "2017-10-16T02:30:26-04:00",
"data" : {
"banner" : "220-FileZilla Server version 0.9.41 beta\r\n"
}
}
The "data.banner" field is a hased index
From NoodeJs I make an aggregate query that filters a string of text using a regular expression, groups and counts the results.
function getData(timeInit, table, textSearch, res) {
MongoClient.connect(url, function (err, db) {
if (err) throw err;
db.collection(table).aggregate([
{
$match: { 'data.banner': $regex:textSearch}
}, {
$group: {
_id: '$data.banner',
num: { $sum: 1 },
}
},
{
$sort: {
num: -1
}
},{
$limit:5
}
], {
allowDiskUse: true
}
).toArray(function (err, result) {
if (err) throw err;
var timeFinal = new Date();
var data = {
result: result,
timeLapse: (timeFinal - timeInit) / 1000,
numResult: result.length
};
res.send(data);
db.close();
});
});
};
The query with regular expression takes about 8 seconds to return results, an excessive time in my opinion, since the regular expressions are not optimal.
My question is how should I make the filter to search for documents that contain text in an optimal way reducing the response time.
If someone knows how to optimize this type of query I would appreciate it a lot.

Related

$setOnInsert doesn't work with bulkWrite in MongoDB

I have millions of record in source collection, I am trying to perform operation bulkWirte with $setOnInsert, like if incoming record already exists in destination collection then just skip it.
But somehow this operation is not working, I don't know if there is change in behavior of $setOnInsert or it doesn't work in bulk update case.
If I perform same operation without bulkWrite then its working as expected.
This is code part of code where I tried with batch update
bulkArr.push({
updateOne: {
filter: {
date: { $gte: new Date(from).getTime(), $lte: new Date(to).getTime() }, "uid": mergedObj.uid, "baseId": mergedObj.baseId,
"lessonId": mergedObj.lessonId, "pageId": mergedObj.pageId, "site": mergedObj.site
},
update: {
$setOnInsert: mergedObj
},
upsert: true
}
});
if (bulkArr.length % batchSize === 0) {
batchCounter = batchCounter + 1;
stream.pause();
db.collection('saq_temp_jan').bulkWrite(bulkArr, (err, result) => {
if (err) {
console.log(err);
} else {
bulkArr = [];
console.log('Upserted count>>>', result.nUpserted);
console.log('Matched count>>>', result.nMatched);
console.log('Batch counter>>>', batchCounter);
stream.resume();
}
});
Any suggestion would be most welcome.

MongoDB find() won't find what I need

I am trying to find a document in a collection. I have been using this query structure on nearly all my collections, but for some reason this isnt working for this collection. Any help?
The only data in this collection right now
[ { _id: 581757143389e565b5cf8124,
companyProfileID: '86660a5b-7f61-4238-889d-1cc3087947b9',
url: 'sentsoftware.com',
appID: 1 } ]
Query Structure:
function getCompany(companyUrl, app, callback) {
MongoClient.connect(url, function(err, db) {
if (err) {
console.log(err);
} else {
console.log("We are connected");
}
var collection = db.collection('Companies');
collection.find({url: companyUrl, appID: app}).toArray(function (err, result) {
if (err) {
console.log(err);
callback(err);
} else if (result.length) {
console.log("found");
callback(result);
} else {
console.log("No document found");
callback(err);
}
});
});
}
I keep getting No document found. But if i were to take out the , appID: app part, it finds the document.
A Mongo query is type specific (although numbers can be cast), you've likely got a string where you are expecting a number.
If you ask for the string "1",
collection.find({appID: "1"})
it will only return documents with a string equal to "1", if you ask for the number 1
collection.find({appID: 1})
it will only return documents with a real number (Double, Long, Int) in.
You can check which types you have with a $type query:
collection.find( { appID : { $type : "string" } } ); // or type 2 for earlier mongo versions
collection.find( { appID : { $type : "double" } } ); // or type 1 for earlier mongo versions
collection.find( { appID : { $type : "int" } } ); // or type 16 for earlier mongo versions
Check out the BSON types and $type in the docs, currently (2016/11/11): https://docs.mongodb.com/manual/reference/operator/query/type/#op._S_type
Interestingly it will cast between Doubles, Longs and Ints; so inserting these:
db.collection("temp").insertOne({ "type" : "my double", "num" : new mongo.Double(1.0) });
db.collection("temp").insertOne({ "type" : "my long", "num" : new mongo.Long(1) });
db.collection("temp").insertOne({ "type" : "my int", "num" : 1 });
and searching for:
{ "num" : 1 }
returns the three documents.

MongoDB Upserts Variable name as a key (Node.JS)

Node.JS, MONGODB, not using Mongoose.
I have a document I'm saving. When I use UPSERT, it structures the data as so :
{ "_id" : ObjectId("573a123f55e195b227e7a78d"),
"document" :
[ {"name" : "SomeName" } ]
}
When I use INSERT it inserts it all on the root level :
{ "_id" : ObjectId("573a123f55e195b227e7a78d"), "name" : "SomeName" }
This is obviously going to lead to lots of inconsistencies. I have tried various things. I've tried various methods such as findOneAndReplace, Update with Upsert, I've tried Replace, $setOnInsert. It all does the same thing when Upsert is involved it seems.
I have tried to use document[0] to access the first block of the array, but that throws an error... 'Unexpected Token ['
I have tried various methods and dug for hours through the various documentation, and have searched high and low for someone else having this problem, but it doesn't seem to be well documented issue for anyone else.
Anyone have any recommendations to make sure that all the fields are on the ROOT level, not nested under the variable name? Relevant code below.
findReplace: function(db, document, collection) {
var dbCollection = db.collection(collection);
var filter = document.name;
return new Promise(function(resolve, reject) {
dbCollection.updateOne({
"name" : filter
}, {
document
}, {
upsert: true
}, function(err, result) {
if (err) {
console.log('error', err)
reject(err);
}
console.log("Found Document and Upserted replacement Document");
resolve([{'status' : 'success'}]);
});
});
}
When you do this:
{
document
}
You are creating an object containing a document property and the variable's value as its value:
{
document: [ {"name" : "SomeName" } ]
}
This is new functionality from ES6. If you want to access the first item of the document variable, don't create a new object:
return new Promise(function(resolve, reject) {
dbCollection.updateOne({
"name" : filter
}, document[0], { // <========
upsert: true
}, function(err, result) {
if (err) {
console.log('error', err)
reject(err);
}
console.log("Found Document and Upserted replacement Document");
resolve([{'status' : 'success'}]);
});
});

Getting the latest documents from a Cloudant/CouchDB database from NodeJS

I would like to take the last n documents from my Cloudant database using a Node query. So far I have narrowed it down to the find() function, but the documentation only really explains how to retrieve all documents containing an absolute value, for example:
db.find({selector:{name:'Alice'}}, function(er, result) {
...
});
(taken from https://www.npmjs.com/package/cloudant#cloudant-query)
What I'm looking for is the equivalent of this SQL:
SELECT * FROM db WHERE name = "Alice" LIMIT 10
The code I have so far is this:
var cloudant = require('cloudant');
cloudant({account: username, password: password}, function (err, conn) {
if (err) {
callback("Could not initialize connection to Cloudant: " + err);
} else {
var db = conn.db.use('mydb');
db.find(???, function(err, data) {
if (err) {
callback("No data found: " + err);
} else {
...
}
});
}
});
If I need to make design documents, I'd do so in the Cloudant online interface, so don't worry too much about making an executable answer for that if it's necessary.
It's important to note that Cloudant Query requires you to define your index before performing the query e.g. to index the 'name' field from your documents:
db.index( {name:'nameindex', type:'json', index:{fields:['name']}}
We can now query the data using the find function as you indicated:
var query = { selector: { name: 'Alice' }};
db.find(query, function(err, data) {
});
The interesting thing about your question is the phrase 'the last n documents'. We can retrieve 'n' documents by adding a 'limit' to our query.
var query = { selector: { name: 'Alice' }, limit: 10};
db.find(query, function(err, data) {
});
but this doesn't necessarily indicate the last ten documents; it just limits the result set to ten.
If you want your query results to appear in time order, then you'll need something in your documents that indicates the time e.g.
a time string : { "name": "Alice", "datetime": "2015-11-26 10:22:24 Z" }
a timestamp : { "name": "Alice", "ts": "123456678" }
When your document contains a field which represents time, then your index can be created incorporate this into the index e.g.
db.index( {name:'nameindex', type:'json', index:{fields:['name','time']}}
and documents can be queried to appear in *reverse order * (to get the latest first):
var query = { selector: { name: 'Alice' }, sort: [ { name: "desc"}, { ts: "desc"]};
db.find(query, function(err, data) {
});
You may also want to look at type:"text" indexes too. See https://docs.cloudant.com/cloudant_query.html
var query = { selector: { name: 'Alice' }, sort: [ { name: "desc"}, { ts: "desc"]};
should be
var query = { selector: { name: 'Alice' }, sort: [ { name: "desc"}, { ts: "desc"}];

Is it possible to get count of number of docs returned from find() query in mongoose

I am trying to get count of data fetched from the database using find() query in mongoose. Now can anyone tell me can i do something like below or do i have to write other function to do that
merchantmodel.find({merchant_id: merchant_id, rating: {'$ne': -1 }, review: {'$ne': "" }}, {'review':1, '_id':0}, {sort: {time_at: -1}}, function(err, docs) {
if (err) {
} else {
if (docs) {
console.log(docs[1].review);
console.log(docs.size()); // Here by writing something is it possible to get count or not
res.json({success: 1, message : "Successfully Fetched the Reviews"});
}
}
});
Convert returned value to array and then use length property
var query = { merchant_id : merchant_id, rating : { '$ne': -1 }, review: { '$ne': "" }};
var projection = { 'review':1, '_id':0 };
var options = { sort: { time_at: -1 } };
merchantmodel.find(query, projection, options).toArray(function(err, docs) {
if (err) {
throw(err);
}
console.log(docs[1].review);
console.log(docs.length);
res.json({success: 1, message : "Successfully Fetched the Reviews"});
});
You can simply do this:
console.log(docs.length);
The docs variable returned by the find() method is an array so docs.length would do the job.
The mongodb native way to do this would be:
db.collection.find( { a: 5, b: 5 } ).count()

Resources