MongoDB search and pagination Aggregation Performance issue - node.js

I'm new to Node.js and MongoDB. I'm working on MongoDB search and pagination, which works correctly, but I have a performance issue: counting and searching records takes too much time.
If I search for a short word it is fast, but if I search for a long string, or for something that has no matching record in the database, it takes far too long: 50 to 186.30 seconds (I'm expecting 1 to 2 seconds).
I have more than 15,00,000 (1.5 million) records in my collection.
If I do not include the count for the search word, it takes 0.20 to 1.5 seconds, but when I also count the matching records it takes 25.0 to 35.0 seconds.
I have no idea how to reduce the time for counting the records that match the search word (query optimization).
I have tried every query optimization I know of.
I have also tried with
{
$count: "passing_scores"
}
but there was no change in the time. I'm stuck on it: I have to reduce the time it takes to count the records that match the search word.
SQL Query for example
SELECT * FROM `post`
Left JOIN catagory ON post.catid=catagory.id
WHERE post_name LIKE '%a%' OR post_data LIKE '%a%' OR tags LIKE '%a%' OR post_url LIKE '%a%'
NODE and MongoDB
// Returns one page of posts matching `search_data` in the DataTables server-side format.
// Uses PostObj, search_data, search_limit, search_skip, req and res from the enclosing scope.
PostObj.count({}, function (err, totalCount) {
  if (err) {
    // Without the collection total there is nothing meaningful to send back.
    return res.json({ "error": true, "message": "Error fetching data" });
  }

  // Normalise the term once so every branch below agrees on what "no search" means.
  const searchTerm = search_data == null ? "" : String(search_data);
  // Escape regex metacharacters so the input is matched literally, like SQL LIKE '%term%'.
  // Building the RegExp once also avoids constructing it five times per query.
  const searchRegex = new RegExp(searchTerm.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));

  // Stages shared by the page query and the count query, so the two cannot drift apart.
  const searchStages = [
    {
      $lookup: {
        from: 'catagories',
        localField: 'catagory.catagory_id',
        foreignField: '_id',
        as: 'catagories_data'
      }
    },
    {
      $match: {
        $or: [
          { "catagories_data.catagory_name": { $regex: searchRegex } },
          { "postname": { $regex: searchRegex } },
          { "posturl": { $regex: searchRegex } },
          { "postdata": { $regex: searchRegex } },
          { "tags": { $regex: searchRegex } }
        ]
      }
    }
  ];

  PostObj.aggregate(searchStages.concat([
    // $skip has to run before $limit; the other way round every page after the first
    // is cut short (or empty) because the skip is applied to the already-limited set.
    { $skip: search_skip },
    { $limit: search_limit },
    { $group: { _id: "$_id", postname: { $push: "$postname" }, posturl: { $push: "$posturl" } } }
  ])).exec(function (err, data) {
    if (err) {
      return res.json({ "error": true, "message": err });
    }

    // Sends the one and only response for this request.
    const sendPage = function (filteredCount) {
      res.json({
        sEcho: req.body.draw,
        // total number of records before filtering
        iTotalRecords: totalCount,
        // number of records left after filtering
        iTotalDisplayRecords: filteredCount,
        aaData: data
      });
    };

    if (searchTerm === "") {
      // No search word: skip the expensive count query and report the collection total.
      return sendPage(totalCount);
    }

    // Count the records that match the search word (no skip/limit).
    PostObj.aggregate(searchStages.concat([
      { $group: { _id: null, myCount: { $sum: 1 } } },
      { $project: { _id: 0 } }
    ])).exec(function (countErr, Countdata) {
      if (countErr) {
        return res.json({ "error": true, "message": countErr });
      }
      // aggregate() yields an array of documents; it is empty when nothing matched.
      sendPage(Countdata.length > 0 ? Countdata[0].myCount : 0);
    });
  });
});
I have also tried it the following way, but it takes 35.0 to 49.0 seconds, which is longer than the first code.
// Second attempt: a single aggregation that returns the match count and one page of
// results together through $facet.

// A fresh RegExp per searched field, as in the inline form.
const buildSearchRegex = () => new RegExp(search_data);

// Join every post with the category documents it references.
const joinCategories = {
  $lookup: {
    from: 'catagories',
    localField: 'catagory.catagory_id',
    foreignField: '_id',
    as: 'catagories_data'
  }
};

// Fields the search word is tested against, in the order the conditions are emitted.
const searchedFields = [
  "catagories_data.catagory_name",
  "postname",
  "posturl",
  "postdata",
  "tags"
];

// Keep a post when the search word matches any of the fields above.
const matchSearchTerm = {
  $match: {
    $or: searchedFields.map((field) => ({ [field]: { $regex: buildSearchRegex() } }))
  }
};

const countAndPage = {
  '$facet': {
    // Number of matches, plus a constant `page` value added to that metadata document.
    // NOTE(review): NumberInt is a mongo shell helper - confirm it is defined where this runs.
    metadata: [{ $count: "total" }, { $addFields: { page: NumberInt(3) } }],
    // Fixed window: skip 20 matches and return the next 10 (append a $project here to re-shape the docs).
    data: [{ $skip: 20 }, { $limit: 10 }]
  }
};

PostObj.aggregate([joinCategories, matchSearchTerm, countAndPage]);
If I do not use a search word it works well. The issue appears only when I search for a word (counting the records that match that word, without skip and limit).
collection data
Post
{
"_id": ObjectId("5d29bd7609f28633f38ccc13"),
"postname": "this is some data ",
"tags": "
Damita,
Caro,
Leontyne,
Theodosia,
Vyky ",
"postdata": "Berry Samara Kellia Rebekah Linette Hyacinthie Joelly Micky Tomasina Christian Fae Doralynn Chelsea Aurie Gwendolyn Tate
Cairistiona Ardys Aubrie Damita Olga Kelli Leone Marthena Kelcy
Cherlyn Molli Pris Ginelle Sula Johannah Hedwig Adelle Editha Lindsey
Loleta Lenette Ann Heidie Drona Charlena Emilia Manya Ketti Dorthea
Jeni Lorene Eolanda Karoly Loretta Marylou Tommie Leontyne Winny Cyb
Violet Pavia Karen Idelle Betty Doloritas Judye Aretha Quinta Billie
Vallie Fiona Letty Gates Shandra Rosemary Dorice Doro Coral Tove Crin
Bobbe Kristan Tierney Gianina Val Daniela Kellyann Marybeth Konstance
Nixie Andeee Jolene Patrizia Carla Arabella Berna Roseline Lira Cristy
Hedi Clem Nerissa ",
"catagory": [
{ "catagory_id": [ ObjectId("5d29bd7509f28633f38ccbfd") ] },
{ "catagory_id": [ ObjectId("5d29bd7509f28633f38ccbfd") ] } ],
"createby": "5d22f712fe481b2a9afda4aa"
}
catagory
{
"_id": ObjectId("5d29bc271a68fb333531f6a1"),
"catagory_name": "Katharine",
"catagory_description": "Katharine"
}
Any solution for it?

If your regex is only looking for one or a few words, it would be better to use $text instead of $regex. $text can use a text index and is therefore much faster. Roughly speaking, in MySQL terms $text plays the role of LIKE and $regex that of REGEXP (although $text matches whole words rather than arbitrary substrings). Since your example MySQL query uses LIKE, I'm fairly confident you can use $text instead of $regex in your Mongo query as well.
You need a compound "text" index on the searched fields (postname, tags, postdata and posturl), if you do not already have one.
// Compound text index covering every field the search looks at.
const searchableTextFields = {
  postname: "text",
  tags: "text",
  posturl: "text",
  postdata: "text"
};
db.POST.createIndex(searchableTextFields);

Here are some tips you can try.
1: POST collection
It seems you are storing only the category_id inside the objects of your category array, which you should avoid.
Instead, do one of the following:
Create a new post_id property in the category collection, instead of keeping an array of category objects in the post collection [high-performance approach].
OR
Convert the category property of the post collection from an array of objects to a simple array [average performance].
Ex: category: [ ObjectId("5d29bd7509f28633f38ccbfd"), ObjectId("5d29bd7509f28633f38ccbfd"), ObjectId("5d29bd7509f28633f38ccbfd") ];
In both cases the post_id or category property must be indexed.
2: lookup
Instead of a simple localField/foreignField lookup, you should use the pipeline form of $lookup.
Eg:
NOT GOOD.
$lookup:{
from: 'catagories',
localField: 'catagory.catagory_id', // BAD IDEA //
foreignField: '_id',
as: 'catagories_data'
},
GOOD.
$lookup:{
from: 'catagories',
localField: '_id',
foreignField: 'post_id', // GOOD IDEA
as: 'catagories_data'
},
EVEN BETTER
// Pipeline form of $lookup: for each post, collect the documents of `catagories`
// that reference it and that also pass the search filter below.
$lookup:{
// Expose the current post's _id to the sub-pipeline as $$post_id.
let : { post_id: "$_id" },
from: 'catagories',
pipeline:[
{
// Join condition: the category document's post_id equals the post's _id.
$match: {
$expr: {
$and: [
{ $eq: ["$post_id", "$$post_id"], },
]
}
},
},
{
// Search filter: keep the joined document when any listed field starts with the search word.
// NOTE(review): this stage runs against `catagories` documents; postname, posturl,
// postdata and tags appear only on post documents in the sample data - confirm the
// intended fields.
$match: {
$or: [
// AVOID `new` keyword if you can do such;
// and create indexes for the same;
{ "catagory_name": { $regex: `^${search_data}` } },
{ "postname": { $regex: `^${search_data}` } },
{ "posturl": { $regex: `^${search_data}` } },
{ "postdata": { $regex: `^${search_data}` } },
{ "tags": { $regex: `^${search_data}` } }
]
}
}
],
// The matching category documents are stored on the post under this field.
as: 'catagories_data'
},
After all that, your $facet pipeline seems fine to me.
// Two sub-pipelines over the same input: `metadata` holds the total count,
// `data` holds one page of documents.
'$facet' : {
// $count yields { total: <n> }; $addFields then adds a constant `page` value to it.
metadata: [ { $count: "total" }, { $addFields: { page: NumberInt(3) } } ],
// Skip the first 20 documents and return the next 10.
data: [ { $skip: 20 }, { $limit: 10 } ] // add projection here wish you re-shape the docs
}
Other factors that can slow the query down:
- the configuration of your backend server and database server;
- the distance between frontend -> backend -> database server;
- the number of incoming and outgoing requests per second;
- the internet connection, of course.
Complete Query will look like this
// Complete query: joins every post with the category documents that reference it
// (through post_id), filters the joined documents by the search word, then returns the
// total count and one page of results through $facet.
PostObj.aggregate([
  {
    $lookup: {
      // Expose the current post's _id to the sub-pipeline as $$post_id.
      let: { post_id: "$_id" },
      // The collection is spelled 'catagories' (sic); 'categories' would join against a
      // different (presumably non-existent) collection and leave catagories_data empty.
      from: 'catagories',
      pipeline: [
        {
          // Join condition: the category document's post_id equals the post's _id.
          $match: {
            $expr: { $eq: ["$post_id", "$$post_id"] }
          }
        },
        {
          // Keep the joined document when any listed field starts with the search word.
          // NOTE(review): this stage runs against `catagories` documents; postname,
          // posturl, postdata and tags appear only on post documents in the sample
          // data - confirm the intended fields.
          $match: {
            $or: [
              // AVOID `new` keyword if you can do such;
              // and create indexes for the same;
              { "catagory_name": { $regex: `^${search_data}` } },
              { "postname": { $regex: `^${search_data}` } },
              { "posturl": { $regex: `^${search_data}` } },
              { "postdata": { $regex: `^${search_data}` } },
              { "tags": { $regex: `^${search_data}` } }
            ]
          }
        }
      ],
      as: "catagories_data"
    }
  },
  {
    '$facet': {
      // Total number of documents, plus a constant `page` value on the same document.
      // NOTE(review): NumberInt is a mongo shell helper - confirm it is defined where this runs.
      metadata: [{ $count: "total" }, { $addFields: { page: NumberInt(3) } }],
      // First page: skip nothing, return 10 documents.
      catagories_data: [{ $skip: 0 }, { $limit: 10 }]
    }
  }
])

Related

Mongoose - How to get unique data based on some fields using aggregation

I have these fields in the document,
doc: {
"id": "632ac8cba7723378033fef10",
"question": 1,
"text": "aasdfghjk,mnbvcxswertyuikmnbvcxsrtyuiknbvcdrtyujnbvcddtyjnbvfty",
"slug": "xcvbnrddfghjktdxjjydcvbyrsxcvbhytrsxggvbjkytrdgc",
"subject": 25866,
"tutorInfo": {
"tutorId": "632ac8cba7723378033fa0fe",
"tutorIncrementalId": 95947
}
}
the same tutorInfo can Occur in multiple documents.
// Page through the questions of one subject that carry tutor info, then attach the
// matching tutor profile to each of them.

// Only questions of the requested subject that have a tutorInfo field.
const subjectWithTutorFilter = {
  $match: {
    $and: [
      { subject: subjectIncrementalId },
      { tutorInfo: { $exists: true } }
    ]
  }
};

// Pagination window for the requested page.
const pageWindow = [
  { "$skip": page * limit },
  { "$limit": limit }
];

// Join with `profiles` on the tutor's incremental id.
const attachTutorProfile = {
  $lookup: {
    from: "profiles",
    localField: "tutorInfo.tutorIncrementalId",
    foreignField: "incrementalId",
    as: "tutorDetails"
  }
};

const allQuestionBySubject = await QuestionParts.aggregate([
  subjectWithTutorFilter,
  ...pageWindow,
  attachTutorProfile
])
Code to get a list of questions as per subject.
I am filtering documents by subject and, as I mentioned, the same tutorInfo can be present in multiple documents, so the same tutor can appear several times in the result. How can I get a unique list of documents in which tutorInfo is not repeated?
Since the same tutorInfo is present in multiple records, you can use $group to group the documents on the tutorInfo.tutorId field.
// One question per tutor for the given subject, paginated, with the tutor's profile attached.
const allQuestionBySubject = await QuestionParts.aggregate(
  [
    {
      // Only questions of the requested subject that have a tutorInfo field.
      $match: {
        $and: [
          { subject: subjectIncrementalId },
          { tutorInfo: { $exists: true } }
        ]
      }
    },
    {
      // Collapse to one document per tutor; $first keeps the values of the first
      // question encountered for that tutor.
      "$group": {
        _id: "$tutorInfo.tutorId",
        question: { $first: "$question" },
        text: { $first: "$text" },
        slug: { $first: "$slug" },
        // The documents carry a `subject` field; there is no `orderId` to read from.
        subject: { $first: "$subject" },
        tutorInfo: { $first: "$tutorInfo" },
      }
    },
    // $group does not guarantee an output order, so sort to keep pages stable.
    { $sort: { _id: 1 } },
    // Paginate after grouping: otherwise a tutor can show up again on another page
    // and pages can come back with fewer than `limit` entries.
    { "$skip": page * limit },
    { "$limit": limit },
    {
      // Join with `profiles` on the tutor's incremental id.
      $lookup: {
        from: "profiles",
        localField: "tutorInfo.tutorIncrementalId",
        foreignField: "incrementalId",
        as: "tutorDetails"
      }
    }
  ]
)

Mongodb $lookup using with multiple criteria mongodb

// Joins documents from the "Comment" collection onto the current document as `subComment`.
{
$lookup: {
from: "Comment",
// Values of the current document made available inside the sub-pipeline.
let: {
p_id: "$_id",
d_id: "$data_id",
},
pipeline: [
{
$match: {
$expr: {
// Both conditions must hold for a comment to be included.
$and: [
{
// Compares the comment's own _id with the outer document's _id.
// NOTE(review): the description below says Post._id is stored in
// Comment.post_id - confirm whether "$post_id" was intended here.
$eq: [
"$_id",
"$$p_id"
]
},
{
// Compares the comment's data_id with the outer document's data_id.
$eq: [
"$data_id",
"$$d_id"
]
}
]
}
}
}
],
as: "subComment"
}
}
https://mongoplayground.net/p/GbEgnVn3JSv
I am not good with mongoplayground, but I tried to put my idea there.
I want to fetch the comments of posts based on doc_id and post_id. For mainComment the query looks good to me, but the subComment one is not. Please guide me on this.
It's simple: a post can have multiple comments, and I need the comment count based on Post.data._id being equal to Comment.doc_id and Post._id being in Comment.post_id.
Not sure what "mainComment" and "subComment" are, I believe you missed the dollar sign before them
// Replaces the two joined arrays with their element counts.
{
$project: {
_id: 1,
// $size needs the field path written with a leading "$".
main_comments_count: {
$size: "$mainComment"
},
sub_comments_count: {
$size: "$subComment"
},
}
}
Update
What you did wrong in the playground is that you used $data in the lookup.let stage. $data is a document and the field you actually want to lookup is $data._id.
Side note: if you are looking up using just one field, you can simply use localField and foreignField in the $lookup stage. Using let and pipeline is not necessary there.
// For every `setting` document, collect the `site` documents whose doc_id is contained
// in that document's data._id.
const siteDocsByDataId = {
  $lookup: {
    from: "site",
    // Pass data._id (not the whole `data` document) into the sub-pipeline.
    "let": { "pid": "$data._id" },
    "pipeline": [
      { "$match": { "$expr": { "$in": ["$doc_id", "$$pid"] } } }
    ],
    "as": "subComment"
  }
};

// Copy the joined array into countRecord.
const copyJoinedDocs = {
  $addFields: { countRecord: "$subComment" }
};

db.setting.aggregate([siteDocsByDataId, copyJoinedDocs])
i.e. this gives the same output
// Same join written with localField/foreignField: site.doc_id is matched against data._id.
const siteDocsJoin = {
  $lookup: {
    from: "site",
    localField: "data._id",
    foreignField: "doc_id",
    as: "subComment"
  }
};

// Copy the joined array into countRecord.
const joinedDocsAsCountRecord = {
  $addFields: { countRecord: "$subComment" }
};

db.setting.aggregate([siteDocsJoin, joinedDocsAsCountRecord])

Data not getting fetched using aggregation query in nodejs

I tried the code below, but only the first match is checked and displayed; the others are displayed as [Object]. Why am I unable to see them in the console? I have 3 collections (student, subject and teacher) and have also made schemas for them. I tried aggregation:
// Loads the student named 'abcd' together with the teacher 'pqrs' and the subject
// 'computer', logs the result and then logs the _id of every returned student.
Student.aggregate([
  {
    $match: { name: 'abcd' }
  },
  {
    // Uncorrelated lookup: attaches the teacher(s) named 'pqrs' to every matched student.
    $lookup: {
      from: 'teachers',
      pipeline: [{ $match: { name: 'pqrs' } }],
      as: "teacherLookup"
    }
  },
  {
    // Uncorrelated lookup: attaches the subject(s) named 'computer' to every matched student.
    $lookup: {
      from: 'subjects',
      pipeline: [{ $match: { name: 'computer' } }],
      as: "subjectLookup"
    }
  }
]).then(function (res) {
  console.log(res);
  res.forEach(function (students) {
    let id = students._id;
    console.log(id + ' got id ');
  });
}).catch(function (err) {
  // Surface query failures instead of leaving the rejection unhandled.
  console.error(err);
});
output
student
name:'abcd' -- its fetched and other two not displaying values only shows object
teacherLookup: [ [Object] ]
subjectLookup: [ [Object] ]
You were almost there; to project specific fields you just have to use a $project stage.
Here I'm adding the query:
// Builds a $lookup stage that fetches, from collection `from`, only the _id of the
// documents whose name equals `name`, and stores them under the field `as`.
const lookupIdsByName = (from, name, as) => ({
  $lookup: {
    from,
    pipeline: [
      { $match: { name } },
      { $project: { "_id": 1 } }
    ],
    as
  }
});

// Student 'abcd' with the ids of teacher 'pqrs' and subject 'computer' attached.
Student.aggregate([
  { $match: { name: 'abcd' } },
  lookupIdsByName('teachers', 'pqrs', "teacherLookup"),
  lookupIdsByName('subjects', 'computer', "subjectLookup")
])
For more about $project refer here.
Hope this will help :)

Mongodb - Find count of distinct items after applying aggregate and match

Trying to figure out something from Mongo using mongoose in optimal way.
I have following documents
Regions
{
"_id" : ObjectId("5cf21263ff605c49cd6d8016"),
"name" : "Asia"
}
Countries can be part of multiple regions
{
"_id" : ObjectId("5d10a4ad80a93a1d7cd56cc6"),
"regions" : [
ObjectId("5d10a50080a93a1d7cd56cc7"),
ObjectId("5cf2126bff605c49cd6d8017")
],
"name" : "India"
}
Places belongs to one country
{
"_id" : ObjectId("5d11bb8180a93a1d7cd56d26"),
"name" : "Delhi",
"country" : ObjectId("5d136e7a4e480863a51c4056"),
}
Programs each in dayshows array represents one day. On a day show can cover multiple places.
{
"_id" : ObjectId("5d11cc9480a93a1d7cd56d31"),
"dayshows" : [
{
"_id" : ObjectId("5d11cc9480a93a1d7cd56d41"),
"places" : [
ObjectId("5d11bb8180a93a1d7cd56d26")
],
},
{
"_id" : ObjectId("5d11cc9480a93a1d7cd56d3c"),
"places" : [
ObjectId("5d11bb8180a93a1d7cd56d26"),
ObjectId("5d11bc7c80a93a1d7cd56d2e")
]
}
]
}
What am I trying to figure out?
For a given region, and for each country in that region, I want to know which places are covered and the number of programs for each place. I am using Node.js and mongoose.
Example
Input - Asia
Output
India
- Delhi (3)
- Mumbai (5)
Thailand
- Pattaya (2)
- Bangkok (5)
New to mongo.
You need to use $lookup to cross different collections.
Pipeline:
Stages 1-6 serve to get all related data.
(Optional) Stages 7-10 serve to transform the aggregated data into a key:value object.
ASSUMPTION
A program that visits 2 places is counted once for each place (Place1: +1, Place2: +1)
You know how to execute MongoDB aggregation in node.js
// For the region named "Asia": list its countries, the places in each country and the
// number of programs that include each place, reshaped into nested key/value objects.
db.Regions.aggregate([
// Stage 1: select the region by name.
{
$match: {
name: "Asia"
}
},
// Stage 2: attach the countries whose `regions` array contains this region's _id;
// each country in turn gets its places attached (Places.country == Countries._id).
{
$lookup: {
from: "Countries",
let: {
region: "$_id"
},
pipeline: [
{
$match: {
$expr: {
$in: [
"$$region",
"$regions"
]
}
}
},
{
$lookup: {
from: "Places",
localField: "_id",
foreignField: "country",
as: "Places"
}
}
],
as: "Countries"
}
},
// Stage 3: one document per country.
{
$unwind: "$Countries"
},
// Stage 4: one document per (country, place) pair.
{
$unwind: "$Countries.Places"
},
// Stage 5: attach the programs whose dayshows.places matches the place's _id.
{
$lookup: {
from: "Programs",
localField: "Countries.Places._id",
foreignField: "dayshows.places",
as: "Countries.Places.Programs"
}
},
// Stage 6: keep only the names and replace the program list with its length.
{
$project: {
"name": 1,
"Countries.name": 1,
"Countries.Places.name": 1,
"Countries.Places.Programs": {
$size: "$Countries.Places.Programs"
}
}
},
// Stage 7: regroup per (region, country), collecting { k: place name, v: program count } pairs.
{
$group: {
_id: {
name: "$name",
Countries: "$Countries.name"
},
Places: {
$push: {
k: "$Countries.Places.name",
v: "$Countries.Places.Programs"
}
}
}
},
// Stage 8: turn the k/v pairs into an object keyed by place name.
{
$project: {
_id: 1,
Places: {
$arrayToObject: "$Places"
}
}
},
// Stage 9: regroup per region, collecting { k: country name, v: places object } pairs.
{
$group: {
_id: "$_id.name",
Countries: {
$push: {
k: "$_id.Countries",
v: "$Places"
}
}
}
},
// Stage 10: final shape { name: <region>, Countries: { <country>: { <place>: <count> } } }.
{
$project: {
_id: 0,
name: "$_id",
Countries: {
$arrayToObject: "$Countries"
}
}
}
])
MongoPlayground

pipeline with $match not work as per expectation on mongodb

I have tried to do something like this join and search. I'm trying to do the search inside the pipeline with $match, but the issue is that $match is not working.
It neither searches nor joins the two collections.
SELECT * FROM `post`
Left JOIN postcat ON post.id=postcat.postid
Left JOIN catagory ON postcat.catid=catagory.id
WHERE
post_name LIKE '%a%'
OR post_data LIKE '%some data%'
OR tags LIKE '%some data%'
OR post_url LIKE '%some data%'
This is my collection info
Post
{
"_id" : ObjectId("5d29bd7609f28633f38ccc13"),
"postname" : "this is some data",
"tags" : "Damita,Caro",
"postdata" : "Berry Roseline Lira Cristy Hedi Clem Nerissa ",
"catagory" : [ {
"catagory_id" : [
ObjectId("5d29bd7509f28633f38ccbfd")
]
}, {
"catagory_id" : [
ObjectId("5d29bd7509f28633f38ccbfd")
]
}
],
"createby" : "5d22f712fe481b2a9afda4aa"
} ..........
category
{
"_id" : ObjectId("5d29bc271a68fb333531f6a1"),
"catagory_name" : "Katharine",
"catagory_description" : "Katharine"
}
The code i have tried so far:
var search_data = "some data";
var search_limit = 10;
var search_skip = 0;

// Case-insensitive "contains" pattern, the counterpart of SQL LIKE '%term%'.
// Regex metacharacters are escaped so the term is matched literally.
var search_regex = new RegExp(search_data.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), 'i');

// Joins every post with its categories, keeps the posts where the search term occurs in
// the category name or in one of the post's own text fields, and returns one page.
db.collection.aggregate([
  {
    // LEFT JOIN: attach the referenced category documents (empty array when there are none).
    // NOTE(review): catagory_id is itself an array inside each `catagory` entry - confirm
    // the join matches on your server version, otherwise $unwind the arrays first.
    $lookup: {
      from: 'catagories',
      localField: 'catagory.catagory_id',
      foreignField: '_id',
      as: "catagories_data"
    }
  },
  {
    // $regex is a query operator, so the conditions belong in a plain $match on the
    // joined document; $expr only accepts aggregation expressions.
    $match: {
      $or: [
        { "catagories_data.catagory_name": { $regex: search_regex } },
        { "postname": { $regex: search_regex } },
        { "posturl": { $regex: search_regex } },
        { "postdata": { $regex: search_regex } },
        { "tags": { $regex: search_regex } }
      ]
    }
  },
  // $skip has to run before $limit, otherwise pages after the first come back short or empty.
  { $skip: search_skip },
  { $limit: search_limit },
  { $group: { _id: "$_id", postname: { $push: "$postname" } } }
]).exec(function (err, data_post) {
  console.log(err);
  console.log(data_post);
});
I have no idea how to fix it. Any suggestions?

Resources