pipeline with $match not work as per expectation on mongodb - node.js

I have tried to do somthing like this join and search. I'm trying to do a search with in side pipeline with $match, but the issue is that $match is not working.
it is not searching or join two collections.
SELECT * FROM `post`
Left JOIN postcat ON post.id=postcat.postid
Left JOIN catagory ON postcat.catid=catagory.id
WHERE
post_name LIKE '%a%'
OR post_data LIKE '%some data%'
OR tags LIKE '%some data%'
OR post_url LIKE '%some data%'
This is my collection info
Post
{
"_id" : ObjectId("5d29bd7609f28633f38ccc13"),
"postname" : "this is some data",
"tags" : "Damita,Caro",
"postdata" : "Berry Roseline Lira Cristy Hedi Clem Nerissa ",
"catagory" : [ {
"catagory_id" : [
ObjectId("5d29bd7509f28633f38ccbfd")
]
}, {
"catagory_id" : [
ObjectId("5d29bd7509f28633f38ccbfd")
]
}
],
"createby" : "5d22f712fe481b2a9afda4aa"
} ..........
category
{
"_id" : ObjectId("5d29bc271a68fb333531f6a1"),
"catagory_name" : "Katharine",
"catagory_description" : "Katharine"
}
The code i have tried so far:
var search_data = "some data";
var search_limit = 10;
var search_skip = 0;
db.collection.aggregate([
{
$lookup: {
let: {
post_id: "$catagory.catagory_id" ,
postname: "$postname",
posturl: "$posturl" ,
postdata: "$postdata" ,
tags: "$tags"
},
from: 'catagories',
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ["$_id", "$$post_id"] },
{
$or: [
{"$$catagory_name": { $regex: new RegExp(search_data, 'i')}},
{"$$postname": { $regex: `^${search_data}` } },
{"$$posturl": { $regex: new RegExp(search_data, 'i') }},
{"$$postdata": { $regex: new RegExp(search_data, 'i') }},
{"$$tags": { $regex: new RegExp(search_data, 'i') }}
]
}
]
}
}
}
],
as: "catagories_data"
}
},
{ $limit : search_limit },
{ $skip : search_skip },
{ $group : { _id : "$_id", postname: { $push: "$postname" } } }
]).expla(function (err, data_post)
{
console.log(err);
console.log(data_post);
})
i have no idea how to fix it. any suggestion on it

Related

Get _id to collect another collection detail in expressjs mongodb

collection 'bookborrow'
{
"_id" : ObjectId("62f66aa9b744696c2d08cade"),
"userId" : "62f492ace559e59ee288b73a",
"data" : {
"bookname" : "62e53c8af5d4c45fb7853d9f",
"todayDate" : "12/08/2022",
"dateofreturn" : "27/08/2022"
}
}
collection 'bookdetails'
{
"_id" : ObjectId("62e53c8af5d4c45fb7853d9f"),
"bookname" : "Steve Jobs",
"bookauthor" : "Walter Isaacson ",
"counterbooks" : "5",
"bookPublisher" : "Simon & Schuster "
}
i need bookborrow collection data.bookname in bookdetails collection (bookborrow data.bookname(62e53c8af5d4c45fb7853d9f) === _id (62e53c8af5d4c45fb7853d9f) bookdetails
i used aggregate method
const getHistoryData=await db.get().collection('bookborrow').aggregate([
{
$lookup: {
from: 'bookdetails',
let: {
bookid:
{$toObjectId: "$data.bookname"}
},
pipeline: [
{
$match: {
$expr: {
$eq: [
'$_id',
'$$bookid'
]
}
}
},
{
$project: {
bookname: 1
}
}
],
as: 'getbookdetails'
}
},
{
$unwind: '$bookid'
},
{
$project: {
_id: 1,
bookname: '$getbookdetails.bookname',
returndate: 1
}
}
])
output looklike :=
"bookname" : "Steve Jobs"
"todayDate" : "12/08/2022"
"dateofreturn" : "27/08/2022"

How to filter data in array object using mongoDB?

I have a query that is running fine, now i have requirement to filter some data that is inside array. I don't know how to do that. Below is my code. Please guide me where i am wrong.
Request data
[
'Online Casino/Betting',
'Other ',
'Prefer not to say ',
'Do you really care ? :) ',
'Spend time with friends'
]
Database Data
"interests" : [
{
"name" : "Computers/internet",
"_id" : ObjectId("60752406d8e7213e6b5306de"),
"id" : NumberInt(1)
},
{
"name" : "Astrology/Spiritualism",
"_id" : ObjectId("60752406d8e7213e6b5306df"),
"id" : NumberInt(3)
},
{
"name" : "Cars & motorbikes",
"_id" : ObjectId("60752406d8e7213e6b5306e0"),
"id" : NumberInt(2)
}
],
Query
if (filterData.interests != undefined && filterData.interests.length > 0) {
interests = {
interests: { $elemMatch: { $and: [{ name: filterData.interests }] } }
}
}
User.aggregate([
coordinatesCondition,
{
$match: {
$and: [
exerciseHabitsCondition,
interests
],
},
},
{
$sort: lastActivity,
},
{ $limit: skip + 12 },
{ $skip: skip },
{
$lookup: {
from: "favorites",
localField: "_id",
foreignField: "favorites.favoriteUserId",
as: "favUsers",
},
},
])
Any solution appreciated!
as per my understanding you want to match the result with interests in the req data.
I am sharing a simple update, that can work well for you.
if (filterData.interests != undefined && filterData.interests.length > 0) {
interestsQuery = {
'interests.name': { $in: filterData.interests } }
}
}
User.aggregate([
coordinatesCondition,
{
$match: {
$and: [
exerciseHabitsCondition,
interestsQuery
],
},
},
{
$sort: lastActivity,
},
])

Get Total Count using aggregate + facet using Mongo

I want to group my data based on event date with pagination. However what i am getting is whole record totalcount instead of eventDate count. because of this UI part is not working properly. Here is my collection sample:
{
"_id" : ObjectId("5fc4d0009a25e8cfbe306381"),
"eventDate" : ISODate("2021-11-29T01:00:00.000Z"),
"team1" : {
"tName" : "Chicago Bears",
},
"team2" : {
"tName" : "Green Bay Packers",
}
}
{
"_id" : ObjectId("5fc4d0019a25e8cfbe3063ff"),
"eventDate" : ISODate("2021-11-30T01:00:00.000Z"),
"team1" : {
"tName" : "Nashville SC",
},
"team2" : {
"tName" : "Columbus Crew",
}
}
{
"_id" : ObjectId("5fc4d0019a25e8cfbe3063f4"),
"eventDate" : ISODate("2021-11-30T01:00:00.000Z"),
"team1" : {
"tName" : "yyyy",
},
"team2" : {
"tName" : "xxxx",
}
}
here is my query:
db.getCollection('game').aggregate([
{ $addFields: { "newEventDate": {$dateToString:{ format: "%Y-%m-%d", date: "$eventDate" }}}},
{ "$match": {
"eventDate": { $gte: new Date() }
}
},
{ "$facet": {
"resultData": [
{ "$match": {
"eventDate": { $gte: new Date() }
}
},
{ "$group": {
"_id": "$newEventDate",
"games": {$push: {
team1:"$team1",
team2:"$team2"
}}
}
},
{ $sort: {eventDate: 1} },
{
$limit: 1
}
],
"pageInfo": [
{ "$count": "totalRecords" }
]}
}
]);
After executing this query this is my response:
{
"resultData" : [
{
"_id" : "2021-11-29",
"games" : [
{
"awayTeam" : {
"tName" : "Chicago Bears"
},
"homeTeam" : {
"tName" : "Green Bay Packers"
}
},
]
}
],
"pageInfo" : [
{
"totalRecords" : 3 **[here i want 2 ie total event date]**
}
]
}
$match your condition
move your $group stage outside from $facet, convert your date from string inside group, add you date in group stage because we are going to sort in next stage
$sort by eventDate ascending order
$facet, first get single record using $limit, and second part get total count of the record using $count
db.collection.aggregate([
{ $match: { eventDate: { $gte: new Date() } } },
{
$group: {
_id: {
$dateToString: {
format: "%Y-%m-%d",
date: "$eventDate"
}
},
eventDate: { $first: "$eventDate" },
games: {
$push: {
team1: "$team1",
team2: "$team2"
}
}
}
},
{ $sort: { eventDate: 1 } },
{
$facet: {
resultData: [{ $limit: 1 }],
pageInfo: [{ $count: "totalRecords" }]
}
}
])
Playground

MongoDB search and pagination Aggregation Performance issue

I'm new in node js and MongoDB. I'm working on MongoDB search and pagination which is working good, but I have an issue with performance. it is taking too much time in counting and search records.
if I use small word to search then it works faster, if I use "long string" or "no record in database" then it takes too much time which is 50 to 186.30 seconds. (it is too much time, I'm expecting it to be 1 to 2 seconds).
I have more than 15,00,000 data on my record.
If I do not include count of the search word. it is takes 0.20 to 1.5 seconds, but when I count records while searching word it takes 25.0 to 35.0 seconds.
I have no idea how to decrease this time for counting records with the search word(query optimization).
I tried max level of query optimization.
I have also tried with
{
$count: "passing_scores"
}
but no change on time. I'm stuck on it. I have to decrease the time of count with the search word.
SQL Query for example
SELECT * FROM `post`
Left JOIN catagory ON post.catid=catagory.id
WHERE post_name LIKE '%a%' OR post_data LIKE '%a%' OR tags LIKE '%a%' OR post_url LIKE '%a%'
NODE and MongoDB
PostObj.count({},function(err,totalCount) {
if(err) {
response = {"error" : true,"message" : "Error fetching data"}
}
PostObj.aggregate([
{ $lookup:
{
from: 'catagories',
localField: 'catagory.catagory_id',
foreignField: '_id',
as: 'catagories_data'
}
},
{
$match:
{
$or: [
{"catagories_data.catagory_name": { $regex: new RegExp(search_data)}},
{"postname": { $regex: new RegExp(search_data) }},
{"posturl": { $regex: new RegExp(search_data) }},
{"postdata": { $regex: new RegExp(search_data) }},
{"tags": { $regex: new RegExp(search_data) }}
]
}
},
{ $limit : search_limit },
{ $skip : search_skip },
{ $group : { _id : "$_id", postname: { $push: "$postname" } , posturl: { $push: "$posturl" } } }
]).exec(function (err, data){
//end insert log data
if(err) {
response = {"error" : true,"message" :err};
}
if(search_data != "")
{
// count record using search word
PostObj.aggregate([
{ $lookup:
{
from: 'catagories',
localField: 'catagory.catagory_id',
foreignField: '_id',
as: 'catagories_data'
}
},
{
$match:
{
$or: [
{"catagories_data.catagory_name": { $regex: new RegExp(search_data)}},
{"postname": { $regex: new RegExp(search_data) }},
{"posturl": { $regex: new RegExp(search_data) }},
{"postdata": { $regex: new RegExp(search_data) }},
{"tags": { $regex: new RegExp(search_data) }}
]
}
},
{ $group: { _id: null, myCount: { $sum: 1 } } },
{ $project: { _id: 0 } }
]).exec(function (err, Countdata){
res.json({
sEcho : req.body.draw,
iTotalRecords: Countdata.myCount,
iTotalDispla,yRecords: Countdata.myCount,
aaData: data
});
}
res.json({
sEcho : req.body.draw,
iTotalRecords: totalPages,
iTotalDisplayRecords: totalPages,
aaData: data
});
});
});
Also, I have to try this way but it is tack 35.0 to 49.0 seconds more than 1st code.
PostObj.aggregate([
{ $lookup:
{
from: 'catagories',
localField: 'catagory.catagory_id',
foreignField: '_id',
as: 'catagories_data'
}
},
{
$match:
{
$or: [
{"catagories_data.catagory_name": { $regex: new RegExp(search_data)}},
{"postname": { $regex: new RegExp(search_data) }},
{"posturl": { $regex: new RegExp(search_data) }},
{"postdata": { $regex: new RegExp(search_data) }},
{"tags": { $regex: new RegExp(search_data) }}
]
}
},
{ '$facet' : {
metadata: [ { $count: "total" }, { $addFields: { page: NumberInt(3) } } ],
data: [ { $skip: 20 }, { $limit: 10 } ] // add projection here wish you re-shape the docs
} }
] )
If I do not use search word it is work good. I have an issue with when searching any word(count of records of that work without skip and limit)
collection data
Post
{
"_id": ObjectId("5d29bd7609f28633f38ccc13"),
"postname": "this is some data ",
"tags " : "
Damita,
Caro,
Leontyne,
Theodosia,
Vyky ",
"postdata ": "Berry Samara Kellia Rebekah Linette Hyacinthie Joelly Micky Tomasina Christian Fae Doralynn Chelsea Aurie Gwendolyn Tate
Cairistiona Ardys Aubrie Damita Olga Kelli Leone Marthena Kelcy
Cherlyn Molli Pris Ginelle Sula Johannah Hedwig Adelle Editha Lindsey
Loleta Lenette Ann Heidie Drona Charlena Emilia Manya Ketti Dorthea
Jeni Lorene Eolanda Karoly Loretta Marylou Tommie Leontyne Winny Cyb
Violet Pavia Karen Idelle Betty Doloritas Judye Aretha Quinta Billie
Vallie Fiona Letty Gates Shandra Rosemary Dorice Doro Coral Tove Crin
Bobbe Kristan Tierney Gianina Val Daniela Kellyann Marybeth Konstance
Nixie Andeee Jolene Patrizia Carla Arabella Berna Roseline Lira Cristy
Hedi Clem Nerissa ",
"catagory " : [
{ "catagory_id " : [ ObjectId("5d29bd7509f28633f38ccbfd")]},
{ "catagory_id": [ ObjectId("5d29bd7509f28633f38ccbfd") ]}],
"createby": "5d22f712fe481b2a9afda4aa"
}
catagory
{
"_id": ObjectId("5d29bc271a68fb333531f6a1"),
"catagory_name": "Katharine",
"catagory_description": "Katharine"
}
Any solution for it?
If in your case, your regex is just looking for a (or few) word(s), then it would be better to use $text instead of $regex. $text can use text index and is thus much faster. In terms of MySQL, $text is LIKE and $regex is REGEXP. Since in your example mysql query you are using LIKE, I'm pretty confident you can go for $text instead of $regex, in your mongo query as well.
You need to have (if not already) a compound "text" index on your fields - (postname, tags, postdata and posturl).
db.POST.createIndex(
{
postname: "text",
tags: "text",
posturl: "text",
postdata: "text"
}
)
There are some tips that i can suggest you try.
1: POST collection
it seems you are storing only category_id inside your category array of objects property, which you should avoid.
instead what you should do is as below.
create new property post_id inside category collection instead of array of object of category in post collection in [ high performance approach ].
OR
convert category property of post collection form array of object to simple array. [ average performance ].
Ex: category: [ ObjectId("5d29bd7509f28633f38ccbfd", ObjectId("5d29bd7509f28633f38ccbfd", ObjectId("5d29bd7509f28633f38ccbfd"];
definitely in both the cases post_id or category property must be indexed.
2: lookup
instead using simple lookup pipeline you should use pipeline approach
Eg:
NOT GOOD.
$lookup:{
from: 'catagories',
localField: 'catagory.catagory_id', // BAD IDEA //
foreignField: '_id',
as: 'catagories_data'
},
GOOD.
$lookup:{
from: 'catagories',
localField: '_id',
foreignField: 'post_id', // GOOD IDEA
as: 'catagories_data'
},
EVEN BETTER
$lookup:{
let : { post_id: "$_id" },
from: 'catagories',
pipeline:[
{
$match: {
$expr: {
$and: [
{ $eq: ["$post_id", "$$post_id"], },
]
}
},
},
{
$match: {
$or: [
// AVOID `new` keyword if you can do such;
// and create indexes for the same;
{ "catagory_name": { $regex: `^${search_data}` } },
{ "postname": { $regex: `^${search_data}` } },
{ "posturl": { $regex: `^${search_data}` } },
{ "postdata": { $regex: `^${search_data}` } },
{ "tags": { $regex: `^${search_data}` } }
]
}
}
],
as: 'catagories_data'
},
After All facet pipeline seems fine to me.
'$facet' : {
metadata: [ { $count: "total" }, { $addFields: { page: NumberInt(3) } } ],
data: [ { $skip: 20 }, { $limit: 10 } ] // add projection here wish you re-shape the docs
}
Other aspects of slowdown query depends on
configuration of your backend server and database server.
distance between frontend -> backend -> database server.
incoming and outgoing request per second.
internet connection of course
Complete Query will look like this
PostObj.aggregate([
{
$lookup: {
let: { post_id: "$_id" },
from: 'categories',
pipeline: [
{
$match: {
$expr: {
$and: [
{ $eq: ["$post_id", "$$post_id"], },
]
}
},
},
{
$match: {
$or: [
// AVOID `new` keyword if you can do such;
// and create indexes for the same;
{ "catagory_name": { $regex: `^${search_data}` } },
{ "postname": { $regex: `^${search_data}` } },
{ "posturl": { $regex: `^${search_data}` } },
{ "postdata": { $regex: `^${search_data}` } },
{ "tags": { $regex: `^${search_data}` } }
]
}
}
],
as: "catagories_data"
}
},
{
'$facet': {
metadata: [{ $count: "total" }, { $addFields: { page: NumberInt(3) } }],
catagories_data: [{ $skip: 0 }, { $limit: 10 }]
}
}
])

Find upcoming documents based on subdocument date

Suppose I have some MongoDB Event documents, each of which has a number of sessions which take place on different dates. We might represent this as:
db.events.insert([
{
_id: '5be9860fcb16d525543cafe1',
name: 'Past',
host: '5be9860fcb16d525543daff1',
sessions: [
{ date: new Date(Date.now() - 1e8 ) },
{ date: new Date(Date.now() + 1e8 ) }
]
}, {
_id: '5be9860fcb16d525543cafe2',
name: 'Future',
host: '5be9860fcb16d525543daff2',
sessions: [
{ date: new Date(Date.now() + 2e8) },
{ date: new Date(Date.now() + 3e8) }
]
}
]);
I'd like to find all Events which have not yet had their first session. So I'd like to find 'Future' but not 'Past'.
At the moment I'm using Mongoose and Express to do:
Event.aggregate([
{ $unwind: '$sessions' }, {
$group: {
_id: '$_id',
startDate: { $min: '$sessions.date' }
}
},
{ $sort:{ startDate: 1 } }, {
$match: { startDate: { $gte: new Date() } }
}
])
.then(result => Event.find({ _id: result.map(result => result._id) }))
.then(event => Event.populate(events, 'host'))
.then(events => res.json(events))
But I feel like I'm making heavy weather of this. Two hits on the database (three if you include the populate statement) and a big, complicated aggregate statement.
Is there a simpler way to do this? Ideally one which only involves one trip to the database.
You could use $reduce to fold the array and find if any of of the elements have a past session.
To illustrate this, consider running the following aggregate pipeline:
db.events.aggregate([
{ "$match": { "sessions.date": { "$gte": new Date() } } },
{ "$addFields": {
"hasPastSession": {
"$reduce": {
"input": "$sessions.date",
"initialValue": false,
"in": {
"$or" : [
"$$value",
{ "$lt": ["$$this", new Date()] }
]
}
}
}
} },
//{ "$match": { "hasPastSession": false } }
])
Based on the above sample, this will yield the following documents with the extra field
/* 1 */
{
"_id" : "5be9860fcb16d525543cafe1",
"name" : "Past",
"host" : "5be9860fcb16d525543daff1",
"sessions" : [
{
"date" : ISODate("2019-01-03T12:04:36.174Z")
},
{
"date" : ISODate("2019-01-05T19:37:56.174Z")
}
],
"hasPastSession" : true
}
/* 2 */
{
"_id" : "5be9860fcb16d525543cafe2",
"name" : "Future",
"host" : "5be9860fcb16d525543daff2",
"sessions" : [
{
"date" : ISODate("2019-01-06T23:24:36.174Z")
},
{
"date" : ISODate("2019-01-08T03:11:16.174Z")
}
],
"hasPastSession" : false
}
Armed with this aggregate pipeline, you can then leverage $expr and use the pipeline expression as your query in the find() method (or using the aggregate operation above but with the $match pipeline step at the end enabled) as
db.events.find(
{ "$expr": {
"$eq": [
false,
{ "$reduce": {
"input": "$sessions.date",
"initialValue": false,
"in": {
"$or" : [
"$$value",
{ "$lt": ["$$this", new Date()] }
]
}
} }
]
} }
)
which returns the document
{
"_id" : "5be9860fcb16d525543cafe2",
"name" : "Future",
"host" : "5be9860fcb16d525543daff2",
"sessions" : [
{
"date" : ISODate("2019-01-06T23:24:36.174Z")
},
{
"date" : ISODate("2019-01-08T03:11:16.174Z")
}
]
}
You don't need to use $unwind and $group to find the $min date from the array. You can directly use $min to extract the min date from the session array and then use $lookup to populate the host key
db.events.aggregate([
{ "$match": { "sessions.date": { "$gte": new Date() }}},
{ "$addFields": { "startDate": { "$min": "$sessions.date" }}},
{ "$match": { "startDate": { "$gte": new Date() }}},
{ "$lookup": {
"from": "host",
"localField": "host",
"foreignField": "_id",
"as": "host"
}},
{ "$unwind": "$host" }
])
Is it possible you can just reach into the sessions of each event, and pull back each event where all session dates are only in the future? Something like this? Might need tweaking..
db.getCollection("events").aggregate(
[
{$match:{'$and':
[
{'sessions.date':{'$gt': new Date()}},
{'sessions.date':{ '$not': {'$lt': new Date()}}}
]
}}
]
);

Resources