I have a MongoDB database that I'm querying from Node.js (using Mongoose).
In this particular case I'm querying a bunch of collections and piping the data as CSV into archiverjs to create a zip file. So I have an incoming request, the data gets queried using Mongoose and a MongoDB cursor, then piped into a pipeline which ends in archiverjs and, finally, the HTTP response delivering the zip file to the user.
// Builds a sorted Mongoose query against the model mapped to `fileType`
// and returns a streaming cursor over the matching documents.
// NOTE(review): nothing here is awaited — the cursor is created
// synchronously, so the `async` keyword only wraps the return value in a
// Promise; it could be dropped (callers that `await` still work).
async function getSortedQueryCursor(...) {
...
// Resolve the model for the requested file type, apply the filter
// (`reducer`) and an ascending sort on the dynamic key `idString`.
const query = MODEL_LOOKUP[fileType]
.find(reducer)
.sort({ [idString]: 'asc' });
// Return a readable cursor instead of executing the query eagerly, so
// results can be streamed rather than buffered in memory.
return query.cursor();
}
// Streams the query cursor through a chain of transform streams and a CSV
// stringifier; resolves to the tail of the pipeline (a readable CSV stream).
async function getData(...) {
const cursor = await getSortedQueryCursor(...);
// NOTE(review): chained .pipe() calls do not propagate 'error' events
// between stages — consider stream.pipeline() so a failure anywhere
// tears down the whole chain instead of leaking streams.
return cursor
.pipe(filter1Stream)
.pipe(filter2Stream)
.pipe(filter3Stream)
.pipe(csvStringifyStream);
}
router.post('/:scenarioId', async (request, response) => {
...
const archive = Archiver(...);
archive.pipe(response);
const result = await getData(...);
archive.append(stream, { name: filepath });
return archive.finalize();
}
As soon as a particular collection is in the game (the collection holds roughly 40 million documents) the query lasts very long (>15s) and I can see the mongo process on 100% CPU during that time. Even more surprising as the result set is empty (no documents matching the query).
It's a rather simple query:
items.find({ scenarioId: 'ckqf5ulg38gu208eecxlf95fc' }, { sort: { dataId: 1 } })
I have indices on scenarioId and dataId. If I run the query on the shell it returns in 30ms.
An explain() results in:
[
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "data.items",
"indexFilterSet": false,
"parsedQuery": {
"scenarioId": {
"$eq": "ckqf5ulg38gu208eecxlf95fc"
}
},
"winningPlan": {
"stage": "SORT",
"sortPattern": {
"itemId": 1
},
"memLimit": 104857600,
"type": "simple",
"inputStage": {
"stage": "FETCH",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"scenarioId": 1
},
"indexName": "scenarioId_1",
"isMultiKey": false,
"multiKeyPaths": {
"scenarioId": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"scenarioId": [
"[\"ckqf5ulg38gu208eecxlf95fc\", \"ckqf5ulg38gu208eecxlf95fc\"]"
]
}
}
}
},
"rejectedPlans": [
...
]
},
"executionStats": {
"executionSuccess": true,
"nReturned": 0,
"executionTimeMillis": 0,
"totalKeysExamined": 0,
"totalDocsExamined": 0,
"executionStages": {
"stage": "SORT",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 3,
"advanced": 0,
"needTime": 1,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"sortPattern": {
"dataId": 1
},
"memLimit": 104857600,
"type": "simple",
"totalDataSizeSorted": 0,
"usedDisk": false,
"inputStage": {
"stage": "FETCH",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"docsExamined": 0,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"keyPattern": {
"scenarioId": 1
},
"indexName": "scenarioId_1",
"isMultiKey": false,
"multiKeyPaths": {
"scenarioId": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"scenarioId": [
"[\"ckqf5ulg38gu208eecxlf95fc\", \"ckqf5ulg38gu208eecxlf95fc\"]"
]
},
"keysExamined": 0,
"seeks": 1,
"dupsTested": 0,
"dupsDropped": 0
}
}
},
...
},
"serverInfo": {
...
"version": "4.4.6",
"gitVersion": "72e66213c2c3eab37d9358d5e78ad7f5c1d0d0d7"
},
...
}
]
It tells me (I'm not very experienced in interpreting those results) that the query is quite cheap ("executionTimeMillisEstimate": 0), as it's not running a document scan ("docsExamined": 0).
Next I connected to the mongo server and ran db.currentOp({"secs_running": {$gte: 5}}) to get some information from this side:
{
"type" : "op",
...
"clientMetadata" : {
"driver" : {
"name" : "nodejs|Mongoose",
"version" : "3.6.5"
},
"os" : {
"type" : "Linux",
"name" : "linux",
"architecture" : "x64",
"version" : "5.8.0-50-generic"
},
"platform" : "'Node.js v14.17.0, LE (unified)",
"version" : "3.6.5|5.12.3"
},
"active" : true,
"secs_running" : NumberLong(16),
"microsecs_running" : NumberLong(16661409),
"op" : "query",
"ns" : "data.items",
"command" : {
"find" : "items",
"filter" : {
"scenarioId" : "ckqf5ulg38gu208eecxlf95fc"
},
"sort" : {
"itemId" : 1
},
"projection" : {
},
"returnKey" : false,
"showRecordId" : false,
"lsid" : {
"id" : UUID("be3ce18b-5365-4680-b734-543d06418301")
},
"$clusterTime" : {
"clusterTime" : Timestamp(1625498044, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : 0
}
},
"$db" : "data",
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"numYields" : 14701,
"locks" : {
"ReplicationStateTransition" : "w",
"Global" : "r",
"Database" : "r",
"Collection" : "r"
},
"waitingForLock" : false,
"lockStats" : {
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(14702)
}
},
"Global" : {
"acquireCount" : {
"r" : NumberLong(14702)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(14702)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(14702)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(1)
}
}
},
"waitingForFlowControl" : false,
"flowControlStats" : {
}
}
Any ideas how to improve the performance or find the bottleneck in my application? As the load is high on mongo side and no documents are found/passed to the application I guess it's mongo having trouble ...
EDIT: I've logged the whole process from DB side using db.setProfilingLevel(2) and db.system.profile.find().pretty(). Here we can see that the whole collection (or am I misinterpreting "docsExamined" : 39612167?) is queried:
{
"op" : "query",
"ns" : "data.items",
"command" : {
"find" : "items",
"filter" : {
"scenarioId" : "ckqf5ulg38gu208eecxlf95fc"
},
"sort" : {
"dataId" : 1
},
"projection" : {
},
...
"$db" : "data",
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"keysExamined" : 39612167,
"docsExamined" : 39612167,
"cursorExhausted" : true,
"numYield" : 39613,
"nreturned" : 0,
"queryHash" : "B7F40289",
"planCacheKey" : "BADED068",
"locks" : {
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(39615)
}
},
"Global" : {
"acquireCount" : {
"r" : NumberLong(39615)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(39614)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(39614)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(1)
}
}
},
"flowControl" : {
},
"storage" : {
},
"responseLength" : 242,
"protocol" : "op_msg",
"millis" : 48401,
"planSummary" : "IXSCAN { dataId: 1 }",
"execStats" : {
"stage" : "CACHED_PLAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 48401,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 39613,
"restoreState" : 39613,
"isEOF" : 1,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"scenarioId" : {
"$eq" : "ckqf5ulg38gu208eecxlf95fc"
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 6270,
"works" : 39612168,
"advanced" : 0,
"needTime" : 39612167,
"needYield" : 0,
"saveState" : 39613,
"restoreState" : 39613,
"isEOF" : 1,
"docsExamined" : 39612167,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 39612167,
"executionTimeMillisEstimate" : 2151,
"works" : 39612168,
"advanced" : 39612167,
"needTime" : 0,
"needYield" : 0,
"saveState" : 39613,
"restoreState" : 39613,
"isEOF" : 1,
"keyPattern" : {
"dataId" : 1
},
"indexName" : "dataId_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"dataId" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"dataId" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 39612167,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
(As usual) it seems like the indices are not properly set. I've created a new (secondary?) index:
{
"dataId" : 1,
"scenarioId": 1
}
and now the query returns within milliseconds ...
EDIT: What still makes me wonder is that the shell query returned in milliseconds and the mongoose query took ages. Even though the queries seem to be identical (from my point of view) mongo treats them differently.
Related
I have 3 different collections that are related through a field other than the primary id.
For example, I have three collections — users, educations and user profiles — where users and educations are related through the user_id key of the user, and the same holds for userinfo. Please find sample data below.
let users = [
{ _id: ObjectId("1"), user_id: 1, name: "Nitin", is_active: true, is_deleted: false },
{ _id: ObjectId("2"), user_id: 2, name: "Vishal", is_active: true, is_deleted: false },
]
let educations =
[
{ _id: ObjectId("33"), user: 1, std: "10", is_active: true, is_deleted: false },
{ _id: ObjectId("44"), user: 2, std: "8", is_active: true, is_deleted: false },
]
let userinfo = [
{ _id: ObjectId("33"), user: 1, weight: "50", is_active: true, is_deleted: false },
{ _id: ObjectId("44"), user: 2, weight: "45", is_active: true, is_deleted: false },
]
Now I would like to fetch the relation data and created query by using $lookup by following way.
// Root-document filter: both fields live on `users` itself, so this can
// (and should) be applied before any $lookup.
let criteria = {
  is_active: true,
  is_deleted: false,
};
const result = await mongoose.connection.db.collection('users').aggregate([
  // FIX: match on the root collection FIRST. The original pipeline ran
  // both $lookups over every user and only filtered afterwards, wasting
  // all the lookup work done on non-matching users. Filtering up front
  // also lets an index on { is_active: 1, is_deleted: 1 } be used by the
  // initial cursor stage.
  { $match: criteria },
  //education
  { $lookup: { from: "education", localField: "user_id", foreignField: "user", as: "education" } },
  { $unwind: { path: "$education", preserveNullAndEmptyArrays: true } },
  //userinfo
  { $lookup: { from: "userinfo", localField: "user_id", foreignField: "user", as: "userinfo" } },
  { $unwind: { path: "$userinfo", preserveNullAndEmptyArrays: true } },
  // NOTE: the redundant `{ $match: { is_deleted: false } }`, the second
  // `{ $match: criteria }`, and the copy inside $facet were removed —
  // criteria only touches root fields already filtered by the first
  // stage, so the result set is unchanged.
  {
    "$facet": {
      "totalLocation": [
        { "$count": "count" },
      ],
    }
  },
  {
    "$project": {
      "totalLocation": { "$arrayElemAt": ["$totalLocation.count", 0] },
    }
  }
], { allowDiskUse: true, collation: { locale: 'en_US', alternate: "shifted" } }).toArray();
This query works completely fine and returns data as expected. But as the data grows the query is becoming slower, and we would like to make it faster. One solution I found is to create an index in a way that gives faster results. I have tried that, but it doesn't work for me,
so can anyone help me create an index for this kind of query, or suggest another solution?
Explanation as requested in comment
{
"stages" : [
{
"$cursor" : {
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "mydata.users",
"indexFilterSet" : false,
"parsedQuery" : {
"is_deleted" : {
"$eq" : false
}
},
"queryHash" : "242D9E6F",
"planCacheKey" : "386E80BB",
"winningPlan" : {
"stage" : "PROJECTION_SIMPLE",
"transformBy" : {
"cb_id" : 1,
"user" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"is_deleted" : {
"$eq" : false
}
},
"direction" : "forward"
}
},
"rejectedPlans" : []
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 2328,
"executionTimeMillis" : 265,
"totalKeysExamined" : 0,
"totalDocsExamined" : 2355,
"executionStages" : {
"stage" : "PROJECTION_SIMPLE",
"nReturned" : 2328,
"executionTimeMillisEstimate" : 10,
"works" : 2357,
"advanced" : 2328,
"needTime" : 28,
"needYield" : 0,
"saveState" : 3,
"restoreState" : 3,
"isEOF" : 1,
"transformBy" : {
"cb_id" : 1,
"user" : 1,
"_id" : 0
},
"inputStage" : {
"stage" : "COLLSCAN",
"filter" : {
"is_deleted" : {
"$eq" : false
}
},
"nReturned" : 2328,
"executionTimeMillisEstimate" : 0,
"works" : 2357,
"advanced" : 2328,
"needTime" : 28,
"needYield" : 0,
"saveState" : 3,
"restoreState" : 3,
"isEOF" : 1,
"direction" : "forward",
"docsExamined" : 2355
}
}
}
},
"nReturned" : NumberLong(2328),
"executionTimeMillisEstimate" : NumberLong(20)
},
{
"$lookup" : {
"from" : "education",
"as" : "education",
"localField" : "user_id",
"foreignField" : "user",
"unwinding" : {
"preserveNullAndEmptyArrays" : true
}
},
"nReturned" : NumberLong(2328),
"executionTimeMillisEstimate" : NumberLong(136)
},
{
"$lookup" : {
"from" : "userinfo",
"as" : "userinfo",
"localField" : "user_id",
"foreignField" : "user",
"unwinding" : {
"preserveNullAndEmptyArrays" : true
}
},
"nReturned" : NumberLong(2328),
"executionTimeMillisEstimate" : NumberLong(264)
},
{
"$facet" : {
"totalLocation" : [
{
"$teeConsumer" : {},
"nReturned" : NumberLong(2328),
"executionTimeMillisEstimate" : NumberLong(264)
},
{
"$group" : {
"_id" : {
"$const" : null
},
"count" : {
"$sum" : {
"$const" : 1
}
}
},
"nReturned" : NumberLong(1),
"executionTimeMillisEstimate" : NumberLong(264)
},
{
"$project" : {
"count" : true,
"_id" : false
},
"nReturned" : NumberLong(1),
"executionTimeMillisEstimate" : NumberLong(264)
}
]
},
"nReturned" : NumberLong(1),
"executionTimeMillisEstimate" : NumberLong(264)
},
{
"$project" : {
"_id" : true,
"totalLocation" : {
"$arrayElemAt" : [
"$totalLocation.count",
{
"$const" : 0.0
}
]
}
},
"nReturned" : NumberLong(1),
"executionTimeMillisEstimate" : NumberLong(264)
}
],
"serverInfo" : {},
"ok" : 1.0
}
Thanks
I'm trying to fetch a list of items that have the field: resellable: true.
Here is my data set I can see by visiting: my-domain.com/_all/listings/_search
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "collectibles",
"_type" : "listing",
"_id" : "SseZfNbpdpBxc3O",
"_score" : 1.0,
"_source" : {
"data" : {
"resellable" : true,
"active" : true,
"description" : "<p>1234 123 123 123</p>",
"title" : "2134",
"has_store" : true,
"createdAt" : "Wed May 27 2020 04:23:18 GMT+0000 (Coordinated Universal Time)",
"apiURL" : "kJ9zsIsdQG8TZrRfPco4",
"id" : "SseZfNbpdpBxc3O",
"state" : "PENDING",
"amount" : "21"
}
}
}
]
}
}
My query is using *NodeJS and ElasticSearch.js*
// FIX: two problems with the original query.
// 1. The documents nest all fields under `data` (see the _source dump
//    above), so the fields must be addressed as `data.resellable` and
//    `data.active` — top-level `resellable` / `active` can never match.
// 2. multi_match is a full-text query; for exact boolean matches a bool
//    `filter` with `term` clauses is the right tool — scoring is skipped
//    and the clauses are cacheable, which is also faster.
// NOTE(review): this requires BOTH flags to be true; drop the
// `data.active` clause if only `resellable` matters for this endpoint.
let results = await client.search({
  index: params.category,
  type: "listing",
  body: {
    query: {
      bool: {
        filter: [
          { term: { "data.resellable": true } },
          { term: { "data.active": true } },
        ],
      },
    },
  },
})
The response is always no hits. I've tried a match, I've tried no best_fields and it doesn't seem to match the value.
What am I doing wrong here? Is there something else you need to do to query the _source.data level items?
ElasticSearch version: 7.4
You are using the "true" in string format when using multi_match, while you indexed data in boolean format true, and that's the reason you are not getting any hits.
I just noticed both of your multi_match fields(resellable and active) are boolean then why you are using the multi_match query, you should instead use the boolean filter query which is cached as well and gives better performance.
From the doc
Filter clauses are executed in filter context, meaning that scoring is ignored and clauses are considered for caching.
Sample index Mapping
{
"mappings": {
"properties": {
"active": {
"type": "boolean"
},
"resellable" :{
"type" : "boolean"
}
}
}
}
Index various example documents
{
"active" : true,
"resellable" : true
}
{
"active" : true,
"resellable" : false
}
{
"active" : false,
"resellable" : false
}
{
"active" : false,
"resellable" : true
}
Search query to get the doc where both values are true
{
"query": {
"bool": {
"filter": [
{
"match": {
"active": true
}
},
{
"match": {
"resellable": true
}
}
]
}
}
}
Result
"hits": [
{
"_index": "filterbool",
"_type": "_doc",
"_id": "1",
"_score": 0.0,
"_source": {
"active": true,
"resellable": true
}
}
]
I am trying to use Mongoose findByIdAndDelete() method on a model but it does not see the _id it needs to delete.
My route is declared similar to this (minified) example:
// DELETE /delete-entity/:id — removes the Entity document with the given _id.
router.delete('/delete-entity/:id', (req, res) => {
// NOTE(review): findByIdAndDelete casts req.params.id to an ObjectId; if
// the stored _id is a plain string (e.g. hand-seeded test data) the cast
// value never matches and nothing is deleted — see the resolution below.
Entity.findByIdAndDelete(req.params.id)
.exec()
.then((docs) => { ...handle success })
.catch((err) => { ...handle error });
});
I only have middleware running that hooks into the save method of the model. But this middleware does not touch the _id field.
When I console.log() inside the route I can see that req.params.id is set. Also, if I try to identify my document using another field to delete it (so not the _id field), it does work. However, when I try to use the _id field, all that is returned to the client is ''. This makes me believe that something goes wrong in comparing datatypes of the _id field. But casting using mongoose.Types.ObjectId() did not help either.
What could be going on here?
In addition to this already provided information. Here is my model.
// Shared field definition: a mandatory string.
const requiredString = { type: String, required: true };

// Sub-document schema for the embedded info; `_id: false` stops Mongoose
// from generating an _id for each embedded document.
const embeddedSchema = new mongoose.Schema(
  { name: requiredString },
  { _id: false }
);

// Main Entity schema: a required name plus a required embedded document.
const entitySchema = new mongoose.Schema({
  name: requiredString,
  embeddedInfo: { type: embeddedSchema, required: true },
});

module.exports = mongoose.model('Entity', entitySchema);
@Invider I managed to get this from the profiler.
{
"op" : "command",
"ns" : "development.entities",
"command" : {
"findAndModify" : "entities",
"query" : {
"_id" : ObjectId("5dc95b8cc472d31232dba5a3")
},
"new" : false,
"remove" : true,
"upsert" : false,
"lsid" : {
"id" : UUID("30086660-0619-440e-9268-148957428a2b")
},
"$db" : "development"
},
"keysExamined" : 0,
"docsExamined" : 0,
"ndeleted" : 0,
"numYield" : 0,
"locks" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(1),
"w" : NumberLong(1)
}
},
"Database" : {
"acquireCount" : {
"w" : NumberLong(1)
}
},
"Collection" : {
"acquireCount" : {
"w" : NumberLong(1)
}
}
},
"responseLength" : 74,
"protocol" : "op_msg",
"millis" : 0,
"planSummary" : "IDHACK",
"execStats" : {
"stage" : "DELETE",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"nWouldDelete" : 0,
"nInvalidateSkips" : 0,
"inputStage" : {
"stage" : "IDHACK",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keysExamined" : 0,
"docsExamined" : 0
}
},
"ts" : ISODate("2019-11-14T09:27:52.988Z"),
"client" : "127.0.0.1",
"allUsers" : [ ],
"user" : ""
}
A lot of thanks goes out to @invider and @SuleymanSah for pointing me in the right direction.
The problem was actually something pretty simple that I completely overlooked and was indeed related to the datatype as I expected.
My e2e tests were failing because of the same problem as trying to call the route using Postman.
The problem was that when adding test samples to the database I was setting the _id field myself. This most likely causes the value stored to be of type string. Letting MongoDB itself set the _id field solved this issue. At least for working in the live environment using Postman.
For my e2e test I had to modify my the data that will be loaded into the database for each test. I was adding samples in the following way:
{
_id: '5dc95b8cc472d31232dba5a5',
name: 'SomeData',
embeddedInfo: {
name: 'someOtherData'
}
}
I should have casted the _id into an ObjectId first. Like so:
{
_id: mongoose.Types.ObjectId('5dc95b8cc472d31232dba5a5'),
name: 'SomeData',
embeddedInfo: {
name: 'someOtherData'
}
}
Again. Thanks to the both of you.
I have created an ascending and a descending index for the field a:
db.getCollection('objectlists').createIndex( { a: 1 } )
db.getCollection('objectlists').createIndex( { a: -1 } )
When I use this index in the find functionality, it works perfectly even on large amount of data
db.getCollection('objectlists').find({a: {$gt: 0}}) --> returns instantly.
However, when I use it for sort such as:
db.getCollection('objectlists').find().sort({a: 1}), I get:
Error: error: {
"ok" : 0,
"errmsg" : "Executor error during find command :: caused by :: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.",
"code" : 96,
"codeName" : "OperationFailed"
}
I have even tried adding hint:
db.getCollection('objectlists').find().sort({a: 1}).hint({a: 1});
But I end up getting the same error. I also tried using ensureIndex() before running this query, but that still did not solve the error. What is the issue? Am I misunderstanding how index sorting works?
The output of db.getCollection('objectlists').find().sort({a: 1}).explain() is
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "App.objectlists",
"indexFilterSet" : false,
"parsedQuery" : {},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"a" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"a" : 1
},
"indexName" : "a_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"a" : [
"a"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"a" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : []
So i have this structure of a document:
{
_id: "123abc",
mainProps: [
{
"countrycode": "US"
},
{
"yearfounded": "2011"
},
{
"city": "New York"
},
...
],
otherProps: [{}, {}, ...]
}
I have an index set like this:
db.companies.ensureIndex({mainProps: 1})
The task is to create a web form for searching these documents. Fields in the form are not fixed and can be added. Basically, I don't know what fields the user would like to filter on, so I can't set a proper compound index. The database will have more than 20 million documents; right now it's about 10 million.
The problem is that my index doesn't work, or works wrong.
See some examples.
This query has no index at all.
db.companies.find({'mainProps.yearfounded': '2012'}).explain()
This query uses index and is fine.
db.companies.find({mainProps:{'yearfounded': '2012'}}).explain()
And something like this hangs (if I remove the explain() ) and I don't know whether it's executing or what's happening.
db.companies.find(
{$or: [
{ mainProps: {foundedyear: '2012'}},
{ mainProps: {foundedyear: '2011'}},
]}
).explain()
For the last query explain I got something like this.
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "leadsbase.companies",
"indexFilterSet" : false,
"parsedQuery" : {
"$or" : [
{
"mainProps" : {
"$eq" : {
"foundedyear" : "2012"
}
}
},
{
"mainProps" : {
"$eq" : {
"foundedyear" : "2011"
}
}
}
]
},
"winningPlan" : {
"stage" : "SUBPLAN",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"mainProps" : 1
},
"indexName" : "mainProps_1",
"isMultiKey" : true,
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 1,
"direction" : "forward",
"indexBounds" : {
"mainProps" : [
"[{ foundedyear: \"2011\" }, { foundedyear: \"2011\" }]",
"[{ foundedyear: \"2012\" }, { foundedyear: \"2012\" }]"
]
}
}
}
},
"rejectedPlans" : [ ]
},
"serverInfo" : {
"host" : "vm1",
"port" : 27017,
"version" : "3.2.8",
"gitVersion" : "ed70e33130c977bda0024c125b56d159573dbag0"
},
"ok" : 1
}
So as far as I understand index is present, but for some reason not working.
How should I structure my fields or how should I set indexes for this?
createIndex() will create indexes on collections, whereas ensureIndex() creates an index on the specified field only if the index does not already exist.
So your second query works while the first query fails. Try deleting your index with dropIndex(), and then rebuild the index with createIndex().
One way to check the performance o your indexscan , you can check for "executionStats"
db.collection.explain("executionStats").find( <your query>)
and then from the result, check for this two field:
executionSuccess.totalKeysExamined, executionSuccess.totalDocsExamined
For most case, if your index is good, both should have same number. Or you can read more in documentation
"executionStats" : {
"executionSuccess" : <boolean>,
"nReturned" : <int>,
"executionTimeMillis" : <int>,
"totalKeysExamined" : <int>, // this is your index keys
"totalDocsExamined" : <int>, // this is total docs examined
"executionStages" : {
"stage" : <STAGE1>
"nReturned" : <int>,
"executionTimeMillisEstimate" : <int>,
"works" : <int>,
"advanced" : <int>,
"needTime" : <int>,
"needYield" : <int>,
"isEOF" : <boolean>,
...
"inputStage" : {
"stage" : <STAGE2>,
...
"nReturned" : <int>,
"executionTimeMillisEstimate" : <int>,
"keysExamined" : <int>,
"docsExamined" : <int>,
...
"inputStage" : {
...
}
}
},