Does anyone know how to change a property value in MongoDB?
I want to change units[0].services[0].label from "etiket controle" to "AuditLabel" in every document.
I tried this:
db.getCollection('assortiment').updateMany({ "units.$[].services.$[].label": "etiket controle" }, { "$set": { "units.$[].services.$[].label": "AuditLabel" } })
but with no success, because it doesn't select anything (the all-positional operator $[] only works in the update document, not in the query filter).
The documents should change from:
{
"_id" : "764",
"meta" : {
"groupsId" : "764",
"type" : "DRYGR"
},
"units" : [
{
"unit" : "BASE_UNIT_OR_EACH",
"gtin" : "08711728556206",
"services" : [
{
"label" : "etiket controle",
"collection" : "gtins"
}
]
}
]
}
to:
{
"_id" : "764",
"meta" : {
"groupsId" : "764",
"type" : "DRYGR"
},
"units" : [
{
"unit" : "BASE_UNIT_OR_EACH",
"gtin" : "08711728556206",
"services" : [
{
"label" : "AuditLabel",
"collection" : "gtins"
}
]
}
]
}
Try this code, it works for me:
db.getCollection('assortiment').updateMany(
{
"units.services.label": "etiket controle"
},
{
"$set":
{ "units.$[].services.$[].label": "AuditLabel" }
}
)
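If you only want to rename the labels that actually match "etiket controle" (instead of writing to every services element), arrayFilters can be used. A sketch along the same lines as the update above:

db.getCollection('assortiment').updateMany(
  { "units.services.label": "etiket controle" },
  { "$set": { "units.$[].services.$[svc].label": "AuditLabel" } },
  { "arrayFilters": [ { "svc.label": "etiket controle" } ] }
)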
I have a MongoDB database that I'm querying from Node.js (using Mongoose).
In this particular case I query a bunch of collections and pipe the data as CSV into archiverjs to create a zip file. So there is an incoming request, the data gets queried through Mongoose using a MongoDB cursor, and it is piped through a pipeline that ends in archiverjs and, ultimately, the HTTP response delivering the zip file to the user.
async function getSortedQueryCursor(...) {
...
const query = MODEL_LOOKUP[fileType]
.find(reducer)
.sort({ [idString]: 'asc' });
return query.cursor();
}
async function getData(...) {
const cursor = await getSortedQueryCursor(...);
return cursor
.pipe(filter1Stream)
.pipe(filter2Stream)
.pipe(filter3Stream)
.pipe(csvStringifyStream);
}
router.post('/:scenarioId', async (request, response) => {
...
const archive = Archiver(...);
archive.pipe(response);
const result = await getData(...);
// append the CSV stream returned by getData() to the archive
archive.append(result, { name: filepath });
return archive.finalize();
});
As soon as a particular collection is in the game (it holds roughly 40 million documents), the query takes very long (>15 s) and I can see the mongo process at 100% CPU during that time. Even more surprising, the result set is empty (no documents match the query).
It's a rather simple query:
items.find({ scenarioId: 'ckqf5ulg38gu208eecxlf95fc' }, { sort: { dataId: 1 } })
I have indexes on scenarioId and dataId. If I run the query in the shell, it returns in 30 ms.
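(For reference, judging from the index names scenarioId_1 and dataId_1 in the explain output below, the existing indexes presumably look like this:)

db.items.createIndex({ scenarioId: 1 })
db.items.createIndex({ dataId: 1 })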
An explain() results in:
[
{
"queryPlanner": {
"plannerVersion": 1,
"namespace": "data.items",
"indexFilterSet": false,
"parsedQuery": {
"scenarioId": {
"$eq": "ckqf5ulg38gu208eecxlf95fc"
}
},
"winningPlan": {
"stage": "SORT",
"sortPattern": {
"itemId": 1
},
"memLimit": 104857600,
"type": "simple",
"inputStage": {
"stage": "FETCH",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"scenarioId": 1
},
"indexName": "scenarioId_1",
"isMultiKey": false,
"multiKeyPaths": {
"scenarioId": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"scenarioId": [
"[\"ckqf5ulg38gu208eecxlf95fc\", \"ckqf5ulg38gu208eecxlf95fc\"]"
]
}
}
}
},
"rejectedPlans": [
...
]
},
"executionStats": {
"executionSuccess": true,
"nReturned": 0,
"executionTimeMillis": 0,
"totalKeysExamined": 0,
"totalDocsExamined": 0,
"executionStages": {
"stage": "SORT",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 3,
"advanced": 0,
"needTime": 1,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"sortPattern": {
"dataId": 1
},
"memLimit": 104857600,
"type": "simple",
"totalDataSizeSorted": 0,
"usedDisk": false,
"inputStage": {
"stage": "FETCH",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"docsExamined": 0,
"alreadyHasObj": 0,
"inputStage": {
"stage": "IXSCAN",
"nReturned": 0,
"executionTimeMillisEstimate": 0,
"works": 1,
"advanced": 0,
"needTime": 0,
"needYield": 0,
"saveState": 0,
"restoreState": 0,
"isEOF": 1,
"keyPattern": {
"scenarioId": 1
},
"indexName": "scenarioId_1",
"isMultiKey": false,
"multiKeyPaths": {
"scenarioId": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"scenarioId": [
"[\"ckqf5ulg38gu208eecxlf95fc\", \"ckqf5ulg38gu208eecxlf95fc\"]"
]
},
"keysExamined": 0,
"seeks": 1,
"dupsTested": 0,
"dupsDropped": 0
}
}
},
...
},
"serverInfo": {
...
"version": "4.4.6",
"gitVersion": "72e66213c2c3eab37d9358d5e78ad7f5c1d0d0d7"
},
...
}
]
It tells me (I'm not very experienced in interpreting those results) that the query is quite cheap ("executionTimeMillisEstimate": 0) and that it's not running a document scan ("docsExamined": 0).
Next I connected to the mongo server and ran db.currentOp({"secs_running": {$gte: 5}}) to get some information from this side:
{
"type" : "op",
...
"clientMetadata" : {
"driver" : {
"name" : "nodejs|Mongoose",
"version" : "3.6.5"
},
"os" : {
"type" : "Linux",
"name" : "linux",
"architecture" : "x64",
"version" : "5.8.0-50-generic"
},
"platform" : "'Node.js v14.17.0, LE (unified)",
"version" : "3.6.5|5.12.3"
},
"active" : true,
"secs_running" : NumberLong(16),
"microsecs_running" : NumberLong(16661409),
"op" : "query",
"ns" : "data.items",
"command" : {
"find" : "items",
"filter" : {
"scenarioId" : "ckqf5ulg38gu208eecxlf95fc"
},
"sort" : {
"itemId" : 1
},
"projection" : {
},
"returnKey" : false,
"showRecordId" : false,
"lsid" : {
"id" : UUID("be3ce18b-5365-4680-b734-543d06418301")
},
"$clusterTime" : {
"clusterTime" : Timestamp(1625498044, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : 0
}
},
"$db" : "data",
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"numYields" : 14701,
"locks" : {
"ReplicationStateTransition" : "w",
"Global" : "r",
"Database" : "r",
"Collection" : "r"
},
"waitingForLock" : false,
"lockStats" : {
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(14702)
}
},
"Global" : {
"acquireCount" : {
"r" : NumberLong(14702)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(14702)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(14702)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(1)
}
}
},
"waitingForFlowControl" : false,
"flowControlStats" : {
}
}
Any ideas how to improve the performance or find the bottleneck in my application? Since the load is high on the mongo side and no documents are found/passed to the application, I guess it's MongoDB that is having trouble ...
EDIT: I've logged the whole process from the DB side using db.setProfilingLevel(2) and db.system.profile.find().pretty(). Here we can see that the whole collection is scanned (or am I misinterpreting "docsExamined" : 39612167?):
{
"op" : "query",
"ns" : "data.items",
"command" : {
"find" : "items",
"filter" : {
"scenarioId" : "ckqf5ulg38gu208eecxlf95fc"
},
"sort" : {
"dataId" : 1
},
"projection" : {
},
...
"$db" : "data",
"$readPreference" : {
"mode" : "primaryPreferred"
}
},
"keysExamined" : 39612167,
"docsExamined" : 39612167,
"cursorExhausted" : true,
"numYield" : 39613,
"nreturned" : 0,
"queryHash" : "B7F40289",
"planCacheKey" : "BADED068",
"locks" : {
"ReplicationStateTransition" : {
"acquireCount" : {
"w" : NumberLong(39615)
}
},
"Global" : {
"acquireCount" : {
"r" : NumberLong(39615)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(39614)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(39614)
}
},
"Mutex" : {
"acquireCount" : {
"r" : NumberLong(1)
}
}
},
"flowControl" : {
},
"storage" : {
},
"responseLength" : 242,
"protocol" : "op_msg",
"millis" : 48401,
"planSummary" : "IXSCAN { dataId: 1 }",
"execStats" : {
"stage" : "CACHED_PLAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 48401,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 39613,
"restoreState" : 39613,
"isEOF" : 1,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"scenarioId" : {
"$eq" : "ckqf5ulg38gu208eecxlf95fc"
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 6270,
"works" : 39612168,
"advanced" : 0,
"needTime" : 39612167,
"needYield" : 0,
"saveState" : 39613,
"restoreState" : 39613,
"isEOF" : 1,
"docsExamined" : 39612167,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 39612167,
"executionTimeMillisEstimate" : 2151,
"works" : 39612168,
"advanced" : 39612167,
"needTime" : 0,
"needYield" : 0,
"saveState" : 39613,
"restoreState" : 39613,
"isEOF" : 1,
"keyPattern" : {
"dataId" : 1
},
"indexName" : "dataId_1",
"isMultiKey" : false,
"multiKeyPaths" : {
"dataId" : [ ]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"dataId" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 39612167,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0
}
}
}
(As usual) it seems the indexes were not set up properly. I've created a new compound index:
{
"dataId" : 1,
"scenarioId": 1
}
and now the query returns within milliseconds ...
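(For reference, the common guidance for compound indexes is to put equality fields before sort fields, so an index shaped like the following should serve this query as well; this is a sketch, not necessarily the exact index created above:)

db.items.createIndex({ scenarioId: 1, dataId: 1 })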
EDIT: What still makes me wonder is that the shell query returned in milliseconds while the mongoose query took ages. Even though the queries seem identical (from my point of view), mongo treats them differently.
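(One way to compare the two is to ask for the plan from the Mongoose side as well and, if needed, pin the index explicitly. A sketch using Mongoose's explain() and hint() query helpers, with names taken from the code above, inside an async function:)

// see which plan the driver-issued query actually gets
const plan = await MODEL_LOOKUP[fileType]
  .find({ scenarioId: 'ckqf5ulg38gu208eecxlf95fc' })
  .sort({ dataId: 1 })
  .explain('executionStats');

// or force the compound index if the planner keeps picking a bad plan
const cursor = MODEL_LOOKUP[fileType]
  .find({ scenarioId: 'ckqf5ulg38gu208eecxlf95fc' })
  .sort({ dataId: 1 })
  .hint({ scenarioId: 1, dataId: 1 })
  .cursor();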
I have created an ascending and a descending index on field a:
db.getCollection('objectlists').createIndex( { a: 1 } )
db.getCollection('objectlists').createIndex( { a: -1 } )
When I use this index in a find, it works perfectly even on a large amount of data:
db.getCollection('objectlists').find({a: {$gt: 0}}) --> returns instantly.
However, when I use it for a sort such as:
db.getCollection('objectlists').find().sort({a: 1}), I get:
Error: error: {
"ok" : 0,
"errmsg" : "Executor error during find command :: caused by :: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.",
"code" : 96,
"codeName" : "OperationFailed"
}
I have even tried adding hint:
db.getCollection('objectlists').find().sort({a: 1}).hint({a: 1});
But I end up with the same error. I also tried running ensureIndex() before this query, but that did not solve the error either. What is the issue? Am I misunderstanding how index sorting works?
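(For reference, the same sort can be pushed through an aggregation with allowDiskUse to get past the in-memory sort limit. This is only a workaround sketch, not an explanation of why the index isn't used for the sort:)

db.getCollection('objectlists').aggregate(
  [ { "$sort": { "a": 1 } } ],
  { "allowDiskUse": true }
)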
The output of db.getCollection('objectlists').find().sort({a: 1}).explain() is
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "App.objectlists",
"indexFilterSet" : false,
"parsedQuery" : {},
"winningPlan" : {
"stage" : "SORT",
"sortPattern" : {
"a" : 1.0
},
"inputStage" : {
"stage" : "SORT_KEY_GENERATOR",
"inputStage" : {
"stage" : "FETCH",
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"a" : 1
},
"indexName" : "a_1",
"isMultiKey" : true,
"multiKeyPaths" : {
"a" : [
"a"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"a" : [
"[MinKey, MaxKey]"
]
}
}
}
}
},
"rejectedPlans" : []
I want to group my data on the basis of the factoryId field; within each factory there will be multiple orders, so I want to group again on orderId, since each order can contain multiple items. Below are an example of my data, the result I want, and the first grouping I tried.
{
"_id" : ObjectId("5b3e270c42d8004cea382e87"),
"factoryId" : ObjectId("5aa76190cef23a1561b8056c"),
"productId" : ObjectId("5aa78c66cef23a1561b80893"),
"orderId" : ObjectId("5b3e270c42d8004cea382e86"),
"generatedOrderId" : "3985-166770-4554",
"productName" : "Lakme Lotion"
},
{
"_id" : ObjectId("5b3e270c42d8004cea382e88"),
"factoryId" : ObjectId("5b39aed32832f72062e51c23"),
"productId" : ObjectId("5b3cb96139cec8341df52c4b"),
"orderId" : ObjectId("5b3e270c42d8004cea382e86"),
"generatedOrderId" : "3985-166770-4554",
"productName" : "Coke"
},
{
"_id" : ObjectId("5b3e27b07fe0d94d62b76b2a"),
"factoryId" : ObjectId("5aa76190cef23a1561b8057c"),
"productId" : ObjectId("5ac21075ac347a5fbf355028"),
"orderId" : ObjectId("5b3e27b07fe0d94d62b76b27"),
"generatedOrderId" : "3985-755507-7484",
"productName" : "Spoon"
}
And I want the result as:
{
"factoryId":ObjectId("5aa76190cef23a1561b8057c"),
"orders":[
{
"orderId":ObjectId("5b3e270c42d8004cea382e86")
"items":[
{
"productName":"Lakme Lotion"
},
{
"productName":"Coke"
}
]
}
]
}
Can anyone help me with this? Any help is appreciated.
I tried it myself and it worked for me. Sorry:
db.getCollection("transactions").aggregate(
[
{
"$group" : {
"_id" : "$orderId",
"items" : {
"$push" : "$$ROOT"
}
}
},
{
"$project" : {
"orderId" : "$_id",
"items" : "$items",
"_id" : 0
}
},
{
"$unwind" : {
"path" : "$items",
"preserveNullAndEmptyArrays" : false
}
},
{
"$group" : {
"_id" : "$items.factoryId",
"orders" : {
"$push" : "$$ROOT"
}
}
},
{
"$project" : {
"factoryId" : "$_id",
"orders" : "$orders",
"_id" : 0
}
}
]
);
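(A variant that produces the nested shape asked for in the question, grouping first by factoryId and orderId together and then by factoryId alone; a sketch using the same collection name as above:)

db.getCollection("transactions").aggregate([
  {
    "$group" : {
      "_id" : { "factoryId" : "$factoryId", "orderId" : "$orderId" },
      "items" : { "$push" : { "productName" : "$productName" } }
    }
  },
  {
    "$group" : {
      "_id" : "$_id.factoryId",
      "orders" : { "$push" : { "orderId" : "$_id.orderId", "items" : "$items" } }
    }
  },
  {
    "$project" : { "_id" : 0, "factoryId" : "$_id", "orders" : 1 }
  }
])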
How can I check whether my aggregate query is good or bad? I already use "explain", but it's not specific enough.
Here's the output of my aggregate "explain".
{
"waitedMS" : NumberLong(0),
"stages" : [
{
"$cursor" : {
"query" : {
"keys.license_id" : ObjectId("580eeb7fb79bec95648775a2"),
"deleted.status" : {
"$ne" : 1
}
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "serpentsmsapp.conversations",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"keys.license_id" : {
"$eq" : ObjectId("580eeb7fb79bec95648775a2")
}
},
{
"$not" : {
"deleted.status" : {
"$eq" : 1
}
}
}
]
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"$and" : [
{
"keys.license_id" : {
"$eq" : ObjectId("580eeb7fb79bec95648775a2")
}
},
{
"$not" : {
"deleted.status" : {
"$eq" : 1
}
}
}
]
},
"direction" : "forward"
},
"rejectedPlans" : [ ]
}
}
},
{
"$sort" : {
"sortKey" : {
"_id" : 1,
"keys.license_id" : 1,
"status.deleted" : 1
}
}
},
{
"$lookup" : {
"from" : "conversation_messages",
"as" : "cmf",
"localField" : "_id",
"foreignField" : "keys.conv_id",
"unwinding" : {
"preserveNullAndEmptyArrays" : false
}
}
},
{
"$match" : {
"cmf.deleted.status" : 0
}
},
{
"$sort" : {
"sortKey" : {
"cmf.deleted.status" : -1
}
}
},
{
"$group" : {
"_id" : {
"id" : "$_id",
"number" : "$number",
"mode" : "$mode",
"keys" : "$keys",
"ports" : "$ports",
"user_assign" : "$user_assign",
"spam" : "$spam"
},
"cm_field" : {
"$last" : {
"id" : "$cmf._id",
"message" : "$cmf.message",
"ports" : "$cmf.ports",
"mode" : "$cmf.mode",
"keys" : "$cmf.keys",
"status" : "$cmf.status",
"date" : "$cmf.updated"
}
},
"counts" : {
"$sum" : "$cmf.status"
},
"sms_mode" : {
"$addToSet" : "$cmf.mode"
}
}
},
{
"$sort" : {
"sortKey" : {
"cm_field.date" : -1
},
"limit" : NumberLong(5000)
}
},
{
"$group" : {
"_id" : "$_id",
"cm_field" : {
"$last" : "$cm_field"
},
"counts" : {
"$first" : "$counts"
},
"sms_mode" : {
"$first" : "$sms_mode"
}
}
}
],
"ok" : 1
}
How can I see how many documents are scanned when my query is executed?
To solve your problem, I suggest the following plan:
1. Learn how explain works.
2. "Good or bad" is not an engineering criterion; it always depends on many factors (requirements, hardware, etc.). You should define those requirements before deciding whether the query is performant enough.
3. For your specific explain log: "COLLSCAN" means the whole collection was scanned to produce the result. That is usually a warning sign; you should think about adding proper indexes (see the sketch below).
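To actually see how many documents are scanned, the profiler reports docsExamined per operation; a sketch, with the namespace and index fields taken from the $match in your pipeline:

db.setProfilingLevel(2)   // log every operation to system.profile
// ... run the aggregation ...
db.system.profile.find({ ns: "serpentsmsapp.conversations" }).sort({ ts: -1 }).limit(1).pretty()
// look at "docsExamined" in the profile entry, then reset with db.setProfilingLevel(0)

// An index covering the initial $match should turn the COLLSCAN into an IXSCAN:
db.conversations.createIndex({ "keys.license_id": 1, "deleted.status": 1 })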