-room collection
_id: ObjectId("xxx")
bedspaces: Array
0:ObjectId("xx")
1:ObjectId("xx")
***
***
-bedspace collection
_id: ObjectId("xxxx");
number: 1
decks: Array
{
_id: ObjectId("xxx");
number: 1
status: "Vacant"
tenant: ObjectId("5c964ae7f5097e3020d1926c")
dueRent: 11
away: null
},
{
_id: ObjectId("xxx");
number: 2
status: "Vacant"
tenant: null
dueRent: 11
away: null
}
Each element of the decks array has a tenant field that holds an ObjectId, and I want to look up this ObjectId in the tenants collection.
-tenant collection
_id: ObjectId("5c964ae7f5097e3020d1926c");
name: 'John Doe'
-expected output
/*room collection*/
_id: ObjectId("xxx")
bedspaces: [
{
_id: ObjectId("xxx")
number: 1
decks: [
{
_id: ObjectId("xxx")
number: 1
status: "Vacant"
tenant: {
name: 'John Doe'
}
dueRent: 11
away: null
},
{
_id: ObjectId("xxx");
number: 1
status: "Vacant"
tenant: null
dueRent: 11
away: null
}
]
}
]
There are also instances where the decks array is null.
The aggregation below only returns the decks that have a tenant ObjectId; what I want is to return all of the decks, including those whose tenant is null.
{
from: 'beds',
let: {bedspace: '$bedspaces'},
pipeline:[
{
$match: {
$expr: {
$in: ["$_id", "$$bedspace"]
}
}
},
{
$unwind: "$decks"
},
{
$lookup: {
from: 'tenants',
let: {tenant: "$decks.tenant"},
pipeline: [
{
$match: {
$expr: {
$eq: ["$_id", "$$tenant"]
}
}
}
],
as: "decks.tenant",
}
},
{
$unwind: "$decks.tenant"
},
{ $group: {
_id: "$_id",
decks: { $push: "$decks" },
number: {$first: "$number"}
}}
],
as: "bedspaces"
}
"how can i add condition on my second look up, to execute only if tenant is not null", so that i could retrieve both decks, or any work-around so i could achieved my desired result
Don't really have time for all the explanation right now (sorry),
Explanation
The basic issue here is that usage of $unwind is your problem and you don't need it. Use $map on the produced array content merging with the "decks" array instead. Then you can have nulls.
What you want to do here is have the values from the $lookup from your "tenants" collection transposed into the existing array within your "beds/bedspaces" collection for it's own existing "tenant" values which are the ObjectId references for the foreign collection.
The $lookup stage cannot do this by simply naming the field path within the "as" output where that path is already inside another array, and in fact the output of $lookup is always an array of results obtained from the foreign collection. You want singular values for each actual match, and of course you expect a null to be in place where nothing matches, and of course keeping the original document array of "decks" intact, but just including the foreign details where those were found.
Your code attempt seems partially aware of this point, as you are using $unwind on the $lookup result from the "tenants" collection as a "temporary array" ( but you put it in the existing path and that overwrites content ) and then attempting to "re-group" as an array through $group and $push. But the problem of course is that the $lookup result does not apply to every array member within "decks", so after the $unwind you end up with fewer results than you want.
The real solution is not a "conditional $lookup", but instead to transpose the "temporary array" content from the result into the existing "decks" entries. You do this using $map to process the array members, and $arrayElemAt along with $indexOfArray in order to return the matching elements from the "temporary array" by the matching _id values to "tenant".
{ "$lookup": {
"from": Tenant.collection.name,
"let": { "tenant": "$decks.tenant" },
"pipeline": [
{ "$match": {
"$expr": { "$in": [ "$_id", "$$tenant" ] }
}}
],
"as": "tenant"
}},
{ "$addFields": {
"decks": {
"$map": {
"input": "$decks",
"in": {
"$mergeObjects": [
"$$this",
{
"tenant": {
"$cond": {
"if": {
"$eq": [
{ "$indexOfArray": ["$tenant._id", "$$this.tenant"] },
-1
]
},
"then": null,
"else": {
"$arrayElemAt": [
"$tenant",
{ "$indexOfArray": ["$tenant._id", "$$this.tenant"]}
]
}
}
}
}
Note that we are using $mergeObjects inside the $map in order to keep the existing content of each "decks" member and only replace ( or "merge" ) an overwritten representation of "tenant" for each array member. You are already using the expressive form of $lookup, and this, like $mergeObjects, is a MongoDB 3.6 feature.
Just for interest the same thing can be done by just specifying every field within the array. i.e:
"decks": {
"$map": {
"input": "$decks",
"in": {
"_id": "$$this._id",
"number": "$$this.number",
"tenant": {
// same expression
},
"__v": "$$this.__v" // just because it's mongoose
}
}
}
Much the same can be said for the $$REMOVE used in the $addFields which is also another MongoDB 3.6 feature. You can alternately just use $project and simply omit the unwanted fields:
{ "$project": {
"number": "$number",
"decks": {
"$map": { /* same expression */ }
},
"__v": "$__v"
// note we don't use the "tenant" temporary array
}}
But that's basically how it works. By taking the $lookup result and then transposing those results back into the original array within the document.
Example Listing
Also abstracting on your data from previous questions here, which is a bit better than what you posted in the question here. Runnable listing for demonstration:
// Bind mongoose explicitly and destructure from that binding. The chained
// `const {...} = mongoose = require(...)` form assigns to an undeclared
// variable, which creates an implicit global and throws in strict mode.
const mongoose = require('mongoose');
const { Schema, Types: { ObjectId } } = mongoose;

// Connection settings for the local demonstration database.
const uri = 'mongodb://localhost:27017/hotel';
const opts = { useNewUrlParser: true };

mongoose.set('useFindAndModify', false);
// The option name is singular ('useCreateIndex'), not 'useCreateIndexes'.
mongoose.set('useCreateIndex', true);
// Echo every issued operation to the console.
mongoose.set('debug', true);

// A tenant is the document that deck entries reference.
const tenantSchema = new Schema({
  name: String,
  age: Number
});

// A deck is embedded in a bed and optionally references one tenant.
const deckSchema = new Schema({
  number: Number,
  tenant: { type: Schema.Types.ObjectId, ref: 'Tenant' }
});

// A bed ("bedspace") embeds its decks directly.
const bedSchema = new Schema({
  number: Number,
  decks: [deckSchema]
});

// A room only holds references to its beds.
const roomSchema = new Schema({
  bedspaces: [{ type: Schema.Types.ObjectId, ref: 'Bed' }]
});

const Tenant = mongoose.model('Tenant', tenantSchema);
const Bed = mongoose.model('Bed', bedSchema);
const Room = mongoose.model('Room', roomSchema);

// Pretty-print any value as indented JSON.
const log = data => console.log(JSON.stringify(data, undefined, 2));
// Demonstration entry point: connects, resets all registered collections,
// seeds tenants/beds/one room, runs the nested $lookup aggregation and logs
// the result. Any error is printed and the connection is always closed.
(async function() {
try {
const conn = await mongoose.connect(uri, opts);
// Clean data: empty the collection behind every registered model
await Promise.all(
Object.entries(conn.models).map(([k, m]) => m.deleteMany())
);
// Insert data: three tenants with fixed _id values
let [john, jane, bilbo ] = await Tenant.insertMany([
{
_id: ObjectId("5c964ae7f5097e3020d1926c"),
name: "john doe",
age: 11
},
{
_id: ObjectId("5c964b2531bc162fdce64f15"),
name: "jane doe",
age: 12
},
{
_id: ObjectId("5caa5454494558d863513b24"),
name: "bilbo",
age: 111
}
]);
// Two beds; the last deck of the second bed has no tenant, which is the
// case that must come back as "tenant": null in the output.
let bedspaces = await Bed.insertMany([
{
_id: ObjectId("5c98d89c6bd5fc26a4c2851b"),
number: 1,
decks: [
{
number: 1,
tenant: john
},
{
number: 1,
tenant: jane
}
]
},
{
_id: ObjectId("5c98d89f6bd5fc26a4c28522"),
number: 2,
decks: [
{
number: 2,
tenant: bilbo
},
{
number: 3
}
]
}
]);
// A single room referencing both beds
await Room.create({ bedspaces });
// Aggregate
let results = await Room.aggregate([
// Outer join: rooms -> beds whose _id is listed in the room's "bedspaces"
{ "$lookup": {
"from": Bed.collection.name,
"let": { "bedspaces": "$bedspaces" },
"pipeline": [
{ "$match": {
"$expr": { "$in": [ "$_id", "$$bedspaces" ] }
}},
// Inner join: per bed, fetch the tenants referenced by any deck into a
// temporary "tenant" array
{ "$lookup": {
"from": Tenant.collection.name,
"let": { "tenant": "$decks.tenant" },
"pipeline": [
{ "$match": {
"$expr": { "$in": [ "$_id", "$$tenant" ] }
}}
],
"as": "tenant"
}},
// Transpose the temporary array back into each deck entry
{ "$addFields": {
"decks": {
"$map": {
"input": "$decks",
"in": {
"$mergeObjects": [
"$$this",
{
"tenant": {
"$cond": {
// -1 means this deck's tenant id was not found among the looked-up tenants
"if": {
"$eq": [
{ "$indexOfArray": ["$tenant._id", "$$this.tenant"] },
-1
]
},
"then": null,
"else": {
"$arrayElemAt": [
"$tenant",
{ "$indexOfArray": ["$tenant._id", "$$this.tenant"]}
]
}
}
}
}
]
}
}
},
// Remove the temporary lookup array from the bed document
"tenant": "$$REMOVE"
}}
],
"as": "bedspaces"
}}
]);
log(results);
} catch (e) {
console.error(e)
} finally {
// Runs on success and on failure
mongoose.disconnect();
}
})()
Returns:
Mongoose: tenants.deleteMany({}, {})
Mongoose: beds.deleteMany({}, {})
Mongoose: rooms.deleteMany({}, {})
Mongoose: tenants.insertMany([ { _id: 5c964ae7f5097e3020d1926c, name: 'john doe', age: 11, __v: 0 }, { _id: 5c964b2531bc162fdce64f15, name: 'jane doe', age: 12, __v: 0 }, { _id: 5caa5454494558d863513b24, name: 'bilbo', age: 111, __v: 0 } ], {})
Mongoose: beds.insertMany([ { _id: 5c98d89c6bd5fc26a4c2851b, number: 1, decks: [ { _id: 5caa5af6ed3dce1c3ed72cef, number: 1, tenant: 5c964ae7f5097e3020d1926c }, { _id: 5caa5af6ed3dce1c3ed72cee, number: 1, tenant: 5c964b2531bc162fdce64f15 } ], __v: 0 }, { _id: 5c98d89f6bd5fc26a4c28522, number: 2, decks: [ { _id: 5caa5af6ed3dce1c3ed72cf2, number: 2, tenant: 5caa5454494558d863513b24 }, { _id: 5caa5af6ed3dce1c3ed72cf1, number: 3 } ], __v: 0 } ], {})
Mongoose: rooms.insertOne({ bedspaces: [ ObjectId("5c98d89c6bd5fc26a4c2851b"), ObjectId("5c98d89f6bd5fc26a4c28522") ], _id: ObjectId("5caa5af6ed3dce1c3ed72cf3"), __v: 0 })
Mongoose: rooms.aggregate([ { '$lookup': { from: 'beds', let: { bedspaces: '$bedspaces' }, pipeline: [ { '$match': { '$expr': { '$in': [ '$_id', '$$bedspaces' ] } } }, { '$lookup': { from: 'tenants', let: { tenant: '$decks.tenant' }, pipeline: [ { '$match': { '$expr': { '$in': [ '$_id', '$$tenant' ] } } } ], as: 'tenant' } }, { '$addFields': { decks: { '$map': { input: '$decks', in: { '$mergeObjects': [ '$$this', { tenant: [Object] } ] } } }, tenant: '$$REMOVE' } } ], as: 'bedspaces' } } ], {})
[
{
"_id": "5caa5af6ed3dce1c3ed72cf3",
"bedspaces": [
{
"_id": "5c98d89c6bd5fc26a4c2851b",
"number": 1,
"decks": [
{
"_id": "5caa5af6ed3dce1c3ed72cef",
"number": 1,
"tenant": {
"_id": "5c964ae7f5097e3020d1926c",
"name": "john doe",
"age": 11,
"__v": 0
}
},
{
"_id": "5caa5af6ed3dce1c3ed72cee",
"number": 1,
"tenant": {
"_id": "5c964b2531bc162fdce64f15",
"name": "jane doe",
"age": 12,
"__v": 0
}
}
],
"__v": 0
},
{
"_id": "5c98d89f6bd5fc26a4c28522",
"number": 2,
"decks": [
{
"_id": "5caa5af6ed3dce1c3ed72cf2",
"number": 2,
"tenant": {
"_id": "5caa5454494558d863513b24",
"name": "bilbo",
"age": 111,
"__v": 0
}
},
{
"_id": "5caa5af6ed3dce1c3ed72cf1",
"number": 3,
"tenant": null
}
],
"__v": 0
}
],
"__v": 0
}
]
Shows the null on the second deck of the second entry in the bedspaces array, as expected.
Related
This question is a follow up to a previous question for which I have accepted an answer already. I have an aggregate query that returns the results of a deeply nested array of subdocuments based on a date range. The query returns the correct results within the specified date range, however it also returns an empty array for the results that do not match the query.
Technologies: MongoDB 3.6, Mongoose 5.5, NodeJS 12
Question 1:
Is there any way to remove the results that don't match the query?
Question 2:
Is there any way to 'populate' the Person db reference in the results? For example to get the Person Display Name I usually use 'populate' such as find().populate({ path: 'Person', select: 'DisplayName'})
Records schema
let RecordsSchema = new Schema({
RecordID: {
type: Number,
index: true
},
RecordType: {
type: String
},
Status: {
type: String
},
// ItemReport array of subdocuments
ItemReport: [ItemReportSchema],
}, {
collection: 'records',
selectPopulatedPaths: false
});
let ItemReportSchema = new Schema({
// ObjectId reference
ReportBy: {
type: Schema.Types.ObjectId,
ref: 'people'
},
ReportDate: {
type: Date,
required: true
},
WorkDoneBy: [{
Person: {
type: Schema.Types.ObjectId,
ref: 'people'
},
CompletedHours: {
type: Number,
required: true
},
DateCompleted: {
type: Date
}
}],
});
Query
Works but also returns empty results and also need to populate the Display Name property of the Person db reference
db.records.aggregate([
{
"$project": {
"ItemReport": {
$map: {
input: "$ItemReport",
as: "ir",
in: {
WorkDoneBy: {
$filter: {
input: "$$ir.WorkDoneBy",
as: "value",
cond: {
"$and": [
{ "$ne": [ "$$value.DateCompleted", null ] },
{ "$gt": [ "$$value.DateCompleted", new Date("2017-01-01T12:00:00.000Z") ] },
{ "$lt": [ "$$value.DateCompleted", new Date("2018-12-31T12:00:00.000Z") ] }
]
}
}
}
}
}
}
}
}
])
Actual Results
{
"_id": "5dcb6406e63830b7aa5427ca",
"ItemReport": [
{
"WorkDoneBy": [
{
"_id": "5dcb6406e63830b7aa53d8ea",
"PersonID": 111,
"ReportID": 8855,
"CompletedHours": 3,
"DateCompleted": "2017-01-20T05:00:00.000Z",
"Person": "5dcb6409e63830b7aa54fdba"
}
]
}
]
},
{
"_id": "5dcb6406e63830b7aa5427f1",
"ItemReport": [
{
"WorkDoneBy": [
{
"_id": "5dcb6406e63830b7aa53dcdc",
"PersonID": 4,
"ReportID": 9673,
"CompletedHours": 17,
"DateCompleted": "2017-05-18T04:00:00.000Z",
"Person": "5dcb6409e63830b7aa54fd69"
},
{
"_id": "5dcb6406e63830b7aa53dcdd",
"PersonID": 320,
"ReportID": 9673,
"CompletedHours": 3,
"DateCompleted": "2017-05-18T04:00:00.000Z",
"Person": "5dcb6409e63830b7aa54fe88"
}
]
}
]
},
{
"_id": "5dcb6406e63830b7aa5427f2",
"ItemReport": [
{
"WorkDoneBy": []
}
]
},
{
"_id": "5dcb6406e63830b7aa5427f3",
"ItemReport": [
{
"WorkDoneBy": []
}
]
},
{
"_id": "5dcb6406e63830b7aa5427f4",
"ItemReport": [
{
"WorkDoneBy": []
}
]
},
{
"_id": "5dcb6406e63830b7aa5427f5",
"ItemReport": [
{
"WorkDoneBy": []
}
]
},
Desired results
Note the results with an empty "WorkDoneBy" array are removed (question 1), and the "Person" display name is populated (question 2).
{
"_id": "5dcb6406e63830b7aa5427f1",
"ItemReport": [
{
"WorkDoneBy": [
{
"_id": "5dcb6406e63830b7aa53dcdc",
"CompletedHours": 17,
"DateCompleted": "2017-05-18T04:00:00.000Z",
"Person": {
_id: "5dcb6409e63830b7aa54fd69",
DisplayName: "Joe Jones"
}
},
{
"_id": "5dcb6406e63830b7aa53dcdd",
"CompletedHours": 3,
"DateCompleted": "2017-05-18T04:00:00.000Z",
"Person": {
_id: "5dcb6409e63830b7aa54fe88",
DisplayName: "Alice Smith"
}
}
]
}
]
},
First question is relatively easy to answer and there are multiple ways to do that. I would prefer using $anyElementTrue along with $map as those operators are pretty self-explanatory.
{
"$match": {
$expr: { $anyElementTrue: { $map: { input: "$ItemReport", in: { $gt: [ { $size: "$$this.WorkDoneBy" }, 0 ] } } } }
}
}
MongoPlayground
The second part is a bit more complicated but still possible. Instead of populate you need to run $lookup to bring in the data from the other collection. The problem is that your Person values are deeply nested, so you first need to prepare a flat list of id values using $reduce and $setUnion. Once you have the data, you need to merge your nested objects with the people entities using $map and $mergeObjects.
{
$addFields: {
people: {
$reduce: {
input: "$ItemReport",
initialValue: [],
in: { $setUnion: [ "$$value", "$$this.WorkDoneBy.Person" ] }
}
}
}
},
{
$lookup: {
from: "people",
localField: "peopleIds",
foreignField: "_id",
as: "people"
}
},
{
$project: {
_id: 1,
ItemReport: {
$map: {
input: "$ItemReport",
as: "ir",
in: {
WorkDoneBy: {
$map: {
input: "$$ir.WorkDoneBy",
as: "wdb",
in: {
$mergeObjects: [
"$$wdb",
{
Person: { $arrayElemAt: [{ $filter: { input: "$people", cond: { $eq: [ "$$this._id", "$$wdb.Person" ] } } } , 0] }
}
]
}
}
}
}
}
}
}
}
Complete Solution
I'm struggling with a problem of how to get the number of distinct field values in array returned as result of $lookup aggregation step in MongoDB using Mongoose. By the number of distinct field values I mean the number of rows with unique value on certain field.
Parent document has this structure:
{ _id: 678, name: "abc" }
Child document has this structure:
{ _id: 1009, fieldA: 123, x: { id: 678, name: "abc" } }
$lookup step is defined as follow:
{
from "children",
localField: "_id"
foreignField: "x.id"
as: "xyz"
}
Let's assume that I get this array as a result of $lookup aggregation step for a parent with _id equal to: 678
xyz: [
{ _id: 1009, fieldA: 123, x: { id: 678, name: "abc" } },
{ _id: 1010, fieldA: 3435, x: { id: 678, name: "abc" } },
{ _id: 1011, fieldA: 123, x: { id: 678, name: "abc" } }
]
I want to know how many distinct fieldA values are in this array. In this example it would be 2.
Of course the step should be in the aggregation flow, after the $lookup step and before (inside?) the $project step. As a side note, I must add that I also need the total number of elements in array xyz as another value ($size operator in $project step).
So given what you are saying, then you would basically have some data like this:
parents
{
"_id": 1,
"xyz": ["abc", "abd", "abe", "abf"]
}
children
{ "_id": "abc", "fieldA": 123 },
{ "_id": "abd", "fieldA": 34 },
{ "_id": "abe", "fieldA": 123 },
{ "_id": "abf", "fieldA": 54 }
N.B. If you actually defined the parent reference within the child instead of an array of child references in the parent, then there is a listing example at the bottom. The same principles generally apply in either case however.
Where your current $lookup that produces a result like that in the question would be something like this:
{ "$lookup": {
"from": "children",
"localField": "xyz",
"foreignField": "_id"
"as": "xyz"
}}
Best Approach
Now you could do other operations on the array returned in order to actually return the total count and distinct counts, but there is a better way with any modern MongoDB release which you should be using. Namely there is a more expressive form of $lookup which allows a pipeline to be specified to act on the resulting children:
Parent.aggregate([
{ "$lookup": {
"from": "children",
"let": { "ids": "$xyz" },
"pipeline": [
{ "$match": {
"$expr": { "$in": [ "$_id", "$$ids" ] }
}},
{ "$group": {
"_id": "$fieldA",
"total": { "$sum": 1 }
}},
{ "$group": {
"_id": null,
"distinct": { "$sum": 1 },
"total": { "$sum": "$total" }
}}
],
"as": "xyz"
}},
{ "$addFields": {
"xyz": "$$REMOVE",
"distinctCount": { "$sum": "$xyz.distinct" },
"totalCount": { "$sum": "$xyz.total" }
}}
])
The whole point there being that you don't actually need all the array results to be returned from the $lookup, so instead of working with the returned array of all matching children you just reduce that content from within the pipeline expression of the $lookup.
In order to get a total count and a distinct count for the inner content, after the initial $match conditions which specify the "join" and what matches to return, you would then $group on the "distinct" value as the key and maintain a "count" of the elements found in total. The second $group uses a null value for the key since the only thing you want now is the count of the distinct keys already returned, and of course return the $sum of the existing total of counted elements.
The result being of course:
{
"_id": 1,
"distinctCount": 3,
"totalCount": 4
}
And since we are using $addFields this would be in addition to all other fields present in the parent document with the exception of xyz which we explicitly removed via the $$REMOVE operator.
You might also note the usage of $sum in that last stage. The actual result of our $lookup pipeline is of course a single document, but it is as always within an array, since that is what the output of $lookup always is. In this case it's just a very simple way ( being the shortest syntax ) to just extract those values from the array as individual fields in the parent document instead.
Alternate
The alternate approach is of course to just work with the returned array, and all this really needs is essentially any of the appropriate "set operators" and the $size operator:
Parent.aggregate([
{ "$lookup": {
"from": "children",
"localField": "xyz",
"foreignField": "_id",
"as": "xyz"
}},
{ "$addFields": {
"xyz": "$$REMOVE",
"distinctCount": { "$size": { "$setUnion": [ [], "$xyz.fieldA" ] }},
"totalCount": { "$size": "$xyz" }
}}
])
Here we use $setUnion basically providing arguments of an empty array [] and the array of fieldA values. Since this would return a "set" that is the combination of both arguments, the one thing that defines a "set" is that the values can appear only once and are thus distinct. This is a quick way of obtaining only the distinct values and then of course each "array" ( or "set" ) is simply measured by $size for their respective counts.
So it "looks simple" but the problem is that it's not really efficient, and mostly because we spent operational time returning those array values from the $lookup and then we basically discarded the result. This is generally why the former approach is preferred since it will actually reduce the result before it is ever returned as an array. So "less work" overall.
If on the other hand you actually want to keep the array returned from the $lookup result then the latter case would be of course more desirable
Example listing
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost:27017/test';
const options = { useNewUrlParser: true, useUnifiedTopology: true };
mongoose.set('debug', true);
mongoose.set('useFindAndModify', false);
mongoose.set('useCreateIndex', true);
const parentSchema = new Schema({
_id: Number,
xyz: [{ type: String, ref: 'Child' }]
},{ _id: false });
parentSchema.index({ "xyz": 1 });
const childSchema = new Schema({
_id: String,
fieldA: Number
},{ _id: false });
const Parent = mongoose.model('Parent', parentSchema);
const Child = mongoose.model('Child', childSchema);
const log = data => console.log(JSON.stringify(data, undefined, 2));
(async function() {
try {
const conn = await mongoose.connect(uri, options);
// Clean data for demonstration
await Promise.all(
Object.values(conn.models).map(m => m.deleteMany())
);
// Insert some data
await Parent.create({ "_id": 1, "xyz": ["abc", "abd", "abe", "abf"] });
await Child.insertMany([
{ "_id": "abc", "fieldA": 123 },
{ "_id": "abd", "fieldA": 34 },
{ "_id": "abe", "fieldA": 123 },
{ "_id": "abf", "fieldA": 54 }
]);
let result1 = await Parent.aggregate([
{ "$lookup": {
"from": Child.collection.name,
"let": { "ids": "$xyz" },
"pipeline": [
{ "$match": {
"$expr": { "$in": [ "$_id", "$$ids" ] }
}},
{ "$group": {
"_id": "$fieldA",
"total": { "$sum": 1 }
}},
{ "$group": {
"_id": null,
"distinct": { "$sum": 1 },
"total": { "$sum": "$total" }
}}
],
"as": "xyz"
}},
{ "$addFields": {
"xyz": "$$REMOVE",
"distinctCount": { "$sum": "$xyz.distinct" },
"totalCount": { "$sum": "$xyz.total" }
}}
]);
log({ result1 });
let result2 = await Parent.aggregate([
{ "$lookup": {
"from": Child.collection.name,
"localField": "xyz",
"foreignField": "_id",
"as": "xyz"
}},
{ "$addFields": {
"xyz": "$$REMOVE",
"distinctCount": { "$size": { "$setUnion": [ [], "$xyz.fieldA" ] } },
"totalCount": { "$size": "$xyz" }
}}
]);
log({ result2 })
} catch(e) {
console.error(e);
} finally {
mongoose.disconnect();
}
})()
And the output:
Mongoose: parents.createIndex({ xyz: 1 }, { background: true })
Mongoose: parents.deleteMany({}, {})
Mongoose: children.deleteMany({}, {})
Mongoose: parents.insertOne({ xyz: [ 'abc', 'abd', 'abe', 'abf' ], _id: 1, __v: 0 }, { session: null })
Mongoose: children.insertMany([ { _id: 'abc', fieldA: 123, __v: 0 }, { _id: 'abd', fieldA: 34, __v: 0 }, { _id: 'abe', fieldA: 123, __v: 0 }, { _id: 'abf', fieldA: 54, __v: 0 }], {})
Mongoose: parents.aggregate([ { '$lookup': { from: 'children', let: { ids: '$xyz' }, pipeline: [ { '$match': { '$expr': { '$in': [ '$_id', '$$ids' ] } } }, { '$group': { _id: '$fieldA', total: { '$sum': 1 } } }, { '$group': { _id: null, distinct: { '$sum': 1 }, total: { '$sum': '$total' } } } ], as: 'xyz' } }, { '$addFields': { xyz: '$$REMOVE', distinctCount: { '$sum': '$xyz.distinct' }, totalCount: { '$sum': '$xyz.total' } } }], {})
{
"result1": [
{
"_id": 1,
"__v": 0,
"distinctCount": 3,
"totalCount": 4
}
]
}
Mongoose: parents.aggregate([ { '$lookup': { from: 'children', localField: 'xyz', foreignField: '_id', as: 'xyz' } }, { '$addFields': { xyz: '$$REMOVE', distinctCount: { '$size': { '$setUnion': [ [], '$xyz.fieldA' ] } }, totalCount: { '$size': '$xyz' } } }], {})
{
"result2": [
{
"_id": 1,
"__v": 0,
"distinctCount": 3,
"totalCount": 4
}
]
}
Example without child array in parent
Shows defining a schema without an array of values in the parent and instead defining the parent reference within all children:
const { Schema } = mongoose = require('mongoose');
const uri = 'mongodb://localhost:27017/test';
const options = { useNewUrlParser: true, useUnifiedTopology: true };
mongoose.set('debug', true);
mongoose.set('useFindAndModify', false);
mongoose.set('useCreateIndex', true);
const parentSchema = new Schema({
_id: Number,
},{ _id: false });
parentSchema.virtual("xyz", {
ref: 'Child',
localField: '_id',
foreignField: 'parent',
justOne: false
});
const childSchema = new Schema({
_id: String,
parent: Number,
fieldA: Number
},{ _id: false });
childSchema.index({ "parent": 1 });
const Parent = mongoose.model('Parent', parentSchema);
const Child = mongoose.model('Child', childSchema);
const log = data => console.log(JSON.stringify(data, undefined, 2));
(async function() {
try {
const conn = await mongoose.connect(uri, options);
// Clean data for demonstration
await Promise.all(
Object.values(conn.models).map(m => m.deleteMany())
);
// Insert some data
await Parent.create({ "_id": 1 });
await Child.insertMany([
{ "_id": "abc", "fieldA": 123 },
{ "_id": "abd", "fieldA": 34 },
{ "_id": "abe", "fieldA": 123 },
{ "_id": "abf", "fieldA": 54 }
].map(e => ({ ...e, "parent": 1 })));
let result1 = await Parent.aggregate([
{ "$lookup": {
"from": Child.collection.name,
"let": { "parent": "$_id" },
"pipeline": [
{ "$match": {
"$expr": { "$eq": [ "$parent", "$$parent" ] }
}},
{ "$group": {
"_id": "$fieldA",
"total": { "$sum": 1 }
}},
{ "$group": {
"_id": null,
"distinct": { "$sum": 1 },
"total": { "$sum": "$total" }
}}
],
"as": "xyz"
}},
{ "$addFields": {
"xyz": "$$REMOVE",
"distinctCount": { "$sum": "$xyz.distinct" },
"totalCount": { "$sum": "$xyz.total" }
}}
]);
log({ result1 });
let result2 = await Parent.aggregate([
{ "$lookup": {
"from": Child.collection.name,
"localField": "_id",
"foreignField": "parent",
"as": "xyz"
}},
{ "$addFields": {
"xyz": "$$REMOVE",
"distinctCount": { "$size": { "$setUnion": [ [], "$xyz.fieldA" ] } },
"totalCount": { "$size": "$xyz" }
}}
]);
log({ result2 })
} catch(e) {
console.error(e);
} finally {
mongoose.disconnect();
}
})()
And the output:
Mongoose: children.createIndex({ parent: 1 }, { background: true })
Mongoose: parents.deleteMany({}, {})
Mongoose: children.deleteMany({}, {})
Mongoose: parents.insertOne({ _id: 1, __v: 0 }, { session: null })
Mongoose: children.insertMany([ { _id: 'abc', fieldA: 123, parent: 1, __v: 0 }, { _id: 'abd', fieldA: 34, parent: 1, __v: 0 }, { _id: 'abe', fieldA: 123, parent: 1, __v: 0 }, { _id: 'abf', fieldA: 54, parent: 1, __v: 0 }], {})
Mongoose: parents.aggregate([ { '$lookup': { from: 'children', let: { parent: '$_id' }, pipeline: [ { '$match': { '$expr': { '$eq': [ '$parent', '$$parent' ] } } }, { '$group': { _id: '$fieldA', total: { '$sum': 1 } } }, { '$group': { _id: null, distinct: { '$sum': 1 }, total: { '$sum': '$total' } } } ], as: 'xyz' } }, { '$addFields': { xyz: '$$REMOVE', distinctCount: { '$sum': '$xyz.distinct' }, totalCount: { '$sum': '$xyz.total' } } }], {})
{
"result1": [
{
"_id": 1,
"__v": 0,
"distinctCount": 3,
"totalCount": 4
}
]
}
Mongoose: parents.aggregate([ { '$lookup': { from: 'children', localField: '_id', foreignField: 'parent', as: 'xyz' } }, { '$addFields': { xyz: '$$REMOVE', distinctCount: { '$size': { '$setUnion': [ [], '$xyz.fieldA' ] } }, totalCount: { '$size': '$xyz' } } }], {})
{
"result2": [
{
"_id": 1,
"__v": 0,
"distinctCount": 3,
"totalCount": 4
}
]
}
I ended up following the first approach suggested by @Neil Lunn. Because my parent and child schemas differ from those assumed by @Neil Lunn, I am posting my own answer, which solves my particular problem:
Parent.aggregate([
{
$lookup: {
from: "children",
let: { id: "$_id" },
pipeline: [
{ $match: { $expr: { $eq: ["$x.id", "$$id"] } } },
{
$group: {
_id: "$fieldA",
count: { $sum: 1 }
}
},
{
$group: {
_id: null,
fieldA: { $sum: 1 },
count: { $sum: "$count" }
}
}
],
as: "children"
}
},
{
$project: {
total: { $sum: "$children.count" },
distinct: { $sum: "$children.fieldA" }
}
}
]);
I am trying to filter a document by a sub-documents referred property. Assume that I have already created models for each schema. The simplified schemas are the following:
const store = new Schema({
name: { type: String }
})
const price = new Schema({
price: { type: Number },
store: {
type: mongoose.Schema.Types.ObjectId,
ref: 'Store'
},
})
const product = new Schema({
name: {type: String},
prices: [{
type: mongoose.Schema.Types.ObjectId,
ref: 'Price'
}]
})
/*
Notation:
lowercase for schemas: product
uppercase for models: Product
*/
As a first approach I tried:
Product.find({'prices.store':storeId}).populate('prices')
but this does not work as filtering by a sub-document property is not supported on mongoose.
My current approach is using the aggregation framework. This is how the aggregation looks:
{
$unwind: '$prices'
},
{
$lookup: {
from: 'prices',
localField: 'prices',
foreignField: '_id',
as: 'prices'
}
},
{
$unwind: '$prices'
},
{
$lookup: {
from: 'stores',
localField: 'prices.store',
foreignField: '_id',
as: 'prices.store'
}
}, // populate
{
$match: {
'prices.store._id': new mongoose.Types.ObjectId(storeId)
}
}, // filter by store id
{ $group: { _id: '$id', doc: { $first: '$$ROOT' } } },
{ $replaceRoot: { newRoot: '$doc' } }
// Error occurs in $group & $replaceRoot
For example, before the last two stages if the record being saved is:
{
name: 'Milk',
prices: [
{store: 1, price: 3.2},
{store: 2, price: 4.0}
]
}
then the aggregation returned: (notice the product is the same but displaying each price in different results)
[
{
id: 4,
name: 'Milk',
prices: {
id: 10,
store: { _id: 1, name : 'Walmart' },
price: 3.2
}
},
{
id: 4,
name: 'Milk',
prices: {
id: 11,
store: { _id: 2, name : 'CVS' },
price: 4.0
},
}
]
To solve this issue I added the last part:
{ $group: { _id: '$id', doc: { $first: '$$ROOT' } } },
{ $replaceRoot: { newRoot: '$doc' } }
But this last part only returns the following:
{
id: 4,
name: 'Milk',
prices: {
id: 10,
store: { _id: 1, name : 'Walmart' },
price: 3.2
}
}
Now prices is an object, it should be an array and it should contain all prices (2 in this case).
Question
How to return all prices (as an array) with the store field populated and filtered by storeId?
Expected result:
{
id: 4,
name: 'Milk',
prices: [
{
id: 10,
store: { _id: 1, name : 'Walmart' },
price: 3.2
},
{
id: 11,
store: { _id: 2, name : 'CVS' },
price: 4.0
}]
}
EDIT
I want to filter products that contain prices in a given store. It should return the product with its prices, all of them.
I'm not totally convinced your existing pipeline is the most optimal, but without sample data to work from it's hard to really tell otherwise. So just working onward from what you have:
Using $unwind
// Aggregation pipeline (the $unwind form): join prices, then stores, keep only
// the prices belonging to `storeId`, and rebuild one document per product.
var pipeline = [
  // { $unwind: '$prices' }, // note: should not need this past MongoDB 3.0
  { $lookup: {
    from: 'prices',
    localField: 'prices',
    foreignField: '_id',
    as: 'prices'
  }},
  // One document per joined price, so each can be joined to its store.
  { $unwind: '$prices' },
  { $lookup: {
    from: 'stores',
    localField: 'prices.store',
    foreignField: '_id',
    as: 'prices.store'
  }},
  // Changes from here
  // $lookup always yields an array; unwind it to get a single store object.
  { $unwind: '$prices.store' },
  // Constructed with `new`, matching the question's own usage, so it also
  // works where ObjectId is an ES class that cannot be called as a function.
  { $match: {'prices.store._id': new mongoose.Types.ObjectId(storeId) } },
  // Rebuild the "prices" array per product instead of keeping only $first.
  { $group: {
    _id: '$_id',
    name: { $first: '$name' },
    prices: { $push: '$prices' }
  }}
];
The points there start with:
Initial $unwind - Should not be required. Only in very early MongoDB 3.0 releases was this ever a requirement to $unwind an array of values before using $lookup on those values.
$unwind after $lookup - Is always required if you expect a "singular" object as matching, since $lookup always returns an array.
$match after $unwind - Is actually an "optimization" for pipeline processing and in fact a requirement in order to "filter". Without $unwind it's just a verification that "something is there" but items that did not match would not be removed.
$push in $group - This is the actual part that rebuilds the "prices" array.
The key point you were basically missing was using $first for the "whole document" content. You really don't ever want that, and even if you want more than just "name" you always want to $push the "prices".
In fact you probably do want more fields than just name from the original document, but really you should therefore be using the following form instead.
Expressive $lookup
An alternate is available with most modern MongoDB releases since MongoDB 3.6, which frankly you should be using at minimum:
// Expressive $lookup form (MongoDB 3.6+): the sub-pipeline filters prices by
// store and joins the store document before anything is returned to the
// product, so no outer $unwind / $group is needed.
var pipeline = [
  { $lookup: {
    from: 'prices',
    let: { prices: '$prices' },
    pipeline: [
      { $match: {
        // Construct with `new`: ObjectId is a class in current mongoose
        // releases and throws when invoked as a plain function.
        store: new mongoose.Types.ObjectId(storeId),
        // Only prices referenced by the product being joined
        $expr: { $in: [ '$_id', '$$prices' ] }
      }},
      { $lookup: {
        from: 'stores',
        let: { store: '$store' },
        pipeline: [
          { $match: { $expr: { $eq: [ '$_id', '$$store' ] } } }
        ],
        as: 'store'
      }},
      // $lookup yields an array; flatten to a single store object
      { $unwind: '$store' }
    ],
    as: 'prices'
  }},
  // remove results with no matching prices
  { $match: { 'prices.0': { $exists: true } } }
];
So the first thing to notice there is the "outer" pipeline is actually just a single $lookup stage, since all it really needs to do is "join" to the prices collection. From the perspective of joining to your original collection this is also true since the additional $lookup in the above example is actually related from prices to another collection.
This is exactly what the new form does: instead of using $unwind on the resulting array and then following on with the second join, only the matching items for "prices" are "joined" to the "stores" collection, and this happens before those items are returned into the array. Of course, since there is a "one to one" relationship with the "store", the inner pipeline still applies $unwind so that "store" is a single object rather than an array.
In short, the output of this simply has the original document with a "prices" array inside it. So there is no need to re-construct via $group and no confusion of what you use $first on and what you $push.
NOTE: I'm more than a little suspect of your "filter stores" statement and attempting to match the store field as presented in the "prices" collection. The question shows expected output from two different stores even though you specify an equality match.
If anything I suspect you might mean a "list of stores", which would instead be more like:
store: { $in: storeList.map(store => mongoose.Types.ObjectId(store)) }
Which is how you would work with a "list of strings" in both cases, using $in for matching against a "list" and the Array.map() to work with a supplied list and return each as ObjectId() values.
TIP: With mongoose you use a "model" rather than working with collection names, and the actual MongoDB collection name is typically the plural of the model name you registered.
So you don't have to "hardcode" the actual collection names for $lookup, simply use:
Model.collection.name
The .collection.name is an accessible property on all models, and can save you the trouble of remembering to actually name the collection for $lookup. It also protects you should you ever change your mongoose.model() instance registration in a way which alters the stored collection name with MongoDB.
Full Demonstration
The following is a self-contained listing demonstrating both approaches at work and how they produce the same results:
// Declare `mongoose` explicitly. The chained form `= mongoose = require(...)`
// assigns to an undeclared identifier, creating an implicit global and
// throwing a ReferenceError in strict mode / ES modules.
const mongoose = require('mongoose');
const { Schema, Types: { ObjectId } } = mongoose;

const uri = 'mongodb://localhost:27017/shopping';
const opts = { useNewUrlParser: true };

mongoose.set('useFindAndModify', false);
mongoose.set('useCreateIndex', true);
// Print every operation mongoose sends to the server
mongoose.set('debug', true);

const storeSchema = new Schema({
  name: { type: String }
});

// A price belongs to exactly one store
const priceSchema = new Schema({
  price: { type: Number },
  store: { type: Schema.Types.ObjectId, ref: 'Store' }
});

// A product holds an array of references to its prices
const productSchema = new Schema({
  name: { type: String },
  prices: [{ type: Schema.Types.ObjectId, ref: 'Price' }]
});

const Store = mongoose.model('Store', storeSchema);
const Price = mongoose.model('Price', priceSchema);
const Product = mongoose.model('Product', productSchema);

// Pretty-print any value as indented JSON
const log = data => console.log(JSON.stringify(data, undefined, 2));
// Seeds three stores, six prices and three products, then runs the same
// "prices for the given store(s)" query four ways (expressive and legacy
// $lookup, single and dual store) and logs each result.
(async function() {

  try {

    const conn = await mongoose.connect(uri, opts);

    // Clean data
    await Promise.all(
      Object.values(conn.models).map(m => m.deleteMany())
    );

    // Insert working data
    const [StoreA, StoreB, StoreC] = await Store.insertMany(
      ["StoreA", "StoreB", "StoreC"].map(name => ({ name }))
    );

    const [PriceA, PriceB, PriceC, PriceD, PriceE, PriceF]
      = await Price.insertMany(
        [[StoreA,1],[StoreB,2],[StoreA,3],[StoreC,4],[StoreB,5],[StoreC,6]]
          .map(([store, price]) => ({ price, store }))
      );

    // The inserted product documents are not needed afterwards
    await Product.insertMany(
      [
        { name: 'Milk', prices: [PriceA, PriceB] },
        { name: 'Cheese', prices: [PriceC, PriceD] },
        { name: 'Bread', prices: [PriceE, PriceF] }
      ]
    );

    // Test 1
    {
      log("Single Store - expressive")
      const pipeline = [
        { '$lookup': {
          'from': Price.collection.name,
          'let': { prices: '$prices' },
          'pipeline': [
            { '$match': {
              // demo - it's already an ObjectId. `new` is required because
              // ObjectId is a class in current mongoose releases.
              'store': new ObjectId(StoreA._id),
              '$expr': { '$in': [ '$_id', '$$prices' ] }
            }},
            { '$lookup': {
              'from': Store.collection.name,
              'let': { store: '$store' },
              'pipeline': [
                { '$match': { '$expr': { '$eq': [ '$_id', '$$store' ] } } }
              ],
              'as': 'store'
            }},
            { '$unwind': '$store' }
          ],
          as: 'prices'
        }},
        { '$match': { 'prices.0': { '$exists': true } } }
      ];

      const result = await Product.aggregate(pipeline);
      log(result);
    }

    // Test 2
    {
      log("Dual Store - expressive");
      const pipeline = [
        { '$lookup': {
          'from': Price.collection.name,
          'let': { prices: '$prices' },
          'pipeline': [
            { '$match': {
              'store': { '$in': [StoreA._id, StoreB._id] },
              '$expr': { '$in': [ '$_id', '$$prices' ] }
            }},
            { '$lookup': {
              'from': Store.collection.name,
              'let': { store: '$store' },
              'pipeline': [
                { '$match': { '$expr': { '$eq': [ '$_id', '$$store' ] } } }
              ],
              'as': 'store'
            }},
            { '$unwind': '$store' }
          ],
          as: 'prices'
        }},
        { '$match': { 'prices.0': { '$exists': true } } }
      ];

      const result = await Product.aggregate(pipeline);
      log(result);
    }

    // Test 3
    {
      log("Single Store - legacy");
      const pipeline = [
        { '$lookup': {
          'from': Price.collection.name,
          'localField': 'prices',
          'foreignField': '_id',
          'as': 'prices'
        }},
        { '$unwind': '$prices' },
        // Alternately $match can be done here
        // { '$match': { 'prices.store': StoreA._id } },
        { '$lookup': {
          'from': Store.collection.name,
          'localField': 'prices.store',
          'foreignField': '_id',
          'as': 'prices.store'
        }},
        { '$unwind': '$prices.store' },
        { '$match': { 'prices.store._id': StoreA._id } },
        { '$group': {
          '_id': '$_id',
          'name': { '$first': '$name' },
          'prices': { '$push': '$prices' }
        }}
      ];

      const result = await Product.aggregate(pipeline);
      log(result);
    }

    // Test 4
    {
      log("Dual Store - legacy");
      const pipeline = [
        { '$lookup': {
          'from': Price.collection.name,
          'localField': 'prices',
          'foreignField': '_id',
          'as': 'prices'
        }},
        { '$unwind': '$prices' },
        // Alternately $match can be done here
        { '$match': { 'prices.store': { '$in': [StoreA._id, StoreB._id] } } },
        { '$lookup': {
          'from': Store.collection.name,
          'localField': 'prices.store',
          'foreignField': '_id',
          'as': 'prices.store'
        }},
        { '$unwind': '$prices.store' },
        //{ '$match': { 'prices.store._id': { '$in': [StoreA._id, StoreB._id] } } },
        { '$group': {
          '_id': '$_id',
          'name': { '$first': '$name' },
          'prices': { '$push': '$prices' }
        }}
      ];

      const result = await Product.aggregate(pipeline);
      log(result);
    }

  } catch(e) {
    console.error(e);
  } finally {
    // Await so the disconnect promise is not left floating
    await mongoose.disconnect();
  }

})()
Which produces the output:
Mongoose: stores.deleteMany({}, {})
Mongoose: prices.deleteMany({}, {})
Mongoose: products.deleteMany({}, {})
Mongoose: stores.insertMany([ { _id: 5c7c79bcc78675135c09f54b, name: 'StoreA', __v: 0 }, { _id: 5c7c79bcc78675135c09f54c, name: 'StoreB', __v: 0 }, { _id: 5c7c79bcc78675135c09f54d, name: 'StoreC', __v: 0 } ], {})
Mongoose: prices.insertMany([ { _id: 5c7c79bcc78675135c09f54e, price: 1, store: 5c7c79bcc78675135c09f54b, __v: 0 }, { _id: 5c7c79bcc78675135c09f54f, price: 2, store: 5c7c79bcc78675135c09f54c, __v: 0 }, { _id: 5c7c79bcc78675135c09f550, price: 3, store: 5c7c79bcc78675135c09f54b, __v: 0 }, { _id: 5c7c79bcc78675135c09f551, price: 4, store: 5c7c79bcc78675135c09f54d, __v: 0 }, { _id: 5c7c79bcc78675135c09f552, price: 5, store: 5c7c79bcc78675135c09f54c, __v: 0 }, { _id: 5c7c79bcc78675135c09f553, price: 6, store: 5c7c79bcc78675135c09f54d, __v: 0 } ], {})
Mongoose: products.insertMany([ { prices: [ 5c7c79bcc78675135c09f54e, 5c7c79bcc78675135c09f54f ], _id: 5c7c79bcc78675135c09f554, name: 'Milk', __v: 0 }, { prices: [ 5c7c79bcc78675135c09f550, 5c7c79bcc78675135c09f551 ], _id: 5c7c79bcc78675135c09f555, name: 'Cheese', __v: 0 }, { prices: [ 5c7c79bcc78675135c09f552, 5c7c79bcc78675135c09f553 ], _id: 5c7c79bcc78675135c09f556, name: 'Bread', __v: 0 } ], {})
"Single Store - expressive"
Mongoose: products.aggregate([ { '$lookup': { from: 'prices', let: { prices: '$prices' }, pipeline: [ { '$match': { store: 5c7c79bcc78675135c09f54b, '$expr': { '$in': [ '$_id', '$$prices' ] } } }, { '$lookup': { from: 'stores', let: { store: '$store' }, pipeline: [ { '$match': { '$expr': { '$eq': [ '$_id', '$$store' ] } } } ], as: 'store' } }, { '$unwind': '$store' } ], as: 'prices' } }, { '$match': { 'prices.0': { '$exists': true } } } ], {})
[
{
"_id": "5c7c79bcc78675135c09f554",
"prices": [
{
"_id": "5c7c79bcc78675135c09f54e",
"price": 1,
"store": {
"_id": "5c7c79bcc78675135c09f54b",
"name": "StoreA",
"__v": 0
},
"__v": 0
}
],
"name": "Milk",
"__v": 0
},
{
"_id": "5c7c79bcc78675135c09f555",
"prices": [
{
"_id": "5c7c79bcc78675135c09f550",
"price": 3,
"store": {
"_id": "5c7c79bcc78675135c09f54b",
"name": "StoreA",
"__v": 0
},
"__v": 0
}
],
"name": "Cheese",
"__v": 0
}
]
"Dual Store - expressive"
Mongoose: products.aggregate([ { '$lookup': { from: 'prices', let: { prices: '$prices' }, pipeline: [ { '$match': { store: { '$in': [ 5c7c79bcc78675135c09f54b, 5c7c79bcc78675135c09f54c ] }, '$expr': { '$in': [ '$_id', '$$prices' ] } } }, { '$lookup': { from: 'stores', let: { store: '$store' }, pipeline: [ { '$match': { '$expr': { '$eq': [ '$_id', '$$store' ] } } } ], as: 'store' } }, { '$unwind': '$store' } ], as: 'prices' } }, { '$match': { 'prices.0': { '$exists': true } } } ], {})
[
{
"_id": "5c7c79bcc78675135c09f554",
"prices": [
{
"_id": "5c7c79bcc78675135c09f54e",
"price": 1,
"store": {
"_id": "5c7c79bcc78675135c09f54b",
"name": "StoreA",
"__v": 0
},
"__v": 0
},
{
"_id": "5c7c79bcc78675135c09f54f",
"price": 2,
"store": {
"_id": "5c7c79bcc78675135c09f54c",
"name": "StoreB",
"__v": 0
},
"__v": 0
}
],
"name": "Milk",
"__v": 0
},
{
"_id": "5c7c79bcc78675135c09f555",
"prices": [
{
"_id": "5c7c79bcc78675135c09f550",
"price": 3,
"store": {
"_id": "5c7c79bcc78675135c09f54b",
"name": "StoreA",
"__v": 0
},
"__v": 0
}
],
"name": "Cheese",
"__v": 0
},
{
"_id": "5c7c79bcc78675135c09f556",
"prices": [
{
"_id": "5c7c79bcc78675135c09f552",
"price": 5,
"store": {
"_id": "5c7c79bcc78675135c09f54c",
"name": "StoreB",
"__v": 0
},
"__v": 0
}
],
"name": "Bread",
"__v": 0
}
]
"Single Store - legacy"
Mongoose: products.aggregate([ { '$lookup': { from: 'prices', localField: 'prices', foreignField: '_id', as: 'prices' } }, { '$unwind': '$prices' }, { '$lookup': { from: 'stores', localField: 'prices.store', foreignField: '_id', as: 'prices.store' } }, { '$unwind': '$prices.store' }, { '$match': { 'prices.store._id': 5c7c79bcc78675135c09f54b } }, { '$group': { _id: '$_id', name: { '$first': '$name' }, prices: { '$push': '$prices' } } } ], {})
[
{
"_id": "5c7c79bcc78675135c09f555",
"name": "Cheese",
"prices": [
{
"_id": "5c7c79bcc78675135c09f550",
"price": 3,
"store": {
"_id": "5c7c79bcc78675135c09f54b",
"name": "StoreA",
"__v": 0
},
"__v": 0
}
]
},
{
"_id": "5c7c79bcc78675135c09f554",
"name": "Milk",
"prices": [
{
"_id": "5c7c79bcc78675135c09f54e",
"price": 1,
"store": {
"_id": "5c7c79bcc78675135c09f54b",
"name": "StoreA",
"__v": 0
},
"__v": 0
}
]
}
]
"Dual Store - legacy"
Mongoose: products.aggregate([ { '$lookup': { from: 'prices', localField: 'prices', foreignField: '_id', as: 'prices' } }, { '$unwind': '$prices' }, { '$match': { 'prices.store': { '$in': [ 5c7c79bcc78675135c09f54b, 5c7c79bcc78675135c09f54c ] } } }, { '$lookup': { from: 'stores', localField: 'prices.store', foreignField: '_id', as: 'prices.store' } }, { '$unwind': '$prices.store' }, { '$group': { _id: '$_id', name: { '$first': '$name' }, prices: { '$push': '$prices' } } } ], {})
[
{
"_id": "5c7c79bcc78675135c09f555",
"name": "Cheese",
"prices": [
{
"_id": "5c7c79bcc78675135c09f550",
"price": 3,
"store": {
"_id": "5c7c79bcc78675135c09f54b",
"name": "StoreA",
"__v": 0
},
"__v": 0
}
]
},
{
"_id": "5c7c79bcc78675135c09f556",
"name": "Bread",
"prices": [
{
"_id": "5c7c79bcc78675135c09f552",
"price": 5,
"store": {
"_id": "5c7c79bcc78675135c09f54c",
"name": "StoreB",
"__v": 0
},
"__v": 0
}
]
},
{
"_id": "5c7c79bcc78675135c09f554",
"name": "Milk",
"prices": [
{
"_id": "5c7c79bcc78675135c09f54e",
"price": 1,
"store": {
"_id": "5c7c79bcc78675135c09f54b",
"name": "StoreA",
"__v": 0
},
"__v": 0
},
{
"_id": "5c7c79bcc78675135c09f54f",
"price": 2,
"store": {
"_id": "5c7c79bcc78675135c09f54c",
"name": "StoreB",
"__v": 0
},
"__v": 0
}
]
}
]
Having trouble Populating my user.
The case:
// User schema: a unique, lowercased name plus an embedded "portfolio" array
// whose entries each carry a name and a reference to a FormatType document.
var User = new mongoose.Schema({
name: {
type: String,
lowercase: true,
unique: true
},
portfolio:[
{
name: String,
// ObjectId reference nested inside the array element
formatType: { type: mongoose.Schema.Types.ObjectId, ref: 'FormatType' },
}
]
});
And this is my Mongoose command:
User.findById(req.payload.id)
.populate({
path:'portfolio',
populate:{
path: 'formatType',
model: 'FormatType'
}
})
.then(user => { ...
So what we have here is a model - inside of an Object - inside of an array - inside of an entity.
Couldn't find an Answer online, would be very thankful~!
What you basically missed here is the "path" to the field you want to populate() is actually 'portfolio.formatType' and not just 'portfolio' as you have typed. Due to that mistake and the structure, you might have a few general misconceptions though.
Populate Correction
The basic correction merely needs the correct path, and you don't need the model argument since this is already implied in the schema:
User.findById(req.params.id).populate('portfolio.formatType');
It is however generally not a great idea to "mix" both "embedded" data and "referenced" data within arrays, and you should really be either embedding everything or simply referencing everything. It's also a little bit of an "anti-pattern" in general to keep an array of references in the document if your intention is referencing, since your reason should be not to cause the document to grow beyond the 16MB BSON limit. And where that limit would never be reached by your data it's generally better to "embed fully". That's really a wider discussion, but something you should be aware of.
The next general point here is populate() itself is somewhat "old hat", and really not the "magical" thing most new users perceive it to be. To be clear populate() is NOT A JOIN, and all it is doing is executing another query to the server in order to return the "related" items, then merge that content into the documents returned from the previous query.
$lookup Alternative
If you are looking for "joins", then really you probably wanted "embedding" as mentioned earlier. This is really the "MongoDB Way" of dealing with "relations": keeping all "related" data together in the one document. The other means of a "join", where data is in separate collections, is via the $lookup operator in modern releases.
This gets a bit more complex due to your "mixed" content array form, but can generally be represented as:
// Aggregation pipelines do not "autocast" from the schema, so the id string
// must be converted to an ObjectId by hand.
const { Types: { ObjectId } } = require("mongoose");

User.aggregate([
  // `new` is required: ObjectId is a class in current mongoose releases
  { "$match": { _id: new ObjectId(req.params.id) } },
  // Fetch every FormatType referenced anywhere in the portfolio array
  { "$lookup": {
    "from": FormatType.collection.name,
    "localField": "portfolio.formatType",
    "foreignField": "_id",
    "as": "formats"
  }},
  // Re-map each portfolio entry, swapping its reference for the joined document
  { "$project": {
    "name": 1,
    "portfolio": {
      "$map": {
        "input": "$portfolio",
        "in": {
          "name": "$$this.name",
          "formatType": {
            "$let": {
              "vars": {
                "idx": { "$indexOfArray": [ "$formats._id", "$$this.formatType" ] }
              },
              // $indexOfArray yields -1 when nothing matches, and
              // $arrayElemAt treats -1 as "last element", so guard against
              // attaching an unrelated format to an unmatched/null reference.
              "in": {
                "$cond": [
                  { "$gte": [ "$$idx", 0 ] },
                  { "$arrayElemAt": [ "$formats", "$$idx" ] },
                  null
                ]
              }
            }
          }
        }
      }
    }
  }}
]);
Or with the more expressive form of $lookup since MongoDB 3.6:
// Expressive $lookup (MongoDB 3.6+): the sub-pipeline runs against the
// FormatType collection and reshapes each matched document into a portfolio
// entry before it is returned, replacing the original "portfolio" array.
User.aggregate([
  // `new` is required: ObjectId is a class in current mongoose releases
  { "$match": { _id: new ObjectId(req.params.id) } },
  { "$lookup": {
    "from": FormatType.collection.name,
    "let": { "portfolio": "$portfolio" },
    "as": "portfolio",
    "pipeline": [
      // Keep only the formats referenced by this user's portfolio
      { "$match": {
        "$expr": {
          "$in": [ "$_id", "$$portfolio.formatType" ]
        }
      }},
      // The $match above guarantees $indexOfArray finds the id, so the index
      // used to pick the embedded _id / name is never negative here.
      { "$project": {
        "_id": {
          "$arrayElemAt": [
            "$$portfolio._id",
            { "$indexOfArray": [ "$$portfolio.formatType", "$_id" ] }
          ]
        },
        "name": {
          "$arrayElemAt": [
            "$$portfolio.name",
            { "$indexOfArray": [ "$$portfolio.formatType", "$_id" ] }
          ]
        },
        // The whole joined FormatType document
        "formatType": "$$ROOT",
      }}
    ]
  }}
]);
The two approaches work slightly differently, but both essentially work with the concept of returning the matching "related" entries and then "re-mapping" onto the existing array content in order to merge with the "name" properties "embedded" inside the array. That is actually the main complication that otherwise is a fairly straightforward method of retrieval.
It's pretty much the same process as what populate() actually does on the "client" but executed on the "server". So the comparisons are using the $indexOfArray operator to find where the matching ObjectId values are and then return a property from the array at that matched "index" via the $arrayElemAt operation.
The only difference is that in the MongoDB 3.6 compatible version, we do that "substitution" within the "foreign" content "before" the joined results are returned to the parent. In prior releases we return the whole matching foreign array and then "marry up" the two to form a singular "merged" array using $map.
Whilst these may initially look "more complex", the big advantage here is that these constitute a "single request" to the server with a "single response" and not issuing and receiving "multiple" requests as populate() does. This actually saves a lot of overhead in network traffic and greatly increases response time.
In addition, these are "real joins" so there is a lot more you can do which cannot be achieved with "multiple queries". For instance you can "sort" results on the "join" and only return the top results, where as using populate() needs to pull in "all parents" before it can even look for which "children" to return in result. The same goes for "filtering" conditions on the child "join" as well.
There is some more detail on this on Querying after populate in Mongoose about the general limitations and what you actually can even practically do to "automate" the generation of such "complex" aggregation pipeline statements where needed.
Demonstration
Another common problem with doing these "joins" and understanding referenced schema in general is that people often get the concepts wrong on where and when to store the references and how it all works. Therefore the following listings serve as demonstration of both the storage and retrieval of such data.
In a native Promises implementation for older NodeJS releases:
// Declare `mongoose` explicitly. The chained form `= mongoose = require(...)`
// assigns to an undeclared identifier, creating an implicit global and
// throwing a ReferenceError in strict mode / ES modules.
const mongoose = require('mongoose');
const { Schema } = mongoose;

const uri = 'mongodb://localhost/usertest';

mongoose.Promise = global.Promise;
// Print every operation mongoose sends to the server
mongoose.set('debug',true);

const formatTypeSchema = new Schema({
  name: String
});

// Embedded portfolio entry holding a reference to a FormatType
const portfolioSchema = new Schema({
  name: String,
  formatType: { type: Schema.Types.ObjectId, ref: 'FormatType' }
});

const userSchema = new Schema({
  name: String,
  portfolio: [portfolioSchema]
});

const FormatType = mongoose.model('FormatType', formatTypeSchema);
const User = mongoose.model('User', userSchema);

// Pretty-print any value as indented JSON
const log = data => console.log(JSON.stringify(data, undefined, 2));
// Promise-chain version of the demonstration: seed data, show plain users,
// populate() one user, $lookup another, and finally run the expressive
// $lookup when the connected server reports version 3.6 or later.
(function() {

  mongoose.connect(uri).then(conn => {

    let db = conn.connections[0].db;

    return db.command({ buildInfo: 1 }).then(({ version }) => {
      // Keep the text before any "-" suffix and read it as a number
      version = parseFloat(version.match(new RegExp(/(?:(?!-).)*/))[0]);

      // Each .then() call is closed before the next one is chained; the
      // original left the first .then( and the outer arrow body unclosed.
      return Promise.all(Object.entries(conn.models).map(([k,m]) => m.remove()))
        .then(() => FormatType.insertMany(
          [ 'A', 'B', 'C' ].map(name => ({ name }))
        ))
        .then(([A, B, C]) => User.insertMany(
          [
            {
              name: 'User 1',
              portfolio: [
                { name: 'Port A', formatType: A },
                { name: 'Port B', formatType: B }
              ]
            },
            {
              name: 'User 2',
              portfolio: [
                { name: 'Port C', formatType: C }
              ]
            }
          ]
        ))
        .then(() => User.find())
        .then(users => log({ users }))
        .then(() => User.findOne({ name: 'User 1' })
          .populate('portfolio.formatType')
        )
        .then(user1 => log({ user1 }))
        .then(() => User.aggregate([
          { "$match": { "name": "User 2" } },
          { "$lookup": {
            "from": FormatType.collection.name,
            "localField": "portfolio.formatType",
            "foreignField": "_id",
            "as": "formats"
          }},
          { "$project": {
            "name": 1,
            "portfolio": {
              "$map": {
                "input": "$portfolio",
                "in": {
                  "name": "$$this.name",
                  "formatType": {
                    "$arrayElemAt": [
                      "$formats",
                      { "$indexOfArray": [ "$formats._id", "$$this.formatType" ] }
                    ]
                  }
                }
              }
            }
          }}
        ]))
        .then(user2 => log({ user2 }))
        .then(() =>
          ( version >= 3.6 ) ?
            User.aggregate([
              { "$lookup": {
                "from": FormatType.collection.name,
                "let": { "portfolio": "$portfolio" },
                "as": "portfolio",
                "pipeline": [
                  { "$match": {
                    "$expr": {
                      "$in": [ "$_id", "$$portfolio.formatType" ]
                    }
                  }},
                  { "$project": {
                    "_id": {
                      "$arrayElemAt": [
                        "$$portfolio._id",
                        { "$indexOfArray": [ "$$portfolio.formatType", "$_id" ] }
                      ]
                    },
                    "name": {
                      "$arrayElemAt": [
                        "$$portfolio.name",
                        { "$indexOfArray": [ "$$portfolio.formatType", "$_id" ] }
                      ]
                    },
                    "formatType": "$$ROOT",
                  }}
                ]
              }}
            ]).then(users => log({ users })) : ''
        );
    });

  })
  .catch(e => console.error(e))
  // Runs after success or after the catch above, so the connection always closes
  .then(() => mongoose.disconnect());

})()
And with async/await syntax for newer NodeJS releases, including current LTS v.8.x series:
// Declare `mongoose` explicitly. The chained form `= mongoose = require(...)`
// assigns to an undeclared identifier, creating an implicit global and
// throwing a ReferenceError in strict mode / ES modules.
const mongoose = require('mongoose');
const { Schema } = mongoose;

const uri = 'mongodb://localhost/usertest';

mongoose.Promise = global.Promise;
// Print every operation mongoose sends to the server
mongoose.set('debug',true);

const formatTypeSchema = new Schema({
  name: String
});

// Embedded portfolio entry holding a reference to a FormatType
const portfolioSchema = new Schema({
  name: String,
  formatType: { type: Schema.Types.ObjectId, ref: 'FormatType' }
});

const userSchema = new Schema({
  name: String,
  portfolio: [portfolioSchema]
});

const FormatType = mongoose.model('FormatType', formatTypeSchema);
const User = mongoose.model('User', userSchema);

// Pretty-print any value as indented JSON
const log = data => console.log(JSON.stringify(data, undefined, 2));
// async/await version of the demonstration: seed data, show plain users,
// populate() one user, $lookup another, and run the expressive $lookup when
// the connected server reports version 3.6 or later.
(async function() {

  try {

    const conn = await mongoose.connect(uri);

    let db = conn.connections[0].db;

    let { version } = await db.command({ buildInfo: 1 });
    // Keep the text before any "-" suffix and read it as a number
    version = parseFloat(version.match(new RegExp(/(?:(?!-).)*/))[0]);
    log(version);

    // Clean data
    await Promise.all(Object.entries(conn.models).map(([k,m]) => m.remove()));

    // Insert some things
    let [ A, B, C ] = await FormatType.insertMany(
      [ 'A', 'B', 'C' ].map(name => ({ name }))
    );

    await User.insertMany(
      [
        {
          name: 'User 1',
          portfolio: [
            { name: 'Port A', formatType: A },
            { name: 'Port B', formatType: B }
          ]
        },
        {
          name: 'User 2',
          portfolio: [
            { name: 'Port C', formatType: C }
          ]
        }
      ]
    );

    // Show plain users
    let users = await User.find();
    log({ users });

    // Get user with populate
    let user1 = await User.findOne({ name: 'User 1' })
      .populate('portfolio.formatType');
    log({ user1 });

    // Get user with $lookup
    let user2 = await User.aggregate([
      { "$match": { "name": "User 2" } },
      { "$lookup": {
        "from": FormatType.collection.name,
        "localField": "portfolio.formatType",
        "foreignField": "_id",
        "as": "formats"
      }},
      { "$project": {
        "name": 1,
        "portfolio": {
          "$map": {
            "input": "$portfolio",
            "in": {
              "name": "$$this.name",
              "formatType": {
                "$arrayElemAt": [
                  "$formats",
                  { "$indexOfArray": [ "$formats._id", "$$this.formatType" ] }
                ]
              }
            }
          }
        }
      }}
    ]);
    log({ user2 });

    // Expressive $lookup
    if ( version >= 3.6 ) {
      let users = await User.aggregate([
        { "$lookup": {
          "from": FormatType.collection.name,
          "let": { "portfolio": "$portfolio" },
          "as": "portfolio",
          "pipeline": [
            { "$match": {
              "$expr": {
                "$in": [ "$_id", "$$portfolio.formatType" ]
              }
            }},
            { "$project": {
              "_id": {
                "$arrayElemAt": [
                  "$$portfolio._id",
                  { "$indexOfArray": [ "$$portfolio.formatType", "$_id" ] }
                ]
              },
              "name": {
                "$arrayElemAt": [
                  "$$portfolio.name",
                  { "$indexOfArray": [ "$$portfolio.formatType", "$_id" ] }
                ]
              },
              "formatType": "$$ROOT",
            }}
          ]
        }}
      ]);
      log({ users })
    }

  } catch(e) {
    // Report failures on stderr rather than stdout
    console.error(e);
  } finally {
    // Close the connection on success and on failure, and wait for it; the
    // previous process.exit() here could cut the un-awaited disconnect short.
    await mongoose.disconnect();
  }

})()
The latter listing is commented on each stage to explain the parts, and you can at least see by comparison how both forms of syntax relate to each other.
Note that the "expressive" $lookup example only runs where the MongoDB server connected to actually supports the syntax.
And the "output" for those who cannot be bothered to run the code themselves:
Mongoose: formattypes.remove({}, {})
Mongoose: users.remove({}, {})
Mongoose: formattypes.insertMany([ { _id: 5b1601d8be9bf225554783f5, name: 'A', __v: 0 }, { _id: 5b1601d8be9bf225554783f6, name: 'B', __v: 0 }, { _id: 5b1601d8be9bf225554783f7, name: 'C', __v: 0 } ], {})
Mongoose: users.insertMany([ { _id: 5b1601d8be9bf225554783f8, name: 'User 1', portfolio: [ { _id: 5b1601d8be9bf225554783fa, name: 'Port A', formatType: 5b1601d8be9bf225554783f5 }, { _id: 5b1601d8be9bf225554783f9, name: 'Port B', formatType: 5b1601d8be9bf225554783f6 } ], __v: 0 }, { _id: 5b1601d8be9bf225554783fb, name: 'User 2', portfolio: [ { _id: 5b1601d8be9bf225554783fc, name: 'Port C', formatType: 5b1601d8be9bf225554783f7 } ], __v: 0 } ], {})
Mongoose: users.find({}, { fields: {} })
{
"users": [
{
"_id": "5b1601d8be9bf225554783f8",
"name": "User 1",
"portfolio": [
{
"_id": "5b1601d8be9bf225554783fa",
"name": "Port A",
"formatType": "5b1601d8be9bf225554783f5"
},
{
"_id": "5b1601d8be9bf225554783f9",
"name": "Port B",
"formatType": "5b1601d8be9bf225554783f6"
}
],
"__v": 0
},
{
"_id": "5b1601d8be9bf225554783fb",
"name": "User 2",
"portfolio": [
{
"_id": "5b1601d8be9bf225554783fc",
"name": "Port C",
"formatType": "5b1601d8be9bf225554783f7"
}
],
"__v": 0
}
]
}
Mongoose: users.findOne({ name: 'User 1' }, { fields: {} })
Mongoose: formattypes.find({ _id: { '$in': [ ObjectId("5b1601d8be9bf225554783f5"), ObjectId("5b1601d8be9bf225554783f6") ] } }, { fields: {} })
{
"user1": {
"_id": "5b1601d8be9bf225554783f8",
"name": "User 1",
"portfolio": [
{
"_id": "5b1601d8be9bf225554783fa",
"name": "Port A",
"formatType": {
"_id": "5b1601d8be9bf225554783f5",
"name": "A",
"__v": 0
}
},
{
"_id": "5b1601d8be9bf225554783f9",
"name": "Port B",
"formatType": {
"_id": "5b1601d8be9bf225554783f6",
"name": "B",
"__v": 0
}
}
],
"__v": 0
}
}
Mongoose: users.aggregate([ { '$match': { name: 'User 2' } }, { '$lookup': { from: 'formattypes', localField: 'portfolio.formatType', foreignField: '_id', as: 'formats' } }, { '$project': { name: 1, portfolio: { '$map': { input: '$portfolio', in: { name: '$$this.name', formatType: { '$arrayElemAt': [ '$formats', { '$indexOfArray': [ '$formats._id', '$$this.formatType' ] } ] } } } } } } ], {})
{
"user2": [
{
"_id": "5b1601d8be9bf225554783fb",
"name": "User 2",
"portfolio": [
{
"name": "Port C",
"formatType": {
"_id": "5b1601d8be9bf225554783f7",
"name": "C",
"__v": 0
}
}
]
}
]
}
Mongoose: users.aggregate([ { '$lookup': { from: 'formattypes', let: { portfolio: '$portfolio' }, as: 'portfolio', pipeline: [ { '$match': { '$expr': { '$in': [ '$_id', '$$portfolio.formatType' ] } } }, { '$project': { _id: { '$arrayElemAt': [ '$$portfolio._id', { '$indexOfArray': [ '$$portfolio.formatType', '$_id' ] } ] }, name: { '$arrayElemAt': [ '$$portfolio.name', { '$indexOfArray': [ '$$portfolio.formatType', '$_id' ] } ] }, formatType: '$$ROOT' } } ] } } ], {})
{
"users": [
{
"_id": "5b1601d8be9bf225554783f8",
"name": "User 1",
"portfolio": [
{
"_id": "5b1601d8be9bf225554783fa",
"name": "Port A",
"formatType": {
"_id": "5b1601d8be9bf225554783f5",
"name": "A",
"__v": 0
}
},
{
"_id": "5b1601d8be9bf225554783f9",
"name": "Port B",
"formatType": {
"_id": "5b1601d8be9bf225554783f6",
"name": "B",
"__v": 0
}
}
],
"__v": 0
},
{
"_id": "5b1601d8be9bf225554783fb",
"name": "User 2",
"portfolio": [
{
"_id": "5b1601d8be9bf225554783fc",
"name": "Port C",
"formatType": {
"_id": "5b1601d8be9bf225554783f7",
"name": "C",
"__v": 0
}
}
],
"__v": 0
}
]
}
// User schema: a device identifier, a creation timestamp that defaults to
// Date.now, and an array of ObjectId references to 'Interests' documents.
var UserSchema = Schema (
{
android_id: String,
created: {type: Date, default:Date.now},
interests: [{ type: Schema.Types.ObjectId, ref: 'Interests' }],
});
Users.aggregate([
{ $match: {android_id: {$ne: userID}, interests: {$elemMatch: {$in: ids}} }},
{ $group: { _id: { android_id: '$android_id'},count: {$sum: 1}}},
{ $sort: {count: -1}},
{ $limit: 5 }],
I need to find the top 5 android_ids of the users with the most interests in common with me (ids array). I can work with the array of only matched elements from the interests array too.
You seemed to be going along the right lines here but you do need to consider that arrays have special considerations for comparisons.
Your basic start here is to find all users that are not the current user, and that you also need at least the "interests" array of the current user as well. You seem to be doing that already, but for here let us consider that you have the whole user object for the current user which will be used in the listing.
This makes your "top 5" basically a product of "Not me, and the most interests in common", which means you basically need to count the "overlap" of interests on each user compared to the current user.
This is basically the $setIntersection of the two arrays or "sets" where the elements in common are returned. In order to count how many are in common, there is also the $size operator. So you apply like this:
// Rank other users by how many interests they share with `user` and return
// the five with the largest overlap.
Users.aggregate(
[
// Exclude the current user and keep only users sharing at least one interest
{ "$match": {
"android_id": { "$ne": user.android_id },
"interests": { "$in": user.interests }
}},
// "common" = number of elements present in both interest arrays
{ "$project": {
"android_id": 1,
"interests": 1,
"common": {
"$size": {
"$setIntersection": [ "$interests", user.interests ]
}
}
}},
// Largest overlap first, then keep the top five
{ "$sort": { "common": -1 } },
{ "$limit": 5 }
],
// Placeholder callback: receives (err, result) and currently ignores both
function(err,result) {
}
);
The result returned in "common" is the count of common interests between the current user and the user being examined in the data. This data is then processed by $sort in order to put the largest number of common interests on top, and then $limit returns only the top 5.
If for some reason your MongoDB version is presently lower than MongoDB 2.6 where both the $setIntersection and $size operators are introduced, then you can still do this, but it just takes a longer form of processing the arrays.
Mainly you need to $unwind the arrays and process each match individually:
// Pipeline stages for servers without $setIntersection / $size: unwind the
// interests and count, per user, how many equal one of the current user's.
{ "$match": {
"android_id": { "$ne": user.android_id },
"interests": { "$in": user.interests }
}},
// One document per interest so each can be compared individually
{ "$unwind": "$interests" },
{ "$group": {
"_id": "$_id",
"android_id": { "$first": "$android_id" },
// Rebuild the full interests array for the response
"interests": { "$push": "$interests" },
"common": {
"$sum": {
// One $cond per interest of the current user; three are hard-coded here
"$add": [
{ "$cond": [{ "$eq": [ "$interests", user.interests[0] ] },1,0 ] },
{ "$cond": [{ "$eq": [ "$interests", user.interests[1] ] },1,0 ] },
{ "$cond": [{ "$eq": [ "$interests", user.interests[2] ] },1,0 ] }
]
}
}
}},
{ "$sort": { "common": -1 }},
{ "$limit": 5 }
Which is more practically coded to generate the conditional matches in the pipeline:
// One 1/0 test per interest of the current user, generated rather than
// written out by hand.
var commonTests = user.interests.map(function(interest) {
  return { "$cond": [{ "$eq": [ "$interests", interest ] }, 1, 0 ] };
});

// Per-user grouping stage: rebuilds the interests array and sums the matches
var group =
  { "$group": {
    "_id": "$_id",
    "android_id": { "$first": "$android_id" },
    "interests": { "$push": "$interests" },
    "common": {
      "$sum": {
        "$add": commonTests
      }
    }
  }};

// Match candidates, unwind their interests, count the overlap, take the top 5
var pipeline = [
  { "$match": {
    "android_id": { "$ne": user.android_id },
    "interests": { "$in": user.interests }
  }},
  { "$unwind": "$interests" },
  group,
  { "$sort": { "common": -1 }},
  { "$limit": 5 }
];

User.aggregate(pipeline,function(err,result) {

});
The key elements there being that "both" the current user and the user being inspected have their "interests" separated out for comparison to see if they are "equal". The result from $cond attributes a 1 where this is true or 0 where false.
Any returns ( and only ever expected to be 1 at best, per pair ) are passed to the $sum accumulator which counts the matches in common. You can alternately $match with an $in condition again:
{ "$unwind": "$interests" },
{ "$match": { "interests": { "$in": user.interests } },
{ "$group": {
"_id": "$_id",
"android_id": { "$first": "$android_id" },
"common": { "$sum": 1 }
}}
But this is naturally destructive of the array content as non matches are filtered out. So it depends on what you would rather have in the response.
That is the basic process for getting the "common" counts for use in further processing like $sort and $limit in order to get your "top 5".
Just for fun, here is a basic node.js listing to show the effects of common matches:
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
// Connect to the local "sample" database.
mongoose.connect('mongodb://localhost/sample');

// An interest has only a name; a user has a name plus an array of ObjectId
// references to 'Interest' documents.
var interestSchema = new Schema({ name: String }),
    userSchema = new Schema({
      name: String,
      interests: [
        { type: Schema.Types.ObjectId, ref: 'Interest' }
      ]
    });

// Models compiled from the schemas above; interestHash starts out empty.
var Interest = mongoose.model('Interest', interestSchema),
    User = mongoose.model('User', userSchema),
    interestHash = {};
// Seeds sample interests and users, then lists every user other than "Bob"
// ordered by the number of interests shared with him. The steps run one after
// another through async.series; the final callback disconnects.
async.series(
  [
    // Step 1: remove all existing Interest and User documents.
    function(callback) {
      async.each([Interest, User], function(model, callback) {
        model.remove({}, callback);
      }, callback);
    },

    // Step 2: create the interests and record each _id under its name.
    function(callback) {
      async.each(
        [
          "Tennis",
          "Football",
          "Gaming",
          "Cooking",
          "Yoga"
        ],
        function(interest, callback) {
          Interest.create({ name: interest }, function(err, obj) {
            // Stop here on failure: otherwise the lines below would
            // dereference `obj` and invoke the callback a second time.
            if (err) return callback(err);
            interestHash[obj.name] = obj._id;
            callback();
          });
        },
        callback
      );
    },

    // Step 3: create the users, translating interest names into the ids
    // recorded in interestHash.
    function(callback) {
      async.each(
        [
          { name: "Bob", interests: ["Tennis","Football","Gaming"] },
          { name: "Tom", interests: ["Football","Cooking","Yoga"] },
          { name: "Sue", interests: ["Tennis","Gaming","Yoga","Cooking"] }
        ],
        function(data, callback) {
          // Build a fresh document instead of rewriting the seed object.
          var doc = {
            name: data.name,
            interests: data.interests.map(function(interest) {
              return interestHash[interest];
            })
          };
          User.create(doc, function(err) {
            callback(err);
          });
        },
        callback
      );
    },

    // Step 4: load "Bob", then aggregate the other users by common interests.
    function(callback) {
      async.waterfall(
        [
          function(callback) {
            User.findOne({ name: "Bob" }, callback);
          },
          function(user, callback) {
            console.log(user);
            User.aggregate(
              [
                // Everyone except the user, sharing at least one interest.
                { "$match": {
                  "_id": { "$ne": user._id },
                  "interests": { "$in": user.interests }
                }},
                // "common" is the size of the intersection of both
                // interest arrays.
                { "$project": {
                  "name": 1,
                  "interests": 1,
                  "common": {
                    "$size": {
                      "$setIntersection": [ "$interests", user.interests ]
                    }
                  }
                }},
                { "$sort": { "common": -1 } }
              ],
              function(err, result) {
                // Stop here on failure so populate is not run and the
                // callback is not invoked twice.
                if (err) return callback(err);
                // Replace the interest ids with the Interest documents
                // before printing.
                Interest.populate(result, 'interests', function(err, result) {
                  console.log(result);
                  callback(err);
                });
              }
            );
          }
        ],
        callback
      );
    }
  ],
  // Runs once after the last step, or as soon as any step reports an error.
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);
Which will output:
{ _id: 55dbd7be0e5516ac16ea62d1,
name: 'Bob',
__v: 0,
interests:
[ 55dbd7be0e5516ac16ea62cc,
55dbd7be0e5516ac16ea62cd,
55dbd7be0e5516ac16ea62ce ] }
[ { _id: 55dbd7be0e5516ac16ea62d3,
name: 'Sue',
interests:
[ { _id: 55dbd7be0e5516ac16ea62cc, name: 'Tennis', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62ce, name: 'Gaming', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62d0, name: 'Yoga', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62cf, name: 'Cooking', __v: 0 } ],
common: 2 },
{ _id: 55dbd7be0e5516ac16ea62d2,
name: 'Tom',
interests:
[ { _id: 55dbd7be0e5516ac16ea62cd, name: 'Football', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62cf, name: 'Cooking', __v: 0 },
{ _id: 55dbd7be0e5516ac16ea62d0, name: 'Yoga', __v: 0 } ],
common: 1 } ]