Find duplicate inside array without $unwind - node.js

I have below users collection
[{
"_id": 1,
"adds": ["111", "222", "333", "111"]
}, {
"_id": 2,
"adds": ["555", "666", "777", "555"]
}, {
"_id": 3,
"adds": ["888", "999", "000", "888"]
}]
I need to find the duplicates inside the adds array
The expected output should be
[{
"_id": 1,
"adds": ["111"]
}, {
"_id": 2,
"adds": [ "555"]
}, {
"_id": 3,
"adds": ["888"]
}]
I have tried using many operators, such as $setUnion and $setDifference, but none of them did the trick.
Please help!!!

You can use $range to generate an array of indexes from 1 to n-1, where n is the $size of adds. Then you can "loop" through those indexes and check whether the element of adds at that index ($arrayElemAt) also exists somewhere before the index; if it does, it should be considered a duplicate. You can use $indexOfArray to check whether the element exists in the array, specifying 0 and the index as the search range.
Then you just need to use $project and $map to replace indexes with actual elements. You can also add $setUnion to avoid duplicated duplicates in final result set.
// Report, per document, each value that occurs more than once in `adds`.
db.users.aggregate([
  {
    // Stage 1: collect every position (1 .. size-1) whose element already
    // appears somewhere earlier in the array.
    $addFields: {
      duplicates: {
        $filter: {
          input: { $range: [1, { $size: "$adds" }] },
          as: "pos",
          cond: {
            $ne: [
              {
                // Search only the positions [0, pos) that precede the element.
                $indexOfArray: [
                  "$adds",
                  { $arrayElemAt: ["$adds", "$$pos"] },
                  0,
                  "$$pos",
                ],
              },
              -1,
            ],
          },
        },
      },
    },
  },
  {
    // Stage 2: turn the positions back into values; $setUnion with an empty
    // array collapses values that were found as duplicates more than once.
    $project: {
      _id: 1,
      adds: {
        $setUnion: [
          {
            $map: {
              input: "$duplicates",
              as: "pos",
              in: { $arrayElemAt: ["$adds", "$$pos"] },
            },
          },
          [],
        ],
      },
    },
  },
]);
Prints:
{ "_id" : 1, "adds" : [ "111" ] }
{ "_id" : 2, "adds" : [ "555" ] }
{ "_id" : 3, "adds" : [ "888" ] }

Here is another version that you might want to compare in terms of performance:
// Report, per document, each value that occurs more than once in `adds`.
// The stage is wrapped in a pipeline array: the shell tolerates a bare stage
// object, but the Node.js driver expects the pipeline to be an array.
db.users.aggregate([
  {
    $project: {
      adds: {
        $reduce: {
          // Loop variable: every index of `adds`, 0 .. size-1. The last index
          // can never have a later duplicate, so stopping one index earlier
          // would also work, at the cost of an extra $subtract.
          input: { $range: [0, { $size: "$adds" }] },
          initialValue: [],
          in: {
            $let: {
              vars: {
                // The element currently being examined.
                curr: { $arrayElemAt: ["$adds", "$$this"] },
              },
              in: {
                $cond: [
                  // An identical element after the current index means the
                  // current element is duplicated.
                  {
                    $ne: [
                      {
                        $indexOfArray: [
                          "$adds",
                          "$$curr",
                          { $add: ["$$this", 1] },
                        ],
                      },
                      -1,
                    ],
                  },
                  // Merge it into the duplicates found so far (set semantics,
                  // so each value is reported once).
                  { $setUnion: ["$$value", ["$$curr"]] },
                  // Otherwise carry the accumulator forward unchanged.
                  "$$value",
                ],
              },
            },
          },
        },
      },
    },
  },
]);
The logic is based on a loop variable which we get through the $range operator. This loop variable allows for sequential access of the adds array. For every item that we look at, we check if there is another identical one after the current index. If yes, we have a duplicate, otherwise not.

You can try below aggregation. The idea is to collect the distinct values and iterate over values and check if the value is present in adds array; if present keep the value else ignore the value.
// Report, per document, each value that occurs more than once in `adds`.
// The stage is wrapped in a pipeline array: the shell tolerates a bare stage
// object, but the Node.js driver expects the pipeline to be an array.
db.users.aggregate([
  {
    "$project": {
      "adds": {
        "$reduce": {
          // Iterate over the distinct values of `adds` only.
          "input": { "$setUnion": ["$adds", []] },
          "initialValue": [],
          "in": {
            "$concatArrays": [
              "$$value",
              {
                "$let": {
                  "vars": {
                    // Every occurrence of the current distinct value.
                    "match": {
                      "$filter": {
                        "input": "$adds",
                        "as": "a",
                        "cond": { "$eq": ["$$a", "$$this"] }
                      }
                    }
                  },
                  // Keep the value only when it occurs at least twice.
                  "in": {
                    "$cond": [
                      { "$gt": [{ "$size": "$$match" }, 1] },
                      ["$$this"],
                      []
                    ]
                  }
                }
              }
            ]
          }
        }
      }
    }
  }
]);

Related

Mongoose - How to query field in the last object of an array of objects

I have MongoDB documents structured like this:
{
"_id": "5d8b987f9f8b9f9c8c8b9f9",
"targetsList": [
{
"target": "user",
"statusList": [
{
"date": "2018-01-01",
"type": "OK"
},
{
"date": "2018-01-02",
"type": "FAILD"
}
]
}
]
}
And I want to count all documents whose "targetsList" array contains an object with "target" == "user", where the last element of that object's "statusList" array has "type" != "FAILD".
Any ideas on how to implement this kind of query?
Mongo playground:
https://mongoplayground.net/p/3bCoHRnh-KQ
In this example, I expected the count to be 1, because only the second object meets the conditions.
An aggregation pipeline
1st step - Filtering out where "targetsList.target": "user"
2nd step - $unwind on targetsList to get it out of array
3rd step - getting the last element of the targetsList.statusList array using $arrayElemAt
4th step - getting the results where that last element is not FAILD
5th step - getting the count
demo - you can try removing parts of the pipeline to see what the intermediate results are
// Count the documents that have at least one "user" target whose last
// statusList entry is not of type "FAILD".
db.collection.aggregate([
  // Pre-filter: drop documents that have no "user" target at all.
  {
    $match: {
      "targetsList.target": "user",
    },
  },
  // Emit one document per targetsList entry.
  {
    $unwind: "$targetsList",
  },
  // The pre-filter keeps whole documents, so entries for other targets are
  // still present after the unwind and must be removed here.
  {
    $match: {
      "targetsList.target": "user",
    },
  },
  // Replace statusList with its last element.
  {
    $project: {
      "targetsList.statusList": {
        $arrayElemAt: ["$targetsList.statusList", -1],
      },
    },
  },
  // Keep the entries whose last status is not FAILD.
  {
    $match: {
      "targetsList.statusList.type": {
        $ne: "FAILD",
      },
    },
  },
  // A document with several qualifying "user" targets must be counted once.
  {
    $group: {
      _id: "$_id",
    },
  },
  {
    $count: "withoutFailedInLastElemCount",
  },
]);
Unless it's crucial that the element be the last index, this should work for your case.
// Select the documents in which some statusList entry, of any target and at
// any position in the array, has type "FAILD".
db.collection.find({
"targetsList.statusList.type": {
$in: [
"FAILD"
]
}
})
This will retrieve documents where the type value is FAILD. To invert this you can swap $in for $nin.
Updated playground here
Here's another way to do it with a leading monster "$match".
// Count the documents that have at least one "user" target whose last
// statusList entry is not of type "FAILD".
db.collection.aggregate([
  {
    "$match": {
      // Plain query condition: skip documents with no "user" target.
      "targetsList.target": "user",
      // Fold over targetsList; the result is true as soon as one entry
      // satisfies both conditions below.
      "$expr": {
        "$reduce": {
          "input": "$targetsList",
          "initialValue": false,
          "in": {
            "$or": [
              "$$value",
              {
                "$and": [
                  // Only "user" entries may qualify; without this check a
                  // non-FAILD last status on any other target would match.
                  { "$eq": ["$$this.target", "user"] },
                  {
                    "$ne": [
                      { "$last": "$$this.statusList.type" },
                      "FAILD"
                    ]
                  }
                ]
              }
            ]
          }
        }
      }
    }
  },
  {
    "$count": "noFailedLastCount"
  }
]);
Try it on mongoplayground.net.

Removing Dynamic Fields by Association in MongoDB Aggregation

I'm trying to display a MongoDB aggregation result via react chartjs. in aggregation, I can remove one field whose value is static via the set operator. is there a way to remove a second field by an association whose value is dynamic? in the example below, {"A": "N"} denotes the field that is readily removed by the set operator, whereas {"A_count":1} denotes the corresponding dynamic field that I am trying to remove.
starting aggregation output
[{
"_id":"Fubar",
"A_set":[{"A":"Y"},{"A":"N"}],
"A_count_set":[{"A_count":0},{"A_count":1}]
}]
set operation for static field removal
{$set: {
A_set: {
$filter: {
input: "$A_set",
as: "x",
cond: { "$ne": [ "$$x", {"A":"N"}] }
}
}
}}
current aggregation output
[{
"_id":"Fubar",
"A_set":[{"A":"Y"}],
"A_count_set":[{"A_count":0},{"A_count":1}]
}]
target aggregation output
[{
"_id":"Fubar",
"A_set":[{"A":"Y"}],
"A_count_set":[{"A_count":0}]
}]
$project merge two array with the same position
$set filter array
$addFields recover the original array
$project remove the merge array
aggregate
// Remove the {"A": "N"} entry from A_set together with the A_count_set entry
// that sits at the same position.
db.collection.aggregate([
  {
    // Pair A_set[i] with A_count_set[i] by merging them into one object.
    $project: {
      anotherValue: {
        $map: {
          input: { $range: [0, { $size: "$A_set" }] },
          as: "i",
          in: {
            $mergeObjects: [
              { $arrayElemAt: ["$A_set", "$$i"] },
              { $arrayElemAt: ["$A_count_set", "$$i"] },
            ],
          },
        },
      },
    },
  },
  {
    // Drop every merged pair whose A is "N"; its A_count goes with it.
    $set: {
      anotherValue: {
        $filter: {
          input: "$anotherValue",
          as: "pair",
          cond: { "$ne": ["$$pair.A", "N"] },
        },
      },
    },
  },
  {
    // Split the surviving pairs back into the two original arrays.
    $addFields: {
      "A_set": {
        $map: {
          input: "$anotherValue",
          as: "pair",
          in: { "A": "$$pair.A" },
        },
      },
      "A_count_set": {
        $map: {
          input: "$anotherValue",
          as: "pair",
          in: { "A_count": "$$pair.A_count" },
        },
      },
    },
  },
  {
    // Discard the temporary merged array.
    "$project": {
      "anotherValue": 0,
    },
  },
]);
mongoplayground

Find duplicate values in Array and sum them MongoDB Aggregation without using $unwind and $group

This is my MongoDB document:
{
"name": ClassA1
"data": [
{
"first": John,
"second": David"
"age": 21,
"score": 1
},
{
"first": John,
"second": David"
"age": 21,
"score": 1
},
{
"first": John,
"second": David"
"age": 22,
"score": 1
}
]
}
What i am trying to achieve here is i want to find if there is repeating "age" (value 21) in data array i want to sum the score field only and copy the other object but without using the $unwind and $group in MongoDB aggregation.
other fields like first, second can be copied as it is and it will be always be same in my case the only different fields will be age and score. i want to compare repeating age and sum the scores in this case my output should be:
{
"name": ClassA1
"data": [
{
"first": John,
"second": David"
"age": 21,
"score": 2
},
{
"first": John,
"second": David"
"age": 22,
"score": 1
}
]
}
As you can see the first element's score is 2 now which is added from the previous one.
I hope you understand.
Playground: https://mongoplayground.net/p/nyXUMEivMIt
So this is what i have tried:
db.collection.aggregate([
{
$addFields: {
values: {
$reduce: {
input: "$array",
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
$cond: [
{
$in: [
"$$this.age",
"$$value.age"
]
},
[
{
"$sum": {
"$add": [
"$$this.score",
"$$value.score"
]
}
}
],
[
"$$this"
]
]
}
]
}
}
}
}
}
])
this is the error i am getting:
query failed: (Location16554) PlanExecutor error during aggregation :: caused by :: $add only supports numeric or date types, not array
Here is my solution for it. I just used JavaScript to solve the problem. We put the first item with a new age in the map; later, if we find another person with the same age, we add the score to the previously stored person object and leave the current entry undefined. After iterating over the array, we filter out all the undefined items and return the result.
Edit:
If you want to run this on mongo playground you need to convert the body of the function to a single line string. Try it here: Mongo playground example
// Aggregation pipeline: select the "ClassA1" document and merge the entries of
// its `data` array that share the same `age`, summing their `score`.
// Declared with `const` because assigning to an undeclared name throws in
// strict mode / ES modules.
const PipeLine = [
  {
    // The sample document identifies the class through its `name` field.
    $match: {
      name: "ClassA1",
    },
  },
  {
    $project: {
      _id: 0,
      name: 1,
      data: {
        $function: {
          /**
           * Merge entries with the same age. Must stay self-contained: it is
           * executed by the server, not in the calling scope.
           *
           * @param {Array<object>} data - entries carrying `age` and `score`.
           * @returns {Array<object>} one entry per distinct age, in order of
           *   first appearance, with the scores of that age summed; an empty
           *   array when `data` is not an array.
           */
          body: function (data) {
            if (!Array.isArray(data)) return [];
            const firstByAge = new Map();
            return data
              .map((item) => {
                const first = firstByAge.get(item.age);
                if (first) {
                  // Repeated age: fold the score into the first entry and
                  // leave a hole that is filtered out below.
                  first.score += item.score;
                  return undefined;
                }
                // First entry for this age: keep a copy so the caller's
                // objects are never mutated.
                const copy = Object.assign({}, item);
                firstByAge.set(item.age, copy);
                return copy;
              })
              .filter((item) => item !== undefined);
          },
          args: ["$data"],
          lang: "js",
        },
      },
    },
  },
];

Mongo DB How to find common in two arrays and sort in descending order in a single query

Here is my data saves in database
0 _id:5e4d18bd10e5482eb623c6e4
name:'John singh',
cars_owned:[
{car_id:'1'},
{car_id:'5'},
{car_id:'7'},
{car_id:'8'}
],
1 _id:5e4d18bd10e5482eb6g57f5rt
name:'Josh kumar',
cars_owned:[
{car_id:'7'},
{car_id:'9'},
{car_id:'1'},
{car_id:'3'}
],
2 _id:5e4d18bd10e5482eb6r67222
name:'Jesse nigam',
cars_owned:[
{car_id:'6'},
{car_id:'7'},
{car_id:'9'},
{car_id:'3'}
],
3 _id:5e4d18bd10e5482eb6467ii46
name:'Jordan khan',
cars_owned:[
{car_id:'3'},
{car_id:'1'},
{car_id:'4'},
{car_id:'5'}
]
Now I want to search a user with its starting name that is 'J' and also cars_owned by me the input will be
'J',cars_owned['3','7','9','12','10']
and the output will be
1 _id:5e4d18bd10e5482eb6g57f5rt
name:'Josh kumar',
cars_owned:[
{car_id:'7'},
{car_id:'9'},
{car_id:'1'},
{car_id:'3'}
],
2 _id:5e4d18bd10e5482eb6r67222
name:'Jesse nigam',
cars_owned:[
{car_id:'6'},
{car_id:'7'},
{car_id:'9'},
{car_id:'3'}
],
0 _id:5e4d18bd10e5482eb623c6e4
name:'John singh',
cars_owned:[
{car_id:'1'},
{car_id:'5'},
{car_id:'7'},
{car_id:'8'}
],
3 _id:5e4d18bd10e5482eb6467ii46
name:'Jordan khan',
cars_owned:[
{car_id:'3'},
{car_id:'1'},
{car_id:'4'},
{car_id:'5'}
]
and now you will notice the result are all the users whose name starts with 'J' and cars_owned is in 3,7,9,12,10 but in descending order that is the user with most cars_owned matched in on top and according others. I want the result to be sorted according to maximum cars_owned matched in a single mongo db query. So far I have made one simple find query.
User_data.find({name: { $regex: "^" + search_name },
cars_owned:{$elemMatch:{car_id:'3',car_id:'7',car_id:'9',car_id:'12',car_id:'10'}
}},function(err,resp){
console.log(JSON.stringify(resp,null,4));
});
but it only returns one document. I want all documents which have any of the given car_id values, sorted by the number of matches. If you don't understand anything in this question, feel free to ask in a comment, but please give the answer as a single MongoDB query; I am also OK with the aggregation framework. Thanks in advance.
You have to use aggregation for this.
First you define how many user's cars are common with your list, then filter results matching name and at least one car common, and finally sort your result by common cars.
Here's the query :
// List users whose name starts with "J" and who own at least one of the given
// cars, ordered by how many of those cars they own (most first).
db.collection.aggregate([
  {
    // Number of requested car ids that the user owns.
    $addFields: {
      commonCars: {
        $size: {
          $setIntersection: [
            ["3", "7", "9", "12", "10"],
            "$cars_owned.car_id",
          ],
        },
      },
    },
  },
  {
    $match: {
      $expr: {
        $and: [
          // $regexMatch already yields a boolean, so it is used directly.
          { $regexMatch: { input: "$name", regex: "^J" } },
          { $gt: ["$commonCars", 0] },
        ],
      },
    },
  },
  {
    $sort: { "commonCars": -1 },
  },
]);
And you can test it here
EDIT
if you don't need to sort your result, you can achieve this in one single match stage :
// List users whose name starts with "J" and who own at least one of the given
// cars; no ordering is applied.
db.collection.aggregate([
  {
    $match: {
      $expr: {
        $and: [
          // $regexMatch already yields a boolean, so it is used directly.
          { $regexMatch: { input: "$name", regex: "^J" } },
          {
            // At least one requested car id is among the user's cars.
            $gt: [
              {
                $size: {
                  $setIntersection: [
                    ["3", "7", "9", "12", "10"],
                    "$cars_owned.car_id",
                  ],
                },
              },
              0,
            ],
          },
        ],
      },
    },
  },
]);
Test it here

Aggregate to get average from document and of array elements

I've got this array of these objects:
item1 = {
name:'item',
val:1,
list:[
{type:'a',value:1},
{type:'b',value:1},
{type:'c',value:1}
]
};
item2 = {
name:'item',
val:5,
list:[
{type:'a',value:3},
{type:'b',value:99},
{type:'c',value:1}
]
};
They all have the same array, same types 'a', 'b' & 'c', but different values.
How can I get the average value of type 'a', 'b' & 'c'?
How can I get the average value of all items?
I'm expecting
itemAvg = {
name:'item',
val:3,
list:[
{type:'a',value:2},
{type:'b',value:50},
{type:'c',value:1}
]
};
I thought grouping first the val by name and pushing list.
Then unwinding list.
Then grouping the list by types.
But this doesn't work
model.aggregate([
{ $match : <condition> },
{ $group : {
_id:{name:'$name'},
ValAvg:{$avg:'$val'}
List:{$push:'list'}
}},
{ $unwind:'$List'},
{ $group:{
_id:{type:'$List.type',
ValueAvg:{$avg:'$List.value'}
}}
])
I was hoping the last group after the unwind would group by type and calculate the average of value for each distinct type... but no... I get ValueAvg=0
Thanks
You need two $unwind stages since you pushed arrays inside an array and then follow up with two $group stages:
// Average `val` across the matched documents and average `list[].value` per
// `type`, then rebuild a single document in the original shape.
// `<condition>` is a placeholder to be replaced with a real $match filter.
model.aggregate([
{ "$match": { <condition> }},
// Collect every document's `list` array; `list` becomes an array of arrays.
{ "$group": {
// NOTE(review): "name" has no "$" prefix, so it is a constant string and all
// matched documents fall into one group; use "$name" to group per distinct name.
"_id": "name",
"val": { "$avg": "$val" },
"list": { "$push": "$list" }
}},
// First unwind: one document per original `list` array.
{ "$unwind": "$list" },
// Second unwind: one document per element of those arrays.
{ "$unwind": "$list" },
// Average the values per (name, type); `val` is carried along unchanged.
{ "$group": {
"_id": { "name": "$_id", "type": "$list.type" },
"val": { "$avg": "$val" },
"valAvg": { "$avg": "$list.value" }
}},
// Sort so the types are pushed in a stable order in the final group.
{ "$sort": { "_id": 1 } },
// Rebuild the original document shape with the averaged list.
{ "$group": {
"_id": "$_id.name",
"val": { "$avg": "$val" },
"list": { "$push": {
"type": "$_id.type",
"value": "$valAvg"
}}
}}
])
So by grouping at the "type" level first the obtained results can get the averages across the elements, then the original form is reconstructed. Note the $sort to retain the order of elements, otherwise they will be reversed:
{
"_id" : "name",
"val" : 3,
"list" : [
{
"type" : "a",
"value" : 2
},
{
"type" : "b",
"value" : 50
},
{
"type" : "c",
"value" : 1
}
]
}
If you are tempted to $unwind first to avoid putting arrays inside arrays then don't do that. The averages you seek outside the array will be affected by the number of elements in the array when unwound. So arrays with more elements in one document to another would "weight" their value more highly in determining the average there.

Resources