Multiple group using Mongoose - node.js

I'm wanting to get the sum of all events for each company. Here is my data:
{company: "1" event:"a"}
{company: "1" event:"b"}
{company: "1" event:"c"}
{company: "2" event:"b"}
{company: "2" event:"b"}
{company: "3" event:"c"}
{company: "3" event:"c"}
I currently have this to aggregate the date for events but I'm struggling to further group them by company:
{
"aggregate": [
{
"$group": {
"_id": "$event",
"count": {"$sum": 1}
}
},
{"$sort": {"_id": 1}}
]
}
Which renders these results:
[
{
"_id": "a",
"count": 1
},
{
"_id": "b",
"count": 3
},
{
"_id": "c",
"count": 2
}]
How can I further group the event counts by company to produce something along the lines of:
[{
"_id": {
"company" :"1",
"events": [
{
"event": "a",
"count": 1
},
{
"event": "b",
"count": 1
},
{
"event": "c",
"count": 1
},
]
}
},
{
"_id": {
"company" :"2",
"events": [
{
"event": "b",
"count": 2
}
]
}
},
{
"_id": {
"company" :"3",
"events": [
{
"event": "c",
"count": 2
}
]
}
}
]
I've tried all sorts of further groups in the query but can't produce the data I'm after. There is also one caveat: I'm unable to process anything server side or change the schema, so therefore I am only enable to pass in a query which I hope can be done.

db.collection.aggregate([
{
$group:{
_id:{company:"$company",event:"$event"},
count:{$sum:1}
}
},
{
$group:{
_id:"$_id.company",
events:{$push:{event:"$_id.event",count:"$count"}}
}
}
])
And if u need exact same output you mentioned then add $project stage like
$project:{_id:{company:"$_id",events:"$events"}}

Related

remove duplicate documents in mongodb aggregate

I have offers collection that look like below, each document has offer id and place.
[
{
"_id": "p1-o1",
"place": "1"
},
{
"_id": "p1-o2",
"place": "1"
},
{
"_id": "p1-on",
"place": "1"
},
{
"_id": "p2-p1",
"place": "2"
},
{
"_id": "p2-o2",
"place": "2"
},
{
"_id": "p2-on",
"place": "2"
},
....
{
"_id": "pn-0n",
"place": "n"
}
]
I need to aggregate and result a stream of paginated documents like below, where each page has a single offer from a single place,
for first page:
[
{
"_id": "p1-o1",
"place": "1"
},
{
"_id": "p2-o1",
"place": "2"
},
.....
{
"_id": "pn-o1",
"place": "n"
}
]
for second page
[
{
"_id": "p1-o2",
"place": "1"
},
{
"_id": "p2-o2",
"place": "2"
},
.....
{
"_id": "pn-on",
"place": "n"
}
]
where "p1-o1" is the first offer in the first place and so on ...

Aggregate duplicate documents with values in array in Mongo

I have a large collection of documents that look as follows:
{ "_id": "5a760191813a54000b8475f1", "orders": [{ "row": "3", "seat": "11" }, { "row": "3", "seat": "12" }], "product_id": "5a7628bedbcc42000aa7f614" },
{ "_id": "5a75f6f17abe45000a3ba05e", "orders": [{ "row": "3", "seat": "12" }, { "row": "3", "seat": "13" }], "product_id": "5a7628bedbcc42000aa7f614" },
{ "_id": "5a75ebdf813a54000b8475e7", "orders": [{ "row": "5", "seat": "16" }, { "row": "5", "seat": "15" }], "product_id": "5a75f711dbcc42000c459efc" }
I need to be able to find any documents where the product_id and items in the orders array are duplicates. I can't quite seem to wrap my head around accomplishing this. Any pointers?
I don't know what output you want, but this has the information about the duplicates, maybe you want to add unwind on duplicates also.
Result documents
product_id
order (that found duplicated)
duplicates (the documents that had that order as duplicate)
For your data would print
[{
"duplicates": [
"5a760191813a54000b8475f1",
"5a75f6f17abe45000a3ba05e"
],
"order": {
"row": "3",
"seat": "12"
},
"product_id": "5a7628bedbcc42000aa7f614"
}]
Query
(run it on your driver, MongoPlayground doesn't keep the order of fields and can show wrong results)
aggregate(
[{"$unwind" : {"path" : "$orders"}},
{
"$group" : {
"_id" : {
"orders" : "$orders",
"product_id" : "$product_id"
},
"duplicates" : {
"$push" : "$_id"
}
}
},
{"$match" : {"$expr" : {"$gt" : [ {"$size" : "$duplicates"}, 1 ]}}},
{
"$project" : {
"_id" : 0,
"order" : "$_id.orders",
"product_id" : "$_id.product_id",
"duplicates" : 1
}
}
])
Data (i added some more data)
[
{
"_id": "5a760191813a54000b8475f1",
"orders": [
{
"row": "3",
"seat": "11"
},
{
"row": "3",
"seat": "12"
}
],
"product_id": "5a7628bedbcc42000aa7f614"
},
{
"_id": "5a75f6f17abe45000a3ba05g",
"orders": [
{
"row": "3",
"seat": "12"
},
{
"row": "3",
"seat": "13"
}
],
"product_id": "5a7628bedbcc42000aa7f614"
},
{
"_id": "5a75f6f17abe45000a3ba05e",
"orders": [
{
"row": "3",
"seat": "12"
},
{
"row": "3",
"seat": "13"
}
],
"product_id": "5a7628bedbcc42000aa7f614"
},
{
"_id": "5a75ebdf813a54000b8475e7",
"orders": [
{
"row": "5",
"seat": "16"
},
{
"row": "5",
"seat": "15"
}
],
"product_id": "5a75f711dbcc42000c459efc"
}
]
Results
[{
"duplicates": [
"5a75f6f17abe45000a3ba05g",
"5a75f6f17abe45000a3ba05e"
],
"order": {
"row": "3",
"seat": "13"
},
"product_id": "5a7628bedbcc42000aa7f614"
},
{
"duplicates": [
"5a760191813a54000b8475f1",
"5a75f6f17abe45000a3ba05g",
"5a75f6f17abe45000a3ba05e"
],
"order": {
"row": "3",
"seat": "12"
},
"product_id": "5a7628bedbcc42000aa7f614"
}]
You could use below query. $unwind the orders array, $group by order row and product and collect matching ids and count. Keep the documents where count is greater than 1. $lookup to pull in the matching documents by id and $replaceRoot to flatten the documents.
db.collection.aggregate([
{
"$unwind": "$orders"
},
{
"$group": {
"_id": {
"order": "$orders",
"product_id": "$product_id"
},
"count": {
"$sum": 1
},
"doc_ids": {
"$push": "$_id"
}
}
},
{
"$match": {
"count": {
"$gt": 1
}
}
},
{
"$lookup": {
"from": "collection",
"localField": "doc_ids",
"foreignField": "_id",
"as": "documents"
}
},
{
"$unwind": "$documents"
},
{
"$replaceRoot": {
"newRoot": "$documents"
}
}
])
https://mongoplayground.net/p/YbztEGttUMx
While this can be done purely in Mongo I do not recommend it as it's very very very memory inefficient. you basically have to hold the entire collection in memory the entire time while you do certain manipulations on it.
I will however show the pipeline for this because we will use it with the second more scaleable approach.
We want to $group based on orders and product_id, however there are 2 issues standing in our way.
The orders field might not be sorted the same in all documents, because Mongo does not support "nested" sorting we have to $unwind the array, $sort it and restore the original structure. ( mind you you're sorting the entire collection here in memory ). This step which is one of the pain points of this pipeline can be skipped if you can ensure sort order is maintained in the orders array.
Mongo is inconsistent when $grouping an array of objects. full disclosure I'm not entirely sure what's going on in there but I'm guessing there are some "shortcuts" done for efficiency which affects the stability somehow. So our approach will be to convert these objects into a string (concating the "row" and "seat" together).
db.collection.aggregate([
{
"$unwind": "$orders"
},
{
$sort: {
"orders.row": 1,
"orders.seat": 1
}
},
{
$group: {
_id: "$_id",
tmpOrders: {
$push: {
$concat: [
"$orders.row",
"$orders.seat"
]
}
},
product_id: {
$first: "$product_id"
}
}
},
{
$group: {
_id: {
orders: "$tmpOrders",
product: "$product_id"
},
dupIds: {
$push: "$_id"
}
}
},
{
$match: {
"dupIds.0": {
$exists: true
}
}
},
{
$project: {
_id: 0,
dups: "$dupIds",
}
}
])
Mongo Playground
Now as I said this approach is not scaleable, and on large collections will take a very long time to run. So I recommend utilizing indexes and iterating over product_id's and executing each pipeline separately.
// wraps the native Promise, not required.
import Bluebird = require('bluebird');
// very fast with index.
const productIds = await collection.distinct('product_id')
await Bluebird.map(productIds, async (productId) => {
const dups = await collection.aggregate([
{
$match: {
product_id: productId
}
}
... same pipeline ...
])
if (dups.length) {
// logic required.
}
// can control concurrency based on db workload.
}, { concurrency: 5})
Make sure with this approach you have an index built on product_id so it will work efficiently.

How to get a sorted list of most used values for a field in a mongodb collection

I have some documents resembling the following structure:
{
"name": "item_1",
"category": ["a", "b"]
},
{
"name": "item_2",
"category": ["c"]
},
{
"name": "item_3",
"category": ["a", "c"]
},
{
"name": "item_4",
"category": ["a"]
},
{
"name": "item_5",
"category": ["a"]
}
I'm trying to get a sorted list of the most used values for the category field in all documents within the collection.
So in this example, the return value I'm expecting should be something like this:
[
{
"category": "a",
"count": 4
},
{
"category": "c",
"count": 2
},
{
"category": "b",
"count": 1
}
]
Is there a way to make such a query in mongoose?
Demo - https://mongoplayground.net/p/sBpwwvowXLH
Use aggregation query to $unwind your category into separate documents $group them back by category and get the count
$sum
db.collection.aggregate({
"$unwind": "$category"
},
{
"$group": {
"_id": "$category",
count: { $sum: 1 }
}
})

Multiple groups and project using mongoose

I'm trying display aggregated data by a time range in Mongoose. My Data is like:
{company: "1" createdTime:"2017-01-01"}
{company: "1" createdTime:"2017-01-01"}
{company: "1" createdTime:"2017-01-02"}
{company: "2" createdTime:"2017-01-02"}
{company: "2" createdTime:"2017-01-03"}
{company: "3" createdTime:"2017-01-03"}
{company: "3" createdTime:"2017-01-03"}
I have created a query like:
{
"aggregate": [{
"$project": {
"frequency": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$createdTime"
}
}
}
}, {
"$group": {
"_id": {
"date": "$frequency"
},
"count": {
"$sum": 1
}
}
}
]
}
and returns:
[
{
"_id": {
"date": "2017-01-01"
},
"count": 2
},
{
"_id": {
"date": "2017-01-02"
},
"count": 2
},
{
"_id": {
"date": "2017-01-03"
},
"count": 3
}
]
Which gives me the count for the overall occurrences for that particular date, which is fine.
I'm wanting to further break that down to give me the sum of occurrences per company for that date which should look something like this:
[
{
"_id": {
"date": "2017-01-01",
"companies": [{
"company": 1,
"count": 2
}]
},
"count": 2
},
{
"_id": {
"date": "2017-01-02",
"companies": [{
"company": 1,
"count": 1
},{
"company": 2,
"count": 1
}]
},
"count": 2
},
{
"_id": {
"date": "2017-01-03",
,
"companies": [{
"company": 2,
"count": 1
},{
"company": 3,
"count": 2
}]]
},
"count": 3
}
]
I've tried all sort of combinations but always get errors when i put in a second group and i can't work out how to do it?
EDIT
I wasn't able to use examples from the comments or they didn't work for my use case however, combining this information i came up with this:
{
"aggregate": [{
"$group": {
"_id": {
"company": "$company",
"date": {
"$dayOfMonth": "$createdTime"
},
"month": {
"$month": "$createdTime"
},
"year": {
"$year": "$createdTime"
}
},
"count": {
"$sum": 1
}
}
}, {
"$group": {
"_id": "$_id.company",
"events": {
"$push": {
"date": "$_id.date",
"month": "$_id.month",
"year": "$_id.year",
"count": "$count"
}
},
"totalEvents": {
"$sum": "$count"
}
}
}
]
}
which renders some results like:
[
{
"_id": "1",
"dateCount": [
{
"date": 3,
"month": 12,
"year": 2017,
"count": 40
},
{
"date": 10,
"month": 12,
"year": 2017,
"count": 8
}
]
}
]
Ideally the date would be yyyy-mm-dd but using the full date in the query would include the time also resulting in an entry for each time not date. I guess this will do for now, i can easily create a date from this albeit it's a little more cumbersome but it'll do until anything better comes along.
Edit 2
[{
"_id": ObjectId("5a38a03b33ccd28fb87f6dc9"),
"createdBy": ObjectId("5a2e203fa567425b2cb411fc"),
"appId": ObjectId("5a2e203fa567425b2cb411fb"),
"eventName": "event",
"userName": "person",
"userEmail": "company__29",
"organisation": "company",
"module": "module",
"createdTime": ISODate("2017-12-02T17:07:25.000Z")
"__v": 0
}
]
Query:
{
"aggregate": [{
"$project": {
"frequency": {
"$dateToString": {
"format": "%U",
"date": "$createdTime"
}
}
}
}, {
"$group": {
"_id": {
"frequency": "$frequency",
"organisation": "$organisation"
},
"count": {
"$sum": 1
}
}
}, {
"$group": {
"_id": {
"date": "$_id.frequency"
},
"count": {
"$sum": 1
},
"companies": {
"$push": {
"organisation": "$organisation",
"count": "$count"
}
}
}
}
]
}

Aggregation with Mongodb using mongoose

Hey im trying some aggregation on mongodb using moongose:
I got this data:
[
{
"school": "1",
"preferences": [
{
"person": "X",
"color": "A"
},
{
"person": "X",
"color": "B"
},
{
"person": "Y",
"color": "A"
}
]
},
{
"school": "2",
"preferences": [
{
"person": "Z",
"color": "A"
},
{
"person": "Y",
"color": "C"
}
]
}
]
I think the data explaisn it self, What i want to get as result is,
when i query for the school that matches '1'. i would like to get this result:
[
{
"_id": "X",
"colors": [
"A",
"B"
]
},
{
"_id": "Y",
"colors": ["A"]
}
]
I used aggregation before, but i cant figure to get this result.
Check this aggregation query :
db.collectionName.aggregate({
"$match": {
"school": "1"
}
}, {
"$unwind": "$preferences"
}, {
"$group": {
"_id": "$preferences.person",
"colors": {
"$addToSet": "$preferences.color" // $addToSet used here only to add distinct color
}
}
})
Edit If you want count the number of color then used group with sum as below :
db.collectionName.aggregate({
"$match": {
"school": "1"
}
}, {
"$unwind": "$preferences"
}, {
"$group": {
"_id": "$preferences.color",
"appears": {
"$sum": 1
}
}
})
EDIT
As per your new requirement you should do following aggregation:
db.collectionName.aggregate({
"$unwind": "$preferences"
},
{
"$group": {
"_id": {
"person": "$preferences.person",
"color": "$preferences.color"
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.person",
"colors": {
"$push": {
"color": "$_id.color",
"count": "$count"
}
}
}
},
{
"$project": {
"_id": 0,
"person": "$_id",
"colors": 1
}
}).pretty()

Resources