Find duplicate entries in a collection (Node.js)? - node.js

I want to find out duplicate entries on the basis of field " name " and in the result, I want to fetch " name " and " _id ".
I have tried using aggregate function and it is returning all the result from database as null:
here is my code:
// Aggregation intended to collect the restaurant names that occur more than once.
// NOTE(review): the stages are wrapped in parentheses, not in an array literal, so JavaScript's
// comma operator evaluates the whole group to its LAST stage only. The $group / $match stages
// before the final $project are therefore never handed to aggregate().
ModelRestaurant.aggregate((
// { "$group": { "_id": "$name", "count": { "$sum": 1 } } },
// { "$match": { "_id": { "$ne": null }, "count": { "$gt": 1 } } },
// { "$project": { "name": "$_id", "_id": 0 } }
{ $group: { "_id": "$name", "name": { $first: "$name" }, "count": { $sum: 1 } } },
{ $match: { "count": { $gt: 1 } } },
{ $project: { "name": 1, "_id": 0 } },
{ $group: { "_id": null, "duplicateNames": { $push: "$name" } } },
{ $project: { "_id": 0, "duplicateNames": 1 } }
), function (err, result) {
// `err` is never inspected; the result is only logged when it is truthy.
if (result) {
console.log(result)
}
})
With the commented-out code above I was getting all the ids, but with the current code I get null values: the result is a JSON array of about 6000 entries, all with null values.
here is the field of data in collection:
My Schema is :
var mongoose = require('mongoose');
var uniqueValidator = require('mongoose-unique-validator');
var autoIncrement = require('mongoose-auto-increment');
// Mongoose schema describing a restaurant document.
// Note that `name` is declared as a plain String with no uniqueness option on this schema.
var ModelRestaurant = new mongoose.Schema({
name: String,
ownerName: String, // to be removed by checking code
ownerID: Number,
ownerEmail: String, // to be removed by checking code
city: String,
zipCode: String,
image: [String],
restaurantType: String,
address: String,
landmark: String,
isAddedByCustomer: {
type: Boolean,
default: false
},
// Coordinates are stored as strings, not numbers.
location: {
lat: String,
long: String
}
},
{
// Schema-level option passed as the second constructor argument.
timestamps: true
});
// Register the two plugins required at the top of the file on this schema.
ModelRestaurant.plugin(autoIncrement.plugin, 'Restaurants');
ModelRestaurant.plugin(uniqueValidator);
// Register the schema under the model name 'Restaurants'.
mongoose.model('Restaurants', ModelRestaurant);

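The pipeline stages must be passed to aggregate() as an array ([ ... ]). In your code they are wrapped in parentheses, so JavaScript's comma operator throws away every stage except the last $project, which would explain the empty results. The following code should print all the duplicates: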
// List every restaurant name that occurs more than once.
// The stages are passed as an array, and each group also collects the _id of every
// document sharing the name, so both `name` and the matching `_id`s are returned.
db.ModelRestaurant.aggregate([
  {
    $group: {
      "_id": "$name",
      "name": { $first: "$name" },
      // _id of every document that carries this name
      "ids": { $push: "$_id" },
      "count": { $sum: 1 }
    }
  },
  // keep only names shared by at least two documents
  { $match: { "count": { $gt: 1 } } }
])

Related

Mongoose: Calculate the number of grouped documents

I have this mongoose query:
// Stage 1: keep only history entries whose category is "LOGIN".
const filter_stage = {
$match: {
category: "LOGIN",
},
};
// Stage 2: one output document per distinct `user`, counting that user's login entries.
const active_users_group_stage = {
$group: {
_id: "$user",
number_of_logins: { $sum: 1 },
},
};
const pipeline = [filter_stage, active_users_group_stage];
const active_users_stats = await History.aggregate(pipeline);
response.active_users_stats = active_users_stats;
// The number of active users is derived in JavaScript from the number of groups returned.
const number_of_active_users = active_users_stats.length;
This is the History model:
// Schema for one history entry: which user, which kind of event, and when.
const HistorySchema = new Schema({
// ObjectId reference to a document of the "users" model.
user: {
type: Schema.Types.ObjectId,
ref: "users",
},
// Event kind; required and restricted to the two listed values.
category: {
type: String,
enum: ["LOGIN","LOGOUT"],
required: true,
},
// Defaults to Date.now when no date is supplied.
date: {
type: Date,
default: Date.now,
},
});
And it returns something like this:
[
{
"_id": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
"number_of_logins": 45
},
{
"_id": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
"number_of_logins": 36
},
{
"_id": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
"number_of_logins": 26
},
{
"_id": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
"number_of_logins": 18
},
{
"_id": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
"number_of_logins": 18
},
]
Instead of calculating the number of active users like this:
const number_of_active_users = active_users_stats.length;
I would like to calculate it inside the aggregate pipeline.
Is this possible?
You need a final $group stage with _id: null that counts all the active users and pushes every grouped document into a users array.
{
$group: {
_id: null,
number_of_active_users: {
$sum: 1
},
users: {
$push: "$$ROOT"
}
}
}
As the active_users_stats result will now be an array containing only one document, read both values from its first element:
...
// Final stage: collapse the per-user groups into one summary document that carries
// both the number of groups and the groups themselves.
const all_group_stage = {
  $group: {
    _id: null,
    number_of_active_users: {
      $sum: 1
    },
    users: {
      $push: "$$ROOT"
    }
  },
};
const pipeline = [filter_stage, active_users_group_stage, all_group_stage];
const active_users_stats = await History.aggregate(pipeline);
// $group emits no document at all when no input reaches it (e.g. no "LOGIN" entries),
// so the result array can be empty. Fall back to an empty summary instead of throwing
// on `active_users_stats[0].users`.
const summary = active_users_stats[0] || { users: [], number_of_active_users: 0 };
response.active_users_stats = summary.users;
const number_of_active_users = summary.number_of_active_users;

Delete an object from an array in Express Mongoose

{
"_id": "608c3d353f94ae40aff1dec4",
"userId": "608425c08a3f8db8845bee84",
"experiences": [
{
"designation": "Manager",
"_id": "609197056bd0ea09eee9429c"
},
{
"designation": "Asst. Manager",
"_id": "608c530de8ade5221b0e6d4e"
},
{
"designation": "Sr. Manager",
"_id": "608c534be8ade5221b0e6d4f"
},
]
}
I want to delete the object in the array with id 608c530de8ade5221b0e6d4e. Here is my code, but it gives me an error.
This is the controller:
// Find the document whose userId equals req.userData.userId and $pull, from its
// `experiences` array, the entry whose _id equals the `id` route parameter.
const userId = req.userData.userId;
const id = req.params.id;
Experience.findOneAndUpdate({ userId: userId }, { $pull: { 'experiences': { '_id': id } }}, { multi:true }, function(err, obj) {
// //do something here
});
This is model:
// Sub-document schema for one entry of the `experiences` array.
const newExpSchema = new mongoose.Schema({
designation: { type: String, default: ""},
});
// Parent schema: one document per userId holding an array of experience sub-documents.
const experienceSchema = new mongoose.Schema({
userId: { type: String, required: true },
experiences: [newExpSchema],
});
// Default export: the model registered under the name "experience".
export default model("experience", experienceSchema);
I am getting below error on { $pull: { 'experiences': { '_id': id } }}
Error:
No overload matches this call.
Overload 1 of 3 .........
.......
The expected type comes from property '$pull' which is declared here on type 'UpdateQuery<Document<any, {}>>'
Can you try this:
// Remove the embedded experience with the given _id from the parent document.
// Uses the question's `Experience` model (the original `Folder` is not defined anywhere
// in the question). The `multi` option is dropped: findOneAndUpdate always targets a
// single document, so it has no effect here.
Experience.findOneAndUpdate(
  {
    "_id": "608c3d353f94ae40aff1dec4"
  },
  {
    $pull: {
      "experiences": {
        "_id": "608c530de8ade5221b0e6d4e"
      }
    }
  }
)
Here is a working example: https://mongoplayground.net/p/YtNGBTr52U9

Count by category and sum it up in MongoDB

I have a product collection in MongoDB which has fields like _id, category, user_id.
I want to count the number of products in each category for a given user_id, and then sum up all the counts at the end.
my solution is :
// Count products per category for one user.
return Product.aggregate([
// NOTE(review): user_id is compared against a string here, while ProductSchema declares
// user_id as an ObjectId; aggregate() presumably does not cast the value - confirm.
{ $match: { "user_id": "id if user that added the product" } },
{ "$unwind": "$category" },
// One group per distinct category value, counting its documents.
{
"$group": {
"_id": {
'category': '$category',
},
"count": { "$sum": 1 }
}
},
{ "$sort": { "_id.category": 1 } },
// Re-groups by the same category key, carrying the count through and flattening _id
// from { category } to the plain category value.
{
"$group": {
"_id": "$_id.category",
"count": { "$first": "$count" }
}
}
])
The code gives me the count of each category when I do not filter by user_id, but when I add the $match stage it fails.
Product Schema:
// Schema for a product document.
const ProductSchema = new Schema({
title: {
type: String,
required: true
},
description: {
type: String,
required: true
},
// Defaults to -1 when no quantity is given.
quantity: {
type: Number,
default: -1
},
// A single string per product (not an array).
category:
{
type: String,
required: true
},
manufactured_by: {
type: String,
required: true
},
// ObjectId reference to a document of the 'user' model.
user_id: {
type: Schema.Types.ObjectId,
ref: 'user',
required: true
}
})
my result if I dont add the condition:
[
{
"_id": "A Tables",
"count": 1
},
{
"_id": "C Tables",
"count": 4
},
{
"_id": "B Tables",
"count": 2
}
]
Not sure what you are trying to achieve from the last stage in your pipeline.
But the following should give you desired output (without any complications that you added)
async getSellerStatistics(seller_id) {
return await Product.aggregate([
{ $match: { user_id: seller_id } }
{ $unwind: "$category" },
{
$group: {
_id: "$category",
count: { $sum: 1 },
},
},
{ $sort: { _id: 1 } },
])
}

How to convert string to ObjectId type and use in $lookup for mongodb?

I want to get the business information with businessId as a reference. However, I can't get the correct data because the previous developer did not use the ObjectId type in the model. What I want to do now is convert businessId to ObjectId without altering the model; changing the model would be easy, but the old data would be affected, which is not good. Please see the model below.
const scanHistory = new Schema({
businessId: { type: String },
domains: [
{
domainId: { type: String },
scanType: { type: String },
status: { type: String },
reportUrl: { type: String },
scanStart: { type: Date, default: Date.now },
scanFinish: { type: Date, default: Date.now }
}
],
scanStart: { type: Date, default: Date.now },
scanStatus: { type: String },
scanType: { type: String }
});
This is my aggregate query
.collection("scanhistories")
.aggregate([
{
$addFields: {
businessObjId: {
$convert: {
input: "businessId",
to: "objectId",
onError: "Could not convert to type ObjectId."
}
}
}
},
{
$group: {
_id: { $max: "$businessId" },
businessObjId: { $max: "$businessId" },
scanStatus: { $max: "$scanStatus" },
scanStart: { $max: "$scanStart" },
domains: { $max: "$domains" }
}
},
{
$lookup: {
from: "businesses",
as: "businessInfo",
localField: "businessObjId",
foreignField: "_id"
}
},
{
$project: {
_id: 1,
businessObjId: 1,
primaryDomain: { $arrayElemAt: ["$businessInfo.primaryDomain", 0] },
businessName: { $arrayElemAt: ["$businessInfo.businessName", 0] },
frequency: { $arrayElemAt: ["$businessInfo.scanFrequency", 0] },
scanStatus: 1,
domains: 1
}
},
{
$match: {
scanStatus: { $in: ["running", "undef"] },
domains: { $exists: true }
}
}
])
.toArray();
for (let x = 0; x < history.length; x++) {
console.log(history[x]);
}
Now the output is like this which is not the one I expected.
{ _id: 5de09321bdb7cc07b7595de4,
businessObjId: 5de09321bdb7cc07b7595de4,
scanStatus: 'undef',
domains:
[ { _id: 5dfa626300007c243c1528b3,
domainId: '5de09321bdb7cc07b7595de5',
scanType: 'scheduled',
status: 'running',
reportUrl: '',
scanStart: 2019-12-18T17:31:14.754Z,
scanFinish: 2019-12-18T17:31:14.754Z } ] }
The expected result should have been with the lookup businessInfo that I wanted
{ _id: 5de09321bdb7cc07b7595de4,
businessObjId: 5de09321bdb7cc07b7595de4,
scanStatus: 'undef',
domains:
[ { _id: 5dfa626300007c243c1528b3,
domainId: '5de09321bdb7cc07b7595de5',
scanType: 'scheduled',
status: 'running',
reportUrl: '',
scanStart: 2019-12-18T17:31:14.754Z,
scanFinish: 2019-12-18T17:31:14.754Z } ],
primaryDomain: "mydomainxxx.xy",
businessName: "The biz",
scanFrequency: "daily"
}
Can you help me? I am really new to MongoDB and my background is PHP/SQL, so any advice will be much appreciated. Thank you!
So I have found the solution for this. It was just a single mistake. :(
The mistake was in the $group stage of the aggregate code above.
Previous code above
{
$group: {
_id: { $max: "$businessId" },
businessObjId: { $max: "$businessId" },
scanStatus: { $max: "$scanStatus" },
scanStart: { $max: "$scanStart" },
domains: { $max: "$domains" }
}
},
Correct one
I change this part here:
businessObjId: { $first: "$businessObjId" }
{
$group: {
_id: { $max: "$businessId" },
businessObjId: { $first: "$businessObjId" },
scanStatus: { $max: "$scanStatus" },
scanStart: { $max: "$scanStart" },
domains: { $max: "$domains" }
}
},

sort on object refering element field not working with $group and $project in Mongodb

I am getting this output:
Sorting on the inner array is not working. I have the two tables as shown below.
The pages schema is this:
// Schema for a page; `position` is the numeric field the question wants to sort by.
const PageSchema = new Schema({
name: {
type: String,
required: true
},
created: {
type: Date
},
// Defaults to 0 when not set.
position: {
type: Number,
default: 0
}
});
// Exported as the 'pages' model.
module.exports = mongoose.model('pages', PageSchema);
The container schema is this:
// Schema for a container. It refers to its page through `pageId` and declares no
// `position` field of its own.
const ContainerSchema = new Schema({
filename: {
type: String,
required: true
},pageId: {
type: Schema.Types.ObjectId,
ref: 'pages'
},
created: {
type: Date
}
});
For sorting the data I used this code:
// Groups containers per page and returns each page's containers with a count.
// `match` is defined outside this snippet (presumably a $match stage - confirm).
Container.aggregate(match, {
  // First pass: one group per (pageId, container id, filename, position) combination.
  "$group": {
    "_id": {
      "pageId": "$pageId",
      "id": "$_id",
      "filename": "$filename",
      "position": "$position"
    },
    "containerCount": {
      "$sum": 1
    }
  }
}, {
  // Second pass: regroup by page, collecting the containers and summing the counts.
  "$group": {
    "_id": "$_id.pageId",
    "container": {
      "$push": {
        "_id": "$_id.id",
        "filename": "$_id.filename",
      },
    },
    // The comma after this entry was missing, which made the pipeline a syntax error.
    "position": {
      "$first": "$_id.pageId.position"
    },
    "count": {
      "$sum": "$containerCount"
    }
  }
}, {
  "$project": {
    "container": 1,
    "count": 1
  }
}, {
  "$sort": {
    "position": 1
  }
}).exec()
I want the data sort according to the position field in the pages but it's not working.
You have forgotten to add position in $project.
Once you add it in $project, it is available in $sort.
{
"$project": {
"position" :1,
"container": 1,
"count": 1
}
}

Resources