how to select random documents with some conditions fulfilled in MongoDB - node.js

Basically I have documents in which I have on field called "Difficulty Level" and value of this filed is between 1 to 10 for each documents.
So, I have to select random 10 or 20 documents so that in randomly selected documents , atleast 1 document should be there for each difficulty level i.e. from 1 to 10. means there should atlease one document with "Difficulty level" : 1 ,"Difficulty level" : 2 ,"Difficulty level" : 3 ............."Difficulty level" : 10.
So, How can I select documents randomly with this condition fulfilled ?
Thanks
I tried $rand operator for selecting random documents but can't getting solution for that condition.

If I've understood correctly you can try something like this:
The goal here is to create a query like this example
This query gets two random elements using $sample, one for level1 and another for level2. And using $facet you can get multiple results.
db.collection.aggregate([
{
"$facet": {
"difficulty_level_1": [
{
"$match": { "difficulty_level": 1 } },
{ "$sample": { "size": 1 } }
],
"difficulty_level_2": [
{ "$match": { "difficulty_level": 2 } },
{ "$sample": { "size": 1 } }
]
}
}
])
So the point is to do this query in a dynamic way. So you can use JS to create the object query an pass it to the mongo call.
const random = Math.floor((Math.random()*10)+1) // Or wathever to get the random number
let query = {"$facet":{}}
for(let i = 1 ; i <= random; i++){
const difficulty_level = `difficulty_level_${i}`
query["$facet"][difficulty_level] = [
{ $match: { difficulty_level: i }},
{ $sample: { size: 1 }}
]
}
console.log(query) // This output can be used in mongoplayground and it works!
// To use the query you can use somethiing like this (or other way you call the DB)
this.db.aggregate([query])

Related

Update collection to change the rank

i have a mongodb collection that I sort by the amount of points each item has, and it shows a rank according to it's place in the collection :
db.collection('websites').find({}).sort({ "points": -1 }).forEach(doc => {
rank++;
doc.rank = rank;
delete doc._id;
console.log(doc)
Si I thought to myself : Ok, I'm gonna update the rank in the collection, so I added this :
db.collection('websites').updateMany({},
{ $set: { rank: doc.rank } }
)
But I was too good to be true, and it updates every single item with the same rank, which changes at each refresh, what exactly is going on, here ?
EDIT : I managed to do it by doing this :
rank = 0;
db.collection('websites').find({}).sort({ "points": -1 }).forEach(doc => {
rank++;
doc.rank = rank;
//delete doc._id;
console.log(doc._id);
db.collection('websites').updateMany({_id : doc._id},
{ $set: { rank: doc.rank } },
{ upsert: true }
)
})
Try this:
db.collection('websites')
.updateOne( //update only one
{rank: doc.rank}, //update the one where rank is the sent in parameter doc.rank
{ $set: { rank: doc.rank } } // if multiple docs have the same rank you should send in more parameters
)
db.collection('websites').updateMany({/*All docs match*/},
{ $set: { rank: doc.rank } }
)
Reason it updates same rank because you have no filter which means it matches all docs in the collection and you have updateMany
You need to set a filter to restrict docs to be updated.
db.collection('websites').updateMany({id: "someID"},
{ $set: { rank: doc.rank } }
)
The OP states we want to sort all the docs by points, then "rerank" them from 1 to n in that order and update the DB. Here is an example of where "aggregate is the new update" thanks to the power of $merge onto the same collection as the input:
db.foo.aggregate([
// Get everything in descending order...
{$sort: {'points':-1}}
// ... and turn it into a big array:
,{$group: {_id:null, X:{$push: '$$ROOT'}}}
// Walk the array and incrementally set rank. The input arg
// is $X and we set $X so we are overwriting the old X:
,{$addFields: {X: {$function: {
body: function(items) {
for(var i = 0; i < items.length; i++) {
items[i]['rank'] = (i+1);
}
return items;
},
args: [ '$X' ],
lang: "js"
}}
}}
// Get us back to regular docs, not an array:
,{$unwind: '$X'}
,{$replaceRoot: {newRoot: '$X'}}
// ... and update everything:
,{$merge: {
into: "foo",
on: [ "_id" ],
whenMatched: "merge",
whenNotMatched: "fail"
}}
]);
If using $function spooks you, you can use a somewhat more obtuse approach with $reduce as a stateful for loop substitute. To better understand what is happening, block comment with /* */ the stages below $group and one by one uncomment each successive stage to see how that operator is affecting the pipeline.
db.foo.aggregate([
// Get everything in descending order...
{$sort: {'points':-1}}
// ... and turn it into a big array:
,{$group: {_id:null, X:{$push: '$$ROOT'}}}
// Use $reduce as a for loop with state.
,{$addFields: {X: {$reduce: {
input: '$X',
// The value (stateful) part of the loop will contain a
// counter n and the array newX which we will rebuild with
// the incremental rank:
initialValue: {
n:0,
newX:[]
},
in: {$let: {
vars: {qq:{$add:['$$value.n',1]}}, // n = n + 1
in: {
n: '$$qq',
newX: {$concatArrays: [
'$$value.newX',
// A little weird but this means "take the
// current item in the array ($$this) and
// set $$this.rank = $qq by merging it into the
// item. This results in a new object but
// $concatArrays needs an array so wrap it
// with [ ]":
[ {$mergeObjects: ['$$this',{rank:'$$qq'}]} ]
]}
}
}}
}}
}}
,{$unwind: '$X.newX'}
,{$replaceRoot: {newRoot: '$X.newX'}}
,{$merge: {
into: "foo",
on: [ "_id" ],
whenMatched: "merge",
whenNotMatched: "fail"
}}
]);
The problem here is that mongo is using the same doc.rank value to update all the records that match the filter criteria (all records in your case). Now you have two options to resolve the issue -
Works but is less efficient) - Idea here is that you need to calculate the rank for each website that you want to update. loop throuh all the document and run below query which will update every document with it's calculated rank. You could probably think that this is inefficient and you would be right. We are making large number of network calls to update the records. Worse part is that the slowness is unbounded and will get slower as number of records increases.
db.collection('websites')
.updateOne(
{ id: 'docIdThatNeedsToBeUpdated'},
{ $set: { rank: 'calculatedRankOfTheWebsite' } }
)
Efficient option - Use the same technique to calculate the rank for each website and loop through it to generate the update statement as above. But this time you would not make the update calls separately for all the websites. Rather you would use Bulk update technique. You add all your update statement to a batch and execute them all at one go.
//loop and use below two line to add the statements to a batch.
var bulk = db.websites.initializeUnorderedBulkOp();
bulk.find({ id: 'docIdThatNeedsToBeUpdated' })
.updateOne({
$set: {
rank: 'calculatedRankOfTheWebsite'
}
});
//execute all of the statement at one go outside of the loop
bulk.execute();
I managed to do it by doing:
rank = 0;
db.collection('websites').find({}).sort({ "points": -1 }).forEach(doc => {
rank++;
doc.rank = rank;
//delete doc._id;
console.log(doc._id);
db.collection('websites').updateMany({_id : doc._id},
{ $set: { rank: doc.rank } },
{ upsert: true }
)
})
Thank you everyone !

Updating mongoose nested array of mixed types

I have a mongoose schema of mixed types like so:
const user = mongoose.Schema({
...
"links" : []
...
After populating this schema, I ended up with data like so:
[
[
{
"step1": "post-url-google", // This field is unique for each entry
"step2": {
"title": "Heading 1",
"likes": 4
}
},
],
[
{
"step1": "post-url-microsoft",
"step2": {
"title": "Heading 1",
"likes": 1
}
},
{
"step1": "post-url-apple",
"step2": {
"title": "Heading 2",
"likes": 6 // I want to update this to 7
}
}
]
]
What I want to achieve is to update the "step1": "post-url-apple" field from 6 to have a likes of 7
So I tried using the User.save() function like so:
let user = await User.findOne({"_id" : "some_id"})
user.links[1].some(object => {
if (object.step1 === "post-url-apple") {
object.step2.likes = 7
(async function (){
user.save() // I also did error handling
})()
return
}
})
This method works fine and the user gets updated but it keeps throwing ParallelSaveError possibly because I am calling the save() function in parallel on the same user instance in some other parts of my code.
So I decided to use the User.findOneAndUpdate() method, but my queries keep failing when using the mongodb dot notation $[<identifier>], obviously because I don't know how to use it properly.
Like so:
let update = {
"$set" : {
"links.$[index1].$[index2].step2.likes" : 7,
}
}
let conditions = {
arrayFilters : [
{"index1" : 1},
{"index2.step1" : "post-url-apple"}
]
}
try {
let result = await Users.findOneAndUpdate({"_id" : "some_id"}, update, conditions)
console.log(result)
} catch (err) {
console.log(err)
}
For all good reasons, I'm not hiting the catch block but the update was equally not successful
How do I achieve updating the "step1": "post-url-apple" likes field to 7 using findOneAndUpdate?
Thank you.
In arrayFilters you should define the conditions to be applied to all the array elements, not the index
If you are sure, you always update the second array element (index = 1) of the outer array, then you can use the dot notation for the outer array, and for the inner array you can use the array filters to get the element that has step1 = 'post-url-apple'
your code may look something like that
let update = {
"$set" : {
'links.1.$[item].step2.likes': 7 // here we used links.1 to access the second element of the outer array
}
}
let conditions = {
arrayFilters : [
{ 'item.step1' : 'post-url-apple' } // item here is the element in the inner array that has step1 = post-url-apple
]
}
then do your update query
hope it helps

mongodb - most efficient way of calculating missing indices in sequence

Given a collection with lets say 1.000.000 entries and each of them have their own unique property called number which is indexed. How can I efficiently find the lowest gap in the number sequence.
An easy example would be a sequence of indexes like: 1,2,3,4,6,7,10, where I would like to get back the number 5 since this will be the lowest missing number in the sequence.
Is there a possible way (maybe aggregation) without the need to query all numbers.
One way of doing this would be with a cursor. With a cursor, you can manually iterate through the documents until you find one that matches your criteria.
var cursor = db.coll.find({}).sort({number: 1});
var prev = null
while (cusor.hasNext()) {
var curr = cursor.getNext()
if (prev && prev.number + 1 !== curr.number) break;
prev = curr;
}
One is get all the numbers and find the ones missing between them.
An aggregate example that you can use to not have to get them all. https://www.mongodb.com/community/forums/t/query-to-find-missing-sequence/123771/2
// Assuming the sample data with sequence numbers from 1 thru 10 as follows:
{ id: 1 },
{ id: 2 },
{ id: 4 },
{ id: 7 },
{ id: 9 },
{ id: 10 }
// And, note the missing numbers are 3, 5, 6 and 8. You can use the following aggregation to find them:
db.collection.aggregate([
{
$group: {
_id: null,
nos: { $push: "$id" }
}
},
{
$addFields: {
missing: { $setDifference: [ { $range: [ 1, 11 ] }, "$nos" ] }
}
}
])

Mongoose collection statistics / manipulations queries

first, a comment. The collection described is simplified, for this question. I'm interesting in understanding how to manipulate a mongo db and get statistics of my data.
Let's say I have a collection with test results. The schema is:
Results {
_id: ObjectId
TestNumber: int
result: String // this contains "pass" or "fail"
// additional data
}
For each test can be many reports, so most likely each TestNumber appears in more than one document.
How can I perform a query which returns this info on the entire collection:
TestNumber | count of result == "pass" | count of result == "fail"
You can use the below aggregation operations pipelined together:
Group all the documents based on their testNumber and the type of
result together, so for every testNumber, we would have two
groups each, one for fail and another for pass, with the count of
documents in each group.
Project a variable "pass" for the group containing the result as
pass, and fail for the other group.
Group together the documents again based on the testNumber, and
push the pass and fail documents into an array.
Project the fields as required.
The Code:
Results.aggregate([
{$group:{"_id":{"testNumber":"$testNumber","result":"$result"},
"count":{$sum:1}}},
{$project:{"_id":0,
"testNumber":"$_id.testNumber",
"result":{$cond:[{$eq:["$_id.result","pass"]},
{"pass":"$count"},
{"fail":"$count"}]}}},
{$group:{"_id":"$testNumber",
"result":{$push:"$result"}}},
{$project:{"testNumber":"$_id","result":1,"_id":0}}
],function(a,b){
// post process
})
Sample Data:
db.collection.insert([
{
"_id":1,
"testNumber":1,
"result":"pass"
},
{
"_id":2,
"testNumber":1,
"result":"pass"
},
{
"_id":3,
"testNumber":1,
"result":"fail"
},
{
"_id":4,
"testNumber":2,
"result":"pass"
}])
Sample o/p:
{ "result" : [ { "pass" : 1 } ], "testNumber" : 2 }
{ "result" : [ { "fail" : 1 }, { "pass" : 2 } ], "testNumber" : 1 }
iterating doc.result will give you the pass count and the number of failed tests for the testNumber.

Node, MongoDB (mongoose) distinct count

I have a collection with multiple documents and every one of them has and 'eID' field that is not unique. I want to get the count for all the distinct 'eID'.
Example: if there are 5 documents with the 'eID' = ObjectID(123) and 2 documents with 'eID' = ObjectID(321) I want to output something like:
{
ObjectID(123): 5,
ObjectID(321): 2
}
I don't know if that can be done in the same query but after knowing what are the most ocurring eID's I want to fetch the referenced documents using the ObjectID
Mongoose version 3.8.8
$status is the specific field of collection that i need to count distinct number of element.
var agg = [
{$group: {
_id: "$status",
total: {$sum: 1}
}}
];
model.Site.aggregate(agg, function(err, logs){
if (err) { return res.json(err); }
return res.json(logs);
});
//output
[
{
"_id": "plan",
"total": 3
},
{
"_id": "complete",
"total": 4
},
{
"_id": "hault",
"total": 2
},
{
"_id": "incomplete",
"total": 4
}
]
This answer is not in terms of how this query can be written via mongoose, but I am familiar with the nodejs MongoClient class if you have further questions regarding implementation.
The best (most optimal) way I can think of doing this is to use mapReduce or aggregation on your database. The closest thing to a single command would be the distinct command, which can be invoked on collections, but this will only give you an array of distinct values for the eID key.
See here: http://docs.mongodb.org/manual/core/map-reduce/
For your specific problem, you will want your map and reduce functions roughly as follows:
var map = function() {
var value = 1;
emit(this.eID, value);
};
var reduce = function(key, values) {
var result = 0;
for(var i=-1;++i<values.length;){
var value = values[i];
result += value;
};
return result;
};
There might be an easier way to do this using the aggregation pipeline (I would post the link but I don't have enough reputation).
I also found the mapReduce command for mongoose: http://mongoosejs.com/docs/api.html#model_Model.mapReduce

Resources