CouchDB Replication does not replicate old revision documents - couchdb

I'm working on "one database per user" system using the CouchDB replication with a selector to filter my data based on the user configuration.
It worked pretty well until the day I noticed an issue with the replication. It is difficult for me to describe, so I will explain it with an example:
I have my main database "mainDB" which i'm using as the "source" database for the replication, and i decide to create a sub database "subDB" for a user which will be the "target" for the replication.
I create my replication doc with my selector to filter the data from my "mainDB" and nothing happens: my "subDB" stays empty, and the replication state is marked as "Running" but with 0 pending changes.
As soon as I update a doc in "mainDB" (a doc that is supposed to be replicated to my "subDB"), the "_rev" of this doc changes, and the replication really starts and replicates my doc to "subDB".
In brief, CouchDB filtered replication based on a selector will not replicate any doc until the "_rev" of each doc that is supposed to be replicated has been updated.
App version
Apache CouchDB
v. 3.2.2
EDIT 1
The selector looks like this:
{
"selector": {
"$or": [
{
"date_debut": {
"$lte": "#end_date#"
},
"typedoc": "ActiviteDocument",
"date_fin": {
"$gte": "#start_date#"
},
"id": {
"$in": [
#array_of_integer_A#
]
}
},
{
"typedoc": "IndividuDocument",
"id": {
"$in": [
#array_of_integer_B#
]
}
},
(JSON too long to paste in full here, but the other parts of the $or use the same logic)
...
}
EDIT 2 : I changed the selector logic by using $or and $and
"selector": {
"$or": [
{
"$and": [
{
"typedoc": "ActiviteDocument"
},
{
"date_debut": {
"$lte": "#end_date#"
}
},
{
"date_fin": {
"$gte": "#statt_date#"
}
},
{
"id": {
"$in": [#array_of_integer_A#]
}
}
]
},
{
"$and": [
{
"typedoc": "IndividuDocument"
},
{
"id": {
"$in": [#array_of_integer_B#]
}
}
]
},
EDIT 3 : I changed my replication doc by removing the selector and using "doc_ids". The replication still does not replicate my docs unless I update one of them, so that its "_rev" changes and the replication detects that and starts working.
{
"_id": "replicationmaster-1123",
"source": "mysource",
"target": "mytarget",
"doc_ids": [
"ActiviteDocument_335765",
"ActiviteDocument_351882",
"ActiviteDocument_421350",
"ActiviteDocument_423684",
"ActiviteDocument_428304",
"ActiviteDocument_440523",
"ActiviteDocument_442048",
"ActiviteDocument_443727"
],
"continuous": true,
"create_target": false,
"owner": "admin"
}
EDIT 4 : demo https://youtu.be/OqJA0fDQqy8

The problem is that in your selector JSON the $or parameter needs to be an array of objects, each one being an individual condition. The way you have it, this parameter is an array with a single object that has all the conditions in it.
Here is a complete replicator document based on your conditions, with the correct syntax:
{
"_id": "abc12357",
"source": "https://username:password@mycouchdb.com/db1",
"target": "https://username:password@mycouchdb.com/db2",
"selector": {
"$or": [
{
"start": {
"$lte": "2022-10-27"
}
},
{
"typedoc": "ActiviteDocument"
},
{
"end": {
"$gte": "2022-09-29"
}
},
{
"id": {
"$in": [
65993,
63938,
87265,
312112,
64885,
64277
]
}
}
]
}
}

Related

Mongoose - How to query field in the last object of an array of objects

I have MongoDB documents structured like this:
{
"_id": "5d8b987f9f8b9f9c8c8b9f9",
"targetsList": [
{
"target": "user",
"statusList": [
{
"date": "2018-01-01",
"type": "OK"
},
{
"date": "2018-01-02",
"type": "FAILD"
}
]
}
]
}
I want to count all documents whose "targetsList" array contains an object with "target" == "user", where the last element of that object's "statusList" array has "type" != "FAILD".
Any ideas on how to implement this kind of query?
Mongo playground:
https://mongoplayground.net/p/3bCoHRnh-KQ
In this example, I expected the count to be 1, because only the second object meets the conditions.
An aggregation pipeline
1st step - Filtering out where "targetsList.target": "user"
2nd step - $unwind on targetsList to get it out of array
3rd step - getting the last element of the targetsList.statusList array using $arrayElemAt
4th step - getting the results where that last element is not FAILD
5th step - getting the count
demo - you can try removing parts of the pipeline to see what the intermediate results are
db.collection.aggregate([
{
$match: {
"targetsList.target": "user"
}
},
{
$unwind: "$targetsList"
},
{
$project: {
"targetsList.statusList": {
$arrayElemAt: [
"$targetsList.statusList",
-1
]
},
}
},
{
$match: {
"targetsList.statusList.type": {
$ne: "FAILD"
}
}
},
{
$count: "withoutFailedInLastElemCount"
}
])
Unless it's crucial that the element be the last index, this should work for your case.
db.collection.find({
"targetsList.statusList.type": {
$in: [
"FAILD"
]
}
})
This will retrieve documents where the type value is FAILD. To invert this you can swap $in for $nin.
Updated playground here
Here's another way to do it with a leading monster "$match".
db.collection.aggregate([
{
"$match": {
"targetsList.target": "user",
"$expr": {
"$reduce": {
"input": "$targetsList",
"initialValue": false,
"in": {
"$or": [
"$$value",
{
"$ne": [
{
"$last": "$$this.statusList.type"
},
"FAILD"
]
}
]
}
}
}
}
},
{
"$count": "noFailedLastCount"
}
])
Try it on mongoplayground.net.

Update document with aggregation pipeline with $lookup

I have a product collection which has the fields userId and referenceProductId.
I want to add a new field buyerUserId to every document whose _id is equal to the referenceProductId of another document; its value should be the userId of that other document.
For example, for the following 2 docs:
{
"_id": { "$oid": "61ded34c1e7007b17a86f889" },
"userId": { "$oid": "6190b06b113314ad2183db09" },
"referenceProductId": { "$oid": "61ded15fdd1363aa1ce09c55" }
}
{
"_id": { "$oid": "61ded15fdd1363aa1ce09c55" },
"userId": { "$oid": "6190b06b113314ad2183db09" },
"referenceProductId": { "$oid": "61ded34c1e7007b17a86f889" }
}
BuyerUserId for doc1 will be 6190b06b113314ad2183db09 since doc1's _id is equal to referenceProductId of doc2
I am new to MongoDB and am trying to do the update with the code below, but it doesn't work:
{ "$match": { "status": "purchased" }},
{ $lookup:{
from:"product",
let:{
"id":"$Id",
"referenceProductId":"$ReferenceProductId",
"userId":"$UserId",
},
pipeline:[
{
$match:{
$expr:{
$eq: ["$$id", "$referenceProductId"] ,
}
}
},
],
as:"products"
}
},{
$project:{
"buyerUserId":"$products.userId"
}
}
])
As noted in a comment above, $merge can be used as "doc to doc" update mechanism. Starting in v4.4 you may output the results of the merge directly back onto the collection being aggregated.
db.foo.aggregate([
{$lookup: {from: "foo",
let: {rid: "$_id"},
pipeline: [
{$match:{$expr:{$eq: ["$$rid", "$referenceProductId"]}}}
],
as: "Z"}}
// If _id->refId match happened, set buyerUserId else do not set
// anything not even null ($$REMOVE means "do nothing"):
,{$project: {buyerUserId: {$cond: [ {$ne:[0,{$size: "$Z"}]}, "$_id", "$$REMOVE"] }} }
// At this point in pipeline we only have _id and maybe buyerUserId. Use _id
// from each doc to merge buyerUserId back into the main collection.
// Since the docs came from this collection, we should fail if something
// does not match.
,{$merge: {
into: "foo",
on: [ "_id" ],
whenMatched: "merge",
whenNotMatched: "fail"
}}
]);

Add filter based on condition in MongoDB

I am new to MongoDB and I have a collection student. I need to add the student.name filter in the query only when is_rep=true.
My document structure.
{
{
"student": {
"name": "arun",
"dept": "bio",
"subject": "bot"
},
"is_rep": true
},
{
"student": {
"name": "div",
"dept": "csc",
"subject": "program"
},
"is_rep": false
}
}
Can anyone please guide me on how to achieve this in pymongo?
You can use aggregations like
db.collection.aggregate([
{
$match: {
is_rep: true,
"student.name": "arun"
}
}
])
Working Mongo playground
Or you can use find query
db.collection.find({
is_rep: true,
"student.name": "arun"
})
Working Mongo playground

How to manage large/complex NoSQL queries in code, can we make it generic?

I've been using mongoose in node js for interacting with mongodb, now for every search/update/delete operation I'll have to write a JSON such as,
Model.find({
"$and": [
{ "name": name },
{
"start": {
"$or": [
{ "$exists": false },
{ "$gte": start }
]
}
},
{
"stop": {
"$or": [
{ "$exists": false },
{ "$gte": stop }
]
}
}
]
}).exec(callback);
Now, every time I have to fetch such objects, I have to write a method that builds such a query; even if I write a generic method, it will accept only a limited set of parameters unless I pass in a JSON object, which again results in almost the same amount of code.
So what is the best practice for managing such queries in code?

Mongoose (MongoDB): Exclude Properties in a Dictionary Like Schema Type

I have a Schema of the following structure:
var schema = mongoose.Schema({
answers: {type: mongoose.Schema.Types.Mixed}
});
I use the answers field as an object (associative array to implement something like a dictionary). Here is an example:
{
"__v": 0,
"_id": {
"$oid": "53a0251c50d0536c1bfc6006"
},
"answers": {
"fea": {
"viewed": false
},
"3d2": {
"viewed": true,
"value": true
},
"4fr": {
"viewed": true,
"value": true
},
"84h": {
"viewed": false
},
...
}
}
In a query I want to select only the "value" field of each entry. How is that possible through the select syntax? This of course doesn't work:
XY.find(...)
.select({'answers': true, 'answers.*.value': false})
.exec(...);
Maybe I have to design the data in another fashion?
Best regards,
Kersten
You should never model with "explicit values" as the "key" names. This is very bad practice. Consider what you would do in a SQL database. Would you create "fields/columns" for the different "names" of the things you want?
No you would not. You have a generic field that specifies a "type" and then you have others that hold the data. Nothing changes here:
{
"_id": {
"$oid": "53a0251c50d0536c1bfc6006"
},
"answers": [
{ "type": "fea", "viewed": false },
{ "type": "3d2", "viewed": true, "value": true },
{ "type": "4fr", "viewed": true, "value": true },
{ "type": "84h", "viewed": false },
...
]
}
Now it is easy to use something like the aggregation framework to project the content the way you want:
With Modern MongoDB 2.6 and onwards you can use $map and $setDifference to filter the array without using $unwind:
Model.aggregate(
[
{ "$project": {
"answers": {
"$setDifference": [
{
"$map": {
"input": "$answers",
"as": "el",
"in": {
"$cond": [
1,
{
"type": "$$el.type",
"value": { "$ifNull": [ "$$el.value", false ] }
},
false
]
}
}
},
[false]
]
}
}}
],
function(err,result) {
}
);
Or with older versions pre 2.6:
Model.aggregate(
[
{ "$unwind": "$answers" },
{ "$group": {
"_id": "$_id",
"answers": {
"$push": {
"type": "$answers.type",
"value": { "$ifNull": [ "$answers.value", false ] }
}
}
}}
],
function(err,result) {
}
);
Of course you can "filter" the array results to certain conditions, either by adding a logical evaluation as the first argument to the $cond operator in the $map implementation, or by using a $match pipeline stage in between the $unwind and $group stages.
Either form allows you to re-shape the result without problem and is the fastest way to process this, which is a strong advantage of using arrays as opposed to embedded objects whose keys are actually a "data" item.
If you are stuck with this then you need to process with JavaScript evaluation like mapReduce. This runs much slower than the aggregation framework due to the need to invoke and run in a JavaScript interpreter process:
Model.mapReduce(
{
"map": function() {
for ( var k in this.answers ) {
this.answers[k] = this.answers[k].hasOwnProperty("value")
? this.answers[k].value : false;
}
var id = this._id;
delete this._id;
emit( id, this );
},
"reduce": function(){}
},
function(err,docs) {
}
)
But really, consider changing your structure as it makes things much more flexible for queries and other operations.

Resources