ArangoDB query returns extra items with diacritic marks - arangodb

I have a question related to diacritic marks. For the sake of simplicity, let's assume I have a document collection "c". I put two documents into it:
{"uri": "/c/de/aërotropismus/"}, {"uri": "/c/de/aerotropismus/"}
As you see, they are almost the same, except for the diacritic mark.
Then, I create a persistent index on the field "uri".
After this I do the query:
for doc in c
filter doc.uri >= "/c/de/aerotropismus/" and doc.uri < "/c/de/aerotropismus0"
return doc
I expect this query to return one result, but I get two results:
[
{
"_key": "37070873",
"_id": "c/37070873",
"_rev": "_bHsOMnm---",
"uri": "/c/de/aerotropismus/"
},
{
"_key": "37070853",
"_id": "c/37070853",
"_rev": "_bHsO_m6---",
"uri": "/c/de/aërotropismus/"
}
]
Why is that? And how can I fix it so that it returns just one result:
[
{
"_key": "37070873",
"_id": "c/37070873",
"_rev": "_bHsOMnm---",
"uri": "/c/de/aerotropismus/"
}
]

This indeed seems like a bug:
FOR doc IN c
SORT doc.uri DESC
RETURN KEEP(doc, ['uri'])
Results:
[
{
"uri": "/c/de/aerotropismus/1"
},
{
"uri": "/c/de/aërotropismus/"
},
{
"uri": "/c/de/aerotropismus/"
}
]
Workaround
You can create a user defined function inside ArangoDB to sort an array of strings.
More reading:
Registering an AQL user function
Workaround Implementation
You can execute the following code in arangosh to register your custom function:
// Load ArangoDB's AQL user-function registry; the module path starts with "@", not "#".
const aqlfunctions = require("@arangodb/aql/functions");
/**
 * Returns the entries of `elems` that lie in the half-open range
 * [left, right), sorted ascending.
 *
 * Comparison and sorting both use JavaScript's plain string ordering
 * (UTF-16 code units), so characters with diacritic marks are not
 * folded onto their base letters.
 *
 * @param {string[]} elems - Strings to filter; this array is left untouched.
 * @param {string} left - Inclusive lower bound.
 * @param {string} right - Exclusive upper bound.
 * @returns {string[]} A new sorted array with the matching entries
 *   (empty when nothing matches).
 */
function isBetween(elems, left, right) {
  // filter() builds a fresh array, so the sort below never reorders the
  // caller's input (the previous version sorted `elems` in place).
  return elems.filter((elem) => elem >= left && elem < right).sort();
}
// Expose isBetween to AQL under the name CUSTOM::IS_BETWEEN.
// NOTE(review): the third argument is presumably the "deterministic" flag — confirm against the ArangoDB docs.
aqlfunctions.register("CUSTOM::IS_BETWEEN", isBetween, true);
You can later run your AQL as this:
RETURN CUSTOM::IS_BETWEEN((FOR rec IN c RETURN rec.uri), "/c/de/aerotropismus/", "/c/de/aerotropismus0")
It will return only one record for the dataset you provided.

Related

How to calculate the amount with conditional?

I have such documents in MongoDB:
{
"_id":{
"$oid":"614e0f8fb2f4d8ea534b2ccb"
},
"userEmail":"abc#example.com",
"customId":"abc1",
"amountIn":10,
"amountOut":0,
"createdTimestamp":1632505743,
"message":"",
"status":"ERROR",
}
The amountOut field can be 0, or a positive numeric value.
I need to calculate the sum of the amountIn and amountOut fields only if it is positive.
At the moment I am doing it like this:
query = {
'createdTimestamp': {'$gte': 1632430800},
'createdTimestamp': {'$lte': 1632517200}
}
records = db.RecordModel.objects(__raw__=query).all()
total_amount = 0
for record in records:
if record.amountOut > 0:
total_amount += record.amountOut
else:
total_amount += record.amountIn
But this is very slow.
I know mongoengine has a sum method:
total_amount = db.PaymentModel.objects(__raw__=query).sum('amountIn')
But I don't know how to use the condition for this method.
Maybe there are some other ways to calculate the amount with the condition I need faster?
You can use mongoengine's aggregation api which just allows you to execute aggregations normally.
Now you can use this pipeline in code which utilizes $cond:
query = {
'createdTimestamp': {'$gte': 1632430800, '$lte': 1632517200},
}
pipeline = [
{"$match": query},
{
"$group": {
"_id": None,
"total_amount": {
"$sum": {
"$cond": [
{
"$gt": [
"$amountOut",
0
]
},
"$amountOut",
"$amountIn"
]
}
}
}
}
]
records = db.RecordModel.objects().aggregate(pipeline)
Mongo Playground

How can I return all nested objects using python?

I wrote an Elastic query which will check the condition (status="APPROVED") and Gets all approved_by objects.
This is my index (portfolio):
{
"settings": {},
"mappings": {
"portfolio": {
"properties": {
"status": {
"type": "keyword",
"normalizer": "lcase_ascii_normalizer"
},
"archived_at": {
"type": "date"
},
"approved_by": {
"id": "text",
"name":"text"
}
}
}
}
}
Currently I have 60 objects whose status is APPROVED, so when I run the query it should return 60 objects. In my case, however, I am getting only one object (I debugged the code: all 60 objects arrive as expected, but only a single object is returned). Please help.
My query:
profiles = client.search(index='portfolio', doc_type='portfolio',
scroll='10m', size=1000,
body={
"query": {"match": {"status": "APPROVED"}}
})
sid = profiles['_scroll_id']
scroll_size = len(profiles['hits']['hits'])
while scroll_size > 0:
for info in profiles['hits']['hits']:
item = info['_source']
approved_by_obj = item.get('approved_by')
if approved_by_obj:
return (jsonify({"approved_by": approved_by_obj}))
Expected o/p format:
{
"approved_by": {
"id": "system",
"name": "system"
}
}
You're getting only one result because you're returning from your loop, effectively breaking out of it completely.
So, instead of returning from it, append the found approved_by_object to a list of your choice and then return that 60-member list:
profiles = client.search(index='portfolio', doc_type='portfolio',
scroll='10m', size=1000,
body={
"query": {"match": {"status": "APPROVED"}}
})
sid = profiles['_scroll_id']
scroll_size = len(profiles['hits']['hits'])
approved_hits_sources = [] # <-- add this
while scroll_size > 0:
for info in profiles['hits']['hits']:
item = info['_source']
approved_by_obj = item.get('approved_by')
if approved_by_obj:
approved_hits_sources.append({"approved_by": approved_by_obj}) # <--- append and not return
return jsonify({"approved_hits_sources": approved_hits_sources})

Mongo indexing on nested object

My MongoDB collection's structure looks like this:
{
paymentTree: {
t1: { from: "id-123", to: "id-2334", },
t2: { from: "id-1443", to: "id-567", },
t3: { from: "id-76567", to: "id-2334", },
tn: { from: "id-12n", to: "id-233n", }
}
How can I index the field 'to' in paymentTree?
Currently it is not possible to create an index on a dynamic object key.
If you want to achieve this anyway, I would suggest transforming your schema structure to the Attribute Pattern:
change the paymentTree type from object to array,
turn each entry into a key-value pair: k for the tree's dynamic key and v for the { from, to } object,
{
paymentTree: [
{
"k": "t1",
"v": { "from": "id-123", "to": "id-2334" }
},
{
"k": "t2",
"v": { "from": "id-1443", "to": "id-567" }
},
{
"k": "t3",
"v": { "from": "id-76567", "to": "id-2334" }
},
{
"k": "tn",
"v": { "from": "id-12n", "to": "id-233n" }
}
]
}
Benefits of this structure:
Create Multikey Single Field Index on to:
You can create multikey indexes on array fields that contain nested objects,
db.collection.createIndex({ "paymentTree.v.to": 1 });
Create Multikey Compound Index on k and to:
db.collection.createIndex({ "paymentTree.k": 1, "paymentTree.v.to": 1 });
Push object to array:
You can insert object to paymentTree array using $push in update methods (updateOne and updateMany).
Pull object from array:
You can remove specific object on the base of k from paymentTree array using $pull in update methods (updateOne and updateMany).
Match conditions:
You can easily match conditions using conditional operators,
Ex.1:
{ "paymentTree.k": "t1" }
Ex.2:
{ "paymentTree.v.to": "id-2334" } // or { "paymentTree.v.from": "id-123" }
Ex.3:
{
"paymentTree": {
"$elemMatch": {
"k": "t1",
"v.to": "id-2334" // or "v.from": "id-123"
}
}
}
How can i easily select like paymentTree.t1, ..:
In Query:
There is an operator called $arrayToObject; you can use it to convert the paymentTree array to an object in projection stages ($project, $addFields, $set), see Playground,
The find() and findOne() projection can accept aggregation expressions and syntax from MongoDB v4.4 projection consistent with aggregation’s $project stage,
Outside Query:
You need to convert the paymentTree array to object format in your client-side language (Go, Node.js, etc.).
this works for me
_, err = db.Comments.Indexes().CreateOne(db.Ctx, mongo.IndexModel{Keys: bson.M{"t1.to": 1}})
if err != nil {
panic(err)
}

remove objects from array elastic search

I need to remove an object from an array when it satisfies a condition. I am able to update an object in the array based on a condition, as follows:
PUT twitter/twit/1
{"list":
[
{
"tweet_id": "1",
"a": "b"
},
{
"tweet_id": "123",
"a": "f"
}
]
}
POST /twitter/twit/1/_update
{"script":"foreach (item :ctx._source.list) {
if item['tweet_id'] == tweet_id) {
item['new_field'] = 'ghi';
}
}",
"params": {tweet_id": 123"}
}
this is working
for remove i am doing this
POST /twitter/twit/1/_update
{ "script": "foreach (item : ctx._source.list) {
if item['tweet_id'] == tweet_id) {
ctx._source.list.remove(item);
}
}",
"params": { tweet_id": "123" }
}
but this is not working and giving this error,
ElasticsearchIllegalArgumentException[failed to execute script];
nested: ConcurrentModificationException; Error:
ElasticsearchIllegalArgumentException[failed to execute script];
nested: ConcurrentModificationException
I am able to remove whole array or whole field using
"script": "ctx._source.remove('list')"
I am also able to remove object from array by specifying all the keys of an object using
"script":"ctx._source.list.remove(tag)",
"params" : {
"tag" : {"tweet_id": "123","a": "f"}
My Node module elasticsearch version is 2.4.2; the Elasticsearch server is 1.3.2.
You get that because you are trying to modify a list while iterating through it, meaning you want to change a list of object and, at the same time, listing those objects.
You instead need to do this:
POST /twitter/twit/1/_update
{
"script": "item_to_remove = nil; foreach (item : ctx._source.list) { if (item['tweet_id'] == tweet_id) { item_to_remove=item; } } if (item_to_remove != nil) ctx._source.list.remove(item_to_remove);",
"params": {"tweet_id": "123"}
}
If you have more than one item that matches the criteria, use a list instead:
POST /twitter/twit/1/_update
{
"script": "items_to_remove = []; foreach (item : ctx._source.list) { if (item['tweet_id'] == tweet_id) { items_to_remove.add(item); } } foreach (item : items_to_remove) {ctx._source.list.remove(item);}",
"params": {"tweet_id": "123"}
}
For people who need this working in Elasticsearch 2.0 and up: nil and foreach are not recognized by Groovy.
So here's an updated version, including an option to replace an item with the same id by a new object.
Passing the upsert as well makes sure the item gets added even if the document doesn't exist yet:
{
"script": "item_to_remove = null; ctx._source.delivery.each { elem -> if (elem.id == item_to_add.id) { item_to_remove=elem; } }; if (item_to_remove != null) ctx._source.delivery.remove(item_to_remove); if (item_to_add.size() > 1) ctx._source.delivery += item_to_add;",
"params": {"item_to_add": {"id": "5", "title": "New item"}},
"upsert": [{"id": "5", "title": "New item"}]
}

Performing a query on the lowest level of a tree-structured Dojo store

Let's say we have a nested data structure like so:
[
{
"name": "fruits",
"items": [
{ "name": "apple" ...}
{ "name": "lemon" ...}
{ "name": "peach" ...}
]
}
{
"name": "veggies",
"items": [
{ "name": "carrot" ...}
{ "name": "cabbage" ...}
]
}
{
"name": "meat",
"items": [
{ "name": "steak" ...}
{ "name": "pork" ...}
]
}
]
The above data is placed in a dojo/store/Memory. I want to perform a query for items that contain the letter "c", but only on the lower level (don't want to query the categories).
With a generic dojo/store/Memory, its query function only applies a filter on the top level, so the code
store.query(function(item) {
return item.name.indexOf("c") != -1;
});
will only perform the query on the category names (fruits, veggies, etc) instead of the actual items.
Is there a straightforward way to perform this query on the child nodes, and if there's a match, return all matching children as well as the parent? For instance, the "c" query would return the "fruits" node with its "peach" child only, "veggies" would remain intact, and "meat" would be left out of the query results entirely.
You can of course define your own checking method in the store's query method. I haven't checked whether this code runs perfectly, but you should get the idea of what it's meant to do.
// For each top-level category handed to the callback, gather the children
// whose name contains "c" into a { name, items } object.
// NOTE(review): the callback returns `found` even when no child matched, and
// an object is always truthy — if the store uses the callback purely as a
// filter predicate, every category would pass unchanged. Confirm against the
// dojo/store/Memory query semantics.
// NOTE(review): `d_array` is presumably the dojo/_base/array module — confirm.
store.query(function(item) {
var found = {
name: "",
items: []
};
var children = item.items;
d_array.forEach(children, function(child) {
// Keep only children whose name contains the letter "c".
if (child.name.indexOf("c") != -1) {
found.name = item.name;
found.items.push(child);
}
});
return found;
});
Hope this helps.

Resources