remove objects from array elastic search - node.js

I need to remove an object from an array when it satisfies a condition. I am already able to update an object in the array on the basis of a condition, as follows:
PUT twitter/twit/1
{"list":
[
{
"tweet_id": "1",
"a": "b"
},
{
"tweet_id": "123",
"a": "f"
}
]
}
POST /twitter/twit/1/_update
{"script":"foreach (item :ctx._source.list) {
if item['tweet_id'] == tweet_id) {
item['new_field'] = 'ghi';
}
}",
"params": {tweet_id": 123"}
}
this is working
for remove i am doing this
POST /twitter/twit/1/_update
{ "script": "foreach (item : ctx._source.list) {
if item['tweet_id'] == tweet_id) {
ctx._source.list.remove(item);
}
}",
"params": { tweet_id": "123" }
}
but this is not working and giving this error,
ElasticsearchIllegalArgumentException[failed to execute script];
nested: ConcurrentModificationException; Error:
ElasticsearchIllegalArgumentException[failed to execute script];
nested: ConcurrentModificationException
I am able to remove whole array or whole field using
"script": "ctx._source.remove('list')"
I am also able to remove object from array by specifying all the keys of an object using
"script":"ctx._source.list.remove(tag)",
"params" : {
"tag" : {"tweet_id": "123","a": "f"}
my node module elastic search version is 2.4.2 elastic search server is 1.3.2

You get that because you are trying to modify a list while iterating through it, meaning you want to change a list of object and, at the same time, listing those objects.
You instead need to do this:
POST /twitter/twit/1/_update
{
"script": "item_to_remove = nil; foreach (item : ctx._source.list) { if (item['tweet_id'] == tweet_id) { item_to_remove=item; } } if (item_to_remove != nil) ctx._source.list.remove(item_to_remove);",
"params": {"tweet_id": "123"}
}
If you have more than one item that matches the criteria, use a list instead:
POST /twitter/twit/1/_update
{
"script": "items_to_remove = []; foreach (item : ctx._source.list) { if (item['tweet_id'] == tweet_id) { items_to_remove.add(item); } } foreach (item : items_to_remove) {ctx._source.list.remove(item);}",
"params": {"tweet_id": "123"}
}

For people who need this working in Elasticsearch 2.0 and up: nil and foreach are not recognized by Groovy.
So here's an updated version, including an option to replace an item that has the same id with a new object.
Also, passing an upsert makes sure the item gets added even if the document doesn't exist yet:
{
"script": "item_to_remove = null; ctx._source.delivery.each { elem -> if (elem.id == item_to_add.id) { item_to_remove=elem; } }; if (item_to_remove != null) ctx._source.delivery.remove(item_to_remove); if (item_to_add.size() > 1) ctx._source.delivery += item_to_add;",
"params": {"item_to_add": {"id": "5", "title": "New item"}},
"upsert": [{"id": "5", "title": "New item"}]
}

Related

How to filter document by a value in the dictionary in arangodb?

I have following structure in arangodb collection named clientShadow.
{
"_class": "com.syncservice.document.ClientShadow",
"sessions": {
"session-id-2": {
"workspaceId": "workspace-id-1",
"message": {
"lastSynced": 0,
"lastAvailable": 1674630705773
}
},
"session-id-1": {
"workspaceId": "workspace-id-1",
"message": {
"lastSynced": 0,
"lastAvailable": 1674630705773
}
},
"session-id-0": {
"workspaceId": "workspace-id-1",
"message": {
"lastSynced": 0,
"lastAvailable": 1674630705773
}
}
},
"synced": true
}
Here sessions is a map/dictionary of session_id as string and session object as value.
I want to fetch all the sessions from collection where session's lastSynced and lastAvailable aren't same.
I tried following query
FOR doc IN clientShadow
FOR session IN doc['sessions']
FILTER session.message.lastSynced != session.message.lastAvailable
RETURN {'session': session}
But I found out that FOR IN works with collections and gives me following error
Query: AQL: collection or array expected as operand to FOR loop; you provided a value of type 'object' (while executing)
To retain the original data and query structure, don't use ATTRIBUTES, use VALUES:
FOR doc IN clientShadow
FOR session IN VALUES(doc['sessions'])
FILTER session.message.lastSynced != session.message.lastAvailable
RETURN {'session': session}
If you cannot change the data structure, you can use ATTRIBUTES to access the sessions as array:
Edit: Code as fixed by Kshitij Dhakal (ATTRIBUTES returns a list of the names of the attributes, not their values)
FOR doc IN clientShadow
FOR session IN ATTRIBUTES(doc['sessions'])
FILTER
doc.sessions[session].message.lastSynced
!= doc.sessions[session].message.lastAvailable
RETURN {'session': doc.sessions[session]}
Old (wrong) suggestion:
FOR doc IN clientShadow
FOR session IN ATTRIBUTES(doc['sessions'])
FILTER session.message.lastSynced != session.message.lastAvailable
RETURN {'session': session}
If you can change the data structure, don't use an object for sessions, but a list:
{
"_class": "com.syncservice.document.ClientShadow",
"sessions": [
{
"sessionId": "session-id-2",
"workspaceId": "workspace-id-1",
"message": {
"lastSynced": 0,
"lastAvailable": 1674630705773
}
},
...
]
}

How can I return all nested objects using python?

I wrote an Elastic query which will check the condition (status="APPROVED") and Gets all approved_by objects.
This is my index (portfolio):
{
"settings": {},
"mappings": {
"portfolio": {
"properties": {
"status": {
"type": "keyword",
"normalizer": "lcase_ascii_normalizer"
},
"archived_at": {
"type": "date"
},
"approved_by": {
"id": "text",
"name":"text"
}
}
}
}
}
Currently I have 60 objects whose status is approved, so when I run the query it should return 60 objects, but in my case I am getting only one object (I debugged the code: all 60 hits come back from the search as expected, but only a single object is returned). Please help.
My query:
profiles = client.search(index='portfolio', doc_type='portfolio',
scroll='10m', size=1000,
body={
"query": {"match": {"status": "APPROVED"}}
})
sid = profiles['_scroll_id']
scroll_size = len(profiles['hits']['hits'])
while scroll_size > 0:
for info in profiles['hits']['hits']:
item = info['_source']
approved_by_obj = item.get('approved_by')
if approved_by_obj:
return (jsonify({"approved_by": approved_by_obj}))
Expected o/p format:
{
"approved_by": {
"id": "system",
"name": "system"
}
}
You're getting only one result because you're returning from your loop, effectively breaking out of it completely.
So, instead of returning from it, append the found approved_by_object to a list of your choice and then return that 60-member list:
profiles = client.search(index='portfolio', doc_type='portfolio',
scroll='10m', size=1000,
body={
"query": {"match": {"status": "APPROVED"}}
})
sid = profiles['_scroll_id']
scroll_size = len(profiles['hits']['hits'])
approved_hits_sources = [] # <-- add this
while scroll_size > 0:
for info in profiles['hits']['hits']:
item = info['_source']
approved_by_obj = item.get('approved_by')
if approved_by_obj:
approved_hits_sources.append({"approved_by": approved_by_obj}) # <--- append and not return
return jsonify({"approved_hits_sources": approved_hits_sources})

ArangoDB query returns extra items with diacritic marks

I have the question that is related to diacritic marks. For the sake of simplicity, let's assume, I have a document collection "c". I put two documents:
{"uri": "/c/de/aërotropismus/"}, {"uri": "/c/de/aerotropismus/"}
As you see, they are almost the same, except for the diacritic mark.
Then, I create a persistent index on the field "uri".
After this I do the query:
for doc in c
filter doc.uri >= "/c/de/aerotropismus/" and doc.uri < "/c/de/aerotropismus0"
return doc
I expect this query to return one result, but I get two results:
[
{
"_key": "37070873",
"_id": "c/37070873",
"_rev": "_bHsOMnm---",
"uri": "/c/de/aerotropismus/"
},
{
"_key": "37070853",
"_id": "c/37070853",
"_rev": "_bHsO_m6---",
"uri": "/c/de/aërotropismus/"
}
]
Why is that? And how can I fix it so that it returns just one result:
[
{
"_key": "37070873",
"_id": "c/37070873",
"_rev": "_bHsOMnm---",
"uri": "/c/de/aerotropismus/"
}
]
This indeed seems like a bug:
FOR doc IN c
SORT doc.uri DESC
RETURN KEEP(doc, ['uri'])
Results:
[
{
"uri": "/c/de/aerotropismus/1"
},
{
"uri": "/c/de/aërotropismus/"
},
{
"uri": "/c/de/aerotropismus/"
}
]
Workaround
You can create a user defined function inside ArangoDB to sort an array of strings.
More reading:
Registering an AQL user function
Workaround Implementation
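You can execute the following code in arangosh to register your custom function (note: the module path in the snippet should read "@arangodb/aql/functions"; the leading "#" is a transcription artifact):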
const aqlfunctions = require("#arangodb/aql/functions");
/**
 * Returns the elements of `elems` that lie in the half-open range
 * [left, right), in ascending order.
 *
 * Ordering comes from the default `Array.prototype.sort()`, which compares
 * elements as strings by UTF-16 code unit. The bounds are checked with the
 * plain `>=` and `<` operators.
 *
 * @param {Array} elems - Values to search; this array is not modified.
 * @param {*} left - Inclusive lower bound.
 * @param {*} right - Exclusive upper bound.
 * @returns {Array} A new sorted array with the matching elements, or an
 *   empty array when nothing falls inside the range.
 */
function isBetween(elems, left, right) {
  // Sort a copy: sort() works in place and would otherwise reorder the
  // caller's array as a side effect.
  const sorted = elems.slice().sort();

  const leftIndex = sorted.findIndex((elem) => elem >= left);
  if (leftIndex === -1) {
    return [];
  }

  // Walk back from the end to the last element below the upper bound.
  // Stopping at leftIndex is enough: anything before it is out of range.
  let rightIndex = sorted.length - 1;
  while (rightIndex >= leftIndex && !(sorted[rightIndex] < right)) {
    rightIndex -= 1;
  }
  if (rightIndex < leftIndex) {
    return [];
  }

  return sorted.slice(leftIndex, rightIndex + 1);
}
aqlfunctions.register("CUSTOM::IS_BETWEEN", isBetween, true);
You can later run your AQL as this:
RETURN CUSTOM::IS_BETWEEN((FOR rec IN c RETURN rec.uri), "/c/de/aerotropismus/", "/c/de/aerotropismus0")
It will return only one record for the dataset you provided.

How to create a view in CouchDB with multiple WHERE and OR clauses

How would I create a view equivalent to a SQL query like this?
SELECT * FROM bucket WHERE (uid='$uid' AND accepted='Y') OR (uid='$uid' AND authorid='$logginid')
My data is stored this way:
{
"id": 9476183,
"authorid": 85490,
"content": "some text here",
"uid": 41,
"accepted": "Y",
"time": "2014-12-09 10:44:01",
"type": "testimonial"
}
function(doc) {
if (doc.accepted == 'Y') {
emit(doc.uid, null);
}
emit([doc.uid, doc.authorid], null);
}
One request is enough. You can query the view written by @Simon (reproduced above) using POST with the parameter keys:[[uid, authorid], uid].
See http://docs.couchdb.org/en/latest/api/ddoc/views.html#post--db-_design-ddoc-_view-view for more details.
A view could look like this:
function(doc) {
if (doc.accepted == 'Y') {
emit(doc.uid, null);
}
emit([doc.uid, doc.authorid], null);
}
You would query it with key=$uid first. If there is no match, you would query it with key=[$uid,$loginid].

Performing a query on the lowest level of a tree-structured Dojo store

Let's say we have a nested data structure like so:
[
{
"name": "fruits",
"items": [
{ "name": "apple" ...}
{ "name": "lemon" ...}
{ "name": "peach" ...}
]
}
{
"name": "veggies",
"items": [
{ "name": "carrot" ...}
{ "name": "cabbage" ...}
]
}
{
"name": "meat",
"items": [
{ "name": "steak" ...}
{ "name": "pork" ...}
]
}
]
The above data is placed in a dojo/store/Memory. I want to perform a query for items that contain the letter "c", but only on the lower level (don't want to query the categories).
With a generic dojo/store/Memory, its query function only applies a filter on the top level, so the code
store.query(function(item) {
return item.name.indexOf("c") != -1;
});
will only perform the query on the category names (fruits, veggies, etc) instead of the actual items.
Is there a straightforward way to perform this query on the child nodes, and if there's a match, return all matching children as well as the parent? For instance, the "c" query would return the "fruits" node with its "peach" child only, "veggies" would remain intact, and "meat" would be left out of the query results entirely.
You can of course define your own checking function and pass it to the store's query method. I haven't checked whether this code runs perfectly, but it should show what it's meant to do.
// A function passed to store.query() is used as a filter predicate: its
// return value only decides whether the top-level item is kept. Returning a
// freshly built object (always truthy) would keep every category and throw
// the built object away. So: first filter categories that have at least one
// matching child, then map each kept category to a trimmed copy.

// True when the child's name contains the letter "c".
function nameContainsC(child) {
  return child.name.indexOf("c") != -1;
}

store.query(function(item) {
  // Keep only categories with at least one matching child.
  return d_array.some(item.items, nameContainsC);
}).map(function(item) {
  // Build a copy so the objects held by the store are left untouched.
  return {
    name: item.name,
    items: d_array.filter(item.items, nameContainsC)
  };
});
Hope this helps.

Resources