Limit filter by terms elastic search

Limit filter by terms elastic search - search

I would put a size limit per terms, 3 retrieve results for the term "tag", 5 results for the term "dossier" and 1 result for the term "personality".
Can i use limit filter or and other solution ?
{
"_source":{
"include":[
"path",
"type"
]
},
"query":{
"bool":{
"should":[
{
"match":{
"title.acp":{
"query":"car",
"boost":10
}
}
},
{
"match":{
"title.acp":{
"query":"car",
"fuzziness":"AUTO",
"prefix_length":3
}
}
}
],
"filter":[
{
"terms":{
"type":[
"tag",
"dossier",
"personality"
]
}
}
]
}
},
"highlight":{
"fields":{
"title.acp":{}
}
}
};

Looks like for a given 'title' you want top x documents for each of the types where 'x' varies with type
One way to do this is use aggregation filter and top-hits in conjunction :
Example :
{
"size": 0,
"query": {
"bool": {
"should": [
{
"match": {
"title.acp": {
"query": "car",
"boost": 10
}
}
},
{
"match": {
"title.acp": {
"query": "car",
"fuzziness": "AUTO",
"prefix_length": 3
}
}
}
],
"filter": [
{
"terms": {
"type": [
"tag",
"dossier",
"personality"
]
}
}
]
}
},
"aggs": {
"tag": {
"filter": {
"term": {
"type": "tag"
}
},
"aggs": {
"tag_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 3,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
},
"dossier": {
"filter": {
"term": {
"type": "dossier"
}
},
"aggs": {
"dossier_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 5,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
},
"personality": {
"filter": {
"term": {
"type": "personality"
}
},
"aggs": {
"personality_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 1,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
}
}
}

Related

How to do elasticsearch aggregation together with sort and find duplicate values

I want to find duplicate values and if there are duplicate values then I sort based on the last update, so what I take is the newest one, how do I do aggregations? I've tried this aggregation.
I've tried adding sort to sources but it still doesn't work, I've tried several ways but it still fails sometimes it comes out 1 but only old data, sometimes the order is correct from the newest but appears 2 data
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"BILLING_TYPE_CD": "Service Bundle"
}
},
{
"match": {
"ID": "xxxx"
}
},
{
"exists": {
"field": "LI_MILESTONE"
}
},
{
"exists": {
"field": "LI_SID"
}
},
{
"query_string": {
"default_field": "LI_SID",
"query": "*xxxx*"
}
}
],
"must_not": {
"bool": {
"must": [
{
"query_string": {
"default_field": "LI_PRODUCT_NAME",
"query": "*Network*"
}
},
{
"terms": {
"LI_MILESTONE.keyword": [
"Abandoned",
"Cancelled"
]
}
},
{
"terms": {
"ORDER_STATUS.keyword": [
"Abandoned",
"Cancelled",
"Drop In Progress"
]
}
},
{
"term": {
"STATUS.keyword": ""
}
}
]
}
}
}
},
"sort": [
{
"TGL_CREATED": {
"order": "desc"
}
}
],
"aggs": {
"list_products": {
"composite": {
"size": 50000,
"sources": [
{
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"order": "desc"
}
}
}
]
},
"aggs": {
"totalService": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000,
"order": {
"_term": "asc"
}
}
},
"bucket_sort": {
"bucket_sort": {
"from": 0,
"size": 10
}
},
"includes_source": {
"top_hits": {
"size": 1,
"_source": {
"includes": [
"LAST_UPDATE",
"xxxxx",
"xxxxx",
"xxxxx",
"xxx"
]
}
}
}
}
},
"term_product": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000
}
}
}
}

Like this ?
{
"aggs": {
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"size": 10
},
"aggs": {
"hit": {
"top_hits": {
"size": 1,
"sort": [
{
"LAST_UPDATE": "desc"
}
]
}
}
}
}
},
"size": 0
}
You need to use aggregations response not hits

How to check if all values exists inside nested object elastic search

I have following document in ES :
[
{
"event_id": 123,
"event_name": "test event",
"event_date": "2018-12-21",
"ticket_group": [
{
"available": 8,
"price": 8,
"id": "159831",
"parking_passes_available": 0,
"field_values": [
{
"field_id": 589,
"field_value": "KUMAR"
},
{
"field_id": 717,
"field_value": "AMIT"
},
{
"field_id": 1360,
"field_value": "SAM"
},
{
"field_id": 2239,
"field_value": ""
},
{
"field_id": 2240,
"field_value": ""
},
{
"field_id": 2241,
"field_value": ""
},
{
"field_id": 2242,
"field_value": ""
}
]
}
]
}
]
and i want to search with multiple field_id and field_value with AND operator. But it works if there is single condition but not for multiple cases. Here is what i have tried so far :
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "ticket_group",
"score_mode": "max",
"inner_hits": {
"from": 0,
"size": 10000
},
"query": {
"bool": {
"must": [
{
"nested": {
"path": "ticket_group.field_values",
"score_mode": "max",
"inner_hits": {
"from": 0,
"size": 10000
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match": {
"ticket_group.field_values.field_id": 589
}
},
{
"match": {
"ticket_group.field_values.field_value": "KUMAR"
}
}
]
}
},
{
"bool": {
"must": [
{
"match": {
"ticket_group.field_values.field_id": 717
}
},
{
"match": {
"ticket_group.field_values.field_value": "AMIT"
}
}
]
}
}
]
}
}
}
}
]
}
}
}
}
]
}
},
"size": 10,
"from": 0,
"sort": {
"event_date": {
"order": "asc"
}
}
}
i want to search ticket group if field_id=717 with value "amit" and field_id=589 with value "kumar" exists in field_values object inside ticket_group. Using above query i am getting no records while objects with both values exist in field_values.
Can anyone help to build a such query ?
Thank You

Below is what you are looking for. You simply need to push the second level nested into two must clauses.
POST <your_index_name>/_search
{
"query":{
"bool":{
"must":[
{
"nested":{
"path":"ticket_group",
"score_mode":"max",
"inner_hits":{
},
"query":{
"bool":{
"must":[
{
"nested":{
"path":"ticket_group.field_values",
"score_mode":"max",
"inner_hits":{
"name":"inner_clause_1"
},
"query":{
"bool":{
"must":[
{
"match":{
"ticket_group.field_values.field_id":589
}
},
{
"match":{
"ticket_group.field_values.field_value":"KUMAR"
}
}
]
}
}
}
},
{
"nested":{
"path":"ticket_group.field_values",
"score_mode":"max",
"inner_hits":{
"name":"inner_clause_2"
},
"query":{
"bool":{
"must":[
{
"match":{
"ticket_group.field_values.field_id":717
}
},
{
"match":{
"ticket_group.field_values.field_value":"AMIT"
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
}
Notice that I've named the inner_hits in the second level nested queries.
If you don't do that(try by removing the name key in the inner_hits), then you would only see the inner_hit for the last clause which ends up overwriting the inner_hits result of the first nested clause.
Hope this helps!

count of records that have matched conditions inside a bool query

I have an ES query along the lines of (condition1 or condition2 or condition3....) and otherConditions.
Each condition inside the brackets is a 'must' clause that searches for all documents that match a given name, location and product.
GET index/type/_count
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name1"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product1"
}
}
},
{
"term": {
"LOCATION": {
"value": "location1"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name2"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product2"
}
}
},
{
"term": {
"LOCATION": {
"value": "location2"
}
}
}
]
}
}
],
"must_not": [
{
"exists": {
"field": "some other condition"
}
}
],
"must": [
{
"term": {
"somefield": "value"
}
},
{
"range": {
"time": {
"gte": "now-6M"
}
}
}
]
}
}
}
Is it possible to get count of records that matches each of the 'must' clause inside the 'should' clause instead of an overall count using one query?

Yes, you can do it using aggregations, in particular filter aggregation. The query might look like this:
POST index/type/_search
{
"query": {
"bool": {
"should": [
"<clause1>",
"<clause2>"
],
"must_not": [
"<mustNotClause3>"
],
"must": [
"<mustClause4>"
]
}
},
"aggs": {
"clause1": {
"filter": "<clause1>"
},
"clause2": {
"filter": "<clause2>"
}
}
}
Note that we are using _search API here. If you don't need search results, you can set size: 0, this will return you only total count and the aggregations.
In your case the query will literally be this:
POST index/type/_search
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name1"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product1"
}
}
},
{
"term": {
"LOCATION": {
"value": "location1"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name2"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product2"
}
}
},
{
"term": {
"LOCATION": {
"value": "location2"
}
}
}
]
}
}
],
"must_not": [
{
"exists": {
"field": "some other condition"
}
}
],
"must": [
{
"term": {
"somefield": "value"
}
},
{
"range": {
"time": {
"gte": "now-6M"
}
}
}
]
}
},
"aggs": {
"clause1": {
"filter": {
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name1"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product1"
}
}
},
{
"term": {
"LOCATION": {
"value": "location1"
}
}
}
]
}
}
},
"clause2": {
"filter": {
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name2"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product2"
}
}
},
{
"term": {
"LOCATION": {
"value": "location2"
}
}
}
]
}
}
}
}
}
Note that sum of counts of aggregations clause1 and clause2 may be greater than total count.
Hope that helps!

Elasticsearch sorting not working properly based on time

I have 20 documents and i'm performing aggregation based on reportid. I need top 10 aggregation based on time in descending. But the response is very random. What am i missing? I'm using elasticsearch 6.2.2 and node.js 4.5. Below here is the body search query for elasticsearch request.
{
"size": 0,
"sort": [
{
"triggerDate":
{
"order": "desc"
}
}],
"query":
{
"bool":
{
"must": [
{
"query_string":
{
"query": "*",
"analyze_wildcard": true
}
},
{
"range":
{
"triggerDate":
{
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string":
{
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}]
}
},
"_source":
{
"excludes": []
},
"aggs":
{
"reportid":
{
"terms":
{
"field": "reportId.keyword",
"size": 10
}
}
}

I think what you need to do is aggregate on reportId.keyword and sort aggregation by date.
So here is the solution
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"triggerDate": {
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string": {
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}
]
}
},
"_source": {
"excludes": []
},
"aggs": {
"reportid": {
"terms": {
"field": "reportId.keyword",
"size": 10,
"order": {
"2-orderAgg": "desc"
}
},
"aggs": {
"2-orderAgg": {
"max": {
"field": "triggerDate"
}
}
}
}
}
}
You need to sort the aggregation results by a custom aggregation and not the query results.

Elastic Search: Matching fields in different nested objects

I am new to Elastic Search and this is my user index:
{
"user": {
"properties": {
"branches": {
"type": "nested"
},
"lists": {
"type": "nested"
},
"events": {
"type": "nested"
},
"optOuts": {
"type": "nested"
}
}
}
}
Here, branches, events and lists will contain the field id(int),countryIso(String)..
I need to find users having emails who belong to countryIso 'XX' for example.
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
This way I can get them if they have that country in the branches object. What I want is that the countryIso is there in the branches or lists or events.
Note: any of these might be empty i.e. branches may not be there or lists miht not be there etc. Or lists might be there with no countryIso..
I tried this:
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso"
],
"query": "AE KW"
}
}
}
},
{
"nested": {
"path": [
"lists"
],
"query": {
"query_string": {
"fields": [
"lists.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
AND
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches",
"lists"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso",
"lists.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
But neither works.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Limit filter by terms elastic search - search

Related

How to do elasticsearch aggregation together with sort and find duplicate values

How to check if all values exists inside nested object elastic search

count of records that have matched conditions inside a bool query

Elasticsearch sorting not working properly based on time

Elastic Search: Matching fields in different nested objects

Categories

Resources