Elastic Search: Matching fields in different nested objects - search

I am new to Elastic Search and this is my user index:
{
"user": {
"properties": {
"branches": {
"type": "nested"
},
"lists": {
"type": "nested"
},
"events": {
"type": "nested"
},
"optOuts": {
"type": "nested"
}
}
}
}
Here, branches, events and lists will each contain fields such as id (int) and countryIso (String).
I need to find users having emails who belong to countryIso 'XX' for example.
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
This way I can get them if they have that country in the branches object. What I want is to match users whose countryIso is present in branches, lists, or events.
Note: any of these might be empty, i.e. branches may not be there, or lists might not be there, etc. Or lists might be there with no countryIso.
I tried this:
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso"
],
"query": "AE KW"
}
}
}
},
{
"nested": {
"path": [
"lists"
],
"query": {
"query_string": {
"fields": [
"lists.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
AND
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches",
"lists"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso",
"lists.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
But neither works.

Related

How to do elasticsearch aggregation together with sort and find duplicate values

I want to find duplicate values and, if there are duplicates, sort them by last update so that I keep only the newest one. How do I do this with aggregations? I've tried the aggregation below.
I've tried adding a sort to the sources, but it still doesn't work. I've tried several approaches, but they all fail: sometimes I get a single result but it is the old data, and sometimes the order is correct (newest first) but two results appear.
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"BILLING_TYPE_CD": "Service Bundle"
}
},
{
"match": {
"ID": "xxxx"
}
},
{
"exists": {
"field": "LI_MILESTONE"
}
},
{
"exists": {
"field": "LI_SID"
}
},
{
"query_string": {
"default_field": "LI_SID",
"query": "*xxxx*"
}
}
],
"must_not": {
"bool": {
"must": [
{
"query_string": {
"default_field": "LI_PRODUCT_NAME",
"query": "*Network*"
}
},
{
"terms": {
"LI_MILESTONE.keyword": [
"Abandoned",
"Cancelled"
]
}
},
{
"terms": {
"ORDER_STATUS.keyword": [
"Abandoned",
"Cancelled",
"Drop In Progress"
]
}
},
{
"term": {
"STATUS.keyword": ""
}
}
]
}
}
}
},
"sort": [
{
"TGL_CREATED": {
"order": "desc"
}
}
],
"aggs": {
"list_products": {
"composite": {
"size": 50000,
"sources": [
{
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"order": "desc"
}
}
}
]
},
"aggs": {
"totalService": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000,
"order": {
"_term": "asc"
}
}
},
"bucket_sort": {
"bucket_sort": {
"from": 0,
"size": 10
}
},
"includes_source": {
"top_hits": {
"size": 1,
"_source": {
"includes": [
"LAST_UPDATE",
"xxxxx",
"xxxxx",
"xxxxx",
"xxx"
]
}
}
}
}
},
"term_product": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000
}
}
}
}
Like this ?
{
"aggs": {
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"size": 10
},
"aggs": {
"hit": {
"top_hits": {
"size": 1,
"sort": [
{
"LAST_UPDATE": "desc"
}
]
}
}
}
}
},
"size": 0
}
You need to read the results from the aggregations section of the response, not from hits.

count of records that have matched conditions inside a bool query

I have an ES query along the lines of (condition1 or condition2 or condition3....) and otherConditions.
Each condition inside the brackets is a 'must' clause that searches for all documents that match a given name, location and product.
GET index/type/_count
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name1"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product1"
}
}
},
{
"term": {
"LOCATION": {
"value": "location1"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name2"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product2"
}
}
},
{
"term": {
"LOCATION": {
"value": "location2"
}
}
}
]
}
}
],
"must_not": [
{
"exists": {
"field": "some other condition"
}
}
],
"must": [
{
"term": {
"somefield": "value"
}
},
{
"range": {
"time": {
"gte": "now-6M"
}
}
}
]
}
}
}
Is it possible to get the count of records that match each of the 'must' clauses inside the 'should' clause, instead of an overall count, using one query?
Yes, you can do it using aggregations, in particular filter aggregation. The query might look like this:
POST index/type/_search
{
"query": {
"bool": {
"should": [
"<clause1>",
"<clause2>"
],
"must_not": [
"<mustNotClause3>"
],
"must": [
"<mustClause4>"
]
}
},
"aggs": {
"clause1": {
"filter": "<clause1>"
},
"clause2": {
"filter": "<clause2>"
}
}
}
Note that we are using the _search API here. If you don't need the search results, you can set size: 0; this will return only the total count and the aggregations.
In your case the query will literally be this:
POST index/type/_search
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name1"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product1"
}
}
},
{
"term": {
"LOCATION": {
"value": "location1"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name2"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product2"
}
}
},
{
"term": {
"LOCATION": {
"value": "location2"
}
}
}
]
}
}
],
"must_not": [
{
"exists": {
"field": "some other condition"
}
}
],
"must": [
{
"term": {
"somefield": "value"
}
},
{
"range": {
"time": {
"gte": "now-6M"
}
}
}
]
}
},
"aggs": {
"clause1": {
"filter": {
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name1"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product1"
}
}
},
{
"term": {
"LOCATION": {
"value": "location1"
}
}
}
]
}
}
},
"clause2": {
"filter": {
"bool": {
"must": [
{
"term": {
"NAME": {
"value": "name2"
}
}
},
{
"term": {
"PRODUCT": {
"value": "product2"
}
}
},
{
"term": {
"LOCATION": {
"value": "location2"
}
}
}
]
}
}
}
}
}
Note that the sum of the counts of aggregations clause1 and clause2 may be greater than the total count, because a single document can match more than one clause.
Hope that helps!

Elasticsearch sorting not working properly based on time

I have 20 documents and I'm performing an aggregation based on reportid. I need the top 10 aggregation results ordered by time, descending. But the response is very random. What am I missing? I'm using Elasticsearch 6.2.2 and Node.js 4.5. Below is the search request body for Elasticsearch.
{
"size": 0,
"sort": [
{
"triggerDate":
{
"order": "desc"
}
}],
"query":
{
"bool":
{
"must": [
{
"query_string":
{
"query": "*",
"analyze_wildcard": true
}
},
{
"range":
{
"triggerDate":
{
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string":
{
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}]
}
},
"_source":
{
"excludes": []
},
"aggs":
{
"reportid":
{
"terms":
{
"field": "reportId.keyword",
"size": 10
}
}
}
I think what you need to do is aggregate on reportId.keyword and sort the aggregation by date.
So here is the solution:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"triggerDate": {
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string": {
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}
]
}
},
"_source": {
"excludes": []
},
"aggs": {
"reportid": {
"terms": {
"field": "reportId.keyword",
"size": 10,
"order": {
"2-orderAgg": "desc"
}
},
"aggs": {
"2-orderAgg": {
"max": {
"field": "triggerDate"
}
}
}
}
}
}
You need to sort the aggregation results by a custom sub-aggregation, rather than sorting the query results.

How to use AND operator in elasticsearch to query nested fields?

I have 2 documents in elasticsearch in the below structure:
Document 1:
{
"specification": [
{
"name": "Processor",
"value": "Intel"
},
{
"name": "RAM",
"value": "2GB"
}
]
}
Document 2:
{
"specification": [
{
"name": "Processor",
"value": "Intel"
},
{
"name": "RAM",
"value": "3GB"
}
]
}
I want to get the document that has a specification with the values Intel and 2GB, i.e. the 1st document. But when I try to use must (AND operator) I get nothing. If I use should (OR operator) I get both documents. Can anyone help me with this? Below is my query:
{
"query": {
"nested": {
"path": "specification",
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{ "match": { "specification.name": "Processor" }},
{ "match": { "specifications.value": "Intel" }}
]
}
},
{
"bool": {
"must": [
{ "match": { "specification.name": "RAM" }},
{ "match": { "specifications.value": "2GB" }}
]
}
}
]
}
}
}
}
}
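Try this one (each name/value pair gets its own nested query, because a single nested object cannot be both the Processor entry and the RAM entry; note also that the field is specification.value, not specifications.value):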
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "specification",
"query": {
"bool": {
"must": [
{
"match": {
"specification.name": "Processor"
}
},
{
"match": {
"specification.value": "Intel"
}
}
]
}
}
}
},
{
"nested": {
"path": "specification",
"query": {
"bool": {
"must": [
{
"match": {
"specification.name": "RAM"
}
},
{
"match": {
"specification.value": "2GB"
}
}
]
}
}
}
}
]
}
}
}

Limit filter by terms elastic search

I would like to put a size limit per term: retrieve 3 results for the term "tag", 5 results for the term "dossier" and 1 result for the term "personality".
Can I use the limit filter, or is there another solution?
{
"_source":{
"include":[
"path",
"type"
]
},
"query":{
"bool":{
"should":[
{
"match":{
"title.acp":{
"query":"car",
"boost":10
}
}
},
{
"match":{
"title.acp":{
"query":"car",
"fuzziness":"AUTO",
"prefix_length":3
}
}
}
],
"filter":[
{
"terms":{
"type":[
"tag",
"dossier",
"personality"
]
}
}
]
}
},
"highlight":{
"fields":{
"title.acp":{}
}
}
};
It looks like, for a given 'title', you want the top x documents for each of the types, where 'x' varies with the type.
One way to do this is to use the filter aggregation and the top_hits aggregation in conjunction:
Example:
{
"size": 0,
"query": {
"bool": {
"should": [
{
"match": {
"title.acp": {
"query": "car",
"boost": 10
}
}
},
{
"match": {
"title.acp": {
"query": "car",
"fuzziness": "AUTO",
"prefix_length": 3
}
}
}
],
"filter": [
{
"terms": {
"type": [
"tag",
"dossier",
"personality"
]
}
}
]
}
},
"aggs": {
"tag": {
"filter": {
"term": {
"type": "tag"
}
},
"aggs": {
"tag_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 3,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
},
"dossier": {
"filter": {
"term": {
"type": "dossier"
}
},
"aggs": {
"dossier_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 5,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
},
"personality": {
"filter": {
"term": {
"type": "personality"
}
},
"aggs": {
"personality_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 1,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
}
}
}

Resources