Getting sum sub aggregation

Getting sum sub aggregation - search

I'd like to get the sum of a sub-aggregation. For example, I group by smartphone, then by carrier, and then compute the average price for each carrier. I'd like to get the sum of all prices across all carriers for a specific smartphone. So essentially, I want something like this:
{
"aggs": {
"group_by_smartphones": {
"terms": {
"field": "smartphone",
"order": {
"_term": "asc"
},
"size": 200
},
"aggs": {
"group_by_sum": {
"sum": {
"field": "price"
},
"aggs": {
"group_by_carrier": {
"terms": {
"field": "carrier",
"order": {
"group_by_avg": "desc"
}
},
"aggs": {
"group_by_avg": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}
}
}
Except, when I do it like this I get this error:
"type": "aggregation_initialization_exception",
"reason": "Aggregator [group_by_sum] of type [sum] cannot accept sub-aggregations"
How do I fix it so I can get the sum of all prices for each smartphone?

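You're almost there. A metric aggregation such as sum cannot contain sub-aggregations (that is what the error is telling you), so the sum and group_by_carrier sub-aggregations both need to be at the same level, as siblings under group_by_smartphones: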
{
"aggs": {
"group_by_smartphones": {
"terms": {
"field": "smartphone",
"order": {
"_term": "asc"
},
"size": 200
},
"aggs": {
"sum_prices": {
"sum": {
"field": "price"
}
},
"group_by_carrier": {
"terms": {
"field": "carrier",
"order": {
"group_by_avg": "desc"
}
},
"aggs": {
"group_by_avg": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}

Related

python elasticsearch-dsl return all unique values for specific key

I have a field called account_number. It contains a random 6-character string.
I can't seem to get the Python elasticsearch-dsl library to return just those unique values.
search = Search(using=client, index=index_name).query(
{
"range": {
"date": {
"gte": "2021-08-01T08:00:00.000Z",
"lte": "2021-08-31T23:59:59.599Z"
#"format": "strict_date_optional_time"
}
}
})
search.aggs.bucket("account_number","terms",field="account_number",size="1000")
es_data = search.execute()
I'm not sure whether I need to specify account_number in the query or in the agg bucket. Right now I just get random full rows returned, with all columns.
Here is an example of a working query in non-DSL form. I didn't think the metric was necessary, but maybe it is.
{
"aggs": {
"3": {
"terms": {
"field": "account_number",
"order": {
"1": "desc"
},
"size": 5
},
"aggs": {
"1": {
"sum": {
"field": "hits"
}
}
}
}
},
"size": 0,
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
},
{
"field": "date",
"format": "date_time"
}
],
"_source": {
"excludes": []
},
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"range": {
"date": {
"gte": "2021-04-08T21:00:00.000Z",
"lte": "2021-10-08T21:00:00.000Z",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}

You can add extra(size=0) to your query:
search = Search(using=client, index=index_name).query(
{
"range": {
"date": {
"gte": "2021-08-01T08:00:00.000Z",
"lte": "2021-08-31T23:59:59.599Z"
#"format": "strict_date_optional_time"
}
}
}).extra(size=0)
Then es_data will contain no hits, and es_data.aggregations.account_number.buckets will contain only the unique account numbers.
Hope it helps.

How to do elasticsearch aggregation together with sort and find duplicate values

I want to find duplicate values and, where duplicates exist, sort them by last update so that I keep only the newest one. How do I do this with aggregations? I've tried the aggregation below.
I've tried adding a sort to the sources, but it still doesn't work. I've tried several approaches, but they all fail: sometimes I get a single result but it is the old data, and sometimes the order is correct (newest first) but two results appear.
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"BILLING_TYPE_CD": "Service Bundle"
}
},
{
"match": {
"ID": "xxxx"
}
},
{
"exists": {
"field": "LI_MILESTONE"
}
},
{
"exists": {
"field": "LI_SID"
}
},
{
"query_string": {
"default_field": "LI_SID",
"query": "*xxxx*"
}
}
],
"must_not": {
"bool": {
"must": [
{
"query_string": {
"default_field": "LI_PRODUCT_NAME",
"query": "*Network*"
}
},
{
"terms": {
"LI_MILESTONE.keyword": [
"Abandoned",
"Cancelled"
]
}
},
{
"terms": {
"ORDER_STATUS.keyword": [
"Abandoned",
"Cancelled",
"Drop In Progress"
]
}
},
{
"term": {
"STATUS.keyword": ""
}
}
]
}
}
}
},
"sort": [
{
"TGL_CREATED": {
"order": "desc"
}
}
],
"aggs": {
"list_products": {
"composite": {
"size": 50000,
"sources": [
{
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"order": "desc"
}
}
}
]
},
"aggs": {
"totalService": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000,
"order": {
"_term": "asc"
}
}
},
"bucket_sort": {
"bucket_sort": {
"from": 0,
"size": 10
}
},
"includes_source": {
"top_hits": {
"size": 1,
"_source": {
"includes": [
"LAST_UPDATE",
"xxxxx",
"xxxxx",
"xxxxx",
"xxx"
]
}
}
}
}
},
"term_product": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000
}
}
}
}

Like this?
{
"aggs": {
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"size": 10
},
"aggs": {
"hit": {
"top_hits": {
"size": 1,
"sort": [
{
"LAST_UPDATE": "desc"
}
]
}
}
}
}
},
"size": 0
}
You need to read the results from the aggregations section of the response, not from hits.

How to perform sub aggregation that will calculate fields with no value per bucket?

Currently building the following Elasticsearch 6.8 query/aggregation:
{
"sort": [
{
"DateCreated": {
"order": "desc"
}
}
],
"query": {
"bool": {
"must": [
{
"match": {
"InternalEntityId": "ExampleValue1111"
}
},
{
"match": {
"Direction": "Inbound"
}
}
]
}
},
"aggs": {
"top_ext": {
"terms": {
"field": "ExternalAddress.keyword"
},
"aggs": {
"top_date": {
"top_hits": {
"sort": [
{
"DateCreated": {
"order": "desc"
}
}
],
"size": 1
}
}
}
}
}
}
How do we do the following in the same search?
Count, PER bucket, the hits that have no value for a field (a must_not/exists-style query).
Ideally, in the response of the top_ext agg, each bucket would include a count of the records that have no value.
Thanks!

You can do two things here:
1. Sort the "top_ext" terms agg buckets in ascending order of doc count, so that you can take the top n zero-size buckets.
2. Apply a bucket selector aggregation in parallel to your inner hits, so that only those inner hits that have a zero docCount will appear.
Here is a query DSL that uses both of the above approaches. (You can plug in all the other required elements of the query; I have focused mainly on the aggregation part here.)
GET kibana_sample_data_ecommerce/_search
{
"size": 0,
"aggs": {
"outer": {
"terms": {
"field": "products.category.keyword",
"size": 10,
"order": {
"_count": "asc"
}
},
"aggs": {
"inner": {
"top_hits": {
"size": 10
}
},
"restrictedBuckets": {
"bucket_selector": {
"buckets_path": {
"docCount": "_count"
},
"script": "params.docCount<1"
}
}
}
}
}
}

Elasticsearch sorting not working properly based on time

I have 20 documents and I'm performing an aggregation based on reportId. I need the top 10 aggregation buckets ordered by time, descending, but the order of the response looks random. What am I missing? I'm using Elasticsearch 6.2.2 and Node.js 4.5. Below is the body of the Elasticsearch search request.
{
"size": 0,
"sort": [
{
"triggerDate":
{
"order": "desc"
}
}],
"query":
{
"bool":
{
"must": [
{
"query_string":
{
"query": "*",
"analyze_wildcard": true
}
},
{
"range":
{
"triggerDate":
{
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string":
{
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}]
}
},
"_source":
{
"excludes": []
},
"aggs":
{
"reportid":
{
"terms":
{
"field": "reportId.keyword",
"size": 10
}
}
}

I think what you need to do is aggregate on reportId.keyword and sort the aggregation by date.
So here is the solution:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"triggerDate": {
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string": {
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}
]
}
},
"_source": {
"excludes": []
},
"aggs": {
"reportid": {
"terms": {
"field": "reportId.keyword",
"size": 10,
"order": {
"2-orderAgg": "desc"
}
},
"aggs": {
"2-orderAgg": {
"max": {
"field": "triggerDate"
}
}
}
}
}
}
You need to sort the aggregation results by a custom aggregation and not the query results.

Limit filter by terms elastic search

I would like to set a size limit per term: retrieve 3 results for the term "tag", 5 results for the term "dossier" and 1 result for the term "personality".
Can I use a limit filter, or is there another solution?
{
"_source":{
"include":[
"path",
"type"
]
},
"query":{
"bool":{
"should":[
{
"match":{
"title.acp":{
"query":"car",
"boost":10
}
}
},
{
"match":{
"title.acp":{
"query":"car",
"fuzziness":"AUTO",
"prefix_length":3
}
}
}
],
"filter":[
{
"terms":{
"type":[
"tag",
"dossier",
"personality"
]
}
}
]
}
},
"highlight":{
"fields":{
"title.acp":{}
}
}
};

It looks like, for a given 'title', you want the top x documents for each of the types, where 'x' varies with the type.
One way to do this is to use a filter aggregation and top_hits in conjunction:
Example:
{
"size": 0,
"query": {
"bool": {
"should": [
{
"match": {
"title.acp": {
"query": "car",
"boost": 10
}
}
},
{
"match": {
"title.acp": {
"query": "car",
"fuzziness": "AUTO",
"prefix_length": 3
}
}
}
],
"filter": [
{
"terms": {
"type": [
"tag",
"dossier",
"personality"
]
}
}
]
}
},
"aggs": {
"tag": {
"filter": {
"term": {
"type": "tag"
}
},
"aggs": {
"tag_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 3,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
},
"dossier": {
"filter": {
"term": {
"type": "dossier"
}
},
"aggs": {
"dossier_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 5,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
},
"personality": {
"filter": {
"term": {
"type": "personality"
}
},
"aggs": {
"personality_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 1,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
}
}
}

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Getting sum sub aggregation - search

Related

python elasticsearch-dsl return all unique values for specific key

How to do elasticsearch aggregation together with sort and find duplicate values

How to perform sub aggregation that will calculate fields with no value per bucket?

Elasticsearch sorting not working properly based on time

Limit filter by terms elastic search

Categories

Resources