I'd like to get the sum of a sub aggregation. For example, I have group by smartphones, group by carrier and then the average price for that carrier. I'd like to get the sum of all prices for all carriers for a specific smartphone. So essentially, I want something like this:
{
"aggs": {
"group_by_smartphones": {
"terms": {
"field": "smartphone",
"order": {
"_term": "asc"
},
"size": 200
},
"aggs": {
"group_by_sum": {
"sum": {
"field": "price"
},
"aggs": {
"group_by_carrier": {
"terms": {
"field": "carrier",
"order": {
"group_by_avg": "desc"
}
},
"aggs": {
"group_by_avg": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}
}
}
Except, when I do it like this I get this error:
"type": "aggregation_initialization_exception",
"reason": "Aggregator [group_by_sum] of type [sum] cannot accept sub-aggregations"
How do I fix it so I can get the sum of all prices for each smartphone?
You're almost there, actually the sum and group_by_carrier sub-aggregations both need to be at the same level:
{
"aggs": {
"group_by_smartphones": {
"terms": {
"field": "smartphone",
"order": {
"_term": "asc"
},
"size": 200
},
"aggs": {
"sum_prices": {
"sum": {
"field": "price"
}
},
"group_by_carrier": {
"terms": {
"field": "carrier",
"order": {
"group_by_avg": "desc"
}
},
"aggs": {
"group_by_avg": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}
Related
I have a field called account_number . It contains random 6 character string.
I can't seem to get python elasticsearch dsl to return just those unique values.
search = Search(using=client, index=index_name).query(
{
"range": {
"date": {
"gte": "2021-08-01T08:00:00.000Z",
"lte": "2021-08-31T23:59:59.599Z"
#"format": "strict_date_optional_time"
}
}
})
search.aggs.bucket("account_number","terms",field="account_number",size="1000")
es_data = search.execute()
Not sure if I need to define the account_number in the query or if its in the agg bucket?. Right now I just get random full rows returned with all columns
Here is an example of a working query in non-dsl form. I didnt think the metric was necessary but maybe it is.
{
"aggs": {
"3": {
"terms": {
"field": "account_number",
"order": {
"1": "desc"
},
"size": 5
},
"aggs": {
"1": {
"sum": {
"field": "hits"
}
}
}
}
},
"size": 0,
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
},
{
"field": "date",
"format": "date_time"
}
],
"_source": {
"excludes": []
},
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"range": {
"date": {
"gte": "2021-04-08T21:00:00.000Z",
"lte": "2021-10-08T21:00:00.000Z",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
You can add extra(size=0) to your query:
search = Search(using=client, index=index_name).query(
{
"range": {
"date": {
"gte": "2021-08-01T08:00:00.000Z",
"lte": "2021-08-31T23:59:59.599Z"
#"format": "strict_date_optional_time"
}
}
}).extra(size=0)
Then your es_data will be empty and es_data.aggregations.account_number.buckets will contain only unique account numbers.
Hope it helps.
I want to find duplicate values and if there are duplicate values then I sort based on the last update, so what I take is the newest one, how do I do aggregations? I've tried this aggregation.
I've tried adding sort to sources but it still doesn't work, I've tried several ways but it still fails sometimes it comes out 1 but only old data, sometimes the order is correct from the newest but appears 2 data
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"BILLING_TYPE_CD": "Service Bundle"
}
},
{
"match": {
"ID": "xxxx"
}
},
{
"exists": {
"field": "LI_MILESTONE"
}
},
{
"exists": {
"field": "LI_SID"
}
},
{
"query_string": {
"default_field": "LI_SID",
"query": "*xxxx*"
}
}
],
"must_not": {
"bool": {
"must": [
{
"query_string": {
"default_field": "LI_PRODUCT_NAME",
"query": "*Network*"
}
},
{
"terms": {
"LI_MILESTONE.keyword": [
"Abandoned",
"Cancelled"
]
}
},
{
"terms": {
"ORDER_STATUS.keyword": [
"Abandoned",
"Cancelled",
"Drop In Progress"
]
}
},
{
"term": {
"STATUS.keyword": ""
}
}
]
}
}
}
},
"sort": [
{
"TGL_CREATED": {
"order": "desc"
}
}
],
"aggs": {
"list_products": {
"composite": {
"size": 50000,
"sources": [
{
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"order": "desc"
}
}
}
]
},
"aggs": {
"totalService": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000,
"order": {
"_term": "asc"
}
}
},
"bucket_sort": {
"bucket_sort": {
"from": 0,
"size": 10
}
},
"includes_source": {
"top_hits": {
"size": 1,
"_source": {
"includes": [
"LAST_UPDATE",
"xxxxx",
"xxxxx",
"xxxxx",
"xxx"
]
}
}
}
}
},
"term_product": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000
}
}
}
}
Like this ?
{
"aggs": {
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"size": 10
},
"aggs": {
"hit": {
"top_hits": {
"size": 1,
"sort": [
{
"LAST_UPDATE": "desc"
}
]
}
}
}
}
},
"size": 0
}
You need to use aggregations response not hits
Currently building the following Elasticsearch 6.8 query\aggregation:
{
"sort": [
{
"DateCreated": {
"order": "desc"
}
}
],
"query": {
"bool": {
"must": [
{
"match": {
"InternalEntityId": "ExampleValue1111"
}
},
{
"match": {
"Direction": "Inbound"
}
}
]
}
},
"aggs": {
"top_ext": {
"terms": {
"field": "ExternalAddress.keyword"
},
"aggs": {
"top_date": {
"top_hits": {
"sort": [
{
"DateCreated": {
"order": "desc"
}
}
],
"size": 1
}
}
}
}
}
}
How do we perform (in the same search):
Count the sum of (hits per bucket) that have no value (must_not exists style query) PER bucket
Ideally, with the return of the top_ext agg return.. each bucket would have a count of the records that have no value.
Thanks!
Now you can do two things here,
1. Either sort the "top_ext" terms agg bucket by asc order of doc count and you can use the top n zero size buckets here
2. You can apply a bucket selector aggregation in parallel to you inner hits so that only those inner hits will appear that have zero docCounts.
Here is a query dsl that uses both the above approaches.(You can plug in all other required elements of the query, I have focused mainly on the aggregation part here)
GET kibana_sample_data_ecommerce/_search
{
"size": 0,
"aggs": {
"outer": {
"terms": {
"field": "products.category.keyword",
"size": 10,
"order": {
"_count": "asc"
}
},
"aggs": {
"inner": {
"top_hits": {
"size": 10
}
},
"restrictedBuckets": {
"bucket_selector": {
"buckets_path": {
"docCount": "_count"
},
"script": "params.docCount<1"
}
}
}
}
}
}
I have 20 documents and i'm performing aggregation based on reportid. I need top 10 aggregation based on time in descending. But the response is very random. What am i missing? I'm using elasticsearch 6.2.2 and node.js 4.5. Below here is the body search query for elasticsearch request.
{
"size": 0,
"sort": [
{
"triggerDate":
{
"order": "desc"
}
}],
"query":
{
"bool":
{
"must": [
{
"query_string":
{
"query": "*",
"analyze_wildcard": true
}
},
{
"range":
{
"triggerDate":
{
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string":
{
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}]
}
},
"_source":
{
"excludes": []
},
"aggs":
{
"reportid":
{
"terms":
{
"field": "reportId.keyword",
"size": 10
}
}
}
I think what you need to do is aggregate on reportId.keyword and sort aggregation by date.
So here is the solution
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"triggerDate": {
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string": {
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}
]
}
},
"_source": {
"excludes": []
},
"aggs": {
"reportid": {
"terms": {
"field": "reportId.keyword",
"size": 10,
"order": {
"2-orderAgg": "desc"
}
},
"aggs": {
"2-orderAgg": {
"max": {
"field": "triggerDate"
}
}
}
}
}
}
You need to sort the aggregation results by a custom aggregation and not the query results.
I would put a size limit per terms, 3 retrieve results for the term "tag", 5 results for the term "dossier" and 1 result for the term "personality".
Can i use limit filter or and other solution ?
{
"_source":{
"include":[
"path",
"type"
]
},
"query":{
"bool":{
"should":[
{
"match":{
"title.acp":{
"query":"car",
"boost":10
}
}
},
{
"match":{
"title.acp":{
"query":"car",
"fuzziness":"AUTO",
"prefix_length":3
}
}
}
],
"filter":[
{
"terms":{
"type":[
"tag",
"dossier",
"personality"
]
}
}
]
}
},
"highlight":{
"fields":{
"title.acp":{}
}
}
};
Looks like for a given 'title' you want top x documents for each of the types where 'x' varies with type
One way to do this is use aggregation filter and top-hits in conjunction :
Example :
{
"size": 0,
"query": {
"bool": {
"should": [
{
"match": {
"title.acp": {
"query": "car",
"boost": 10
}
}
},
{
"match": {
"title.acp": {
"query": "car",
"fuzziness": "AUTO",
"prefix_length": 3
}
}
}
],
"filter": [
{
"terms": {
"type": [
"tag",
"dossier",
"personality"
]
}
}
]
}
},
"aggs": {
"tag": {
"filter": {
"term": {
"type": "tag"
}
},
"aggs": {
"tag_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 3,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
},
"dossier": {
"filter": {
"term": {
"type": "dossier"
}
},
"aggs": {
"dossier_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 5,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
},
"personality": {
"filter": {
"term": {
"type": "personality"
}
},
"aggs": {
"personality_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 1,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
}
}
}