Elasticsearch aggregate by geo location

Elasticsearch aggregate by geo location - search

I want to group data by their location with shapes and bounding boxes. For example group the results by hemisphere:
GET cdc/_search
{
"from": 0,
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"group_by_geo": {
"geo_bounding_box": {
"field": "location",
"boxes": [
{
"top_left": {
"lat": 90,
"lon": -180
},
"bottom_right": {
"lat": 0,
"lon": 0
}
},
{
"top_left": {
"lat": 90,
"lon": 0
},
"bottom_right": {
"lat": 0,
"lon": 180
}
},
{
"top_left": {
"lat": 0,
"lon": -180
},
"bottom_right": {
"lat": -90,
"lon": 0
}
},
{
"top_left": {
"lat": 0,
"lon": 0
},
"bottom_right": {
"lat": -90,
"lon": 180
}
}
]
},
"aggs": {
"over_time": {
"date_histogram": {
"field": "date",
"interval": "day",
"format": "yyyy-MM-dd"
},
"aggs": {
"AvgTemp": {
"avg": {
"field": "hits"
}
}
}
}
}
}
}
}
So the over_timepart works fine isolated but this query gives me "reason": "Could not find aggregator type [geo_bounding_box] in [group_by_geo]"
My syntax is inspired by range aggregations but apparently this is not working in this case.
Is this possible or do I have to make separate queries filtering by the 4 boxes?

There's no geo_bounding_box aggregation, (there's a geo_bounding_box filter, though).
You can achieve what you need using a filters aggregation containing one geo_bounding_box filter for each bounding box. Basically something like this:
{
"from": 0,
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"group_by_geo": {
"filters": {
"filters": {
"box1": {
"geo_bounding_box": {
"location": {
"top_left": {
"lat": 90,
"lon": -180
},
"bottom_right": {
"lat": 0,
"lon": 0
}
}
}
},
"box2": {
"geo_bounding_box": {
"location": {
"top_left": {
"lat": 90,
"lon": 0
},
"bottom_right": {
"lat": 0,
"lon": 180
}
}
}
},
"box3": {
"geo_bounding_box": {
"location": {
"top_left": {
"lat": 0,
"lon": -180
},
"bottom_right": {
"lat": -90,
"lon": 0
}
}
}
},
"box4": {
"geo_bounding_box": {
"location": {
"top_left": {
"lat": 0,
"lon": 0
},
"bottom_right": {
"lat": -90,
"lon": 180
}
}
}
}
}
},
"aggs": {
"over_time": {
"date_histogram": {
"field": "date",
"interval": "day",
"format": "yyyy-MM-dd"
},
"aggs": {
"AvgTemp": {
"avg": {
"field": "hits"
}
}
}
}
}
}
}
}

Related

Opensearch transform splitting array values to new events

I'm transforming index that contains following event.
But the values inside of array are splitting into the new events.
e.g:
"serviceIdentifiers": "Redis"
"serviceIdentifiers":"Event_Detector Servicc"
etc.
{
"_index": "collated_txn_health_2022.05",
"_type": "_doc",
"_id": "LAUpboIBh6CUatILrsN3",
"_score": 1,
"_source": {
"timeInGMT": 0,
"kpiId": 0,
"compInstanceIdentifier": "d0352b7d-0484-4714-bbc8-eb67cbb7be70",
"agentIdentifier": "ComponentAgent-171",
"kpiIdentifier": "PACKETS_DROPPED",
"categoryIdentifier": "Network Utilization",
"applicationIdentifier": null,
"serviceIdentifiers": [
"Supervisor_Controller Service",
"Event_Detector Service",
"UI_Service",
"Redis",
"CC_Service"
],
"clusterIdentifiers": [
"a5c57ef5-4018-41b8-b727-27c8f8376c0e"
],
"collectionInterval": 60,
"value": "0.0",
"kpiType": "Core",
"groupAttribute": "ALL",
"groupIdentifier": null,
"watcherValue": null,
"errorCode": null,
"clusterOperation": null,
"aggLevelInMins": 1,
"error": false,
"kpiGroup": false,
"discovery": false,
"maintenanceExcluded": false,
"#timestamp": "2022-05-01T01:32:00.000Z"
}
Following is the transform job configuration.
curl -u admin:admin -XPUT "http://XXX.XXX.XX.XXX9201/_plugins/_transform/my-array-job-2" -H 'Content-type: application/json' -d'
{
"transform": {
"schedule": {
"interval": {
"start_time": 1659705000000,
"period": 1,
"unit": "Minutes"
}
},
"metadata_id": null,
"updated_at": 1659456180000,
"enabled": true,
"enabled_at": 1659457620000,
"description": "",
"source_index": "collated_txn_health_2022.05",
"data_selection_query": {
"match_all": {
"boost": 1
}
},
"target_index": "transform_collated_txn_health_2022.05",
"page_size": 1000,
"groups": [
{
"date_histogram": {
"fixed_interval": "1m",
"source_field": "#timestamp",
"target_field": "#timestamp",
"timezone": "Asia/Calcutta"
}
},
{
"terms": {
"source_field": "clusterIdentifiers",
"target_field": "clusterIdentifiers"
}
},
{
"terms": {
"source_field": "serviceIdentifiers",
"target_field": "serviceIdentifiers"
}
},
{
"terms": {
"source_field": "compInstanceIdentifier",
"target_field": "compInstanceIdentifier"
}
},
{
"terms": {
"source_field": "agentIdentifier",
"target_field": "agentIdentifier"
}
}
],
"aggregations": {
"count_#timestamp": {
"value_count": {
"field": "#timestamp"
}
}
}
}
}'
Following are the events from the transform index.
{
"_index": "transform_heal_collated_txn_health_2022.05",
"_type": "_doc",
"_id": "ybK0McQ9NZrt9xdo9iWKbA",
"_score": 1,
"_source": {
"transform._id": "my-array-job-2",
"transform._doc_count": 2,
"#timestamp": 1651365120000,
"clusterIdentifiers": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
"serviceIdentifiers": "Redis",
"compInstanceIdentifier": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
"agentIdentifier": "ComponentAgent-170",
"count_#timestamp": 2
}
},
{
"_index": "transform_heal_collated_txn_health_2022.05",
"_type": "_doc",
"_id": "Wf-4KwnFaYuw9bL-V-9WEQ",
"_score": 1,
"_source": {
"transform._id": "my-array-job-2",
"transform._doc_count": 2,
"#timestamp": 1651365120000,
"clusterIdentifiers": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
"serviceIdentifiers": "Redis_Server Service",
"compInstanceIdentifier": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
"agentIdentifier": "ComponentAgent-170",
"count_#timestamp": 2
}
It would be a great help if somebody suggest me with solution for array fields.

Have solved the issue with following painless script. Which help to transform array fields in opensearch.
PUT _plugins/_transform/my-array-job-2
{
"transform": {
"schedule": {
"interval": {
"start_time": 1659705000000,
"period": 1,
"unit": "Minutes"
}
},
"metadata_id": null,
"updated_at": 1659456180000,
"enabled": true,
"enabled_at": 1659457620000,
"description": "",
"source_index": "heal_collated_txn_heal_health_2022.05_reindex",
"target_index": "transform_heal_collated_txn_heal_health_2022.05",
"page_size": 1000,
"groups": [
{
"date_histogram": {
"fixed_interval": "1m",
"source_field": "#timestamp",
"target_field": "#timestamp",
"timezone": "Asia/Calcutta"
}
},
{
"terms": {
"source_field": "kpiIdentifier",
"target_field": "kpiIdentifier"
}
},
{
"terms": {
"source_field": "clusterIdentifiers",
"target_field": "clusterIdentifiers"
}
}
],
"aggregations": {
"count_#timestamp": {
"value_count": {
"field": "#timestamp"
}
},
"count_agentIdentifier": {
"value_count": {
"field": "agentIdentifier"
}
},
"sum_value": {
"sum": {
"field": "value"
}
},
"max_value": {
"max": {
"field": "value"
}
},
"avg_value": {
"avg": {
"field": "value"
}
},
"count_value": {
"value_count": {
"field": "value"
}
},
"percentiles_value": {
"percentiles": {
"field": "value",
"percents": [
95
],
"keyed": true,
"tdigest": {
"compression": 100
}
}
},
"serviceIdentifiers": {
"scripted_metric": {
"init_script": "state.docs = []",
"map_script": """
Map span = [
'url':doc['serviceIdentifiers']
];
state.docs.add(span)
""",
"combine_script": "return state.docs;",
"reduce_script": """
def all_docs = [];
for (s in states) {
for (span in s) {
all_docs.add(span);
}
}
def size = all_docs.size();
def serviceIdentifiers_1 = all_docs[0]['url'];
def ret = new HashMap();
ret['serviceIdentifiers'] = serviceIdentifiers_1;
return ret;
"""
}
}
}
}
}

How to do elasticsearch aggregation together with sort and find duplicate values

I want to find duplicate values and if there are duplicate values then I sort based on the last update, so what I take is the newest one, how do I do aggregations? I've tried this aggregation.
I've tried adding sort to sources but it still doesn't work, I've tried several ways but it still fails sometimes it comes out 1 but only old data, sometimes the order is correct from the newest but appears 2 data
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"BILLING_TYPE_CD": "Service Bundle"
}
},
{
"match": {
"ID": "xxxx"
}
},
{
"exists": {
"field": "LI_MILESTONE"
}
},
{
"exists": {
"field": "LI_SID"
}
},
{
"query_string": {
"default_field": "LI_SID",
"query": "*xxxx*"
}
}
],
"must_not": {
"bool": {
"must": [
{
"query_string": {
"default_field": "LI_PRODUCT_NAME",
"query": "*Network*"
}
},
{
"terms": {
"LI_MILESTONE.keyword": [
"Abandoned",
"Cancelled"
]
}
},
{
"terms": {
"ORDER_STATUS.keyword": [
"Abandoned",
"Cancelled",
"Drop In Progress"
]
}
},
{
"term": {
"STATUS.keyword": ""
}
}
]
}
}
}
},
"sort": [
{
"TGL_CREATED": {
"order": "desc"
}
}
],
"aggs": {
"list_products": {
"composite": {
"size": 50000,
"sources": [
{
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"order": "desc"
}
}
}
]
},
"aggs": {
"totalService": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000,
"order": {
"_term": "asc"
}
}
},
"bucket_sort": {
"bucket_sort": {
"from": 0,
"size": 10
}
},
"includes_source": {
"top_hits": {
"size": 1,
"_source": {
"includes": [
"LAST_UPDATE",
"xxxxx",
"xxxxx",
"xxxxx",
"xxx"
]
}
}
}
}
},
"term_product": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000
}
}
}
}

Like this ?
{
"aggs": {
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"size": 10
},
"aggs": {
"hit": {
"top_hits": {
"size": 1,
"sort": [
{
"LAST_UPDATE": "desc"
}
]
}
}
}
}
},
"size": 0
}
You need to use aggregations response not hits

How to calculate total for each token in Elasticsearch

I have a request into Elastic
{
"query":{
"bool":{
"must":[
{
"query_string":{
"query":"something1 OR something2 OR something3",
"default_operator":"OR"
}
}
],
"filter":{
"range":{
"time":{
"gte":date
}
}
}
}
}
}
I wanna calculate count for each token in all documents using elastic search in one request, for example:
something1: 26 documents
something2: 12 documents
something3: 1 documents

Assuming that the tokens are not akin to enumerations (i.e. constrained set of specific values, like state names, which would make a terms aggregation your best bet with the right mapping), I think the closest thing to what you want would be to use filters aggregation:
POST your-index/_search
{
"query":{
"bool":{
"must":[
{
"query_string":{
"query":"something1 OR something2 OR something3",
"default_operator":"OR"
}
}
],
"filter":{
"range":{
"time":{
"gte":date
}
}
}
}
},
"aggs": {
"token_doc_counts": {
"filters" : {
"filters" : {
"something1" : {
"bool": {
"must": { "query_string" : { "query" : "something1" } },
"filter": { "range": { "time": { "gte": date } } }
}
},
"something2" : {
"bool": {
"must": { "query_string" : { "query" : "something2" } },
"filter": { "range": { "time": { "gte": date } } }
}
},
"something3" : {
"bool": {
"must": { "query_string" : { "query" : "something3" } },
"filter": { "range": { "time": { "gte": date } } }
}
}
}
}
}
}
}
The response would look something like:
{
"took": 9,
"timed_out": false,
"_shards": ...,
"hits": ...,
"aggregations": {
"token_doc_counts": {
"buckets": {
"something1": {
"doc_count": 1
},
"something2": {
"doc_count": 2
},
"something3": {
"doc_count": 3
}
}
}
}
}

You can split your query into filters aggregation of three filters. For reference look here: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-filters-aggregation.html

What you would need to do, is to create a Copy_To field and have the mapping as shown below.
Depending on the fields that your query_string queries, you need to include some or all of the fields with copy_to field.
By default query_string searches all the fields, so you may need to specify copy_to for all the fields as shown in below mapping, where for sake of simplicity, I've created only three fields, title, field_2 and a third field content which would act as copied to field.
Mapping
PUT <your_index_name>
{
"mappings": {
"mydocs": {
"properties": {
"title": {
"type": "text",
"copy_to": "content"
},
"field_2": {
"type": "text",
"copy_to": "content"
},
"content": {
"type": "text",
"fielddata": true
}
}
}
}
}
Sample Documents
POST <your_index_name>/mydocs/1
{
"title": "something1",
"field_2": "something2"
}
POST <your_index_name>/mydocs/2
{
"title": "something2",
"field_2": "something3"
}
Query:
You'd get the required document counts for the each and every token using the below aggregation query and I've made use of Terms Aggregation:
POST <your_index_name>/_search
{
"size": 0,
"query": {
"query_string": {
"query": "something1 OR something2 OR something3"
}
},
"aggs": {
"myaggs": {
"terms": {
"field": "content",
"include" : ["something1","something2","something3"]
}
}
}
}
Query Response:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"myaggs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "something2",
"doc_count": 2
},
{
"key": "something1",
"doc_count": 1
},
{
"key": "something3",
"doc_count": 1
}
]
}
}
}
Let me know if it helps!

Elasticsearch sorting not working properly based on time

I have 20 documents and i'm performing aggregation based on reportid. I need top 10 aggregation based on time in descending. But the response is very random. What am i missing? I'm using elasticsearch 6.2.2 and node.js 4.5. Below here is the body search query for elasticsearch request.
{
"size": 0,
"sort": [
{
"triggerDate":
{
"order": "desc"
}
}],
"query":
{
"bool":
{
"must": [
{
"query_string":
{
"query": "*",
"analyze_wildcard": true
}
},
{
"range":
{
"triggerDate":
{
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string":
{
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}]
}
},
"_source":
{
"excludes": []
},
"aggs":
{
"reportid":
{
"terms":
{
"field": "reportId.keyword",
"size": 10
}
}
}

I think what you need to do is aggregate on reportId.keyword and sort aggregation by date.
So here is the solution
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"triggerDate": {
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string": {
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}
]
}
},
"_source": {
"excludes": []
},
"aggs": {
"reportid": {
"terms": {
"field": "reportId.keyword",
"size": 10,
"order": {
"2-orderAgg": "desc"
}
},
"aggs": {
"2-orderAgg": {
"max": {
"field": "triggerDate"
}
}
}
}
}
}
You need to sort the aggregation results by a custom aggregation and not the query results.

ElasticSearch : GeoLocation distance search across types

As shown below, there are two types in my city index - zoo and hotel. How do I find all zoos having a hotel in 1KM radius? Here is the mapping of my index :
GET /city/_mapping
{
"city": {
"mappings": {
"hotel": {
"properties": {
"location": {
"type": "geo_point"
},
"name": {
"type": "string"
}
}
},
"zoo": {
"properties": {
"location": {
"type": "geo_point"
},
"name": {
"type": "string"
}
}
}
}
}
}

You can do it with a geo-distance filter for the whole index (just don't specify a type).
As I quick test I created an index like this:
PUT /test_index/
{
"mappings": {
"hotel": {
"properties": {
"location": {
"type": "geo_point"
},
"name": {
"type": "string"
}
}
},
"zoo": {
"properties": {
"location": {
"type": "geo_point"
},
"name": {
"type": "string"
}
}
}
}
}
Added a couple of documents
POST /test_index/_bulk
{"index":{"_type":"hotel","_id":1}}
{"name":"hotel1","location":{"lat" : 40.001, "lon" : -70.001}}
{"index":{"_type":"zoo","_id":1}}
{"name":"zoo1","location":{"lat" : 40.002, "lon" : -70.002}}
And then I can search like this. This query returns the one document:
POST /test_index/_search
{
"query": {
"filtered": {
"filter": {
"geo_distance": {
"distance": 200,
"distance_unit": "km",
"location": {
"lat": 40,
"lon": -70
}
}
}
}
}
}
...
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "hotel",
"_id": "1",
"_score": 1,
"_source": {
"name": "hotel1",
"location": {
"lat": 40.001,
"lon": -70.001
}
}
}
]
}
}
And this query returns both:
POST /test_index/_search
{
"query": {
"filtered": {
"filter": {
"geo_distance": {
"distance": 300,
"distance_unit": "km",
"location": {
"lat": 40,
"lon": -70
}
}
}
}
}
}
...
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "hotel",
"_id": "1",
"_score": 1,
"_source": {
"name": "hotel1",
"location": {
"lat": 40.001,
"lon": -70.001
}
}
},
{
"_index": "test_index",
"_type": "zoo",
"_id": "1",
"_score": 1,
"_source": {
"name": "zoo1",
"location": {
"lat": 40.002,
"lon": -70.002
}
}
}
]
}
}
Here's the code I used to test it:
http://sense.qbox.io/gist/948d23a5327cf5f22dd368146f37d09e30765fee

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Elasticsearch aggregate by geo location - search

Related

Opensearch transform splitting array values to new events

How to do elasticsearch aggregation together with sort and find duplicate values

How to calculate total for each token in Elasticsearch

Elasticsearch sorting not working properly based on time

ElasticSearch : GeoLocation distance search across types

Categories

Resources