Elasticsearch sorting not working properly based on time - node.js

I have 20 documents and i'm performing aggregation based on reportid. I need top 10 aggregation based on time in descending. But the response is very random. What am i missing? I'm using elasticsearch 6.2.2 and node.js 4.5. Below here is the body search query for elasticsearch request.
{
"size": 0,
"sort": [
{
"triggerDate":
{
"order": "desc"
}
}],
"query":
{
"bool":
{
"must": [
{
"query_string":
{
"query": "*",
"analyze_wildcard": true
}
},
{
"range":
{
"triggerDate":
{
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string":
{
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}]
}
},
"_source":
{
"excludes": []
},
"aggs":
{
"reportid":
{
"terms":
{
"field": "reportId.keyword",
"size": 10
}
}
}

I think what you need to do is aggregate on reportId.keyword and sort aggregation by date.
So here is the solution
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"triggerDate": {
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string": {
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}
]
}
},
"_source": {
"excludes": []
},
"aggs": {
"reportid": {
"terms": {
"field": "reportId.keyword",
"size": 10,
"order": {
"2-orderAgg": "desc"
}
},
"aggs": {
"2-orderAgg": {
"max": {
"field": "triggerDate"
}
}
}
}
}
}
You need to sort the aggregation results by a custom aggregation and not the query results.

Related

python elasticsearch-dsl return all unique values for specific key

I have a field called account_number . It contains random 6 character string.
I can't seem to get python elasticsearch dsl to return just those unique values.
search = Search(using=client, index=index_name).query(
{
"range": {
"date": {
"gte": "2021-08-01T08:00:00.000Z",
"lte": "2021-08-31T23:59:59.599Z"
#"format": "strict_date_optional_time"
}
}
})
search.aggs.bucket("account_number","terms",field="account_number",size="1000")
es_data = search.execute()
Not sure if I need to define the account_number in the query or if its in the agg bucket?. Right now I just get random full rows returned with all columns
Here is an example of a working query in non-dsl form. I didnt think the metric was necessary but maybe it is.
{
"aggs": {
"3": {
"terms": {
"field": "account_number",
"order": {
"1": "desc"
},
"size": 5
},
"aggs": {
"1": {
"sum": {
"field": "hits"
}
}
}
}
},
"size": 0,
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
},
{
"field": "date",
"format": "date_time"
}
],
"_source": {
"excludes": []
},
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"range": {
"date": {
"gte": "2021-04-08T21:00:00.000Z",
"lte": "2021-10-08T21:00:00.000Z",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
You can add extra(size=0) to your query:
search = Search(using=client, index=index_name).query(
{
"range": {
"date": {
"gte": "2021-08-01T08:00:00.000Z",
"lte": "2021-08-31T23:59:59.599Z"
#"format": "strict_date_optional_time"
}
}
}).extra(size=0)
Then your es_data will be empty and es_data.aggregations.account_number.buckets will contain only unique account numbers.
Hope it helps.

How to do elasticsearch aggregation together with sort and find duplicate values

I want to find duplicate values and if there are duplicate values then I sort based on the last update, so what I take is the newest one, how do I do aggregations? I've tried this aggregation.
I've tried adding sort to sources but it still doesn't work, I've tried several ways but it still fails sometimes it comes out 1 but only old data, sometimes the order is correct from the newest but appears 2 data
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"BILLING_TYPE_CD": "Service Bundle"
}
},
{
"match": {
"ID": "xxxx"
}
},
{
"exists": {
"field": "LI_MILESTONE"
}
},
{
"exists": {
"field": "LI_SID"
}
},
{
"query_string": {
"default_field": "LI_SID",
"query": "*xxxx*"
}
}
],
"must_not": {
"bool": {
"must": [
{
"query_string": {
"default_field": "LI_PRODUCT_NAME",
"query": "*Network*"
}
},
{
"terms": {
"LI_MILESTONE.keyword": [
"Abandoned",
"Cancelled"
]
}
},
{
"terms": {
"ORDER_STATUS.keyword": [
"Abandoned",
"Cancelled",
"Drop In Progress"
]
}
},
{
"term": {
"STATUS.keyword": ""
}
}
]
}
}
}
},
"sort": [
{
"TGL_CREATED": {
"order": "desc"
}
}
],
"aggs": {
"list_products": {
"composite": {
"size": 50000,
"sources": [
{
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"order": "desc"
}
}
}
]
},
"aggs": {
"totalService": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000,
"order": {
"_term": "asc"
}
}
},
"bucket_sort": {
"bucket_sort": {
"from": 0,
"size": 10
}
},
"includes_source": {
"top_hits": {
"size": 1,
"_source": {
"includes": [
"LAST_UPDATE",
"xxxxx",
"xxxxx",
"xxxxx",
"xxx"
]
}
}
}
}
},
"term_product": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000
}
}
}
}
Like this ?
{
"aggs": {
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"size": 10
},
"aggs": {
"hit": {
"top_hits": {
"size": 1,
"sort": [
{
"LAST_UPDATE": "desc"
}
]
}
}
}
}
},
"size": 0
}
You need to use aggregations response not hits

Is there a way with ElasticSearch to only show aggregations?

I'm trying to retrieve some data from ElasticSearch.
So far everything is working perfectly and I can query data.
But whenever I try to count a field using aggregations, the aggregation field is not in the result at the end.
So far what I've tried this as my query/function :
var client = new elasticsearch.Client({
host: 'xxxxxxxxxxxxxxxxxxxxxxx',
log:"trace"
});
client.ping({
requestTimeout: 30000,
}, function (error) {
if (error) {
console.error('elasticsearch cluster is down!');
} else {
console.log('All is well');
}
});
client.search({
"index":"worklight__appsession__1485302400000",
"type":"AppSession",
"body":{
"query": {
"filtered": {
"query": {
"query_string": {
"analyze_wildcard": true,
"query": "*"
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": 1553507131976,
"lte": 1553593531976
}
}
}
],
"must_not": []
}
}
}
},
"aggs": {
"1": {
"cardinality": {
"field": "deviceID"
}
}
}
}
}).then(function (resp) {
var hits = resp.hits.hits;
console.log(hits)
}, function (err) {
console.trace(err.message);
});
and the result is :
Elasticsearch DEBUG: 2019-03-26T09:46:21Z
starting request {
"method": "HEAD",
"requestTimeout": 30000,
"castExists": true,
"path": "/",
"query": {}
}
Elasticsearch DEBUG: 2019-03-26T09:46:21Z
starting request {
"method": "POST",
"path": "/worklight__appsession__1485302400000/AppSession/_search",
"body": {
"query": {
"filtered": {
"query": {
"query_string": {
"analyze_wildcard": true,
"query": "*"
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": 1553507131976,
"lte": 1553593531976
}
}
}
],
"must_not": []
}
}
}
},
"aggs": {
"1": {
"cardinality": {
"field": "deviceID"
}
}
}
},
"query": {}
}
Elasticsearch TRACE: 2019-03-26T09:46:22Z
-> HEAD http://xx/
<- 200
Elasticsearch DEBUG: 2019-03-26T09:46:22Z
Request complete
All is well
Elasticsearch TRACE: 2019-03-26T09:46:22Z
-> POST http://xx/worklight__appsession__1485302400000/AppSession/_search
{
"query": {
"filtered": {
"query": {
"query_string": {
"analyze_wildcard": true,
"query": "*"
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": 1553507131976,
"lte": 1553593531976
}
}
}
],
"must_not": []
}
}
}
},
"aggs": {
"1": {
"cardinality": {
"field": "deviceID"
}
}
}
}
<- 200
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 325,
"max_score": 1,
"hits": [
...
confidential data here, not relevant to the topic.
...
}
]
},
"aggregations": {
"1": {
"value": 133
}
}
}
But if erase the log trace option, aggregations don't show up in the result :
[ { _index: 'worklight__appsession__1485302400000',
_type: 'AppSession',
... Some Data,
{ _index: 'worklight__appsession__1485302400000',
_type: 'AppSession',
... Some Data,
{ _index: 'worklight__appsession__1485302400000',
_type: 'AppSession',
... Some Data,
]
Am I doing something wrong, or do I just lack knowledge ?
Thanks for your time.
You are doing console.log(resp.hits.hits). Try this instead:
console.log(resp.aggregations)

Elastic Search: Matching fields in different nested objects

I am new to Elastic Search and this is my user index:
{
"user": {
"properties": {
"branches": {
"type": "nested"
},
"lists": {
"type": "nested"
},
"events": {
"type": "nested"
},
"optOuts": {
"type": "nested"
}
}
}
}
Here, branches, events and lists will contain the field id(int),countryIso(String)..
I need to find users having emails who belong to countryIso 'XX' for example.
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
This way I can get them if they have that country in the branches object. What I want is that the countryIso is there in the branches or lists or events.
Note: any of these might be empty i.e. branches may not be there or lists miht not be there etc. Or lists might be there with no countryIso..
I tried this:
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso"
],
"query": "AE KW"
}
}
}
},
{
"nested": {
"path": [
"lists"
],
"query": {
"query_string": {
"fields": [
"lists.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
AND
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches",
"lists"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso",
"lists.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
But neither works.

Limit filter by terms elastic search

I would put a size limit per terms, 3 retrieve results for the term "tag", 5 results for the term "dossier" and 1 result for the term "personality".
Can i use limit filter or and other solution ?
{
"_source":{
"include":[
"path",
"type"
]
},
"query":{
"bool":{
"should":[
{
"match":{
"title.acp":{
"query":"car",
"boost":10
}
}
},
{
"match":{
"title.acp":{
"query":"car",
"fuzziness":"AUTO",
"prefix_length":3
}
}
}
],
"filter":[
{
"terms":{
"type":[
"tag",
"dossier",
"personality"
]
}
}
]
}
},
"highlight":{
"fields":{
"title.acp":{}
}
}
};
Looks like for a given 'title' you want top x documents for each of the types where 'x' varies with type
One way to do this is use aggregation filter and top-hits in conjunction :
Example :
{
"size": 0,
"query": {
"bool": {
"should": [
{
"match": {
"title.acp": {
"query": "car",
"boost": 10
}
}
},
{
"match": {
"title.acp": {
"query": "car",
"fuzziness": "AUTO",
"prefix_length": 3
}
}
}
],
"filter": [
{
"terms": {
"type": [
"tag",
"dossier",
"personality"
]
}
}
]
}
},
"aggs": {
"tag": {
"filter": {
"term": {
"type": "tag"
}
},
"aggs": {
"tag_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 3,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
},
"dossier": {
"filter": {
"term": {
"type": "dossier"
}
},
"aggs": {
"dossier_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 5,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
},
"personality": {
"filter": {
"term": {
"type": "personality"
}
},
"aggs": {
"personality_top_hits": {
"top_hits": {
"_source": {
"include": [
"path",
"type"
]
},
"size": 1,
"highlight": {
"fields": {
"title.acp": {}
}
}
}
}
}
}
}
}

Resources