Elastic(search): Get docs with max and min timestamp values - search

I have a problem with a search that I just can't figure out how to do. My docs are of the following form:
{
"timestamp":"2015-03-17T15:05:04.563Z",
"session_id":"1",
"user_id":"jan"
}
Let's say the first timestamp of a session id is the "Login" and the last timestamp is the "Logout". I want to have all "login" and "logout" docs for all sessions (if possible sorted by user_id). I managed to get the right timestamps with aggregations:
{
"aggs" : {
"group_by_uid" : {
"terms" : {
"field" : "user_id"
},
"aggs" : {
"group_by_sid" : {
"terms" : {
"field" : "session_id"
},
"aggs" : {
"max_date" : {
"max": { "field" : "timestamp" }
},
"min_date" : {
"min": { "field" : "timestamp" }
}
}
}
}
}
}
}
But how do I get the corresponding docs? I also don't mind if I have to do 2 searches (one for the logins and one for the logouts). I tried some top hits aggregations and sorting stuff but I always get parse errors :/
I hope someone can give me a hint :)
Best regards,
Jan

Here's a solution in a single search based on the approach proposed by Sloan Ahrens. The advantage is that the start and end session entries are in the same bucket.
{
"aggs": {
"group_by_uid": {
"terms": {
"field": "user_id"
},
"aggs": {
"group_by_sid": {
"terms": {
"field": "session_id"
},
"aggs": {
"session_start": {
"top_hits": {
"size": 1,
"sort": [ { "timestamp": { "order": "asc" } } ]
}
},
"session_end": {
"top_hits": {
"size": 1,
"sort": [ { "timestamp": { "order": "desc" } } ]
}
}
}
}
}
}
}
}
Cheers,
Jan

You're already close. How about this. Use two searches, each aggregating the way you did, but then also get the first top_hit sorting on "timestamp".
I just set up a basic index and added some data that looks like what you posted:
PUT /test_index
{
"settings": {
"number_of_shards": 1
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"timestamp":"2015-03-17T15:05:04.563Z","session_id":"1","user_id":"jan"}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"timestamp":"2015-03-17T15:10:04.563Z","session_id":"1","user_id":"jan"}
{"index":{"_index":"test_index","_type":"doc","_id":3}}
{"timestamp":"2015-03-17T15:15:04.563Z","session_id":"1","user_id":"jan"}
{"index":{"_index":"test_index","_type":"doc","_id":4}}
{"timestamp":"2015-03-17T18:05:04.563Z","session_id":"1","user_id":"bob"}
{"index":{"_index":"test_index","_type":"doc","_id":5}}
{"timestamp":"2015-03-17T18:10:04.563Z","session_id":"1","user_id":"bob"}
{"index":{"_index":"test_index","_type":"doc","_id":6}}
{"timestamp":"2015-03-17T18:15:04.563Z","session_id":"1","user_id":"bob"}
Then I can get each session's start time with:
POST /test_index/_search?search_type=count
{
"aggs": {
"group_by_uid": {
"terms": {
"field": "user_id"
},
"aggs": {
"group_by_sid": {
"terms": {
"field": "session_id"
},
"aggs": {
"session_start": {
"top_hits": {
"size": 1,
"sort": [ { "timestamp": { "order": "asc" } } ]
}
}
}
}
}
}
}
}
...
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 6,
"max_score": 0,
"hits": []
},
"aggregations": {
"group_by_uid": {
"buckets": [
{
"key": "bob",
"doc_count": 3,
"group_by_sid": {
"buckets": [
{
"key": "1",
"doc_count": 3,
"session_start": {
"hits": {
"total": 3,
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "4",
"_score": null,
"_source": {
"timestamp": "2015-03-17T18:05:04.563Z",
"session_id": "1",
"user_id": "bob"
},
"sort": [
1426615504563
]
}
]
}
}
}
]
}
},
{
"key": "jan",
"doc_count": 3,
"group_by_sid": {
"buckets": [
{
"key": "1",
"doc_count": 3,
"session_start": {
"hits": {
"total": 3,
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": null,
"_source": {
"timestamp": "2015-03-17T15:05:04.563Z",
"session_id": "1",
"user_id": "jan"
},
"sort": [
1426604704563
]
}
]
}
}
}
]
}
}
]
}
}
}
and end-time with:
POST /test_index/_search?search_type=count
{
"aggs": {
"group_by_uid": {
"terms": {
"field": "user_id"
},
"aggs": {
"group_by_sid": {
"terms": {
"field": "session_id"
},
"aggs": {
"session_end": {
"top_hits": {
"size": 1,
"sort": [ { "timestamp": { "order": "desc" } } ]
}
}
}
}
}
}
}
}
...
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 6,
"max_score": 0,
"hits": []
},
"aggregations": {
"group_by_uid": {
"buckets": [
{
"key": "bob",
"doc_count": 3,
"group_by_sid": {
"buckets": [
{
"key": "1",
"doc_count": 3,
"session_end": {
"hits": {
"total": 3,
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "6",
"_score": null,
"_source": {
"timestamp": "2015-03-17T18:15:04.563Z",
"session_id": "1",
"user_id": "bob"
},
"sort": [
1426616104563
]
}
]
}
}
}
]
}
},
{
"key": "jan",
"doc_count": 3,
"group_by_sid": {
"buckets": [
{
"key": "1",
"doc_count": 3,
"session_end": {
"hits": {
"total": 3,
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "3",
"_score": null,
"_source": {
"timestamp": "2015-03-17T15:15:04.563Z",
"session_id": "1",
"user_id": "jan"
},
"sort": [
1426605304563
]
}
]
}
}
}
]
}
}
]
}
}
}
Here's the code I used:
http://sense.qbox.io/gist/05edb48b840e6a992646643913db8ef0a3ccccb3

Related

Elasticsearch aggregation gives me 2 results instead of one result

I want to aggregate on the brand field, but it gives me two results instead of one.
The brand_aggs aggregation gives me, from this text
{name : "Brand 1"}
2 results:
Brand and 1
But why? I need only Brand 1.
It separates the words "brand" and "1" from (Brand 1)
and gives me 2 results in the aggregation.
my mappings where I want to aggregate
mapping = {
"mappings": {
"product": {
"properties": {
"categories": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"fielddata": True
}
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"fielddata": True
}
}
}
}
}
my post request
{
"query" : {
"bool": {
"must": [
{"match": { "categories": "AV8KW5Wi31qHZdVeXG4G" }}
]
}
},
"size" : 0,
"aggs" : {
"brand_aggs" : {
"terms" : { "field" : "brand" }
},
"categories_aggs" : {
"terms" : { "field" : "categories" }
}
}
}
response from the server
{
"took": 18,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0,
"hits": []
},
"aggregations": {
"categories_aggs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "av8kw5wi31qhzdvexg4g",
"doc_count": 1
},
{
"key": "av8kw61c31qhzdvexg4h",
"doc_count": 1
},
{
"key": "av8kxtch31qhzdvexg4a",
"doc_count": 1
}
]
},
"brand_aggs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1", <==== I dont need this , why is give me that ??
"doc_count": 1
},
{
"key": "brand",
"doc_count": 1
}
]
},
}
}
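Your mapping uses the fields property (multi-fields), which is used when you want to index the same field in more than one way, for example with multiple analyzers. In your case the valid name of the field to aggregate on is 'brand.keyword'. When you run your aggregation on just 'brand', it uses the analyzed text field, so "Brand 1" is split into the separate tokens "brand" and "1" and each token gets its own bucket.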
So your query should be:
{
"query" : {
"bool": {
"must": [
{"match": { "categories": "AV8KW5Wi31qHZdVeXG4G" }}
]
}
},
"size" : 0,
"aggs" : {
"brand_aggs" : {
"terms" : { "field" : "brand.keyword" }
},
"categories_aggs" : {
"terms" : { "field" : "categories.keyword" }
}
}
}
The fields property is useful when you want to search the same property with multiple analyzers, for example:
"full_name": {
"type": "text",
"analyzer": "standard",
"boost": 1,
"fields": {
"autocomplete": {
"type": "text",
"analyzer": "ngram_analyzer"
},
"standard":{
"type": "text",
"analyzer": "standard"
}
}
},
You need to map your string as a not_analyzed string; to do that, run the query below:
PUT your_index/_mapping/your_type
{
"your_type": {
"properties": {
"brand": {
"type": "string",
"index": "analyzed",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
Don't forget to replace the your_type and your_index with your type and index values.

Synonyms, storing weights in document for relevance scoring in Elastic Search

The story: Given the example documents below and by extending them, is it possible to get the following ranking:
A search on "Cereals" results in the following ranking
Cornflakes
Rice Krispies
A search on "Rice" results in the following ranking
Basmati
Rice Krispies
The documents against which the search is performed:
[{
name: "Cornflakes"
},
{
name: "Basmati"
},
{
name: "Rice Krispies"
}]
Of course, some of them do not even contain the search term, so an option is to add an array of synonyms, each with a text value and a weight, which would help in computing the ranking:
[{
name: "Cornflakes",
synonyms: [
{t: 'Cereals', weight: 100},
{t: 'Sugar', weight: 100}]
},
{
name: "Basmati",
synonyms: [
{t: 'Cereals', weight: 1},
{t: 'Rice', weight: 1000}]
},
{
name: "Rice Krispies",
synonyms: [
{t: 'Cereals', weight: 10},
{t: 'Rice', weight: 1}]
}]
Is it the right approach?
What is the Elastic Search query for taking into account weighted synonyms?
I think "tags" would be a more appropriate name for the field than "synonyms".
You could use a nested type to store tags and use function score to combine the value of the tags.weight field (of the best matching tag if any) with the match score on the name field.
One such implementation could look as follows:
put test
put test/tag_doc/_mapping
{
"properties" : {
"tags" : {
"type" : "nested" ,
"properties": {
"t" : {"type" : "string"},
"weight" : {"type" : "double"}
}
}
}
}
put test/tag_doc/_bulk
{ "index" : { "_index" : "test", "_type" : "tag_doc", "_id":1} }
{"name": "Cornflakes","tags": [{"t": "Cereals", "weight":100},{"t": "Sugar", "weight": 100}]}
{ "index" : { "_index" : "test", "_type" : "tag_doc","_id":2} }
{ "name": "Basmati","tags": [{"t": "Cereals", "weight": 1},{"t": "Rice", "weight": 1000}]}
{ "index" : { "_index" : "test", "_type" : "tag_doc","_id":3} }
{ "name": "Rice Krispies", "tags": [{"t": "Cereals", "weight": 10},{"t": "Rice", "weight": 1}]}
post test/_search
{
"query": {
"dis_max": {
"queries": [
{
"match": {
"name": {
"query": "cereals",
"boost": 100
}
}
},
{
"nested": {
"path": "tags",
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field": "tags.weight"
}
}
],
"query": {
"match": {
"tags.t": "cereals"
}
},
"boost_mode": "replace",
"score_mode": "max"
}
},
"score_mode": "max"
}
}
]
}
}
}
Result :
"hits": {
"total": 3,
"max_score": 100,
"hits": [
{
"_index": "test",
"_type": "tag_doc",
"_id": "1",
"_score": 100,
"_source": {
"name": "Cornflakes",
"tags": [
{
"t": "Cereals",
"weight": 100
},
{
"t": "Sugar",
"weight": 100
}
]
}
},
{
"_index": "test",
"_type": "tag_doc",
"_id": "3",
"_score": 10,
"_source": {
"name": "Rice Krispies",
"tags": [
{
"t": "Cereals",
"weight": 10
},
{
"t": "Rice",
"weight": 1
}
]
}
},
{
"_index": "test",
"_type": "tag_doc",
"_id": "2",
"_score": 1,
"_source": {
"name": "Basmati",
"tags": [
{
"t": "Cereals",
"weight": 1
},
{
"t": "Rice",
"weight": 1000
}
]
}
}
]
}

Elasticsearch aggregate unique attribute per groupId

I have a separate doc in elasticsearch for each product.
Each product has a unique productId and a non unique groupId along with other attributes eg: categories.
I want to be able to aggregate different attributes with their count per unique groupId
example:
doc 1:
{
"productId": 123
"groupId" xyz,
"categories": [{"value": "shoes"}, {"value": "t-shirt"}]
}
doc 2:
{
"productId": 345
"groupId" xyz,
"categories": [{"value": "shoes"}, {"value": "t-shirt"}]
}
doc 3:
{
"productId": 456
"groupId" abc,
"categories": [{"value": "t-shirt"}]
}
doc 4:
{
"productId": 567
"groupId" abc,
"categories": [{"value": "shoes"}, {"value": "makeup"}]
}
expected results, something like:
shoes: 2
t-shirt: 2
makeup: 1
so I want to count each item once if it exists with the same groupId
my query:
{
"from":0,
"size":0,
"query":{
"filtered":{
"filter":{
}
}
},
"aggs": {
"group": {
"terms": {"field": "group"},
"aggs": {
"brand": {
"terms": {"field": "productMeta.brand.value"}
}
}
}
}
}
response:
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 25,
"max_score": 0,
"hits": []
},
"aggregations": {
"group": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 3,
"buckets": [
{
"key": "wlmr34210507",
"doc_count": 8,
"brand": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "generic",
"doc_count": 8
}
]
}
},
{
"key": "wlmr19524441",
"doc_count": 4,
"brand": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "maybelline",
"doc_count": 4
}
]
}
},
{
"key": "wlmr34121549",
"doc_count": 2,
"brand": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "maybelline",
"doc_count": 2
}
]
}
},
{
"key": "wlmr34317301",
"doc_count": 2,
"brand": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "dream on me",
"doc_count": 2
}
]
}
},
{
"key": "bbfs40549552",
"doc_count": 1,
"brand": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "samsung",
"doc_count": 1
}
]
}
},
{
"key": "bobb7937347",
"doc_count": 1,
"brand": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "chicco",
"doc_count": 1
}
]
}
},
{
"key": "wlmr24241413",
"doc_count": 1,
"brand": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "maybelline",
"doc_count": 1
}
]
}
},
{
"key": "wlmr27504560",
"doc_count": 1,
"brand": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "mr. beer",
"doc_count": 1
}
]
}
},
{
"key": "wlmr33986448",
"doc_count": 1,
"brand": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "mr. beer",
"doc_count": 1
}
]
}
},
{
"key": "wlmr40806575",
"doc_count": 1,
"brand": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "healthtex",
"doc_count": 1
}
]
}
}
]
}
}
}
so basically I was able to solve this problem by using cardinality as follows:
{
"from":0,
"size":0,
"query":{
"filtered":{
"filter":{
}
}
},
"sort":{
"ts":{
"order":"desc",
"mode":"max",
"ignore_unmapped":true
}
},
"aggs":{
"categories":{
"terms":{
"field":"productMeta.brand.value",
"size":0
},
"aggs": {
"category" : {
"cardinality" : {
"field" : "group"
}
}
}
}
}
}
the results are unique count per productId per category:
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 71,
"max_score": 0,
"hits": []
},
"aggregations": {
"categories": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "chocolate",
"doc_count": 23,
"category": {
"value": 23
}
},
{
"key": "notebook",
"doc_count": 9,
"category": {
"value": 1
}
},
{
"key": "olive_oil",
"doc_count": 7,
"category": {
"value": 7
}
},
{
"key": "physical_training",
"doc_count": 5,
"category": {
"value": 5
}
},
{
"key": "ski",
"doc_count": 5,
"category": {
"value": 2
}
},
{
"key": "gym_membership",
"doc_count": 4,
"category": {
"value": 4
}
},
{
"key": "ski_boots",
"doc_count": 4,
"category": {
"value": 1
}
},
{
"key": "vinegar",
"doc_count": 4,
"category": {
"value": 4
}
},
{
"key": "bracelet",
"doc_count": 3,
"category": {
"value": 3
}
},
{
"key": "handbags",
"doc_count": 2,
"category": {
"value": 2
}
},
{
"key": "cider",
"doc_count": 1,
"category": {
"value": 1
}
},
{
"key": "ice_cider",
"doc_count": 1,
"category": {
"value": 1
}
},
{
"key": "jewelry_1",
"doc_count": 1,
"category": {
"value": 1
}
},
{
"key": "laces",
"doc_count": 1,
"category": {
"value": 1
}
},
{
"key": "stationery",
"doc_count": 1,
"category": {
"value": 1
}
}
]
}
}
}
Using nested elasticsearch Terms Aggregations the same way you are currently doing with brands, you can produce output structured like this:
"aggregations": {
"groups": {
"buckets": [
{
"key": "xyz",
"categories_per_group": {
"buckets": [
{
"key": "shoes",
"doc_count": 2
},
{
"key": "t-shirt",
"doc_count": 2
}
]
}
},
{
"key": "abc",
"categories_per_group": {
"buckets": [
{
"key": "shoes",
"doc_count": 1
},
{
"key": "t-shirt",
"doc_count": 1
},
{
"key": "makeup",
"doc_count": 1
}
]
}
}
]
}
}
It might be possible to write a pipeline bucket script (ES 2.x) to collect the count of distinct categories across group buckets as you're suggesting.
But it's probably simpler and quicker just to implement this reduce logic yourself using the aggregation buckets output above.

Elasticsearch correct aggregations for faceted search

I want to offer a faceted search for the clothing products on some platform. As I already have Elasticsearch-based search functionality (simple query, only the name of the product), it would be nice to also implement the faceted search with ES.
This should be done with aggregations as facets are deprecated and one can also have nested aggregations.
However, I cannot wrap my head around the millions of aggregations and which ones are the right ones for me - there are terms, filter, filters, nested, children and so on. And all of them seem suitable.
What I want to achieve may sound pretty basic: I have different facets (brand, condition, color) each having different values. For some facets (brand) the user can select only one value. For others (color) the user is allowed to select up to 3 (as some clothes have more than one color).
I started with multi-field terms facet. Now the next natural step would be to convert this to a terms aggregation (reasons above) but multi-fields are not supported in terms aggregations.
{
"query" : {
"match_all" : { }
},
"facets" : {
"groupByBrandAndCondition" : {
"terms" : {
"fields" : ["brand", "condition"],
"size" : 10
}
}
}
}
I am somehow missing some easy but crucial point here on how to proceed with having parallel multi level bucketing. Speaking in UI terms the user should be able to select something like:
Brands (10)
A (7)
B (3) [X]
Colors (5)
Blue (3) [X]
Red (2) [X]
Read: select A (7), Blue (3) and Red (2)
I created basic mapping like this
POST your_index/your_type/_mapping
{
"your_type": {
"properties": {
"product": {
"type": "string"
},
"brand": {
"type": "string"
},
"color": {
"type": "string"
}
}
}
}
I inserted some documents like this
PUT your_index/your_type/111
{
"product" : "jeans" ,"brand" : "lee", "color" : "blue"
}
PUT your_index/your_type/1111
{
"product" : "shoes" ,"brand" : "levi", "color" : "black"
}
And so on
Simple aggregation query like this
GET your_index/_search
{
"size": 0,
"aggs": {
"prod_agg": {
"terms": {
"field": "product"
},
"aggs": {
"brand_agg": {
"terms": {
"field": "brand"
},
"aggs": {
"color_agg": {
"terms": {
"field": "color"
}
}
}
}
}
}
}
}
will return
"aggregations": {
"prod_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "shoes",
"doc_count": 4,
"brand_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "nike",
"doc_count": 3,
"color_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "blue",
"doc_count": 2
},
{
"key": "black",
"doc_count": 1
}
]
}
},
{
"key": "levi",
"doc_count": 1,
"color_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "black",
"doc_count": 1
}
]
}
}
]
}
},
{
"key": "jeans",
"doc_count": 3,
"brand_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "lee",
"doc_count": 2,
"color_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "black",
"doc_count": 1
},
{
"key": "blue",
"doc_count": 1
}
]
}
},
{
"key": "levi",
"doc_count": 1,
"color_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "black",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
This could be used to populate UI search criteria.
Then, if the user wants to search for shoes, you could query
GET your_index/_search
{
"size": 0,
"query": {
"match": {
"product": "shoes"
}
},
"aggs": {
"brand_agg": {
"terms": {
"field": "brand"
},
"aggs": {
"color_agg": {
"terms": {
"field": "color"
}
}
}
}
}
}
which will give you
"aggregations": {
"brand_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "nike",
"doc_count": 3,
"color_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "blue",
"doc_count": 2
},
{
"key": "black",
"doc_count": 1
}
]
}
},
{
"key": "levi",
"doc_count": 1,
"color_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "black",
"doc_count": 1
}
]
}
}
]
}
}
or you could have them as separate buckets with query like
GET your_index/_search
{
"size": 0,
"query": {
"match": {
"product": "shoes"
}
},
"aggs": {
"brand_agg": {
"terms": {
"field": "brand"
}
},
"color_agg" : {
"terms": {
"field": "color"
}
}
}
}
which will give you
"aggregations": {
"color_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "black",
"doc_count": 2
},
{
"key": "blue",
"doc_count": 2
}
]
},
"brand_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "nike",
"doc_count": 3
},
{
"key": "levi",
"doc_count": 1
}
]
}
}
Use doc_count value to tell users how many options they have.
Does this satisfy your requirements?

ElasticSearch : GeoLocation distance search across types

As shown below, there are two types in my city index - zoo and hotel. How do I find all zoos having a hotel in 1KM radius? Here is the mapping of my index :
GET /city/_mapping
{
"city": {
"mappings": {
"hotel": {
"properties": {
"location": {
"type": "geo_point"
},
"name": {
"type": "string"
}
}
},
"zoo": {
"properties": {
"location": {
"type": "geo_point"
},
"name": {
"type": "string"
}
}
}
}
}
}
You can do it with a geo-distance filter for the whole index (just don't specify a type).
As a quick test I created an index like this:
PUT /test_index/
{
"mappings": {
"hotel": {
"properties": {
"location": {
"type": "geo_point"
},
"name": {
"type": "string"
}
}
},
"zoo": {
"properties": {
"location": {
"type": "geo_point"
},
"name": {
"type": "string"
}
}
}
}
}
Added a couple of documents
POST /test_index/_bulk
{"index":{"_type":"hotel","_id":1}}
{"name":"hotel1","location":{"lat" : 40.001, "lon" : -70.001}}
{"index":{"_type":"zoo","_id":1}}
{"name":"zoo1","location":{"lat" : 40.002, "lon" : -70.002}}
And then I can search like this. This query returns the one document:
POST /test_index/_search
{
"query": {
"filtered": {
"filter": {
"geo_distance": {
"distance": 200,
"distance_unit": "km",
"location": {
"lat": 40,
"lon": -70
}
}
}
}
}
}
...
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "hotel",
"_id": "1",
"_score": 1,
"_source": {
"name": "hotel1",
"location": {
"lat": 40.001,
"lon": -70.001
}
}
}
]
}
}
And this query returns both:
POST /test_index/_search
{
"query": {
"filtered": {
"filter": {
"geo_distance": {
"distance": 300,
"distance_unit": "km",
"location": {
"lat": 40,
"lon": -70
}
}
}
}
}
}
...
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "hotel",
"_id": "1",
"_score": 1,
"_source": {
"name": "hotel1",
"location": {
"lat": 40.001,
"lon": -70.001
}
}
},
{
"_index": "test_index",
"_type": "zoo",
"_id": "1",
"_score": 1,
"_source": {
"name": "zoo1",
"location": {
"lat": 40.002,
"lon": -70.002
}
}
}
]
}
}
Here's the code I used to test it:
http://sense.qbox.io/gist/948d23a5327cf5f22dd368146f37d09e30765fee

Resources