Elasticsearch use custom analyzer on filter

Elasticsearch use custom analyzer on filter - search

I was asking on elasticsearch nested filter return empty result about some error I have in the query and wont getting any results, but in the answer I was pointed out that the expression I use for the filter wasn't analyzed as I expect.
I have a custom analyzer to do the work how can I specify in the next query to the filter to use this custom analyzer:
GET /develop/_search?search_type=dfs_query_then_fetch
{
"query": {
"filtered" : {
"query": {
"bool": {
"must": [
{ "match": { "title": "post" }}
]
}
},
"filter": {
"bool": {
"must": [
{"term": {
"featured": 0
}},
{
"nested": {
"path": "seller",
"filter": {
"bool": {
"must": [
{ "term": { "seller.firstName": "Test 3" } }
]
}
},
"_cache" : true
}}
]
}
}
}
},
"sort": [
{
"_score":{
"order": "desc"
}
},{
"created": {
"order": "desc"
}
}
],
"track_scores": true
}

Here is a setup that seems to do what you want. I used the same basic code as the last answer, but used index_analyzer and search_analyzer in the index definition as follows:
curl -XDELETE "http://localhost:9200/my_index"
curl -XPUT "http://localhost:9200/my_index" -d'
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"filter": {
"snowball": { "type": "snowball", "language": "English" },
"english_stemmer": { "type": "stemmer", "language": "english" },
"english_possessive_stemmer": { "type": "stemmer", "language": "possessive_english" },
"stopwords": { "type": "stop", "stopwords": [ "_english_" ] },
"worddelimiter": { "type": "word_delimiter" }
},
"tokenizer": {
"nGram": { "type": "nGram", "min_gram": 3, "max_gram": 20 }
},
"analyzer": {
"custom_analyzer": {
"type": "custom",
"tokenizer": "nGram",
"filter": [
"stopwords",
"asciifolding",
"lowercase",
"snowball",
"english_stemmer",
"english_possessive_stemmer",
"worddelimiter"
]
},
"custom_search_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"stopwords",
"asciifolding",
"lowercase",
"snowball",
"english_stemmer",
"english_possessive_stemmer",
"worddelimiter"
]
}
}
}
},
"mappings": {
"posts": {
"properties": {
"title": {
"type": "string",
"analyzer": "custom_analyzer",
"boost": 5
},
"seller": {
"type": "nested",
"properties": {
"firstName": {
"type": "string",
"index_analyzer": "custom_analyzer",
"search_analyzer": "custom_search_analyzer",
"boost": 3
}
}
}
}
}
}
}'
Then added the test docs
curl -XPUT "http://localhost:9200/my_index/posts/1" -d'
{"title": "post", "seller": {"firstName":"Test 1"}}'
curl -XPUT "http://localhost:9200/my_index/posts/2" -d'
{"title": "post", "seller": {"firstName":"Test 2"}}'
curl -XPUT "http://localhost:9200/my_index/posts/3" -d'
{"title": "post", "seller": {"firstName":"Test 3"}}'
And then a couple of match queries in a bool, where one is a multiword query, seems to accomplish what you are wanting:
curl -XPOST "http://localhost:9200/my_index/_search" -d'
{
"query": {
"bool": {
"must": [
{
"match": {
"title": "post"
}
},
{
"nested": {
"path": "seller",
"query": {
"match": {
"seller.firstName": {
"query": "Test 3",
"operator": "and"
}
}
}
}
}
]
}
}
}'
...
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 6.8380365,
"hits": [
{
"_index": "my_index",
"_type": "posts",
"_id": "3",
"_score": 6.8380365,
"_source": {
"title": "post",
"seller": {
"firstName": "Test 3"
}
}
}
]
}
}
Here is the code I used:
http://sense.qbox.io/gist/8cd954aa60be8c44f64e4282e15e6b565c945ecb
Does that solve your problem?

Related

How to do elasticsearch aggregation together with sort and find duplicate values

I want to find duplicate values and if there are duplicate values then I sort based on the last update, so what I take is the newest one, how do I do aggregations? I've tried this aggregation.
I've tried adding sort to sources but it still doesn't work, I've tried several ways but it still fails sometimes it comes out 1 but only old data, sometimes the order is correct from the newest but appears 2 data
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"BILLING_TYPE_CD": "Service Bundle"
}
},
{
"match": {
"ID": "xxxx"
}
},
{
"exists": {
"field": "LI_MILESTONE"
}
},
{
"exists": {
"field": "LI_SID"
}
},
{
"query_string": {
"default_field": "LI_SID",
"query": "*xxxx*"
}
}
],
"must_not": {
"bool": {
"must": [
{
"query_string": {
"default_field": "LI_PRODUCT_NAME",
"query": "*Network*"
}
},
{
"terms": {
"LI_MILESTONE.keyword": [
"Abandoned",
"Cancelled"
]
}
},
{
"terms": {
"ORDER_STATUS.keyword": [
"Abandoned",
"Cancelled",
"Drop In Progress"
]
}
},
{
"term": {
"STATUS.keyword": ""
}
}
]
}
}
}
},
"sort": [
{
"TGL_CREATED": {
"order": "desc"
}
}
],
"aggs": {
"list_products": {
"composite": {
"size": 50000,
"sources": [
{
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"order": "desc"
}
}
}
]
},
"aggs": {
"totalService": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000,
"order": {
"_term": "asc"
}
}
},
"bucket_sort": {
"bucket_sort": {
"from": 0,
"size": 10
}
},
"includes_source": {
"top_hits": {
"size": 1,
"_source": {
"includes": [
"LAST_UPDATE",
"xxxxx",
"xxxxx",
"xxxxx",
"xxx"
]
}
}
}
}
},
"term_product": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000
}
}
}
}

Like this ?
{
"aggs": {
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"size": 10
},
"aggs": {
"hit": {
"top_hits": {
"size": 1,
"sort": [
{
"LAST_UPDATE": "desc"
}
]
}
}
}
}
},
"size": 0
}
You need to use aggregations response not hits

How to implement fuzzy search in multi fields

I was implementing fuzzy search in my existing elasticsearch where I can't change mappings, I was hoping if there is any way I can convert the following query in fuzzy one i.e add fuzzy search on fields lower_name and album
{
"query": {
"bool": {
"must": [
{
"term": {
"user": "userId"
}
},
{
"bool": {
"should": [
{
"terms": {
"lower_name": ["search", "Text"]
}
},
{
"terms": {
"album": ["search","Text"]
}
}
]
}
}
]
}
}
}
I tried this :
{
"query": {
"bool": {
"must": [
{
"term": {
"user": "userId"
}
},
{
"bool": {
"should": [
{
"fuzzy": {
"lower_name": ["search","Text"]
}
},
{
"fuzzy": {
"album": ["search","Text"]
}
}
]
}
}
]
}
}
}
But this is giving error: [fuzzy] query doesn't support multiple fields
Please help!
Using Elasticsearch 6.3

You can use a multi_match query with fuzziness. Try out the below query
Index Data:
{
"user": "ben",
"lower_name": "def",
"album": "Brenda"
}
{
"user": "ben",
"lower_name": "abc",
"album": "Brenda"
},
{
"user": "ben",
"lower_name": "fgh",
"album": "honda"
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"term": {
"user": "ben"
}
},
{
"bool": {
"should": [
{
"multi_match": {
"query": "abc dey",
"fields": [
"lower_name"
],
"fuzziness": "auto"
}
},
{
"multi_match": {
"query": "brenda",
"fields": [
"album"
],
"fuzziness": "auto"
}
}
]
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "66311552",
"_type": "_doc",
"_id": "2",
"_score": 0.7497801,
"_source": {
"user": "ben",
"lower_name": "def",
"album": "Brenda"
}
},
{
"_index": "66311552",
"_type": "_doc",
"_id": "1",
"_score": 0.7497801,
"_source": {
"user": "ben",
"lower_name": "abc",
"album": "Brenda"
}
}
]

You can easily use the "fuzziness": "AUTO". param in your search query. Refer fuzziness in match query official example

How to apply filter on geo coordinates in elasticsearch?

I am using elasticsearch with mongoosastic npm module. I am trying to apply filter on geo coordinates having following model structure
geoLocation: {
type: {
type: String,
default: 'Point'
},
coordinates: [Number] //orders should be lat,lng
}
with the mapping as follows
{
"events": {
"settings": {
"analysis": {
"filter": {
"edgeNGram_filter": {
"type": "edgeNGram",
"min_gram": 1,
"max_gram": 50,
"side": "front"
}
},
"analyzer": {
"edge_nGram_analyzer": {
"type": "custom",
"tokenizer": "edge_ngram_tokenizer",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edgeNGram",
"min_gram": "1",
"max_gram": "50",
"token_chars": [
"letter",
"digit"
]
}
}
}
},
"mappings": {
"event": {
"_all": {
"index_analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
},
"properties": {
"title": {
"type": "string",
"index": "not_analyzed"
},
"geoLocation": {
"index": "not_analyzed",
"type": "geo_point"
}
}
}
}
}
}
Query
{
"query": {
"multi_match": {
"query": "the",
"fields": ["title", ]
}
},
"filter" : {
"geo_distance" : {
"distance" : "200km",
"geoLocation.coordinates" : {
"lat" : 19.007452,
"lon" : 72.831556
}
}
}
}
I am unable to create indexing on the geo coordinates with the following model structure, I dont understand if it is not possible to index geo coordinates with the above model structure because in my case the coordinates has order as lat,long and I have found somewhere that elasticsearch expects coordinates order as long,lat.
Error
Error: SearchPhaseExecutionException[Failed to execute phase [query],
all shards failed; shardFailures {[CDHdgtJnTbeu8tl2mDfllg][events][0]:
SearchParseException[[events][0]: from[-1],size[-1]: Parse Failure
[Failed to parse source
curl -XGET localhost:9200/events
{
"events": {
"aliases": {},
"mappings": {
"1": {
"properties": {
"location": {
"type": "double"
},
"text": {
"type": "string"
}
}
},
"event": {
"properties": {
"city": {
"type": "string"
},
"endTime": {
"type": "date",
"format": "dateOptionalTime"
},
"geo_with_lat_lon": {
"type": "geo_point",
"lat_lon": true
},
"isActive": {
"type": "boolean"
},
"isRecommended": {
"type": "boolean"
},
"location": {
"type": "string"
},
"title": {
"type": "string"
}
}
}
},
"settings": {
"index": {
"creation_date": "1461675012489",
"uuid": "FT-xVUdPQtyuKFm4J4Rd7g",
"number_of_replicas": "1",
"number_of_shards": "5",
"events": {
"mappings": {
"event": {
"_all": {
"enabled": "false",
"search_analyzer": "whitespace_analyzer",
"index_analyzer": "nGram_analyzer"
},
"properties": {
"geoLocation": {
"coordinates": {
"type": "geo_shape",
"index": "not_analyzed"
}
},
"location": {
"type": "string",
"index": "not_analyzed"
},
"title": {
"type": "string",
"index": "not_analyzed"
},
"geo_with_lat_lon": {
"type": "geo_point",
"lat_lon": "true",
"index": "not_analyzed"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"edge_nGram_analyzer": {
"type": "custom",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
],
"tokenizer": "edge_ngram_tokenizer"
},
"whitespace_analyzer": {
"type": "custom",
"filter": [
"lowercase",
"asciifolding"
],
"tokenizer": "whitespace"
}
},
"filter": {
"edgeNGram_filter": {
"max_gram": "50",
"type": "edgeNGram",
"min_gram": "1",
"side": "front"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"max_gram": "50",
"type": "edgeNGram",
"min_gram": "1",
"token_chars": [
"letter",
"digit"
]
}
}
}
}
},
"version": {
"created": "1070099"
}
}
},
"warmers": {}
}
}

I got a solution for my question
Mapping
PUT /geo_test
{
"mappings": {
"type_test": {
"properties": {
"name": {
"type": "string"
},
"geoLocation": {
"type": "nested",
"properties": {
"coordinates": {
"type": "geo_point",
"lat_lon": true
}
}
}
}
}
}
}
Query
POST /geo_test/type_test/_search
{
"query": {
"filtered": {
"filter": {
"nested": {
"path": "geoLocation",
"query": {
"filtered": {
"filter": {
"geo_distance": {
"distance": 5,
"distance_unit": "km",
"geoLocation.coordinates": {
"lat": 41.12,
"lon": -71.34
}
}
}
}
}
}
}
}
}
}

ElasticSearch query stops working with big amount of data

The problem: I have 2 identical in terms of settings and mappings indexes.
The first index contains only 1 document.
The second index contains the same document + 16M of others.
When I'm running the query on the first index it returns the document, but when I do the same query on the second — I receive nothing.
Indexes settings:
{
"tasks_test": {
"settings": {
"index": {
"analysis": {
"analyzer": {
"tag_analyzer": {
"filter": [
"lowercase",
"tag_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"tag_filter": {
"type": "word_delimiter",
"type_table": "# => ALPHA"
}
}
},
"creation_date": "1444127141035",
"number_of_replicas": "2",
"number_of_shards": "5",
"uuid": "wTe6WVtLRTq0XwmaLb7BLg",
"version": {
"created": "1050199"
}
}
}
}
}
Mappings:
{
"tasks_test": {
"mappings": {
"Task": {
"dynamic": "false",
"properties": {
"format": "dateOptionalTime",
"include_in_all": false,
"type": "date"
},
"is_private": {
"type": "boolean"
},
"last_timestamp": {
"type": "integer"
},
"name": {
"analyzer": "tag_analyzer",
"type": "string"
},
"project_id": {
"include_in_all": false,
"type": "integer"
},
"user_id": {
"include_in_all": false,
"type": "integer"
}
}
}
}
}
The document:
{
"_index": "tasks_test",
"_type": "Task",
"_id": "1",
"_source": {
"is_private": false,
"name": "135548- test with number",
"project_id": 2,
"user_id": 1
}
}
The query:
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
[
{
"match": {
"_all": {
"query": "135548",
"type": "phrase_prefix"
}
}
}
]
]
}
},
"filter": {
"bool": {
"must": [
{
"term": {
"is_private": false
}
},
{
"terms": {
"project_id": [
2
]
}
},
{
"terms": {
"user_id": [
1
]
}
}
]
}
}
}
}
}
Also, some findings:
if I replace _all with name everything works
if I replace match_phrase_prefix with match_phrase works too
ES version: 1.5.1
So, the question is: how to make the query work for the second index without mentioned hacks?

How to use facet filtering with nested documents on ElasticSearch

I have the following mapping:
curl -XPUT 'http://localhost:9200/bookstore/user/_mapping' -d '
{
"user": {
"properties": {
"user_id": { "type": "integer" },
"gender": { "type": "string", "index" : "not_analyzed" },
"age": { "type": "integer" },
"age_bracket": { "type": "string", "index" : "not_analyzed" },
"current_city": { "type": "string", "index" : "not_analyzed" },
"relationship_status": { "type": "string", "index" : "not_analyzed" },
"books" : {
"type": "nested",
"properties" : {
"b_oid": { "type": "string", "index" : "not_analyzed" },
"b_name": { "type": "string", "index" : "not_analyzed" },
"bc_id": { "type": "integer" },
"bc_name": { "type": "string", "index" : "not_analyzed" },
"bcl_name": { "type": "string", "index" : "not_analyzed" },
"b_id": { "type": "integer" }
}
}
}
}
}'
Now, I try to query for example for Users which have "gender": "Male", have bought book in a certain category "bcl_name": "Trivia" and show the "b_name" book titles. I somehow cannot get it to run.
I have the query
curl -XGET 'http://localhost:9200/bookstore/user/_search?pretty=1' -d '{
"size": 0,
"from": 0,
"query": {
"filtered": {
"query": {
"terms": {
"gender": [
"Male"
]
}
}
}
},
"facets": {
"CategoryFacet": {
"terms": {
"field": "books.b_name",
"size": 5,
"shard_size": 1000,
"order": "count"
},
"nested": "books",
"facet_filter": {
"terms": {
"books.bcl_name": [
"Trivia"
]
}
}
}
}
}'
which returns a result, but I'm not sure whether this is correct. I looked for some examples, and found this (http://www.spacevatican.org/2012/6/3/fun-with-elasticsearch-s-children-and-nested-documents/) for example. I'm able to rewrite my query like this:
curl -XGET 'http://localhost:9200/bookstore/user/_search?pretty=1' -d '{
"size": 0,
"from": 0,
"query": {
"filtered": {
"query": {
"terms": {
"gender": [
"Male"
]
}
},
"filter": {
"nested": {
"path": "books",
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{
"term": {
"books.bcl_name": "Trivia"
}
}
]
}
}
}
}
}
}
},
"facets": {
"CategoryFacet": {
"terms": {
"field": "books.b_name",
"size": 5,
"shard_size": 1000,
"order": "count"
},
"nested": "books"
}
}
}'
which shows different results.
I, as a beginner, am a litte lost right now. Can someone please give me hint on how to solve this`? Thanks a lot in advance!

First query means:
Search for users whose gender : "Male"
But "CategoryFacet" includes the count of gender : "Male" AND
books.bcl_name : "Trivia"
So in result set you get all "Male" users, but your CategoryFacet gives you the count of "Male users AND whose books.bcl_name is Trivia".
In second query your "CategoryFacet" does not include extra filtering. It just returns the facets from the exact result set.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Elasticsearch use custom analyzer on filter - search

Related

How to do elasticsearch aggregation together with sort and find duplicate values

How to implement fuzzy search in multi fields

How to apply filter on geo coordinates in elasticsearch?

ElasticSearch query stops working with big amount of data

How to use facet filtering with nested documents on ElasticSearch

Categories

Resources