How to implement fuzzy search in multi fields - node.js

I was implementing fuzzy search in my existing elasticsearch where I can't change mappings, I was hoping if there is any way I can convert the following query in fuzzy one i.e add fuzzy search on fields lower_name and album
{
"query": {
"bool": {
"must": [
{
"term": {
"user": "userId"
}
},
{
"bool": {
"should": [
{
"terms": {
"lower_name": ["search", "Text"]
}
},
{
"terms": {
"album": ["search","Text"]
}
}
]
}
}
]
}
}
}
I tried this :
{
"query": {
"bool": {
"must": [
{
"term": {
"user": "userId"
}
},
{
"bool": {
"should": [
{
"fuzzy": {
"lower_name": ["search","Text"]
}
},
{
"fuzzy": {
"album": ["search","Text"]
}
}
]
}
}
]
}
}
}
But this is giving error: [fuzzy] query doesn't support multiple fields
Please help!
Using Elasticsearch 6.3

You can use a multi_match query with fuzziness. Try out the below query
Index Data:
{
"user": "ben",
"lower_name": "def",
"album": "Brenda"
}
{
"user": "ben",
"lower_name": "abc",
"album": "Brenda"
},
{
"user": "ben",
"lower_name": "fgh",
"album": "honda"
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"term": {
"user": "ben"
}
},
{
"bool": {
"should": [
{
"multi_match": {
"query": "abc dey",
"fields": [
"lower_name"
],
"fuzziness": "auto"
}
},
{
"multi_match": {
"query": "brenda",
"fields": [
"album"
],
"fuzziness": "auto"
}
}
]
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "66311552",
"_type": "_doc",
"_id": "2",
"_score": 0.7497801,
"_source": {
"user": "ben",
"lower_name": "def",
"album": "Brenda"
}
},
{
"_index": "66311552",
"_type": "_doc",
"_id": "1",
"_score": 0.7497801,
"_source": {
"user": "ben",
"lower_name": "abc",
"album": "Brenda"
}
}
]

You can easily use the "fuzziness": "AUTO". param in your search query. Refer fuzziness in match query official example

Related

How to do elasticsearch aggregation together with sort and find duplicate values

I want to find duplicate values and if there are duplicate values then I sort based on the last update, so what I take is the newest one, how do I do aggregations? I've tried this aggregation.
I've tried adding sort to sources but it still doesn't work, I've tried several ways but it still fails sometimes it comes out 1 but only old data, sometimes the order is correct from the newest but appears 2 data
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"BILLING_TYPE_CD": "Service Bundle"
}
},
{
"match": {
"ID": "xxxx"
}
},
{
"exists": {
"field": "LI_MILESTONE"
}
},
{
"exists": {
"field": "LI_SID"
}
},
{
"query_string": {
"default_field": "LI_SID",
"query": "*xxxx*"
}
}
],
"must_not": {
"bool": {
"must": [
{
"query_string": {
"default_field": "LI_PRODUCT_NAME",
"query": "*Network*"
}
},
{
"terms": {
"LI_MILESTONE.keyword": [
"Abandoned",
"Cancelled"
]
}
},
{
"terms": {
"ORDER_STATUS.keyword": [
"Abandoned",
"Cancelled",
"Drop In Progress"
]
}
},
{
"term": {
"STATUS.keyword": ""
}
}
]
}
}
}
},
"sort": [
{
"TGL_CREATED": {
"order": "desc"
}
}
],
"aggs": {
"list_products": {
"composite": {
"size": 50000,
"sources": [
{
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"order": "desc"
}
}
}
]
},
"aggs": {
"totalService": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000,
"order": {
"_term": "asc"
}
}
},
"bucket_sort": {
"bucket_sort": {
"from": 0,
"size": 10
}
},
"includes_source": {
"top_hits": {
"size": 1,
"_source": {
"includes": [
"LAST_UPDATE",
"xxxxx",
"xxxxx",
"xxxxx",
"xxx"
]
}
}
}
}
},
"term_product": {
"terms": {
"field": "LI_SID.keyword",
"size": 50000
}
}
}
}
Like this ?
{
"aggs": {
"LI_SID": {
"terms": {
"field": "LI_SID.keyword",
"size": 10
},
"aggs": {
"hit": {
"top_hits": {
"size": 1,
"sort": [
{
"LAST_UPDATE": "desc"
}
]
}
}
}
}
},
"size": 0
}
You need to use aggregations response not hits

Elasticsearch sorting not working properly based on time

I have 20 documents and i'm performing aggregation based on reportid. I need top 10 aggregation based on time in descending. But the response is very random. What am i missing? I'm using elasticsearch 6.2.2 and node.js 4.5. Below here is the body search query for elasticsearch request.
{
"size": 0,
"sort": [
{
"triggerDate":
{
"order": "desc"
}
}],
"query":
{
"bool":
{
"must": [
{
"query_string":
{
"query": "*",
"analyze_wildcard": true
}
},
{
"range":
{
"triggerDate":
{
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string":
{
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}]
}
},
"_source":
{
"excludes": []
},
"aggs":
{
"reportid":
{
"terms":
{
"field": "reportId.keyword",
"size": 10
}
}
}
I think what you need to do is aggregate on reportId.keyword and sort aggregation by date.
So here is the solution
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"triggerDate": {
"gte": fromTime,
"lte": toTime
}
}
}
],
"must_not": [
{
"query_string": {
"query": "reportId.keyword:\"\"",
"analyze_wildcard": true
}
}
]
}
},
"_source": {
"excludes": []
},
"aggs": {
"reportid": {
"terms": {
"field": "reportId.keyword",
"size": 10,
"order": {
"2-orderAgg": "desc"
}
},
"aggs": {
"2-orderAgg": {
"max": {
"field": "triggerDate"
}
}
}
}
}
}
You need to sort the aggregation results by a custom aggregation and not the query results.

Elastic Search: Matching fields in different nested objects

I am new to Elastic Search and this is my user index:
{
"user": {
"properties": {
"branches": {
"type": "nested"
},
"lists": {
"type": "nested"
},
"events": {
"type": "nested"
},
"optOuts": {
"type": "nested"
}
}
}
}
Here, branches, events and lists will contain the field id(int),countryIso(String)..
I need to find users having emails who belong to countryIso 'XX' for example.
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
This way I can get them if they have that country in the branches object. What I want is that the countryIso is there in the branches or lists or events.
Note: any of these might be empty i.e. branches may not be there or lists miht not be there etc. Or lists might be there with no countryIso..
I tried this:
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso"
],
"query": "AE KW"
}
}
}
},
{
"nested": {
"path": [
"lists"
],
"query": {
"query_string": {
"fields": [
"lists.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
AND
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "email"
}
},
{
"match": {
"prog_id": 3
}
},
{
"nested": {
"path": [
"branches",
"lists"
],
"query": {
"query_string": {
"fields": [
"branches.countryIso",
"lists.countryIso"
],
"query": "AE KW"
}
}
}
}
]
}
}
}
But neither works.

Elasticsearch match with stemming

How do I do a search for a stemmed match?
I.e. at the moment I have many documents that contain the word "skateboard" in the item_title field, but only 3 documents that contain the word "skateboards". Because of this, when I do the following search:
POST /my_index/my_type/_search
{
"size": 100,
"query" : {
"multi_match": {
"query": "skateboards",
"fields": [ "item_title^3" ]
}
}
}
I only get 3 results. However, I would like also documents with the word "skateboard" to be returned.
From what I understand from Elasticsearch I would expect that this is done by specifying a mapping on the item_title field that contains an analyser which indexes the stemmed version of each word, but I can't seem to find the documentation on how to do this, which suggests that it's done in a different way.
Suggestions?
Here's one example:
PUT /stem
{
"settings": {
"analysis": {
"filter": {
"filter_stemmer": {
"type": "stemmer",
"language": "english"
}
},
"analyzer": {
"tags_analyzer": {
"type": "custom",
"filter": [
"standard",
"lowercase",
"filter_stemmer"
],
"tokenizer": "standard"
}
}
}
},
"mappings": {
"test": {
"properties": {
"item_title": {
"analyzer": "tags_analyzer",
"type": "text"
}
}
}
}
}
Index some sample docs:
POST /stem/test/1
{
"item_title": "skateboards"
}
POST /stem/test/2
{
"item_title": "skateboard"
}
POST /stem/test/3
{
"item_title": "skate"
}
Perform the query:
GET /stem/test/_search
{
"query": {
"multi_match": {
"query": "skateboards",
"fields": [
"item_title^3"
]
}
},
"fielddata_fields": [
"item_title"
]
}
And see the results:
"hits": [
{
"_index": "stem",
"_type": "test",
"_id": "1",
"_score": 1,
"_source": {
"item_title": "skateboards"
},
"fields": {
"item_title": [
"skateboard"
]
}
},
{
"_index": "stem",
"_type": "test",
"_id": "2",
"_score": 1,
"_source": {
"item_title": "skateboard"
},
"fields": {
"item_title": [
"skateboard"
]
}
}
]
I have added, also, the fielddata_fields element so that you can see how the content of the field has been indexed. As you can see, in both cases, the indexed term is skateboard.

Elasticsearch use custom analyzer on filter

I was asking on elasticsearch nested filter return empty result about some error I have in the query and wont getting any results, but in the answer I was pointed out that the expression I use for the filter wasn't analyzed as I expect.
I have a custom analyzer to do the work how can I specify in the next query to the filter to use this custom analyzer:
GET /develop/_search?search_type=dfs_query_then_fetch
{
"query": {
"filtered" : {
"query": {
"bool": {
"must": [
{ "match": { "title": "post" }}
]
}
},
"filter": {
"bool": {
"must": [
{"term": {
"featured": 0
}},
{
"nested": {
"path": "seller",
"filter": {
"bool": {
"must": [
{ "term": { "seller.firstName": "Test 3" } }
]
}
},
"_cache" : true
}}
]
}
}
}
},
"sort": [
{
"_score":{
"order": "desc"
}
},{
"created": {
"order": "desc"
}
}
],
"track_scores": true
}
Here is a setup that seems to do what you want. I used the same basic code as the last answer, but used index_analyzer and search_analyzer in the index definition as follows:
curl -XDELETE "http://localhost:9200/my_index"
curl -XPUT "http://localhost:9200/my_index" -d'
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"filter": {
"snowball": { "type": "snowball", "language": "English" },
"english_stemmer": { "type": "stemmer", "language": "english" },
"english_possessive_stemmer": { "type": "stemmer", "language": "possessive_english" },
"stopwords": { "type": "stop", "stopwords": [ "_english_" ] },
"worddelimiter": { "type": "word_delimiter" }
},
"tokenizer": {
"nGram": { "type": "nGram", "min_gram": 3, "max_gram": 20 }
},
"analyzer": {
"custom_analyzer": {
"type": "custom",
"tokenizer": "nGram",
"filter": [
"stopwords",
"asciifolding",
"lowercase",
"snowball",
"english_stemmer",
"english_possessive_stemmer",
"worddelimiter"
]
},
"custom_search_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"stopwords",
"asciifolding",
"lowercase",
"snowball",
"english_stemmer",
"english_possessive_stemmer",
"worddelimiter"
]
}
}
}
},
"mappings": {
"posts": {
"properties": {
"title": {
"type": "string",
"analyzer": "custom_analyzer",
"boost": 5
},
"seller": {
"type": "nested",
"properties": {
"firstName": {
"type": "string",
"index_analyzer": "custom_analyzer",
"search_analyzer": "custom_search_analyzer",
"boost": 3
}
}
}
}
}
}
}'
Then added the test docs
curl -XPUT "http://localhost:9200/my_index/posts/1" -d'
{"title": "post", "seller": {"firstName":"Test 1"}}'
curl -XPUT "http://localhost:9200/my_index/posts/2" -d'
{"title": "post", "seller": {"firstName":"Test 2"}}'
curl -XPUT "http://localhost:9200/my_index/posts/3" -d'
{"title": "post", "seller": {"firstName":"Test 3"}}'
And then a couple of match queries in a bool, where one is a multiword query, seems to accomplish what you are wanting:
curl -XPOST "http://localhost:9200/my_index/_search" -d'
{
"query": {
"bool": {
"must": [
{
"match": {
"title": "post"
}
},
{
"nested": {
"path": "seller",
"query": {
"match": {
"seller.firstName": {
"query": "Test 3",
"operator": "and"
}
}
}
}
}
]
}
}
}'
...
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 6.8380365,
"hits": [
{
"_index": "my_index",
"_type": "posts",
"_id": "3",
"_score": 6.8380365,
"_source": {
"title": "post",
"seller": {
"firstName": "Test 3"
}
}
}
]
}
}
Here is the code I used:
http://sense.qbox.io/gist/8cd954aa60be8c44f64e4282e15e6b565c945ecb
Does that solve your problem?

Resources