I can't find any documentation on what happens if the Elasticsearch Bulk API fails on one or more of the actions. For example, for the following request, let's say there is already a document with id "3", so the "create" should fail. Does this cause all of the other actions to fail?
{ "index" : { "_index" : "test", "_type" : "type1", "_id" : "1" } }
{ "field1" : "value1" }
{ "delete" : { "_index" : "test", "_type" : "type1", "_id" : "2" } }
{ "create" : { "_index" : "test", "_type" : "type1", "_id" : "3" } }
{ "field1" : "value3" }
{ "update" : {"_id" : "1", "_type" : "type1", "_index" : "index1"} }
{ "doc" : {"field2" : "value2"} }
I'm using the Node.js elasticsearch module.
No, a failure in one action does not affect the others.
From the Elasticsearch bulk API documentation:
The response to a bulk action is a large JSON structure with the
individual results of each action that was performed. The failure of a
single action does not affect the remaining actions.
In the response from the Elasticsearch client, each action has a corresponding status that tells you whether it failed or not.
Example:
client.bulk({
  body: [
    // action description
    { index: { _index: 'test', _type: 'test', _id: 1 } },
    // the document to index
    { title: 'foo' },
    // action description
    { update: { _index: 'test', _type: 'test', _id: 332 } },
    // the document to update
    { doc: { title: 'foo' } },
    // action description
    { delete: { _index: 'test', _type: 'test', _id: 33 } },
    // no document needed for this delete
  ]
}, function (err, resp) {
  if (resp.errors) {
    console.log(JSON.stringify(resp, null, '\t'));
  }
});
Response:
{
"took": 13,
"errors": true,
"items": [
{
"index": {
"_index": "test",
"_type": "test",
"_id": "1",
"_version": 20,
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"status": 200
}
},
{
"update": {
"_index": "test",
"_type": "test",
"_id": "332",
"status": 404,
"error": {
"type": "document_missing_exception",
"reason": "[test][332]: document missing",
"shard": "-1",
"index": "test"
}
}
},
{
"delete": {
"_index": "test",
"_type": "test",
"_id": "33",
"_version": 2,
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"status": 404,
"found": false
}
}
]
}
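If you need to act on individual failures programmatically, you can walk resp.items: each entry is keyed by its action type and carries its own status, plus an error object when the action failed. A minimal sketch along those lines (the collectBulkFailures helper and the shape of the failures array are just illustrative, not part of the client API):

function collectBulkFailures(resp) {
  const failures = [];
  resp.items.forEach(function (item) {
    // Each item has exactly one key: "index", "create", "update" or "delete".
    const action = Object.keys(item)[0];
    const result = item[action];
    // Failed actions carry an error object and/or a 4xx/5xx status.
    if (result.error || result.status >= 400) {
      failures.push({ action: action, id: result._id, status: result.status, error: result.error });
    }
  });
  return failures;
}

Run against the response above, this would pick out the two 404 entries (the update and the delete) while skipping the successful index.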
I have a document containing the title "Hard work & Success", and I need to search for this document. If I type "Hardwork" (without a space) it doesn't return any value, but if I type "hard work" it returns the document.
This is the query I used:
const search = qObject.search;
const payload = {
from: skip,
size: limit,
_source: [
"id",
"title",
"thumbnailUrl",
"youtubeUrl",
"speaker",
"standards",
"topics",
"schoolDetails",
"uploadTime",
"schoolName",
"description",
"studentDetails",
"studentId"
],
query: {
bool: {
must: {
multi_match: {
fields: [
"title^2",
"standards.standard^2",
"speaker^2",
"schoolDetails.schoolName^2",
"hashtags^2",
"topics.topic^2",
"studentDetails.studentName^2",
],
query: search,
fuzziness: "AUTO",
},
},
},
},
};
If I search for the title "hard work" (with a space), then it returns data like this:
"searchResults": [
{
"_id": "92",
"_score": 19.04531,
"_source": {
"standards": {
"standard": "3",
"categoryType": "STANDARD",
"categoryId": "S3"
},
"schoolDetails": {
"categoryType": "SCHOOL",
"schoolId": "TPS123",
"schoolType": "PUBLIC",
"logo": "91748922mn8bo9krcx71.png",
"schoolName": "Carmel CMI Public School"
},
"studentDetails": {
"studentId": 270,
"studentDp": "164646972124244.jpg",
"studentName": "Nelvin",
"about": "good student"
},
"topics": {
"categoryType": "TOPIC",
"topic": "Motivation",
"categoryId": "MY"
},
"youtubeUrl": "https://www.youtube.com/watch?v=wermQ",
"speaker": "Anna Maria Siby",
"description": "How hardwork leads to success - motivational talk by Anna",
"id": 92,
"uploadTime": "2022-03-17T10:59:59.400Z",
"title": "Hard work & Success",
}
},
]
And if I search for the keyword "Hardwork" (without a space), it doesn't find this document. I need it to match the related documents with the search keyword even without the space. Is there any solution for this? Can you please help me out?
I made an example using a shingle analyzer.
Mapping:
{
"settings": {
"analysis": {
"filter": {
"shingle_filter": {
"type": "shingle",
"max_shingle_size": 4,
"min_shingle_size": 2,
"output_unigrams": "true",
"token_separator": ""
}
},
"analyzer": {
"shingle_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"shingle_filter"
]
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "shingle_analyzer"
}
}
}
}
I tested it with your term. Note that the token "hardwork" was generated, but the other shingles were generated as well, which may be a problem for you.
GET idx-separator-words/_analyze
{
"analyzer": "shingle_analyzer",
"text": ["Hard work & Success"]
}
Results:
{
"tokens" : [
{
"token" : "hard",
"start_offset" : 0,
"end_offset" : 4,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "hardwork",
"start_offset" : 0,
"end_offset" : 9,
"type" : "shingle",
"position" : 0,
"positionLength" : 2
},
{
"token" : "hardworksuccess",
"start_offset" : 0,
"end_offset" : 19,
"type" : "shingle",
"position" : 0,
"positionLength" : 3
},
{
"token" : "work",
"start_offset" : 5,
"end_offset" : 9,
"type" : "<ALPHANUM>",
"position" : 1
},
{
"token" : "worksuccess",
"start_offset" : 5,
"end_offset" : 19,
"type" : "shingle",
"position" : 1,
"positionLength" : 2
},
{
"token" : "success",
"start_offset" : 12,
"end_offset" : 19,
"type" : "<ALPHANUM>",
"position" : 2
}
]
}
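With that mapping in place, a plain match query on title should now also hit the document when the user types "Hardwork" without a space, because the index contains the shingle token "hardwork". A rough sketch of the search, assuming an index named idx-separator-words (as in the _analyze call above) and a Node.js Elasticsearch client instance named client:

const result = await client.search({
  index: 'idx-separator-words',      // assumed index name, as used in the _analyze call
  body: {
    query: {
      match: { title: 'Hardwork' }   // matches the shingle token "hardwork"
    }
  }
});
// Depending on the client version the hits are on result or on result.body.
console.log(result.hits ? result.hits.hits : result.body.hits.hits);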
I found out how to add an element to a nested property list, but now I'm struggling with Elasticsearch's mechanism for updating document properties. I'm using Axios as my HTTP client wrapper. An example of the documents saved in Elasticsearch:
{
"took": 4,
"timed_out": false,
"_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 },
"hits": {
"total": { "value": 2, "relation": "eq" },
"max_score": 1.0,
"hits": [
{
"_index": "my-index-01",
"_type": "_doc",
"_id": "-8_MhnsBRfQBiuhMDxjT",
"_score": 1.0,
"_source": {
"code": "ABC",
"description": "Test",
"items": [{ "key": "XYZ", "value": "123" }]
}
},
{ ... },
{ ... },
{ ... }
]
}
}
My main goal is to filter documents by the code field, find the element in the items array by its key, and then change its value.
I tried this code, but the value is not changing and I'm getting a 400 response from my Docker Elasticsearch container:
const input = {
key: "XYZ",
value: "1234"
}
this.axios.post(
`/my-index-01/_update_by_query`,
{
script: {
lang: 'painless',
source: `if (ctx._source.items['items.key'].value.key == params.key) {
doc['items.key'].value.value == params.value;
} `,
params: input
},
query: {
match: {
code: "ABC",
},
}
},
{
headers: {
"Content-Type": "application/json",
},
}
);
I found a solution to this problem and I want to share it here as an answer, in case someone struggles with the same issue someday. The trick is to iterate over ctx._source.items in the Painless script and assign the new value with =, instead of reading doc values and comparing with ==:
this.axios.post(
`/my-index-01/_update_by_query`,
{
script: {
lang: 'painless',
source: `for (def item : ctx._source.items) {
if (item.key == params.key) {
item.value = params.value;
}
} `,
params: input
},
query: {
match: {
code: "ABC",
},
}
},
{
headers: {
"Content-Type": "application/json",
},
}
);
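To verify the change you can fetch the document again; a quick check, assuming the same axios instance and using the index and _id from the sample hit above:

const res = await this.axios.get('/my-index-01/_doc/-8_MhnsBRfQBiuhMDxjT');
// items should now contain { key: "XYZ", value: "1234" } after the update by query.
console.log(res.data._source.items);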
I have a name field with the value "abc_name". When I search for "abc_" I get proper results, but when I search for "abc_##£&-#&" I still get the same results. I want my query to ignore special characters that don't match my query.
My query uses:
multi_match
type: cross_fields
operator: AND
I am using the standard search_analyzer for my fields.
And I want to keep this mapping structure as it is, otherwise it will affect my other search behaviour:
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "autocomplete",
"search_analyzer": "standard"
}
Please see the sample below, where I've created a custom analyzer that fits your use case:
Sample Mapping:
PUT some_test_index
{
"settings": {
"analysis": {
"analyzer": {
"my_custom_analyzer": {
"type": "custom",
"tokenizer": "custom_tokenizer",
"filter": ["lowercase", "3_5_edge_ngram"]
}
},
"tokenizer": {
"custom_tokenizer": {
"type": "pattern",
"pattern": "\\w+_+[^a-zA-Z\\d\\s_]+|\\s+". <---- Note this pattern
}
},
"filter": {
"3_5_edge_ngram": {
"type": "edge_ngram",
"min_gram": 3,
"max_gram": 5
}
}
}
},
"mappings": {
"properties": {
"my_field":{
"type": "text",
"analyzer": "my_custom_analyzer"
}
}
}
}
The above pattern simply discards tokens with a format like abc_$%^^##; as a result, such a token would not be indexed.
Note the way the analyzer works:
First it executes the tokenizer
Then it applies the edge_ngram filter to the tokens generated.
You can verify this by simply removing the edge_ngram filter from the above mapping, to first understand what tokens are generated via the Analyze API, which would be as below:
POST some_test_index/_analyze
{
"analyzer": "my_custom_analyzer",
"text": "abc_name asda efg_!##!## 1213_adav"
}
Tokens generated:
{
"tokens" : [
{
"token" : "abc_name",
"start_offset" : 0,
"end_offset" : 8,
"type" : "word",
"position" : 0
},
{
"token" : "asda",
"start_offset" : 9,
"end_offset" : 13,
"type" : "word",
"position" : 1
},
{
"token" : "1213_adav",
"start_offset" : 25,
"end_offset" : 34,
"type" : "word",
"position" : 2
}
]
}
Note that the token efg_!##!## has been removed.
I've added the edge_ngram filter because you would want the search to succeed when you search for abc_, given that the token generated by the tokenizer is abc_name.
Sample Document:
POST some_test_index/_doc/1
{
"my_field": "abc_name asda efg_!##!## 1213_adav"
}
Query Request:
Use-case 1:
POST some_test_index/_search
{
"query": {
"match": {
"my_field": "abc_"
}
}
}
Use-case-2:
POST some_test_index/_search
{
"query": {
"match": {
"my_field": "efg_!##!##"
}
}
}
Responses:
Response for use-case-1:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.47992462,
"hits" : [
{
"_index" : "some_test_index",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.47992462,
"_source" : {
"my_field" : "abc_name asda efg_!##!## 1213_adav"
}
}
]
}
}
Response for use-case-2:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
Updated Answer:
Create your mapping as follows based on the index I've created and let me know if that works:
PUT some_test_index
{
"settings": {
"analysis": {
"analyzer": {
"my_custom_analyzer": {
"type": "custom",
"tokenizer": "punctuation",
"filter": ["lowercase"]
}
},
"tokenizer": {
"punctuation": {
"type": "pattern",
"pattern": "\\w+_+[^a-zA-Z\\d\\s_]+|\\s+"
}
}
}
},
"mappings": {
"properties": {
"my_field":{
"type": "text",
"analyzer": "autocompete", <----- Assuming you have already this in setting
"search_analyzer": "my_custom_analyzer". <----- Note this
}
}
}
}
Please try and let me know if this works for all your use-cases.
let elasticsearch = require('elasticsearch');
let client = new elasticsearch.Client({
  host: "*********************",
});

let temp = await client.update({
  index: container,
  id: 1,
  type: '_doc',
  body: { "name": "rks", "visible": true },
  doc_as_upsert: true,
})
The response of this query is:
"result": {
"_index": "container",
"_type": "_doc",
"_id": "1",
"_version": 2,
"result": "updated",
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"_seq_no": 1,
"_primary_term": 1
}
How do I return the updated document in the response? Is there any way/method to achieve this goal? Please help me.
The best you can do is set "_source": true. This way you'll get back the document source.
For example:
POST test/_update/1
{
"doc":{
"hello": "world",
"hey": "there"
},
"_source": true,
"doc_as_upsert": true
}
Sample response:
{
"_index" : "test",
"_type" : "_doc",
"_id" : "1",
"_version" : 2,
"result" : "updated",
"_shards" : {
"total" : 2,
"successful" : 1,
"failed" : 0
},
"_seq_no" : 2,
"_primary_term" : 1,
"get" : {
"_seq_no" : 2,
"_primary_term" : 1,
"found" : true,
"_source" : {
"hello" : "world",
"hey" : "there"
}
}
}
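Since your snippet uses the Node.js client, you can pass the same option through the update call. A rough sketch based on the REST body above, assuming the legacy elasticsearch module from your question (note that doc_as_upsert and _source go inside the request body):

let temp = await client.update({
  index: container,
  type: '_doc',
  id: 1,
  body: {
    doc: { name: "rks", visible: true },   // the partial document to merge
    doc_as_upsert: true,                   // create the document if it does not exist yet
    _source: true                          // ask Elasticsearch to return the updated source
  }
});
// The updated document comes back under the "get" key of the response.
console.log(temp.get._source);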
I created an index in Elasticsearch 6.5.1 and successfully loaded the data into that index. There is one field, "submitted_date", which is a timestamp. Below is the mapping of this field:
"submitted_date": { "type": "date", "format":"yyyy-MM-dd HH:mm:ss.SSS" },
Then I created the index pattern, using "submitted_date" as the Time Filter field name. After that I tried to check the data in the Discover tab, but no data is showing; there is a message saying No results match your search criteria.
Note that I have changed the time in the time range picker (in the top right corner of the Kibana dashboard) in every possible way.
The data does appear in the Dev Tools tab with Elasticsearch queries.
PS: I inserted the data using Node.js with the official elasticsearch library; I did not use Logstash.
I followed this article, but it did not help me.
UPDATE: sample document
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 10480,
"max_score" : 1.0,
"hits" : [
{
"_index" : "test",
"_type" : "tests",
"_id" : "1214334",
"_score" : 1.0,
"_source" : {
"priority" : "4",
"submitted_date" : "2018-01-04T18:32:21.000Z",
"submitted_month" : 0,
"submitted_month_name" : "January",
"submitted_day" : 4,
"submitted_weekday" : "Tuesday",
"submitted_hour" : 18,
"submitted_year_month" : "2018-0",
"submitted_year_month_name" : "2018-January",
"date_key" : "20180104",
"year_month_key" : "201801",
"status" : "Closed"
}
}
]
}
}
Inspect request
{
"version": true,
"size": 500,
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "submitted_date",
"interval": "1d",
"time_zone": "Asia/Kolkata",
"min_doc_count": 1
}
}
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
{
"field": "close_date",
"format": "date_time"
},
{
"field": "last_modified_date",
"format": "date_time"
},
{
"field": "last_resolved_date",
"format": "date_time"
},
{
"field": "submitted_date",
"format": "date_time"
},
{
"field": "time_to_resolve",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [
{
"match_all": {}
},
{
"range": {
"submitted_date": {
"gte": 1514745000000,
"lte": 1543937620414,
"format": "epoch_millis"
}
}
}
],
"filter": [],
"should": [],
"must_not": []
}
},
"highlight": {
"pre_tags": [
"#kibana-highlighted-field#"
],
"post_tags": [
"#/kibana-highlighted-field#"
],
"fields": {
"*": {}
},
"fragment_size": 2147483647
}
}
Index pattern
This is how I created the index and mapping:
function _putMapping() {
return client.indices.create({
index: process.env.ELASTICSEARCH_INDEX,
body: {
settings:{
index:{
"number_of_shards": 1,
"number_of_replicas": 5
},
"index.mapping.ignore_malformed" : true
},
mappings:{
tests:{
properties:{
"last_modified_date": { "type": "date" },
"last_resolved_date": { "type": "date" },
"time_to_resolve": { "type": "date" },
"submitted_date": { "type": "date", "format":"yyyy-MM-dd HH:mm:ss.SSS" },
"date_key": { "type": "integer" },
"priority": { "type": "long" },
"submitted_hour": { "type": "long" },
"submitted_month": { "type": "long" },
"submitted_year": { "type": "long" },
"submitted_year": { "type": "keyword" },
"submitted_year_month": { "type": "keyword" },
"submitted_year_month_name": { "type": "keyword" },
}
}
}
}
});
}
Your submitted_date is coming in like 2018-01-04T18:32:21.000Z, but your mapping is set to yyyy-MM-dd HH:mm:ss.SSS.
You need to change it to "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'".
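In your _putMapping code that means changing only the format string for submitted_date, for example:

"submitted_date": { "type": "date", "format": "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'" },

If you also have documents stored in the old format, you can list several patterns separated by ||, e.g. "yyyy-MM-dd HH:mm:ss.SSS||yyyy-MM-dd'T'HH:mm:ss.SSS'Z'".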