Problem: I created a mapping that works fine in Elasticsearch 1.7.1, but after upgrading to 2.1.1 it gives me this exception:
EXCEPTION
response: '{"error":{"root_cause":[{"type":"mapper_parsing_exception","reason":"analyzer on field [_all] must be set when search_analyzer is set"}],"type":"mapper_parsing_exception","reason":"Failed to parse mapping [movie]: analyzer on field [_all] must be set when search_analyzer is set","caused_by":{"type":"mapper_parsing_exception","reason":"analyzer on field [_all] must be set when search_analyzer is set"}},"status":400}',
toString: [Function],
toJSON: [Function] }
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"filter": {
"nGram_filter": {
"type": "nGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"nGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"nGram_filter"
]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
"mappings": {
"movie": {
"_all": {
"index_analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
},
"properties": {
"movieName": {
"type": "string",
"index": "not_analyzed"
},
"movieYear": {
"type": "double"
},
"imageUrl": {
"type": "string"
},
"genre": {
"type": "string"
},
"director": {
"type": "string"
},
"producer": {
"type": "string"
},
"cast": {
"type": "String"
},
"writer": {
"type": "string"
},
"synopsis": {
"type": "string"
},
"rating": {
"type": "double"
},
"price": {
"type": "double"
},
"format": {
"type": "string"
},
"offer": {
"type": "double"
},
"offerString": {
"type": "string"
},
"language": {
"type": "string"
}
}
}
}
}
The error is quite clear if you ask me: you need to specify an analyzer for _all in your movie mapping. The index_analyzer setting was removed in Elasticsearch 2.0.
"_all": {
"analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
},
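For completeness, here's a minimal sketch of recreating the index with the corrected mapping via the official elasticsearch Python client (the client choice, the index name movies, and a local node are my assumptions; any HTTP client that can PUT the same JSON body works too):

from elasticsearch import Elasticsearch

es = Elasticsearch()  # assumes a local node on localhost:9200

# Trimmed to the parts relevant to the error: the _all block now uses
# "analyzer" (which covers index time) instead of the removed
# "index_analyzer" setting.
es.indices.create(index="movies", body={
    "settings": {
        "analysis": {
            "filter": {
                "nGram_filter": {
                    "type": "nGram", "min_gram": 2, "max_gram": 20,
                    "token_chars": ["letter", "digit", "punctuation", "symbol"]
                }
            },
            "analyzer": {
                "nGram_analyzer": {
                    "type": "custom", "tokenizer": "whitespace",
                    "filter": ["lowercase", "asciifolding", "nGram_filter"]
                },
                "whitespace_analyzer": {
                    "type": "custom", "tokenizer": "whitespace",
                    "filter": ["lowercase", "asciifolding"]
                }
            }
        }
    },
    "mappings": {
        "movie": {
            "_all": {
                "analyzer": "nGram_analyzer",
                "search_analyzer": "whitespace_analyzer"
            },
            "properties": {
                "movieName": {"type": "string", "index": "not_analyzed"}
                # ...remaining properties exactly as in the question...
            }
        }
    }
})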
I want to convert a JSON schema to an Avro schema using Python, because I'm building my microservice with Python FastAPI.
json-schema
{
"type":"object",
"properties":{
"IsNonPO":{
"title":"IsNonPO",
"type":[
"boolean",
"null"
],
"precision":null,
"scale":null,
"size":null,
"allowedValues":null
},
"ApprovedState":{
"title":"ApprovedState",
"type":[
"number",
"null"
],
"precision":null,
"scale":null,
"size":null,
"allowedValues":[
{
"key":"8",
"value":"Invalid"
},
{
"key":"1",
"value":"Composing"
},
{
"key":"2",
"value":"Submitted"
},
{
"key":"4",
"value":"Approved"
},
{
"key":"16",
"value":"Denied"
}
]
},
"CreateDate":{
"title":"CreateDate",
"type":[
"string",
"null"
],
"precision":null,
"scale":null,
"size":null,
"allowedValues":null,
"format":"date-time"
},
"RemitToAddress": {
"type": ["object", "null"],
"properties": {
"State": {
"title": "RemitToAddress.State",
"type": ["string", "null"],
"precision": null,
"scale": null,
"size": 50,
"allowedValues": null
},
"Phone": {
"title": "RemitToAddress.Phone",
"type": ["string", "null"],
"precision": null,
"scale": null,
"size": 70,
"allowedValues": null
},
"Country": {
"type": ["object", "null"],
"properties": {
"UniqueName": {
"title": "RemitToAddress.Country.UniqueName",
"type": ["string", "null"],
"precision": null,
"scale": null,
"size": 50,
"allowedValues": null
}
}
},
"PostalCode": {
"title": "RemitToAddress.PostalCode",
"type": ["string", "null"],
"precision": null,
"scale": null,
"size": 50,
"allowedValues": null
},
"City": {
"title": "RemitToAddress.City",
"type": ["string", "null"],
"precision": null,
"scale": null,
"size": 50,
"allowedValues": null
},
"Fax": {
"title": "RemitToAddress.Fax",
"type": ["string", "null"],
"precision": null,
"scale": null,
"size": 70,
"allowedValues": null
},
"UniqueName": {
"title": "RemitToAddress.UniqueName",
"type": ["string", "null"],
"precision": null,
"scale": null,
"size": 50,
"allowedValues": null
},
"Lines": {
"title": "RemitToAddress.Lines",
"type": ["string", "null"],
"precision": null,
"scale": null,
"size": 1024,
"allowedValues": null
},
"Name": {
"title": "RemitToAddress.Name",
"type": ["string", "null"],
"precision": null,
"scale": null,
"size": 128,
"allowedValues": null
}
}
}
}
}
Avro schema
{
"type":"record",
"name":"invoice",
"namespace":"com.xyz.com",
"fields":[
{
"name":"IsNonPO",
"type":[
"null",
"boolean"
]
},
{
"name":"ApprovedState",
"type":[
"null",
"long"
]
},
{
"name":"CreateDate",
"type":[
"null",
{
"type":"string",
"logicalType":"timestamp-micros"
}
]
},
{
"name":"RemitToAddress",
"type":[
{
"type":"record",
"name":"RemitToAddress",
"namespace":"com.xyz.com.invoice",
"fields":[
{
"name":"City",
"type":[
"null",
"string"
]
},
{
"name":"Country",
"type":[
{
"type":"record",
"name":"Country",
"namespace":"com.xyz.com.invoice.RemitToAddress",
"fields":[
{
"name":"UniqueName",
"type":[
"null",
"string"
]
}
]
},
"null"
]
},
{
"name":"Fax",
"type":[
"null",
"string"
]
},
{
"name":"Lines",
"type":[
"null",
"string"
]
},
{
"name":"Name",
"type":[
"null",
"string"
]
},
{
"name":"Phone",
"type":[
"null",
"string"
]
},
{
"name":"PostalCode",
"type":[
"null",
"string"
]
},
{
"name":"State",
"type":[
"null",
"string"
]
},
{
"name":"UniqueName",
"type":[
"null",
"string"
]
}
]
},
"null"
]
}
]
}
I tried to find a converter in Python but couldn't; I found one, but it does this in Java. Please let me know whether a Python converter exists, or whether I should write my own library.
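I couldn't find a ready-made Python converter either, but for schemas shaped like yours (a type list that may include "null", nested "object" properties, and a date-time format) the translation is mechanical enough to hand-roll. Here is a minimal sketch under those assumptions; the "number"-to-"long" choice and the union ordering (it always emits "null" first, while your target sometimes puts it last) may need adjusting:

import json

# Assumed primitive mapping; the target schema above maps JSON-schema
# "number" to Avro "long", so we follow that here.
PRIMITIVES = {"boolean": "boolean", "number": "long",
              "integer": "long", "string": "string"}

def to_avro_field(name, prop, namespace):
    """Turn one JSON-schema property into an Avro record field."""
    types = prop["type"] if isinstance(prop["type"], list) else [prop["type"]]
    nullable = "null" in types
    main = next(t for t in types if t != "null")

    if main == "object":
        # Nested objects become nested records, namespaced under the parent.
        avro_type = {
            "type": "record",
            "name": name,
            "namespace": namespace,
            "fields": [to_avro_field(k, v, namespace + "." + name)
                       for k, v in prop["properties"].items()],
        }
    elif prop.get("format") == "date-time":
        # Matches the target's string + logicalType representation.
        avro_type = {"type": "string", "logicalType": "timestamp-micros"}
    else:
        avro_type = PRIMITIVES[main]

    return {"name": name,
            "type": ["null", avro_type] if nullable else avro_type}

def json_schema_to_avro(schema, name="invoice", namespace="com.xyz.com"):
    return {
        "type": "record",
        "name": name,
        "namespace": namespace,
        "fields": [to_avro_field(k, v, namespace + "." + name)
                   for k, v in schema["properties"].items()],
    }

with open("invoice.schema.json") as fh:  # hypothetical input file
    print(json.dumps(json_schema_to_avro(json.load(fh)), indent=2))

You can sanity-check the output by feeding it to fastavro's parse_schema or the avro package's schema parser.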
I would like to generate a schema from a JSON object.
var GenerateSchema = require('generate-schema')
var schema = GenerateSchema.json(request.body);
Request.Body
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
102,
0.5
]
},
"properties": {
"prop0": "value0"
}
},
{
"type": "Feature",
"geometry": {
"type": "LineString",
"coordinates": [
[
102,
0
],
[
103,
1
],
[
104,
0
],
[
105,
1
]
]
},
"properties": {
"prop0": "value0",
"prop1": 0
}
},
{
"type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
100,
0
],
[
101,
0
],
[
101,
1
],
[
100,
1
],
[
100,
0
]
]
]
},
"properties": {
"prop0": "value0",
"prop1": {
"this": "that"
}
}
}
]
}
Schema generated from the request.body
{
"$id": "http://json-schema.org/draft-04/schema#",
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Product",
"type": "object",
"properties": {
"type": {
"type": "string"
},
"features": {
"type": "array",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string"
},
"geometry": {
"type": "object",
"properties": {
"type": {
"type": "string"
},
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
}
],
"type": "array"
}
}
}
},
"properties": {
"type": "object",
"properties": {
"prop0": {
"type": "string"
},
"prop1": {
"type": "object",
"properties": {
"this": {
"type": "string"
}
}
}
}
}
},
"required": [
"type",
"geometry",
"properties"
]
}
}
}
}
Schema validation with Ajv
[
{
keyword: 'type',
dataPath: '.features[0].geometry.coordinates[0]',
schemaPath: '#/properties/features/items/properties/geometry/properties/coordinates/items/type',
params: { type: 'array' },
message: 'should be array'
}
]
Why does Ajv detect an issue?
Assuming that you want coordinates to be either an array of numbers or an array of arrays of numbers, this schema doesn't look right to me:
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
},
{
"type": "number"
}
],
"type": "array"
}
}
You would typically use oneOf to choose between different schemas, but the oneOf here says:
it can be a number
or a number
or a number
or a number
or a number
But ultimately it doesn't matter, because the {"type": "array"} at the end seems to override everything, which would explain why it fails for {"coordinates": [102, 0.5]}: that is not an array of arrays.
It seems that what you're looking for is more along the lines of (untested):
"coordinates": {
"type": "array",
"items": {
"oneOf": [
{ "type": "number" },
{ "type": "array", "items": { "type": "number"} }
]
}
}
Which reads:
items in a coordinates array can be either "numbers" or "arrays of numbers".
I think that your generate-schema package got this wrong here.
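To sanity-check that corrected subschema, here's the same validation done with the Python jsonschema package (my choice for a quick check; Ajv behaves the same way on these inputs):

from jsonschema import validate, ValidationError

coordinates_schema = {
    "type": "array",
    "items": {
        "oneOf": [
            {"type": "number"},
            {"type": "array", "items": {"type": "number"}},
        ]
    },
}

# A Point's coordinates (plain numbers) and a LineString's (arrays of
# numbers) both pass. Note that a Polygon's triply-nested arrays would
# still fail and would need a third oneOf branch.
for sample in ([102, 0.5], [[102, 0], [103, 1], [104, 0], [105, 1]]):
    try:
        validate(sample, coordinates_schema)
        print(sample, "=> valid")
    except ValidationError as err:
        print(sample, "=>", err.message)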
I found my answer with the QuickType library.
I have a log message in Kibana that contains this:
org.hibernate.exception.GenericJDBCException: Cannot open connection
at org.springframework.orm.hibernate3.HibernateTransactionManager.doBegin(HibernateTransactionManager.java:597)
Actual search that isn't returning results: log_message: "hibernate3"
If I search for "hibernate3", this message will not appear. I am using an Elasticsearch template and have indexed the field, but I also want to be able to do case-insensitive full-text searching. Is this possible?
Template that is in use:
{
"template": "filebeat-*",
"mappings": {
"mainProgram": {
"properties": {
"#timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"#version": {
"type": "text"
},
"beat": {
"properties": {
"hostname": {
"type": "text"
},
"name": {
"type": "text"
}
}
},
"class_method": {
"type": "text",
"fielddata": "true",
"index": "true"
},
"class_name": {
"type": "text",
"fielddata": "true"
},
"clientip": {
"type": "ip",
"index": "not_analyzed"
},
"count": {
"type": "long"
},
"host": {
"type": "text",
"index": "not_analyzed"
},
"input_type": {
"type": "text",
"index": "not_analyzed"
},
"log_level": {
"type": "text",
"fielddata": "true",
"index": "true"
},
"log_message": {
"type": "text",
"index": "true"
},
"log_timestamp": {
"type": "text"
},
"log_ts": {
"type": "long",
"index": "not_analyzed"
},
"message": {
"type": "text"
},
"offset": {
"type": "long",
"index": "not_analyzed"
},
"query_params": {
"type": "text",
"index": "true"
},
"sessionid": {
"type": "text",
"index": "true"
},
"source": {
"type": "text",
"index": "not_analyzed"
},
"tags": {
"type": "text"
},
"thread": {
"type": "text",
"index": "true"
},
"type": {
"type": "text"
},
"user_account_combo": {
"type": "text",
"index": "true"
},
"version": {
"type": "text"
}
}
},
"access": {
"properties": {
"#timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"#version": {
"type": "text"
},
"beat": {
"properties": {
"hostname": {
"type": "text"
},
"name": {
"type": "text"
}
}
},
"clientip": {
"type": "ip",
"index": "not_analyzed"
},
"count": {
"type": "long",
"index": "not_analyzed"
},
"host": {
"type": "text",
"index": "true"
},
"input_type": {
"type": "text",
"index": "not_analyzed"
},
"log_timestamp": {
"type": "text"
},
"log_ts": {
"type": "long",
"index": "not_analyzed"
},
"message": {
"type": "text"
},
"offset": {
"type": "long",
"index": "not_analyzed"
},
"query_params": {
"type": "text",
"index": "true"
},
"response_time": {
"type": "long"
},
"sessionid": {
"type": "text",
"index": "true"
},
"source": {
"type": "text",
"index": "not_analyzed"
},
"statuscode": {
"type": "long"
},
"tags": {
"type": "text"
},
"thread": {
"type": "text",
"index": "true"
},
"type": {
"type": "text",
"index": "true"
},
"uripath": {
"type": "text",
"index": "true"
},
"user_account_combo": {
"type": "text",
"index": "true"
},
"verb": {
"type": "text",
"index": "true"
}
}
}
}
}
message: *.hibernate3.*
also works (note that no quotes are needed for that).
For your scenario, what you're looking for is an analyzed string type, which first analyzes the string and then indexes it. To quote the docs:
In other words, index this field as full text.
So make sure the necessary fields are mapped properly, so that you can do a full-text search on the docs.
Assuming the log line in Kibana is under the field message, you can simply search for the word with:
message:"hibernate3"
You might also want to refer to this to understand the difference between term-based and full-text search.
EDIT
Have the mapping of the field log_message as such:
"log_message": {
"type": "string", <- to make it analyzed
"index": "true"
}
Also try doing a wildcard search as such:
{"wildcard":{"log_message":"*.hibernate3.*"}}
With Kibana 6.4.1 I used "%" as the wildcard:
message: %hibernate3%
For me it was because I was using ".keyword".
My field was called "message", and both "message" and "message.keyword" were available.
Full-text search doesn't work on ".keyword" fields.
Not working:
message.keyword : hello
Working:
message : hello
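The reason: the analyzed message field matches individual words, while the .keyword sub-field stores the whole line as a single term, so only an exact full-value match hits it. A hypothetical Python-client illustration:

from elasticsearch import Elasticsearch

es = Elasticsearch()  # assumes a local node on localhost:9200

# Analyzed field: hits any document containing the word "hello".
es.search(index="filebeat-*",
          body={"query": {"match": {"message": "hello"}}})

# keyword sub-field: only hits if the entire message is exactly "hello".
es.search(index="filebeat-*",
          body={"query": {"term": {"message.keyword": "hello"}}})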
I am indexing an attachment field. The POST query in Sense returns the expected result set.
My query is
POST /mydocs/_search
{
"query" : {
"bool" : {
"must" : [
{ "match" : { "file.content":"abc"} },
{ "match":{"otherDetails":"asd"}},
{ "match" : { "filePermissionInfo.accountValue" : "xyz"} }
]
}
}
}
I need to convert it to C# NEST code. I tried converting it, but it's not returning any results, even though the index contains data. If I remove
m.Match(mt1 => mt1.Field(f1 => f1.File.Content).Query(queryTerm))
from the expression below, it returns a result set. Is there any problem with the attachment field?
client.Search<IndexDocument>(s => s
.Index("mydocs")
.Query(q => q
.Bool(b => b
.Must(m =>
m.Match(mt1 => mt1.Field(f1 => f1.File.Content).Query(queryTerm)) &&
m.Match(mt2 => mt2.Field(f2 => f2.FilePermissionInfo.First().SecurityIdValue).Query(accountName)) &&
m.Match(mt3 => mt3.Field(f3 => f3.OtherDetails).Query(other))
)))
);
My mapping is
{
"mydocs": {
"mappings": {
"indexdocument": {
"properties": {
"docLocation": {
"type": "string",
"index": "not_analyzed",
"store": true
},
"documentType": {
"type": "string",
"store": true
},
"file": {
"type": "attachment",
"fields": {
"content": {
"type": "string",
"term_vector": "with_positions_offsets",
"analyzer": "full"
},
"author": {
"type": "string"
},
"title": {
"type": "string",
"term_vector": "with_positions_offsets",
"analyzer": "full"
},
"name": {
"type": "string"
},
"date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"keywords": {
"type": "string"
},
"content_type": {
"type": "string"
},
"content_length": {
"type": "integer"
},
"language": {
"type": "string"
}
}
},
"filePermissionInfo": {
"properties": {
"fileSystemRights": {
"type": "string",
"store": true
},
"securityIdValue": {
"type": "string",
"store": true
}
}
},
"id": {
"type": "double",
"store": true
},
"lastModifiedDate": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"otherDetails": {
"type": "string"
},
"title": {
"type": "string",
"store": true,
"term_vector": "with_positions_offsets"
}
}
}
}
}
}
It looks like the query hasn't been translated to NEST correctly. In the query you have
"filePermissionInfo.accountValue"
but in the NEST query, you only have
f2 => f2.FilePermissionInfo
which would result only in filePermissionInfo. You need to change this to
f2 => f2.FilePermissionInfo.AccountValue
I am using Elasticsearch with the mongoosastic npm module. I am trying to apply a filter on geo coordinates, using the following model structure:
geoLocation: {
type: {
type: String,
default: 'Point'
},
coordinates: [Number] // order should be lat,lng
}
with the mapping as follows
{
"events": {
"settings": {
"analysis": {
"filter": {
"edgeNGram_filter": {
"type": "edgeNGram",
"min_gram": 1,
"max_gram": 50,
"side": "front"
}
},
"analyzer": {
"edge_nGram_analyzer": {
"type": "custom",
"tokenizer": "edge_ngram_tokenizer",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edgeNGram",
"min_gram": "1",
"max_gram": "50",
"token_chars": [
"letter",
"digit"
]
}
}
}
},
"mappings": {
"event": {
"_all": {
"index_analyzer": "nGram_analyzer",
"search_analyzer": "whitespace_analyzer"
},
"properties": {
"title": {
"type": "string",
"index": "not_analyzed"
},
"geoLocation": {
"index": "not_analyzed",
"type": "geo_point"
}
}
}
}
}
}
Query
{
"query": {
"multi_match": {
"query": "the",
"fields": ["title", ]
}
},
"filter" : {
"geo_distance" : {
"distance" : "200km",
"geoLocation.coordinates" : {
"lat" : 19.007452,
"lon" : 72.831556
}
}
}
}
I am unable to index the geo coordinates with the above model structure, and I don't understand whether it is even possible, because in my case the coordinates are ordered lat,lng and I have read that Elasticsearch expects coordinates in lng,lat order.
Error
Error: SearchPhaseExecutionException[Failed to execute phase [query], all shards failed; shardFailures {[CDHdgtJnTbeu8tl2mDfllg][events][0]: SearchParseException[[events][0]: from[-1],size[-1]: Parse Failure [Failed to parse source
curl -XGET localhost:9200/events
{
"events": {
"aliases": {},
"mappings": {
"1": {
"properties": {
"location": {
"type": "double"
},
"text": {
"type": "string"
}
}
},
"event": {
"properties": {
"city": {
"type": "string"
},
"endTime": {
"type": "date",
"format": "dateOptionalTime"
},
"geo_with_lat_lon": {
"type": "geo_point",
"lat_lon": true
},
"isActive": {
"type": "boolean"
},
"isRecommended": {
"type": "boolean"
},
"location": {
"type": "string"
},
"title": {
"type": "string"
}
}
}
},
"settings": {
"index": {
"creation_date": "1461675012489",
"uuid": "FT-xVUdPQtyuKFm4J4Rd7g",
"number_of_replicas": "1",
"number_of_shards": "5",
"events": {
"mappings": {
"event": {
"_all": {
"enabled": "false",
"search_analyzer": "whitespace_analyzer",
"index_analyzer": "nGram_analyzer"
},
"properties": {
"geoLocation": {
"coordinates": {
"type": "geo_shape",
"index": "not_analyzed"
}
},
"location": {
"type": "string",
"index": "not_analyzed"
},
"title": {
"type": "string",
"index": "not_analyzed"
},
"geo_with_lat_lon": {
"type": "geo_point",
"lat_lon": "true",
"index": "not_analyzed"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"edge_nGram_analyzer": {
"type": "custom",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
],
"tokenizer": "edge_ngram_tokenizer"
},
"whitespace_analyzer": {
"type": "custom",
"filter": [
"lowercase",
"asciifolding"
],
"tokenizer": "whitespace"
}
},
"filter": {
"edgeNGram_filter": {
"max_gram": "50",
"type": "edgeNGram",
"min_gram": "1",
"side": "front"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"max_gram": "50",
"type": "edgeNGram",
"min_gram": "1",
"token_chars": [
"letter",
"digit"
]
}
}
}
}
},
"version": {
"created": "1070099"
}
}
},
"warmers": {}
}
}
I found a solution to my question.
Mapping
PUT /geo_test
{
"mappings": {
"type_test": {
"properties": {
"name": {
"type": "string"
},
"geoLocation": {
"type": "nested",
"properties": {
"coordinates": {
"type": "geo_point",
"lat_lon": true
}
}
}
}
}
}
}
Query
POST /geo_test/type_test/_search
{
"query": {
"filtered": {
"filter": {
"nested": {
"path": "geoLocation",
"query": {
"filtered": {
"filter": {
"geo_distance": {
"distance": 5,
"distance_unit": "km",
"geoLocation.coordinates": {
"lat": 41.12,
"lon": -71.34
}
}
}
}
}
}
}
}
}
}
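For anyone verifying this, here is a quick end-to-end check with the Python client (ES 1.x-era API, hence doc_type; the host and sample document are assumptions, and I write the distance as "5km", the equivalent combined form):

from elasticsearch import Elasticsearch

es = Elasticsearch()  # assumes a local node on localhost:9200

# Index one document shaped like the nested mapping above.
es.index(index="geo_test", doc_type="type_test", body={
    "name": "somewhere near Block Island",
    "geoLocation": {"coordinates": {"lat": 41.12, "lon": -71.34}},
})
es.indices.refresh(index="geo_test")

# Replay the nested filtered geo_distance query from the answer.
res = es.search(index="geo_test", doc_type="type_test", body={
    "query": {"filtered": {"filter": {"nested": {
        "path": "geoLocation",
        "query": {"filtered": {"filter": {"geo_distance": {
            "distance": "5km",
            "geoLocation.coordinates": {"lat": 41.12, "lon": -71.34},
        }}}},
    }}}},
})
print(res["hits"]["total"])  # expect 1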