logstash grok pattern to monitor logstash itself - logstash

I would like to add logstash.log log into my ELK stack but I always have grokparsefailure.
My pattern is OK on http://grokconstructor.appspot.com/do/match#result
My logstash conf file (filter part) is
filter {
if [application] == "logstash" {
grok {
match => { "message" => "\{:timestamp=>\"%{TIMESTAMP_ISO8601:timestamp}\", :message=>%{GREEDYDATA:errormessage}\}" }
}
date {
match => [ "timestamp" , "yyyy-MM-dd'T'HH:mm:ss.SSSSSSZ" ]
}
}
}
But I still only get
{
"_index": "logstash-2016.05.03",
"_type": "logs",
"_id": "AVR3WUtpT8BPcJ-gVynN",
"_score": null,
"_source": {
"@version": "1",
"@timestamp": "2016-05-03T16:00:20.708Z",
"path": "/var/log/logstash/logstash.log",
"host": "xxx.arte.tv",
"application": "logstash",
"tags": [
"_grokparsefailure"
]
I guess I have an issue with either { or ", but with or without backslash-escaping them, I still get a grokparsefailure.

Shame on me, there is no error in my previous post, problem was no message because of a remove_field message in another conf file.
Sorry guys for the waste of time

Related

How to define a default value when creating an index in Elasticsearch

I need to create an index in elasticsearch by assigning a default value for a field. Ex,
In python3,
request_body = {
"settings":{
"number_of_shards":1,
"number_of_replicas":1
},
"mappings":{
"properties":{
"name":{
"type":"keyword"
},
"school":{
"type":"keyword"
},
"pass":{
"type":"keyword"
}
}
}
}
from elasticsearch import Elasticsearch
es = Elasticsearch(['https://....'])
es.indices.create(index="test-index", ignore=400, body= request_body)
In the above scenario, the index will be created with those fields. But I need to set a default value of True for "pass". Can I do that here?
Elasticsearch is schema-less. It allows any number of fields and any content in fields without any logical constraints.
In a distributed system integrity checking can be expensive, so RDBMS-style checks are not available in Elasticsearch.
Best way is to do validations at client side.
Another approach is to use ingest
Ingest pipelines let you perform common transformations on your data before indexing. For example, you can use pipelines to remove fields, extract values from text, and enrich your data.
**For testing**
POST _ingest/pipeline/_simulate
{
"pipeline": {
"processors": [
{
"script": {
"lang": "painless",
"source": "if (ctx.pass ===null) { ctx.pass='true' }"
}
}
]
},
"docs": [
{
"_index": "index",
"_type": "type",
"_id": "2",
"_source": {
"name": "a",
"school":"aa"
}
}
]
}
PUT _ingest/pipeline/default-value_pipeline
{
"description": "Set default value",
"processors": [
{
"script": {
"lang": "painless",
"source": "if (ctx.pass ===null) { ctx.pass='true' }"
}
}
]
}
**Indexing document**
POST my-index-000001/_doc?pipeline=default-value_pipeline
{
"name":"sss",
"school":"sss"
}
**Result**
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "hlQDGXoB5tcHqHDtaEQb",
"_score" : 1.0,
"_source" : {
"school" : "sss",
"pass" : "true",
"name" : "sss"
}
},

Logback logstash appender add own field

I need to send application logs directly to Logstash from multiple microservices using the Logstash Logback Encoder. The problem is that when I send logs, Logstash receives them like this:
{
"_index": "logstash-2021.01.21-000001",
"_type": "_doc",
"_id": "id",
"_version": 1,
"_score": 1.6928859,
"_source": {
"@timestamp": "2021-01-21T14:13:05.480Z",
"@version": "1",
"message": "message",
"host": "gateway",
"port": 43892
},
"fields": {
"@timestamp": [
"2021-01-21T14:13:05.480Z"
]
},
"highlight": {
"message": [msg]
},
"sort": [ sort ]
}
I need to add a custom field in "fields" section or in general section. Do you have any idea how I can do this?
You can use mutate filter in your logstash configuration file.
For example, in your Logstash configuration file, it looks like this:
filter {
mutate { add_field => { "field_name" => "field_value" } }
}

Change a field based on a another field?

Logstash is receiving a docs JSON object which contains various types of docs.
{
"docs": [
{
"_id": "project:A",
"_rev": "project:1",
"name": "secret",
"children": ["item:A"]
},
{
"_id": "item:A",
"_rev": "item:1",
"name": "secret"
}
]
}
I want each doc with an _id starting with project to include matching children. The end result should be:
{
"docs": [
{
"_id": "project:A",
"_rev": "project:1",
"name": "secret",
"children": [{
"_id": "item:A",
"_rev": "item:1",
"name": "secret"
}]
},
]
}
How can I achieve this?
Here is my conf file. I haven't been able to figure out how to solve this:
input {
file {
path => ["/home/logstash/logstash-testdata.json"]
sincedb_path => "/dev/null"
start_position => "beginning"
}
}
filter {
json {
source => "message"
}
// ... ???
}
output {
elasticsearch {
hosts => ["localhost:9200"]
}
stdout {
codec => rubydebug
}
}

Customize log output in Kibana

Finally, I got working ELK stack to get some logs from a remote server. However, I would like to customize the output of the logs. Is there a way to remove some fields which I am highlighting in yellow:
I tried to remove them from _source including remove_field in the logstash.conf:
input {
beats {
port => 5044
ssl => true
ssl_certificate => "/..."
ssl_key => "/..logstash.key"
}
}
filter {
grok {
match => {
"message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}"
}
remove_field => [ "tags", "prospector.type", "host.architecture", "host.containerized", "host.id", "host.os.platform", "host.os.family" ]
}
}
output {
elasticsearch {
hosts => "localhost:9200"
index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}"
}
}
Do you know how I can get rid of the yellow fields in _source for the logs coming from filebeat?
Update of logstash.conf based on Leandro comments:
input {
beats {
port => 5044
ssl => true
ssl_certificate => ".../logstash.crt"
ssl_key => ".../logstash.key"
}
}
filter {
grok {
match => {
"message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}"
}
remove_field => [ "tags","[prospector][type]","[host][architecture]", "[host][containerized]", "[host][id]", "[host][os][platform]", "[host][os][family]", "[beat][hostname]", "[beat][name]", "[beat][version], "[offset]", "[input][type]", "[meta][cloud][provider]", "[meta][cloud][machine_type]", "[meta][cloud][instance_id]"]
}
}
output {
elasticsearch {
hosts => "localhost:9200"
index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}"
}
}
In logs:
2019-02-27T17:03:41.637-0800 DEBUG [input] file/states.go:68 New state added for /logs/api.log
2019-02-27T17:03:41.637-0800 DEBUG [registrar] registrar/registrar.go:315 Registrar state updates processed. Count: 1
2019-02-27T17:03:41.637-0800 DEBUG [registrar] registrar/registrar.go:400 Write registry file: /filebeat/registry
2019-02-27T17:03:41.637-0800 INFO log/harvester.go:255 Harvester started for file: /logs/api.log
2019-02-27T17:03:41.647-0800 DEBUG [publish] pipeline/processor.go:308 Publish event: {
"@timestamp": "2019-02-28T01:03:41.647Z",
"@metadata": {
"beat": "filebeat",
"type": "doc",
"version": "6.6.0"
},
"log": {
"file": {
"path": "/logs/api.log"
}
},
"input": {
"type": "log"
},
"host": {
"name": "tomcat",
"os": {
"family": "redhat",
"name": "CentOS Linux",
"codename": "Core",
"platform": "centos",
"version": "7 (Core)"
},
"id": "6aaed308aa5a419f880c5e45eea65414",
"containerized": true,
"architecture": "x86_64"
},
"meta": {
"cloud": {
"region": "CanadaCentral",
"provider": "az",
"instance_id": "6452bcf4-7f5d-4fc3-9f8e-5ea57f00724b",
"instance_name": "tomcat",
"machine_type": "Standard_D8s_v3"
}
},
"message": "2018-09-14 20:23:37 INFO ContextLoader:272 - Root WebApplicationContext: initialization started",
"source": "/logs/api.log",
"offset": 0,
"prospector": {
"type": "log"
},
"beat": {
"hostname": "tomcat",
"version": "6.6.0",
"name": "tomcat"
}
}
Thanks
Some of those fields are nested fields, the way to access them in a Logstash filter is using the [field][subfield] notation.
Your remove_field should be something like this:
remove_field => ["tags","[host][architecture]","[meta][cloud][provider]"]
But I don't think you can remove the @version field.
UPDATE:
Using the event example from your Filebeat log, I simulated a pipeline and got a _grokparsefailure. A remove_field placed inside grok is applied only when the match succeeds, so to remove the fields even when the grok fails you need to use remove_field inside a mutate filter:
filter {
grok {
your grok
}
mutate {
remove_field => ["[prospector]","[host][architecture]", "[host][containerized]", "[host][id]", "[host][os][platform]", "[host][os][family]", "[beat]", "[offset]", "[input]", "[meta]"]
}
}
Don't remove the tags field until you have fixed your groks.
The logstash output on that example is:
{
"source": "/logs/api.log",
"tags": [
"_grokparsefailure"
],
"@timestamp": "2019-02-28T01:03:41.647Z",
"message": "2018-09-14 20:23:37 INFO ContextLoader:272 - Root WebApplicationContext: initialization started",
"log": {
"file": {
"path": "/logs/api.log"
}
},
"@version": "1",
"host": {
"os": {
"codename": "Core",
"version": "7 (Core)",
"name": "CentOS Linux"
},
"name": "tomcat"
}
}

Can't use parsed fields to visualize data on kibana

I'm new in this ELK stuff. I've been trying to create visualizations using this stack, but I'm not able to use fields such as verb, response, request, etc, I'm only able to select a few available fields:
However, in the Discover section I'm perfectly able to work with those fields. Here is a sample of one of my query results:
(I'm using Kibana 4.4.2, filebeat forwarding to logstash 2.2.3)
{
"_index": "filebeat-2016.04.12",
"_type": "apache_log",
"_id": "AVQMoRFwO5HM5nz1lmXf",
"_score": null,
"_source": {
"message": "187.142.15.173 - - [12/Apr/2016:16:39:23 -0600] \"GET /v1.0/person/297312123/client/1132347/profile HTTP/1.1\" 200 2051 \"-\" \"Android CEX 2.2.0\"",
"@version": "1",
"@timestamp": "2016-04-12T22:39:27.064Z",
"beat": {
"hostname": "myhost",
"name": "myhost"
},
"count": 1,
"fields": null,
"input_type": "log",
"offset": 30034512,
"source": "/var/log/httpd/access_log",
"type": "apache_log",
"host": "myhost",
"tags": [
"beats_input_codec_plain_applied"
],
"clientip": "187.142.15.173",
"ident": "-",
"auth": "-",
"timestamp": "12/Apr/2016:16:39:23 -0600",
"verb": "GET",
"request": "/v1.0/person/297312123/client/1132347/profile",
"httpversion": "1.1",
"response": "200",
"bytes": "2051",
"referrer": "\"-\"",
"agent": "\"Android CEX 2.2.0\"",
},
"fields": {
"@timestamp": [
1460500767064
]
},
"sort": [
1460500767064
]
}
What could possibly be wrong with this?
Here is my config file:
filter {
if [type] == "syslog" {
grok {
match => { "message" =>
"%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}"
}
add_field => [ "received_at", "%{@timestamp}" ]
add_field => [ "received_from", "%{host}" ]
}
syslog_pri { }
date {
match => [ "syslog_timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ]
}
}
if [type] == "apache_log" {
grok {
# match => [ "message", "%{COMBINEDAPACHELOG}" ]
# match => { "message" => "%{COMBINEDAPACHELOG}" }
# add_field => [ "received_at", "%{@timestamp}" ]
# add_field => [ "received_from", "%{host}" ]
match => [ "message", "%{COMBINEDAPACHELOG}" ]
}
#syslog_pri { }
#date {
# match => [ "syslog_timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ]
#}
}
}
Thanks in advance!
My first thought would be the kibana field cache. Go to Settings->Indexes, select your index, and click the orange Reload button.

Resources