JSON transformation in Logstash

I want to transform a JSON document from one structure to another.
I've already tried multiple workarounds, but somehow I don't get the expected output.
I have a JSON log file like the following:
{
  "consumer": {
    "created_at": 1566912618154,
    "username": "dummyapp",
    "id": "07912445-ca35-464b-8596-b2ace5e60481"
  },
  "service": {
    "created_at": 1567173649,
    "connect_timeout": 60000,
    "protocol": "https",
    "read_timeout": 60000,
    "port": 9090,
    "updated_at": 1567173649,
    "retries": 1,
    "write_timeout": 60000
  },
  "request": {
    "querystring": {},
    "size": "361",
    "headers": {
      "cache-control": "no-cache",
      "content-length": "0",
      "postman-token": "fb9e10e4-2f66-4126-beec-d5c1f7c06bf7",
      "user-agent": "PostmanRuntime/7.15.0",
      "accept": "*/*",
      "accept-encoding": "gzip, deflate",
      "connection": "keep-alive"
    },
    "method": "POST"
  }
}
I want to transform it to something like this:
{
  "Title": "Sample",
  "consumer": {
    "created_at": 1566912618154,
    "username": "dummyapp"
  },
  "service": {
    "created_at": 1567173649,
    "connect_timeout": 60000
  },
  "request": {
    "querystring": {},
    "headers": {
      "user-agent": "PostmanRuntime/7.15.0"
    },
    "method": "POST"
  }
}
This is my pipeline configuration:
input {
  file {
    path => "Path_to_Log\test.log"
    start_position => "beginning"
    type => "json"
    codec => "json"
  }
}
filter {
  mutate {
    add_field => {
      "Title" => "Sample"
    }
    add_field => {
      "consumer" => "%{[consumer]}"
    }
    ...further changes
  }
}
output {
  file {
    path => "Output_Path\Output.log"
  }
}
This is the response I get:
{
  "@timestamp": "2019-09-10T09:20:38.569Z",
  "Title": "Sample",
  "@version": "1",
  "consumer": [
    [
      "created_at",
      1566912618154
    ],
    [
      "username",
      "dummyapp"
    ],
    "{\"created_at\":1566912618154,\"username\":\"dummyapp\"}"
  ],
  "type": "json"
  ..some data
}
Why am I getting output like the above?
How can I transform JSON documents to fulfill the requirements I described above?
How can I set a tag from the source to the target child element?

So why don't you just remove the fields that you are not interested in?
filter {
  mutate {
    add_field => {
      "Title" => "Sample"
    }
    remove_field => [
      "[consumer][id]",
      "[service][protocol]",
      # and so on for the service element
      "[request][size]",
      "[request][headers][cache-control]",
      # and so on for the request.headers element
    ]
    # THIS IS OBSOLETE NOW!
    #add_field => {
    #  "consumer" => "%{[consumer]}"
    #}
    ...further changes
  }
}
Please refer to this guide for further information on how to access event data and fields (especially nested ones).
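As for the last question: if it means copying a value from the source document into a nested (child) field of the target, nested fields can also be written with the same [field][subfield] notation. A minimal sketch, assuming a hypothetical target field name consumer_name chosen only for illustration:
filter {
  mutate {
    # copy the consumer's username into a new child field under [request]
    add_field => { "[request][consumer_name]" => "%{[consumer][username]}" }
  }
}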

Related

Can mountebank be used to mock a GET octet-stream image file using stub and predicate?

When using a text file, this works like a regular GET
{
  "responses": [
    {
      "is": {
        "headers": {
          "content-disposition": "attachment; filename=sample_text_file.txt"
        },
        "statusCode": 200,
        "body": "<%- stringify(filename, 'templates/attachments/sample_text_file.txt') %>"
      }
    }
  ],
  "predicates": [
    {
      "and": [
        {
          "equals": {
            "method": "GET",
            "path": "/resources/4df3dab6-003b-440d"
          }
        }
      ]
    }
  ]
}
However, when an image file is used in place of the text file, Mountebank fails to start.
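One thing that may be worth trying (a sketch, not verified against this setup) is to skip the EJS injection for binary content and return the image as a base64-encoded body using Mountebank's "_mode": "binary" response option; the body value below is only a placeholder for the base64-encoded file contents, and the content-type is an assumption:
{
  "responses": [
    {
      "is": {
        "statusCode": 200,
        "headers": {
          "content-type": "application/octet-stream",
          "content-disposition": "attachment; filename=sample_image.png"
        },
        "_mode": "binary",
        "body": "iVBORw0KGgoAAAANSUhEUg..."
      }
    }
  ],
  "predicates": [
    {
      "equals": {
        "method": "GET",
        "path": "/resources/4df3dab6-003b-440d"
      }
    }
  ]
}
The file could be base64-encoded ahead of time (for example with a base64 utility) and pasted in, which avoids passing binary data through the EJS templating.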

Mountebank predicates don't check headers

I have the code below, and it looks like the headers predicate is not being checked.
{
  "responses": [
    {
      "inject": "<%- stringify(filename, 'Scripts/MyDept/CutOffTime.ejs') %>"
    }
  ],
  "predicates": [
    {
      "matches": {
        "method": "GET",
        "path": "/cutoff-times",
        "query": {
          "country": "\\w+"
        },
        "headers": {
          "X-CLIENT-ID": "^[ A-Za-z0-9]*$"
        }
      }
    }
  ]
}
Strangely, when I pass # as the value of the X-CLIENT-ID header, it is validated and shows a "no predicate match" message, because # is not matched by the regex.
I identified the issue.
Basically, if you have multiple predicates, you need to merge them as below (using and / or):
{
  "responses": [
    {
      "inject": "<%- stringify(filename, 'Scripts/MyDept/CutOffTime.ejs') %>"
    }
  ],
  "predicates": [
    {
      "and": [
        {
          "exists": {
            "headers": {
              "X-CLIENT-ID": true
            }
          }
        },
        {
          "matches": {
            "method": "GET",
            "path": "/cutoff-times",
            "headers": {
              "X-CLIENT-ID": "^[ A-Za-z0-9]*$"
            },
            "query": {
              "country": "\\w+"
            }
          }
        }
      ]
    }
  ]
}
See the Mountebank site for more details.
Also note that the matches predicate doesn't check for existence (e.g. header existence).

Change a field based on another field?

Logstash is receiving a docs JSON object which contains various types of docs.
{
  "docs": [
    {
      "_id": "project:A",
      "_rev": "project:1",
      "name": "secret",
      "children": ["item:A"]
    },
    {
      "_id": "item:A",
      "_rev": "item:1",
      "name": "secret"
    }
  ]
}
I want each doc with an _id starting with project to include matching children. The end result should be:
{
  "docs": [
    {
      "_id": "project:A",
      "_rev": "project:1",
      "name": "secret",
      "children": [{
        "_id": "item:A",
        "_rev": "item:1",
        "name": "secret"
      }]
    }
  ]
}
How can I achieve this?
Here is my conf file; I haven't been able to figure out how to solve this:
input {
  file {
    path => ["/home/logstash/logstash-testdata.json"]
    sincedb_path => "/dev/null"
    start_position => "beginning"
  }
}
filter {
  json {
    source => "message"
  }
  # ... ???
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
  }
  stdout {
    codec => rubydebug
  }
}
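For what it's worth, one possible way to fill in the ??? placeholder is a ruby filter that looks up each project's children ids and nests the matching docs under the project. This is only a sketch, untested, and it assumes the whole docs array arrives as a single event:
# inside the existing filter { } block, after the json filter
ruby {
  code => '
    docs = event.get("docs")
    if docs.respond_to?(:each)
      # index every doc by its _id for quick lookup
      by_id = {}
      docs.each { |d| by_id[d["_id"]] = d }
      # keep only the project docs at the top level
      projects = docs.select { |d| d["_id"].to_s.start_with?("project:") }
      projects.each do |p|
        ids = p["children"] || []
        # replace the list of child ids with the matching child docs
        p["children"] = ids.map { |id| by_id[id] }.compact
      end
      event.set("docs", projects)
    end
  '
}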

Customize log output in Kibana

I finally got a working ELK stack that collects some logs from a remote server. However, I would like to customize the output of the logs. Is there a way to remove some of the fields, which I have highlighted in yellow?
I tried to remove them from _source by adding remove_field in logstash.conf:
input {
  beats {
    port => 5044
    ssl => true
    ssl_certificate => "/..."
    ssl_key => "/..logstash.key"
  }
}
filter {
  grok {
    match => {
      "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}"
    }
    remove_field => [ "tags", "prospector.type", "host.architecture", "host.containerized", "host.id", "host.os.platform", "host.os.family" ]
  }
}
output {
  elasticsearch {
    hosts => "localhost:9200"
    index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}"
  }
}
Do you know how I can get rid of the highlighted fields in _source for the logs coming from Filebeat?
Update of logstash.conf based on Leandro's comments:
input {
  beats {
    port => 5044
    ssl => true
    ssl_certificate => ".../logstash.crt"
    ssl_key => ".../logstash.key"
  }
}
filter {
  grok {
    match => {
      "message" => "%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}"
    }
    remove_field => [ "tags", "[prospector][type]", "[host][architecture]", "[host][containerized]", "[host][id]", "[host][os][platform]", "[host][os][family]", "[beat][hostname]", "[beat][name]", "[beat][version]", "[offset]", "[input][type]", "[meta][cloud][provider]", "[meta][cloud][machine_type]", "[meta][cloud][instance_id]" ]
  }
}
output {
  elasticsearch {
    hosts => "localhost:9200"
    index => "%{[@metadata][beat]}-%{+YYYY.MM.dd}"
  }
}
In the logs:
2019-02-27T17:03:41.637-0800 DEBUG [input] file/states.go:68 New state added for /logs/api.log
2019-02-27T17:03:41.637-0800 DEBUG [registrar] registrar/registrar.go:315 Registrar state updates processed. Count: 1
2019-02-27T17:03:41.637-0800 DEBUG [registrar] registrar/registrar.go:400 Write registry file: /filebeat/registry
2019-02-27T17:03:41.637-0800 INFO log/harvester.go:255 Harvester started for file: /logs/api.log
2019-02-27T17:03:41.647-0800 DEBUG [publish] pipeline/processor.go:308 Publish event: {
  "@timestamp": "2019-02-28T01:03:41.647Z",
  "@metadata": {
    "beat": "filebeat",
    "type": "doc",
    "version": "6.6.0"
  },
  "log": {
    "file": {
      "path": "/logs/api.log"
    }
  },
  "input": {
    "type": "log"
  },
  "host": {
    "name": "tomcat",
    "os": {
      "family": "redhat",
      "name": "CentOS Linux",
      "codename": "Core",
      "platform": "centos",
      "version": "7 (Core)"
    },
    "id": "6aaed308aa5a419f880c5e45eea65414",
    "containerized": true,
    "architecture": "x86_64"
  },
  "meta": {
    "cloud": {
      "region": "CanadaCentral",
      "provider": "az",
      "instance_id": "6452bcf4-7f5d-4fc3-9f8e-5ea57f00724b",
      "instance_name": "tomcat",
      "machine_type": "Standard_D8s_v3"
    }
  },
  "message": "2018-09-14 20:23:37 INFO ContextLoader:272 - Root WebApplicationContext: initialization started",
  "source": "/logs/api.log",
  "offset": 0,
  "prospector": {
    "type": "log"
  },
  "beat": {
    "hostname": "tomcat",
    "version": "6.6.0",
    "name": "tomcat"
  }
}
Thanks
Some of those fields are nested fields; the way to access them in a Logstash filter is with the [field][subfield] notation.
Your remove_field should be something like this:
remove_field => ["tags","[host][architecture]","[meta][cloud][provider]"]
But I don't think you can remove the @version field.
UPDATE:
Using the event example from your Filebeat log, I simulated a pipeline and got a _grokparsefailure. To remove the fields even when the grok fails, you need to use remove_field inside a mutate filter:
filter {
  grok {
    # your grok
  }
  mutate {
    remove_field => ["[prospector]", "[host][architecture]", "[host][containerized]", "[host][id]", "[host][os][platform]", "[host][os][family]", "[beat]", "[offset]", "[input]", "[meta]"]
  }
}
Don't remove the tags field until you have fixed your groks.
The Logstash output for that example is:
{
  "source": "/logs/api.log",
  "tags": [
    "_grokparsefailure"
  ],
  "@timestamp": "2019-02-28T01:03:41.647Z",
  "message": "2018-09-14 20:23:37 INFO ContextLoader:272 - Root WebApplicationContext: initialization started",
  "log": {
    "file": {
      "path": "/logs/api.log"
    }
  },
  "@version": "1",
  "host": {
    "os": {
      "codename": "Core",
      "version": "7 (Core)",
      "name": "CentOS Linux"
    },
    "name": "tomcat"
  }
}

JSON Parse errors in Logstash

When attempting to parse JSON data with Logstash, the parse seems to fail and my JSON doesn't get sent to ES as expected. Any suggestions would be great. I'm attempting to log failed WordPress logins, but having no luck with the parsing of the JSON.
Currently using Logstash 6.4.2 on FreeBSD 11.
Example log file. The file contains nothing but this data:
{
  "username": "billy",
  "password": "gfdgdfdfg4",
  "time": "2019-02-03 00:39:11",
  "agent": "Mozilla\/5.0 (X11; Ubuntu; Linux x86_64; rv:62.0) Gecko\/20100101 Firefox\/62.0",
  "ip": "11.11.11.11"
}
Template
{
  "index_patterns": ["wpbadlogin*"],
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "index.refresh_interval": "60s"
  },
  "mappings": {
    "_default_": {
      "properties": {
        "host": {
          "type": "text"
        },
        "username": {
          "type": "text"
        },
        "password": {
          "type": "text"
        },
        "agent": {
          "type": "text"
        },
        "ip": {
          "type": "ip"
        }
      },
      "_all": {
        "enabled": false
      }
    }
  }
}
Logstash config
input {
  file {
    type => "json"
    codec => "json"
    sincedb_path => "/dev/null"
    path => "/var/log/lighttpd/badlogin.txt"
    start_position => "beginning"
    tags => ["wpbadlogin"]
  }
}
#filter { }
output {
  stdout {
    codec => rubydebug
  }
  elasticsearch {
    hosts => ["10.0.5.30:9200"]
    template => "/usr/local/etc/logstash/templates/wpbadlogin.json"
    template_name => "wpbadlogin"
    template_overwrite => true
    index => "wpbadlogin"
  }
}
Error: https://pastebin.com/raw/KWEYGkLn
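The pastebin output isn't reproduced here, but one common cause with this kind of config is that the file input reads the log line by line, so a pretty-printed JSON object spread over several lines never reaches the json codec as a single document. A hedged sketch of an input that joins the lines first (the multiline pattern and the auto_flush_interval value are assumptions, not taken from the question):
input {
  file {
    path => "/var/log/lighttpd/badlogin.txt"
    sincedb_path => "/dev/null"
    start_position => "beginning"
    tags => ["wpbadlogin"]
    codec => multiline {
      pattern => "^{"            # a line starting with '{' begins a new event
      negate => true             # every other line...
      what => "previous"         # ...is appended to the previous one
      auto_flush_interval => 2   # flush the last pending event after 2 seconds
    }
  }
}
filter {
  # parse the reassembled document into top-level fields
  json {
    source => "message"
  }
}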
