grok parser failure - for django logs - logstash

This is one of my log entries:
INFO 2017-05-16 17:24:11,690 views 14463 139643033982720 https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E
This is my pattern:
DJANGOTIMESTAMP %{YEAR}-%{MONTHNUM}-%{MONTHDAY}%{SPACE}%{HOUR}:%{MINUTE}:%{SECOND}
This is my Logstash conf file:
input {
  beats {
    port => "5043"
  }
}
filter {
  if [type] in ["django"] {
    grok {
      patterns_dir => ["/opt/logstash/patterns"]
      match => [ "message" , "%{LOGLEVEL:level}%{SPACE}%{DJANGOTIMESTAMP:timestamp},%{INT:pid}%{SPACE}%{WORD:origin}%{SPACE}%{INT:uid}%{SPACE}%{INT:django-id}%{SPACE}%{GREEDYDATA:action}" ]
    }
  }
}
output {
  elasticsearch {
    hosts => [ "localhost:9200" ]
    index => "%{type}_indexer"
  }
}
In the Elasticsearch output, the fields are not created:
luvpreet@DHARI-Inspiron-3542:/usr/bin$ curl -XGET 'localhost:9200/django_indexer/_search?pretty=true&q=*:*'
{
  "_index" : "django_indexer",
  "_type" : "django",
  "_id" : "AVwu8tE7j-Kh6vl1kUdf",
  "_score" : 1.0,
  "_source" : {
    "@timestamp" : "2017-05-22T06:55:52.819Z",
    "offset" : 144,
    "@version" : "1",
    "beat" : {
      "hostname" : "DHARI-Inspiron-3542",
      "name" : "DHARI-Inspiron-3542",
      "version" : "5.4.0"
    },
    "input_type" : "log",
    "host" : "DHARI-Inspiron-3542",
    "source" : "/var/log/django/a.log",
    "message" : "INFO 2017-05-16 06:33:08,673 views 40152 139731056719616 https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E",
    "type" : "django",
    "tags" : [
      "beats_input_codec_plain_applied"
    ]
  }
}
It is not saying that the parser has failed, so why are the fields not being created?
What am I missing?

Try with this grok pattern:
%{LOGLEVEL:loglevel}%{SPACE}%{TIMESTAMP_ISO8601:timestamp},%{INT:pid}%{SPACE}%{WORD:origin}%{SPACE}%{INT:id}%{SPACE}%{INT:number}%{SPACE}%{URI:action}
Input
INFO 2017-05-16 17:24:11,690 views 14463 139643033982720 https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E
Output
number     139643033982720
timestamp  2017-05-16 17:24:11
id         14463
port
pid        690
origin     views
action     https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E
loglevel   INFO
You can then remove the port field with a mutate in your filter plugin:
mutate {
  remove_field => ["port"]
}
UPDATE
OK, I tried your configuration with my Logstash.
This is what I did:
1- Configure Filebeat:
filebeat.prospectors:
- paths:
    - /etc/filebeat/FilebeatInputTest.txt
  document_type: django
output.logstash:
  hosts: ["127.0.0.1:5044"]
2- Configure Logstash:
input {
  beats {
    port => "5044"
  }
}
filter {
  if [type] == "django" {
    grok {
      match => [ "message" , "%{LOGLEVEL:loglevel}%{SPACE}%{TIMESTAMP_ISO8601:timestamp},%{INT:pid}%{SPACE}%{WORD:origin}%{SPACE}%{INT:id}%{SPACE}%{INT:number}%{SPACE}%{GREEDYDATA:action}" ]
    }
    mutate {
      remove_field => ["@timestamp", "beat", "input_type", "offset", "source", "@version", "host", "tags", "message"]
    }
  }
}
output {
  elasticsearch {
    hosts => [ "xx.xx.xx.xx:9200" ]
    index => "%{type}_indexer"
    user => "xxxx"
    password => "xxxx"
  }
}
You can remove user and password if your elasticsearch is not secured.
Input (content of /etc/filebeat/FilebeatInputTest.txt)
INFO 2017-05-16 17:24:11,690 views 14463 139643033982720 https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E
Output (in Elasticsearch)
{
  "_index" : "django_indexer",
  "_type" : "django",
  "_id" : "AVwhFe30JYGYNG_7C7YI",
  "_score" : 1.0,
  "_source" : {
    "origin" : "views",
    "pid" : "690",
    "type" : "django",
    "number" : "139643033982720",
    "loglevel" : "INFO",
    "action" : "https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E",
    "id" : "14463",
    "timestamp" : "2017-05-16 17:24:11"
  }
}
Hope this helps.

Related

How can I convert a nested JSON string to JSON in Logstash?

My string after JSON decode is:
"{\"#timestamp\":\"2022-09-27T10:14:49.082014+02:00\",\"#version\":1,\"host\":\"hieu-GF63-Thin-10SC\",\"message\":\"{\\\"command\\\":\\\"test:upload\\\",\\\"title\\\":\\\"Import success\\\",\\\"total_success\\\":10,\\\"total_fails\\\":0,\\\"log_message\\\":\\\"\\\"}\",\"type\":\"Datahub\",\"channel\":\"logstash.main\",\"level\":\"INFO\",\"monolog_level\":200,\"context\":{\"host\":{\"ip\":\"127.0.0.1\"}}}\n"
My Logstash config is:
input {
  udp {
    port => 5000
  }
}
filter {
  json { source => "message" }
}
output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    index => "my-index"
    user => "elastic"
    password => "123456"
  }
}
My result in Elasticsearch:
{
  "_index" : "my-index",
  "_id" : "PskDfoMBtWToAIWATogd",
  "_score" : 1.0,
  "_ignored" : [
    "event.original.keyword"
  ],
  "_source" : {
    "channel" : "logstash.main",
    "context" : {
      "host" : {
        "ip" : "127.0.0.1"
      }
    },
    "type" : "Datahub",
    "monolog_level" : 200,
    "message" : "{\"command\":\"test:upload\",\"title\":\"Import success\",\"total_success\":10,\"total_fails\":0,\"log_message\":\"\"}",
    "host" : "hieu-GF63-Thin-10SC",
    "level" : "INFO",
    "@timestamp" : "2022-09-27T08:14:49.082014Z",
    "@version" : 1,
    "event" : {
      "original" : "{\"@timestamp\":\"2022-09-27T10:14:49.082014+02:00\",\"@version\":1,\"host\":\"hieu-GF63-Thin-10SC\",\"message\":\"{\\\"command\\\":\\\"test:upload\\\",\\\"title\\\":\\\"Import success\\\",\\\"total_success\\\":10,\\\"total_fails\\\":0,\\\"log_message\\\":\\\"\\\"}\",\"type\":\"Datahub\",\"channel\":\"logstash.main\",\"level\":\"INFO\",\"monolog_level\":200,\"context\":{\"host\":{\"ip\":\"127.0.0.1\"}}}\n"
    }
  }
}
How can I extract the values in the message field into JSON data and append them to _source?
For example, I want the command and total_success fields appended to _source.
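A second pass of the json filter should do it (a minimal sketch, assuming the first json filter leaves the inner JSON string in the message field, as the output above shows):
filter {
  json { source => "message" }        # existing filter: parses the outer document
  json {
    source => "message"               # [message] now holds the inner JSON string
    remove_field => ["message"]       # drop the raw string once it parses successfully
  }
}
After the second pass, command, title, total_success, total_fails and log_message should appear as top-level fields in _source.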

logstash - Do not get the values for [log][file][path]

I receive the data from Filebeat in Logstash, but I do not see a value for [log][file][path].
I see other people get this data in their Logstash events; below is what I receive. There is no data from Beats for the log path. Thank you in advance for the help.
{
  "_index" : "%{merchant_id}",
  "_type" : "_doc",
  "_id" : "gpPkankB77M3Al5AC9Cs",
  "_score" : 1.0,
  "_source" : {
    "host" : {
      "name" : "53f955bf4e04"
    },
    "message" : "request: \nnull",
    "@version" : "1",
    "@timestamp" : "2021-05-08T09:50:26.090Z",
    "level" : "D",
    "stacktrace" : true,
    "ls_pipe" : "droid_log",
    "agent" : {
      "id" : "dffd6c5f-69e9-416c-b08c-f2229e6d477b",
      "name" : "53f955bf4e04",
      "version" : "7.12.1",
      "type" : "filebeat",
      "ephemeral_id" : "6417dd44-79fd-44a9-8798-5ae188f9fe62",
      "hostname" : "53f955bf4e04"
    },
    "device" : {
      "id" : "1850006639",
      "tid" : "3786",
      "pid" : "3732"
    },
    "tags" : [
      "beats_input_codec_plain_applied",
      "_grokparsefailure"
    ],
    "input" : {
      "type" : "log"
    },
    "ecs" : {
      "version" : "1.8.0"
    },
    "logger" : "postiliondriver.parameters.PostilionParamDownloadBase"
  }
}
You have a drop_fields processor in your filebeat.yml that is dropping the log field.
If you drop the field, you can't use it in Logstash because it does not exist in your message.
Remove the log field from the drop_fields processor and try again.
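For reference, a hypothetical filebeat.yml excerpt (the asker's actual file is not shown) with a drop_fields processor of the kind that would cause this:
processors:
  - drop_fields:
      fields: ["log"]   # dropping "log" removes [log][file][path]; take it out of this list to keep the path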

How to transform a log message using Logstash?

I have log messages like the following:
2021-03-26 11:49:25.575: 2021-03-26 11:49:25.575 [INFO] 10.0.3.12 - "POST https://api.kr-seo.assistant.watson.cloud.ibm.com/instances/a33da834-a7a7-48c2-9bf6-d3207849ad71/v1/workspaces/c6e3035b-411a-468d-adac-1ae608f7bf68/message?version=2018-07-10" 200 462 ms
2021-03-26 11:49:26.514: 2021-03-26 11:49:26.514 [INFO] 10.0.3.12 + "POST http://test-bff.lotteon.com/order/v1/mylotte/getOrderList"
I want to transform them using Logstash like:
"timestamp" : "2021-03-26 11:49:26.514",
"logLevel" : "INFO",
"IP" : "10.0.3.12",
"inout" : "-",
"Method" : "POST",
"url" : "https://api.kr-seo.assistant.watson.cloud.ibm.com/instances/a33da834-a7a7-48c2-9bf6-d3207849ad71/v1/workspaces/c6e3035b-411a-468d-adac-1ae608f7bf68/message?version=2018-07-10",
"status" : "200",
"duration" : "462 ms"
If the inout field is '+', then the status and duration fields should be null ('').
How can I write a Logstash grok filter for this? (grok, mutate, or any other filter is fine.)
Help me!
filter {
  grok { match => [ "message", "%{GREEDYDATA:predata} (?<inout>[-+]) \"%{GREEDYDATA:postdata}\"" ] }
  if [inout] == "+" {
    grok { match => [ "message", "%{DATESTAMP:timestamp}: %{GREEDYDATA:data} \[%{LOGLEVEL:loglevel}\] %{IP:IP} (?<inout>[-+]) \"%{WORD:method} %{URI:url}\"" ] }
  }
  else {
    grok { match => [ "message", "%{DATESTAMP:timestamp}: %{GREEDYDATA:data} \[%{LOGLEVEL:loglevel}\] %{IP:IP} (?<inout>[-+]) \"%{WORD:method} %{URI:url}\" %{POSINT:statucode} %{POSINT:duration}" ] }
  }
}
Now, you can remove the unnecessary fields:
filter {
  mutate {
    remove_field => [
      "message",
      "predata",
      "postdata",
      "DATE_US",
      "IPV6",
      "USER",
      "USERNAME",
      "URIHOST",
      "IPORHOST",
      "HOSTNAME",
      "URIPATHPARAM",
      "port",
      "URIPATH",
      "URIPARAM"
    ]
    remove_tag => [
      "multiline",
      "_grokparsefailure"
    ]
  }
}

LOGSTASH - Filter JSON

I need to save only the contents of the ship node to a Kafka topic. Unfortunately, I have already run several tests and the filter is not working.
My JSON is similar to this:
{
  "_index": "abd",
  "type" : "doc",
  "_source": {
    "response_body": {
      "ship": [
        {
          "type" : "iPhone",
          "number": "0123-4567-8888"
        },
        {
          "type" : "iPhone",
          "number": "0123-4567-4444"
        }
      ]
    }
  }
}
My Logstash is configured like this:
input {
  file {
    path => "${PWD}/logstash_input"
    start_position => "beginning"
    sincedb_path => "/dev/null"
    type => "json"
  }
}
filter {
  json {
    source => "message"
    target => "_source.response_body"
  }
}
output {
  kafka {
    bootstrap_servers => "localhost:9092"
    codec => json{}
    topic_id => "testtopic"
  }
}
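A minimal sketch of one way this could be done, assuming the whole JSON document arrives as a single message line and using the field names from the sample above (a sketch, untested against the original setup):
filter {
  json { source => "message" }    # parse the document; _index, type and _source become top-level fields
  mutate {
    # lift the ship array to the top level (field path taken from the sample above)
    rename => { "[_source][response_body][ship]" => "ship" }
    # drop everything else added by the file input and the parsed document
    remove_field => ["message", "path", "host", "_index", "type", "_source"]
  }
}
Note that the json codec on the kafka output will still serialise @timestamp and @version unless those are handled as well.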

I can't filter, nor can I aggregate, the documents I have saved to Elasticsearch via Logstash

I guess the problem may be related to my logstash.conf, but I don't know exactly what to do. I found excellent tutorials explaining how to do it using only Elasticsearch, but in my case all the data comes from Node.js via Logstash.
I searched for how to enable fielddata, but I couldn't figure out how to do it in my logstash.conf. Should I create an index template? If so, how?
The context is that I want to log every time a user accesses our application and then bill them according to the number of accesses per month.
logstash.conf
input {
  tcp {
    port => 5000
    type => cpfTipo
  }
}
filter {
  json {
    source => "message"
  }
}
output {
  elasticsearch { hosts => ["localhost:9200"] index => "mycostumer_indice" document_type => "cpfTipo" }
}
Attempt to filter:
1)
curl -XGET http://127.0.0.1:9200/mycostumer_indice/cpfTipo/_search -d '{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter":
{
"term": {
"username": "a"
}
}
]
}
}
}
}'
{"error":{"root_cause":[{"type":"parsing_exception","reason":"no [query] registered for [filtered]","line":3,"col":21}],"type":"parsing_exception","reason":"no [query] registered for [filtered]","line":3,"col":21},"status":400}demetrio#nodejs ~/tool
Attempts to aggregate:
1)
curl -XGET http://127.0.0.1:9200/mycostumer_indice/cpfTipo/_search -d '{
{
"aggs" : {
"message" : {
"terms" : {
"field" : "cpfTipo",
"size" : 5
}
}
}
}'
{"error":{"root_cause":[{"type":"json_parse_exception","reason":"Unexpected character ('{' (code 123)): was expecting double-quote to start field name\n at [Source: org.elasticsearch.transport.netty4.ByteBufStreamInput#3ce63313; line: 2, column: 2]"}],"type":"json_parse_exception","reason":"Unexpected character ('{' (code 123)): was expecting double-quote to start field name\n at [Source: org.elasticsearch.transport.netty4.ByteBufStreamInput#3ce63313; line: 2, column: 2]"},"status":500}
2)
curl -XPOST 'http://127.0.0.1:9200/mycostumer_indice/_search?pretty' -d '
{
  "size": 0,
  "aggs": {
    "group_by_username": {
      "terms": {
        "field": "username"
      }
    }
  }
}'
{
  "error" : {
    "root_cause" : [
      {
        "type" : "illegal_argument_exception",
        "reason" : "Fielddata is disabled on text fields by default. Set fielddata=true on [username] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
      }
    ],
    "type" : "search_phase_execution_exception",
    "reason" : "all shards failed",
    "phase" : "query",
    "grouped" : true,
    "failed_shards" : [
      {
        "shard" : 0,
        "index" : "mycostumer_indice",
        "node" : "-em7X-ssT3SL2JBtfs0VTQ",
        "reason" : {
          "type" : "illegal_argument_exception",
          "reason" : "Fielddata is disabled on text fields by default. Set fielddata=true on [username] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
        }
      }
    ],
    "caused_by" : {
      "type" : "illegal_argument_exception",
      "reason" : "Fielddata is disabled on text fields by default. Set fielddata=true on [username] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
    }
  },
  "status" : 400
}
How the mycostumer index appears:
curl http://127.0.0.1:9200/mycostumer_indice/cpfTipo/_search?pretty
{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 1,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "mycostumer_indice",
        "_type" : "cpfTipo",
        "_id" : "AVrxUi5cIZDJUBCguFI8",
        "_score" : 1.0,
        "_source" : {
          "password" : "a",
          "@timestamp" : "2017-03-21T14:42:54.466Z",
          "port" : 56012,
          "@version" : "1",
          "host" : "127.0.0.1",
          "message" : "{\"username\":\"a\",\"password\":\"a\"}",
          "type" : "cpfTipo",
          "username" : "a"
        }
      }
    ]
  }
}
In Node.js:
var express = require('express');
var bodyParser = require('body-parser');
var Client = require('node-rest-client').Client;
var expressWinston = require('express-winston');
var winston = require('winston');
require('winston-logstash');
var client = new Client();
var Logstash = require('logstash-client');
var app = express();

expressWinston.requestWhitelist.push('body');
expressWinston.responseWhitelist.push('body');

app.use(bodyParser.json());
app.use(bodyParser.urlencoded({
  extended: true
}));

var port = process.env.PORT || 3000;
var router = express.Router();
var tokenRoute = router.route('/token');

tokenRoute.post(function (req, res) {
  var user = {
    username: req.body.username,
    password: req.body.password
  };
  logstash.send(user);
Your first search query uses the deprecated filtered query; simply replace it with bool and you're good:
curl -XGET http://127.0.0.1:9200/mycostumer_indice/cpfTipo/_search -d '{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "username": "a"
        }
      }
    }
  }
}'
Your second query has one too many opening braces at the beginning; use this one instead:
curl -XGET http://127.0.0.1:9200/mycostumer_indice/cpfTipo/_search -d '{
  "aggs" : {
    "message" : {
      "terms" : {
        "field" : "cpfTipo",
        "size" : 5
      }
    }
  }
}'
Your third query fails because you're trying to aggregate on username which is a text field. You should change the mapping of that field to use the keyword type instead.
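If the index was created with Elasticsearch's default dynamic mapping, a keyword sub-field was most likely generated automatically, so a sketch of the aggregation using it (assuming username.keyword exists in your mapping) would be:
curl -XPOST 'http://127.0.0.1:9200/mycostumer_indice/_search?pretty' -d '
{
  "size": 0,
  "aggs": {
    "group_by_username": {
      "terms": {
        "field": "username.keyword"
      }
    }
  }
}'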
