How can I convert a nested JSON string to JSON in Logstash? - logstash

My string after JSON decode is:
"{\"#timestamp\":\"2022-09-27T10:14:49.082014+02:00\",\"#version\":1,\"host\":\"hieu-GF63-Thin-10SC\",\"message\":\"{\\\"command\\\":\\\"test:upload\\\",\\\"title\\\":\\\"Import success\\\",\\\"total_success\\\":10,\\\"total_fails\\\":0,\\\"log_message\\\":\\\"\\\"}\",\"type\":\"Datahub\",\"channel\":\"logstash.main\",\"level\":\"INFO\",\"monolog_level\":200,\"context\":{\"host\":{\"ip\":\"127.0.0.1\"}}}\n"
My Logstash config is:
input {
  udp {
    port => 5000
  }
}
filter {
  json { source => "message" }
}
output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    index => "my-index"
    user => "elastic"
    password => "123456"
  }
}
My result in Elasticsearch:
{
"_index" : "my-index",
"_id" : "PskDfoMBtWToAIWATogd",
"_score" : 1.0,
"_ignored" : [
"event.original.keyword"
],
"_source" : {
"channel" : "logstash.main",
"context" : {
"host" : {
"ip" : "127.0.0.1"
}
},
"type" : "Datahub",
"monolog_level" : 200,
"message" : "{\"command\":\"test:upload\",\"title\":\"Import success\",\"total_success\":10,\"total_fails\":0,\"log_message\":\"\"}",
"host" : "hieu-GF63-Thin-10SC",
"level" : "INFO",
"#timestamp" : "2022-09-27T08:14:49.082014Z",
"#version" : 1,
"event" : {
"original" : "{\"#timestamp\":\"2022-09-27T10:14:49.082014+02:00\",\"#version\":1,\"host\":\"hieu-GF63-Thin-10SC\",\"message\":\"{\\\"command\\\":\\\"test:upload\\\",\\\"title\\\":\\\"Import success\\\",\\\"total_success\\\":10,\\\"total_fails\\\":0,\\\"log_message\\\":\\\"\\\"}\",\"type\":\"Datahub\",\"channel\":\"logstash.main\",\"level\":\"INFO\",\"monolog_level\":200,\"context\":{\"host\":{\"ip\":\"127.0.0.1\"}}}\n"
}
}
}
How can I parse the value of the message field into JSON data and append it to _source?
For example, I want the command and total_success fields appended to _source.
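One approach (a minimal sketch, not tested against your exact pipeline) is to run the json filter a second time: after the first pass, the message field holds the inner JSON string, and a second json filter with no target merges its keys into the root of the event, i.e. into _source:
filter {
  # first pass: parse the outer JSON; "message" now contains the inner JSON string
  json { source => "message" }
  # second pass: parse the inner JSON; command, title, total_success, total_fails
  # and log_message are added at the root of the event
  json { source => "message" }
  # optional: drop the raw inner string once it has been parsed
  mutate { remove_field => ["message"] }
}
If you would rather keep the parsed object nested instead of at the root, add target => "message_parsed" (the field name is just an example) to the second json filter.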

Related

logstash - Do not get the values for [log][file][path]

I receive the data from Filebeat in Logstash, but I do not see a value for [log][file][path].
I see other people get this field in their Logstash data; below is what I receive. There is no data from Beats for the log path. Thank you in advance for the help.
{
"_index" : "%{merchant_id}",
"_type" : "_doc",
"_id" : "gpPkankB77M3Al5AC9Cs",
"_score" : 1.0,
"_source" : {
"host" : {
"name" : "53f955bf4e04"
},
"message" : "request: \nnull",
"#version" : "1",
"#timestamp" : "2021-05-08T09:50:26.090Z",
"level" : "D",
"stacktrace" : true,
"ls_pipe" : "droid_log",
"agent" : {
"id" : "dffd6c5f-69e9-416c-b08c-f2229e6d477b",
"name" : "53f955bf4e04",
"version" : "7.12.1",
"type" : "filebeat",
"ephemeral_id" : "6417dd44-79fd-44a9-8798-5ae188f9fe62",
"hostname" : "53f955bf4e04"
},
"device" : {
"id" : "1850006639",
"tid" : "3786",
"pid" : "3732"
},
"tags" : [
"beats_input_codec_plain_applied",
"_grokparsefailure"
],
"input" : {
"type" : "log"
},
"ecs" : {
"version" : "1.8.0"
},
"logger" : "postiliondriver.parameters.PostilionParamDownloadBase"
}
}
You have a drop_fields processor in your filebeat.yml that is dropping the log field.
If you drop the field you can't use it in Logstash because it does not exist in your message.
Remove the log field from the drop_fields processor and try again.
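For reference, the relevant part of filebeat.yml would look something like the sketch below; the exact field list is illustrative, the point is only that log must not appear in it:
processors:
  - drop_fields:
      # keep "log" out of this list so log.file.path reaches Logstash
      fields: ["agent.ephemeral_id", "ecs.version"]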

using terms in elasticsearch js

Because of the wonderful documentation Elasticsearch has, I can't figure out the proper syntax to search for a term. This is my code:
let checkuser = await client.search({
  index: "users",
  type: "my_users",
  body: {
    query: {
      term: {
        email: req.body.email
      }
    }
  }
});
I wish to search for an object that has a key-value pair of 'email' with a certain email, but I want it to match the exact email I wrote: if it's a@mail.com, then ab@mail.com should not match. I know I need to use terms, but when I write it like that it doesn't work. What's wrong with my syntax?
PS this is my index mapping:
"users" : {
"mappings" : {
"jobix_users" : {
"properties" : {
"confirmed" : {
"type" : "boolean"
},
"email" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"firstName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"jobNotification" : {
"type" : "boolean"
},
"jobTitle" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"lastName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"password" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"userName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
You use the default mapping, which means the standard tokenizer is applied when you index documents.
As you can see in the mapping, the email field has two interpretations:
text
keyword
For the text field, the standard tokenizer splits the value into tokens and saves those tokens to your index. This means you can match the term alex or the term mail.com, but not the whole address. If you want to match the whole email, your query should look like this:
{
"query": {
"term": {
"email.keyword": req.body.email
}
}
}
But Elasticsearch has a special uax_url_email tokenizer for URLs and emails. I would recommend using this tokenizer for the email field.
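A rough sketch of such a mapping (the index and type names are taken from your snippets, the analyzer name is made up, and since analyzers cannot be changed on an existing field you would need to create a new index, or reindex, with these settings):
PUT /users
{
  "settings": {
    "analysis": {
      "analyzer": {
        "email_analyzer": {
          "type": "custom",
          "tokenizer": "uax_url_email"
        }
      }
    }
  },
  "mappings": {
    "jobix_users": {
      "properties": {
        "email": {
          "type": "text",
          "analyzer": "email_analyzer",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        }
      }
    }
  }
}
With this analyzer the whole address is kept as a single token, so a term query on email matches the full address rather than its parts.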

Forward logs with logstash running in ec2 instance to Amazon Elasticsearch service

I'm currently running Logstash in an EC2 instance with the default Linux AMI, attempting to send logs to an AWS ES instance. If I use the standard 'elasticsearch' output, I can send unsigned data to the AWS ES instance, but I'm trying to set up a prod-ready framework, and everything I've read recommends using the AWS Labs Logstash output plugin here (https://github.com/awslabs/logstash-output-amazon_es).
I can confirm the plugin is installed, but when I run Logstash using the conf file below, I get the message 'Sending Logstash's logs to /var/log/logstash which is now configured via log4j2.properties', yet no data appears in my Elasticsearch endpoint at '/_search?pretty=true' when I refresh after making a stdin entry.
input {
  stdin {
  }
}
output {
  amazon_es {
    hosts => ["https://search-secretstuff.es.amazonaws.com"]
    region => "xxxxx"
    aws_access_key_id => 'xxxxxx'
    aws_secret_access_key => 'xxxxxx'
    index => "prod-logs-%{+YYYY.MM.dd}"
    template => "/etc/logstash/mappings/es6-template.json"
  }
}
In addition to using stdin, I've tried using a file input, e.g.
input {
  file {
    path => "/var/log/amazon/ssm/errors.log"
  }
}
The template I'm using is below, as per the accepted answer for this post (Logstash conf error - amazon_es)
{
"template" : "logstash-*",
"version" : 60001,
"settings" : {
"index.refresh_interval" : "5s"
},
"mappings" : {
"_default_" : {
"dynamic_templates" : [ {
"message_field" : {
"path_match" : "message",
"match_mapping_type" : "string",
"mapping" : {
"type" : "text",
"norms" : false
}
}
}, {
"string_fields" : {
"match" : "*",
"match_mapping_type" : "string",
"mapping" : {
"type" : "text", "norms" : false,
"fields" : {
"keyword" : { "type": "keyword", "ignore_above": 256 }
}
}
}
} ],
"properties" : {
"#timestamp": { "type": "date"},
"#version": { "type": "keyword"},
"geoip" : {
"dynamic": true,
"properties" : {
"ip": { "type": "ip" },
"location" : { "type" : "geo_point" },
"latitude" : { "type" : "half_float" },
"longitude" : { "type" : "half_float" }
}
}
}
}
}
}
Does anything in the configuration jump out as a potential pain point? I've tried a number of iterations of both the template file and the logstash.conf file, and now feel like I'm beating my head against the wall to no avail.
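As a generic debugging step (not specific to the amazon_es plugin), you can temporarily add a stdout output next to amazon_es to see whether events are making it through the pipeline at all; if events print to the console but never show up in the cluster, the problem is on the output/signing side rather than with the input:
output {
  # temporary: print every event to the console while debugging
  stdout { codec => rubydebug }
  amazon_es {
    hosts => ["https://search-secretstuff.es.amazonaws.com"]
    region => "xxxxx"
    aws_access_key_id => 'xxxxxx'
    aws_secret_access_key => 'xxxxxx'
    index => "prod-logs-%{+YYYY.MM.dd}"
    template => "/etc/logstash/mappings/es6-template.json"
  }
}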

grok parser failure - for django logs

This is one of my log entries:
INFO 2017-05-16 17:24:11,690 views 14463 139643033982720 https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E
This is my pattern:
DJANGOTIMESTAMP %{YEAR}-%{MONTHNUM}-%{MONTHDAY}%{SPACE}%{HOUR}:%{MINUTE}:%{SECOND}
This is my logstash conf file,
input {
  beats {
    port => "5043"
  }
}
filter {
  if [type] in ["django"] {
    grok {
      patterns_dir => ["/opt/logstash/patterns"]
      match => [ "message" , "%{LOGLEVEL:level}%{SPACE}%{DJANGOTIMESTAMP:timestamp},%{INT:pid}%{SPACE}%{WORD:origin}%{SPACE}%{INT:uid}%{SPACE}%{INT:django-id}%{SPACE}%{GREEDYDATA:action}" ]
    }
  }
}
output {
  elasticsearch {
    hosts => [ "localhost:9200" ]
    index => "%{type}_indexer"
  }
}
In the Elasticsearch output, the fields are not created:
luvpreet#DHARI-Inspiron-3542:/usr/bin$ curl -XGET 'localhost:9200/django_indexer/_search?pretty=true&q=*:*'
{
"_index" : "django_indexer",
"_type" : "django",
"_id" : "AVwu8tE7j-Kh6vl1kUdf",
"_score" : 1.0,
"_source" : {
"#timestamp" : "2017-05-22T06:55:52.819Z",
"offset" : 144,
"#version" : "1",
"beat" : {
"hostname" : "DHARI-Inspiron-3542",
"name" : "DHARI-Inspiron-3542",
"version" : "5.4.0"
},
"input_type" : "log",
"host" : "DHARI-Inspiron-3542",
"source" : "/var/log/django/a.log",
"message" : "INFO 2017-05-16 06:33:08,673 views 40152 139731056719616 https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E",
"type" : "django",
"tags" : [
"beats_input_codec_plain_applied"
]
}
It is not saying that the parser has failed, but why are the fields not being created?
What am I missing?
Try with this grok pattern:
%{LOGLEVEL:loglevel}%{SPACE}%{TIMESTAMP_ISO8601:timestamp},%{INT:pid}%{SPACE}%{WORD:origin}%{SPACE}%{INT:id}%{SPACE}%{INT:number}%{SPACE}%{URI:action}
Input
INFO 2017-05-16 17:24:11,690 views 14463 139643033982720 https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E
Output
number 139643033982720
timestamp 2017-05-16 17:24:11
id 14463
port
pid 690
origin views
action https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E
loglevel INFO
You can then remove the port field with a mutate in your filter plugin:
mutate {
remove_field => ["port"]
}
UPDATE
OK, I tried your configuration with my Logstash.
This is what I did:
1- Configure filebeat:
filebeat.prospectors:
- paths:
  - /etc/filebeat/FilebeatInputTest.txt
  document_type: django
output.logstash:
  hosts: ["127.0.0.1:5044"]
2- Configure logstash
input {
  beats {
    port => "5044"
  }
}
filter {
  if [type] == "django" {
    grok {
      match => [ "message" , "%{LOGLEVEL:loglevel}%{SPACE}%{TIMESTAMP_ISO8601:timestamp},%{INT:pid}%{SPACE}%{WORD:origin}%{SPACE}%{INT:id}%{SPACE}%{INT:number}%{SPACE}%{GREEDYDATA:action}" ]
    }
    mutate {
      remove_field => ["@timestamp", "beat","input_type","offset","source","@version","host","tags","message"]
    }
  }
}
output {
  elasticsearch {
    hosts => [ "xx.xx.xx.xx:9200" ]
    index => "%{type}_indexer"
    user => "xxxx"
    password => "xxxx"
  }
}
You can remove user and password if your elasticsearch is not secured.
Input (content of /etc/filebeat/FilebeatInputTest.txt)
INFO 2017-05-16 17:24:11,690 views 14463 139643033982720 https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E
Output (In elasticsearch)
{
"_index" : "django_indexer",
"_type" : "django",
"_id" : "AVwhFe30JYGYNG_7C7YI",
"_score" : 1.0,
"_source" : {
"origin" : "views",
"pid" : "690",
"type" : "django",
"number" : "139643033982720",
"loglevel" : "INFO",
"action" : "https://play.google.com/store/apps/details?id=com.VoDrive&referrer=referral_code%3DP5E",
"id" : "14463",
"timestamp" : "2017-05-16 17:24:11"
}
}
Hope this helps.

I can neither filter nor aggregate the documents I have saved to Elasticsearch via Logstash

I guess the problem may be related to my logstash.conf, but I don't know exactly what to do. I found excellent tutorials explaining how to do it using only Elasticsearch, but in my case all data will come from Node.js via Logstash.
I searched for how to enable fielddata but couldn't figure out how to do it in my logstash.conf. Should I create an index template? If so, how?
The context is that I want to log every time a user accesses our application and then bill them according to the number of accesses per month.
logstash.conf
input {
  tcp {
    port => 5000
    type => cpfTipo
  }
}
filter {
  json {
    source => "message"
  }
}
output {
  elasticsearch { hosts => ["localhost:9200"] index => "mycostumer_indice" document_type => "cpfTipo"}
}
Attempt to filter:
1)
curl -XGET http://127.0.0.1:9200/mycostumer_indice/cpfTipo/_search -d '{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter":
{
"term": {
"username": "a"
}
}
]
}
}
}
}'
{"error":{"root_cause":[{"type":"parsing_exception","reason":"no [query] registered for [filtered]","line":3,"col":21}],"type":"parsing_exception","reason":"no [query] registered for [filtered]","line":3,"col":21},"status":400}demetrio#nodejs ~/tool
Attempts to aggregate:
1)
curl -XGET http://127.0.0.1:9200/mycostumer_indice/cpfTipo/_search -d '{
{
"aggs" : {
"message" : {
"terms" : {
"field" : "cpfTipo",
"size" : 5
}
}
}
}'
{"error":{"root_cause":[{"type":"json_parse_exception","reason":"Unexpected character ('{' (code 123)): was expecting double-quote to start field name\n at [Source: org.elasticsearch.transport.netty4.ByteBufStreamInput#3ce63313; line: 2, column: 2]"}],"type":"json_parse_exception","reason":"Unexpected character ('{' (code 123)): was expecting double-quote to start field name\n at [Source: org.elasticsearch.transport.netty4.ByteBufStreamInput#3ce63313; line: 2, column: 2]"},"status":500}
2)
curl -XPOST 'http://127.0.0.1:9200/mycostumer_indice/_search?pretty' -d '
{
"size": 0,
"aggs": {
"group_by_username": {
"terms": {
"field": "username"
}
}
}
}'
{
"error" : {
"root_cause" : [
{
"type" : "illegal_argument_exception",
"reason" : "Fielddata is disabled on text fields by default. Set fielddata=true on [username] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
}
],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [
{
"shard" : 0,
"index" : "mycostumer_indice",
"node" : "-em7X-ssT3SL2JBtfs0VTQ",
"reason" : {
"type" : "illegal_argument_exception",
"reason" : "Fielddata is disabled on text fields by default. Set fielddata=true on [username] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
}
}
],
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "Fielddata is disabled on text fields by default. Set fielddata=true on [username] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
}
},
"status" : 400
}
How the mycostumer index appears:
curl http://127.0.0.1:9200/mycostumer_indice/cpfTipo/_search?pretty
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.0,
"hits" : [
{
"_index" : "mycostumer_indice",
"_type" : "cpfTipo",
"_id" : "AVrxUi5cIZDJUBCguFI8",
"_score" : 1.0,
"_source" : {
"password" : "a",
"#timestamp" : "2017-03-21T14:42:54.466Z",
"port" : 56012,
"#version" : "1",
"host" : "127.0.0.1",
"message" : "{\"username\":\"a\",\"password\":\"a\"}",
"type" : "cpfTipo",
"username" : "a"
}
}
]
}
}
In Node.js:
var express = require('express');
var bodyParser = require('body-parser');
var Client = require('node-rest-client').Client;
var expressWinston = require('express-winston');
var winston = require('winston');
require('winston-logstash');
var client = new Client();
var Logstash = require('logstash-client');
var app = express();
expressWinston.requestWhitelist.push('body');
expressWinston.responseWhitelist.push('body')
app.use(bodyParser.json());
app.use(bodyParser.urlencoded({
  extended: true
}));
var port = process.env.PORT || 3000;
var router = express.Router();
var tokenRoute = router.route('/token');
tokenRoute.post(function (req, res) {
  var user = {
    username: req.body.username,
    password: req.body.password
  };
  logstash.send(user);
Your first search query uses a deprecated filtered query, simply replace it with bool and you're good:
curl -XGET http://127.0.0.1:9200/mycostumer_indice/cpfTipo/_search -d '{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "username": "a"
        }
      }
    }
  }
}'
Your second query has one too many open brace at the beginning, use this one instead.
curl -XGET http://127.0.0.1:9200/mycostumer_indice/cpfTipo/_search -d '{
  "aggs" : {
    "message" : {
      "terms" : {
        "field" : "cpfTipo",
        "size" : 5
      }
    }
  }
}'
Your third query fails because you're trying to aggregate on username, which is a text field. You should change the mapping of that field to use the keyword type instead.
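A sketch of what that could look like, assuming you are free to recreate the index (the _v2 index name is just an example, and this uses the same pre-6.x query style as your other commands):
curl -XPUT 'http://127.0.0.1:9200/mycostumer_indice_v2' -d '{
  "mappings": {
    "cpfTipo": {
      "properties": {
        "username": { "type": "keyword" },
        "password": { "type": "keyword" }
      }
    }
  }
}'

curl -XPOST 'http://127.0.0.1:9200/mycostumer_indice_v2/_search?pretty' -d '{
  "size": 0,
  "aggs": {
    "group_by_username": {
      "terms": { "field": "username" }
    }
  }
}'
Alternatively, if the dynamic mapping already created a username.keyword sub-field (the default for string fields in recent versions), you can aggregate on username.keyword without remapping anything.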
