Logstash - parse array of JSON - logstash

I'm trying to parse SendGrid Webhook events using Logstash. The issue is that Logstash's output is not an array of JSON, but only JSON. Square brackets are missing.
The reason I'm doing this is GeoIP location and UserAgent parsing for analytics.
I am posting to 127.0.0.1:3000, then I want to forward the output to 127.0.0.1:8080.
8080 is just a basic Express server which prints requests/responses and it sends final data to ElasticSearch.
This is the input:
[
{
"email": "email@domain.com",
"event": "click",
"ip": "8.8.8.8",
"sg_event_id": "WS1wXXhERnefBsqEt5FSFA",
"sg_message_id": "mk4Msf8nQvycsZIAHQPOrw.filter0321p1iad2-30191-5E686C57-5D.0",
"timestamp": 1596484698,
"url": "http://10.0.0.6/ServiceCenter/view",
"url_offset": { "index": 1, "type": "html" },
"useragent": "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"
}
]
This is the output:
{
sg_event_id: 'WS1wXXhERnefBsqEt5FSFA',
event: 'click',
email: 'email@domain.com',
sg_message_id: 'mk4Msf8nQvycsZIAHQPOrw.filter0321p1iad2-30191-5E686C57-5D.0',
timestamp: 1596484698,
useragent: 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
ip: '8.8.8.8',
url: 'http://10.0.0.6/ServiceCenter/view',
url_offset: { index: 1, type: 'html' }
}
This is my config:
# Accept events over HTTP, strip Logstash bookkeeping fields, then POST each
# event to the downstream HTTP endpoint and echo it to the console.
input {
  http {
    host => "127.0.0.1"
    port => "8080"
  }
}
filter {
  mutate {
    # Logstash's own metadata fields carry an "@" prefix.
    remove_field => [ "@version", "@timestamp", "headers", "host" ]
  }
}
output {
  http {
    http_method => "post"
    url => "http://127.0.0.1:3000"
  }
  # Print every event to stdout for debugging.
  stdout {
    codec => rubydebug
  }
}

So finally, I've found a workaround to do it using the json_encode filter.
Used ruby code to store keys and values to #DATA[oldJSON]
Used json_encode plugin on #DATA[oldJSON] and save the results to #DATA[newJSON]
Important: Set http output format => message and content_type => "application/json; charset=UTF-8". Default is text/plain and we don't want this.
Set message value to '[ %{[#DATA][newJSON]} ]'
Config:
# Accept events over HTTP, re-serialize each event as a JSON string and POST
# it wrapped in square brackets so the receiver gets a JSON array.
input {
  http {
    host => "127.0.0.1"
    port => "8080"
  }
}
filter {
  mutate {
    # Logstash's own metadata fields carry an "@" prefix.
    remove_field => [
      "@version",
      "@timestamp",
      "headers",
      "host"
    ]
  }
  # Copy every field of the event under [#DATA][oldJSON] so the whole event
  # can be encoded as a single JSON value.
  ruby {
    code => '
      event.to_hash.each { |k,v|
        event.set("[#DATA][oldJSON][#{k}]", v)
      }
    '
  }
  # Serialize the copied fields into a JSON string stored in [#DATA][newJSON].
  json_encode {
    source => "[#DATA][oldJSON]"
    target => "[#DATA][newJSON]"
  }
}
output {
  http {
    http_method => "post"
    url => "http://127.0.0.1:3000"
    # Send the hand-built message below as the request body.
    format => message
    content_type => "application/json; charset=UTF-8"
    # Wrap the encoded event in brackets to form a one-element JSON array.
    message => '[ %{[#DATA][newJSON]} ]'
  }
}
Output:
[
{
ip: '8.8.8.8',
sg_message_id: 'mk4Msf8nQvycsZIAHQPOrw.filter0321p1iad2-30191-5E686C57-5D.0',
url_offset: { type: 'html', index: 1 },
sg_event_id: 'WS1wXXhERnefBsqEt5FSFA',
email: 'email@domain.com',
event: 'click',
url: 'http://10.0.0.6/ServiceCenter/view',
timestamp: 1596484698,
useragent: 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
}
]
Maybe somebody will find this useful.

Related

proxy-authentication header missing with https

I want to create a MITM proxy that can only be accessed by providing correct credentials:
// Everything runs inside an async IIFE so `await` is available throughout.
(async () => {
  const mockttp = require('mockttp');

  // Build the proxy server around a freshly generated self-signed HTTPS CA
  // certificate.
  const caCertificate = await mockttp.generateCACertificate();
  const proxy = mockttp.getLocal({ https: caCertificate });

  // Answer every request ourselves: log the serialized request, then reply
  // with a JSON body carrying an incrementing counter.
  let requestCount = 0;
  const respondWithCounter = (incoming) => {
    console.log(JSON.stringify(incoming));
    const counterValue = requestCount;
    requestCount += 1;
    return { status: 200, json: { counterValue } };
  };
  proxy.forAnyRequest().thenCallback(respondWithCounter);

  await proxy.start(8080);

  // Report the listening port and the CA certificate fingerprint.
  const fingerprint = mockttp.generateSPKIFingerprint(caCertificate.cert);
  console.log(`Server running on port ${proxy.port}`);
  console.log(`CA cert fingerprint ${fingerprint}`);
})();
With http it works flawlessly, the proxy-authorization header is present:
curl -k -v --proxy "user:pass@127.0.0.1:8080" http://www.google.com
{
"id":"8978f1a3-8a4f-4395-b0dc-0cf8929e760a",
"matchedRuleId":"5a1bc167-7e34-4b0d-9f51-f8e49015b349",
"protocol":"http",
"httpVersion":"1.1",
"method":"GET",
"url":"http://www.google.com/",
"path":"/",
"remoteIpAddress":"::ffff:127.0.0.1",
"remotePort":32932,
"headers":{
"host":"www.google.com",
"proxy-authorization":"Basic dXNlcjpwYXNz",
"user-agent":"curl/7.83.1",
"accept":"*/*",
"proxy-connection":"Keep-Alive"
},
"rawHeaders":[
[
"Host",
"www.google.com"
],
[
"Proxy-Authorization",
"Basic dXNlcjpwYXNz"
],
[
"User-Agent",
"curl/7.83.1"
],
[
"Accept",
"*/*"
],
[
"Proxy-Connection",
"Keep-Alive"
]
],
"tags":[
],
"timingEvents":{
"startTime":1663860475270,
"startTimestamp":7655.8840999901295,
"bodyReceivedTimestamp":7656.588100001216
},
"body":{
"buffer":{
"type":"Buffer",
"data":[
]
}
}
}
Now the problem is that if it runs through https, the proxy-authorization disappears:
curl -k -v --proxy "user:pass@127.0.0.1:8080" https://www.google.com
{
"id":"dd9f61c9-8ecb-4f94-87aa-095fd2f40da6",
"matchedRuleId":"5a1bc167-7e34-4b0d-9f51-f8e49015b349",
"protocol":"https",
"httpVersion":"1.1",
"method":"GET",
"url":"https://www.google.com/",
"path":"/",
"remoteIpAddress":"::ffff:127.0.0.1",
"remotePort":34557,
"headers":{
"host":"www.google.com",
"user-agent":"curl/7.83.1",
"accept":"*/*"
},
"rawHeaders":[
[
"Host",
"www.google.com"
],
[
"User-Agent",
"curl/7.83.1"
],
[
"Accept",
"*/*"
]
],
"tags":[
],
"timingEvents":{
"startTime":1663860737403,
"startTimestamp":269786.7910999954,
"bodyReceivedTimestamp":269787.29159998894
},
"body":{
"buffer":{
"type":"Buffer",
"data":[
]
}
}
}
Is there anything I'm unaware of that causes this behaviour?

How do I capture an event from an ALB for a lambda function and extract the "host" from it?

I have a Lambda@Edge function that receives a CloudFront event and uses the "host" to perform a DynamoDB lookup. Below is the snippet of code that receives the CloudFront event.
/**
 * Handles a CloudFront event and extracts the Host header from it.
 *
 * @param {object} event - CloudFront event; the request is read from
 *   `event.Records[0].cf.request`.
 * @param {object} context - Lambda context (unused here).
 * @param {Function} callback - Lambda callback (unused here).
 */
function main(event, context, callback) {
  const request = event.Records[0].cf.request;
  const headers = request.headers;
  // The host header is read as a list of entries, each with a `value`.
  const host = headers.host[0].value;
}
I am trying to replace CloudFront with an ALB. I am relatively new to Node.js and struggling with
capturing the event from the ALB (the incoming HTTP or HTTPS request) and extracting the host from it. I tried this, but it doesn't seem to work.
/**
 * Attempted ALB variant of the handler: treats the incoming event itself as
 * the request and reads the Host header from it.
 *
 * @param {object} event - Event delivered by the load balancer.
 * @param {object} context - Lambda context (unused here).
 * @param {Function} callback - Lambda callback (unused here).
 */
function main(event, context, callback) {
  const request = event;
  const headers = request.headers;
  // NOTE(review): this keeps the CloudFront-style `host[0].value` lookup.
  // In the sample ALB event, "host" is a plain string, so this expression
  // does not yield the host name there.
  const host = headers.host[0].value;
}
Here is an example Event from an Application Load Balancer (from the aws documentation)
{
"requestContext": {
"elb": {
"targetGroupArn": "arn:aws:elasticloadbalancing:us-east-2:123456789012:targetgroup/lambda-279XGJDqGZ5rsrHC2Fjr/49e9d65c45c6791a"
}
},
"httpMethod": "GET",
"path": "/lambda",
"queryStringParameters": {
"query": "1234ABCD"
},
"headers": {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
"accept-encoding": "gzip",
"accept-language": "en-US,en;q=0.9",
"connection": "keep-alive",
"host": "lambda-alb-123578498.us-east-2.elb.amazonaws.com",
"upgrade-insecure-requests": "1",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
"x-amzn-trace-id": "Root=1-5c536348-3d683b8b04734faae651f476",
"x-forwarded-for": "72.12.164.125",
"x-forwarded-port": "80",
"x-forwarded-proto": "http",
"x-imforwards": "20"
},
"body": "",
"isBase64Encoded": false
}
Any help with this is greatly appreciated.
event.headers.host will give you the host.

using request-promise to querystring in JSON

I have a big API I want to query for userId and receive its details.
// Request options handed to rp(): a GET against the API with the lookup
// criteria passed through `qs`.
var options = {
uri: 'http://www.theapi.net/0862710324bo0',
method : 'GET',
useQuerystring: true,
// Match on the client id taken from the route parameters.
qs: {
"must": [
{ "match": { "data.clients.id": req.params.userId }},
]
},
headers: {
'User-Agent': 'Request-Promise'
},
json: true // Automatically parses the JSON string in the response
};
// Debug output: show the criteria that will be sent.
console.log(options.qs.must)
rp(options)
.then(function (repos) {
// `clients` is read without an index here, while the response below uses
// clients[0].
console.log(repos.clients.name);
// Respond with only the name of the first client in the result.
res.status(200).json({
data:repos.clients[0].name
})
})...
This code returns:
[
{
match: { 'data.clients.id': 'b2d445-2160-4va7-ref-4edf860bd' }
}
]
undefined (because I didn't specify the object array index)
{
"data": "Sergio"
}
What I need:
{
"id":"ec9c1c4d-ab1a-41b2-bc1a-520b889cdeb9",
"name":"Sergio",
"email":"sergio@jorge.com",
},
I believe adding a "bool" tag would help you out.
// Request options handed to rp(): a GET against the API with the lookup
// criteria nested under a "bool" key in `qs`.
var options = {
  uri: 'http://www.theapi.net/0862710324bo0',
  method: 'GET',
  useQuerystring: true,
  qs: {
    "bool": { // Tag added
      "must": [
        { "match": { "data.clients.id": req.params.userId } },
      ],
    },
  },
  headers: {
    'User-Agent': 'Request-Promise',
  },
  json: true, // Automatically parses the JSON string in the response
};
// The criteria now live under `bool`, so log them from there.
console.log(options.qs.bool.must);
rp(options)
  .then(function (repos) {
    console.log(repos.clients.name);
    // Respond with only the name of the first client in the result.
    res.status(200).json({
      data: repos.clients[0].name,
    });
  });
Beware - Untested code!
for (const [key, value] of Object.entries(repos.clients)) {
if (req.params.userId === repos.clients[key].id) {
return res.status(200).json({
data:repos.clients[key]
})
}
}

How to get Lambda Function to get httpmethod from ApiGateway with method ANY?

I'm attempting to create an API Gateway that will take in ANY method in AWS. Once the API has been called, the lambda function will then parse out the message that was sent, and decide what to do from there. So, given an API Gateway method of:
# API Gateway method that accepts ANY HTTP verb and proxies the request to
# myLambdaFunction.
Type: AWS::ApiGateway::Method
Properties:
  RestApiId: !Ref myRestApi
  ResourceId: !Ref myResource
  HttpMethod: ANY
  AuthorizationType: NONE
  Integration:
    Type: AWS_PROXY
    IntegrationHttpMethod: POST
    # Invocation URI assembled from the region and the function's ARN.
    Uri:
      Fn::Join:
        - ''
        - - 'arn:aws:apigateway:'
          - Ref: AWS::Region
          - :lambda:path/2015-04-30/functions/
          - Fn::GetAtt:
              - myLambdaFunction
              - Arn
          - /invocations
And it will successfully call myLambdaFunction, how do I then have the lambda function in node get which HttpMethod was actually sent?
For example:
exports.handler = (event, context, callback) => {
const response = {
statusCode: 200,
headers: {
"x-custom-header" : "This exists for reasons."
}
};
// I know that event doesn't actually have any httpmethod, but I'm not sure what it does have, or how to use it.
switch(event.httpmethod) {
case "POST":
console.log("POST!!!");
create(event, context, callback);
break;
case "GET":
console.log("GET!!!");
read(event, context, callback);
break;
case "PUT":
console.log("PUT!!!");
update(event, context, callback);
break;
}
The lambda above, should be able to console.log whichever method it got, but I'm not sure what should go in place of the event.httpmethod which is something I just made up.
You are looking for the event.httpMethod (note CAPITAL M) property.
If you are not sure what data your Lambda event has, you can always log the result by using
console.log(event);
and the result will be visible in the CloudWatch log associated with the Lambda function.
For proxy integration between API Gateway and Lambda, you can find specific details about those events in the AWS API Gateway developer guide:
{
"resource": "Resource path",
"path": "Path parameter",
"httpMethod": "Incoming request's method name"
"headers": {String containing incoming request headers}
"multiValueHeaders": {List of strings containing incoming request headers}
"queryStringParameters": {query string parameters }
"multiValueQueryStringParameters": {List of query string parameters}
"pathParameters": {path parameters}
"stageVariables": {Applicable stage variables}
"requestContext": {Request context, including authorizer-returned key-value pairs}
"body": "A JSON string of the request payload."
"isBase64Encoded": "A boolean flag to indicate if the applicable request payload is Base64-encode"
}
Or in the AWS Lambda Developer Guide:
{
"path": "/test/hello",
"headers": {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, lzma, sdch, br",
"Accept-Language": "en-US,en;q=0.8",
"CloudFront-Forwarded-Proto": "https",
"CloudFront-Is-Desktop-Viewer": "true",
"CloudFront-Is-Mobile-Viewer": "false",
"CloudFront-Is-SmartTV-Viewer": "false",
"CloudFront-Is-Tablet-Viewer": "false",
"CloudFront-Viewer-Country": "US",
"Host": "wt6mne2s9k.execute-api.us-west-2.amazonaws.com",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36 OPR/39.0.2256.48",
"Via": "1.1 fb7cca60f0ecd82ce07790c9c5eef16c.cloudfront.net (CloudFront)",
"X-Amz-Cf-Id": "nBsWBOrSHMgnaROZJK1wGCZ9PcRcSpq_oSXZNQwQ10OTZL4cimZo3g==",
"X-Forwarded-For": "192.168.100.1, 192.168.1.1",
"X-Forwarded-Port": "443",
"X-Forwarded-Proto": "https"
},
"pathParameters": {
"proxy": "hello"
},
"requestContext": {
"accountId": "123456789012",
"resourceId": "us4z18",
"stage": "test",
"requestId": "41b45ea3-70b5-11e6-b7bd-69b5aaebc7d9",
"identity": {
"cognitoIdentityPoolId": "",
"accountId": "",
"cognitoIdentityId": "",
"caller": "",
"apiKey": "",
"sourceIp": "192.168.100.1",
"cognitoAuthenticationType": "",
"cognitoAuthenticationProvider": "",
"userArn": "",
"userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36 OPR/39.0.2256.48",
"user": ""
},
"resourcePath": "/{proxy+}",
"httpMethod": "GET",
"apiId": "wt6mne2s9k"
},
"resource": "/{proxy+}",
"httpMethod": "GET",
"queryStringParameters": {
"name": "me"
},
"stageVariables": {
"stageVarName": "stageVarValue"
}
}
The event variable is a request given json that your lambda gets.
For your code to work, you need to pass the following JSON to the Lambda:
{
httpmethod : "value"
}
where the value will be POST,GET or PUT.
In the console, to the right of the Actions button, you can create a test with a JSON event as input.
I have found the httpMethod value using this -
// Reads the method from requestContext.http.method rather than from a
// top-level httpMethod property.
// NOTE(review): presumably this applies to a different event payload format
// than the REST proxy samples in this document — confirm against the API type.
if (event.requestContext.http.method === 'GET') {
// code goes here...
}
The code below can be used to find the method.
// Branch on the HTTP method carried at the top level of the event.
switch (event.httpMethod) {
  case "GET":
    // Get method code goes here
    break;
  case "POST":
    // Post method code goes here
    break;
}
Example event.json API Gateway proxy event (REST API)

KV filter logstash value_split

Hi, I am using the kv filter to split my string. I wanted to know how to use the values after I split them. For example:
My logs look like below:
47.30.221.46 - - [04/Sep/2017:13:24:44 +0530] "GET /api/v1.2/places/search/json?username=gaurav.saxena889&location=28.5506382,77.2689024&query=sunrise%20hy&explain=true&bridge=true HTTP/1.1" 200 2522 45402
47.30.221.46 - - [04/Sep/2017:13:24:46 +0530] "GET /api/v1.2/places/search/json?username=gaurav.saxena889&location=28.5506382,77.2689024&query=hy&explain=true&bridge=true HTTP/1.1" 200 2169 55267
47.30.221.46 - - [04/Sep/2017:13:24:47 +0530] "GET /api/v1.2/places/search/json?username=gaurav.saxena889&location=28.5506382,77.2689024&query=hyun&explain=true&bridge=true HTTP/1.1" 200 2530 29635
47.30.221.46 - - [04/Sep/2017:13:24:47 +0530] "GET /api/v1.2/places/search/json?username=gaurav.saxena889&location=28.5506382,77.2689024&query=hyunda&explain=true&bridge=true HTTP/1.1" 200 2572 25449
47.30.221.46 - - [04/Sep/2017:13:24:48 +0530] "GET /api/v1.2/places/search/json?username=gaurav.saxena889&location=28.5506382,77.2689024&query=hyundai&explain=true&bridge=true HTTP/1.1" 200 3576 28007
47.30.221.46 - - [04/Sep/2017:13:24:58 +0530] "GET /api/v1.2/places/search/json?username=gaurav.saxena889&location=28.5506382,77.2689024&query=su&explain=true&bridge=true HTTP/1.1" 200 2354 96861
47.30.221.46 - - [04/Sep/2017:13:24:58 +0530] "GET /api/v1.2/places/search/json?username=gaurav.saxena889&location=28.5506382,77.2689024&query=sun&explain=true&bridge=true HTTP/1.1" 200 3224 50897
My logstash config file looks like below:
# Parse API access-log lines shipped by Beats, break the query string into
# fields, normalize field names and types, and index into Elasticsearch.
input {
  beats {
    port => 5044
    client_inactivity_timeout => 86400
  }
}
filter {
  # Split each access-log line into named fields; the query string ends up
  # in "request".
  grok {
    match => {
      "message" => "%{IPORHOST:client_ip} %{HTTPDUSER:ident} %{HTTPDUSER:auth} \[%{HTTPDATE:timestamp}\] \"(?:%{WORD:method} /api/v%{NUMBER:version}/%{DATA:resource}/%{DATA:subresource}/%{DATA:response_type}\?%{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})\" %{NUMBER:response_code} (?:%{NUMBER:data_transfered}|-) %{NUMBER:response_time}"
    }
  }
  # Turn the "&"-separated query string into individual fields.
  kv {
    source => "request"
    field_split => "&"
  }
  # Both "query" and "keyword" are stored under the same field name.
  if [query] {
    mutate {
      rename => { "query" => "searched_keyword" }
    }
  } else if [keyword] {
    mutate {
      rename => { "keyword" => "searched_keyword" }
    }
  }
  if [refLocation] {
    mutate {
      rename => { "refLocation" => "location" }
    }
  }
  mutate {
    convert => { "response_code" => "integer" }
  }
  mutate {
    convert => { "data_transfered" => "integer" }
  }
  mutate {
    convert => { "version" => "float" }
  }
  mutate {
    convert => { "response_time" => "integer" }
  }
  # Run kv over the location value using "," as the key/value separator.
  if [location] {
    kv {
      source => "location"
      value_split => ","
    }
  }
}
output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    index => "logstash_apachelogs"
    document_type => "log"
  }
}
If you have a look at the last kv filter, I've split my location value on a comma. I have 2 questions:
As you can see from the logs, I have location=28.5506382,77.2689024, which I split on the comma using the kv filter. How do I use the split values in a geoip filter, which takes its values as below?
# Look up the "ClientIP" field and store the result under "geoip".
geoip {
source => "ClientIP"
target => "geoip"
# Both add_field entries target [geoip][coordinates]: longitude first,
# then latitude.
add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
}
How do I replace the %20 in the query parameter with a space?

Resources