How to use an AWK regex to print multiple substring patterns in an Excel-friendly format in different columns - linux

I have a log file that contains millions of lines like this:
$ cat file.log
10.0.7.92 - - [05/Jun/2017:03:50:06 +0000] "GET /adserver/html5/inwapads/?category=[IAB]&size=320x280&ak=AY1234&output=vast&version=1.1&sleepAfter=&requester=John&adFormat=preappvideo HTTP/1.1" 200 131 "-" "Mozilla/5.0 (Linux; Android 6.0.1; SM-S120VL Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/58.0.3029.83 Mobile Safari/537.36" 0.000 1029 520 127.0.0.1
10.0.6.91 - - [05/Jun/2017:03:50:06 +0000] "GET /adserver/html5/inwapads/?category=[IAB]&output=vast&version=1.1&sleepAfter=&requester=John&size=320x280&ak=AY1234&adFormat=preappvideo HTTP/1.1" 200 131 "-" "Mozilla/5.0 (Linux; Android 6.0.1; SM-S120VL Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/58.0.3029.83 Mobile Safari/537.36" 0.000 1029 520 127.0.0.1
I want to print output for every line like this, in separate columns that can be opened in Excel:
inwapads AY1234 john 320x280
How can I do that using awk, or do I need to use another method?

If your input looks like this file data:
$ cat file.log
10.0.7.92 - - [05/Jun/2017:03:50:06 +0000] "GET /adserver/html5/inwapads/?category=[IAB]&size=320x280&ak=AY1234&output=vast&version=1.1&sleepAfter=&requester=John&adFormat=preappvideo HTTP/1.1" 200 131 "-" "Mozilla/5.0 (Linux; Android 6.0.1; SM-S120VL Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/58.0.3029.83 Mobile Safari/537.36" 0.000 1029 520 127.0.0.1
10.0.6.91 - - [05/Jun/2017:03:50:06 +0000] "GET /adserver/html5/inwapads/?category=[IAB]&output=vast&version=1.1&sleepAfter=&requester=John&size=320x280&ak=AY1234&adFormat=preappvideo HTTP/1.1" 200 131 "-" "Mozilla/5.0 (Linux; Android 6.0.1; SM-S120VL Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/58.0.3029.83 Mobile Safari/537.36" 0.000 1029 520 127.0.0.1
Then you can simply use awk on column $7 with gensub(/regex/, replacement, how, target), GNU awk's general substitution function:
$ awk '{
item=gensub( /(^.*\/)(.*\/)(.*)(\/)(\?.*$)/ , "\\3" , 1, $7 )
ak=gensub( /(^.*ak\=)([A-Z]*[0-9]*)(\&)(.*$)/ , "\\2" , 1, $7)
req=gensub( /(^.*requester\=)([A-Za-z]*)(\&)(.*$)/ , "\\2", 1, $7)
s=gensub( /(^.*size\=)([0-9]*x[0-9]*)(\&.*$)/, "\\2", 1, $7)
print item, ak, req, s
}' file.log
Output:
inwapads AY1234 John 320x280
inwapads AY1234 John 320x280

Related

I'm trying to deploy a backend server on Elastic Beanstalk, and when I use the link I get a 502 Bad Gateway

I'm trying to deploy a backend server on Elastic Beanstalk, and when I use the link I get a 502 Bad Gateway.
It's a Node.js and Express server.
Here is the log output:
/var/log/nginx/access.log
----------------------------------------
172.31.90.144 - - [27/Jan/2023:20:20:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:20:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:20:43 +0000] "GET / HTTP/1.1" 502 559 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" "41.69.184.245"
172.31.40.18 - - [27/Jan/2023:20:20:44 +0000] "GET /favicon.ico HTTP/1.1" 502 559 "http://project1-env.eba-cjrmrczb.us-east-1.elasticbeanstalk.com/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" "41.69.184.245"
172.31.40.18 - - [27/Jan/2023:20:20:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:20:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:20:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:21:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:21:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:21:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:21:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:21:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:21:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:21:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:21:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:21:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:21:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:21:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:21:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:22:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:22:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:22:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:22:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:22:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:22:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:22:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:22:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:22:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:22:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:22:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:22:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:23:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:23:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:23:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:23:08 +0000] "GET / HTTP/1.1" 502 157 "-" "Expanse, a Palo Alto Networks company, searches across the global IPv4 space multiple times per day to identify customers' presences on the Internet. If you would like to be excluded from our scans, please send IP addresses/domains to: scaninfo#paloaltonetworks.com" "205.210.31.13"
172.31.40.18 - - [27/Jan/2023:20:23:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:23:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:23:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:23:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:23:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:23:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:23:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:23:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:23:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:24:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:24:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:24:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:24:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:24:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:24:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:24:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:24:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:24:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:24:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:24:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:24:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:25:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:25:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:25:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:25:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:25:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:25:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:25:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:25:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:25:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:25:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:25:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:25:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:26:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:26:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:26:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:26:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:26:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:26:20 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:26:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:26:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:26:35 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:26:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:26:50 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:26:51 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:27:05 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:27:06 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:27:06 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:27:21 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:27:21 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:27:21 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:27:36 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:27:36 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:27:36 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:27:51 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:27:51 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:27:51 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:28:06 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:28:06 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:28:06 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.40.18 - - [27/Jan/2023:20:28:21 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:28:21 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.18.237 - - [27/Jan/2023:20:28:21 +0000] "GET / HTTP/1.1" 502 157 "-" "ELB-HealthChecker/2.0" "-"
172.31.90.144 - - [27/Jan/2023:20:28:26 +0000] "GET / HTTP/1.1" 502 559 "-" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" "41.69.184.245"
172.31.90.144 - - [27/Jan/2023:20:28:27 +0000] "GET /favicon.ico HTTP/1.1" 502 559 "http://project1-env.eba-cjrmrczb.us-east-1.elasticbeanstalk.com/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36" "41.69.184.245"
I have tried setting the environment variable NPM_CONFIG_PRODUCTION=true, but nothing changed. I have also tried creating an .ebextensions folder in my root folder and adding nodecommand.config, but when I do so the project no longer deploys.
I solved the problem. It turns out that I forgot to include the dist folder in my deployment.

AWS Elastic Beanstalk status severe but no causes and working well

I have a nodejs apollo server (graphql) and almost all the time the status is severe, but the server is working well. Also, there is no cause information about it.
I have checked all the logs and could not find the reason. Does anyone have an idea what it could be?
access.log
IP - - [22/Dec/2020:09:08:53 +0000] "GET /api/subscriptions HTTP/1.1" 101 27 "-" "-" "IP"
IP - - [22/Dec/2020:09:08:53 +0000] "POST /api/graphql HTTP/1.1" 200 163 "-" "ELB-HealthChecker/2.0" "172.31.37.93, IP"
IP - - [22/Dec/2020:09:08:53 +0000] "POST /api/graphql HTTP/1.1" 200 187 "-" "ELB-HealthChecker/2.0" "172.31.37.93, IP"
IP - - [22/Dec/2020:09:08:53 +0000] "POST /api/graphql HTTP/1.1" 200 187 "-" "ELB-HealthChecker/2.0" "172.31.37.93, IP"
IP - - [22/Dec/2020:09:08:53 +0000] "POST /api/graphql HTTP/1.1" 200 163 "-" "ELB-HealthChecker/2.0" "172.31.37.93, IP"
IP - - [22/Dec/2020:09:08:54 +0000] "GET /api/subscriptions HTTP/1.1" 101 27 "-" "-" "IP"
IP - - [22/Dec/2020:09:08:54 +0000] "GET /api/subscriptions HTTP/1.1" 101 27 "-" "-" "IP"
IP - - [22/Dec/2020:09:08:54 +0000] "GET /api/subscriptions HTTP/1.1" 101 27 "-" "-" "IP"
IP - - [22/Dec/2020:09:08:54 +0000] "GET /api/subscriptions HTTP/1.1" 101 27 "-" "-" "IP"
IP - - [22/Dec/2020:09:08:54 +0000] "GET /api/subscriptions HTTP/1.1" 101 27 "-" "-" "IP"
IP - - [22/Dec/2020:09:08:54 +0000] "GET /api/subscriptions HTTP/1.1" 101 27 "-" "-" "IP"
the health overview pt1:
the health overview pt2:
the monitor of the health:
It could be a linked service, such as Amazon SNS, that has messages in its dead-letter queue, or a failed deployment, or something else. Have you already tried rebuilding the app? AWS EB is indeed sometimes difficult to debug.

No measurements from input from telegraf to influxdb

In short, this is my problem:
https://github.com/influxdata/telegraf/issues/4399
I see it wasn't answered, but I have the same problem.
These are the lines from the access log. Please help me.
192.168.0.122 - - [18/Oct/2018:13:13:40 +0300] "GET /server-status HTTP/1.1" 200 4598 "-" "Go-http-client/1.1"
192.168.0.122 - - [18/Oct/2018:13:13:50 +0300] "GET /server-status HTTP/1.1" 200 4598 "-" "Go-http-client/1.1"
192.168.0.122 - - [18/Oct/2018:13:14:00 +0300] "GET /server-status HTTP/1.1" 200 4598 "-" "Go-http-client/1.1"
192.168.0.122 - - [18/Oct/2018:13:14:10 +0300] "GET /server-status HTTP/1.1" 200 4598 "-" "Go-http-client/1.1"
192.168.0.122 - - [18/Oct/2018:13:14:20 +0300] "GET /server-status HTTP/1.1" 200 4596 "-" "Go-http-client/1.1"
192.168.0.122 - - [18/Oct/2018:13:14:30 +0300] "GET /server-status HTTP/1.1" 200 4598 "-" "Go-http-client/1.1"
192.168.0.122 - - [18/Oct/2018:13:14:40 +0300] "GET /server-status HTTP/1.1" 200 4598 "-" "Go-http-client/1.1"
192.168.0.122 - - [18/Oct/2018:13:14:50 +0300] "GET /server-status HTTP/1.1" 200 4598 "-" "Go-http-client/1.1"
192.168.0.122 - - [18/Oct/2018:13:15:00 +0300] "GET /server-status HTTP/1.1" 200 4598 "-" "Go-http-client/1.1"
192.168.0.122 - - [18/Oct/2018:13:15:10 +0300] "GET /server-status HTTP/1.1" 200 4598 "-" "Go-http-client/1.1"
The issue was fixed by changing the system permissions of the telegraf service.
Also, telegraf does not report that it cannot read the access logs.

Adding another column to awk output

I have a HAProxy log file with content similar to this:
Feb 28 11:16:10 localhost haproxy[20072]: 88.88.88.88:6152 [28/Feb/2017:11:16:01.220] frontend backend_srvs/srv1 9063/0/0/39/9102 200 694 - - --VN 9984/5492/191/44/0 0/0 {Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36|http://subdomain.domain.com/location1} "GET /location1 HTTP/1.1"
Feb 28 11:16:10 localhost haproxy[20072]: 88.88.88.88:6152 [28/Feb/2017:11:16:10.322] frontend backend_srvs/srv1 513/0/0/124/637 200 14381 - - --VN 9970/5491/223/55/0 0/0 {Mozilla/5.0 AppleWebKit/537.36 Chrome/56.0.2924.87 Safari/537.36|http://subdomain.domain.com/location2} "GET /location2 HTTP/1.1"
Feb 28 11:16:13 localhost haproxy[20072]: 88.88.88.88:6152 [28/Feb/2017:11:16:10.960] frontend backend_srvs/srv1 2245/0/0/3/2248 200 7448 - - --VN 9998/5522/263/54/0 0/0 {another user agent with fewer columns|http://subdomain.domain.com/location3} "GET /location3 HTTP/1.1"
Feb 28 11:16:13 localhost haproxy[20072]: 88.88.88.88:6152 [28/Feb/2017:11:16:10.960] frontend backend_srvs/srv1 2245/0/0/3/2248 200 7448 - - --VN 9998/5522/263/54/0 0/0 {Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36|} "GET /another_location HTTP/1.1"
I want to extract some of the fields in order to have the following output:
Field 1 Field 2 Field 3 Field 4 Field 5 Field 6
Date/time HTTP status code HTTP Method Request HTTP version Referer URL
Basically, in this particular case the output should be:
Feb 28 11:16:10 200 GET /location1 HTTP/1.1 http://subdomain.domain.com/location1
Feb 28 11:16:10 200 GET /location2 HTTP/1.1 http://subdomain.domain.com/location2
Feb 28 11:16:13 200 GET /location3 HTTP/1.1 http://subdomain.domain.com/location3
Feb 28 11:16:13 200 GET /another_location HTTP/1.1
The only problem here is extracting the Referer URL which is between curly brackets together with the user agent and they're separated by a pipe. Also, the user agent has a variable number of fields.
The only solution I could think of was extracting the referer url separately and then pasting the columns together:
requests_temp=`grep -F " 88.88.88.88:" /root/file.log | tr -d '"'`
requests=`echo "${requests_temp}" | awk '{print $1" "$2" "$3" "$11, $(NF-2), $(NF-1), $NF}' > /tmp/requests_tmp`
referer_url=`echo "${requests_temp}" | awk 'NR > 1 {print $1}' RS='{' FS='}' | awk -F'|' '{ print $2 }' > /tmp/referer_url_tmp`
paste /tmp/abuse_requests_tmp /tmp/referer_url_tmp
But I don't really like this method. Is there any other way in which I can do it using only one awk line? Maybe assign the referer URL column to a variable inside awk and then use it to create the same output?
Try the solution below:
awk '/88.88.88.88/ {gsub(/"/,"",$0);split($(NF-3),a,"|"); {print $1,$2,$3,$11, $(NF-2), $(NF-1), $NF, substr(a[2],1,(length(a[2])-1))}}' a
Feb 28 11:16:10 200 GET /location1 HTTP/1.1 http://subdomain.domain.com/location1
Feb 28 11:16:10 200 GET /location2 HTTP/1.1 http://subdomain.domain.com/location2
Feb 28 11:16:13 200 GET /location3 HTTP/1.1 http://subdomain.domain.com/location3
Feb 28 11:16:13 200 GET /another_location HTTP/1.1
You can do all at once using awk:
awk '$6 ~ /88\.88\.88\.88:[0-9]+/{
split($0,a,/[{}]/)
$0=a[1] OFS a[3]
split(a[2],b,"|")
print $1,$2,$3,$11,substr($18,2),$19,substr($20,1,length($20)-1),b[2]
}' file.log
The first split breaks the line around its variable part (the text enclosed in {...}) and stores the pieces in the array a.
The line is then rebuilt so that it has a fixed number of fields: $0=a[1] OFS a[3]
The second split extracts the URL from the variable part, using the | character as the separator.
Finally, the print outputs all the needed elements. Note that the substr calls are there to remove the ".

Counting IPs from a file

I have access logs like this, and I would like to grab each IP address and then order them by how often each one occurs.
173.192.238.41 - - [28/Feb/2013:07:06:09 -0500] "GET / HTTP/1.1" 200 20644 "-" "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.19; aggregator:Spinn3r (Spinn3r 3.1); http://spinn3r.com/robot) Gecko/2010040121 Firefox/3.0.19"
208.115.113.84 - - [28/Feb/2013:07:06:19 -0500] "GET /tag/bright HTTP/1.1" 404 327 "-" "Mozilla/5.0 (compatible; Ezooms/1.0; ezooms.bot#gmail.com)"
94.228.34.214 - - [28/Feb/2013:07:10:16 -0500] "GET /alli-comes-home-12-10-09-day-224-2264/feed HTTP/1.1" 404 359 "-" "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)"
209.171.42.71 - - [28/Feb/2013:07:11:19 -0500] "GET /feed/atom HTTP/1.1" 404 326 "-" "Mozilla/5.0 (compatible; BlogScope/1.0; +http://www.blogscope.net/; U of Toronto)"
94.228.34.229 - - [28/Feb/2013:07:12:48 -0500] "GET /the-latest-design-franck-muller-watches-and-versace-watches-6838/feed HTTP/1.1" 404 386 "-" "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)"
Can I cat and sort it like this?
cat /path/to/access.log | awk '{print $1}' | sort | uniq -c
You're close. After counting them, you have to sort by the count:
awk '{print $1}' /path/to/access.log | sort | uniq -c | sort -n
You can also do the counting in awk rather than using sort and uniq:
awk '{count[$1]++} END {for (ip in count) print count[ip], ip;}' | sort -n
awk '{a[$1]++}END{for(i in a)print a[i],i}' your_log|sort -rn
or
perl -lane '$x{$F[0]}++;END{for(keys %x){print $x{$_}." ".$_;}}' your_log|sort -rn
Here's one way you can order the IPv4 addresses by occurrence and then by address:
# cut takes only the first column from access.log
<access.log cut -d' ' -f1 |
# Presort the IP addresses so uniq can count them
sort |
uniq -c |
# Format the stream so it only contains `.' delimiters
sed 's/^ *//; s/ /./' |
# Now sort numerically based on each consecutive dot delimited column
sort -t. -k1,1n -k2,2n -k3,3n -k4,4n -k5,5n |
# Reset the first delimiter
sed 's/\./ /'
Test input:
cat << EOF > access.log
173.192.238.41 - - [28/Feb/2013:07:06:09 -0500] "GET / HTTP/1.1" 200 20644 "-" "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.19; aggregator:Spinn3r (Spinn3r 3.1); http://spinn3r.com/robot) Gecko/2010040121 Firefox/3.0.19"
208.115.113.84 - - [28/Feb/2013:07:06:19 -0500] "GET /tag/bright HTTP/1.1" 404 327 "-" "Mozilla/5.0 (compatible; Ezooms/1.0; ezooms.bot#gmail.com)"
94.228.34.229 - - [28/Feb/2013:07:12:48 -0500] "GET /the-latest-design-franck-muller-watches-and-versace-watches-6838/feed HTTP/1.1" 404 386 "-" "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)"
94.228.34.214 - - [28/Feb/2013:07:10:16 -0500] "GET /alli-comes-home-12-10-09-day-224-2264/feed HTTP/1.1" 404 359 "-" "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)"
209.171.42.71 - - [28/Feb/2013:07:11:19 -0500] "GET /feed/atom HTTP/1.1" 404 326 "-" "Mozilla/5.0 (compatible; BlogScope/1.0; +http://www.blogscope.net/; U of Toronto)"
209.71.42.71 - - [28/Feb/2013:07:11:19 -0500] "GET /feed/atom HTTP/1.1" 404 326 "-" "Mozilla/5.0 (compatible; BlogScope/1.0; +http://www.blogscope.net/; U of Toronto)"
94.228.34.229 - - [28/Feb/2013:07:12:48 -0500] "GET /the-latest-design-franck-muller-watches-and-versace-watches-6838/feed HTTP/1.1" 404 386 "-" "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)"
94.229.34.229 - - [28/Feb/2013:07:12:48 -0500] "GET /the-latest-design-franck-muller-watches-and-versace-watches-6838/feed HTTP/1.1" 404 386 "-" "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)"
94.227.34.229 - - [28/Feb/2013:07:12:48 -0500] "GET /the-latest-design-franck-muller-watches-and-versace-watches-6838/feed HTTP/1.1" 404 386 "-" "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)"
EOF
Output:
1 94.227.34.229
1 94.228.34.214
1 94.229.34.229
1 173.192.238.41
1 208.115.113.84
1 209.71.42.71
1 209.171.42.71
2 94.228.34.229

Resources