Sending Log Data to Splunk using Python

I have an app that detects file changes, backs them up, and syncs files to Azure.
I currently have a logger set up that writes log events to a file called log.log; event data also streams to stdout. This is my current working code.
I’d like to send log data to Splunk via requests.post() or logging.handlers.HTTPHandler.
Question: How do I set up an HTTP Handler in Python logging?
(I need to become more familiar with the advanced features of logging in Python.)
import logging

def setup_logger(logger_name: str = __name__, logfile: str = 'log.log'):
    """ Standard logging: stdout and log file.
    Args:
        logger_name (str, optional): Logger name. Defaults to __name__.
        logfile (str, optional): Log file name. Defaults to 'log.log'.
    Returns:
        logging.Logger: the configured logger object.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler(logfile)
    fh.setLevel(logging.INFO)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s | %(name)s | %(levelname)s | %(message)s',
        '%m-%d-%Y %H:%M:%S')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    return logger

if __name__ == "__main__":
    logger = setup_logger('logger', 'log-sample.log')  # creates a test file vs the default log.log
    logger.info("My Logger has been initialized")
Currently I’m trying to send test data to Splunk via this code example (before I figure out the logging issue):
import requests

# Set up the Splunk HEC URL and token
splunk_url = "http://127.0.0.1:8088/services/collector/event"
splunk_token = "57489f00-605e-4f2a-8df3-123456789abcdef="

# Set up the log event data
log_data = {
    "event": "This is a test log event",
    "sourcetype": "my_sourcetype",
    "index": "test_index"
}

# Send the log event to Splunk
response = requests.post(splunk_url, json=log_data, headers={
    "Authorization": f"Splunk {splunk_token}"
})

# Check the response status code to make sure the request was successful
if response.status_code == 200:
    print("Log event sent to Splunk successfully")
else:
    print(f"Error sending log event to Splunk: {response.text}")

I found the solution myself.
import logging
import requests
import urllib3

urllib3.disable_warnings()  # using the default self-signed cert

url = "https://127.0.0.1:8088/services/collector/event"
headers = {"Authorization": "Splunk 09584dbe-183b-4d14-9ee9-be66a37b331a"}
index = 'test_index'

class CustomHttpHandler(logging.Handler):
    def __init__(self, url: str, headers: dict, index: str) -> None:
        self.url = url
        self.headers = headers
        self.index = index
        super().__init__()

    def emit(self, record: logging.LogRecord) -> None:
        '''
        This function gets called when a log event gets emitted. It receives
        a record, formats it, and sends it to the url.

        Parameters:
            record: a log record (created by the logging module)
        '''
        log_entry = self.format(record)
        requests.post(
            url=self.url, headers=self.headers,
            json={"index": self.index, "event": log_entry},
            verify=False)

def setup_logger(logger_name: str = __name__, logfile: str = 'log.log'):
    """ Standard logging: stdout, log file, and Splunk HEC.
    1. creates a file handler that writes to the log file: fh
    2. creates a console handler: ch
    3. creates a formatter and adds it to the handlers: formatter, setFormatter
    4. adds the handlers to the logger: addHandler
    Args:
        logger_name (str, optional): Logger name. Defaults to __name__.
        logfile (str, optional): Log file name. Defaults to 'log.log'.
    Returns:
        logging.Logger: the configured logger object.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler(logfile)
    fh.setLevel(logging.INFO)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s | %(name)s | %(levelname)s | %(message)s',
        '%m-%d-%Y %H:%M:%S')
    splunk_handler = CustomHttpHandler(url=url, headers=headers, index=index)
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    splunk_handler.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    logger.addHandler(splunk_handler)
    return logger

if __name__ == "__main__":
    logger = setup_logger('logger', 'app.log')
    logger.info("My Logger has been initialized")

Related

How to use aioboto3 & asyncio to download files from S3 (AWS) in Python

I have a sync script which is running and working well, but some file downloads take time, so I thought of using an async approach here.
import json
import os
import io
import time
import gzip
import re
import logging
from logging.handlers import RotatingFileHandler

import boto3

AWS_KEY = "**"
AWS_SECRET = "**"
QUEUE_URL = "***"
OUTPUT_PATH = "./test"
VISIBILITY_TIMEOUT = 10
REGION_NAME = "region"
sleep_time = 10  # seconds between polls (value assumed; undefined in the original)

sqs = boto3.resource('sqs', region_name=REGION_NAME, aws_access_key_id=AWS_KEY,
                     aws_secret_access_key=AWS_SECRET)
s3 = boto3.client('s3', region_name=REGION_NAME, aws_access_key_id=AWS_KEY,
                  aws_secret_access_key=AWS_SECRET)
queue = sqs.Queue(url=QUEUE_URL)

def handle_response(msg, path):
    """Logic goes here"""
    print('message: %s' % msg)

def download_message_files(msg):
    for s3_file in msg['files']:
        s3_path = s3_file['path']
        with io.BytesIO() as f:
            s3.download_fileobj(msg['bucket'], s3_path, f)
            f.seek(0)
            for line in gzip.GzipFile(fileobj=f):
                handle_response(line.decode('UTF-8'), s3_path)

def consume():
    while True:
        for msg in queue.receive_messages(VisibilityTimeout=VISIBILITY_TIMEOUT):
            body = json.loads(msg.body)  # grab the actual message body
            download_message_files(body)
            msg.delete()
        time.sleep(sleep_time)

if __name__ == '__main__':
    # Setup our root logger
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s %(name)s %(levelname)s %(message)s")
    # Create our FDR logger
    logger = logging.getLogger("Consumer")
    # Rotating log file handler
    RFH = RotatingFileHandler("test.log", maxBytes=20971520, backupCount=5)
    # Log file output format
    F_FORMAT = logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s')
    # Set the log file output level to INFO
    RFH.setLevel(logging.INFO)
    # Add our log file formatter to the log file handler
    RFH.setFormatter(F_FORMAT)
    # Add our log file handler to our logger
    logger.addHandler(RFH)
    consume()
I have tried converting this using aioboto3 and got stuck on the queue approach.

session = aioboto3.Session()
sqs = session.resource('sqs', region_name=REGION_NAME, aws_access_key_id=AWS_KEY,
                       aws_secret_access_key=AWS_SECRET)
s3 = session.client('s3', region_name=REGION_NAME, aws_access_key_id=AWS_KEY,
                    aws_secret_access_key=AWS_SECRET)
queue = sqs.Queue(url=QUEUE_URL)  # <-- this gives: 'ResourceCreatorContext' object has no attribute 'Queue'

As I understand from this, the attribute does not exist there, but could anyone guide me on making this work asynchronously?
You can use asyncio and aioboto3 together.
Instead of creating a resource, you can use a client. The difference between an aioboto3.client and an aioboto3.resource can be found in this answer.
This is a simple working example:
import asyncio

import aioboto3

async def consume():
    async with aioboto3.Session().client(
            service_name='sqs', region_name=REGION_NAME,
            aws_access_key_id=AWS_KEY,
            aws_secret_access_key=AWS_SECRET) as client:
        # The client API is receive_message (singular) and needs the QueueUrl.
        response = await client.receive_message(
            QueueUrl=QUEUE_URL, VisibilityTimeout=VISIBILITY_TIMEOUT)
        for message in response.get('Messages', []):
            pass  # Do something

asyncio.run(consume())
This should solve the error you are facing. This solution can also be extended to S3 as per your requirements.
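For the S3 part, here is a hedged sketch of the same pattern applied to the original download loop (it assumes the message layout from the question and that handle_response stays synchronous):

import gzip
import io

import aioboto3

async def download_message_files(msg):
    # aioboto3 mirrors the boto3 client API, with the calls awaited.
    async with aioboto3.Session().client(
            's3', region_name=REGION_NAME,
            aws_access_key_id=AWS_KEY,
            aws_secret_access_key=AWS_SECRET) as s3:
        for s3_file in msg['files']:
            s3_path = s3_file['path']
            with io.BytesIO() as f:
                await s3.download_fileobj(msg['bucket'], s3_path, f)
                f.seek(0)
                for line in gzip.GzipFile(fileobj=f):
                    handle_response(line.decode('UTF-8'), s3_path)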

logging | error() gets sent to FileHandler... but not other Logging Levels

I am able to store error logs to a file... but not info() or any other logging levels.
What am I doing wrong?
How can I store logs of any level with a FileHandler?
code.py
import sys
import logging

def setup_logging():
    global logger
    logger = logging.getLogger()
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    open('data_module.log', 'w').close()  # empty logs
    global fileHandler
    fileHandler = logging.FileHandler('data_module.log')
    fileHandler.setFormatter(formatter)
    fileHandler.setLevel(logging.DEBUG)
    logger.addHandler(fileHandler)
    logger.error('Started')  # error
    logger.info('information')  # info
test.py:
import code as c

c.setup_logging()
with open('data_module.log', 'r') as fileHandler:
    logs = [l.rstrip() for l in fileHandler.readlines()]
open('data_module.log', 'w').close()  # empty logs
assert len(logs) == 2
Error:
AssertionError: assert 1 == 2
Please let me know if there's anything else I should add to this post.
You need to set the level on the logger itself:

logger.setLevel(logging.DEBUG)

The default log level is WARNING: when you write a DEBUG- or INFO-level message, the logger does not handle it (i.e. does not send it to a handler), so the handler you added is never invoked.
The handler can have its own level, but that is consulted only after the handler is invoked: if a logger sends a DEBUG message to a handler that is only interested in INFO+ messages, the handler does nothing.
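A minimal illustration of the two checks working together:

import logging

logger = logging.getLogger('demo')
fh = logging.FileHandler('data_module.log')
fh.setLevel(logging.DEBUG)      # handler level: filters records it receives
logger.addHandler(fh)

logger.info('dropped')          # logger still at the default WARNING: never reaches fh
logger.setLevel(logging.DEBUG)  # logger level: decides what is handed to handlers
logger.info('written')          # now passes both checks and lands in the file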

Program logs everything to the same file

I created a class that takes a url as a parameter, and I have a list of urls for which I invoke the class. I have enabled logging to see why it fails for certain urls; however, all logs end up in one file.
class MyClass():
    def __init__(self, url):
        self.logger = logging.getLogger('bot')
        self.logger.setLevel(level=logging.DEBUG)
        self.handler = logging.StreamHandler()
        self.handler_format = logging.Formatter(
            "%(asctime)s %(levelname)s %(lineno)d:%(filename)s(%(process)d) - %(message)s")
        logging.basicConfig(
            filename=self.data['path']['logs'] + '/{}/{}'.format(
                datetime.today().strftime('%Y-%m-%d'), url.replace('/', '-')),
            filemode='w',
            format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
            datefmt='%H:%M:%S',
            level=logging.INFO)
File where the class is being invoked:

from file import MyClass

for i in urls:
    obj = MyClass(i)
I also tried del obj after each iteration, but it doesn't help. I would hugely appreciate any hint.
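Note that logging.basicConfig() configures the root logger only on its first call, so every later MyClass(url) silently reuses the first file. A hedged sketch of one way around it, attaching a fresh FileHandler per url instead of calling basicConfig() (the log path is simplified here, since self.data is not shown in the question):

import logging
from datetime import datetime

class MyClass:
    def __init__(self, url):
        # One named logger per url, so instances don't share handlers.
        self.logger = logging.getLogger('bot.{}'.format(url))
        self.logger.setLevel(logging.DEBUG)
        handler = logging.FileHandler(
            '{}-{}.log'.format(datetime.today().strftime('%Y-%m-%d'),
                               url.replace('/', '-')),
            mode='w')
        handler.setFormatter(logging.Formatter(
            '%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
            datefmt='%H:%M:%S'))
        self.logger.addHandler(handler)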

Write different types of info to 2 different log files in Python 3?

I have usage and application logs.
I want to write application logs to an app.log file and usage logs to a usage.log file.
Here is how I have tried to do this:
# Application log:
logging.basicConfig(filename="app.log", level=logging.DEBUG,
                    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

# Usage log:
logging.basicConfig(filename="usage.log", level=logging.DEBUG,
                    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
For some reason I don't see the second file created.
Please advise: what am I missing here?
logging.basicConfig() configures the root logger only on its first call; once handlers exist, the second call is silently ignored. Setting up a separate logger per file is what solved my issue:
import logging

formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')

def setup_logger(name, log_file, level=logging.INFO):
    """To set up as many loggers as you want"""
    handler = logging.FileHandler(log_file)
    handler.setFormatter(formatter)

    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(handler)

    return logger

# first file logger
logger = setup_logger('first_logger', 'first_logfile.log')
logger.info('This is just an info message')

# second file logger
super_logger = setup_logger('second_logger', 'second_logfile.log')
super_logger.error('This is an error message')

def another_method():
    # the logger defined above also works here
    logger.info('Inside method')
Add different handlers to the default (root) logger:

import logging

def init_logging():
    logger = logging.getLogger()
    file_handler = logging.FileHandler('info.log')
    error_handler = logging.FileHandler('error.log')
    console_handler = logging.StreamHandler()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)
    error_handler.setFormatter(formatter)
    error_handler.setLevel(logging.ERROR)  # error.log only receives ERROR and above
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    logger.addHandler(error_handler)
    logger.setLevel(logging.DEBUG)

# use
init_logging()
LOGGER = logging.getLogger()
LOGGER.info('abc')

How to group related request log entries in GAE Python 3.7 standard env

I'm using Google App Engine Python 3.7 standard and I'm trying to group related request log entries.
According to the Writing Application Logs documentation, I should:

    Set the trace identifier in the LogEntry trace field of your app log entries. The expected format is projects/[PROJECT_ID]/traces/[TRACE_ID]

Where/how should I use LogEntry?
The Stackdriver Logging documentation doesn't show how this is possible. Am I missing something?
Code examples would be much appreciated.
[UPDATE]
Following Duck Hunt Duo's advice, I tried the following, without any success:

trace_id = request.headers.get('X-Cloud-Trace-Context', 'no_trace_id').split('/')[0]
client = logging.Client()
logger = client.logger('appengine.googleapis.com%2Fstdout')  # Not shown
# logger = client.logger('projects/{}/logs/stdout'.format(GOOGLE_CLOUD_PROJECT))  # error
# logger = client.logger('projects/{}/logs/appengine.googleapis.com%2Fstdout'.format(GOOGLE_CLOUD_PROJECT))  # error
logger.log_text('log_message', trace=trace_id)

The log doesn't appear in the GAE service log web console.
This is my basic solution:
trace_id = request.headers.get('X-Cloud-Trace-Context', 'no_trace_id').split('/')[0]
trace_str = "projects/{}/traces/{}".format(os.getenv('GOOGLE_CLOUD_PROJECT'), trace_id)

log_client = logging.Client()

# This is the resource type of the log
log_name = 'stdout'

# Inside the resource, nest the required labels specific to the resource type
labels = {
    'module_id': os.getenv('GAE_SERVICE'),
    'project_id': os.getenv('GOOGLE_CLOUD_PROJECT'),
    'version_id': os.getenv('GAE_VERSION')
}
res = Resource(type="gae_app", labels=labels)

logger = log_client.logger(log_name)
logger.log_text("MESSAGE_STRING_TO_LOG", resource=res, severity='ERROR', trace=trace_str)
After it was working, I wrapped it in a module so it would behave similarly to Google's logger for Python 2.7.
Here is my_gae_logging.py:
import logging as python_logging
import os

from flask import request
from google.cloud import logging as gcp_logging
from google.cloud.logging.resource import Resource

# Level constants from the GCP logging lib for Python 2.7
CRITICAL = 50
FATAL = CRITICAL
ERROR = 40
WARNING = 30
WARN = WARNING
INFO = 20
DEBUG = 10
NOTSET = 0

_levelNames = {
    CRITICAL: 'CRITICAL',
    ERROR: 'ERROR',
    WARNING: 'WARNING',
    INFO: 'INFO',
    DEBUG: 'DEBUG',
    NOTSET: 'NOTSET',
    'CRITICAL': CRITICAL,
    'ERROR': ERROR,
    'WARN': WARNING,
    'WARNING': WARNING,
    'INFO': INFO,
    'DEBUG': DEBUG,
    'NOTSET': NOTSET,
}

def get_trace_id():
    trace_str = None
    try:
        trace_id = request.headers.get('X-Cloud-Trace-Context', 'no_trace_id').split('/')[0]
        trace_str = "projects/{project_id}/traces/{trace_id}".format(
            project_id=os.getenv('GOOGLE_CLOUD_PROJECT'),
            trace_id=trace_id)
    except Exception:
        pass
    return trace_str

class Logging:
    def __init__(self):
        self._logger = None

    @property
    def logger(self):
        if self._logger is not None:
            return self._logger
        log_client = gcp_logging.Client()
        # This is the resource type of the log
        log_name = 'appengine.googleapis.com%2Fstdout'
        self._logger = log_client.logger(log_name)
        return self._logger

    @property
    def resource(self):
        # Inside the resource, nest the required labels specific to the resource type
        resource = Resource(
            type="gae_app",
            labels={
                'module_id': os.getenv('GAE_SERVICE'),
                'project_id': os.getenv('GOOGLE_CLOUD_PROJECT'),
                'version_id': os.getenv('GAE_VERSION')
            }
        )
        return resource

    def log(self, text):
        text = str(text)
        self.logger.log_text(text, resource=self.resource, trace=get_trace_id())

    def debug(self, text):
        text = str(text)
        self.logger.log_text(text, resource=self.resource,
                             severity=_levelNames.get(DEBUG), trace=get_trace_id())

    def info(self, text):
        text = str(text)
        self.logger.log_text(text, resource=self.resource,
                             severity=_levelNames.get(INFO), trace=get_trace_id())

    def warning(self, text):
        text = str(text)
        self.logger.log_text(text, resource=self.resource,
                             severity=_levelNames.get(WARNING), trace=get_trace_id())

    def warn(self, text):
        return self.warning(text)

    def error(self, text):
        text = str(text)
        self.logger.log_text(text, resource=self.resource,
                             severity=_levelNames.get(ERROR), trace=get_trace_id())

    def critical(self, text):
        text = str(text)
        self.logger.log_text(text, resource=self.resource,
                             severity=_levelNames.get(CRITICAL), trace=get_trace_id())

if os.getenv('GAE_VERSION'):  # check if running under the GCP env
    logging = Logging()
else:
    # when not running under the GCP env, use standard python_logging
    logging = python_logging
Usage:
from my_gae_logging import logging
logging.warn('this is my warning')
You might want to take a look at an answer I provided here. (That answer addresses how to add logging severity to Cloud Functions logs written into Stackdriver, but the basic workflow is the same.)
Quoting it:

    [...], you can still create logs with a certain severity by using the Stackdriver Logging Client Libraries. Check this documentation in reference to the Python libraries, and this one for some usage-case examples.

    Notice that in order to put the logs under the correct resource, you will have to configure them manually; see this list for the supported resource types. Each resource type also has some required labels that need to be present in the log structure.
Edit:
Updating the previous answer with an example for App Engine:
from google.cloud import logging
from google.cloud.logging.resource import Resource
from flask import Flask

app = Flask(__name__)

@app.route('/')
def logger():
    log_client = logging.Client()
    log_name = 'appengine.googleapis.com%2Fstdout'
    res = Resource(type='gae_app',
                   labels={
                       "project_id": "MY-PROJECT-ID",
                       "module_id": "MY-SERVICE-NAME"
                   })
    logger = log_client.logger(log_name)
    # As an example, log a message with ERROR severity
    logger.log_struct({"message": "message string to log"}, resource=res, severity='ERROR')
    return 'Wrote logs to {}.'.format(logger.name)
Using this code as an example, changing the resource type of the log to appengine.googleapis.com%2Fstdout should work; also change the Resource fields to match the gae_app labels described here.
Using the AppEngineHandler from Google Cloud Logging provides much of the infrastructure. It attaches to the Python logging module, so a standard logging import works.
Setting this up is straightforward enough:
# Setup google cloud logging.
import logging
import google.cloud.logging # Don't conflict with standard logging
from google.cloud.logging.handlers import AppEngineHandler, setup_logging
client = google.cloud.logging.Client()
handler = AppEngineHandler(client, name='stdout')
logging.getLogger().setLevel(logging.INFO)
setup_logging(handler)
The documentation at https://googleapis.dev/python/logging/latest/usage.html#cloud-logging-handler suggests something very similar, but uses the "CloudLoggingHandler" instead of the AppEngineHandler. It also states that the "AppEngineHandler" is for the flexible environment, yet the code above works in the standard Python 3 environment.
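For comparison, the CloudLoggingHandler variant that the linked documentation suggests differs only in the handler class (a sketch under the same assumptions as above):

import logging
import google.cloud.logging
from google.cloud.logging.handlers import CloudLoggingHandler, setup_logging

client = google.cloud.logging.Client()
handler = CloudLoggingHandler(client, name='stdout')
logging.getLogger().setLevel(logging.INFO)
setup_logging(handler)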
The Stackdriver Logging Client Library can be used to achieve this. The logger.log_text function sends a LogEntry object to the API. Example:
from google.cloud import logging
client = logging.Client()
logger = client.logger('appengine.googleapis.com%2Fstdout')
logger.log_text('log_message', trace=trace_id)
The trace_id should be retrieved from the request headers, as the docs mention. The method of doing this depends on how you're serving requests, but in Flask, for example, it would be as simple as trace_id = request.headers['X-Cloud-Trace-Context'].split('/')[0].
