Issue with nextSequenceToken for the PutLogEvents API (AWS CloudWatch Logs) - python-3.x

I am facing the below error while trying to use the AWS API for CloudWatch; the error occurs when I call put_log_events.
A bit of context: these are organizational CloudTrail logs, and I am trying to create a log group for each account (using a Kinesis stream for subscribing).
import base64
import gzip
import json
import logging
import os

import boto3

# Setup logging configuration
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.INFO)

logs_client = boto3.client('logs', region_name=os.getenv('AWS_REGION'))

# setting environment variables
global seq_token


def unpack_kinesis_stream_records(event):
    # decode and decompress each base64 encoded data element
    return [gzip.decompress(base64.b64decode(k["kinesis"]["data"])).decode('utf-8')
            for k in event["Records"]]


def decode_raw_cloud_trail_events(cloudTrailEventDataList):
    # Convert raw event data list
    eventList = [json.loads(e) for e in cloudTrailEventDataList]

    # Filter out non-DATA_MESSAGE records; we only need CloudWatch message type = DATA_MESSAGE
    filteredEvents = [e for e in eventList if e["messageType"] == 'DATA_MESSAGE']

    # Convert each individual log event message
    events = []
    for f in filteredEvents:
        for e in f["logEvents"]:
            events.append(
                {
                    'timestamp': e["timestamp"],
                    'message': e["message"],
                }
            )
    events.sort(key=lambda x: x["timestamp"])
    logger.info("{0} Event Logs Decoded".format(len(events)))

    log_group = "log_group_for_cloudtrail_" + eventList[0]["logStream"].split("_")[1]
    log_stream = os.getenv('AWS_LAMBDA_LOG_STREAM_NAME')

    # creating log group
    try:
        logs_client.create_log_group(
            logGroupName=log_group,
            tags={
                'Created By': 'ZH Lamda'
            }
        )
    except logs_client.exceptions.ResourceAlreadyExistsException:
        print("log group exists")

    try:
        logs_client.create_log_stream(
            logGroupName=log_group,
            logStreamName=log_stream,
        )
    except logs_client.exceptions.ResourceAlreadyExistsException:
        print("log stream already exists")

    return [log_group, log_stream, events]


def handle_request(event, context):
    seq_token = None

    # Unpack Kinesis stream records
    kinesis_data = unpack_kinesis_stream_records(event)

    # Decode and filter events
    events = decode_raw_cloud_trail_events(kinesis_data)
    if len(events[2]) == 0:
        return 'No events to process'

    log_event = {
        'logGroupName': events[0],
        'logStreamName': events[1],
        'logEvents': events[2],
    }
    if seq_token is not None:
        log_event['sequenceToken'] = seq_token

    response = logs_client.put_log_events(**log_event)
    seq_token = response['nextSequenceToken']
    return f"Successfully processed {len(events)} records."


def lambda_handler(event, context):
    return handle_request(event, context)
I can see the below error in the CloudWatch logs:
[ERROR] InvalidSequenceTokenException: An error occurred (InvalidSequenceTokenException) when calling the PutLogEvents operation: The given sequenceToken is invalid. The next expected sequenceToken is: null
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 120, in lambda_handler
return handle_request(event, context)
File "/var/task/lambda_function.py", line 107, in handle_request
response = logs_client.put_log_events(**log_event)
File "/var/runtime/botocore/client.py", line 391, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/var/runtime/botocore/client.py", line 719, in _make_api_call
raise error_class(parsed_response, operation_name)

I suspect the Lambda function is being invoked multiple times. If so, the problem is the global seq_token, which is only initialized the first time the execution environment starts.
On subsequent invocations, seq_token is still set from the previous run and is never reset to None. As a result, the next call to put_log_events() is given a sequence token from an old execution.
To fix this, initialize the seq_token variable within the handle_request() function rather than making it global.
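A minimal sketch of the fixed handler, reusing unpack_kinesis_stream_records(), decode_raw_cloud_trail_events() and logs_client from the question; the retry on InvalidSequenceTokenException is an extra safeguard rather than part of the original answer, and expectedSequenceToken is the field carried by the CloudWatch Logs error:

def handle_request(event, context):
    # Local variable: every invocation starts without a stale token.
    seq_token = None

    log_group, log_stream, log_events = decode_raw_cloud_trail_events(
        unpack_kinesis_stream_records(event))
    if not log_events:
        return 'No events to process'

    kwargs = {
        'logGroupName': log_group,
        'logStreamName': log_stream,
        'logEvents': log_events,
    }
    if seq_token is not None:
        kwargs['sequenceToken'] = seq_token

    try:
        response = logs_client.put_log_events(**kwargs)
    except logs_client.exceptions.InvalidSequenceTokenException as e:
        # The error payload carries the token the stream actually expects;
        # retry once with it.
        kwargs['sequenceToken'] = e.response['expectedSequenceToken']
        response = logs_client.put_log_events(**kwargs)

    seq_token = response['nextSequenceToken']
    return f"Successfully processed {len(log_events)} records."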

Related

Is there any way of getting values from keys inside other keys?

(First post, sorry if I do this wrong.) I am making a Discord bot for me and my friends using discord.py, and I've come across a problem: I need to get values from keys INSIDE OTHER keys. How do I do this?
I've tried changing res to res.text, res.json and res.content, and I could only find "data", but not "id", "name" or "description", which I need.
import discord
from discord.ext.commands import Bot
from discord.ext import commands
import requests, json
import asyncio

Client = discord.Client()
client = commands.Bot(command_prefix='?')


@client.event
async def on_ready():
    print('started')


@client.command()
async def findfriends(ctx, userid):
    res = requests.get("https://friends.roblox.com/v1/users/" + userid + "/friends")
    var = json.loads(res.text)

    def a(a):
        ID = a['id']
        return ID

    def b(b):
        Name = b['name']
        return Name

    def c(c):
        description = c['description']
        return description

    data = var['data']  # I can get this working
    print(data)
    # cv = data['name']  # but this wont work
    # id = a(var)  # nor this
    # name = b(var)  # nor this
    # desc = c(var)  # nor this
    # await ctx.send("\nID: " + id + "\nName: " + name + "\nDesc: " + desc)  # this is just sending the message

client.run("BOT TOKEN HERE")  # yes i did indeed add it but just for the question i removed it
As I said in the code, I can only get "data" working, not id, name or desc. For id, name and desc it just throws an error:
Ignoring exception in command findfriends:
Traceback (most recent call last):
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 79, in wrapped
ret = await coro(*args, **kwargs)
File "C:/Users/Calculator/PycharmProjects/ryhrthrthrhrebnfbngfbfg/a.py", line 277, in findfriends
id = a(var) #nor this
File "C:/Users/Calculator/PycharmProjects/ryhrthrthrhrebnfbngfbfg/a.py", line 266, in a
ID = a['id']
KeyError: 'id'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\bot.py", line 863, in invoke
await ctx.command.invoke(ctx)
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 728, in invoke
await injected(*ctx.args, **ctx.kwargs)
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 88, in wrapped
raise CommandInvokeError(exc) from exc
discord.ext.commands.errors.CommandInvokeError: Command raised an exception: KeyError: 'id'
and
Ignoring exception in command findfriends:
Traceback (most recent call last):
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 79, in wrapped
ret = await coro(*args, **kwargs)
File "C:/Users/Calculator/PycharmProjects/ryhrthrthrhrebnfbngfbfg/a.py", line 274, in findfriends
data = var['data']['id'] #I can get this working
TypeError: list indices must be integers or slices, not str
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\bot.py", line 863, in invoke
await ctx.command.invoke(ctx)
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 728, in invoke
await injected(*ctx.args, **ctx.kwargs)
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 88, in wrapped
raise CommandInvokeError(exc) from exc
discord.ext.commands.errors.CommandInvokeError: Command raised an exception: TypeError: list indices must be integers or slices, not str
The https://friends.roblox.com/v1/users/<userid>/friends endpoint returns a list of all the friends that the user has, which can be of varying size.
With var = json.loads(res.text) you parse the response text into a Python dict, which contains the key data; you access that with data = var['data']. The new data variable now holds a list object, which is why cv = data['name'] fails to work: list objects do not take strings as keys, they are accessed using integers.
You need to iterate over the list to get the information for every friend. The code below goes through the list, pulls the information for each item, and sends one message (see the note on message length after the code) once it has processed all items.
import discord
from discord.ext.commands import Bot
from discord.ext import commands
import requests, json
import asyncio

client = commands.Bot(command_prefix='?')


@client.event
async def on_ready():
    print('started')


@client.command()
async def findfriends(ctx, userid):
    res = requests.get("https://friends.roblox.com/v1/users/" + userid + "/friends")
    var = json.loads(res.text)
    data = var['data']
    print(data)
    friends_msg = 'Friends information:'
    for friend in data:
        id = friend['id']
        name = friend['name']
        desc = friend['description']
        # id is an integer in the API response, so cast before concatenating
        friends_msg = friends_msg + "\nID: " + str(id) + "\nName: " + name + "\nDesc: " + str(desc)
    await ctx.send(friends_msg)

client.run("BOT TOKEN HERE")
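One caveat that is not in the original answer: Discord rejects messages longer than 2000 characters, so for a user with many friends the single ctx.send(friends_msg) call can fail. A hedged variant replaces that call and sends the text in slices:

    # Discord caps messages at 2000 characters; send the reply in chunks.
    for i in range(0, len(friends_msg), 1900):
        await ctx.send(friends_msg[i:i + 1900])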

Copying a file from one S3 location to another does not work after an Athena query (boto3)

I am using boto3 to query results from Athena, and that part works fine.
I am then using boto3 again to copy the output file from one S3 bucket to another, but it says it is unable to find the file. I am unable to find a solution. Please help!
When I go to the S3 console I can see the file, but boto3 is unable to find it.
import boto3

athena = boto3.client('athena')
s3 = boto3.resource('s3')
BUCKET_NAME = 'bucket1'
bucket = s3.Bucket(BUCKET_NAME)

query = 'SELECT * FROM "db"."table" limit 2'
response = athena.start_query_execution(
    QueryString=query,
    QueryExecutionContext={
        'Database': 'db'
    },
    ResultConfiguration={
        'OutputLocation': 's3://bucket1/',
    })
key = response['QueryExecutionId'] + '.csv'
copy_source = {
    'Bucket': 'bucket1',
    'Key': key
}
s3.meta.client.copy(copy_source, 'bucket2', 'main.csv')
The error is:
Traceback (most recent call last):
File "/Users/tanmaysinghal/Vizualization/Python Scripts/test.py", line 23, in <module>
s3.meta.client.copy(copy_source, 'bucket2', 'main.csv')
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/boto3/s3/inject.py", line 379, in copy
return future.result()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/s3transfer/futures.py", line 106, in result
return self._coordinator.result()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/s3transfer/futures.py", line 265, in result
raise self._exception
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/s3transfer/tasks.py", line 255, in _main
self._submit(transfer_future=transfer_future, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/s3transfer/copies.py", line 110, in _submit
**head_object_request)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/botocore/client.py", line 357, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/botocore/client.py", line 661, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (404) when calling the HeadObject operation: Not Found
There is a fundamental problem with your code: you are trying to copy the Athena output file from S3 before the query has completed.
By the time you look at it in the console, a few seconds have passed and the file is ready.
You must wait for Athena to complete your query and write the output to S3. Here is how Athena works:

1. Submit a query.
2. Check the status of your query.
3. If the status is RUNNING, wait and go to step 2. If it is SUCCEEDED, go to step 4. If it is FAILED, take corrective action.
4. Read the output file from S3.

Here is working code:
import boto3
import time

athena = boto3.client('athena')
s3 = boto3.resource('s3')  # needed for the copy at the end

query = 'SELECT * FROM your-database.your-table limit 10'
response = athena.start_query_execution(
    QueryString=query,
    QueryExecutionContext={
        'Database': 'your-database'
    },
    ResultConfiguration={
        'OutputLocation': 's3://your-s3-output-bucket',
    })
execution_id = response['QueryExecutionId']
key = execution_id + ".csv"

# Wait for the query to complete, then read the result.
state = 'RUNNING'
while state in ['RUNNING']:
    response = athena.get_query_execution(QueryExecutionId=execution_id)
    if 'QueryExecution' in response and \
            'Status' in response['QueryExecution'] and \
            'State' in response['QueryExecution']['Status']:
        state = response['QueryExecution']['Status']['State']
        if state == 'FAILED':
            print("FAILED")
        elif state == 'SUCCEEDED':
            s3_path = response['QueryExecution']['ResultConfiguration']['OutputLocation']
            print("S3-Path:" + s3_path)
    time.sleep(1)

# If the state is SUCCEEDED, the query has completed and the output file
# can be read or copied somewhere else.
if state == 'SUCCEEDED':
    copy_source = {
        'Bucket': 'your-s3-output-bucket',
        'Key': key
    }
    s3.meta.client.copy(copy_source, 'bucket2', 'main.csv')
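As a side note, the polling loop can be factored into a small helper. The QUEUED state is an assumption beyond the original answer (newer Athena engines report it before RUNNING), and athena and execution_id are the names from the code above:

import time

def wait_for_query(athena, execution_id, poll_seconds=1):
    # Poll until Athena reports a terminal state (SUCCEEDED, FAILED, CANCELLED).
    while True:
        response = athena.get_query_execution(QueryExecutionId=execution_id)
        state = response['QueryExecution']['Status']['State']
        if state not in ('QUEUED', 'RUNNING'):
            return state, response
        time.sleep(poll_seconds)

state, response = wait_for_query(athena, execution_id)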
Hope this helps!

Python - Multiprocessing Pool map returning can't pickle error

I have the following code, which creates a TestRail client and executes TestRail's get_suites API call.
I have a function that calls the get_suites API, and I pass the TestRail client and test_rail_project_id as params.
I am trying to use multiprocessing to run the call over my list of projects to speed things up, but I am getting a can't-pickle error.
My code:
from itertools import product
from multiprocessing import Pool  # missing from the original snippet

# APIClient and TESTRAIL_URL come from the TestRail API bindings (omitted here)


def get_suites(client, project_id):
    try:
        path = 'get_suites/{projectid}'.format(projectid=project_id)
        test_rail_response = client.send_get(path)
        return test_rail_response
    except Exception as e:
        raise Exception(str(e))


if __name__ == "__main__":
    testRailClient = APIClient(TESTRAIL_URL)
    pool = Pool(2)
    all_project_ids = [100, 200, 300]
    data = pool.starmap(get_suites, product([testRailClient], all_project_ids))
Error stack:
Traceback (most recent call last):
File "main.py", line 57, in <module>
data = pool.starmap(testrailapi.get_suites, product([testRailClient], all_project_ids))
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 274, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 644, in get
raise self._value
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 424, in _handle_tasks
put(task)
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: can't pickle SSLContext objects
Any suggestions, please?
Thank you.
PS: I am using Python 3.6.
UPDATE:
As suggested, I tried removing the API client as a parameter and it worked, but I get the same error when get_suites is a method. Please see my updated code below.
class TestRailExecution:
    def __init__(self, url, username, password):
        self.url = url
        self.username = username
        self.password = password
        self.client = APIClient(self.url)
        self.client.user = username
        self.client.password = password

    def get_suites(self, project_id):
        try:
            path = 'get_suites/{projectid}'.format(projectid=project_id)
            test_rail_response = self.client.send_get(path)
            return test_rail_response
        except Exception as e:
            raise Exception(str(e))


if __name__ == "__main__":
    testRailClient = TestRailExecution(TESTRAIL_URL, user, password)
    pool = Pool(2)
    data = pool.map(testRailClient.get_suites, [100, 200, 300])
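The traceback points at the usual cause: pool.map() pickles its arguments, and a bound method like testRailClient.get_suites drags the whole instance with it, including self.client and its SSLContext, which cannot be pickled. A common workaround, sketched here assuming APIClient, TESTRAIL_URL, user and password from the question, is to build the client inside each worker so nothing unpicklable crosses the process boundary:

from multiprocessing import Pool

def get_suites(project_id):
    # Build the client inside the worker process; only the picklable
    # project_id crosses the process boundary.
    client = APIClient(TESTRAIL_URL)
    client.user = user
    client.password = password
    return client.send_get('get_suites/{0}'.format(project_id))

if __name__ == "__main__":
    with Pool(2) as pool:
        data = pool.map(get_suites, [100, 200, 300])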

Name 'Key' not defined Lambda function to access DynamoDB

I am using the query function from the boto3 library in Python and receiving the following error:
name 'Key' is not defined: NameError
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 51, in lambda_handler
if not getAssetExistance(slack_userID):
File "/var/task/lambda_function.py", line 23, in getAssetExistance
response = dynamoTable.query(KeyConditionExpression=Key('userID').eq(asset))
NameError: name 'Key' is not defined
I have been reading through a bunch of tutorials on accessing DynamoDB through Lambda, and they all use this KeyConditionExpression line when checking whether a key exists.
Here is the relevant code (line 23 is the query line):
def getAssetExistance(asset):
    dynamoTable = dynamo.Table('Assets')
    response = dynamoTable.query(KeyConditionExpression=Key('userID').eq(asset))
    return bool(response)
I basically want to check the primary partition key in my DynamoDB table (which is a Slack user ID) and see if it exists.
Here is the rest of the code if it is relevant:
################################
# Slack Lambda handler.
################################
import boto3
import logging
import os
import urllib.parse
import urllib.request

# Grab data from the environment.
BOT_TOKEN = os.environ["BOT_TOKEN"]
ASSET_TABLE = os.environ["ASSET_TABLE"]
REGION_NAME = os.getenv('REGION_NAME', 'us-east-1')

dynamo = boto3.resource('dynamodb', region_name=REGION_NAME,
                        endpoint_url="https://dynamodb.us-east-1.amazonaws.com")

# Define the URL of the targeted Slack API resource.
SLACK_URL = "https://slack.com/api/chat.postMessage"


def getAssetExistance(asset):
    dynamoTable = dynamo.Table('Assets')
    response = dynamoTable.query(KeyConditionExpression=Key('userID').eq(asset))
    return bool(response)


def lambda_handler(data, context):
    # Slack challenge answer.
    if "challenge" in data:
        return data["challenge"]

    # Grab the Slack channel data.
    slack_event = data['event']
    slack_userID = slack_event["user"]
    slack_text = slack_event["text"]
    channel_id = slack_event["channel"]
    slack_reply = ""

    # Ignore bot messages.
    if "bot_id" in slack_event:
        slack_reply = ""
    else:
        # Start data sift.
        if slack_text.startswith("!networth"):
            slack_reply = "Your networth is: "
        elif slack_text.startswith("!price"):
            command, asset = slack_text.split()
            slack_reply = "The price of a(n) %s is: " % (asset)
        elif slack_text.startswith("!addme"):
            if not getAssetExistance(slack_userID):
                slack_reply = "Adding user: %s" % (slack_userID)
                dynamo.update_item(TableName=ASSET_TABLE,
                                   Key={'userID': {'S': slack_userID}},
                                   AttributeUpdates={
                                       'resources': {
                                           'Action': 'ADD',
                                           'Value': {'N': '1000'}
                                       }
                                   })
            else:
                slack_reply = "User %s already exists" % (slack_userID)

    # We need to send back three pieces of information:
    data = urllib.parse.urlencode(
        (
            ("token", BOT_TOKEN),
            ("channel", channel_id),
            ("text", slack_reply)
        )
    )
    data = data.encode("ascii")

    # Construct the HTTP request that will be sent to the Slack API.
    request = urllib.request.Request(
        SLACK_URL,
        data=data,
        method="POST"
    )

    # Add a header mentioning that the text is URL-encoded.
    request.add_header(
        "Content-Type",
        "application/x-www-form-urlencoded"
    )

    # Fire off the request!
    urllib.request.urlopen(request).read()

    # Everything went fine.
    return "200 OK"
My DynamoDB table is named 'Assets' and has a primary partition key named 'userID' (string).
I am definitely still new to all this, so don't be afraid to call me a dummy. Any and all help is appreciated. The goal of this code is to check whether a user exists as a key in DynamoDB and, if not, add them to the table.
You need to import the Key function, like so:

from boto3.dynamodb.conditions import Key

Alternatively, without a second import, you can reference it by its full path, boto3.dynamodb.conditions.Key:

def getAssetExistance(asset):
    dynamoTable = dynamo.Table('Assets')
    response = dynamoTable.query(
        KeyConditionExpression=boto3.dynamodb.conditions.Key('userID').eq(asset))
    return bool(response)
I wrote the code below for getting data from DynamoDB and got the error 'NameError: name 'Key' is not defined'.
I had to fix it by importing from boto3.dynamodb.conditions import Key:

response = dynamo_table.query(KeyConditionExpression=Key(val).eq(str(key_dictionary[val])))
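One further caveat, beyond either answer: bool(response) is always True, because query() returns a non-empty metadata dict even when no items match. A sketch of the exists-then-add flow, assuming the Assets table and userID key from the question:

from boto3.dynamodb.conditions import Key

def user_exists(table, user_id):
    # Count is 0 when no item matches the partition key.
    response = table.query(KeyConditionExpression=Key('userID').eq(user_id))
    return response['Count'] > 0

def add_user_if_missing(table, user_id):
    if not user_exists(table, user_id):
        # The Table resource takes plain Python types, no {'S': ...} wrappers.
        table.put_item(Item={'userID': user_id, 'resources': 1000})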

AttributeError: 'module' object has no attribute 'ensure_future'

Hi, I am writing a network-bound server application using Python asyncio which can accept a POST request.
In the POST request I accept a symbols parameter.
Please tell me the best way to deal with a network-bound application where I am collecting the data from other web APIs by sending POST requests to them.
Here is the code:
import asyncio
import aiohttp
import json
import logging

# async def fetch_content(url, symbols):
#     yield from aiohttp.post(url, symbols=symbols)


@asyncio.coroutine
def fetch_page(writer, url, data):
    response = yield from aiohttp.post(url, data=data)
    resp = yield from response.read_and_close()
    print(resp)
    writer.write(resp)
    return


@asyncio.coroutine
def process_payload(writer, data, scale):
    tasks = []
    data = data.split('\r\n\r\n')[1]
    data = data.split('\n')
    data = [x.split(':') for x in data]
    print(data)
    data = {x[0]: x[1] for x in data}
    print(data)
    # data = data[0].split(':')[1]
    data = data['symbols']
    print(data)
    data = data.split(',')
    data_len = len(data)
    data_first = 0
    data_last = scale
    url = 'http://xxxxxx.xxxxxx.xxx/xxxx/xxxx'
    while data_last < data_len:
        tasks.append(asyncio.ensure_future(
            fetch_page(writer, url, {'symbols': ",".join(data[data_first:data_last])})))
        data_first += scale
        data_last += scale
    tasks.append(asyncio.ensure_future(
        fetch_page(writer, url, {'symbols': ",".join(data[data_first:data_last])})))
    loop.run_until_complete(tasks)
    return


@asyncio.coroutine
def process_url(url):
    pass


@asyncio.coroutine
def echo_server():
    yield from asyncio.start_server(handle_connection, 'xxxxxx.xxxx.xxx', 3000)


@asyncio.coroutine
def handle_connection(reader, writer):
    data = yield from reader.read(8192)
    if data:
        message = data.decode('utf-8')
        print(message)
        yield from process_payload(writer, message, 400)
    writer.write_eof()
    writer.close()


# url = 'http://XXXXXXX.xxxxx.xxx/xxxx/xxxxxx/xxx'
data = {'symbols': 'GD-US,14174T10,04523Y10,88739910,03209R10,46071F10,77543110,92847N10'}

loop = asyncio.get_event_loop()
loop.run_until_complete(echo_server())
try:
    loop.run_forever()
finally:
    loop.close()
But I am receiving the following error:
future: <Task finished coro=<handle_connection() done, defined at fql_server_async_v2.py:53> exception=AttributeError("'module' object has no attribute 'ensure_future'",)>
Traceback (most recent call last):
File "/home/user/anugupta/lib/python3.4/asyncio/tasks.py", line 234, in _step
result = coro.send(value)
File "fql_server_async_v2.py", line 60, in handle_connection
yield from process_payload(writer, message, 400)
File "/home/user/anugupta/lib/python3.4/asyncio/coroutines.py", line 141, in coro
res = func(*args, **kw)
File "fql_server_async_v2.py", line 41, in process_payload
tasks.append(asyncio.ensure_future(fetch_page(writer, url, {'symbols':",".join(data[data_first:data_last])})))
AttributeError: 'module' object has no attribute 'ensure_future'
^CTraceback (most recent call last):
File "fql_server_async_v2.py", line 72, in <module>
loop.run_forever()
File "/home/user/anugupta/lib/python3.4/asyncio/base_events.py", line 236, in run_forever
self._run_once()
File "/home/user/anugupta/lib/python3.4/asyncio/base_events.py", line 1017, in _run_once
event_list = self._selector.select(timeout)
File "/home/user/anugupta/lib/python3.4/selectors.py", line 424, in select
fd_event_list = self._epoll.poll(timeout, max_ev)
ensure_future was added to asyncio in Python 3.4.4; use asyncio.async for earlier versions.
While asyncio.async is deprecated now, it will be supported for the foreseeable future.
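A version-agnostic sketch of that advice (getattr is used because async became a reserved keyword in Python 3.7, so writing asyncio.async literally would be a syntax error there):

import asyncio

# Prefer ensure_future (3.4.4+); fall back to the old name on earlier 3.4.x.
ensure_future = getattr(asyncio, 'ensure_future', None) or getattr(asyncio, 'async')

# Usage, with fetch_page, writer, url and data as in the question:
# tasks.append(ensure_future(fetch_page(writer, url, data)))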
