I am using the query function from the boto3 library in Python and receiving the following error:
NameError: name 'Key' is not defined

Traceback (most recent call last):
  File "/var/task/lambda_function.py", line 51, in lambda_handler
    if not getAssetExistance(slack_userID):
  File "/var/task/lambda_function.py", line 23, in getAssetExistance
    response = dynamoTable.query(KeyConditionExpression=Key('userID').eq(asset))
NameError: name 'Key' is not defined
I have been reading through a bunch of tutorials on accessing DynamoDB through Lambda, and they all use this KeyConditionExpression line when checking whether a key exists.
Here is the relevant code (line 23 is the query line):
def getAssetExistance(asset):
    dynamoTable = dynamo.Table('Assets')
    response = dynamoTable.query(KeyConditionExpression=Key('userID').eq(asset))
    return bool(response)
I basically want to check the primary partition key in my DynamoDB table (which is a Slack user ID) and see if it exists.
Here is the rest of the code if it is relevant:
################################
# Slack Lambda handler.
################################

import boto3
import logging
import os
import urllib

# Grab data from the environment.
BOT_TOKEN = os.environ["BOT_TOKEN"]
ASSET_TABLE = os.environ["ASSET_TABLE"]
REGION_NAME = os.getenv('REGION_NAME', 'us-east-1')

dynamo = boto3.resource('dynamodb', region_name=REGION_NAME, endpoint_url="https://dynamodb.us-east-1.amazonaws.com")

# Define the URL of the targeted Slack API resource.
SLACK_URL = "https://slack.com/api/chat.postMessage"

def getAssetExistance(asset):
    dynamoTable = dynamo.Table('Assets')
    response = dynamoTable.query(KeyConditionExpression=Key('userID').eq(asset))
    return bool(response)

def lambda_handler(data, context):
    # Slack challenge answer.
    if "challenge" in data:
        return data["challenge"]

    # Grab the Slack channel data.
    slack_event = data['event']
    slack_userID = slack_event["user"]
    slack_text = slack_event["text"]
    channel_id = slack_event["channel"]
    slack_reply = ""

    # Ignore bot messages.
    if "bot_id" in slack_event:
        slack_reply = ""
    else:
        # Start data sift.
        if slack_text.startswith("!networth"):
            slack_reply = "Your networth is: "
        elif slack_text.startswith("!price"):
            command, asset = text.split()
            slack_reply = "The price of a(n) %s is: " % (asset)
        elif slack_text.startswith("!addme"):
            if not getAssetExistance(slack_userID):
                slack_reply = "Adding user: %s" % (slack_userID)
                dynamo.update_item(TableName=ASSET_TABLE,
                    Key={'userID': {'S': 'slack_userID'}},
                    AttributeUpdates={
                        'resources': {
                            'Action': 'ADD',
                            'Value': {'N': '1000'}
                        }
                    }
                )
            else:
                slack_reply = "User %s already exists" % (slack_userID)

    # We need to send back three pieces of information:
    data = urllib.parse.urlencode(
        (
            ("token", BOT_TOKEN),
            ("channel", channel_id),
            ("text", slack_reply)
        )
    )
    data = data.encode("ascii")

    # Construct the HTTP request that will be sent to the Slack API.
    request = urllib.request.Request(
        SLACK_URL,
        data=data,
        method="POST"
    )

    # Add a header mentioning that the text is URL-encoded.
    request.add_header(
        "Content-Type",
        "application/x-www-form-urlencoded"
    )

    # Fire off the request!
    urllib.request.urlopen(request).read()

    # Everything went fine.
    return "200 OK"
My DynamoDB table is named 'Assets' and has a primary partition key named 'userID' (string).
I am definitely still new to all this, so don't be afraid of calling me a dummy. Any and all help is appreciated. The goal of this code is to check if a user exists as a key in DynamoDB and if not, add them to the table.
You need to import the Key function, like so:
from boto3.dynamodb.conditions import Key
Without importing it a second time, you can address it like this:
boto3.dynamodb.conditions.Key
def getAssetExistance(asset):
    dynamoTable = dynamo.Table('Assets')
    response = dynamoTable.query(KeyConditionExpression=boto3.dynamodb.conditions.Key('userID').eq(asset))
    return bool(response)
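One caveat worth adding: query() returns a response dict either way, so bool(response) is always True even when nothing matched. A minimal sketch of a version that actually reports existence, assuming the same 'Assets' table and 'userID' key from the question:

from boto3.dynamodb.conditions import Key

def getAssetExistance(asset):
    dynamoTable = dynamo.Table('Assets')
    response = dynamoTable.query(KeyConditionExpression=Key('userID').eq(asset))
    # The response is a dict containing 'Items' and 'Count'; check the
    # match count rather than the truthiness of the dict itself.
    return response['Count'] > 0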
I wrote the code below for getting data from DynamoDB and got the error 'NameError: name 'Key' is not defined'.
I had to fix it by adding the import 'from boto3.dynamodb.conditions import Key':
response = dynamo_table.query(KeyConditionExpression=Key(val).eq(str(key_dictionary[val])))
Related
I'm currently writing an API in Flask and am trying to get my POST request to work. Ideally, it would add a band to the DB with a band_id, name, genre, number of gigs, and a rating. For some reason, running the proper POST request in Postman returns a 405. I didn't even know an API *I made* would tell me I don't have access to a POST I'm writing. Do I need to change anything to make it have access?
from flask import Flask, request
from flask_restful import Api, Resource
import sqlite3

app = Flask(__name__)
api = Api(app)

class Band(Resource):
    def get(self, band_id):
        conn = sqlite3.connect('database.db')
        c = conn.cursor()
        c.execute("SELECT * FROM bands WHERE band_id=?", (band_id,))
        result = c.fetchone()
        if result:
            return {"message": result}
        else:
            return {"message": "Band not found"}

    def post(self):
        data = request.get_json()
        if not all(key in data for key in ('band_name', 'band_genre', 'gigs', 'rating')):
            return {"message": "Missing data"}
        band_name = data['band_name']
        band_genre = data['band_genre']
        gigs = data['gigs']
        rating = data['rating']
        conn = sqlite3.connect('database.db')
        c = conn.cursor()
        c.execute("INSERT INTO bands (band_name,band_genre,gigs,rating) VALUES (?,?,?,?)", (band_name, band_genre, gigs, rating))
        conn.commit()
        return {"message": "Success"}

api.add_resource(Band, '/bands/<int:band_id>')

if __name__ == "__main__":
    conn = sqlite3.connect('database.db')
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS bands")
    c.execute("""CREATE TABLE IF NOT EXISTS bands (
        band_id INTEGER PRIMARY KEY,
        band_name TEXT,
        band_genre TEXT,
        gigs INTEGER,
        rating INTEGER
    )""")
    conn.commit()
    app.run(debug=True)
I've tried changing the call in Postman and fiddling around with my add_resource lines, but to no avail. Totally lost on what to do next.
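One likely cause (an assumption, since the exact Postman URL isn't shown): the resource is registered only at /bands/<int:band_id>, so a POST to /bands has no route to match, and a POST to /bands/1 would be dispatched to post(self), which accepts no band_id. A common pattern is to split the collection and detail endpoints; a minimal sketch with a hypothetical BandList class:

class BandList(Resource):
    def post(self):
        # Same INSERT logic as the question's post(), now reachable at /bands.
        data = request.get_json()
        conn = sqlite3.connect('database.db')
        c = conn.cursor()
        c.execute(
            "INSERT INTO bands (band_name,band_genre,gigs,rating) VALUES (?,?,?,?)",
            (data['band_name'], data['band_genre'], data['gigs'], data['rating']),
        )
        conn.commit()
        return {"message": "Success"}

api.add_resource(Band, '/bands/<int:band_id>')  # GET a single band
api.add_resource(BandList, '/bands')            # POST a new band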
I am facing the below error while trying to use the AWS API for CloudWatch; the error occurs when I call put_log_events.
A bit of context: these are organizational CloudTrail logs, and I am trying to create a log group for each account (using a Kinesis stream for subscribing).
import base64
import gzip
import json
import logging
import os
import boto3

# Setup logging configuration
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.INFO)

logs_client = boto3.client('logs', region_name=os.getenv('AWS_REGION'))

# setting environment variables
global seq_token

def unpack_kinesis_stream_records(event):
    # decode and decompress each base64 encoded data element
    return [gzip.decompress(base64.b64decode(k["kinesis"]["data"])).decode('utf-8') for k in event["Records"]]

def decode_raw_cloud_trail_events(cloudTrailEventDataList):
    # Convert Raw Event Data List
    eventList = [json.loads(e) for e in cloudTrailEventDataList]

    # Filter out non-DATA_MESSAGE entries since we only require CloudWatch message type = DATA_MESSAGE
    filteredEvents = [
        e for e in eventList if e["messageType"] == 'DATA_MESSAGE']

    # Convert each individual log Event Message
    events = []
    for f in filteredEvents:
        for e in f["logEvents"]:
            events.append(
                {
                    'timestamp': e["timestamp"],
                    'message': e["message"],
                }
            )
    events.sort(key=lambda x: x["timestamp"])
    logger.info("{0} Event Logs Decoded".format(len(events)))

    log_group = ("log_group_for_cloudtrail_" + eventList[0]["logStream"].split("_")[1])
    log_stream = os.getenv('AWS_LAMBDA_LOG_STREAM_NAME')

    # creating log group
    try:
        logs_client.create_log_group(
            logGroupName=log_group,
            tags={
                'Created By': 'ZH Lamda'
            }
        )
    except logs_client.exceptions.ResourceAlreadyExistsException:
        print("log group exists")

    try:
        logs_client.create_log_stream(
            logGroupName=log_group,
            logStreamName=log_stream,
        )
    except logs_client.exceptions.ResourceAlreadyExistsException:
        print("log stream already exists")

    return [log_group, log_stream, events]

def handle_request(event, context):
    seq_token = None

    # Unpack Kinesis Stream Records
    kinesis_data = unpack_kinesis_stream_records(event)

    # Decode and filter events
    events = decode_raw_cloud_trail_events(kinesis_data)
    if len(events[2]) == 0:
        return f'No events to process'

    log_event = {
        'logGroupName': events[0],
        'logStreamName': events[1],
        'logEvents': events[2],
    }
    if seq_token is not None:
        log_event['sequenceToken'] = seq_token

    response = logs_client.put_log_events(**log_event)
    seq_token = response['nextSequenceToken']

    return f"Successfully processed {len(events)} records."

def lambda_handler(event, context):
    return handle_request(event, context)
I can see the below error in the CloudWatch logs:
[ERROR] InvalidSequenceTokenException: An error occurred (InvalidSequenceTokenException) when calling the PutLogEvents operation: The given sequenceToken is invalid. The next expected sequenceToken is: null
Traceback (most recent call last):
  File "/var/task/lambda_function.py", line 120, in lambda_handler
    return handle_request(event, context)
  File "/var/task/lambda_function.py", line 107, in handle_request
    response = logs_client.put_log_events(**log_event)
  File "/var/runtime/botocore/client.py", line 391, in _api_call
    return self._make_api_call(operation_name, kwargs)
  File "/var/runtime/botocore/client.py", line 719, in _make_api_call
    raise error_class(parsed_response, operation_name)
I suspect that the Lambda function is being invoked multiple times. If so, the problem is due to global seq_token, which only initializes the value of the variable the first time the function is invoked.
On future invocations, seq_token is already set from the previous run and is never reset to None. As a result, when put_log_events() is next called, the if statement sets a sequence token from an old execution.
To fix this, initialize the seq_token variable within the handle_request() function rather than making it global.
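A minimal sketch of that fix (the describe_log_streams lookup is an illustration I'm adding, not part of the original code): fetch the stream's current uploadSequenceToken inside the handler on every invocation instead of carrying a token over in module state:

def put_events(logs_client, log_group, log_stream, events):
    # Look up the stream's current sequence token each invocation; a
    # stale token from an earlier run causes InvalidSequenceTokenException.
    seq_token = None
    streams = logs_client.describe_log_streams(
        logGroupName=log_group,
        logStreamNamePrefix=log_stream,
    )['logStreams']
    if streams and 'uploadSequenceToken' in streams[0]:
        seq_token = streams[0]['uploadSequenceToken']

    log_event = {
        'logGroupName': log_group,
        'logStreamName': log_stream,
        'logEvents': events,
    }
    if seq_token is not None:
        log_event['sequenceToken'] = seq_token
    return logs_client.put_log_events(**log_event)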
I am able to get the data from PDF to text.
But now I need to get the data in CSV format with the table structure.
I tried to get the table structure, but it didn't happen. Any inputs?
Also, I'm able to generate it through JSON.
Is there a way to get the result into a table/CSV format?
Below is the code I have used.
import boto3
import time

# Document
s3BucketName = "textractanalysisexample"
documentName = "sheet_example.pdf"

def startJob(s3BucketName, objectName):
    response = None
    client = boto3.client('textract')
    response = client.start_document_text_detection(
        DocumentLocation={
            'S3Object': {
                'Bucket': s3BucketName,
                'Name': objectName
            }
        })
    return response["JobId"]

def isJobComplete(jobId):
    # For production use cases, use SNS based notification
    # Details at: https://docs.aws.amazon.com/textract/latest/dg/api-async.html
    time.sleep(5)
    client = boto3.client('textract')
    response = client.get_document_text_detection(JobId=jobId)
    status = response["JobStatus"]
    #print("Job status: {}".format(status))
    while(status == "IN_PROGRESS"):
        time.sleep(5)
        response = client.get_document_text_detection(JobId=jobId)
        status = response["JobStatus"]
        #print("Job status: {}".format(status))
    return status

def getJobResults(jobId):
    pages = []
    client = boto3.client('textract')
    response = client.get_document_text_detection(JobId=jobId)
    pages.append(response)
    print("Resultset page received: {}".format(len(pages)))
    nextToken = None
    if('NextToken' in response):
        nextToken = response['NextToken']
    while(nextToken):
        response = client.get_document_text_detection(JobId=jobId, NextToken=nextToken)
        pages.append(response)
        #print("Resultset page received: {}".format(len(pages)))
        nextToken = None
        if('NextToken' in response):
            nextToken = response['NextToken']
    return pages

def lambda_handler(event, context):
    jobId = startJob(s3BucketName, documentName)
    #print("Started job with id: {}".format(jobId))
    if(isJobComplete(jobId)):
        response = getJobResults(jobId)
    # Print detected text
    for resultPage in response:
        for item in resultPage["Blocks"]:
            if item["BlockType"] == "LINE":
                print(item["Text"])
You can import the csv module to write to a CSV file like so:
import csv

with open('my_pdf.txt', 'r') as in_file:
    stripped = (line.strip() for line in in_file)
    lines = (line.split(",") for line in stripped if line)
    with open('my_pdf.csv', 'w') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(('title', 'intro'))
        writer.writerows(lines)
You can just put in the rows you need, and this splits your data into comma-separated values. You can find more information on csv.writer (and the csv module in general) in the Python docs.
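Also worth noting (an addition beyond the csv snippet above): start_document_text_detection only returns LINE and WORD blocks, so the table structure is never in the response to begin with. Textract's analysis API can return TABLE and CELL blocks; a minimal sketch, reusing the bucket and document names from the question:

client = boto3.client('textract')
# Ask Textract for table structure instead of plain text detection.
response = client.start_document_analysis(
    DocumentLocation={
        'S3Object': {'Bucket': s3BucketName, 'Name': documentName}
    },
    FeatureTypes=['TABLES'],
)
jobId = response['JobId']
# Poll with get_document_analysis(JobId=jobId), mirroring how the
# question polls get_document_text_detection, then walk the TABLE and
# CELL blocks to rebuild rows before handing them to csv.writer.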
I can't, for the life of me, figure out what is wrong with the following four lines of code.
def getAssetExistance(asset, element, table):
    dynamoTable = dynamo.Table(table)
    response = dynamoTable.query(KeyConditionExpression=Key(element).eq(asset)
    return bool(response)
I am running this through AWS Lambda, and the CloudWatch log tells me the error is on the return line. This is the error (line 24 is the return line):
Syntax error in module 'lambda_function': invalid syntax (lambda_function.py, line 24)
In case this helps at all, here is the rest of the code:
################################
# Slack Lambda handler.
################################

import boto3
import os
import logging
import urllib

# Grab data from the environment.
BOT_TOKEN = os.environ["BOT_TOKEN"]
ASSET_TABLE = os.environ["ASSET_TABLE"]
REGION_NAME = os.getenv('REGION_NAME', 'us-east-1')

dynamo = boto3.client('dynamodb', region_name=REGION_NAME)

# Define the URL of the targeted Slack API resource.
SLACK_URL = "https://slack.com/api/chat.postMessage"

def getAssetExistance(asset, element, table):
    dynamoTable = dynamo.Table(table)
    response = dynamoTable.query(KeyConditionExpression=Key(element).eq(asset)
    return bool(response)

def lambda_handler(data, context):
    # Slack challenge answer.
    if "challenge" in data:
        return data["challenge"]

    # Grab the Slack channel data.
    slack_event = data['event']
    slack_user = slack_event["user"]
    slack_text = slack_event["text"]
    channel_id = slack_event["channel"]
    slack_userID = slack_user["ID"]
    slack_reply = ""

    # Ignore bot messages.
    if "bot_id" in slack_event:
        logging.warn("Ignore bot event")
    else:
        # Start data sift.
        if slack_text.startswith("!networth"):
            slack_reply = "Your networth is: "
        elif slack_text.startwith("!price"):
            command, asset = text.split()
            slack_reply = "The price of a(n) %s is: " % (asset)
        elif slack_text.startwith("!Addme"):
            if not getAssetExistance(slack_userID, userID, ASSET_TABLE):
                slack_reply = "Adding user: %s(%s)" % (slack_user, slack_userID)
                dynamo.update_item(TableName=ASSET_TABLE,
                    Key={'userID':{'S':'slack_userID'},
                    AttributeUpdates= {
                        'resources':{
                            'Action': 'ADD',
                            'Value': {'N': '1000'}
                        }
                    }
                )
            else
                slack_reply = "User %s(%s) already exists" % (slack_user, slack_userID)

    # We need to send back three pieces of information:
    data = urllib.parse.urlencode(
        (
            ("token", BOT_TOKEN),
            ("channel", channel_id),
            ("text", slack_reply)
        )
    )
    data = data.encode("ascii")

    # Construct the HTTP request that will be sent to the Slack API.
    request = urllib.request.Request(
        SLACK_URL,
        data=data,
        method="POST"
    )

    # Add a header mentioning that the text is URL-encoded.
    request.add_header(
        "Content-Type",
        "application/x-www-form-urlencoded"
    )

    # Fire off the request!
    urllib.request.urlopen(request).read()

    # Everything went fine.
    return "200 OK"
Hopefully I am doing something dumb; I am pretty new to all this. Any help is much appreciated. Thanks!
You skipped a closing round bracket in this line:
response = dynamoTable.query(KeyConditionExpression=Key(element).eq(asset)
Replace this line with:
response = dynamoTable.query(KeyConditionExpression=Key(element).eq(asset))
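Separately (an observation about the question's setup, not part of the bracket fix): the surrounding code creates dynamo with boto3.client('dynamodb'), and the low-level client has no .Table() method. The Key/Table query style shown here needs the resource interface; a minimal sketch, assuming the same region default as the question:

import boto3
from boto3.dynamodb.conditions import Key

# The resource interface exposes Table() and accepts Key conditions;
# the low-level client does not.
dynamo = boto3.resource('dynamodb', region_name='us-east-1')

def getAssetExistance(asset, element, table):
    dynamoTable = dynamo.Table(table)
    response = dynamoTable.query(KeyConditionExpression=Key(element).eq(asset))
    return bool(response)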
I learned that Twitter has stopped providing JSON for deleted tweets. I am trying to get past this limitation by using a polling method to see if a tweet is deleted.
But my code still fails. I would appreciate it if you could help me figure out what I am missing.
import sys
import json
import tweepy
from tweepy import Stream
from tweepy.streaming import StreamListener
import datetime
import time
from polling import TimeoutException, poll

# Go to http://apps.twitter.com and create an app.
# The consumer key and secret will be generated for you after
consumer_key = 'xx'
consumer_secret = 'xx'
access_token = 'xx'
access_token_secret = 'xx'

# Set up the authorisation to use the Twitter API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# Handle the output generated from the stream
class listener(StreamListener):
    tweetcount = 0

    def on_data(self, data):
        # Convert the message to JSON
        json_data = json.loads(data)
        # text_file = open(json_data['id_str'] + ".json", "w")
        # text_file.write(data)
        # text_file.close()
        if 'id_str' not in json_data:
            # If this isn't a status, do nothing.
            print("no ID")
        else:
            #print("Twitter Id ",json_data['id_str'])
            #print("User Id ",json_data['user']['id_str'])
            if json_data['user']['id_str'] == '51241574':  # Associated Press
                tweetcount = json_data['user']['statuses_count']
                tweet = api.get_status(json_data['id'])
                print("Tweet Count ", tweetcount)
                print("Account Name ", json_data['user']['name'])
                print(tweet.text)
            else:
                pass
        # if 'delete' in json_data:
        #     print("DELETED!")
        #     if json_data['delete']['status']['user_id'] == '51241574':
        #         deleted_tweet_id = json_data['delete']['status']['id']
        #         tweetcount -= 1
        #         print("New Count is ", tweetcount)
        #         print(deleted_tweet_id)
        #         deleted_tweet = api.get_status(deleted_tweet_id)
        #         print(deleted_tweet.text)
        #     else:
        #         pass
        return True

    def on_error(self, status):
        print("Error status is ", status)

# Start consuming from the stream. This will get all the Tweets & Deletions from the users the user is following.
twitterStream = Stream(auth, listener())
twitterStream.filter(follow=['51241574'], async=True)

# polling method to check if tweet is deleted
try:
    user = api.get_user('AP')
    poll(lambda: user.statuses_count >= listener.tweetcount > 0, timeout=30, step=1)
    print("Tweet was deleted, New Tweet count is ", user.statuses_count)
except Exception as ex:
    template = "An exception of type {0} occurred. Arguments:\n{1!r}"
    message = template.format(type(ex).__name__, ex.args)
    print(message)
When a listener event fires, the application prints the value of the tweet count variable and checks it against the value retrieved from querying the API.
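One observation from reading the code (not a confirmed answer): inside on_data, the statement tweetcount = json_data['user']['statuses_count'] binds a new local variable, so the class attribute listener.tweetcount that the poll reads stays at 0, and the condition user.statuses_count >= listener.tweetcount > 0 can never become true. A minimal sketch of that one change:

def on_data(self, data):
    json_data = json.loads(data)
    user = json_data.get('user')
    if user and user.get('id_str') == '51241574':
        # Assign to the class attribute, not a fresh local variable,
        # so the polling loop below can observe the updated count.
        listener.tweetcount = user['statuses_count']
    return True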