I am trying to copy multiple files from a source bucket to a destination bucket using AWS Lambda. The bucket structures are as follows:
Source bucket:
mysrcbucket/Input/daily/acctno_pin_xref/ABC_ACCTNO_PIN_XREF_FULL_20170926_0.csv.gz
mysrcbucket/Input/daily/acctno_pin_xref/ABC_ACCTNO_PIN_XREF_FULL_20170926_1.csv.gz
mysrcbucket/Input/daily/acctno_pin_xref/ABC_ACCTNO_PIN_XREF_count_20170926.inf
Destination bucket:
mydestbucket/Input/daily/acctno_pin_xref/ABC_ACCTNO_PIN_XREF_FULL_20170926_0.csv.gz
mydestbucket/Input/daily/acctno_pin_xref/ABC_ACCTNO_PIN_XREF_FULL_20170926_1.csv.gz
mydestbucket/Input/daily/acctno_pin_xref/ABC_ACCTNO_PIN_XREF_count_20170926.inf
I wrote the Lambda function below, but it fails with the following error. Can someone explain what I am doing wrong?
{
"errorMessage": "expected string or bytes-like object",
"errorType": "TypeError",
"stackTrace": [
[
"/var/task/lambda_function.py",
17,
"lambda_handler",
"s3.Object(dest_bucket,dest_key).copy_from(CopySource= { 'Bucket': obj.bucket_name , 'Key' : obj.key})"
],
[
"/var/runtime/boto3/resources/factory.py",
520,
"do_action",
"response = action(self, *args, **kwargs)"
],
[
"/var/runtime/boto3/resources/action.py",
83,
"__call__",
"response = getattr(parent.meta.client, operation_name)(**params)"
],
[
"/var/runtime/botocore/client.py",
312,
"_api_call",
"return self._make_api_call(operation_name, kwargs)"
],
[
"/var/runtime/botocore/client.py",
575,
"_make_api_call",
"api_params, operation_model, context=request_context)"
],
[
"/var/runtime/botocore/client.py",
627,
"_convert_to_request_dict",
"params=api_params, model=operation_model, context=context)"
],
[
"/var/runtime/botocore/hooks.py",
227,
"emit",
"return self._emit(event_name, kwargs)"
],
[
"/var/runtime/botocore/hooks.py",
210,
"_emit",
"response = handler(**kwargs)"
],
[
"/var/runtime/botocore/handlers.py",
208,
"validate_bucket_name",
"if VALID_BUCKET.search(bucket) is None:"
]
]
}
Lambda Function Code
import boto3
import json
s3 = boto3.resource('s3')
def lambda_handler(event, context):
    bucket = s3.Bucket('mysrcbucket')
    dest_bucket = s3.Bucket('mydestbucket')
    print(bucket)
    print(dest_bucket)

    for obj in bucket.objects.filter(Prefix='Input/daily/acctno_pin_xref/ABC_ACCTNO_PIN_XREF', Delimiter='/'):
        dest_key = obj.key
        print(dest_key)
        s3.Object(dest_bucket, dest_key).copy_from(CopySource={'Bucket': obj.bucket_name, 'Key': obj.key})
The issue is with:
s3.Object(dest_bucket, dest_key).copy_from(
    CopySource={'Bucket': obj.bucket_name, 'Key': obj.key})
Change dest_bucket to dest_bucket.name:
s3.Object(dest_bucket.name, dest_key).copy_from(
    CopySource={'Bucket': obj.bucket_name, 'Key': obj.key})
dest_bucket is a resource and name is its identifier. The TypeError is raised inside botocore's validate_bucket_name handler (the VALID_BUCKET.search(bucket) frame at the bottom of the stack trace), which expects the bucket argument to be a string but received the Bucket resource object instead.
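For reference, here is the full handler with that one change applied (same buckets and prefix as in the question):
import boto3

s3 = boto3.resource('s3')

def lambda_handler(event, context):
    bucket = s3.Bucket('mysrcbucket')
    dest_bucket = s3.Bucket('mydestbucket')

    for obj in bucket.objects.filter(Prefix='Input/daily/acctno_pin_xref/ABC_ACCTNO_PIN_XREF',
                                     Delimiter='/'):
        # copy_from needs the destination bucket *name* (a string);
        # passing the Bucket resource itself is what raised the TypeError
        s3.Object(dest_bucket.name, obj.key).copy_from(
            CopySource={'Bucket': obj.bucket_name, 'Key': obj.key})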
Related
The following code works fine locally, but fails in AWS Lambda:
import os
import json
# the following aliases are inferred from how dt, url and pd are used below
import datetime as dt
import urllib3 as url
import pandas as pd

authURL = os.environ['authURL_env']
reportURL = os.environ['reportURL_env']
FirstDayOfPreviousMonth = dt.date.today()-dt.timedelta(days=1)
LastDayOfPreviousMonth = dt.date.today()-dt.timedelta(days=1)
payload = json.dumps({
"email": os.environ['email_env'],
"password": os.environ['password_env']
})
headers = {
'Content-Type': 'application/json'
}
apiConn = url.PoolManager()
# try:
tokenResponse = apiConn.request('POST', authURL, headers=headers, body=payload)
authToken = json.loads(tokenResponse.data)
payload = json.dumps({
"start_date": str(FirstDayOfPreviousMonth),
"end_date": str(LastDayOfPreviousMonth),
"interval": "day",
"dimensions": [
"supply_tag_id",
"demand_partner_id"
]
})
headers = {
'Authorization': authToken['token'],
'Content-Type': 'application/json'
}
response = apiConn.request('POST', reportURL, headers=headers, body=payload)
vendor_df = pd.read_json(response.data)
In AWS Lambda I get the following error:
{
"errorMessage": "Expected file path name or file-like object, got <class 'bytes'> type",
"errorType": "TypeError",
"requestId": "9f594e9e-7703-420b-b981-e4352f1d64db",
"stackTrace": [
" File \"/var/task/lambda_function.py\", line 56, in lambda_handler\n springserve_df = pd.read_json(response.data)\n",
" File \"/opt/python/pandas/util/_decorators.py\", line 207, in wrapper\n return func(*args, **kwargs)\n",
" File \"/opt/python/pandas/util/_decorators.py\", line 311, in wrapper\n return func(*args, **kwargs)\n",
" File \"/opt/python/pandas/io/json/_json.py\", line 588, in read_json\n json_reader = JsonReader(\n",
" File \"/opt/python/pandas/io/json/_json.py\", line 673, in __init__\n data = self._get_data_from_filepath(filepath_or_buffer)\n",
" File \"/opt/python/pandas/io/json/_json.py\", line 710, in _get_data_from_filepath\n self.handles = get_handle(\n",
" File \"/opt/python/pandas/io/common.py\", line 823, in get_handle\n raise TypeError(\n"
]
}
To add more mystery, this exact code works in an older Lambda. It's only when I create it in a new Lambda that it begins to fail.
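One likely cause (an assumption on my part, since the old and new Lambdas may bundle different pandas versions): newer pandas releases refuse raw bytes in read_json, and urllib3's response.data is exactly that. Wrapping the bytes in a file-like buffer sidesteps the check:
import io
import pandas as pd

# response.data below is the bytes payload from the urllib3 request above;
# newer pandas rejects raw bytes in read_json but accepts a file-like object
vendor_df = pd.read_json(io.BytesIO(response.data))
# Equivalent alternative: decode the bytes to a string first
# vendor_df = pd.read_json(response.data.decode('utf-8'))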
Related
I am trying out the following code:
import json
import uuid
import boto3
def lambda_handler(event, context):
    dynamo_client = boto3.resource('dynamodb')
    loadeo_carrier_company = dynamo_client.Table('loadeo_carrier_company')
    item = {}
    item = event['body']
    print(item)
    item['company_id'] = str(uuid.uuid4())
    print(type(item))
    try:
        loadeo_carrier_company.put_item(
            Item=item
        )
        return {
            "statusCode": 200,
            "headers": {
                "Access-Control-Allow-Origin": "*"
            },
            "message": json.dumps("Record has been inserted"),
            "body": item
        }
    except Exception as e:
        return {
            "statusCode": 500,
            "headers": {
                "Access-Control-Allow-Origin": "*"
            },
            "message": "Error: Unable to save record!"
        }
When I test it through a Lambda test event, it works fine. But when I create an API and try it out with Postman, it shows an internal server error; when I look at the CloudWatch logs, the error below appears.
[ERROR] TypeError: 'str' object does not support item assignment
Traceback (most recent call last):
File "/var/task/lambda_function.py", line 19, in lambda_handler
item['company_id'] = str(uuid.uuid4())
NOTE:
The Lambda test event is working fine.
"body": {
"company_name" : "waveaxis pvt ltd",
"mc_number" : "00000",
"email_adderess": "waveaxis#waveaxis.co.in",
"phone_number" : "+91 1234567890",
"company_address" : "Kormangala, Bengaluru"
},
After trying the answer: when I test it from the Lambda test event, I get:
{
"errorMessage": "the JSON object must be str, bytes or bytearray, not dict",
"errorType": "TypeError",
"stackTrace": [
" File \"/var/task/lambda_function.py\", line 17, in lambda_handler\n item = json.loads(event['body'])\n",
" File \"/var/lang/lib/python3.8/json/__init__.py\", line 341, in loads\n raise TypeError(f'the JSON object must be str, bytes or bytearray, '\n"
]
}
and when I call it from Postman, execution stops before the try block with no error.
Edit 2: printing the JSON for the event:
{
"resource":"/",
"path":"/",
"httpMethod":"PUT",
"headers":{
"Accept":"*/*",
"Accept-Encoding":"gzip, deflate, br",
"Authorization":"Bearer eyJraWQiOiJCUUl6ZkxcL1VOdm9QTDVxNExlWGFRNXNxOG1mVmhmXC9rK3ZJUDdYc0p0VjQ9IiwiYWxnIjoiUlMyNTYifQ.eyJzdWIiOiJkYjdkODBmOC1mY2VhLTQwYjItYTZhYi1jMjhhNTZiMTI1NDIiLCJ0b2tlbl91c2UiOiJhY2Nlc3MiLCJzY29wZSI6ImNhcnJpZXJcL3JlYWQgY2FycmllclwvdXBkYXRlIGNhcnJpZXJcL2RlbGV0ZSIsImF1dGhfdGltZSI6MTYwMjA0NDQ5NywiaXNzIjoiaHR0cHM6XC9cL2NvZ25pdG8taWRwLnVzLWVhc3QtMS5hbWF6b25hd3MuY29tXC91cy1lYXN0LTFfWW5nVnJxYUFGIiwiZXhwIjoxNjAyMDQ4MDk3LCJpYXQiOjE2MDIwNDQ0OTcsInZlcnNpb24iOjIsImp0aSI6ImExMzg4ZGUyLTRhZWQtNGI2MC04YjM0LWYzN2I1N2RjM2ZmMiIsImNsaWVudF9pZCI6IjVlZzU1NWhpNzAwZ21lbWc3N3B0NWkzbDIyIiwidXNlcm5hbWUiOiJHb29nbGVfMTEyNjUyMTUzMDI4OTQyNjAzNDM5In0.XMy9GP03o5EYrcLtQFzrMV6KID4IlDI_n0WrHa8osY_7CeeDjaCjH6Dtr766TAommLUzcLoKt-NrBUdq0Zfx-BL919j25rwiZXJbHiZP_4y9n891ddOXfPabO7n8O84-63W6l13QEBozuc21vXi7vuE_dSJ7KAgute46KP3LyoS73WPDhYim_7HZJO8pVedk64hhGNZsYWv6VU5QeQyqPl926spA25ZBo_z5dcoBnMZ_i2n5nz6qxRcINOKfMXL1f4_nDRbtKb5Pd33hKnsKYLkxEI0mrT1JKPJhkJRg9vGqaKcd13oqrigJRFSXYuVQuKNDluc38KbQJcwUoXDjjA",
"Content-Type":"application/json",
"Host":"661ny3iw92.execute-api.us-east-2.amazonaws.com",
"Postman-Token":"654cfdd0-8080-48a5-8758-3aa7e5bcefcc",
"User-Agent":"PostmanRuntime/7.26.5",
"X-Amzn-Trace-Id":"Root=1-5fa13860-63da2718701bf3fc458c78e1",
"X-Forwarded-For":"157.49.141.105",
"X-Forwarded-Port":"443",
"X-Forwarded-Proto":"https"
},
"multiValueHeaders":{
"Accept":[
"*/*"
],
"Accept-Encoding":[
"gzip, deflate, br"
],
"Authorization":[
"Bearer eyJraWQiOiJCUUl6ZkxcL1VOdm9QTDVxNExlWGFRNXNxOG1mVmhmXC9rK3ZJUDdYc0p0VjQ9IiwiYWxnIjoiUlMyNTYifQ.eyJzdWIiOiJkYjdkODBmOC1mY2VhLTQwYjItYTZhYi1jMjhhNTZiMTI1NDIiLCJ0b2tlbl91c2UiOiJhY2Nlc3MiLCJzY29wZSI6ImNhcnJpZXJcL3JlYWQgY2FycmllclwvdXBkYXRlIGNhcnJpZXJcL2RlbGV0ZSIsImF1dGhfdGltZSI6MTYwMjA0NDQ5NywiaXNzIjoiaHR0cHM6XC9cL2NvZ25pdG8taWRwLnVzLWVhc3QtMS5hbWF6b25hd3MuY29tXC91cy1lYXN0LTFfWW5nVnJxYUFGIiwiZXhwIjoxNjAyMDQ4MDk3LCJpYXQiOjE2MDIwNDQ0OTcsInZlcnNpb24iOjIsImp0aSI6ImExMzg4ZGUyLTRhZWQtNGI2MC04YjM0LWYzN2I1N2RjM2ZmMiIsImNsaWVudF9pZCI6IjVlZzU1NWhpNzAwZ21lbWc3N3B0NWkzbDIyIiwidXNlcm5hbWUiOiJHb29nbGVfMTEyNjUyMTUzMDI4OTQyNjAzNDM5In0.XMy9GP03o5EYrcLtQFzrMV6KID4IlDI_n0WrHa8osY_7CeeDjaCjH6Dtr766TAommLUzcLoKt-NrBUdq0Zfx-BL919j25rwiZXJbHiZP_4y9n891ddOXfPabO7n8O84-63W6l13QEBozuc21vXi7vuE_dSJ7KAgute46KP3LyoS73WPDhYim_7HZJO8pVedk64hhGNZsYWv6VU5QeQyqPl926spA25ZBo_z5dcoBnMZ_i2n5nz6qxRcINOKfMXL1f4_nDRbtKb5Pd33hKnsKYLkxEI0mrT1JKPJhkJRg9vGqaKcd13oqrigJRFSXYuVQuKNDluc38KbQJcwUoXDjjA"
],
"Content-Type":[
"application/json"
],
"Host":[
"661ny3iw92.execute-api.us-east-2.amazonaws.com"
],
"Postman-Token":[
"654cfdd0-8080-48a5-8758-3aa7e5bcefcc"
],
"User-Agent":[
"PostmanRuntime/7.26.5"
],
"X-Amzn-Trace-Id":[
"Root=1-5fa13860-63da2718701bf3fc458c78e1"
],
"X-Forwarded-For":[
"157.49.141.105"
],
"X-Forwarded-Port":[
"443"
],
"X-Forwarded-Proto":[
"https"
]
},
"queryStringParameters":"None",
"multiValueQueryStringParameters":"None",
"pathParameters":"None",
"stageVariables":"None",
"requestContext":{
"resourceId":"529nsbfu6a",
"resourcePath":"/",
"httpMethod":"PUT",
"extendedRequestId":"VbW_FFkYCYcF3PQ=",
"requestTime":"03/Nov/2020:11:00:48 +0000",
"path":"/dev",
"accountId":"272075499248",
"protocol":"HTTP/1.1",
"stage":"dev",
"domainPrefix":"661ny3iw92",
"requestTimeEpoch":1604401248435,
"requestId":"970dd9d2-9b35-45c5-b194-806060e27d10",
"identity":{
"cognitoIdentityPoolId":"None",
"accountId":"None",
"cognitoIdentityId":"None",
"caller":"None",
"sourceIp":"157.49.141.105",
"principalOrgId":"None",
"accessKey":"None",
"cognitoAuthenticationType":"None",
"cognitoAuthenticationProvider":"None",
"userArn":"None",
"userAgent":"PostmanRuntime/7.26.5",
"user":"None"
},
"domainName":"661ny3iw92.execute-api.us-east-2.amazonaws.com",
"apiId":"661ny3iw92"
},
"body":"{\r\n \"company_name\": \"waveaxis pvt ltd\",\r\n \"mc_number\": \"00000\",\r\n \"email_adderess\": \"waveaxis#waveaxis.co.in\",\r\n \"phone_number\": \"+91 1234567890\",\r\n \"company_address\": \"Kormangala, Bengaluru\"\r\n}\r\n",
"isBase64Encoded":False
}
Your event['body'] is probably just a JSON string, not an actual JSON object, from what I remember.
Thus, instead of
item = event['body']
you can use
item = json.loads(event['body'])
which parses the JSON string into a Python object.
Update
Based on the posted form of the event, you should use ast, not json:
import ast
item = ast.literal_eval(event['body'])
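A defensive variant (my own sketch, not part of either answer) that covers both invocation paths seen in this question: the console test event, where body may already be a dict, and API Gateway, where body arrives as a JSON string:
import json

def extract_item(event):
    # API Gateway delivers the body as a JSON string; the console
    # test event may deliver it as an already-parsed dict
    body = event['body']
    if isinstance(body, str):
        return json.loads(body)
    return body
Inside lambda_handler, item = extract_item(event) then works for both Postman and the test event.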
Related
My goal is to restrict access to EC2 using a tag key. It works fine if I remove the condition from the IAM policy. However, if I add the aws:TagKeys condition, I get an UnauthorizedOperation error. I need some assistance in fixing either the IAM policy or the code to work with the tag key.
Here's the IAM policy:
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "ec2:DescribeInstances",
        "ec2:DescribeKeyPairs"
      ],
      "Resource": "*",
      "Condition": {
        "ForAnyValue:StringEquals": {
          "aws:TagKeys": "mytag"
        }
      }
    }
  ]
}
Here's my Python code:
import os
import boto3
import json
os.environ['AWS_DEFAULT_REGION'] = 'ap-south-1'
os.environ['AWS_ACCESS_KEY_ID'] = 'myacceskey'
os.environ['AWS_SECRET_ACCESS_KEY'] = 'secret'
def list_instances_by_tag_value(tagkey, tagvalue):
    # When passed a tag key and tag value, this will return a list of InstanceIds that were found.
    ipdict = {}
    ec2client = boto3.client('ec2')
    #response = ec2client.describe_key_pairs()
    #print(response)
    response = ec2client.describe_instances(
        Filters=[
            {
                'Name': 'tag:' + tagkey,
                'Values': [tagvalue]
            }
        ]
    )
    client_dict = {}
    for reservation in response["Reservations"]:
        print(reservation)

#boto3.set_stream_logger(name='botocore')
output = list_instances_by_tag_value("mytag", "abcd")
Here's the exception:
Traceback (most recent call last):
File "test.py", line 29, in <module>
output = list_instances_by_tag_value("mytag", "abcd")
File "test.py", line 20, in list_instances_by_tag_value
'Values':[tagvalue]
File "C:\python35\lib\site-packages\botocore\client.py", line 272, in _api_call
return self._make_api_call(operation_name, kwargs)
File "C:\python35\lib\site-packages\botocore\client.py", line 576, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (UnauthorizedOperation) when calling the DescribeInstances operation: You are not authorized to perform this operation.
I have checked that filtering by tag key is supported by DescribeInstances: https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html
I also checked a couple of SO threads, after which I narrowed the action from Describe* down to the very specific DescribeInstances.
But it's still not working for me.
Got it: Why does applying a condition to ec2:DescribeInstances in an IAM policy fail?
DescribeInstances does not support resource-level permissions, so a condition such as aws:TagKeys never matches and the Allow statement never applies, which is why the request is denied.
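For reference, the statement that does work here is simply the one from the question with the condition removed, consistent with the observation above that it works fine without it:
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "ec2:DescribeInstances",
        "ec2:DescribeKeyPairs"
      ],
      "Resource": "*"
    }
  ]
}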
Related
A Python script fetches a Google AdWords report. It works as expected on my local machine, but when deployed as an AWS Lambda function I get the following error:
{
"errorMessage": "[Errno 30] Read-only file system: '/home/sbx_user1051'",
"errorType": "OSError",
"stackTrace": [
[
"/var/task/lambda_function.py",
24,
"lambda_handler",
"report_downloader = client.GetReportDownloader(version='v201809')"
],
[
"/var/task/googleads/adwords.py",
370,
"GetReportDownloader",
"return ReportDownloader(self, version, server)"
],
[
"/var/task/googleads/adwords.py",
1213,
"__init__",
"self.proxy_config, self._namespace, self._adwords_client.cache)"
],
[
"/var/task/googleads/common.py",
819,
"__init__",
"transport = _ZeepProxyTransport(timeout, proxy_config, cache)"
],
[
"/var/task/googleads/common.py",
667,
"__init__",
"cache = zeep.cache.SqliteCache()"
],
[
"/var/task/zeep/cache.py",
77,
"__init__",
"self._db_path = path if path else _get_default_cache_path()"
],
[
"/var/task/zeep/cache.py",
155,
"_get_default_cache_path",
"os.makedirs(path)"
],
[
"/var/lang/lib/python3.6/os.py",
210,
"makedirs",
"makedirs(head, mode, exist_ok)"
],
[
"/var/lang/lib/python3.6/os.py",
210,
"makedirs",
"makedirs(head, mode, exist_ok)"
],
[
"/var/lang/lib/python3.6/os.py",
220,
"makedirs",
"mkdir(name, mode)"
]
]
}
I know that Lambda can only write files in the /tmp folder, but what confuses me is that I don't write to any file at all in my script. Here is its main structure:
from googleads import adwords  # import inferred from usage

client = adwords.AdWordsClient.LoadFromStorage('tmp/googleads.yaml')
report_downloader = client.GetReportDownloader(version='v201809')
report_query = (adwords.ReportQueryBuilder()
                .Select( str)
                .From('ACCOUNT_PERFORMANCE_REPORT')
                .During('LAST_7_DAYS')
                .Build())
results = report_downloader.DownloadReportAsStringWithAwql(
    report_query, 'TSV', skip_report_header=True, skip_column_header=True,
    skip_report_summary=True, include_zero_impressions=False)
campaigns = results.splitlines()
Please advise how to fix this issue. The environment is Python 3.6.
It looks like Adwords is using a cache and, by default, that cache goes into the home directory of the user running your code. To fix this, set the environment variable XDG_CACHE_HOME to /tmp/.cache. You can set this in the Lambda environment variables.
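A minimal sketch of the same fix done in code (relying on the answer's premise that zeep resolves its default SqliteCache path from XDG_CACHE_HOME): set the variable before the downloader is created:
import os

# point the XDG cache at Lambda's writable /tmp before zeep's
# SqliteCache resolves its default path
os.environ['XDG_CACHE_HOME'] = '/tmp/.cache'

from googleads import adwords

client = adwords.AdWordsClient.LoadFromStorage('tmp/googleads.yaml')
report_downloader = client.GetReportDownloader(version='v201809')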
Related
I want to copy a set of files over from S3 and put them in the /tmp directory while my Lambda function is running, to use and manipulate the contents. The following code excerpt works fine on my PC (which is running Windows):
import boto3
import botocore.exceptions  # imports inferred from usage

s3 = boto3.resource('s3')
BUCKET_NAME = 'car_sentiment'
keys = ['automated.csv', 'connected_automated.csv', 'connected.csv',
        'summary.csv']

for KEY in keys:
    try:
        local_file_name = 'tmp/' + KEY
        s3.Bucket(BUCKET_NAME).download_file(KEY, local_file_name)
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            continue
        else:
            raise
However, when I try to run it on AWS Lambda, I get:
{
"errorMessage": "[Errno 2] No such file or directory: 'tmp/automated.csv.4Bcd0bB9'",
"errorType": "FileNotFoundError",
"stackTrace": [
[
"/var/task/SentimentForAWS.py",
28,
"my_handler",
"s3.Bucket(BUCKET_NAME).download_file(KEY, local_file_name)"
],
[
"/var/runtime/boto3/s3/inject.py",
246,
"bucket_download_file",
"ExtraArgs=ExtraArgs, Callback=Callback, Config=Config)"
],
[
"/var/runtime/boto3/s3/inject.py",
172,
"download_file",
"extra_args=ExtraArgs, callback=Callback)"
],
[
"/var/runtime/boto3/s3/transfer.py",
307,
"download_file",
"future.result()"
],
[
"/var/runtime/s3transfer/futures.py",
73,
"result",
"return self._coordinator.result()"
],
[
"/var/runtime/s3transfer/futures.py",
233,
"result",
"raise self._exception"
],
[
"/var/runtime/s3transfer/tasks.py",
126,
"__call__",
"return self._execute_main(kwargs)"
],
[
"/var/runtime/s3transfer/tasks.py",
150,
"_execute_main",
"return_value = self._main(**kwargs)"
],
[
"/var/runtime/s3transfer/download.py",
582,
"_main",
"fileobj.seek(offset)"
],
[
"/var/runtime/s3transfer/utils.py",
335,
"seek",
"self._open_if_needed()"
],
[
"/var/runtime/s3transfer/utils.py",
318,
"_open_if_needed",
"self._fileobj = self._open_function(self._filename, self._mode)"
],
[
"/var/runtime/s3transfer/utils.py",
244,
"open",
"return open(filename, mode)"
]
]
}
Why does it think the file name is tmp/automated.csv.4Bcd0bB9 rather than just tmp/automated.csv, and how do I fix it? I've been pulling my hair out on this one, trying multiple approaches, some of which generate a similar error when running locally on my PC. Thanks!
You should save in /tmp, rather than tmp/. The random .4Bcd0bB9 suffix is added by s3transfer, which downloads to a temporary file alongside the target and renames it when the transfer completes; since the relative path tmp/ resolves against Lambda's read-only working directory, creating that temporary file fails.
eg:
local_file_name = '/tmp/' + KEY
The reason Lambda gives the above error is that it doesn't allow writing into a hierarchical structure within the /tmp/ directory unless the subdirectories already exist: you can write directly to /tmp/example.txt, but not to, say, /tmp/dir1/example.txt without first creating /tmp/dir1 (e.g. with os.makedirs).
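Putting that together, a corrected version of the loop from the question (same bucket and keys, with the absolute /tmp path):
import boto3
import botocore.exceptions

s3 = boto3.resource('s3')
BUCKET_NAME = 'car_sentiment'
keys = ['automated.csv', 'connected_automated.csv', 'connected.csv', 'summary.csv']

for KEY in keys:
    # /tmp is the only writable location on the Lambda filesystem
    local_file_name = '/tmp/' + KEY
    try:
        s3.Bucket(BUCKET_NAME).download_file(KEY, local_file_name)
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            continue  # skip keys that don't exist in the bucket
        raise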