I have an AWS Lambda function written in Python 3.7 that is set to delete snapshots older than 120 days. I'm running it from a management account and hitting two child accounts via 'aws_account_numbers'. The function runs successfully, but in CloudWatch the logs show the same snapshots being deleted for each account: snapshots x, y, z are reported deleted for accountA, and then the same snapshots x, y, z are reported deleted for accountB. What is really happening is that none of these snapshots live in accountA or accountB; they actually live in the management account.
from datetime import datetime, timedelta, timezone
import boto3
import collections
import sys
from botocore.exceptions import ClientError

region = 'us-east-1'
aws_account_numbers = {"accountA": "xxxxxxxxxxxx", "accountB": "xxxxxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=120)
    ec2_resource = boto3.resource('ec2')
    snapshots = ec2_resource.snapshots.filter(OwnerIds=['self'])
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-120days" % acctnum
        stsClient = boto3.client('sts')
        sts_response = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)
        ec2 = boto3.resource(service_name='ec2', region_name=region,
                             aws_access_key_id=sts_response['Credentials']['AccessKeyId'],
                             aws_secret_access_key=sts_response['Credentials']['SecretAccessKey'],
                             aws_session_token=sts_response['Credentials']['SessionToken'])
        for snapshot in snapshots:
            try:
                if not snapshot.description.startswith('Snapshot created by task soe-backup') and delete_time > snapshot.start_time:
                    #snapshot.delete()
                    print("Snapshot %s is deleted in acct: %s" % (snapshot, acctnum))
            except ClientError as e:
                if e.response['Error']['Code'] == 'InvalidSnapshot.InUse':
                    print("Snapshot %s in use in acct: %s" % (snapshot, acctnum))
                    continue
                else:
                    print("Unexpected error: %s" % e)
                    continue
    return 'Execution Complete'
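It looks like the snapshots collection is built once from the default session (the management account) before the loop, and the assumed-role ec2 resource is never used, so every iteration prints the management account's snapshots. A minimal sketch, assuming the same role name and region as above, of listing snapshots through the assumed-role session inside the loop:

# Minimal sketch (assumption: same role name and region as the original code).
# The key change is that snapshots are listed through the assumed-role session,
# so each iteration sees the child account's snapshots, not the management account's.
for name, acctnum in aws_account_numbers.items():
    roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-120days" % acctnum
    creds = boto3.client('sts').assume_role(
        RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)['Credentials']
    ec2 = boto3.resource('ec2', region_name=region,
                         aws_access_key_id=creds['AccessKeyId'],
                         aws_secret_access_key=creds['SecretAccessKey'],
                         aws_session_token=creds['SessionToken'])
    for snapshot in ec2.snapshots.filter(OwnerIds=['self']):  # 'self' now resolves to the child account
        if not snapshot.description.startswith('Snapshot created by task soe-backup') and delete_time > snapshot.start_time:
            print("Snapshot %s would be deleted in acct: %s" % (snapshot.snapshot_id, acctnum))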
I am implementing a cron job that uploads a large daily backup file to an S3 bucket. It works most of the time, but every once in a while I check the bucket and the file size is significantly smaller than the actual size.
It should be roughly 50 GB, but the last time it happened it showed 34 GB. My main problem is that I am unsure of what error to try/except.
I am still learning Python as I go, so bear with me.
from progress import ProgressPercentage  # class file progress.py
from slack import *  # function file for Slack notifications
import random
import glob
import os
import boto3
import botocore
from boto3.s3.transfer import TransferConfig

bucket = "my-s3-backup"
s3 = boto3.resource('s3')

# Grab the most recent file and strip the full path from the string
pattern = "/path/to/backup/file/xb_*"
files = list(filter(os.path.isfile, glob.glob(pattern)))
files.sort(key=lambda x: os.path.getmtime(x))
file_to_upload = files[-1]
file_name = file_to_upload.replace('/path/to/backup/file/', '')
key_path = 'physical_db_backups/' + file_name

# Multipart upload function
def multi_part_upload():
    config = TransferConfig(multipart_threshold=1024 * 25,
                            max_concurrency=10,
                            multipart_chunksize=1024 * 25,
                            use_threads=True)
    try:
        s3.meta.client.upload_file(file_to_upload, bucket, key_path, Config=config,
                                   Callback=ProgressPercentage(file_to_upload))
        # Custom Slack notification to inform completion
        sendslacksuccess("Physical Backup to S3 Complete:\n" + file_name)
    except botocore.exceptions.ClientError as error:
        # Custom Slack notification to inform of failure
        sendslackerror("Physical Backup to S3 Failed:\n" + file_name + "\nError: " + str(error))


if __name__ == '__main__':
    multi_part_upload()
If the script is not "failing" but it's not uploading the complete file, what exception am I trying to catch here? Should I log output somewhere?
I'm looking through the botocore exceptions documentation; I'm just unsure of what to try/except here.
For reference, here is the file size difference:
aws s3 ls --summarize --human-readable --recursive s3://my-s3-backup/physical_db_backups/
2022-05-07 14:31:28 50.7 GiB physical_db_backups/xb_202205070101.xb.zst
2022-05-08 12:48:07 50.8 GiB physical_db_backups/xb_202205080101.xb.zst
2022-05-09 01:30:04 34.2 GiB physical_db_backups/xb_202205090101.xb.zst <--- WRONG
Alright, since I was an idiot and didn't realize the file had not completed yet, I made a couple of changes.
I edited the cron to start later.
I have created logic to determine if the backup script is running.
I may incorporate additional checks to make sure the file exists, but for now this is a working POC that has been tested. (A sketch of one possible size check follows the code below.)
from progress import ProgressPercentage  # class file progress.py
from slack import *  # function file for Slack notifications
import random
from time import sleep
import psutil
import glob
import os
import boto3
import botocore
from boto3.s3.transfer import TransferConfig
import logging

bucket = "fsn-s3-backup"
s3 = boto3.resource('s3')

pattern = "/path/to/backup/file/xb_*"
files = list(filter(os.path.isfile, glob.glob(pattern)))
files.sort(key=lambda x: os.path.getmtime(x))
file_to_upload = files[-1]
file_name = file_to_upload.replace('/path/to/backup/file/', '')
key_path = 'physical_db_backups/' + file_name

logging.basicConfig(filename='/var/log/s3-backup.log',
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    datefmt='%m/%d/%Y %I:%M:%S %p',
                    filemode='a')
logger = logging.getLogger()
logger.setLevel(logging.INFO)


def multi_part_upload():
    config = TransferConfig(multipart_threshold=1024 * 25,
                            max_concurrency=10,
                            multipart_chunksize=1024 * 25,
                            use_threads=True)
    try:
        s3.meta.client.upload_file(file_to_upload, bucket, key_path, Config=config,
                                   Callback=ProgressPercentage(file_to_upload),
                                   ExtraArgs={'ContentType': 'application/zstd'})
        logger.info("Physical Backup to S3 Complete")
        sendslacksuccess("Physical Backup to S3 Complete:\n" + file_name)
    except botocore.exceptions.ClientError as error:
        logger.error("Physical Backup to S3 Failed: %s", error)
        sendslackerror("Physical Backup to S3 Failed:\n" + file_name + "\nError: " + str(error))


def checkIfProcessRunning(processName):
    for proc in psutil.process_iter():
        cmdline = proc.cmdline()
        if processName in cmdline:
            return True
    return False


if __name__ == '__main__':
    backuprunning = True
    while backuprunning:
        logger.info("Checking if backup shell script is running")
        if checkIfProcessRunning('/path/to/physical_backup.sh'):
            logger.info("Backup shell script still running. Sleeping for 60s")
            sleep(60)
        else:
            backuprunning = False
            logger.info("Beginning multipart upload")
            multi_part_upload()
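As a follow-up to the additional checks mentioned above, here is a minimal sketch (assuming the same s3, bucket, key_path, file_to_upload, logger, and Slack helpers defined in the script) that compares the local file size with the uploaded object's ContentLength via head_object, which is one way to detect a truncated upload:

# Minimal sketch of a post-upload size check (assumes the variables defined above).
def verify_upload():
    local_size = os.path.getsize(file_to_upload)
    head = s3.meta.client.head_object(Bucket=bucket, Key=key_path)
    remote_size = head['ContentLength']
    if local_size == remote_size:
        logger.info("Upload verified: %s bytes", remote_size)
        return True
    logger.error("Size mismatch: local %s bytes vs S3 %s bytes", local_size, remote_size)
    sendslackerror("Backup size mismatch for " + file_name)  # same custom Slack helper as above
    return False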
I am new to Python and AWS Lambda. I am trying to run this script as a Lambda function, but I am getting the error:
Runtime.HandlerNotFound
The script works fine when I run it from an EC2 instance, but when I run the same script from AWS Lambda it throws that error.
I would be really thankful if someone could point out what I did wrong.
Thank you
import boto3
import requests
import time
from botocore.exceptions import ClientError  # needed for the except ClientError blocks below

AWS_Access_Key_ID = ''      # redacted in the question
AWS_Secret_Access_Key = ''  # redacted in the question

DELAY_TIME = 10  # 10 seconds
region = 'us-east-2'

# instances = ['']
instances = {
    'instance id': 'http://link',
    'instance id': 'http://link'
}

ec2 = None
try:
    ec2 = boto3.client('ec2', aws_access_key_id=AWS_Access_Key_ID, aws_secret_access_key=AWS_Secret_Access_Key, region_name=region)
    # ec2 = boto3.resource('ec2', aws_access_key_id=AWS_Access_Key_ID, aws_secret_access_key=AWS_Secret_Access_Key, region_name=region)
except Exception as e:
    print(e)
    print("AWS CREDS ERROR, Exiting...")
    exit()


def startInstances(instancesIds):
    if type(instancesIds) != list:
        instancesIds = [instancesIds]
    try:
        response = ec2.start_instances(InstanceIds=instancesIds, DryRun=False)
        print(response)
        print("Instances Started")
    except ClientError as e:
        print(e)
        print("Instances Failed to Start")


def stopInstances(instancesIds):
    if type(instancesIds) != list:
        instancesIds = [instancesIds]
    try:
        response = ec2.stop_instances(InstanceIds=instancesIds, DryRun=False)
        print(response)
        print("Instances Stopped")
    except ClientError as e:
        print(e)
        print("Instances Failed to Stop")


def check():
    for x in instances:
        retry = 0
        live = False
        print("Checking Website " + instances[x])
        while retry < 5:
            try:
                r = requests.get(instances[x], verify=True)
                if r.status_code == 200:
                    live = True
                    break
            except:
                print("Not Live, retry time " + str(retry + 1))
            print("Delaying request for " + str(DELAY_TIME) + " seconds...")
            retry += 1
            time.sleep(DELAY_TIME)
        if live:
            print("Website is live")
            # call function to start the ec2 instance
            startInstances(x)
        else:
            # call function to stop the ec2 instance
            print('Website is dead')
            stopInstances(x)
        print("")


def main():
    check()


if __name__ == '__main__':
    main()
See https://docs.aws.amazon.com/lambda/latest/dg/python-handler.html. You need to tell Lambda the name of the handler function (in the form module_name.function_name in the function's runtime settings), which is the function AWS Lambda will call, and then implement that function in your Python script.
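For the script above, a minimal sketch of what that could look like (assuming the handler setting points at this file and reusing the existing check() function):

# Minimal sketch (assumption: the Lambda handler setting is <this_module>.lambda_handler).
def lambda_handler(event, context):
    check()  # reuse the existing website/instance check from the script above
    return 'Execution Complete'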
I had a similar problem recently. I was able to define a lambda handler function in my Python code that solved the problem; I got the guidance from this post.
In short, add this code (adjust naming accordingly):
import botocore
import boto3

def lambda_handler(event, context):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket('bucketname')
    exists = True
    try:
        s3.meta.client.head_bucket(Bucket='bucketname')
    except botocore.exceptions.ClientError as e:
        # If a client error is thrown, then check that it was a 404 error.
        # If it was a 404 error, then the bucket does not exist.
        error_code = int(e.response['Error']['Code'])
        if error_code == 404:
            exists = False
I have the following Python 3.7 Lambda function. I want to delete AMIs that are older than 90 days, but exclude any AMIs that have the tag/value combination 'amiarchive' / 'yes'. When I run this I get the error "list indices must be integers or slices, not str". I've done a bunch of research and I can't quite figure this out.
import boto3
import collections
import sys
from datetime import datetime, timedelta, timezone
from botocore.exceptions import ClientError

region = 'us-east-1'
aws_account_numbers = {"accountA": "xxxxxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now() - timedelta(days=90)
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-90days" % acctnum
        stsClient = boto3.client('sts')
        sts_response = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)
        ec2 = boto3.resource(service_name='ec2', region_name=region,
                             aws_access_key_id=sts_response['Credentials']['AccessKeyId'],
                             aws_secret_access_key=sts_response['Credentials']['SecretAccessKey'],
                             aws_session_token=sts_response['Credentials']['SessionToken'])
        ec = boto3.client('ec2', 'us-east-1')
        images = ec2.images.filter(Owners=["self"])
        tag = [{"Name": "tag:amiarchive", "Values": []}]
        for image in images:
            t = datetime.strptime(image.creation_date, "%Y-%m-%dT%H:%M:%S.%fZ")
            try:
                if delete_time > t and (tag['Value']) != yes:
                    print("AMI %s deregistered in acct: %s" % (image.image_id, acctnum))
                    response = image.deregister()
            except ClientError as e:
                if e.response['Error']['Code'] == 'InvalidImage.InUse':
                    print("Image in use")
                    continue
                else:
                    print("Unexpected error: %s" % e)
                    continue
    return 'Execution Complete'
You declare
tag = [{"Name": "tag:amiarchive", "Values": []}]
So tag is a list that contains one item (a dict). You need an integer index to access that item, such as tag[0]. Once you have the dict at tag[0], you can get its keys such as "Name" and "Values". Your code calls tag['Value'], which indexes the list with a string, and that produces the error you see.
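For illustration, a tiny REPL-style sketch of that distinction, using the same tag value declared above:

tag = [{"Name": "tag:amiarchive", "Values": []}]
tag[0]            # {'Name': 'tag:amiarchive', 'Values': []}  -- index the list first
tag[0]["Name"]    # 'tag:amiarchive'
tag[0]["Values"]  # []
tag["Value"]      # TypeError: list indices must be integers or slices, not str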
I ended up re-working the code with a co-worker. I switched to boto3.client rather than boto3.resource and added an additional for loop that checks for the tag/value combination and skips the remaining steps for that image if the condition is true. The following code does the trick.
import boto3
import collections
import sys
from datetime import datetime, timedelta, timezone
from botocore.exceptions import ClientError

region = 'us-east-1'
aws_account_numbers = {"AccountA": "xxxxxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now() - timedelta(days=67)
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-90days" % acctnum
        stsClient = boto3.client('sts')
        sts_response = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)
        ec2 = boto3.client(service_name='ec2', region_name=region,
                           aws_access_key_id=sts_response['Credentials']['AccessKeyId'],
                           aws_secret_access_key=sts_response['Credentials']['SecretAccessKey'],
                           aws_session_token=sts_response['Credentials']['SessionToken'])
        ec = boto3.client('ec2', 'us-east-1')
        images2 = ec2.describe_images(Owners=['self'])
        for image in images2['Images']:
            # Skip any image tagged amiarchive = yes
            archived = False
            if 'Tags' in image:
                for tag in image['Tags']:
                    if tag['Key'] == 'amiarchive' and tag['Value'] == 'yes':
                        print("Skipping Image", image['ImageId'], "because it's archived")
                        archived = True
                        break
            if archived:
                continue
            t = datetime.strptime(image['CreationDate'], "%Y-%m-%dT%H:%M:%S.%fZ")
            try:
                if delete_time > t:
                    print("AMI %s deregistered in acct: %s" % (image['ImageId'], acctnum))
                    # response = ec2.deregister_image(ImageId=image['ImageId'])
            except ClientError as e:
                if e.response['Error']['Code'] == 'InvalidImage.InUse':
                    print("Image in use")
                    continue
                else:
                    print("Unexpected error: %s" % e)
                    continue
    return 'Execution Complete'
I'm working on a Lambda function to delete AMIs (images) older than a set number of days. I'm comparing datetime.now with the image creation_date, and I can see that these values are returned in different formats.
datetime.now format - 2019-11-15 20:34:53.057320+00:00
image creation_date format - 2010-10-16T21:31:46.000Z
When I test this I get the error "'>' not supported between instances of 'datetime.datetime' and 'str'".
My code is below. I believe the issue is due to the different date formats.
from datetime import datetime, timedelta, timezone
import boto3
import collections
import sys
from botocore.exceptions import ClientError

region = 'us-east-1'
aws_account_numbers = {"MassIT-Engineering-Sandbox": "xxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=320)
    print(delete_time)
    ec2 = boto3.resource('ec2', 'us-east-1')
    images = ec2.images.filter(Owners=["self"])
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-120days" % acctnum
        stsClient = boto3.client('sts')
        sts_response = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)
        ec2 = boto3.resource(service_name='ec2', region_name=region,
                             aws_access_key_id=sts_response['Credentials']['AccessKeyId'],
                             aws_secret_access_key=sts_response['Credentials']['SecretAccessKey'],
                             aws_session_token=sts_response['Credentials']['SessionToken'])
        for image in images:
            try:
                if delete_time > image.creation_date:
                    image.delete()
                    print('AMI with Id = {} is deleted '.format(image.image_id))
            except ClientError as e:
                if e.response['Error']['Code'] == 'InvalidSnapshot.InUse':
                    print("Snapshot in use")
                    continue
                else:
                    print("Unexpected error: %s" % e)
                    continue
    return 'Execution Complete'
You need to convert image.creation_date from a string to a datetime.
Here is how it is done (with your from datetime import datetime, it is datetime.strptime rather than datetime.datetime.strptime):
t = datetime.strptime(image.creation_date, "%Y-%m-%dT%H:%M:%S.%fZ")
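One additional point worth noting: delete_time in the question is timezone-aware (tz=timezone.utc) while strptime returns a naive datetime, so comparing the two raises "can't compare offset-naive and offset-aware datetimes". A minimal sketch of making both sides comparable, assuming the image and delete_time variables from the question's loop:

from datetime import datetime, timezone

# Sketch: parse the AMI timestamp and attach UTC so it can be compared
# with an aware datetime such as datetime.now(tz=timezone.utc).
t = datetime.strptime(image.creation_date, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
if delete_time > t:
    print('AMI with Id = {} is older than the cutoff'.format(image.image_id))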
I'm trying to delete old AWS snapshots but I need to exclude any with a description value that starts with "Created by CreateImage".
I've tried variations of boto3.resource and boto3.client.
from datetime import datetime, timedelta, timezone
import boto3

client = boto3.client('ec2')
snapshots = client.snapshots.filter(Description!='Created by CreateImage')

def lambda_handler(event, context):
    for snapshot in snapshots:
        start_time = snapshot.start_time
        delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
        if delete_time > start_time:
            snapshot.delete()
            print('Snapshot with Id = {} is deleted '.format(snapshot.snapshot_id))
Right now I have about 10 snapshots older than 790 days, 5 with a description that starts with "Created by CreateImage" and 5 that don't. In testing this out I want to delete those snapshots without that description.
The error I get is:
module initialization error: 'EC2' object has no attribute 'snapshots'
Here's a version that works.
Note the use of OwnerIds=['self'] which limits results to only snapshots created by your AWS account. Without this, it will return all publicly-available snapshots created by any AWS account.
from datetime import datetime, timedelta, timezone
import boto3

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
    ec2_resource = boto3.resource('ec2', region_name='ap-southeast-2')
    snapshots = ec2_resource.snapshots.filter(OwnerIds=['self'])
    for snapshot in snapshots:
        if not snapshot.description.startswith('Created by CreateImage') and delete_time > snapshot.start_time:
            snapshot.delete()
            print('Snapshot with Id = {} is deleted '.format(snapshot.snapshot_id))
You need to use describe_snapshots and filter the results properly. The EC2 filters cannot express "description does not start with ...", so restrict the call to your own snapshots (OwnerIds=['self']) and exclude by description inside the loop.
Also, the results are going to be dictionaries, not references to the Snapshot resource class, so you need to update the way you pull out the attributes and delete the snapshot.
Something like:
from datetime import datetime, timedelta, timezone
import boto3

client = boto3.client('ec2')
# Restrict to snapshots owned by this account; the description check happens in the loop
# because describe_snapshots filters cannot express "does not start with".
snapshots = client.describe_snapshots(OwnerIds=['self'])['Snapshots']

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
    for snapshot in snapshots:
        if snapshot['Description'].startswith('Created by CreateImage'):
            continue
        start_time = snapshot['StartTime']
        if delete_time > start_time:
            client.delete_snapshot(SnapshotId=snapshot['SnapshotId'])
            print('Snapshot with Id = {} is deleted '.format(snapshot['SnapshotId']))
Reference:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.describe_snapshots