Issue comparing datetime.now and EC2 image creation_date - python-3.x

I'm working on a Lambda function to delete AMIs (images) older than a set number of days. I'm comparing datetime.now with the image Creation_date. I can see that these values are returned in different formats.
datetime.now format - 2019-11-15 20:34:53.057320+00:00
image creation_date format - 2010-10-16T21:31:46.000Z
When I test this I get the error "'>' not supported between instances of 'datetime.datetime' and 'str'".
My code is below. I believe the issue is due to the different date formats.
from datetime import datetime, timedelta, timezone
import boto3
import collections
import sys
from botocore.exceptions import ClientError

region = 'us-east-1'
aws_account_numbers = {"MassIT-Engineering-Sandbox": "xxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=320)
    print(delete_time)
    ec2 = boto3.resource('ec2', 'us-east-1')
    images = ec2.images.filter(Owners=["self"])
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-120days" % acctnum
        stsClient = boto3.client('sts')
        sts_response = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)
        ec2 = boto3.resource(service_name='ec2', region_name=region,
                             aws_access_key_id=sts_response['Credentials']['AccessKeyId'],
                             aws_secret_access_key=sts_response['Credentials']['SecretAccessKey'],
                             aws_session_token=sts_response['Credentials']['SessionToken'])
        for image in images:
            try:
                if delete_time > image.creation_date:
                    image.delete()
                    print('AMI with Id = {} is deleted '.format(image.image_id))
            except ClientError as e:
                if e.response['Error']['Code'] == 'InvalidSnapshot.InUse':
                    print("Snapshot in use")
                    continue
                else:
                    print("Unexpected error: %s" % e)
                    continue
    return 'Execution Complete'

You need to convert image.creation_date from a string to a datetime.
Here is how it's done (the question already imports datetime with from datetime import datetime):

t = datetime.strptime(image.creation_date, "%Y-%m-%dT%H:%M:%S.%fZ")
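
One caveat: strptime returns a naive datetime, while datetime.now(tz=timezone.utc) is timezone-aware, and Python refuses to compare the two. Since the creation date is in UTC (the trailing Z), you can attach the timezone after parsing. A minimal sketch (the timestamp is just a sample in the creation_date format):

from datetime import datetime, timedelta, timezone

creation_date = "2019-10-16T21:31:46.000Z"  # sample value in the AMI creation_date format
# Parse the string, then mark it as UTC so it can be compared with aware datetimes
t = datetime.strptime(creation_date, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
delete_time = datetime.now(tz=timezone.utc) - timedelta(days=320)
print(delete_time > t)  # True once the AMI is older than the cutoff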

Related

How do I pass AMI tag values in lambda function written in python 3.7

I have the following Python 3.7 Lambda function and I want to delete AMIs that are older than 90 days but I want to exclude any AMIs that have the tag/value combo of 'amiarchive' / 'yes'. When I run this I get the error "list indices must be integers or slices, not str". I've done a bunch of research and I can't quite figure this out.
import boto3
import collections
import sys
from datetime import datetime, timedelta, timezone
from botocore.exceptions import ClientError

region = 'us-east-1'
aws_account_numbers = {"accountA": "xxxxxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now() - timedelta(days=90)
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-90days" % acctnum
        stsClient = boto3.client('sts')
        sts_response = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)
        ec2 = boto3.resource(service_name='ec2', region_name=region,
                             aws_access_key_id=sts_response['Credentials']['AccessKeyId'],
                             aws_secret_access_key=sts_response['Credentials']['SecretAccessKey'],
                             aws_session_token=sts_response['Credentials']['SessionToken'])
        ec = boto3.client('ec2', 'us-east-1')
        images = ec2.images.filter(Owners=["self"])
        tag = [{"Name": "tag:amiarchive", "Values": []}]
        for image in images:
            t = datetime.strptime(image.creation_date, "%Y-%m-%dT%H:%M:%S.%fZ")
            try:
                if delete_time > t and (tag['Value']) != yes:
                    print("AMI %s deregistered in acct: %s" % (image.image_id, acctnum))
                    response = image.deregister()
            except ClientError as e:
                if e.response['Error']['Code'] == 'InvalidImage.InUse':
                    print("Image in use")
                    continue
                else:
                    print("Unexpected error: %s" % e)
                    continue
    return 'Execution Complete'
You declare
tag = [{"Name": "tag:amiarchive", "Values": []}]
So tag is a list (array) that contains one item, which is a dict (object). You need an integer index to access that item, such as tag[0]. Once you have the dict at tag[0], you can read its keys such as "Name" and "Values". Your code calls tag['Value'], which is why you get "list indices must be integers or slices, not str".
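
A minimal sketch of the difference, using the same declaration:

tag = [{"Name": "tag:amiarchive", "Values": []}]
print(tag[0]["Name"])    # 'tag:amiarchive' -- index the list first, then key into the dict
print(tag[0]["Values"])  # [] -- the (empty) values list
# tag["Value"] raises TypeError: list indices must be integers or slices, not str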
I ended up re-working the code with a co-worker. I switched from boto3.resource to boto3.client and added an additional for loop that checks for the tag/value combination and skips the remaining steps when it is present. The following code does the trick.
import boto3
import collections
import sys
from datetime import datetime, timedelta, timezone
from botocore.exceptions import ClientError

region = 'us-east-1'
aws_account_numbers = {"AccountA": "xxxxxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now() - timedelta(days=67)
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-90days" % acctnum
        stsClient = boto3.client('sts')
        sts_response = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)
        ec2 = boto3.client(service_name='ec2', region_name=region,
                           aws_access_key_id=sts_response['Credentials']['AccessKeyId'],
                           aws_secret_access_key=sts_response['Credentials']['SecretAccessKey'],
                           aws_session_token=sts_response['Credentials']['SessionToken'])
        images2 = ec2.describe_images(Owners=['self'])
        for image in images2['Images']:
            # Check for the archive tag; a bare `continue` inside the tag loop would
            # only skip to the next tag, so use a flag to skip the whole image.
            archived = False
            if 'Tags' in image:
                for tag in image['Tags']:
                    if tag['Key'] == 'amiarchive' and tag['Value'] == 'yes':
                        print("Skipping image", image['ImageId'], "because it's archived")
                        archived = True
                        break
            if archived:
                continue
            t = datetime.strptime(image['CreationDate'], "%Y-%m-%dT%H:%M:%S.%fZ")
            try:
                if delete_time > t:
                    print("AMI %s deregistered in acct: %s" % (image['ImageId'], acctnum))
                    #response = ec2.deregister_image(ImageId=image['ImageId'])
            except ClientError as e:
                if e.response['Error']['Code'] == 'InvalidImage.InUse':
                    print("Image in use")
                    continue
                else:
                    print("Unexpected error: %s" % e)
                    continue
    return 'Execution Complete'
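
A possible refinement, assuming the same 'amiarchive'/'yes' tag convention: describe_images also accepts server-side tag filters, so the archived image IDs can be collected in one call and skipped, instead of walking each image's Tags list. Filters select rather than exclude, hence the two calls in this sketch:

import boto3

ec2 = boto3.client('ec2', region_name='us-east-1')

# One server-side filtered call collects the archived AMI ids
archived_ids = {
    img['ImageId']
    for img in ec2.describe_images(
        Owners=['self'],
        Filters=[{'Name': 'tag:amiarchive', 'Values': ['yes']}],
    )['Images']
}

for image in ec2.describe_images(Owners=['self'])['Images']:
    if image['ImageId'] in archived_ids:
        print("Skipping image", image['ImageId'], "because it's archived")
        continue
    # age check and deregister would go here, as in the code above
    print("Candidate for cleanup:", image['ImageId'])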

AWS Lambda Python script not iterating through 'aws_account_numbers' as expected

I have an AWS Lambda function written in Python 3.7 that deletes snapshots older than 120 days. I'm running it from a management account and hitting two child accounts via aws_account_numbers. The function runs successfully, but in CloudWatch the logs show the same snapshots being deleted for each account: snapshots x, y, z are reported deleted for accountA, then the same x, y, z for accountB. In reality those snapshots don't live in accountA or accountB at all; they live in the management account.
from datetime import datetime, timedelta, timezone
import boto3
import collections
import sys
from botocore.exceptions import ClientError

region = 'us-east-1'
aws_account_numbers = {"accountA": "xxxxxxxxxxxx", "accountB": "xxxxxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=120)
    ec2_resource = boto3.resource('ec2')
    snapshots = ec2_resource.snapshots.filter(OwnerIds=['self'])
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-120days" % acctnum
        stsClient = boto3.client('sts')
        sts_response = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)
        ec2 = boto3.resource(service_name='ec2', region_name=region,
                             aws_access_key_id=sts_response['Credentials']['AccessKeyId'],
                             aws_secret_access_key=sts_response['Credentials']['SecretAccessKey'],
                             aws_session_token=sts_response['Credentials']['SessionToken'])
        for snapshot in snapshots:
            try:
                if not snapshot.description.startswith('Snapshot created by task soe-backup') and delete_time > snapshot.start_time:
                    #snapshot.delete()
                    print("Snapshot %s is deleted in acct: %s" % (snapshot, acctnum))
            except ClientError as e:
                if e.response['Error']['Code'] == 'InvalidSnapshot.InUse':
                    print("Snapshot %s in use in acct: %s" % (snapshot, acctnum))
                    continue
                else:
                    print("Unexpected error: %s" % e)
                    continue
    return 'Execution Complete'
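
What the log shows is consistent with where snapshots is built: the collection is created once from boto3.resource('ec2') using the Lambda's own (management-account) credentials, before any role is assumed, so every pass of the account loop iterates the management account's snapshots, and the assumed-role ec2 resource is never used. A minimal sketch of a fix (building the collection from the assumed-role resource inside the loop; prints only, no deletion):

from datetime import datetime, timedelta, timezone
import boto3

region = 'us-east-1'
aws_account_numbers = {"accountA": "xxxxxxxxxxxx", "accountB": "xxxxxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=120)
    stsClient = boto3.client('sts')
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-120days" % acctnum
        creds = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole',
                                      DurationSeconds=1800)['Credentials']
        ec2 = boto3.resource('ec2', region_name=region,
                             aws_access_key_id=creds['AccessKeyId'],
                             aws_secret_access_key=creds['SecretAccessKey'],
                             aws_session_token=creds['SessionToken'])
        # Build the snapshot collection from the assumed-role resource so each pass
        # sees that child account's snapshots, not the management account's
        snapshots = ec2.snapshots.filter(OwnerIds=['self'])
        for snapshot in snapshots:
            if delete_time > snapshot.start_time:
                print("Snapshot %s is older than the cutoff in acct: %s" % (snapshot.snapshot_id, acctnum))
    return 'Execution Complete'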

How to download files from an S3 bucket based on the files' modified date?

I want to download files from a particular S3 bucket based on the files' last-modified date.
I have researched how to connect with boto3, and there is plenty of code and documentation for downloading a file without any conditions. I put together some pseudocode:
def download_file_s3(bucket_name, modified_date):
    # connect to the s3 resource
    s3 = boto3.resource('s3', aws_access_key_id='demo', aws_secret_access_key='demo')
    # connect to the desired bucket
    my_bucket = s3.Bucket(bucket_name)
    # Get files
    for file in my_bucket.objects.all():
        # keep only the files whose last-modified date matches modified_date
        ...
I want to complete this function: given a modified date, it should fetch the files in the S3 bucket with that particular modified date.
I ended up with a better solution: a function that does this automatically. Just pass in the bucket name and the download path.
from boto3.session import Session
from datetime import date, timedelta
import boto3
import re

def Download_pdf_specifc_date_subfolder(bucket_name, download_path):
    ACCESS_KEY = 'XYZ'
    SECRET_KEY = 'ABC'
    Bucket_name = bucket_name

    # code to create a session
    session = Session(aws_access_key_id=ACCESS_KEY,
                      aws_secret_access_key=SECRET_KEY)
    s3 = session.resource('s3')
    bucket = s3.Bucket(Bucket_name)

    # code to get yesterday's date, e.g. '2019-11-14'
    yesterday = date.today() - timedelta(days=1)
    x = yesterday.strftime('20%y-%m-%d')
    print(x)

    # code to add the files that need to be downloaded to a list
    files_to_downloaded = []
    # code to walk all the files in the bucket
    for fileObject in bucket.objects.all():
        file_name = str(fileObject.key)
        last_modified = str(fileObject.last_modified)
        last_modified = last_modified.split()
        if last_modified[0] == x:
            # Replace "Airports" in the regex with your own prefix to filter a particular subfolder
            if re.findall(r"Airports/[a-zA-Z]+", file_name):
                files_to_downloaded.append(file_name)

    # code to download the collected files into a specific folder
    for fileObject in bucket.objects.all():
        file_name = str(fileObject.key)
        if file_name in files_to_downloaded:
            print(file_name)
            d_path = download_path + file_name
            print(d_path)
            bucket.download_file(file_name, d_path)

# example call; define bucket_name and download_path first
Download_pdf_specifc_date_subfolder(bucket_name, download_path)
Ultimately the function downloads the matching files into the given folder.
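
Since fileObject.last_modified is already a timezone-aware datetime, a cleaner variant of the date test (a sketch, not part of the original function; bucket is the Bucket object from above) compares dates directly instead of splitting the string form:

from datetime import date, timedelta

yesterday = date.today() - timedelta(days=1)
for fileObject in bucket.objects.all():
    # last_modified is a tz-aware datetime; .date() keeps just the calendar day
    if fileObject.last_modified.date() == yesterday:
        print(fileObject.key)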
Here is my test code; it prints the last_modified datetime of objects whose datetime is after the one I set.
import boto3
from datetime import datetime
from datetime import timezone

s3 = boto3.resource('s3')
response = s3.Bucket('<bucket name>').objects.all()

for item in response:
    obj = s3.Object(item.bucket_name, item.key)
    if obj.last_modified > datetime(2019, 8, 1, 0, 0, 0, tzinfo=timezone.utc):
        print(obj.last_modified)
If you have a specific date, then
import boto3
from datetime import datetime, timezone

s3 = boto3.resource('s3')
response = s3.Bucket('<bucket name>').objects.all()
date = '20190827'  # input('Insert Date as a form YYYYmmdd')

for item in response:
    obj = s3.Object(item.bucket_name, item.key)
    if obj.last_modified.strftime('%Y%m%d') == date:
        print(obj.last_modified)
will give the results as follows.
2019-08-27 07:13:04+00:00
2019-08-27 07:13:36+00:00
2019-08-27 07:13:39+00:00
I edited this answer to download all files after a certain timestamp, and then write the current time to a file for use in the next iteration. You can easily adapt this to only download files of a specific date, month, year, yesterday, etc.
import os
import boto3
import datetime
import pandas as pd

### Load AWS Key, Secret and Region
# ....
###

# Open file to read last download time and update file with current time
latesttime_file = "latest request.txt"
with open(latesttime_file, 'r') as f:
    latest_download = pd.to_datetime(f.read(), utc=True)
with open(latesttime_file, 'w') as f:
    f.write(str(datetime.datetime.utcnow()))

# Initialize S3-client
s3_client = boto3.client('s3',
                         region_name=AWS_REGION,
                         aws_access_key_id=AWS_KEY_ID,
                         aws_secret_access_key=AWS_SECRET)

def download_dir(prefix, local, bucket, timestamp, client=s3_client):
    """
    params:
    - prefix: pattern to match in s3
    - local: local path to folder in which to place files
    - bucket: s3 bucket with target contents
    - timestamp: only download objects modified after this time
    - client: initialized s3 client object
    """
    keys = []
    dirs = []
    next_token = ''
    base_kwargs = {
        'Bucket': bucket,
        'Prefix': prefix,
    }
    # Page through list_objects_v2 until no continuation token is returned
    while next_token is not None:
        kwargs = base_kwargs.copy()
        if next_token != '':
            kwargs.update({'ContinuationToken': next_token})
        results = client.list_objects_v2(**kwargs)
        contents = results.get('Contents') or []  # guard against empty pages
        for i in contents:
            k = i.get('Key')
            t = i.get('LastModified')
            if k[-1] != '/':
                if t > timestamp:
                    keys.append(k)
            else:
                dirs.append(k)
        next_token = results.get('NextContinuationToken')
    for d in dirs:
        dest_pathname = os.path.join(local, d)
        if not os.path.exists(os.path.dirname(dest_pathname)):
            os.makedirs(os.path.dirname(dest_pathname))
    for k in keys:
        dest_pathname = os.path.join(local, k)
        if not os.path.exists(os.path.dirname(dest_pathname)):
            os.makedirs(os.path.dirname(dest_pathname))
        client.download_file(bucket, k, dest_pathname)

download_dir(<prefix or ''>, <local folder to download to>, <bucketname>, latest_download)

Delete snapshots except those with specific description

I'm trying to delete old AWS snapshots but I need to exclude any with a description value that starts with "Created by CreateImage".
I've tried variations of boto3.resource and boto3.client.
from datetime import datetime, timedelta, timezone
import boto3

client = boto3.client('ec2')
snapshots = client.snapshots.filter(Description!='Created by CreateImage')

def lambda_handler(event, context):
    for snapshot in snapshots:
        start_time = snapshot.start_time
        delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
        if delete_time > start_time:
            snapshot.delete()
            print('Snapshot with Id = {} is deleted '.format(snapshot.snapshot_id))
Right now I have about 10 snapshots older than 790 days, 5 with a description that starts with "Created by CreateImage" and 5 that don't. In testing this out I want to delete those snapshots without that description.
the error I get is:
module initialization error: 'EC2' object has no attribute 'snapshots'
Here's a version that works.
Note the use of OwnerIds=['self'] which limits results to only snapshots created by your AWS account. Without this, it will return all publicly-available snapshots created by any AWS account.
from datetime import datetime, timedelta, timezone
import boto3

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
    ec2_resource = boto3.resource('ec2', region_name='ap-southeast-2')
    snapshots = ec2_resource.snapshots.filter(OwnerIds=['self'])
    for snapshot in snapshots:
        if not snapshot.description.startswith('Created by CreateImage') and delete_time > snapshot.start_time:
            snapshot.delete()
            print('Snapshot with Id = {} is deleted '.format(snapshot.snapshot_id))
You need to use describe_snapshots, and the results come back as plain dictionaries rather than Snapshot resource objects, so you have to change how you pull out the attributes and how you delete each snapshot. Also note that Filters can only select matching snapshots, not exclude them, so the "description does not start with 'Created by CreateImage'" test has to happen client-side.
Something like:
from datetime import datetime, timedelta, timezone
import boto3

client = boto3.client('ec2')
# OwnerIds=['self'] limits results to your own snapshots
snapshots = client.describe_snapshots(OwnerIds=['self'])['Snapshots']

def lambda_handler(event, context):
    for snapshot in snapshots:
        # Exclude snapshots whose description starts with 'Created by CreateImage'
        if snapshot['Description'].startswith('Created by CreateImage'):
            continue
        start_time = snapshot['StartTime']
        delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
        if delete_time > start_time:
            client.delete_snapshot(SnapshotId=snapshot['SnapshotId'])
            print('Snapshot with Id = {} is deleted '.format(snapshot['SnapshotId']))
Reference:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.describe_snapshots
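
One more caveat: describe_snapshots returns up to 1,000 results per call, so accounts with many snapshots should use the paginator to be sure nothing is missed. A minimal sketch:

import boto3

client = boto3.client('ec2')
paginator = client.get_paginator('describe_snapshots')

# Walk every page of the account's own snapshots
for page in paginator.paginate(OwnerIds=['self']):
    for snapshot in page['Snapshots']:
        print(snapshot['SnapshotId'], snapshot['StartTime'])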

Boto3 script to query S3 bucket file dates is slow

I've written a simple script to find the latest file in S3 buckets. It works correctly but is extremely slow. Obviously it has a lot of files to check, but with something like S3 Browser the file information appears almost immediately. Have I done something wrong, or is this just a limitation of boto3?
#! /usr/bin/python3

import argparse
import boto3
from datetime import datetime
from datetime import timezone

def build_argparser():
    parser = argparse.ArgumentParser(description='List S3 buckets by file date.')
    parser.add_argument('-p', '--profile', help='Profile to use')
    return parser

if __name__ == "__main__":
    parser = build_argparser()
    args = parser.parse_args()
    if args.profile == None:
        s3 = boto3.resource('s3')
    else:
        profile = boto3.session.Session(profile_name=args.profile)
        s3 = profile.resource('s3')
    for bucket in s3.buckets.all():
        print(bucket.name)
        latest_key = ""
        latest_datetime = datetime
        for object in bucket.objects.all():
            #print('\t' + str(object.key) + ': ' + str(object.last_modified))
            if latest_datetime == datetime or latest_datetime < object.last_modified:
                latest_key = object.key
                latest_datetime = object.last_modified
        print('\t' + str(latest_key) + ': ' + str(latest_datetime))
