Delete snapshots except those with a specific description - python-3.x

I'm trying to delete old AWS snapshots, but I need to exclude any with a description value that starts with "Created by CreateImage".
I've tried variations of boto3.resource and boto3.client.
from datetime import datetime, timedelta, timezone
import boto3

client = boto3.client('ec2')
snapshots = client.snapshots.filter(Description!='Created by CreateImage')

def lambda_handler(event, context):
    for snapshot in snapshots:
        start_time = snapshot.start_time
        delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
        if delete_time > start_time:
            snapshot.delete()
            print('Snapshot with Id = {} is deleted '.format(snapshot.snapshot_id))
Right now I have about 10 snapshots older than 790 days: 5 with a description that starts with "Created by CreateImage" and 5 without. In testing this out, I want to delete the snapshots without that description.
The error I get is:
module initialization error: 'EC2' object has no attribute 'snapshots'

Here's a version that works.
Note the use of OwnerIds=['self'] which limits results to only snapshots created by your AWS account. Without this, it will return all publicly-available snapshots created by any AWS account.
from datetime import datetime, timedelta, timezone
import boto3

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
    ec2_resource = boto3.resource('ec2', region_name='ap-southeast-2')
    snapshots = ec2_resource.snapshots.filter(OwnerIds=['self'])
    for snapshot in snapshots:
        if not snapshot.description.startswith('Created by CreateImage') and delete_time > snapshot.start_time:
            snapshot.delete()
            print('Snapshot with Id = {} is deleted '.format(snapshot.snapshot_id))
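One caveat worth adding (my note, not part of the original answer): snapshots that still back a registered AMI cannot be deleted and raise an InvalidSnapshot.InUse client error, so a defensive variation wraps the delete in a try/except and skips those. A sketch under that assumption:

from datetime import datetime, timedelta, timezone
import boto3
from botocore.exceptions import ClientError

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
    ec2_resource = boto3.resource('ec2', region_name='ap-southeast-2')
    # OwnerIds=['self'] limits results to snapshots owned by this account
    for snapshot in ec2_resource.snapshots.filter(OwnerIds=['self']):
        if snapshot.description.startswith('Created by CreateImage'):
            continue
        if snapshot.start_time < delete_time:
            try:
                snapshot.delete()
                print('Snapshot with Id = {} is deleted'.format(snapshot.snapshot_id))
            except ClientError as e:
                # Snapshots still referenced by a registered AMI cannot be deleted
                if e.response['Error']['Code'] == 'InvalidSnapshot.InUse':
                    print('Snapshot {} is in use, skipping'.format(snapshot.snapshot_id))
                else:
                    raise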

You need to use describe_snapshots and pass the filters in properly.
Also, the results will be dictionaries rather than Snapshot resource objects, so you need to update the way you pull out the attributes and delete the snapshots.
Something like:
from datetime import datetime, timedelta, timezone
import boto3

client = boto3.client('ec2')
snapshots = client.describe_snapshots(Filters=[
    {
        'Name': 'description',
        'Values': [
            'Created by CreateImage',
        ]
    },
])['Snapshots']

def lambda_handler(event, context):
    for snapshot in snapshots:
        start_time = snapshot['StartTime']
        delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
        if delete_time > start_time:
            client.delete_snapshot(SnapshotId=snapshot['SnapshotId'])
            print('Snapshot with Id = {} is deleted '.format(snapshot['SnapshotId']))
Reference:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.describe_snapshots
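The filter above matches snapshots whose description is exactly 'Created by CreateImage'; since the question actually wants to exclude snapshots whose description starts with that prefix, one client-side variation (a sketch I'm adding, not part of the original answer) is to list your own snapshots with a paginator and skip the matching descriptions in Python:

from datetime import datetime, timedelta, timezone
import boto3

client = boto3.client('ec2')

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=790)
    paginator = client.get_paginator('describe_snapshots')
    # OwnerIds=['self'] restricts the listing to snapshots owned by this account
    for page in paginator.paginate(OwnerIds=['self']):
        for snapshot in page['Snapshots']:
            # Exclude AMI-created snapshots, then apply the age cutoff
            if snapshot['Description'].startswith('Created by CreateImage'):
                continue
            if snapshot['StartTime'] < delete_time:
                client.delete_snapshot(SnapshotId=snapshot['SnapshotId'])
                print('Snapshot with Id = {} is deleted'.format(snapshot['SnapshotId']))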

Related

Lambda Function to Stop RDS-Instances in all Regions Based on a Tag Filter Using Boto3

I have put together this code to stop RDS instances in all AWS regions. Currently, this code stops only instances in my default region. Is there something that I am missing here?
Tag:
The instances are first filtered by a tag whose Key = ttl and Value = some date.
Filter:
The filter returns only instances whose tag value is earlier than today's date.
import boto3
from datetime import datetime

current_date = datetime.today().strftime('%Y-%m-%d')
available_regions = boto3.Session().get_available_regions('rds')

def lambda_handler(event, context):
    for region in available_regions:
        rds = boto3.client('rds', region_name=region)
        # get all instances
        instances = rds.describe_db_instances()
        stopInstances = []
        # Locate all instances that are tagged for stop based on date.
        for instance in instances["DBInstances"]:
            # Example RDS Instance tags:
            tags = rds.list_tags_for_resource(ResourceName=instance["DBInstanceArn"])
            for tag in tags["TagList"]:
                if tag['Key'] == 'ttl' or tag['Key'] == '':
                    if tag['Value'] < current_date:
                        stopInstances.append(instance["DBInstanceIdentifier"])
                        rds.stop_db_instance(DBInstanceIdentifier=instance["DBInstanceIdentifier"])
                    pass
                pass
        # print if instances will stop.
        if len(stopInstances) > 0:
            print("stopInstances")
        else:
            print("No rds instances to shutdown.")
This code now works:
import boto3
from datetime import datetime

# Get current time in format yyyy-mm-dd
current_date = datetime.today().strftime('%Y-%m-%d')
# get all regions
available_regions = boto3.Session().get_available_regions('rds')

def lambda_handler(event, context):
    for region in available_regions:
        # define the boto3 connection for this region
        rds = boto3.client('rds', region_name=region)
        # Define instances
        instances = rds.describe_db_instances()
        stopInstances = []
        # Define and locate tags.
        for instance in instances["DBInstances"]:
            tags = rds.list_tags_for_resource(ResourceName=instance["DBInstanceArn"])
            for tag in tags["TagList"]:
                if tag['Key'] == 'ttl' or tag['Key'] == '':
                    if tag['Value'] < current_date:
                        # Stop instances matching the tag pattern
                        stopInstances.append(instance["DBInstanceIdentifier"])
                        rds.stop_db_instance(DBInstanceIdentifier=instance["DBInstanceIdentifier"])
        # print all instances that will stop.
        if len(stopInstances) > 0:
            print(stopInstances)
        else:
            print("No rds instances to shutdown.")

AWS Lambda Python script not iterating through 'aws_account_numbers' as expected

I have an AWS Lambda function written in Python 3.7. The function is set to delete snapshots older than 120 days. I'm running this from a management account and hitting 2 child accounts using 'aws_account_numbers'. The function runs successfully, but in CloudWatch the logs show the same snapshots being deleted for each account: the log shows snapshots x, y, z being deleted for accountA, and then the same snapshots x, y, z being deleted for accountB. What is really happening is that these snapshots do not live in accountA or accountB; they actually live in the management account.
from datetime import datetime, timedelta, timezone
import boto3
import collections
import sys
from botocore.exceptions import ClientError

region = 'us-east-1'
aws_account_numbers = {"accountA": "xxxxxxxxxxxx", "accountB": "xxxxxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=120)
    ec2_resource = boto3.resource('ec2')
    snapshots = ec2_resource.snapshots.filter(OwnerIds=['self'])
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-120days" % acctnum
        stsClient = boto3.client('sts')
        sts_response = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)
        ec2 = boto3.resource(service_name='ec2', region_name=region,
                             aws_access_key_id=sts_response['Credentials']['AccessKeyId'],
                             aws_secret_access_key=sts_response['Credentials']['SecretAccessKey'],
                             aws_session_token=sts_response['Credentials']['SessionToken'])
        for snapshot in snapshots:
            try:
                if not snapshot.description.startswith('Snapshot created by task soe-backup') and delete_time > snapshot.start_time:
                    #snapshot.delete()
                    print("Snapshot %s is deleted in acct: %s" % (snapshot, acctnum))
            except ClientError as e:
                if e.response['Error']['Code'] == 'InvalidSnapshot.InUse':
                    print("Snapshot %s in use in acct: %s" % (snapshot, acctnum))
                    continue
                else:
                    print("Unexpected error: %s" % e)
                    continue
    return 'Execution Complete'
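There is no accepted fix in this dump, but the symptom described above points at the snapshots collection: it is built once from boto3.resource('ec2') with the Lambda's own (management-account) credentials before any role is assumed, and the assumed-role ec2 resource is never used. A minimal sketch of the likely fix, assuming the role name and description filter from the question, builds the snapshot collection from the per-account resource inside the loop:

from datetime import datetime, timedelta, timezone
import boto3

region = 'us-east-1'
aws_account_numbers = {"accountA": "xxxxxxxxxxxx", "accountB": "xxxxxxxxxxxx"}  # masked, as in the question

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=120)
    sts_client = boto3.client('sts')
    for name, acctnum in aws_account_numbers.items():
        role_arn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-120days" % acctnum
        creds = sts_client.assume_role(RoleArn=role_arn,
                                       RoleSessionName='AssumeCrossAccountRole',
                                       DurationSeconds=1800)['Credentials']
        # This resource object carries the child account's temporary credentials
        ec2 = boto3.resource('ec2', region_name=region,
                             aws_access_key_id=creds['AccessKeyId'],
                             aws_secret_access_key=creds['SecretAccessKey'],
                             aws_session_token=creds['SessionToken'])
        # Iterate the child account's snapshots, not the management account's
        for snapshot in ec2.snapshots.filter(OwnerIds=['self']):
            if snapshot.description.startswith('Snapshot created by task soe-backup'):
                continue
            if snapshot.start_time < delete_time:
                # snapshot.delete()  # left commented out, as in the question
                print("Snapshot %s would be deleted in acct: %s" % (snapshot.snapshot_id, acctnum))
    return 'Execution Complete'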

Issue comparing datetime.now and EC2 image creation_date

I'm working on a Lambda function to delete AMIs (images) older than a set number of days. I'm comparing datetime.now with the image Creation_date. I can see that these values are returned in different formats.
datetime.now format - 2019-11-15 20:34:53.057320+00:00
image creation_date format - 2010-10-16T21:31:46.000Z
When I test this I get the error "'>' not supported between instances of 'datetime.datetime' and 'str'".
My code is below. I believe the issue is due to the different date formats.
from datetime import datetime, timedelta, timezone
import boto3
import collections
import sys
from botocore.exceptions import ClientError

region = 'us-east-1'
aws_account_numbers = {"MassIT-Engineering-Sandbox": "xxxxxxxxx"}

def lambda_handler(event, context):
    delete_time = datetime.now(tz=timezone.utc) - timedelta(days=320)
    print(delete_time)
    ec2 = boto3.resource('ec2', 'us-east-1')
    images = ec2.images.filter(Owners=["self"])
    for name, acctnum in aws_account_numbers.items():
        roleArn = "arn:aws:iam::%s:role/EOTSS-Snapshot-Cleanup-120days" % acctnum
        stsClient = boto3.client('sts')
        sts_response = stsClient.assume_role(RoleArn=roleArn, RoleSessionName='AssumeCrossAccountRole', DurationSeconds=1800)
        ec2 = boto3.resource(service_name='ec2', region_name=region,
                             aws_access_key_id=sts_response['Credentials']['AccessKeyId'],
                             aws_secret_access_key=sts_response['Credentials']['SecretAccessKey'],
                             aws_session_token=sts_response['Credentials']['SessionToken'])
        for image in images:
            try:
                if delete_time > image.creation_date:
                    image.delete()
                    print('AMI with Id = {} is deleted '.format(image.image_id))
            except ClientError as e:
                if e.response['Error']['Code'] == 'InvalidSnapshot.InUse':
                    print("Snapshot in use")
                    continue
                else:
                    print("Unexpected error: %s" % e)
                    continue
    return 'Execution Complete'
You need to convert image.creation_date from a string to a datetime.
Here is how it's done (written against the question's from datetime import datetime style of import):
t = datetime.strptime(image.creation_date, "%Y-%m-%dT%H:%M:%S.%fZ")
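Note that strptime returns a naive datetime, while delete_time in the question is timezone-aware, so comparing the two directly will raise a TypeError about comparing offset-naive and offset-aware datetimes. A minimal sketch of the full comparison, assuming the question's imports and that EC2 creation dates (the trailing Z) are UTC:

from datetime import datetime, timezone

creation_date = '2019-10-16T21:31:46.000Z'  # example value in EC2's format
# Parse the string, then attach UTC so it can be compared with an aware datetime
created = datetime.strptime(creation_date, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=timezone.utc)
delete_time = datetime.now(tz=timezone.utc)

if delete_time > created:
    print('image is older than the cutoff')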

How to download files from an S3 bucket based on the files' last modified date?

I want to download files from a particular S3 bucket based on the files' last modified date.
I have researched how to connect with boto3, and there is plenty of code and documentation available for downloading a file without any conditions. I made some pseudo code:
def download_file_s3(bucket_name, modified_date):
    # connect to resource s3
    s3 = boto3.resource('s3', aws_access_key_id='demo', aws_secret_access_key='demo')
    # connect to the desired bucket
    my_bucket = s3.Bucket(bucket_name)
    # Get files
    for file in my_bucket.objects.all():
I want to complete this function: basically, given a modified date, the function should return the files in the S3 bucket for that particular modified date.
I have a better solution: a function which does this automatically. Just pass in the bucket name and the download path.
from boto3.session import Session
from datetime import date, timedelta
import boto3
import re

def Download_pdf_specifc_date_subfolder(bucket_name, download_path):
    ACCESS_KEY = 'XYZ'
    SECRET_KEY = 'ABC'
    Bucket_name = bucket_name

    # code to create a session
    session = Session(aws_access_key_id=ACCESS_KEY,
                      aws_secret_access_key=SECRET_KEY)
    s3 = session.resource('s3')
    bucket = s3.Bucket(Bucket_name)

    # code to get yesterday's date
    yesterday = date.today() - timedelta(days=1)
    x = yesterday.strftime('20%y-%m-%d')
    print(x)

    # code to add the files which need to be downloaded to a list
    files_to_downloaded = []
    # code to take all the files from s3 under a specific bucket
    for fileObject in bucket.objects.all():
        file_name = str(fileObject.key)
        last_modified = str(fileObject.last_modified)
        last_modified = last_modified.split()
        if last_modified[0] == x:
            # Enter the specific prefix in the regex in place of Airports to filter only that particular subfolder
            if re.findall(r"Airports/[a-zA-Z]+", file_name):
                files_to_downloaded.append(file_name)

    # code to download into a specific folder
    for fileObject in bucket.objects.all():
        file_name = str(fileObject.key)
        if file_name in files_to_downloaded:
            print(file_name)
            d_path = download_path + file_name
            print(d_path)
            bucket.download_file(file_name, d_path)

Download_pdf_specifc_date_subfolder(bucket_name, download_path)
Ultimately the function will place the files to be downloaded in the specified folder.
Here is my test code; it will print the last_modified datetime of objects whose datetime is later than the one I set.
import boto3
from datetime import datetime
from datetime import timezone

s3 = boto3.resource('s3')
response = s3.Bucket('<bucket name>').objects.all()

for item in response:
    obj = s3.Object(item.bucket_name, item.key)
    if obj.last_modified > datetime(2019, 8, 1, 0, 0, 0, tzinfo=timezone.utc):
        print(obj.last_modified)
If you have a specific date, then
import boto3
from datetime import datetime, timezone

s3 = boto3.resource('s3')
response = s3.Bucket('<bucket name>').objects.all()

date = '20190827'  # input('Insert Date as a form YYYYmmdd')
for item in response:
    obj = s3.Object(item.bucket_name, item.key)
    if obj.last_modified.strftime('%Y%m%d') == date:
        print(obj.last_modified)
will give the results as follows.
2019-08-27 07:13:04+00:00
2019-08-27 07:13:36+00:00
2019-08-27 07:13:39+00:00
I edited this answer to download all files after a certain timestamp and then write the current time to a file for use in the next iteration. You can easily adapt this to only download files of a specific date, month, year, yesterday, etc.
import os
import boto3
import datetime
import pandas as pd

### Load AWS Key, Secret and Region
# ....
###

# Open file to read last download time and update file with current time
latesttime_file = "latest request.txt"
with open(latesttime_file, 'r') as f:
    latest_download = pd.to_datetime(f.read(), utc=True)
with open(latesttime_file, 'w') as f:
    f.write(str(datetime.datetime.utcnow()))

# Initialize S3-client
s3_client = boto3.client('s3',
                         region_name=AWS_REGION,
                         aws_access_key_id=AWS_KEY_ID,
                         aws_secret_access_key=AWS_SECRET)

def download_dir(prefix, local, bucket, timestamp, client=s3_client):
    """
    params:
    - prefix: pattern to match in s3
    - local: local path to folder in which to place files
    - bucket: s3 bucket with target contents
    - timestamp: only download objects modified after this datetime
    - client: initialized s3 client object
    """
    keys = []
    dirs = []
    next_token = ''
    base_kwargs = {
        'Bucket': bucket,
        'Prefix': prefix,
    }
    while next_token is not None:
        kwargs = base_kwargs.copy()
        if next_token != '':
            kwargs.update({'ContinuationToken': next_token})
        results = client.list_objects_v2(**kwargs)
        contents = results.get('Contents')
        for i in contents:
            k = i.get('Key')
            t = i.get('LastModified')
            if k[-1] != '/':
                if t > timestamp:
                    keys.append(k)
            else:
                dirs.append(k)
        next_token = results.get('NextContinuationToken')
    for d in dirs:
        dest_pathname = os.path.join(local, d)
        if not os.path.exists(os.path.dirname(dest_pathname)):
            os.makedirs(os.path.dirname(dest_pathname))
    for k in keys:
        dest_pathname = os.path.join(local, k)
        if not os.path.exists(os.path.dirname(dest_pathname)):
            os.makedirs(os.path.dirname(dest_pathname))
        client.download_file(bucket, k, dest_pathname)

download_dir(<prefix or ''>, <local folder to download to>, <bucketname>, latest_download)

How to import xlsx into dynamodb using python and boto3

I'm trying to use a LinuxAcademy posting on how to import Excel data into DynamoDB, but the code in the posting is two years old and does not work. Any tips or suggestions would be very helpful.
Sorry, I'm new to Stack Overflow.
I was trying to take an Excel spreadsheet, convert it to JSON, and upload it to DynamoDB like the posting on LinuxAcademy. The instructions are old and use three scripts to upload one file.
Here is the code I used to create an AWS Lambda Python function.
The only problem is that it reads in the Excel file and converts it to JSON, and the file is too big to ingest into DynamoDB before the 5-minute timeout. I will probably convert it to Step Functions, but this worked for me.
import boto3
import os
import sys
import uuid
import pandas as pd

s3_client = boto3.client('s3')
bucket = "serverless-record-storage-lambda"

def upload_to_dynamodb(report):
    df = pd.read_excel(report)
    df.columns = ["APPLICATION", "FORM_NUMBER", "FILE_DATE", "STATUS_DATE", "STATUS", "STATUS_CODE",
                  "EXPIRATION_DATE", "ESTIMATED COST", "REVISED_COST", "EXISTING_USE", "EXISTING_UNITS",
                  "PROPOSED_USE", "PROPOSED_UNITS", "PLANSETS", "15_DAY_HOLD?", "EXISTING_STORIES",
                  "PROPOSED_STORIES", "ASSESSOR_STORIES", "VOLUNTARY", "PAGES", "BLOCK", "LOT",
                  "STREET_NUMBER", "STREET_NUMBER_SFX", "AVS_STREET_NAME", "AVS_STREET_SFX", "UNIT",
                  "UNIT_SFX", "FIRST_NAME", "LAST_NAME", "CONTRACTORPHONE", "COMPANY_NAME",
                  "STREET_NUMBER", "STREET", "STREET_SUFFIX", "CITY", "STATE", "ZIP_CODE",
                  "CONTACT_NAME", "CONTACT_PHONE", "DESCRIPTION"]
    # Clean-up the data, change column types to strings to be on safer side :)
    df = df.replace({'-': '0'}, regex=True)
    df = df.fillna(0)
    for i in df.columns:
        df[i] = df[i].astype(str)
    # Convert dataframe to list of dictionaries (JSON) that can be consumed by any no-sql database
    myl = df.T.to_dict().values()
    # Connect to DynamoDB using boto
    resource = boto3.resource('dynamodb', region_name='us-west-2')
    # Connect to the DynamoDB table
    table = resource.Table('permitdata')
    # Load the JSON object created in the step 3 using put_item method
    for permit in myl:
        table.put_item(Item=permit)

def handler(event, context):
    for record in event['Records']:
        print(record)
        bucket = record['s3']['bucket']['name']
        print(bucket)
        key = record['s3']['object']['key']
        print(key)
        download_path = '/tmp/{}{}'.format(uuid.uuid4(), key)
        upload_path = '/tmp/resized-{}'.format(key)
        s3_client.download_file(bucket, key, download_path)
        upload_to_dynamodb(download_path)

def main():
    handler(event, None)

if __name__ == "__main__":
    main()
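On the 5-minute timeout mentioned above: one possible speed-up (a sketch of my own, not from the original post) is to push the rows through DynamoDB's batch_writer instead of calling put_item once per row; it buffers items into BatchWriteItem requests and retries unprocessed items for you. Assuming the same permitdata table and the myl list built in upload_to_dynamodb:

import boto3

def write_items_batched(items, table_name='permitdata', region_name='us-west-2'):
    # batch_writer() groups puts into BatchWriteItem calls (up to 25 items each)
    # and automatically resubmits any unprocessed items.
    table = boto3.resource('dynamodb', region_name=region_name).Table(table_name)
    with table.batch_writer() as batch:
        for item in items:
            batch.put_item(Item=item)

# In upload_to_dynamodb, the final put_item loop could then be replaced with:
# write_items_batched(myl)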
