How can I delete, via Python, all the S3 "delete marker" placeholders filtered by "LastModified" (e.g. only those from yesterday)? - python-3.x

I modified some scripts to delete all the "delete marker" placeholders from a bucket in S3, but I never found or developed anything that can delete the delete markers filtered by datetime.
My goal is to create a script to run after a "delete_object_job" failure: I would set the datetime of the failure and delete only the delete markers created from that datetime onwards.
Currently, with this code, I can delete all the delete markers from a bucket, but without a datetime filter:
#!/usr/bin/env python
import json
from datetime import datetime
from boto3 import Session
# Target bucket; every listing and deletion call in this script operates on it.
BUCKET_NAME = "germano"
# Key prefix passed to the listing calls; the empty string applies no prefix restriction.
prefix = ''
session = Session(region_name='eu-south-1', profile_name='default')
# NOTE: despite its name, `bucket` is an S3 *client*, not a Bucket resource.
bucket = session.client('s3')
# Page size requested from list_object_versions.
# NOTE(review): S3 documents a ceiling of 1000 keys per response, so a larger
# value is presumably clamped by the service - confirm.
MAX_KEYS = 10000
def get_bucket_versions(version_id, key):
    """Fetch the next page of object versions, resuming after the given markers.

    Args:
        version_id: ``NextVersionIdMarker`` taken from the previous page.
        key: ``NextKeyMarker`` taken from the previous page.

    Returns:
        The raw ``list_object_versions`` response for ``BUCKET_NAME``/``prefix``.
    """
    return bucket.list_object_versions(
        Bucket=BUCKET_NAME,
        MaxKeys=MAX_KEYS,
        Prefix=prefix,
        VersionIdMarker=version_id,
        KeyMarker=key,
    )
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``datetime`` values as ISO-8601 strings."""

    def default(self, o):
        """Return ``o.isoformat()`` for datetimes; defer anything else to the base class.

        The base implementation raises ``TypeError`` for types it cannot encode.
        """
        if isinstance(o, datetime):
            return o.isoformat()
        return super().default(o)
# Get the attributes of the DateTimeEncoder class defined above
#DateTimeEncoder = DateTimeEncoder()
#attributes_of_DateTimeEncoder = dir(DateTimeEncoder)
#print(attributes_of_DateTimeEncoder)
def objects_to_restore(versions):
    """Pick the delete markers that are the current version of their key.

    Args:
        versions: One ``list_object_versions`` response page (a dict).

    Returns:
        A list of ``{'VersionId': ..., 'Key': ...}`` dicts, the shape that
        ``delete_objects`` expects. Empty when the page has no delete markers.
    """
    # The 'DeleteMarkers' key may be absent from a page, so fall back to an
    # empty list instead of iterating over None.
    return [
        {'VersionId': marker['VersionId'], 'Key': marker['Key']}
        for marker in versions.get('DeleteMarkers') or []
        if marker['IsLatest']
    ]
def restore_s3_objects(version_markers, count):
    """Delete the current delete markers found in one listing page.

    Args:
        version_markers: One ``list_object_versions`` response page.
        count: Sequence number of the page, used only in the progress message.

    Returns:
        The number of delete markers submitted for deletion (0 when none).
    """
    markers_to_delete = objects_to_restore(version_markers)
    print(f"Will restore {len(markers_to_delete)} items during request number: {count}")
    # Nothing selected on this page: skip the API call entirely.
    if not markers_to_delete:
        return 0
    bucket.delete_objects(Bucket=BUCKET_NAME, Delete={'Objects': markers_to_delete})
    return len(markers_to_delete)
# First page: listed without markers, so it starts at the beginning of the bucket.
obj_list = bucket.list_object_versions(
    Bucket=BUCKET_NAME,
    MaxKeys=MAX_KEYS,
    Prefix=prefix,
)
_next_version_id = obj_list.get('NextVersionIdMarker')
_next_key_marker = obj_list.get('NextKeyMarker')
counter = 1
total_restored = restore_s3_objects(obj_list, counter)

# Keep requesting pages for as long as the previous response handed back both
# continuation markers.
while _next_version_id and _next_key_marker:
    counter += 1
    another_list_of_versions = get_bucket_versions(_next_version_id, _next_key_marker)
    _next_version_id = another_list_of_versions.get('NextVersionIdMarker')
    _next_key_marker = another_list_of_versions.get('NextKeyMarker')
    total_restored += restore_s3_objects(another_list_of_versions, counter)

print(f"Total Restored: {total_restored}")

I solved it by slightly modifying the function "objects_to_restore":
def objects_to_restore(versions, last_modified_timestamp="2022-04-28 09:19:56.986445+00:00"):
    """Pick the current delete markers written at or after a cut-off time.

    Args:
        versions: One ``list_object_versions`` response page (a dict).
        last_modified_timestamp: Cut-off, either a ``datetime`` or an ISO-8601
            string such as ``"2022-04-28 09:19:56.986445+00:00"``. A value
            without a UTC offset is treated as UTC.

    Returns:
        A list of ``{'VersionId': ..., 'Key': ...}`` dicts for every marker
        with ``IsLatest`` set whose ``LastModified`` is >= the cut-off.
    """
    from datetime import datetime, timezone

    cutoff = last_modified_timestamp
    if not isinstance(cutoff, datetime):
        cutoff = datetime.fromisoformat(str(cutoff))
    if cutoff.tzinfo is None:
        cutoff = cutoff.replace(tzinfo=timezone.utc)

    # Compare real datetimes, not their string forms: string ordering is wrong
    # as soon as two timestamps carry different UTC offsets.
    # NOTE(review): assumes 'LastModified' is a timezone-aware datetime, as
    # returned by boto3 - confirm if pages come from another source.
    return [
        {'VersionId': marker['VersionId'], 'Key': marker['Key']}
        for marker in versions.get('DeleteMarkers') or []
        if marker['IsLatest'] and marker['LastModified'] >= cutoff
    ]

Related

Boto3 get list of all ec2 instances with ebs volume id, size to an excel

Export AWS EC2 details to xlsx/csv using boto3 and Python - this works, but gathering the EBS volumes (ID, type and size) attached to each EC2 instance and appending them to the same row in the spreadsheet is challenging for me. The snippet below just appends the volume info on the next row. If I use a separate function and call it in "result.append", I can fetch only the first volume. If I return multiple values from the function (volume ID, volume size, volume type), all three values end up in the same cell instead of a separate column for each. Please help. I'm obviously still learning.
# For every volume, append one row per attachment that belongs to the
# instance currently being processed (`each`).
volume_iterator = ec3.volumes.all()
for v in volume_iterator:
    for a in v.attachments:
        if a['InstanceId'] == each['InstanceId']:
            result.append({
                'volume.id': v.id,
                'volume.size': v.size,
                # The volume type belongs under 'volume.type' (the column
                # named in the CSV header), not 'volume.state'.
                'volume.type': v.volume_type,
            })
Final output in CSV looks like below. All the volume related values are in the same column "volume.id". Volume info should be separated.
ImageId InstanceType InstanceId InstanceName volume.id volume.type volume.size
ami-042e828f5df03 t3.large i-07db6118eb51e <server_name> [{8, 'vol-0085fdebc7', 'gp3'}, {'vol-0d417698824e', 'gp3', 128}]
This works.
import boto3
import csv
import datetime
import logging
from os import environ
import collections
import time
import sys
# Local testing: set `profilename` to a named AWS profile. Before deploying to
# Lambda, switch lambda_handler to the profile-less boto3.Session(...) line that
# is commented out next to the profile-based one.
profilename='<>'
# Output CSV path written by lambda_handler.
aws_Acct='<>.csv'
# NOTE(review): not referenced anywhere in the visible code - confirm before removing.
volume_id_list=[]
# Rows of the inventory; lambda_handler appends one dict per instance.
result = []
# Regions scanned by lambda_handler.
regions = ['us-east-1', 'us-east-2', 'us-west-1', 'us-west-2']
#regions = ['us-east-1']
#Name Tag
def get_tag(each, tag_name):
    """Return the value of the tag named ``tag_name`` on ``each``.

    Args:
        each: An instance description dict; its optional ``'Tags'`` entry is a
            list of ``{'Key': ..., 'Value': ...}`` dicts.
        tag_name: Tag key to look up.

    Returns:
        The tag value, or ``''`` when the instance has no such tag (or no tags).
    """
    for tag in each.get('Tags') or []:
        if tag['Key'] == tag_name:
            return tag['Value']
    return ''
#Volumes
def get_vol(each, ec2):
    """Collect the volumes attached to one instance as newline-separated cells.

    Args:
        each: Instance description dict; ``each['InstanceId']`` is used as the
            ``attachment.instance-id`` filter.
        ec2: Object exposing ``describe_volumes`` (an EC2 client).

    Returns:
        A dict with keys ``vol_id``, ``vol_size`` and ``vol_type``. Each value
        is a string holding one entry per volume, every entry followed by a
        newline; all three are ``''`` when no volume is attached.
    """
    resp = ec2.describe_volumes(
        Filters=[{'Name': 'attachment.instance-id', 'Values': [each['InstanceId']]}]
    )
    volumes = resp["Volumes"]
    return {
        "vol_id": "".join(f"{volume['VolumeId']}\n" for volume in volumes),
        "vol_size": "".join(f"{volume['Size']}\n" for volume in volumes),
        "vol_type": "".join(f"{volume['VolumeType']}\n" for volume in volumes),
    }
#Security Groups
def sec_gp(each, ec2):
    """Collect an instance's security groups as newline-separated cells.

    Args:
        each: Instance description dict; its ``'SecurityGroups'`` entry is a
            list of ``{'GroupId': ..., 'GroupName': ...}`` dicts.
        ec2: Unused; kept so the signature matches ``get_vol``.

    Returns:
        A dict with keys ``sg_id`` and ``sg_name``. Each value holds one entry
        per group, every entry followed by a newline; both are ``''`` when the
        instance has no security groups.
    """
    groups = each.get('SecurityGroups') or []
    return {
        "sg_id": "".join(f"{sg['GroupId']}\n" for sg in groups),
        "sg_name": "".join(f"{sg['GroupName']}\n" for sg in groups),
    }
def lambda_handler(event, context):
    """Build an EC2 inventory across ``regions`` and write it to ``aws_Acct`` as CSV.

    One row is produced per instance, combining instance attributes with the
    attached volumes (``get_vol``) and security groups (``sec_gp``). Any
    exception is logged with its traceback and not re-raised.

    Args:
        event: Unused.
        context: Unused.
    """
    try:
        logging.basicConfig(level=logging.INFO)
        logging.info('EC2 Inventory details')
        # `result` is module-level: start clean so a second invocation in the
        # same process (e.g. a reused Lambda container) does not duplicate rows.
        result.clear()
        for region in regions:
            session = boto3.Session(profile_name=profilename, region_name=region)
            #session = boto3.Session(region_name=region)
            ec2 = session.client('ec2')
            # describe_instances is paginated; a single call can silently
            # truncate the inventory in large accounts.
            paginator = ec2.get_paginator('describe_instances')
            for page in paginator.paginate():
                for item in page["Reservations"]:
                    for each in item['Instances']:
                        volsss = get_vol(each, ec2)
                        sgss = sec_gp(each, ec2)
                        result.append({
                            'ImageId': each.get('ImageId', ''),
                            'InstanceType': each.get('InstanceType', ''),
                            'PublicIp': each.get('PublicIpAddress', ''),
                            'PrivateIp': each.get('PrivateIpAddress', ''),
                            'InstanceId': each.get('InstanceId', ''),
                            'SubnetId': each.get('SubnetId', ''),
                            'VpcId': each.get('VpcId', ''),
                            'InstanceName': get_tag(each, 'Name'),
                            'volume.size': volsss['vol_size'],
                            'volume.id': volsss['vol_id'],
                            'volume.type': volsss['vol_type'],
                            # NOTE(review): 'DeleteOnTermination' looks like a
                            # per-volume attribute (BlockDeviceMappings[].Ebs),
                            # so this is presumably always '' - confirm.
                            'DeleteOnTermination': each.get('DeleteOnTermination', ''),
                            'SGGroupName': sgss['sg_name'],
                            'SGGroupID': sgss['sg_id'],
                            'State': each['State']['Name'],
                            # NOTE: this column holds the availability zone.
                            'Region': each['Placement']['AvailabilityZone'],
                        })
        # Write to csv file.
        header = ['ImageId', 'InstanceType', 'InstanceId', 'InstanceName', 'PublicIp', 'PrivateIp', 'Region', 'State', 'volume.id', 'volume.size', 'volume.type', 'SubnetId', 'VpcId', 'SGGroupName', 'SGGroupID', 'DeleteOnTermination']
        # newline='' lets the csv module control line endings; this matters
        # here because several cells contain embedded newlines.
        with open(aws_Acct, 'w', newline='') as file:
            writer = csv.DictWriter(file, fieldnames=header)
            writer.writeheader()
            writer.writerows(result)
    except Exception as e:
        # Top-level boundary: keep the traceback instead of only the message.
        logging.exception('EC2 inventory with uncaught exception: %s', e)


if __name__ == '__main__':
    lambda_handler(None, None)
Final output looks like:

Write file name based on return

I'm creating a boto3 script that collects our entire account's public IPs and NAT gateway IPs and uploads them to our S3 bucket. I'm stuck on writing files for both return values. Ideally I would like to write two separate files while still using the same filename variable you see in main(). Right now I can get this to work with only one return value (either nat_ips or public_ips).
import boto3
from datetime import datetime
from csv import writer
def get_ips():
    """Collect NAT gateway and tagged Elastic IP addresses from every region.

    Assumes the ``foo`` role through STS, enumerates all regions, then for each
    region gathers the NAT gateway public IPs and the Elastic IPs whose
    ``Name`` tag matches ``*sgw-eip``.

    Returns:
        A ``(nat_ips, public_ips)`` tuple of lists. ``nat_ips`` entries look
        like ``"<ip>/32"``; ``public_ips`` entries look like
        ``"<Name tag> : <ip>"``.
    """
    # Uses STS to assume the role needed.
    boto_sts = boto3.client('sts')
    sts_response = boto_sts.assume_role(
        RoleArn='arn:aws:iam::1234:role/foo',
        RoleSessionName='Foo'
    )
    # Save the details from assumed role so every client can reuse them.
    sts_credentials = sts_response["Credentials"]
    credentials = {
        'aws_access_key_id': sts_credentials["AccessKeyId"],
        'aws_secret_access_key': sts_credentials["SecretAccessKey"],
        'aws_session_token': sts_credentials["SessionToken"],
    }
    # List and store all the regions
    ec2_client = boto3.client('ec2', region_name='us-west-1', **credentials)
    all_regions = [region['RegionName'] for region in ec2_client.describe_regions()['Regions']]

    nat_ips = []
    public_ips = []
    filters = [{'Name': 'tag:Name', 'Values': ['*sgw-eip']}]
    max_results = 1000
    for region in all_regions:
        ec2_client = boto3.client('ec2', region_name=region, **credentials)

        # Elastic IPs: queried once per region. Running this inside the NAT
        # pagination loop would append the same addresses once per page.
        addresses = ec2_client.describe_addresses(Filters=filters)
        for eip_dict in addresses['Addresses']:
            # Look the Name tag up explicitly; it is not guaranteed to be the
            # first tag in the list.
            name = next(
                (tag['Value'] for tag in eip_dict.get('Tags', []) if tag['Key'] == 'Name'),
                '',
            )
            public_ips.append(name + ' : ' + eip_dict['PublicIp'])

        # NAT gateways: follow NextToken until the service stops returning one.
        # The token is only sent once we actually have one.
        request = {'MaxResults': max_results}
        while True:
            response = ec2_client.describe_nat_gateways(**request)
            for gateway in response["NatGateways"]:
                for address in gateway["NatGatewayAddresses"]:
                    # Skip address entries that carry no public IP.
                    if "PublicIp" in address:
                        nat_ips.append(address["PublicIp"] + '/32')
            next_token = response.get("NextToken")
            if not next_token:
                break
            request['NextToken'] = next_token
    return nat_ips, public_ips
def _s3_upload(filename):
    """Upload the local file ``filename`` to bucket ``foo-bar`` under the ``foo/`` prefix.

    Args:
        filename: Path of the local file; also used as the object name after
            the prefix.
    """
    s3 = boto3.resource('s3')
    bucket = 'foo-bar'
    object_name = 'foo/'
    s3.meta.client.upload_file(Filename=filename, Bucket=bucket, Key=object_name + filename)
    print(f'Uploading {filename} to {bucket}')
def write_list_to_file(filename, data):
    """Write each item of ``data`` to ``filename``, one per line.

    Items are converted with ``str`` and separated by ``"\\n"``; no trailing
    newline is written. An existing file is overwritten.

    Args:
        filename: Destination path.
        data: Iterable of items to write.
    """
    lines_string = '\n'.join(str(x) for x in data)
    with open(filename, 'w') as output:
        # A single string goes through write(); writelines() would iterate it
        # character by character.
        output.write(lines_string)
    print(f'Writing file to {filename}')
if __name__ == "__main__":
date = datetime.now().strftime('%Y%m%d')
# Stuck here since I want to make it one variable
filename_nat_ips = f'natgateway_ips{date}.csv'
filename_sga_ips = f'sga_ips{date}.csv'
# NOTE: get_ips() returns the tuple (nat_ips, public_ips), so each of the two
# names below is bound to the whole tuple, and the full AWS scan runs twice.
public_ips = get_ips()
nat_ips = get_ips()
# NOTE: `filename` is never assigned in this snippet; only filename_nat_ips and
# filename_sga_ips exist, so the remaining lines raise NameError.
print(filename)
write_list_to_file(filename, nat_ips)
_s3_upload(filename)
I see that you are already returning a tuple of public_ips and nat_ips from your get_ips() function. So in your main, you could collect them together as well.
You might try something like this:
if __name__ == "__main__":
    date = datetime.now().strftime('%Y%m%d')
    filename_nat_ips = f'natgateway_ips{date}.csv'
    filename_sga_ips = f'sga_ips{date}.csv'
    # get_ips() returns (nat_ips, public_ips); unpack both from a single call.
    nat_ips, public_ips = get_ips()
    # The public-IP file name is `filename_sga_ips`; no `filename_public_ips`
    # variable is defined.
    write_list_to_file(filename_nat_ips, nat_ips)
    write_list_to_file(filename_sga_ips, public_ips)
    _s3_upload(filename_nat_ips)
    _s3_upload(filename_sga_ips)
I was doing it right the first time. And was trying to make it more complicated.
if __name__ == "__main__":
    # One dated file per result list: NAT gateway IPs and tagged public IPs.
    date = datetime.now().strftime('%Y%m%d')
    filename_nat_ips = f'natgateway_ips{date}.csv'
    filename_sga_ips = f'sga_ips{date}.csv'
    nat_ips, public_ips = get_ips()
    print(filename_nat_ips)
    print(filename_sga_ips)
    write_list_to_file(filename_nat_ips, nat_ips)
    write_list_to_file(filename_sga_ips, public_ips)
    _s3_upload(filename_nat_ips)
    _s3_upload(filename_sga_ips)

Python Dictionary using keys

group_by owners
My code:
def group_by_owners(files):
# NOTE: `files[key] in files` tests whether the owner name is itself a key
# (i.e. a file name) of `files`, and the first match returns a single
# (owner, file) tuple, so no owner -> [files] grouping is ever built.
for key in files.keys():
if files[key] in files:
return files[key],key
# Sample input: maps each file name to the name of its owner.
files = {
'Input.txt': 'Randy',
'Code.py': 'Stan',
'Output.txt': 'Randy'
}
print(group_by_owners(files))
Expected output:
{'Randy': ['Input.txt','Output.txt'], 'Stan':['Code.py']}
def group_by_owners(files):
    """Invert a ``file -> owner`` mapping into ``owner -> [files]``.

    Args:
        files: Dict mapping file names to owner names.

    Returns:
        A dict mapping each owner to the list of their files, in the order the
        files appear in ``files``.
    """
    new_dic = {}
    for key, val in files.items():
        # setdefault creates the owner's list on first sight, then reuses it.
        new_dic.setdefault(val, []).append(key)
    return new_dic
Output:
{'Randy': ['Input.txt', 'Output.txt'], 'Stan': ['Code.py']}

Use dictionary instead of list of dictionary to reduce program complexity

I am trying to validate the consistency between two DynamoDB tables. I used a list of dictionaries to store the DynamoDB table items, which takes a long time to execute.
I am new to Python; any help converting the list of dictionaries to a dictionary, to reduce my program's complexity, would be appreciated.
#!/usr/bin/python
import sys
import boto3
import argparse
import argparse
def table_consistency_check(table, column_name):
    """Scan a DynamoDB table and extract the account name plus one column.

    Args:
        table: Name of the table to scan.
        column_name: Name of the string attribute to read alongside
            ``account_name``.

    Returns:
        A list with one ``{'account_name': ..., column_name: ...}`` dict per
        scanned item, in scan order.
    """
    # Uses the module-level `dynamoClient`; the paginator follows the scan's
    # continuation keys for us.
    paginator = dynamoClient.get_paginator('scan')
    modified_accounts = []
    for page in paginator.paginate(TableName=table):
        for item in page['Items']:
            modified_accounts.append({
                'account_name': item['account_name']['S'],
                column_name: item[column_name]['S'],
            })
    return modified_accounts
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Find all accounts with different license key and account key hash')
    parser.add_argument('-r', '--region', nargs='?', type=str, default='us-west-2')
    # parse_args() exits via SystemExit on bad input or --help. Let it
    # propagate: swallowing it would leave `args` undefined below.
    args = parser.parse_args()
    accounts_table = 'accounts_table'
    Credentail_table = 'credential_table'
    dynamoClient = boto3.client('dynamodb', region_name=args.region)
    account1 = table_consistency_check(accounts_table, 'license_key')
    account2 = table_consistency_check(Credentail_table, 'access_key_hash')

    # Index the credential rows by account name once, so each account row is
    # matched with a dict lookup instead of a scan over every credential row.
    # A list per name keeps every row (and its order) when a name repeats.
    creds_by_account = {}
    for creds_item in account2:
        creds_by_account.setdefault(creds_item['account_name'], []).append(creds_item)

    key_prefix = 'ORIGINAL_KEY_'
    output = []
    for acct_item in account1:
        for creds_item in creds_by_account.get(acct_item['account_name'], ()):
            access_key_hash = creds_item['access_key_hash']
            if not access_key_hash.startswith(key_prefix):
                continue
            # Compare the license key with the hash minus its prefix.
            if acct_item['license_key'] != access_key_hash[len(key_prefix):]:
                output.append(creds_item['account_name'])
                print('Duplicate record found')
                print('Account Name : ' + acct_item['account_name'] + ', License Key : ' + acct_item[
                    'license_key'] + ', Access Key Hash : ' + creds_item['access_key_hash'])
    if not output:
        print('the tables are consistent, No duplicate item found')

Dictionary with functions versus dictionary with class

I'm creating a game where the data is imported from a database, but I have a small problem...
Currently I get a copy of the data as a dictionary, which I need to pass as an argument to my GUI; however, I also need to process some of the data, as in this example:
I get the data as a dict (I've created the UseDatabase context manager and it is working):
def get_user(name: str, passwd: str):
    """Look up a user by name and password.

    Args:
        name: User name to match.
        passwd: Password to match.

    Returns:
        A dict with keys ``name``, ``passwd``, ``id``, ``cash`` and ``ruby``
        when a matching row exists; otherwise the empty ``fetchall()`` result.
    """
    user = {}
    user['name'] = name
    user['passwd'] = passwd
    with UseDatabase() as cursor:
        # Filter on the arguments rather than hard-coded credentials, and pass
        # them as bound parameters so they are never spliced into the SQL text.
        # NOTE(review): %s is the DB-API "format" paramstyle (e.g. MySQL
        # drivers); use ? for "qmark" drivers such as sqlite3 - confirm against
        # the driver behind UseDatabase.
        _SQL = "SELECT id, cash, ruby FROM user WHERE name=%s AND password=%s"
        cursor.execute(_SQL, (name, passwd))
        res = cursor.fetchall()
        if res:
            # Only the first matching row is used.
            user['id'] = res[0][0]
            user['cash'] = res[0][1]
            user['ruby'] = res[0][2]
            return user
        return res
.
.
.
def get_activities():
    """Load the activities of user 2.

    Returns:
        A ``(ids, activities)`` tuple when rows exist: ``ids`` is the list of
        first-column values and ``activities`` maps each of them to
        ``{'title': ..., 'unlock': ..., 'usr_progress': ...}`` built from
        columns 1-3. Otherwise the empty ``fetchall()`` result is returned.
    """
    with UseDatabase() as cursor:
        # NOTE: with SELECT *, the positional indexes below depend on the
        # table's column order.
        _SQL = "SELECT * FROM activities WHERE user_id='2'"
        cursor.execute(_SQL)
        res = cursor.fetchall()
        if not res:
            return res
        ids = [row[0] for row in res]
        activities = {
            row[0]: {'title': row[1], 'unlock': row[2], 'usr_progress': row[3]}
            for row in res
        }
        return (ids, activities)
Need it as a dict in my GUI ("content" argument):
class SideBar:
    """Vertical list of activity boxes drawn on the left of the screen."""

    def __init__(self, screen: 'pygame.display.set_mode()', box_width: int, box_height: int, content: dict, font: 'font = pygame.font.Font()'):
        """Store the drawing parameters and draw the bar once.

        Args:
            screen: Surface to draw on.
            box_width: Stored on the instance; not read by ``generate_bar``.
            box_height: Vertical distance between consecutive boxes.
            content: ``{id: {'title': '', 'unlock': '', 'usr_progress': ''}, ...}``.
            font: Font used to render each entry's label.
        """
        self.box_width = box_width
        self.box_height = box_height
        # Number of boxes that fit vertically on the screen.
        self.box_per_screen = screen.get_height() // box_height
        self.content = content
        self.current_box = 1
        self.screen = screen
        self.font = font
        self.generate_bar()

    def generate_bar(self):
        """Draw the boxes, the labels of the visible entries, and the outlines."""
        visible_ids = range(self.current_box, self.current_box + self.box_per_screen)
        active = [i for i in self.content if i in visible_ids]
        # NOTE(review): every box is a third of the screen in both dimensions,
        # independent of box_width/box_height - confirm this is intended.
        box_size = (self.screen.get_width() / 3, self.screen.get_height() / 3)
        for i in range(self.box_per_screen):
            gfxdraw.box(self.screen, pygame.Rect((0, i * self.box_height), box_size), (249, 0, 0, 170))
            # There may be fewer visible entries than boxes; leave the
            # remaining boxes unlabeled instead of indexing past the end.
            if i < len(active):
                label = str(active[i]) + ' - ' + self.content[active[i]]['title']
                self.screen.blit(self.font.render(label, True, (255, 255, 255)), (10, i * self.box_height + 4))
        for i in range(self.box_per_screen):
            pygame.draw.rect(self.screen, (50, 0, 0), pygame.Rect((0, i * self.box_height), box_size), 2)
But still need to make some changes in the data:
def unlock_act(act_id):
    """Unlock activity ``act_id`` if the user can afford it and has not started it.

    Reads and mutates the module-level ``user`` and ``activities`` dicts: the
    unlock cost is deducted from ``user['cash']`` and the activity's
    ``usr_progress`` is set to 1. Does nothing otherwise.
    """
    if user['cash'] >= activities[act_id]['unlock'] and activities[act_id]['usr_progress'] == 0:
        user['cash'] -= activities[act_id]['unlock']
        activities[act_id]['usr_progress'] = 1
So the question is: in this situation, should I keep a copy of the data as a dict and create a class that holds it plus the methods I need, or should I use functions to edit the data inside the dict?

Resources