Write file name based on return - python-3.x

I'm creating a boto3 script that scrapes our entire account's public IPs and NAT gateway IPs and uploads them to our S3 bucket. I'm stuck on writing files for both returns. I would ideally like to write two separate files while still using the same filename variable you see in main(). Right now I can get this to work with only one return (either nat_ips or public_ips).
import boto3
from datetime import datetime
from csv import writer

def get_ips():
    # Uses STS to assume the role needed.
    boto_sts = boto3.client('sts')
    sts_response = boto_sts.assume_role(
        RoleArn='arn:aws:iam::1234:role/foo',
        RoleSessionName='Foo'
    )
    # Save the details from the assumed role into vars
    sts_credentials = sts_response["Credentials"]
    session_id = sts_credentials["AccessKeyId"]
    session_key = sts_credentials["SecretAccessKey"]
    session_token = sts_credentials["SessionToken"]
    # List and store all the regions
    ec2_client = boto3.client('ec2', aws_access_key_id=session_id, aws_secret_access_key=session_key, aws_session_token=session_token, region_name='us-west-1')
    all_regions = [region['RegionName'] for region in ec2_client.describe_regions()['Regions']]
    nat_ips = []
    public_ips = []
    for region in all_regions:
        max_results = 1000
        next_token = ''
        ec2_client = boto3.client('ec2', aws_access_key_id=session_id, aws_secret_access_key=session_key, aws_session_token=session_token, region_name=region)
        session = boto3.Session(aws_access_key_id=session_id, aws_secret_access_key=session_key, aws_session_token=session_token, region_name=region)
        while next_token or next_token == '':
            response = ec2_client.describe_nat_gateways(MaxResults=max_results, NextToken=next_token)
            filters = [{'Name': 'tag:Name', 'Values': ['*sgw-eip']}]
            get_ips = ec2_client.describe_addresses(Filters=filters)
            for gateway in response["NatGateways"]:
                for address in gateway["NatGatewayAddresses"]:
                    nat_ips.append(address["PublicIp"] + '/32')
            for eip_dict in get_ips['Addresses']:
                public_ip_string = eip_dict['Tags'][0]['Value'] + ' : ' + eip_dict['PublicIp']
                public_ips.append(public_ip_string)
            next_token = response.get("NextToken", None)
    return nat_ips, public_ips

def _s3_upload(filename):
    s3 = boto3.resource('s3')
    bucket = 'foo-bar'
    object_name = 'foo/'
    s3.meta.client.upload_file(Filename=filename, Bucket=bucket, Key=object_name + filename)
    print(f'Uploading {filename} to {bucket}')

def write_list_to_file(filename, data):
    lines_string = '\n'.join(str(x) for x in data)
    with open(filename, 'w') as output:
        output.writelines(lines_string)
    print(f'Writing file to {filename}')

if __name__ == "__main__":
    date = datetime.now().strftime('%Y%m%d')
    # Stuck here since I want to make it one variable
    filename_nat_ips = f'natgateway_ips{date}.csv'
    filename_sga_ips = f'sga_ips{date}.csv'
    public_ips = get_ips()
    nat_ips = get_ips()
    print(filename)
    write_list_to_file(filename, nat_ips)
    _s3_upload(filename)

I see that you are already returning a tuple of nat_ips and public_ips from your get_ips() function, so in your main you can collect them both as well.
You might try something like this:
if __name__ == "__main__":
    date = datetime.now().strftime('%Y%m%d')
    filename_nat_ips = f'natgateway_ips{date}.csv'
    filename_sga_ips = f'sga_ips{date}.csv'
    nat_ips, public_ips = get_ips()
    write_list_to_file(filename_nat_ips, nat_ips)
    write_list_to_file(filename_sga_ips, public_ips)
    _s3_upload(filename_nat_ips)
    _s3_upload(filename_sga_ips)

I was doing it right the first time and was just trying to make it more complicated than it needed to be.
if __name__ == "__main__":
    date = datetime.now().strftime('%Y%m%d')
    filename_nat_ips = f'natgateway_ips{date}.csv'
    filename_sga_ips = f'sga_ips{date}.csv'
    nat_ips, public_ips = get_ips()
    print(filename_nat_ips)
    print(filename_sga_ips)
    write_list_to_file(filename_nat_ips, nat_ips)
    write_list_to_file(filename_sga_ips, public_ips)
    _s3_upload(filename_nat_ips)
    _s3_upload(filename_sga_ips)
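As a side note, the script imports writer from csv but never uses it, so the files written by write_list_to_file are plain newline-joined text even though they end in .csv. If true CSV output is wanted, a minimal sketch of the same helper using csv.writer (same signature, one value per row) could look like this:

from csv import writer

def write_list_to_file(filename, data):
    # Write one CSV row per list element; newline='' avoids blank lines on Windows.
    with open(filename, 'w', newline='') as output:
        csv_writer = writer(output)
        for item in data:
            csv_writer.writerow([item])
    print(f'Writing file to {filename}')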

Related

How can I delete, via Python, all the S3 "delete marker" placeholders filtered by "LastModified" (for example, just from yesterday)?

I modified some scripts to delete all the "delete marker" placeholders from certain buckets in S3, but I never found or developed something that deletes the "delete markers" filtered by datetime.
My goal is to create a script to run after a "delete_object_job" failure, so I can set the datetime of the failure and delete all the "delete markers" starting from that datetime.
With the code below I can delete all the "delete markers" from certain buckets, but without a datetime filter:
#!/usr/bin/env python
import json
from datetime import datetime
from boto3 import Session

BUCKET_NAME = "germano"
prefix = ''
session = Session(region_name='eu-south-1', profile_name='default')
bucket = session.client('s3')
MAX_KEYS = 10000

def get_bucket_versions(version_id, key):
    return bucket.list_object_versions(Bucket=BUCKET_NAME,
                                       MaxKeys=MAX_KEYS,
                                       Prefix=prefix,
                                       VersionIdMarker=version_id,
                                       KeyMarker=key)

class DateTimeEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, datetime):
            return o.isoformat()
        return json.JSONEncoder.default(self, o)

# Get the attributes of the DateTimeEncoder class defined above
# DateTimeEncoder = DateTimeEncoder()
# attributes_of_DateTimeEncoder = dir(DateTimeEncoder)
# print(attributes_of_DateTimeEncoder)

def objects_to_restore(versions):
    return [
        {
            'VersionId': marker['VersionId'],
            'Key': marker['Key']
        } for marker in versions.get('DeleteMarkers') if marker['IsLatest']
    ]

def restore_s3_objects(version_markers, count):
    markers_to_delete = objects_to_restore(version_markers)
    print(f"Will restore {len(markers_to_delete)} items during request number: {count}")
    if not markers_to_delete:
        return 0
    bucket.delete_objects(Bucket=BUCKET_NAME, Delete={'Objects': markers_to_delete})
    return len(markers_to_delete)

obj_list = bucket.list_object_versions(Bucket=BUCKET_NAME,
                                       MaxKeys=MAX_KEYS,
                                       Prefix=prefix)
_next_version_id = obj_list.get('NextVersionIdMarker')
_next_key_marker = obj_list.get('NextKeyMarker')
counter = 1
total_restored = restore_s3_objects(obj_list, counter)

while _next_version_id and _next_key_marker:
    counter += 1
    another_list_of_versions = get_bucket_versions(_next_version_id, _next_key_marker)
    _next_version_id = another_list_of_versions.get('NextVersionIdMarker')
    _next_key_marker = another_list_of_versions.get('NextKeyMarker')
    total_restored += restore_s3_objects(another_list_of_versions, counter)

print(f"Total Restored: {total_restored}")
I solved it just by modifying the function objects_to_restore a bit:
def objects_to_restore(versions, last_modified_timestamp="2022-04-28 09:19:56.986445+00:00"):
    print(versions.get('DeleteMarkers'))
    # print(versions.get('Versions'))
    return [
        {
            'VersionId': marker['VersionId'],
            'Key': marker['Key'],
        } for marker in versions.get('DeleteMarkers')
        if marker['IsLatest']
        if str(marker["LastModified"]) >= str(last_modified_timestamp)
    ]
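One note on that fix: boto3 returns LastModified as a timezone-aware datetime object, so the string conversion is not strictly needed. A small sketch (with a hypothetical cutoff value) that compares datetimes directly and avoids any dependence on string formatting:

from datetime import datetime, timezone

# Hypothetical cutoff; any timezone-aware datetime works for the comparison.
CUTOFF = datetime(2022, 4, 28, 9, 19, 56, tzinfo=timezone.utc)

def objects_to_restore(versions, last_modified_after=CUTOFF):
    # LastModified is already a datetime, so compare it directly to the cutoff.
    return [
        {'VersionId': marker['VersionId'], 'Key': marker['Key']}
        for marker in versions.get('DeleteMarkers', [])
        if marker['IsLatest'] and marker['LastModified'] >= last_modified_after
    ]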

UnboundLocalError: local variable 'date_pool_async_results' referenced before assignment

The following returns a list of the desired payload in a PDF for a single site:
import json
import multiprocessing as mp

import pandas as pd
import requests
from datetime import datetime
from pytz import timezone  # assumed: timezone(siteTimeZone) matches pytz's API

# lrs_url, sendMessageToSlack and create_pdf are defined elsewhere in the script.

def process_dates(dates, serviceAreaId, serviceAreaName, siteId, siteName):
    model_results = []
    # for loop to process dates
    return model_results

def process_service_area(site):
    pdf_results = []
    siteId = site["id"]
    siteName = site["name"]
    siteTimeZone = site["time_zone"]
    service_area_response = requests.request("GET", f"{lrs_url}/serviceAreas/active?siteId={siteId}", headers={}, data={})
    if service_area_response.status_code != 200:
        sendMessageToSlack(f"Failed to retrieve service areas for site {siteId}.")
        return pdf_results
    service_area_json_response = json.loads(service_area_response.content)
    tz = timezone(siteTimeZone)
    startdate = datetime.now(tz)
    dates = pd.date_range(start=startdate, periods=7).to_pydatetime().tolist()
    date_pool = mp.Pool(mp.cpu_count())
    for service_area in service_area_json_response:
        serviceAreaName = service_area["name"]
        serviceAreaId = service_area["id"]
        active = service_area["active"]
        # Run comparison
        date_pool_async_results = date_pool.apply_async(process_dates, args=(dates, serviceAreaId, serviceAreaName, siteId, siteName))
    date_pool.close()
    date_pool.join()
    for r in date_pool_async_results.get():
        pdf_results.append(r)
    return pdf_results

def process_all_sites():
    sites_response = requests.request("GET", f"{lrs_url}/sites/active", headers={}, data={})
    sites_json_response = json.loads(sites_response.content)
    pdf_results = []
    for site in sites_json_response:
        pdf_results += process_service_area(site)
        break
        # service_area_pool = mp.Pool(2)
        # service_area_pool_async_results = service_area_pool.apply_async(process_service_area, args = (site))
        # service_area_pool.close()
        # service_area_pool.join()
        # for r in service_area_pool_async_results.get():
        #     pdf_results.append(r)
    return pdf_results

results = process_all_sites()
create_pdf(results)
However, when the script is run for multiple sites, that is, when break is commented out and the lines from service_area_pool up to pdf_results.append(r) are uncommented, I receive an UnboundLocalError: local variable 'date_pool_async_results' referenced before assignment.
Why is that the case? It seems that date_pool_async_results does not receive any payloads beyond the first iteration.
I've gone through the following resources; however, they don't seem to work as a solution, since in my case I am trying to append to a list.
I've also tried the following, but received an UnboundLocalError: local variable 'pdf_results' referenced before assignment as well.
global pdf_results
pdf_results = []

def process_all_sites():
    # retrieve payload
Please do let me know if further context is required.
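For reference, the general mechanism behind this error: a local name that is assigned only inside a loop body stays unbound if that loop runs zero times (for example, a site whose service-area list comes back empty). A minimal sketch, independent of the code above:

def collect(items):
    for item in items:
        result = item * 2   # 'result' is bound only when the loop body executes
    return result           # raises UnboundLocalError if 'items' was empty

print(collect([1, 2, 3]))   # works, prints 6
# collect([])               # UnboundLocalError: local variable 'result' referenced before assignment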

bad performance for loop with instance and bulk create

I need to use bulk_create to create a lot of "detalle" (detail) records. The problem is that I have to iterate through a JSON payload to get the arguments, and I have four foreign keys, so Django asks me for the instance, not the id. But to get each instance I have to call .get(), which gives bad performance because that's four .get() calls per iteration.
Is there a way to fetch all the object instances up front and put them in a list or something, so I can look up the instance without calling .get() every time?
class DetalleFichaAllViewSet(viewsets.ModelViewSet):
    serializer_class = DetalleFichaUpdateAllSerializer

    def create(self, request, *args, **kwargs):
        user = self.request.user
        data = request.data
        try:
            ficha = Ficha.objects.get(autor=user.id)
            DetalleFicha.objects.filter(ficha=ficha.id).delete()
        except Http404:
            pass
        # Create Ficha
        now = datetime.now()
        date_time = now.strftime("%Y-%m-%d %H:%M")
        print("AAAAAA DATA:", data)
        Ficha.objects.filter(autor=user.id).update(fecha_creacion=date_time, autor=user,
                                                   nombre=data["nombreFicha"], descripcion=data["descripcionFicha"])
        ficha = Ficha.objects.filter(autor=user.id).last()
        recintos = Recinto.objects.all()
        productos = Producto.objects.all()
        estandar_productos = EstandarProducto.objects.all()
        cotizaciones = Cotizacion.objects.all()
        detalles_ficha = []
        for detalle in data["detalles"]:
            recinto = recintos.get(id=detalle[1])
            producto = productos.get(id=detalle[10])
            estandar_producto = estandar_productos.get(id=detalle[9])
            try:
                cotizacion = cotizaciones.get(id=detalle[4])
            except ObjectDoesNotExist:
                cotizacion = None
            print("Fecha: ", detalle[8])
            detalle = DetalleFicha(carreras=detalle[0],
                                   recinto=recinto, nombre=detalle[2],
                                   cantidad_a_comprar=detalle[3], cotizacion=cotizacion,
                                   valor_unitario=detalle[5], valor_total=detalle[6],
                                   documento=detalle[7], fecha_cotizacion=detalle[8],
                                   estandar_producto=estandar_producto, producto=producto,
                                   ficha=ficha)
            detalles_ficha.append(detalle)
        DetalleFicha.objects.bulk_create(detalles_ficha)
        print("Array convertida", detalles_ficha)
        print(detalles_ficha[0])
        return Response(status=status.HTTP_200_OK)
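One common way to avoid the per-row .get() calls is to load each table into a dictionary keyed by primary key once, then look the instances up in memory. A sketch of the loop body, assuming the ids in data["detalles"] match the models' primary-key type and reusing ficha and data from the view above (Django's QuerySet.in_bulk() returns a {pk: instance} dict):

# Fetch each table once; in_bulk() returns {pk: instance}.
recintos = Recinto.objects.in_bulk()
productos = Producto.objects.in_bulk()
estandar_productos = EstandarProducto.objects.in_bulk()
cotizaciones = Cotizacion.objects.in_bulk()

detalles_ficha = []
for detalle in data["detalles"]:
    detalles_ficha.append(DetalleFicha(
        carreras=detalle[0],
        recinto=recintos[detalle[1]],
        nombre=detalle[2],
        cantidad_a_comprar=detalle[3],
        cotizacion=cotizaciones.get(detalle[4]),   # dict.get -> None when the id is missing
        valor_unitario=detalle[5],
        valor_total=detalle[6],
        documento=detalle[7],
        fecha_cotizacion=detalle[8],
        estandar_producto=estandar_productos[detalle[9]],
        producto=productos[detalle[10]],
        ficha=ficha,
    ))
DetalleFicha.objects.bulk_create(detalles_ficha)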

How to have bolded headers, but non-bolded cells - filled with worksheet.append_table of pygsheets==2.0.1

The Python 3 (pygsheets 2.0.1) script below bolds all the cells starting at A2.
Is there an easy way (i.e., in one command) to ask for all these cells not to be bolded?
Code:
import boto3, botocore
import datetime
import json
import pygsheets

currentDT = str(datetime.datetime.now())

def create_spreadsheet(outh_file, spreadsheet_name="jSonar AWS usage"):
    client = pygsheets.authorize(outh_file=outh_file, outh_nonlocal=True)
    spread_sheet = client.create(spreadsheet_name)
    return spread_sheet

def get_regions():
    region = "us-west-1"
    regions = dict()
    ec2 = boto3.client("ec2", region_name=region)
    ec2_responses = ec2.describe_regions()
    ssm_client = boto3.client('ssm', region_name=region)
    for resp in ec2_responses['Regions']:
        region_id = resp['RegionName']
        tmp = '/aws/service/global-infrastructure/regions/%s/longName' % region_id
        ssm_response = ssm_client.get_parameter(Name=tmp)
        region_name = ssm_response['Parameter']['Value']
        regions[region_id] = region_name
    return regions

def rds_worksheet_creation(spread_sheet, regions, spreadsheet_index):
    worksheet = spread_sheet.add_worksheet("RDS", rows=100, cols=26, src_tuple=None, src_worksheet=None, index=spreadsheet_index)
    worksheet.cell('A1').set_text_format('bold', True).value = 'DBInstanceIdentifier'
    worksheet.cell('B1').set_text_format('bold', True).value = 'MasterUsername'
    worksheet.cell('C1').set_text_format('bold', True).value = 'Region'
    worksheet.cell('D1').set_text_format('bold', False).value = 'Sent Query to (Name)'
    worksheet.cell('E1').set_text_format('bold', False).value = 'Sent Query to (email)'
    worksheet.cell('F1').set_text_format('bold', False).value = 'WorksheetCreated: %s' % currentDT
    cells_data = list()
    for region, region_h in sorted(regions.items()):
        client = boto3.client('rds', region_name=region)
        clnt = boto3.client('ssm', region_name=region)
        db_instances = client.describe_db_instances()
        for instance in db_instances['DBInstances']:
            MasterUsername = instance['MasterUsername']
            DBInstanceIdentifier = instance['DBInstanceIdentifier']
            cells_data.append([DBInstanceIdentifier, MasterUsername, region_h])
    worksheet.append_table(cells_data, start='A2')

if __name__ == "__main__":
    spread_sheet = create_spreadsheet(spreadsheet_name="jSonar AWS usage",
                                      outh_file='/home/qa/.aws/client_secret.json')
    regions = get_regions()
    rds_worksheet_creation(spread_sheet, regions, 0)
    spread_sheet.share("me@corp.com")
If I understand correctly, you want to un-bold multiple cells in a single command.
To set a format on a range of cells, create a Datarange and use apply_format.
model_cell = Cell('A1')
model_cell.set_text_format('bold', False)
Datarange('A1','A10', worksheet=wks).apply_format(model_cell)
docs

Use a dictionary instead of a list of dictionaries to reduce program complexity

I'm trying to validate the consistency between DynamoDB tables. I used a list of dictionaries to store the DynamoDB table items, which is taking a long time to execute.
I'm new to Python; any help converting the list of dictionaries to a dictionary, to reduce my program's complexity, would be appreciated.
#!/usr/bin/python
import sys
import boto3
import argparse

def table_consistency_check(table, column_name):
    paginator = dynamoClient.get_paginator('scan')
    modified_accounts = []
    params = {
        'TableName': table
    }
    page_iterator = paginator.paginate(**params)
    for page in page_iterator:
        for item in page['Items']:
            account = item['account_name']['S']
            license_key = item[column_name]['S']
            credentials = {
                'account_name': account,
                column_name: license_key
            }
            modified_accounts.append(credentials)
    return modified_accounts

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Find all accounts with different license key and account key hash')
    parser.add_argument('-r', '--region', nargs='?', type=str, default='us-west-2')
    try:
        args = parser.parse_args()
    except:
        exit_code = int(str(sys.exc_info()[1]))
    accounts_table = 'accounts_table'
    Credentail_table = 'credential_table'
    dynamoClient = boto3.client('dynamodb', region_name=args.region)
    account1 = table_consistency_check(accounts_table, 'license_key')
    account2 = table_consistency_check(Credentail_table, 'access_key_hash')
    output = []
    for acct_item in account1:
        for creds_item in account2:
            if acct_item['account_name'] == creds_item['account_name']:
                if creds_item['access_key_hash'].startswith('ORIGINAL_KEY_'):
                    val = creds_item['access_key_hash']
                    length = len('ORIGINAL_KEY_')
                    str = val[length:]
                    if acct_item['license_key'] != str:
                        output.append(creds_item['account_name'])
                        print('Duplicate record found')
                        print('Account Name : ' + acct_item['account_name'] + ', License Key : ' + acct_item['license_key'] + ', Access Key Hash : ' + creds_item['access_key_hash'])
    if not output:
        print('the tables are consistent, No duplicate item found')
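A sketch of the dictionary-based approach (same field names as above, hypothetical helper name table_as_dict): have the scan return a dict keyed by account name, so the comparison becomes a single pass with O(1) lookups instead of a nested loop.

def table_as_dict(table, column_name):
    # Scan the table once and key the results by account name.
    paginator = dynamoClient.get_paginator('scan')
    result = {}
    for page in paginator.paginate(TableName=table):
        for item in page['Items']:
            result[item['account_name']['S']] = item[column_name]['S']
    return result

licenses = table_as_dict(accounts_table, 'license_key')
hashes = table_as_dict(Credentail_table, 'access_key_hash')

prefix = 'ORIGINAL_KEY_'
output = []
for account, access_key_hash in hashes.items():
    license_key = licenses.get(account)
    if license_key is None or not access_key_hash.startswith(prefix):
        continue
    if license_key != access_key_hash[len(prefix):]:
        output.append(account)
        print('Duplicate record found')
        print('Account Name : ' + account + ', License Key : ' + license_key +
              ', Access Key Hash : ' + access_key_hash)
if not output:
    print('the tables are consistent, No duplicate item found')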
