How can I list AWS Security Groups older than 30 days in Python? - python-3.x

import boto3
from datetime import datetime
ec2res = boto3.resource('ec2')
ec2cli = boto3.client('ec2')
now = datetime.now()


def calculator(launch_date, object, max_age_days=30):
    """Print ``object`` when it is older than ``max_age_days``; return its age.

    Args:
        launch_date: Naive ``datetime`` at which the resource was created.
            It is subtracted from the module-level naive ``now``.
        object: The resource to report. The name shadows the builtin but is
            kept so that existing callers keep working.
        max_age_days: Age threshold in whole days. Defaults to 30.

    Returns:
        The age of the resource in whole days.
    """
    intage = (now - launch_date).days
    # Strictly greater: the goal is to list resources OLDER than the
    # threshold, not the recently created ones.
    if intage > max_age_days:
        print(object)
    return intage
# Query EC2 for security groups with a single describe call (no pagination
# handling) and feed each entry to calculator().
list_security_groups = ec2cli.describe_security_groups()
for security_group in list_security_groups['SecurityGroups']:
# NOTE(review): this lookup is where the reported KeyError comes from - the
# security group entries carry no 'CreationDate' key.
launch_date = datetime.strptime(security_group['CreationDate'].strftime("%Y-%m-%d %H:%M:%S.%f"), "%Y-%m-%d %H:%M:%S.%f")
intage = calculator(launch_date,security_group)
or
# Same idea through the resource API: iterate every security group and feed
# it to calculator().
list_security_groups = ec2res.security_groups.all()
for security_group in list_security_groups:
# NOTE(review): `launchtime` is presumably not an attribute of the
# SecurityGroup resource - security groups are reported to expose no date
# field at all; confirm against the boto3 documentation.
launch_date = datetime.strptime(security_group.launchtime.strftime("%Y-%m-%d %H:%M:%S.%f"), "%Y-%m-%d %H:%M:%S.%f")
intage = calculator(launch_date,security_group)
These approaches work for other AWS objects, but I get a KeyError for security groups.

There is no concept of a "Creation Date", nor any date, for Security Groups.
You could theoretically go through the security group's history in AWS Config to get this information.
Always look at the documentation to see which fields are available.

Related

fetch all files from S3 for the last N days in python

I need to get all files of a S3 directory using python for the last N days. I am using the below code where it fetches all the files in the directory.
folder = 'main'
subfolder = ['test','prod']
base= os.path.join(current_directory, 'project')
for i in subfolder:
bucket_list = bucket.list(prefix="{}/{}".format(folder, i))
for l in bucket_list:
keyString = str(l.key)
d = base + "/" + keyString
l.get_contents_to_filename(d)
The code below returns only the most recently modified file. Is there any way to get only the files that were modified or created in the last 2 days?
for i in subfolder:
bucket_list = bucket.list(prefix="{}/{}".format(folder, i))
sorted_objs = sorted(bucket_list, key=attrgetter('last_modified'))
latest = sorted_objs.pop()
print(latest)
You appear to be using an old version of boto. These days you should be using boto3.
Here is an example to get recent files using the resource method:
import boto3
from datetime import datetime, timedelta, timezone
# Collect the keys under the prefix that were modified within the last five
# days, using the boto3 resource interface.
s3_resource = boto3.resource('s3')
bucket = s3_resource.Bucket('my-bucket')
check_timestamp = datetime.now(timezone.utc) - timedelta(days=5)
objects = bucket.objects.filter(Prefix='my-prefix/')
recent_objects = []
for summary in objects:
    # last_modified is compared against a timezone-aware UTC cutoff.
    if summary.last_modified > check_timestamp:
        recent_objects.append(summary.key)
print(recent_objects)
Here is the same code using the client method:
import boto3
from datetime import datetime, timedelta, timezone
# Collect the keys under the prefix that were modified within the last five
# days, using the low-level boto3 client.
check_timestamp = datetime.now(timezone.utc) - timedelta(days=5)
s3_client = boto3.client('s3')
# A single list_objects_v2 call returns at most 1000 keys, so walk every
# page instead of reading only the first response.
paginator = s3_client.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket='my-bucket', Prefix='my-prefix/')
recent_objects = [
    entry['Key']
    for page in pages
    # 'Contents' is absent when nothing matches the prefix.
    for entry in page.get('Contents', [])
    if entry['LastModified'] > check_timestamp
]
print(recent_objects)

Write file name based on return

I'm creating a boto3 script that scrapes and uploads our entire accounts Public Ips and NatGateway Ips to our S3 bucket. I'm stuck on writing files for both returns. I would ideally like to write two separate files while still using the same filename variable you see in main(). Right now I can get this to work with only one return(either nat_ips or public_ips)
import boto3
from datetime import datetime
from csv import writer
def get_ips():
    """Collect NAT gateway IPs and tagged Elastic IPs from every region.

    Assumes the hard-coded IAM role through STS, lists the regions returned
    by ``describe_regions`` and queries each of them with the temporary
    credentials.

    Returns:
        A ``(nat_ips, public_ips)`` tuple. ``nat_ips`` holds every NAT
        gateway public IP suffixed with ``/32``. ``public_ips`` holds
        ``"<first tag value> : <public ip>"`` strings for the Elastic IPs
        whose ``Name`` tag matches ``*sgw-eip``.
    """
    # Uses STS to assume the role needed.
    boto_sts = boto3.client('sts')
    sts_response = boto_sts.assume_role(
        RoleArn='arn:aws:iam::1234:role/foo',
        RoleSessionName='Foo'
    )
    # Keep the temporary credentials in one dict so every client below is
    # built from the same keyword arguments.
    sts_credentials = sts_response["Credentials"]
    credentials = {
        'aws_access_key_id': sts_credentials["AccessKeyId"],
        'aws_secret_access_key': sts_credentials["SecretAccessKey"],
        'aws_session_token': sts_credentials["SessionToken"],
    }
    # List and store all the regions
    ec2_client = boto3.client('ec2', region_name='us-west-1', **credentials)
    all_regions = [region['RegionName'] for region in ec2_client.describe_regions()['Regions']]
    eip_filters = [{'Name': 'tag:Name', 'Values': ['*sgw-eip']}]
    nat_ips = []
    public_ips = []
    for region in all_regions:
        ec2_client = boto3.client('ec2', region_name=region, **credentials)
        # The paginator follows NextToken itself, replacing the manual
        # empty-string/None token loop.
        paginator = ec2_client.get_paginator('describe_nat_gateways')
        for page in paginator.paginate(PaginationConfig={'PageSize': 1000}):
            for gateway in page["NatGateways"]:
                for address in gateway["NatGatewayAddresses"]:
                    # PublicIp can be absent (e.g. private NAT gateways).
                    if "PublicIp" in address:
                        nat_ips.append(address["PublicIp"] + '/32')
        # Queried once per region, outside the NAT gateway pagination, so
        # the same Elastic IPs are not appended once per page.
        addresses = ec2_client.describe_addresses(Filters=eip_filters)
        for eip_dict in addresses['Addresses']:
            # NOTE(review): uses the first tag's value; presumably that is
            # the Name tag - confirm, since tag order is not guaranteed.
            public_ip_string = eip_dict['Tags'][0]['Value'] + ' : ' + eip_dict['PublicIp']
            public_ips.append(public_ip_string)
    return nat_ips, public_ips
def _s3_upload(filename):
    """Upload the local file ``filename`` to the ``foo-bar`` bucket.

    The object key is ``foo/`` followed by ``filename``.
    """
    bucket = 'foo-bar'
    key = 'foo/' + filename
    client = boto3.resource('s3').meta.client
    client.upload_file(Filename=filename, Bucket=bucket, Key=key)
    print(f'Uploading {filename} to {bucket}')
def write_list_to_file(filename, data):
    """Write the items of ``data`` to ``filename``, one per line.

    Args:
        filename: Path of the file to create or overwrite.
        data: Iterable of items; each one is converted with ``str``.

    The lines are joined with a newline and no trailing newline is written.
    """
    lines_string = '\n'.join(str(x) for x in data)
    with open(filename, 'w') as output:
        # write(), not writelines(): the payload is a single string, and
        # writelines() would iterate it character by character.
        output.write(lines_string)
    print(f'Writing file to {filename}')
if __name__ == "__main__":
date = datetime.now().strftime('%Y%m%d')
# Stuck here since I want to make it one variable
filename_nat_ips = f'natgateway_ips{date}.csv'
filename_sga_ips = f'sga_ips{date}.csv'
public_ips = get_ips()
nat_ips = get_ips()
print(filename)
write_list_to_file(filename, nat_ips)
_s3_upload(filename)
I see that you are already returning a tuple of public_ips and nat_ips from your get_ips() function. So in your main, you could collect them together as well.
You might try something like this:
if __name__ == "__main__":
    date = datetime.now().strftime('%Y%m%d')
    # One date-stamped file name per list returned by get_ips().
    filename_nat_ips = f'natgateway_ips{date}.csv'
    filename_sga_ips = f'sga_ips{date}.csv'
    # get_ips() returns a (nat_ips, public_ips) tuple; unpack it in one call.
    nat_ips, public_ips = get_ips()
    write_list_to_file(filename_nat_ips, nat_ips)
    # The public IPs go to the sga file; only filename_sga_ips is defined.
    write_list_to_file(filename_sga_ips, public_ips)
    _s3_upload(filename_nat_ips)
    _s3_upload(filename_sga_ips)
I was doing it right the first time. And was trying to make it more complicated.
if __name__ == "__main__":
    # Build the two date-stamped file names (date formatted as YYYYMMDD).
    date = datetime.now().strftime('%Y%m%d')
    filename_nat_ips = f'natgateway_ips{date}.csv'
    filename_sga_ips = f'sga_ips{date}.csv'
    nat_ips, public_ips = get_ips()
    outputs = (
        (filename_nat_ips, nat_ips),
        (filename_sga_ips, public_ips),
    )
    # Announce both names, then write both files, then upload both.
    for name, _ in outputs:
        print(name)
    for name, rows in outputs:
        write_list_to_file(name, rows)
    for name, _ in outputs:
        _s3_upload(name)

Series format pandas

import pandas as pd
from datetime import datetime
import os
# get username
user = os.getlogin()
def file_process():
data = pd.read_excel('C:\\Users\\' + user + '\\My Documents\\XINVST.xls')
# Change the date and time formatting
data["INVDAT"] = data["INVDAT"].apply(lambda x: datetime.combine(x, datetime.min.time()))
data["INVDAT"] = data["INVDAT"].dt.strftime("%m-%d-%Y")
print(data)
# output to new file
# new_data = data
# new_data.to_excel('C:\\Users\\' + user + '\\Desktop\\XINVST.xls', index=None)
if __name__ == '__main__':
file_process()
I'm trying to format the INVDAT column to correct date format like 11/25/19, I've tried multiple solutions but keep running into errors like this one: TypeError: combine() argument 1 must be datetime.date, not int, I then tried to convert the integer to date type but it errors also.
Or you can simply use df["INVDAT"] = pd.to_datetime(df["INVDAT"], format="%m/%d/%y"); in this case you don't need the datetime package. For further information, see the pandas docs.
data['INVDAT'] = data['INVDAT'].astype('str')
data["INVDAT"] = pd.to_datetime(data["INVDAT"])
data["INVDAT"] = data["INVDAT"].dt.strftime("%m/%d/%Y")
This solution works, but it fails when the date has a single-digit month, such as 12519 (expected output 1/25/19). I tried using a conditional to add a 0 to the front if len() < 6, but it gives me an error saying that the dtype is int64.
import pandas as pd
import os
# get username
user = os.getlogin()
def file_process():
data = pd.read_excel('C:\\Users\\' + user + '\\My Documents\\XINVST.xls')
# Change the date and time formatting
data['INVDAT'] = data['INVDAT'].astype('str')
length = len(data['INVDAT'])
data['INVDAT'].pop(length - 1)
for i in data['INVDAT'].str.len():
if i <= 5:
data['INVDAT'] = data['INVDAT'].apply(lambda x: '{0:0>6}'.format(x))
length = len(data['INVDAT'])
data['INVDAT'].pop(length - 1)
data["INVDAT"] = pd.to_datetime(data["INVDAT"])
data["INVDAT"] = data["INVDAT"].dt.strftime("%m/%d/%Y")
else:
data["INVDAT"] = pd.to_datetime(data["INVDAT"])
data["INVDAT"] = data["INVDAT"].dt.strftime("%m/%d/%Y")
# output to new file
new_data = data
new_data.to_excel('C:\\Users\\' + user + '\\Desktop\\XINVST.xls', index=None)
if __name__ == '__main__':
file_process()
This is the solution, it's sloppy but works

How to have bolded headers, but non-bolded cells - filled with worksheet.append_table of pygsheets==2.0.1

The python3 (pygsheets 2.0.1) script below will bold all the cells starting at A2.
Is there an easy way (i.e., in one command) to ask for all these cells not to be bolded?
Code:
import boto3, botocore
import datetime
import json
import pygsheets
currentDT = str(datetime.datetime.now())
def create_spreadsheet(outh_file, spreadsheet_name = "jSonar AWS usage"):
    """Authorize with ``outh_file`` and create a spreadsheet.

    Args:
        outh_file: Path handed to ``pygsheets.authorize`` as ``outh_file``.
        spreadsheet_name: Title of the spreadsheet to create.

    Returns:
        The object returned by ``client.create(spreadsheet_name)``.
    """
    return pygsheets.authorize(outh_file=outh_file, outh_nonlocal=True).create(spreadsheet_name)
def get_regions():
    """Map every EC2 region id to its long name.

    The long name is read from the SSM parameter
    ``/aws/service/global-infrastructure/regions/<id>/longName``.

    Returns:
        dict of region id -> long region name.
    """
    home_region = "us-west-1"
    long_names = {}
    ec2 = boto3.client("ec2", region_name=home_region)
    described = ec2.describe_regions()
    ssm_client = boto3.client('ssm', region_name=home_region)
    for entry in described['Regions']:
        region_id = entry['RegionName']
        # One parameter lookup per region.
        parameter = ssm_client.get_parameter(
            Name='/aws/service/global-infrastructure/regions/%s/longName' % region_id
        )
        long_names[region_id] = parameter['Parameter']['Value']
    return long_names
def rds_worksheet_creation(spread_sheet, regions, spreadsheet_index):
    """Add an "RDS" worksheet listing the RDS instances of every region.

    Args:
        spread_sheet: Spreadsheet object exposing ``add_worksheet``.
        regions: Mapping of region id -> human-readable region name.
        spreadsheet_index: Value passed as ``index`` to ``add_worksheet``.
    """
    worksheet = spread_sheet.add_worksheet("RDS", rows=100, cols=26, src_tuple=None, src_worksheet=None, index=spreadsheet_index)
    # (cell address, bold flag, text) for each header cell of row 1.
    headers = [
        ('A1', True, 'DBInstanceIdentifier'),
        ('B1', True, 'MasterUsername'),
        ('C1', True, 'Region'),
        ('D1', False, 'Sent Query to (Name)'),
        ('E1', False, 'Sent Query to (email)'),
        ('F1', False, 'WorksheetCreated: %s' % currentDT),
    ]
    for address, bold, text in headers:
        worksheet.cell(address).set_text_format('bold', bold).value = text
    cells_data = []
    for region, region_h in sorted(regions.items()):
        client = boto3.client('rds', region_name=region)
        # describe_db_instances is paginated; walk every page so regions
        # with many instances are not truncated.
        paginator = client.get_paginator('describe_db_instances')
        for page in paginator.paginate():
            for instance in page['DBInstances']:
                cells_data.append([
                    instance['DBInstanceIdentifier'],
                    instance['MasterUsername'],
                    region_h,
                ])
    worksheet.append_table(cells_data, start='A2')
if __name__ == "__main__":
spread_sheet = create_spreadsheet(spreadsheet_name = "jSonar AWS usage",
outh_file = '/home/qa/.aws/client_secret.json')
regions = get_regions()
rds_worksheet_creation(spread_sheet, regions, 0)
spread_sheet.share("me#corp.com")
Output:
If I understand correctly, you want to un-bold multiple cells in a single command.
To apply a format to a range of cells, create a DataRange and use apply_format.
from pygsheets import Cell, DataRange

# A detached cell that only serves as the formatting template.
model_cell = Cell('A1')
model_cell.set_text_format('bold', False)
# The class is spelled DataRange; it copies the template's format onto every
# cell of the range in one call. `wks` is the target worksheet.
DataRange('A1', 'A10', worksheet=wks).apply_format(model_cell)
docs

AttributeError: 'datetime.datetime' object has no attribute 'striftime'

I am currently writing a machine learning program for school to predict the weather. I have been using this article https://stackabuse.com/using-machine-learning-to-predict-the-weather-part-1/ as my main resource (I have had to adjust it, as wunderground is no longer free, so I have been using openweathermap instead). While writing the data collection and organization part of my code, I received the following error: "AttributeError: 'datetime.datetime' object has no attribute 'striftime'". Sorry in advance for the massive block of code; I figured it would be the best way to troubleshoot the problem. Thank you for any help. The parts marked with '** code **' are what I am struggling with.
from datetime import datetime
from datetime import timedelta
import time
from collections import namedtuple
import pandas as pd
import requests
import matplotlib.pyplot as plt
#Data collection and Organization
url = 'http://history.openweathermap.org//storage/d12a3df743e650ba4035d2c6d42fb68f.json'
#res = requests.get(url)
#data = res.json()
target_date = datetime(2018, 4, 22)
features = ["date", "temperature", "pressure", "humidity", "maxtemperature", "mintemperature"]
DailySummary = namedtuple("DailySummary", features)
def extra_weather_data(url, target_date, days):
    """Collect one ``DailySummary`` per day, starting at ``target_date``.

    Args:
        url: Request URL. It is passed through ``str.format`` with the date
            rendered as ``YYYYMMDD``, so it only varies per day if it
            contains a ``{}`` placeholder.
        target_date: First ``datetime`` to request.
        days: Number of consecutive days to request.

    Returns:
        List of ``DailySummary`` records, one for every day whose request
        returned HTTP 200.
    """
    # Accumulator for the results; it is returned to the caller.
    records = []
    for _ in range(days):
        # The datetime method is spelled ``strftime``.
        request = url.format(target_date.strftime('%Y%m%d'))
        response = requests.get(request)
        if response.status_code == 200:
            data = response.json()
            records.append(DailySummary(
                date=target_date,
                temperature=data['main']['temp'],
                pressure=data['main']['pressure'],
                humidity=data['main']['humidity'],
                maxtemperature=data['main']['temp_max'],
                mintemperature=data['main']['temp_min']))
        # Pause between consecutive requests.
        time.sleep(6)
        target_date += timedelta(days=1)
    return records
**records = extra_weather_data(url, target_date, 365)**
#Finished data collection now begin to clean and process data using Pandas
# Build the frame from the collected records and index it by date.
df = pd.DataFrame(records, columns=features).set_index('date')
# Column names must match the entries of `features` ('humidity').
tmp = df[['temperature', 'pressure', 'humidity', 'maxtemperature', 'mintemperature']].head(10)
def derive_nth_day_feature(df, feature, N):
    """Add a column holding ``feature`` as measured ``N`` rows earlier.

    The new column is named ``"<feature>_<N>"``. Its first ``N`` rows have no
    prior measurement and are left missing. ``df`` is modified in place.

    Args:
        df: DataFrame containing the ``feature`` column.
        feature: Name of the column to lag.
        N: Number of rows to look back.
    """
    col_name = "{}_{}".format(feature, N)
    # shift() is purely positional, so it works whatever the index type
    # (the frame is indexed by date) and also when N exceeds the row count.
    df[col_name] = df[feature].shift(N)
for feature in features:
if feature != 'date':
for N in range(1, 4):
derive_nth_day_feature(df, feature, N)
df.columns

Resources