boto3 file upload in python - python-3.x

i am trying to upload file in aws s3 bucket via boto 3
but instead of file the following is being uploaded <_io.TextIOWrapper name='excel.csv' mode='a' encoding='UTF-8'>
def write_csv(data):
with open('excel.csv', 'a') as file:
writer = csv.writer(file)
writer.writerow([data['account_id'],
data['country'],
data['end_date'],
data['start_date']])
uploadtos3(str(file))
def uploadtos3(file):
key = 'xxxx'
seckey = 'xxxx'
s3 = boto3.resource( 's3',
aws_access_key_id = key,
aws_secret_access_key = seckey)
upload_file_bucket = 'apiuploadtest'
s3.Object(upload_file_bucket,str(file)).put(Body = str(file))
how to upload the file correctly?

Body in the put method of Object is:
Body (bytes or seekable file-like object) -- Object data.
Therefore, the following should be tried (fixed indentation and removed str):
def write_csv(data):
with open('excel.csv', 'a') as file:
writer = csv.writer(file)
writer.writerow([data['account_id'],
data['country'],
data['end_date'],
data['start_date']])
uploadtos3(file)
def uploadtos3(file):
key = 'xxxx'
seckey = 'xxxx'
s3 = boto3.resource('s3',
aws_access_key_id = key,
aws_secret_access_key = seckey)
upload_file_bucket = 'apiuploadtest'
s3.Object(upload_file_bucket, <key-name-on-s3>).put(Body = file)
By the way, its not a good practice to hardcode any AWS credentials in your source code.

Related

Access S3 bucket via ARN (as external client) [duplicate]

I'm trying to use the AssumeRole in such a way that i'm traversing multiple accounts and retrieving assets for those accounts. I've made it to this point:
import boto3
stsclient = boto3.client('sts')
assumedRoleObject = sts_client.assume_role(
RoleArn="arn:aws:iam::account-of-role-to-assume:role/name-of-role",
RoleSessionName="AssumeRoleSession1")
Great, i have the assumedRoleObject. But now i want to use that to list things like ELBs or something that isn't a built-in low level resource.
How does one go about doing that? If i may ask - please code out a full example, so that everyone can benefit.
Here's a code snippet from the official AWS documentation where an s3 resource is created for listing all s3 buckets. boto3 resources or clients for other services can be built in a similar fashion.
# create an STS client object that represents a live connection to the
# STS service
sts_client = boto3.client('sts')
# Call the assume_role method of the STSConnection object and pass the role
# ARN and a role session name.
assumed_role_object=sts_client.assume_role(
RoleArn="arn:aws:iam::account-of-role-to-assume:role/name-of-role",
RoleSessionName="AssumeRoleSession1"
)
# From the response that contains the assumed role, get the temporary
# credentials that can be used to make subsequent API calls
credentials=assumed_role_object['Credentials']
# Use the temporary credentials that AssumeRole returns to make a
# connection to Amazon S3
s3_resource=boto3.resource(
's3',
aws_access_key_id=credentials['AccessKeyId'],
aws_secret_access_key=credentials['SecretAccessKey'],
aws_session_token=credentials['SessionToken'],
)
# Use the Amazon S3 resource object that is now configured with the
# credentials to access your S3 buckets.
for bucket in s3_resource.buckets.all():
print(bucket.name)
To get a session with an assumed role:
import botocore
import boto3
import datetime
from dateutil.tz import tzlocal
assume_role_cache: dict = {}
def assumed_role_session(role_arn: str, base_session: botocore.session.Session = None):
base_session = base_session or boto3.session.Session()._session
fetcher = botocore.credentials.AssumeRoleCredentialFetcher(
client_creator = base_session.create_client,
source_credentials = base_session.get_credentials(),
role_arn = role_arn,
extra_args = {
# 'RoleSessionName': None # set this if you want something non-default
}
)
creds = botocore.credentials.DeferredRefreshableCredentials(
method = 'assume-role',
refresh_using = fetcher.fetch_credentials,
time_fetcher = lambda: datetime.datetime.now(tzlocal())
)
botocore_session = botocore.session.Session()
botocore_session._credentials = creds
return boto3.Session(botocore_session = botocore_session)
# usage:
session = assumed_role_session('arn:aws:iam::ACCOUNTID:role/ROLE_NAME')
ec2 = session.client('ec2') # ... etc.
The resulting session's credentials will be automatically refreshed when required which is quite nice.
Note: my previous answer was outright wrong but I can't delete it, so I've replaced it with a better and working answer.
You can assume role using STS token, like:
class Boto3STSService(object):
def __init__(self, arn):
sess = Session(aws_access_key_id=ARN_ACCESS_KEY,
aws_secret_access_key=ARN_SECRET_KEY)
sts_connection = sess.client('sts')
assume_role_object = sts_connection.assume_role(
RoleArn=arn, RoleSessionName=ARN_ROLE_SESSION_NAME,
DurationSeconds=3600)
self.credentials = assume_role_object['Credentials']
This will give you temporary access key and secret keys, with session token. With these temporary credentials, you can access any service. For Eg, if you want to access ELB, you can use the below code:
self.tmp_credentials = Boto3STSService(arn).credentials
def get_boto3_session(self):
tmp_access_key = self.tmp_credentials['AccessKeyId']
tmp_secret_key = self.tmp_credentials['SecretAccessKey']
security_token = self.tmp_credentials['SessionToken']
boto3_session = Session(
aws_access_key_id=tmp_access_key,
aws_secret_access_key=tmp_secret_key, aws_session_token=security_token
)
return boto3_session
def get_elb_boto3_connection(self, region):
sess = self.get_boto3_session()
elb_conn = sess.client(service_name='elb', region_name=region)
return elb_conn
with reference to the solution by #jarrad which is not working as of Feb 2021, and as a solution that does not use STS explicitly please see the following
import boto3
import botocore.session
from botocore.credentials import AssumeRoleCredentialFetcher, DeferredRefreshableCredentials
def get_boto3_session(assume_role_arn=None):
session = boto3.Session(aws_access_key_id="abc", aws_secret_access_key="def")
if not assume_role_arn:
return session
fetcher = AssumeRoleCredentialFetcher(
client_creator=_get_client_creator(session),
source_credentials=session.get_credentials(),
role_arn=assume_role_arn,
)
botocore_session = botocore.session.Session()
botocore_session._credentials = DeferredRefreshableCredentials(
method='assume-role',
refresh_using=fetcher.fetch_credentials
)
return boto3.Session(botocore_session=botocore_session)
def _get_client_creator(session):
def client_creator(service_name, **kwargs):
return session.client(service_name, **kwargs)
return client_creator
the function can be called as follows
ec2_client = get_boto3_session(role_arn='my_role_arn').client('ec2', region_name='us-east-1')
If you want a functional implementation, this is what I settled on:
def filter_none_values(kwargs: dict) -> dict:
"""Returns a new dictionary excluding items where value was None"""
return {k: v for k, v in kwargs.items() if v is not None}
def assume_session(
role_session_name: str,
role_arn: str,
duration_seconds: Optional[int] = None,
region_name: Optional[str] = None,
) -> boto3.Session:
"""
Returns a session with the given name and role.
If not specified, duration will be set by AWS, probably at 1 hour.
If not specified, region will be left unset.
Region can be overridden by each client or resource spawned from this session.
"""
assume_role_kwargs = filter_none_values(
{
"RoleSessionName": role_session_name,
"RoleArn": role_arn,
"DurationSeconds": duration_seconds,
}
)
credentials = boto3.client("sts").assume_role(**assume_role_kwargs)["Credentials"]
create_session_kwargs = filter_none_values(
{
"aws_access_key_id": credentials["AccessKeyId"],
"aws_secret_access_key": credentials["SecretAccessKey"],
"aws_session_token": credentials["SessionToken"],
"region_name": region_name,
}
)
return boto3.Session(**create_session_kwargs)
def main() -> None:
session = assume_session(
"MyCustomSessionName",
"arn:aws:iam::XXXXXXXXXXXX:role/TheRoleIWantToAssume",
region_name="us-east-1",
)
client = session.client(service_name="ec2")
print(client.describe_key_pairs())
import json
import boto3
roleARN = 'arn:aws:iam::account-of-role-to-assume:role/name-of-role'
client = boto3.client('sts')
response = client.assume_role(RoleArn=roleARN,
RoleSessionName='RoleSessionName',
DurationSeconds=900)
dynamodb_client = boto3.client('dynamodb', region_name='us-east-1',
aws_access_key_id=response['Credentials']['AccessKeyId'],
aws_secret_access_key=response['Credentials']['SecretAccessKey'],
aws_session_token = response['Credentials']['SessionToken'])
response = dynamodb_client.get_item(
Key={
'key1': {
'S': '1',
},
'key2': {
'S': '2',
},
},
TableName='TestTable')
print(response)
#!/usr/bin/env python3
import boto3
sts_client = boto3.client('sts')
assumed_role = sts_client.assume_role(RoleArn = "arn:aws:iam::123456789012:role/example_role",
RoleSessionName = "AssumeRoleSession1",
DurationSeconds = 1800)
session = boto3.Session(
aws_access_key_id = assumed_role['Credentials']['AccessKeyId'],
aws_secret_access_key = assumed_role['Credentials']['SecretAccessKey'],
aws_session_token = assumed_role['Credentials']['SessionToken'],
region_name = 'us-west-1'
)
# now we make use of the role to retrieve a parameter from SSM
client = session.client('ssm')
response = client.get_parameter(
Name = '/this/is/a/path/parameter',
WithDecryption = True
)
print(response)
Assuming that 1) the ~/.aws/config or ~/.aws/credentials file is populated with each of the roles that you wish to assume and that 2) the default role has AssumeRole defined in its IAM policy for each of those roles, then you can simply (in pseudo-code) do the following and not have to fuss with STS:
import boto3
# get all of the roles from the AWS config/credentials file using a config file parser
profiles = get_profiles()
for profile in profiles:
# this is only used to fetch the available regions
initial_session = boto3.Session(profile_name=profile)
# get the regions
regions = boto3.Session.get_available_regions('ec2')
# cycle through the regions, setting up session, resource and client objects
for region in regions:
boto3_session = boto3.Session(profile_name=profile, region_name=region)
boto3_resource = boto3_session.resource(service_name='s3', region_name=region)
boto3_client = boto3_session.client(service_name='s3', region_name=region)
[ do something interesting with your session/resource/client here ]
Credential Setup (boto3 - Shared Credentials File)
Assume Role Setup (AWS)
After a few days of searching, this is the simplest solution I have found. explained here but does not have a usage example.
import boto3
for profile in boto3.Session().available_profiles:
boto3.DEFAULT_SESSION = boto3.session.Session(profile_name=profile)
s3 = boto3.resource('s3')
for bucket in s3.buckets.all():
print(bucket)
This will switch the default role you will be using. To not make the profile the default, just do not assign it to boto3.DEFAULT_SESSION. but instead, do the following.
testing_profile = boto3.session.Session(profile_name='mainTesting')
s3 = testing_profile.resource('s3')
for bucket in s3.buckets.all():
print(bucket)
Important to note that the .aws credentials need to be set in a specific way.
[default]
aws_access_key_id = default_access_id
aws_secret_access_key = default_access_key
[main]
aws_access_key_id = main_profile_access_id
aws_secret_access_key = main_profile_access_key
[mainTesting]
source_profile = main
role_arn = Testing role arn
mfa_serial = mfa_arn_for_main_role
[mainProduction]
source_profile = main
role_arn = Production role arn
mfa_serial = mfa_arn_for_main_role
I don't know why but the mfa_serial key has to be on the roles for this to work instead of the source account which would make more sense.
Here's the code snippet I used
sts_client = boto3.client('sts')
assumed_role_object = sts_client.assume_role(
RoleArn=<arn of the role to assume>,
RoleSessionName="<role session name>"
)
print(assumed_role_object)
credentials = assumed_role_object['Credentials']
session = Session(
aws_access_key_id=credentials['AccessKeyId'],
aws_secret_access_key=credentials['SecretAccessKey'],
aws_session_token=credentials['SessionToken']
)
self.s3 = session.client('s3')

How to read parquet file from s3 using pandas

I am trying to read the parquet file which is in s3 using pandas.
Below is the code
import boto3
import pandas as pd
key = 'key'
secret = 'secret'
s3_client = boto3.client(
's3',
aws_access_key_id = key,
aws_secret_access_key = secret,
region_name = 'region_name'
)
print(s3_client)
AWS_S3_BUCKET='bucket_name'
filePath='data/wine_dataset'
response = s3_client.get_object(Bucket=AWS_S3_BUCKET, Key=filePath)
status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
if status == 200:
print(f"Successful S3 get_object response. Status - {status}")
books_df = pd.read_parquet(response.get("Body"))
print(books_df)
else:
print(f"Unsuccessful S3 get_object response. Status - {status}")
I am getting the below error
NoSuchKey: An error occurred (NoSuchKey) when calling the GetObject operation: The specified key does not exist.
But when I read the same s3 path using pyspark it worked
path= 's3a://bucket_name/data/wine_dataset'
df = spark.read.parquet(path)
I am not sure why it is not working using pandas. Can anyone help me on this?

python aws botocore.response.streamingbody to json

I am using boto3 to acccess files from S3,
The objective is to read the files and convert it to JSON
But the issue is none of the files have any file extension (no .csv,.json etc),although the data in the file is structured like JSON
client = boto3.client(
's3',
aws_access_key_id = 'AKEY',
aws_secret_access_key = 'ASAKEY',
region_name = 'us-east-1'
)
obj = client.get_object(
Bucket = 'bucketname',
Key = '*filename without extension*'
)
obj['Body'] returns a <botocore.response.StreamingBody> object
is it possible to find out the data within it?
The extension does not matter. Assuming your file contains valid json, you can get it:
my_json = json.loads(obj['Body'].read())
The response is a dictionary object.
Response returns StreamingBody in 'Body' attribute. So here is the solution.
Find more information here.
Boto S3 Get Object
client = boto3.client('s3')
response = client.get_object(
Bucket='<<bucket_name_here>>',
Key='<<file key from aws mangement console (S3 Info) >>'
)
jsonContent = json.loads(response['Body'].read())
print(jsonContent)

Kaggle login and unzip file to store in s3 bucket

Create a lambda function for python 3.7.
Role attached to the lambda function should have S3 access and lambda basic execution.
Read data from https://www.kaggle.com/therohk/india-headlines-news-dataset/download and save into S3 as CSV. file is zip how to unzip and store in temp file
Getting Failed in AWS Lambda function:
Lambda Handler to download news headline dataset from kaggle
import urllib3
import boto3
from botocore.client import Config
http = urllib3.PoolManager()
def lambda_handler(event, context):
bucket_name = 'news-data-kaggle'
file_name = "india-news-headlines.csv"
lambda_path = "/tmp/" +file_name
kaggle_info = {'UserName': "bossdk", 'Password': "xxx"}
url = "https://www.kaggle.com/account/login"
data_url = "https://www.kaggle.com/therohk/india-headlines-news-dataset/download"
r = http.request('POST',url,kaggle_info)
r = http.request('GET',data_url)
f = open(lambda_path, 'wb')
for chunk in r.iter_content(chunk_size = 512 * 1024):
if chunk:
f.write(chunk)
f.close()
data = ZipFile(lambda_path)
# S3 Connect
s3 = boto3.resource('s3',config=Config(signature_version='s3v4'))
# Uploaded File
s3.Bucket(bucket_name).put(Key=lambda_path, Body=data, ACL='public-read')
return {
'status': 'True',
'statusCode': 200,
'body': 'Dataset Uploaded'
}

Requirement is transfer of files between s3 buckets through lambda, taking keys of source s3 file dynamically

import boto3
import json
def lambda_handler(event, context):
print(event)
message = event['Records'][0]['s3']['object']['key']
print(message)
old_bucket_name = 'audio'
old_prefix = '2020/06/10/17/'
new_bucket_name = 'dev'
new_prefix = 'source_metadata/'
s3 = boto3.resource('s3')
old_bucket = s3.Bucket(old_bucket_name)
new_bucket = s3.Bucket(new_bucket_name)
for obj in old_bucket.objects.filter(Prefix=old_prefix):
old_source = { 'Bucket': old_bucket_name,
'Key': obj.key}
# replace the prefix
new_key = new_prefix + obj.key[len(old_prefix):]
new_obj = new_bucket.Object(new_key)
new_obj.copy(old_source)
I get the path and file name that i place in s3 in variable "message", how can i define as prefix of my destination bucket (dev)
A separate Lambda function will be triggered for each object that is created.
Therefore, the Lambda function should only move the object that triggered the function.
Here is some code that will move the object:
import boto3
import urllib
TARGET_BUCKET = 'dev'
TARGET_PATH = 'source_metadata/'
def lambda_handler(event, context):
# Get incoming bucket and key
source_bucket = event['Records'][0]['s3']['bucket']['name']
source_key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'])
# Extract filename without path
filename = ('/' + source_key).rsplit('/', 1)[1]
# Copy object to different bucket
s3_resource = boto3.resource('s3')
copy_source = {
'Bucket': source_bucket,
'Key': source_key
}
s3_resource.Bucket(TARGET_BUCKET).Object(TARGET_PATH + filename).copy(copy_source)

Resources