Kaggle login and unzip file to store in s3 bucket - python-3.x

Create a Lambda function for Python 3.7.
The role attached to the Lambda function should have S3 access and Lambda basic execution.
Read data from https://www.kaggle.com/therohk/india-headlines-news-dataset/download and save it into S3 as CSV. The download is a zip file; how do I unzip it and store it in a temp file?
The following AWS Lambda function is failing:
# Lambda handler to download the news headline dataset from Kaggle
import urllib3
import boto3
from botocore.client import Config

http = urllib3.PoolManager()

def lambda_handler(event, context):
    bucket_name = 'news-data-kaggle'
    file_name = "india-news-headlines.csv"
    lambda_path = "/tmp/" + file_name
    kaggle_info = {'UserName': "bossdk", 'Password': "xxx"}
    url = "https://www.kaggle.com/account/login"
    data_url = "https://www.kaggle.com/therohk/india-headlines-news-dataset/download"
    r = http.request('POST', url, kaggle_info)
    r = http.request('GET', data_url)
    f = open(lambda_path, 'wb')
    for chunk in r.iter_content(chunk_size = 512 * 1024):
        if chunk:
            f.write(chunk)
    f.close()
    data = ZipFile(lambda_path)
    # S3 Connect
    s3 = boto3.resource('s3', config=Config(signature_version='s3v4'))
    # Uploaded File
    s3.Bucket(bucket_name).put(Key=lambda_path, Body=data, ACL='public-read')
    return {
        'status': 'True',
        'statusCode': 200,
        'body': 'Dataset Uploaded'
    }
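Among other issues, ZipFile is never imported, urllib3 responses have no iter_content method, and a Bucket resource has no put method. Setting the Kaggle login/download aside, here is a minimal sketch of the unzip-and-upload part, assuming the archive has already been saved under /tmp (the bucket name comes from the question; the zip file name is an assumption):
import zipfile
import boto3

def unzip_and_upload(zip_path='/tmp/dataset.zip',     # assumed local path
                     bucket_name='news-data-kaggle'):
    # Extract into Lambda's writable /tmp directory
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall('/tmp')
        members = archive.namelist()

    # Upload each extracted file; the key is the bare file name,
    # not the /tmp/... path
    s3 = boto3.resource('s3')
    for name in members:
        s3.Bucket(bucket_name).upload_file('/tmp/' + name, name)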

Related

How to pull logs from an API and store them in AWS S3

I am trying to achieve this through a Python program. I am able to pull logs from the API to a local drive.
However, I am struggling to copy them to AWS S3. I would appreciate your help on this.
I am using the code below to copy files to a local drive:
import requests
import requests.auth
from bs4 import BeautifulSoup
import os.path
from pathlib import Path
import os
import boto3

CLIENT_ID = "xxxxxxxx"
CLIENT_SECRET = "xxxxxxxx"
TOKEN_URL = "https://xxxxxxxx/dwsso/oauth2/access_token"
REDIRECT_URI = "https://xxxxxxxx/dwsso/oauth2/callback"

def get_token(code=None):
    client_auth = requests.auth.HTTPBasicAuth(CLIENT_ID, CLIENT_SECRET)
    post_data = {"grant_type": "client_credentials",
                 "code": code,
                 "redirect_uri": REDIRECT_URI}
    response = requests.post(TOKEN_URL,
                             auth=client_auth,
                             data=post_data)
    token_json = response.json()
    return token_json["access_token"]

my_token = get_token()

headersAPI = {
    'accept': 'application/json',
    'Authorization': 'Bearer ' + my_token,
}

params = (
    ('offset', '0'),
    ('limit', '20'),
)

def download_files(urls):
    for a in soup.find_all('a', href=True):
        url = urls
        a = a['href']
        logs_url = url + a
        r = requests.get(logs_url, headers=headersAPI, params=params, verify=True, stream=True)
        save_path = "download" + ((url.split('/')[2]).split('.')[0])
        Path(save_path).mkdir(parents=True, exist_ok=True)
        lname = logs_url.split('/')[-1]
        completeName = os.path.join(save_path, lname)
        print("Downloading file from %s domain: %s" % (save_path, lname))
        open(completeName, 'wb').write(r.content)

url_lst = ["https://xxxxxxxx/webdav/Sites/Logs/",
           "https://xxxxxxxx/webdav/Sites/Logs/",
           "https://xxxxxxxx/Sites/Logs/"]

for i in url_lst:
    response = requests.get(i, headers=headersAPI, params=params, verify=True, stream=True)
    soup = BeautifulSoup(response.content, 'html.parser')
    f_url = i.split('/')[2]
    url = "https://" + f_url
    download_files(url)
You can upload to S3 buckets using Boto3:
import boto3
s3 = boto3.resource('s3')
s3.Bucket('mybucket').upload_file('/tmp/hello.txt', 'hello.txt')
See the Boto3 documentation on uploading to an S3 bucket, and its installation and configuration guide.
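Wiring that into the code above, a minimal sketch (the bucket name is a hypothetical placeholder; completeName, save_path, and lname refer to the variables in the question's download_files):
import boto3

s3 = boto3.resource('s3')
BUCKET = 'my-log-bucket'  # hypothetical bucket name

def upload_log(local_path, key):
    # Push one downloaded log file to S3 under the given key
    s3.Bucket(BUCKET).upload_file(local_path, key)

# e.g. at the end of download_files, right after the file is written:
#   upload_log(completeName, save_path + '/' + lname)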

How to get AWS S3 object Location/URL using python 3.8?

I am uploading a file to AWS S3 using an AWS Lambda function (Python 3.8) with the following code.
file_obj = open(filename, 'rb')
s3_upload = s3.put_object(Bucket="aaa", Key="aaa.png", Body=file_obj)
return {
    'statusCode': 200,
    'body': json.dumps("Executed Successfully")
}
I want to get the location/URL of the S3 object in return. In Node.js we use the .location parameter to get the object location/URL.
Any idea how to do this using Python 3.8?
The URL of an S3 object has a known format and follows virtual-hosted-style access:
https://bucket-name.s3.Region.amazonaws.com/keyname
Thus, you can construct the url yourself:
bucket_name = 'aaa'
aws_region = boto3.session.Session().region_name
object_key = 'aaa.png'
s3_url = f"https://{bucket_name}.s3.{aws_region}.amazonaws.com/{object_key}"
return {
    'statusCode': 200,
    'body': json.dumps({'s3_url': s3_url})
}
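Note that a virtual-hosted-style URL only works if the object is publicly readable. For private objects, a presigned URL can be generated instead; a minimal sketch:
import boto3

s3_client = boto3.client('s3')

# Presigned URL that grants temporary read access to a private object
s3_url = s3_client.generate_presigned_url(
    'get_object',
    Params={'Bucket': 'aaa', 'Key': 'aaa.png'},
    ExpiresIn=3600  # seconds
)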

boto3 file upload in python

I am trying to upload a file to an AWS S3 bucket via boto3,
but instead of the file, the following is being uploaded: <_io.TextIOWrapper name='excel.csv' mode='a' encoding='UTF-8'>
def write_csv(data):
    with open('excel.csv', 'a') as file:
        writer = csv.writer(file)
        writer.writerow([data['account_id'],
                         data['country'],
                         data['end_date'],
                         data['start_date']])
        uploadtos3(str(file))

def uploadtos3(file):
    key = 'xxxx'
    seckey = 'xxxx'
    s3 = boto3.resource('s3',
                        aws_access_key_id=key,
                        aws_secret_access_key=seckey)
    upload_file_bucket = 'apiuploadtest'
    s3.Object(upload_file_bucket, str(file)).put(Body=str(file))
How do I upload the file correctly?
Body in the put method of Object is:
Body (bytes or seekable file-like object) -- Object data.
Therefore, the following should be tried (indentation fixed, str removed, and the file re-opened in read mode before the upload):
def write_csv(data):
    with open('excel.csv', 'a') as file:
        writer = csv.writer(file)
        writer.writerow([data['account_id'],
                         data['country'],
                         data['end_date'],
                         data['start_date']])
    # Re-open for reading: an append-mode handle cannot be read by put()
    with open('excel.csv', 'rb') as file:
        uploadtos3(file)

def uploadtos3(file):
    key = 'xxxx'
    seckey = 'xxxx'
    s3 = boto3.resource('s3',
                        aws_access_key_id=key,
                        aws_secret_access_key=seckey)
    upload_file_bucket = 'apiuploadtest'
    s3.Object(upload_file_bucket, <key-name-on-s3>).put(Body=file)
By the way, it's not good practice to hardcode AWS credentials in your source code.
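If the code runs on AWS, it is better to attach an IAM role and let boto3 resolve credentials automatically; a minimal sketch:
import boto3

# No keys in code: boto3 finds credentials in the environment,
# ~/.aws/credentials, or the IAM role attached to the Lambda/EC2 host
s3 = boto3.resource('s3')
s3.Object('apiuploadtest', 'excel.csv').upload_file('excel.csv')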

Requirement is transfer of files between s3 buckets through lambda, taking keys of source s3 file dynamically

import boto3
import json

def lambda_handler(event, context):
    print(event)
    message = event['Records'][0]['s3']['object']['key']
    print(message)
    old_bucket_name = 'audio'
    old_prefix = '2020/06/10/17/'
    new_bucket_name = 'dev'
    new_prefix = 'source_metadata/'
    s3 = boto3.resource('s3')
    old_bucket = s3.Bucket(old_bucket_name)
    new_bucket = s3.Bucket(new_bucket_name)
    for obj in old_bucket.objects.filter(Prefix=old_prefix):
        old_source = {'Bucket': old_bucket_name,
                      'Key': obj.key}
        # replace the prefix
        new_key = new_prefix + obj.key[len(old_prefix):]
        new_obj = new_bucket.Object(new_key)
        new_obj.copy(old_source)
I get the path and file name of the object placed in S3 in the variable "message"; how can I use it as the prefix in my destination bucket (dev)?
A separate Lambda function will be triggered for each object that is created.
Therefore, the Lambda function should only move the object that triggered the function.
Here is some code that will move the object:
import boto3
import urllib

TARGET_BUCKET = 'dev'
TARGET_PATH = 'source_metadata/'

def lambda_handler(event, context):
    # Get incoming bucket and key
    source_bucket = event['Records'][0]['s3']['bucket']['name']
    source_key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'])

    # Extract filename without path
    filename = ('/' + source_key).rsplit('/', 1)[1]

    # Copy object to different bucket
    s3_resource = boto3.resource('s3')
    copy_source = {
        'Bucket': source_bucket,
        'Key': source_key
    }
    s3_resource.Bucket(TARGET_BUCKET).Object(TARGET_PATH + filename).copy(copy_source)
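Note that copy only duplicates the object. If a true move is wanted, the source can be deleted once the copy succeeds; a one-line addition at the end of the handler above, using its source_bucket and source_key variables:
# After the copy completes, remove the original to finish the "move"
s3_resource.Object(source_bucket, source_key).delete()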

How to read the boto3 file object in opencv python3

I am trying to read a file from an AWS S3 presigned URL with OpenCV, but the result of the read is of NoneType. How can I read the boto3 file_obj in OpenCV and process it further?
import cv2
import boto3

s3Client = boto3.client('s3')
file_path = s3Client.generate_presigned_url('get_object',
                                            Params={'Bucket': 'www.mybucket.com', 'Key': 'hello.txt'},
                                            ExpiresIn=100)
img = cv2.imread(file_path)
But it is reading the file as <class 'NoneType'>, and I need it to be readable by cv2.
import urllib.request

import cv2
import numpy as np

# Fetch the bytes behind the presigned URL, then decode them in memory:
# cv2.imread only accepts a file path, so cv2.imdecode is used on the bytes
response = urllib.request.urlopen(file_path)
image = np.frombuffer(response.read(), dtype=np.uint8)
img = cv2.imdecode(image, cv2.IMREAD_COLOR)
Can you please try this?
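Alternatively, the object can be fetched directly with boto3 and decoded in memory, skipping the presigned URL entirely; a minimal sketch using the bucket and key from the question:
import boto3
import cv2
import numpy as np

s3Client = boto3.client('s3')

# Read the object body straight into memory and decode it as an image
obj = s3Client.get_object(Bucket='www.mybucket.com', Key='hello.txt')
data = np.frombuffer(obj['Body'].read(), dtype=np.uint8)
img = cv2.imdecode(data, cv2.IMREAD_COLOR)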
