loading data from GCS bucket to Sharepoint folder - python-3.x

I am working on a POC where I have to load data from GCS Bucket to a sharePoint Location.
I am using the below code but not able to get desired result.
# Import the storage library
from google.cloud import storage
client = storage.Client()
bucket_name = 'my-bucket'
file_name = 'my-file.csv'
# Download the file from GCS
bucket = client.bucket(bucket_name)
blob = bucket.blob(file_name)
blob.download_to_filename(file_name)
# Import the office365-rest-python-client library
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
# Set the SharePoint site URL
site_url = 'https://7rhjkshshgvavvd.sharepoint.com/sites/MyDemo/testing/'
# Authenticate with SharePoint
context = AuthenticationContext(url=site_url)
if context.acquire_token_for_user(username="XXXXXX", password="XXXXXX"):
print("Authenticated with SharePoint")
else:
print("Failed to authenticate with SharePoint")
# Construct a ClientContext object
client_context = ClientContext(site_url, context)
# Set the path to the file you want to upload
# Upload the file to SharePoint
file_creation_info = File.from_local_file(client_context)
sp_file = file_creation_info.upload()
client_context.execute_query()
print(f'File uploaded to SharePoint: {sp_file.server_relative_url}')

Related

cloud function read storage object data without using the storage client

I have created a simple cloud function with trigger: google.cloud.storage.object.v1.finalized
When a file (.xlsx) is being uploaded to my bucket I want to read it's content.
I am using following method for the same:
import functions_framework
#functions_framework.cloud_event
def process_data(cloud_event):
print(f"Data: {cloud_event.data}")
I am able to print the cloud_event.data but how to I get the actual file which was uploaded
One way that I can do is using the storage client in below manner:
from google.cloud import storage
import functions_framework
def get_file(object_name, bucket_name, download_path):
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(object_name)
blob.download_to_filename(download_path)
#functions_framework.cloud_event
def process_data(cloud_event):
print(f"Data: {cloud_event.data}")
object_name = cloud_event.data['name']
bucket_name = cloud_event.data['bucket']
download_path = "/tmp/"
get_file(object_name, bucket_name, download_path)
But is there a way through which I can get the actual contents of the file without using the cloud storage client ?

GCP - Unable to access saved model file on GCP bucket in main.py

I have uploaded a PyTorch checkpoint file 'checkpoint_ic_d161.pth' to a GCP bucket.
I am trying to upload a PyTorch Flask model to GCP App Engine in order to make a simple web app.
But I'm not able to access the model file from the GCP bucket into my main.py in the App Engine.
MODEL_URL = 'https://storage.googleapis.com/end_to_end_challenge_bucket/checkpoint_ic_d161.pth'
def load_checkpoint(filepath):
checkpoint = torch.load(filepath)
model = checkpoint['model']
model.classifier = checkpoint['classifier']
model.load_state_dict(checkpoint['state_dict'])
model.class_to_idx = checkpoint['class_to_idx']
optimizer = checkpoint['optimizer']
epochs = checkpoint['epochs']
for param in model.parameters():
param.requires_grad = False
return model, checkpoint['class_to_idx']
def get_model():
model, class_to_idx = load_checkpoint(MODEL_URL)
model.eval()
return model
I get a FileNotFound error for https://storage.googleapis.com/end_to_end_challenge_bucket/checkpoint_ic_d161.pth although this is the public access to the file in the bucket.
Why is this so, how can I access the model /checkpoint file stored in my GCP bucket, within my main.py for the GCP App Engine?
In order to download and upload to Cloud Storage files you need to use the Cloud Storage libraries. As it is not supported by PyTorch.
pip install --upgrade google-cloud-storage
Upload
from google.cloud import storage
def upload_blob(bucket_name, source_file_name, destination_blob_name):
"""Uploads a file to the bucket."""
# bucket_name = "your-bucket-name"
# source_file_name = "local/path/to/file"
# destination_blob_name = "storage-object-name"
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)
print(
"File {} uploaded to {}.".format(
source_file_name, destination_blob_name
)
)
Download
from google.cloud import storage
def download_blob(bucket_name, source_blob_name, destination_file_name):
"""Downloads a blob from the bucket."""
# bucket_name = "your-bucket-name"
# source_blob_name = "storage-object-name"
# destination_file_name = "local/path/to/file"
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)
# Construct a client side representation of a blob.
# Note `Bucket.blob` differs from `Bucket.get_blob` as it doesn't retrieve
# any content from Google Cloud Storage. As we don't need additional data,
# using `Bucket.blob` is preferred here.
blob = bucket.blob(source_blob_name)
blob.download_to_filename(destination_file_name)
print(
"Blob {} downloaded to {}.".format(
source_blob_name, destination_file_name
)
)

Direct to Google Bucket in flask

I share my solution to upload a file to a gcp bucket without saving the file locally.
from google.cloud import storage
#app.route('/upload/', methods=['POST'])
def upload():
if request.method == 'POST':
# FileStorage object wrapper
file = request.files["file"]
if file:
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = app.config['GOOGLE_APPLICATION_CREDENTIALS']
bucket_name = "bucket_name"
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)
# Upload file to Google Bucket
blob = bucket.blob(file.filename)
blob.upload_from_string(file.read())

Download file from AWS S3 using Python

I am trying to download a file from Amazon S3 bucket to my local using the below code but I get an error saying "Unable to locate credentials"
Given below is the code I have written:
from boto3.session import Session
import boto3
ACCESS_KEY = 'ABC'
SECRET_KEY = 'XYZ'
session = Session(aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY)
s3 = session.resource('s3')
your_bucket = s3.Bucket('bucket_name')
for s3_file in your_bucket.objects.all():
print(s3_file.key) # prints the contents of bucket
s3 = boto3.client ('s3')
s3.download_file('your_bucket','k.png','/Users/username/Desktop/k.png')
Could anyone help me on this?
You are not using the session you created to download the file, you're using s3 client you created. If you want to use the client you need to specify credentials.
your_bucket.download_file('k.png', '/Users/username/Desktop/k.png')
or
s3 = boto3.client('s3', aws_access_key_id=... , aws_secret_access_key=...)
s3.download_file('your_bucket','k.png','/Users/username/Desktop/k.png')
From an example in the official documentation, the correct format is:
import boto3
s3 = boto3.client('s3', aws_access_key_id=... , aws_secret_access_key=...)
s3.download_file('BUCKET_NAME', 'OBJECT_NAME', 'FILE_NAME')
You can also use a file-like object opened in binary mode.
s3 = boto3.client('s3', aws_access_key_id=... , aws_secret_access_key=...)
with open('FILE_NAME', 'wb') as f:
s3.download_fileobj('BUCKET_NAME', 'OBJECT_NAME', f)
f.seek(0)
The code in question uses s3 = boto3.client ('s3'), which does not provide any credentials.
The format for authenticating a client is shown here:
import boto3
client = boto3.client(
's3',
aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY,
aws_session_token=SESSION_TOKEN,
)
# Or via the Session
session = boto3.Session(
aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY,
aws_session_token=SESSION_TOKEN,
)
And lastly you can also re-use the authenticated session you created to get the bucket, and then download then file from the bucket.
from boto3.session import Session
import boto3
ACCESS_KEY = 'ABC'
SECRET_KEY = 'XYZ'
session = Session(aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY)
# session is authenticated and can access the resource in question
session.resource('s3')
.Bucket('bucket_name')
.download_file('k.png','/Users/username/Desktop/k.png')
For others trying to download files from AWS S3 looking for a more user-friendly solution with other industrial-strength features, check out https://github.com/d6t/d6tpipe. It abstracts the S3 functions into a simpler interface. It also supports directory sync, uploading files, permissions and many other things you need to sync files from S3 (and ftp).
import d6tpipe
api = d6tpipe.api.APILocal() # keep permissions locally for security
settings = \
{
'name': 'my-files',
'protocol': 's3',
'location': 'bucket-name',
'readCredentials' : {
'aws_access_key_id': 'AAA',
'aws_secret_access_key': 'BBB'
}
}
d6tpipe.api.create_pipe_with_remote(api, settings)
pipe = d6tpipe.Pipe(api, 'my-files')
pipe.scan_remote() # show all files
pipe.pull_preview() # preview
pipe.pull(['k.png']) # download single file
pipe.pull() # download all files
pipe.files() # show files
file=open(pipe.dirpath/'k.png') # access file
You can setup your AWS profile with awscli to avoid introduce your credentials in the file. First add your profile:
aws configure --profile account1
Then in your code add:
aws_session = boto3.Session(profile_name="account1")
s3_client = aws_session.client('s3')
FileName:
can be any name; with that name; file will be downloaded.
It can be added to any existing local directory.
Key:
Is the S3 file path along with the file name in the end.
It does not start with a backslash.
Session()
It automatically picks the credentials from ~/.aws/config OR ~/.aws/credentials
If not you need to explicitly pass that.
from boto3.session import Session
import boto3
# Let's use Amazon S3
s3 = boto3.resource("s3")
# Print out bucket names to check you have accessibility
# for bucket in s3.buckets.all():
# print(bucket.name)
session = Session()
OR
session = Session(aws_access_key_id="AKIAYJN2LNOU",
aws_secret_access_key="wMyT0SxEOsoeiHYVO3v9Gc",
region_name="eu-west-1")
session.resource('s3').Bucket('bucket-logs').download_file(Key="logs/20221122_0_5ee03da676ac566336e2279decfc77b3.gz", Filename="/tmp/Local_file_name.gz")

Writing data to google cloud storage using python

I cannot find a way to to write a data set from my local machine into the google cloud storage using python. I have researched a a lot but didn't find any clue regarding this. Need help, thanks
Quick example, using the google-cloud Python library:
from google.cloud import storage
def upload_blob(bucket_name, source_file_name, destination_blob_name):
"""Uploads a file to the bucket."""
storage_client = storage.Client()
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob(destination_blob_name)
blob.upload_from_filename(source_file_name)
print('File {} uploaded to {}.'.format(
source_file_name,
destination_blob_name))
More examples are in this GitHub repo: https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/storage/cloud-client
When we want to write a string to a GCS bucket blob, the only change necessary is using blob.upload_from_string(your_string) rather than blob.upload_from_filename(source_file_name):
from google.cloud import storage
def write_to_cloud(your_string):
client = storage.Client()
bucket = client.get_bucket('bucket123456789')
blob = bucket.blob('PIM.txt')
blob.upload_from_string(your_string)
In the earlier answers, I still miss the easiest way, using the open() method.
You can use the blob.open() as follows:
from google.cloud import storage
def write_file():
client = storage.Client()
bucket = client.get_bucket('bucket-name')
blob = bucket.blob('path/to/new-blob-name.txt')
## Use bucket.get_blob('path/to/existing-blob-name.txt') to write to existing blobs
with blob.open(mode='w') as f:
for line in object:
f.write(line)
You can find more examples and snippets here:
https://github.com/googleapis/python-storage/tree/main/samples/snippets
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
credentials = GoogleCredentials.get_application_default()
service = discovery.build('storage', 'v1', credentials=credentials)
filename = 'file.csv'
bucket = 'Your bucket name here'
body = {'name': 'file.csv'}
req = service.objects().insert(bucket=bucket, body=body, media_body=filename)
resp = req.execute()
from google.cloud import storage
def write_to_cloud(buffer):
client = storage.Client()
bucket = client.get_bucket('bucket123456789')
blob = bucket.blob('PIM.txt')
blob.upload_from_file(buffer)
While Brandon's answer indeed gets the file to Google cloud, it does this by uploading the file, as opposed to writing the file. This means that the file needs to exist on your disk before you upload it to the cloud.
My proposed solution uses an "in-memory" payload (the buffer parameter) which is then written to cloud. To write the content you need to use upload_from_file instead of upload_from_filename, everything else being the same.

Resources