Direct to Google Bucket in flask - python-3.x

I'm sharing my solution for uploading a file to a GCP bucket without saving it locally first.
import os

from flask import request
from google.cloud import storage

# `app` is the Flask application object defined elsewhere
@app.route('/upload/', methods=['POST'])
def upload():
    if request.method == 'POST':
        # FileStorage object wrapper around the uploaded file
        file = request.files["file"]
        if file:
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = app.config['GOOGLE_APPLICATION_CREDENTIALS']
            bucket_name = "bucket_name"
            storage_client = storage.Client()
            bucket = storage_client.bucket(bucket_name)
            # Upload the file contents to the bucket without touching local disk
            blob = bucket.blob(file.filename)
            blob.upload_from_string(file.read())
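For larger uploads, a variant worth considering (a sketch reusing the same file and bucket objects from the snippet above) is to hand the werkzeug FileStorage's underlying stream to upload_from_file, so the whole file never has to sit in memory:
# Sketch: stream the upload instead of buffering the whole file in memory
# with file.read(). Assumes the same `bucket` and `file` objects as above.
blob = bucket.blob(file.filename)
blob.upload_from_file(file.stream, content_type=file.content_type)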

Related

Loading data from a GCS bucket into a SharePoint folder

I am working on a POC where I have to load data from a GCS bucket into a SharePoint location.
I am using the code below but am not able to get the desired result.
# Import the storage library
from google.cloud import storage
client = storage.Client()
bucket_name = 'my-bucket'
file_name = 'my-file.csv'
# Download the file from GCS
bucket = client.bucket(bucket_name)
blob = bucket.blob(file_name)
blob.download_to_filename(file_name)
# Import the office365-rest-python-client library
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
# Set the SharePoint site URL
site_url = 'https://7rhjkshshgvavvd.sharepoint.com/sites/MyDemo/testing/'
# Authenticate with SharePoint
context = AuthenticationContext(url=site_url)
if context.acquire_token_for_user(username="XXXXXX", password="XXXXXX"):
    print("Authenticated with SharePoint")
else:
    print("Failed to authenticate with SharePoint")
# Construct a ClientContext object
client_context = ClientContext(site_url, context)
# Set the path to the file you want to upload
# Upload the file to SharePoint
file_creation_info = File.from_local_file(client_context)
sp_file = file_creation_info.upload()
client_context.execute_query()
print(f'File uploaded to SharePoint: {sp_file.server_relative_url}')
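For what it's worth, a minimal sketch of the SharePoint upload step that is commonly shown for office365-rest-python-client (assuming a version where ClientContext.with_credentials and Folder.upload_file(name, content) are available, and using "Shared Documents" as a placeholder target library, not something from the question):
# Sketch only, not the asker's code: authenticate with user credentials and
# upload the bytes downloaded from GCS into a SharePoint document library.
# "Shared Documents" is a placeholder; adjust to the real library name.
from office365.runtime.auth.user_credential import UserCredential
from office365.sharepoint.client_context import ClientContext

ctx = ClientContext(site_url).with_credentials(UserCredential("XXXXXX", "XXXXXX"))
target_folder = ctx.web.get_folder_by_server_relative_url("Shared Documents")

with open(file_name, "rb") as f:
    content = f.read()

# upload_file stages the request; execute_query actually sends it
uploaded_file = target_folder.upload_file(file_name, content).execute_query()
print(f"File uploaded to SharePoint: {uploaded_file.serverRelativeUrl}")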

cloud function read storage object data without using the storage client

I have created a simple cloud function with trigger: google.cloud.storage.object.v1.finalized
When a file (.xlsx) is uploaded to my bucket, I want to read its content.
I am using the following method for this:
import functions_framework

@functions_framework.cloud_event
def process_data(cloud_event):
    print(f"Data: {cloud_event.data}")
I am able to print cloud_event.data, but how do I get the actual file that was uploaded?
One way I can do this is by using the storage client, as below:
from google.cloud import storage
import functions_framework

def get_file(object_name, bucket_name, download_path):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(object_name)
    # download_path must be a file path, not a directory
    blob.download_to_filename(download_path)

@functions_framework.cloud_event
def process_data(cloud_event):
    print(f"Data: {cloud_event.data}")
    object_name = cloud_event.data['name']
    bucket_name = cloud_event.data['bucket']
    download_path = "/tmp/" + object_name
    get_file(object_name, bucket_name, download_path)
But is there a way to get the actual contents of the file without using the Cloud Storage client?
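Strictly speaking, the CloudEvent payload only carries object metadata, so something has to fetch the bytes. A minimal sketch of one alternative that avoids the google-cloud-storage client (assuming google-auth is available, which it is in the Functions runtime) is to call the Cloud Storage JSON API directly with an AuthorizedSession:
# Sketch: fetch the object's bytes via the Cloud Storage JSON API instead of
# the google-cloud-storage client. The event itself never contains file data.
import urllib.parse

import functions_framework
import google.auth
from google.auth.transport.requests import AuthorizedSession

@functions_framework.cloud_event
def process_data(cloud_event):
    bucket_name = cloud_event.data['bucket']
    object_name = cloud_event.data['name']
    media_url = (
        "https://storage.googleapis.com/storage/v1/b/"
        f"{bucket_name}/o/{urllib.parse.quote(object_name, safe='')}?alt=media"
    )
    credentials, _ = google.auth.default()
    session = AuthorizedSession(credentials)
    response = session.get(media_url)
    response.raise_for_status()
    xlsx_bytes = response.content  # raw contents of the uploaded .xlsx file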

GCP - Unable to access saved model file on GCP bucket in main.py

I have uploaded a PyTorch checkpoint file 'checkpoint_ic_d161.pth' to a GCP bucket.
I am trying to upload a PyTorch Flask model to GCP App Engine in order to make a simple web app.
But I'm not able to access the model file from the GCP bucket in my main.py on App Engine.
import torch

MODEL_URL = 'https://storage.googleapis.com/end_to_end_challenge_bucket/checkpoint_ic_d161.pth'

def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    model = checkpoint['model']
    model.classifier = checkpoint['classifier']
    model.load_state_dict(checkpoint['state_dict'])
    model.class_to_idx = checkpoint['class_to_idx']
    optimizer = checkpoint['optimizer']
    epochs = checkpoint['epochs']
    for param in model.parameters():
        param.requires_grad = False
    return model, checkpoint['class_to_idx']

def get_model():
    model, class_to_idx = load_checkpoint(MODEL_URL)
    model.eval()
    return model
I get a FileNotFound error for https://storage.googleapis.com/end_to_end_challenge_bucket/checkpoint_ic_d161.pth although this is the public access URL for the file in the bucket.
Why is this, and how can I access the model/checkpoint file stored in my GCP bucket from within main.py on GCP App Engine?
In order to download files from and upload files to Cloud Storage, you need to use the Cloud Storage client library, as reading a bucket URL directly is not supported by PyTorch.
pip install --upgrade google-cloud-storage
Upload
from google.cloud import storage

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    # bucket_name = "your-bucket-name"
    # source_file_name = "local/path/to/file"
    # destination_blob_name = "storage-object-name"
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print(
        "File {} uploaded to {}.".format(
            source_file_name, destination_blob_name
        )
    )
Download
from google.cloud import storage

def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Downloads a blob from the bucket."""
    # bucket_name = "your-bucket-name"
    # source_blob_name = "storage-object-name"
    # destination_file_name = "local/path/to/file"
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    # Construct a client side representation of a blob.
    # Note `Bucket.blob` differs from `Bucket.get_blob` as it doesn't retrieve
    # any content from Google Cloud Storage. As we don't need additional data,
    # using `Bucket.blob` is preferred here.
    blob = bucket.blob(source_blob_name)
    blob.download_to_filename(destination_file_name)
    print(
        "Blob {} downloaded to {}.".format(
            source_blob_name, destination_file_name
        )
    )
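Tying this back to the original question, a minimal sketch (reusing the bucket and checkpoint names from the question) would download the checkpoint into /tmp, the writable path on App Engine, and load it from there instead of from the HTTPS URL:
# Sketch: fetch the checkpoint with the Cloud Storage client first, then load
# the local copy with torch.load via the existing load_checkpoint helper.
LOCAL_CHECKPOINT = '/tmp/checkpoint_ic_d161.pth'

download_blob('end_to_end_challenge_bucket', 'checkpoint_ic_d161.pth', LOCAL_CHECKPOINT)
model, class_to_idx = load_checkpoint(LOCAL_CHECKPOINT)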

Kaggle login and unzip file to store in s3 bucket

Create a Lambda function for Python 3.7.
The role attached to the Lambda function should have S3 access and Lambda basic execution.
Read data from https://www.kaggle.com/therohk/india-headlines-news-dataset/download and save it into S3 as CSV. The file is a zip; how do I unzip it and store it in a temp file?
I am getting a failure in the AWS Lambda function:
Lambda handler to download the news headlines dataset from Kaggle:
import urllib3
import boto3
from botocore.client import Config
from zipfile import ZipFile

http = urllib3.PoolManager()

def lambda_handler(event, context):
    bucket_name = 'news-data-kaggle'
    file_name = "india-news-headlines.csv"
    lambda_path = "/tmp/" + file_name
    kaggle_info = {'UserName': "bossdk", 'Password': "xxx"}
    url = "https://www.kaggle.com/account/login"
    data_url = "https://www.kaggle.com/therohk/india-headlines-news-dataset/download"
    r = http.request('POST', url, kaggle_info)
    r = http.request('GET', data_url)
    f = open(lambda_path, 'wb')
    for chunk in r.iter_content(chunk_size=512 * 1024):
        if chunk:
            f.write(chunk)
    f.close()
    data = ZipFile(lambda_path)
    # S3 Connect
    s3 = boto3.resource('s3', config=Config(signature_version='s3v4'))
    # Uploaded File
    s3.Bucket(bucket_name).put(Key=lambda_path, Body=data, ACL='public-read')
    return {
        'status': 'True',
        'statusCode': 200,
        'body': 'Dataset Uploaded'
    }
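For reference, a minimal sketch of just the unzip-and-upload part (assuming the archive has already been downloaded to a hypothetical /tmp/archive.zip, since Kaggle downloads normally require an API token rather than a plain login POST), reusing the bucket and file names from the question:
# Sketch: extract the CSV from the zip in /tmp (Lambda's only writable path)
# and upload it to S3. Assumes the Lambda role allows s3:PutObject.
from zipfile import ZipFile

import boto3

def unzip_and_upload(zip_path="/tmp/archive.zip",
                     bucket_name="news-data-kaggle",
                     file_name="india-news-headlines.csv"):
    with ZipFile(zip_path) as archive:
        archive.extract(file_name, path="/tmp")
    s3 = boto3.client("s3")
    # upload_file streams from disk, so the CSV never has to fit in memory
    s3.upload_file("/tmp/" + file_name, bucket_name, file_name)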

Writing data to google cloud storage using python

I cannot find a way to write a data set from my local machine into Google Cloud Storage using Python. I have researched a lot but didn't find any clue regarding this. Need help, thanks.
Quick example, using the google-cloud Python library:
from google.cloud import storage

def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket."""
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print('File {} uploaded to {}.'.format(
        source_file_name,
        destination_blob_name))
More examples are in this GitHub repo: https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/storage/cloud-client
When we want to write a string to a GCS bucket blob, the only change necessary is using blob.upload_from_string(your_string) rather than blob.upload_from_filename(source_file_name):
from google.cloud import storage

def write_to_cloud(your_string):
    client = storage.Client()
    bucket = client.get_bucket('bucket123456789')
    blob = bucket.blob('PIM.txt')
    blob.upload_from_string(your_string)
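Any in-memory string works; for instance (a hypothetical usage, not part of the original answer), a JSON payload can be serialized and stored directly:
import json

# Hypothetical usage: serialize a dict and store the resulting string as the
# blob's contents via the write_to_cloud function above.
payload = {"id": 123, "status": "ok"}
write_to_cloud(json.dumps(payload))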
In the earlier answers, I still miss the easiest way: using the open() method.
You can use blob.open() as follows:
from google.cloud import storage

def write_file(lines):
    client = storage.Client()
    bucket = client.get_bucket('bucket-name')
    blob = bucket.blob('path/to/new-blob-name.txt')
    ## Use bucket.get_blob('path/to/existing-blob-name.txt') to write to existing blobs
    with blob.open(mode='w') as f:
        # `lines` is any iterable of strings to write into the blob
        for line in lines:
            f.write(line)
You can find more examples and snippets here:
https://github.com/googleapis/python-storage/tree/main/samples/snippets
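Reading works through the same file-like interface; a minimal sketch, assuming the blob written above already exists:
# Sketch: read the blob back as text with blob.open in read mode.
from google.cloud import storage

def read_file():
    client = storage.Client()
    bucket = client.get_bucket('bucket-name')
    blob = bucket.blob('path/to/new-blob-name.txt')
    with blob.open(mode='r') as f:
        return f.read()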
Another option is the older discovery-based client from googleapiclient, which talks to the Cloud Storage JSON API:
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
credentials = GoogleCredentials.get_application_default()
service = discovery.build('storage', 'v1', credentials=credentials)
filename = 'file.csv'
bucket = 'Your bucket name here'
body = {'name': 'file.csv'}
req = service.objects().insert(bucket=bucket, body=body, media_body=filename)
resp = req.execute()
from google.cloud import storage

def write_to_cloud(buffer):
    client = storage.Client()
    bucket = client.get_bucket('bucket123456789')
    blob = bucket.blob('PIM.txt')
    blob.upload_from_file(buffer)
While Brandon's answer indeed gets the file to Google Cloud, it does this by uploading the file, as opposed to writing the file. This means that the file needs to exist on your disk before you upload it to the cloud.
My proposed solution uses an "in-memory" payload (the buffer parameter) which is then written to the cloud. To write the content you need to use upload_from_file instead of upload_from_filename; everything else stays the same.
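For example (a usage sketch, not part of the original answer), the buffer can be built with the standard io module and handed straight to write_to_cloud:
import io

# Hypothetical usage: assemble the payload in memory, rewind, and upload it
# without ever writing a local file.
buffer = io.BytesIO()
buffer.write(b"first line\n")
buffer.write(b"second line\n")
buffer.seek(0)  # rewind so upload_from_file reads from the beginning
write_to_cloud(buffer)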
