Getting corrupt zips using Python3 ZipStream in Django - python-3.x

I'm using zipstream from here and have a Django view that returns a zip file of all file attachments which are all hosted on Amazon S3. But the zip files are all coming up as corrupt when I download them, that is, I can't open them.
import io
import zipstream
s = io.BytesIO()
with zipstream.ZipFile(s,"w", compression=zipstream.ZIP_DEFLATED) as zf:
for file_path in file_paths:
file_dir, file_name = os.path.split(file_path)
zf.writestr(file_name, urllib.urlopen(file_path).read())
response = StreamingHttpResponse(s.getvalue(), content_type='application/octet-stream')
response['Content-Disposition'] = 'attachment; filename={}'.format('files.zip')
return response

Instead of zipstream package install aiozipstream package. If you've alreday installed the zipstream package uninstall it first.
pip uninstall zipstream and then do a
pip install aiozipstream
#Do the import in the same way
from zipstream import ZipStream
from django.http import StreamingHttpResponse
def s3_content_generator(key):
#s3_bucket - your s3 bucket name
infile_object = s3.get_object(Bucket=s3_bucket, Key= key)
data = str(infile_object.get('Body', '').read())
yield bytes(data, 'utf-8')
files = []
#filepath - list of s3 keys
for keyin filepath:
files.append({'stream': s3_content_generator(key),'name': 'filename'})
#large chunksize fasten the download process
zf = ZipStream(files, chunksize=32768)
response = StreamingHttpResponse(zf.stream(), content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename={}'.format('files.zip')
return response

Related

Kaggle login and unzip file to store in s3 bucket

Create a lambda function for python 3.7.
Role attached to the lambda function should have S3 access and lambda basic execution.
Read data from https://www.kaggle.com/therohk/india-headlines-news-dataset/download and save into S3 as CSV. file is zip how to unzip and store in temp file
Getting Failed in AWS Lambda function:
Lambda Handler to download news headline dataset from kaggle
import urllib3
import boto3
from botocore.client import Config
http = urllib3.PoolManager()
def lambda_handler(event, context):
bucket_name = 'news-data-kaggle'
file_name = "india-news-headlines.csv"
lambda_path = "/tmp/" +file_name
kaggle_info = {'UserName': "bossdk", 'Password': "xxx"}
url = "https://www.kaggle.com/account/login"
data_url = "https://www.kaggle.com/therohk/india-headlines-news-dataset/download"
r = http.request('POST',url,kaggle_info)
r = http.request('GET',data_url)
f = open(lambda_path, 'wb')
for chunk in r.iter_content(chunk_size = 512 * 1024):
if chunk:
f.write(chunk)
f.close()
data = ZipFile(lambda_path)
# S3 Connect
s3 = boto3.resource('s3',config=Config(signature_version='s3v4'))
# Uploaded File
s3.Bucket(bucket_name).put(Key=lambda_path, Body=data, ACL='public-read')
return {
'status': 'True',
'statusCode': 200,
'body': 'Dataset Uploaded'
}

How to upload video to s3 using API GW and python?

Im trying to make a api which will upload video to s3 . I all ready managed to upload the video in s3, but the problem is the video file is not working . i checked content-type of video file, and it's binary/octet-stream instead on video/mp4 . So i set content-type to "video/mp4" while calling put_object api, but it still not working.
I use Lambda function for putting the video to s3 . here is my lambda code -
import json
import base64
import boto3
def lambda_handler(event, context):
bucket_name = 'ad-live-streaming'
s3_client = boto3.client('s3')
file_content = event['content']
merchantId = event['merchantId']
catelogId = event['catelogId']
file_name = event['fileName']
file_path = '{}/{}/{}.mp4'.format(merchantId, catelogId, file_name)
s3_response = s3_client.put_object(Bucket=bucket_name, Key=file_path, Body=file_content, ContentType='video/mp4')
return {
'statusCode': 200,
"merchantId":merchantId,
"catelogId":catelogId,
"file_name":file_name,
}
Any idea how to solve this issue ?
Based on the example in Upload binary files to S3 using AWS API Gateway with AWS Lambda | by Omer Hanetz | The Startup | Medium, it appears that you need to decode the file from base64:
file_content = base64.b64decode(event['content'])

Octet Stream to PDF Azure Python

I'm trying to upload a PDF to an Azure blob store, and then download and read it. The upload works fine, and when I open it in the Azure Storage Explorer, the file opens fine. However, when I try to download it, I get an Octet stream and I cant figure out how to convert it back into a PDF. Im doing this all through a Function App so I'm not sure if writing everything to a temporary file will help. I tried it and I got a corrupted pdf as my output. My code is as follows.
Upload:
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_name = 'testblobstore123'
file = req.files['file']
try:
blob_client = blob_service_client.get_blob_client(container=container_name, blob=file.filename)
blob_client.upload_blob(file)
except Exception as e:
print(e)
return "Unspecified Error"
Download:
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_name = 'testblobstore123'
file = req.form['file']
blob_client = blob_service_client.get_blob_client(container=container_name, blob=file)
# data = blob_service_client.get_data_to_text(container_name, file)
data = blob_client.download_blob().readall()
Firstly in my test, I open the file link or download it to local, the content-type is Octet stream, however they are able to read.
Below is my download code.
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_name = 'test'
blob_name='nodejschinese.pdf'
download_file_path = os.path.join('D:', blob_name)
blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
with open(download_file_path, "wb") as download_file:
download_file.write(blob_client.download_blob().readall())
Secondly if you want to change the content-type when y ou upload it, you could add ContentSettings in the upload_blob method.
Below is my upload code with ContentSettings.
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient,ContentSettings
connect_str='connection string'
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_name = 'test'
local_file_name='nodejs.pdf'
upload_file_path='E:\\nodejs.pdf'
blob_client = blob_service_client.get_blob_client(container=container_name, blob=local_file_name)
with open(upload_file_path, "rb") as data:
blob_client.upload_blob(data,content_settings=ContentSettings(content_type='application/pdf'))

How to read the boto3 file object in opencv python3

I am trying to read the AWS S3 presigned uri file with open-CV. But the read parameter is of NoneType. How to read the boto3 file_obj in opencv and process further?
import cv2
import boto3
s3Client = boto3.client('s3')
file_path = s3Client.generate_presigned_url('get_object', Params = {'Bucket':
'www.mybucket.com', 'Key': 'hello.txt'}, ExpiresIn = 100)
img = cv2.imread(file_path)
But it is reading the file as <class 'NoneType'>. But I need it to be read by the cv2.
import urllib2
response = urllib2.urlopen(file_path)
image = response.read()
img = cv2.imread(image)
can you please try this

Unable to read the buffer from BytesIO in google app engine flex environment

Here is the related code
import logging
logging.getLogger('googleapicliet.discovery_cache').setLevel(logging.ERROR)
import datetime
import json
from flask import Flask, render_template, request
from flask import make_response
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from oauth2client.client import AccessTokenCredentials
...
#app.route('/callback_download')
def userselectioncallback_with_drive_api():
"""
Need to make it a background process
"""
logging.info("In download callback...")
code = request.args.get('code')
fileId = request.args.get('fileId')
logging.info("code %s", code)
logging.info("fileId %s", fileId)
credentials = AccessTokenCredentials(
code,
'flex-env/1.0')
http = httplib2.Http()
http_auth = credentials.authorize(http)
# Exports a Google Doc to the requested MIME type and returns the exported content. Please note that the exported content is limited to 10MB.
# v3 does not work? over quota?
drive_service = build('drive', 'v3', http=http_auth)
drive_request = drive_service.files().export(
fileId=fileId,
mimeType='application/pdf')
b = bytes()
fh = io.BytesIO(b)
downloader = MediaIoBaseDownload(fh, drive_request)
done = False
try:
while done is False:
status, done = downloader.next_chunk()
logging.log("Download %d%%.", int(status.progress() * 100))
except Exception as err:
logging.error(err)
logging.error(err.__class__)
response = make_response(fh.getbuffer())
response.headers['Content-Type'] = 'application/pdf'
response.headers['Content-Disposition'] = \
'inline; filename=%s.pdf' % 'yourfilename'
return response
It is based on some code example of drive api. I am trying to export some files from google drive to pdf format.
The exception comes from the line
response = make_response(fh.getbuffer())
It throws the exception:
TypeError: 'memoryview' object is not callable
How can I retrieve the pdf content properly from the fh? Do I need to further apply some base 64 encoding?
My local runtime is python 3.4.3
I have used an incorrect API. I should do this instead:
response = make_response(fh.getvalue())

Resources