Downloaded GZ files is showing 0 byte - python-3.x

I am using OCI Python SDK and when i am trying to download an object (from an OCI bucket) which is GZ format it is getting downloaded but the file size is zero byte. Attaching the code
Any help is much appriciable.
import os
import oci
import io
import sys
reporting_namespace = 'xygabcdef'
prefix_file = "abc/xyz"
# Update these values
destination_path = 'downloaded_reports'
# Make a directory to receive reports
if not os.path.exists(destination_path):
os.mkdir(destination_path)
# Get the list of reports
config = oci.config.from_file(oci.config.DEFAULT_LOCATION, oci.config.DEFAULT_PROFILE)
reporting_bucket = sys.argv[1]
object_storage = oci.object_storage.ObjectStorageClient(config)
report_bucket_objects = object_storage.list_objects(reporting_namespace, reporting_bucket, prefix=prefix_file)
#def download_audit():
for o in report_bucket_objects.data.objects:
print('Found file ' + o.name)
object_details = object_storage.get_object(reporting_namespace, reporting_bucket, o.name)
print (object_details)
filename = o.name.rsplit('/', 1)[-1]
with open(destination_path + '/' + filename, 'wb') as f:
for chunk in object_details.data.raw.stream(1024 * 1024, decode_content=False):
f.write(chunk)

Please see the example here. Does this work for you? Namely:
get_obj = object_storage.get_object(namespace, bucket_name, example_file_object_name)
with open('example_file_retrieved', 'wb') as f:
for chunk in get_obj.data.raw.stream(1024 * 1024, decode_content=False):
f.write(chunk)
In your example destintation_path seems to be undefined, and seems to have a typo (destintation -> destination). Could this be the problem?
Lastly, what does object_details report the file size / content-length as? It could be that the file size of the object in Object Storage is itself 0 bytes.

the .content from the .data of get_object should give you the file data (binary or text/josn/...), so here is a modified version of your code:
import os
import sys
import oci
reporting_namespace = 'xygabcdef'
prefix_file = "abc/xyz"
# Update these values
destination_path = 'downloaded_reports'
# Get the list of reports
config = oci.config.from_file(oci.config.DEFAULT_LOCATION, oci.config.DEFAULT_PROFILE)
reporting_bucket = sys.argv[1]
object_storage = oci.object_storage.ObjectStorageClient(config)
objects = object_storage.list_objects(reporting_namespace, reporting_bucket, prefix=prefix_file).data
# def download_audit():
for obj in objects:
print('Found file ' + obj.name)
object_response = object_storage.get_object(reporting_namespace, reporting_bucket, obj.name).data
print(object_response)
file_path = os.path.join(destination_path, obj.name)
# Make sure parent dirs up to the file level are created
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'wb') as file:
file.write(object_response.content)

Related

Automation via python

Completely new to python so forgive me if this a dumb question.
Part of my working tasks is to upgrade the IOS on various Cisco routers and switches.
The most mind numbing part of this is comparing the pre change config with the post change config.
I use ExamDiff for this but with up to 100 devices each night this gets soul destroying.
Is it possible to get python to open ExamDiff and automatically compare the pre and post checks, saving the differences to a file for each device?
I know I can use the import os command to open ExamDiff but I have no idea how to get ExamDiff to work
Can someone point me in the right direction?
Thanks
I got this..........
Works pretty well
#!/usr/bin/python
import os
path = input("Enter the files location: ")
def nexus():
rootdir = path + os.sep
filelist = os.listdir(rootdir)
if filelist:
for file in filelist:
if 'pre' in file:
prefile = file
postfile = file.replace('pre', 'post')
resultfile = file.replace('pre', 'report')
if postfile in filelist:
prefile = rootdir + prefile
postfile = rootdir + postfile
resultfile = rootdir + resultfile
compare(prefile, postfile, resultfile)
else:
print('No corresponding "post"-file to {0}.'.format(prefile))
else:
print('No files found.')
def compare(file1loc, file2loc, comparefileloc):
with open(file1loc, 'r') as file1:
file1lines = file1.readlines()
file1lines = [x.strip() for x in file1lines] # getting rid of whitespace and breaks
with open(file2loc, 'r') as file2:
file2lines = file2.readlines()
file2lines = [x.strip() for x in file2lines] # getting rid of whitespace and breaks
with open(comparefileloc, 'w') as comparefile:
comparefile.write('===== IN FILE 1 BUT NOT FILE 2 =====\r\n')
for file1line in file1lines:
if not file1line in file2lines:
comparefile.write(file1line + '\r\n')
comparefile.write('\r\n')
comparefile.write('===== IN FILE 2 BUT NOT FILE 1 =====\r\n')
for file2line in file2lines:
if not file2line in file1lines:
comparefile.write(file2line + '\r\n')
if __name__ == '__main__':
nexus()

Change order in filenames in a folder

I need to rename a bunch of files in a specific folder. They all end with date and time, like for example "hello 2019-05-22 1310.txt" and I want the date and time for each file to be first so I can sort them. With my code I get an error and it wont find my dir where all files are located. What is wrong with the code?
import os
import re
import shutil
dir_path = r'C:\Users\Admin\Desktop\Testfiles'
comp = re.compile(r'\d{4}-\d{2}-\d{2}')
for file in os.listdir(dir_path):
if '.' in file:
index = [i for i, v in enumerate(file,0) if v=='.'][-1]
name = file[:index]
ext = file[index+1:]
else:
ext=''
name = file
data = comp.findall(name)
if len(data)!=0:
date= comp.findall(name)[0]
rest_name = ' '.join(comp.split(name)).strip()
new_name = '{} {}{}'.format(date,rest_name,'.'+ext)
print('changing {} to {}'.format(name, new_name))
shutil.move(os.path.join(dir_path,name), os.path.join(dir_path, new_name))
else:
print('file {} is not change'.format(name))

How to open and append nested zip archives into dataframe without extracting?

I am trying to open a large number of csv files which found in several layers of zip files. Given the nature of this project, I am trying to open, read_csv them into a dataframe, append that data to an aggregate dataframe then continue through the loop.
Example: Folder Directory/First Zip/Second Zip/Third Zip/csv file.csv
My existing code can loop through the contents of the second and third zip file and get the name of each csv file. I am aware that this code can probably be made more simple by importing glob, but I'm unfamiliar.
import os
import pandas as pd
import zipfile, re, io
directory = 'C:/Test/'
os.chdir(directory)
fname = "test" + ".zip"
with zipfile.ZipFile(fname, 'r') as zfile:
# second level of zip files
for zipname in zfile.namelist():
if re.search(r'\.zip$', zipname) != None:
zfiledata = io.BytesIO(zfile.read(zipname))
# third level of zip files
with zipfile.ZipFile(zfiledata) as zfile2:
for zipname2 in zfile2.namelist():
# this zipfile contains xml and csv contents. This filters out the xmls
if zipname2.find("csv") > 0:
zfiledata2 = io.BytesIO(zfile2.read(zipname2))
with zipfile.ZipFile(zfiledata2) as zfile3:
fullpath = directory + fname + "/" + zipname + "/" + zipname2 + "/"
# csv file names are always the same as their zips. this cleans the string.
csvf = zipname2.replace('_csv.zip',".csv")
filehandle = open(fullpath, 'rb')
# the above statement is erroring: FileNotFoundError: [Errno 2] No such file or directory:
zfilehandle = zipfile.ZipFile(filehandle)
data = []
csvdata = StringIO.StringIO(zfilehandle.read(csvf))
df = pd.read_csv(csvdata)
data.append(df)
print(data.head())

Save CSV for every functioncall with another name

at the moment I am able to create one CSV file with all the content I get at once.
Now I would like to create a list where I have different names in it.
How can I produce for every functioncall a different CSV file name? I thought about looping a list but I just want a +1 iteration at each call. I thought about saving my state somehow and use it in next functioncall. Everytime I initialize my variable with 0 and so I don't get 1. I think I could do it with Python Function Parameter calls but I have no idea how to use it. Can someone give me a little tip or example? If there are better ideas (maybe my idea is totally bullshit), how to solve this, just help please.
The comments in the code shall represent my imagination.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from tenable.sc import SecurityCenter as SC
import os.path
import sys
import getpass
import csv
SC_HOST = '...'
def parse_entry(entry):
split_after_path = ''
ip = entry.get('ip', None)
pluginText = entry.get('pluginText', None)
if 'Path : ' in pluginText:
for line in pluginText.splitlines(0):
if 'Path : ' in line:
split_after_path_in_plugintext = line.split("Path : ",1)[1]
# place = ['place1', 'place2', 'place3', 'place4', 'place5']
# i = 0
# i = i+1
file_exists = os.path.isfile('testfile_path.csv')
# file_exists = os.path.isfile('testfile_path_'+place[i]+'.csv')
data = open('testfile_path.csv', 'a')
# data = open('testfile_path_'+place[i]+'.csv', 'a')
with data as csvfile:
header = ['IP Address', 'Path']
writer = csv.DictWriter(csvfile, lineterminator='\n', quoting=csv.QUOTE_NONNUMERIC, fieldnames=header)
if not file_exists:
writer.writeheader()
writer.writerow({'IP Address': ip, 'Path': split_after_path})
data.close()
def main():
sc_user = input('[<<] username: ')
sc_pass = getpass.getpass('[<<] password: ')
sc = SC(SC_HOST)
sc.login(sc_user, sc_pass)
# Query API for data
# asset = [12,13,14,25,29]
# i = 0
# assetid = asset[i]
# vuln = sc.analysis.vulns(('pluginID', '=', '25072')('asset','=','assetid'))
# i = i+1
vuln = sc.analysis.vulns(('pluginID', '=', '25072'),('asset','=','11'))
for entry in vuln:
parse_entry(entry)
sc.logout()
return 0
if __name__ == '__main__':
sys.exit(main())
The simplest and most obvious solution is to pass the full file path to your parse_entry function, ie:
def parse_entry(entry, filepath):
# ...
if 'Path : ' in pluginText:
for line in pluginText.splitlines(0):
if 'Path : ' in line:
# ...
file_exists = os.path.isfile(filepath)
with open(filepath, 'a') as csvfile:
# ...
Then in main() use enumerate() to build sequential filenames:
def main():
# ...
for i, entry in enumerate(vuln):
path = "'testfile_path{}.csv".format(i)
parse_entry(entry, path)
You can use a function attribute to keep track of the number of times the function has been called.
def parse_entry(entry):
parse_entry.i += 1
# outside the function you have to initialize the attribute
parse_entry.i = 0
Or you can look at other ways to initialize the function attribute in this post.
Alternatively, you can use glob to get the current number of files.
from glob import glob
i = len(glob('testfile_path_*.csv'))

Split a zip archive into multiple chunks

I'm trying to create a zip archive of a possibly huge folder.
For this purpose I'm using the python zipfile module, but as far as I can see there is no option to split the created archive into multiple chunks with a max size.
The zipped archive is supposed to be sent via Telegram, which has a size limitation of 1.5 GB per file. Thereby I need to split the resulting zip archive.
I would really like to not use a subprocess and shell commands for creating this archive.
My current code looks like this:
def create_zip(archive_name, directory):
"""Create a zip file from given dir path."""
with ZipFile(archive_name, "w", ZIP_LZMA) as target_zip_file:
for root, _, files in os.walk(directory):
for file_to_zip in files:
absolute_path = os.path.join(root, file_to_zip)
zip_file_name = absolute_path[len(directory) + len(os.sep):]
target_zip_file.write(absolute_path, zip_file_name)
return target_zip_file
Thanks in Advance
Here is what i use to send file to telegram channel by telegram bot.
The file size limit is 50MB in upload by telegram bot
The file size limit is 1500MB in upload by telegram client but you may add some text or other info so 1495 is more safe
#! /usr/bin/python3
# -*- coding:utf-8 -*-
# apt-get install p7zip-full
import subprocess
import os
import math
import logzero
logger = logzero.logger
MAX_SPLIT_SIZE = 1495
def file_split_7z(file_path, split_size=MAX_SPLIT_SIZE):
file_path_7z_list = []
# if origin file is 7z file rename it
origin_file_path = ""
if os.path.splitext(file_path)[1] == ".7z":
origin_file_path = file_path
file_path = os.path.splitext(origin_file_path)[0] + ".7zo"
os.rename(origin_file_path, file_path)
# do 7z compress
fz = os.path.getsize(file_path) / 1024 / 1024
pa = math.ceil(fz / split_size)
head, ext = os.path.splitext(os.path.abspath(file_path))
archive_head = "".join((head, ext.replace(".", "_"))) + ".7z"
for i in range(pa):
check_file_name = "{}.{:03d}".format(archive_head, i + 1)
if os.path.isfile(check_file_name):
logger.debug("remove exists file | {}".format(check_file_name))
os.remove(check_file_name)
cmd_7z = ["7z", "a", "-v{}m".format(split_size), "-y", "-mx0", archive_head, file_path]
proc = subprocess.Popen(cmd_7z, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = proc.communicate()
if b"Everything is Ok" not in out:
logger.error("7z output | {}".format(out.decode("utf-8")))
logger.error("7z error | {}".format(err.decode("utf-8")))
return file_path_7z_list
for i in range(pa):
file_path_7z_list.append("{}.{:03d}".format(archive_head, i + 1))
# if origin file is 7z file rename it back
if origin_file_path:
os.rename(file_path, origin_file_path)
return file_path_7z_list
def do_file_split(file_path, split_size=MAX_SPLIT_SIZE):
"""caculate split size
example max split size is 1495 file size is 2000
than the split part num should be int(2000 / 1495 + 0.5) = 2
so the split size should be 1000 + 1000 but not 1495 + 505
with the file size increase the upload risk would be increase too
"""
file_size = os.path.getsize(file_path) / 2 ** 20
split_part = math.ceil(file_size / split_size)
new_split_size = math.ceil(file_size / split_part)
logger.info("file size | {} | split num | {} | split size | {}".format(file_size, split_part, new_split_size))
file_path_7z_list = file_split_7z(file_path, split_size=new_split_size)
return file_path_7z_list
In case you don't find a better, native way with zipfile, you could still write the file splitting algorithm yourself. Something like this:
outfile = archive_name
packet_size = int(1.5 * 1024**3) # bytes
with open(outfile, "rb") as output:
filecount = 0
while True:
data = output.read(packet_size)
print(len(data))
if not data:
break # we're done
with open("{}{:03}".format(outfile, filecount), "wb") as packet:
packet.write(data)
filecount += 1
And similar to put it back together on the receiver's side.

Resources