Get the file name from BytesIO zip file - python-3.x

I have this code that loads a zip file from an FTP server into a buffer:
import zipfile
from ftplib import FTP
from io import BytesIO

file_name = 'ftp_file_name.zip'
r = BytesIO()
ftp.retrbinary("RETR " + file_name, r.write)  # ftp is an already-connected FTP instance
with zipfile.ZipFile(r, 'r') as myzip:
    f = myzip.open('hidden_name.csv')
My problem is that I don't know how to get hidden_name.csv programmatically from inside the zip file.
If I call r.readline() I get this byte string, which contains the name:
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\t\x00\x08U\xdeQHH]\xeb.\x00\xa6=\xe3\x00\x17\x00\x00\x00hidden_name.csv\xec\xfd\xd9n;I\x92>\n'
Is there a method to do this?
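For reference, a minimal sketch of one way to do this: ZipFile.namelist() returns the names of the archive's entries, so the CSV name never has to be hard-coded (this sketch assumes the archive contains a single file):
import zipfile

with zipfile.ZipFile(r, 'r') as myzip:  # r is the BytesIO buffer filled above
    names = myzip.namelist()            # e.g. ['hidden_name.csv']
    with myzip.open(names[0]) as f:     # assumes exactly one entry in the zip
        data = f.read()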

Related

Finding a file by extension

I am trying to find files with the .desktop extension in a specific directory in Python 3. I tried the code snippet below, but it didn't work as I wanted. I want the result to be a single string value.
import configparser
import fnmatch
import os

desktopfile = configparser.ConfigParser()

def find(pattern, path):
    result = []
    for root, dirs, files in os.walk(path):
        for name in files:
            if fnmatch.fnmatch(name, pattern):
                result.append(os.path.join(root, name))
    return result

script_tmp_dir = "/tmp/appiload/appinstall"  # Temporary directory (files are extracted here)
desktopfilea = f"{script_tmp_dir}/squashfs-root/{str(find('*.desktop', f'{script_tmp_dir}/squashfs-root/'))}"
print(desktopfilea)
desktopfile.items()
Result:
/tmp/appiload/appinstall/squashfs-root/['/tmp/appiload/appinstall/squashfs-root/helloworld.desktop']
Use glob.glob instead of writing a function to do this job.
import configparser
import glob

desktopfile = configparser.ConfigParser()
script_tmp_dir = "/tmp/appiload/appinstall"  # Temporary directory (files are extracted here)
desktopfilea = glob.glob(f'{script_tmp_dir}/squashfs-root/*.desktop')
# desktopfilea = " ".join(desktopfilea)  # Join them into one string, using a space as separator
print(str(desktopfilea))
desktopfile.items()
I don't exactly understand what you mean, but I made a simple program that prints all the files with the .desktop extension and saves them to two files: applications.json (as an array) and applications.txt (one name per line).
I also have two versions of the program: one that prints and saves only the file names with extensions, and another that prints and saves the whole path.
File names only:
import json
import os

strApplications = ""
applications = []
for file in os.listdir(os.path.dirname(os.path.realpath(__file__))):
    if file.endswith(".desktop"):
        applications.append(file)
        strApplications = strApplications + file + "\n"
with open("applications.json", "w") as f:
    json.dump(applications, f)
with open("applications.txt", "w") as f:
    f.write(strApplications)
print(strApplications)
Full file path:
import json
import os

cwd = os.getcwd()
# os.path.join keeps the separator portable (the original used a hard-coded "\\")
files = [os.path.join(cwd, f) for f in os.listdir(cwd) if f.endswith(".desktop")]
with open("applications.json", "w") as f:
    json.dump(files, f)
with open("applications.txt", "w") as f:
    f.write("\n".join(files))
print("\n".join(files))

Is there a way to download N files from S3 as just one file?

Current code:
#!/usr/bin/python
import boto3

s3 = boto3.client('s3')
objects = s3.list_objects(Bucket='my_bucket_name')['Contents']  # renamed from `list`, which shadows the builtin
for key in objects:
    s3.download_file('my_bucket_name', key['Key'], key['Key'])
In that path I have N files. This way I download them and end up with N local files, but I just want one single file.
I did:
data=""
files = []
for file in glob.glob("*.json"):
files.append(file)
for file in files:
with open(file) as fp:
data += fp.read()
data += "\n"
with open ('output.json', 'w') as fp:
fp.write(data)
Is there a way to do it faster or even using boto to stream downloaded bytes to a file?
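One possibility is to skip the temporary files entirely and stream each object's bytes into a single local file. A minimal sketch, assuming the same bucket name as above; get_object returns a StreamingBody whose iter_chunks() yields raw bytes:
import boto3

s3 = boto3.client('s3')
paginator = s3.get_paginator('list_objects_v2')  # handles buckets with more than 1000 keys
with open('output.json', 'wb') as out:
    for page in paginator.paginate(Bucket='my_bucket_name'):
        for obj in page.get('Contents', []):
            body = s3.get_object(Bucket='my_bucket_name', Key=obj['Key'])['Body']
            for chunk in body.iter_chunks():  # stream without loading the whole object
                out.write(chunk)
            out.write(b'\n')  # newline between concatenated objects, as in the glob version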

How do I preserve directories when zipping up a file in python?

I have the following function where I convert a directory to bytes, but the directories inside the parent directory are not being preserved. How do I preserve the directories? Here's what I have tried:
import io
import os
from zipfile import ZipFile

# Function name and parameter are as implied by the question's body
def directory_to_bytes(path_to_extension_directory):
    buf = io.BytesIO()
    with ZipFile(buf, "w") as zipObj:
        # Iterate over all the files in the directory
        for folderName, subfolders, filenames in os.walk(path_to_extension_directory):
            for filename in filenames:
                # Create the complete filepath of the file in the directory
                filePath = os.path.join(folderName, filename)
                with open(filePath, 'rb') as file_data:
                    bytes_content = file_data.read()
                # Add the file to the zip
                zipObj.writestr(filePath, bytes_content)
    # Rewind the buffer's file pointer (may not be necessary)
    buf.seek(0)
    return buf.read()
This returns the bytes of the files but when I open it, all of the files are in the same directory. I thought that adding the filePath as the first parameter in writestr would do it, but it does not. Thanks for your help! Please let me know if I can provide any additional information.
Try replacing
filePath = os.path.join(folderName, filename)
with:
filePath = os.path.relpath(os.path.join(folderName, filename), path_to_extension_directory)
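relpath makes the stored entry name relative to the root being zipped, so the subdirectory structure is preserved instead of being flattened. For reference, a minimal sketch of the same idea using ZipFile.write with an explicit arcname, which lets zipfile read the file itself:
import io
import os
from zipfile import ZipFile

def directory_to_bytes(root):
    buf = io.BytesIO()
    with ZipFile(buf, "w") as zipObj:
        for folderName, subfolders, filenames in os.walk(root):
            for filename in filenames:
                filePath = os.path.join(folderName, filename)
                # arcname controls the path stored inside the archive
                zipObj.write(filePath, arcname=os.path.relpath(filePath, root))
    return buf.getvalue()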

Python3: Index out of range for script that worked before

The attached script returns:
IndexError: list index out of range
for the line starting with values = {line.split(...).
values = dict()
with open(csv) as f:
    lines = f.readlines()
    values = {line.split(',')[0].strip(): line.split(',')[1].strip() for line in lines}
However, I could use it yesterday for doing exactly the same thing:
replacing certain text in a directory of XML files with different text.
import os
from distutils.dir_util import copy_tree

drc = 'D:/Spielwiese/00100_Arbeitsverzeichnis'
backup = 'D:/Spielwiese/Backup/'
csv = 'D:/persons1.csv'
copy_tree(drc, backup)

values = dict()
with open(csv) as f:
    lines = f.readlines()
    values = {line.split(',')[0].strip(): line.split(',')[1].strip() for line in lines}

# Getting a list of the full paths of the files
for dirpath, dirname, filename in os.walk(drc):
    for fname in filename:
        # Joining dirpath and filenames
        path = os.path.join(dirpath, fname)
        # Opening the files for reading only
        filedata = open(path, encoding="Latin-1").read()
        for k, v in values.items():
            filedata = filedata.replace(k, v)
        f = open(path, 'w', encoding="Latin-1")
        # Writing the changes back to the files
        f.write(filedata)
        f.close()  # Closing the files
print("In case something went wrong, you can find a backup in " + backup)
I don't see anything weird, and as mentioned, it worked before... :-o
Any ideas on how to fix it?
Best wishes,
K
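A likely cause (an assumption; the post does not show the CSV) is a line without a comma, such as a trailing blank line, which makes line.split(',')[1] raise IndexError. A minimal guard:
values = dict()
with open(csv) as f:
    for line in f:
        parts = line.split(',')
        if len(parts) >= 2:  # skip blank or malformed lines instead of crashing
            values[parts[0].strip()] = parts[1].strip()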

How do I check whether a file already contains the text I want to append?

I am currently working on a project. So I want to read all the *.pdf files in a directory, extract their text and append it to a text file. So far so good. I was able to do this, yeah.
Now the problem: if I am reading the same directory again, it appends the same files again. Is there a way to check whether the extracted text is already in the file and thus, skip the whole thing?
My code for this looks like this right now (I created the directory variable already):
for filename in os.listdir(directory):
    if filename.endswith(".pdf"):
        file = os.path.join(directory, filename)
        print(file)
        # parse data from file
        file_data = parser.from_file(file)
        # get the file's text content
        text = file_data['content']
        #print(type(text))
        print("len ", len(text))
        #print(text)
        # save to text file
        f = open("test2.txt", "a+", encoding='utf-8')
        f.write(text)
        f.close()
    else:
        continue
Thanks in advance!
One thing you could do is load the file's contents and check whether the text is already in them:
with open("test2.txt", encoding='utf-8') as f:
    if text not in f.read():
        ...   # not in the file yet: write here
    else:
        pass  # text is already in the file, don't write
However, this is very inefficient. A better way is to create a file with the filenames that you have already written, and check that:
(at the beginning of your code):
files = open("files.txt").read().splitlines()  # splitlines drops the trailing newlines
(before parser.from_file(file)):
if file in files:
    continue  # don't read or write
(after f.close()):
files.append(file)
(after the whole loop has finished):
with open("files.txt", "w") as f:
    f.write("\n".join(files))
Putting it all together:
files = open("files.txt").read().splitlines()
for filename in os.listdir(directory):
    if filename.endswith(".pdf"):
        file = os.path.join(directory, filename)
        if file in files:
            continue  # don't read or write
        print(file)
        # parse data from file
        file_data = parser.from_file(file)
        # get the file's text content
        text = file_data['content']
        #print(type(text))
        print("len ", len(text))
        #print(text)
        # save to text file
        f = open("test2.txt", "a+", encoding='utf-8')
        f.write(text)
        f.close()
        files.append(file)
    else:
        continue
with open("files.txt", "w") as f:
    f.write("\n".join(files))
Note that you need to create a file named files.txt (it can start out empty) in the current directory before the first run.
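If creating files.txt by hand is a nuisance, a small variant (my addition, not part of the original answer) tolerates a missing file on the first run and uses a set for O(1) membership checks:
try:
    with open("files.txt", encoding="utf-8") as f:
        files = set(f.read().splitlines())
except FileNotFoundError:
    files = set()  # first run: nothing has been processed yet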
