Opening two different format files with a single statement - python-3.x

I am reading two kinds of files, .txt and .tsv. I had two different methods to read them, but I want to do it in a single way. For a .tsv file we have to pass a delimiter argument, while .txt files do not need any argument. How can I pass the delimiter argument so that I do not need separate functions?
class DataReader:
    """Reader that feeds the rows of a delimited text file to ``__load``."""

    def __init__(self, type="tsv"):
        # `type` is accepted but not consulted yet: `read` below always
        # assumes tab-separated input, which is what the question is about.
        self.weather_records = []

    def read(self, path: str, file: str):
        """Parse ``path + file`` as tab-separated rows.

        Each row is handed to ``self.__load`` (defined elsewhere in the
        asker's class, not shown in this snippet).
        """
        # Bind the handle to its own name so the `file` argument is not shadowed.
        with open(path + file) as handle:
            for line in csv.DictReader(handle, delimiter="\t"):
                self.__load(line)

You can try this. I have added comments in the code below.
class DataReader:
    """Read .tsv and .txt files through a single ``read`` method."""

    def __init__(self, type="tsv"):  # type doesn't matter for file reading anymore
        self.weather_records = []

    def read(self, path, file):
        """Parse ``path + file``, picking the delimiter from the file name.

        Names ending in ``.tsv`` are read tab-separated; anything else is
        read with ``csv.DictReader``'s default settings. Each row is passed
        to ``self.__load``, the row handler from the question's class.
        """
        # Test the file *name* before opening: once the handle is bound, a
        # file object has no `endswith` method.
        if file.endswith('.tsv'):
            reader_options = {"delimiter": "\t"}
            # You could also set the type here, if you want to use it later
        else:
            # do without delimiter.
            reader_options = {}
        # newline="" is what the csv module asks for when given a file object.
        with open(path + file, newline="") as handle:
            for line in csv.DictReader(handle, **reader_options):
                self.__load(line)

Related

List is empty when appending when using recursion

I have two functions. The first one is used to get a list of paths to text files, and the second one is used to iterate over this list of paths and then check if they include the word password. But because of the Try Except statement in the second function, I had to use recursion to make it continue running unless there's another way if possible to provide below. My problem is that the list returned in the second function is empty why and how to fix it?
def search_txt(root="C:\\"):
    """Return the full path of every ``.txt`` file found beneath *root*.

    *root* defaults to the C: drive, matching the original hard-coded
    behaviour. The tree is walked with ``os.walk``, so a root that does not
    exist simply yields an empty list.
    """
    list_of_txt = []
    for dir_path, _sub_dirs, files in os.walk(root):
        # Match on the file-name suffix; change the suffix to look for
        # other extensions.
        list_of_txt.extend(
            os.path.join(dir_path, file)
            for file in files
            if file.endswith(".txt")
        )
    return list_of_txt
def search_pass_file(list_of_files: list):
    """Scan each text file for the word "password" and collect matching paths.

    Paths are popped off the end of *list_of_files*, so the caller's list is
    consumed. When a file cannot be decoded or opened, the function recurses
    on the remaining paths and returns that call's result.

    NOTE: the only ``return list_of_pass`` sits in the ``else`` branch, so a
    non-empty input that raises no error falls off the end and yields None,
    and matches collected before an error are discarded by the recursive
    return. This is the behaviour the question asks about.
    """
    list_of_pass = []
    if len(list_of_files) != 0:
        for i in range(len(list_of_files)):
            file = list_of_files.pop()
            try:
                with open(file, encoding="utf8") as f:
                    for line in f.readlines():
                        if "password" in line:
                            list_of_pass.append(file)
            except UnicodeDecodeError:
                return search_pass_file(list_of_files)
            except PermissionError:
                return search_pass_file(list_of_files)
    else:
        return list_of_pass
if __name__ == '__main__':
    # Gather every .txt path first, then print what the password scan returns.
    myList = search_txt()
    scan_result = search_pass_file(myList)
    print(scan_result)
You're returning list_of_pass only if len(list_of_files) == 0 (it's in the else block). Your return statement should occur after the loop (which should be a while one btw)
You can catch several errors in one line by putting them in parentheses: except (UnicodeDecodeError, PermissionError), or catch all exceptions (for instance, you're not handling FileNotFoundError).
I'd reduce your function to:
def search_pass_file(list_of_files: list):
    """Return the paths in *list_of_files* whose text contains "password".

    Paths are popped off the end of *list_of_files*, so the list is empty
    when the function returns and results come back in that reverse order.
    Each matching path is reported once per occurrence in the input list.
    Files that cannot be opened or decoded as UTF-8 are skipped.
    """
    list_of_pass = []
    while list_of_files:
        file = list_of_files.pop()
        try:
            with open(file, encoding="utf8") as f:
                # Stream line by line and stop at the first hit, so large
                # files are never loaded whole and a path is added once.
                if any("password" in line for line in f):
                    list_of_pass.append(file)
        except (OSError, ValueError):
            # OSError covers missing/forbidden files; ValueError covers
            # UnicodeDecodeError. The loop just moves on to the next path,
            # so no recursion (and no recursion-depth limit) is involved.
            continue
    return list_of_pass
Edit: also in your except block, you should append the returned value of the recursive function to list_of_pass otherwise you'll lose the files found after the error occurs.

How to best manage file either gz or not?

Hi I wonder if there is a better way in terms of code readability and repetition.
I have a large file that does not fit in memory. The file is either compressed (.gz) or not.
If it is compressed I need to open it using gzip from standard lib.
I am not sure the code I ended up is the best way to deal with that situation.
import gzip
from pathlib import Path


def parse_open_file(openfile):
    """parse the content of the file"""
    return


def parse_file(file_: Path):
    """Open *file_* in binary mode and pass the handle to ``parse_open_file``.

    A ``.gz`` suffix selects ``gzip.open``; any other suffix uses the
    built-in ``open``. The two ``with`` blocks are intentionally left
    duplicated: removing that repetition is what the question asks about.
    """
    if file_.suffix == ".gz":
        with gzip.open(file_, 'rb') as f:
            parse_open_file(f)
    else:
        with open(file_, 'rb') as f:
            parse_open_file(f)
One way to handle this is to assign either open or gzip.open to a variable, depending on file type, then use that as an 'alias' in the with statement. For example:
# Choose the opener once, then share a single `with` block for both cases.
# `file_` is the Path argument of parse_file in the question's code.
if file_.suffix == ".gz":
    myOpen = gzip.open
else:
    myOpen = open
with myOpen(file_, 'rb') as f:
    parse_open_file(f)

How to copy from zip file to a folder without unzipping it?

How to make this code works?
There is a zip file with folders and .png files in it. Folder ".\icons_by_year" is empty. I need to get every file one by one without unzipping it and copy to the root of the selected folder (so no extra folders made).
class ArrangerOutZip(Arranger):
    """Copy the files stored in a zip archive into an output folder."""

    def __init__(self):
        self.base_source_folder = '\\icons.zip'
        self.base_output_folder = ".\\icons_by_year"

    def proceed(self):
        """Run the copy."""
        self.create_and_copy()

    def create_and_copy(self):
        """Walk the archive's entries and copy those that look like files.

        An entry counts as a file when its name ends in a dot followed by a
        1-4 character extension.
        """
        reg_pattern = re.compile(r'.+\.\w{1,4}$')
        # Context managers close the archive and its handle even if the
        # copy raises part-way through.
        with open(self.base_source_folder, 'rb') as f, zipfile.ZipFile(f) as zfile:
            for cont in zfile.namelist():
                if reg_pattern.match(cont):
                    with zfile.open(cont) as file:
                        # NOTE: the destination here is a path string, not an
                        # open file object. This is the call the question is
                        # asking how to get working.
                        shutil.copyfileobj(file, self.base_output_folder)
# Build the arranger with its built-in source/output locations and run it.
arranger = ArrangerOutZip()
arranger.proceed()
shutil.copyfileobj uses file objects for source and destination files. To open the destination you need to construct a file path for it. pathlib is a part of the standard python library and is a nice way to handle file paths. And ZipFile.extract does some of the work of creating intermediate output directories for you (plus sets file metadata) and can be used instead of copyfileobj.
One risk of unzipping files is that they can contain absolute or relative paths outside of the target directory you intend (e.g., "../../badvirus.exe"). extract is a bit too lax about that - putting those files in the root of the target directory - so I wrote a little something to reject the whole zip if you are being messed with.
With a few tweaks to make this a testable program,
from pathlib import Path
import re
import zipfile
#import shutil
#class ArrangerOutZip(Arranger):
class ArrangerOutZip:
    """Extract the file entries of a zip archive into an output folder."""

    def __init__(self, base_source_folder, base_output_folder):
        # strict=True makes a missing archive fail here, at construction.
        self.base_source_folder = Path(base_source_folder).resolve(strict=True)
        self.base_output_folder = Path(base_output_folder).resolve()

    def proceed(self):
        """Run the extraction."""
        self.create_and_copy()

    def create_and_copy(self):
        """Unzip files matching pattern to base_output_folder, raising
        ValueError if any resulting paths are outside of that folder.
        Output folder created if it does not exist."""
        reg_pattern = re.compile(r'.+\.\w{1,4}$')
        with open(self.base_source_folder, 'rb') as f:
            with zipfile.ZipFile(f) as zfile:
                wanted_files = [cont for cont in zfile.namelist()
                                if reg_pattern.match(cont)]
                # Validate every target before extracting anything, so one
                # suspicious entry rejects the whole archive.
                rebased_files = self._rebase_paths(wanted_files,
                                                   self.base_output_folder)
                for cont, rebased in zip(wanted_files, rebased_files):
                    print(cont, rebased, rebased.parent)
                    # option 1: use shutil
                    #rebased.parent.mkdir(parents=True, exist_ok=True)
                    #with zfile.open(cont) as file, open(rebased, 'wb') as outfile:
                    #    shutil.copyfileobj(file, outfile)
                    # option 2: zipfile does the work for you
                    zfile.extract(cont, self.base_output_folder)

    @staticmethod
    def _rebase_paths(pathlist, target_dir):
        """Rebase relative file paths to target directory, raising
        ValueError if any resulting paths are not within target_dir"""
        target = Path(target_dir).resolve()
        newpaths = []
        for path in pathlist:
            newpath = target.joinpath(path).resolve()
            newpath.relative_to(target)  # raises ValueError if not subpath
            newpaths.append(newpath)
        return newpaths
#arranger = ArrangerOutZip('\\icons.zip', '.\\icons_by_year')
import sys
# Only the argv lookup is guarded, so an IndexError raised while extracting
# is not misreported as a usage error.
try:
    source_zip, target_dir = sys.argv[1], sys.argv[2]
except IndexError:
    print("usage: test.py zipfile targetdir")
else:
    arranger = ArrangerOutZip(source_zip, target_dir)
    arranger.proceed()
I'd take a look at the zipfile library's ZipFile.getinfo() and also at zipfile.Path, which can be constructed from an archive and a path inside it if you intend to do any creation.
Specifically, zipfile.Path builds an object that holds a path within the archive, and it appears to be modelled on pathlib. Assuming you don't need to create zipfiles, you can ignore zipfile.Path.
However, that's not exactly what I wanted to point out. Rather consider the following:
zipfile.getinfo()
There is a person who I think is getting at this exact situation here:
https://www.programcreek.com/python/example/104991/zipfile.getinfo
This person seems to be getting a path using getinfo(). It's also clear that NOT every zipfile has the info.

I need an easiest way to alter the contents of a file using a python script

I have a python file "main.py" which has some code inside it I need a python simple function to find and replace a certain line of code in it.
Example: main.py contains
@receiver(post_save, sender=User)
def index_user(sender, instance, **kwargs):
    """Index *instance* when it passes ``validate_checks``."""
    if validate_checks(instance):
        index_model(instance)
I need a function to change the above code into
# @receiver(post_save, sender=User)
def index_user(sender, instance, **kwargs):
    """Index *instance* when it passes ``validate_checks``."""
    if validate_checks(instance):
        index_model(instance)
Read the entire file into memory, make the changes you want, then write those changes to the same file.
So, a basic piece of code that does what you want:
def replace_and_write(fn: str, exact_match: str, replacement: str) -> None:
    """Replace every occurrence of *exact_match* in the file *fn*, in place.

    The whole file is read into memory, the substitution is applied, and
    the result is written back to the same path. When nothing matches, the
    file is left untouched.
    """
    with open(fn, "r") as f_in:
        contents = f_in.read()
    updated = contents.replace(exact_match, replacement)
    if updated == contents:
        # Opening with "w" truncates the file; skip that when there is
        # nothing new to write.
        return
    with open(fn, "w") as f_out:
        f_out.write(updated)
# Example: swap every "foo" in test.txt for "Hello, World!".
replace_and_write("test.txt", "foo", "Hello, World!")
Running this on a file that used to look like:
foo
bar
baz
Will change it to:
Hello, World!
bar
baz

Determine if a path is valid in a class constructor

Without violating the guideline that a constructor should not do work, I need to determine if the provided string (destination_directory) is a valid path before assigning it in the constructor.
It doesn't have to exist, but the provided string must be a valid one, i.e. no invalid symbols or illegal characters. My project will run on Windows only, not Linux.
I looked at this page, but the answers seem to try and open the directory to test if the provided string is valid.
I also tried os.path.isabs(path)but it doesn't provide the results I require. For example, it says that T:\\\\Pictures is a absolute path, while that may be true, the \\\\ should mean the path is invalid.
Is there a clean, perhaps one line way of achieving what I want?
def __init__(self, destination_directory: str):
    # Validate first so an invalid path is never stored on the instance.
    self._validate_path(path=destination_directory)
    self.destination_directory = destination_directory

def _validate_path(self, path):
    # code to validate path should go here.
    pass
We know a few things about a path: it contains at least a drive letter and subdirectories.
We also have rules about what symbols are not allowed in directories. We also know that a drive letter contains a single character.
Instead of allowing users of our class to pass in a full path, we break it down and only allow valid strings for directories names and one letter for the drive. When everything is validated, we can use the os module to build our path.
Here is how I would structure my Folder class:
class Folder:
    """A Windows folder described by a drive letter and directory names.

    Callers pass the pieces separately (no colon, no separators); each
    piece is validated before it is stored, and ``construct_full_path``
    joins them into a path string.
    """

    # Characters rejected in a directory name.
    _forbidden_characters = ["<", ">", ":", "/", '"', "|", "?", "*", '\\']

    def __init__(self, *subdirectories, root_drive):
        self._validate_drive_letter(letter=root_drive)
        self._validate_path(path=subdirectories)
        self._root_drive = root_drive
        self._subdirectories = subdirectories

    def _validate_drive_letter(self, letter):
        """Raise ValueError unless *letter* is exactly one ASCII letter."""
        from string import ascii_letters

        # A drive is a single character, so anything longer is rejected;
        # checking against ascii_letters also rejects non-ASCII alphabetics.
        if (
            not isinstance(letter, str)
            or len(letter) != 1
            or letter not in ascii_letters
        ):
            raise ValueError("Drive letter is invalid")

    def _validate_path(self, path):
        """Raise ValueError if any directory name holds a forbidden character."""
        for directory in path:
            for character in directory:
                if character in self._forbidden_characters:
                    raise ValueError("Directory cannot contain invalid characters")

    def construct_full_path(self) -> str:
        r"""Return the full path, e.g. ``R:\Pictures\Landscape``.

        PureWindowsPath is used so the result has Windows separators no
        matter which platform runs the code.
        """
        from pathlib import PureWindowsPath

        return str(PureWindowsPath(f"{self._root_drive}:\\", *self._subdirectories))

    def __str__(self) -> str:
        return f"Drive Letter: {self._root_drive} Subdirectories: {self._subdirectories}"
Main:
def main():
    """Build one valid and one invalid Folder and print the outcome."""
    try:
        # Valid: construct_full_path() would give R:\Pictures\Landscape.
        # The caller supplies neither the ":" nor the "\"; the class adds them.
        portable_drive = Folder("Pictures", "Landscape", root_drive="R")
        # Invalid: "?" is a forbidden character, so this raises ValueError.
        # With the name fixed, construct_full_path() would give T:\Vacation.
        vacation_pictures = Folder("Vac??tion", root_drive="T")
    except ValueError as exc:
        print(exc)
        return
    # Reached only when both folders were built without error.
    print(portable_drive)
    print(vacation_pictures)


if __name__ == "__main__":
    main()
It may not be the best approach, but it works. I know a nested for loop is bad, but I don't see any other way to validate the individual characters of a string.
A regex solution:
import re

# Validates the *syntax* of a Windows path: a drive (C:\), a UNC prefix
# (\\server\share\) or a relative start, then folders, then an optional
# file name. Inside the character classes "\ " is an escaped space, so
# server and share names may contain spaces even under re.VERBOSE.
windows_path_regex = re.compile(r"""
    \A
    (?:(?:[a-z]:|\\\\[a-z0-9_.$\ -]+\\[a-z0-9_.$\ -]+)\\|  # Drive
       \\?[^\\/:*?"<>|\r\n]+\\?)                           # Relative path
    (?:[^\\/:*?"<>|\r\n]+\\)*                              # Folder
    [^\\/:*?"<>|\r\n]*                                     # File
    \Z
    """, re.VERBOSE|re.I)
d = windows_path_regex.match(r"\test\txt.txt")
print(bool(d))
Note that \ is a valid path but / is not.
I used 8.18. Validate Windows Paths as a reference.

Resources