Exclude directories in a List Comprehension - python-3.x

I want to get a list of all picture files in a directory, excluding certain subdirectories.
I have a List Comprehension I normally use to extract files, which works, but includes subdirectories I do not want.
This is on macOS and 'Photos Library.photoslibrary' is a "package". The contents are normally hidden by the OS and the library appears to the user as a file, but to Unix this is just a normal directory which contains a massive number of files.
I have attempted to exclude the directory, as os.walk() describes, but my attempts all produce syntax errors.
the caller can modify the dirnames list in-place
(e.g., via del or slice assignment), and walk will only recurse into the
subdirectories whose names remain in dirnames
Is it possible to exclude within a List Comprehension
#!/usr/bin/python3
import os
pdir = "/Users/ian/Pictures"
# Question exhibit: get_files is meant to return every file under `top` whose
# name ends with `extension`, skipping 'Photos Library.photoslibrary'.
# NOTE(review): this does not parse. A list comprehension accepts only `for`
# clauses and `if` filter expressions; the trailing `if ...:` statement with
# a `dirnames.remove(...)` body is what triggers the SyntaxError.
def get_files(top, extension=".jpg"):
"""
For each directory in the directory tree rooted at top,
return all files which match extension.
"""
files = [os.path.join(dirname, filename)
for dirname, dirnames, filenames in os.walk(top)
# if 'Photos Library.photoslibrary' in dirnames:
# dirnames.remove('Photos Library.photoslibrary')
for filename in filenames
if filename.endswith(extension)
# The two lines below are statements, not comprehension clauses.
if 'Photos Library.photoslibrary' in dirnames:
dirnames.remove('Photos Library.photoslibrary')
]
return files
for file in get_files(pdir, (".JPG", ".JPEG", ".jpg", ".jpeg")):
print(file)

I couldn't get a List Comprehension to work, so I modified the code to a Generator Function, and make a List from the result.
The code below works.
def get_files(top, exclude=None, extension=".jpg"):
    """
    Yield the path of every file under top whose name ends with extension.

    top is the root of the directory tree to walk.
    exclude is an optional directory name, or an iterable of directory
    names; subdirectories with one of those names are not descended into.
    extension is a suffix string or a tuple of suffix strings, as accepted
    by str.endswith().
    """
    if exclude is None:
        excluded = frozenset()
    elif isinstance(exclude, str):
        # A lone string must not be iterated character by character.
        excluded = frozenset((exclude,))
    else:
        # Materialise once so a one-shot iterable still applies to every
        # directory, not just the first one visited.
        excluded = frozenset(exclude)

    for dirname, dirnames, filenames in os.walk(top):
        if excluded:
            # Slice assignment edits the list in place, which is what tells
            # os.walk() not to recurse into the removed names.
            dirnames[:] = [name for name in dirnames if name not in excluded]
        for filename in filenames:
            if filename.endswith(extension):
                yield os.path.join(dirname, filename)
for file in get_files(pdir, ('Photos Library.photoslibrary', 'iPhoto Library.photolibrary'), (".JPG", ".JPEG", ".jpg", ".jpeg")):
print(file)
The type test for exclude is inelegant, but without it Python would treat a single string as a sequence of characters and try to exclude each character separately.

Related

How to get the folder path using folder name from the system in Python using Tkinter

I want to get the complete folder path by searching with the particular folder name using python
Input
example input foldername: Myfile_data
I want to search with the folder name in entire sys and get where the particular folder is located
expected Output
example complete folder path:C:\Users\DELL\Documents\Myfile_data
please help out with this.
Thank you in advance
This searches the entire OS for files or directories.
import os, re
def find_files(filename, search_path, files_or_dirs='files', is_regex=False):
    """
    Return a list of paths under search_path whose entry name matches.

    filename is compared for equality with each entry name, or used as a
    pattern for re.search() when is_regex is true.
    files_or_dirs selects what is inspected: 'files' looks at file names,
    any other value looks at directory names.
    """
    inspect_files = files_or_dirs == 'files'

    def is_match(name):
        # Regex mode uses re.search, so the pattern may match anywhere in name.
        if is_regex:
            return re.search(filename, name)
        return name == filename

    found = []
    # Walking top-down from the root
    for root, dirs, files in os.walk(search_path):
        candidates = files if inspect_files else dirs
        found.extend(
            os.path.join(root, name) for name in candidates if is_match(name)
        )
    return found
To use it, just do:
results = find_files('file.txt', 'C:\\', 'files')
print(results)
# will print something like
# ['C:\example\file.txt', ...]
To search directories instead of files, do this:
results = find_files('mydir', 'C:\\', 'dirs')
print(results)
# will print something like
# ['C:\example\mydir\', ...]
To search files with a regex, do this:
results = find_files(r'.+\.txt', 'C:\\', 'files', is_regex=True)
print(results)
# will print something like
# ['C:\example\file.txt', 'C:\Users\DELL\example.txt', ...]
This searches the entire OS for files or directories but returns a generator, which basically means you can use it in a for ... in ... loop to get live results without waiting for the search to finish.
def find_files_gen(filename, search_path, files_or_dirs='files'):
    """
    Lazily yield paths under search_path whose entry name equals filename.

    files_or_dirs selects what is inspected: 'files' looks at file names,
    any other value looks at directory names.
    """
    inspect_files = files_or_dirs == 'files'
    # Walking top-down from the root
    for root, dirs, files in os.walk(search_path):
        for name in (files if inspect_files else dirs):
            if name != filename:
                continue
            yield os.path.join(root, name)
To use this method, do this:
for result in find_files_gen('file.txt', 'C:\\', 'files'): # or 'dirs'
print(result)

Having trouble using zipfile.ZipFile.extractall (Already read the docs)

I have a folder with many zipfiles, most of these zipfiles contain shapefiles and some of them have subfolders which contain zipfiles that contain shapefiles. I am trying to extract everything into one main folder without keeping any folder structure. This is where I am now:
import os, zipfile
def getListOfFiles(dirName):
    """
    Return the full paths of all non-directory entries under dirName.

    Subdirectories are expanded recursively in the order os.listdir()
    reports them.
    """
    collected = []
    for entry in os.listdir(dirName):
        candidate = os.path.join(dirName, entry)
        if not os.path.isdir(candidate):
            collected.append(candidate)
            continue
        # Directory: splice in everything found beneath it.
        collected.extend(getListOfFiles(candidate))
    return collected
# Question exhibit: list every file under dirName, print each path and try to
# extract it as a zip archive.
def main():
dirName = r'C:\Users\myusername\My_Dataset'
# Get the list of all files in directory tree at given path
listOfFiles = getListOfFiles(dirName)
# Print the files
for elem in listOfFiles:
print(elem)
# NOTE(review): extractall is called on the ZipFile class itself, so the path
# string `elem` is passed where a ZipFile instance is expected. This is the
# call behind the AttributeError ('str' object has no attribute 'namelist')
# quoted after this snippet.
zipfile.ZipFile.extractall(elem)
print("****************")
if __name__ == '__main__':
main()
This script prints all the shapefiles (including the ones under subfolders). Now I need to extract all these listed shapefiles into one main folder. I try zipfile.ZipFile.extractall(elem) but it doesn't work.
line 1611, in extractall
members = self.namelist()
AttributeError: 'str' object has no attribute 'namelist'
Is the error I'm getting. zipfile.ZipFile.extractall(elem) is the line that doesn't work. I imagine it expects one zipfile but I'm trying to feed it a folder (or a list in this case?)
How would I change this script so that it extracts my listed shapefiles into a folder (preferably a new folder)
You need to make an instance of ZipFile first and use extractall on this instance:
for elem in listOfFiles:
    # Open each archive in a with-block so its file handle is closed as soon
    # as extraction finishes, instead of staying open for the rest of the run.
    with zipfile.ZipFile(elem) as my_zipfile:
        # With no arguments, extractall() unpacks into the current working
        # directory.
        my_zipfile.extractall()
I have added this code block to my script and it works now.
def getfiles(path):
    """
    Yield the files reachable from path.

    If path is a directory, every file found by os.walk() beneath it is
    yielded as a joined path; otherwise path itself is yielded unchanged.
    """
    if not os.path.isdir(path):
        yield path
        return
    for root, _dirs, files in os.walk(path):
        yield from (os.path.join(root, name) for name in files)
import shutil

# Copy every file found under fromdir into one flat destination folder.
fromdir = r"C:\Users\username\My_Dataset\new"
# NOTE(review): `destination` was never defined in the original snippet; it is
# assumed to be the flatten folder that the copy targeted -- confirm.
destination = r"C:\Users\username\Documents\flatten"
os.makedirs(destination, exist_ok=True)
for f in getfiles(fromdir):
    # basename() handles both "/" and "\\" separators on Windows.
    filename = os.path.basename(f)
    if os.path.isfile(os.path.join(destination, filename)):
        # Name already taken: encode the path relative to fromdir into the
        # file name so files from different subfolders do not overwrite
        # each other.
        relative = os.path.relpath(f, fromdir)
        filename = relative.replace("\\", "_").replace("/", "_")
    # Copy under the (possibly de-duplicated) name; copy2 also tries to
    # preserve file metadata.
    shutil.copy2(f, os.path.join(destination, filename))

Return a list of the paths of all the parts.txt files

Write a function list_files_walk that returns a list of the paths of all the parts.txt files, using the os module's walk generator. The function takes no input parameters.
# Question exhibit: meant to return the paths of all parts.txt files found
# under "CarItems".
# NOTE(review): `list_files` is never assigned inside this function, so it
# must already exist at module level for the append/return to work.
def list_filess_walk():
for dirpath, dirnames, filenames in os.walk("CarItems"):
# NOTE(review): this tests the directory path string, not the file names in
# that directory, and the append below adds the whole `filenames` list rather
# than a joined path.
if 'parts.txt' in dirpath:
list_files.append(filenames)
print(list_files)
return list_files
Currently, list_files is still empty. The output is supposed to look similar to this:
CarItems/Chevrolet/Chevelle/2011/parts.txt
CarItems/Chevrolet/Chevelle/1982/parts.txt
How can I produce this output?
You pretty much have it here--the only adjustments I'd make are:
Make sure list_files is scoped locally to the function to avoid side effects.
Use parameters so that the function can work on any arbitrary path.
Return a generator with the yield keyword which allows for the next file to be fetched lazily.
'parts.txt' in dirpath could be error-prone if the filename happens to be a substring elsewhere in a path. I'd use endswith or check the third item in the tuple that os.walk yields, which is a list of the file names in the current directory, e.g. 'parts.txt' in filenames.
Along the same line of thought as above, you might want to make sure that your target is a file with os.path.isfile.
Here's an example:
import os
def find_files_rec(path, fname):
    """
    Yield "<dirpath>/<fname>" for each directory under path containing fname.

    The two parts are always joined with a forward slash.
    """
    for dirpath, _subdirs, entries in os.walk(path):
        if fname not in entries:
            continue
        yield "/".join((dirpath, fname))
if __name__ == "__main__":
print(list(find_files_rec(".", "parts.txt")))

creating corresponding subfolders and writing a portion of the file in new files inside those subfolders using python

I have a folder named "data". It contains subfolders "data_1", "data_2", and "data_3". These subfolders contain some text files. I want to parse through all these subfolders and generate corresponding subfolders with the same name, inside another folder named "processed_data". I want to also generate corresponding files with "processed" as a prefix in the name and want to write all those lines from the original file where "1293" is there in the original files.
I am using the below code but not able to get the required result. Neither the subfolders "data_1", "data_2", and "data_3" nor the files are getting created
import os
folder_name=""
# Question exhibit: walk D:\data and, for every file, write the lines that
# contain "1293" to D:\processed_data\<folder_name>\processed<filename>.
def pre_processor():
# NOTE(review): "\d" is not a recognised escape, so the backslash is kept, but
# a raw string (r"D:\data") would avoid the invalid-escape warning.
data_location="D:\data" # folder containing all the data
for root, dirs, files in os.walk(data_location):
for dir in dirs:
#folder_name=""
# folder_name takes its value from `dirs` (the subdirectories of `root`), not
# from the directory that actually contains `filename`.
folder_name=dir
for filename in files:
with open(os.path.join(root, filename),encoding="utf8",mode="r") as f:
processed_file_name = 'D:\\processed_data\\'+folder_name+'\\'+'processed'+filename
# NOTE(review): nothing in this snippet creates the target folder, and
# open(..., "w") does not create missing directories.
processed_file = open(processed_file_name,"w", encoding="utf8")
for line_number, line in enumerate(f, 1):
if "1293" in line:
processed_file.write(str(line))
processed_file.close()
pre_processor()
You might need to elaborate on the issue you are having; e.g., are the files being created, but empty?
A few things I notice:
1) Your indentation is off (not sure if this is just a copy-paste issue though): the pre_processor function is empty, i.e. you are defining the function at the same level as the declaration, not inside of it.
try this:
import os
folder_name=""
# Answer's re-indented copy of the question's function: for every file under
# D:\data, write the lines containing "1293" to
# D:\processed_data\<folder_name>\processed<filename>.
def pre_processor():
# NOTE(review): prefer r"D:\data"; "\d" is an invalid escape sequence that
# only happens to keep its backslash.
data_location="D:\data" # folder containing all the data
for root, dirs, files in os.walk(data_location):
for dir in dirs:
#folder_name=""
# folder_name is a subdirectory name of `root`, not the folder holding
# `filename`.
folder_name=dir
for filename in files:
with open(os.path.join(root, filename), encoding="utf8",mode="r") as f:
processed_file_name = 'D:\\processed_data\\'+folder_name+'\\'+'processed'+filename
# The output directory must already exist; this code does not create it.
processed_file = open(processed_file_name,"w", encoding="utf8")
for line_number, line in enumerate(f, 1):
if "1293" in line:
processed_file.write(str(line))
processed_file.close()
pre_processor()
2) Check if the processed_data and sub_folders exist; if not, create them first as this will not do so.
Instead of creating the path to the new Folder by hand you could just replace the name of the folder.
Furthermore, you are not creating the subfolders.
This code should work but replace the Linux folder slashes:
import os
folder_name=""
def pre_processor(data_location="data", output_location="processed_data"):
    """
    Mirror data_location into output_location, keeping only "1293" lines.

    Every directory under data_location gets a directory with the same
    relative path under output_location. For each input file, a file named
    "processed" + <original name> is written there, containing only the
    lines of the original that include the text "1293".

    data_location is the folder containing all the data.
    output_location is the folder that receives the processed copies; it
    is created if missing.
    """
    for root, _dirs, files in os.walk(data_location):
        # Derive the target folder from the path relative to the input root,
        # so the top-level folder maps to output_location itself and only
        # the leading part of the path is swapped.
        relative = os.path.relpath(root, data_location)
        processed_folder_name = os.path.normpath(
            os.path.join(output_location, relative)
        )
        os.makedirs(processed_folder_name, exist_ok=True)
        for filename in files:
            joined_path = os.path.join(root, filename)
            processed_file_name = os.path.join(
                processed_folder_name, 'processed' + filename
            )
            # Both handles are managed by the with-block, so the output file
            # is closed even if reading the input fails part-way.
            with open(joined_path, encoding="utf8", mode="r") as f, \
                    open(processed_file_name, "w", encoding="utf8") as processed_file:
                processed_file.writelines(line for line in f if "1293" in line)
pre_processor()

Can I force os.walk to visit directories in alphabetical order?

I would like to know if it's possible to force os.walk in python3 to visit directories in alphabetical order. For example, here is a directory and some code that will walk this directory:
ryan:~/bktest$ ls -1 sample
CD01
CD02
CD03
CD04
CD05
--------
def main_work_subdirs(gl):
    """
    Print the names of the directories directly inside gl['pwd'].

    The whole tree is walked, but only the entry for the top directory
    itself produces output; names appear in the order os.walk() reports.
    """
    top = gl['pwd']
    for current, subdirs, _files in os.walk(top):
        if current != top:
            continue
        for name in subdirs:
            print(name)
When the python code hits the directory above, here is the output:
ryan:~/bktest$ ~/test.py sample
CD03
CD01
CD05
CD02
CD04
I would like to force walk to visit these dirs in alphabetical order, 01, 02 ... 05. In the python3 doc for os.walk, it says:
When topdown is True, the caller can modify the dirnames list in-place
(perhaps using del or slice assignment), and walk() will only recurse
into the subdirectories whose names remain in dirnames; this can be
used to prune the search, impose a specific order of visiting
Does that mean that I can impose an alphabetical visiting order on os.walk? If so, how?
Yes. You sort dirs in the loop.
def main_work_subdirs(gl):
    """
    Print, in sorted order, the directories directly inside gl['pwd'].

    Each directory list is sorted in place before it is used, which also
    makes os.walk() visit the subdirectories in that order.
    """
    top = gl['pwd']
    for current, subdirs, _files in os.walk(top):
        subdirs.sort()
        if current != top:
            continue
        for name in subdirs:
            print(name)
I know this has already been answered but I wanted to add one little detail and adding more than a single line of code in the comments is wonky.
In addition to wanting the directories sorted I also wanted the files sorted so that my iteration through "gl" was consistent and predictable. To do this one more sort was required:
for root, dirs, files in os.walk(gl['pwd']):
dirs.sort()
for filename in sorted(files):
print(os.path.join(root, filename))
And, with benefit of learning more about Python, a different (better) way:
from pathlib import Path
# Glob the tree once and sort it; plain loops are used because the only goal
# is the print side effect (a list comprehension would build a throwaway list
# of None values).
entries = sorted(Path(gl['pwd']).glob('**/*'))
# Directories, per original question.
for p in entries:
    if p.is_dir():
        print(p)
# Files, like I usually need.
for p in entries:
    if p.is_file():
        print(p)
This answer is not specific to this question and the problem is a little different but the solution can be used in either case.
Consider having these files ("one1.txt", "one2.txt", "one10.txt") and the content of all of them is a String "default":
I want to loop through a directory that contains these files and find a specific String in every file and replace it with the name of the file.
If you use any of the other methods already mentioned here and in other questions (like dirs.sort(), sorted(files) and sorted(dirs)), the result will be something like this:
"one1.txt"--> "one10"
"one2.txt"--> "one1"
"one10.txt" --> "one2"
But we want it to be:
"one1.txt"--> "one1"
"one2.txt"--> "one2"
"one10.txt" --> "one10"
I found this method which changes file content alphabetically:
import re, os, fnmatch
def atoi(text):
    """Return text converted to int if it consists only of digits, else text unchanged."""
    return int(text) if text.isdigit() else text


def natural_keys(text):
    '''
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)

    Returns text split into alternating non-digit strings and integers,
    e.g. "one10.txt" -> ["one", 10, ".txt"], so that "one2" sorts before
    "one10".
    '''
    # Raw string: "\d" in a normal string literal is an invalid escape
    # sequence. The capturing group makes re.split keep the digit runs.
    return [atoi(c) for c in re.split(r'(\d+)', text)]
def findReplace(directory, find, replace, filePattern):
    """
    Rewrite every file under directory whose name matches filePattern.

    Matching files are visited directory by directory (directories ordered
    by path) and, within a directory, in natural_keys order. A running
    counter is incremented for each file, and every occurrence of find in
    that file is replaced with replace + <counter> + ".png".
    """
    count = 0
    # sorted() consumes the whole walk up front, so the visiting order is the
    # sort order of the directory paths.
    walked = sorted(os.walk(os.path.abspath(directory)))
    for path, dirs, files in walked:
        # Has no effect on traversal here: the walk is already exhausted.
        dirs.sort()
        matching = fnmatch.filter(files, filePattern)
        matching.sort(key=natural_keys)
        for filename in matching:
            count += 1
            filepath = os.path.join(path, filename)
            with open(filepath) as f:
                original = f.read()
            updated = original.replace(find, replace + str(count) + ".png")
            with open(filepath, "w") as f:
                f.write(updated)
Then run this line:
findReplace(os.getcwd(), "default", "one", "*.xml")

Resources