extracting file content inside multiple folders and printing required folder names - python-3.x

I have files and directory structure like this:
C:\data\4512\required_data\121\a.txt
C:\data\4512\required_data\121\b.txt
C:\data\4512\required_data\456\c.txt
C:\data\4512\required_data\456\d.txt
C:\data\9813\required_data\789\e.txt
C:\data\9813\required_data\789\f.txt
C:\data\9813\required_data\301\g.txt
C:\data\9813\required_data\301\h.txt
I want to print the content of the text files
Also I want to print outer_folder number like 4512, inner folder number like 121 and the file name
I was trying some code like this:
path = "C:\\data"
for root, dirs, files in os.walk(path):
for dir in dirs:
print(dir)
data_location = os.path.join(path, dir, "required_data")
for example, for this case:
C:\data\4512\required_data\121\a.txt
Expected output:
file=open("a.txt")
print(file) # content of file
print("outer_number") # 4512
print("inner_number") # 121
print("name_of_file") # a.txt
I want to do this for all files

Related

Compare the files and find the duplicate queries

How do I compare two directories? The first directory contains 5 SQL files and the second directory contains 2 SQL files. Compare the files and find the duplicate queries using Python.
import os
import filecmp

# Define the paths of the two folders to compare
folder1 = 'path/to/folder1'
folder2 = 'path/to/folder2'

# Loop through each file in folder1
for filename in os.listdir(folder1):
    # Both candidates share the same name, so a single suffix check is enough
    # to guarantee that file1 and file2 are SQL files.
    if not filename.endswith('.sql'):
        continue
    file1 = os.path.join(folder1, filename)
    file2 = os.path.join(folder2, filename)
    # Only compare when a file with the same name exists in folder2;
    # shallow=False forces a byte-by-byte comparison of the contents.
    if os.path.isfile(file2) and filecmp.cmp(file1, file2, shallow=False):
        print(f"Duplicate file found: {filename}")

How to get the folder path using folder name from the system in Python using Tkinter

I want to get the complete folder path by searching with the particular folder name using python
Input
example input foldername: Myfile_data
I want to search the entire system for the folder name and find out where that particular folder is located
expected Output
example complete folder path:C:\Users\DELL\Documents\Myfile_data
please help out with this.
Thank you in advance
This searches the entire OS for files or directories.
import os, re
def find_files(filename, search_path, files_or_dirs='files', is_regex=False):
    """Return the paths of all files or directories under search_path that match filename.

    Args:
        filename: Exact name to look for, or a regular expression when
            is_regex is true.
        search_path: Directory where the top-down walk starts.
        files_or_dirs: 'files' matches file names; any other value matches
            directory names.
        is_regex: When true, a name matches if re.search() finds the pattern
            anywhere in it; otherwise the name must equal filename exactly.

    Returns:
        A list of matching paths (containing directory joined with the name),
        in the order os.walk() visits them.
    """
    if is_regex:
        # Compile once instead of handing the pattern to re.search per name.
        matches = re.compile(filename).search
    else:
        def matches(name):
            return name == filename

    result = []
    # Walking top-down from the root
    for root, dirs, files in os.walk(search_path):
        candidates = files if files_or_dirs == 'files' else dirs
        result.extend(
            os.path.join(root, name) for name in candidates if matches(name)
        )
    return result
To use it, just do:
results = find_files('file.txt', 'C:\\', 'files')
print(results)
# will print something like
# ['C:\example\file.txt', ...]
To search directories instead of files, do this:
results = find_files('mydir', 'C:\\', 'dirs')
print(results)
# will print something like
# ['C:\example\mydir\', ...]
To search files with a regex, do this:
results = find_files(r'.+\.txt', 'C:\\', 'files', is_regex=True)
print(results)
# will print something like
# ['C:\example\file.txt', 'C:\Users\DELL\example.txt', ...]
This searches the entire OS for files or directories but returns a generator, which basically means you can use it in a for ... in ... loop to get live results without waiting for the search to finish.
def find_files_gen(filename, search_path, files_or_dirs='files', is_regex=False):
    """Lazily yield the paths of files or directories under search_path that match filename.

    Results are produced while the walk is still in progress, so a caller can
    consume them in a for loop without waiting for the whole search.

    Args:
        filename: Exact name to look for, or a regular expression when
            is_regex is true.
        search_path: Directory where the top-down walk starts.
        files_or_dirs: 'files' matches file names; any other value matches
            directory names.
        is_regex: When true, a name matches if re.search() finds the pattern
            anywhere in it (same meaning as in find_files). Defaults to
            exact-name matching, which is the original behaviour.

    Yields:
        Each matching path (containing directory joined with the name).
    """
    if is_regex:
        matches = re.compile(filename).search
    else:
        def matches(name):
            return name == filename

    # Walking top-down from the root
    for root, dirs, files in os.walk(search_path):
        candidates = files if files_or_dirs == 'files' else dirs
        for name in candidates:
            if matches(name):
                yield os.path.join(root, name)
To use this method, like this:
for result in find_files_gen('file.txt', 'C:\\', 'files'): # or 'dirs'
print(result)

How to have ZipFile only zip a specified directory - Python 3

When I try to zip a directory with the following code, my directory is zipped and contains all the files I would like zipped, however it is also zipping the root directories for the directory I would like zipped.
(Test is the target directory to be zipped - it contains other directories and files) When unzipping the my_python_files.zip, it unzips with absolute paths:
unzipping my_python_files.zip:
\Users\hhafez\Desktop\Test
when I would like to have:
\Test
I am having trouble trying to find a way to avoid this, any tips would be much appreciated.
def get_all_file_paths(directory):
    """Return the path of every file found under directory, searched recursively.

    Each returned path is the walked folder joined with the file name, so the
    paths carry whatever prefix (absolute or relative) directory itself has.
    """
    file_paths = []
    for root, _directories, files in os.walk(directory):
        file_paths.extend(os.path.join(root, filename) for filename in files)
    return file_paths
def zipfiles():
file_paths = get_all_file_paths(r"C:\Users\hhafez\Desktop\Test")
with ZipFile('my_python_files.zip','w') as myzip:
for file in file_paths:
print(file)
myzip.write(file)
print('All files zipped successfully!')
zipfiles()
If you want to zip the list of files and don't want the zip archive to contain the absolute path of each file...
def zipFiles(directory, archive_name='my_python_files.zip'):
    """Zip directory so that archive entries start at the folder's own name.

    For example, zipping C:\\Users\\hhafez\\Desktop\\Test stores entries as
    Test\\... instead of Users\\hhafez\\Desktop\\Test\\...

    Args:
        directory: Folder to archive, including all nested folders and files.
        archive_name: Path of the zip file to create; defaults to the name
            used by the original snippet.
    """
    # Imported here because the body needs the module name `zipfile`
    # (for ZIP_DEFLATED), not only the ZipFile class.
    import zipfile

    separators = os.sep + (os.altsep or '')
    # Derive the prefix to strip from the argument instead of hard-coding it
    # (the hard-coded "C:\Users..." literal was also an invalid \U escape).
    # A trailing separator is dropped first so dirname() returns the real parent.
    parentDir = os.path.dirname(directory.rstrip(separators))
    with zipfile.ZipFile(archive_name, 'w') as myzip:
        for root, directories, files in os.walk(directory):
            zipFileName = root[len(parentDir):] #always take whats after the parentDir for the filename going in the zip
            # Archive names should be relative, so drop the separator left at the front.
            zipFileName = zipFileName.lstrip(separators)
            for file in files:
                myzip.write(os.path.join(root,file), os.path.join(zipFileName,file), compress_type=zipfile.ZIP_DEFLATED)
# Call zipFiles (capital F), the function defined just above; the lowercase
# zipfiles() from the question takes no arguments.
zipFiles(r"C:\Users\hhafez\Desktop\Test")
This should accomplish what you need. The major difference here is the zipFileName variable.
zipFileName = root[len(parentDir):]
This line strips the parentDir prefix from the directory that you are currently crawling through. zipFileName joined with the name of the file is the archive name that gets passed to myzip.write, which explains this:
myzip.write(os.path.join(root,file), os.path.join(zipFileName,file), compress_type=zipfile.ZIP_DEFLATED)

creating corresponding subfolders and writing a portion of the file in new files inside those subfolders using python

I have a folder named "data". It contains subfolders "data_1", "data_2", and "data_3". These subfolders contain some text files. I want to parse through all these subfolders and generate corresponding subfolders with the same name, inside another folder named "processed_data". I want to also generate corresponding files with "processed" as a prefix in the name and want to write all those lines from the original file where "1293" is there in the original files.
I am using the below code but not able to get the required result. Neither the subfolders "data_1", "data_2", and "data_3" nor the files are getting created
import os
folder_name=""
def pre_processor():
data_location="D:\data" # folder containing all the data
for root, dirs, files in os.walk(data_location):
for dir in dirs:
#folder_name=""
folder_name=dir
for filename in files:
with open(os.path.join(root, filename),encoding="utf8",mode="r") as f:
processed_file_name = 'D:\\processed_data\\'+folder_name+'\\'+'processed'+filename
processed_file = open(processed_file_name,"w", encoding="utf8")
for line_number, line in enumerate(f, 1):
if "1293" in line:
processed_file.write(str(line))
processed_file.close()
pre_processor()
You might need to elaborate on the issue you are having; e.g., are the files being created, but empty?
A few things I notice:
1) Your indentation is off (not sure if this is just a copy-paste issue though): the pre_processor function is empty, i.e. you are defining the function at the same level as the declaration, not inside of it.
try this:
import os
folder_name=""
def pre_processor():
data_location="D:\data" # folder containing all the data
for root, dirs, files in os.walk(data_location):
for dir in dirs:
#folder_name=""
folder_name=dir
for filename in files:
with open(os.path.join(root, filename), encoding="utf8",mode="r") as f:
processed_file_name = 'D:\\processed_data\\'+folder_name+'\\'+'processed'+filename
processed_file = open(processed_file_name,"w", encoding="utf8")
for line_number, line in enumerate(f, 1):
if "1293" in line:
processed_file.write(str(line))
processed_file.close()
pre_processor()
2) Check if the processed_data and sub_folders exist; if not, create them first as this will not do so.
Instead of creating the path to the new Folder by hand you could just replace the name of the folder.
Furthermore, you are not creating the subfolders.
This code should work; if you run it on Windows, replace any hard-coded Linux-style forward slashes in the paths (or build the paths with os.path.join):
import os

# Kept only for backward compatibility with the original snippet; pre_processor does not use it.
folder_name = ""


def pre_processor(data_location="data", output_location="processed_data"):
    """Copy every line containing "1293" into a mirrored tree of processed files.

    For each file found anywhere under data_location, a file named
    'processed' + <original name> is written to the same relative sub-folder
    under output_location. Missing output folders are created on demand.

    Args:
        data_location: Folder containing all the data (searched recursively).
        output_location: Root folder that receives the processed copies.
    """
    for root, _dirs, files in os.walk(data_location):
        # Mirror the layout by path component instead of str.replace("data/", ...):
        # the text replacement skipped files sitting directly in data_location and
        # could rewrite unrelated folder names that merely end in "data".
        relative_folder = os.path.relpath(root, data_location)
        processed_folder_name = os.path.normpath(
            os.path.join(output_location, relative_folder)
        )
        for filename in files:
            # Only create a folder once there is a file to put into it.
            os.makedirs(processed_folder_name, exist_ok=True)
            processed_file_name = os.path.join(
                processed_folder_name, 'processed' + filename
            )
            # Both handles live in one with-statement so they are closed even
            # if reading or writing fails part-way through.
            with open(os.path.join(root, filename), encoding="utf8", mode="r") as f, \
                    open(processed_file_name, "w", encoding="utf8") as processed_file:
                processed_file.writelines(line for line in f if "1293" in line)


pre_processor()

iterating over files in folder using os python

Ultimate Goal: Iterate over many files in a folder to perform a specific set of tasks.
Immediate Goal: Load next file (file2) to perform tasks
Background: I am using the following code
import os
folder = '/Users/eer/Desktop/myfolder/'
for subdir, dirs, files in os.walk(folder):
for item in os.listdir(folder):
if not item.startswith('.') and os.path.isfile(os.path.join(folder, item)): #gets rid of .DS_store file
print(item)
Output: print(item)
file1.txt
file2.txt
file3.txt
(etc...)
I am using the following code to open the first file:
data_path = folder + item
file = open(data_path, "r")
#perform a set of tasks for this file
This works well for opening the first file, file1.txt and performing a set of tasks.
However, I am not sure how to load file2.txt (and eventually file3.txt and etc...)so I can continue the task performance
Questions:
1) How do I put this code in a for loop? (so I can load, and perform tasks on all the files)?
You can do the file operations in the same loop like:
import os

folder = '/Users/eer/Desktop/myfolder/'

# os.listdir() already returns every entry directly inside `folder`, so a
# single loop is enough; wrapping it in os.walk() would repeat the whole
# listing (and the per-file work) once for every directory in the tree.
for item in os.listdir(folder):
    data_path = os.path.join(folder, item)
    # Skip hidden entries such as .DS_Store and anything that is not a regular file.
    if item.startswith('.') or not os.path.isfile(data_path):
        continue
    with open(data_path, "r") as file:
        ...  # use file here

Resources