Read folders in folder one by one - python-3.x

I am trying to read the subfolders of one folder and analyse the data in each of them. My idea is to write two loops: one to choose the folder and one to analyse the data in the chosen folder. But I don't know how to build the new path from the loop variable. Line 3 of this snapshot is my problem.
Thank you for any input.

If you only want to get the folders inside another folder (subfolders) and aren't interested in other files that could be in the root folder you could use:
import os
path = '...'  # root folder whose immediate subfolders should be visited
for file_folder in os.listdir(path):
    # os.listdir returns bare entry names, so join each one back onto the root.
    path1 = os.path.join(path, file_folder)
    # Skip plain files; only directories are of interest here.
    if os.path.isdir(path1):
        print(path1)
        # do something
or
import glob
path = '...'  # root folder whose immediate subfolders should be visited
# The trailing slash in the pattern makes glob return directories only.
for folder in glob.glob(f'{path}/*/'):
    print(folder)
    # do something

In line number two & three from the image, you have written:
for file_folder in os.listdir(path):
path1 = 'path' + '\' + str(file_folder)
file_folder is like a temporary variable holding folder name value for a particular iteration.
It should instead be:
for file_folder in os.listdir(path):
    # Use the variable path (not the literal string 'path') and let
    # os.path.join insert the correct separator for the platform.
    path1 = os.path.join(path, file_folder)

Related

how to rename files in a folder using pathlib in python?

I need help renaming .jpg files in my folder with the same prefix, 'cat_'. for example, 070.jpg should be renamed cat_070.jpg.
the files are located within the Cat folder:
from pathlib import Path
p = Path('C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Train\\Cat\\')
But I don't quite see how to do it. The line below is wrong because it does not 'look into' the files in this directory:
p.rename(Path(p.parent, 'cat_' + p.suffix))
I have also unsuccessfully tried this:
import os
from os import rename
from os import listdir
# Get path
cwd = "C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Train\\Cat"
# Get all files in dir
onlyfiles = [f for f in listdir(cwd) if isfile(join(cwd, f))]
for file in onlyfiles:
# Get the current format
if file[-4:]==(".jpg"):
s = file[1]
# Change format and get new filename
s[1] = 'cat'
s = '_'.join(s)
# Rename file
os.rename(file, s)
print(f"Renamed {file} to {s}")
FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Train\\Cat\\'
how can I do it? sorry I'm really a beginner here.
How about:
from pathlib import Path
img_dir = Path('C:\\Users\\me\\Jupiter_Notebooks\\Dataset\\Train\\Cat\\')  # path to folder with images
# Collect the matches before renaming anything, so a file that has just been
# renamed can never be picked up again by a directory scan still in progress.
for img_path in list(img_dir.glob('*.jpg')):  # iterate over all .jpg images in img_dir
    new_name = f'cat_{img_path.stem}{img_path.suffix}'  # or directly: f'cat_{img_path.name}'
    img_path.rename(img_dir / new_name)
    print(f'Renamed `{img_path.name}` to `{new_name}`')
pathlib also supports renaming files, so the os module is not even needed here.
Use pathlib.Path.iterdir() to rename all files in a directory:
1) for path in pathlib.Path("a_directory").iterdir():
2)     if path.is_file():
3)         old_name = path.stem  # original filename
4)         old_extension = path.suffix  # original file extension
5)         directory = path.parent  # directory containing the file
6)         new_name = "text" + old_name + old_extension
7)         path.rename(pathlib.Path(directory, new_name))

How to rename the files of different format from the same folder but different subfolder using python

I have one scenario where i have to rename the files in the folder. Please find the scenario,
Example :
Elements(Main Folder)<br/>
2(subfolder-1) <br/>
sample_2_description.txt(filename1)<br/>
sample_2_video.avi(filename2)<br/>
3(subfolder2)
sample_3_tag.jpg(filename1)<br/>
sample_3_analysis.GIF(filename2)<br/>
sample_3_word.docx(filename3)<br/>
I want to modify the names of the files as,
Elements(Main Folder)<br/>
2(subfolder1)<br/>
description.txt(filename1)<br/>
video.avi(filename2)<br/>
3(subfolder2)
tag.jpg(filename1)<br/>
analysis.GIF(filename2)<br/>
word.docx(filename3)<br/>
Could anyone guide on how to write the code?
Recursive directory traversal to rename a file can be based on this answer. All we are required to do is to replace the file name instead of the extension in the accepted answer.
Here is one way - split the file name by _ and use the last index of the split list as the new name
import os
import sys
directory = os.path.realpath("/path/to/parent/folder")  # absolute path of the main folder
for subdir, dirs, files in os.walk(directory):
    for filename in files:
        # Split only the file name, never the whole path: a "_" in a folder
        # name would otherwise cut the directory part off the target.
        newFilename = filename.split("_")[-1]  # text after the last "_" becomes the new name
        if not newFilename or newFilename == filename:
            continue  # nothing after the last "_", or no "_" at all: leave the file alone
        # Build both paths from subdir so the rename does not depend on the
        # current working directory and the file stays in its own subfolder.
        filePath = os.path.join(subdir, filename)
        newFilePath = os.path.join(subdir, newFilename)
        os.rename(filePath, newFilePath)  # rename your file
Hope this helps.
check below code example for the first filename1, replace path with the actual path of the file:
import os
# In a raw string a backslash is already literal, so a single one is the separator.
os.rename(r'path\sample_2_description.txt', r'path\description.txt')
print("File Renamed!")

Walking a directory, looking for .xls files and glob is ignoring them (Python and glob module)

I wrote a for loop that walks a directory tree and does successfully return all the files names. The sub folders include .pdf files so I'm trying to extract data only from the .xls files. Using glob('*.xls') is not working for some reason.
They are not .xlsx extensions but rather .xls so that's not the issue. I have run print functions to test the directory walk (it works) but then I cannot seem to grab files with extension .xls.
import os
from glob import glob
for folderName, subfolders, filenames in os.walk('C:\\Users\\userName\\someFiles'):
print('The current folder is ' + folderName)
for subfolder in subfolders:
print('SUBFOLDER OF ' + folderName + ': ' + subfolder)
for filename in filenames:
dataPrint = glob('*.xls')
print('File Name is: '+filename+ ' Glob name is: '+str(dataPrint))
This works to this extent:
The current folder is C:\Users\\userName\\someFiles\subFolder
File Name is: Sample Data March 2019.pdf Glob name is: []
File Name is: File 1 March 2019.pdf Glob name is: []
File Name is: File 1 March 2019.xls Glob name is: []
You can see how glob comes up blank. I can't figure it out because glob has never let me down before! Thanks for taking a look.
this might work for you
import os
from glob import glob
path = 'C:\\Users\\username\\Desktop\\glob\\target'
for folderName, subfolders, filenames in os.walk(path):
    for subfolder in subfolders:
        print('SUBFOLDER OF ' + folderName + ': ' + subfolder)
    # glob resolves a relative pattern against the current working directory,
    # which os.walk never changes. Anchor the pattern to the folder being
    # visited instead; one call per folder is enough.
    dataPrint = glob(os.path.join(folderName, '*.xls'))
    for filename in filenames:
        print('File Name is: '+filename+ ' Glob name is: '+str(dataPrint))

creating corresponding subfolders and writing a portion of the file in new files inside those subfolders using python

I have a folder named "data". It contains subfolders "data_1", "data_2", and "data_3". These subfolders contain some text files. I want to parse through all these subfolders and generate corresponding subfolders with the same name, inside another folder named "processed_data". I want to also generate corresponding files with "processed" as a prefix in the name and want to write all those lines from the original file where "1293" is there in the original files.
I am using the below code but not able to get the required result. Neither the subfolders "data_1", "data_2", and "data_3" nor the files are getting created
import os
folder_name=""
def pre_processor():
data_location="D:\data" # folder containing all the data
for root, dirs, files in os.walk(data_location):
for dir in dirs:
#folder_name=""
folder_name=dir
for filename in files:
with open(os.path.join(root, filename),encoding="utf8",mode="r") as f:
processed_file_name = 'D:\\processed_data\\'+folder_name+'\\'+'processed'+filename
processed_file = open(processed_file_name,"w", encoding="utf8")
for line_number, line in enumerate(f, 1):
if "1293" in line:
processed_file.write(str(line))
processed_file.close()
pre_processor()
You might need to elaborate on the issue you are having; e.g., are the files being created, but empty?
A few things I notice:
1) Your indentation is off (not sure if this is just a copy-paste issue though): as posted, the pre_processor function is empty, i.e. its body is written at the same level as the def line instead of being indented inside it.
try this:
import os
folder_name=""
def pre_processor():
    r"""Filter every file under D:\data down to the lines containing "1293".

    For each file found while walking the data folder, the matching lines are
    written to D:\processed_data\<subfolder>\processed<filename>, where
    <subfolder> is the name of the folder the file was found in. The target
    folders are not created here, so they must already exist.
    """
    data_location = r"D:\data"  # folder containing all the data
    for root, dirs, files in os.walk(data_location):
        # Name of the folder currently being visited. Taking it from dirs
        # would instead yield the last child folder of an earlier level.
        folder_name = os.path.basename(root)
        for filename in files:
            processed_file_name = 'D:\\processed_data\\'+folder_name+'\\'+'processed'+filename
            # Both files are closed by the with-block, even if an error occurs.
            with open(os.path.join(root, filename), encoding="utf8", mode="r") as f, \
                    open(processed_file_name, "w", encoding="utf8") as processed_file:
                for line in f:
                    if "1293" in line:
                        processed_file.write(line)
pre_processor()
2) Check if the processed_data and sub_folders exist; if not, create them first as this will not do so.
Instead of creating the path to the new Folder by hand you could just replace the name of the folder.
Furthermore, you are not creating the subfolders.
This code should work but replace the Linux folder slashes:
import os
folder_name=""
def pre_processor(data_location="data", output_location="processed_data", marker="1293"):
    """Mirror data_location into output_location, keeping only matching lines.

    Every file found below data_location gets a counterpart named
    "processed" + <original name> in the same relative subfolder below
    output_location. Only the lines that contain marker are copied.

    Args:
        data_location: folder containing all the data.
        output_location: folder that receives the processed copies; missing
            subfolders are created as needed.
        marker: text a line must contain to be copied.
    """
    for root, _dirs, files in os.walk(data_location):
        # os.path.relpath/join use the platform's own separator, so no slashes
        # have to be adapted between Linux and Windows. Files that sit directly
        # in data_location end up directly in output_location.
        processed_folder_name = os.path.normpath(
            os.path.join(output_location, os.path.relpath(root, data_location)))
        for filename in files:
            # Created lazily, so folders without files produce no empty twin.
            os.makedirs(processed_folder_name, exist_ok=True)
            processed_file_name = os.path.join(processed_folder_name, 'processed' + filename)
            # Both files are closed by the with-block, even if an error occurs.
            with open(os.path.join(root, filename), encoding="utf8", mode="r") as f, \
                    open(processed_file_name, "w", encoding="utf8") as processed_file:
                for line in f:
                    if marker in line:
                        processed_file.write(line)
pre_processor()

Python 3 How to delete images in a folder

How do I delete all png format pics in a folder using Python 3?
This snippet takes each file in the specified directory and removes it if its filename ends in .png:
import os
directory = 'path/to/directory'
for file in os.listdir(directory):
    if file.endswith('.png'):
        # os.listdir yields bare names; join them with the directory so the
        # removal does not depend on the current working directory.
        os.remove(os.path.join(directory, file))
import glob
import os

# glob returns the matches with the directory part included, so each entry
# can be passed to os.remove as is. The question asks for .png files.
removing_files = glob.glob('file path/*.png')
for i in removing_files:
    os.remove(i)
replace file path with the directory to the image folder
This function deletes a single image file; to delete multiple images or files, call it in a for loop. Just double-check that you are providing a valid path to your file.
'
def remove_img(self, path, img_name):
    """Delete the image img_name located in the folder path.

    Returns:
        True when the file no longer exists after the removal, False otherwise.

    Raises:
        OSError: propagated from os.remove when the file cannot be deleted,
            e.g. FileNotFoundError if it does not exist.
    """
    img_path = os.path.join(path, img_name)
    os.remove(img_path)
    # check if file exists or not
    return not os.path.exists(img_path)
'

Resources