Change order in filenames in a folder - python-3.x

I need to rename a bunch of files in a specific folder. They all end with date and time, like for example "hello 2019-05-22 1310.txt" and I want the date and time for each file to be first so I can sort them. With my code I get an error and it wont find my dir where all files are located. What is wrong with the code?
import os
import re
import shutil
dir_path = r'C:\Users\Admin\Desktop\Testfiles'
comp = re.compile(r'\d{4}-\d{2}-\d{2}')
for file in os.listdir(dir_path):
if '.' in file:
index = [i for i, v in enumerate(file,0) if v=='.'][-1]
name = file[:index]
ext = file[index+1:]
else:
ext=''
name = file
data = comp.findall(name)
if len(data)!=0:
date= comp.findall(name)[0]
rest_name = ' '.join(comp.split(name)).strip()
new_name = '{} {}{}'.format(date,rest_name,'.'+ext)
print('changing {} to {}'.format(name, new_name))
shutil.move(os.path.join(dir_path,name), os.path.join(dir_path, new_name))
else:
print('file {} is not change'.format(name))

Related

Loop over excel files' paths under a directory and pass them to data manipulation function in Python

I need to check the excel files under a directory /Users/x/Documents/test/ by DataCheck function from data_check.py, so I can do data manipulation of many excel files, data_check.py has code structure as follows:
import pandas as pd
def DataCheck(filePath):
df = pd.read_excel(filePath)
try:
df = df.dropna(subset=['building', 'floor', 'room'], how = 'all')
...
...
...
df.to_excel(writer, 'Sheet1', index = False)
if __name__ == '__main__':
status = True
while status:
rawPath = input(r"")
filePath = rawPath.strip('\"')
if filePath.strip() == "":
status = False
DataCheck(filePath)
In order to loop all the excel files' paths under a directory, I use:
import os
directory = '/Users/x/Documents/test/'
for filename in os.listdir(directory):
if filename.endswith(".xlsx") or filename.endswith(".xls"):
print(os.path.join(directory, filename))
else:
pass
Out:
/Users/x/Documents/test/test 3.xlsx
/Users/x/Documents/test/test 2.xlsx
/Users/x/Documents/test/test 4.xlsx
/Users/x/Documents/test/test.xlsx
But I don't know how to combine the code above together, to pass the excel files' paths to DataCheck(filePath).
Thanks for your kind help at advance.
Call the function with the names instead of printing them:
import os
directory = '/Users/x/Documents/test/'
for filename in os.listdir(directory):
if filename.endswith(".xlsx") or filename.endswith(".xls"):
fullname = os.path.join(directory, filename)
DataCheck(fullname)

Use glob to iterate through files in dir to select correct extension?

I'm trying to iterate through a dir a select the first file available.
These files look like this:
img_1.png img_2.png img_3.mp4 img_4.png img_5.jpg img_6.mp4
As you can see their names are cohesive but their extensions are different. I'd like the script to iterate through each extension for each number before it moves onto the next, IE:
I assume the best way to go about it is iterating through each file and extention like this: img_1.png img_1.jpg and img_1.mp4, and if neither of the three are available, move to the next file and repeat like img_2.png img_2.jpg and img_2.mp4 until there is an available
Question:
Is it best to iterate through the files and use glob to extend a file path with the extensions? Is there a better method?
This is what I thought would work, but it doesn't:
# Gets number of files in dir
list = os.listdir(folder_path)
number_files = len(list)
# Chooses file from dir
e = 0
for i in range(number_files):
try:
chosen_file = folder_path + "img_" + str(e)
for ext in ('*.jpg', '*.png', '*.mp4'):
full_path = chosen_file.extend(glob(join(chosen_file, ext)))
print (full_path)
#random_file = random.choice(os.listdir(folder_path)) # Chooses random file
except:
e += 1
print ('Hit except')
Are there other files in the folder with different names that you do not want to select or are all the files in the folder of interest? Is all that matters that they have the those 3 extensions or are the names important as well?
If you are only interested in files with those 3 extensions then this code will work
import os
import glob
folder_path = 'test\\'
e = 0
for r,d,f in os.walk(folder_path):
for file in f:
extensions = ['.jpg', '.png', '.mp4']
for ext in extensions:
if file.endswith(ext):
full_path = os.path.join(folder_path, file)
print (full_path)
else:
e += 1
print ('Hit except')
Given:
$ ls /tmp
img_1.png img_1.jpg img_2.png img_4.png img_5.jpg img_3.mp4 img_6.mp4
You can use pathlib and a more targeted glob:
from pathlib import Path
p=Path('/tmp')
for fn in (x for x in p.glob('img_[0-9].*')
if x.suffix in ('.png', '.jpg', '.mp4')):
print(fn)
Prints:
/tmp/img_1.png
/tmp/img_1.jpg
/tmp/img_2.png
/tmp/img_4.png
/tmp/img_5.jpg
/tmp/img_3.mp4
/tmp/img_6.mp4
Answer:
Decided to not use glob and did this instead:
i = 0
for i in range(number_files):
try:
chosen_file = folder_path + "img_" + str(i)
jpg_file = chosen_file + ".jpg"
png_file = chosen_file + ".png"
mp4_file = chosen_file + ".mp4"
if os.path.exists(png_file) == True:
print ('png true')
print (png_file)
break
elif os.path.exists(jpg_file) == True:
print ('jpg true')
print (jpg_file)
break
elif os.path.exists(mp4_file) == True:
print ('mp4 true')
print (mp4_file)
break
except:
i += 1
print ('false')

Rename by Appending a prefix to a file name

I would appreciate if someone could give me a hint. I have to rename a batch of files by adding a prefix (date) to the file name, so files are organized in ordered manner in the folder: from older to newer.
The date itself contained inside of the file. Therefore, my script has to open the file, find the date and use it as a "prefix" to add to the file name.
from datetime import datetime
import re
import os
file = open('blog_entry.txt', 'r', encoding='utf-8')
source_code = file.read()
<...>
# convert the date:
date = datetime.strptime(date_only, "%d-%b-%Y")
new_date = date.strftime('%Y_%m_%d')
The new_date variable should be used as a "prefix", so the new file name looks like "yyyy_mm_dd blog_entry.txt"
I cannot wrap my head around how to generate a "new name" using this prefix, so I can apply os.rename(old_name, new_name) command to the file. apply
Here is one way, using string concatenation to build the new filename you want:
from datetime import datetime
import re
import os
file = open('blog_entry.txt', 'r', encoding='utf-8')
source_code = file.read()
# read the date from the file contents
date = datetime.strptime(date_only, "%d-%b-%Y")
new_date = date.strftime('%Y_%m_%d')
path = "/path/to/your/file/"
os.rename(path + 'blog_entry.txt', path + new_date + ' ' + 'blog_entry.txt')

How can I loop through all the xls files in a folder to find the sheet names, and then replace them

I am trying to loop through all the XLS files in a folder, and then replace the worksheet name by another string. This has to be done for all the files inside.
I am relatively new to programming, and here is my Python code. It runs okay (partially, when I do it for one file at a time), however, I am unable to get it to work for all the files in the folder.
from xlutils.copy import copy
from xlrd import open_workbook
# open the file
direc = input('Enter file name: ')
rb = open_workbook(direc)
wb = copy(rb)
#index of a sheet
pointSheet = rb.sheet_names()
print(pointSheet)
idx = pointSheet.index(pointSheet)
wb.get_sheet(idx).name = u'RenamedSheet1'
wb.save(direc)
Error message:
Traceback (most recent call last):
File "./Rename.py", line 13, in <module>
idx = pointSheet.index(pointSheet)
ValueError: ['x xxx xxxx xxxxxx'] is not in list
My bad! The above code is for testing with a single file. Here is the loop:
files = []
for dirname, dirnames, filenames in os.walk('D:\Temp\Final'):
# print path to all subdirectories first.
for subdirname in dirnames:
files.append(os.path.join(dirname, subdirname))
# print path to all filenames.
for filename in filenames:
files.append(os.path.join(dirname, filename))
pprint(files)
for i in range(0,len(files)):
rb = open_workbook(files[i])
wb = copy(rb)
idx = rb.sheet_names().index('5 new bulk rename')
wb.get_sheet(idx).name = u'RenamedSheet1'
wb.save(files[i])
print('Operation succeeded!')
Try something like this (untested) for a single file:
from xlutils.copy import copy
from xlrd import open_workbook
# open the file
direc = input('Enter file name: ')
rb = open_workbook(direc)
wb = copy(rb)
for pointSheet in rb.sheet_names()
print(pointSheet)
idx = pointSheet.index(pointSheet)
wb.get_sheet(idx).name = u'RenamedSheet1'
wb.save(direc)
And wrap that in another loop using listdir (taken from here):
import os
for file in os.listdir("/mydir"):
if file.endswith(".xls"):
# <do what you did for a single file>

Re-loop until all matches are found, logic?

I cannot figure out the logic for this. I am attempting to compare a list of matches 'matches' to files from a folder. If file in 'folders' equal the name in 'matches', then do something, but obviously it doesn't 'try' each match to each file. I'm thinking I need to use a while loop but I don't know how to apply it.
import os
import glob
import os.path
folders = glob.glob('C:\\Corrections\\*.*')
matches = open('filename.txt', 'r')
for each in folders:
splitname_one = each.split('\\', 3) #Separate the filename from the path
filename = splitname_one[3] #Get Filename only
basefile = filename.split('.', 1) #Separate filename and file extension
compare0 = basefile[0] #assign base file name to compare0
#print (basefile[0])
for line in matches:
match = line.split('.', 1) #Separe base filename from file extension
#print (match[1])
compare1 = match[0] #assign base file name to compare1
if compare1==compare0:
#os.rename(filename, 'C:\\holder\\' + filename)
print ('We Have a match!')
else:
print ('no match :( ')
FWIW here's how I might end up doing something like this:
import glob
from os.path import basename, splitext
def file_base(filename):
return splitext(basename(filename))[0]
folders = set(file_base(f) for f in glob.glob('C:\\Corrections\\*.*'))
with open('filename.txt') as fobj:
matches = set(file_base(f) for f in fobj.readlines())
print(folders.intersection(matches))

Resources