Re-loop until all matches are found, logic? - python-3.x

I cannot figure out the logic for this. I am attempting to compare a list of names, 'matches', to the files in a folder. If a file in 'folders' equals a name in 'matches', then do something, but obviously the code doesn't 'try' each match against each file. I'm thinking I need to use a while loop, but I don't know how to apply it.
import os
import glob
import os.path
# Question snippet: compare every file in the folder with every name listed in
# filename.txt. The block structure is restored; the behaviour is left as posted.
folders = glob.glob('C:\\Corrections\\*.*')
# NOTE(review): `matches` is a file object, i.e. a one-shot iterator. The inner
# loop below consumes it completely during the first outer iteration, so every
# later file is compared against nothing. The file is also never closed.
matches = open('filename.txt', 'r')
for each in folders:
    # NOTE(review): a path shaped like C:\Corrections\name.ext splits into three
    # parts, so index 3 would be out of range; presumably the real path was
    # deeper. os.path.basename() would avoid counting separators.
    splitname_one = each.split('\\', 3)  # Separate the filename from the path
    filename = splitname_one[3]  # Get filename only
    basefile = filename.split('.', 1)  # Separate filename and file extension
    compare0 = basefile[0]  # Base file name taken from the folder
    #print (basefile[0])
    for line in matches:
        # NOTE(review): `line` still ends with its newline; it only disappears
        # from the base name when the entry contains a dot.
        match = line.split('.', 1)  # Separate base filename from file extension
        #print (match[1])
        compare1 = match[0]  # Base file name taken from the list
        if compare1==compare0:
            #os.rename(filename, 'C:\\holder\\' + filename)
            print ('We Have a match!')
        else:
            print ('no match :( ')

FWIW here's how I might end up doing something like this:
import glob
from os.path import basename, splitext
def file_base(filename):
    """Return the name of *filename* without its directory or final extension.

    Surrounding whitespace is stripped first, so a line read from a text file
    (which still carries its trailing newline) gives the same base name as a
    path returned by ``glob``, even when the entry has no extension.
    """
    return splitext(basename(filename.strip()))[0]
# Base names of every file in the folder.
folders = {file_base(f) for f in glob.glob('C:\\Corrections\\*.*')}
with open('filename.txt') as fobj:
    # Iterate the file directly; strip the newline and skip blank lines so that
    # empty entries and extension-less names cannot pollute the set.
    matches = {file_base(line.strip()) for line in fobj if line.strip()}
# Names that exist both on disk and in the list file.
print(folders.intersection(matches))

Related

How to find and replace string in a file using input of line number in python

My requirement is to find a file in a directory, then find LOG_X_PARAMS in that file and insert a string after the first comma on that line. This is what I have for now:
import os, fnmatch
def findReplacelist(directory, finds, new_string, file):
    """Print and return the line numbers that contain *finds*.

    Walks *directory* recursively and scans every file whose name equals
    *file*. Line numbers are 1-based and restart for each file scanned, so
    they always refer to a position inside the file they came from.

    *new_string* is accepted but not used by this function.
    """
    list_of_results = []
    for path, dirs, files in os.walk(os.path.abspath(directory)):
        if file not in files:
            continue
        filepath = os.path.join(path, file)
        with open(filepath, 'r') as f:
            # enumerate() restarts at 1 for every file opened.
            list_of_results.extend(
                line_number
                for line_number, line in enumerate(f, 1)
                if finds in line
            )
    print(list_of_results)
    return list_of_results
def get_git_root(path):
    """Run findReplacelist for each file name in a fixed list.

    NOTE(review): the snippet's indentation was lost. The statements below are
    the only candidate body for this ``def``, so they are kept inside it;
    confirm whether they were meant to run at module level instead.
    NOTE(review): *path* is not used; the search root is hard-coded.
    """
    # Raw string: "\C" and "\m" are not valid escape sequences in a normal literal.
    Path = r"E:\Code\modules"
    file_list = ["pb_sa_ch.c"]
    for i in file_list:
        findReplacelist(Path, "LOG_1_PARAMS", "instance", i)
The example line is below change
LOG_X_PARAMS(string 1, string 2); #string1 andd string2 is random
this to
LOG_X_PARAMS(string 1, new_string, string 2);
I can find the line number using LOG_X_PARAMS now using this line number I need to append a string in the same line can someone help solving it ?
This is how I would do the task. I would find the files I want to change, read each file line by line and, if anything in the file changed, write the file back out. Here's the approach:
def findReplacelist(directory, finds, new_string, file):
    """Call find_replace on every file named *file* found under *directory*.

    The directory tree is walked recursively; *finds* and *new_string* are
    passed through to find_replace unchanged.
    """
    for path, dirs, files in os.walk(os.path.abspath(directory)):
        if file in files:
            filepath = os.path.join(path, file)
            find_replace(finds, new_string, filepath)
def find_replace(tgt_phrase, new_string, file):
    """Insert *new_string* as a new argument after the first comma that follows *tgt_phrase*.

    A line such as ``LOG_X_PARAMS(a, b);`` becomes
    ``LOG_X_PARAMS(a, new_string, b);``. Lines that contain *tgt_phrase* but no
    comma after it are left untouched. The file is rewritten only when at
    least one line changed. Running the function again inserts the string
    again.
    """
    out_lines = []
    changed = False
    with open(file, 'r') as f:
        for line in f:
            start = line.find(tgt_phrase)
            # Search for the comma only after the phrase, so commas earlier on
            # the line are ignored.
            comma = line.find(',', start + len(tgt_phrase)) if start != -1 else -1
            if comma != -1:
                line = line[:comma + 1] + ' ' + new_string + ',' + line[comma + 1:]
                changed = True
            out_lines.append(line)
    if changed:
        with open(file, 'w') as f:
            f.writelines(out_lines)

Use glob to iterate through files in dir to select correct extension?

I'm trying to iterate through a dir and select the first file available.
These files look like this:
img_1.png img_2.png img_3.mp4 img_4.png img_5.jpg img_6.mp4
As you can see their names are cohesive but their extensions are different. I'd like the script to iterate through each extension for each number before it moves onto the next, IE:
I assume the best way to go about it is to iterate through each file number and extension like this: img_1.png, img_1.jpg and img_1.mp4, and if none of the three is available, move to the next number and repeat with img_2.png, img_2.jpg and img_2.mp4 until one is available.
Question:
Is it best to iterate through the files and use glob to extend a file path with the extensions? Is there a better method?
This is what I thought would work, but it doesn't:
# Question snippet (reported as not working). The block structure is restored;
# the behaviour is left as posted.
# Gets number of files in dir
list = os.listdir(folder_path)  # NOTE(review): shadows the built-in `list`
number_files = len(list)
# Chooses file from dir
e = 0
for i in range(number_files):
    try:
        chosen_file = folder_path + "img_" + str(e)
        for ext in ('*.jpg', '*.png', '*.mp4'):
            # NOTE(review): assuming folder_path is a str, chosen_file is a str
            # too, and str has no .extend(); this line raises AttributeError on
            # the first pass, so control always lands in the except below.
            full_path = chosen_file.extend(glob(join(chosen_file, ext)))
            print (full_path)
            #random_file = random.choice(os.listdir(folder_path)) # Chooses random file
    except:
        # NOTE(review): the bare except hides the real error described above.
        e += 1
        print ('Hit except')
Are there other files in the folder with different names that you do not want to select or are all the files in the folder of interest? Is all that matters that they have the those 3 extensions or are the names important as well?
If you are only interested in files with those 3 extensions then this code will work
import os
import glob
folder_path = 'test\\'
extensions = ('.jpg', '.png', '.mp4')
e = 0  # number of files skipped because their extension is not wanted
for r, d, f in os.walk(folder_path):
    for file in f:
        # str.endswith accepts a tuple, so each file is tested exactly once.
        if file.endswith(extensions):
            # Join with the directory currently being walked so that files in
            # subfolders get their real path.
            full_path = os.path.join(r, file)
            print (full_path)
        else:
            e += 1
            print ('Hit except')
Given:
$ ls /tmp
img_1.png img_1.jpg img_2.png img_4.png img_5.jpg img_3.mp4 img_6.mp4
You can use pathlib and a more targeted glob:
from pathlib import Path

p = Path('/tmp')
wanted = ('.png', '.jpg', '.mp4')
# The glob narrows the listing to img_<single digit>.<anything>; the suffix
# test then keeps only the three wanted extensions.
for fn in p.glob('img_[0-9].*'):
    if fn.suffix in wanted:
        print(fn)
Prints:
/tmp/img_1.png
/tmp/img_1.jpg
/tmp/img_2.png
/tmp/img_4.png
/tmp/img_5.jpg
/tmp/img_3.mp4
/tmp/img_6.mp4
Answer:
Decided to not use glob and did this instead:
# For each index try .png, then .jpg, then .mp4, and stop at the first file
# that exists. os.path.exists() does not raise for a missing file, so no
# try/except is needed.
candidates = (
    (ext, folder_path + "img_" + str(i) + "." + ext)
    # +1 so that the index equal to number_files is tried as well.
    for i in range(number_files + 1)
    for ext in ('png', 'jpg', 'mp4')
)
for ext, candidate in candidates:
    if os.path.exists(candidate):
        print (ext + ' true')
        print (candidate)
        break
else:
    # Reached only when no candidate exists.
    print ('false')

How can I loop through all the xls files in a folder to find the sheet names, and then replace them

I am trying to loop through all the XLS files in a folder, and then replace the worksheet name by another string. This has to be done for all the files inside.
I am relatively new to programming, and here is my Python code. It runs okay (partially, when I do it for one file at a time), however, I am unable to get it to work for all the files in the folder.
from xlutils.copy import copy
from xlrd import open_workbook
# Ask for the workbook to open (despite its name, `direc` holds a file name).
direc = input('Enter file name: ')
rb = open_workbook(direc)
wb = copy(rb)
# sheet_names() yields the list of sheet names, so `pointSheet` is a list here.
pointSheet = rb.sheet_names()
print(pointSheet)
# NOTE(review): this searches the list for the list itself, which is what raises the ValueError quoted below; a single sheet name has to be passed instead.
idx = pointSheet.index(pointSheet)
wb.get_sheet(idx).name = u'RenamedSheet1'
wb.save(direc)
Error message:
Traceback (most recent call last):
File "./Rename.py", line 13, in <module>
idx = pointSheet.index(pointSheet)
ValueError: ['x xxx xxxx xxxxxx'] is not in list
My bad! The above code is for testing with a single file. Here is the loop:
# Question snippet: collect every path under the folder, then rename one sheet
# in each. The block structure is restored; the behaviour is left as posted.
files = []
# Raw string so that "\T" and "\F" are never read as escape sequences.
for dirname, dirnames, filenames in os.walk(r'D:\Temp\Final'):
    # print path to all subdirectories first.
    for subdirname in dirnames:
        # NOTE(review): directories are added to `files` too and are later
        # handed to open_workbook() along with the real workbooks.
        files.append(os.path.join(dirname, subdirname))
    # print path to all filenames.
    for filename in filenames:
        files.append(os.path.join(dirname, filename))
pprint(files)
for i in range(0,len(files)):
    rb = open_workbook(files[i])
    wb = copy(rb)
    # NOTE(review): list.index raises ValueError for any workbook that has no
    # sheet with exactly this name.
    idx = rb.sheet_names().index('5 new bulk rename')
    wb.get_sheet(idx).name = u'RenamedSheet1'
    wb.save(files[i])
print('Operation succeeded!')
Try something like this (untested) for a single file:
from xlutils.copy import copy
from xlrd import open_workbook
# Ask for the workbook to open and make a copy of it to modify.
direc = input('Enter file name: ')
rb = open_workbook(direc)
wb = copy(rb)
# enumerate() supplies each sheet's position. Calling .index() on the name
# with itself as the argument is a string search and always gives 0.
for idx, pointSheet in enumerate(rb.sheet_names()):
    print(pointSheet)
    # Number the new names so that no two sheets end up with the same name.
    wb.get_sheet(idx).name = u'RenamedSheet{}'.format(idx + 1)
# Save once, after every sheet has been renamed.
wb.save(direc)
And wrap that in another loop using listdir (taken from here):
import os

for file in os.listdir("/mydir"):
    if file.endswith(".xls"):
        # <do what you did for a single file>
        pass  # placeholder: keeps the block valid until the per-file code is added

Change order in filenames in a folder

I need to rename a bunch of files in a specific folder. They all end with date and time, like for example "hello 2019-05-22 1310.txt" and I want the date and time for each file to be first so I can sort them. With my code I get an error and it wont find my dir where all files are located. What is wrong with the code?
import os
import re
import shutil
# Question snippet: move the date in each file name to the front. The block
# structure is restored; the behaviour is left as posted.
dir_path = r'C:\Users\Admin\Desktop\Testfiles'
comp = re.compile(r'\d{4}-\d{2}-\d{2}')
for file in os.listdir(dir_path):
    # Split the entry at its last dot into base name and extension.
    if '.' in file:
        index = [i for i, v in enumerate(file,0) if v=='.'][-1]
        name = file[:index]
        ext = file[index+1:]
    else:
        ext=''
        name = file
    data = comp.findall(name)
    if len(data)!=0:
        date= comp.findall(name)[0]
        # Everything around the date, joined back together.
        rest_name = ' '.join(comp.split(name)).strip()
        # NOTE(review): '.'+ext leaves a trailing dot when the entry had no extension.
        new_name = '{} {}{}'.format(date,rest_name,'.'+ext)
        print('changing {} to {}'.format(name, new_name))
        # NOTE(review): the source path is built from `name`, which no longer
        # carries the extension, so it does not name an existing entry; `file`
        # is the real directory entry and is what should be moved.
        shutil.move(os.path.join(dir_path,name), os.path.join(dir_path, new_name))
    else:
        print('file {} is not change'.format(name))

Comparing two different file types with same name in separate paths by iterating over the files

I am trying to compare two different file extensions in different folders by iterating over the files present in those folders.
import os
from os.path import splitext
# NOTE(review): os.path.dirname() returns a string (the parent part of the given path); it does not list the contents of a folder.
hpath = os.path.dirname(r'C:\Monarch')
dpath = os.path.dirname(r'C:\DOLV')
# Iterating over a string yields its individual characters, which is why the result described below consists of single characters.
documents = set([splitext(filename)[0] for filename in dpath])
matches = [filename for filename in set(hpath) if splitext(filename)[0] in documents]
print (matches)
i tried this it only gives me the result as C, : \\
I even tried this
from os import walk
# Question snippet: list the files directly inside each folder. The block
# structure is restored; the behaviour is left as posted.
hpath = r'C:\Monarch'
dpath=r'C:\DOLV'
h = []
for (dirpath, dirnames, filenames) in walk(hpath):
    h.extend(filenames)
    break  # stop after the top-level directory
# NOTE(review): prints the loop variable, not the collected list `h`.
print(filenames)
d=[]
for (dirpath, dirnames, filenames) in walk(dpath):
    d.extend(filenames)
    break  # stop after the top-level directory
print(filenames)
i am unable to compare them. I am able to print the name but unable to iterate over these file names and print a match.
What could i change to make it work?
Regards,
Ren.
Solved my issue.
from os import walk
from os.path import splitext  # used below; the posted snippet never imported it

hpath = r'C:\Monarch'
dpath = r'C:\DOLV'

# File names found directly inside hpath (subfolders are not descended into).
h = []
for (dirpath, dirnames, filenames) in walk(hpath):
    h.extend(filenames)
    break  # stop after the top-level directory
# Print the collected list; it is defined even when the folder does not exist.
print(h)

# File names found directly inside dpath.
d = []
for (dirpath, dirnames, filenames) in walk(dpath):
    d.extend(filenames)
    break  # stop after the top-level directory
print(d)

# Base names (extension removed) of everything in dpath.
documents = {splitext(filename)[0] for filename in d}
# Files in hpath whose base name also occurs in dpath.
matches = [filename for filename in set(h) if splitext(filename)[0] in documents]
print (matches)

Resources