Comparing two different file types with same name in separate paths by iterating over the files - python-3.x

I am trying to compare two different file extensions in different folders by iterating over the files present in those folders.
import os
from os.path import splitext
# NOTE: os.path.dirname() only manipulates the path string: it returns the
# parent-directory part of the path and never lists what is inside the folder.
hpath = os.path.dirname(r'C:\Monarch')
dpath = os.path.dirname(r'C:\DOLV')
# NOTE: dpath is a str, so this comprehension iterates over its individual
# characters, not over file names.
documents = set([splitext(filename)[0] for filename in dpath])
# NOTE: set(hpath) is likewise a set of single characters, which is why the
# printed result consists only of the characters of the drive prefix.
matches = [filename for filename in set(hpath) if splitext(filename)[0] in documents]
print (matches)
I tried this, but it only gives me the individual characters `C`, `:` and `\\` as the result.
I even tried this
from os import walk

hpath = r'C:\Monarch'
dpath = r'C:\DOLV'

# walk() yields the top-level directory first, so taking only its first item
# gives the files directly inside the folder and ignores subfolders.
# The default tuple covers a missing folder, for which walk() yields nothing
# (otherwise `filenames` would be undefined at the print below).
h = []
_, _, filenames = next(walk(hpath), (hpath, [], []))
h.extend(filenames)
print(filenames)

d = []
_, _, filenames = next(walk(dpath), (dpath, [], []))
d.extend(filenames)
print(filenames)
I am unable to compare them: I can print the file names, but I cannot iterate over them and print the matches.
What could I change to make it work?
Regards,
Ren.

Solved my issue.
from os import walk
from os.path import splitext

hpath = r'C:\Monarch'
dpath = r'C:\DOLV'


def top_level_files(folder):
    """Return the names of the files directly inside *folder*.

    Subfolders are not searched. An empty list is returned when *folder*
    does not exist, because walk() then yields nothing.
    """
    _, _, filenames = next(walk(folder), (folder, [], []))
    return list(filenames)


h = top_level_files(hpath)
print(h)
d = top_level_files(dpath)
print(d)

# Base names (extension stripped) of everything in dpath; a set keeps the
# membership test below cheap.
documents = {splitext(filename)[0] for filename in d}
# Files in hpath whose base name also occurs in dpath, whatever the extension.
matches = [filename for filename in h if splitext(filename)[0] in documents]
print (matches)

Related

How to get the count of matching files and the file paths at the same time?

I need to ask whether it is possible, and how, to get at the same time both the number of files in a directory and its subdirectories that match an fnmatch filter, and the paths of those files.
I use for now this:
def return_ext():
    """Return the file-name pattern to search for.

    Only the first entry of the list is ever returned: the original nested
    loops reached ``return`` on their very first iteration. ``None`` is
    returned if the list is empty.
    """
    file_pasok = ["AAAAA.txt", "BBBBBB.txt"]
    return file_pasok[0] if file_pasok else None
def list_files(file_path):
    """Yield the path of every file under *file_path* matching return_ext().

    The tree is walked recursively with os.walk(); each match is yielded as a
    string built from the directory being visited and the file name.
    """
    ext = return_ext()
    for dirpath, _dirnames, filenames in os.walk(file_path):
        # fnmatch.filter() on an empty list yields nothing, so directories
        # without files need no separate check.
        for file in fnmatch.filter(filenames, ext):
            yield str(Path(os.path.join(dirpath, file)))
ext = return_ext()
######## HOW MANY FILES WERE FOUND
# This walks the whole tree once just to count the matches ...
count_founded = sum(len(fnmatch.filter(files, ext)) for _r, _d, files in os.walk(file_path))
######## PATHS OF THE FILES FOUND
# ... and list_files() walks it a second time to produce the paths.
for file_found in list_files(file_path):
    # Was print(file_founds): that name is never defined and raised NameError.
    print(file_found)
But of course the script performs the same search twice :(
Thanks so much for any suggestions!
def return_ext():
    """Yield each file-name pattern to search for."""
    file_pasok = ["AAAA.txt", "BBBB.txt"]
    yield from file_pasok


def list_files(file_path):
    """Yield the path of every file under *file_path* that matches a pattern.

    The patterns come from return_ext(). The tree is walked once with
    os.walk(); each matching file is yielded exactly once, as a string built
    from the directory being visited and the file name.
    """
    exts = list(return_ext())
    for dirpath, _dirnames, filenames in os.walk(file_path):
        for file in filenames:
            # Test the name against the patterns themselves. The earlier
            # check iterated over the characters of the file name instead,
            # which practically never matched.
            if any(fnmatch.fnmatch(file, ext) for ext in exts):
                yield str(Path(os.path.join(dirpath, file)))
# Materialise the generator once so that a single walk of the tree provides
# both the number of matches and their paths.
converted_list = list_files(file_path)
count_found = list(converted_list)
print(len(count_found))
for file_found in count_found:
    print(file_found)

Use glob to iterate through files in dir to select correct extension?

I'm trying to iterate through a directory and select the first file available.
These files look like this:
img_1.png img_2.png img_3.mp4 img_4.png img_5.jpg img_6.mp4
As you can see, their names are consistent but their extensions differ. I'd like the script to try each extension for a given number before it moves on to the next number, i.e.:
I assume the best way to go about it is to iterate through each number and extension like this: img_1.png, img_1.jpg and img_1.mp4, and if none of the three is available, move on to the next number and repeat with img_2.png, img_2.jpg and img_2.mp4 until a file is found.
Question:
Is it best to iterate through the files and use glob to extend a file path with the extensions? Is there a better method?
This is what I thought would work, but it doesn't:
# Gets number of files in dir
# NOTE: this rebinds the built-in name `list` for the rest of the script.
list = os.listdir(folder_path)
number_files = len(list)
# Chooses file from dir
e = 0
for i in range(number_files):
try:
chosen_file = folder_path + "img_" + str(e)
for ext in ('*.jpg', '*.png', '*.mp4'):
# NOTE: chosen_file is built by string concatenation, so it is a str, and str
# has no extend() method; this line therefore raises AttributeError.
full_path = chosen_file.extend(glob(join(chosen_file, ext)))
print (full_path)
#random_file = random.choice(os.listdir(folder_path)) # Chooses random file
# NOTE: the bare except hides that error, so only 'Hit except' is ever shown.
except:
e += 1
print ('Hit except')
Are there other files in the folder with different names that you do not want to select or are all the files in the folder of interest? Is all that matters that they have the those 3 extensions or are the names important as well?
If you are only interested in files with those 3 extensions then this code will work
import os
import glob
folder_path = 'test\\'
extensions = ('.jpg', '.png', '.mp4')
e = 0  # number of files skipped because their extension is not wanted
for r, d, f in os.walk(folder_path):
    for file in f:
        # str.endswith() accepts a tuple, so one call tests every extension
        # and each file is reported (or counted as skipped) exactly once.
        if file.endswith(extensions):
            # Join with the directory being walked (r), not folder_path, so
            # files found in subfolders get their real path.
            full_path = os.path.join(r, file)
            print (full_path)
        else:
            e += 1
            print ('Hit except')
Given:
$ ls /tmp
img_1.png img_1.jpg img_2.png img_4.png img_5.jpg img_3.mp4 img_6.mp4
You can use pathlib and a more targeted glob:
from pathlib import Path


def image_files(folder, suffixes=('.png', '.jpg', '.mp4')):
    """Yield the ``img_<number>.<ext>`` files in *folder* with a wanted suffix.

    The number may have any count of digits (``img_7.png``, ``img_12.jpg``).
    Files are yielded in the order the directory listing returns them.
    """
    for x in Path(folder).glob('img_[0-9]*.*'):
        # The glob only guarantees one leading digit; require that the whole
        # part after 'img_' is numeric before accepting the file.
        if x.stem[len('img_'):].isdigit() and x.suffix in suffixes:
            yield x


p=Path('/tmp')
for fn in image_files(p):
    print(fn)
Prints:
/tmp/img_1.png
/tmp/img_1.jpg
/tmp/img_2.png
/tmp/img_4.png
/tmp/img_5.jpg
/tmp/img_3.mp4
/tmp/img_6.mp4
Answer:
Decided to not use glob and did this instead:
# Try the numbers in order and stop at the first one for which a file with a
# wanted extension exists; png is preferred, then jpg, then mp4.
found_file = None
for i in range(number_files):
    # os.path.join() works whether or not folder_path ends with a separator.
    chosen_file = os.path.join(folder_path, "img_" + str(i))
    for ext in ("png", "jpg", "mp4"):
        candidate = chosen_file + "." + ext
        # os.path.exists() returns False instead of raising for a missing
        # file, so no try/except is needed around it.
        if os.path.exists(candidate):
            found_file = candidate
            print (ext + ' true')
            print (candidate)
            break
    if found_file is not None:
        break
else:
    # NOTE(review): the original placement of this print was lost with the
    # indentation; here it reports that no number in range had a file.
    print ('false')

Filemanager in python with wildcard

I'm making a script to scan a network folder and copy the files (PDF's) in it to another network folder.
The pdf names start with a number, and this number is also in the name of the destination
Here is my code:
import os, shutil, glob
from os import listdir
from os.path import isfile, join

# Folder that is scanned for incoming PDFs (built once and reused below).
insite_dir = os.path.join('\\\\fatboy.leleu.be','iedereen','Glenn','Insite')
# Plain files only; sub-directories of the folder are ignored.
files = [f for f in listdir(insite_dir) if isfile(join(insite_dir, f))]
# glob.glob() returns a list of every path matching the wildcard pattern.
dst = glob.glob(os.path.join('\\\\fatboy.leleu.be','iedereen','Glenn','Workflow','2125967_*','PDF Druk'))
# NOTE: `files` is rebound further down by files = os.listdir(path).
for x in files:
#orderNumber = (x[:7]) <-- get's the 7 numbers
# NOTE: in the attempt below 'orderNumber_*' is a literal pattern string; the
# value of the orderNumber variable is never inserted into it.
#client = glob.glob('orderNumber_*') <-- this returns an empty list but should return me the number + name of the client
# The trailing '' makes os.path.join() end the path with a separator.
path = os.path.join('\\\\fatboy.leleu.be','iedereen','Glenn','Insite', '')
# The destination folder name is hard-coded here rather than looked up.
moveto = os.path.join('\\\\fatboy.leleu.be','iedereen','Glenn','Workflow','2125967_klant_1','PDF Druk')
files = os.listdir(path)
files.sort()
for f in files:
# path already ends with a separator, so concatenation gives the source path.
src = path+f
dst = moveto
shutil.move(src,dst)
print('Succesfully moved', f, 'to', moveto)
I'm trying to make the name after the number a wildcard, but glob.glob returns a list, which causes an error when it is passed to os.path.join.
Has anyone some tips for me?

Re-loop until all matches are found, logic?

I cannot figure out the logic for this. I am attempting to compare a list of names, 'matches', to the files in a folder. If a file in 'folders' has the same name as an entry in 'matches', then do something; but obviously it doesn't try each match against each file. I'm thinking I need to use a while loop, but I don't know how to apply it.
import os
import glob
import os.path
folders = glob.glob('C:\\Corrections\\*.*')
# NOTE: open() returns a file object, not a list of lines; it is never closed.
matches = open('filename.txt', 'r')
for each in folders:
# NOTE(review): for a path shaped like C:\Corrections\name.ext this split gives
# only three parts, so index 3 below looks out of range - confirm the real depth.
splitname_one = each.split('\\', 3) #Separate the filename from the path
filename = splitname_one[3] #Get Filename only
basefile = filename.split('.', 1) #Separate filename and file extension
compare0 = basefile[0] #assign base file name to compare0
#print (basefile[0])
# NOTE: a file object can be iterated only once. Assuming this loop is nested
# inside the loop over `folders` (indentation was lost), it reads every line
# for the first file and then finds nothing left for all later files.
for line in matches:
match = line.split('.', 1) #Separate base filename from file extension
#print (match[1])
compare1 = match[0] #assign base file name to compare1
if compare1==compare0:
#os.rename(filename, 'C:\\holder\\' + filename)
print ('We Have a match!')
else:
print ('no match :( ')
FWIW here's how I might end up doing something like this:
import glob
from os.path import basename, splitext
def file_base(filename):
    """Return *filename* without its directory part and without its extension.

    Trailing line endings are removed first, so a line read straight from a
    text file (which still ends in a newline) gives the same result as the
    bare name.
    """
    return splitext(basename(filename.rstrip('\r\n')))[0]
# Base names of the files currently in the folder.
folders = {file_base(f) for f in glob.glob('C:\\Corrections\\*.*')}
with open('filename.txt') as fobj:
    # Iterate the file directly; strip each line and skip blank ones so that
    # line endings and empty lines cannot cause missed or spurious matches.
    matches = {file_base(line.strip()) for line in fobj if line.strip()}
# Names present both in the folder and in the list file.
print(folders.intersection(matches))

Filter file list in python/ lowercase and uppercase extension files

I am filtering my file list using this line:
MyList = filter(lambda x: x.endswith(('.doc','.txt','.dat')), os.listdir(path))
The line above will only filter lowercase extension files. Therefore, is there an elegant way to make it filter also the uppercase extension files?
You just need to add a .lower() to your lambda function
MyList = filter(lambda x: x.lower().endswith(('.doc','.txt','.dat')), os.listdir(path))
I'd prefer to use os.path.splitext with a list comprehension
from os.path import splitext
my_list = [x for x in os.listdir(path) if splitext(x)[1].lower() in {'.doc', '.txt', '.dat'}]
Still a bit much for a single line, so perhaps
from os.path import splitext
def valid_extension(x, valid=frozenset({'.doc', '.txt', '.dat'})):
    """Return True if the extension of file name *x* is in *valid*.

    The comparison ignores the case of the extension in *x*; the entries of
    *valid* are expected in lower case and include the leading dot. The
    default is a frozenset so the shared default value cannot be modified.
    """
    return splitext(x)[1].lower() in valid
my_list = [x for x in os.listdir(path) if valid_extension(x)]
import os
import re
# Matches a .doc, .txt or .dat extension at the end of a name, in any case.
pat = re.compile(r'[.](doc|txt|dat)$', re.IGNORECASE)
# pat.search returns a match object (truthy) or None, so it can serve
# directly as the filter predicate.
filenames = list(filter(pat.search, os.listdir(path)))
print(filenames)

Resources