Use glob to iterate through files in dir to select correct extension? - python-3.x

I'm trying to iterate through a dir and select the first file available.
These files look like this:
img_1.png img_2.png img_3.mp4 img_4.png img_5.jpg img_6.mp4
As you can see, their names follow the same pattern but their extensions differ. I'd like the script to check each extension for a given number before moving on to the next, i.e.:
I assume the best way to go about it is to check each file and extension like this: img_1.png, img_1.jpg and img_1.mp4, and if none of the three are available, move to the next number and repeat (img_2.png, img_2.jpg, img_2.mp4) until an available file is found.
Question:
Is it best to iterate through the files and use glob to extend a file path with the extensions? Is there a better method?
This is what I thought would work, but it doesn't:
# Gets number of files in dir
list = os.listdir(folder_path)
number_files = len(list)

# Chooses file from dir
e = 0
for i in range(number_files):
    try:
        chosen_file = folder_path + "img_" + str(e)
        for ext in ('*.jpg', '*.png', '*.mp4'):
            full_path = chosen_file.extend(glob(join(chosen_file, ext)))
            print (full_path)
        #random_file = random.choice(os.listdir(folder_path))  # Chooses random file
    except:
        e += 1
        print ('Hit except')

Are there other files in the folder with different names that you do not want to select, or are all the files in the folder of interest? Is all that matters that they have those 3 extensions, or are the names important as well?
If you are only interested in files with those 3 extensions, then this code will work:
import os
import glob

folder_path = 'test\\'
e = 0
for r, d, f in os.walk(folder_path):
    for file in f:
        extensions = ['.jpg', '.png', '.mp4']
        for ext in extensions:
            if file.endswith(ext):
                full_path = os.path.join(folder_path, file)
                print (full_path)
            else:
                e += 1
                print ('Hit except')

Given:
$ ls /tmp
img_1.png img_1.jpg img_2.png img_4.png img_5.jpg img_3.mp4 img_6.mp4
You can use pathlib and a more targeted glob:
from pathlib import Path

p = Path('/tmp')
for fn in (x for x in p.glob('img_[0-9].*')
           if x.suffix in ('.png', '.jpg', '.mp4')):
    print(fn)
Prints:
/tmp/img_1.png
/tmp/img_1.jpg
/tmp/img_2.png
/tmp/img_4.png
/tmp/img_5.jpg
/tmp/img_3.mp4
/tmp/img_6.mp4
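
To pick just the first available file by number, as the original question asks, one option is to sort these matches numerically and take the first; a minimal sketch building on the snippet above (the sort key assumes names of the form img_<number>.<ext>):

from pathlib import Path

p = Path('/tmp')
candidates = sorted(
    (x for x in p.glob('img_[0-9].*') if x.suffix in ('.png', '.jpg', '.mp4')),
    key=lambda x: int(x.stem.split('_')[1]),  # numeric part after "img_"
)
first = candidates[0] if candidates else None
print(first)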

Answer:
Decided to not use glob and did this instead:
i = 0
for i in range(number_files):
    try:
        chosen_file = folder_path + "img_" + str(i)
        jpg_file = chosen_file + ".jpg"
        png_file = chosen_file + ".png"
        mp4_file = chosen_file + ".mp4"
        if os.path.exists(png_file):
            print ('png true')
            print (png_file)
            break
        elif os.path.exists(jpg_file):
            print ('jpg true')
            print (jpg_file)
            break
        elif os.path.exists(mp4_file):
            print ('mp4 true')
            print (mp4_file)
            break
    except:
        i += 1
        print ('false')
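
For reference, the same "first existing file" check can be written more compactly with a generator and next(); a sketch, assuming folder_path ends with a path separator and number_files is defined as above:

import os

# Candidate paths in priority order: png, then jpg, then mp4 for each number.
candidates = (
    folder_path + "img_" + str(i) + ext
    for i in range(number_files)
    for ext in (".png", ".jpg", ".mp4")
)
first_match = next((path for path in candidates if os.path.exists(path)), None)
print(first_match)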

Related

Automation via python

Completely new to python so forgive me if this is a dumb question.
Part of my working tasks is to upgrade the IOS on various Cisco routers and switches.
The most mind numbing part of this is comparing the pre change config with the post change config.
I use ExamDiff for this but with up to 100 devices each night this gets soul destroying.
Is it possible to get python to open ExamDiff and automatically compare the pre and post checks, saving the differences to a file for each device?
I know I can use the os module to open ExamDiff, but I have no idea how to get ExamDiff to do the comparison.
Can someone point me in the right direction?
Thanks
I got this; it works pretty well:
#!/usr/bin/python
import os

path = input("Enter the files location: ")

def nexus():
    rootdir = path + os.sep
    filelist = os.listdir(rootdir)
    if filelist:
        for file in filelist:
            if 'pre' in file:
                prefile = file
                postfile = file.replace('pre', 'post')
                resultfile = file.replace('pre', 'report')
                if postfile in filelist:
                    prefile = rootdir + prefile
                    postfile = rootdir + postfile
                    resultfile = rootdir + resultfile
                    compare(prefile, postfile, resultfile)
                else:
                    print('No corresponding "post"-file to {0}.'.format(prefile))
    else:
        print('No files found.')

def compare(file1loc, file2loc, comparefileloc):
    with open(file1loc, 'r') as file1:
        file1lines = file1.readlines()
        file1lines = [x.strip() for x in file1lines]  # getting rid of whitespace and breaks
    with open(file2loc, 'r') as file2:
        file2lines = file2.readlines()
        file2lines = [x.strip() for x in file2lines]  # getting rid of whitespace and breaks
    with open(comparefileloc, 'w') as comparefile:
        comparefile.write('===== IN FILE 1 BUT NOT FILE 2 =====\r\n')
        for file1line in file1lines:
            if not file1line in file2lines:
                comparefile.write(file1line + '\r\n')
        comparefile.write('\r\n')
        comparefile.write('===== IN FILE 2 BUT NOT FILE 1 =====\r\n')
        for file2line in file2lines:
            if not file2line in file1lines:
                comparefile.write(file2line + '\r\n')

if __name__ == '__main__':
    nexus()
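
As an aside, Python's standard-library difflib can produce a unified diff without calling ExamDiff at all, which may be enough if the goal is just a per-device report of the differences. A minimal sketch (the file names in the usage comment are hypothetical):

import difflib

def write_diff_report(pre_path, post_path, report_path):
    """Write a unified diff of the pre- and post-change configs to report_path."""
    with open(pre_path) as f:
        pre_lines = f.readlines()
    with open(post_path) as f:
        post_lines = f.readlines()
    diff = difflib.unified_diff(pre_lines, post_lines,
                                fromfile=pre_path, tofile=post_path)
    with open(report_path, 'w') as report:
        report.writelines(diff)

# Hypothetical usage:
# write_diff_report('router1_pre.txt', 'router1_post.txt', 'router1_report.txt')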

Change order in filenames in a folder

I need to rename a bunch of files in a specific folder. They all end with a date and time, for example "hello 2019-05-22 1310.txt", and I want the date and time for each file to come first so I can sort them. With my code I get an error and it won't find my dir where all the files are located. What is wrong with the code?
import os
import re
import shutil

dir_path = r'C:\Users\Admin\Desktop\Testfiles'
comp = re.compile(r'\d{4}-\d{2}-\d{2}')
for file in os.listdir(dir_path):
    if '.' in file:
        index = [i for i, v in enumerate(file, 0) if v == '.'][-1]
        name = file[:index]
        ext = file[index + 1:]
    else:
        ext = ''
        name = file
    data = comp.findall(name)
    if len(data) != 0:
        date = comp.findall(name)[0]
        rest_name = ' '.join(comp.split(name)).strip()
        new_name = '{} {}{}'.format(date, rest_name, '.' + ext)
        print('changing {} to {}'.format(name, new_name))
        shutil.move(os.path.join(dir_path, name), os.path.join(dir_path, new_name))
    else:
        print('file {} is not change'.format(name))
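
One thing worth checking in the code above is that the source path passed to shutil.move is built from name (the filename without its extension), so it may not point at an existing file. A minimal alternative sketch using pathlib, with a regex that also captures the time part (the exact filename layout is an assumption):

import re
from pathlib import Path

dir_path = Path(r'C:\Users\Admin\Desktop\Testfiles')
stamp = re.compile(r'(\d{4}-\d{2}-\d{2} \d{4})')  # date plus time, e.g. "2019-05-22 1310"

for path in dir_path.iterdir():
    if not path.is_file():
        continue
    m = stamp.search(path.stem)
    if not m:
        print('file {} is not changed'.format(path.name))
        continue
    rest = stamp.sub('', path.stem).strip()
    new_name = '{} {}{}'.format(m.group(1), rest, path.suffix)
    print('changing {} to {}'.format(path.name, new_name))
    path.rename(path.with_name(new_name))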

Re-loop until all matches are found, logic?

I cannot figure out the logic for this. I am attempting to compare a list of matches, 'matches', to files from a folder. If a file in 'folders' equals a name in 'matches', then do something; but obviously it doesn't 'try' each match against each file. I'm thinking I need to use a while loop but I don't know how to apply it.
import os
import glob
import os.path

folders = glob.glob('C:\\Corrections\\*.*')
matches = open('filename.txt', 'r')
for each in folders:
    splitname_one = each.split('\\', 3)  # Separate the filename from the path
    filename = splitname_one[3]          # Get filename only
    basefile = filename.split('.', 1)    # Separate filename and file extension
    compare0 = basefile[0]               # Assign base file name to compare0
    #print (basefile[0])
    for line in matches:
        match = line.split('.', 1)       # Separate base filename from file extension
        #print (match[1])
        compare1 = match[0]              # Assign base file name to compare1
        if compare1 == compare0:
            #os.rename(filename, 'C:\\holder\\' + filename)
            print ('We Have a match!')
        else:
            print ('no match :( ')
FWIW, note that matches is a file object, so the inner loop exhausts it on the first pass of the outer loop and later iterations see nothing; reading it once into a set avoids that. Here's how I might end up doing something like this:
import glob
from os.path import basename, splitext

def file_base(filename):
    return splitext(basename(filename))[0]

folders = set(file_base(f) for f in glob.glob('C:\\Corrections\\*.*'))
with open('filename.txt') as fobj:
    matches = set(file_base(f) for f in fobj.readlines())

print(folders.intersection(matches))
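
If the intent is to then move each matched file (as the commented-out os.rename in the question suggests), the same helper can drive the move; a sketch, with the 'C:\\holder\\' destination taken from the question and shutil.move used instead of os.rename:

import glob
import shutil
from os.path import basename, splitext

def file_base(filename):
    return splitext(basename(filename))[0]

with open('filename.txt') as fobj:
    matches = set(file_base(line.strip()) for line in fobj)

for path in glob.glob('C:\\Corrections\\*.*'):
    if file_base(path) in matches:
        # Move the matched file into the holding folder named in the question.
        shutil.move(path, 'C:\\holder\\' + basename(path))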

Python: os.walk usage for scanning a specified directory and searching for a specified file

I want to scan the current folder and deeper folders to search for a specified file.
[~/test]$tree -a
Above is my test environment.
[~/test]$ls
NCRAM955E/ RNCMST954E/ RNCMST957E/ test.py*
Below is my code:
import os, shutil, sys, getopt, re

def GetOption(argv):
    FileDir = ""
    Roptarget = ""
    Dirtarget = []
    try:
        opts, args = getopt.getopt(argv, "hD:F:", ["FileDir=", "Roptarget="])
    except getopt.GetoptError:
        print ('Error arg input -D <FileDir> -F <Roptarget>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print ('Error arg input -D <FileDir> -F <Roptarget>')
            sys.exit()
        elif opt in ("-D", "--FileDir"):
            FileDir = arg
            Dirtarget = FileDir.split("|")
        elif opt in ("-F", "--Roptarget"):
            Roptarget = arg
    return(Dirtarget, Roptarget)

# The self-defined function below needs updating
def detect_walk(file_dir):
    L_0 = []
    L = []
    DirList, Ropfile = GetOption(sys.argv[1:])
    print("DirList = " + str(DirList))
    print("Ropfile = " + Ropfile)
    for root, dirs, files in os.walk(file_dir):
        for file in files:
            L_0.append(file)
            if " ".join(L_0).find(Ropfile):
                print("target rop file = " + Ropfile)
                L.append(os.path.join(root, Ropfile))
    return(L)

if __name__ == '__main__':
    file_path = "/home/test/"
    List = detect_walk(file_path)
My expected output, for example:
if I type python test.py -D "RNCRAM955E|RNCMST954E" -F "^A20180520.1300+0300-1315+0300*RNCMST954E_statsfile.xml$"
the program will only search the folders RNCRAM955E and RNCMST954E, and when a file matching the pattern is found, it will display the full path of the target file.
I am a Python beginner; please give me some advice. Thank you.
I have updated my code; it now transfers the specified files from the specified folders to another folder.
Usage is like python temp.py -D "RNCMST954E|RNCMST957E|RNCRAM955E" -F "A20180520\.13*", but the code still needs improvement: as you can see, the filename after -F needs the escape symbol \ added. How can I improve this?
import os, shutil, sys, getopt, re

def GetOption(argv):
    FileDir = ""
    Roptarget = ""
    Dirtarget = []
    try:
        opts, args = getopt.getopt(argv, "hD:F:", ["FileDir=", "Roptarget="])
    except getopt.GetoptError:
        print ('Error arg input -D <FileDir> -F <Roptarget>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-h":
            print ('Error arg input -D <FileDir> -F <Roptarget>')
            sys.exit()
        elif opt in ("-D", "--FileDir"):
            FileDir = arg
            Dirtarget = FileDir.split("|")
        elif opt in ("-F", "--Roptarget"):
            Roptarget = arg
    return(Dirtarget, Roptarget)

def detect_walk(file_dir):
    L = []
    desdir = "/home/ekoopgj/ITK/Task/test_folder/"
    for root, dirs, files in os.walk(file_dir):
        for file in files:
            if re.search(fileIndex, file) is not None:
                L.append(os.path.join(root, file))
                print("start copy " + os.path.join(root, file) + " to " + desdir)
                shutil.copyfile(os.path.join(root, file), desdir + file)

if __name__ == '__main__':
    DirList, fileIndex = GetOption(sys.argv[1:])
    # Looping over the target folders and passing file_path as a parameter
    # can reduce the search time if the target folder contains too many files.
    for dir in DirList:
        file_path = "/home/ekoopgj/ITK/Task/0521/"
        file_path += dir
        print("dir = " + dir)
        List = detect_walk(file_path)
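
Regarding the escaping problem: if shell-style wildcards like "A20180520.13*" are wanted rather than regular expressions, the standard-library fnmatch module can match filenames against such patterns directly, so no backslash escaping is needed. A minimal sketch of that matching step (the pattern and paths are just examples):

import fnmatch
import os

def find_matches(file_dir, pattern):
    """Return full paths of files under file_dir whose names match the wildcard pattern."""
    hits = []
    for root, dirs, files in os.walk(file_dir):
        for name in fnmatch.filter(files, pattern):
            hits.append(os.path.join(root, name))
    return hits

# Example: find_matches("/home/test/RNCMST954E", "A20180520.13*")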

Archive/pack a directory with contents as plain-text representation?

Under Linux / bash, how can I obtain a plain-text representation of a directory and its contents? (Note that by "plain-text" here I mean "UTF-8".)
In other words, how could I "pack" or "archive" a directory (with contents - including binary files) as a plain text file - such that I could "unpack" it later, and obtain the same directory with its contents?
I was interested in this for a while, and I think I finally managed to cook up a script that works in both Python 2.7 and 3.4 -- however, I'd still like to know if there is something else that does the same. Here it is as a Gist (with some more comments):
https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632
Otherwise, I'm posting a slightly abridged version here (below) for reference.
The usage is: to archive/pack into a .json text file:
python archdir2text-json.py -a /tmp > myarchdir.json
... and to unpack from the .json text file into the current (calling) directory:
python archdir2text-json.py -u myarchdir.json
Binary files are handled as base64.
Here is the script:
archdir2text-json.py
#!/usr/bin/env python

import pprint, inspect
import argparse
import os
import stat
import errno
import base64
import codecs

class SmartDescriptionFormatter(argparse.RawDescriptionHelpFormatter):
    def _fill_text(self, text, width, indent):
        if text.startswith('R|'):
            paragraphs = text[2:].splitlines()
            rebroken = [argparse._textwrap.wrap(tpar, width) for tpar in paragraphs]
            rebrokenstr = []
            for tlinearr in rebroken:
                if (len(tlinearr) == 0):
                    rebrokenstr.append("")
                else:
                    for tlinepiece in tlinearr:
                        rebrokenstr.append(tlinepiece)
            return '\n'.join(rebrokenstr)
        return argparse.RawDescriptionHelpFormatter._fill_text(self, text, width, indent)

textchars = bytearray({7,8,9,10,12,13,27} | set(range(0x20, 0x100)) - {0x7f})
is_binary_string = lambda bytes: bool(bytes.translate(None, textchars))

cwd = os.getcwd()

if os.name == 'nt':
    import win32api, win32con

def folder_is_hidden(p):
    if os.name == 'nt':
        attribute = win32api.GetFileAttributes(p)
        return attribute & (win32con.FILE_ATTRIBUTE_HIDDEN | win32con.FILE_ATTRIBUTE_SYSTEM)
    else:
        return os.path.basename(p).startswith('.')  # linux-osx

def path_hierarchy(path):
    hierarchy = {
        'type': 'folder',
        'name': os.path.basename(path),
        'path': path,
    }
    try:
        cleared_contents = [contents
            for contents in os.listdir(path)
            if not(
                os.path.isdir(os.path.join(path, contents))
                and
                folder_is_hidden(os.path.join(path, contents))
            )]
        hierarchy['children'] = [
            path_hierarchy(os.path.join(path, contents))
            for contents in cleared_contents
        ]
    except OSError as e:
        if e.errno == errno.ENOTDIR:
            hierarchy['type'] = 'file'
        else:
            hierarchy['type'] += " " + str(e)
    if hierarchy['type'] == 'file':
        isfifo = stat.S_ISFIFO(os.stat(hierarchy['path']).st_mode)
        if isfifo:
            ftype = "fifo"
        else:
            try:
                data = open(hierarchy['path'], 'rb').read()
                ftype = "bin" if is_binary_string(data) else "txt"
                if (ftype == "txt"):
                    hierarchy['content'] = data.decode("utf-8")
                else:
                    hierarchy['content'] = base64.b64encode(data).decode("utf-8")
            except Exception as e:
                ftype = str(e)
        hierarchy['ftype'] = ftype
    return hierarchy

def recurse_unpack(inobj, relpath=""):
    if (inobj['type'] == "folder"):
        rpname = relpath + inobj['name']
        sys.stderr.write("folder name: " + rpname + os.linesep)
        os.mkdir(rpname)
        for tchild in inobj['children']:
            recurse_unpack(tchild, relpath=relpath+inobj['name']+os.sep)
    elif (inobj['type'] == "file"):
        rfname = relpath + inobj['name']
        sys.stderr.write("file name: " + rfname + os.linesep)
        if inobj['ftype'] == "txt":
            with codecs.open(rfname, "w", "utf-8") as text_file:
                text_file.write(inobj['content'])
        elif inobj['ftype'] == "bin":
            with open(rfname, "wb") as bin_file:
                bin_file.write(base64.b64decode(inobj['content']))

if __name__ == '__main__':
    import json
    import sys

    parser = argparse.ArgumentParser(formatter_class=SmartDescriptionFormatter, description="""R|Command-line App that packs/archives (and vice-versa) a directory to a plain-text .json file; should work w/ both Python 2.7 and 3.4
see full help text in https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632""")
    parser.add_argument('input_paths', type=str, nargs='*', default=['.'],
        help='Paths to files/directories to include in the archive; or path to .json archive file')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-a', '--archive', action='store_true', help="Interpret input_paths as paths to files/directories, and archive them to a .json file (output to stdout)")
    group.add_argument('-u', '--unpack', action='store_true', help="Interpret input_paths as path to an archive .json file, and unpack it in the current directory")
    args = parser.parse_args()

    if (args.archive):
        valid_input_paths = []
        for p in args.input_paths:
            if os.path.isdir(p) or os.path.exists(p):
                valid_input_paths.append(p)
            else:
                sys.stderr.write("Ignoring invalid input path: " + p + os.linesep)
        sys.stderr.write("Encoding input path(s): " + str(valid_input_paths) + os.linesep)
        path_hier_arr = [path_hierarchy(vp) for vp in valid_input_paths]
        outjson = json.dumps(path_hier_arr, indent=2, sort_keys=True, separators=(',', ': '))
        print(outjson)
    elif (args.unpack):
        valid_input_paths = []
        for p in args.input_paths:
            if os.path.isdir(p) or os.path.exists(p):
                valid_input_paths.append(p)
            else:
                sys.stderr.write("Ignoring invalid input path: " + p + os.linesep)
        for vp in valid_input_paths:
            with open(vp) as data_file:
                data = json.load(data_file)
                for datachunk in data:
                    recurse_unpack(datachunk)
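
As an alternative to the JSON format above, the same "directory as UTF-8 text" effect can be achieved by tarring the directory and base64-encoding the result, which round-trips binary content for free. A small sketch of that idea (the archive name dir.tar.b64.txt is just an example, not part of the script above):

import base64
import io
import tarfile

def pack_to_text(directory, out_path):
    """Write a base64 (plain-text) representation of a gzipped tar of directory."""
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode='w:gz') as tar:
        tar.add(directory)
    with open(out_path, 'w') as out:
        out.write(base64.b64encode(buf.getvalue()).decode('ascii'))

def unpack_from_text(in_path, dest='.'):
    """Recreate the directory tree from the text produced by pack_to_text."""
    with open(in_path) as f:
        raw = base64.b64decode(f.read())
    with tarfile.open(fileobj=io.BytesIO(raw), mode='r:gz') as tar:
        tar.extractall(dest)

# Hypothetical usage:
# pack_to_text('/tmp/mydir', 'dir.tar.b64.txt')
# unpack_from_text('dir.tar.b64.txt')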
