Error in using os.path.walk() correctly - python-3.x

So I created this Folder C:\TempFiles to test run the following code snippet
Inside this folder i had two files -> nd1.txt, nd2.txt and a folder C:\TempFiles\Temp2, inside which i had only one file nd3.txt
Now when i execute this code:-
import os,file,storage
database = file.dictionary()
tools = storage.misc()
lui = -1 # last used file index
fileIndex = 1
def sendWord(wrd, findex): # where findex is the file index
global lui
if findex!=lui:
tools.refreshRecentList()
lui = findex
if tools.mustIgnore(wrd)==0 and tools.toRecentList(wrd)==1:
database.addWord(wrd,findex) # else there's no point adding the word to the database, because its either trivial, or has recently been added
def showPostingsList():
print("\nPOSTING's LIST")
database.display()
def parseFile(nfile, findex):
for line in nfile:
pl = line.split()
for word in pl:
sendWord(word.lower(),findex)
def parseDirectory(dirname):
global fileIndex
for root,dirs,files in os.walk(dirname):
for name in dirs:
parseDirectory(os.path.join(root,name))
for filename in files:
nf = open(os.path.join(root,filename),'r')
parseFile(nf,fileIndex)
print(" --> "+ nf.name)
fileIndex+=1
nf.close()
def main():
dirname = input("Enter the base directory :-\n")
print("\nParsing Files...")
parseDirectory(dirname)
print("\nPostings List has Been successfully created.\n",database.entries()," word(s) sent to database")
choice = ""
while choice!='y' and choice!='n':
choice = str(input("View List?\n(Y)es\n(N)o\n -> ")).lower()
if choice!='y' and choice!='n':
print("Invalid Entry. Re-enter\n")
if choice=='y':
showPostingsList()
main()
Now I should Traverse the three files only once each, and i put a print(filename) to test that, but apparently I am traversing the inside folder twice:-
Enter the base directory :-
C:\TempFiles
Parsing Files...
--> C:\TempFiles\Temp2\nd3.txt
--> C:\TempFiles\nd1.txt
--> C:\TempFiles\nd2.txt
--> C:\TempFiles\Temp2\nd3.txt
Postings List has Been successfully created.
34 word(s) sent to database
View List?
(Y)es
(N)o
-> n
Can Anyone tell me how to modify the os.path.walk() as such to avoid the error
Its not that my output is incorrect, but its traversing over one entire folder twice, and that's not very efficient.

Your issue isn't specific to Python 3, it's how os.walk() works - iterating already does the recursion to subfolders, so you can take out your recursive call:
def parseDirectory(dirname):
global fileIndex
for root,dirs,files in os.walk(dirname):
for filename in files:
nf = open(os.path.join(root,filename),'r')
parseFile(nf,fileIndex)
print(" --> "+ nf.name)
fileIndex+=1
nf.close()
By calling parseDirectory() for the dirs, you were starting another, independant walk of your only subfolder.

Related

Python Selenium: Check if a new file in the download folder is added

I have this when I press on a link it downloads to the download folder.
my Url looks something like so
url='https://vle......ac.uk/pluginfile.php/2814969/mod_page/content/16/Statistics_for_Business_and_Economics_----_%28Unit_I_Introduction%29.pdf'
driver.execute_script("window.open('%s', '_blank')" % URL)
Where the URL is a pdf file that I am trying to download.
I want to write a code that waits until number of files in the download folder increases to move on to the next itteration in the loop.
I wrote this code:
def wait_till_number_of_files_is_byound_the_current_file():
path_download=r'\\Mac\Home\Downloads\*'
list_of_files = glob.glob(path_download)
a=len(list_of_files)
while len(list_of_files)==a:
time.sleep(1)
list_of_files = glob.glob(path_download)
In my for loop I also tried this code
item = WebDriverWait(driver, 10).until(lambda driver: driver.execute_script("window.open('%s', '_blank')" % URL))
but this made the file being pressed infinitely not only once.
The best way, to get around this (I hope there would be a better way) is to use the following function
def download_wait(directory, timeout, nfiles=None):
"""
Wait for downloads to finish with a specified timeout.
Args
----
directory : str
The path to the folder where the files will be downloaded.
timeout : int
How many seconds to wait until timing out.
nfiles : int, defaults to None
If provided, also wait for the expected number of files.
"""
seconds = 0
dl_wait = True
while dl_wait and seconds < timeout:
time.sleep(1)
dl_wait = False
files = os.listdir(directory)
if nfiles and len(files) != nfiles:
dl_wait = True
for fname in files:
if fname.endswith('.crdownload'):
dl_wait = True
seconds += 1
return seconds
In my for loop, I wrote the following
for url in hyper_link_of_files:
# Click on this link
driver.execute_script("window.open('%s', '_blank')" % url)
# time.sleep(2)
download_wait(r'\\Mac\Home\Downloads', 10, nfiles=None)
time.sleep(2)
# move the last download file into the destination folder
Move_File(dest_folder)
I will share my Move_File function for reference to those who are interested in moving the downloaded file into a new destination
def Move_File(path_needed):
# Get the working directory of the downloads folder
path_download=r'\\Mac\Home\Downloads\*'
list_of_files = glob.glob(path_download)
latest_file = max(list_of_files, key=os.path.getctime)
# Copy to the new file into the destination
path_destination=os.path.join(path_needed,os.path.basename(latest_file))
shutil.move(latest_file,path_destination)

want to count if a file exists and store a duplicate followed by a copy number on python

I want to make a script that monitors changes in a folder and moves files to special directories. I use pip-watchdog
from watchdog.events import FileSystemEventHandler
def makeUnique(path,counter):
filename, extension = os.path.splitext(path)
# IF FILE EXISTS, ADDS NUMBER TO THE END OF THE FILENAME
file_split=filename.split('\\')
new_name="DUPLICATE_"+file_split[-1]
new_file=file_split[-1].replace(file_split[-1],new_name)
while os.path.exists(path):
path = new_file + " (" + str(counter) + ")" + extension
counter += 1
return path
def move(dest, entry, name):
"""dest = D:\Download\ChromeSetup.exe"""
file_exists = os.path.exists(dest + "\\" + name)
counter=1
if file_exists:
unique_name = makeUnique(entry,counter)
dest = dest+"\\"+unique_name
os.rename(entry, dest)
try:
print(f'[magenta][[/] [indian_red1 bold]{name}[/] [magenta]][/] -> [magenta][[/]
[orange_red1 bold]{dest}[/] [magenta]][/] to ' + f'[dark_orange]{dest}[/]')
shutil.move(entry,dest)
except:
pass
def moveByExtension(str, dest_dir, entry, name):
if name.endswith(str):
dest = dest_dir
move(dest, entry, name)
dest_dir_exe = r'D:\Download\exe_files'
class MoverHandler(FileSystemEventHandler):
def on_created(self, event):
"""when a file or directory is created"""
with os.scandir(source_dir) as entries:
for entry in entries:
name = entry.name
# app
moveByExtension('.exe', dest_dir_exe, entry, name)
My current filesystem tree:
-Download/
-- ChromeSetup.exe (rename and move to exe_files)
-Download/exe_files/
-- ChromeSetup.exe
-- DUPLICATE_ChromeSetup (1).exe
But if ChromeSetup.exe is in the download again, then the error occurs 'DUPLICATE_ChromeSetup (1).exe' already exists, although 'DUPLICATE_ChromeSetup (2).exe' is expected.
want to count if a file exists and store a duplicate followed by a copy number
I'm learning python at the moment...maybe I don't see the obvious problem
thank you in advance

Iterate through folder/sub-directories and move found regex files into new folder

I´ve got a folder/sub-directories structure as follow:
-main_folder
-sub_1
322.txt
024.ops
-sub_2
977.txt
004.txt
-sub_3
396.xml
059.ops
I´m trying to iterate with os.walk through the folder and its sub-directories and collect the names inside these folders. When a name gets found by a regex rule, I want to either store the path in list or directly move that file into a new folder (mkdir).
I´ve already got the regex done to find the document I want.
For example:
find_000_099 = r'\b(0\d{2}.\w{1,4})'
find_300_399 = r'\b(3\d{2}.\w{1,4})'
find_900_999 = r'\b(9\d{2}.\w{1,4})'
I wish my expected result to be like:
-main_folder
-sub_from_000_099
024.ops
004.txt
059.ops
-sub_from_300_399
322.txt
396.xml
-sub_from_900_999
977.txt
You can use the below-given code, which moves the file from its initial directory to the desired directory.
import os
import re
import shutil
find_000_099 = r'\b(0\d{2}.\w{1,4})'
find_300_399 = r'\b(3\d{2}.\w{1,4})'
find_900_999 = r'\b(9\d{2}.\w{1,4})'
count = 0
for roots,dirs,files in os.walk('Directory Path'):
#print(roots, len(dirs), len(files))
if count == 0:
parent_dir = roots
os.mkdir ( parent_dir + "/sub_from_000_099" )
os.mkdir ( parent_dir + "/sub_from_300_399" )
os.mkdir ( parent_dir + "/sub_from_900_999" )
count += 1
else:
print(count)
for file in files:
print(file)
if re.match(find_000_099, file):
shutil.move ( roots + "/" + file, parent_dir + "/sub_from_000_099/" + file)
elif re.match ( find_300_399, file ):
shutil.move ( roots + "/" + file, parent_dir + "/sub_from_300_399/" + file )
elif re.match ( find_900_999, file ):
shutil.move ( roots + "/" + file, parent_dir + "/sub_from_900_999/" + file )
It's a skeleton code, which fulfills your requirements.
You can add checks on creating directories, by first checking whether the directory exists or not, and other checks as per your needs.
Here is a simpler way, using pathlib and shutil
import re
import shutil
from pathlib import Path
new_path = Path("new_folder")
if not new_path.exists(): new_path.mkdir()
# Getting all files in the main directory
files = Path("main_folder").rglob("*.*")
regs = {
r'\b(0\d{2}.\w{1,4})': "sub_1", # find_000_099
r'\b(3\d{2}.\w{1,4})': "sub_2", # find_300_399
r'\b(9\d{2}.\w{1,4})': "sub_3" # find_900_999
}
for f in files:
for reg in regs:
if re.search(reg, f.name):
temp_path = new_path / regs[reg]
if not temp_path.exists(): temp_path.mkdir()
# Change the following method to 'move' after testing it
shutil.copy(f, temp_path / f.name)
break

Run code on specific files in a directory separately (by the name of file)

I have N files in the same folder with different index numbers like
Fe_1Sec_1_.txt
Fe_1Sec_2_.txt
Fe_1Sec_3_.txt
Fe_2Sec_1_.txt
Fe_2Sec_2_.txt
Fe_2Sec_3_.txt
.
.
.
and so on
Ex: If I need to run my code with only the files with time = 1 Sec, I can make it manually as follow:
path = "input/*_1Sec_*.txt"
files = glob.glob(path)
print(files)
which gave me:
Out[103]: ['input\\Fe_1Sec_1_.txt', 'input\\Fe_1Sec_2_.txt', 'input\\Fe_1Sec_3_.txt']
In case of I need to run my code for all files separately (depending on the measurement time in seconds, i.e. the name of file)
I tried this code to get the path for each time of measurement:
time = 0
while time < 4:
time += 1
t = str(time)
path = ('"input/*_'+t+'Sec_*.txt"')
which gives me:
"input/*_1Sec_*.txt"
"input/*_2Sec_*.txt"
"input/*_3Sec_*.txt"
"input/*_4Sec_*.txt"
After that I tried to use this path as follow:
files = glob.glob(path)
print(files)
But it doesn't import the wanted files and give me :
"input/*_1Sec_*.txt"
[]
"input/*_2Sec_*.txt"
[]
"input/*_3Sec_*.txt"
[]
"input/*_4Sec_*.txt"
[]
Any suggestions, please??
I think the best way would be to simply do
for time in range(1, 5): # 1,2,3,4
glob_path = 'input/*_{}Sec_*.txt'.format(time)
for file_path in glob.glob(glob_path):
do_something(file_path, measurement) # or whatever

How to create a symbolic link with SCons?

I'm using SCons for building a project and need to add a symbolic link to a file it is installing via env.Install. What command(s) will make a link that's the equivalent of running ln -s on the command line?
SCons doesn't have a dedicated symbolic link command, but you can use os.symlink(src, dst) from Python's os module:
import os
env = Environment()
def SymLink(target, source, env):
os.symlink(os.path.abspath(str(source[0])), os.path.abspath(str(target[0])))
env.Command("file.out", "file.in", SymLink)
This may not work correctly on Windows, I've only tried it on Linux.
There seems to be little advancement in the SCons core code for symbolic link support and I wasn't satisfied any one solution I found on the web. Here is a potential builder which incorporates aspects of both Nick's and richq's answers. Additionally, it will catch name changes (due to the emitter method) and is as platform-agnostic as I could get it.
I prefer this builder because it will make links relative to the directory in which they are installed. One could add an option to force the link to be absolute I suppose, but I have not needed or wanted that yet.
Currently, if the OS doesn't support symlinks, I just pass and do nothing, but one could use os.copytree() for example however the dependency becomes messy if the source is a directory so the emitter would need to do something fancy. I'm up for any suggestions here.
One can put the following code into the file site_scons/site_tools/symlink.py (with blank _init_.py files in the appropriate places). Then do this in the SConstruct file:
SConstruct:
env = Environment()
env.Tool('symlink')
env.SymLink('link_name.txt', 'real_file.txt')
symlink.py:
import os
from os import path
from SCons.Node import FS
from SCons.Script import Action, Builder
def generate(env):
'''
SymLink(link_name,source)
env.SymLink(link_name,source)
Makes a symbolic link named "link_name" that points to the
real file or directory "source". The link produced is always
relative.
'''
bldr = Builder(action = Action(symlink_builder,symlink_print),
target_factory = FS.File,
source_factory = FS.Entry,
single_target = True,
single_source = True,
emitter = symlink_emitter)
env.Append(BUILDERS = {'SymLink' : bldr})
def exists(env):
'''
we could test if the OS supports symlinks here, or we could
use copytree as an alternative in the builder.
'''
return True
def symlink_print(target, source, env):
lnk = path.basename(target[0].abspath)
src = path.basename(source[0].abspath)
return 'Link: '+lnk+' points to '+src
def symlink_emitter(target, source, env):
'''
This emitter removes the link if the source file name has changed
since scons does not seem to catch this case.
'''
lnk = target[0].abspath
src = source[0].abspath
lnkdir,lnkname = path.split(lnk)
srcrel = path.relpath(src,lnkdir)
if int(env.get('verbose',0)) > 3:
ldir = path.relpath(lnkdir,env.Dir('#').abspath)
if rellnkdir[:2] == '..':
ldir = path.abspath(ldir)
print ' symbolic link in directory: %s' % ldir
print ' %s -> %s' % (lnkname,srcrel)
try:
if path.exists(lnk):
if os.readlink(lnk) != srcrel:
os.remove(lnk)
except AttributeError:
# no symlink available, so we remove the whole tree? (or pass)
#os.rmtree(lnk)
print 'no os.symlink capability on this system?'
return (target, source)
def symlink_builder(target, source, env):
lnk = target[0].abspath
src = source[0].abspath
lnkdir,lnkname = path.split(lnk)
srcrel = path.relpath(src,lnkdir)
if int(env.get('verbose',0)) > 4:
print 'target:', target
print 'source:', source
print 'lnk:', lnk
print 'src:', src
print 'lnkdir,lnkname:', lnkdir, lnkname
print 'srcrel:', srcrel
if int(env.get('verbose',0)) > 4:
print 'in directory: %s' % path.relpath(lnkdir,env.Dir('#').abspath)
print ' symlink: %s -> %s' % (lnkname,srcrel)
try:
os.symlink(srcrel,lnk)
except AttributeError:
# no symlink available, so we make a (deep) copy? (or pass)
#os.copytree(srcrel,lnk)
print 'no os.symlink capability on this system?'
return None
This creates a builder to perform the job:
mylib = env.SharedLibrary("foobar", SRCS)
builder = Builder(action = "ln -s ${SOURCE.file} ${TARGET.file}", chdir = True)
env.Append(BUILDERS = {"Symlink" : builder})
mylib_link = env.Symlink("_foobar.so", mylib)
env.Default(mylib)
env.Default(mylib_link)
Again, this solution is for Linux.
If you wanted to issue the command directly to the shell and know the OS, subprocess can be used as well.
E.g.: subprocess.call(['ln', '-s', '</src/path>', '</dest/path>'])
In addition to Nicks solution, you can add a directory symlink by using a file as a directory name carrier. It's not the cleanest solution and debugging path names is a pain, but this works well:
def symlink_last(target_source_env):
src = os.path.basename(os.path.dirname(str(source[0])))
link = "deliverables/last"
print "Symlinking "+ src + "as" + link
os.symlink(src, link)
BUILD_TARGETS.append('link')
install_dir = "deliverables/subdir"
carrier_file = "filename"
builder = Builder(action = symlink_last, chdir=False)
env.Append(BUILDERS={ "Symlink" : builder })
env.Alias(target="link", source=env.Symlink(dir="deliverables", source = install_dir + carrier_file)
This will make a link to deliverables/subdir named deliverables/last, provided that a file deliverables/subdir/filename exists.

Resources