Searching for a keyword inside a list - python-3.x

I'm trying to search for a keyword in a list and display the positions of all the list items that match said keyword. I've not managed to find anything helpful yet and was wondering if anyone can help me out. Here's the code I have currently:
import os

directory = os.listdir()
print(directory)
pyFilename = '.py'
# `pyFilename in directory` is only true when some entry is exactly '.py'.
# To find entries that *contain* the keyword, test each name individually
# and report the position of every match.
for position, name in enumerate(directory):
    if pyFilename in name:
        print("working", position)

Check out list comprehensions and enumerate:
import os

# Suffix that identifies the files of interest.
pyFilename = '.py'

# Build (position, filename) pairs for every entry of the current working
# directory whose name ends with the suffix; positions index into the
# unfiltered os.listdir() result.
directory = []
for position, filename in enumerate(os.listdir()):
    if filename.endswith(pyFilename):
        directory.append((position, filename))
print(directory)

import os

directory = os.listdir()
print(directory)
pyFilename = '.py'
# enumerate supplies the position of each entry, so no counter has to be
# maintained by hand (and it cannot drift out of step with the loop).
for list_location, z in enumerate(directory):
    if pyFilename in z:
        print("working: list location %s" % list_location)

Related

How to safely move a file to another directory in Python

The below works as expected:
import shutil

# Every backslash is escaped: the original "\m" is not a valid escape
# sequence and only worked because Python leaves unknown escapes untouched
# (newer versions warn about it). The resulting path string is unchanged.
source = "c:\\mydir\\myfile.txt"
dest_dir = "c:\\newdir"
shutil.move(source, dest_dir)
However, this also succeeds. I would want this to fail.
import shutil
# Same call as before, but here the source names a directory, not a file.
source = "c:\\mydir"
dest_dir = "c:\\newdir"
shutil.move(source,dest_dir)
Any way to ensure that only a file is moved. Both Windows and Unix would be great. If not, Unix at least.
You could use pathlib's purepath.suffix to determine if a path points to a file or a directory, like so:
import pathlib
def points_to_file(path) -> bool:
    """Guess from the name alone whether *path* refers to a file.

    Returns True when the last path component carries a suffix such as
    ".txt", and False otherwise. The filesystem is never consulted, so this
    is only a naming heuristic: a directory called "build.d" is reported as
    a file, and an extension-less file such as "Makefile" is not.
    """
    return bool(pathlib.PurePath(path).suffix)


pathtodir = r'C:\Users\username'
pathtofile = r'C:\Users\username\filename.extension'
print(f'Does "{pathtodir}" point to a file? {points_to_file(pathtodir)}')
# Result -> Does "C:\Users\username" point to a file? False
print(f'Does "{pathtofile}" point to a file? {points_to_file(pathtofile)}')
# Result -> Does "C:\Users\username\filename.extension" point to a file? True
You can define a custom function to ensure that source is a file (with os.path.isfile function):
import shutil
from os import path


def move_file(src, dst):
    """Move the regular file *src* to *dst*, refusing anything else.

    Args:
        src: Path of the file to move.
        dst: Destination file path or existing directory, as accepted by
            shutil.move.

    Raises:
        IsADirectoryError: If *src* is not an existing regular file. This
            covers directories and also paths that do not exist at all.
    """
    # Check before moving: shutil.move would happily relocate a whole
    # directory tree, which is exactly what this wrapper must prevent.
    if not path.isfile(src):
        raise IsADirectoryError('Source is not a file')
    shutil.move(src, dst)

Python how to search files using regular expression [duplicate]

I recently started getting into Python and I am having a hard time searching through directories and matching files based on a regex that I have created.
Basically I want it to scan through all the directories in another directory and find all the files that ends with .zip or .rar or .r01 and then run various commands based on what file it is.
import os
import re

rootdir = "/mnt/externa/Torrents/completed"
# The dot is escaped so it matches a literal "." in front of the extension,
# and the pattern is anchored to the end of the file name.
archive_pattern = re.compile(r'\.(zip|rar|r01)$')
for subdir, dirs, files in os.walk(rootdir):
    # os.walk yields a *list* of file names per directory; re.search needs
    # one string at a time, so each name is tested individually.
    for name in files:
        if archive_pattern.search(name):
            # Strings are joined with "+", and print is a function in Python 3.
            print("match: " + name)
import os
import re

rootdir = "/mnt/externa/Torrents/completed"
# Require a literal dot before the extension; the unescaped original also
# accepted names that merely end in the letters "zip", "rar" or "r01".
regex = re.compile(r'.*\.(?:zip|rar|r01)$')
for root, dirs, files in os.walk(rootdir):
    for file in files:
        if regex.match(file):
            print(file)
THE CODE BELOW ANSWERS THE QUESTION IN THE FOLLOWING COMMENT
That worked really well, is there a way to do this if match is found on regex group 1 and do this if match is found on regex group 2 etc ? – nillenilsson
import os
import re

# One capture group per archive type; the dots are escaped so that only a
# real ".zip" / ".rar" / ".r01" extension matches.
rx = r'(.*\.zip$)|(.*\.rar$)|(.*\.r01$)'
# Compiled once and reused for every file instead of re-matching the string
# pattern on each iteration.
regex = re.compile(rx)
# Maps the number of the group that matched to the label printed for it.
GROUP_LABELS = {1: "ZIP", 2: "RAR", 3: "R01"}
for root, dirs, files in os.walk("../Documents"):
    for file in files:
        res = regex.match(file)
        if res:
            # Exactly one alternative can match, so lastindex is the number
            # of the group that did.
            print(GROUP_LABELS[res.lastindex], file)
It might be possible to do this in a nicer way, but this works.
Given that you are a beginner, I would recommend using glob in place of a quickly written file-walking-regex matcher.
Snippets of functions using glob and a file-walking-regex matcher
The below snippet contains two file-regex searching functions (one using glob and the other using a custom file-walking-regex matcher). The snippet also contains a "stopwatch" function to time the two functions.
import os
import sys
from datetime import timedelta
from timeit import time
import os
import re
import glob
def stopwatch(method):
    """Decorate *method* so that each call prints how long it took.

    The wrapper forwards all positional and keyword arguments, prints
    "<function name>: <duration>" after the call returns, and hands back
    the wrapped function's result unchanged.
    """
    # Local import: functools is not among this snippet's top-level imports.
    import functools

    @functools.wraps(method)  # keep the wrapped function's name and docstring
    def timed(*args, **kw):
        ts = time.perf_counter()
        result = method(*args, **kw)
        te = time.perf_counter()
        duration = timedelta(seconds=te - ts)
        print(f"{method.__name__}: {duration}")
        return result

    return timed


@stopwatch
def get_filepaths_with_oswalk(root_path: str, file_regex: str):
    """Return paths of all files under *root_path* whose name matches a regex.

    The tree is walked recursively with os.walk, and *file_regex* is matched
    against each bare file name from its start (re.match semantics).
    """
    files_paths = []
    pattern = re.compile(file_regex)
    for root, directories, files in os.walk(root_path):
        for file in files:
            if pattern.match(file):
                files_paths.append(os.path.join(root, file))
    return files_paths


@stopwatch
def get_filepaths_with_glob(root_path: str, file_regex: str):
    """Return paths directly inside *root_path* that match a glob pattern.

    Despite its name, *file_regex* is a shell-style glob pattern such as
    "filename_*.csv", not a regular expression. Only *root_path* itself is
    searched; sub-directories are not descended into.
    """
    return glob.glob(os.path.join(root_path, file_regex))
Comparing runtimes of the above functions
On using the above two functions to find 5076 files matching the regex filename_*.csv in a dir called root_path (containing 66,948 files):
>>> glob_files = get_filepaths_with_glob(root_path, 'filename_*.csv')
get_filepaths_with_glob: 0:00:00.176400
>>> oswalk_files = get_filepaths_with_oswalk(root_path,'filename_(.*).csv')
get_filepaths_with_oswalk: 0:03:29.385379
The glob method is much faster and the code for it is shorter.
For your case
For your case, you can probably use something like the following to get your *.zip,*.rar and *.r01 files:
# Run one glob per extension and gather all hits in a single list.
files = []
for ext in ['*.zip', '*.rar', '*.r01']:
    files.extend(get_filepaths_with_glob(root_path, ext))
Here's an alternative using glob.
from pathlib import Path

rootdir = "/mnt/externa/Torrents/completed"
for extension in 'zip rar r01'.split():
    # glob only looks directly inside rootdir; Path.rglob would descend
    # into sub-directories as well.
    for path in Path(rootdir).glob('*.' + extension):
        # path is a Path object, which cannot be added to a str with "+";
        # an f-string formats it as text.
        print(f"match: {path}")
I would do it this way:
import re
from pathlib import Path
def glob_re(path, regex="", glob_mask="**/*", inverse=False):
    """Return glob results under *path*, filtered by a regular expression.

    Args:
        path: Directory in which the glob is evaluated.
        regex: Pattern searched (re.search) in the string form of each
            globbed path. The empty default matches every path.
        glob_mask: Glob pattern passed to Path.glob. The default "**/*"
            recurses into all sub-directories; use "*" for *path* only.
        inverse: When true, keep the paths that do NOT match *regex*.

    Returns:
        A list of path strings. Directories matched by the glob are
        included as well as files.
    """
    # A path is kept when "it matched" differs from "inverse was requested",
    # which covers both branches with a single comprehension.
    return [
        str(f)
        for f in Path(path).glob(glob_mask)
        if bool(re.search(regex, str(f))) != bool(inverse)
    ]
NOTE: per default it will recursively scan all subdirectories. If you want to scan only the current directory then you should explicitly specify glob_mask="*"

How to move the files from one folder to other folder based on time or date

I am trying to move the files from one folder to the other based on the time or date stamp. It's something like I want to keep today file in the same folder and move yesterday file into a different folder.
Currently, I am able to move the files from one folder to other but it's not on date or time-based.
The file name will look something like this.
"output-android_login_scenarios-android-1.43-9859-2019-04-30 11:29:31.542548.html"
-------python
def move(self, srcdir, dstdir):
    """Move every entry of *srcdir* into *dstdir*.

    Both directories are resolved relative to the directory containing this
    source file. *dstdir* is created when it does not exist yet.
    """
    currentDirectory = os.path.dirname(__file__)
    sourceFile = os.path.join(currentDirectory, srcdir)
    destFile = os.path.join(currentDirectory, dstdir)
    os.makedirs(destFile, exist_ok=True)
    for files in os.listdir(sourceFile):
        try:
            shutil.move(os.path.join(sourceFile, files), destFile)
        except OSError as err:
            # Report the real cause per file. The former bare except
            # printed "No file are present" for every kind of failure and
            # abandoned the remaining files.
            print(f"Could not move {files}: {err}")
I think I have something that might work for you. I have made some minor tweaks to your "move" function, so I hope you don't mind. This method will also work if you have more than one 'old' file that needs moving.
Let me know if this helps :)
import os
import re
import shutil
from datetime import datetime

sourceDir = 'C:\\{folders in your directory}\\{folder containing the files}'
destDir = 'C:\\{folders in your directory}\\{folder containing the old files}'

# Matches a YYYY-MM-DD date stamp embedded in a file name. Compiled once
# rather than on every loop iteration.
DATE_REGEX = re.compile(r'\d{4}-\d{2}-\d{2}')


def find_files_to_move(sourceDir, today=None):
    """Return the sorted names of dated .html files not stamped with *today*.

    Args:
        sourceDir: Directory whose entries are inspected.
        today: Date string in YYYY-MM-DD form to keep in place; defaults to
            the current local date.

    Only names ending in ".html" are considered. The first date stamp found
    in the name decides; names without any date stamp are left alone.
    """
    if today is None:
        today = datetime.today().strftime('%Y-%m-%d')
    old_files = []
    for name in os.listdir(sourceDir):
        if not name.endswith('.html'):
            continue
        found = DATE_REGEX.search(name)
        if found and found.group() != today:
            old_files.append(name)
    return sorted(old_files)


def move(file, sourceDir, destDir):
    """Move *file* from *sourceDir* into *destDir*, creating *destDir* if needed.

    Failures are printed rather than raised, so one bad file does not stop
    the remaining files from being moved.
    """
    sourceFile = os.path.join(sourceDir, file)
    os.makedirs(destDir, exist_ok=True)
    try:
        shutil.move(sourceFile, destDir)
    except OSError as err:
        print(f"Could not move {sourceFile}: {err}")


def move_old_files(sourceDir, destDir, today=None):
    """Move every file selected by find_files_to_move and return their names."""
    filesToMove = find_files_to_move(sourceDir, today)
    for file in filesToMove:
        move(file, sourceDir, destDir)
    return filesToMove


# Guarded so that importing this module does not touch the filesystem;
# running it as a script behaves as before.
if __name__ == "__main__":
    move_old_files(sourceDir, destDir)

How to get a specific file name from a path without the extensions python

I have the following path:
# The path must be a quoted string literal; unquoted it is a syntax error.
f_file = "/home/reads_dataset_1/E2_ER/E2_ER_exp1_L1.fastq.gz"
And I'd like to get only the last file name without the 2 extensions:
E2_ER_exp1_L1
Tried:
# splitext removes only the final extension and keeps the directory part of f_file.
sample_name = os.path.splitext(f_file)[0]
But I got the whole name of the path without the last extension.
may be funny and dirty, but works :)
# basename drops the directory part; each of the two nested splitext calls then strips one extension.
sample_name = os.path.splitext(os.path.splitext(os.path.basename(f_file))[0])[0]
also can use shorter, nicer version:
# Keeps the text before the first "." of the basename, so a dot inside the name itself would cut it short too.
sample_name = os.path.basename(f_file).split('.')[0]
I know this is old, but since pathlib was not mentioned:
Use pathlib.Path
from pathlib import Path

f_file = "/home/reads_dataset_1/E2_ER/E2_ER_exp1_L1.fastq.gz"
f_path = Path(f_file)
# Peel off one suffix per iteration until none is left. Rebuilding the path
# from its stem also drops the directory part.
while f_path.suffix:
    f_path = Path(f_path.stem)
print(f_path.stem)
You'll get:
E2_ER_exp1_L1
Or if you know exactly that there will be two suffixes:
from pathlib import Path

f_file = "/home/reads_dataset_1/E2_ER/E2_ER_exp1_L1.fastq.gz"
# The inner .stem removes the last suffix; wrapping the result in a new
# Path and taking .stem again removes the one before it.
stem2 = Path(Path(f_file).stem).stem
print(stem2)

How to display Folders and recent items

I have 2 questions in trying to retrieve a set of data from a directory and displays it out into the ListWidget.
As I am a linux user, I set my ListWidget to read my directory from Desktop in which insides contains say 5 folders and 5 misc items (.txt, .py etc)
Currently I am trying to make my ListWidget to display just the folders but apparently it does that but it also displays all the items, making it a total of 10 items instead of 5.
I tried looking up on the net but I am unable to find any info. Can someone help me?
Pertaining to Qns 1, I am wondering if it is possible to display the top 3 recent folders in the ListWidget, if a checkbox is being checked?
import glob
import os
def test(object):
    """Create a QListWidget filled with the entries of the test directory."""
    testList = QListWidget()
    # os.listdir returns every entry of the directory, files as well as
    # sub-folders, so all of them end up in the widget.
    localDir = os.listdir("/u/ykt/Desktop/test")
    testList.addItems(localDir)
Maybe you should try "QFileDialog" like the following:
class MyWidget(QDialog):
    """Dialog that asks for a directory when constructed and prints the choice."""

    def __init__(self):
        QDialog.__init__(self)
        # The ShowDirsOnly option is passed so the picker lists folders only.
        fileNames = QFileDialog.getExistingDirectory(self, "list dir", "C:\\", QFileDialog.ShowDirsOnly)
        # print is a function in Python 3.
        print(fileNames)


if __name__ == "__main__":
    # sys is needed for sys.argv and is not imported anywhere in this snippet.
    import sys

    app = QApplication(sys.argv)
    widget = MyWidget()
    widget.show()
    app.exec_()
2nd question, you could reference to this: enter link description here
I guess that you are expecting that os.listdir() will return only the directory names from the given path. Actually it returns the file names too. If you want to add only directories to the listWidget, do the following:
import os
osp = os.path
def test(object):
    """Create a QListWidget listing only the sub-directories of the test folder.

    Returns the populated widget so the caller can display it.
    """
    testList = QListWidget()
    dirPath = "/u/ykt/Desktop/test"
    localDir = os.listdir(dirPath)
    # The loop previously iterated over the misspelled name "lacalDir",
    # which raised NameError. The loop variable is also no longer called
    # "dir", which shadowed the builtin.
    for entry in localDir:
        path = osp.join(dirPath, entry)
        # os.listdir returns files too; keep directories only.
        if osp.isdir(path):
            testList.addItem(entry)
    return testList
This will add only directories to the listWidget ignoring the files.
If you want to get the access time for the files and/or folders, use the following method:
import os.path as osp
# getatime gives the time of last access as a number of seconds since the epoch.
# NOTE(review): if "recent" should mean last modified, osp.getmtime may be the better fit — confirm.
accessTime = osp.getatime("path/to/dir") # returns the timestamp
Get access time for all the directories and one which has the greatest value is the latest accessed directory. This way you can get the latest accessed 3 directories.

Resources