Delete each file older than X days in Y folder - python-3.x

I wrote this but it doesn't work.
In `giorni` I put the maximum number of days a file may stay on the SD card, and `file_dir` is the default location where the files are analyzed.
import os
from datetime import datetime, timedelta
# Directory tree to scan and the maximum age (in days) a file may reach.
file_dir = "/home/pi/" #location
giorni = 2 #n max of days
# Cutoff instant: the current time minus `giorni` days.
giorni_pass = datetime.now() - timedelta(giorni)
for root, dirs, files in os.walk(file_dir):
for file in files:
# NOTE(review): `file` is a bare name yielded by os.walk and is not joined
# with `root`, so getctime/remove resolve it against the current working
# directory rather than the directory being walked.
filetime = datetime.fromtimestamp(os.path.getctime(file))
# NOTE(review): this condition is true for files NEWER than the cutoff.
if filetime > giorni_pass:
os.remove(file)

Solved with:
# Walk the whole tree under file_dir and delete every file older than the
# cutoff (giorni_pass = now - giorni days).
for root, dirs, files in os.walk(file_dir):
    for file in files:
        # os.walk yields bare file names; join them with `root` (the directory
        # currently being walked) so files in subdirectories resolve correctly.
        path = os.path.join(root, file)
        filetime = datetime.fromtimestamp(os.path.getctime(path))
        # "Older than the cutoff" means the timestamp lies BEFORE giorni_pass.
        if filetime < giorni_pass:
            os.remove(path)
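Because "filenames" (here `files`) is a list of bare file names relative to the directory being walked, you must first build the full path before operating on those files, using path = os.path.join(file_dir, file). Note that for files located in subdirectories the directory being walked is `root`, not `file_dir`, so `root` is the value to join with there.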

Related

Python read oldest file first

I have object and attributes data in separate csv files. there are 3 different types of objects.
The directory may contain other files, but I only have to read and process the object and attribute files. After reading an object file, I then have to read its respective attribute file.
Below is code and files
# Collect the object-file names found in dir_path, one list per object type.
plant = []
flower = []
person = []
for file_name in os.listdir(dir_path):
# Only regular files are considered; sub-directories are skipped.
if os.path.isfile(os.path.join(dir_path, file_name)):
# Names such as "plant_attributes_file_..." do not start with these prefixes,
# so only the object files end up in the lists.
if file_name.startswith('plant_file'):
plant.append(file_name)
if file_name.startswith('person_file'):
person.append(file_name)
if file_name.startswith('flower_file'):
flower.append(file_name)
# Read every person object file together with its attribute file.
# NOTE(review): os.listdir returns names in arbitrary order, so the lists are
# not sorted by the timestamp embedded in the file names.
for file_name in person:
# NOTE(review): plain string concatenation; assumes dir_path ends with a
# path separator -- confirm.
object_file_path = dir_path + file_name
# Derive the attribute file name, e.g.
# person_file_<ts>.csv -> person_attributes_file_<ts>.csv
attribute_file_path = dir_path + file_name.replace('file','attributes_file')
# `pd` is presumably pandas; each iteration overwrites the previous result.
read_object_csv = pd.read_csv(object_file_path)
read_attribute_csv = pd.read_csv(attribute_file_path)
# Same processing for the flower files.
# NOTE(review): the `plant` list is filled above but not read in the code shown.
for file_name in flower:
object_file_path = dir_path + file_name
attribute_file_path = dir_path + file_name.replace('file','attributes_file')
read_object_csv = pd.read_csv(object_file_path)
read_attribute_csv = pd.read_csv(attribute_file_path)
file name contains date and time in the format YYYYMMDDHHMMSS . Sample file names are
plant_attributes_file_20221013134403.csv
plant_attributes_file_20221013142151.csv
plant_attributes_file_20221013142455.csv
plant_file_20221013134403.csv
plant_file_20221013142151.csv
plant_file_20221013142455.csv
person_file_20221012134948.csv
person_file_20221012140706.csv
person_attributes_file_20221012134948.csv
person_attributes_file_20221012140706.csv
How can we sort the file names in each list by timestamp, so that the oldest file is loaded first and the latest file is loaded last?

How do I get my Python script to work on a networked file?

I am working on a script to read the creation date from a source file and change the creation dates on the destination files to match it.
It reads the creation dates for files stored locally and on the NAS.
It changes the creation dates for files stored locally.
However, I can't get it to change the creation dates for files stored on the NAS. I don't get an error message, it just doesn't change the creation date.
Any ideas of how I can get this to work? Or will I have to copy the files to a local folder, make the changes, then copy them back?
creation_dates.py
# Get Creation Date of source file
def get_creeation_date(fn):
    """Return the ctime of file *fn* as a ``time.ctime`` formatted string.

    ``os.path.getctime`` reports the creation time on Windows and the time
    of the last metadata change on Unix.

    Args:
        fn: Path of the file to inspect.

    Returns:
        A string such as ``"Sun Sep 13 12:26:40 2020"`` (local time, whole
        seconds only).

    Raises:
        OSError: If *fn* does not exist or is inaccessible.
    """
    return time.ctime(os.path.getctime(fn))
# Convert datetime to integer (for conversion)
def convert_to_integer(dt_time_str):
    """Convert a ``time.ctime`` style string to whole seconds since the epoch.

    Args:
        dt_time_str: Timestamp text in the ``"%a %b %d %H:%M:%S %Y"`` layout,
            e.g. ``"Sun Sep 13 12:26:40 2020"``.

    Returns:
        The timestamp as an ``int``.

    Raises:
        ValueError: If *dt_time_str* does not match the expected layout.
    """
    dt_time = datetime.strptime(dt_time_str, "%a %b %d %H:%M:%S %Y")
    # The parsed datetime is naive, so timestamp() interprets it as local
    # time -- the same convention time.ctime used to produce the string.
    return int(dt_time.timestamp())
# Change the creation date of the destination files based on the source file
# Working with module from this post: >>https://stackoverflow.com/q/47839248/11792868
def changeFileCreationTime(fname, newtime_int):
    """Set the creation time of *fname* through the Win32 file API.

    Args:
        fname: Path of an existing file whose creation time is rewritten.
        newtime_int: New creation time as seconds since the epoch.
    """
    wintime = pywintypes.Time(newtime_int)
    # Open the existing file for writing while still letting other handles
    # read, write or delete it.
    winfile = win32file.CreateFile(
        fname,
        win32con.GENERIC_WRITE,
        win32con.FILE_SHARE_READ
        | win32con.FILE_SHARE_WRITE
        | win32con.FILE_SHARE_DELETE,
        None,
        win32con.OPEN_EXISTING,
        win32con.FILE_ATTRIBUTE_NORMAL,
        None,
    )
    try:
        # Only the creation time is supplied; the two remaining time
        # arguments are passed as None.
        win32file.SetFileTime(winfile, wintime, None, None)
    finally:
        # Release the handle even when SetFileTime raises.
        winfile.close()
def main(fn_src, fn_dest):
    """Give *fn_dest* the same creation time as *fn_src*.

    Args:
        fn_src: File whose creation time is read.
        fn_dest: File whose creation time is overwritten.
    """
    src_created = get_creeation_date(fn_src)
    changeFileCreationTime(fn_dest, convert_to_integer(src_created))
if __name__ == "__main__":
    file_src = r"\\path\to\source\file 1.mp4"  # Located in NAS folder
    file_src_dir, file_src_name_ext = os.path.split(file_src)
    file_src_name, file_src_ext = os.path.splitext(file_src_name_ext)
    # Replace a trailing " <digit>" (or the bare end of the name) with " 2.jpg".
    # `count` is passed by keyword; the positional form is deprecated.
    file_dest_jpg_1 = re.sub(r"(\s\d$|$)", " 2.jpg", file_src_name, count=1)  # file to be created to local folder
    file_dest_jpg_2 = "\\\\path\\to\\source\\" + file_src_name + "2.jpg"  # file to be created to NAS folder
    # Also tried r"\\path\to\source\" + file_src_name + "2.jpg"
    # (note: a raw string literal cannot end with a single backslash)

    # Create empty placeholder files for any destination that is missing.
    for file_dest in (file_dest_jpg_1, file_dest_jpg_2):
        if not os.path.exists(file_dest):
            with open(file_dest, "w"):
                pass

    main(file_src, file_dest_jpg_1)  # This works
    main(file_src, file_dest_jpg_2)  # This does not work

Python program to list all folders with date modified

I need a Python program to list all folders with date modified. When I run it, all of the modification dates are the same. What am I doing wrong?
Here is the code that I'm using:
import os, time, stat
path = 'h:\\lance\\'
folders = []
# r=root, d=directories, f = files
for r, d, f in os.walk(path):
for folder in d:
modTimesinceEpoc = os.path.getctime(path)
modificationTime = time.strftime('%Y-%m-%d', time.localtime(modTimesinceEpoc))
folders.append(os.path.join(r, folder))
# [0:5] just grabs the first 5 folders, helpful if the total amount of folders is large
for f in folders [0:5]:
print(f, "Last Modified Time : ", modificationTime)
Output:
h:\lance\Return series project Last Modified Time : 2019-09-23
h:\lance\Forecast Pro Last Modified Time : 2019-09-23
h:\lance\Custom Price Files Last Modified Time : 2019-09-23
h:\lance\MBO and responsibilities Last Modified Time : 2019-09-23
h:\lance\.vscode Last Modified Time : 2019-09-23
I think this is what you are looking for
import os, time, stat
# List every folder below `path` together with its last-modified date.
path = 'h:\\lance\\'
folders = []
# r=root, d=directories, f = files
for r, d, f in os.walk(path):
    for folder in d:
        # Query each folder's own path; querying `path` itself is what made
        # every date identical.
        location = os.path.join(r, folder)
        # getmtime gives the last-modification time. getctime would give the
        # creation time on Windows (metadata-change time on Unix).
        modTimesinceEpoc = os.path.getmtime(location)
        modificationTime = time.strftime('%Y-%m-%d', time.localtime(modTimesinceEpoc))
        # Store the date next to the folder so each entry keeps its own value.
        folders.append((location, modificationTime))
# [0:5] just grabs the first 5 folders, helpful if the total amount of folders is large
for f in folders[:5]:
    print(f)

Iterate through folder/sub-directories and move found regex files into new folder

I've got a folder/sub-directories structure as follows:
-main_folder
-sub_1
322.txt
024.ops
-sub_2
977.txt
004.txt
-sub_3
396.xml
059.ops
I'm trying to iterate with os.walk through the folder and its sub-directories and collect the file names inside these folders. When a name matches a regex rule, I want to either store its path in a list or directly move that file into a new folder (mkdir).
I've already got the regex done to find the documents I want.
For example:
find_000_099 = r'\b(0\d{2}.\w{1,4})'
find_300_399 = r'\b(3\d{2}.\w{1,4})'
find_900_999 = r'\b(9\d{2}.\w{1,4})'
I wish my expected result to be like:
-main_folder
-sub_from_000_099
024.ops
004.txt
059.ops
-sub_from_300_399
322.txt
396.xml
-sub_from_900_999
977.txt
You can use the below-given code, which moves the file from its initial directory to the desired directory.
import os
import re
import shutil
# Patterns selecting file names by their leading three-digit number.
find_000_099 = r'\b(0\d{2}.\w{1,4})'
find_300_399 = r'\b(3\d{2}.\w{1,4})'
find_900_999 = r'\b(9\d{2}.\w{1,4})'
count = 0
for roots, dirs, files in os.walk('Directory Path'):
    #print(roots, len(dirs), len(files))
    if count == 0:
        # The first tuple yielded by os.walk is the top directory: create the
        # three target folders there. exist_ok avoids a crash on a re-run.
        parent_dir = roots
        target_names = ("sub_from_000_099", "sub_from_300_399", "sub_from_900_999")
        for target_name in target_names:
            os.makedirs(os.path.join(parent_dir, target_name), exist_ok=True)
        # Prune the target folders from the walk so that files already moved
        # there are not visited again.
        dirs[:] = [name for name in dirs if name not in target_names]
        count += 1
    else:
        print(count)
    # Check the files of every visited directory and move each match into the
    # target folder of its number range.
    for file in files:
        print(file)
        source = os.path.join(roots, file)
        if re.match(find_000_099, file):
            shutil.move(source, os.path.join(parent_dir, "sub_from_000_099", file))
        elif re.match(find_300_399, file):
            shutil.move(source, os.path.join(parent_dir, "sub_from_300_399", file))
        elif re.match(find_900_999, file):
            shutil.move(source, os.path.join(parent_dir, "sub_from_900_999", file))
It's a skeleton code, which fulfills your requirements.
You can add checks on creating directories, by first checking whether the directory exists or not, and other checks as per your needs.
Here is a simpler way, using pathlib and shutil
import re
import shutil
from pathlib import Path
# Destination root; created on first use and reused on later runs.
new_path = Path("new_folder")
new_path.mkdir(exist_ok=True)
# Getting all files in the main directory
files = Path("main_folder").rglob("*.*")
# Maps each file-name pattern to the sub-folder that receives its matches.
regs = {
    r'\b(0\d{2}.\w{1,4})': "sub_1",  # find_000_099
    r'\b(3\d{2}.\w{1,4})': "sub_2",  # find_300_399
    r'\b(9\d{2}.\w{1,4})': "sub_3",  # find_900_999
}
for f in files:
    # "*.*" also matches directories whose name contains a dot, and
    # shutil.copy cannot copy a directory.
    if not f.is_file():
        continue
    for reg, sub_name in regs.items():
        if re.search(reg, f.name):
            temp_path = new_path / sub_name
            temp_path.mkdir(exist_ok=True)
            # Change the following method to 'move' after testing it
            shutil.copy(f, temp_path / f.name)
            # The first matching pattern wins.
            break

Run code on specific files in a directory separately (by the name of file)

I have N files in the same folder with different index numbers like
Fe_1Sec_1_.txt
Fe_1Sec_2_.txt
Fe_1Sec_3_.txt
Fe_2Sec_1_.txt
Fe_2Sec_2_.txt
Fe_2Sec_3_.txt
.
.
.
and so on
Ex: If I need to run my code with only the files with time = 1 Sec, I can make it manually as follow:
path = "input/*_1Sec_*.txt"
files = glob.glob(path)
print(files)
which gave me:
Out[103]: ['input\\Fe_1Sec_1_.txt', 'input\\Fe_1Sec_2_.txt', 'input\\Fe_1Sec_3_.txt']
In case I need to run my code for all files separately (depending on the measurement time in seconds, i.e. the name of the file),
I tried this code to get the path for each time of measurement:
time = 0
while time < 4:
time += 1
t = str(time)
path = ('"input/*_'+t+'Sec_*.txt"')
which gives me:
"input/*_1Sec_*.txt"
"input/*_2Sec_*.txt"
"input/*_3Sec_*.txt"
"input/*_4Sec_*.txt"
After that I tried to use this path as follow:
files = glob.glob(path)
print(files)
But it doesn't find the wanted files and gives me:
"input/*_1Sec_*.txt"
[]
"input/*_2Sec_*.txt"
[]
"input/*_3Sec_*.txt"
[]
"input/*_4Sec_*.txt"
[]
Any suggestions, please??
I think the best way would be to simply do
# Process the files of each measurement time separately.
for time in range(1, 5):  # 1,2,3,4
    # Build the pattern without embedding literal quote characters, which is
    # what made the earlier glob calls return empty lists.
    glob_path = 'input/*_{}Sec_*.txt'.format(time)
    for file_path in glob.glob(glob_path):
        do_something(file_path, measurement)  # or whatever

Resources