Python: I want to rename multiple folders with modified date in its name - python-3.x

I want to rename multiple folders.
Example folder structure:
Main_Folder|
|winter(2017-12-18)
|summer(2018-03-26)
Many times I save pictures to different folders, but I can't remember which folder I saved the pictures to.
Example of what I want to achieve:
summer(2018-03-26) when I save new pictures to that folder and I run program
,I want the program to rename that folder to summer(2018-08-14)<--this is modified date of that folder
import os
import datetime
def modifiedFolderName(folderdir):
    """Rename each sub-folder of *folderdir* that carries a "(...)" date
    suffix so the suffix reflects the folder's last-modified date.

    Example: "summer(2018-03-26)" becomes "summer(2018-08-14)" once new
    pictures have been saved into it.  Entries without "(" in their
    name are skipped.

    Fixes vs. the original sketch: the `if` was missing its colon (a
    syntax error), the mtime-to-name logic was unfinished, and
    getmtime/rename were given bare folder names instead of paths
    joined with the target directory.
    """
    target = folderdir
    for foldername in os.listdir(target):
        paren = foldername.find("(")
        if paren == -1:
            # No "(modified date)" suffix on this entry: skip it.
            continue
        fullpath = os.path.join(target, foldername)
        # Folder's own last-modified time -> "(YYYY-MM-DD)" tag.
        mtime = os.path.getmtime(fullpath)
        datetag = datetime.datetime.fromtimestamp(mtime).strftime("(%Y-%m-%d)")
        newname = foldername[:paren] + datetag
        os.rename(fullpath, os.path.join(target, newname))
def trimFolderName(foldername):
    """Return *foldername* with everything from the first "(" onward
    removed (i.e. the name without its modified-date suffix).

    Note: when there is no "(", str.find returns -1 and the slice drops
    the final character — callers are expected to pass names that do
    contain a date suffix.
    """
    cut = foldername.find("(")
    return foldername[:cut]
def main():
    """Entry point: refresh the date suffixes of the folders in the
    target directory (fill in the directory path below)."""
    modifiedFolderName("")  # directory to process


if __name__ == "__main__":
    main()

References:
https://docs.python.org/3.5/library/datetime.html#datetime.datetime.fromtimestamp
https://docs.python.org/3.5/library/datetime.html#datetime.datetime.strftime
https://docs.python.org/3.5/library/datetime.html#strftime-strptime-behavior
Solution:
# Build the "(YYYY-MM-DD)" tag from the folder's last-modified time.
time = os.path.getmtime(foldername)
lastModified = datetime.datetime.fromtimestamp(time)
lastModifiedStr = lastModified.strftime("(%Y-%m-%d)")
# just for debugging, to see what it is doing
# (the original print("Last modified: %s", lastModifiedStr) passed two
# arguments — print() does no %-interpolation, so "%s" was printed verbatim)
print("Last modified: %s" % lastModifiedStr)
newFolderName = trimFolderName(foldername) + lastModifiedStr
os.rename(foldername, newFolderName)

Related

How to copy merge files of two different directories with different extensions into one directory and remove the duplicated ones

I would need a Python function which performs below action:
I have two directories which in one of them I have files with .xml format and in the other one I have files with .pdf format. To simplify things consider this example:
Directory 1: a.xml, b.xml, c.xml
Directory 2: a.pdf, c.pdf, d.pdf
Output:
Directory 3: a.xml, b.xml, c.xml, d.pdf
As you can see the priority is with the xml files in the case that both extensions have similar names.
I would be thankful for your help.
You need to use the shutil module and the os module to achieve this. This function will work on the following assumption:
A given directory has all files with the same extension
The priority_directory will be the directory with file extensions to be prioritized
The secondary_directory will be the directory with file extensions to be dropped in case of a name collision
Try:
import os,shutil
def copy_files(priority_directory, secondary_directory, destination="new_directory"):
    """Merge two directories into a freshly created *destination*.

    Every file from *priority_directory* is copied.  A file from
    *secondary_directory* is copied only when no file with the same
    base name (extension ignored) exists in *priority_directory*.

    Raises FileExistsError if *destination* already exists (same as
    the original os.mkdir behaviour).
    """
    # Base names that win on a collision; a set gives O(1) membership
    # tests instead of the original O(n) list scan per file.
    priority_names = {os.path.splitext(name)[0] for name in os.listdir(priority_directory)}
    os.mkdir(destination)  # make a new directory
    for name in os.listdir(priority_directory):
        # Copy the priority directory as-is.
        shutil.copy(os.path.join(priority_directory, name),
                    os.path.join(destination, name))
    for name in os.listdir(secondary_directory):
        # Drop secondary files whose base name collides with a priority file.
        if os.path.splitext(name)[0] not in priority_names:
            shutil.copy(os.path.join(secondary_directory, name),
                        os.path.join(destination, name))
    print(os.listdir(destination))
Let's run it with your directory names as arguments:
copy_files('directory_1','directory_2','directory_3')
You can now check a new directory with the name directory_3 will be created with the desired files in it.
This will work for all such similar cases no matter what the extension is.
Note: there should not really be a need to do this, since a directory can hold two files with the same name as long as their extensions differ.
Rough working solution:
import os
from shutil import copy2
d1 = './d1/'
d2 = './d2/'
d3 = './d3/'
ext_1 = '.xml'
ext_2 = '.pdf'
def get_files(d: str, files: list, keep_ext: str = '.xml', drop_ext: str = '.pdf') -> None:
    """Append (directory, filename) pairs from directory *d* to *files*.

    A file ending in *drop_ext* is skipped when a file with the same
    stem and *keep_ext* is already present in *files* (xml wins over
    pdf by default).  The extensions used to be hard-coded module
    globals (ext_1/ext_2); they are now parameters with the same
    defaults, so existing call sites behave identically.
    """
    # (The original also computed os.fsencode(d) into an unused local.)
    for entry in os.listdir(d):
        name = os.fsdecode(entry)
        if name.endswith(drop_ext):
            stem = name[:-len(drop_ext)]
            # Skip this file if its keep_ext twin was already collected.
            if any(collected == stem + keep_ext for _, collected in files):
                continue
        files.append((d, name))
# Collect (directory, filename) pairs from both source directories;
# xml files found first suppress same-stem pdf files (see get_files).
files = []
get_files(d1, files)
get_files(d2, files)
# d1/d2 are defined above with trailing '/', so d+file is a valid path;
# copy2 copies each file into d3 preserving metadata.
for d, file in files:
    copy2(d+file, d3)
I'll see if I can get it to look/perform better.

For Loop to Move and Rename .html Files - Python 3

I'm asking for help in trying to create a loop to make this script go through all files in a local directory. Currently I have this script working with a single HTML file, but would like it so it picks the first file in the directory and just loops until it gets to the last file in the directory.
Another way to help would be adding a line to the string would add a (1), (2), (3), etc. at the end if the names are duplicate.
Can anyone help with renaming thousands of files with a string that is parsed with BeautifulSoup4. Each file contains a name and reference number at the same position/line. Could be same name and reference number, or could be different reference number with same name.
import bs4, shutil, os

# Copy one scraped .html file into ./subfolder and rename the copy after
# the text of the third ".bodycopy" element found in the page.
src_dir = os.getcwd()
print(src_dir)
# os.mkdir returns None, so the original "dest_dir = os.mkdir(...)"
# assignment was misleading; the real path is built below.
os.mkdir('subfolder')
dest_dir = src_dir+"/subfolder"
src_file = os.path.join(src_dir, 'example_filename_here.html')
shutil.copy(src_file, dest_dir)
# Close the source file deterministically instead of leaking the handle.
with open('example_filename_here.html') as example_file:
    exampleSoup = bs4.BeautifulSoup(example_file.read(), 'html.parser')
elems = exampleSoup.select('.bodycopy')
dst_file = os.path.join(dest_dir, 'example_filename_here.html')
# Rename the copied file after the parsed reference text.
new_dst_file_name = os.path.join(dest_dir, elems[2].getText()+ '.html')
os.rename(dst_file, new_dst_file_name)
os.chdir(dest_dir)
print(elems[2].getText())

Having trouble using zipfile.ZipFile.extractall (Already read the docs)

I have a folder with many zipfiles; most of these zipfiles contain shapefiles, and some of them have subfolders which contain zipfiles that contain shapefiles. I am trying to extract everything into one main folder without keeping any folder structure. This is where I am now:
import os, zipfile
def getListOfFiles(dirName):
    """Return the full paths of every file under *dirName*, recursing
    into all sub-directories.

    The hand-rolled listdir recursion is replaced by the stdlib
    os.walk, which visits the same files without concatenating a list
    per directory level.
    """
    allFiles = []
    for root, _dirs, filenames in os.walk(dirName):
        for name in filenames:
            allFiles.append(os.path.join(root, name))
    return allFiles
def main():
    """Extract every zip archive found (recursively) under dirName.

    zipfile.ZipFile.extractall is an instance method: it must be called
    on a ZipFile object, not handed a path string via the class — doing
    the latter raised "AttributeError: 'str' object has no attribute
    'namelist'" in the original.
    """
    dirName = r'C:\Users\myusername\My_Dataset'
    # Get the list of all files in directory tree at given path
    listOfFiles = getListOfFiles(dirName)
    for elem in listOfFiles:
        print(elem)
        # Open the archive, extract into the current directory, and
        # close the handle promptly via the context manager.
        with zipfile.ZipFile(elem) as archive:
            archive.extractall()
        print("****************")


if __name__ == '__main__':
    main()
This script prints all the shapefiles (including the ones under subfolders). Now I need to extract all these listed shapefiles into one main folder. I try zipfile.ZipFile.extractall(elem) but it doesn't work.
line 1611, in extractall
members = self.namelist()
AttributeError: 'str' object has no attribute 'namelist'
Is the error I'm getting. zipfile.ZipFile.extractall(elem) is the line that doesn't work. I imagine it expects one zipfile but I'm trying to feed it a folder (or a list in this case?)
How would I change this script so that it extracts my listed shapefiles into a folder (preferably a new folder)
You need to make an instance of ZipFile first and use extractall on this instance:
# extractall must be called on a ZipFile *instance*, not on the class
# with a path string — construct the instance first, then extract.
for elem in listOfFiles:
    my_zipfile = zipfile.ZipFile(elem)
    my_zipfile.extractall()
I have added this code block to my script and it works now.
def getfiles(path):
    """Yield the path of every file under *path* (recursively); when
    *path* is not a directory, yield *path* itself."""
    if not os.path.isdir(path):
        yield path
        return
    for root, _dirs, names in os.walk(path):
        for entry in names:
            yield os.path.join(root, entry)
# Flatten-copy every file found under fromdir into one destination folder.
fromdir = r"C:\Users\username\My_Dataset\new"
for f in getfiles(fromdir):
    # NOTE(review): os.walk on Windows joins paths with '\\', so splitting
    # on '/' usually returns the whole path — os.path.basename(f) would be
    # the safe way to isolate the file name. Confirm against real paths.
    filename = str.split(f, '/')[-1]
    # NOTE(review): `destination` is never defined in this snippet, and
    # `shutil` is used below without a visible import — confirm against
    # the full script this was lifted from.
    if os.path.isfile(destination + filename):
        # Name collision: derive a unique name from the relative path.
        filename = f.replace(fromdir, "", 1).replace("/", "_")
    # os.rename(f, destination+filename)
    shutil.copy2(f, r"C:\Users\username\Documents\flatten")

Dynamically read and load files into Python

In Python, is there a way to import csv or text files dynamically? We process multiple files a week that have different names, and I don't want to update the with open statement manually each time the script runs. I have a function to read the file name, which I pass to a variable for later use in my code.
I can see and read the files in the directory but I am not sure if I can add the contents of the folder into a variable that can then be used in the with open statement.
import os
os.chdir('T:\Credit Suite')
DIR = os.listdir()
print(DIR)
import csv,sys

# Convert one CSV export into the '<S>...' upload format, one output
# line per input record.  Both file handles are closed via `with` —
# the original's open('test2.txt', 'w') writer handle was never closed.
with open('July 19.csv', mode='r') as csv_file, \
        open('test2.txt', 'w', newline='') as out_file:
    ROWCOUNT = 0
    FILENAME = (csv_file.name)
    output = csv.writer(out_file)
    reader = csv.DictReader(csv_file)
    for records in reader:
        ROWCOUNT += 1
        EIN = records['EIN']
        DATE = records['Date Established']
        DUNS = records['DUNS #']
        COMPANYNAME = records['Company Name']
        # Compose the fixed-format line and upper-case the whole thing.
        lineout =('<S>'+ EIN+'$EIN '+EIN+'*'+DATE+')'+ COMPANYNAME +'#D-U-N-S '+DUNS).upper()
        output.writerow([lineout])
print("writing completed")
I will be running my script when a file hits a folder using a monitor and scheduler in an automated process. I want the code to run no matter what the inbound file name is labeled as in the folder and I wont have to update the code manually for the file name or change the file name to a standard name each time.
# Walk the working tree and remember the name of the last .csv seen.
os.chdir('T:\Credit Suite')
for root, dirs, files in os.walk("."):
    for filename in files:
        if filename.endswith('.csv'):
            f=filename
import csv,sys
# NOTE(review): this snippet is truncated — the `with` statement below has
# no body, and only the last matching file name survives the walk above.
with open(f,mode='r') as csv_file:
os.listdir() returns a list of all the files in the dir, you can just loop all the files:
import os
os.chdir('T:\Credit Suite')
DIR = os.listdir()
print(DIR)
import csv,sys

# Process every CSV currently in the folder, whatever its name, and
# write a companion "<name>_output.txt" file next to it.
for file in DIR:
    if file.endswith('.csv'):
        with open(file, mode='r') as csv_file:
            ROWCOUNT = 0
            FILENAME = (csv_file.name)
            # Close the output handle too — the original leaked it.
            with open(FILENAME + '_output.txt', 'w', newline='') as out_file:
                output = csv.writer(out_file)
                reader = csv.DictReader(csv_file)
                all_lines = []
                for records in reader:
                    ROWCOUNT += 1
                    EIN = records['EIN']
                    DATE = records['Date Established']
                    DUNS = records['DUNS #']
                    COMPANYNAME = records['Company Name']
                    lineout =('<S>'+ EIN+'$EIN '+EIN+'*'+DATE+')'+ COMPANYNAME +'#D-U-N-S '+DUNS).upper()
                    all_lines.append(lineout)
                # One row per record — writerow(all_lines) crammed every
                # record into a single CSV row, unlike the original script.
                output.writerows([line] for line in all_lines)
            print("writing completed")
        # remove file to avoid reprocessing the file again in the next run
        # of the script, or just move it elsewhere with os.rename
        # (done after the `with` so the handle is closed before removal)
        os.remove(file)

Recursive data scraping in excel sheets within nested folder structure

Help me out please. I would like to traverse through a directory structure that looks like this:
Topdir > subdir 1 > excel 1/2/3
Topdir > subdir 2 > excel 4
etc
I am scraping the excel's column B for a string, and that is working nicely. However, my script only goes through the topdir, and doesn't go into the subdirs. Below is my code:
import openpyxl, os, sys, warnings, glob
warnings.simplefilter("ignore")
targetString = str("Sample Error")
scriptPath = os.path.abspath(__file__)
outputFile = open('logging.txt', "w+")
def scrapeSheets():
    """Scan each *data-eval.xlsm workbook in the current directory and
    log every row of column B whose value equals targetString (module
    global), writing matches to outputFile (module global)."""
    for entry in os.listdir(path='.'):
        if not entry.endswith("data-eval.xlsm"):
            continue
        print("Working on:", entry)
        workbook = openpyxl.load_workbook(entry, data_only=True)
        data_sheet = workbook["data-sheet"]
        outputFile.write("{}\n".format(entry))
        for cell in data_sheet["B"]:
            if cell.value == targetString:
                print(cell.row, cell.value)
                outputFile.write("\t{}\t{}\n".format(cell.row, cell.value))
def mainLoop():
    # NOTE(review): `filename` is never used and scrapeSheets() takes no
    # argument, so every recursive glob match re-scans the current
    # directory from scratch — sheets in cwd are scraped once per match
    # and sub-directory files are still never opened.  The fix belongs
    # inside scrapeSheets (iterate the glob there), as the author later
    # concluded; left unchanged here to avoid altering its interface.
    for filename in glob.iglob('**/*.xlsm', recursive=True):
        scrapeSheets()


if __name__ == "__main__":
    mainLoop()
As I said, the scraping works, but I cannot go into the subfolders. I have a hunch it has to do with the line
for i in os.listdir(path='.')
however, I don't know the solution to make the loop's variable increment.
You can try like this:
# Walk each top-level entry and scrape matching workbooks.  Fix vs. the
# posted answer: it logged outputFile.write("{}\n".format(i)), but `i`
# does not exist in this snippet (copied from the question's listdir
# loop) — the file name `f` is what should be logged.
for dirname in os.listdir(path='.'):
    for main_dir, dirs, files in os.walk(dirname):
        for f in files:
            if f.endswith("data-eval.xlsm"):
                print("Working on:", f)
                # NOTE(review): `f` is a bare name; load_workbook only finds
                # it when the walk stays in cwd — os.path.join(main_dir, f)
                # would be safer.  Confirm against the real layout.
                wb = openpyxl.load_workbook(f, data_only=True)
                sheet = wb["data-sheet"]
                outputFile.write("{}\n".format(f))
                for cellObj in sheet["B"]:
                    if cellObj.value == targetString:
                        print(cellObj.row, cellObj.value)
                        outputFile.write("\t{}\t{}\n".format(cellObj.row, cellObj.value))
Explanation:
Using listdir iterate over the directories.
for dirname in os.listdir(path='.')
Iterate over the sub-directories and the files inside those using :
for main_dir, dirs, files in os.walk(dirname):
Iterate over the files and continue your logic.
for f in files:
if f.endswith("data-eval.xlsm"):
print("Working on:", f)
wb = openpyxl.load_workbook(f, data_only=True)
sheet = wb["data-sheet"]
outputFile.write("{}\n".format(f))
for cellObj in sheet["B"]:
if cellObj.value == targetString:
print(cellObj.row, cellObj.value)
outputFile.write("\t{}\t{}\n".format(cellObj.row, cellObj.value))
For future reference, I figured out that implementing the for filename in glob.iglob part in the scraping function instead of the os.listdir line works perfectly and loops through the contents of the script's folder and its subfolders.

Resources