Rename by Appending a prefix to a file name - python-3.x

I would appreciate it if someone could give me a hint. I have to rename a batch of files by adding a prefix (date) to each file name, so the files are organized in order in the folder: from oldest to newest.
The date itself is contained inside the file. Therefore, my script has to open the file, find the date, and use it as a "prefix" to add to the file name.
from datetime import datetime
import re
import os
file = open('blog_entry.txt', 'r', encoding='utf-8')
source_code = file.read()
<...>
# convert the date:
date = datetime.strptime(date_only, "%d-%b-%Y")
new_date = date.strftime('%Y_%m_%d')
The new_date variable should be used as a "prefix", so the new file name looks like "yyyy_mm_dd blog_entry.txt".
I cannot wrap my head around how to generate the "new name" from this prefix, so that I can apply os.rename(old_name, new_name) to the file.

Here is one way, using string concatenation to build the new filename you want:
from datetime import datetime
import re
import os

with open('blog_entry.txt', 'r', encoding='utf-8') as file:
    source_code = file.read()

# extract date_only from the file contents as in your script, then convert it
date = datetime.strptime(date_only, "%d-%b-%Y")
new_date = date.strftime('%Y_%m_%d')

path = "/path/to/your/file/"
os.rename(path + 'blog_entry.txt', path + new_date + ' ' + 'blog_entry.txt')
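An equivalent sketch with pathlib, reusing new_date from the snippet above, if you prefer joining paths to string concatenation (the folder path is a placeholder):
from pathlib import Path

folder = Path('/path/to/your/file')                    # placeholder directory
old_path = folder / 'blog_entry.txt'
new_path = folder / (new_date + ' ' + old_path.name)   # "yyyy_mm_dd blog_entry.txt"
old_path.rename(new_path)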

Related

Reading multiple excel files into a pandas dataframe, but also storing the file name

I would like to read multiple Excel files and store them in a single pandas dataframe, but I would like one of the columns in the dataframe to be the file name. This is because the file name contains the date (this is monthly data) and I need that information. I'm able to get the Excel files into a dataframe, but I can't seem to get the file name. Please help.
import os
import pandas as pd
import fsspec

files = os.listdir("C://Users//6J2754897//Downloads//monthlydata")
paths = "C://Users//6J2754897//Downloads//monthlydata"
a = pd.DataFrame([2], index = None)
df = pd.DataFrame()
for file in range(len(files)):
    if files[file].endswith('.xlsx'):
        df = df.append(pd.read_excel(paths + "//" + files[file], sheet_name = "information", skiprows=7), ignore_index=True)
        df['Month'] = str(files[file])
The order of operations here is incorrect. The line:
df['Month'] = str(files[file])
is going to overwrite the entire column with the most recent value.
Instead we should only add the value to the current DataFrame:
import os
import pandas as pd

paths = "C://Users//6J2754897//Downloads//monthlydata"
files = os.listdir(paths)

df = pd.DataFrame()
for file in range(len(files)):
    if files[file].endswith('.xlsx'):
        # Read in File
        file_df = pd.read_excel(paths + "//" + files[file],
                                sheet_name="information",
                                skiprows=7)
        # Add to just this DataFrame
        file_df['Month'] = str(files[file])
        # Update `df`
        df = df.append(file_df, ignore_index=True)
Alternatively we can use DataFrame.assign to chain the column assignment:
import os
import pandas as pd

paths = "C://Users//6J2754897//Downloads//monthlydata"
files = os.listdir(paths)

df = pd.DataFrame()
for file in range(len(files)):
    if files[file].endswith('.xlsx'):
        df = df.append(
            # Read in File
            pd.read_excel(paths + "//" + files[file],
                          sheet_name="information",
                          skiprows=7)
            .assign(Month=str(files[file])),  # Add to just this DataFrame
            ignore_index=True
        )
For a general overall improvement, we can use pd.concat with a list comprehension over the files. This avoids growing the DataFrame inside the loop (which can be extremely slow). Path.glob also helps with selecting the appropriate files:
from pathlib import Path
import pandas as pd

paths = "C://Users//6J2754897//Downloads//monthlydata"

df = pd.concat([
    pd.read_excel(file,
                  sheet_name="information",
                  skiprows=7)
    .assign(Month=file.stem)  # We may also want file.name here
    for file in Path(paths).glob('*.xlsx')
])
Some options for the Month column are:
file.stem will give "[t]he final path component, without its suffix":
'folder/folder/sample.xlsx' -> 'sample'
file.name will give "the final path component, excluding the drive and root":
'folder/folder/sample.xlsx' -> 'sample.xlsx'
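A quick way to see the difference in an interpreter:
from pathlib import Path

p = Path('folder/folder/sample.xlsx')
p.stem  # 'sample'
p.name  # 'sample.xlsx'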

Import txt file and filter with space

I'm writing a script to track my orders from a website. I want to import the order numbers from a txt file, and the script should repeat itself as long as there are order numbers. I wrote code where the script imports this txt file and chooses a random order number, but the script puts all the order numbers together and doesn't separate them. How can I fix this?
This is my code:
import random

f = open("Order#.txt", "r")
OrderNR = f.read()
words = OrderNR.split()
Repeat = len(words)
for i in range(Repeat):
    randomlist = OrderNR
    Orderrandom = random.choice(randomlist)
    Mainlink = 'https://footlocker.narvar.com/footlocker/tracking/startrack?order_number=' + Orderrandom
Instead of using f.read(), try using f.readlines().
# Using readlines()
file1 = open('myfile.txt', 'r')
Lines = file1.readlines()
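A minimal sketch of how that fits the original loop (the file name and URL come from the question; stripping the trailing newlines is the important part):
import random

# one order number per line, newline removed
with open('Order#.txt', 'r') as f:
    order_numbers = [line.strip() for line in f.readlines() if line.strip()]

for _ in range(len(order_numbers)):
    Orderrandom = random.choice(order_numbers)
    Mainlink = 'https://footlocker.narvar.com/footlocker/tracking/startrack?order_number=' + Orderrandom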
Alternatively, try pandas:
import pandas as pd
df = pd.read_csv('Order#.txt', delimiter='\t')
print(df)
This lets you see the txt file in table format.

Change order in filenames in a folder

I need to rename a bunch of files in a specific folder. They all end with a date and time, for example "hello 2019-05-22 1310.txt", and I want the date and time to come first in each file name so I can sort the files. With my code I get an error and it won't find my dir where all the files are located. What is wrong with the code?
import os
import re
import shutil

dir_path = r'C:\Users\Admin\Desktop\Testfiles'
comp = re.compile(r'\d{4}-\d{2}-\d{2}')

for file in os.listdir(dir_path):
    if '.' in file:
        index = [i for i, v in enumerate(file, 0) if v == '.'][-1]
        name = file[:index]
        ext = file[index+1:]
    else:
        ext = ''
        name = file
    data = comp.findall(name)
    if len(data) != 0:
        date = comp.findall(name)[0]
        rest_name = ' '.join(comp.split(name)).strip()
        new_name = '{} {}{}'.format(date, rest_name, '.' + ext)
        print('changing {} to {}'.format(name, new_name))
        shutil.move(os.path.join(dir_path, name), os.path.join(dir_path, new_name))
    else:
        print('file {} is not change'.format(name))
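One thing to check: shutil.move is given name, the extension-stripped string, rather than the original file name, so the source path it builds never exists. A minimal sketch of the same renaming idea using os.path.splitext and the folder from the question; treat it as an illustration, not a verified fix:
import os
import re

dir_path = r'C:\Users\Admin\Desktop\Testfiles'
# date plus the four-digit time, e.g. "2019-05-22 1310"
stamp_re = re.compile(r'\d{4}-\d{2}-\d{2} \d{4}')

for file in os.listdir(dir_path):
    name, ext = os.path.splitext(file)
    match = stamp_re.search(name)
    if match:
        stamp = match.group()
        rest = (name[:match.start()] + name[match.end():]).strip()
        new_name = '{} {}{}'.format(stamp, rest, ext)
        print('changing {} to {}'.format(file, new_name))
        os.rename(os.path.join(dir_path, file), os.path.join(dir_path, new_name))
    else:
        print('file {} is not changed'.format(file))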

How to open and append nested zip archives into dataframe without extracting?

I am trying to open a large number of csv files found in several layers of zip files. Given the nature of this project, I am trying to open each one, read_csv it into a dataframe, append that data to an aggregate dataframe, and then continue through the loop.
Example: Folder Directory/First Zip/Second Zip/Third Zip/csv file.csv
My existing code can loop through the contents of the second and third zip files and get the name of each csv file. I am aware that this code could probably be made simpler by importing glob, but I'm unfamiliar with it.
import os
import pandas as pd
import zipfile, re, io

directory = 'C:/Test/'
os.chdir(directory)
fname = "test" + ".zip"

with zipfile.ZipFile(fname, 'r') as zfile:
    # second level of zip files
    for zipname in zfile.namelist():
        if re.search(r'\.zip$', zipname) != None:
            zfiledata = io.BytesIO(zfile.read(zipname))
            # third level of zip files
            with zipfile.ZipFile(zfiledata) as zfile2:
                for zipname2 in zfile2.namelist():
                    # this zipfile contains xml and csv contents. This filters out the xmls
                    if zipname2.find("csv") > 0:
                        zfiledata2 = io.BytesIO(zfile2.read(zipname2))
                        with zipfile.ZipFile(zfiledata2) as zfile3:
                            fullpath = directory + fname + "/" + zipname + "/" + zipname2 + "/"
                            # csv file names are always the same as their zips. this cleans the string.
                            csvf = zipname2.replace('_csv.zip', ".csv")
                            filehandle = open(fullpath, 'rb')
                            # the above statement is erroring: FileNotFoundError: [Errno 2] No such file or directory:
                            zfilehandle = zipfile.ZipFile(filehandle)
                            data = []
                            csvdata = StringIO.StringIO(zfilehandle.read(csvf))
                            df = pd.read_csv(csvdata)
                            data.append(df)
                            print(data.head())
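For reference, a minimal sketch that reads each CSV straight from the nested archives in memory, never building a filesystem path into a zip (the archive name and the _csv.zip naming convention are taken from the question; everything else is an assumption):
import io
import zipfile
import pandas as pd

frames = []
with zipfile.ZipFile('C:/Test/test.zip') as outer:
    for inner_name in outer.namelist():
        if not inner_name.endswith('.zip'):
            continue
        # second level: open the inner zip from bytes, not from a path
        with zipfile.ZipFile(io.BytesIO(outer.read(inner_name))) as inner:
            for member in inner.namelist():
                if not member.endswith('_csv.zip'):
                    continue
                # third level: the zip holding the csv of the same base name
                with zipfile.ZipFile(io.BytesIO(inner.read(member))) as innermost:
                    csv_name = member.replace('_csv.zip', '.csv')
                    with innermost.open(csv_name) as fh:
                        frames.append(pd.read_csv(fh))

df = pd.concat(frames, ignore_index=True)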

Re-loop until all matches are found, logic?

I cannot figure out the logic for this. I am attempting to compare a list of matches, 'matches', to files from a folder. If a file in 'folders' equals a name in 'matches', then do something, but obviously my code doesn't try each match against each file. I'm thinking I need to use a while loop, but I don't know how to apply it.
import os
import glob
import os.path

folders = glob.glob('C:\\Corrections\\*.*')
matches = open('filename.txt', 'r')

for each in folders:
    splitname_one = each.split('\\', 3)  # Separate the filename from the path
    filename = splitname_one[3]          # Get filename only
    basefile = filename.split('.', 1)    # Separate filename and file extension
    compare0 = basefile[0]               # Assign base file name to compare0
    #print (basefile[0])
    for line in matches:
        match = line.split('.', 1)       # Separate base filename from file extension
        #print (match[1])
        compare1 = match[0]              # Assign base file name to compare1
        if compare1 == compare0:
            #os.rename(filename, 'C:\\holder\\' + filename)
            print('We Have a match!')
        else:
            print('no match :( ')
FWIW here's how I might end up doing something like this:
import glob
from os.path import basename, splitext

def file_base(filename):
    return splitext(basename(filename))[0]

folders = set(file_base(f) for f in glob.glob('C:\\Corrections\\*.*'))

with open('filename.txt') as fobj:
    matches = set(file_base(f) for f in fobj.readlines())

print(folders.intersection(matches))
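If the next step is to actually move the matched files (as in the commented-out os.rename line from the question), a self-contained sketch along the same lines; the C:\holder destination is taken from that comment:
import glob
import shutil
from os.path import basename, splitext, join

def file_base(filename):
    return splitext(basename(filename))[0]

# base names listed in filename.txt
with open('filename.txt') as fobj:
    wanted = set(file_base(line.strip()) for line in fobj if line.strip())

# move any file in the folder whose base name is in the wanted set
for path in glob.glob('C:\\Corrections\\*.*'):
    if file_base(path) in wanted:
        shutil.move(path, join('C:\\holder', basename(path)))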
