Finding a file by extension - python-3.x

I am trying to find files with .desktop extension in a specific directory in Python3. I tried the code snippet below but it didn't work as I wanted. I want it to be a single string value.
import os, fnmatch
desktopfile = configparser.ConfigParser ()
def find(pattern, path):
result = []
for root, dirs, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, pattern):
result.append(os.path.join(root, name))
return result
script_tmp_dir = "/tmp/appiload/appinstall" # Geçici dizin (dosyalar burada ayıklanıyor)
desktopfilea=f"{script_tmp_dir}/squashfs-root/{str(find ('*.desktop', f'{script_tmp_dir}/squashfs-root/')}"
print(desktopfilea)
desktopfile.items()
Result:
/tmp/appiload/appinstall/squashfs-root/['/tmp/appiload/appinstall/squashfs-root/helloworld.desktop']

Use glob.glob instead of writing a function to do this job.
import os, glob
desktopfile = configparser.ConfigParser ()
script_tmp_dir = "/tmp/appiload/appinstall" # Geçici dizin (dosyalar burada ayıklanıyor)
desktopfilea = glob.glob(f'{script_tmp_dir}/squashfs-root/*.desktop')
# desktopfilea = " ".join(desktopfilea) # Join them in one string, using space as seperator
print(str(desktopfilea))
desktopfile.items()

I don't exactly understand what do you mean but I made a simple program that will print all the files with the .desktop extension and save them to 2 files: applications.json in an array and applications.txt just written one after another.
I also have 2 versions of the program, one that will only print and save only the file names with extensions and other one that will print and save the whole path.
File names only:
import os
import json
strApplications = ""
applications = []
for file in os.listdir(os.path.dirname(os.path.realpath(__file__))):
if file.endswith(".desktop"):
applications.append(file)
with open("applications.json", "w") as f:
json.dump(applications, f)
strApplications = strApplications + file + "\n"
with open("applications.txt", "w") as f:
f.write(strApplications)
print(strApplications)
Full file path:
import os
import json
cwd = os.getcwd()
files = [cwd + "\\" + f for f in os.listdir(cwd) if f.endswith(".desktop")]
with open("applications.json", "w") as f:
json.dump(files, f)
with open("applications.txt", "w") as f:
f.write("\n".join(files))
print("\n".join(files))

Related

Python3: Index out of range for script that worked before

the attached script returns:
IndexError: list index out of range
for the line starting with values = {line.split (...)
values=dict()
with open(csv) as f:
lines =f.readlines()
values = {line.split(',')[0].strip():line.split(',')[1].strip() for line in lines}
However, I could use it yesterday for doing exactly the same:
replacing certain text in a dir of xml-files with different texts
import os
from distutils.dir_util import copy_tree
drc = 'D:/Spielwiese/00100_Arbeitsverzeichnis'
backup = 'D:/Spielwiese/Backup/'
csv = 'D:/persons1.csv'
copy_tree(drc, backup)
values=dict()
with open(csv) as f:
lines =f.readlines()
values = {line.split(',')[0].strip():line.split(',')[1].strip() for line in lines}
#Getting a list of the full paths of files
for dirpath, dirname, filename in os.walk(drc):
for fname in filename:
#Joining dirpath and filenames
path = os.path.join(dirpath, fname)
#Opening the files for reading only
filedata = open(path,encoding="Latin-1").read()
for k,v in values.items():
filedata=filedata.replace(k,v)
f = open(path, 'w',encoding="Latin-1")
# We are writing the the changes to the files
f.write(filedata)
f.close() #Closing the files
print("In case something went wrong, you can find a backup in " + backup)
I don't see anything weird and I could, as mentioned before use it before ... :-o
Any ideas on how to fix it?
best Wishes,
K

Dynamically read and load files into Python

In Python, is there a way to import csv or text files dynamically.We process multiple files a week that have different names and I don't want to update the with open statement manually each time the script runs. I have a function to read the file name which I pass to a variable for later use in my code.
I can see and read the files in the directory but I am not sure if I can add the contents of the folder into a variable that can then be used in the with open statement.
import os
os.chdir('T:\Credit Suite')
DIR = os.listdir()
print(DIR)
import csv,sys
with open('July 19.csv',mode='r') as csv_file:
ROWCOUNT = 0
FILENAME = (csv_file.name)
output = csv.writer(open('test2.txt', 'w', newline=''))
reader =csv.DictReader(csv_file)
for records in reader:
ROWCOUNT += 1
EIN = records['EIN']
DATE = records['Date Established']
DUNS = records['DUNS #']
COMPANYNAME = records['Company Name']
lineout =('<S>'+ EIN+'$EIN '+EIN+'*'+DATE+')'+ COMPANYNAME +'#D-U-N-S '+DUNS).upper()
output.writerow([lineout])
print("writing completed")
I will be running my script when a file hits a folder using a monitor and scheduler in an automated process. I want the code to run no matter what the inbound file name is labeled as in the folder and I wont have to update the code manually for the file name or change the file name to a standard name each time.
os.chdir('T:\Credit Suite')
for root, dirs, files in os.walk("."):
for filename in files:
if filename.endswith('.csv'):
f=filename
import csv,sys
with open(f,mode='r') as csv_file:
os.listdir() returns a list of all the files in the dir, you can just loop all the files:
import os
os.chdir('T:\Credit Suite')
DIR = os.listdir()
print(DIR)
import csv,sys
for file in DIR:
if file.endswith('.csv'):
with open(file,mode='r') as csv_file:
ROWCOUNT = 0
FILENAME = (csv_file.name)
output = csv.writer(open(FILENAME + '_output.txt', 'w', newline=''))
reader =csv.DictReader(csv_file)
all_lines = []
for records in reader:
ROWCOUNT += 1
EIN = records['EIN']
DATE = records['Date Established']
DUNS = records['DUNS #']
COMPANYNAME = records['Company Name']
lineout =('<S>'+ EIN+'$EIN '+EIN+'*'+DATE+')'+ COMPANYNAME +'#D-U-N-S '+DUNS).upper()
all_lines.append(lineout)
output.writerow(all_lines)
print("writing completed")
# remove file to avoid reprocessing the file again in the next run
# of the script, or just move it elsewhere with os.rename
os.remove(file)

Applying function to a list of file-paths and writing csv output to the respective paths

How do I apply a function to a list of file paths I have built, and write an output csv in the same path?
read file in a subfolder -> perform a function -> write file in the
subfolder -> go to next subfolder
#opened xml by filename
with open(r'XML_opsReport 100001.xml', encoding = "utf8") as fd:
Odict_parsedFromFilePath = xmltodict.parse(fd.read())
#func called in func below
def activity_to_df_one_day (list_activity_this_day):
ib_list = [pd.DataFrame(list_activity_this_day[i], columns=list_activity_this_day[i].keys()).drop("#uom") for i in range(len(list_activity_this_day))]
return pd.concat(ib_list)
#Processes parsed xml and writes csv
def activity_to_df_all_days (Odict_parsedFromFilePath, subdir): #writes csv from parsed xml after some processing
nodes_reports = Odict_parsedFromFilePath['opsReports']['opsReport']
list_activity = []
for i in range(len(nodes_reports)):
try:
df = activity_to_df_one_day(nodes_reports[i]['activity'])
list_activity.append(df)
except KeyError:
continue
opsReport = pd.concat(list_activity)
opsReport['dTimStart'] = pd.to_datetime(opsReport['dTimStart'], infer_datetime_format =True)
opsReport.sort_values('dTimStart', axis=0, ascending=True, inplace=True, kind='quicksort', na_position='last')
opsReport.to_csv("subdir\opsReport.csv") #write to the subdir
def scanfolder(): #fetches list of file-paths with desired starting name.
list_files = []
for path, dirs, files in os.walk(r'C:\..\xml_objects'): #directory containing several subfolders
for f in files:
if f.startswith('XML_opsReport'):
list_files.append(os.path.join(path, f))
return list_files
filepaths = scanfolder() #list of file-paths
Every function works well, the xml processing is good, so I am not sharing the xml structure. There are 100+ paths in filepaths , each a different subdirectory. I want to be able to apply above flow in future as well, where I can get filepaths and perform desired actions. It's important to write the csv file to it's sub directory.
To get the directory that a file is in, you can use:
import os
for root, dirs, files, in os.walk(some_dir):
for f in files:
print(root)
output_file = os.path.join(root, "output_file.csv")
print(output_file)
Is that what you're looking for?
Output:
somedir
somedir\output_file.csv
See also Python 3 - travel directory tree with limited recursion depth and Find current directory and file's directory.
Was able to solve with os.path.join.
exceptions_path_list =[]
for i in filepaths:
try:
with open(i, encoding = "utf8") as fd:
doc = xmltodict.parse(fd.read())
activity_to_df_all_days (doc, i)
except ValueError:
exceptions_path_list.append(os.path.dirname(i))
continue
def activity_to_df_all_days (Odict_parsedFromFilePath, filepath):
...
...
...
opsReport.to_csv(os.path.join(os.path.dirname(filepath), "opsReport.csv"))

Convert multiple .txt files into single .csv file (python)

I need to convert a folder with around 4,000 .txt files into a single .csv with two columns:
(1) Column 1: 'File Name' (as specified in the original folder);
(2) Column 2: 'Content' (which should contain all text present in the corresponding .txt file).
Here you can see some of the files I am working with.
The most similar question to mine here is this one (Combine a folder of text files into a CSV with each content in a cell) but I could not implement any of the solutions presented there.
The last one I tried was the Python code proposed in the aforementioned question by Nathaniel Verhaaren but I got the exact same error as the question's author (even after implementing some suggestions):
import os
import csv
dirpath = 'path_of_directory'
output = 'output_file.csv'
with open(output, 'w') as outfile:
csvout = csv.writer(outfile)
csvout.writerow(['FileName', 'Content'])
files = os.listdir(dirpath)
for filename in files:
with open(dirpath + '/' + filename) as afile:
csvout.writerow([filename, afile.read()])
afile.close()
outfile.close()
Other questions which seemed similar to mine (for example, Python: Parsing Multiple .txt Files into a Single .csv File?, Merging multiple .txt files into a csv, and Converting 1000 text files into a single csv file) do not solve this exact problem I presented (and I could not adapt the solutions presented to my case).
I had a similar requirement and so I wrote the following class
import os
import pathlib
import glob
import csv
from collections import defaultdict
class FileCsvExport:
"""Generate a CSV file containing the name and contents of all files found"""
def __init__(self, directory: str, output: str, header = None, file_mask = None, walk_sub_dirs = True, remove_file_extension = True):
self.directory = directory
self.output = output
self.header = header
self.pattern = '**/*' if walk_sub_dirs else '*'
if isinstance(file_mask, str):
self.pattern = self.pattern + file_mask
self.remove_file_extension = remove_file_extension
self.rows = 0
def export(self) -> bool:
"""Return True if the CSV was created"""
return self.__make(self.__generate_dict())
def __generate_dict(self) -> defaultdict:
"""Finds all files recursively based on the specified parameters and returns a defaultdict"""
csv_data = defaultdict(list)
for file_path in glob.glob(os.path.join(self.directory, self.pattern), recursive = True):
path = pathlib.Path(file_path)
if not path.is_file():
continue
content = self.__get_content(path)
name = path.stem if self.remove_file_extension else path.name
csv_data[name].append(content)
return csv_data
#staticmethod
def __get_content(file_path: str) -> str:
with open(file_path) as file_object:
return file_object.read()
def __make(self, csv_data: defaultdict) -> bool:
"""
Takes a defaultdict of {k, [v]} where k is the file name and v is a list of file contents.
Writes out these values to a CSV and returns True when complete.
"""
with open(self.output, 'w', newline = '') as csv_file:
writer = csv.writer(csv_file, quoting = csv.QUOTE_ALL)
if isinstance(self.header, list):
writer.writerow(self.header)
for key, values in csv_data.items():
for duplicate in values:
writer.writerow([key, duplicate])
self.rows = self.rows + 1
return True
Which can be used like so
...
myFiles = r'path/to/files/'
outputFile = r'path/to/output.csv'
exporter = FileCsvExport(directory = myFiles, output = outputFile, header = ['File Name', 'Content'], file_mask = '.txt')
if exporter.export():
print(f"Export complete. Total rows: {exporter.rows}.")
In my example directory, this returns
Export complete. Total rows: 6.
Note: rows does not count the header if present
This generated the following CSV file:
"File Name","Content"
"Test1","This is from Test1"
"Test2","This is from Test2"
"Test3","This is from Test3"
"Test4","This is from Test4"
"Test5","This is from Test5"
"Test5","This is in a sub-directory"
Optional parameters:
header: Takes a list of strings that will be written as the first line in the CSV. Default None.
file_mask: Takes a string that can be used to specify the file type; for example, .txt will cause it to only match .txt files. Default None.
walk_sub_dirs: If set to False, it will not search in sub-directories. Default True.
remove_file_extension: If set to False, it will cause the file name to be written with the file extension included; for example, File.txt instead of just File. Default True.

Moving files in python based on file and folder name

Relatively new to python ( not using it everyday ). However I am trying to simplify some things. I basically have Keys which have long names however a subset of the key ( or file name ) has the same sequence of the associated folder.{excuse the indentation, it is properly indented.} I.E
file1 would be: 101010-CDFGH-8271.dat and folder is CDFGH-82
file2 would be: 101010-QWERT-7425.dat and folder is QWERT-74
import os
import glob
import shutil
files = os.listdir("files/location")
dest_1 = os.listdir("dest/location")
for f in files:
file = f[10:21]
for d in dest_1:
dire = d
if file == dire:
shutil.move(file, dest_1)
The code runs with no errors, however nothing moves. Look forward to your reply and chance to learn.
Sorry updated the format.
Try a variation of:
basedir = "dest/location"
for fname in os.listdir("files/location"):
dirname = os.path.join(basedir, fname[10:21])
if os.path.isdir(dirname):
path = os.path.join("files/location", fname)
shutil.move(path, dirname)

Resources