I'm walking through a directory structure, finding all files with the .shp extension, and storing them in a dictionary. However, some files share the same name. How do I store files of the same name in a dictionary without overwriting them? Appending the file structure to the name would be acceptable in this case. How is that done?
Current 'working' code:
import os
import geopandas as gpd

def get_all_shp(mydir):
    # layers = []
    data = {}
    for root, dirs, files in os.walk(mydir):
        for file in files:
            try:
                if file.endswith(".shp"):
                    shp = os.path.join(root, file)
                    # layers.append(shp)
                    path = root + "/" + file
                    # print("path: " + path)
                    data[file] = gpd.read_file(path)
            except:
                pass
    return data
Keying the dictionary on the full path instead of the bare file name avoids the collisions:

def get_all_shp(mydir):
    data = {}
    for root, dirs, files in os.walk(mydir):
        for file in files:
            try:
                if file.endswith(".shp"):
                    path = os.path.join(root, file)
                    # keying on the full path keeps same-named files distinct
                    data[path] = gpd.read_file(path)
            except Exception:
                pass
    return data
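If you would rather keep the bare file name visible in the key, one option (a minimal sketch, not from the original post) is to prefix the name with the path of its parent directory relative to the search root, via os.path.relpath:

import os
import geopandas as gpd

def get_all_shp(mydir):
    """Collect .shp files, keyed on '<relative-dir>/<name>.shp' so that
    same-named files in different folders get distinct keys."""
    data = {}
    for root, dirs, files in os.walk(mydir):
        for file in files:
            if file.endswith(".shp"):
                path = os.path.join(root, file)
                # e.g. 'subdir_a/rivers.shp' vs 'subdir_b/rivers.shp'
                key = os.path.relpath(path, mydir)
                data[key] = gpd.read_file(path)
    return data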
I need to ask whether it is possible, and how, to get at the same time both the count of files in a directory and its subdirectories matching an fnmatch filter, and the addresses (paths) of the files found.
For now I use this:
import os
import fnmatch
from pathlib import Path

def return_ext():
    file_pasok = ["AAAAA.txt", "BBBBBB.txt"]
    for i in range(len(file_pasok)):
        for ext_f in file_pasok:
            return ext_f

def list_files(file_path):
    ext = return_ext()
    for _, dirnames, filenames in os.walk(file_path):
        if not filenames:
            continue
        for file in fnmatch.filter(filenames, ext):
            file_found_str = Path(os.path.join(_, file))
            file_found = str(file_found_str)
            yield file_found
ext = return_ext()
######## GOT HOW MANY FILES FOUND
count_founded = sum([len(fnmatch.filter(files, ext)) for r, d, files in os.walk(file_path)])
######## GOT LIST OF ADDRESSES OF FILES FOUND
for file_found in list_files(file_path):
    print(file_found)
But of course the script performs the same search twice :(
Thanks so much for any suggestions!!
A fixed version: make return_ext a real generator, walk the tree once, and build the list of matches a single time; its length is the count and iterating it gives the paths:

import os
import fnmatch
from pathlib import Path

def return_ext():
    file_pasok = ["AAAA.txt", "BBBB.txt"]
    for ext_f in file_pasok:
        yield ext_f

def list_files(file_path):
    exts = list(return_ext())
    for _, dirnames, filenames in os.walk(file_path):
        if not filenames:
            continue
        for ext in exts:
            # fnmatch.filter has already matched against 'ext', so no
            # further check is needed (the original 'any(...)' test
            # iterated over the file name string and was a bug)
            for file in fnmatch.filter(filenames, ext):
                yield str(Path(os.path.join(_, file)))

count_found = list(list_files(file_path))
print(len(count_found))
for file_found in count_found:
    print(file_found)
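An equivalent single-pass version using pathlib alone may also be worth considering. A sketch, with the patterns and the file_path placeholder assumed:

from pathlib import Path

patterns = ["AAAA.txt", "BBBB.txt"]  # assumed fnmatch-style patterns
file_path = "."                      # assumed search root

# one recursive walk per pattern; rglob matches the way fnmatch does
matches = [str(p) for pat in patterns for p in Path(file_path).rglob(pat)]

print(len(matches))   # the count
for m in matches:     # the addresses
    print(m)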
I have some Python code that creates a zip file, but it writes the archive to the location where the Python script runs rather than the folder I need it written to. How do I structure my code to make it write to the location I need?
import os
from zipfile import ZipFile

def get_all_file_paths(directory):
    file_paths = []
    for root, directories, files in os.walk(directory):
        for filename in files:
            filepath = os.path.join(root, filename)
            file_paths.append(filepath)
    return file_paths

for root, subdirectories, files in os.walk(src):
    if root != src + 'Errors':
        for subdirectory in subdirectories:
            if subdirectory != 'A' and subdirectory != 'B' and subdirectory != 'C':
                print(subdirectory)
                folderName = subdirectory
                print('The folder name is', folderName)
                print(os.path.join(root, subdirectory))
                filePath = os.path.join(root, subdirectory)
                file_paths = get_all_file_paths(filePath)
                print('Following files will be zipped: ')
                for file_name in file_paths:
                    print(file_name)
                # a bare file name here means the archive lands in the
                # current working directory
                with ZipFile(folderName + '.zip', 'w') as zip:
                    for file in file_paths:
                        zip.write(file, os.path.relpath(file, root))
                print('All files zipped successfully!')
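The fix is to join the archive name onto the destination folder before opening the ZipFile. A minimal sketch, with dest_dir as an assumed output path (folderName, file_paths, and root as in the loop above):

import os
from zipfile import ZipFile

dest_dir = r'C:\path\to\output'  # assumed destination folder
os.makedirs(dest_dir, exist_ok=True)

# build the full target path so the archive is created in dest_dir
zip_path = os.path.join(dest_dir, folderName + '.zip')
with ZipFile(zip_path, 'w') as archive:
    for file in file_paths:
        archive.write(file, os.path.relpath(file, root))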
I was working on a file-converting function for an xlsx -> csv conversion. I was able to make the function work when I specified the exact file, but I'm running into issues when I try to iterate the process over a directory. Below is the code:
import csv
import xlrd

def ExceltoCSV(excel_file, csv_file_base_path):
    workbook = xlrd.open_workbook(excel_file)
    ## get the worksheet names
    for sheet_name in workbook.sheet_names():
        print('processing - ' + sheet_name)
        ## extract the data from each worksheet
        worksheet = workbook.sheet_by_name(sheet_name)
        ## create a new csv file, named after the original Excel worksheet,
        ## tidied up a bit by replacing spaces and dashes
        csv_file_full_path = csv_file_base_path + sheet_name.lower().replace(" - ", "_").replace(" ", "_") + '.csv'
        csvfile = open(csv_file_full_path, 'w')
        ## write into the new csv file, one row at a time
        writetocsv = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
        for rownum in range(worksheet.nrows):
            writetocsv.writerow(
                list(x.encode('utf-8') if type(x) == type(u'') else x for x in worksheet.row_values(rownum))
            )
        csvfile.close()
        print(sheet_name + ' has been saved at - ' + csv_file_full_path)
import os

## Paths as strings
p = r'//Network/TestingFolder/'
nf_p = r'//Network/TestingFolder/CSV_Only/'

## directory reference
directory = r'//Network/TestingFolder/'  # for os.listdir() function below
file_list = []

## for iterating over the directory and spitting out the path of each file
## { to be used in conjunction with ExceltoCSV() }
for filename in os.listdir(directory):
    if filename.endswith(".xlsx"):  # or filename.endswith(".csv")
        file_path = os.path.join(directory, filename)
        file_list.append(file_path)
    else:
        continue

for paths in file_list:
    print(paths)
    ExceltoCSV(paths, nf_p)
My error is occurring with the line >> csvfile = open(csv_file_full_path, 'w')
Error is: FileNotFoundError: [Errno 2] No such file or directory
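That error usually means the output directory does not exist when open() tries to create the file; a sheet name containing a character like '/' can also turn the target into a path through a folder that isn't there. A minimal sketch of a guard for both cases, assuming the same csv_file_base_path layout (safe_csv_path is a hypothetical helper, not part of the original code):

import os
import re

def safe_csv_path(csv_file_base_path, sheet_name):
    # make sure the output folder actually exists before writing
    os.makedirs(csv_file_base_path, exist_ok=True)
    # strip characters that would be read as path separators
    safe_name = re.sub(r'[\\/]', '_', sheet_name.lower()).replace(" - ", "_").replace(" ", "_")
    return os.path.join(csv_file_base_path, safe_name + '.csv')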
I need to rename a bunch of files in a specific folder. They all end with a date and time, for example "hello 2019-05-22 1310.txt", and I want the date and time of each file to come first so I can sort them. With my code I get an error and it won't find the files in my directory. What is wrong with the code?
import os
import re
import shutil

dir_path = r'C:\Users\Admin\Desktop\Testfiles'
comp = re.compile(r'\d{4}-\d{2}-\d{2}')

for file in os.listdir(dir_path):
    if '.' in file:
        index = [i for i, v in enumerate(file, 0) if v == '.'][-1]
        name = file[:index]
        ext = file[index + 1:]
    else:
        ext = ''
        name = file
    data = comp.findall(name)
    if len(data) != 0:
        date = data[0]
        rest_name = ' '.join(comp.split(name)).strip()
        new_name = '{} {}{}'.format(date, rest_name, '.' + ext)
        print('changing {} to {}'.format(name, new_name))
        # the source must be the original file name with its extension;
        # moving 'name' (extension stripped) raised FileNotFoundError
        shutil.move(os.path.join(dir_path, file), os.path.join(dir_path, new_name))
    else:
        print('file {} is not changed'.format(name))
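Since the goal is a sortable prefix, it may also be worth moving the time along with the date. A sketch under that assumption, for names like "hello 2019-05-22 1310.txt":

import os
import re

dir_path = r'C:\Users\Admin\Desktop\Testfiles'
# capture the date plus the 4-digit time that follows it
stamp = re.compile(r'(\d{4}-\d{2}-\d{2} \d{4})')

for file in os.listdir(dir_path):
    name, ext = os.path.splitext(file)
    m = stamp.search(name)
    if m:
        rest = stamp.sub('', name).strip()
        new_name = '{} {}{}'.format(m.group(1), rest, ext)
        os.rename(os.path.join(dir_path, file), os.path.join(dir_path, new_name))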
I am trying to open a large number of csv files found inside several layers of zip files. Given the nature of this project, I am trying to open each one, read_csv it into a dataframe, append that data to an aggregate dataframe, and then continue through the loop.
Example: Folder Directory/First Zip/Second Zip/Third Zip/csv file.csv
My existing code can loop through the contents of the second and third zip files and get the name of each csv file. I am aware that this code could probably be simplified by using glob, but I'm unfamiliar with it.
import os
import pandas as pd
import zipfile, re, io

directory = 'C:/Test/'
os.chdir(directory)
fname = "test" + ".zip"

with zipfile.ZipFile(fname, 'r') as zfile:
    # second level of zip files
    for zipname in zfile.namelist():
        if re.search(r'\.zip$', zipname) != None:
            zfiledata = io.BytesIO(zfile.read(zipname))
            # third level of zip files
            with zipfile.ZipFile(zfiledata) as zfile2:
                for zipname2 in zfile2.namelist():
                    # this zipfile contains xml and csv contents; this filters out the xmls
                    if zipname2.find("csv") > 0:
                        zfiledata2 = io.BytesIO(zfile2.read(zipname2))
                        with zipfile.ZipFile(zfiledata2) as zfile3:
                            fullpath = directory + fname + "/" + zipname + "/" + zipname2 + "/"
                            # csv file names are always the same as their zips; this cleans the string
                            csvf = zipname2.replace('_csv.zip', ".csv")
                            filehandle = open(fullpath, 'rb')
                            # the above statement is erroring: FileNotFoundError: [Errno 2] No such file or directory
                            zfilehandle = zipfile.ZipFile(filehandle)
                            data = []
                            csvdata = io.StringIO(zfilehandle.read(csvf).decode())
                            df = pd.read_csv(csvdata)
                            data.append(df)
                            print(df.head())
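The open(fullpath, 'rb') call fails because fullpath is a virtual path through the archives, not a file that exists on disk. The innermost zip is already open in memory as zfile3, so the csv can be read straight out of it with ZipFile.open. A sketch under those assumptions, collecting the frames and concatenating them at the end:

import io
import zipfile
import pandas as pd

frames = []
with zipfile.ZipFile("test.zip") as zfile:
    for zipname in zfile.namelist():                      # second level
        if not zipname.endswith(".zip"):
            continue
        with zipfile.ZipFile(io.BytesIO(zfile.read(zipname))) as zfile2:
            for zipname2 in zfile2.namelist():            # third level
                if "csv" not in zipname2:
                    continue
                inner = io.BytesIO(zfile2.read(zipname2))
                with zipfile.ZipFile(inner) as zfile3:
                    csvf = zipname2.replace('_csv.zip', '.csv')
                    # read the csv directly from the in-memory zip;
                    # no on-disk path is ever constructed
                    with zfile3.open(csvf) as fh:
                        frames.append(pd.read_csv(fh))

aggregate = pd.concat(frames, ignore_index=True)
print(aggregate.head())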