Python 3.x | FileNotFoundError: [Errno 2] No such file or directory | writing .csv from .xlsx - python-3.x

I was working on a file-converting function for the xlsx -> csv format. I was able to make the function work when I specified the exact file, but I'm running into issues when I try to iterate the process over a directory. Below is the code:
import csv
import os
import xlrd

def ExceltoCSV(excel_file, csv_file_base_path):
    workbook = xlrd.open_workbook(excel_file)
    ## get the worksheet names
    for sheet_name in workbook.sheet_names():
        print('processing - ' + sheet_name)
        ## extract the data from each worksheet
        worksheet = workbook.sheet_by_name(sheet_name)
        ## create a new csv file, named after the original Excel worksheet; tidied up a bit by replacing spaces and dashes
        csv_file_full_path = csv_file_base_path + sheet_name.lower().replace(" - ", "_").replace(" ", "_") + '.csv'
        csvfile = open(csv_file_full_path, 'w')
        ## write into the new csv file, one row at a time
        writetocsv = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
        for rownum in range(worksheet.nrows):
            writetocsv.writerow(
                list(x.encode('utf-8') if type(x) == type(u'') else x for x in worksheet.row_values(rownum))
            )
        csvfile.close()
        print(sheet_name + ' has been saved at - ' + csv_file_full_path)
## Paths as strings
p = r'//Network/TestingFolder/'
nf_p = r'//Network/TestingFolder/CSV_Only/'
## directory reference
directory = r'//Network/TestingFolder/'  # for os.listdir() function below
file_list = []
## for iterating over the directory and spitting out the paths for each file (to be used in conjunction with ExceltoCSV())
for filename in os.listdir(directory):
    if filename.endswith(".xlsx"):  # or filename.endswith(".csv")
        file_path = os.path.join(directory, filename)
        file_list.append(file_path)
    else:
        continue

for paths in file_list:
    print(paths)
    ExceltoCSV(paths, nf_p)
The error occurs on the line >> csvfile = open(csv_file_full_path, 'w')
Error is: FileNotFoundError: [Errno 2] No such file or directory
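Two things commonly trigger an [Errno 2] on open(..., 'w'): the output folder (here CSV_Only) not existing yet, and a sheet name that still contains a character such as '/' after the replace calls, which turns the file name into a bogus extra path segment. A minimal sketch of a guard against both, assuming one of those is the cause here (safe_csv_path is a made-up helper name, not from the original code):

import os
import re

def safe_csv_path(csv_file_base_path, sheet_name):
    # Make sure the target folder exists before opening a file inside it.
    os.makedirs(csv_file_base_path, exist_ok=True)
    # Strip characters that are illegal in file names; which characters
    # actually appear in the sheet names is an assumption.
    tidied = sheet_name.lower().replace(" - ", "_").replace(" ", "_")
    cleaned = re.sub(r'[\\/:*?"<>|]', '_', tidied)
    # os.path.join also sidesteps a missing trailing slash on the base path.
    return os.path.join(csv_file_base_path, cleaned + '.csv')

If the folder already exists and the sheet names are clean, printing csv_file_full_path right before the open call will show which part of the path is off.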

Related

reading shp files to geopandas to dictionary with the same name

I'm walking through a directory structure, finding all files with the .shp extension and storing them in a dictionary. However, some files share the same name; how do I store files of the same name in a dictionary without overwriting them? Appending the file structure to the name would be acceptable in this case. How is that done?
Current 'working' code:
def get_all_shp(mydir):
    # layers = []
    data = {}
    for root, dirs, files in os.walk(mydir):
        for file in files:
            try:
                if file.endswith(".shp"):
                    shp = os.path.join(root, file)
                    # layers.append(shp)
                    path = root + "/" + file
                    # print("path: " + path)
                    data[file] = gpd.read_file(path)
            except:
                pass
A fix that keeps same-named files from overwriting each other is to key the dictionary by the full path instead of the bare file name:

def get_all_shp(mydir):
    # layers = []
    data = {}
    for root, dirs, files in os.walk(mydir):
        for file in files:
            try:
                if file.endswith(".shp"):
                    shp = os.path.join(root, file)
                    # layers.append(shp)
                    path = root + "/" + file
                    # print("path: " + path)
                    data[path] = gpd.read_file(path)
            except:
                pass
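If the full absolute path is too unwieldy as a key, a variant that keys by the path relative to the search root also avoids collisions. This is a sketch of an alternative, not the poster's code; the relpath-based keys are my suggestion:

import os
import geopandas as gpd

def get_all_shp(mydir):
    data = {}
    for root, dirs, files in os.walk(mydir):
        for file in files:
            if file.endswith(".shp"):
                path = os.path.join(root, file)
                # Key by the path relative to the search root, e.g.
                # "region_a/roads.shp", so same-named files in different
                # folders no longer overwrite each other.
                key = os.path.relpath(path, mydir)
                data[key] = gpd.read_file(path)
    return data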

How can I loop through all the xls files in a folder to find the sheet names, and then replace them

I am trying to loop through all the XLS files in a folder and replace each worksheet name with another string. This has to be done for every file in the folder.
I am relatively new to programming, and here is my Python code. It runs okay (partially, when I do it for one file at a time); however, I am unable to get it to work for all the files in the folder.
from xlutils.copy import copy
from xlrd import open_workbook
# open the file
direc = input('Enter file name: ')
rb = open_workbook(direc)
wb = copy(rb)
#index of a sheet
pointSheet = rb.sheet_names()
print(pointSheet)
idx = pointSheet.index(pointSheet)
wb.get_sheet(idx).name = u'RenamedSheet1'
wb.save(direc)
Error message:
Traceback (most recent call last):
File "./Rename.py", line 13, in <module>
idx = pointSheet.index(pointSheet)
ValueError: ['x xxx xxxx xxxxxx'] is not in list
My bad! The above code is for testing with a single file. Here is the loop:
import os
from pprint import pprint

files = []
for dirname, dirnames, filenames in os.walk('D:\Temp\Final'):
    # print path to all subdirectories first.
    for subdirname in dirnames:
        files.append(os.path.join(dirname, subdirname))
    # print path to all filenames.
    for filename in filenames:
        files.append(os.path.join(dirname, filename))
pprint(files)

for i in range(0, len(files)):
    rb = open_workbook(files[i])
    wb = copy(rb)
    idx = rb.sheet_names().index('5 new bulk rename')
    wb.get_sheet(idx).name = u'RenamedSheet1'
    wb.save(files[i])
print('Operation succeeded!')
The ValueError happens because pointSheet is the entire list returned by rb.sheet_names(), so pointSheet.index(pointSheet) searches the list for itself. Try something like this (untested) for a single file:
from xlutils.copy import copy
from xlrd import open_workbook

# open the file
direc = input('Enter file name: ')
rb = open_workbook(direc)
wb = copy(rb)
for pointSheet in rb.sheet_names():
    print(pointSheet)
    # look up the index of the current sheet name in the workbook
    idx = rb.sheet_names().index(pointSheet)
    wb.get_sheet(idx).name = u'RenamedSheet1'
wb.save(direc)
And wrap that in another loop using os.listdir:
import os

for file in os.listdir("/mydir"):
    if file.endswith(".xls"):
        # <do what you did for a single file>
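Put together, a minimal sketch of the whole loop, untested, and assuming every workbook should simply have its first sheet renamed (adjust the index or look the sheet up by name if the target varies per workbook):

import os
from xlrd import open_workbook
from xlutils.copy import copy

folder = r'D:\Temp\Final'  # assumption: the same folder as in the question
for filename in os.listdir(folder):
    if filename.endswith(".xls"):
        path = os.path.join(folder, filename)
        rb = open_workbook(path)
        wb = copy(rb)
        # Rename the first sheet of each workbook.
        wb.get_sheet(0).name = u'RenamedSheet1'
        wb.save(path)
        print('Renamed sheet in ' + filename)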

Running a Python script for files in a folder

There are 15 text files in a folder, and I am trying to extract certain parts of each file and write them to a new file.
I am able to extract each file individually by changing the file name and appending the result to the output file, but this means copying the same code 15 times and just changing the file name each time.
import glob, os

lst = []
filelist = glob.glob('/C:/Users/bridaly/Documents/PythonTest/Python_Test_ENdata_3080_v20150914/input/*')
for file in filelist:
    if os.path.isfile(file):
        for line in filelist:
            line = line.strip()
            if not (
                line.startswith("APPEND") or line.startswith("_") or
                line.startswith("SAP") or line.startswith("~") or
                line.startswith("INCLUDE") or line.startswith("ABAP") or
                line.strip() == "" or line.startswith("Field") or
                line.startswith("Short")
            ):
                y = line.replace(' ', ' ')
                # print(y)
                z = y.replace('X', '')
                # print(z)
                w = "|".join(z.split())
                # print(w)
                x = w.split("|", 3)[:4]
                # print(x)
                x.insert(0, './input/01BKPF')
                # print(x)
                if len(x) >= 4:
                    t = [s.replace('|', ' ') for s in x]
                    # print(t)
                    print("|".join(t))
                    lst.append("|".join(t))

# Output Script
output_file = open('Output_Final.txt', 'w')
for l in lst:
    output_file.write(l)
    output_file.write('\n')
output_file.close()
The output should contain the extracted parts of each file, appended to the output file. I have gotten the correct output by copying the code 15 times, but I just want to use the code once, as that is more efficient.
files = glob.glob('path')
for file in files:
    file_name = os.path.basename(file)
    print(file_name)

This way you can iterate over each file.
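Note that the inner loop in the question iterates with for line in filelist, i.e. over the list of file names, not over the lines of the current file, so the same few strings get processed for every file. A sketch of the restructured skeleton, with the question's filtering logic elided where marked:

import glob, os

lst = []
for path in glob.glob('/C:/Users/bridaly/Documents/PythonTest/Python_Test_ENdata_3080_v20150914/input/*'):
    if os.path.isfile(path):
        with open(path) as f:
            for line in f:  # read this file's lines, not the file list
                line = line.strip()
                # ... the question's startswith filters and join logic go here ...
                # lst.append("|".join(t))

with open('Output_Final.txt', 'w') as output_file:
    for l in lst:
        output_file.write(l)
        output_file.write('\n')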

Change order in filenames in a folder

I need to rename a bunch of files in a specific folder. They all end with a date and time, for example "hello 2019-05-22 1310.txt", and I want the date and time of each file to come first so I can sort them. With my code I get an error, and it won't find the directory where the files are located. What is wrong with the code?
import os
import re
import shutil

dir_path = r'C:\Users\Admin\Desktop\Testfiles'
comp = re.compile(r'\d{4}-\d{2}-\d{2}')
for file in os.listdir(dir_path):
    if '.' in file:
        index = [i for i, v in enumerate(file, 0) if v == '.'][-1]
        name = file[:index]
        ext = file[index+1:]
    else:
        ext = ''
        name = file
    data = comp.findall(name)
    if len(data) != 0:
        date = comp.findall(name)[0]
        rest_name = ' '.join(comp.split(name)).strip()
        new_name = '{} {}{}'.format(date, rest_name, '.' + ext)
        print('changing {} to {}'.format(name, new_name))
        shutil.move(os.path.join(dir_path, name), os.path.join(dir_path, new_name))
    else:
        print('file {} is not change'.format(name))
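One likely culprit, reading the code above: the source passed to shutil.move is built from name, which has had the extension stripped, so that path never exists on disk and the move raises FileNotFoundError. A one-line sketch of the fix, assuming that is the failure here:

# 'file' (still in scope from the os.listdir loop) keeps the extension,
# so use it as the move source instead of the stripped 'name'.
shutil.move(os.path.join(dir_path, file), os.path.join(dir_path, new_name))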

How to open and append nested zip archives into dataframe without extracting?

I am trying to open a large number of csv files found in several layers of zip files. Given the nature of this project, I am trying to open each one, read_csv it into a dataframe, append that data to an aggregate dataframe, then continue through the loop.
Example: Folder Directory/First Zip/Second Zip/Third Zip/csv file.csv
My existing code can loop through the contents of the second and third zip files and get the name of each csv file. I am aware that this code could probably be simplified by importing glob, but I'm unfamiliar with it.
import os
import pandas as pd
import zipfile, re, io

directory = 'C:/Test/'
os.chdir(directory)
fname = "test" + ".zip"
with zipfile.ZipFile(fname, 'r') as zfile:
    # second level of zip files
    for zipname in zfile.namelist():
        if re.search(r'\.zip$', zipname) != None:
            zfiledata = io.BytesIO(zfile.read(zipname))
            # third level of zip files
            with zipfile.ZipFile(zfiledata) as zfile2:
                for zipname2 in zfile2.namelist():
                    # this zipfile contains xml and csv contents. This filters out the xmls
                    if zipname2.find("csv") > 0:
                        zfiledata2 = io.BytesIO(zfile2.read(zipname2))
                        with zipfile.ZipFile(zfiledata2) as zfile3:
                            fullpath = directory + fname + "/" + zipname + "/" + zipname2 + "/"
                            # csv file names are always the same as their zips. this cleans the string.
                            csvf = zipname2.replace('_csv.zip', ".csv")
                            filehandle = open(fullpath, 'rb')
                            # the above statement is erroring: FileNotFoundError: [Errno 2] No such file or directory
                            zfilehandle = zipfile.ZipFile(filehandle)
                            data = []
                            csvdata = StringIO.StringIO(zfilehandle.read(csvf))
                            df = pd.read_csv(csvdata)
                            data.append(df)
                            print(data.head())
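The open(fullpath, 'rb') call is the problem: members of a zip archive are not filesystem paths, so a string like 'C:/Test/test.zip/inner.zip/data_csv.zip/' can never be opened. Once zfile3 is open, the csv can be read straight out of the in-memory archive, with no StringIO needed. A sketch of a replacement for the innermost block, reusing zipname2 and zfiledata2 from the question (the frames list and the pd.concat at the end are my additions):

import io
import zipfile
import pandas as pd

frames = []  # declare once, before the outer loop

with zipfile.ZipFile(zfiledata2) as zfile3:
    csvf = zipname2.replace('_csv.zip', '.csv')
    # Read the csv member directly from the archive in memory;
    # no filesystem path is involved at any point.
    with zfile3.open(csvf) as member:
        frames.append(pd.read_csv(member))

# After the loops finish, combine everything into one DataFrame:
# df_all = pd.concat(frames, ignore_index=True)

Note too that data in the question is a list, so print(data.head()) would fail even after the open succeeded; df.head() was probably intended.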
