My code runs well when I didn't ask to rename the file, just to print all file names without number, but when I did it and checked file instead of running it, it just doesn't work.
here's my code:
import os
import re
def rename_file():
file_list = os.listdir(r"C:\Users\Zoe.Zhao\Desktop\prank")
saved_path = os.getcwd()
os.chdir(r"C:\Users\Zoe.Zhao\Desktop\prank")
print (saved_path)
for file_name in file_list:
file_name = re.sub('[0-9]','',file_name)
print(file_name)
rename_file()
Here are some sreenshots:
before:
after:
You need to create a new list: see below:
import os
import re
def rename_file():
file_list = os.listdir(r"C:\Users\afareh\Dropbox\PROGRAMMING\test\prank\prank")
saved_path = os.getcwd()
os.chdir(r"C:\Users\afareh\Dropbox\PROGRAMMING\test\prank\prank")
new_file_list=[] # create an empty list for storing the names of the renamed files.
for file_name in file_list:
file_name=re.sub('[0-9]','',file_name)
new_file_list.append(file_name)
print (new_file_list)
rename_file()
Related
I have a CSV file and I am running some Python to remove line breaks from the CSV.
import csv
with open('Jan2020.csv', 'r') as txtReader:
with open('new_jan2020.csv', 'w') as txtWriter:
for line in txtReader.readlines():
line = line.replace('\r', '')
txtWriter.write(line)
This works fine.
What I want to achieve is the following:
I have multiple CSV files in a folder: jan2020, feb2020, march2020, april2020, may2020
How would I loop through each file, remove line breaks like my above method and then output a new file for each where the name of the new file is the format: new_monthYear.csv?
So I would end up with a bunch of CSVs new_jan2020, new_feb2020, new_march2020, new_april2020, new_may2020
Thanks
You can list all files in a directory with os.listdir. I would also recommend to write your new files in another folder.
import os
import csv
# Folder name
path_input = '/path/folder/'
path_output = '/path/other/'
dirs = os.listdir( path_input )
# This would iterate over all the listed files
for file in dirs:
file_to_read = os.path.join( path_input, file )
file_to_write = os.path.join( path_output, 'new_' + file )
# Your code
with open(file_to_read, 'r') as txtReader:
with open(file_to_write, 'w') as txtWriter:
for line in txtReader.readlines():
line = line.replace('\r', '')
txtWriter.write(line)
Updated with the use of pandas:
import os
import csv
import pandas as pd
# Folder name
path_input = (r'your/path')
path_output = (r'your/path')
dirs = os.listdir( path_input )
# Iterate over all the listed files
df ={}
for file in dirs:
file_to_read = os.path.join( path_input, file )
file_to_write = os.path.join( path_output, 'new_' + file )
# Remove line breaks
df = pd.read_csv(file_to_read)
df2 = df.replace("\n","", regex=True)
df3 = df2.to_csv(file_to_write, index=False)
I am a beginner at python. I am writing a script to :
Read all csv files in a folder
Drop duplicate rows within a .csv file by reading one csv file at a time
Write to *_new.csv file
The code :
import csv
import os
import pandas as pd
path = "/Users/<mylocaldir>/Documents/Data/"
file_list = os.listdir(path)
for file in file_list:
fullpath = os.path.join(path, file)
data = pd.read_csv(fullpath)
newdata = data.drop_duplicates()
newfile = fullpath.replace(".csv","_new.csv")
newdata.to_csv ("newfile", index=True, header=True)
As I run the script, there is no error displayed. But, *_new.csv is not created
Any help to resolve this issue?
I don't know pandas but you don't need it. You could try something like this:
import os
file_list = os.listdir()
# loop through the list
for filename in file_list:
# don't process any non csv file
if not filename.endswith('.csv'):
continue
# lines will be a temporary holding spot to check
# for duplicates
lines = []
new_file = filename.replace('.csv', '_new.csv')
# open 2 files - csv file and new csv file to write
with open(filename, 'r') as fr, open(new_file, 'w') as fw:
# read line from csv
for line in fr:
# if that line is not in temporary list called lines,
# add it there and write to file
# if that line is found in temporary list called lines,
# don't do anything
if line not in lines:
lines.append(line)
fw.write(line)
print('Done')
Result
Original file
cat name.csv
id,name
1,john
1,john
2,matt
1,john
New file
cat name_new.csv
id,name
1,john
2,matt
Another original file
cat pay.csv
id,pay
1,100
2,300
1,100
4,400
4,400
2,300
4,400
It's new file
id,pay
1,100
2,300
4,400
Update
The following script works with a slight modification to read from Src folder and write to Dest folder :
import cdv
import os
import pandas as pd
path = "/Users/<localdir>/Documents/Data/Src"
newPath = "/Users/<localdir>/Documents/Data/Dest"
file_list = os.listdir(path)
for file in file_list:
fullpath = os.path.join(path, file)
data = pd.read_csv(fullpath)
newdata = data.drop_duplicates()
newfile = file.replace(".csv","_new.csv")
if not os.path.isfile(os.path.join(newPath, newfile)):
newdata.to_csv (os.path.join(newPath, newfile), index=False, header=True)
I also added a check to see if a file already exists in the Dest folder.
I will be keen to understand if there is a better way to write this script.
I am trying to find the file size of a txt file in the folder named nOK where the date modified is today. nOK folder will be there in many dirs.
Below is the example:
I have a directories:
d:\Sample\Sample1
d:\sample\sample2
I have files in those directories:
d:\sample\Sample1\file1\nOk\test.txt
d:\sample\Sample1\file1\nOk\test2.txt
d:\sample\Sample1\file2\nOk\test.txt
d:\sample\Sample1\file2\nOk\test2.txt
d:\sample\sample2\file1\nOk\test.txt
d:\sample\Sample2\file1\nOk\test2.txt
d:\sample\Sample2\file2\nOk\test.txt
d:\sample\Sample2\file2\nOk\test2.txt
I wanted to check the file size in nOK folder only for date modified today files.
Below is my sample code:
import os
import re
import glob
#from os import path
path1 = r'D:\rmanchal040518\summit_sp22\mo\xload\xml'
os.chdir(path1)
print(os.getcwd())
listdir = os.listdir(path1)
#print(listdir)
for dirs,files in listdir:
if files.endswith('.txt'):
#print(files)
txt_file= files.endswith('.txt')
file_size = os.path.getsize(files)
if file_size>0:
print("File path is:")
print("this file has data: ",files,"with file size:",file_size)
I got the solution.
import os
import re
import glob
path1 = r'D:\rmchal0\xml'
os.chdir(path1)
print(os.getcwd())
listdir = os.listdir(path1)
print(listdir)
def txt_check(path):
os.chdir(path)
listdir = os.listdir(path)
for files in listdir:
if files.endswith('.txt'):
# txt_file= files.endswith('.txt')
file_size = os.path.getsize(files)
if file_size > 0:
print("this file has data: ", files, "with file size:", file_size, "in", os.getcwd())
print(listdir)
def file_size_check(path):
os.chdir(path)
print(os.getcwd())
listdir = os.listdir(path)
print(listdir)
for files in listdir:
if (files.find('.') == -1):
path2= os.path.join(path,files)
if files.lower()== 'nok':
print(files)
txt_check(path2)
else:
#path2 = os.path.join(path,files)
file_size_check(path2)
file_size_check(path1)
I have a text file in that i have to access particular headings and access the first lines under the heading.I was able to do it for one heading while doing multiple heading i was facing issue.
I have successfully done for one heading. but doing to list of words i was unable to do it.
i was able to do it for one heading
Data =['work:']
i was not able to do it for this scenario.
Data =['work:','test:','ride:']
In the text file the data is like below:
work:
'500'
'ast'
'800'
test:
'tim'
'200'
'300'
ride:
'mic'
'100'
'657'
import math
import os
import glob
import re
import sys
sys.path.append('C:/Documents/tes')
def read(file,Data,res,outputline):
with open(file,'r') as f:
stc_file = os.path.basename(file)
for line in f:
if Data in line:
line = f.readlines()
return line[outputline]
fls = []
src_dir = r'C:/Documents/tes'
for root, dirs, files in os.walk(src_dir):
for filename in files:
if not filename.endswith('.txt'):
continue
filepath = os.path.join(root, filename)
fls.append(filepath)
result = []
Data = ['work:','test:','ride:']
for file in fls:
result=read(file,Data,result,0).split()+read(file,Data,result,1).split()+read(file,Data,result,2).split()
The above code is working for one heading,but for multiple headings i was not able to do.
['500','ast','800']
['tim','200','300']
['mic','100','657']
This above expected output .
This script will do what you asked, if each of the three (not sure if you wanted more, or an arbitrary number?) lines of data you are looking for are surrounded by single quotes—and if I understood your goal correctly...
import os
src_dir = os.getcwd() # or whatever path you want
keywords = ['work:', 'test:', 'ride:']
result = []
for root, dirs, files in os.walk(src_dir):
for filename in files:
if filename.endswith('.txt'):
path = os.path.join(root, filename)
try:
fh = open(path, 'r')
lines = [l.strip("'") for l in fh.read().splitlines()]
for i in range(len(lines)):
if lines[i] in keywords:
result.append(' '.join(lines[i+1:i+4]).split())
except Exception as e:
print('Something went wrong.')
print(e)
continue
print(result)
I'm trying to change a list of excel files to csvs before loading them into a pandas dataframe, but I'm unsure how I can convert them in my script. Csvkit and xlsx2csv seem to work for doing it from the command line, but when I try to start a subprocess like so
for filename in sorted_files:
file = subprocess.Popen("in2csv filename", stdout=subprocess.PIPE)
print file.stdout
dataframe = pd.read_csv(file)
I'm getting the error
IOError: Expected file path name or file-like object, got type
schema must not be null when format is "fixed"
Is it possible to get the output from the subprocess and pipe that to a dataframe? Any help greatly appreciated!
Although it has been so long since the question was made, I had the same issue and this is the way it was implemented inside a python script:
Could only execute Xlsx2csv with sheetid parameter. In order to get sheet names and ids, get_sheet_details was used.
csvfrmxlsx creates csv files for each sheet in csv folder under parent directory.
import pandas as pd
from pathlib import Path
def get_sheet_details(filename):
import xmltodict
import shutil
import zipfile
sheets = []
# Make a temporary directory with the file name
directory_to_extract_to = (filename.with_suffix(''))
directory_to_extract_to.mkdir(parents=True, exist_ok=True)
# Extract the xlsx file as it is just a zip file
zip_ref = zipfile.ZipFile(filename, 'r')
zip_ref.extractall(directory_to_extract_to)
zip_ref.close()
# Open the workbook.xml which is very light and only has meta data, get sheets from it
path_to_workbook = directory_to_extract_to / 'xl' / 'workbook.xml'
with open(path_to_workbook, 'r') as f:
xml = f.read()
dictionary = xmltodict.parse(xml)
for sheet in dictionary['workbook']['sheets']['sheet']:
sheet_details = {
'id': sheet['#sheetId'], # can be sheetId for some versions
'name': sheet['#name'] # can be name
}
sheets.append(sheet_details)
# Delete the extracted files directory
shutil.rmtree(directory_to_extract_to)
return sheets
def csvfrmxlsx(xlsxfl, df): # create csv files in csv folder on parent directory
from xlsx2csv import Xlsx2csv
(xlsxfl.parent / 'csv').mkdir(parents=True, exist_ok=True)
for index, row in df.iterrows():
shnum = row['id']
shnph = xlsxfl.parent / 'csv' / Path(row['name'] + '.csv') # path for converted csv file
Xlsx2csv(str(xlsxfl), outputencoding="utf-8").convert(str(shnph), sheetid=int(shnum))
return
pthfnc = 'c:/xlsx/'
wrkfl = 'my.xlsx'
xls_file = Path(pthfnc + wrkfl)
sheetsdic = get_sheet_details(xls_file) # dictionary with sheet names and ids without opening xlsx file
df = pd.DataFrame.from_dict(sheetsdic)
csvfrmxlsx(xls_file, df) # df with sheets to be converted