Remove all lines starting with < for all files in current folder - python-3.x

This is what I have so far. But it just hangs.
import os
import sys
import re
# First attempt from the question: walk the script's folder and strip lines starting with "<".
directory_path = os.path.dirname(os.path.abspath(__file__))
for root, dirs, files in os.walk(directory_path):
    for file in files:
        # NOTE: this reads standard input rather than `file`, so the script
        # waits for input that never arrives; the result `s` is never written back.
        s = ''.join(sys.stdin.readlines())
        s = re.sub(r'(?m)^\<.*\n?', '', s)
Take 2: this still doesn't work, but maybe it's a bit closer?
import fileinput
import glob
import sys
import os
# Second attempt from the question: edit the files in place with fileinput.
path = os.path.dirname(os.path.abspath(__file__))
search = "<"
replace = " "
# NOTE: glob.glob(path) has no wildcard, so it yields the directory itself
# rather than the files inside it.
for line in fileinput.input(glob.glob(path), inplace=1):
    # With inplace editing, whatever is written to stdout becomes the new file content.
    # NOTE: this swaps every "<" for a space; it does not drop the line.
    sys.stdout.write(line.replace(search, replace))

You can use Python's built-in readlines, filter and writelines to achieve your requirement.
I believe the below code serves your purpose.
import os
source_dir = 'C:/Users/rlal1/Desktop/test_folder'


def remove_marked_lines(folder, marker='<'):
    """Rewrite every regular file in *folder* without the lines starting with *marker*.

    Only the entries directly inside *folder* are processed (no recursion).
    Sub-directories are skipped: ``os.listdir`` returns them too, and calling
    ``open`` on a directory raises an error.

    Args:
        folder: Directory whose files are filtered in place.
        marker: Prefix identifying the lines to drop. Defaults to ``'<'``.
    """
    for each_file in os.listdir(folder):
        file_path = os.path.join(folder, each_file)
        if not os.path.isfile(file_path):
            continue
        with open(file_path, 'r+') as fp:
            all_lines = fp.readlines()
            filtered = filter(lambda line: not line.startswith(marker), all_lines)
            # Rewind and empty the file before writing the surviving lines back.
            fp.seek(0)
            fp.truncate(0)
            fp.writelines(filtered)


if __name__ == '__main__':
    remove_marked_lines(source_dir)

Related

Loop over files

I have a series of files named file_0001.csv, file_0002.csv, ... file_1000.csv etc. I need to read them iteratively by creating a list of the filenames and as
import numpy as np
import pandas as pd
# `files` is the list of file names the question asks how to build.
for fileName in files:
    data = pd.read_csv("folder" + fileName)
    data = data.values
How do I create the list of file names by checking the first and last file.
Thank you for your help
If you are trying to create the specific file names, you can loop from 1 to 1000 and build each file name:
import numpy as np
import pandas as pd
def build_file_names(first=1, last=1000):
    """Return the names ``file_0001.csv`` ... for every number from *first* to *last* inclusive.

    Numbers are zero-padded to four digits, matching the naming scheme in the question.
    """
    return ["file_" + str(i).zfill(4) + ".csv" for i in range(first, last + 1)]


for filename in build_file_names(1, 1000):
    # Load each file here, e.g.:
    #     data = pd.read_csv("folder/" + filename).values
    print(filename)
output last 10 lines:
file_0991.csv
file_0992.csv
file_0993.csv
file_0994.csv
file_0995.csv
file_0996.csv
file_0997.csv
file_0998.csv
file_0999.csv
file_1000.csv
You should use pathlib from the standard library. Then you can simply do
import pandas as pd
from pathlib import Path
# get file number assuming the name format "file_number.csv"
def get_file_number(file_path):
    """Return the integer between the first and second underscore of the file's stem.

    For ``Path("file_0042.csv")`` this is ``42``. Raises ``IndexError`` when the
    stem has no underscore and ``ValueError`` when that part is not a number.
    """
    return int(file_path.stem.split("_")[1])
folder_path = Path("path/to/folder")
# sort files by file number
files = sorted(folder_path.glob("file_*.csv"), key=get_file_number)
for file in files:
    print(file.name)  # just to check
    data = pd.read_csv(file)
    # do something with data

Choose which function to run in script

I'm trying to make it so that the user chooses which function to run using if.
import os
import csv
import collections
import datetime
import pandas as pd
import time
import string
import re
import glob, os
folder_path = 'C:/ProgramData/WebPort/system/tags'
folder2_path = 'C:/ProgramData/WebPort/system'
search2_str = '"Prefix"'
print("Choices:\n 1 - Read from CSV\n 2 - Read from WPP")
x = input("Please enter your choice:\n")
x = int(x)
if x == 1:
    # Concatenate the non-empty lines of every .csv file in folder_path into csv.txt.
    csv_file_list = glob.glob(folder_path + '/*.csv')
    with open("csv.txt", 'w') as wf:
        for file in csv_file_list:
            print(glob.glob(folder_path + '/*.csv'))
            with open(file) as rf:
                for line in rf:
                    if line.strip():  # if line is not empty
                        # Make sure a file's last line also ends with a newline.
                        if not line.endswith("\n"):
                            line += "\n"
                        wf.write(line)
    print('Reading from .csv')
elif x == 2:
    # Append every line containing search2_str from each .wpp file under folder2_path to wpp.txt.
    for root, dirs, files in os.walk(folder2_path):
        for file in files:
            if file.endswith(".wpp"):
                print(os.path.join(root, file))
                with open(os.path.join(root, file), 'r') as fr, open("wpp.txt", 'a', encoding='utf-8') as fw:
                    for i, line in enumerate(fr):
                        if line.find(search2_str) != -1:
                            fw.write(line)
    print('Reading from .wpp')
else:
    print('wrong choice')
Getting Invalid syntax in line 34 using this.

Dynamically create file folder based on line 1 in csv

This script downloads images, renames them based on line[0] adds a number to the end of the file name and saves them to a file folder. My goal here is to create the file folder name based on line[0] of my csv file, I'm new to python and need to download/sort 15000+ images. Any help would be appreciated! Using python 3.8.6
Note: 1 model may contain many images so the idea is to create the folder, place images for that model inside, move on to the next model, etc.
Csv file content
RHV3-484-L,https://www.fisherpaykel.com/on/demandware.static/-/Sites-fpa-master-catalog/default/dw0c85e188/product-mugs/cooking/ranges/mug/retail/RHV3-484-N-RHV3-484-L-external-mug-rs-84.png
RHV3-484-L,https://www.fisherpaykel.com/on/demandware.static/-/Sites-fpa-master-catalog/default/dwcbd711e5/inspire/caitlin-wilson-portland-dk-339-rs-84.png
RHV3-484-L,https://www.fisherpaykel.com/on/demandware.static/-/Sites-fpa-master-catalog/default/dw3702e52a/inspire/caitlin-wilson-portland-dk-385-rs-84.jpg
RHV3-484-L,https://www.fisherpaykel.com/on/demandware.static/-/Sites-fpa-master-catalog/default/dw0c85e188/product-mugs/cooking/ranges/mug/retail/RHV3-484-N-RHV3-484-L-external-mug-rs-84.png
RHV3-484-L,https://www.fisherpaykel.com/on/demandware.static/-/Sites-fpa-master-catalog/default/dwf99a5a9d/inspire/david-berridge-project-brooklyn-mw-6457-rs-84.jpg
Python script
import sys
import urllib
import urllib.request
from csv import reader
import os.path
import os
# Script from the question: every image is saved into one fixed "ImageID" folder.
csv_filename = "images"
with open(csv_filename+".csv".format(csv_filename), 'r') as csv_file:
    n = 1
    for line in reader(csv_file):
        if not os.path.exists("ImageID"):
            os.makedirs("ImageID")
            # NOTE: the row that triggers the folder creation is not downloaded.
            print("Image skipped for {0}".format(line[0]))
        else:
            if line[1] != '' and line[0] != "ImageID":
                urllib.request.urlretrieve(line[1], "ImageID/" + line[0] + "-" + str(n) + ".jpg")
                n += 1
                print("Image saved for {0}".format(line[0]))
            else:
                print("No result for {0}".format(line[0]))
This seems to work as desired....
Couple comments in middle. Notably, you need to respect the .jpg or .png file. If you have file extensions that are longer (4 chars) you may need to split out the file name and then split by "."
Good Luck!
import sys
import urllib
import urllib.request
from csv import reader
import os.path
import os
csv_filename = "images.csv"


def _image_extension(url, default=".jpg"):
    """Return the extension of the file named in *url* (e.g. ``.png``), or *default* if it has none."""
    from urllib.parse import urlsplit  # local import keeps this snippet self-contained

    # Look only at the path component so query strings and fragments are ignored,
    # and extensions of any length (".jpeg", ".webp") are kept intact.
    return os.path.splitext(urlsplit(url).path)[1] or default


def download_images(csv_path, output_root="."):
    """Download the images listed in *csv_path* into one folder per model.

    Each CSV row is ``model,url``. The image is stored as
    ``<output_root>/<model>/<model>-<n><ext>`` where ``n`` counts the images
    saved for that model during this run and ``ext`` comes from the URL.

    Args:
        csv_path: Path of the CSV file to read.
        output_root: Directory under which the model folders are created.
            Defaults to the current directory.

    Raises:
        OSError: If the CSV cannot be read, a folder cannot be created, or a
            download fails (``urllib.error.URLError`` is an ``OSError``).
    """
    counters = {}  # model -> number of images saved so far in this run
    with open(csv_path, 'r', newline='') as csv_file:
        for line in reader(csv_file):
            if not line:
                continue  # blank line in the CSV
            model = line[0]
            url = line[1] if len(line) > 1 else ''
            # Skip rows without a URL and the "ImageID" row (presumably the
            # CSV header) before creating any folder for them.
            if url == '' or model == "ImageID":
                print("No result for {0}".format(model))
                continue
            tgt_folder = os.path.join(output_root, model)
            os.makedirs(tgt_folder, exist_ok=True)
            # Count per model, so numbering stays right even when a folder
            # already exists or rows of different models are interleaved.
            n = counters.get(model, 0) + 1
            filename = ''.join([model, '-', str(n), _image_extension(url)])
            destination = os.path.join(tgt_folder, filename)
            urllib.request.urlretrieve(url, destination)
            counters[model] = n
            print("Image saved for {0}".format(url))


if __name__ == "__main__":
    download_images(csv_filename)

Unable to read data for FCN; I don't know what to do. The result is: Found pickle file! 0 0

import numpy as np
import os
import random
from six.moves import cPickle as pickle
from tensorflow.python.platform import gfile
import glob
import TensorflowUtils as utils
DATA_URL = 'http:\\data.csail.mit.edu\\places\\ADEchallenge\\ADEChallengeData2016.zip'
# download and read dataset
def read_dataset(data_dir):
    """Return the training and validation records cached in ``data_dir``.

    When ``MITSceneParsing.pickle`` is missing from *data_dir*, the dataset is
    downloaded and extracted, the record lists are built with
    ``create_image_lists`` and the result is pickled. The pickle file is then
    loaded in either case.

    Args:
        data_dir: Directory holding (or receiving) the dataset and pickle file.

    Returns:
        A ``(training_records, validation_records)`` tuple taken from the
        ``'training'`` and ``'validation'`` keys of the pickled object.
    """
    pickle_filename = "MITSceneParsing.pickle"
    pickle_filepath = os.path.join(data_dir, pickle_filename)
    if not os.path.exists(pickle_filepath):
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_zipfile=True)
        # NOTE(review): the folder name is derived by splitting DATA_URL on "/";
        # this assumes DATA_URL uses forward slashes.
        SceneParsing_folder = os.path.splitext(DATA_URL.split("/")[-1])[0]
        result = create_image_lists(os.path.join(data_dir, SceneParsing_folder))
        print ("Pickling ...")
        with open(pickle_filepath, 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    else:
        print ("Found pickle file!")

    # A stale pickle built from empty record lists is reused as is; delete the
    # file to force the lists to be rebuilt.
    with open(pickle_filepath, 'rb') as f:
        result = pickle.load(f)
        training_records = result['training']
        validation_records = result['validation']
        del result

    return training_records, validation_records
train_records, valid_records = read_dataset('Data_zoo/MIT_SceneParsing')
print(len(train_records))
print(len(valid_records))
The result is: Found pickle file! 0 0
Why are the lengths of train_records and valid_records 0?
I don't know what is wrong or how to correct it.
This code is right. The bug is in 'create_image_lists'.
Note this code in create_image_lists:
filename = os.path.splitext(f.split('/')[-1])[0]
This is no problem in Linux, but in windows, the separator is '\\', so you should modify this code to:
# os.path.basename uses the running platform's path separators, so it works on both Linux and Windows
filename = os.path.splitext(os.path.basename(f))[0]
Then delete this file 'MITSceneParsing.pickle', and run read_dataset again.

Rename a file/ remove numbers from a file name in python 3

My code runs well when I didn't ask to rename the file, just to print all file names without number, but when I did it and checked file instead of running it, it just doesn't work.
here's my code:
import os
import re
def rename_file():
    """Print each file name in the prank folder with its digits removed.

    NOTE: only the local string is changed; nothing is renamed on disk
    because ``os.rename`` is never called.
    """
    file_list = os.listdir(r"C:\Users\Zoe.Zhao\Desktop\prank")
    saved_path = os.getcwd()
    os.chdir(r"C:\Users\Zoe.Zhao\Desktop\prank")
    print (saved_path)
    for file_name in file_list:
        file_name = re.sub('[0-9]','',file_name)
        print(file_name)
rename_file()
Here are some screenshots:
before:
after:
You need to create a new list: see below:
import os
import re
def rename_file(folder=r"C:\Users\afareh\Dropbox\PROGRAMMING\test\prank\prank"):
    """Strip all digits from the names of the entries in *folder*, renaming them on disk.

    Args:
        folder: Directory whose entries are renamed. Defaults to the prank
            folder used in this answer.

    Returns:
        The list of resulting names, one per directory entry, in
        ``os.listdir`` order. An entry keeps its old name when it contains no
        digits, when the digit-free name is already taken, or when removing
        the digits would leave an empty name.
    """
    new_file_list = []  # names of the entries after renaming
    for file_name in os.listdir(folder):
        new_name = re.sub('[0-9]', '', file_name)
        if new_name != file_name:
            target = os.path.join(folder, new_name)
            if new_name and not os.path.exists(target):
                # Full paths are used so the working directory need not change.
                os.rename(os.path.join(folder, file_name), target)
            else:
                # Renaming would clobber another entry or leave an empty name.
                new_name = file_name
        new_file_list.append(new_name)
    print (new_file_list)
    return new_file_list
rename_file()

Resources