Code in python that writes more than one text-file - python-3.x

I have started writing code that is intended to write many text files after first reading one text file. More details on the question follow after the code.
The text file I'm reading from (called alphabet.txt):
a
b
c
:
d
e
f
:
g
h
i
:
I want the result to be like this:
file1:
a
b
c
file2:
d
e
f
file3:
g
h
i
My code so far:
# Asker's unfinished attempt at reading alphabet.txt (indentation was lost in the paste).
with open('alphabet.txt', 'r') as f:
a = []
for i in f:
# NOTE(review): the list returned by split() is discarded here.
i.split(':')
# NOTE(review): this appends the list `a` to itself, not the line `i`.
a.append(a)
The code is of course not finished. Question: I don't know how to continue with the code. Is it possible to write the text files into a specific folder and also name them 'file1', 'file2', ... automatically from the code, without hardcoding each name?

You could implement that function with something like this
import os


def split_into_files(source, dest_dir=".", prefix="file"):
    """Split *source* at every ':' separator line and write one file per chunk.

    The chunks are written to *dest_dir* (created if missing) as
    ``<prefix>1.txt``, ``<prefix>2.txt``, ... so no file name is hard-coded.

    Args:
        source: path of the text file to read.
        dest_dir: folder that receives the generated files.
        prefix: leading part of every generated file name.

    Returns:
        The list of paths that were written, in order.
    """
    with open(source, 'r') as f:
        text = f.read()
    # A final ':' without a newline after it would otherwise stay glued to
    # the last chunk instead of acting as a separator.
    if text.endswith('\n:'):
        text += '\n'
    split_alph = text.split(':\n')

    os.makedirs(dest_dir, exist_ok=True)
    # Empty chunks (e.g. the one after a trailing ':') are skipped, so no
    # empty file is produced.
    chunks = [chunk for chunk in split_alph if chunk.strip()]
    written = []
    for number, chunk in enumerate(chunks, start=1):
        path = os.path.join(dest_dir, f"{prefix}{number}.txt")
        # `with` closes the file even if the write fails.
        with open(path, "w") as x:
            x.write(chunk)
        written.append(path)
    return written


if __name__ == '__main__':
    split_into_files('alphabet.txt')
Depending on whether there is a last : in the alphabet.txt file, you'd have to dismiss the last element in split_alph with
split_alph = f.read().split(':\n')[:-1]
If you got any further questions regarding the solution, please tell me.

import os

# Read the whole input file; `with` closes it even if reading fails.
with open("C:/Users/ASUS/Desktop/tutl.py", "r") as file:  # This is the input file
    text = file.read()

# Every ':' acts as a separator between two parts.
splitted = text.split(":")
destinationFolder = "path_to_Folder"  # Replace this with your folder path

# Drop the newlines that surrounded each ':' and skip empty parts (for
# example the one that follows a trailing ':'), so no empty file is written.
parts = [part.strip("\n") for part in splitted]
parts = [part for part in parts if part]

# Number the files from 1 (File1.txt, File2.txt, ...), as asked in the question.
for x, part in enumerate(parts, start=1):
    with open(os.path.join(destinationFolder, "File" + str(x) + ".txt"), "w") as newFile:
        newFile.write(part + "\n")

Related

File comparison in two directories

I am comparing all files in two directories. If the similarity of two files is greater than 90%, I want to continue with the next file in the outer loop and remove the matched file from the second directory, so that the next file from the first directory is not compared with a file that has already been matched.
Here's what i've tried:
# Asker's attempt: compare each file in ./files against each file in ./github_files.
# NOTE(review): the next line is not valid Python -- `for i` is duplicated.
for i for i in sorted_files:
for j in sorted_github_files:
#pdb.set_trace()
with open(f'./files/{i}') as f1:
try:
text1 = f1.read()
# NOTE(review): a bare except that passes hides read errors and leaves text1 unset or stale.
except:
pass
with open(f'./github_files/{j}') as f2:
try:
text2 = f2.read()
except:
pass
m = SequenceMatcher(None, text1, text2)
print("file1:", i, "file2:", j)
if m.ratio() > 0.90:
# NOTE(review): the file is deleted on disk but its name stays in
# sorted_github_files, so a later open() of the same name fails.
os.remove(f'./github_files/{j}')
break
I know I cannot change the list while it is being iterated over, which is why I get a "file not found" error. I don't want to use try/except blocks. Any ideas appreciated.
A couple of things to point out:
Always provide a minimal reproducible example
Your first for loop does not work because you wrote `for i for i ...`
If you want to iterate over the files in list1 (sorted_files) first, then read the file outside of the second loop
I would add the files that match with a ratio over 0.90 to a new list and remove the files afterward so your items do not change during the iteration
You can find the test-data i created and used here
import os
from difflib import SequenceMatcher
# define your two folders, full paths
first_path = os.path.abspath(r"C:\Users\XYZ\Desktop\testfolder\a")
second_path = os.path.abspath(r"C:\Users\XYZ\Desktop\testfolder\b")
# get files from folder
first_path_files = os.listdir(first_path)
second_path_files = os.listdir(second_path)
# join path and filenames
first_folder = [os.path.join(first_path, f) for f in first_path_files]
second_folder = [os.path.join(second_path, f) for f in second_path_files]
# listdir also returns sub-directories; open() would fail on those
first_folder = [p for p in first_folder if os.path.isfile(p)]
second_folder = [p for p in second_folder if os.path.isfile(p)]
# empty list for matching results
matched_files = []
# iterate over the files in the first folder
for file_one in first_folder:
    # read file content once per outer iteration, not once per comparison
    with open(file_one, "r") as f:
        file_one_text = f.read()
    # iterate over the files in the second folder
    for file_two in second_folder:
        # a file that already matched an earlier file must not be compared again
        if file_two in matched_files:
            continue
        # read file content
        with open(file_two, "r") as f:
            file_two_text = f.read()
        # match the two file contents (compute the ratio only once)
        ratio = SequenceMatcher(None, file_one_text, file_two_text).ratio()
        if ratio > 0.90:
            print(f"Match found ({ratio}): '{file_one}' | '{file_two}'")
            # TODO: here you have to decide if you rather want to remove files from the first or second folder
            matched_files.append(file_two)  # i delete files from the second folder
            # first match wins: continue with the next file of the first folder
            break
# remove the files only after all comparisons are done, so nothing that is
# still being iterated over disappears from disk
for f in matched_files:
    print(f"Removing file: {f}")
    os.remove(f)

Creating a python spellchecker using tkinter

For school, I need to create a spell checker, using python. I decided to do it using a GUI created with tkinter. I need to be able to input a text (.txt) file that will be checked, and a dictionary file, also a text file. The program needs to open both files, check the check file against the dictionary file, and then display any words that are misspelled.
Here's my code:
# Asker's original spell-checker GUI (indentation was lost in the paste).
import tkinter as tk
from tkinter.filedialog import askopenfilename
# Callback of the "Spellcheck" button.
def checkFile():
# get the sequence of words from a file
# NOTE(review): open() returns file objects; neither file is ever read or closed.
text = open(file_ent.get())
dictDoc = open(dict_ent.get())
for ch in '!"#$%&()*+,-./:;<=>?#[\\]^_`{|}~':
# NOTE(review): `text` is still a file object here, which has no replace() method.
text = text.replace(ch, ' ')
words = text.split()
# make a dictionary of the word counts
wordDict = {}
for w in words:
wordDict[w] = wordDict.get(w,0) + 1
# NOTE(review): `dictDict` is not assigned anywhere in this snippet.
for k in dictDict:
dictDoc.pop(k, None)
misspell_lbl["text"] = dictDoc
# Set-up the window
window = tk.Tk()
window.title("Temperature Converter")
window.resizable(width=False, height=False)
# Setup Layout
# frame_a: the two captions above the entry fields
frame_a = tk.Frame(master=window)
file_lbl = tk.Label(master=frame_a, text="File Name")
space_lbl = tk.Label(master=frame_a, width = 6)
dict_lbl =tk.Label(master=frame_a, text="Dictionary File")
file_lbl.pack(side=tk.LEFT)
space_lbl.pack(side=tk.LEFT)
dict_lbl.pack(side=tk.LEFT)
# frame_b: entry fields whose contents checkFile() passes to open()
frame_b = tk.Frame(master=window)
file_ent = tk.Entry(master=frame_b, width=20)
dict_ent = tk.Entry(master=frame_b, width=20)
file_ent.pack(side=tk.LEFT)
dict_ent.pack(side=tk.LEFT)
check_btn = tk.Button(master=window, text="Spellcheck", command=checkFile)
# frame_c: result area; checkFile() assigns to misspell_lbl["text"]
frame_c = tk.Frame(master=window)
message_lbl = tk.Label(master=frame_c, text="Misspelled Words:")
misspell_lbl = tk.Label(master=frame_c, text="")
message_lbl.pack()
misspell_lbl.pack()
frame_a.pack()
frame_b.pack()
check_btn.pack()
frame_c.pack()
# Run the application
window.mainloop()
I want the file to check against the dictionary and display the misspelled words in the misspell_lbl.
The test files I'm using to make it work, and to submit with the assignment are here:
check file
dictionary file
I preloaded the files to the site that I'm submitting this on, so it should just be a matter of entering the file name and extension, not the entire path.
I'm pretty sure the problem is with my function to read and check the file, I've been beating my head on a wall trying to solve this, and I'm stuck. Any help would be greatly appreciated.
Thanks.
The first problem is with how you try to read the files. open(...) will return a _io.TextIOWrapper object, not a string and this is what causes your error. To get the text from the file, you need to use .read(), like this:
def checkFile():
# get the sequence of words from a file
with open(file_ent.get()) as f:
text = f.read()
with open(dict_ent.get()) as f:
dictDoc = f.read().splitlines()
The with open(...) as f part gives you a file object called f, and automatically closes the file when it's done. This is more concise version of
f = open(...)
text = f.read()
f.close()
f.read() will get the text from the file. For the dictionary I also added .splitlines() to turn the newline separated text into a list.
I couldn't really see where you'd tried to check for misspelled words, but you can do it with a list comprehension.
misspelled = [x for x in words if x not in dictDoc]
This gets every word which is not in the dictionary file and adds it to a list called misspelled. Altogether, the checkFile function now looks like this, and works as expected:
def checkFile():
    """Spell-check the file named in ``file_ent`` against the list named in ``dict_ent``.

    Reads both file names from the entry widgets, strips punctuation from
    the text, and shows every word that is not in the dictionary file in
    ``misspell_lbl``. The dictionary file is read line by line (presumably
    one word per line -- confirm against the actual file).
    """
    # get the sequence of words from a file
    with open(file_ent.get()) as f:
        text = f.read()
    with open(dict_ent.get()) as f:
        # a set makes every "is this word known?" test O(1) instead of a
        # scan over the whole word list
        dictDoc = set(f.read().splitlines())
    for ch in '!"#$%&()*+,-./:;<=>?#[\\]^_`{|}~':
        text = text.replace(ch, ' ')
    words = text.split()
    misspelled = [x for x in words if x not in dictDoc]
    # hand the label an explicit string instead of relying on how tkinter
    # renders a Python list
    misspell_lbl["text"] = " ".join(misspelled)

Printing an entire list, instead of one line

I am having trouble writing the entire list into an outfile. Here is the code:
# Asker's first variant: print and write every "ATOM" line whose last field is "1".
# NOTE(review): `infline` here vs. `infile` in the second variant -- possibly a typo.
with open(infline, "r") as f:
lines = f.readlines()
for l in lines:
if "ATOM" in l :
split = l.split()
if split[-1] == "1":
print(split)
#print(type(split))
# NOTE(review): mode "w" truncates newFile on every open, so only the
# most recently written line survives.
with open( newFile,"w") as f:
f.write("Model Number One" + "\n")
f.write(str(split))
When I use print(split) it allows me to see the entire list (image below):
# Asker's second variant of the same loop.
with open(infile, "r") as f:
lines = f.readlines()
for l in lines:
if "ATOM" in l :
split = l.split()
if split[-1] == "1":
#print(split)
print(type(split))
# NOTE(review): mode "w" truncates newFile on every open.
with open( newFile,"w") as f:
f.write("Model Number One" + "\n")
# NOTE(review): the loop index `i` is unused; the whole list is written len(split) times.
for i in range(len(split)):
f.write(str(split))
However, when I try to use f.write(split) I get an error because the function can only take a str not a list. So, I used f.write(str(split)) and it worked. The only issue now is that it only writes the last item in the list, not the whole list.
The function print is slightly more permissible than the method f.write, in the sense that it can accept lists and various types of objects as input. f.write is usually called by passing pre-formatted strings, as you noticed.
I think the issue with the code is that the write routine is nested inside the code. This causes Python to erase any contents stored inside newFile, and write only the last line read (l).
The problem can be easily fixed by changing the open call to open( newFile,"a"). The flag "a" tells Python to append the new contents to the existing file newFile (without erasing information). If newFile does not exist yet, Python will automatically create it.

Python : Updating multiple words in a text file based on text in another text file using in_place module

I have a text file say storyfile.txt
Content in storyfile.txt is as
'Twas brillig, and the slithy toves
Did gyre and gimble in the wabe;
All mimsy were the borogoves,
And the mome raths outgrabe
I have another file- hashfile.txt that contains some words separated by comma(,)
Content of hashfile.txt is:
All,mimsy,were,the,borogoves,raths,outgrabe
My objective
My objective is to
1. Read hashfile.txt
2. Insert Hashtag on each of the comma separated word
3. Read storyfile.txt . Search for same words as in hashtag.txt and add hashtag on these words.
4. Update storyfile.txt with words that are hash-tagged
My Python code so far
import in_place
# Asker's attempt: read the words from hashfile.txt, then rewrite storyfile.txt in place.
# NOTE(review): mode 'w+' truncates hashfile.txt when it is opened, so read() returns ''.
hashfile = open('hashfile.txt', 'w+')
n1 = hashfile.read().rstrip('\n')
print(n1)
checkWords = n1.split(',')
print(checkWords)
# each word prefixed with '#'
repWords = ["#"+i for i in checkWords]
print(repWords)
hashfile.close()
with in_place.InPlace('storyfile.txt') as file:
for line in file:
for check, rep in zip(checkWords, repWords):
# NOTE(review): if `check` is the empty string, replace() inserts `rep`
# between every character of the line.
line = line.replace(check, rep)
file.write(line)
The output
can be seen here
https://dpaste.de/Yp35
Why am I getting this kind of output?
Why does the last sentence have no newlines in it?
Where am I going wrong?
The output
attached image
The current working code for single text
import in_place
# Asker's working single-word version: replace 'mome' with 'testZ' on every line.
with in_place.InPlace('somefile.txt') as file:
for line in file:
line = line.replace('mome', 'testZ')
# write the (possibly modified) line back through the in_place handle
file.write(line)
Look if this helps. This fulfills the objective that you mentioned, though I have not used the in_place module.
import re


def add_hashtags(text, words):
    """Return *text* with a '#' put in front of every whole-word match.

    Args:
        text: the story text (one line or the whole file).
        words: the words to tag; surrounding whitespace and empty entries
            are ignored.

    Only whole words are tagged ("the" does not match inside "other"), and
    a word that already carries a '#' is left alone, so running the
    function twice does not produce "##word".
    """
    cleaned = {w.strip() for w in words if w.strip()}
    if not cleaned:
        # Nothing to tag. An empty search string would otherwise insert '#'
        # between every character of the text.
        return text
    # Longest alternatives first, so a longer word wins over one that is
    # a prefix of it.
    alternatives = "|".join(
        re.escape(w) for w in sorted(cleaned, key=len, reverse=True)
    )
    pattern = re.compile(r"(?<![\w#])(?:" + alternatives + r")(?!\w)")
    return pattern.sub(lambda m: "#" + m.group(0), text)


if __name__ == "__main__":
    # 1. + 2. read the comma separated words
    with open("hashfile.txt", "r") as f:
        hash_list = [j for i in f for j in i.split(",")]
    # 3. read the story and tag the matching words
    with open("storyfile.txt", "r") as f:
        story = f.read()
    tagged = add_hashtags(story, hash_list)
    # 4. update storyfile.txt with the tagged text
    with open("storyfile.txt", "w") as f:
        f.write(tagged)
    # end="" because the text already carries its own newlines
    print(tagged, end="")
Let me know if you require further clarification on the same.

a python program that searches text files and prints out mutual lines

I am trying to write a python program that takes n number of text files , each file contains names , each name on a separate line like this
Steve
Mark
Sarah
what the program does is that it prints out only the names that exist in all the inputted files .
I am new to programming, so I don't really know how to implement this idea. I tried recursion, but the program seems to run in an infinite loop and I am not sure what the problem is. Is the implementation wrong? If so, do you have a better idea of how to implement it?
# Asker's recursive attempt: print the names that occur in every file given on the command line.
import sys
arguments = sys.argv[1:]
# maps a running index (0, 1, ...) to the open file object of each argument
files = {}
file = iter(arguments)
for number in range(len(sys.argv[1:])):
files[number] = open(next(file))
# Close every file that was opened above.
def close_files():
for num in files:
files[num].close()
# Look for `line` in files[orderOfFile]; on a hit, recurse into the next file.
def start_next_file(line,files,orderOfFile):
print('starting next file')
if orderOfFile < len(files): # to avoid IndexError
for line_searched in files[orderOfFile]:
if line_searched.strip():
# NOTE(review): drops the last character even when the line has no trailing newline.
line_searched = line_searched[:-1]
print('searched line = '+line_searched)
print('searched compared to = ' + line)
if line_searched == line:
#good now see if that name exists in the other files as well
start_next_file(line,files,orderOfFile+1)
elif orderOfFile >= len(files): # when you finish searching all the files
print('got ya '+line) #print the name that exists in all the files
# NOTE(review): indentation was lost, so the nesting of this loop is unclear.
# It rewinds every file, including files[0], which start_find_match is still
# iterating over -- possibly the cause of the endless loop; confirm.
for file in files:
# to make sure the cursor is at the beginning of the read files
#so we can loop through them again
files[file].seek(0)
# Use every non-blank line of the first file as a candidate name.
def start_find_match(files):
orderOfFile = 0
for line in files[orderOfFile] :
# for each name in the file see if it exists in all other files
if line.strip():
line = line[:-1]
print ('starting line = '+line)
start_next_file(line,files,orderOfFile+1)
start_find_match(files)
close_files()
I'm not sure how to fix your code exactly but here's one conceptual way to think about it.
listdir gets all the files in the directory as a list. We narrow that to only .txt files. Next, open, read, split on newlines, and lower to make a larger list containing names. So, files will be a list of lists. Last, find the intersection across all lists using some set logic.
import os


def common_names(paths):
    """Return the set of lower-cased names that occur in every file in *paths*.

    Each file is expected to hold one name per line. Blank lines and
    surrounding whitespace are ignored, so an empty line that happens to
    be present in all files is not reported as a shared name. An empty
    *paths* yields an empty set instead of raising.
    """
    # one list of names per file -> a list of lists
    files = []
    for file in paths:
        with open(file) as f:
            files.append(
                [name.strip().lower() for name in f.read().splitlines() if name.strip()]
            )
    if not files:
        # set.intersection() needs at least one set to work on
        return set()
    return set.intersection(*map(set, files))


if __name__ == "__main__":
    # every .txt file in the current directory
    folder = [f for f in os.listdir() if f.endswith('.txt')]
    result = common_names(folder)
    print(result)
Example:
#file1.txt
john
smith
mary
sue
pretesh
ashton
olaf
Elsa
#file2.txt
David
Lorenzo
Cassy
Grant
elsa
Felica
Salvador
Candance
Fidel
olaf
Tammi
Pasquale
#file3.txt
Jaleesa
Domenic
Shala
Berry
Pamelia
Kenneth
Georgina
Olaf
Kenton
Milly
Morgan
elsa
Returns:
{'olaf', 'elsa'}

Resources