Printing Out a Word Starting with a Particular String - python-3.x

I could use some help with my below code. I am trying to read through the file mbox-short.txt line by line. Looking at each word in the line to see if it starts with the string "SAK-", if so print it out and continue. Right now my code is finding all the lines with SAK, but instead of just printing out the SAK- string it's showing everything else following the string as well.
# Ask for the file to scan; an empty answer selects the sample mailbox.
fname = input('Enter File: ') or 'mbox-short.txt'
lst = []
fhand = open(fname)
for raw_line in fhand:
    stripped = raw_line.strip()
    if stripped.startswith('SAK'):
        # Prints every whitespace-separated piece of the matching line,
        # not only the piece that begins with 'SAK'.
        print(stripped.split())

Related

fetch a string in file & get all lines containing the string along with line numbers

Is the code below a correct way to search a file for a string and get all lines containing that string, along with their line numbers?
I am getting a syntax error on line 1 of this code:
# NOTE(review): a string literal ('sam.txt') appears where a parameter name is
# required; this is what triggers the syntax error reported for line 1.
def matched_lines('sam.txt', string_to_search):
# NOTE(review): search_string_in_file is not defined anywhere in this snippet.
matched_lines = search_string_in_file('sam.txt','is')
"""Search for the given string in file and return lines containing that string,
along with line numbers"""
# NOTE(review): line_number and list_of_results are assigned but never used below.
line_number = 0
list_of_results = []
# Open the file in read only mode
# NOTE(review): the name matched_lines is rebound here to the open file object.
with open('sam.txt', 'r') as matched_lines:
print('Total Matched lines : ', len(matched_lines))
# Read all lines in the file one by one
for elem in matched_lines:
print('Line Number = ', elem[0], ' :: Line = ', elem[1])
please help me.
Is this result needed?
def matched_lines(filename, string_to_search):
    """Return the lines of a UTF-8 text file that contain a given string.

    Args:
        filename: Path of the file to scan.
        string_to_search: Substring to look for in each line.

    Returns:
        A list of ``(line_number, line)`` tuples in file order. Line numbers
        start at 1 and each line has its trailing newline removed.
    """
    list_of_results = []
    # Iterate the file lazily instead of reading it whole; this also avoids
    # the phantom empty "line" that split('\n') yields after a final newline.
    with open(filename, encoding='utf8') as source:
        for line_number, raw_line in enumerate(source, start=1):
            line = raw_line.rstrip('\n')
            if string_to_search in line:
                list_of_results.append((line_number, line))
    return list_of_results
# Example run: collect the matches for 'lorem' in some.txt and report them.
result = matched_lines('some.txt', 'lorem')
match_count = len(result)
print('Total Matched lines :', match_count)
print(result)

Comparison script for different files

I am trying to write a script that compares a bunch of files based on a search word; in this case I searched for 106. I then want the code to match the tags from file A against the tags in file B and print a list of the ones that don't match.
For example in file A i have this line
106_LB01_GP61_HAL;LB01;10892;DIGITAL;0;0;0;0;;;Smutsigt tilluftsfilter;;
and in file B i have
"Prefix": "106_LB01_GP61",
Those lines match, so I want the script to ignore that tag.
When the script finds lines that don't match, i.e. when a tag in file A can't find its buddy in file B, I want it to write those tags to a file,
for example:
Total unused tags:1
106_LB01_GP61
Right now I am stuck at making it read two different files at the same time.
#!/usr/bin/env python
#Import os module
import os
# Search settings: directory to scan, extension of the files to inspect,
# the word to look for, and the file that collects the matching lines.
search_path = "."
file_type = ".wpp"
# Ask the user to enter string to search
search_str = input("Enter searchword: ")
resultsFile = "results.csv"
file_name = "results.csv"

# Append a directory separator if not already present
if not search_path.endswith(("/", "\\")):
    search_path = search_path + "/"

# If path does not exist, set search path to current directory
if not os.path.exists(search_path):
    search_path = "."

# Repeat for each file in the directory
for fname in os.listdir(path=search_path):
    # Apply file type filter
    if not fname.endswith(file_type):
        continue
    # os.path.join keeps the paths valid even when search_path has no
    # trailing separator; the with-block closes both files, including the
    # results file, which was previously left open.
    with open(os.path.join(search_path, fname)) as fo, \
            open(os.path.join(search_path, resultsFile), 'a') as wf:
        # line_no is the 1-based line number within the current file
        for line_no, line in enumerate(fo, start=1):
            # Search for string in line
            index = line.find(search_str)
            if index != -1:
                print(fname, "[", line_no, ",", index, "] ", line, sep="")
                wf.write(line + " ")
def check_if_string_in_file(file_name, string_to_search):
    """Tell whether at least one line of the file contains the given string.

    Returns True as soon as a matching line is found, otherwise False.
    """
    with open(file_name, 'r') as read_obj:
        # any() stops reading at the first line that contains the string.
        return any(string_to_search in text for text in read_obj)
# NOTE: this rebinds check_if_string_in_file, replacing any earlier
# definition of the same name in this module.
def check_if_string_in_file(file_name2, string_to_search):
    """Report whether the given string occurs in any line of the file.

    The file is scanned line by line and the scan ends at the first hit.
    """
    found = False
    with open(file_name2, 'r') as read_obj:
        for text in read_obj:
            if string_to_search not in text:
                continue
            found = True
            break
    return found
def search_string_in_file(file_name, string_to_search):
    """Collect every line of the file that contains the given string.

    Returns a list of (line_number, line) tuples, where line numbers start
    at 1 and each line has its trailing whitespace removed.
    """
    with open(file_name, 'r') as read_obj:
        # enumerate supplies the 1-based line number for each line read.
        return [
            (position, text.rstrip())
            for position, text in enumerate(read_obj, start=1)
            if string_to_search in text
        ]
def search_multiple_strings_in_file(file_name, list_of_strings):
    """Find the lines of the file that contain any of the given strings.

    Returns a list of (matched_string, line_number, line) tuples. Line
    numbers start at 1, lines have trailing whitespace removed, and a line
    appears once for every string from the list that it contains.
    """
    hits = []
    with open(file_name, 'r') as read_obj:
        for position, text in enumerate(read_obj, start=1):
            # One entry per search string found in this line, in list order.
            hits.extend(
                (needle, position, text.rstrip())
                for needle in list_of_strings
                if needle in text
            )
    return hits
def main():
    """Report where the module-level search_str occurs in file_name.

    Prints each matching line with its number, then the number of matches
    found by the multi-string search, then whether the string occurs at all.
    """
    print('*** Loading *** ')
    for line_number, text in search_string_in_file(file_name, search_str):
        print('Line Number = ', line_number, ' :: Line = ', text)

    # Run the multi-string search with the single search word.
    needles = [search_str]
    multi_hits = search_multiple_strings_in_file(file_name, needles)
    print('*** Checking if', needles, 'exists in a file *** ')
    print('Total Matched lines : ', len(multi_hits))

    # Final yes/no answer on whether the word occurs in the file.
    found = check_if_string_in_file(file_name, search_str)
    print('Yes, string found in file' if found else 'String not found in file')


if __name__ == "__main__":
    main()

Unable to save the file correctly

I have a text file contains a text about a story and I want to find a word "like" and get the next word after it and call a function to find synonyms for that word. here is my code:
# Path of the story file; it is read here and reopened for writing further down.
file = 'File1.txt'
with open(file, 'r') as open_file:
read_file = open_file.readlines()
output_lines = []
for line in read_file:
words = line.split()
for u, word in enumerate(words):
if 'like' == word:
# NOTE(review): u + 1 is out of range when 'like' is the last word of the line.
next_word = words[u + 1]
# NOTE(review): the return value of find_synonymous is discarded here.
find_synonymous(next_word )
output_lines.append(' '.join(words))
# NOTE(review): mode 'w' truncates the file, and only the current words list
# is written; output_lines is never written out.
with open(file, 'w') as open_file:
open_file.write(' '.join(words))
I think my only problem is in the text itself, because when I write one sentence including the word "like" it works (for example 'I like movies'). But when I have a file that contains a lot of sentences and run the code, it deletes all the text. Does anyone know where the problem could be?
You have a couple of problems. find_synonymous(next_word ) doesn't replace the word in the list, so at best you will get the original text back. You do open(file, 'w') inside the for loop, so the file is overwritten for each line. next_word = words[u + 1] will raise an index error if like happens to be the last word on the line and you don't handle the case where the thing that is liked continues on the next line.
In this example, I track an "is_liked" state. If a word is in the like state, it is converted. That way you can handle sentences that are split across lines and don't have to worry about index errors. The list is written to the file outside the loop.
file = 'File1.txt'
with open(file, 'r') as source:
    original_lines = source.readlines()

output_lines = []
# True while the previous word was 'like'; carried across line breaks so a
# sentence split over two lines is still handled.
is_liked = False
for raw_line in original_lines:
    tokens = raw_line.split()
    for position, token in enumerate(tokens):
        if not is_liked:
            is_liked = token == 'like'
            continue
        # The word right after 'like' is replaced by its synonym.
        tokens[position] = find_synonymous(token)
        is_liked = False
    output_lines.append(' '.join(tokens) + '\n')

# Write the whole text back once, after every line has been processed.
with open(file, 'w') as target:
    target.writelines(output_lines)

Writing python scripts

I need to write a standalone program that would run on a python cmd. This program counts the number of characters in every line of HumptyDumpty.txt file, and outputs this to a new file.
Note that the new file needs to contain only the number of characters per line.
Here's my code:
import sys
# Input and output paths come from the first two command-line arguments.
infilename = sys.argv[1]
outfilename = sys.argv[2]
infile = open(infilename)
outfile = open(outfilename, 'w')
char_ = 0
for text in infile:
    # Length of the line without its line break.
    char_ = len(text.strip("\n"))
    # The count is written with no separator after it.
    outfile.write(str(char_))
    print(text, end='')
infile.close()
outfile.close()
The output file has only one line, the concatenation xyz, instead of
x
y
z
"\n" doesn't seem to be doing the trick. Any suggestions?
If you don't want to include the white space between the words then you should replace them with an empty string.
for line in infile:
    # Count what is left after dropping the spaces and the line break.
    char = len(line.replace(" ", "").strip("\n"))
    # One count per output line.
    outfile.write(str(char) + "\n")
    print(line, end='')
    print(char)

How can I simplify and format this function?

So I have this messy code where I wanted to get every word from frankenstein.txt, sort them alphabetically, eliminated one and two letter words, and write them into a new file.
def Dictionary():
    """Write the distinct words of frankenstein.txt to dictionary.txt.

    Every character listed in bad_char is replaced by a space, words of one
    or two letters are dropped, and the remaining words are written as the
    string form of a set.
    """
    bad_char = '~!##$%^&*()_+{}|:"<>?\`1234567890-=[]\;\',./ '
    # Translation table mapping each unwanted character to a space.
    table = str.maketrans(bad_char, ' ' * len(bad_char))
    d = []
    count = 0
    infile = open('frankenstein.txt', 'r')
    for raw_line in infile:
        kept = [word for word in raw_line.translate(table).split() if len(word) > 2]
        d.extend(kept)
        count += len(kept)
    infile.close()
    file = open('dictionary.txt', 'w')
    file.write(str(set(d)))
    file.close()
# Build dictionary.txt by calling the function defined above.
Dictionary()
How can I simplify it and make it more readable and also how can I make the words write vertically in the new file (it writes in a horizontal list):
abbey
abhorred
about
etc....
A few improvements below:
from string import digits, punctuation
def create_dictionary():
    """Write the distinct words of frankenstein.txt to dictionary.txt.

    Digits and punctuation are replaced by spaces, words of one or two
    letters are dropped, and the remaining distinct words are written in
    sorted order, one word per line.
    """
    bad_char = digits + punctuation + '...'  # may need more characters
    # Translation table mapping each unwanted character to a space.
    table = str.maketrans(bad_char, ' ' * len(bad_char))
    words = set()
    with open('frankenstein.txt') as infile:
        for line in infile:
            words.update(
                word for word in line.translate(table).split() if len(word) > 2
            )
    with open('dictionary.txt', 'w') as outfile:
        # writelines() adds no separators, so each word gets its own newline;
        # without it every word ends up concatenated on a single line.
        outfile.writelines(word + '\n' for word in sorted(words))
A few notes:
follow the style guide
string contains constants you can use to provide the "bad characters";
you never used count (which was just len(d) anyway);
use the with context manager for file handling; and
using a set from the start prevents duplicates, but they aren't ordered (hence sorted).
Using re module.
import re
# Distinct words of three or more letters found in the input text.
words = set()
with open('frankenstein.txt') as infile:
    for line in infile:
        # Split on runs of one or more non-letters. The quantifier must be
        # '+': a pattern that can match the empty string makes re.split cut
        # between every character on Python 3.7+. Sets are filled with
        # update(); they have no extend() method.
        words.update(x for x in re.split(r'[^A-Za-z]+', line) if len(x) > 2)
with open('dictionary.txt', 'w') as outfile:
    # writelines() adds no separators, so append a newline to every word.
    outfile.writelines(word + '\n' for word in sorted(words))
In the character class of the pattern passed to re.split, replace 'A-Za-z' with the set of characters that you want to keep in the words written to dictionary.txt.

Resources