How to remove '#' comments from a string? - string

The problem:
Implement a Python function called stripComments(code) where code is a parameter that takes a string containing the Python code. The function stripComments() returns the code with all comments removed.
I have:
# Asker's attempt, reproduced as posted (the indentation was lost in the paste).
def stripComments(code):
code = str(code)
# NOTE: iterating over a str yields single characters, not lines.
for line in code:
# Collects the whitespace-separated words that begin with '#' (with the '#'
# dropped); `code` itself is never modified by this expression.
comments = [word[1:] for word in code.split() if word[0] == '#']
# Only unbinds the local name `comments`; nothing is removed from `code`.
del(comments)
# No return statement is visible, so this call cannot yield stripped text.
stripComments(code)
I'm not sure how to specifically tell python to search through each line of the string and when it finds a hashtag, to delete the rest of the line.
Please help. :(

You could achieve this through re.sub function.
import re
def stripComments(code):
    """Return *code* with every comment-only line removed.

    A line is dropped when its first non-space character is ``#``; the
    line's trailing newline (if any) is dropped with it.  Comments that
    follow code on the same line are left untouched.

    *code* is converted with ``str()`` first, so non-string input is accepted.
    """
    comment_line = re.compile(r'(?m)^ *#.*\n?')
    return comment_line.sub('', str(code))
print(stripComments("""#foo bar
bar foo
# buz"""))
(?m) enables the multiline mode. ^ asserts that we are at the start. <space>*# matches the character # at the start with or without preceding spaces. .* matches all the following characters except line breaks. Replacing those matched characters with empty string will give you the string with comment lines deleted.

def remove_comments(filename1, filename2):
    """Copy filename1 to filename2 with all ``#`` comments removed.

    Rules applied to each line:

    * A shebang (``#!``) is kept, but only when it is the very first line
      of the file -- anywhere else ``#!`` is just an ordinary comment.
    * Lines that are already empty (or whitespace only) are kept as-is.
    * Everything from the first ``#`` to the end of the line is removed,
      together with the whitespace in front of it.
    * Lines that consisted solely of a comment are discarded entirely.

    Limitation: the scan is purely textual, so a ``#`` inside a string
    literal is treated as the start of a comment as well.

    The input is read completely before the output is opened, so
    filename1 and filename2 may refer to the same file.
    """
    with open(filename1, 'r') as f:
        lines = f.readlines()

    with open(filename2, 'w') as f:
        for index, line in enumerate(lines):
            # Keep the shebang line (only meaningful on line 1).
            if index == 0 and line.startswith("#!"):
                f.write(line)
            # Also keep existing empty lines.
            elif not line.strip():
                f.write(line)
            # But remove comments from other lines.
            else:
                code = line.partition('#')[0].rstrip()
                # Write the line only if there was code before the comment;
                # discard lines that only contain comments.
                if code:
                    f.write(code + '\n')

For my future reference.
def remove_comments(lines: list[str]) -> list[str]:
    """Return *lines* without comments and without blank lines.

    * A line whose first character is ``#`` is dropped.
    * Otherwise the line is cut at the first ``" #"`` (space + hash); a
      ``#`` that is not preceded by a space is not treated as a comment.
    * Lines that are empty or whitespace-only after the cut are dropped.
    """
    kept = []
    for raw in lines:
        # Comment occupying the whole line, starting in column 0.
        if raw[:1] == "#":
            continue
        code, _, _ = raw.partition(" #")
        if code.strip():
            kept.append(code)
    return kept
print(remove_comments("Hello #World!\n\nI have a question # that #".split('\n')))
>>> ['Hello', 'I have a question']
This implementation has the benefit of not requiring the re module and being easy to understand. It also removes pre-existing blank lines, which is useful for my use case.

Related

Read the latest line from a file which keeps getting updated

I am currently coding a Discord bot which reads a local log file that keeps getting updated. Every time a new line is added and matches a certain pattern, the bot should post a message with that content. My current attempt is the code below. The problem with it is that the file is opened all the time. Is there a way to read the file only if it has been updated since the last read? In other words, is there a way to implement this without keeping the file open at all times or being busy reading it constantly?
# Asker's polling loop, reproduced as posted (the indentation was lost in the
# paste).  It repeats until the bot is closed and, for every entry in `logs`,
# looks up the newest matching log file again on each pass.
while not BOT.is_closed():
for log in logs:
file_path = config["path_to_log"]
logfiles= []
# Collect every file in the log directory whose name starts with "_<log>".
for filename in glob.glob(os.path.join(file_path,f'_{log}*')):
logfiles.append(filename)
# Most recently modified candidate; max() raises ValueError if none matched.
latest_file = max(logfiles, key=os.path.getmtime)
# The triple-quoted string below is a disabled second attempt kept for
# reference; as a bare string expression it has no effect at runtime.
""" #second try doenst work if to much content gets added at the same time
with open(latest_file,"rb") as f:
if(len(messagelist)>1000):
messagelist = []
f.seek(-2, os.SEEK_END)
while f.read(1) != b'\n':
f.seek(-2, os.SEEK_CUR)
last_line = f.readline().decode()
for word in words:
if word in last_line and last_line not in messagelist:
messagelist.append(last_line)
print(last_line)
await channel.send(last_line)
else:
time.sleep(1)
"""
# First attempt: works, but re-opens the file and reads all of it on every
# pass just to inspect the last ten lines.
try:
fp = open(latest_file, 'r')
# NOTE: a bare except also catches KeyboardInterrupt and SystemExit.
except:
end_program("Error while reading the log file")
for line in (fp.readlines() [-10:]):
new = line
for word in words:
# `messagelist` remembers lines already sent so they are not posted twice.
if word in new and new not in messagelist:
messagelist.append(new)
print(new)
await channel.send(new)
else:
# NOTE(review): time.sleep() blocks the calling thread; since this code uses
# `await`, asyncio.sleep() is presumably what is wanted here -- confirm.
time.sleep(1)
###
fp.close()
You could use a generator.
import time, os
def follow(filename, prefix='a', poll_interval=1):
    """Yield lines appended to *filename* after the generator first runs.

    The file is opened once and the read position is moved to its end, so
    content that already exists is skipped.  The generator then polls for
    new lines forever and never returns on its own; close it (or break out
    of the consuming loop) to release the file.

    Parameters:
        filename: path of the file to follow.
        prefix: only lines starting with this string are yielded.  A tuple
            of strings is accepted as well (see ``str.startswith``).  The
            default ``'a'`` is merely a placeholder example.
        poll_interval: seconds to sleep when no new data is available.

    Yields:
        Each matching line, including its trailing newline character.
    """
    with open(filename) as f:
        # Skip existing content; only lines written from now on are reported.
        f.seek(0, os.SEEK_END)
        while True:
            line = f.readline()
            if not line:
                # Nothing new yet: wait before polling again.
                time.sleep(poll_interval)
                continue
            if line.startswith(prefix):
                yield line
The generator above will yield the lines with a newline character left at the end. You may want to slightly modify yield line to exclude the newline character, say, yield line[:-1]. line.startswith('a') is just an example to show how to yield lines that match a pattern. You will need to update this with your pattern.
In your code, you could use the generator as:
for line in follow('/path/to/file'):
# do something with line

Merge only if two consecutives lines startwith at python and write the rest of text normally

Input
02000|42163,54|
03100|4|6070,00
03110|||6070,00|00|00|
00000|31751150201912001|01072000600074639|
02000|288465,76|
03100|11|9060,00
03110|||1299,00|00|
03110||||7761,00|00|
03100|29|14031,21
03110|||14031,21|00|
00000|31757328201912001|01072000601021393|
Code
# Asker's attempt, reproduced as posted (the indentation was lost in the paste).
# `prev` holds the previously read line; it starts empty so the first
# comparison cannot match.
prev = ''
with open('out.txt') as f:
for line in f:
# Output is produced only when the current AND the previous line both start
# with '03110'; every other line is read but never printed.
if prev.startswith('03110') and line.startswith('03110'):
print(prev.strip()+ '|03100|XX|PARCELA|' + line)
prev = line
Hi, I have this code that checks whether two consecutive lines start with 03110 and prints those lines merged, but I want to change the code so that it also prints (or writes to a .txt file) the rest of the lines.
Output should be like this
02000|42163,54|
03100|4|6070,00
03110|||6070,00|00|00|
00000|31751150201912001|01072000600074639|
02000|288465,76|
03100|11|9060,00
03110|||1299,00|00|3100|XX|PARCELA|03110||||7761,00|00|
03100|29|14031,21
03110|||14031,21|00|
00000|31757328201912001|01072000601021393|
I know that I'm getting only those two merged lines, because that is the only thing the print() call outputs:
03110|||1299,00|00|3100|XX|PARCELA|03110||||7761,00|00|
But I don't know how to produce the desired output. Can anyone help me with my code?
def merge_consecutive(lines, prefix='03110', joiner='|03100|XX|PARCELA|'):
    """Merge runs of consecutive records whose first field equals *prefix*.

    Parameters:
        lines: iterable of '|'-separated records (trailing whitespace,
            including the newline, is stripped from each).
        prefix: value the first field must have for a record to be merged.
        joiner: text inserted between two merged records.

    Returns:
        A list of records without trailing newlines.  Each run of adjacent
        matching records is collapsed into a single entry joined by
        *joiner*; every other record is passed through unchanged, including
        a lone matching record at the very start or end of the input.
    """
    merged = []
    previous_matched = False
    for raw in lines:
        record = raw.rstrip()
        matched = record.split('|')[0] == prefix
        if matched and previous_matched:
            # Continue the run: glue this record onto the one already stored.
            merged[-1] += joiner + record
        else:
            merged.append(record)
        previous_matched = matched
    return merged


if __name__ == '__main__':
    # I assume the input is in a text file:
    with open('myFile.txt', 'r') as my_file:
        new_list = merge_consecutive(my_file)

    # To write the new_list to a text file (append mode, as before).
    with open('new_file', 'a') as f:
        for item in new_list:
            print(item)
            f.write(item + '\n')

Removing \n from a list of strings

Using this code...
# Asker's attempt, reproduced as posted (the indentation was lost in the paste).
def read_restaurants(file):
# NOTE: the `file` argument is immediately overwritten by a hard-coded path.
file = open('restaurants_small.txt', 'r')
contents_list = file.readlines()
for line in contents_list:
# str.strip() returns a new string; the result is discarded here, so the
# entries of contents_list keep their trailing '\n'.
line.strip('\n')
print (contents_list)
file.close()
read_restaurants('restaurants_small.txt')
I get this result...
['Georgie Porgie\n', '87%\n', '$$$\n', 'Canadian,Pub Food\n', '\n', 'Queen St. Cafe\n', '82%\n', '$\n', 'Malaysian,Thai\n', '\n', 'Dumplings R Us\n', '71%\n', '$\n', 'Chinese\n', '\n', 'Mexican Grill\n', '85%\n', '$$\n', 'Mexican\n', '\n', 'Deep Fried Everything\n', '52%\n', '$\n', 'Pub Food\n']
I want to strip out the \n...I've read through a lot of answers on here that I thought might help, but nothing seems to work specifically with this!
I guess the for...in process needs to be stored as a new list, and I need to return that...just not sure how to do it!
A bit more of a pythonic (and, to my mind, easier to read) approach:
def read_restaurants(filename):
    """Return the lines of *filename* as a list, trailing whitespace removed.

    Every line has all trailing whitespace stripped (not just the
    newline); blank lines are kept as empty strings.
    """
    with open(filename) as source:
        return list(map(str.rstrip, source))
Also, since no one has quite clarified this: the reason your original approach doesn't work is that line.strip() returns a modified version of line, but it doesn't alter line:
>>> line = 'hello there\n'
>>> print(repr(line))
'hello there\n'
>>> line.strip()
'hello there'
>>> print(repr(line))
'hello there\n'
So whenever you call stringVar.strip(), you need to do something with the output - build a list, like above, or store it in a variable, or something like that.
You can replace your regular for loop with a list comprehension, and you don't have to pass '\n' as an argument, since the strip() method removes leading and trailing whitespace characters by default:
contents_list = [line.strip() for line in contents_list]
You are right: you will need a new list. Also, probably you want to use rstrip() instead of strip():
def read_restaurants(file_name):
    """Return the lines of *file_name* as a list without trailing newlines.

    Only the newline character is removed from each line; any other
    trailing whitespace is preserved.  Blank lines are kept as empty
    strings.
    """
    # The context manager closes the file even if reading fails part-way.
    with open(file_name, 'r') as file:
        return [line.rstrip('\n') for line in file]
Then you can do the following:
print(read_restaurants('restaurant.list'))

print complete line which includes a search word. the eol is a dot not a line feed

I have a long text (The Winter's Tale). I want to search for the word 'Luzifer' and then print the complete line that contains the word 'Luzifer'. By "complete line" I mean everything between two dots.
My script prints 'Luzifer' and all following words until the dot at the end of the line. But I want the full line.
For example. the text line is:
'Today Luzifer has a bad day. And he is ill'
My script prints: 'Luzifer has a bad day.'
But I need the complete line, including 'Today'.
Is there a function or a way to read backwards?
Here my script:
#!/usr/bin/python3.6
# coding: utf-8
import re
def suchen(regAusdruck, textdatei):
    """Return all non-overlapping matches of a pattern in a UTF-8 text file.

    Parameters:
        regAusdruck: regular expression (pattern string or compiled pattern).
        textdatei: path of the text file to search.

    Returns:
        The list produced by ``re.findall`` over the whole file content.
    """
    # The context manager closes the file even if reading raises.
    with open(textdatei, 'r', encoding='utf-8') as f:
        rfctext = f.read()
    return re.findall(regAusdruck, rfctext)
pattern1 = r'\bLuzifer\b[^.;:!?]{2,}'
print(suchen(pattern1, "tale.txt"))
One of the most straightforward ways of handling this is to read in your entire text (hopefully it is not too big), split on '.', and then return the strings that contain your search word. For good measure, I think it will be useful to replace the newline characters with a space so that you don't have any strings broken into multiple lines.
def suchen(regAusdruck, textdatei):
    """Return every '.'-delimited sentence of a file that contains a word.

    Parameters:
        regAusdruck: plain search string (tested with ``in``, not a regex).
        textdatei: path of the UTF-8 text file to search.

    Returns:
        The matching sentences in file order.  The terminating '.' is not
        part of a sentence, and whitespace after the previous '.' is kept.
    """
    with open(textdatei, 'r', encoding='utf-8') as f:
        raw_text = f.read()
    # Join physical lines with a space so no sentence is broken by a newline.
    flattened = ' '.join(raw_text.split('\n'))
    candidates = flattened.split('.')
    # Equivalent comprehension:
    # [sentence for sentence in candidates if regAusdruck in sentence]
    return list(filter(lambda sentence: regAusdruck in sentence, candidates))
print(suchen('Luzifer', "tale.txt"))
If you really need to use a regular expression (which may be the case for more complicated searches) a modification is only needed in the return statement.
def suchen(regAusdruck, textdatei):
    """Return every '.'-delimited sentence of a file that matches a regex.

    Parameters:
        regAusdruck: compiled regular expression object; its ``search``
            method is applied to each sentence.
        textdatei: path of the UTF-8 text file to search.

    Returns:
        The matching sentences in file order.  The terminating '.' is not
        part of a sentence, and whitespace after the previous '.' is kept.
    """
    with open(textdatei, 'r', encoding='utf-8') as f:
        raw_text = f.read()
    # Join physical lines with a space so no sentence is broken by a newline.
    flattened = ' '.join(raw_text.split('\n'))
    candidates = flattened.split('.')
    # Equivalent comprehension:
    # [sentence for sentence in candidates if regAusdruck.search(sentence)]
    return list(filter(regAusdruck.search, candidates))
import re
print(suchen(re.compile(r'\bluzifer\b', flags=re.IGNORECASE), "tale.txt"))

replace a string within file using python

I am getting an odd problem. when I am trying to replace a string in a file.
The relevant line in the file is:
lattice parameter A [a.u.]
5.771452243459
and I am trying to replace it as:
# Asker's attempt, reproduced as posted (the indentation was lost in the paste).
with open(newsys, "r+") as finp:
for line in finp:
# print(line)
if line.startswith("lattice parameter A [a.u.]"):
# Advance to the line after the header; that line holds the value.
line = next(finp)
print(line)
print(Alat)
# str.replace() returns a new string; the result is discarded here, so `line`
# is unchanged and nothing is written back to the file.
line.replace(line.strip(), str(Alat))
print(line)
the last 3 print statement gives:
5.771452243459 # string that will be replaced
6.63717007997785 #value of Alat
5.771452243459 #the line after replace statement
What is going wrong here?
The replace method does not modify the existing string; instead, it creates a new one. So in the line
line.replace(line.strip(), str(Alat))
you are creating a completely new string and then discarding it (because it is not assigned to any variable).
I would do something like:
# Copy the file named by `newsys` to 'newfile', replacing the value on the line
# that follows the "lattice parameter A [a.u.]" header with `Alat`.
# The source is only read, so it is opened read-only.
with open(newsys, "r") as finp, open('newfile', 'w') as fout:
    for line in finp:
        if line.startswith("lattice parameter A [a.u.]"):
            # Write the header itself before advancing; otherwise it would be
            # lost once `line` is rebound to the value line.
            fout.write(line)
            # Default of '' avoids StopIteration if the header is the last line.
            line = next(finp, '')
            print(line)
            print(Alat)
            # Skip blank value lines: replacing '' would insert the new value
            # between every character.
            if line.strip():
                # Replace only the value, keeping surrounding whitespace/newline.
                line = line.replace(line.strip(), str(Alat))
            print(line)
        fout.write(line)

Resources