Merge lines only if two consecutive lines start with the same prefix in Python, and write the rest of the text normally - python-3.x

Input
02000|42163,54|
03100|4|6070,00
03110|||6070,00|00|00|
00000|31751150201912001|01072000600074639|
02000|288465,76|
03100|11|9060,00
03110|||1299,00|00|
03110||||7761,00|00|
03100|29|14031,21
03110|||14031,21|00|
00000|31757328201912001|01072000601021393|
Code
# Walk the file while remembering the previous line, so each line can be
# compared with its predecessor.
previous_line = ''
with open('out.txt') as source:
    for current_line in source:
        both_tagged = (
            previous_line.startswith('03110')
            and current_line.startswith('03110')
        )
        if both_tagged:
            # Only merged pairs are printed; every other line produces no output.
            print(previous_line.strip() + '|03100|XX|PARCELA|' + current_line)
        previous_line = current_line
Hi, I have this code that searches for two consecutive lines that start with 03110 and prints those lines, but I want to transform the code so that it also prints (or writes to a .txt file) the rest of the lines.
Output should be like this
02000|42163,54|
03100|4|6070,00
03110|||6070,00|00|00|
00000|31751150201912001|01072000600074639|
02000|288465,76|
03100|11|9060,00
03110|||1299,00|00|3100|XX|PARCELA|03110||||7761,00|00|
03100|29|14031,21
03110|||14031,21|00|
00000|31757328201912001|01072000601021393|
I know that I'm getting only those two merged lines, because that is what the print() call outputs:
03110|||1299,00|00|3100|XX|PARCELA|03110||||7761,00|00|
But I don't know how to produce the desired output. Can anyone help me with my code?

def merge_consecutive(lines, prefix='03110', joiner='|03100|XX|PARCELA|'):
    """Merge each pair of consecutive lines whose first field equals *prefix*.

    Args:
        lines: iterable of text lines (trailing whitespace is stripped).
        prefix: value the first ``|``-separated field must have to be merged.
        joiner: text inserted between the two merged lines.

    Returns:
        A list of output lines without trailing newlines. A tagged line
        directly followed by another tagged line is emitted only as part of
        the merged pair; in a run of three or more tagged lines every
        adjacent pair is emitted. All other lines are passed through.
    """
    records = [line.rstrip() for line in lines]
    tagged = [record.split('|')[0] == prefix for record in records]
    last = len(records) - 1

    merged = []
    for i, record in enumerate(records):
        # `i > 0` stops the first line from being compared with the last one
        # through negative indexing.
        if tagged[i] and i > 0 and tagged[i - 1]:
            merged.append(records[i - 1] + joiner + record)
        # `i < last` keeps a tagged final line instead of dropping it.
        elif tagged[i] and i < last and tagged[i + 1]:
            continue  # emitted on the next iteration as part of the pair
        else:
            merged.append(record)
    return merged


if __name__ == '__main__':
    # I assume the input is in a text file:
    with open('myFile.txt', 'r') as my_file:
        new_list = merge_consecutive(my_file)

    # Write the new list to a text file ('a' appends to any existing content).
    with open('new_file', 'a') as f:
        for item in new_list:
            print(item)
            f.write(item + '\n')

Related

How to Read Multiple Files in a Loop in Python and get count of matching words

I have two text files and two lists (FIRST_LIST, SCND_LIST). For each file individually, I want to find the count of words that match FIRST_LIST and the count of words that match SCND_LIST.
FIRST_LIST =
"accessorizes","accessorizing","accessorized","accessorize"
SCND_LIST=
"accessorize","accessorized","accessorizes","accessorizing"
text File1 contains:
This is a very good question, and you have received good answers which describe interesting topics accessorized accessorize.
text File2 contains:
is more applied,using accessorize accessorized,accessorizes,accessorizing
output
File1 first list count=2
File1 second list count=0
File2 first list count=0
File2 second list count=4
This is the code I have tried in order to achieve this functionality, but I am not able to get the expected output.
Any help is appreciated.
import os
import glob
import re

# Collect every .txt file in the current working directory.
files = []
for filename in glob.glob("*.txt"):
    files.append(filename)


def remove_punctuation(line):
    """Return *line* with every character that is neither a word character
    nor whitespace removed."""
    return re.sub(r'[^\w\s]', '', line)


# NOTE: the cleaned lines of ALL files are pooled into one list, so the
# matching below cannot tell which file a word came from.
two_files = []
for filename in files:
    # `with` closes each file as soon as it has been read.
    with open(filename) as handle:
        for line in handle:
            cleaned = remove_punctuation(line)
            print(cleaned, end='')
            two_files.append(cleaned)

FIRST_LIST = "accessorizes", "accessorizing", "accessorized", "accessorize"
SCND_LIST = "accessorize", "accessorized", "accessorizes", "accessorizing"

# Record which words of FIRST_LIST occur (as substrings) in any pooled line.
c = []
for match in FIRST_LIST:
    if any(match in value for value in two_files):
        print(match)
        c.append(match)
print(c)

# Same check for SCND_LIST.
d = []
for match in SCND_LIST:
    if any(match in value for value in two_files):
        print(match)
        d.append(match)
print(d)
Using Counter and some list comprehension is one of many different approaches to solve your problem.
I assume your sample output is wrong, since some words are part of both lists and both files but are not counted. In addition, I added a second line to the sample strings in order to show how this works with multi-line strings, which might be the typical contents of a given file.
io.StringIO objects emulate your files, but working with real files from your file system works exactly the same since both provide a file-like object or file-like interface:
import io  # required for io.StringIO below
from collections import Counter

list_a = ["accessorizes", "accessorizing", "accessorized", "accessorize"]
list_b = ["accessorize", "accessorized", "accessorizes", "accessorizing"]

# added a second line to each string just for the sake
file_contents_a = 'This is a very good question, and you have received good answers which describe interesting topics accessorized accessorize.\nThis is the second line in file a'
file_contents_b = 'is more applied,using accessorize accessorized,accessorizes,accessorizing\nThis is the second line in file b'

# using io.StringIO to simulate a file input (--> file-like object)
# you should use `with open(filename) as ...` for real file input
file_like_a = io.StringIO(file_contents_a)
file_like_b = io.StringIO(file_contents_b)

# read file contents and split lines into a list of strings
lines_of_file_a = file_like_a.read().splitlines()
lines_of_file_b = file_like_b.read().splitlines()


def count_list_words(line, *word_lists):
    """Count how many words of *line* appear in each of *word_lists*.

    Periods and commas are treated as word separators. Matching is exact
    (whole word, case-sensitive).

    Returns:
        A tuple with one total per word list, in the order given.
    """
    words = line.replace('.', ' ').replace(',', ' ').split(' ')
    tally = Counter(words)
    # set() makes a word repeated inside a list count only once; a Counter
    # returns 0 for words that do not occur in the line.
    return tuple(
        sum(tally[word] for word in set(word_list))
        for word_list in word_lists
    )


def print_line_counts(lines):
    """Print, for every line, how many of its words are in list_a and list_b."""
    for line_number, line in enumerate(lines):
        in_list_a, in_list_b = count_list_words(line, list_a, list_b)
        print("Line {}".format(line_number))
        print("- in list a {}".format(in_list_a))
        print("- in list b {}".format(in_list_b))


# iterate through all lines of each file
print_line_counts(lines_of_file_a)
print_line_counts(lines_of_file_b)

# actually, your two lists are the same
lists_are_equal = sorted(list_a) == sorted(list_b)
print(lists_are_equal)

reading text line by line in python 3.6

I have a date.txt file that contains codes,
ex:
1111111111111111
2222222222222222
3333333333333333
4444444444444444
I want to check each code in website.
i tried:
with open('date.txt', 'r') as code_file:
    data = code_file.readlines()

for line in data:
    words = line.split()

# NOTE(review): the original indentation was lost; placing this call after the
# loop matches the reported symptom (only the last line is sent) - confirm.
send_keys(words)
But this only copies the last line.
I need to make a loop that checks line by line until all of the codes have been checked.
Thanks for the help.
4 a.m. is too late for my little brain...
==
edit:
solved
# `lines` is not set in this snippet; it must already hold the number of
# entries still to print. Each pass decrements it and prints data[lines].
while lines > 0:
    lines -= 1
    with open('date.txt', 'r') as code_file:
        data = code_file.readlines()
    words = data[lines]
    print(words)
Try this I think it will work :
# Each readline() call consumes the next line of the already-open `file` object.
line_1, line_2 = file.readline(), file.readline()
Repeat this for as many lines as you would like to read.
One thing to keep in mind is if you print these lines they will all print on the same line.

Parse Text with Python

I have data like the example data below in a text file. What I would like to do is search through the text file and return everything between "SpecialStuff" and the next ";", like I've done with the example out put. I'm pretty new to python so any tips are greatly appreciated, would something like .split() work?
Example Data:
stuff:
1
1
1
23
];
otherstuff:
do something
23
4
1
];
SpecialStuff
select
numbers
,othernumbers
words
;
MoreOtherStuff
randomstuff
##123
Example Out Put:
select
numbers
,othernumbers
words
You can try this:
def iter_special_lines(lines):
    """Yield the lines between a "SpecialStuff" line and the next ";" line.

    A line containing "SpecialStuff" switches output on (the marker line
    itself is not yielded); any line containing ";" switches it off.
    """
    seen_special_stuff = False  # True while inside the wanted block
    for line in lines:
        if ";" in line:
            seen_special_stuff = False  # a semicolon ends the block
        if seen_special_stuff:
            yield line
        if "SpecialStuff" in line:
            seen_special_stuff = True  # start emitting from the next line


if __name__ == "__main__":
    # `with` closes both files (and flushes the output) even on error.
    with open("filename.txt", "r") as file, open("result.txt", "w") as output:
        output.writelines(iter_special_lines(file))
This returns a file named result.txt that contains:
select
numbers
,othernumbers
words
This code can be improved! Since this is likely a homework assignment, you'll probably want to do more research about how to make this more efficient. Hopefully it can be a useful starting ground for you!
Cheers!
EDIT
If you wanted the code to specifically read the line "SpecialStuff" (instead of lines containing "SpecialStuff"), you could easily change the "if" statements to make them more specific:
def iter_exact_special_lines(lines):
    """Yield the lines between a line that is exactly "SpecialStuff" and the
    next line that is exactly ";".

    Only the newline character is ignored in the comparison; other
    surrounding whitespace prevents a match.
    """
    seen_special_stuff = False  # True while inside the wanted block
    for line in lines:
        content = line.replace("\n", "")
        if content == ";":
            seen_special_stuff = False
        if seen_special_stuff:
            yield line
        if content == "SpecialStuff":
            seen_special_stuff = True


if __name__ == "__main__":
    # `with` closes both files (and flushes the output) even on error.
    with open("my.txt", "r") as file, open("result.txt", "w") as output:
        output.writelines(iter_exact_special_lines(file))
# Copy the lines found between a "SpecialStuff" line and the next ";" line.
with open('path/to/input') as infile, open('path/to/output', 'w') as outfile:
    inside_block = False  # are we currently between the two markers?
    for raw_line in infile:
        marker = raw_line.strip()
        if marker == "SpecialStuff":
            # start marker: begin copying from the next line
            inside_block = True
        elif inside_block and marker == ";":
            # end marker: stop copying, do not write the ";" itself
            inside_block = False
        elif inside_block:
            outfile.write(raw_line)
Don't use str.split() for that - str.find() is more than enough:
with open("example.dat", "r") as f:
    data = f.read()  # load the whole file into memory for convenience

# Split once at the start marker, then split the remainder at the first ";".
# partition() returns an empty separator when the marker is absent.
_, start_marker, remainder = data.partition("SpecialStuff")
block, end_marker, _ = remainder.partition(";")

# The block only counts as found when both markers are present.
parsed = block if start_marker and end_marker else None

print("`SpecialStuff` Block not found" if parsed is None else parsed)
Keep in mind that this will capture everything between those two, including new lines and other whitespace - you can additionally do parsed.strip() to remove leading and trailing whitespaces if you don't want them.

How to print a file containing a list

So basically I have a list in a file, and I only want to print the lines containing an A.
Here is a small part of the list
E5341,21/09/2015,C102,440,E,0
E5342,21/09/2015,C103,290,A,290
E5343,21/09/2015,C104,730,N,0
E5344,22/09/2015,C105,180,A,180
E5345,22/09/2015,C106,815,A,400
So I only want to print the lines containing A.
Sorry, I'm still new to Python.
I gave it a try using one "print" to print the whole line but ended up failing; I guess I will always suck at Python.
You just have to:
open file
read lines
for each line, split at ","
for each line, if the 5th part of the splitted str is equal to "A", print line
Code:
def rows_with_flag(lines, flag="A", column=4, sep=","):
    """Return the lines whose *column*-th *sep*-separated field equals *flag*.

    Args:
        lines: iterable of text lines.
        flag: value the selected field must equal.
        column: zero-based index of the field to test.
        sep: field separator.

    Returns:
        The matching lines with their trailing newline removed. Lines with
        too few fields (for example blank lines) are skipped.
    """
    matches = []
    for line in lines:
        record = line.rstrip("\n")
        fields = record.split(sep)
        # The length check avoids an IndexError on short or empty lines.
        if len(fields) > column and fields[column] == flag:
            matches.append(record)
    return matches


if __name__ == "__main__":
    filepath = 'file.txt'
    with open(filepath, 'r') as f:
        # The newline was stripped, so print() does not leave a blank line
        # between rows.
        for row in rows_with_flag(f):
            print(row)

How to remove '#' comments from a string?

The problem:
Implement a Python function called stripComments(code) where code is a parameter that takes a string containing the Python code. The function stripComments() returns the code with all comments removed.
I have:
def stripComments(code):
    """Asker's attempt: build a list of '#'-prefixed tokens and discard it.

    As written, nothing is removed from *code* and the function returns None.
    """
    code = str(code)
    # Iterating a str visits single characters, not lines; the loop variable
    # is never used.
    for _ in code:
        comments = [token[1:] for token in code.split() if token.startswith('#')]
        del comments


stripComments(code)
I'm not sure how to specifically tell python to search through each line of the string and when it finds a hashtag, to delete the rest of the line.
Please help. :(
You could achieve this through re.sub function.
import re
def stripComments(code):
    """Return *code* with every comment-only line removed.

    A comment-only line is one whose first non-blank character is ``#``;
    the whole line, including its newline, is deleted. Comments that follow
    code on the same line are left untouched.
    """
    code = str(code)
    # (?m) lets ^ match at the start of every line. [ \t]* accepts both
    # space- and tab-indented comments. .* stops at the line break, and \n?
    # removes the newline too.
    return re.sub(r'(?m)^[ \t]*#.*\n?', '', code)


print(stripComments("""#foo bar
bar foo
# buz"""))
(?m) enables the multiline mode. ^ asserts that we are at the start. <space>*# matches the character # at the start with or without preceding spaces. .* matches all the following characters except line breaks. Replacing those matched characters with empty string will give you the string with comment lines deleted.
def remove_comments(filename1, filename2):
    """Copy *filename1* to *filename2* with ``#`` comments removed.

    Lines starting with ``#!`` and blank lines are copied unchanged. On every
    other line everything from the first ``#`` onward is dropped and trailing
    whitespace is stripped; lines left empty by this are discarded.

    The first ``#`` is always treated as the start of a comment, even inside
    a string literal.

    Args:
        filename1: path of the file to read.
        filename2: path of the file to write (may equal *filename1*, since
            the input is read completely before the output is opened).
    """
    with open(filename1, 'r') as f:
        lines = f.readlines()

    with open(filename2, 'w') as f:
        for line in lines:
            # Keep shebang lines and existing empty lines as they are.
            if line.startswith("#!") or not line.strip():
                f.write(line)
                continue
            # Keep only the code that precedes the comment, if any.
            code = line.partition('#')[0].rstrip()
            if code:
                f.write(code + '\n')
For my future reference.
def remove_comments(lines: list[str]) -> list[str]:
    """Return *lines* without comments and without blank lines.

    A line starting with ``#`` is dropped entirely. Otherwise everything from
    the first ``" #"`` (space followed by hash) onward is cut off, together
    with any whitespace left in front of the removed comment. Lines that end
    up empty or whitespace-only are dropped. Lines without a comment are
    returned unchanged.
    """
    new_lines = []
    for line in lines:
        if line.startswith("#"):  # Deal with comment as the first character
            continue
        code, marker, _ = line.partition(" #")
        if marker:
            # Strip extra spaces that separated the code from the comment.
            code = code.rstrip()
        if code.strip():
            new_lines.append(code)
    return new_lines


print(remove_comments("Hello #World!\n\nI have a question # that #".split('\n')))
>>> ['Hello', 'I have a question']
This implementation has benefit of not requiring the re module and being easy to understand. It also removes pre-existing blank lines, which is useful for my use case.

Resources