Recognize a string in a txt from two columns - python-3.x

I want my program to read lines from a .txt file and recognize a string using two columns. I tried "for row in column1 and column2", but it isn't working and I don't really know why. Here is the code. (For now I want to print the first five letters when it recognizes the string; later I will put those five letters in a list.)
import pandas as pd
import re
import numpy as np

link = "excelfilett.txt"
file = open(link, "r")
frames_load = []
is_count_frames_load = False
for line in file:
    if "[Interface1]" in line:
        is_count_frames_load = True
    if is_count_frames_load == True:
        frames_load.append(line)
    if "[EthernetComNeed]" in line:
        break
number_of_rows_load = len(frames_load) - 1
header_load = re.split(r'\t', frames_load[0])
number_of_columns_load = len(header_load)
frame_array_load = np.full((number_of_rows_load, number_of_columns_load), 0)
df_frame_array_load = pd.DataFrame(frame_array_load)
df_frame_array_load.columns = header_load
for row in range(number_of_rows_load):
    frame_row_load = re.split(r'\t', frames_load[row])
    for position in range(len(frame_row_load)):
        df_frame_array_load.iloc[row, position] = frame_row_load[position]
print(df_frame_array_load)
df_frame_array_load["[Name]"] = df_frame_array_load["[End1]"] + '\t' + df_frame_array_load["[End2]"]
df_frame_array_load["[Name2]"] = df_frame_array_load["[End2]"] + '\t' + df_frame_array_load["[End1]"]
print(df_frame_array_load["[Name]"])
print(df_frame_array_load["[Name2]"])
link = "excelfilett.txt"
file = open(link, "r")
frames_path = []
is_count_frames_path = False
for line in file:
    if "[Routing Paths]" in line:
        is_count_frames_path = True
    if is_count_frames_path == True:
        for row in df_frame_array_load["[Name]"] and df_frame_array_load["[Name2]"]:
            if row in line:
                print(row)
                print(line[0:4])
        if "[EthernetComNeed]" in line:
            break
    if "[EthernetComConfig]" in line:
        break
What I want as output is to print the first five letters of a line of the .txt whenever a string is recognized. For example, when "S1\tS2" is in a line of the .txt, it should print the first five letters, so "FL_1". The two columns contain strings like "S1\tS2" and the inverse ("S2\tS1"). The problem is the line where I loop over both columns; it gives me
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
on the line "for row in column1 and column2:".

I think it isn't possible to read rows of two columns at the same time, so I just made my program read the two columns separately. Here is what I changed:
link = "excelfilett.txt"
file = open(link, "r")
frames_path = []
is_count_frames_path = False
for line in file:
if "[Routing Paths]" in line:
is_count_frames_path = True
if is_count_frames_path== True:
for row in df_frame_array_load["[Name]"]:
if row in line:
print(row)
print(line[0:4])
if "[EthernetComNeed]" in line:
break
for row in df_frame_array_load["[Name2]"]:
if row in line:
print(row)
print(line[0:4])
if "[EthernetComConfig]" in line:
break
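If a single loop over both columns is still wanted, here is a minimal sketch (assuming the same df_frame_array_load and excelfilett.txt as above): chain the two Series instead of combining them with "and", since "and" between two Series forces a boolean evaluation of the whole Series, which is what raises the ambiguous-truth-value error.

from itertools import chain

for line in open("excelfilett.txt", "r"):
    # iterate over the values of "[Name]" first, then "[Name2]", in a single loop
    for row in chain(df_frame_array_load["[Name]"], df_frame_array_load["[Name2]"]):
        if row in line:
            print(row)
            print(line[0:4])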

Related

Import to Python a specific format file line per line

How can I import this file, which contains plain text with numbers?
It's difficult to import because the first line contains 7 numbers and the second line contains 8 numbers...
In general:
LINE 1: 7 numbers.
LINE 2: 8 numbers.
LINE 3: 7 numbers.
LINE 4: 8 numbers.
... and so on
I have tried to read it but cannot import it. I need to save the data in a NumPy array.
filepath = 'CHALLENGE.001'
with open(filepath) as fp:
    line = fp.readline()
    cnt = 1
    while line:
        print("Line {}: {}".format(cnt, line.strip()))
        line = fp.readline()
        cnt += 1
LINK TO DATA
This file contains information for each frequency, as explained below:
You'll have to skip the blank lines when reading as well.
Just check if the first line is blank. If it isn't, read 3 more lines.
Rinse and repeat.
Here's an example of both a numpy array and a pandas dataframe.
import pandas as pd
import numpy as np

filepath = 'CHALLENGE.001'
data = []
headers = ['frequency in Hz',
           'ExHy coherency',
           'ExHy scalar apparent resistivity',
           'ExHy scalar phase',
           'EyHz coherency',
           'EyHx scalar apparent resistivity',
           'EyHx scalar phase',
           're Zxx/√(µo)',
           'im Zxx/√(µo)',
           're Zxy/√(µo)',
           'im Zxy/√(µo)',
           're Zyx/√(µo)',
           'im Zyx/√(µo)',
           're Zyy/√(µo)',
           'im Zyy/√(µo)',
           ]
with open(filepath) as fp:
    while True:
        line = fp.readline()
        if not len(line):
            break
        fp.readline()          # skip the blank line after the first data line
        line2 = fp.readline()
        fp.readline()          # skip the blank line after the second data line
        combined = line.strip().split() + line2.strip().split()
        data.append(combined)

df = pd.DataFrame(data, columns=headers).astype('float')
array = np.array(data).astype(float)   # the builtin float; np.float is removed in newer NumPy

# example of type
print(type(df['frequency in Hz'][0]))
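As a quick sanity check (assuming every block in CHALLENGE.001 really is a 7-number line followed by an 8-number line), the parsed data should come out with 15 columns, matching the 15 headers:

# each parsed row combines 7 + 8 values, so both shapes should end in 15
print(df.shape)
print(array.shape)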

Print the last occurrence of a word in a TXT in Python

I'm working on extracting data from a .txt file, and I want to pick up the last occurrence of a certain word in the whole file. In this case, I get three occurrences of the words D/DÑA and Tfno, but I want only the last one of each and to print it.
import os
import re
import sys

def extract(in_filename):
    if not os.path.exists(in_filename):
        print("ERROR: Input file does not exist ❌ ")
        sys.exit()
    with open(in_filename, 'r') as file:
        rows = file.readlines()
        for row in rows:
            if re.match('D/DÑA', row):
                row_parse = row.strip()
                print(row_parse)
            elif re.match('Tfno', row):
                row_parse = row.strip()
                print(row_parse)

extract('apd29.txt')
The output is:
D/DÑA: PRUEBA PRUEBITA
Tfno: 666666666
D/DÑA: PRUEBA PRUEBITA
Tfno: 666666666
D/DÑA: PRUEBA PRUEBITA <-- I need this
Tfno: 666666666 <-- I need this
I expect the output:
D/DÑA: PRUEBA PRUEBITA
Tfno: 666666666
Use reversed
Ex:
def extract(in_filename):
    if not os.path.exists(in_filename):
        print("ERROR: Input file does not exist ❌ ")
        sys.exit()
    with open(in_filename, 'r') as file:
        rows = file.readlines()
        for row in reversed(rows):  # !Update
            if re.match(r'D/DÑA', row):
                row_parse = row.strip()
                print(row_parse)
                break

extract('apd29.txt')
Assigning a variable outside of the for loop would work in this situation.
result = "not found"
with open(in_filename, 'r') as file:
    rows = file.readlines()
    for row in rows:
        if re.match('D/DÑA', row):
            row_strip = row.strip()
            result = row_strip  # overwritten on every match, so the last occurrence remains
print(result)
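A small sketch combining both ideas, keeping the last occurrence of each prefix in a single forward pass (assuming the same apd29.txt layout as in the question; extract_last is a hypothetical helper name):

import re

def extract_last(in_filename):
    last = {}
    with open(in_filename, 'r') as file:
        for row in file:
            for prefix in ('D/DÑA', 'Tfno'):
                if re.match(prefix, row):
                    last[prefix] = row.strip()  # each key ends up holding its last match
    for value in last.values():
        print(value)

extract_last('apd29.txt')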

Set list of str and set name of list automatically

I am encountering an issue with my program. I would like the program to recognize strings from a .txt using two columns, then put the strings of the .txt in a list named after the rows in the two columns. I don't know how to create lists automatically yet, but I still want to make a program that puts everything it recognizes in the .txt into a list.
link = "excelfilett.txt"
file = open(link, "r")
frames_path = []
is_count_frames_path = False
for line in file:
if "[Routing Paths]" in line:
is_count_frames_path = True
if is_count_frames_path== True:
for row in df_frame_array_load["[Name]"]:
if row in line:
a = line[0:4]
a = list(a)
if "[EthernetComNeed]" in line:
break
for row in df_frame_array_load["[Name2]"]:
if row in line:
a = line[0:4]
a = list(a)
if "[EthernetComNeed]" in line:
break
if "[EthernetComConfig]" in line:
break
print(a)
But it only gives me this:
['F', 'L', '_', '9']
While I want something like:
['FL_1', 'FL_1', 'FL_2', 'FL_9']
Any ideas on how to do this?
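A minimal sketch, assuming the same df_frame_array_load and file layout as in the earlier code: build a list before the loop and append each four-character slice to it, instead of reassigning a and exploding it into single characters with list(line[0:4]). The name names_found is hypothetical.

names_found = []   # collects every recognized prefix

link = "excelfilett.txt"
file = open(link, "r")
is_count_frames_path = False
for line in file:
    if "[Routing Paths]" in line:
        is_count_frames_path = True
    if is_count_frames_path == True:
        for row in df_frame_array_load["[Name]"]:
            if row in line:
                names_found.append(line[0:4])   # keep "FL_1" whole, not ['F', 'L', '_', '1']
        for row in df_frame_array_load["[Name2]"]:
            if row in line:
                names_found.append(line[0:4])
    if "[EthernetComConfig]" in line:
        break
print(names_found)   # e.g. ['FL_1', 'FL_1', 'FL_2', 'FL_9']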

AttributeError: 'Series' object has no attribute 'rows'

I want my program to print the first five characters when it recognizes a string, made of the concatenation of two columns (from a pandas DataFrame), in some lines of a .txt. But, as the title says, it gives me this error when I run the code. Here is the code (the important lines are at the end; I included everything in case you want to see the whole code).
import pandas as pd
import re
import numpy as np

link = "excelfilett.txt"
file = open(link, "r")
frames_load = []
is_count_frames_load = False
for line in file:
    if "[Interface1]" in line:
        is_count_frames_load = True
    if is_count_frames_load == True:
        frames_load.append(line)
    if "[EthernetComNeed]" in line:
        break
number_of_rows_load = len(frames_load) - 1
header_load = re.split(r'\t', frames_load[0])
number_of_columns_load = len(header_load)
frame_array_load = np.full((number_of_rows_load, number_of_columns_load), 0)
df_frame_array_load = pd.DataFrame(frame_array_load)
df_frame_array_load.columns = header_load
for row in range(number_of_rows_load):
    frame_row_load = re.split(r'\t', frames_load[row])
    for position in range(len(frame_row_load)):
        df_frame_array_load.iloc[row, position] = frame_row_load[position]
df_frame_array_load["[Name]"] = df_frame_array_load["[End1]"] + " " + df_frame_array_load["[End2]"]
link = "excelfilett.txt"
file = open(link, "r")
frames_path = []
is_count_frames_path = False
for line in file:
    if "[Routing Paths]" in line:
        is_count_frames_path = True
    if is_count_frames_path == True:
        for row in df_frame_array_load["[Name]"].rows:
            if row in line:
                print(line[0:4])
    if "[EthernetComConfig]" in line:
        break
It gives me the AttributeError on "for row in df_frame_array_load["[Name]"].rows:" and it shouldn't be a version error. What is the problem then? I don't understand.
for row in df_frame_array_load["[Name]"].rows:
should be changed to just:
for row in df_frame_array_load["[Name]"]:
    ...
because a pandas Series object does not have a "rows" attribute; when you perform a loop over a Series you are already iterating over its values.
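For reference, a small sketch (assuming the same df_frame_array_load) in case the row index is needed alongside the value: iterating a Series directly yields its values, while Series.items() yields (index, value) pairs.

# prints each index together with the concatenated "[End1] [End2]" string
for idx, value in df_frame_array_load["[Name]"].items():
    print(idx, value)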

How can I simplify and format this function?

So I have this messy code where I wanted to get every word from frankenstein.txt, sort them alphabetically, eliminate one- and two-letter words, and write them into a new file.
def Dictionary():
    d = []
    count = 0
    bad_char = '~!##$%^&*()_+{}|:"<>?\`1234567890-=[]\;\',./ '
    replace = ' ' * len(bad_char)
    table = str.maketrans(bad_char, replace)
    infile = open('frankenstein.txt', 'r')
    for line in infile:
        line = line.translate(table)
        for word in line.split():
            if len(word) > 2:
                d.append(word)
                count += 1
    infile.close()
    file = open('dictionary.txt', 'w')
    file.write(str(set(d)))
    file.close()

Dictionary()
How can I simplify it and make it more readable? Also, how can I make the words write vertically in the new file (currently it writes a horizontal list):
abbey
abhorred
about
etc....
A few improvements below:
from string import digits, punctuation

def create_dictionary():
    words = set()
    bad_char = digits + punctuation + '...'  # may need more characters
    replace = ' ' * len(bad_char)
    table = str.maketrans(bad_char, replace)
    with open('frankenstein.txt') as infile:
        for line in infile:
            line = line.strip().translate(table)
            for word in line.split():
                if len(word) > 2:
                    words.add(word)
    with open('dictionary.txt', 'w') as outfile:
        outfile.writelines(word + '\n' for word in sorted(words))  # note 'lines': one word per line
A few notes:
follow the style guide
string contains constants you can use to provide the "bad characters";
you never used count (which was just len(d) anyway);
use the with context manager for file handling; and
using a set from the start prevents duplicates, but they aren't ordered (hence sorted).
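The improved function still has to be called, as the original snippet did with Dictionary(); a minimal usage line, assuming frankenstein.txt sits in the working directory:

create_dictionary()  # writes the sorted words, one per line, to dictionary.txt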
Using the re module:
import re

words = set()
with open('frankenstein.txt') as infile:
    for line in infile:
        # add every run of letters longer than two characters to the set
        words.update(x for x in re.split(r'[^A-Za-z]+', line) if len(x) > 2)
with open('dictionary.txt', 'w') as outfile:
    outfile.writelines(word + '\n' for word in sorted(words))
In the r'[^A-Za-z]+' pattern passed to re.split, replace 'A-Za-z' with the characters you want to allow in dictionary.txt.
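An alternative sketch with re.findall, which pulls the words out directly and filters by length in the pattern itself (same file names assumed):

import re

words = set()
with open('frankenstein.txt') as infile:
    for line in infile:
        # [A-Za-z]{3,} matches runs of three or more letters, i.e. the words to keep
        words.update(re.findall(r'[A-Za-z]{3,}', line))

with open('dictionary.txt', 'w') as outfile:
    outfile.writelines(word + '\n' for word in sorted(words))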
