Converting a textfile to a dictionary - python-3.x

My file:
Goal: to read a txt file and return dictionaries
What I have:
def load_snacks(snack_file: TextIO) -> Tuple[Dict[str, List[str]],
Dict[str, List[str]]]:
"""Return a two-item tuple containing a "healthysnack_to_junkfood" dictionary
and a "healthysnack_to_healthysnack" dictionary with the data from snack_file.
"""
snack_H2J = {}
snack_H2H = {}
line = snack_file.readline().strip()
while line != '':
# due to structure of the file, line contains a healthy snack name
healthysnack_name = line
# properly format the 1st healthy snack name, use helper fcn (see below for helper fcn)
flip_name_and_del_comma(healthysnack_name)
healthy_list = []
junk_list = []
line = snack_file.readline().strip()
while line != '\n':
if ',' in line:
snack_H2J[healthysnack_name] = line
a = flip_name_and_del_comma(line)
healthy_list.append(a)
else:
snack_H2H[healthysnack_name] = line
junk_list.append(line)
line = snack_file.readline().strip()
return (snack_H2J, snack_H2H)
Below is my helperfcn; I have verified that this works
def flip_name_and_del_comma(s: str) -> str:
""" Retrun a new str that reverses the format name order from 'colour, healthy snack name' to
'healthy snack name to colour'
>>> flip_name_and_del_comma('orange, carrot')
'carrot orange'
>>> flip_name_and_del_comma('yellow, mango')
'mango yellow'
"""
s_reversed = ', '.join(reversed(s.split(', ')))
s_comma_delete = s_reversed.replace(', ', ' ')
return s_comma_delete

Related

Read data from txt file, store it, use it for analyzing, write it to the txt file

The task is to read from given txt file the data add the numbers in there to the list[], so that every number in a row will be a element/object in this list. After reading the file created list will be sent to the main().
this list with the objects will be parameters for the def Analyze part in where at the same time
will be found min, max, average and sum.
def lueTiedosto(data):
Tiedosto = open("L07T4D1.txt", 'r', encoding="UTF-8")
Rivi = Tiedosto.readline()
while (len(Rivi) > 0):
data.append(int(Rivi))
Rivi = Tiedosto.readline()
for element in data:
print(element)
print(f"Tiedosto L07T4D1.txt luettu.")
Tiedosto.close()
return element
The fixed code which works:
def lueTiedosto(data):
Lue = input("Luettavan tiedoston nimi on ''.\n")
print(f"Anna uusi nimi, enter säilyttää nykyisen: ", end='')
Tiedosto = open(Lue, 'r', encoding="UTF-8")
Rivi = Tiedosto.readline()
while (len(Rivi) > 0):
data.append(int(Rivi))
Rivi = Tiedosto.readline()
print(f"Tiedosto '{Lue}' luettu.")
Tiedosto.close()
return data
Making an assumption that your input file is similar to the following:
10000
12345
10008
12000
I would do the following:
filepath = r".......\L07T4D1.txt" # Path to file being loaded
def readData(filepath: str) -> list[int]:
# Returns a list of integers from file
rslt = []
with open (filepath, 'r') as f:
data = f.readline().strip()
while data:
data = data.split(' ')
rslt.append(int(data[0]))
data = f.readline().strip()
return rslt
def analyze(data: list[int]) -> None:
# prints results of data analysis
print(f'Max Value = {max(data)}')
print(f'Min Value = {min(data)}')
print(f'Sum Value = {sum(data)}')
print(f'Avg Value = {sum(data)/len(data)}')
Running analyze(readData(filepath)) Yields:
Max Value = 12345
Min Value = 10000
Sum Value = 44353
Avg Value = 11088.25

fetch a string in file & get all lines containing the string along with line numbers

Also is it the correct code which works to fetch a string in file & get all lines containing the string along with line numbers
am getting syntax error in line 1 for the code,
def matched_lines('sam.txt', string_to_search):
matched_lines = search_string_in_file('sam.txt','is')
"""Search for the given string in file and return lines containing that string,
along with line numbers"""
line_number = 0
list_of_results = []
# Open the file in read only mode
with open('sam.txt', 'r') as matched_lines:
print('Total Matched lines : ', len(matched_lines))
# Read all lines in the file one by one
for elem in matched_lines:
print('Line Number = ', elem[0], ' :: Line = ', elem[1])
please help me.
Is this result needed?
def matched_lines(filename, string_to_search):
list_of_results = []
with open(filename, encoding='utf8')as matched_lines:
for elem in enumerate(matched_lines.read().split('\n')):
if string_to_search in elem[1]:
list_of_results.append(elem)
return list_of_results
result = matched_lines('some.txt', 'lorem')
print('Total Matched lines :', len(result))
print(result)

PdfMiner: Erro processing the page literal required: /b'begin'

I am trying to read .pdf file using python3 with package called pdfminer which I have done successfully but for some of the page in .pdf file while reading the page using interpreter.process_page in getAllPages() of the following code I am getting an errors as follows:
error processing the page literal required: /b'begin'.
error processing the page Unknown operator: 'Qq'.
This is happening only for few docs but not able to find out what is the problem , in which case this could happen?
Code:-
class PDFDoc():
def __init__(self):
self.rsrcmgr = PDFResourceManager()
self.laparams = LAParams()
self.device = PDFPageDetailedAggregator(self.rsrcmgr, laparams=self.laparams)
self.interpreter = PDFPageInterpreter(self.rsrcmgr, self.device)
self.doc_values = []
self.total_no_of_pages = 0
self.doc_page_dict = collections.OrderedDict()
# self.doc = None
"""
Read PDF Document
"""
def readDoc(self, doc_name):
fp = open(doc_name, 'rb')
self.parser = PDFParser(fp)
self.doc = PDFDocument(self.parser)
"""
Read all pages in the document and saved in List of tuples format.
It contains the text and their coordinate info along with page number
"""
def getAllPages(self):
for page in PDFPage.create_pages(self.doc):
self.interpreter.process_page(page)
# receive the LTPage object for this page
self.device.get_result()
self.doc_values = self.device.rows
"""
Get the total number of pages
"""
def getTotalPages(self):
self.total_no_of_pages = max(self.doc_page_dict)+1
"""
Convert the document info into Page-wise dict. {Key:Value}-->{Page no:[Page text, coordinates]}
"""
def getPageDict(self):
for i in range(len(self.doc_values)):
left = self.doc_values[i][1]
bottom = self.doc_values[i][2]
content = self.doc_values[i][-1]
if self.doc_page_dict.get(self.doc_values[i][0]):
self.doc_page_dict[self.doc_values[i][0]].append({'left':left, 'bottom':bottom, 'content':content})
else:
self.doc_page_dict[self.doc_values[i][0]]=[{'left':left, 'bottom':bottom, 'content':content}]
"""
Align the page text in case they are misaligned
"""
def create_page_table_modified(self, pagedict_list):
# ##print(pagedict_list)
page_dict = collections.OrderedDict()
page_table_1 = []
page_table = []
exc_arr = []
count = 0
for line in pagedict_list:
row = []
temp_key = float(line['bottom'])
if not line in exc_arr and line["content"]:
row.append(line)
exc_arr.append(line)
for line_1 in pagedict_list:
if not line_1 in exc_arr and line_1["content"]:
# #print('last_top:', last_top, each_dict_adjusted['bottom'])
if abs(int(line["bottom"]) - int(line_1["bottom"])) <= 6:
row.append(line_1)
exc_arr.append(line_1)
if row:
page_dict[temp_key] = row
page_table.append(row)
count += 1
# ##print("\n\nPage:",page_table)
page_dict_keys = sorted(page_dict, reverse=True)
for i in page_dict_keys:
# i = sorted(i, key=lambda k: k['left'])
page_table_1.append(page_dict[i])
return page_table_1
"""
Sort the line elements based on its position coordinates
"""
def sortRowElements(self,row_list):
return sorted(row_list, key=lambda k:k['left'])
"""
Combine line elements to form the line text
"""
def combineText(self, row):
temp_ = []
# for i in range(len(row)):
text = [k['content'] for k in row]
temp_.append(' '.join(text))
return ' '.join(temp_)
"""
To call aligning and sorting functions
"""
def sortText(self):
for page in self.doc_page_dict:
self.doc_page_dict[page] = self.create_page_table_modified(self.doc_page_dict[page])
self.doc_page_dict[page] = [self.sortRowElements(line) for line in self.doc_page_dict[page]]
"""
To get text from particular page of the document --> List of line text
"""
def pageText(self, page_no):
page_text = [self.combineText(line) for line in self.doc_page_dict[page_no]]
return page_text
read_document = PDFDoc()

Never resets list

I am trying to create a calorie counter the standard input goes like this:
python3 calories.txt < test.txt
Inside calories the food is the following format: apples 500
The problem I am having is that whenever I calculate the values for the person it seems to never return to an empty list..
import sys
food = {}
eaten = {}
finished = {}
total = 0
#mappings
def calories(x):
with open(x,"r") as file:
for line in file:
lines = line.strip().split()
key = " ".join(lines[0:-1])
value = lines[-1]
food[key] = value
def calculate(x):
a = []
for keys,values in x.items():
for c in values:
try:
a.append(int(food[c]))
except:
a.append(100)
print("before",a)
a = []
total = sum(a) # Problem here
print("after",a)
print(total)
def main():
calories(sys.argv[1])
for line in sys.stdin:
lines = line.strip().split(',')
for c in lines:
values = lines[0]
keys = lines[1:]
eaten[values] = keys
calculate(eaten)
if __name__ == '__main__':
main()
Edit - forgot to include what test.txt would look like:
joe,almonds,almonds,blue cheese,cabbage,mayonnaise,cherry pie,cola
mary,apple pie,avocado,broccoli,butter,danish pastry,lettuce,apple
sandy,zuchini,yogurt,veal,tuna,taco,pumpkin pie,macadamia nuts,brazil nuts
trudy,waffles,waffles,waffles,chicken noodle soup,chocolate chip cookie
How to make it easier on yourself:
When reading the calories-data, convert the calories to int() asap, no need to do it every time you want to sum up somthing that way.
Dictionary has a .get(key, defaultvalue) accessor, so if food not found, use 100 as default is a 1-liner w/o try: ... except:
This works for me, not using sys.stdin but supplying the second file as file as well instead of piping it into the program using <.
I modified some parsings to remove whitespaces and return a [(name,cal),...] tuplelist from calc.
May it help you to fix it to your liking:
def calories(x):
with open(x,"r") as file:
for line in file:
lines = line.strip().split()
key = " ".join(lines[0:-1])
value = lines[-1].strip() # ensure no whitespaces in
food[key] = int(value)
def getCal(foodlist, defValueUnknown = 100):
"""Get sum / total calories of a list of ingredients, unknown cost 100."""
return sum( food.get(x,defValueUnknown ) for x in foodlist) # calculate it, if unknown assume 100
def calculate(x):
a = []
for name,foods in x.items():
a.append((name, getCal(foods))) # append as tuple to list for all names/foods eaten
return a
def main():
calories(sys.argv[1])
with open(sys.argv[2]) as f: # parse as file, not piped in via sys.stdin
for line in f:
lines = line.strip().split(',')
for c in lines:
values = lines[0].strip()
keys = [x.strip() for x in lines[1:]] # ensure no whitespaces in
eaten[values] = keys
calced = calculate(eaten) # calculate after all are read into the dict
print (calced)
Output:
[('joe', 1400), ('mary', 1400), ('sandy', 1600), ('trudy', 1000)]
Using sys.stdin and piping just lead to my console blinking and waiting for manual input - maybe VS related...

Python: print dictionary keys and values individually

I'm wondering: how do you print the keys or the values individually from a dictionary in a function?
Example .txt file
00000000;Pikachu Muchacho;region1
11111111;SoSo good;region2
22222222;Marshaw williams;region3
33333333;larry Mikal Carter;region3
Code
test_file = open("test.txt", "r")
customer = {}
def dictionary():
for line in test_file:
entries = line.split(";")
key = entries[0]
values = entries[1]
customer[key] = values
def test():
print(customer)
print(customer[key])
def main():
dictionary()
test()
main()
As #jamesRH commented, you can use customer.keys() and customer.values():
test_file = open("test.txt", "r")
customer = {}
def dictionary():
for line in test_file:
entries = line.split(";")
key = entries[0]
values = entries[1]
customer[key] = values
def test():
# Print all the keys in customer
print(customer.keys())
# Print all the values in customer
print(customer.values())
def main():
dictionary()
test()
main()
This gives the output:
['00000000', '22222222', '33333333', '11111111']
['Pikachu Muchacho', 'Marshaw williams', 'larry Mikal Carter', 'SoSo good']
Your original code causes an error because key is not within the scope of test().

Resources