How to avoid repetion with my code - python-3.x

I've written a code that extracts all the words from two files, and only returns the words that are in both of the file.
However, i have done some repetition and that is not considered a good style, so i wondering if it would be possible to avoid this with my code?
import re
def print_common_words(filename_1, filename_2):
try:
input_file = open(filename_1, 'r')
source_string = input_file.read().lower()
input_file.close()
all_words1 = set(re.findall('[a-zA-Z]+', source_string))
input_file = open(filename_2, 'r') #Repetition
source_string = input_file.read().lower() #Repetition
input_file.close() #Repetition
all_words2 = set(re.findall('[a-zA-Z]+', source_string)) #Repetition
intersection_list = all_words1.intersection(all_words2)
union_list = []
for word in intersection_list:
union_list += [word]
union_list.sort()
for i in union_list:
print(i)
except FileNotFoundError:
print("A file could not be found.")

Use a method to factor out the duplicated code.
def get_file(file):
input_file = open(file, 'r')
source_string = input_file.read().lower()
input_file.close()
return set(re.findall('[a-zA-Z]+', source_string))
Call it like:
all_words1 = get_file(filename_1)
all_words2 = get_file(filename_2)
Eg:
all_words1 = get_file(filename_1)
all_words2 = get_file(filename_2)
intersection_list = all_words1.intersection(all_words2)
union_list = []
for word in intersection_list:
union_list += [word]
union_list.sort()
for i in union_list:
print(i)

Related

How do I change a text dictionary from file into usable dictionary

Right so, I need to make this function that basically saves a player's username in a dictionary which is next saved in a text file to be reused again.
The problem is on reusing it I can't manage to get the str that I get from the file into a dictionary.
Here is my code:
from ast import eval
def verification(j, d):
if j in d.keys():
return d
else:
d[j] = [0,0]
return d
savefile = open("save.txt", "r")
'''d = dict()
for line in savefile:
(key, val) = line.split(".")
d[key] = val
print(d)'''
d = savefile.read()
python_dict = literal_eval(d)
savefile.close()
j = input("name? ")
result = verification(j, python_dict)
savefile = open("save.txt", "w")
'''for i in result:
text = i + "." + str(result[i]) + " \n"
savefile.write(text)'''
savefile.write(str(result))
savefile.close()
As you can see I tried with the literal_eval from ast. I also tried to do a .split() but that wouldn't work. So I'm stuck. Any ideas? It would be of great help.
Thanks
There is no need to do your own encoding/decoding from scratch when you have existing libraries to do it for you.
One good example is JSON which is also not Python exclusive so the database you create can be used by other applications.
This can be done easily by:
import json
def verification(j, d):
if j not in d:
d[j] = [0,0]
return d
with open("save.txt", "r") as savefile:
python_dict = json.load(savefile)
j = input("name? ")
result = verification(j, python_dict)
with open("save.txt", "w") as savefile:
json.dump(result, savefile)

How do I append a text changing the number format?

I'm getting number from a HTML, some of them are %, 4 digits and 7 digits (37.89%, 3.464, 2,193.813). I would like to save just the numbers, not the percentages, without the thousand separators (".").
list_of_rows = []
for row in table.findAll('div', attrs={'class': 'quadrado'}):
list_of_cells = []
for cell in row.findAll('span', attrs={'class': 'circulo'}):
text = cell.text
# print(text)
for cell_index in row.findAll('span', attrs={'class': 'triangulo'}):
text_index = cell_index.text
list_of_cells_index = [text, text_index]
list_of_cells_index_clean = ','.join(list_of_cells_index) # remove brackets and ''
# print(list_of_cells_index_clean)
list_of_cells.append(list_of_cells_index_clean)
list_of_rows.append(list_of_cells)
outfile = open("./list.csv", "a")
writer = csv.writer(outfile, lineterminator = '\n')
writer.writerows(list_of_rows)
I would like to get:
37.89%, 3464, 2193,813.
How can I do it?
I don't know all your input parameters, but this works for the ones that you provided.
s = ('37.89%', '3.464', '2,193.813')
for item in s:
remove_comma = item.replace(',', '')
keep_percentage = re.findall(r'\d{1,4}\.\d{1,4}%', remove_comma)
if keep_percentage:
keep_percentage = ''.join(keep_percentage)
print (keep_percentage)
else:
if (len(remove_comma)) == 5:
print (remove_comma.replace('.', ''))
else:
print (remove_comma.replace('.', ','))
**OUTPUTS**
37.89%
3464
2193,813

Never resets list

I am trying to create a calorie counter the standard input goes like this:
python3 calories.txt < test.txt
Inside calories the food is the following format: apples 500
The problem I am having is that whenever I calculate the values for the person it seems to never return to an empty list..
import sys
food = {}
eaten = {}
finished = {}
total = 0
#mappings
def calories(x):
with open(x,"r") as file:
for line in file:
lines = line.strip().split()
key = " ".join(lines[0:-1])
value = lines[-1]
food[key] = value
def calculate(x):
a = []
for keys,values in x.items():
for c in values:
try:
a.append(int(food[c]))
except:
a.append(100)
print("before",a)
a = []
total = sum(a) # Problem here
print("after",a)
print(total)
def main():
calories(sys.argv[1])
for line in sys.stdin:
lines = line.strip().split(',')
for c in lines:
values = lines[0]
keys = lines[1:]
eaten[values] = keys
calculate(eaten)
if __name__ == '__main__':
main()
Edit - forgot to include what test.txt would look like:
joe,almonds,almonds,blue cheese,cabbage,mayonnaise,cherry pie,cola
mary,apple pie,avocado,broccoli,butter,danish pastry,lettuce,apple
sandy,zuchini,yogurt,veal,tuna,taco,pumpkin pie,macadamia nuts,brazil nuts
trudy,waffles,waffles,waffles,chicken noodle soup,chocolate chip cookie
How to make it easier on yourself:
When reading the calories-data, convert the calories to int() asap, no need to do it every time you want to sum up somthing that way.
Dictionary has a .get(key, defaultvalue) accessor, so if food not found, use 100 as default is a 1-liner w/o try: ... except:
This works for me, not using sys.stdin but supplying the second file as file as well instead of piping it into the program using <.
I modified some parsings to remove whitespaces and return a [(name,cal),...] tuplelist from calc.
May it help you to fix it to your liking:
def calories(x):
with open(x,"r") as file:
for line in file:
lines = line.strip().split()
key = " ".join(lines[0:-1])
value = lines[-1].strip() # ensure no whitespaces in
food[key] = int(value)
def getCal(foodlist, defValueUnknown = 100):
"""Get sum / total calories of a list of ingredients, unknown cost 100."""
return sum( food.get(x,defValueUnknown ) for x in foodlist) # calculate it, if unknown assume 100
def calculate(x):
a = []
for name,foods in x.items():
a.append((name, getCal(foods))) # append as tuple to list for all names/foods eaten
return a
def main():
calories(sys.argv[1])
with open(sys.argv[2]) as f: # parse as file, not piped in via sys.stdin
for line in f:
lines = line.strip().split(',')
for c in lines:
values = lines[0].strip()
keys = [x.strip() for x in lines[1:]] # ensure no whitespaces in
eaten[values] = keys
calced = calculate(eaten) # calculate after all are read into the dict
print (calced)
Output:
[('joe', 1400), ('mary', 1400), ('sandy', 1600), ('trudy', 1000)]
Using sys.stdin and piping just lead to my console blinking and waiting for manual input - maybe VS related...

Python: print dictionary keys and values individually

I'm wondering: how do you print the keys or the values individually from a dictionary in a function?
Example .txt file
00000000;Pikachu Muchacho;region1
11111111;SoSo good;region2
22222222;Marshaw williams;region3
33333333;larry Mikal Carter;region3
Code
test_file = open("test.txt", "r")
customer = {}
def dictionary():
for line in test_file:
entries = line.split(";")
key = entries[0]
values = entries[1]
customer[key] = values
def test():
print(customer)
print(customer[key])
def main():
dictionary()
test()
main()
As #jamesRH commented, you can use customer.keys() and customer.values():
test_file = open("test.txt", "r")
customer = {}
def dictionary():
for line in test_file:
entries = line.split(";")
key = entries[0]
values = entries[1]
customer[key] = values
def test():
# Print all the keys in customer
print(customer.keys())
# Print all the values in customer
print(customer.values())
def main():
dictionary()
test()
main()
This gives the output:
['00000000', '22222222', '33333333', '11111111']
['Pikachu Muchacho', 'Marshaw williams', 'larry Mikal Carter', 'SoSo good']
Your original code causes an error because key is not within the scope of test().

How can I simplify and format this function?

So I have this messy code where I wanted to get every word from frankenstein.txt, sort them alphabetically, eliminated one and two letter words, and write them into a new file.
def Dictionary():
d = []
count = 0
bad_char = '~!##$%^&*()_+{}|:"<>?\`1234567890-=[]\;\',./ '
replace = ' '*len(bad_char)
table = str.maketrans(bad_char, replace)
infile = open('frankenstein.txt', 'r')
for line in infile:
line = line.translate(table)
for word in line.split():
if len(word) > 2:
d.append(word)
count += 1
infile.close()
file = open('dictionary.txt', 'w')
file.write(str(set(d)))
file.close()
Dictionary()
How can I simplify it and make it more readable and also how can I make the words write vertically in the new file (it writes in a horizontal list):
abbey
abhorred
about
etc....
A few improvements below:
from string import digits, punctuation
def create_dictionary():
words = set()
bad_char = digits + punctuation + '...' # may need more characters
replace = ' ' * len(bad_char)
table = str.maketrans(bad_char, replace)
with open('frankenstein.txt') as infile:
for line in infile:
line = line.strip().translate(table)
for word in line.split():
if len(word) > 2:
words.add(word)
with open('dictionary.txt', 'w') as outfile:
outfile.writelines(sorted(words)) # note 'lines'
A few notes:
follow the style guide
string contains constants you can use to provide the "bad characters";
you never used count (which was just len(d) anyway);
use the with context manager for file handling; and
using a set from the start prevents duplicates, but they aren't ordered (hence sorted).
Using re module.
import re
words = set()
with open('frankenstein.txt') as infile:
for line in infile:
words.extend([x for x in re.split(r'[^A-Za-z]*', line) if len(x) > 2])
with open('dictionary.txt', 'w') as outfile:
outfile.writelines(sorted(words))
From r'[^A-Za-z]*' in re.split, replace 'A-Za-z' with the characters which you want to include in dictionary.txt.

Resources