How to calculate from a dictionary in python - python-3.x

import operator
with open("D://program.txt") as f:
Results = {}
for line in f:
part_one,part_two = line.split()
Results[part_one] = part_two
c=sum(int(Results[x]) for x in Results)
r=c/12
d=len(Results)
F=max(Results.items(), key=operator.itemgetter(1))[0]
u=min(Results.items(), key=operator.itemgetter(1))[0]
print ("Number of entries are",d)
print ("Student with HIGHEST mark is",F)
print ("Student with LOWEST mark is",u)
print ("Avarage mark is",r)
Results = [ (v,k) for k,v in Results.items() ]
Results.sort(reverse=True)
for v,k in Results:
print(k,v)
import sys
orig_stdout = sys.stdout
f = open('D://programssr.txt', 'w')
sys.stdout = f
print ('Number of entries are',d)
print ("Student with HIGHEST mark is",F)
print ("Student with LOWEST mark is",u)
print ("Avarage mark is",r)
for v,k in Results:
print(k,v)
sys.stdout = orig_stdout
f.close()
I want to read a text file and write the computed results to a new file, but the calculation fails because of the NAMES and MARKS header line in the file. If I remove that line, it works fine. I want to do the calculations without removing the NAMES and MARKS header from the text file. What am I doing wrong?
NAMES MARKS
Lux 95
Veron 70
Lesley 88
Sticks 80
Tipsey 40
Joe 62
Goms 18
Wesley 35
Villa 11
Dentist 72
Onty 50

Just consume the first line using the next() function before looping over the file:
with open("D://program.txt") as f:
    Results = {}
    # Discard the "NAMES MARKS" header line. The None default keeps an
    # empty file from raising StopIteration.
    next(f, None)
    for line in f:
        if not line.strip():
            continue  # a blank line has nothing to unpack
        part_one, part_two = line.split()
        Results[part_one] = part_two
Note that file objects are iterators (one-shot iterables): when you loop over them you consume the items, and you can no longer access those items through the same file object.

Related

How to sum specific values from two different txt files in python

I have 2 txt files with names and scores. For example:
File 1 File 2 Desired Output
Name Score Name Score Name Score
Michael 20 Michael 30 Michael 50
Adrian 40 Adrian 50 Adrian 90
Jane 60 Jane 60
I want to sum the scores that share the same name and print them. I tried to pair names and scores in two different dictionaries and then merge the dictionaries. However, when a name appears in both files with different scores, I can't keep both scores, so I'm stuck. This is what I've written so far:
d1=dict()
d2=dict()
with open('data1.txt', "r") as f:
test = [i for line in f for i in line.split()]
i = 0
while i < len(test) - 1:
d1[test[i]] = test[i + 1]
i += 2
del d1['Name']
with open('data2.txt', "r") as f:
test = [i for line in f for i in line.split()]
i = 0
while i < len(test) - 1:
d2[test[i]] = test[i + 1]
i += 2
del d2['Name']
z = dict(d2.items() | d1.items())
Using a dictionary comprehension should get you what you are after. I have assumed the contents of the files are:
file1.txt:
Name Score
Michael 20
Adrian 40
Jane 60
file2.txt:
Name Score
Michael 30
Adrian 50
Then you can get a total as:
with open("file1.txt", "r") as file_in:
    next(file_in, None)  # skip header (the default tolerates an empty file)
    # `if row.strip()` drops blank lines. A bare `if row` filters nothing,
    # because every line read from a file still carries its "\n".
    file1_data = dict(row.split() for row in file_in if row.strip())

with open("file2.txt", "r") as file_in:
    next(file_in, None)  # skip header (the default tolerates an empty file)
    file2_data = dict(row.split() for row in file_in if row.strip())

# Sum the scores per name; a name missing from one file contributes 0 there.
result = {
    key: int(file1_data.get(key, 0)) + int(file2_data.get(key, 0))
    for key
    in set(file1_data).union(file2_data)  # every name seen in either file
}

print(result)
This should give you a result like:
{'Michael': 50, 'Jane': 60, 'Adrian': 90}
Use defaultdict
from collections import defaultdict
# Running total per name; unseen names start at 0.
name_scores = defaultdict(int)
files = ('data1.txt', 'data2.txt')

for file in files:
    with open(file, 'r') as f:
        next(f, None)  # skip the "Name Score" header line
        for line in f:
            if not line.strip():
                continue  # ignore blank lines
            # A file object has no .split(); split each line instead.
            name, score = line.split()
            name_scores[name] += int(score)
edit: You'll probably have to skip any header line and maybe clean up trailing white spaces, but the gist of it is above.

tab character is not identified while parsing

I have strings which have 2 tab characters as
# File contains multiple lines like this
'T1 Original 210 227 Extra Mile'
'T8 Modified 1646 1655 Tickets'
# Eg: "Tx" "indication" "start_index" "end_index" "word"
# 'T1\tOriginal 210 227\tExtra Mile'
I want the word after the second tab, so I am trying to find the indices of '\t' and replace everything before the last one with an empty string.
def find_index(s, ch):
return [i for i, ltr in enumerate(s) if ltr == ch]
def extract_words(filename):
extracted_data = [line.rstrip('\n') for line in open(filename)]
search_key = '\t'
for i in range(len(extracted_data)):
indices = find_index(extracted_data[i], search_key)
extracted_data[i] = extracted_data[i].replace(extracted_data[i][:indices[-1]], '')
return extracted_data
But it does not find the '\t': the indices list comes out as [].
What is causing the problem?
the expected output
'Extra Mile'
'Tickets'
Some of your lines do not contain any tabs, so indices is empty for them and indices[-1] raises an IndexError.
Use:
if len(indices)>1: # only extract by slicing if indexes found!
to check for that.
Why so complex? Use str.split("\t"):
def extract_words(filename):
    """Return the third tab-separated field of every line in *filename*.

    Lines with fewer than two tabs are reported on stdout and skipped.
    Only the line terminator is removed before splitting, so a leading or
    trailing tab (an empty field) still counts as a separator.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with the text found after the second tab of each usable line.
    """
    words = []
    with open(filename) as f:
        for raw in f:
            # Strip only the newline: a full .strip() would also eat
            # leading/trailing tabs and shift the field positions.
            line = raw.rstrip("\r\n")
            fields = line.split("\t")
            if len(fields) > 2:
                words.append(fields[2])
            else:
                print(f"no 2 tabs in '{line}'")
    return words
# Demo input: two well-formed lines and one line with only a single tab.
t = """T1\tOriginal 210 227\tExtra Mile
T8\tModified 1646 1655\tTickets
Error\ttext"""

fn = "t.txt"
with open(fn, "w") as f:
    f.write(t)

# Print every extracted word on its own line.
print(*extract_words(fn), sep="\n")
Output:
no 2 tabs in 'Error text'
Extra Mile
Tickets
This will work on lines with 2 tabs and report any that do not have those.

Count frequency of words under given index in a file

I am trying to count the occurrences of the words at a specific index (column) in my file and print the counts as a dictionary.
def count_by_fruit(file_name="file_with_fruit_data.txt"):
with open(file_name, "r") as file:
content_of_file = file.readlines()
dict_of_fruit_count = {}
for line in content_of_file:
line = line[0:-1]
line = line.split("\t")
for fruit in line:
fruit = line[1]
dict_of_fruit_count[fruit] = dict_of_fruit_count.get(fruit, 0) + 1
return dict_of_fruit_count
print(count_by_fruit())
Output: {'apple': 6, 'banana': 6, 'orange': 3}
I am getting this output, however, it doesn't count frequency of the words correctly. After searching around I didn't seem to find the proper solution. Could anyone help me to identify my mistake?
My file has the following content: (data separated with tabs, put "\t" in example as format is being altered by stackoverflow)
I am line one with \t apple \t from 2018
I am line two with \t orange \t from 2017
I am line three with \t apple \t from 2016
I am line four with \t banana \t from 2010
I am line five with \t banana \t from 1999
You are looping too many times over the same line. Notice that the results you are getting are all 3 times what you are expecting.
Also, in Python you do not need to read the entire file into memory. Just iterate over the file object line by line.
Try:
def count_by_fruit(file_name="file_with_fruit_data.txt"):
    """Count how often each value occurs in the second tab-separated column.

    Args:
        file_name: Path of the tab-separated text file to read.

    Returns:
        A dict mapping each second-column value to its number of occurrences.

    Raises:
        IndexError: If a non-empty line has no second column.
    """
    dict_of_fruit_count = {}
    with open(file_name, "r") as f_in:
        for line in f_in:
            # Drop the line terminator first so a fruit in the last column
            # is not counted under a separate "fruit\n" key.
            line = line.rstrip("\n")
            if not line:
                continue  # tolerate empty lines, e.g. at the end of the file
            fruit = line.split("\t")[1]
            dict_of_fruit_count[fruit] = dict_of_fruit_count.get(fruit, 0) + 1
    return dict_of_fruit_count
Which can be further simplified to:
def count_by_fruit(file_name="file_with_fruit_data.txt"):
    """Count the values in the second tab-separated column of a file.

    Args:
        file_name: Path of the tab-separated text file to read.

    Returns:
        A dict mapping each second-column value to its number of occurrences.

    Raises:
        IndexError: If a non-empty line has no second column.
    """
    dict_of_fruit_count = {}
    with open(file_name) as f_in:
        # Remove the newline before splitting so a fruit in the last column
        # keeps a clean key. Empty lines are filtered out below.
        lines = (raw.rstrip("\n") for raw in f_in)
        for fruit in (line.split('\t')[1] for line in lines if line):
            dict_of_fruit_count[fruit] = dict_of_fruit_count.get(fruit, 0) + 1
    return dict_of_fruit_count
Or, if you can use Counter:
from collections import Counter
def count_by_fruit(file_name="file_with_fruit_data.txt"):
    """Count the values in the second tab-separated column using Counter.

    Args:
        file_name: Path of the tab-separated text file to read.

    Returns:
        A plain dict mapping each second-column value to its count.

    Raises:
        IndexError: If a non-empty line has no second column.
    """
    with open(file_name) as f_in:
        # Strip the newline so a last-column fruit is not keyed as "fruit\n".
        # Empty lines are skipped because they have no columns at all.
        lines = (raw.rstrip("\n") for raw in f_in)
        return dict(Counter(line.split('\t')[1] for line in lines if line))
The problem is for fruit in line:. Splitting the lines on the tabs is going to split them into three parts. If you loop over those three parts every time, adding one to the count for each, then your counts are going to be 3 times as large as the actual data.
Below is how I would write this function, using generator expressions and Counter.
from collections import Counter
def count_by_fruit(file_name="file_with_fruit_data.txt"):
    """Return a Counter of the values in the second tab-separated column.

    Args:
        file_name: Path of the tab-separated text file to read.

    Returns:
        A collections.Counter keyed by second-column value.

    Raises:
        IndexError: If a non-empty line has no second column.
    """
    with open(file_name, "r") as file:
        # rstrip("\n") rather than line[:-1]: slicing would cut a real
        # character from a final line that has no trailing newline.
        lines = (line.rstrip("\n") for line in file)
        fruit = (line.split('\t')[1] for line in lines if line)
        # The generators are lazy, so they must be consumed while the file
        # is still open -- hence the return inside the with block.
        return Counter(fruit)

Never resets list

I am trying to create a calorie counter. The program is run with the standard input redirected like this:
python3 calories.txt < test.txt
Inside calories the food is the following format: apples 500
The problem I am having is that whenever I calculate the values for a person, the list never seems to be reset to an empty list, so the values from the previous people are carried over.
import sys
food = {}
eaten = {}
finished = {}
total = 0
#mappings
def calories(x):
with open(x,"r") as file:
for line in file:
lines = line.strip().split()
key = " ".join(lines[0:-1])
value = lines[-1]
food[key] = value
def calculate(x):
a = []
for keys,values in x.items():
for c in values:
try:
a.append(int(food[c]))
except:
a.append(100)
print("before",a)
a = []
total = sum(a) # Problem here
print("after",a)
print(total)
def main():
calories(sys.argv[1])
for line in sys.stdin:
lines = line.strip().split(',')
for c in lines:
values = lines[0]
keys = lines[1:]
eaten[values] = keys
calculate(eaten)
if __name__ == '__main__':
main()
Edit - forgot to include what test.txt would look like:
joe,almonds,almonds,blue cheese,cabbage,mayonnaise,cherry pie,cola
mary,apple pie,avocado,broccoli,butter,danish pastry,lettuce,apple
sandy,zuchini,yogurt,veal,tuna,taco,pumpkin pie,macadamia nuts,brazil nuts
trudy,waffles,waffles,waffles,chicken noodle soup,chocolate chip cookie
How to make it easier on yourself:
When reading the calorie data, convert the calories to int() as soon as possible; then there is no need to convert them every time you want to sum something up.
Dictionaries have a .get(key, defaultvalue) accessor, so falling back to 100 when a food is not found is a one-liner without try: ... except:.
This works for me when I pass the second file as a command-line argument and open it as a file, instead of piping it into the program with < and reading sys.stdin.
I modified some of the parsing to remove whitespace, and calculate now returns a list of (name, calories) tuples: [(name, cal), ...].
May it help you to fix it to your liking:
def calories(x):
    """Load the food -> calories table from file *x* into the global ``food``.

    On each non-blank line the last whitespace-separated token is converted
    to int and stored as the calorie value. All preceding tokens, joined by
    single spaces, form the food name (so names may contain spaces).

    Args:
        x: Path of the calories file.

    Raises:
        ValueError: If the last token of a line is not an integer.
    """
    with open(x, "r") as file:
        for line in file:
            tokens = line.split()
            if not tokens:
                continue  # blank line: nothing to record
            *name_parts, value = tokens
            # Convert once here so later sums never have to call int() again.
            food[" ".join(name_parts)] = int(value)
def getCal(foodlist, defValueUnknown = 100):
    """Get sum / total calories of a list of ingredients, unknown cost 100.

    Args:
        foodlist: Iterable of food names to look up in the global ``food``.
        defValueUnknown: Calories assumed for a name missing from ``food``.

    Returns:
        The summed calories of all entries in *foodlist*.
    """
    # dict.get supplies the fallback, so no try/except is needed per item.
    return sum(food.get(x, defValueUnknown) for x in foodlist)
def calculate(x):
    """Return a list of (name, total_calories) tuples, one per entry of *x*.

    Args:
        x: Dict mapping a person's name to the list of foods they ate.

    Returns:
        A list of (name, calories) tuples in the dict's iteration order.
    """
    return [(name, getCal(foods)) for name, foods in x.items()]
def main():
    """Read both input files named on the command line and print the totals.

    sys.argv[1] is the calories file. sys.argv[2] is a file of comma-separated
    lines: a person's name followed by the foods that person ate.
    """
    calories(sys.argv[1])
    with open(sys.argv[2]) as f:  # parse as file, not piped in via sys.stdin
        for line in f:
            line = line.strip()
            if not line:
                continue  # a blank line would otherwise add a '' person
            lines = line.split(',')
            # One assignment per input line is enough; looping over the
            # fields only repeated the same assignment.
            values = lines[0].strip()
            keys = [x.strip() for x in lines[1:]]  # ensure no whitespaces in
            eaten[values] = keys
    calced = calculate(eaten)  # calculate after all are read into the dict
    print (calced)
Output:
[('joe', 1400), ('mary', 1400), ('sandy', 1600), ('trudy', 1000)]
Using sys.stdin and piping just led to my console blinking and waiting for manual input - maybe VS related...

Creating a dictionary to count the number of occurrences of Sequence IDs

I'm trying to write a function to count the number of each sequence ID that occurs in this file (it's a sample blast file)
The picture above is the input file I'm dealing with.
def count_seq(input):
dic1={}
count=0
for line in input:
if line.startswith('#'):
continue
if line.find('hits found'):
line=line.split('\t')
if line[1] in dic1:
dic1[line]+=1
else:
dic1[line]=1
return dic1
Above is my code which when called just returns empty brackets {}
So I'm trying to count how many times each of the sequence IDs (second element of last 13 lines) occur eg: FO203510.1 occurs 4 times.
Any help would be appreciated immensely, thanks!
Maybe this is what you're after:
def count_seq(input_file):
    """Count how often each sequence ID occurs in *input_file*.

    The sequence ID is taken to be the second whitespace-separated column
    of every data line. Blank lines and lines starting with '#' (after
    stripping surrounding whitespace) are skipped.

    Args:
        input_file: Path of the file to read.

    Returns:
        A dict mapping each sequence ID to its number of occurrences.

    Raises:
        IndexError: If a data line has fewer than two columns.
    """
    dic1 = {}
    with open(input_file, "r") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            seq_id = line.split()[1]
            dic1[seq_id] = dic1.get(seq_id, 0) + 1
    return dic1
print(count_seq("blast_file"))
This is a fitting case for collections.defaultdict. Let f be the file object. Assuming the sequences are in the second column, it's only a few lines of code as shown.
from collections import defaultdict
d = defaultdict(int)
# Pull the second whitespace-separated column from every data line,
# skipping '#' comment lines and blank lines (which have no columns).
seqs = (
    line.split()[1]
    for line in f
    if line.strip() and not line.strip().startswith("#")
)
for seq in seqs:
    d[seq] += 1
See if it works!

Resources