Speeding up my code for pset6 DNA in cs50x - python-3.x

I am currently doing CS50 DNA pset and I wrote all of my code but it is slower for large files which results in check50 considering it wrong. I have attached my code and the error check50 shows below.
import sys
import csv
def main():
argc = len(sys.argv)
if (argc != 3):
print("Usage: python dna.py [database] [sequence]")
exit()
# Sets variable name for each argv argument
arg_database = sys.argv[1]
arg_sequence = sys.argv[2]
# Converts sequence csv file to string, and returns as thus
sequence = get_sequence(arg_sequence)
seq_len = len(sequence)
# Returns STR patterns as list
STR_array = return_STRs(arg_database)
STR_array_len = len(STR_array)
# Counts highest instance of consecutively reoccurring STRs
STR_values = STR_count(sequence, seq_len, STR_array, STR_array_len)
DNA_match(STR_values, arg_database, STR_array_len)
# Reads argv2 (sequence), and returns text within as a string
def get_sequence(arg_sequence):
with open(arg_sequence, 'r') as csv_sequence:
sequence = csv_sequence.read()
return sequence
# Reads STR headers from arg1 (database) and returns as list
def return_STRs(arg_database):
with open(arg_database, 'r') as csv_database:
database = csv.reader(csv_database)
STR_array = []
for row in database:
for column in row:
STR_array.append(column)
break
# Removes first column header (name)
del STR_array[0]
return STR_array
def STR_count(sequence, seq_len, STR_array, STR_array_len):
# Creates a list to store max recurrence values for each STR
STR_count_values = [0] * STR_array_len
# Temp value to store current count of STR recurrence
temp_value = 0
# Iterates over each STR in STR_array
for i in range(STR_array_len):
STR_len = len(STR_array[i])
# Iterates over each sequence element
for j in range(seq_len):
# Ensures it's still physically possible for STR to be present in sequence
while (seq_len - j >= STR_len):
# Gets sequence substring of length STR_len, starting from jth element
sub = sequence[j:(j + (STR_len))]
# Compares current substring to current STR
if (sub == STR_array[i]):
temp_value += 1
j += STR_len
else:
# Ensures current STR_count_value is highest
if (temp_value > STR_count_values[i]):
STR_count_values[i] = temp_value
# Resets temp_value to break count, and pushes j forward by 1
temp_value = 0
j += 1
i += 1
return STR_count_values
# Searches database file for DNA matches
def DNA_match(STR_values, arg_database, STR_array_len):
with open(arg_database, 'r') as csv_database:
database = csv.reader(csv_database)
name_array = [] * (STR_array_len + 1)
next(database)
# Iterates over one row of database at a time
for row in database:
name_array.clear()
# Copies entire row into name_array list
for column in row:
name_array.append(column)
# Converts name_array number strings to actual ints
for i in range(STR_array_len):
name_array[i + 1] = int(name_array[i + 1])
# Checks if a row's STR values match the sequence's values, prints the row name if match is found
match = 0
for i in range(0, STR_array_len, + 1):
if (name_array[i + 1] == STR_values[i]):
match += 1
if (match == STR_array_len):
print(name_array[0])
exit()
print("No match")
exit()
main()
Check50 error link:
https://submit.cs50.io/check50/fd890301a0dc9414cd29c2b4dcb27bd47e6d0a48
If you wait for long, then you get the answer but since my program is running slow check50 is considering it wrong

Well, I solved it just by adding a break statement.

Related

How to convert this nested string list into the actual list

How to convert string into new_list without using eval(), exec() or any other libraries?
string = '[1,2,3,[4,5,6],7,[8,[9,10]],11]'
new_list = [1,2,3,[4,5,6],7,[8,[9,10]],11]
The biggest problem you're facing with this question is that you cannot say how deep the list will nest, therefore the best option is to make a recursive function that evaluates the string and calls itself whenever it encounters a new list.
Another issue you will run in to is that a number can be multiple digits long and it would not be a clean solution to only assume it might be max 2 digits. A while loop to find all consecutive digits is probably the best solution for that. Here is my implementation:
def get_number(str, idx):
"""Find all consecutive digits and return them and the updated index"""
number = ""
while str[idx].isdigit():
number += str[idx]
idx += 1
return int(number), idx
def recursive_list(str, idx):
"""Transform a string to list, recursively calling this function whenever a nested list is encountered."""
lst = []
# Loop over the entire string
while idx < len(str):
char = str[idx]
# When encountering a digit, get the entire number
if char.isdigit():
number, idx = get_number(str, idx)
lst.append(number)
idx += 1
# When encountering a closing bracket, return the (sub-)list
elif char == ']':
return lst, idx + 1
# When encountering an opening bracket, append the nested list
elif char == '[':
nested, idx = recursive_list(str, idx + 1)
lst.append(nested)
else:
# Go to the next character if we encounter anything else
idx += 1
return lst, idx
def main():
"""Transform a string to a list."""
str = "[1,2,3,[4,5,6],7,[8,[9,10]],11]"
new_list, _ = recursive_list(str, 1)
print(new_list)
if __name__ == "__main__":
main()

Formatting required output for String slicing in Python

Beginner in python here, and I have a code that is supposed to slice a string evenly and oddly and display it.
Here is my code:
def even_bits(str):
result = ""
for i in range(len(str)):
if i % 2 == 0:
result = result + str[i]
return result
def odd_bits(str):
result = ""
for i in range(len(str)):
if i % 2 == 1:
result = result + str[i]
return result
for i in range(int(input())):
w = input('')
print(even_bits(w), ' ' ,odd_bits(w))
This runs correctly however gives output as follows:
Sample Input:
2
Hello
World
Sample Output:
2
Hello
Hlo el
World
Wrd ol
How do I format the output such that I get output as follows:
Sample Output:
Hlo el
Wrd ol
Thank You in advance.
You can first declare an input list that contains all input strings. Then iterate over the input list and print the even and odd characters:
def even_bits(chain):
result = ""
for i in range(len(chain)):
if i % 2 == 0:
result = result + chain[i]
return result
def odd_bits(chain):
result = ""
for i in range(len(chain)):
if i % 2 == 1:
result = result + chain[i]
return result
input_list = [] # input list that contains all input strings
for i in range(int(input())):
w = input('')
input_list.append(w)
# iterate over input list to print even and odd characters
for inp in input_list:
print(even_bits(inp), ' ', odd_bits(inp))
You can make two result lists. One for the even outputs, one for the odd outputs, then zip them and print each element. Also, you can easily take the even and odd letters using a single slice.
x = "Hello world"
# slicing looks like [START:STOP:STEP], so we can do
evens = x[0::2] # Will start at 0, go to the end, and take every 2nd character
odds = x[1::2] # Will start at 1, go to the end, and take every 2nd character
print(evens) # >>> Hlowrd
print(odds) # >>> el ol
This works even if you have an empty string.
Putting it all together, it could look like this:
def even_bits(my_str):
return my_str[0::2]
def odd_bits(my_str):
return my_str[1::2]
even_results = []
odd_results = []
for i in range(int(input("How many inputs: "))):
w = input('Input # {}: '.format(i+1))
even_results.append(even_bits(w))
odd_results.append(odd_bits(w))
for ev, od in zip(even_results, odd_results):
print(ev, od)
Output:
How many inputs: 2
Input # 1: Hello
Input # 2: World
Hlo el
Wrd ol

list index error python (numeric words program)

I'm trying to write a program in Python 3.
This is how it works:
The input is a word.
And the program has to look if the word contains a dutch numeric word.
A word cannot contain more than 1 numeric word and if a word doesn't contain a numeric word than it has to print 'geen' (none in dutch).
example:
Input = BETWEEN
Output = TWEE (two in dutch)
Input = ZEEVERS
Output = ZES (six in dutch)
Here is my code:
import sys
invoer = input()
letterwoorden = [['T','W','E','E'], ['D','R','I','E'], ['V','I','E','R'],
['V','I','J','F'], ['Z','E','S'], ['Z','E','V','E','N'], ['A','C','H','T'],
['N','E','G','E','N']]
antwoord = []
woord = [str(a) for a in str(invoer)]
L = len(woord)
a = 0
b = 0
c = 0
for i in range(0, 8):
for j in range(0, len(letterwoorden[a])):
if letterwoorden[a][b] == woord[c]:
antwoord.append(letterwoorden[a][b])
b = b + 1
c = c + 1
else:
c = c + 1
if antwoord == letterwoorden[a]:
print(antwoord)
break
else:
a = a + 1
antwoord.clear()
if antwoord != letterwoorden[a]:
print('geen')
Could someone help me with the error on line 21? (list index out of range)
Thanks!
The code is not fully done but when the input is TWEET the output is TWEE,
when the input is BETWEEN it gives the error.
Even though usr2564301's first solution looks good, I wanted to add a couple things. You'll find this in the comments in the code.
Here's how I'd modify your code:
## use strings instead of list of strings
## ['T', 'W', 'E', 'E'] ==> 'TWEE'
## they are also iterable (you can loop through them)
## as well as indexable (['T', 'W', 'E', 'E'][0] == 'TWEE'[0])
letterwoorden = ['TWEE', 'DRIE', 'VIER', 'VIJF', 'ZES', 'ZEVEN', 'ACHT', 'NEGEN']
## keep the string
woord = input()
print('Input:', woord)
## answer should be string as well
antwoord = ''
## name your variables something meaningful
## how does one differentiate between b and c? smh
letterWoordenIndex = 0
woordenIndex = 0
## for-loops can be powerful in Python
## you might've been used to index-based loops from other languages :|
## this way, tempWord can replace all occurrences of letterwoorden[a]
for tempWord in letterwoorden:
## reset your variables at the beginning of the loop
letterWoordenIndex = woordenIndex = 0
# """ ## debug output printing word
print('Word:', tempWord, '?')
# """
## loop through the length of word
## use _ to indicate that the variable won't be used
for _ in range(len(woord)):
# """ ## debug output comparing letters/indices
print(tempWord[letterWoordenIndex],
'({})'.format(letterWoordenIndex),
'<=>', woord[woordenIndex],
'({})'.format(woordenIndex))
# """
## check current indices equals match
if tempWord[letterWoordenIndex] == woord[woordenIndex]:
antwoord += tempWord[letterWoordenIndex] ## append char to string using +
## increment indices
letterWoordenIndex += 1
woordenIndex += 1
## check if index is filled
if letterWoordenIndex == len(tempWord):
break
else:
woordenIndex += 1
# """ ## debug output comparing final results
print(antwoord, '>==<', tempWord)
# """
## assert that the accumulated result (antwoord)
## equates to the tempWord
if antwoord == tempWord:
# """ ## debug assert true
print('Yep\n')
# """
print(antwoord)
break
## no need to use else here
## if the code inside the above if executes, then it's already game-over
antwoord = '' ## empty the string
# """ ## debug assert false
print('Nope\n')
# """
## antwoord would be empty if everything failed
if antwoord == '':
print('GEEN')
You are iterating over the wrong list with your line
for j in range(0, len(letterwoorden[a])):
as this increases a for all letterwoordenagain – you already iterate over letterwoorden with the first loop, for i in range(0, 8):. Changing that to iterate over the word in question led to another error if you run out of 'source' characters (the length of invoer) OR of 'compare' characters (the word woord[c]). You must also take care to reset your b and c counters when restarting a compare.
At the very end, you test antwoord[a] but a may be out of range
The following code works
import sys
invoer = input()
letterwoorden = [['T','W','E','E'], ['D','R','I','E'], ['V','I','E','R'],
['V','I','J','F'], ['Z','E','S'], ['Z','E','V','E','N'], ['A','C','H','T'],
['N','E','G','E','N']]
antwoord = []
woord = [str(a) for a in str(invoer)]
L = len(woord)
a = 0
b = 0
c = 0
for i in range(len(letterwoorden)):
b = 0
c = 0
for j in range(len(woord)):
print ('test', a,b,c, letterwoorden[a][b], woord[c])
if letterwoorden[a][b] == woord[c]:
antwoord.append(letterwoorden[a][b])
b = b + 1
if b >= len(letterwoorden[a]):
break
c = c + 1
else:
c = c + 1
if antwoord == letterwoorden[a]:
print(antwoord)
break
else:
a = a + 1
antwoord = []
if antwoord == []:
print('geen')
A few additional notes: you don't need to maintain separate variables if you already have a loop variable. Where you use a, for example, you can also use the loop i; and the same with j.
About all of your comparing can be done much more efficiently, since Python can immediately check if a phrase contains another phrase: if straw in haystack. So, basically,
invoer = 'ZEEVERS'
letterwoorden = ['TWEE', 'DRIE', 'VIER', 'VIJF', 'ZES', 'ZEVEN', 'ACHT', 'NEGEN']
for word in letterwoorden:
if word in invoer:
print (word)
break
else:
print ('geen')
where the else is linked to the for and not to the else, to check if the loop stopped for a result.

always got the same output with random.choice in a function

I tried using for-loop to exec my function but its always the same result.
import random
def main(arr):
result = random.choice(arr)
...some code...
return len(result)
for i in range(100):
main(arr)
I could only get diff result from stop/run the terminal. Anyone know why?
my question is the same as this one. random.choice always same
import random
results = []
with open('kargerMinCut.txt') as inputfile:
for line in inputfile:
results.append(line.strip().split('\t'))
def contract(arr):
while len(arr) > 2:
# Generate random number in list of lists
# ranList = random.choice(arr)
ranList = arr[np.random.choice(len(arr))]
ranNum = random.choice(ranList[1:])
# retrieve the index of the random number
listIndex = arr.index(ranList)
for i in range(0, len(arr)):
if arr[i][0] == ranNum:
targetList = i
break
target = ranList[0]
for i in range(0, len(arr)):
if i == listIndex:
arr[i].pop(0)
arr[i] = [x for x in arr[i] if x != ranNum]
elif i == targetList:
arr[i] = [x for x in arr[i] if x != target]
else:
for index, item in enumerate(arr[i]):
if item == target:
arr[i][index] = ranNum
arr[targetList] += arr[listIndex]
del arr[listIndex]
return len(arr[0])-1
the arr would be like this
array = [[1,2,3,4],[2,1,3,4],[3,1,2,4],[4,1,2,3]]
I don't know what you do inside your function but I've got the normal result. And in the question what you linked to the person just used seed. This is kinda pseudorandom that gives you all the time the same random output. Here is the link to deep your knowledge about pseudorandom
import random
arr = [1,2,3,4,5]
def main(arr):
result = random.choice(arr)
print(result)
for i in range(100):
main(arr)
The result is as it has to be:
1
3
5
3
4
3
1
4
4
3
2

Change specific elements of a matrix puzzle - Python

I have to solve how to replace the elements below zero elements with zeros and output the sum of the remaining elements in the matrix.
For example, [[0,3,5],[3,4,0],[1,2,3]] should output the sum of 3 + 5 + 4 + 1 + 2, which is 15.
So far:
def matrixElementsSum(matrix):
out = 0
# locate the zeros' positions in array & replace element below
for i,j in enumerate(matrix):
for k,l in enumerate(j):
if l == 0:
break
out += l
return out
The code outputs seemingly random numbers.
Can someone fix what's wrong? Thanks
You can easily drop elements that are below a zero element is by using the zip function.
def matrixElementsSum(matrix):
out = 0
# locate the zeros' positions in array & replace element below
for i,j in enumerate(matrix):
# elements in the first row cannot be below a '0'
if i == 0:
out += sum(j)
else:
k = matrix[i-1]
for x, y in zip(j, k):
if y != 0:
out += x
return out
Now consider naming your variables a little more meaningfully. Something like:
def matrixElementsSum(matrix):
out = 0
# locate the zeros' positions in array & replace element below
for row_number, row in enumerate(matrix):
# elements in the first row cannot be below a '0'
if row_number == 0:
out += sum(row)
else:
row_above = matrix[row_number - 1]
for element, element_above in zip(row, row_above):
if element_above != 0:
out += element
return out
You should look into list comprehensions to make the code even more readable.

Resources