I have these functions. They are working perfectly, but is there a way to speed them up? I tried to split the dataset, but it takes the same or more time than the original functions. I'm working with big arrays (1Mill+X2504X2). create_needed_pos takes arount 350sec for 1.2millX2054X2 array, but my biggest is around 10bilionx2054x2.
#nb.njit
def create_needed_pos(chr_pos, pos):
needed_pos = nb.typed.List.empty_list(nb.int32)
for i in range(len(chr_pos)):
for k in range(len(pos)):
if chr_pos[i] == pos[k]:
if i == k == 1:
needed_pos = nb.typed.List([pos[k]])
else:
needed_pos.append(pos[k])
return needed_pos
#nb.njit
def create_mat(geno):
# create matrix as np.uint8 (1 byte) instead of list of python integers (8 byte)
# also no need to dynamically resize / increase list size
geno_mat = np.zeros((len(geno[:, 0]), len(geno[1, :])), dtype=np.uint8)
for i in np.arange(len(geno[:, 0])):
for k in np.arange(len(geno[1, :])):
g = geno[i, k]
# nested ifs to avoid duplicate comparisons
if g[0] == 0:
if g[1] == 0:
geno_mat[i, k] = 2
elif g[1] == 1:
geno_mat[i, k] = 1
else:
geno_mat[i, k] = 9
elif g[0] == 1:
if g[1] == 0:
geno_mat[i, k] = 1
elif g[1] == 1:
geno_mat[i, k] = 0
else:
geno_mat[i, k] = 9
else:
geno_mat[i, k] = 9
return geno_mat
Related
I have some trouble about project euler problem 60.
The primes 3, 7, 109, and 673, are quite remarkable. By taking any two primes and concatenating them in any order the result will always be prime. For example, taking 7 and 109, both 7109 and 1097 are prime. The sum of these four primes, 792, represents the lowest sum for a set of four primes with this property.
Find the lowest sum for a set of five primes for which any two primes concatenate to produce another prime.
The code that I created is too slow to see the correct answer. And I don't even see that is it work correctly or not. The code is:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 27 21:18:10 2022
#author: burak
"""
def is_prime(n, check_list_for_primes): #checks if value is prime
if check_list_for_primes.count(n) > 0: #checks if valu calculated before. if it were, it avoid loop.
return True
else:
if n == 1:
return False
if n == 2 or n == 3:
return True
i = 2
while i * i <= n:
if n % i == 0:
return False
exit(0)
i += 1
check_list_for_primes.append(n) # if it never calculated, stores the value to avoid loop at the beginning of function.
return True
def check_1(i, j): # checks the concanated calues if they are primes.
if is_prime(int(str(i)+str(j)), check_list_for_primes) == True and is_prime(int(str(j)+str(i)), check_list_for_primes) == True:
return True
else:
return False
def check_2(temp_list, n): # checks the final list that obtain the minimum summation.
if temp_list.count(n) == 0:
temp_list.append(n)
for i in temp_list:
for j in temp_list:
if len(temp_list) == 1:
return check_1(i, j)
elif i == j:
continue
elif len(temp_list) == 1:
return True
break
elif check_1(i, j) == False:
return False
return True
def func_(prime_list): # creates a dictionary summation of the five prime numbers in order to problem.
temp_list = []
result_dic = {}
k = 0
t = 0
for i in prime_list:
if i == 5:
continue
while k == 0:
t = k
for j in prime_list:
if i == j or j == 5:
continue
elif j < i:
continue
else:
temp_list.append(j)
if check_2(temp_list, i) == True:
continue
else:
temp_list.remove(j)
if t > 0 and len(temp_list) > 1:
t -= 1
temp_list.remove(max(temp_list))
continue
if len(temp_list) == 5:
result_dic[sum(temp_list)] = temp_list
elif len(temp_list) < 5:
k +=1
temp_list = []
return result_dic
if __name__ == "__main__":
dic_ = {}
prime_list = []
check_list_for_primes = []
for i in range(3, 9000, 1): #creates prime list between given range
if is_prime(i, check_list_for_primes) == True:
prime_list.append(i)
check_list_for_primes = prime_list.copy() #pseudo prime list to avoid calculating if the number is prime.
dic_ = func_(prime_list) #final dictionary to obtain minimum summation of five prime numbers.
x = min(list(dic_.keys()))
print(str(x) + " : " + str(dic_[x]))
I tried to type the examination of calculating order.
The main problem is at "func_" function. The for loop of "j" must be manipulated if the code not to get required list lenght. The "j" loop must be restart again after remove second element of "temp_list" and it must be start after shift to removed element of "prime_list".
Could you help me to see where I made mistakes and how can I improve calculation speed. Thanks so much.
I solved it. while loop changed and the "j" for loop determined by a list. The final code is;
def is_prime(n, check_list_for_primes): #checks if value is prime
if check_list_for_primes.count(n) > 0: #checks if valu calculated before. if it were, it avoid loop.
return True
else:
if n == 1:
return False
if n == 2 or n == 3:
return True
i = 2
while i * i <= n:
if n % i == 0:
return False
exit(0)
i += 1
check_list_for_primes.append(n) # if it never calculated, stores the value to avoid loop at the beginning of function.
return True
def check_1(i, j): # checks the concanated values if they are primes.
if is_prime(int(str(i)+str(j)), check_list_for_primes) == True and is_prime(int(str(j)+str(i)), check_list_for_primes) == True:
return True
else:
return False
def check_2(temp_list): # checks the final list that obtain the minimum summation.
for i in temp_list:
for j in temp_list:
if len(temp_list) == 1:
return check_1(i, j)
elif i == j:
continue
elif len(temp_list) == 1:
return True
break
elif check_1(i, j) == False:
return False
return True
def func_(prime_list): # creates a dictionary summation of the five prime numbers in order to problem.
prime_list.remove(5)
temp_list = []
result_dic = {}
# k = 0
copy_primes = prime_list.copy()
for i in prime_list:
for z in prime_list:
if z <= i:
copy_primes.remove(z)
else:
break
if i == max(prime_list):
break
elif i == 5:
continue
# for z in range(len(prime_list) - 1, 0, -1):
# if check_1(i,prime_list[z]) == True:
# max_prime_of_i = prime_list[z]
# break
while len(temp_list) < 5:
# if temp_list[1] == max_prime_of_i:
# break
if len(temp_list) == 1:
break
if len(copy_primes) == 0 and len(temp_list) > 1:
copy_primes = prime_list.copy()
for z in prime_list:
if z <= temp_list[1]:
copy_primes.remove(z)
else:
break
temp_list = []
for j in prime_list:
if len(copy_primes) > 0:
j = copy_primes[0]
copy_primes.remove(j)
else:
break
if temp_list.count(i) == 0:
temp_list.append(i)
continue
temp_list.append(j)
temp_list.sort()
if check_2(temp_list) == True and len(temp_list) > 1:
continue
elif check_2(temp_list) == False and len(temp_list) > 1:
temp_list.remove(j)
print(i)
print(temp_list)
if len(temp_list) < 5 and len(copy_primes) == 0:
continue
elif len(temp_list) == 5:
break
copy_primes = prime_list.copy()
if len(temp_list) == 5:
result_dic[sum(temp_list)] = temp_list
print(str(min(list(result_dic.keys()))) + " : " + str(result_dic[min(list(result_dic.keys()))]))
weight_ = 0
check_weight = 0
for p in temp_list:
weight_ = weight_ + len(str(p))
if weight_ < check_weight or check_weight == 0:
check_weight = weight_
elif check_weight < weight_ and len(temp_list) == 5:
return result_dic
temp_list = []
return result_dic
if __name__ == "__main__":
dic_ = {}
prime_list = []
check_list_for_primes = []
for i in range(3, 9000, 1): #creates prime list between given range
if is_prime(i, check_list_for_primes) == True:
prime_list.append(i)
check_list_for_primes = prime_list.copy() #pseudo prime list to avoid calculating if the number is prime.
dic_ = func_(prime_list) #final dictionary to obtain minimum summation of five prime numbers.
x = min(list(dic_.keys()))
print(str(x) + " : " + str(dic_[x]))
I have been coding this problem for HackerRank and I ran into so many problems. The problem is called "Plus Minus" and I am doing it in Python 3. The directions are on https://www.hackerrank.com/challenges/plus-minus/problem. I tried so many things and it says that "there is no response on stdout". I guess a none-type is being returned. Here is the code.:
def plusMinus(arr):
p = 0
neg = 0
z = arr.count(0)
no = 0
for num in range(n):
if arr[num] < 0:
neg+=1
if arr[num] > 0:
p+=1
else:
no += 1
continue
return p/n
The following are the issues:
1) variable n, which represents length of the array, needs to be passed to the function plusMinus
2) No need to maintain the extra variable no, as you have already calculated the zero count. Therefore, we can eliminate the extra else condition.
3) No need to use continue statement, as there is no code after the statement.
4) The function needs to print the values instead of returning.
Have a look at the following code with proper naming of variables for easy understanding:
def plusMinus(arr, n):
positive_count = 0
negative_count = 0
zero_count = arr.count(0)
for num in range(n):
if arr[num] < 0:
negative_count += 1
if arr[num] > 0:
positive_count += 1
print(positive_count/n)
print(negative_count/n)
print(zero_count/n)
if __name__ == '__main__':
n = int(input())
arr = list(map(int, input().rstrip().split()))
plusMinus(arr, n)
The 6 decimals at the end are needed too :
Positive_Values = 0
Zeros = 0
Negative_Values = 0
n = int(input())
array = list(map(int,input().split()))
if len(array) != n:
print(f"Error, the list only has {len(array)} numbers out of {n}")
else:
for i in range(0,n):
if array[i] == 0:
Zeros +=1
elif array[i] > 0:
Positive_Values += 1
else:
Negative_Values += 1
Proportion_Positive_Values = Positive_Values / n
Proportion_Of_Zeros = Zeros / n
Proportion_Negative_Values = Negative_Values / n
print('{:.6f}'.format(Proportion_Positive_Values))
print('{:.6f}'.format(Proportion_Negative_Values))
print('{:.6f}'.format(Proportion_Of_Zeros))
I am studying the Bioinformatics course at Coursera, and have been stuck on the following problem for 5 days:
Implement GreedyMotifSearch.
Input: Integers k and t, followed by a collection of strings Dna.
Output: A collection of strings BestMotifs resulting from applying GreedyMotifSearch(Dna, k, t).
If at any step you find more than one Profile-most probable k-mer in a given string, use the
one occurring first.
Here's my attempt to solve this (I just copied it from my IDE, so pardon any print statements):
def GreedyMotifSearch(DNA, k, t):
"""
Documentation here
"""
import math
bestMotifs = []
bestScore = math.inf
for string in DNA:
bestMotifs.append(string[:k])
base = DNA[0]
for i in window(base, k):
newMotifs = []
for j in range(t):
profile = ProfileMatrix([i])
probable = ProfileMostProbable(DNA[j], k, profile)
newMotifs.append(probable)
if Score(newMotifs) <= bestScore:
bestScore = Score(newMotifs)
bestMotifs = newMotifs
return bestMotifs
The helper functions are these:
def SymbolToNumber(Symbol):
"""
Converts base to number (in lexicograpical order)
Symbol: the letter to be converted (str)
Returns: the number correspondinig to that base (int)
"""
if Symbol == "A":
return 0
elif Symbol == "C":
return 1
elif Symbol == "G":
return 2
elif Symbol == "T":
return 3
def NumberToSymbol(index):
"""
Finds base from number (in lexicographical order)
index: the number to be converted (int)
Returns: the base corresponding to index (str)
"""
if index == 0:
return str("A")
elif index == 1:
return str("C")
elif index == 2:
return str("G")
elif index == 3:
return str("T")
def HammingDistance(p, q):
"""
Finds the number of mismatches between 2 DNA segments of equal lengths
p: first DNA segment (str)
q: second DNA segment (str)
Returns: number of mismatches (int)
"""
return sum(s1 != s2 for s1, s2 in zip(p, q))
def window(s, k):
for i in range(1 + len(s) - k):
yield s[i:i+k]
def ProfileMostProbable(Text, k, Profile):
"""
Finds a k-mer that was most likely to be generated by profile among
all k-mers in Text
Text: given DNA segment (str)
k: length of pattern (int)
Profile: a 4x4 matrix (list)
Returns: profile-most probable k-mer (str)
"""
letter = [[] for key in range(k)]
probable = ""
hamdict = {}
index = 1
for a in range(k):
for j in "ACGT":
letter[a].append(Profile[j][a])
for b in range(len(letter)):
number = max(letter[b])
probable += str(NumberToSymbol(letter[b].index(number)))
for c in window(Text, k):
for x in range(len(c)):
y = SymbolToNumber(c[x])
index *= float(letter[x][y])
hamdict[c] = index
index = 1
for pat, ham in hamdict.items():
if ham == max(hamdict.values()):
final = pat
break
return final
def Count(Motifs):
"""
Documentation here
"""
count = {}
k = len(Motifs[0])
for symbol in "ACGT":
count[symbol] = []
for i in range(k):
count[symbol].append(0)
t = len(Motifs)
for i in range(t):
for j in range(k):
symbol = Motifs[i][j]
count[symbol][j] += 1
return count
def FindConsensus(motifs):
"""
Finds a consensus sequence for given list of motifs
motifs: a list of motif sequences (list)
Returns: consensus sequence of motifs (str)
"""
consensus = ""
for i in range(len(motifs[0])):
countA, countC, countG, countT = 0, 0, 0, 0
for motif in motifs:
if motif[i] == "A":
countA += 1
elif motif[i] == "C":
countC += 1
elif motif[i] == "G":
countG += 1
elif motif[i] == "T":
countT += 1
if countA >= max(countC, countG, countT):
consensus += "A"
elif countC >= max(countA, countG, countT):
consensus += "C"
elif countG >= max(countC, countA, countT):
consensus += "G"
elif countT >= max(countC, countG, countA):
consensus += "T"
return consensus
def ProfileMatrix(motifs):
"""
Finds the profile matrix for given list of motifs
motifs: list of motif sequences (list)
Returns: the profile matrix for motifs (list)
"""
Profile = {}
A, C, G, T = [], [], [], []
for j in range(len(motifs[0])):
countA, countC, countG, countT = 0, 0, 0, 0
for motif in motifs:
if motif[j] == "A":
countA += 1
elif motif[j] == "C":
countC += 1
elif motif[j] == "G":
countG += 1
elif motif[j] == "T":
countT += 1
A.append(countA)
C.append(countC)
G.append(countG)
T.append(countT)
Profile["A"] = A
Profile["C"] = C
Profile["G"] = G
Profile["T"] = T
return Profile
def Score(motifs):
"""
Finds score of motifs relative to the consensus sequence
motifs: a list of given motifs (list)
Returns: score of given motifs (int)
"""
consensus = FindConsensus(motifs)
score = 0.0000
for motif in motifs:
score += HammingDistance(consensus, motif)
#print(score)
return round(score, 4)
It seems fine to me. However, when I run this code for quiz problems, it gives an incorrect answer. Their code grading system shows this error:
Failed test #3. Your indexing may be off by one at the beginning of each string in Dna.
I have tried everything I can think of and run this code on all their sample data and debug data, but I simply can't figure out how to make this code work. Please help me with any possible solutions to this.
You have a few problems. I think this should address them all. I've included comments explaining each change along with your original code and a reference to the relevant Pseudocode in the debug data page you linked to.
def GreedyMotifSearch(DNA, k, t):
"""
Documentation here
"""
import math
bestMotifs = []
bestScore = math.inf
for string in DNA:
bestMotifs.append(string[:k])
base = DNA[0]
for i in window(base, k):
# Change here. Should start with one element in motifs and build up.
# As in the line "motifs ← list with only Dna[0](i,k)"
# newMotifs = []
newMotifs = [i]
# Change here to iterate over len(DNA).
# Should go through "for j from 1 to |Dna| - 1"
# for j in range(t):
for j in range(1, len(DNA)):
# Change here. Should build up motifs and build profile using them.
# profile = ProfileMatrix([i])
profile = ProfileMatrix(newMotifs)
probable = ProfileMostProbable(DNA[j], k, profile)
newMotifs.append(probable)
# Change to < rather < = to ensure getting the most recent hit. As referenced in the instructions:
# If at any step you find more than one Profile-most probable k-mer in a given string, use the one occurring **first**.
if Score(newMotifs) < bestScore:
#if Score(newMotifs) <= bestScore:
bestScore = Score(newMotifs)
bestMotifs = newMotifs
return bestMotifs
This code is to convert decimals to binary.
What I'm trying to do is to chop off the decimal part after diving by 2.
binary = []
n = 25
while n != 0:
binary.append(n % 2)
n = n / 2
int(n) #this part
print(binary)
print(n)
choose = input("continue?[Y/N]")
if choose == 'y':
continue
else:
break
print(list(reversed(binary)))
Is this what you want?
binary = []
n = 25
while n != 0:
binary.append(n % 2)
n = n / 2
n = int(n) #assign result to n
print(binary)
print(n)
choose = input("continue?[Y/N]")
if choose == 'y':
continue
else:
break
print(list(reversed(binary)))
I wrote the python code below that solves and prints each possible solution for anything under 6 unit fractions, but given how I programmed it, it takes infinitely long to check for 7 fractions. Any ideas on how to modify the code to find all the possible solutions more efficienty?
import sys
from fractions import Fraction
import os
#myfile = open('7fractions.txt', 'w')
max = 7 #>2 #THIS VARIABLE DECIDES HOW MANY FRACTIONS ARE ALLOWED
A = [0] * max
A[0] = 1
def printList(A):
return str(A).strip('[]')
def sumList(A):
sum = 0
for i in A:
if i != 0:
sum += Fraction(1, i)
return sum
def sumTest(A):
sum = 0
v = 0
for i in range(0, len(A)):
if A[i] == 0 and v == 0:
v = Fraction(1,A[i-1])
if v != 0:
sum += v
else:
sum += Fraction(1, A[i])
return sum
def solve(n, A):
if n == max - 2:
while (sumTest(A) > 1):
print(A)
if sumList(A) < 1:
e = 1 - sumList(A)
if e.numerator == 1 and e.denominator>A[n-1]:
A[n+1] = e.denominator
#myfile.write(printList(A) + '\n')
print(A)
A[n+1] = 0
A[n] += 1
else:
while (sumTest(A) > 1):
if sumList(A) < 1:
A[n+1] = A[n] + 1
solve(n+1, A)
A[n+1] = 0
A[n] += 1
#execute
solve(0, A)