Finding a substring that occurs k times in a long string - python-3.x

I'm trying to solve some algorithm task, but the solution does not pass the time limit.
The condition of the task is the following:
You are given a long string consisting of small Latin letters. We need to find all its substrings of length n that occur at least k times.
Input format:
The first line contains two natural numbers n and k separated by a space.
The second line contains a string consisting of small Latin letters. The string length is 1 ≤ L ≤ 10^6.
n ≤ L, k ≤ L.
Output Format:
For each found substring, print the index of the beginning of its first occurrence (numbering in the string starts from zero).
Output indexes in any order, in one line, separated by a space.
My final solution looks something like this:
def polinomial_hash(s: str, q: int, R: int) -> int:
h = 0
for c in s:
h = (h * q + ord(c)) % R
return h
def get_index_table(inp_str, n):
q = 1000000007
power = q ** (n-1)
R = 2 ** 64
M = len(inp_str)
res_dict = {}
cur_hash = polinomial_hash(inp_str[:n], q, R)
res_dict[cur_hash] = [0]
for i in range(n, M):
first_char = inp_str[i-n]
next_char = inp_str[i]
cur_hash = (
(cur_hash - ord(first_char)*(power))*q
+ ord(next_char)) % R
try:
d_val = res_dict[cur_hash]
d_val += [i-n+1]
except KeyError:
res_dict[cur_hash] = [i-n+1]
return res_dict
if __name__ == '__main__':
n, k = [int(i) for i in input().split()]
inp_str = input()
for item in get_index_table(inp_str, n).values():
if len(item) >= k:
print(item[0], end=' ')
Is it possible to somehow optimize this solution, or advise some alternative options?!

Related

Why isn't chr() outputting the correct character?

I'm working on a Caesar Cypher with Python 3 where s is the string input and k is the amount that you shift the letter. I'm currently just trying to work through getting a letter like 'z' to wrap around to equal 'B'(I know the case is wrong, I'll fix it later). However when I run caesarCipher using the the following inputs: s = 'z' and k = 2, the line: s[n] = chr((122-ord(s[n]) + 64 + k)) causes s[n] to equal 'D'. If i adjust it down two(logically on the unicode scale this would equal 'B'), it makes s[n] = #. What am I doing wrong on that line that's causing 'B' not to be the output?
def caesarCipher(s, k):
# Write your code here
n = 0
s = list(s)
while n < len(s):
if s[n].isalpha() == True:
if (ord(s[n].lower())+k) < 123:
s[n] = (chr(ord(s[n])+k))
n += 1
else:
s[n] = chr((122-ord(s[n]) + 64 + k))
else:
n += 1
s = ''.join(s)
return s
You forgot to add 1 to n in the test of (ord(s[n].lower())+k) < 123 so that it would count s[n] twice or more.
Change it to
else:
s[n] = chr((122 - ord(s[n]) + 64 + k))
n += 1
and if you input "z" and 2, you'll get "B"
print(caesarCipher("z", 2))
# B
and if you adjust it down two, you'll get "#", which is the previous previous character of B in ASCII.
...
else:
s[n] = chr((122 - ord(s[n]) + 62 + k))
n += 1
...
print(caesarCipher("z", 2))
# #

Error when performing pattern search on a randomly generated characters:

So I am trying to implement the Knuth-Morris-Pratt algorithm in Python, below is my implementation:
def KMP(Pattern, Chars):
# compute the start position (number of characters)of the longest suffix that matches the prefix
# Then store prefix and the suffix into the list K, and then set the first element of K to be 0 and the second element to be 1
K = [] # K[n] store the value so that if the mismatch happens at n, it should move pattern Pattern K[n] characters ahead.
n = -1
K.append(n) #add the first element, and keep n = 0.
for k in range (1,len(Pattern) + 1):
# traverse all the elements in Pattern, calculate the corresponding value for each element.
while(n >=0 and Pattern[n] != Pattern[k - 1]): # if n = 1, if n >=1 and the current suffix does not match then try a shorter suffix
n = K[n]
n = n + 1 # if it matches, then the matching position should be one character ahead
K.append(n) #record the matching position for k
#match the string Chars with Pattern
m = 0
for i in range(0, len(Chars)): #traverse through the list one by one
while(m >= 0 and Pattern[m] != Chars[i]): # if they do not match then move Pattern forward with K[m] characters and restart the comparison
m = K[m]
m = m + 1 #if position m matches, then move forward with the next position
if m == len(Pattern): # if m is already the end of K (or Pattern), then a fully matched pattern is found. Continue the comparison by moving Pattern forward K[m] characters
print(i - m + 1, i)
m = K[m]
def main():
Pattern = "abcba"
letters = "abc"
Chars = print ( ''.join(random.choice(letters) for i in range(1000)) )
kmp(Pattern, Chars)
if __name__ == '__main__':
main()
When I try to run this code for a list of randomly generated letters which are abc I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-25-c7bc734e5e35> in <module>
1 if __name__ == '__main__':
----> 2 main()
<ipython-input-24-2c3de20f253f> in main()
3 letters = "abc"
4 Chars = print ( ''.join(random.choice(letters) for i in range(1000)) )
----> 5 KMP(Pattern, Chars)
<ipython-input-21-edf1808c23d4> in KMP(Pattern, Chars)
14 #match the string Chars with Pattern
15 m = 0
---> 16 for i in range(0, len(Chars)): #traverse through the list one by one
17 while(m >= 0 and Pattern[m] != Chars[i]): # if they do not match then move Pattern forward with K[m] characters and restart the comparison
18 m = K[m]
TypeError: object of type 'NoneType' has no len()
I am not really sure what I am doing wrong, any help will be greatly appreciated
After I replaced
Chars = print ( ''.join(random.choice(letters) for i in range(1000)) )
by
Chars = ''.join(random.choice(letters) for i in range(1000))
it worked for me.

could someone instruct me to understand this code

def count_char(text, char):
count = 0
for c in text:
if c == char:
count += 1
return count
filename = input("Enter a filename: ")
with open(filename) as f:
text = f.read()
for char in "abcdefghijklmnopqrstuvwxyz":
perc = 100 * count_char(text, char) / len(text)
print("{0} - {1}%".format(char, round(perc, 2)))
It's a script that counts the relative occurrence of letters abcdefghijklmnopqrstuvwxyz in the given text file.
The first block defines a function that counts how many times the character char is present in the text:
def count_char(text, char):
count = 0
for c in text:
if c == char:
count += 1
return count
The second block asks you to input the name of the file:
filename = input("Enter a filename: ")
and saves the contents of that file as a string in the variable text:
with open(filename) as f:
text = f.read()
The third block displays the relative occurrence of characters a b c d e f g h i j k l m n o p q r s t u v w x y z in text.
For each of these characters, it first computes the proportion of the amount of the given characters in the text count_char(text, char) to the total length of the text len(text) and multiplies the result by 100 to convert it to percentage:
perc = 100 * count_char(text, char) / len(text)
and displays the results as a formatted string. The numbers in curly brackets are replaced by the character char and the percentage of its occurrence, rounded to two decimals round(perc, 2):
print("{0} - {1}%".format(char, round(perc, 2)))
You can read more about string formatting in Python here.

How can i sum co-prime numbers in a pair

I have this list
a = [1,2,3,4,5,6,7,8,9]
I want to find out that how many co-prime pair elements of the list add up to sum=9
Ex, (1+8) = 9 , (2+7) = 9 , (3+6)=9 , (4+5)=9, (5+4)=9 , (6+3)=9, (7+2)=9 , (8+1)=9
Note that i don't want (3+6) as they are prime numbers. And i also don't want (7+2)=9 as it has already occurred (means 2,7 has been already taken in account)
I tried this But it takes repeated values too.
a = [1,2,3,4,5,6,7,8,9]
count=0
for m in a:
for n in a:
total=m+n
if(total==9):
s=str(m) + '+'+ str(n) + "="
print(s , m+n)
count=count+1
print("Count =" ,count)
The result should have count=3
Your mistake is in the way of doing the loops, so you repeat values.
Try this:
#from math import gcd as bltin_gcd
a = [1,2,3,4,5,6,7,8,9]
count = 0
def __gcd(a, b):
# Everything divides 0
if (a == 0 or b == 0): return 0
# base case
if (a == b): return a
# a is greater
if (a > b):
return __gcd(a - b, b)
return __gcd(a, b - a)
# Only python 3
# def coprime(a, b):
# return bltin_gcd(a, b) == 1
for i in range(0,9):
for j in range(i+1,9):
if __gcd(a[i], a[j]) == 1 and a[i] + a[j] == 9:
count += 1
print str(a[i]) + ' ' + str(a[j])
print 'Count = ' + str(count)
In number theory, two integers a and b are said to be relatively prime, mutually prime, or coprime if the only positive integer that divides both of them is 1. Consequently, any prime number that divides one does not divide the other. This is equivalent to their greatest common divisor being 1.
for m in a:
for n in a:
You are not selecting pairs by using this loops, ie. you are picking the first element in both the outer and inner loop during your first iteration.
if(total==9):
You are not checking the condition if the selected pair of numbers are coprime. You are only verifying the sum.
A pythonic solution may be obtained with a one-liner:
from math import gcd
a = [1,2,3,4,5,6,7,8,9]
pairs = [(m,n) for m in a for n in a if n > m and m+n == 9 and gcd(m,n) == 1]
Result :
pairs --> [(1, 8), (2, 7), (4, 5)]
If you are sure to never, never need the pairs but only the number of pairs (as written in the OP), the most efficient solution may be:
count = len([1 for m in a for n in a if n > m and m+n == 9 and gcd(m,n) == 1])
EDIT : I inversed the three conditions in the if statement for improved benefit from lazy boolean evaluation
You can solve this if you have something that calculates your prime factorization in python:
from functools import lru_cache
# cached function results for pime factorization of identical nr
#lru_cache(maxsize=100)
def factors(nr):
# adapted from https://stackoverflow.com/a/43129243/7505395
i = 2
factors = []
while i <= nr:
if (nr % i) == 0:
factors.append(i)
nr = nr / i
else:
i = i + 1
return factors
start_at = 1
end_at = 9
total = 9
r = range(start_at, end_at+1)
# create the tuples we look for, smaller number first - set so no duplicates
tupls = set( (a,b) if a<b else (b,a) for a in r for b in r if a+b == total)
for n in tupls:
a,b = n
f_a = set(factors(a))
f_b = set(factors(b))
# if either set contains the same value, the f_a & f_b will be truthy
# so not coprime - hence skip it
if f_a & f_b:
continue
print(n)
Output:
(2, 7)
(1, 8)
(4, 5)

Counting substrings in string

Lets assume that i have 2 strings
M = "sses"
N = "assesses"
I have to count how many times string M is present into string N
I am not allowed to use any import or methods just loops and range() if needed.
M = "sses"
N = "assesses"
counter = 0
if M in N:
counter +=1
print(counter)
This isn't good enough i need loop to go trough N and count all M present
in this case it is 2.
def count(M, N):
i = 0
count = 0
while True:
try:
i = N.index(M, i)+1
count += 1
except ValueError:
break
return count
Or a one-liner without str.index:
def count(M, N):
return sum(N[i:i+len(M)]==M for i in range(len(N)-len(M)+1))
The same without using the sum function:
def count(M, N):
count = 0
for i in range(len(N)-len(M)+1):
if N[i:i+len(M)] == M:
count += 1
return count

Resources