Merge two string and produce the lexicographical smallest merged string - string

I was solving a problem when I encountered a rather simple sub problem. Given two string S1 and S2, merge(S1,S2) denotes any string that's obtained by interspersing the two strings S1 and S2, maintaining the order of characters in both such that the resultant string is lexicographically smallest.
Example
S1 = abad
S2 = bbde
merge(S1, S2) = ababbdde
Now, i tried to solve the problem by applying a greedy technique starting from the first element of both the string and then looking for the smallest element and adding it to the result. But, soon I found out that this doesn't always lead to the optimal solution. The code looked something like below.
int n = a.size(), m = b.size();
int i =0, j=0, k=0; char array[n+m];
for(; i< n && j<n;) {
if(a[i] < b[j]) {
array[k] = a[i];
++i;
}
else {
array[k] = b[j];
++j;
}
++k;
}
while(i<n) {
array[k] = a[i];
++i;
++k;
}
while(j<m) {
array[k] = b[j];
++j;
++k;
}
for (int i = 0; i < n+m; ++i) {
cout<<array[i];
}
cout<<endl;
I thought of traversing it backwards and choosing the largest character and started adding it from behind. With the limited testing I performed this looked good.
int n = a.size(), m = b.size();
int i =n-1, j=m-1, k=n+m-1; char array[n+m];
for(; i>=0 && j>=0;) {
if(a[i] > b[j]) {
array[k] = a[i];
--i;
}
else {
array[k] = b[j];
--j;
}
--k;
}
while(i>=0) {
array[k] = a[i];
--i;
--k;
}
while(j>=0) {
array[k] = b[j];
--j;
--k;
}
for (int i = 0; i < n + m; ++i) {
cout<<array[i];
}
cout<<endl;
But, I'm unsure if this will always give the optimal solution always.
Is this solution correct in the first place and if yes can someone give me a slight proof as to why this produces the optimal solution always too.

The greedy approach works, however,
if(a[i] < b[j]) {
array[k] = a[i];
++i;
}
else {
array[k] = b[j];
++j;
}
this part is incorrect because when a[i] == b[j] you can't simply assign b[j] to array[k].
Instead, you need to compare the substring a[i:] and b[j:] when a[i] == b[j], and you can just operate on the std::string itself:
if(s1[i] < s2[j])
{
array[k] = s1[i];
++i;
}
else if (s1[i] == s2[j] && s1.substr(i) < s2.substr(j))
{
array[k] = s1[i];
++i;
}
else
{
array[k] = s2[j];
++j;
}
The time complexity would be quadratic (O(n^2)) since substr operation takes O(n).

Here's full solution based on my comment earlier.
import string
import random
global brute_force_lowest
global almost_greedy_lowest
global brute_force_calls
global almost_greedy_calls
def brute_force(p, a, b):
global brute_force_lowest
global brute_force_calls
brute_force_calls += 1
if len(a) > 0: brute_force(p + a[0], a[1:], b)
if len(b) > 0: brute_force(p + b[0], a, b[1:])
if len(a) == 0 and len(b) == 0:
if p < brute_force_lowest: brute_force_lowest = p
def almost_greedy(p, a, b):
global almost_greedy_lowest
global almost_greedy_calls
almost_greedy_calls += 1
if len(a) == 0 and len(b) == 0:
if p < almost_greedy_lowest: almost_greedy_lowest = p
elif len(b) == 0:
almost_greedy(p + a, '', '')
elif len(a) == 0:
almost_greedy(p + b, '', '')
elif a[0] < b[0]:
almost_greedy(p + a[0], a[1:], b)
elif a[0] > b[0]:
almost_greedy(p + b[0], a, b[1:])
else:
almost_greedy(p + a[0], a[1:], b)
almost_greedy(p + b[0], a, b[1:])
for j in range(10000):
a = ''.join(random.choice(string.ascii_lowercase) for _ in range(random.randint(2, 10)))
b = ''.join(random.choice(string.ascii_lowercase) for _ in range(random.randint(2, 10)))
brute_force_lowest = a + b
brute_force_calls = 0
brute_force('', a, b)
almost_greedy_calls = 0
almost_greedy_lowest = a + b
almost_greedy('', a, b)
print('%s, %s -> %s vs. %s (%.3f)' % (a, b, brute_force_lowest, almost_greedy_lowest, float(almost_greedy_calls) / brute_force_calls))
if almost_greedy_lowest != brute_force_lowest: print 'ERROR'
One interesting statistic is that this algorithm works about ten times faster then brute force algorithm on average if we limit alphabet to 'ab'.
UPDATE Some optimizations:
def prefix_length(a):
for i in range(len(a)):
if a[i] != a[0]: return i
return len(a)
def almost_greedy(p, a, b):
global almost_greedy_lowest
global almost_greedy_calls
almost_greedy_calls += 1
if p > almost_greedy_lowest: return
if len(a) == 0 and len(b) == 0:
if p < almost_greedy_lowest: almost_greedy_lowest = p
elif len(b) == 0:
almost_greedy(p + a, '', '')
elif len(a) == 0:
almost_greedy(p + b, '', '')
elif a[0] < b[0]:
almost_greedy(p + a[0], a[1:], b)
elif a[0] > b[0]:
almost_greedy(p + b[0], a, b[1:])
else:
la = prefix_length(a)
almost_greedy(p + a[0] * la, a[la:], b)
lb = prefix_length(b)
almost_greedy(p + b[0] * lb, a, b[lb:])

Greedy will solve the problem. to solve this problem you will have to visit both string for sure.
In your code you are missing at one place i.e. your first for loop if(a[i] < b[j]) it should be if(a[i] <= b[j]).
check the code here

Greedy will not give the correct solution and forking will increase the time complexity of the algorithm as you would have to continually revisit the substring that was the same in both strings. Instead, use a dequeue to store the characters that are common, and compare the next char to be chosen from either the deque, or the first string, or the second string
Look at the solution below (replace ints with char from the string, and switch the comparatror signs)
def maxNumber(self, nums1, nums2):
"""
:type nums1: List[int]
:type nums2: List[int]
:type k: int
:rtype: List[int]
"""
d = deque()
arr = []
i, j = 0, 0
while i < len(nums1) and j < len(nums2):
if len(d) and d[0] > nums1[i] and d[0] > nums2[j]:
arr.append(d.popleft())
else:
if nums1[i] > nums2[j]:
arr.append(nums1[i])
i += 1
elif nums1[i] < nums2[j]:
arr.append(nums2[j])
j += 1
else:
arr.append(nums1[i])
d.append(nums2[j])
i += 1
j += 1
while i < len(nums1):
if len(d) and d[0] > nums1[i]:
arr.append(d.popleft())
else:
arr.append(nums1[i])
i += 1
while j < len(nums2):
if len(d) and d[0] > nums2[j]:
arr.append(d.popleft())
else:
arr.append(nums2[j])
j += 1
while len(d):
arr.append(d.popleft())
return arr

Related

Comparing the values of 2 lists without built-ins

I'm trying to make and anagram checker without any built-in functions. So far, I've managed this:
def isa1(s1, s2):
a = s1.lower()
b = s2.lower()
c = list(a)
d = list(b)
l = len(s1)
counter = 0
for i in range(l):
if c[i] == d[0]:
del d[0]
counter += 1
elif c[i] == d[1]:
del d[1]
counter += 1
elif c[i] == d[2]:
del d[2]
counter += 1
elif c[i] == d[3]:
del d[3]
counter += 1
elif c[i] == d[4]:
del d[4]
counter += 1
elif c[i] == d[5]:
del d[5]
counter += 1
else:
pass
if counter == len(s1):
return True
else:
return False
I'm happy with the start, bar the assignment naming, but I cant figure out how to iterate through my second string, s2, without the for-loop being ridiculous. Plus this code will only work for a string/list 6 characters long.
Sorry if this seems simply, I'm just starting Python and programming in general
Thanks!
if you are okay with using for in side of for you can do:
def isa1(s1, s2):
a = s1.lower()
b = s2.lower()
c = list(a)
d = list(b)
l = len(s1)
counter = 0
for i in range(l):
for j in range(len(d)):
if c[i] == d[j]:
del d[j]
counter += 1
break # to continue to the next letter
if counter == len(s1):
return True
else:
return False
this solution will check against each letter in the second list, and if it finds a match it will break the inner loop going to the next letter.

I have to write a code for pattern recognition and replacing the pattern?

I have this problem statement where I have a column of patterns that were if the first four bytes have date it should replace those four bytes to ccyy and the rest to N and zeros to Z's
eg. 20190045689 -> CCYYZZNNNNN
if space we need to consider the space as well.
66-7830956 -> NN-NNNZNNN
def patternGeneration(string):
x = re.findall("[\s0-9a-zA-Z]", string)
n = len(x)
j = 0
r = re.compile("\A[^(19|20)]")
y = list(filter(r.match, x))
b = len(y)
for i in range(0, b):
if y[i] == "0":
y[i] = 0
elif y[i] == " ":
y[i] = " "
else:
y[i] = "n"
print(convert(y))
for i in range(0, n):
if x[i] == "0":
x[i] = 0
j = j + 1
elif x[i] == " ":
x[i] = " "
j = j + 1
else:
x[i] = "n"
print(convert(x))
str1 = input("enter the string\t")
patternGeneration(str1)
#convert to new format
def convert(string):
# check for year
head = string[:4]
tail = string[4:]
if head.isnumeric():
if 1900 <= int(head) <= 2099:
head = "CCYY"
new_string = head + tail
return "".join(["Z" if x == "0" else "N" if x.isnumeric() else x for x in str(new_string)])
sample = "20196705540"
print(convert(sample))
#"CCYYNNZNNNZ"
sample = "66-7830956"
print(convert(sample))
#"NN-NNNZNNN"

Python: Given 2 binary strings s and t, print minimum number of adjacent swaps to convert s to t

For example if s = "1000000111" and t = "0111000001" the output should be 11. Below is my solution but it gives a time limit exceeded error so I am looking for a faster method. The length of string is less than 10^6.
T = int(input())
for _ in range(0,T):
n = int(input())
s = input()
source = []
for letter in s:
source.append(letter)
#source[0],source[1] = source[1],source[0]
#print(source)
t = input()
target = []
for letter in t:
target.append(letter)
if source.count("1") != target.count("1") or source.count("0") != target.count("0"):
print(-1)
continue
else:
ans = 0
for i in range(0,n):
if source[i] != target[i]:
#print("".join(source),"".join(target))
if source[i] == "0":
j = i
while source[j] != "1":
j += 1
ans += j-i
source[i],source[j] = source[j],source[i]
else:
#print(source)
j = i
while source[j] != "0":
#print(j,ans)
j+=1
ans += j-i
source[i],source[j] = source[j],source[i]
print(ans)
Here's the code. The idea is that you count the location of '1's and then calculate the difference between the pairs. Time complexity O(n), space complexity O(n), but can be done O(1) with a careful indexing.
def foo(str1, str2):
if len(str1) != len(str2):
return -1
n = len(str1)
arr1 = [i for i in range(n) if str1[i] == '1']
arr2 = [i for i in range(n) if str2[i] == '1']
if len(arr1) != len(arr2):
return -1
res = 0
for i in range(len(arr1)):
res += abs(arr1[i] - arr2[i])
return res

How to turn the duplicate part in my code to a checking function?

I come up a solution for leetcode "5. Longest Palindromic Substring" with parts of duplicate codes. One of good ways to solve duplicate code is to make a function. How do I write my check here to a function? I am confused what I should return to make both variables - longest and ans - being updated. Thanks!
The part of duplicate code:
if len(s[l:r+1]) > longest:
longest = len(s[l:r+1])
ans = s[l:r+1]
Full code:
class Solution:
def longestPalindrome(self, s: str) -> str:
if len(s) == 0:
return ''
if len(s) == 1:
return s
longest = 0
ans = ''
for pos in range(len(s)-1):
l, r = pos, pos
if pos > 0 and pos < len(s) - 1 and s[pos-1] == s[pos+1]:
l, r = pos-1, pos+1
while l > 0 and r < len(s) - 1 and s[l-1] == s[r+1]:
l -= 1
r += 1
# duplicate code 1
if len(s[l:r+1]) > longest:
longest = len(s[l:r+1])
ans = s[l:r+1]
if s[pos] == s[pos+1]:
l, r = pos, pos+1
while l > 0 and r < len(s) - 1 and s[l-1] == s[r+1]:
l -= 1
r += 1
# duplicate code 2
if len(s[l:r+1]) > longest:
longest = len(s[l:r+1])
ans = s[l:r+1]
if ans == '' and len(s) > 0:
return s[0]
return ans
The if statements and while loops before the duplicate code blocks are mostly duplicated as well, as is using the longest variable to keep track of the length of ans when you already have ans -- here's one way you could simplify things via another function:
class Solution:
def find_longest(self, s, left, right):
if s[left] == s[right]:
if right - left + 1 > len(self.ans):
self.ans = s[left:right + 1]
if left > 0 and right < len(s) - 1:
self.find_longest(s, left - 1, right + 1)
def longestPalindrome(self, s: str) -> str:
if len(s) == 1:
return s
self.ans = ''
for pos in range(len(s) - 1):
self.find_longest(s, pos, pos)
self.find_longest(s, pos, pos + 1)
return self.ans

Puzzler solver program: How many different solutions are there to (1/a)+(1/b)+(1/c)+(1/d)+(1/e)+(1/f)+(1/g) = 1?

I wrote the python code below that solves and prints each possible solution for anything under 6 unit fractions, but given how I programmed it, it takes infinitely long to check for 7 fractions. Any ideas on how to modify the code to find all the possible solutions more efficienty?
import sys
from fractions import Fraction
import os
#myfile = open('7fractions.txt', 'w')
max = 7 #>2 #THIS VARIABLE DECIDES HOW MANY FRACTIONS ARE ALLOWED
A = [0] * max
A[0] = 1
def printList(A):
return str(A).strip('[]')
def sumList(A):
sum = 0
for i in A:
if i != 0:
sum += Fraction(1, i)
return sum
def sumTest(A):
sum = 0
v = 0
for i in range(0, len(A)):
if A[i] == 0 and v == 0:
v = Fraction(1,A[i-1])
if v != 0:
sum += v
else:
sum += Fraction(1, A[i])
return sum
def solve(n, A):
if n == max - 2:
while (sumTest(A) > 1):
print(A)
if sumList(A) < 1:
e = 1 - sumList(A)
if e.numerator == 1 and e.denominator>A[n-1]:
A[n+1] = e.denominator
#myfile.write(printList(A) + '\n')
print(A)
A[n+1] = 0
A[n] += 1
else:
while (sumTest(A) > 1):
if sumList(A) < 1:
A[n+1] = A[n] + 1
solve(n+1, A)
A[n+1] = 0
A[n] += 1
#execute
solve(0, A)

Resources