string split into all possible combination - string

for a given string "ABC", i wish to get all the possible character combination out of it ascendingly and without skip of character, the result should be:["A","B","C"],["AB","C"],["ABC"],["A","BC"]
Any idea how can i achieve this? I was thinking using a nested for loop to get all the component:
string input="ABCD";
List<string> component=new List<string>();
for(int i=0;i<=input.Length;i++){
for(int j=1;j<=(input.Length-i);j++){
component.Add(input.Substring(i,j));
}
}
But i have no idea how to put them into group as the above result. Any advice is appreciated.

You can go about this in several ways.
One way is recursion. Keep a current list of substrings and an overall results list. At the top level, iterate over all the possible gaps. Split the string into a substring and the rest. This should include the "gap" at the end, where you split the string into itself and the empty string as rest. Add the (non-empty) substring to the current list and recurse on the rest of the string. When the rest of the string is empty, add the current list to the overall results list. This will give you all 2ⁿ possibilities for a string with n + 1 letters.
Pseudocode:
// recursive function
function splits_r(str, current, res)
{
if (str.length == 0) {
res += [current]
} else {
for (i = 0; i < str.length; i++) {
splits_r(str.substr(i + 1, end),
current + [str.substr(0, i + 1)], res)
}
}
}
// wrapper to get the recursion going
function splits(str)
{
res = [];
splits_r(str, [], res);
return res;
}
Another way is enumeration of all possibilities. There are 2ⁿ possibilities for a string with n + 1 letters. You can consider one individual posibility as a combination of splits and non-splits. For example:
enum splits result
0 0 0 A B C D "ABCD"
0 0 1 A B C | D "ABC", "D"
0 1 0 A B | C D "AB", "CD"
0 1 1 A B | C | D "AB", "C", "D"
1 0 0 A | B C D "A", "BCD"
1 0 1 A | B C | D "A", "BC", "D"
1 1 0 A | B | C D "A", "B", "CD"
1 1 1 A | B | C | D "A", "B", "C", "D"
The enumeration uses 0 for no split and 1 for a split. It can be seen as a binary number. If you are familiar with bitwise operations, you can now enumerate all values from 0 to 2ⁿ and find out where the splits are.
Pseudocode:
function splits(str)
{
let m = str.length - 1; // possible gap positions
let n = (1 << m); // == pow(2, m)
let res = []
for (i = 0; i < n; i++) {
let last = 0
let current = []
for (j = 0; j < m; j++) { // loop over all gaps
if (i & (1 << j)) { // test for split
current.append(str.substr(last, j + 1));
last = j + 1;
}
}
current.append(s[last:])
res.append(current);
return res;
}

Related

Maximum element in array which is equal to product of two elements in array

We need to find the maximum element in an array which is also equal to product of two elements in the same array. For example [2,3,6,8] , here 6=2*3 so answer is 6.
My approach was to sort the array and followed by a two pointer method which checked whether the product exist for each element. This is o(nlog(n)) + O(n^2) = O(n^2) approach. Is there a faster way to this ?
There is a slight better solution with O(n * sqrt(n)) if you are allowed to use O(M) memory M = max number in A[i]
Use an array of size M to mark every number while you traverse them from smaller to bigger number.
For each number try all its factors and see if those were already present in the array map.
Here is a pseudo code for that:
#define M 1000000
int array_map[M+2];
int ans = -1;
sort(A,A+n);
for(i=0;i<n;i++) {
for(j=1;j<=sqrt(A[i]);j++) {
int num1 = j;
if(A[i]%num1==0) {
int num2 = A[i]/num1;
if(array_map[num1] && array_map[num2]) {
if(num1==num2) {
if(array_map[num1]>=2) ans = A[i];
} else {
ans = A[i];
}
}
}
}
array_map[A[i]]++;
}
There is an ever better approach if you know how to find all possible factors in log(M) this just becomes O(n*logM). You have to use sieve and backtracking for that
#JerryGoyal 's solution is correct. However, I think it can be optimized even further if instead of using B pointer, we use binary search to find the other factor of product if arr[c] is divisible by arr[a]. Here's the modification for his code:
for(c=n-1;(c>1)&& (max==-1);c--){ // loop through C
for(a=0;(a<c-1)&&(max==-1);a++){ // loop through A
if(arr[c]%arr[a]==0) // If arr[c] is divisible by arr[a]
{
if(binary_search(a+1, c-1, (arr[c]/arr[a]))) //#include<algorithm>
{
max = arr[c]; // if the other factor x of arr[c] is also in the array such that arr[c] = arr[a] * x
break;
}
}
}
}
I would have commented this on his solution, unfortunately I lack the reputation to do so.
Try this.
Written in c++
#include <vector>
#include <algorithm>
using namespace std;
int MaxElement(vector< int > Input)
{
sort(Input.begin(), Input.end());
int LargestElementOfInput = 0;
int i = 0;
while (i < Input.size() - 1)
{
if (LargestElementOfInput == Input[Input.size() - (i + 1)])
{
i++;
continue;
}
else
{
if (Input[i] != 0)
{
LargestElementOfInput = Input[Input.size() - (i + 1)];
int AllowedValue = LargestElementOfInput / Input[i];
int j = 0;
while (j < Input.size())
{
if (Input[j] > AllowedValue)
break;
else if (j == i)
{
j++;
continue;
}
else
{
int Product = Input[i] * Input[j++];
if (Product == LargestElementOfInput)
return Product;
}
}
}
i++;
}
}
return -1;
}
Once you have sorted the array, then you can use it to your advantage as below.
One improvement I can see - since you want to find the max element that meets the criteria,
Start from the right most element of the array. (8)
Divide that with the first element of the array. (8/2 = 4).
Now continue with the double pointer approach, till the element at second pointer is less than the value from the step 2 above or the match is found. (i.e., till second pointer value is < 4 or match is found).
If the match is found, then you got the max element.
Else, continue the loop with next highest element from the array. (6).
Efficient solution:
2 3 8 6
Sort the array
keep 3 pointers C, B and A.
Keeping C at the last and A at 0 index and B at 1st index.
traverse the array using pointers A and B till C and check if A*B=C exists or not.
If it exists then C is your answer.
Else, Move C a position back and traverse again keeping A at 0 and B at 1st index.
Keep repeating this till you get the sum or C reaches at 1st index.
Here's the complete solution:
int arr[] = new int[]{2, 3, 8, 6};
Arrays.sort(arr);
int n=arr.length;
int a,b,c,prod,max=-1;
for(c=n-1;(c>1)&& (max==-1);c--){ // loop through C
for(a=0;(a<c-1)&&(max==-1);a++){ // loop through A
for(b=a+1;b<c;b++){ // loop through B
prod=arr[a]*arr[b];
if(prod==arr[c]){
System.out.println("A: "+arr[a]+" B: "+arr[b]);
max=arr[c];
break;
}
if(prod>arr[c]){ // no need to go further
break;
}
}
}
}
System.out.println(max);
I came up with below solution where i am using one array list, and following one formula:
divisor(a or b) X quotient(b or a) = dividend(c)
Sort the array.
Put array into Collection Col.(ex. which has faster lookup, and maintains insertion order)
Have 2 pointer a,c.
keep c at last, and a at 0.
try to follow (divisor(a or b) X quotient(b or a) = dividend(c)).
Check if a is divisor of c, if yes then check for b in col.(a
If a is divisor and list has b, then c is the answer.
else increase a by 1, follow step 5, 6 till c-1.
if max not found then decrease c index, and follow the steps 4 and 5.
Check this C# solution:
-Loop through each element,
-loop and multiply each element with other elements,
-verify if the product exists in the array and is the max
private static int GetGreatest(int[] input)
{
int max = 0;
int p = 0; //product of pairs
//loop through the input array
for (int i = 0; i < input.Length; i++)
{
for (int j = i + 1; j < input.Length; j++)
{
p = input[i] * input[j];
if (p > max && Array.IndexOf(input, p) != -1)
{
max = p;
}
}
}
return max;
}
Time complexity O(n^2)

Modified longest common substring

Given two strings what is an efficient algorithm to find the number and length of longest common sub-strings with the sub-strings being called common if :
1) they have at-least x% characters same and at same position.
2) the start and end indexes of the sub-strings being same.
Ex :
String 1 -> abedefkhj
String 2 -> kbfdfjhlo
suppose the x% being asked is 40,then, ans is,
5 1
where 5 is the longest length and 1 is the number of sub-strings in each string satisfying the given property. Sub-String is "abede" in string 1 and "kbfdf" in string 2.
You can use smth like Levenshtein distance without deleting and inserting.
Build the table, where every element [i, j] is error for substring from position [i] to position [j].
foo(string a, string b, int x):
len = min(a.length, b.length)
error[0][0] = 0 if a[0] == b[0] else 1;
for (end: [1 -> len-1]):
for (start: [end -> 0]):
if a[end] == b[end]:
error[start][end] = error[start][end - 1]
else:
error[start][end] = error[start][end - 1] + 1
best_len = 0;
best_pos = 0;
for (i: [0 -> len-1]):
for (j: [i -> 0]):
len = i - j + 1
error_percent = 100 * error[i][j] / len
if (error_percent <= x and len > best_len):
best_len = len
best_pos = j
return (best_len, best_pos)

Generate all compositions of an integer into k parts

I can't figure out how to generate all compositions (http://en.wikipedia.org/wiki/Composition_%28number_theory%29) of an integer N into K parts, but only doing it one at a time. That is, I need a function that given the previous composition generated, returns the next one in the sequence. The reason is that memory is limited for my application. This would be much easier if I could use Python and its generator functionality, but I'm stuck with C++.
This is similar to Next Composition of n into k parts - does anyone have a working algorithm?
Any assistance would be greatly appreciated.
Preliminary remarks
First start from the observation that [1,1,...,1,n-k+1] is the first composition (in lexicographic order) of n over k parts, and [n-k+1,1,1,...,1] is the last one.
Now consider an exemple: the composition [2,4,3,1,1], here n = 11 and k=5. Which is the next one in lexicographic order? Obviously the rightmost part to be incremented is 4, because [3,1,1] is the last composition of 5 over 3 parts.
4 is at the left of 3, the rightmost part different from 1.
So turn 4 into 5, and replace [3,1,1] by [1,1,2], the first composition of the remainder (3+1+1)-1 , giving [2,5,1,1,2]
Generation program (in C)
The following C program shows how to compute such compositions on demand in lexicographic order
#include <stdio.h>
#include <stdbool.h>
bool get_first_composition(int n, int k, int composition[k])
{
if (n < k) {
return false;
}
for (int i = 0; i < k - 1; i++) {
composition[i] = 1;
}
composition[k - 1] = n - k + 1;
return true;
}
bool get_next_composition(int n, int k, int composition[k])
{
if (composition[0] == n - k + 1) {
return false;
}
// there'a an i with composition[i] > 1, and it is not 0.
// find the last one
int last = k - 1;
while (composition[last] == 1) {
last--;
}
// turn a b ... y z 1 1 ... 1
// ^ last
// into a b ... (y+1) 1 1 1 ... (z-1)
// be careful, there may be no 1's at the end
int z = composition[last];
composition[last - 1] += 1;
composition[last] = 1;
composition[k - 1] = z - 1;
return true;
}
void display_composition(int k, int composition[k])
{
char *separator = "[";
for (int i = 0; i < k; i++) {
printf("%s%d", separator, composition[i]);
separator = ",";
}
printf("]\n");
}
void display_all_compositions(int n, int k)
{
int composition[k]; // VLA. Please don't use silly values for k
for (bool exists = get_first_composition(n, k, composition);
exists;
exists = get_next_composition(n, k, composition)) {
display_composition(k, composition);
}
}
int main()
{
display_all_compositions(5, 3);
}
Results
[1,1,3]
[1,2,2]
[1,3,1]
[2,1,2]
[2,2,1]
[3,1,1]
Weak compositions
A similar algorithm works for weak compositions (where 0 is allowed).
bool get_first_weak_composition(int n, int k, int composition[k])
{
if (n < k) {
return false;
}
for (int i = 0; i < k - 1; i++) {
composition[i] = 0;
}
composition[k - 1] = n;
return true;
}
bool get_next_weak_composition(int n, int k, int composition[k])
{
if (composition[0] == n) {
return false;
}
// there'a an i with composition[i] > 0, and it is not 0.
// find the last one
int last = k - 1;
while (composition[last] == 0) {
last--;
}
// turn a b ... y z 0 0 ... 0
// ^ last
// into a b ... (y+1) 0 0 0 ... (z-1)
// be careful, there may be no 0's at the end
int z = composition[last];
composition[last - 1] += 1;
composition[last] = 0;
composition[k - 1] = z - 1;
return true;
}
Results for n=5 k=3
[0,0,5]
[0,1,4]
[0,2,3]
[0,3,2]
[0,4,1]
[0,5,0]
[1,0,4]
[1,1,3]
[1,2,2]
[1,3,1]
[1,4,0]
[2,0,3]
[2,1,2]
[2,2,1]
[2,3,0]
[3,0,2]
[3,1,1]
[3,2,0]
[4,0,1]
[4,1,0]
[5,0,0]
Similar algorithms can be written for compositions of n into k parts greater than some fixed value.
You could try something like this:
start with the array [1,1,...,1,N-k+1] of (K-1) ones and 1 entry with the remainder. The next composition can be created by incrementing the (K-1)th element and decreasing the last element. Do this trick as long as the last element is bigger than the second to last.
When the last element becomes smaller, increment the (K-2)th element, set the (K-1)th element to the same value and set the last element to the remainder again. Repeat the process and apply the same principle for the other elements when necessary.
You end up with a constantly sorted array that avoids duplicate compositions

KMP prefix table

I am reading about KMP for string matching.
It needs a preprocessing of the pattern by building a prefix table.
For example for the string ababaca the prefix table is: P = [0, 0, 1, 2, 3, 0, 1]
But I am not clear on what does the numbers show. I read that it helps to find matches of the pattern when it shifts but I can not connect this info with the numbers in the table.
Every number belongs to corresponding prefix ("a", "ab", "aba", ...) and for each prefix it represents length of longest suffix of this string that matches prefix. We do not count whole string as suffix or prefix here, it is called self-suffix and self-prefix (at least in Russian, not sure about English terms).
So we have string "ababaca". Let's look at it. KMP computes Prefix Function for every non-empty prefix. Let's define s[i] as the string, p[i] as the Prefix function. prefix and suffix may overlap.
+---+----------+-------+------------------------+
| i | s[0:i] | p[i] | Matching Prefix/Suffix |
+---+----------+-------+------------------------+
| 0 | a | 0 | |
| 1 | ab | 0 | |
| 2 | aba | 1 | a |
| 3 | abab | 2 | ab |
| 4 | ababa | 3 | aba |
| 5 | ababac | 0 | |
| 6 | ababaca | 1 | a |
| | | | |
+---+----------+-------+------------------------+
Simple C++ code that computes Prefix function of string S:
vector<int> prefixFunction(string s) {
vector<int> p(s.size());
int j = 0;
for (int i = 1; i < (int)s.size(); i++) {
while (j > 0 && s[j] != s[i])
j = p[j-1];
if (s[j] == s[i])
j++;
p[i] = j;
}
return p;
}
This code may not be the shortest, but easy to understand flow of code.
Simple Java Code for calculating prefix-Array-
String pattern = "ababaca";
int i = 1, j = 0;
int[] prefixArray = new int[pattern.length];
while (i < pattern.length) {
while (pattern.charAt(i) != pattern.charAt(j) && j > 0) {
j = prefixArray[j - 1];
}
if (pattern.charAt(i) == pattern.charAt(j)) {
prefixArray[i] = j + 1;
i++;
j++;
} else {
prefixArray[i] = j;
i++;
}
}
for (int k = 0; k < prefixArray.length; ++k) {
System.out.println(prefixArray[k]);
}
It produces the required output-
0
0
1
2
3
0
1
Python Implementation
p='ababaca'
l1 = len(p)
j = 0
i = 1
prefix = [0]
while len(prefix) < l1:
if p[j] == p[i]:
prefix.append(j+1)
i += 1
j += 1
else:
if j == 0:
prefix.append(0)
i += 1
if j != 0:
j = prefix[j-1]
print prefix
I have tried my hands using the Javascript, Open for suggestions.
const prefixArray = function (p) {
let aux = Array(p.length).fill(0);
// For index 0 the matched index will always be 0, so we will we start from 1
let i = 1;
let m = 0; // mismatched index will be from 0th
// run the loop on pattern length
while ( i < p.length) {
// 3 Cases here
// First when we have a match of prefix and suffix of pattern
if(p.charAt(i) === p.charAt(m)) {
// increment m
m++;
// update aux index
aux[i] = m;
// update the index.
i++;
}
// Now if there is no match and m !=0 means some match happened previously
// then we need to move back M to that index
else if(p.charAt(i) !== p.charAt(m) && m !== 0) {
m = aux[m-1];
// we dont want to increment I as we want to start comparing this suffix with previous matched
} else {
// if none of the above conditions then
// just update the current index in aux array to 0
aux[i] = 0; // no match
i++; // shift to the next char
}
}
return aux;
}
No offset version
This is based on the idea of what I call todo indexing:
int confix[1000000];
void build_confix(char *pattern) {
// build len %
int len_pat = strlen(pattern);
// i, j using todo-indexing.
int j, i;
confix[j = 1] = i = 0;
while (j < strlen(pattern)) {
whlie (i && pattern[j] != pattern[i])
// length -> length mapping, no offset
i = confix[i];
confix[++j] = pattern[j] == pattern[i]?
++i:
0;
}
}
Then you can use this confix[] table to find needles in the middle(test)
int kmp_find_first(char *test, char *needle) {
int j = 0, i = 0;
while (j < strlen(test)) {
while (i && test[j] != needle[i])
i = confix[i];
++j; test[j] == needle[i]?
++i:
0;
if (i == strlen(needle))
return j - strlen(needle);
}
return -1;
}

String comparison number of characters in same order

I have
Str A = "abcdef"
Str B = "abcdf"
I need a function(stA, stB) that returns 5 (ie. the number of characters matched), note that these characters need to be in same order.
For example:
Str A = "abcdef"
Str B = "fedcba",
function(stA, stB) would only return 1 for the 'a'
Pseudo code is good...
Oh btw given that all my strings will have <= 40 characters, O(n^2) might even be better than a O(41n) algorithm..
I haven't fully tested it, but this seems to be working (in JavaScript) for your test cases:
function compare(a, b)
{
var aChars = a.split('');
var bChars = b.split('');
var matches = [];
var bStart = 0;
for (var i=0; i < aChars.length; i++)
{
for (var j=bStart; j < bChars.length; j++)
{
if(aChars[i] == bChars[j])
{
matches.push(aChars[i]);
bStart = j;
break;
}
}
}
return matches.length;
}
compare('abcdef', 'abcdf'); // returns 5
compare('abcdef', 'fedcba'); // returns 1
Basically I'm starting at position 0 for string A and position 0 for string B. When a match is found, I change the start position for searching string B so that it skips the previous section.

Resources