Why is the rcpp function slow? - rcpp

I have made a simple function that sets a column reversal_indicator to "yes" if a value in the Reversal_Accounting_Transaction_ID column is present anywhere in the Accounting_Transaction_ID column (i.e. in other rows). Most entries in the Reversal_Accounting_Transaction_ID column are likely to be blank and should therefore be "no".
The dataframe is created from a 6gb csv file (assume approx 6 million rows) and is being processed on databricks.
I am not really sure why it's taking such a long time.
Rcpp::cppFunction('
std::vector<std::string>
reversals(DataFrame frame)
{
    // Returns one "Yes"/"No" entry per row: "Yes" when the row has a non-blank
    // STBLG value that equals the BELNR value of any row in the frame.
    //
    // NOTE: this code is embedded in a single-quoted R string literal, so the
    // comments here must not contain apostrophes or backslashes.
    // NOTE: the search is a plain linear scan per row, i.e. O(N^2) overall.
    std::vector<std::string> Accounting_Transaction_ID = as<std::vector<std::string> >(frame["BELNR"]);
    std::vector<std::string> Reversal_Accounting_Transaction_ID = as<std::vector<std::string> >(frame["STBLG"]);

    // Every row starts as "No"; only confirmed matches are overwritten. This
    // also covers the empty frame and the single-row frame.
    std::vector<std::string> ReversalIndicator(Reversal_Accounting_Transaction_ID.size(), "No");

    const std::size_t rowCount = Reversal_Accounting_Transaction_ID.size();
    const std::size_t idCount = Accounting_Transaction_ID.size();
    for (std::size_t i = 0; i < rowCount; ++i) {
        const std::string& reversalId = Reversal_Accounting_Transaction_ID[i];
        if (reversalId.empty()) {
            continue;  // a blank reversal id is never a match
        }
        // Start at row 0 so that the first row is searched as well.
        for (std::size_t j = 0; j < idCount; ++j) {
            if (Accounting_Transaction_ID[j] == reversalId) {
                ReversalIndicator[i] = "Yes";
                break;
            }
        }
    }
    return ReversalIndicator;
}
')```
```df$reversal=reversals(df)```

You are traversing your data frame for every row of your data frame, i.e. you have something like 6m x 6m operations (O(N^2)). That can take quite a while. However, you can go from O(N^2) to O(N) at the expense of some memory. Without any sample data I cannot test this, so I am providing some pseudo code only:
create empty set data structure
for each row in df:
Add Reversal_Accounting_Transaction_ID to set
for each row in df:
if Accounting_Transaction_ID can be found in set
ReversalIndicator = "Yes"

Based on Ralphs answer
Not sure if I need to allocate the set a size at the start though?
Rcpp::cppFunction('
std::vector<std::string> reversals(DataFrame frame)
{
    // Returns one "Yes"/"No" entry per row: "Yes" when the row has a non-blank
    // STBLG value that also occurs somewhere in the BELNR column.
    //
    // NOTE: this code is embedded in a single-quoted R string literal, so the
    // comments here must not contain apostrophes or backslashes.
    std::vector<std::string> Accounting_Transaction_ID = as<std::vector<std::string> >(frame["BELNR"]);
    std::vector<std::string> Reversal_Accounting_Transaction_ID = as<std::vector<std::string> >(frame["STBLG"]);

    // Every row starts as "No"; only confirmed matches are overwritten.
    std::vector<std::string> ReversalIndicator(Reversal_Accounting_Transaction_ID.size(), "No");

    // Build the lookup set once. std::set is a tree and needs no up-front
    // size reservation.
    const std::set<std::string> uniqueTransID(Accounting_Transaction_ID.begin(),
                                              Accounting_Transaction_ID.end());

    const std::size_t rowCount = Reversal_Accounting_Transaction_ID.size();
    for (std::size_t i = 0; i < rowCount; ++i) {
        const std::string& reversalId = Reversal_Accounting_Transaction_ID[i];
        // Only non-blank reversal ids are looked up; blank ones stay "No".
        if (!reversalId.empty() && uniqueTransID.count(reversalId) > 0) {
            ReversalIndicator[i] = "Yes";
        }
    }
    return ReversalIndicator;
}
')```

Related

Interleaving Strings LCS

Hi I was trying to solve the interleaving strings problem.Here is the detailed explanation of the problem. https://practice.geeksforgeeks.org/problems/interleaved-strings/1
I was trying using lcs but it was not passing leetcode cases. Here is my Code:-
(I am taking lcs from start and end)
class Solution {
    /**
     * Decides whether {@code c} is an interleaving of {@code a} and {@code b}:
     * every character of {@code a} and of {@code b} is used exactly once and
     * the relative order inside each source string is preserved.
     *
     * Removing one longest common subsequence from {@code c} and comparing the
     * remainder with the other string is not sufficient, because an LCS is not
     * unique and the chosen one may use the wrong positions. This method
     * therefore uses dynamic programming over prefix lengths.
     *
     * @param a first source string
     * @param b second source string
     * @param c candidate interleaving
     * @return true when c is an interleaving of a and b
     */
    public boolean isInterLeave(String a, String b, String c) {
        int m = a.length();
        int q = b.length();
        if (c.length() != m + q) {
            return false;
        }
        // reachable[j], while processing row i, answers: can a[0..i) and
        // b[0..j) be interleaved into c[0..i+j)? One row is enough because
        // each cell only needs the cell above and the cell to its left.
        boolean[] reachable = new boolean[q + 1];
        for (int i = 0; i <= m; i++) {
            for (int j = 0; j <= q; j++) {
                if (i == 0 && j == 0) {
                    reachable[j] = true; // two empty prefixes form the empty prefix
                    continue;
                }
                char target = c.charAt(i + j - 1);
                // reachable[j] still holds the value of row i - 1 here.
                boolean viaA = i > 0 && reachable[j] && a.charAt(i - 1) == target;
                boolean viaB = j > 0 && reachable[j - 1] && b.charAt(j - 1) == target;
                reachable[j] = viaA || viaB;
            }
        }
        return reachable[q];
    }

    /**
     * Walks X and Y forwards from their first characters and collects the
     * indices of Y at which both cursors point at equal characters; on a
     * mismatch the LCS length table decides which cursor advances.
     * Kept for existing callers; {@link #isInterLeave} no longer uses it.
     *
     * @param X first string, of length m
     * @param Y second string, of length n
     * @return matched indices of Y in insertion (ascending) order
     */
    public static LinkedHashSet<Integer> lcs(String X, String Y, int m, int n) {
        int[][] L = buildLcsTable(X, Y, m, n);
        LinkedHashSet<Integer> linkedset = new LinkedHashSet<Integer>();
        int i = 1;
        int j = 1;
        while (i <= m && j <= n) {
            if (X.charAt(i - 1) == Y.charAt(j - 1)) {
                linkedset.add(j - 1);
                i++;
                j++;
            } else if (L[i - 1][j] > L[i][j - 1]) {
                i++;
            } else {
                j++;
            }
        }
        return linkedset;
    }

    /**
     * Backtracks through the LCS length table from its bottom-right corner and
     * collects the indices of Y that belong to one longest common subsequence
     * of X and Y.
     * Kept for existing callers; {@link #isInterLeave} no longer uses it.
     *
     * @param X first string, of length m
     * @param Y second string, of length n
     * @return matched indices of Y in insertion (descending) order
     */
    public static LinkedHashSet<Integer> LCS(String X, String Y, int m, int n) {
        int[][] L = buildLcsTable(X, Y, m, n);
        LinkedHashSet<Integer> linkedset = new LinkedHashSet<Integer>();
        int i = m;
        int j = n;
        while (i > 0 && j > 0) {
            if (X.charAt(i - 1) == Y.charAt(j - 1)) {
                linkedset.add(j - 1);
                i--;
                j--;
            } else if (L[i - 1][j] > L[i][j - 1]) {
                i--;
            } else {
                j--;
            }
        }
        return linkedset;
    }

    /** Builds the table where L[i][j] is the LCS length of X[0..i-1] and Y[0..j-1]. */
    private static int[][] buildLcsTable(String X, String Y, int m, int n) {
        int[][] L = new int[m + 1][n + 1]; // row 0 and column 0 stay 0
        for (int i = 1; i <= m; i++) {
            for (int j = 1; j <= n; j++) {
                if (X.charAt(i - 1) == Y.charAt(j - 1)) {
                    L[i][j] = L[i - 1][j - 1] + 1;
                } else {
                    L[i][j] = Math.max(L[i - 1][j], L[i][j - 1]);
                }
            }
        }
        return L;
    }
}
Can anyone suggest an LCS approach to this problem? My code is not passing the following test case:
"cacabcbaccbbcbb" -String A
"acaaccaacbbbabbacc"-String B
"accacaabcbacaccacacbbbbcbabbbbacc"-String C
This will be the LCS+DP approach. Try it out:
class Solution {
    /**
     * Checks whether s3 can be produced by interleaving s1 and s2 while
     * keeping the character order of each source string.
     *
     * ok[i][j] is true when the first i characters of s1 and the first j
     * characters of s2 can be interleaved into the first i + j characters of s3.
     */
    public boolean isInterleave(String s1, String s2, String s3) {
        final int len1 = s1.length();
        final int len2 = s2.length();
        if (len1 + len2 != s3.length()) {
            return false;
        }
        if (s3.isEmpty()) {
            return true;
        }

        final boolean[][] ok = new boolean[len1 + 1][len2 + 1];
        ok[0][0] = true;

        // First column: only s1 is consumed, so s1's prefix must equal s3's prefix.
        for (int i = 1; i <= len1; i++) {
            ok[i][0] = ok[i - 1][0] && s1.charAt(i - 1) == s3.charAt(i - 1);
        }
        // First row: only s2 is consumed.
        for (int j = 1; j <= len2; j++) {
            ok[0][j] = ok[0][j - 1] && s2.charAt(j - 1) == s3.charAt(j - 1);
        }

        for (int i = 1; i <= len1; i++) {
            for (int j = 1; j <= len2; j++) {
                final char next = s3.charAt(i + j - 1);
                final boolean fromFirst = ok[i - 1][j] && s1.charAt(i - 1) == next;
                final boolean fromSecond = ok[i][j - 1] && s2.charAt(j - 1) == next;
                ok[i][j] = fromFirst || fromSecond;
            }
        }
        return ok[len1][len2];
    }
}

Find the largest string such that 'a', 'b', 'c' are not continuos

Q3: We are given the maximum number of occurrences of ‘a’, ‘b’ and ‘c’ in a string. We need to build the longest possible string containing only ‘a’, ‘b’ and ‘c’ such that no three consecutive characters are the same.
Ex:-
Input:- 3 3 3
Output:- abcabcabc
(There can be a lot of different outputs)
Input:- 5 5 3
Output:- aabbcaabbcabc
You could use this algorithm:
As a preprocessing step, associate each letter (a, b and c) with its corresponding maximum frequency, so you can sort these character-frequency pairs as you wish.
Start with an empty string and perform the following actions in a loop:
Sort the three letter-frequency pairs by decreasing frequency
Pick the first pair from the sorted list and check its frequency. If it is zero, return the string
If that selected character would violate the rule that the same character cannot repeat 3 times in a row, then pick the second pair from the sorted list instead and check its frequency. If it is zero, return the string
Add the selected character to the string, and decrease its frequency.
Repeat.
Here is an interactive implementation in JavaScript:
/**
 * Greedily builds a string over "a", "b", "c" in which no letter appears
 * three times in a row.
 *
 * @param {number[]} freq maximum number of uses for "a", "b" and "c" (in that order)
 * @returns {string} the constructed string
 */
function largestSequence(freq) {
    // Pair each letter with its remaining budget so the pairs can be re-sorted.
    const letters = ["a", "b", "c"].map((chr, i) => ({ freq: freq[i], chr: chr }));
    let result = "";
    for (;;) {
        // Largest remaining budget first.
        letters.sort((x, y) => y.freq - x.freq);
        let pick = letters[0];
        if (pick.freq === 0) {
            break; // nothing left to place
        }
        if (result.endsWith(pick.chr + pick.chr)) {
            // A third copy in a row is forbidden: fall back to the runner-up.
            pick = letters[1];
            if (pick.freq === 0) {
                break; // no alternative letter available
            }
        }
        result += pick.chr;
        pick.freq -= 1;
    }
    return result;
}
// I/O handling: wires the first <input> and the first <span> of the page to
// largestSequence so the result is recomputed on every input event.
let input = document.querySelector("input");
let output = document.querySelector("span");
input.oninput = function() {
// Extract every run of digits from the text box; a failed match yields [].
let freq = (input.value.match(/\d+/g) || []).map(Number);
if (freq.length !== 3) {
output.textContent = "(Please enter three integers)";
} else {
output.textContent = largestSequence(freq);
}
};
// Run the handler once so the initial value of the input is rendered.
input.oninput();
Frequencies for a, b, and c: <input value="3 3 3"><br>
Longest string: <span></span>
Working Simple Solution with PQ
/** A letter together with the number of copies that may still be used. */
class Node {
    int count;
    char c;

    /** Creates a node for letter {@code c} with {@code count} copies available. */
    public Node(int count, char c) {
        this.c = c;
        this.count = count;
    }

    /** Consumes one copy of the letter. */
    public void dec() {
        count -= 1;
    }
}
/**
 * Builds a string from at most A copies of 'a', B of 'b' and C of 'c',
 * always taking the letter with the largest remaining count first.
 * NOTE(review): the order in which equal counts are polled is decided by
 * PriorityQueue, so the exact output for tied counts is not fixed here.
 */
public String solution(int A, int B, int C) {
// Max-heap on the remaining count.
PriorityQueue<Node> queue = new PriorityQueue<>((a, b) -> b.count - a.count);
// Letters with no budget never enter the queue.
if (A>0)
queue.add(new Node(A, 'a'));
if (B>0)
queue.add(new Node(B, 'b'));
if (C>0)
queue.add(new Node(C, 'c'));
StringBuilder ans = new StringBuilder();
while (!queue.isEmpty()) {
// Always place one copy of the currently most frequent letter.
Node max1 = queue.poll();
ans.append(max1.c); // a
max1.dec();
if (queue.size()>0){
Node max2 = queue.poll();
// Only when max1 still outnumbers max2 is a second copy of max1
// placed, immediately followed by one max2 as a separator.
if (max1.count > max2.count) {
ans.append(max1.c); //aa
max1.dec();
ans.append(max2.c); //aab
max2.dec();
}
if (max2.count>0){
queue.add(max2); // add back if count not zero
}
} else {
// max1 is the only letter left: place at most one more copy and stop.
if (max1.count>0){
ans.append(max1.c); //aa and return as queue size is 1
max1.dec();
return ans.toString();
}
}
if (max1.count>0){
queue.add(max1); // add back if count not zero
}
}
return ans.toString();
}
// C++ implementation
#include <bits/stdc++.h>
using namespace std;
/// Ordering predicate for std::sort: puts the pair with the larger count
/// (the `first` member) in front, i.e. sorts by decreasing frequency.
bool compare(std::pair<int, char> a, std::pair<int, char> b) {
    return b.first < a.first;
}
/// Reads a number of test cases; for each one reads the maximum counts of
/// 'a', 'b' and 'c' and prints a string in which no letter occurs three
/// times in a row, built greedily from the most frequent remaining letter.
int main() {
    int t;
    std::cin >> t;
    while (t--) {
        // v[i].first is the remaining count, v[i].second the letter it belongs to.
        std::vector<std::pair<int, char>> v(3);
        std::cin >> v[0].first >> v[1].first >> v[2].first;
        v[0].second = 'a';
        v[1].second = 'b';
        v[2].second = 'c';

        std::string s;
        while (true) {
            // Remember only the length: enough to detect "nothing was appended"
            // without copying the whole string on every iteration.
            const std::size_t before = s.length();

            // Bring the most frequent remaining letter to the front.
            std::sort(v.begin(), v.end(), compare);
            if (v[0].first <= 0)
                break;  // nothing left (also guards against negative input)

            // Fewer than two characters so far: any letter is safe.
            if (s.length() < 2) {
                s += v[0].second;
                v[0].first -= 1;
                continue;
            }

            // Two or more characters so far: take the most frequent letter
            // that would not become the third identical character in a row.
            const std::size_t len = s.length();
            for (int i = 0; i < 3; ++i) {
                if (v[i].first > 0 && (s[len - 1] != v[i].second || s[len - 2] != v[i].second)) {
                    s += v[i].second;
                    v[i].first -= 1;
                    break;
                }
            }

            // No letter could be appended: the string is final.
            if (s.length() == before)
                break;
        }
        std::cout << s << "\n";
    }
    return 0;
}
You can do something similar using Priority Queue in C++. Adding my answer for someone looking for the answer in future.
/// Reads the maximum counts of 'a', 'b' and 'c' and prints a longest string
/// over those letters in which no letter occurs three times in a row.
int main() {
    int a, b, c;
    std::cin >> a >> b >> c;

    // Max-heap keyed on the remaining count of each letter.
    std::priority_queue<std::pair<int, char>> pq;
    pq.push({a, 'a'});
    pq.push({b, 'b'});
    pq.push({c, 'c'});

    std::string res;
    while (true) {
        auto best = pq.top();
        pq.pop();
        if (best.first <= 0) {
            break;  // no letters left at all
        }

        const std::size_t len = res.length();
        const bool blocked =
            len >= 2 && res[len - 1] == best.second && res[len - 2] == best.second;

        if (!blocked) {
            // Placing one character at a time keeps the choice flexible.
            res += best.second;
            --best.first;
        } else {
            // The leader would form a triple: spend exactly ONE copy of the
            // runner-up as a separator. Spending two wastes separators and
            // shortens the result (7 2 0 must give length 8, not 6).
            auto alt = pq.top();
            pq.pop();
            if (alt.first <= 0) {
                break;  // no separator available, the string is final
            }
            res += alt.second;
            --alt.first;
            pq.push(alt);
        }
        pq.push(best);
    }
    std::cout << res << '\n';
}

Check if a permutation of a string can become a palindrome

Write a method to test if a string meets the preconditions to become a palindrome.
Eg:
Input | Output
mmo | True
yakak | True
travel | False
I'm thinking of this approach:
Make a suffix tree for all permutation of T such that T$Reverse(T)#
Check for all permutation for same node
Am I missing anything?
All you need to do is check that there's at most one character with an odd number of occurrences. Here's a Java example:
/**
 * Tells whether the characters of {@code s} can be rearranged into a
 * palindrome, i.e. whether at most one character occurs an odd number of times.
 */
private static boolean canMakePalindrom(String s) {
    // Tally every character.
    Map<Character, Integer> tally = new HashMap<>();
    for (int i = 0; i < s.length(); i++) {
        tally.merge(s.charAt(i), 1, Integer::sum);
    }

    // Count the characters whose tally is odd; a second one rules a palindrome out.
    int oddTotal = 0;
    for (int occurrences : tally.values()) {
        if (occurrences % 2 == 1 && ++oddTotal > 1) {
            return false;
        }
    }
    return true;
}
EDIT4 (yes - these are ordered to make sense, but numbered by chronological order):
The above implementation has a built-in inefficiency. I don't think the first iteration over the string can be avoided, but there's no real reason to keep a count of all the occurrences - it's enough to keep track of the characters with an odd count. For this use case, it's enough to record each character we encounter (e.g., in a Set) and remove it when we encounter it again. In the worst case, where all the characters in the string are different, the performance is comparable, but in the common case, where there are several occurrences of each character, this implementation dramatically improves both the time and memory cost of the second loop (which is now reduced to a single condition):
/**
 * Tells whether {@code s} can be rearranged into a palindrome by tracking
 * only the characters seen an odd number of times so far.
 */
private static boolean canMakePalindrom(String s) {
    Set<Character> unpaired = new HashSet<>();
    for (int i = 0; i < s.length(); i++) {
        char ch = s.charAt(i);
        if (unpaired.contains(ch)) {
            unpaired.remove(ch); // second sighting completes a pair
        } else {
            unpaired.add(ch);    // odd sighting, waits for a partner
        }
    }
    // At most one character may be left without a partner.
    return unpaired.size() <= 1;
}
EDIT3 (yes - these are ordered to make sense, but numbered by chronological order):
Java 8 provides a fluent streaming API which could be used to create an implementation similar to the Python one-liners below:
/**
 * Stream-based check: groups the code units of {@code s} by value, counts each
 * group and accepts when at most one group has an odd size.
 */
private static boolean canMakePalindrom(String s) {
    long oddGroups = s.chars()
            .boxed()
            .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()))
            .values()
            .stream()
            .filter(size -> (size & 1L) == 1L)
            .count();
    return oddGroups <= 1;
}
EDIT:
Python built-in functions and comprehension capabilities make this too attractive not to publish this one liner solution. It's probably less efficient than the aforementioned Java one, but is quite elegant:
from collections import Counter
def canMakePalindrom(s):
    """Return True when at most one character of ``s`` occurs an odd number of times."""
    odd_counts = [n for n in Counter(s).values() if n % 2 == 1]
    return len(odd_counts) <= 1
EDIT2:
Or, an even cleaner approach as proposed by #DSM in the comments:
from collections import Counter
def canMakePalindrom(s):
    """Return True when at most one character of ``s`` occurs an odd number of times."""
    odd_total = 0
    for occurrences in Counter(s).values():
        odd_total += occurrences % 2 == 1  # a bool adds as 0 or 1
    return odd_total <= 1
Instead of counting how many times each letter occurs, another approach keeps track of whether a letter has occurred an odd or even number of times. If a letter has occurred an even number of times, you don’t need to worry about it, and only need to keep track of the odd occurrences in a set. In Java:
/**
 * Tells whether {@code s} can be rearranged into a palindrome. Only the
 * letters currently seen an odd number of times are kept in the set.
 */
public static boolean canMakePalindrome(String s) {
    Set<Character> unpaired = new HashSet<>();
    for (int i = 0; i < s.length(); i++) {
        char letter = s.charAt(i);
        boolean wasUnpaired = unpaired.remove(letter);
        if (!wasUnpaired) {
            unpaired.add(letter);
        }
    }
    return unpaired.size() <= 1;
}
Really all you're looking for is if all (or all but one) of the letters are paired off. As long as they are, then they will be able to be turned into a palindrome.
So it would be something like...
/// Returns true when the characters of `drome` can be rearranged into a
/// palindrome, i.e. when at most one distinct character occurs an odd number
/// of times (that character would sit in the centre).
///
/// Every byte counts, including whitespace; strip it beforehand if it should
/// be ignored.
bool canBeTurnedIntoAPalindrome(std::string drome)
{
    // odd[b] is true while byte value b has been seen an odd number of times.
    std::array<bool, 256> odd{};
    int oddTotal = 0;

    // Iterating as unsigned char keeps the index in 0..255 even for bytes
    // that are negative as plain char.
    for (unsigned char ch : drome)
    {
        odd[ch] = !odd[ch];
        oddTotal += odd[ch] ? 1 : -1;
    }

    return oddTotal <= 1;
}
This isn't the most efficient (it's counting letters as many times as it comes across them, even if they've been counted already) but it does work.
If I'm understanding your question correctly, this is how I understand it:
If the input string can be rearranged into a palindrome, output "True", otherwise output "False".
Then you can use these simple rules:
If the length is even, every unique character in the input has to occur a multiple of 2 times.
If the length is odd, every unique character except one has to occur a multiple of 2 times. Only 1 character is allowed to not occur a multiple of 2 times.
So for the 3 given examples:
"mmo", odd length, m occurs twice (multiple of 2), o occurs once (not a multiple of 2), so True.
"yakak", odd length, a occurs twice (multiple of 2), k occurs twice (multiple of 2), y occurs once (not a multiple of 2) , so True.
"travel", more than one character does not occur a multiple of 2, so False.
Additional examples:
"mmorpg", only m occurs a multiple of 2, the rest only once, so False.
"mmom", no characters occur a multiple of 2, more than one character occurs "not a multiple of 2 times", so False.
At this point you should realise that if only 1 character is allowed to occur a non-multiple-of-2 times, then you can disregard the length. A string with an even length will have either 2 or more characters occuring a non-multiple-of-2 times, or none at all.
So the final rule should be this:
If at most 1 unique character occurs a non-multiple-of-2 times in the input, the output is True otherwise the output is False.
def can_permutation_palindrome(s):
    """Return True when at most one distinct character of ``s`` has an odd count."""
    tally = {}
    for ch in s:
        if ch in tally:
            tally[ch] += 1
        else:
            tally[ch] = 1
    odd_total = sum(n % 2 for n in tally.values())
    return odd_total <= 1
def check(string):
    """Return True when ``string`` can be permuted into a palindrome.

    Bit ``ord(ch)`` of the mask is toggled for every character, so the set
    bits are exactly the characters with an odd count.
    """
    mask = 0
    for ch in string:
        mask = mask ^ (1 << ord(ch))
    # mask & (mask - 1) clears the lowest set bit; the result is 0 exactly
    # when mask has zero or one bit set.
    return (mask & (mask - 1)) == 0
I reached the solution below today (python). I think it's readable, and performance-wise it's really good.
sum(map(lambda x: word.count(x) % 2, set(word))) <= 1
We're basically counting the number of occurrences of each character in the string "word", getting the remainder of the division by 2, summing them all and checking if you have at most 1 of them.
The idea is that you need to have all characters paired, except potentially for one (the middle one).
My idea is, if the number of letters with odd count is one and rest all have even count, a palindrome is possible..Here's my program in Python
# Reads one line and prints YES when its letters can be rearranged into a
# palindrome (at most one letter with an odd count), NO otherwise.
string = raw_input()
char_set = set(string)  # the distinct letters of the input
d_dict = dict((c, string.count(c)) for c in char_set)  # letter -> occurrences
odd_l = [e for e in d_dict.values() if e % 2 == 1]  # the odd occurrence counts
found = len(odd_l) <= 1
print("YES" if found else "NO")
A string can be rearranged into a palindrome only if at most one character occurs an odd number of times and all other characters occur an even number of times. The following program can be used to check whether a string can be turned into a palindrome or not.
/// Prints whether the characters of `s` can be rearranged into a palindrome,
/// i.e. whether at most one distinct character occurs an odd number of times.
void checkPalindrome(std::string s)
{
    // One counter per possible byte value.
    std::vector<int> vec(256, 0);

    // Index by the byte value itself. Subtracting 'a' would produce a
    // negative index for every character that sorts before 'a' (digits,
    // upper-case letters, spaces, ...).
    for (unsigned char ch : s)
    {
        ++vec[ch];
    }

    int odd_count = 0;
    for (int occurrences : vec)
    {
        if (occurrences % 2 != 0)
            ++odd_count;
    }

    if (odd_count > 1)
        std::cout << "Can't be palindrome" << std::endl;
    else
        std::cout << "Yes can be palindrome" << std::endl;
}
With O(n) complexity .
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace PallindromePemutation
{
    /// <summary>A character together with the number of times it has been seen.</summary>
    class charcount
    {
        public char character { get; set; }
        public int occurences { get; set; }
    }

    class Program
    {
        /// <summary>
        /// Tallies the letters of the hard-coded word "travel" and prints "true"
        /// when at most one letter has an odd tally, "false" otherwise.
        /// </summary>
        static void Main(string[] args)
        {
            var tallies = new List<charcount>();

            foreach (char letter in "travel".ToCharArray())
            {
                charcount existing = tallies.Find(x => x.character == letter);
                if (existing != null)
                {
                    existing.occurences++;
                }
                else
                {
                    tallies.Add(new charcount { character = letter, occurences = 1 });
                }
            }

            // Number of letters that occur an odd number of times.
            int count = tallies.Count(item => item.occurences % 2 != 0);

            Console.WriteLine(count > 1 ? "false" : "true");
            Console.ReadKey();
        }
    }
}
If we don't care case sensitivity of characters and spaces within a string, then a sample solution in C# by using Dictionary can be like :
/// <summary>
/// Tells whether the letters of <paramref name="inputStr"/> can be rearranged
/// into a palindrome, ignoring letter case and space characters.
/// Returns false for null, empty or whitespace-only input.
/// </summary>
private static bool IsPalindromePermutation(string inputStr)
{
    if (string.IsNullOrWhiteSpace(inputStr))
    {
        return false;
    }

    // Case is irrelevant, so tally the lower-cased characters.
    var tally = new Dictionary<char, int>();
    foreach (var ch in inputStr.ToLower())
    {
        if (ch == ' ')
        {
            continue; // spaces do not take part in the check
        }

        int seen;
        tally.TryGetValue(ch, out seen); // seen stays 0 for a new character
        tally[ch] = seen + 1;
    }

    var oddTotal = 0;
    foreach (var occurrences in tally.Values)
    {
        if (occurrences % 2 != 0)
        {
            oddTotal++;
        }
    }

    return oddTotal <= 1;
}
We can achieve this via collections also
// Cancels equal characters pairwise; the word can be rearranged into a
// palindrome when at most one character is left without a partner.
String name = "raa";
List<Character> temp = new ArrayList<>(name.chars()
        .mapToObj(e -> (char) e).collect(Collectors.toList()));
for (int i = 0; i < temp.size(); i++) {
    for (int j = i + 1; j < temp.size(); j++) {
        if (temp.get(i).equals(temp.get(j))) {
            // Remove the later index first so the earlier one stays valid.
            temp.remove(j);
            temp.remove(i);
            // Step back so the outer loop re-examines the element that has
            // moved into slot i.
            i--;
            // Stop scanning for this i: continuing would call temp.get(i)
            // with the decremented index, which is -1 when i was 0.
            break;
        }
    }
}
if (temp.size() <= 1) {
    System.out.println("Pallindrome");
} else {
    System.out.println(temp.size());
    System.out.println("Not Pallindrome");
}
}
This is my solution
/**
 * Reads one line from standard input and prints "Yes" when its characters can
 * be rearranged into a palindrome, "No" otherwise.
 */
public static void main(String[] args) {
    Scanner scanner = new Scanner(System.in);
    String line = scanner.nextLine();

    // Holds the characters that are currently without a partner.
    List<Character> unpaired = new ArrayList<>();
    for (char ch : line.toCharArray()) {
        int at = unpaired.indexOf(ch);
        if (at >= 0) {
            unpaired.remove(at); // partner found: drop the waiting character
        } else {
            unpaired.add(ch);
        }
    }

    System.out.print(unpaired.size() <= 1 ? "Yes" : "No");
}
That 's my solution. The string could contain several words with spaces, such as
Input: Tact Coa
Output true
Input: Tact Coa vvu
Output: false
/**
 * Tells whether the letters of {@code str} can be rearranged into a
 * palindrome, ignoring space characters and letter case.
 *
 * The characters are sorted so that equal characters form contiguous runs;
 * a palindrome is possible when at most one run has odd length. Each run is
 * measured explicitly because stepping two positions at a time loses track
 * of pair boundaries after the first unpaired character (which made "abc"
 * pass).
 *
 * As before, the space-stripped text and the sorted text are printed to
 * standard output.
 *
 * @param str text to examine, may contain several space-separated words
 * @return true when a palindromic permutation exists
 */
public static boolean checkForPalindrome(String str) {
    String strTrimmed = str.replaceAll(" ", "");
    System.out.println(strTrimmed);

    char[] str1 = strTrimmed.toCharArray();
    for (int i = 0; i < str1.length; i++) {
        str1[i] = Character.toLowerCase(str1[i]);
    }
    Arrays.sort(str1);
    String result = new String(str1);
    System.out.println(result);

    int oddRuns = 0;
    int runStart = 0;
    while (runStart < str1.length) {
        // Find the end of the run of characters equal to str1[runStart].
        int runEnd = runStart;
        while (runEnd < str1.length && str1[runEnd] == str1[runStart]) {
            runEnd++;
        }
        if ((runEnd - runStart) % 2 == 1) {
            oddRuns++;
        }
        runStart = runEnd;
    }
    return oddRuns <= 1;
}
Question: Can a String become a palindrome?
Method1: count of characters
IN Java :
public class TEST11 {
    /**
     * Tallies the characters of the hard-coded word in a 256-slot table and
     * prints "yes" when at most one character has an odd tally, "no" otherwise.
     */
    public static void main(String[] args) {
        String a = "Protijayi";

        // A freshly allocated int[] is already zero-filled.
        int[] tally = new int[256];
        for (char ch : a.toCharArray()) {
            tally[ch]++;
        }

        // The lowest bit of a tally is 1 exactly when the tally is odd.
        int odd = 0;
        for (int occurrences : tally) {
            odd += occurrences & 1;
        }

        System.out.println(odd > 1 ? "no" : "yes");
    }
}
IN Python:
def fix(a):
    """Print "yes" when at most one character of ``a`` has an odd count, else "no".

    Characters are tallied in a 256-slot table indexed by ``ord``.
    """
    count = [0] * 256
    for ch in a:
        count[ord(ch)] += 1
    # Number of table slots holding an odd tally.
    odd = sum(1 for occurrences in count if occurrences & 1)
    print("no" if odd > 1 else "yes")
a = "Protijayi"
fix(a)
Method 2 : Use of HashSet
In Java:
public class TEST11 {
    /**
     * Prints whether the characters of the hard-coded word can be rearranged
     * into a palindrome. The set holds exactly the characters seen an odd
     * number of times so far.
     */
    public static void main(String[] args) {
        String a = "Protijayi";
        Set<Character> set = new HashSet<>();
        for (char ch : a.toCharArray()) {
            // Toggle membership: remove() reports whether ch was present. It
            // must only be re-added when it was absent, otherwise the set
            // ends up holding every distinct character.
            if (!set.remove(ch)) {
                set.add(ch);
            }
        }
        if (set.size() <= 1) {
            System.out.println("yes can be a palindrome");
        } else {
            System.out.println("no");
        }
    }
}
Swift example for this question.
var str = "mmoosl"
extension String {
    /// Returns how many characters of the string are equal to `needle`.
    func count(of needle: Character) -> Int {
        var total = 0
        for ch in self where ch == needle {
            total += 1
        }
        return total
    }
}
/// Returns true when the characters of `polyString` can be rearranged into a
/// palindrome, i.e. when at most one distinct character occurs an odd number
/// of times.
///
/// Any odd count (1, 3, 5, ...) is accepted for the single centre character,
/// not only a count of exactly 1, so "aaa" is a valid input.
func canBeTurnedIntoAPalinpolyString(_ polyString: String) -> Bool {
    // Holds exactly the characters seen an odd number of times so far.
    var unpaired = Set<Character>()
    for ch in polyString {
        // remove(_:) returns nil when ch was not present.
        if unpaired.remove(ch) == nil {
            unpaired.insert(ch)
        }
    }
    return unpaired.count <= 1
}
print(canBeTurnedIntoAPalinpolyString(str))
Java
/**
 * Bit-mask check for a palindromic permutation. Bit (ch - 'a') of the mask is
 * flipped for each character; for an even-length input the mask must end up
 * 0, for an odd-length input it must have at most one bit set.
 * Returns false for null or empty input.
 * NOTE(review): the shift distance is taken from (ch - 'a') as-is, so this
 * presumably expects lower-case a-z only - confirm with callers.
 */
private static boolean isStringPalindromePermutation(String input) {
    if (input == null || input.isEmpty()) {
        return false;
    }

    int oddMask = 0;
    for (char ch : input.toCharArray()) {
        int bit = 1 << (ch - 'a');
        if ((oddMask & bit) > 0) {
            oddMask &= ~bit; // already marked: clear the bit
        } else {
            oddMask |= bit;  // not marked: set the bit
        }
    }

    if (input.length() % 2 == 0) {
        return oddMask == 0;
    }
    // x & (x - 1) drops the lowest set bit; 0 means at most one bit was set.
    return (oddMask & (oddMask - 1)) == 0;
}
Why use a suffix tree or any other data structure?
The basic requirement of a palindromic string is the frequency of all characters must be even or only one character can have odd frequency.
Example :-
Input : aabbaa
Output : frequency of a is 4 and b is 2 (both even)
Input : xxzyzxx
Output : frequency of x is 4, z is 2 and y=1 (only 1 odd)
Sample code for better understanding :
/// Returns true when the characters of `str` can be rearranged into a
/// palindrome: every character occurs an even number of times, except for at
/// most one character that may occur an odd number of times.
///
/// All byte values are supported, not only lower-case letters.
bool ispalin(std::string str)
{
    // One frequency slot per possible byte value, so no character can index
    // outside the table.
    int freq[256] = {0};
    for (std::size_t i = 0; i < str.length(); i++)
        freq[static_cast<unsigned char>(str[i])]++;

    int odd = 0;
    for (int i = 0; i < 256; i++)
    {
        if (freq[i] & 1) // odd number of occurrences
            odd++;
        if (odd > 1)     // a second odd character rules a palindrome out
            return false;
    }
    return true;
}
python code to check whether a palindrome can be formed from given string or not:
# Reads a string and reports whether its characters can be rearranged into a
# palindrome: at most one distinct character may occur an odd number of times.
test_str = input('enter any string = ')
count = sum(1 for item in set(test_str) if test_str.count(item) % 2 != 0)
if count > 1:
    print(" palindrome cannot be formed")
else:
    print(" palindrome can be formed")
Please try this code if any issue please comments
More efficient implementation - Java
/**
 * Tells whether the characters of {@code inputString} can be rearranged into
 * a palindrome (at most one character with an odd number of occurrences).
 */
boolean palindromeRearranging(String inputString) {
    Map<Character, Integer> tally = new HashMap<Character, Integer>();
    for (int i = 0; i < inputString.length(); i++) {
        tally.merge(inputString.charAt(i), 1, Integer::sum);
    }

    int oddTotal = 0;
    for (int occurrences : tally.values()) {
        if (occurrences % 2 != 0) {
            oddTotal++;
        }
    }
    return oddTotal <= 1;
}
Here is my code :
/**
 * Tells whether the characters of {@code inputString} can be rearranged into
 * a palindrome: counts each character, then rejects as soon as a second
 * character with an odd count is found.
 */
boolean palindromeRearranging(String inputString) {
    HashMap<Character, Integer> tally = new HashMap<>();
    for (char ch : inputString.toCharArray()) {
        Integer seen = tally.get(ch);
        tally.put(ch, seen == null ? 1 : seen + 1);
    }

    int oddSeen = 0;
    for (int occurrences : tally.values()) {
        if (occurrences % 2 != 0 && ++oddSeen > 1) {
            return false;
        }
    }
    return true;
}
JS solution:
/**
 * Tells whether the characters of inputString can be rearranged into a
 * palindrome.
 *
 * What matters is the number of DISTINCT characters with an odd count, which
 * must be at most one. Counting string positions instead rejects valid
 * inputs such as "aaabb", where the three "a" positions each look odd.
 *
 * @param {string} inputString text to examine
 * @returns {boolean} true when a palindromic permutation exists
 */
function solution(inputString) {
    // Holds exactly the characters seen an odd number of times so far.
    const unpaired = new Set();
    for (const ch of inputString.split('')) {
        if (unpaired.has(ch)) {
            unpaired.delete(ch);
        } else {
            unpaired.add(ch);
        }
    }
    return unpaired.size <= 1;
}
With JAVA
import java.util.*;
import java.lang.*;
//Classs
class Permutation {
    /**
     * Tells whether some permutation of {@code str} is a palindrome.
     *
     * Almost every character needs an even count, so that half of its copies
     * can sit on each side; at most one character (the middle one) may have
     * an odd count.
     *
     * The set holds the characters seen an odd number of times so far. This
     * counts each distinct character once, so "aaa" is accepted; counting per
     * occurrence reports the same odd character again at its second
     * occurrence.
     *
     * @param str text to examine
     * @return true when a palindromic permutation exists
     */
    public static boolean hasPalindrome(String str) {
        Set<Character> unpaired = new HashSet<>();
        for (char c : str.toCharArray()) {
            // add() returns false when c is already present: that is an even
            // sighting, so the character is paired off and removed.
            if (!unpaired.add(c)) {
                unpaired.remove(c);
            }
        }
        return unpaired.size() <= 1;
    }

    /** Reads one line from standard input and prints YES or NO. */
    public static void main(String args[]) throws Exception {
        Scanner s = new Scanner(System.in);
        String str = s.nextLine();
        if (Permutation.hasPalindrome(str)) {
            System.out.println("YES"); // Writing output to STDOUT
        } else {
            System.out.println("NO"); // Writing output to STDOUT
        }
    }
}
Implementation from Checking if a String is a Permutation of a Palindrome
Time complexity is essentially O(n). This means that the function is linear in the length of the input string
/**
 * Tells whether the letters of {@code str} can be rearranged into a
 * palindrome. The input is lower-cased and everything outside a-z is
 * discarded before counting.
 */
public static boolean isPermutationOfPalindrome(String str) {
    String letters = str.toLowerCase().replaceAll("[^a-z]", "");

    // One tally slot per letter of the alphabet.
    int[] tally = new int[26];
    for (char ch : letters.toCharArray()) {
        tally[ch - 'a']++;
    }

    // A palindrome tolerates at most one letter with an odd tally.
    int oddTotal = 0;
    for (int occurrences : tally) {
        if (occurrences % 2 == 1) {
            oddTotal++;
        }
    }
    return oddTotal <= 1;
}

How to find the longest substring with no repeated characters?

I want an algorithm to find the longest substring of characters in a given string containing no repeating characters. I can think of an O(n*n) algorithm which considers all the substrings of a given string and calculates the number of non-repeating characters. For example, consider the string "AABGAKG" in which the longest substring of unique characters is 5 characters long which corresponds to BGAKG.
Can anyone suggest a better way to do it ?
Thanks
Edit: I think I'm not able to explain my question properly to others. You can have repeating characters in a substring (It's not that we need all distinct characters in a substring which geeksforgeeks solution does). The thing which I have to find is maximum no of non-repeating characters in any substring (it may be a case that some characters are repeated).
for eg, say string is AABGAKGIMN then BGAKGIMN is the solution.
For every start = 0 ... (n-1), try to extend end to the right-most position.
keep a bool array used[26] to remember if any character is already used.
suppose currently we finished (start, end)
for start+1,
first clear by set: used[str[start]] = false;
while ((end+1 < n) && (!used[str[end+1]])) { used[str[end+1]]=true; ++end;}
Now we have a new (start, end) pair to check. Total complexity is O(N).
Here is the solution in C#. I tested in in Visual studio 2012 and it works
/// <summary>
/// Returns the length of the longest substring of <paramref name="str"/> in which
/// no character occurs twice. Runs in a single left-to-right pass.
/// </summary>
/// <param name="str">Text to scan; null or empty yields 0.</param>
/// <returns>Length of the longest repeat-free substring.</returns>
public static int LongestSubstNonrepChar(string str) {
    if (string.IsNullOrEmpty(str)) {
        return 0;
    }
    // One slot per UTF-16 code unit so that characters above 255 cannot index
    // out of range. Each slot holds (last index of that character + 1); 0 = unseen.
    int[] lastSeenPlusOne = new int[char.MaxValue + 1];
    int maxSize = 0;
    int start = 0;
    for (int end = 0; end < str.Length; end++) {
        char current = str[end];
        // The earlier copy lies inside the window: move the window past it.
        if (lastSeenPlusOne[current] > start) {
            start = lastSeenPlusOne[current];
        }
        lastSeenPlusOne[current] = end + 1;
        int curSize = end - start + 1;
        if (curSize > maxSize) {
            maxSize = curSize;
        }
    }
    return maxSize;
}
Pretty tricky question, I give you an O(n) solution based on C#.
/// <summary>
/// Finds the first longest substring of <paramref name="source"/> that contains
/// exactly <paramref name="k"/> distinct characters, using a sliding window with
/// per-character counts.
/// </summary>
/// <param name="source">Text to scan.</param>
/// <param name="k">Required number of distinct characters.</param>
/// <returns>
/// The substring, or an empty string when the input is null/empty, when k exceeds
/// the input length, or when no window with k distinct characters exists.
/// </returns>
public string MaxSubStringKUniqueChars(string source, int k)
{
    if (string.IsNullOrEmpty(source) || k > source.Length)
    {
        return string.Empty;
    }

    IDictionary<char, int> occurrences = new Dictionary<char, int>();
    string best = string.Empty;
    int windowStart = 0;

    for (int pos = 0; pos < source.Length; pos++)
    {
        char incoming = source[pos];
        int seenSoFar;
        occurrences.TryGetValue(incoming, out seenSoFar);
        occurrences[incoming] = seenSoFar + 1;

        if (occurrences.Count != k + 1)
        {
            continue;
        }

        // The window just gained its (k+1)-th distinct character, so the text
        // before 'pos' is a maximal candidate.
        int candidateLength = pos - windowStart;
        if (candidateLength > best.Length)
        {
            best = source.Substring(windowStart, candidateLength);
        }

        // Shrink from the left until only k distinct characters remain.
        do
        {
            char outgoing = source[windowStart];
            int left = occurrences[outgoing] - 1;
            if (left == 0)
            {
                occurrences.Remove(outgoing);
            }
            else
            {
                occurrences[outgoing] = left;
            }
            windowStart++;
        } while (occurrences.Count > k);
    }

    // The window still open at the end of the input may be the winner,
    // e.g. "aabbcceee" with k = 2 must return "cceee".
    int tailLength = source.Length - windowStart;
    if (occurrences.Count == k && tailLength > best.Length)
    {
        return source.Substring(windowStart, tailLength);
    }
    return best;
}
`
//This is the test case.
public void TestMethod1()
{
    // Each row: input text, number of distinct characters, expected longest substring.
    var cases = new[]
    {
        new { Source = "aabcd", K = 2, Expected = "aab" },
        new { Source = "aabbccddeee", K = 2, Expected = "ddeee" },
        new { Source = "abccccccccaaddddeeee", K = 3, Expected = "ccccccccaadddd" },
        new { Source = "ababcdcdedddde", K = 2, Expected = "dedddde" },
    };
    foreach (var testCase in cases)
    {
        var ret = Item001.MaxSubStringKUniqueChars(testCase.Source, testCase.K);
        Assert.AreEqual(testCase.Expected, ret);
    }
}
How about this:
/**
 * Returns the first longest stretch of {@code string} in which no two
 * neighbouring characters are equal. A stretch ends at the first character of an
 * adjacent equal pair and the next stretch begins at the second one.
 *
 * <p>The previous version remembered the position where a stretch ended but later
 * used it as the stretch's start, and counted the duplicate character in the
 * length, which could run past the end of the input (e.g. "ABCAAB").
 *
 * @param string text to scan
 * @return the longest such stretch; the empty string for empty input
 */
public static String getLongestSubstringNoRepeats( String string ){
    int bestStart = 0;
    int bestLength = 0;
    int runStart = 0;
    // Walk one step past the last index so the final stretch is closed as well.
    for( int pos = 1; pos <= string.length(); pos++ ){
        boolean runEnds = pos == string.length()
                || string.charAt( pos ) == string.charAt( pos - 1 );
        if( !runEnds ){
            continue;
        }
        int runLength = pos - runStart;
        if( runLength > bestLength ){
            bestLength = runLength;
            bestStart = runStart;
        }
        runStart = pos;
    }
    return string.substring( bestStart, bestStart + bestLength );
}
Let s be the given string, and n its length.
Define f(i) to be the longest [contiguous] substring of s ending at s[i] with distinct letters. That's unique and well-defined.
Compute f(i) for each i. It's easy to deduce from f(i-1) and s[i]:
If the letter s[i] is in f(i-1), let j be the greatest position j < i such that s[j] = s[i]. Then f(i) is s[j+1 .. i] (in Python notation)
Otherwise, f(i) is f(i-1) with s[i] appended.
The solution to your problem is any f(i) of maximal length (not necessarily unique).
You could implement this algorithm to run in O(n * 26) time, where 26 is the number of letters in the alphabet.
/**
 * Finds the first longest stretch of {@code str} that contains no character
 * belonging to a run of two or more equal neighbours (so "AABGAKG" yields
 * "BGAKG"), prints that stretch to standard output and returns its length.
 *
 * <p>The previous version never recorded the stretch that was still open when
 * the input ended and never counted index 0, so inputs without any adjacent
 * repeat (e.g. "abc") produced 0.
 *
 * @param str characters to scan
 * @return length of the printed stretch; 0 for empty input
 */
public static int longestNonDupSubstring(char[] str) {
    final int n = str.length;
    int bestStart = 0;
    int bestLength = 0;
    int stretchStart = 0;
    int i = 0;
    while (i < n) {
        // Measure the run of identical characters beginning at i.
        int runEnd = i + 1;
        while (runEnd < n && str[runEnd] == str[i]) {
            runEnd++;
        }
        if (runEnd - i > 1) {
            // A repeated run closes the current stretch and is itself excluded.
            if (i - stretchStart > bestLength) {
                bestLength = i - stretchStart;
                bestStart = stretchStart;
            }
            stretchStart = runEnd;
        }
        i = runEnd;
    }
    // The stretch that reaches the end of the input is a candidate too.
    if (n - stretchStart > bestLength) {
        bestLength = n - stretchStart;
        bestStart = stretchStart;
    }
    for (int k = 0; k < bestLength; k++) {
        System.out.print(str[bestStart + k]);
    }
    return bestLength;
}
//Given a string ,find the longest sub-string with all distinct characters in it.If there are multiple such strings,print them all.
#include<array>
#include<cstring>
#include<iostream>
#include<string>
#include<vector>
using namespace std;
// Works for any characters, not only lower-case letters: the current window is
// searched directly, and because a window holds each character at most once it
// never grows beyond the size of the character set.

/// @brief Collects every longest substring of @p s whose characters are all distinct.
/// @param s Text to scan.
/// @return The longest distinct-character substrings in order of discovery
///         (several entries on a tie); empty when @p s is empty.
static std::vector<std::string> longest_distinct_substrings(const std::string& s)
{
    std::vector<std::string> longest;
    std::size_t best = 0;
    std::string window;

    // Offers the current window as a candidate: a strictly longer window
    // replaces all earlier results, an equally long one is added to them.
    const auto record = [&]() {
        if (window.empty()) {
            return;
        }
        if (window.size() > best) {
            best = window.size();
            longest.assign(1, window);
        } else if (window.size() == best) {
            longest.push_back(window);
        }
    };

    for (const char c : s) {
        const std::string::size_type loc = window.find(c);
        if (loc != std::string::npos) {
            // c repeats: the window is maximal, so record it and then drop
            // everything up to and including the earlier copy of c.
            record();
            window.erase(0, loc + 1);
        }
        window += c;
    }
    // The window still open at the end of the input was previously ignored.
    record();
    return longest;
}

int main()
{
    const std::string s = "abcdefghijrrstqrstuvwxyzprr";
    for (const std::string& candidate : longest_distinct_substrings(s)) {
        std::cout << candidate << '\n';
    }
    //output will be
    // abcdefghijr
    // qrstuvwxyzp
}
Let me contribute a little as well. I have this solution with complexity will be O(N). The algorithm’s space complexity will be O(K), where K is the number of distinct characters in the input string.
/// <summary>
/// Computes the length of the longest substring of <paramref name="str"/> that
/// has no repeated character, using a sliding window and a map from each
/// character to the index where it was last seen.
/// </summary>
/// <param name="str">Text to scan.</param>
/// <returns>Length of the longest repeat-free substring; 0 for an empty string.</returns>
public static int NoRepeatSubstring(string str)
{
    var lastIndexOf = new Dictionary<char, int>();
    int windowStart = 0;
    int longest = 0;
    for (int windowEnd = 0; windowEnd < str.Length; windowEnd++)
    {
        char incoming = str[windowEnd];
        int previous;
        // Only an earlier copy that lies inside the window forces it to shrink;
        // a copy left of windowStart has already been dropped.
        if (lastIndexOf.TryGetValue(incoming, out previous) && previous >= windowStart)
        {
            windowStart = previous + 1;
        }
        lastIndexOf[incoming] = windowEnd;
        longest = Math.Max(longest, windowEnd - windowStart + 1);
    }
    return longest;
}
And here some Unit Tests:
// Expected value first, then the result of NoRepeatSubstring for the given input.
// "aabccbb": a longest repeat-free substring is "abc" (length 3).
Assert.Equal(3, SlideWindow.NoRepeatSubstring("aabccbb"));
// "abbbb": a longest repeat-free substring is "ab" (length 2).
Assert.Equal(2, SlideWindow.NoRepeatSubstring("abbbb"));
// "abccde": both "abc" and "cde" have length 3.
Assert.Equal(3, SlideWindow.NoRepeatSubstring("abccde"));
/// <summary>
/// Cuts <paramref name="text"/> at every run of two or more identical adjacent
/// characters and returns the first longest piece found between those runs.
/// </summary>
/// <param name="text">Text to split.</param>
/// <returns>The longest piece; it may be empty when every character belongs to a run.</returns>
string MaximumSubstringNonRepeating(string text)
{
    string max = null;
    // Regex.Split also returns the text matched by the capture group, so the
    // result alternates piece, capture, piece, ...; this flag skips the captures.
    bool isCapture = false;
    // Verbatim literal (@"...") so that the backreference \1 reaches the regex
    // engine unchanged. The former '#' prefix is not valid C#.
    foreach (string s in Regex.Split(text, @"(.)\1+"))
    {
        if (!isCapture && (max == null || s.Length > max.Length))
        {
            max = s;
        }
        isCapture = !isCapture;
    }
    return max;
}
. matches any character. ( ) captures that character. \1 matches the captured character again. + repeats that character. The whole pattern matches two or more repetitions of any one character. "AA" or ",,,,".
Regex.Split() splits the string at every match of the pattern and returns an array of the pieces in between. (One caveat: it also includes the captured substrings — in this case, the single character that is being repeated. The captures show up in between the pieces, which is why I added the isCapture flag.)
The function cuts out all the repeated characters and returns the longest piece that was in between the runs of repeated characters.
>>> MaximumSubstringNonRepeating("AABGAKG") // "AA" is repeated
"BGAKG"
>>> MaximumSubstringNonRepeating("AABGAKGIMNZZZD") // "AA" and "ZZZ" are repeated.
"BGAKGIMN"

Find longest substring without repeating characters

Given a string S of length N find longest substring without repeating characters.
Example:
Input: "stackoverflow"
Output: "stackoverfl"
If there are two such candidates, return first from left. I need linear time and constant space algorithm.
You are going to need a start and an end locator(/pointer) for the
string and an array where you store information for each character:
did it occour at least once?
Start at the beginning of the string, both locators point to the
start of the string.
Move the end locator to the right till you find
a repetition (or reach the end of the string). For each processed character, store it in the array.
When stopped store the position if this is the largest substring. Also remember the repeated character.
Now do the same thing with the start locator, when processing
each character, remove its flags from the array. Move the locator till
you find the earlier occurrence of the repeated character.
Go back to step 3 if you haven't reached the end of string.
Overall: O(N)
import java.util.HashSet;
public class SubString {
    /**
     * Returns the first longest substring of {@code input} that contains no
     * repeated character.
     *
     * <p>The previous version discarded the whole window whenever a repeat was
     * met, so characters after the earlier copy were lost ("dvdf" produced "dv"
     * instead of "vdf"). The window now only shrinks past that earlier copy.
     *
     * @param input text to scan
     * @return the longest repeat-free substring; the empty string for empty input
     */
    public static String subString(String input){
        HashSet<Character> window = new HashSet<Character>();
        int start = 0;
        int bestStart = 0;
        int bestLength = 0;
        for (int end = 0; end < input.length(); end++) {
            char c = input.charAt(end);
            // add() returns false while c is still present: evict from the left
            // until the earlier copy of c has been removed.
            while (!window.add(c)) {
                window.remove(input.charAt(start));
                start++;
            }
            int length = end - start + 1;
            if (length > bestLength) {
                bestLength = length;
                bestStart = start;
            }
        }
        return input.substring(bestStart, bestStart + bestLength);
    }

    public static void main(String[] args) {
        String input = "substringfindout";
        System.out.println(subString(input));
    }
}
You keep an array indicating the position at which a certain character occurred last. For convenience all characters occurred at position -1. You iterate on the string keeping a window, if a character is repeated in that window, you chop off the prefix that ends with the first occurrence of this character. Throughout, you maintain the longest length. Here's a python implementation:
def longest_unique_substr(S):
    """Return the first longest substring of ``S`` without a repeated character.

    A window ``[win_start, win_start + win_len)`` is slid over ``S``. When the
    incoming character already occurs inside the window, the window is offered
    as a candidate and then cut just after that earlier occurrence.
    """
    # A plain array indexed by character code would do for a small alphabet.
    last_seen = {}
    best_start = best_len = 0
    win_start = win_len = 0
    for idx, ch in enumerate(S):
        prev = last_seen.get(ch, -1)
        if prev >= win_start:
            # Repeat inside the window: record it before shrinking.
            if win_len > best_len:
                best_start, best_len = win_start, win_len
            # Drop the prefix ending at the earlier copy; ch itself joins the
            # window, so the length shrinks by one less than the prefix size.
            win_len -= prev - win_start
            win_start = prev + 1
        else:
            win_len += 1
        last_seen[ch] = idx
    # The window that is still open at the end may be the longest one.
    if win_len > best_len:
        best_start, best_len = win_start, win_len
    return S[best_start:best_start + best_len]
EDITED:
The following is an implementation of the consensus approach. It occurred to me after my original post; so as not to delete the original, the new version is presented first and the original follows:
/**
 * Scans S with a window [x, y) and returns the longest window recorded, as
 * S.substring(start, end). A window replaces the recorded one only when it is
 * strictly longer, so the earliest of several equally long windows is kept.
 *
 * Characters are used directly as indices into a 256-entry table, so a
 * character with a code of 256 or more raises ArrayIndexOutOfBoundsException.
 *
 * @param S text to scan
 * @return the recorded window; the empty string when S is empty
 */
public static String longestUniqueString(String S) {
int start = 0, end = 0, length = 0;
// bits[c] is true while character c is considered part of the current window.
boolean bits[] = new boolean[256];
int x = 0, y = 0;
// The last condition stops early once the text remaining from x cannot be longer than the best window.
for (; x < S.length() && y < S.length() && length < S.length() - x; x++) {
bits[S.charAt(x)] = true;
// y is not reset to x + 1: it continues one past where the previous pass stopped.
for (y++; y < S.length() && !bits[S.charAt(y)]; y++) {
bits[S.charAt(y)] = true;
}
if (length < y - x) {
start = x;
end = y;
length = y - x;
}
// Unmark characters from the left until x reaches the character equal to S[y]; the loop's x++ then steps over it.
while(y<S.length() && x<y && S.charAt(x) != S.charAt(y))
bits[S.charAt(x++)]=false;
}
return S.substring(start, end);
}//
ORIGINAL POST:
Here is my two cents. Test strings included. boolean bits[] = new boolean[256] may be larger to encompass some larger charset.
/**
 * Returns the first longest substring of S without a repeated character by
 * trying every start position in turn and extending as far as possible.
 *
 * Characters index a 256-entry table, so the input is expected to contain only
 * characters with codes below 256.
 *
 * @param S text to scan
 * @return the longest repeat-free substring; the empty string when S is empty
 */
public static String longestUniqueString(String S) {
    final int n = S.length();
    final boolean[] seen = new boolean[256];
    int bestFrom = 0;
    int bestTo = 0;
    int from = 0;
    int to = 0;
    // Give up once the text remaining from 'from' cannot beat the best window,
    // or once the previous extension already reached the end of the input.
    while (from < n && to < n && (bestTo - bestFrom) < n - from) {
        Arrays.fill(seen, false);
        seen[S.charAt(from)] = true;
        to = from + 1;
        while (to < n && !seen[S.charAt(to)]) {
            seen[S.charAt(to)] = true;
            to++;
        }
        if (to - from > bestTo - bestFrom) {
            bestFrom = from;
            bestTo = to;
        }
        from++;
    }
    return S.substring(bestFrom, bestTo);
}
/** Prints the result of longestUniqueString for a fixed list of sample inputs. */
public static void main(String... args) {
    // One-element rows, so Arrays.toString shows each input inside brackets.
    final String[][] samples = {
        { "" }, { "a" }, { "ab" }, { "aab" }, { "abb" },
        { "aabc" }, { "abbc" }, { "aabbccdefgbc" },
        { "abcdeafghicabcdefghijklmnop" },
        { "abcdeafghicabcdefghijklmnopqrabcdx" },
        { "zxxaabcdeafghicabcdefghijklmnopqrabcdx" },
        { "aaabcdefgaaa" }
    };
    for (int row = 0; row < samples.length; row++) {
        final String[] sample = samples[row];
        System.out.printf("%s *** GIVES *** {%s}%n", Arrays.toString(sample),
                longestUniqueString(sample[0]));
    }
}
Here is one more solution with only 2 string variables:
/**
 * Returns the first longest substring of {@code inputStr} without a repeated
 * character, keeping only the current window and the best window as strings.
 *
 * @param inputStr text to scan; may be null
 * @return the longest repeat-free substring, or null when the input is null
 */
public static String getLongestNonRepeatingString(String inputStr){
    if (inputStr == null) {
        return null;
    }
    String best = "";
    String window = "";
    for (char next : inputStr.toCharArray()) {
        // The window never holds a character twice, so its only copy of 'next'
        // (if there is one) is also its last copy.
        int earlierCopy = window.indexOf(next);
        if (earlierCopy >= 0) {
            window = window.substring(earlierCopy + 1);
        }
        window = window + next;
        if (window.length() > best.length()) {
            best = window;
        }
    }
    return best;
}
Algorithm in JavaScript (w/ lots of comments)..
/**
 * Finds the longest substring of `input` without repeating characters.
 * When several substrings share the maximum length, the right-most one wins.
 *
 * Examples:
 *   "stackoverflow"               -> "stackoverfl"
 *   "stackoverflowabcdefghijklmn" -> "owabcdefghijklmn"
 *
 * @param {string} input text to scan
 * @returns {string} the longest repeat-free substring
 */
function findLongestNonRepeatingSubStr(input) {
    var lastSeenAt = {};   // character -> index of its most recent occurrence
    var windowStart = 0;   // the current window is input[windowStart .. pos)
    var best = "";
    var pos, ch, previous, windowText;

    for (pos = 0; pos < input.length; pos++) {
        ch = input.charAt(pos);
        previous = lastSeenAt[ch];
        // Only an earlier copy inside the current window counts as a duplicate.
        if (previous !== undefined && previous >= windowStart) {
            windowText = input.substring(windowStart, pos);
            // "<=" lets a later window of equal length replace an earlier one.
            if (best.length <= windowText.length) {
                best = windowText;
            }
            // Continue with the characters after the earlier copy.
            windowStart = previous + 1;
        }
        lastSeenAt[ch] = pos;
    }

    // The window that reaches the end of the input wins ties as well.
    var tail = input.substring(windowStart);
    return best.length > tail.length ? best : tail;
}
//console.log("stackoverflow => " + findLongestNonRepeatingSubStr("stackoverflow"));
//returns stackoverfl
//console.log("stackoverflowabcdefghijklmn => " + findLongestNonRepeatingSubStr("stackoverflowabcdefghijklmn"));
//returns owabcdefghijklmn
//console.log("1230123450101 => " + findLongestNonRepeatingSubStr("1230123450101"));
//returns 234501
We can consider all substrings one by one and check for each substring whether it contains all unique characters or not.
There will be n*(n+1)/2 substrings. Whether a substirng contains all unique characters or not can be checked in linear time by
scanning it from left to right and keeping a map of visited characters. The time complexity of this solution is therefore O(n^3).
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class LengthOfLongestSubstringWithOutRepeatingChar {
    public static void main(String[] args)
    {
        String s="stackoverflow";
        //allSubString(s);
        System.out.println("result of find"+find(s));
    }

    /**
     * Brute-force search: enumerates every distinct substring of {@code s},
     * keeps those without a repeated character and returns the first longest
     * one. Also prints the chosen length and substring as a diagnostic line.
     *
     * <p>The previous version additionally required the candidate to be shorter
     * than {@code s}, so an input that was already repeat-free (e.g. "abc")
     * lost its correct answer, namely the input itself.
     *
     * @param s text to scan
     * @return the longest repeat-free substring, or null when {@code s} is empty
     */
    public static String find(String s)
    {
        List<String> allSubsring=allSubString(s);
        // Insertion-ordered, so among equally long candidates the earliest wins.
        Set<String> main =new LinkedHashSet<String>();
        for(String temp:allSubsring)
        {
            if(!hasRepeatedChar(temp))
            {
                main.add(temp);
            }
        }
        String res=null;
        int longest=0;
        for(String temp:main)
        {
            if(temp.length()>longest)
            {
                longest=temp.length();
                res=temp;
            }
        }
        System.out.println(longest+"ha ha ha"+res+"he he he");
        return res;
    }

    /** Returns true when some character occurs at two different positions of {@code text}. */
    private static boolean hasRepeatedChar(String text)
    {
        for(int i=0;i<text.length();i++)
        {
            for(int k=text.length()-1;k>i;k--)
            {
                if(text.charAt(k)==text.charAt(i))
                {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Lists every distinct substring of {@code str}, ordered by end index and
     * then by start index, printing each one followed by the total count.
     */
    private static List<String> allSubString(String str) {
        List<String> all=new ArrayList<String>();
        int c=0;
        for (int i = 0; i < str.length(); i++) {
            for (int j = 0; j <= i; j++) {
                String piece = str.substring(j, i + 1);
                if (!all.contains(piece))
                {
                    c++;
                    all.add(piece);
                }
            }
        }
        for(String temp:all)
        {
            System.out.println("substring :-"+temp);
        }
        System.out.println("count"+c);
        return all;
    }
}
Another O(n) JavaScript solution. It does not alter strings during the looping; it just keeps track of the offset and length of the longest sub string so far:
/**
 * Returns the first longest substring of `str` without a repeated character.
 * Only offsets are tracked while scanning; the string is sliced once at the end.
 *
 * @param {string} str text to scan
 * @returns {string} the longest repeat-free substring
 */
function longest(str) {
    var inWindow = {};   // character -> truthy while it lies inside the window
    var left = 0;
    var bestLeft = 0;
    var bestLength = 0;
    for (var right = 0; right < str.length; right++) {
        var incoming = str[right];
        // Evict from the left until the earlier copy of 'incoming' is gone.
        while (inWindow[incoming]) {
            inWindow[str[left]] = 0;
            left++;
        }
        inWindow[incoming] = 1;
        var length = right + 1 - left;
        if (length > bestLength) {
            bestLeft = left;
            bestLength = length;
        }
    }
    return str.substr(bestLeft, bestLength);
}
// I/O for snippet
// Demo wiring: on every 'input' event of the first <input> element, write
// longest(<current field value>) into the first <span> element of the page.
document.querySelector('input').addEventListener('input', function () {
document.querySelector('span').textContent = longest(this.value);
});
Enter word:<input><br>
Longest: <span></span>
simple python snippet
l=length p=position
maxl=maxlength maxp=maxposition
Tested and working. For easy understanding, I suppose there's a drawer to put the letters.
Function:
/**
 * Returns the length of the longest substring of {@code s} without a repeated
 * character. The "drawer" holds exactly the characters of the current window.
 *
 * @param s text to scan
 * @return length of the longest repeat-free substring; 0 for the empty string
 */
public int lengthOfLongestSubstring(String s) {
    HashSet<Character> drawer = new HashSet<Character>();
    int longest = 0;
    int left = 0;
    for (int right = 0; right < s.length(); right++) {
        char incoming = s.charAt(right);
        if (drawer.add(incoming)) {
            // New letter: the window grew, so it may be a new maximum.
            longest = Math.max(longest, right + 1 - left);
            continue;
        }
        // Duplicate: take every letter before the earlier copy out of the drawer...
        while (s.charAt(left) != incoming) {
            drawer.remove(s.charAt(left));
            left++;
        }
        // ...and step over the earlier copy itself. The letter stays in the
        // drawer because the copy just read takes its place in the window.
        left++;
    }
    return longest;
}
Longest substring without repeating characters in Java
/**
 * Returns the length of the longest substring of {@code s} without a repeated
 * character.
 *
 * <p>The previous version split the text with {@code s.split("")} and indexed
 * the resulting array with positions of {@code s}. That assumes one array
 * element per character, which does not hold on Java 7, where the array starts
 * with an extra empty string. Reading characters with {@code charAt} removes
 * that assumption and the per-character string allocations.
 *
 * @param s text to scan
 * @return length of the longest repeat-free substring; 0 for the empty string
 */
public int lengthOfLongestSubstring(String s) {
    if(s.equals(""))
        return 0;
    // Maps each character to the index of its most recent occurrence.
    HashMap<Character,Integer> lastIndex = new HashMap<>();
    int l_all = 0;
    int start = 0;
    for(int i = 0; i < s.length(); i++){
        Integer previous = lastIndex.put(s.charAt(i), i);
        // An earlier copy inside the window forces the window to start after it.
        if (previous != null && previous >= start) {
            start = previous + 1;
        }
        int l_till = i - start + 1;
        if(l_till > l_all){
            l_all = l_till;
        }
    }
    return l_all;
}
I was asked the same question in an interview.
I have written Python3 code, to find the first occurrence of the substring with all distinct chars. In my implementations, I start with index = 0 and iterate over the input string. While iterating used a Python dict seems to store indexes of chars in input-string those has been visited in the iteration.
In iteration, if char c, does not find in current substring – raise KeyError exception
if c is found to be a duplicate char in the current substring (as c previously appeared during iteration – named that index last_seen) start a new substring
def lds(string: str) -> str:
    """Return the first longest substring of ``string`` with all-distinct characters.

    ``last_index`` remembers where each character was seen most recently. A
    character repeats inside the current window only when that index is not left
    of ``window_start``; the window is then offered as a candidate and restarted
    just after the earlier occurrence.
    """
    last_index = {}
    best_start, best_end = 0, 0
    window_start = 0
    for position, char in enumerate(string):
        previous = last_index.get(char)
        if previous is not None and previous >= window_start:
            # The window [window_start, position) ends just before the repeat.
            if best_end - best_start < position - window_start:
                best_start, best_end = window_start, position
            window_start = previous + 1
        last_index[char] = position
    # The window reaching the end of the string never meets a repeat, so it is
    # compared here; for an empty string both sides are zero.
    if best_end - best_start < len(string) - window_start:
        best_start, best_end = window_start, len(string)
    return string[best_start:best_end]
/// <summary>
/// Returns the first longest substring of <paramref name="word"/> that has no
/// repeated character.
/// </summary>
/// <remarks>
/// The previous version emptied the running string when a repeat was met but
/// left the set untouched and dropped the repeated character, so every later
/// occurrence of an already-seen character reset the result again
/// ("aab" produced "a" instead of "ab").
/// </remarks>
/// <param name="word">Text to scan.</param>
/// <returns>The longest repeat-free substring; empty for an empty input.</returns>
private static string LongestSubstring(string word)
{
    var window = new HashSet<char>();
    int start = 0;
    int bestStart = 0;
    int bestLength = 0;
    for (int end = 0; end < word.Length; end++)
    {
        char c = word[end];
        // Add returns false while c is still in the window: evict from the
        // left until the earlier copy has been removed.
        while (!window.Add(c))
        {
            window.Remove(word[start]);
            start++;
        }
        int length = end - start + 1;
        if (length > bestLength)
        {
            bestLength = length;
            bestStart = start;
        }
    }
    return word.Substring(bestStart, bestLength);
}
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
public class LongestSubString2 {
    /**
     * For a fixed sample text, prints the length of the longest substring
     * without repeated characters, followed by the set of distinct substrings
     * of that length that were found.
     */
    public static void main(String[] args) {
        String input = "stackoverflowabcdefghijklmn";
        // Best repeat-free run found within each suffix of the input.
        List<String> allOutPuts = new ArrayList<String>();
        for (int k = 0; k < input.length(); k++) {
            String input1 = input.substring(k);
            allOutPuts.add(getLongestSubString(input1));
        }
        // Group the candidates by length; a TreeMap keeps its keys sorted, so
        // lastKey() is the greatest length.
        TreeMap<Integer, Set<String>> map = new TreeMap<Integer, Set<String>>();
        for (String str : allOutPuts) {
            int strLen = str.length();
            Set<String> sameLength = map.get(strLen);
            if (sameLength == null) {
                sameLength = new HashSet<String>();
                map.put(strLen, sameLength);
            }
            sameLength.add(str);
        }
        System.out.println(map.lastKey());
        System.out.println(map.get(map.lastKey()));
    }

    /** Prints each element of the array, cast to char, on its own line. */
    private static void printArray(Object[] currentObjArr) {
        for (Object obj : currentObjArr) {
            char str = (char) obj;
            System.out.println(str);
        }
    }

    /**
     * Cuts {@code input} greedily into runs without repeated characters, each
     * new run starting at the character that repeated, and returns the first
     * longest run.
     *
     * <p>The previous version restarted with
     * {@code new LinkedHashSet<Character>(input.charAt(i))}, which selects the
     * initial-capacity constructor and creates an EMPTY set, so the repeated
     * character was silently left out of the following run.
     */
    private static String getLongestSubString(String input) {
        Set<Character> set = new LinkedHashSet<Character>();
        String longestString = "";
        for (int i = 0; i < input.length(); i++) {
            char currentChar = input.charAt(i);
            if (set.add(currentChar)) {
                continue;
            }
            // currentChar repeats: close the current run and start a new one
            // that contains currentChar.
            String currentStr = getStringFromSet(set);
            if (currentStr.length() > longestString.length()) {
                longestString = currentStr;
            }
            set = new LinkedHashSet<Character>();
            set.add(currentChar);
        }
        // The run that reaches the end of the input is a candidate as well.
        String lastRun = getStringFromSet(set);
        if (lastRun.length() > longestString.length()) {
            longestString = lastRun;
        }
        return longestString;
    }

    /** Concatenates the characters of {@code set} in iteration order. */
    private static String getStringFromSet(Set<Character> set) {
        StringBuilder text = new StringBuilder();
        for (Character c : set) {
            text.append(c);
        }
        return text.toString();
    }
}
This is my solution, and it was accepted by leetcode. However, after I saw the stats, I saw whole lot solutions has much faster result....meaning, my solution is around 600ms for all their test cases, and most of the js solutions are around 200 -300 ms bracket.. who can tell me why my solution is slowwww??
/**
 * Returns the length of the longest substring of `s` without a repeated
 * character. Whenever a repeat is met, the scan restarts just after the
 * earlier copy of the repeated character.
 *
 * @param {string} s text to scan
 * @returns {number} length of the longest repeat-free substring
 */
var lengthOfLongestSubstring = function(s) {
    var chars = s.split("");
    if (s.length === 0 || s.length === 1) {
        return s.length;
    }
    var windowStart = 0;
    var windowText = chars[windowStart];
    var longest = 0;
    var next = 1;
    while (next < chars.length) {
        var earlierCopy = windowText.indexOf(chars[next]);
        if (earlierCopy !== -1) {
            // Record the window, then restart right after the earlier copy.
            longest = Math.max(longest, windowText.length);
            windowStart = windowStart + earlierCopy + 1;
            windowText = chars[windowStart];
            next = windowStart + 1;
            continue;
        }
        windowText += chars[next];
        longest = Math.max(longest, windowText.length);
        next++;
    }
    return longest;
};
I am posting O(n^2) in python . I just want to know whether the technique mentioned by Karoly Horvath has any steps that are similar to existing search/sort algorithms ?
My code :
# Brute-force demo: for each starting index of `test`, grows a substring until a
# character repeats and prints the longest substring recorded so far.
# NOTE(review): the indentation of this snippet was lost, so the nesting of the
# `elif`, `break` and the statements after the inner loop is ambiguous here;
# the print statements use Python 2 syntax.
def main():
test='stackoverflow'
# Substring being grown from the current starting index.
tempstr=''
maxlen,index=0,0
# Longest substring recorded so far.
indexsubstring=''
print 'Original string is =%s\n\n' %test
# One pass of the inner loop per starting index.
while(index!=len(test)):
for char in test[index:]:
if char not in tempstr:
tempstr+=char
if len(tempstr)> len(indexsubstring):
indexsubstring=tempstr
elif (len(tempstr)>=maxlen):
maxlen=len(tempstr)
indexsubstring=tempstr
break
# Start again with an empty substring for the next starting index.
tempstr=''
print 'max substring length till iteration with starting index =%s is %s'%(test[index],indexsubstring)
index+=1
if __name__=='__main__':
main()
Simple and Easy
import java.util.Scanner;
public class longestsub {
    static Scanner sn = new Scanner(System.in);
    // One line read from standard input when the class is initialised.
    static String word = sn.nextLine();

    public static void main(String[] args) {
        System.out.println("The Length is " +check(word));
    }

    /**
     * Prints the first longest substring of {@code word} without a repeated
     * character and returns its length.
     *
     * <p>The previous version collected every distinct character of the whole
     * word, so it reported the number of different characters instead of the
     * longest repeat-free substring ("abcabcbb" and "abca" both gave the
     * scrambled set rather than a real substring).
     */
    private static int check(String word) {
        String store="";   // current window without repeats
        String best="";
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            int earlierCopy = store.indexOf(c);
            if (earlierCopy >= 0) {
                // Drop everything up to and including the earlier copy of c.
                store = store.substring(earlierCopy + 1);
            }
            store = store+c;
            if (store.length() > best.length()) {
                best = store;
            }
        }
        System.out.println("Result word " +best);
        return best.length();
    }
}
Not quite optimized but simple answer in Python
def lengthOfLongestSubstring(s):
    """Return the length of the longest substring of ``s`` without a repeated character.

    ``seen`` records every character met so far. For a character met before,
    its previous position is looked up with ``rfind`` and the window start is
    moved past it, unless the start is already further right.
    """
    seen = set()
    longest = 0
    window_start = 0
    for idx, ch in enumerate(s):
        if ch in seen:
            # Search only the text before idx for the previous occurrence.
            window_start = max(window_start, s.rfind(ch, 0, idx) + 1)
        else:
            seen.add(ch)
        longest = max(longest, idx + 1 - window_start)
    return longest
I think the costly affair is rfind which is why it's not quite optimized.
This is my solution. Hope it helps.
/**
 * Returns the length of the longest substring of `str` without a duplicated
 * character. When a duplicate is met, the scan restarts at the character that
 * follows the earlier copy.
 *
 * @param {string} str text to scan
 * @returns {number} length of the longest duplicate-free substring
 */
function longestSubstringWithoutDuplication(str) {
    // Strings of length 0 or 1 are their own answer.
    if (str.length === 0) {
        return 0;
    }
    if (str.length === 1) {
        return 1;
    }

    var best = 0;
    var seenAt = {};   // character -> index where it entered the current run
    var runSize = 0;   // number of characters recorded in seenAt

    for (var i = 0; i < str.length; i++) {
        var ch = str.charAt(i);
        if (seenAt[ch] == undefined) {
            seenAt[ch] = i;
            runSize++;
            best = Math.max(best, runSize);
        } else {
            best = Math.max(best, runSize);
            // Jump back to the earlier copy; the loop's i++ then resumes with
            // the character right after it, and the run starts empty again.
            i = seenAt[ch];
            seenAt = {};
            runSize = 0;
        }
    }
    return best;
}
here is my javascript and cpp implementations with great details: https://algorithm.pingzhang.io/String/longest_substring_without_repeating_characters.html
We want to find the longest substring without repeating characters. The first thing comes to my mind is that we need a hash table to store every character in a substring so that when a new character comes in, we can easily know whether this character is already in the substring or not. I call it as valueIdxHash. Then, a substring has a startIdx and endIdx. So we need a variable to keep track of the starting index of a substring and I call it as startIdx. Let's assume we are at index i and we already have a substring (startIdx, i - 1). Now, we want to check whether this substring can keep growing or not.
If the valueIdxHash contains str[i], it means it is a repeated character. But we still need to check whether this repeated character is in the substring (startIdx, i - 1). So we need to retrieve the index of str[i] that is appeared last time and then compare this index with startIdx.
If startIdx is larger, it means the last appeared str[i] is outside of the substring. Thus the subtring can keep growing.
If startIdx is smaller, it means the last appeared str[i] is within of the substring. Thus, the substring cannot grow any more. startIdx will be updated as valueIdxHash[str[i]] + 1 and the new substring (valueIdxHash[str[i]] + 1, i) has potential to keep growing.
If the valueIdxHash does not contain str[i], the substring can keep growing.
I modified my solution to "find the length of the longest substring without repeating characters".
/// <summary>
/// Returns the first longest substring of <paramref name="s"/> without a
/// repeated character.
/// </summary>
/// <remarks>
/// The previous version sliced the result with the window start that was
/// current when the loop ended, not the start of the best window, which gave
/// the wrong text or an ArgumentOutOfRangeException (e.g. for "abcabcbb").
/// </remarks>
/// <param name="s">Text to scan.</param>
/// <returns>The longest repeat-free substring; empty for an empty input.</returns>
public string LengthOfLongestSubstring(string s) {
    var dict = new Dictionary<char, int>();   // character -> last index seen
    var start = 0;
    var bestStart = 0;
    var res = 0;
    for(int i = 0; i < s.Length; i++)
    {
        int previous;
        if(dict.TryGetValue(s[i], out previous))
        {
            start = Math.Max(start, previous + 1); //update start index
        }
        dict[s[i]] = i;
        var length = i - start + 1;
        if(length > res) //track max length together with where it begins
        {
            res = length;
            bestStart = start;
        }
    }
    return s.Substring(bestStart, res);
}
import java.util.HashMap;
import java.util.HashSet;
public class SubString {
    /**
     * Returns the first longest substring of {@code input} without a repeated
     * character.
     *
     * <p>The previous version restarted the scan with {@code i = start} inside a
     * {@code for} loop, so the loop's own increment skipped the first character
     * of every restarted window ("dvdf" produced "dv" instead of "vdf"). The
     * window is now slid forward without rescanning.
     *
     * @param input text to scan
     * @return the longest repeat-free substring; the empty string for empty input
     */
    public static String subString(String input) {
        // Maps each character to the index of its most recent occurrence.
        HashMap<Character,Integer> chars = new HashMap<>();
        int start=0;
        int bestStart=0;
        int bestLength=0;
        for (int i = 0; i < input.length(); i++) {
            Integer previous = chars.put(input.charAt(i), i);
            // An earlier copy inside the window pushes the window start past it.
            if (previous != null && previous >= start) {
                start = previous + 1;
            }
            int length = i - start + 1;
            if (length > bestLength) {
                bestLength = length;
                bestStart = start;
            }
        }
        return input.substring(bestStart, bestStart + bestLength);
    }

    public static void main(String[] args) {
        String input = "stackoverflowabcdefghijklmn";
        System.out.println(subString(input));
    }
}
Here are two ways to approach this problem in JavaScript.
A Brute Force approach is to loop through the string twice, checking every substring against every other substring and finding the maximum length where the substring is unique. We'll need two functions: one to check if a substring is unique and a second function to perform our double loop.
// O(n) time
/**
 * Tells whether every character of `str` occurs only once, by comparing the
 * number of distinct characters with the length of the string.
 *
 * @param {string} str text to check
 * @returns {boolean} true when no character is repeated
 */
const allUnique = str => new Set(str).size == str.length;
// O(n^3) time, O(k) size where k is the size of the set
/**
 * Brute force: checks every substring str[i, j) with allUnique and returns the
 * greatest length found.
 *
 * The previous version returned from inside the outer loop (so only substrings
 * starting at index 0 were examined, and a one-character input returned
 * undefined) and its loop bounds never produced a substring that included the
 * last character.
 *
 * @param {string} str text to scan
 * @returns {number} length of the longest substring without repeated characters
 */
const lengthOfLongestSubstring = str => {
  let maxResult = 0;
  for (let i = 0; i < str.length; i++) {
    // substring(i, j) excludes index j, so j must be allowed to reach str.length.
    for (let j = i + 1; j <= str.length; j++) {
      if (allUnique(str.substring(i, j))) {
        maxResult = Math.max(maxResult, j - i);
      }
    }
  }
  return maxResult;
}
This has a time complexity of O(n^3) since we perform a double loop O(n^2) and then another loop on top of that O(n) for our unique function. The space is the size of our set which can be generalized to O(n) or more accurately O(k) where k is the size of the set.
A Greedy Approach is to loop through only once and keep track of the maximum unique substring length as we go. We can use either an array or a hash map, but I think the new .includes() array method is cool, so let's use that.
/**
 * Greedy single pass: `result` holds the characters of the current window
 * without repeats, and the greatest window size seen is returned.
 *
 * The previous version never removed anything from `result` and stored the
 * INDEX of the latest duplicate as the answer, so "abc" gave 0 and "abcabcbb"
 * gave 7.
 *
 * @param {string} str text to scan
 * @returns {number} length of the longest substring without repeated characters
 */
const lengthOfLongestSubstring = str => {
  let result = [],
      maxResult = 0;
  for (let i = 0; i < str.length; i++) {
    if (result.includes(str[i])) {
      // Drop everything up to and including the earlier copy of this character.
      result = result.slice(result.indexOf(str[i]) + 1);
    }
    result.push(str[i]);
    maxResult = Math.max(maxResult, result.length);
  }
  return maxResult;
}
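This has a time complexity of O(n·k), because every .includes() call scans an array of up to k characters, and a space complexity of O(k), where k is the number of distinct characters. If the alphabet is treated as a fixed constant, that is O(n) time and O(1) space.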
This problem can be solved in O(n) time complexity.
Initialize three variables
Start (index pointing to the start of the non repeating substring, Initialize it as 0 ).
End (index pointing to the end of the non repeating substring, Initialize it as 0 )
Hasmap (Object containing the last visited index positions of characters. Ex : {'a':0, 'b':1} for string "ab")
Steps :
Iterate over the string and perform following actions.
If the current character is not present in hashmap (), add it as to
hashmap, character as key and its index as value.
If current character is present in hashmap, then
a) Check whether the start index is less than or equal to the value present in the hashmap against the character (last index of same character earlier visited),
b) it is less then assign start variables value as the hashmaps' value + 1 (last index of same character earlier visited + 1);
c) Update hashmap by overriding the hashmap's current character's value as current index of character.
d) Calculate the end-start as the longest substring value and update if it's greater than earlier longest non-repeating substring.
Following is the Javascript Solution for this problem.
/**
 * Returns the length of the longest substring of `s` without repeating
 * characters, using a map from each character to its last visited index.
 *
 * @param {string} s text to scan
 * @returns {number} length of the longest repeat-free substring
 */
var lengthOfLongestSubstring = function(s) {
    let lastIndexOf = {};
    let windowStart = 0;
    let longest = 0;
    for (let pos = 0; pos < s.length; pos++) {
        const ch = s[pos];
        // Move the window only when the earlier copy lies inside it.
        if (lastIndexOf.hasOwnProperty(ch) && lastIndexOf[ch] >= windowStart) {
            windowStart = lastIndexOf[ch] + 1;
        }
        lastIndexOf[ch] = pos;
        longest = Math.max(longest, pos + 1 - windowStart);
    }
    return longest;
};
Question: Find the longest substring without repeating characters.
Example 1 :
import java.util.LinkedHashMap;
import java.util.Map;
public class example1 {
    public static void main(String[] args) {
        final String a = "abcabcbb";
        // output => 3
        System.out.println( lengthOfLongestSubstring(a));
    }

    /**
     * Returns the length of the longest substring of {@code a} without repeated
     * characters; 0 for null or empty input. When a repeat is met, the scan
     * restarts right after the earlier copy with an empty map.
     */
    private static int lengthOfLongestSubstring(String a) {
        if (a == null || a.length() == 0) {
            return 0;
        }
        final Map<Character, Integer> positions = new LinkedHashMap<>();
        int longest = 0;
        for (int i = 0; i < a.length(); i++) {
            final char ch = a.charAt(i);
            final Integer earlier = positions.get(ch);
            if (earlier == null) {
                // First sighting in this run: remember where it is.
                positions.put(ch, i);
            } else {
                // Rewind to the earlier copy (the loop's i++ then steps past
                // it) and forget the current run.
                i = earlier;
                positions.clear();
            }
            longest = Math.max(longest, positions.size());
        }
        return longest;
    }
}
if you want the longest string without the repeating characters as output then do this inside the for loop:
String res ="";// global
int len = 0 ;//global
if(len < map.size()) {
len = map.size();
res = map.keySet().toString();
}
System.out.println("len -> " + len);
System.out.println("res => " + res);
def max_substring(string):
    """Return the first longest substring of ``string`` without a repeated character."""
    window = ''
    best = ''
    for ch in string:
        # -1 when ch is not in the window, so the slice below keeps all of it.
        cut = find_index(ch, window)
        window = window[cut + 1:] + ch
        if len(window) > len(best):
            best = window
    return best


def find_index(x, lst):
    """Return the index of the first element of ``lst`` equal to ``x``, or -1."""
    for position in range(len(lst)):
        if lst[position] == x:
            return position
    return -1
can we use something like this .
def longestpalindrome(str1):
    """Return the length of the longest substring of ``str1`` without repeating characters.

    Despite its name, this function has nothing to do with palindromes; the
    name is kept so existing callers keep working.

    Counting the distinct characters of the whole string is not sufficient:
    'pwwkew' has four distinct characters, but its longest repeat-free
    substring ('wke') has length 3. A sliding window over the string is used
    instead.
    """
    last_seen = {}      # character -> index of its most recent occurrence
    window_start = 0    # first index of the current repeat-free window
    best = 0
    for pos, ch in enumerate(str1):
        prev = last_seen.get(ch, -1)
        if prev >= window_start:
            # ch already occurs inside the window: restart just after it.
            window_start = prev + 1
        last_seen[ch] = pos
        best = max(best, pos - window_start + 1)
    return best
# Demo: call longestpalindrome on a sample string and print the value it returns.
str1='abadef'
a=longestpalindrome(str1)
print(a)
That is, if only the length of the substring is to be returned.
Algorithm: 1) Initialise an empty dictionary dct to check whether a character already exists in the current substring. 2) cnt keeps the count of characters in the current substring without repeating characters. 3) l and r are the two pointers, both initialised to the first index of the string. 4) Loop through each character of the string. 5) If the character is not present in dct, add it and increase cnt. 6) If it is already present, check whether cnt is greater than resStrLen and, if so, update the result. 7) Remove the character at the trailing pointer from dct, shift that pointer by 1 and decrease cnt. 8) Repeat 5, 6 and 7 until l or r is greater than or equal to the length of the input string. 9) Do one more check at the end to handle cases such as an input string with no repeating characters. Here is a simple Python program to find the longest substring without repeating characters:
# Find the longest substring of `a` without repeating characters using two
# pointers over the string. Note the naming: `l` is the LEADING index (the next
# character to add) and `r` is the TRAILING index (the oldest character still
# in the window).
a="stackoverflow"
strLength = len(a)
dct={}          # characters currently inside the window a[r:l]
resStrLen=0     # length of the best window found so far
cnt=0           # number of characters in the current window
l=0
r=0
strb=l          # best window is a[strb:stre]
stre=l
while(l<strLength and r<strLength):
    if a[l] in dct:
        # a[l] would repeat: record the current window if it is the best so
        # far, then shrink it from the trailing side by one character.
        if cnt>resStrLen:
            resStrLen=cnt
            strb=r
            stre=l
        dct.pop(a[r])
        cnt=cnt-1
        r+=1
    else:
        # a[l] is new: extend the window by one character.
        cnt+=1
        dct[a[l]]=1
        l+=1
# The window that reaches the end of the string is never recorded inside the
# loop, so check it once more here.
if cnt>resStrLen:
    resStrLen=cnt
    strb=r
    stre=l
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Result String Length : "+str(resStrLen))
print("Result String : " + a[strb:stre])
The solution in C.
#include<stdio.h>
#include <string.h>
/*
 * Locate the first longest substring of `a` that contains no repeated
 * character.
 *
 * a     - NUL-terminated input string (a NULL pointer is treated as empty).
 * start - out: index of the first character of the substring.
 * last  - out: index one past the last character, so the substring is the
 *         half-open range [*start, *last) and its length is *last - *start.
 *
 * For an empty input both outputs are 0.
 */
void longstr(char* a, int *start, int *last)
{
    int last_seen[256];   /* most recent index of each byte value, -1 if unseen */
    int win_start = 0;    /* first index of the current repeat-free window */
    int best_len = 0;
    int i;

    *start = *last = 0;
    if (a == NULL)
    {
        return;
    }

    for (i = 0; i < 256; i++)
    {
        last_seen[i] = -1;
    }

    /* Stop at the terminator instead of calling strlen() on every iteration. */
    for (i = 0; a[i] != '\0'; i++)
    {
        /* Plain char may be signed; index the table with the unsigned value. */
        unsigned char c = (unsigned char)a[i];

        /* A previous occurrence inside the window forces the window to
           restart just after it. */
        if (last_seen[c] >= win_start)
        {
            win_start = last_seen[c] + 1;
        }
        last_seen[c] = i;

        /* Record start and end together so they always describe the same window. */
        if (i + 1 - win_start > best_len)
        {
            best_len = i + 1 - win_start;
            *start = win_start;
            *last = i + 1;
        }
    }
}
/*
 * Demo driver: finds the longest substring without repeating characters in a
 * fixed sample string and prints it.
 */
int main()
{
    char str[] = "ABDEFGABEF";
    printf("The input string is %s \n", str);
    int start, last;
    longstr(str, &start, &last);
    /* The result is treated as last - start characters beginning at str + start. */
    int len = last - start;
    /* Shift the result to the front of the buffer (regions may overlap, hence memmove). */
    memmove(str, (str + start), (size_t)len);
    /* Terminate right after the copied characters. Writing the terminator at
       str[last] is only correct when start == 0. */
    str[len] = '\0';
    printf("the longest non-repeating character substring is %s", str);
    return 0;
}
/**
 * Returns the length of the longest substring of {@code s} that contains no
 * repeated character.
 *
 * @param s the input string; {@code null} is treated like the empty string
 * @return the length of the longest repeat-free substring, 0 for null or empty input
 */
public int lengthOfLongestSubstring(String s) {
    if (s == null) {
        return 0;
    }
    int startIndex = 0;
    int maxLength = 0;
    // One slot per possible char value. A 256-entry table would throw
    // ArrayIndexOutOfBoundsException for any character above 255.
    int[] lastSeen = new int[Character.MAX_VALUE + 1];
    Arrays.fill(lastSeen, -1);
    for (int i = 0; i < s.length(); i++) {
        char c = s.charAt(i);
        int previous = lastSeen[c];
        // A repeat only matters when the earlier occurrence lies inside the
        // current window; then close the window and restart just after it.
        if (previous >= startIndex) {
            maxLength = Math.max(maxLength, i - startIndex);
            startIndex = previous + 1;
        }
        lastSeen[c] = i;
    }
    // The window that reaches the end of the string is never closed inside the loop.
    return Math.max(maxLength, s.length() - startIndex);
}
This is the fastest approach: it runs in linear time and uses constant space.

Resources