Find rank of lottery combinations - combinatorics

I need to find the rank/index of a lottery combination and be able to reverse the process (Find the lottery combination given its rank).
Consider a lottery game with 5 balls from 1 to 45 and 1 powerball from 1 to 20. Duplication is not allowed and the order does not matter. The number of combinations is:
(45 * 44 * 43 * 42 * 41 / 5!) * 20 = 24,435,180
The first combination (index 0) is:
1, 2, 3, 4, 5, 1
The last combination (index 24,435,179) is:
41, 42, 43, 44, 45, 20
How can I convert a combination into its index and vice versa without exhaustively enumerating all combinations?
I came across this MSDN article, which shows how to get the combination of a given index. However, I don't know how to get the index from a combination. I've tried:
Choose(c1,k) + Choose(c2,k-1) + Choose(c3,k-2) + Choose(c4,k-3) ...
Where ci is the number at position i in the ordered combination set and k is the size of the set. Since the index of a combination depends on the range of the elements, this does not work. Additionally, I'm not sure if it is going to work with elements of different sizes in the set (e.g. main pool's range is 1-45, powerball's range is 1-20)

I was able to figure it out. I created a new Combination class, based on the MSDN example, which can convert a combination to an index and vice versa. A separate index is retrieved for each pool of numbers. All the indices are then combined to represent a combination with elements of different sizes.
A Pool class was also created to represent the settings of a pool (Range of elements, size etc). The Pool class:
public class Pool {
public int From { get; set; }
public int To { get; set; }
public int Size { get; set; }
public int Numbers { get { return (To - From + 1); } }
public Pool(int From, int To, int Size) {
this.From = From;
this.To = To;
this.Size = Size ;
}
}
The Combination class:
class Combination {
public Pool[] Pools { get; set; }
public long[][] Data { get; set; } //First index represents pool index, second represents the numbers
public Combination(Pool[] Pools, long[][] Data) {
this.Pools = Pools;
this.Data = Data;
if (Data.GetLength(0) != Pools.Length) {
throw (new ArgumentException("Invalid data length"));
}
for (int i = 0; i < Data.GetLength(0); i++) {
if (Data[i].Length != Pools[i].Size) {
throw (new ArgumentException("Invalid data length"));
}
}
}
public static Combination FromIndex(long Index, Pool[] Pools) {
long[][] elements = new long[Pools.Length][];
long[] c = new long[Pools.Length - 1];
long cumulative = 1;
for (int i = 0; i < Pools.Length - 1; i++) {
c[i] = Combination.Choose(Pools[i].Numbers, Pools[i].Size);
checked {
cumulative *= c[i];
}
}
for (int i = Pools.Length - 1; i >= 1; i--) {
long ind = Index / cumulative;
Index -= ind * cumulative;
cumulative /= c[i - 1];
elements[i] = Combination.FromIndex(ind, Pools[i]);
}
elements[0] = Combination.FromIndex(Index, Pools[0]);
return (new Combination(Pools, elements));
}
public static long[] FromIndex(long Index, Pool Pool) {
long[] ans = new long[Pool.Size];
long a = (long)Pool.Numbers;
long b = (long)Pool.Size;
long x = GetDual((long)Pool.Numbers, (long)Pool.Size, Index);
for (int k = 0; k < Pool.Size; k++) {
ans[k] = LargestV(a, b, x);
x -= Choose(ans[k], b);
a = ans[k];
b--;
}
for (int k = 0; k < Pool.Size; k++) {
ans[k] = ((long)Pool.Numbers - 1) - ans[k];
}
//Transform to relative
for (int i = 0; i < ans.Length; i++) {
ans[i] += Pool.From;
}
return (ans);
}
private static long GetDual(long To, long Size, long m) {
return (Choose(To, Size) - 1) - m;
}
public static long Choose(long To, long Size) {
if (To < 0 || Size < 0)
throw new Exception("Invalid negative parameter in Choose()");
if (To < Size)
return 0; // special case
if (To == Size)
return 1;
long delta, iMax;
if (Size < To - Size) {
delta = To - Size;
iMax = Size;
} else {
delta = Size;
iMax = To - Size;
}
long ans = delta + 1;
for (long i = 2; i <= iMax; ++i) {
checked {
ans = (ans * (delta + i)) / i;
}
}
return ans;
}
private static long LargestV(long a, long b, long x) {
long v = a - 1;
while (Choose(v, b) > x)
--v;
return v;
}
public long ToIndex() {
long Index = 0;
long cumulative = 1;
for (int i = 0; i < Pools.Length; i++) {
checked {
Index += ToIndex(i) * cumulative;
cumulative *= Combination.Choose(Pools[i].Numbers, Pools[i].Size);
}
}
return (Index);
}
public long ToIndex(int PoolIndex) {
long ind = 0;
for (int i = 0; i < Pools[PoolIndex].Size; i++) {
long d = (Pools[PoolIndex].Numbers - 1) - (Data[PoolIndex][i] - Pools[PoolIndex].From);
ind += Choose(d, Pools[PoolIndex].Size - i);
}
ind = GetDual(Pools[PoolIndex].Numbers, Pools[PoolIndex].Size, ind);
return (ind);
}
public override string ToString() {
string s = "{ ";
for (int i = 0; i < Data.Length; ++i) {
for (int k = 0; k < Data[i].Length; k++) {
s += Data[i][k] + " ";
}
if (i != Data.Length - 1) {
s += "| ";
}
}
s += "}";
return s;
}
}
To see this in action:
//Create pools
Pool[] pools = new Pool[2];
pools[0] = new Pool(1, 45, 5);
pools[1] = new Pool(1, 20, 1);
//Create a combination
long[][] data = new long[][] { new long[] { 41, 42, 43, 44, 45 }, new long[] { 20 } };
Combination combination = new Combination(pools, data);
//Get index from combination:
long index = combination.ToIndex();
Console.WriteLine("Index: " + index);
//Get combination from index:
Combination combFromIndex = Combination.FromIndex(index, pools);
Console.WriteLine("Combination: " + combFromIndex);
Output:
Index: 24435179
Combination: { 41 42 43 44 45 | 20 }

Related

C# Lock implement goes wrong

I try to make a lock on the incrementer "counter" in the Countrange method.
The counter should count under some conditions in the threads. If I complete the threads in order (not parallel) i get the right counter value over and over, but when i try to lock like i do in de program, it still isn't threadsafe, because i get diffrent counter values when i run the program a few times.
{
class Program
{
static int e, p, b, m;
static int counter = 0;
static int lok = 0;
static void Main(string[] args)
{
string input = Console.ReadLine();
string[] inputs = input.Split(' ');
counter = 0;
p = Convert.ToInt32(inputs[4]);
e = Convert.ToInt32(inputs[2]);
b = Convert.ToInt32(inputs[1]);
m = Convert.ToInt32(inputs[3]);
Thread[] ts = new Thread[p];
for (int t = 0; t < p; t++)
{
ts[t] = new Thread(countrange);
}
for (int t = 0; t < p; t++)
{
ts[t].Start(t);
}
for (int t = 0; t < p; t++)
{
ts[t].Join();
}
Console.Write(counter);
//Console.ReadKey();
}
public static void countrange(object mt)
{
int to = eind * (((int)mt + 1) / p) + verdeler + b;
int from = eind * (((int)mt) / p) + verdeler - a + b;
for (int i = from; i < to; i++)
{
proef = proef + moduler * keer;
}
{
if (proef % m == 0)
lock (mt)
{
counter++;
}
}
}
}
}
}
I made the lock static, now it works

Dynamic programming money change

I have two functions recursive and iterative to calculate money change; in the iterative version I needed to check for if the money is multiple of change (money modulus change is zero): is there a way to do iterative without checking for modulus?
public static int MoneyChangeRecur(int money, int[] changes)
{
int minChange = 0;
if (money == 0) return 0;
int min = int.MaxValue;
for (int i = 0; i < changes.Length; i++)
{
if (money >= changes[i])
{
minChange = MoneyChangeRecur(money - changes[i], changes);
if (minChange + 1 <= min)
{
min = minChange + 1;
}
}
}
return min;
}
static int IterativeMoneychange(int money)
{
int[] ar = new int[money + 1];
int[] change = { 6, 5, 1 };
int min = 9999;
int index = 0;
for (int i = 0; i < money+1; i++)
{
min = 99999;
index = 0;
bool modSet = false;
for (int j= 0; j <change.Length; j++)
{
if (i >= change[j])
{
int mod=(i % change[j]);
if(mod==0&&change[j]!=1)
{
if (!modSet) min = 99999;
if ((i-change[j] )< min)
{
min = (i - change[j]);
modSet = true;
}
}
else
{
if ((i - change[j]) < min)
{
min = (i - change[j]);
modSet = false;
}
}
}
}
if (min != 99999)// min = 0;
ar[i] = ar[min] +1;
}
return ar[money];
}enter code here

If I have given the word ssseeerabbcccdd then output should be like s3e3rab2c3d2 in c#

class Program
{
static void Main(string[] args)
{
Console.Write("Enter Word : ");
string word = Console.ReadLine();
for (var i = 0; i < word.Length; i++)
{
var check = true;
var count = 0;
for (var k = i - 1; k <= 0; k--)
{
if (word[k] == word[i]) check = false;
}
if (check)
{
for (var j = i; j<= word.Length; j++)
{
if (word[i] == word[j]) count++;
}
Console.WriteLine(word[i] + " Occurs at " + count + " places.");
}
}
Console.ReadLine();
}
}
I have tried this one but not working.
The problem is that check is set to false if the string contains two adjacent identical characters. So if (check) {...} won't be executed, if this is the case. Another problem is that you set k to string position -1 in the first iteration of for (var k = i - 1; k <= 0; k--). If i=0, then k will be -1. You also need only one inner loop.
Here is a possible solution, although slightly different than your's.
class Program
{
static void Main(string[] args)
{
Console.Write("Enter Word : ");
string word = Console.ReadLine();
int i = 0;
while (i < word.Length)
{
int count = 1;
for (int k = i+1; k < word.Length; k++)
if (word[i] == word[k])
count++;
if (count>1)
{
Console.WriteLine(word[i] + " Occurs at " + count + " places.");
}
i += count; // continue next char or after after repeated chars
}
Console.ReadLine();
}
}

Jaro–Winkler distance algorithm in C#

How would the Jaro–Winkler distance string comparison algorithm be implemented in C#?
public static class JaroWinklerDistance
{
/* The Winkler modification will not be applied unless the
* percent match was at or above the mWeightThreshold percent
* without the modification.
* Winkler's paper used a default value of 0.7
*/
private static readonly double mWeightThreshold = 0.7;
/* Size of the prefix to be concidered by the Winkler modification.
* Winkler's paper used a default value of 4
*/
private static readonly int mNumChars = 4;
/// <summary>
/// Returns the Jaro-Winkler distance between the specified
/// strings. The distance is symmetric and will fall in the
/// range 0 (perfect match) to 1 (no match).
/// </summary>
/// <param name="aString1">First String</param>
/// <param name="aString2">Second String</param>
/// <returns></returns>
public static double distance(string aString1, string aString2) {
return 1.0 - proximity(aString1,aString2);
}
/// <summary>
/// Returns the Jaro-Winkler distance between the specified
/// strings. The distance is symmetric and will fall in the
/// range 0 (no match) to 1 (perfect match).
/// </summary>
/// <param name="aString1">First String</param>
/// <param name="aString2">Second String</param>
/// <returns></returns>
public static double proximity(string aString1, string aString2)
{
int lLen1 = aString1.Length;
int lLen2 = aString2.Length;
if (lLen1 == 0)
return lLen2 == 0 ? 1.0 : 0.0;
int lSearchRange = Math.Max(0,Math.Max(lLen1,lLen2)/2 - 1);
// default initialized to false
bool[] lMatched1 = new bool[lLen1];
bool[] lMatched2 = new bool[lLen2];
int lNumCommon = 0;
for (int i = 0; i < lLen1; ++i) {
int lStart = Math.Max(0,i-lSearchRange);
int lEnd = Math.Min(i+lSearchRange+1,lLen2);
for (int j = lStart; j < lEnd; ++j) {
if (lMatched2[j]) continue;
if (aString1[i] != aString2[j])
continue;
lMatched1[i] = true;
lMatched2[j] = true;
++lNumCommon;
break;
}
}
if (lNumCommon == 0) return 0.0;
int lNumHalfTransposed = 0;
int k = 0;
for (int i = 0; i < lLen1; ++i) {
if (!lMatched1[i]) continue;
while (!lMatched2[k]) ++k;
if (aString1[i] != aString2[k])
++lNumHalfTransposed;
++k;
}
// System.Diagnostics.Debug.WriteLine("numHalfTransposed=" + numHalfTransposed);
int lNumTransposed = lNumHalfTransposed/2;
// System.Diagnostics.Debug.WriteLine("numCommon=" + numCommon + " numTransposed=" + numTransposed);
double lNumCommonD = lNumCommon;
double lWeight = (lNumCommonD/lLen1
+ lNumCommonD/lLen2
+ (lNumCommon - lNumTransposed)/lNumCommonD)/3.0;
if (lWeight <= mWeightThreshold) return lWeight;
int lMax = Math.Min(mNumChars,Math.Min(aString1.Length,aString2.Length));
int lPos = 0;
while (lPos < lMax && aString1[lPos] == aString2[lPos])
++lPos;
if (lPos == 0) return lWeight;
return lWeight + 0.1 * lPos * (1.0 - lWeight);
}
}
You can take a look on Lucene.Net ,
it implement Jaro–Winkler distance algorithm ,
and its score is different from which leebickmtu post ,
you can take it as reference
the url is below :
http://lucenenet.apache.org/docs/3.0.3/db/d12/_jaro_winkler_distance_8cs_source.html
You can use the below code which works very well for all the kind of strings.After getting the result you need to multiply with 100 to get the percentage of similarity. I hope it will solves your problem.
public class JaroWinkler
{
private const double defaultMismatchScore = 0.0;
private const double defaultMatchScore = 1.0;
/// <summary>
/// Gets the similarity between two strings by using the Jaro-Winkler algorithm.
/// A value of 1 means perfect match. A value of zero represents an absolute no match
/// </summary>
/// <param name="_firstWord"></param>
/// <param name="_secondWord"></param>
/// <returns>a value between 0-1 of the similarity</returns>
///
public static double RateSimilarity(string _firstWord, string _secondWord)
{
// Converting to lower case is not part of the original Jaro-Winkler implementation
// But we don't really care about case sensitivity in DIAMOND and wouldn't decrease security names similarity rate just because
// of Case sensitivity
_firstWord = _firstWord.ToLower();
_secondWord = _secondWord.ToLower();
if ((_firstWord != null) && (_secondWord != null))
{
if (_firstWord == _secondWord)
//return (SqlDouble)defaultMatchScore;
return defaultMatchScore;
else
{
// Get half the length of the string rounded up - (this is the distance used for acceptable transpositions)
int halfLength = Math.Min(_firstWord.Length, _secondWord.Length) / 2 + 1;
// Get common characters
StringBuilder common1 = GetCommonCharacters(_firstWord, _secondWord, halfLength);
int commonMatches = common1.Length;
// Check for zero in common
if (commonMatches == 0)
//return (SqlDouble)defaultMismatchScore;
return defaultMismatchScore;
StringBuilder common2 = GetCommonCharacters(_secondWord, _firstWord, halfLength);
// Check for same length common strings returning 0 if is not the same
if (commonMatches != common2.Length)
//return (SqlDouble)defaultMismatchScore;
return defaultMismatchScore;
// Get the number of transpositions
int transpositions = 0;
for (int i = 0; i < commonMatches; i++)
{
if (common1[i] != common2[i])
transpositions++;
}
int j = 0;
j += 1;
// Calculate Jaro metric
transpositions /= 2;
double jaroMetric = commonMatches / (3.0 * _firstWord.Length) + commonMatches / (3.0 * _secondWord.Length) + (commonMatches - transpositions) / (3.0 * commonMatches);
//return (SqlDouble)jaroMetric;
return jaroMetric;
}
}
//return (SqlDouble)defaultMismatchScore;
return defaultMismatchScore;
}
/// <summary>
/// Returns a string buffer of characters from string1 within string2 if they are of a given
/// distance seperation from the position in string1.
/// </summary>
/// <param name="firstWord">string one</param>
/// <param name="secondWord">string two</param>
/// <param name="separationDistance">separation distance</param>
/// <returns>A string buffer of characters from string1 within string2 if they are of a given
/// distance seperation from the position in string1</returns>
private static StringBuilder GetCommonCharacters(string firstWord, string secondWord, int separationDistance)
{
if ((firstWord != null) && (secondWord != null))
{
StringBuilder returnCommons = new StringBuilder(20);
StringBuilder copy = new StringBuilder(secondWord);
int firstWordLength = firstWord.Length;
int secondWordLength = secondWord.Length;
for (int i = 0; i < firstWordLength; i++)
{
char character = firstWord[i];
bool found = false;
for (int j = Math.Max(0, i - separationDistance); !found && j < Math.Min(i + separationDistance, secondWordLength); j++)
{
if (copy[j] == character)
{
found = true;
returnCommons.Append(character);
copy[j] = '#';
}
}
}
return returnCommons;
}
return null;
}
}
Use below class to use jaro winkler.
i have customized both algorithm jaro and jaro-winkler.
Visit on Github for DLL.
using System;
using System.Linq;
namespace Search
{
public static class EditDistance
{
private struct JaroMetrics
{
public int Matches;
public int Transpositions;
}
private static EditDistance.JaroMetrics Matches(string s1, string s2)
{
string text;
string text2;
if (s1.Length > s2.Length)
{
text = s1;
text2 = s2;
}
else
{
text = s2;
text2 = s1;
}
int num = Math.Max(text.Length / 2 - 1, 0);
int[] array = new int[text2.Length];
int i;
for (i = 0; i < array.Length; i++)
{
array[i] = -1;
}
bool[] array2 = new bool[text.Length];
int num2 = 0;
for (int j = 0; j < text2.Length; j++)
{
char c = text2[j];
int k = Math.Max(j - num, 0);
int num3 = Math.Min(j + num + 1, text.Length);
while (k < num3)
{
if (!array2[k] && c == text[k])
{
array[j] = k;
array2[k] = true;
num2++;
break;
}
k++;
}
}
char[] array3 = new char[num2];
char[] ms2 = new char[num2];
i = 0;
int num4 = 0;
while (i < text2.Length)
{
if (array[i] != -1)
{
array3[num4] = text2[i];
num4++;
}
i++;
}
i = 0;
num4 = 0;
while (i < text.Length)
{
if (array2[i])
{
ms2[num4] = text[i];
num4++;
}
i++;
}
int num5 = array3.Where((char t, int mi) => t != ms2[mi]).Count<char>();
EditDistance.JaroMetrics result;
result.Matches = num2;
result.Transpositions = num5 / 2;
return result;
}
public static float JaroWinkler(this string s1, string s2, float prefixScale, float boostThreshold)
{
prefixScale = ((prefixScale > 0.25f) ? 0.25f : prefixScale);
prefixScale = ((prefixScale < 0f) ? 0f : prefixScale);
float num = s1.Jaro(s2);
int num2 = 0;
for (int i = 0; i < Math.Min(s1.Length, s2.Length); i++)
{
if (s1[i] != s2[i])
{
break;
}
num2++;
}
return (num < boostThreshold) ? num : (num + prefixScale * (float)num2 * (1f - num));
}
public static float JaroWinkler(this string s1, string s2, float prefixScale)
{
return s1.JaroWinkler(s2, prefixScale, 0.7f);
}
public static float JaroWinkler(this string s1, string s2)
{
return s1.JaroWinkler(s2, 0.1f, 0.7f);
}
public static float Jaro(this string s1, string s2)
{
EditDistance.JaroMetrics jaroMetrics = EditDistance.Matches(s1, s2);
float num = (float)jaroMetrics.Matches;
int transpositions = jaroMetrics.Transpositions;
float result;
if (num == 0f)
{
result = 0f;
}
else
{
float num2 = (num / (float)s1.Length + num / (float)s2.Length + (num - (float)transpositions) / num) / 3f;
result = num2;
}
return result;
}
public static int LevenshteinDistance(this string source, string target)
{
int result;
if (string.IsNullOrEmpty(source))
{
if (string.IsNullOrEmpty(target))
{
result = 0;
}
else
{
result = target.Length;
}
}
else if (string.IsNullOrEmpty(target))
{
result = source.Length;
}
else
{
if (source.Length > target.Length)
{
string text = target;
target = source;
source = text;
}
int length = target.Length;
int length2 = source.Length;
int[,] array = new int[2, length + 1];
for (int i = 1; i <= length; i++)
{
array[0, i] = i;
}
int num = 0;
for (int j = 1; j <= length2; j++)
{
num = (j & 1);
array[num, 0] = j;
int num2 = num ^ 1;
for (int i = 1; i <= length; i++)
{
int num3 = (target[i - 1] == source[j - 1]) ? 0 : 1;
array[num, i] = Math.Min(Math.Min(array[num2, i] + 1, array[num, i - 1] + 1), array[num2, i - 1] + num3);
}
}
result = array[num, length];
}
return result;
}
}
}

Find longest substring without repeating characters

Given a string S of length N find longest substring without repeating characters.
Example:
Input: "stackoverflow"
Output: "stackoverfl"
If there are two such candidates, return first from left. I need linear time and constant space algorithm.
You are going to need a start and an end locator(/pointer) for the
string and an array where you store information for each character:
did it occour at least once?
Start at the beginning of the string, both locators point to the
start of the string.
Move the end locator to the right till you find
a repetition (or reach the end of the string). For each processed character, store it in the array.
When stopped store the position if this is the largest substring. Also remember the repeated character.
Now do the same thing with the start locator, when processing
each character, remove its flags from the array. Move the locator till
you find the earlier occurrence of the repeated character.
Go back to step 3 if you haven't reached the end of string.
Overall: O(N)
import java.util.HashSet;
public class SubString {
public static String subString(String input){
HashSet<Character> set = new HashSet<Character>();
String longestOverAll = "";
String longestTillNow = "";
for (int i = 0; i < input.length(); i++) {
char c = input.charAt(i);
if (set.contains(c)) {
longestTillNow = "";
set.clear();
}
longestTillNow += c;
set.add(c);
if (longestTillNow.length() > longestOverAll.length()) {
longestOverAll = longestTillNow;
}
}
return longestOverAll;
}
public static void main(String[] args) {
String input = "substringfindout";
System.out.println(subString(input));
}
}
You keep an array indicating the position at which a certain character occurred last. For convenience all characters occurred at position -1. You iterate on the string keeping a window, if a character is repeated in that window, you chop off the prefix that ends with the first occurrence of this character. Throughout, you maintain the longest length. Here's a python implementation:
def longest_unique_substr(S):
# This should be replaced by an array (size = alphabet size).
last_occurrence = {}
longest_len_so_far = 0
longest_pos_so_far = 0
curr_starting_pos = 0
curr_length = 0
for k, c in enumerate(S):
l = last_occurrence.get(c, -1)
# If no repetition within window, no problems.
if l < curr_starting_pos:
curr_length += 1
else:
# Check if it is the longest so far
if curr_length > longest_len_so_far:
longest_pos_so_far = curr_starting_pos
longest_len_so_far = curr_length
# Cut the prefix that has repetition
curr_length -= l - curr_starting_pos
curr_starting_pos = l + 1
# In any case, update last_occurrence
last_occurrence[c] = k
# Maybe the longest substring is a suffix
if curr_length > longest_len_so_far:
longest_pos_so_far = curr_starting_pos
longest_len_so_far = curr_length
return S[longest_pos_so_far:longest_pos_so_far + longest_len_so_far]
EDITED:
following is an implementation of the concesus. It occured to me after my original publication. so as not to delete original, it is presented following:
public static String longestUniqueString(String S) {
int start = 0, end = 0, length = 0;
boolean bits[] = new boolean[256];
int x = 0, y = 0;
for (; x < S.length() && y < S.length() && length < S.length() - x; x++) {
bits[S.charAt(x)] = true;
for (y++; y < S.length() && !bits[S.charAt(y)]; y++) {
bits[S.charAt(y)] = true;
}
if (length < y - x) {
start = x;
end = y;
length = y - x;
}
while(y<S.length() && x<y && S.charAt(x) != S.charAt(y))
bits[S.charAt(x++)]=false;
}
return S.substring(start, end);
}//
ORIGINAL POST:
Here is my two cents. Test strings included. boolean bits[] = new boolean[256] may be larger to encompass some larger charset.
public static String longestUniqueString(String S) {
int start=0, end=0, length=0;
boolean bits[] = new boolean[256];
int x=0, y=0;
for(;x<S.length() && y<S.length() && length < S.length()-x;x++) {
Arrays.fill(bits, false);
bits[S.charAt(x)]=true;
for(y=x+1;y<S.length() && !bits[S.charAt(y)];y++) {
bits[S.charAt(y)]=true;
}
if(length<y-x) {
start=x;
end=y;
length=y-x;
}
}
return S.substring(start,end);
}//
public static void main(String... args) {
String input[][] = { { "" }, { "a" }, { "ab" }, { "aab" }, { "abb" },
{ "aabc" }, { "abbc" }, { "aabbccdefgbc" },
{ "abcdeafghicabcdefghijklmnop" },
{ "abcdeafghicabcdefghijklmnopqrabcdx" },
{ "zxxaabcdeafghicabcdefghijklmnopqrabcdx" },
{"aaabcdefgaaa"}};
for (String[] a : input) {
System.out.format("%s *** GIVES *** {%s}%n", Arrays.toString(a),
longestUniqueString(a[0]));
}
}
Here is one more solution with only 2 string variables:
public static String getLongestNonRepeatingString(String inputStr){
if(inputStr == null){
return null;
}
String maxStr = "";
String tempStr = "";
for(int i=0; i < inputStr.length(); i++){
// 1. if tempStr contains new character, then change tempStr
if(tempStr.contains("" + inputStr.charAt(i))){
tempStr = tempStr.substring(tempStr.lastIndexOf(inputStr.charAt(i)) + 1);
}
// 2. add new character
tempStr = tempStr + inputStr.charAt(i);
// 3. replace maxStr with tempStr if tempStr is longer
if(maxStr.length() < tempStr.length()){
maxStr = tempStr;
}
}
return maxStr;
}
Algorithm in JavaScript (w/ lots of comments)..
/**
Given a string S find longest substring without repeating characters.
Example:
Input: "stackoverflow"
Output: "stackoverfl"
Input: "stackoverflowabcdefghijklmn"
Output: "owabcdefghijklmn"
*/
function findLongestNonRepeatingSubStr(input) {
var chars = input.split('');
var currChar;
var str = "";
var longestStr = "";
var hash = {};
for (var i = 0; i < chars.length; i++) {
currChar = chars[i];
if (!hash[chars[i]]) { // if hash doesn't have the char,
str += currChar; //add it to str
hash[chars[i]] = {index:i};//store the index of the char
} else {// if a duplicate char found..
//store the current longest non-repeating chars. until now
//In case of equal-length, <= right-most str, < will result in left most str
if(longestStr.length <= str.length) {
longestStr = str;
}
//Get the previous duplicate char's index
var prevDupeIndex = hash[currChar].index;
//Find all the chars AFTER previous duplicate char and current one
var strFromPrevDupe = input.substring(prevDupeIndex + 1, i);
//*NEW* longest string will be chars AFTER prevDupe till current char
str = strFromPrevDupe + currChar;
//console.log(str);
//Also, Reset hash to letters AFTER duplicate letter till current char
hash = {};
for (var j = prevDupeIndex + 1; j <= i; j++) {
hash[input.charAt(j)] = {index:j};
}
}
}
return longestStr.length > str.length ? longestStr : str;
}
//console.log("stackoverflow => " + findLongestNonRepeatingSubStr("stackoverflow"));
//returns stackoverfl
//console.log("stackoverflowabcdefghijklmn => " +
findLongestNonRepeatingSubStr("stackoverflowabcdefghijklmn")); //returns owabcdefghijklmn
//console.log("1230123450101 => " + findLongestNonRepeatingSubStr("1230123450101")); //
returns 234501
We can consider all substrings one by one and check for each substring whether it contains all unique characters or not.
There will be n*(n+1)/2 substrings. Whether a substirng contains all unique characters or not can be checked in linear time by
scanning it from left to right and keeping a map of visited characters. Time complexity of this solution would be O(n^3).`
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class LengthOfLongestSubstringWithOutRepeatingChar {
public static void main(String[] args)
{
String s="stackoverflow";
//allSubString(s);
System.out.println("result of find"+find(s));
}
public static String find(String s)
{
List<String> allSubsring=allSubString(s);
Set<String> main =new LinkedHashSet<String>();
for(String temp:allSubsring)
{
boolean a = false;
for(int i=0;i<temp.length();i++)
{
for(int k=temp.length()-1;k>i;k--)
{
if(temp.charAt(k)==temp.charAt(i))
a=true;
}
}
if(!a)
{
main.add(temp);
}
}
/*for(String x:main)
{
System.out.println(x);
}*/
String res=null;
int min=0,max=s.length();
for(String temp:main)
{
if(temp.length()>min&&temp.length()<max)
{
min=temp.length();
res=temp;
}
}
System.out.println(min+"ha ha ha"+res+"he he he");
return res;
}
//substrings left to right ban rahi hai
private static List<String> allSubString(String str) {
List<String> all=new ArrayList<String>();
int c=0;
for (int i = 0; i < str.length(); i++) {
for (int j = 0; j <= i; j++) {
if (!all.contains(str.substring(j, i + 1)))
{
c++;
all.add(str.substring(j, i + 1));
}
}
}
for(String temp:all)
{
System.out.println("substring :-"+temp);
}
System.out.println("count"+c);
return all;
}
}
Another O(n) JavaScript solution. It does not alter strings during the looping; it just keeps track of the offset and length of the longest sub string so far:
function longest(str) {
var hash = {}, start, end, bestStart, best;
start = end = bestStart = best = 0;
while (end < str.length) {
while (hash[str[end]]) hash[str[start++]] = 0;
hash[str[end]] = 1;
if (++end - start > best) bestStart = start, best = end - start;
}
return str.substr(bestStart, best);
}
// I/O for snippet
document.querySelector('input').addEventListener('input', function () {
document.querySelector('span').textContent = longest(this.value);
});
Enter word:<input><br>
Longest: <span></span>
simple python snippet
l=length p=position
maxl=maxlength maxp=maxposition
Tested and working. For easy understanding, I suppose there's a drawer to put the letters.
Function:
public int lengthOfLongestSubstring(String s) {
int maxlen = 0;
int start = 0;
int end = 0;
HashSet<Character> drawer = new HashSet<Character>();
for (int i=0; i<s.length(); i++) {
char ch = s.charAt(i);
if (drawer.contains(ch)) {
//search for ch between start and end
while (s.charAt(start)!=ch) {
//drop letter from drawer
drawer.remove(s.charAt(start));
start++;
}
//Do not remove from drawer actual char (it's the new recently found)
start++;
end++;
}
else {
drawer.add(ch);
end++;
int _maxlen = end-start;
if (_maxlen>maxlen) {
maxlen=_maxlen;
}
}
}
return maxlen;
}
Longest substring without repeating character in python
public int lengthOfLongestSubstring(String s) {
if(s.equals(""))
return 0;
String[] arr = s.split("");
HashMap<String,Integer> map = new HashMap<>();
Queue<String> q = new LinkedList<>();
int l_till = 1;
int l_all = 1;
map.put(arr[0],0);
q.add(arr[0]);
for(int i = 1; i < s.length(); i++){
if (map.containsKey(arr[i])) {
if(l_till > l_all){
l_all = l_till;
}
while(!q.isEmpty() && !q.peek().equals(arr[i])){
map.remove(q.remove());
}
if(!q.isEmpty())
map.remove(q.remove());
q.add(arr[i]);
map.put(arr[i],i);
//System.out.println(q);
//System.out.println(map);
l_till = q.size();
}
else {
l_till = l_till + 1;
map.put(arr[i],i);
q.add(arr[i]);
}
}
if(l_till > l_all){
l_all = l_till;
}
return l_all;
}
I was asked the same question in an interview.
I have written Python3 code, to find the first occurrence of the substring with all distinct chars. In my implementations, I start with index = 0 and iterate over the input string. While iterating used a Python dict seems to store indexes of chars in input-string those has been visited in the iteration.
In iteration, if char c, does not find in current substring – raise KeyError exception
if c is found to be a duplicate char in the current substring (as c previously appeared during iteration – named that index last_seen) start a new substring
def lds(string: str) -> str:
""" returns first longest distinct substring in input `string` """
seens = {}
start, end, curt_start = 0, 0, 0
for curt_end, c in enumerate(string):
try:
last_seen = seens[c]
if last_seen < curt_start:
raise KeyError(f"{c!r} not found in {string[curt_start: curt_end]!r}")
if end - start < curt_end - curt_start:
start, end = curt_start, curt_end
curt_start = last_seen + 1
except KeyError:
pass
seens[c] = curt_end
else:
# case when the longest substring is suffix of the string, here curt_end
# do not point to a repeating char hance included in the substring
if string and end - start < curt_end - curt_start + 1:
start, end = curt_start, curt_end + 1
return string[start: end]
private static string LongestSubstring(string word)
{
var set = new HashSet<char>();
string longestOverAll = "";
string longestTillNow = "";
foreach (char c in word)
{
if (!set.Contains(c))
{
longestTillNow += c;
set.Add(c);
}
else
{
longestTillNow = string.Empty;
}
if (longestTillNow.Length > longestOverAll.Length)
{
longestOverAll = longestTillNow;
}
}
return longestOverAll;
}
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
public class LongestSubString2 {
public static void main(String[] args) {
String input = "stackoverflowabcdefghijklmn";
List<String> allOutPuts = new ArrayList<String>();
TreeMap<Integer, Set> map = new TreeMap<Integer, Set>();
for (int k = 0; k < input.length(); k++) {
String input1 = input.substring(k);
String longestSubString = getLongestSubString(input1);
allOutPuts.add(longestSubString);
}
for (String str : allOutPuts) {
int strLen = str.length();
if (map.containsKey(strLen)) {
Set set2 = (HashSet) map.get(strLen);
set2.add(str);
map.put(strLen, set2);
} else {
Set set1 = new HashSet();
set1.add(str);
map.put(strLen, set1);
}
}
System.out.println(map.lastKey());
System.out.println(map.get(map.lastKey()));
}
private static void printArray(Object[] currentObjArr) {
for (Object obj : currentObjArr) {
char str = (char) obj;
System.out.println(str);
}
}
private static String getLongestSubString(String input) {
Set<Character> set = new LinkedHashSet<Character>();
String longestString = "";
int len = input.length();
for (int i = 0; i < len; i++) {
char currentChar = input.charAt(i);
boolean isCharAdded = set.add(currentChar);
if (isCharAdded) {
if (i == len - 1) {
String currentStr = getStringFromSet(set);
if (currentStr.length() > longestString.length()) {
longestString = currentStr;
}
}
continue;
} else {
String currentStr = getStringFromSet(set);
if (currentStr.length() > longestString.length()) {
longestString = currentStr;
}
set = new LinkedHashSet<Character>(input.charAt(i));
}
}
return longestString;
}
private static String getStringFromSet(Set<Character> set) {
Object[] charArr = set.toArray();
StringBuffer strBuff = new StringBuffer();
for (Object obj : charArr) {
strBuff.append(obj);
}
return strBuff.toString();
}
}
This is my solution, and it was accepted by leetcode. However, after I saw the stats, I saw whole lot solutions has much faster result....meaning, my solution is around 600ms for all their test cases, and most of the js solutions are around 200 -300 ms bracket.. who can tell me why my solution is slowwww??
var lengthOfLongestSubstring = function(s) {
var arr = s.split("");
if (s.length === 0 || s.length === 1) {
return s.length;
}
var head = 0,
tail = 1;
var str = arr[head];
var maxL = 0;
while (tail < arr.length) {
if (str.indexOf(arr[tail]) == -1) {
str += arr[tail];
maxL = Math.max(maxL, str.length);
tail++;
} else {
maxL = Math.max(maxL, str.length);
head = head + str.indexOf(arr[tail]) + 1;
str = arr[head];
tail = head + 1;
}
}
return maxL;
};
I am posting O(n^2) in python . I just want to know whether the technique mentioned by Karoly Horvath has any steps that are similar to existing search/sort algorithms ?
My code :
def main():
test='stackoverflow'
tempstr=''
maxlen,index=0,0
indexsubstring=''
print 'Original string is =%s\n\n' %test
while(index!=len(test)):
for char in test[index:]:
if char not in tempstr:
tempstr+=char
if len(tempstr)> len(indexsubstring):
indexsubstring=tempstr
elif (len(tempstr)>=maxlen):
maxlen=len(tempstr)
indexsubstring=tempstr
break
tempstr=''
print 'max substring length till iteration with starting index =%s is %s'%(test[index],indexsubstring)
index+=1
if __name__=='__main__':
main()
Simple and Easy
import java.util.Scanner;
public class longestsub {
static Scanner sn = new Scanner(System.in);
static String word = sn.nextLine();
public static void main(String[] args) {
System.out.println("The Length is " +check(word));
}
private static int check(String word) {
String store="";
for (int i = 0; i < word.length(); i++) {
if (store.indexOf(word.charAt(i))<0) {
store = store+word.charAt(i);
}
}
System.out.println("Result word " +store);
return store.length();
}
}
Not quite optimized but simple answer in Python
def lengthOfLongestSubstring(s):
temp,maxlen,newstart = {},0,0
for i,x in enumerate(s):
if x in temp:
newstart = max(newstart,s[:i].rfind(x)+1)
else:
temp[x] = 1
maxlen = max(maxlen, len(s[newstart:i + 1]))
return maxlen
I think the costly affair is rfind which is why it's not quite optimized.
This is my solution. Hope it helps.
function longestSubstringWithoutDuplication(str) {
var max = 0;
//if empty string
if (str.length === 0){
return 0;
} else if (str.length === 1){ //case if the string's length is 1
return 1;
}
//loop over all the chars in the strings
var currentChar,
map = {},
counter = 0; //count the number of char in each substring without duplications
for (var i=0; i< str.length ; i++){
currentChar = str.charAt(i);
//if the current char is not in the map
if (map[currentChar] == undefined){
//push the currentChar to the map
map[currentChar] = i;
if (Object.keys(map).length > max){
max = Object.keys(map).length;
}
} else { //there is duplacation
//update the max
if (Object.keys(map).length > max){
max = Object.keys(map).length;
}
counter = 0; //initilize the counter to count next substring
i = map[currentChar]; //start from the duplicated char
map = {}; // clean the map
}
}
return max;
}
here is my javascript and cpp implementations with great details: https://algorithm.pingzhang.io/String/longest_substring_without_repeating_characters.html
We want to find the longest substring without repeating characters. The first thing comes to my mind is that we need a hash table to store every character in a substring so that when a new character comes in, we can easily know whether this character is already in the substring or not. I call it as valueIdxHash. Then, a substring has a startIdx and endIdx. So we need a variable to keep track of the starting index of a substring and I call it as startIdx. Let's assume we are at index i and we already have a substring (startIdx, i - 1). Now, we want to check whether this substring can keep growing or not.
If the valueIdxHash contains str[i], it means it is a repeated character. But we still need to check whether this repeated character is in the substring (startIdx, i - 1). So we need to retrieve the index of str[i] that is appeared last time and then compare this index with startIdx.
If startIdx is larger, it means the last appeared str[i] is outside of the substring. Thus the subtring can keep growing.
If startIdx is smaller, it means the last appeared str[i] is within of the substring. Thus, the substring cannot grow any more. startIdx will be updated as valueIdxHash[str[i]] + 1 and the new substring (valueIdxHash[str[i]] + 1, i) has potential to keep growing.
If the valueIdxHash does not contain str[i], the substring can keep growing.
I modified my solution to "find the length of the longest substring without repeating characters".
public string LengthOfLongestSubstring(string s) {
var res = 0;
var dict = new Dictionary<char, int>();
var start = 0;
for(int i =0; i< s.Length; i++)
{
if(dict.ContainsKey(s[i]))
{
start = Math.Max(start, dict[s[i]] + 1); //update start index
dict[s[i]] = i;
}
else
{
dict.Add(s[i], i);
}
res = Math.Max(res, i - start + 1); //track max length
}
return s.Substring(start,res);
}
import java.util.HashMap;
import java.util.HashSet;
public class SubString {
public static String subString(String input) {
String longesTillNOw = "";
String longestOverAll = "";
HashMap<Character,Integer> chars = new HashMap<>();
char[] array=input.toCharArray();
int start=0;
for (int i = 0; i < array.length; i++) {
char charactor = array[i];
if (chars.containsKey(charactor) ) {
start=chars.get(charactor)+1;
i=start;
chars.clear();
longesTillNOw = "";
} else {
chars.put(charactor,i);
longesTillNOw = longesTillNOw + charactor;
if (longesTillNOw.length() > longestOverAll.length()) {
longestOverAll = longesTillNOw;
}
}
}
return longestOverAll;
}
public static void main(String[] args) {
String input = "stackoverflowabcdefghijklmn";
System.out.println(subString(input));
}
}
Here are two ways to approach this problem in JavaScript.
A Brute Force approach is to loop through the string twice, checking every substring against every other substring and finding the maximum length where the substring is unique. We'll need two functions: one to check if a substring is unique and a second function to perform our double loop.
// O(n) time
const allUnique = str => {
const set = [...new Set(str)];
return (set.length == str.length) ? true: false;
}
// O(n^3) time, O(k) size where k is the size of the set
const lengthOfLongestSubstring = str => {
let result = 0,
maxResult = 0;
for (let i=0; i<str.length-1; i++) {
for (let j=i+1; j<str.length; j++) {
if (allUnique(str.substring(i, j))) {
result = str.substring(i, j).length;
if (result > maxResult) {
maxResult = result;
}
}
}
return maxResult;
}
}
This has a time complexity of O(n^3) since we perform a double loop O(n^2) and then another loop on top of that O(n) for our unique function. The space is the size of our set which can be generalized to O(n) or more accurately O(k) where k is the size of the set.
A Greedy Approach is to loop through only once and keep track of the maximum unique substring length as we go. We can use either an array or a hash map, but I think the new .includes() array method is cool, so let's use that.
const lengthOfLongestSubstring = str => {
let result = [],
maxResult = 0;
for (let i=0; i<str.length; i++) {
if (!result.includes(str[i])) {
result.push(str[i]);
} else {
maxResult = i;
}
}
return maxResult;
}
This has a time complexity of O(n) and a space complexity of O(1).
This problem can be solved in O(n) time complexity.
Initialize three variables
Start (index pointing to the start of the non repeating substring, Initialize it as 0 ).
End (index pointing to the end of the non repeating substring, Initialize it as 0 )
Hasmap (Object containing the last visited index positions of characters. Ex : {'a':0, 'b':1} for string "ab")
Steps :
Iterate over the string and perform following actions.
If the current character is not present in hashmap (), add it as to
hashmap, character as key and its index as value.
If current character is present in hashmap, then
a) Check whether the start index is less than or equal to the value present in the hashmap against the character (last index of same character earlier visited),
b) it is less then assign start variables value as the hashmaps' value + 1 (last index of same character earlier visited + 1);
c) Update hashmap by overriding the hashmap's current character's value as current index of character.
d) Calculate the end-start as the longest substring value and update if it's greater than earlier longest non-repeating substring.
Following is the Javascript Solution for this problem.
var lengthOfLongestSubstring = function(s) {
let length = s.length;
let ans = 0;
let start = 0,
end = 0;
let hashMap = {};
for (var i = 0; i < length; i++) {
if (!hashMap.hasOwnProperty(s[i])) {
hashMap[s[i]] = i;
} else {
if (start <= hashMap[s[i]]) {
start = hashMap[s[i]] + 1;
}
hashMap[s[i]] = i;
}
end++;
ans = ans > (end - start) ? ans : (end - start);
}
return ans;
};
Question: Find the longest substring without repeating characters.
Example 1 :
import java.util.LinkedHashMap;
import java.util.Map;
public class example1 {
public static void main(String[] args) {
String a = "abcabcbb";
// output => 3
System.out.println( lengthOfLongestSubstring(a));
}
private static int lengthOfLongestSubstring(String a) {
if(a == null || a.length() == 0) {return 0 ;}
int res = 0 ;
Map<Character , Integer> map = new LinkedHashMap<>();
for (int i = 0; i < a.length(); i++) {
char ch = a.charAt(i);
if (!map.containsKey(ch)) {
//If ch is not present in map, adding ch into map along with its position
map.put(ch, i);
}else {
/*
If char ch is present in Map, reposition the cursor i to the position of ch and clear the Map.
*/
i = map.put(ch, i);// updation of index
map.clear();
}//else
res = Math.max(res, map.size());
}
return res;
}
}
if you want the longest string without the repeating characters as output then do this inside the for loop:
String res ="";// global
int len = 0 ;//global
if(len < map.size()) {
len = map.size();
res = map.keySet().toString();
}
System.out.println("len -> " + len);
System.out.println("res => " + res);
def max_substring(string):
last_substring = ''
max_substring = ''
for x in string:
k = find_index(x,last_substring)
last_substring = last_substring[(k+1):]+x
if len(last_substring) > len(max_substring):
max_substring = last_substring
return max_substring
def find_index(x, lst):
k = 0
while k <len(lst):
if lst[k] == x:
return k
k +=1
return -1
can we use something like this .
def longestpalindrome(str1):
arr1=list(str1)
s=set(arr1)
arr2=list(s)
return len(arr2)
str1='abadef'
a=longestpalindrome(str1)
print(a)
if only length of the substring is to be returned
Algorithm: 1) Initialise an empty dictionary dct to check if any character already exists in the string. 2) cnt - to keep the count of substring without repeating characters. 3)l and r are the two pointers initialised to first index of the string. 4)loop through each char of the string. 5) If the character not present in the dct add itand increse the cnt. 6)If its already present then check if cnt is greater then resStrLen.7)Remove the char from dct and shift the left pointer by 1 and decrease the count.8)Repeat 5,6,7 till l,r greater or equal to length of the input string. 9)Have one more check at the end to handle cases like input string with non-repeating characters.Here is the simple python program to Find longest substring without repeating characters
a="stackoverflow"
strLength = len(a)
dct={}
resStrLen=0
cnt=0
l=0
r=0
strb=l
stre=l
while(l<strLength and r<strLength):
if a[l] in dct:
if cnt>resStrLen:
resStrLen=cnt
strb=r
stre=l
dct.pop(a[r])
cnt=cnt-1
r+=1
else:
cnt+=1
dct[a[l]]=1
l+=1
if cnt>resStrLen:
resStrLen=cnt
strb=r
stre=l
print "Result String Length : "+str(resStrLen)
print "Result String : " + a[strb:stre]
The solution in C.
#include<stdio.h>
#include <string.h>
void longstr(char* a, int *start, int *last)
{
*start = *last = 0;
int visited[256];
for (int i = 0; i < 256; i++)
{
visited[i] = -1;
}
int max_len = 0;
int cur_len = 0;
int prev_index;
visited[a[0]] = 0;
for (int i = 1; i < strlen(a); i++)
{
prev_index = visited[a[i]];
if (prev_index == -1 || i - cur_len > prev_index)
{
cur_len++;
*last = i;
}
else
{
if (max_len < cur_len)
{
*start = *last - cur_len;
max_len = cur_len;
}
cur_len = i - prev_index;
}
visited[a[i]] = i;
}
if (max_len < cur_len)
{
*start = *last - cur_len;
max_len = cur_len;
}
}
int main()
{
char str[] = "ABDEFGABEF";
printf("The input string is %s \n", str);
int start, last;
longstr(str, &start, &last);
//printf("\n %d %d \n", start, last);
memmove(str, (str + start), last - start);
str[last] = '\0';
printf("the longest non-repeating character substring is %s", str);
return 0;
}
public int lengthOfLongestSubstring(String s) {
int startIndex = 0;
int maxLength = 0;
//since we have 256 ascii chars
int[] lst = new int[256];
Arrays.fill(lst,-1);
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
//to get ascii value of c
int ic = (int) c;
int value = lst[ic];
//this will say to move start index to next index of the repeating char
//we only do this if the repeating char index is greater than start index
if (value >= startIndex) {
maxLength = Math.max(maxLength, i - startIndex);
startIndex = value + 1;
}
lst[ic] = i;
}
//when we came to an end of string
return Math.max(maxLength,s.length()-startIndex);
}
This is the fastest and it is linear time and constant space

Resources