Swift generate random number without arc4random [duplicate] - linux

Im having 2 problems when trying to generate a random string in Linux with Swift 3.
arc4random_uniform is not available in Linux only on BSD. SO i was able to get away with using random() function. And this worked when i was generating random numbers of a variable size (See code below)
func generateRandomNumber() -> Int
{
var place = 1
var finalNumber = 0;
#if os(Linux)
for _ in 0..<5
{
place *= 10
let randomNumber = Int(random() % 10) + 1
finalNumber += randomNumber * place
}
#else
for _ in 0..<5
{
place *= 10
let randomNumber = Int(arc4random_uniform(10))
finalNumber += randomNumber * place
}
#endif
return finalNumber
}
And that WORKS.
Edit: it works but it gives me the same number every time :(
When trying to generate random alphanumeric string I'm limited to using Swift String and NOT NSSTRING. Linux throws this error
original pre Linux block of code:
func randomString(_ length: Int) -> String
{
let letters : NSString = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
let len = UInt32(letters.length)
var randomString = ""
for _ in 0 ..< length {
let rand = arc4random_uniform(len)
var nextChar = letters.character(at: Int(rand))
randomString += NSString(characters: &nextChar, length: 1) as String
}
return randomString
}
And the actual error I get when using above code
error: cannot convert value of type 'NSString' to type 'String' in coercion
randomString += NSString(characters: &nextChar, length: 1) as String
modified for linux block of code.
func randomString(_ length: Int) -> String
{
let letters : String = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
let len = letters.characters.count
var randomString = ""
#if os(Linux)
for _ in 0..<length
{
let randomValue = (random() % len) + 1
randomString += "\(letters[letters.index(letters.startIndex, offsetBy: Int(randomValue))])"
}
#else
for _ in 0 ..< length
{
let rand = arc4random_uniform(UInt32(len))
randomString += "\(letters[letters.index(letters.startIndex, offsetBy: Int(rand))])"
}
#endif
return randomString
}
but this time the error is weird it only says Illegal instruction with no extra information. I ran the docker container in interactive mode and i saw my server running and printing out when calling other functions etc.
but the thing is the function actually WORKS when i ran it in IBMs swift
sandbox
and I'm assuming its using linux also. Im very stuck and confused any help would be greatly appreciated.
(UPDATE): I ran the same function in just a linux env with a single swift file and not the Vapor swift web framework. and it works. As mentioned in my edit above it gives me the same random string everytime. I will still have to test the entire project once my build finishes. But besides that i need to know if the random() function will actually give me something new each time instead of the same crap.

Figured it out.
So the answer to the repeating random number/string was to just add this line before i called the random() function
srand(UInt32(time(nil)))
and I'm assuming thats what fixed the illegal instruction also. Because i don't recall changing anything else.
Needless to say here is my final result
func generateRandomNumber() -> Int
{
var place = 1
var finalNumber = 0;
#if os(Linux)
srand(UInt32(time(nil)))
for _ in 0..<5
{
place *= 10
let randomNumber = Int(random() % 10) + 1
finalNumber += randomNumber * place
}
#else
for _ in 0..<5
{
place *= 10
let randomNumber = Int(arc4random_uniform(10))
finalNumber += randomNumber * place
}
#endif
return finalNumber
}
func randomString(_ length: Int) -> String
{
let letters : String = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
let len = letters.characters.count
var randomString = ""
#if os(Linux)
srand(UInt32(time(nil)))
for _ in 0..<length
{
let randomValue = (random() % len) + 1
randomString += "\(letters[letters.index(letters.startIndex, offsetBy: Int(randomValue))])"
}
#else
for _ in 0 ..< length
{
let rand = arc4random_uniform(UInt32(len))
randomString += "\(letters[letters.index(letters.startIndex, offsetBy: Int(rand))])"
}
#endif
return randomString
}

1) Always the same number
You have to set a seed once to get "random" numbers from random():
randomSeed(Int(Date().timeIntervalSince1970)
Man page:
If no seed value is provided, the random() function is
automatically seeded with a value of 1.
As the seed is always the same (1), you always get the same sequence of "random" numbers.
2) Alphanumeric string
To create your string without using NSString:
func randomString(length: Int) -> String {
let letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
let len = UInt32(letters.characters.count)
var randomString = ""
for _ in 0 ..< length {
let rand = myCustomRandom(len)
let randIndex = letters.index(letters.startIndex, offsetBy: Int(rand))
let nextChar = letters[randIndex]
randomString += String(nextChar)
}
return randomString
}

I copied and pasted your code exactly, and it doesn't compile.
fatal error: Can't form a Character from an empty String
Here's an alternative method:
// Keep at top of your code (outside of functions)
#if os(Linux)
srandom(UInt32(time(nil)))
#endif
func getRandomNumber(_ min: Int, _ max: Int) -> Int {
#if os(Linux)
return Int(random() % max) + min
#else
return Int(arc4random_uniform(UInt32(max)) + UInt32(min))
#endif
}
func getRandomString(_ chars: String, _ length: Int) -> String {
var str = ""
for _ in 1...length {
str.append(chars.itemOnStartIndex(advancedBy: getRandomNumber(0, chars.count - 1)))
}
return str
}
// Best practice to define this outside of the function itself
let chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
print(getRandomString(chars, 10))
This works for me on Ubuntu.

Swift 4.2, Ubuntu 16.04
let letters : String = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
let len = letters.count
var randomString:String = ""
for _ in 0 ..< length {
let rand = Int.random(in: 0..<len)
randomString += letters.map { String($0) }[rand]
}

Related

Find all indices of a search term in a string

I need a fast method to find all indices of a search term that might occur in a string. I tried this 'brute force' String extension method:
// Note: makes use of ExSwift
extension String
{
var length: Int { return count(self) }
func indicesOf(searchTerm:String) -> [Int] {
var indices = [Int]()
for i in 0 ..< self.length {
let segment = self[i ... (i + searchTerm.length - 1)]
if (segment == searchTerm) {
indices.append(i)
}
}
return indices;
}
}
... But it's ridiculously slow, especially the shorter the search term is. What would be a better method to find all indices fast?
As Martin said you can implement some of the well known fastest algorithms in String Matching, The Knuth–Morris–Pratt string searching algorithm (or KMP algorithm) searches for occurrences of a "word" W within a main "text string" S.
The algorithm has complexity O(n), where n is the length of S and the O is big-O notation.
extension String {
// Build pi function of prefixes
private func build_pi(str: String) -> [Int] {
var n = count(str)
var pi = Array(count: n + 1, repeatedValue: 0)
var k = -1
pi[0] = -1
for (var i = 0; i < n; ++i) {
while (k >= 0 && str[k] != str[i]) {
k = pi[k]
}
pi[i + 1] = ++k
}
return pi
}
// Knuth-Morris Pratt algorithm
func searchPattern(pattern: String) -> [Int] {
var matches = [Int]()
var n = count(self)
var m = count(pattern)
var k = 0
var pi = build_pi(pattern)
for var i = 0; i < n; ++i {
while (k >= 0 && (k == m || pattern[k] != self[i])) {
k = pi[k]
}
if ++k == m {
matches.append(i - m + 1)
}
}
return matches
}
subscript (i: Int) -> Character {
return self[advance(self.startIndex, i)]
}
}
Then you can use it in the following way:
var string = "apurba mandal loves ayoshi loves"
var pattern = "loves"
println(string.searchPattern(pattern))
An the output should be :
[14, 27]
That belong to the start index of the pattern occurrences inside the the string. I hope this help you.
EDIT:
As Martin said in his comment you need to avoid the use of the advance function to index an String by an Int because it's O(position to index).
One possible solution is to convert the String to an array of Character and then access to the indexes is O(1).
Then the extension can be changed to this one :
extension String {
// Build pi function of prefixes
private func build_pi(str: [Character]) -> [Int] {
var n = count(str)
var pi = Array(count: n + 1, repeatedValue: 0)
var k = -1
pi[0] = -1
for (var i = 0; i < n; ++i) {
while (k >= 0 && str[k] != str[i]) {
k = pi[k]
}
pi[i + 1] = ++k
}
return pi
}
// Knuth-Morris Pratt algorithm
func searchPattern(pattern: String) -> [Int] {
// Convert to Character array to index in O(1)
var patt = Array(pattern)
var S = Array(self)
var matches = [Int]()
var n = count(self)
var m = count(pattern)
var k = 0
var pi = build_pi(patt)
for var i = 0; i < n; ++i {
while (k >= 0 && (k == m || patt[k] != S[i])) {
k = pi[k]
}
if ++k == m {
matches.append(i - m + 1)
}
}
return matches
}
}
Instead of checking for the search term at each position of the string
you could use rangeOfString() to find the next occurrence (hoping
that rangeOfString() uses more advanced algorithms):
extension String {
func indicesOf(searchTerm:String) -> [Int] {
var indices = [Int]()
var pos = self.startIndex
while let range = self.rangeOfString(searchTerm, range: pos ..< self.endIndex) {
indices.append(distance(self.startIndex, range.startIndex))
pos = range.startIndex.successor()
}
return indices
}
}
Generally, it depends on the size of the input string and the size
of the search string which algorithm is "the fastest". You'll find
an overview with links to various algorithms in
String searching algorithm.
Update for Swift 3:
extension String {
func indices(of searchTerm:String) -> [Int] {
var indices = [Int]()
var pos = self.startIndex
while let range = range(of: searchTerm, range: pos ..< self.endIndex) {
indices.append(distance(from: startIndex, to: range.lowerBound))
pos = index(after: range.lowerBound)
}
return indices
}
}
Using NSRegularExpression in Swift 4, you can do it like this. NSRegularExpression has been around forever and is probably a better choice than rolling your own algorithm for most cases.
let text = "The quieter you become, the more you can hear."
let searchTerm = "you"
let regex = try! NSRegularExpression(pattern: searchTerm, options: [])
let range: NSRange = NSRange(text.startIndex ..< text.endIndex, in: text)
let matches: [NSTextCheckingResult] = regex.matches(in: text, options: [], range: range)
let ranges: [NSRange] = matches.map { $0.range }
let indices: [Int] = ranges.map { $0.location }
let swiftRanges = ranges.map { Range($0, in: text) }
let swiftIndices: [String.Index] = swiftRanges.flatMap { $0?.lowerBound }

Swift fast low level String lastIndexOf

I need an implementation of lastIndexOf that is as fast as possible.
I am finding that the String advance function is extremely slow.
I tried using the c function strrchr, and tried copying the string to NSData and using pointers but I can't get the syntax right.
My string will always have 1 byte characters and the string i'm searching for "|" is always 1 byte also.
Any implementation using advance will be too slow but here is the fastest example I could find:
func indexOf(target: String, startIndex: Int) -> Int
{
var startRange = advance(self.startIndex, startIndex)
var range = self.rangeOfString(target, options: NSStringCompareOptions.LiteralSearch, range: Range<String.Index>(start: startRange, end: self.endIndex))
if let range = range {
return distance(self.startIndex, range.startIndex)
} else {
return -1
}
}
func lastIndexOf(target: String) -> Int
{
var index = -1
var stepIndex = self.indexOf(target)
while stepIndex > -1
{
index = stepIndex
if stepIndex + target.length < self.length
{
stepIndex = indexOf(target, startIndex: stepIndex + target.length)
}
else
{
stepIndex = -1
}
}
return index
}
This is an example of the string I need to parse.
var str:String = "4|0|66|5|0|3259744|6352141|1|3259744|WSMxt208L54yZ5irtHC3|Mc02|efland,nc|36.027992|-79.2212834|0|4|6|0|3259744|6352141|46|14|1|0|7|7|3259744|6352141|4|1|0|8|8|3259744|6352141|4|0|22|9|0|3259744|6352141|2|3|Room1|2|72|86330534|1|0|10|9|3259744|6352141|4|1|0|11|10|3259744|6352141|4|1|0|12|11|3259744|6352141|4|1|0|13|12|3259744|6352141|4|0|4|14|0|3259744|6352141|46|24|0|5|15|0|3259744|6352141|46|654|0|66|0|0|3259744|6352141|1|3259744|WSMxt208L54yZ5irtHC3|Mc02|efland,nc|36.027992|-79.2212834|0|4|16|0|3259744|6352141|46|4sageReceived:4|0|66|5|0|3259744|6352141|1|3259744|WSMxt208L54yZ5irtHC3|Mc02|efland,nc|36.027992|-79.2212834|0|4|6|0|3259744|6352141|46|14|1|0|7|7|3259744|6352141|4|1|0|8|8|3259744|6352141|4|0|22|9|0|3259744|6352141|2|3|Room1|2|72|86330534|1|0|10|9|3259744|6352141|4|1|0|11|10|3259744|6352141|4|1|0|12|11|3259744|6352141|4|1|0|13|12|3259744|6352141|4|0|4|14|0|3259744|6352141|46|24|0|5|15|0|3259744|6352141|46|654|0|66|0|0|3259744|6352141|1|3259744|WSMxt208L54yZ5irtHC3|Mc02|efland,nc|36.027992|-79.2212834|0|4|16|0|3259744|6352141|46|4352141|1|3259744|WSMxt208L54yZ5irtHC3|Mc02|efland,nc|36.027992|-79.2212834|0|4|6|0|3259744|6352141|46|14|1|0|7|7|3259744|6352141|4|1|0|8|8|3259744|6352141|4|0|22|9|0|3259744|6352141|2|3|Room1|2|72|86330534|1|0|10|9|3259744|6352141|4|1|0|11|10|3259744|6352141|4|1|0|12|11|3259744|6352141|4|1|0|13|12|3259744|6352141|4|0|4|14|0|3259744|6352141|46|24|0|5|15|0|3259744|6352141|46|654|0|66|0|0|3259744|6352141|1|3259744|WSMxt208L54yZ5irtHC3|Mc02|efland,nc|36.027992|-79.2212834|0|4|16|0|3259744|6352141|46|4TCPListener.onReceived: 4|0|66|5|0|3259744|6352141|1|3259744|WSMxt208L54yZ5irtHC3|Mc02|efland,nc|36.027992|-79.2212834|0|4|6|0|3259744|6352141|46|14|1|0|7|7|3259744|6352141|4|1|0|8|8|3259744|6352141|4|0|22|9|0|3259744|6352141|2|3|Room1|2|72|86330534|1|0|10|9|3259744|6352141|4|1|0|11|10|3259744|6352141|4|1|0|12|11|3259744|6352141|4|1|0|13|12|3259744|6352141|4|0|4|14|0|3259744|6352141|46|24|0|5|15|0|3259744|6352141|46|654|0|66|0|0|3259744|6352141|1|3259744|WSMxt208L54yZ5irtHC3|Mc02|efland,nc|36.027992|-79.2212834|0|4|16|0|3259744|6352141|46|4preParse
4|0|66|5|0|3259744|6352141|1|3259744|WSMxt208L54yZ5irtHC3|Mc02|efland,nc|36.027992|-79.221283"
Here is a Swift 2.0 Answer
func lastIndexOf(s: String) -> Int? {
if let r: Range<Index> = self.rangeOfString(s, options: .BackwardsSearch) {
return self.startIndex.distanceTo(r.startIndex)
}
return Optional<Int>()
}
Tests
func testStringLastIndexOf() {
let lastIndex = "0|2|45|7|9".lastIndexOf("|")
XCTAssertEqual(lastIndex, 8)
}
func testStringLastIndexOfNotFound() {
let lastIndex = "0123456789".lastIndexOf("|")
XCTAssertEqual(lastIndex, nil);
}
You can use strrchr in Swift
import Darwin
let str = "4|0|66|5|0|3259744|6352141|1|3259744"
func stringLastIndexOf(src:String, target:UnicodeScalar) -> Int? {
let c = Int32(bitPattern: target.value)
return src.withCString { s -> Int? in
let pos = strrchr(s, c)
return pos != nil ? pos - s : nil
}
}
stringLastIndexOf(str, "|") // -> {Some 28}
stringLastIndexOf(str, ",") // -> nil
You can use Objective C files in a Swift project; in these you can use plain C code and make a function which uses strrchr. Then you can call this from Swift.
If you do this in order to get all substring delimited by "|", you might test this approach:
import Foundation
let s = "4|0|66|5|0|3259744|6352141|1|3259744|WSMxt208L54yZ5irtHC3|..."
let a = s.componentsSeparatedByString("|")
The built in functions are sometimes very fast and you may be getting the required performance even by using String.
If you really need to get only the position of the last "|", you could work with utf16 representation, where advancing over the characters should be faster.
I think this should work:
let utf16String = s.utf16
var i = s.utf16Count - 1
while i >= 0 {
if utf16String[i] == 124 {
break
}
i--
}
println(i)
If the characters are guaranteed as single byte, the data is huge and performance is critical then it may be worth converting to an array of bytes (UInt8) and perform the operations directly on them. You can then convert the part that you need back to a String.
Also note that Optimised builds may be much faster than Debug builds so you should do any performance testing with the optimiser on. It may also be worth checking that the optimised versions are too slow at the moment.

How doing String-Programming in Swift

I miss usable String-functions, that are easy to use, without typing lines of strange identifiers. So I decided to built up a libary with useful and recognicable String-Functions.
I first tried to use Cocoa String-Functions to solve this problem. So I tried in the playground:
import Cocoa
func PartOfString(s: String, start: Int, length: Int) -> String
{
return s.substringFromIndex(advance(s.startIndex, start - 1)).substringToIndex(advance(s.startIndex, length))
}
PartOfString("HelloaouAOUs.World", 1, 5) --> "Hello"
PartOfString("HelloäöüÄÖÜß…World", 1, 5) --> "Hello"
PartOfString("HelloaouAOUs.World", 1, 18) --> "HelloaouAOUs.World"
PartOfString("HelloäöüÄÖÜß…World", 1, 18) --> "HelloäöüÄÖÜß…World"
PartOfString("HelloaouAOUs.World", 6, 7) --> "aouAOUs"
PartOfString("HelloäöüÄÖÜß…World", 6, 7) --> "äöüÄO"
If UnCode Characters are in the String for the case, that "substringFromIndex" is not the Start-Index. And even worse, the Swift-Program crashes sometimes at running time, if UnCode-Characters are in a String, for the case, that "substringFromIndex" is not the Start-Index. So I decided to create a set of new Functions, that take care of this problem and work with UnCode-Characters. Please note, that filenames can contain UnCode-Characters as well. So if you think you do not need UnCode-Characters you are wrong.
If you want to reproduce this, you need the same String I used, because copying from this Web-Page does not reproduce the problem.
var s: String = "HelloäöüÄÖÜß…World"
var t: String = s.stringByAddingPercentEscapesUsingEncoding(NSUTF8StringEncoding)!
var u: String = "Helloa%CC%88o%CC%88u%CC%88A%CC%88O%CC%88U%CC%88%C3%9F%E2%80%A6World".stringByRemovingPercentEncoding!
var b: Bool = (s == u) --> true
PartOfString(s, 6, 7) --> "äöüÄO"
Now you could get the idea, to convert the disturbing Canonical-Mapping UniCodes to compatible one with the following function:
func percentescapesremove (s: String) -> String
{
return (s.stringByRemovingPercentEncoding!.precomposedStringWithCompatibilityMapping)
}
And the result you will get is:
var v: String = percentescapesremove(t) --> "HelloäöüÄÖÜß...World"
PartOfString(v, 6, 7) --> "äöüÄÖÜß"
var a: Bool = (s == v) --> false
When you do so, the "äöüÄÖÜß" looks good and you think, everything is OK but look at the "..." which has been permanently converted from UniCode "…" to non-UniCode "..." and has the result which is not identically to the first string. If you have UniCode-filenames, then converting will result in not finding the file on a volume. So it is a good idea to convert only for scree-output and keep the original String in a save place.
The problem with the PartOfString-Function above is, that it generates a new String in the first part of the assignment and uses this new String with the index of the old one, which does not work, because the UniCodes have a different length than the normal letters. So I improved the funktion (thank to Martin R for his help):
func NewPartOfString(s: String, start: Int, length: Int) -> String
{
let t: String = s.substringFromIndex(advance(s.startIndex, start - 1))
return t.substringToIndex(advance(t.startIndex, length))
}
And the result is correct:
NewPartOfString("HelloaouAOUs.World", 1, 5) --> "Hello"
NewPartOfString("HelloäöüÄÖÜß…World", 1, 5) --> "Hello"
NewPartOfString("HelloaouAOUs.World", 1, 18) --> "HelloaouAOUs.World"
NewPartOfString("HelloäöüÄÖÜß…World", 1, 18) --> "HelloäöüÄÖÜß…World"
NewPartOfString("HelloaouAOUs.World", 6, 7) --> "aouAOUs"
NewPartOfString("HelloäöüÄÖÜß…World", 6, 7) --> "äöüÄÖÜß"
In the next step I will show a few functions, that can be used and work well. All of them are based on Integer-Index-Values that will start at 1 for the first character end end with the index for the last character being identically to the length of the String.
This function returns the length of a string:
func len (s: String) -> Int
{
return (countElements(s)) // This works not really fast, because of UniCode
}
This function returns the UniCode-Number of the first UniCode-Character in the String:
func asc (s: String) -> Int
{
if (s == "")
{
return 0
}
else
{
return (Int(s.unicodeScalars[s.unicodeScalars.startIndex].value))
}
}
This function returns the UniCode-Character of the given UniCode-Number:
func char (c: Int) -> String
{
var s: String = String(UnicodeScalar(c))
return (s)
}
This function returns the Upper-Case representation of a String:
func ucase (s: String) -> String
{
return (s.uppercaseString)
}
This function returns the Lower-Case representation of a String:
func lcase (s: String) -> String
{
return (s.lowercaseString)
}
The next Function gives the left part of a String with a given length:
func left (s: String, length: Int) -> String
{
if (length < 1)
{
return ("")
}
else
{
if (length > len(s))
{
return (s)
}
else
{
return (s.substringToIndex(advance(s.startIndex, length)))
}
}
}
The next Function gives the right part of a String with a given length:
func right (s: String, laenge: Int) -> String
{
var L: Int = len(s)
if (L <= laenge)
{
return(s)
}
else
{
if (laenge < 1)
{
return ("")
}
else
{
let t: String = s.substringFromIndex(advance(s.startIndex, L - laenge))
return t.substringToIndex(advance(t.startIndex, laenge))
}
}
}
The next Function gives the part of a String with a given length:
func mid (s: String, start: Int, laenge: Int) -> String
{
if (start <= 1)
{
return (left(s, laenge))
}
else
{
var L: Int = len(s)
if ((start > L) || (laenge < 1))
{
return ("")
}
else
{
if (start + laenge > L)
{
let t: String = s.substringFromIndex(advance(s.startIndex, start - 1))
return t.substringToIndex(advance(t.startIndex, L - start + 1))
}
else
{
let t: String = s.substringFromIndex(advance(s.startIndex, start - 1))
return t.substringToIndex(advance(t.startIndex, laenge))
}
}
}
}
A little more difficult is to get a character at a given position, because we cannot use "substringFromIndex" and "substringToIndex" with "substringFromIndex" is not the Start-Index. So the idea is to trace through the string, character for character, and get the needed substring.
func CharacterOfString(s: String, index: Int, length: Int) -> String
{
var c: String = ""
var i: Int = 0
for UniCodeChar in s.unicodeScalars
{
i = i + 1
if ((i >= index) && (i < index + length))
{
c = c + String(UniCodeChar)
}
}
return (c)
}
But this works not correctly for Strings which contain UniCode-Characters. The following examples show what happens:
CharacterOfString("Swift Example Text aouAOUs.", 16, 8) --> "ext aouA"
len(CharacterOfString("Swift Example Text aouAOUs.", 16, 8)) --> 8
CharacterOfString("Swift Example Text äöüÄÖÜß…", 16, 8) --> "ext äö"
len(CharacterOfString("Swift Example Text äöüÄÖÜß…", 16, 8)) --> 6
So we see, that the resulting String is too short, because a UniCode-Character can contain more than one character. This is because "ä" can be one UniCode-Character and also written as two "a¨" UniCode-Character. So we need another way to get a valid substring.
The solution is, to convert the UniCode-String to an array of UniCode-Characters and to use the index af the array to get a valid character. This works in all cases to get a single Character of an UniCode-String at a given index:
func indchar (s: String, i: Int) -> String
{
if ((i < 1) || (i > len(s)))
{
return ("")
}
else
{
return String(Array(s)[i - 1])
}
}
And with this knowledge, I have built a Function, which can get a valid UniCode-Substring with a given Start-Index and a given length:
func substring(s: String, Start: Int, Length: Int) -> String
{
var L: Int = len(s)
var UniCode = Array(s)
var result: String = ""
var TheEnd: Int = Start + Length - 1
if ((Start < 1) || (Start > L))
{
return ("")
}
else
{
if ((Length < 0) || (TheEnd > L))
{
TheEnd = L
}
for var i: Int = Start; i <= TheEnd; ++i
{
result = result + String(UniCode[i - 1])
}
return (result)
}
}
The next Function searches for the position of a given String in another String:
func position (original: String, search: String, start: Int) -> Int
{
var index = part(original, start).rangeOfString(search)
if (index != nil)
{
var pos: Int = distance(original.startIndex, index!.startIndex)
return (pos + start)
}
else
{
return (0)
}
}
This function looks, if a given Character-Code is a number (0-9):
func number (n: Int) -> Bool
{
return ((n >= 48) & (n <= 57)) // "0" to "9"
}
Now the basic String-Operations are shown, but what about Numbers? How will numbers converted to Strings and vice versa? Let's have a look at converting Strings to Numbers. Please not the "!" in the second line, which is used to get a Int and not an optional Int.
var s: String = "123" --> "123"
var v: Int = s.toInt() --> (123)
var v: Int = s.toInt()! --> 123
But this does not work, if the String contains some characters:
var s: String = "123." --> "123."
var v: Int = s.toInt()! --> Will result in a Runtime Error, because s.toInt() = nil
So I decided to built a smater Function to get the value of a String:
func val (s: String) -> Int
{
var p: Int = 0
var sign: Int = 0
if (indchar(s, 1) == "-")
{
sign = 1
p = 1
}
while(number(asc(indchar(s, p + 1))))
{
p = p + 1
}
if (p > sign)
{
return (left(s, p).toInt()!)
}
else
{
return (0)
}
}
Now the result is correct and does not produce a Runtime-Error:
var s: String = "123." --> "123."
var v: Int = val(s) --> 123
And now the same for Floating-Point Numbers:
func realval (s: String) -> Double
{
var r: Double = 0
var p: Int = 1
var a: Int = asc(indchar(s, p))
if (indchar(s, 1) == "-")
{
p = 2
}
while ((a != 44) && (a != 46) && ((a >= 48) & (a <= 57)))
{
p = p + 1
a = asc(indchar(s, p))
}
if (p >= len(s)) // Integer Number
{
r = Double(val(s))
}
else // Number with fractional part
{
var mantissa: Int = val(substring(s, p + 1, -1))
var fract: Double = 0
while (mantissa != 0)
{
fract = (fract / 10) + (Double(mantissa % 10) / 10)
mantissa = mantissa / 10
p = p + 1
}
r = Double(val(s)) + fract
p = p + 1
}
a = asc(indchar(s, p))
if ((a == 69) || (a == 101)) // Exponent
{
var exp: Int = val(substring(s, p + 1, -1))
if (exp != 0)
{
for var i: Int = 1; i <= abs(exp); ++i
{
if (exp > 0)
{
r = r * 10
}
else
{
r = r / 10
}
}
}
}
return (r)
}
This works for Floating points numbers with exponents:
var s: String = "123.456e3"
var t: String = "123.456e-3"
var v: Double = realval(s) --> 123456
var w: Double = realval(t) --> 0.123456
To generate a String from an Integer is much more simple:
func str (n: Int) -> String
{
return (String(n))
}
A String of a floating point variable does not work with String(n) but can be done with:
func strreal (n: Double) -> String
{
return ("\(n)")
}

How to replace nth character of a string with another

How could I replace nth character of a String with another one?
func replace(myString:String, index:Int, newCharac:Character) -> String {
// Write correct code here
return modifiedString
}
For example, replace("House", 2, "r") should be equal to "Horse".
Solutions that use NSString methods will fail for any strings with multi-byte Unicode characters. Here are two Swift-native ways to approach the problem:
You can use the fact that a String is a sequence of Character to convert the string to an array, modify it, and convert the array back:
func replace(myString: String, _ index: Int, _ newChar: Character) -> String {
var chars = Array(myString) // gets an array of characters
chars[index] = newChar
let modifiedString = String(chars)
return modifiedString
}
replace("House", 2, "r")
// Horse
Alternately, you can step through the string yourself:
func replace(myString: String, _ index: Int, _ newChar: Character) -> String {
var modifiedString = String()
for (i, char) in myString.characters.enumerate() {
modifiedString += String((i == index) ? newChar : char)
}
return modifiedString
}
Since these stay entirely within Swift, they're both Unicode-safe:
replace("🏠🏡🏠🏡🏠", 2, "🐴")
// 🏠🏡🐴🏡🏠
In Swift 4 it's much easier.
let newString = oldString.prefix(n) + char + oldString.dropFirst(n + 1)
This is an example:
let oldString = "Hello, playground"
let newString = oldString.prefix(4) + "0" + oldString.dropFirst(5)
where the result is
Hell0, playground
The type of newString is Substring. Both prefix and dropFirst return Substring. Substring is a slice of a string, in other words, substrings are fast because you don't need to allocate memory for the content of the string, but the same storage space as the original string is used.
I've found this solution.
var string = "Cars"
let index = string.index(string.startIndex, offsetBy: 2)
string.replaceSubrange(index...index, with: "t")
print(string)
// Cats
Please see NateCook answer for more details
func replace(myString: String, _ index: Int, _ newChar: Character) -> String {
var chars = Array(myString.characters) // gets an array of characters
chars[index] = newChar
let modifiedString = String(chars)
return modifiedString
}
For Swift 5
func replace(myString: String, _ index: Int, _ newChar: Character) -> String {
var chars = Array(myString) // gets an array of characters
chars[index] = newChar
let modifiedString = String(chars)
return modifiedString
}
replace("House", 2, "r")
This is no longer valid and deprecated.
You can always use swift String with NSString.So you can call NSString function on swift String.
By old stringByReplacingCharactersInRange: you can do like this
var st :String = "House"
let abc = st.bridgeToObjectiveC().stringByReplacingCharactersInRange(NSMakeRange(2,1), withString:"r") //Will give Horse
For modify existing string:
extension String {
subscript(_ n: Int) -> Character {
get {
let idx = self.index(startIndex, offsetBy: n)
return self[idx]
}
set {
let idx = self.index(startIndex, offsetBy: n)
self.replaceSubrange(idx...idx, with: [newValue])
}
}
}
var s = "12345"
print(s[0])
s[0] = "9"
print(s)
I've expanded upon Nate Cooks answer and transformed it into a string extension.
extension String {
//Enables replacement of the character at a specified position within a string
func replace(_ index: Int, _ newChar: Character) -> String {
var chars = Array(characters)
chars[index] = newChar
let modifiedString = String(chars)
return modifiedString
}
}
usage:
let source = "House"
let result = source.replace(2,"r")
result is "Horse"
I think what #Greg was trying to achieve with his extension is this:
mutating func replace(characterAt index: Int, with newChar: Character) {
var chars = Array(characters)
if index >= 0 && index < self.characters.count {
chars[index] = newChar
let modifiedString = String(chars)
self = modifiedString
} else {
print("can't replace character, its' index out of range!")
}
}
usage:
let source = "House"
source.replace(characterAt: 2, with: "r") //gives you "Horse"
After looking at the Swift Docs, I managed to make this function:
//Main function
func replace(myString:String, index:Int, newCharac:Character) -> String {
//Looping through the characters in myString
var i = 0
for character in myString {
//Checking to see if the index of the character is the one we're looking for
if i == index {
//Found it! Now instead of adding it, add newCharac!
modifiedString += newCharac
} else {
modifiedString += character
}
i = i + 1
}
// Write correct code here
return modifiedString
}
Please note that this is untested, but it should give you the right idea.
func replace(myString:String, index:Int, newCharac:Character) -> String {
var modifiedString = myString
let range = Range<String.Index>(
start: advance(myString.startIndex, index),
end: advance(myString.startIndex, index + 1))
modifiedString.replaceRange(range, with: "\(newCharac)")
return modifiedString
}
I would prefer to pass a String than a Character though.
Here's a way to replace a single character:
var string = "This is the original string."
let offset = 27
let index = string.index(string.startIndex, offsetBy: offset)
let range = index...index
print("ORIGINAL string: " + string)
string.replaceSubrange(range, with: "!")
print("UPDATED string: " + string)
// ORIGINAL string: This is the original string.
// UPDATED string: This is the original string!
This works with multi-character strings as well:
var string = "This is the original string."
let offset = 7
let index = string.index(string.startIndex, offsetBy: offset)
let range = index...index
print("ORIGINAL string: " + string)
string.replaceSubrange(range, with: " NOT ")
print("UPDATED string: " + string)
// ORIGINAL string: This is the original string.
// UPDATED string: This is NOT the original string.
var s = "helloworld"
let index = ((s.count) / 2) // index is 4
let firstIndex = s.index(s.startIndex, offsetBy: index)
let secondIndex = s.index(s.startIndex, offsetBy: index)
s.replaceSubrange(firstIndex...secondIndex, with: "*")
print("Replaced string is: \(s)") //OUTPUT IS: hell*world
This is working fine to replace string using the index.
String class in Swift (till v5 and maybe later) is what other languages call a StringBuilder class, and for performance reasons, Swift does NOT provide setting character by index; If you don't care about performance a simple solution could be:
public static func replace(_ string: String, at index: Int, with value: String) {
let start = string.index(string.startIndex, offsetBy: index)
let end = string.index(start, offsetBy: 1)
string.replaceSubrange(start..<end, with: value)
}
Or as an extension:
extension String {
public func charAt(_ index: Int) -> Character {
return self[self.index(self.startIndex, offsetBy: index)];
}
public mutating func setCharAt(_ index: Int, _ new: Character) {
self.setCharAt(index, String(new))
}
public mutating func setCharAt(_ index: Int, _ new: String) {
let i = self.index(self.startIndex, offsetBy: index)
self.replaceSubrange(i...i, with: new)
}
}
Note how above needs to call index(...) method to convert integer to actual-index!? It seems, Swift implements String like a linked-list, where append(...) is really fast, but even finding the index (without doing anything with it) is a linear-time operation (and gets slower based on concatenation count).
public void createEncodedSentence() {
StringBuffer buff = new StringBuffer();
int counter = 0;
char a;
for (int i = 0; i < sentence.length(); i++) {
a = sentence.charAt(i);
if (a == '.') {
buff.append('*');
}
if (a != ' ' && a != '.') {
counter++;
}
if (counter % 3 == 0) {
buff.append("");
}
buff.append(sentence.charAt(i));
}
encodedSentence = buff.toString();
}
Strings in swift don't have an accessor to read or write a single character. There's an excellent blog post by Ole Begemann describing how strings in swift work.
Note: the implementation below is wrong, read addendum
So the right way is by taking the left part of the string up to the index -1 character, append the replacing character, then append the string from index + 1 up to the end:
func myReplace(myString:String, index:Int, newCharac:Character) -> String {
var modifiedString: String
let len = countElements(myString)
if (index < len) && (index >= 0) {
modifiedString = myString.substringToIndex(index) + newCharac + myString.substringFromIndex(index + 1)
} else {
modifiedString = myString
}
return modifiedString
}
Note: in my implementation I chose to return the original string if the index is not in a valid range
Addendum Thanks to #slazyk, who found out that my implementation is wrong (see comment), I am providing a new swift only version of the function.
func replace(myString:String, index:Int, newCharac:Character) -> String {
var modifiedString: String
if (index < 0) || (index >= countElements(myString)) {
modifiedString = myString
} else {
var start = myString.startIndex
var end = advance(start, index)
modifiedString = myString[start ..< end]
modifiedString += newCharac
start = end.successor()
end = myString.endIndex
modifiedString += myString[start ... end]
}
return modifiedString
}
#codester's answer looks very good, and it's probably what I would use myself.
It would be interesting to know how performances compare though, using a fully swift solution and bridging to objective-c instead.
Here is an efficient answer :
import Foundation
func replace(myString:String, index:Int, newCharac:Character) -> String {
return myString.substringToIndex(index-1) + newCharac + myString.substringFromIndex(index)
}

How to find smallest substring which contains all characters from a given string?

I have recently come across an interesting question on strings. Suppose you are given following:
Input string1: "this is a test string"
Input string2: "tist"
Output string: "t stri"
So, given above, how can I approach towards finding smallest substring of string1 that contains all the characters from string 2?
To see more details including working code, check my blog post at:
http://www.leetcode.com/2010/11/finding-minimum-window-in-s-which.html
To help illustrate this approach, I use an example: string1 = "acbbaca" and string2 = "aba". Here, we also use the term "window", which means a contiguous block of characters from string1 (could be interchanged with the term substring).
i) string1 = "acbbaca" and string2 = "aba".
ii) The first minimum window is found.
Notice that we cannot advance begin
pointer as hasFound['a'] ==
needToFind['a'] == 2. Advancing would
mean breaking the constraint.
iii) The second window is found. begin
pointer still points to the first
element 'a'. hasFound['a'] (3) is
greater than needToFind['a'] (2). We
decrement hasFound['a'] by one and
advance begin pointer to the right.
iv) We skip 'c' since it is not found
in string2. Begin pointer now points to 'b'.
hasFound['b'] (2) is greater than
needToFind['b'] (1). We decrement
hasFound['b'] by one and advance begin
pointer to the right.
v) Begin pointer now points to the
next 'b'. hasFound['b'] (1) is equal
to needToFind['b'] (1). We stop
immediately and this is our newly
found minimum window.
The idea is mainly based on the help of two pointers (begin and end position of the window) and two tables (needToFind and hasFound) while traversing string1. needToFind stores the total count of a character in string2 and hasFound stores the total count of a character met so far. We also use a count variable to store the total characters in string2 that's met so far (not counting characters where hasFound[x] exceeds needToFind[x]). When count equals string2's length, we know a valid window is found.
Each time we advance the end pointer (pointing to an element x), we increment hasFound[x] by one. We also increment count by one if hasFound[x] is less than or equal to needToFind[x]. Why? When the constraint is met (that is, count equals to string2's size), we immediately advance begin pointer as far right as possible while maintaining the constraint.
How do we check if it is maintaining the constraint? Assume that begin points to an element x, we check if hasFound[x] is greater than needToFind[x]. If it is, we can decrement hasFound[x] by one and advancing begin pointer without breaking the constraint. On the other hand, if it is not, we stop immediately as advancing begin pointer breaks the window constraint.
Finally, we check if the minimum window length is less than the current minimum. Update the current minimum if a new minimum is found.
Essentially, the algorithm finds the first window that satisfies the constraint, then continue maintaining the constraint throughout.
You can do a histogram sweep in O(N+M) time and O(1) space where N is the number of characters in the first string and M is the number of characters in the second.
It works like this:
Make a histogram of the second string's characters (key operation is hist2[ s2[i] ]++).
Make a cumulative histogram of the first string's characters until that histogram contains every character that the second string's histogram contains (which I will call "the histogram condition").
Then move forwards on the first string, subtracting from the histogram, until it fails to meet the histogram condition. Mark that bit of the first string (before the final move) as your tentative substring.
Move the front of the substring forwards again until you meet the histogram condition again. Move the end forwards until it fails again. If this is a shorter substring than the first, mark that as your tentative substring.
Repeat until you've passed through the entire first string.
The marked substring is your answer.
Note that by varying the check you use on the histogram condition, you can choose either to have the same set of characters as the second string, or at least as many characters of each type. (Its just the difference between a[i]>0 && b[i]>0 and a[i]>=b[i].)
You can speed up the histogram checks if you keep a track of which condition is not satisfied when you're trying to satisfy it, and checking only the thing that you decrement when you're trying to break it. (On the initial buildup, you count how many items you've satisfied, and increment that count every time you add a new character that takes the condition from false to true.)
Here's an O(n) solution. The basic idea is simple: for each starting index, find the least ending index such that the substring contains all of the necessary letters. The trick is that the least ending index increases over the course of the function, so with a little data structure support, we consider each character at most twice.
In Python:
from collections import defaultdict
def smallest(s1, s2):
assert s2 != ''
d = defaultdict(int)
nneg = [0] # number of negative entries in d
def incr(c):
d[c] += 1
if d[c] == 0:
nneg[0] -= 1
def decr(c):
if d[c] == 0:
nneg[0] += 1
d[c] -= 1
for c in s2:
decr(c)
minlen = len(s1) + 1
j = 0
for i in xrange(len(s1)):
while nneg[0] > 0:
if j >= len(s1):
return minlen
incr(s1[j])
j += 1
minlen = min(minlen, j - i)
decr(s1[i])
return minlen
I received the same interview question. I am a C++ candidate but I was in a position to code relatively fast in JAVA.
Java [Courtesy : Sumod Mathilakath]
import java.io.*;
import java.util.*;
class UserMainCode
{
public String GetSubString(String input1,String input2){
// Write code here...
return find(input1, input2);
}
private static boolean containsPatternChar(int[] sCount, int[] pCount) {
for(int i=0;i<256;i++) {
if(pCount[i]>sCount[i])
return false;
}
return true;
}
public static String find(String s, String p) {
if (p.length() > s.length())
return null;
int[] pCount = new int[256];
int[] sCount = new int[256];
// Time: O(p.lenght)
for(int i=0;i<p.length();i++) {
pCount[(int)(p.charAt(i))]++;
sCount[(int)(s.charAt(i))]++;
}
int i = 0, j = p.length(), min = Integer.MAX_VALUE;
String res = null;
// Time: O(s.lenght)
while (j < s.length()) {
if (containsPatternChar(sCount, pCount)) {
if ((j - i) < min) {
min = j - i;
res = s.substring(i, j);
// This is the smallest possible substring.
if(min==p.length())
break;
// Reduce the window size.
sCount[(int)(s.charAt(i))]--;
i++;
}
} else {
sCount[(int)(s.charAt(j))]++;
// Increase the window size.
j++;
}
}
System.out.println(res);
return res;
}
}
C++ [Courtesy : sundeepblue]
#include <iostream>
#include <vector>
#include <string>
#include <climits>
using namespace std;
string find_minimum_window(string s, string t) {
if(s.empty() || t.empty()) return;
int ns = s.size(), nt = t.size();
vector<int> total(256, 0);
vector<int> sofar(256, 0);
for(int i=0; i<nt; i++)
total[t[i]]++;
int L = 0, R;
int minL = 0; //gist2
int count = 0;
int min_win_len = INT_MAX;
for(R=0; R<ns; R++) { // gist0, a big for loop
if(total[s[R]] == 0) continue;
else sofar[s[R]]++;
if(sofar[s[R]] <= total[s[R]]) // gist1, <= not <
count++;
if(count == nt) { // POS1
while(true) {
char c = s[L];
if(total[c] == 0) { L++; }
else if(sofar[c] > total[c]) {
sofar[c]--;
L++;
}
else break;
}
if(R - L + 1 < min_win_len) { // this judge should be inside POS1
min_win_len = R - L + 1;
minL = L;
}
}
}
string res;
if(count == nt) // gist3, cannot forget this.
res = s.substr(minL, min_win_len); // gist4, start from "minL" not "L"
return res;
}
int main() {
string s = "abdccdedca";
cout << find_minimum_window(s, "acd");
}
Erlang [Courtesy : wardbekker]
-module(leetcode).
-export([min_window/0]).
%% Given a string S and a string T, find the minimum window in S which will contain all the characters in T in complexity O(n).
%% For example,
%% S = "ADOBECODEBANC"
%% T = "ABC"
%% Minimum window is "BANC".
%% Note:
%% If there is no such window in S that covers all characters in T, return the emtpy string "".
%% If there are multiple such windows, you are guaranteed that there will always be only one unique minimum window in S.
min_window() ->
"eca" = min_window("cabeca", "cae"),
"eca" = min_window("cfabeca", "cae"),
"aec" = min_window("cabefgecdaecf", "cae"),
"cwae" = min_window("cabwefgewcwaefcf", "cae"),
"BANC" = min_window("ADOBECODEBANC", "ABC"),
ok.
min_window(T, S) ->
min_window(T, S, []).
min_window([], _T, MinWindow) ->
MinWindow;
min_window([H | Rest], T, MinWindow) ->
NewMinWindow = case lists:member(H, T) of
true ->
MinWindowFound = fullfill_window(Rest, lists:delete(H, T), [H]),
case length(MinWindow) == 0 orelse (length(MinWindow) > length(MinWindowFound)
andalso length(MinWindowFound) > 0) of
true ->
MinWindowFound;
false ->
MinWindow
end;
false ->
MinWindow
end,
min_window(Rest, T, NewMinWindow).
fullfill_window(_, [], Acc) ->
%% window completed
Acc;
fullfill_window([], _T, _Acc) ->
%% no window found
"";
fullfill_window([H | Rest], T, Acc) ->
%% completing window
case lists:member(H, T) of
true ->
fullfill_window(Rest, lists:delete(H, T), Acc ++ [H]);
false ->
fullfill_window(Rest, T, Acc ++ [H])
end.
REF:
http://articles.leetcode.com/finding-minimum-window-in-s-which/#comment-511216
http://www.mif.vu.lt/~valdas/ALGORITMAI/LITERATURA/Cormen/Cormen.pdf
Please have a look at this as well:
//-----------------------------------------------------------------------
bool IsInSet(char ch, char* cSet)
{
char* cSetptr = cSet;
int index = 0;
while (*(cSet+ index) != '\0')
{
if(ch == *(cSet+ index))
{
return true;
}
++index;
}
return false;
}
void removeChar(char ch, char* cSet)
{
bool bShift = false;
int index = 0;
while (*(cSet + index) != '\0')
{
if( (ch == *(cSet + index)) || bShift)
{
*(cSet + index) = *(cSet + index + 1);
bShift = true;
}
++index;
}
}
typedef struct subStr
{
short iStart;
short iEnd;
short szStr;
}ss;
char* subStringSmallest(char* testStr, char* cSet)
{
char* subString = NULL;
int iSzSet = strlen(cSet) + 1;
int iSzString = strlen(testStr)+ 1;
char* cSetBackUp = new char[iSzSet];
memcpy((void*)cSetBackUp, (void*)cSet, iSzSet);
int iStartIndx = -1;
int iEndIndx = -1;
int iIndexStartNext = -1;
std::vector<ss> subStrVec;
int index = 0;
while( *(testStr+index) != '\0' )
{
if (IsInSet(*(testStr+index), cSetBackUp))
{
removeChar(*(testStr+index), cSetBackUp);
if(iStartIndx < 0)
{
iStartIndx = index;
}
else if( iIndexStartNext < 0)
iIndexStartNext = index;
else
;
if (strlen(cSetBackUp) == 0 )
{
iEndIndx = index;
if( iIndexStartNext == -1)
break;
else
{
index = iIndexStartNext;
ss stemp = {iStartIndx, iEndIndx, (iEndIndx-iStartIndx + 1)};
subStrVec.push_back(stemp);
iStartIndx = iEndIndx = iIndexStartNext = -1;
memcpy((void*)cSetBackUp, (void*)cSet, iSzSet);
continue;
}
}
}
else
{
if (IsInSet(*(testStr+index), cSet))
{
if(iIndexStartNext < 0)
iIndexStartNext = index;
}
}
++index;
}
int indexSmallest = 0;
for(int indexVec = 0; indexVec < subStrVec.size(); ++indexVec)
{
if(subStrVec[indexSmallest].szStr > subStrVec[indexVec].szStr)
indexSmallest = indexVec;
}
subString = new char[(subStrVec[indexSmallest].szStr) + 1];
memcpy((void*)subString, (void*)(testStr+ subStrVec[indexSmallest].iStart), subStrVec[indexSmallest].szStr);
memset((void*)(subString + subStrVec[indexSmallest].szStr), 0, 1);
delete[] cSetBackUp;
return subString;
}
//--------------------------------------------------------------------
Edit: apparently there's an O(n) algorithm (cf. algorithmist's answer). Obviously this have this will beat the [naive] baseline described below!
Too bad I gotta go... I'm a bit suspicious that we can get O(n). I'll check in tomorrow to see the winner ;-) Have fun!
Tentative algorithm:
The general idea is to sequentially try and use a character from str2 found in str1 as the start of a search (in either/both directions) of all the other letters of str2. By keeping a "length of best match so far" value, we can abort searches when they exceed this. Other heuristics can probably be used to further abort suboptimal (so far) solutions. The choice of the order of the starting letters in str1 matters much; it is suggested to start with the letter(s) of str1 which have the lowest count and to try with the other letters, of an increasing count, in subsequent attempts.
[loose pseudo-code]
- get count for each letter/character in str1 (number of As, Bs etc.)
- get count for each letter in str2
- minLen = length(str1) + 1 (the +1 indicates you're not sure all chars of
str2 are in str1)
- Starting with the letter from string2 which is found the least in string1,
look for other letters of Str2, in either direction of str1, until you've
found them all (or not, at which case response = impossible => done!).
set x = length(corresponding substring of str1).
- if (x < minLen),
set minlen = x,
also memorize the start/len of the str1 substring.
- continue trying with other letters of str1 (going the up the frequency
list in str1), but abort search as soon as length(substring of strl)
reaches or exceed minLen.
We can find a few other heuristics that would allow aborting a
particular search, based on [pre-calculated ?] distance between a given
letter in str1 and some (all?) of the letters in str2.
- the overall search terminates when minLen = length(str2) or when
we've used all letters of str1 (which match one letter of str2)
as a starting point for the search
Here is Java implementation
public static String shortestSubstrContainingAllChars(String input, String target) {
int needToFind[] = new int[256];
int hasFound[] = new int[256];
int totalCharCount = 0;
String result = null;
char[] targetCharArray = target.toCharArray();
for (int i = 0; i < targetCharArray.length; i++) {
needToFind[targetCharArray[i]]++;
}
char[] inputCharArray = input.toCharArray();
for (int begin = 0, end = 0; end < inputCharArray.length; end++) {
if (needToFind[inputCharArray[end]] == 0) {
continue;
}
hasFound[inputCharArray[end]]++;
if (hasFound[inputCharArray[end]] <= needToFind[inputCharArray[end]]) {
totalCharCount ++;
}
if (totalCharCount == target.length()) {
while (needToFind[inputCharArray[begin]] == 0
|| hasFound[inputCharArray[begin]] > needToFind[inputCharArray[begin]]) {
if (hasFound[inputCharArray[begin]] > needToFind[inputCharArray[begin]]) {
hasFound[inputCharArray[begin]]--;
}
begin++;
}
String substring = input.substring(begin, end + 1);
if (result == null || result.length() > substring.length()) {
result = substring;
}
}
}
return result;
}
Here is the Junit Test
#Test
public void shortestSubstringContainingAllCharsTest() {
String result = StringUtil.shortestSubstrContainingAllChars("acbbaca", "aba");
assertThat(result, equalTo("baca"));
result = StringUtil.shortestSubstrContainingAllChars("acbbADOBECODEBANCaca", "ABC");
assertThat(result, equalTo("BANC"));
result = StringUtil.shortestSubstrContainingAllChars("this is a test string", "tist");
assertThat(result, equalTo("t stri"));
}
//[ShortestSubstring.java][1]
public class ShortestSubstring {
public static void main(String[] args) {
String input1 = "My name is Fran";
String input2 = "rim";
System.out.println(getShortestSubstring(input1, input2));
}
private static String getShortestSubstring(String mainString, String toBeSearched) {
int mainStringLength = mainString.length();
int toBeSearchedLength = toBeSearched.length();
if (toBeSearchedLength > mainStringLength) {
throw new IllegalArgumentException("search string cannot be larger than main string");
}
for (int j = 0; j < mainStringLength; j++) {
for (int i = 0; i <= mainStringLength - toBeSearchedLength; i++) {
String substring = mainString.substring(i, i + toBeSearchedLength);
if (checkIfMatchFound(substring, toBeSearched)) {
return substring;
}
}
toBeSearchedLength++;
}
return null;
}
private static boolean checkIfMatchFound(String substring, String toBeSearched) {
char[] charArraySubstring = substring.toCharArray();
char[] charArrayToBeSearched = toBeSearched.toCharArray();
int count = 0;
for (int i = 0; i < charArraySubstring.length; i++) {
for (int j = 0; j < charArrayToBeSearched.length; j++) {
if (String.valueOf(charArraySubstring[i]).equalsIgnoreCase(String.valueOf(charArrayToBeSearched[j]))) {
count++;
}
}
}
return count == charArrayToBeSearched.length;
}
}
This is an approach using prime numbers to avoid one loop, and replace it with multiplications. Several other minor optimizations can be made.
Assign a unique prime number to any of the characters that you want to find, and 1 to the uninteresting characters.
Find the product of a matching string by multiplying the prime number with the number of occurrences it should have. Now this product can only be found if the same prime factors are used.
Search the string from the beginning, multiplying the respective prime number as you move into a running product.
If the number is greater than the correct sum, remove the first character and divide its prime number out of your running product.
If the number is less than the correct sum, include the next character and multiply it into your running product.
If the number is the same as the correct sum you have found a match, slide beginning and end to next character and continue searching for other matches.
Decide which of the matches is the shortest.
Gist
charcount = { 'a': 3, 'b' : 1 };
str = "kjhdfsbabasdadaaaaasdkaaajbajerhhayeom"
def find (c, s):
Ns = len (s)
C = list (c.keys ())
D = list (c.values ())
# prime numbers assigned to the first 25 chars
prmsi = [ 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89 , 97]
# primes used in the key, all other set to 1
prms = []
Cord = [ord(c) - ord('a') for c in C]
for e,p in enumerate(prmsi):
if e in Cord:
prms.append (p)
else:
prms.append (1)
# Product of match
T = 1
for c,d in zip(C,D):
p = prms[ord (c) - ord('a')]
T *= p**d
print ("T=", T)
t = 1 # product of current string
f = 0
i = 0
matches = []
mi = 0
mn = Ns
mm = 0
while i < Ns:
k = prms[ord(s[i]) - ord ('a')]
t *= k
print ("testing:", s[f:i+1])
if (t > T):
# included too many chars: move start
t /= prms[ord(s[f]) - ord('a')] # remove first char, usually division by 1
f += 1 # increment start position
t /= k # will be retested, could be replaced with bool
elif t == T:
# found match
print ("FOUND match:", s[f:i+1])
matches.append (s[f:i+1])
if (i - f) < mn:
mm = mi
mn = i - f
mi += 1
t /= prms[ord(s[f]) - ord('a')] # remove first matching char
# look for next match
i += 1
f += 1
else:
# no match yet, keep searching
i += 1
return (mm, matches)
print (find (charcount, str))
(note: this answer was originally posted to a duplicate question, the original answer is now deleted.)
C# Implementation:
public static Tuple<int, int> FindMinSubstringWindow(string input, string pattern)
{
Tuple<int, int> windowCoords = new Tuple<int, int>(0, input.Length - 1);
int[] patternHist = new int[256];
for (int i = 0; i < pattern.Length; i++)
{
patternHist[pattern[i]]++;
}
int[] inputHist = new int[256];
int minWindowLength = int.MaxValue;
int count = 0;
for (int begin = 0, end = 0; end < input.Length; end++)
{
// Skip what's not in pattern.
if (patternHist[input[end]] == 0)
{
continue;
}
inputHist[input[end]]++;
// Count letters that are in pattern.
if (inputHist[input[end]] <= patternHist[input[end]])
{
count++;
}
// Window found.
if (count == pattern.Length)
{
// Remove extra instances of letters from pattern
// or just letters that aren't part of the pattern
// from the beginning.
while (patternHist[input[begin]] == 0 ||
inputHist[input[begin]] > patternHist[input[begin]])
{
if (inputHist[input[begin]] > patternHist[input[begin]])
{
inputHist[input[begin]]--;
}
begin++;
}
// Current window found.
int windowLength = end - begin + 1;
if (windowLength < minWindowLength)
{
windowCoords = new Tuple<int, int>(begin, end);
minWindowLength = windowLength;
}
}
}
if (count == pattern.Length)
{
return windowCoords;
}
return null;
}
I've implemented it using Python3 at O(N) efficiency:
def get(s, alphabet="abc"):
seen = {}
for c in alphabet:
seen[c] = 0
seen[s[0]] = 1
start = 0
end = 0
shortest_s = 0
shortest_e = 99999
while end + 1 < len(s):
while seen[s[start]] > 1:
seen[s[start]] -= 1
start += 1
# Constant time check:
if sum(seen.values()) == len(alphabet) and all(v == 1 for v in seen.values()) and \
shortest_e - shortest_s > end - start:
shortest_s = start
shortest_e = end
end += 1
seen[s[end]] += 1
return s[shortest_s: shortest_e + 1]
print(get("abbcac")) # Expected to return "bca"
String s = "xyyzyzyx";
String s1 = "xyz";
String finalString ="";
Map<Character,Integer> hm = new HashMap<>();
if(s1!=null && s!=null && s.length()>s1.length()){
for(int i =0;i<s1.length();i++){
if(hm.get(s1.charAt(i))!=null){
int k = hm.get(s1.charAt(i))+1;
hm.put(s1.charAt(i), k);
}else
hm.put(s1.charAt(i), 1);
}
Map<Character,Integer> t = new HashMap<>();
int start =-1;
for(int j=0;j<s.length();j++){
if(hm.get(s.charAt(j))!=null){
if(t.get(s.charAt(j))!=null){
if(t.get(s.charAt(j))!=hm.get(s.charAt(j))){
int k = t.get(s.charAt(j))+1;
t.put(s.charAt(j), k);
}
}else{
t.put(s.charAt(j), 1);
if(start==-1){
if(j+s1.length()>s.length()){
break;
}
start = j;
}
}
if(hm.equals(t)){
t = new HashMap<>();
if(finalString.length()<s.substring(start,j+1).length());
{
finalString=s.substring(start,j+1);
}
j=start;
start=-1;
}
}
}
JavaScript solution in bruteforce way:
function shortestSubStringOfUniqueChars(s){
var uniqueArr = [];
for(let i=0; i<s.length; i++){
if(uniqueArr.indexOf(s.charAt(i)) <0){
uniqueArr.push(s.charAt(i));
}
}
let windoww = uniqueArr.length;
while(windoww < s.length){
for(let i=0; i<s.length - windoww; i++){
let match = true;
let tempArr = [];
for(let j=0; j<uniqueArr.length; j++){
if(uniqueArr.indexOf(s.charAt(i+j))<0){
match = false;
break;
}
}
let checkStr
if(match){
checkStr = s.substr(i, windoww);
for(let j=0; j<uniqueArr.length; j++){
if(uniqueArr.indexOf(checkStr.charAt(j))<0){
match = false;
break;
}
}
}
if(match){
return checkStr;
}
}
windoww = windoww + 1;
}
}
console.log(shortestSubStringOfUniqueChars("ABA"));
# Python implementation
s = input('Enter the string : ')
s1 = input('Enter the substring to search : ')
l = [] # List to record all the matching combinations
check = all([char in s for char in s1])
if check == True:
for i in range(len(s1),len(s)+1) :
for j in range(0,i+len(s1)+2):
if (i+j) < len(s)+1:
cnt = 0
b = all([char in s[j:i+j] for char in s1])
if (b == True) :
l.append(s[j:i+j])
print('The smallest substring containing',s1,'is',l[0])
else:
print('Please enter a valid substring')
Java code for the approach discussed above:
private static Map<Character, Integer> frequency;
private static Set<Character> charsCovered;
private static Map<Character, Integer> encountered;
/**
* To set the first match index as an intial start point
*/
private static boolean hasStarted = false;
private static int currentStartIndex = 0;
private static int finalStartIndex = 0;
private static int finalEndIndex = 0;
private static int minLen = Integer.MAX_VALUE;
private static int currentLen = 0;
/**
* Whether we have already found the match and now looking for other
* alternatives.
*/
private static boolean isFound = false;
private static char currentChar;
public static String findSmallestSubStringWithAllChars(String big, String small) {
if (null == big || null == small || big.isEmpty() || small.isEmpty()) {
return null;
}
frequency = new HashMap<Character, Integer>();
instantiateFrequencyMap(small);
charsCovered = new HashSet<Character>();
int charsToBeCovered = frequency.size();
encountered = new HashMap<Character, Integer>();
for (int i = 0; i < big.length(); i++) {
currentChar = big.charAt(i);
if (frequency.containsKey(currentChar) && !isFound) {
if (!hasStarted && !isFound) {
hasStarted = true;
currentStartIndex = i;
}
updateEncounteredMapAndCharsCoveredSet(currentChar);
if (charsCovered.size() == charsToBeCovered) {
currentLen = i - currentStartIndex;
isFound = true;
updateMinLength(i);
}
} else if (frequency.containsKey(currentChar) && isFound) {
updateEncounteredMapAndCharsCoveredSet(currentChar);
if (currentChar == big.charAt(currentStartIndex)) {
encountered.put(currentChar, encountered.get(currentChar) - 1);
currentStartIndex++;
while (currentStartIndex < i) {
if (encountered.containsKey(big.charAt(currentStartIndex))
&& encountered.get(big.charAt(currentStartIndex)) > frequency.get(big
.charAt(currentStartIndex))) {
encountered.put(big.charAt(currentStartIndex),
encountered.get(big.charAt(currentStartIndex)) - 1);
} else if (encountered.containsKey(big.charAt(currentStartIndex))) {
break;
}
currentStartIndex++;
}
}
currentLen = i - currentStartIndex;
updateMinLength(i);
}
}
System.out.println("start: " + finalStartIndex + " finalEnd : " + finalEndIndex);
return big.substring(finalStartIndex, finalEndIndex + 1);
}
private static void updateMinLength(int index) {
if (minLen > currentLen) {
minLen = currentLen;
finalStartIndex = currentStartIndex;
finalEndIndex = index;
}
}
private static void updateEncounteredMapAndCharsCoveredSet(Character currentChar) {
if (encountered.containsKey(currentChar)) {
encountered.put(currentChar, encountered.get(currentChar) + 1);
} else {
encountered.put(currentChar, 1);
}
if (encountered.get(currentChar) >= frequency.get(currentChar)) {
charsCovered.add(currentChar);
}
}
private static void instantiateFrequencyMap(String str) {
for (char c : str.toCharArray()) {
if (frequency.containsKey(c)) {
frequency.put(c, frequency.get(c) + 1);
} else {
frequency.put(c, 1);
}
}
}
public static void main(String[] args) {
String big = "this is a test string";
String small = "tist";
System.out.println("len: " + big.length());
System.out.println(findSmallestSubStringWithAllChars(big, small));
}
def minimum_window(s, t, min_length = 100000):
d = {}
for x in t:
if x in d:
d[x]+= 1
else:
d[x] = 1
tot = sum([y for x,y in d.iteritems()])
l = []
ind = 0
for i,x in enumerate(s):
if ind == 1:
l = l + [x]
if x in d:
tot-=1
if not l:
ind = 1
l = [x]
if tot == 0:
if len(l)<min_length:
min_length = len(l)
min_length = minimum_window(s[i+1:], t, min_length)
return min_length
l_s = "ADOBECODEBANC"
t_s = "ABC"
min_length = minimum_window(l_s, t_s)
if min_length == 100000:
print "Not found"
else:
print min_length

Resources