Related
I have these three functions that successfully remove all non-numeric characters from a given string:
The first function loops through the characters of the input string, and if the current character is a number, it adds it to a new string that is returned as the result of the function.
function RemoveNonNumericChars(const s: string): string;
begin
Result := '';
for var i := 1 to Length(s) do
begin
if s[i] in ['0'..'9'] then
Result := Result + s[i];
end;
end;
The second function loops through the characters of the input string from right to left, and if the current character is not a number, it uses the Delete function to remove it from the string
function RemoveNonNumericChars(const s: string): string;
begin
Result := s;
for var i := Length(Result) downto 1 do
begin
if not(Result[i] in ['0'..'9']) then
Delete(Result, i, 1);
end;
end;
The third function uses a regular expression to replace all non-numeric characters with nothing, thus removing them. TRegEx is from the System.RegularExpressions unit.
function RemoveNonNumericChars(const s: string): string;
begin
var RegEx := TRegEx.Create('[^0-9]');
Result := RegEx.Replace(s, '');
end;
All three of them do what I need, but I want to know if there is maybe a built-in function in Delphi for this... Or maybe even a better way to do it than the way I'm doing it. What's the best and/or fastest way to remove non-numeric characters from a string in Delphi?
Both your approaches are slow because you constantly change the length of the string. Also, they only recognise Arabic digits.
To solve the performance issue, preallocate the maximum result length:
function RemoveNonDigits(const S: string): string;
begin
SetLength(Result, S.Length);
var LActualLength := 0;
for var i := 1 to S.Length do
if CharInSet(S[i], ['0'..'9']) then
begin
Inc(LActualLength);
Result[LActualLength] := S[i];
end;
SetLength(Result, LActualLength);
end;
To support non-Arabic digits, use the TCharacter.IsDigit function:
function RemoveNonDigits(const S: string): string;
begin
SetLength(Result, S.Length);
var LActualLength := 0;
for var i := 1 to S.Length do
if S[i].IsDigit then
begin
Inc(LActualLength);
Result[LActualLength] := S[i];
end;
SetLength(Result, LActualLength);
end;
To optimise even further, as suggested by Stefan Glienke, you can bypass the RTL's string handling machinery and write each character directly with some loss of code readability:
function RemoveNonDigits(const S: string): string;
begin
SetLength(Result, S.Length);
var ResChr := PChar(Result);
var LActualLength := 0;
for var i := 1 to S.Length do
if CharInSet(S[i], ['0'..'9']) then
begin
Inc(LActualLength);
ResChr^ := S[i];
Inc(ResChr);
end;
SetLength(Result, LActualLength);
end;
Benchmark
Just for fun I did a very primitive benchmark on random input strings with length < 100 and about 24% chance of a char being a digit:
program Benchmark;
{$APPTYPE CONSOLE}
{$R *.res}
uses
System.SysUtils, System.RegularExpressions, Windows;
function OP1(const s: string): string;
begin
Result := '';
for var i := 1 to Length(s) do
begin
if s[i] in ['0'..'9'] then
Result := Result + s[i];
end;
end;
function OP2(const s: string): string;
begin
Result := s;
for var i := Length(Result) downto 1 do
begin
if not(Result[i] in ['0'..'9']) then
Delete(Result, i, 1);
end;
end;
function OP3(const s: string): string;
begin
var RegEx := TRegEx.Create('[^0-9]');
Result := RegEx.Replace(s, '');
end;
function AR1(const S: string): string;
begin
SetLength(Result, S.Length);
var LActualLength := 0;
for var i := 1 to S.Length do
if CharInSet(S[i], ['0'..'9']) then
begin
Inc(LActualLength);
Result[LActualLength] := S[i];
end;
SetLength(Result, LActualLength);
end;
function AR2(const S: string): string;
begin
SetLength(Result, S.Length);
var ResChr := PChar(Result);
var LActualLength := 0;
for var i := 1 to S.Length do
if CharInSet(S[i], ['0'..'9']) then
begin
Inc(LActualLength);
ResChr^ := S[i];
Inc(ResChr);
end;
SetLength(Result, LActualLength);
end;
function AR3(const S: string): string;
begin
SetLength(Result, S.Length);
var ResChr := PChar(Result);
for var i := 1 to S.Length do
if CharInSet(S[i], ['0'..'9']) then
begin
ResChr^ := S[i];
Inc(ResChr);
end;
SetLength(Result, ResChr - PChar(Result));
end;
function RandomInputString: string;
begin
SetLength(Result, Random(100));
for var i := 1 to Result.Length do
Result[i] := Chr(Ord('0') + Random(42));
end;
begin
Randomize;
const N = 1000000;
var Data := TArray<string>(nil);
SetLength(Data, N);
for var i := 0 to N - 1 do
Data[i] := RandomInputString;
var f, c0, cOP1, cOP2, cOP3, cAR1, cAR2, cAR3: Int64;
QueryPerformanceFrequency(f);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
OP1(Data[i]);
QueryPerformanceCounter(cOP1);
Dec(cOP1, c0);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
OP2(Data[i]);
QueryPerformanceCounter(cOP2);
Dec(cOP2, c0);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
OP3(Data[i]);
QueryPerformanceCounter(cOP3);
Dec(cOP3, c0);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
AR1(Data[i]);
QueryPerformanceCounter(cAR1);
Dec(cAR1, c0);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
AR2(Data[i]);
QueryPerformanceCounter(cAR2);
Dec(cAR2, c0);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
AR3(Data[i]);
QueryPerformanceCounter(cAR3);
Dec(cAR3, c0);
Writeln('Computations per second:');
Writeln('OP1: ', Round(N / (cOP1 / f)));
Writeln('OP2: ', Round(N / (cOP2 / f)));
Writeln('OP3: ', Round(N / (cOP3 / f)));
Writeln('AR1: ', Round(N / (cAR1 / f)));
Writeln('AR2: ', Round(N / (cAR2 / f)));
Writeln('AR3: ', Round(N / (cAR3 / f)));
Readln;
end.
Result:
Computations per second:
OP1: 1398134
OP2: 875116
OP3: 39162
AR1: 3406172
AR2: 4063260
AR3: 4032343
As you can see, in this test at least, regular expressions are by far the slowest approach. And preallocating makes a major difference, while avoiding the _UniqueStringU issue appears to make only a relatively minor improvement.
But even with the very slow RegEx approach, you can do 40 000 calls per second. On my 13-year-old computer.
What's the most efficient way to replace every third character of the same type in a string?
I have a string like this:
str := 'c1'#9'c2'#9'c3'#9'c4'#9'c5'#9'c6'#9'
I want to replace every third #9 by #13#10, so that i get:
str1 := 'c1'#9'c2'#9'c3'#13#10'c4'#9'c5'#9'c6'#13#10'
I would do this in this way:
i:=0;
newStr:='';
lastPos := Pos(str,#9);
while lastPos > 0 do begin
if i mod 3 = 2 then begin
newStr := newStr + Copy(str,1,lastPos-1) + #13#10;
end else begin
newStr := newStr + Copy(str,1,lastPos);
end;
str := Copy(str,lastPos+1,MaxInt);
i := i + 1;
lastPos := Pos(str,#9);
end;
newStr := Copy(str,1,MaxInt);
But thats a lot of copying. Is there a string manipulation function to do this?
I think the problem as stated doesn't match the code you provided. Is every third character a #9? If so, do you want to change every third appearance of #9 for #13#10?
If so, I would do it this way:
function test(str: string): string;
var
i, c, l: integer;
begin
l := Length(str);
SetLength(Result, l + l div 9);
c := 1;
for i := 1 to l do
begin
if (i mod 9 = 0) and (i > 0) then
begin
Result[c] := #13;
Inc(c);
Result[c] := #10;
end
else
Result[c] := str[i];
Inc(c);
end;
end;
I actually have no idea if this function performs well. But given that the constraints aren't clear, I guess so.
If the position of the #9 character is unknown then this solution won't work at all.
Edit: as David points out, this is not nearly equivalent to the original code posted. This seems to work, but it requires two passes on the original string. The thing is, to know if its more efficient or not we need to know more about the input and context.
function OccurrencesOfChar(const S: string; const C: char): integer;
var
i: integer;
begin
result := 0;
for i := 1 to Length(S) do
if S[i] = C then
inc(result);
end;
function Test(str: string): string;
var
len, n, C, i: integer;
begin
C := 1;
len := Length(str);
n := OccurrencesOfChar(str, #9);
SetLength(result, len + n div 3);
n := 1;
for i := 1 to len do
begin
if str[i] = #9 then
begin
if n mod 3 = 0 then
begin
result[C] := #13;
inc(C);
result[C] := #10;
end
else
result[C] := #9;
Inc(n);
end
else
result[C] := str[i];
inc(C);
end;
end;
I expect this question will be closed, but just for fun, that would be my proposal.
Function Replace(const Instr:String;Re:Char;const ReWith:String):String;
var
i,o,cnt,l:Integer;
begin
cnt:=0;
o:=0;
SetLength(Result,Length(Instr)*Length(ReWith));// just for security
for I := 1 to Length(Instr) do
begin
if Instr[i]=Re then inc(cnt);
if cnt=3 then
begin
for l := 1 to Length(ReWith) do
begin
inc(o);
Result[o] := ReWith[l];
end;
cnt := 0;
end
else
begin
inc(o);
Result[o] := Instr[i];
end;
end;
SetLength(Result,o);
end;
procedure TForm3.Button1Click(Sender: TObject);
begin
Edit2.Text := Replace(Edit1.Text,'A','xxx')
end;
I would probably do something like this (coded in the browser). It only needs one string resize and should have less movement of data around. I exit when I have made the last replacement or if it didn't need any:
procedure ReplaceXChar(var aStringToReplace: string; const aIteration:
Integer; const aChar: Char; const aReplacement: string);
var
replaceCount: Integer;
cntr: Integer;
outputCntr: Integer;
lengthToReplace: Integer;
begin
// Find the number to replace
replaceCount := 0;
for cntr := 1 to Length(aStringToReplace) do
begin
if aStringToReplace[cntr] = aChar then
Inc(replaceCount);
end;
if replaceCount >= aIteration then
begin
// Now replace them
lengthToReplace := Length(aReplacement);
cntr := Length(aStringToReplace);
SetLength(aStringToReplace, cntr +
(replaceCount div aIteration) * (lengthToReplace - 1));
outputCntr := Length(aStringToReplace);
repeat
if aStringToReplace[cntr] = aChar then
begin
if (replaceCount mod aIteration) = 0 then
begin
Dec(outputCntr, lengthToReplace);
Move(aReplacement[1], aStringToReplace[outputCntr+1],
lengthToReplace * SizeOf(Char));
end
else
begin
aStringToReplace[outputCntr] := aStringToReplace[cntr];
Dec(outputCntr);
end;
Dec(replaceCount);
end
else
begin
aStringToReplace[outputCntr] := aStringToReplace[cntr];
Dec(outputCntr);
end;
Dec(cntr);
until replaceCount = 0;
end;
end;
Usage would be like this:
var
myString: String;
begin
myString := 'c1'#9'c2'#9'c3'#9'c4'#9'c5'#9'c6'#9;
ReplaceXChar(myString, 3, #9, #13#10);
ShowMessage(myString);
end;
I have a string of delimited text ie:
Value1:Value2:Value3:Value4:Value5:Value6
How would I extract, for example, a specific value Ie:
Label.caption := GetValuefromDelimitedText(2); to get Value2
Thanks in advance
Paul
Something like that - if you like compact code (but not as performant as Davids):
function GetValueFromDelimitedText(const s: string; Separator: char; Index: Integer): string;
var sl : TStringList;
begin
Result := '';
sl := TStringList.Create;
try
sl.Delimiter := Separator;
sl.DelimitedText := s;
if sl.Count > index then
Result := sl[index];
finally
sl.Free;
end;
end;
Hope that helps
This should do it:
function GetValueFromDelimitedText(
const s: string;
const Separator: char;
const Index: Integer
): string;
var
i, ItemIndex, Start: Integer;
begin
ItemIndex := 1;
Start := 1;
for i := 1 to Length(s) do begin
if s[i]=Separator then begin
if ItemIndex=Index then begin
Result := Copy(s, Start, i-Start);
exit;
end;
inc(ItemIndex);
Start := i+1;
end;
end;
if ItemIndex=Index then begin
Result := Copy(s, Start, Length(s)-Start+1);
end else begin
Result := '';
end;
end;
This version allows you to specify the separator, you would obviously pass ':'. If you ask for an item beyond the end then the function will return the empty string. You could change that to an exception if you preferred. Finally, I have arranged that this uses 1-based indexing as per your example, but I personally would choose 0-based indexing.
If using Delphi XE or higher you can also use StrUtils.SplitString like this:
function GetValueFromDelimitedText (const Str: string; Separator: Char; Index: Integer) : string;
begin
Result := SplitString (Str, Separator) [Index];
end;
In production code, you should check that Index is indeed a valid index.
This method returns a TStringDynArray (a dynamic array of strings) so you can also use it like this (using enumerators):
for Str in SplitString (Str, Separator) do
Writeln (Str);
which can be very useful IMHO.
I have a string comprising numerous words. How do I find and count the total amount of times that a particular word appears?
E.g "hello-apple-banana-hello-pear"
How would I go about finding all the "hello's" in the example above?
Thanks.
In Delphi XE you can use StrUtils.SplitString.
Something like this
var
Words: TstringDynArray;
Word: string;
WordCount: Integer;
begin
WordCount := 0;
Words := SplitString('hello-apple-banana-hello-pear', '-');
for Word in Words do
begin
if Word = 'hello' then
inc(WordCount);
end;
This would depend entirely on how you define a word and the text from which you wish to pull the words. If a "word" is everything between spaces, or "-" in your example, then it becomes a fairly simple task. If, however, you want to deal with hyphenated words, abbreviations, contractions, etc. then it becomes a lot more difficult.
More information please.
EDIT: After rereading your post, and if the example you give is the only one you want, then I'd suggest this:
function CountStr(const ASearchFor, ASearchIn : string) : Integer;
var
Start : Integer;
begin
Result := 0;
Start := Pos(ASearchFor, ASearchIn);
while Start > 0 do
begin
Inc(Result);
Start := PosEx(ASearchFor, ASearchIn, Start + 1);
end;
end;
This will catch ALL instances of a sequence of characters.
I'm sure there is plenty of code around to do this sort of thing, but it's easy enough to do it yourself with the help of Generics.Collections.TDictionary<K,V>.
program WordCount;
{$APPTYPE CONSOLE}
uses
SysUtils, Character, Generics.Collections;
function IsSeparator(const c: char): Boolean;
begin
Result := TCharacter.IsWhiteSpace(c);//replace this with whatever you want
end;
procedure PopulateWordDictionary(const s: string; dict: TDictionary<string, Integer>);
procedure AddItem(Item: string);
var
Count: Integer;
begin
if Item='' then
exit;
Item := LowerCase(Item);
if dict.TryGetValue(Item, Count) then
dict[Item] := Count+1
else
dict.Add(Item, 1);
end;
var
i, len, Start: Integer;
Item: string;
begin
len := Length(s);
Start := 1;
for i := 1 to len do begin
if IsSeparator(s[i]) then begin
AddItem(Copy(s, Start, i-Start));
Start := i+1;
end;
end;
AddItem(Copy(s, Start, len-Start+1));
end;
procedure Main;
var
dict: TDictionary<string, Integer>;
pair: TPair<string, Integer>;
begin
dict := TDictionary<string, Integer>.Create;
try
PopulateWordDictionary('hello apple banana Hello pear', dict);
for pair in dict do
Writeln(pair.Key, ': ', pair.Value);
finally
dict.Free;
end;
end;
begin
try
Main;
except
on E: Exception do
Writeln(E.ClassName, ': ', E.Message);
end;
end.
Output:
hello: 2
banana: 1
apple: 1
pear: 1
Note: I'm working with Delphi 2010 and don't have SplitString() available.
A very clever implementation I saw somewhere on the web:
{ Returns a count of the number of occurences of SubText in Text }
function CountOccurences( const SubText: string;
const Text: string): Integer;
begin
if (SubText = '') OR (Text = '') OR (Pos(SubText, Text) = 0) then
Result := 0
else
Result := (Length(Text) - Length(StringReplace(Text, SubText, '', [rfReplaceAll]))) div Length(subtext);
end; { CountOccurences }
I have to write program that counts how many different letters are in string.
For example "abc" will give 3; and "abcabc" will give 3 too, because there are only 3 different letters.
I need to use pascal, but if you can help with code in different languages it would be very nice too.
Here is my code that does not work:
var s:string;
i,j,x,count:integer;
c:char;
begin
clrscr;
Readln(s);
c:=s[1];
x:=1;
Repeat
For i:=1 to (length(s)) do
begin
If (c=s[i]) then
begin
delete(s,i,1);
writeln(s);
end;
end;
c:=s[1];
x:=x+1;
Until length(s)=1;
Writeln(x);
x is the different letter counter;
Maybe my algorythm is very bad.. any ideas? Thank you.
You've got answers on how to do it, here's why your way doesn't work.
First of all intuitively you had a good idea: Start with the first char in the string, count it (you forgot to include the counting code), remove all occurrences of the same char in the string. The idea is inefficient, but it would work. You ran into trouble with this bit of code:
For i:=1 to (length(s)) do
begin
If (c=s[i]) then
begin
delete(s,i,1);
end;
end;
The trouble is, Pascal will take the Length(s) value when it sets up the loop, but your code changes the length of the string by removing chars (using delete(s,i,1)). You'll end up looking at bad memory. The secondary issue is that i is going to advance, it doesn't matter if it matched and removed an char or not. Here's why that's bad.
Index: 12345
String: aabbb
You're going to test for i=1,2,3,4,5, looking for a. When i is 1 you'll find a match, remove the first char, and your string is going to look like this:
Index: 1234
String: abbb
You're now testing with i=2, and it's not a match, because s[2] =b. You just skiped one a, and that given a is going to stay in the array an other round and cause your algorithm to count it twice. The "fixed" algorithm would look like this:
i := 1;
while i <= Length(s) do
if (c=s[i]) then
Delete(s,i,1)
else
Inc(i);
This is different: In the given example, if I found a match at 1, the cursor doesn't advance, so it sees the second a. Also because I'm using a while loop, not a for loop, I can't get in trouble with possible implementation details of the for loop.
Your algorithm has an other problem. After the loop that removes all occurrences of the first char in string you're preparing the next loop using this code:
c:=s[1];
The trouble is, if you feed this algorithm an string of the form aa (length=2, two identical chars), it's going to enter the loop, delete or occurrences of a (those turning s into an EMPTY string) and then attempt to read the first char of the EMPTY string.
One final word: Your algorithm should handle the empty string on input, returning an count=0. Here's the fixed algorithm:
var s:string;
i,count:integer;
c:char;
begin
Readln(s);
count:=0;
while Length(s) > 0 do
begin
Inc(Count);
c := s[1];
i := 1;
while i <= Length(s) do
begin
If (c=s[i]) then
delete(s,i,1)
else
Inc(i);
end;
end;
Writeln(Count);
Readln;
end.
I am a Delphi expert, so I don't quite know how restrictive plain Pascal is. Nevertheless, this is Delphi:
// Returns the number of *distinct* "ANSI" characters in Str
function NumChrs(const Str: AnsiString): integer;
var
counts: array[0..255] of boolean;
i: Integer;
begin
ZeroMemory(#counts[0], sizeof(boolean) * length(counts));
for i := 1 to length(Str) do
counts[ord(Str[i])] := true;
result := 0;
for i := 0 to high(counts) do
if counts[i] then
inc(result);
end;
The first line can be written
for i := 0 to high(counts) do
counts[i] := false;
if you cannot use the Windows API (or the Delphi FillChar function).
If you wish to have Unicode support (as in Delphi 2009+), you can do
// Returns the number of *distinct* Unicode characters in Str
function NumChrs(const Str: string): integer;
const
AllocBy = 1024;
var
FoundCodepoints: array of integer;
i: Integer;
procedure Push(Codepoint: integer);
var
i: Integer;
begin
for i := 0 to result - 1 do
if FoundCodepoints[i] = Codepoint then
Exit;
if length(FoundCodepoints) = result then
SetLength(FoundCodepoints, length(FoundCodepoints) + AllocBy);
FoundCodepoints[result] := Codepoint;
inc(result);
end;
begin
result := 0;
for i := 1 to length(Str) do
Push(ord(Str[i]));
end;
Here's my version. I'm not saying you'll get a great mark in your assignment if you hand this in.
function NumberOfUniqueChars(s: string): Integer;
var
i, j: Integer;
c: char;
begin
for i := 1 to Length(s) do
for j := i+1 to Length(s) do
if s[i]<s[j] then
begin
c := s[i];
s[i] := s[j];
s[j] := c;
end;
Result := 0;
for i := 1 to Length(s) do begin
if (i=1) or (s[i]<>c) then
inc(Result);
c := s[i];
end;
end;
And using a Delphi construct (not efficient, but clean)
function returncount(basestring: String): Integer;
var charstrings: TStringList;
I:Integer;
begin
Result := 0;
charstrings := TStringlist.create;
try
charstrings.CaseSensitive := False;
charstrings.Duplicates := DupIgnore;
for I := 1 to length(basestring) do
charstrings.Add(basestring[i]);
Result := charstrings.Count;
finally
charstrings.free;
end;
end;
Different languages are ok?
RUBY:
s = "abcabc"
=> "abcabc"
m = s.split(//)
=> ["a", "b", "c", "a", "b", "c"]
p = m & m
=> ["a", "b", "c"]
p.count
=> 3
A Delphi version. Same idea as #The Communist Duck Python version.
function GetNumChars(Str: string): Integer;
var
s: string;
c: Char;
begin
s := '';
for c in Str do
begin
if Pos(c, s) = 0 then
begin
s := s + c;
end;
end;
Result := Length(s);
end;
Just tossing in a set-alternative...
program CountUniqueChars;
{$APPTYPE CONSOLE}
uses
SysUtils;
var
InputStr: String;
CountedChr: Set of Char;
TotalCount: Integer;
I: Integer;
begin
Write('Text: ');
ReadLn(InputStr);
CountedChr := [];
TotalCount := 0;
for I := 1 to Length(InputStr) do
begin
Write('Checking: ' + InputStr[i]);
if (InputStr[i] in CountedChr)
then WriteLn(' --')
else begin
Include(CountedChr, InputStr[i]);
Inc(TotalCount);
WriteLn(' +1')
end;
end;
WriteLn('Unique chars: ' + IntToStr(TotalCount));
ReadLn;
end.
In Python, with explanation if you want it for any other language: (Since you wanted different languages)
s = 'aahdhdfrhr' #s is the string
l = [] #l is an empty list of some kind.
for i in s: #Iterate through the string
if i not in l: #If the list does not contain the character
l.append(i) #Add the character to the list
print len(l) #Print the number of characters in the list
function CountChars(const S:AnsiString):Integer;
var C:AnsiChar; CS:Set of AnsiChar;
begin
Result := 0;
CS := [];
for C in S do
if not (C in CS) then
begin
CS := CS + [C];
Inc(Result);
end;
end;