How to remove space around a character? - string

Say I have the following string:
s := 'This , is, the Delphi , World!';
I would like the following output:
Result := 'This,is,the Delphi,World!';
Basically I need a routine that strips ALL occurrences of spaces ONLY if they appears before or after the comma char (which is my delimiter), leaving intact spaces between other words.
Any help is much appreciated.
What do you think of this solution?
function RemoveSpacesAroundDelimiter(var aString: string; aDelimiter:
string): string;
begin
while AnsiContainsText(aString, aDelimiter + ' ') do
begin
aString := StringReplace(aString, ', ', aDelimiter, [rfReplaceAll, rfIgnoreCase]);
end;
while AnsiContainsText(aString, ' ' + aDelimiter) do
begin
aString := StringReplace(aString, ' ' + aDelimiter, aDelimiter, [rfReplaceAll, rfIgnoreCase]);
end;
Result := aString;
end;
thanks
fabio

Sounds like a task for TStringList.
function UltraTrim(Value: string): string;
var
sl: TStringList;
i: Integer;
begin
sl := TStringList.Create;
try
// Prevent the stringlist from using spaces as delimiters too.
sl.StrictDelimiter := True;
// Set the comma separated text.
sl.CommaText := Value;
// Trim each item.
for i := 0 to sl.Count -1 do
sl[i] := Trim(sl[i]);
// Concat back to comma separated string.
Result := sl.CommaText;
finally
sl.Free;
end;
end;

A fast version could be:
function RemoveSpacesAroundDelimiter(const aString: string; aDelimiter: char = ','): string;
var S, D, D2: PChar;
begin
SetLength(result,length(aString));
if aString<>'' then
begin
S := pointer(aString);
D := pointer(result);
while S^<>#0 do
begin
if S^=' ' then
begin
D2 := D;
repeat
inc(S);
D^ := ' ';
inc(D);
until S^<>' ';
if S^=#0 then
break;
if S^=aDelimiter then
D := D2; // trim spaces before comma
end;
D^ := S^;
if (S[0]=aDelimiter) and (S[1]=' ') then
repeat inc(S) until S^<>' ' else // trim spaces after comma
inc(S);
inc(D);
end;
SetLength(result,D-pointer(result));
end;
end;
Some test code:
assert(RemoveSpacesAroundDelimiter('one two,three')='one two,three');
assert(RemoveSpacesAroundDelimiter('one two , three')='one two,three');
assert(RemoveSpacesAroundDelimiter('one,two,three')='one,two,three');
assert(RemoveSpacesAroundDelimiter('one , two, three')='one,two,three');

Copy characters one-by-one into the destination buffer, but look for spaces and delimiters, and remember the last location you copied a non-space character into. If you see a space and the last non-space you copied was the delimiter, then skip the space. If it's a space and the last character you copied wasn't the delimiter, then copy it to the destination, but remember the last non-space you added. That way, if you see a delimiter later, you can go back and overwrite it.
function RemoveSpacesAroundDelimiter(const AString: string; ADelimiter: Char): string;
var
c: Char;
dest: Integer;
LastNonSpace: Integer;
HaveDelimiter: Boolean;
begin
Assert(ADelimiter <> ' ');
SetLength(Result, Length(AString));
dest := 1;
LastNonSpace := 0;
HaveDelimiter := False;
for c in AString do begin
if (c = ' ') and HaveDelimiter then
continue; // Skip this character
if c = ADelimiter then begin
dest := LastNonSpace + 1;
HaveDelimiter := True;
end else
HaveDelimiter := False;
Result[dest] := c;
if c <> ' ' then
LastNonSpace := dest;
Inc(dest);
end;
SetLength(Result, dest - 1);
end;

If you are using Delphi XE or above you can do this trivially in a single line of code, using a regular expression.
program regex;
{$APPTYPE CONSOLE}
uses
RegularExpressions;
const
Input = 'This , is, the Delphi , World!';
begin
Writeln(TRegEx.Replace(Input, ' *, *', ','));
Readln;
end.
Naturally this is not the fastest running of the solutions on offer, but maybe that doesn't matter to you.

You can use regular expressions. You want to find the delimiter preceded or followed by any number of spaces, and replace it all with a single copy of the delimiter.
function RemoveSpacesAroundDelimiter(const AString: string; const ADelimiter: string): string;
var
re: TPerlRexEx;
begin
re := TPerlRegEx.Create;
try
re.RegEx := '\s*' + TPerlRegEx.EscapeRegExChars(ADelimiter) + '\s*';
re.Subject := AString;
re.Replacement := TPerlRegEx.EscapeRegExChars(ADelimiter);
re.ReplaceAll;
Result := re.Subject;
finally
re.Free;
end;
end;
Newer Delphi versions can use the built-in RegularExpressionCore unit. Older versions can use the equivalent PerlRegEx unit from Jan Goyvaerts.
Mick previously posted an answer demonstrating this, but he deleted it because he got the regular expression wrong (deleting all spaces instead of just the ones abutting the delimiter).

The simpler and easiest way is to use regular expressions. The last thing you would need is a huge complicated code block to solve such a simple problem. Unfortunatly I don't have Delphi with me right now, I can't test this code, but if it's nothing exactly like this, it's very very close:
s := 'This , is, the Delphi , World!';
RegEx := TRegEx.Create('[ ]*,[ ]*');
CleanStr := RegEx.Replace(s, ',');

I have this solution:
slValores.DelimitedText := StringReplace(sListSummary,' ','',[rfReplaceAll]);

I thought this was worth adding because it will work with early versions of Delphi, which the stringlist solution (which I liked) does not.
It is alo reasonably quick, I believe, and fairly simple to read and understand.
function TForm1.UltraTrim(const InString : String; Delim : Char) : String;
var
Buf : String;
i : Integer;
Token : String;
begin
Result := '';
if Trim(InString) <> '' then begin
i := 1;
Buf := StringReplace(InString, Delim, #0, [rfReplaceAll]) + #0;
while i < Length(Buf) do begin
Token := StrPas(#Buf[i]);
i := i + Length(Token) + 1;
Result := Result + Delim + Trim(Token);
end;
Result := Copy(Result,2,Length(Result));
end;
end;

Using Jedi Code Library, answer by #GolezTrol can be reformulated using one-liner.
function UltraTrim(Value: string): string;
begin
Result := JclStringList.Split(Value, ',').Trim.Join(',')
end;
http://en.wikipedia.org/wiki/Fluent_interface
http://wiki.delphi-jedi.org/wiki/JCL_Help:JclStringList

with this function :
function MBTrim(iStr :string):string;
const CTc= 3{Conditions Count};
CT :array[0..(CTc-1),0..1]of string= ( (' ,', ','), (', ', ','), (' ', ' ') );
var i :Integer;
begin
for i := 0 to CTc-1 do while Pos(CT[i,0], iStr) > 0 do
iStr:= StringReplace(iStr, CT[i,0], CT[i,1], [rfReplaceAll, rfIgnoreCase]);
Result:= Trim(iStr);
end;
you can add other conditions simply.
for example i add (' ', ' ') to convert space between words like :
'This , is, the Delphi , World!'

Changed, one more time.
while (pos(', ',s)>0) or (pos(' ,',s)>0) do begin
s := StringReplace(s, ', ', ',', [rfReplaceAll]);
s := StringReplace(s, ' ,', ',', [rfReplaceAll]); end;
OK for all the Delphi versions.

Related

Delphi: Remove chars from string

I have a string containing letters, numbers and other chars.
I want to remove from that string all numbers, dots and commas
Before: 'Axis moving to new position - X-Pos: 5.4mm / Y-Pos: 3.5mm'
After: 'Axis moving to new position - X-Pos mm / Y-Pos mm'
Unfortunately string.replace() only replaces one character. So I need several lines.
How can I avoid writing every replacement line by line?
sString := sString.Replace('0', '');
sString := sString.Replace('1', '');
sString := sString.Replace('2', '');
sString := sString.Replace('3', '');
sString := sString.Replace('3', '');
...
sString := sString.Replace(':', '');
sString := sString.Replace('.', '');
Although the OP's own solution is fine, it is somewhat inefficient.
Just for completeness, here's a slightly more optimized version:
function RemoveCharsFromString(const AString, AChars: string): string;
begin
SetLength(Result, AString.Length);
var ActualLength := 0;
for var i := 1 to AString.Length do
begin
if SomePredicate(AString[i]) then
begin
Inc(ActualLength);
Result[ActualLength] := AString[i];
end;
end;
SetLength(Result, ActualLength);
end;
The algorithm is independent of the particular predicate. In this case, the predicate is Pos(AString[i], AChars) = 0.
There are multiple ways of how you can approach this. Here are three solution.
Solution 1
You can go and simply loop though the source string checking each character to se if it is one of the characters that needs to be removed.
//Simple function that loops through all characters of the source strings removing them one by one
//It is manipulating the same string all the time
function Removechars1(sourceString: string; sCharsToBeRemoved: string):string;
var I: Integer;
begin
//Assign value of the source string to the result so we can work with result strin from now on
result := SourceString;
//Loop throught the whole result sring starting at end searching for characters to be removed
//We start at the end because when we will be removing characters from the string its length
//will be decreasing.
for I := Length(result) downto 1 do
begin
//Use Pos function to see if specific character in the result string can also be found
//in the sCharsToBeRemoved and therefore needs to be removed
if Pos(result[i], sCharsToBeRemoved) <> 0 then
begin
//If so we delete the specific character
Delete(result,I,1);
end;
end;
end;
Solution 2
Second solution is similar to the first one but it relies on adding characters non removable characters to the result. It is slightly slower than the first solution
//Slightly faster function that also loops through the whole sourceString character by character
//and adds such characters to result string if they are not present in sCharsToBeRemoved string
function RemoveChars2(sourceString: string; sCharsToBeRemoved: string):string;
var I: Integer;
begin
//Prepare enpty string for our result strung to which we will be copying our end characters
result := '';
//Loop through the whole string
for I := 1 to Length(sourceString) do
begin
//If specific haracter can't be found in sCharsToBeRemoved copy that character to the
//result string
if Pos(sourceString[I], sCharsToBeRemoved) = 0 then
begin
result := result + sourceString[I];
end;
end;
end;
Solution 3
The third solution relies on string helpers for replacing specific characters. This one is by far the fastest of the three needing about half the time that is needed by the first solution to process the same job
//Another approach of removing characters from source string that relies on Replace string helper
function RemoveChars3(sourceString: string; sCharsToBeRemoved: string):string;
var I: Integer;
begin
//Assign value of the source string to the result so we can work with result strin from now on
result := sourceString;
//Loop through the sCharsToBeRemoved string so we can then call Replace string helper in order
//to replace all occurrences of such character in sourceString;
for I := 1 to Length(sCharsToBeRemoved) do
begin
result := result.Replace(sCharsToBeRemoved[I],'');
end;
end;
Main advantages of this approach is that it is quite fast and could easily modified to be able to remove whole substrings and not only individual characters.
PS: In my testing your solution was actually the slowest needing about 20% more time than my first solution
TestTring
jfkldajflkajdflkajlkčfjaskljflakjflkdasjflkčjdfkldafjadklfjadklfjaldkakljfkldajflkčadjslfkjadklfjlkadčjflkajdflčkjadlkfjladkdjfkladjflkadjflkčjadklčfjaldkjfkladjfklajflkadjfkadgfkljdklfjawdkojfkladsjflčaksdjdfklčasjdklčfdfklčjadslkdfjlka
CharsToBeRemoved
asjk
Solution 1
1230 ms
Solution 2
1263 ms
Solution 3
534 ms
Your solution
1574 ms
This solution works with a very small footprint of code lines.
I just split the string on each occurence of a char which should be removed. After that I put the pieces together without the removed chars.
uses System.SysUtils;
function RemoveCharsFromString(sFullString: string; sCharsToBeRemoved: string): string;
var
splitted: TArray<String>;
begin
splitted := sFullString.Split(sCharsToBeRemoved.ToCharArray());
Result := string.Join('', splitted);
end;
string.Replace has an overload where you can pass flags to replace all instead of just one. Example:
sString := sString.Replace('1', '', [rfReplaceAll, rfIgnoreCase]);
Edit: Stringlist equivalent:
sString.Text := sString.Text.Replace('1', '', [rfReplaceAll, rfIgnoreCase]);
Working with strings spends more time, use PChar instead.
I think here's a slightly more optimized version
function RemoveCharsFromString(const AString, AChars: String): String;
var
i, j, k, LenString, LenChars : Integer;
PString, PChars : PChar;
label
Ends;
begin
PString := Pointer(AString);
PChars := Pointer(AChars);
LenString := AString.Length;
LenChars := AChars.Length;
k := 0;
for i := 0 to LenString - 1 do
begin
for j := 0 to LenChars - 1 do
if PString[i] = PChars[j] then
Goto Ends;
PString[k] := PString[i];
Inc(k);
Ends :
end;
PString[k] := #0;
Result := StrPas(PString);
end;
If you don't like Labels, use this code :
function RemoveCharsFromString(const AString, AChars: String): String;
var
i, j, k, LenString, LenChars : Integer;
PString, PChars : PChar;
found : Boolean;
begin
PString := Pointer(AString);
PChars := Pointer(AChars);
LenString := AString.Length;
LenChars := AChars.Length;
k := 0;
for i := 0 to LenString - 1 do
begin
found := False;
for j := 0 to LenChars - 1 do
if PString[i] = PChars[j] then
begin
found := True;
Break;
end;
if not found then
begin
PString[k] := PString[i];
Inc(k);
end;
end;
PString[k] := #0;
Result := StrPas(PString);
end;
You can call it like this :
sString := RemoveCharsFromString(sString, '0123456789.,');

How to remove repeated spaces from a string

I need to remove repeated spaces from a string.
The following code, grabbed from internet, works decently except that it duplicate the first char of the string.
Also maybe there is something faster that this.
function DeleteRepeatedSpaces(OldText: string): string;
var
i: integer;
s: string;
begin
if length(OldText) > 0 then
s := OldText[1]
else
s := '';
for i := 1 to length(OldText) do
begin
if OldText[i] = ' ' then
begin
if not (OldText[i - 1] = ' ') then
s := s + ' ';
end
else
begin
s := s + OldText[i];
end;
end;
DelDoubleSpaces := s;
end;
Function based on the simplest state machine (DFA). Minimum of memory reallocations.
State is number of continuous spaces.
J is count of deleted spaces.
function DeleteRepeatedSpaces(const s: string): string;
var
i, j, State: Integer;
begin
SetLength(Result, Length(s));
j := 0;
State := 0;
for i := 1 to Length(s) do begin
if s[i] = ' ' then
Inc(State)
else
State := 0;
if State < 2 then
Result[i - j] := s[i]
else
Inc(j);
end;
if j > 0 then
SetLength(Result, Length(s) - j);
end;
Iterate all members of the string, move the characters to the Result, but skip repeated spaces.
function DeleteRepeatedSpaces(const OldText: string): string;
var
i,j,hi: Integer;
begin
SetLength(Result,Length(OldText));
i := Low(OldText);
j := i;
hi := High(OldText);
while (i <= hi) do begin
Result[j] := OldText[i];
Inc(j);
if (OldText[i] = ' ') then begin
repeat //Skip additional spaces
Inc(i);
until (i > hi) or (OldText[i] <> ' ');
end
else
Inc(i);
end;
SetLength(Result,j-Low(Result)); // Set correct length
end;
The above code is rather fast (faster than any other contribution, so far).
Below is an even more optimized routine:
function DeleteRepeatedSpaces(const OldText: string): string;
var
pO,pR: PChar;
begin
SetLength(Result,Length(OldText));
pR := Pointer(Result);
pO := Pointer(OldText);
while (pO^ <> '') do begin
pR^ := pO^;
Inc(pR);
if (pO^ <> ' ') then begin
Inc(pO);
Continue;
end;
repeat // Skip additional spaces
Inc(pO);
until (pO^ = '') or (pO^ <> ' ');
end;
SetLength(Result,pR-Pointer(Result));
end;
The following isn't wildly efficient, but possibly more so that processiing the string character by character because it doesn't require a new string allocation for each character in the output:
function RemoveDupSpaces(const Input : String) : String;
var
P : Integer;
begin
Result := Input;
repeat
P := Pos(' ', Result); // that's two spaces
if P > 0 then
Delete(Result, P + 1, 1);
until P = 0;
end;
You can use something like this:
function DeleteRepeatedSpaces(const s: string):string;
var
i:integer;
begin
Result := '';
for i := 1 to Length(S) do begin
if not ((s[i]=' ') and (s[i-1]=' ')) then begin
Result := Result + s[i];
end;
end;
end;
Delete two o more spaces contiguous in a string.
This string (without spaces):
The string have groups of spaces inside
return this:
The string have groups of spaces inside
This string (with spaces groups inside):
The string have groups of spaces inside
Return this:
The string have groups of spaces inside

How to Copy a string to another leaving a blank character in the first position?

procedure Split(S: String; List: TStringList; Separator: Char);
var
P, C: PAnsiChar;
S, Buff: String;
begin
List.Clear;
if S = '' then
Exit;
List.BeginUpdate;
(* [Ajusting size - Slow *)
if S[1] = Separator then
Insert('', S, 1);
S := S + Separator;
(* Adjusting size] *)
//Get Pointer to data
P := PChar(S);
//initial position
C := P;
while P^ <> #0 do //check if reached the end of the string
begin
//when found a separator
if P^ = Separator then
begin
if P = C then //check if the slot is empty
Buff := ''
else //when it is not empty, make an string buffer
SetString(Buff, C, P-C);
List.Add(Buff); //add the string into the list
Inc(C, P-C+1); //moves the pointer C to the adress of the pointer P
end;
Inc(P); //go to next char in the string
end;
List.EndUpdate;
end;
This code is working fine but is moving the string 3 times in the memory:
In the Method Call (By copy)
In the Insert('', S, 1)
In the Concatenation: S := S + Separator;
I thought about adding const keyword in the S parameter, creating an internal string to copy the data more or less like this:
if S[1] = Separator then
begin
SetLength(Str, Length(S)+2);
//HERE!! how to copy the string
Str[1] := ' ';
end
else
begin
SetLength(Str, Length(S)+1);
//HERE!! how to copy the string
end;
//Add Separator in the last position
Str[Length(Str)] := Separator;
Thus:
if the S contains ';'
it will create an stringlist with 2 items ('','').
if the S contains ';A'
it will create an stringlist with 2 items ('','A').
if the S contains 'A;A'
it will create an stringlist with 2 items ('A','A').
if the S contains 'A;'
it will create an stringlist with 2 items ('A','').
Like this:
if S[1] = Separator then
begin
SetLength(Str, Length(S)+2);
Move(Pointer(S)^, Str[2], Length(S)*SizeOf(Char));
S[1] := ' '; // surely you mean Str[1] := ' '
end
else
begin
SetLength(Str, Length(S)+1);
Move(Pointer(S)^, Str[1], Length(S)*SizeOf(Char));
end;
//Add Separator in the last position
Str[Length(Str)] := Separator;
It would be easy enough to re-work this to avoid the duplication.
var
dest: PChar;
if S[1] = Separator then
begin
SetLength(Str, Length(S)+2);
dest := #Str[2];
S[1] := ' '; // surely you mean Str[1] := ' '
end
else
begin
SetLength(Str, Length(S)+1);
dest := #Str[1];
end;
Move(Pointer(S)^, dest^, Length(S)*SizeOf(Char));
//Add Separator in the last position
Str[Length(Str)] := Separator;
And so on. I'll leave it to you to polish it up.
The following routine is one I wrote (more accurately, adapted from SetDelimitedText and ExtractStrings) for Delphi 7 to handle the lack of the TStrings.StrictDelimiter property. Given the correct parameters, it'll return exactly the results you want.
{
SplitString will expand the delimited string S into its component parts and
store them in Strings. The primary difference between this routine and
Classes.ExtractStrings and TStrings.DelimitedText is that it does not treat
spaces, tabs, and CR/LF as delimiters whether you like it or not. If Quotes
is non-empty, then quoted strings will be handled correctly.
Leading and Trailing whitespace is significant if TrimStrings is False.
If you want to eliminate empty tokens, set SkipEmptyStrings to True.
If you want Strings to be cleared before parsing, set ClearStrings to True.
This procedure is especially useful for dealing with CSV files exported from
Excel, since Excel does not quote a string unless it contains a comma.
Using ExtractStrings or TStrings.CommaText will fail with such files.
In Delphi 2006+, TStrings has the StrictDelimiter property that renders this
routine largely useless.
}
procedure SplitString(const S: string; Separators, Quotes: TSysCharSet; const Strings: TStrings; ClearStrings, TrimStrings, SkipEmptyStrings: Boolean);
var
Head, Tail: PChar;
Item: string;
StringExists: Boolean;
{$IF NOT Declared(CharInSet)}
function CharInSet(C: Char; const CharSet: TSysCharSet): Boolean;
begin
Result := C in CharSet;
end;
{$IFEND}
begin
StringExists := False;
Strings.BeginUpdate;
try
if ClearStrings then
Strings.Clear;
if S = '' then
Exit;
Tail := PChar(S);
while Tail^ <> #0 do begin
if CharInSet(Tail^, Quotes) then
Item := AnsiExtractQuotedStr(Tail, Tail^)
else begin
// Mark beginning of token
Head := Tail;
// Look for end of token, delineated by end of string or separator
while (Tail^ <> #0) and not CharInSet(Tail^, Separators) do
Inc(Tail);
SetString(Item, Head, Tail - Head);
if TrimStrings then begin
Item := Trim(Item);
Head := PChar(Item);
if CharInSet(Head^, Quotes) then
Item := Trim(AnsiExtractQuotedStr(Head, Head^));
end;
if not (SkipEmptyStrings and (Item = '')) then
Strings.Append(Item);
end;
// If the last character in a string is a separator, then we need to mark
// that another string exists, otherwise the next Inc(Tail) call will
// place Tail^ at #0, we'll exit the while loop, and never know that there
// was an empty string there to add.
// --AAF
StringExists := Tail^ <> #0;
// Skip Separator
if StringExists then
Inc(Tail);
end;
// This can only happen if the very last character is a separator
if StringExists and not SkipEmptyStrings then
Strings.Append('');
finally
Strings.EndUpdate;
end;
end;

How to get a specific field from delimited text

I have a string of delimited text ie:
Value1:Value2:Value3:Value4:Value5:Value6
How would I extract, for example, a specific value Ie:
Label.caption := GetValuefromDelimitedText(2); to get Value2
Thanks in advance
Paul
Something like that - if you like compact code (but not as performant as Davids):
function GetValueFromDelimitedText(const s: string; Separator: char; Index: Integer): string;
var sl : TStringList;
begin
Result := '';
sl := TStringList.Create;
try
sl.Delimiter := Separator;
sl.DelimitedText := s;
if sl.Count > index then
Result := sl[index];
finally
sl.Free;
end;
end;
Hope that helps
This should do it:
function GetValueFromDelimitedText(
const s: string;
const Separator: char;
const Index: Integer
): string;
var
i, ItemIndex, Start: Integer;
begin
ItemIndex := 1;
Start := 1;
for i := 1 to Length(s) do begin
if s[i]=Separator then begin
if ItemIndex=Index then begin
Result := Copy(s, Start, i-Start);
exit;
end;
inc(ItemIndex);
Start := i+1;
end;
end;
if ItemIndex=Index then begin
Result := Copy(s, Start, Length(s)-Start+1);
end else begin
Result := '';
end;
end;
This version allows you to specify the separator, you would obviously pass ':'. If you ask for an item beyond the end then the function will return the empty string. You could change that to an exception if you preferred. Finally, I have arranged that this uses 1-based indexing as per your example, but I personally would choose 0-based indexing.
If using Delphi XE or higher you can also use StrUtils.SplitString like this:
function GetValueFromDelimitedText (const Str: string; Separator: Char; Index: Integer) : string;
begin
Result := SplitString (Str, Separator) [Index];
end;
In production code, you should check that Index is indeed a valid index.
This method returns a TStringDynArray (a dynamic array of strings) so you can also use it like this (using enumerators):
for Str in SplitString (Str, Separator) do
Writeln (Str);
which can be very useful IMHO.

Find and Count Words in a String in Delphi?

I have a string comprising numerous words. How do I find and count the total amount of times that a particular word appears?
E.g "hello-apple-banana-hello-pear"
How would I go about finding all the "hello's" in the example above?
Thanks.
In Delphi XE you can use StrUtils.SplitString.
Something like this
var
Words: TstringDynArray;
Word: string;
WordCount: Integer;
begin
WordCount := 0;
Words := SplitString('hello-apple-banana-hello-pear', '-');
for Word in Words do
begin
if Word = 'hello' then
inc(WordCount);
end;
This would depend entirely on how you define a word and the text from which you wish to pull the words. If a "word" is everything between spaces, or "-" in your example, then it becomes a fairly simple task. If, however, you want to deal with hyphenated words, abbreviations, contractions, etc. then it becomes a lot more difficult.
More information please.
EDIT: After rereading your post, and if the example you give is the only one you want, then I'd suggest this:
function CountStr(const ASearchFor, ASearchIn : string) : Integer;
var
Start : Integer;
begin
Result := 0;
Start := Pos(ASearchFor, ASearchIn);
while Start > 0 do
begin
Inc(Result);
Start := PosEx(ASearchFor, ASearchIn, Start + 1);
end;
end;
This will catch ALL instances of a sequence of characters.
I'm sure there is plenty of code around to do this sort of thing, but it's easy enough to do it yourself with the help of Generics.Collections.TDictionary<K,V>.
program WordCount;
{$APPTYPE CONSOLE}
uses
SysUtils, Character, Generics.Collections;
function IsSeparator(const c: char): Boolean;
begin
Result := TCharacter.IsWhiteSpace(c);//replace this with whatever you want
end;
procedure PopulateWordDictionary(const s: string; dict: TDictionary<string, Integer>);
procedure AddItem(Item: string);
var
Count: Integer;
begin
if Item='' then
exit;
Item := LowerCase(Item);
if dict.TryGetValue(Item, Count) then
dict[Item] := Count+1
else
dict.Add(Item, 1);
end;
var
i, len, Start: Integer;
Item: string;
begin
len := Length(s);
Start := 1;
for i := 1 to len do begin
if IsSeparator(s[i]) then begin
AddItem(Copy(s, Start, i-Start));
Start := i+1;
end;
end;
AddItem(Copy(s, Start, len-Start+1));
end;
procedure Main;
var
dict: TDictionary<string, Integer>;
pair: TPair<string, Integer>;
begin
dict := TDictionary<string, Integer>.Create;
try
PopulateWordDictionary('hello apple banana Hello pear', dict);
for pair in dict do
Writeln(pair.Key, ': ', pair.Value);
finally
dict.Free;
end;
end;
begin
try
Main;
except
on E: Exception do
Writeln(E.ClassName, ': ', E.Message);
end;
end.
Output:
hello: 2
banana: 1
apple: 1
pear: 1
Note: I'm working with Delphi 2010 and don't have SplitString() available.
A very clever implementation I saw somewhere on the web:
{ Returns a count of the number of occurences of SubText in Text }
function CountOccurences( const SubText: string;
const Text: string): Integer;
begin
if (SubText = '') OR (Text = '') OR (Pos(SubText, Text) = 0) then
Result := 0
else
Result := (Length(Text) - Length(StringReplace(Text, SubText, '', [rfReplaceAll]))) div Length(subtext);
end; { CountOccurences }

Resources