Remove non-numeric characters from string in Delphi - string

I have these three functions that successfully remove all non-numeric characters from a given string:
The first function loops through the characters of the input string, and if the current character is a number, it adds it to a new string that is returned as the result of the function.
{ Returns a copy of s that keeps only the ASCII digits '0'..'9',
  in their original order. The result grows by concatenation. }
function RemoveNonNumericChars(const s: string): string;
var
  ch: Char;
begin
  Result := '';
  for ch in s do
    case ch of
      '0'..'9': Result := Result + ch;
    end;
end;
The second function loops through the characters of the input string from right to left, and if the current character is not a number, it uses the Delete function to remove it from the string
{ Returns a copy of s with every character outside '0'..'9' deleted.
  Walks from the last character to the first so that deleting a character
  never shifts the positions that are still to be visited. }
function RemoveNonNumericChars(const s: string): string;
var
  idx: Integer;
begin
  Result := s;
  idx := Length(Result);
  while idx >= 1 do
  begin
    if (Result[idx] < '0') or (Result[idx] > '9') then
      Delete(Result, idx, 1);
    Dec(idx);
  end;
end;
The third function uses a regular expression to replace all non-numeric characters with nothing, thus removing them. TRegEx is from the System.RegularExpressions unit.
{ Returns s with every character that is not '0'..'9' replaced by nothing,
  using the static TRegEx.Replace overload (System.RegularExpressions). }
function RemoveNonNumericChars(const s: string): string;
const
  NonDigitPattern = '[^0-9]';
begin
  Result := TRegEx.Replace(s, NonDigitPattern, '');
end;
All three of them do what I need, but I want to know if there is maybe a built-in function in Delphi for this... Or maybe even a better way to do it than the way I'm doing it. What's the best and/or fastest way to remove non-numeric characters from a string in Delphi?

Your first two approaches are slow because you constantly change the length of the string. Also, all three approaches only recognise Arabic digits.
To solve the performance issue, preallocate the maximum result length:
{ Returns the ASCII digits of S in their original order.
  The result is allocated once at the maximum possible length (all of S)
  and trimmed to the number of digits actually kept at the end. }
function RemoveNonDigits(const S: string): string;
var
  Ch: Char;
  Kept: Integer;
begin
  SetLength(Result, Length(S));
  Kept := 0;
  for Ch in S do
    if CharInSet(Ch, ['0'..'9']) then
    begin
      Inc(Kept);
      Result[Kept] := Ch;
    end;
  SetLength(Result, Kept);
end;
To support non-Arabic digits, use the TCharacter.IsDigit function:
{ Returns the characters of S for which the Char helper's IsDigit is True,
  in their original order. The result is preallocated at Length(S) and
  trimmed once at the end. Each UTF-16 code unit is tested on its own. }
function RemoveNonDigits(const S: string): string;
var
  Ch: Char;
  Kept: Integer;
begin
  SetLength(Result, Length(S));
  Kept := 0;
  for Ch in S do
    if Ch.IsDigit then
    begin
      Inc(Kept);
      Result[Kept] := Ch;
    end;
  SetLength(Result, Kept);
end;
To optimise even further, as suggested by Stefan Glienke, you can bypass the RTL's string handling machinery and write each character directly with some loss of code readability:
{ Returns the ASCII digits of S in their original order.
  The result is preallocated at Length(S); kept characters are stored
  through a moving PChar instead of Result[index], and the string is
  trimmed to the kept count at the end. }
function RemoveNonDigits(const S: string): string;
var
  Src: Char;
  Dest: PChar;
  Kept: Integer;
begin
  SetLength(Result, Length(S));
  Dest := PChar(Result);
  Kept := 0;
  for Src in S do
    if CharInSet(Src, ['0'..'9']) then
    begin
      Dest^ := Src;
      Inc(Dest);
      Inc(Kept);
    end;
  SetLength(Result, Kept);
end;
Benchmark
Just for fun I did a very primitive benchmark on random input strings with length < 100 and about 24% chance of a char being a digit:
// Micro-benchmark: runs six digit-filtering implementations over the same
// set of random strings and prints how many calls per second each achieves.
program Benchmark;
{$APPTYPE CONSOLE}
{$R *.res}
uses
System.SysUtils, System.RegularExpressions, Windows;
// OP1: builds the result by appending one digit at a time.
function OP1(const s: string): string;
begin
Result := '';
for var i := 1 to Length(s) do
begin
if s[i] in ['0'..'9'] then
Result := Result + s[i];
end;
end;
// OP2: copies the input and deletes non-digits, scanning right to left.
function OP2(const s: string): string;
begin
Result := s;
for var i := Length(Result) downto 1 do
begin
if not(Result[i] in ['0'..'9']) then
Delete(Result, i, 1);
end;
end;
// OP3: replaces every non-digit with '' via a TRegEx created on each call.
function OP3(const s: string): string;
begin
var RegEx := TRegEx.Create('[^0-9]');
Result := RegEx.Replace(s, '');
end;
// AR1: preallocates Length(S) characters, writes digits by index, trims once.
function AR1(const S: string): string;
begin
SetLength(Result, S.Length);
var LActualLength := 0;
for var i := 1 to S.Length do
if CharInSet(S[i], ['0'..'9']) then
begin
Inc(LActualLength);
Result[LActualLength] := S[i];
end;
SetLength(Result, LActualLength);
end;
// AR2: like AR1, but writes through a moving PChar and keeps a separate count.
function AR2(const S: string): string;
begin
SetLength(Result, S.Length);
var ResChr := PChar(Result);
var LActualLength := 0;
for var i := 1 to S.Length do
if CharInSet(S[i], ['0'..'9']) then
begin
Inc(LActualLength);
ResChr^ := S[i];
Inc(ResChr);
end;
SetLength(Result, LActualLength);
end;
// AR3: like AR2, but derives the final length from the pointer difference
// instead of maintaining a counter.
function AR3(const S: string): string;
begin
SetLength(Result, S.Length);
var ResChr := PChar(Result);
for var i := 1 to S.Length do
if CharInSet(S[i], ['0'..'9']) then
begin
ResChr^ := S[i];
Inc(ResChr);
end;
SetLength(Result, ResChr - PChar(Result));
end;
// Returns a string of Random(100) characters, each drawn from the 42
// consecutive code points starting at '0'; 10 of those 42 are digits.
function RandomInputString: string;
begin
SetLength(Result, Random(100));
for var i := 1 to Result.Length do
Result[i] := Chr(Ord('0') + Random(42));
end;
begin
Randomize;
const N = 1000000;
// Generate all inputs up front so string creation is not part of the timings.
var Data := TArray<string>(nil);
SetLength(Data, N);
for var i := 0 to N - 1 do
Data[i] := RandomInputString;
// f = performance-counter frequency, c0 = start tick of the current run,
// cXXX = elapsed ticks for the corresponding function.
var f, c0, cOP1, cOP2, cOP3, cAR1, cAR2, cAR3: Int64;
QueryPerformanceFrequency(f);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
OP1(Data[i]);
QueryPerformanceCounter(cOP1);
Dec(cOP1, c0);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
OP2(Data[i]);
QueryPerformanceCounter(cOP2);
Dec(cOP2, c0);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
OP3(Data[i]);
QueryPerformanceCounter(cOP3);
Dec(cOP3, c0);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
AR1(Data[i]);
QueryPerformanceCounter(cAR1);
Dec(cAR1, c0);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
AR2(Data[i]);
QueryPerformanceCounter(cAR2);
Dec(cAR2, c0);
QueryPerformanceCounter(c0);
for var i := 0 to High(Data) do
AR3(Data[i]);
QueryPerformanceCounter(cAR3);
Dec(cAR3, c0);
// elapsed ticks / f = seconds, so N / seconds = calls per second.
Writeln('Computations per second:');
Writeln('OP1: ', Round(N / (cOP1 / f)));
Writeln('OP2: ', Round(N / (cOP2 / f)));
Writeln('OP3: ', Round(N / (cOP3 / f)));
Writeln('AR1: ', Round(N / (cAR1 / f)));
Writeln('AR2: ', Round(N / (cAR2 / f)));
Writeln('AR3: ', Round(N / (cAR3 / f)));
// Wait for Enter before the program ends.
Readln;
end.
Result:
Computations per second:
OP1: 1398134
OP2: 875116
OP3: 39162
AR1: 3406172
AR2: 4063260
AR3: 4032343
As you can see, in this test at least, regular expressions are by far the slowest approach. And preallocating makes a major difference, while avoiding the _UniqueStringU issue appears to make only a relatively minor improvement.
But even with the very slow RegEx approach, you can do 40 000 calls per second. On my 13-year-old computer.

Related

Delphi (10.2): fast Integer conversion to string with separator

Let's say we have this Integer 1234567890, we want it converted to a string with a separator = 1.234.567.890, we could do Format('%n',[1234567890.0]); but it's very slow. I wrote a function to speed it up considerably (more than 2x faster). How could I improve it further, or can you come up with a faster routine?
{ Formats n in decimal with '.' inserted between groups of three digits,
  e.g. 1234567890 -> '1.234.567.890'.

  Uses UIntToStr so that the whole UInt64 range is formatted as unsigned;
  IntToStr only has signed overloads, which breaks values above High(Int64). }
function MyConvertDecToStrWithDot(Const n: UInt64): string;
var
  digitCount, groupCount, leadDigits, insertPos, x: Integer;
begin
  Result := UIntToStr(n);
  if n < 1000 then Exit; // fewer than four digits: nothing to insert

  digitCount := Length(Result);
  groupCount := digitCount div 3; // number of complete 3-digit groups
  leadDigits := digitCount mod 3; // digits in front of the first full group
  insertPos := 4;                 // first separator position when leadDigits = 0

  if leadDigits <> 0 then
  begin
    // Separate the short leading group; later positions shift accordingly.
    Insert('.', Result, leadDigits + 1);
    Inc(insertPos, leadDigits + 1);
  end;

  // One separator between each pair of adjacent full groups.
  for x := 1 to groupCount - 1 do
  begin
    Insert('.', Result, insertPos);
    Inc(insertPos, 4); // three digits plus the separator just inserted
  end;
end;
{ Times one million calls of MyConvertDecToStrWithDot, appends the elapsed
  milliseconds to Memo1 and shows the last formatted value in the caption. }
procedure TForm1.Button1Click(Sender: TObject);
var
  pass: Integer;
  formatted: string;
begin
  PerfTimerInit;
  for pass := 1 to 1000000 do
    formatted := MyConvertDecToStrWithDot(1234567890);
  Memo1.lines.Add(PerfTimerStopMS.ToString);
  caption := formatted;
end;
32-bit
Format: ~230ms
My function: ~79ms
64-bit
Format: ~440ms
My function: ~103ms
In my tests, the following is ever so slightly faster:
{ Formats Num in decimal with '.' between groups of three digits,
  e.g. 1234567890 -> '1.234.567.890'. Zero yields '0'.

  Digits are produced least-significant first and written right-to-left into
  a stack buffer, so no string reallocation or insertion is needed. }
function ThousandsSepStringOf(Num: UInt64): string;
const
  MaxChar = 30; // 26 would suffice: High(UInt64) has 20 digits + 6 separators
var
  Count: Integer;
  Rem: UInt64;
  Res: array[0..MaxChar] of Char;
  WritePtr: PChar;
begin
  // The digit loop below emits nothing for 0, so handle it explicitly.
  if Num = 0 then
    Exit('0');

  WritePtr := @Res[MaxChar]; // one past the last character that will be written
  WritePtr^ := #0;
  Count := 0;
  while Num > 0 do
  begin
    DivMod(Num, 10, Num, Rem);
    Dec(WritePtr);
    WritePtr^ := Char(Byte(Rem) + Ord('0'));
    Inc(Count);
    if Count = 3 then
    begin
      // Group complete: put a separator in front of it.
      Dec(WritePtr);
      WritePtr^ := '.';
      Count := 0;
    end;
  end;

  // When the digit count is a multiple of 3 the last separator written
  // ends up in front of the number; skip it.
  if WritePtr^ = '.' then
    Inc(WritePtr);

  // PChar subtraction yields the length in characters.
  SetString(Result, WritePtr, PChar(@Res[MaxChar]) - WritePtr);
end;
Tested with:
{ Runs MyConvertDecToStrWithDot KLoops times on 1234567890 and prints a
  heading, the elapsed milliseconds, the last result and a blank line. }
procedure TestHisCode;
var
  Pass: Integer;
  Formatted: string;
  Timer: TStopwatch;
begin
  Writeln('His code');
  Timer := TStopwatch.StartNew;
  for Pass := 1 to KLoops do
    Formatted := MyConvertDecToStrWithDot(1234567890);
  Writeln(Timer.ElapsedMilliseconds);
  Writeln(Formatted);
  Writeln;
end;
{ Runs ThousandsSepStringOf KLoops times on 1234567890 and prints a
  heading, the elapsed milliseconds, the last result and a blank line. }
procedure TestMyCode;
var
  Pass: Integer;
  Formatted: string;
  Timer: TStopwatch;
begin
  Writeln('My code');
  Timer := TStopwatch.StartNew;
  for Pass := 1 to KLoops do
    Formatted := ThousandsSepStringOf(1234567890);
  Writeln(Timer.ElapsedMilliseconds);
  Writeln(Formatted);
  Writeln;
end;
and:
// Each benchmark is run four times, in mixed order
// (presumably to even out warm-up / ordering effects - confirm with the author).
TestHisCode;
TestMyCode;
TestMyCode;
TestHisCode;
TestMyCode;
TestHisCode;
TestHisCode;
TestMyCode;
Haven't properly tested the performance of this, however it should be cross-platform and locale independent:
{ Inserts FormatSettings.ThousandSeparator between groups of three digits
  of ASource, counting from the right. ASource is expected to be the decimal
  text of an integer, optionally starting with '-' or '+'.

  No separator is placed in front of the first digit, so '123456' becomes
  '123<sep>456' (not '<sep>123<sep>456') and '-123' stays '-123'. }
function Thousands(const ASource: string): string;
var
  I, LLast, LFirst: Integer;
begin
  Result := ASource;
  LLast := Length(Result);

  // Index of the first digit: skip one leading sign character if present.
  LFirst := 1;
  if (LLast > 0) and ((Result[1] = '-') or (Result[1] = '+')) then
    LFirst := 2;

  I := LLast;
  // Stop before the first digit so a separator is never inserted in front of it.
  while I > LFirst do
  begin
    // (LLast - I + 1) is how many original digits lie at or right of index I.
    if (LLast - I + 1) mod 3 = 0 then
    begin
      Insert(FormatSettings.ThousandSeparator, Result, I);
      // Stepping back by two keeps the mod-3 test aligned with the next group.
      Dec(I, 2);
    end
    else
      Dec(I);
  end;
end;
Note: It obviously just works on integers
It's better to insert the separators directly while constructing the string, instead of inserting them later into the converted string, because each insertion moves a lot of data and degrades performance. Besides, avoiding the division by 3 may improve performance a bit.
This is what I get from my rusty Pascal after decades not using it
uses strutils;
{ Formats n in decimal with '.' between groups of three digits,
  e.g. 1234567 -> '1.234.567' and -1234 -> '-1.234'. Zero yields '0'.

  The text is built least-significant digit first and reversed at the end. }
function FormatNumber(n: integer): string;
var
  magnitude: Int64;
  digit: integer;
  count: integer;
begin
  // The digit loop below emits nothing for 0, so handle it explicitly.
  if n = 0 then
  begin
    Result := '0';
    Exit;
  end;

  // Negate in Int64 so that Low(integer) does not overflow.
  magnitude := n;
  if magnitude < 0 then magnitude := -magnitude;

  Result := '';
  count := 3; // digits still to emit before the next separator
  while magnitude <> 0 do begin
    digit := magnitude mod 10;
    magnitude := magnitude div 10;
    if count = 0 then begin
      Result := Result + '.';
      count := 3;
    end;
    Result := Result + chr(ord('0') + digit);
    dec(count);
  end;

  // The sign is appended last because the string is reversed below.
  if n < 0 then Result := Result + '-';
  Result := reversestring(Result);
end;
See it in action: http://ideone.com/6O3e8w
It's also faster to just assign the characters directly instead of using concatenation operator/function like Victoria suggested. This is the improved version with only unsigned types
type string28 = string[28];

{ Formats n in decimal with '.' between groups of three digits,
  e.g. 1234567890 -> '1.234.567.890'. Zero yields '0'.

  Characters are written right-to-left into a fixed 28-character buffer
  (High(UInt64) needs 20 digits + 6 separators = 26), so no reversal pass
  and no concatenation are needed. }
function FormatNumber(n: UInt64): string28;
const
  BufLen = 28;
var
  buf: string28;
  idx: integer;   // next free slot in buf, moving towards 1
  count: integer; // digits written in the current group
begin
  SetLength(buf, BufLen);
  idx := BufLen;
  count := 0;
  // repeat..until always emits at least one digit, so 0 becomes '0'.
  repeat
    if count = 3 then begin
      buf[idx] := '.';
      dec(idx);
      count := 0;
    end;
    buf[idx] := chr(ord('0') + integer(n mod 10));
    dec(idx);
    n := n div 10;
    inc(count);
  until n = 0;
  // Everything to the right of idx is the formatted number.
  FormatNumber := Copy(buf, idx + 1, BufLen - idx);
end;
If the operation is done millions of times and is really a bottleneck after profiling, it's better to do in multiple threads along with SIMD

Inno Setup - Integer or Set/Range wildcard?

I need a wildcard that would match only numbers. I tried FileExistsWildcard function from How to test using wildcards whether a file exists in Inno Setup:
FileExistsWildcard(ExpandConstant('{app}\sav[1-9]'))
But Pascal Script obviously doesn't work that way. Is there such a wildcard or should I write a custom function or something?
P.S. Is there a wildcard matching list for Inno Setup at all?
@TLama's FileExistsWildcard function internally uses the Inno Setup FindFirst function, which in turn internally uses the Windows FindFirstFile function.
And Windows supports only * and ? in its wildcards. The range/set pattern [a-z] is *nix thing only.
So it's not a Pascal (Script) limitation. It's a Windows limitation.
Implementing a generic matching function that supports all of ?, * and [a-z] is not easy.
I've tried to implement a matching function that is compatible with Windows matching (FindFirstFile) but supports a set pattern (including range set).
I was not able to identify the exact rules for how Windows treats . in the mask and the filename, so my matching function does not behave exactly the same in that respect. Otherwise, I believe, it is identical. It also supports the [abc] set pattern as well as the range pattern [a-z], or any combination like [_0-9a-z].
{ Returns True when FileName matches Mask. Both are lowercased first, so }
{ the comparison is case-insensitive. Supported mask elements: }
{   ?       consumes one filename position (even past the end) }
{   *       matches any run of characters, including none (via recursion) }
{   [...]   matches one character from a set; X-Y inside is a range }
{   .       matches a literal dot, or (once) the end of the filename }
{   other   must equal the filename character at the current position }
function MatchesMaskEx(Mask: string; FileName: string): Boolean;
var
MaskI: Integer;
MaskC: Char;
FileNameI: Integer;
FileNameI2: Integer;
P: Integer;
Mask2: string;
EOSMatched: Boolean;
begin
Mask := LowerCase(Mask);
FileName := LowerCase(FileName);
{ MaskI / FileNameI are the current 1-based positions in Mask / FileName }
MaskI := 1;
FileNameI := 1;
Result := True;
EOSMatched := False;
{ Walk the mask; stop early as soon as one element fails to match }
while (MaskI <= Length(Mask)) and Result do
begin
MaskC := Mask[MaskI];
if MaskC = '?' then
begin
{ noop, ? matches anything, even beyond end-of-string }
Inc(FileNameI);
end
else
if MaskC = '[' then
begin
{ a set needs an actual filename character to test }
if FileNameI > Length(FileName) then
begin
Result := False;
end
else
begin
{ P = offset of the first ']' counted from the character after '[' }
P := Pos(']', Copy(Mask, MaskI + 1, Length(Mask) - MaskI));
if P = 0 then
begin
{ unclosed set - no match }
Result := False;
end
else
begin
Result := False;
{ turn P into the absolute index of ']' in Mask }
P := P + MaskI;
Inc(MaskI);
{ try each set member until one matches or ']' is reached }
while (MaskI < P) and (not Result) do
begin
MaskC := Mask[MaskI];
{ is it range (A-Z) ? }
if (MaskI + 2 < P) and (Mask[MaskI + 1] = '-') then
begin
{ MaskC keeps the lower bound; MaskI now points at the upper bound }
MaskI := MaskI + 2;
end;
{ matching the range (or pseudo range A-A) }
if (MaskC <= FileName[FileNameI]) and
(FileName[FileNameI] <= Mask[MaskI]) then
begin
Inc(FileNameI);
Result := True;
{ jump so that the Inc below lands on ']' and the set loop ends }
MaskI := P - 1;
end;
Inc(MaskI);
end;
end;
end;
end
else
if MaskC = '*' then
begin
{ Mask2 = everything in the mask after the '*' }
Mask2 := Copy(Mask, MaskI + 1, Length(Mask) - MaskI);
Result := False;
{ Find if the rest of the mask can match any remaining part }
{ of the filename => recursion }
{ The upper bound Length(FileName) + 1 also tries the empty remainder }
for FileNameI2 := FileNameI to Length(FileName) + 1 do
begin
if MatchesMaskEx(
Mask2, Copy(FileName, FileNameI2, Length(FileName) - FileNameI2 + 1)) then
begin
Result := True;
{ the recursive call consumed the rest of both strings }
MaskI := Length(Mask);
FileNameI := Length(FileName) + 1;
break;
end;
end;
end
else
begin
{ literal character }
if (FileNameI <= Length(FileName)) and (FileName[FileNameI] = MaskC) then
begin
Inc(FileNameI);
end
else
begin
{ The dot can match EOS too, but only once }
if (MaskC = '.') and (FileNameI > Length(FileName)) and (not EOSMatched) then
begin
EOSMatched := True;
end
else
begin
Result := False;
end;
end;
end;
Inc(MaskI);
end;
{ mask exhausted but filename characters remain => no match }
if Result and (FileNameI <= Length(FileName)) then
begin
Result := False;
end;
end;
Use it like:
{ Returns True if the directory part of Path contains at least one }
{ non-directory entry whose name matches the file-name part of Path }
{ according to MatchesMaskEx. }
function FileExistsEx(Path: string): Boolean;
var
  Entry: TFindRec;
  NameMask: string;
  Finished: Boolean;
begin
  Result := False;
  { Enumerate everything in the directory; the filtering is done here }
  if not FindFirst(AddBackslash(ExtractFilePath(Path)) + '*', Entry) then
    Exit;
  NameMask := ExtractFileName(Path);
  try
    Finished := False;
    while not Finished do
    begin
      if (Entry.Attributes and FILE_ATTRIBUTE_DIRECTORY = 0) and
         MatchesMaskEx(NameMask, Entry.Name) then
      begin
        Result := True;
        Finished := True;
      end
      else
        Finished := not FindNext(Entry);
    end;
  finally
    FindClose(Entry);
  end;
end;
For your specific needs, you can also use a simple ad-hoc function like:
{ Returns True if the directory Path contains an entry whose name is }
{ exactly 'sav' followed by one digit 0-9. The 'sav' prefix is compared }
{ case-insensitively, as Windows file names are (and as MatchesMaskEx does). }
function SpecialFileExists(Path: string): Boolean;
var
  FindRec: TFindRec;
begin
  Result := False;
  if FindFirst(AddBackslash(Path) + '*', FindRec) then
  begin
    try
      repeat
        if (Length(FindRec.Name) = 4) and
           (Lowercase(Copy(FindRec.Name, 1, 3)) = 'sav') and
           (FindRec.Name[4] >= '0') and (FindRec.Name[4] <= '9') then
        begin
          Result := True;
          Exit;
        end;
      until not FindNext(FindRec);
    finally
      { runs on the early Exit as well }
      FindClose(FindRec);
    end;
  end;
end;
Use it like:
SpecialFileExists(ExpandConstant('{app}'))

Best way to replace every third character in a string in delphi

What's the most efficient way to replace every third character of the same type in a string?
I have a string like this:
str := 'c1'#9'c2'#9'c3'#9'c4'#9'c5'#9'c6'#9'
I want to replace every third #9 by #13#10, so that i get:
str1 := 'c1'#9'c2'#9'c3'#13#10'c4'#9'c5'#9'c6'#13#10'
I would do this in this way:
// Copies str into newStr, replacing every third #9 with #13#10.
// str is consumed (shortened) while the loop runs.
i := 0;
newStr := '';
// Pos takes the substring first, then the string to search in.
lastPos := Pos(#9, str);
while lastPos > 0 do begin
  if i mod 3 = 2 then begin
    // Third tab of the current group: drop it and emit a line break instead.
    newStr := newStr + Copy(str, 1, lastPos - 1) + #13#10;
  end else begin
    // Keep the text together with its tab.
    newStr := newStr + Copy(str, 1, lastPos);
  end;
  str := Copy(str, lastPos + 1, MaxInt);
  i := i + 1;
  lastPos := Pos(#9, str);
end;
// Append whatever follows the last tab instead of overwriting the output.
newStr := newStr + str;
But that's a lot of copying. Is there a string manipulation function to do this?
I think the problem as stated doesn't match the code you provided. Is every third character a #9? If so, do you want to change every third appearance of #9 for #13#10?
If so, I would do it this way:
{ Returns str with every character whose 1-based position is a multiple
  of 9 replaced by #13#10; all other characters are copied unchanged.
  The result is sized up front: one extra character per replacement. }
function test(str: string): string;
var
  src, dst, total: integer;
begin
  total := Length(str);
  SetLength(Result, total + total div 9);
  dst := 0; // number of characters written so far
  for src := 1 to total do
    if src mod 9 <> 0 then
    begin
      Inc(dst);
      Result[dst] := str[src];
    end
    else
    begin
      Result[dst + 1] := #13;
      Result[dst + 2] := #10;
      Inc(dst, 2);
    end;
end;
I actually have no idea if this function performs well. But given that the constraints aren't clear, I guess so.
If the position of the #9 character is unknown then this solution won't work at all.
Edit: as David points out, this is not nearly equivalent to the original code posted. The following seems to work, but it requires two passes over the original string. The thing is, to know whether it's more efficient or not, we need to know more about the input and context.
{ Returns how many characters of S are equal to C. }
function OccurrencesOfChar(const S: string; const C: char): integer;
var
  current: char;
begin
  Result := 0;
  for current in S do
    if current = C then
      Result := Result + 1;
end;
{ Returns str with every third #9 (counted over the whole string) replaced
  by #13#10. Two passes: the first counts the tabs so the result can be
  allocated once, the second copies and substitutes. }
function Test(str: string): string;
var
  ch: char;
  tabs, seen, dst: integer;
begin
  // Pass 1: count the tabs (every third one grows the output by one char).
  tabs := 0;
  for ch in str do
    if ch = #9 then
      Inc(tabs);
  SetLength(Result, Length(str) + tabs div 3);

  // Pass 2: copy, replacing tab number 3, 6, 9, ...
  seen := 0;
  dst := 0;
  for ch in str do
  begin
    Inc(dst);
    if ch <> #9 then
      Result[dst] := ch
    else
    begin
      Inc(seen);
      if seen mod 3 = 0 then
      begin
        Result[dst] := #13;
        Inc(dst);
        Result[dst] := #10;
      end
      else
        Result[dst] := #9;
    end;
  end;
end;
I expect this question will be closed, but just for fun, that would be my proposal.
{ Returns Instr with every third occurrence of Re replaced by ReWith
  (which may be empty, i.e. the third occurrence is removed).

  The output buffer is sized for the worst case and trimmed at the end. }
Function Replace(const Instr:String;Re:Char;const ReWith:String):String;
var
  i, o, cnt, l, factor: Integer;
begin
  cnt := 0; // occurrences of Re seen since the last replacement
  o := 0;   // number of characters written to Result

  // Each input character produces at most Max(1, Length(ReWith)) output
  // characters. Using Length(ReWith) alone would allocate nothing for an
  // empty replacement, although unreplaced characters are still copied.
  factor := Length(ReWith);
  if factor < 1 then
    factor := 1;
  SetLength(Result, Length(Instr) * factor);

  for i := 1 to Length(Instr) do
  begin
    if Instr[i] = Re then inc(cnt);
    if cnt = 3 then
    begin
      // Third occurrence: emit the replacement instead of the character.
      for l := 1 to Length(ReWith) do
      begin
        inc(o);
        Result[o] := ReWith[l];
      end;
      cnt := 0;
    end
    else
    begin
      inc(o);
      Result[o] := Instr[i];
    end;
  end;
  SetLength(Result, o);
end;
{ Shows in Edit2 the text of Edit1 with every third 'A' replaced by 'xxx'. }
procedure TForm3.Button1Click(Sender: TObject);
var
  Converted: string;
begin
  Converted := Replace(Edit1.Text, 'A', 'xxx');
  Edit2.Text := Converted;
end;
I would probably do something like this (coded in the browser). It only needs one string resize and should have less movement of data around. I exit when I have made the last replacement or if it didn't need any:
// Replaces, in place, every aIteration-th occurrence of aChar in
// aStringToReplace with aReplacement.
// The string is resized once, then rewritten from its end towards its start
// so that characters are moved into the enlarged tail before they could be
// overwritten.
// NOTE(review): aIteration = 0 would divide by zero in the div/mod below -
// confirm callers never pass it.
procedure ReplaceXChar(var aStringToReplace: string; const aIteration:
Integer; const aChar: Char; const aReplacement: string);
var
replaceCount: Integer;
cntr: Integer;
outputCntr: Integer;
lengthToReplace: Integer;
begin
// Find the number to replace
replaceCount := 0;
for cntr := 1 to Length(aStringToReplace) do
begin
if aStringToReplace[cntr] = aChar then
Inc(replaceCount);
end;
// Fewer occurrences than aIteration means no replacement is due.
if replaceCount >= aIteration then
begin
// Now replace them
lengthToReplace := Length(aReplacement);
// cntr = read position (last char of the original text)
cntr := Length(aStringToReplace);
// Each replacement swaps 1 char for lengthToReplace chars.
SetLength(aStringToReplace, cntr +
(replaceCount div aIteration) * (lengthToReplace - 1));
// outputCntr = write position (last char of the resized string)
outputCntr := Length(aStringToReplace);
repeat
if aStringToReplace[cntr] = aChar then
begin
// replaceCount is the 1-based index of this occurrence, counted from the left.
if (replaceCount mod aIteration) = 0 then
begin
// Reserve room, then copy the whole replacement into it.
Dec(outputCntr, lengthToReplace);
Move(aReplacement[1], aStringToReplace[outputCntr+1],
lengthToReplace * SizeOf(Char));
end
else
begin
aStringToReplace[outputCntr] := aStringToReplace[cntr];
Dec(outputCntr);
end;
Dec(replaceCount);
end
else
begin
aStringToReplace[outputCntr] := aStringToReplace[cntr];
Dec(outputCntr);
end;
Dec(cntr);
// Stops after the left-most occurrence has been handled; characters to
// its left are not touched.
until replaceCount = 0;
end;
end;
Usage would be like this:
// Example: replace every 3rd tab (#9) in myString with a CR/LF pair and show the result.
var
myString: String;
begin
myString := 'c1'#9'c2'#9'c3'#9'c4'#9'c5'#9'c6'#9;
ReplaceXChar(myString, 3, #9, #13#10);
ShowMessage(myString);
end;

How to split a string in Inno Setup

How can I split a string in Inno Setup?
Is there any special function in Inno Setup to split the string?
I want to get the following from the string '11.2.0.16':
tokens: array of string = ('11', '2', '0', '16');
Thanks in advance!
For anyone who prefers the function format, I have modified @cezarlamann's answer:
{ Splits Text at each occurrence of Separator and returns the pieces. }
{ An empty Text yields one empty element; an empty piece after a trailing }
{ separator is not returned. }
function StrSplit(Text: String; Separator: String): TArrayOfString;
var
  Count, SepAt: Integer;
  Parts: TArrayOfString;
begin
  Count := 0;
  repeat
    { make room for the piece about to be stored }
    SetArrayLength(Parts, Count + 1);
    SepAt := Pos(Separator, Text);
    if SepAt <= 0 then
    begin
      { no separator left: the remainder is the last piece }
      Parts[Count] := Text;
      Text := '';
    end
    else
    begin
      Parts[Count] := Copy(Text, 1, SepAt - 1);
      Delete(Text, 1, SepAt + Length(Separator) - 1);
      Count := Count + 1;
    end;
  until Text = '';
  Result := Parts;
end;
I've been looking for the same thing today...
This one works just fine on Inno Setup Scripts. Paste this excerpt inside your script before the procedure/function which will call this "split" procedure.
You can also turn this into a function, if you desire...
{ Splits Text at each occurrence of Separator and stores the pieces in Dest. }
{ An empty Text yields one empty element; an empty piece after a trailing }
{ separator is not stored. }
procedure Explode(var Dest: TArrayOfString; Text: String; Separator: String);
var
  Count, SepAt: Integer;
begin
  Count := 0;
  repeat
    { make room for the piece about to be stored }
    SetArrayLength(Dest, Count + 1);
    SepAt := Pos(Separator, Text);
    if SepAt <= 0 then
    begin
      { no separator left: the remainder is the last piece }
      Dest[Count] := Text;
      Text := '';
    end
    else
    begin
      Dest[Count] := Copy(Text, 1, SepAt - 1);
      Delete(Text, 1, SepAt + Length(Separator) - 1);
      Count := Count + 1;
    end;
  until Text = '';
end;
{ Usage example: splits str at '.' with Explode and visits each piece. }
procedure Whatever();
var
  str: String;
  strArray: TArrayOfString;
  i: Integer;
begin
  Explode(strArray,str,'.');
  i := 0;
  while i <= GetArrayLength(strArray)-1 do
  begin
    { do something }
    i := i + 1;
  end;
end;
Taken from here
Here's what I use:
{ Splits S at each occurrence of Delim and stores the pieces in Dest. }
{ The delimiters are counted first (StringChangeEx on a scratch copy), so }
{ Dest is sized exactly once. }
procedure SplitString(S, Delim: string; var Dest: TArrayOfString);
var
  Scratch: string;
  Idx, DelimAt, DelimCount: Integer;
begin
  { StringChangeEx modifies its argument, hence the copy }
  Scratch := S;
  DelimCount := StringChangeEx(Scratch, Delim, '', true);
  SetArrayLength(Dest, DelimCount + 1);
  Idx := 0;
  while Idx <= GetArrayLength(Dest) - 1 do
  begin
    DelimAt := Pos(Delim, S);
    if DelimAt <= 0 then
      Dest[Idx] := S
    else
    begin
      Dest[Idx] := Copy(S, 1, DelimAt - 1);
      { drop the piece just stored together with its delimiter }
      Delete(S, 1, DelimAt + Length(Delim) - 1);
    end;
    Idx := Idx + 1;
  end;
end;
This version avoids repeated array resizing by counting the delimiters using StringChangeEx and setting the array size only once. Since we then know the array size, we can just use a for loop. I also opted for Delete rather than Copy, which (IMO) makes the code easier to read. (This version also fixes the bug where the split does not occur correctly if the delimiter is longer than 1 character.)
If there is a possibility that the delimiter could also be right at the end of the string, then this is what I used (modified from @timyha's answer):
{ Splits Text at each occurrence of Separator and returns the pieces. }
{ Unlike the plain version, a separator at the very end of Text produces }
{ a trailing empty element ('a.b.' -> 'a', 'b', ''). }
function StrSplit(Text: String; Separator: String): TArrayOfString;
var
  i, p: Integer;
  Dest: TArrayOfString;
begin
  i := 0;
  repeat
    SetArrayLength(Dest, i+1);
    p := Pos(Separator,Text);
    if p > 0 then begin
      Dest[i] := Copy(Text, 1, p-1);
      Text := Copy(Text, p + Length(Separator), Length(Text));
      i := i + 1;
      { add an empty string if delim was at the end }
      if Text = '' then begin
        { the array currently holds i elements, so grow it before writing }
        { index i; otherwise the assignment is out of range }
        SetArrayLength(Dest, i+1);
        Dest[i] := '';
      end;
    end else begin
      Dest[i] := Text;
      Text := '';
    end;
  until Length(Text)=0;
  Result := Dest
end;

Extract string from a text file using 2 delimiters

I'm trying to extract a string from a text file using 2 delimiters. One to start and one to stop.
Example:
Hi my name is$John and I'm happy/today
What I need to do is to call a function that would return the string between $ and /. I've been looking everywhere but I can't seem to find something useful and I'm new to programming.
The above functions won't work if the 2nd text is also appearing before the 1st pattern...
You should use PosEx() instead of Pos():
For example, with Pos, PosEx and Copy:
{ Returns the text of Str between the first occurrence of Delim1 and the
  first occurrence of Delim2 that follows it. Returns '' when either
  delimiter is missing.

  Delimiters may be longer than one character: the extracted text starts
  right after the whole of Delim1, and Delim2 is only searched from there. }
function ExtractText(const Str: string; const Delim1, Delim2: string): string;
var
  pos1, pos2, start: integer;
begin
  result := '';
  pos1 := Pos(Delim1, Str);
  if pos1 > 0 then begin
    start := pos1 + Length(Delim1); // first character after the opening delimiter
    pos2 := PosEx(Delim2, Str, start);
    if pos2 > 0 then
      result := Copy(Str, start, pos2 - start);
  end;
end;
You can do it with Pos and Copy:
{ Returns the text between the first Delim1 and the first Delim2 in Str.
  Returns '' when Delim1 is missing or when the first Delim2 does not come
  after the first Delim1. }
function ExtractText(const Str: string; const Delim1, Delim2: char): string;
var
  openAt, closeAt: integer;
begin
  openAt := Pos(Delim1, Str);
  closeAt := Pos(Delim2, Str);
  if (openAt <= 0) or (closeAt <= openAt) then
    Result := ''
  else
    Result := Copy(Str, openAt + 1, closeAt - openAt - 1);
end;
I'd do it something like this:
{ Returns the text between the first '$' and the first '/' in s.
  Returns '' when either character is missing or when the first '/'
  comes before the first '$'. }
function ExtractDelimitedString(const s: string): string;
var
  dollarAt, slashAt: Integer;
begin
  Result := ''; // delimiters not found, or in the wrong order
  dollarAt := Pos('$', s);
  if dollarAt = 0 then
    Exit;
  slashAt := Pos('/', s);
  if slashAt > dollarAt then
    Result := Copy(s, dollarAt + 1, slashAt - dollarAt - 1);
end;
Get em all
{ Returns a new TStringList (owned by the caller) with one entry for every
  occurrence of Delim1 in Str that is followed by a Delim2: the text
  between the end of that Delim1 and the next Delim2. }
function ExtractText(const Str: string; const Delim1, Delim2: string): TStringList;
var
  searchFrom, openAt, closeAt: integer;
begin
  Result := TStringList.Create;
  searchFrom := 1;
  repeat
    openAt := PosEx(Delim1, Str, searchFrom);
    if openAt > 0 then
    begin
      closeAt := PosEx(Delim2, Str, openAt + 1);
      if closeAt > 0 then
        Result.Add(Copy(Str, openAt + Length(Delim1), closeAt - (Length(Delim1) + openAt)));
      // Resume right after the start of this opening delimiter.
      searchFrom := openAt + 1;
    end;
  until openAt <= 0;
end;
Gab, you can write a function to do this using a TFileStream class, and the Copy and Pos functions.
see this sample :
uses
Classes,
SysUtils;
{ Reads the whole file FileName and returns the text between the first
  occurrence of IDel and the first occurrence of FDel that follows it.
  Returns '' when either delimiter is not found. Delimiters may be longer
  than one character.

  Exceptions raised while opening or reading the file propagate to the caller. }
function ExtractString(Const FileName: TFileName;Const IDel,FDel : AnsiString) : AnsiString;
Var
  FileStream : TFileStream;
  Content, Tail : AnsiString;
  i,f : Integer;
begin
  Result := '';

  // Load the complete file into Content.
  FileStream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    SetLength(Content, FileStream.Size);
    if Length(Content) > 0 then
      // ReadBuffer raises if fewer bytes than requested could be read.
      FileStream.ReadBuffer(Pointer(Content)^, Length(Content));
  finally
    FileStream.Free;
  end;

  i := Pos(IDel, Content); // opening delimiter
  if i = 0 then
    Exit;

  // Search the closing delimiter only in the text after the opening one,
  // so an earlier occurrence of FDel cannot be picked up by mistake.
  Tail := Copy(Content, i + Length(IDel), MaxInt);
  f := Pos(FDel, Tail);
  if f = 0 then
    Exit;

  Result := Copy(Tail, 1, f - 1);
end;
and use in this way
ExtractString('your_file_name','$','/');
In the newer Delphi's you can do it like this.. (yay)
program Project40; {$APPTYPE CONSOLE}
uses RegularExpressions;
const
str = 'Is$John and I''m happy/today';
{ Matches the pattern '\$.*/' against aStr and returns the matched text
  without its first and last character. }
function GetStr(const aStr: string): string;
var
  Matched: string;
begin
  Matched := TRegEx.Match(aStr, '\$.*/').Value;
  Result := Copy(Matched, 2, Length(Matched) - 2);
end;
// Print the text extracted from the sample constant, then wait for Enter.
begin
Writeln(GetStr(str));
ReadLn;
end.
Assuming both delimiters are single characters as per your post:
{ Loads aFilename and looks for the first aOpenDelim and the first
  aCloseDelim after it. On success, stores the text between them in aValue
  and returns TRUE; otherwise aValue is '' and the result is FALSE.

  The stream content is decoded into a string once and scanned by character
  index; reading strm.DataString[i] inside the loop would decode the whole
  stream again on every iteration, and strm.Size counts bytes rather than
  characters. }
function ExtractDelimitedValueFromFile(const aFilename: String;
                                       const aOpenDelim: Char;
                                       const aCloseDelim: Char;
                                       var aValue: String): Boolean;
var
  i: Integer;
  strm: TStringStream;
  data: String;
  delimStart: Integer;
begin
  result := FALSE;
  aValue := '';

  strm := TStringStream.Create;
  try
    strm.LoadFromFile(aFileName);
    data := strm.DataString;
  finally
    strm.Free;
  end;

  delimStart := 0; // 0 = opening delimiter not seen yet
  for i := 1 to Length(data) do
  begin
    if delimStart = 0 then
    begin
      if data[i] = aOpenDelim then
        delimStart := i;
    end
    else if data[i] = aCloseDelim then
    begin
      aValue := Copy(data, delimStart + 1, i - delimStart - 1);
      result := TRUE;
      Break;
    end;
  end;
end;
Usage:
// Example: the Boolean result tells whether a delimited value was found;
// on success the value is returned through the last (var) argument.
var
str: String;
begin
if ExtractDelimitedValueFromFile('path\filename.ext', '$', '/', str) then
// work with str
else
// delimited value not found in file
end;

Resources