Convert multibyte hex back to UTF-8 string - string

In Delphi 10.4, I have a hexadecimal representation of a string:
function TForm1.Button2Click(Sender: TObject);
var
i, nr : integer;
Input, HexStr, h: String;
begin
HexStr := '';
Input := Edit2.Text;
for i:=1 to Length(Input) do begin
nr := Ord(Input[i]);
h := IntToHex(nr, 0);
if Length(h) = 1 then
h := '0' + h;
HexStr := HexStr + h;
end;
Edit3.Text := HexStr;
end;
For "abc€", I get "61626320AC" here. Note the € sign converts to "20AC".
Now for converting it back to a normal string, I split that hex into 2-char pairs, using StrToInt() with a '$' prefix. I have no indicator for it being 4 chars long for an € sign, instead of 2 chars, and this breaks the euro sign:
How do I convert back such a hex string without breaking multibyte characters?

I have no indicator for it being 4 chars long for an € sign, instead of 2 chars
And because of that reason alone, you simply won't be able to convert "61626320AC" back to "abc€", because you don't know which hex codes are 2 digits and which are 4 digits, you didn't deliminate them. For instance, the hex codes 6162 and 6263 also represent valid Unicode characters, too.
As #TomBrunberg mentioned in comments, you can use 4-digit hex codes for every character, eg:
function TForm1.Button2Click(Sender: TObject);
var
i, nr : integer;
Input, HexStr, h: String;
begin
HexStr := '';
Input := Edit2.Text;
for i := Low(Input) to High(Input) do begin
nr := Ord(Input[i]);
h := IntToHex(nr, 4);
HexStr := HexStr + h;
end;
Edit3.Text := HexStr;
end;
Then you can convert back by splitting up the hex string into 4-character groups when calling StrToInt(), eg:
function TForm1.Button3Click(Sender: TObject);
var
i, nr : integer;
Output, HexStr, h: String;
begin
HexStr := Edit3.Text;
Output := '';
i := Low(HexStr);
while i <= High(HexStr) do begin
h := Copy(HexStr, i, 4);
nr := StrToInt('$' + h);
Output := Output + Char(nr);
Inc(i, 4);
end;
Edit2.Text := Output;
end;
Or, as #AmigoJack hinted at, you can use UTF-8 instead, eg:
function TForm1.Button2Click(Sender: TObject);
var
i, nr : integer;
HexStr, h: String;
Input: UTF8String;
begin
HexStr := '';
Input := UTF8String(Edit2.Text);
for i := Low(Input) to High(Input) do begin
nr := Ord(Input[i]);
h := IntToHex(nr, 2);
HexStr := HexStr + h;
end;
Edit3.Text := HexStr;
end;
function TForm1.Button3Click(Sender: TObject);
var
i, nr : integer;
HexStr, h: String;
Output: UTF8String;
begin
HexStr := Edit3.Text;
Output := '';
i := Low(HexStr);
while i <= High(HexStr) do begin
h := Copy(HexStr, i, 2);
nr := StrToInt('$' + h);
Output := Output + AnsiChar(nr);
Inc(i, 2);
end;
Edit2.Text := string(Output);
end;

Related

Convert String to two chars and reverse the two chars

Hello I want to split a simple string into a group of two characters then reverse them.
For example MyStringToBeReversed to two charachters each My St ri ng To Be Re ve rs ed
then reverse each splited two charachters to :
St := ts
ri := ir
etc ...
Here's the code I'm using but it only displays one charachter on memo lines
procedure TForm7.Button1Click(Sender: TObject);
var
MyString: string;
i: integer;
k: integer;
j: integer;
begin
MyString:= Edit1.Text;
for i:= 1 to length(MyString) do
Memo1.Lines.Add(MyString[i]);
// Move(Str[1], MyString, Length(Str));
end;
end.
This should do what you need
function ReverseEachTwoChars(AInput : string) : string;
var
i : integer;
begin
if(Odd(Length(AInput))) then
begin
SetLength(AInput, Length(AInput) + 1);
AInput[Length(AInput)] := 'A';
end;
SetLength(Result, Length(AInput));
i := 1;
while(i < Length(Result)) do
begin
Result[i] := AInput[i + 1];
Result[i + 1] := AInput[i];
Inc(i, 2);
end;
end;
Test:
ShowMessage(ReverseEachTwoChars('010203040506'));
Output:
102030405060

Delphi (10.2): fast Integer conversion to string with separator

Let's say we have this Integer 1234567890, we want it converted to a string with a separator = 1.234.567.890, we could do Format('%n',[1234567890.0]); but it's very slow. I wrote a function to speed it up considerably (more than 2x faster). How could I improve it further, or can you come up with a faster routine?
function MyConvertDecToStrWithDot(Const n: UInt64): string;
Var a,b,x: Integer;
z,step: Integer;
l: SmallInt;
begin
Result := IntToStr(n);
if n < 1000 then Exit;
l := Length(Result);
a := l div 3;
b := l mod 3;
step := b+1;
z := 4;
if b <> 0 then begin
Insert('.',Result,step);
Inc(z,step);
end;
for x := 1 to (a-1) do begin
Insert('.',Result,z);
Inc(z,4);
end;
end;
procedure TForm1.Button1Click(Sender: TObject);
Var a: Integer;
s: string;
begin
PerfTimerInit;
for a := 1 to 1000000 do
s := MyConvertDecToStrWithDot(1234567890);
Memo1.lines.Add(PerfTimerStopMS.ToString);
caption := s;
end;
32-bit
Format: ~230ms
My function: ~79ms
64-bit
Format: ~440ms
My function: ~103ms
In my tests, the following is ever so slightly faster:
function ThousandsSepStringOf(Num: UInt64): string;
const
MaxChar = 30; // Probably, 26 is enough: 19 digits + 7 separators
var
Count: Integer;
Rem: UInt64;
Res: array[0..MaxChar] of Char;
WritePtr: PChar;
begin
WritePtr := #Res[MaxChar];
WritePtr^ := #0;
Count := 0;
while Num > 0 do
begin
DivMod(Num, 10, Num, Rem);
Dec(WritePtr);
WritePtr^ := Char(Byte(Rem) + Ord('0'));
Inc(Count);
if Count = 3 then
begin
Dec(WritePtr);
WritePtr^ := '.';
Count := 0;
end;
end;
if WritePtr^ = '.' then
Inc(WritePtr);
Count := MaxChar - ((NativeInt(WritePtr) - NativeInt(#Res)) shr 1);
SetLength(Result, Count);
Move(WritePtr^, PByte(Result)^, Count * SizeOf(Char));
end;
Tested with:
procedure TestHisCode;
Var
a: Integer;
s: string;
SW: TStopwatch;
begin
Writeln('His code');
SW := TStopwatch.StartNew;
for a := 1 to KLoops do
s := MyConvertDecToStrWithDot(1234567890);
Writeln(SW.ElapsedMilliseconds);
Writeln(s);
Writeln;
end;
procedure TestMyCode;
Var
a: Integer;
s: string;
SW: TStopwatch;
begin
Writeln('My code');
SW := TStopwatch.StartNew;
for a := 1 to KLoops do
s := ThousandsSepStringOf(1234567890);
Writeln(SW.ElapsedMilliseconds);
Writeln(s);
Writeln;
end;
and:
TestHisCode;
TestMyCode;
TestMyCode;
TestHisCode;
TestMyCode;
TestHisCode;
TestHisCode;
TestMyCode;
Haven't properly tested the performance of this, however it should be cross-platform and locale independent:
function Thousands(const ASource: string): string;
var
I, LLast: Integer;
begin
Result := ASource;
LLast := Length(Result);
I := LLast;
while I > 0 do
begin
if (LLast - I + 1) mod 3 = 0 then
begin
Insert(FormatSettings.ThousandSeparator, Result, I);
Dec(I, 2);
end
else
Dec(I);
end;
end;
Note: It obviously just works on integers
It's better to insert the separators directly while constructing the string instead of inserting separators later into the converted string because each insertion involves a lot of data movements and performance degradation. Besides avoid the division by 3 may improve performance a bit
This is what I get from my rusty Pascal after decades not using it
uses strutils;
function FormatNumber(n: integer): string;
var digit: integer;
count: integer;
isNegative: boolean;
begin
isNegative := (n < 0);
if isNegative then n := -n;
Result := '';
count := 3;
while n <> 0 do begin
digit := n mod 10;
n := n div 10;
if count = 0 then begin
Result := Result + '.';
count := 3;
end;
Result := Result + chr(ord('0') + digit);
dec(count);
end;
if isNegative then Result := Result + '-';
Result := reversestring(Result);
end;
See it in action: http://ideone.com/6O3e8w
It's also faster to just assign the characters directly instead of using concatenation operator/function like Victoria suggested. This is the improved version with only unsigned types
type string28 = string[28];
function FormatNumber(n: UInt64): string28;
var digit: integer;
length: integer;
count: integer;
c: char;
begin
count := 3;
length := 0;
while n <> 0 do begin
digit := n mod 10;
n := n div 10;
if count = 0 then begin
inc(length);
Result[length] := '.';
count := 3;
end;
inc(length);
Result[length] := chr(ord('0') + digit);
dec(count);
end;
for count := 1 to (length + 1) div 2 do begin
c := Result[count];
Result[count] := Result[length - count + 1];
Result[length - count + 1] := c;
end;
setlength(Result, length);
FormatNumber := Result;
end;
If the operation is done millions of times and is really a bottleneck after profiling, it's better to do in multiple threads along with SIMD

How to remove repeated spaces from a string

I need to remove repeated spaces from a string.
The following code, grabbed from internet, works decently except that it duplicate the first char of the string.
Also maybe there is something faster that this.
function DeleteRepeatedSpaces(OldText: string): string;
var
i: integer;
s: string;
begin
if length(OldText) > 0 then
s := OldText[1]
else
s := '';
for i := 1 to length(OldText) do
begin
if OldText[i] = ' ' then
begin
if not (OldText[i - 1] = ' ') then
s := s + ' ';
end
else
begin
s := s + OldText[i];
end;
end;
DelDoubleSpaces := s;
end;
Function based on the simplest state machine (DFA). Minimum of memory reallocations.
State is number of continuous spaces.
J is count of deleted spaces.
function DeleteRepeatedSpaces(const s: string): string;
var
i, j, State: Integer;
begin
SetLength(Result, Length(s));
j := 0;
State := 0;
for i := 1 to Length(s) do begin
if s[i] = ' ' then
Inc(State)
else
State := 0;
if State < 2 then
Result[i - j] := s[i]
else
Inc(j);
end;
if j > 0 then
SetLength(Result, Length(s) - j);
end;
Iterate all members of the string, move the characters to the Result, but skip repeated spaces.
function DeleteRepeatedSpaces(const OldText: string): string;
var
i,j,hi: Integer;
begin
SetLength(Result,Length(OldText));
i := Low(OldText);
j := i;
hi := High(OldText);
while (i <= hi) do begin
Result[j] := OldText[i];
Inc(j);
if (OldText[i] = ' ') then begin
repeat //Skip additional spaces
Inc(i);
until (i > hi) or (OldText[i] <> ' ');
end
else
Inc(i);
end;
SetLength(Result,j-Low(Result)); // Set correct length
end;
The above code is rather fast (faster than any other contribution, so far).
Below is an even more optimized routine:
function DeleteRepeatedSpaces(const OldText: string): string;
var
pO,pR: PChar;
begin
SetLength(Result,Length(OldText));
pR := Pointer(Result);
pO := Pointer(OldText);
while (pO^ <> '') do begin
pR^ := pO^;
Inc(pR);
if (pO^ <> ' ') then begin
Inc(pO);
Continue;
end;
repeat // Skip additional spaces
Inc(pO);
until (pO^ = '') or (pO^ <> ' ');
end;
SetLength(Result,pR-Pointer(Result));
end;
The following isn't wildly efficient, but possibly more so that processiing the string character by character because it doesn't require a new string allocation for each character in the output:
function RemoveDupSpaces(const Input : String) : String;
var
P : Integer;
begin
Result := Input;
repeat
P := Pos(' ', Result); // that's two spaces
if P > 0 then
Delete(Result, P + 1, 1);
until P = 0;
end;
You can use something like this:
function DeleteRepeatedSpaces(const s: string):string;
var
i:integer;
begin
Result := '';
for i := 1 to Length(S) do begin
if not ((s[i]=' ') and (s[i-1]=' ')) then begin
Result := Result + s[i];
end;
end;
end;
Delete two o more spaces contiguous in a string.
This string (without spaces):
The string have groups of spaces inside
return this:
The string have groups of spaces inside
This string (with spaces groups inside):
The string have groups of spaces inside
Return this:
The string have groups of spaces inside

Best way to replace every third character in a string in delphi

What's the most efficient way to replace every third character of the same type in a string?
I have a string like this:
str := 'c1'#9'c2'#9'c3'#9'c4'#9'c5'#9'c6'#9'
I want to replace every third #9 by #13#10, so that i get:
str1 := 'c1'#9'c2'#9'c3'#13#10'c4'#9'c5'#9'c6'#13#10'
I would do this in this way:
i:=0;
newStr:='';
lastPos := Pos(str,#9);
while lastPos > 0 do begin
if i mod 3 = 2 then begin
newStr := newStr + Copy(str,1,lastPos-1) + #13#10;
end else begin
newStr := newStr + Copy(str,1,lastPos);
end;
str := Copy(str,lastPos+1,MaxInt);
i := i + 1;
lastPos := Pos(str,#9);
end;
newStr := Copy(str,1,MaxInt);
But thats a lot of copying. Is there a string manipulation function to do this?
I think the problem as stated doesn't match the code you provided. Is every third character a #9? If so, do you want to change every third appearance of #9 for #13#10?
If so, I would do it this way:
function test(str: string): string;
var
i, c, l: integer;
begin
l := Length(str);
SetLength(Result, l + l div 9);
c := 1;
for i := 1 to l do
begin
if (i mod 9 = 0) and (i > 0) then
begin
Result[c] := #13;
Inc(c);
Result[c] := #10;
end
else
Result[c] := str[i];
Inc(c);
end;
end;
I actually have no idea if this function performs well. But given that the constraints aren't clear, I guess so.
If the position of the #9 character is unknown then this solution won't work at all.
Edit: as David points out, this is not nearly equivalent to the original code posted. This seems to work, but it requires two passes on the original string. The thing is, to know if its more efficient or not we need to know more about the input and context.
function OccurrencesOfChar(const S: string; const C: char): integer;
var
i: integer;
begin
result := 0;
for i := 1 to Length(S) do
if S[i] = C then
inc(result);
end;
function Test(str: string): string;
var
len, n, C, i: integer;
begin
C := 1;
len := Length(str);
n := OccurrencesOfChar(str, #9);
SetLength(result, len + n div 3);
n := 1;
for i := 1 to len do
begin
if str[i] = #9 then
begin
if n mod 3 = 0 then
begin
result[C] := #13;
inc(C);
result[C] := #10;
end
else
result[C] := #9;
Inc(n);
end
else
result[C] := str[i];
inc(C);
end;
end;
I expect this question will be closed, but just for fun, that would be my proposal.
Function Replace(const Instr:String;Re:Char;const ReWith:String):String;
var
i,o,cnt,l:Integer;
begin
cnt:=0;
o:=0;
SetLength(Result,Length(Instr)*Length(ReWith));// just for security
for I := 1 to Length(Instr) do
begin
if Instr[i]=Re then inc(cnt);
if cnt=3 then
begin
for l := 1 to Length(ReWith) do
begin
inc(o);
Result[o] := ReWith[l];
end;
cnt := 0;
end
else
begin
inc(o);
Result[o] := Instr[i];
end;
end;
SetLength(Result,o);
end;
procedure TForm3.Button1Click(Sender: TObject);
begin
Edit2.Text := Replace(Edit1.Text,'A','xxx')
end;
I would probably do something like this (coded in the browser). It only needs one string resize and should have less movement of data around. I exit when I have made the last replacement or if it didn't need any:
procedure ReplaceXChar(var aStringToReplace: string; const aIteration:
Integer; const aChar: Char; const aReplacement: string);
var
replaceCount: Integer;
cntr: Integer;
outputCntr: Integer;
lengthToReplace: Integer;
begin
// Find the number to replace
replaceCount := 0;
for cntr := 1 to Length(aStringToReplace) do
begin
if aStringToReplace[cntr] = aChar then
Inc(replaceCount);
end;
if replaceCount >= aIteration then
begin
// Now replace them
lengthToReplace := Length(aReplacement);
cntr := Length(aStringToReplace);
SetLength(aStringToReplace, cntr +
(replaceCount div aIteration) * (lengthToReplace - 1));
outputCntr := Length(aStringToReplace);
repeat
if aStringToReplace[cntr] = aChar then
begin
if (replaceCount mod aIteration) = 0 then
begin
Dec(outputCntr, lengthToReplace);
Move(aReplacement[1], aStringToReplace[outputCntr+1],
lengthToReplace * SizeOf(Char));
end
else
begin
aStringToReplace[outputCntr] := aStringToReplace[cntr];
Dec(outputCntr);
end;
Dec(replaceCount);
end
else
begin
aStringToReplace[outputCntr] := aStringToReplace[cntr];
Dec(outputCntr);
end;
Dec(cntr);
until replaceCount = 0;
end;
end;
Usage would be like this:
var
myString: String;
begin
myString := 'c1'#9'c2'#9'c3'#9'c4'#9'c5'#9'c6'#9;
ReplaceXChar(myString, 3, #9, #13#10);
ShowMessage(myString);
end;

How to split a string in Inno Setup

How can I split a string in Inno Setup?
Is there any special function in Inno Setup to split the string?
I want to get the following from the string '11.2.0.16':
tokens: array of string = ('11', '0', '2', '16');
Thanks in advance!
For anyone who prefers the function format, I have modified #cezarlamann's answer:
function StrSplit(Text: String; Separator: String): TArrayOfString;
var
i, p: Integer;
Dest: TArrayOfString;
begin
i := 0;
repeat
SetArrayLength(Dest, i+1);
p := Pos(Separator,Text);
if p > 0 then begin
Dest[i] := Copy(Text, 1, p-1);
Text := Copy(Text, p + Length(Separator), Length(Text));
i := i + 1;
end else begin
Dest[i] := Text;
Text := '';
end;
until Length(Text)=0;
Result := Dest
end;
I've been looking for the same thing today...
This one works just fine on Inno Setup Scripts. Paste this excerpt inside your script before the procedure/function which will call this "split" procedure.
You can also modify this onto a function, if you desire...
procedure Explode(var Dest: TArrayOfString; Text: String; Separator: String);
var
i, p: Integer;
begin
i := 0;
repeat
SetArrayLength(Dest, i+1);
p := Pos(Separator,Text);
if p > 0 then begin
Dest[i] := Copy(Text, 1, p-1);
Text := Copy(Text, p + Length(Separator), Length(Text));
i := i + 1;
end else begin
Dest[i] := Text;
Text := '';
end;
until Length(Text)=0;
end;
procedure Whatever();
var
str: String;
strArray: TArrayOfString;
i: Integer;
begin
Explode(strArray,str,'.');
for i:=0 to GetArrayLength(strArray)-1 do begin
{ do something }
end;
end;
Taken from here
Here's what I use:
procedure SplitString(S, Delim: string; var Dest: TArrayOfString);
var
Temp: string;
I, P: Integer;
begin
Temp := S;
I := StringChangeEx(Temp, Delim, '', true);
SetArrayLength(Dest, I + 1);
for I := 0 to GetArrayLength(Dest) - 1 do
begin
P := Pos(Delim, S);
if P > 0 then
begin
Dest[I] := Copy(S, 1, P - 1);
Delete(S, 1, P + Length(Delim) - 1);
end
else
Dest[I] := S;
end;
end;
This version avoids repeated array resizing by counting the delimiters using StringChangeEx and setting the array size only once. Since we then know the array size, we can just use a for loop. I also opted for Delete rather than Copy, which (IMO) makes the code easier to read. (This version also fixes the bug where the split does not occur correctly if the delimiter is longer than 1 character.)
If there is a possibility that the delimiter could also be right at the end of the string, then this is what I used (modified from #timyha's answer)
function StrSplit(Text: String; Separator: String): TArrayOfString;
var
i, p: Integer;
Dest: TArrayOfString;
begin
i := 0;
repeat
SetArrayLength(Dest, i+1);
p := Pos(Separator,Text);
if p > 0 then begin
Dest[i] := Copy(Text, 1, p-1);
Text := Copy(Text, p + Length(Separator), Length(Text));
i := i + 1;
//add an empty string if delim was at the end
if Text = '' then begin
Dest[i]:='';
i := i + 1;
end;
end else begin
Dest[i] := Text;
Text := '';
end;
until Length(Text)=0;
Result := Dest
end;

Resources