Swift string via string literal vs initializer - string

In other languages such as Java, under the hood there is actually a difference between string obtained via string literal vs initializer. In Swift, are they equivalent under the hood?
e.g.
var string:String = ""
var string:String = String()
Refer to this SO post for info on differences between literal and object in Java.

The declarations are equivalent according to the Apple docs:
Initializing an Empty String
To create an empty String value as the starting point for building a longer string, either assign an empty string literal to a variable, or initialize a new String instance with initializer syntax:
var emptyString = "" // empty string literal
var anotherEmptyString = String() // initializer syntax
// these two strings are both empty, and are equivalent to each other
Reference: https://developer.apple.com/library/prerelease/ios/documentation/Swift/Conceptual/Swift_Programming_Language/StringsAndCharacters.html
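If we look at the assembly, we will see that the two declarations compile to nearly identical instructions: each one calls a String initializer (__TFSSCfMSSFT_SS for String(), __TFSSCfMSSFT21_builtinStringLiteralBp8byteSizeBw7isASCIIBi1__SS for the literal) and then stores the returned value in exactly the same way.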
string.swift:
let str = String()
let str2 = ""
Compiled assembly (swiftc -emit-assembly string.swift):
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 14, 3
.globl _main
.align 4, 0x90
_main:
.cfi_startproc
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
subq $16, %rsp
movq _globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token4@GOTPCREL(%rip), %rax
movq _globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_func4@GOTPCREL(%rip), %rcx
xorl %edx, %edx
movl %edi, -4(%rbp)
movq %rax, %rdi
movq %rsi, -16(%rbp)
movq %rcx, %rsi
callq _swift_once
movq _globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token5@GOTPCREL(%rip), %rdi
movq _globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_func5@GOTPCREL(%rip), %rax
xorl %r8d, %r8d
movl %r8d, %edx
movq __TZvOSs7Process5_argcVSs5Int32@GOTPCREL(%rip), %rcx
movl -4(%rbp), %r8d
movl %r8d, (%rcx)
movq %rax, %rsi
callq _swift_once
movq __TZvOSs7Process11_unsafeArgvGVSs20UnsafeMutablePointerGS0_VSs4Int8__@GOTPCREL(%rip), %rax
movq -16(%rbp), %rcx
movq %rcx, (%rax)
callq __TFSSCfMSSFT_SS
leaq L___unnamed_1(%rip), %rdi
xorl %r8d, %r8d
movl %r8d, %esi
movl $1, %r8d
movq %rax, __Tv6string3strSS(%rip)
movq %rdx, __Tv6string3strSS+8(%rip)
movq %rcx, __Tv6string3strSS+16(%rip)
movl %r8d, %edx
callq __TFSSCfMSSFT21_builtinStringLiteralBp8byteSizeBw7isASCIIBi1__SS
xorl %r8d, %r8d
movq %rax, __Tv6string4str2SS(%rip)
movq %rdx, __Tv6string4str2SS+8(%rip)
movq %rcx, __Tv6string4str2SS+16(%rip)
movl %r8d, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.globl __Tv6string3strSS
.zerofill __DATA,__common,__Tv6string3strSS,24,3
.globl __Tv6string4str2SS
.zerofill __DATA,__common,__Tv6string4str2SS,24,3
.section __TEXT,__cstring,cstring_literals
L___unnamed_1:
.space 1
.no_dead_strip __Tv6string3strSS
.no_dead_strip __Tv6string4str2SS
.linker_option "-lswiftCore"
.section __DATA,__objc_imageinfo,regular,no_dead_strip
L_OBJC_IMAGE_INFO:
.long 0
.long 512
.subsections_via_symbols
Notice that the declarations for str and str2 have identical instructions:
xorl %r8d, %r8d
movl %r8d, %esi
movl $1, %r8d
movq %rax, __Tv6string3strSS(%rip)
movq %rdx, __Tv6string3strSS+8(%rip)
movq %rcx, __Tv6string3strSS+16(%rip)
movl %r8d, %edx
# ...
xorl %r8d, %r8d
movq %rax, __Tv6string4str2SS(%rip)
movq %rdx, __Tv6string4str2SS+8(%rip)
movq %rcx, __Tv6string4str2SS+16(%rip)
movl %r8d, %eax
You can learn more about String literals by reviewing Apple's documentation.

Related

Why is calculating based on a large number read by clap::Parser slower than when that number is hardcoded?

I have a really simple program that take an integer argument from the command line and perform the following task:
use clap::Parser ;
#[derive(Parser,Default)]
struct Args {
#[arg(short)]
number: u128
}
/// Returns the sum of the integer quotients `n / i` for every `i` in `1..n`.
///
/// The range is half-open, so the last divisor used is `n - 1`; for `n <= 1`
/// the range is empty and the result is 0.
fn sum(n: u128) -> u128 {
let mut result: u128 = 0;
// Every iteration performs one u128 division of `n` by the loop counter.
for i in 1..n { result += n/i; }
result
}
fn main() {
let args = Args::parse() ;
println!("{}", sum(args.number)) ;
}
Fairly simple, right? Well, when executing it for "large" numbers, let's say 999999999 for example, it takes almost ten seconds on my machine to get the result.
$ time ./target/release/main -n 999999999
20877697533
real 0m9.442s
user 0m9.370s
sys 0m0.030s
However, if I suppress any use of clap and hard-code the value
/// Returns the sum of the integer quotients `n / i` for every `i` in `1..n`.
///
/// The range is half-open, so the last divisor used is `n - 1`; for `n <= 1`
/// the range is empty and the result is 0.
fn sum(n: u128) -> u128 {
let mut result: u128 = 0;
// Every iteration performs one u128 division of `n` by the loop counter.
for i in 1..n { result += n/i; }
result
}
fn main() {
let n: u128 = 999999999 ;
println!("{}", sum(n)) ;
}
the execution time drops to around two seconds
$ time ./target/release/main
20877697533
real 0m2.398s
user 0m2.236s
sys 0m0.013s
So, what makes the clap version so slow?
999999999 fits into a 32-bit integer. With the hard-coded value, the compiler notices this and demotes all operations to 32-bit operations, which are far faster to execute than the 128-bit routines, which have no native hardware support (128-bit division is performed by calling the __udivti3 runtime routine).
Compare the assembly for the non-hardcoded loop (but with clap removed, as in Schwern's version):
xorl %esi, %esi
cmpq $2, %r15
movq %rdx, %rax
sbbq $0, %rax
movl $0, %r13d
jb .LBB5_9
movq %r15, %rsi
addq $-1, %rsi
movq %rdx, %rdi
adcq $-1, %rdi
movq %r15, %rax
addq $-2, %rax
movq %rdx, %rcx
adcq $-1, %rcx
movq %rsi, 32(%rsp)
andl $3, %esi
movq %rsi, 40(%rsp)
cmpq $3, %rax
sbbq $0, %rcx
movq %rdx, 8(%rsp)
jae .LBB5_4
xorl %ebp, %ebp
movl $1, %edx
xorl %r13d, %r13d
xorl %ecx, %ecx
jmp .LBB5_6
.LBB5_4:
andq $-4, 32(%rsp)
movl $1, %edx
xorl %ebp, %ebp
xorl %r13d, %r13d
xorl %ecx, %ecx
xorl %esi, %esi
xorl %ebx, %ebx
movq %rdi, 64(%rsp)
.LBB5_5:
movq %rbx, 88(%rsp)
movq %rsi, 96(%rsp)
movq %rcx, 16(%rsp)
movq %rdx, (%rsp)
addq $1, %rdx
movq %rdx, 24(%rsp)
movq %rcx, %rbx
adcq $0, %rbx
movq %r15, %rdi
movq 8(%rsp), %rsi
movq (%rsp), %rdx
movq 16(%rsp), %rcx
movq __udivti3@GOTPCREL(%rip), %r14
callq *%r14
movq %r14, %r8
movq %rax, %r14
movq %rdx, %r12
addq %rbp, %r14
adcq %r13, %r12
movq (%rsp), %rax
addq $2, %rax
movq %rax, 80(%rsp)
movq 16(%rsp), %rax
adcq $0, %rax
movq %rax, 72(%rsp)
movq %r15, %rdi
movq 8(%rsp), %rsi
movq 24(%rsp), %rdx
movq %rbx, %rcx
movq %r8, %rbx
callq *%r8
movq %rbx, %r8
movq %rax, %rbx
movq %r15, %r13
movq %rdx, %rbp
addq %r14, %rbx
adcq %r12, %rbp
movq (%rsp), %rax
addq $3, %rax
movq %rax, 24(%rsp)
movq 16(%rsp), %r15
adcq $0, %r15
movq %r13, %rdi
movq 8(%rsp), %rsi
movq 80(%rsp), %rdx
movq 72(%rsp), %rcx
movq %r8, %r14
callq *%r8
movq %r14, %r8
movq %rax, %r12
movq %rdx, %r14
addq %rbx, %r12
adcq %rbp, %r14
addq $4, (%rsp)
adcq $0, 16(%rsp)
movq %r13, %rdi
movq 8(%rsp), %rsi
movq 24(%rsp), %rdx
movq %r15, %rcx
movq %r13, %r15
callq *%r8
movq 88(%rsp), %rbx
movq 96(%rsp), %rsi
movq 64(%rsp), %rdi
movq %rax, %rbp
movq %rdx, %r13
movq (%rsp), %rdx
addq %r12, %rbp
adcq %r14, %r13
addq $4, %rsi
adcq $0, %rbx
movq %rsi, %rax
xorq 32(%rsp), %rax
movq %rbx, %rcx
xorq %rdi, %rcx
orq %rax, %rcx
movq 16(%rsp), %rcx
jne .LBB5_5
.LBB5_6:
cmpq $0, 40(%rsp)
movq %rbp, %rsi
je .LBB5_9
xorl %r12d, %r12d
xorl %ebp, %ebp
movq %rdx, %rbx
movq %rcx, %r14
.LBB5_8:
movq %rsi, (%rsp)
addq $1, %rbx
adcq $0, %r14
movq %r15, %rdi
movq 8(%rsp), %rsi
callq *__udivti3@GOTPCREL(%rip)
movq (%rsp), %rsi
addq %rax, %rsi
adcq %rdx, %r13
addq $1, %r12
adcq $0, %rbp
movq %r12, %rax
xorq 40(%rsp), %rax
orq %rbp, %rax
movq %rbx, %rdx
movq %r14, %rcx
jne .LBB5_8
to the hardcoded loop:
.LBB5_1:
movl $999999999, %eax
xorl %edx, %edx
divl %r8d
movl %eax, %r9d
addq %rcx, %r9
adcq $0, %rdi
addq $2, %r10
adcq $0, %r11
leal 1(%r8), %ecx
movl $999999999, %eax
xorl %edx, %edx
divl %ecx
movl %eax, %ecx
addq %r9, %rcx
adcq $0, %rdi
cmpq $999999997, %r8
sbbq $0, %rsi
movq %r10, %r8
movq %r11, %rsi
jb .LBB5_1
subq $88, %rsp
movq %rcx, 24(%rsp)
movq %rdi, 32(%rsp)
leaq 24(%rsp), %rax
movq %rax, 8(%rsp)
movq core::fmt::num::<impl core::fmt::Display for u128>::fmt@GOTPCREL(%rip), %rax
movq %rax, 16(%rsp)
leaq .L__unnamed_2(%rip), %rax
movq %rax, 56(%rsp)
movq $2, 64(%rsp)
movq $0, 40(%rsp)
leaq 8(%rsp), %rax
movq %rax, 72(%rsp)
movq $1, 80(%rsp)
leaq 40(%rsp), %rdi
callq *std::io::stdio::_print@GOTPCREL(%rip)
addq $88, %rsp
retq
Note in particular the total lack of calls to __udivti3 in this version.
fn main() {
let n: u128 = 999999999 ;
println!("{}", sum(n)) ;
}
With this version, n is known at compile time and the compiler can optimize sum for it.
We can test this by removing clap and doing our own command line parsing. This is just as slow as the clap version.
fn main() {
let args: Vec<String> = env::args().collect();
println!("{}", sum(u128::from_str(&args[2]).unwrap()));
}

Impossible to debug assembler code due to segmentation fault

Below are two pieces of code, for two programs that I cannot debug. Both are written in assembly language. The main point is that I need to debug them using the DDD debugger. However, it displays "Segmentation Fault" in each case. I also tried to debug them in a simple way and with an online debugger, but that returns an error message. Can someone help me with that?
Code A)
#Program asmddd.s for debugging with ddd
.global _start
.text
_start: movq $0x01289AB76EF34567, %rax
movb $-1, %al
movw $-1, %ax
movl $-1, %eax
movq $-1, %rax
movq $0x01289AB76EF34567, %rbx
movb $0xBB, %dl
movb %dl, %bl
movsbq %dl, %rbx
movzbq %dl, %rbx
movq $65535, %rbx
addq %rbx, %rax
subq %rax, %rbx
negq %rbx
subq %rbx, %rax
movq $60000, %rsi
movq $65535, %rdx
movq %rsi, %rax
imulq %rdx
cqto
idivq %rsi
testq %rsi,%rdi
jnz down1
up1: cmpq %rsi,%rdi
setl %al
movzbq %al, %rax
jmp down2
down1: pushq %rax
pushq %rbx
popq %rax
popq %rbx
movq %rdi, %rax
subq %rsi, %rax
movq %rsi, %rdx
subq %rdi, %rdx
cmpq %rsi, %rdi
cmovle %rdx, %rax
cmp %rdx, %rax
jz up1
down2: xorq %rax, %rbx
xorq %rbx, %rax
xorq %rax, %rbx
andq %rsi, %rax
orq %rsi, %rax
xorq %rax, %rax
notq %rax
negq %rax
incq %rax
decq %rax
addq $99,%rax
leaq (%rax, %rax, 8), %rax
leaq (%rax, %rax,4), %rax
salq $3, %rax
negq %rax
sarq %rax
ret
Code B)
.global _start
.data
n: .quad 0x0000000000000008
nfact: .quad 0x0000000000000000
.text
_start:
movq n, %rbx
movq %rbx,%rax
call ffact
movq $nfact, %rdi
movq %rax, (%rdi)
movq $60, %rax # system call 60 for exit
xorq %rdi, %rdi # return code 0 syscall
syscall
# ffact: recursive routine that works on %rbx (counter) and %rax (running product).
# NOTE(review): presumably computes a factorial from the value the caller put in
# both %rax and %rbx - confirm against the intended result.
ffact: cmpq $1, %rbx # base case: stop recursing once %rbx equals 1
je L1
decq %rbx # %rbx = %rbx - 1
pushq %rbx # save the decremented value across the recursive call
call ffact
popq %rbx # restore the value saved before the call
mulq %rbx # unsigned multiply: %rdx:%rax = %rax * %rbx
L1: ret

How do canary words allow gcc to detect buffer overflows?

I could test using strcpy() with a larger source string than the destination:
int main() {
char *ptr = malloc(12);
strcpy(ptr,"hello world!");
return 0;
}
Compiling with the flag -fstack-protector and using the -S option I got:
.file "malloc.c"
.text
.globl main
.type main, @function
main:
.LFB2:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movl %edi, -20(%rbp)
movq %rsi, -32(%rbp)
movq %fs:40, %rax
movq %rax, -8(%rbp)
xorl %eax, %eax
movq $0, -16(%rbp)
movl $12, %edi
call malloc
movq %rax, -16(%rbp)
movq -16(%rbp), %rax
movabsq $8022916924116329800, %rdx
movq %rdx, (%rax)
movl $560229490, 8(%rax)
movb $0, 12(%rax)
movl $0, %eax
movq -8(%rbp), %rcx
xorq %fs:40, %rcx
je .L3
call __stack_chk_fail
.L3:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2:
.size main, .-main
Could someone explain to me how this works? And why isn't the "canary word" also overwritten by the \0 of the hello world! string?
Could someone explain to me how does this work ?
The canary word is read from fs:40 and stored at the top of the frame here:
movq %fs:40, %rax
movq %rax, -8(%rbp)
It's below the return address so if your code happens to overflow the buffer (which will be below -8(%rbp)), it'll first overwrite the -8(%rbp) location. This will be detected by GCC prior to issuing ret here:
movq -8(%rbp), %rcx
xorq %fs:40, %rcx ; Checks that %fs:40 == -8(%rbp)
je .L3 ; Ok, return
call __stack_chk_fail ; Die
as the overwritten contents of -8(%rbp) will likely be different from the proper value (installed from fs:40).
And why is not the canary word also overwritten by the \0 of the hello world!?
Your code has a heap overflow, not a stack buffer overflow, so SSP can't help...

how do I call a external function in assembler?

I have tried to use an external function in assembly code:
.section .rodata
.LC0:
.string "My number is: %lld"
.text
.globl start
start:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movq $12345, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rsi
movl $.LC0, %edi
movl $0, %eax
call printf # my external function
# exit-syscall
mov $1, %eax
mov $0, %ebx
int $0x80
I assembled and linked with:
as -o myObjfile.o mySourcefile.s
ld -e start -o myProgram -lc myObjfile.o
The executable is build, but it doesn't run, so what's wrong with it?
If I understand correctly if the function you want to call is in the C library you can just declare it as an external function.
.section .rodata
.LC0:
.string "My number is: %lld"
.text
.extern printf #declaring the external function
.globl start
start:
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movq $12345, -8(%rbp)
movq -8(%rbp), %rax
movq %rax, %rsi
movl $.LC0, %edi
movl $0, %eax
call printf # my external function
# exit-syscall
mov $1, %eax
mov $0, %ebx
int $0x80
If this doesn't work, try linking explicitly against the C library (libc); note that stdlib.h is only a header file, not something you can link with.

Assembly - Why strtol clobbers %rcx register?

Context :
Linux 64.
GCC 4.8.2 (with -O3 -march=native)
The x86_64 abi under my left hand, opened at page 21.
The C code :
int main (int argc, char ** argv) {
printf("%d %s\n", atoi(argv[2]),argv[1] );
}
The assembly code :
(notice that the compiler replaced atoi with strtol by itself)
...
movl $10, %edx
movq 16(%rsi), %rdi
movq 8(%rsi), %rbx
xorl %esi, %esi
call strtol
movl $.LC0, %edi
movq %rbx, %rdx
movl %eax, %esi
xorl %eax, %eax
call printf
xorl %eax, %eax
popq %rbx
...
The question :
%rcx should be reserved for the 4th input integer argument.
strtol has 3 input args (respectively registers %rdi, %rsi, %rdx) and one return, %eax.
Why then is %rcx clobbered ?
This code won't make it :
...
movl $10, %edx
movq 16(%rsi), %rdi
movq 8(%rsi), %rcx <-- look I replaced %rbx with %rcx
xorl %esi, %esi
call strtol
movl $.LC0, %edi
movq %rcx, %rdx <-- look I replaced %rbx with %rcx
movl %eax, %esi
xorl %eax, %eax
call printf
xorl %eax, %eax
popq %rbx
...
Thanks
In each calling convention I know there are some registers that may be modified by the called function and some which must not be modified.
In 32-bit programs ecx may be modified while ebx must not be modified - or, to be more exact - must be restored before returning. For 64-bit programs this rule seems to be the same.
Indeed most functions modify most registers; for this reason there is a "popq %rbx" at the end of the code you posted because rbx must not be modified by the function. rcx may be modified and strtol obviously does that!

Resources