Here below I have a peace of code for two programs that cannot be debugged. Written in assembly language. Main point is that I need to debug it using DDD debugger. However, it displays "Segmentation Fault" in each case. I also tried to debug it in a simple way and with online debugger, but returns error meassage. Can someone help me with that?
Code A)
#Program asmddd.s for debugging with ddd
.global _start
.text
_start: movq $0x01289AB76EF34567, %rax
movb $-1, %al
movw $-1, %ax
movl $-1, %eax
movq $-1, %rax
movq $0x01289AB76EF34567, %rbx
movb $0xBB, %dl
movb %dl, %bl
movsbq %dl, %rbx
movzbq %dl, %rbx
movq $65535, %rbx
addq %rbx, %rax
subq %rax, %rbx
negq %rbx
subq %rbx, %rax
movq $60000, %rsi
movq $65535, %rdx
movq %rsi, %rax
imulq %rdx
cqto
idivq %rsi
testq %rsi,%rdi
jnz down1
up1: cmpq %rsi,%rdi
setl %al
movzbq %al, %rax
jmp down2
down1: pushq %rax
pushq %rbx
popq %rax
popq %rbx
movq %rdi, %rax
subq %rsi, %rax
movq %rsi, %rdx
subq %rdi, %rdx
cmpq %rsi, %rdi
cmovle %rdx, %rax
cmp %rdx, %rax
jz up1
down2: xorq %rax, %rbx
xorq %rbx, %rax
xorq %rax, %rbx
andq %rsi, %rax
orq %rsi, %rax
xorq %rax, %rax
notq %rax
negq %rax
incq %rax
decq %rax
addq $99,%rax
leaq (%rax, %rax, 8), %rax
leaq (%rax, %rax,4), %rax
salq $3, %rax
negq %rax
sarq %rax
ret
Code B)
.global _start
.data
n: .quad 0x0000000000000008
nfact: .quad 0x0000000000000000
.text
_start:
movq n, %rbx
movq %rbx,%rax
call ffact
movq $nfact, %rdi
movq %rax, (%rdi)
movq $60, %rax # system call 60 for exit
xorq %rdi, %rdi # return code 0 syscall
syscall
ffact: cmpq $1, %rbx
je L1
decq %rbx
pushq %rbx
call ffact
popq %rbx
mulq %rbx
L1: ret
Related
I have a really simple program that take an integer argument from the command line and perform the following task:
use clap::Parser ;
#[derive(Parser,Default)]
struct Args {
#[arg(short)]
number: u128
}
fn sum(n: u128) -> u128 {
let mut result: u128 = 0;
for i in 1..n { result += n/i; }
result
}
fn main() {
let args = Args::parse() ;
println!("{}", sum(args.number)) ;
}
Fairly simple, right? Well, when executing it for "large" numbers, let's say 999999999 for example, it takes almost ten seconds on my machine to get the result.
$ time ./target/release/main -n 999999999
20877697533
real 0m9.442s
user 0m9.370s
sys 0m0.030s
However, if I suppress any use of clap and hard-code the value
fn sum(n: u128) -> u128 {
let mut result: u128 = 0;
for i in 1..n { result += n/i; }
result
}
fn main() {
let n: u128 = 999999999 ;
println!("{}", sum(n)) ;
}
the execution time drops to around two seconds
$ time ./target/release/main
20877697533
real 0m2.398s
user 0m2.236s
sys 0m0.013s
So, what makes the clap version so slow?
999999999 fits into a 32-bit integer. With the hard-coded value, the compiler notices this and demotes all operations to 32-bit operations, which are far faster to execute than the 128-bit routines, which have no native compiler support.
Compare the assembly for the non-hardcoded loop (but with clap removed, as in Schwern's version):
xorl %esi, %esi
cmpq $2, %r15
movq %rdx, %rax
sbbq $0, %rax
movl $0, %r13d
jb .LBB5_9
movq %r15, %rsi
addq $-1, %rsi
movq %rdx, %rdi
adcq $-1, %rdi
movq %r15, %rax
addq $-2, %rax
movq %rdx, %rcx
adcq $-1, %rcx
movq %rsi, 32(%rsp)
andl $3, %esi
movq %rsi, 40(%rsp)
cmpq $3, %rax
sbbq $0, %rcx
movq %rdx, 8(%rsp)
jae .LBB5_4
xorl %ebp, %ebp
movl $1, %edx
xorl %r13d, %r13d
xorl %ecx, %ecx
jmp .LBB5_6
.LBB5_4:
andq $-4, 32(%rsp)
movl $1, %edx
xorl %ebp, %ebp
xorl %r13d, %r13d
xorl %ecx, %ecx
xorl %esi, %esi
xorl %ebx, %ebx
movq %rdi, 64(%rsp)
.LBB5_5:
movq %rbx, 88(%rsp)
movq %rsi, 96(%rsp)
movq %rcx, 16(%rsp)
movq %rdx, (%rsp)
addq $1, %rdx
movq %rdx, 24(%rsp)
movq %rcx, %rbx
adcq $0, %rbx
movq %r15, %rdi
movq 8(%rsp), %rsi
movq (%rsp), %rdx
movq 16(%rsp), %rcx
movq __udivti3#GOTPCREL(%rip), %r14
callq *%r14
movq %r14, %r8
movq %rax, %r14
movq %rdx, %r12
addq %rbp, %r14
adcq %r13, %r12
movq (%rsp), %rax
addq $2, %rax
movq %rax, 80(%rsp)
movq 16(%rsp), %rax
adcq $0, %rax
movq %rax, 72(%rsp)
movq %r15, %rdi
movq 8(%rsp), %rsi
movq 24(%rsp), %rdx
movq %rbx, %rcx
movq %r8, %rbx
callq *%r8
movq %rbx, %r8
movq %rax, %rbx
movq %r15, %r13
movq %rdx, %rbp
addq %r14, %rbx
adcq %r12, %rbp
movq (%rsp), %rax
addq $3, %rax
movq %rax, 24(%rsp)
movq 16(%rsp), %r15
adcq $0, %r15
movq %r13, %rdi
movq 8(%rsp), %rsi
movq 80(%rsp), %rdx
movq 72(%rsp), %rcx
movq %r8, %r14
callq *%r8
movq %r14, %r8
movq %rax, %r12
movq %rdx, %r14
addq %rbx, %r12
adcq %rbp, %r14
addq $4, (%rsp)
adcq $0, 16(%rsp)
movq %r13, %rdi
movq 8(%rsp), %rsi
movq 24(%rsp), %rdx
movq %r15, %rcx
movq %r13, %r15
callq *%r8
movq 88(%rsp), %rbx
movq 96(%rsp), %rsi
movq 64(%rsp), %rdi
movq %rax, %rbp
movq %rdx, %r13
movq (%rsp), %rdx
addq %r12, %rbp
adcq %r14, %r13
addq $4, %rsi
adcq $0, %rbx
movq %rsi, %rax
xorq 32(%rsp), %rax
movq %rbx, %rcx
xorq %rdi, %rcx
orq %rax, %rcx
movq 16(%rsp), %rcx
jne .LBB5_5
.LBB5_6:
cmpq $0, 40(%rsp)
movq %rbp, %rsi
je .LBB5_9
xorl %r12d, %r12d
xorl %ebp, %ebp
movq %rdx, %rbx
movq %rcx, %r14
.LBB5_8:
movq %rsi, (%rsp)
addq $1, %rbx
adcq $0, %r14
movq %r15, %rdi
movq 8(%rsp), %rsi
callq *__udivti3#GOTPCREL(%rip)
movq (%rsp), %rsi
addq %rax, %rsi
adcq %rdx, %r13
addq $1, %r12
adcq $0, %rbp
movq %r12, %rax
xorq 40(%rsp), %rax
orq %rbp, %rax
movq %rbx, %rdx
movq %r14, %rcx
jne .LBB5_8
to the hardcoded loop:
.LBB5_1:
movl $999999999, %eax
xorl %edx, %edx
divl %r8d
movl %eax, %r9d
addq %rcx, %r9
adcq $0, %rdi
addq $2, %r10
adcq $0, %r11
leal 1(%r8), %ecx
movl $999999999, %eax
xorl %edx, %edx
divl %ecx
movl %eax, %ecx
addq %r9, %rcx
adcq $0, %rdi
cmpq $999999997, %r8
sbbq $0, %rsi
movq %r10, %r8
movq %r11, %rsi
jb .LBB5_1
subq $88, %rsp
movq %rcx, 24(%rsp)
movq %rdi, 32(%rsp)
leaq 24(%rsp), %rax
movq %rax, 8(%rsp)
movq core::fmt::num::<impl core::fmt::Display for u128>::fmt#GOTPCREL(%rip), %rax
movq %rax, 16(%rsp)
leaq .L__unnamed_2(%rip), %rax
movq %rax, 56(%rsp)
movq $2, 64(%rsp)
movq $0, 40(%rsp)
leaq 8(%rsp), %rax
movq %rax, 72(%rsp)
movq $1, 80(%rsp)
leaq 40(%rsp), %rdi
callq *std::io::stdio::_print#GOTPCREL(%rip)
addq $88, %rsp
retq
Note in particular the total lack of calls to __udivti3 in this version.
fn main() {
let n: u128 = 999999999 ;
println!("{}", sum(n)) ;
}
With this version, n is known at compile time and the compiler can optimize sum for it.
We can test this by removing clap and doing our own command line parsing. This is just as slow as the clap version.
fn main() {
let args: Vec<String> = env::args().collect();
println!("{}", sum(u128::from_str(&args[2]).unwrap()));
}
.globl start
.section .text
_start:
movq $2, %rbx
movq $3, %rcx
movq $1, %rax
mainloop:
addq $0, %rcx
jz complete
mulq %rbx
decq %rcx
jmp mainloop
complete:
movq %rax, %rdi
movq $60, %rax
syscall
I have been trying to run this code, but keep getting an
illegal instruction
through the assembler.
I cannot figure why it is supposed to run through GNU assembler.
I am new to assembly and am aware that my assembly code may not be efficient or could be better. The comments of the assembly may be messed up a little due to constant changes. The goal is to print each character of the string individually and when comes across with a format identifier like %s, it prints a string from one of the parameters in place of %s.
So for example:
String: Hello, %s
Parameter (RSI): Foo
Output: Hello, Foo
So the code does what it suppose to do but give segmentation error at the end.
.bss
char: .byte 0
.text
.data
text1: .asciz "%s!\n"
text2: .asciz "My name is %s. I think I’ll get a %u for my exam. What does %r do? And %%?\n"
word1: .asciz "Piet"
.global main
main:
pushq %rbp # push the base pointer (and align the stack)
movq %rsp, %rbp # copy stack pointer value to base pointer
movq $text1, %rdi
movq $word1, %rsi
movq $word1, %rdx
movq $word1, %rcx
movq $word1, %r8
movq $word1, %r9
call myPrint
end:
movq %rbp, %rsp # clear local variables from stack
popq %rbp # restore base pointer location
movq $60, %rax
movq $0, %rdi
syscall
myPrint:
pushq %rbp
movq %rsp, %rbp
pushq %rsi
pushq %rdx
pushq %rcx
pushq %r8
pushq %r9
movq %rdi, %r12
regPush:
movq $0, %rbx
#rbx: counter
printLooper:
movb (%r12), %r14b #Get a byte of r12 to r14
cmpb $0, %r14b #Check if r14 is a null byte
je endPrint #If it is a null byte then go to 'endPrint'
cmpb $37, %r14b
je formatter
incq %r12 #Increment r12 to the next byte
skip:
mov $char, %r15 #Move char address to r15
mov %r14b, (%r15) #Move r14 byte into the value of r15
mov $char, %rcx #Move char address into rcx
movq $1, %r13 #For the number of byte
printer:
movq $0, %rsi #Clearing rsi
mov %rcx, %rsi #Move the address to rsi
movq $1, %rax #Sys write
movq $1, %rdi #Output
movq %r13, %rdx #Number of byte to rdx
syscall
jmp printLooper
formatter:
incq %r12 #Moving to char after "%"
movb (%r12), %r14b #Moving the char byte into r14
cmpb $115, %r14b #Compare 's' with r14
je formatString #If it is equal to 's' then jump to 'formatString'
movb -1(%r12), %r14b #Put back the previous char into r14
jmp skip
####String Formatter Start ##################################################
formatString:
addq $1, %rbx
movq $8, %rax
mulq %rbx
subq %rax, %rbp
movq (%rbp), %r15
pushq %r15 ### into the stack
movq $0, %r13 ### Byte counter
formatStringLoop:
movb (%r15), %r14b #Move char into r14
cmpb $0, %r14b #Compare r14 with null byte
je formatStringEnd #If it is equal, go to 'formatStringEnd'
incq %r15 #Increment to next char
addq $1, %r13 #Add 1 to the byte counter
jmp formatStringLoop#Loop again
formatStringEnd:
popq %rcx #Pop the address into rcx
incq %r12 #Moving r12 to next char
jmp printer
#######String Formatter End #############################################
endPrint:
movq %rbp, %rsp
popq %rbp
ret
In formatString you modify %rbp with subq %rax, %rbp, forgetting that you will restore %rsp from it. So when you mov %rbp, %rsp just before the function returns, you end up with %rsp pointing somewhere else, and so you get the wrong return address.
I guess you are subtracting some offset from %rbp to get some space on the stack. This seems unsafe because you've pushed lots of other stuff there. It is safe to use up to 128 bytes below the stack pointer as this is the red zone, but it would be more natural to use an offset from %rsp instead. Using SIB addressing you can access data at constant or variable offsets to %rsp without actually changing its value.
How I found this with gdb: by setting breakpoints at myPrint and endPrint, I found that %rsp was different at the ret than it was on entry. Its value could only have come from %rbp, so I did watch $rbp to have the debugger break when %rbp changed, and it pointed straight to the offending instruction in formatString. (Which I could also have found by searching the source code for %rbp.)
Also, your .text at the top of the file is misplaced, so all your code gets placed in the .data section. This actually works but it surely is not what you intended.
I could test using strncpy() with larger source string then the destination:
int main() {
char *ptr = malloc(12);
strcpy(ptr,"hello world!");
return 0;
}
Compiling with the flag -fstack-protector and using the -S option I got:
.file "malloc.c"
.text
.globl main
.type main, #function
main:
.LFB2:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp
movl %edi, -20(%rbp)
movq %rsi, -32(%rbp)
movq %fs:40, %rax
movq %rax, -8(%rbp)
xorl %eax, %eax
movq $0, -16(%rbp)
movl $12, %edi
call malloc
movq %rax, -16(%rbp)
movq -16(%rbp), %rax
movabsq $8022916924116329800, %rdx
movq %rdx, (%rax)
movl $560229490, 8(%rax)
movb $0, 12(%rax)
movl $0, %eax
movq -8(%rbp), %rcx
xorq %fs:40, %rcx
je .L3
call __stack_chk_fail
.L3:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE2:
.size main, .-main
Could someone explain to me how this works? And why isn't the "canary word" also overwritten by the \0 of the hello world! string?
Could someone explain to me how does this work ?
Canary word is read from fs:40 and store at top of frame here:
movq %fs:40, %rax
movq %rax, -8(%rbp)
It's below the return address so if your code happens to overflow the buffer (which will be below -8(%rbp)), it'll first overwrite the -8(%rbp) location. This will be detected by GCC prior to issuing ret here:
movq -8(%rbp), %rcx
xorq %fs:40, %rcx ; Checks that %fs:40 == -8(%rbp)
je .L3 ; Ok, return
call __stack_chk_fail ; Die
as overwritten contents of -8(%rbp) will likely to be different from proper value (installed from fs:40).
And why is not the canary word also overwritten by the \0 of the hello world!?
Your code has heap overflow, not buffer overflow so SSP can't help...
In other languages such as Java, under the hood there is actually a difference between string obtained via string literal vs initializer. In Swift, are they equivalent under the hood?
e.g.
var string:String = ""
var string:String = String()
Refer to this SO post for info on differences between literal and object in Java.
The declarations are equivalent according to the Apple docs:
Initializing an Empty String
To create an empty String value as the starting point for building a longer string, either assign an empty string literal to a variable, or initialize a new String instance with initializer syntax:
var emptyString = "" // empty string literal
var anotherEmptyString = String() // initializer syntax
// these two strings are both empty, and are equivalent to each other
Reference: https://developer.apple.com/library/prerelease/ios/documentation/Swift/Conceptual/Swift_Programming_Language/StringsAndCharacters.html
If we look at the assembly, we will see that the two constructors use identical instructions.
string.swift:
let str = String()
let str2 = ""
Compiled assembly (swiftc -emit-assembly string.swift):
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 14, 3
.globl _main
.align 4, 0x90
_main:
.cfi_startproc
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
subq $16, %rsp
movq _globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token4#GOTPCREL(%rip), %rax
movq _globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_func4#GOTPCREL(%rip), %rcx
xorl %edx, %edx
movl %edi, -4(%rbp)
movq %rax, %rdi
movq %rsi, -16(%rbp)
movq %rcx, %rsi
callq _swift_once
movq _globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_token5#GOTPCREL(%rip), %rdi
movq _globalinit_33_1BDF70FFC18749BAB495A73B459ED2F0_func5#GOTPCREL(%rip), %rax
xorl %r8d, %r8d
movl %r8d, %edx
movq __TZvOSs7Process5_argcVSs5Int32#GOTPCREL(%rip), %rcx
movl -4(%rbp), %r8d
movl %r8d, (%rcx)
movq %rax, %rsi
callq _swift_once
movq __TZvOSs7Process11_unsafeArgvGVSs20UnsafeMutablePointerGS0_VSs4Int8__#GOTPCREL(%rip), %rax
movq -16(%rbp), %rcx
movq %rcx, (%rax)
callq __TFSSCfMSSFT_SS
leaq L___unnamed_1(%rip), %rdi
xorl %r8d, %r8d
movl %r8d, %esi
movl $1, %r8d
movq %rax, __Tv6string3strSS(%rip)
movq %rdx, __Tv6string3strSS+8(%rip)
movq %rcx, __Tv6string3strSS+16(%rip)
movl %r8d, %edx
callq __TFSSCfMSSFT21_builtinStringLiteralBp8byteSizeBw7isASCIIBi1__SS
xorl %r8d, %r8d
movq %rax, __Tv6string4str2SS(%rip)
movq %rdx, __Tv6string4str2SS+8(%rip)
movq %rcx, __Tv6string4str2SS+16(%rip)
movl %r8d, %eax
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
.globl __Tv6string3strSS
.zerofill __DATA,__common,__Tv6string3strSS,24,3
.globl __Tv6string4str2SS
.zerofill __DATA,__common,__Tv6string4str2SS,24,3
.section __TEXT,__cstring,cstring_literals
L___unnamed_1:
.space 1
.no_dead_strip __Tv6string3strSS
.no_dead_strip __Tv6string4str2SS
.linker_option "-lswiftCore"
.section __DATA,__objc_imageinfo,regular,no_dead_strip
L_OBJC_IMAGE_INFO:
.long 0
.long 512
.subsections_via_symbols
Notice that the declarations for str and str2 have identical instructions:
xorl %r8d, %r8d
movl %r8d, %esi
movl $1, %r8d
movq %rax, __Tv6string3strSS(%rip)
movq %rdx, __Tv6string3strSS+8(%rip)
movq %rcx, __Tv6string3strSS+16(%rip)
movl %r8d, %edx
# ...
xorl %r8d, %r8d
movq %rax, __Tv6string4str2SS(%rip)
movq %rdx, __Tv6string4str2SS+8(%rip)
movq %rcx, __Tv6string4str2SS+16(%rip)
movl %r8d, %eax
You can learn more about String literals by reviewing the Apple's documentation.