Assembly: how to convert number into ascii and write to display buffer - linux

I am new to assembly and am programming in linux 64 bit in AT&T syntax. If I store the number 1 in a register, how can I translate that to the ascii character "A"? For example:
movl $1, %ebx
addl $64, %ebx
Can I add 64 to 1 to make 65 (the decimal value of A), then somehow convert it to "A" and send this to the buffer using write system call?
EDIT 1: Posting my program code here.
# Prints one letter per entry of `message` (1 -> 'A', 2 -> 'B', ...) to
# stdout. The letters are pushed on the stack first and written back one
# byte at a time, so they come out in reverse table order.
# Syntax: AT&T (GAS). Target: x86-64 Linux, raw syscalls, no libc.
.section .data
message:
    .long 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
length:
    .long 10

.section .text
.globl _start
_start:
    xorq %rdi, %rdi                 # rdi = index into message
    xorq %rax, %rax
    xorq %rbx, %rbx
    xorq %rcx, %rcx                 # rcx = number of letters pushed so far
    xorq %rdx, %rdx
    movl length, %edx               # edx = number of table entries

loop:
    cmpl %ecx, %edx
    je loop_end                     # every entry converted
    movl message(,%rdi,4), %eax
    addl $64, %eax                  # 1 -> 65 ('A'), 2 -> 66 ('B'), ...
    pushq %rax                      # letter sits in the low byte of the slot
    incq %rdi
    incq %rcx
    jmp loop

# Invariant here: rcx = letters still on the stack, newest one at (%rsp).
loop_end:
    cmpq $0, %rcx
    je exit
    movq %rsp, %rsi                 # write() needs the ADDRESS of the byte,
                                    # not the character value itself
    pushq %rcx                      # syscall clobbers rcx (and r11)
    movq $1, %rax                   # sys_write
    movq $1, %rdi                   # fd 1 = stdout
    movq $1, %rdx                   # one byte per call
    syscall
    popq %rcx
    addq $8, %rsp                   # drop the letter just written
    decq %rcx
    jmp loop_end

exit:
    movq $60, %rax                  # sys_exit
    movq $0, %rdi                   # status 0
    syscall

I'm not entirely familiar with AT&T syntax, but the disassembly of NASM in what you're accustomed to should suffice.
You should try to avoid what is called hard coding constants as it makes your program harder to maintain, especially when it's hundreds if not thousands of lines in length. Therefore;
section .data
Values: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 26, 18, 12, 20, 19, 11
V_Size equ $ - Values
is preferable to this
message:
.long 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
length:
.long 10
What you did is not wrong, but the method is predicated upon you counting, not the assembler. As it has already been pointed out, use the smallest data size required to get the job done. In this case char is better than long
This code in NASM
; Turns every byte of Values into a letter by setting bit 6 (OR 64), then
; writes the whole buffer with a single write syscall and exits.
; Syntax: NASM. Target: x86-64 Linux, raw syscalls.
; The three pushes below are popped again, in reverse order, as the
; syscall number seed (rax), the byte count (rdx) and the buffer (rsi).
section .text
global _start
_start: xor ecx, ecx
push rcx ; Applications default return value
mov cl, V_Size ; rcx = number of bytes in Values (loop counter)
push rcx ; saved: later popped into rdx as the write count
mov ebx, Values ; ebx = address of the first byte
push rbx ; saved: later popped into rsi as the write buffer
Next:
or byte [ebx], 64 ; set bit 6 in place: 1..26 become 65..90 ('A'..'Z')
inc ebx ; advance to the next byte
loop Next ; decrement rcx, repeat until it reaches zero
pop rsi ; rsi = buffer address
pop rdx ; rdx = byte count
pop rax ; rax = the zero pushed first
inc al ; rax = 1, the write syscall number
mov edi, eax ; edi = 1, the stdout file descriptor
syscall
mov edi, eax ; edi = value returned by write
dec edi ; exit status becomes that return value minus one
mov eax, edi
mov al, 60 ; low byte = 60 selects the exit syscall
syscall
section .data
Values: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 26, 18, 12, 20, 19, 11
V_Size equ $ - Values ; byte count computed by the assembler
will yield
ABCDEFGHIJZRLTSK
with the command prompt immediately after "K".
section .data:
6000d8 01020304 05060708 090a1a12 0c14130b
section .text:
<_start>: These two instructions are idiosyncratic to my style of programming and not
essential to functionality of program.
4000b0: 31 c9 xor %ecx,%ecx
4000b2: 51 push %rcx
Setup RCX & RBX for LOOP instruction
4000b3: b1 10 mov $0x10,%cl
4000b5: 51 push %rcx ARG2 to syscall
4000b6: bb d8 00 60 00 mov $0x6000d8,%ebx
4000bb: 53 push %rbx ARG1 to syscall
<Next>: This conforms to the scope of your objective.
4000bc: 67 80 0b 40 orb $0x40,(%ebx) [ebx] |= 0x40 (sets bit 6, so 1..26 become 'A'..'Z')
4000c0: ff c3 inc %ebx
4000c2: e2 f8 loop 4000bc <Next>
ssize_t write (int fd, const void *buf, size_t count);
4000c4: 5e pop %rsi ARG1 = ASCII Pntr
4000c5: 5a pop %rdx ARG2 = # of chars
4000c6: 58 pop %rax
4000c7: fe c0 inc %al SYS_WRITE
4000c9: 89 c7 mov %eax,%edi ARG0 = STD_OUT
4000cb: 0f 05 syscall
Epilogue: Again, just a method I use.
4000cd: 89 c7 mov %eax,%edi
4000cf: ff cf dec %edi
4000d1: 89 f8 mov %edi,%eax
4000d3: b0 3c mov $0x3c,%al
4000d5: 0f 05 syscall

Related

Assembly x86 64 Linux AT&T: print routine segmentation error

I am new to assembly and am aware that my assembly code may not be efficient or could be better. The comments of the assembly may be messed up a little due to constant changes. The goal is to print each character of the string individually and when comes across with a format identifier like %s, it prints a string from one of the parameters in place of %s.
So for example:
String: Hello, %s
Parameter (RSI): Foo
Output: Hello, Foo
So the code does what it is supposed to do, but it gives a segmentation fault at the end.
# Driver for myPrint: passes text1 as the format string and word1 in every
# remaining argument register, then exits with status 0 through syscall 60.
# Syntax: AT&T (GAS). Target: x86-64 Linux.
.bss
char: .byte 0 # one-byte scratch buffer used by myPrint for single characters
.text
# NOTE(review): .data is the last section directive before the code, so
# everything from here on (strings AND instructions) is assembled into .data.
.data
text1: .asciz "%s!\n"
text2: .asciz "My name is %s. I think I’ll get a %u for my exam. What does %r do? And %%?\n"
word1: .asciz "Piet"
.global main
main:
pushq %rbp # push the base pointer (and align the stack)
movq %rsp, %rbp # copy stack pointer value to base pointer
movq $text1, %rdi # format string
movq $word1, %rsi # the same string is supplied for every argument slot
movq $word1, %rdx
movq $word1, %rcx
movq $word1, %r8
movq $word1, %r9
call myPrint
end:
movq %rbp, %rsp # clear local variables from stack
popq %rbp # restore base pointer location
movq $60, %rax # exit syscall number
movq $0, %rdi # exit status 0
syscall
# myPrint: walks the NUL-terminated format string in %rdi and writes it to
# fd 1, one byte per write syscall. On "%s" it writes a string taken from the
# argument registers saved on the stack below %rbp instead.
# In:  %rdi = format string; %rsi, %rdx, %rcx, %r8, %r9 = string arguments.
# Register roles: %r12 = cursor in the format string, %r14b = current byte,
#                 %rbx = number of "%s" seen, %r13 = byte count for write,
#                 %rcx = buffer address handed to write.
# %rbx and %r12-%r15 are overwritten without being saved.
# NOTE(review): formatString subtracts from %rbp itself and never restores it,
# so the epilogue's "movq %rbp, %rsp" does not recover the entry stack pointer.
myPrint:
pushq %rbp
movq %rsp, %rbp
pushq %rsi # saved at -8(%rbp)
pushq %rdx # saved at -16(%rbp)
pushq %rcx # saved at -24(%rbp)
pushq %r8 # saved at -32(%rbp)
pushq %r9 # saved at -40(%rbp)
movq %rdi, %r12 # r12 = cursor into the format string
regPush:
movq $0, %rbx
#rbx: counter
printLooper:
movb (%r12), %r14b #Get a byte of r12 to r14
cmpb $0, %r14b #Check if r14 is a null byte
je endPrint #If it is a null byte then go to 'endPrint'
cmpb $37, %r14b # 37 = '%'
je formatter
incq %r12 #Increment r12 to the next byte
skip:
# Copy the current byte into the one-byte buffer `char` and print it.
mov $char, %r15 #Move char address to r15
mov %r14b, (%r15) #Move r14 byte into the value of r15
mov $char, %rcx #Move char address into rcx
movq $1, %r13 #For the number of byte
printer:
# write(1, %rcx, %r13)
movq $0, %rsi #Clearing rsi
mov %rcx, %rsi #Move the address to rsi
movq $1, %rax #Sys write
movq $1, %rdi #Output
movq %r13, %rdx #Number of byte to rdx
syscall
jmp printLooper
formatter:
incq %r12 #Moving to char after "%"
movb (%r12), %r14b #Moving the char byte into r14
cmpb $115, %r14b #Compare 's' with r14
je formatString #If it is equal to 's' then jump to 'formatString'
# Not "%s": print the '%' itself; r12 already points at the following byte.
movb -1(%r12), %r14b #Put back the previous char into r14
jmp skip
####String Formatter Start ##################################################
formatString:
addq $1, %rbx # one more "%s" seen
movq $8, %rax
mulq %rbx # rax = 8 * rbx (mulq also overwrites rdx)
subq %rax, %rbp # NOTE(review): moves the frame pointer itself, cumulatively
movq (%rbp), %r15 # r15 = saved argument found at the moved %rbp
pushq %r15 ### into the stack
movq $0, %r13 ### Byte counter
formatStringLoop:
# Count the bytes of the argument string up to its NUL terminator.
movb (%r15), %r14b #Move char into r14
cmpb $0, %r14b #Compare r14 with null byte
je formatStringEnd #If it is equal, go to 'formatStringEnd'
incq %r15 #Increment to next char
addq $1, %r13 #Add 1 to the byte counter
jmp formatStringLoop#Loop again
formatStringEnd:
popq %rcx #Pop the address into rcx
incq %r12 #Moving r12 to next char
jmp printer # write the whole argument string: %r13 bytes starting at %rcx
#######String Formatter End #############################################
endPrint:
movq %rbp, %rsp
popq %rbp
ret
In formatString you modify %rbp with subq %rax, %rbp, forgetting that you will restore %rsp from it. So when you mov %rbp, %rsp just before the function returns, you end up with %rsp pointing somewhere else, and so you get the wrong return address.
I guess you are subtracting some offset from %rbp to get some space on the stack. This seems unsafe because you've pushed lots of other stuff there. It is safe to use up to 128 bytes below the stack pointer as this is the red zone, but it would be more natural to use an offset from %rsp instead. Using SIB addressing you can access data at constant or variable offsets to %rsp without actually changing its value.
How I found this with gdb: by setting breakpoints at myPrint and endPrint, I found that %rsp was different at the ret than it was on entry. Its value could only have come from %rbp, so I did watch $rbp to have the debugger break when %rbp changed, and it pointed straight to the offending instruction in formatString. (Which I could also have found by searching the source code for %rbp.)
Also, your .text at the top of the file is misplaced, so all your code gets placed in the .data section. This actually works but it surely is not what you intended.

Registers for IMUL instruction

I was writing this simple program to calculate the i-th element of a recursive sequence. The sequence basically looks like
a(n)=a(n-1)*a(n-2)
with the first two elements being -1 and -3. I use imul for multiplying, and from what I found on the net I should be able to use any registers I want, but the program returns 0 for the third element. When I switch to add, it works as intended.
Here's the fragment where I recursively call the function and multiply (as seen, I use stack to store my variables)
push %rcx
push %rax
call calculate
pop %rax
pop %rcx
imul %rcx, %rbx
Basically question is "why it doesn't work" :P
PS. In case full code is needed:
# Computes element HOW_MANY of the sequence via `calculate`, converts the
# result to signed decimal text and prints "<sign><digits>\n" on stdout.
# Syntax: AT&T (GAS). Target: x86-64 Linux, raw syscalls, no libc.
# `calculate` takes its argument on the stack and returns its result in %rbx.
.data
STDOUT = 1
SYSWRITE = 1
HOW_MANY = 3 # which number to calculate
SYSEXIT = 60
EXIT_SUCCESS = 0
FIRST = -1 # first element of the sequence
SECOND = -3 # second element of the sequence
NUMBER_BEGIN = 0x30
OUTPUT_BASE = 10
NEW_LINE = '\n'
PLUS = '+'
MINUS = '-'
.bss
.comm textin, 512
.comm textout, 512
.comm text2, 512
.comm znak, 1
.text
.globl _start
_start:
#
# Calling function to calculate ith element
#
    mov $HOW_MANY, %r8
    sub $1, %r8
    push %r8                        # push r8 (function argument) to stack
    call calculate                  # call function to calculate
    add $8, %rsp                    # removing parameter from stack
#
# now the result is in rbx
#
    mov $0, %r15                    # sign flag (default 0 = +)
    cmp $0, %rbx
    jge to_ascii                    # skip if the number is not negative
    neg %rbx                        # two's-complement negate: absolute value
    mov $1, %r15                    # sign flag 1 = -
to_ascii:
    mov %rbx, %rax                  # result goes to rax
    mov $OUTPUT_BASE, %rbx
    mov $0, %rcx                    # rcx = number of digits produced
# Produce digits least-significant first into text2.
loop:
    mov $0, %rdx                    # div uses rdx:rax, so clear the high half
    div %rbx                        # divide rax by rbx, remainder in rdx
    add $NUMBER_BEGIN, %rdx         # remainder -> ASCII digit
    mov %dl, text2(, %rcx, 1)
    inc %rcx
    cmp $0, %rax
    jne loop
# Reverse text2[0..rcx-1] into textout[0..rcx-1].
inverse:
    mov $0, %rdi                    # rdi = destination index
    mov %rcx, %rsi
    dec %rsi                        # rsi = source index, starts at last digit
inversev2:
    movb text2(, %rsi, 1), %al      # copy exactly one byte per step
    movb %al, textout(, %rdi, 1)
    inc %rdi
    dec %rsi
    cmp %rcx, %rdi
    jl inversev2                    # stop after rcx bytes; rsi never goes below 0
    push %rcx                       # length of the answer goes to stack
                                    # (syscall clobbers rcx)
    mov $0, %r10                    # want sign at the first position
    movb $PLUS, znak(, %r10, 1)
    cmp $0, %r15                    # r15 register contains info about the sign
    je next                         # 0 = +, so nothing has to be done
    movb $MINUS, znak(, %r10, 1)    # otherwise set it to minus
next:                               # show sign
    mov $SYSWRITE, %rax
    mov $STDOUT, %rdi
    mov $znak, %rsi
    mov $1, %rdx
    syscall
    pop %rcx
    movb $NEW_LINE, textout(, %rcx, 1)
    inc %rcx                        # count the newline as well
    mov $SYSWRITE, %rax
    mov $STDOUT, %rdi
    mov $textout, %rsi
    mov %rcx, %rdx
    syscall
    mov $SYSEXIT, %rax
    mov $EXIT_SUCCESS, %rdi
    syscall
# recursive function calculating ith element of a given sequence
# sequence =
# n_1 = -1
# n_2 = -3
# n_i = n_(i-1)*n_(i-2)
# In:  the index is the value the caller pushed just before `call`,
#      read here from 16(%rbp). Index < 1 yields FIRST, index == 1 yields SECOND.
# Out: result in %rbx. %rax and %rcx are overwritten.
# NOTE(review): %rcx is seeded with 0 and is the multiplier of both imul
# instructions, and `return:` copies %rcx into %rbx, so the recursive branch
# always produces 0.
calculate:
push %rbp # push rbp to stack to save it's value
mov %rsp, %rbp # now stack pointer is stored in rbp
sub $8, %rsp
mov 16(%rbp), %rax # rax = index argument
cmp $1, %rax
jl first # index < 1
je second # index == 1 (flags still come from the cmp above)
mov $0, %rcx
# call for n_(i-1)
dec %rax
push %rcx # saved across the call
push %rax # argument for the recursive call
call calculate
pop %rax
pop %rcx # move into the registers imul uses
imul %rcx, %rbx # rbx = rbx * rcx
# call for n_(i-2)
dec %rax
push %rcx
push %rax
call calculate
pop %rax
pop %rcx
imul %rcx, %rbx # rbx = rbx * rcx
return:
mov %rcx, %rbx # returned value is taken from rcx
mov %rbp, %rsp
pop %rbp
ret
first:
mov $FIRST, %rbx
mov %rbp, %rsp
pop %rbp
ret
second:
mov $SECOND, %rbx
mov %rbp, %rsp
pop %rbp
ret
You are seeding %rcx to zero, then multiplying into that, so you will always have a product of zero.
Perhaps you want to change
mov $0, %rcx
to
mov $1, %rcx
I think you also need to reverse the
imul %rcx, %rbx
to
imul %rbx, %rcx
(I'm not familiar with that flavor of assembler)

Printing floating point numbers in assembler

I'm trying to print a floating-point value from assembler by calling the printf function. It works fine with strings and integer values but fails when printing floats. Here is an example of working code:
; int main(void): calls printf("String is: %d %x %s\n", 255, 0xff, end_message).
; Syntax: NASM. ABI: System V AMD64, linked against libc (non-PIE, absolute
; addresses are loaded with mov).
global main
extern printf
section .data
message: db "String is: %d %x %s", 10, 0
end_message: db ".. end of string", 0
section .text
main:
        sub     rsp, 8                  ; entry rsp is 8 mod 16; realign to 16
                                        ; before calling into libc
        mov     eax, 0xff
        mov     edi, message            ; arg 1: format string
        movsxd  rsi, eax                ; arg 2: 255, printed with %d
        mov     rdx, 0xff               ; arg 3: 255, printed with %x
        mov     rcx, end_message        ; arg 4: string printed with %s
        xor     rax, rax                ; al = 0: no vector registers carry args
        call    printf
        xor     eax, eax                ; return 0 rather than printf's count
        add     rsp, 8
        ret
String is: 255 ff .. end of string
So, the parameters are passed through registers: edi contains address of a formatting string, rsi and rdx contain the same number to print in decimal and hex styles, rcx contains end of a string, rax contains 0 as we do not have a float to print.
This code works fine but something changes while trying to print float:
; int main(void): passes the double stored at val to printf in xmm0.
; Syntax: NASM. ABI: System V AMD64, linked against libc.
; NOTE(review): nothing is pushed or subtracted from rsp before `call printf`,
; so rsp is still 8 mod 16 at the call instead of 16-byte aligned.
global main
extern printf
section .data
val: dq 123.456 ; 64-bit double
msg: db "Result is: %fl",10, 0 ; "%f" is the conversion; the 'l' after it is literal text
section .text
main:
mov rdi,msg ; arg 1: format string
movsd xmm0,[val] ; first floating-point argument
mov eax,1 ; al = 1: one vector register carries an argument
call printf
mov rax, 0 ; return 0
ret
This code snippet assembles, but it crashes with a segmentation fault when executed. It seems that the problem is a wrong value in xmm0, but changing movsd xmm0,[val] to movsd xmm0,val gives an
error: invalid combination of opcode and operands
message.
The compiler is NASM running on openSuSe 12.3
Update. I tried to make a c program and produce a .S assembly. It gives a very weird solution:
# Compiler-generated main (AT&T syntax) that loads the double `val` and
# passes it to printf. `val` and `.LC0` are defined outside this excerpt.
main:
.LFB2:
.cfi_startproc
pushq %rbp # save caller's frame pointer; rsp becomes 16-byte aligned
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $32, %rsp # 32 bytes of locals, keeps rsp 16-byte aligned for the call
movl %edi, -4(%rbp) # spill first incoming argument register
movq %rsi, -16(%rbp) # spill second incoming argument register
movq val(%rip), %rax # copy the 8 bytes of val ...
movq %rax, -24(%rbp) # ... into a stack local
movsd -24(%rbp), %xmm0 # first floating-point argument
movl $.LC0, %edi # first integer argument (presumably the format string)
movl $1, %eax # al = 1: one vector register carries an argument
call printf
movl $0, %eax # return 0
leave # undo the frame: mov %rbp,%rsp / pop %rbp
.cfi_def_cfa 7, 8
ret
Is it possible to write a simple printf example?
for your assembler problem:
you need to align the stack before your main program starts.
insert
sub rsp, 8
right after main:
then add it again before ret:
add rsp, 8

How to read and display a value in Linux assembly?

I'm a beginner in Linux assembler and I have some questions. I'd like to read some characters from the keyboard, convert them to a value (I understand that this conversion should be from ASCII to decimal, right?), do some math (add, sub, multiply, whatever) and display the result in the terminal. How should I do that? I wrote some code but it probably doesn't make sense:
# Reads up to 5 bytes from stdin, parses the leading decimal digits as an
# unsigned number, adds ADDEND and prints the sum in decimal followed by a
# newline.
# Syntax: AT&T (GAS). Target: 32-bit x86 Linux, int 0x80 syscalls, no libc.
SYSEXIT = 1
EXIT_SUCC = 0
SYSWRITE = 4
SYSCALL = 0x80
SYSREAD = 3
STDIN = 0
STDOUT = 1
ASCII_ZERO = 48
NEWLINE = 10
RADIX = 10
ADDEND = 10
RESULT_SIZE = 12                    # 10 digits of a 32-bit value + newline, rounded up

.data
value: .space 5, 0
value_len: .long .-value
result: .space RESULT_SIZE, 0       # real storage; a bare ".long" reserves none
result_len: .long .-result

.text
.global _start
_start:
    movl $SYSREAD, %eax
    movl $STDIN, %ebx
    movl $value, %ecx
    movl value_len, %edx
    int $SYSCALL                    # eax = bytes read, or <= 0 on EOF/error

# ASCII -> number. esi = index, ecx = bytes available, eax = accumulator.
    movl %eax, %ecx
    xorl %eax, %eax
    xorl %esi, %esi
parse:
    cmpl %ecx, %esi
    jge parsed                      # signed: also covers a negative read result
    movzbl value(, %esi, 1), %edx
    subl $ASCII_ZERO, %edx
    cmpl $9, %edx
    ja parsed                       # unsigned test: anything but '0'..'9' ends the number
    imull $RADIX, %eax, %eax        # accumulator = accumulator * 10 + digit
    addl %edx, %eax
    incl %esi
    jmp parse

parsed:
    addl $ADDEND, %eax              # the actual math on the binary value

# Number -> ASCII, filling `result` backwards from its end.
    movl $result, %edi
    addl result_len, %edi           # edi = one past the end of result
    decl %edi
    movb $NEWLINE, (%edi)
    movl $RADIX, %ebx
to_digits:
    xorl %edx, %edx                 # divl divides edx:eax
    divl %ebx                       # eax = quotient, edx = next digit
    addb $ASCII_ZERO, %dl
    decl %edi
    movb %dl, (%edi)
    testl %eax, %eax
    jnz to_digits

# write(STDOUT, edi, end_of_result - edi)
    movl $result, %edx
    addl result_len, %edx
    subl %edi, %edx
    movl %edi, %ecx
    movl $SYSWRITE, %eax
    movl $STDOUT, %ebx
    int $SYSCALL

    movl $SYSEXIT, %eax
    movl $EXIT_SUCC, %ebx
    int $SYSCALL
I don't know if I should reserve memory by spaces? Or reading characters in loop?
How to convert it, to be able to make some math operation and then convert it to be able to display it?
I know that to get the value of ASCII char I should subtract 48, but what next?
I had an idea to multiply each bits by 2^k where k is 0,1,2...n it's good idea? If so, how to implement something like this?
As you can see I had a lot of questions, but I only need to someone show me how to do, what I am asking about. I saw some similar problems, but nothing like this in Linux.
Thank you in advance for the all information.
All the best.
At first reading and writing the console in Linux is by using the file functions with special console handles, that have always the same values: STDIN=0, STDOUT=1 and STDERR=2.
At second you will need some decent documentation about Linux system calls. Notice that the C-centric one (like "man") are not suitable, because C language does not use the system calls directly, but has wrappers that often change the arguments and the result values.
On the following site you can download an assembly-centric SDK for Linux, that contains the needed documentation and many examples and include files.
If you only need the help files, you can browse them online: Here
If the problem is the conversion from ASCII string to number and then back to string, here are two simple procedures that can do the job, if the requirements are not so big. The StrToNum is not so advanced, it simply convert decimal unsigned number:
; StrToNum parses an unsigned decimal number at the start of a string.
; Arguments:
;   esi - pointer to the string
; Return:
;   CF=0
;     eax - converted number
;     edx - pointer to the byte where conversion ended.
;
;   CF=1 - the string does not start with a decimal digit.
;
; ebx, esi and edi are preserved. There is no overflow check.
StrToNum:
        push    ebx esi edi
        xor     ebx, ebx                ; ebx accumulates the number
        movzx   eax, byte [esi]         ; upper bits of eax stay zero from here on
        cmp     al, '0'
        jb      .error
        cmp     al, '9'
        ja      .error
.digit:
        sub     al, '0'                 ; ASCII digit -> 0..9
        add     ebx, eax
        inc     esi
        mov     al, [esi]               ; look at the next character
        cmp     al, '0'
        jb      .finish
        cmp     al, '9'
        ja      .finish
        lea     ebx, [ebx + ebx*4]      ; ebx * 5 ...
        add     ebx, ebx                ; ... * 2 = ebx * 10
        jmp     .digit
.finish:
        mov     edx, esi                ; where parsing stopped
        mov     eax, ebx
        clc
        pop     edi esi ebx
        ret
.error:
        stc
        pop     edi esi ebx
        ret
NumToStr is pretty flexible. It converts a number to a string in any radix from 2 to 36, with sign:
;**********************************************************************************
; NumToStr converts the signed number in eax to a string in any radix [2..36].
; NumToStrU is the unsigned entry point (no sign handling).
; Arguments:
;   edi - pointer to the string buffer
;   ecx - radix
;   eax - number to convert.
; There is no parameter check, so be careful.
; returns: edi points to the end of a converted number
; Digits above 9 are written as 'A'..'Z'. No terminating zero is stored.
; eax and edx are overwritten; stosb is used, so the direction flag must be
; clear. Recursion depth equals the number of digits.
;**********************************************************************************
NumToStr:
        test    eax, eax
        jns     NumToStrU
        neg     eax                     ; work on the magnitude
        mov     byte [edi], "-"
        inc     edi
NumToStrU:
        cmp     eax, ecx
        jb      .lessA                  ; single digit left: emit it
        xor     edx, edx
        div     ecx                     ; eax = leading digits, edx = last digit
        push    edx
        call    NumToStrU               ; emit the leading digits first
        pop     eax                     ; then fall through with the last digit
.lessA:
        cmp     al, 10
        jb      .decimal
        add     al, 'A' - '0' - 10      ; 10..35 -> 'A'..'Z' after the add below
.decimal:
        add     al, '0'
        stosb
        ret

x86 Assembly 2-Digit Keyboard Input

It needs to allow a two-digit number for input that will be used to indicate how many times the name is printed. I can't figure out how to separate the second digit though and have it checked to make sure it is between 0x30 and 0x39. I also keep getting this weird box after the name that has 0017 inside it.
# Prompts for a two-digit number, reads two bytes from stdin and prints
# `name` `max` times.
# Syntax: AT&T (GAS). Target: 32-bit x86 Linux, int 0x80 syscalls
# (eax: 4 = write, 3 = read, 1 = exit; ebx/ecx/edx = arguments).
.data
input_msg_len: .long 26
input_msg: .ascii "Enter a two-digit number: "
name: .ascii "Michael Chabon\n"
# NOTE(review): the literal above is 15 bytes long but name_len says 16, so
# each write of `name` also emits the byte that follows it in memory.
name_len: .long 16
max: .long 0
count: .long 0
tmp: .long 0
input_str: .ascii "??"
.text
.global _start
_start:
# write(1, input_msg, input_msg_len)
mov $4, %eax
mov $1, %ebx
mov $input_msg, %ecx
mov input_msg_len, %edx
int $0x80
# read(0, input_str, 2)
mov $3, %eax
mov $0, %ebx
mov $input_str, %ecx
mov $2, %edx
int $0x80
mov $input_str, %eax
add count, %eax
mov $input_str, %eax # overwrites the sum computed by the two lines above
mov (%eax), %bl # bl = first input character
cmp $0x30, %bl
jl _start # below '0': start over
cmp $0x39, %bl
jg _start # above '9': start over
mov count, %eax
inc %eax
mov %eax, count # count = count + 1
sub $0x30, %bl # ASCII digit -> 0..9
mov %bl, max
mov $10, %bl # replaces the digit that was in bl
imul %bl # ax = al * bl, where al is the low byte of count
mov %bl, max # stores 10, replacing the digit stored above
#Not sure how to check second char in input_str.
#Want to check it then subtract $0x30 and move to tmp before adding tmp to max.
mov $0, %edi # edi = number of times the name has been printed
again:
cmp max, %edi
je end
# write(1, name, name_len)
mov $4, %eax
mov $1, %ebx
mov $name, %ecx
mov name_len, %edx
int $0x80
inc %edi
jmp again
end:
mov $1, %eax # exit; the status is whatever is left in ebx
int $0x80
Thanks in advance!
There are some bugs in your code.
Below, 2 first lines of that block are redundant, as mov $input_str, %eax overwrites eax anyway.
mov $input_str, %eax
add count, %eax
mov $input_str, %eax
Then here, it makes no sense to load count into eax here:
mov count, %eax
inc %eax
mov %eax, count
You can do this in a lot shorter and clearer way with:
incl count
Then, next bug is that you recently loaded count into eax, and then multiply the lowest 8 bits of count loaded into al with 10, in this piece of code:
mov (%eax), %bl // bl = first character
cmp $0x30, %bl
jl _start
cmp $0x39, %bl
jg _start
mov count, %eax // eax = count
inc %eax // eax++
mov %eax, count // count = eax
sub $0x30, %bl // 0 <= bl <= 9
mov %bl, max // max = bl <- you lose this value in the next mov %bl, max
mov $10, %bl // bl = 10
imul %bl // ax = 10 * and(count, 0xff) // ax = al*bl (signed multiply)
mov %bl, max // max = 10 <- here you overwrite the value of max with 10
So, according to my intuition, you don't want to compute ax = 10 * and(count, 0xff), but 10 * (first number). imul %bl does a signed multiply of al by bl and stores the result in ax. So the code above could be changed to something like this:
mov (%eax), %bl // bl = first character
cmp $0x30, %bl
jl _start
cmp $0x39, %bl
jg _start
incl count
pushl %eax // push eax to stack
sub $0x30, %bl // 0 <= bl <= 9
mov $10, %al // al = 10
imul %bl // ax = 10 * bl (signed multiply)
mov %al, max // 0 <= max <= 90
Then, you can check the second character similarly to the first character:
pop %eax // pop eax from stack
incl %eax
mov (%eax), %bl // bl = second character
cmp $0x30, %bl
jl _start
cmp $0x39, %bl
jg _start
sub $0x30, %bl // 0 <= bl <= 9
add %bl, max // 0 <= max <= 99
I strongly recommend you to learn to use some debugger. gdb has several frontends, of which I think ddd works best according to my experience. gdbtui is also convenient.

Resources