How to use "repne scasb" in GAS assembly? - linux

So I have written a program based on NASM that receives user input (two numbers to be exact), and then it realizes the addition and difference between the two numbers, and then it prints it back. The program works fine in NASM but I am having trouble with GAS. The subroutine that counts the length of a string using the repne scasb instruction is giving me a headache because of a segmentation fault.
I have checked the code for segmentation faults, and I have located the fault in the repne scasb line.
What I am basically doing is translating a NASM code I made into its respective GAS code. However, as I said before, it's giving me a segmentation fault. After I get the first number from the user, to be more specific.
.section .data
msg: .ascii "Insert a number: "
msgLen = .-msg
msg2: .ascii "Insert another number: "
msg2Len = .-msg2
errorMsg: .ascii "Error: invalid character!\n"
errorMsgLen = .-errorMsg
displaySuma: .ascii "The result of the addition is: "
displaySumaLen = .-displaySuma
displayResta: .ascii "The result of the difference is: "
displayRestaLen = .-displayResta
enterChar: .ascii "\n"
terminator: .byte 0
.section .bss
.lcomm num1, 8
.lcomm num2, 8
.lcomm buffer, 10
.lcomm buffer2, 10
.section .text
.global _start
_start:
call _clear #clear registers. Probably an useless routine
call _msg1 #Display msg1
call _num1 #Read num1
movl num1, %edi
call _lenString #ECX now has num1 length
lea (num1), %esi
call _stringToInt #EAX now has num1 in integer
movl %eax, %r15d #R15D now has the integer
call _msg2 #Display msg2
call _num2 #Read num2
xor %edi, %edi #Clear EDI
movl num2, %edi #Moving num2 to EDI register to call _lenstring
xor %ecx, %ecx #Clear ECX
call _lenString #ECX has num2 length
xor %esi, %esi #clear ESI
lea (num2), %esi
call _stringToInt #EAX now has integer value of num2
mov %eax,%r14d ###R14D has num2 now
#Addition
#r8d = num1 + num2
mov %r15d, %r8d
add %r14d, %r8d #R8D has num1 + num2
#Difference
#If num1 > num2 =======> r9d = num1 - num2
#If num1 < num2 =======> r9d = num2 - num1
cmp %r14d, %r15d
jg .greater
mov %r14d, %r9d
sub %r15d, %r9d #R9D has num1 - num2
jmp .next
.greater:
mov %r15d, %r9d
sub %r14d, %r9d #R9D has num2 - num1
jmp .next
.next:
mov %r8d, %eax #Sum is now at EAX to convert it to ascii characters
lea (buffer), %esi
call _intToString
#EAX ascii of the sum
mov %eax, %r10d #Using R10D to store the new string
mov %r9d, %eax #Difference result is now at EAX
lea (buffer2), %esi
call _intToString
#EAX has the pointer to the difference result.
mov %eax, %r11d #Storing string in R10D
xor %edi, %edi
xor %r15d, %r15d
xor %r14d, %r14d
mov %r10d, %edi
call _lenString #ECX length of sum string
mov %ecx, %r15d #R15D now has that value
call _suma #This prints the sum result
xor %edi, %edi #Clear EDI
mov %r11d, %edi
call _lenString #ECX has length of dif. string
mov %ecx, %r14d #R14D has that value
call _resta #Print dif. result
movl $1, %eax #End of the program
movl $0, %ebx
int $0x80
_stringToInt:
xor %ebx, %ebx
.next_digit:
movzxb (%esi), %eax
cmp $0x30, %eax #These 4 lines check for invalid characters
jb _errorMsg
cmp $0x39, %eax
ja _errorMsg
inc %esi
sub $0x30, %eax ###Sub 48 (converts to integer)
imul $10, %ebx
add %eax, %ebx #ebx = ebx*10 + eax
loop .next_digit #loop [ECX] times
mov %ebx, %eax
ret
_intToString:
add $10, %esi
mov (terminator),%esi
mov $10, %ebx
.next_digit1:
xor %edx, %edx
div %ebx
add $0x30, %edx ##
dec %esi
mov %dl, (%esi)
test %eax, %eax
jnz .next_digit1
mov %esi, %eax
ret
#######################################################################################################
_msg1:
movl $4, %eax #msg1 routine
movl $1, %ebx
movl $msg, %ecx
movl $msgLen, %edx
int $0x80
ret
_num1:
movl $3, %eax #Reads first number
movl $0, %ebx
movl $num1, %ecx
movl $8, %edx
int $0x80
ret
_msg2:
movl $4, %eax #msg2 display
movl $1, %ebx
movl $msg2, %ecx
movl $msg2Len, %edx
int $0x80
ret
_num2:
movl $3, %eax #Reads the next number
movl $0, %ebx
movl $num2, %ecx
movl $8, %edx
int $0x80
ret
_salir:
movl $1, %eax #Exit
movl $0, %ebx
int $0x80
_errorMsg:
movl $4, %eax #Error msg
movl $1, %ebx
movl $errorMsg, %ecx
movl $errorMsgLen, %edx
int $0x80
jmp _salir
_lenString:
xor %ecx, %ecx
not %ecx
xor %al, %al
mov $0xA, %al
cld
repne scasb #Segmentation fault is caused by this line
not %ecx
dec %ecx
ret
_suma:
movl $4, %eax
movl $1, %ebx
movl $displaySuma, %ecx
movl $displayRestaLen, %edx
int $0x80
movl $4, %eax
movl $1, %ebx
mov %r10d, %ecx
mov %r15d, %edx
int $0x80
movl $4, %eax
movl $1, %ebx
movl $enterChar, %ecx
movl $1, %edx
int $0x80
ret
_resta:
movl $4, %eax
movl $1, %ebx
movl $displayResta, %ecx
movl $displayRestaLen, %edx
int $0x80
movl $4, %eax
movl $1, %ebx
mov %r11d, %ecx
mov %r14d, %edx
int $0x80
movl $4, %eax
movl $1, %ebx
movl $enterChar, %ecx
movl $1, %edx
int $0x80
ret
_clear:
xor %eax, %eax
xor %ebx, %ebx
xor %ecx, %ecx
xor %edx, %edx
xor %esi, %esi
xor %edi, %edi
xor %r8d, %r8d
xor %r9d, %r9d
xor %r10d, %r10d
xor %r11d, %r11d
xor %r14d, %r14d
xor %r15d, %r15d
ret
I am using this makefile to create the .o and .exe files (Given by my professor):
#*************************************************
# Executable name : hola
# Version : 2.0
# Created date : February 12, 2019
# Authors :
# Eduardo A. Canessa M
# Description : simple makefile for GAS
# Important Notice: To be used for GAS only
#*************************************************
#change the name "ejemplo" for the name of your source file
name=addSubInteger
#program to use as the assembler (you could use NASM or YASM for this makefile)
ASM=as
#flags for the assember
#ASM_F= #*** place here flags if needed ***
#program to use as linker
LINKER=ld
#link executable
$(name): $(name).o
$(LINKER) -o $(name) $(name).o
#assemble source code
$(name).o: $(name).s
$(ASM) $(ASM_F) -o $(name).o $(name).s
There is a segmentation fault error after the program reads the first user input.
This is the NASM code of my program (Hope you don't mind the spanish comments in it, but it's essentially the same program as the one written on GAS).
I know I have made some next level spaghetti code, but this is the solution I came to.

movl num1, %edi
call _lenString #ECX now has num1 length
lea (num1), %esi
call _stringToInt #EAX now has num1 in integer
The first instruction does not load the address in %edi. You can use lea like you did for the call to _stringToInt that follows next. Or if you care about a shorter encoding then write mov $num1, %edi.
lea (num1), %edi
call _lenString #ECX now has num1 length
The same problem exists for the second number:
movl num2, %edi SAME PROBLEM
xor %ecx, %ecx
call _lenString
The _intToString subroutine has 2 problems!
You destroy the address in %esi by writing a random value in it.
You (try to) write in memory beyond the buffer that was reserved via .lcomm buffer, 10. This will destroy the first byte in buffer2.
Since converting a 32-bit integer can produce (at most) 10 characters, you will need to enlarge your buffer to 11 bytes so you can safely store the byte-sized terminator.
.lcomm buffer, 11
.lcomm buffer2, 11
Then use this code:
_intToString:
mov $10, %ebx
add %ebx, %esi #Instead of 'ADD $10, %ESI' now that EBX==10
mov (terminator), %dl
mov %dl, (%esi)
.next_digit1:
xor %edx, %edx
div %ebx
add $0x30, %edx ##
dec %esi
mov %dl, (%esi)
test %eax, %eax
jnz .next_digit1
mov %esi, %eax
ret
The original NASM source uses
STRING_TERMINATOR equ 10
An equ does not consume memory at run-time. Your terminator: .byte 0 does use run-time memory! A good translation for the equ would be
.set terminator,0
Now you can write
_intToString:
mov $10, %ebx
add %ebx, %esi #Instead of 'ADD $10, %ESI' now that EBX==10
movb $terminator, (%esi)

Related

Factorial Assembly x86

I have this assembly code(Linux 32Bit compiled with gcc -m32) and I do not really understand why my program doesn't work.
.data
.bla:
.ascii "%d\n\0"
.globl main
.text
main: pushl $4
call factorial
movl $1, %eax
ret
factorial:
push %ebp
movl %esp, %ebp
movl 8(%ebp), %ecx
cmpl $1, %ecx
jg .rek
movl $1, %eax
movl %ebp, %esp
pop %ebp
ret
.rek:
decl %ecx
pushl %ecx
call factorial
addl $4, %esp
movl 8(%ebp), %ecx
imull %ecx, %eax
pushl %eax
pushl $.bla
call printf
addl $8, %esp
movl %ebp, %esp
pop %ebp
ret
Unfortunately every time a Segmentation Fault does occur + parameters bigger than 4 do not work.
This should be my stack when I run the program with "3":
3
ret add
ebp
2
ret add
ebp
1
ret add
ebp
When I reach the bottom of the recursion I take the return value saved in eax and multiply it with 8(%ebp) which should be the next value.
I really appreciate any help you can provide.
Three issues I see.
1) Your call maine (I'm assuming that was a typo, and you meant, main) should be call factorial
2) In your main program, you don't restore your stack pointer.
3) Your printf call modifies %eax and overwrites the result of your factorial before it returns.
Repaired program:
.data
.bla:
.ascii "%d\n\0"
.globl main
.text
main: pushl $5
call factorial
addl $4, %esp # ADDED THIS
movl $1, %eax
ret
factorial:
push %ebp
movl %esp, %ebp
movl 8(%ebp), %ecx
cmpl $1, %ecx
jg .rek
movl $1, %eax
movl %ebp, %esp
pop %ebp
ret
.rek:
decl %ecx
pushl %ecx
call factorial # FIXED THIS
addl $4, %esp
movl 8(%ebp), %ecx
imull %ecx, %eax
pushl %eax # ADDED THIS - SAVE RETURN VALUE
pushl %eax
pushl $.bla
call printf
addl $8, %esp # MODIFIED THIS
pop %eax # ADDED THIS (restore eax result)
movl %ebp, %esp
pop %ebp
ret

Jump instruction doesn't work

data
tekst: .ascii "heLLo WoRlD 93a9s\0"
tekst_len = . - tekst
.text
.global _start
_start:
mov $tekst_len, %edx
petla:
cmp $tekst_len, %edx
je koniec
cmpb $'a', tekst(,%edx,1)
jg zamien #?????????????????????????????????????
inc %edx
jmp petla
zamien:
movb $'X', tekst(,%edx,1)
inc %edx
jmp petla
koniec:
movl $4, %eax
movl $1, %ebx
movl $tekst, %ecx
movl $tekst_len, %edx
int $0x80
I have such a problem: the instruction 'jg zamien' doesn't work. No matter what condition I choose 'jl', 'je' it never executes. Can someone tell me why?
This is the reason:
mov $tekst_len, %edx
petla:
cmp $tekst_len, %edx <-- edx will equal $tekst_len here on the first iteration
je koniec
You're always jumping to koniec on the first iteration of the loop.
The mov should probably be mov $0,%edx.

Getting ASCII number value

Hi Guys I got some annoying problem ,so I try to write a code just to reverse small string sequential
I Already got this :
.section .data
string:
.ascii "AAAAAABBBBBB"
length:
.quad . -string #Dot = 'here'
.section .text
.globl _start #Make entry point visible to linker
_start:
movl $4, %eax #4=write
movl $1, %ebx #1=stdout
movl $string, %ecx
movl length, %edx
int $0x80 #Call Operating System
movl length,%edi #counter
shrl $1,%edi #half of string
movl $0,%ecx #start from index one
movl length,%edx #start from end
reverse:
movl string(,%ecx,1),%eax
movl string(,%edx,1),%ebx
movl %eax,string(,%edx,1)
movl %ebx,string(,%ecx,1)
inc %ecx
dec %edx
dec %edi
loop reverse #looping
movl $4, %eax #4=write
movl $1, %ebx #1=stdout
movl $string, %ecx
movl length, %edx
int $0x80 #Call Operating System
movl $0, %ebx #Make program return syscall exit status
movl $1, %eax #1=exit
int $0x80 #Call System Again
And it's not working correctly , cuz in gbd i get wrong values in registers after making
movl string(,%ecx,1),%eax
or the next steps I think there should be in %eax value for A letter but its doesn't any ideas ?
Working at 64arch but emulating in as --32 so its problem with my addressing i guess
You should be processing bytes not longs, so use movb with 8 bit registers (al and bl, for example). Also, the LOOP instruction uses ECX automatically, you probably meant JNZ there to repeat until EDI reaches zero.

Linux x86 assembly printing problems

I wrote this to print argv[0] in x86:
.section .data
newline: .int 0xa, 0
.section .text
.globl _start
_start:
sub %al, %al
movl 4(%esp), %edi /* Pointer to argv[0]. */
sub %ecx, %ecx /* Set %ecx to 0.*/
not %ecx /* Set %ecx to -1.*/
repne scasb /* Search for %al over and over.*/
not %ecx /* Set %ecx to |%ecx| - 1.*/
dec %ecx
movl %ecx, %edx /* Move the strlen of argv[0] into %edx.*/
movl $4, %eax
movl $1, %ebx
movl 4(%esp), %ecx
int $0x80
movl $newline, %ecx
movl $1, %edx
int $0x80
movl $1, %eax
movl $0, %ebx
int $0x80
When I run this file ("print"), the output is this:
[08:27 assembly]$ ./print test
./print[08:30 assembly]$
When I ran this through gdb, the actual string length held in edx is 27, and the string it's checking is "/home/robert/assembly/print", not "./print". So I changed the %esp offsets to 8, to check argv[1]. With the same command as before, the output is this:
test
[08:33 assembly]$
Why does checking argv[0] cause the strange output, when argv[1] does as expected?
I think gdb is "helping" you by adding the full path to argv[0]. After printing, %eax holds the number of characters printed, so you'll want to reload %eax for sys_write again to print the $newline (%ebx should still be okay) - by luck, "test" is the right length. Lord knows what system call you're getting with that longer string!
I'd say you're doing good! (might be a good idea to check argc to make sure argv[1] is there before you try to print it).

simple upper-casifier loop infinitely

simple upper-casifier loop infinitely
what's wrong with my code?
any advice?
my programming environment is linux, emacs, assembly, at&t syntax
.section .data
.section .bss
.lcomm buffer,1
.section .text
.global _start
_start:
movl %esp,%ebp
subl $8,%esp
#8(%ebp) is 2nd arg == input
#12(%ebp) is 3rd arg == output
#open,read,open,write,close
movl $5,%eax
movl 8(%ebp),%ebx
movl $0,%ecx
movl $0666,%edx
int $0x80
#%eax contains input's fd
#movl to first local var
movl %eax,-4(%ebp)
movl $5,%eax
movl 12(%ebp),%ebx
movl $03101,%ecx
movl $0666,%edx
int $0x80
#eax contains output's fd
#movl to second local var
movl %eax,-8(%ebp)
loop:
#read 1 byte from file 1st byte of data
movl $3,%eax
movl -4(%ebp),%ebx
movl $buffer,%ecx
movl $1,%edx
int $0x80
#buffer contains 1 byte of file
cmpb $0,buffer
je program_exit
pushl buffer
call convert #will return converted to %al
addl $4,%esp
movb %al,buffer
#write 1 byte from buffer to file
movl $1,%edx
movl $buffer,%ecx
movl -8(%ebp),%ebx
movl $4,%eax
int $0x80
jmp loop
program_exit:
movl buffer,%ebx
movl $1,%eax
int $0x80
.type convert,#function
convert:
pushl %ebp
movl %esp,%ebp
movb 8(%ebp),%al #1 byte data in the buffer
cmpb $'a',%al
jl convert_end
cmpb $'z',%al
jg convert_end
addb $32,%al #convert to upper
convert_end:
movl %ebp,%esp
popl %ebp
ret
Note that read(2) signals an end-of-file condition by returning 0. You're trying to find the end of the file by looking for an ascii NUL, which is very rare on Unix-derived systems. (If you want an easy way to make a terabyte-sized file, dd if=/dev/zero of=/tmp/huge bs=1048576 seek=1048576 count=1. The whole thing will be filled with ascii NUL characters. (Or the integer 0, however you want to interpret it.)
You need to modify your code to find the end of file by comparing the return value of the read(2) system call, not by looking at the data you receive into your buffer.

Resources