String reverse function x86 NASM assembly - string

I'm trying to write a function that reverses order of characters in a string using x86 NASM assembly language. I tried doing it using registers (I know it's more efficient to do it using stack) but I keep getting a segmentation fault, the c declaration looks as following
extern char* reverse(char*);
The assembly segment:
section .text
global reverse
reverse:
push ebp ; prologue
mov ebp, esp
mov eax, [ebp+8] ; eax <- points to string
mov edx, eax
look_for_last:
mov ch, [edx] ; put char from edx in ch
inc edx
test ch, ch
jnz look_for_last ; if char != 0 loop
sub edx, 2 ; found last
swap: ; eax = first, edx = last (characters in string)
test eax, edx
jg end ; if eax > edx reverse is done
mov cl, [eax] ; put char from eax in cl
mov ch, [edx] ; put char from edx in ch
mov [edx], cl ; put cl in edx
mov [eax], ch ; put ch in eax
inc eax
dec edx
jmp swap
end:
mov eax, [ebp+8] ; move char pointer to eax (func return)
pop ebp ; epilogue
ret
It seems like the line causing the segmentation fault is
mov cl, [eax]
Why is that happening? In my understanding eax never goes beyond the bounds of the string so there always is something in [eax]. How come I get a segmentation fault?

Ok I figured it out, I mistakenly used test eax, edx instead of which I should have used cmp eax, edx. It works now.

Related

String Reverse in FASM x86 architecture

I am making a program that reverses a given string from the user.
The problem that has appeared is that the program works well if the string is 5 bytes long but if the string is lower then the result doesn't appear when I execute it. The other problem is that if the string is more than 5 bytes long it reverses only the first five bytes.
Please keep in mind that I am new to assembly and this question may be basic but I would be grateful is someone tells me where the problem is.
Thank you to everyone, have a great day :)
P.S The file "training. inc" is a file that has "print_str, read_line" methods implemented.
entry start
include "win32a.inc"
MAX_USER_STR = 5h
section '.data' data readable writeable
enter_string db "Enter a string : ", 0
newline db 13,10,0
user_str db MAX_USER_STR dup(?), 0
section ".text" code readable executable
start:
mov esi, enter_string
call print_str
mov edi, user_str
call read_line
call str_len
mov edx, MAX_USER_STR
mov ebx, 0
mov ecx, 0
mov esi, user_str
call print_str
mov esi, newline
call print_str
mov esi, user_str
for_loop :
push eax
mov al, byte[esi]
inc esi
inc ebx
call print_eax
cmp edx, ebx
jb clear_register
jmp for_loop
for_loop2 :
call print_eax
mov byte[esi], al
inc esi
inc ecx
pop eax
cmp ecx, edx
ja break_loop
jmp for_loop2
break_loop:
;mov edi, 0
mov esi, user_str
call print_str
push 0
call [ExitProcess]
clear_register :
mov esi, user_str
jmp for_loop2
str_len :
push ecx
sub ecx, ecx
mov ecx, -1
sub al, al
cld
repne scasb
neg ecx
sub ecx, 1
mov eax, ecx
pop ecx
ret
include 'training.inc'
MAX_USER_STR = 5h
The name MAX_ already says it, but a buffer is to be defined according to the worst case scenario. If you want to be able to deal with strings that could be longer than 5 characters, then raise this value.
MAX_USER_STR = 256 ; A decent buffer
... if the string is lower then the result doesn't appear when I execute it.
The other problem is that if the string is more than 5 bytes long it reverses only the first five bytes.
That's because your code does not actually use the length of the string but rather the size of the smaller buffer. I hope you see that this should never happen, overflowing the buffer. Your code didn't complain too much since this buffer was the last item in the data section.
Your loops could use the true length if you write:
call str_len ; -> EAX
mov edx, eax
for_loop :
push eax
mov al, byte[esi]
If it's characters that you want to push, then I would expect the push eax to follow the load from the string!
Note that in a string-reversal, you never want to move the string terminator(s) to the front of the string.
This is your basic string reversal via the stack:
mov ecx, edx ; EDX has StrLen
mov esi, user_str
loop1:
movzx eax, byte [esi]
inc esi
push eax
dec ecx
jnz loop1
mov esi, user_str
loop2:
pop eax
mov [esi], al
inc esi
dec edx
jnz loop2

Swapping first and last characters of string results in seg fault

My goal is to swap the first character with the last character of the string some_str in x86-assembly.
Here is my attempt:
; assemble and link with:
; nasm -f elf32 -g test.asm && ld -melf_i386 test.asm.o -o test
section .text
global _start
extern printf
_start:
mov eax, some_str
_loop:
mov di, [eax + 4] ; ptr to end char
mov si, [eax] ; ptr to start char
mov dl, [di] ; DL = end char
mov al, [si] ; AL = start char
mov [si], dl ; start char = end char
mov [di], al ; end char = char 1
mov edx, len
mov ecx, eax
mov ebx, 1
mov eax, 4
int 0x80
ret
mov eax, 1
int 0x80
section .data
some_str db `abcd`, 0xa
len equ $ - some_str
For some reason I am oblivious to the lines:
mov dl, [di] ; DL = end char
mov al, [si] ; AL = start char
Causes the program to result in a segmentation fault.
The expected stdout is:
dbca
Actual stdout:
Segmentation fault (core dumped)`
Is there something I am missing? How do I correct this code to correctly swap the first and last character of some_str.
Your code seems to be doing something much more complicated than necessary. After mov eax, some_str, we have that eax points to one of the bytes that wants to be swapped, and eax+4 points to the other. So just load them into two 8-bit registers and then store them back the other way around.
mov eax, some_str
mov cl, [eax]
mov dl, [eax + 4]
mov [eax + 4], cl
mov [eax], dl
And you're done and can proceed to write out the result.
Note it isn't necessary to load the pointer into eax first; you could also do
mov cl, [some_str]
mov dl, [some_str + 4]
mov [some_str + 4], cl
mov [some_str], dl
If you really wanted to have two different registers to point to the two different bytes: first of all, they need to be 32-bit registers. Trying to address memory in 32-bit mode using 16-bit registers si, di is practically never going to work. Second, mov edi, [eax] would load edi with the contents of the memory at location eax, which is some bytes of your string, not a pointer. You'd want simply mov edi, eax. For the second one, you can use lea to do the arithmetic of an effective address calculation but keep the resulting pointer instead of doing a load. So I think the way to turn your code into something in the original (inefficient) spirit, but correct, would be
mov edi, eax
lea esi, [eax+4]
mov dl, [edi]
mov al, [esi]
mov [esi], dl
mov [edi], al

write number to file using NASM

How do I write a variable to a file using NASM?
For example, if I execute some mathematical operation - how do I write the result of the operation to write a file?
My file results have remained empty.
My code:
%include "io.inc"
section .bss
result db 2
section .data
filename db "Downloads/output.txt", 0
section .text
global CMAIN
CMAIN:
mov eax,5
add eax,17
mov [result],eax
PRINT_DEC 2,[result]
jmp write
write:
mov EAX, 8
mov EBX, filename
mov ECX, 0700
int 0x80
mov EBX, EAX
mov EAX, 4
mov ECX, [result]
int 0x80
mov EAX, 6
int 0x80
mov eax, 1
int 0x80
jmp exit
exit:
xor eax, eax
ret
You have to implement ito (integer to ascii) subsequently len for this manner. This code tested and works properly in Ubuntu.
section .bss
answer resb 64
section .data
filename db "./output.txt", 0
section .text
global main
main:
mov eax,5
add eax,44412
push eax ; Push the new calculated number onto the stack
call itoa
mov EAX, 8
mov EBX, filename
mov ECX, 0x0700
int 0x80
push answer
call len
mov EBX, EAX
mov EAX, 4
mov ECX, answer
movzx EDX, di ; move with extended zero edi. length of the string
int 0x80
mov EAX, 6
int 0x80
mov eax, 1
int 0x80
jmp exit
exit:
xor eax, eax
ret
itoa:
; Recursive function. This is going to convert the integer to the character.
push ebp ; Setup a new stack frame
mov ebp, esp
push eax ; Save the registers
push ebx
push ecx
push edx
mov eax, [ebp + 8] ; eax is going to contain the integer
mov ebx, dword 10 ; This is our "stop" value as well as our value to divide with
mov ecx, answer ; Put a pointer to answer into ecx
push ebx ; Push ebx on the field for our "stop" value
itoa_loop:
cmp eax, ebx ; Compare eax, and ebx
jl itoa_unroll ; Jump if eax is less than ebx (which is 10)
xor edx, edx ; Clear edx
div ebx ; Divide by ebx (10)
push edx ; Push the remainder onto the stack
jmp itoa_loop ; Jump back to the top of the loop
itoa_unroll:
add al, 0x30 ; Add 0x30 to the bottom part of eax to make it an ASCII char
mov [ecx], byte al ; Move the ASCII char into the memory references by ecx
inc ecx ; Increment ecx
pop eax ; Pop the next variable from the stack
cmp eax, ebx ; Compare if eax is ebx
jne itoa_unroll ; If they are not equal, we jump back to the unroll loop
; else we are done, and we execute the next few commands
mov [ecx], byte 0xa ; Add a newline character to the end of the character array
inc ecx ; Increment ecx
mov [ecx], byte 0 ; Add a null byte to ecx, so that when we pass it to our
; len function it will properly give us a length
pop edx ; Restore registers
pop ecx
pop ebx
pop eax
mov esp, ebp
pop ebp
ret
len:
; Returns the length of a string. The string has to be null terminated. Otherwise this function
; will fail miserably.
; Upon return. edi will contain the length of the string.
push ebp ; Save the previous stack pointer. We restore it on return
mov ebp, esp ; We setup a new stack frame
push eax ; Save registers we are going to use. edi returns the length of the string
push ecx
mov ecx, [ebp + 8] ; Move the pointer to eax; we want an offset of one, to jump over the return address
mov edi, 0 ; Set the counter to 0. We are going to increment this each loop
len_loop: ; Just a quick label to jump to
movzx eax, byte [ecx + edi] ; Move the character to eax.
movsx eax, al ; Move al to eax. al is part of eax.
inc di ; Increase di.
cmp eax, 0 ; Compare eax to 0.
jnz len_loop ; If it is not zero, we jump back to len_loop and repeat.
dec di ; Remove one from the count
pop ecx ; Restore registers
pop eax
mov esp, ebp ; Set esp back to what ebp used to be.
pop ebp ; Restore the stack frame
ret ; Return to caller

Printing an Int (or Int to String)

I am looking for a way to print an integer in assembler (the compiler I am using is NASM on Linux), however, after doing some research, I have not been able to find a truly viable solution. I was able to find a description for a basic algorithm to serve this purpose, and based on that I developed this code:
global _start
section .bss
digit: resb 16
count: resb 16
i: resb 16
section .data
section .text
_start:
mov dword[i], 108eh ; i = 4238
mov dword[count], 1
L01:
mov eax, dword[i]
cdq
mov ecx, 0Ah
div ecx
mov dword[digit], edx
add dword[digit], 30h ; add 48 to digit to make it an ASCII char
call write_digit
inc dword[count]
mov eax, dword[i]
cdq
mov ecx, 0Ah
div ecx
mov dword[i], eax
cmp dword[i], 0Ah
jg L01
add dword[i], 48 ; add 48 to i to make it an ASCII char
mov eax, 4 ; system call #4 = sys_write
mov ebx, 1 ; file descriptor 1 = stdout
mov ecx, i ; store *address* of i into ecx
mov edx, 16 ; byte size of 16
int 80h
jmp exit
exit:
mov eax, 01h ; exit()
xor ebx, ebx ; errno
int 80h
write_digit:
mov eax, 4 ; system call #4 = sys_write
mov ebx, 1 ; file descriptor 1 = stdout
mov ecx, digit ; store *address* of digit into ecx
mov edx, 16 ; byte size of 16
int 80h
ret
C# version of what I want to achieve (for clarity):
static string int2string(int i)
{
Stack<char> stack = new Stack<char>();
string s = "";
do
{
stack.Push((char)((i % 10) + 48));
i = i / 10;
} while (i > 10);
stack.Push((char)(i + 48));
foreach (char c in stack)
{
s += c;
}
return s;
}
The issue is that it outputs the characters in reverse, so for 4238, the output is 8324. At first, I thought that I could use the x86 stack to solve this problem, push the digits in, and pop them out and print them at the end, however when I tried implementing that feature, it flopped and I could no longer get an output.
As a result, I am a little bit perplexed about how I can implement a stack in to this algorithm in order to accomplish my goal, aka printing an integer. I would also be interested in a simpler/better solution if one is available (as it's one of my first assembler programs).
One approach is to use recursion. In this case you divide the number by 10 (getting a quotient and a remainder) and then call yourself with the quotient as the number to display; and then display the digit corresponding to the remainder.
An example of this would be:
;Input
; eax = number to display
section .data
const10: dd 10
section .text
printNumber:
push eax
push edx
xor edx,edx ;edx:eax = number
div dword [const10] ;eax = quotient, edx = remainder
test eax,eax ;Is quotient zero?
je .l1 ; yes, don't display it
call printNumber ;Display the quotient
.l1:
lea eax,[edx+'0']
call printCharacter ;Display the remainder
pop edx
pop eax
ret
Another approach is to avoid recursion by changing the divisor. An example of this would be:
;Input
; eax = number to display
section .data
divisorTable:
dd 1000000000
dd 100000000
dd 10000000
dd 1000000
dd 100000
dd 10000
dd 1000
dd 100
dd 10
dd 1
dd 0
section .text
printNumber:
push eax
push ebx
push edx
mov ebx,divisorTable
.nextDigit:
xor edx,edx ;edx:eax = number
div dword [ebx] ;eax = quotient, edx = remainder
add eax,'0'
call printCharacter ;Display the quotient
mov eax,edx ;eax = remainder
add ebx,4 ;ebx = address of next divisor
cmp dword [ebx],0 ;Have all divisors been done?
jne .nextDigit
pop edx
pop ebx
pop eax
ret
This example doesn't suppress leading zeros, but that would be easy to add.
I think that maybe implementing a stack is not the best way to do this (and I really think you could figure out how to do that, saying as how pop is just a mov and a decrement of sp, so you can really set up a stack anywhere you like by just allocating memory for it and setting one of your registers as your new 'stack pointer').
I think this code could be made clearer and more modular if you actually allocated memory for a c-style null delimited string, then create a function to convert the int to string, by the same algorithm you use, then pass the result to another function capable of printing those strings. It will avoid some of the spaghetti code syndrome you are suffering from, and fix your problem to boot. If you want me to demonstrate, just ask, but if you wrote the thing above, I think you can figure out how with the more split up process.
; Input
; EAX = pointer to the int to convert
; EDI = address of the result
; Output:
; None
int_to_string:
xor ebx, ebx ; clear the ebx, I will use as counter for stack pushes
.push_chars:
xor edx, edx ; clear edx
mov ecx, 10 ; ecx is divisor, devide by 10
div ecx ; devide edx by ecx, result in eax remainder in edx
add edx, 0x30 ; add 0x30 to edx convert int => ascii
push edx ; push result to stack
inc ebx ; increment my stack push counter
test eax, eax ; is eax 0?
jnz .push_chars ; if eax not 0 repeat
.pop_chars:
pop eax ; pop result from stack into eax
stosb ; store contents of eax in at the address of num which is in EDI
dec ebx ; decrement my stack push counter
cmp ebx, 0 ; check if stack push counter is 0
jg .pop_chars ; not 0 repeat
mov eax, 0x0a
stosb ; add line feed
ret ; return to main
; eax = number to stringify/output
; edi = location of buffer
intToString:
push edx
push ecx
push edi
push ebp
mov ebp, esp
mov ecx, 10
.pushDigits:
xor edx, edx ; zero-extend eax
div ecx ; divide by 10; now edx = next digit
add edx, 30h ; decimal value + 30h => ascii digit
push edx ; push the whole dword, cause that's how x86 rolls
test eax, eax ; leading zeros suck
jnz .pushDigits
.popDigits:
pop eax
stosb ; don't write the whole dword, just the low byte
cmp esp, ebp ; if esp==ebp, we've popped all the digits
jne .popDigits
xor eax, eax ; add trailing nul
stosb
mov eax, edi
pop ebp
pop edi
pop ecx
pop edx
sub eax, edi ; return number of bytes written
ret

strlen in assembly

I made my own implementation of strlen in assembly, but it doesn't return the correct value. It returns the string length + 4. Consequently. I don't see why.. and I hope any of you do...
Assembly source:
section .text
[GLOBAL stringlen:] ; C function
stringlen:
push ebp
mov ebp, esp ; setup the stack frame
mov ecx, [ebp+8]
xor eax, eax ; loop counter
startLoop:
xor edx, edx
mov edx, [ecx+eax]
inc eax
cmp edx, 0x0 ; null byte
jne startLoop
end:
pop ebp
ret
And the main routine:
#include <stdio.h>
extern int stringlen(char *);
int main(void)
{
printf("%d", stringlen("h"));
return 0;
}
Thanks
You are not accessing bytes (characters), but doublewords. So your code is not looking for a single terminating zero, it is looking for 4 consecutive zeroes. Note that won't always return correct value +4, it depends on what the memory after your string contains.
To fix, you should use byte accesses, for example by changing edx to dl.
Thanks for your answers. Under here working code for anyone who has the same problem as me.
section .text
[GLOBAL stringlen:]
stringlen:
push ebp
mov ebp, esp
mov edx, [ebp+8] ; the string
xor eax, eax ; loop counter
jmp if
then:
inc eax
if:
mov cl, [edx+eax]
cmp cl, 0x0
jne then
end:
pop ebp
ret
Not sure about the four, but it seems obvious it will always return the proper length + 1, since eax is always increased, even if the first byte read from the string is zero.
Change the line
mov edx, [ecx+eax]
to
mov dl, byte [ecx+eax]
and
cmp edx, 0x0 ; null byte
to
cmp dl, 0x0 ; null byte
Because you have to compare only byte at a time.
Following is the code. Your original code got off-by-one error. For "h" it will return two h + null character.
section .text
[GLOBAL stringlen:] ; C function
stringlen:
push ebp
mov ebp, esp ; setup the stack frame
mov ecx, [ebp+8]
xor eax, eax ; loop counter
startLoop:
xor dx, dx
mov dl, byte [ecx+eax]
inc eax
cmp dl, 0x0 ; null byte
jne startLoop
end:
pop ebp
ret
More easy way here(ASCII zero terminated string only):
REPE SCAS m8
http://pdos.csail.mit.edu/6.828/2006/readings/i386/REP.htm
I think your inc should be after the jne. I'm not familiar with this assembly, so I don't really know.

Resources