Concatenate two string in asm x86-64 intel syntax - string

I have tried to concatenate two strings but I keep getting segmentation faults, somebody could help me on what might be wrong with my code? Thanks!
To test it i just create to strings with dynamically allocated memory, this is a class exercise so I must free memory from the 2 parameters my fucntion gets
my code:
;int32_t strCmp(char* a, char* b)
strConcat:
push rbp ;stack frame
mov rbp, rsp
push r12 ; stringA
push r13 ; stringB
push r14 ; puntero inicio ; stringA
push r15 ; puntero inicio ; stringB
mov r12, rdi
mov r13, rsi
mov r14, r12
mov r15, r13
xor rcx, rcx ;clean rcx to do len(stringA)+leng(stringB)+1
call strLen ; len stringA
add rcx, rax
mov rdi, r13
call strLen ; len stringB
add rcx, rax
mov rdi, rcx
inc rdi
call malloc
; now rax has a pointer to new concatenated string space
.cicloA: ;loop to iterate over 1st string
cmp byte [r12], 0
jz .cicloB
xor rdx, rdx
mov dl, byte [r12]
mov byte [rax], dl
inc r12
inc rax
jmp .cicloA
.cicloB: ;loop to iterate over 2th string
cmp byte [r13], 0
jz .fin
xor rdx, rdx
mov dl, byte [r13]
mov byte [rax], dl
inc r13
inc rax
jmp .cicloB
.fin:
;add /0
mov byte [rax], 0
;release memory
mov rdi, r14
call free
mov rdi, r15
call free
pop r15
pop r14
pop r13
pop r12
pop rbp
ret
And my strLen function is
;uint32_t strLen(char* a)
strLen:
push rbp ;armo el stack frame
mov rbp, rsp
xor rax,rax
.avanzar: ;loop para ver si llegue al fin de un string
cmp byte [rdi], 0
je .fin
inc rdi
inc rax
jmp .avanzar
.fin:
pop rbp
ret
Valgrind Error:
==18885== Invalid read of size 1
==18885== at 0x400E4A: ??? (lib.asm:79)
==18885== by 0x400C49: test_strConcat (main.c:79)
==18885== by 0x400D28: main (main.c:109)
==18885== Address 0x2 is not stack'd, malloc'd or (recently) free'd

In the end, I reworte the fuction strConcat from scratch and now it works.
strConcat:
push rbp ;armo el stack frame
mov rbp, rsp
push rbx
push r12 ; stringA
push r13 ; stringB
sub rsp, 0X08
mov r12, rdi
mov r13, rsi
call strLen ;loongitud del primer string
mov ebx, eax
mov rdi, r13
call strLen ;longitud del segundo string;
add ebx, eax
mov edi, ebx
add edi, 1
call malloc
; ahora rax tiene el puntero a nuevo string para concatenar
;limpio contadores
xor r8, r8
xor r9, r9
.cicloA: ;loop para colocar stringA en nuevo string
cmp byte [r12 + r8], 0
jz .cicloB
mov dl, byte [r12 + r8]
mov byte [rax + r8], dl
inc r8
jmp .cicloA
.cicloB: ;loop para colocar stringB en nuevo string
cmp byte [r13 + r9], 0
jz .borrar
mov dl, byte [r13 + r9]
mov byte [rax + r8], dl
inc r8
inc r9
jmp .cicloB
.borrar:
;agrego el cero al final
mov byte [rax + r8], 0
mov rbx, rax
;reviso si son el mismo puntero
cmp r12, r13
je .igualitos
;libero memoria
mov rdi, r12
call free
mov rdi, r13
call free
jmp .final
.igualitos:
mov rdi, r12
call free
.final:
add rsp, 0x08
mov rax, rbx
pop r13
pop r12
pop rbx
pop rbp
ret

Related

Getting segmentation fault or bus error when moved some code from place before function ret after place just after function call in Assembly [duplicate]

This question already has answers here:
Why does the x86-64 / AMD64 System V ABI mandate a 16 byte stack alignment?
(1 answer)
glibc scanf Segmentation faults when called from a function that doesn't align RSP
(1 answer)
Calling printf in x86_64 using GNU assembler
(2 answers)
Closed 1 year ago.
Welcome.
I have following code:
%define ESC 0x1b
section .text
global _start
_start:
.drawer:
call getdata
mov rdi, 1
call sleep
jmp .drawer
mov rdi, 0
mov rax, 60 ;; sys_exit
syscall ;; exit
sleep:
mov [timespec.tv_sec], rdi
xor rdi, rdi
mov [timespec.tv_usec], rdi
mov rdi, timespec
xor rsi, rsi
mov rax, 35 ;; sys_nanosleep
syscall ;; nanosleep
ret
getuptime:
mov rdx, 16
mov rsi, data.uptimebuf
mov rdi, filename.uptime
call readfile
mov rdi, rsi
mov rsi, 0x2e ;; ASCII '.'
call strclen
mov rsi, rax
call stoin
mov qword [data.uptime], rax
mov rdi, qword [data.uptime]
call printi
ret
getdata:
call getuptime
ret
section .data
data:
.uptime dq 0
.uptimebuf times 16 db 0
timespec:
.tv_sec dq 0
.tv_usec dq 0
section .rodata
ansi:
.clear db ESC, "[2J", ESC, "[H"
.clearlen equ $ - .clear
filename:
.uptime db "/proc/uptime"
;; Content under this line is included on beginning like this:
;; %include "libasm.asm"
;; under line
;; %define ESC 0x1b
section .text
strlen: ;; string: rdi
push rbx
mov rbx, rdi
.again:
cmp byte [rbx], 0
jz .done
inc rbx
jmp .again
.done:
sub rbx, rdi
mov rax, rbx
pop rbx
ret
strclen: ;; string: rdi, char: rsi
push rbx
mov rbx, rsi
mov al, bl
mov rbx, rdi
.again:
cmp byte [rbx], 0
jz .done
cmp byte [rbx], al
je .done
inc rbx
jmp .again
.done:
sub rbx, rdi
mov rax, rbx
pop rbx
ret
printn: ;; string: rdi, len: rsi
push rdx
push rsi
push rdi
mov rdx, rsi
mov rsi, rdi
mov rdi, 1
mov rax, 1 ;; sys_write
syscall ;; write(unsigned int fd, const char *buf, size_t count)
pop rdi
pop rsi
pop rdx
ret
stoin: ;; string: rdi, len: rsi
push rdx
push rcx
push rbx
mov rdx, rdi
xor rcx, rcx
xor rax, rax
.again:
mov rbx, 10
cmp rcx, rsi
jge .done
push rdx
mul rbx
pop rdx
xor rbx, rbx
mov bl, byte [rdx]
sub bl, 0x30 ;; ASCII '0'
add rax, rbx
inc rdx
inc rcx
jmp .again
.done:
pop rbx
pop rcx
pop rdx
ret
readfile: ;; filename: rdi, buffer: rsi, length: rdx
push rsi
xor rsi, rsi
mov rax, 2 ;; sys_open
syscall ;; open(char *filename, int flags, int mode)
pop rsi
push rdi
mov rdi, rax
mov rax, 0 ;; sys_read
syscall ;; read(unsigned int fd, const char *buf, size_t count)
mov rax, 3 ;; sys_close
syscall ;; close(unsigned int fd);
pop rdi
ret
printi: ;; long: rdi
extern printf
section .data
fmt db "%ld", 10, 0
section .text
push rsi
mov rsi, rdi
mov rdi, fmt
call printf
pop rsi
ret
If it stays like that, everyething works properly, but I want to move last 2 instructions before ret in getuptime function to place just after calling getuptime, just like that:
getuptime:
mov rdx, 16
mov rsi, data.uptimebuf
mov rdi, filename.uptime
call readfile
mov rdi, rsi
mov rsi, 0x2e ;; ASCII '.'
call strclen
mov rsi, rax
call stoin
mov qword [data.uptime], rax
ret
getdata:
call getuptime
mov rdi, qword [data.uptime]
call printi
ret
but when I'll do it, i am getting Segmentation Fault or Bus Error. Why?

Itoa assembly implementation, div operation causes segfault? [duplicate]

This question already has answers here:
8086 assembly on DOSBox: Bug with idiv instruction?
(1 answer)
Why should EDX be 0 before using the DIV instruction?
(2 answers)
Closed 4 years ago.
So I'm trying to implement itoa, which converts an int into a string.
So far, the implementation is working if I don't loop in the .loop section, and stick to small numbers. As soon as it loops, my program segfaults.
Here is the code:
section .data
buffer times 11 db 0
section .text
global ft_itoa
extern ft_strrevd
extern malloc
ft_itoa:
mov rcx, 1 ;initialize our counter at 1 for the terminating null byte
mov rax, rdi ;move number in RAX for DIV instruction
push rbx ;save RBX
mov bl, 10
.check_negative:
and edi, 0xf0000000
mov rdi, buffer
jz .loop ;number is positive, proceed to main loop
not rax ;else
inc rax ;compute absolute value with binary complement
mov r9, 1 ;set neg flag
.loop:
cmp rax, 0
jz .check_neg_flag
div bl
add ah, 48 ;convert int to char
mov byte[rdi + rcx - 1], ah ;copy char in buffer
sub ah, 48
inc rcx
jmp .loop ;commenting this line prevents crash
.check_neg_flag:
cmp r9, 1
jne .dup
mov byte[rdi + rcx - 1], '-'
inc rcx
.dup:
mov byte[rdi + rcx - 1], 0
call ft_strrevd ;copy buffer string in memory and return pointer
.end:
pop rbx ;restore RBX
ret
It's most likely caused by the div, but I'm having trouble understanding how it works.
If anyone could point me towards a solution it'd be highly appreciated.
So in order to fix this I have to use div ebx instead of div bl, and xor edx, edx before each div.
Here is a working version:
section .data
buffer times 11 db 0
nega db "neg",0
posi db "pos",0
section .text
global ft_itoa
extern ft_strdup
extern malloc
ft_itoa:
xor rcx, rcx ;initialize counter
xor r9, r9 ;set neg flag to 0
mov eax, edi ;move number in RAX for DIV instruction
push rbx ;save RBX
mov ebx, 10
.check_negative:
and edi, 0x80000000
mov rdi, buffer
jz .divide ;number is positive, proceed to main loop
not eax ;else
inc eax ;compute absolute value with binary complement
inc r9 ;set neg flag
.divide:
xor edx, edx
div ebx
add edx, 48 ;convert int to char
push rdx
inc rcx
cmp eax, 0
jnz .divide
.check_neg_flag:
cmp r9, 1
jne .buff_string
mov byte[rdi], '-'
.buff_string:
pop rdx
mov byte[rdi + r9], dl
dec rcx
inc r9
cmp rcx, 0
jnz .buff_string
.dup:
mov byte[rdi + r9], 0
call ft_strdup ;copy buffer string in memory and return pointer
pop rbx ;restore RBX
ret

Wrong result after multiple read

I was wondering i someone could help me with my code, i want to read more than one time from the stdin in x86 but when i read the second time, it is ignoring the number and just add the 50. I want it to work so that i can call it as many times as i need to go adding the register EAX by an arbitrary number (which i will load from variables A,B,etc later on)
section .text
global _start
_start:
call leer
call atoi
add eax,10
call itoa
call imprimir
call limpiar
call leer
call atoi
add eax,50
call itoa
call imprimir
jmp salir
;-----------ATOI
atoi:
mov esi,Buffer ;move buffer address
mov eax,0 ;where im going to keep result
mov ebx,0 ;where i put char
.atoi_start:
mov bl, byte[esi] ;get the char
je .end_atoi
cmp bl, '0' ;check if null
jb .end_atoi
cmp bl,'9'
ja .end_atoi
imul eax,10 ;multiplico resultado por 10
sub bl,0x30 ;ascii->int
add eax,ebx ;agegue el nuevo digito
inc esi ;getting ready for next char
jmp .atoi_start
.end_atoi:
ret ;at this point i have int representation in eax
;-----------ITOA
itoa:
mov ebx, eax ;mueve el numero en eax a ebx
mov esi, Buffer
mov ebx,10
add esi,10
.itoa2:
xor edx,edx
div ebx
add dl,'0' ;lo convierte en char
mov [esi],dl
dec esi
test eax,eax
jnz .itoa2
jz .doneItoa
.doneItoa:
ret
;----------------------LIMPIAR
limpiar:
;limpia buffer usando eax
xor eax,eax;
xor edx,edx
xor ecx,ecx
mov ecx, 1100 ;tamano de veces que voy a limpiar
lea edx, [Buffer] ;direccion inicial del buffer
.loop:
mov [edx+ecx],eax ;voy a limpiar de atras para adelante
dec ecx ;decremento contador y verifico si es 0 para seguir limiando
jnz .loop
ret
imprimir:
mov eax,4
mov ebx,1
mov ecx,Buffer
mov edx,1100 ;tamano
int 0x80 ;syscall
;ahora imprimo nueva linea
mov eax,4
mov ebx,1
mov ecx,nuevaLinea
mov edx,1
int 0x80
ret
leer:
;vamos a poner el msg en pantalla
mov eax,4 ;sys_write
mov ebx,1 ;stdout
mov ecx,msg ;paso el mensaje
mov edx,lenMsg ;paso el largo del mensaje
int 80h
;ahora leemos
mov eax,3 ;sys_read
mov ebx,2 ;stdin
mov ecx,Buffer
mov edx,1100
int 80h
ret
salir:
mov rax,60 ;sys_exit
mov rdi,0 ;codigo de salida
syscall
section .data
msg db 'Ingrese un numero: '
lenMsg equ $-msg
nuevaLinea db 10;nueva linea
A dq 0
section .bss
Buffer: resb 1100
B: resb 1100
In atoi, the fifth instruction
je .end_atoi
tests the Z flag which has not been set in this function. So it is jumping based on the value of Z from whatever happened before atoi is called, which is the read system call in leer.
Either put
test bl, bl
before that je instruction or just remove the je, since the end of string will be caught by the cmp '0' that follows.
But that's not a complete fix: The read system call doesn't null terminate the input. Leer should use the number of bytes read (returned by the read system call) to place a null byte in the buffer after the input.

How to compare the count of command line arguments correctly in NASM?

I am learning x86_64 NASM assembly on Ubuntu 16.10 on Docker for Mac.
The following program takes two command line arguments, and sum these.
If number of command line arguments is not two, print error message (jump to argcError).
When I exec this program, it jump to argcError section despite passed to two command line arguments.
Why this program jump to argError?
section .data
SYS_WRITE equ 1
STD_IN equ 1
SYS_EXIT equ 60
EXIT_CODE equ 0
NEW_LINE db 0xa
WRONG_ARGC db "Must be two command line arguments", 0xa
section .text
global _start
_start:
pop rcx
cmp rcx, 3
jne argcError
add rsp, 8
pop rsi
call str_to_int
mov r10, rax
pop rsi
call str_to_int
mov r11, rax
add r10, r11
argcError:
mov rax, 1
mov rdi, 1
mov rsi, WRONG_ARGC
mov rdx, 35
syscall
jmp exit
str_to_int:
xor rax, rax
mov rcx, 10
next:
cmp [rsi], byte 0
je return_str
mov bl, [rsi]
sub bl, 48
mul rcx ; rax = rax * rcx
add rax, rbx
inc rsi
jmp next
return_str:
ret
int_to_str:
mov rdx, 0
mov rbx, 10
div rbx
add rdx, 48
add rdx, 0x0
push rdx
inc r12
cmp rax, 0x0
jne int_to_str
jmp print
print:
; calculate byte length of number string
mov rax, 1
mul r12
mov r12, 8
mul r12
mov rdx, rax
; print sum
mov rax, SYS_WRITE
mov rdi, STD_IN
mov rsi, rsp
syscall
jmp printNewline
printNewline:
mov rax, SYS_WRITE
mov rdi, STD_IN
mov rsi, NEW_LINE
mov rdx, 1
syscall
jmp exit
exit:
mov rax, SYS_EXIT
mov rdi, EXIT_CODE
syscall
There probably other errors in your code as pointed out by Micheal Petch, but the way you've initialized RSI is incorrect. Yes, ESP does point to the number of arguments passed, but popping it off the stack and then adding 8 to ESP again is functionally equivalent too.
mov rcx, [rsp]
Then by popping into RSI it only becomes a copy of RCX. If you want to do that it should look like this
pop rcx
.......
add rsp, 24 ; Now RSP is pointing to proper place in array of pointers
pop rsi
add rsp, 16 ; Now point to pointer to second argument
pop rsi
An alternative would be this next example only because my personal preference is not to use stack pointer for other than that which it was intended.
mov rsi, rsp
lodsq ; Read # of arguments passed by OS
add rsi, 8 ; bounce over application name
cmp al, 3
jnz argError
push rsi
lodsq
mov rsi, rax ; RSI points to first agument
call Convert
pop rsi
lodsq
mov rsi, rax
call Convert

Search for and replace characters in a string in assembly nasm issues

I've got this working to where it copies a string into another. I'm trying to make it search for a term and swap it. For some reason, if the replace function isn't commented, it somehow manages to delete the output in the console (literally goes backwards!). If I comment the replace function out, I just get an exact copy. Trying to change cat to dog.
bits 64
global main
extern printf
section .text
main:
; function setup
push rbp
mov rbp, rsp
sub rsp, 32
;
lea rdi, [rel message]
mov al, 0
call printf
;print source message
lea rdi, [rel source]
mov al, 0
call printf
;print target message
lea rdi, [rel target]
mov al, 0
call printf
lea rdi, [rel target]
lea rsi, [rel source]
cld
jmp Loop
Loop:
lodsb ;Load byte at address RSI into AL
stosb ;Store AL at address RDI
;push [rdi]
cmp byte RDI, 'c'
je replace
;pop [rdi]
test al,al ;code will jump only if al is not equ 0
jnz Loop
replace:
;lea rdi, [rel success]
mov byte [rdi], 'd'
;call printf
ret
;print new version of target
lea rdi, [rel target]
mov al, 0
call printf
; function return
mov eax, 0
add rsp, 32
pop rbp
ret
section .data
message: db 'Project:',0x0D,0x0a,'Author:',0x0D,0x0a,0x0D,0x0a,0
source: db "The cat chased the bird.",0x0a,0x0D,0
target: db '0000000000000000000000000000000000000000000',0x0D,0x0a,0
success: db "Success",0
This is what you want. I tested it in Ubuntu 64 with:
(assumed this file is a.asm)
nasm -f elf64 -l a.lst a.asm &
gcc -m64 -o a a.o
bits 64
global main
extern printf
section .text
main:
; function setup
push rbp
mov rbp, rsp
sub rsp, 32
;
lea rdi, [rel message]
mov al, 0
call printf
;print source message
lea rdi, [rel source]
mov al, 0
call printf
;print target message
lea rdi, [rel target]
mov al, 0
call printf
lea rdi, [rel target]
lea rsi, [rel source]
cld
Loop:
lodsb ;Load byte at address RSI into AL
stosb ;Store AL at address RDI
cmp al, 'c'
jne LoopBack
lodsb ;Load byte at address RSI into AL
stosb ;Store AL at address RDI
cmp al, 'a'
jne LoopBack
lodsb ;Load byte at address RSI into AL
stosb ;Store AL at address RDI
cmp al, 't'
jne LoopBack
sub rdi, 3
mov byte [rdi], 'd'
inc rdi
mov byte [rdi], 'o'
inc rdi
mov byte [rdi], 'g'
inc rdi
LoopBack:
cmp al, 0
jne Loop
;print new version of target
lea rdi, [rel target]
mov al, 0
call printf
; function return
mov eax, 0
add rsp, 32
pop rbp
ret
section .data
message: db 'Project:',0x0D,0x0a,'Author:',0x0D,0x0a,0x0D,0x0a,0
source: db "The cat chased the bird.",0x0a,0x0D,0
target: db '0000000000000000000000000000000000000000000',0x0D,0x0a,0
success: db "Success",0
The output is this:
Project:
Author:
The cat chased the bird.
0000000000000000000000000000000000000000000
The dog chased the bird.

Resources