replace a string in assembly - string

I wanna get a source string ,find a key in it and replace the key with a replace string so i copy the rest of source and the replace string in the result .
it outputs the correct prompt when the key doesnt exist in the source string : "The key does not appear in the string."
but when the source contains the key it stucks and doesnt continue running
(it looks sth in found label part have been missed and have an overflow)
can anyone help to correct the found part ?
any help will be appreciate :)
; program to search for one string embedded in another
; author: R. Detmer revised: 10/97
.386
.MODEL FLAT
ExitProcess PROTO NEAR32 stdcall, dwExitCode:DWORD
INCLUDE io.h
cr EQU 0dh ; carriage return character
Lf EQU 0ah ; linefeed character
.STACK 4096 ; reserve 4096-byte stack
.DATA
prompt1 BYTE "String to search? ", 0
prompt2 BYTE cr, Lf, "Key to search for? ", 0
prompt3 BYTE cr, Lf, "Word to replace? ", 0
source BYTE 100 DUP (?)
key BYTE 20 DUP (?)
replace BYTE 20 DUP (?)
srcLength DWORD ?
keyLength DWORD ?
repLength DWORD ?
BeginLength DWORD ?
restLength DWORD ?
cpyLength DWORD ?
lastPosn DWORD ?
restPosition DWORD ?
firstParam DWORD ?
secondParam DWORD ?
keyPosition DWORD ?
failure BYTE cr,Lf,Lf,"The key does not appear in the string.",cr,Lf,0
success BYTE cr,Lf,Lf, " The result string is : " ,cr,Lf,Lf
result BYTE 200 DUP (?)
PUBLIC _start ; make entry point public
.CODE
_start: output prompt1 ; ask for
input source,100 ; and input source string
lea eax, source ; find length of string
push eax ; length parameter
call strlen
mov srcLength,eax ; save length of source
output prompt2 ; ask for
input key,20 ; and input key string
lea eax, key ; find length of string
push eax ; length parameter
call strlen
mov keyLength,eax ; save length of key
output prompt3 ; ask for
input replace,20 ; and input replace string
lea eax, replace ; find length of string
push eax ; length parameter
call strlen
dec eax
mov repLength,eax ; save length of replace
; calculate last position of source to check
mov eax,srcLength
sub eax,keyLength
inc eax ; srcLength − keyLength + 1
mov lastPosn, eax
cld ; left to right comparison
mov eax,1 ; starting position
whilePosn: cmp eax,lastPosn ; position <= last_posn?
jnle endWhilePosn ; exit if past last position
lea esi,source ; address of source string
add esi,eax ; add position
dec esi ; address of position to check is incremented automatically
lea edi,key ; address of key
mov ecx,keyLength ; number of positions to check
repe cmpsb ; check
jz found ; exit on success
inc eax ; increment position
jmp whilePosn ; repeat
endWhilePosn:
output failure ; the search failed
jmp quit ; exit
;-------------------------------------------------------------
found:
mov keyPosition, eax ; position of key
mov ebx, eax ;copy start position of key
lea eax, source
sub ebx, eax ;position - source address
mov BeginLength, ebx ;begin Source length (before key)
add ebx, keyLength
mov eax, srcLength
sub eax, ebx
mov restLength, eax ;rest of Source length (after key)
mov eax, keyPosition
add eax, keyLength ; position + key
mov restPosition, eax
;source begin to result
lea eax, result
mov firstParam, eax ; destination address
lea eax, source
mov secondParam, eax
mov eax, BeginLength ; copy length
mov cpyLength, eax
mov esi,firstParam ;initial source address
mov edi,secondParam ;destination
mov ecx ,cpyLength
rep movsb ;copy bytes
;replace to result
mov eax, firstParam
add eax , BeginLength
mov firstParam, eax ; address of rest of result
lea eax, replace
mov secondParam, eax ; string to replace
mov eax, repLength ; copy length
mov cpyLength, eax
mov esi,firstParam ;initial source address
mov edi,secondParam ;destination
mov ecx ,cpyLength
rep movsb ;copy bytes
;Rest to result
mov eax, firstParam
add eax , repLength
mov firstParam, eax ; address of rest of result
mov eax, restPosition
mov secondParam, eax
mov eax, restLength
mov cpyLength, eax
mov esi,firstParam ;initial source address
mov edi,secondParam ;destination
mov ecx ,cpyLength
rep movsb ;copy bytes
mov BYTE PTR [edi],0 ;terminate destination string
output success
quit:
INVOKE ExitProcess, 0 ; exit with return code 0
;----------------------------------------------------------
strlen PROC NEAR32
; find length of string whose address is passed on stack
; length returned in EAX
push ebp ; establish stack frame
mov ebp, esp
pushf ; save flags
push ebx ; and EBX
sub eax, eax ; length := 0
mov ebx, [ebp+8] ; address of string
whileChar: cmp BYTE PTR [ebx], 0 ; null byte?
je endWhileChar ; exit if so
inc eax ; increment length
inc ebx ; point at next character
jmp whileChar ; repeat
endWhileChar:
pop ebx ; restore registers and flags
popf
pop ebp
ret 4 ; return, discarding parameter
strlen ENDP
END

found:
mov keyPosition, eax ; position of key
mov ebx, eax ;copy start position of key
lea eax, source
sub ebx, eax ;position - source address
mov BeginLength, ebx ;begin Source length (before key)
In these lines you have subtracted things that cannot be subtracted.
When you get at the label found, EAX has a 1-based relative position index that you copy to the EBX register. This value ranges from 1 to 100. Now you subtract the absolute address of your source buffer. This could be in the millions. That's clearly a mistake. It becomes disastrous when later on you use it as a loop counter and start corrupting memory!

success BYTE cr,Lf,Lf, " The result string is : " ,cr,Lf,Lf
You forgot to zero-terminate the success message.
It will disrupt your final macro call output success and so it would seem that the program didn't correctly replace the string.

Related

nasm zero byte omitted at the end of the string

I am studying Assembly language using this nasm tutorial. Here is the code that prints a string:
SECTION .data
msg db 'Hello!', 0Ah
SECTION .text
global _start
_start:
mov ebx, msg
mov eax, ebx
; calculate number of bytes in string
nextchar:
cmp byte [eax], 0
jz finished
inc eax
jmp nextchar
finished:
sub eax, ebx ; number of bytes in eax now
mov edx, eax ; number of bytes to write - one for each letter plus 0Ah (line feed character)
mov ecx, ebx ; move the memory address of our message string into ecx
mov ebx, 1 ; write to the STDOUT file
mov eax, 4 ; invoke sys_write (kernel opcode 4)
int 80h
mov ebx, 0 ; no errors
mov eax, 1 ; invoke sys_exit (kernel opcode 1)
int 80h
It works and successfully prints "Hello!\n" to STDOUT. One thing I don't understand: it searches for \0 byte in msg, but we didn't define it. Ideally, the correct message definition should be
msg db 'Hello!', 0Ah, 0h
How does it successfully get the zero byte at the end of the string?
The similar case is in exercise 7:
; String printing with line feed function
sprintLF:
call sprint
push eax ; push eax onto the stack to preserve it while we use the eax register in this function
mov eax, 0Ah ; move 0Ah into eax - 0Ah is the ascii character for a linefeed
push eax ; push the linefeed onto the stack so we can get the address
mov eax, esp ; move the address of the current stack pointer into eax for sprint
call sprint ; call our sprint function
pop eax ; remove our linefeed character from the stack
pop eax ; restore the original value of eax before our function was called
ret ; return to our program
It puts just 1 byte: 0Ah into eax without terminating 0h, but the string length is calculated correctly inside sprint. What is the cause?

subroutine not reading from stdin

code is as follows
getstr:
; get a LF terminated string from stdin
; in: EAX = dest buffer
; out: ax = bytes read
; EAX NOT preserved, all other registers preserved
;op mod opr1 opr2 comment
;--------------------------------------------------------
push ebx
push ecx
push edx
sub esp, 2 ; allocate memory
mov word [esp], 0x0000 ; zero memory
mov ecx, eax ; set the correct buffer
mov ebx, 0 ; stdin = 0
mov edx, 1 ; 1 byte reads
mov eax, 3 ; syscall read
.loop:
int 0x80 ; do read
test byte [ecx], 0xA
je .done
inc ecx
add word [esp], 1 ; increment the count
jmp .loop
.done:
mov byte [ecx],0x0
pop ax
pop edx
pop ecx
pop ebx
ret
gdb dump shows that 0 bytes were read
(gdb) info registers
eax 0x0 0
does anybody know what is going on here?
Two errors (assuming you use NASM):
First, int 80h / eax=3 changes eax. Thus, the next call to that function has not the wished eax, but the code 1 for exit. Move the label .loop just before the mov eax, 3 ; syscall read.
Second, test byte [ecx], 0xA doesn't compare the values. It performs an AND and sets the flags accordingly. The zero flag indicates that the result of the AND was zero. Change the line to cmp byte [ecx], 0xA.

Binary representation in processor's registers in Nasm

I would like to ask about process of put instructions into registers. For example: we want to overwrite count '50' into EBX (in ASCII '50' is count '2').
EBX consists of 32 bits. When we put '50' into it, it will be arranged as binary represent, yes? (0000000 | 00000000 | 00000000 | 00110010). Have a right? What happens with bits, when we place a string into register?
EAX holds 32 bits which Intel calls "integer". The programmer - and sometimes the assembler - decides how to interpret these bits. If you load EAX with the number 50 (not the string '50')
mov eax, 50
the assembler decides to generate a machine instruction that loads the 50 in a manner, that you can read it as number 50 in a binary system:
00000000000000000000000000110010
Try out, what the assembler does if you feed it with a string:
GLOBAL _start
SECTION .bss
outstr resb 40
SECTION .data
_start:
mov eax, 'Four' ; Load EAX with a string
call int2bin ; Convert it to a binary string in outstr
mov byte [edi], 10 ; Add a line feed
inc edi ; Increment the pointer
mov eax, 4 ; SYS_WRITE
mov ebx, 1 ; STDOUT
mov ecx, outstr ; Pointer to output buffer
mov edx, edi ; Count of bytes to send:
sub edx, outstr ; EDX = EDI (offset returned from int2bin) - offset of output buffer
int 0x80 ; Call kernel
mov eax, 1 ; SYS_EXIT
xor ebx, ebx ; Returncode: 0 (ok)
int 0x80 ; Call kernel
int2bin: ; Converts an integer in EAX to a binary string in outstr
mov edi, outstr ; Pointer to a string
mov ecx, 32 ; Loop counter
.LL1:
test cl, 0b111 ; CL%8 = 0 ?
jnz .F ; No: skip the next instructions
mov Byte [edi], ' ' ; Store a space
inc edi ; and increment the pointer
.F:
shl eax, 1 ; The leftmost bit into carry flag
setc dl ; Carry flag into DL
or dl, '0' ; Convert it to ASCII
mov [edi], dl ; Store it to outstr
inc edi ; Increment the pointer
loop .LL1 ; Loop ECX times
mov byte [edi], 0 ; Null termination if needed as C string (not needed here)
ret
Output:
01110010 01110101 01101111 01000110
NASM stored it backwards in EAX. The ASCII of leftmost character is stored in the rightmost byte of EAX, the second-to-last character is to be found in the second byte, and so on. Better to see when those bytes are printed as ASCII characters:
GLOBAL _start
SECTION .bss
outstr resb 40
SECTION .data
_start:
mov eax, 'Four' ; Load EAX with a string
call int2str ; Convert it to a binary string in outstr
mov byte [edi], 10 ; Add a line feed
inc edi ; Increment the pointer
mov eax, 4 ; SYS_WRITE
mov ebx, 1 ; STDOUT
mov ecx, outstr ; Pointer to output buffer
mov edx, edi ; Count of bytes to send:
sub edx, outstr ; EDX = EDI (offset returned from int2bin) - offset of output buffer
int 0x80 ; Call kernel
mov eax, 1 ; SYS_EXIT
xor ebx, ebx ; Returncode: 0 (ok)
int 0x80 ; Call kernel
int2str: ; Converts an integer in EAX to an ASCII string in outstr
mov edi, outstr ; Pointer to a string
mov ecx, 4 ; Loop counter
.LL1:
rol eax, 8
mov [edi], al ; Store it to outstr
inc edi ; Increment the pointer
loop .LL1 ; Loop ECX times
mov byte [edi], 0 ; Null termination if needed as C string (not needed here)
ret
Output:
ruoF
Both programs above show EAX in big endian order. This is the order you are familiar with looking at decimal numbers. The most significant digit is left and the least significant digit is right. However, EAX would be saved in memory or disk in little endian order, starting the sequence from the right with the least significant byte. Looking at the memory with a disassembler or debugger you would see 'F','o','u','r' as well as you had defined it in a .data section with db 'Four'. Therefore you'll get no difference when you load a register with a string, save it to memory and call the write routine of the kernel:
GLOBAL _start
SECTION .bss
outstr resb 40
SECTION .data
_start:
mov eax, 'Hell' ; Load EAX with the first part of the string
mov ebx, 'o wo' ; Load EBX with the second part
mov ecx, 'rld!' ; Load ECX with the third part
mov dword [outstr], eax ; Store the first part in outstr (little endian)
mov dword [outstr+4], ebx ; Append the second part
mov dword [outstr+8], ecx ; Append the third part
mov eax, 4 ; SYS_WRITE
mov ebx, 1 ; STDOUT
mov ecx, outstr ; Pointer to output buffer
mov edx, (3*4) ; Count of bytes to send (3 DWORD à 4 bytes)
int 0x80 ; Call kernel
mov eax, 1 ; SYS_EXIT
xor ebx, ebx ; Returncode: 0 (ok)
int 0x80 ; Call kernel
Output:
Hello world!
Please note: This behavior is made by the NASM programmers. Other assemblers might have a different behavior.

write number to file using NASM

How do I write a variable to a file using NASM?
For example, if I execute some mathematical operation - how do I write the result of the operation to write a file?
My file results have remained empty.
My code:
%include "io.inc"
section .bss
result db 2
section .data
filename db "Downloads/output.txt", 0
section .text
global CMAIN
CMAIN:
mov eax,5
add eax,17
mov [result],eax
PRINT_DEC 2,[result]
jmp write
write:
mov EAX, 8
mov EBX, filename
mov ECX, 0700
int 0x80
mov EBX, EAX
mov EAX, 4
mov ECX, [result]
int 0x80
mov EAX, 6
int 0x80
mov eax, 1
int 0x80
jmp exit
exit:
xor eax, eax
ret
You have to implement ito (integer to ascii) subsequently len for this manner. This code tested and works properly in Ubuntu.
section .bss
answer resb 64
section .data
filename db "./output.txt", 0
section .text
global main
main:
mov eax,5
add eax,44412
push eax ; Push the new calculated number onto the stack
call itoa
mov EAX, 8
mov EBX, filename
mov ECX, 0x0700
int 0x80
push answer
call len
mov EBX, EAX
mov EAX, 4
mov ECX, answer
movzx EDX, di ; move with extended zero edi. length of the string
int 0x80
mov EAX, 6
int 0x80
mov eax, 1
int 0x80
jmp exit
exit:
xor eax, eax
ret
itoa:
; Recursive function. This is going to convert the integer to the character.
push ebp ; Setup a new stack frame
mov ebp, esp
push eax ; Save the registers
push ebx
push ecx
push edx
mov eax, [ebp + 8] ; eax is going to contain the integer
mov ebx, dword 10 ; This is our "stop" value as well as our value to divide with
mov ecx, answer ; Put a pointer to answer into ecx
push ebx ; Push ebx on the field for our "stop" value
itoa_loop:
cmp eax, ebx ; Compare eax, and ebx
jl itoa_unroll ; Jump if eax is less than ebx (which is 10)
xor edx, edx ; Clear edx
div ebx ; Divide by ebx (10)
push edx ; Push the remainder onto the stack
jmp itoa_loop ; Jump back to the top of the loop
itoa_unroll:
add al, 0x30 ; Add 0x30 to the bottom part of eax to make it an ASCII char
mov [ecx], byte al ; Move the ASCII char into the memory references by ecx
inc ecx ; Increment ecx
pop eax ; Pop the next variable from the stack
cmp eax, ebx ; Compare if eax is ebx
jne itoa_unroll ; If they are not equal, we jump back to the unroll loop
; else we are done, and we execute the next few commands
mov [ecx], byte 0xa ; Add a newline character to the end of the character array
inc ecx ; Increment ecx
mov [ecx], byte 0 ; Add a null byte to ecx, so that when we pass it to our
; len function it will properly give us a length
pop edx ; Restore registers
pop ecx
pop ebx
pop eax
mov esp, ebp
pop ebp
ret
len:
; Returns the length of a string. The string has to be null terminated. Otherwise this function
; will fail miserably.
; Upon return. edi will contain the length of the string.
push ebp ; Save the previous stack pointer. We restore it on return
mov ebp, esp ; We setup a new stack frame
push eax ; Save registers we are going to use. edi returns the length of the string
push ecx
mov ecx, [ebp + 8] ; Move the pointer to eax; we want an offset of one, to jump over the return address
mov edi, 0 ; Set the counter to 0. We are going to increment this each loop
len_loop: ; Just a quick label to jump to
movzx eax, byte [ecx + edi] ; Move the character to eax.
movsx eax, al ; Move al to eax. al is part of eax.
inc di ; Increase di.
cmp eax, 0 ; Compare eax to 0.
jnz len_loop ; If it is not zero, we jump back to len_loop and repeat.
dec di ; Remove one from the count
pop ecx ; Restore registers
pop eax
mov esp, ebp ; Set esp back to what ebp used to be.
pop ebp ; Restore the stack frame
ret ; Return to caller

In assembly, I want to search a string, replace a word, and display the new string

I am taking an assembly course and I have gotten most of the program written out, I am just having trouble replacing the word and displaying the new string. The problem asks for a sentence, a word to find, and a word to replace it with. The program scans the string, replaces any instances of the word, and shows you the new string.
Example: "The sky is blue."
Word to find: "sky"
Word to replace it: "ocean"
New String: "The ocean is blue."
Here is what I have so far:
.586
.MODEL FLAT
INCLUDE io.h ; header file for input/output
.STACK 4096
.DATA
prompt1 BYTE "String to Search: ", 0
prompt2 BYTE "Word to Search For: ", 0
prompt3 BYTE "Word to replace with: ", 0
target BYTE 80 DUP (?)
key BYTE 80 DUP (?)
strSub BYTE 80 DUP (?)
trgtLength DWORD ?
keyLength DWORD ?
lastPosn DWORD ?
strSubLen DWORD ?
resultLbl BYTE "The new sentence is: ", 0
.CODE
_MainProc PROC
input prompt1, target, 80 ;input target string
lea eax, target ;address of target
push eax ;parameter
call strlen ;strlen(target)
add esp, 4 ;remove parameter
mov trgtLength, eax ;save length of target
input prompt2, key, 80 ;input key string
lea eax, key ;address of key
push eax ;parameter
call strlen ;strlen(key)
add esp, 4 ;remove parameter
mov keyLength, eax ;save length of key
input prompt3, strSub, 80 ;input word to search for
lea eax, strSub ;address of key
push eax ;parameter
call strlen ;strlen(strSub)
add esp, 4 ;remove parameter
mov strSubLen, eax ;save length of key
mov eax, trgtLength
sub eax, keyLength
inc eax ;trgtLength - keyLength +1
mov lastPosn, eax
cld ;Left to Right comparison
mov eax, 1 ;starting position
whilePosn:
cmp eax, lastPosn ;position <= last_posn?
jnle endWhilePosn ;exit if past last position
lea esi, target ;address of target string
add esi, eax ;add position
dec esi ;address of position to check
lea edi, key ;address of key
mov ecx, keyLength ;number of position to check
repe cmpsb ;check
jz found ;exit of success
inc eax ;increment position
jmp whilePosn ;repeat
endWhilePosn:
output resultLbl, [esi] ;display new sentence
jmp quit
found:
sub edi, keyLength
mov ecx, strSubLen
lea esi, strSub
cld
rep movsb
inc eax
jmp whilePosn
quit:
mov eax, 0 ; exit with return code 0
ret
_MainProc ENDP
strlen PROC
push ebp ;establish stack frame
mov ebp, esp
push ebx ;save EBX
sub eax, eax ;length := 0
mov ebx, [ebp+8] ;address of string
whileChar:
cmp BYTE PTR [ebx], 0 ;null byte?
je endWhileChar ;exit if so
inc eax ;increment length
inc ebx ;point at next character
jmp whileChar ;repeat
endWhileChar:
pop ebx ;restore registers
pop ebp
ret
strlen ENDP
END
The code works as far as finding the word that I want to switch, but actually switching the words is tricking me up. The book says that the destination string should be in EDI and the word to replace should be in ESI, but the code they give as an example has the destination string in ESI, and word to replace in EDI (like I have here).
The book also does a pretty horrible job of explaining the "rep" and "movs" instructions, so I am 90% sure that my "found" code block is going to be where the problem is. Any help is much appreciated.
When dealing with strings in x86 assembly, one has to quickly master the rep instruction. If not, you will probably pass away before your first program has been completed.
A very good introduction on string manipulation in assembly can be found here (or here), but what you really need here are the following ones:
rep stosb: Store string
rep scas: Scan string
rep cmpsb: Compare string

Resources