Compare two strings in x86 assembly - string

I want to compare two strings in x86 assembly. One of them is read from a file and the other one is read from keyboard and both are saved in a variable.
I have no idea how to compare these two variables. I'll be so excited if someone would give me an example or something that can help me.
.386
.model flat, stdcall
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
includelib msvcrt.lib
include Macro.asm
extern exit: proc
extern printf:proc
extern scanf:proc
extern strlen:proc
extern fscanf:proc
extern fopen:proc
extern fprintf:proc
extern fclose:proc
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
public start
.data
; NUL-terminated format and mode strings passed to the C runtime.
format_sir db "%s", 0 ; token input format for scanf/fscanf; NOTE(review): unbounded, while the buffers below hold 20 bytes
format_afisare db "%s", 0
format_zecimal db "%d", 0 ; "%d", not "&d": without '%' the string is plain text, not a conversion
mod_citire db "r",0 ; fopen mode: read
mod_scriere db "w",0 ; fopen mode: write
; File pointers as returned by fopen (0 until a file has been opened).
pointer_fisier_sursa dd 0
pointer_fisier_destinatie dd 0
; 20-byte, zero-filled text buffers.
nume_fisier db 20 dup(0) ; file name typed by the user
fpass db 20 dup(0) ; second token read from the file
fuser db 20 dup(0) ; first token read from the file
user db 20 dup(0) ; first token read from the keyboard
pass db 20 dup(0) ; second token read from the keyboard
.code
start:
; Keep asking for a file name until fopen succeeds on it.
citire_fisier:
        push    offset nume_fisier
        push    offset format_sir
        call    scanf                           ; scanf("%s", nume_fisier)
        add     esp, 8
        ; Macro: fopen(nume_fisier, mod_citire), result stored in pointer_fisier_sursa.
        deschide_fisier_citire nume_fisier, mod_citire, pointer_fisier_sursa
        cmp     pointer_fisier_sursa, 0
        je      citire_fisier                   ; fopen returned 0: ask again
; Two tokens from the file: first into fuser, second into fpass.
citire_user_parola:
        push    offset fuser
        push    offset format_sir
        push    pointer_fisier_sursa
        call    fscanf                          ; fscanf(file, "%s", fuser)
        add     esp, 12
        push    offset fpass
        push    offset format_sir
        push    pointer_fisier_sursa
        call    fscanf                          ; fscanf(file, "%s", fpass)
        add     esp, 12
; Two tokens from the keyboard: first into user, second into pass.
        push    offset user
        push    offset format_sir
        call    scanf                           ; scanf("%s", user)
        add     esp, 8
        push    offset pass
        push    offset format_sir
        call    scanf                           ; scanf("%s", pass)
        add     esp, 8
; NOTE(review): the listing stops here; the comparison and the program exit are not shown.
Macro.asm :
deschide_fisier_citire macro nume_fisier, mod_deschidere, pointer_fisier_sursa
        ; pointer_fisier_sursa = fopen(nume_fisier, mod_deschidere)
        ; Arguments are pushed last-to-first; the caller releases them.
        push    offset mod_deschidere
        push    offset nume_fisier
        call    fopen
        add     esp, 8                          ; drop the two arguments
        mov     pointer_fisier_sursa, eax       ; keep fopen's return value
endm
; Calls fclose on the file pointer held in the given variable.
; The variable itself is not modified by this macro.
inchidere_fisier macro pointer_fisier_sursa
push pointer_fisier_sursa
call fclose
add esp,4 ; release the single argument
endm

In HLA (High Level Assembly); for more on the string instructions see http://www.plantation-productions.com/Webster/www.artofasm.com/Linux/HTML/StringInstructions.html
cld(); // clear the direction flag so esi/edi move upward
mov( AdrsString1, esi );
mov( AdrsString2, edi );
mov( 7, ecx ); // 7 is the length of the comparison, i.e. the size of the strings
repe.cmpsb(); // stops at the first mismatch or when ecx reaches 0; ZF = 1 afterwards means all bytes matched
In a more classical way :
mov ecx,7 ; number of bytes to compare
cld ; DF = 0: esi and edi advance upward
mov esi, offset stringA
mov edi, offset stringB
repe cmpsb ; stops at the first mismatch or when ecx reaches 0
; Branch on ZF from the last byte comparison, not on ecx:
; ecx is also 0 when only the final byte differs.
je strings_are_equal

Related

Why can't I push variable content on stack successfully?

; Prints a prompt, reads a number with scanf("%hd") into the 16-bit variable val,
; then passes val to printf("%hd").
section .data
msg db 'enter number ',10,0
output db '%hd',0
section .bss
val resw 1 ; one 16-bit word
section .text
global main
extern printf
extern scanf
main :
push msg
call printf
add esp,4
push val ; address of val
push output
call scanf
add esp,8
;movzx eax,word[val]
;push eax
push word[val] ; pushes 2 bytes only
push output
call printf
add esp,8 ; removes 8 bytes, but the two pushes above total 6
ret
push word[val]
This only pushes a word and you need to have a dword on the stack.
You can do it like:
; Build one dword argument out of two word pushes: high word first, then the value.
xor ax,ax
push ax ;Gives a high word of zero, much like your MOVZX did.
push word[val] ; low word; 4 bytes pushed in total for this argument
Your segmentation fault comes from the fact that you pushed a total of 6 bytes but removed a total of 8 bytes through add esp,8.

Finding length of String in NASM

I'm trying to check the length of a string given in the program argument as part of a larger program. I need to place the value of the string length in a variable called n, which I've left uninitialized in BSS. I've tried a couple of different methods of doing this, including the one I'm trying right now, which was given in another answer here. However, when I try to print or use the result of this little algorithm, it always returns 7. I believe I'm getting the address of what I want and not the result, but I can't figure out what to change. Here's my code:
%include "asm_io.inc"
; asm_main: checks that its first argument equals 2, then tries to count the
; bytes of a string argument, store the count in n, and range-check it (1..20).
SECTION .data
fmt1: db "%d",10,0
argErrMsg: db "Needs 2 args",10,0
argOkMsg : db "Arguments ok",10,0
doneMsg: db "Program finished, now exiting",10,0
strErrMsg: db "String must be between 1 and 20 charaters",10,0
strOkMsg: db "String length ok",10,0
SECTION .bss
X: resd 20
i: resd 1
n: resd 1 ; receives the computed string length
k: resd 1
SECTION .text
global asm_main
extern printf
extern strlen
asm_main:
enter 0,0
pusha
CHECK_ARGS:
cmp dword [ebp+8],2 ; first stack argument (presumably argc) must be 2
jne ERROR_ARGS
je OK_ARGS
ERROR_ARGS:
push dword argErrMsg
call printf
add esp,8 ; NOTE(review): 8 bytes released after a single 4-byte push
jmp EXIT
OK_ARGS:
push dword argOkMsg
call printf
add esp,8 ; NOTE(review): 8 bytes released after a single 4-byte push
jmp CHECK_STRING
CHECK_STRING:
; NOTE(review): with the first argument at [ebp+8], the second would be at
; [ebp+12]; confirm that [ebp+16] really holds a pointer to the string.
mov eax, dword[ebp+16]
push eax ;This is the code I tried using from another answer
mov ecx,0
dec eax
count:
inc ecx
inc eax
cmp byte[eax],0
jnz count
dec ecx ; ecx = number of non-zero bytes counted
ret ; NOTE(review): the top of the stack here is the eax pushed above, not a return address
pop eax
mov [n],ecx ;Tried printing it here to see the result
push dword [n]
push dword fmt1
call printf
add esp,8
cmp byte [n],1 ; only the low byte of n is compared
jl ERROR_STRING
cmp byte [n],20
jg ERROR_STRING
jmp OK_STRING ;The program always gets to here since [n] = 7?
ERROR_STRING:
push dword strErrMsg
call printf
add esp,8 ; NOTE(review): 8 bytes released after a single 4-byte push
jmp EXIT
OK_STRING:
push dword strOkMsg
call printf
add esp,8 ; NOTE(review): 8 bytes released after a single 4-byte push
jmp EXIT
EXIT:
push dword doneMsg
call printf
; NOTE(review): the pushed argument is not released before popa
popa
leave
ret
To get the length of argv[1]:
cld ;scasb must walk upward through the string
mov edi,[ebp+12] ;edi = argv = &argv[0]
mov edi,[edi+4] ;edi = argv[1]
mov ecx,-1 ;maximum count, so no 64 KiB cap on the scan
xor eax,eax ;al = 0, the terminator to search for
repne scasb ;ecx drops by one per byte examined, terminator included
not ecx ;ecx = length + 1
dec ecx ;ecx = length
main should return a 0 in eax:
popa ; restore the registers saved by pusha
xor eax,eax ; return value 0 (set after popa, which would otherwise overwrite it)
mov esp,ebp ; these two instructions are what leave does
pop ebp
ret

How to read and write a byte from and to stdin/stdout using Syscall in x86 with NASM assembler?

I am trying to read a string from stdin and print it back out using x86, NASM, and Syscalls. Reading in a byte will be a function, and writing out a byte will be a function. I am reading the string from stdin and putting each char into an array. Here is my initial idea:
;read_writer.asm
; Sketch of a program that reads one character with readin and writes it with
; writeout, using int 0x80 system calls. Both helpers are unfinished.
section .data
arr times 100 db 0 ; array of 100 elements initialzed to 0
ptr dd 0
section .text
global _start
_start:
push ebp ; setup stack
mov ebp, esp ; setup stack
push, 0x10 ; allocate space for potential local variables -- NOTE(review): the comma after push looks like a syntax error
call readin: ;call read in func -- NOTE(review): trailing colon on a call operand
push eax ;return char from readin will be in eax, push it for writeout
call writeout:
leave
mov eax, 1 ;exit
mov ebx, 0
int 0x80
; readin: sets eax = 3 and ebx = 1, then returns; no int 0x80 is issued here.
readin:
push ebp
mov ebp, esp ; setup stack
mov eax, 3 ;read
mov ebx, 1 ;stdin
; i feel like i am missing code here bit not sure what
leave ;reset stack
ret ;return eax
; writeout: sets eax = 4 and ebx = 1, then returns; no int 0x80 is issued here.
writeout:
push ebp
mov ebp, esp ; setup stack
push eax ;push eax since it was pushed earlier
mov eax, 4 ;write
mov ebx, 1 ;stdout
; i feel like i am missing code here bit not sure what
leave ;reset stack
ret ;return eax
Sample input:
Hello World
Sample output:
Hello World
The functions should follow cdecl, which I don't think I am doing correctly. I also realised I am not putting the chars into arr.
For starters, you're missing the int 0x80 in both readin and writeout.
And, as you can see here, both sys_read and sys_write expect a (const) char* in ecx. That address should point to the buffer where the bytes to write are stored / the read bytes should be stored. The value of edx should be set to the number of bytes to read / write.
So in the readin example you want something like:
sub esp,4 ; scratch slot on the stack for the incoming byte
mov edx,1 ; read exactly one byte
mov ecx,esp ; buffer = the scratch slot
mov ebx, 0 ; fd 0 is stdin (1 would be stdout)
mov eax, 3 ; sys_read
int 0x80 ; NOTE(review): the byte count returned in eax is not checked
movzx eax,byte [esp] ; return the character, zero-extended, in eax
add esp,4 ; release the scratch slot
And similarly for writeout.

Beginner issues with NASM: converting from string to number, and output to the console

I am just starting to learn NASM, and I am doing a first program involving a matrix in a text file. The file contains an N*N matrix, where the first line contains N, and the other lines each contain one row of the matrix. To start along my way in completing my larger task, i borrowed some code that reads a file line by line and outputs each line to the console.
I intend to read in the first line, convert it from string to integer, move that to a register I will use as a counter, then print out that many lines of the array. I figure even if N=7 and I fiddle with the top line of the file to say 3, if I get 3 lines printed then it works! However, this didn't work. It always printed exactly one line, suggesting that the number I read in and converted to int wasn't converted properly. I tried to output this number after conversion, but attempting to do so causes a seg fault, to my surprise!
Here is my code for NASM under Linux:
; this program demonstrates how to open files for reading
; It reads a text file line by line and displays it on the screen
extern fopen
extern fgets
extern fclose
extern printf
extern exit
global main
; main: opens hw6_1.dat, converts its first line to a number with parseInt,
; then reads and prints further lines while counting down in ecx.
segment .data
readmode: db "r",0
filename: db "hw6_1.dat",0 ; filename to open
error1: db "Cannot open file",10,0
format_1: db "%d",0
segment .bss
buflen: equ 256 ; buffer length
buffer: resd buflen ; input buffer (resd reserves buflen dwords, i.e. 4*buflen bytes)
segment .text
main: pusha
; OPENING FILE FOR READING
push readmode ; 1- push pointer to openmode
push filename ; 2- push pointer to filename
call fopen ; fopen retuns a filehandle in eax
add esp, 8 ; or 0 if it cannot open the file
cmp eax, 0
jnz .L1
push error1 ; report an error and exit
call printf
add esp, 4
jmp .L4
; READING FROM FILE
.L1: mov ebx, eax ; save filepointer of opened file in ebx
; Get first line and pass to ecx
push ebx
push dword buflen
push buffer
call fgets
add esp, 12
cmp eax, 0
je .L3
;convert string -> numeric
push buffer
call parseInt ; parseInt removes its own argument (ret 4), so no add esp here
mov ecx, eax ; ecx = line counter
.L2:
;debug
push ecx
push format_1
call printf
add esp, 8
; NOTE(review): ecx is used as the loop counter across the printf/fgets calls
; below; confirm those calls preserve it (it is normally a caller-saved register).
push ebx ; 1- push filehandle for fgets
push dword buflen ; 2- push max number of read chars
push buffer ; 3- push pointer to text buffer
call fgets ; get a line of text
add esp, 12 ; clean up the stack
cmp eax, 0 ; eax=0 in case of error or EOF
je .L3
push buffer ; output the read string (the line itself is used as the format string)
call printf
add esp, 4 ; clean up the stack
dec ecx
cmp ecx, 0
jg .L2
;CLOSING FILE
.L3: push ebx ; push filehandle
call fclose ; close file
add esp, 4 ; clean up stack
.L4: popa
call exit ; NOTE(review): no status argument is pushed for exit
; parseInt: converts the NUL-terminated string at [ebp+8] to a number.
; In:    one stack argument, a pointer to the string
; Out:   eax = accumulated value
; Saves: ebx, esi, ebp; edx is overwritten by mul
; The argument is removed by the callee (ret 4).
; Every byte before the terminating 0 is treated as a digit: there is no
; range check, so a non-digit such as a newline is folded in as well.
parseInt:
push ebp
mov ebp, esp
push ebx
push esi
mov esi, [ebp+8] ; esi points to the string
xor eax, eax ; clear the accumulator
.I1 cmp byte [esi], 0 ; end of string?
je .I2
mov ebx, 10
mul ebx ; eax *= 10
xor ebx, ebx
mov bl, [esi] ; bl = character
sub bl, 48 ; ASCII conversion
add eax, ebx
inc esi
jmp .I1
.I2: pop esi
pop ebx
pop ebp
ret 4
A sample data file is shown below, this is the one i was using:
4
2 45 16 22
17 21 67 29
45 67 97 35
68 34 90 72
I really dont understand how this is not working. The code to convert to integer was borrowed from WORKING programs, as is the code for output that i used to debug.
First, why are you calling printf with only one parameter? The proto for printf is:
int printf ( const char * format, ... );
Second, your program works almost fine, you are just not exiting the program correctly!! You are linking to the c library and it adds startup code, you need to call exit instead of ret. Actually, just a ret is not the correct way to exit any program in Linux or Windows.
Your exit code should be:
.L4:
popa
push dword 0 ; exit takes one argument, the process status; 0 = success
call exit ; does not return, so the argument is never popped
and add extern exit to your list of externs.
Your parseint seems to return an incorrect number
* EDIT *
Since you are still having problems with parseint, from the fgets docs at the c++ site, you are not reading the whole thing:
A newline character makes fgets stop reading, but it is considered a
valid character by the function and included in the string copied to
str.
So, what is happening is you are telling fgets to read in dword buflen number of bytes, which it will or it will stop reading when a newline is found and adds that to the buffer.
This:
; Get first line and pass to ecx
; Call is fgets(buffer, buflen, ebx); arguments are pushed last-to-first.
push ebx
push dword buflen
push buffer
call fgets
add esp, 12 ; release the three arguments
should be:
; Get first line and pass to ecx
push ebx
push dword buflen ; fgets stores at most buflen-1 characters plus the terminating 0
push buffer
call fgets
add esp, 12
; fgets leaves the line's newline in the buffer, and parseInt would fold it into
; the number. Cut the string at the first newline. Only edx and the flags are
; touched, so the fgets result in eax can still be tested afterwards.
mov edx, buffer
.find_nl:
cmp byte [edx], 0 ; end of string reached without a newline: nothing to strip
je .nl_done
cmp byte [edx], 10 ; newline?
je .cut_nl
inc edx
jmp .find_nl
.cut_nl:
mov byte [edx], 0 ; terminate the string where the newline was
.nl_done:

Input a string of characters and output to uppercase

I'm trying to write a program which will convert a lowercase string of characters to uppercase, using a buffer to store the initial string. The problem that I'm experiencing is that my program prints out an infinite loop of characters which have no resemblance to the string I've given it.
Other problems that I believe exist in the code are as follows:
Some subroutines use ret at the end of the call. The problem that I'm having trouble with is figuring out which of these subroutines do not actually need a ret, and are better used with jmp. To be honest, I'm a little confused here between the semantics of the two. For example, does a subroutine called with ja need to be ret'ed at the end of the call?
I'm also trying to print out the number of iterations that occur within each iteration of the loop used to convert the values. For whatever reason, I'll inc the counter and resolve to print it with a PrintNumIter routine, which, alas, doesn't do anything unfortunately.
The complete program is as follows.
Codez
; Buffer, named constants and data for the uppercase-conversion program.
bits 32
[section .bss]
buf: resb 1024 ;allocate 1024 bytes of memory to buf
[section .data]
;*************
;* CONSTANTS *
;*************
;ASCII comparison/conversion
LowercaseA: equ 0x61 ; 'a'
LowercaseZ: equ 0x7A ; 'z'
SubToUppercase: equ 0x20 ; difference between a lowercase ASCII letter and its uppercase form
;IO specifiers/descriptors
EOF: equ 0x0
sys_read: equ 0x3
sys_write: equ 0x4
stdin: equ 0x0
stdout: equ 0x1
stderr: equ 0x2
;Kernel Commands/Program Directives
_exit: equ 0x1
exit_success: equ 0x0
execute_cmd: equ 0x80 ; interrupt vector used with the int instruction
;Memory Usage
buflen: equ 0x400 ;1KB of memory
;*****************
;* NON-CONSTANTS *
;*****************
iteration_count: db 0 ; a single byte
query : db "Please enter a string of lowercase characters, and I will output them for you in uppercase ^.^: ", 10
querylen : equ $-query ; length of query in bytes
[section .text]
global _start
;===========================================
; Entry Point
;===========================================
; _start: prompt, read into buf, set up ecx/ebp, scan/convert, write buf, exit.
_start:
nop ;keep GDB from complaining
call AskUser
call Read
call SetupBuf
call Scan
call Write
jmp Exit
;===========================================
; IO Instructions
;===========================================
; Read: sys_read of up to buflen bytes from stdin into buf.
; Whatever the system call leaves in eax is returned unchanged.
Read:
mov eax, sys_read ;we're going to read in something
mov ebx, stdin ;where we obtain this is from stdin
mov ecx, buf ;read data into buf
mov edx, buflen ;amount of data to read
int execute_cmd ;invoke kernel to do its bidding
ret
; Write: sys_write of buflen bytes from buf to stdout, whatever amount was read.
Write:
mov eax, sys_write ;we're going to write something
mov ebx, stdout ;where we output this is going to be in stdout
mov ecx, buf ;ecx = address of the data to write
mov edx, buflen ;write the entire buf
int execute_cmd ;invoke kernel to do its bidding
ret
; AskUser: writes the querylen bytes of query to stdout.
AskUser:
mov eax, sys_write
mov ebx, stdout
mov ecx, query
mov edx, querylen
int execute_cmd
ret
; PrintNumIter: writes 4 raw bytes starting at iteration_count to stdout.
; ecx is saved and restored; eax, ebx and edx are overwritten.
; NOTE(review): iteration_count is declared as one byte, and the bytes are
; written as-is, not converted to decimal text.
PrintNumIter:
mov eax, sys_write
mov ebx, stdout
push ecx ;save the caller's ecx
mov ecx, iteration_count ;address of iteration_count
mov edx, 4 ;print 4 bytes of data
int execute_cmd
pop ecx ;restore the caller's ecx
ret
;===========================================
; Program Preperation
;===========================================
; SetupBuf: loads ecx from esi and points ebp one byte before buf, so that
; [ebp + ecx] addresses buf[ecx - 1].
; NOTE(review): no instruction in this listing writes esi before this point.
SetupBuf:
mov ecx, esi ;place the number of bytes read into ecx
mov ebp, buf ;place the address of buf into ebp
dec ebp ;decrement buf by 1 to prevent "off by one" error
ret
;===========================================
; Conversion Routines
;===========================================
; ToUpper has no ret or jmp of its own: execution falls through into Scan.
; NOTE(review): SubToLowercase is not among the constants defined in this
; listing (SubToUppercase is).
ToUpper:
sub dword [ebp + ecx], SubToLowercase ;subtract from the 4 bytes at [ebp + ecx]
; Scan: walks backwards over the buffer with ecx as the index.
Scan:
call PrintNumIter ;print the current iteration within the loop
cmp dword [ebp + ecx], LowercaseA ;compares 4 bytes at [ebp + ecx] against 'a'
jb ToUpper ;taken when the value is below 'a'
cmp dword [ebp + ecx], LowercaseZ ;compares 4 bytes at [ebp + ecx] against 'z'
ja ToUpper ;taken when the value is above 'z'
dec ecx ;decrement ecx by one, so we can get the next character
inc byte [iteration_count] ;increment the __value__ in iteration count by 1
jnz Scan ;tests the flags left by the inc above, not by dec ecx
ret
;===========================================
;Next:
; dec ecx ;decrement ecx by one
; jnz Scan ;if ecx != 0 scan
; ret
;===========================================
; Exit: _exit system call with status exit_success.
Exit:
mov eax, _exit
mov ebx, exit_success
int execute_cmd
Your problem is directly attributed to the fact that you never append a nul terminator to the end of your string buffer once you are done processing it (from what I remember, the read syscall doesn't read back a null).
unfortunately this is a little bit harder to do due to your odd control flow, but changing SetupBuf should do the trick (note, you should probably check that you haven't overflowed buf, but with 1KB, I doubt you'd need to worry for a learning program):
; SetupBuf: call directly after Read, while eax still holds the read result.
; Out: ecx = number of bytes read, ebp = buf - 1 (so [ebp + ecx] is the last byte read)
; NOTE(review): a full 1024-byte read puts the terminator one byte past buf,
; and a failed read leaves a negative value in eax; check both if that matters.
SetupBuf:
mov ecx, eax ;byte count returned by the read system call
mov ebp, buf
mov byte [ebp+ecx],0 ;make sure the string is nul terminated (the operand size must be stated for a store of an immediate)
dec ebp
ret
Just note
On to another issue that seems to plague your code (which you have aptly noticed): your odd control flow. Here are some simple guidelines (note: not rules, just guidelines) that will hopefully help you on your way to less spaghetti code:
JMP (and the conditional jumps) should only be used to go to labels in the same procedure; otherwise you start getting in a bind because you cannot unwind back. The only other time you can use jumps is for tail calls, but at this stage you shouldn't worry about that; it only adds confusion.
Always use CALL when you are going to another procedure, this allows you to return to the call site correctly with the RETN/RET instruction, making the control flow more logical.
A simple example:
print_num: ;PROC: num to print in ecx; the caller must save ecx if it needs it afterwards
push ecx ;second printf argument: the number
push num_format ;first printf argument: "%d\n"
call _printf
add esp,8 ;cleanup for printf: release the two pushed arguments so retn finds the return address
retn
print_loop_count: ;PROC: takes no args; calls print_num with ecx = 16 down to 1
mov ecx,16 ;loop 16 times
.next_pass: ;local jump target for the loop (the "." prefix marks it as a label, not a procedure)
push ecx ;keep the counter safe across the call
call print_num ;value to print is already in ecx
pop ecx ;counter back
dec ecx
jnz .next_pass ;again while the counter is non-zero
retn

Resources