I'm using a code for split a string with a delimiter, but it save me the "right side" and I need the "left side" of the word.
For example, if the input is 15,20,x, then the outputs should be:
15
20
x
But it show me:
15,20,x
20,x
x
This is the code that I'm using
split:
mov esi,edi
mov ecx, 0
not ecx
mov al, ','
cld
repne scasb
not ecx
lea eax, [ecx-1]
;push eax
mov eax, edi
call println ;Here is where I want to print the "left side"
;pop eax
xchg esi, edi
mov eax, esi
call length
mov edi, esi
mov ecx, eax
cmp ecx, 1
je fin
jg split
ret
fin:
ret
After repne scasb the contents of ECX has changed from -1 to -4, you need to NOT ECX and then DEC ECX to obtain ECX=2 (size of the member "15"). Then println ECX bytes of the text at ESI and repeat split: There is a rub: as the last member "x" is not terminated with comma, repne scasb will crash. You should limit ECX to the total size of input text prior to scan. I tried this variant with EuroAssembler:
; Assembled on Ubuntu with
; wine euroasm.exe split.asm
; Linked with
; ld split.obj -o split -e Main -m elf_i386
; Run with
; ./split
EUROASM
split PROGRAM Format=COFF, Entry=Main:
INCLUDE linapi.htm,cpuext32.htm ; Library which defines StdInput,StdOutput.
[.text]
Main: StdOutput ="Enter comma-separated members: "
StdInput aString ; Read aString from console, return number of bytes in ECX.
MOV EDI,aString ; Pointer to the beginning of text.
LEA EBX,[EDI+ECX] ; Pointer to the end of text.
split: MOV ESI,EDI ; Position of the 1st byte.
MOV ECX,EBX
SUB ECX,EDI ; How many bytes is left in unparsed substring.
JNA fin:
MOV AL,','
REPNE SCASB
MOV ECX,EDI
DEC ECX ; Omit the delimiter.
SUB ECX,ESI
StdOutput ESI, Size=ECX, Eol=Yes
JMP split:
fin: TerminateProgram
[.bss]
aString DB 256 * BYTE
ENDPROGRAM split
And it worked well:
./split
Enter comma-separated members: 15,20,x
15
20
x
Related
I am studying Assembly language using this nasm tutorial. Here is the code that prints a string:
SECTION .data
msg db 'Hello!', 0Ah
SECTION .text
global _start
_start:
mov ebx, msg
mov eax, ebx
; calculate number of bytes in string
nextchar:
cmp byte [eax], 0
jz finished
inc eax
jmp nextchar
finished:
sub eax, ebx ; number of bytes in eax now
mov edx, eax ; number of bytes to write - one for each letter plus 0Ah (line feed character)
mov ecx, ebx ; move the memory address of our message string into ecx
mov ebx, 1 ; write to the STDOUT file
mov eax, 4 ; invoke sys_write (kernel opcode 4)
int 80h
mov ebx, 0 ; no errors
mov eax, 1 ; invoke sys_exit (kernel opcode 1)
int 80h
It works and successfully prints "Hello!\n" to STDOUT. One thing I don't understand: it searches for \0 byte in msg, but we didn't define it. Ideally, the correct message definition should be
msg db 'Hello!', 0Ah, 0h
How does it successfully get the zero byte at the end of the string?
The similar case is in exercise 7:
; String printing with line feed function
sprintLF:
call sprint
push eax ; push eax onto the stack to preserve it while we use the eax register in this function
mov eax, 0Ah ; move 0Ah into eax - 0Ah is the ascii character for a linefeed
push eax ; push the linefeed onto the stack so we can get the address
mov eax, esp ; move the address of the current stack pointer into eax for sprint
call sprint ; call our sprint function
pop eax ; remove our linefeed character from the stack
pop eax ; restore the original value of eax before our function was called
ret ; return to our program
It puts just 1 byte: 0Ah into eax without terminating 0h, but the string length is calculated correctly inside sprint. What is the cause?
I wanna get a source string ,find a key in it and replace the key with a replace string so i copy the rest of source and the replace string in the result .
it outputs the correct prompt when the key doesnt exist in the source string : "The key does not appear in the string."
but when the source contains the key it stucks and doesnt continue running
(it looks sth in found label part have been missed and have an overflow)
can anyone help to correct the found part ?
any help will be appreciate :)
; program to search for one string embedded in another
; author: R. Detmer revised: 10/97
.386
.MODEL FLAT
ExitProcess PROTO NEAR32 stdcall, dwExitCode:DWORD
INCLUDE io.h
cr EQU 0dh ; carriage return character
Lf EQU 0ah ; linefeed character
.STACK 4096 ; reserve 4096-byte stack
.DATA
prompt1 BYTE "String to search? ", 0
prompt2 BYTE cr, Lf, "Key to search for? ", 0
prompt3 BYTE cr, Lf, "Word to replace? ", 0
source BYTE 100 DUP (?)
key BYTE 20 DUP (?)
replace BYTE 20 DUP (?)
srcLength DWORD ?
keyLength DWORD ?
repLength DWORD ?
BeginLength DWORD ?
restLength DWORD ?
cpyLength DWORD ?
lastPosn DWORD ?
restPosition DWORD ?
firstParam DWORD ?
secondParam DWORD ?
keyPosition DWORD ?
failure BYTE cr,Lf,Lf,"The key does not appear in the string.",cr,Lf,0
success BYTE cr,Lf,Lf, " The result string is : " ,cr,Lf,Lf
result BYTE 200 DUP (?)
PUBLIC _start ; make entry point public
.CODE
_start: output prompt1 ; ask for
input source,100 ; and input source string
lea eax, source ; find length of string
push eax ; length parameter
call strlen
mov srcLength,eax ; save length of source
output prompt2 ; ask for
input key,20 ; and input key string
lea eax, key ; find length of string
push eax ; length parameter
call strlen
mov keyLength,eax ; save length of key
output prompt3 ; ask for
input replace,20 ; and input replace string
lea eax, replace ; find length of string
push eax ; length parameter
call strlen
dec eax
mov repLength,eax ; save length of replace
; calculate last position of source to check
mov eax,srcLength
sub eax,keyLength
inc eax ; srcLength − keyLength + 1
mov lastPosn, eax
cld ; left to right comparison
mov eax,1 ; starting position
whilePosn: cmp eax,lastPosn ; position <= last_posn?
jnle endWhilePosn ; exit if past last position
lea esi,source ; address of source string
add esi,eax ; add position
dec esi ; address of position to check is incremented automatically
lea edi,key ; address of key
mov ecx,keyLength ; number of positions to check
repe cmpsb ; check
jz found ; exit on success
inc eax ; increment position
jmp whilePosn ; repeat
endWhilePosn:
output failure ; the search failed
jmp quit ; exit
;-------------------------------------------------------------
found:
mov keyPosition, eax ; position of key
mov ebx, eax ;copy start position of key
lea eax, source
sub ebx, eax ;position - source address
mov BeginLength, ebx ;begin Source length (before key)
add ebx, keyLength
mov eax, srcLength
sub eax, ebx
mov restLength, eax ;rest of Source length (after key)
mov eax, keyPosition
add eax, keyLength ; position + key
mov restPosition, eax
;source begin to result
lea eax, result
mov firstParam, eax ; destination address
lea eax, source
mov secondParam, eax
mov eax, BeginLength ; copy length
mov cpyLength, eax
mov esi,firstParam ;initial source address
mov edi,secondParam ;destination
mov ecx ,cpyLength
rep movsb ;copy bytes
;replace to result
mov eax, firstParam
add eax , BeginLength
mov firstParam, eax ; address of rest of result
lea eax, replace
mov secondParam, eax ; string to replace
mov eax, repLength ; copy length
mov cpyLength, eax
mov esi,firstParam ;initial source address
mov edi,secondParam ;destination
mov ecx ,cpyLength
rep movsb ;copy bytes
;Rest to result
mov eax, firstParam
add eax , repLength
mov firstParam, eax ; address of rest of result
mov eax, restPosition
mov secondParam, eax
mov eax, restLength
mov cpyLength, eax
mov esi,firstParam ;initial source address
mov edi,secondParam ;destination
mov ecx ,cpyLength
rep movsb ;copy bytes
mov BYTE PTR [edi],0 ;terminate destination string
output success
quit:
INVOKE ExitProcess, 0 ; exit with return code 0
;----------------------------------------------------------
strlen PROC NEAR32
; find length of string whose address is passed on stack
; length returned in EAX
push ebp ; establish stack frame
mov ebp, esp
pushf ; save flags
push ebx ; and EBX
sub eax, eax ; length := 0
mov ebx, [ebp+8] ; address of string
whileChar: cmp BYTE PTR [ebx], 0 ; null byte?
je endWhileChar ; exit if so
inc eax ; increment length
inc ebx ; point at next character
jmp whileChar ; repeat
endWhileChar:
pop ebx ; restore registers and flags
popf
pop ebp
ret 4 ; return, discarding parameter
strlen ENDP
END
found:
mov keyPosition, eax ; position of key
mov ebx, eax ;copy start position of key
lea eax, source
sub ebx, eax ;position - source address
mov BeginLength, ebx ;begin Source length (before key)
In these lines you have subtracted things that cannot be subtracted.
When you get at the label found, EAX has a 1-based relative position index that you copy to the EBX register. This value ranges from 1 to 100. Now you subtract the absolute address of your source buffer. This could be in the millions. That's clearly a mistake. It becomes disastrous when later on you use it as a loop counter and start corrupting memory!
success BYTE cr,Lf,Lf, " The result string is : " ,cr,Lf,Lf
You forgot to zero-terminate the success message.
It will disrupt your final macro call output success and so it would seem that the program didn't correctly replace the string.
I would like to ask about process of put instructions into registers. For example: we want to overwrite count '50' into EBX (in ASCII '50' is count '2').
EBX consists of 32 bits. When we put '50' into it, it will be arranged as binary represent, yes? (0000000 | 00000000 | 00000000 | 00110010). Have a right? What happens with bits, when we place a string into register?
EAX holds 32 bits which Intel calls "integer". The programmer - and sometimes the assembler - decides how to interpret these bits. If you load EAX with the number 50 (not the string '50')
mov eax, 50
the assembler decides to generate a machine instruction that loads the 50 in a manner, that you can read it as number 50 in a binary system:
00000000000000000000000000110010
Try out, what the assembler does if you feed it with a string:
GLOBAL _start
SECTION .bss
outstr resb 40
SECTION .data
_start:
mov eax, 'Four' ; Load EAX with a string
call int2bin ; Convert it to a binary string in outstr
mov byte [edi], 10 ; Add a line feed
inc edi ; Increment the pointer
mov eax, 4 ; SYS_WRITE
mov ebx, 1 ; STDOUT
mov ecx, outstr ; Pointer to output buffer
mov edx, edi ; Count of bytes to send:
sub edx, outstr ; EDX = EDI (offset returned from int2bin) - offset of output buffer
int 0x80 ; Call kernel
mov eax, 1 ; SYS_EXIT
xor ebx, ebx ; Returncode: 0 (ok)
int 0x80 ; Call kernel
int2bin: ; Converts an integer in EAX to a binary string in outstr
mov edi, outstr ; Pointer to a string
mov ecx, 32 ; Loop counter
.LL1:
test cl, 0b111 ; CL%8 = 0 ?
jnz .F ; No: skip the next instructions
mov Byte [edi], ' ' ; Store a space
inc edi ; and increment the pointer
.F:
shl eax, 1 ; The leftmost bit into carry flag
setc dl ; Carry flag into DL
or dl, '0' ; Convert it to ASCII
mov [edi], dl ; Store it to outstr
inc edi ; Increment the pointer
loop .LL1 ; Loop ECX times
mov byte [edi], 0 ; Null termination if needed as C string (not needed here)
ret
Output:
01110010 01110101 01101111 01000110
NASM stored it backwards in EAX. The ASCII of leftmost character is stored in the rightmost byte of EAX, the second-to-last character is to be found in the second byte, and so on. Better to see when those bytes are printed as ASCII characters:
GLOBAL _start
SECTION .bss
outstr resb 40
SECTION .data
_start:
mov eax, 'Four' ; Load EAX with a string
call int2str ; Convert it to a binary string in outstr
mov byte [edi], 10 ; Add a line feed
inc edi ; Increment the pointer
mov eax, 4 ; SYS_WRITE
mov ebx, 1 ; STDOUT
mov ecx, outstr ; Pointer to output buffer
mov edx, edi ; Count of bytes to send:
sub edx, outstr ; EDX = EDI (offset returned from int2bin) - offset of output buffer
int 0x80 ; Call kernel
mov eax, 1 ; SYS_EXIT
xor ebx, ebx ; Returncode: 0 (ok)
int 0x80 ; Call kernel
int2str: ; Converts an integer in EAX to an ASCII string in outstr
mov edi, outstr ; Pointer to a string
mov ecx, 4 ; Loop counter
.LL1:
rol eax, 8
mov [edi], al ; Store it to outstr
inc edi ; Increment the pointer
loop .LL1 ; Loop ECX times
mov byte [edi], 0 ; Null termination if needed as C string (not needed here)
ret
Output:
ruoF
Both programs above show EAX in big endian order. This is the order you are familiar with looking at decimal numbers. The most significant digit is left and the least significant digit is right. However, EAX would be saved in memory or disk in little endian order, starting the sequence from the right with the least significant byte. Looking at the memory with a disassembler or debugger you would see 'F','o','u','r' as well as you had defined it in a .data section with db 'Four'. Therefore you'll get no difference when you load a register with a string, save it to memory and call the write routine of the kernel:
GLOBAL _start
SECTION .bss
outstr resb 40
SECTION .data
_start:
mov eax, 'Hell' ; Load EAX with the first part of the string
mov ebx, 'o wo' ; Load EBX with the second part
mov ecx, 'rld!' ; Load ECX with the third part
mov dword [outstr], eax ; Store the first part in outstr (little endian)
mov dword [outstr+4], ebx ; Append the second part
mov dword [outstr+8], ecx ; Append the third part
mov eax, 4 ; SYS_WRITE
mov ebx, 1 ; STDOUT
mov ecx, outstr ; Pointer to output buffer
mov edx, (3*4) ; Count of bytes to send (3 DWORD à 4 bytes)
int 0x80 ; Call kernel
mov eax, 1 ; SYS_EXIT
xor ebx, ebx ; Returncode: 0 (ok)
int 0x80 ; Call kernel
Output:
Hello world!
Please note: This behavior is made by the NASM programmers. Other assemblers might have a different behavior.
I am a developer who uses high level languages, learning assembly language in my spare time. Please see the NASM program below:
section .data
section .bss
section .text
global main
main:
mov eax,21
mov ebx,9
add eax,ebx
mov ecx,eax
mov eax,4
mov ebx,1
mov edx,4
int 0x80
push ebp
mov ebp,esp
mov esp,ebp
pop ebp
ret
Here are the commands I use:
ian#ubuntu:~/Desktop/NASM/Program4$ nasm -f elf -o asm.o SystemCalls.asm
ian#ubuntu:~/Desktop/NASM/Program4$ gcc -o program asm.o
ian#ubuntu:~/Desktop/NASM/Program4$ ./program
I don't get any errors, however nothing is printed to the terminal. I used the following link to ensure the registers contained the correct values: http://docs.cs.up.ac.za/programming/asm/derick_tut/syscalls.html
You'll have to convert the integer value to a string to be able to print it with sys_write (syscall 4). The conversion could be done like this (untested):
; Converts the integer value in EAX to a string in
; decimal representation.
; Returns a pointer to the resulting string in EAX.
int_to_string:
mov byte [buffer+9],0 ; add a string terminator at the end of the buffer
lea esi,[buffer+9]
mov ebx,10 ; divisor
int_to_string_loop:
xor edx,edx ; clear edx prior to dividing edx:eax by ebx
div ebx ; EAX /= 10
add dl,'0' ; take the remainder of the division and convert it from 0..9 -> '0'..'9'
dec esi ; store it in the buffer
mov [esi],dl
test eax,eax
jnz int_to_string_loop ; repeat until EAX==0
mov eax,esi
ret
buffer: resb 10
programming in assembly requires a knowledge of ASCII codes and a some basic conversion routines. example: hexadecimal to decimal, decimal to hexadecimal are good routines to keep somewhere on some storage.
No registers can be printed as they are, you have to convert (a lot).
To be a bit more helpfull:
ASCII 0 prints nothing but some text editors (kate in kde linux) will show something on screen (a square or ...). In higher level language like C and C++ is it used to indicate NULL pointers and end of strings.
Usefull to calculate string lengths too.
10 is end of line. depending Linux or Windows there will be a carriage return (Linux) too or not (Windows/Dos).
13 is carriage return
1B is the ESC key (Linux users will now more about this)
255 is a hard return, I never knew why it is good for but it must have its purpose.
check http://www.asciitable.com/ for the entire list.
Convert the integer value to a string.
Here i have used macros pack and unpack to convert integers to string and macro unpack to do the vice-versa
%macro write 2
mov eax, 4
mov ebx, 1
mov ecx, %1
mov edx, %2
int 80h
%endmacro
%macro read 2
mov eax,3
mov ebx,0
mov ecx,%1
mov edx,%2
int 80h
%endmacro
%macro pack 3 ; 1-> string ,2->length ,3->variable
mov esi, %1
mov ebx,0
%%l1:
cmp byte [esi], 10
je %%exit
imul ebx,10
movzx edx,byte [esi]
sub edx,'0'
add ebx,edx
inc esi
jmp %%l1
%%exit:
mov [%3],ebx
%endmacro
%macro unpack 3 ; 1-> string ,2->length ,3->variable
mov esi, %1
mov ebx,0
movzx eax, byte[%3]
mov byte[%2],0
cmp eax, 0
jne %%l1
mov byte[%2],1
push eax
jmp %%exit2
%%l1:
mov ecx,10
mov edx,0
div ecx
add edx,'0'
push edx
inc byte[%2]
cmp eax, 0
je %%exit2
jmp %%l1
%%exit2:
movzx ecx,byte[%2]
%%l2:
pop edx
mov [esi],dl
inc esi
loop %%l2
%endmacro
section .data ; data section
msg1: db "First number : " ;
len1: equ $-msg1 ;
msg2: db "Second number : " ;
len2: equ $-msg2 ;
msg3: db "Sum : " ;
len3: equ $-msg3 ;
ln: db 10
lnl: equ $-ln
var1: resb 10
var2: resb 10
str1: resb 10
str2: resb 10
ans: resb 10
ansvar: resb 10
ansl: db ''
l1: db ''
l2: db ''
section.text ;code
global _start
_start:
write msg1,len1
read str1,10
pack str1,l1,var1
write msg2,len2
read str2,10
pack str2,l2,var2
mov al,[var1]
add al,[var2]
mov [ansvar],al
unpack ans,ansl,ansvar
write msg3,len3
write ans,10
write ln,lnl
mov ebx,0 ; exit code, 0=normal
mov eax,1 ; exit command to kernel
int 0x80 ; interrupt 80 hex, call kernel
To assembler, link and run:
nasm -f elf add.asm
ld -s -o add add.o
./add
On NASM in Arch Linux, how can I append the character zero ('0') to a 32 bit variable? My reason for wanting to do this is so that I can output the number 10 by setting a single-digit input to 1 and appending a zero. I need to figure out how to append the zero.
The desirable situation:
Please enter a number: 9
10
Using this method, I want to be able to do this:
Please enter a number: 9999999
10000000
How can I do this?
Thanks in advance,
RileyH
Well, as Bo says... but I was bored. You seem resistant to doing this the easy way (convert your input to a number, add 1, and convert it back to text) so I tried it using characters. This is what I came up with. It's horrid, but "seems to work".
; enter a number and add 1 - the hard way!
; nasm -f elf32 myprog.asm
; ld -o myprog myprog.o -melf_i386
global _start
; you may have these in an ".inc" file
sys_exit equ 1
sys_read equ 3
sys_write equ 4
stdin equ 0
stdout equ 1
stderr equ 2
LF equ 10
section .data
prompt db "Enter a number - not more than 10 digits - no nondigits.", LF
prompt_size equ $ - prompt
errmsg db "Idiot human! Follow instructions next time!", LF
errmsg_size equ $ - errmsg
section .bss
buffer resb 16
fakecarry resb 1
section .text
_start:
nop
mov eax, sys_write
mov ebx, stdout
mov ecx, prompt
mov edx, prompt_size
int 80h
mov eax, sys_read
mov ebx, stdin
mov ecx, buffer + 1 ; leave a space for an extra digit in front
mov edx, 11
int 80h
cmp byte [buffer + 1 + eax - 1], LF
jz goodinput
; pesky user has tried to overflow us!
; flush the buffer, yell at him, and kick him out!
sub esp, 4 ; temporary "buffer"
flush:
mov eax, sys_read
; ebx still okay
mov ecx, esp ; buffer is on the stack
mov edx, 1
int 80h
cmp byte [ecx], LF
jnz flush
add esp, 4 ; "free" our "buffer"
jmp errexit
goodinput:
lea esi, [buffer + eax - 1] ; end of input characters
mov byte [fakecarry], 1 ; only because we want to add 1
xor edx, edx ; count length as we go
next:
; check for valid decimal digit
mov al, [esi]
cmp al, '0'
jb errexit
cmp al, '9'
ja errexit
add al, [fakecarry] ; from previous digit, or first... to add 1
mov byte [fakecarry], 0 ; reset it for next time
cmp al, '9' ; still good digit?
jna nocarry
; fake a "carry" for next digit
mov byte [fakecarry], 1
mov al, '0'
cmp esi, buffer + 1
jnz nocarry
; if first digit entered, we're done
; save last digit and add one ('1') into the space we left
mov [esi], al
inc edx
dec esi
mov byte [esi], '1'
inc edx
dec esi
jmp done
nocarry:
mov [esi], al
inc edx
dec esi
cmp esi, buffer
jnz next
done:
inc edx
inc edx
mov ecx, esi ; should be either buffer + 1, or buffer
mov ebx, stdout
mov eax, sys_write
int 80h
xor eax, eax ; claim "no error"
exit:
mov ebx, eax
mov eax, sys_exit
int 80h
errexit:
mov edx, errmsg_size
mov ecx, errmsg
mov ebx, stderr
mov eax, sys_write
int 80h
mov ebx, -1
jmp exit
;-----------------------------
Is that what you had in mind?