Manipulate string in assembly x86 (mov and print to screen) - string

I'm working on a larger project but I'm stuck with string manipulation. My assembly file includes math coprocessor operations (it starts the coprocessor with "FINIT") but I don't think it should be interfering at all.
Basically, I have some strings that are 50 bytes long each:
$s db 50 dup (?), '$'
_cte_14 db "hello world", '$', 39 dup (?)
I need to assign the value stored in variable "_cte_14" to variable "$s"
I attempted to use a register to temporarily store the value, like this:
mov cx, _cte_14
mov $s, cx
but I get the "operand types do not match" error.
Since I know the AX, BX, CX, DX registers only hold 16 bits, I thought maybe I need to work with the memory address of the first string character, so I tried:
mov bx, offset _cte_14
mov $s, bx
but the same error shows up.
I'm using TASM to compile for an x86 processor. What would be the right way to accomplish this?
Thanks a lot in advance.

Example for to copy the characters in a loop:
s db 51 dup ('$')
_cte_14 db "hello world"
len = ($ - _cte_14) ; (current location - offset _cte_14)
40 dup ('$')
mov si, offset _cte_14 ; get source offset
mov di, offset s ; get destination offset
mov cl, len ; length of the string
P1:
mov al, [si] ; get byte from DS:SI
mov [di], al ; store byte to DS:DI
dec cl ; decrease counter, set zero flag if zero
jnz P1 ; jump if zero flag is not set
-- Variation with using a string instruction together with a repeat instruction prefix:
mov si, offset _cte_14
mov di, offset s
mov cx, len ; length of the string
cld ; clear direction flag
rep movsb ; copy from DS:SI to ES:DI, increase SI+DI, decrease CX, repeat CX times

Related

print out skewed text in x86, but can't see why the 'monitored command dumped core' timeout occurs

I am trying to write skewed Text on Terminal via assembly to get my skills going.
Example [] = spaces :
H
[]E
[][]L
[][][] L
[][][][] O
But I seem to run into a timeout, because I am trying to move a character between registers / get the character via memory address - like getting a char at an index. But it won't really work. I tried working with allocating a byte to increment in the .bss section before, but that lead to a different error which I couldn't work around (relocation truncated to fit: R_X86_64_8 against `.bss')
Maybe there is a better approach to handle this, and if you could help me get on the right path I would be so so happy. To get more context, here is the 'problematic' snippet of my latest code.
inc rcx ; increase pointer of rcx
space_loop:
push rdi ;save destination register
cmp byte [rcx], 0 ;end of string found
je eos ; jump to exit subroutine
mov rax, 4; SYS Write
mov rbx, 0 ; STDOUT
cmp rsp,[rbp] ; compare spaceIndex to String Index
je print_char
mov rdi, space ;print space
mov rdx,1 ; length = 1
syscall
inc byte [rsp] ; increase spaceIndex
jnz space_loop
print_char:
;print out char
mov rax, 4
mov rbx, 0
mov rcx,[rcx] ; <- this line breaks it dont know how to access index here
mov rdx, 1
syscall
call new_Line ; make line break
inc rcx ; increase rcx counter
inc byte [rbp] ; increase index counter
mov byte [rsp], 0 ;reset space counter
jmp space_loop; next Line

How do i reverse a string on emu8086 assembly language [duplicate]

I have to do a simple calculator in assembly using EMU8086, but every time I try to launch it EMU8086 gives this error:
INT 21h, AH=09h -
address: 170B5
byte 24h not found after 2000 bytes.
; correct example of INT 21h/9h:
mov dx, offset msg
mov ah, 9
int 21h
ret
msg db "Hello$"
I checked the other stuff, but there were no mistakes:
data segment
choice db ?
snum1 db 4 dup(?)
snum2 db 4 dup(?)
sres db 4 dup(?)
num1 db ?
num2 db ?
res db ?
;;menu1 db "Chose a function to procced", 10, 13, "Add [+]", 10, 13, "Sub [-]", 10, 13
;;menu2 db "Mul [*]", 10, 13, "Div [/]", 10, 13, "Mod [%]", 10, 13, "Pow [^]", 10, 13, "Exit [x]$"
messStr db "Enter Your Choice:",10,13,"",10,13,"Add --> +",10,13,"Sub --> -",10,13,"Mul --> *",10,13,"Div --> /",10,13,"Mod --> %",10,13,"Pow --> ^",10,13,"Exit --> X",10,13,"$"
msg1 db "Enter first number$"
msg2 db "Enter second number$"
msg3 db "Press any key to procced$"
msg4 db "The result is $"
ends
stack segment
dw 128 dup(0)
ends
code segment
assume cs:code, ds:data, ss:stack
newline proc ;; new line
push ax
push dx
mov ah, 2
mov DL, 10
int 21h
mov ah, 2
mov DL, 13
int 21h
pop dx
pop ax
ret
endp
printstr proc ;; print string
push BP
mov BP, SP
push dx
push ax
mov dx, [BP+4]
mov ah, 9
int 21h
pop ax
pop dx
pop BP
ret 2
endp
inputstr proc ;; collect input
push BP
mov BP, SP
push bx
push ax
mov bx, [BP+4]
k1:
mov ah, 1
int 21h
cmp al, 13
je sofk
mov [bx], al
inc bx
jmp k1
sofk:
mov byte ptr [bx], '$'
pop ax
pop bx
pop BP
ret 2
endp
getNums proc ;; get the numbers
call newline
push offset msg1
call printstr
call newline
push offset snum1
call inputstr
call newline
push offset msg2
call printstr
call newline
push offset snum2
call inputstr
ret
endp
start:
mov ax, data
mov ds, ax
mov ax, stack
mov ss, ax
;; print the main menu
call newline
push offset msg4
call printstr
;; collect the input
call newline
mov bx, offset choice
mov ah, 1
int 21h
mov [bx], al
;; check it
mov al, choice
cmp al, '+'
jne cexit
call getNums
jmp cont
cexit:
cmp al, 'x'
je cend
cont:
;; pause before going to the main menu
call newline
push offset msg3
call printstr
mov bx, offset choice
mov ah, 1
int 21h
call newline
call newline
call newline
jmp start
cend:
mov ax, 4c00h
int 21h
ends
end start
I cut most of the code segment because it wasn't important here.
After experimenting with the code I found that the problem was related to the lengths of the messages in the data segment. menu1 & menu2 were too long and any message after them can't be printed (msg1 & msg2 are printed, but nothing after them). I checked if I should merge menu1 & menu2, but it didn't help out. Please help me find out what is wrong with it.
The error message means you use int 21h / AH=09h on a string that didn't end with a $ (ASCII 24h). The system-call handler checked 2000 bytes without finding one.
Often, that means your code or data is buggy, e.g. in a fixed string you forgot a $ at the end, or if copying bytes into a buffer then you maybe overwrote or never stored a '$' in the first place.
But in this case, it appears that EMU8086 has a bug assembling push offset msg4. (In a way that truncates the 00B5h 16-bit address to 8-bit, and sign-extends back to 16, creating a wrong pointer that points past where any $ characters are in your data.)
Based on the error message below I know you are using EMU8086 as your development environment.
INT 21h, AH=09h -
address: 170B5
byte 24h not found after 2000 bytes.
; correct example of INT 21h/9h:
mov dx, offset msg
mov ah, 9
int 21h
ret
msg db "Hello$"
I'm no expert on EMU8086 by any stretch of the imagination. I do know why your offsets don't work. I can't tell you if there is a proper way to resolve this, or if it's an EMU8086 bug. Someone with a better background on this emulator would know.
You have created a data segment with some variables. It seems okay to me (but I may be missing something). I decided to load up EMU8086 to actually try this code. It assembled without error. Using the debugger I single stepped to the push offset msg1 line near the beginning of the program. I knew right away from the instruction encoding what was going on. This is the decoded instruction I saw:
It shows the instruction was encoded as push 0b5h where 0b5h is the offset. The trouble is that it is encoded as a push imm8 . The two highlighted bytes on the left hand pane show it was encoded with these bytes:
6A B5
If you review an instruction set reference you'll find the encodings for PUSH instruction encoded with 6A is listed as:
Opcode* Instruction Op/En 64-Bit Mode Compat/Leg Mode Description
6A ib PUSH imm8 I Valid Valid Push imm8.
You may say that B5 fits within a byte (imm8) so what is the problem? The smallest value that can be pushed onto the stack with push in 16-bit mode is a 16-bit word. Since a byte is smaller than a word, the processor takes the byte and sign extends it to make a 16-bit value. The instruction set reference actually says this:
If the source operand is an immediate of size less than the operand size, a sign-extended value is pushed on the stack
B5 is binary 10110101 . The sign bit is the left most bit. Since it is 1 the upper 8 bits placed onto the stack will be 11111111b (FF). If the sign bit is 0 then then 00000000b is placed in the upper 8 bits. The emulator didn't place 00B5 onto the stack, it placed FFB5. That is incorrect! This can be confirmed if I step through the push 0b5h instruction and review the stack. This is what I saw:
Observe that the value placed on the stack is FFB5. I could not find an appropriate syntax (even using the word modifier) to force EMU8086 to encode this as push imm16. A push imm16 would be able to encode the entire word as push 00b5 which would work.
Two things you can do. You can place 256 bytes of dummy data in your data segment like this:
data segment
db 256 dup(?)
choice db ?
... rest of data
Why does this work? Every variable defined after the dummy data will be an offset that can't be represented in a single byte. Because of this EMU8086 is forced to encode push offset msg1 as a word push.
The cleaner solution is to use the LEA instruction. This is the load effective address instruction. It takes a memory operand and computes the address (in this case the offset relative to the data segment). You can replace all your code that uses offset with something like:
lea ax, [msg1]
push ax
AX can be any of the general purpose 16-bit registers. Once in a register, push the 16-bit register onto the stack.
Someone may have a better solution for this, or know a way to resolve this. If so please feel free to comment.
Given the information above, you may ask why did it seem to work when you moved the data around? The reason is that the way you reorganized all the strings (placing the long one last) caused all the variables to start with offsets that were less than < 128. Because of this the PUSH of an 8-bit immediate offset sign extended a 0 in the top bits when placed on the stack. The offsets would be correct. Once the offsets are >= 128 (and < 256) the sign bit is 1 and the value placed on the stack sign will have an upper 8 bits of 1 rather than 0.
There are other bugs in your program, I'm concentrating on the issue directly related to the error you are receiving.
I reviewed your code and concentrated on the following sequence of instructions:
mov bx, offset choice ; here you set BX to the address of 'choice'
mov ah, 1
int 21h ; here you 'READ CHARACTER FROM STANDARD INPUT, WITH ECHO'
mov [bx], al ; because INT 21h does preserve BX, you are writing back the result of the interrupt call (AL) back to the memory location at BX, which is named 'choice'
;; check it
mov al, choice ; HERE you are moving a BYTE variable named 'choice' to AL, overwriting the result of the last INT 21h call
cmp al, '+' ; ... and compare this variable to the ASCII value of '+'
jne cexit ; if this variable is unequal to '+' you jump to 'cexit'
call getNums ; otherwise you try to get another number from the input/STANDARD CONSOLE
So your sequence
mov bx, offset choice ; here you set BX to the address of 'choice'
...
mov [bx], al ; because INT 21h does preserve BX, you ...
...
mov al, choice
essentially means, that you are setting BX to the address of 'choice', then setting 'choice'([BX]) to AL and copying it back to AL.
This is redundant.
After that, you compare that char to '+' and...
if that char equals to '+', you get the next char with call getNums and then continue with cont:.
if that char does not equal to '+', you compare it to 'x', the exit-char. If it's not 'x', you fall through to cont:
No error here.
So your problem with menu1 and menu2 may stem from some escape characters included in your strings like %,/,\. For example, % is a MACRO character in some assemblers which may create problems.
simple solution is that your strings should always end in '$'
change DUP(?) to DUP('$') and all other strings end with ,'$'

Converting a string of numbers into an integer in Assembly x86

I'm trying to convert a user inputted string of numbers to an integer.
For example, user enters "1234" as a string I want 1234 stored in a DWORD variable.
I'm using lodsb and stosb to get the individual bytes. My problem is I can't get the algorithm right for it. My code is below:
mov ecx, (SIZEOF num)-1
mov esi, OFFSET num
mov edi, OFFSET ints
cld
counter:
lodsb
sub al,48
stosb
loop counter
I know that the ECX counter is going to be a bit off also because it's reading the entire string not just the 4 bytes, so it's actually 9 because the string is 10 bytes.
I was trying to use powers of 10 to multiply the individual bytes but I'm pretty new to Assembly and can't get the right syntax for it. If anybody can help with the algorithm that would be great. Thanks!
A simple implementation might be
mov ecx, digitCount
mov esi, numStrAddress
cld ; We want to move upward in mem
xor edx, edx ; edx = 0 (We want to have our result here)
xor eax, eax ; eax = 0 (We need that later)
counter:
imul edx, 10 ; Multiply prev digits by 10
lodsb ; Load next char to al
sub al,48 ; Convert to number
add edx, eax ; Add new number
; Here we used that the upper bytes of eax are zeroed
loop counter ; Move to next digit
; edx now contains the result
mov [resultIntAddress], edx
Of course there are ways to improve it, like avoiding the use of imul.
EDIT: Fixed the ecx value

x86 Assembly String Buffer Number to ASCII

I was writing an x86 assembly program to output a number in hexadecimal. The program was assembled using nasm and the image file ran by qemu. The behavior of the program confused me a lot. As the working program below suggests, I wouldn't have to add 0x30 to a digit to get it to print the character of that digit.
; Boot sector code offset: 0x7c00
[org 0x7c00]
mov dx, 0x1fb6 ; The hexadecimal to be printed
call print_hex ; call the function
jmp $ ; jump infinitely
%include "print_string.asm" ; Include the print_string function
print_hex:
pusha ; push all registers to stack
mov ax, 0x4 ; rotate through the number four times
print_hex_loop:
cmp ax, 0x0 ; compare the counter with 0
jle print_hex_end ; if it is zero then jump to the end
mov cx, dx ; move dx to cx
and cx, 0x000F ; take the lower four binary digits of cx
cmp cx, 0xa ;compare the digits with 0xa
jge print_hex_letter ; if it is larger than a, jump to printing character
add cx, 0x0 ; otherwise print the ascii of a number
jmp print_hex_modify_string ; jump to routine for modifing the template
print_hex_letter:
add cx, 0x7 ; print the ascii of a letter
print_hex_modify_string:
mov bx, HEX_OUT ; bring the address of HEX_OUT into dx
add bx, 0x1 ; skip the 0x
add bx, ax ; add the bias
add byte [bx], cl ; move the character into its position
shr dx, 4 ; shift right 4 bits
sub ax, 0x1 ; subtract 1 from the counter
jmp print_hex_loop ; jump back to the start of the function
print_hex_end:
mov bx, HEX_OUT ; move the address of HEX_OUT to bx
call print_string ; call the function print_string
popa ; pop all registers from stack
ret ; return to calling function
HEX_OUT:
db '0x0000',0 ; The template string for printing
times 510-($-$$) db 0 ; fill zeros
dw 0xaa55 ; MAGIC_FLAG for boot
boot_sect.asm
print_string:
pusha
mov ah, 0x0e
mov al, [bx]
print_string_loop:
cmp al, 0x0
je print_string_end
int 0x10
add bx, 0x1
mov al, [bx]
jmp print_string_loop
print_string_end:
popa
ret
print_string.asm
The output of this program is what I expected, but when I tried to add 0x30 on the numerals to get the ASCII code of the digits, the output was gibberish. Is there some trick to it or am I missing some key points here?
Thanks!
The answer to your original question:
Because you do add byte [bx], cl to write digit into buffer, and the buffer already contains '0', so the first time it will work correctly. Calling print_hex second time will produce gibberish again, as the HEX_OUT content is already modified (trivia: which hex number printed as first would allow also some second value to be printed correctly?).
Now just for fun I'm adding how I would probably do print_hex for myself. Maybe it will give you additional ideas for your x86 ASM programming, I tried to comment it a lot to explain why I'm doing things in a way I'm doing them:
First I would separate formatting function, so I could eventually reuse it elsewhere, so input is both number and target buffer pointer. I'm using LUT (look up table) for ASCII conversion, as the code is simpler. If you care about size, it's possible to do it in code with branching in less bytes and use the slower pusha/popa to save registers.
format_hex:
; dx = number, di = 4B output buffer for "%04X" format of number.
push bx ; used as temporary to calculate digits ASCII
push si ; used as pointer to buffer for writing chars
push dx
lea si,[di+4] ; buffer.end() pointer
format_hex_loop:
mov bx,dx ; bx = temporary to extract single digit
dec si ; si = where to write next digit
and bx,0x000F ; separate last digit (needs whole bx for LUT indexing)
shr dx,4 ; shift original number one hex-digit (4 bits) to right
mov bl,[format_hex_ascii_lut+bx] ; convert digit 0-15 value to ASCII
mov [si],bl ; write it into buffer
cmp di,si ; compare buffer.begin() with pointer-to-write
jb format_hex_loop ; loop till first digit was written
pop dx ; restore original values of all modified regs
pop si
pop bx
ret
format_hex_ascii_lut: ; LUT for 0-15 to ASCII conversion
db '0123456789ABCDEF'
Then for convenience a print_hex function may be added too, providing its own buffer for formatting with "0x" and nul terminator:
print_hex:
; dx = number to print
push di
push bx
; format the number
mov di,HEX_OUT+2
call format_hex
; print the result to screen
lea bx,[di-2] ; bx = HEX_OUT
; HEX_OUT was already set with "0x" and nul-terminator, otherwise I would do:
; mov word [bx],'0x'
; mov byte [bx+6],0
call print_string
pop bx
pop di
ret
HEX_OUT:
db '0x1234',0 ; The template string for printing
And finally example usage from the boot code:
mov dx,0x1fb6 ; The hexadecimal to be printed
call print_hex
mov dx,ax ; works also when called second time
call print_hex ; (but would be nicer to print some space between them)
jmp $ ; loop infinitely
(I did verify this code to some extend (that it will compile and run), although only by separate parts of it and in 32b environment (patching few lines to make it 32b), so some bug may have slipped in. I don't have 16b environment to verify it as complete boot code.)

How do I convert a string representing a signed hex int into its signed int Doubleword number in 80x86?

So I am taking an assembly language course and I am stuck on this problem from the book:
Using the windows 32 console (so I have an io.h to use), I am supposed to take a valid hex value inputted by the user and then display the actual hex value in the register EAX. So if the user entered "AB CD E2 18", then after the procedure EAX would hold the value: ABCDE218.
The parts that I am stuck on are the A-F values. If I use A for example, I can get the bits to read 00000010, but I don't know how to change that into its hex value A. Here is what I have so far:
.586
.MODEL FLAT
.CODE
hexToInt PROC
push ebp ; save base pointer
mov ebp, esp ; establish stack frame
sub esp, 4 ; local space for sign
push ebx ; Save registers
push edx
push esi
pushfd ; save flags
mov esi,[ebp+8] ; get parameter (source addr)
WhileBlankD:
cmp BYTE PTR [esi],' ' ; space?
jne EndWhileBlankD ; exit if not
inc esi ; increment character pointer
jmp WhileBlankD ; and try again
EndWhileBlankD:
mov eax,1 ; default sign multiplier
IfPlusD:cmp BYTE PTR [esi],'+' ; leading + ?
je SkipSignD ; if so, skip over
IfMinusD:
cmp BYTE PTR [esi],'-' ; leading - ?
jne EndIfSignD ; if not, save default +
mov eax,-1 ; -1 for minus sign
SkipSignD:
inc esi ; move past sign
EndIfSignD:
mov [ebp-4],eax ; save sign multiplier
mov eax,0 ; number being accumulated
WhileDigitD:
cmp BYTE PTR [esi],'0' ; compare next character to '0'
jb EndWhileDigitD ; not a digit if smaller than '0'
cmp BYTE PTR [esi],'9' ; compare to '9'
ja TestForHexD
mov bl,[esi] ; ASCII character to BL
and ebx,0000000Fh ; convert to single-digit integer
and eax, ebx
shl eax, 4
inc esi
jmp WhileDigitD
TestForHexD:
cmp BYTE PTR [esi], 'F'
ja EndWhileDigitD
mov bl, [esi]
sub bl, 31h
and ebx, 000000FFh
or al, bl
shl eax, 4
inc esi ; increment character pointer
jmp WhileDigitD ; go try next character
EndWhileDigitD:
; if value is < 80000000h, multiply by sign
cmp eax,80000000h ; 80000000h?
jnb endIfMaxD ; skip if not
imul DWORD PTR [ebp-4] ; make signed number
endIfMaxD:
popfd ; restore flags
pop esi ; restore registers
pop edx
pop ebx
mov esp, ebp ; delete local variable space
pop ebp
ret ; exit
hexToInt ENDP
END
The TestForHex label is where I am trying to convert the ASCII string to hex. I was looking around and read that I could accomplish my goal by shifting and masking, but I can't figure it out and I can't find any examples. At this point I am sure its something really small that I am just over looking, but I am stuck.
There are some bugs in your code.
First, in 0 ... 9 string to integer conversion code, you don't do ASCII to binary conversion as you should do, but instead you do and ebx,0Fh, which is incorrect. You need to subtract '0' (30h) from each ASCII character, like this:
mov bl,[esi]
sub bl,'0' ; sub bl,30h
Then, also in 0 ... 9 string to integer conversion code:
and eax, ebx
If the number consists of only 0...9 digits, and eax, ebx will produce always 0. It should be:
or al,bl
Then, you do shl eax,4, even if you don't know if there will be more digits. That means that the number will be 16 times bigger than it should.
Then, you give the example input with spaces, but your code does not handle spaces (20h) properly, it ends reading input for any value below '0' (30h), it seems to accept only leading spaces (skip this if you don't want to accept spaces in between).
So, the entire code block above should be:
WhileDigitD:
cmp byte ptr [esi], ' ' ; delete this if you don't want spaces in between.
je next_char ; delete this if you don't want spaces in between.
cmp BYTE PTR [esi],'0' ; compare next character to '0'
jb EndWhileDigitD ; not a digit if smaller than '0'
cmp BYTE PTR [esi],'9' ; compare to '9'
ja TestForHexD
mov bl,[esi] ; ASCII character to BL
sub bl,'0' ; sub bl,30h -> convert ASCII to binary.
shift_eax_by_4_and_add_bl:
shl eax,4 ; shift the current value 4 bits to left.
or al,bl ; add the value of the current digit.
next_char:
inc esi
jmp WhileDigitD
I also added labels next_char and shift_eax_by_4_and_add_bl. The reason for next_char should be evident, shift_eax_by_4_and_add_bl is to minimize duplicate code of 0...9 and A...F code blocks, see below.
You don't check that that the hexadecimal A...F digit is within range A ... F, only that it's below or equal to F. Otherwise it has same bug with shl eax,4. And as usually duplicate code should be avoided, I added shift_eax_by_4_and_add_bl label to minimize duplicate code.
So I think it should be:
Edit: corrected sub bl,31h -> sub bl,('A'-0Ah).
TestForHexD:
cmp BYTE PTR [esi], 'A'
jb EndWhileDigitD
cmp BYTE PTR [esi], 'F'
ja EndWhileDigitD
mov bl,[esi]
sub bl,('A'-0Ah) ; sub bl,55 -> convert ASCII to binary.
jmp shift_eax_by_4_and_add_bl
If you need to convert a character (for simplicity, say, in upper case) representing a hex digit into the value of that digit you need to do this:
IF char >= 'A'
value = char - 'A' + 10
ELSE
value = char - '0'
ENDIF
If you need to do the reverse, you do the reverse:
IF value >= 10
char = value - 10 + 'A'
ELSE
char = value + '0'
ENDIF
Here you exploit the fact that the ASCII characters 0 through 9 have consecutive ASCII codes and so do the ASCII characters A through F.

Resources