I am a beginner in assembly and I am trying to make a program where I should input 2 strings from the keyboard. The first string should be the main string and the second input is the substring which I need to look for in the main string. If I find it, I should display that it was found, and if not, I should display that it wasn't found.
I tried to compare the lengths of the strings so that if the first one has fewer characters than the second, the message "Invalid" is displayed. Then I tried to compare the substring with the string until the substring is found, in which case the message "string found" is displayed; if it is not found, the message "string not found" is displayed. No matter what words I input, it always says "Invalid". How can I change that?
Here is my code:
; Question listing (16-bit DOS, MASM/TASM style): reads a string and a word
; with INT 21h/AH=0Ah and tries to report whether the word occurs in the string.
; NOTE(review): the label lines appear to be missing from this listing. start,
; valid, matching, check, continue, invalid_length, found_display,
; notfound_display and done are referenced but never defined, and no
; .data/.code directives are visible. Confirm against the original post.
.model small
.stack 200h
; '$'-terminated messages, printed with INT 21h/AH=09h
prompt1 db "Input String: $"
prompt2 db 10,10, 13, "Input Word: $"
prompt3 db 10,10, 13, "Output: $"
found db "Word Found. $"
notfound db "Word Not Found. $"
invalid db 10,10, 13, "Invalid. $"
; Input buffers for INT 21h/AH=0Ah: byte 0 = capacity (21), byte 1 = number of
; characters typed (filled in by DOS), text starts at byte 2.
InputString db 21,?,21 dup("$")
InputWord db 21,?,21 dup("$")
actlen db ?
; NOTE(review): `#data` is presumably a garbled `@data` - confirm.
mov ax, #data
mov ds, ax
mov es, ax
; Prompt for and read the main string; SI keeps the buffer address
mov ah,09h
lea dx, prompt1
int 21h
lea si, InputString
mov ah, 0Ah
mov dx, si
int 21h
; Prompt for and read the word to look for; DI keeps the buffer address
mov ah,09h
lea dx, prompt2
int 21h
lea di, InputWord
mov ah, 0Ah
mov dx, di
int 21h
; Compare the typed lengths: the word must not be longer than the string
mov cl, [si+1]
mov ch, 0
add si, 2
add di, 2
; DI was already advanced by 2 above, so this reads InputWord+3 (the second
; typed character), not the length byte at InputWord+1.
; NOTE(review): a character code in BX would normally exceed the length in CX,
; which would explain the program always taking the "Invalid" branch.
mov bl, [di+1]
mov bh, 0
cmp bx, cx
ja invalid_length
je valid
jb matching
; NOTE(review): presumably the `valid:` label belongs here (equal lengths);
; CX still holds the string length for the repeated compare.
repe cmpsb
je found_display
jne notfound_display
; BP = number of start positions at which the word could still fit
mov bp, cx ;CX is length string (long)
sub bp, bx ;BX is length word (short)
inc bp
lea si, [InputString + 2]
lea di, [InputWord + 2]
mov al, [si] ;Next character from the string
cmp al, [di] ;Always the first character from the word
je check
inc si ;DI remains at start of the word
dec bp
jnz matching ;More tries to do
jmp notfound_display
; Full comparison at the current position; SI/DI are saved so the scan can
; resume. POP does not modify flags, so JNE still tests the CMPSB result.
push si
push di
mov cx, bx ;BX is length of word
repe cmpsb
pop di
pop si
jne continue
jmp found_display
; NOTE(review): no visible jump targets the next four instructions.
mov si, ax
dec dx
lea di, InputWord
jmp matching
; Print "Invalid."
mov ah, 09h
lea dx, invalid
int 21h
jmp done
; Print "Word Found."
mov dx, offset found
mov ah, 09h
int 21h
jmp done
; Print "Word Not Found."
mov dx, offset notfound
mov ah, 09h
int 21h
;fallthrough is intentional
mov ax,4C00h
int 21h ;exit program and return to DOS
end start
I see that you have tried to apply some of the advice I gave in the answer at Finding the substring in an input string.
But it's gone wrong mostly because you decided to special-case where the inputted string has the same length as the inputted word. That's not a special case at all! If it so happens, my calculation of the number of possible finds will remain valid and yield a 1 in the BP register. In short, your problems originate from having inserted that valid part and not having edited the program accordingly.
add si, 2
add di, 2
je valid
jb matching
repe cmpsb
je found_display
jne notfound_display
You don't need all of the above once you drop the redundant valid part.
mov si, ax
dec dx
lea di, InputWord
jmp matching
And don't forget to remove any code that you don't actually need in your program, especially when you use code that you found on the internet.
; Substring search: does the word in InputWord occur in InputString?
; In:    SI -> InputString buffer, DI -> InputWord buffer (INT 21h/AH=0Ah
;        layout: byte 1 = typed length, text starts at byte 2); ES = DS,
;        because CMPSB compares DS:SI with ES:DI.
; Out:   jumps to found_display or notfound_display, which the surrounding
;        program must define.
; Uses:  AL, BX = word length, CX, BP = start positions left to try, SI, DI.
        mov     cl, [si+1]
        mov     ch, 0
        mov     bl, [di+1]
        mov     bh, 0
        mov     bp, cx                  ; CX is length string (long)
        sub     bp, bx                  ; BX is length word (short)
        jb      notfound_display        ; borrow: word is longer than string
        inc     bp                      ; -> BP is number of possible finds 1+
        lea     si, [InputString + 2]
        lea     di, [InputWord + 2]

matching:                               ; one iteration per start position
        mov     al, [si]                ; Next character from the string
        cmp     al, [di]                ; Always the first character from the word
        je      check
continue:                               ; check comes back here on a mismatch
        inc     si                      ; DI remains at start of the word
        dec     bp
        jnz     matching                ; More tries to do
        jmp     notfound_display

check:                                  ; first character matched: compare all
        push    si                      ; keep the scan position for continue
        push    di
        mov     cx, bx                  ; BX is length of word
        repe cmpsb
        pop     di                      ; POP leaves the flags from CMPSB intact
        pop     si
        jne     continue
        jmp     found_display
Some optimization
I have absorbed the instructions cmp bx, cx ja invalid_length in the calculation of the number of possible finds (shaving off 2 bytes). If the subtraction produces a borrow, you know the word is longer than the string and so you can branch away. Whether you jump to invalid_length or notfound_display is up to you...
You can shorten this program by 2 bytes if you replace lea si, [InputString + 2] lea di, [InputWord + 2] by add si, 2 add di, 2.
This should work:
; Counts how many times the second string occurs in the first one.
; 16-bit DOS program, MASM/TASM syntax.
;
; Both strings are read character by character with INT 21h/AH=01h until
; Enter (0Dh); the 0Dh is stored and serves as the terminator.
;   ar : first string, stored from index 1 (index 0 is unused)
;   br : second string, stored from index 0
; There is no bounds checking: keep the first string to 18 characters and the
; second to 19, or the 20-byte buffers overflow.
; The count is printed as a single digit, so it is only meaningful up to 9.
.model small
.stack 100h

; print p : display the '$'-terminated string p. Clobbers AH and DX.
print macro p
        lea     dx, p
        mov     ah, 09h
        int     21h
endm

.data
cn      db 0                            ; number of occurrences found
pn      db 0
space   db 10,13, " $"
msg     db 10,13, "hjut$"
msg1    db "Introduceti primul sir:$"
msg2    db "Introduceti al doilea sir:$"
msg3    db "Al doilea sir nu se gaseste in primul.$"
msg4    db "Al doilea sir se gaseste in primul. $"
ar      db 20 dup("$")
br      db 20 dup("$")

.code
start:
        mov     ax, @data
        mov     ds, ax
        mov     si, 01h                 ; first string starts at ar[1]
        mov     di, 00h
        mov     cn, 00h

        print   msg1
read1:  mov     ah, 01h                 ; read first string, including the 0Dh
        int     21h
        mov     ar[si], al
        inc     si
        cmp     al, 0dh
        jnz     read1

        mov     si, 00h
        print   msg2
read2:  mov     ah, 01h                 ; read second string, including the 0Dh
        int     21h
        mov     br[si], al
        inc     si
        cmp     al, 0dh
        jnz     read2

        mov     si, 00h                 ; lop1 pre-increments, so scan starts at ar[1]

; lop1: advance to the next character of ar and test it against br[0].
lop1:   mov     di, 00h
        inc     si
        mov     bh, ar[si]
        cmp     bh, 0dh                 ; end of first string -> report
        jz      disp
        mov     bh, br[di]
        cmp     ar[si], bh
        jnz     lop1
                                        ; first character matched: fall into lop2

; lop2: compare the remaining characters; DI = characters matched so far.
lop2:   inc     si
        inc     di
        mov     bh, br[di]
        cmp     bh, 0dh                 ; whole second string matched
        jz      l1
        cmp     ar[si], bh
        jz      lop2
        ; Mismatch: rewind SI to where this attempt started so that lop1
        ; resumes one character later. Without the rewind the scan could skip
        ; an overlapping match, or step past the 0Dh that terminates ar.
        sub     si, di
        jmp     lop1

l1:     add     cn, 01h                 ; one more occurrence
        dec     si                      ; lop1 re-increments: resume right after the match
        jmp     lop1

disp:   cmp     cn, 00h
        jz      disp1
        print   msg4
        add     cn, 30h                 ; count -> ASCII digit
        mov     dl, cn
        mov     ah, 02h
        int     21h
        jmp     exit
disp1:  print   msg3
exit:   mov     ah, 4ch
        int     21h
end start
I've been working on a project. The main goal is to count how many words do not contain letters from 'a' to 'k'. The given file has [0;1000] lines. Every line contains 6 columns.
The first two columns contain string with [1; 20] characters. Characters could be letters, numbers, and whitespaces.
3-5 columns contain integers in the range [-100; 100]. 6th column contains real numbers in the range [-9.99; 9.99] with only two digits after the decimal point.
Each section I separated by a semicolon ';'.
A11;bas morning;0;0;5;1.15
My problem is that if there are newlines (empty lines) at the end of the file, my program does not skip them but adds them to the final count.
TASK: calculate how many words (the word is one or more symbols without ' '(space)) in the first two columns do not contain letters 'B' or 'C'. And print that integer number.
I have tried to compare al with 0x20 and jump to the next section if it is smaller, but that didn't help me.
What I have done so far
; Main
mov si, 2
call procSkaitytiEilute
; Check the first two columns
;mov al, ';'
mov di, line
mov al, [di]
inc di
cmp al, ' '
je .skipSpaces
cmp al, ';'
je .q3
dec di
mov bx, 1
mov al, [di]
inc di
cmp al, ' '
je .q2
cmp al, ';'
je .q2
jmp .q8
cmp al, 20h
jl .skipLine
jmp .q7
cmp al, 'A'
jl .q5
jmp .q4
cmp al, 'K'
jae .q5
mov bx, 0 ; One or more invalid chars in current word
jmp .q1
cmp al, 'a'
jae .q6
jmp .q1
cmp al, 'k'
jae .q1
mov bx, 0
jmp .q1
add [lineCount], bx ; BX=[0,1] Counting the 'good' words
cmp al, ';'
jne .skipSpaces
dec si ; Next column?
jnz .skipSpaces
cmp [readLastLine], byte 0
je .whileNotTheEndOfFile
; Jeigu ne failo pabaiga, kartojame cikla
sub [lineCount], 2
cmp [readLastLine], byte 0
je .whileNotTheEndOfFile
; Hexadecimal convertion to decimal
mov dx, lineCount
mov ax, [lineCount]
call procUInt16ToStr
call procPutStr
mov si, dx
cmp al, 0
jne .writeToFile
sub si, dx
lea cx, [si-1]
mov bx, [writingDescriptor]
mov ah, 40h
int 21h
; Closing Files
mov bx, [writingDescriptor]
call procFClose
mov bx, [readingDescriptor]
call procFClose
%include 'yasmlib.asm'
; void procSkaitytiEilute()
; Reads one line from the file [readingDescriptor] into the buffer `line`,
; appends a '$' terminator and stores the number of bytes read in [lineLength].
; Sets [readLastLine] to 1 when end of file (or an error) is reached.
; AX, BX, CX and SI are preserved.
; NOTE(review): the procedure label, the .loop/.endOfFile/.endOfLine labels
; and the final `ret` are not visible in this listing.
push ax
push bx
push cx
push si
mov bx, [readingDescriptor]
mov si, 0
; NOTE(review): procFGetChar comes from yasmlib.asm; the code below relies on
; it returning the character in CL and AX = 0 at end of file - confirm.
call procFGetChar
; End if the end of file or error
cmp ax, 0
je .endOfFile
; CMP AX,0 always clears CF, so this jump can never be taken; the carry flag
; returned by procFGetChar has been overwritten by the compare.
jc .endOfFile
; Putting symbol to buffer
mov [line+si], cl
inc si
; Check if there is \n?
cmp cl, 0x0A
je .endOfLine
jmp .loop
; End of file: remember that the last line has been read
mov [readLastLine], byte 1
; End of line: terminate the buffer and record its length
mov [line+si], byte '$'
mov [lineLength], si
pop si
pop cx
pop bx
pop ax
section .data
db 'input.dat', 00
dw 0000
times 128 db 00
dw 0000
db 00
db 64
times 66 db '$'
dw 0000
times 128 db 00
My file:
A11;bas aaa;0;0;5;1.15
My output: 4
Needed output: 2 (because there are only 2 "good" words "XYZ" and "PPP"), my program counts 2 more words because of a new line at the end.
It would appear that in contrast with the original task description your file can contain one or more empty lines as well. Or even lines with spaces followed by the newline codes.
To deal with these empty lines (only containing the bytes 13 and 10), your idea to skip on codes below 32 is good, but it's in the wrong place.
Below is the quick fix. Please notice that the ASCII codes are to be treated as unsigned numbers. Don't use the signed jl instruction on them.
mov di, line
mov al, [di]
inc di
cmp al, ' '
je .skipSpaces
cmp al, ';'
je .q3
dec di
cmp al, 32 ; NEW
jb .skipLine ; NEW
mov bx, 1
mov al, [di]
inc di
cmp al, ' '
je .q2
cmp al, ';'
je .q2
! cmp al, 'A'
! jb .q1
! cmp al, 'K'
! jbe .badChar ; [A,K] is bad
! cmp al, 'a'
! jb .q1
! cmp al, 'k'
! ja .q1
! .badChar: ; [a,k] is bad
! mov bx, 0 ; One or more invalid chars in current word
jmp .q1
add [lineCount], bx ; BX=[0,1] Counting the 'good' words
cmp al, ';'
jne .skipSpaces
dec si ; Next column?
jnz .skipSpaces
cmp [readLastLine], byte 0
je .whileNotTheEndOfFile
The lines that I marked with an exclamation mark really should be:
or al, 32 ; Make lowercase
cmp al, 'a'
jb .q1
cmp al, 'k'
ja .q1
mov bx, 0 ; [A,K] and [a,k] are badChars
Why does your program contain next redundant lines?
; Jeigu ne failo pabaiga, kartojame cikla
sub [lineCount], 2
cmp [readLastLine], byte 0
je .whileNotTheEndOfFile
I am trying to write a program in Assembly Language where it will replace all the letter 'T' in the string STRVAR with 'U' and place the new string in OUTPUT. I figured that I should store each character one by one in OUTPUT throughout the loop process although after doing several trial and error with mov, I already ran out of ideas on how to store a character in a new memory location.
; Question listing (32-bit NASM, SASM `CMAIN` entry): first attempt at copying
; STRVAR to OUTPUT with every 'T' replaced by 'U'.
; NOTE(review): this listing looks incomplete. The CMAIN/L1 labels, the
; conditional jumps after the two compares and the definition of STRVAR are
; not visible.
OUTPUT times 21 db 0
section .text
global CMAIN
;write your code here
lea esi, [STRVAR]
; Load the current character; 0 marks the end of the string
mov al, [esi]
cmp al, 0
cmp al, 'T'
inc esi
jmp L1
;store character here
inc esi
jmp L1
;store character here
inc esi
jmp L1
; Return 0
xor eax, eax
I followed the information shared by Jester and I finally get to have the program work based on what is stated in the specification. I realized I need to add section .data and introduced another point which in this case, lea edi, [OUTPUT] to store each character and use it to print a new string.
; Working version from the asker: copies STRVAR to OUTPUT, writing 'U' for
; every 'T', then stores a terminating 0.
; ESI walks the source string and EDI the destination.
; NOTE(review): the CMAIN/L1 labels, the conditional jumps after the two
; compares and the definition of STRVAR are not visible in this listing, and
; the %include file name is empty. PRINT_DEC is presumably a macro from the
; missing include - confirm.
%include ""
section .data
OUTPUT times 21 db 0
section .text
global CMAIN
;write your code here
lea esi, [STRVAR]
lea edi, [OUTPUT]
; Load the current character; 0 marks the end of the string
mov al, [esi]
cmp al, 0
cmp al, 'T'
inc esi
inc edi
jmp L1
; 'T' found: store 'U' instead
mov byte[edi], 'U'
inc esi
inc edi
jmp L1
; Any other character: copy it unchanged
mov byte[edi], al
inc esi
inc edi
jmp L1
; End of string: terminate OUTPUT
mov byte [edi], 0
PRINT_DEC 1, [edi] ;check if the terminating 0 is also included
; Return 0
xor eax, eax
I have this assembly program where I need to find the substring in the main string I input. My problem is that it always outputs the "word found" even if I typed two completely different words. I don't know which part of my loop or condition is wrong. Please help me figure it out. Also, please suggest some string instructions that could be used in checking for a substring so that I can shorten my code. I am really confused with how the cmpsb works, I only tried to use it. Btw, I don't know how to use a debugger that's why I can't debug my code and I am just a newbie in assembly language.
Below is the logic part of my code.
; Question listing (16-bit DOS, MASM/TASM style): first attempt at finding a
; word inside a string read with INT 21h/AH=0Ah.
; NOTE(review): the label lines appear to be missing from this listing.
; valid, matching, iterate, check, again, invalid_length, found_display and
; notfound_display are referenced but never defined.
; '$'-terminated messages, printed with INT 21h/AH=09h
prompt1 db "Input String: $"
prompt2 db 10,10, 13, "Input Word: $"
prompt3 db 10,10, 13, "Output: $"
found db "Word Found. $"
notfound db "Word Not Found. $"
invalid db 10,10, 13, "Invalid. $"
; Input buffers for INT 21h/AH=0Ah: byte 0 = capacity (21), byte 1 = number of
; characters typed (filled in by DOS), text starts at byte 2.
InputString db 21,?,21 dup("$")
InputWord db 21,?,21 dup("$")
actlen db ?
; Assemble-time constant: the distance from InputWord to this point (the
; 23-byte buffer plus actlen = 24). It is not the number of characters typed.
strlen dw ($-InputWord)
; NOTE(review): `#data` is presumably a garbled `@data` - confirm.
mov ax, #data
mov ds, ax
mov es, ax
; Prompt for and read the main string; SI keeps the buffer address
mov ah,09h
lea dx, prompt1
int 21h
lea si, InputString
mov ah, 0Ah
mov dx, si
int 21h
; Prompt for and read the word to look for; DI keeps the buffer address
mov ah,09h
lea dx, prompt2
int 21h
lea di, InputWord
mov ah, 0Ah
mov dx, di
int 21h
; Compare the typed lengths: the word must not be longer than the string
mov cl, [si+1]
mov ch, 0
; This moves SI to InputString + length, not to the first character at
; InputString + 2; DI is likewise still at the start of the buffer.
add si, cx
mov bl, [di+1]
mov bh, 0
cmp bx, cx
ja invalid_length
je valid
jb matching
; NOTE(review): presumably the `valid:` label belongs here (equal lengths).
repe cmpsb
je found_display
jne notfound_display
; Compare the current string character with the current word character
mov al, [si]
mov ah, [di]
cmp al, ah
je check
jne iterate
; Advance in the string. DX is reloaded from strlen on every pass through
; here, so the decrement never accumulates into a working counter.
inc si
mov dx, strlen
dec dx
cmp dx, 0
je notfound_display
jmp matching
; DI still points at the buffer start here, so [di+1] is the word length.
; AX = SI + 1 is kept as the position to resume from after a mismatch.
mov cl, [di+1]
mov ch, 0
mov ax, si
add ax, 1
repe cmpsb
jne again
jmp found_display
; Mismatch: resume one character further in the string
mov si, ax
dec dx
lea di, InputWord
jmp matching
; Print "Invalid."
mov ah, 09h
lea dx, invalid
int 21h
strlen dw ($-InputWord)
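This does nothing useful. The length that it calculates cannot help you in any way! It is an assemble-time constant (the size of the InputWord buffer plus actlen), not the number of characters that were typed.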
;To check if the length of substring is shorter than the main string
mov cl, [si+1]
mov ch, 0
add si, cx
mov bl, [di+1]
mov bh, 0
cmp bx, cx
Here (as Jester told you) the add si, cx instruction is wrong. You need add si, 2 to set SI to the start of the string. You will also need to add add di, 2 to set DI to the start of the word. Do this and the valid part of your program will work correctly.
For the matching part:
Consider the case where the string has 7 characters and the word that you're looking for has 6 characters. You can find the word in at most 2 ways.
Consider the case where the string has 8 characters and the word that you're looking for has 6 characters. You can find the word in at most 3 ways.
Consider the case where the string has 9 characters and the word that you're looking for has 6 characters. You can find the word in at most 4 ways.
Notice the regularity? The number of possible finds is equal to the difference in length plus 1.
mov bp, cx ;CX is length string (long)
sub bp, bx ;BX is length word (short)
inc bp
This sets BP to the number of tries in your matching routine.
; Scan loop: try each of the BP possible start positions in the string.
; In:    BP = number of tries left (at least 1), BX = length of the word
; Out:   jumps to check when the first character matches, or to
;        notfound_display when every position has been tried.
        lea     si, [InputString + 2]
        lea     di, [InputWord + 2]
matching:
        mov     al, [si]                ;Next character from the string
        cmp     al, [di]                ;Always the first character from the word
        je      check
continue:                               ; check returns here after a mismatch
        inc     si                      ;DI remains at start of the word
        dec     bp
        jnz     matching                ;More tries to do
        jmp     notfound_display
The check part will use repe cmpsb to test for a match, but in the event that the match is not found, you must be able to return to the matching code at the continue label. You have to preserve the registers.
; check: compare the whole word against the string at the current position.
; In:    SI -> current position in the string, DI -> start of the word,
;        BX = length of the word, ES = DS (CMPSB compares DS:SI with ES:DI)
; Out:   jumps to found_display on a match, otherwise back to continue with
;        SI and DI restored. Clobbers CX.
check:
        push    si
        push    di
        mov     cx, bx                  ;BX is length of word
        repe cmpsb
        pop     di                      ; POP leaves the flags from CMPSB intact
        pop     si
        jne     continue
        jmp     found_display
I am writing an assembly program that capitalizes the first letter of each word in the sentence I input.
My problem is that it doesn't capitalize the first letter of the words. What's wrong with my code?
Below is my code
; Question listing (16-bit DOS, MASM/TASM style): reads a sentence with
; INT 21h/AH=0Ah and tries to capitalize the first letter of every word.
; NOTE(review): the label lines appear to be missing from this listing.
; start, loop1, checkspace, checkletter, checkletter2, capital and exit are
; referenced but never defined, and no .data/.code directives are visible.
.model small
.stack 100h
prompt1 db "Input String: $"
prompt2 db "Output String: $"
; Input buffer for INT 21h/AH=0Ah: byte 0 = capacity (21), byte 1 = number of
; characters typed (filled in by DOS), text starts at byte 2.
InputString db 21,?,21 dup("$")
newline db 10,13,"$"
; NOTE(review): `#data` is presumably a garbled `@data` - confirm.
mov ax, #data
mov ds, ax
; Getting input string
mov ah,09h
lea dx, prompt1
int 21h
lea si, InputString
mov ah, 0Ah
mov dx, si
int 21h
mov ah, 09h
lea dx, newline
int 21h
; CX = number of characters typed
mov cl, [si+1]
mov ch,0
; SI = InputString + length + 1, i.e. the last typed character rather than
; the first one at InputString + 2.
add si, cx
inc si
dec cx
cmp cx, 0
je exit
jmp checkspace
; These compares test the address held in SI, not the byte it points to.
; INC also modifies ZF, so the JE below tests the result of INC, not of CMP.
cmp si, " "
inc si
je checkletter
cmp si, "a"
jae checkletter2
cmp si, "z"
jbe capital
; Toggle bit 5 of the character to switch its case
mov ah, [si]
xor ah, 00100000b
mov [si], ah
jmp loop1
; Print the prompt and the (modified) text, then terminate
mov ah,09h
lea dx, prompt2
int 21h
mov ah, 09h
mov dx, offset InputString+2
int 21h
mov ah, 4ch
int 21h
end start
Below is the updated part of my code. It can now only capitalize the first letter of the word. I don't know if the problem is with the loop. Please help me figure out which part of my code is wrong. I'm so sorry I don't know how to use a debugger for checking. Thank you.
; Updated fragment from the asker: SI now starts at the first typed character.
; NOTE(review): the label lines (loop1, checkletter, checkletter2,
; checkletter3, capital, exit) appear to be missing from this listing.
; CX = number of characters typed, SI -> first character
mov cl, [si+1]
mov ch,0
add si,2
jmp checkletter
; Advance to the next character; leave when none remain
inc si
dec cx
cmp cx, 0
je exit
; These compares test the address held in SI, not the byte it points to.
cmp si, " "
je checkletter3
jmp loop1
cmp si, 41h
jae checkletter2
cmp si, 5Ah
jbe capital
inc si
dec cx
jmp checkletter
; Toggle bit 5 of the character to switch its case
mov ah, [si]
xor ah, 00100000b
mov [si], ah
jmp loop1
cmp si, " "
cmp si, "a"
cmp si, "z"
cmp si, 41h
cmp si, 5Ah
All of the above are comparing the address in SI with an immediate value. They are not comparing the byte that can be found at the address that SI is pointing to with the immediate! You need to dereference it.
cmp byte [si], " "
cmp byte [si], "a"
cmp byte [si], "z"
cmp byte [si], 41h
cmp byte [si], 5Ah
To solve the task of capitalizing just the first letter of each word
You first need to find the start of each word. This is done by skipping the whitespace in front of each word.
Once located, you check whether the starting character is a lowercase letter, and only if it is, you convert it to uppercase by subtracting 32.
The rest of the word is again skipped until either the end of the line is encountered or another whitespace is found for which the process starts all over again from the top.
It looks like this:
; Capitalize the first letter of every space-separated word, in place.
; In:    SI -> INT 21h/AH=0Ah input buffer (byte 1 = typed length,
;        text starts at byte 2)
; Out:   the text is modified in place; control leaves through Exit, which
;        the surrounding program must define (the question's program branches
;        to its own exit label to print the result).
; Uses:  AL = current character, CL = characters left to process, SI.
        mov     cl, [si+1]
        test    cl, cl                  ;Exit if the input was empty!!!
        jz      Exit
        add     si, 2                   ;Point at first byte

SkipSpace:                              ; skip the whitespace in front of a word
        mov     al, [si]
        cmp     al, " "
        jne     FirstCharacter
        inc     si                      ;(*)
        dec     cl
        jz      Exit
        jmp     SkipSpace

FirstCharacter:                         ; AL = first character of a word
        cmp     al, "a"
        jb      SkipRemainingCharacters
        cmp     al, "z"
        ja      SkipRemainingCharacters
        sub     al, 32                  ;Capitalize
        mov     [si], al                ; and write back in string

SkipRemainingCharacters:                ; step over the rest of the word
        inc     si
        dec     cl
        jz      Exit
        mov     al, [si]
        cmp     al, " "                 ;If not space then it's part of same word
        jne     SkipRemainingCharacters
        jmp     SkipSpace               ;This could just as easily jump to (*)