Input a string of characters and output to uppercase - linux

I'm trying to write a program which will convert a lowercase string of characters to uppercase, using a buffer to store the initial string. The problem that I'm experiencing is that my program will print out an infinite loop of characters which have to resemblence to the string I've given it.
Other problems that I believe exist in the code are as follows:
Some subroutines use ret at the end of the call. The problem that I'm having trouble with is figuring out which of these subroutines do not actually need a ret, and are better used with jmp. To be honest, I'm a little confused here between the semantics of the two. For example, does a subroutine called with ja need to be ret'ed at the end of the call?
I'm also trying to print out the number of iterations that occur within each iteration of the loop used to convert the values. For whatever reason, I'll inc the counter and resolve to print it with a PrintNumIter routine, which, alas, doesn't do anything unfortunately.
The complete program is as follows.
Codez
bits 32
[section .bss]
buf: resb 1024 ;allocate 1024 bytes of memory to buf
[section .data]
;*************
;* CONSTANTS *
;*************
;ASCII comparison/conversion
LowercaseA: equ 0x61
LowercaseZ: equ 0x7A
SubToUppercase: equ 0x20
;IO specifiers/descriptors
EOF: equ 0x0
sys_read: equ 0x3
sys_write: equ 0x4
stdin: equ 0x0
stdout: equ 0x1
stderr: equ 0x2
;Kernel Commands/Program Directives
_exit: equ 0x1
exit_success: equ 0x0
execute_cmd: equ 0x80
;Memory Usage
buflen: equ 0x400 ;1KB of memory
;*****************
;* NON-CONSTANTS *
;*****************
iteration_count: db 0
query : db "Please enter a string of lowercase characters, and I will output them for you in uppercase ^.^: ", 10
querylen : equ $-query
[section .text]
global _start
;===========================================
; Entry Point
;===========================================
_start:
nop ;keep GDB from complaining
call AskUser
call Read
call SetupBuf
call Scan
call Write
jmp Exit
;===========================================
; IO Instructions
;===========================================
Read:
mov eax, sys_read ;we're going to read in something
mov ebx, stdin ;where we obtain this is from stdin
mov ecx, buf ;read data into buf
mov edx, buflen ;amount of data to read
int execute_cmd ;invoke kernel to do its bidding
ret
Write:
mov eax, sys_write ;we're going to write something
mov ebx, stdout ;where we output this is going to be in stdout
mov ecx, buf ;buf goes into ecx; thus, whatever is in ecx gets written out to
mov edx, buflen ;write the entire buf
int execute_cmd ;invoke kernel to do its bidding
ret
AskUser:
mov eax, sys_write
mov ebx, stdout
mov ecx, query
mov edx, querylen
int execute_cmd
ret
PrintNumIter:
mov eax, sys_write
mov ebx, stdout
push ecx ;save ecx's address
mov ecx, iteration_count ;print the value of iteration_count
mov edx, 4 ;print 4 bytes of data
int execute_cmd
pop ecx ;grab the value back in
ret
;===========================================
; Program Preperation
;===========================================
SetupBuf:
mov ecx, esi ;place the number of bytes read into ecx
mov ebp, buf ;place the address of buf into ebp
dec ebp ;decrement buf by 1 to prevent "off by one" error
ret
;===========================================
; Conversion Routines
;===========================================
ToUpper:
sub dword [ebp + ecx], SubToLowercase ;grab the address of buf and sub its value to create uppercase character
Scan:
call PrintNumIter ;print the current iteration within the loop
cmp dword [ebp + ecx], LowercaseA ;Test input char against lowercase 'a'
jb ToUpper ;If below 'a' in ASCII, then is not lowercase - goto ToLower
cmp dword [ebp + ecx], LowercaseZ ;Test input char against lowercase 'z'
ja ToUpper ;If above 'z' in ASCII, then is not lowercase - goto ToLower
dec ecx ;decrement ecx by one, so we can get the next character
inc byte [iteration_count] ;increment the __value__ in iteration count by 1
jnz Scan ;if ecx != 0, then continue the process
ret
;===========================================
;Next:
; dec ecx ;decrement ecx by one
; jnz Scan ;if ecx != 0 scan
; ret
;===========================================
Exit:
mov eax, _exit
mov ebx, exit_success
int execute_cmd

Your problem is directly attributed to the fact that you never append a nul terminator to the end of your string buffer once you are done processing it (from what I remember, the read syscall doesn't read back a null).
unfortunately this is a little bit harder to do due to your odd control flow, but changing SetupBuf should do the trick (note, you should probably check that you haven't overflowed buf, but with 1KB, I doubt you'd need to worry for a learning program):
SetupBuf:
mov ecx, esi
mov ebp, buf
mov [ebp+ecx],0 ;make sure the string is nul terminated
dec ebp
ret
Just note
On to another issue that seems to plague your code (which you have aptly noticed), your odd control flow. So simple guidelines (note: not rules, just guidelines) that hopefully help you on your way to less spagetti code:
JMP (and the conditional jumps) should only be used to go to lables in the same procedure, else you start getting in a bind because you cannot unwind back. the only other time you can use jumps is for tail-calls, but at this stage you shouldn't worry about that, its more confusion.
Always use CALL when you are going to another procedure, this allows you to return to the call site correctly with the RETN/RET instruction, making the control flow more logical.
A simple example:
print_num: ;PROC: num to print in ecx, ecx is caller preserved
push ecx
push num_format ; "%d\n"
call _printf
sub esp,8 ;cleanup for printf
retn
print_loop_count: ;PROC: takes no args
mov ecx,0x10 ;loop 16 times
do_loop: ;LABEL: used as a jump target for the loop
;good idea to prefix jump lables with "." to differentiate them
push ecx ;save ecx
call print_num ;value to print is already in ecx
pop ecx ;restore ecx
dec ecx
jnz do_loop ;again?
retn

Related

To display characters in reverse order using nasm [infinite loop running]

THE PROGRAM IS USED TO ACCEPT CHARACTERS AND DISPLAY THEM IN REVERSE ORDER
The code is included here:
section .bss
num resb 1
section .text
global _start
_start:
call inputkey
call outputkey
;Output the number entered
mov eax, 1
mov ebx, 0
int 80h
inputkey:
;Read and store the user input
mov eax, 3
mov ebx, 2
mov ecx, num
mov edx, 1
int 80h
cmp ecx, 1Ch
je .sub2
push ecx
jmp inputkey
.sub2:
push ecx
ret
outputkey:
pop ecx
;Output the message
mov eax, 4
mov ebx, 1
;mov ecx, num
mov edx, 1
int 80h
cmp ecx, 1Ch
je .sub1
jmp outputkey
.sub1:
ret
The code to compile and run the program
logic.asm
is given here:
nasm -f elf logic.asm
ld -m elf_i386 -s -o logic logic.o
./logic
There are a few problems with the code. Firstly, for the sys_read syscall (eax = 3) you supplied 2 as the file descriptor, however 2 refers to stderr, but in this case you'd want stdin, which is 0 (I like to remember it as the non-zero numbers 1 and 2 being the output).
Next, an important thing to realize about the ret instruction is that it pops the value off the top of the stack and returns to it (treating it as an address). Meaning that even if you got to the .sub2 label, you'd likely get a segfault. With this in mind, the stack also tends to not be permanent storage, as in it is not preserved throughout procedures, so I'd recommend just making your buffer larger to e.g. 256 bytes and increment a value to point to an index in the buffer. (Using a fixed-size buffer will keep you from getting into the complications of memory allocation early, though if you want to go down that route you could do an external malloc call or just an mmap syscall.)
To demonstrate what I mean by an index into the reserved buffer:
section .bss
buf resb 256
; ...
inputkey:
xor esi, esi ; clear esi register, we'll use it as the index
mov eax, 3
mov ebx, 0 ; stdin file descriptor
mov edx, 1 ; read one byte
.l1: ; loop can start here instead of earlier, since the values eax, ebx and edx remain unchanged
lea ecx, [buf+esi] ; load the address of buf + esi
int 80h
cmp [buf+esi], 0x0a ; check for a \n character, meaning the user hit enter
je .e1
inc esi
jmp .l1
.e1:
ret
In this case, we also get to preserve esi up until the output, meaning that to reverse the input, we just print in descending order.
outputkey:
mov eax, 4
mov ebx, 1 ; stdout
mov edx, 1
.l2:
lea ecx, [buf+esi]
int 80h
test esi, esi ; if esi is zero it will set the ZF flag
jz .e2:
jmp .l2
.e2:
ret
Note: I haven't tested this code, so if there are any issues with it let me know.

nasm zero byte omitted at the end of the string

I am studying Assembly language using this nasm tutorial. Here is the code that prints a string:
SECTION .data
msg db 'Hello!', 0Ah
SECTION .text
global _start
_start:
mov ebx, msg
mov eax, ebx
; calculate number of bytes in string
nextchar:
cmp byte [eax], 0
jz finished
inc eax
jmp nextchar
finished:
sub eax, ebx ; number of bytes in eax now
mov edx, eax ; number of bytes to write - one for each letter plus 0Ah (line feed character)
mov ecx, ebx ; move the memory address of our message string into ecx
mov ebx, 1 ; write to the STDOUT file
mov eax, 4 ; invoke sys_write (kernel opcode 4)
int 80h
mov ebx, 0 ; no errors
mov eax, 1 ; invoke sys_exit (kernel opcode 1)
int 80h
It works and successfully prints "Hello!\n" to STDOUT. One thing I don't understand: it searches for \0 byte in msg, but we didn't define it. Ideally, the correct message definition should be
msg db 'Hello!', 0Ah, 0h
How does it successfully get the zero byte at the end of the string?
The similar case is in exercise 7:
; String printing with line feed function
sprintLF:
call sprint
push eax ; push eax onto the stack to preserve it while we use the eax register in this function
mov eax, 0Ah ; move 0Ah into eax - 0Ah is the ascii character for a linefeed
push eax ; push the linefeed onto the stack so we can get the address
mov eax, esp ; move the address of the current stack pointer into eax for sprint
call sprint ; call our sprint function
pop eax ; remove our linefeed character from the stack
pop eax ; restore the original value of eax before our function was called
ret ; return to our program
It puts just 1 byte: 0Ah into eax without terminating 0h, but the string length is calculated correctly inside sprint. What is the cause?

Conditional jump fails in linux x86 intel syntax(NASM)

STORY(IM A NEWBIE):
I started reading a pdf tutorial about programming in assembly(x86 intel) using the famous nasm assembler and i have a problem executing a very basic assembly code(inspired by a code about loops from the tutorial).
THE PROBLEM(JE FAILS):
This assembly code should read a digit(a character(that means '0'+digit)) from stdin and then write to the screen digit times "Hello world\n".Really easy loop :decrease digit and if digit equals zero('0' not the integer the character) jump(je) to the exit(mov eax,1\nint 0x80).
Sounds really easy but when i try to execute the output is weird.(really weird and BIG)
It runs many times throught the loop and stops when digit equals '0'(weird because until the program stops the condition digit == '0' been tested many times and it should be true)
Actually my problem is that the code fails to jump when digit == '0'
THE CODE(IS BIG):
segment .text
global _start
_start:
;Print 'Input a digit:'.
mov eax,4
mov ebx,1
mov ecx,msg1
mov edx,len1
int 0x80
;Input the digit.
mov eax,3
mov ebx,0
mov ecx,dig
mov edx,2
int 0x80
;Mov the first byte(the digit) in the ecx register.
;mov ecx,0
mov ecx,[dig]
;Use ecx to loop dig[0]-'0' times.
loop:
mov [dig],ecx
mov eax,4
mov ebx,1
mov ecx,dig
mov edx,1
int 0x80
mov eax,4
mov ebx,1
mov ecx,Hello
mov edx,Hellolen
int 0x80
;For some debuging (make the loop stop until return pressed)
;mov eax,3
;mov ebx,0
;mov ecx,some
;mov edx,2
;int 0x80
;Just move dig[0](some like character '4' or '7') to ecx register and compare ecx with character '0'.
mov ecx,[dig]
dec ecx
cmp ecx,'0'
;If comparison says ecx and '0' are equal jump to exit(to end the loop)
je exit
;If not jump back to loop
jmp loop
;Other stuff ...(like an exit procedure and a data(data,bss) segment)
exit:
mov eax,1
int 0x80
segment .data
msg1 db "Input a digit:"
len1 equ $-msg1
Hello db ":Hello world",0xa
Hellolen equ $-Hello
segment .bss
dig resb 2
some resb 2
THE OUTPUT:
Input a digit:4
4:Hello world
3:Hello world
2:Hello world
1:Hello world
0:Hello world
...
...(many loops later)
...
5:Hello world
4:Hello world
3:Hello world
2:Hello world
1:Hello world
$
That is my question:What is wrong with this code?
Could you explain that ?
AND i dont need alternative codes that will magically(without explanation) run cause i try to learn(im a newbie)
That is my problem(and my first question in Stackoverflow.com )
ECX is 32 bit, a character is just 8 bit. Use a 8 bit register, such as CL instead of ECX.
As jester mentioned, ecx comes in as a character so you probably should use cl
loop:
mov [dig],cl
...
mov cl,[dig]
dec cl
cmp cl,'0'
jne loop
You can also load ecx with movzx which clears the top bits of the register (i.e. a zero-extedning load):
...
movzx ecx, byte [dig]
loop:
mov [dig], cl ; store just the low byte, if you want to store
...
movzx ecx, byte [dig]
dec ecx
cmp ecx, '0'
jne loop
Note that it is often suggested that you do not use the al, bl, cl, dl registers as their use is not fully optimized. Whether this is still true, I do not know.

Assembly - Convert value to ascii

I found here some topics about converting value to ascii. But I am a bit stuck here.
Here's my code:
;GetPID
SECTION .data
msg: db "Your PID is:" ;pidmsg
msgl: equ $-msg ;pidlen
lookup: db "0123456789"
SECTION .bss
pid: resb 8 ;test is a variable im testing to
;receive the eax from sys_getpid
SECTION .text
global _start
_start:
call getpid ;get pid number into eax
mov ebx,0xa
lea ebp, [pid+6]
call ASCIIC
jmp exit ;exit
exit:
mov eax,1
mov ebx,0
int 0x80
getpid:
mov eax,20 ;getpid function (sys_getpid)
int 0x80
ret
;ASCIIC was taken from http://theropfather.github.io/asm/getpid_tutorial.html
ASCIIC:
div ebx ;Divide the PID
mov byte cl, [lookup+edx] ;Copy ASCII value to CL
mov [ebp], cl ;Copy ASCII value to buffer
dec ebp ;Next byte into buffer
xor edx, edx ;Clear the remainder
inc eax ;Dec eax tricking jnz
dec eax ;Push back to original value
jnz ASCIIC ;Keep looping until eax is zero
call .printPID ;Print out the buffer
ret
.printPID:
mov ecx, msg ;message
mov edx, msgl ;msg len
mov ebx,0x1 ;FD stdout
mov eax, 0x4 ;sys_write call
int 0x80 ;Call
mov [pid+7], byte 0xA ;Push a newline to PID string
mov edx,0x8 ;Max length of 8 bytes
mov ecx,pid ;Push PID value
mov ebx,0x1 ;FD stdout
mov eax, 0x4 ;sys_write call
ret
So the point here is get the pid number of the current program (this) and print it.
The program calls sys_getpid (eax=20) and the pidnumber is returned. But, sys_write parameters is: ssize_t sys_write(unsigned int fd, const char * buf, size_t count)
second argument is a char so I need to convert into char for each number.
output is only: "Your PID is:"
Anyone ? :)
You forgot to int 0x80 a second time to actually print the number.
When you div ebx, what is calculated is eax = edx:eax / ebx, so you have to make sure that edx is zero in your case. So xor edx,edx before you div ebx.
You should pad the front of your buffer with spaces, otherwise you might run into trouble depending on where stdout goes in the end.
I am very suspicious about your use of ebp to hold the pointer to the current buffer position. While it works here, the base pointer is a special purpose register and I see no reason to abuse it here.

NASM addition program

I am a developer who uses high level languages, learning assembly language in my spare time. Please see the NASM program below:
section .data
section .bss
section .text
global main
main:
mov eax,21
mov ebx,9
add eax,ebx
mov ecx,eax
mov eax,4
mov ebx,1
mov edx,4
int 0x80
push ebp
mov ebp,esp
mov esp,ebp
pop ebp
ret
Here are the commands I use:
ian#ubuntu:~/Desktop/NASM/Program4$ nasm -f elf -o asm.o SystemCalls.asm
ian#ubuntu:~/Desktop/NASM/Program4$ gcc -o program asm.o
ian#ubuntu:~/Desktop/NASM/Program4$ ./program
I don't get any errors, however nothing is printed to the terminal. I used the following link to ensure the registers contained the correct values: http://docs.cs.up.ac.za/programming/asm/derick_tut/syscalls.html
You'll have to convert the integer value to a string to be able to print it with sys_write (syscall 4). The conversion could be done like this (untested):
; Converts the integer value in EAX to a string in
; decimal representation.
; Returns a pointer to the resulting string in EAX.
int_to_string:
mov byte [buffer+9],0 ; add a string terminator at the end of the buffer
lea esi,[buffer+9]
mov ebx,10 ; divisor
int_to_string_loop:
xor edx,edx ; clear edx prior to dividing edx:eax by ebx
div ebx ; EAX /= 10
add dl,'0' ; take the remainder of the division and convert it from 0..9 -> '0'..'9'
dec esi ; store it in the buffer
mov [esi],dl
test eax,eax
jnz int_to_string_loop ; repeat until EAX==0
mov eax,esi
ret
buffer: resb 10
programming in assembly requires a knowledge of ASCII codes and a some basic conversion routines. example: hexadecimal to decimal, decimal to hexadecimal are good routines to keep somewhere on some storage.
No registers can be printed as they are, you have to convert (a lot).
To be a bit more helpfull:
ASCII 0 prints nothing but some text editors (kate in kde linux) will show something on screen (a square or ...). In higher level language like C and C++ is it used to indicate NULL pointers and end of strings.
Usefull to calculate string lengths too.
10 is end of line. depending Linux or Windows there will be a carriage return (Linux) too or not (Windows/Dos).
13 is carriage return
1B is the ESC key (Linux users will now more about this)
255 is a hard return, I never knew why it is good for but it must have its purpose.
check http://www.asciitable.com/ for the entire list.
Convert the integer value to a string.
Here i have used macros pack and unpack to convert integers to string and macro unpack to do the vice-versa
%macro write 2
mov eax, 4
mov ebx, 1
mov ecx, %1
mov edx, %2
int 80h
%endmacro
%macro read 2
mov eax,3
mov ebx,0
mov ecx,%1
mov edx,%2
int 80h
%endmacro
%macro pack 3 ; 1-> string ,2->length ,3->variable
mov esi, %1
mov ebx,0
%%l1:
cmp byte [esi], 10
je %%exit
imul ebx,10
movzx edx,byte [esi]
sub edx,'0'
add ebx,edx
inc esi
jmp %%l1
%%exit:
mov [%3],ebx
%endmacro
%macro unpack 3 ; 1-> string ,2->length ,3->variable
mov esi, %1
mov ebx,0
movzx eax, byte[%3]
mov byte[%2],0
cmp eax, 0
jne %%l1
mov byte[%2],1
push eax
jmp %%exit2
%%l1:
mov ecx,10
mov edx,0
div ecx
add edx,'0'
push edx
inc byte[%2]
cmp eax, 0
je %%exit2
jmp %%l1
%%exit2:
movzx ecx,byte[%2]
%%l2:
pop edx
mov [esi],dl
inc esi
loop %%l2
%endmacro
section .data ; data section
msg1: db "First number : " ;
len1: equ $-msg1 ;
msg2: db "Second number : " ;
len2: equ $-msg2 ;
msg3: db "Sum : " ;
len3: equ $-msg3 ;
ln: db 10
lnl: equ $-ln
var1: resb 10
var2: resb 10
str1: resb 10
str2: resb 10
ans: resb 10
ansvar: resb 10
ansl: db ''
l1: db ''
l2: db ''
section.text ;code
global _start
_start:
write msg1,len1
read str1,10
pack str1,l1,var1
write msg2,len2
read str2,10
pack str2,l2,var2
mov al,[var1]
add al,[var2]
mov [ansvar],al
unpack ans,ansl,ansvar
write msg3,len3
write ans,10
write ln,lnl
mov ebx,0 ; exit code, 0=normal
mov eax,1 ; exit command to kernel
int 0x80 ; interrupt 80 hex, call kernel
To assembler, link and run:
nasm -f elf add.asm
ld -s -o add add.o
./add

Resources