Simple upper-casifier loops infinitely
What's wrong with my code?
Any advice?
My programming environment is Linux, Emacs, and assembly in AT&T syntax.
# Copies the input file to the output file one byte at a time, passing each
# byte through convert on the way.
.section .data
.section .bss
# One-byte I/O buffer shared by the read and write calls below.
.lcomm buffer,1
.section .text
.global _start
_start:
# Use %ebp as a fixed reference to the initial stack and reserve two
# 4-byte locals below it: -4(%ebp) = input fd, -8(%ebp) = output fd.
movl %esp,%ebp
subl $8,%esp
# input file name: 2nd arg, at 8(%ebp)
# output file name: 3rd arg, at 12(%ebp)
#open,read,open,write,close
movl $5,%eax
movl 8(%ebp),%ebx
movl $0,%ecx
movl $0666,%edx
int $0x80
#%eax contains input's fd
#movl to first local var
# NOTE(review): the result is stored without checking whether open failed.
movl %eax,-4(%ebp)
movl $5,%eax
movl 12(%ebp),%ebx
movl $03101,%ecx
movl $0666,%edx
int $0x80
#eax contains output's fd
#movl to second local var
movl %eax,-8(%ebp)
loop:
#read 1 byte from file 1st byte of data
movl $3,%eax
movl -4(%ebp),%ebx
movl $buffer,%ecx
movl $1,%edx
int $0x80
#buffer contains 1 byte of file
# NOTE(review): this tests the byte that was read, not the value the read
# call returned in %eax. The branch is taken only when the buffer holds a
# zero byte, so reaching the end of a file with no NUL in it never exits
# the loop.
cmpb $0,buffer
je program_exit
# NOTE(review): pushl pushes 4 bytes starting at buffer, although buffer is
# declared as 1 byte; convert only reads the low byte of the argument.
pushl buffer
call convert #will return converted to %al
addl $4,%esp
movb %al,buffer
#write 1 byte from buffer to file
movl $1,%edx
movl $buffer,%ecx
movl -8(%ebp),%ebx
movl $4,%eax
int $0x80
jmp loop
program_exit:
# The exit status is whatever 4 bytes are at buffer; neither descriptor is
# closed before exiting.
movl buffer,%ebx
movl $1,%eax
int $0x80
# convert: upper-case one ASCII character.
#
# In:    one 32-bit stack argument (cdecl); only its low byte is used.
# Out:   %al = the character, upper-cased if it was in 'a'..'z',
#        otherwise unchanged. The upper bytes of %eax are not meaningful.
# Stack: the caller removes the argument after the call.
.type convert, @function
convert:
    pushl %ebp
    movl %esp, %ebp

    movb 8(%ebp), %al           # the character to convert
    cmpb $'a', %al
    jl convert_end              # below 'a' (or >= 0x80, which is negative here)
    cmpb $'z', %al
    jg convert_end              # above 'z'
    subb $32, %al               # 'a' - 'A' == 32, so subtract to reach upper case

convert_end:
    movl %ebp, %esp
    popl %ebp
    ret
Note that read(2) signals an end-of-file condition by returning 0. You're trying to find the end of the file by looking for an ASCII NUL byte in the data, and NUL bytes are very rare in text files on Unix-derived systems. (If you want an easy way to make a terabyte-sized file, run dd if=/dev/zero of=/tmp/huge bs=1048576 seek=1048576 count=1. The whole thing will be filled with ASCII NUL characters, or the integer 0, however you want to interpret it.)
You need to modify your code to find the end of file by comparing the return value of the read(2) system call, not by looking at the data you receive into your buffer.
Related
This code reads data from an inputfile character by character and it writes to another file.
It should stop reading and writing once it finds a character with a value equal to 3.
I say "should" because the program doesn't stop once it finds a value equal to 3; instead it continues to read until the end of the file.
Inputfile is like: 2 4 5 3 1 8
My code is:
# Copies the input file to the output file one byte at a time and is meant
# to stop early when a particular byte value is read.
.section .data
varInputHandle: .long 100
varOutputHandle: .long 100
varExitCode: .long 1
cont: .long 1
.section .bss
# One-byte I/O buffer used by both the read and the write call.
.lcomm varBuffer, 1
.section .text # declaring our .text segment
.globl _start # telling where program execution should start
_start:
popl %eax # Get the number of arguments
popl %ebx # Get the program name
# open input file first
popl %ebx # Get the first actual argument - file to read
movl $5, %eax # open
movl $0, %ecx # read-only mode
int $0x80
movl %eax, varInputHandle #store input file handle to memory
#open output file, make it writable, create if not exists
popl %ebx # Get the second actual argument - file to write
movl $5, %eax # open
movl $0101, %ecx # create flag + write only access (if google is telling me truth)
movl $0666, %edx #permissions for out file as rw-rw-rw-
int $0x80
movl %eax, varOutputHandle #store output file handle to memory
contToZero:
movl $0, cont
processingLoop:
# cont is incremented on every pass but never read in this listing.
incb cont
#read single char to varBuffer
movl $3, %eax
movl varInputHandle, %ebx
movl $varBuffer, %ecx
movl $1, %edx
int $0x80
#if no char was read (EOF?), jmp finishProcessing
cmpl $0, %eax
jz finishProcessing # looks like total success, finish cleanly
# NOTE(review): this is a 4-byte compare against the integer 3 on a buffer
# that is declared as 1 byte and is filled with a character read from the
# file, not with a number.
cmpl $3, varBuffer // this instruction is never true, don't know why
je exitToOs
#write it
movl $4, %eax
movl varOutputHandle, %ebx # file_descriptor
movl $varBuffer, %ecx
movl $1, %edx
int $0x80
# done, go for the next char
goForTheNextChar:
jmp processingLoop
finishProcessing:
movl $0, varExitCode #everything went OK, set exit code to 0
exitToOs:
# %ebx is loaded three times before the interrupt; only the last value
# (varExitCode) is in effect when the exit call is made, and no close call
# is issued for either descriptor here.
movl varOutputHandle, %ebx # file_descriptor
movl varInputHandle, %ebx
movl $1, %eax
movl varExitCode, %ebx
int $0x80
# NOTE(review): nothing in this listing jumps to closeFile, and it sits
# directly after the exit call, so it looks unreachable. The cmpl result is
# also not used by any branch.
closeFile:
cmpl $-1, %ebx
movl $6, %eax #sys_close
int $0x80
cmpl $3, varBuffer seems to never be true therefore I can't jump to exitToOs.
In your code you have:
cmpl $3, varBuffer
This can never be true because you do not have binary data in your file.
When you read character by character, you are reading ASCII values. In order to properly make this comparison you must do one of two things:
Convert the character read from ASCII to decimal
Compare the read value to an ASCII value
Since you are relying on $0 to identify when you have read zero bytes, I would suggest that you take the (easier) approach of checking for the ASCII value that you would like to find. In your case this would be:
cmpb $'3', varBuffer # Compare character to 0x33 / 51 / "3"
Hi guys, I've got an annoying problem: I'm trying to write code that just reverses a small string sequentially.
I already have this:
# Prints a string, attempts to reverse it in place by swapping from both
# ends, then prints it again and exits with status 0.
.section .data
string:
.ascii "AAAAAABBBBBB"
# Byte count of string, stored as a 64-bit value; the code below reads it
# with 32-bit movl instructions.
length:
.quad . -string #Dot = 'here'
.section .text
.globl _start #Make entry point visible to linker
_start:
movl $4, %eax #4=write
movl $1, %ebx #1=stdout
movl $string, %ecx
movl length, %edx
int $0x80 #Call Operating System
movl length,%edi #counter
shrl $1,%edi #half of string
movl $0,%ecx #start from index zero
# NOTE(review): %edx starts at length, which indexes the byte just past the
# last character of string.
movl length,%edx #start from end
reverse:
# NOTE(review): each movl transfers 4 bytes, so every swap moves four
# characters rather than one.
movl string(,%ecx,1),%eax
movl string(,%edx,1),%ebx
movl %eax,string(,%edx,1)
movl %ebx,string(,%ecx,1)
inc %ecx
dec %edx
dec %edi
# NOTE(review): the loop instruction counts with %ecx, which is also the
# front index here; the %edi counter decremented above is not tested.
loop reverse #looping
movl $4, %eax #4=write
movl $1, %ebx #1=stdout
movl $string, %ecx
movl length, %edx
int $0x80 #Call Operating System
movl $0, %ebx #Make program return syscall exit status
movl $1, %eax #1=exit
int $0x80 #Call System Again
And it's not working correctly, because in gdb I get wrong values in the registers after executing
movl string(,%ecx,1),%eax
or the steps that follow. I think %eax should hold the value of the letter A, but it doesn't. Any ideas?
I'm working on a 64-bit architecture but assembling with as --32, so I guess the problem is with my addressing.
You should be processing bytes not longs, so use movb with 8 bit registers (al and bl, for example). Also, the LOOP instruction uses ECX automatically, you probably meant JNZ there to repeat until EDI reaches zero.
I wrote this to print argv[0] in x86:
/* Measures the length of the string at 4(%esp) with repne scasb, writes
   that string to stdout, then attempts to write a newline and exits with
   status 0. */
.section .data
newline: .int 0xa, 0
.section .text
.globl _start
_start:
sub %al, %al /* %al = 0, the byte scasb searches for. */
movl 4(%esp), %edi /* Pointer to argv[0]. */
sub %ecx, %ecx /* Set %ecx to 0.*/
not %ecx /* Set %ecx to -1.*/
repne scasb /* Search for %al over and over.*/
not %ecx /* Set %ecx to |%ecx| - 1.*/
dec %ecx
movl %ecx, %edx /* Move the strlen of argv[0] into %edx.*/
movl $4, %eax
movl $1, %ebx
movl 4(%esp), %ecx
int $0x80
/* NOTE(review): %eax is not reloaded with 4 before the next interrupt, so
   the second call uses whatever the first call left in %eax as its system
   call number. */
movl $newline, %ecx
movl $1, %edx
int $0x80
movl $1, %eax
movl $0, %ebx
int $0x80
When I run this file ("print"), the output is this:
[08:27 assembly]$ ./print test
./print[08:30 assembly]$
When I ran this through gdb, the actual string length held in edx is 27, and the string it's checking is "/home/robert/assembly/print", not "./print". So I changed the %esp offsets to 8, to check argv[1]. With the same command as before, the output is this:
test
[08:33 assembly]$
Why does checking argv[0] cause the strange output, when argv[1] does as expected?
I think gdb is "helping" you by adding the full path to argv[0]. After printing, %eax holds the number of characters printed, so you'll want to reload %eax for sys_write again to print the $newline (%ebx should still be okay) - by luck, "test" is the right length. Lord knows what system call you're getting with that longer string!
I'd say you're doing good! (might be a good idea to check argc to make sure argv[1] is there before you try to print it).
I'm trying to print a range of ascii characters with this assembly program.
I'm trying to do it using only the registers, but haven't been having much luck. Everything looks fine to me, but I'm a novice at assembly programming and might have missed something obvious. Any insight will be appreciated. Thanks :)
emphasized text
# Attempts to print a range of ASCII characters by keeping the character
# code in %ebx and issuing system call 4 once per loop pass.
.text
.global _start
_start:
movl $1, %edx
movl $65, %ebx
start_loop:
addl $1, %ebx
# NOTE(review): for system call 4 (write), %ebx is the file descriptor and
# %ecx the address of the data; here %ebx holds the character code and %ecx
# is never set.
movl $0x04, %eax
int $0x80
cmpl $126, %ebx
jle start_loop
# NOTE(review): this unconditional jump goes back to start_loop even when
# the jle above falls through, so the exit code below is never reached.
jmp start_loop
# NOTE(review): the next line has no trailing colon, so it is not a label
# definition as written.
exit
movl $0, %ebx
movl $1, %eax
int $0x80
You are invoking the sys_write system call. sys_write() takes three arguments, file descriptor of the output device(it should be 1 for stdout),address of the buffer where you stored the value to be printed, and the size of the data to be printed. So you have to store file descriptor in %ebx, and store address of the buffer in %ecx and size of the data in %edx. To store the file descriptor you can use the following instruction.
movl $1, %ebx // store 1 (stdout) in ebx)
To store the size of the data you can use:
movl $1, %edx // size is 1 byte
Now you have to store the address of the buffer: you need to put your data somewhere in memory and store the address of that memory in %ecx. Assuming you want to store the data on the stack itself, you can do it like this:
subl $4, %esp // get 4 bytes of memory in the stack
movl $65, (%esp) // store data in the memory where esp points to
movl %esp, %ecx // store address of the data in the ecx
Now you can issue the int 0x80.
movl $04, %eax // store syscall number in eax
int $0x80 // issue the trap interrupt
As a whole you can write the following code:
# Print the ASCII characters 'A' (65) through '~' (126) on stdout, one
# write system call per character. The character is kept in a 4-byte stack
# slot because write needs the address of the data, not the value itself.
# This is a fragment: it is meant to run inside _start and be followed by
# the program's exit sequence.
    movl $1, %ebx               # fd = stdout; unchanged for the whole loop
    subl $0x4, %esp             # reserve the stack slot
    movl $64, (%esp)            # one below 'A'; the loop increments before printing
start_loop:
    incl (%esp)                 # advance to the next character
    movl %esp, %ecx             # address of the character
    movl $1, %edx               # length: one byte (the low byte of the slot)
    movl $0x04, %eax            # write; reloaded each pass because the call's
                                # return value overwrites %eax
    int $0x80
    cmpl $126, (%esp)
    jl start_loop               # strictly less: stop once '~' has been printed,
                                # otherwise one extra pass would print 127 (DEL)
    addl $0x4, %esp             # release the stack slot
See Linux System Calls Part2 at http://www.rulingminds.com/syscallspart2 to know more about registers and system calls usage.
"Thank you very much for the informative answer, but is there a way to store and retrieve the value to be printed in a register without pointing to it?" -- this should probably have been edited into the question.
If you insist on using only syscalls (int $0x80) to interface with the system then the answer is no. You have to somehow pass a buffer to write and rullingminds answer applies.
Using the libc putchar(3) it's straight forward. I use %ebx to keep the ascii code as this register is on linux preserved between function calls. Simply assemble using gcc filename.S (remembering to use -m32 if you are on x86_64).
# int main(void)
# Prints the characters 'A' through '~' followed by a newline using libc
# putchar, then returns 0.
# ABI:    i386 System V (cdecl); assemble and link with gcc -m32.
# Uses:   %ebx holds the current character because callees must preserve
#         it across the putchar calls; main saves and restores it for its
#         own caller for the same reason.
.text
.extern putchar
.global main
main:
    pushl %ebx                  # callee-saved: hand it back intact on return
    # One 4-byte outgoing-argument slot plus 4 bytes of padding, so that
    # %esp is 16-byte aligned at each call (assuming the caller honoured
    # the same alignment when calling main).
    subl $8, %esp

    movl $'A', %ebx             # first character to print
1:
    movl %ebx, (%esp)           # argument for putchar
    call putchar
    incl %ebx                   # next character
    cmpl $'~', %ebx
    jle 1b                      # keep going through '~' inclusive

    movl $10, (%esp)            # newline
    call putchar

    addl $8, %esp
    popl %ebx
    xorl %eax, %eax             # return 0
    ret
I found a pdf file called: ProgrammingGroundUp-1-0-booksize.pdf, and one of the projects is to make an assembly program that takes in files and converts them to uppercase, `
.section .data

#######CONSTANTS########

# Linux system call numbers, loaded into %eax before the interrupt
.equ SYS_OPEN, 5
.equ SYS_WRITE, 4
.equ SYS_READ, 3
.equ SYS_CLOSE, 6
.equ SYS_EXIT, 1

# Options for open (look at /usr/include/asm/fcntl.h for the various
# values; you can combine them by adding them or ORing them). This is
# discussed at greater length in "Counting Like a Computer".
.equ O_RDONLY, 0
# Write-only (01) + create (0100) + truncate (01000). The value must not
# carry the 02000 bit, which is the append flag and is not part of what
# this constant's name promises.
.equ O_CREAT_WRONLY_TRUNC, 01101

# Standard file descriptors
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2

# System call interrupt
.equ LINUX_SYSCALL, 0x80

# Return value of read which means we've hit the end of the file
.equ END_OF_FILE, 0

.equ NUMBER_ARGUMENTS, 2

.section .bss
# Buffer that blocks of the input file are read into and converted in place
.equ BUFFER_SIZE, 500
.lcomm BUFFER_DATA, BUFFER_SIZE
.section .text

# Program entry: copy the file named by the first argument to the file
# named by the second argument, converting each block to upper case with
# convert_to_upper. Exits with status 0 on success and 1 if either file
# cannot be opened.

#STACK POSITIONS (offsets from %ebp, which holds the initial %esp)
.equ ST_SIZE_RESERVE, 8         # bytes reserved for the two descriptors
.equ ST_FD_IN, -4               # input file descriptor
.equ ST_FD_OUT, -8              # output file descriptor
.equ ST_ARGC, 0                 # Number of arguments
.equ ST_ARGV_0, 4               # Name of program
.equ ST_ARGV_1, 8               # Input file name
.equ ST_ARGV_2, 12              # Output file name

.globl _start
_start:
    ###INITIALIZE PROGRAM###
    # save the stack pointer
    movl %esp, %ebp
    # allocate space for our file descriptors on the stack
    subl $ST_SIZE_RESERVE, %esp

open_files:
open_fd_in:
    ###OPEN INPUT FILE###
    movl $SYS_OPEN, %eax
    movl ST_ARGV_1(%ebp), %ebx  # input filename
    movl $O_RDONLY, %ecx        # read-only flag
    movl $0666, %edx            # this doesn't really matter for reading
    int $LINUX_SYSCALL
    # a negative return value is an error code, not a descriptor
    testl %eax, %eax
    js exit_failure

store_fd_in:
    # save the given file descriptor
    movl %eax, ST_FD_IN(%ebp)

open_fd_out:
    ###OPEN OUTPUT FILE###
    movl $SYS_OPEN, %eax
    movl ST_ARGV_2(%ebp), %ebx  # output filename
    movl $O_CREAT_WRONLY_TRUNC, %ecx    # flags for writing to the file
    movl $0666, %edx            # mode for new file (if it's created)
    int $LINUX_SYSCALL
    testl %eax, %eax
    js close_fd_in_and_fail     # input is already open; close it first

store_fd_out:
    # store the file descriptor here
    movl %eax, ST_FD_OUT(%ebp)

    ###BEGIN MAIN LOOP###
read_loop_begin:
    ###READ IN A BLOCK FROM THE INPUT FILE###
    movl $SYS_READ, %eax
    movl ST_FD_IN(%ebp), %ebx   # the input file descriptor
    movl $BUFFER_DATA, %ecx     # the location to read into
    movl $BUFFER_SIZE, %edx     # the size of the buffer
    int $LINUX_SYSCALL          # size of buffer read is returned in %eax

    ###EXIT IF WE'VE REACHED THE END###
    # check for end of file marker
    cmpl $END_OF_FILE, %eax
    # if found or on error, go to the end
    jle end_loop

continue_read_loop:
    ###CONVERT THE BLOCK TO UPPER CASE###
    pushl $BUFFER_DATA          # location of buffer
    pushl %eax                  # size of the buffer
    call convert_to_upper
    popl %eax                   # get the size back
    addl $4, %esp               # restore %esp

    ###WRITE THE BLOCK OUT TO THE OUTPUT FILE###
    movl %eax, %edx             # size of the buffer
    movl $SYS_WRITE, %eax
    movl ST_FD_OUT(%ebp), %ebx  # file to use
    movl $BUFFER_DATA, %ecx     # location of the buffer
    int $LINUX_SYSCALL

    ###CONTINUE THE LOOP###
    jmp read_loop_begin

end_loop:
    ###CLOSE THE FILES###
    # NOTE - we don't need to do error checking on these, because error
    # conditions don't signify anything special here
    movl $SYS_CLOSE, %eax
    movl ST_FD_OUT(%ebp), %ebx
    int $LINUX_SYSCALL

    movl $SYS_CLOSE, %eax
    movl ST_FD_IN(%ebp), %ebx
    int $LINUX_SYSCALL

    ###EXIT###
    movl $SYS_EXIT, %eax
    movl $0, %ebx
    int $LINUX_SYSCALL

    ###FAILURE PATHS###
close_fd_in_and_fail:
    movl $SYS_CLOSE, %eax
    movl ST_FD_IN(%ebp), %ebx
    int $LINUX_SYSCALL
exit_failure:
    movl $SYS_EXIT, %eax
    movl $1, %ebx
    int $LINUX_SYSCALL
###CONSTANTS##
# The lower boundary of our search
.equ LOWERCASE_A, 'a'
# The upper boundary of our search
.equ LOWERCASE_Z, 'z'
# Conversion between upper and lower case (added to a lower-case letter)
.equ UPPER_CONVERSION, 'A' - 'a'

###STACK STUFF###
.equ ST_BUFFER_LEN, 8           # Length of buffer
.equ ST_BUFFER, 12              # actual buffer

# convert_to_upper: upper-case a block of bytes in place.
#
# In (stack, pushed in this order by the caller):
#        buffer address, then buffer length in bytes
# Out:   no return value; every byte in 'a'..'z' is replaced by its
#        upper-case counterpart, all other bytes are left alone.
# Regs:  %eax = buffer address, %ebx = length, %edi = current index,
#        %cl = current byte. %ebx and %edi are saved and restored;
#        %eax and %ecx are clobbered.
convert_to_upper:
    pushl %ebp
    movl %esp, %ebp
    pushl %ebx                  # callee-saved registers used below
    pushl %edi

    ###SET UP VARIABLES###
    movl ST_BUFFER(%ebp), %eax
    movl ST_BUFFER_LEN(%ebp), %ebx
    movl $0, %edi
    # if a buffer with zero length was given to us, just leave
    cmpl $0, %ebx
    je end_convert_loop

convert_loop:
    # get the current byte
    movb (%eax,%edi,1), %cl
    # go to the next byte unless it is between 'a' and 'z'
    cmpb $LOWERCASE_A, %cl
    jl next_byte
    cmpb $LOWERCASE_Z, %cl
    jg next_byte
    # otherwise convert the byte to uppercase and store it back
    addb $UPPER_CONVERSION, %cl
    movb %cl, (%eax,%edi,1)
next_byte:
    incl %edi                   # next byte
    cmpl %edi, %ebx             # continue unless we've reached the end
    jne convert_loop

end_convert_loop:
    # no return value, just leave
    popl %edi
    popl %ebx
    movl %ebp, %esp
    popl %ebp
    ret
`
the assembler is called "as", and the code is on page 87 of the file, you can easily get if you look it up on google, also the code doesn't copy and paste correctly for some reason, so you might sometimes see things like:
jmp
to_another_point
I tried to get rid of as many of those as I could, but there still might be some, and some of the comments were probably lost in the process, so I would suggest opening the original pdf file.
I am running Ubuntu linux version 10.04.1, I am using as version 2.20.1, and the problem is that it creates the file, but the file is always empty.
The source for this book is available, and if you browse the appropriate part of the CVS repository you can find .s source files for the various examples.
This code is toupper.s - you can just download the most recent revision instead of messing around with copy-and-paste.
I've just tried that (on a Debian system; I don't have anything running Ubuntu 10.04 to hand), assembling and linking exactly as described in the book, and it worked fine.
I think you can subtract 32 from the value of the letters to get it in Upper case, since they have a difference in value of 32. Like this:
# make_cap: make a letter capital.
# In:  %al = character
# Out: %al = the character, upper-cased if it was in 'a'..'z', otherwise
#      unchanged. No other register is modified.
make_cap:
    cmpb $'a', %al
    jb 1f                       # below 'a': not a lower-case letter
    cmpb $'z', %al
    ja 1f                       # above 'z': not a lower-case letter
    subb $32, %al               # upper and lower case differ by 32
1:
    ret