file read buffer is empty in nasm - linux

I managed to build a NASM tutorial code for dealing with files. It outputs the contents of a file to stdout just fine, but when I try to access the data buffer, it contains just zeros. For example in the code below in middle loop EBX is always set to 0, when it should contain file bytes.
section .data
bufsize dw 1024
section .bss
buf resb 1024
section .text ; declaring our .text segment
global _start ; telling where program execution should start
_start: ; this is where code starts getting exec'ed
; get the filename in ebx
pop ebx ; argc
pop ebx ; argv[0]
pop ebx ; the first real arg, a filename
; open the file
mov eax, 5 ; open(
mov ecx, 0 ; read-only mode
int 80h ; );
; read the file
mov eax, 3 ; read(
mov ebx, eax ; file_descriptor,
mov ecx, buf ; *buf,
mov edx, bufsize ; *bufsize
int 80h ; );
mov ecx, 20
loop:
mov eax, 20
sub eax, ecx
mov ebx, [buf+eax*4]
loop loop
; write to STDOUT
mov eax, 4 ; write(
mov ebx, 1 ; STDOUT,
mov ecx, buf ; *buf
int 80h ; );
; exit
mov eax, 1 ; exit(
mov ebx, 0 ; 0
int 80h ; );

For example in the code below in middle loop EBX is always set to 0, when it should contain file bytes.
How are you determining this? (Running under a debugger, perhaps?)
Your code has an unfortunate bug:
; read the file
mov eax, 3 ; read(
mov ebx, eax ; file_descriptor,
You're overwriting EAX (which contains the file descriptor returned by the open syscall, if the open succeeded) with the value 3, before it gets moved to EBX as the file descriptor argument to read.
Normally, a process will start out with file descriptors 0, 1 and 2 assigned to stdin, stdout and stderr, and the first file descriptor that you explicitly open will be 3, so you'll get away with it!
But you might not be so lucky if you're running with a debugger. File descriptor 3 might be something else, and the read might fail (you don't check to see if the returned value is a negative error code), or read something completely unexpected...

Related

Segmentation fault in my assembly program

I am getting a segmentation fault from this simple starting program.
I am using Ubuntu 16.10 and kdbg for debugging. Affter reaching starting __int 80h__, it stops moving to the next line.
section .bss ; section containing uninitialized data
BUFFLEN equ 1024 ; length of buffer
Buff: resb BUFFLEN ; text buffer itself
section .data ; section containing initialzed data
section .text ; secttion containing code
global _start ; linker needs to find the entry point!
_start:
nop ; this no-op keeps gdb happy
; read buffer full of text form stdin:
read:
mov eax, 3 ; specify sys_read call
mov ebx, 0 ; specify file descriptor 0 : standard input
mov ecx, Buff ; pass offset of the buffer to read to
mov edx, BUFFLEN ; pass number of bytes to be read at one pass
int 80h ; call sys_read to fill the buffer
mov esi,eax ; copy sys_read return value for safekeeping
cmp eax, 0 ; if eax = 0 , sys_read reached EOF on stdin
je Done ; jump if Equal ( to o, form compare)
; set up the register for the process buffer step:
mov ecx, esi ; place the number of bytes read into ecx
mov ebp, Buff ; pace address of buffer into ebp
dec ebp ; adjust the count to offset
; go through the buffer and cnvert lowercase to uppercase characters:
Scan:
cmp byte [ebp+ecx], 61h ; test input char agaisnst lowercase 'a'
jb Next ; if Below 'a' in ASCII, not lowercase
cmp byte [ebp+ecx], 7Ah ; test against lowercase 'z'
ja Next
sub byte [ebx+ecx], 20h ; subtract 20h to give uppercase..
Next:
dec ecx ; Decrement counter
jnz Scan ; if characters reamin, loop back
; Write the buffer full of processed text to stdout:
Write:
mov eax,4 ; Specify sys_write call
mov ebx, 1 ; Specify file descriptor 1 : stdout
mov ecx, Buff ; pass the offset of the buffer
mov edx, esi ; pass the # of bytes of data in the buffer
int 80h ; make sys_write kernel call
jmp read ; loop back and load another buffer full
Done:
mov eax, 1 ; Code for Exit sys_call
mov ebx, 0 ; return code of Zero
int 80h
I used these commands:
nasm -f elf -g -F stabs uppercaser1.asm
ld -m elf_i386 -o uppercaser1 uppercaser1.o
./uppercaser < inputflie
I think this code is generally public so I am posting with that in mind.
You should use the code as a guide to understanding what may be wrong in your code; however, it does not conform to any coding standard so it is pointless just to copy and paste and turn it in as an assignment; assuming that is the case.
You will never increase your assembly programming skills by merely doing a copy/paste.
lsb_release -a
...
Description: Ubuntu 16.04.3 LTS
nasm -f elf32 -g uppercase1.s -o uppercase1.o && ld -m elf_i386 uppercase1.o -o uppercase1
section .bss
Buff resb 1
section .data
section .text
global _start
_start:
nop
Read:
mov eax, 3 ; read syscall
mov ebx, 0 ; stdin
mov ecx, Buff ; pass address of the buffer to read to
mov edx, 1 ; read one char or one byte
int 0x80 ;
cmp eax, 0 ; if syscall returns returns 0
je Exit ;
cmp byte [Buff], 0x61 ; lower case a
jb Write ; jump if byte is below a in ASCII chart
cmp byte [Buff], 0x7a ; lower case z
ja Write ; jump if byte is above z in ASCII chart
sub byte [Buff], 0x20 ; changes the value in the buffer to an uppercase char
Write:
mov eax, 4 ; write syscall
mov ebx, 1 ; stdout
mov ecx, Buff ; what to print
mov edx, 1 ; length is one byte - each char is a byte
int 0x80
jmp Read ; go back to Read
Exit:
mov eax, 1
mov ebx, 0
int 0x80
Sample output:
david#ubuntuserver00A:~/asm$ ./uppercase1 < uppercase1.s
SECTION .BSS
BUFF RESB 1
SECTION .DATA
SECTION .TEXT
GLOBAL _START
_START:
NOP
READ:
MOV EAX, 3 ; READ SYSCALL
MOV EBX, 0 ; STDIN
MOV ECX, BUFF ; PASS ADDRESS OF THE BUFFER TO READ TO
MOV EDX, 1 ; READ ONE CHAR OR ONE BYTE
INT 0X80 ;
CMP EAX, 0 ; IF SYSCALL RETURNS RETURNS 0
JE EXIT ;
CMP BYTE [BUFF], 0X61 ; LOWER CASE A
JB WRITE ; JUMP IF BYTE IS BELOW A IN ASCII CHART
CMP BYTE [BUFF], 0X7A ; LOWER CASE Z
JA WRITE ; JUMP IF BYTE IS ABOVE Z IN ASCII CHART
SUB BYTE [BUFF], 0X20 ; CHANGES THE VALUE IN THE BUFFER TO AN UPPERCASE CHAR
WRITE:
MOV EAX, 4 ; WRITE SYSCALL
MOV EBX, 1 ; STDOUT
MOV ECX, BUFF ; WHAT TO PRINT
MOV EDX, 1 ; LENGTH IS ONE BYTE - EACH CHAR IS A BYTE
INT 0X80
JMP READ ; GO BACK TO READ
EXIT:
MOV EAX, 1
MOV EBX, 0
INT 0X80

Register and variables not saving state after jump

I'm trying to learn NASM. I want to write a procedure to get one character at a time from the console until a newline (0xA) is encountered using only kernel calls. So far I have
global _start
section .data
sys_read equ 3
sys_write equ 4
stdin equ 0
stdout equ 1
section .bss
line resb 11
index resb 4
section .text
_start:
push ebp
mov ebp, esp
call _readLine
afterReadLine:
call _printLine
mov esp, ebp
pop ebp
jmp exit
_readLine:
; Reads into line until new line (0xA)
; Number of bytes read will be stored in index when _readLine returns
mov eax, sys_read ; syscall to read
mov ebx, stdin ; stdin
mov edx, [index] ; put index into edx
mov ecx, dword line ; put line addr in ecx
add ecx, edx ; add index to addr in ecx
mov edx, 1 ; read one char
int 0x80 ; call kernel to read char
mov ecx, [index] ; put index into ecx
cmp dword [line + ecx], 0xA ; compare value at line + ecx to new line char
inc byte [index] ; increment index
je afterReadLine ; if last char is newline return
jne _readLine ; if last char is not new line, loop
_printLine:
mov eax, sys_write
mov ebx, stdout
mov ecx, line
mov edx, [index]
int 0x80
ret
exit:
mov eax, 01h ; exit()
xor ebx, ebx ; errno
int 80h
When I test against the value stored in the index variable at the end, it always equals 0. I tried moving the value of index into eax, but it's also zero after the jump. I tried using the ret keyword, but that also seemed to overwrite my values. What is the best practice way to return the value of characters read from this procedure?
EDIT:
I tried the following and I'm still not getting any output. With input "abcd[newline]" the program outputs "abcd" if I hardcode the value of 4 in edx in the _printLine procedure, but as written won't output anything.
global _start
section .data
sys_read equ 3
sys_write equ 4
stdin equ 0
stdout equ 1
bytesRead dd 0
termios: times 36 db 0
ICANON: equ 1<<1
ECHO: equ 1<<3
section .bss
line resb 11
index resb 4
section .text
_start:
push ebp
mov ebp, esp
call canonical_off
call echo_off
call _readLine
call _printLine
call canonical_on
call echo_on
mov esp, ebp
pop ebp
jmp exit
_readLine:
; Reads into line until new line (0xA)
; Number of bytes read will be stored in bytesRead when _readLine returns
mov eax, sys_read ; syscall to read
mov ebx, stdin ; stdin
mov edx, [index] ; put index into edx
mov ecx, dword line ; put line addr in ecx
add ecx, edx ; add index to addr in ecx
mov edx, 1 ; read one char
int 0x80 ; call kernel to read char
mov ecx, [index] ; put index into ecx
cmp dword [line + ecx], 0xA ; compare value at line + ecx to new line char
inc byte [index] ; increment index
jne _readLine ; if last char is not new line, loop
ret
_printLine:
mov eax, sys_write
mov ebx, stdout
mov ecx, line
mov edx, [index] ; Works if hardcoded 4 here!
int 0x80
ret
canonical_off:
call read_stdin_termios
; clear canonical bit in local mode flags
;push rax
mov eax, ICANON
not eax
and [termios+12], eax
;pop rax
call write_stdin_termios
ret
echo_off:
call read_stdin_termios
; clear echo bit in local mode flags
;push rax
mov eax, ECHO
not eax
and [termios+12], eax
;pop rax
call write_stdin_termios
ret
canonical_on:
call read_stdin_termios
; set canonical bit in local mode flags
or dword [termios+12], ICANON
call write_stdin_termios
ret
echo_on:
call read_stdin_termios
; set echo bit in local mode flags
or dword [termios+12], ECHO
call write_stdin_termios
ret
read_stdin_termios:
; push rax
; push rbx
; push rcx
;push rdx
mov eax, 36h
mov ebx, stdin
mov ecx, 5401h
mov edx, termios
int 80h
;pop rdx
; pop rcx
;pop rbx
;pop rax
ret
write_stdin_termios:
; push rax
;push rbx
;push rcx
; push rdx
mov eax, 36h
mov ebx, stdin
mov ecx, 5402h
mov edx, termios
int 80h
;pop rdx
;pop rcx
; pop rbx
;pop rax
ret
exit:
mov eax, 01h ; exit()
xor ebx, ebx ; errno
int 80h
link here http://ideone.com/Lw3fyV
First off, you are calling _readLine but not returning from it, instead you are jumping to the label after call _readLine conveniently called afterReadLine. You should use ret to exit from your function.
Now to your question. By default, the terminal is in canonical mode (Cooked mode), meaning all input is buffered. The system will fill your buffer until the return key is pressed and adds 0xA to the end of the buffer.
What you want is non-canonical mode (raw mode). This means the system does not process the key presses but passes it on to you.
How do i read single character input from keyboard using nasm (assembly) under ubuntu?
By switching to raw mode, you can get each character pressed until the return key and do what you will with it.

How to save a file in assembly language with nasm in Linux x86?

I'm just wondering how I can throw a buffer to a file. I know I can set up the registers like:
mov eax, 4
mov ebx, (file descriptor here)
mov ecx, myBuffer
mov edx, myBufferLen
int 80h
and then close the file with:
mov eax, 6
int 80h
but I'm not sure how to get the file descriptor. Someone told me that whenever you open a file, after calling the service dispatcher, the eax has the file descriptor. Whatever I try, it doesn't create a new file or save the current one.
mov eax, 5 ; __NR_open
mov ebx, filename ; zero terminated!
mov ecx, 2 ; O_WRITE? Check me on this!
mov edx, 777q ; permissions - Check me on this, too!
int 80h
; did we succeed?
cmp eax, -4096
ja exit
mov [descriptor], eax
; and so on...
;...
xor eax, eax ; claim no error
exit:
mov ebx, eax
neg ebx
mov eax, 1 ; __NR_exit
int 80h
I'm uncertain on the "magic numbers" for open flags and permissions, and too lazy to look them up right now (fs.h?). You may need to "or" the open flags with O_CREATE to create a new file(?). The trick at "exit:" negates the (negative) ERRNO (if any) so you can read it easily with "echo $?". Something like that...
Thanks to #gnometorule for the solution. So this is how you store a buffer into file:
in this case you are receiving the file name as a first parameter when you run the program.
section .text
global _start
_start:
pop ebx ; this is the amount of parameter received (in this case 2)
pop ebx ; this is the direction of our program
pop ebx ; this is the first parameter (our file name eg. "text.txt")
so we open it and move it to a buffer to edit or whatever we want with it.
textEdit:
;we open the file
mov eax, 5
push ebx ; we save the file name in the stack for later use
mov ecx,0
int 80h
; and then we move the information read to buffer
mov eax, 3
mov ebx, eax ; really dont know how this works, but it does.
mov ecx, fileBuffer
mov edx, fileBufferSize
int 80h
now fileBuffer (wich is a buffer defined in section .bss) has what we had in the file we opened. Here you can edit the information. When we want to save it back to the file it goes like this:
saveFile:
;open the file
mov eax, 5
pop ebx ; the file name was stored in the stack,
push ebx ; so we retrieve it and save it back for later use
mov ecx, 2 ; ecx has the access mode ("2"aloud us to read/write)
mov edx, $0666
int 80h
;now we are going to write to the file
mov ebx, eax ; eax has the file descriptor we opened. so we move it to ebx.
mov eax, 4 ; we are going to use the write service (#4)
mov ecx, fileBuffer ; ecx has the adress of what we want to write
mov edx, fileBufferSize ; edx has the amount of bytes we want to write
int 80h
; close the file
mov eax, 6
int 80h
jmp exit ; jump wherever you want, or end your program... etc.
This worked for me.

Linux NASM detect EOF

I'm trying to learn the basics asm on linux and I can't find a very good reference. The NASM docs seem to assume you already know masm... I found no examples in the documentation of the cmp (outside the Intel instruction reference).
I'd written a program that reads a single byte from stdin and writes it to stdout. Below is my modification to try to detect EOF on stdin and exit when EOF is reached. The issue is it never exits. I just keeps printing the last char read from stdin. The issue is either in my EOF detection (cmp ecx, EOF) and/or my jump to the _exit label (je _exit) I think.
What am I doing wrong?
%define EOF -1
section .bss
char: resb 1
section .text
global _start
_exit:
mov eax, 1 ; exit
mov ebx, 0 ; exit status
int 80h
_start:
mov eax, 3 ; sys_read
mov ebx, 0 ; stdin
mov ecx, char ; buffer
cmp ecx, EOF ; EOF?
je _exit
mov edx, 1 ; read byte count
int 80h
mov eax, 4 ; sys_write
mov ebx, 1 ; stdout
mov ecx, char ; buffer
mov edx, 1 ; write byte count
int 80h
jmp _start
For the sake of sanity, I verified EOF is -1 with this C:
#include <stdio.h>
int main() { printf("%d\n", EOF); }
You are comparing the address of the buffer to EOF (-1) instead of the character stored in the buffer.
Having said that, the read system call does not return the value of EOF when end of file is reached, but it returns zero and doesn't stick anything in the buffer (see man 2 read). To identify end of file, just check the value of eax after the call to read:
section .bss
buf: resb 1
section .text
global _start
_exit:
mov eax, 1 ; exit
mov ebx, 0 ; exit status
int 80h
_start:
mov eax, 3 ; sys_read
mov ebx, 0 ; stdin
mov ecx, buf ; buffer
mov edx, 1 ; read byte count
int 80h
cmp eax, 0
je _exit
mov eax, 4 ; sys_write
mov ebx, 1 ; stdout
mov ecx, buf ; buffer
mov edx, 1 ; write byte count
int 80h
jmp _start
If you did want to properly compare the character to some value, use:
cmp byte [buf], VALUE
Also, I renamed char to buf. char is a basic C data type and a bad choice for a variable name.

NASM Linux Assembly Printing Integers

I am trying to print a single digit integer in nasm assembly on linux. What I currently have compiles fine, but nothing is being written to the screen. Can anyone explain to me what I am doing wrong here?
section .text
global _start
_start:
mov ecx, 1 ; stores 1 in rcx
add edx, ecx ; stores ecx in edx
add edx, 30h ; gets the ascii value in edx
mov ecx, edx ; ascii value is now in ecx
jmp write ; jumps to write
write:
mov eax, ecx ; moves ecx to eax for writing
mov eax, 4 ; sys call for write
mov ebx, 1 ; stdout
int 80h ; call kernel
mov eax,1 ; system exit
mov ebx,0 ; exit 0
int 80h ; call the kernel again
This is adding, not storing:
add edx, ecx ; stores ecx in edx
This copies ecx to eax and then overwrites it with 4:
mov eax, ecx ; moves ecx to eax for writing
mov eax, 4 ; sys call for write
EDIT:
For a 'write' system call:
eax = 4
ebx = file descriptor (1 = screen)
ecx = address of string
edx = length of string
After reviewing the other two answers this is what I finally came up with.
sys_exit equ 1
sys_write equ 4
stdout equ 1
section .bss
outputBuffer resb 4
section .text
global _start
_start:
mov ecx, 1 ; Number 1
add ecx, 0x30 ; Add 30 hex for ascii
mov [outputBuffer], ecx ; Save number in buffer
mov ecx, outputBuffer ; Store address of outputBuffer in ecx
mov eax, sys_write ; sys_write
mov ebx, stdout ; to STDOUT
mov edx, 1 ; length = one byte
int 0x80 ; Call the kernel
mov eax, sys_exit ; system exit
mov ebx, 0 ; exit 0
int 0x80 ; call the kernel again
From man 2 write
ssize_t write(int fd, const void *buf, size_t count);
In addition to the other errors that have been pointed out, write() takes a pointer to the data and a length, not an actual byte itself in a register as you are trying to provide.
So you will have to store your data from a register to memory and use that address (or if it's constant as it currently is, don't load the data into a register but load its address instead).

Resources