Segmentation fault in my assembly program

Segmentation fault in my assembly program - linux

I am getting a segmentation fault from this simple starting program.
I am using Ubuntu 16.10 and kdbg for debugging. Affter reaching starting __int 80h__, it stops moving to the next line.
section .bss ; section containing uninitialized data
BUFFLEN equ 1024 ; length of buffer
Buff: resb BUFFLEN ; text buffer itself
section .data ; section containing initialzed data
section .text ; secttion containing code
global _start ; linker needs to find the entry point!
_start:
nop ; this no-op keeps gdb happy
; read buffer full of text form stdin:
read:
mov eax, 3 ; specify sys_read call
mov ebx, 0 ; specify file descriptor 0 : standard input
mov ecx, Buff ; pass offset of the buffer to read to
mov edx, BUFFLEN ; pass number of bytes to be read at one pass
int 80h ; call sys_read to fill the buffer
mov esi,eax ; copy sys_read return value for safekeeping
cmp eax, 0 ; if eax = 0 , sys_read reached EOF on stdin
je Done ; jump if Equal ( to o, form compare)
; set up the register for the process buffer step:
mov ecx, esi ; place the number of bytes read into ecx
mov ebp, Buff ; pace address of buffer into ebp
dec ebp ; adjust the count to offset
; go through the buffer and cnvert lowercase to uppercase characters:
Scan:
cmp byte [ebp+ecx], 61h ; test input char agaisnst lowercase 'a'
jb Next ; if Below 'a' in ASCII, not lowercase
cmp byte [ebp+ecx], 7Ah ; test against lowercase 'z'
ja Next
sub byte [ebx+ecx], 20h ; subtract 20h to give uppercase..
Next:
dec ecx ; Decrement counter
jnz Scan ; if characters reamin, loop back
; Write the buffer full of processed text to stdout:
Write:
mov eax,4 ; Specify sys_write call
mov ebx, 1 ; Specify file descriptor 1 : stdout
mov ecx, Buff ; pass the offset of the buffer
mov edx, esi ; pass the # of bytes of data in the buffer
int 80h ; make sys_write kernel call
jmp read ; loop back and load another buffer full
Done:
mov eax, 1 ; Code for Exit sys_call
mov ebx, 0 ; return code of Zero
int 80h
I used these commands:
nasm -f elf -g -F stabs uppercaser1.asm
ld -m elf_i386 -o uppercaser1 uppercaser1.o
./uppercaser < inputflie

I think this code is generally public so I am posting with that in mind.
You should use the code as a guide to understanding what may be wrong in your code; however, it does not conform to any coding standard so it is pointless just to copy and paste and turn it in as an assignment; assuming that is the case.
You will never increase your assembly programming skills by merely doing a copy/paste.
lsb_release -a
...
Description: Ubuntu 16.04.3 LTS
nasm -f elf32 -g uppercase1.s -o uppercase1.o && ld -m elf_i386 uppercase1.o -o uppercase1
section .bss
Buff resb 1
section .data
section .text
global _start
_start:
nop
Read:
mov eax, 3 ; read syscall
mov ebx, 0 ; stdin
mov ecx, Buff ; pass address of the buffer to read to
mov edx, 1 ; read one char or one byte
int 0x80 ;
cmp eax, 0 ; if syscall returns returns 0
je Exit ;
cmp byte [Buff], 0x61 ; lower case a
jb Write ; jump if byte is below a in ASCII chart
cmp byte [Buff], 0x7a ; lower case z
ja Write ; jump if byte is above z in ASCII chart
sub byte [Buff], 0x20 ; changes the value in the buffer to an uppercase char
Write:
mov eax, 4 ; write syscall
mov ebx, 1 ; stdout
mov ecx, Buff ; what to print
mov edx, 1 ; length is one byte - each char is a byte
int 0x80
jmp Read ; go back to Read
Exit:
mov eax, 1
mov ebx, 0
int 0x80
Sample output:
david#ubuntuserver00A:~/asm$ ./uppercase1 < uppercase1.s
SECTION .BSS
BUFF RESB 1
SECTION .DATA
SECTION .TEXT
GLOBAL _START
_START:
NOP
READ:
MOV EAX, 3 ; READ SYSCALL
MOV EBX, 0 ; STDIN
MOV ECX, BUFF ; PASS ADDRESS OF THE BUFFER TO READ TO
MOV EDX, 1 ; READ ONE CHAR OR ONE BYTE
INT 0X80 ;
CMP EAX, 0 ; IF SYSCALL RETURNS RETURNS 0
JE EXIT ;
CMP BYTE [BUFF], 0X61 ; LOWER CASE A
JB WRITE ; JUMP IF BYTE IS BELOW A IN ASCII CHART
CMP BYTE [BUFF], 0X7A ; LOWER CASE Z
JA WRITE ; JUMP IF BYTE IS ABOVE Z IN ASCII CHART
SUB BYTE [BUFF], 0X20 ; CHANGES THE VALUE IN THE BUFFER TO AN UPPERCASE CHAR
WRITE:
MOV EAX, 4 ; WRITE SYSCALL
MOV EBX, 1 ; STDOUT
MOV ECX, BUFF ; WHAT TO PRINT
MOV EDX, 1 ; LENGTH IS ONE BYTE - EACH CHAR IS A BYTE
INT 0X80
JMP READ ; GO BACK TO READ
EXIT:
MOV EAX, 1
MOV EBX, 0
INT 0X80

Related

File permissions in Linux assembly

I'm trying to get information about file permissions. I am using the sys_access system call. Here is my code snippet:
mov eax, 33
mov ebx, fileName
mov ecx, 1
int 80h
cmp eax, 0
jl .error
If eax is -1 there is an error, and I am not getting one, but I need to check all the permissions of the file (owner, group, others). How do I do that?

You can use the kernel function sys_newstat (No. 106 - look at this table) to get the file permissions. The structure stat is a never ending horror, but the following example works at least on my Debian Wheezy 64 bit (NASM, 32-bit and 64-bit modes):
SECTION .data
filename db '/root' ; Just an example, can be replaced with any name
filename_len equ $ - filename ; Length of filename
db 0 ; Terminator for `Int 80h / EAX = 106`
perm_out db 'Permissions: '
perm db 'drwxrwxrwx'
perm_len equ $ - perm ; Index of last character in `perm`
lf db 10
perm_out_len equ $ - perm_out ; Length of `Permissions: ...\n`
SECTION .bss
stat resb 256 ; Way too much, but size is variable depending on OS
SECTION .text
global _start
_start:
mov eax,4 ; sys-out
mov edx,filename_len ; length of string to print
mov ecx,filename ; Pointer to string
mov ebx,1 ; StdOut
int 0x80 ; Call kernel
mov eax,4 ; sys-out
mov edx,1 ; Length of string to print
mov ecx, lf ; Pointer to string
mov ebx,1 ; StdOut
int 0x80 ; Call kernel
mov eax, 106 ; sys_newstat
mov ebx, filename ; Pointer to ASCIIZ file-name
mov ecx, stat ; Pointer to structure stat
int 80h
test eax, eax
jz .noerr
mov eax,1 ; sys_exit
mov ebx,1 ; Exit code, 1=not normal
int 0x80 ; Call kernel
.noerr:
movzx eax, word [stat + 8] ; st_mode (/usr/include/asm/stat.h)
mov ebx, perm_len
; rwx bits
mov ecx, 9
.L1:
sub ebx, 1
shr eax, 1
jc .J1
mov byte [perm + ebx], '-'
.J1:
loop .L1
; directory bit
sub ebx, 1
shr eax, 6
jc .J2
mov byte [perm + ebx], '-'
.J2:
mov eax,4 ; sys-out
mov edx,perm_out_len ; Length of string to print
mov ecx,perm_out ; Pointer to string
mov ebx,1 ; StdOut
int 0x80 ; Call kernel
mov eax,1 ; sys_exit
mov ebx,0 ; Exit code, 0=normal
int 0x80 ; Call kernel

Binary representation in processor's registers in Nasm

I would like to ask about process of put instructions into registers. For example: we want to overwrite count '50' into EBX (in ASCII '50' is count '2').
EBX consists of 32 bits. When we put '50' into it, it will be arranged as binary represent, yes? (0000000 | 00000000 | 00000000 | 00110010). Have a right? What happens with bits, when we place a string into register?

EAX holds 32 bits which Intel calls "integer". The programmer - and sometimes the assembler - decides how to interpret these bits. If you load EAX with the number 50 (not the string '50')
mov eax, 50
the assembler decides to generate a machine instruction that loads the 50 in a manner, that you can read it as number 50 in a binary system:
00000000000000000000000000110010
Try out, what the assembler does if you feed it with a string:
GLOBAL _start
SECTION .bss
outstr resb 40
SECTION .data
_start:
mov eax, 'Four' ; Load EAX with a string
call int2bin ; Convert it to a binary string in outstr
mov byte [edi], 10 ; Add a line feed
inc edi ; Increment the pointer
mov eax, 4 ; SYS_WRITE
mov ebx, 1 ; STDOUT
mov ecx, outstr ; Pointer to output buffer
mov edx, edi ; Count of bytes to send:
sub edx, outstr ; EDX = EDI (offset returned from int2bin) - offset of output buffer
int 0x80 ; Call kernel
mov eax, 1 ; SYS_EXIT
xor ebx, ebx ; Returncode: 0 (ok)
int 0x80 ; Call kernel
int2bin: ; Converts an integer in EAX to a binary string in outstr
mov edi, outstr ; Pointer to a string
mov ecx, 32 ; Loop counter
.LL1:
test cl, 0b111 ; CL%8 = 0 ?
jnz .F ; No: skip the next instructions
mov Byte [edi], ' ' ; Store a space
inc edi ; and increment the pointer
.F:
shl eax, 1 ; The leftmost bit into carry flag
setc dl ; Carry flag into DL
or dl, '0' ; Convert it to ASCII
mov [edi], dl ; Store it to outstr
inc edi ; Increment the pointer
loop .LL1 ; Loop ECX times
mov byte [edi], 0 ; Null termination if needed as C string (not needed here)
ret
Output:
01110010 01110101 01101111 01000110
NASM stored it backwards in EAX. The ASCII of leftmost character is stored in the rightmost byte of EAX, the second-to-last character is to be found in the second byte, and so on. Better to see when those bytes are printed as ASCII characters:
GLOBAL _start
SECTION .bss
outstr resb 40
SECTION .data
_start:
mov eax, 'Four' ; Load EAX with a string
call int2str ; Convert it to a binary string in outstr
mov byte [edi], 10 ; Add a line feed
inc edi ; Increment the pointer
mov eax, 4 ; SYS_WRITE
mov ebx, 1 ; STDOUT
mov ecx, outstr ; Pointer to output buffer
mov edx, edi ; Count of bytes to send:
sub edx, outstr ; EDX = EDI (offset returned from int2bin) - offset of output buffer
int 0x80 ; Call kernel
mov eax, 1 ; SYS_EXIT
xor ebx, ebx ; Returncode: 0 (ok)
int 0x80 ; Call kernel
int2str: ; Converts an integer in EAX to an ASCII string in outstr
mov edi, outstr ; Pointer to a string
mov ecx, 4 ; Loop counter
.LL1:
rol eax, 8
mov [edi], al ; Store it to outstr
inc edi ; Increment the pointer
loop .LL1 ; Loop ECX times
mov byte [edi], 0 ; Null termination if needed as C string (not needed here)
ret
Output:
ruoF
Both programs above show EAX in big endian order. This is the order you are familiar with looking at decimal numbers. The most significant digit is left and the least significant digit is right. However, EAX would be saved in memory or disk in little endian order, starting the sequence from the right with the least significant byte. Looking at the memory with a disassembler or debugger you would see 'F','o','u','r' as well as you had defined it in a .data section with db 'Four'. Therefore you'll get no difference when you load a register with a string, save it to memory and call the write routine of the kernel:
GLOBAL _start
SECTION .bss
outstr resb 40
SECTION .data
_start:
mov eax, 'Hell' ; Load EAX with the first part of the string
mov ebx, 'o wo' ; Load EBX with the second part
mov ecx, 'rld!' ; Load ECX with the third part
mov dword [outstr], eax ; Store the first part in outstr (little endian)
mov dword [outstr+4], ebx ; Append the second part
mov dword [outstr+8], ecx ; Append the third part
mov eax, 4 ; SYS_WRITE
mov ebx, 1 ; STDOUT
mov ecx, outstr ; Pointer to output buffer
mov edx, (3*4) ; Count of bytes to send (3 DWORD à 4 bytes)
int 0x80 ; Call kernel
mov eax, 1 ; SYS_EXIT
xor ebx, ebx ; Returncode: 0 (ok)
int 0x80 ; Call kernel
Output:
Hello world!
Please note: This behavior is made by the NASM programmers. Other assemblers might have a different behavior.

NASM addition program

I am a developer who uses high level languages, learning assembly language in my spare time. Please see the NASM program below:
section .data
section .bss
section .text
global main
main:
mov eax,21
mov ebx,9
add eax,ebx
mov ecx,eax
mov eax,4
mov ebx,1
mov edx,4
int 0x80
push ebp
mov ebp,esp
mov esp,ebp
pop ebp
ret
Here are the commands I use:
ian#ubuntu:~/Desktop/NASM/Program4$ nasm -f elf -o asm.o SystemCalls.asm
ian#ubuntu:~/Desktop/NASM/Program4$ gcc -o program asm.o
ian#ubuntu:~/Desktop/NASM/Program4$ ./program
I don't get any errors, however nothing is printed to the terminal. I used the following link to ensure the registers contained the correct values: http://docs.cs.up.ac.za/programming/asm/derick_tut/syscalls.html

You'll have to convert the integer value to a string to be able to print it with sys_write (syscall 4). The conversion could be done like this (untested):
; Converts the integer value in EAX to a string in
; decimal representation.
; Returns a pointer to the resulting string in EAX.
int_to_string:
mov byte [buffer+9],0 ; add a string terminator at the end of the buffer
lea esi,[buffer+9]
mov ebx,10 ; divisor
int_to_string_loop:
xor edx,edx ; clear edx prior to dividing edx:eax by ebx
div ebx ; EAX /= 10
add dl,'0' ; take the remainder of the division and convert it from 0..9 -> '0'..'9'
dec esi ; store it in the buffer
mov [esi],dl
test eax,eax
jnz int_to_string_loop ; repeat until EAX==0
mov eax,esi
ret
buffer: resb 10

programming in assembly requires a knowledge of ASCII codes and a some basic conversion routines. example: hexadecimal to decimal, decimal to hexadecimal are good routines to keep somewhere on some storage.
No registers can be printed as they are, you have to convert (a lot).
To be a bit more helpfull:
ASCII 0 prints nothing but some text editors (kate in kde linux) will show something on screen (a square or ...). In higher level language like C and C++ is it used to indicate NULL pointers and end of strings.
Usefull to calculate string lengths too.
10 is end of line. depending Linux or Windows there will be a carriage return (Linux) too or not (Windows/Dos).
13 is carriage return
1B is the ESC key (Linux users will now more about this)
255 is a hard return, I never knew why it is good for but it must have its purpose.
check http://www.asciitable.com/ for the entire list.

Convert the integer value to a string.
Here i have used macros pack and unpack to convert integers to string and macro unpack to do the vice-versa
%macro write 2
mov eax, 4
mov ebx, 1
mov ecx, %1
mov edx, %2
int 80h
%endmacro
%macro read 2
mov eax,3
mov ebx,0
mov ecx,%1
mov edx,%2
int 80h
%endmacro
%macro pack 3 ; 1-> string ,2->length ,3->variable
mov esi, %1
mov ebx,0
%%l1:
cmp byte [esi], 10
je %%exit
imul ebx,10
movzx edx,byte [esi]
sub edx,'0'
add ebx,edx
inc esi
jmp %%l1
%%exit:
mov [%3],ebx
%endmacro
%macro unpack 3 ; 1-> string ,2->length ,3->variable
mov esi, %1
mov ebx,0
movzx eax, byte[%3]
mov byte[%2],0
cmp eax, 0
jne %%l1
mov byte[%2],1
push eax
jmp %%exit2
%%l1:
mov ecx,10
mov edx,0
div ecx
add edx,'0'
push edx
inc byte[%2]
cmp eax, 0
je %%exit2
jmp %%l1
%%exit2:
movzx ecx,byte[%2]
%%l2:
pop edx
mov [esi],dl
inc esi
loop %%l2
%endmacro
section .data ; data section
msg1: db "First number : " ;
len1: equ $-msg1 ;
msg2: db "Second number : " ;
len2: equ $-msg2 ;
msg3: db "Sum : " ;
len3: equ $-msg3 ;
ln: db 10
lnl: equ $-ln
var1: resb 10
var2: resb 10
str1: resb 10
str2: resb 10
ans: resb 10
ansvar: resb 10
ansl: db ''
l1: db ''
l2: db ''
section.text ;code
global _start
_start:
write msg1,len1
read str1,10
pack str1,l1,var1
write msg2,len2
read str2,10
pack str2,l2,var2
mov al,[var1]
add al,[var2]
mov [ansvar],al
unpack ans,ansl,ansvar
write msg3,len3
write ans,10
write ln,lnl
mov ebx,0 ; exit code, 0=normal
mov eax,1 ; exit command to kernel
int 0x80 ; interrupt 80 hex, call kernel
To assembler, link and run:
nasm -f elf add.asm
ld -s -o add add.o
./add

Linux NASM detect EOF

I'm trying to learn the basics asm on linux and I can't find a very good reference. The NASM docs seem to assume you already know masm... I found no examples in the documentation of the cmp (outside the Intel instruction reference).
I'd written a program that reads a single byte from stdin and writes it to stdout. Below is my modification to try to detect EOF on stdin and exit when EOF is reached. The issue is it never exits. I just keeps printing the last char read from stdin. The issue is either in my EOF detection (cmp ecx, EOF) and/or my jump to the _exit label (je _exit) I think.
What am I doing wrong?
%define EOF -1
section .bss
char: resb 1
section .text
global _start
_exit:
mov eax, 1 ; exit
mov ebx, 0 ; exit status
int 80h
_start:
mov eax, 3 ; sys_read
mov ebx, 0 ; stdin
mov ecx, char ; buffer
cmp ecx, EOF ; EOF?
je _exit
mov edx, 1 ; read byte count
int 80h
mov eax, 4 ; sys_write
mov ebx, 1 ; stdout
mov ecx, char ; buffer
mov edx, 1 ; write byte count
int 80h
jmp _start
For the sake of sanity, I verified EOF is -1 with this C:
#include <stdio.h>
int main() { printf("%d\n", EOF); }

You are comparing the address of the buffer to EOF (-1) instead of the character stored in the buffer.
Having said that, the read system call does not return the value of EOF when end of file is reached, but it returns zero and doesn't stick anything in the buffer (see man 2 read). To identify end of file, just check the value of eax after the call to read:
section .bss
buf: resb 1
section .text
global _start
_exit:
mov eax, 1 ; exit
mov ebx, 0 ; exit status
int 80h
_start:
mov eax, 3 ; sys_read
mov ebx, 0 ; stdin
mov ecx, buf ; buffer
mov edx, 1 ; read byte count
int 80h
cmp eax, 0
je _exit
mov eax, 4 ; sys_write
mov ebx, 1 ; stdout
mov ecx, buf ; buffer
mov edx, 1 ; write byte count
int 80h
jmp _start
If you did want to properly compare the character to some value, use:
cmp byte [buf], VALUE
Also, I renamed char to buf. char is a basic C data type and a bad choice for a variable name.

NASM Print one Character at a Time

How come this program is not printing out to the screen, am I missing something on the INT 80 command?
section .bss
section .data
hello: db "Hello World",0xa ;10 is EOL
section .text
global _start
_start:
mov ecx, 0; ; int i = 0;
loop:
mov dl, byte [hello + ecx] ; while(data[i] != EOF) {
cmp dl, 0xa ;
je exit ;
mov ebx, ecx ; store conetents of i (ecx)
; Print single character
mov eax, 4 ; set sys_write syscall
mov ecx, byte [hello + ebx] ; ...
mov edx, 1 ; move one byte at a time
int 0x80 ;
inc ebx ; i++
mov ecx, ebx ; move ebx back to ecx
jmp loop ;
exit:
mov eax, 0x01 ; 0x01 = syscall for exit
int 0x80 ;
ADDITION
My Makefile:
sandbox: sandbox.o
ld -o sandbox sandbox.o
sandbox.o: sandbox.asm
nasm -f elf -g -F stabs sandbox.asm -l sandbox.lst
Modified Code:
section .bss
section .data
hello: db "Hello World",0xa ;10 is EOL
section .text
global _start
_start:
mov ecx, 0; ; int i = 0;
while:
mov dl, byte [hello + ecx] ; while(data[i] != EOF) {
cmp dl, 0xa ;
je exit ;
mov ebx, ecx ; store conetents of i (ecx)
; Print single character
mov eax, 4 ; set sys_write syscall
mov cl, byte [hello + ebx] ; ...
mov edx, 1 ; move one byte at a time
int 0x80 ;
inc ebx ; i++
mov ecx, ebx ; move ebx back to ecx
jmp while ;
exit:
mov eax, 0x01 ; 0x01 = syscall for exit
int 0x80 ;

One of the reasons it's not printing is because ebx is supposed to hold the value 1 to specify stdin, and another is because sys_write takes a pointer (the address of your string) as an argument, not an actual character value.
Anyway, let me show you a simpler way of structuring your program:
section .data
SYS_EXIT equ 1
SYS_WRITE equ 4
STDOUT equ 1
TRAP equ 0x80
NUL equ 0
hello: db "Hello World",0xA,NUL ; 0xA is linefeed, terminate with NUL
section .text
global _start
_start:
nop ; for good old gdb
mov ecx, hello ; ecx is the char* to be passed to sys_write
read:
cmp byte[ecx], NUL ; NUL indicates the end of the string
je exit ; if reached the NUL terminator, exit
; setup the registers for a sys_write call
mov eax, SYS_WRITE ; syscall number for sys_write
mov ebx, STDOUT ; print to stdout
mov edx, 1 ; write 1 char at a time
int TRAP; ; execute the syscall
inc ecx ; increment the pointer to the next char
jmp read ; loop back to read
exit:
mov eax, SYS_EXIT ; load the syscall number for sys_exit
mov ebx, 0 ; return a code of 0
int TRAP ; execute the syscall
It can be simpler to NUL terminate your string as I did, or you could also do $-hello to get it's length at compile time. I also set the registers up for sys_write at each iteration in the loop (as you do), since sys_write doesn't preserve all the registers.

I don't know how you got your code to assemble, but it doesn't assemble over here for a couple of very good reasons.
You cannot use loop as a label name because that name is reserved for the loop instruction.
Your line 20's instruction mov ecx, byte [hello + ebx] doesn't assemble either because the source and destination operands' sizes don't match (byte vs dword). Possible changes:
mov cl, byte [hello + ebx]
mov ecx, dword [hello + ebx]
movzx ecx, byte [hello + ebx]
Was the above not the actual code you had?

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Segmentation fault in my assembly program - linux

Related

File permissions in Linux assembly

Binary representation in processor's registers in Nasm

NASM addition program

Linux NASM detect EOF

NASM Print one Character at a Time

Categories

Resources