MIPS assembly: print the Fibonacci sequence (with syscalls) - linux

Good afternoon! I have a MIPS instruction program that simulates the Fibonacci sequence. My program works by entering in a number (n) that will determine how many iterations the program will work through. The program spits out the correct results but i was wondering how i could output each sequence of number(s) of the Fib method. Heres my code first :
.data
msg1:.asciiz "Give a number: " # message for fib(n)
.text
main:
li $v0,4 #read string
la $a0,msg1 # set iterations to msg1 value
syscall
li $v0,5 # read an int
syscall
add $a0,$v0,$zero #move to $a0
jal fib #call fib
add $a0,$v0,$zero # add result into argument
li $v0,1 # output integer
syscall
li $v0,10
syscall
fib:
#a0=y
#if (y==0) return 0;
#if (y==1) return 1;
#return( fib(y-1)+fib(y-2) );
addi $sp,$sp,-12 #save in stack
sw $ra,0($sp) # save return adress to stack
sw $s0,4($sp)# save msg value
sw $s1,8($sp)# save fib(y-1)
add $s0,$a0,$zero # store msg value into s0
addi $t1,$zero,1 # comparable value
beq $s0,$zero,return0 # if s0 == 0 jump to return 0 block
beq $s0,$t1,return1# if s0 == t1 (1) jump to return 1 block
addi $a0,$s0,-1 # else decrement initial value by 1
jal fib
add $s1,$zero,$v0 #s1=fib(y-1)############################syscall needed#########################################
addi $a0,$s0,-2 # subtract two
jal fib #v0=fib(n-2)
add $v0,$v0,$s1 #v0=fib(n-2)+$s1 ##########################syscall needed#################################
exitfib:
lw $ra,0($sp) #read return adress from stack
lw $s0,4($sp) # read msg value from stack
lw $s1,8($sp) # read fib(y-1) from stack
addi $sp,$sp,12 #bring back stack pointer
jr $ra
return1:
li $v0,10 # return 1 to result
j exitfib
return0 : li $v0,0 # return 0 to result
j exitfib
Hopefully you saw, but i have two lines commented where i believe a syscall would be necessary to output each number per sequence ($s1 = fib(y-1))($v0 = fib(n-2) - $s1). I tried loading the correct code into the result register to output an integer but it still gives me problems (not displaying the process before the final result). How could i correctly implement a syscall so that i can print each number the fib method calculates before reaching a final result?

Related

RISC-V interrupts, setting up MTIMECMP

I am trying to write a program in RISC-V assembly for HiFive1 board to wake up with timer interrupt
This is my interrupt setup routine
.section .text
.align 2
.globl setupINTERRUPT
.equ MTIMECMP, 0x2004000
setupINTERRUPT:
addi sp, sp, -16 # allocate a stack frame, moves the stack up by 16 bits
sw ra, 12(sp) # save return adress on stack
li t0, 0x8 # time interval at which to triger the interrupt
li t1, MTIMECMP # MTIMECMP register of the CLINT memmory map
sw t0, 0(t1) # store the interval in MTIMECMP memory location
li t0, 0x800 # make a mask for 3rd bit
csrrs t1, mstatus, t0 # use CRS READ/SET instruction to set 3rd bit using previously defined mask
li t0, 0x3 # make a mask for 0th and 1st bit
csrrc t1, mtvec, t0 # use CSR READ/CLEAR instruction to clear 0th and 1st bit
li t0, 0x80 # make a mask for 7th bit
csrrs t1, mie, t0 # set 7th bit for MACHINE TIMER INTERRUPT ENABLE
lw ra, 12(sp) # restore the return address
addi sp, sp, 16 # dealocating stack frame
ret
I am not too sure if im setting the MTIMECMP correctly, i know its a 64 bit memory location.
I am trying to use this interrupt as a delay timer for a blinking LED (just trying to make sure the interrupt works before i move onto writing a handler)
here is my setLED program. (not that all the GPIO register setup was done previously and is known to work). I have WFI instruction before each of the ON and OFF functions. The LED doesn't light up, even though in the debug mode it does. I think in LED it skips the WFI instruction as if the interrupt was asserted.
.section .text
.align 2
.globl setLED
#include "memoryMap.inc"
#include "GPIO.inc"
.equ NOERROR, 0x0
.equ ERROR, 0x1
.equ LEDON, 0x1
# which LED to set comes into register a0
# desired On/Off state comes into a1
setLED:
addi sp, sp, -16 # allocate a stack frame, moves the stack up by 16 bits
sw ra, 12(sp) # save return adress on stack
li t0, GPIO_CTRL_ADDR # load GPIO adress
lw t1, GPIO_OUTPUT_VAL(t0) # get the current value of the pins
beqz a1, ledOff # Branch off to turn off led if a1 requests it
li t2, LEDON # load up valued of LEDON into temp register
beq a1, t2, ledOn # branch if on requested
li a0, ERROR # we got a bad status request, return an error
j exit
ledOn:
wfi
xor t1, t1, a0 # doing xor to only change the value of requested LED
sw t1, GPIO_OUTPUT_VAL(t0) # write the new output value to GPIO out
li a0, NOERROR # no error
j exit
ledOff:
wfi
xor a0, a0, 0xffffffff # invert everything so that all bits are one except the LED we are turning off
and t1, t1, a0 # and a0 and t1 to get the LED we want to turn off
sw t1, GPIO_OUTPUT_VAL(t0) # write the new output value
li a0, NOERROR
exit:
lw ra, 12(sp) # restore the return address
addi sp, sp, 16 # dealocating stack frame
ret

Why does a fully static Rust ELF binary have a Global Offset Table (GOT) section?

This code, when compiled for the x86_64-unknown-linux-musl target, produces a .got section:
fn main() {
println!("Hello, world!");
}
$ cargo build --release --target x86_64-unknown-linux-musl
$ readelf -S hello
There are 30 section headers, starting at offset 0x26dc08:
Section Headers:
[Nr] Name Type Address Offset
Size EntSize Flags Link Info Align
...
[12] .got PROGBITS 0000000000637b58 00037b58
00000000000004a8 0000000000000008 WA 0 0 8
...
According to this answer for analogous C code, the .got section is an artifact that can be safely removed. However, it segfaults for me:
$ objcopy -R.got hello hello_no_got
$ ./hello_no_got
[1] 3131 segmentation fault (core dumped) ./hello_no_got
Looking at the disassembly, I see that the GOT basically holds static function addresses:
$ objdump -d hello -M intel
...
0000000000400340 <_ZN5hello4main17h5d434a6e08b2e3b8E>:
...
40037c: ff 15 26 7a 23 00 call QWORD PTR [rip+0x237a26] # 637da8 <_GLOBAL_OFFSET_TABLE_+0x250>
...
$ objdump -s -j .got hello | grep 637da8
637da8 50434000 00000000 b0854000 00000000 PC#.......#.....
$ objdump -d hello -M intel | grep 404350
0000000000404350 <_ZN3std2io5stdio6_print17h522bda9f206d7fddE>:
404350: 41 57 push r15
The number 404350 comes from 50434000 00000000, which is a little-endian 0x00000000000404350 (this was not obvious; I had to run the binary under GDB to figure this out!)
This is perplexing, since Wikipedia says that
[GOT] is used by executed programs to find during runtime addresses of global variables, unknown in compile time. The global offset table is updated in process bootstrap by the dynamic linker.
Why is the GOT present? From the disassembly, it looks like the compiler knows all the needed addresses. As far as I know, there is no bootstrap done by the dynamic linker: there is neither INTERP nor DYNAMIC program headers present in my binary;
Why does the GOT store function pointers? Wikipedia says the GOT is only for global variables, and function pointers should be contained in the PLT.
TL;DR summary: the GOT is really a rudimentary build artifact, which I was able to get rid of via simple machine code manipulations.
Breakdown
If we look at
$ objdump -dj .text hello
and search for GLOBAL, we see only four distinct types of references to the GOT (constants differ):
40037c: ff 15 26 7a 23 00 call QWORD PTR [rip+0x237a26] # 637da8 <_GLOBAL_OFFSET_TABLE_+0x250>
425903: ff 25 5f 26 21 00 jmp QWORD PTR [rip+0x21265f] # 637f68 <_GLOBAL_OFFSET_TABLE_+0x410>
41d8b5: 48 3b 1d b4 a5 21 00 cmp rbx,QWORD PTR [rip+0x21a5b4] # 637e70 <_GLOBAL_OFFSET_TABLE_+0x318>
40b259: 48 83 3d 7f cb 22 00 cmp QWORD PTR [rip+0x22cb7f],0x0 # 637de0 <_GLOBAL_OFFSET_TABLE_+0x288>
40b260: 00
All of these are reading instructions, which means that the GOT is not modified at runtime. This in turn means that we can statically resolve the addresses that the GOT refers to! Let's consider the reference types one by one:
call QWORD PTR [rip+0x2126be] simply says "go to address [rip+0x2126be], take 8 bytes from there, interpret them as a function address and call the function". We can simply replace this instruction with a direct call:
40037c: e8 cf 3f 00 00 call 404350 <_ZN3std2io5stdio6_print17h522bda9f206d7fddE>
400381: 90 nop
Notice the nop at the end: we need to replace all the 6 bytes of the machine code that constitute the first instruction, but the instruction we replace it with is only 5 bytes, so we need to pad it. Fundamentally, as we are patching a compiled binary, we can replace an instruction with a another one only if it is not longer.
jmp QWORD PTR [rip+0x21265f] is the same as the previous one, but instead of calling an address it jumps to it. This turns into:
425903: e9 b8 f7 ff ff jmp 4250c0 <_ZN68_$LT$core..fmt..builders..PadAdapter$u20$as$u20$core..fmt..Write$GT$9write_str17hc384e51187942069E>
425908: 90 nop
cmp rbx,QWORD PTR [rip+0x21a5b4] - this takes 8 bytes from [rip+0x21a5b4] and compares them to the contents of rbx register. This one is tricky, since cmp can not compare register contents to an 64-bit immediate value. We could use another register for that, but we don't know which of the registers are used around this instruction. A careful solution would be something like
push rax
mov rax,0x0000006363c0
cmp rbx,rax
pop rax
But that would be way beyond our limit of 7 bytes. The real solution stems from an observation that the GOT contains only addresses; our address space is (roughly) contained in range [0x400000; 0x650000], which can be seen in the program headers:
$ readelf -l hello
...
Program Headers:
Type Offset VirtAddr PhysAddr
FileSiz MemSiz Flags Align
LOAD 0x0000000000000000 0x0000000000400000 0x0000000000400000
0x0000000000035b50 0x0000000000035b50 R E 0x200000
LOAD 0x0000000000036380 0x0000000000636380 0x0000000000636380
0x0000000000001dd0 0x0000000000003918 RW 0x200000
...
It follows that we can (mostly) get away with only comparing 4 bytes of a GOT entry instead of 8. So the substitution is:
41d8b5: 81 fb c0 63 63 00 cmp ebx,0x6363c0
41d8bb: 90 nop
The last one consists of two lines of objdump output, since 8 bytes do not fit in one line:
40b259: 48 83 3d 7f cb 22 00 cmp QWORD PTR [rip+0x22cb7f],0x0 # 637de0 <_GLOBAL_OFFSET_TABLE_+0x288>
40b260: 00
It just compares 8 bytes of the GOT to a constant (in this case, 0x0). In fact, we can do the comparison statically; if the operands compare equal, we replace the comparison with
40b259: 48 39 c0 cmp rax,rax
40b25c: 90 nop
40b25d: 90 nop
40b25e: 90 nop
40b25f: 90 nop
40b260: 90 nop
Obviously, a register is always equal to itself. A lot of padding needed here!
If the left operand is greater than the right one, we replace the comparison with
40b259: 48 83 fc 00 cmp rsp,0x0
40b25d: 90 nop
40b25e: 90 nop
40b25f: 90 nop
40b260: 90 nop
In practice, rsp is always greater than zero.
If the left operand is smaller than the right one, things get a bit more complicated, but since we have a whole lot of bytes (8!) we can manage:
40b259: 50 push rax
40b25a: 31 c0 xor eax,eax
40b25c: 83 f8 01 cmp eax,0x1
40b25f: 58 pop rax
40b260: 90 nop
Notice that the second and the third instructions use eax instead of rax, since cmp and xor involving eax take one less byte than with rax.
Testing
I have written a Python script to do all these substitutions automatically (it's a bit hacky and relies on parsing of objdump output though):
#!/usr/bin/env python3
import re
import sys
import argparse
import subprocess
def read_u64(binary):
return sum(binary[i] * 256 ** i for i in range(8))
def distance_u32(start, end):
assert abs(end - start) < 2 ** 31
diff = end - start
if diff < 0:
return 2 ** 32 + diff
else:
return diff
def to_u32(x):
assert 0 <= x < 2 ** 32
return bytes((x // (256 ** i)) % 256 for i in range(4))
class GotInstruction:
def __init__(self, lines, symbol_address, symbol_offset):
self.address = int(lines[0].split(":")[0].strip(), 16)
self.offset = symbol_offset + (self.address - symbol_address)
self.got_offset = int(lines[0].split("(File Offset: ")[1].strip().strip(")"), 16)
self.got_offset = self.got_offset % 0x200000 # No idea why the offset is actually wrong
self.bytes = []
for line in lines:
self.bytes += [int(x, 16) for x in line.split("\t")[1].split()]
class TextDump:
symbol_regex = re.compile(r"^([0-9,a-f]{16}) <(.*)> \(File Offset: 0x([0-9,a-f]*)\):")
def __init__(self, binary_path):
self.got_instructions = []
objdump_output = subprocess.check_output(["objdump", "-Fdj", ".text", "-M", "intel",
binary_path])
lines = objdump_output.decode("utf-8").split("\n")
current_symbol_address = 0
current_symbol_offset = 0
for line_group in self.group_lines(lines):
match = self.symbol_regex.match(line_group[0])
if match is not None:
current_symbol_address = int(match.group(1), 16)
current_symbol_offset = int(match.group(3), 16)
elif "_GLOBAL_OFFSET_TABLE_" in line_group[0]:
instruction = GotInstruction(line_group, current_symbol_address,
current_symbol_offset)
self.got_instructions.append(instruction)
#staticmethod
def group_lines(lines):
if not lines:
return
line_group = [lines[0]]
for line in lines[1:]:
if line.count("\t") == 1: # this line continues the previous one
line_group.append(line)
else:
yield line_group
line_group = [line]
yield line_group
def __iter__(self):
return iter(self.got_instructions)
def read_binary_file(path):
try:
with open(path, "rb") as f:
return f.read()
except (IOError, OSError) as exc:
print(f"Failed to open {path}: {exc.strerror}")
sys.exit(1)
def write_binary_file(path, content):
try:
with open(path, "wb") as f:
f.write(content)
except (IOError, OSError) as exc:
print(f"Failed to open {path}: {exc.strerror}")
sys.exit(1)
def patch_got_reference(instruction, binary_content):
got_data = read_u64(binary_content[instruction.got_offset:])
code = instruction.bytes
if code[0] == 0xff:
assert len(code) == 6
relative_address = distance_u32(instruction.address, got_data)
if code[1] == 0x15: # call QWORD PTR [rip+...]
patch = b"\xe8" + to_u32(relative_address - 5) + b"\x90"
elif code[1] == 0x25: # jmp QWORD PTR [rip+...]
patch = b"\xe9" + to_u32(relative_address - 5) + b"\x90"
else:
raise ValueError(f"unknown machine code: {code}")
elif code[:3] == [0x48, 0x83, 0x3d]: # cmp QWORD PTR [rip+...],<BYTE>
assert len(code) == 8
if got_data == code[7]:
patch = b"\x48\x39\xc0" + b"\x90" * 5 # cmp rax,rax
elif got_data > code[7]:
patch = b"\x48\x83\xfc\x00" + b"\x90" * 3 # cmp rsp,0x0
else:
patch = b"\x50\x31\xc0\x83\xf8\x01\x90" # push rax
# xor eax,eax
# cmp eax,0x1
# pop rax
elif code[:3] == [0x48, 0x3b, 0x1d]: # cmp rbx,QWORD PTR [rip+...]
assert len(code) == 7
patch = b"\x81\xfb" + to_u32(got_data) + b"\x90" # cmp ebx,<DWORD>
else:
raise ValueError(f"unknown machine code: {code}")
return dict(offset=instruction.offset, data=patch)
def make_got_patches(binary_path, binary_content):
patches = []
text_dump = TextDump(binary_path)
for instruction in text_dump.got_instructions:
patches.append(patch_got_reference(instruction, binary_content))
return patches
def apply_patches(binary_content, patches):
for patch in patches:
offset = patch["offset"]
data = patch["data"]
binary_content = binary_content[:offset] + data + binary_content[offset + len(data):]
return binary_content
def main():
parser = argparse.ArgumentParser()
parser.add_argument("binary_path", help="Path to ELF binary")
parser.add_argument("-o", "--output", help="Output file path", required=True)
args = parser.parse_args()
binary_content = read_binary_file(args.binary_path)
patches = make_got_patches(args.binary_path, binary_content)
patched_content = apply_patches(binary_content, patches)
write_binary_file(args.output, patched_content)
if __name__ == "__main__":
main()
Now we can get rid of the GOT for real:
$ cargo build --release --target x86_64-unknown-linux-musl
$ ./resolve_got.py target/x86_64-unknown-linux-musl/release/hello -o hello_no_got
$ objcopy -R.got hello_no_got
$ readelf -e hello_no_got | grep .got
$ ./hello_no_got
Hello, world!
I have also tested it on my ~3k LOC app, and it seems to work alright.
P.S. I am not an expert in assembly, so some of the above might be inaccurate.

Cannot find segmentation fault in insertion sort code for arm-v8 assembly

I have generated an array of random integers between 0 and 256 and I tried to sort them using an insertion sort however at some point along the line I messed up and got an error "segmentation fault (core dumped)
I have no idea exactly why I am getting this but I believe the problem has something to do with the macro j representing a loop counter. I have been working away at this for a few hours and I have been stuck on this for a while.
EDIT: After using the debugger I have isolated the problem to be the line:
bl printf
right at the very end when it tries to print the second sorted value.
I have tried to allocate more ram up to 416 bytes, I have tried moving the top2 loop label around (which is the loop that uses j as it's counter) and the farthest i have gotten is for the program to print the first element of the sorted array and then give the error
ALLOC =-(16+400*1)&-16
.equ DEALLOC, -ALLOC
size = 50
define(arraybase, x19)
i .req x20
define(j, x21)
define(temp, w22)
define(sort1, w23)
print1: .asciz "Array[%d]: %d\n"
print2: .string "\nSorted Array: \n"
print3: .asciz "top value equals %d \n"
...
mov i, 0
top1: add i, i, 1
ldrb temp, [arraybase, i ]
mov j, i
top2: mov x25, j
sub x25, x25, 1
ldrb sort1, [arraybase, x25 ]
cmp temp, sort1
b.ge skip1
strb sort1, [arraybase, j]
skip1: sub j, j, 1
cmp j, 0
b.gt top2
strb temp, [arraybase, j]
cmp i, size-1
b.lt top1
printing:
ldr x0, =print2
bl printf
mov i, 0
ldr x0, =print1
tprint: mov x1, i
ldrb w2, [arraybase, i]
bl printf
add i, i, 1
cmp i, size-1
b.le tprint
mov x0, 0
ldp x29, x30, [sp], DEALLOC
ret
The array should be printed in the random order it was initialized in, then it should print the value at the top of the array, then it is meant to print the sorted array in increasing order.
The exact error message I got was:
Segmentation fault (core dumped)
This appears after it prints the first value of the sorted array

68K Assembly Math Formula

I need to write some Lines in 68k Assembly Language with the math formula:
x^2-5x+6
I want to do it with ADD and SUB commands and MOVE yet somehow I cant define the variable x it says its an undefined Symbol and I cant actually realize where my problem is.
ORG $1000
START: ; first instruction of program
MOVE X*X, D0
MOVE (-5X),D2
MOVE 6,D3
ADD D0, D3
SUB D2, D1
SIMHALT
Errors:
LINE 10 Invalid Syntax
LINE 11 Invalid Syntax
Something like this, assuming basic 68000 (and not 68020 or better).
You may have to fix matters like whether X is a word or long word and deal with matters such as sign extension as its a long time since I did 68k assembler. X is defined as a word constant at the end.
ORG $1000
START: ; first instruction of program
CLR.L D7 ; Clear D0 - alternatively MOVEQ #0,D0
MOVE.W X,D7 ; Read X
; Output initial value...
LEA S1,A1
MOVE.W #255,D1
MOVE.L D7,D1
MOVEQ #17,D0
TRAP #15
LEA SNUL,A1
MOVEQ #13,D0
TRAP #15
MOVE.L D7,D6 ; copy of X
ASL.L #2,D6 ; Multiply by 4
ADD.L D7,D6 ; 4X plus another X = 5X
MULU.W D7,D7 ; X^2
SUB.L D6,D7 ; Subtract 5X from X^2
ADDQ.L #6,D7 ; plus 6
; Output answer...
LEA S2,A1
MOVE.L D7,D1
MOVEQ #17,D0
TRAP #15
SIMHALT ; halt simulator
* Put variables and constants here
S1: DC.B 'Initial :',0
S2: DC.B 'Answer :',0
SNUL: DC.B 0
X: DC.W 1234 ; Initial (fixed) value of X
END START ; last line of source
Declaring variables in assembly doesn't work like it would in C or other similar languages. Let's say you're trying to write the following C function:
int myFunction(int x)
{
return (x**2) + (-5x) + 6;
}
So what you would do is, you would choose a register, say D0, and let that be your input variable. It can also be where the output goes.
myFunction:
MOVE.L D0,D1
MULS D0,D1 ;D1 = x squared
MOVE.L D0,D2
ADD.L D0,D0
ADD.L D0,D0
ADD.L D2,D0 ;D0 = 5X
SUB.L D0,D1 ;D1 = (X^2) - 5X
ADD.L #6,D1 ;D1 = (X^2) - 5X + 6
MOVE.L D1,D0 ;return in D0
RTS
Now, if you wanted to use this function, you would first load the desired value of x into register D0 and then call the function:
MOVE.L #5,D0 ;as an example, calculate the function where x = 5.
JSR myFunction
;the program will resume here after the calculation is done,
;and the result will be in D0.

Trouble With Reading In A String With A Subroutine In LC3

So I believe that the way I store the string works. I am just having some issues passing the String out of the subroutine. I heard that in order to pass something out of a subroutine you need to store it in R1 but I can't get it to store into my WORD array
.orig x3000
AND R1,R1,0
LEA R0,PROMPT
PUTS
JSR GETS
ST R1,WORD
LEA R0,WORD
PUTS
halt
; ---------Data Area-------------
WORD .blkw 20
PROMPT .stringz "Enter String: "
; -------------------------------
GETS LEA R1,MEMORYBLOCK ; saves the address of the storage memory block
loop GETC ; input character -> r0
PUTC ; r0 -> console
; always points at the next available block
LD R2,EMPTY ; check for
ADD R2,R2,R0 ; end of line
BRz finish
LD R2,COUNTDOWN
ADD R2,R2,#-1
BRz finish
ST R2,COUNTDOWN
STR R0,R1,#0 ; r0 -> ( memory address stored in r1 + 0 )
ADD R1,R1,#1 ; increments the memory pointer so that it
BR loop
finish LEA R1,MEMORYBLOCK
RET
; ----Subroutine Data Area-------
EMPTY .fill xfff6
COUNTDOWN .fill #10
MEMORYBLOCK .BLKW 20
; -------------------------------
.end
The biggest problem here is the concept of "returning a string". What you're actually doing at the end of GETS is returning the memory address at which the string starts. When you then store this into WORD in the calling function, you are storing the memory address of the first byte of the string that was input (i.e. the memory address of MEMORYBLOCK) into the first byte of WORD. You aren't copying the entire string from MEMORYBLOCK into WORD.
The easiest "fix" for what you're trying to do would be to change
LEA R0,WORD
to
LD R0,WORD
and then for good measure:
WORD .blkw 20
to
WORD .fill 0
as now you're just using it to store a single value (i.e. the memory address of MEMORYBLOCK).
However, at this point you haven't made a copy of the string. If you want to do this, then you will need to make a loop whereby you walk through MEMORYBLOCK and copy each byte to WORD instead.
The final, cheaper, way to do this is to just use MEMORYBLOCK directly from the calling function. It's not really any less valid in a program of this size, unless there's project requirements that ask otherwise.

Resources