Reliable serial data buffering in 8051 - delay

In the near future, I will be the owner of HM-TRP radio modules that communicate via UART. I will be using an AT89C4051 microcontroller as a bridge between the HM-TRP and a bigger circuit that uses an AT89S52 so everything can communicate. The HM-TRP will use a 9600 bps baud rate for communication with the AT89C4051, and that chip will use a 22.1184 MHz crystal.
Since I have limited GPIO pins available for the two devices to connect with, I attempted to make a routine that can read a byte from and write a byte to the radio module one nibble at a time. I have designated space for two buffers. One for data to transmit to the module, and one to receive data from the module.
I want to improve this code so that I don't corrupt memory addresses used by other things such as SFRs and stack pointer data. Also the HM-TRP module claims to be half-duplex.
I am trying to make it as asynchronous as possible so that the other processor doesn't have to stall while the bridge (AT89C4051) stalls until the HM-TRP completes sending a byte.
What can I do to improve this? or am I overthinking here?
;uC to radio -- pin assignments and internal state bits of the bridge
DAT     equ     P1      ;data bus to the host; only P1.4-P1.7 carry data
NIB     equ     P3.7    ;nibble select from host: tested low for the 1st nibble, high for the 2nd
RW      equ     P3.5    ;direction as ufunc tests it: low = host reads a byte, high = host writes one
EXEC    equ     P3.4    ;host strobe: ufunc acts once when it is low after having been high
R_EN    equ     P3.3    ;HM-TRP enable (cleared to switch the radio on)
R_CFG   equ     P3.2    ;HM-TRP config (held low while setup sends the table, high afterwards)
BUFEND  equ     10h     ;start value of the receive-buffer pointer (R0)
BUFENDT equ     3Fh     ;start value of the transmit-buffer pointer (R1)
NIBNO   bit     7Fh     ;internal nibble state: set after the 1st nibble, cleared after the 2nd
IEXEC   bit     7Eh     ;internal strobe latch: set once a strobe was handled, cleared while EXEC is high
;setup everything
; Reset-time initialisation: state bits, register bank, ports, buffer
; pointers/counters, stack, UART timing; then enables the radio and runs
; the configuration upload (setup). Falls through into main.
        setb    IEXEC           ;ignore EXEC until it has been seen high once
        clr     NIBNO           ;FIX: first host transfer must start with nibble 1
                                ;(NIBNO=1 means "1st nibble done", which made the
                                ; first transaction skip nibble 1 and emit garbage)
        setb    R_EN            ;keep the radio disabled while we initialise
        setb    RS0             ;R0-R7 addresses = high to not corrupt buffers
        setb    RS1             ;RS1:RS0 = 11 -> register bank 3
        mov     P1,#0FFh        ;reset ports (all latches high)
        mov     P3,#0FFh
        mov     R0,#BUFEND      ;receive buffer pointer
        mov     R1,#BUFENDT     ;transmit buffer pointer
        mov     R2,#0h          ;R2=remaining transmit characters
        mov     R3,#0h          ;R3=# received characters
        mov     SP,#50h         ;Get SP out of our space
        mov     PCON,#80h       ;SMOD=1: use high-speed UART
        mov     TH1,#0F4h       ;reload 244 -> 9.6kbps with SMOD=1 at 22.1184 MHz
        mov     SCON,#50h       ;serial mode 1, receiver enabled
        mov     TMOD,#22h       ;auto reload for timers
        acall   delay
        clr     R_EN            ;Enable HM-TRP radio
        setb    TR1             ;Start hardware UART
        lcall   setup           ;upload the configuration table to the radio
main:
;Async routines
; Polled main loop: service the UART receive and transmit flags, then run
; the host-interface state machine (ufunc), which jumps back to main.
; Uses R0/R3 for the receive buffer and R1/R2 for the transmit buffer.
;Check for received byte and process if any
        jbc     RI,recvok       ;jbc also clears RI
rtx:
;Check for transmit byte and process if any
        jbc     TI,xmitok       ;jbc also clears TI
txx:
;Do user function and repeat
        ajmp    ufunc

;executed when hardware serial port indicates character transmitted
xmitok:
        mov     A,R2
        jnz     xmitgo
        ;FIX: nothing queued. The original sent a stale byte here and let
        ;djnz wrap R2 from 0 to 255. Put TI back so that it keeps meaning
        ;"transmitter idle" and a byte queued later is still picked up.
        setb    TI
        ajmp    txx
xmitgo:
        mov     SBUF,@R1        ;FIX: register-indirect is @R1 ("#R1" is not a valid operand)
        dec     R1              ;adjust pointer
        djnz    R2,txx          ;lower count
        mov     R1,#BUFENDT     ;when count is 0, reset buffer pointer
        ajmp    txx             ;back to main routine
; NOTE(review): ufunc stores host bytes through the same R1 (store, then
; dec R1), so the byte fetched above is not the byte that was queued.
; Separate producer/consumer pointers look necessary -- confirm intent.
; NOTE(review): setup returns with TI cleared and nothing visible here
; writes SBUF afterwards, so TI apparently has to be set once before the
; first queued byte can leave -- confirm.

;executed when hardware serial port indicates character received
recvok:
        mov     @R0,SBUF        ;FIX: register-indirect is @R0. Store data right away
        inc     R3              ;increment character count
        djnz    R0,rtx          ;pointer counts down; reload when it reaches 0
        mov     R0,#BUFEND
        ajmp    rtx             ;back to main routine
; NOTE(review): R3 is not limited to the buffer size, so unread bytes are
; overwritten once more than BUFEND bytes are pending.
ufunc:
; Host interface, entered from main on every loop pass; always leaves via
; "ajmp main". One nibble is moved per EXEC strobe:
;   RW low  -> host reads : nibble 1 = low nibble, nibble 2 = high nibble
;   RW high -> host writes: same nibble order, byte queued after nibble 2
; Data travels on P1.4-P1.7. Clobbers A; uses R5 (byte being read out) and
; R6 (first nibble of the byte being written).
;See state of EXEC gpio pin
        jnb     EXEC,rexec
;pin value needs to be high at first
        clr     IEXEC           ;EXEC is high: re-arm for the next strobe
rexec:
        jb      EXEC,noexec     ;no strobe
        jb      IEXEC,noexec    ;this strobe was already handled
;only enter if pin was high before set low
        jb      RW,d_rd
;Here, RW pin = low so Read from HM-TRP is executed
        jb      NIB,rnibid
        jb      NIBNO,rnibid
;NIB pin = low so we want 1st nibble
        mov     A,R3
        jz      skipread
;Only get nibble if at least a byte exists in cache
        mov     A,@R0           ;FIX: register-indirect is @R0. Load byte from cache
        inc     R0              ;advance pointer
        dec     R3              ;lower byte count
        ; NOTE(review): recvok stores through this same R0 and then counts it
        ; down, so @R0 here is the next free slot rather than a received byte,
        ; and the "inc" ignores the wrap at BUFEND. A separate read pointer
        ; looks necessary -- confirm intent.
        mov     R5,A            ;save original byte
        swap    A               ;low nibble -> bits 4-7
        anl     A,#0F0h         ;keep only the bus bits
        mov     DAT,A           ;send out 1st nibble
        ; NOTE(review): this also writes 0 to the P1.0-P1.3 latches; check
        ; that nothing else is attached to those pins.
        setb    NIBNO           ;prevent hold down of pin from executing this function forever
skipread:
rnibid:
        jnb     NIB,rnibid2
        jnb     NIBNO,rnibid2
;We want other nibble
        mov     A,R5            ;Get saved byte
        anl     A,#0F0h         ;But strip lower 4-bits
        mov     DAT,A           ;Send out 2nd nibble
        clr     NIBNO
rnibid2:
d_rd:
        jnb     RW,d_wr
        jb      NIB,nibid
        jb      NIBNO,nibid
        orl     DAT,#0F0h       ;FIX: release the bus pins. A 0 left in the latch by an
                                ;earlier host read would force that pin low on input
        mov     A,DAT           ;Read in 1st nibble
        swap    A               ;and make it our low nibble
        anl     A,#0Fh          ;strip the high part
        mov     R6,A            ;and save it
        setb    NIBNO           ;prevent hold down of pin from executing this function forever
nibid:
        jnb     NIB,nibid2
        jnb     NIBNO,nibid2
        orl     DAT,#0F0h       ;FIX: release the bus pins before sampling (see above)
        mov     A,DAT           ;Read in 2nd nibble
        anl     A,#0F0h         ;Strip low part
        orl     A,R6            ;put both parts together to form byte
        clr     NIBNO
        mov     @R1,A           ;FIX: register-indirect is @R1. Store byte in transmit buffer
        dec     R1              ;adjust pointer
        inc     R2              ;increment number of transmit characters
        ; NOTE(review): R1 has no lower bound here; the IEXEC/NIBNO bits live
        ; in byte 2Fh, 16 bytes below BUFENDT -- confirm the host never queues
        ; that many bytes.
nibid2:
d_wr:
        setb    IEXEC           ;prevent hold down of EXEC pin from executing things too much
noexec:
        ajmp    main
setup:
; Setup radio card
; Streams the table at startconfig to the radio with R_CFG held low.
; A 0FFh table byte is never transmitted: it only inserts a delay, and two
; 0FFh bytes in a row end the upload.
; Out:      R2 = 0, TI cleared, R_CFG high (data mode)
; Clobbers: A, DPTR, R2, R7 (via delay)
        clr     R_CFG
        acall   delay
        mov     R2,#2h          ;2 0FFh's = end.
        mov     DPTR,#startconfig
configsu:
        clr     A
        movc    A,@A+DPTR       ;FIX: code-memory read is "@A+DPTR" ("#A+DPTR" is not valid)
        inc     DPTR
        inc     A               ;0FFh + 1 wraps to 0 ...
        jz      novalc          ;...and don't send it to radio if it's 0FFh
        dec     A               ;restore the original byte
        mov     R2,#2h          ;byte isn't FFh, so restore chances
        clr     TI
        mov     SBUF,A          ;Send byte to radio
        jnb     TI,$            ;and stall until its sent
        sjmp    configsu
novalc:
;Strike 1+. One more FFh and its over.
        acall   delay
        djnz    R2,configsu
;finish up
        setb    R_CFG           ;switch radio to data mode
        acall   delay
        mov     R2,#0h          ;R2 is the transmit count again from here on
        clr     TI
        ret
delay:
; Short busy-wait so the radio module can accept a setting.
; FIX: R7 is now loaded explicitly. It was never initialised, so the first
; delay after reset had an arbitrary length (internal RAM is undefined at
; power-up). A start value of 0 makes djnz run 256 times: 512 machine
; cycles, about 278 us with the 22.1184 MHz crystal.
; NOTE(review): whether ~0.28 ms is long enough for the HM-TRP is not
; visible here -- check the module's timing requirements.
; Clobbers: R7 (left at 0)
        mov     R7,#0
        djnz    R7,$            ;Delay so radio module can accept setting
        ret
;These are codes that get programmed into HM-TRP to get it up and running properly
; Read sequentially by setup, starting at startconfig. Each 0FFh byte is a
; separator (setup delays instead of sending it); two 0FFh in a row end the
; upload. Every group below therefore ends in exactly one 0FFh, and all
; groups are sent in the order listed.
; NOTE(review): both the mhz915 and the mhz433 group are uploaded;
; presumably the later one wins -- confirm which frequency is intended.
startconfig:
mhz915:
; bytes 4-7 = 3689CAC0h = 915,000,000
db 0AAh,0FAh,0D2h,036h,089h,0CAh,0C0h,0FFh
mhz433:
; bytes 4-7 = 19DE5080h = 434,000,000 (not 433,000,000, despite the label)
db 0AAh,0FAh,0D2h,019h,0DEh,050h,080h,0FFh
bps9600:
; bytes 4-7 = 00002580h = 9600
db 0AAh,0FAh,0C3h,000h,000h,025h,080h,0FFh
ubps9600:
; bytes 4-7 = 00002580h = 9600
db 0AAh,0FAh,01Eh,000h,000h,025h,080h,0FFh
bw105: ;default receive bandwidth: 105khz
; bytes 4-5 = 0069h = 105
db 0AAh,0FAh,0B4h,000h,069h,0FFh
dev35: ;default deviation: 35khz
; byte 4 = 23h = 35
db 0AAh,0FAh,0A5h,023h,0FFh
xmitdbm: ;xmit power 20dbm max
db 0AAh,0FAh,096h,007h,0FFh
endofconfig:
; consecutive 0FFh bytes: the end marker setup looks for
db 0FFh,0FFh,0FFh,0FFh

Related

NASM ASSEMBLY - Print "Hello World"

I've created a string and turned it into an array. Looping through each index and moving to the al register so it can print out to the vga. The problem is, it prints the size of the string with no problem, but the characters in gibberish. Can you please help me figure out what the problem is in the code. It will be highly appreciated.
; Question listing (kept as posted): prints msg one character at a time
; with the BIOS teletype call, then hangs.
org 0
bits 16
section .text
global _start
_start:
; NOTE(review): no instruction here loads DS, yet [si] below is read
; through DS.
mov si, msg
loop:
inc si ; pointer is advanced before the first load, so msg[0] is never read
mov ah, 0x0e ; int 0x10 function 0x0e
mov al, [si]
or al, al ; sets ZF when al is zero
jz end ; stop at a zero byte
mov bh, 0x00
int 0x10
jmp loop
end:
jmp .done
.done:
jmp $ ; hang
; msg has no terminating zero of its own; the loop stops at the first zero
; byte that follows it (the TIMES padding below)
msg db 'Hello, world!',0xa
len equ $ - msg ; not referenced by the code above
TIMES 510 - ($ - $$) db 0
DW 0xAA55
bootloader code
; Question listing (kept as posted): reads one sector with int 0x13 and
; far-jumps to segment 0x1000.
ORG 0x7c00
BITS 16
boot:
mov ah, 0x02 ; int 0x13 function 0x02
mov al, 0x01 ; sector count = 1
mov ch, 0x00 ; cylinder 0
mov cl, 0x02 ; sector 2
mov dh, 0x00 ; head 0
mov dl, 0x00 ; drive number hard-coded to 0
mov bx, 0x1000
mov es, bx ; es = 0x1000; bx still holds 0x1000, so the buffer es:bx is 0x1000:0x1000
int 0x13 ; the carry flag is not checked afterwards
jmp 0x1000:0x00 ; jumps to offset 0, not to the 0x1000 offset used as the buffer
times 510 - ($ - $$) db 0
dw 0xAA55
The bootloader
Before tackling the kernel code, let's look at the bootloader that brings the kernel in memory.
You have written a very minimalistic version of a bootloader, one that omits much of the usual stuff like setting up segment registers, but thanks to its reduced nature that's not really a problem.
What could be a problem is that you wrote mov dl, 0x00, hardcoding a zero to select the first floppy as your bootdisk. No problem if this is indeed the case, but it would be much better to just use whatever value the BIOS preloaded the DL register with. That's the ID for the disk that holds your bootloader and kernel.
What is a problem is that you load the kernel to the segmented address 0x1000:0x1000 and then later jump to the segmented address 0x1000:0x0000 which is 4096 bytes short of the kernel. You got lucky that the kernel code did run in the end, thanks to the memory between these two addresses most probably being filled with zero-bytes that (two by two) translate into the instruction add [bx+si], al. Because you omitted setting up the DS segment register, we don't know what unlucky byte got overwritten so many times. Let's hope it was not an important byte...
mov bx, 0x1000
mov es, bx
xor bx, bx <== You forgot to write this instruction!
int 0x13
jmp 0x1000:0x0000
What is a problem is that you ignore the possibility of encountering troubles when loading a sector from the disk. At the very least you should inspect the carry flag that the BIOS.ReadSector function 02h reports and if the flag is set you could abort cleanly. A more sophisticated approach would also retry a limited number of times, say 3 times.
ORG 0x7C00
BITS 16
; IN (dl)
mov dh, 0x00 ; DL is bootdrive
mov cx, 0x0002
mov bx, 0x1000
mov es, bx
xor bx, bx
mov ax, 0x0201 ; BIOS.ReadSector
int 0x13 ; -> AH CF
jc ERR
jmp 0x1000:0x0000
ERR:
cli
hlt
jmp ERR
times 510 - ($ - $$) db 0
dw 0xAA55
The kernel
After the jmp 0x1000:0x0000 instruction has brought you to the first instruction of your kernel, the CS code segment register holds the value 0x1000. None of the other segment registers did change, and since you did not setup any of them in the bootloader, we still don't know what any of them contain. However in order to retrieve the bytes from the message at msg with the mov al, [si] instruction, we need a correct value for the DS data segment register. In accordance with the ORG 0 directive, the correct value is the one we already have in CS. Just two 1-byte instructions are needed: push cs pop ds.
There's more to be said about the kernel code:
The printing loop uses a pre-increment on the pointer in the SI register. Because of this the first character of the string will not get displayed. You could compensate for this via mov si, msg - 1.
The printing loop processes a zero-terminated string. You don't need to prepare that len equate. What you do need is an explicit zero byte that terminates the string. You should not rely on the large number of zero bytes that `times` produced. In some future version of the code there might be no zero byte at all!
You (think you) have included a newline (0xa) in the string. For the BIOS.Teletype function 0Eh, this is merely a linefeed that moves down on the screen. To obtain a newline, you need to include both carriage return (13) and linefeed (10).
There's no reason for your kernel code to have the bootsector signature bytes at offset 510. Depending on how you get this code to the disk, it might be necessary to pad the code up to (a multiple of) 512, so keep times 512 - ($ - $$) db 0.
The kernel:
ORG 0
BITS 16
section .text
global _start
_start:
push cs
pop ds
mov si, msg
mov bx, 0x0007 ; DisplayPage=0, GraphicsColor=7 (White)
jmp BeginLoop
PrintLoop:
mov ah, 0x0E ; BIOS.Teletype
int 0x10
BeginLoop:
mov al, [si]
inc si
test al, al
jnz PrintLoop
cli
hlt
jmp $-2
msg db 'Hello, world!', 13, 10, 0
TIMES 512 - ($ - $$) db 0

Problems loading second stage of a bootloader and/or transferring control to it

my code for master boot record:
;bit16 ; 16bit by default
org 0x7c00
jmp short start
nop
bsOEM db "OS423 v.0.1" ; OEM String
start:
;;cls
mov ah,06h ;Function 06h (scroll screen)
mov al,0 ;Scroll all lines
mov bh,0x0f ;Attribute (lightgreen on blue)
mov ch,0 ;Upper left row is zero
mov cl,0 ;Upper left column is zero
mov dh,24 ;Lower left row is 24
mov dl,79 ;Lower left column is 79
int 10h ;BIOS Interrupt 10h (video services)
;;print welcome msg
mov ah,13h ;Function 13h (display string), XT machine only
mov al,1 ;Write mode is zero: cursor stay after last char
mov bh,0 ;Use video page of zero
mov bl,0x0f ;Attribute (lightgreen on blue)
mov cx,mlen ;Character string length
mov dh,0 ;Position on row 0
mov dl,0 ;And column 0
lea bp,[msg] ;Load the offset address of string into BP, es:bp
;Same as mov bp, msg
int 10h
;;load sector into memory & 5678h:1234h
mov bx, 0x5678 ;segmented address
mov es, bx ;move segemented address to es
mov bx,0x1234 ;base address to bx
mov ah, 02 ;function read sectors
mov al, 01 ;# of sectors to load
mov ch, 00 ;track to read
mov cl, 02 ;sector to read
mov dh, 00 ;head to read
mov dl, 00 ;drive number
int 0x13 ;call interrupt 13
jmp 0x5678:0x1234 ;jump to memory address
int 20
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;variables;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
msg: db 'Welcome to Pradox OS 0.1! Authored by Jiansong he', 10, 13, '$'
mlen equ $-msg
padding times 510-($-$$) db 0 ;to make MBR 512 bytes
bootSig db 0x55, 0xaa ;signature (optional)
my terminal command for nasm compiling and puting the binary files into my .img floppy disk:
line1: define a floppy disk image named boot.img, block size 512 bytes, 2880 blocks in total
line2: use nasm compiler, compile mbr.asm file into binary format, store it at file named mbr.bin(which is my master boot record)
line3: compile dt.asm into binary at dt.bin
line4: put mbr.bin's content into boot.img, block size 512, put total 1 block
line5: put dt.bin's content into boot.img, block size 512, at physical sector #2(logical sector #1)
dd if=/dev/zero of=boot.img bs=512 count=2880
nasm -f bin mbr.asm -o mbr.bin
nasm -f bin dt.asm -o dt.bin
dd if=mbr.bin of=boot.img bs=512 count=1 conv=notrunc
dd if=dt.bin of=boot.img bs=512 seek=1 count=1 conv=notrunc
code in dt.asm:
[BITS 16] ;Set code generation to 16 bit mode
ORG 0x5647:0x1234 ;set addressing to begin at 579b4H
startdt:
;call cls ;call routine to clear screen
;call dspmsg ;call routine to display message
call date
call cvtmo
call cvtday
call cvtcent
call cvtyear
call dspdate
call time
call cvthrs
call cvtmin
call cvtsec
call dsptime
int 20h ;halt operation (VERY IMPORTANT!!!)
cls:
mov ah,06h ;function 06h (Scroll Screen)
mov al,0 ;scroll all lines
mov bh,0x0f ;Attribute (bright white on blue)
mov ch,0 ;Upper left row is zero
mov cl,0 ;Upper left column is zero
mov dh,24 ;Lower left row is 24
mov dl,79 ;Lower left column is 79
int 10H ;BIOS Interrupt 10h (video services)
ret
dspmsg:
mov ah,13h ;function 13h (Display String)
mov al,1 ;Write mode is zero
mov bh,0 ;Use video page of zero
mov bl,0x0a ;Attribute (bright white on bright blue)
mov cx,mlen2 ;Character length
mov dh,0 ;position on row 0
mov dl,0 ;and column 0
lea bp,[welcom] ;load the offset address of string into BP
int 10H
ret
welcom: db 'jiansong Hes first Operating System :D',10,13,'$'
mlen2 equ $-welcom;
date:
;Get date from the system
mov ah,04h ;function 04h (get RTC date)
int 1Ah ;BIOS Interrupt 1Ah (Read Real Time Clock)
ret
;CH - Century
;CL - Year
;DH - Month
;DL - Day
cvtmo:
;Converts the system date from BCD to ASCII
mov bh,dh ;copy contents of month (dh) to bh
shr bh,1
shr bh,1
shr bh,1
shr bh,1
add bh,30h ;add 30h to convert to ascii
mov [dtfld],bh
mov bh,dh
and bh,0fh
add bh,30h
mov [dtfld + 1],bh
ret
cvtday:
mov bh,dl ;copy contents of day (dl) to bh
shr bh,1
shr bh,1
shr bh,1
shr bh,1
add bh,30h ;add 30h to convert to ascii
mov [dtfld + 3],bh
mov bh,dl
and bh,0fh
add bh,30h
mov [dtfld + 4],bh
ret
cvtcent:
mov bh,ch ;copy contents of century (ch) to bh
shr bh,1
shr bh,1
shr bh,1
shr bh,1
add bh,30h ;add 30h to convert to ascii
mov [dtfld + 6],bh
mov bh,ch
and bh,0fh
add bh,30h
mov [dtfld + 7],bh
ret
cvtyear:
mov bh,cl ;copy contents of year (cl) to bh
shr bh,1
shr bh,1
shr bh,1
shr bh,1
add bh,30h ;add 30h to convert to ascii
mov [dtfld + 8],bh
mov bh,cl
and bh,0fh
add bh,30h
mov [dtfld + 9],bh
ret
dtfld: db '00/00/0000'
dspdate:
;Display the system date
; Writes the 10 characters at dtfld with int 10h function 13h at row 4,
; column 0. ES is loaded from DS because the string is addressed as ES:BP.
mov ah,13h ;function 13h (Display String)
mov al,0 ;Write mode is zero
mov bh,0 ;Use video page of zero
mov bl,0x0f ;Attribute
mov cx,10 ;Character string is 10 long
mov dh,4 ;position on row 4
mov dl,0 ;and column 0
push ds ;put ds register on stack
pop es ;pop it into es register
lea bp,[dtfld] ;load the offset address of string into BP
int 10H
ret
time:
;Get time from the system
mov ah,02h
int 1Ah
ret
;CH - Hours
;CL - Minutes
;DH - Seconds
cvthrs:
;Converts the system time from BCD to ASCII
mov bh,ch ;copy contents of hours (ch) to bh
shr bh,1
shr bh,1
shr bh,1
shr bh,1
add bh,30h ;add 30h to convert to ascii
mov [tmfld],bh
mov bh,ch
and bh,0fh
add bh,30h
mov [tmfld + 1],bh
ret
cvtmin:
mov bh,cl ;copy contents of minutes (cl) to bh
shr bh,1
shr bh,1
shr bh,1
shr bh,1
add bh,30h ;add 30h to convert to ascii
mov [tmfld + 3],bh
mov bh,cl
and bh,0fh
add bh,30h
mov [tmfld + 4],bh
ret
cvtsec:
mov bh,dh ;copy contents of seconds (dh) to bh
shr bh,1
shr bh,1
shr bh,1
shr bh,1
add bh,30h ;add 30h to convert to ascii
mov [tmfld + 6],bh
mov bh,dh
and bh,0fh
add bh,30h
mov [tmfld + 7],bh
ret
tmfld: db '00:00:00'
dsptime:
;Display the system time
mov ah,13h ;function 13h (Display String)
mov al,0 ;Write mode is zero
mov bh,0 ;Use video page of zero
mov bl,0x0f;Attribute
mov cx,8 ;Character string is 8 long
mov dh,5 ;position on row 5
mov dl,0;and column 0
push ds ;put ds register on stack
pop es ;pop it into es register
lea bp,[tmfld] ;load the offset address of string into BP
int 10H
ret
int 20H
my testing environment is dosbox, I can successfully display the welcome message on screen but failed to load another sector into memory starting at 0x5647:0x1234
thanks
Simple enough.
You can't just start the bootsector with the OEM string!
You have to jump over this text to the real start point.
Either use:
a 3-byte near jump
a 2-byte short jump followed by a nop instruction.
mov ch, 01 ;track to read
mov cl, 02 ;sector to read
mov dh, 01 ;head to read
If you want to read the 2nd sector of the drive, you will want to specify Cylinder=0, Head=0, and Sector=2
mov ch, 0 ;track to read
mov cl, 2 ;sector to read
mov dh, 0 ;head to read
Your boot message will perhaps not get displayed because you didn't set up the ES segment register. Given the org 0x7C00 you need to set ES=0. You mustn't trust the BIOS to do this for you.
You might want to remove the int 20 instruction from the program. It can't help you.
In stead of using mov dl, 0 to specify the disk to load from, you would better use the contents of the DL register that you got when your bootsector was first called by BIOS. An emulator like DOSBOX might use some specific value here!
Since there are a number of issues not covered in the other answer, I'll provide a new answer.
I recommend looking at my General Bootloader Tips in this other Stackoverflow answer. In particular the first few tips apply here:
When the BIOS jumps to your code you can't rely on CS,DS,ES,SS,SP registers having valid or expected values. They should be set up appropriately when your bootloader starts. You can only be guaranteed that your bootloader will be loaded and run from physical address 0x00007c00 and that the boot drive number is loaded into the DL register.
The direction flag used by lodsb, movsb etc could be either set or cleared. If the direction flag is set improperly SI/DI registers may be adjusted in the wrong direction. Use STD/CLD to set it to the direction you wish (CLD=forward/STD=backwards). In this case the code assumes forward movement so one should use CLD. More on this can be found in an instruction set reference
When jumping to a kernel it is generally a good idea to FAR JMP to it so that it properly sets CS:IP to expected values. This can avoid problems with kernel code that may do absolute near JMPs and CALLs within the same segment.
In your code your bootloader should set up a stack pointer in SS:SP. Placing it just below the bootloader is reasonable at 0x0000:0x7c00. You should save DL register before destroying it because it contains the boot drive number. You could push it on the stack at the start and restore it when you set up the disk related routines accessed int 13h. You shouldn't assume that ES or DS (DS in particular) are set to the value of 0. Since you use an ORG of 0x7c00 the segments need 0x0000. (0x0000<<4)+0x7c00 = physical address 0x07c00.
To resolve these issues you could add these lines after the start label:
start:
mov [bootdrv],dl;Save the boot drive passed in via DL to the bootloader
xor ax,ax ;Set ES and DS to zero since we use ORG 0x7c00
mov es,ax
mov ds,ax
mov ss,ax ;Set SS:SP to 0x0000:0x7c00 below bootloader
mov sp,0x7c00
cld ;Set direction flag forward
You'll need to add a bootdrv variable after you have msg
bootdrv: db 0
Before you use the int 13h disk read feature you can now use the value in bootdrv and place it into DL before issuing the interrupt call. This line should be replaced:
mov dl, 00 ;drive number
With:
mov dl,[bootdrv];Get the boot drive saved at start of bootloader
You have int 20 in your bootloader after the jmp 0x5678:0x1234. I believe you meant int 20h. The JMP will never return so placing code after it will do nothing. However, int 20h is a DOS interrupt that is only available after MS-DOS system is loaded from disk into memory. You have no MS-DOS on your disk of course. In a bootloader (bare metal) environment just don't use DOS interrupts at all.
In dt.asm you have some issues. You FAR JMP to the newly loaded code at 0x5678:0x1234. In doing so CS was set to 0x5678. You'll need to manually set ES and DS, You can do this by copying the value in CS to DS and ES. You also need to set the appropriate ORG. In this case the origin point is 0x1234 from the beginning of the segment (0x5678) so you must use org 0x1234 . The top of dt.asm could be modified to look like:
BITS 16 ;Set code generation to 16 bit mode
ORG 0x1234 ;set origin point to 0x1234
mov ax, cs ;copy CS to DS and ES
;alternatively could have used mov ax, 0x5678
mov ds, ax
mov es, ax
startdt:
The issue with int 20h discussed previously is an issue in dt.asm. Remove all occurrences of it. Instead you can put the processor in an infinite loop. The last of your code in dt.asm that is executed is after call dsptime returns. After that CALL you can put an infinite loop with something like:
jmp $
A more preferable infinite loop that can take less processing power is to turn off the interrupts using CLI, use a HLT instruction and then for safe measure if HLT returns JMP back and do HLT again (Could happen if there is an NMI Non-Maskable Interrupt). HLT waits until the next interrupt occurs. This is what you'll often see instead:
cli
endloop:
hlt
jmp endloop
Other Observations
It appears that in the first version of the code you posted, the parameters to the INT 13h/AH=2 disk read function were incorrect. Sector numbers start at 1, while heads and cylinders are zero-based. The best source of interrupt information is Ralf Brown's Interrupt List, which covers both BIOS and MS-DOS interrupts. If you need information on the parameters of the interrupts it is an excellent reference.
I'd recommend BOCHS for debugging bootloaders. It has a command line debugger that understands real mode addressing and can be used to watch instructions as they are executed,set breakpoints, display registers, examine memory etc.

How to use ORG addresses > 0xFFFF?

I am trying to write a simply bootloader in assembler.
The bootloader copies sector 2 from a floppy to address 0x5000 (segment 0x500, offset 0x0), jumps to the segment and prints a message.
However, when I change the segment address to 0x1000, the message does not get printed anymore. I suspect the org 0x10000 instruction has a problem, which might be related to segmentation. I tried org 0x1000:0 too, but the message won't be printed.
Here is my bootloader code, which gets written to the first sector of the floppy:
[BITS 16]
org 0x7C00
start:
mov ah, 0x02 ; Read sectors from drive
mov al, 1 ; Read 1 sector
mov ch, 0 ; Cylinder 0
mov cl, 2 ; Sector 2
mov dh, 0 ; Head 0
mov bx, sect2dest;
mov es, bx
mov bx, 0x0
int 0x13
jmp sect2dest:0;
data:
sect2dest equ 0x500
The magic identifier in the end is written by a custom linking script, so don't worry about that.
Here is my sector two, which should print a message:
[BITS 16]
org 0x5000
sect2:
mov ah, 0x13
mov al, 1
mov bl, 0x17
mov cx, msg_len
mov dh, 0
mov dl, 0
mov bh, 0
mov bp, 0
mov es, bp
mov bp, msg
int 0x10
jmp $
msg db 13,10,"Hello, World!"
msg_len equ $ - msg
As mentioned above, when I try writing sector 2 to any address larger than 0xFFFF, the message doesn't get printed.
Consider that bp is 16 bit, so if you use an ORG of 10000h any offset won't fit in it.
I was expecting the assembler to raise a warning, but a quick test showed otherwise.
Remember also that it is generally best to avoid challenging the BIOS; though I don't know how it is actually handled, I would avoid printing a string that straddles two segments.
Since you are putting zero in es, make sure that the ORG is at most 10000h-[msg_len], so that the whole string is reachable within es.

Print register value to console

I want to print the value in %RCX directly to the console, let's say an ASCII value. I've searched through some wise books and tutorials, but all use buffers to pass anything. Is it possible to print anything without creating special buffer for that purpose?
Let's say I am here (all these answers are far too complicated for me and use a different syntax):
movq $5, %rax
...???(print %rax)
Output on console:
\>5
in example, to print buffer i use code:
SYSWRITE = 4
STDOUT = 1
EXIT_SUCCESS = 0
.text
buff: .ascii "Anything to print\n"
buff_len = . - buff
movq $SYSWRITE, %eax
mov $STDOUT, %ebx
mov $buff, %ecx
mov $buff_len, %edx
NO C CODE OR DIFFERENT ASS SYNTAX ALLOWED!!!
In order to print a register (in hex representation or numeric) the routine (write to stdout, stderr, etc.) expects ASCII characters. Just writing a register will cause the routine to try to display the ASCII equivalent of the value in the register. You may get lucky sometimes if each of the bytes in the register happens to fall into the printable character range.
You will need to convert it vis-a-vis routines that convert to decimal or hex. Here is an example of converting a 64 bit register to the hex representation (using intel syntax w/nasm):
section .rodata
hex_xlat: db "0123456789abcdef"

section .text
;-----------------------------------------------------------------------
; register_to_hex
; In:    rdi = 64-bit value to convert
;        rsi = buffer that receives 16 hex digits (no terminator written)
; Out:   rax = the buffer address that was passed in rsi
; Clobb: rcx, rdx, rsi, flags (rdi ends up rotated back to its input value)
;-----------------------------------------------------------------------
register_to_hex:
        push    rsi                     ; remember the buffer start for the return value
        lea     rdx, [rel hex_xlat]     ; digit table, addressed RIP-relative
        mov     ecx, 16                 ; 16 nibbles in a 64-bit value
ALIGN 16
.next_digit:
        rol     rdi, 4                  ; bring the next most-significant nibble to bits 0-3
        mov     eax, edi
        and     eax, 0x0f               ; rax = nibble, upper bits zero
        mov     al, byte [rdx+rax]      ; nibble -> ASCII hex digit
        mov     byte [rsi], al          ; append to the buffer
        inc     rsi
        dec     ecx
        jnz     .next_digit
        pop     rax                     ; rax = original buffer pointer
        ret
This answer is an addendum to the answer given by Frank, and utilizes the mechanism used there to do the conversion.
You mention the register %RCX in your question. This suggests you are looking at 64-bit code and that your environment is likely GCC/GAS (GNU Assembler) based since % is usually the AT&T style prefix for registers.
With that in mind I've created a quick and dirty macro that can be used inline anywhere you need to print a 64-bit register, 64-bit memory operand, or a 32-bit immediate value in GNU Assembly. This version was a proof of concept and could be amended to support 64 bit immediate values. All the registers that are used are preserved, and the code will also account for the Linux 64-bit System V ABI red zone.
The code below is commented to point out what is occurring at each step.
printmac.inc:
.macro memreg_to_hex src # Macro takes one input
# src = memory operand, register,
# or 32 bit constant to print
# Writes the value as 16 hex digits to file descriptor 1 using the
# write system call; no newline is added. Every register the macro
# touches is saved and restored around the call.
# NOTE(review): \src is evaluated after RSP has been lowered (by 128
# and by the seven pushes), so an operand that names %rsp, such as
# %rsp or (%rsp), sees the adjusted stack pointer -- confirm callers
# expect that.
# Define the translation table only once for the current object
.ifndef MEMREG_TO_HEX_NOT_FIRST
.set MEMREG_TO_HEX_NOT_FIRST, 1
.PushSection .rodata
hex_xlat: .ascii "0123456789abcdef"
.PopSection
.endif
add $-128,%rsp # Avoid 128 byte red zone
push %rsi # Save all registers that will be used
push %rdi
push %rdx
push %rcx
push %rbx # saved and restored, although not used in between
push %rax
push %r11 # R11 is destroyed by SYSCALL
mov \src, %rdi # Move src value to RDI for processing
# Output buffer on stack at RSP-16 to RSP-1
lea -16(%rsp),%rsi # RSI = output buffer on stack
lea hex_xlat(%rip), %rdx # RDX = translation buffer address
xor %eax,%eax # RAX = Index into translation array
mov $16,%ecx # 16 nibbles to print
.align 16
1:
rol $4,%rdi # rotate high nibble to low nibble
mov %dil,%al # dil now has previous high nibble
and $0xf,%al # mask off all but low nibble
mov (%rdx,%rax,1),%al # Lookup in translation table
mov %al,(%rsi) # Store in output buffer
inc %rsi # Update output buffer address
dec %ecx
jne 1b # Loop until counter is 0
mov $1,%eax # Syscall 1 = sys_write
mov %eax,%edi # EDI = 1 = STDOUT
mov $16,%edx # EDX = Number of chars to print
sub %rdx,%rsi # RSI = beginning of output buffer
syscall # result in RAX is discarded by the pop below
pop %r11 # Restore all registers used
pop %rax
pop %rbx
pop %rcx
pop %rdx
pop %rdi
pop %rsi
sub $-128,%rsp # Restore stack
.endm
printtest.s
.include "printmac.inc"
.global main
.text
main:
mov $0x123456789abcdef,%rcx
memreg_to_hex %rcx # Print the 64-bit value 0x123456789abcdef
memreg_to_hex %rsp # Print address containing ret pointer
memreg_to_hex (%rsp) # Print return pointer
memreg_to_hex $0x402 # Doesn't support 64-bit immediates
# but can print anything that fits a DWORD
retq
This can be compiled and linked with:
gcc -m64 printtest.s -o printtest
The macro doesn't print an end of line character so the output of the test program looks like:
0123456789abcdef00007fff5283d74000007f5c4a080a500000000000000402
The memory addresses will be different.
Since the macros are inlined, each time you invoke the macro the entire code will be emitted. The code is space inefficient. The bulk of the code could be moved to an object file you can include at link time. Then a stub macro could wrap a CALL to the main printing function.
The code doesn't use printf because at some point I thought I saw a comment that you couldn't use the C library. If that's not the case this can be simplified greatly by calling printf to format the output to print a 64-bit hexadecimal value.
Just for fun, here are a couple other sequences for storing a hex string from a register. Printing the buffer is not the interesting part, IMO; copy that part from Michael's excellent answer if needed.
I tested some of these. I've included a main that calls one of these functions and then uses printf("%s\n%lx\n", result, test_value); to make it easy to spot problems.
Test main():
extern printf
global main
;-----------------------------------------------------------------------
; int main(void)  -- test driver, SysV AMD64 ABI
; Converts a test value with register_to_hex_ssse3 into a 16-digit string
; on the stack, then prints the string and the same value via printf
; ("%lx") so the two lines can be compared by eye.
; Stack: push rbx + 32 bytes of locals keep rsp 16-byte aligned at both
;        calls. [rsp..rsp+15] = digits, [rsp+16] = terminating 0.
; Changes from the posted version:
;  - fmt is addressed RIP-relative and printf is called through the PLT,
;    so the object also links as a position-independent executable
;    ("mov edi, fmt" needs a 32-bit absolute relocation).
;  - main returns 0 instead of leaking printf's return value as the
;    process exit status.
;-----------------------------------------------------------------------
main:
        push    rbx                     ; callee-saved; holds the test value across the calls
        mov     rdi, 0x1230ff56dcba9911 ; arg 1: value to convert
        mov     rbx, rdi
        sub     rsp, 32
        mov     rsi, rsp                ; arg 2: output buffer
        mov     byte [rsi+16], 0        ; terminate the string for %s
        call    register_to_hex_ssse3
        lea     rdi, [rel fmt]          ; printf arg 1: format string
        mov     rsi, rsp                ; printf arg 2: hex string
        mov     rdx, rbx                ; printf arg 3: original value
        xor     eax, eax                ; variadic call: no vector registers used
        call    printf wrt ..plt
        add     rsp, 32
        xor     eax, eax                ; return 0
        pop     rbx
        ret
section .rodata
fmt: db `%s\n%lx\n`, 0 ; YASM doesn't support `string with escapes`, so this only assembles with NASM.
; NASM needs
; %use smartalign
; ALIGNMODE p6, 32
; or similar, to stop it using braindead repeated single-byte NOPs for ALIGN
SSSE3 pshufb for the LUT
This version doesn't need a loop, but the code size is much larger than the rotate-loop versions because SSE instructions are longer.
section .rodata
ALIGN 16
hex_digits:
hex_xlat: db "0123456789abcdef"
section .text
;; rdi = val rsi = buffer
;; Stores the 16 hex digits of rdi at [rsi], most significant digit first.
;; No terminator is written and rax is not set.
;; Clobbers rdi, xmm0, xmm1, xmm4, xmm5.
ALIGN 16
global register_to_hex_ssse3
register_to_hex_ssse3: ;;;; 0x39 bytes of code
;; use PSHUFB to do 16 nibble->ASCII LUT lookups in parallel
movaps xmm5, [rel hex_digits]
;; x86 is little-endian, but we want the hex digit for the high nibble to be the first character in the string
;; so reverse the bytes, and later unpack nibbles like [ LO HI ... LO HI ]
bswap rdi
movq xmm1, rdi
;; generate a constant on the fly, rather than loading
;; this is a bit silly: we already load the LUT, so the mask could just as well be another 16B load from the same cache line (or a memory operand for PAND, since it is only used once)
pcmpeqw xmm4,xmm4
psrlw xmm4, 12
packuswb xmm4,xmm4 ; [ 0x0f 0x0f 0x0f ... ] mask for low-nibble of each byte
movdqa xmm0, xmm1 ; xmm0 = low nibbles at the bottom of each byte
psrlw xmm1, 4 ; xmm1 = high nibbles at the bottom of each byte (with garbage from next byte)
punpcklbw xmm1, xmm0 ; unpacked nibbles (with garbage in the high 4b of some bytes)
pand xmm1, xmm4 ; mask off the garbage bits because pshufb reacts to the MSB of each element. Delaying until after interleaving the hi and lo nibbles means we only need one
pshufb xmm5, xmm1 ; xmm5 = the hex digit for the corresponding nibble index in xmm1
movups [rsi], xmm5
ret
AVX2: you can do two integers at once, with something like
;;----------------------------------------------------------------------
;; int64x2_to_hex_avx2: format two 64-bit values as 2 x 16 lowercase hex digits
;; C-like: void int64x2_to_hex_avx2(char buf[32], uint64_t first, uint64_t second)
;; In:    rdi = buf (32 bytes are stored, unaligned is fine)
;;        rsi = first, rdx = second
;; Out:   buf[0..15] = digits of first, buf[16..31] = digits of second,
;;        most-significant nibble first; no terminator is written
;; Clobb: rsi, rdx, ymm0, ymm1
;; Needs: AVX2; reads the 16-byte table at hex_digits
;;----------------------------------------------------------------------
int64x2_to_hex_avx2:
        bswap          rsi                      ; We could replace the two bswaps with one 256b vpshufb, but that would require a mask
        vmovq          xmm1, rsi
        bswap          rdx
        vpinsrq        xmm1, xmm1, rdx, 1
        vpmovzxbw      ymm1, xmm1               ; upper lane = rdx, lower lane = rsi; each byte 0xHL zero-extended to a word 0x00HL

        ;; Split every word 0x00HL into 0x0L0H using shifts only (no AND mask):
        vpsrlw         ymm0, ymm1, 4            ; 0x000H : high nibble lands in the low byte
        vpsllw         ymm1, ymm1, 12           ; 0xL000 : shift the high nibble out of the word
        vpsrlw         ymm1, ymm1, 4            ; 0x0L00 : low nibble lands in the high byte
        vpor           ymm0, ymm0, ymm1         ; 0x0L0H : in memory order, bytes are [ 0H 0L ]
                                                ; high nibbles in byte elems 0, 2, 4, ...
                                                ; low nibbles in byte elems 1, 3, 5, ...

        ;; vpshufb looks up within each 128-bit lane, so the table must be in both lanes.
        vbroadcasti128 ymm1, [rel hex_digits]
        vpshufb        ymm0, ymm1, ymm0         ; ymm0 = ASCII digit for every nibble index
        vmovdqu        [rdi], ymm0
        vzeroupper                              ; avoid SSE/AVX transition penalties in the caller
        ret
Using pmovzx and shifts to avoid pand is a win compared to generating the constant on the fly, I think, but probably not otherwise. It takes 2 extra shifts and a por. It's an option for the 16B non-AVX version, but it's SSE4.1.
Optimized for code-size (fits in 32 (0x20) bytes)
(Derived from Frank's loop)
Using cmov instead of the LUT to handle 0-9 vs. a-f might take fewer than 16B of extra code size. That might be fun: edits welcome.
The ways to get a nibble from the bottom of rsi into an otherwise-zeroed rax include:
mov al, sil (3B (REX required for sil)) / and al, 0x0f (2B special encoding for and al, imm8).
mov eax, esi (2B) / and eax, 0x0f (3B): same size and doesn't require an xor beforehand to zero the upper bytes of rax.
Would be smaller if the args were reversed, so the dest buffer was already in rdi. stosb is a tiny instruction (but slower than mov [rdi], al / inc rdi), so it actually saved overall bytes to use xchg rdi, rsi to set up for it. Changing the function signature could save 5 bytes: void reg_to_hex(char buf[16], uint64_t val) would save 2 bytes from not having to return buf in rax, and 3 bytes from dropping the xchg. The caller will probably use 16B of stack for the buffer, so returning buf in rax doesn't save the caller anything: doing a mov rdx, rsp instead of a mov rdx, rax before calling another function / syscall on the buffer is the same size.
The next function is probably going to ALIGN 16, though, so shrinking the function to even smaller than 32B isn't as useful as getting it inside half a cache-line.
Absolute addressing for the LUT (hex_xlat) would save a few bytes
(use mov al, byte [hex_xlat + rax] instead of needing the lea).
;;----------------------------------------------------------------------
;; register_to_hex_size: code-size-optimized 64-bit -> 16 hex digits
;; In:    rdi = value
;;        rsi = destination buffer (16 bytes are stored)
;; Out:   rax = the buffer pointer that was passed in rsi
;; Clobb: cl, rdx, rsi, rdi, flags
;; Note:  assumes DF is clear on entry so that stosb steps rdi forward
;;----------------------------------------------------------------------
global register_to_hex_size
register_to_hex_size:
        push    rsi                     ; save buf for the return value (instead of mov rax, rsi): keeps rax free for stosb
        xchg    rdi, rsi                ; rdi = buf for stosb, rsi = value. Better: drop this and swap the args in the signature
        mov     cl, 16                  ; 16 digits to emit; 3B shorter than mov ecx, 16
        lea     rdx, [rel hex_xlat]     ; rdx = base of the nibble -> ASCII table
        ;ALIGN 16
.next_digit:
        rol     rsi, 4                  ; rotate the next most-significant nibble into bits 3:0
        mov     eax, esi                ; copy it out; mov al, sil + 2B AND AL,0xf would also need a 2B xor eax,eax
        and     eax, 0x0f               ; rax = table index 0..15
        mov     al, byte [rdx+rax]      ; al = ASCII hex digit
        stosb                           ; store al at [rdi], advance rdi
        ;; loop .next_digit             ; setting up ecx instead of cl takes more bytes than loop saves
        dec     cl
        jnz     .next_digit
        pop     rax                     ; return value = original buf pointer
        ret
Using xlat costs 2B (to save/restore rbx), but saves 3B, for a net savings of 1B. It's a 3-uop instruction, with 7c latency, one per 2c throughput (Intel Skylake). The latency and throughput aren't a problem here, since each iteration is a separate dependency chain, and there's too much overhead for this to run at one clock per iteration anyway. So the main problem is that it's 3 uops, making it less uop-cache-friendly. With xlat, the loop becomes 10 uops instead of 8 (using stosb), so that sucks.
112: 89 f0 mov eax,esi
114: 24 0f and al,0xf
116: d7 xlat BYTE PTR ds:[rbx]
117: aa stos BYTE PTR es:[rdi],al
vs.
f1: 89 f0 mov eax,esi
f3: 83 e0 0f and eax,0xf
f6: 8a 04 02 mov al,BYTE PTR [rdx+rax*1]
f9: aa stos BYTE PTR es:[rdi],al
Interestingly, this still has no partial-register stalls, because we never read a wide register after writing only part of it. mov eax, esi is write-only, so it cleans up the partial-reg-ness from the load into al. So there would be no advantage to using movzx eax, byte [rdx+rax]. Even when we return to the caller, the pop rax doesn't leave the caller susceptible to partial-reg problems.
(If we don't bother returning the input pointer in rax, then the caller could have a problem. Except in that case it shouldn't be reading rax at all. Usually it only matters if you call with call-preserved registers in a partial-reg state, because the called function might push them. Or more obviously, with arg-passing / return-value registers.)
Efficient version (uop-cache friendly)
Looping backwards didn't turn out to save any instructions or bytes, but I've included this version because it's more different from the version in Frank's answer.
;;----------------------------------------------------------------------
;; register_to_hex_countdown: 64-bit -> 16 hex digits, filling the buffer
;; from its last byte back to its first (least-significant nibble first).
;; In:    rdi = value
;;        rsi = destination buffer (16 bytes are stored)
;; Out:   rax = the buffer pointer that was passed in rsi
;; Clobb: rcx, rdx, rsi, rdi, flags
;;----------------------------------------------------------------------
ALIGN 16
global register_to_hex_countdown
register_to_hex_countdown:
        mov     rax, rsi                ; rax = buf: the return value, and the lower bound for the loop
        add     rsi, 15                 ; rsi -> last char of the buffer
        lea     rcx, [rel hex_xlat]     ; rcx = table base (position-independent)
ALIGN 16
.store_digit:
        mov     edx, edi
        and     edx, 0x0f               ; rdx = low nibble, an index in 0x0 - 0xf
        mov     dl, byte [rcx+rdx]      ; dl = ASCII encoding of that hex digit
                                        ; non-PIC version: mov dl, [hex_digits + rdx]
        mov     byte [rsi], dl
        shr     rdi, 4                  ; expose the next nibble
        dec     rsi                     ; step back one character
        cmp     rsi, rax
        jae     .store_digit            ; continue until rsi has moved below the start of the buffer
        ret
The whole thing is only 12 insns (11 uops with macro-fusion, or 12 including the NOP for alignment). Some CPUs can fuse cmp/jcc but not dec/jcc (e.g. AMD, and Nehalem)
Another option for looping backwards was mov ecx, 15, and store with mov [rsi+rcx], dl, but two-register addressing modes can't micro-fuse. Still, that would only bring the loop up to 8 uops, so it would be fine.
Instead of always storing 16 digits, this version could use rdi becoming zero as the loop condition to avoid printing leading zeros. i.e.
; Excerpt: countdown loop that exits as soon as rdi has no bits left, so leading
; zeros are never stored. Lines shown as "..." are elided in this excerpt.
add rsi, 16 ; rsi += 16 (presumably one past the end of the 16-byte buffer); the loop decrements before storing
...
.loop:
...
dec rsi ; pre-decrement: rsi now addresses the slot for this digit
mov byte [rsi], dl ; store dl at that slot
shr rdi, 4 ; drop the low nibble; sets ZF once rdi becomes zero
jnz .loop ; loop again only while rdi is still non-zero
; lea rax, [rsi+1] ; correction not needed because of adjustments to how rsi is managed
mov rax, rsi ; return rsi, which addresses the most recently stored byte
ret
Printing from rax to the end of the buffer gives just the significant digits of the integer.

Boot time program running on virtual computer without OS

For school assignment I have to write a program described below and I would really like some help on how to approach this problem. To be clear, I don't want you to solve this, I just want some guidance on how to do it.
Problem:
Write a boot time program, which will be run in a virtual computer without an operating system. The program has to print out your name and the words "ALT key is pressed" or "ALT key is not pressed" according to status of the ALT key.
Additional hints:
- the program has to be written in 16 bit mode
compiled program including its data must be less than 510 bytes in size
directive "org 0x7c00" specifies the correct address in the memory where the program is loaded
write instructions before the data
program should execute in an endless loop
there is no printf function, you will have to use interrupt 0x10
to read the state of the alt keys you can use the interrupt 0x16
to position the output of text use interrupt 0x10
binary format of the executable should be "bin" (nasm -f bin -o boot.bin code.asm)
resize the binary file to the size of a floppy disk (truncate -s 1474560 boot.bin )
mark the binary file as bootable disk: at location 0x1FE save the value 0x55 and at
location 0x1FF save value 0xAA (use hexadecimal editor, for example: ghex2)
start the virtual machine with your binary file as a floppy disk: (nice -n 19 qemu -fda boot.bin)
I suggest you read this on assembly bootloaders. Taken from that article, here is hello world -
;-----------------------------------------------------------------------
; Boot-sector "Hello World": writes Msg to the screen over and over using
; BIOS int 10h, one character per call, wrapping at the screen edges.
; Build as a flat binary (nasm -f bin).
;-----------------------------------------------------------------------
SCREEN_COLS     equ     80              ; column at which the cursor wraps
SCREEN_ROWS     equ     25              ; row at which the cursor wraps
SECTOR_SIZE     equ     0200h           ; boot sector length in bytes
BOOT_SIGNATURE  equ     0AA55h          ; stored in the last two bytes of the sector

        org     7C00h

        jmp     short Start             ; Hop over the data ('short' keeps the jmp instruction small)

Msg:    db      "Hello World! "
EndMsg:

Start:
        mov     bx, 000Fh               ; BH = page 0, BL = colour attribute 15 (white) for the int 10h calls
        mov     cx, 1                   ; Write one character per call
        xor     dx, dx                  ; DH:DL = row 0, column 0 (top left corner)
        mov     ds, dx                  ; DS = 0 so that DS:SI can address the message
        cld                             ; Direction flag clear: LODSB moves forward

Restart:
        mov     si, Msg                 ; SI = address of the first message byte, 7C02h in this case

NextChar:
        ; PC BIOS Interrupt 10 Subfunction 2 - Set cursor position
        ; AH = 2, BH = page, DH = row, DL = column
        mov     ah, 2
        int     10h

        lodsb                           ; AL = next message byte from DS:SI (DS is 0,
                                        ; SI is the offset of that byte); SI advances

        ; PC BIOS Interrupt 10 Subfunction 9 - Write character and colour
        ; AH = 9, BH = page, AL = character, BL = attribute, CX = character count
        mov     ah, 9
        int     10h

        inc     dl                      ; Move the cursor one column right
        cmp     dl, SCREEN_COLS         ; Past the right edge?
        jne     CheckEnd
        xor     dl, dl                  ; yes: back to column 0 ...
        inc     dh                      ; ... of the next row
        cmp     dh, SCREEN_ROWS         ; Past the bottom row?
        jne     CheckEnd
        xor     dh, dh                  ; yes: back to row 0

CheckEnd:
        cmp     si, EndMsg              ; More message bytes left?
        jne     NextChar                ; yes: print the next one
        jmp     Restart                 ; no: start over from the first byte

        times SECTOR_SIZE - 2 - ($ - $$) db 0   ; Zerofill up to 510 bytes
        dw      BOOT_SIGNATURE                  ; Boot Sector signature

;OPTIONAL:
;To zerofill up to the size of a standard 1.44MB, 3.5" floppy disk
;times 1474560 - ($ - $$) db 0

Resources