I am developing a kernel in C and created something to print on screen on video memory. I expected that the first byte in video memory would be the character to print and the second byte tells the color. But my program has something different but it works!! It is very unexpected and unusual.
My kernel code -
#define VIDEO_MEM 0xb8000
void write_string( int colour, const unsigned char *string );
void main()
{
unsigned char *vid = (unsigned char*) VIDEO_MEM;
int i=0;
for (i = 0; i < 2000; i++)
{
*vid = ' ';
*(vid+2) = 0x1f;
vid += 2;
}
write_string(0x1f,"The Kernel has been loaded successfully!!");
}
void write_string( int colour, const unsigned char *string ) {
unsigned char *vid = (unsigned char*) VIDEO_MEM;
while(*string != 0)
{
*(vid) = *string;
*(vid+2) = colour;
++string;
vid+=2;
}
}
It prints the character on *vid and the color on *(vid+2) and then increments the vid by 2. It should then replace and print the next char on *(vid+2). So, the color should go but it still works.
Also, the color should be on *(vid+1)
When I use *(vid+1) instead of *(vid+2) to print the string, the screen shows down arrow characters (with ASCII code 0x1f which I wanted to be the color) replacing the entire string.
Why does the code behave so unusual??
Can anyone help?
EDIT
I have edited my code and now it prints string. But another problem arose. I added a support for printing on particular line number. But now this shifts the string backwards by one character.
void write_string( int colour, const unsigned char *string, int pos ) {
unsigned char *vid = (unsigned char*) VIDEO_MEM;
vid+=pos*160;
while(*string != 0)
{
*vid = colour;
*(vid+1) = *string;
++string;
vid+=2;
}
}
So, If I tell it to print on line 10, it prints the first character on the last character of the 9th line and then continues.
I also have a character printing function that just prints curly braces (}) instead of the given character, and it too prints one character before the given position (like the error in the write_string function). Also, it doesn't change the character background color given as an argument.
void putChar(char character, short col, short row, char attr) {
unsigned char* vid_mem = (unsigned char *) VIDEO_MEM;
int offset = (row*80 + col)*2;
vid_mem += offset;
if(!attr) {
attr = 0x0f;
}
*vid_mem = (attr<<8)+character;
}
EDIT 2
My Boot Loader:
; Boot sector: prints a banner through the BIOS, reads the kernel from the
; boot drive, switches to 32-bit protected mode and calls the kernel.
; Labels are resolved as if this code is located at 0x7c00.
[org 0x7c00]
; Address the kernel is read to (load_kernel) and later called at (BEGIN_PM).
KERNEL equ 0x1000
; Remember the drive number found in dl at entry so load_kernel can read from it.
; NOTE(review): ds/es/ss are never written in the real-mode part of this file,
; so every memory access below relies on the values they already hold — confirm.
mov [BOOT_DRIVE],dl
; Real-mode stack starts at 0x9000.
mov bp,0x9000
mov sp,bp
mov bx, msgReal
call print_string
call load_kernel
; switch_to_pm does not come back here: it jumps to init_pm, which calls BEGIN_PM.
call switch_to_pm
jmp $
; Helper routines are pulled in after the unreachable point above so that they
; are never executed by falling through.
%include 'boot/bios.ASM'
%include 'boot/gdt.ASM'
%include 'boot/protected_mode.ASM'
%include 'boot/print32.ASM'
; The last include leaves the assembler in 32-bit mode; go back to 16-bit code.
[bits 16]
; load_kernel: announce the load, then read 15 sectors from BOOT_DRIVE to KERNEL.
load_kernel:
mov bx,msgKernel
call print_string
; Arguments for disk_load: bx = destination offset, dh = sector count, dl = drive.
mov bx, KERNEL
mov dh, 15
mov dl, [BOOT_DRIVE]
call disk_load
ret
[bits 32]
; BEGIN_PM: reached from init_pm once the CPU is in 32-bit protected mode.
BEGIN_PM:
mov ebx, msgProt
call print_string32
; Transfer control to whatever load_kernel placed at KERNEL.
call KERNEL
; Spin forever if the kernel returns.
jmp $
; Data used by the code above.
BOOT_DRIVE db 0
msgReal db "Booted in 16-bit mode",0
msgProt db "Successfully switched to 32-bit mode",0
msgKernel db "Loading the kernel onto memory",0
; Pad with zeros up to byte 510, then emit the two-byte 0xaa55 word so the
; image is exactly 512 bytes long.
times 510-($-$$) db 0
dw 0xaa55
bios.ASM -
;BIOS Functions
[bits 16]
;-----------------------------------------------------------------------
; print_string - write a zero-terminated string using BIOS teletype output
; Mode:  16-bit real mode
; In:    bx = offset of the string (read through ds)
; Out:   nothing; every general register is restored by popa
; Note:  int 0x10 / ah=0x0e takes the display page in bh and, in graphics
;        modes, the foreground colour in bl. The string is therefore walked
;        with si, and bx is loaded with explicit values instead of being
;        left holding the string address.
;-----------------------------------------------------------------------
print_string:
        pusha
        mov     si, bx                  ; si = address of the next character
        mov     bx, 0x0007              ; bh = page 0, bl = colour 7 (graphics modes only)
        mov     ah, 0x0e                ; BIOS teletype function
printStringStart:
        mov     al, [si]
        test    al, al                  ; terminator reached?
        je      done
        int     0x10
        inc     si
        jmp     printStringStart
done:
        popa
        ret
; print_word: print the 16-bit value in bx as four hexadecimal digits,
; most significant digit first, using printDig for each digit.
; In:  bx = value to print
; Out: nothing; all general registers are restored by popa
; NOTE(review): printDig calls int 0x10 / ah=0x0e, which is documented to take
; the display page in bh; bh still holds half of the value being printed here —
; confirm this is harmless on the target BIOS.
print_word:
pusha
; ax = 0x00:bh, then divide by 16: al = high nibble (quotient), ah = low nibble.
mov ax,0x0000
mov cl,0x10
mov al,bh
div cl
call printDig
; Low nibble of bh.
mov al,bh
and al,0x0f
call printDig
; Same two steps for bl; ax is cleared again because printDig changed ah.
mov ax,0x0000
mov al,bl
div cl
call printDig
mov al,bl
and al,0x0f
call printDig
popa
ret
;-----------------------------------------------------------------------
; printDig - print one hexadecimal digit with BIOS teletype output
; In:    al = digit value (0..15 as used by print_word)
; Out:   al = ASCII character that was printed, ah = 0x0e
;-----------------------------------------------------------------------
printDig:
        mov     ah, 0x0e                ; teletype function; set early, mov leaves flags alone
        cmp     al, 0x9
        jg      .letter
        add     al, '0'                 ; 0..9  -> '0'..'9'
        jmp     .emit
.letter:
        sub     al, 0xa                 ; 10..15 -> 'A'..'F'
        add     al, 'A'
.emit:
        int     0x10
        ret
hex_prefix: db '0x',0
; disk_load: read sectors from disk with BIOS int 0x13, function 0x02.
; In:  dh = number of sectors to read
;      dl = drive number
;      bx = destination offset (the BIOS call uses es:bx as the buffer)
; Out: returns only on success; on any failure control goes to disk_error,
;      which never returns.
; NOTE(review): es is not set up in this routine — confirm the caller's es is
; the segment the data should be read into.
disk_load:
; Keep the requested sector count (dh) for the check after the call.
push dx
mov ah,0x02
mov al,dh
; Cylinder 0, head 0, starting at sector 2 (sector numbers start at 1, so this
; is the sector right after the first one).
mov ch,0x00
mov dh,0x00
mov cl,0x02
int 0x13
; Carry set means the BIOS reported an error. dx is still on the stack on this
; path, which does not matter because disk_error never returns.
jc disk_error
pop dx
; al = sectors actually read; it must match the count that was requested.
cmp dh,al
jne disk_error
ret
; disk_error: print 'X' followed by errMsg, then hang.
disk_error:
mov ah,0x0e
mov al,'X'
int 0x10
mov bx,errMsg
call print_string
jmp $
; 20 characters of text padded with 60 spaces to a total of 80 characters,
; followed by the zero terminator expected by print_string.
errMsg:
db "Disk Read Error....."
times 80-20 db " "
db 0
gdt.ASM -
; Global Descriptor Table with three 8-byte entries: the mandatory null
; descriptor, one code segment and one data segment. Both segments have
; base 0 and limit 0xfffff with 4 KiB granularity.
gdt_start:
; Entry 0: null descriptor (8 zero bytes).
gdt_null:
dd 0x0
dd 0x0
; Entry 1: code segment.
gdt_code:
; limit bits 0-15
dw 0xffff
; base bits 0-15
dw 0x0
; base bits 16-23
db 0x0
; access byte: present, privilege level 0, code/data descriptor, executable, readable
db 10011010b
; flags (4 KiB granularity, 32-bit default operand size) + limit bits 16-19 = 0xf
db 11001111b
; base bits 24-31
db 0x0
; Entry 2: data segment. Same layout as the code segment; only the access byte differs.
gdt_data:
dw 0xffff
dw 0x0
db 0x0
; access byte: present, privilege level 0, code/data descriptor, data, writable
db 10010010b
db 11001111b
db 0x0
gdt_end:
; Operand for lgdt: 16-bit table size minus one, then the 32-bit table address.
gdt_descriptor:
dw gdt_end - gdt_start - 1
dd gdt_start
; Segment selectors are byte offsets into the table: 0x08 for code, 0x10 for data.
CODE_SEG equ gdt_code - gdt_start
DATA_SEG equ gdt_data - gdt_start
protected_mode.ASM -
; switch_to_pm: leave 16-bit real mode and continue in 32-bit protected mode.
; Control never returns to the caller; execution continues at init_pm and from
; there at BEGIN_PM.
[bits 16]
switch_to_pm:
; Interrupts stay disabled from here on; no protected-mode interrupt table is
; set up in this code.
cli
lgdt [gdt_descriptor]
; Set bit 0 of cr0 to turn protected mode on.
mov eax, cr0
or eax, 0x1
mov cr0, eax
; Far jump: loads cs with the code selector and continues in 32-bit code.
jmp CODE_SEG:init_pm
[bits 32]
; init_pm: first 32-bit code executed after the switch.
init_pm:
; Point every data segment register at the flat data segment.
mov ax, DATA_SEG
mov ds, ax
mov ss, ax
mov es, ax
mov fs, ax
mov gs, ax
; New stack starting at 0x90000.
mov ebp,0x90000
mov esp,0x90000
; BEGIN_PM is defined by the boot sector source that includes this file.
call BEGIN_PM
print32.ASM -
[bits 32]
VIDEO_MEM equ 0xb8000
DEF_COLOR equ 0x0f
;-----------------------------------------------------------------------
; print_string32 - copy a zero-terminated string to the start of text-mode
;                  video memory, one 2-byte cell per character
; Mode:  32-bit protected mode
; In:    ebx = address of the string
; Out:   nothing; every general register is restored by popa
;-----------------------------------------------------------------------
print_string32:
        pusha
        mov     esi, ebx                ; esi = next character to read
        mov     edi, VIDEO_MEM          ; edi = next screen cell to write
        mov     ah, DEF_COLOR           ; attribute byte, identical for every cell
.next_char:
        mov     al, [esi]
        cmp     al, 0
        je      .finished
        mov     [edi], ax               ; low byte = character, high byte = attribute
        inc     esi
        add     edi, 2
        jmp     .next_char
.finished:
        popa
        ret
I also add a kernel_start.asm file just before the kernel while linking to call the main function -
; Kernel entry stub. It is listed first on the linker command line so that it
; sits at the start of the kernel image; its only job is to call the C function main.
[bits 32]
[extern main]
call main
; Spin forever if main returns.
jmp $
And here's my make file -
# Every C source under drivers/ and kernel/, and the object file built from each.
C_SOURCES = $(wildcard drivers/*.c kernel/*.c)
HEADERS = $(wildcard kernel/*.h drivers/*.h)
OBJ = ${C_SOURCES:.c=.o}

all: os-image

# Disk image: the boot sector immediately followed by the flat kernel binary.
# $^ = all prerequisites, $@ = the target being built.
os-image: boot/boot_sector.bin kernel.bin
	cat $^ > $@

# kernel_start.o is listed first so that its code is placed at the start of
# the image, at the address the boot sector calls (0x1000).
kernel.bin: kernel/kernel_start.o ${OBJ}
	ld -o $@ -Ttext 0x1000 $^ --oformat binary

# $< = the first prerequisite (the source file).
%.o : %.c
	gcc -std=c99 -Wall -pedantic -ffreestanding -c $< -o $@

%.o : %.asm
	nasm $< -f elf64 -o $@

%.bin : %.asm
	nasm $< -f bin -o $@

clean:
	rm -fr kernel/*.o
	rm -fr drivers/*.o
	rm -fr boot/*.bin
	rm -fr os-image *.bin *.o
With the changes suggested in other answer and comments, your problem doesn't seem to be reproducible for me. The following code works for me. I've tried to maintain how you coded it just so it makes sense to you:
#define VIDEO_MEM 0xb8000
void write_string( unsigned char colour, const char *string );
void write_string_line( unsigned char colour, const char *string, int pos );
void putChar(char character, short col, short row, unsigned char attr);
/* Place this at top of file as first code in kernel.o */
__asm__ ("call main\r\n" \
"cli\r\n" \
"hlt\r\n"
);
/*
 * Kernel entry point: fill all 2000 text cells with a blank character and
 * attribute 0x1f, then print the test strings and a single test character.
 */
void main()
{
    volatile unsigned char *cell = (unsigned char*) VIDEO_MEM;
    int n;

    /* One cell is two bytes: character first, attribute second. */
    for (n = 0; n != 2000; ++n, cell += 2)
    {
        cell[0] = ' ';
        cell[1] = 0x1f;
    }

    write_string(0x1f,"The Kernel has been loaded successfully!!");
    write_string_line(0x1f,"Testing Here!!",1);
    putChar('Z',3,3,0xf3);
}
/*
 * Write a zero-terminated string starting at the first cell of video memory.
 *   colour - attribute byte stored with every character
 *   string - text to display
 */
void write_string( unsigned char colour, const char *string ) {
    volatile unsigned char *cell = (unsigned char*) VIDEO_MEM;

    for ( ; *string != 0; ++string, cell += 2)
    {
        cell[0] = *string;   /* character byte */
        cell[1] = colour;    /* attribute byte */
    }
}
/*
 * Write a zero-terminated string starting at the first column of a given line.
 *   colour - attribute byte stored with every character
 *   string - text to display
 *   pos    - line index; each line occupies 160 bytes of video memory
 */
void write_string_line( unsigned char colour, const char *string, int pos ) {
    volatile unsigned char *cell = (unsigned char*) VIDEO_MEM + pos*160;

    for ( ; *string != 0; ++string, cell += 2)
    {
        cell[0] = *string;   /* character byte */
        cell[1] = colour;    /* attribute byte */
    }
}
/*
 * Put a single character on the screen.
 *   character - character to display
 *   col, row  - zero-based column and row; a row is 80 cells wide
 *   attr      - attribute byte; 0 selects the default 0x0f
 *
 * The cell is written as two separate byte stores through the volatile
 * pointer. Combining both bytes as (attr<<8)+character and storing them
 * through a cast to a plain unsigned short pointer drops the volatile
 * qualifier, and a character value of 0x80 or above is sign-extended on
 * targets where char is signed, which changes the attribute byte.
 */
void putChar(char character, short col, short row, unsigned char attr) {
    volatile unsigned char* vid_mem = (unsigned char *) VIDEO_MEM;
    int offset = (row*80 + col)*2;
    vid_mem += offset;
    if(!attr) {
        attr = 0x0f;
    }
    vid_mem[0] = (unsigned char) character;   /* character byte */
    vid_mem[1] = attr;                        /* attribute byte */
}
I've added the __asm__ at the beginning to make sure that code is the first to appear in the generated object file. It likely works without it. I've modified all your *vid pointers to be volatile . Since video is memory mapped IO you don't want to have the compiler potentially remove screen writes when it optimizes. Likely your code will work without volatile, but it is proper to add it here to avoid potential problems.
When run in BOCHS this code produces this screen output:
If you use the code provided here and it doesn't work, that would suggest the issue you are having is likely related to the code you wrote in your bootloader that reads the disk, enables A20, sets the GDT, enters protected mode, and then calls into your C code. It is also possible that problems could occur depending on how you compile and link your kernel.
Likely Cause of Undefined Behavior
After all the code and the make file were made available in EDIT 2 it became clear that one significant problem was that most of the code was compiled and linked to 64-bit objects and executables. That code won't work in 32-bit protected mode.
In the make file make these adjustments:
When compiling with GCC you need to add -m32 option
When assembling with GNU Assembler (as) targeting 32-bit objects you need to use --32
When linking with LD you need to add the -melf_i386 option
When assembling with NASM targeting 32-bit objects you need to change -f elf64 to -f elf32
A preferable option to using a 64-bit compiler and tool chain from the host environment is to create a cross compiler toolchain for i686 or i386.
This should work.
Each VGA cell is of 2 bytes long, First byte stores Character while the second byte stores the color.
Also make sure you have marked the pointer volatile, to avoid any kind of unexpected changes (or optimizations) made by the compiler to the accesses through that pointer.
/*
 * Write a zero-terminated string to the start of video memory, storing the
 * character in the first byte of each cell and the colour in the second.
 */
void write_string( int colour, const unsigned char *string )
{
    volatile unsigned char *vid = (unsigned char*) VIDEO_MEM;

    for ( ; *string != 0; ++string, vid += 2 )
    {
        vid[0] = *string;
        vid[1] = colour;
    }
}
In your edited code you use *(vid), the first byte of the video cell, for the color; that byte should hold the character.
Related
My second day into assembly programming, I am trying to create a function to square a number
square.asm:
global square: ; do not need _start because gcc has one already
section .text
square:
mov rax, rsi; remember: first argument is always in rsi. (Order is: rsi, rdi, rdx, tcx, r8, r9)
mul rsi ; rax = rax * rsi (remember: accumulator is the implicit argument)
ret ; returns the value in accumulator
main.c:
#include <stdio.h>
long int square(long int);
int main() {
for (long int i = 1; i < 6; i++) {
printf("%li", square(i));
}
return 0;
}
result:
7556421251850319424-3765949904798924751-3765949904798924751-3765949904798924751-3765949904798924751
expected:
1491625
What have I missed?
Your first parameter is in rdi, not rsi, if you're using the System V ABI.
Use:
mov rax, rdi   ; rax = first integer argument (passed in rdi under the System V ABI)
mul rdi        ; rdx:rax = rax * rdi; the low 64 bits in rax are the return value
ret
To make the output easier to read, you'll also want some whitespace or a newline in your print statement:
printf("%li\n", square(i));
And one nitpick - use either int main(void) or int main(int argc, char **argv) for your main function signature. Anything else is undefined.
I want to know how MSVC 2010 generates code for volatile local variables and have done a test. This simple test function uses a volatile local variable:
int proc1(int a, int b) {
int volatile volvar=0;
int c=a;
if (b>a)
c=0;
return c;
}
The initialization of the integer volvar should not be eliminated by the optimizer due to the volatile keyword. The generated 64bit assembly unexpectedly looks like this:
_TEXT SEGMENT
volvar$ = 8
a$ = 8
b$ = 16
?proc1@@YAHHH@Z PROC ; proc1, COMDAT
; 3 : int volatile volvar=0;
xor eax, eax
; 4 :
; 5 : int c=a;
; 6 : if (b>a)
cmp edx, ecx
cmovg ecx, eax
mov DWORD PTR volvar$[rsp], eax;<---what is 'mov DWORD PTR [8+rsp], eax'?
; 7 : c=0;
; 8 : return c;
mov eax, ecx
; 9 : }
ret 0
?proc1@@YAHHH@Z ENDP ; proc1
_TEXT ENDS
END
Notice the symbol volvar$ equals 8, so the instruction generated for the volatile local variable assignment writes to the address [8+rsp]. RSP wasn't modified, so it should point to the return address. But my understanding of the 64-bit stack layout is that there are no longer any parameters above the return address. Instead, [8+rsp] should point to the RCX storage location for the CALLING FUNCTION, which has nothing to do with our current function. Does that incorrectly overwrite the stack of the calling function?
Is it a bug with the compiler or my misunderstanding of the 64 bit stack frame layout?
I need to edit a string received from user in C++ code in assembly. I found this tutorial http://msdn.microsoft.com/en-US/library/y8b57x4b(v=vs.80).aspx and according to it my code should work
int main ()
{
char* s;
s=new char[80];
cin.getline(s,80);
__asm
{
mov eax, offset s
}
}
But the compiler shows an error on the line with mov "improper operand type". What is wrong and how can i fix it?
char* s is a local variable. It is created when the function is called and "forgotten" when the function returns. No "offset" (i.e. an absolute memory address) exists for it at compile time. But you can have the inline assembler load the pointer value:
#include <iostream>
using namespace std;
int main ()
{
char* s;
s=new char[80];
__asm
{
mov ebx, s ; = mov ebx, [ebp-4]
mov byte ptr [ebx], 'H'
mov byte ptr [ebx+1], 'e'
mov byte ptr [ebx+2], 'l'
mov byte ptr [ebx+3], 'l'
mov byte ptr [ebx+4], 'o'
mov byte ptr [ebx+5], 0 ; Don't forget the terminator!
}
cout << s << endl;
return 0;
}
I'm trying to understand what this test does exactly. This toy code
int _tmain(int argc, _TCHAR* argv[])
{
int i;
printf("%d", i);
return 0;
}
Compiles into this:
int _tmain(int argc, _TCHAR* argv[])
{
012C2DF0 push ebp
012C2DF1 mov ebp,esp
012C2DF3 sub esp,0D8h
012C2DF9 push ebx
012C2DFA push esi
012C2DFB push edi
012C2DFC lea edi,[ebp-0D8h]
012C2E02 mov ecx,36h
012C2E07 mov eax,0CCCCCCCCh
012C2E0C rep stos dword ptr es:[edi]
012C2E0E mov byte ptr [ebp-0D1h],0
int i;
printf("%d", i);
012C2E15 cmp byte ptr [ebp-0D1h],0
012C2E1C jne wmain+3Bh (012C2E2Bh)
012C2E1E push 12C2E5Ch
012C2E23 call __RTC_UninitUse (012C10B9h)
012C2E28 add esp,4
012C2E2B mov esi,esp
012C2E2D mov eax,dword ptr [i]
012C2E30 push eax
012C2E31 push 12C5858h
012C2E36 call dword ptr ds:[12C9114h]
012C2E3C add esp,8
012C2E3F cmp esi,esp
012C2E41 call __RTC_CheckEsp (012C1140h)
return 0;
012C2E46 xor eax,eax
}
012C2E48 pop edi
012C2E49 pop esi
012C2E4A pop ebx
012C2E4B add esp,0D8h
012C2E51 cmp ebp,esp
012C2E53 call __RTC_CheckEsp (012C1140h)
012C2E58 mov esp,ebp
012C2E5A pop ebp
012C2E5B ret
The 5 lines emphasized are the only ones removed by properly initializing the variable i. The lines 'push 12C2E5Ch, call __RTC_UninitUse' call the function that displays the error box, with a pointer to a string containing the variable name ("i") as an argument.
What I can't understand are the 3 lines that perform the actual test:
012C2E0E mov byte ptr [ebp-0D1h],0
012C2E15 cmp byte ptr [ebp-0D1h],0
012C2E1C jne wmain+3Bh (012C2E2Bh)
It would have seemed the compiler is probing the stack area of i (setting a byte to zero and immediately testing whether it's zero), just to be sure it isn't initialized somewhere it couldn't see during build. However, the probed address, ebp-0D1h, has little to do with the actual address of i.
Even worse, it seems if there were such an external (other thread?) initialization that did initialize the probed address but to zero, this test would still shout about the variable being uninitialized.
What's going on? Maybe the probe is meant for something entirely different, say to test if a certain byte is writable?
[ebp-0D1h] is a temporary variable used by the compiler to track "initialized" status of variables. If we modify the source a bit, it will be more clear:
int _tmain(int argc, _TCHAR* argv[])
{
int i, j;
printf("%d %d", i, j);
i = 1;
printf("%d %d", i, j);
j = 2;
return 0;
}
Produces the following (irrelevant parts skipped):
mov DWORD PTR [ebp-12], -858993460 ; ccccccccH
mov DWORD PTR [ebp-8], -858993460 ; ccccccccH
mov DWORD PTR [ebp-4], -858993460 ; ccccccccH
mov BYTE PTR $T4694[ebp], 0
mov BYTE PTR $T4693[ebp], 0
In prolog, variables are filled with 0xCC, and two tracking variables (one for i and one for j) are set to 0.
; 7 : printf("%d %d", i, j);
cmp BYTE PTR $T4693[ebp], 0
jne SHORT $LN3@main
push OFFSET $LN4@main
call __RTC_UninitUse
add esp, 4
$LN3@main:
cmp BYTE PTR $T4694[ebp], 0
jne SHORT $LN5@main
push OFFSET $LN6@main
call __RTC_UninitUse
add esp, 4
$LN5@main:
mov eax, DWORD PTR _j$[ebp]
push eax
mov ecx, DWORD PTR _i$[ebp]
push ecx
push OFFSET $SG4678
call _printf
add esp, 12 ; 0000000cH
This corresponds roughly to:
/* $T4693 is the tracking flag for j, $T4694 the one for i. */
if ( $T4693 == 0 )
_RTC_UninitUse("j");
if ( $T4694 == 0 )
_RTC_UninitUse("i");
printf("%d %d", i, j);
Next part:
; 8 : i = 1;
mov BYTE PTR $T4694[ebp], 1
mov DWORD PTR _i$[ebp], 1
So, once i is initialized, the tracking variable is set to 1.
; 10 : j = 2;
mov BYTE PTR $T4693[ebp], 1
mov DWORD PTR _j$[ebp], 2
Here, the same is happening for j.
Here is my guess: the compiler probably allocates flags in memory showing the initialization status of variables. In your case for variable i this is a single byte at [ebp-0D1h]. The zeroing of this byte means i is not initialized. I assume if you initialize i this byte will be set to non-zero. Try something run-time like this: if (argc > 1) i = 1; This should generate code instead of omitting the whole check. You can also add another variable, and see if you get two different flags.
The zeroing of the flag and the testing just happen to be consecutive in this case, but that might not always be the case.
C7060F000055 mov dword ptr [esi],5500000Fh
C746048BEC5151 mov dword ptr [esi+0004],5151EC8Bh
b. And one of its later generations:
BF0F000055 mov edi,5500000Fh
893E mov [esi],edi
5F pop edi
52 push edx
B640 mov dh,40
BA8BEC5151 mov edx,5151EC8Bh
53 push ebx
8BDA mov ebx,edx
895E04 mov [esi+0004],ebx
c. And yet another generation with recalculated ("encrypted") "constant" data:
BB0F000055 mov ebx,5500000Fh
891E mov [esi],ebx
5B pop ebx
51 push ecx
B9CB00C05F mov ecx,5FC000CBh
81C1C0EB91F1 add ecx,F191EBC0h ; ecx=5151EC8Bh
So my program is very simple. I have a string "Hello World" and I want to replace 'H' with 'A'. So here is my assembly code for MASM.
char* name = "Hello World";
_asm
{
mov eax, name;
mov ebx, 'A';
mov [eax], ebx;
}
printf("%s", name);
Visual Studio cannot compile this. It alerts me that this program is not working. I suspect my syntax for mov[eax], ebx might be wrong. All comments are appreciated. Thanks!
Here is the image of the alert: https://www.dropbox.com/s/e5ok96pj0mxi6sa/test%20program%20not%20working.PNG
"Hello World" is a literal, i.e a non-writeable constant string. 'name' is a pointer which points to that literal. You can instead define an array, which has to be populated with that literal, i.e. the literal is copied into the array:
#include <stdio.h>
int main (void)
{
char name[] = "Hello World";
_asm
{
lea eax, name; // EAX = address of name
mov ebx, 'A';
mov [eax], bl;
}
printf("%s", name);
return 0;
}
The original code works, if you use the C89-Compiler of MSVC (file-extension .c or command line option /TC), but that does not really meet the standard.
First Character
mov eax, _name; // EAX = address of name
mov bl, 'A';
mov byte[eax], bl;
Second Character
mov eax, _name; // EAX = address of name
mov bl, 'A';
mov byte[eax+1], bl;
MOVS
MOVS - This instruction moves 1 Byte, Word or Doubleword of data from memory location to another.
LODS
LODS - This instruction loads from memory. If the operand is of one byte, it is loaded into the AL register, if the operand is one word, it is loaded into the AX register and a doubleword is loaded into the EAX register.
STOS
STOS - This instruction stores data from register (AL, AX, or EAX) to memory.
CMPS
CMPS - This instruction compares two data items in memory. Data could be of a byte size, word or doubleword.
SCAS
SCAS - This instruction compares the contents of a register (AL, AX or EAX) with the contents of an item in memory.