Learning NASM Assembly on 32-bit Ubuntu.
I've been learning about recursive functions. I just did factorial, with your help here: Understanding recursive factorial function in NASM Assembly
Watching the code, I thought that maybe I could quickly implement fibonacci as well, using almost the same code. Here is the code, assuming that the parameter is always greater than 0:
SECTION .text
global main
main:
; -----------------------------------------------------------
; Main
; -----------------------------------------------------------
push 6
call fibonacci
mov [ECX],EAX
add byte [ECX],'0'
mov EDX,1
call print
; -----------------------------------------------------------
; Exit
; -----------------------------------------------------------
mov EAX,1
int 0x80
; -----------------------------------------------------------
; Fibonacci recursivo: f(n) = f(n - 1) + f(n - 2)
; -----------------------------------------------------------
fibonacci:
push EBP ; Retrieve parameter and put it
push EBX ; Save previous parameter
mov EBP,ESP ; into EBX register
add EBP,12 ;
mov EBX,[EBP] ; EBX = Param
cmp EBX,1 ; Check for base case
jle base ; It is base if (n <= 1)
dec EBX ; Decrement EBX to put it in the stack
push EBX ; Put (EBX - 1) in stack
inc EBX ; Restore EBX
call fibonacci ; Calculate fibonacci for (EBX - 1)
mov ESI,EAX ; EAX = (EAX + EBX)
pop EBX ; Retrieve EBX from the stack
sub EBX,2 ; Decrement EBX to put it in the stack
push EBX ; Put (EBX - 2) in stack
add EBX,2 ; Restore EBX
call fibonacci ; Calculate fibonacci for (EBX - 2)
mov EDX,EAX ; EAX = (EAX + EBX)
pop EBX ; Retrieve EBX from the stack
add ESI,EDX
mov EAX,ESI
jmp end
base: ; Base case
mov EAX,1 ; The result would be 1
end:
pop EBX ; Restore previous parameter
pop EBP ; Release EBP
ret
It is a bit crude. I calculate fibonacci for (parameter - 1), then I do it again for (parameter - 2), and just add them up and put them into EAX.
It doesn't work:
2 => 2
3 => 3
4 => 4
5 => 4
Fortunately I fixed the segmentation fault errors, but I probably broke something else doing that. Now I don't see what's the problem. Can you tell me why am I getting the wrong values?
One particular observation is that, for some reason, doing mov ECX,EAX gave me a segmentation fault error. That's why I used ESI instead. I'm not sure why, but I guess that it is related.
Whenever you're dealing with recursion, you have to be very careful about what the next layer in the recursive chain will do to the state of the current layer (e.g. register values). I'd suggest rewriting the function as follows:
fibonacci:
push EBP ; Retrieve parameter and put it
push EBX ; Save previous parameter
mov EBP,ESP ; into EBX register
add EBP,12 ;
mov EBX,[EBP] ; EBX = Param
cmp EBX,1 ; Check for base case
jle base ; It is base if (n <= 1)
lea ecx,[ebx-1]
push ecx ; push N-1
call fibonacci ; Calculate fibonacci for (EBX - 1)
pop ecx ; remove N-1 off the stack
push eax ; save the result of fibonacci(N-1)
lea ecx,[ebx-2]
push ecx ; push N-2
call fibonacci ; Calculate fibonacci for (EBX - 2)
pop ecx ; remove N-2 off the stack
pop ecx ; ecx = fibonacci(N-1)
add eax,ecx ; eax = fibonacci(N-2) + fibonacci(N-1)
jmp end
base: ; Base case
mov EAX,1 ; The result would be 1
end:
pop EBX ; Restore previous parameter
pop EBP ; Release EBP
ret
Related
I'm trying to implement insertion sort in 32bit assembly in linux using NASM and I get a segmentation fault mid-run (not to mention that for some reason 'printf' prints random garbage values, I'm not totally sure why), Here is the
code:
section .rodata
MSG: DB "welcome to sortMe, please sort me",10,0
S1: DB "%d",10,0 ; 10 = '\n' , 0 = '\0'
section .data
array DD 5,1,7,3,4,9,12,8,10,2,6,11 ; unsorted array
len DB 12
section .text
align 16
global main
extern printf
main:
push MSG ; print welcome message
call printf
add esp,4 ; clean the stack
call printArray ;print the unsorted array
;parameters
;push len
;push array
mov eax, len
mov ebx, array
push eax
push ebx
call myInsertionSort
call printArray ; print the sorted one
mov eax, 1 ;exit system call
int 0x80
printArray:
push ebp ;save old frame pointer
mov ebp,esp ;create new frame on stack
pushad ;save registers
mov eax,0
mov ebx,0
mov edi,0
mov esi,0 ;array index
mov bl, byte [len]
add edi,ebx ; edi = array size
print_loop:
cmp esi,edi
je print_end
push dword [array+esi*4]
push S1
call printf
add esp, 8 ;clean the stack
inc esi
jmp print_loop
print_end:
popa ;restore registers
mov esp,ebp ;clean the stack frame
pop ebp ;return to old stack frame
ret
myInsertionSort:
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov ecx, [ebp+12]
movzx ecx, byte [ecx] ;put len in ecx, our loop variable
mov eax, 0
mov ebx, 0
mov esi, [ebp+8] ; the array
loop loop_1
loop_1:
cmp ecx, 0 ; if we're done
je done_1 ; then done with loop
mov edx, ecx
push ecx ; we save len, because loop command decrements ecx
sub edx, ecx
mov ecx, [esi+4*edx] ;;;;;; ecx now array[i] ? how do I access array[i] in a similar manner?
mov ebx, eax
shr ebx, 2 ; number of times for inner loop
loop_2:
cmp ebx, 0 ; we don't use loop to not affect ecx so we use ebx and compare it manually with 0
jl done_2
cmp [esi+ebx], ecx ;we see if array[ebx] os ecx so we can exit the loop
jle done_2
lea edx, [esi+ebx]
push dword [edx] ; pushing our array[ebx]
add edx, 4
pop dword [edx] ; popping the last one
dec ebx ; decrementing the loop iterator
jmp loop_2 ; looping again
done_2:
mov [esi+ebx+1], ecx
inc eax ; incrementing iterator
pop ecx ; len of array to compare now to eax and see if we're done
jmp loop_1
done_1:
pop edi
pop esi
pop ebx
pop ebp ; we pop them in opposite to how we pushed
ret
About the printf thing, I'm positive that I should push the parameters the opposite way (first S1 and then the integer so it'd be from left to right as we'd call it in C), and if I do switch them, nothing is printed at all while I'm getting a segmentation fault. I don't know what to do, it prints these as output:
welcome to sortMe, please sort me
5
16777216
65536
256
1
117440512
458752
1792
7
50331648
196608
768
mov ecx, [ebp+12] ;put len in ecx, our loop variab
This only moves the address of LEN into ECX not its value! You need to add movzx ecx, byte [ecx]
You also need to define LEN=48
loop loop_1
What's this bizare use of LOOP doing here?
You are mixing bytes and dwords on multiple occasions. You need to rework the code. p.e.
dec ebx ; ebx is now number of times we should go through inner loop
should become
shr ebx,2
This is not correct because you need the address and not the value. Change MOV into LEA.
jle done_2
mov edx, [esi+ebx]
Perhaps you can post your reworked code as an EDIT within your Original question.
Your edited code does not address ALL the problems signaled by user3144770!
The parameters to printf are correct but here are some additional problems with your printArray routine.
Since ESI is an index in an array of dwords you need to scale it up!
push dword [array+esi*4]
Are you sure pusha will save 32 bits ? Perhaps you'd better use pushad
ps Should you decide to rework your code and post the edit then please add the reworked code after the last line of the existing post. This way the original question will continue making sense to people viewing it the first time!
Hi every one I am trying to do a tower of Hanoi in assembly x86 but I am trying to use arrays. So this code gets a number from user as a parameter in Linux, then error checks a bunch of stuff. So now I just want to make the algorithm which use the three arrays i made (start, end, temp) and output them step by step. If someone can help it would be greatly appreciated. `
%include "asm_io.inc"
segment .data ; where all the predefined variables are stored
aofr: db "Argument out of Range", 0 ; define aofr as a String "Argument out of Range"
ia: db "Incorrect Argument", 0 ; define ia as a String "Incorrect Argument"
tma: db "Too many Arguments", 0 ; define tma as a String "Too many Arguments"
hantowNumber dd 0 ; define hantowNumber this is what the size of the tower will be stored in
start: dd 0,0,0,0,0,0,0,0,9 ; this array is where all the rings start at
end: dd 0,0,0,0,0,0,0,0,9 ; this array is where all the rings end up at
temp: dd 0,0,0,0,0,0,0,0,9 ; this array is used to help move the rings
test: dd 0,0,0,0,0,0,0,0,9
; The next couple lines define the strings to show the pegs and what they look like
towerName: db " Tower 1 Tower 2 Tower 3 ", 10, 0
lastLineRow: db "XXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXX ", 10, 0
buffer: db " ", 0
fmt:db "%d",10,0
segment .bss ; where all the input variables are stored
segment .text
global asm_main ; run the main function
extern printf
asm_main:
enter 0,0 ; setup routine
pusha
mov edx, dword 0 ; set edx to zero this is where the hantowNumber is saved for now
mov ecx, dword[ebp+8] ; ecx has how many arguments are given
mov eax, dword[ebp+12] ; save the first argument in eax
add eax, 4 ; move the pointer to the main argument
mov ebx, dword[eax] ; save the number into ebx
push ebx ; reserve ebx
push ecx ; reserve ecx
cmp ecx, dword 2 ; compare if there are more the one argument given
jg TmA ; if more then one argument is given then jump Too many Argument (TmA)
mov ecx, 0 ; ecx = 0
movzx eax, byte[ebx+ecx] ; eax is the first character number from the inputed number
sub eax, 48 ; subtract 48 to get the actual number/letter/symbol
cmp eax, 10 ; check if eax is less then 10
jg IA ; if eax is greater then 10 then it is a letter or symbol
string_To_int: ; change String to int procedure
add edx, eax ; put the number in edx
inc ecx ; increase counter (ecx)
movzx eax, byte[ebx+ecx] ; move the next number in eax
cmp eax, 0 ; if eax = 0 then there are no more numbers
mov [hantowNumber], edx ; change hantowNumber to what ever is in edx
je rangeCheck ; go to rangeCheck to check if between 2-8
sub eax, 48 ; subtract 48 to get the actual number/letter/symbol
cmp eax, 10 ; check if eax is less then 10
jg IA ; if eax is greater then 10 then it is a letter or symbol
imul edx, 10 ; multiply edx by 10 so the next number can be added to the end
jmp string_To_int ; jump back to string_To_int if not done
rangeCheck: ; check the range of the number
cmp edx, dword 2 ; compare edx with 2
jl AofR ; if hantowNumber (edx) < 2 then go to Argument out of Range (AofR)
cmp edx, dword 8 ; compare edx with 8
jg AofR ; if hantowNumber (edx) > 8 then go to Argument out of Range (AofR)
mov ecx, [hantowNumber] ; move the number enterd by user in ecx
mov esi, 28 ; esi == 28 for stack pointer counter
setStart: ; set the first array the starting peg
mov [start+esi], ecx ; put ecx into the array
sub esi, 4 ; take one away from stack pointer conter
dec ecx ; take one away from ecx so the next number can go in to the array
cmp ecx, 0 ; compare ecx with 0
jne setStart ; if ecx != 0 then go to setStart loop
; This is the section where the algoritham should go for tower of hanoi
mov ecx, [hantowNumber]
towerAlgorithm:
cmp ecx, 0
jg Exit ; jump to Exit at the end of the program
dec ecx
IA:
mov eax, ia ; put the string in eax
push eax ; reserve eax
call print_string ; output the string that is in eax
call print_nl ; print a new line after the output
pop eax ; put eax back to normal
add esp, 4 ; takes 4 from stack
jmp Exit ; jump to Exit at the end of the program
AofR:
mov eax, aofr ; put the string in eax
push eax ; reserve eax
call print_string ; output the string that is in eax
call print_nl ; print a new line after the output
pop eax ; put eax back to normal
add esp, 4 ; takes 4 from stack
jmp Exit ; jump to Exit at the end of the program
TmA:
mov eax, tma ; put the string in eax
push eax ; reserve eax
call print_string ; output the string that is in eax
call print_nl ; print a new line after the output
pop eax ; put eax back to normal
add esp, 4 ; takes 4 from stack
jmp Exit ; jump to Exit at the end of the program
Exit: ; ends the program when it jumps to here
add esp, 9 ; takes 8 from stack
popa
mov eax, 0 ; return back to C
leave
ret
haha I'm doing the exact same assignment and stuck on the algorithm however though when running your code it seems to identify "too many arguments" even though only one argument is provided, consider this algorithm when dealing with arguments(don't forget ./ is considered the "first argument" since it is the zeroth argument provided):
enter 0,0
pusha
; address of 1st argument is on stack at address ebp+12
; address of 2nd arg = address of 1st arg + 4
mov eax, dword [ebp+12] ;eax = address of 1st arg
add eax, 4 ;eax = address of 2nd arg
mov ebx, dword [eax] ;ebx = 2nd arg, it is pointer to string
mov eax, 0 ;clear the register
mov al, [ebx] ;it moves only 1 byte
sub eax, '0' ;now eax contains the numeric value of the firstcharacter of string
I'm reading a book(Assembly Language Step by Step, Programming with Linux by Jeff Duntemann) and I'm trying to change this program that show's arguments to instead show the environment variables. I'm trying to only use what was taught thus far(no C) and I've gotten the program to print environment variables but only after I counted how many I had and used an immediate, obviously not satisfying. Here's what I have:
global _start ; Linker needs this to find the entry point!
_start:
nop ; This no-op keeps gdb happy...
mov ebp,esp ; Save the initial stack pointer in EBP
; Copy the command line argument count from the stack and validate it:
cmp dword [ebp],MAXARGS ; See if the arg count exceeds MAXARGS
ja Error ; If so, exit with an error message
; Here we calculate argument lengths and store lengths in table ArgLens:
xor eax,eax ; Searching for 0, so clear AL to 0
xor ebx,ebx ; Stack address offset starts at 0
ScanOne:
mov ecx,0000ffffh ; Limit search to 65535 bytes max
mov edi,dword [ebp+16+ebx*4] ; Put address of string to search in EDI
mov edx,edi ; Copy starting address into EDX
cld ; Set search direction to up-memory
repne scasb ; Search for null (0 char) in string at edi
jnz Error ; REPNE SCASB ended without finding AL
mov byte [edi-1],10 ; Store an EOL where the null used to be
sub edi,edx ; Subtract position of 0 from start address
mov dword [ArgLens+ebx*4],edi ; Put length of arg into table
inc ebx ; Add 1 to argument counter
cmp ebx,44; See if arg counter exceeds argument count
jb ScanOne ; If not, loop back and do another one
; Display all arguments to stdout:
xor esi,esi ; Start (for table addressing reasons) at 0
Showem:
mov ecx,[ebp+16+esi*4] ; Pass offset of the message
mov eax,4 ; Specify sys_write call
mov ebx,1 ; Specify File Descriptor 1: Standard Output
mov edx,[ArgLens+esi*4] ; Pass the length of the message
int 80H ; Make kernel call
inc esi ; Increment the argument counter
cmp esi,44 ; See if we've displayed all the arguments
jb Showem ; If not, loop back and do another
jmp Exit ; We're done! Let's pack it in!
I moved the displacement up past the first null pointer to the first environment variable([ebp+4+ebx*4] > [ebp+16+ebx*4]) in both ScanOne and Showem. When I compare to the number of environment variables I have(44) it will print them just fine without a segfault, comparing to 45 only gives me a segfault.
I've tried using the pointers to compare to zero(in search of null pointer): cmp dword [ebp+16+ebx*4],0h but that just returns a segfault. I'm sure that the null pointer comes after the last environment variable in the stack but it's like it won't do anything up to and beyond that.
Where am I going wrong?
What if your program has 2, 3, or 0 args, would your code still work? Each section is separated by a NULL pointer (4 bytes of 0) You could just get the count of parameters and use that as your array index and skip over the args until you get to the NULL bytes. Now you have your Environment Block:
extern printf, exit
section .data
fmtstr db "%s", 10, 0
fmtint db "%d", 10, 0
global main
section .text
main:
push ebp
mov ebp, esp
mov ebx, [ebp + 4]
.SkipArgs:
mov edi, dword [ebp + 4 * ebx]
inc ebx
test edi, edi
jnz .SkipArgs
.ShowEnvBlock:
mov edi, dword [ebp + 4 * ebx]
test edi, edi
jz .NoMore
push edi
push fmtstr
call printf
add esp, 4 * 2
inc ebx
jmp .ShowEnvBlock
.NoMore:
push 0
call exit
Yes I use printf here, but you just swap that with your system call.
Want to go ahead and apologize, this always happens to me(fix it myself after asking question on stackoverflow). I think when I tried comparing pointer to 0h I typed something wrong. Here's what I did:
inc ebx
cmp dword [ebp+16+ebx*4],0h
jnz ScanOne
and
inc esi
cmp dword [ebp+16+esi*4],0h
jnz Showem
This worked.
Learning NASM Assembly on 32-bit Ubuntu. I'm now trying to learn about recursive functions, starting with factorial (note: here I am assuming that the parameter will always be non-negative).
Assuming that I have
push 3
call factorial
I want to end up with 6 in EAX.
Here is my attempt:
SECTION .text
global main
main:
; -----------------------------------------------------------
; Main
; -----------------------------------------------------------
push 3
call factorial
; -----------------------------------------------------------
; Exit
; -----------------------------------------------------------
mov EAX,1
int 0x80
; -----------------------------------------------------------
; Recursive Factorial: n! = n * (n - 1)!
; -----------------------------------------------------------
factorial:
push EBP ; Retrieve parameter and put it
mov EBP,ESP ; into EBX register
add EBP,8 ;
mov EBX,[EBP] ; EBX = Param
cmp EBX,0 ; Check for base case
je base ; It is base if (n == 0)
dec EBX ; Decrement EBX to put it in the stack
push EBX ; Put (EBX - 1) in stack
inc EBX ; Restore EBX
call factorial ; Calculate factorial for (EBX - 1)
mul EBX ; EAX = (EAX * EBX) = (EBX - 1)! * EBX
pop EBX ; Retrieve EBX from the stack
jmp end
base: ; Base case
mov EAX,1 ; The result would be 1
end:
pop EBP ; Release EBP
ret
At least it works for the base case, ha... But for any other value I push, it always returns 0. I had the suspicion that maybe since EAX is 0, MUL would always result in 0, explaining this. To test, I decided to give EAX a value of 2, expecting some non-zero value, but it kept resulting in 0.
Can you advice me on how to do a recursive factorial function that takes its parameter from the stack? I believe having seen some examples, but either they were not recursive or they took their parameters from other places, or they used a bunch of variables (when I think it can be done with just registers).
Note that factorial(n-1) will overwrite factorial(n)'s value of EBX the first thing it does, thereby rendering the inc EBX after the push pointless. After you've reached the base case you'll have the situation where EBX is 0 when you do the mul, and of course anything * 0 == 0.
The easiest fix would be to change the prologue to:
push EBP ; Retrieve parameter and put it
push EBX ; save previous param
mov EBP,ESP ; into EBX register
add EBP,12 ;
mov EBX,[EBP] ; EBX = Param
And the epilogue to:
pop EBX ; restore previous param
pop EBP ; Release EBP
ret
I am looking for a way to print an integer in assembler (the compiler I am using is NASM on Linux), however, after doing some research, I have not been able to find a truly viable solution. I was able to find a description for a basic algorithm to serve this purpose, and based on that I developed this code:
global _start
section .bss
digit: resb 16
count: resb 16
i: resb 16
section .data
section .text
_start:
mov dword[i], 108eh ; i = 4238
mov dword[count], 1
L01:
mov eax, dword[i]
cdq
mov ecx, 0Ah
div ecx
mov dword[digit], edx
add dword[digit], 30h ; add 48 to digit to make it an ASCII char
call write_digit
inc dword[count]
mov eax, dword[i]
cdq
mov ecx, 0Ah
div ecx
mov dword[i], eax
cmp dword[i], 0Ah
jg L01
add dword[i], 48 ; add 48 to i to make it an ASCII char
mov eax, 4 ; system call #4 = sys_write
mov ebx, 1 ; file descriptor 1 = stdout
mov ecx, i ; store *address* of i into ecx
mov edx, 16 ; byte size of 16
int 80h
jmp exit
exit:
mov eax, 01h ; exit()
xor ebx, ebx ; errno
int 80h
write_digit:
mov eax, 4 ; system call #4 = sys_write
mov ebx, 1 ; file descriptor 1 = stdout
mov ecx, digit ; store *address* of digit into ecx
mov edx, 16 ; byte size of 16
int 80h
ret
C# version of what I want to achieve (for clarity):
static string int2string(int i)
{
Stack<char> stack = new Stack<char>();
string s = "";
do
{
stack.Push((char)((i % 10) + 48));
i = i / 10;
} while (i > 10);
stack.Push((char)(i + 48));
foreach (char c in stack)
{
s += c;
}
return s;
}
The issue is that it outputs the characters in reverse, so for 4238, the output is 8324. At first, I thought that I could use the x86 stack to solve this problem, push the digits in, and pop them out and print them at the end, however when I tried implementing that feature, it flopped and I could no longer get an output.
As a result, I am a little bit perplexed about how I can implement a stack in to this algorithm in order to accomplish my goal, aka printing an integer. I would also be interested in a simpler/better solution if one is available (as it's one of my first assembler programs).
One approach is to use recursion. In this case you divide the number by 10 (getting a quotient and a remainder) and then call yourself with the quotient as the number to display; and then display the digit corresponding to the remainder.
An example of this would be:
;Input
; eax = number to display
section .data
const10: dd 10
section .text
printNumber:
push eax
push edx
xor edx,edx ;edx:eax = number
div dword [const10] ;eax = quotient, edx = remainder
test eax,eax ;Is quotient zero?
je .l1 ; yes, don't display it
call printNumber ;Display the quotient
.l1:
lea eax,[edx+'0']
call printCharacter ;Display the remainder
pop edx
pop eax
ret
Another approach is to avoid recursion by changing the divisor. An example of this would be:
;Input
; eax = number to display
section .data
divisorTable:
dd 1000000000
dd 100000000
dd 10000000
dd 1000000
dd 100000
dd 10000
dd 1000
dd 100
dd 10
dd 1
dd 0
section .text
printNumber:
push eax
push ebx
push edx
mov ebx,divisorTable
.nextDigit:
xor edx,edx ;edx:eax = number
div dword [ebx] ;eax = quotient, edx = remainder
add eax,'0'
call printCharacter ;Display the quotient
mov eax,edx ;eax = remainder
add ebx,4 ;ebx = address of next divisor
cmp dword [ebx],0 ;Have all divisors been done?
jne .nextDigit
pop edx
pop ebx
pop eax
ret
This example doesn't suppress leading zeros, but that would be easy to add.
I think that maybe implementing a stack is not the best way to do this (and I really think you could figure out how to do that, saying as how pop is just a mov and a decrement of sp, so you can really set up a stack anywhere you like by just allocating memory for it and setting one of your registers as your new 'stack pointer').
I think this code could be made clearer and more modular if you actually allocated memory for a c-style null delimited string, then create a function to convert the int to string, by the same algorithm you use, then pass the result to another function capable of printing those strings. It will avoid some of the spaghetti code syndrome you are suffering from, and fix your problem to boot. If you want me to demonstrate, just ask, but if you wrote the thing above, I think you can figure out how with the more split up process.
; Input
; EAX = pointer to the int to convert
; EDI = address of the result
; Output:
; None
int_to_string:
xor ebx, ebx ; clear the ebx, I will use as counter for stack pushes
.push_chars:
xor edx, edx ; clear edx
mov ecx, 10 ; ecx is divisor, devide by 10
div ecx ; devide edx by ecx, result in eax remainder in edx
add edx, 0x30 ; add 0x30 to edx convert int => ascii
push edx ; push result to stack
inc ebx ; increment my stack push counter
test eax, eax ; is eax 0?
jnz .push_chars ; if eax not 0 repeat
.pop_chars:
pop eax ; pop result from stack into eax
stosb ; store contents of eax in at the address of num which is in EDI
dec ebx ; decrement my stack push counter
cmp ebx, 0 ; check if stack push counter is 0
jg .pop_chars ; not 0 repeat
mov eax, 0x0a
stosb ; add line feed
ret ; return to main
; eax = number to stringify/output
; edi = location of buffer
intToString:
push edx
push ecx
push edi
push ebp
mov ebp, esp
mov ecx, 10
.pushDigits:
xor edx, edx ; zero-extend eax
div ecx ; divide by 10; now edx = next digit
add edx, 30h ; decimal value + 30h => ascii digit
push edx ; push the whole dword, cause that's how x86 rolls
test eax, eax ; leading zeros suck
jnz .pushDigits
.popDigits:
pop eax
stosb ; don't write the whole dword, just the low byte
cmp esp, ebp ; if esp==ebp, we've popped all the digits
jne .popDigits
xor eax, eax ; add trailing nul
stosb
mov eax, edi
pop ebp
pop edi
pop ecx
pop edx
sub eax, edi ; return number of bytes written
ret