.386
.model flat, stdcall
option casemap: none

include \masm32\include\windows.inc
include \masm32\include\masm32.inc
include \masm32\include\user32.inc
include \masm32\include\kernel32.inc
include \masm32\include\msvcrt.inc
include \masm32\include\fpu.inc

include \masm32\macros\macros.asm
include tinyptc.inc

includelib \masm32\lib\masm32.lib
includelib \masm32\lib\user32.lib
includelib \masm32\lib\kernel32.lib
includelib \masm32\lib\uuid.lib
includelib \masm32\lib\msvcrt.lib
includelib \masm32\lib\fpu.lib
includelib tinyptc.lib

.stack 8192

.data 

    ; ---- display / frame pacing --------------------------------------------
    ; dwWidth x dwHeight is the size passed to ptc_open and used to size BUF.
    ; dwFrameLock != 0 makes main wait until 16 ms have elapsed per frame.
    ; NOTE(review): dwNoCeeling is not referenced in the visible code; presumably
    ; it disables ceiling rendering -- confirm in draw_floor_ceeling.
    dwWidth             dd 640 ; max 1024
    dwHeight            dd 480
    dwFrameLock         dd 1
    dwNoCeeling         dd 1
    szTitle             db "ASMCast by Chris Adams (Lithium)", 0
    szWelcomeTitle      db "Welcome to ASMCast...", 0
    szWelcomeText       db "The MASM32 Raycaster!", 0
    szGoodbyeTitle      db "Seeya...", 0
    szGoodbyeText       db "Later!", 0
    szFrameMsgTitle     db "Average Frames/Second:", 0

    ; ---- error codes (passed to doerror) and their messages ----------------
    dwERRORALLOC        dd 0
    dwERRORPTC          dd 1
    dwERRORMAPFILE      dd 2
    dwERRORLUTFILE      dd 3
    dwERRORBMPFILE      dd 4
    szError             db "Error", 0
    szErrorAlloc        db "Unable to Allocate Memory", 0
    szErrorPtc          db "Unable to Initialze TinyPTC", 0
    szErrorMapFile      db "Unable to Load Map File", 0
    szErrorLutFile      db "Unable to Load TrigLut File", 0
    szErrorBMPFile      db "Unable to Load Bitmap File", 0
    szMapFile           db "testmap.dat", 0
    szTrigLutFile       db "triglut.dat", 0

    ; ---- bitmap loader flags (consumer, load_bmp, is outside this chunk) ---
    dwBmpFlagFlip       dd 1
    dwBmpFlagNoHeader   dd 2

    ; ---- wall textures ------------------------------------------------------
    ; dwTextures is a single dword; deinit passes its value to crt_free.
    ; NOTE(review): presumably filled in by load_textures -- confirm.
    szTextureFile       db "textures\a0.bmp", 0 ; offset 9, 10
    dwTextureCount      dd 4
    dwTextureSize       dd 64
    dwTextureSizeBits   dd 6
    dwTextures          dd 0

    ; ---- test sprites: file names and the pointers returned by load_bmp ----
    szTestSprite1       db "sprites\test.bmp", 0
    dwTestSprite1       dd 0
    szTestSprite2       db "sprites\test2.bmp", 0
    dwTestSprite2       dd 0

    ; set by init_sprites; used as a right-shift count when render_sprite
    ; scales a sprite
    dwSpriteSizeBits    dd 0
    
    ; ---- depth shading (inputs to gen_depth_lut / render_sprite) ------------
    ; dwShadeTo    : 00RRGGBB colour that gen_depth_lut fades towards
    ; dwDepthFactor: render_sprite shifts the distance right by
    ;                dwDepthFactor+2 to choose a DLUT row
    ; tLevel       : REAL10 factor in the exponent computed by gen_depth_lut
    ; r*Factor     : per-channel multipliers applied in gen_depth_lut
    dwShadeTo           dd 0000000h
    dwDepthFactor       dd 7 ; 2^x  (the lesser, the quicker the fade)
    tLevel              dt -1.5
    rRedFactor          dd 1.0
    rGreenFactor        dd 1.0
    rBlueFactor         dd 1.1

    ; ---- map ----------------------------------------------------------------
    ; MAP is released with crt_free in deinit.
    dbMapWidth          db 0
    dbMapHeight         db 0
    dwVMapSize          dd 128
    dwVMapSizeBits      dd 7
    MAP                 dd 0

    ; ---- per-frame sprite queue ---------------------------------------------
    ; bb_sprite / bb_sprite_alpha append entries; render_sprites draws them and
    ; resets dwSpriteCount to 0.
    sSprites            dd 256*4 dup(0) ; 256 sprites, 4 fields: SprPtr, X, Y, Z
    sSpritesAlpha       dd 256 dup(0)
    dwSpriteCount       dd 0

    ; player state; main keeps the angle in the range 0..8191
    dwPosition          dd 5*2048, 5*2048, 0, 0 ; X, Y, Angle (Left/Right), Look (Up/Down)

    ; ---- trigonometry -------------------------------------------------------
    ; ang2rad   = 2*pi/256  (radians per unit when a turn is 256 units)
    ; ang2rad16 = 2*pi/8192 (radians per unit when a turn is 8192 units)
    ; pRatio scales the screen width when converting between screen x and a
    ; view-space slope (see render_scene / render_sprites).
    ; COS, SIN, COS16, SIN16 hold pointers that deinit releases with crt_free;
    ; main indexes COS16/SIN16 as dword tables by angle.
    ang2rad             dd 0.024543692606170259675489401431871
    ang2rad16           dd 7.6699039394282061485904379474597e-4
    pRatio              dd 0.8125
    COS                 dd 0
    SIN                 dd 0
    COS16               dd 0
    SIN16               dd 0

    ; ---- frame buffer -------------------------------------------------------
    ; BUFVSIZE = dwWidth*dwHeight (pixels), BUFSIZE = BUFVSIZE*4 (bytes),
    ; BUF = pointer returned by crt_malloc(BUFSIZE).
    BUFVSIZE            dd ?
    BUFSIZE             dd ?
    BUF                 dd 0

.data?

    ; Per-column buffers, indexed by screen x / 2 (one entry per 2-pixel column).
    dwZBuffer           dd 512 dup(?)   ; 2x512 = max screen width
    dwHeightBuffer      dd 512 dup(?)
    dwTptrBuffer        dd 512 dup(?)
    ; DLUT: depth-shading table, 32 rows x 192 dwords (64 red, 64 green, 64 blue)
    DLUT                dd 6144 dup(?)
    ; ALUT: alpha-blend table, 16 levels x 32 x 32 dwords
    ALUT                dd 16384 dup(?)
    dbBmpHeader         db 64 dup(?) 

.code

    ; Forward declarations so that `invoke` can be used on procedures before
    ; their definitions appear. Each :DWORD is one 32-bit stack argument.
    main                    PROTO
    deinit                  PROTO
    doerror                 PROTO :DWORD
    load_lut                PROTO
    load_map                PROTO :DWORD
    load_textures           PROTO
    load_bmp                PROTO :DWORD, :DWORD, :DWORD
    gen_fc_lut              PROTO
    clear_buffer            PROTO :DWORD
    draw_bmp                PROTO :DWORD, :DWORD, :DWORD
    draw_minimap            PROTO :DWORD, :DWORD
    cast_ray                PROTO :DWORD, :DWORD, :DWORD, :DWORD,  :DWORD, :DWORD, :DWORD
    get_map                 PROTO :DWORD, :DWORD
    render_scene            PROTO
    draw_wall_slice         PROTO :DWORD, :DWORD, :DWORD
    draw_floor_ceeling      PROTO :DWORD, :DWORD, :DWORD
    get_texture_address     PROTO :DWORD, :DWORD, :DWORD
    gen_depth_lut           PROTO
    gen_alpha_lut           PROTO
    init_sprites            PROTO :DWORD
    ; bb_sprite: sprPtr, x, y, z          bb_sprite_alpha: sprPtr, x, y, z, alpha
    bb_sprite               PROTO :DWORD, :DWORD, :DWORD, :DWORD
    bb_sprite_alpha         PROTO :DWORD, :DWORD, :DWORD, :DWORD, :DWORD
    render_sprites          PROTO
    ; render_sprite: sprID, sprDist, sprX
    render_sprite           PROTO :DWORD, :DWORD, :DWORD


mainCRTStartup proc

    ; Thin trampoline: transfers control to main (which takes no arguments)
    ; and returns to the caller if main ever returns.
    call main
    ret

mainCRTStartup endp

main proc

    ; Program body, called from mainCRTStartup.
    ;
    ; Start-up: computes the frame-buffer size, loads the lookup tables,
    ; textures, test sprites and map, allocates BUF and opens the TinyPTC
    ; window. Then loops forever:
    ;   1. read the arrow keys and update dwPosition (X, Y, Angle),
    ;   2. if 'F' is down, show the average frames/second in a message box,
    ;   3. queue the test sprites, render the scene + minimap, present BUF,
    ;   4. if dwFrameLock != 0, spin until 16 ms have passed since frame start.
    ;
    ; The loop has no exit branch; the code after `normalExit` is unreachable
    ; from here. EBX is used as a pointer into dwPosition and relies on the
    ; called API functions preserving it; EDX (the angle) is saved around each
    ; GetAsyncKeyState call explicitly.
    ;
    ; NOTE(review): doerror is defined outside this chunk. Execution continues
    ; after each `invoke doerror`, so presumably doerror terminates the
    ; process -- confirm.

    LOCAL x :DWORD, y :DWORD
    LOCAL xinc :SDWORD, yinc :SDWORD
    LOCAL framestart :DWORD
    LOCAL move :SDWORD
    LOCAL frames :DWORD
    LOCAL timestart :DWORD
    LOCAL framerate :DWORD

    ;invoke MessageBox, 0, ADDR szWelcomeText, ADDR szWelcomeTitle, MB_OK 

    ; BUFVSIZE = pixels per frame, BUFSIZE = bytes per frame (4 bytes/pixel)
    invoke IntMul, dwWidth, dwHeight
    mov BUFVSIZE, eax
    shl eax, 2
    mov BUFSIZE, eax

    ; // All memory to be deallocated at exit must be allocated before ptc_open() is called.
    invoke load_lut

    invoke load_textures

    invoke init_sprites, 6

    invoke load_bmp, addr szTestSprite1, 0, 0
    mov dwTestSprite1, eax
    .IF dwTestSprite1 == 0
        invoke doerror, dwERRORBMPFILE
    .ENDIF    

    invoke load_bmp, addr szTestSprite2, 0, 0
    mov dwTestSprite2, eax
    .IF dwTestSprite2 == 0
        invoke doerror, dwERRORBMPFILE
    .ENDIF    

    invoke load_map, ADDR szMapFile

    invoke crt_malloc, BUFSIZE
    mov BUF, eax
    .IF BUF == 0
        invoke doerror, dwERRORALLOC
    .ENDIF
    
   
    invoke ptc_open, addr szTitle, dwWidth, dwHeight
    .IF eax == 0
        invoke doerror, dwERRORPTC
    .ENDIF

    ; start of the FPS measurement window
    invoke GetTickCount
    mov timestart, eax

    xor eax, eax
    mov frames, eax

loop1:

    invoke GetTickCount
    mov framestart, eax

    ;
    ; input
    ;

    ; left/right: edx = current angle (dwPosition+8), kept in 0..8191

    mov ebx, offset dwPosition
    add ebx, 8
    mov edx, dword ptr [ebx]

    push edx
    invoke GetAsyncKeyState, VK_LEFT
    pop edx
    and eax, 8000h              ; bit 15 set = key is currently down

    cmp eax, 8000h
    jne skipTurnLeft
    add edx, 8192-64            ; angle -= 64 (mod 8192)
    and edx, 8191
skipTurnLeft:

    push edx
    invoke GetAsyncKeyState, VK_RIGHT
    pop edx
    and eax, 8000h

    cmp eax, 8000h
    jne skipTurnRight
    add edx, 64                 ; angle += 64 (mod 8192)
    and edx, 8191
skipTurnRight:

    mov dword ptr [ebx], edx

    ; forward/backward: copy X and Y into locals; ebx ends up at dwPosition+4

    sub ebx, 8
    mov eax, dword ptr [ebx]
    mov x, eax
    add ebx, 4
    mov eax, dword ptr [ebx]
    mov y, eax

    ; move = +1 (up only), -1 (down only) or 0 (neither / both)
    xor eax, eax
    mov move, eax

    push edx
    invoke GetAsyncKeyState, VK_UP
    pop edx   
    and eax, 8000h

    .IF eax == 8000h
        inc move
    .ENDIF

    push edx
    invoke GetAsyncKeyState, VK_DOWN
    pop edx   
    and eax, 8000h

    .IF eax == 8000h
        dec move
    .ENDIF

    mov eax, move
    cmp eax, 0

    je skipMove

    ; xinc = COS16[angle], yinc = SIN16[angle] (dword tables)
    mov eax, COS16
    shl edx, 2
    add eax, edx
    mov eax, dword ptr [eax]
    mov xinc, eax
    ;
    mov eax, SIN16
    add eax, edx
    mov eax, dword ptr [eax]
    mov yinc, eax
    shr edx, 2

    push edx
    push ebx

    ; step = table value / 6 (signed), added when moving forward and
    ; subtracted when moving backward
    mov eax, xinc
    cdq
    mov ebx, 6
    idiv ebx
    mov xinc, eax
    .IF move == 1
        add x, eax
    .ELSE
        sub x, eax
    .ENDIF

    mov eax, yinc
    cdq
    mov ebx, 6
    idiv ebx
    mov yinc, eax
    .IF move == 1
        add y, eax
    .ELSE
        sub y, eax
    .ENDIF

    pop ebx
    pop edx
    
skipMove:

    ; write the (possibly updated) position back; ebx = dwPosition+4 here
    mov eax, y
    mov dword ptr [ebx], eax
    sub ebx, 4
    mov eax, x
    mov dword ptr [ebx], eax

    ; 'F' shows the average frame rate since the last reset
    invoke GetAsyncKeyState, VK_F
    and eax, 8000h

    cmp eax, 8000h
    jne skipCheckFPS
    invoke GetTickCount
    sub eax, timestart
    mov ecx, eax
    ; GetTickCount may not have advanced since timestart was taken; a zero
    ; divisor would raise a divide error, so treat it as 1 ms
    .IF ecx == 0
        mov ecx, 1
    .ENDIF
    mov eax, frames
    mov edx, 1000
    imul eax, edx               ; frames * 1000 ms
    cdq
    idiv ecx                    ; / elapsed ms = frames per second
    mov framerate, eax
    invoke MessageBox, 0, str$(framerate), ADDR szFrameMsgTitle, MB_OK
    invoke GetTickCount
    mov timestart, eax
    xor eax, eax
    mov frames, eax

    ; wait for 'F' to be released so the box is not reopened immediately
 whileVK_F:
    invoke GetAsyncKeyState, VK_F
    and eax, 8000h

    cmp eax, 8000h
    jne skipCheckFPS
    jmp whileVK_F

skipCheckFPS:


    ;
    ; render
    ;

    ; queue this frame's sprites (render_sprites empties the queue again)
    invoke bb_sprite_alpha, dwTestSprite2, 8192, 8192, 0, 10
    invoke bb_sprite, dwTestSprite1, 4096, 8192, 0
    invoke bb_sprite, dwTestSprite1, 8192, 4096, 63
    invoke bb_sprite_alpha, dwTestSprite2, 20480, 20480, 0, 6
    ;invoke bb_sprite, 0, 9400, 8192, 0
    ;invoke bb_sprite, 0, 2048, 2048, 0

    invoke clear_buffer, 0h
    invoke render_scene
    invoke draw_minimap, 15, 15

    ;mov ebx, offset dwTextures
    ;mov ebx, dword ptr [ebx]

    ;invoke draw_bmp, 100, 15, dwTestSprite1
    ;invoke draw_bmp, 200, 15, dwTestSprite2

    invoke ptc_update, BUF

    inc frames

    mov eax, dwFrameLock
    cmp eax, 0
    je loop1

    ; frame lock: spin until at least 16 ms have passed since framestart
frame_lock:
    invoke GetTickCount
    sub eax, framestart
    cmp eax, 16
    jl frame_lock

  jmp loop1

    ; // Only called if we decide to exit manually...

normalExit:
    invoke ptc_close
    invoke ExitProcess, 0
    
    ret

main endp

deinit proc

    ; // Called by TinyPTC on Window Close.
    ;
    ; Releases every heap block whose pointer is kept in the data section.
    ; Takes no arguments and returns nothing meaningful.

    invoke crt_free, BUF
    invoke crt_free, MAP
    invoke crt_free, COS
    invoke crt_free, SIN
    invoke crt_free, COS16
    invoke crt_free, SIN16

    ; Free Textures
    ; dwTextures is a single dword, so only the pointer stored in it can be
    ; released here. Stepping dwTextureCount dwords forward from its address
    ; would hand the bytes of the following string (szTestSprite1) to free().
    ; NOTE(review): if load_textures allocates more than one block, their
    ; pointers must be stored somewhere and released separately -- confirm.
    invoke crt_free, dwTextures

    ; Free Sprites
    invoke crt_free, dwTestSprite1
    invoke crt_free, dwTestSprite2

    ;invoke MessageBox, 0, ADDR szGoodbyeText, ADDR szGoodbyeTitle, MB_OK 

    ret

deinit endp

render_scene proc

    ; Draws one frame: floor/ceiling, then walls, then sprites.
    ;
    ; Reads the player position and angle from dwPosition, casts one ray per
    ; 2-pixel column (x = dwWidth-2 down to 0 in steps of 2) and stores, per
    ; column index x/2, the corrected wall distance in dwZBuffer and the value
    ; returned by get_texture_address in dwTptrBuffer. A second pass calls
    ; draw_wall_slice for every column, and render_sprites runs last.
    ;
    ; No arguments. Uses eax, ebx, ecx and the x87 FPU (finit is issued before
    ; every FPU sequence).

    LOCAL x :DWORD
    LOCAL raydist :DWORD, lastdist :SDWORD
    LOCAL rx :DWORD, ry :DWORD
    LOCAL px :DWORD, py :DWORD, pa :DWORD
    LOCAL tmp :DWORD, stmp :SDWORD
    LOCAL hitType :DWORD
    LOCAL rdx :SDWORD, rdy:SDWORD

    LOCAL factor2047 :DWORD
    LOCAL tmp1 :DWORD, tmp2 :DWORD, tmp3 :DWORD
    LOCAL fpa :REAL4, cpa :REAL4, spa :REAL4
    LOCAL sdx :REAL4

    ; despite its name, factor2047 holds 2048 (the ray direction scale)
    mov eax, 2048
    mov factor2047, eax

    ; px, py, pa = dwPosition X, Y, Angle
    mov eax, offset dwPosition
    mov ebx, dword ptr [eax]
    mov px, ebx
    add eax, 4
    mov ebx, dword ptr [eax]
    mov py, ebx
    add eax, 4
    mov ebx, dword ptr [eax]
    mov pa, ebx

    ; pa -= 2048 (a quarter turn, since a full turn is 8192 units)
    mov ecx, 2048
    sub pa, ecx

    ; cpa = cos(pa * ang2rad16), spa = sin(pa * ang2rad16)
    finit
    fild pa
    fmul ang2rad16
    fst fpa
    fcos
    fstp cpa
    fld fpa
    fsin
    fstp spa

    ; x = dwWidth - 2: first (rightmost) 2-pixel column
    mov eax, dwWidth
    dec eax
    mov x, eax
    dec x

    ;invoke compute_fcbuf
    invoke draw_floor_ceeling, px, py, pa

 xloop:

    ; calculate (rdx, rdy) for this x

    ; tmp1 = -(x - (dwWidth/2))
    mov eax, x
    mov ecx, dwWidth
    shr ecx, 1
    sub eax, ecx
    mov tmp1, eax
    neg tmp1
    
    mov eax, dwWidth
    mov tmp2, eax

    ; FPU stack after the six loads (top first):
    ;   pRatio, tmp2, tmp1, 2048, spa, cpa
    ; fincstp rotates the top-of-stack pointer without freeing the register,
    ; which is why the operand indices in the comments below shift.
    finit
    fld cpa
    fld spa
    fild factor2047
    fild tmp1
    fild tmp2
    fld pRatio
    fmul st(1), st ; tmp2 * pRatio
    fincstp
    fdiv st(1), st ; sdx = -(x-(dwWidth/2)) / (dwWidth * pRatio)
    fincstp
    fst sdx
    ; rdx = (sdx*cpa - spa) * 2048
    fmul st, st(3) ; cpa
    fsub st, st(2) ; spa
    fmul st, st(1)
    fistp rdx
    ; rdy = (sdx*spa + cpa) * 2048
    fld sdx
    fmul st, st(2) ; spa
    fadd st, st(3) ; cpa
    fmul st, st(1)
    fistp rdy

    ;print str$(rdx), ", "
    ;print str$(rdy), 13, 10

    ;invoke draw_floor_ceeling, x, px, py, rdx, rdy

    ;push edx
    ;push ebx    
    ;print str$(x), ": "
    ;pop ebx
    ;pop edx

    ; cast_ray writes its results through the three pointers (rx, ry, hitType)
    invoke cast_ray, px, py, rdx, rdy, addr rx, addr ry, addr hitType
   
    ; calculate distance: sqrt(((rx>>4)-(px>>4))^2 + ((ry>>4)-(py>>4))^2)

    mov eax, px
    mov ebx, rx
    shr eax, 4
    shr ebx, 4
    sub ebx, eax
    mov ecx, ebx
    mov eax, py
    mov ebx, ry
    shr eax, 4
    shr ebx, 4
    sub ebx, eax
    
    imul ecx, ecx
    imul ebx, ebx
    add ecx, ebx
    invoke IntSqrt, ecx
    mov raydist, eax

    ;print str$(raydist), 13, 10

    ;shl raydist, 4

    ; correct distance [raydist *= cos(atan(sdx))], and multiply by 16 to
    ; undo the >>4 applied to the coordinates above

    mov eax, 16
    mov tmp1, eax

    finit
    fld sdx
    fld1
    fpatan
    fcos
    fild tmp1
    fild raydist
    fmul st, st(2)
    fmul st, st(1)
    fistp raydist
    fwait

    ; dwZBuffer[x/2] = corrected distance
    mov eax, x
    shr eax, 1
    mov ebx, raydist
    mov dword ptr [dwZBuffer+eax*4], ebx

    ;print str$(raydist), 13, 10

    ; a zero distance skips the texture lookup; dwTptrBuffer[x/2] then keeps
    ; whatever value it already held
    mov eax, raydist
    cmp eax, 0
    je skipWall

    ; look up the wall texture for this hit: dwTptrBuffer[x/2] = result

    invoke get_texture_address, rx, ry, hitType
    mov ebx, eax

    mov eax, x
    shr eax, 1
    mov dword ptr [dwTptrBuffer+eax*4], ebx

    skipWall:

    ; loop...

    sub x, 2
    cmp x, 0
    jge xloop

    ;
    ; render all walls here (dynamic or not)
    ;
    
    ; eax = column index (0 .. dwWidth/2 - 1); screen x = eax * 2
    xor eax, eax
wallLoop:

    mov ebx, dword ptr [dwZBuffer+eax*4]
    mov raydist, ebx

    mov ebx, dword ptr [dwTptrBuffer+eax*4]

    push eax
    shl eax, 1
    mov ecx, eax
    invoke draw_wall_slice, ecx, raydist, ebx ; // x coordinate, distance to wall, dwTptrBuffer entry for this column
    pop eax

    inc eax
    mov ebx, dwWidth
    shr ebx, 1
    cmp eax, ebx
    jl wallLoop

    ;
    ; render all sprites
    ;

    invoke render_sprites

    ret

    ;mov eax, COS
    ;mov edx, dword ptr [eax]
    ;mov eax, SIN
    ;mov ebx, dword ptr [eax]

    ;invoke cast_ray, 4*256, 4*256, edx, ebx, addr x, addr y
    ;invoke get_map, x, y
    ;mov byte ptr [eax], 011h
    
render_scene endp

render_sprites proc

    ; Projects every queued sprite (sSprites[0 .. dwSpriteCount-1]) into view
    ; space, sorts the visible ones far-to-near and draws them with
    ; render_sprite. Resets dwSpriteCount to 0 before returning.
    ;
    ; Sorting is a bucket sort: hash[dist >> 11] is the head of a singly linked
    ; list (links in sprNxt, -1 = end) kept in descending distance order.
    ; Buckets are drawn from 255 down to 0, so farther sprites are drawn first.
    ;
    ; No arguments. Uses eax, ebx, ecx, edx and the x87 FPU.

    LOCAL sprX[256] :DWORD
    LOCAL sprDist[256] :DWORD
    LOCAL sprNxt[256] :DWORD
    LOCAL hash[256] :DWORD
    LOCAL sprCount :DWORD
    LOCAL pa :DWORD, px :DWORD, py :DWORD
    LOCAL fpa :REAL4, cpa :REAL4, spa :REAL4
    LOCAL pdx :REAL4, pdy :REAL4
    LOCAL rdx :REAL4, rdy :REAL4
    LOCAL dist :REAL4, dwDist :DWORD
    LOCAL x :DWORD
    LOCAL tmp :DWORD
    LOCAL prev :DWORD, node :DWORD

    ; clear hash: every bucket = -1 (empty)
    mov eax, -1
    mov ecx, 256
clearLoop:
    mov dword ptr [hash+ecx*4-4], eax
    dec ecx
    jnz clearLoop    

    ; pre-render

    ; px, py, pa = dwPosition X, Y, Angle
    mov eax, offset dwPosition
    mov ebx, dword ptr [eax]
    mov px, ebx
    add eax, 4
    mov ebx, dword ptr [eax]
    mov py, ebx
    add eax, 4
    mov ebx, dword ptr [eax]
    mov pa, ebx

    mov ecx, 2048
    sub pa, ecx

    ; cpa = cos(-angle), spa = sin(-angle), angle = pa * ang2rad16
    finit
    fild pa
    fmul ang2rad16
    fst fpa
    fchs
    fcos
    fstp cpa
    fld fpa
    fchs
    fsin
    fstp spa

    ; ecx = sprite index for the whole pre-render loop
    mov ecx, 0
preLoop:

    cmp ecx, dwSpriteCount
    jge endPreLoop

    ; eax = index * 4, so sSprites+eax*4 is the 16-byte record of this sprite
    mov eax, ecx
    shl eax, 2

    finit
    fild px                ; load player x
    fild py                ; load player y
    fild dword ptr [sSprites+eax*4+4] ; sprite x
    fsub st, st(2)                    ; pdx = sprite_x - player_x
    fstp pdx
    fild dword ptr [sSprites+eax*4+8] ; sprite y
    fsub st, st(1)                    ; pdy = sprite_y - player_y
    fstp pdy

    fld spa                ; rotate sprite by player's angle
    fld cpa
    fld pdy
    fmul st, st(2) ; spa*pdy
    fld pdx
    fmul st, st(2) ; cpa*pdx
    fsub st, st(1) ; (cpa*pdx) - (spa*pdy)
    fstp rdx

    fld pdx
    fmul st, st(3) ; spa*pdx
    fld pdy
    fmul st, st(3) ; cpa*pdy
    fadd st, st(1) ; (spa*pdx) + (cpa*pdy)
    fist tmp
    fstp rdy
    fwait

    ; clip behind (rdy <= 0)
    cmp tmp, 0      
    jle skipSprite

    ; calculate distance to sprite from player

    ; sqrt(pdx*pdx + pdy*pdy)
    finit
    fld pdx
    fmul st, st
    fld pdy
    fmul st, st
    fadd st, st(1)
    fsqrt

    ; cos(atan(rdx / rdy))
    fld rdx
    fld rdy
    fpatan
    fcos

    ; dist = sqrt(..) * cos(atan(..))
    fmul st, st(1)
    fist dwDist
    fstp dist

    fwait

    ; clip near (dwDist <= 256)
    cmp dwDist, 256
    jle skipSprite

    ; normalize rdx, rdy (divide both by dist)
    finit
    fld dist
    fld rdx
    fdiv st, st(1)
    fstp rdx
    fld rdy
    fdiv st, st(1)
    fstp rdy
    fwait

    mov eax, dwWidth
    shr eax, 1
    mov tmp, eax

    ; x = -rdx*(dwWidth * pRatio)+(dwWidth/2)
    
    finit
    fild dwWidth
    fld pRatio
    fmul st, st(1) ; dwWidth*pRatio
    fld rdx
    fmul st, st(1) ; * rdx
    fchs
    fild tmp
    fadd st, st(1) ; + (dwWidth/2)
    fistp x
    fwait

    mov eax, x
    mov dword ptr [sprX+ecx*4], eax
    mov eax, dwDist
    mov dword ptr [sprDist+ecx*4], eax

    shr eax, 11 ; hash position
    ; hash[] has 256 buckets; any distance of 256*2048 or more goes into the
    ; last bucket instead of indexing past the end of the array. The list in
    ; each bucket is still ordered by distance.
    cmp eax, 255
    jbe hashInRange
    mov eax, 255
  hashInRange:
    mov ebx, dword ptr[hash+eax*4]

    mov edx, -1
    mov dword ptr[sprNxt+ecx*4], edx

    ; empty bucket: this sprite becomes the head
    cmp ebx, -1
    jne doSort
    mov dword ptr[hash+eax*4], ecx

    jmp endSort
 doSort:
   
    ; walk the list (ebx = current node, prev = node before it) and insert
    ; in front of the first node that is nearer than this sprite
    mov edx, -1
    mov prev, edx

  sortLoop:

    mov edx, dwDist
    cmp edx, dword ptr[sprDist+ebx*4]
    jle skipSet

    mov dword ptr[sprNxt+ecx*4], ebx ; ebx no longer needed

    cmp prev, -1
    je srtElse
    mov ebx, prev
    mov dword ptr[sprNxt+ebx*4], ecx
    jmp srtEndIf
  srtElse:
    mov dword ptr[hash+eax*4], ecx
  srtEndIf:

    jmp endSort    

  skipSet:

    mov prev, ebx
    mov ebx, dword ptr[sprNxt+ebx*4]

    cmp ebx, -1
    jne sortLoop

    ; reached the end of the list: append
    mov ebx, prev
    
    mov dword ptr[sprNxt+ebx*4], ecx

 endSort:    


skipSprite:
    inc ecx
    jmp preLoop

endPreLoop:


    ; render in depth order (farthest bucket first)
    
    mov ecx, 255
renLoop:

    mov ebx, dword ptr [hash+ecx*4]
    
   lLoop:
    cmp ebx, -1
    je nextHash

    mov eax, dword ptr [sprDist+ebx*4]
    mov edx, dword ptr [sprX+ebx*4]

    ; ecx (bucket) and ebx (node) must survive the call
    push ecx
    push ebx
    invoke render_sprite, ebx, eax, edx
    pop ebx
    pop ecx

    mov ebx, dword ptr [sprNxt+ebx*4]
    jmp lLoop  
    
nextHash:
    dec ecx
    cmp ecx, 0
    jge renLoop

    ; empty the queue for the next frame
    xor eax, eax
    mov dwSpriteCount, eax
    
    ret

render_sprites endp

render_sprite proc sprID :DWORD, sprDist :DWORD, sprX :DWORD
    
    ; Draws one queued sprite into BUF, scaled by distance, depth-shaded via
    ; DLUT, optionally alpha-blended via ALUT, and clipped against the walls
    ; using dwZBuffer / dwHeightBuffer.
    ;
    ;   sprID   - index into sSprites / sSpritesAlpha
    ;   sprDist - distance from the player (callers pass values > 256)
    ;   sprX    - screen x of the sprite's centre
    ;
    ; Sprite bitmap layout as read here: dword width, dword height, then
    ; 4-byte pixels from offset 8. Pixel value 0FF00FFh is transparent.
    ; Columns are drawn two screen pixels at a time (x advances by 2).
    ;
    ; Returns nothing meaningful. Uses eax, ebx, ecx, edx and the x87 FPU.

    LOCAL x1 :DWORD, x2 :DWORD
    LOCAL y1 :DWORD, y2 :DWORD, _y2 :DWORD
    LOCAL sheight :DWORD, swidth :DWORD
    LOCAL z :DWORD
    LOCAL sprPtr :DWORD
    
    LOCAL _width :DWORD, widthBits :DWORD
    LOCAL _height :DWORD, heightBits :DWORD

    LOCAL xTInc :DWORD, yTInc :DWORD
    LOCAL xTex :DWORD, yTex :DWORD
    LOCAL xTexStart :DWORD, yTexStart :DWORD
    LOCAL texPos :DWORD, bfrPos :DWORD
    LOCAL yBfrInc :DWORD

    LOCAL x :DWORD, y :DWORD

    LOCAL alpha :DWORD
    LOCAL alphaBfr :DWORD

    LOCAL dbfr[192]:DWORD     ; for depth shading
    LOCAL dwDist :DWORD       ; ..

    ; fetch the sprite record: bitmap pointer and Z
    mov eax, sprID
    shl eax, 2
    mov ebx, dword ptr sSprites[eax*4]
    mov sprPtr, ebx
    mov ebx, dword ptr sSprites[eax*4+12]
    mov z, ebx

    ; bitmap header: width, height; pixel data starts 8 bytes in
    mov eax, sprPtr
    mov ebx, dword ptr [eax]
    mov _width, ebx
    mov ebx, dword ptr [eax+4]
    mov _height, ebx

    add sprPtr, 8

    ; load alpha setting
    mov eax, sprID
    mov eax, dword ptr sSpritesAlpha[eax*4]
    mov alpha, eax

    ; alphaBfr = &ALUT[alpha * 1024] (one 32x32 dword table per alpha level)
    shl eax, 12
    add eax, offset ALUT
    mov alphaBfr, eax
    ;

    ; widthBits = round(log2(_width)), heightBits = round(log2(_height))
    finit
    fld1
    fild _width
    fyl2x
    fistp widthBits
    fld1
    fild _height
    fyl2x
    fistp heightBits
    fwait

    ; sheight = (2048*dwHeight*spriteHeight/textureSize)/dist
    mov eax, dwHeight
    shl eax, 11
    mov ecx, heightBits
    shl eax, cl
    mov ecx, dwSpriteSizeBits
    shr eax, cl
    mov ebx, sprDist
    cdq
    idiv ebx
    mov sheight, eax

    ; swidth = (2048*dwHeight*spriteWidth/textureSize)/dist
    mov eax, dwHeight
    shl eax, 11
    mov ecx, widthBits
    shl eax, cl
    mov ecx, dwSpriteSizeBits
    shr eax, cl
    mov ebx, sprDist
    cdq
    idiv ebx
    mov swidth, eax

    ; x1 = sprX - (swidth/2)
    ; x2 = sprX + (swidth/2)
    mov eax, swidth
    shr eax, 1
    mov ebx, sprX
    mov x1, ebx
    mov x2, ebx
    sub x1, eax
    add x2, eax

    ; y2 = (floor) = dwHeight/2 + (2048*dwHeight/dist)/2
    mov eax, dwHeight
    shl eax, 11
    xor edx, edx
    mov ebx, sprDist
    idiv ebx
    mov ebx, dwHeight
    shr ebx, 1
    shr eax, 1
    add ebx, eax
    mov y2, ebx

    ; account for Z: y2 -= (z * dwHeight * 16) / dist
    mov eax, z
    imul dwHeight
    shl eax, 4 ; 11-7
    mov ebx, sprDist
    cdq
    idiv ebx
    sub y2, eax

    ; y1 = y2 - sheight
    mov eax, y2
    sub eax, sheight
    mov y1, eax

    ; full-image clipping

    cmp x2, 0
    jl skipSprite

    mov ebx, dwWidth
    cmp x1, ebx
    jge skipSprite

    cmp y2, 0
    jl skipSprite

    mov ebx, dwHeight
    cmp y1, ebx
    jge skipSprite

    ; A sprite that projects to zero width or height has nothing to draw, and
    ; the divisions by swidth / sheight below would raise a divide error.
    cmp swidth, 0
    jle skipSprite
    cmp sheight, 0
    jle skipSprite

    ; find texture increments (11-bit fixed point texels per screen pixel)

    mov eax, _width
    shl eax, 11
    cdq
    idiv swidth
    mov xTInc, eax
    dec xTInc

    mov eax, _height
    shl eax, 11
    cdq
    idiv sheight
    mov yTInc, eax
    dec yTInc

    ; calculate initial texture positions & partial clipping

    xor eax, eax
    mov yTexStart, eax
    mov xTexStart, eax

    ; top edge above the screen: start further down the texture
    cmp y1, 0    
    jge noClipY1
        mov eax, yTInc
        mov ebx, y1
        neg ebx
        imul eax, ebx
        mov yTexStart, eax
        xor eax, eax
        mov y1, eax
    noClipY1:

    ; left edge off screen: start further into the texture
    cmp x1, 0
    jge noClipX1
        mov eax, xTInc
        mov ebx, x1
        neg ebx
        imul eax, ebx
        mov xTexStart, eax
        xor eax, eax
        mov x1, eax
    noClipX1:

    ; bottom edge: y2 <= dwHeight-1
    mov ebx, dwHeight
    cmp y2, ebx
    jl noClipY2
        dec ebx
        mov y2, ebx
    noClipY2:

    ; (2 pixels at a time, skip every second x)
    ; right edge: x2 <= dwWidth-2
    mov ebx, dwWidth
    dec ebx
    cmp x2, ebx
    jl noClipX2
        dec ebx
        mov x2, ebx
    noClipX2:
    
    add widthBits, 2 ; shl x, widthBits ; x *= 2^widthBits * 4

    ; copy temporary y2
    mov eax, y2
    mov _y2, eax

    ; set initial buffer position: BUF + y1*rowBytes + x1*4
    mov ebx, dwWidth
    shl ebx, 2
    mov yBfrInc, ebx
    mov eax, y1
    imul eax, ebx
    mov ebx, x1
    shl ebx, 2
    add eax, ebx
    add eax, BUF
    mov bfrPos, eax

    ; setup dwDist, and depthshading lut
    ; row = clamp(sprDist >> (dwDepthFactor+2), 1, 31)

    mov eax, sprDist
    mov dwDist, eax
    mov ecx, dwDepthFactor
    add ecx, 2
    shr dwDist, cl
    cmp dwDist, 31
    jle skipDistClip1
    mov eax, 31
    mov dwDist, eax
   skipDistClip1:
    cmp dwDist, 1
    jge skipDistClip2
    mov eax, 1
    mov dwDist, eax
   skipDistClip2:
   
    ; dwDist = &DLUT[row * 192]  (row*64*3 dwords, *4 bytes)
    shl dwDist, 6
    mov eax, dwDist
    add dwDist, eax
    add dwDist, eax
    shl dwDist, 2
    add dwDist, offset DLUT

    ; copy that 192-dword row into the local dbfr
    mov edx, dwDist
    mov ecx, 192
    lea eax, dbfr

  sLoadLoop:
    mov ebx, dword ptr[edx]
    mov dword ptr[eax], ebx
    add eax, 4
    add edx, 4
    dec ecx
    jnz sLoadLoop

    ; render

    mov ebx, xTexStart
    mov xTex, ebx

    mov ebx, x1
    mov x, ebx
    and x, 0FFFFFFFEh ; unset odd bit
 xLoop:

    mov eax, _y2
    mov y2, eax

    ; clip scanline if behind wall: when the wall in this column is not
    ; farther than the sprite, draw only down to dwHeightBuffer[x/2]+1, or
    ; skip the column if that is not below y1
    mov eax, x
    shr eax, 1
    mov ebx, dwZBuffer[eax*4]
    cmp ebx, sprDist
    jg notBehind
    mov ebx, dwHeightBuffer[eax*4]
    cmp ebx, y1
    jle nextX
    inc ebx
    mov y2, ebx
  notBehind:

    ; prepare for scanline: texPos = sprPtr + (xTex >> 11) * 4

    mov eax, xTex
    shr eax, 11
    shl eax, 2
    mov texPos, eax
    mov eax, sprPtr
    add texPos, eax

    mov ebx, y1
    mov y, ebx
    mov ebx, yTexStart
    mov yTex, ebx 

    ; bfrPos is advanced row by row inside yLoop and restored afterwards
    push bfrPos
    
  yLoop:

    ; get pixel (ebx = yTex on entry to every iteration)

    shr ebx, 11     ; find scanline on sprite
    mov ecx, widthBits
    shl ebx, cl

    add ebx, texPos

    mov eax, dword ptr [ebx]

    cmp eax, 0ff00ffh ; transparent pixel
    je skipPixel

    ; --- send through depthshading lut
    ; eax = inpixel, edx = outpixel
    ; each channel's top 6 bits index its 64-entry section of dbfr

    mov ebx, eax
    
    shr eax, 16
    and eax, 0FFh
    shr eax, 2   
    mov edx, dbfr[4*eax]

    mov eax, ebx
    shr eax, 8
    and eax, 0FFh
    shr eax, 2
    mov eax, dbfr[256+4*eax]
    or edx, eax

    and ebx, 0FFh
    shr ebx, 2
    mov eax, dbfr[512+4*ebx]
    or edx, eax
 
    ; ---

    ; alpha >= 15 means opaque: write the shaded pixel as is
    cmp alpha, 15
    jge noAlpha

    mov ebx, bfrPos
    mov ebx, dword ptr [ebx]

    ; ebx = background pixel
    ; per channel: index = (background >> 3) * 32 + (foreground >> 3)

    ; low byte
    xor ecx, ecx
    mov eax, ebx
    and eax, 0FFh
    shr eax, 3
    shl eax, 5
    add ecx, eax
    mov eax, edx
    and eax, 0FFh
    shr eax, 3
    add ecx, eax
    shl ecx, 2
    add ecx, alphaBfr
    mov eax, dword ptr[ecx]
    and edx, 0FFFF00h
    or edx, eax

    ; middle byte
    xor ecx, ecx
    mov eax, ebx
    shr eax, 8
    and eax, 0FFh
    shr eax, 3
    shl eax, 5
    add ecx, eax
    mov eax, edx
    shr eax, 8
    and eax, 0FFh
    shr eax, 3
    add ecx, eax
    shl ecx, 2
    add ecx, alphaBfr
    mov eax, dword ptr[ecx]
    shl eax, 8
    and edx, 0FF00FFh
    or edx, eax

    ; high byte
    xor ecx, ecx
    mov eax, ebx
    shr eax, 16
    and eax, 0FFh
    shr eax, 3
    shl eax, 5
    add ecx, eax
    mov eax, edx
    shr eax, 16
    and eax, 0FFh
    shr eax, 3
    add ecx, eax
    shl ecx, 2
    add ecx, alphaBfr
    mov eax, dword ptr[ecx]
    shl eax, 16
    and edx, 000FFFFh
    or edx, eax
    
    jmp writePixel
  noAlpha:

    ; write pixels (the same colour to two adjacent screen pixels)
 writePixel:
    mov eax, bfrPos

    mov dword ptr [eax], edx
    mov dword ptr [eax+4], edx

 skipPixel:

    ; increment positions, loop

    mov ebx, yTex   ; update y texture coordinate
    add ebx, yTInc
    mov yTex, ebx

    mov eax, yBfrInc ; update buffer position
    add bfrPos, eax
    
    inc y
    mov eax, y2
    cmp y, eax
    jle yLoop 
    
    pop bfrPos

  nextX:

    ; advance two screen pixels: texture x by 2*xTInc, buffer by 8 bytes
    mov ebx, xTInc
    add ebx, ebx
    add xTex, ebx
    add bfrPos, 8

    add x, 2
    mov ebx, x2
    cmp x, ebx
    jl xLoop

skipSprite:

    ret
    
render_sprite endp

init_sprites proc sprSizeBits :DWORD

    ; Stores sprSizeBits in dwSpriteSizeBits, which render_sprite uses as the
    ; right-shift count when scaling a sprite's width and height.
    ; EAX holds sprSizeBits on return.
    mov eax, [sprSizeBits]
    mov [dwSpriteSizeBits], eax
    ret

init_sprites endp

bb_sprite proc uses ebx sprPtr :DWORD, x :DWORD, y :DWORD, z :DWORD

    ; Appends an opaque sprite to the per-frame queue.
    ;
    ;   sprPtr  - pointer to the sprite bitmap
    ;   x, y, z - sprite position
    ;
    ; Writes one 4-dword record (SprPtr, X, Y, Z) to sSprites, sets the
    ; matching sSpritesAlpha entry to 15 (render_sprite treats alpha >= 15 as
    ; "no blending") and increments dwSpriteCount.
    ;
    ; Returns EAX = 0 on success, 1 when the queue already holds 256 sprites.
    ; EBX is preserved (stdcall callee-saved register).

    mov eax, dwSpriteCount

    ; unsigned >= so that a count beyond the table size is rejected as well
    cmp eax, 256
    jae noSprite

    ; eax = count*4: record offset is eax*4 bytes, alpha offset is eax bytes
    shl eax, 2
    
    mov ebx, sprPtr
    mov dword ptr sSprites[eax*4], ebx
    mov ebx, x
    mov dword ptr sSprites[eax*4+4], ebx
    mov ebx, y
    mov dword ptr sSprites[eax*4+8], ebx
    mov ebx, z
    mov dword ptr sSprites[eax*4+12], ebx

    mov ebx, 15
    mov dword ptr sSpritesAlpha[eax], ebx
    
    inc dwSpriteCount

    xor eax, eax
    ret

noSprite:
    mov eax, 1
    ret

bb_sprite endp

bb_sprite_alpha proc uses ebx sprPtr :DWORD, x :DWORD, y :DWORD, z :DWORD, alpha :DWORD

    ; Appends a sprite with an explicit alpha level to the per-frame queue.
    ;
    ;   sprPtr  - pointer to the sprite bitmap
    ;   x, y, z - sprite position
    ;   alpha   - blend level; render_sprite blends through ALUT when this is
    ;             below 15 and draws opaque otherwise
    ;
    ; Writes one 4-dword record (SprPtr, X, Y, Z) to sSprites, stores alpha in
    ; the matching sSpritesAlpha entry and increments dwSpriteCount.
    ;
    ; Returns EAX = 0 on success, 1 when the queue already holds 256 sprites.
    ; EBX is preserved (stdcall callee-saved register).

    mov eax, dwSpriteCount

    ; unsigned >= so that a count beyond the table size is rejected as well
    cmp eax, 256
    jae noSprite

    ; eax = count*4: record offset is eax*4 bytes, alpha offset is eax bytes
    shl eax, 2
    
    mov ebx, sprPtr
    mov dword ptr sSprites[eax*4], ebx
    mov ebx, x
    mov dword ptr sSprites[eax*4+4], ebx
    mov ebx, y
    mov dword ptr sSprites[eax*4+8], ebx
    mov ebx, z
    mov dword ptr sSprites[eax*4+12], ebx

    mov ebx, alpha
    mov dword ptr sSpritesAlpha[eax], ebx
    
    inc dwSpriteCount

    xor eax, eax
    ret

noSprite:
    mov eax, 1
    ret

bb_sprite_alpha endp

gen_depth_lut proc

    ; Fills rows 1..31 of DLUT, the depth-shading table.
    ;
    ; Each row d holds 192 dwords: 64 red entries, 64 green, 64 blue, indexed
    ; by the channel value / 4. An entry is the shaded channel value already
    ; shifted into its position in a 00RRGGBB pixel, so a shaded pixel is the
    ; OR of three lookups.
    ;
    ; For every row:   f = 1 - e^(tLevel * (d + 10) / 30)
    ; For every entry: out = (c + (to - c) * f) * channelFactor, capped at 255,
    ; where c is the input channel value and `to` the matching channel of
    ; dwShadeTo.
    ;
    ; No arguments, no meaningful return value. Uses eax, ebx, ecx, the x87
    ; FPU and the MASM32 FPU library.

    LOCAL d :DWORD, r :DWORD, g :DWORD, b :DWORD ; current depth, r, g, b values
    LOCAL off :DWORD                             ; current offset to DLUT 
    LOCAL tr :DWORD, tg :DWORD, tb :DWORD        ; to colour
    LOCAL rr :DWORD, rg :DWORD, rb :DWORD        ; result colour
    LOCAL f256 :DWORD, f16                       ; 256, 16
    LOCAL dist :REAL4
    LOCAL f :REAL4
    LOCAL tmp :DWORD
    LOCAL f8 :TBYTE, tmp1 :TBYTE, tmp2 :TBYTE, tmp3 :TBYTE

    ; NOTE(review): memalign comes from macros.asm (not visible here) and is
    ; applied to the DLUT symbol itself -- confirm what it does. Row 0 of DLUT
    ; is never written by this procedure.
    memalign DLUT, 768

    mov eax, 16
    mov f16, 16

    ; despite its name, f256 holds 30: the divisor of (d + 10) below
    mov eax, 20+10
    mov f256, eax

    ; split dwShadeTo into tr, tg, tb

    mov eax, dwShadeTo
    mov ecx, eax
    and eax, 0ffh
    mov tb, eax
    mov eax, ecx
    shr eax, 8
    and eax, 0ffh
    mov tg, eax
    mov eax, ecx
    shr eax, 16
    and eax, 0ffh               ; mask the red byte itself (drops bits 24..31)
    mov tr, eax

    mov eax, 1
    mov d, eax

 dLoop:

    ;finit
    ;fild f256
    ;fild d
    ;fdiv st, st(1)
    ;fsqrt
    ;fst f

    ; d is biased by 10 only while the fade factor is computed
    add d, 10

    ; tmp1 = (d+10)/30 ; tmp2 = tmp1 * tLevel ; tmp1 = e^tmp2
    invoke FpuDiv, addr d, addr f256, addr tmp1, SRC1_DMEM or SRC2_DMEM or DEST_MEM
    invoke FpuMul, addr tmp1, addr tLevel, addr tmp2, SRC1_REAL or SRC2_REAL or DEST_MEM
    invoke FpuEexpX, addr tmp2, addr tmp1, SRC1_REAL or DEST_MEM

    sub d, 10

    ; f = 1 - tmp1
    finit
    fld tmp1
    fld1
    fsub st, st(1)
    
    fstp f
      
    ;
    ; Red
    ;

    xor eax, eax
    mov r, eax

    ; clr = before_clr + (to_clr - before_clr) * f

   rLoop:

    finit
    fld f
    fild r
    fild tr
    fsub st, st(1)
    fmul st, st(2)
    fadd st, st(1)
    fmul rRedFactor
    fist rr
    fwait


    cmp rr, 255
    jle skipClipR
    mov eax, 255
    mov rr, eax
   skipClipR:

    mov eax, d      ; d*64*3
    shl eax, 6
    mov ebx, eax
    add eax, ebx
    add eax, ebx
    
    mov ebx, r      ; r/4 (0-63)
    shr ebx, 2
    
    ; DLUT[d*192 + r/4] = rr << 16
    add eax, ebx
    mov ebx, rr
    shl ebx, 16
    and ebx, 0FF0000h
    shl eax, 2
    add eax, offset DLUT
    mov dword ptr [eax], ebx

    add r, 4
    cmp r, 256
    jl rLoop

    ;
    ; Green
    ;

    xor eax, eax
    mov g, eax

   gLoop:

    finit
    fld f
    fild g
    fild tg
    fsub st, st(1)
    fmul st, st(2)
    fadd st, st(1)
    fmul rGreenFactor
    fist rg
    fwait

    cmp rg, 255
    jle skipClipG
    mov eax, 255
    mov rg, eax
   skipClipG:

    mov eax, d      ; d*64*3
    shl eax, 6
    mov ebx, eax
    add eax, ebx
    add eax, ebx
    
    mov ebx, g      ; g/4 (0-63)
    shr ebx, 2
    
    ; DLUT[d*192 + 64 + g/4] = rg << 8
    add eax, ebx
    mov ebx, rg
    shl ebx, 8
    and ebx, 0FF00h
    add eax, 64
    shl eax, 2
    add eax, offset DLUT
    mov dword ptr [eax], ebx
        
    add g, 4
    cmp g, 256
    jl gLoop

    ;
    ; Blue
    ;

    xor eax, eax
    mov b, eax

   bLoop:

    finit
    fld f
    fild b
    fild tb
    fsub st, st(1)
    fmul st, st(2)
    fadd st, st(1)
    fmul rBlueFactor
    fist rb
    fwait

    cmp rb, 255
    jle skipClipB
    mov eax, 255
    mov rb, eax
   skipClipB:


    mov eax, d      ; d*64*3
    shl eax, 6
    mov ebx, eax
    add eax, ebx
    add eax, ebx
    
    mov ebx, b      ; b/4 (0-63)
    shr ebx, 2
    
    ; DLUT[d*192 + 128 + b/4] = rb
    add eax, ebx
    mov ebx, rb
    and ebx, 0FFh
    add eax, 128
    shl eax, 2
    add eax, offset DLUT
    mov dword ptr [eax], ebx
    
    add b, 4
    cmp b, 256
    jl bLoop
    
    ;
    
    inc d
    cmp d, 32
    jl dLoop

    ret


gen_depth_lut endp

gen_alpha_lut proc

    ; Builds ALUT, the alpha-blend table used by render_sprite.
    ;
    ; For every alpha level a (0..15), background level bg (0..31) and
    ; foreground level fg (0..31):
    ;
    ;   ALUT[a*1024 + bg*32 + fg] = fg*8*(a/15) + bg*8*(1 - a/15)
    ;
    ; The foreground term is rounded to REAL4 before it is added, and the sum
    ; is converted to an integer with fist.
    ;
    ; No arguments, no meaningful return value. Uses eax, ebx, edx and the
    ; x87 FPU.

    LOCAL a :DWORD, ao :DWORD
    LOCAL fg :DWORD, bg :DWORD
    LOCAL fbScale :DWORD
    LOCAL aScale :DWORD
    LOCAL fgTerm :REAL4
    LOCAL blended :DWORD

    mov fbScale, 8              ; 5-bit level -> 8-bit channel value
    mov aScale, 15              ; alpha level that means fully opaque

    mov a, 0
    .REPEAT

        mov fg, 0
        .REPEAT

            mov bg, 0
            .REPEAT

                ; fgTerm = (a / aScale) * (fg * fbScale)
                finit
                fild fg
                fild fbScale
                fmul st, st(1)
                fild aScale
                fild a
                fdiv st, st(1)
                fmul st, st(2)
                fst fgTerm
                fwait

                ; blended = (1 - a/aScale) * (bg * fbScale) + fgTerm
                finit
                fld fgTerm
                fild bg
                fild fbScale
                fmul st, st(1)
                fild aScale
                fild a
                fdiv st, st(1)
                fld1
                fsub st, st(1) ; 1.0-alpha
                fmul st, st(3)

                fadd st, st(5)
                fist blended
                fwait

                ; eax = a*1024 + bg*32 + fg
                mov ebx, bg
                shl ebx, 5
                add ebx, fg
                mov eax, a
                shl eax, 10
                add eax, ebx
                mov edx, blended
                mov dword ptr ALUT[eax*4], edx

                inc bg
            .UNTIL bg > 31

            inc fg
        .UNTIL fg > 31

        inc a
    .UNTIL a > 15

    ret

gen_alpha_lut endp

;-----------------------------------------------------------------------
; draw_floor_ceeling
;
; Texture-maps the floor rows (bottom half of the frame buffer) and,
; unless dwNoCeeling == 1, the ceiling rows (top half).
;
; In:   px, py = viewer position; divided by 2048 to get map units
;       pa     = view angle, multiplied by ang2rad16 to get radians
; Out:  pixels stored into the buffer addressed by BUF.  Each step
;       writes the same colour to two adjacent pixels.
; Reads: dwWidth, dwHeight, dwTextures, dwTextureSize,
;        dwTextureSizeBits, dbMapWidth, dbMapHeight, pRatio,
;        dwDepthFactor, DLUT, dwNoCeeling
; Clobbers: eax, ebx, ecx, edx, flags, x87 state (finit is issued)
;
; Fixed-point layout inside the row loops: startx/starty/rx/ry are
; scaled by 2048*2048 = 2^22, so "shr 22" yields the map cell.
;-----------------------------------------------------------------------
draw_floor_ceeling proc px :DWORD, py :DWORD, pa :DWORD

    LOCAL vx :REAL4, vy :REAL4
    LOCAL v1x :REAL4, v1y :REAL4
    LOCAL y:DWORD, yend :DWORD, x:DWORD
    LOCAL dist :SDWORD
    LOCAL dwDist :DWORD
    LOCAL rx :SDWORD, ry :SDWORD
    LOCAL startx :SDWORD, starty :SDWORD
    LOCAL sWidth :DWORD
    LOCAL bfrPos :DWORD, bfrYInc :SDWORD
    LOCAL tmp1 :DWORD, tmp2 :DWORD
    LOCAL fpa :REAL4, factor2047 :DWORD
    LOCAL cpa :REAL4, spa :REAL4
    LOCAL sdx :REAL4
    LOCAL dwMapHeight :DWORD, dwMapWidth :DWORD
    LOCAL textureSize :DWORD, textureSizeBits :DWORD, textureSizeBits2 :DWORD
    LOCAL fpx :REAL4, fpy :REAL4
    LOCAL dbfr[192] :DWORD
    LOCAL lastDist :DWORD

    ; dbfr caches one 192-dword line of DLUT and lastDist remembers which
    ; shade index it holds.  Start with an index that can never match
    ; (valid indices are clamped to 1..31) so the first row always loads.
    mov lastDist, -1

    mov edx, dwTextureSize
    mov textureSize, edx

    mov edx, dwTextureSizeBits
    mov textureSizeBits, edx

    ; textureSizeBits2 = 2*bits + 2 = log2(bytes per texture)
    mov textureSizeBits2, edx
    shl textureSizeBits2, 1
    add textureSizeBits2, 2

    xor edx, edx
    mov dl, dbMapHeight
    mov dwMapHeight, edx

    xor edx, edx
    mov dl, dbMapWidth
    mov dwMapWidth, edx

    mov eax, 2048
    mov factor2047, eax             ; NB: holds 2048 despite the name

    finit
    fild factor2047 ; convert p(x,y) to scale of 1 (from 2048)
    fild px
    fdiv st, st(1)
    fstp fpx
    fild py
    fdiv st, st(1)
    fstp fpy
    fwait

    ; cpa = cos(pa * ang2rad16), spa = sin(pa * ang2rad16)
    finit
    fild pa
    fmul ang2rad16
    fst fpa
    fcos
    fstp cpa
    fld fpa
    fsin
    fstp spa

    mov eax, dwWidth
    mov sWidth, eax

    ;
    ; precalculations for floor and ceeling
    ;

    ; calculate vector: v(x, y) for x = 0
    ;   sdx = -(x - width/2) / (width * pRatio)
    ;   vx  = sdx*cpa - spa
    ;   vy  = sdx*spa + cpa
    xor eax, eax ; x = 0
    mov ecx, dwWidth
    shr ecx, 1
    sub eax, ecx
    mov tmp1, eax
    neg tmp1
    mov eax, dwWidth
    mov tmp2, eax
    finit
    fld cpa
    fld spa
    fild tmp1
    fild tmp2
    fld pRatio
    fmul st(1), st                  ; st(1) = width * pRatio
    fincstp                         ; step past pRatio without storing it
    fdiv st(1), st                  ; st(1) = tmp1 / (width * pRatio)
    fincstp                         ; st = sdx, st(1) = spa, st(2) = cpa
    fst sdx
    fmul st, st(2) ; cpa
    fsub st, st(1) ; spa
    fstp vx
    fld sdx
    fmul st, st(1) ; spa
    fadd st, st(2) ; cpa
    fstp vy
    fwait

    ; calculate vector: v1(x, y) for x = dwWidth (same formula as above)
    mov eax, sWidth
    mov ecx, dwWidth
    shr ecx, 1
    sub eax, ecx
    mov tmp1, eax
    neg tmp1
    mov eax, dwWidth
    mov tmp2, eax
    finit
    fld cpa
    fld spa
    fild tmp1
    fild tmp2
    fld pRatio
    fmul st(1), st
    fincstp
    fdiv st(1), st
    fincstp
    fst sdx
    fmul st, st(2) ; cpa
    fsub st, st(1) ; spa
    fstp v1x
    fld sdx
    fmul st, st(1) ; spa
    fadd st, st(2) ; cpa
    fstp v1y
    fwait


    ; render floor: y runs from dwHeight-1 down to dwHeight/2

    mov eax, dwHeight
    dec eax
    mov y, eax

    mov eax, dwHeight
    shr eax, 1
    mov yend, eax

    ; bfrPos = BUF + y * (dwWidth * 4); bfrYInc = bytes per row
    mov eax, y
    mov ebx, dwWidth
    shl ebx, 2
    mov bfrYInc, ebx
    imul eax, ebx
    add eax, BUF
    mov bfrPos, eax

 floorLoopY:

    ; dist = (dwHeight) / (2y - dwHeight)

    mov ebx, y
    shl ebx, 1
    sub ebx, dwHeight

    cmp ebx, 0
    jle skipFloorY                  ; at or above the horizon: nothing to draw

    mov tmp1, ebx
    finit
    fild tmp1
    fild dwHeight
    fdiv st(0), st(1)
    fstp dist                       ; dist holds a REAL4 bit pattern from here on
    ;fwait

    finit
    fld fpx  ;3
    fld fpy  ;2
    fld dist ;1

    ; start(x,y) = p(x,y) + v(x,y)*dist  (screen x = 0)

    fld vx   ;0
    fmul st, st(1)
    fadd st, st(3)
    fstp startx
    fld vy   ;0
    fmul st, st(1)
    fadd st, st(2)
    fstp starty
    ; r(x,y) = p(x,y) + v1(x,y)*dist  (screen x = dwWidth)

    fld v1x   ;0
    fmul st, st(1)
    fadd st, st(3)
    fstp rx
    fld v1y   ;0
    fmul st, st(1)
    fadd st, st(2)
    fstp ry

    ;fwait
    finit

    ; calculate increment r(x,y) = (x_equals_width_pos - x_equals_zero_pos) / screen_width
    ; both the increment and the start position are converted to integers
    ; scaled by 2048*2048

    fild factor2047
    fmul st, st                     ; st = 2048 * 2048

    fild sWidth
    fld startx
    fld rx
    fsub st, st(1)
    fdiv st, st(2)
    fmul st, st(3)
    fistp rx
    fmul st, st(2)
    fistp startx

    fld starty
    fld ry
    fsub st, st(1)
    fdiv st, st(2)
    fmul st, st(3)
    fistp ry
    fmul st, st(2)
    fistp starty

    fwait

    sal rx, 1                       ; the row loop advances two pixels per step
    sal ry, 1

    ; -- setup dwdist for depthshading
    ; dwDist = (dist * 2048) >> (dwDepthFactor + 2), clamped to 1..31
    fild factor2047
    fld dist
    fmul st, st(1)
    fistp dwDist

    mov ecx, dwDepthFactor
    add ecx, 2
    shr dwDist, cl
    cmp dwDist, 31
    jle skipFloorDistClip1
    mov eax, 31
    mov dwDist, eax
   skipFloorDistClip1:
    cmp dwDist, 1
    jge skipFloorDistClip2
    mov eax, 1
    mov dwDist, eax
   skipFloorDistClip2:

    ; load line of DLUT into stack (skipped when dbfr already holds it)

    mov eax, lastDist
    cmp dwDist, eax
    je skipLoadF

    mov eax, dwDist
    mov lastDist, eax

    ; dwDist = offset DLUT + index * 192 dwords (index * 64 * 3 * 4 bytes)
    shl dwDist, 6
    mov eax, dwDist
    add dwDist, eax
    add dwDist, eax
    shl dwDist, 2
    add dwDist, offset DLUT

    mov edx, dwDist
    mov ecx, 192
    lea eax, dbfr

  fLoadLoop:
    mov ebx, dword ptr[edx]
    mov dword ptr[eax], ebx
    add eax, 4
    add edx, 4
    loop fLoadLoop

  skipLoadF:

    ; --


    ; render row
    xor eax, eax
    mov x, eax ; x = 0

  floorLoopX:

    ; map cell = fixed-point position >> 22; skip pixels outside the map
    mov eax, startx
    mov ebx, starty
    shr eax, 22
    shr ebx, 22

    cmp eax, dwMapWidth
    jge skipFloorX
    cmp ebx, dwMapHeight
    jge skipFloorX

    mov edx, 1                      ; floor always uses texture #1

    ; get pixel from texture

    mov eax, dwTextures
    mov ecx, textureSizeBits2
    shl edx, cl                     ; byte offset of the chosen texture
    add eax, edx

    ; keep textureSizeBits bits of sub-cell position on each axis
    mov ecx, 22
    sub ecx, textureSizeBits

    mov ebx, startx
    mov edx, starty

    shr ebx, cl
    shr edx, cl

    mov ecx, textureSize
    dec ecx

    and ebx, ecx
    and edx, ecx

    ; texel index = (u << textureSizeBits) + v
    mov ecx, textureSizeBits
    shl ebx, cl
    add edx, ebx

    mov eax, dword ptr [eax+edx*4] ; pixel colour now in eax

    ; --- send through depthshading lut
    ; eax = inpixel, edx = outpixel
    ; each 8-bit channel is reduced to 6 bits and looked up in its own
    ; 64-entry table inside dbfr (byte offsets 0, 256, 512)

    mov ebx, eax

    shr eax, 16
    and eax, 0FFh
    shr eax, 2
    mov edx, dbfr[4*eax]

    mov eax, ebx
    shr eax, 8
    and eax, 0FFh
    shr eax, 2
    mov ecx, dbfr[256+4*eax]
    or edx, ecx

    and ebx, 0FFh
    shr ebx, 2
    mov ecx, dbfr[512+4*ebx]
    or edx, ecx

    ; ---

    ; write pixel (colour = edx)
    mov eax, bfrPos
    mov dword ptr [eax], edx
    mov dword ptr [eax+4], edx

  skipFloorX:

    ; increment map coordinates
    mov eax, rx
    add startx, eax
    mov eax, ry
    add starty, eax

    ; loop

    add bfrPos, 8
    add x, 2
    mov ebx, x
    cmp ebx, dwWidth
    jl floorLoopX

    ; rewind bfrPos to the start of the row just drawn
    mov ebx, bfrYInc
    sub bfrPos, ebx

  skipFloorY:
    ; move up one row
    mov ebx, bfrYInc
    sub bfrPos, ebx

    dec y
    mov eax, yend
    mov ebx, y
    cmp ebx, eax
    jge floorLoopY

    ;
    ; render ceeling: y runs from dwHeight/2 down to 0
    ;

    cmp dwNoCeeling, 1
    je noCeeling

    mov eax, dwHeight
    shr eax, 1
    mov y, eax

    xor eax, eax
    mov yend, eax

    mov eax, y
    mov ebx, dwWidth
    shl ebx, 2
    mov bfrYInc, ebx
    imul eax, ebx
    add eax, BUF
    mov bfrPos, eax

 ceelingLoopY:

    ; dist = (-dwHeight) / (2y - dwHeight)

    mov ebx, y
    shl ebx, 1
    sub ebx, dwHeight

    cmp ebx, 0
    jge skipCeelingY                ; at or below the horizon: nothing to draw

    mov tmp1, ebx
    finit
    fild tmp1
    fild dwHeight
    fchs
    fdiv st(0), st(1)
    fstp dist
    fwait

    finit
    fld fpx
    fld fpy
    fld dist ;1

    ; start(x,y) = p(x,y) + v(x,y)*dist  (screen x = 0)

    fld vx   ;0
    fmul st, st(1)
    fadd st, st(3)
    fstp startx
    fld vy   ;0
    fmul st, st(1)
    fadd st, st(2)
    fstp starty

    ; r(x,y) = p(x,y) + v1(x,y)*dist  (screen x = dwWidth)

    fld v1x   ;0
    fmul st, st(1)
    fadd st, st(3)
    fstp rx
    fld v1y   ;0
    fmul st, st(1)
    fadd st, st(2)
    fstp ry

    fwait
    finit

    ; calculate increment r(x,y) = (x_equals_width_pos - x_equals_zero_pos) / screen_width

    fild factor2047
    fmul st, st

    fild sWidth
    fld startx
    fld rx
    fsub st, st(1)
    fdiv st, st(2)
    fmul st, st(3)
    fistp rx
    fmul st, st(2)
    fistp startx

    fld starty
    fld ry
    fsub st, st(1)
    fdiv st, st(2)
    fmul st, st(3)
    fistp ry
    fmul st, st(2)
    fistp starty

    fwait

    sal rx, 1
    sal ry, 1

    ; -- setup dwdist for depthshading (same clamp as the floor pass)
    fild factor2047
    fld dist
    fmul st, st(1)
    fistp dwDist

    mov ecx, dwDepthFactor
    add ecx, 2
    shr dwDist, cl
    cmp dwDist, 31
    jle skipCeelingDistClip1
    mov eax, 31
    mov dwDist, eax
   skipCeelingDistClip1:
    cmp dwDist, 1
    jge skipCeelingDistClip2
    mov eax, 1
    mov dwDist, eax
   skipCeelingDistClip2:

    ; reload dbfr only when the shade index changed; the cache carries
    ; over from the floor pass
    mov eax, lastDist
    cmp dwDist, eax
    je skipLoadC

    mov eax, dwDist
    mov lastDist, eax

    shl dwDist, 6
    mov eax, dwDist
    add dwDist, eax
    add dwDist, eax
    shl dwDist, 2
    add dwDist, offset DLUT

    mov edx, dwDist
    mov ecx, 192
    lea eax, dbfr

  cLoadLoop:
    mov ebx, dword ptr[edx]
    mov dword ptr[eax], ebx
    add eax, 4
    add edx, 4
    loop cLoadLoop

  skipLoadC:

    ; --

    ; render row
    xor eax, eax
    mov x, eax ; x = 0

  ceelingLoopX:

    mov eax, startx
    mov ebx, starty
    shr eax, 22
    shr ebx, 22

    cmp eax, dwMapWidth
    jge skipCeelingX
    cmp ebx, dwMapHeight
    jge skipCeelingX

    mov edx, 0                      ; ceiling always uses texture #0

    ; get pixel from texture

    mov eax, dwTextures
    mov ecx, textureSizeBits2
    shl edx, cl
    add eax, edx

    mov ecx, 22
    sub ecx, textureSizeBits

    mov ebx, startx
    mov edx, starty

    shr ebx, cl
    shr edx, cl

    mov ecx, textureSize
    dec ecx

    and ebx, ecx
    and edx, ecx

    mov ecx, textureSizeBits
    shl ebx, cl
    add edx, ebx

    mov eax, dword ptr [eax+edx*4] ; pixel colour now in eax

    ; --- send through depthshading lut
    ; eax = inpixel, edx = outpixel

    mov ebx, eax

    shr eax, 16
    and eax, 0FFh
    shr eax, 2
    mov edx, dbfr[4*eax]

    mov eax, ebx
    shr eax, 8
    and eax, 0FFh
    shr eax, 2
    mov ecx, dbfr[256+4*eax]
    or edx, ecx

    and ebx, 0FFh
    shr ebx, 2
    mov ecx, dbfr[512+4*ebx]
    or edx, ecx

    ; ---

    ; write pixel (colour = edx)
    mov eax, bfrPos
    mov dword ptr [eax], edx
    mov dword ptr [eax+4], edx

  skipCeelingX:

    ; increment map coordinates
    mov eax, rx
    add startx, eax
    mov eax, ry
    add starty, eax

    ; loop

    add bfrPos, 8
    add x, 2
    mov ebx, x
    cmp ebx, dwWidth
    jl ceelingLoopX

    mov ebx, bfrYInc
    sub bfrPos, ebx

  skipCeelingY:
    mov ebx, bfrYInc
    sub bfrPos, ebx

    dec y
    mov ebx, y
    cmp ebx, yend
    jge ceelingLoopY

noCeeling:

    ret

draw_floor_ceeling endp

;-----------------------------------------------------------------------
; draw_wall_slice
;
; Draws one vertical, depth-shaded, textured wall strip two pixels wide.
;
; In:   x      = screen column of the left pixel of the strip
;       dist   = distance to the wall; dist = 2048 gives a strip exactly
;                dwHeight pixels tall
;       texPtr = address of the first texel of the texture strip to
;                sample (consecutive dwords are walked top to bottom)
; Out:  pixels stored at BUF; dwHeightBuffer[x/2] = first row drawn.
;       Nothing is drawn or recorded when dist is 0 or the projected
;       height is 0.
; Clobbers: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------
draw_wall_slice proc x :DWORD, dist :DWORD, texPtr :DWORD

    LOCAL height :DWORD
    LOCAL starty :SDWORD, endy :SDWORD
    LOCAL cstarty :DWORD, cendy :DWORD
    LOCAL bfrinc :DWORD
    LOCAL texInc :DWORD
    LOCAL texPos :DWORD
    LOCAL dwDist :DWORD
    LOCAL lastPos :DWORD, lastPixel :DWORD
    LOCAL dbfr[192]:DWORD

    ; a zero distance would raise a divide fault below
    mov ebx, dist
    test ebx, ebx
    jnz distOk
    ret
   distOk:

    ; calculate wall height = (dwHeight << 11) / dist
    mov eax, dwHeight
    shl eax, 11
    xor edx, edx
    idiv ebx
    mov height, eax

    cmp height, 0
    jne noret
    ret
   noret:

    ; setup dwDist for depthshading:
    ; shade index = dist >> (dwDepthFactor + 2), clamped to 1..31
    mov eax, dist
    mov dwDist, eax
    mov ecx, dwDepthFactor
    add ecx, 2
    shr dwDist, cl
    cmp dwDist, 31
    jle skipDistClip1
    mov eax, 31
    mov dwDist, eax
   skipDistClip1:
    cmp dwDist, 1
    jge skipDistClip2
    mov eax, 1
    mov dwDist, eax
   skipDistClip2:

    ; dwDist = offset DLUT + index * 192 dwords
    shl dwDist, 6
    mov eax, dwDist
    add dwDist, eax
    add dwDist, eax
    shl dwDist, 2
    add dwDist, offset DLUT

    ; copy that DLUT line into the local dbfr table
    mov edx, dwDist
    mov ecx, 192
    lea eax, dbfr

  wLoadLoop:
    mov ebx, dword ptr[edx]
    mov dword ptr[eax], ebx
    add eax, 4
    add edx, 4
    dec ecx
    jnz wLoadLoop

    ; calculate y start/end positions (strip centred on dwHeight/2)
    mov eax, height
    shr eax, 1
    mov ebx, dwHeight
    shr ebx, 1
    sub ebx, eax
    mov starty, ebx
    add ebx, height
    mov endy, ebx

    ; calculate clipped positions: cstarty >= 0, cendy <= dwHeight-1
    mov ebx, starty
    cmp ebx, 0
    jge skipClipStart
    xor ebx, ebx
skipClipStart:
    mov cstarty, ebx

    mov ebx, endy
    mov ecx, dwHeight
    cmp ebx, ecx
    jl skipClipEnd
    dec ecx
    mov ebx, ecx
skipClipEnd:
    mov cendy, ebx

    ; get texture step
    ; texInc = (dwTextureSize << 18) / height   (18 fractional bits)
    mov eax, dwTextureSize
    shl eax, 18
    xor edx, edx
    mov ebx, height
    div ebx
    mov texInc, eax

    ; when the top was clipped, start sampling part-way down the texture
    mov ebx, cstarty
    .IF starty < ebx
        mov eax, cstarty
        mov ebx, starty
        sub eax, ebx
        mov ebx, texInc
        imul ebx
        mov texPos, eax
    .ELSE
        xor ebx, ebx
        mov texPos, ebx
    .ENDIF

    ; lastPos caches the previous texel index; this value can never match
    ; a real index, so the first row always fetches and shades a texel
    mov eax, 1000000
    mov lastPos, eax

    ; render

    mov eax, dwWidth
    shl eax, 2
    mov bfrinc, eax                 ; bytes per screen row

    mov ecx, cstarty

    mov eax, cstarty
    mov ebx, bfrinc
    imul eax, ebx

    mov ebx, x
    shl ebx, 2
    add ebx, BUF
    add ebx, eax

    ; ecx y counter
    ; ebx buffer offset
    ; edx shaded colour

wallLoop:

    ; get pixel

    mov eax, texPos
    shr eax, 18                     ; integer texel index
    ; NOTE(review): on the last row of an unclipped strip this index can
    ; reach dwTextureSize when height divides dwTextureSize << 18 evenly,
    ; i.e. one dword past the strip - confirm this is acceptable.

    mov edx, lastPixel
    cmp eax, lastPos
    je dupPixel                     ; same texel as previous row: reuse colour

    mov lastPos, eax

    shl eax, 2
    add eax, texPtr
    mov eax, dword ptr [eax]

    ; set pixel

    ; --- send through depthshading lut
    ; eax = inpixel, edx = outpixel
    ; each 8-bit channel is reduced to 6 bits and looked up in its own
    ; 64-entry table inside dbfr (byte offsets 0, 256, 512)

    push ebx

    mov ebx, eax

    shr eax, 16
    and eax, 0FFh
    shr eax, 2
    mov edx, dbfr[4*eax]

    mov eax, ebx
    shr eax, 8
    and eax, 0FFh
    shr eax, 2
    mov eax, dbfr[256+4*eax]
    or edx, eax

    and ebx, 0FFh
    shr ebx, 2
    mov eax, dbfr[512+4*ebx]
    or edx, eax

    pop ebx

    ; ---

  dupPixel:

    mov dword ptr [ebx], edx
    mov dword ptr [ebx+4], edx
    mov lastPixel, edx

    mov edx, texInc
    add texPos, edx

    inc ecx
    add ebx, bfrinc

    cmp ecx, cendy

    jle wallLoop

    ; record the first drawn row for this strip
    mov eax, x
    shr eax, 1

    mov ebx, cstarty
    mov dword ptr dwHeightBuffer[eax*4], ebx

    ret

draw_wall_slice endp

;-----------------------------------------------------------------------
; cast_ray
;
; Walks a ray through the grid at MAP and reports where it first meets
; a non-empty cell.  Two probes are run: probe 1 steps one cell at a
; time along x (2048 units per step), probe 2 along y.  The nearer hit
; wins.
;
; In:   x, y    = ray origin (2048 units per map cell)
;       vx, vy  = signed ray direction
;       rx, ry  = addresses that receive the hit position
;       hitType = address that receives 0 when probe 1 supplied the hit,
;                 1 when probe 2 did
; Out:  [rx], [ry], [hitType] written.  If a probe cannot run (its
;       direction component is 0) or leaves the map, the other probe's
;       position is used.  With vx = vy = 0 the origin itself is
;       returned.
; Clobbers: eax, ebx, ecx, edx, flags, x87 state (finit may be issued)
;-----------------------------------------------------------------------
cast_ray proc x :DWORD, y :DWORD, vx :DWORD, vy :DWORD,  rx :DWORD, ry :DWORD, hitType :DWORD

    LOCAL sgnx :SDWORD, sgny :SDWORD
    LOCAL uvx :DWORD, uvy :DWORD
    LOCAL mx :DWORD, my :DWORD      ; slope
    LOCAL tx1 :DWORD, ty1 :DWORD
    LOCAL tx2 :DWORD, ty2 :DWORD
    LOCAL oob1 :DWORD, oob2 :DWORD
    LOCAL dwMapHeight :DWORD, dwMapWidth :DWORD
    LOCAL distA :DWORD, distB :DWORD

    xor edx, edx
    mov dl, dbMapHeight
    mov dwMapHeight, edx
    mov dl, dbMapWidth
    mov dwMapWidth, edx

    ; give both probes a defined position in case they are skipped
    mov eax, x
    mov tx1, eax
    mov tx2, eax
    mov eax, y
    mov ty1, eax
    mov ty2, eax

    ; split the direction into sign (sgnx/sgny) and magnitude (uvx/uvy)

    mov eax, vx
    mov ebx, vy

    mov edx, 1
    mov sgnx, edx
    cmp eax, 0
    jge dxpos
    neg edx
    mov sgnx, edx
    neg eax
 dxpos:
    mov uvx, eax

    mov edx, 1
    mov sgny, edx
    cmp ebx, 0
    jge dypos
    neg edx
    mov sgny, edx
    neg ebx
 dypos:
    mov uvy, ebx

    ; calculate ray-x (1)

    mov ebx, uvx
    cmp ebx, 0
    je skipRayx                     ; no x movement: probe 1 cannot run

    ; mx = (uvy << 11) / uvx + 2 : y change per 2048-unit step in x
    mov eax, uvy
    shl eax, 11
    xor edx, edx
    mov ebx, uvx
    div ebx
    add eax, 2
    mov mx, eax

    ; ebx = x rounded down to a cell boundary
    mov ebx, x
    shr ebx, 11
    shl ebx, 11

    cmp sgnx, 0
    jge xpositive

 xnegative:
    ; eax = (mx * distance to the left cell edge) >> 11
    mov ecx, x
    sub ecx, ebx
    mul ecx
    shr eax, 11
    sub ebx, 1 ; make sure we're in the right cell
    mov tx1, ebx ; set tx1

    mov edx, sgny
    cmp edx, 0
    jge ypositive11

    ; vy negative
    mov ecx, y
    sub ecx, eax
    mov ty1, ecx ; set ty1 (neg)
    neg mx
    jmp skipypositive11

  ypositive11:
    ; vy positive
    mov ecx, y
    add ecx, eax
    mov ty1, ecx ; set ty1 (pos)

  skipypositive11:

  loop1:

    mov eax, tx1
    mov ebx, ty1

    shr eax, 11 ; to map unit
    shr ebx, 11

    ; test for out of bounds
    cmp eax, dwMapWidth
    jge exitLoop1OOB
    cmp ebx, dwMapHeight
    jge exitLoop1OOB
    cmp eax, 0
    jl exitLoop1OOB
    cmp ebx, 0
    jl exitLoop1OOB

    ; edx = MAP[row << dwVMapSizeBits + col]
    mov ecx, dwVMapSizeBits
    add ecx, 2
    shl ebx, cl
    add ebx, MAP
    mov edx, dword ptr [ebx + 4*eax]
    cmp edx, 0 ; if map location is not empty...
    jne exitLoop1

    mov eax, tx1 ; increment position
    sub eax, 2048
    mov ecx, ty1
    add ecx, mx

    mov tx1, eax
    mov ty1, ecx

    jmp loop1 ; loop...

  exitLoop1OOB:   ; set out of bounds flag
    mov eax, 1
    mov oob1, eax
    jmp endRayx

  exitLoop1:      ; set hit flag
    xor eax, eax
    mov oob1, eax
    jmp endRayx

 xpositive:
    ; eax = (mx * distance to the right cell edge) >> 11
    add ebx, 2048
    mov ecx, ebx
    mov edx, x
    sub ecx, edx
    mul ecx
    add ebx, 1 ; make sure we're in the right cell
    mov tx1, ebx ; set tx1

    shr eax, 11

    mov edx, sgny
    cmp edx, 0
    jge ypositive12

    ; vy negative
    mov ecx, y
    sub ecx, eax
    mov ty1, ecx ; set ty1 (neg)
    neg mx
    jmp skipypositive12

  ypositive12:
    ; vy positive
    mov ecx, y
    add ecx, eax
    mov ty1, ecx ; set ty1 (pos)

  skipypositive12:

  loop2:

    mov eax, tx1
    mov ebx, ty1

    shr eax, 11 ; to map unit
    shr ebx, 11

    ; test for out of bounds
    cmp eax, dwMapWidth
    jge exitLoop2OOB
    cmp ebx, dwMapHeight
    jge exitLoop2OOB
    cmp eax, 0
    jl exitLoop2OOB
    cmp ebx, 0
    jl exitLoop2OOB

    mov ecx, dwVMapSizeBits
    add ecx, 2
    shl ebx, cl
    add ebx, MAP
    mov edx, dword ptr [ebx + 4*eax]
    cmp edx, 0 ; if map location is not empty...
    jne exitLoop2

    mov eax, tx1 ; increment position
    add eax, 2048
    mov ecx, ty1
    add ecx, mx

    mov tx1, eax
    mov ty1, ecx

    jmp loop2 ; loop...

  exitLoop2OOB:   ; set out of bounds flag
    mov eax, 1
    mov oob1, eax
    jmp endRayx

  exitLoop2:      ; set hit flag
    xor eax, eax
    mov oob1, eax
    jmp endRayx

skipRayx:

    ; probe 1 did not run: treat it as out of bounds
    mov eax, 1
    mov oob1, eax

endRayx:


    ; calculate ray-y (2)

    mov ebx, uvy
    cmp ebx, 0
    je skipRayy                     ; no y movement: probe 2 cannot run

    ; my = (uvx << 11) / uvy + 2 : x change per 2048-unit step in y
    mov eax, uvx
    shl eax, 11
    xor edx, edx
    mov ebx, uvy
    div ebx
    add eax, 2
    mov my, eax

    ; ebx = y rounded down to a cell boundary
    mov ebx, y
    shr ebx, 11
    shl ebx, 11

    cmp sgny, 0
    jge ypositive

 ynegative:
    mov ecx, y
    sub ecx, ebx
    mul ecx
    sub ebx, 1 ; make sure we're in the right cell
    mov ty2, ebx ; set ty2

    shr eax, 11

    mov edx, sgnx
    cmp edx, 0
    jge xpositive21

    ; vx negative
    mov ecx, x
    sub ecx, eax
    mov tx2, ecx ; set tx2 (neg)
    neg my
    jmp skipxpositive21

  xpositive21:
    ; vx positive
    mov ecx, x
    add ecx, eax
    mov tx2, ecx ; set tx2 (pos)

  skipxpositive21:

  loop3:

    mov eax, tx2
    mov ebx, ty2

    shr eax, 11 ; to map unit
    shr ebx, 11

    ; test for out of bounds
    cmp eax, dwMapWidth
    jge exitLoop3OOB
    cmp ebx, dwMapHeight
    jge exitLoop3OOB
    cmp eax, 0
    jl exitLoop3OOB
    cmp ebx, 0
    jl exitLoop3OOB

    mov ecx, dwVMapSizeBits
    add ecx, 2
    shl ebx, cl
    add ebx, MAP
    mov edx, dword ptr [ebx + 4*eax]
    cmp edx, 0 ; if map location is not empty...
    jne exitLoop3

    mov ecx, ty2 ; increment position
    sub ecx, 2048
    mov eax, tx2
    add eax, my

    mov ty2, ecx
    mov tx2, eax

    jmp loop3 ; loop...

  exitLoop3OOB:   ; set out of bounds flag
    mov eax, 1
    mov oob2, eax
    jmp endRayy

  exitLoop3:      ; set hit flag
    xor eax, eax
    mov oob2, eax
    jmp endRayy

 ypositive:
    add ebx, 2048
    mov ecx, ebx
    mov edx, y
    sub ecx, edx
    mul ecx
    add ebx, 1 ; make sure we're in the right cell
    mov ty2, ebx ; set ty2

    shr eax, 11

    mov edx, sgnx
    cmp edx, 0
    jge xpositive22

    ; vx negative
    mov ecx, x
    sub ecx, eax
    mov tx2, ecx ; set tx2 (neg)
    neg my
    jmp skipxpositive22

  xpositive22:
    ; vx positive
    mov ecx, x
    add ecx, eax
    mov tx2, ecx ; set tx2 (pos)

  skipxpositive22:

  loop4:

    mov eax, tx2
    mov ebx, ty2

    shr eax, 11 ; to map unit
    shr ebx, 11

    ; test for out of bounds
    cmp eax, dwMapWidth
    jge exitLoop4OOB
    cmp ebx, dwMapHeight
    jge exitLoop4OOB
    cmp eax, 0
    jl exitLoop4OOB
    cmp ebx, 0
    jl exitLoop4OOB

    mov ecx, dwVMapSizeBits
    add ecx, 2
    shl ebx, cl
    add ebx, MAP
    mov edx, dword ptr [ebx + 4*eax]
    cmp edx, 0 ; if map location is not empty...
    jne exitLoop4

    mov ecx, ty2 ; increment position
    add ecx, 2048
    mov eax, tx2
    add eax, my

    mov ty2, ecx
    mov tx2, eax

    jmp loop4 ; loop...

  exitLoop4OOB:   ; set out of bounds flag
    mov eax, 1
    mov oob2, eax
    jmp endRayy

  exitLoop4:      ; set hit flag
    xor eax, eax
    mov oob2, eax
    jmp endRayy

skipRayy:

    ; probe 2 did not run: treat it as out of bounds
    mov eax, 1
    mov oob2, eax

endRayy:

    ; choose which probe's position to report

    .IF oob1 == 1
        ; probe 1 unusable: report probe 2
        mov eax, tx2
        mov x, eax
        mov eax, ty2
        mov y, eax
        mov eax, hitType
        mov ebx, 1
        mov dword ptr [eax], ebx
    .ELSEIF oob2 == 1
        ; probe 2 unusable: report probe 1
        mov eax, tx1
        mov x, eax
        mov eax, ty1
        mov y, eax
        mov eax, hitType
        mov ebx, 0
        mov dword ptr [eax], ebx
    .ELSE

        ; both probes hit: turn the positions into offsets from the origin
        mov eax, x
        sub tx1, eax
        mov eax, y
        sub ty1, eax
        mov eax, x
        sub tx2, eax
        mov eax, y
        sub ty2, eax

        ; distA = sqrt(tx1^2 + ty1^2), distB = sqrt(tx2^2 + ty2^2)
        finit
        fild tx1
        fmul st, st
        fild ty1
        fmul st, st
        fadd st, st(1)
        fsqrt
        fistp distA
        fild tx2
        fmul st, st
        fild ty2
        fmul st, st
        fadd st, st(1)
        fsqrt
        fistp distB
        fwait

        mov eax, distA
        mov ebx, distB
        .IF eax < ebx
            ; probe 1 is nearer
            mov eax, tx1
            add eax, x
            mov x, eax
            mov eax, ty1
            add eax, y
            mov y, eax
            mov eax, hitType
            mov ebx, 0
            mov dword ptr [eax], ebx
        .ELSE
            ; probe 2 is nearer (or equally near)
            mov eax, tx2
            add eax, x
            mov x, eax
            mov eax, ty2
            add eax, y
            mov y, eax
            mov eax, hitType
            mov ebx, 1
            mov dword ptr [eax], ebx
        .ENDIF

    .ENDIF

    ; hand the chosen position back through the rx / ry pointers
    mov eax, rx
    mov ebx, x
    mov dword ptr [eax], ebx
    mov eax, ry
    mov ebx, y
    mov dword ptr [eax], ebx

    ret

cast_ray endp

;-----------------------------------------------------------------------
; clear_buffer
;
; Stores the dword clr into BUFVSIZE consecutive dwords starting at BUF.
; Clobbers: eax, ebx, ecx, flags
;-----------------------------------------------------------------------
clear_buffer proc clr :DWORD

    mov eax, clr                    ; fill value
    mov ebx, BUF                    ; write cursor
    mov ecx, BUFVSIZE               ; dwords left to write

fill_next:

    mov dword ptr [ebx], eax
    lea ebx, [ebx+4]
    sub ecx, 1
    jnz fill_next

    ret

clear_buffer endp

;-----------------------------------------------------------------------
; draw_minimap
;
; Plots the MAP grid (dwVMapSize x dwVMapSize dwords) into the frame
; buffer at BUF with its top-left corner at pixel (x, y).  Non-zero
; cells are drawn as colour 0FFh; zero cells leave the buffer untouched.
; Clobbers: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------
draw_minimap proc x :DWORD, y :DWORD

    LOCAL cellCount :DWORD
    LOCAL rowSkip :DWORD

    ; cellCount = dwVMapSize * dwVMapSize
    mov eax, dwVMapSize
    imul eax, eax
    mov cellCount, eax

    ; rowSkip = bytes from the end of one minimap row to the next
    mov eax, dwWidth
    sub eax, dwVMapSize
    shl eax, 2
    mov rowSkip, eax

    ; edx = BUF + (y * dwWidth + x) * 4
    mov edx, dwWidth
    imul edx, y
    add edx, x
    shl edx, 2
    add edx, BUF

    xor ecx, ecx                    ; ecx = column within the current row
    xor eax, eax                    ; eax = linear cell index

plotCell:

    mov ebx, MAP
    cmp dword ptr [ebx+eax*4], 0
    je cellEmpty
    mov dword ptr [edx], 0ffh
  cellEmpty:

    add edx, 4
    inc ecx
    mov ebx, dwVMapSize
    cmp ecx, ebx
    jl sameRow
    add edx, rowSkip                ; row finished: jump to the next line
    xor ecx, ecx
sameRow:

    inc eax
    cmp eax, cellCount
    jl plotCell

    ret

draw_minimap endp

;-----------------------------------------------------------------------
; draw_bmp
;
; Copies an image into the frame buffer at BUF with its top-left corner
; at pixel (x, y).
;
; In:   bmp = address of an image block laid out as
;             dword width, dword height, then width*height dword pixels
; No clipping is performed.
; Clobbers: eax, ebx, ecx, edx, flags
;-----------------------------------------------------------------------
draw_bmp proc x :DWORD, y :DWORD, bmp :DWORD

    LOCAL srcWidth :DWORD
    LOCAL pixelTotal :DWORD
    LOCAL rowSkip :DWORD

    ; read the two-dword header: pixelTotal = width * height
    mov ebx, bmp
    mov eax, dword ptr [ebx]
    mov srcWidth, eax
    imul eax, dword ptr [ebx+4]
    mov pixelTotal, eax

    ; rowSkip = bytes from the end of one image row to the next
    mov eax, dwWidth
    sub eax, srcWidth
    shl eax, 2
    mov rowSkip, eax

    ; edx = BUF + (y * dwWidth + x) * 4
    mov edx, dwWidth
    imul edx, y
    add edx, x
    shl edx, 2
    add edx, BUF

    xor ecx, ecx                    ; ecx = column within the current row
    xor eax, eax                    ; eax = linear pixel index

copyPixel:

    mov ebx, bmp
    mov ebx, dword ptr [ebx+eax*4+8] ; +8 skips the width/height header
    mov dword ptr [edx], ebx

    add edx, 4
    inc ecx
    mov ebx, srcWidth
    cmp ecx, ebx
    jl sameRow
    add edx, rowSkip                ; row finished: jump to the next line
    xor ecx, ecx
sameRow:

    inc eax
    cmp eax, pixelTotal
    jl copyPixel

    ret

draw_bmp endp

;-----------------------------------------------------------------------
; load_textures
;
; Allocates one block for dwTextureCount textures of
; dwTextureSize x dwTextureSize dwords, stores its address in
; dwTextures, and fills it by calling load_bmp once per texture.
;
; The file name in szTextureFile is edited in place after every load:
; the digit at offset 10 is incremented, and when it passes '9' it
; wraps to '0' and the character at offset 9 is incremented.
;
; Calls doerror with dwERRORALLOC and returns if the allocation fails.
;-----------------------------------------------------------------------
load_textures proc

    LOCAL texIndex :DWORD
    LOCAL bytesPerTex :DWORD
    LOCAL texCursor :DWORD

    ; bytesPerTex = dwTextureSize^2 * 4
    mov eax, dwTextureSize
    imul eax, eax
    shl eax, 2
    mov bytesPerTex, eax

    ; total allocation = bytesPerTex * dwTextureCount
    imul eax, dwTextureCount
    invoke crt_malloc, eax
    mov dwTextures, eax

    test eax, eax
    jnz haveMem
    invoke doerror, dwERRORALLOC
    ret
haveMem:

    mov texCursor, eax
    mov texIndex, 0

nextTexture:

    ; load the current file straight into its slot, flipped, without header
    mov edx, dwBmpFlagFlip
    or edx, dwBmpFlagNoHeader
    invoke load_bmp, addr szTextureFile, edx, texCursor

    ; advance the file name to the next texture
    movzx eax, byte ptr szTextureFile[10]
    inc eax
    cmp eax, '9'
    jle storeDigit
    mov eax, '0'
    inc byte ptr szTextureFile[9]
storeDigit:
    mov byte ptr szTextureFile[10], al

    ; advance the destination slot
    mov eax, bytesPerTex
    add texCursor, eax

    inc texIndex
    mov eax, texIndex
    cmp eax, dwTextureCount
    jl nextTexture

    ret

load_textures endp

;-----------------------------------------------------------------------
; load_bmp
;
; Reads a 24-bit BMP file and converts it to one dword per pixel
; (00RRGGBB).  Pixel data is assumed to start right after the 54-byte
; header.
;
; In:   szFile  = address of the zero-terminated file name
;       flags   = bit mask: dwBmpFlagFlip     -> store pixel (x, y) at
;                               index x*width + y instead of y*width + x
;                           dwBmpFlagNoHeader -> do not write the
;                               width/height dwords in front of the pixels
;       memaddr = destination buffer, or 0 to have one allocated with
;                 crt_malloc
; Out:  eax = address of the destination block (header included when one
;             was written).  On failure doerror is called and eax is
;             whatever doerror left in it.
;
; The file handle and the temporary read buffer are released on every
; exit path.
;-----------------------------------------------------------------------
load_bmp proc szFile :DWORD, flags :DWORD, memaddr :DWORD

    LOCAL FH :DWORD
    LOCAL dwRval :DWORD
    LOCAL bmpWidth :DWORD, bmpHeight :DWORD
    LOCAL tmpBuffer :DWORD, finalBuffer :DWORD
    LOCAL tmpSize :DWORD, finalSize :DWORD
    LOCAL tmpY :DWORD, finalY :DWORD, curX :DWORD
    LOCAL tmpYOff :DWORD, finalYOff :DWORD
    LOCAL tmpWidth :DWORD

    LOCAL flip :DWORD
    LOCAL noheader :DWORD

    ; flip = 1 when the flip flag is set, noheader = 1 when its flag is set
    mov eax, flags
    and eax, dwBmpFlagFlip
    mov flip, eax

    mov eax, flags
    and eax, dwBmpFlagNoHeader
    shr eax, 1
    mov noheader, eax

    mov FH, fopen(szFile)

    ; a failed open reports INVALID_HANDLE_VALUE; 0 is rejected as well
    .IF FH == 0 || FH == INVALID_HANDLE_VALUE
        invoke doerror, dwERRORBMPFILE
        ret
    .ENDIF

    mov dwRval, fread(FH, addr dbBmpHeader, 54)

    cmp dwRval, 0
    je badFile

    ; header offset 0: signature, must be 19778 ("BM")
    mov ebx, offset dbBmpHeader
    mov ax, word ptr[ebx]
    cmp ax, 19778
    jne badFile

    ; header offset 28: bits per pixel, must be 24
    mov ebx, offset dbBmpHeader
    add ebx, 28
    mov ax, word ptr[ebx]
    cmp ax, 24
    jne badFile

    ; header offsets 18 / 22: width and height in pixels
    mov ebx, offset dbBmpHeader
    add ebx, 18
    mov eax, dword ptr[ebx]
    mov bmpWidth, eax
    add ebx, 4
    mov eax, dword ptr[ebx]
    mov bmpHeight, eax

    ; tmpWidth = bytes per file row: width * 3 rounded up to a multiple of 4
    mov ecx, bmpWidth
    mov ebx, ecx
    add ecx, ebx
    add ecx, ebx
    mov ebx, ecx
    and ecx, 3
    .IF ecx > 0
        mov eax, 4
        sub eax, ecx
        add ebx, eax
        mov tmpWidth, ebx
    .ELSE
        mov tmpWidth, ebx
    .ENDIF

    ; tmpSize = bytes of pixel data in the file (tmpWidth is already bytes)
    invoke IntMul, tmpWidth, bmpHeight
    mov tmpSize, eax

    ; finalSize = width * height * 4 + 8 bytes of width/height header
    invoke IntMul, bmpWidth, bmpHeight
    mov finalSize, eax

    shl finalSize, 2 ; * 4
    add finalSize, 8

    invoke crt_malloc, tmpSize
    mov tmpBuffer, eax

    cmp tmpBuffer, 0
    je noTmpMemory

    mov dwRval, fread(FH, tmpBuffer, tmpSize)

    fclose(FH)

    .IF memaddr == 0

        invoke crt_malloc, finalSize
        mov finalBuffer, eax

        cmp finalBuffer, 0
        je noFinalMemory

    .ELSE

        mov eax, memaddr
        mov finalBuffer, eax

    .ENDIF


    ; set final header
    .IF noheader == 0
        mov ecx, finalBuffer
        mov ebx, bmpWidth
        mov dword ptr [ecx], ebx
        mov ebx, bmpHeight
        mov dword ptr [ecx+4], ebx

        add finalBuffer, 8          ; pixels follow the header
    .ENDIF

    ; convert pixels: file row tmpY (counting up) becomes output row
    ; finalY (counting down)

    xor eax, eax
    mov tmpY, eax
    mov eax, bmpHeight
    dec eax
    mov finalY, eax

nexty:

    xor eax, eax
    mov curX, eax

    invoke IntMul, tmpY, tmpWidth
    mov tmpYOff, eax                ; byte offset of the source row

    .IF flip == 1
        mov eax, finalY             ; flipped: the row number is the minor index
        mov finalYOff, eax
    .ELSE
        invoke IntMul, finalY, bmpWidth
        mov finalYOff, eax
    .ENDIF

  nextx:

    ; get pixel: eax = tmpBuffer + tmpYOff + curX * 3

    mov ebx, curX
    mov eax, ebx
    mov ecx, eax
    add eax, ecx
    add eax, ecx
    add eax, tmpYOff

    add eax, tmpBuffer

    ; assemble 00RRGGBB from the three file bytes (stored B, G, R)
    xor ebx, ebx
    add eax, 2
    mov bl, byte ptr [eax] ; pixel stored in ebx
    shl ebx, 8
    dec eax
    mov bl, byte ptr [eax]
    shl ebx, 8
    dec eax
    mov bl, byte ptr [eax]

    ; destination index
    .IF flip == 1
        mov eax, curX
        mov ecx, bmpWidth
        imul eax, ecx
        add eax, finalYOff          ; curX * width + finalY
    .ELSE
        mov eax, finalYOff
        add eax, curX               ; finalY * width + curX
    .ENDIF
    shl eax, 2

    add eax, finalBuffer

    ; set pixel
    mov dword ptr [eax], ebx

    add curX, 1
    mov eax, bmpWidth
    cmp curX, eax
    jl nextx
    ; loop x

    sub finalY, 1
    add tmpY, 1
    xor eax, eax
    cmp finalY, eax
    jge nexty
    ; loop y

    invoke crt_free, tmpBuffer

    mov eax, finalBuffer

    ; hand back the start of the block, header included
    .IF noheader == 0
        sub eax, 8
    .ENDIF

    ret

    ; ---- failure exits ----

badFile:                            ; unreadable or unsupported file
    fclose(FH)
    invoke doerror, dwERRORBMPFILE
    ret

noTmpMemory:                        ; read buffer allocation failed
    fclose(FH)
    invoke doerror, dwERRORALLOC
    ret

noFinalMemory:                      ; output allocation failed (file already closed)
    invoke crt_free, tmpBuffer
    invoke doerror, dwERRORALLOC
    ret

load_bmp endp

;-----------------------------------------------------------------------
; load_lut
;
; Builds the lookup tables used by the renderer:
;   - calls gen_depth_lut and gen_alpha_lut
;   - COS / SIN     : 256 dwords each,  round(255  * cos/sin(i * ang2rad))
;   - COS16 / SIN16 : 8192 dwords each, round(2047 * cos/sin(i * ang2rad16))
; The four tables are allocated with crt_malloc.  On an allocation
; failure doerror is called with dwERRORALLOC and the routine returns.
; Clobbers: eax, ecx, edx, flags, x87 state (finit is issued)
;-----------------------------------------------------------------------
load_lut proc

    LOCAL angle :DWORD
    LOCAL factor255 :DWORD
    LOCAL factor2047 :DWORD

    mov factor255, 255
    mov factor2047, 2047

    invoke gen_depth_lut
    invoke gen_alpha_lut

    ; allocate the four tables
    invoke crt_malloc, 256*4
    test eax, eax
    jz allocFailed
    mov COS, eax

    invoke crt_malloc, 256*4
    test eax, eax
    jz allocFailed
    mov SIN, eax

    invoke crt_malloc, 8192*4
    test eax, eax
    jz allocFailed
    mov COS16, eax

    invoke crt_malloc, 8192*4
    test eax, eax
    jz allocFailed
    mov SIN16, eax

    ; ---- 256-entry tables ----
    ; x87 stack inside the loop body: st(1) = 255, st(2) = ang2rad
    finit
    fld ang2rad
    fild factor255

    xor ecx, ecx
    mov angle, ecx
 fill256:

    mov ecx, angle

    fild angle                      ; table index
    fmul st, st(2)                  ; -> radians
    fcos
    fmul st, st(1)                  ; scale to +/-255
    mov edx, COS
    fistp dword ptr [edx+ecx*4]     ; rounded to integer on store

    fild angle
    fmul st, st(2)
    fsin
    fmul st, st(1)
    mov edx, SIN
    fistp dword ptr [edx+ecx*4]

    inc ecx
    mov angle, ecx
    cmp ecx, 256
    jl fill256

    ; ---- 8192-entry tables ----
    ; x87 stack inside the loop body: st(1) = 2047, st(2) = ang2rad16
    finit
    fld ang2rad16
    fild factor2047

    xor ecx, ecx
    mov angle, ecx
 fill8192:

    mov ecx, angle

    fild angle
    fmul st, st(2)
    fcos
    fmul st, st(1)                  ; scale to +/-2047
    mov edx, COS16
    fistp dword ptr [edx+ecx*4]

    fild angle
    fmul st, st(2)
    fsin
    fmul st, st(1)
    mov edx, SIN16
    fistp dword ptr [edx+ecx*4]

    inc ecx
    mov angle, ecx
    cmp ecx, 8192
    jl fill8192

    ret

allocFailed:
    invoke doerror, dwERRORALLOC
    ret

load_lut endp

; -----------------------------------------------------------------------
; load_map
;
; Reads a square byte map from a file and expands it into the dword grid
; MAP, whose rows are dwVMapSize dwords long.
;
; In:    szFileName = pointer to the zero-terminated file name
; Out:   MAP                      = newly allocated, zero-filled grid of
;                                   dwVMapSize * dwVMapSize dwords with the
;                                   file's bytes copied into its top-left
;                                   side x side corner
;        dbMapWidth / dbMapHeight = low byte of the map side length
;        no meaningful return value
; Error: doerror dwERRORMAPFILE if the file cannot be opened, nothing can
;        be read from it, or its side length is 0, larger than dwVMapSize,
;        or needs more bytes than the file holds;
;        doerror dwERRORALLOC if an allocation fails.
; Clobb: eax, ecx, edx, flags (ebx is preserved).
; -----------------------------------------------------------------------
load_map proc uses ebx szFileName :DWORD

    LOCAL FH :DWORD
    LOCAL dwRval :DWORD
    LOCAL wSize :DWORD          ; size of the map file in bytes
    LOCAL tmpMap :DWORD         ; raw file contents (one byte per cell)
    LOCAL cbMap :DWORD          ; size of MAP in bytes
    LOCAL mapSide :DWORD        ; side length taken from IntSqrt(wSize)

    ; The fopen macro is expected to hand back the CreateFile result,
    ; which signals failure with INVALID_HANDLE_VALUE; 0 is rejected too.
    mov FH, fopen(szFileName)
    mov eax, FH
    .IF eax == 0 || eax == INVALID_HANDLE_VALUE
        invoke doerror, dwERRORMAPFILE
        ret
    .ENDIF

    mov wSize, fsize(FH)

    ; Validate the geometry before anything is copied: a side of 0 would
    ; make the copy loops wrap around, a side above dwVMapSize would write
    ; past MAP, and side*side above wSize would read past tmpMap.
    invoke IntSqrt, wSize
    mov mapSide, eax
    .IF eax == 0 || eax > dwVMapSize
        invoke doerror, dwERRORMAPFILE
        ret
    .ENDIF
    imul eax, eax
    .IF eax > wSize
        invoke doerror, dwERRORMAPFILE
        ret
    .ENDIF

    invoke crt_malloc, wSize
    mov tmpMap, eax
    .IF tmpMap == 0
        invoke doerror, dwERRORALLOC
        ret
    .ENDIF

    mov dwRval, fread(FH, tmpMap, wSize)
    fclose(FH)

    ; wSize is at least 1 here, so a result of 0 means the read failed.
    .IF dwRval == 0
        invoke doerror, dwERRORMAPFILE
        ret
    .ENDIF

    ; MAP holds dwVMapSize * dwVMapSize dwords.
    mov eax, dwVMapSize
    imul eax, eax
    shl eax, 2
    mov cbMap, eax
    invoke crt_malloc, cbMap
    mov MAP, eax

    .IF MAP == 0
        invoke doerror, dwERRORALLOC
        ret
    .ENDIF

    ; Zero the whole grid. The count is non-zero because
    ; dwVMapSize >= mapSide >= 1 was checked above.
    mov eax, MAP
    mov ecx, cbMap
    shr ecx, 2
    xor edx, edx
  zLoop:
    mov dword ptr [eax], edx
    add eax, 4
    dec ecx
    jnz zLoop

    mov eax, mapSide
    mov dbMapWidth, al          ; only the low byte of the side is kept
    mov dbMapHeight, al

    ; convert tmpMap -> MAP
    ;   eax -> current source row (mapSide bytes)
    ;   ebx -> current destination row (dwVMapSize dwords)
    ;   ecx =  rows remaining (outer) / column index mapSide..1 (inner)

    mov eax, tmpMap
    mov ebx, MAP
    mov ecx, mapSide

  yLoop:

    push ecx

    mov ecx, mapSide

   xLoop:

    movzx edx, byte ptr [eax-1 + ecx]
    mov dword ptr [ebx-4 + 4*ecx], edx

    dec ecx
    jnz xLoop

    pop ecx

    add eax, mapSide            ; next source row
    mov edx, dwVMapSize
    shl edx, 2
    add ebx, edx                ; next destination row
    dec ecx
    jnz yLoop

    ; The raw copy is no longer needed once MAP is filled.
    invoke crt_free, tmpMap

    ret

load_map endp

; -----------------------------------------------------------------------
; get_map
;
; Returns the MAP cell that contains the point (x, y).
;
; In:    x, y = coordinates; each is shifted right by 11 (divided by
;               2048) to obtain the cell column / row
; Out:   eax  = dword stored at MAP[row][column], where one row is
;               (1 shl dwVMapSizeBits) dwords long
; Clobb: flags only; ebx is not touched, ecx and edx are restored.
; Note:  no bounds checking is performed on the cell indices.
; -----------------------------------------------------------------------
get_map proc uses ecx edx x :DWORD, y :DWORD

    mov eax, x
    shr eax, 11                 ; eax = cell column

    mov ecx, dwVMapSizeBits
    add ecx, 2                  ; log2 of the row pitch in bytes

    mov edx, y
    shr edx, 11                 ; edx = cell row
    shl edx, cl                 ; edx = byte offset of that row
    add edx, MAP                ; edx -> first cell of the row

    mov eax, dword ptr [edx + 4*eax]

    ret

get_map endp

; -----------------------------------------------------------------------
; get_texture_address
;
; Computes the address of the texture column hit at point (x, y).
;
; In:    x, y    = coordinates of the hit
;        hitType = 0 selects the column from y (mirrored when the texel
;                  x is not below half the texture size); any other value
;                  selects it from x (mirrored when the texel y is not
;                  above half the texture size)
; Out:   eax     = dwTextures
;                  + (get_map(x, y) - 1) * 4 shl (2 * dwTextureSizeBits)
;                  + column              * 4 shl dwTextureSizeBits
; Clobb: ebx (texture byte offset), ecx (= dwTextureSizeBits), flags.
;        ebx is NOT preserved for the caller.
; -----------------------------------------------------------------------
get_texture_address proc x :DWORD, y :DWORD, hitType :DWORD

    LOCAL texColumn :DWORD      ; byte offset of the selected column
    LOCAL texIndex :DWORD       ; zero-based texture number

    ; Map cells hold the texture number plus one.
    invoke get_map, x, y
    dec eax
    mov texIndex, eax

    ; Reduce both coordinates to a texel position inside the cell:
    ; drop the low (11 - dwTextureSizeBits) bits, then mask with size-1.
    mov ecx, 11
    sub ecx, dwTextureSizeBits
    mov ebx, dwTextureSize
    dec ebx                     ; ebx = dwTextureSize - 1

    mov eax, x
    shr eax, cl
    and eax, ebx
    mov x, eax

    mov eax, y
    shr eax, cl
    and eax, ebx
    mov y, eax

    mov ecx, dwTextureSize
    shr ecx, 1                  ; ecx = half the texture size

    ; Pick the column (eax) and decide whether it has to be mirrored.
    cmp hitType, 0
    jne gtaOtherSide

    mov eax, y
    cmp x, ecx
    jb gtaColumnReady           ; x < half: column = y
    jmp gtaMirror               ; otherwise column = size-1 - y

  gtaOtherSide:
    mov eax, x
    cmp y, ecx
    ja gtaColumnReady           ; y > half: column = x
                                ; otherwise column = size-1 - x
  gtaMirror:
    sub ebx, eax
    mov eax, ebx

  gtaColumnReady:
    mov ecx, dwTextureSizeBits

    shl eax, cl
    shl eax, 2
    mov texColumn, eax          ; column * texture size * 4 bytes

    mov ebx, texIndex
    shl ebx, cl
    shl ebx, cl
    shl ebx, 2                  ; texture number * size * size * 4 bytes

    mov eax, dwTextures
    add eax, ebx
    add eax, texColumn

    ret

get_texture_address endp

; -----------------------------------------------------------------------
; doerror
;
; Reports a fatal error and terminates the process; never returns.
;
; In:    dwCode = one of the values stored in dwERRORALLOC, dwERRORPTC,
;                 dwERRORMAPFILE, dwERRORLUTFILE or dwERRORBMPFILE
;
; The matching message is shown in a message box titled szError and the
; process then exits with code 0. For dwERRORPTC, deinit is invoked
; before the message box is shown. An unrecognised code exits at once
; without showing a message.
; -----------------------------------------------------------------------
doerror proc dwCode :DWORD

    LOCAL lpText :DWORD         ; address of the message to display

    mov eax, dwCode

    .IF eax == dwERRORALLOC
        mov lpText, OFFSET szErrorAlloc
    .ELSEIF eax == dwERRORPTC
        invoke deinit
        mov lpText, OFFSET szErrorPtc
    .ELSEIF eax == dwERRORMAPFILE
        mov lpText, OFFSET szErrorMapFile
    .ELSEIF eax == dwERRORLUTFILE
        mov lpText, OFFSET szErrorLutFile
    .ELSEIF eax == dwERRORBMPFILE
        mov lpText, OFFSET szErrorBMPFile
    .ELSE
        invoke ExitProcess, 0
    .ENDIF

    invoke MessageBox, 0, lpText, ADDR szError, MB_OK
    invoke ExitProcess, 0

doerror endp

END