; *****************************************************************************
; *****************************************************************************
;
;               Rendering Images to the Tile Line Buffer
;
;       Sorry this is so horrific but as we spend a lot of time doing
;       this I thought it needed to be quick !!!!!
;
;
; *****************************************************************************
; *****************************************************************************

; *****************************************************************************
;
;       RenderLine - draw one row of tiles into the TileBuffer
;
;       On entry:
;         DS:DI -> the tile (character) numbers of the row, one byte each
;         CH    =  number of tiles to draw
;         BX    =  position in the TileBuffer of the first tile
;         DH    =  tile horizontal# of the first tile
;         DL    =  tile vertical# of the row
;         SI    =  tile attribute table (BASE)
;
;       On exit AX,BX,CX,DX,DI,SI,BP all hold their entry values again.
;
;       The attribute table works in 4 x 4 tile units (giving 8 x 7
;       attribute table items, 8 bytes to a row). Within one attribute
;       byte the bit pairs are laid out as
;
;       0,1 topleft     2,3 topright    4,5 bottomleft  6,7 bottomright
;
; *****************************************************************************

RenderLine:
        push    ax,bx,cx,dx,di,si,bp
        ;
        ;       cl = how far the attribute byte has to be shifted right
        ;       for the vertical half : 4 when bit 1 of the tile vertical
        ;       is set (rows 2,3 6,7 10,11 ...), 0 otherwise.
        ;       Done without a branch : (v and 2) * 2.
        ;
        mov     cl,dl                   ; cl = tile vertical
        and     cl,2                    ; keep the "lower half" bit only
        shl     cl,1                    ; 2 -> 4, 0 stays 0
        ;
        ;       Step si to the attribute row of this line. The row offset
        ;       is (v div 4) * 8 : dropping the two low bits of v gives
        ;       (v div 4) * 4, doubling that gives the times 8.
        ;
        mov     al,dl                   ; al = tile vertical
        and     al,0FCh                 ; al = (v div 4) * 4
        xor     ah,ah                   ; ax = (v div 4) * 4
        shl     ax,1                    ; ax = (v div 4) * 8
        add     si,ax                   ; si = attribute bytes of THIS line
        ;
        ;       One pass of this loop per tile drawn
        ;
RenderLineLoop:
        ;
        ;       ax = VROM + tile# * 16 (16 pattern bytes per tile), moved
        ;       on by 01000h when bit 4 of PPUCtrl1 is set
        ;
        mov     al,[di]                 ; al = tile number
        xor     ah,ah                   ; ax = tile number
        shl     ax,4                    ; ax = tile number * 16
        add     ax,VROM                 ; ax = address of its pattern
        test    byte [PPUCtrl1],010h    ; second pattern table selected ?
        jz      _RL1                    ; no, leave the pointer alone
        add     ah,010h                 ; yes, ax = ax + 01000h
_RL1:   mov     bp,ax                   ; bp = pattern for RenderBitPattern

        ;
        ;       Pick up the attribute byte that covers this tile, it
        ;       lives at si + (h div 4). bx is borrowed as the index
        ;       register, so it is saved round the access.
        ;
        push    bx                      ; keep the buffer position safe
        mov     bl,dh                   ; bl = tile horizontal
        shr     bl,2                    ; bl = h div 4
        xor     bh,bh                   ; bx = h div 4
        mov     dl,[bx+si]              ; dl = attribute byte
        pop     bx                      ; buffer position back

        ;
        ;       Bring the wanted bit pair down to bits 0,1 :
        ;       right half (h = 2,3 6,7 ...) shifts by 2,
        ;       lower half shifts by cl (0 or 4, worked out above)
        ;
        test    dh,2                    ; right hand half ?
        jz      _RL3
        shr     dl,2                    ; yes, skip the left hand pair
_RL3:   shr     dl,cl                   ; skip the top pairs if lower half
        and     dl,3                    ; dl = the 2 attribute bits only

        ;
        ;       Draw the tile. RenderBitPattern moves bx on to the next
        ;       tile position and corrupts cx,dx,si, so those are kept
        ;       on the stack.
        ;
        push    cx
        push    dx
        push    si
        call    RenderBitPattern        ; draw this tile
        pop     si
        pop     dx
        pop     cx

        inc     di                      ; on to the next tile number
        inc     dh                      ; and the next horizontal tile
        dec     ch                      ; one less to do
        jnz     RenderLineLoop

        pop     bp,si,di,dx,cx,bx,ax
        ret

; *****************************************************************************
;
;   Render 8 bit patterns (from DS:BP onwards, each representing 8 colour
;   bits) into memory at DS:BX.
;
;   DL Contains the 2 bits from the attribute table (in 0 and 1)
;   The colours come from the TILE Palette table
;
;   On Exit AX,CX,DX,SI are corrupted,BX points to next horizontal space.
;
;   Each Byteline is 30 (27+3) instructions, so writing one character
;   takes 250 instructions. One screen should take about 200,000 instructions
;   to complete (if its completely redrawn)
;
; *****************************************************************************

; byteline lo,hi
;
; Draws one pixel row of the tile: loads the pattern byte at offset lo from
; BP into AL and the pattern byte at offset hi from BP into AH, then calls
; BitLine to write that row out.
byteline macro
        mov     al,#1[bp]
        mov     ah,#2[bp]
        call    BitLine
#em

;
;   RenderBitPattern
;
;   In:   DS:BP -> the 16 pattern bytes of the tile (byte n and byte n+8
;                  are paired up for pixel row n, see the bytelines below)
;         DS:BX -> where the top left pixel of the tile is written
;         DL    =  attribute bits (0-3) selecting one 4 entry group of
;                  NESPalette
;   Out:  BX    =  entry BX + 8 (the next tile position across)
;         AX,CX,DX,SI corrupted
;
RenderBitPattern:
        shl     dl,2                    ; attribute * 4 = offset of its group
        xor     dh,dh                   ; dx = that offset
        mov     si,NESPalette
        add     si,dx                   ; si -> the 4 entries for this tile
        mov     cl,[NESPalette]         ; colour 0 is always taken from the
                                        ; very first palette entry, whatever
                                        ; the attribute (so [si] is not read)
        mov     ch,1[si]                ; colour 1
        mov     dl,2[si]                ; colour 2
        mov     dh,3[si]                ; colour 3
        and     cx,3F3Fh                ; keep 6 bits of each colour ...
        add     cx,4040h                ; ... and move each up by 40h, so
        and     dx,3F3Fh                ; every colour ends up in the range
        add     dx,4040h                ; 40h-7Fh

        byteline 0,8                    ; do each of the bytelines, each one
        byteline 1,9                    ; leaves bx one buffer line (320
        byteline 2,10                   ; bytes) further down
        byteline 3,11
        byteline 4,12
        byteline 5,13
        byteline 6,14
        byteline 7,15

        add     bx,8-320*8              ; back up the 8 lines just drawn and
                                        ; 8 bytes across to the next tile
        ret

; *****************************************************************************
;
;       Output a bitline (in AL,0 bits,AH 1 bits). The Colours are in
;       CL/CH/DL/DH
;
;       In:   AL,AH = the two pattern bytes of this pixel row
;             CL    = colour written when both bits of a pixel are 0
;             CH,DL,DH = the other three colours (used by the BitBlastVec
;                     routines)
;             DS:BX -> the first of the 8 bytes to write
;       Out:  BX    = entry BX + 320 (same column, next line down)
;             AX,SI corrupted (plus whatever the BitBlastVec routines
;             change - they are not part of this listing)
;
;       Each bitblast takes 5 instructions,so this function takes
;       27 instructions to complete
;
; *****************************************************************************

BitLine:test    ax,ax                   ; both pattern bytes zero ?
        jz      BlBgr                   ; yes, all 8 pixels are background
                                        ; here ah = h7-h4 h3-h0 al = l7-l4 l3-l0
        ror     al,4                    ; now  ah = h7-h4 h3-h0 al = l3-l0 l7-l4
        ror     ax,4                    ; now  ah = l7-l4 h7-h4 al = h3-h0 l3-l0
        ror     ah,4                    ; now  ah = h7-h4 l7-l4 al = h3-h0 l3-l0

        push    ax                      ; save al for later,we're doing ah first
        mov     al,ah                   ; ax = index for the left 4 pixels
        xor     ah,ah
        add     ax,ax                   ; * 2, the vectors are words
        mov     si,ax
        call    BitBlastVec[cs:si]      ; call the writing vector
        add     bx,4                    ; on to the right hand 4 pixels

        pop     ax                      ; al = index for the right 4 pixels
        xor     ah,ah
        add     ax,ax                   ; * 2 again
        mov     si,ax
        call    BitBlastVec[cs:si]
        add     bx,320-4                ; next line down
        ret

BlBgr:  mov     al,cl                   ; whole 8 bits is background
        mov     ah,cl                   ; ax = background colour twice
        mov     [bx],ax                 ; 4 word writes = 8 pixels
        mov     2[bx],ax
        mov     4[bx],ax
        mov     6[bx],ax
        add     bx,320                  ; next line down
        ret

; BitBlastVec + i * 2 writes the bits required
;
; i = h7 h6 h5 h4 l7 l6 l5 l4 for example
;
; each function writes four pixels (h7l7 h6l6 h5l5 h4l4) onto the screen
; using the colours in cl ch dl dh
;
; its quicker than trying to decode the bloody thing.
