*
* Fast font Assembly sub-routines
* 
* Copyright 1988 by Darren M. Greenwald
*
* Rev. 3.0 9/27/88
*
* New blitter based routine to handle any size font
*
* This new routine relies on the brute force approach, and the blitter!
* The font data is unpacked, and the masking, shifting, and target
* addresses are pre-calculated for speed.
*
* This version of the fast text routines can render fonts of any height,
* and of any size from 4-16 pixels wide.  Wider fonts, proportional fonts,
* and styles are currently not supported.
*
* This routine is copyrighted, and freely distributable.  It may not be
* used in any commercial product, shareware product, or commercial
* publication  without permission.  It is permissible to release this file
* as part of a public domain collection, or on a telecommunication service
* as long as no "special charge" is made for this program;
* "special charge" does not include standard charges for replication
* of the media, or use of the system.
*
* The intent of the above should be clear - I did the work, and am giving
* the code away for free.  While you may use it, you may not
* profit from it.
*
* The previous fast text routines I released in 1987 were public domain
* in every sense of the word.  In order to use these routines in a
* commercial, or shareware venture, you must ask permission - at the most
* I may ask in return that you include credit for the use of these
* routines within your program's documentation, and/or free registration, or
* a copy of the software you are developing.  In addition, I will provide
* answers if you have any questions, and just maybe provide assistance
* with modifying the routines to suit your needs.  All I ask is that you
* ask!
*

            XDEF     _InitFastText   ;Declare these labels visible to linker
            XDEF     _FreeFastText
            XDEF     _FastText

            XREF     _GfxBase
            XREF     _LVOAllocMem
            XREF     _LVOFreeMem
            XREF     _LVOBltTemplate
            XREF     _LVOText
            XREF     _OwnBlitter
            XREF     _DisownBlitter
            XREF     _WaitBlit

            include  "exec/types.i"
            include  "graphics/text.i"
            include  "hardware/blit.i"
            include  "hardware/custom.i"
            include  "graphics/rastport.i"
            CSEG

* ---------------------------------------------------------------------
* FreeFastText()
*
* Releases the two buffers allocated by InitFastText() (unpacked font
* data and the image buffer) and resets OutPut, so that any later call
* to FastText() falls back to Text() instead of blitting through
* pointers to memory that has already been freed.
*
* This is a safe routine - a NULL buffer pointer is simply skipped, so
* it may be called after a failed InitFastText(), or more than once.
*
* Takes no arguments.  A6 is preserved; D0-D1/A0-A1 are left to
* FreeMem() and should be treated as scratch.
* ---------------------------------------------------------------------
_FreeFastText:

            MOVE.L   A6,-(SP)          ;A6 is not a scratch register
            CLR.L    OutPut            ;FastText() must now use Text()
            MOVEA.L  $4,A6             ;ExecBase, used by both FreeMem calls

            MOVE.L   FontData,D0       ;safety check for NULL ptr
            BEQ      nodata
            MOVEA.L  D0,A1             ;address of buffer to free
            MOVE.L   FontSize,D0       ;size of buffer to free
            JSR      _LVOFreeMem(A6)
            CLR.L    FontData          ;set to NULL so it is never freed twice

nodata:

            MOVE.L   Imagebuf,D0
            BEQ      nobuf             ;safety valve
            MOVEA.L  D0,A1             ;address of buffer
            MOVE.L   Imagesize,D0      ;size of working buffer
            JSR      _LVOFreeMem(A6)
            CLR.L    Imagebuf          ;set to NULL so it is never freed twice

nobuf:      MOVEA.L  (SP)+,A6          ;restore caller's A6
            RTS

* ---------------------------------------------------------------------
* InitFastText(font)
*
* Prepares the fast text renderer for one font.  The single argument, a
* pointer to a TextFont structure, is passed on the stack.
*
* Returns non-zero in D0 when the blitter routines will be used, and 0
* when the font cannot be handled or memory is short.  In the failure
* case OutPut is cleared, so FastText() transparently calls Text()
* instead - the caller does not need any extra code.
*
* A font is rejected (Text() is used) when:
*   - tf_CharSpace is non-NULL (treated as a proportional font)
*   - tf_XSize is outside 4..16 pixels.  The lower bound matters: the
*     mask/shift/target tables hold 256 entries and Maxchars is
*     1008/XSize, so a width below 4 would index past the tables, and a
*     width of 0 would divide by zero.
*   - tf_YSize is below 4 (kept from the original code) or above 1024
*     (the height field of the blit size word is only 10 bits wide)
*
* Buffers left over from an earlier InitFastText() are released first,
* so the routine may be called again for a different font without
* leaking memory.
*
* Two chip ram buffers are allocated:
*   - FontData: YSize rows of 256 words, one left justified word per
*     character per row, with the bits beyond the glyph width masked off
*   - Imagebuf: YSize rows of 128 bytes to render a line of text into
*
* The blitter masks, shifts, and destination addresses for each
* character position are pre-calculated for speed during rendering.
*
* An indirect function pointer (OutPut) is set up based on the font
* width.  8 bit wide fonts can be rendered faster than other fonts, so
* a special blitter routine is devoted to them.
*
* All registers other than D0-D1/A0-A1 are preserved.
* ---------------------------------------------------------------------

font        EQU      04       ;stack offset of the argument at entry
initsave    EQU      36       ;bytes pushed by the register save below

_InitFastText:

            MOVEM.L  D2-D7/A2-A3/A6,-(SP) ;9 registers = initsave bytes

            JSR      _FreeFastText     ;release buffers of a previous init

            MOVEA.L  font+initsave(SP),A2 ;address of a text font struct

            TST.L    tf_CharSpace(A2)  ;is this a prop font?
            BNE      Initfailed

            MOVEQ    #00,D0

            MOVE.W   tf_XSize(A2),D0
            MOVE.L   D0,XSize          ;save
            CMP.W    #16,D0            ;wider than one word?
            BHI      Initfailed
            CMP.W    #4,D0             ;too narrow for the 256 entry tables?
            BCS      Initfailed        ;unsigned "lower than"

            MOVE.W   tf_YSize(A2),D0
            MOVE.L   D0,YSize          ;save
            CMP.W    #1024,D0          ;taller than the blitter can do at once?
            BHI      Initfailed
            CMP.W    #4,D0             ;height floor kept from the original
            BCS      Initfailed

            MOVE.W   tf_Baseline(A2),Baseline

* allocate space for font imagery in unpacked word format - egads, this
* uses a lot of memory, but it's the price to be paid I guess?
*
* FontData and Imagebuf are both NULL here (FreeFastText was called
* above), so a failed allocation leaves nothing dangling.

            MULU     #512,D0           ;1 WORD per line * 256 chars
            MOVE.L   D0,FontSize       ;save size of buffer
            MOVE.L   #$10003,D1        ;MEMF_PUBLIC|MEMF_CHIP|MEMF_CLEAR
            MOVEA.L  $4,A6
            JSR      _LVOAllocMem(A6)  ;allocate space
            TST.L    D0
            BEQ      Initfailed        ;no memory for font data
            MOVE.L   D0,FontData       ;save pointer

* allocate an image buffer for me to draw text in

            MOVE.L   YSize,D0
            MULU     #128,D0           ;128 bytes per row of the buffer
            MOVE.L   D0,Imagesize      ;save size
            MOVEQ    #03,D1            ;MEMF_PUBLIC|MEMF_CHIP
            MOVEA.L  $4,A6
            JSR      _LVOAllocMem(A6)  ;allocate space
            TST.L    D0
            BEQ      NoImagebuf        ;no memory for the image buffer
            MOVE.L   D0,Imagebuf       ;save pointer

* unpack font data
*
* Register use in the two loops below:
*   A0 = next tf_CharLoc entry (bit offset in high word, width in low)
*   A1 = tf_CharData           A3 = first row slot of current character
*   D0 = bit offset inside the source word (0-15)
*   D1 = characters left - 1   D2 = glyph width, then row modulo
*   D3 = byte offset of the source word (kept as a full long)
*   D4 = mask of the glyph's leftmost bits
*   D5 = rows left - 1         D6 = byte offset of the current row
*   D7 = work register

            MOVEA.L  tf_CharLoc(A2),A0 ;location of offsets|width
            MOVEA.L  tf_CharData(A2),A1
            MOVEA.L  FontData,A3       ;where to put unpacked imagery

            MOVEQ    #00,D0
            MOVE.B   tf_LoChar(A2),D0

            MOVEQ    #00,D1
            MOVE.B   tf_HiChar(A2),D1

            SUB.B    D0,D1             ;# of chars to unpack - 1 (for DBF)

            LSL.L    #1,D0             ;BYTE to WORD offset
            ADDA.L   D0,A3             ;adjust pntr into image array now

dounpack:   MOVE.L   (A0)+,D0          ;fetch offset|width of each char

            MOVEQ    #00,D2
            MOVE.W   D0,D2             ;width in D2

            SWAP     D0                ;pix offset in D0
            MOVEQ    #00,D3
            MOVE.W   D0,D3             ;bit offset, upper word stays 0

            AND.L    #$000F,D0         ;bit position inside the word
            LSR.W    #4,D3             ;calc word offset
            LSL.W    #1,D3             ;... as a byte offset

            MOVEQ    #-1,D4            ;create mask - all ones!
            LSR.L    D2,D4             ;clear the leftmost "width" bits
            NOT.L    D4                ;invert: only those bits are set

            MOVEQ    #00,D2            ;width no longer needed - D2 now holds
            MOVE.W   tf_Modulo(A2),D2  ;the row modulo, zero extended

            MOVE.L   YSize,D5
            SUBQ.W   #1,D5             ;-1 for DBF loop
            MOVEQ    #00,D6

* the source index is a long: with a word index the offset would wrap
* negative once it passed 32K, which large fonts can reach

unpkfont:   MOVE.L   0(A1,D3.L),D7     ;create in D7

            LSL.L    D0,D7             ;left justify the glyph
            AND.L    D4,D7             ;mask lower bits
            SWAP     D7                ;move to lower word

            MOVE.W   D7,0(A3,D6.L)     ;store

            ADD.L    D2,D3             ;next row of the strike font
            ADD.L    #512,D6           ;next row of unpacked data

            DBF      D5,unpkfont

            ADDQ.L   #2,A3             ;next character's column

            DBF      D1,dounpack

* set up arrays of pre-inited masks, shift values, and target addresses
*
* D7 is OR-ed into every shift entry.  Bltslow writes the entries to
* bltcon0, so they carry the channel/minterm bits; Bltfast writes them
* to bltcon1, so for 8 bit wide fonts nothing but the shift is stored.

            MOVE.W   #DEST+SRCB+SRCA+$FC,D7

            MOVE.L   XSize,D0
            CMP.W    #08,D0            ;if equal to 8, use FASTEST mode!
            BNE      docon0
            MOVEQ    #00,D7            ;bltcon mask for bltcon1, or 0

docon0:

            MOVE.W   #255,D1           ;loop 256 times
            MOVEQ    #00,D0            ;initial pixel offset

            MOVEA.L  #masks,A0
            MOVEA.L  #shift,A1
            MOVEA.L  #local,A2         ;font pointer is no longer needed

            MOVEQ    #12,D5            ;shift value used later
            MOVE.L   XSize,D6          ;used later

setup:      MOVE.L   D0,D2
            MOVE.L   D2,D3

            LSR.W    #4,D2             ;calculate word offset
            LSL.W    #1,D2             ; div by 16 * 2 for WORD offset

            AND.W    #$000F,D3         ;mask out all but pix offset

            MOVE.L   D2,D4
            ADD.L    Imagebuf,D4       ;address to write this to
            MOVE.L   D4,(A2)+          ;save address

            CLR.W    D4                ;create mask
            NOT.W    D4                ;all ones!
            LSR.W    D3,D4             ;clear the bits left of this position
            NOT.W    D4                ;invert: keeps what is already drawn

            MOVE.W   D4,(A0)+          ;save mask

            LSL.W    D5,D3             ;shift for bltcon format
            OR.W     D7,D3             ;depends on con0, or con1
            MOVE.W   D3,(A1)+          ;save shift

            ADD.L    D6,D0             ;pixel offset of the next character
            DBF      D1,setup

* determine maximum # of characters which can be drawn
* (XSize is at least 4 here, so the divide cannot trap and the result
* never exceeds the 256 table entries)

            MOVE.L   XSize,D0
            MOVE.L   #1008,D1
            DIVU     D0,D1
            MOVE.W   D1,Maxchars

* set-up blitter control functions based on font width
* optimize for 8 bit wide fonts!

            CMP.W    #08,D0            ;if equal to 8, use FASTER mode!
            BEQ      faster

slower:     MOVE.L   #Bltslow,OutPut
            MOVEQ    #2,D0             ;blit width in words
            BRA      setsize

faster:     MOVE.L   #Bltfast,OutPut
            MOVEQ    #1,D0             ;blit width in words

setsize:    MOVE.L   YSize,D1          ;height
            LSL.W    #6,D1             ;per blitter's delight
            OR.W     D1,D0
            MOVE.L   D0,blitsize

* D0 still holds the blit size, which is never 0 - that is the TRUE
* result.  MOVEM leaves D0 alone.

            MOVEM.L  (SP)+,D2-D7/A2-A3/A6
            RTS                        ;return TRUE


* some error, return false

NoImagebuf: JSR      _FreeFastText     ;release alloced memory

* indicates regular system Text() may be used via these calls

Initfailed: CLR.L    OutPut            ;default to calling Text()
            MOVEQ    #00,D0            ;return FALSE
            MOVEM.L  (SP)+,D2-D7/A2-A3/A6
            RTS


* ---------------------------------------------------------------------
* FastText(rp, text, count)
*
* Renders up to "count" characters of "text" at the current pen
* position of RastPort "rp".  All three arguments are on the stack.
*
* When InitFastText() failed (OutPut is NULL) the call is handed to
* Text() unchanged.  Otherwise the line is built in the image buffer by
* one of the blitter routines below and copied into the RastPort with
* BltTemplate(); rp_cp_x is advanced by the rendered width.
*
* The blitter routines vary somewhat to optimize for speed!  8 bit wide
* fonts are drawn much faster than other width fonts.  It makes sense
* to do this since 8 bit wide fonts are the most common, and because I
* can make use of some special blitter capabilities to speed the
* process along!
*
* Rendering stops early at a NULL byte, and "count" is limited to
* Maxchars so that the pre-calculated tables are never overrun.  The
* comparison is made on the full long word: a word compare would let a
* count such as $10000 slip through and loop 65536 times.
*
* All registers other than D0-D1/A0-A1 are preserved.
* ---------------------------------------------------------------------


rp          EQU      0004        ;stack offsets at entry
txbptr      EQU      rp+4
count       EQU      txbptr+4
ftsave      EQU      36          ;bytes pushed by the register save below

_FastText:  TST.L    OutPut
            BEQ      CallText    ;if InitFastText() failed!

            TST.L    count(SP)
            BEQ      Noway       ;why would you put a 0 for # of chars?

            MOVEM.L  D2-D7/A2-A3/A6,-(SP) ;9 registers = ftsave bytes

            MOVE.L   count+ftsave(SP),D3
            MOVEQ    #00,D0
            MOVE.W   Maxchars,D0 ;zero extended limit
            CMP.L    D0,D3       ;unsigned compare of the whole count
            BLS      getblitter
            MOVE.L   D0,D3       ;prevent overflow

* ok, I need that blitter for me only!

getblitter: SUBQ.W   #1,D3       ;-1 for the DBF loops
            JSR      _OwnBlitter
            JSR      _WaitBlit

* set-up globals first
* D4 = 0 (width so far), D5 = FontData, D6 = XSize, D7 = blitsize,
* A1 = masks, A2 = shift, A3 = local - see "group" in the data segment

            MOVEA.L  txbptr+ftsave(SP),A0
            MOVEM.L  group,D4-D7/A1-A3

* call appropriate routine

            MOVEA.L  OutPut,A6
            JSR      (A6)

* free blitter for someone else
* (D4 is relied upon to survive this call, as in the original code)

            JSR      _DisownBlitter

* nothing to copy if the text started with a NULL byte

            TST.L    D4
            BEQ      Textdone

* now blast this into the rastport!

            MOVEA.L  Imagebuf,A0       ;address of bitplane
            MOVEQ    #00,D0            ;no offset
            MOVE.L   #128,D1           ;src modulo
            MOVEA.L  rp+ftsave(SP),A1  ;rastport
            CLR.L    D2
            MOVE.W   rp_cp_x(A1),D2    ;x dest
            ADD.W    D4,rp_cp_x(A1)    ;modify Move() position
            CLR.L    D3
            MOVE.W   rp_cp_y(A1),D3    ;dest y
            SUB.W    Baseline,D3       ;- baseline of font
            MOVE.L   YSize,D5          ;y size
            MOVEA.L  _GfxBase,A6
            JSR      _LVOBltTemplate(A6)

Textdone:   MOVEM.L  (SP)+,D2-D7/A2-A3/A6
Noway:      RTS


* If InitFastText() failed, Text() may still be called via the
* FastText() function.
*
* This makes use in a system which must use any size font easier to program.
*
* Entered by a branch from FastText() before anything has been pushed,
* so the stack still holds the return address followed by rp, the text
* pointer and the count.  A6 is saved around the library call because it
* is not a scratch register; the stack offsets below allow for the extra
* long word.  D0 is left as Text() set it.
*
CallText:   MOVE.L   A6,-(SP)          ;preserve caller's A6
            MOVEA.L  rp+4(SP),A1       ;rastport
            MOVEA.L  txbptr+4(SP),A0   ;text
            MOVE.L   count+4(SP),D0    ;number of characters
            MOVEA.L  _GfxBase,A6
            JSR      _LVOText(A6)
            MOVEA.L  (SP)+,A6          ;MOVEA leaves D0 and the flags alone
            RTS

* --------------------------------------------------
* Blitter loop for fonts that are exactly 8 bits wide
*
* Entered from FastText() with:
*   A0 = text                  D3 = character count - 1
*   A1 = masks table           D4 = running pixel width
*   A2 = shift table           D5 = address of unpacked font data
*   A3 = target address table  D6 = font width
*                              D7 = blit size word
*
* Each character is one single word wide blit: D = A or B, where A is
* the target word itself cut down by the first word mask and B is the
* character image moved into place by the bltcon1 shift.
*
* Stops at a NULL byte or when the count runs out.
* --------------------------------------------------
Bltfast:    LEA      $dff000,A6        ;base of the custom chips

* programmed once - the same for every character

            MOVE.W   #510,bltbmod(A6)  ;font rows are 256 words apart
            MOVE.W   #126,bltamod(A6)  ;image rows are 128 bytes apart
            MOVE.W   #126,bltdmod(A6)
            MOVE.W   #$FFFF,bltalwm(A6)   ;no last word mask
            MOVE.W   #DEST+SRCB+SRCA+$FC,bltcon0(A6)

* work out the next character while the previous blit is still running

fastnext:   MOVEQ    #00,D0
            MOVE.B   (A0)+,D0          ;next character code
            BEQ      fastexit          ;NULL ends the text

            ADD.W    D0,D0             ;one word per character
            ADD.L    D5,D0             ;address of its first row

            ADD.L    D6,D4             ;one more character's worth of width

fastbusy:   BTST     #6,dmaconr(A6)    ;previous blit finished yet?
            BNE      fastbusy

* programmed afresh for each character

            MOVE.L   (A3),bltapt(A6)   ;a reads the target word
            MOVE.L   (A3)+,bltdpt(A6)  ;d writes it back
            MOVE.L   D0,bltbpt(A6)     ;b reads the character
            MOVE.W   (A2)+,bltcon1(A6) ;shift value for b
            MOVE.W   (A1)+,bltafwm(A6) ;first word mask

            MOVE.W   D7,bltsize(A6)    ;writing the size starts the blit
            DBF      D3,fastnext       ;char counter

fastexit:   RTS

* --------------------------------------------------
* This is the slower mode for odd width fonts!
*
* Entered from FastText() with:
*   A0 = text                  D3 = character count - 1
*   A1 = masks table           D4 = running pixel width
*   A2 = shift table           D5 = address of unpacked font data
*   A3 = target address table  D6 = font width
*                              D7 = blit size word
*
* Every blit is two words wide, because a shifted character can run
* over into the following word.  Whenever a character starts in a new
* target word, a first blit (d=a, masked) wipes everything to the right
* of what has already been drawn; a second blit then ORs the shifted
* character into place.
*
* Stops at a NULL byte or when the count runs out.
* --------------------------------------------------
Bltslow:    MOVEA.L  #$dff000,A6       ;address of custom chips
            MOVEQ    #00,D1            ;target word of the previous character

* these registers dont change once set
*
* MOVE is used rather than CLR: on the 68000 CLR reads its destination
* before writing it, and these custom chip registers are write-only

            MOVE.W   #0,bltalwm(A6)    ;last word mask
            MOVE.W   #0,bltcon1(A6)    ;no special modes
            MOVE.W   #124,bltbmod(A6)  ;modulos
            MOVE.W   #124,bltdmod(A6)

* this bit clears the image buffer as needed - speed optimized
* thinner fonts will be drawn faster due to less hits
* a hit is TRUE whenever the font imagery rolls over into the next word

doblits:    MOVE.W   (A1)+,D0          ;mask for this position
            MOVE.L   (A3)+,D2          ;target address for this position
            CMP.L    D2,D1
            BEQ      dochars           ;working on same word

Waitlast:   BTST     #6,dmaconr(A6)    ;wait if needed
            BNE      Waitlast

            MOVE.L   D2,D1             ;save for next time around

            MOVE.W   #124,bltamod(A6)
            MOVE.W   D0,bltafwm(A6)    ;keep only what is already drawn
            MOVE.L   D2,bltdpt(A6)     ;d src pntr
            MOVE.L   D2,bltapt(A6)     ;a src pntr
            MOVE.W   #DEST+SRCA+$F0,bltcon0(A6) ;d=a
            MOVE.W   D7,bltsize(A6)    ;trigger blitter clear

* setup while last blit is busy

dochars:    CLR.L    D0
            MOVE.B   (A0)+,D0
            BEQ      bltodddone        ;terminate if NULL found

            LSL.W    #1,D0             ;cast to WORD offset
            ADD.L    D5,D0             ;ready with pointer!

            ADD.L    D6,D4             ;add to width

Waitpack:   BTST     #6,dmaconr(A6)    ;wait for image packing
            BNE      Waitpack

* set up blitter for character image move

            MOVE.W   #508,bltamod(A6)  ;font rows are 256 words apart
            MOVE.W   #$FFFF,bltafwm(A6)
            MOVE.L   D0,bltapt(A6)     ;a src
            MOVE.L   D2,bltbpt(A6)     ;b src pntr
            MOVE.L   D2,bltdpt(A6)     ;d src pntr
            MOVE.W   (A2)+,bltcon0(A6) ;shift value for a
            MOVE.W   D7,bltsize(A6)    ;trigger blit

            DBF      D3,doblits        ;checks count

bltodddone: RTS


            DSEG
* global variables I use - private!
* Only one font can be set up at a time: InitFastText() fills these in
* and FastText() / FreeFastText() read them back.

Imagebuf:   dc.l     0     ;address of image buffer, NULL when not allocated
Imagesize:  ds.l     1     ;size of working buffer in bytes

FontSize:   ds.l     1     ;size of font data buffer in bytes

YSize:      ds.l     1     ;size of font height

Maxchars:   ds.w     1     ;Max chars which can be drawn at one time

Baseline:   ds.w     1     ;baseline of font

OutPut:     dc.l     0     ;address of function to call or NULL for Text()

masks:      ds.w     256   ;256 words for masks
shift:      ds.w     256   ;256 shift values
local:      ds.l     256   ;256 pointers to target location

* these are grouped for a good reason - the MOVEM instruction is used
* by the rendering routines for efficiency - DONT play with the order
* of these unless you know what you are doing.
*
* FastText() loads the seven long words below with a single
* MOVEM.L group,D4-D7/A1-A3, so each entry lands in the register named
* in its comment.

group:      dc.l     0     ;D4 - 0 to init width of line register
FontData:   dc.l     0     ;D5 - address of font data, NULL when not allocated
XSize:      dc.l     0     ;D6 - size of font width
blitsize:   dc.l     0     ;D7 - blit size value
            dc.l     masks ;A1 - address of arrays
            dc.l     shift ;A2
            dc.l     local ;A3


            END

