\ Copyright 1989 NerveWare
\ No portion of this code may be used for commercial purposes,
\ nor may any executable version of this code be distributed for 
\ commercial purposes without the author's express written permission.
\ This code is shareware, all rights reserved.

\ Nick Didkovsky					2/17/89

\  			MandelBitMasks2.ASM


\ This file is used to tag pixels as being already calculated or not, as 
\ well as for membership in the mandelbrot set. 

getmodule includes

 include? mandel-width VariScreen.JF
 include? MandelMax MandelShift2.ASM

decimal

ANEW task-MandelBitMasks_ASM

decimal

\ mark-table holds the address of the bit table that tags pixels already
\ generated.
\ mandel-table holds the address of the bit table that tags those pixels which
\ are also in the mandelbrot set.
\ Both are filled in by INIT.TABLES and hold 0 when that word has cleared them.

VARIABLE mark-table
VARIABLE mandel-table

\ Size in bytes of each table: the count handed to ALLOCBLOCK and ERASE.
\ At eight pixels per byte this leaves room for 256000 pixel bits per table.
32000 CONSTANT table-size 

\ ************************ ALLOCATE TABLES  *******************************

: INIT.TABLES ( -- )
  \ Allocate both bit tables (TABLE-SIZE bytes each, requested with MEMF_CLEAR)
  \ and store their addresses in MARK-TABLE and MANDEL-TABLE.
  \ Aborts with a message if either allocation fails.
  \ Both pointers are zeroed first without freeing, so call FREE.TABLES
  \ beforehand if tables from an earlier INIT.TABLES are still allocated.
  mark-table off
  mandel-table off
  MEMF_CLEAR table-size allocblock ?dup
        if mark-table ! cr ." MARK-TABLE allocated successfully" cr
        else ." can't allocate MARK-TABLE!" cr abort
        then
  MEMF_CLEAR table-size allocblock ?dup
        if mandel-table ! ." MANDEL-TABLE allocated successfully" cr
        else
           \ MARK-TABLE was just allocated above; hand it back and clear its
           \ pointer so a failed init does not leave a half-built state that
           \ the next INIT.TABLES would overwrite and leak.
           mark-table @ freeblock  mark-table off
           ." can't allocate MANDEL-TABLE!" cr abort
        then
;

: FREE.TABLES ( -- )
  \ Release whichever tables are currently allocated (non-zero pointer).
  \ Each pointer is cleared right after its block is freed, so calling this
  \ word again is harmless instead of freeing the same block twice.
  mark-table @ ?dup IF freeblock  mark-table off  cr ." mark-table freed" cr THEN
  mandel-table @ ?dup IF freeblock  mandel-table off  ." mandel-table freed" cr THEN
;

\ reset all bytes to zero
\ A table whose pointer is 0 (nothing allocated) is skipped, so ERASE is never
\ aimed at address 0.

: CLEAR.TABLES ( -- )
  mark-table @ ?dup IF table-size erase THEN
  mandel-table @ ?dup IF table-size erase THEN
;

: LOOK.TABLES ( -- )
  \ Debug aid: print a heading and DUMP the first 50 bytes of each table.
  cr ." MARK-TABLE" cr      mark-table @    50 dump
  cr ." MANDEL-TABLE" cr    mandel-table @  50 dump
;

\ *************************** BIT MASKS *********************************
\ Eight pixels tagged per byte.  BIT-MASKS is used to single out the bit 
\ in a byte associated with the pixel we want to tag.

\ Eight one-byte masks; entry N has only bit N set.
CREATE BIT-MASKS
    1 C,    \ entry 0
    2 C,    \ entry 1
    4 C,    \ entry 2
    8 C,    \ entry 3
   16 C,    \ entry 4
   32 C,    \ entry 5
   64 C,    \ entry 6
  128 C,    \ entry 7

\ ORs MASK into the byte stored at ^MARK-TABLE, leaving bits already set alone.
\ 3.04 sec for 64000 of these
 : CSET     ( mask ^mark-table -- )
   dup >r      \ park a copy of the byte address
   c@ or       \ merge the mask into the current byte value
   r> c!       \ write the merged byte back
 ;

\ 2.16 sec for 64000 of these
\ Assembler version of CSET: ORs MASK into the byte at ^MARK-TABLE.
\ NOTE(review): TOS, DSP and ORG are taken to be the assembler's names for the
\ cached top-of-stack register, the data stack pointer and the base register
\ that Forth addresses are indexed from -- confirm against the JForth manual.
\ Uses d0 as scratch.
ASM CSET.ASM ( mask ^mark-table -- )
move.l	tos,d0			\ keep the byte address in d0
moveq.l	#0,tos			\ clear tos so the byte load below is zero-extended
move.b	$0(org,d0.l),tos 	\ c@ to tos
or.l	tos,(dsp)		\ OR gives newmask on (dsp)
move.l	d0,tos			\ byte address back into tos
move.b	$3(dsp),$0(org,tos.l)	\ c!  ($3(dsp) is the low byte of the cell)
addq.l	#$4,dsp 		\ nip 
move.l	(dsp)+,tos		\ drop
END-CODE

\ given a pixel offset, returns a bit mask and address offset into table.
\ The mask is the BIT-MASKS entry selected by the low three bits of the offset;
\ the address offset is the pixel offset run through 3 -SHIFT.
\ 64000 calls take 3.70 sec 
: GET-MASK   ( offset -- mask address-offset)
  dup 7 and      ( -- offset offset-AND-7)
  bit-masks +    ( -- offset ^mask-entry)
  c@             ( -- offset mask)
  swap           ( -- mask offset)
  3 -shift       ( -- mask address-offset)
;

\ 64000 calls take 2.98 sec
\ Assembler version of GET-MASK: the mask is the BIT-MASKS byte indexed by
\ (offset AND 7); the address offset is the offset shifted right three places.
\ Uses d1 as scratch.
ASM GET-MASK.ASM ( offset -- mask address-offset)
callcfa	bit-masks		\ ( -- offset ^table)
move.l	(dsp),d1		\ ( -- offset ^table), copy offset to d1
and.l	#$7,d1			\ fancy quick modulo 8, offset-AND-7 in d1
add.l	tos,d1			\ ( -- offset ^table) ^table+offsetAND7 in d1
moveq.l	#0,tos			\ ( -- offset 0 ) c@ prep
move.b	$0(org,d1.l),tos	\ byte.at^table+offsetAND7 to TOS
move.l	(dsp),d1		\ move (dsp) to data reg to do asr
asr.l	#$3,d1			\ arithmetic shift right by 3: address offset in d1
move.l	tos,(dsp)		\ mask goes under the top of stack and the
move.l	d1,tos 			\ address offset on top, matching the stack diagram
end-code

\ returns mask and MARK-TABLE address associated with a pixel.
: PIXEL#>MASK  ( Pixel# -- mask ^MARK-TABLE)
  get-mask.asm   ( -- mask address-offset)
  mark-table @   ( -- mask address-offset table-address)
  +              ( -- mask ^MARK-TABLE)
;

\ returns mask and MANDEL-TABLE address associated with a pixel.
: PIXEL#>MANDELMASK  ( Pixel# -- mask ^MANDEL-TABLE)
  get-mask.asm     ( -- mask address-offset)
  mandel-table @   ( -- mask address-offset table-address)
  +                ( -- mask ^MANDEL-TABLE)
;

\ used to tag a bit in both tables
\ Leaves the mask/address pair for MANDEL-TABLE underneath the pair for
\ MARK-TABLE.
: PIXEL#>MASK.BOTH ( pixel# -- mask ^mandel-table mask ^mark-table)
  get-mask.asm 2dup   ( -- mask addr-off mask addr-off)
  mandel-table @ +    ( -- mask addr-off mask ^mandel-table)
  2swap               ( -- mask ^mandel-table mask addr-off)
  mark-table @ +      ( -- mask ^mandel-table mask ^mark-table)
;

\ ************************ SET.PIXEL, GET.PIXEL ****************************

\ mark a pixel with a bit in MARK-TABLE
: SET.PIXEL ( pixel# -- )
  \ same steps as PIXEL#>MASK, written out in place
  get-mask.asm mark-table @ +   ( -- mask ^byte)
  \ CSET.ASM reads the byte, ORs the mask in and writes it back by itself,
  \ so no separate C@ OR is needed in front of it.
  cset.asm
;

\ check if a pixel has been marked in MARK-TABLE
\ Leaves the pixel's bit mask when its bit is set, otherwise 0.
: GET.PIXEL ( pixel# -- mask | 0)
  \ same steps as PIXEL#>MASK, written out in place
  get-mask.asm     ( -- mask address-offset)
  mark-table @ +   ( -- mask ^byte)
  c@ and           ( -- mask | 0)
;

\ if the pixel turns out to be a mandelbrot pixel, mark it in both tables!
: SET.MANDELBROT.PIXEL ( pixel# -- )
  get-mask.asm 2dup            ( -- mask addr-off mask addr-off)
  \ CSET.ASM does the read-OR-write itself, so each table needs only its
  \ address worked out; MARK-TABLE is written first, then MANDEL-TABLE.
  mark-table @ +   cset.asm    ( -- mask addr-off)
  mandel-table @ + cset.asm    ( -- )
;

\ check if a pixel has been marked in MANDEL-TABLE
\ Leaves the pixel's bit mask when its bit is set, otherwise 0.
: GET.MANDELBROT.PIXEL ( pixel# -- mask | 0)
  \ same steps as PIXEL#>MANDELMASK, written out in place
  get-mask.asm       ( -- mask address-offset)
  mandel-table @ +   ( -- mask ^byte)
  c@ and             ( -- mask | 0)
;

\ ***************************** POINT>PIXEL# ********************************

\ take an x,y and convert to pixel# depending on mandel-width
\ pixel# = y * (contents of mandel-width) + x
\ 64000 calls to this takes 2.36 sec
\ Uses d0 as scratch.
\ NOTE(review): in Motorola's instruction set the long form of MULS is a
\ 68020-and-later encoding -- confirm what this assembler emits for it when
\ the target is a plain 68000.
ASM POINT>PIXEL# ( x y -- pixel#)
callcfa 	mandel-width		\ ( -- x y ^mandel-width)
move.l		$0(org,tos.l),tos	\ fetch the width through that address
move.l 		(dsp)+,d0		\ pop y into d0
muls.l		tos,d0			\ d0 = y * width
add.l		(dsp)+,d0		\ pop x and add it
move.l		d0,tos			\ result becomes the new top of stack
end-code

\ mark non-mandelbrot pixel associated with (x,y) in mark-table
: MARK.NON.POINT ( x y -- )
  point>pixel#
  \ from here on, the same steps as SET.PIXEL written out in place
  get-mask.asm mark-table @ +   ( -- mask ^byte)
  \ CSET.ASM reads the byte, ORs the mask in and writes it back by itself,
  \ so no separate C@ OR is needed in front of it.
  cset.asm
;

\ mark mandelbrot pixel associated with (x,y) in both mark-table and
\ mandel-table
: MARK.MANDELBROT.POINT ( x y -- )
  point>pixel#
  \ from here on, the same steps as SET.MANDELBROT.PIXEL written out in place
  get-mask.asm 2dup            ( -- mask addr-off mask addr-off)
  \ CSET.ASM does the read-OR-write itself, so each table needs only its
  \ address worked out; MARK-TABLE is written first, then MANDEL-TABLE.
  mark-table @ +   cset.asm    ( -- mask addr-off)
  mandel-table @ + cset.asm    ( -- )
;

: MARK.POINT ( x y iterations -- )
  \ Pick the marking word from the iteration count of point (x,y).
  mandelmax =
  IF   mark.mandelbrot.point   \ count equals MANDELMAX
  ELSE mark.non.point          \ any other count
  THEN
;

: GET.POINT ( x y -- mask | 0)
  \ Test the MARK-TABLE bit of point (x,y): leaves the bit mask when the bit
  \ is set, otherwise 0.  After POINT>PIXEL# these are the steps of GET.PIXEL
  \ written out in place.
  point>pixel#     ( -- pixel#)
  get-mask.asm     ( -- mask address-offset)
  mark-table @ +   ( -- mask ^byte)
  c@ and           ( -- mask | 0)
;

: GET.MANDELBROT.POINT ( x y -- mask | 0)
  \ Test the MANDEL-TABLE bit of point (x,y): leaves the bit mask when the
  \ bit is set, otherwise 0.  After POINT>PIXEL# these are the steps of
  \ GET.MANDELBROT.PIXEL written out in place.
  point>pixel#       ( -- pixel#)
  get-mask.asm       ( -- mask address-offset)
  mandel-table @ +   ( -- mask ^byte)
  c@ and             ( -- mask | 0)
;

  
\ ******************************** BENCHMARKS *****************************

\ 64000 loops take 7.92 sec
\ with cset.asm now takes 6.3 sec
\ Calls SET.PIXEL on pixel 400 #LOOPS times.  ?DO is used so that a count of
\ 0 skips the loop body altogether.
: BENCH.SET.PIXEL ( #loops -- )
  0 ?do
    400 set.pixel
  loop
;

\ 64000 loops take 5.60 sec
\ with get-mask.asm now takes 4.84 sec
\ Calls GET.PIXEL on pixel 400 #LOOPS times, discarding each result.  ?DO is
\ used so that a count of 0 skips the loop body altogether.
: BENCH.GET.PIXEL ( #loops -- )
  0 ?do
    400 get.pixel drop
  loop
;

\ 64000 loops take 12.88 sec.
\ with asm takes 10.48 sec
\ Calls SET.MANDELBROT.PIXEL on pixel 400 #LOOPS times.  ?DO is used so that
\ a count of 0 skips the loop body altogether.
: BENCH.SET.MPIXEL ( #loops -- )
  0 ?do
    400 set.mandelbrot.pixel
  loop
;

\ 64000 loops take 5.56 sec
\ with asm takes 4.84 sec
\ Calls GET.MANDELBROT.PIXEL on pixel 400 #LOOPS times, discarding each
\ result.  ?DO is used so that a count of 0 skips the loop body altogether.
: BENCH.GET.MPIXEL ( #loops -- )
  0 ?do
    400 get.mandelbrot.pixel drop
  loop
;
 

