\ Copyright 1989 NerveWare
\ No portion of this code may used for commercial purposes,
\ nor may any executable version of this code be disributed for 
\ commercial purposes without the author's express written permission.
\ This code is shareware, all rights reserved.
\ Nick Didkovsky
\ 12/21/88
\ 
\                            MandelShift2.ASM 

\ MOD: Uses 16384 as max possible scaler!             12/20/88
\ MOD: 350 as mandelmax seems optimal at this scale   2/11/89
\ MOD: storing mandelmax iterations in register A1    2/11/89
\ MOD: storing registers d2,d5 in scratch registers a0,a2  2/11/89

\ 4 := 65536
\ 2 := 32768

\ This is the speedy assembler Mandelbrot routine.
\ A little roundoff error is introduced by integer math,
\ visible as a little roughness along the high-contrast boundaries of the 
\ image generated. But it's insanely fast.

\ MandelMax iterations of the loop qualifies a given complex 
\ number as belonging to the Mandelbrot Set.

\                          THE ALGORITHM
\ The iterative function applied to a given complex number C is: 
\ Z := Z^2+C
\ In terms of the variables below, C = aconst + (bconst)i , where i
\ is the square root of -1.
\ The function is initialized with Z=0+0i 
\ The function's result is fed into Z again, recalculated, fed in, 
\ recalculated, over and over.
\ Each intermediate result's magnitude is tested.  If the magnitude >= 2,
\ break out of the loop, leaving the number of iterations it took to 
\ "blow up" to this value on the stack.
\ If, after 254 iterations of this function, the magnitude is still < 2,
\ tested number C is considered a member of the Mandelbrot Set, and 254 is left
\ on the stack.

\ SPEED SHORTCUTS:

\ 1) The magnitude of a complex number equals the square root of the sum
\    of the squares of its real and imaginary parts (simple "distance formula"
\    from high school).  
\    Shortcut: Don't bother taking the square root and checking it
\    against 2, rather test the sum of the squares against 4.

\ 2) As mentioned before, I use scaled integer math.  This is MUCH faster
\    than floating point.  Additionally, instead of scaling by a nice looking 
\    number like 10000, I use 16384, which is a power of 2.  So, instead of
\    scaling results with a DIVISION by 10000 (slow), I scale it by
\    SHIFTING register contents to the right 14 places (very very fast).

\ 3) The Z := Z^2+C function can be broken down into two calculations:
\    one calculates what happens to the real part of Z (call this value az), 
\    the other calculates what happens to the imaginary part (call this bz).
\        NEWaz := az*az - bz*bz + aconst
\        NEWbz := 2*az*bz + bconst
\    Speedup: Notice that az*az and bz*bz are needed to calculate the new az,
\    as well as to check the magnitude of the result.  So I only calculate these
\    squares ONCE in each iteration, storing the results in registers d5 and d6,
\    and use them for both purposes.

\ 4) The routine uses 68000 registers ONLY.  No subroutine calls at all.  This
\    makes for very fast arithmetic!  Register's previous contents are stored 
\    at the top of the routine, then restored at the bottom before it exits.

anew task_Mandelbrot-ASM

\ maximum iterations is hard coded in this version of the program = 350

350 CONSTANT MandelMax

\ These variables determine the region generated.
\ Their values represent scaled values, where the scaler = 16384.
\ That means unity is represented by 16384.
\ The smallest value in our scaled arithmetic is obviously "1",
\ which represents the fraction (1 / 16384) = 0.000061
\ So, smallest distance representable between two adjacent pixels is 0.000061
\ ok so it's not double precision ... 

VARIABLE acorner  \ region's top left real part ( x value)
VARIABLE bcorner  \ region's top left imaginary part ( y value)
VARIABLE aconst   \ real part of number being tested
VARIABLE bconst   \ imaginary part of number being tested
VARIABLE xgap     \ horizontal distance between adjacent pixels
VARIABLE ygap     \ vertical distance between adjacent pixels

\ the following variables are used to store register contents during
\ the execution of the loop.  Registers are restored when this code returns.

VARIABLE temp.d3   
VARIABLE temp.d4   
VARIABLE temp.d6   

\ Speed notes:
\ 1000 worst-case calls to this routine takes 2.72 sec. with MandelMax = 35
\ (using 0+0i, which is a Mandelbrot member)
\ Old version, using 10000 scaler and DIVS took 4.58 sec!!!


\ This uses registers only for super speed. 

\ d0 = az
\ d1 = bz
\ d2 = loop counter
\ d3 = aconst
\ d4 = bconst
\ d5 = azsqr
\ d6 = bzsqr
\ a1 = mandelmax

ASM quick.guts  ( -- iterations)
\ save d3,d4,d6 in variables
    callcfa	temp.d3
    move.l	d3,$0(org,tos.l)
    move.l	(dsp)+,tos
    callcfa	temp.d4
    move.l	d4,$0(org,tos.l)
    move.l	(dsp)+,tos
    callcfa	temp.d6
    move.l	d6,$0(org,tos.l)
    move.l	(dsp)+,tos


\ ********************** initialize d3, d4 with aconst and bconst *********

    callcfa	aconst
    move.l	$0(org,tos.l),d3
    move.l	(dsp)+,tos
    callcfa	bconst
    move.l	$0(org,tos.l),d4
    move.l	(dsp)+,tos
    callcfa	MandelMax
    move.l	tos,a1
    move.l      (dsp)+,tos
\ save d2,d5 in scratch registers a0,a2
    move.l	d2,a0
    move.l	d5,a2

\ ************************ more init's ************************************
    
    moveq	#0,d2			\ initialize loop counter
    moveq	#0,d0			\ initialize az
    moveq	#0,d1			\ initialize bz

\ ******************  loop starts, calculate azsqr, bzsqr *****************

\ Check if operand greater than 32768.  If so, don't bother squaring: exit loop
\ because magnitude will automatically be greater than 4 = 65536.

\ AZSQR
1$: move.l	d0,d5       		\ az copied to d5
    tst.l	d5			\ make sure positive
    bpl.s	22$
    neg.l	d5
22$: cmpi.l	#32768,d5	       	\ operand >= 32768 ?
    bge		2$			\ if so, exit loop
    muls.l	d5,d5			\ else square it, d5 now contains azsqr
    asr.l	#$8,d5			\ FAST SCALE BY 16384 !!!
    asr.l	#$6,d5

\ BZSQR
    move.l	d1,d6			\ bz copied to d6
    tst.l	d6			\ make sure positive
    bpl.s	32$
    neg.l	d6
32$: cmpi.l	#32768,d6	       	\ operand >= 32768
    bge		2$			\ if so, exit loop
    muls.l	d6,d6			\ else square it, d6 now contains bzsqr
    asr.l	#8,d6			\ FAST SCALE!!!
    asr.l	#6,d6


\ ************************** new BZ **************************


   tst.l d0				\ test sign of d0
   bpl.s 11$ 				\ d0 positive, goto 11$
   neg.l d0				\ else negate d0
   tst.l d1				\ and test d1
   bpl.l 31$				\ d1 pos, d0 was neg, goto 31$
   neg.l d1				\ both neg, negate d1 
   bra   21$				\ and goto 21$ 

11$: tst.l d1				\ d0 pos, test d1
     bpl.s 21$				\ both positive, goto 21$
     neg.l d1				\ else negate d1
     bra   31$				\ and goto 31$

21$: muls.l	d1,d0			\ * bz
     asr.l      #8,d0			\ FAST SCALE !!!
     asr.l	#6,d0
     bra 	41$			\ leave answer positive
 
31$: muls.l	d1,d0			\ * bz
     asr.l      #8,d0			\ FAST SCALE !!!
     asr.l	#6,d0
     neg.l	d0			\ make final answer negative


41$: add.l	d0,d0			\ 2* 
     add.l	d4,d0			\ + bconst
     move.l	d0,d1			\ new bz now in d1

\ ************************** NEW AZ ***********************************

    move.l	d6,d0			\ copy bzsqr out to d0
    neg.l	d0			\ negate its copy
    add.l	d5,d0			\ azsqr-bzsqr
    add.l	d3,d0			\ + aconst, new az now in d0 

	
\ ******************* TEST FOR PREMATURE EXIT FROM LOOP ******************

    add.l	d5,d6			\ azsqr+bzsqr, magnitude is in d6 now
    cmpi.l	#65536,d6		\ check if magnitude > 65536=4 
    bgt		2$			\ if > , exit loop, else... 
    addi.l	#1,d2 			\ ... increment loop counter
    cmp.l	a1,d2 			\ MandelMax iterations stored in A1
    blt		1$ 			\ back to loop start
2$: move.l 	tos,-(dsp)
    move.l	d2,tos          \ leave iteration count on tos

\ now restore register contents

    move.l 	a0,d2
    move.l	a2,d5

    callcfa	temp.d3
    move.l	$0(org,tos.l),d3	 \ restore d3 !!!
    move.l	(dsp)+,tos
    callcfa	temp.d4
    move.l	$0(org,tos.l),d4	 \ restore d4 !!!
    move.l	(dsp)+,tos
    callcfa	temp.d6
    move.l	$0(org,tos.l),d6	 \ restore d6 !!!
    move.l	(dsp)+,tos
END-CODE

\ ************************ end quick guts **********************************

