/*
 * MandelVroom 2.0
 *
 * (c) Copyright 1987,1989  Kevin L. Clague, San Jose, CA
 *
 * All rights reserved.
 *
 * Permission is hereby granted to distribute this program's source
 * executable, and documentation for non-comercial purposes, so long as the
 * copyright notices are not removed from the sources, executable or
 * documentation.  This program may not be distributed for a profit without
 * the express written consent of the author Kevin L. Clague.
 *
 * This program is not in the public domain.
 *
 * Fred Fish is expressly granted permission to distribute this program's
 * source and executable as part of the "Fred Fish freely redistributable
 * Amiga software library."
 *
 * Permission is expressly granted for this program and it's source to be
 * distributed as part of the Amicus Amiga software disks, and the
 * First Amiga User Group's Hot Mix disks.
 *
 * contents: this file contains the functions to calculate Mandelbrot and
 * Julia pictures using a special and fast fixed point (scaled ints) format.
 * It has generators for 68000 and 68020 in assembly.
 */

#include "mandp.h"

#include "parms.h"

#define BITS2SHIFT 27

/*
 * 32 bit fixed point generator
 */
MandelbrotInt32( Pict )
  register struct Picture *Pict;
{
  register LONG   i, j, k;
  register SHORT *CountPtr;

  double gap;

  UBYTE ConvFlag;
  LONG  gapx, gapy, startx;

  struct IntPotParms Parms;
  struct RastPort   *Rp;

  if (Pict->Flags & NO_RAM_GENERATE)
    CountPtr = Pict->Counts;
  else
    CountPtr = Pict->Counts + Pict->CurLine*Pict->CountX;

  /* figure out horizontal and verticle distances between points in plane.
   * convert them to fixed point format                                    */

  gapy = (int) (Pict->ImagGap*((double)(1<<BITS2SHIFT)));
  gapx = (int) (Pict->RealGap*((double)(1<<BITS2SHIFT)));

  startx = (int)(Pict->RealLow*((double)(1<<BITS2SHIFT)));

  /*
   * for each point in the image, calculate Mandelbrot
   */
  Parms.C_Imag = (int)(Pict->ImagLow*((double)(1<<BITS2SHIFT)));

  Parms.C_Imag += Pict->CurLine*gapy;

  Parms.ScreenReal = 0;
  Parms.ScreenImag = 0;

  Parms.MaxIteration = Pict->MaxIteration;

  for (i = Pict->CurLine; i < Pict->CountY; i++) {

    Parms.C_Real = startx;

    ConvFlag = 0;

    if ( Pict->Flags & NO_RAM_GENERATE )
      CountPtr = Pict->Counts;

    for (j = 0; j < Pict->CountX; j++) {

      if (*CountPtr == 0) {

        /*
         * if the last pixel was mandelbrot, then use trace table
         */
        if (ConvFlag) {

          k = Int32_Trace_Height( &Parms );
        } else {

          k = Int32_Height( &Parms );
        }

        ConvFlag = k == Pict->MaxIteration;
        *CountPtr = k;
      }
      CountPtr++;

      Parms.C_Real += gapx;

      ChildPause( Pict );
    }
    Parms.C_Imag += gapy;

    CheckEOL( Pict );
  }
} /* Mandelbrot32Int */

#ifdef CHECK_TASK_STACK

int stackerr;
LONG stacklow;
LONG stackhi;
LONG stackovfl;
LONG stackmax;

CheckStack() {
  register struct Task *Task;
  register LONG hi,reg,low;
  register LONG size;

  Task = FindTask(0L);

  hi = Task->tc_SPUpper;
  reg = Task->tc_SPReg;
  low = Task->tc_SPLower;

  size = hi - reg;

  if (reg < low) {

    if (size > stackmax) {
      stackerr  = 1;

      stacklow   = low;
      stackhi    = hi;
      stackovfl  = reg;
      stackmax   = size;
    }
  } else {

    if (stackerr == 0 && size > stackmax) {
      stacklow   = low;
      stackhi    = hi;
      stackovfl  = reg;
      stackmax   = size;
    }
  }
}

PrintStack()
{
  if (stackerr != 0)
    printf("********* STACK ERR ***********\n");

  printf("Low   %08x\nHigh  %08x\nSP     %08x\n",
          stacklow, stackhi, stackovfl );

  printf("MinStack %d\n",stackmax);
}

#endif

/*
 * 32 bit fixed point generator
 */
JuliaInt32( Pict )
  register struct Picture *Pict;
{
  register LONG   i, j, k;
  register SHORT *CountPtr;

  LONG  gapx, gapy, startx;
  UBYTE ConvFlag;

  struct IntPotParms Parms;

  if (Pict->Flags & NO_RAM_GENERATE)
    CountPtr = Pict->Counts;
  else
    CountPtr = Pict->Counts + (Pict->CurLine*Pict->CountX);

  /* figure out horizontal and verticle distances between points in plane.
   * convert them to fixed point format                                    */

  gapx = (int) ( Pict->RealGap * (double) (1 << BITS2SHIFT) );
  gapy = (int) ( Pict->ImagGap * (double) (1 << BITS2SHIFT) );

  /*
   * for each point in the image, calculate Julia
   */
  Parms.ScreenImag  = (int) ( Pict->ImagLow * (double) (1 << BITS2SHIFT) );
  Parms.ScreenImag += Pict->CurLine*gapy;

  startx = (int) ( Pict->RealLow * (double) (1 << BITS2SHIFT) );

  Parms.C_Real = (int) ( Pict->Real * (double) (1 << BITS2SHIFT) );
  Parms.C_Imag = (int) ( Pict->Imag * (double) (1 << BITS2SHIFT) );

  Parms.MaxIteration = Pict->MaxIteration;

  for (i = Pict->CurLine; i < Pict->CountY; i++) {

    Parms.ScreenReal = startx;
    ConvFlag = 0;

    if ( Pict->Flags & NO_RAM_GENERATE )
      CountPtr = Pict->Counts;

    for (j = 0; j < Pict->CountX; j++) {

      if (*CountPtr == 0) {

        /*
         * if the last pixel was mandelbrot, then use trace table
         */

        if ( ConvFlag ) {
          k = Int32_Trace_Height( &Parms );
        } else {
          k = Int32_Height( &Parms );
        }

        ConvFlag = k == Pict->MaxIteration;
        *CountPtr = k;
      }
      CountPtr++;

      Parms.ScreenReal += gapx;

      ChildPause( Pict );
    }
    Parms.ScreenImag += gapy;

    CheckEOL( Pict );
  }
} /* Julia32Int */

/*
 * 32 bit fixed point generator
 */
MandelbrotInt32II( Pict )
  register struct Picture *Pict;
{
  register LONG   i, j, k;
  register struct RastPort *Rp;

  double gap;

  SHORT *CountPtr;
  LONG  gapx, gapy, startx;

  struct IntPotParms Parms;

  if (Pict->Flags & NO_RAM_GENERATE)
    CountPtr = Pict->Counts;
  else
    CountPtr = Pict->Counts + Pict->CurLine*Pict->CountX;

  /* figure out horizontal and verticle distances between points in plane.
   * convert them to fixed point format                                    */

  gapy = (int) (Pict->ImagGap*((double)(1<<BITS2SHIFT)));
  gapx = (int) (Pict->RealGap*((double)(1<<BITS2SHIFT)));

  startx = (int)(Pict->RealLow*((double)(1<<BITS2SHIFT)));

  /*
   * for each point in the image, calculate Mandelbrot
   */
  Parms.C_Imag = (int)(Pict->ImagLow*((double)(1<<BITS2SHIFT)));
  Parms.C_Imag += Pict->CurLine*gapy;

  Parms.ScreenReal = 0;
  Parms.ScreenImag = 0;

  Parms.MaxIteration = Pict->MaxIteration;

  for (i = Pict->CurLine; i < Pict->CountY; i++) {

    Parms.C_Real = startx;

    if ( Pict->Flags & NO_RAM_GENERATE )
      CountPtr = Pict->Counts;

    for (j = 0; j < Pict->CountX; j++) {

      /*
       * if the last pixel was mandelbrot, then use trace table
       */

      k = Int32_Height_Fast( &Parms );

      *CountPtr++ = k;

      Parms.C_Real += gapx;

      ChildPause( Pict );
    }
    Parms.C_Imag += gapy;

    CheckEOL( Pict );
  }
} /* MandelbrotInt32II */

/*
 *  This code does mandelbrot without any trace lookup.
 *  It is the fastest 68000 generator in the house.
 */
Int32_Height( Parms )

  struct IntPotParms *Parms;
{
  LONG Height;

  register struct IntPotParms *P = Parms;

  register LONG cura, curb, cura2, curb2;

#asm
height  equ -4
Bits2Shift equ  5
;
;
;  d1 - BITS2SHIFT
;  d2 - k
;  d4 - a
;  d5 - b
;  d6 - a2
;  d7 - b2
;
screenr  equ 0
screeni  equ 4
curx     equ 8
cury     equ 12
maxi     equ 16
;
   move.l   #Bits2Shift,d1
   move.l   screenr(a2),d4
   move.l   screeni(a2),d5
   move.w   maxi(a2),d2
   bra      Fposa2
;
FKLoop
;
;  cura = cura2 - curb2 + curx;
   exg      d6,d4       ; exchange cura and cura2
   sub.l    d7,d4       ; subtract curb
   add.l    curx(a2),d4 ; add curx
;
;  curb = cura * curb >> 12;
   move.l   d6,d7      ; get copy of op1 sign bit
   bpl      Fpos1       ; get absolute value of op1
   neg.l    d6
Fpos1
   eor.l    d5,d7      ; calculate result sign
   tst.l    d5         ; get absolute value of op2
   bpl      Fpos2
   neg.l    d5
Fpos2
   move.l   d6,d0      ; get a copy of op1
   swap     d0         ; get high half of op1
   move.w   d0,d7      ; save a copy of high half
   mulu     d5,d0      ; multiply op2 low by op1 high
   clr.w    d0         ;  clear least significant part
   swap     d0         ;  put it in it's place
   swap     d5         ; get high half of op2
   mulu     d5,d6      ; multiply op2 high with op1 low
   clr.w    d6         ;  clear least significant part
   swap     d6         ;  put it in its place
   mulu     d7,d5      ; multiply op2 high by op1 high
   add.l    d0,d5      ; add partial results
   add.l    d6,d5      ; add partial results
   tst.l    d7         ; is the result negative?
   bpl      Fpos3
   neg.l    d5         ; yes, better negate it.
Fpos3
   asl.l    d1,d5      ; now, rescale it.
;
;  curb += curb + cury;
   add.l    d5,d5      ; double it and add cury
   add.l    cury(a2),d5
Fposa2
;
;  cura2 = cura * cura;
   move.l   d4,d0      ; get absolute value of a in d0
   bpl      Fposa
   neg.l    d0
Fposa
   move.l   d0,d6      ; copy absolute value into d6
   swap     d6         ; get high part in d6
   mulu     d6,d0      ; multiply high and low destroying low
   clr.w    d0         ; clear the least significant part
   swap     d0         ; put most sig. part in low half
   mulu     d6,d6      ; multiply high and high destroing high
   add.l    d0,d6      ; add in lower half twice
   add.l    d0,d6
   asl.l    d1,d6      ; get radix point back in correct place
   bvs      Fbailout
;
;  curb2 = curb * curb;
   move.l   d5,d0      ; get absolute value of a in d0
   bpl      Fposb
   neg.l    d0
Fposb
   move.l   d0,d7      ; copy absolute value into d7
   swap     d7         ; get high part in d7
   mulu     d7,d0      ; multiply high and low destroying low
   clr.w    d0         ; clear the least significant part
   swap     d0         ; put most sig. part in low half
   mulu     d7,d7      ; multiply high and high destroing high
   add.l    d0,d7      ; add in lower half twice
   add.l    d0,d7
   asl.l    d1,d7      ; get radix point back in correct place
   bvs      Fbailout
;
   move.l   d6,d0      ; if (cura2 + curb2 >= 4) goto bailout;
   add.l    d7,d0
   bvs      Fbailout
;
   dbra     d2,FKLoop
;  addq     #1,d2
Fbailout
   move.w   maxi(a2),d0
   sub.w    d2,d0
   sub.w    #1,d0
   ext.l    d0
#endasm
   ;;
}

Int32_Trace_Height( Parms )

  register struct IntPotParms *Parms;
{
  SHORT  k,TLen;
  LONG  OldA[17];

  register LONG cura, curb, cura2, curb2;

#asm
;
;
;  d1 - BITS2SHIFT
;  d2 - k
;  d4 - a
;  d5 - b
;  d6 - a2
;  d7 - b2
;  a0 - Trace Table pointer
;
Trace equ 16
OldA  equ -132
k     equ -2
TLen  equ -4

   move.l   #Bits2Shift,d1
   move.l   screenr(a2),d4
   move.l   screeni(a2),d5
;
   lea      OldA(a5),a0          ; Set up Trace table pointer
   move.w   #Trace,d2            ; Set up Trace table len
;
   move.w   #-1,k(a5)
   bra      TPosa2               ; branch in to middle to get a2 = a * a
;
TLoop
;
;  cura = cura2 - curb2 + curx;
   exg      d6,d4       ; exchange cura and cura2
   sub.l    d7,d4       ; subtract curb
   add.l    curx(a2),d4 ; add curx
;
;  curb = cura * curb >> 12;
   move.l   d6,d7      ; get copy of op1 sign bit
   bpl      TPos1       ; get absolute value of op1
   neg.l    d6
TPos1
   eor.l    d5,d7      ; calculate result sign
   tst.l    d5         ; get absolute value of op2
   bpl      TPos2
   neg.l    d5
TPos2
   move.l   d6,d0      ; get a copy of op1
   swap     d0         ; get high half of op1
   move.w   d0,d7      ; save a copy of high half
   mulu     d5,d0      ; multiply op2 low by op1 high
   clr.w    d0         ;  clear least significant part
   swap     d0         ;  put it in it's place
   swap     d5         ; get high half of op2
   mulu     d5,d6      ; multiply op2 high with op1 low
   clr.w    d6         ;  clear least significant part
   swap     d6         ;  put it in its place
   mulu     d7,d5      ; multiply op2 high by op1 high
   add.l    d0,d5      ; add partial results
   add.l    d6,d5      ; add partial results
   tst.l    d7         ; is the result negative?
   bpl      TPos3
   neg.l    d5         ; yes, better negate it.
TPos3
   asl.l    d1,d5      ; now, rescale it.
;
;  curb += curb + cury;
   add.l    d5,d5      ; double it and add cury
   add.l    cury(a2),d5
TPosa2
;
;  cura2 = cura * cura;
   move.l   d4,d0      ; get absolute value of a in d0
   bpl      TPosa
   neg.l    d0
TPosa
   move.l   d0,d6      ; copy absolute value into d6
   swap     d6         ; get high part in d6
   mulu     d6,d0      ; multiply high and low destroying low
   clr.w    d0         ; clear the least significant part
   swap     d0         ; put most sig. part in low half
   mulu     d6,d6      ; multiply high and high destroing high
   add.l    d0,d6      ; add in lower half twice
   add.l    d0,d6
   asl.l    d1,d6      ; get radix point back in correct place
   bvs      bailout
;
;  curb2 = curb * curb;
   move.l   d5,d0      ; get absolute value of a in d0
   bpl      TPosb
   neg.l    d0
TPosb
   move.l   d0,d7      ; copy absolute value into d7
   swap     d7         ; get high part in d7
   mulu     d7,d0      ; multiply high and low destroying low
   clr.w    d0         ; clear the least significant part
   swap     d0         ; put most sig. part in low half
   mulu     d7,d7      ; multiply high and high destroing high
   add.l    d0,d7      ; add in lower half twice
   add.l    d0,d7
   asl.l    d1,d7      ; get radix point back in correct place
   bvs      bailout

   move.l   d6,d0      ; if (cura2 + curb2 >= 4) goto bailout;
   add.l    d7,d0
   bvs      bailout

   move     d0,(a0)+   ; save magnitude in trace table

   addq.w   #1,k(a5)   ; are we out of iterations?
   move.w   maxi(a2),d0
   cmp.w    k(a5),d0
   ble      GotIt
   dbra     d2,TLoop   ; nope, so try again till trace table full

   move.w   -(a0),d0   ; get last entry in the trace

   move.w   #Trace-1,d2  ; set up length and address for comparison loop
   lea      OldA(a5),a0

TCLoop
   cmp      (a0)+,d0
   beq      GotIt       ; did we find it?
   dbra     d2,TCLoop

   lea      OldA(a5),a0 ; no match, so prepare for the next round
   move     d0,(a0)+    ; move last entry of the table

   move.w   #Trace,d2
   bra      TLoop

GotIt
   move.w   maxi(a2),k(a5)
bailout
   move.w   k(a5),d0
   ext.l    d0
#endasm
   ;;
}

/*
 * 32 bit fixed point generator
 */
Int32_Height_Fast( Parms )
  struct IntPotParms *Parms;
{
  register struct IntPotParms *P = Parms;
  register LONG cura,curb,cura2,curb2;
  /*
   *  This is the fastest generator in the house.
   */
#asm
   machine mc68020
fscreenr  equ 0
fscreeni  equ 4
fcurx     equ 8
fcury     equ 12
fmaxi     equ 16

Zcurx      equ   8
Zcury      equ  12
;
;  d1 - BITS2SHIFT
;  d2 - k
;  d4 - a
;  d5 - b
;  d6 - a2
;  d7 - b2
;
;  cura = curb = curc = curd = 0;
   move.w   maxi(a2),d1
   move.l   #27,d2
   move.l   fscreenr(a2),d4
   move.l   fscreeni(a2),d5
   move.l   fcurx(a2),a0
   move.l   fcury(a2),a1
   bra      Zpos2

ZLoop
;
;  cura = cura2 - curb2 + curx;
;
   exg      d6,d4
   sub.l    d7,d4
;   move.l   a0,d0
   add.l    a0,d4
;
;  curb = cura * curb
;
   muls.l   d6,d0:d5
   asl.l    #5,d0
   lsr.l    d2,d5
   or.l     d0,d5
;
;  curb += curb + cury;
;
   add.l    d5,d5
;   move.l   a1,d0
   add.l    a1,d5
Zpos2
;
;  cura2 = cura * cura;
;
   move.l   d4,d6
   muls.l   d6,d0:d6
   asl.l    #5,d0
   bvs      Zbailout
   lsr.l    d2,d6
   or.l     d0,d6
;
;  curb2 = curb * curb;
;
   move.l   d5,d7
   muls.l   d7,d0:d7
   asl.l    #5,d0
   bvs      Zbailout
   lsr.l    d2,d7
   or.l     d0,d7
;
   move.l   d6,d0
   add.l    d7,d0
   bvs      Zbailout
;
;  cmp.l    #536870912,d0
;   bge      Zbailout
;
   dbra     d1,ZLoop
;
;  addq     #1,d1
Zbailout
   move.w   fmaxi(a2),d0
   sub.w    d1,d0
   sub.w    #1,d0
   ext.l    d0
#endasm
   ;
}
