\ FFT.ASM - Fast Fourier Transform assembly support words.
\
\ KFFT V1.1 (C)Copyright 1989, Jerry Kallaus.  All rights reserved. 
\ May be freely redistributed for non-commercial use (FREEWARE).

\ See file fft.asm.doc.
\ These words assume that registors A0-A1,D0-D3 are trashable.

ANEW TASK-fft.asm

anew task-fft.asm

auto_scale_fft? NOT CONSTANT nonauto?

variable save-here

: bpl$+4  $ 6a02 w, ; immediate \ used to keep assembler from scribling
                                \ over erased code

: markhere ( -- ) here save-here ! ;
: backhere ( flag -- , backup to markhere if flag )
     if save-here @ here - allot then ;

asm 2**                ( n -- 2**n )
    moveq.l #1,d0
    asl.l   tos,d0
    move.l  d0,tos
    forth{ both }
end-code

asm 2CELL+             ( n -- n+8 )
    addq.l  #8,tos
    forth{ both }
end-code

asm 2CELL-             ( n -- n-8 )
    subq.l  #8,tos
    forth{ both }
end-code

asm 2CELLS             ( n -- n*8 )
    asl.l   #3,tos
    forth{ both }
end-code

asm 4DUP               ( 1 2 3 4 -- 1 2 3 4 1 2 3 4 )
    move.l  dsp,a0
    movem.l (a0)+,d1-d3
    move.l  tos,-(dsp)
    movem.l d1-d3,-(dsp)
    forth{ both }
end-code

asm Z@                 ( addr -- real imag )
    move.l  $0(org,tos.l),-(dsp)
    move.l  $4(org,tos.l),tos
    forth{ both }
end-code

asm Z!                 ( real imag addr -- )
    move.l  (dsp)+,$4(org,tos.l)
    move.l  (dsp)+,$0(org,tos.l)
    move.l  (dsp)+,tos
    forth{ both }
end-code

asm Z+                    ( z1 z2 -- z1+z2 )
    movem.l (dsp)+,d0-d2
    add.l   d0,d2
    add.l   d1,tos
    move.l  d2,-(dsp)
    forth{ both }
end-code

asm Z-                    ( z1 z2 -- z1-z2 )
    movem.l (dsp)+,d0-d2
    sub.l   d0,d2
    sub.l   d1,tos
    neg.l   tos
    move.l  d2,-(dsp)
    forth{ both }
end-code


\ Z* - complex multiply -  ( a b c d -- ac-bd ad+bc )
\      scaled 2**14
\      registers  3 2 1 7 = a b c d

asm Z*                    ( z1 z2 -- z1*z2 )
    movem.l (dsp)+,d1-d3
    move.l  d3,d0         a
    muls    d1,d0         ac
    muls    d2,d1         bc
    muls    tos,d2        bd
    sub.l   d2,d0         ac-bd
    asl.l   #2,d0
    swap    d0
    ext.l   d0
    move.l  d0,-(dsp)
    muls    d3,tos        ad
    add.l   d1,tos        ad+bc
    asl.l   #2,tos
    swap    tos
    ext.l   tos
    forth{ both }
end-code

asm ZNEGATE              ( z -- -z )
    neg.l   tos
    move.l  (dsp),d0
    neg.l   d0
    move.l  d0,(dsp)
    forth{ both }
end-code

asm Z2/                   ( z -- z/2 )
    asr.l   #1,tos
    move.l  (dsp),d0
    asr.l   #1,d0
    move.l  d0,(dsp)
    forth{ both }
end-code

asm Z/2**N                ( z n -- z/2**n )
    movem.l (dsp)+,d0-d1
    asr.l   tos,d0
    asr.l   tos,d1
    move.l  d0,tos
    move.l  d1,-(dsp)
    forth{ both }
end-code

asm NSBITS    ( value --  number of significant abs bits plus 1 sign bit )
    tst.l   tos
    beq.s   9$          exception is return zero if value is zero
    bgt.s   1$
    not.l   tos
    bgt.s   1$
    moveq.l #1,tos
    bra.s   9$
1$: moveq.l #33,d0
2$: asl.l   #1,tos
    dblt.w  d0,2$
    move.l  d0,tos
9$:
    forth{ both }
end-code


asm OR.ABS.ARRAY     ( addr ncells -- or'd-magnitude-bits-of-array )
    move.l  (dsp)+,a0
    adda.l  org,a0
    moveq.l #0,d1
    move.l  tos,d2        64k counter
    swap    d2
    bra.s   3$
1$: move.l  (a0)+,d0
    bpl.s   2$
    not.l   d0
2$: or.l    d0,d1
3$: dbra.w  tos,1$
    dbra.w  d2,1$
    move.l  d1,tos
    forth{ both }
end-code

                      ( Arithmetic shift array of n-cells by n-bits. )
                      ( Left for n-bits positive, right for n-bits neg.)
asm ASHIFT.ARRAY      ( array-addr n-cells n-bits -- )
    movem.l  d4-d5,-(rp)   ( Limited to arrays up to 256k cells )
    move.l  (dsp)+,d4          n
    ble.l   8$
    move.l  d4,a1
    lsr.l   #2,d4
    move.l  (dsp)+,d0
    lea     $0(org,d0.l),a0    addr
    moveq.l #16,d5
    tst.l   tos
    beq.l   9$
    bgt.l   4$
    neg.l   tos
    subq.l  #1,d4           start of right shift code
    bmi.s   2$
1$: movem.l (a0)+,d0-d3
    asr.l   tos,d0
    asr.l   tos,d1
    asr.l   tos,d2
    asr.l   tos,d3
    movem.l d0-d3,-(a0)
    adda.l  d5,a0
    dbra.w  d4,1$
2$: move.l  a1,d4
    moveq.l #3,d0
    and.l   d0,d4
    beq.s   9$
    subq.l  #1,d4
3$: move.l  (a0),d0
    asr.l   tos,d0
    move.l  d0,(a0)+
    dbra.w  d4,3$
    bra.s   9$
4$: subq.l  #1,d4           start of left shift code
    bmi.s   6$
5$: movem.l (a0)+,d0-d3
    asl.l   tos,d0
    asl.l   tos,d1
    asl.l   tos,d2
    asl.l   tos,d3
    movem.l d0-d3,-(a0)
    adda.l  d5,a0
    dbra.w  d4,5$
6$: move.l  a1,d4
    moveq.l #3,d0
    and.l   d0,d4
    beq.s   9$
    subq.l  #1,d4
7$: move.l  (a0),d0
    asl.l   tos,d0
    move.l  d0,(a0)+
    dbra.w  d4,7$
    bra.s   9$
8$: adda.w  #4,dsp         pop data addr off stack
9$: movem.l (rp)+,d4-d5
    move.l  (dsp)+,tos
end-code


asm STATS.ARRAY       ( array-addr n -- max min sumlo sumhi )
    movem.l d4-d5,-(rp)
    move.l  (dsp)+,a0
    adda.l  org,a0
    move.l  tos,d0
    move.l  tos,d5           64k counter
    swap    d5
    moveq.l #0,tos           init sum to 0
    moveq.l #0,d1
    move.l  #$-80000000,d3   init max to -inf
    move.l  #$3FFFFFFF,d2    init min to +inf
    bra.s   4$
1$: move.l  (a0)+,d4
    cmp.l   d4,d3
    bge.s   2$
    move.l  d4,d3            max
2$: cmp.l   d4,d2
    ble.s   3$
    move.l  d4,d2            min
3$: add.l   d4,d1            sum
    tst.l   d4
    smi.b   d4
    ext.w   d4
    ext.l   d4
    addx.l  d4,tos
4$: dbra.w  d0,1$
    dbra.w  d5,1$
    movem.l  d1-d3,-(dsp)
\    move.l  d3,-(dsp)
\    move.l  d1,-(dsp)
    movem.l (rp)+,d4-d5
end-code


asm QUICK.REVERSAL    ( array-data  reversal-map-of-swap-pairs -- )
    move.l  a3,-(rp)           save regs on return stack
    move.l  a5,-(rp)
    lea     $0(org,tos.l),a0   r
    move.l  (dsp)+,a1
    adda.l  org,a1             a
    move.l  (a0)+,tos          i
1$: move.l  (a0)+,d0           next j
    lea     $0(a1,tos.l),a3    abs i
    lea     $0(a1,d0.l),a5     abs j
    move.l  (a3),d1            swap cmplx a[i] with a[j]
    move.l  (a5),d2
    move.l  d1,(a5)+
    move.l  d2,(a3)+
    move.l  (a3),d1
    move.l  (a5),d2
    move.l  d1,(a5)
    move.l  d2,(a3)
    move.l  (a0)+,tos          next i
    bne.l   1$                 zero terminator in swap map
    move.l  (dsp)+,tos         cache tos
    move.l  (rp)+,a5           restore regs
    move.l  (rp)+,a3
end-code

\  inner.loop register usage
\  i   a   n           ss  le le1                ui      ur
\  0dr 1   2   3   4   5   6   7     0ar 1   2   3   4   5   6   7
\  i  air aii ur-i ur+i ss ur le1    ai aip  an  ur      ui
\                             high               le

asm INNER.LOOP    ( u le ss n a i le1 -- hi )
    movem.l d4-d6/a2-a3/a5,-(rp)       save regs on return stack
    movem.l (dsp)+,d0-d2/d5-d6/a3/a5
    lea     $0(a4,d1.l),a0       a
    lea     $0(a0,d2.l),a2       an
    adda.l  d0,a0                ai
    lea     $0(a0,tos.l),a1      aip
    moveq.l #0,tos               hi - abs all output or'd in 7dr
    move.l  a5,d3                ur
    move.l  a5,d4
    sub.l   a3,d3                ur-ui
    add.l   a3,d4                ur+ui
    subq.l  #4,d6
    move.l  d6,a3                le
1$:
    move.l  (a1),d1              a[ip]
    move.l  $4(a1),d2
    asr.l   d5,d1                scale-down
    asr.l   d5,d2
    move.l  d1,d0
    sub.l   d2,d0                c-d
    move.l  a5,d6                ur
    muls    d6,d0                z
    muls    d3,d2                fd
    add.l   d0,d2                fd+z
    muls    d4,d1                gc
    sub.l   d0,d1                gc-z
    asl.l   #2,d1                scale-down cmplx * result
    swap    d1
    ext.l   d1
    asl.l   #2,d2
    swap    d2
    ext.l   d2
    move.l  (a0),d0              a[i] real
    asr.l   d5,d0                scale-down
    move.l  d0,d6
    sub.l   d2,d6                a[i]-t
    move.l  d6,(a1)+             a[ip]
    forth{ markhere bpl$+4 }
    neg.l   d6
    or.l    d6,tos
    forth{ nonauto? backhere }
    add.l   d2,d0                a[i]+t
    move.l  d0,(a0)+             a[i]
    forth{ markhere bpl$+4 }
    neg.l   d0
    or.l    d0,tos
    forth{ nonauto? backhere }
    move.l  (a0),d0              a[i] imag
    asr.l   d5,d0                scale-down
    move.l  d0,d6
    sub.l   d1,d6                a[i]-t
    move.l  d6,(a1)              a[ip]
    forth{ markhere bpl$+4 }
    neg.l   d6
    or.l    d6,tos
    forth{ nonauto? backhere }
    add.l   d1,d0                a[i]+t
    move.l  d0,(a0)              a[i]
    forth{ markhere bpl$+4 }
    neg.l   d0
    or.l    d0,tos
    forth{ nonauto? backhere }
    adda.l  a3,a1
    adda.l  a3,a0
    cmp.l   a2,a0
    blt.l   1$
    movem.l (rp)+,d4-d6/a2-a3/a5
end-code
