/*------------------------------------------------------------------------------

    File    :	FFT_float.c

    Author  :	Stéphane TAVENARD

    $VER:   FFT_float.c  0.1  (17/09/1997)

    (C) Copyright 1995-1995 Stéphane TAVENARD
	All Rights Reserved

    #Rev|   Date   |			  Comment
    ----|----------|--------------------------------------------------------
    0	|22/04/1995| Initial revision
    1   |17/09/1997| added support for faster FFT routine (Henryk Richter)
    ------------------------------------------------------------------------

    FFT for floating point values

------------------------------------------------------------------------------*/

#define FFT_Buggs

#include <stdio.h>
#include <math.h>

#ifdef FFT_Buggs
#include "FastFT_float_asm.h"
#endif

#include "FFT_float_asm.h"
#include "FFT_float.h"

#define FFT_ASM

static WORD INDEX[ FFT_RANGE_MAX ];
static FLOAT COS[ FFT_RANGE_MAX/2 ];
static FLOAT SIN[ FFT_RANGE_MAX/2 ];

static FLOAT temp_real[ FFT_RANGE_MAX ];
static FLOAT temp_imag[ FFT_RANGE_MAX ];

static int range = 0;
static int power = 0;

static void build_index( int power )
{
   WORD n, i, j, l, m;

   n = 1<<power;
   for( i=0; i<n; i++ ) {
      INDEX[ i ] = 0;
      l = 1;
      m = n;
      for( j=0; j<power; j++ ) {
	 m = m>>1;
	 if( i & l ) INDEX[ i ] += m;
	 l = l<<1;
      }
//fprintf( stderr, "%04X -> %04X\n", i, INDEX[ i ] );
   }
}

static void build_sin_cos( int power )
{
   WORD i, n;
   double p;

//fprintf( stderr, "Build sin/cos power=%ld\n", power );
   n = 1<<power;
   p = (2*PI)/(double)n;
   n = n>>1;
   for( i=0; i<n; i++ ) {
      COS[ i ] = (FLOAT)cos( p * (double)i );
      SIN[ i ] = (FLOAT)sin( p * (double)i );
   }
}

void FFT_FLOAT_forward( FLOAT *x_real, FLOAT *x_imag,
			FLOAT *energy, FLOAT *phi, int n )
{
   WORD li, i;
   register WORD d, j, l;
   WORD *index;
   FLOAT *real_ptr, *imag_ptr;
   FLOAT *t_real_ptr, *t_imag_ptr;
   FLOAT *en_ptr, *phi_ptr;
   FLOAT real, imag;
   FLOAT *cos_ptr, *sin_ptr;
   FLOAT en;

   if( n > FFT_RANGE_MAX ) {
      fprintf( stderr, "FFT_DOUBLE_forward: n is out of range (%ld>%ld)\n",
		       n, FFT_RANGE_MAX );
      return;
   }
   if( n != range ) {
      range = 1;
      power = 0;
      while( range < n ) {
	 range = range << 1;
	 power++;
      }
      if( range != n ) {
	 range = 0;
	 power = 0;
	 fprintf( stderr, "FFT_DOUBLE_forward: n is not a power of 2 (%ld)\n", n );
	 return;
      }
#ifndef FFT_Buggs
      build_index( power );
      build_sin_cos( power );
#endif
   }

#ifdef FFT_Buggs
   ASM_FastFT_main_loop( x_real, x_imag, power );
   ASM_FastFT_energy_phi( x_real, x_imag, energy, phi, n );
#else

   index = INDEX;
   real_ptr = x_real;
   imag_ptr = x_imag;
   for( i=0; i<n; i++ ) {
      j = *index++;
      if( i < j ) {
	 real = x_real[ j ];
	 imag = x_imag[ j ];
	 x_real[ j ] = *real_ptr;
	 x_imag[ j ] = *imag_ptr;
	 *real_ptr++ = real;
	 *imag_ptr++ = imag;
      }
      else {
	 real_ptr++;
	 imag_ptr++;
      }
   }

#ifndef FFT_ASM
   d = 1;
   li = 1<<( power-1 );

   for( i=0; i<power-1; i++ ) {
      real_ptr = x_real;
      imag_ptr = x_imag;
      t_real_ptr = &x_real[ d ];
      t_imag_ptr = &x_imag[ d ];
      for( j=0; j<li; j++ ) {
	 for( l=0; l<d; l++ ) {
	    real = *real_ptr + *t_real_ptr;
	    imag = *imag_ptr + *t_imag_ptr;
	    *t_real_ptr = *real_ptr - *t_real_ptr;
	    *t_imag_ptr = *imag_ptr - *t_imag_ptr;
	    t_real_ptr++;
	    t_imag_ptr++;
	    *real_ptr++ = real;
	    *imag_ptr++ = imag;
	 }
	 real_ptr += d;
	 imag_ptr += d;
	 t_real_ptr += d;
	 t_imag_ptr += d;
      }
      d = d<<1;
      li = li>>1;
      real_ptr = &x_real[ d ];
      imag_ptr = &x_imag[ d ];
      for( j=0; j<li; j++ ) {
	 cos_ptr = COS;
	 sin_ptr = SIN;
	 for( l=0; l<d; l++ ) {
	    real = (*real_ptr)*(*cos_ptr) + (*imag_ptr)*(*sin_ptr);
	    imag = (*imag_ptr)*(*cos_ptr) - (*real_ptr)*(*sin_ptr);
	    *real_ptr++ = real;
	    *imag_ptr++ = imag;
	    cos_ptr += li;
	    sin_ptr += li;
	 }
	 real_ptr += d;
	 imag_ptr += d;
      }
   }

   /* Optimized last loop */
   real_ptr = x_real;
   imag_ptr = x_imag;
   t_real_ptr = &x_real[ d ];
   t_imag_ptr = &x_imag[ d ];
   for( j=0; j<d; j++ ) {
      real = *real_ptr + *t_real_ptr;
      imag = *imag_ptr + *t_imag_ptr;
      *t_real_ptr = *real_ptr - *t_real_ptr;
      *t_imag_ptr = *imag_ptr - *t_imag_ptr;
      t_real_ptr++;
      t_imag_ptr++;
      *real_ptr++ = real;
      *imag_ptr++ = imag;
   }
#else
   ASM_FFT_main_loop( x_real, x_imag, COS, SIN, power );
#endif

#ifndef FFT_ASM
   real_ptr = x_real;
   imag_ptr = x_imag;
   en_ptr = energy;
   phi_ptr = phi;
   for( j=0; j<n; j++ ) {
      real = *real_ptr++;
      imag = *imag_ptr++;
      en = real*real + imag*imag;
      if( en == 0 ) {
	 *phi_ptr++ = 0;
      }
      else {
	 *phi_ptr++ = atan2( (double)imag, (double)real );
      }
      *en_ptr++ = en;
   }
#else
   ASM_FFT_energy_phi( x_real, x_imag, energy, phi, n );
#endif

#endif

}

void FFT_FLOAT_reverse( FLOAT *x_real, FLOAT *x_imag,
			FLOAT *energy, FLOAT *phi, int n )
{
   WORD li, i;
   register WORD d, j, l;
   FLOAT *out_real, *out_imag;
   FLOAT en, ph;
   if( n > FFT_RANGE_MAX ) {
      fprintf( stderr, "FFT_DOUBLE_forward: n is out of range (%ld>%ld)\n",
		       n, FFT_RANGE_MAX );
      return;
   }
   if( n != range ) {
      range = 1;
      power = 0;
      while( range < n ) {
	 range = range << 1;
	 power++;
      }
      if( range != n ) {
	 range = 0;
	 power = 0;
	 fprintf( stderr, "FFT_DOUBLE_forward: n is not a power of 2 (%ld)\n", n );
	 return;
      }
      build_index( power );
      build_sin_cos( power );
   }

   out_real = energy;
   out_imag = phi;

   for( i=0; i<n; i++ ) {
      out_real[ i ] = x_real[ INDEX[ i ] ];
      out_imag[ i ] = x_imag[ INDEX[ i ] ];
   }

   d = 1;
   li = 1<<( power - 2 );
   for( i=0; i<power; i++ ) {
      for( j=0; j<n; j++ ) {
	 if( j & d ) {
	    temp_real[ j ] = out_real[ j - d ] - out_real[ j ];
	    temp_imag[ j ] = out_imag[ j - d ] - out_imag[ j ];
	 }
	 else {
	    temp_real[ j ] = out_real[ j ] + out_real[ j + d ];
	    temp_imag[ j ] = out_imag[ j ] + out_imag[ j + d ];
	 }
      }
      d = d<<1;
      l = 0;
      for( j=0; j<n; j++ ) {
	 if( j & d ) {
	    out_real[ j ] = temp_real[ j ]*COS[ l ] - temp_imag[ j ]*SIN[ l ];
	    out_imag[ j ] = temp_imag[ j ]*COS[ l ] + temp_real[ j ]*SIN[ l ];
	    l += li;
	 }
	 else {
	    l = 0;
	    out_real[ j ] = temp_real[ j ];
	    out_imag[ j ] = temp_imag[ j ];
	 }
      }
      li = li>>1;
   }
   for( i=0; i<n; i++ ) {
      x_real[ i ] = out_real[ i ]/n;
      x_imag[ i ] = out_imag[ i ]/n;
      en = x_real[ i ]*x_real[ i ] + x_imag[ i ]*x_imag[ i ];
      if( en == 0 ) {
	 ph = 0;
      }
      else {
	 ph = atan2( (double)x_imag[ i ], (double)x_real[ i ] );
      }
      energy[ i ] = en;
      phi[ i ] = ph;
   }
}


