// V2.0 Keith Harrison. CIS:100431,1675
// Gimme feedback ... tell me if it don't work !!

// Turbo C++ 3.0
// Fixed point math functions 386/i486/Pentium(tm).

// THESE TWO CONSTANTS MUST BE ALTERED IN UNISON
// PRECISIONBITS fixes the position of the binary point in every fixed32 value
// and is also emitted as the immediate shift count inside the hand-coded
// instructions further down. PRECISIONPOWER is the matching scale factor used
// when converting to and from floating point.
const int   PRECISIONBITS  = 16;       // Number of bits in fractional part.
				       // MUST BE AN EVEN NUMBER ! (the square
				       // root routines shift by PRECISIONBITS/2).
				       // Default is PRECISIONBITS = 16.
const float PRECISIONPOWER = 65536.0F; // 2 to the power of PRECISIONBITS.
				       // Default is PRECISIONPOWER = 65536.0.

// Operand-size override prefix (0x66) for 386/486/586 instructions. Placed in
// front of a 16-bit instruction it makes the CPU execute the 32-bit form
// (e.g. "i386 mov ax,..." runs as "mov eax,..."). The semi-colon is essential:
// it ends the db statement so the instruction that follows on the same line
// is assembled as a separate statement.
#define i386 db 0x66;

// SHRD (shift right double) needs direct hardcoding...
// The bytes encode SHRD EAX,EDX,PRECISIONBITS: EAX is shifted right and its
// vacated upper bits are filled from the low bits of EDX.
#define SHRD_EAX_EDX db 0x66, 0x0f, 0xac, 0xd0, PRECISIONBITS

// Signed 32-bit fixed-point number. The value is held in a single long,
// scaled by 2^PRECISIONBITS (i.e. PRECISIONBITS fractional bits).
class fixed32 {

  long fp32; // Raw scaled value. This is all the data we need.

  public:
    // Constructors.
    fixed32() { fp32 = 0; }
    // Widen to long BEFORE shifting, exactly as operator=(int) does. Where
    // int is narrower than long, shifting the int itself by PRECISIONBITS
    // would throw the value away.
    fixed32(int initvalue) { fp32 = (long)initvalue << PRECISIONBITS; }
    fixed32(long initvalue) { fp32 = initvalue << PRECISIONBITS; }
    // Floating point values are scaled and then truncated to long.
    fixed32(float initvalue) { fp32 = (long)(initvalue * PRECISIONPOWER); }
    fixed32(double initvalue) { fp32 = (long)(initvalue * PRECISIONPOWER); }
    // Assignment operators (each returns a copy of the updated object).
    fixed32 operator = (int op2);
    fixed32 operator = (float op2);
    fixed32 operator = (fixed32 op2);
    // Conversion of fixed32 to int will lose all fractional part, but cannot
    // be avoided.
    operator int() {return fp32 >> PRECISIONBITS;}
    operator long() {return fp32 >> PRECISIONBITS;}
    operator float() {return fp32 / PRECISIONPOWER;}
    // Binary operators using fixed point only.
    friend fixed32 operator + (fixed32 op1, fixed32 op2);
    friend fixed32 operator - (fixed32 op1, fixed32 op2);
    friend fixed32 operator * (fixed32 op1, fixed32 op2);
    friend fixed32 operator / (fixed32 op1, fixed32 op2);
    // Binary operators using mixed integer & fixed point.
    // Separately spaced out for individual optimisation.
    friend fixed32 operator + (fixed32 op1, int     op2);
    friend fixed32 operator + (int     op1, fixed32 op2);
    friend fixed32 operator - (fixed32 op1, int     op2);
    friend fixed32 operator - (int     op1, fixed32 op2);
    friend fixed32 operator * (fixed32 op1, int     op2);
    friend fixed32 operator * (int     op1, fixed32 op2);
    friend fixed32 operator / (fixed32 op1, int     op2);
    friend fixed32 operator / (int     op1, fixed32 op2);
    // Shifts of the raw value by op2 bit positions.
    friend fixed32 operator >> (fixed32 op1, int op2);
    friend fixed32 operator << (fixed32 op1, int op2);
    // Stream operators for cout and cin.
    friend ostream &operator << (ostream &stream, fixed32 obj);
    friend istream &operator >> (istream &stream, fixed32 &obj);
    // Functions (feel free to add your own).
    friend fixed32 recip  (fixed32 op2);  // = 1/op2
    friend fixed32 square (fixed32 op2);  // = op2 * op2
    // NOTE(review): sqrt is declared here, but the square-root definitions
    // visible in this file are named sqrt_np and sqrt_lp - confirm where
    // sqrt itself is defined.
    friend fixed32 sqrt   (fixed32 op2);  // = square_root( op2 )
};

//Note: No assembly allowed in inline functions (damn...)

// Assign a whole number: widen it to long, then move it above the
// PRECISIONBITS fractional bits.
inline fixed32 fixed32::operator = (int op2)
{
  const long widened = op2;
  fp32 = widened << PRECISIONBITS;
  return *this;
}

// Assign a floating point number: scale by PRECISIONPOWER and truncate the
// product to a long.
inline fixed32 fixed32::operator = (float op2)
{
  fp32 = long(op2 * PRECISIONPOWER);
  return *this;
}

// Assign another fixed32: the raw scaled value is copied unchanged.
inline fixed32 fixed32::operator = (fixed32 op2)
{
  this->fp32 = op2.fp32;
  return *this;
}

// Fixed-point addition. Both operands carry the same scale, so the raw 32-bit
// values are simply added; no overflow check is made.
// Each i386-prefixed line executes as the 32-bit instruction shown in its
// trailing comment.
fixed32 operator + (fixed32 op1, fixed32 op2)
{
  fixed32 temp;
  //temp.fp32 = op1.fp32 + op2.fp32;
  asm {
	i386    mov     ax, word ptr op1  // mov eax, DWORD PTR op1
	i386    add     ax, word ptr op2  // add eax, DWORD PTR op2
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
};

// Fixed-point subtraction (op1 - op2). Both operands carry the same scale, so
// the raw 32-bit values are subtracted directly; no overflow check is made.
// Each i386-prefixed line executes as the 32-bit instruction shown in its
// trailing comment.
fixed32 operator - (fixed32 op1, fixed32 op2)
{
  fixed32 temp;
  //temp.fp32 = op1.fp32 - op2.fp32;
  asm {
	i386    mov     ax, word ptr op1  // mov eax, DWORD PTR op1
	i386    sub     ax, word ptr op2  // sub eax, DWORD PTR op2
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
};

// Fixed-point multiplication. The signed 32x32 multiply leaves a 64-bit
// product in EDX:EAX; SHRD then shifts it right by PRECISIONBITS so the result
// is back at the normal scale. Only the low 32 bits of the shifted product
// are kept, so results outside the representable range wrap silently.
fixed32 operator * (fixed32 op1, fixed32 op2)
{
  fixed32 temp;
  //temp.fp32 = (op1.fp32 * op2.fp32) >> PRECISIONBITS;   (64-bit product)
  asm {
	i386    mov     ax, word ptr op1  // mov eax, DWORD PTR op1
	i386    imul    word ptr op2      // imul DWORD PTR op2 -> EDX:EAX
		SHRD_EAX_EDX              // shrd eax, edx, PRECISIONBITS
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
};

// Fixed-point division (op1 / op2). The dividend is first widened to 64 bits
// in EDX:EAX and pre-scaled, then divided by the raw divisor.
// After the SHRD/SAR pair EDX:EAX holds op1 << (32 - PRECISIONBITS), which is
// the required op1 << PRECISIONBITS only for the default PRECISIONBITS of 16.
// IDIV raises a CPU divide error when op2 is zero or when the quotient does
// not fit in 32 bits; neither case is checked here.
fixed32 operator / (fixed32 op1, fixed32 op2)
{
  fixed32 temp;
  //temp.fp32 = (op1.fp32 << PRECISIONBITS) / op2.fp32;   (64-bit dividend)
  asm {
	i386    mov     dx, word ptr op1 // mov edx, DWORD PTR op1
	i386    xor     ax,ax            // xor eax, eax
		SHRD_EAX_EDX             // eax = low bits of edx moved to the top
	i386    db      0xc1, 0xfa, PRECISIONBITS // sar edx, PRECISIONBITS
	i386    idiv    word ptr op2 // idiv DWORD PTR op2
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
};

/* Note: converting the integer operand to fixed32 (long) would ideally be
       : done with MOVZX EAX,WORD PTR op2, but it's safer to use the code
       : below because of the kind of code the compiler generates to access
       : the class variable...                                            */

// fixed32 + int: the integer is promoted through operator=(int) and the sum
// is then formed by the fixed32 + fixed32 operator.
inline fixed32 operator + (fixed32 op1, int op2)
{
  fixed32 promoted;
  promoted = op2;
  return op1 + promoted;
}

// int + fixed32: the integer is promoted through operator=(int) and the sum
// is then formed by the fixed32 + fixed32 operator.
inline fixed32 operator + (int     op1, fixed32 op2)
{
  fixed32 promoted;
  promoted = op1;
  return promoted + op2;
}

// fixed32 - int. The integer is sign-extended to 32 bits, scaled up by
// PRECISIONBITS, negated and added to the raw fixed-point value.
// Sign extension (rather than clearing the upper half of EAX) keeps negative
// integers correct for every PRECISIONBITS setting, not only for 16.
// No overflow check is made.
fixed32 operator - (fixed32 op1, int     op2)
{
  fixed32 temp;
  // Note: a - b = a + (-b) = (-b) + a
  asm {
		mov     ax, word ptr op2 // AX = 16-bit integer operand
	i386    cbw                      // cwde: sign-extend AX into EAX
	i386    db      0xc1, 0xe0, PRECISIONBITS // shl eax,PRECISIONBITS
	i386    neg     ax // neg eax
	i386    add     ax, word ptr op1 // add eax, DWORD PTR op1
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
}

// int - fixed32. The integer is sign-extended to 32 bits and scaled up by
// PRECISIONBITS, then the raw fixed-point value is subtracted from it.
// Sign extension (rather than clearing the upper half of EAX) keeps negative
// integers correct for every PRECISIONBITS setting, not only for 16.
// No overflow check is made.
fixed32 operator - (int     op1, fixed32 op2)
{
  fixed32 temp;
  asm {
		mov     ax, word ptr op1 // AX = 16-bit integer operand
	i386    cbw                      // cwde: sign-extend AX into EAX
	i386    db      0xc1, 0xe0, PRECISIONBITS // shl eax,PRECISIONBITS
	i386    sub     ax, word ptr op2 // sub eax, DWORD PTR op2
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
}

// fixed32 * int. Multiplying the raw value by a plain integer needs no
// rescaling, so the low 32 bits of the product are the result.
// The integer multiplier must be SIGN-extended to 32 bits: with its upper
// half merely cleared, a negative multiplier n acts as n + 65536 and the
// product is wrong for any operand that has fractional bits set
// (e.g. 0.5 * -1). No overflow check is made.
fixed32 operator * (fixed32 op1, int     op2)
{
  fixed32 temp;
  asm   {
		mov     ax, word ptr op2  // AX = 16-bit integer multiplier
	i386    cbw                       // cwde: sign-extend AX into EAX
	i386    imul    word ptr op1      // imul DWORD PTR op1 -> EDX:EAX
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
}

// int * fixed32. Multiplying the raw value by a plain integer needs no
// rescaling, so the low 32 bits of the product are the result.
// The integer multiplier must be SIGN-extended to 32 bits: with its upper
// half merely cleared, a negative multiplier n acts as n + 65536 and the
// product is wrong for any operand that has fractional bits set
// (e.g. -1 * 0.5). No overflow check is made.
fixed32 operator * (int     op1, fixed32 op2)
{
  fixed32 temp;
  asm   {
		mov     ax, word ptr op1  // AX = 16-bit integer multiplier
	i386    cbw                       // cwde: sign-extend AX into EAX
	i386    imul    word ptr op2      // imul DWORD PTR op2 -> EDX:EAX
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
}

// fixed32 / int. Dividing the raw value by a plain integer needs no
// rescaling: result = op1.fp32 / op2 (quotient truncated toward zero).
// The whole 32-bit raw value is sign-extended into EDX:EAX and divided by the
// sign-extended integer, so fractional bits take part in the division and the
// quotient always lands in the correct half of the result.
// IDIV raises a CPU divide error when op2 is zero (or for the single
// overflowing case of the most negative value divided by -1); neither is
// checked here.
fixed32 operator / (fixed32 op1, int     op2)
{
  fixed32 temp;
  asm {
		mov     ax, word ptr op2  // AX = 16-bit integer divisor
	i386    cbw                       // cwde: sign-extend AX into EAX
	i386    mov     bx, ax            // mov ebx, eax
	i386    mov     ax, word ptr op1  // mov eax, DWORD PTR op1
	i386    cwd                       // cdq: sign-extend EAX into EDX:EAX
	i386    idiv    bx                // idiv ebx
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
}

// int / fixed32: the integer is promoted through operator=(int) and the
// quotient is then formed by the fixed32 / fixed32 operator.
inline fixed32 operator / (int     op1, fixed32 op2)
{
  fixed32 quotient;
  quotient = op1;
  quotient = quotient / op2;
  return quotient;
}

// Shift the raw value right by op2 bits (divide by 2^op2).
// An ARITHMETIC shift (SAR) is used so that the sign of a negative value is
// preserved, matching the signed arithmetic used by every other operator; a
// logical shift would turn negative numbers into large positive ones.
// Only the low byte of op2 is used as the count, and the CPU itself reduces
// a 32-bit shift count modulo 32.
fixed32 operator >> (fixed32 op1, int op2)
{
  fixed32 temp;
  asm   {
	i386    mov     ax, word ptr op1  // mov eax, DWORD PTR op1
		mov     cl, byte ptr op2  // CL = shift count
	i386    sar     ax, cl            // sar eax, cl
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
}

// Shift the raw value left by op2 bits (multiply by 2^op2). Bits shifted out
// of the top are lost; no overflow check is made.
// Only the low byte of op2 is loaded as the shift count.
fixed32 operator << (fixed32 op1, int op2)
{
  fixed32 temp;
  asm   {
	i386    mov     ax, word ptr op1  // mov eax, DWORD PTR op1
		mov     cl, byte ptr op2  // CL = shift count
	i386    shl     ax, cl            // shl eax, cl
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
}

// Stream output: the raw value is scaled back to a float and the stream's
// own float formatting does the rest.
inline ostream &operator << (ostream &stream, fixed32 obj)
{
  return stream << ((float)obj.fp32 / PRECISIONPOWER);
}

// Stream input: reads a floating point number and converts it to fixed point
// through operator=(float).
// obj is only updated when the extraction succeeded; on failure it keeps its
// previous value and the stream's error state is left for the caller to test.
inline istream &operator >> (istream &stream, fixed32 &obj)
{
  float temp = 0.0F;  // Defined value even if nothing could be read.
  stream >> temp;     // Get a floating point number from the stream.
  if (!stream.fail())
    obj = temp;       // Conversion to fixed point.
  return stream;
}

// Reciprocal (1/op2). The 64-bit dividend EDX:EAX is set to 2^32 (EDX = 1,
// EAX = 0) and divided by the raw value of op2.
// 2^32 is 1.0 scaled twice by 2^16, so the result is only correctly scaled
// for the default PRECISIONBITS of 16.
// IDIV raises a CPU divide error when op2 is zero or when the quotient does
// not fit in 32 bits (very small raw values); neither case is checked here.
fixed32 recip(fixed32 op2) // Reciprocal (1/n).
{
  fixed32 temp;
  asm {
	i386    xor     ax,ax              // xor eax, eax
	i386    mov     dx,1               // mov edx, DWORD 1
		dw      0       	   // upper 16 bits of the 32-bit immediate
	i386    idiv    word ptr op2       // idiv DWORD PTR op2.fp32
	i386    mov     word ptr temp, ax  // mov DWORD PTR temp, eax
  }
  return temp;
}

// Square (op2 * op2). EAX is multiplied by itself giving a 64-bit product in
// EDX:EAX, which SHRD shifts right by PRECISIONBITS to restore the scale.
// Only the low 32 bits of the shifted product are kept, so results outside
// the representable range wrap silently.
fixed32 square(fixed32 op2)
{
  fixed32 temp;
  asm {
	i386    xor     dx,dx             // xor edx, edx (overwritten by imul)
	i386    mov     ax, word ptr op2  // mov eax, DWORD PTR op2
	i386    imul    ax                // imul eax -> EDX:EAX
		SHRD_EAX_EDX              // shrd eax, edx, PRECISIONBITS
	i386    mov     word ptr temp, ax // mov DWORD PTR temp, eax
  }
  return temp;
}

// Upper search bounds for the square-root routines, indexed by the position
// of the highest set bit of the raw operand (the BSR result).
// Entry i is floor(sqrt(2^(i+1))): a value whose top bit is bit i is below
// 2^(i+1), so its square root cannot exceed this entry. The last entry is
// capped at 65535 so that it still fits in 16 bits.
// The assembly addresses the table with a byte offset of 2*i, so each entry
// must remain 16 bits wide.
unsigned int roottable[32] = {1,2,2,4,5,8,11,16,22,32,45,64,90,128,181,256,
			      362,512,724,1024,1448,2048,2896,4096,5792,
			      8192,11585,16384,23170,32768U,46340U,65535U};

// Square root, normal precision.
// Finds the 16-bit integer square root of the raw 32-bit value by bisection
// and then shifts it left by PRECISIONBITS/2 to restore the fixed-point
// scale (which is why PRECISIONBITS must be even).
//   SI = lower bound (starts at 0)
//   DI = upper bound (from roottable, selected by the highest set bit)
//   CX = previous midpoint, EBX = raw operand, AX = current midpoint/result
// The search stops when the bounds meet, when the midpoint stops changing,
// or when midpoint^2 equals the operand. A zero operand returns zero.
// NOTE(review): the operand is treated as non-negative; behaviour for
// negative values has not been analysed.
fixed32 sqrt_np(fixed32 op2) // Normal precision
{
  fixed32 temp;
  asm  {
	i386    mov     ax,word ptr op2  // mov eax, DWORD PTR op2
	i386    db      0x0f, 0xbd, 0xd8 // bsr ebx, eax
		jz      found_32         // operand is zero: AX is already 0
		add     bx,bx            // bit index -> byte offset into table
		mov     di,[word ptr roottable + bx]
		xor     si,si
		mov     cx,di
	i386    mov     bx,ax // mov ebx, eax
	}
  loop_32:
	asm {
		cmp     si,di
		je      found_32
		mov     ax,si
		add     ax,di
		rcr     ax,1             // (SI+DI)/2, carry included
		cmp     ax,cx
		je      found_32
		mov     cx,ax
	i386    db      0x0f, 0xb7, 0xd0 // movzx edx, ax
	i386    db      0x0f, 0xaf, 0xd2 // imul edx, edx
	i386    cmp     dx, bx           // cmp edx, ebx
		je      found_32
		jns     bigger_32        // midpoint^2 above operand
		mov     si,ax            // midpoint too small: raise lower bound
		jmp     near loop_32
	}
  bigger_32:
	asm     mov     di,ax            // midpoint too big: lower upper bound
	asm     jmp     near loop_32
  found_32:
	// Result is in AX. Note that it cannot be more than 16 bits.
  asm {
	i386    db      0x0f, 0xb7, 0xc0 // movzx eax,ax
	i386    db      0xc1, 0xe0, (PRECISIONBITS/2) // shl eax, PRECISIONBITS/2
	i386    mov     word ptr temp,ax // mov DWORD PTR temp, eax
  }
  return temp;
}

// Square root, low precision.
// Same bisection as the normal-precision routine, but the number of
// iterations is additionally limited by a counter in BP that starts at half
// the index of the operand's highest set bit; whatever is in AX when the
// counter reaches zero is taken as the root.
//   SI = lower bound (starts at 0)
//   DI = upper bound (from roottable, selected by the highest set bit)
//   CX = previous midpoint, EBX = raw operand, AX = current midpoint/result
// BP is saved on entry and restored at found_32 because it is borrowed as the
// iteration counter; every path to found_32 is taken after the push.
// The result is shifted left by PRECISIONBITS/2 to restore the fixed-point
// scale (which is why PRECISIONBITS must be even).
// NOTE(review): when the counter expires on the very first pass AX still
// holds the low word of the operand rather than a midpoint - confirm this is
// acceptable for very small operands.
fixed32 sqrt_lp(fixed32 op2) // Low precision.
{
  fixed32 temp;
  asm {
		push    bp
	i386    mov     ax,word ptr op2  // mov eax, DWORD PTR op2
	i386    db      0x0f, 0xbd, 0xd8 // bsr ebx, eax
		jz      found_32         // operand is zero: AX is already 0
		mov     bp,bx
		shr     bp,1             // iteration limit = bit index / 2
		add     bx,bx            // bit index -> byte offset into table
		mov     di,[word ptr roottable + bx]
		xor     si,si
		mov     cx,di
	i386    mov     bx,ax // mov ebx, eax
  }
  loop_32:
  asm {
		cmp     si,di
		je      found_32
		dec     bp
		jz      found_32         // iteration limit reached
		mov     ax,si
		add     ax,di
		rcr     ax,1             // (SI+DI)/2, carry included
		cmp     ax,cx
		je      found_32
		mov     cx,ax
	i386    db      0x0f, 0xb7, 0xd0 // movzx edx, ax
	i386    db      0x0f, 0xaf, 0xd2 // imul edx, edx
	i386    cmp     dx, bx           // cmp edx, ebx
		je      found_32
		jns     bigger_32        // midpoint^2 above operand
		mov     si,ax            // midpoint too small: raise lower bound
		jmp     near loop_32
  }
  bigger_32:
	asm     mov     di,ax            // midpoint too big: lower upper bound
	asm     jmp     near loop_32
  found_32:
	asm     pop     bp
	// Result is in AX. Note that it cannot be more than 16 bits.
  asm {
	i386    db      0x0f, 0xb7, 0xc0 // movzx eax,ax
	i386    db      0xc1, 0xe0, (PRECISIONBITS/2) // shl eax, PRECISIONBITS/2
	i386    mov     word ptr temp,ax // mov DWORD PTR temp, eax
  }
  return temp;
}
