/* Subroutines needed by GCC output code for the 68000/20. 

   Compile using -O flag with gcc. 
   Use the -m68000 flag if you have a 68000

   This package includes 32 bit integer and 64-bit floating
   point routines.

   WARNING: alpha-test version (July 1988) -- probably full of bugs.
   If you find a bug, send bugs reports to jsm@phoenix.princeton.edu,
   or

	   Scott McCauley
	   PPPL P. O. Box 451
	   Princeton NJ 08543

   Known bugs/features:

   1) Depending on the version of GCC, this may produce a lot of
      warnings about conversion clashes, etc. It should still compile
      as is. Also, there appears to be a bug in the register-allocation
      parts of gcc that makes the compiler sometimes core dump
      or run into an infinite loop. This version works -- if you
      decide to change routines these compiler bugs can bite very hard....

   2) all single-precision gets converted to double precision in the
      math routines (in accordance with K&R), so doing things in
      double precision is faster than single precision....

(not any more -- jrd )

   3) double precision floating point division may not be accurate to
      the last four or so bits. Most other routines round to the
      lsb.

(not any more -- jrd )

   4) no support of NaN and Inf

   5) beware of undefined operations: i.e. a float to integer conversion
      when the float is larger than MAXIINT.
     
*/

#include <stdio.h>
#include <math.h>

#include "flonum.h"
			
#define umultl __umulsi
#define multl __mulsi3
#define udivl __udivsi3 
#define divl __divsi3
#define ddiv __divdf3
#define qmult __qmult
#define dmult __muldf3
#define dneg __negdf2
#define dadd __adddf3
#define dcmp __cmpdf2
#define dtoul __fixunsdfsi
#define dtol __fixdfsi
#define ltod __floatsidf


#ifdef L_eprintf
#include <stdio.h>
/* This is used by the `assert' macro.  */
void
_eprintf (string, line)
     char *string;
     int line;
{
  fprintf (stderr, string, line);
}
#endif

#ifdef L_umulsi3

/*_umulsi3 (a, b) unsigned a, b; { return a * b; } */

unsigned long umultl(a,b)
unsigned long a, b;
{
	register unsigned long d7, d6, d5, d4;
	
	d7 = a;
	d6 = b;
	d5 = d6;
	d4 = d6;
	/* without the next line, gcc may core dump. Gcc sometimes
	   gets confused if you have too many registers */

	&a; &b;

	/*printf("a %u b %u\n", a, b);*/

	/* low word */
	MUL(d7, d6);
	SWAP(d5);
	MUL(d7, d5);
	SWAP(d7);
	MUL(d7, d4);
	d4 += d5;
	SWAP(d4);
	d4 &= 0xFFFF0000;
	d4 += d6;
	return(d4);
}
#endif

#ifdef L_mulsi3
/* _mulsi3 (a, b) int a, b; { return a * b; } */

long multl(a, b)
long a, b;
{
	int sign = 0;
	long umultl();
	if ((a ^ b) < 0) sign = 1;
	if (a < 0) a = -a;
	if (b < 0) b = -b;
	/*printf("a is %d b is %d\n", a, b);*/
	if (sign) return(- umultl(a,b));
	else return(umultl(a,b));
}

#endif

#ifdef L_udivsi3
/*_udivsi3 (a, b) unsigned a, b; { return a / b; } */

/*
 this routine based on one in the PD forth package for the sun by Mitch Bradley
*/

unsigned udivl(u, v)
register unsigned long u, v;
{
	register unsigned short um, ul, vm, vl;
	unsigned long ans;
	unsigned long u1, v1;
	long i;
	long rem;

	if (v == 0) {
		/* should cause an exception condition */
		DIV(u, v);
		fprintf(stderr, "division by zero\n");
	}
	if (v > u) return(0);

	ul = u; SWAP(u); um = u;
	vl = v; SWAP(v); vm = v;
	if (vm == 0) {
		u = vl; v = um;
		DIV(u, v);
		/* note -- if you delete the next line, gcc goes into
		   an infinite loop */
		if (vm) printf("result is %d\n", v);
		vm = v & 0xFFFF; /* dividend is in low word */
		v &= 0xFFFF0000; /* remainder is in high word */
		v += ul;
		DIV(vl, v);
		v &= 0xFFFF; /* dividend is in low word */
		u = vm;
		SWAP(u);
		return(v + u);
		/*ans = ((um / vl) << 16) + ((((um % vl) << 16) + ul) / vl);
		return(ans);*/
	}

	if (vl == 0) return(um / vm);
	SWAP(u); SWAP(v);
	if ( (u >> 3) < v) {
		for(i = 0; u >= v; i++, u -= v);
		/*printf("lt 8\n");*/
		return(i);
	}
	u1 = u; v1 = v;

	/* scale divisor */
	v1--;
	for(i = 0; ((unsigned) v1) >= 0xFFFF; v1 >>= 1, i++);
	if (++v1 > 0xFFFF) {
		i++; v1 >>=1;
	}
	u1 >>= i;
	/*printf("i is %d, u1 is %x, v1 is %x\n", i, u1, v1);*/
	ans = u1 / v1;
	rem = u - (ans * v);
	if (rem > v) {ans++; rem -= v; }
	if (rem > v) {printf("oops\n");}
	return(ans);
}
#endif

#ifdef L_divsi3

long divl(a, b)
long a, b;
{
	int sign = 0;
	if ((a ^ b) < 0) sign = 1;
	if (a < 0) a = -a;
	if (b < 0) b = -b;
	if (sign) return(-udivl(a,b));
	else return(udivl(a,b));
}
#endif

#ifdef L_umodsi3
_umodsi3 (a, b)
     unsigned a, b;
{
  /*return a % b;*/
  return (a - ((a/b)*b));
}
#endif

#ifdef L_modsi3
_modsi3 (a, b)
     int a, b;
{
  /*return a % b;*/
  return( a - ((a/b) * b));
}
#endif

#ifdef L_lshrsi3
_lshrsi3 (a, b)
     unsigned a, b;
{
  return a >> b;
}
#endif

#ifdef L_lshlsi3
_lshlsi3 (a, b)
     unsigned a, b;
{
  return a << b;
}
#endif

#ifdef L_ashrsi3
_ashrsi3 (a, b)
     int a, b;
{
  return a >> b;
}
#endif

#ifdef L_ashlsi3
_ashlsi3 (a, b)
     int a, b;
{
  return a << b;
}
#endif


/* this divide stuff is hopelessly broken, in addition to being much
more complicated than in needs to be.  The new version's in divdf3.c */

#if 0
#ifdef L_divdf3

/*double _divdf3 (a, b) double a, b; { return a / b; } */

double drecip1(f1)
double f1;
{
	struct bitdouble *bdp = &f1;
	unsigned m1, m2;

printf("drecip1(%X)", f1);
	if (bdp->exp == 0 ) return(0L);
	if (bdp->mant1 == 0L) {
		bdp->exp = 0x3ff + 0x3ff - bdp->exp;
		bdp->mant2 = 0L;
		return(f1);
	}
	bdp->exp = 0x3ff + 0x3ff - bdp->exp - 1;
	m1 = (0x00100000 + bdp->mant1) >> 5;
	m2 = (0x80000000 / m1);
	/*printf("m1 %x m2 %x\n", m1, m2);*/
	m2 <<= 5;
	m2 &= 0xFFFFF;
	/*printf("exp %x mant %x\n", bdp->exp, m2);*/
	bdp->mant1 = m2;
	bdp->mant2 = 0L;
printf("drecip1->%X\n", f1);
	return(f1);
}

double drecip(f)
double f;
{
	struct bitdouble *bdp;
	double quotient, remainder;

printf("drecip(%X)", f);
	quotient = drecip1(f);
	remainder = /* 1.0 */ ((double)one) - quotient * f;
	bdp = &remainder;
	for(; bdp->exp > 0x3ca; ) {
printf("drc: exp=%X ", bdp->exp);
		remainder = /* 1.0 */ ((double)one) - (quotient*f);
printf("rem=%X ", remainder);
		quotient = quotient + (drecip1(f)*remainder);
	}
printf("drecip->%X\n", quotient);
	return(quotient);
}


double ddiv(f1, f2)
double f1, f2;
{
	return(f1 * drecip(f2));
}
#endif
#endif			/* commented out divide routines */

#ifdef L_muldf3
/*double _muldf3 (a, b) double a, b; { return a * b; } */

#ifdef SLOW_KLUDGEY_QMULT
qmult(m11, m12, m21, m22, p1, p2)
unsigned long m11, m12, m21, m22, *p1, *p2;
{
	register long d2 = m11;
	register unsigned long d3 = m12, d4 = m21, d5 = m22, d6 =0, d7 = 0;
	int i;
	/* guess what happens if you delete the next line.... */
/*	&i; */
	for (i = 0; i < 11; i++) ASL2(d2, d3);
	for (i = 0; i < 9; i++) ASL2(d4, d5);
	
	for (i = 0; i < 64; i++) {
		if (d2 < 0) { ADD2(d4, d5, d6, d7);}
		ASL2(d2, d3);
		ASR2(d4, d5);
	}
	d2 = d6;
	d3 = d7;
	for (i = 0; i < 9; i++) ASR2(d2, d3);
	*p1 = d2; *p2 = d3;
}
#else
/* new qmult */

/* a set of totally local kludges, for summing partial results into
   result vector */

#define XADDL(partial, target_ptr) \
	{ register unsigned long temp = *target_ptr; \
	asm volatile("addl %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \
	*target_ptr-- = temp; temp = *target_ptr; \
	asm volatile("addxl #0,%0" : "=d" (temp) : "0" (temp)); \
	*target_ptr = temp; }

static long constant_zero_kludge = 0;

#define XXADDL(partial, target) \
	{ register unsigned long * zero = &constant_zero_kludge + 1; \
	asm volatile("addl %2,%0@" : "=a" (target) : "0" (target), "g" (partial)); \
	asm volatile("addxl %0@-,%1@-" : "=a" (zero) : "a" (target), "0" (zero)); }

/*
#define ADDL(partial, target_ptr) \
	{ register unsigned long temp = *target_ptr; \
	asm volatile("addl %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \
	*target_ptr-- = temp }

#define ADDXL(partial, target_ptr) \
	{ register unsigned long temp = *target_ptr; \
	asm volatile("addxl %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \
	*target_ptr-- = temp }
	
#define ADDW(partial, target_ptr) \
	{ register unsigned short temp = *(unsigned short * )target_ptr; \
	asm volatile("addw %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \
	*(unsigned short * )target_ptr-- = temp }

#define ADDXW(partial, target_ptr) \
	{ register unsigned sort temp = *(unsigned short * )target_ptr; \
	asm volatile("addxw %2,%0" : "=d" (temp) : "0" (temp), "g" (partial)); \
	*(unsigned short * )target_ptr-- = temp }
*/	

static char component_index[] = 
	{
	3, 3,				/* last ones */

	2, 3,				/* next to last x last */
	3, 2,				/* ... */

	1, 3,
	2, 2,
	3, 1,

	0, 3,
	1, 2,
	2, 1,
	3, 0,
	
	0, 2,
	1, 1,
	2, 0,
	
	0, 1,
	1, 0,
	
	0, 0
	};

qmult(m1_hi, m1_lo, m2_hi, m2_lo, result_hi, result_lo)
unsigned long m1_hi, m1_lo, m2_hi, m2_lo, * result_hi, * result_lo;
{
  unsigned short * m1 = (unsigned short * )(&m1_hi);
  unsigned short * m2 = (unsigned short * )(&m2_hi);
  unsigned short result[10];		/* two extra for XADDL */
  register unsigned long mult_register;
  register unsigned long res1, res2, res3;
  long * kludge;
  short i, j;
  char * idx_p = (char * )&component_index;
/*
fprintf(stderr, "  qmult: %08X:%08X * %08X:%08X\n", 
	m1_hi, m1_lo, m2_hi, m2_lo);
*/
  for (i = 0 ; i < 10 ; i++) result[i] = 0;

/* walk thru 'vectors' of mantissa pieces, doing unsigned multiplies
   and summing results into result vector.  Note that the order is 
   chosen to guarantee that no more adds than generated by XADDL are
   needed.  */

  for ( ; ; )
	{
	i = *idx_p++;
	j = *idx_p++;
	mult_register = m1[i]; 
	MUL(m2[j], mult_register);
	kludge = (long * )(&result[i + j + 2]);
	XXADDL(mult_register, kludge);
/*
fprintf(stderr, "  qmult: %d[%04X] * %d[%04X] -> %08X\n",
  i, m1[i], j, m2[j], mult_register);
fprintf(stderr, "       %04X %04X %04X %04X %04X %04X %04X %04X\n",
  result[2], result[3], result[4], result[5], 
  result[6], result[7], result[8], result[9]);
*/
	if ((i == 0) && (j == 0))
		break;
	}

  /* get the result shifted to the right place.  Unfortunately, we need
     the 53 bits that's 22 bits down in the result vec.  sigh */
  kludge = (long * )(&result[2]);
  res1 = *kludge++;
  res2 = *kludge++;
  res3 = *kludge;
  for (i = 0 ; i < 12 ; i++)
	ASL3(res1, res2, res3);
  /* now put the result back */
  *result_hi = res1;
  *result_lo = res2;
}
#endif

double dmult(f1, f2)
double f1, f2;
{
	register long d2;
	register unsigned d3, d4, d5, d6, d7;
	unsigned long p1, p2;

	struct bitdouble *bdp1 = &f1, *bdp2 = &f2;
	int exp1, exp2, i;

	exp1 = bdp1->exp; exp2 = bdp2->exp;
	/* check for zero */
	if (! exp1) return(0.0);
	if (! exp2) return(0.0);
	d2 = 0x00100000 + bdp1->mant1;
	d3 = bdp1->mant2;
	d4 = 0x00100000 + bdp2->mant1;
	d5 = bdp2->mant2;
	qmult(d2, d3, d4, d5, &p1, &p2);
	d6 = p1; d7 = p2;

	if (d6 & 0x00200000) {
		ASR2(d6, d7);
		exp1++;
	}

	if (bdp1->sign == bdp2->sign) bdp1->sign = 0;
	else bdp1->sign = 1;
	bdp1->exp = exp1 + exp2 - 0x3ff;
	bdp1->mant1 = d6;
	bdp1->mant2 = d7;
	return(f1);
}
#endif

#ifdef L_negdf2
/*double _negdf2 (a) double a; { return -a; } */

double dneg(num)
double num;
{
	long *i = &num;
	if (*i)			/* only negate if non-zero */
		*i ^= 0x80000000;
	return(num);
}
#endif

#ifdef L_adddf3
/*double _adddf3 (a, b) double a, b; { return a + b; } */

double dadd(f1, f2)
double f1, f2;
{

	register long d4, d5, d6, d7;
	struct bitdouble *bdp1 = &f1, *bdp2 = &f2;
	short exp1, exp2, sign1, sign2, howmuch, temp;

	exp1 = bdp1->exp; exp2 = bdp2->exp;

	/* check for zero */
	if (! exp1) return(f2); if (! exp2) return(f1);

	/* align them */
	if (exp1 < exp2) {
		bdp1 = &f2; bdp2 = &f1;
		exp1 = bdp1->exp; exp2 = bdp2->exp;
	}
	howmuch = exp1 - exp2;
	if (howmuch > 53) return(f1);

	d7 = bdp2->mant1 + 0x00100000;
	d6 = bdp2->mant2;

	d5 = bdp1->mant1 + 0x00100000;
	d4 = bdp1->mant2;

	for (temp = 0; temp < howmuch; temp++) ASR2(d7, d6);

	/* take care of negative signs */
	if (bdp1->sign) 
		{
		NEG(d5, d4);
		}
	if (bdp2->sign)
		{
		NEG(d7, d6);
		}

	ADD2(d7, d6, d5, d4);
	bdp1 = &f1;

	if (d5 < 0) {
		NEG(d5, d4);
		bdp1->sign = 1;
	} else bdp1->sign = 0;

	if (d5 == 0 && d4 == 0) return(0.0);

	for(howmuch = 0; d5 >= 0; howmuch++) ASL2(d5, d4);

	ASL2(d5, d4);
	for (temp = 0; temp < 12; temp++) ASR2(d5, d4);
	bdp1->mant1 = d5;
	bdp1->mant2 = d4;
	bdp1->exp = exp1 + 11 - howmuch;
	return(f1); 
}

#endif

#ifdef L_subdf3
double
__subdf3 (a, b)
     double a, b;
{
  return a+(-b);
}
#endif

#ifdef L_cmpdf2 /*int _cmpdf2 (a, b) double a, b; { if (a > b) return 1;
  else if (a < b) return -1; return 0; } */

int dcmp(f1, f2)
double f1, f2;
{
	struct bitdouble *bdp1, *bdp2;
	unsigned int s1, s2;
	bdp1 = &f1;
	bdp2 = &f2;
	s1 = bdp1->sign;
	s2 = bdp2->sign;
	if (s1 > s2) return(-1);
	if (s1 < s2) return(1);
	/* if sign of both negative, switch them */
	if (s1 != 0) {
		bdp1 = &f1;
		bdp2 = &f2;
	}
	s1 = bdp1->exp;
	s2 = bdp2->exp;
	if (s1 > s2) return(1);
	if (s1 < s2) return(-1);
	/* same exponent -- have to compare mantissa */
	s1 = bdp1->mant1;
	s2 = bdp2->mant1;
	if (s1 > s2) return(1);
	if (s1 < s2) return(-1);
	s1 = bdp1->mant2;
	s2 = bdp2->mant2;
	if (s1 > s2) return(1);
	if (s1 < s2) return(-1);
	return(0); /* the same! */
}
#endif

#ifdef L_fixunsdfsi
/*_fixunsdfsi (a) double a; { return (unsigned int) a; } */

/* #ifdef L_fixdfsi _fixdfsi (a) double a; { return (int) a; } #endif */

unsigned long dtoul(f)
double f;
{
	struct bitdouble *bdp;
	int si, ex, mant1, mant2;
	bdp = &f;
	si = bdp->sign;
	ex = bdp->exp;
	mant1 = bdp->mant1 + 0x00100000;
	mant2 = bdp->mant2;

	/* zero value */
	if (ex == 0) return(0L);
	/* less than 1 */
	if (ex < 0x3ff) return(0L);
	/* less than 0 */
	if (si ) return(0L);
	mant1 <<= 10;
	mant1 += (mant2 >> 22);
	mant1 >>= 30 + (0x3ff - ex);
	return(mant1);
}

long dtol(f)
double f;
{
	struct bitdouble *bdp = &f;

	if (bdp->sign) {
		bdp->sign = 0;
		return( - dtoul(f));
	}
	return(dtoul(f));
}
#endif

#ifdef L_fixunsdfdi

/*double _fixunsdfdi (a) double a; { union double_di u;
  u.i[LOW] = (unsigned int) a; u.i[HIGH] = 0; return u.d; } */
#endif


#ifdef L_fixdfdi
double
_fixdfdi (a)
     double a;
{
  union double_di u;
  u.i[LOW] = (int) a;
  u.i[HIGH] = (int) a < 0 ? -1 : 0;
  return u.d;
}
#endif

#ifdef L_floatsidf
/* double _floatsidf (a) int a; { return (double) a; } */

double ltod(i)
long i;
{
	int expo, shift;
	double retval;
	struct bitdouble *bdp = &retval;
	if (i == 0) {
		long *t = &retval;
		t[0] = 0L;
		t[1] = 0L;
		return(retval);
	}
	if (i < 0) {
		bdp->sign = 1;
		i = -i;
	} else bdp->sign = 0;
	shift = i;
	for (expo = 0x3ff + 31 ; shift > 0; expo--, shift <<= 1);
	shift <<= 1;
	bdp->exp = expo;
	bdp->mant1 = shift >> 12;
	bdp->mant2 = shift << 20;
	return(retval);
}

#endif

#ifdef L_floatdidf
/* ok as is -- will call other routines */
double
_floatdidf (u)
     union double_di u;
{
  register double hi
    = ((double) u.i[HIGH]) * (double) 0x10000 * (double) 0x10000;
  register double low = (unsigned int) u.i[LOW];
  return hi + low;
}
#endif

#ifdef L_addsf3
SFVALUE
_addsf3 (a, b)
     union flt_or_int a, b;
{
  union flt_or_int intify; return INTIFY ((double) a.f + (double) b.f);
}
#endif

#ifdef L_negsf2
SFVALUE
_negsf2 (a)
     union flt_or_int a;
{
  union flt_or_int intify;
  return INTIFY (-((double) a.f));
}
#endif

#ifdef L_subsf3
SFVALUE
_subsf3 (a, b)
     union flt_or_int a, b;
{
  union flt_or_int intify;
  return INTIFY (((double) a.f - (double) b.f));
}
#endif

#ifdef L_cmpsf2
SFVALUE
_cmpsf2 (a, b)
     union flt_or_int a, b;
{
  union flt_or_int intify;
  double a1, b1;
  a1 = a.f; b1 = b.f;
  if ( a1 > b1)
    return 1;
  else if (a1 < b1)
    return -1;
  return 0;
}
#endif

#ifdef L_mulsf3
SFVALUE
_mulsf3 (a, b)
     union flt_or_int a, b;
{
  union flt_or_int intify;
  return INTIFY (((double) a.f * (double) b.f));
}
#endif

#ifdef L_divsf3
SFVALUE
_divsf3 (a, b)
     union flt_or_int a, b;
{
  union flt_or_int intify;
  return INTIFY (((double) a.f / (double) b.f));
}
#endif

#ifdef L_truncdfsf2
float dtof(d)
double d;
{
	struct bitdouble *bdp = &d;
	float retval;
	struct bitfloat *bfp = &retval;
	int tempval;

	bfp->sign = bdp->sign;
	if (bdp->exp == 0) return ((float) 0.0);
	bfp->exp = bdp->exp - 0x400 + 0x80;
	tempval = (bdp->mant1 << 4 ) + ((0xF0000000 & bdp->mant2) >> 28);
	/* round */
	tempval++;
	if (tempval == 0x01000000) bfp->exp++;
	bfp->mant = tempval >> 1;
	return(retval);
}

SFVALUE
__truncdfsf2 (a)
     double a;
{
  union flt_or_int intify;
  return INTIFY (dtof(a));
}
#endif

#ifdef L_extendsfdf2
double ftod(f)
union flt_or_int f;
{
	double retval;
	struct bitfloat *bfp = &f.f;
	struct bitdouble *bdp = &retval;
	if (bfp->exp == 0) return(0.0);
	bdp->sign = bfp->sign;
	bdp->exp = 0x400 - 0x80 + bfp->exp;
	bdp->mant1 = bfp->mant >> 3;
	bdp->mant2 = (bfp->mant & 0x7) << 29;
	/*printf("returning %f from extendsfdf2\n", retval);*/
	return(retval);
}

double
__extendsfdf2 (a)
     union flt_or_int a;
{
  union flt_or_int intify;
  double retval;
  return (ftod(a));
}
#endif