/* A working Gnulib68k, thanks to Scott McCauley for the origonal
   version, and John Dunning (jrd@STONY-BROOK.SCRC.Symbolics.COM)
   who got this working.
   
   Please not that only the double float. stuff is picked up from
   here, the other long-int and single float stuff come from
   fixnum.s and sflonum.s (see flonum.h for the appr. #defs).
   ++jrb
   */

/* Subroutines needed by GCC output code for the 68000/20. 
   
   Compile using -O flag with gcc. 
   Use the -m68000 flag if you have a 68000
   
   This package includes 32 bit integer and 64-bit floating
   point routines.
   
   WARNING: alpha-test version (July 1988) -- probably full of bugs.
   If you find a bug, send bugs reports to jsm@phoenix.princeton.edu,
   or
   
   Scott McCauley
   PPPL P. O. Box 451
   Princeton NJ 08543
   
   Known bugs/features:
   
   1) Depending on the version of GCC, this may produce a lot of
   warnings about conversion clashes, etc. It should still compile
   as is. Also, there appears to be a bug in the register-allocation
   parts of gcc that makes the compiler sometimes core dump
   or run into an infinite loop. This version works -- if you
   decide to change routines these compiler bugs can bite very hard....
   
   2) all single-precision gets converted to double precision in the
   math routines (in accordance with K&R), so doing things in
   double precision is faster than single precision....

(not any more -- jrd )
   
   3) double precision floating point division may not be accurate to
   the last four or so bits. Most other routines round to the
   lsb.

(not any more -- jrd )
   
   4) no support of NaN and Inf
   
   5) beware of undefined operations: i.e. a float to integer conversion
   when the float is larger than MAXIINT.
   
   */

#include "flonum.h"

#ifdef __GCC_OLD__
#define umultl _umulsi
#define multl _mulsi3
#define udivl _udivsi3 
#define divl _divsi3
#define ddiv _divdf3
#define qmult _qmult
#define dmult _muldf3
#define dneg _negdf2
#define dadd _adddf3
#define dcmp _cmpdf2
#define dtoul _fixunsdfsi
#define dtol _fixdfsi
#define ltod _floatsidf
#else
#define umultl __umulsi
#define multl __mulsi3
#define udivl __udivsi3 
#define divl __divsi3
#define ddiv __divdf3
#define qmult __qmult
#define dmult __muldf3
#define dneg __negdf2
#define dadd __adddf3
#define dcmp __cmpdf2
#define dtoul __fixunsdfsi
#define dtol __fixdfsi
#define ltod __floatsidf
#endif

#ifdef L_divdf3

/* new double-float divide routine, by jrd */
/* thanks jrd !! */

#ifdef __GCC_OLD__
double _divdf3(num, denom)
#else
double __divdf3(num, denom)
#endif
double num, denom;
{
    double local_num = num;
    double local_denom = denom;
    struct bitdouble * num_bdp = (struct bitdouble *)(&local_num);
    struct bitdouble * den_bdp = (struct bitdouble *)(&local_denom);
    short num_exp = num_bdp->exp,
          den_exp = den_bdp->exp;
    short result_sign = 0;
    /*  register */ unsigned long num_m1, num_m2, num_m3, num_m4;
    register unsigned long den_m1, den_m2, den_m3, den_m4;
    unsigned long result_mant[3];
    unsigned long result_mask;
    short result_idx;
    
    if ((num_exp == 0) || (den_exp == 0)) /* zzz should really cause trap */
  	return(0.0);
    
    /* deal with signs */
    result_sign = result_sign + num_bdp->sign - den_bdp->sign;
    
    /* unpack the numbers */
    num_m1 = num_bdp->mant1 | 0x00100000;		/* hidden bit */
    num_m2 = num_bdp->mant2;
    num_m4 = num_m3 = 0;
    den_m1 = den_bdp->mant1 | 0x00100000;		/* hidden bit */
    den_m2 = den_bdp->mant2;
    den_m4 = den_m3 = 0;
    
#if 0
    /* buy a little extra accuracy by shifting num and denum up 10 bits */
    for (result_idx /* loop counter */ = 0 ; result_idx < 10 ; result_idx++)
    {
	ASL3(num_m1, num_m2, num_m3);
	ASL3(den_m1, den_m2, den_m3);
    }
#endif
    
    /* hot wire result mask and index */
    result_mask = 0x00100000;			/* start at top mantissa bit */
    result_idx = 0;				/* of first word */
    result_mant[0] = 0;
    result_mant[1] = 0;
    result_mant[2] = 0;
    
    /* if denom is greater than num here, shift denom right one and dec num expt */
    if (den_m1 < num_m1)
  	goto kludge1;				/* C is assembly language,
						   remember? */
    if (den_m1 > num_m1)
  	goto kludge0;
    if (den_m2 <= num_m2)				/* first word eq, try 2nd */
  	goto kludge1;
    
  kludge0:
    
    num_exp--;
    ASR4(den_m1, den_m2, den_m3, den_m4);
    
  kludge1:
    
    for ( ; !((result_idx == 2) && (result_mask & 0x40000000)) ; )
    {
	/* if num >= den, subtract den from num and set bit in result */
	if (num_m1 > den_m1) goto kludge2;
	if (num_m1 < den_m1) goto kludge3;
	if (num_m2 > den_m2) goto kludge2;
	if (num_m2 < den_m2) goto kludge3;
	if (num_m3 > den_m3) goto kludge2;
	if (num_m3 < den_m3) goto kludge3;
	if (num_m4 < den_m4) goto kludge3;
	
      kludge2:
	result_mant[result_idx] |= result_mask;
	SUB4(den_m1, den_m2, den_m3, den_m4, num_m1, num_m2, num_m3, num_m4);
      kludge3:
	ASR4(den_m1, den_m2, den_m3, den_m4);
	result_mask >>= 1;
	if (result_mask == 0)		/* next word? */
	{
	    result_mask = 0x80000000;
	    result_idx++;
	}
    }

/* stick in last half-bit if necessary */
  if (result_mant[2])
	{
	den_m1 = 0;		/* handy register */
	den_m2 = 1;
/* busted
	ADD2(den_m1, den_m2, result_mant[0], result_mant[1]);
*/
	result_mant[1]++;
	if (result_mant[1] == 0)
		result_mant[0]++;

	if (result_mant[0] & 0x00200000)	/* overflow? */
		{
		num_exp--;
		}
	}
  /* compute the resultant exponent */
  num_exp = num_exp - den_exp + 0x3FF;

  /* reconstruct the result in local_num */
  num_bdp->sign = result_sign;
  num_bdp->exp = num_exp;
  num_bdp->mant1 = result_mant[0] & 0xFFFFF;
  num_bdp->mant2 = result_mant[1];
  
  /* done! */
  return(local_num);
}
#endif

#ifdef L_muldf3
/*double _muldf3 (a, b) double a, b; { return a * b; } */

/* a set of totally local kludges, for summing partial results into
   result vector */

static unsigned long constant_zero_kludge = 0;
#define XXADDL(partial, target) \
	{ register unsigned long * zero = &constant_zero_kludge + 1; \
	  asm volatile("addl  %3,%0@;\
			addxl %1@-,%0@-" \
			: "=a" (target), "=a" (zero) \
			: "0" (target), "g" (partial), "1" (zero)); \
	}

static char component_index[] = 
	{
	3, 3,				/* last ones */

	2, 3,				/* next to last x last */
	3, 2,				/* ... */

	1, 3,
	2, 2,
	3, 1,

	0, 3,
	1, 2,
	2, 1,
	3, 0,
	
	0, 2,
	1, 1,
	2, 0,
	
	0, 1,
	1, 0,
	
	0, 0
	};

qmult(m1_hi, m1_lo, m2_hi, m2_lo, result_hi, result_lo)
unsigned long m1_hi, m1_lo, m2_hi, m2_lo, * result_hi, * result_lo;
{
  unsigned short * m1 = (unsigned short * )(&m1_hi);
  unsigned short * m2 = (unsigned short * )(&m2_hi);
  unsigned short result[10];		/* two extra for XADDL */
  register unsigned long mult_register;
  register unsigned long res1, res2, res3;
  long * kludge;
  short i, j;
  char * idx_p = (char * )&component_index;
/*
fprintf(stderr, "  qmult: %08X:%08X * %08X:%08X\n", 
	m1_hi, m1_lo, m2_hi, m2_lo);
*/
  for (i = 0 ; i < 10 ; i++) result[i] = 0;

/* walk thru 'vectors' of mantissa pieces, doing unsigned multiplies
   and summing results into result vector.  Note that the order is 
   chosen to guarantee that no more adds than generated by XADDL are
   needed.  */

  for ( ; ; )
	{
	i = *idx_p++;
	j = *idx_p++;
	mult_register = m1[i]; 
	MUL(m2[j], mult_register);
	kludge = (long * )(&result[i + j + 2]);
	XXADDL(mult_register, kludge);
/*
fprintf(stderr, "  qmult: %d[%04X] * %d[%04X] -> %08X\n",
  i, m1[i], j, m2[j], mult_register);
fprintf(stderr, "       %04X %04X %04X %04X %04X %04X %04X %04X\n",
  result[2], result[3], result[4], result[5], 
  result[6], result[7], result[8], result[9]);
*/
	if ((i == 0) && (j == 0))
		break;
	}

  /* get the result shifted to the right place.  Unfortunately, we need
     the 53 bits that's 22 bits down in the result vec.  sigh */
  kludge = (long * )(&result[2]);
  res1 = *kludge++;
  res2 = *kludge++;
  res3 = *kludge;
  for (i = 0 ; i < 12 ; i++)
	ASL3(res1, res2, res3);
  /* now put the result back */
  *result_hi = res1;
  *result_lo = res2;
}

double dmult(f1, f2)
double f1, f2;
{
    register long d2;
    register unsigned d3, d4, d5, d6, d7;
    unsigned long p1, p2;
    
    struct bitdouble
	*bdp1 = (struct bitdouble *)&f1,
	*bdp2 = (struct bitdouble *)&f2;
    int exp1, exp2, i;
    
    exp1 = bdp1->exp; exp2 = bdp2->exp;
    /* check for zero */
    if (! exp1) return(0.0);
    if (! exp2) return(0.0);
    d2 = 0x00100000 | bdp1->mant1;
    d3 = bdp1->mant2;
    d4 = 0x00100000 | bdp2->mant1;
    d5 = bdp2->mant2;
    __qmult(d2, d3, d4, d5, &p1, &p2);
    d6 = p1; d7 = p2;
    
    if (d6 & 0x00200000) {
	ASR2(d6, d7);
	exp1++;
    }
    
    if (bdp1->sign == bdp2->sign) bdp1->sign = 0;
    else bdp1->sign = 1;
    bdp1->exp = exp1 + exp2 - 0x3ff;
    bdp1->mant1 = d6;
    bdp1->mant2 = d7;
    return(f1);
}
#endif

#ifdef L_negdf2
/*double _negdf2 (a) double a; { return -a; } */

double dneg(num)
double num;
{
    long *i = (long *)&num;
	if (*i)			/* only negate if non-zero */
	    *i ^= 0x80000000;
    return(num);
}
#endif

#ifdef L_adddf3
/*double _adddf3 (a, b) double a, b; { return a + b; } */

double dadd(f1, f2)
double f1, f2;
{
    
    register long d4, d5, d6, d7;
    struct bitdouble
	*bdp1 = (struct bitdouble *)&f1,
    	*bdp2 = (struct bitdouble *)&f2;
    short exp1, exp2, sign1, sign2, howmuch, temp;
    
    exp1 = bdp1->exp; exp2 = bdp2->exp;
    
    /* check for zero */
    if (! exp1) return(f2); if (! exp2) return(f1);
    
    /* align them */
    if (exp1 < exp2) {
	bdp1 = (struct bitdouble *)&f2; bdp2 = (struct bitdouble *)&f1;
	exp1 = bdp1->exp; exp2 = bdp2->exp;
    }
    howmuch = exp1 - exp2;
    if (howmuch > 53) return(f1);
    
    d7 = bdp2->mant1 | 0x00100000;
    d6 = bdp2->mant2;
    
    d5 = bdp1->mant1 | 0x00100000;
    d4 = bdp1->mant2;
    
    for (temp = 0; temp < howmuch; temp++) ASR2(d7, d6);
    
    /* take care of negative signs */
    if (bdp1->sign) 
    {
	NEG(d5, d4);
    }
    if (bdp2->sign)
    {
	NEG(d7, d6);
    }
    
    ADD2(d7, d6, d5, d4);
    bdp1 = (struct bitdouble *)&f1;
    
    if (d5 < 0) {
	NEG(d5, d4);
	bdp1->sign = 1;
    } else bdp1->sign = 0;
    
    if (d5 == 0 && d4 == 0) return(0.0);
    
    for(howmuch = 0; d5 >= 0; howmuch++) ASL2(d5, d4);
    
    ASL2(d5, d4);
    for (temp = 0; temp < 12; temp++) ASR2(d5, d4);
    bdp1->mant1 = d5;
    bdp1->mant2 = d4;
    bdp1->exp = exp1 + 11 - howmuch;
    return(f1); 
}

#endif

#ifdef L_subdf3
double
#ifdef __GCC_OLD__
    _subdf3 (a, b)
#else
    __subdf3 (a, b)
#endif
double a, b;
{
    return a+(-b);
}
#endif

#ifdef L_cmpdf2
/*
  int _cmpdf2 (a, b) double a, b; { if (a > b) return 1;
  else if (a < b) return -1; return 0; } 
  */

int dcmp(f1, f2)
double f1, f2;
{
    struct bitdouble *bdp1, *bdp2;
    unsigned int s1, s2;
    bdp1 = (struct bitdouble *)&f1;
    bdp2 = (struct bitdouble *)&f2;
    s1 = bdp1->sign;
    s2 = bdp2->sign;
    if (s1 > s2) return(-1);
    if (s1 < s2) return(1);
    /* if sign of both negative, switch them */
    if (s1 != 0) {
	bdp1 = (struct bitdouble *)&f2;
	bdp2 = (struct bitdouble *)&f1;
    }
    s1 = bdp1->exp;
    s2 = bdp2->exp;
    if (s1 > s2) return(1);
    if (s1 < s2) return(-1);
    /* same exponent -- have to compare mantissa */
    s1 = bdp1->mant1;
    s2 = bdp2->mant1;
    if (s1 > s2) return(1);
    if (s1 < s2) return(-1);
    s1 = bdp1->mant2;
    s2 = bdp2->mant2;
    if (s1 > s2) return(1);
    if (s1 < s2) return(-1);
    return(0); /* the same! */
}
#endif

#ifdef L_fixunsdfsi
/*_fixunsdfsi (a) double a; { return (unsigned int) a; } */

unsigned long dtoul(f)
double f;
{
    struct bitdouble *bdp;
    int si, ex;
    unsigned long  mant1, mant2;
    bdp = (struct bitdouble *)&f;
    si = bdp->sign;
    ex = bdp->exp;
    mant1 = bdp->mant1 | 0x00100000;
    mant2 = bdp->mant2;
    
    /* zero value */
/*    if (ex == 0) return(0L); */ /* this is covered in next line */
    /* less than 1 */
    if (ex < 0x3ff) return(0L);
    /* less than 0 */
    if (si ) return(0L);
    mant1 <<= 10;
    mant1 += (mant2 >> 22);
    mant1 >>= 30 + (0x3ff - ex);
    return(mant1);
}

long dtol(f)
double f;
{
    struct bitdouble *bdp = (struct bitdouble *)&f;
    
    if (bdp->sign) {
	bdp->sign = 0;
	return( - dtoul(f));
    }
    return(dtoul(f));
}
#endif

#ifdef L_fixunsdfdi

/*double _fixunsdfdi (a) double a; { union double_di u;
  u.i[LOW] = (unsigned int) a; u.i[HIGH] = 0; return u.d; } */
#endif

#ifdef L_fixdfdi
double
#ifdef __GCC_OLD__
    _fixdfdi (a)
#else
    __fixdfdi (a)
#endif
double a;
{
    union double_di u;
    u.i[LOW] = (int) a;
    u.i[HIGH] = (int) a < 0 ? -1 : 0;
    return u.d;
}
#endif

#ifdef L_floatsidf
/* double _floatsidf (a) int a; { return (double) a; } */

double ltod(i)
long i;
{
    int expo, shift;
    double retval;
    struct bitdouble *bdp = (struct bitdouble *)&retval;
    if (i == 0) {
	long *t = (long *)&retval;
	t[0] = 0L;
	t[1] = 0L;
	return(retval);
    }
    if (i < 0) {
	bdp->sign = 1;
	i = -i;
    } else bdp->sign = 0;
    shift = i;
    for (expo = 0x3ff + 31 ; shift > 0; expo--, shift <<= 1);
    shift <<= 1;
    bdp->exp = expo;
    bdp->mant1 = shift >> 12;
    bdp->mant2 = shift << 20;
    return(retval);
}

#endif

#ifdef L_floatdidf
/* ok as is -- will call other routines */
double
#ifdef __GCC_OLD__
    _floatdidf (u)
#else
    __floatdidf (u)
#endif
union double_di u;
{
    register double hi
	= ((double) u.i[HIGH]) * (double) 0x10000 * (double) 0x10000;
    register double low = (unsigned int) u.i[LOW];
    return hi + low;
}
#endif

#ifdef L_truncdfsf2
float __dtof(d)
double d;
{
    struct bitdouble *bdp = (struct bitdouble *)&d;
    float retval;
    struct bitfloat *bfp = (struct bitfloat *)&retval;
    int tempval;
    
    bfp->sign = bdp->sign;
    if (bdp->exp == 0) return ((float) 0.0);
    bfp->exp = bdp->exp - 0x400 + 0x80;
    tempval = (bdp->mant1 << 4 ) + ((0xF0000000 & bdp->mant2) >> 28);
    /* round */
    tempval++;
    if (tempval == 0x01000000) bfp->exp++;
    bfp->mant = tempval >> 1;
    return(retval);
}

SFVALUE
#ifdef __GCC_OLD__
    _truncdfsf2 (a)
#else
    __truncdfsf2 (a)
#endif
double a;
{
    union flt_or_int intify;
    return INTIFY (__dtof(a));
}
#endif

#ifdef L_extendsfdf2
double __ftod(f)
union flt_or_int f;
{
    double retval;
    struct bitfloat *bfp = (struct bitfloat *)&f.f;
    struct bitdouble *bdp = (struct bitdouble *)&retval;
    if (bfp->exp == 0) return(0.0);
    bdp->sign = bfp->sign;
    bdp->exp = 0x400 - 0x80 + bfp->exp;
    bdp->mant1 = bfp->mant >> 3;
    bdp->mant2 = (bfp->mant & 0x7) << 29;
    /*printf("returning %f from extendsfdf2\n", retval);*/
    return(retval);
}

double
#ifdef __GCC_OLD__
    _extendsfdf2 (a)
#else
    __extendsfdf2 (a)
#endif
union flt_or_int a;
{
    union flt_or_int intify;
    double retval;
    return (__ftod(a));
}
#endif