#
#	Single prec. Floating point support code.  
#
#	A float looks like:
#
#	|S|E.x.p ... |M.a.n.t.i.s.s.a ... |
#	+-+----------+--------------------+
#
#	where s is the sign bit, Exp is 8 bits of exponent, interpreted
#	as E + 126, and Mantissa is 23 bits of fraction, with a
#	hidden bit.  The point is to the left of the hidden bit.
#
#	All these routines use C calling conventions: arguments are
#	pushed on the stack in reverse order, and values are returned in d0.
#


.text
	.even
.globl ___addsf3
.globl __addsf3
|
| float __addsf3(float a, float b) -- single precision add
| In:  a at a6@(8), b at a6@(12); packed result returned in d0.
| d2-d7 are saved and restored; d0/d1 are scratch.
| Method: unpack each operand into a signed mantissa (hidden bit set,
| then negated when the float is negative) and an unbiased exponent,
| align the mantissa belonging to the smaller exponent, add, then
| renormalize and repack.
|
___addsf3:
__addsf3:
	link	a6,#0		| dont need any locals
	moveml	#0x3F00,sp@-	| save d2-d7 (predecrement mask)
	movel	a6@(8),d0	| get a
	beq	addsf_ret_b	|  a == 0 .. just return b
	movel	#23,d6		| shift count down to the exponent field
	movel	d0,d2		| get the exponent
	lsrl	d6,d2		| and shift right
	andl	#0xFF,d2	| drop the sign bit
	subl	#126,d2		| remove the exponent bias
	movel	a6@(12),d1	| get b
	beq	addsf_ret_a	| b == 0 .. return a (already in d0)
	movel	d1,d3		| get the exponent for b
	lsrl	d6,d3		| and shift right, with implicit extend
	andl	#0xFF,d3	| make sure we didnt get a sign bit
	subl	#126,d3		| unbias this one too

	andl	#0x7FFFFF,d0	| mask a for mantissa
	orl	#0x800000,d0	| and put in hidden bit
	tstl	a6@(8)		| test the original value
	bpl	addsf_1		| pos, ok
	negl	d0		| neg, negate the mantissa
addsf_1:
	andl	#0x7FFFFF,d1	| mask b for mantissa
	orl	#0x800000,d1	| ditto
	tstl	a6@(12)		| test ...
	bpl	addsf_2
	negl	d1		| negate this one
addsf_2:
	cmpl	d2,d3		| compare Ea to Eb (flags from Eb - Ea)
	blt	addsf_3		| Ea > Eb

	movel	d3,d5		| Eb >= Ea: get Eb
	subl	d2,d5		| Eb - Ea = count to shift Ma right
	asrl	d5,d0		|  align Ma (asrl preserves the sign)
	movel	d3,d5		| use Eb as resultant exponent
	bra	addsf_4		| and go rejoin common part
addsf_3:
	movel	d2,d5		| get Ea
	subl	d3,d5		| Ea - Eb = count to shift Mb right
	asrl	d5,d1		|  align Mb
	movel	d2,d5		| use Ea as resultant exponent

| NOTE(review): 68000 register shift counts are taken mod 64, so
| exponent differences >= 64 are aligned incorrectly -- TODO confirm.
addsf_4:
	clrl	d7		| zap result-sign flag
	addl	d1,d0		| add Mb to Ma


	beq	addsf_z		| zero? ok, go return zero
	bpl	addsf_5		| positive? ok, go scale it
	negl	d0		| negate Mr
	movel	#1,d7		| remember sign
addsf_5:
	btst	#24,d0		| carried out of the 24 bit mantissa?
	beq	addsf_6		| nope, its ok as is
	asrl	#1,d0		| shift right one
	addql	#1,d5		| inc exp

| zzz check for overflow in here someplace

addsf_6:
	btst	#23,d0		| hidden bit in the right place yet?
	bne	addsf_7		| yes, were done
	lsll	#1,d0		| shift left one
	subql	#1,d5		| dec exponent
	bra	addsf_6
addsf_7:
	andl	#0x7FFFFF,d0	| zap out hidden bit
	addl	#126,d5		| rebias the exponent
	andl	#0xFF,d5	| zap to 8 bits
	movel	#23,d6		| shift count
	lsll	d6,d5		| shift the exp up
	orl	d5,d0		| stick the exp in
	tstl	d7		| negative?
	beq	addsf_ret_a
	orl	#0x80000000,d0	| yup, set the sign bit
	bra	addsf_ret_a
addsf_z:
	clrl	d0
	bra	addsf_ret_a
addsf_ret_b:
	movel	a6@(12),d0	| result is just b
addsf_ret_a:
	moveml	sp@+,#0x00FC	| restore d2-d7 (postincrement mask)
	unlk	a6
	rts			| sigh
# 
# #ifdef L_negsf2
# int
# _negsf2 (a)
#      union flt_or_int a;
# {
#   union flt_or_int intify;
#   return INTIFY (-a.f);
# }
# #endif

.text
	.even
.globl ___negsf2
.globl __negsf2
|
| float __negsf2(float a) -- returns -a
| Toggles the sign bit, except that (positive) zero is returned
| unchanged so we never manufacture a -0.0.
|
___negsf2:
__negsf2:
	movel	sp@(4),d0	| fetch a (movel sets the flags)
	beq	negsf2_done	| zero stays zero
	bchg	#31,d0		| flip the sign bit
negsf2_done:
	rts			| sigh
# 
# #ifdef L_subsf3
# int
# _subsf3 (a, b)
#      union flt_or_int a, b;
# {
#   union flt_or_int intify;
#   return INTIFY (a.f - b.f);
# }
# #endif

.text
	.even
.globl ___subsf3
.globl __subsf3
|
| float __subsf3(float a, float b) -- returns a - b
| Kludge: flip b's sign bit in the caller's argument slot and
| tail-jump into the add routine.  b == 0 is special-cased so we do
| not create a -0.0 argument.
|
___subsf3:
__subsf3:
	tstl	sp@(8)		| is b zero?
	bne	subsf_doadd	| no: go negate it and add
	movel	sp@(4),d0	| b == 0, so a - b is just a
	rts
subsf_doadd:
	eorl	#0x80000000,sp@(8)	| b = -b, in place on the stack
	jmp	___addsf3	| tail-call a + (-b)

# 
# #ifdef L_cmpsf2
# int
# _cmpsf2 (a, b)
#      union flt_or_int a, b;
# {
#   union flt_or_int intify;
#   if (a.f > b.f)
#     return 1;
#   else if (a.f < b.f)
#     return -1;
#   return 0;
# }
# #endif

.text
	.even
.globl ___cmpsf2
.globl __cmpsf2
|
| int __cmpsf2(float a, float b)
| Returns 1 if a > b, -1 if a < b, 0 if equal.
| Trick: when at least one operand is non-negative, the IEEE bit
| patterns order correctly under a signed integer compare; when both
| are negative, the integer ordering is reversed (bigger pattern =
| bigger magnitude = smaller float).
| NOTE(review): +0.0 and -0.0 have different bit patterns and compare
| unequal here -- confirm callers tolerate that.
|
___cmpsf2:
__cmpsf2:
	movel	sp@(4),d0	| get a
	movel	sp@(8),d1	| get b (this movel sets the flags on b)

	bpl	cmpsf2_p	| b >= 0: plain signed compare works
	tstl	d0		| b < 0; look at a's sign
	bpl	cmpsf2_p	| a >= 0 > b: signed compare still works
	cmpl	d1,d0		| both negative: ordering is reversed
	bgt	cmpsf2_m	| bigger pattern => more negative => a < b
	blt	cmpsf2_1	| smaller pattern => a > b
cmpsf2_z:
	clrl	d0		| equal
	rts
cmpsf2_p:
	cmpl	d1,d0		| signed compare of the raw patterns
	bgt	cmpsf2_1	| a > b
	beq	cmpsf2_z	| a == b
cmpsf2_m:
	movel	#-1,d0		| a < b
	rts
cmpsf2_1:
	movel	#1,d0		| a > b
	rts			| sigh
# 
# #ifdef L_mulsf3
# int
# _mulsf3 (a, b)
#      union flt_or_int a, b;
# {
#   union flt_or_int intify;
#   return INTIFY (a.f * b.f);
# }
# #endif

.text
	.even
.globl ___mulsf3
.globl __mulsf3
|
| float __mulsf3(float a, float b) -- single precision multiply
| In:  a at a6@(8), b at a6@(12); packed result returned in d0.
| d2-d7 are saved and restored.  The unbiased exponents are added;
| the 24x24 bit mantissa product is accumulated into a 64 bit frame
| temporary via four 16x16 unsigned multiplies, and the 24 bit window
| that starts 16 bits up is normalized and repacked.
|
___mulsf3:
__mulsf3:
|
| multiply.  take the numbers apart.  shift each exponent down to
| 16 bits.  unsigned multiply those.  shift that down to 24 bits.
| exponent is Ea + Eb.
|

	link	a6,#-8		| 64 bit accum for mult
	moveml	#0x3F00,sp@-	| save d2-d7 (predecrement mask)
	movel	a6@(8),d0	| get a
	beq	mulsf3_z	| 0 * b = 0
	movel	a6@(12),d1	| get b
	beq	mulsf3_z	| a * 0 = 0
	movel	#23,d6		| shift count
	movel	d0,d2		| get the exponent
	lsrl	d6,d2		| and shift right
	andl	#0xFF,d2	| drop the sign bit
	subl	#126,d2		| remove the exponent bias
	movel	d1,d3		| get the exponent for b
	lsrl	d6,d3		| and shift right
	andl	#0xFF,d3	| BUGFIX: was "andl #0xFF,d2" -- mask b's exponent
	subl	#126,d3		| unbias this one too

	clrl	d7		| negative result flag
	andl	#0x7FFFFF,d0	| mask a for mantissa
	orl	#0x800000,d0	| and put in hidden bit
	tstl	a6@(8)		| test the original value
	bpl	mulsf3_1	| pos, ok
	eorl	#1,d7		| remember negative
mulsf3_1:
	andl	#0x7FFFFF,d1	| mask b for mantissa
	orl	#0x800000,d1	| ditto
	tstl	a6@(12)		| test ...
	bpl	mulsf3_2
	eorl	#1,d7		| signs differ -> negative result
mulsf3_2:
|	lsrl	#8,d1		| shift this one down
|	lsrl	#8,d0		| this one too...
|	mulu	d1,d0		| do the multiply

|	beq	mulsf3_ret	| zero? ok, just return
|	lsrl	#8,d0		| shift right again

|
| we have mantissas as follows:
|
|	|...ah...|...al...|	|...bh...|...bl...|
|
| product is composed as:
|
|			|....al * bl....|
|		|....al * bh....|
|		|....ah * bl....|
|	|....ah * bh....|
|
| then take the 24 bit chunk thats 16 bits in.
|
| NOTE(review): a carry out of the 32 bit adds at a6@(-6) is not
| propagated into a6@(-8); this can lose a low-order bit of the
| product -- TODO confirm acceptable precision.

	movel	d0,d4
	andl	#0xFFFF,d4	| al
	movel	d1,d5
	andl	#0xFFFF,d5	| bl
	mulu	d5,d4		| thats al * bl
	movel	d4,a6@(-4)	| into the accum
	clrl	a6@(-8)		| zap the top part

	movel	d0,d4
	andl	#0xFFFF,d4	| al
	movel	d1,d5
	movel	#16,d6		| shift count
	lsrl	d6,d5		| bh
	mulu	d5,d4		| al * bh
	addl	d4,a6@(-6)	| accumulate, 16 bits up

	movel	d0,d4
	lsrl	d6,d4		| ah
	movel	d1,d5
	andl	#0xFFFF,d5	| bl
	mulu	d5,d4		| ah * bl
	addl	d4,a6@(-6)	| accumulate, 16 bits up

	movel	d0,d4
	lsrl	d6,d4		| ah
	movel	d1,d5
	lsrl	d6,d5		| bh
	mulu	d5,d4		| ah * bh
	addl	d4,a6@(-8)	| accumulate, 32 bits up

	movel	a6@(-6),d0	| get the relevant 32 bit window
	lsrl	#8,d0		| and shift it down to 24 bits

mulsf3_norm:
	btst	#23,d0		| normalized?
	bne	mulsf3_ok	| yes, hidden bit is in place
	lsll	#1,d0		| no, shift up one
	subql	#1,d2		| and compensate in the exponent
	bra	mulsf3_norm

mulsf3_ok:
	andl	#0x7FFFFF,d0	| zap hidden bit
	addl	d3,d2		| add Eb to Ea
	addl	#126,d2		| rebias the exponent
	andl	#0xFF,d2	| whack to 8 bits
	movel	#23,d6		| shift count
	lsll	d6,d2		| shift up to right place
	orl	d2,d0		| shove it in
	tstl	d7		| sign neg?
	beq	mulsf3_ret
	orl	#0x80000000,d0	| set sign bit
	bra	mulsf3_ret
mulsf3_z:
	clrl	d0		| either operand zero -> 0.0
mulsf3_ret:
	moveml	sp@+,#0x00FC	| restore d2-d7 (postincrement mask)
	unlk	a6
	rts			| sigh
# 
# #ifdef L_divsf3
# int
# _divsf3 (a, b)
#      union flt_or_int a, b;
# {
#   union flt_or_int intify;
#   return INTIFY (a.f / b.f);
# }
# #endif

.text
	.even
.globl ___divsf3
.globl __divsf3
|
| float __divsf3(float a, float b) -- single precision divide
| In:  a at a6@(8), b at a6@(12); packed result returned in d0.
| d2-d7 are saved and restored.  Exponents are unpacked and
| subtracted; the mantissa quotient is formed bit-by-bit by shift
| and subtract, with 7 extra bits of headroom for accuracy.
| NOTE(review): b == 0 is still not special-cased and produces a
| meaningless finite result -- TODO confirm callers never divide by 0.
|
___divsf3:
__divsf3:
|
| divide.  sort of like mult, exc we do shifts and subtracts to
| do the division of the mantissa.  resultant exponent is Ea - Eb.
|

	link	a6,#0		| dont need any locals
	moveml	#0x3F00,sp@-	| save d2-d7 (predecrement mask)
	movel	a6@(8),d0	| get a
	beq	divsf3_z	| BUGFIX: 0 / b is 0, dont fabricate a mantissa
	movel	a6@(12),d1	| get b
	movel	#23,d6		| shift count
	movel	d0,d2		| get the exponent
	lsrl	d6,d2		| and shift right
	andl	#0xFF,d2	| drop the sign bit
	subl	#127,d2		| offset the exponent
	movel	d1,d3		| get the exponent for b
	lsrl	d6,d3		| and shift right
	andl	#0xFF,d3	| ditto for b
	subl	#127,d3		| off set this one too

	clrl	d7		| negative result flag
	andl	#0x7FFFFF,d0	| mask a for mantissa
	orl	#0x800000,d0	| and put in hidden bit
	tstl	a6@(8)		| test the original value
	bpl	divsf3_1	| pos, ok
	eorl	#1,d7		| remember negative
divsf3_1:
	andl	#0x7FFFFF,d1	| mask b for mantissa
	orl	#0x800000,d1	| ditto
	tstl	a6@(12)		| test ...
	bpl	divsf3_2
	eorl	#1,d7		| signs differ -> negative result
divsf3_2:
|
| for now, kludge.  shift Ma left and Mb right, then do an unsigned divide
| and shift the result left.  Blech
|

|	lsrl	#8,d1		| shift this one down
|	lsll	#7,d0		| this one up
|	divu	d1,d0		| do the divide
|	andl	#0xFFFF,d0	| and mask off cruft

|	beq	divsf3_ret	| zero? ok, just return
|	lsll	#8,d0		| shift left again

| same sort of trick as long divide, exc its easier here, cause
| the numbers (mantissas) are already bit-aligned.

	clrl	d4		| quotient accumulator
	movel	#0x800000,d5	| current quotient bit
	lsll	#7,d0		| buy a little extra accuracy...
	lsll	#7,d1		| (both < 2^31, so signed compare is safe)
divsf3_2a:
	cmpl	d1,d0		| compare dividend to divisor
	bmi	divsf3_2b	| dividend smaller: no bit here
	orl	d5,d4		| put in the bit
	subl	d1,d0		| and subtract
divsf3_2b:
	lsrl	#1,d1		| slide divisor down
	lsrl	#1,d5		| slide bit down
	bne	divsf3_2a	| and go round again until the bit falls off
	movel	d4,d0		| leave the result here

divsf3_3:
	btst	#23,d0		| hidden bit in the right place yet?
	bne	divsf3_4
	lsll	#1,d0		| no, shift up
	subql	#1,d2		| and compensate in the exponent
	bra	divsf3_3
divsf3_4:
	andl	#0x7FFFFF,d0	| zap hidden bit
	subl	d3,d2		| sub Eb from Ea
	addl	#127,d2		| rebias the exponent
	andl	#0xFF,d2	| whack to 8 bits
	lsll	d6,d2		| shift up to right place (d6 still 23)
	orl	d2,d0		| shove it in
	tstl	d7		| sign neg?
	beq	divsf3_ret
	orl	#0x80000000,d0	| set sign bit
	bra	divsf3_ret
divsf3_z:
	clrl	d0		| 0 / b = 0
divsf3_ret:
	moveml	sp@+,#0x00FC	| restore d2-d7 (postincrement mask)
	unlk	a6

	rts			| sigh
