#APP
#
|
| Subroutines needed by GNU C for long integer arithmetic.
|
| Revision 1.4, jrb 09-06-89
| More bug fixes, fixed mod routines to push args correctly
|
| Revision 1.3, jrb 08-15-89
| Merged in Kai-Uwe's stuff (thanks!). Minor cleanup, some hacks and
| bug fixes. Don't run this through cpp anymore.
|
| Revision 1.2, kub 05-23-89
| Modified by Kai-Uwe Bloem (I5110401@DBSTU1.BITNET for now) to get some
| speed out of this. Completely rewrote the modulo stuff.
|
| Revision 1.1, kub
| initial revision from jrd

|
| _umulsi3 (a, b)
|      unsigned long a, b;
| {
|   return a * b;
| }
|
	.text
	.even
	.globl ___umulsi3
	.globl __umulsi3
___umulsi3:
__umulsi3:
	movel d2,a1		| save d2

	moveq #0,d0		| prepare result
	movew a7@(10),d1	| get lo part of b
	beq umul_1		| zero, no multiplies
	movew a7@(4),d2		| get hi part of a
	beq umul_0		| zero, skip this
	mulu d1,d2		| mul a hi by b lo
	swap d2			| shift over
	clrw d2			|  ...by 16
	movel d2,d0		| save that
umul_0:
	movew a7@(6),d2		| get lo part of a
	beq umul_ret		| zero, no multiplies
	mulu d2,d1		| mul a lo by b lo
	addl d1,d0		| add that in
umul_1:
	movew a7@(8),d1		| get b hi
	beq umul_ret
	movew a7@(6),d2		| and a lo
	beq umul_ret
	mulu d1,d2		| mult those
	swap d2			| shift over
	clrw d2			|  ...by 16
	addl d2,d0		| and add that in
umul_ret:
	movel a1,d2		| get d2 back

	rts
|
| _mulsi3 (a, b)
|      long a, b;
| {
|   return a * b;
| }
|
	.text
	.even
	.globl ___mulsi3
	.globl __mulsi3
___mulsi3:
__mulsi3:
	link a6,#-2		| one word of local
	clrw a6@(-2)		| zap the local; it's our neg flg

	movel a6@(12),d0	| get b
	bpl muls1		| pos, ok
	negl d0			| negate it
	addqw #1,a6@(-2)	| and inc the flag
muls1:
	movel d0,sp@-		| push adjusted b
	movel a6@(8),d0		| get a
	bpl muls2		| pos, ok
	negl d0			| negate it
	subqw #1,a6@(-2)	| and dec the flag
muls2:
	movel d0,sp@-		| push adjusted a
	jbsr ___umulsi3		| do an unsigned mult
	addql #8,sp		| flush args
	tstw a6@(-2)		| flag set?
	beq muls3		| nope, go ahead and return
	negl d0			| negate the result
muls3:
	unlk a6			| unlink the frame
	rts			| done
|
| _udivsi3 (a, b)
|      unsigned long a, b;
| {
|   return a / b in d0
|   return a % b in d1
| }
|
	.text
	.even
	.globl ___udivsi3
	.globl __udivsi3
___udivsi3:
__udivsi3:
	movel d2,a0		| save the work area
	movel d3,a1

	moveq #0,d0		| d0 will be accum
	movel a7@(8),d2		| d2 is b, divisor
	beq udiv_err		| b = 0 => error
	movel a7@(4),d1		| d1 is a, dividend
	beq udiv_done		| a = 0 => result 0
	cmpl d2,d1		| b > a => result 0
	bcs udiv_done
	moveq #31,d0		| d0 is bit num for testing need to sub
	tstb a7@(4)		| find byte with highest bit of dividend
	bne udiv_bit
	subql #8,d0
	tstb a7@(5)
	bne udiv_bit
	subql #8,d0
	tstb a7@(6)
	bne udiv_bit
	subql #8,d0
udiv_bit:
	btst d0,d1		| byte found, now find bit
	dbne d0,udiv_bit
	moveql #1,d3		| get 1 into bit 0 of bit mask
	| d0 now contains the number of the highest 1-bit set in the dividend.
	| Now shift the divisor left to move a 1-bit at this position.
	| Possible because divisor (in d2) <= dividend (in d1)!
udiv_sl:
	btst d0,d2		| bit get aligned to highest bit of a yet?
	bne udiv_start		| yes, start testing for subtraction
	addl d3,d3		| shift bitmask left
	addl d2,d2		| shift divisor left
	bra udiv_sl
udiv_start:
	| valid condition now : d2 <= d1 <= 2*d2
	| Now apply the same algorithm as used to divide manually :
	| subtract and shift right....
	moveq #0,d0		| prepare accumulator
udiv_sr:
	cmpl d2,d1		| divisor > dividend?
	bcs udiv_s
	orl d3,d0		| set this bit in d0
	subl d2,d1		| take divisor (shifted) out of dividend
	beq udiv_done		| nothing left to do
udiv_s:
	lsrl #1,d2		| shift right one
	lsrl #1,d3		| and the bitnum too
	bne udiv_sr		| more bits, keep dividing

udiv_done:
	| d0 = divisor DIV dividend, d1 = divisor MOD dividend. Think of it !
	| This makes implementation of % fast and easy, see below !
	movel a0,d2		| get work regs back
	movel a1,d3
	rts
udiv_err:
	divs d2,d1		| cause division trap
	bra udiv_done		| back to user after exception handling
|
| _divsi3 (a, b)
|      long a, b;
| {
|   return a / b in d0
|   return a % b in d1
| }
|
	.text
	.even
	.globl ___divsi3
	.globl __divsi3
___divsi3:
__divsi3:
	link a6,#-2		| 2 bytes of local
	clrw a6@(-2)		| zap the local; it's our neg flg

	movel a6@(12),d0	| get b
	bge divs1		| pos, ok
	negl d0			| negate it
	addqw #1,a6@(-2)	| and inc the flag
divs1:
	movel d0,sp@-		| push adjusted b
	movel a6@(8),d0		| get a
	bge divs2		| pos, ok
	negl d0			| negate it
	subqw #1,a6@(-2)	| and dec the flag
divs2:
	movel d0,sp@-		| push adjusted a
	jbsr __udivsi3		| do an unsigned div
	addql #8,sp		| flush args
	tstw a6@(-2)		| flag set?
	beq divs3		| nope, go ahead and return
	negl d0			| negate the result
divs3:
	tstb a6@(8)
	bpl divs4
	negl d1			| correct the remainder
	| The next line would be applied if a modulus as defined in algebra
	| is required (well, i think so).
	| In computer science % is mostly defined as a % b = a - (a/b)*b, so...
|	addl a6@(8),d1
divs4:
	unlk a6			| unlink the frame
	rts			| done!
|
| _umodsi3 (a, b)
|      unsigned long a, b;
| {
|   return a % b;
| }
|
	.text
	.even
	.globl ___umodsi3
	.globl __umodsi3
___umodsi3:
__umodsi3:
	movel sp@(8),sp@-	|  push b
	movel sp@(8),sp@-	|  push a sp has been dec by 4 by previous push
	jbsr __udivsi3
	movel d1,d0		| The div operation just delivers the remainder
	addql #8,sp
	rts
|
| _modsi3 (a, b)
|      long a, b;
| {
|   return a % b;
| }
|
	.text
	.even
	.globl ___modsi3
	.globl __modsi3
___modsi3:
__modsi3:
	movel sp@(8),sp@-	| push b
	movel sp@(8),sp@-	| push a	sp dec. by 4 by previous push
	jbsr __divsi3
	movel d1,d0		| The div operation just delivers the remainder
	addql #8,sp
	rts
| 
| eof
|
