	title	BCDASM -- Copyright 1997, Morten Elling
	subttl	Signed division and modulo of packed signed BCDs

	include	model.inc
	include	modelt.inc
	include	bcd.ash

	@CODESEG

;//////////////////////////////////////////////////////////////////////
;//	Name	bcdIdiv
;//	Desc	Signed division and modulo of two packed signed BCDs
;//		(lo(dst) = dst / src,  hi(dst) = dst % src)
;//
;//
;//	Entry	Passed args (see note)
;//	Exit	Acc > 0:
;//		  Low (destination) = packed signed BCD quotient
;//		  high(destination) = packed signed BCD remainder
;//		  replacing the dividend.
;//		  The byte size of each of these results is half the
;//		  size of the original destination operand.
;//
;//		  The quotient is negative when the signs of divisor
;//		  and dividend differ, otherwise positive (a zero
;//		  quotient carries no sign).
;//		  The remainder keeps the sign of the dividend.
;//
;//		Acc = 0:
;//		  Error: division by zero or overflow (result would
;//		  exceed destination's size); in these cases, the
;//		  dividend is unchanged.
;//
;//
;//	Note	Before calling this procedure, both operands must be
;//		scaled to double-size, for example after a call to
;//		bcdImul (dividend) or bcdSx (divisor) both of which
;//		create a double-size packed signed BCD result.
;//		The most-significant half of the divisor must not
;//		contain any significant BCD digits (i.e. all zeros,
;//		except for the sign bit).
;//
;//		The divisor is left unchanged by this procedure (but
;//		provides temporary work space used during execution).
;//
;//
;//	ToDo	Write wrapper to make 'bcdMod' procedure.

bcdIdiv proc			; NZB = non-zero byte ('MSB')
arg	dstBCD	:dataptr, \	; Addr of dividend (size = srcsz)
	srcBCD	:dataptr, \	; Addr of divisor
	srcsz	:@uint		; Byte size of divisor (see note)
local	@@signs :@uint, \	; src's sign + (dst's sign SHL 8)
	@@nzbs	:@uint, \	; src's NZB + (dst's NZB SHL 8)
	@@srcsb :@uint, \	; # significant bytes in divisor
	@@dstsb :@uint, \	; # significant bytes in dividend
	@@src2p :@uint, \	; Addr of shifted divisor
	@@quotp :@uint		; Addr of quotient insertion point
@uses	ds,es,rsi,rdi,rbx,rcx,rdx
;.
; Procedure notes
;
; The format of the operands has been chosen to keep a consistent,
; simple calling interface (no separate work buffer). As a side
; effect, we note that maximum two segment registers are required
; to access the data.
;
;
; The fact that dividend and divisor are in packed BCD format
; allows us to easily compute the byte size of the quotient and
; remainder (one byte = two digits), once we know the lengths of
; the operands.
;
; First, then, count the significant bytes in each of the operands.
; Second, do away with two special cases: division by zero (which
; can't be done) and division overflow (result exceeds destination
; size). Third, if the dividend is smaller than the divisor, the
; division loop isn't required.
;
; Operand layout as used by the code below: digit bytes are stored
; least-significant byte first, and the byte at offset srcsz-1 is
; the sign byte (only bit 7 is kept when the result signs are built).
;
; Return value: accumulator = 1 on success (@@ok), 0 on division by
; zero or overflow (@@divz). The direction flag is cleared on every
; exit path (@@ret).


; ----- Determine no. of significant
;	bytes in divisor
	std			; Auto-decrement index registers
	mov   rbx, [srcsz]
	dec   rbx		; Size excl. sign byte
	@LES  rdi, [srcBCD]
	add   rdi, rbx
	dec   rdi		; Point to top digit byte (below sign)
	mov   rcx, rbx		; Scan count = all digit bytes
	sub   rax, rax		; Zero accumulator (al = 0 for scasb)
	repz  scasb		; Scan downwards while bytes are zero
	jz sh @@divz		; Exit if divisor is zero
	adc   rcx, rax		; CF=1 from (0 - NZB): count the NZB too
	mov   [@@srcsb], rcx
	mov   dl, @ES [rdi+1]	; Get first non-zero byte (NZB);
				;   +1 as scasb stepped past it

; ----- Determine no. of significant
;	bytes in dividend
	@LES  rdi, [dstBCD]
	add   rdi, rbx
	dec   rdi		; Point to top digit byte (below sign)
	mov   rcx, rbx
	repz  scasb		; rax is still zero here
	adc   rcx, rax		; All-zero dividend: CF=0, rcx stays 0
	mov   [@@dstsb], rcx
	mov   dh, @ES [rdi+1]	; Dividend's NZB (or its lowest byte,
				;   zero, if the dividend is zero)
        mov   [@@nzbs], rdx	; Only dl/dh are meaningful here

; ----- Check for possible overflow
	lea   rdx, [rbx+1]	; Quotient and remainder must
	shr   rdx, 1		;   each fit within srcsz/2-1
				;   (rdx = srcsz/2)
	mov   rax, rcx		; Dividend's significant bytes
	sub   rax, [@@srcsb]	; - divisor's ditto
	jnc sh @@p1		; dstsb >= srcsb: check quotient size
	cmp   rcx, rdx		; Divisor is longer: remainder = dividend,
	jnc sh @@divz		;   which must also fit below srcsz/2
	sub   rax, rax		; Quotient = 0, i.e. 1 byte after the inc
@@p1:	inc   rax		; + a possible odd BCD digit
	cmp   rax, rdx		; = max. signif. bytes in quotient
	jnc sh @@divz		; If not below limit then exit

	; ToDo: Allow division when = limit and no odd BCD
	; 	byte will appear. As the code is, a 36-digit
	;	number can't be divided by an 18-digit ditto.

	; NOTE(review): @@srcsb itself is not compared against
	;	srcsz/2 here. The shifted copy made at @@loopinit
	;	writes srcsb+1 bytes starting at src + srcsz/2, so
	;	the header's precondition (no significant digits in
	;	the divisor's high half) appears to be relied upon
	;	to keep that write inside the operand -- confirm
	;	that callers guarantee it.

; ----- Get, save, and reset operands' signs
;	rbx = srcsz-1, rdx = srcsz/2
	mov   rdi, @uiptr [dstBCD]
	add   rdi, rbx		; Point to sign byte
	sub   rax, rax		; Zero accumulator
	xchg  ah, @ES [rdi]	; Set dividend's sign = 0 (ah = old sign)
	@LDS  rsi, [srcBCD]
	add   rsi, rbx		; Point to sign byte
	xchg  al, [rsi]		; Set divisor's sign = 0 (al = old sign)
	mov   [@@signs], rax	; Save original signs

; ----- 'Early out' condition?
	mov   rbx, [@@dstsb]	; Compare length of operands
	sub   rbx, [@@srcsb]	; rbx = dstsb - srcsb (kept for the loop)
	ja sh @@loopinit	; Dividend is longer: divide
	jb sh @@p2		; Dividend is shorter: quotient is zero
	mov   rcx, [@@nzbs]	; Same length, compare 1st byte
	cmp   ch, cl		; ch = dst's NZB, cl = src's NZB
	jae sh @@loopinit	; Not provably smaller: let the loop decide

; ----- Special case: divisor > dividend
;	Set remainder = dividend and quotient = zero
;	(direction flag is set)
;	Here: al = divisor's sign, ah = dividend's sign,
;	rsi -> divisor's sign byte, rdi -> dividend's sign byte,
;	rdx = srcsz/2
@@p2:	mov   [rsi], al 	; Restore divisor's sign
	mov   al, ah
	stosb			; Remainder keeps dividend's sign
	; NOTE(review): @LDSEGM is used unguarded here but is wrapped
	;	in 'if @isDataFar' after @@loopend -- confirm the
	;	macro is harmless for near data models.
	@LDSEGM ds, es
	mov   rsi, rdi		; rdi = dst + srcsz - 2
	sub   rsi, rdx		; rsi = dst + srcsz/2 - 2
	mov   rcx, rdx
	dec   rcx		; (srcsz/2 - 1)
	rep   movsb		; Transfer dividend to hi(dst)
	mov   rcx, rdx		; All srcsz/2 bytes of lo(dst)
	sub   rax, rax
	rep   stosb		; Reset quotient to zero
	jmp   @@ok		; Return 1

; ----- Special cases: divisor is zero / overflow
;	(operands have not been modified at this point)
@@divz: sub   rax, rax		; Return 0
	jmp   @@ret


;-/////////////////////////////////////////////////////////////////
; Notes on BCD division algorithm
;
; Quotient and remainder are formed by repeated subtraction in
; each digit position. This algorithm requires two loops, the
; first (subtracting the divisor shifted left by 4 bits) creates
; the high nibble, the second (subtracting the original divisor)
; creates the low nibble.
;
; For storage efficiency, the quotient is stored in the part of
; the destination that is vacated by the diminishing remainder.
; For speed, the shifted divisor is kept in the unused part of
; source storage (alternatives: shift divisor left and right on
; each loop, or loop up to 99 times on a byte -- slow and ugly,
; but it will keep memory requirements at a minimum, i.e.
; normal-size divisor).
;
;
; Storage use:
;	Destination	Source
;	srrrrrrrrrrr	szzzzzzddddd	On entry to procedure
;	zrrrrrrrrrrr	DDDDDDzddddd	Before division
;	qqq..rrrrrrr	DDDDDDzddddd	During division
;	srrrrrsqqqqq	szzzzzzddddd	On exit from procedure
;
;	(r)emainder	(s)ign
;	(q)uotient	(z)ero
;	(d)ivisor
;	(D)ivisor shifted left by 4 bits
;
;	Note that the top bytes (signs) are used as work space.
;
;
; Division example:
;	+4+3+2+1+0 <= index into dividend (2 digits per byte)
;	0026189023 / 8038 = 3258 (rem. 1219)
;	..080380.. (x3)  (#1 hi)
;	..008038.. (x2)  (#1 lo)
;	....080380 (x5)  (#2 hi)
;	....008038 (x8)  (#2 lo)
;
;	loop #						#1  #2
;	length of dividend	@@dstsb			 4   3
;	length of divisor	@@srcsb + j		 3   3
;	subtraction index	@@dstsb - @@srcsb - i	 1   0
;	exit condition		index < 0
;
;	j = 1
;	i = 1 if ((@@dstsb > @@srcsb) and (dst's NZB < src's NZB))
;	(need i to keep within limits, and j to propagate carry,
;	e.g. when computing 100/9)
;-

; ----- Compute auxiliary variables
;	for fast access
@@loopinit:
	; ds = srcBCD segment, es = dstBCD segment
	; rbx = (@@dstsb - @@srcsb)
	; Both sign bytes are zero in memory from here on; they
	; are rebuilt from @@signs after the loop.
	cld			; Clear direction flag
	mov   rdi, @uiptr [dstBCD]
	mov   rax, [srcsz]
	add   rdi, rax
	dec   rdi		; Point to top byte of dst
	mov   [@@quotp], rdi	; = Quotient insertion point
	shr   rax, 1		; rax = srcsz/2
	mov   rsi, @uiptr [srcBCD]
	mov   rdi, rsi
	add   rdi, rax		; Point to high half of src
	mov   [@@src2p], rdi	; = Addr of shifted divisor

; ----- Copy divisor to high half of src
;	shifting it left by one nibble
;	(assuming @shl is a plain shift left: after the shift
;	al = low digit moved up, ah low nibble = high digit
;	spilled out; dl carries that spill into the next byte)
        mov   rcx, [@@srcsb]
	sub   dl, dl		; No nibble carried into lowest byte
@@svl:	lodsb
	@shl  rax, 4
	or    al, dl		; Merge nibble from previous byte
	mov   [rdi], al
	and   ah, 0fh		; Keep only the spilled nibble
	mov   dl, ah
	inc   rdi
	dec   rcx
	jnz   @@svl
	mov   [rdi], ah 	; Store odd nibble (byte no. srcsb+1)

; ----- Adjust subtraction point and
;	divisor's length if needed
	test  rbx, rbx
	jz sh @@vl1		; Same length: start at index 0
        mov   rax, [@@nzbs]
	cmp   ah, al		; dst's NZB < src's NZB sets CF ...
	sbb   rbx, 0		; Subtraction point (... index - 1)
@@vl1:	mov   rdx, [@@srcsb]
	inc   rdx		; Divisor's length (incl. one extra
				;   top byte to propagate the carry)


; ///// Top of loop ////////////
;	Usage:
;	rbx = subtraction point
;	rdx = divisor's length (constant)
;	rsi = divisor pointer
;	rdi = dividend pointer
;	ah  = quotient byte (BCD)
;	Each pass produces one quotient byte (two BCD digits).
@@looptop:
	add   rbx, @uiptr [dstBCD] ; Base register = subtraction point
	sub   ah, ah		; Initialize quotient digit pair

; ----- Do 'high' subtraction (shifted divisor)
;	stosb, inc and dec leave CF untouched, so the borrow
;	produced by das chains into the next sbb and is still
;	valid when tested after the loop.
@@hi1:	mov   rdi, rbx		; Subtraction point
	mov   rsi, [@@src2p]	; Point to shifted divisor
	mov   rcx, rdx		; Loop count is divisor length
	cmp   al, al		; Clear carry
	@alignn
@@hi2:	mov   al, @ES [rdi]	; Get two BCD digits of remainder
	sbb   al, [rsi]		; Subtract carry and divisor digits
	das			; Decimal adjust after subtraction
	stosb			; Store byte back
	inc   rsi		; Step divisor pointer
	dec   rcx		; Loop
	jnz   @@hi2		;   until done
	jc sh @@hi3		; If result negative, loop is done
	add   ah, 10h		; Count 'high' subtractions
	jmp   @@hi1		; Do next subtraction

	; Add back since we went too far
@@hi3:	sub   rdi, rdx		; Reset
	sub   rsi, rdx		;   pointers
	mov   rcx, rdx		; cf=0 (left by the sub above)
	@alignn
@@hi4:	mov   al, @ES [rdi]	; Get byte from remainder
	adc   al, [rsi]		; Add carry and byte from divisor
	daa			; Decimal adjust after addition
	stosb			; Store byte back
	inc   rsi		; Step divisor pointer
	dec   rcx		; Loop
	jnz   @@hi4		;   until done


; ----- Do 'low' subtraction (unshifted divisor)
;	Same scheme as above; the divisor is read over rdx =
;	srcsb+1 bytes, i.e. including the byte above its NZB.
@@lo1:	mov   rdi, rbx		; Subtraction point
	mov   rsi, @uiptr [srcBCD] ; Point to unshifted divisor
	mov   rcx, rdx		; Loop count is divisor's length
	cmp   al, al		; Clear carry
	@alignn
@@lo2:	mov   al, @ES [rdi]	; Get two BCD digits of remainder
	sbb   al, [rsi]		; Subtract carry and divisor digits
	das			; Decimal adjust after subtraction
	stosb			; Store byte back
	inc   rsi		; Step divisor pointer
	dec   rcx		; Loop
	jnz   @@lo2		;   until done
	jc sh @@lo3		; If result negative, loop is done
	inc   ah		; Count no. of 'low' subtractions
	jmp   @@lo1

	; Add back since we went too far
@@lo3:	sub   rdi, rdx		; Reset
	sub   rsi, rdx		;   pointers
	mov   rcx, rdx		; cf=0 (left by the sub above)
	@alignn
@@lo4:	mov   al, @ES [rdi]	; Get byte from remainder
	adc   al, [rsi]		; Add carry and byte from divisor
	daa			; Decimal adjust after addition
	stosb			; Store byte back
	inc   rsi		; Step divisor pointer
	dec   rcx		; Loop
	jnz   @@lo4		;   until done

; ----- Store the digit pair just generated
;	into the quotient
;	(quotient bytes are laid down from the top byte of dst
;	downwards, most significant byte first)
	mov   rdi, [@@quotp]
	mov   @ES [rdi], ah
	dec   [@@quotp]		; Decrement quotient insertion point
	sub   rbx, @uiptr [dstBCD] ; Make index
	jz sh @@loopend 	; Time to finish when zero
	dec   rbx		; Decrement subtraction index
	jmp   @@looptop


; ///// End of loop ////////////
@@loopend: ;

; ----- Move and zero-extend
;	quotient to hi(dst)
;	(the remainder is what is left in the low bytes of dst)
	mov   rbx, [srcsz]
	shr   rbx, 1		; (srcsz/2)
	@LES  rdi, [dstBCD]
	add   rdi, rbx		; rdi -> start of hi(dst)
	lea   rcx, [rdi+rbx]	; rcx = dst + 2*(srcsz/2)
	mov   rsi, [@@quotp]
	if @isDataFar
	@LDSEGM ds, es
	endif
	inc   rsi		; Point to quotient (rsi > rdi)
	sub   rcx, rsi		; rcx = no. of quotient bytes stored
	mov   rdx, rcx
	rep   movsb		; Forward copy (direction flag clear)
	mov   rcx, rdx
	neg   rcx
	add   rcx, rbx		; rcx = srcsz/2 - quotient bytes
	sub   rax, rax
	rep   stosb		; Zero-fill the rest of hi(dst)

; ----- Swap lo(dst) and hi(dst)
	mov   rsi, @uiptr [dstBCD]
	lea   rdi, [rsi+rbx]
	mov   rcx, rbx		; srcsz/2 byte pairs
	sub   rdx, rdx		; dl/dh collect remainder/quotient bits
	@alignn
@@swp:	mov   al, [rsi]		; al = remainder byte
	mov   ah, [rdi]		; ah = quotient byte
	mov   [rdi], al 	; Remainder's bytes go into hi(dst)
	mov   [rsi], ah 	; Quotient's bytes go into lo(dst)
	or    rdx, rax		; OR bytes together for zero check
	inc   rsi
	inc   rdi
	dec   rcx
	jnz   @@swp

; ----- Set result signs, adjust if zero results
;	After the swap loop rsi-1 / rdi-1 address the top bytes
;	of lo(dst) / hi(dst).
	xchg  dl, dh		; dl = quotient bits, dh = remainder bits
	neg   dl
	sbb   dl, dl		; 0 if quotient zero, else 0ffh
	neg   dh
	sbb   dh, dh		; 0 if remainder zero, else 0ffh
	mov   rax, [@@signs]	; Get original signs
	xor   al, ah		; Quotient's sign = src sign XOR dst sign
	;			; Remainder keeps dividend's sign
	and   rax, 8080h	; Isolate sign bits
	and   rax, rdx		; Remove sign if result was zero
	mov   [rsi-1], al	; Store
	mov   [rdi-1], ah	;   signs

; ----- Restore original divisor
;	Zero srcsz/2 bytes from src + srcsz/2 - 1 upwards (this
;	wipes the shifted copy), then rewrite the sign byte.
	@LES  rdi, [srcBCD]
	mov   rcx, [srcsz]
	shr   rcx, 1
	add   rdi, rcx
	dec   rdi
	sub   al, al
	rep   stosb
	mov   al, @bptr [@@signs] ; Divisor's original sign byte
	stosb
	;
@@ok:	sub   rax, rax		; Return 1 (success)
	inc   rax
@@ret:	cld			; Clear direction flag
	RET			; Back to caller
bcdIdiv endp

	END
