#APP

| void bcopy(void *src, void *dst, long cnt)
|	handle overlap (both ways), odd/even alignment etc
|	++jrb	bammi@dsrgsun.ces.cwru.edu
| 
	.text
	.even
	.globl _bcopy

| _bcopy -- copy cnt bytes from src to dst; the two regions may overlap.
|
|   entry:     sp@(4) = src, sp@(8) = dst, sp@(12) = cnt (long)
|   result:    void; cnt <= 0 or src == dst copies nothing
|   registers: a0 = running src, a1 = running dst, d0 = bytes left,
|              d1 = scratch, d2 = scratch (pushed and popped here)
|
|   strategy:  regions disjoint, or dst below src
|                  -> ascending copy; long moves when src and dst have
|                     the same parity, single bytes otherwise
|              dst above src and overlapping
|                  -> descending byte copy
|
|   All address comparisons are unsigned.
_bcopy:
	movl	sp@(4),a0	| a0 = src
	movl	sp@(8),a1	| a1 = dst
	movl	sp@(12),d0	| d0 = cnt; the move also sets N/Z from cnt
	jle	return		| cnt <= 0: nothing to do
	cmpl	a0,a1
	jeq	return		| src == dst: memory already holds the result
	movl	d2,sp@-		| d2 is scratch for the parity test; keep caller's

	| overlap test: distance(src, dst) < cnt, addresses taken as unsigned
	movl	a0,d1
	subl	a1,d1		| d1 = src - dst; carry set when src < dst
	jcc	L1		| src >= dst: d1 is already the distance
	negl	d1		| d1 = dst - src
L1:	cmpl	d1,d0
	jhi	overlap		| cnt > distance: the regions overlap

fwdcopy:
	| ascending copy; pick the width from the parity of src and dst
	movw	a0,d1
	movw	a1,d2
	eorb	d1,d2		| bit 0 of d2 = parity(src) xor parity(dst)
	btst	#0,d2
	jne	oddeven		| parities differ: copy single bytes
	btst	#0,d1
	jeq	eveneven
	movb	a0@+,a1@+	| both odd: one byte makes both even
	subql	#1,d0

eveneven:			| may want long alignment for 020/030 etc
	| Enter the unrolled loop part-way so that the first pass moves
	| (cnt / 4) mod 8 longs; every later pass moves 8 longs.
	| NOTE: the #18 bias relies on each movl below assembling to 2 bytes
	| and on loop1 following the jmp directly -- keep them together.
	movl	d0,d1
	andl	#0x1c,d1	| bytes held in the partial group of longs (0..28)
	lsrl	#1,d1		| 2 code bytes per long to be moved
	negl	d1
	addl	#18,d1		| 16 bytes of movl + 2 for the jmp extension word
	jmp	pc@(0,d1)	| land on the movl that leaves the right number
loop1:
	movl	a0@+,a1@+
	movl	a0@+,a1@+
	movl	a0@+,a1@+
	movl	a0@+,a1@+

	movl	a0@+,a1@+
	movl	a0@+,a1@+
	movl	a0@+,a1@+
	movl	a0@+,a1@+

	subl	#32,d0		| leaves bits 0-1 of d0 (the tail length) intact
	jge	loop1		| at least one more full group of 8 longs

	btst	#1,d0
	jeq	L4
	movw	a0@+,a1@+	| residual word
L4:	btst	#0,d0
	jeq	ret
	movb	a0@,a1@		| residual byte

ret:	movl	sp@+,d2		| exit for every path that pushed d2
return:	rts

oddeven:
upcopy:				| byte-by-byte forward
	subql	#1,d0
	jlt	ret
	movb	a0@+,a1@+
	jra	upcopy

overlap:
	| Overlapping regions.  With dst below src an ascending copy is safe
	| at any width: each move reads its source before it stores, and the
	| store lands below every source byte that is still unread.
	cmpl	a0,a1		| flags from dst - src
	jcs	fwdcopy		| dst < src (unsigned): use the ascending path
	addl	d0,a0		| dst > src: start one past the end of each
	addl	d0,a1		|  region; predecrement steps onto the last byte

downcopy:			| byte-by-byte backward
	subql	#1,d0
	jlt	ret
	movb	a0@-,a1@-
	jra	downcopy
