;---------------------------------------------------------------------
;
;
;
; super awesome fast cpu+blitter poly fill routine
; only works for bitplanes in interleaved format, 1 bit per pixel
; gets the 3 slopes of the three sides
; then draws a horizontal fill for each scan line between two
; imaginary points!!  the horizontal fill (fillline) is the crucial one to
; be modified/optimized for different speeds/gfx formats/cpus
;
;--------------------------------------------------------------------


;		serase modulo,address,size
; serase - program and start a blitter clear (destination channel only).
;   \1 = destination modulo in bytes
;   \2 = destination address (must be chip ram)
;   \3 = BLTSIZE value: (height<<6)|width_in_words
; expects a6 = $dff000.  the macro neither waits for a previous blit to
; finish nor for its own blit to complete; the caller has to do both.
serase:		macro
		move	#$8440,$96(a6)	;dmacon: set blitter dma + blitter priority
		move	#$0100,$40(a6)	;bltcon0: D channel only, minterm 0 -> writes zeros
					;(was $0900: also enabled source A dma although
					; bltapt is never set and minterm 0 ignores A)
		clr	$42(a6)		;bltcon1
		clr.l	$44(a6)		;bltafwm/bltalwm = 0 (unused by this blit)
		clr	$64(a6)		;bltamod (unused by this blit)
		move	#\1,$66(a6)	;destination	mod
		move.l	#\2,$54(a6)	;Destination	D
		move	#\3,$58(a6)	;bltsize: writing it starts the blit
		move	#$0400,$96(a6)	;dmacon: drop blitter priority again
		endm


;-------------------------------------------------------------------
; start of the actual program!!!
;
;
	section	drawit,code

	; program entry: installs the copper list, patches the bitplane
	; pointers into it, builds the pattern tables and then redraws the
	; test polygon once per frame until the left mouse button is pressed.
	movem.l	d0-d7/a0-a6,-(sp)
	lea	copper,a0
	lea	$dff000,a6
	move.l	a0,$84(a6)		;cop2lc = our copper list
	move	#$4000,$9a(a6)		;intena: master interrupt enable off
	clr	$8a(a6)			;copjmp2 strobe: restart copper at cop2lc

	; write the addresses of the 4 interleaved planes (40 bytes apart)
	; into the bplxpth/bplxptl moves of the copper list
	moveq	#4-1,d7
	move.l	#oddframe,d0
	lea	bit1,a0
mkbit	move	d0,6(a0)		;low word of plane address
	swap	d0
	move	d0,2(a0)		;high word of plane address
	swap	d0
	add.l	#40,d0			;long add: a word add lost the carry into
					;the high word across a 64k boundary
	lea	8(a0),a0
	dbf	d7,mkbit


	bsr	initvect

vb	cmp.b	#$40,$dff006		;wait until the beam position byte reads $40
	bne.s	vb

	lea	$dff000,a6
	move	#$f00,$180(a6)		;color00 changes = raster timing bars
	btst	#10,$16(a6)		;potinp bit 10: clear -> skip the screen clear
	beq.s	noc
	jsr	clear
noc	move	#$00f,$180(A6)
	bsr	drawpoly
	move	#$0f0,$180(a6)
	move	#$000,$180(a6)

	; read the mouse counters (joy0dat) and store them as the 3rd
	; polygon vertex: x = low byte*2-127, y = high byte
	moveq	#0,d0
	moveq	#0,d1
	move	$a(a6),d0
	move	d0,d1
	and	#$ff,d0
	lsl	#1,d0
	sub.l	#127,d0
	lsr	#8,d1
	movem.l	d0/d1,temp

	btst	#6,$bfe001		;loop while bit 6 of $bfe001 is set
	bne.s	vb


	move	#$c000,$9a(a6)		;intena: master interrupt enable back on
	movem.l	(sp)+,d0-d7/a0-a6
	rts


;-------------- test polygon data 2d, after 3d->2d conversions
;-------------- (definitely will be expanded)
;
; layout read by drawpoly: one colour word followed by three x,y pairs
; stored as longwords.  the colour word is multiplied by 40 in drawpoly to
; get a byte offset into polyLEtab.
testpoly	dc.w	$96<<2
		dc.l	100,50		;1st xy..
		dc.l	150,75		;2nd xy..
temp		dc.l	50,100		;3rd xy.. (overwritten each frame by the main loop)


; findinx x1,y1,x2,y2,out - x increment per scanline for one polygon edge
;   out = (x2-x1)/(y2-y1) + 1   (signed, x values are pre-scaled by the caller)
;   if y2 == y1 the whole dx is returned, so a single step jumps to x2.
;   if the quotient does not fit in 16 bits (divs overflow) the whole dx is
;   returned as well; divs leaves d6 untouched in that case, and running
;   addq/ext.l on it (as before) produced a garbage slope.
; result is also left in d6.  clobbers d6,d7.
; uses the local label .zf, so every invocation needs its own global label
; in front of it.
findinx		macro			;findinx x1,y1,x2,y2,outinx
		move.l	\3,d6
		move.l	\4,d7
		sub.l	\1,d6  ;dx
		sub.l	\2,d7  ;dy
		tst.l	d7
		beq.s	.zf		;flat edge: use dx unchanged
			divs	d7,d6	;32/16 signed divide, quotient in low word
		bvs.s	.zf		;overflow: d6 still holds dx, use it as is
		addq	#1,d6		;inc dinx to make it work 100%
		ext.l	d6
.zf		move.l	d6,\5
		endm			;\5 = X increment for each Y line


;--------------
; design note: packing both X and Y of a point into the 16 bit halves of one
; register would speed up XY swaps and use fewer registers!!
; (the code below currently keeps X and Y in separate registers: d0/d1, d2/d3, d4/d5)
;
; drawpoly - fill the triangle described at testpoly, one scanline at a time.
;   sorts the three points by y, computes the three edge slopes with findinx
;   and calls fillline for every row from the top point down to the last y.
; registers while scanning:
;   d0/d1 = the two edge x positions (scaled by 128), d5/d6 = their slopes
;   d2    = current y, d3 = y at which the current half stops
;   a3    = address of the current row in oddframe (row stride 40*4)
;   a4    = Landtabs, a5 = pattern rows for the colour, a6 = $dff000
; clobbers d0-d7/a0-a6.
; NOTE(review): no vertical clipping is done here; rows outside the bitmap
; are written as well - confirm callers keep y on screen.
drawpoly	lea	testpoly(pc),a0
		lea	polyLEtab,a5		;choose cross hatch
		move	(a0)+,d7		;read color
		lea	$dff000,a6		;custom chip pointer
		mulu	#40,d7			;multiply to get offset
		lea	Landtabs(pc),a4		;pntr -> and tables
		lea	(a5,d7.l),a5		;source crosshatch pixels

		movem.l	(a0),d0-d5	;get coord1-coord3
fsort		cmp.l	d1,d5		;sort the xy cords to their y values!
		bgt.s	.low1
			exg.l	d1,d5
			exg.l	d0,d4
.low1		cmp.l	d1,d3
		bgt.s	.low2
			exg.l	d1,d3
			exg.l	d0,d2
.low2		cmp.l	d3,d5
		bgt.s	.low3
			exg.l	d2,d4
			exg.l	d3,d5
.low3						;now d1 <= d3 <= d5

		asl.l	#7,d0			;scale X0 X1 X2
		asl.l	#7,d2
		asl.l	#7,d4

; the global labels below only exist to give each findinx its own .zf scope
zf0a		findinx	d0,d1,d2,d3,a0		;point 1->2  A0...dinx
zf1a		findinx	d0,d1,d4,d5,a1		;point 1->3  A1... "
zf2a		findinx	d2,d3,d4,d5,a2		;point 2->3  A2... "

		lea	lastYpos(pc),a3
		move	d5,(a3)			;LAST Y-pos
		move.l	d6,2(a3)		;store to middleslope (directly after lastYpos)

		move.l	d1,d2			;INC Y
		mulu	#40*4,d1		;Y*40*4 (32 bit result)
		lea	oddframe,a3
		lea	(a3,d1.l),a3		;get start address:start PLANE
						;(long index: .w truncated the product)

		move.l	a0,d5
		move.l	a1,d6
		move.l	d0,d1			;xfill D0---->D1

; a blit (e.g. the screen clear) may still be running: wait before touching
; bltcon0/bltadat, otherwise the running blit gets the new minterm
		btst	#$e,$2(a6)
polybwait	btst	#$e,$2(a6)		;wait till blitter finished!
		bne.s	polybwait

		move.l	#$07ca0000,$40(a6)	;bltcon0/1: use B,C,D, minterm $ca
		move	#-1,$74(a6)		;bltadat = all ones (masked per word by fillline)
 move #$08f,$180(a6)
		sf	polflag			;set the direction
		cmp.l	d5,d6
		bge.s	polfillA
			exg.l	d5,d6		;keep the smaller slope in d5
			st	polflag		;remember that 1->2 now lives in d6

polfillA	bsr	polfillB		;upper half: rows y1 .. y2-1
		move	lastYpos(pc),d3
		cmp	d3,d2			;if (yinc >= ylast) stoppoly
		bge	polystop		;(bge, not beq: y can already be past
						; ylast when all three y are equal)
			tst	polflag
			beq.s	slopeA
				move.l	middleslope(pc),d6
				bra.s	polfillB
slopeA			move.l	middleslope(pc),d5

polfillB	bsr	fillline		;drawline from X1-->X2
		add.l	d5,d0			;slope the sides
		add.l	d6,d1
		lea	40*4(a3),a3
		addq	#1,d2			;yinc += 1
		cmp	d3,d2			;loop while yinc < stop y in d3
		blt	polfillB
polystop	rts


polflag		dc.w	0	;flag (wether turning point is left/
lastYpos	dc.w	0	;last Ypos			;right)
middleslope	dc.l	0	;3rd slope # (must stay directly after lastYpos)


;----------------------------------------------------
; fill a line of pointer A3 ,coords D0-->D1
;         bitpattern  -: A5
;         mask tables -: A4
; in:  d0,d1 = start/end x scaled by 128, a3 = row address, a4 = Landtabs,
;      a5 = pattern rows, a6 = $dff000.  bltcon0/1 and bltadat must already
;      be set up by the caller.
; the x range is clipped to 0..319; nothing is drawn unless end x > start x.
; blits 4 rows (one per interleaved plane) and returns without waiting for
; the blit to finish.  preserves d0-d3, clobbers a2.
fillline	movem.l	d0-d3,-(sp)		;one long save (a word movem restore
						;would sign-extend d2/d3)

		asr.l	#7,d0			;scale X1 X1 down to x/128 (x>>7)
		asr.l	#7,d1


		tst.l	d0			;clipping
		bge.s	.x1ispos
			moveq	#0,d0
.x1ispos	tst.l	d1
		bge.s	.x2ispos
			moveq	#0,d1
.x2ispos	move.l	#320-1,d2
		cmp.l	d2,d0
		ble.s	.x1isin
			move.l	d2,d0
.x1isin		cmp.l	d2,d1
		ble.s	.x2isin
			move.l	d2,d1
.x2isin		cmp.l	d0,d1
		ble.s	fillend			;empty or reversed span: skip


		move	d0,d3
		move	d1,d2			;number of words to fill
		lsr	#4,d2
		lsr	#4,d3
		sub	d3,d2
		addq	#1,d2
						;(debug dump of d0-d7 to $200 removed:
						; it overwrote low memory on every row)

		add	d3,d3
		lea	(a3,d3.w),a2		;get screen start address

		moveq	#%1111,d3
		and	d3,d0			;get 1st nibble
		and	d3,d1			;get 2nd nibble
		add	d0,d0
		add	d1,d1
		move	00(a4,d0.w),d0		;first word mask (pixels from x1 rightwards)
		move	32(a4,d1.w),d1		;last word mask (pixels up to and incl. x2)
		move	d2,d3
		add	d3,d3
		sub	#40,d3
		neg	d3			;modulo = -((xwordsize*2)-40)
		and	#%11111,d2
		or	#4<<6,d2		;blit size!! (4 rows = 1 line in each plane)


		btst	#$e,$2(a6)
.wait		btst	#$e,$2(a6)		;wait till blitter finished!
		bne.s	.wait

		movem	d0/d1,$44(a6)		;bltafwm,bltalwm
		move	d3,$60(a6)		;bltcmod
		move	d3,$62(a6)		;bltbmod
		move	d3,$66(a6)		;bltdmod
		move.l	a2,$54(a6)		;dest
		movem.l	a2/a5,$48(a6)		;bltcpt = dest, bltbpt = pattern source
		move	d2,$58(a6)		;go blitter!!

fillend		movem.l	(sp)+,d0-d3
		rts


; two 16-entry word tables indexed by (x & 15)*2, used by fillline:
;   offset  0: entry n has the n leftmost bits clear, the rest set
;   offset 32: entry n has the n+1 leftmost bits set, the rest clear
Landtabs	dc.w	$ffff,$7fff,$3fff,$1fff		;n = 0..3
		dc.w	$0fff,$07ff,$03ff,$01ff		;n = 4..7
		dc.w	$00ff,$007f,$003f,$001f		;n = 8..11
		dc.w	$000f,$0007,$0003,$0001		;n = 12..15

		dc.w	$8000,$c000,$e000,$f000		;n = 0..3
		dc.w	$f800,$fc00,$fe00,$ff00		;n = 4..7
		dc.w	$ff80,$ffc0,$ffe0,$fff0		;n = 8..11
		dc.w	$fff8,$fffc,$fffe,$ffff		;n = 12..15


;-------------- overall init routine which calls all the other stuff
; builds both crosshatch pattern tables with makepolytab:
;   polyLEtab from d0=$aaaa / d1=$5555 with rotflag = 0
;   polyLOtab from d0=$5555 / d1=$aaaa with rotflag = 1
initvect	lea	polyLEtab,a0
		move	#$aaaa,d0		;pattern stored for colour bits 0-3
		move	#$5555,d1		;pattern or-ed in for colour bits 4-7
		clr	rotflag			;0 -> makepolytab shifts its mask left
		bsr	makepolytab

		lea	polyLOtab,a0
		move	#$5555,d0		;same table with the two patterns swapped
		move	#$aaaa,d1
		move	#1,rotflag		;1 -> makepolytab shifts its mask right
		bra	makepolytab		;tail call, returns to our caller

;------------------------------------------------
; makes an 8bit color table out of a 4bit color palette    (256)
; i.e. using crosshatching
; could be done using real 8bit color to make a 16bit palette (65535)
;-----------
; in:  a0 = table to fill (4*40 bytes per entry, 256 entries)
;      d0 = word pattern for colour bits 0-3, d1 = pattern for bits 4-7
; the colour byte counts from 255 down to 0 while a0 advances, so entry n
; of the table is built from colour value 255-n.
; rows are only written for set colour bits; rows for clear bits are left
; as they were.
; clobbers d2-d7, a0 (d0/d1 are and-ed with the mask in d2).
makepolytab	moveq	#0,d7			;extra passes after the first one
		move	#$ffff,d2		;pattern mask, shifted after each pass
.nextword	move	#256-1,d6		;colour byte 255..0

.colour		moveq	#4-1,d5			;colour bits 3..0
.lowbits	btst	d5,d6
		beq.s	.lowskip
		bsr	do1plane		;store d0 into plane row (d5 & 3)
.lowskip	dbf	d5,.lowbits

		moveq	#8-1,d5			;colour bits 7..4
.highbits	btst	d5,d6
		beq.s	.highskip
		bsr	do2plane		;or d1 into plane row (d5 & 3)
.highskip	subq	#1,d5
		cmp.b	#3,d5
		bgt.s	.highbits		;until bit 4 has been handled

		lea	4*40(a0),a0		;next table entry
		dbf	d6,.colour		;next color

		and	d2,d0			;apply the mask for a following pass
		and	d2,d1
		tst	rotflag
		bne.s	.maskright
		lsl	#1,d2
		bra.s	.passdone
.maskright	lsr	#1,d2
.passdone	dbf	d7,.nextword
		rts

; stores the word in d0 into all 20 words of one plane row of the entry at a0.
; in:  d5 = colour bit number, its low 2 bits select the row (row*40 bytes)
; clobbers d3,d4.
do1plane	moveq	#%011,d4
		and	d5,d4		;row number = bit number & 3
		mulu	#40,d4		;byte offset of that row
		moveq	#20-1,d3	;20 words = one 40 byte row
.storeword	move	d0,(a0,d4.w)	;store bits to ram!
		addq	#2,d4
		dbf	d3,.storeword
		rts

; ors the word in d1 into all 20 words of one plane row of the entry at a0.
; in:  d5 = colour bit number, its low 2 bits select the row (row*40 bytes)
; clobbers d3,d4.
do2plane	moveq	#%011,d4
		and	d5,d4		;row number = bit number & 3
		mulu	#40,d4		;byte offset of that row
		moveq	#20-1,d3	;20 words = one 40 byte row
.orword		or	d1,(a0,d4.w)	;merge bits into ram!
		addq	#2,d4
		dbf	d3,.orword
		rts


; set by initvect, tested by makepolytab: 0 = shift the mask left (lsl),
; non-zero = shift it right (lsr)
rotflag		dc.w	0


; tables of side of polygons in the following format

; dimensions used to size oddframe/evenframe (xw*bp*yl bytes each)
xw = 40		; presumably bytes per row of one plane - the code uses the literal 40
yl = 200	; presumably number of rows
bp = 4		; presumably number of bitplanes


		section	cop,data_C
; copper list installed into cop2lc by the startup code.  every longword is
; one copper move: high word = custom register offset, low word = value.
; $008e-$0094 = diwstrt, diwstop, ddfstrt, ddfstop
; $0100-$0104 = bplcon0, bplcon1, bplcon2
; $0108/$010a = bpl1mod/bpl2mod, $78 = 120 = 3*40 (skips the other 3 interleaved rows)
copper	dc.l $008e3960,$009001d8,$00920038,$009400d0
	dc.l $01004304,$01020000,$01040000
	dc.l $01080078,$010a0078

; $0120/$0122 = sprite 0 pointer high/low
spr1	dc.l $012003f0,$012203f0

; $0180-$019e = color00-color15
col1	dc.l $01800000,$01820fff,$01840eee,$01860ddd
	dc.l $01880ccc,$018a0bbb,$018c0aaa,$018e0999
	dc.l $01900888,$01920777,$01940666,$01960555
	dc.l $01980444,$019a0333,$019c0222,$019e0111

; $00e0-$00f2 = bitplane pointers (high,low pairs).  the low words of the
; first four pairs are placeholders patched by the mkbit loop at startup.
bit1	dc.l $00e00001,$00e20000,$00e40001,$00e60000
	dc.l $00e80001,$00ea0000,$00ec0001,$00ee0000
	dc.l $00f00001,$00f20000
	dc.l $ffdffffe,-2	; copper wait, then -2 = $fffffffe ends the list


		section	xx,code_C
; clear - start a blitter clear of oddframe (200*4 rows of 20 words).
; in: a6 = $dff000.  returns as soon as the blit has been started.
; the blitter is now polled for idle first; before, its registers were
; rewritten even if an earlier blit was still running.
clear
		btst	#$e,$2(a6)
.busy		btst	#$e,$2(a6)		;dmaconr bit 14 = blitter busy
		bne.s	.busy
		serase	0,oddframe,((200*4*64)+20)
		rts


		section	scr,bss_C
; oddframe is the bitmap that is displayed, cleared and drawn into.
; evenframe is not referenced by the code in view.
oddframe	ds.b	xw*bp*yl
evenframe	ds.b	xw*bp*yl
; pattern tables built by initvect/makepolytab: 256 entries of 4 rows * 20 words
polyLEtab	ds.w	256*4*20		;left even lines to use
polyLOtab	ds.w	256*4*20		;second table (filled with the two patterns swapped)