/* These are optimized x86 assembly versions of pfield_linetoscr.
 * Feel free to send me Sparc/PPC/Alpha versions of this... :)
 */

/*
 * Symbol spelling.  SYM(x) produces the assembler-level name used for an
 * external symbol: with __DOS__ defined an underscore is prepended,
 * otherwise the name is used unchanged.
 */
#ifdef __DOS__
#define SYM(NAME) _##NAME
/* With __DOS__ the underscore-prefixed name _color_regs is replaced by
 * _bpl_info. */
#define _color_regs _bpl_info
#else
#define SYM(NAME) NAME
#endif

/* In both configurations the plain name color_regs is replaced by bpl_info.
 * NOTE(review): the reason for this aliasing is not visible in this file. */
#define color_regs bpl_info

/*
 * Reserve a 2-byte common symbol named lastcolor (additionally marked
 * .local when __DOS__ is not defined), then align whatever follows.
 * The alignment argument is 16 without __DOS__ and 8 with it; how the
 * assembler interprets that number depends on the target object format.
 */
#ifndef __DOS__
	.local	lastcolor
	.comm	lastcolor,2
	.align 16
#else
	.comm	lastcolor,2
	.align 8
#endif

/*
 * pfield_linetoscr_full8 -- write a span of pixels at one byte per pixel.
 *
 * Stack arguments (offsets are valid after the four pushes below):
 *   20(%esp) -> %esi   current pixel position
 *   24(%esp) -> %edi   stop position; every loop ends once %esi >= %edi
 *                      (signed compare)
 *
 * Output goes to the address held in xlinebuffer, and xlinebuffer is
 * advanced past everything written.  Work is done four pixels at a time:
 * each step advances %esi by 4 and stores one 32-bit word.
 *
 * Phases:
 *   1. Zero 4 bytes of pixdata and spixstate just below diwfirstword
 *      (skipped when diwfirstword < 4) and 4 bytes starting at diwlastword.
 *   2. Left border: store the fill colour while diwfirstword > %esi + 4.
 *   3. Body, while %esi < diwlastword:
 *        - HAM path when bplham != 0 and bplplanecnt == 6,
 *        - dual playfield path when bpldualpf != 0,
 *        - plain acolors lookup otherwise.
 *   4. Right border: store the fill colour until %esi >= %edi.
 *
 * The fill colour is the low byte of the first acolors entry.
 *
 * %ebx, %esi, %edi and %ebp are saved and restored.  %eax, %ecx, %edx and
 * the flags are clobbered; %eax is not set to any particular result.
 *
 * NOTE(review): the exported label is not wrapped in SYM(), unlike the
 * external data symbols used below -- confirm that __DOS__ builds resolve it.
 */
.globl pfield_linetoscr_full8
pfield_linetoscr_full8:
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %ebx
	/* Return address plus four saved registers = 20 bytes below the
	 * first argument. */
	movl 20(%esp),%esi
	movl 24(%esp),%edi
	
	/* Phase 1: clear the words bordering the display window.  The jc
	 * skips the lower clear when diwfirstword < 4 (unsigned), where the
	 * -4 displacement would address memory before the arrays. */
	xorl %edx,%edx
	movl SYM(diwfirstword),%eax
	cmpl $4,%eax
	jc .Loops1
	movl %edx,SYM(pixdata)-4(%eax)
	movl %edx,SYM(spixstate)-4(%eax)
.Loops1:	
	movl SYM(diwlastword),%eax
	movl %edx,SYM(pixdata)(%eax)
	movl %edx,SYM(spixstate)(%eax)
	
	/* Build the fill word: copy the low byte of the first acolors entry
	 * into %dh, place that byte pair in %ax, swap it into the upper half
	 * with bswapl, then write the pair into %ax again.  All four bytes
	 * of %eax now hold the same colour. */
	movl SYM(acolors),%edx
	movb %dl,%dh
	movw %dx,%ax
	bswapl %eax
	movw %dx,%ax
	
	/* Phase 2: left border.  %ecx is the output cursor, %edx = %esi + 4.
	 * Continue while diwfirstword > %esi + 4 and %esi < %edi. */
	movl SYM(xlinebuffer),%ecx
	leal 4(%esi),%edx
	cmpl %edx,SYM(diwfirstword)
	jle .La_start_ok
.La_start_line:
	cmpl %edi,%esi
	jge .La_start_ok
	movl %eax,(%ecx)
	addl $4,%ecx
	movl %edx,%esi

	leal 4(%esi),%edx
	cmpl %edx,SYM(diwfirstword)
	jg .La_start_line
.La_start_ok:
        movl %ecx,SYM(xlinebuffer)

	/* Phase 3: choose the body loop. */
        cmpl $0,SYM(bplham)
	je .La_noham
	cmpl $6,SYM(bplplanecnt)
	jne .La_noham
	
	/* HAM 6 */
	/* %ebp is the output cursor.  %edx is cleared here but not read by
	 * the HAM loop. */
	movl SYM(xlinebuffer),%ebp
	xorl %edx,%edx
.La_hamloop:
	cmpl %esi,SYM(diwlastword)
	jle .La_hamend
	cmpl %edi,%esi
	jge .La_hamend
		
	/* ham_linebuf is read as 4-byte entries indexed by %esi; each value
	 * indexes xcolors (4-byte stride) and the low byte of that entry is
	 * the output pixel.  Pixels 3 and 2 are loaded into %bl/%bh and
	 * moved to the top of %ebx by bswapl, then pixels 1 and 0 fill
	 * %bh/%bl, so the little-endian store puts pixel 0 first in memory. */
	movl SYM(ham_linebuf)+12(,%esi,4),%eax
	movb SYM(xcolors)(,%eax,4),%bl
	movl SYM(ham_linebuf)+8(,%esi,4),%eax
	movb SYM(xcolors)(,%eax,4),%bh
	bswapl %ebx
	movl SYM(ham_linebuf)+4(,%esi,4),%eax
	movb SYM(xcolors)(,%eax,4),%bh
	movl SYM(ham_linebuf)(,%esi,4),%eax
	movb SYM(xcolors)(,%eax,4),%bl
	movl %ebx,(%ebp)
	addl $4,%ebp
	addl $4,%esi
	jmp .La_hamloop

.La_hamend:
	movl %ebp,SYM(xlinebuffer)
	jmp .La_finish_line
	
.La_noham:
	cmpl $0,SYM(bpldualpf)
	je .La_normal
	
	/* Dual Playfield */
	
	xorl %edx,%edx

	/* %ebp = translation table: dblpf_ind1 when bpldualpfpri == 0,
	 * dblpf_ind2 otherwise. */
	movl $SYM(dblpf_ind1),%ebp
	cmpl $0,SYM(bpldualpfpri)
	je .La_dp_loop
	movl $SYM(dblpf_ind2),%ebp
.La_dp_loop:
	cmpl %esi,SYM(diwlastword)
	jle .La_finish_line
	
	cmpl %edi,%esi
	jge .La_finish_line

	/* Load four pixel bytes and the four matching spixstate bytes.  %edx
	 * is cleared so that writing %dl yields a zero-extended index. */
	xorl %edx,%edx
	movl SYM(pixdata)(%esi),%eax
	movl SYM(spixstate)(%esi),%ebx
	testl %ebx,%ebx
	jne .La_dp_sprites
	
	/* no sprites */
	/* Each pixel byte is translated through the table at %ebp (low byte
	 * of a 4-byte entry) and the result indexes acolors.  Pixels 0 and 1
	 * are packed first; the bswapl pair then exposes pixels 3 and 2 in
	 * %al/%ah and moves the packed bytes to the top of %ecx.  The final
	 * bswapl restores memory order. */

	movb %al,%dl
	movb (%ebp,%edx,4),%dl
	movb SYM(acolors)(,%edx,4),%cl
	movb %ah,%dl
	movb (%ebp,%edx,4),%dl
	movb SYM(acolors)(,%edx,4),%ch
	bswapl %eax
	bswapl %ecx
	movb %al,%dl
	movb (%ebp,%edx,4),%dl
	movb SYM(acolors)(,%edx,4),%cl
	movb %ah,%dl
	movb (%ebp,%edx,4),%dl
	movb SYM(acolors)(,%edx,4),%ch
	bswapl %ecx
	
	/* All registers are in use, so the output cursor lives in memory. */
	movl SYM(xlinebuffer),%eax
	movl %ecx,(%eax)
	addl $4,SYM(xlinebuffer)
	addl $4,%esi
	jmp .La_dp_loop
	
.La_dp_sprites:

        /* dual playfield with sprites */
	/* Per pixel: when its spixstate byte is non-zero the pixel byte
	 * indexes acolors directly; otherwise it is first translated through
	 * the table at %ebp, as in the no-sprite path. */

	movb %al,%dl
	testb %bl,%bl
	je .La3
	movb SYM(acolors)(,%edx,4),%cl
	jmp .La1
.La3:
	movb (%ebp,%edx,4),%dl
	movb SYM(acolors)(,%edx,4),%cl
.La1:

	movb %ah,%dl
	testb %bh,%bh
	je .La6
	movb SYM(acolors)(,%edx,4),%ch
	jmp .La4
.La6:
	movb (%ebp,%edx,4),%dl
	movb SYM(acolors)(,%edx,4),%ch
.La4:
	/* Move the two packed pixels up and expose pixels 3 and 2 (and their
	 * spixstate bytes) in the low byte registers. */
	bswapl %ecx	
	bswapl %eax
	bswapl %ebx
	
	movb %al,%dl
	testb %bl,%bl
	je .La9
	movb SYM(acolors)(,%edx,4),%cl
	jmp .La7
.La9:
	movb (%ebp,%edx,4),%dl
	movb SYM(acolors)(,%edx,4),%cl
.La7:

	movb %ah,%dl
	testb %bh,%bh
	je .La12
	movb SYM(acolors)(,%edx,4),%ch
	jmp .La10
.La12:
	movb (%ebp,%edx,4),%dl
	movb SYM(acolors)(,%edx,4),%ch
.La10:
	addl $4,%esi
	
	bswapl %ecx
	movl SYM(xlinebuffer),%eax
	movl %ecx,(%eax)
	addl $4,SYM(xlinebuffer)
	
	jmp .La_dp_loop
	
	/* normal case */	
	/* Each pixel byte indexes acolors directly; %ebx is the output
	 * cursor and %edx stays zero above %dl. */
.La_normal:
	xorl %edx,%edx
	movl SYM(xlinebuffer),%ebx
.La_normal_loop:
	cmpl %esi,SYM(diwlastword)
	jle .La_normal_end
	cmpl %edi,%esi
	jge .La_normal_end
	
	movl SYM(pixdata)(%esi),%eax
	
	movb %al,%dl
	movb SYM(acolors)(,%edx,4),%cl
	movb %ah,%dl
	movb SYM(acolors)(,%edx,4),%ch
	bswapl %eax
	bswapl %ecx
	movb %al,%dl
	movb SYM(acolors)(,%edx,4),%cl
	movb %ah,%dl
	movb SYM(acolors)(,%edx,4),%ch
	bswapl %ecx
	
	movl %ecx,(%ebx)
	addl $4,%ebx
	addl $4,%esi
	jmp .La_normal_loop

.La_normal_end:
	movl %ebx,SYM(xlinebuffer)

.La_finish_line:
	/* Finish off the line */
	/* Phase 4: rebuild the four-byte fill word (the body loops clobbered
	 * %eax) and store it until %esi reaches %edi. */
	
	movl SYM(acolors),%edx
	movb %dl,%dh
	movw %dx,%ax
	bswapl %eax
	movw %dx,%ax
	
	movl SYM(xlinebuffer),%ecx
.La_fin_loop:
	cmpl %edi,%esi
	jge .La_fin_end
	
	movl %eax,(%ecx)
	addl $4,%ecx
	addl $4,%esi
	jmp  .La_fin_loop

.La_fin_end:
        movl %ecx,SYM(xlinebuffer)
	
	popl %ebx
	popl %esi
	popl %edi
	popl %ebp
	ret

#ifndef __DOS__
	.align 16
#else
	.align 8
#endif
/*
 * pfield_linetoscr_full16 -- write a span of pixels at two bytes per pixel.
 *
 * Stack arguments (offsets are valid after the four pushes below):
 *   20(%esp) -> %esi   current pixel position
 *   24(%esp) -> %edi   stop position; every loop ends once %esi >= %edi
 *                      (signed compare)
 *
 * Output goes to the address held in xlinebuffer, and xlinebuffer is
 * advanced past everything written.  Work is done four pixels at a time:
 * each step advances %esi by 4 and stores two 32-bit words (8 bytes).
 *
 * Phases:
 *   1. Zero 4 bytes of pixdata and spixstate just below diwfirstword
 *      (skipped when diwfirstword < 4) and 4 bytes starting at diwlastword.
 *   2. Left border: store the fill colour while diwfirstword > %esi + 4.
 *   3. Body, while %esi < diwlastword:
 *        - HAM path when bplham != 0 and bplplanecnt == 6,
 *        - dual playfield path when bpldualpf != 0,
 *        - plain acolors lookup otherwise.
 *   4. Right border: store the fill colour until %esi >= %edi.
 *
 * The fill colour is the low 16 bits of the first acolors entry.
 *
 * %ebx, %esi, %edi and %ebp are saved and restored.  %eax, %ecx, %edx and
 * the flags are clobbered; %eax is not set to any particular result.
 *
 * NOTE(review): the exported label is not wrapped in SYM(), unlike the
 * external data symbols used below -- confirm that __DOS__ builds resolve it.
 */
.globl pfield_linetoscr_full16
pfield_linetoscr_full16:
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %ebx

	/* Return address plus four saved registers = 20 bytes below the
	 * first argument. */
	movl 20(%esp),%esi
	movl 24(%esp),%edi

	/* Phase 1: clear the words bordering the display window.  The jc
	 * skips the lower clear when diwfirstword < 4 (unsigned), where the
	 * -4 displacement would address memory before the arrays. */
	xorl %edx,%edx
	movl SYM(diwfirstword),%eax
	cmpl $4,%eax
	jc .Lboops1
	movl %edx,SYM(pixdata)-4(%eax)
	movl %edx,SYM(spixstate)-4(%eax)
.Lboops1:
	movl SYM(diwlastword),%eax
	movl %edx,SYM(pixdata)(%eax)
	movl %edx,SYM(spixstate)(%eax)

	/* Build the fill word: the low 16 bits of the first acolors entry
	 * duplicated into both halves of %eax. */
	movl SYM(acolors),%edx
	movw %dx,%ax
	sall $16,%eax
	movw %dx,%ax

	/* Phase 2: left border.  %ecx is the output cursor, %edx = %esi + 4.
	 * Continue while diwfirstword > %esi + 4 and %esi < %edi. */
	movl SYM(xlinebuffer),%ecx
	leal 4(%esi),%edx
	cmpl %edx,SYM(diwfirstword)
	jle .Lb_start_ok
.Lb_start_line:
	cmpl %edi,%esi
	jge .Lb_start_ok
	movl %eax,(%ecx)
	movl %eax,4(%ecx)
	addl $8,%ecx
	movl %edx,%esi

	leal 4(%esi),%edx
	cmpl %edx,SYM(diwfirstword)
	jg .Lb_start_line
.Lb_start_ok:
	movl %ecx,SYM(xlinebuffer)

	/* Phase 3: choose the body loop. */
	cmpl $0,SYM(bplham)
	je .Lb_noham
	cmpl $6,SYM(bplplanecnt)
	jne .Lb_noham

	/* HAM 6 */
	/* %ebp is the output cursor.  %edx is cleared here but not read by
	 * the HAM loop. */
	movl SYM(xlinebuffer),%ebp
	xorl %edx,%edx
.Lb_hamloop:
	cmpl %esi,SYM(diwlastword)
	jle .Lb_hamend
	cmpl %edi,%esi
	jge .Lb_hamend

	/* ham_linebuf is read as 4-byte entries indexed by %esi; each value
	 * indexes xcolors (4-byte stride) and the low 16 bits of that entry
	 * are the output pixel.  The odd pixel is loaded first and shifted
	 * into the upper half so the even pixel lands first in memory. */
	movl SYM(ham_linebuf)+4(,%esi,4),%eax
	movw SYM(xcolors)(,%eax,4),%bx
	sall $16,%ebx
	movl SYM(ham_linebuf)(,%esi,4),%eax
	movw SYM(xcolors)(,%eax,4),%bx
	movl %ebx,(%ebp)

	movl SYM(ham_linebuf)+12(,%esi,4),%eax
	movw SYM(xcolors)(,%eax,4),%bx
	sall $16,%ebx
	movl SYM(ham_linebuf)+8(,%esi,4),%eax
	movw SYM(xcolors)(,%eax,4),%bx
	movl %ebx,4(%ebp)
	addl $8,%ebp
	addl $4,%esi
	jmp .Lb_hamloop

.Lb_hamend:
	movl %ebp,SYM(xlinebuffer)
	jmp .Lb_finish_line

.Lb_noham:
	cmpl $0,SYM(bpldualpf)
	je .Lb_normal

	/* Dual Playfield */

	/* %ebp = translation table: dblpf_ind1 when bpldualpfpri == 0,
	 * dblpf_ind2 otherwise. */
	xorl %edx,%edx
	movl $SYM(dblpf_ind1),%ebp
	cmpl $0,SYM(bpldualpfpri)
	je .Lb_dp_loop
	movl $SYM(dblpf_ind2),%ebp
.Lb_dp_loop:
	cmpl %esi,SYM(diwlastword)
	jle .Lb_finish_line

	cmpl %edi,%esi
	jge .Lb_finish_line

	/* Load four pixel bytes and the four matching spixstate bytes.  %edx
	 * is cleared so that writing %dl yields a zero-extended index. */
	xorl %edx,%edx
	movl SYM(pixdata)(%esi),%eax
	movl SYM(spixstate)(%esi),%ebx
	testl %ebx,%ebx
	jne .Lb_dp_sprites

	/* no sprites */
	/* %ebx held an all-zero spixstate word, so it is reused as the
	 * output cursor.  Each pixel byte is translated through the table at
	 * %ebp (low byte of a 4-byte entry) and the result indexes acolors. */

	movl SYM(xlinebuffer),%ebx

	/* Pixels 1 and 0 -> first output word. */
	movb %ah,%dl
	movb (%ebp,%edx,4),%dl
	movw SYM(acolors)(,%edx,4),%cx
	sall $16,%ecx
	movb %al,%dl
	movb (%ebp,%edx,4),%dl
	movw SYM(acolors)(,%edx,4),%cx

	movl %ecx,(%ebx)

	/* After the swap %al holds pixel 3 and %ah holds pixel 2. */
	bswapl %eax

	movb %al,%dl
	movb (%ebp,%edx,4),%dl
	movw SYM(acolors)(,%edx,4),%cx
	sall $16,%ecx
	movb %ah,%dl
	movb (%ebp,%edx,4),%dl
	movw SYM(acolors)(,%edx,4),%cx

	movl %ecx,4(%ebx)
	addl $8,SYM(xlinebuffer)
	addl $4,%esi
	jmp .Lb_dp_loop

.Lb_dp_sprites:

	/* dual playfield with sprites */
	/* Per pixel: when its spixstate byte is non-zero the pixel byte
	 * indexes acolors directly; otherwise it is first translated through
	 * the table at %ebp, as in the no-sprite path. */

	/* Pixel 1 -> upper half of the first output word. */
	movb %ah,%dl
	testb %bh,%bh
	je .Lb3
	movw SYM(acolors)(,%edx,4),%cx
	jmp .Lb1
.Lb3:
	movb (%ebp,%edx,4),%dl
	movw SYM(acolors)(,%edx,4),%cx
.Lb1:
	sall $16,%ecx

	/* Pixel 0 -> lower half of the first output word. */
	movb %al,%dl
	testb %bl,%bl
	je .Lb6
	movw SYM(acolors)(,%edx,4),%cx
	jmp .Lb4
.Lb6:
	movb (%ebp,%edx,4),%dl
	movw SYM(acolors)(,%edx,4),%cx
.Lb4:
	/* %edx is borrowed as the store address, then cleared again so it
	 * can keep serving as a zero-extended index. */
	movl SYM(xlinebuffer),%edx
	movl %ecx,(%edx)
	xorl %edx,%edx

	/* Expose pixels 3 and 2 and their spixstate bytes in the low byte
	 * registers. */
	bswapl %eax
	bswapl %ebx

	/* Pixel 3 -> upper half of the second output word. */
	movb %al,%dl
	testb %bl,%bl
	je .Lb9
	movw SYM(acolors)(,%edx,4),%cx
	jmp .Lb7
.Lb9:
	movb (%ebp,%edx,4),%dl
	movw SYM(acolors)(,%edx,4),%cx
.Lb7:
	sall $16,%ecx

	/* Pixel 2 -> lower half of the second output word. */
	movb %ah,%dl
	testb %bh,%bh
	je .Lb12
	movw SYM(acolors)(,%edx,4),%cx
	jmp .Lb10
.Lb12:
	movb (%ebp,%edx,4),%dl
	movw SYM(acolors)(,%edx,4),%cx
.Lb10:
	addl $4,%esi

	movl SYM(xlinebuffer),%eax
	movl %ecx,4(%eax)
	addl $8,SYM(xlinebuffer)

	jmp .Lb_dp_loop

	/* normal case */
	/* Each pixel byte indexes acolors directly; %ebx is the output
	 * cursor and %edx stays zero above %dl. */
.Lb_normal:
	xorl %edx,%edx
	movl SYM(xlinebuffer),%ebx
.Lb_normal_loop:
	cmpl %esi,SYM(diwlastword)
	jle .Lb_normal_end
	cmpl %edi,%esi
	jge .Lb_normal_end

	movl SYM(pixdata)(%esi),%eax

	movb %ah,%dl
	movw SYM(acolors)(,%edx,4),%cx
	sall $16,%ecx
	movb %al,%dl
	movw SYM(acolors)(,%edx,4),%cx
	movl %ecx,(%ebx)
	bswapl %eax
	movb %al,%dl
	movw SYM(acolors)(,%edx,4),%cx
	sall $16,%ecx
	movb %ah,%dl
	movw SYM(acolors)(,%edx,4),%cx
	movl %ecx,4(%ebx)

	addl $8,%ebx
	addl $4,%esi
	jmp .Lb_normal_loop

.Lb_normal_end:
	movl %ebx,SYM(xlinebuffer)

.Lb_finish_line:
	/* Finish off the line */
	/* Phase 4: rebuild the fill word (the body loops clobbered %eax) and
	 * store it until %esi reaches %edi.  The load uses the l suffix to
	 * match its 32-bit destination register, exactly like the border
	 * setup at the top of the routine; only %dx is consumed. */

	movl SYM(acolors),%edx
	movw %dx,%ax
	sall $16,%eax
	movw %dx,%ax

	movl SYM(xlinebuffer),%ecx
.Lb_fin_loop:
	cmpl %edi,%esi
	jge .Lb_fin_end

	movl %eax,(%ecx)
	movl %eax,4(%ecx)
	addl $8,%ecx
	addl $4,%esi
	jmp  .Lb_fin_loop

.Lb_fin_end:
	movl %ecx,SYM(xlinebuffer)

	popl %ebx
	popl %esi
	popl %edi
	popl %ebp
	ret

        /* Identification string passed to the assembler .ident directive. */
        .ident	"Crux CC 3.14.15"

/*
 * DitherLine -- translate 16-bit input values through the cidx lookup
 * table and pack the results into the output buffer.
 *
 * Stack arguments (offsets are valid after the four pushes below):
 *   20(%esp) -> %edi   output byte pointer
 *   24(%esp) -> %esi   input pointer, read as consecutive 16-bit values
 *   28(%esp)           low two bits, shifted left by 12, select a table offset
 *   32(%esp)           low two bits, shifted left by 15, select a table offset
 *   36(%esp) -> %bx    16-bit count of input values
 *   40(%esp)           number of bits each looked-up value occupies in the
 *                      output
 *
 * %ebp = cidx + ((arg at 32) & 3) << 15 + ((arg at 28) & 3) << 12.
 * Inputs are handled in groups of four; input k of a group (k = 0..3) is
 * looked up at %ebp + k * 4096 + value and yields one byte.
 *
 * Two paths:
 *   - 40(%esp) == 8: four looked-up bytes are packed into one 32-bit store.
 *   - otherwise: %ecx counts the free bits left in the accumulator %dl
 *     (starting at 8).  Each result is shifted left by the remaining count
 *     after subtracting the bit width and OR-ed into %dl; when the count
 *     reaches exactly zero the byte is stored and the state is reset.
 *
 * Returns immediately when the count is zero.  %ebx, %esi, %edi and %ebp
 * are saved and restored; %eax, %ecx, %edx and the flags are clobbered.
 *
 * NOTE(review): the count is decremented by 4 and the loops run until it is
 * exactly zero, so presumably callers pass a multiple of 4 -- confirm.
 * NOTE(review): a byte is flushed only when the free-bit count hits exactly
 * zero, so presumably the bit width divides 8 -- confirm.
 * NOTE(review): the 4096-byte spacing suggests input values below 4096 --
 * confirm against the code that fills cidx.
 */
#ifndef __DOS__
	.align 16
.globl DitherLine
	.type	 DitherLine,@function
#else
	.align 8
.globl DitherLine
#endif
DitherLine:
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %ebx
	/* Return address plus four saved registers = 20 bytes below the
	 * first argument. */
	movl 20(%esp),%edi
	/* Zero-extend the 16-bit count into %ebx. */
	xorl %ebx,%ebx
	movw 36(%esp),%bx
	/* Compute the table base for this call in %ebp. */
	movl 32(%esp),%edx
	andl $3,%edx
	sall $15,%edx
	movl 28(%esp),%eax
	andl $3,%eax
	sall $12,%eax
	leal SYM(cidx)(%edx,%eax),%ebp
	/* %dl = output accumulator, %ecx = free bits left in it. */
	xorb %dl,%dl
	movl $8,%ecx
	testl %ebx,%ebx
	je .Li_end
	cmpl $8,40(%esp)
	je .Li_fast

	/* General path: the same lookup / shift / merge / flush sequence is
	 * unrolled four times, once per 4096-byte table slice. */
	movl 24(%esp),%esi
.Li_loop:
	movzwl (%esi),%eax
	movzbl (%eax,%ebp),%eax
	subl 40(%esp),%ecx
	sall %cl,%eax
	orb %al,%dl
	testl %ecx,%ecx
	jne .Li_1
	/* Accumulator full: emit it and start a new byte. */
	movb %dl,(%edi)
	incl %edi
	movl $8,%ecx
	xorb %dl,%dl
.Li_1:
	movzwl 2(%esi),%eax
	movzbl 4096(%ebp,%eax),%eax
	subl 40(%esp),%ecx
	sall %cl,%eax
	orb %al,%dl
	testl %ecx,%ecx
	jne .Li_2
	movb %dl,(%edi)
	incl %edi
	movl $8,%ecx
	xorb %dl,%dl
.Li_2:
	movzwl 4(%esi),%eax
	movzbl 8192(%ebp,%eax),%eax
	subl 40(%esp),%ecx
	sall %cl,%eax
	orb %al,%dl
	testl %ecx,%ecx
	jne .Li_3
	movb %dl,(%edi)
	incl %edi
	movl $8,%ecx
	xorb %dl,%dl
.Li_3:
	movzwl 6(%esi),%eax
	movzbl 12288(%ebp,%eax),%eax
	/* Four 16-bit inputs consumed. */
	addl $8,%esi
	subl 40(%esp),%ecx
	sall %cl,%eax
	orb %al,%dl
	testl %ecx,%ecx
	jne .Li_4
	movb %dl,(%edi)
	incl %edi
	movl $8,%ecx
	xorb %dl,%dl
.Li_4:
	subl $4,%ebx
	jne .Li_loop
	jmp .Li_end
	
	/* Fast 8-bit version */
	/* %edx is cleared once so that writing %dx gives a zero-extended
	 * index.  Two inputs are fetched per 32-bit load.  Results 0 and 1
	 * go to %al/%ah and are moved to the top of %eax by bswapl; results
	 * 2 and 3 then go to %ah/%al, and the second bswapl puts the four
	 * bytes in input order for the store. */
.Li_fast:
	movl 24(%esp),%esi
	xorl %edx,%edx
.Li_fast_loop:
	movl (%esi),%ecx
	movw %cx,%dx
	movb (%edx,%ebp),%al
	
	sarl $16,%ecx
	movw %cx,%dx
	movb 4096(%ebp,%edx),%ah
	bswapl %eax
	
	movl 4(%esi),%ecx
	movw %cx,%dx
	movb 8192(%ebp,%edx),%ah
	
	sarl $16,%ecx
	movw %cx,%dx
	movb 12288(%ebp,%edx),%al
	
	bswapl %eax
	movl %eax,(%edi)
	addl $4,%edi
	addl $8,%esi
	
	subl $4,%ebx
	jne .Li_fast_loop

.Li_end:
	popl %ebx
	popl %esi
	popl %edi
	popl %ebp
	ret
