include "work:prog/assembler/ASM-One/LVO3.0/graphics_lib.i"
            include "work:prog/assembler/ASM-One/include/hardware/custom.i"
            include "work:prog/assembler/ASM-One/include/rtgmaster/rtgc2p.i"
            include "work:prog/assembler/ASM-One/include/rtgmaster/rtgmaster.i"
            include "work:prog/assembler/ASM-One/include/rtgmaster/rtgsublibs.i"
            include "work:prog/assembler/ASM-One/include/rtgmaster/rtgAMI.i"
            include "work:prog/assembler/ASM-One/include/intuition/screens.i"
            include "work:prog/assembler/ASM-one/include/exec/memory.i"

; This c2p is a conversion of the c2p "Cpu3Blit1.asm" to the rtgmaster format.
; It uses asynchronous blitting and is optimized for the 68030.

; An example of how to use this routine:
;
;       move.w  #320,d0
;       move.w  #256,d1
;       clr.w   d3
;       move.l  #10240,d5
;       bsr     c2p1x1_cpu3blit1_queue_init
;       ...
;main:
;       lea     chunkyscreen,a0
;       move.l  screenptr,a1
;       bsr     c2p1x1_cpu3blit1_queue
;       bsr     effect
;       btst    #6,$bfe001
;       bne.s   main
;
; Screen swapping should be done in the c2p_waitblit routine,
; which is called by the C2P routine.
;
; I hope I didn't forget anything... :)
;


    ; Plugin file header. If the file is executed directly it simply
    ; returns 0 in d0; the data that follows is the plugin identification.
    moveq #0,d0
    rts
  dc.l "c2p!"           ; four-character identifier
    dc.l 0              ; NOTE(review): meaning not visible here - see rtgc2p.i
    dc.l 1              ; NOTE(review): presumably a version/count - confirm
    dc.l Structure      ; pointer to the descriptor table below

; Descriptor table referenced from the plugin header.
; NOTE(review): the field layout is defined by the rtgmaster c2p include
; (rtgc2p.i), which is not visible here; the remarks below only restate
; what the symbols and labels in this file show.
Structure
    dc.w CI_256+CI_64+CI_EHB                        ; CI_* colour flags
    dc.w CI_68060D+CI_68040+CI_68030D+CI_68020D     ; CI_680x0 CPU flags
    dc.w 0
    dc.b 0
    dc.b 0
    dc.l c2p_1x1            ; same constant chunky2planar accepts as mode
    dc.w 1
    dc.w 1
    dc.w 0
    dc.b 'AMI '             ; four-character tag
    dc.l Name               ; pointer to the NUL-terminated description string
    dc.l Init               ; pointer to the Init routine
    dc.l Expunge            ; pointer to the Expunge routine
    dc.l chunky2planar      ; pointer to the conversion entry point
    dc.l 0
    dc.l 0
    dc.l 0
    dc.b 0
    dc.b 0,0,0 ; Test
    even

; Init -- open graphics.library and set up the converter for a screen.
;
; In:   a0 = pointer to the screen structure; its rs_c2pdata field is
;            cleared and its rsAMI_ScreenHandle field is read.
; Out:  d0 = 0 (always).
; All other registers are preserved.  The original saved only
; d0-d2/a0-a2/a6 although d3 and d5 are written below; both are now part
; of the save list so the caller's values survive.
;
; NOTE(review): the result of the library open is stored in gfxbase
; without being checked for 0.
Init:
    movem.l d0-d3/d5/a0-a2/a6,-(sp)
    lea gfxname,a1
    moveq #0,d0
    movem.l a0/a6,-(sp)         ; keep a0 (screen structure) across the call
    move.l $4,a6                ; ExecBase
    jsr -408(a6)                ; exec LVO -408 (OldOpenLibrary in the exec LVO table)
    movem.l (sp)+,a0/a6
    move.l d0,gfxbase
    move.l #0,rs_c2pdata(a0)
    move.l rsAMI_ScreenHandle(a0),a1
    clr.l d0
    clr.l d1
    move.w sc_Width(a1),d0      ; d0 = screen width
    move.w sc_Height(a1),d1     ; d1 = screen height
    move.l d1,d5
    mulu.l d0,d5
    lsr.l #3,d5                 ; d5 = width*height/8; c2p1x1_cpu5_init does not read d5
    clr.w d3                    ; scroffsy = 0
    jsr c2p1x1_cpu5_init
    movem.l (sp)+,d0-d3/d5/a0-a2/a6
    move.l #0,d0
    rts
; Error is a bare label that shares its address with Name; nothing in the
; visible code branches to it.
Error:
Name: dc.b '8 Bit 030 optimized c2p with Blitter',0    ; description string referenced from Structure
    even
gfxname: dc.b 'graphics.library',0                      ; library name passed to exec in Init
    even

; Expunge -- hand the library base stored in gfxbase back to exec.
; Every register touched here is saved and restored.
Expunge:
    movem.l d0-d2/a0-a2/a6,-(sp)
    movea.l $4,a6               ; ExecBase
    movea.l gfxbase,a1          ; base stored by Init
    jsr     -414(a6)            ; exec LVO -414 (CloseLibrary in the exec LVO table)
    movem.l (sp)+,d0-d2/a0-a2/a6
    rts
; chunky2planar -- plugin conversion entry point.
;
; In:   d0 = requested mode (compared against the c2p_* constants)
;       d2 = signal bit number; the calling task is signalled with
;            (1 << d2) before returning
;       a3 = screen structure (rs_c2pcurr and rsAMI_Bitmap1 are read)
;       a0/a1 are passed through untouched to the converter at c2p
; Out:  d0 = c2p_err_internal when the first-call depth check fails;
;            otherwise control continues at c2p, which produces the result.
;
; d0/d3/a3/d7 are pushed on entry; that frame is popped either in
; .Mistake below or after the Raus label in the c2p routine.
; d7 collects c2p_warn_Wrong_Pixelmode for unsupported modes.
; .Merk is set on the first call, so the bitmap depth (8 or 6 accepted)
; is only checked once; .Merk2 is written but not read in this file.
chunky2planar:
                movem.l d0/d3/a3/d7,-(sp)
                move.l #0,d7
                cmp.l #c2p_1x1,d0
                beq .OK
                cmp.l #c2p_Best,d0
                beq .OK
                cmp.l #c2p_BestD,d0
                beq .OK
                cmp.l #c2p_Fastest,d0
                beq .OK
                cmp.l #c2p_FastestD,d0
                beq .OK
                cmp.l #c2p_Selected,d0
                beq .Selected
                cmp.l #c2p_SelectedD,d0
                beq .Selected
                move.l #c2p_warn_Wrong_Pixelmode,d7
                bra .OK
.Selected:
                move.l rs_c2pcurr(a3),d3
                cmp.l #c2p_1x1,d3
                beq .OK
                move.l #c2p_warn_Wrong_Pixelmode,d7
                bra .OK
.OK:
                cmp.l #1,.Merk          ; depth already checked on an earlier call?
                beq c2p
                move.l #1,.Merk
                lea rsAMI_Bitmap1(a3),a3
                sub.l d3,d3
                move.b bm_Depth(a3),d3
                cmp.l #8,d3
                beq c2p
                cmp.l #6,d3
                beq c2p
                move.l a3,-(sp)
                bra .Mistake
.Merk: dc.l 0
.Merk2: dc.l 0
.Mistake:
                move.l (sp)+,a3
                move.l #1,.Merk2
                ; Pop the entry frame with the SAME register list that was
                ; pushed.  The original list named a7 instead of d7, so d7
                ; was never restored and a3 received the saved d7 value
                ; (MOVEM always transfers in d0-d7,a0-a7 order).
                movem.l (sp)+,d0/d3/a3/d7
                move.l d7,d0            ; overwritten by the pop into d0 below
                movem.l d2/a6,-(sp)
                move.l $4,a6
                move.l #0,a1
                jsr -294(a6)            ; exec LVO -294 (FindTask), a1 = 0 -> current task
                move.l d0,a1            ; a1 = task to signal
                movem.l (sp)+,d0/a6     ; d0 = caller's d2 (signal bit number)
                move.l a6,-(sp)
                move.l $4,a6
                move.l d0,d1
                sub.l d0,d0
                bset d1,d0              ; d0 = 1 << signal bit
                jsr -324(a6)            ; exec LVO -324 (Signal)
                move.l (sp)+,a6
                move.l #c2p_err_internal,d0
                rts


; c2p / _chunky2planar -- run the conversion, then signal the caller.
;
; Reached by branch from chunky2planar, whose d0/d3/a3/d7 frame is still
; on the stack and is popped after Raus.
;   a0 = chunky buffer
;   a1 = first bitplane
;   d2 = signal bit number
; Returns d0 = 0.
; NOTE(review): the warning code collected in d7 is copied to d0 but then
; immediately replaced by the frame pop, so it never reaches the caller.
c2p:
_chunky2planar:
        movem.l d2-d7/a2-a6,-(sp)
        jsr     c2p1x1_cpu5
        movem.l (sp)+,d2-d7/a2-a6
Raus:
        move.l  d7,d0
        movem.l (sp)+,d0/d3/d7/a3       ; pop the frame pushed by chunky2planar

        movem.l d2/a6,-(sp)             ; (sp) = signal bit, 4(sp) = caller's a6
        movea.l $4,a6
        suba.l  a1,a1                   ; a1 = 0 -> current task
        jsr     -294(a6)                ; exec LVO -294 (FindTask)
        movea.l d0,a1                   ; a1 = task to signal

        move.l  (sp)+,d1                ; d1 = signal bit number (caller's d2)
        moveq   #0,d0
        bset    d1,d0                   ; d0 = 1 << signal bit
        movea.l $4,a6
        jsr     -324(a6)                ; exec LVO -324 (Signal)
        movea.l (sp)+,a6                ; restore caller's a6

        moveq   #0,d0
        rts


        ; Assembly-time configuration.  C2P_DOUBLEBUFFER is forced to 1
        ; (its IFND/ENDC guard is commented out); CHUNKYXMAX/CHUNKYYMAX
        ; default to 320x256 unless defined earlier.  None of these three
        ; symbols is referenced by the code visible in this file.
        ;IFND    C2P_DOUBLEBUFFER
C2P_DOUBLEBUFFER EQU 1
        ;ENDC
        IFND    CHUNKYXMAX
CHUNKYXMAX EQU  320
        ENDC
        IFND    CHUNKYYMAX
CHUNKYYMAX EQU  256
        ENDC
; Byte distance between consecutive bitplanes as used by c2p1x1_cpu5.
; 10240 = 320*256/8; it is a fixed constant, not derived from the screen
; size passed to the init routine.
BPLSIZE EQU 10240
        section c2p,code
; c2p1x1_cpu5_init -- record the conversion parameters in c2p_datanew.
;
; d0.w  chunkyx [chunky-pixels]
; d1.w  chunkyy [chunky-pixels]
; d2.w  (scroffsx) [screen-pixels]      -- not read
; d3.w  scroffsy [screen-pixels]
; d4.w  (rowlen) [bytes]                -- not read
; d5.l  (bplsize) [bytes]               -- not read
;
; Stores  scroffs = (scroffsy * chunkyx) / 8
;         pixels  =  chunkyx * chunkyy
; d2/d3 are preserved; d0, d1 and a0 are overwritten.

c2p1x1_cpu5_init
        movem.l d2/d3,-(sp)
        lea     c2p_datanew(pc),a0      ; same field layout as c2p_data
        andi.l  #$0000ffff,d0           ; d0 = chunkyx, zero-extended

        mulu.w  d0,d1                   ; d1 = chunkyx * chunkyy
        mulu.w  d0,d3                   ; d3 = scroffsy * chunkyx
        lsr.l   #3,d3                   ; pixels -> bytes within one bitplane

        move.l  d3,c2p_scroffs-c2p_data(a0)
        move.l  d1,c2p_pixels-c2p_data(a0)

        movem.l (sp)+,d2/d3
        rts

; c2p_blitcleanup -- mark the blit as finished.
; In:   a1 = address of c2p_bltnode (the flags are addressed relative to it)
; Sets c2p_blitfin to $ff and clears c2p_blitactive.
; Its address is stored at c2p_bltroutcleanup; it is not called directly
; by the code visible in this file.
c2p_blitcleanup
        st      c2p_blitfin-c2p_bltnode(a1)
        sf      c2p_blitactive-c2p_bltnode(a1)
        rts

; c2p_waitblit -- if a blit is marked active, busy-wait until
; c2p_blitfin becomes non-zero; otherwise return at once.
c2p_waitblit
        tst.b   c2p_blitactive(pc)
        bne.s   .spin                   ; a blit is pending -> poll
        rts
.spin   tst.b   c2p_blitfin(pc)
        beq.s   .spin
        rts

; c2p1x1_cpu5 -- convert a chunky buffer into bitplanes using the CPU only.
;
; a0    c2pscreen (chunky source)
; a1    bitplanes (destination)
;
; The parameters stored by c2p1x1_cpu5_init are first copied from
; c2p_datanew to c2p_data, then two passes are made over the source:
;   pass 1 keeps the low nibble of every chunky byte ($0f0f0f0f) and
;          writes the four planes at a1 + 0..3*BPLSIZE (+ scroffs),
;   pass 2 keeps the high nibble ($f0f0f0f0) and writes the four planes
;          at a1 + 4..7*BPLSIZE (+ scroffs).
; Each loop iteration reads 32 source bytes and writes one longword per
; plane; the stores for one iteration are interleaved with the loads and
; merges of the next.  The loops end on an exact a0 == a2 match, so
; c2p_pixels must be a multiple of 32.
;
; d2-d7/a2-a6 are preserved; d0/d1/a0/a1 are overwritten.
;
; Fix: a0/a1 are now pushed only after the zero-pixel test.  Previously
; they were pushed before it and the .none exit never popped them, so a
; pixel count of 0 restored d2-d7/a2-a6 from the wrong stack slots and
; returned through a bad address.

c2p1x1_cpu5
        movem.l d2-d7/a2-a6,-(sp)

        bsr     c2p_copyinitblock

        lea     c2p_data(pc),a2

        move.l  #$33333333,d5           ; 2-bit swap mask
        move.l  #$55555555,d6           ; 1-bit swap mask
        move.l  #$00ff00ff,a6           ; 8-bit swap mask (copied to d4 when needed)

        add.w   #BPLSIZE,a1
        add.l   c2p_scroffs-c2p_data(a2),a1

        move.l  c2p_pixels-c2p_data(a2),a2
        add.l   a0,a2                   ; a2 = end of chunky source
        cmp.l   a0,a2
        beq     .none                   ; nothing to convert, stack still balanced

        movem.l a0-a1,-(sp)             ; kept for the second pass

        move.l  (a0)+,d0
        move.l  (a0)+,d2
        move.l  (a0)+,d1
        move.l  (a0)+,d3

        move.l  #$0f0f0f0f,d4           ; Merge 4x1, part 1
        and.l   d4,d0
        and.l   d4,d2
        lsl.l   #4,d0
        or.l    d2,d0

        and.l   d4,d1
        and.l   d4,d3
        lsl.l   #4,d1
        or.l    d3,d1

        move.l  d1,a3

        move.l  (a0)+,d2
        move.l  (a0)+,d1
        move.l  (a0)+,d3
        move.l  (a0)+,d7

        and.l   d4,d1                   ; Merge 4x1, part 2
        and.l   d4,d2
        lsl.l   #4,d2
        or.l    d1,d2

        and.l   d4,d3
        and.l   d4,d7
        lsl.l   #4,d3
        or.l    d7,d3

        move.l  a3,d1

        swap    d2                      ; Swap 16x2
        move.w  d0,d7
        move.w  d2,d0
        move.w  d7,d2
        swap    d2

        swap    d3
        move.w  d1,d7
        move.w  d3,d1
        move.w  d7,d3
        swap    d3

        bra.s   .start1
.x1
        move.l  (a0)+,d0
        move.l  (a0)+,d2
        move.l  (a0)+,d1
        move.l  (a0)+,d3

        move.l  d7,BPLSIZE(a1)          ; store result of previous iteration

        move.l  #$0f0f0f0f,d4           ; Merge 4x1, part 1
        and.l   d4,d0
        and.l   d4,d2
        lsl.l   #4,d0
        or.l    d2,d0

        and.l   d4,d1
        and.l   d4,d3
        lsl.l   #4,d1
        or.l    d3,d1

        move.l  d1,a3

        move.l  (a0)+,d2
        move.l  (a0)+,d1
        move.l  (a0)+,d3
        move.l  (a0)+,d7

        move.l  a4,(a1)+                ; store result of previous iteration

        and.l   d4,d1                   ; Merge 4x1, part 2
        and.l   d4,d2
        lsl.l   #4,d2
        or.l    d1,d2

        and.l   d4,d3
        and.l   d4,d7
        lsl.l   #4,d3
        or.l    d7,d3

        move.l  a3,d1

        swap    d2                      ; Swap 16x2
        move.w  d0,d7
        move.w  d2,d0
        move.w  d7,d2
        swap    d2

        swap    d3
        move.w  d1,d7
        move.w  d3,d1
        move.w  d7,d3
        swap    d3

        move.l  a5,-BPLSIZE-4(a1)       ; store result of previous iteration
.start1
        move.l  a6,d4                   ; d4 = $00ff00ff

        move.l  d2,d7                   ; Swap 2x2
        lsr.l   #2,d7
        eor.l   d0,d7
        and.l   d5,d7
        eor.l   d7,d0
        lsl.l   #2,d7
        eor.l   d7,d2

        move.l  d3,d7
        lsr.l   #2,d7
        eor.l   d1,d7
        and.l   d5,d7
        eor.l   d7,d1
        lsl.l   #2,d7
        eor.l   d7,d3

        move.l  d1,d7
        lsr.l   #8,d7
        eor.l   d0,d7
        and.l   d4,d7
        eor.l   d7,d0
        lsl.l   #8,d7
        eor.l   d7,d1

        move.l  d1,d7
        lsr.l   d7
        eor.l   d0,d7
        and.l   d6,d7
        eor.l   d7,d0
        move.l  d0,BPLSIZE*2(a1)
        add.l   d7,d7
        eor.l   d1,d7

        move.l  d3,d1
        lsr.l   #8,d1
        eor.l   d2,d1
        and.l   d4,d1
        eor.l   d1,d2
        lsl.l   #8,d1
        eor.l   d1,d3

        move.l  d3,d1
        lsr.l   d1
        eor.l   d2,d1
        and.l   d6,d1
        eor.l   d1,d2
        add.l   d1,d1
        eor.l   d1,d3

        move.l  d2,a4                   ; held until the next iteration stores them
        move.l  d3,a5

        cmpa.l  a0,a2
        bne     .x1

        move.l  d7,BPLSIZE(a1)          ; flush the last iteration of pass 1
        move.l  a4,(a1)+
        move.l  a5,-BPLSIZE-4(a1)

        movem.l (sp)+,a0-a1             ; rewind source and destination
        add.l   #BPLSIZE*4,a1           ; pass 2 targets the next four planes

        move.l  (a0)+,d0
        move.l  (a0)+,d2
        move.l  (a0)+,d1
        move.l  (a0)+,d3

        move.l  #$f0f0f0f0,d4           ; Merge 4x1, part 1
        and.l   d4,d0
        and.l   d4,d2
        lsr.l   #4,d2
        or.l    d2,d0

        and.l   d4,d1
        and.l   d4,d3
        lsr.l   #4,d3
        or.l    d3,d1

        move.l  d1,a3

        move.l  (a0)+,d2
        move.l  (a0)+,d1
        move.l  (a0)+,d3
        move.l  (a0)+,d7

        and.l   d4,d1                   ; Merge 4x1, part 2
        and.l   d4,d2
        lsr.l   #4,d1
        or.l    d1,d2

        and.l   d4,d3
        and.l   d4,d7
        lsr.l   #4,d7
        or.l    d7,d3

        move.l  a3,d1

        swap    d2                      ; Swap 16x2
        move.w  d0,d7
        move.w  d2,d0
        move.w  d7,d2
        swap    d2

        swap    d3
        move.w  d1,d7
        move.w  d3,d1
        move.w  d7,d3
        swap    d3

        bra.s   .start2
.x2
        move.l  (a0)+,d0
        move.l  (a0)+,d2
        move.l  (a0)+,d1
        move.l  (a0)+,d3

        move.l  d7,BPLSIZE(a1)          ; store result of previous iteration

        move.l  #$f0f0f0f0,d4           ; Merge 4x1, part 1
        and.l   d4,d0
        and.l   d4,d2
        lsr.l   #4,d2
        or.l    d2,d0

        and.l   d4,d1
        and.l   d4,d3
        lsr.l   #4,d3
        or.l    d3,d1

        move.l  d1,a3

        move.l  (a0)+,d2
        move.l  (a0)+,d1
        move.l  (a0)+,d3
        move.l  (a0)+,d7

        move.l  a4,(a1)+                ; store result of previous iteration

        and.l   d4,d1                   ; Merge 4x1, part 2
        and.l   d4,d2
        lsr.l   #4,d1
        or.l    d1,d2

        and.l   d4,d3
        and.l   d4,d7
        lsr.l   #4,d7
        or.l    d7,d3

        move.l  a3,d1

        swap    d2                      ; Swap 16x2
        move.w  d0,d7
        move.w  d2,d0
        move.w  d7,d2
        swap    d2

        swap    d3
        move.w  d1,d7
        move.w  d3,d1
        move.w  d7,d3
        swap    d3

        move.l  a5,-BPLSIZE-4(a1)       ; store result of previous iteration
.start2
        move.l  a6,d4                   ; d4 = $00ff00ff

        move.l  d2,d7                   ; Swap 2x2
        lsr.l   #2,d7
        eor.l   d0,d7
        and.l   d5,d7
        eor.l   d7,d0
        lsl.l   #2,d7
        eor.l   d7,d2

        move.l  d3,d7
        lsr.l   #2,d7
        eor.l   d1,d7
        and.l   d5,d7
        eor.l   d7,d1
        lsl.l   #2,d7
        eor.l   d7,d3

        move.l  d1,d7
        lsr.l   #8,d7
        eor.l   d0,d7
        and.l   d4,d7
        eor.l   d7,d0
        lsl.l   #8,d7
        eor.l   d7,d1

        move.l  d1,d7
        lsr.l   d7
        eor.l   d0,d7
        and.l   d6,d7
        eor.l   d7,d0
        move.l  d0,BPLSIZE*2(a1)
        add.l   d7,d7
        eor.l   d1,d7

        move.l  d3,d1
        lsr.l   #8,d1
        eor.l   d2,d1
        and.l   d4,d1
        eor.l   d1,d2
        lsl.l   #8,d1
        eor.l   d1,d3

        move.l  d3,d1
        lsr.l   d1
        eor.l   d2,d1
        and.l   d6,d1
        eor.l   d1,d2
        add.l   d1,d1
        eor.l   d1,d3

        move.l  d2,a4                   ; held until the next iteration stores them
        move.l  d3,a5

        cmpa.l  a0,a2
        bne     .x2

        move.l  d7,BPLSIZE(a1)          ; flush the last iteration of pass 2
        move.l  a4,(a1)+
        move.l  a5,-BPLSIZE-4(a1)

.none
        movem.l (sp)+,d2-d7/a2-a6
        rts

; c2p_copyinitblock -- copy the 16 longwords at c2p_datanew over c2p_data,
; making the parameters stored by the init routine the active ones.
; a0/a1 are preserved; d0 is used as the loop counter.
c2p_copyinitblock
        movem.l a0/a1,-(sp)
        lea     c2p_data,a1             ; destination: active parameter block
        lea     c2p_datanew,a0          ; source: block written by init
        moveq   #15,d0                  ; 16 longwords, dbra counts down to -1
.next   move.l  (a0)+,(a1)+
        dbra    d0,.next
        movem.l (sp)+,a0/a1
        rts

        ; Blit bookkeeping.  c2p_blitcleanup addresses the two flag bytes
        ; relative to c2p_bltnode.
        ; NOTE(review): the layout looks like a blitter queue node - confirm.
        cnop 0,4
c2p_bltnode
        dc.l    0
c2p_bltroutptr
        dc.l    0
        dc.b    $40,0
        dc.l    0
c2p_bltroutcleanup
        dc.l    c2p_blitcleanup         ; address of the cleanup routine
c2p_blitfin dc.b 0                      ; set to $ff by c2p_blitcleanup
c2p_blitactive dc.b 0                   ; cleared by c2p_blitcleanup, polled by c2p_waitblit

        cnop    0,4

gfxbase dc.l 0                          ; library base stored by Init, released by Expunge

; Active parameter block.  c2p1x1_cpu5 reads only c2p_scroffs and
; c2p_pixels; the other fields are not accessed by the visible code.
; c2p_copyinitblock overwrites the first 16 longwords starting here.
c2p_data
c2p_screen dc.l 0
c2p_scroffs dc.l 0                      ; byte offset added to the bitplane pointer
c2p_scroffs2 dc.l 0
c2p_bplsize dc.l 0
c2p_pixels dc.l 0                       ; number of chunky bytes to convert
c2p_pixels2 dc.l 0
c2p_pixels4 dc.l 0
c2p_pixels8 dc.l 0
c2p_pixels16 dc.l 0
c2p_chunkyx16 dc.w 0
c2p_chunkyx32 dc.w 0
c2p_chunkyy dc.w 0
c2p_rowmod dc.w 0
        ds.l    16

; Pending parameter block written by c2p1x1_cpu5_init using the same
; field offsets as c2p_data.
        cnop 0,4
c2p_datanew
        ds.l    16