;
; Some practical assembler utility routines. See sutils.h for corresponding C prototypes
; They correspond to functions defined in util.c
;
; Michael Rausch  14-2-94  1:13:04
;

	SECTION	text,CODE

ER	EQU	-1

END_OF_BLOCK EQU 62
ESCAPE	EQU	61

RUN_MASK EQU	$fc00
RUN_SHIFT EQU	10

LEVEL_MASK EQU	$03f0
LEVEL_SHIFT EQU	4

NUM_MASK EQU	$000f


	XREF	@correct_underflow
	XREF	_bitBuffer
	XREF	_bufLength
	XREF	_bitBuffer
	XREF	_bitOffset
;	XREF	_bitCount	; ANALYSIS
;	XREF	_mbCoeffPtr	; ANALYSIS

	XREF	_dct_coeff_first
	XREF	_dct_coeff_next
	XREF	_dct_coeff_tbl_0
	XREF	_dct_coeff_tbl_1
	XREF	_dct_coeff_tbl_2
	XREF	_dct_coeff_tbl_3

	XREF	_zigzag_direct


; ***************************************************************

; Fills in the data if a new quantization matrix is encountered in the stream.
; In addition, it creates the qscale multiplication tables.

	XDEF	@new_matrix

; @new_matrix -- read a new 64-entry quantization matrix from the bitstream
; and build the qscale multiplication tables directly behind it.
;
; In:   a0 = destination buffer, a4 = globals base (register calling)
; Out:  64 matrix entries stored as one 16-bit word each, in stream order
;       (NOT zigzag order -- see the comment at the store below), followed
;       by 30 generated tables of 64 words each.
;
; Each generation pass adds the base matrix once more to the previously
; generated table, so the n-th table holds the matrix scaled by (n+2) --
; presumably covering qscale values 2..31; TODO confirm against util.c.
; The adds work on longwords, i.e. two packed 16-bit entries at a time,
; which is safe as long as no scaled entry overflows 16 bits.
@new_matrix:
	movem.l	d2-d4/a2-a3,-(sp)
	move.l	a0,a2			; a2 = destination / base matrix

	moveq	#16,d1			; ensure at least 16 longwords remain
	cmp.l	_bufLength(a4),d1	; (bufLength counts 32-bit words, see
	ble.s	nm_nc			; the wrap code below)
	bsr	@correct_underflow	; refill the bitstream buffer
nm_nc:

	move.l	a2,a1			; a1 = running store pointer

	move.l	_bitOffset(a4),d1	; d1 = current bit offset (0..31)
	move.l	_bitBuffer(a4),a0	; a0 = current stream longword

	moveq	#8,d3			; bits per matrix entry
	moveq	#32,d2			; bits per buffer longword

	moveq	#63,d4			; 64 entries
get64bytesfromstream:

	bfextu	(a0){d1:d3},d0		; next 8 bits from the stream
	move.w	d0,(a1)+		; was indexed via zigzag_direct, but changed in utilisation as well

	add.l	d3,d1			; advance bit offset ...
	cmp.l	d2,d1
	blt.s	nmxw
	sub.l	d2,d1			; ... wrapping into the next longword
	addq.l	#4,a0
	subq.l	#1,_bufLength(a4)
nmxw:
	dbra	d4,get64bytesfromstream

;	add.l	#64*8,_bitCount(a4)	; ANALYSIS
	move.l	a0,_bitBuffer(a4)	; write back stream position
	move.l	d1,_bitOffset(a4)


; Build the scaled tables: a1 still points just past the 64 matrix words.
; a0 re-reads the base matrix each pass; a3 advances continuously through
; the data written so far (i.e. through the previous table).
	move.l	a2,a3			; last scale
	moveq	#29,d0			; 30 tables
allqscales:
	move.l	a2,a0			; first scale
	moveq	#15,d1			; 16 x rept 2 = 32 longs = 64 words
qscaletab:
	rept 2
	move.l	(a0)+,d2		; get first qscale
	add.l	(a3)+,d2		; add last qscale
	move.l	d2,(a1)+		; is new qscale
	endr
	dbra	d1,qscaletab
	dbra	d0,allqscales


	movem.l	(sp)+,d2-d4/a2-a3
	rts


; ***************************************************************

process_intra MACRO (label, sparereg)

; process_intra -- decode one DCT coefficient VLC from the bitstream.
;
; Macro params: \1 = label to branch to on END_OF_BLOCK
;               \2 = spare address register for the secondary lookup tables
;
; In:   a1 = current stream longword, d7 = bit offset, d6 = #32,
;       a2 = primary coefficient table (dct_coeff_first/_next), a4 = globals
; Out:  d3 = run, d5 = signed level; stream position (a1/d7/_bufLength)
;       advanced past the code.  d0/d1/d4 are scratch; \2 is clobbered on
;       the long-code paths.
;
; Table entries are packed 16-bit values: run in bits 10-15 (RUN_MASK),
; level in bits 4-9 (LEVEL_MASK), code length minus 2 in bits 0-3
; (NUM_MASK); the sign bit follows the code in the stream.

	bfextu	(a1){d7:d6},d0		; peek the next 32 bits, left-aligned

	move.l	d0,d1
	rol.l	#8,d1			; top byte to d1.b; only .b-tests are valid!

	cmp.b	#3,d1			; top byte <= 3: code is longer than
	bls	.adcd_others		; 8 bits, use the secondary tables

	and.w	#$ff,d1
	moveq	#0,d5
	move.w	(a2,d1.w*2),d5		; value from the primary table

	move.l	d5,d3
	lsr.w	#RUN_SHIFT-8,d3		; run (RUN_SHIFT > 8, so the shift
	lsr.w	#8,d3			; is done in two steps)
	cmp.w	#END_OF_BLOCK,d3
	beq	\1			; end of block -> caller's label
	cmp.w	#ESCAPE,d3
	beq.s	.aescape

.astd_handle:

	move.w	d5,d4			; flushed
	and.w	#NUM_MASK,d4
	addq.w	#2,d4			; d4 = total code length incl. sign bit

	and.w	#LEVEL_MASK,d5
	lsr.w	#LEVEL_SHIFT,d5		; level

	moveq.l	#32,d1
	sub.w	d4,d1			; the sign bit is the last bit of the
	btst	d1,d0			; code, i.e. bit (32-d4) of d0
	beq	.aoutblockescape	; clear -> positive
	neg.l	d5
	bra	.aoutblockescape	; done
.aescape:
; ESCAPE: fixed-length escape code -- a 6-bit run followed by an 8-bit
; level, extended to a second byte for the two marker values 0 and 128.
; The bit accounting below is driven by the NUM field of the table's
; ESCAPE entry and is not re-derived here -- presumably it matches the
; C reference implementation; TODO confirm against util.c.

	move.w	d5,d4			; flushed
	and.w	#NUM_MASK,d4
	add.w	#1+22,d4		; d4 = bits consumed by the long form

	moveq	#18+22-8,d1		; = 32
	sub.w	d4,d1			; d1 = 32-d4: right-align the d4
	move.l	d0,d3			; consumed bits
	lsr.l	d1,d3			; temp

	move.w	d3,d5			; keep the low level word for the
	lsr.l	#8,d3			; 16-bit cases below

	and.w	#$3fff,d3		; run/level

	tst.b	d3			; first level byte == 0:
	beq.s	.aoutblockescape3	; extended level follows

	cmp.b	#128,d3			; first level byte == 128:
	beq.s	.ais128			; extended negative level
	move.b	d3,d5			; plain signed 8-bit level
	subq.w	#8,d4			; only the short form was consumed
	bra.s	.aoutblockescape3

.ais128:
	sub.w	#256,d5			; undo the 128 marker
	ext.l	d5
	bra.s	.aoutblockescape2

.adcd_others:
; Long codes (top byte 0..3): pick the matching secondary table and the
; shift/mask that isolates its index bits from the peeked word.
	beq.s	.adc3			; cmp #3 from above
	tst.b	d1
	bne.s	.adc1
	moveq	#16,d1			; top byte 0: 8 index bits at bit 16
	move.l	#255,d3
	lea	_dct_coeff_tbl_0(a4),\2
	bra.s	.adcdo_gotval
.adc1:	subq.b	#1,d1
	bne.s	.adc2
	moveq	#20,d1			; top byte 1: 4 index bits at bit 20
	moveq	#15,d3
	lea	_dct_coeff_tbl_1(a4),\2
	bra.s	.adcdo_gotval
.adc2:	lea	_dct_coeff_tbl_2(a4),\2	; top byte 2
	bra.s	.adc32
.adc3:	lea	_dct_coeff_tbl_3(a4),\2	; top byte 3
.adc32:	moveq	#22,d1			; 2 index bits at bit 22
	moveq	#3,d3

.adcdo_gotval:
	move.l	d0,d5
	lsr.l	d1,d5
	and.l	d3,d5
	move.w	(\2,d5.w*2),d5	; value

	move.l	d5,d3
	lsr.w	#RUN_SHIFT-8,d3	; run
	lsr.w	#8,d3

	bra	.astd_handle		; decode run/level/sign as usual


.aoutblockescape3:
	extb.l	d5			; sign-extend the 8-bit escape level
.aoutblockescape2:
	lsr.w	#8,d3			; d3 = run
.aoutblockescape:
; Flush the d4 consumed bits from the stream position.
;	ext.l	d4
;	add.l	d4,_bitCount(a4)	; ANALYSIS
	add.w	d4,d7
	cmp.l	d6,d7
	blt.s	.adcds	;endofblock
	sub.l	d6,d7			; wrapped into the next longword
	addq.l	#4,a1
	subq.l	#1,_bufLength(a4)
.adcds:			;endofblock:
	endm

; *******************


	XDEF	_recon_nonintra

; _recon_nonintra -- decode the remaining coefficients of a non-intra block
; and inverse-DCT it.
;
; In:   d0.w = qscale (low word; made stack-addressable below),
;       d1 = start index i into zigzag_direct, a1 = coefficient block,
;       a4 = globals base
; Each decoded level is scaled by qscale/8 and scattered into the block at
; its zigzag position.  On END_OF_BLOCK the 2-bit EOB code is flushed, the
; stream position is written back, and control tail-jumps into jrevdct
; with a0 = block (jrevdct is expected to restore ri_regs and return --
; they are still on the stack at the jump).
_recon_nonintra:
ri_regs REG	d2-d7/a2-a3/a5-a6
	movem.l	ri_regs,-(sp)

	swap	d0				; make the qscale word readable
	move.l	d0,-(sp)			; as (sp).w -- qscale
	move.l	a1,a6				; a6 = coefficient block

	lea	_zigzag_direct(a4),a3
	lsl.l	#2,d1				; i, index to zigzag
	add.l	d1,a3				; a3 = &zigzag_direct[i]

	lea	_dct_coeff_next(a4),a2		; primary VLC table

	move.l	_bitBuffer(a4),a1
	move.l	_bitOffset(a4),d7
	moveq	#32,d6

rneci_loop1:
	process_intra	rneci_ready,a0

	addq.l	#1,d3
;	add.l	d3,d3
;	add.l	d3,d3
	lsl.l	#2,d3
	add.l	d3,a3				; i+= run+1  (implicit zigzag)
	move.l	(a3),d1				; pos
	muls.w	(sp),d5				; qscale
	asr.w	#3,d5				; level * qscale / 8
	move.w	d5,(a6,d1.l*2)
	bra	rneci_loop1

rneci_ready:
	addq.l	#2,d7				; flush the 2-bit EOB code
	cmp.l	d6,d7
	blt.s	rneci_flush
	sub.l	d6,d7
	subq.l	#1,_bufLength(a4)
	addq.l	#4,a1
rneci_flush:
	move.l	a1,_bitBuffer(a4)		; write back stream position
	move.l 	d7,_bitOffset(a4)

; this won't work anymore ... take a3 instead!
; 	addq.l	#1,([_mbCoeffPtr,a4],d1.l*4)	; ANALYSIS

	addq.l	#4,sp				; drop the saved qscale

	move.l	a6,a0				; a0 = block
	bra	jrevdct				; tail jump into the inverse DCT



; ***********************

	XDEF	@recon_non2intra

; @recon_non2intra -- like @recon_intra below, but starts at coefficient
; index d0 instead of 0: the matrix pointer (a5) and the zigzag pointer
; (a3) are pre-advanced by d0 entries before entering the shared loop.
; In: a0 = quantization matrix, a1 = coefficient block, d0 = start index,
;     a4 = globals base.
@recon_non2intra:
	movem.l	ri_regs,-(sp)
	move.l	a0,a5				; a5 = quantization matrix

	lea	_zigzag_direct(a4),a3
	add.l	d0,d0				; 2*i: word offset into matrix
	add.l	d0,a5
	add.l	d0,d0				; 4*i: long offset into zigzag
	add.l	d0,a3				; i+= run

	bra	r2ni_entry

	XDEF	@recon_intra

; @recon_intra -- decode and reconstruct a coefficient block: each decoded
; level is multiplied by the matching quantization-matrix entry (already
; qscale-scaled, see @new_matrix) and divided by 8, then stored at its
; zigzag position.  Falls through into jrevdct at the end.
; In: a0 = quantization matrix, a1 = coefficient block, a4 = globals base.
@recon_intra:
	movem.l	ri_regs,-(sp)
	move.l	a0,a5

	lea	_zigzag_direct(a4),a3

r2ni_entry:
	move.l	a1,a6				; a6 = coefficient block
	lea	_dct_coeff_next(a4),a2		; primary VLC table

	move.l	_bitBuffer(a4),a1
	move.l	_bitOffset(a4),d7
	moveq	#32,d6

reci_loop1:
	process_intra	reci_ready,a0

	addq.l	#1,d3				; run+1
	add.l	d3,d3
	add.l	d3,a5				; matrix ptr += 2*(run+1)
	add.l	d3,d3
	add.l	d3,a3				; i+= run+1  (implicit zigzag)
	move.l	(a3),d1				; pos
	muls.w	(a5),d5				; level * matrix entry
	asr.w	#3,d5				; / 8
; the sign of (a5) is used to round d5 up/down via the LSB
; ...
	move.w	d5,(a6,d1.l*2)
	bra	reci_loop1

reci_ready:
	addq.l	#2,d7				; flush the 2-bit EOB code
	cmp.l	d6,d7
	blt.s	reci_flush
	sub.l	d6,d7
	subq.l	#1,_bufLength(a4)
	addq.l	#4,a1
reci_flush:
	move.l	a1,_bitBuffer(a4)		; write back stream position
	move.l 	d7,_bitOffset(a4)


; this won't work anymore ... reference a3 instead!
; 	addq.l	#1,([_mbCoeffPtr,a4],d1.l*4)	; ANALYSIS

	move.l	a6,a0				; a0 = block for jrevdct
;	bra	jrevdct
; Falls through into jrevdct (included below); jrevdct is expected to
; restore ri_regs and return -- they are still on the stack here.

	include	"sjrevdct.s"

; ***************************************************************

	XDEF	_s_DecodeDCTCoeff

; _s_DecodeDCTCoeff -- decode a single DCT coefficient (C-callable, see
; the prototype in sutils.h).
;
; In:  a0 = int *level (written with the decoded level on normal exit),
;      a2 = primary coefficient table (dct_coeff_first/_next) --
;           presumably supplied by the register-convention caller, since
;           it is neither loaded nor saved here; TODO confirm via sutils.h
;      a4 = globals base
; Out: d0.l = run; END_OF_BLOCK (62) signals the end of the block.
;
; NOTE(review): on the END_OF_BLOCK path the macro branches straight to
; endofblock, so _bitBuffer/_bitOffset are NOT written back and *level is
; left untouched -- presumably intentional (the caller handles the EOB
; code itself); verify against the callers.
_s_DecodeDCTCoeff:
dcdregs	REG	d2-d7
	movem.l	dcdregs,-(sp)

	move.l	_bitBuffer(a4),a1
	move.l	_bitOffset(a4),d7
	moveq	#32,d6

	process_intra	endofblock,a2

	move.l	a1,_bitBuffer(a4)	; write back the stream position
	move.l 	d7,_bitOffset(a4)

	move.l	d5,(a0)		; int *level

endofblock:
	move.w	d3,d0		; run = return value!
	ext.l	d0

	movem.l	(sp)+,dcdregs
	rts


; ***************************************************************

	END
