	; Select the 80286 (privileged) instruction set for the file as a whole.
	.286p
	; Assembler-compatibility shims.  The branch is chosen by whether the
	; symbol ??version is defined (presumably the version symbol that Turbo
	; Assembler predefines -- TODO confirm).  Neither macro is invoked in the
	; visible part of this file.
	ifndef	??version
	; publicdll NAME: expands to a plain "public NAME".
	; Only defined in this (??version undefined) branch.
publicdll macro	name
	public	name
	endm
	; $comm NAME,DIST,SIZE,COUNT: declares a communal variable of
	; COUNT*SIZE bytes using the "name:BYTE:total" form of COMM.
$comm	macro	name,dist,size,count
	comm	dist name:BYTE:count*size
	endm
	else
	; $comm NAME,DIST,SIZE,COUNT: same arguments, but emitted with the
	; "name[size]:BYTE:count" form of COMM.
$comm	macro	name,dist,size,count
	comm	dist name[size]:BYTE:count
	endm
	endif
; Segment scaffolding.  _TEXT is declared first (empty here) and is
; re-opened further down for the actual code.
_TEXT	segment byte public 'CODE'
_TEXT	ends
; DGROUP combines the initialised (_DATA) and uninitialised (_BSS) data
; segments; DS is assumed to address DGROUP and CS to address _TEXT.
DGROUP	group	_DATA,_BSS
	assume	cs:_TEXT,ds:DGROUP
_DATA	segment word public 'DATA'
; d@ / d@w label the current position in _DATA, typed as byte and as word.
; They are not referenced in the visible part of this file.
d@	label	byte
d@w	label	word
_DATA	ends
_BSS	segment word public 'BSS'
; b@ / b@w label the current position in _BSS, typed as byte and as word.
; They are not referenced in the visible part of this file.
b@	label	byte
b@w	label	word
_BSS	ends
_TEXT	segment byte public 'CODE'
	assume	cs:_TEXT
;-----------------------------------------------------------------------
; GLOBAL void j_rev_dct (DCTBLOCK data)
;
; Purpose
;	In-place two-pass inverse DCT on an 8x8 block of 16-bit elements.
;	Each iteration of the inner loop performs one 8-point 1-D IDCT: it
;	reads one *row* of the input matrix and stores into one *column* of
;	the output matrix.  Pass 1 reads data[] and writes the local
;	workspace[]; pass 2 reads workspace[] and writes data[], which gives
;	the equivalent of a columnar IDCT with no variable array indexing.
;
; Interface (16-bit near procedure, bp-based frame)
;	In:	[bp+4]	= near pointer to data (64 words, read and written)
;	Out:	data[] overwritten with the transformed block
;	Saved:	bp, si, di
;	Clobb:	eax, ebx, ecx, edx (all 32 bits, including the upper
;		halves) and flags
;	CPU:	80386 or later (movsx, 32-bit registers, imul reg,imm32)
;	NOTE:	workspace[] lives in the stack frame but is addressed
;		through si/di, so SS and DS must address the same segment.
;
; Fixed-point conventions (names taken from the C reference)
;	COS_1_4, SIN_1_8, COS_1_8 are the multipliers for "DCT_SCALE"
;	values; the O* constants are the multipliers for "OVERSCALE" values.
;	UNFIXO(x) = (x + UNFIXO_ROUND) >> UNFIXO_SHIFT
;	UNFIXH(x) = (x + UNFIXH_ROUND) >> UNFIXH_SHIFT
;	sar on its own truncates toward minus infinity, so the rounding
;	bias is added before EVERY descaling shift; all eight outputs of a
;	1-D pass and both UNFIXO intermediates are rounded the same way.
;
; Frame layout (offsets from bp)
;	  -2 pass  (word)	  -4 rowctr (word)
;	  -8 in0	 -12 in1	 -16 in2	 -20 in3
;	 -24 in4	 -28 in5	 -32 in6	 -36 in7
;	 -40 tmp10	 -44 tmp11	 -48 tmp12	 -52 tmp13
;	 -56 tmp20	 -60 tmp21	 -64 tmp22	 -68 tmp23
;	 -72 tmp30	 -76 tmp31
;	 -80 tmp40	 -84 tmp41	 -88 tmp42	 -92 tmp43
;	 -96 tmp50	-100 tmp51	-104 tmp52 (held in ecx, never stored)
;	-108 tmp53
;	-236 workspace[64] (128 bytes, up to but not including -108)
;	The stores to in7 (-36), tmp10 (-40) and tmp30 (-72) are never read
;	back; they are kept so the frame mirrors the C locals.
;-----------------------------------------------------------------------
	.386			; define the constants with a 386 mode active:
				; some assemblers evaluate expressions in 16
				; bits otherwise, and UNFIXH_ROUND needs 17
COS_1_4		equ	46341
SIN_1_8		equ	25080
COS_1_8		equ	60547
OSIN_1_16	equ	3196
OCOS_1_16	equ	16069
OSIN_5_16	equ	13623
OCOS_5_16	equ	9102
LG2_OVERSCALE	equ	2		; OVERSHIFT(x): x <<= LG2_OVERSCALE
UNFIXO_SHIFT	equ	14
UNFIXO_ROUND	equ	8192		; 1 << (UNFIXO_SHIFT-1)
UNFIXH_SHIFT	equ	17
UNFIXH_ROUND	equ	65536		; 1 << (UNFIXH_SHIFT-1)
	.286p			; prologue and loop control stay 286 code
_j_rev_dct	proc	near
	push	bp
	mov	bp,sp
	sub	sp,236			; scalars + workspace[], see layout above
	push	si
	push	di
	;
	;	inptr = data;		/* initialize pointers for first pass */
	;
	mov	si,word ptr [bp+4]
	;
	;	outptr = workspace;
	;
	lea	ax,word ptr [bp-236]
	mov	di,ax
	;
	;	for (pass = 1; pass >= 0; pass--) {
	;
	mov	word ptr [bp-2],1
	jmp	jrd_pass_test
jrd_pass_body:
	;
	;	  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
	;
	mov	word ptr [bp-4],7
	jmp	jrd_row_test
jrd_row_body:
	.386
	;
	;	    Load the even-index inputs and in5, sign-extended to 32 bits.
	;
	movsx	eax,word ptr [si+4]
	mov	[bp-16],eax		; in2 = inptr[2]
	movsx	ebx,word ptr [si+8]
	mov	[bp-24],ebx		; in4 = inptr[4]   (ebx live below)
	movsx	edx,word ptr [si+10]
	mov	[bp-28],edx		; in5 = inptr[5]
	movsx	ecx,word ptr [si+12]
	mov	[bp-32],ecx		; in6 = inptr[6]
	movsx	ecx,word ptr [si]
	mov	[bp-8],ecx		; in0 = inptr[0]
	;
	;	    /* These values are scaled by DCT_SCALE */
	;
	;	    tmp10 = (in0 + in4) * COS_1_4;
	;
	add	ecx,ebx
	imul	ecx,COS_1_4
	mov	[bp-40],ecx		; ecx = tmp10, reused for tmp20/tmp23
	;
	;	    tmp13 = in6 * SIN_1_8 + in2 * COS_1_8;
	;
	mov	edx,[bp-32]
	imul	edx,SIN_1_8
	mov	ebx,[bp-16]
	imul	ebx,COS_1_8
	add	edx,ebx
	mov	[bp-52],edx
	;
	;	    tmp20 = tmp10 + tmp13;
	;
	add	edx,ecx
	mov	[bp-56],edx
	;
	;	    tmp23 = tmp10 - tmp13;
	;
	sub	ecx,[bp-52]
	mov	[bp-68],ecx
	;
	;	    tmp11 = (in0 - in4) * COS_1_4;
	;
	mov	ecx,[bp-8]
	sub	ecx,[bp-24]
	imul	ecx,COS_1_4
	mov	[bp-44],ecx
	;
	;	    tmp12 = in2 * SIN_1_8 - in6 * COS_1_8;
	;
	mov	ecx,[bp-16]
	imul	ecx,SIN_1_8
	mov	ebx,[bp-32]
	imul	ebx,COS_1_8
	sub	ecx,ebx
	mov	[bp-48],ecx
	;
	;	    tmp21 = tmp11 + tmp12;
	;
	add	ecx,[bp-44]
	mov	[bp-60],ecx
	;
	;	    tmp22 = tmp11 - tmp12;
	;
	mov	ebx,[bp-44]
	sub	ebx,[bp-48]
	mov	[bp-64],ebx
	;
	;	    /* These values are scaled by OVERSCALE */
	;
	;	    in3 = inptr[3];
	;
	movsx	eax,word ptr [si+6]
	mov	[bp-20],eax
	;
	;	    tmp30 = UNFIXO((in3 + in5) * COS_1_4);
	;
	add	eax,[bp-28]
	imul	eax,COS_1_4
	add	eax,UNFIXO_ROUND	; round to nearest before descaling
	sar	eax,UNFIXO_SHIFT
	mov	[bp-72],eax		; eax = tmp30, live until tmp40/tmp42
	;
	;	    tmp31 = UNFIXO((in3 - in5) * COS_1_4);
	;
	mov	ecx,[bp-20]
	sub	ecx,[bp-28]
	imul	ecx,COS_1_4
	add	ecx,UNFIXO_ROUND	; round to nearest before descaling
	sar	ecx,UNFIXO_SHIFT
	mov	[bp-76],ecx		; ecx = tmp31, consumed by tmp41
	;
	;	    in1 = inptr[1];
	;	    OVERSHIFT(in1);
	;
	movsx	ebx,word ptr [si+2]
	sal	ebx,LG2_OVERSCALE
	mov	[bp-12],ebx
	;
	;	    in7 = inptr[7];
	;	    OVERSHIFT(in7);
	;
	movsx	edx,word ptr [si+14]
	sal	edx,LG2_OVERSCALE
	mov	[bp-36],edx
	;
	;	    tmp41 = in7 + tmp31;
	;
	add	ecx,edx
	mov	[bp-84],ecx		; ecx = tmp41, consumed by tmp51
	;
	;	    tmp43 = in7 - tmp31;
	;
	sub	edx,[bp-76]
	mov	[bp-92],edx
	;
	;	    tmp40 = in1 + tmp30;
	;
	mov	edx,[bp-12]
	add	edx,eax
	mov	[bp-80],edx		; edx = tmp40, consumed by tmp51
	;
	;	    tmp42 = in1 - tmp30;
	;
	mov	ebx,[bp-12]
	sub	ebx,eax
	mov	[bp-88],ebx		; ebx = tmp42, untouched until tmp53
	;
	;	    /* And these are scaled by DCT_SCALE */
	;
	;	    tmp51 = tmp40 * OSIN_1_16 - tmp41 * OCOS_1_16;
	;
	imul	edx,OSIN_1_16
	imul	ecx,OCOS_1_16
	sub	edx,ecx
	mov	[bp-100],edx
	;
	;	    outptr[DCTSIZE*3] = (DCTELEM) UNFIXH(tmp23 + tmp51);
	;
	add	edx,[bp-68]
	add	edx,UNFIXH_ROUND
	sar	edx,UNFIXH_SHIFT
	mov	[di+48],dx
	;
	;	    outptr[DCTSIZE*4] = (DCTELEM) UNFIXH(tmp23 - tmp51);
	;
	mov	edx,[bp-68]
	sub	edx,[bp-100]
	add	edx,UNFIXH_ROUND
	sar	edx,UNFIXH_SHIFT
	mov	[di+64],dx
	;
	;	    tmp50 = tmp40 * OCOS_1_16 + tmp41 * OSIN_1_16;
	;
	mov	ecx,[bp-84]
	imul	ecx,OSIN_1_16
	mov	edx,[bp-80]
	imul	edx,OCOS_1_16
	add	edx,ecx
	mov	[bp-96],edx
	;
	;	    outptr[        0] = (DCTELEM) UNFIXH(tmp20 + tmp50);
	;
	mov	edx,[bp-56]
	add	edx,[bp-96]
	add	edx,UNFIXH_ROUND
	sar	edx,UNFIXH_SHIFT
	mov	[di],dx
	;
	;	    outptr[DCTSIZE*7] = (DCTELEM) UNFIXH(tmp20 - tmp50);
	;
	mov	edx,[bp-56]
	sub	edx,[bp-96]
	add	edx,UNFIXH_ROUND
	sar	edx,UNFIXH_SHIFT
	mov	[di+112],dx
	;
	;	    tmp53 = tmp42 * OSIN_5_16 - tmp43 * OCOS_5_16;
	;
	imul	ebx,OSIN_5_16		; ebx still holds tmp42
	mov	edx,[bp-92]
	imul	edx,OCOS_5_16
	sub	ebx,edx
	mov	[bp-108],ebx
	;
	;	    outptr[DCTSIZE*6] = (DCTELEM) UNFIXH(tmp21 - tmp53);
	;
	mov	edx,[bp-60]
	sub	edx,[bp-108]
	add	edx,UNFIXH_ROUND
	sar	edx,UNFIXH_SHIFT
	mov	[di+96],dx
	;
	;	    outptr[DCTSIZE  ] = (DCTELEM) UNFIXH(tmp21 + tmp53);
	;
	mov	edx,[bp-60]
	add	edx,[bp-108]
	add	edx,UNFIXH_ROUND
	sar	edx,UNFIXH_SHIFT
	mov	[di+16],dx
	;
	;	    tmp52 = tmp42 * OCOS_5_16 + tmp43 * OSIN_5_16;
	;
	mov	edx,[bp-88]
	imul	edx,OCOS_5_16
	mov	ecx,[bp-92]
	imul	ecx,OSIN_5_16
	add	edx,ecx
	mov	ecx,edx			; keep tmp52 in ecx instead of [bp-104]
	;
	;	    outptr[DCTSIZE*2] = (DCTELEM) UNFIXH(tmp22 + tmp52);
	;
	mov	ebx,[bp-64]		; ebx = tmp22, reused for the difference
	add	edx,ebx
	add	edx,UNFIXH_ROUND
	sar	edx,UNFIXH_SHIFT
	mov	[di+32],dx
	;
	;	    outptr[DCTSIZE*5] = (DCTELEM) UNFIXH(tmp22 - tmp52);
	;
	sub	ebx,ecx
	add	ebx,UNFIXH_ROUND
	sar	ebx,UNFIXH_SHIFT
	mov	[di+80],bx
	.286
	;
	;	    inptr += DCTSIZE;		/* advance inptr to next row */
	;
	add	si,16
	;
	;	    outptr++;			/* advance outptr to next column */
	;
	inc	di
	inc	di
	dec	word ptr [bp-4]
jrd_row_test:
	; The loop body is too long for a short conditional jump, so the
	; exit condition is tested and a near jmp goes back to the body.
	cmp	word ptr [bp-4],0
	jl	jrd_rows_done
	jmp	jrd_row_body
jrd_rows_done:
	;
	;	  }
	;	  /* end of pass; in case it was pass 1, set up for pass 2 */
	;	  inptr = workspace;
	;
	lea	ax,word ptr [bp-236]
	mov	si,ax
	;
	;	  outptr = data;
	;
	mov	di,word ptr [bp+4]
	dec	word ptr [bp-2]
jrd_pass_test:
	cmp	word ptr [bp-2],0
	jl	jrd_done
	jmp	jrd_pass_body
jrd_done:
	;
	;	}
	;
	pop	di
	pop	si
	leave
	ret
_j_rev_dct	endp
_TEXT	ends
; Trailer: s@ labels the current position in _DATA as a byte; it is not
; referenced in the visible part of this file.
_DATA	segment word public 'DATA'
s@	label	byte
_DATA	ends
; Empty re-declaration of the code segment.
_TEXT	segment byte public 'CODE'
_TEXT	ends
; Export the procedure under its underscore-prefixed name.
	public	_j_rev_dct
; The three far helper imports below are disabled.  Presumably they were
; 32-bit multiply / shift run-time helpers made unnecessary by the 386
; instructions used in _j_rev_dct -- TODO confirm.
;	extrn	N_LXMUL@:far
;	extrn	N_LXLSH@:far
;	extrn	N_LXRSH@:far
	end
