Comment ~RM132

TASM version of Douglas Jones' Splay Tree data compression technique. See
Communications of the Association for Computing Machinery, August 1988.

This code implements a FAST, efficient and deceptively simple single pass
algorithm for data compression and expansion. To use as a unit with a Turbo
Pascal program: compile SPLAY.PAS and include "Uses SPLAY" in your program
or unit's interface section. You will have to elaborate the stubs provided 
for data I/O -- the getbyte and writebyte procedures.

As the names of the data I/O procedures suggest, this implementation is
byte oriented. You may compress and expand any byte value from 0 to 255

with

  compress(byte);  { uses writebyte to output compressed data }
  byte := expand;  { uses getbyte to input compressed data    }

respectively, where byte is a suitable variable.


Before compressing or expanding data the procedures

  InitCompress
&
  InitExpand

should be executed to perform respective initialisation functions.


When you have finished compressing data the procedure

  EndCompress

should be executed. Typical sequences are, therefore,


InitCompress;
repeat
  compress(byte);
until HeartsContent;
EndCompress;

and

InitExpand;
repeat
  byte := expand;
until HeartsContent;


In this implementation the EndCompress procedure simply outputs any
unflushed data; it does NOT output an end of file marker. Ordinarily, the
character used to signal the end of a splayed file is simply MaxChar + 1, 
where MaxChar is the ordinal value of the highest source character (255 for
the PC character set). As it is not possible to represent the value 256
in a byte this code cannot output an end of file marker with this value.
You must either

1. infer from the data context where the end of the data occurs
   (during input)

  or

2. use a source alphabet with fewer than 256 characters, i.e. a subset of
   the PC character set, so that an end of file character may be used.

Alternatively, you can alter the source code to use 9 bit data. Very
little of SPLAY.ASM would need to be changed -- begin by changing the
UP array of pointers from bytes to words -- but getbyte and writebyte
would need to be changed to handle 9 bit bytes. As these bytes would be
represented in the first place within 16 bit words this approach would
incur both a storage and processing overhead.

If you elected to use a 7 bit character set (ASCII 0 to 127) you could
compress and decompress data as follows:

Const
  EofChar = 128;

InitCompress;
repeat
  compress(byte);
until endofdata;
compress(EofChar);
EndCompress;


InitExpand;
while byte <> EofChar do
  byte := expand;


The splay prefix algorithm is locally adaptive. It will progressively
reduce the number of bits used to represent a character repeated in the
source document to a point where the character may be represented by a
single bit. On small files it can achieve results better than or close
to those obtained with slower and more complicated methods. I have found
it particularly useful in circumstances where it's possible to pack data
before compression.

Paul O'Nolan
CIS 72007,242
2nd May 1989

Newstead, Forbes Place, Hatton of Fintray, Aberdeen AB2 0YB, Scotland UK

EndComment~
; RM132
	.MODEL	TPASCAL
	.DATA

; Tree geometry. Nodes are numbered from 1: nodes 1..MaxChar are internal
; nodes, nodes SuccMax..TwiceMax are leaves, and the leaf for source byte b
; is node b + SuccMax (see Splay, Compress and Expand).

MAXCHAR		EQU	255		; ordinal of highest source character;
					;   also the highest internal node
SUCCMAX		EQU	MaxChar + 1	; 256: first leaf node. The value
					;   conventionally used as the eof
					;   marker of a splayed file (this
					;   code never emits it)
TWICEMAX	EQU	2 * MaxChar + 1	; 511: highest node number (last leaf)
SUCCTWICEMAX	EQU	TwiceMax + 1	; 512: number of UP entries, incl. [0]
ROOT		EQU	1		; Index of root node

; the splay tree
;   UP[n]    - parent of node n, one byte (parents are always <= MaxChar)
;   LEFT[n]  - left child of internal node n, one word (children reach 511)
;   RIGHT[n] - right child of internal node n, one word

UP		DB	SUCCTWICEMAX	DUP (?)
LEFT		DW	SUCCMAX		DUP (?)
RIGHT		DW	SUCCMAX		DUP (?)

; Element [0] of Up, Left and Right is never used (InitSplay fills
; UP[2..TwiceMax] and LEFT/RIGHT[1..MaxChar]); the arrays are sized with the
; Succ... constants so that node numbers can index them directly.

; the stack used to reverse bit order: Compress collects one byte (0 or 1)
; per code bit while climbing from leaf to root, then emits them last-in
; first-out, i.e. root to leaf

SPLSTK		DB	MAXCHAR		DUP (?)

; material for packing bits into a byte
;   BITVAL   - bit counter. Compress: bits still free in PACKBYTE (8 = empty).
;              Expand: decremented before each bit is read; a new byte is
;              fetched when it reaches 0.
;   PACKBYTE - the partly written (Compress) or partly read (Expand) byte

BITVAL		DB	?
PACKBYTE	DB	?

	.CODE

EXTRN	WRITEBYTE:	FAR	; procedure writebyte (output: byte)
EXTRN	GETBYTE:	FAR	; function getbyte: byte

;LDį

; InitSplay

; var
;  AX,BX,DI: I

; Initialize the splay tree - as a balanced code tree. Any initial tree will
; do so long as the same tree is used for compression and decompression.

InitSplay PROC NEAR

; Build the initial, balanced code tree: every node I (2..TwiceMax) gets
; parent I div 2, and every internal node I (1..MaxChar) gets children
; 2*I and 2*I+1. Compressor and expander must start from the same tree.
;
; In:	nothing
; Out:	UP[2..TwiceMax], LEFT[1..MaxChar], RIGHT[1..MaxChar] filled in
; Uses:	AX, BX, DI, flags

; parent links

	MOV	DI,2			; I := 2
ISPARENT:				; repeat
	MOV	AX,DI
	SHR	AX,1			;   AX := I div 2 (at most 255)
	MOV	BYTE PTR UP[DI],AL	;   up[I] := I div 2
	INC	DI			;   inc(I)
	CMP	DI,SUCCTWICEMAX
	JB	ISPARENT		; until I > TwiceMax

; child links

	MOV	DI,ROOT			; I := 1, the first internal node
ISCHILD:				; repeat
	MOV	BX,DI			;   BX + DI = 2 * I = byte offset of
					;     word element I
	MOV	AX,DI
	ADD	AX,DI			;   AX := I * 2
	MOV	WORD PTR LEFT[BX+DI],AX	;   left[I] := I * 2
	INC	AX
	MOV	WORD PTR RIGHT[BX+DI],AX;   right[I] := I * 2 + 1
	INC	DI			;   inc(I)
	CMP	DI,SUCCMAX
	JB	ISCHILD			; until I > MaxChar
	RET

InitSplay ENDP

;LDį

; InitCompress

InitCompress PROC FAR
	PUBLIC InitCompress

; Prepare for a new compression run: reset the code tree and mark the bit
; packer as empty (no bits collected, all 8 positions free).
; Uses whatever InitSplay uses (AX, BX, DI, flags).

	CALL	InitSplay		; start from the standard tree
	MOV	BYTE PTR PACKBYTE,0	; PackByte := 0
	MOV	BYTE PTR BITVAL,8	; BitVal := 8
	RET

InitCompress ENDP

;LDį

; InitExpand

InitExpand PROC FAR
	PUBLIC InitExpand

; Prepare for a new expansion run: reset the code tree and set BitVal to 1
; so that the first decrement in Expand reaches 0 and fetches the first
; input byte.
; Uses whatever InitSplay uses (AX, BX, DI, flags).

	CALL	InitSplay		; start from the standard tree
	MOV	BYTE PTR BITVAL,1	; BitVal := 1
	RET

InitExpand ENDP

;LDį

; Splay (plain: byte);

; Var
;   A,B: DownIndex  (word)
;   C,D: UpIndex    (byte)

Splay PROC NEAR PLAIN:BYTE

; Reshape the code tree after PLAIN has been coded: starting at PLAIN's leaf,
; walk towards the root and semi-rotate each pair of nodes on the path, which
; moves the leaf (roughly) half way to the root.
;
; In:	PLAIN = source byte, passed as one word on the stack
; Out:	UP / LEFT / RIGHT updated
; Uses:	AX, BX, DX, SI, DI, flags
;
; Register roles inside the loop:
;	SI = A (node number; temporarily D scaled by 2 as a word offset)
;	DL/DX = C as a node number, DI = C (scaled by 2 from SPLUPQ on)
;	BX = B, AX = A while SI is busy holding D
;
; NOTE: both callers in this file push the argument and do not remove it
; afterwards, i.e. they rely on the RET below removing it (Pascal convention,
; expected from the TPASCAL model's generated procedure exit code).

	MOV	AL,PLAIN		; load code for splay
	XOR	AH,AH			; zap AH for 16 bit add
	MOV	SI,SUCCMAX		; load SuccMax
	ADD	SI,AX			; A := plain + SuccMax, SI points to leaf
	XOR	DH,DH			; zap DH: DX carries the 8 bit C as a word

; walk up tree semi-rotating pairs of nodes

SPLLP:					; repeat
	MOV	DL,UP[SI]		;   C := up[A]
	MOV	DI,DX			;   DI <- C, usable as an index
	CMP	DX,ROOT			;   if C <> root then
	JE	SPLNOP			;     begin

; a pair remains to semi-rotate

	PUSH	SI			; park A while AX is used to widen D
	XOR	AH,AH			; AH not needed, now load AL
	MOV	AL,UP[DI]		;       D := up[C]
	MOV	SI,AX			; SI <- D
	POP	AX			; AX <- A

	SHL	SI,1			; scale D to a word offset
	MOV	BX,LEFT[SI]		;       B := left[D]
	CMP	DI,BX			;       if C = B then
	JNE	SPLNEU			;         begin
	MOV	BX,RIGHT[SI]		;           B := right[D]
	MOV	WORD PTR RIGHT[SI],AX	;           right[D] := A
	JMP	SPLUPQ			;         end
SPLNEU:					;       else
	MOV	WORD PTR LEFT[SI],AX	;         left[D] := A
SPLUPQ:
	SHL	DI,1			; scale C to a word offset
	CMP	WORD PTR LEFT[DI],AX	;       if left[C] = A then
	JNE	SPLNED			;         begin
	MOV	WORD PTR LEFT[DI],BX	;           left[C] := B
	JMP	SPLDNQ			;         end
SPLNED:					;       else
	MOV	WORD PTR RIGHT[DI],BX	;         right[C] := B
SPLDNQ:
	MOV	BYTE PTR UP[BX],DL	;       up[B] := C (DL is still unscaled C)
	MOV	DI,AX			; 1     C := A
	SHR	SI,1			; SI back to node number D; this is
					;       also A := D for the next pass
	MOV	AX,SI			;* AL <- D
	MOV	BYTE PTR UP[DI],AL	; 2     up[C] := D
	JMP	SPLLPQ			; 1+2 effectively = up[A] := D

; no pair remains

SPLNOP:					;     end
	MOV	SI,DX			;       A := C (C is the root here)
SPLLPQ:
	CMP	SI,ROOT			; until A = root
	JNE	SPLLP
	RET

; * no need to XCHG before and after as AX is changed before being used again
;   (AX is reloaded from the stack by POP AX on the next pass)

Splay	ENDP

;LDį

; Compress (plain: byte);

; var
;  local	global
;  AL:		BitVal
;  BX: U	PackByte
;  SI: A
;  DI: Sp

Compress PROC FAR PLAIN:BYTE
	PUBLIC Compress

; Emit the current prefix code for PLAIN and then splay the tree.
;
; The code is the path from the root to PLAIN's leaf (0 = left, 1 = right).
; It is discovered leaf-to-root, so each bit is first parked on SPLSTK and
; then popped root-first into PackByte; every time PackByte fills up it is
; handed to WRITEBYTE.
;
; In:	PLAIN = source byte
; Out:	zero or more bytes written through WRITEBYTE; BITVAL, PACKBYTE and
;	the tree updated
; Register roles:
;	SI = A (current node)		BX = U (parent of A), later BL = PackByte
;	DI = Sp (pointer into SPLSTK)	AL = BitVal (free bits in PackByte)

	MOV	AL,PLAIN
	XOR	AH,AH			; AX := plain as a 16 bit value
	PUSH	AX			; parked on the stack for Splay later
	MOV	SI,AX
	ADD	SI,SUCCMAX		; A := plain + SuccMax, the leaf

; climb to the root, stacking one bit per level

	MOV	DI,OFFSET SPLSTK	; Sp := bottom of the bit stack
	XOR	BH,BH			; BX holds the 8 bit U
CPCLIMB:				; repeat
	MOV	BL,UP[SI]		;   U := up[A]
	SHL	BX,1			;   scale U to index a word array
	MOV	BYTE PTR [DI],0		;   provisionally a 0 bit (left child)
	CMP	SI,WORD PTR RIGHT[BX]
	JNE	CPSTORED		;   if right[U] = A then
	INC	BYTE PTR [DI]		;     it is a 1 bit
CPSTORED:
	INC	DI			;   inc(Sp)
	SHR	BX,1			;   U back to a node number
	MOV	SI,BX			;   A := U
	CMP	SI,ROOT
	JNE	CPCLIMB			; until A = root

; unstack the bits, root first, into PackByte

	MOV	BL,BYTE PTR PACKBYTE
	MOV	AL,BYTE PTR BITVAL
CPEMIT:					; repeat
	DEC	DI			;   dec(Sp)
	ROL	BL,1			;   make room in bit 0
	OR	BL,BYTE PTR [DI]	;   PackByte := PackByte or splstk[Sp]
	DEC	AL			;   dec(BitVal)
	JZ	CPFLUSH			;   byte complete -> write it out
CPMORE:
	CMP	DI,OFFSET SPLSTK
	JNE	CPEMIT			; until the bit stack is empty

	MOV	BYTE PTR BITVAL,AL	; save packer state for the next call
	MOV	BYTE PTR PACKBYTE,BL

	POP	SI			; fetch the parked plain and
	PUSH	SI			;   hand it to Splay on the stack
	CALL	SPLAY			; Splay(plain) -- update the tree
	RET

; out of line: PackByte holds 8 bits

CPFLUSH:
	PUSH	DI			; keep Sp across the far call
	PUSH	BX			; argument, low byte = PackByte
	CALL	WRITEBYTE		; writebyte(PackByte)
	POP	DI
	XOR	BL,BL			; PackByte := 0
	MOV	AL,8			; BitVal := 8
	JMP	CPMORE

Compress	ENDP

;LDį

; function expand: byte;

; var
;  AL: PackByte
;  BL: BitVal
;  SI: A

Expand	PROC FAR; RETURNS Decoded: BYTE
	PUBLIC Expand

; Decode one source byte: follow input bits from the root (0 = left,
; 1 = right) until a leaf is reached, then splay the tree for that byte.
; Input bytes are fetched through GETBYTE (result taken from AL) whenever
; the current one is used up; bits are consumed high bit first.
;
; In:	nothing (state in BITVAL, PACKBYTE and the tree)
; Out:	AX = decoded byte (0..MaxChar); BITVAL, PACKBYTE and tree updated
; Register roles:
;	SI = A (current node)	AL = PackByte	BL = BitVal

	MOV	AL,BYTE PTR PACKBYTE	; AL := PackByte
	MOV	BL,BYTE PTR BITVAL	; BL := BitVal (1 right after InitExpand)
	MOV	SI,ROOT			; A := root

EXBIT:					; repeat
	DEC	BL			;   dec(BitVal)
	JNZ	EXHAVE			;   if BitVal = 0 then
	PUSH	SI			;     begin { current byte exhausted }
	CALL	GETBYTE			;       AL <- next input byte
	POP	SI
	MOV	BL,8			;       BitVal := 8
EXHAVE:					;     end
	SHL	SI,1			;   scale A to index a word array
	ROL	AL,1			;   next bit -> carry flag
	JNC	EXLEFT			;   if bit = 1 then
	MOV	SI,WORD PTR RIGHT[SI]	;     A := right[A]
	JMP	EXTEST			;   else
EXLEFT:
	MOV	SI,WORD PTR LEFT[SI]	;     A := left[A]
EXTEST:
	CMP	SI,SUCCMAX
	JB	EXBIT			; until A > MaxChar, i.e. A is a leaf

	MOV	BYTE PTR PACKBYTE,AL	; save reader state for the next call
	MOV	BYTE PTR BITVAL,BL

; update the code tree

	SUB	SI,SUCCMAX		; leaf number -> source byte
	PUSH	SI			; one copy survives as the result,
	PUSH	SI			;   one is the argument for Splay
	CALL	SPLAY			; Splay(byte)

; return the character

	POP	AX			; AX := decoded byte (AH = 0)
	RET

Expand	ENDP

;LDį

; EndCompress

; var
;  AL: PackByte
;  CL: BitVal

EndCompress PROC FAR
	PUBLIC EndCompress

; Flush the bit packer at the end of a compression run. If PackByte holds
; any pending bits they are moved up to the high end of the byte, padded
; with zero bits, and written through WRITEBYTE. No end of file marker is
; written.
;
; The packer is then marked empty (BitVal = 8, PackByte = 0), so calling
; EndCompress again without compressing anything in between writes nothing
; instead of emitting the same byte a second time.
;
; In:	nothing (state in BITVAL, PACKBYTE)
; Out:	at most one byte written; BITVAL = 8, PACKBYTE = 0
; Uses:	AX, CL, flags, plus whatever WRITEBYTE changes

	MOV	CL,BYTE PTR BITVAL	; CL := bit positions still free
	CMP	CL,8			; all 8 free?
	JE	ECDONE			;   then nothing is pending

	MOV	AL,BYTE PTR PACKBYTE	; pending bits sit in the low end
	SHL	AL,CL			; left-justify them, zero fill below

	MOV	BYTE PTR BITVAL,8	; packer is empty again; done before
	MOV	BYTE PTR PACKBYTE,0	;   the call as AL/CL need not survive it

	PUSH	AX			; argument, low byte = final byte
	CALL	WRITEBYTE		; and output it
ECDONE:
	RET

EndCompress ENDP

;LDį

CODE	ENDS
	END
