	title	lzdcmp - file decompressor using Lempel-Ziv algorithm

;Tom Pfau
;Digital Equipment Corporation
;Parsippany, NJ

;Constants
CLEAR		equ	256
EOF		equ	257
FIRST_FREE	equ	258
;MAXMAX		equ	4096

	include	macros1.mlb

;String table entry, 3 bytes per code.
;Entries are addressed by code number: "index" converts a code into the
;byte offset code*3 within the table segment (hash_seg).
hash_rec	struc
next	dw	?			; prefix code (stored from old_code by add_code)
char	db	?			; suffix char (stored from k by add_code)
hash_rec	ends

;Declare segments
;
;Stack sizing: decompress expands a code by pushing one word for every
;character of the decoded string before popping them out in order.  With
;12-bit codes a string can chain through nearly every table entry
;(about 3840 characters, e.g. a long run of one byte), so the stack must
;hold that many words plus room for call frames and interrupts.  The
;previous 128-word stack overflowed on such input.
STACK_STRING	equ	4096		;Worst-case words pushed while expanding one code
STACK_SLACK	equ	256		;Calls, pushes around DOS calls, interrupts

code	segment byte public 'code'
code	ends
stack	segment word stack 'data'
	dw	STACK_STRING+STACK_SLACK dup (?)
stack	ends
data	segment word public 'data'
data	ends
;"hash" sits in the last segment; start uses "seg hash" as the end of the
;program when it computes the size handed to setmem.
memory	segment para public 'memory'
hash	label	hash_rec
memory	ends

;Start coding
;The code segment opened here stays open until the "code ends" near the end
;of the file; the "data segment ... data ends" fragments in between are
;nested inside it and add to the data segment.
;ES is assumed to be data; decompress and add_code load ES with hash_seg
;temporarily and use explicit ES: overrides while they do.
code	segment
	assume	CS:code,DS:data,ss:stack,ES:data

;start - program entry point.
;  1. Computes the program size from DS (as loaded) up to "seg hash" and
;     passes it to setmem.
;  2. Points DS and ES at the data segment.
;  3. Prompts for the input and output file names and null-terminates them.
;  4. Opens the input file, creates the output file, calls decompress and
;     closes both files.
;  If either file cannot be opened a message is printed and the program
;  exits without decompressing (previously the DOS error code was stored
;  and used as if it were a file handle).
;
;  NOTE(review): the carry tests below assume hopen/hcreat end with the
;  INT 21h call so that CF still carries the DOS error status - confirm
;  against macros1.mlb.
start	proc	far
	cld				;read_code/write_char string ops expect DF=0
	mov	bx,seg hash		;End of program
	mov	ax,DS			;Beginning of program
	sub	bx,ax			;Size of program
	inc	bx			;Make sure
	setmem	bx			;Set program size
	mov	bx,data			;Address data segment
	mov	ES,bx
	mov	DS,bx
	print	input_prompt		;Get file names
	input	input_file
	print	crlf
	print	output_prompt
	input	output_file
	print	crlf
	mov	al,input_file+1		;Length typed -> index of terminator
	xor	ah,ah			;ah = 0, doubles as the null byte
	mov	si,ax
	mov	input_file+2[si],ah	;TH 0
	mov	al,output_file+1
	mov	si,ax
	mov	output_file+2[si],ah	;TH 0
	hopen	input_file+2,0		;Open input
	jc	st_noin			;DOS reported an error
	mov	input_handle,ax
	hcreat	output_file+2,0		;Create output
	jc	st_noout		;DOS reported an error
	mov	output_handle,ax
	call	decompress		;Decompress files
	hclose	input_handle
	hclose	output_handle
st_done:	exit			;Done

st_noout:	print	create_failed	;Output could not be created
	hclose	input_handle		;Input is already open - release it
	jmp	st_done

st_noin:	print	open_failed	;Input could not be opened
	jmp	st_done
start	endp

data	segment
open_failed	db	'Cannot open input file',13,10,'$'
create_failed	db	'Cannot create output file',13,10,'$'
data	ends

;Data used by start: prompts, file name buffers and file handles.
;The prompts and crlf end in '$' and are displayed with the print macro.
;File name buffers: start reads byte 1 as the length of the text and treats
;the text as starting at byte 2 (it stores the terminating null at
;buffer+2+length).  Byte 0 is initialised to 80 - presumably the maximum
;length expected by the input macro; confirm against macros1.mlb.
data	segment
input_prompt	db	'Input file: $'
output_prompt	db	'Output file: $'
input_file	db	80,0,80 dup (?)
output_file	db	80,0,80 dup (?)
crlf		db	13,10,'$'
input_handle	dw	?			;Value returned by hopen
output_handle	dw	?			;Value returned by hcreat
data	ends

;decompress - expand the code stream from input_handle into output_handle.
;  Codes are fetched with read_code and handled as follows:
;    EOF    flush whatever is left in the output buffer and return.
;    CLEAR  reset code size and table state (init_tab); the code that
;           follows is a literal and is written directly.  If the stream
;           ends right after the CLEAR, the EOF is honoured instead of
;           being written out as a data byte.
;    other  walk the string table from the code back to a literal
;           (value <= 255), pushing one suffix char per hop, then pop and
;           write the chars so they come out in forward order; finally add
;           a new table entry (old_code + first char of this string).
;  A code that is not yet in the table (>= free_code) is the k<w>k<w>k
;  case: it is expanded as the previous string followed by that string's
;  first character (fin_char).
;  Stack use: one word per character of the string being expanded.
;  ES is pointed at the table while walking it and restored to DS before
;  any character is written.
;  NOTE(review): the results of malloc, hread and hwrite are not checked.
decompress	proc	near
	malloc	768			;Table space; 768*16 = 4096 entries * 3 bytes
					; if malloc counts paragraphs - TODO confirm
	mov	hash_seg,ax		;Save segment address
	hread	input_handle,input_buffer,1024	;Read from input
l1:	call	read_code		;Get a code
	cmp	ax,EOF			;End of file?
	jne	l2			;no

l1f:	cmp	output_offset,0		;Data in output buffer?
	je	l1a			;no
	 hwrite	output_handle,output_buffer,output_offset	;Flush buffer
l1a:	ret				;done

l2:	cmp	ax,CLEAR		;Clear code?
	jne	l7			;no

	call	init_tab		;Initialize table
	call	read_code		;Read next code
	cmp	ax,EOF			;Stream ends right after the clear?
	je	l1f			;yes - finish, EOF is not a data byte
	mov	cur_code,ax		;Initialize variables
	mov	old_code,ax
	mov	k,al
	mov	fin_char,al
	call	write_char		;Write character (al still holds it)
	jmp	l1			;Get next code

l7:	mov	cur_code,ax		;Save new code
	mov	in_code,ax
	mov	ES,hash_seg		;Point to hash table
	cmp	ax,free_code		;Code in table? (k<w>k<w>k)
	jl	l11			;yes
	 mov	ax,old_code		;get previous code
	 mov	cur_code,ax		;make current
	 mov	al,fin_char		;get old last char
	 push	ax			;push it
	 inc	stack_count
l11:	cmp	cur_code,255		;Code or character?
	jle	l15			;Char

	mov	bx,cur_code		;Convert code to address
	call	index
	mov	al,ES:[bx].char		;Get suffix char
	push	ax			;push it
	inc	stack_count
	mov	ax,ES:[bx].next		;Get prefix code
	mov	cur_code,ax		;Save it
	jmp	l11			;Translate again

l15:
	mov	ax,DS			;Restore ES (write_char and read_code
	mov	ES,ax			; are run with ES = DS)
	mov	ax,cur_code		;Get code (now a literal)
	mov	fin_char,al		;Save as final, k
	mov	k,al
	push	ax			;Push it
	inc	stack_count
	mov	cx,stack_count		;Pop stack
	jcxz	l18			;If anything there
l17:	 pop	ax
	 call	write_char
	 loop	l17
l18:	mov	stack_count,cx		;Clear count on stack (cx is 0 here)
	call	add_code		;Add new code to table
	mov	ax,in_code		;Save input code
	mov	old_code,ax
	mov	bx,free_code		;Hit table limit?
	cmp	bx,max_code
	jl	l23			;Less means no
	 cmp	nbits,12		;Still within twelve bits?
	 je	l23			;no (next code should be clear)
	  inc	nbits			;Increase code size
	  shl	max_code,1		;Double max code
l23:	jmp	l1			;Get next code
decompress	endp	

;Decoder state shared by decompress, read_code, init_tab and add_code.
data	segment
hash_seg	dw	?			;Segment returned by malloc for the string table
cur_code	dw	?			;Code currently being walked back to a literal
old_code	dw	?			;Previous input code; prefix of the next new entry
in_code		dw	?			;Code as read, kept while cur_code is walked
free_code	dw	FIRST_FREE		;Next table entry add_code will fill
stack_count	dw	0			;Number of chars currently pushed by decompress
nbits		dw	9			;Current code size in bits (9..12)
max_code	dw	512			;free_code value at which nbits is increased
fin_char	db	?			;Literal reached at the end of the last walk
k		db	?			;Same literal; suffix char stored by add_code
data	ends

;read_code - fetch the next nbits-wide code from input_buffer.
;  Out:  ax = code, masked to nbits bits
;  Uses: bit_offset (advanced by nbits), nbits, masks, input_buffer
;  Clobbers bx, cx, dx, si; on the refill path also di and bp.
;  Three bytes are fetched from the code's byte offset and shifted right by
;  the bit position inside the first byte.  When the byte offset reaches
;  1021 the unread tail of the buffer is moved to the front and the rest of
;  the buffer is refilled from input_handle.
;  The refill uses rep movsb with di = offset input_buffer, so ES must
;  address the data segment; the string instructions also expect DF = 0.
;  The value returned by hread is not examined.
read_code	proc	near
	mov	ax,bit_offset		;Get bit offset
	add	ax,nbits		;Adjust by code size
	xchg	bit_offset,ax		;Store new offset, ax = offset of this code
	mov	cx,8			;Calculate byte offset
	xor	dx,dx
	div	cx			;ax = byte offset, dx = bit offset in byte
	cmp	ax,1021			;Approaching end of buffer?
	jl	rd0			;no

	push	dx			;Save offset in byte
	add	dx,nbits		;Calculate new bit offset
	mov	bit_offset,dx		; (relative to the start of the buffer)
	mov	cx,1024			;1k buffer
	mov	bp,ax			;save byte offset (= bytes already used)
	sub	cx,ax			;Calculate bytes left
;	add	ax,offset input_buffer	;Point to char
	mov	di,offset input_buffer	;TH Point to beginning of buffer
	add	ax,di			;TH Point to char

	mov	si,ax
;	lea	di,input_buffer		;Point to beginning of buffer
rep	movsb				;Move last chars down
	hread	input_handle,[di],bp	;Fill rest of buffer (bp bytes at di)
	xor	ax,ax			;Clear ax (code now starts at byte 0)
	pop	dx			;Restore offset in byte
rd0:	add	ax,offset input_buffer	;Point to char
	mov	si,ax
	lodsw				;Get word
	mov	bx,ax			;Save in BX
	lodsb				;Next byte
	mov	cx,dx			;Offset in byte
	jcxz	rd2			;If zero, skip shifts
rd1:	 shr	al,1			;Put code in low (code size) bits of BX
	 rcr	bx,1
	 loop	rd1
rd2:	mov	ax,bx			;put code in ax
	mov	bx,nbits		;mask off unwanted bits
	sub	bx,9			;masks[] starts at 9 bits
	shl	bx,1			;word index
	and	ax,masks[bx]
	ret
read_code	endp

;Data used by read_code.
data	segment
masks		dw	1ffh,3ffh,7ffh,0fffh	;Bit masks for 9, 10, 11 and 12 bit codes
input_buffer	db	1024 dup (?)		;Compressed input, refilled by read_code
bit_offset	dw	0			;Bit position of the next code in input_buffer
data	ends

;init_tab - return the decoder to its start-of-table state.
;  Sets free_code back to the first assignable code and the code size back
;  to 9 bits with its matching limit of 512.  The table contents are not
;  touched.  No registers or flags are changed.
init_tab	proc	near
	mov	free_code,FIRST_FREE	;Next entry to be assigned
	mov	max_code,512		;Limit that goes with 9-bit codes
	mov	nbits,9			;Back to the smallest code size
	ret
init_tab	endp

;write_char - append one character to the output buffer.
;  In:   al = character
;  Uses: output_offset, output_buffer, output_handle
;  Preserves ax and cx (they are saved around the hwrite macro); di is
;  clobbered, and any other register hwrite uses is not saved here.
;  When the buffer already holds 1024 characters it is written to
;  output_handle first and the offset is reset to 0.
;  The character is stored with a DS-relative move, so the routine does not
;  depend on ES addressing the data segment or on the direction flag (the
;  earlier lea/stosb form needed both).
;  NOTE(review): the result of hwrite is not checked.
write_char	proc	near
	mov	di,output_offset	;Get offset in buffer
	cmp	di,1024			;Full?
	jl	wc1			;no
	 push	ax			;Save registers
	 push	cx
	 hwrite	output_handle,output_buffer,di	;Write buffer to file
	 pop	cx
	 pop	ax
	 xor	di,di	;TH		;Point to beginning of buffer
	 mov	output_offset,di
wc1:	mov	output_buffer[di],al	;Store char (through DS)
	inc	output_offset		;Increment number of chars in buffer
	ret
write_char	endp

;Data used by write_char (and by the final flush in decompress).
data	segment
output_offset	dw	0			;Number of chars currently held in output_buffer
output_buffer	db	1024 dup (?)		;Decompressed output awaiting hwrite
data	ends

;index - turn a code number into the byte offset of its table entry.
;  In:   bx = code
;  Out:  bx = code * 3 (entries are 3 bytes each)
;        bp = original code
;  Flags are left as set by the final add.
index		proc	near
	mov	bp,bx			;keep one copy of the code
	add	bx,bx			;bx = code * 2
	add	bx,bp			;bx = code * 2 + code
	ret
index		endp

;add_code - record a new string in the table.
;  Stores (prefix = old_code, suffix = k) in entry free_code and then
;  increments free_code.
;  Uses: free_code, old_code, k, hash_seg
;  Clobbers ax, bx, bp (through index); ES is preserved.
;  Codes are at most 12 bits, so only entries 0..4095 exist.  If free_code
;  has already reached that limit nothing is stored and free_code is left
;  alone; without this check a stream that never sends CLEAR would make the
;  routine write beyond the end of the table.
TABLE_ENTRIES	equ	4096		;Number of codes a 12-bit table can hold

add_code	proc	near
	mov	bx,free_code		;Get new code
	cmp	bx,TABLE_ENTRIES	;Table already full?
	jae	ac1			;yes - do not write past the end
	call	index			;convert to address
	push	ES			;point to hash table
	mov	ES,hash_seg
	mov	al,k			;get suffix char
	mov	ES:[bx].char,al		;save it
	mov	ax,old_code		;get prefix code
	mov	ES:[bx].next,ax		;save it
	pop	ES
	inc	free_code		;set next code
ac1:	ret
add_code	endp

code	ends				;Closes the code segment opened before start

	end	start			;End of module; start is the entry point
