/* 
 * $Header: f:/src.ssb/util/compress.c,v 1.1.1.1 1991/12/12 21:28:18 ak Exp $
 *
 *	Compress - data compression program 
 *
 * $Log: compress.c,v $
 * Revision 1.1.1.1  1991/12/12  21:28:18  ak
 * Initial checkin.
 *
 * Revision 1.1  1991/12/12  21:28:09  ak
 * Initial revision
 *
 */
static char rcs_ident[] = "@(#) compress,v 4.1 (DOS) 89/11/10 02:43:00 doug Release $";

#ifdef __EMX__
# define PROTO
# define OS2 2
#else
#ifdef __IBMC__
# define PROTO
# define OS2 2
#else
# define i8088
# define MSC
#endif
#endif

/*
 * compress.c - File compression ala IEEE Computer, June 1984.
 *
 * Authors:	Spencer W. Thomas	(decvax!harpo!utah-cs!utah-gr!thomas)
 *		Jim McKie		(decvax!mcvax!jim)
 *		Steve Davies		(decvax!vax135!petsd!peora!srd)
 *		Ken Turkowski		(decvax!decwrl!turtlevax!ken)
 *		James A. Woods		(decvax!ihnp4!ames!jaw)
 *		Joe Orost		(decvax!vax135!petsd!joe)
 *		Doug Graham		(uunet!mitel!sce!tsmith!graham)
 *
 * Revision 4.1 (DOS) 89/11/10 02:43:00 doug
 * Ported to MSDOS. Still works elsewhere, but maybe not as quickly.
 * Removed as much long arithmetic as possible for speed on 16 bit machines.
 * Use unsigned short's instead. Changed secondary hashing function to limit
 * hash table size to 64K. This means table indexes can be 16 bit shorts.
 * This compress will not generate codes from MAXMAXCODE (0xf000) thru
 * 0xffff. Doesn't appear to hurt compression much. Removed speed hacks for
 * other machines so I could understand the code. Added some for the i8088.
 * Send CLEAR immediately when hash table fills instead of waiting for the
 * compression ratio to drop. This is faster, and in some cases improves
 * compression (but more often reduces it slightly). Junked the variable
 * size hash table stuff because I am depending on 16 bit unsigned integer
 * wrap around for indexing into hash table, so the table must have 2^16
 * entries. Took out the XENIX_16 stuff. The DOS way ought to work on Xenix
 * as well, and should be faster, but I don't have access to Xenix in order
 * to find out. Added some extra error checking on decompression to try to
 * avoid blowing the machine out of the water when decompressing a corrupt
 * file. Add "okunlink" to avoid the problem of losing the output file as
 * well as the input file if ^C is hit at the wrong time. Lot's of other
 * cosmetic changes.
 *
 * Revision 4.0  85/07/30  12:50:00  joe
 * Removed ferror() calls in output routine on every output except first.
 * Prepared for release to the world.
 * 
 * Revision 3.6  85/07/04  01:22:21  joe
 * Remove much wasted storage by overlaying hash table with the tables
 * used by decompress: tab_suffix[1<<BITS], stack[8000].  Updated USERMEM
 * computations.  Fixed dump_tab() DEBUG routine.
 *
 * Revision 3.5  85/06/30  20:47:21  jaw
 * Change hash function to use exclusive-or.  Rip out hash cache.  These
 * speedups render the megamemory version defunct, for now.  Make decoder
 * stack global.  Parts of the RCS trunks 2.7, 2.6, and 2.1 no longer apply.
 *
 * Revision 3.4  85/06/27  12:00:00  ken
 * Get rid of all floating-point calculations by doing all compression ratio
 * calculations in fixed point.
 *
 * Revision 3.3  85/06/24  21:53:24  joe
 * Incorporate portability suggestion for M_XENIX.  Got rid of text on #else
 * and #endif lines.  Cleaned up #ifdefs for vax and interdata.
 *
 * Revision 3.2  85/06/06  21:53:24  jaw
 * Incorporate portability suggestions for Z8000, IBM PC/XT from mailing list.
 * Default to "quiet" output (no compression statistics).
 *
 * Revision 3.1  85/05/12  18:56:13  jaw
 * Integrate decompress() stack speedups (from early pointer mods by McKie).
 * Repair multi-file USERMEM gaffe.  Unify 'force' flags to mimic semantics
 * of SVR2 'pack'.  Streamline block-compress table clear logic.  Increase 
 * output byte count by magic number size.
 * 
 * Revision 3.0   84/11/27  11:50:00  petsd!joe
 * Set HSIZE depending on BITS.  Set BITS depending on USERMEM.  Unrolled
 * loops in clear routines.  Added "-C" flag for 2.0 compatibility.  Used
 * unsigned compares on Perkin-Elmer.  Fixed foreground check.
 *
 * Revision 2.7   84/11/16  19:35:39  ames!jaw
 * Cache common hash codes based on input statistics; this improves
 * performance for low-density raster images.  Pass on #ifdef bundle
 * from Turkowski.
 *
 * Revision 2.6   84/11/05  19:18:21  ames!jaw
 * Vary size of hash tables to reduce time for small files.
 * Tune PDP-11 hash function.
 *
 * Revision 2.5   84/10/30  20:15:14  ames!jaw
 * Junk chaining; replace with the simpler (and, on the VAX, faster)
 * double hashing, discussed within.  Make block compression standard.
 *
 * Revision 2.4   84/10/16  11:11:11  ames!jaw
 * Introduce adaptive reset for block compression, to boost the rate
 * another several percent.  (See mailing list notes.)
 *
 * Revision 2.3   84/09/22  22:00:00  petsd!joe
 * Implemented "-B" block compress.  Implemented REVERSE sorting of tab_next.
 * Bug fix for last bits.  Changed fwrite to putchar loop everywhere.
 *
 * Revision 2.2   84/09/18  14:12:21  ames!jaw
 * Fold in news changes, small machine typedef from thomas,
 * #ifdef interdata from joe.
 *
 * Revision 2.1   84/09/10  12:34:56  ames!jaw
 * Configured fast table lookup for 32-bit machines.
 * This cuts user time in half for b <= FBITS, and is useful for news batching
 * from VAX to PDP sites.  Also sped up decompress() [fwrite->putc] and
 * added signal catcher [plus beef in writeerr()] to delete effluvia.
 *
 * Revision 2.0   84/08/28  22:00:00  petsd!joe
 * Add check for foreground before prompting user.  Insert maxbits into
 * compressed file.  Force file being uncompressed to end with ".Z".
 * Added "-c" flag and "zcat".  Prepared for release.
 *
 * Revision 1.10  84/08/24  18:28:00  turtlevax!ken
 * Will only compress regular files (no directories), added a magic number
 * header (plus an undocumented -n flag to handle old files without headers),
 * added -f flag to force overwriting of possibly existing destination file,
 * otherwise the user is prompted for a response.  Will tack on a .Z to a
 * filename if it doesn't have one when decompressing.  Will only replace
 * file if it was compressed.
 *
 * Revision 1.9  84/08/16  17:28:00  turtlevax!ken
 * Removed scanargs(), getopt(), added .Z extension and unlimited number of
 * filenames to compress.  Flags may be clustered (-Ddvb12) or separated
 * (-D -d -v -b 12), or combination thereof.  Modes and other status is
 * copied with copystat().  -O bug for 4.2 seems to have disappeared with
 * 1.8.
 *
 * Revision 1.8  84/08/09  23:15:00  joe
 * Made it compatible with vax version, installed jim's fixes/enhancements
 *
 * Revision 1.6  84/08/01  22:08:00  joe
 * Sped up algorithm significantly by sorting the compress chain.
 *
 * Revision 1.5  84/07/13  13:11:00  srd
 * Added C version of vax asm routines.  Changed structure to arrays to
 * save much memory.  Do unsigned compares where possible (faster on
 * Perkin-Elmer)
 *
 * Revision 1.4  84/07/05  03:11:11  thomas
 * Clean up the code a little and lint it.  (Lint complains about all
 * the regs used in the asm, but I'm not going to "fix" this.)
 *
 * Revision 1.3  84/07/05  02:06:54  thomas
 * Minor fixes.
 *
 * Revision 1.2  84/07/05  00:27:27  thomas
 * Add variable bit length output.
 *
 */

#include <stdio.h>
#include <ctype.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifndef __ZTC__
#include <malloc.h>
#endif
#ifndef BSD4_2
#include <stdlib.h>
#include <io.h>
#endif
#include <string.h>
#include <fcntl.h>
#ifdef MSDOS
#include <dos.h>
#endif

#ifdef __IBMC__
	/* cannot set stdin/out to binary, but can fdopen 0/1 */
# define stdin	std_in
# define stdout	std_out
  FILE *	std_in;
  FILE *	std_out;

# define S_IFMT	0xF000
#endif

#ifdef PROTO
/*
 * Zortech appears to be missing this prototype, and MSC uses some
 * silly structure as the second arg. Turbo C doesn't support this
 * call at all.
 */
extern int utime(char *path, time_t times[]);
#endif

#define BITS		16		/* max number of bits/code */
#define INIT_BITS	9		/* initial number of bits/code */

#define MAXCODE(n_bits)		((code_t)((1L << (n_bits)) - 1))

/*
 * Magic numbers which should appear at the beginning of a compressed file.
 */
#define MAGIC0	0x1f
#define MAGIC1	0x9d

/*
 * Defines for third byte of header
 */
#define BIT_MASK	0x1f
#define BLOCK_MASK	0x80

#if 0
#define CHECK_GAP	10000		/* ratio check interval */
#endif

/*
 * the next two codes should not be changed lightly, as they must not
 * lie within the contiguous general code space.
 */ 
#define FIRST	257		/* first free entry */
#define	CLEAR	256		/* table clear output code */

#define DE_STACKLEN	8192	/* Size of decoder stack */

#define HSIZE	(1L << 16)	/* Size of the hash table. Don't change this */

typedef unsigned char	uchar;
typedef unsigned long	ulong;
typedef unsigned short	code_t;
typedef	unsigned short	hash_t;

#ifdef PROTO
#define ARGS(x)	x
#else
#define ARGS(x)	()
#endif

void		main ARGS((int argc, char **argv));
void		Usage ARGS((void));
void		version ARGS((void));
void		compress ARGS((void));
void		decompress ARGS((void));
void		copystat ARGS((void));
void		writeerr ARGS((void));
void		cl_hash ARGS((void));
void		putcode ARGS((code_t code));
void		prratio ARGS((long num, long den));
int		ofopen ARGS((char *filename));
int		ifopen ARGS((char *filename));
int		check_magic ARGS((void));
int		need_clear ARGS((void));
void		onintr ARGS(());
void		oops ARGS(());
int		taballoc ARGS((void));
void		clearhash ARGS((void));

/*
 * block compression parameters -- after all codes are used up,
 * and compression rate changes, start over.
 */
int		block_compress = BLOCK_MASK;

int		maxbits = BITS;		/* user settable max # bits/code */
int		magic = 1;		/* 3-byte magic number header */
int		zcat_flg = 0;		/* Output on stdout */
int		verbose = 0;		/* don't tell me about compression */
int		force = 0;		/* Force overwrite of output file */
int		do_decomp = 0;		/* Decompress rather than compress. */
char		ofname[100];		/* Output file name */
int		foreground;		/* Running in foreground? */
int		exit_stat = 0;		/* Exit status */
uchar		bitbuf[BITS+2];		/* For (dis)assembling code bytes */
int		okunlink;		/* OK for sig handler to unlink output file */
char		*ifname;

#ifdef i8088

uchar		*de_stack;
uchar far	*charptr1;
uchar far	*codeptrs1[2];
uchar far	*codeptrs2[2];

#define de_suffixof(i)	charptr1[i]
#define de_prefixof(i)	(*(code_t far *)&codeptrs1[i&1][i&~1])

#define en_hashchar(i)	charptr1[i]
#define en_hashent(i)	(*(code_t far *)&codeptrs1[i&1][i&~1])
#define en_hashcode(i)	(*(code_t far *)&codeptrs2[i&1][i&~1])

#ifndef MK_FP
#define MK_FP(seg, ofs) \
	((void far *)(((ulong)(seg) << 16) | (unsigned)(ofs)))
#endif

#define	PARA	16		/* Size of a paragraph */

/*
 * Return a segment address which is the segment part of the normalized
 * version of "fp" rounded upwards.
 * I use this on the far pointers returned by "farmalloc". While
 * they are probably already normalized, I have never seen this
 * stated anywhere in the doc's.
 *
 * There is a lot of junk below which would be unecessary if only
 * there were a reasonably compiler independent way of allocating
 * a given number of PARAGRAPHS (like TC's allocmem). I can't find
 * one though.
 */
#define FP_SEGCEIL(fp) \
	(FP_SEG(fp) + (FP_OFF(fp) + PARA - 1)/PARA)

/*
 * Allocate space for the tables used in {en,de}coding. These tables
 * reside in the far heap. It may seem inefficient to be using far pointers
 * for the base of these tables, because the offset portion will always be zero.
 * We could just keep the segment address of the base, and then do something
 * like:
 *		 *MK_FP(baseseg, offset) = blahblah;
 *
 * whenever we need to access the table. This SHOULD be more efficient,
 * but the compilers do not appear to generate very efficient code in this
 * case. Huge pointers are not used, because they are slow, and because
 * Zortech does not support them.
 */

#ifdef MSC
#define farmalloc(n)	halloc(n, 1)
#endif

int taballoc()
{
	char far *X;

	if (do_decomp) {
		if ((de_stack = malloc(DE_STACKLEN)) == 0)
			return (0);
	}
	else {
		if ((X = farmalloc(HSIZE/2 * sizeof(code_t))) == 0)
			return (0);
		codeptrs2[0] = X;
		if ((X = farmalloc(HSIZE/2 * sizeof(code_t))) == 0)
			return (0);
		codeptrs2[1] = X;
	}

	if ((X = farmalloc(HSIZE * sizeof(char))) == 0)
		return (0);
	charptr1 = X;

	if ((X = farmalloc(HSIZE/2 * sizeof(code_t))) == 0)
		return (0);
	codeptrs1[0] = X;
	if ((X = farmalloc(HSIZE/2 * sizeof(code_t))) == 0)
		return (0);
	codeptrs1[1] = X;

	return (1);
}

#else

uchar	chartab1[HSIZE];
code_t	codetab1[HSIZE];
code_t	codetab2[HSIZE];

#define de_suffixof(i)	chartab1[i]
#define de_prefixof(i)	codetab1[i]
#define de_stack	(uchar *)codetab2

#define en_hashchar(i)	chartab1[i]
#define en_hashent(i)	codetab1[i]
#define en_hashcode(i)	codetab2[i]

#endif

void Usage()
{
	fprintf(stderr, "Usage: compress [-dfvcVnC] [-b maxbits] [file ...]\n");
	fprintf(stderr, "	-V => print Version\n");
	fprintf(stderr, "	-d => decompress\n");
	fprintf(stderr, "	-v => verbose\n");
	fprintf(stderr, "	-f => force overwrite of output file\n");
	fprintf(stderr, "	-n => no header: useful to uncompress old files\n");
	fprintf(stderr, "	-b maxbits => maxbits. Default %d\n", BITS);
	fprintf(stderr, "	-c => cat all output to stdout\n");
	fprintf(stderr, "	-C => generate output compatible with compress 2.0.\n");
}

/*****************************************************************
 * TAG( main )
 *
 * Algorithm from "A Technique for High Performance Data Compression",
 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
 *
 * Usage: compress [-dfvc] [-b bits] [file ...]
 * Inputs:
 *	-d:	    If given, decompression is done instead.
 *
 *      -c:         Write output on stdout, don't remove original.
 *
 *      -b:         Parameter limits the max number of bits/code.
 *
 *	-f:	    Forces output file to be generated, even if one already
 *		    exists, and even if no space is saved by compressing.
 *		    If -f is not used, the user will be prompted if stdin is
 *		    a tty, otherwise, the output file will not be overwritten.
 *
 *      -v:	    Write compression statistics
 *
 * 	file ...:   Files to be compressed.  If none specified, stdin
 *		    is used.
 * Outputs:
 *	file.Z:	    Compressed form of file with same mode, owner, and utimes
 * 	or stdout   (if stdin used as input)
 *
 * Assumptions:
 *	When filenames are given, replaces with the compressed version
 *	(.Z suffix) only if the file decreases in size.
 * Algorithm:
 * 	Modified Lempel-Ziv method (LZW).  Basically finds common
 * substrings and replaces them with a variable size code.  This is
 * deterministic, and can be done on the fly.  Thus, the decompression
 * procedure needs no input table, but tracks the way the table was built.
 */

#ifdef __ZTC__
#include <int.h>
int silly_nonsense(struct INT_DATA *foo) {raise(SIGINT); return 1;}
#endif

#define ARGVAL() (*++(*argv) || (--argc && *++argv))

void main(argc, argv)
int argc;
char **argv;
{
	char tempname[100], *cp;

	if (signal(SIGINT, SIG_IGN) != SIG_IGN) {
		signal(SIGINT, onintr);
#ifdef __ZTC__
		/*
		 * The "signal" call above isn't good enough for Zortech
		 */
		int_intercept(0x23, silly_nonsense, 256);
#endif
#ifdef SIGSEGV
		signal(SIGSEGV, oops);
#endif
		if (isatty(2))
			foreground = 1;
	}

#if !defined(MSDOS) && !defined(OS2)
	if ((cp = strrchr(argv[0], '/')) != 0)
		cp++;
	else
		cp = argv[0];
#else
	for (cp = argv[0]; *cp; cp++)
		if (*cp == '/' || *cp == '\\')
			argv[0] = cp + 1;
	cp = strlwr(argv[0]);
#endif
	/* Limited to 8 char filenames under DOS */
	if (strncmp(cp, "uncompress", 8) == 0)
		do_decomp = 1;
	else if (strncmp(cp, "zcat", 4) == 0) {
		do_decomp = 1;
		zcat_flg = 1;
	}

#ifdef BSD4_2
	/* 4.2BSD dependent - take it out if not */
	setlinebuf(stderr);
#endif /* BSD4_2 */

	for (argc--, argv++; argc > 0 && **argv == '-'; argc--, argv++) {
		while (*++(*argv)) {	/* Process all flags in this arg */
			switch (**argv) {
			case 'V':
				version();
				break;
			case 'v':
				verbose = 1;
				break;
			case 'd':
				do_decomp = 1;
				break;
			case 'f':
			case 'F':
				force = 1;
				break;
			case 'n':
				magic = 0;
				break;
			case 'C':
				block_compress = 0;
				break;
			case 'b':
				if (!ARGVAL()) {
					fprintf(stderr, "Missing maxbits\n");
					Usage();
					exit(1);
				}
				maxbits = atoi(*argv);
				goto nextarg;
			case 'c':
				zcat_flg = 1;
				break;
			case 'q':
				verbose = 0;
				break;
			default:
				fprintf(stderr, "Unknown flag: '%c'; ", **argv);
				Usage();
				exit(1);
			}
		}
nextarg:;
	}

#ifdef i8088
	if (! taballoc()) {
		fprintf(stderr, "compress: out of memory\n");
		exit(1);
	}
#endif
	/*
	 * If no filename args, do standard input.
	 */
	if (argc <= 0) {
		if (! ifopen((char *)0) || ! ofopen((char *)0))
			exit(1);

		ifname = "stdin";

		if (do_decomp) {
			if (!check_magic())
				exit(1);
			decompress();
		}
		else {
			compress();
			if (verbose)
				putc('\n', stderr);
		}
		exit(exit_stat);
	}

	while (--argc >= 0) {
		char *suf;

		ifname = *argv++;
		suf = strrchr(ifname, '.');

		exit_stat = 0;
		okunlink = 0;

		if (do_decomp) {		/* DECOMPRESSION */
			if (!suf || (strcmp(suf, ".Z") && strcmp(suf, ".z"))) {
				strcpy(tempname, ifname);
				strcat(tempname, ".Z");
				ifname = tempname;
			}
			if (! ifopen(ifname) || !check_magic())
				continue;
			if (zcat_flg)
				ofname[0] = '\0';
			else {
				strcpy(ofname, ifname);
				ofname[strlen(ifname) - 2] = '\0';
			}
			if (!ofopen(ofname))
				continue;
			if (!zcat_flg && verbose)
				fprintf(stderr, "%s: ", ifname);
			decompress();
		}
		else {				/* COMPRESSION */
			if (suf && (!strcmp(suf, ".Z") || !strcmp(suf, ".z"))) {
				fprintf(stderr, "%s: already has .Z suffix -- no change\n",
				    ifname);
				continue;
			}
			if (! ifopen(ifname))
				continue;
			if (zcat_flg)
				ofname[0] = 0;
			else {
				strcpy(ofname, ifname);
#ifndef MSDOS	/* We'll let ofopen do the complaining */
#ifndef BSD4_2
#ifndef OS2
				if ((cp = strrchr(ofname, '/')) != NULL)
					cp++;
				else
					cp = ofname;
				if (strlen(cp) > 12) {
					fprintf(stderr,"%s: filename too long to tack on .Z\n",cp);
					continue;
				}
#endif
#endif
#endif
				strcat(ofname, ".Z");
			}
			if (! ofopen(ofname))
				continue;
			if (! zcat_flg && verbose)
				fprintf(stderr, "%s: ", ifname);
			compress();
		}

		if (! zcat_flg) {
			copystat();
			if ((exit_stat == 1) || verbose)
				putc('\n', stderr);
		}
	}
	exit(exit_stat);
}

/*
 * compress stdin to stdout
 *
 * Algorithm:  use open addressing double hashing (no chaining) on the 
 * prefix code / next character combination.  We do a variant of Knuth's
 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
 * secondary probe.  Here, the modular division first probe is gives way
 * to a faster exclusive-or manipulation.  Also do block compression with
 * an adaptive reset, whereby the code table is cleared when the compression
 * ratio decreases, but after the table fills.  The variable-length output
 * codes are re-sized at this point, and a special CLEAR code is generated
 * for the decompressor.  Late addition:  construct the table according to
 * file size for noticeable speed improvement on small files.  Please direct
 * questions about this implementation to ames!jaw.
 *
 * Secondary hash function changed slightly for DOS. Hash table used to be
 * > 64K. This is slow on a 16 bit machine because it means long arithmetic,
 * and more complicated addressing of tables in the far address space.
 * We now restrict the table size to 64K, and, so that the table does
 * not overfill, restrict the codes that we will generate to MAXMAXCODE.
 * This causes slightly poorer compression in some cases, but, interestingly
 * enough, also causes better compression ratios in certain other cases.
 * Yes, this is all compatible with other compresses.
 */
static long	in_count;		/* length of input */
static long	out_count;		/* length of compressed output */
static long	ratio;			/* in_count/out_count * 256 */
static int	n_bits;			/* number of bits/code */
static int	n_bits8;		/* bits/code times 8 */
static int	bitoffset;		/* Offset into bitbuf */

#define NOENT		((code_t)0xffff)
#define MAXMAXCODE	((code_t)0xf000)

/*
 * Clear out the hash table. We try to do this as quickly as possible, because
 * it's running time dominates for small files. For big files, it doesn't matter
 * much because it doesn't get called often. Now I understand why the original
 * had a variable size hash table.
 */
void clearhash()
{
#ifdef i8088
	register unsigned i;
	code_t far *hp;

	hp = (code_t far *)codeptrs1[0];
	i = (unsigned)(HSIZE/2);
	do
		*hp++ = NOENT;
	while (--i > 0);

	hp = (code_t far *)codeptrs1[1];
	i = (unsigned)(HSIZE/2);
	do
		*hp++ = NOENT;
	while (--i > 0);
#else
	/*
	 * WARNING: assumes that NOENT == 0xffff
	 */
	memset((char *)codetab1, 0xff, HSIZE*sizeof(code_t));
#endif
}

/*
 * Compress stdin to stdout.
 */
void compress()
{
	register hash_t	i;
	register code_t	ent;
	hash_t		disp;
	int		c;
	code_t		freecode;	/* first unused entry */
	code_t		maxcode;	/* maximum code, given n_bits */
	code_t		maxmaxcode;
	code_t		k;
#ifdef CHECK_GAP
	long		checkpoint = 0;
#endif

	if (maxbits < INIT_BITS)
		maxbits = INIT_BITS;
	if (maxbits > BITS)
		maxbits = BITS;

	if (magic) {
		putchar(MAGIC0); putchar(MAGIC1);
		putchar(maxbits | block_compress);
		if (ferror(stdout))
			writeerr();
	}

	bitbuf[bitoffset = 0] = 0;
	out_count = 3;			/* includes 3-byte header mojo */
	ratio = 0;
	in_count = 1;

	n_bits = INIT_BITS;
	n_bits8 = INIT_BITS << 3;
	maxcode = MAXCODE(INIT_BITS);
	maxmaxcode = MAXCODE(maxbits);
	if (maxmaxcode > MAXMAXCODE)
		maxmaxcode = MAXMAXCODE;

	freecode = ((block_compress) ? FIRST : 256);

	clearhash();

	ent = getchar();

	while ((c = getchar()) != EOF) {
		in_count++;

		i = (hash_t)(c << 8) ^ ent;		/* xor hashing */

		if ((k = en_hashent(i)) == ent && en_hashchar(i) == (uchar)c) {
			ent = en_hashcode(i);
			goto Continue;
		}

		if (k != NOENT) {
			/*
			 * New secondary hash for 64K table.
			 * Experiment shows that the shift by 6 works well.
			 * Beats me why. "disp" must be relatively
			 * prime to the table size. Since the table size is a
			 * power of 2, this means "disp" must be odd.
			 *
			 * Note that we do not do a range check before doing
			 * "i -= disp". It is assumed that the hash table size
			 * (HSIZE) is 64K, and that the type "hash_t" (which
			 * is unsigned short) is 16 bits. Thus it is impossible
			 * for "i" to be out of range. On a machine with something
			 * other than 16 bit shorts, this would have to change.
			 */
			disp = ((hash_t)(c << 6) ^ ent) | 1;
			do {
				i -= disp;
				if ((k = en_hashent(i)) == ent &&
				    en_hashchar(i) == (uchar)c) {
					ent = en_hashcode(i);
					goto Continue;
				}
			} while (k != NOENT);
		}

		putcode(ent);

		if (freecode <= maxmaxcode) {
			/*
			 * Add the new entry.
			 */
			en_hashchar(i) = (uchar)c;
			en_hashent(i) = ent;
			en_hashcode(i) = freecode;

			/*
			 * If the next entry is going to be too big for the
			 * code size, then increase it, if possible.
			 */
			if (freecode++ > maxcode) {
				while (bitoffset)
					putcode(0);
				++n_bits;
				n_bits8 += 8;
				maxcode = MAXCODE(n_bits);
			}
		}
#ifdef CHECK_GAP
		else if (in_count >= checkpoint && block_compress) {
			checkpoint = in_count + CHECK_GAP;
			if (need_clear()) {
#else
		else if (block_compress) {
			if (1) {
#endif
				putcode(CLEAR);
				while (bitoffset > 0)
					putcode(0);
				clearhash();
				freecode = FIRST;
				maxcode = MAXCODE(INIT_BITS);
				n_bits = INIT_BITS;
				n_bits8 = n_bits << 3;
			}
		}
		ent = c;
Continue:;
	}
	/*
	 * Put out the final code.
	 */
	putcode(ent);

	/*
	 * At EOF, write the rest of the buffer.
	 */
	if (bitoffset > 0)
		fwrite(bitbuf, 1, (bitoffset + 7) / 8, stdout);
	out_count += (bitoffset + 7) / 8;
	fflush(stdout);
	if (ferror(stdout))
		writeerr();

	/*
	 * Print out stats on stderr
	 */
	if (! zcat_flg && verbose) {
		fprintf(stderr, "Compression: ");
		prratio(in_count - out_count, in_count);
	}
	if (out_count > in_count)	/* exit(2) if no savings */
		exit_stat = 2;
}

/*
 * Output the given code. Assumes that chars are 8 bits.
 * "n_bits" output bytes (containing 8 codes) are assembled
 * in in "bitbuf", and then written out.
 */
#ifdef PROTO
void putcode(code_t code)
#else
void putcode(code)
code_t code;
#endif
{
	register int i;
	register uchar *bp;

	bp = &bitbuf[(bitoffset >> 3)];
	i = bitoffset & 7;
	bp[0] |= (uchar)(code << i);
	bp[1] = (uchar)(code >>= (8 - i));
	bp[2] = (uchar)(code >> 8);

	if ((bitoffset += n_bits) == n_bits8) {
		bp = bitbuf;
		i = n_bits;
		out_count += i;
		do
			putchar(*bp++);
		while (--i);
		bitbuf[bitoffset = 0] = 0;
	}
}

#ifdef CHECK_GAP
/*
 * Compute the current compression ratio, and return non-zero if
 * it is has decreased since the last we checked.
 *
 * Don't use this anymore. Whenever the hash table fills,
 * we send a CLEAR immediately (if block_compress). This is faster,
 * and doesn't appear to affect the compression ratio much.
 */
int need_clear()
{
	long rat;

	if (in_count > 0x007fffffL) {		/* shift will overflow */
		rat = out_count >> 8;
		if (rat == 0)	     		/* Don't divide by zero */
			rat = 0x7fffffffL;
		else
			rat = in_count / rat;
	} else
		rat = (in_count << 8) / out_count;

	if (rat > ratio) {
		ratio = rat;
		return (0);
	}
	else {
		ratio = 0;
		return (1);
	}
}
#endif

/*
 * Decompress stdin to stdout. This code assumes that chars are 8 bits.
 */
void decompress()
{
	register uchar	*stackp;
	register code_t	code;
	code_t		oldcode, incode;
	code_t		codemask;
	code_t		freecode;		/* first unused entry */
	code_t		maxcode;		/* maximum code, given n_bits */
	code_t		maxmaxcode;
	int		finchar;
	int		size;			/* #bits in bitbuf */
	int		bitoff;			/* Offset into bitbuf */
	int		n_bits;			/* number of bits/code */
#ifndef i8088
	register uchar	*bp;
#endif

	n_bits = INIT_BITS;
	maxcode = MAXCODE(INIT_BITS) - 1;
	codemask = MAXCODE(INIT_BITS);
	freecode = ((block_compress) ? FIRST : 256) - 1;
	maxmaxcode = MAXCODE(maxbits);

	/*
	 * Read the first code into "oldcode"
	 */
	if ((size = fread(bitbuf, 1, n_bits, stdin)) <= 0)
		return;
	size = (size << 3) - (n_bits - 1);
	oldcode = (bitbuf[0] | (bitbuf[1] << 8)) & codemask;
	bitoff = n_bits;

	/*
	 * First code must be 8 bits == char. Write it, and die
	 * if it can't be written.
	 */
	putchar(finchar = oldcode);
	if (ferror(stdout))
		writeerr();

	stackp = de_stack;

	for ( ; ; ) {
		if (bitoff >= size) {
			if ((size = fread(bitbuf, 1, n_bits, stdin)) <= 0)
				break;
			/* Round size down to integral number of codes */
			size = (size << 3) - (n_bits - 1);
			bitoff = 0;
		}
		/*
		 * Read the next code into "code". On the 8088,
		 * a slight speedup is possible because it has the right byte
		 * order, and no alignment restrictions.
		 */
#ifdef i8088
		code = ((code_t)(*(long *)&bitbuf[(bitoff >> 3)] >>
			 (bitoff&7))) & codemask;
#else
		bp = &bitbuf[(bitoff >> 3)];
		code = (code_t)(((bp[0] | (code_t)bp[1] << 8) |
		     (ulong)bp[2] << 16) >> (bitoff & 7)) & codemask;
#endif
		bitoff += n_bits;

		if ((code == CLEAR) && block_compress) {
			n_bits = INIT_BITS;
    			maxcode = MAXCODE(INIT_BITS) - 1;
			codemask = MAXCODE(INIT_BITS);
			freecode = (FIRST - 1) - 1;
			size = 0;
			continue;
		}
		incode = code;

		/*
		 * Special case for KwKwK string.
		 */
		if (code > freecode) {
			if (code != freecode + 1)
				oops();
        		*stackp++ = (uchar)finchar;
			code = oldcode;
		}

		/*
		 * Generate output characters in reverse order
		 */
		while (code >= 256) {
			*stackp++ = de_suffixof(code);
			code = de_prefixof(code);
		}

		/*
		 * And write them out in the forward order.
		 */
		putchar(finchar = code);
		for (code = (stackp - de_stack) + 1; --code != 0; )
			putchar(*--stackp);

		/*
		 * Generate the new entry.
		 */
		if (freecode < maxmaxcode) {
			if (++freecode > maxcode) {
				if (++n_bits == maxbits)
					maxcode = maxmaxcode;
				else
					maxcode = MAXCODE(n_bits) - 1;
				size = 0;
				codemask = MAXCODE(n_bits);
			}
			de_prefixof(freecode) = oldcode;
			de_suffixof(freecode) = (uchar)finchar;
		} 
		/*
		 * Remember previous code.
		 */
		oldcode = incode;
	}
	fflush(stdout);
	if (ferror(stdout))
		writeerr();
}

/*
 * Check a compressed file to make sure it has the proper magic number
 * at the beginning. Also read the third byte to determine "maxbits",
 * and "block_compress".
 */
int check_magic()
{
	if (! magic)
		return (1);
	if ((getchar() != MAGIC0) || (getchar() != MAGIC1)) {
		fprintf(stderr, "%s: not in compressed format\n", ifname);
		return (0);
	}
	maxbits = getchar();	/* set -b from file */
	block_compress = maxbits & BLOCK_MASK;
	maxbits &= BIT_MASK;
	if (maxbits > BITS) {
		fprintf(stderr,
		   "%s: compressed with %d bits, can only handle %d bits\n",
		    ifname, maxbits, BITS);
		return (0);
	}
	return (1);
}

void writeerr()
{
	perror(ofname);
	fclose(stdout);
	unlink(ofname);
	exit(1);
}

/*
 * Copy the permissions and file times from the input file to the
 * output.
 */
void copystat()
{
	struct stat statbuf;
	int mode;
	void (* ss)();
#ifndef __TURBOC__
	time_t timep[2];
#else
	struct ftime filetime;
	int fd;
#endif

	fclose(stdout);
	if (stat(ifname, &statbuf)) {		/* Get stat on input file */
		perror(ifname);
		return;
	}
	if ((statbuf.st_mode & S_IFMT) != S_IFREG) {
		if (! verbose)
		    	fprintf(stderr, "%s: ", ifname);
		fprintf(stderr, " -- not a regular file: unchanged");
		exit_stat = 1;
	}
	else if (statbuf.st_nlink > 1) {
		if (! verbose)
			fprintf(stderr, "%s: ", ifname);
		fprintf(stderr, " -- has %d other links: unchanged",
			statbuf.st_nlink - 1);
		exit_stat = 1;
	}
	else if (exit_stat == 2 && !force) { /* No compression: remove file.Z */
		if (verbose)
			fprintf(stderr, " -- file unchanged");
	}
	else {			/* ***** Successful Compression ***** */
		exit_stat = 0;
		mode = statbuf.st_mode & 07777;
#ifndef __ZTC__
		if (chmod(ofname, mode))		/* Copy modes */
			perror(ofname);
#endif
#ifndef MSDOS
#ifndef OS2
		chown(ofname, statbuf.st_uid, statbuf.st_gid);	/* Copy ownership */
#endif
#endif
#ifndef __TURBOC__
		timep[0] = statbuf.st_atime;
		timep[1] = statbuf.st_mtime;
		utime(ofname, timep);
#else
		if ((fd = open(ofname, O_RDONLY)) >= 0) {
			if (getftime(fileno(stdin), &filetime) == 0)
				setftime(fd, &filetime);
			close(fd);
		}
#endif
		fclose(stdin);
		ss = signal(SIGINT, SIG_IGN);
		okunlink = 0;
		/* ^C here would leave both input, and output files around */
		if (unlink(ifname))	/* Remove input file */
			perror(ifname);
		signal(SIGINT, ss);
		if (verbose)
			fprintf(stderr, " -- replaced with %s", ofname);
		return;		/* Successful return */
	}

	/* Unsuccessful return -- one of the tests failed */

	if (unlink(ofname))
		perror(ofname);
}

void onintr()
{
	fclose(stdout);
	if (okunlink)
		unlink(ofname);
	exit(1);
}

void oops()	/* wild pointer -- assume bad input */
{
	if (do_decomp) 
		fprintf (stderr, "uncompress: %s is corrupt.\n", ifname);
	fclose(stdout);
	if (okunlink)
		unlink(ofname);
	exit(1);
}

void prratio(num, den)
long int num, den;
{
	register int q;				/* Doesn't need to be long */

	if (num > 214748L)			/* 2147483647/10000 */
		q = (int)(num / (den / 10000L));
	else
		q = (int)(10000L * num / den);	/* Long calculations, though */
	if (q < 0) {
		putc('-', stderr);
		q = -q;
	}
	fprintf(stderr, "%d.%02d%%", q / 100, q % 100);
}

void version()
{
	fprintf(stderr, "%s\n", rcs_ident);
	fprintf(stderr, "BITS = %d\n", BITS);
}

/*
 * Open the file "ofname" for binary output with possible check
 * for overwrite. If all goes well, return non-zero, else zero.
 */
int ofopen(filename)
char *filename;
{
	static char IOoutbuf[8192];
	struct stat statbuf;

	if (filename && !*filename)
		filename = 0;

	/*
	 * Check for overwrite of existing file
	 */
	if (filename && !force && stat(filename, &statbuf) == 0) {
		char response[2];
		response[0] = 'n';
		fprintf(stderr, "%s already exists;", filename);
		if (foreground) {
			fprintf(stderr, " do you wish to overwrite %s (y or n)? ", filename);
			fflush(stderr);
			read(2, response, 2);
			while (response[1] != '\n') {
				if (read(2, response+1, 1) < 0)	{ /* Ack! */
					perror("stderr");
					break;
				}
			}
		}
		if (response[0] != 'y') {
			fprintf(stderr, "\tnot overwritten\n");
			return (0);
		}
	}

	okunlink = 1;
	/*
	 * Open the output file.
	 */
#ifdef __IBMC__
	if (filename) {
		if ((stdout = fopen(filename, "wb")) == NULL) {
			perror(filename);
			return (0);
		}
	} else {
		if ((stdout = fdopen(1, "wb")) == NULL) {
			perror("stdout");
			return (0);
		}
	}
#else
	if (filename && !freopen(filename, "wb", stdout)) {
		perror(filename);
		return (0);
	}
# ifdef __ZTC__
	/*
	 * I'm sure there must be a better way in Zortech C to change the
	 * mode of an already opened file, but I can't find it. It doesn't
	 * have a "setmode" call it seems.
	 */
	stdout->_flag &= ~_IOTRAN;
# else
#  ifdef O_BINARY
	setmode(fileno(stdout), O_BINARY);
#  endif
# endif
#endif
	setvbuf(stdout, IOoutbuf, _IOFBF, sizeof(IOoutbuf));
	return (1);
}

ifopen(filename)
char *filename;
{
	static char IOinbuf[8192];

#ifdef __IBMC__
	if (filename) {
		if ((stdin = fopen(filename, "rb")) == NULL) {
			perror(filename);
			return (0);
		}
	} else {
		if ((stdin = fdopen(0, "rb")) == NULL) {
			perror("stdin");
			return (0);
		}
	}
#else
	if (filename && !freopen(filename, "rb", stdin)) {
		perror(filename);
		return (0);
	}
# ifdef __ZTC__
	stdin->_flag &= ~_IOTRAN;
# else
#  ifdef O_BINARY
	setmode(fileno(stdin), O_BINARY);
#  endif
# endif
#endif
	setvbuf(stdin, IOinbuf, _IOFBF, sizeof(IOinbuf));
	return (1);
}
