
/*
 *  COMPRESS.C
 *
 */

#include "defs.h"

#define ngetchar()  oreadchar()
#define nputchar(n) mputc(n)

#ifndef min
#define min(a,b)        ((a>b) ? b : a)
#endif

#define BITS		13

#ifdef _DCC
#define HSIZE  9001
#else

#if BITS == 16
#define HSIZE  69001	       /* 95% occupancy */
#endif
#if BITS == 15
#define HSIZE  35023	       /* 94% occupancy */
#endif
#if BITS == 14
#define HSIZE  18013	       /* 91% occupancy */
#endif
#if BITS == 13
#define HSIZE  9001	       /* 91% occupancy */
#endif
#if BITS <= 12
#define HSIZE  5003	       /* 80% occupancy */
#endif

#endif

typedef long		code_int;
typedef long		count_int;
typedef unsigned char	char_type;

#define MAXCODE(n_bits)  ((1 << (n_bits)) - 1)
#define INIT_BITS 9			/* initial number of bits/code */

int n_bits;				/* number of bits/code		    */
int maxbits;				/* user settable max # bits/code    */
code_int maxcode;			/* maximum code, given n_bits	    */
code_int maxmaxcode;			/* should NEVER generate this code  */

__far count_int   htab[HSIZE];
__far uword	  codetab[HSIZE];

#define htabof(i)       htab[i]
#define codetabof(i)    codetab[i]

code_int hsize = HSIZE; 		/* for dynamic table sizing */

#define tab_prefixof(i)     codetabof(i)
#define tab_suffixof(i)     ((char_type *)(htab))[i]
#define de_stack	    ((char_type *)&tab_suffixof(1<<BITS))

code_int free_ent;			/* first unused entry */
code_int getcode();
void output (code_int);
void cl_block(void);
void cl_hash(count_int);

#define CHECK_GAP 10000 /* ratio check interval */

int	block_compress = 1;
int	clear_flg;
long	ratio;
count_int checkpoint;

/*
 * the next two codes should not be changed lightly, as they must not
 * lie within the contiguous general code space.
 */

#define FIRST	257	/* first free entry */
#define CLEAR	256	/* table clear output code */

static int offset;
long int in_count = 1;			/* length of input */

/*
 *  Compress a file to memory-buffers and return TRUE if the compressed
 *  size is smaller than the actual size.
 */

long
CompressFile(name, fsize)
char *name;
long fsize;
{
    long fcode;
    code_int i = 0;
    int c;
    code_int ent;
    int disp;
    code_int hsize_reg;
    int hshift;

    {
	register NODE *node;
	for (node = (NODE *)CSList.mlh_Head; node->ln_Succ; node = node->ln_Succ) {
	    if (WildCmp(node->ln_Name, name)) {
		if (Verbose)
		    printf("  Will not compress %s\n", name);
		return(0);
	    }
	}
    }

    if (WildCmp("*.Z", name) || WildCmp("*.ARC", name) || WildCmp("*.ZOO", name)) {
	if (Verbose)
	    printf("  Will not compress %s\n", name);
	return(0);
    }

    CLen = 0;
    CWrite = GetHead(&CList);
    if (CWrite == NULL)
	CWrite = NewSComp();
    CWrite->N = 0;

    setmem(htab, sizeof(htab), 0);
    setmem(codetab, sizeof(codetab), 0);

    hsize = HSIZE;
    if ( fsize < (1 << 12) )
	hsize = min ( 5003, HSIZE );
    else if ( fsize < (1 << 13) )
	hsize = min ( 9001, HSIZE );
    else if ( fsize < (1 << 14) )
	hsize = min ( 18013, HSIZE );
    else if ( fsize < (1 << 15) )
	hsize = min ( 35023, HSIZE );
    else if ( fsize < 47000 )
	hsize = min ( 50021, HSIZE );

    offset = clear_flg = ratio = 0;
    in_count = 1;
    checkpoint = CHECK_GAP;
    n_bits  = INIT_BITS;		/* number of bits/code		    */
    maxbits = BITS;			/* user settable max # bits/code    */
    maxcode = MAXCODE(INIT_BITS);       /* maximum code, given n_bits       */
    maxmaxcode = 1 << BITS;		/* should NEVER generate this code  */
    free_ent = ((block_compress) ? FIRST : 256 );

    ent = ngetchar();

    hshift = 0;
    for ( fcode = (long) hsize;  fcode < 65536L; fcode *= 2L )
	hshift++;
    hshift = 8 - hshift;		/* set hash code range bound */

    hsize_reg = hsize;
    cl_hash((count_int)hsize_reg);      /* clear hash table */

    /*printf("hshift %d hsr %d,%d\n", hshift, hsize_reg, hsize);*/

    while ((c = ngetchar()) != EOF) {
	in_count++;
	fcode = (long) (((long) c << maxbits) + ent);
	i = ((c << hshift) ^ ent);      /* xor hashing */

	/*printf("c %02x fcode %08lx i %d ent %08lx\n", c, fcode, i, ent);*/

	if (i >= hsize_reg) {
	    /*printf("compress error A1 : %d on $%02x ent %lx\n", i, c, ent);*/
	    if (i >= HSIZE) {
		puts("XXX1");
		continue;
	    }
	}

	if (htabof (i) == fcode) {
	    ent = codetabof(i);
	    continue;
	} else if ((long)htabof (i) < 0)    /* empty slot */
	    goto nomatch;
	disp = hsize_reg - i;		/* secondary hash (after G. Knott) */
	if (i == 0)
	    disp = 1;
probe:
	if ((i -= disp) < 0)
	    i += hsize_reg;

	if (i >= hsize_reg || i < 0) {
	    /*printf("compress error A2 : %d on $%02x\n", i, c);*/
	    if (i >= HSIZE) {
		puts("XXX2");
		continue;
	    }
	}


	if (htabof (i) == fcode) {
	    ent = codetabof(i);
	    continue;
	}
	if ((long)htabof (i) > 0)
	    goto probe;
nomatch:
	output ((code_int) ent);
	ent = c;
	if (free_ent < maxmaxcode) {
	    codetabof(i) = free_ent++; /* code -> hashtable */
	    htabof(i) = fcode;
	}
	else if ((count_int)in_count >= checkpoint && block_compress)
	    cl_block ();
    }

    /*
     * Put out the final code.
     */

    output((code_int)ent);
    output((code_int)-1);

    return(CLen < fsize);
}

static char buf[BITS];

char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};

void
output( code )
code_int  code;
{
    register int r_off = offset, bits= n_bits;
    register char * bp = buf;

    if ( code >= 0 ) {
	/*
	 * Get to the first byte.
	 */
	bp += (r_off >> 3);
	r_off &= 7;
	/*
	 * Since code is always >= 8 bits, only need to mask the first
	 * hunk on the left.
	 */
	*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
	bp++;
	bits -= (8 - r_off);
	code >>= 8 - r_off;
	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
	if ( bits >= 8 ) {
	    *bp++ = code;
	    code >>= 8;
	    bits -= 8;
	}
	/* Last bits. */
	if(bits)
	    *bp = code;

	offset += n_bits;
	if (offset == (n_bits << 3)) {
	    bp = buf;
	    bits = n_bits;
	    mwrite(bp, bits);
	    bp += bits;
	    bits = 0;
	    offset = 0;
	}

	/*
	 * If the next entry is going to be too big for the code size,
	 * then increase it, if possible.
	 */

	if (free_ent > maxcode || (clear_flg > 0)) {
	    /*
	     * Write the whole buffer, because the input side won't
	     * discover the size increase until after it has read it.
	     */
	    if (offset > 0)
		mwrite(buf, n_bits);
	    offset = 0;

	    if (clear_flg) {
		n_bits = INIT_BITS;
		maxcode = MAXCODE(INIT_BITS);
		clear_flg = 0;
	    } else {
		n_bits++;
		if (n_bits == maxbits)
		    maxcode = maxmaxcode;
		else
		    maxcode = MAXCODE(n_bits);
	    }
	}
    } else {
	/*
	 * At EOF, write the rest of the buffer.
	 */
	if (offset > 0)
	    mwrite(buf, (offset + 7) / 8);
	offset = 0;
    }
}


char *
xrindex(s, c)            /* For those who don't have it in libc.a */
register char *s, c;
{
    char *p;
    for (p = NULL; *s; s++) {
	if (*s == c)
	    p = s;
    }
    return(p);
}

void
cl_block()             /* table clear for block compress */
{
    register long int rat;

    checkpoint = in_count + CHECK_GAP;

    if (in_count > 0x007fffff) { /* shift will overflow */
	rat = CLen >> 8;
	if (rat == 0) {          /* Don't divide by zero */
	    rat = 0x7fffffff;
	} else {
	    rat = in_count / rat;
	}
    } else {
	rat = (in_count << 8) / CLen;      /* 8 fractional bits */
    }
    if (rat > ratio) {
	ratio = rat;
    } else {
	ratio = 0;
	cl_hash ( (count_int) hsize );
	free_ent = FIRST;
	clear_flg = 1;
	output ( (code_int) CLEAR );
    }
}

void
cl_hash(hsize)          /* reset code table */
	register count_int hsize;
{
	register count_int *htab_p = htab+hsize;
	register long i;
	register long m1 = -1;

	i = hsize - 16;
	do {				/* might use Sys V memset(3) here */
		*(htab_p-16) = m1;
		*(htab_p-15) = m1;
		*(htab_p-14) = m1;
		*(htab_p-13) = m1;
		*(htab_p-12) = m1;
		*(htab_p-11) = m1;
		*(htab_p-10) = m1;
		*(htab_p-9) = m1;
		*(htab_p-8) = m1;
		*(htab_p-7) = m1;
		*(htab_p-6) = m1;
		*(htab_p-5) = m1;
		*(htab_p-4) = m1;
		*(htab_p-3) = m1;
		*(htab_p-2) = m1;
		*(htab_p-1) = m1;
		htab_p -= 16;
	} while ((i -= 16) >= 0);
	for ( i += 16; i > 0; i-- )
		*--htab_p = m1;
}

void
UnCompressFile(insize)
long insize;
{
    register char_type *stackp;
    register int finchar;
    register code_int code, oldcode, incode;

    /*
     * As above, initialize the first 256 entries in the table.
     */

    setmem(htab, sizeof(htab), 0);
    setmem(codetab, sizeof(codetab), 0);

    offset = clear_flg = ratio = 0;
    in_count = 1;
    checkpoint = CHECK_GAP;
    n_bits  = INIT_BITS;		/* number of bits/code		    */
    maxbits = BITS;			/* user settable max # bits/code    */
    maxcode = MAXCODE(INIT_BITS);       /* maximum code, given n_bits       */
    maxmaxcode = 1 << BITS;		/* should NEVER generate this code  */

    for ( code = 255; code >= 0; code-- ) {
	tab_prefixof(code) = 0;
	tab_suffixof(code) = (char_type)code;
    }
    free_ent = ((block_compress) ? FIRST : 256 );

    finchar = oldcode = getcode();
    if (oldcode == -1)          /* EOF already? */
	return; 		/* Get out of here */
    oputc((char)finchar);       /* first code must be 8 bits = char */
    stackp = de_stack;

    while ((code = getcode()) > -1) {
	if ((code == CLEAR) && block_compress) {
	    for (code = 255; code >= 0; code--)
		tab_prefixof(code) = 0;
	    clear_flg = 1;
	    free_ent = FIRST - 1;
	    if ((code = getcode()) == -1)   /* O, untimely death! */
		break;
	}
	incode = code;
	/*
	 * Special case for KwKwK string.
	 */
	if (code >= free_ent) {
	    *stackp++ = finchar;
	    code = oldcode;
	}

	/*
	 * Generate output characters in reverse order
	 */
	while ( code >= 256 ) {
	    *stackp++ = tab_suffixof(code);
	    code = tab_prefixof(code);
	}
	*stackp++ = finchar = tab_suffixof(code);

	/*
	 * And put them out in forward order
	 */
	do
	    oputc (*--stackp);
	while (stackp > de_stack);

	/*
	 * Generate the new entry.
	 */
	if ((code=free_ent) < maxmaxcode) {
	    tab_prefixof(code) = (unsigned short)oldcode;
	    tab_suffixof(code) = finchar;
	    free_ent = code+1;
	}
	/*
	 * Remember previous code.
	 */
	oldcode = incode;
    }
}

code_int
getcode()
{
    /*
     * On the VAX, it is important to have the register declarations
     * in exactly the order given, or the asm will break.
     */

    register code_int code;
    static int offset = 0, size = 0;
    static char_type buf[BITS];
    register int r_off, bits;
    register char_type *bp = buf;

    if (clear_flg > 0 || offset >= size || free_ent > maxcode) {
	/*
	 * If the next entry will be too big for the current code
	 * size, then we must increase the size.  This implies reading
	 * a new buffer full, too.
	 */
	if ( free_ent > maxcode ) {
	    n_bits++;
	    if ( n_bits == maxbits )
		maxcode = maxmaxcode;	/* won't get any bigger now */
	    else
		maxcode = MAXCODE(n_bits);
	}
	if ( clear_flg > 0) {
	    maxcode = MAXCODE (n_bits = INIT_BITS);
	    clear_flg = 0;
	}
	size = oread(buf, n_bits);
	if (size <= 0)
	    return -1;			/* end of file */
	offset = 0;
	size = (size << 3) - (n_bits - 1);
    }
    r_off = offset;
    bits = n_bits;

	/*
	 * Get to the first byte.
	 */
	bp += (r_off >> 3);
	r_off &= 7;
	/* Get first part (low order bits) */

	code = (*bp++ >> r_off);

	bits -= (8 - r_off);
	r_off = 8 - r_off;		/* now, offset into code word */
	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
	if ( bits >= 8 ) {
	    code |= *bp++ << r_off;
	    r_off += 8;
	    bits -= 8;
	}
	/* high order bits. */
	code |= (*bp & rmask[bits]) << r_off;

    offset += n_bits;

    return code;
}
