/*
 * $Header: /usr/people/tcl/src/uutar/RCS/codes.c,v 1.1.1.4 1993/09/11 22:42:44 tcl Exp $
 * Tom Lawrence
 * tcl@sgi.com
 */

#include <stdio.h>
#include <stdlib.h>
#include "codes.h"

/* see codes.h */
int numchars;
struct code codes[256];

/* initialize a subset of the codes array. val and len define a variable
 * length bitfield. The array elements will be set up so that any element
 * of the array whose index is a left-aligned superset of this bitfield
 * will contain the given output ascii character. E.g. if the bitfield is
 * 10010, then all elements in the array with subscript 10010xxx for all
 * xxx, will store the given ascii code, and the length of the bitfield,
 * in this case 5.
 */
static void
init_encodeval(codes, val, len, ascii)
    struct code *codes;
    int val;
    int len;
    int ascii;
{
    int shift, stop;

    /* determine how far the code must be shifted to be 
     * MSB justified in the byte
     */
    shift = 8 - len;

    /* calculate the upper bound of indices which this bitfield
     * will match
     */
    stop = (val + 1) << shift;

    /* shift the code over to the left edge of the byte */
    val <<= shift;

    /* thus, for every index in the 256 element array which has
     * this code as a prefix
     */
    for(; val < stop; val++) {
	/* store the code length and the printable character 
	 * it represents
	 */
	codes[val].len = (char)len;
	codes[val].code = (char)ascii;
    }
}

/* convert an ascii character code to an integer. The character code may
 * be in decimal, hex or octal, or it may be an actual character escaped
 * with a back-slash
 */
static int
str2val(str)
    char *str;
{
    int val;
    char *end;

    while(*str == ' ' || *str == '\t')
	str++;

    /* check if this is an escaped character */
    if (*str == '\\') {
	str++;
	if (*str == 0) {
	    fprintf(stderr, "missing character in alphabet\n");
	    exit(1);
	}
	return((int)*str);
    }

    val = (int)strtol(str, &end, 0);
    if (end == str) {
	if (*str)
	    fprintf(stderr, "invalid char \'%c\' in alphabet\n", *str);
	else
	    fprintf(stderr, "empty numerical field in alphabet\n");
	exit(1);
    }
    return(val);
}

/* parse a range of characters for the output character set and mark each
 * character as in use in the codes array. A range is either in the form
 * num-num or just num
 */
static void
parse_charval_range(range)
    char *range;
{
    char *c, savec = 0;
    int start, end, x;

    for(c = range; *c && *c != '-'; c++);
    savec = *c;
    *c = 0;

    start = str2val(range);
    if (savec == '-') {
	end = str2val(c + 1);
	*c = savec;
	for(x = start; x <= end; x++)
	    codes[x].inuse = 1;
    }
    else
	codes[start].inuse = 1;
}

/* parse a list of character ranges for the output character set and then
 * parse each range found. A list is of the form range,range,...
 */
void
parse_charval_list(list)
    char *list;
{
    char *c1, *c2, savec2;
    int x;

    for(x = 0; x < 256; x++)
	codes[x].inuse = 0;

    c1 = list;
    while(*c1) {
	while(*c1 == ',')
	    c1++;
	if (*c1 == 0)
	    return;
	for(c2 = c1; *c2 && *c2 != ','; c2++);
	savec2 = *c2;
	*c2 = 0;
	parse_charval_range(c1);
	*c2 = savec2;
	c1 = c2;
    }
    return;
}

/* print out the character set in the form of a list of ranges, encoded
 * in decimal
 */
void
print_charval_list(fp)
    FILE *fp;
{
    int x, usecomma;

    usecomma = 0;
    for(x = 0; x < 256; x++) {
	if (codes[x].inuse) {
	    if (usecomma)
		putc(',', fp);
	    fprintf(fp, "%d", x);
	    usecomma = 1;
	    if (x < 255 && codes[x+1].inuse) {
		putc('-', fp);
		while(++x < 256 && codes[x].inuse);
		fprintf(fp, "%d", x-1);
	    }
	}
    }
}

/*
 * Initialize the tables for encoding or decoding depending on the given
 * direction. 
 */
void
init_codes(direction)
    int direction;
{
    int x, code, numchars;
    int pof2, pof2len, half, whole;

    /* count how big our character set is */
    numchars = 0;
    for(x = 0; x < 256; x++)
	if (codes[x].inuse)
	    numchars++;

    if (numchars < 2) {
	fprintf(stderr,
		"uutar: alphabet doesn't contain enough characters.\n");
	exit(1);
    }

    /* determine the lowest power of 2 that is >= numchars, and the number
     * of bits needed to store that many values.
     */
    for(pof2 = 2, pof2len = 1; pof2 < numchars;
	pof2 <<= 1, pof2len++);

    /* compute how many half codes we need */
    half = pof2 - numchars;

    /* compute how many whole codes we need */
    whole = numchars - half;

    /* create a variable length code for each valid entry */
    code = 0;
    x = -1;

    /* create the whole codes */
    while(whole--) {
	/* get next slot */
	do x++; while(codes[x].inuse == 0);

	if (direction == DECODE) {
	    codes[x].code = (char)code;
	    codes[x].len = (char)pof2len;
	}
	else
	    init_encodeval(codes, code, pof2len, x);
	code++;
    }
    
    /* chop off LSB to form the half codes */
    code >>= 1;
    pof2len--;
    
    /* create the half codes */
    while(half--) {
	do x++; while(codes[x].inuse == 0);

	if (direction == DECODE) {
	    codes[x].code = (char)code;
	    codes[x].len = (char)pof2len;
	}
	else
	    init_encodeval(codes, code, pof2len, x);
	code++;
    }
}
