/*
 * $Header: /usr/people/tcl/src/uutar/RCS/decode.c,v 1.1.1.3 1993/09/11 18:42:17 tcl Exp $
 * Tom Lawrence
 * tcl@sgi.com
 */

#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include "codes.h"

/*
 * Given a string containing tokens separated by white space (blanks or
 * tabs) and a pointer to a char vector with room for VECLEN entries,
 * point each successive vector entry at a successive token and null
 * terminate the tokens in place.  Returns the number of tokens found.
 * If the string holds VECLEN or more tokens, an error message is
 * printed and the program exits.  This routine is destructive to the
 * passed string.
 */
/* True when c is a blank or a tab.  The argument and the whole
 * expansion are parenthesized so that expression arguments (e.g. a
 * conditional expression) expand with the intended precedence.  The
 * argument may be evaluated twice, so it must not have side effects.
 */
#define IS_WHITE_SPACE(c) ((c) == ' ' || (c) == '\t')

/* Capacity, in entries, of the token vector filled in by tokenize() */
#define VECLEN 10

/* Input and output streams.  Both are set by parse(); outfp is left
 * null there when no -o option was given, in which case main() opens
 * the file named on the BEGIN line of the input.
 */
FILE *infp, *outfp;

/*
 * Split string in place into blank/tab separated tokens.
 *
 * Each token's start address is stored in the next free slot of vector
 * and the first white space character following a token is overwritten
 * with a null byte.  Returns the number of tokens stored.  As soon as
 * the count reaches VECLEN the routine reports "too many tokens" on
 * stderr and terminates the program with status 1.
 */
static int
tokenize(string, vector)
    char *string;
    char **vector;
{
    int count = 0;
    int in_token = 0;		/* non-zero while inside a token */
    char *p = string;

    while (*p != '\0') {
	int blank = IS_WHITE_SPACE(*p);

	if (!in_token && !blank) {
	    /* first character of a new token */
	    vector[count++] = p;
	    in_token = 1;

	    if (count >= VECLEN) {
		fprintf(stderr, "too many tokens in input\n");
		exit(1);
	    }
	}
	else if (in_token && blank) {
	    /* first white space after a token: terminate the token */
	    *p = '\0';
	    in_token = 0;
	}
	p++;
    }
    return count;
}

/*
 * Convert a string of hexadecimal digits (upper or lower case, no "0x"
 * prefix) to an unsigned value.
 *
 * Normally strtol would be used for this, but strtol can't handle
 * unsigned values greater than 0x7FFFFFFF on some machines.
 *
 * As with strtol, conversion stops at the first character that is not
 * a hex digit; such characters are never folded into the result.  An
 * empty string, or one that starts with a non-digit, yields 0.  Digits
 * that do not fit in the return type are shifted out at the top.
 */
static unsigned int
hex2long(const char *str)
{
    unsigned long ret = 0;
    const char *c;

    for (c = str; *c; c++) {
	unsigned int digit;

	if (*c >= '0' && *c <= '9')
	    digit = (unsigned int)(*c - '0');
	else if (*c >= 'a' && *c <= 'f')
	    digit = (unsigned int)(*c - 'a') + 10;
	else if (*c >= 'A' && *c <= 'F')
	    digit = (unsigned int)(*c - 'A') + 10;
	else
	    break;		/* not a hex digit: stop converting */

	ret = (ret << 4) | digit;
    }
    return (unsigned int)ret;
}

/* Print the list of accepted command line options on standard output,
 * one per line, then terminate the program with exit status 1.
 */
static void
usage()
{
    static const char *const help[] = {
	"options:\n",
	"-i <inputfile>\n",
	"-o <outputfile>\n"
    };
    unsigned int i;

    for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
	fputs(help[i], stdout);

    exit(1);
}

/*
 * Parse command line arguments and open the streams they name.
 *
 *   -i <inputfile>    read the encoded input from this file
 *   -o <outputfile>   write the decoded output to this file
 *
 * On return infp is the named input file, or stdin when -i was not
 * given, and outfp is the named output file, or null when -o was not
 * given.  An unknown option or an option without its argument ends the
 * program through usage(); a file that cannot be opened is reported
 * with perror() and the program exits with status 1.
 */
static void
parse(int argc, char **argv)
{
    char *infile = NULL;
    char *outfile = NULL;

    /* Test "> 0" rather than plain truth: a program can be started
     * with argc == 0, and decrementing that would give a non-zero
     * count and walk off the end of argv.
     */
    while (--argc > 0) {
	argv++;
	if (!strcmp(*argv, "-i")) {
	    /* argc still counts the option itself, so its argument is
	     * present only when at least two entries remain
	     */
	    if (argc < 2)
		usage();
	    argc--;
	    argv++;
	    infile = *argv;
	}
	else if (!strcmp(*argv, "-o")) {
	    if (argc < 2)
		usage();
	    argc--;
	    argv++;
	    outfile = *argv;
	}
	else
	    usage();
    }

    /* open input stream */
    if (infile) {
	if ((infp = fopen(infile, "r")) == NULL) {
	    perror(infile);
	    exit(1);
	}
    }
    else
	infp = stdin;

    /* open output stream or leave it for later if no output file
     * was specified
     */
    if (outfile) {
	if ((outfp = fopen(outfile, "w")) == NULL) {
	    perror(outfile);
	    exit(1);
	}
    }
    else
	outfp = NULL;
}

/*
 * Decoder entry point.
 *
 * Reads the encoded text from infp line by line:
 *
 *   - lines before the BEGIN line are skipped;
 *   - the BEGIN line supplies, as white space separated tokens after
 *     the keyword, the output file mode (octal), the output file name
 *     and the character set description handed to
 *     parse_charval_list();
 *   - an "END <hex checksum>" line that directly follows a blank line
 *     ends the data; its checksum is compared with the one computed
 *     over the decoded bytes;
 *   - every other line is data: each character selects a variable
 *     length bit field from codes[], and the bit fields are packed
 *     into bytes written to outfp.
 *
 * Exit status is 0 when the END line is reached, the checksums agree
 * and the output was written without error, and 1 on any error,
 * including input that ends without a BEGIN or END line.
 */
int
main(int argc, char **argv)
{
    char buffer[1024], *tokens[VECLEN], *c, out;
    int state, numtokens, outfd, buf_offset, lookforend;
    unsigned int cksum, saw;
    unsigned short buf;
    unsigned char ch;

    /* parse command line arguments */
    parse(argc, argv);

    state = 0;

    /* clear the output buffer; buf_offset is the number of bits of
     * buf that are still free, counted from the low end
     */
    buf = 0;
    buf_offset = 16;

    cksum = 0;
    lookforend = 0;

    /* scan the input file */
    while (fgets(buffer, sizeof(buffer), infp)) {
	/* remove any newlines */
	if ((c = strchr(buffer, '\n')) != NULL)
	    *c = 0;

	/* if this line is blank, check for an END keyword
	 * on the next line
	 */
	if (*buffer == 0) {
	    lookforend = 1;
	    continue;
	}

	/* state 0 == haven't seen BEGIN yet */
	if (state == 0) {
	    if (!strncmp(buffer, "BEGIN ", 6)) {
		state = 1;
		numtokens = tokenize(buffer, tokens);
		if (numtokens < 4) {
		    fprintf(stderr, "incomplete BEGIN line in encoded file\n");
		    exit(1);
		}

		/* if output file wasn't specified on command line, use the
		 * one encoded in the input file
		 */
		if (outfp == NULL) {
		    /* NOTE(review): the file name comes straight from the
		     * input and is used unchecked, so the input decides
		     * which path gets created or truncated.
		     */
		    /* use open() so we can specify the mode; the cast keeps
		     * the variadic argument the type open() expects
		     */
		    if ((outfd = open(tokens[2], O_WRONLY | O_CREAT | O_TRUNC,
				      (mode_t)strtol(tokens[1], 0, 8))) < 0) {
			perror(tokens[2]);
			exit(1);
		    }
		    if ((outfp = fdopen(outfd, "w")) == NULL) {
			perror(tokens[2]);
			exit(1);
		    }
		}
		/* parse the character set and initialize the
		 * codes accordingly
		 */
		parse_charval_list(tokens[3]);
		init_codes(DECODE);
	    }
	}

	/* state != 0 and we're looking for the END token */
	else if (lookforend && !strncmp(buffer, "END ", 4)) {
	    numtokens = tokenize(buffer, tokens);

	    /* tokens[1] is only meaningful when the END line really
	     * carries a second token; otherwise it still holds a
	     * pointer left over from an earlier line
	     */
	    if (numtokens < 2) {
		fprintf(stderr, "checksum error.\n");
		fprintf(stderr, "no checksum on END line, computed %X\n",
			cksum);
		exit(1);
	    }

	    /* issue checksum error if there's a mismatch */
	    saw = hex2long(tokens[1]);
	    if (saw != cksum) {
		fprintf(stderr, "checksum error.\n");
		fprintf(stderr, "saw %X, computed %X\n", saw, cksum);
		exit(1);
	    }

	    /* closing flushes the buffered output, so a failed write
	     * is noticed here instead of being lost at exit
	     */
	    if (fclose(outfp) == EOF) {
		perror("output file");
		exit(1);
	    }
	    exit(0);
	}

	/* state != 0 so this is a data line. Decode it */
	else {
	    for (c = buffer; *c; c++) {
		/* index and classify by unsigned value: plain char may
		 * be signed, which would make bytes >= 0x80 negative.
		 * NOTE(review): assumes codes[] has an entry for every
		 * unsigned char value -- confirm in codes.h.
		 */
		ch = (unsigned char)*c;

		/* check for garbage characters in the input */
		if (!codes[ch].inuse) {
		    fprintf(stderr, "invalid char ");
		    if (isprint(ch))
			fprintf(stderr, "\'%c\' ", ch);
		    fprintf(stderr, "(%d) in input\n", ch);
		    exit(1);
		}

		/* append the variable length bitfield that maps to
		 * this input character to the output bitstream
		 */
		buf_offset -= codes[ch].len;
		buf |= (((unsigned short)(codes[ch].code) << buf_offset));

		/* if we've got an entire byte available in the output
		 * buffer, append it to the output file
		 */
		if (buf_offset < 9) {
		    out = (char)(buf >> 8);
		    putc(out, outfp);

		    /* rotate the checksum left by 7 and mix in the byte;
		     * the 7/25 split treats cksum as a 32 bit quantity
		     */
		    cksum = ((cksum << 7) | (cksum >> 25)) ^
			(unsigned char)out;

		    /* advance the output buffer */
		    buf_offset += 8;
		    buf <<= 8;
		}
	    }
	}
	lookforend = 0;
    }

    /* Getting here means the input ended before an END line was
     * accepted, so the decoded data (if any) was never verified.
     */
    if (ferror(infp))
	perror("input");
    else if (state == 0)
	fprintf(stderr, "no BEGIN line found in input\n");
    else
	fprintf(stderr, "missing END line, checksum not verified\n");
    return 1;
}
