/*
 * $Header: arcpack.c,v 1.9 88/06/02 16:27:36 hyc Locked $
 */

/*  ARC - Archive utility - ARCPACK

    Version 3.49, created on 04/21/87 at 11:26:51

(C) COPYRIGHT 1985-87 by System Enhancement Associates; ALL RIGHTS RESERVED

    By:  Thom Henderson

    Description:
     This file contains the routines used to compress a file
     when placing it in an archive.

    Language:
     Computer Innovations Optimizing C86

*/

/*
 *                        ARC 5.21A modifications
 *
 * The problem:
 *
 *     ARC 5.21 seemingly at random will create an archive that is
 * larger than the sum of its stored files. This is most noticeable on
 * large files (for good reason, as will be shown!)
 * 
 * The reason:
 *
 *     Unless "no compaction" is specified when adding files to an
 * archive, ARC 5.21 automatically assumes that "crunching" (or if
 * specified, "squashing") will be the most efficient method of storing
 * the file. Therefore, as ARC analyzes the file, it appends the file to
 * the archive in the "crunched" or "squashed" mode. While it is doing this,
 * it is also calculating what the compacted size of the file would be
 * using its "packing" and "sqeezing" algorithms. After the file has been
 * completely analyzed, the method that results in the most compacted file
 * is the one selected. Note that on some kinds of data, no method is any
 * better than plain "storing" (or uncompacted).
 *     With some kinds of files (such as other archives), crunching or
 * squashing the file results in a LARGER file than the original. When
 * that occurs, ARC rewinds the archive file pointer back to where the
 * compacted file begins in the archive. Then by "storing", "packing", or
 * "squeezing", adds the file to the archive.
 *     However, because ARC had been appending the file in crunched or
 * squashed mode all along, the archive file has been 'stretched' beyond
 * its needs to hold the smaller compacted file just added to it. If
 * subsequent files added to the archive 'fill' the added space, there is
 * no problem. The problem manifests when the remaining, if any, files
 * don't fill the extra space. In that case, the extra space is just idly
 * sitting there.
 * 
 * The solution:
 *
 *     Don't append ANYTHING to the archive until it has been determined
 * what is the best method.
 * 
 * The other shoe:
 *
 *     ARC 5.21A ('A' for Atari?) reads and analyzes completely the
 * files to be added to an archive before beginning to add them. If indeed
 * "crunching" or "squashing" is determined to be the most efficient
 * method, ARC then has to re-read and compact the file. However, in
 * informal tests that compare the speed of 5.21 vs. 5.21A, the newer
 * version takes up to HALF as long to add files to an archive! And of
 * course, there is never garbage appended to the archive.
 *     One main reason for the speed increase is the better efficiency of
 * analyzing the files first. This eliminates writing out the compacted
 * file twice when crunching or squashing is not the optimal method.
 *     The other reason for the speed increase is that the buffers in
 * ARC 5.21A have been increased substantially in size. In ARC 5.21, the
 * file I/O buffers are 512 bytes. ARC 5.21A's (and MARC 5.21A's) buffers
 * are 16,384 bytes long.
 * 
 * Implementation:
 *
 *     arcpack() has been modified to not make the assumption that
 * crunching/squashing method will be best. When sqputc_cm() and
 * sqpred_cm() pass a NULL file pointer, the length of the file when
 * "squashed" is calculated only. When init_cm(), putc_cm(), and pred_cm()
 * pass a NULL file pointer, the length of the file when "crunched" is
 * calculated only. Only if putc_pak() is passed a valid file pointer does
 * it write out the byte.
 *
 *
 *  -Patrick Dell'Era   08-18-89 at 15:39
 */

#include <stdio.h>
#include "arc.h"
#if     MTS
#include <ctype.h>
#endif

void    setcode(), sqinit_cm(), sqputc_cm(), init_cm(), putc_cm();
void    filecopy(), abort(), putc_tst();
int     getch(), addcrc();

/* stuff for non-repeat packing */

#define DLE 0x90                /* repeat sequence marker */

static unsigned char state;     /* current packing state */

/* non-repeat packing states */

#define NOHIST  0               /* don't consider previous input */
#define SENTCHAR 1              /* lastchar set, no lookahead yet */
#define SENDNEWC 2              /* run over, send new char next */
#define SENDCNT 3               /* newchar set, send count next */

/* packing results */

static long     stdlen;         /* length for standard packing */
static short    crcval;         /* CRC check value */

void
pack(f, t, hdr)                 /* pack file into an archive */
    FILE           *f, *t;      /* source, destination */
    struct heads   *hdr;        /* pointer to header data */
{
    int             c;          /* one character of stream */
    long            ncrlen;     /* length after packing */
    long            huflen;     /* length after squeezing */
    long            lzwlen;     /* length after crunching */
    long            pred_sq(), file_sq();   /* stuff for squeezing */
    long            pred_cm(), sqpred_cm(); /* dynamic crunching cleanup */
    int             getch();
    int             getc_ncr();
    void            putc_pak();

    /* first pass - see which method is best */

    if (!nocomp) {              /* if storage kludge not active */
        if (note) {
            printf(" analyzing, ");
            fflush(stdout);
        }
        state = NOHIST;         /* initialize ncr packing */
        stdlen = ncrlen = 0;    /* reset size counters */
        crcval = 0;             /* initialize CRC check value */
        setcode();              /* initialize encryption */
        init_sq();              /* initialize for squeeze scan */

        if (dosquash) {
            sqinit_cm();                        /* initialize for squashing */
            while ((c = getch(f)) != EOF) {     /* for each byte of file */
                ncrlen++;                       /* one more packed byte */
                scan_sq(c);                     /* see what squeezing can do */
                sqputc_cm(c, NULL);             /* calculate what squashing
                                                 * can do */
            }
            lzwlen = sqpred_cm(NULL);           /* calculating only */
        } else {
            init_cm(NULL);                      /* initialize for calculating
                                                 * crunching */
        
            while ((c = getc_ncr(f)) != EOF) {  /* for each byte of file */
                ncrlen++;                       /* one more packed byte */
                scan_sq(c);                     /* see what squeezing can do */
                putc_cm(c, NULL);               /* calculate what crunching
                                                 * can do */
            }
            lzwlen = pred_cm(NULL);             /* finish up after calculating
                                                 * crunching */
        }
        huflen = pred_sq();                     /* finish up after squeezing */
    } else {                                    /* else kludge the method */
        stdlen = 0;                             /* make standard look best */
        ncrlen = huflen = lzwlen = 1;
    }

    /* standard set-ups common to all methods */

    fseek(f, 0L, 0);            /* rewind input */
    hdr->crc = crcval;          /* note CRC check value */
    hdr->length = stdlen;       /* set actual file length */
    state = NOHIST;             /* reinitialize ncr packing */
    setcode();                  /* reinitialize encryption */

    /* choose and use the shortest method */

    if (kludge && note)
        printf("\n\tS:%ld  P:%ld  S:%ld  C:%ld,\t ",
            stdlen, ncrlen, huflen, lzwlen);

    if (stdlen <= ncrlen && stdlen <= huflen && stdlen <= lzwlen) {
        if (note) {
            printf("storing, ");            /* store without compression */
            fflush(stdout);
        }
        hdrver = 2;                         /* note packing method */
        stdlen = crcval = 0;                /* recalc these for kludge */
        while ((c = getch(f)) != EOF)       /* store it straight */
            putc_pak(c, t);
        hdr->crc = crcval;
        hdr->length = hdr->size = stdlen;
    } else if (ncrlen < lzwlen && ncrlen < huflen) {
        if (note) {
            printf("packing, ");            /* pack with repeat suppression */
            fflush(stdout);
        }
        hdrver = 3;                         /* note packing method */
        hdr->size = ncrlen;                 /* set data length */
        while ((c = getc_ncr(f)) != EOF)
            putc_pak(c, t);
    } else if (huflen < lzwlen) {
        if (note) {
            printf("squeezing, ");
            fflush(stdout);
        }
        hdrver = 4;                         /* note packing method */
        hdr->size = file_sq(f, t);          /* note final size */
    } else if (! dosquash){
        if (note)
            printf("crunching, ");
        init_cm(t);                         /* initialize for crunching */
        while ((c = getc_ncr(f)) != EOF)    /* for each byte of file */
            putc_cm(c, t);                  /* crunch it */
            
        hdr->size = pred_cm(t);             /* finish up after crunching */
        hdrver = 8;                         /* note packing method */
    } else {
        if (note)
            printf("squashing, ");
        sqinit_cm();                        /* initialize for squashing */
        while ((c = getch(f)) != EOF)       /* for each byte of file */
            sqputc_cm(c, t);                /* squash it */
        hdr->size = sqpred_cm(t);           /* finish up after squashing */
        hdrver = 9;                         /* note packing method */
    }

    /* standard cleanups common to all methods */

    if (note)
        printf("done. (%ld%%)\n",100L - (100L*hdr->size)/hdr->length);
}

/*
 * Non-repeat compression - text is passed through normally, except that a
 * run of more than two is encoded as:
 * 
 * <char> <DLE> <count>
 * 
 * Special case: a count of zero indicates that the DLE is really a DLE, not a
 * repeat marker.
 */

int
getc_ncr(f)                             /* get bytes with collapsed runs */
    FILE           *f;                  /* file to get from */
{
    static int      lastc;              /* value returned on last call */
    static int      repcnt;             /* repetition counter */
    static int      c;                  /* latest value seen */

    switch (state) {                    /* depends on our state */
    case NOHIST:                        /* no relevant history */
        state = SENTCHAR;
        return lastc = getch(f);        /* remember the value next time */

    case SENTCHAR:                      /* char was sent. look ahead */
        switch (lastc) {                /* action depends on char */
        case DLE:                       /* if we sent a real DLE */
            state = NOHIST;             /* then start over again */
            return 0;                   /* but note that the DLE was real */

        case EOF:                       /* EOF is always a special case */
            return EOF;

        default:                        /* else test for a repeat */
            for (repcnt = 1; (c = getch(f)) == lastc && repcnt < 255; repcnt++);
            /* find end of run */

            switch (repcnt) {           /* action depends on run size */
            case 1:                     /* not a repeat */
                return lastc = c;       /* but remember value next time */

            case 2:                     /* a repeat, but too short */
                state = SENDNEWC;       /* send the second one next time */
                return lastc;

            default:                    /* a run - compress it */
                state = SENDCNT;        /* send repeat count next time */
                return DLE;             /* send repeat marker this time */
            }
        }

    case SENDNEWC:                      /* send second char of short run */
        state = SENTCHAR;
        return lastc = c;

    case SENDCNT:                       /* sent DLE, now send count */
        state = SENDNEWC;
        return repcnt;

    default:
        abort("Bug - bad ncr state\n");
    }
}

int
getch(f)                                /* special get char for packing */
    FILE           *f;                  /* file to get from */
{
    int             c;                  /* a char from the file */
#if     !DOS
    static int      cr = 0;             /* add \r before \n ? */

    if (cr) {
        c = '\n';
#if     MTS
        c = toascii(c);
#endif
        crcval = addcrc(crcval, c);
        stdlen++;
        cr = 0;
        return (c);
    }
#endif

    if ((c = fgetc(f)) != EOF) {        /* if not the end of file */
#if     !DOS
        if (!image && (c == '\n')) {
            c = '\r';
            cr = 1;
        }
#endif
#if     MTS
        if (!image)
            c = toascii(c);
#endif
        crcval = addcrc(crcval, c);     /* then update CRC check value */
        stdlen++;                       /* and bump length counter */
    }
    return c;
}

void
putc_pak(c, f)                          /* put a packed byte into archive */
    char            c;                  /* byte to put */
    FILE           *f;                  /* archive to put it in */
{
    unsigned char           code();
    if(f)                               /* if really writing, not
                                         * just calculating */
        putc_tst(code(c), f);           /* put encoded byte, with checks */
}
