/*
 * TEA database builder
 *
 * Version 0.90
 *
 * Usage: TEABUILD <ascii_input_file>
 */

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>

typedef int Boolean;
#define False           (0)
#define True            (!False)

#define LENMIN          1
#define LENMAX          40

#define BUFMAX          1024

#define OFFSETMAX       0x7

#define CODE_APOSTROPHE '{'
#define CODE_CAP        '|'
#define CODE_HYPHEN     '}'
#define CODE_SPACE      '~'

#define OUTPUT_PREFIX   "words"

/*
 * Cache of the encode state for all the different entry lengths required.
 * The program can work with fewer file handles than the number of output
 * files it is writing, if necessary.
 *
 * There is one state per entry length (index = length - 1). States whose
 * output file is currently open are additionally linked into a circular,
 * doubly linked list ordered by recency of use, so that encache() can
 * close the least recently used file when fopen() fails.
 */
typedef struct  _encodeState {
    /* list links; both NULL while the state is not on the open-file list */
    struct _encodeState *next;
    struct _encodeState *prev;
    /* output stream for this length, or NULL while the file is closed */
    FILE                *fp;
    /* True once the file has been opened; later re-opens use append mode */
    Boolean             created;
    /* letters of the most recently encoded entry, one per position */
    char                frame[LENMAX+1];
    /* number of letters skipped (matched against frame) since the last
       byte was written for this length */
    int                 offset;
} EncodeState;

/* one encode state per entry length 1..LENMAX */
EncodeState encodeStateCache[LENMAX];
/* most recently used open state, or NULL when no output file is open;
   encodeStateCacheHead->prev is the least recently used one */
struct _encodeState *encodeStateCacheHead;

static void output(fp, code, offset)
FILE *fp;
int code;
int offset;
{
    /*
     * Write one encoded byte to 'fp'. The byte is 'code' taken relative
     * to 'a' and shifted up by three bits, with 'offset' OR-ed into the
     * bits below it. A write failure is reported on stderr and
     * terminates the program with exit status 1.
     */
    int packed = ((code - 'a') << 3) | offset;

    if (fputc(packed, fp) != EOF)
    {
        return;
    }

    fprintf(stderr, "Error writing output file\n");
    exit(1);
}

static void openall()
{
    /*
     * Put the encode state cache into its starting condition: every
     * slot is unlinked, has no open file, has never been created, and
     * carries an all-zero frame with a zero offset. The open-file list
     * is empty.
     */
    int slot;

    for (slot = 0; slot < LENMAX; slot++)
    {
        EncodeState *state = &encodeStateCache[slot];

        state->offset = 0;
        memset(state->frame, '\0', sizeof(state->frame));
        state->created = False;
        state->fp = NULL;
        state->prev = NULL;
        state->next = NULL;
    }

    encodeStateCacheHead = NULL;
}

/* Remove 'esp' from the circular open-file list and clear its links. */
static void encacheUnlink(EncodeState *esp)
{
    if (esp->next == esp)
    {
        /* only entry in cache */
        encodeStateCacheHead = NULL;
    }
    else
    {
        esp->prev->next = esp->next;
        esp->next->prev = esp->prev;
        if (encodeStateCacheHead == esp)
        {
            encodeStateCacheHead = esp->next;
        }
    }
    esp->next = NULL;
    esp->prev = NULL;
}

/* Insert the (currently unlinked) 'esp' at the front of the open-file list. */
static void encachePushFront(EncodeState *esp)
{
    if (encodeStateCacheHead == NULL)
    {
        esp->next = esp;
        esp->prev = esp;
    }
    else
    {
        esp->next = encodeStateCacheHead;
        esp->prev = encodeStateCacheHead->prev;
        esp->next->prev = esp;
        esp->prev->next = esp;
    }
    encodeStateCacheHead = esp;
}

static EncodeState *encache(int len)
{
    /*
     * Ensure the output file associated with entries of length 'len'
     * is open and return a pointer to its encode state. The returned
     * state becomes the most recently used entry of the open-file list.
     *
     * 'len' is used unchecked as an index and must lie in 1..LENMAX.
     *
     * The first open of a file truncates it ("wb"); any later re-open
     * appends ("ab"). If fopen() fails, the least recently used open
     * file is closed and the open is retried; when there is nothing
     * left to close the program exits with status 1.
     */
    EncodeState *esp = &encodeStateCache[len-1], *espOld;
    const char *type;
    char outputname[BUFMAX];

    if (esp->fp == NULL)
    {
        /* file must be (re-)opened */

        /* get output file name */
        sprintf(outputname, "%s.%d", OUTPUT_PREFIX, len);

        /* get output file open mode */
        type = esp->created ? "ab" : "wb";
        esp->created = True;

        /* try opening the file; on error, close the file handle
           for the least recently used entry length and try again */
        while ((esp->fp = fopen(outputname, type)) == NULL)
        {
            if (encodeStateCacheHead == NULL)
            {
                fprintf(stderr, "Couldn't open \"%s\"\n", outputname);
                exit(1);
            }

            espOld = encodeStateCacheHead->prev;

            /* fclose() flushes buffered data, so a failure here means
               output has been lost */
            if (fclose(espOld->fp) == EOF)
            {
                fprintf(stderr, "Error writing output file\n");
                exit(1);
            }

            /* mark the evicted state as closed so that it is re-opened
               (in append mode) the next time it is needed */
            espOld->fp = NULL;
            encacheUnlink(espOld);
        }

        /* add the entry to the front of the cache */
        encachePushFront(esp);
    }
    else if (esp != encodeStateCacheHead)
    {
        /* already open: just move the entry to the front of the cache */
        encacheUnlink(esp);
        encachePushFront(esp);
    }

    return(esp);
}

static void closeall()
{
    /*
     * Flush the encode states through. A special case must be dealt
     * with where the alphabetic characters of the last word are identical
     * to those of the penultimate word and the last word has no
     * "modifiers" (space, hyphen, apostrophe, shift). In this case,
     * a "redundant" character must be output to ensure the last word
     * is unambigously represented in the output file.
     */
    EncodeState *esp;
    int i;

    for (i = 0; i < LENMAX; i++)
    {
        if (    encodeStateCache[i].created
             && encodeStateCache[i].offset > i)
        {
            /* last entry must be flushed through */
            esp = encache(i+1);
            output(esp->fp, esp->frame[0], 0);
        }
    }

    /* close any open files */
    if ((esp = encodeStateCacheHead) != NULL)
    {
        do
        {
            fclose(esp->fp);
            esp = esp->next;
        } while (esp != encodeStateCacheHead);
    }
}

static void encode(entry)
char *entry;
{
    /*
     * Add a single entry to the appropriate output file
     */
    char word[BUFMAX], *wp;
    int cursor, wordlength;
    EncodeState *esp;

    /* convert the word into lower case + modifiers */
    wp = word;
    wordlength = 0;
    for ( ; *entry != '\0'; entry++)
    {
        if (*entry == ' ')
        {
            *wp++ = CODE_SPACE;
        }
        else if (*entry == '\'')
        {
            *wp++ = CODE_APOSTROPHE;
        }
        else if (*entry == '-')
        {
            *wp++ = CODE_HYPHEN;
        }
        else if (isalpha(*entry))
        {
            if (isupper(*entry))
            {
                *wp++ = CODE_CAP;
                *wp++ = tolower(*entry);
            }
            else
            {
                *wp++ = *entry;
            }
            wordlength++;
        }
    }
    *wp = '\0';

    if (wordlength <= LENMAX)
    {
        /* ensure the handle to the output file is available */
        esp = encache(wordlength);

        cursor = 0;
        for (wp = word; *wp != '\0'; wp++)
        {
            /* check if a modifier needs to be output */
            if (*wp >= CODE_APOSTROPHE)
            {
                if (cursor < wordlength)
                {
                    output(esp->fp, *wp, esp->offset);
                    esp->offset = 0;
                }
            }
            /* check if a letter need to be output */
            else if (    (*wp != esp->frame[cursor])
                      || (esp->offset == OFFSETMAX) )
            {
                output(esp->fp, *wp, esp->offset);
                esp->offset = 0;

                esp->frame[cursor++] = *wp;
            }
            else
            {
                esp->offset++;
                cursor++;
            }
        }
    }
    else
    {
        fprintf(stderr, "Skipping long entry\n");
    }
}

int main(int argc, char *argv[])
{
    /*
     * Program entry point: read the ASCII input file named by the single
     * command-line argument and present each line to encode().
     *
     * Returns 0 on success and 1 if the input file could not be read to
     * the end; usage and open failures exit with status 1.
     */
    char linebuf[BUFMAX], *lbp;
    FILE *ifp;
    int c;
    int status = 0;

    /* check usage */
    if (argc != 2)
    {
        fprintf(stderr, "Usage: TEABUILD <ascii_input_file>\n");
        exit(1);
    }

    /* open ASCII input file */
    if ((ifp = fopen(argv[1], "r")) == NULL)
    {
        fprintf(stderr, "Couldn't open \"%s\" for input\n", argv[1]);
        exit(1);
    }

    /* initialise the encode state cache */
    openall();

    /* do a single pass of the input file, presenting each line
       as an entry to be encoded */
    while (fgets(linebuf, sizeof(linebuf), ifp) != NULL)
    {
        if ((lbp = strchr(linebuf, '\n')) != NULL)
        {
            /* delete the newline */
            *lbp = '\0';
            encode(linebuf);
        }
        else if (strlen(linebuf) < sizeof(linebuf) - 1)
        {
            /* the buffer was not filled, so this is the final line of
               a file that does not end with a newline */
            encode(linebuf);
        }
        else
        {
            fprintf(stderr, "Skipping long entry\n");

            /* discard the rest of the line so that its tail is not
               mistaken for a separate entry */
            while ((c = getc(ifp)) != EOF && c != '\n')
            {
                /* nothing */
            }
        }
    }

    if (ferror(ifp))
    {
        fprintf(stderr, "Error reading \"%s\"\n", argv[1]);
        status = 1;
    }
    fclose(ifp);

    /* flush the encode state cache */
    closeall();

    return(status);
}
