
/*********************************   stop.c   ********************************

    Purpose:     Stop list filter finite state machine generator and driver.

    Provenence:  Written by and unit tested by C. Fox, 1990.
                 Changed by C. Fox, July 1991.
                    - added ANSI C declarations
                    - branch tested to 95% coverage

    Notes:       This module implements a fast finite state machine 
                 generator, and a driver, for implementing stop list 
                 filters.  The strlist module is a simple string array
                 data type implementation.
**/

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <malloc.h>

#include "stop.h"
#include "strlist.h"

/******************************************************************************/

#define FALSE                        0
#define TRUE                         1
#define EOS                        '\0'

         /**************   Character Classification   ***************/
         /* Tokenizing requires that ASCII be broken into character */
         /* classes distinguished for tokenizing.  Delimiter chars  */
         /* separate tokens.  Digits and letters make up the body   */
         /* of search terms.                                        */

typedef enum {

                DELIM_CH,              /* whitespace, punctuation, etc. */
                DIGIT_CH,              /* the digits */
                LETTER_CH,             /* upper and lower case */

            } CharClassType;

static CharClassType char_class[128] = {
      /* ^@ */  DELIM_CH,    /* ^A */  DELIM_CH,    /* ^B */  DELIM_CH,
      /* ^C */  DELIM_CH,    /* ^D */  DELIM_CH,    /* ^E */  DELIM_CH,
      /* ^F */  DELIM_CH,    /* ^G */  DELIM_CH,    /* ^H */  DELIM_CH,
      /* ^I */  DELIM_CH,    /* ^J */  DELIM_CH,    /* ^K */  DELIM_CH,
      /* ^L */  DELIM_CH,    /* ^M */  DELIM_CH,    /* ^N */  DELIM_CH,
      /* ^O */  DELIM_CH,    /* ^P */  DELIM_CH,    /* ^Q */  DELIM_CH,
      /* ^R */  DELIM_CH,    /* ^S */  DELIM_CH,    /* ^T */  DELIM_CH,
      /* ^U */  DELIM_CH,    /* ^V */  DELIM_CH,    /* ^W */  DELIM_CH,
      /* ^X */  DELIM_CH,    /* ^Y */  DELIM_CH,    /* ^Z */  DELIM_CH,
      /* ^[ */  DELIM_CH,    /* ^\ */  DELIM_CH,    /* ^] */  DELIM_CH,
      /* ^^ */  DELIM_CH,    /* ^_ */  DELIM_CH,    /*    */  DELIM_CH,
      /*  ! */  DELIM_CH,    /*  " */  DELIM_CH,    /*  # */  DELIM_CH,
      /*  $ */  DELIM_CH,    /*  % */  DELIM_CH,    /*  & */  DELIM_CH,
      /*  ' */  DELIM_CH,    /*  ( */  DELIM_CH,    /*  ) */  DELIM_CH,
      /*  * */  DELIM_CH,    /*  + */  DELIM_CH,    /*  , */  DELIM_CH,
      /*  - */  DELIM_CH,    /*  . */  DELIM_CH,    /*  / */  DELIM_CH,
      /*  0 */  DIGIT_CH,    /*  1 */  DIGIT_CH,    /*  2 */  DIGIT_CH,
      /*  3 */  DIGIT_CH,    /*  4 */  DIGIT_CH,    /*  5 */  DIGIT_CH,
      /*  6 */  DIGIT_CH,    /*  7 */  DIGIT_CH,    /*  8 */  DIGIT_CH,
      /*  9 */  DIGIT_CH,    /*  : */  DELIM_CH,    /*  ; */  DELIM_CH,
      /*  < */  DELIM_CH,    /*  = */  DELIM_CH,    /*  > */  DELIM_CH,
      /*  ? */  DELIM_CH,    /*  @ */  DELIM_CH,    /*  A */  LETTER_CH,
      /*  B */  LETTER_CH,   /*  C */  LETTER_CH,   /*  D */  LETTER_CH,
      /*  E */  LETTER_CH,   /*  F */  LETTER_CH,   /*  G */  LETTER_CH,
      /*  H */  LETTER_CH,   /*  I */  LETTER_CH,   /*  J */  LETTER_CH,
      /*  K */  LETTER_CH,   /*  L */  LETTER_CH,   /*  M */  LETTER_CH,
      /*  N */  LETTER_CH,   /*  O */  LETTER_CH,   /*  P */  LETTER_CH,
      /*  Q */  LETTER_CH,   /*  R */  LETTER_CH,   /*  S */  LETTER_CH,
      /*  T */  LETTER_CH,   /*  U */  LETTER_CH,   /*  V */  LETTER_CH,
      /*  W */  LETTER_CH,   /*  X */  LETTER_CH,   /*  Y */  LETTER_CH,
      /*  Z */  LETTER_CH,   /*  [ */  DELIM_CH,    /*  \ */  DELIM_CH,
      /*  ] */  DELIM_CH,    /*  ^ */  DELIM_CH,    /*  _ */  DELIM_CH,
      /*  ` */  DELIM_CH,    /*  a */  LETTER_CH,   /*  b */  LETTER_CH,
      /*  c */  LETTER_CH,   /*  d */  LETTER_CH,   /*  e */  LETTER_CH,
      /*  f */  LETTER_CH,   /*  g */  LETTER_CH,   /*  h */  LETTER_CH,
      /*  i */  LETTER_CH,   /*  j */  LETTER_CH,   /*  k */  LETTER_CH,
      /*  l */  LETTER_CH,   /*  m */  LETTER_CH,   /*  n */  LETTER_CH,
      /*  o */  LETTER_CH,   /*  p */  LETTER_CH,   /*  q */  LETTER_CH,
      /*  r */  LETTER_CH,   /*  s */  LETTER_CH,   /*  t */  LETTER_CH,
      /*  u */  LETTER_CH,   /*  v */  LETTER_CH,   /*  w */  LETTER_CH,
      /*  x */  LETTER_CH,   /*  y */  LETTER_CH,   /*  z */  LETTER_CH,
      /*  { */  DELIM_CH,    /*  | */  DELIM_CH,    /*  } */  DELIM_CH,
      /*  ~ */  DELIM_CH,    /* ^? */  DELIM_CH,                          };

         /**************   Character Case Conversion   **************/
         /* Term text must be accumulated in a single case.  This   */
         /* array is used to convert letter case but otherwise      */
         /* preserve characters.                                    */

static char convert_case[128] = {
      /* ^@ */    0,    /* ^A */    0,    /* ^B */    0,    /* ^C */    0,
      /* ^D */    0,    /* ^E */    0,    /* ^F */    0,    /* ^G */    0,
      /* ^H */    0,    /* ^I */    0,    /* ^J */    0,    /* ^K */    0,
      /* ^L */    0,    /* ^M */    0,    /* ^N */    0,    /* ^O */    0,
      /* ^P */    0,    /* ^Q */    0,    /* ^R */    0,    /* ^S */    0,
      /* ^T */    0,    /* ^U */    0,    /* ^V */    0,    /* ^W */    0,
      /* ^X */    0,    /* ^Y */    0,    /* ^Z */    0,    /* ^[ */    0,
      /* ^\ */    0,    /* ^] */    0,    /* ^^ */    0,    /* ^_ */    0,
      /*    */  ' ',    /*  ! */  '!',    /*  " */  '"',    /*  # */  '#',
      /*  $ */  '$',    /*  % */  '%',    /*  & */  '&',    /*  ' */ '\'',
      /*  ( */  '(',    /*  ) */  ')',    /*  * */  '*',    /*  + */  '+',
      /*  , */  ',',    /*  - */  '-',    /*  . */  '.',    /*  / */  '/',
      /*  0 */  '0',    /*  1 */  '1',    /*  2 */  '2',    /*  3 */  '3',
      /*  4 */  '4',    /*  5 */  '5',    /*  6 */  '6',    /*  7 */  '7',
      /*  8 */  '8',    /*  9 */  '9',    /*  : */  ':',    /*  ; */  ';',
      /*  < */  '<',    /*  = */  '=',    /*  > */  '>',    /*  ? */  '?',
      /*  @ */  '@',    /*  A */  'a',    /*  B */  'b',    /*  C */  'c',
      /*  D */  'd',    /*  E */  'e',    /*  F */  'f',    /*  G */  'g',
      /*  H */  'h',    /*  I */  'i',    /*  J */  'j',    /*  K */  'k',
      /*  L */  'l',    /*  M */  'm',    /*  N */  'n',    /*  O */  'o',
      /*  P */  'p',    /*  Q */  'q',    /*  R */  'r',    /*  S */  's',
      /*  T */  't',    /*  U */  'u',    /*  V */  'v',    /*  W */  'w',
      /*  X */  'x',    /*  Y */  'y',    /*  Z */  'z',    /*  [ */  '[',
      /*  \ */   92,    /*  ] */  ']',    /*  ^ */  '^',    /*  _ */  '_',
      /*  ` */  '`',    /*  a */  'a',    /*  b */  'b',    /*  c */  'c',
      /*  d */  'd',    /*  e */  'e',    /*  f */  'f',    /*  g */  'g',
      /*  h */  'h',    /*  i */  'i',    /*  j */  'j',    /*  k */  'k',
      /*  l */  'l',    /*  m */  'm',    /*  n */  'n',    /*  o */  'o',
      /*  p */  'p',    /*  q */  'q',    /*  r */  'r',    /*  s */  's',
      /*  t */  't',    /*  u */  'u',    /*  v */  'v',    /*  w */  'w',
      /*  x */  'x',    /*  y */  'y',    /*  z */  'z',    /*  { */  '{',
      /*  | */  '|',    /*  } */  '}',    /*  ~ */  '~',    /* ^? */    0, };

#define DEAD_STATE                    -1   /* used to block a DFA */
#define TABLE_INCREMENT              256   /* used to grow tables */


       /*************************   Hashing   **************************/
       /* Sets of suffixes labeling states during the DFA construction */
       /* are hashed to speed searching.  The hashing function uses an */
       /* entire integer variable range as its hash table size;  in an */
       /* effort to get a good spread through this range, hash values  */
       /* start big, and are incremented by a lot with every new word  */
       /* in the list.  The collision rate is low using this method    */

#define HASH_START               5775863
#define HASH_INCREMENT          38873647


       /**************   State Label Binary Search Tree   **************/
       /* During DFA construction, all states must be searched by      */
       /* their labels to make sure that the minimum number of states  */
       /* are used.  This operation is sped up by hashing the labels   */
       /* to a signature value, then storing the signatures and labels */
       /* in a binary search tree.  The tree is destroyed once the DFA */
       /* is fully constructed.                                        */

typedef struct TreeNode {
       StrList label;            /* state label used as search key     */
       unsigned signature;       /* hashed label to speed searching    */
       int state;                /* whose label is representd by node  */
       struct TreeNode *left;    /* left binary search subtree         */
       struct TreeNode *right;   /* right binary search subtree        */
       } SearchTreeNode, *SearchTree;


       /*********************   DFA State Table   **********************/
       /* The state table is an array of structures holding a state    */
       /* label, a count of the arcs out of the state, a pointer into  */
       /* the arc table for these arcs, and a final state flag.  The   */
       /* label field is used only during machine construction.        */

typedef struct {
       StrList label;            /* for this state - used during build */
       int num_arcs;             /* for this state in the arc table    */
       int arc_offset;           /* for finding arcs in the arc table  */
       short is_final;           /* TRUE iff this is a final state     */
       } StateTableEntry, *StateTable;


       /**********************   DFA Arc Table   ***********************/
       /* The arc table lists all transitions for all states in a DFA  */
       /* in compacted form.  Each state's transitions are offset from */
       /* the start of the table, then listed in arc label order.      */
       /* Transitions are found by a linear search of the sub-section  */
       /* of the table for a given state.                              */

typedef struct {
       char label;               /* character label on an out-arrow    */
       int target;               /* the target state for the out-arrow */
       } ArcTableEntry, *ArcTable;


       /**********************   DFA Structure   ***********************/
       /* A DFA is represented as a pointer to a structure holding the */
       /* machine's state and transition tables, and bookkeepping      */
       /* counters.  The tables are arrays whose space is malloc'd,    */
       /* then realloc'd if more space is required.  Once a machine is */
       /* constructed, the table space is realloc'd one last time to   */
       /* fit the needs of the machine exactly.                        */

typedef struct _DfaStruct {
       int num_states;           /* in the DFA (and state table)       */
       int max_states;           /* now allocated in the state table   */
       int num_arcs;             /* in the arc table for this machine  */
       int max_arcs;             /* now allocated in the arc table     */
       StateTable state_table;   /* the compacted DFA state table      */
       ArcTable arc_table;       /* the compacted DFA transition table */
       SearchTree tree;          /* storing state labels used in build */
       } DFAStruct;

/******************************************************************************/
/*************************   Function Declarations   **************************/

#ifdef __STDC__

static char *GetMemory( char *ptr, int num_bytes );
static void  DestroyTree( SearchTree tree );
static int   GetState( DFA machine, StrList label, unsigned signature );
static void  AddArc( DFA machine, int state, char arc_label,
                     StrList state_label, unsigned state_signature );

extern DFA   BuildDFA( StrList words );
extern char *GetTerm( FILE *stream, DFA machine, int size, char *output );

#else

static char *GetMemory();
static void  DestroyTree();
static int   GetState();
static void  AddArc();

extern DFA   BuildDFA();
extern char *GetTerm();

#endif

/******************************************************************************/
/************************   Private Function Definitions   ********************/

/*FN***************************************************************************

        GetMemory( ptr, num_bytes )

   Returns: char * -- new/expanded block of memory

   Purpose: Rationalize memory allocation and handle errors

   Plan:    Part 1: Allocate memory with supplied allocation functions
            Part 2: Handle any errors
            Part 3: Return the allocated block of memory

   Notes:   None.
**/

static char *
GetMemory( ptr, num_bytes )
   char *ptr;      /* in: expanded block; NULL if nonesuch */
   int num_bytes;  /* in: number of bytes to allocate */
   {
   char *memory;   /* temporary for holding results */

        /* Part 1: Allocate memory with supplied allocation functions */
   if ( NULL == ptr )
      memory = malloc( (unsigned)num_bytes );
   else
      memory = realloc( ptr, (unsigned)num_bytes );

                    /* Part 2: Handle any errors */
   if ( NULL == memory )
      {
      (void)fprintf( stderr, "malloc failure--aborting\n" );
      exit(1);
      }

             /* Part 3: Return the allocated block of memory */
   return( memory );

   } /* GetMemory */

/*FN***************************************************************************

        DestroyTree( tree )

   Returns: void

   Purpose: Destroy a binary search tree created during machine construction

   Plan:    Part 1: Return right away of there is no tree
            Part 2: Deallocate the subtrees
            Part 3: Deallocate the root

   Notes:   None.
**/

static void
DestroyTree( tree )
   SearchTree tree;   /* in: search tree destroyed */
   {
              /* Part 1: Return right away of there is no tree */
   if ( NULL == tree ) return;

                     /* Part 2: Deallocate the subtrees */
   if ( NULL != tree->left )  DestroyTree( tree->left );
   if ( NULL != tree->right ) DestroyTree( tree->right );

                      /* Part 3: Deallocate the root */
   tree->left = tree->right = NULL;
   (void)free( (char *)tree );

   } /* DestroyTree */

/*FN***************************************************************************

        GetState( machine, label, signature )

   Returns: int -- state with the given label

   Purpose: Search a machine and return the state with a given state label

   Plan:    Part 1: Search the tree for the requested state
            Part 2: If not found, add the label to the tree
            Part 3: Return the state number

   Notes:   This machine always returns a state with the given label
            because if the machine does not have a state with the given
            label, then one is created.
**/

static int
GetState( machine, label, signature )
   DFA machine;        /* in: DFA whose state labels are searched;
   StrList label;      /* in: state label searched for */
   unsigned signature; /* in: signature of the label requested */
   {
   SearchTree *ptr;       /* pointer to a search tree link field */
   SearchTree new_node;   /* for a newly added search tree node */

             /* Part 1: Search the tree for the requested state */
   ptr = &(machine->tree);
   while ( (NULL != *ptr) && (   (signature != (*ptr)->signature)
                              || !StrListEqual(label,(*ptr)->label)) )
      ptr = (signature <= (*ptr)->signature) ? &(*ptr)->left : &(*ptr)->right;

             /* Part 2: If not found, add the label to the tree */
   if ( NULL == *ptr )
      {
         /* create a new node and fill in its fields */
      new_node = (SearchTree)GetMemory( NULL, sizeof(SearchTreeNode) );
      new_node->signature = signature;
      new_node->label = (StrList)label;
      new_node->state = machine->num_states;
      new_node->left = new_node->right = NULL;

         /* allocate more states if needed, set up the new state */
      if ( machine->num_states == machine->max_states )
         {
         machine->max_states += TABLE_INCREMENT;
         machine->state_table =
           (StateTable)GetMemory( machine->state_table,
                                  machine->max_states*sizeof(StateTableEntry));
         }
      machine->state_table[machine->num_states].label = (StrList)label;
      machine->num_states++;

         /* hook the new node into the binary search tree */
      *ptr = new_node;
      }
   else
      StrListDestroy( label );

                   /* Part 3: Return the state number */
   return( (*ptr)->state );

   } /* GetState */

/*FN***************************************************************************

        AddArc( machine, state, arc_label, state_label, state_signature )

   Returns: void

   Purpose: Add an arc between two states in a DFA

   Plan:    Part 1: Search for the target state among existing states
            Part 2: Make sure the arc table is big enough
            Part 3: Add the new arc

   Notes:   None.
**/

static void
AddArc( machine, state, arc_label, state_label, state_signature )
   DFA machine;              /* in/out: machine with an arc added */
   int state;                /* in: with an out arc added */
   char arc_label;           /* in: label on the new arc */
   StrList state_label;      /* in: label on the target state */
   unsigned state_signature; /* in: label hash signature to speed searching */
   {
   register int target;   /* destination state for the new arc */

        /* Part 1: Search for the target state among existing states */
   StrListSort( state_label );
   target = GetState( machine, state_label, state_signature );

               /* Part 2: Make sure the arc table is big enough */
   if ( machine->num_arcs == machine->max_arcs )
      {
      machine->max_arcs += TABLE_INCREMENT;
      machine->arc_table =
         (ArcTable)GetMemory( machine->arc_table,
                              machine->max_arcs * sizeof(ArcTableEntry) );
      }

                        /* Part 3: Add the new arc */
   machine->arc_table[machine->num_arcs].label = arc_label;
   machine->arc_table[machine->num_arcs].target = target;
   machine->num_arcs++;
   machine->state_table[state].num_arcs++;

   } /* AddArc */

/*FN***************************************************************************

        BuildDFA( words )

   Returns: DFA -- newly created finite state machine

   Purpose: Build a DFA to recognize a list of words

   Plan:    Part 1: Allocate space and initialize variables
            Part 2: Make and label the DFA start state
            Part 3: Main loop - build the state and arc tables
            Part 4: Deallocate the binary search tree and the state labels
            Part 5: Reallocate the tables to squish them down
            Part 6: Return the newly constructed DFA

   Notes:   None.
**/

DFA
BuildDFA( words )
   StrList words;  /* in: that the machine is built to recognize */
   {
   DFA machine;               /* local for easier access to machine */
   register int state;        /* current state's state number */
   char arc_label;            /* for the current arc when adding arcs */
   char *string;              /* element in a set of state labels */
   char ch;                   /* the first character in a new string */
   StrList current_label;     /* set of strings labeling a state */
   StrList target_label;      /* labeling the arc target state */
   unsigned target_signature; /* hashed label for binary search tree */
   register int i;            /* for looping through strings */

              /* Part 1: Allocate space and initialize variables */
   machine = (DFA)GetMemory( NULL, sizeof(DFAStruct) );

   machine->max_states = TABLE_INCREMENT;
   machine->state_table =
      (StateTable)GetMemory(NULL, machine->max_states*sizeof(StateTableEntry));
   machine->num_states = 0;

   machine->max_arcs = TABLE_INCREMENT;
   machine->arc_table =
      (ArcTable)GetMemory( NULL, machine->max_arcs * sizeof(ArcTableEntry) );
   machine->num_arcs = 0;

   machine->tree = NULL;

                /* Part 2: Make and label the DFA start state */
   StrListUnique( words );               /* sort and unique the list */
   machine->state_table[0].label = words;
   machine->num_states = 1;

            /* Part 3: Main loop - build the state and arc tables */
   for ( state = 0; state < machine->num_states; state++ )
      {
              /* The current state has nothing but a label, so */
              /* the first order of business is to set up some */
              /* of its other major fields                     */
      machine->state_table[state].is_final = FALSE;
      machine->state_table[state].arc_offset = machine->num_arcs;
      machine->state_table[state].num_arcs = 0;

             /* Add arcs to the arc table for the current state */
             /* based on the state's derived set.  Also set the */
             /* state's final flag if the empty string is found */
             /* in the suffix list                              */
      current_label = machine->state_table[state].label;
      target_label = StrListCreate();
      target_signature = HASH_START;
      arc_label = EOS;
      for ( i = 0; i < StrListSize(current_label); i++ )
         {
            /* get the next string in the label and lop it */
         string = StrListPeek( current_label, i );
         ch = *string++;

            /* the empty string means mark this state as final */
         if ( EOS == ch )
            { machine->state_table[state].is_final = TRUE; continue; }

            /* make sure we have a legitimate arc_label */
         if ( EOS == arc_label ) arc_label = ch;

            /* if the first character is new, then we must */
            /* add an arc for the previous first character */
         if ( ch != arc_label )
            {
            AddArc(machine, state, arc_label, target_label, target_signature);
            target_label = StrListCreate();
            target_signature = HASH_START;
            arc_label = ch;
            }

            /* add the current suffix to the target state label */
         StrListAppend( target_label, string );
         target_signature += (*string + 1) * HASH_INCREMENT;
         while ( *string ) target_signature += *string++;
         }

              /* On loop exit we have not added an arc for the */
              /* last bunch of suffixes, so we must do so, as  */
              /* long as the last set of suffixes is not empty */
              /* (which happens when the current state label   */
              /* is the singleton set of the empty string).    */
      if ( 0 < StrListSize(target_label) )
         AddArc( machine, state, arc_label, target_label, target_signature );
      }

      /* Part 4: Deallocate the binary search tree and the state labels */
   DestroyTree( machine->tree );  machine->tree = NULL;
   for ( i = 0; i < machine->num_states; i++ )
      {
      StrListDestroy( machine->state_table[i].label );
      machine->state_table[i].label = NULL;
      }

            /* Part 5: Reallocate the tables to squish them down */
   machine->state_table = (StateTable)GetMemory( machine->state_table,
                               machine->num_states * sizeof(StateTableEntry) );
   machine->arc_table = (ArcTable)GetMemory( machine->arc_table,
                               machine->num_arcs * sizeof(ArcTableEntry) );

                /* Part 6: Return the newly constructed DFA */
   return( machine );

   } /* BuildDFA */

/*FN***************************************************************************

        GetTerm( stream, machine, size, output )

   Returns: char * -- NULL if stream is exhausted, otherwise output buffer

   Purpose: Get the next token from an input stream, filtering stop words

   Plan:    Part 1: Return NULL immediately if there is no input
            Part 2: Initialize the local variables
            Part 3: Main Loop: Put an unfiltered word into the output buffer
            Part 4: Return the output buffer

   Notes:   This routine runs the DFA provided as the machine parameter,
            and collects the text of any term in the output buffer.  If
            a stop word is recognized in this process, it is skipped.
            Care is also taken to be sure not to overrun the output buffer.
**/

char *
GetTerm( stream, machine, size, output )
   FILE *stream;    /* in: source of input characters */
   DFA machine;     /* in: finite state machine driving process */
   int size;        /* in: bytes in the output buffer */
   char *output;    /* in/out: where the next token in placed */
   {
   char *outptr;        /* for scanning through the output buffer */
   int ch;              /* current character during input scan */
   register int state;  /* current state during DFA execution */

           /* Part 1: Return NULL immediately if there is no input */
   if ( EOF == (ch = getc(stream)) ) return( NULL );

                  /* Part 2: Initialize the local variables */
   outptr = output;

     /* Part 3: Main Loop: Put an unfiltered word into the output buffer */
   do
      {
         /* scan past any leading delimiters */
      while ( (EOF != ch ) &&
              ((DELIM_CH == char_class[ch]) ||
               (DIGIT_CH == char_class[ch])) ) ch = getc( stream );

         /* start the machine in its start state */
      state = 0;

         /* copy input to output until reaching a delimiter, and also */
         /* run the DFA on the input to watch for filtered words      */
      while ( (EOF != ch) && (DELIM_CH != char_class[ch]) )
         {
         if ( outptr == (output+size-1) ) { outptr = output; state = 0; }
         *outptr++ = convert_case[ch];

         if ( DEAD_STATE != state )
            {
            register int i;   /* for scanning through arc labels */
            int arc_start;    /* where the arc label list starts */
            int arc_end;      /* where the arc label list ends */

            arc_start = machine->state_table[state].arc_offset;
            arc_end = arc_start + machine->state_table[state].num_arcs;

            for ( i = arc_start; i < arc_end; i++ )
               if ( convert_case[ch] == machine->arc_table[i].label )
                  { state = machine->arc_table[i].target; break; }

            if ( i == arc_end ) state = DEAD_STATE;
            }

         ch = getc( stream );
         }

         /* start from scratch if a stop word is recognized */
      if ( (DEAD_STATE != state) && machine->state_table[state].is_final )
         outptr = output;

         /* terminate the output buffer */
      *outptr = EOS;
      }
   while ( (EOF != ch) && !*output );

                    /* Part 4: Return the output buffer */
   return( output );

   } /* GetTerm */
