/* National Institute of Standards and Technology (NIST)
/* National Computer System Laboratory (NCSL)
/* Office Systems Engineering (OSE) Group
/* ********************************************************************
/*                            D I S C L A I M E R
/*                              (March 8, 1989)
/*  
/* There is no warranty for the NIST NCSL OSE SGML parser and/or the NIST
/* NCSL OSE SGML parser validation suite.  If the SGML parser and/or
/* validation suite is modified by someone else and passed on, NIST wants
/* the parser's recipients to know that what they have is not what NIST
/* distributed, so that any problems introduced by others will not
/* reflect on our reputation.
/* 
/* Policies
/* 
/* 1. Anyone may copy and distribute verbatim copies of the SGML source
/* code as received in any medium.
/* 
/* 2. Anyone may modify your copy or copies of SGML parser source code or
/* any portion of it, and copy and distribute such modifications provided
/* that all modifications are clearly associated with the entity that
/* performs the modifications.
/* 
/* NO WARRANTY
/* ===========
/* 
/* NIST PROVIDES ABSOLUTELY NO WARRANTY.  THE SGML PARSER AND VALIDATION
/* SUITE ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
/* EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
/* THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
/* WITH YOU.  SHOULD THE SGML PARSER OR VALIDATION SUITE PROVE DEFECTIVE,
/* YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
/* 
/* IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL NIST BE LIABLE FOR
/* DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR OTHER SPECIAL,
/* INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
/* INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
/* BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR A
/* FAILURE OF THE PROGRAM TO OPERATE WITH PROGRAMS NOT DISTRIBUTED BY
/* NIST) THE PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF
/* SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
*/

/***************************************************************/
/*    TITLE:          SGML PARSER                              */
/*    SYSTEM:         DTD PREPROCESSOR                         */
/*    SUBSYSTEM:      PREPROCESSOR for DETERMINING             */
/*                       AMBIGUOUS CONTENT MODELS              */
/*    SOURCE FILE:    PREPROC.C                                */
/*    AUTHOR:         Steven Lindeman                          */
/*    DATE CREATED:   05January1987                            */
/***************************************************************/

/***************************************************************/
/*   PREPROC -- Takes a valid content model to be reduced or   */
/*              simplified and tokenizes it for use with       */
/*              determin.c                                     */
/***************************************************************/
#include <stdio.h>
#include <ctype.h>
#include "detdefs.h"
#include "detglbl.h"

/* PREPROC: reset the preprocessor state, reduce the content model held in
 * `expression` (the result lands in the global reducedexpr), then tokenize
 * reducedexpr into the caller's `buffer`.
 *   expression - NUL-terminated content model text
 *   buffer     - receives the tokens written by tokenize()
 * Note: this parameter hides the file-scope object also named `buffer`
 * that init() clears.
 */
void preproc(expression,buffer)
char expression[];
ITEM buffer[];
{
   int pos,ntokens,first,last;

   init();

   /* find the index of the last character; -1 for an empty model */
   pos = 0;
   while (expression[pos] != '\0')
      pos++;
   first = 0;
   last = pos - 1;

   (void) reduce(expression,first,last,START);   /* result is not needed here */

   ntokens = 0;
   tokenize(reducedexpr,buffer,&ntokens);
#ifdef JJJ
   printf("Expression is -> %s\n",expression);
   for (pos=0; pos<ntokens; pos++)
      printf("%02d ",buffer[pos].itoken);
   printf("\n");
#endif
   return;
}


/***********************************/
/* INIT                            */
/***********************************/
/* INIT: reset the state shared by one preprocessing run: the symbol table
 * index, the file-scope token buffer, the reduced-expression text and the
 * index used to append to it.
 */
void init()
{
   int k;

   index = 0;
   symtabindx = 0;

   /* blank the whole reduced expression */
   k = 0;
   while (k < BUFFSIZE)
      reducedexpr[k++] = '\0';

   memset((char *)buffer, '\0', sizeof(buffer));
}

/***********************************/
/* TOKENIZE                        */
/***********************************/
/* TOKENIZE: translate the reduced content model in `expression` into
 * tokens stored in buffer[].itoken, starting at slot *j.
 *   expression - NUL-terminated reduced content model
 *   buffer     - token array; treated as holding BUFFSIZE entries
 *   j          - in/out: next free slot in buffer, advanced per token
 * Mapping: '(' -> GRPO, ')' -> GRPC, ',' -> SEQ, '|' and '&' -> OR.
 * Any other character starts an element name, which is looked up (or
 * entered) with mysearch() and encoded as its table position + 17.
 * Every ')' and every name is followed by one occurrence-indicator token
 * produced by handleoi().
 * Prints "overflow in preproc()" and exits when the tokens for the
 * current character would not fit in buffer.
 */
void tokenize(expression,buffer,j)
char expression[];
ITEM buffer[];
int *j;
{
   int i,position,needed;
   char name[NAMELEN+1];

   i=0;

   while(expression[i] != '\0')  {
      /* handleoi() stores its token without any bounds check, so a ')'
         or a name needs two free slots, everything else needs one */
      switch(expression[i])  {
      case '(':
      case '&':
      case '|':
      case ',':
         needed = 1;
         break;
      default:
         needed = 2;
         break;
      }
      if ((*j) > BUFFSIZE - needed){
         printf("overflow in preproc()\n");
         exit(0);
      }

      switch(expression[i])  {
      case '(':     /* GRPO */
         buffer[(*j)++].itoken = GRPO;
         break;
      case ')':     /* GRPC */
         buffer[(*j)++].itoken = GRPC;
         i++;   /* step onto the possible occurrence indicator */
         handleoi(expression,buffer,&i,j);  /* tokenize occurr. ind. */
         break;
      case '&':
      case '|':     /* AND, OR */
         buffer[(*j)++].itoken = OR;  /* AND must be converted to OR */
         break;
      case ',':     /* SEQ */
         buffer[(*j)++].itoken = SEQ;
         break;
      default:    /* must be an element name */
         get_name(expression,&i,name); /* leaves i just past the name */
         position = mysearch(name);    /* symbol table position */
         buffer[(*j)++].itoken = position+17;    /* tokenize name */
         handleoi(expression,buffer,&i,j);  /* tokenize o.i. */
         break;
      }
      i++;   /* next char */
   }

   return;
}

/*************************************/
/* MYSEARCH                          */
/*************************************/
/* MYSEARCH: look `name` up in symtable with a linear scan; when it is not
 * present, append it as a new entry.
 *   name - NUL-terminated element name
 * Returns the index of the matching (or newly added) entry.
 * NOTE(review): nothing here bounds symtabindx or the length of `name`
 * against the table's capacity; those sizes are not visible in this file.
 */
int mysearch(name)
char name[];
{
   int position;

   for (position = 0; position < symtabindx; position++)
      if (strcmp(symtable[position].entry,name) == 0)
         return(position);   /* already in the table */

   /* not found: position == symtabindx, the slot filled below */
   strcpy(symtable[symtabindx].entry,name);
   symtabindx++;

   return(position);
}

/*************************************/
/* GET_NAME                          */
/*************************************/
/* GET_NAME: copy the run of characters accepted by isvalid() that starts
 * at expression[*i] into `name`, then NUL-terminate it.
 *   expression - text being scanned
 *   i          - in/out: scan position; left on the first character that
 *                is not part of the name
 *   name       - output buffer; no length limit is applied here
 */
void get_name(expression,i,name)
char expression[];
int *i;
char name[];
{
   char *out;

   for (out = name; isvalid(expression[*i]); (*i)++)
      *out++ = expression[*i];   /* copy one name character */
   *out = '\0';

   return;
}

/*************************************/
/* HANDLEOI                          */
/*************************************/
/* HANDLEOI: store one occurrence-indicator token in buffer[*j] and
 * advance *j.
 *   expression - text being tokenized
 *   buffer     - token array being filled
 *   i          - in/out: position of the candidate indicator character
 *   j          - in/out: next free slot in buffer
 * '?' -> OPT, '*' -> REP, '+' -> PLUS.  Any other character means there is
 * no indicator: REQ is stored and *i is stepped back so the caller sees
 * that character again.  No bounds check is made on *j.
 */
void handleoi(expression,buffer,i,j)
char expression[];
ITEM buffer[];
int *i,*j;
{
   char oi;
   int slot;

   oi = expression[*i];
   slot = (*j)++;

   if (oi == '?')
      buffer[slot].itoken = OPT;
   else if (oi == '*')
      buffer[slot].itoken = REP;
   else if (oi == '+')
      buffer[slot].itoken = PLUS;
   else {
      buffer[slot].itoken = REQ;   /* no indicator present */
      (*i)--;                      /* unget char */
   }

   return;
}

/*********************************/
/* REMOVE_BLANKS *****************/
/*********************************/
/* REMOVE_BLANKS: copy oldexpr to newexpr, dropping every space character
 * (' ' only; other whitespace is copied unchanged).
 *   newexpr - output; needs room for at most strlen(oldexpr)+1 characters
 *   oldexpr - NUL-terminated input
 */
void remove_blanks(newexpr,oldexpr)
char newexpr[],oldexpr[];
{
   char *src,*dst;

   dst = newexpr;
   for (src = oldexpr; *src != '\0'; src++) {
      if (*src == ' ')
         continue;        /* skip blank */
      *dst++ = *src;      /* keep everything else */
   }
   *dst = '\0';

   return;
}

/*********************************/
/* VALIDATE_EXPR *****************/
/*********************************/
/* VALIDATE_EXPR: check that every character of `expr` is either one of
 * the delimiters ( ) + * ? , | & or a character accepted by isvalid().
 * On the first offending character a two-line message (the character and
 * its 1-based position) is printed and the program exits with status 99.
 *   expr - NUL-terminated content model
 */
void validate_expr(expr)
char expr[];
{
   int pos;
   char ch;

   for (pos = 0; (ch = expr[pos]) != '\0'; pos++) {
      /* delimiters are always acceptable */
      if (ch == '(' || ch == ')' || ch == '+' || ch == '*' ||
          ch == '?' || ch == ',' || ch == '|' || ch == '&')
         continue;

      /* anything else has to be a name character */
      if (isvalid(ch))
         continue;

      printf("Illegal character in content model\n");
      printf("Character -> '%c' , Character number %d\n",
          ch,pos+1);
      exit(99);
   }

   return;
}

/********************************/
/* GET_EXPR *********************/
/********************************/
/* GET_EXPR: read one line with get_string(), store it in `expr` with all
 * spaces removed, and validate the result (validate_expr() exits the
 * program on an illegal character).
 *   expr - output buffer; the line read is at most BUFFSIZE-1 characters,
 *          so BUFFSIZE characters of room are sufficient
 * Returns the number of characters read, counted before the spaces were
 * removed.
 */
int get_expr(expr)
char expr[];
{
   char temp[BUFFSIZE];
   int length;

   length = get_string(temp,BUFFSIZE);
   remove_blanks(expr,temp);   /* writes expr from scratch, blanks dropped */
   validate_expr(expr);        /* validate expression */

   return(length);
}

/********************************/
/* GET_STRING *******************/
/********************************/
/* GET_STRING: read characters from standard input into `temp` until a
 * newline, end of file, or size-1 characters have been stored, then
 * NUL-terminate.  The newline is consumed but not stored.  When the line
 * is longer than size-1 characters the remainder is left unread.
 *   temp - output buffer of at least `size` characters
 *   size - capacity of temp; when size <= 0 nothing is read or written
 * Returns the number of characters stored (excluding the terminator).
 */
int get_string(temp,size)
char temp[];
int size;
{
   int c,i;

   if (size <= 0)
      return(0);    /* no room even for the terminator */

   i = 0;

   while (i < size - 1 && (c=getchar()) != EOF && c != '\n')
      temp[i++] = c;
   temp[i] = '\0';

   return(i);
}

/********************************/
/* REDUCE ***********************/
/********************************/
/* REDUCE: rewrite the sub-expression expr[low..high] into the global
 * reducedexpr (appending at the global `index`).
 *   expr      - content model text
 *   low, high - inclusive bounds of the sub-expression
 *   came_from - START for the outermost call, CONNECTOR for the operands
 *               of a connector
 * Each pass first looks for a connector (| & ,) at parenthesis level 0.
 *   - Found: both operands are reduced recursively with the connector
 *     emitted between them; if parentheses were stripped earlier they are
 *     put back around the result with the saved occurrence indicator.
 *   - Not found and the range ends in ')' or ')' plus one character: one
 *     layer of parentheses is stripped, its occurrence indicator is
 *     merged with update_oi(), and the range is scanned again.
 *   - Otherwise the range is a terminal: a trailing + ? * is stripped and
 *     merged, the rest is copied out, and parentheses / the merged
 *     indicator are emitted depending on came_from.
 * Returns the final value of currentoi.
 */
int reduce(expr,low,high,came_from)
char expr[];
int low,high;
int came_from;
{
   int i,scan,numstriped,level,reduced,startinsert;
   int currentoi,newoi,saveoi;

   reduced = FALSE;
   numstriped = 0;
   saveoi = currentoi = '1';

   while (reduced == FALSE) {
      level = 0;
      for (scan=low; scan<=high; scan++)  { /* scan for connector at level==0 */
         switch(expr[scan])  {
         case '|':
         case '&':
         case ',':
            if (level==0) {
               startinsert = index;
               currentoi = reduce(expr,low,scan-1,CONNECTOR);
               reducedexpr[index++] = expr[scan];
               currentoi = reduce(expr,scan+1,high,CONNECTOR);
               /* NOTE(review): integer 1, not the character '1' used
                  everywhere else; left as is because it only affects the
                  return value and other callers are not visible here */
               currentoi = 1;
               reduced = TRUE;
               scan = high+1;   /* leave the scan loop */
               if (numstriped > 0)
                  replace_parens(startinsert,saveoi);
            }
            break;
         case '(':
            level++;
            break;
         case ')':
            level--;
            break;
         }
      }
      if (reduced == FALSE) {
         /* Only characters inside [low, high] are inspected: the guards
            keep expr[high] and expr[high-1] from being read before the
            range (expr[-1] for a one-character model). */
         if (high >= low && expr[high] == ')')  {
            /* "( ... )" with no occurrence indicator */
            newoi = '1';
            high--;
            low++;
            numstriped++;
            update_oi(&currentoi,newoi);
            saveoi = currentoi;
         }
         else if (high > low && expr[high-1] == ')')  {
            /* "( ... )" followed by one indicator character */
            newoi = expr[high];
            high = high-2;
            low++;
            numstriped++;
            update_oi(&currentoi,newoi);
            saveoi = currentoi;
         }
         else {    /* must be terminal */
            newoi = '1';
            if (high >= low) {
               switch(expr[high])  {
               case '+':
               case '?':
               case '*':
                  newoi = expr[high--];   /* strip oi */
                  break;
               default:
                  break;
               }
            }
            update_oi(&currentoi,newoi);
            for (i=low; i<=high; i++)     /* add to reducedexpr  */
               reducedexpr[index++] = expr[i];
            if (numstriped > 0 && came_from == START)
               replace_parens(0,currentoi);
            if (currentoi != '1' && came_from == CONNECTOR)
               reducedexpr[index++] = currentoi;
            reduced = TRUE;
         }
      }
   }
   return(currentoi);
}
/**************************************/
/* REPLACE_PARENS: wrap the text reducedexpr[insert .. index-1] in
 * parentheses and, unless oi is '1', append oi after the ')'.
 *   insert - position in reducedexpr where '(' is inserted
 *   oi     - occurrence indicator character; '1' means none
 * The global `index` is advanced past everything that was added.
 */
void replace_parens(insert,oi)
int insert;
char oi;
{
   int pos;

   /* shift the tail right by one to open a gap at `insert` */
   pos = index;
   while (pos > insert) {
      reducedexpr[pos] = reducedexpr[pos-1];
      pos--;
   }
   reducedexpr[insert] = '(';
   index++;                       /* account for the inserted '(' */

   reducedexpr[index++] = ')';
   if (oi != '1')
      reducedexpr[index++] = oi;

   return;
}

/*******************************************/
/*   UPDATE_OI                             */
/*******************************************/
/* UPDATE_OI: merge occurrence indicator `newoi` into *currentoi.
 *   currentoi - in/out: accumulated indicator ('1', '?', '+' or '*')
 *   newoi     - indicator being applied
 * '+' keeps '+' or '1' as '+', anything else becomes '*'.
 * '?' keeps '?' or '1' as '?', anything else becomes '*'.
 * '*' always gives '*'.  Any other newoi leaves *currentoi unchanged.
 */
void update_oi(currentoi, newoi)
int *currentoi;
char newoi;
{
   int cur;

   cur = *currentoi;

   switch (newoi) {
   case '+':
      *currentoi = (cur == '+' || cur == '1') ? '+' : '*';
      break;
   case '?':
      *currentoi = (cur == '?' || cur == '1') ? '?' : '*';
      break;
   case '*':
      *currentoi = '*';
      break;
   default:     /* nothing to merge */
      break;
   }
   return;
}
/*******************************************/
/*   ISVALID                               */
/*******************************************/
/* ISVALID: report whether `c` may appear in an element name.
 *   c - character to test
 * Returns TRUE for '.', '-', '#' and for anything isalnum() accepts,
 * FALSE otherwise.
 */
int isvalid(c)
char c;
{
   if (c == '.' || c == '-' || c == '#')
      return(TRUE);

   /* <ctype.h> functions need a value in unsigned char range (or EOF);
      a plain char can be negative, so convert before the call */
   if (isalnum((unsigned char)c))
      return(TRUE);

   return(FALSE);
}
