/* National Institute of Standards and Technology (NIST)
/* National Computer System Laboratory (NCSL)
/* Office Systems Engineering (OSE) Group
/* ********************************************************************
/*                            D I S C L A I M E R
/*                              (March 8, 1989)
/*  
/* There is no warranty for the NIST NCSL OSE SGML parser and/or the NIST
/* NCSL OSE SGML parser validation suite.  If the SGML parser and/or
/* validation suite is modified by someone else and passed on, NIST wants
/* the parser's recipients to know that what they have is not what NIST
/* distributed, so that any problems introduced by others will not
/* reflect on our reputation.
/* 
/* Policies
/* 
/* 1. Anyone may copy and distribute verbatim copies of the SGML source
/* code as received in any medium.
/* 
/* 2. Anyone may modify your copy or copies of SGML parser source code or
/* any portion of it, and copy and distribute such modifications provided
/* that all modifications are clearly associated with the entity that
/* performs the modifications.
/* 
/* NO WARRANTY
/* ===========
/* 
/* NIST PROVIDES ABSOLUTELY NO WARRANTY.  THE SGML PARSER AND VALIDATION
/* SUITE ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
/* EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
/* THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
/* WITH YOU.  SHOULD THE SGML PARSER OR VALIDATION SUITE PROVE DEFECTIVE,
/* YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
/* 
/* IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL NIST BE LIABLE FOR
/* DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR OTHER SPECIAL,
/* INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
/* INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
/* BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR A
/* FAILURE OF THE PROGRAM TO OPERATE WITH PROGRAMS NOT DISTRIBUTED BY
/* NIST) THE PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF
/* SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
*/

/************************************************************************/
/*   TITLE:          SGML PARSER                                        */
/*   SYSTEM:         DTD PROCESSOR                                      */
/*   SUBSYSTEM:                                                         */
/*   SOURCE FILE:    DTUINP.C                                           */
/*   AUTHOR:         Jim Heath & Mike Garris                            */
/*                                                                      */
/*   DATE CREATED:                                                      */
/*   LAST MODIFIED:                                                     */
/*                                                                      */
/*                  REVISIONS                                           */
/*   WHEN      WHO            WHY                                       */
/************************************************************************/
#include <stdio.h>
#include <ctype.h>
#include <setjmp.h>
#include "qntyset.h"
#include "dtd.h"
#include "dtdglbl.h"
#include "dtdfncs.h"

/* Capacity, in characters, of the push-back ("unget") stack below. */
#define STACKSIZE    1000
/* Push-back stack: jungetc() pushes characters here and jgetc() pops */
/* them before reading from any file.                                 */
int mystack[STACKSIZE];
/* Number of characters currently held on mystack; it is also the     */
/* index of the next free slot. Reset to 0 by stackinit().            */
int stkptr = 0;
/* External entity file currently being read, or NULL when none is    */
/* open. Opened by reslvpref(), closed by jgetc() at its end of file. */
static FILE *entfile = NULL;
/* ============================================================ */
/* inpsep() uses jgetc() to consume a run of separators: RS,    */
/* RE, SPACE, TAB and EE characters, parameter entity           */
/* references (resolved through reslvpref(), so that separators */
/* in the replacement text are consumed as well) and, when      */
/* septype is PS, comments ("--" ... "--").                     */
/* Scanning stops at the first character that is none of the    */
/* above; that character is pushed back with jungetc().         */
/* The same routine serves PS, TS and DS separators because the */
/* characters above are common to all three. It is the caller's */
/* responsibility to impose any further restrictions required   */
/* by the separator type in use.                                */
/*                                                              */
/* septype: the separator type being scanned; only PS permits   */
/*          comments.                                           */
/* Returns the number of separators consumed, or EOF when the   */
/* input ends while scanning.                                   */
/* ============================================================ */
int inpsep(septype)
int septype;
{
   int c;
   int sepcount = 0;

   for(;;){
      if ((c = jgetc()) == EOF)
         return(EOF);
      switch ((char) c) {
         /* all common to PS's, TS's, and DS's */
      case RS:
      case RE:
      case SPACE:
      case EE:
      case TAB:
         sepcount++;
         break;
         /* PERO: a parameter entity reference only if a letter follows */
      case PERO:
         if ((c = jgetc()) == EOF)
            return(EOF);
         if(ISALPHA(c)){
            /* set flag to show we are in an entity */
            SETFLAG(IN_ENTITY);
            /* give the first name character back so that reslvpref() */
            /* can read the whole name */
            jungetc(c);
            reslvpref();
            /* a parameter entity reference counts as one separator */
            sepcount++;
            break;
         }
         else{
            /* not a reference: restore both characters in reverse */
            /* order so they are read again as PERO followed by c  */
            jungetc(c);
            jungetc(PERO);
            return(sepcount);
         }
         /* '-' may open a comment, which is only legal inside a PS */
      case '-':
         if(septype != PS){
            jungetc(c);
            return(sepcount);
         }
         if ((c = jgetc()) == EOF)
            return(EOF);
         if (c == '-') {
            /* "--" seen: skip the comment body and its closing "--" */
            inpcomment();
            /* a comment counts as one separator */
            sepcount++;
            break;
         }
         /* a lone '-': restore both characters and stop */
         jungetc(c);
         jungetc('-');
         return(sepcount);
         /* anything else ends the separator run */
      default:
         jungetc(c);
         return(sepcount);
      }
   }
}

/* ============================================================ */
/* reslvpref() resolves a parameter entity reference found at   */
/* separator level. It reads the entity name, looks it up with  */
/* search(), and then, depending on the syntactic literal       */
/* returned:                                                    */
/*   KW_PUBLIC / KW_SYSTEM  checks the notation (if any) and    */
/*                          opens the external entity file so   */
/*                          that jgetc() reads from it;         */
/*   KW_MD / KW_STARTTAG /                                      */
/*   KW_ENDTAG / KW_MS      wraps the entity text in the        */
/*                          matching delimiters and pushes it   */
/*                          back with ungetreslv();             */
/*   0                      pushes the plain entity text back;  */
/*   ILLCHAR or other       reports a syntax error.             */
/* An optional REFC following the name is consumed.             */
/* ============================================================ */

/* Consume the optional REFC that may close the reference; any   */
/* other character is pushed back for the caller.                */
static void skipentrefc()
{
   int c;

   if((c = jgetc()) == EOF)
      terminate(1, "End of File found while resolving parameter entity reference");
   if(c != REFC)
      jungetc(c);
}

void reslvpref()
{
   char namearray[NAMELEN + 1], *nameptr = namearray;
   char *resptr;
   /* room for LITLEN characters of entity text, the longest     */
   /* delimiter pair ("<![" and "]]>") and the terminating NUL   */
   char tarray[LITLEN + 7];
   int synkey;

   /* input the parameter entity name */
   if(INPNAME(&nameptr, NAMELEN - 1, noxlat) >= GOOD){
      /* INPNAME advanced nameptr past the name; rewind to its start */
      nameptr = namearray;
      /* search a table for a match on the name returning a */
      /* syntactic literal and the text associated with the name */
      synkey = search(PARM_ENT_NAME, nameptr, &resptr);
      switch(synkey){
         /* 'ILLCHAR' means the search was unsuccessful */
      case ILLCHAR:
         syntxerr("Entity Reference not found in table");
         break;
      case KW_PUBLIC:
      case KW_SYSTEM:
         skipentrefc();
         if (strlen(resptr))
            cknotation(resptr);
         /* no file name: the entity is empty, so just mark its end */
         if (strlen(entfilename) == 0) {
            jungetc(EE);
            break;
         }
         /* NOTE(review): an entity file that is still open is     */
         /* overwritten here without being closed -- confirm that  */
         /* external entities cannot nest.                         */
         entfile = safefopen(entfilename, "r", ENTITYFILE);
         break;
         /* bracketed text: add the delimiters to the text string */
      case KW_MD:
      case KW_STARTTAG:
      case KW_ENDTAG:
      case KW_MS:
         /* refuse text that would not fit in tarray with delimiters */
         if (strlen(resptr) > LITLEN) {
            syntxerr("Entity text too long in PS entity reference");
            break;
         }
         if (synkey == KW_MD)
            sprintf(tarray,"<!%s>",resptr);
         else if (synkey == KW_STARTTAG)
            sprintf(tarray,"<%s>",resptr);
         else if (synkey == KW_ENDTAG)
            sprintf(tarray,"</%s>",resptr);
         else
            sprintf(tarray,"<![%s]]>",resptr);
         skipentrefc();
         /* call procedure which ungets the text string */
         ungetreslv(tarray);
         break;
         /* a zero syntactic literal means the entity text consists */
         /* of only a parameter literal (written as 0 rather than   */
         /* NULL, which need not be an integer constant)            */
      case 0:
         skipentrefc();
         /* unget parameter literal */
         ungetreslv(resptr);
         break;
         /* any other syntactic literal is illegal */
      default:
         syntxerr("Illegal use of syntactic literal in a PS entity reference");
         break;
      }
   }
   else
      syntxerr("Reference name not found in PS entity reference");
}

/* ============================================================ */
/* ungetreslv() pushes a text string onto the "unget" stack so  */
/* that subsequent jgetc() calls read it back in its original   */
/* order. An EE is pushed first, so it is read last and marks   */
/* the end of the entity text.                                  */
/*                                                              */
/* resptr: NUL-terminated text to push back; an empty string    */
/*         pushes only the EE.                                  */
/* ============================================================ */
void ungetreslv(resptr)
char *resptr;
{
   REGISTER char *endptr;

   jungetc(EE);
   /* walk from the last character back to the first; the cast  */
   /* keeps bytes above 0x7f positive, as getc() would deliver  */
   /* them, so that such a byte cannot be read back as EOF      */
   for (endptr = resptr + strlen(resptr); endptr != resptr; )
      jungetc((unsigned char) *--endptr);
}
/* ============================================================ */
/* inpitem() reads one token of at most len characters from the */
/* input, appends it (NUL-terminated) at *recptr and classifies */
/* it with getkwindex().                                        */
/*                                                              */
/* recptr:    in/out; points at the destination position and is */
/*            advanced to the token's terminating NUL.          */
/* len:       maximum token length; must not exceed NAMELEN.    */
/* firstchar: predicate the first character must satisfy.       */
/* remchar:   predicate the remaining characters must satisfy.  */
/* xlat:      translation applied to every character read.      */
/* rniflag:   when YES, a leading RNI is accepted in place of   */
/*            a firstchar character; one extra character is     */
/*            then allowed and the rest is translated with      */
/*            TOUPPER.                                          */
/*                                                              */
/* Returns EOF at end of input, BAD when no token starts here   */
/* or the token is longer than len (the characters read are     */
/* pushed back, in their translated form), otherwise the value  */
/* of getkwindex() for the token.                               */
/* ============================================================ */
int inpitem(recptr, len, firstchar, remchar, xlat, rniflag)
char **recptr;
int len;
int (*firstchar)(), (*remchar)(), (*xlat)();
int rniflag;
{
   /* NAMELEN characters plus an optional leading RNI */
   char tmparray[NAMELEN + 2], *tbuff = tmparray;
   char *temp = *recptr;
   REGISTER int c, j;
   int ccount = 0, index;

   if(len > NAMELEN)
      terminate(1, "length exceeds NAMELEN in getitem()");
   if ((c = (*xlat)(jgetc())) == EOF)
      return(EOF);
   ccount++;
   if (rniflag == YES && c == RNI) {
      /* reserved name: the RNI does not count against len */
      len++;
      xlat = TOUPPER;
   }
   else if (!(*firstchar)(c)) {
      jungetc(c);
      return(BAD);
   }
   *tbuff++ = c;
   /* "> 0" keeps a non-positive len from looping without bound */
   while (--len > 0) {
      if ((c = (*xlat)(jgetc())) == EOF)
         return(EOF);
      if ((*remchar)(c)) {
         ccount++;
         *tbuff++ = c;
      }
      else {
         jungetc(c);
         break;
      }
   }
   /* peek at the next character: if it could still belong to the */
   /* token, the token is longer than allowed                     */
   if ((c = (*xlat)(jgetc())) == EOF)
      return(EOF);
   jungetc(c);
   if ((*remchar)(c)) {
      /* push back everything collected, last character first, so */
      /* that it is read again in its original order              */
      for (; ccount > 0; ccount--)
         jungetc(tmparray[ccount - 1]);
      return(BAD);
   }
   for (j = 0; j < ccount; j++)
      *temp++ = tmparray[j];
   *temp = '\0';
   index = getkwindex(*recptr);
   *recptr = temp;
   return(index);
}
/* ============================================================ */
/* inpcomment() discards input up to and including the next     */
/* "--" pair. A '-' followed by any other character is skipped  */
/* together with that character. Reaching EOF first is reported */
/* through terminate().                                         */
/* ============================================================ */
void inpcomment()
{
   REGISTER int c;
   int dashseen = 0;   /* non-zero when the previous char opened a pair */

   for(;;) {
      c = jgetc();
      if (c == EOF)
         terminate(1, "EOF found while processing comment");
      if (dashseen && c == '-')
         return;
      /* only a '-' that does not complete a pair can open one */
      dashseen = (c == '-');
   }
}
/* ============================================================ */
int inpMDO()
{
   REGISTER int c;

   if ((c = jgetc()) == EOF)
      return(EOF);
   if (c != '<') {
      jungetc(c);
      return(BAD);
   }
   if ((c = jgetc()) == EOF)
      return(EOF);
   if (c != '!') {
      jungetc(c);
      jungetc('<');
      return(BAD);
   }
   ADDCHAR('<');
   ADDCHAR('!');
   return(GOOD);
}

/* ============================================================ */
/* inpparmlit() inputs a parameter literal delimited by either  */
/* LIT's or LITA's and stores its interpreted text at *litptr.  */
/* Inside the literal only PERO's and CRO's are recognized as   */
/* markup, following the standard's definition of replaceable   */
/* parameter data: parameter entity references are expanded by  */
/* reslvreplpref() and character references by reslvcharref().  */
/*                                                              */
/* litptr: in/out; points at the destination position and is    */
/*         advanced to the terminating NUL of the stored text.  */
/* A literal longer than LITLEN, a missing opening delimiter    */
/* and an unmatched EE are reported with syntxerr(); end of     */
/* file inside the literal is reported with terminate().        */
/* ============================================================ */
void inpparmlit(litptr)
char **litptr;
{
   /* LIT is the fallback should syntxerr() return after a missing */
   /* opening delimiter, so the loop below never tests garbage     */
   int delimeter = LIT, len = 0;
   REGISTER int c, d;
   char *lptr = *litptr;
   int refflag = 0; /* flag if 1 => parameter entity reference found in */
   /*              parameter literal with unmatch EE */
   /*      if 0 => no unmatched reference currently in */
   /*              parameter literal */

   /* start from an empty string: reslvreplpref() appends with   */
   /* strcat() and an empty literal is echoed with ADDSTRING()   */
   *lptr = '\0';

   if((c = jgetc()) == EOF)
      terminate(1, "End of File found while processing parameter literal");
   /* the opening delimiter decides which character closes the literal */
   if(c == LIT || c == LITA)
      delimeter = c;
   else{
      ADDCHAR(c);
      jungetc(c);
      syntxerr("Delimeter not found in parameter literal");
   }
   /* while closing delimiter not found ...*/
   while((c = jgetc()) != delimeter){
      if(c == EOF)
         terminate(1, "End of File found while processing parameter literal");
      switch ((char) c){
      case EE:
         /* EE found not matching any existing reference within par. lit. */
         if(refflag == 0)
            syntxerr("EE found terminating reference not occurring within parameter literal");
         refflag = 0;
         break;
         /* if PERO, then check if entity reference */
      case PERO:
         if ((c = jgetc()) == EOF)
            terminate(1, "End of File found while interpreting parameter literal");
         /* the peeked character is always given back: either it   */
         /* starts the entity name, or it is ordinary input (maybe */
         /* the closing delimiter) for the next pass of the loop   */
         jungetc(c);
         if(ISALPHA(c)){
            /* call procedure to resolve parameter entity reference within */
            /* replaceable parameter data */
            reslvreplpref(&lptr, &len);
            /* set flag to unmatched reference found in parameter literal */
            refflag = 1;
         }
         else{
            /* otherwise, no reference, so treat PERO as char data */
            *lptr++ = PERO;
            *lptr = '\0';
            len++;
         }
         break;
         /* if '&', then check if character reference */
      case '&':
         if((c = jgetc()) == EOF)
            terminate(1, "End of File found while interpreting parameter literal");
         if(c == '#') {
            if ((d = jgetc()) == EOF)
               terminate(1, "End of File found while interpreting parameter literal");
            /* as above, the peeked character is never consumed here */
            jungetc(d);
            if(ISDIGIT(d) || isnmstrt(d)) {
               /* if CRO, then call procedure to resolve char reference */
               reslvcharref(&lptr, &len);
            }
            else{
               /* "&#" without a number or name is plain data */
               *lptr++ = '&';
               *lptr++ = c;
               *lptr = '\0';
               len += 2;
            }
         }
         /* otherwise, treat '&' as char data */
         else{
            jungetc(c);
            *lptr++ = '&';
            *lptr = '\0';
            len += 1;
         }
         break;
         /* default => char data */
      default:
         *lptr++ = c;
         *lptr = '\0';
         len++;
         break;
      }
      /* if length of interpreted parameter literal */
      /*  exceeds LITLEN then ERROR */
      if(len > LITLEN){
         ADDCHAR(LIT);
         /* NOTE(review): lptr points at the terminating NUL, so this */
         /* echoes an empty string; *litptr may have been intended.   */
         ADDSTRING(lptr);
         syntxerr("LITLEN is exceeded in parameter literal");
      }
   }
   ADDCHAR(LIT);
   ADDSTRING(*litptr);
   ADDCHAR(LIT);
   *litptr = lptr;
}

/* ============================================================ */
/* reslvcharref() resolves a character reference whose "&#" has */
/* already been consumed. If a name follows it must be one of   */
/* the function names RE, RS or SPACE, and that character is    */
/* appended to the literal; otherwise a character number is     */
/* expected and xlatcharnum() converts and appends it. An       */
/* optional REFC after the reference is consumed.               */
/*                                                              */
/* litptr: in/out; position in the literal being built,         */
/*         advanced past any character appended.                */
/* lenptr: in/out; length of the literal so far.                */
/* ============================================================ */
void reslvcharref(litptr, lenptr)
char **litptr;
int *lenptr;
{
   REGISTER int j;
   char namearray[NAMELEN + 1], *nameptr = namearray;
   char *lptr = *litptr;

   /* input name */
   if((j = INPNAME(&nameptr, NAMELEN, TOUPPER)) >= GOOD){
      switch (j){
         /* a function name: the value returned for it is the */
         /* character itself */
      case RE:
      case RS:
      case SPACE:
         *lptr++ = j;
         *lptr = '\0';
         *lenptr = *lenptr + 1;
         break;
         /* if any other name found then ERROR */
      default:
         ADDSTRING(synliteral(j));
         syntxerr("Unknown function name in character reference");
         break;
      }
   }
   /* otherwise, no name found, so assume character number */
   else{
      if((j = jgetc()) == EOF)
         terminate(1, "End of File found while resolving character reference");
      /* only a peek: the character is pushed back exactly once */
      jungetc(j);
      if(ISALPHA(j)){
         /* a letter cannot start a character number; report it and */
         /* do not attempt a numeric conversion */
         ADDCHAR(j);
         syntxerr("Character number expected while resolving character reference");
      }
      else
         /* add the character the number represents to the literal */
         xlatcharnum(&lptr, lenptr);
   }
   if((j = jgetc()) == EOF)
      terminate(1, "End of File found while resolving character reference");
   if(j != REFC)
      jungetc(j);
   *litptr = lptr;
}

/* ============================================================ */
/* xlatcharnum() inputs a string of decimal digits, converts it */
/* to the character with that number and appends the character  */
/* to the literal string. The first non-digit is pushed back.   */
/*                                                              */
/* litptr: in/out; position in the literal being built,         */
/*         advanced past the appended character.                */
/* lenptr: in/out; length of the literal, incremented by one.   */
/* When no digit is found, or more than MAX_CHAR_IN_DELIM_NUM   */
/* digits are found, a syntax error is reported and nothing is  */
/* appended.                                                    */
/* ============================================================ */
void xlatcharnum(litptr, lenptr)
char **litptr;
int *lenptr;
{
   REGISTER int j;
   int ndigits = 0;     /* digits stored in charnum so far */
   int toolong = 0;     /* set when a digit did not fit */
   char *lptr = *litptr, charnum[MAX_CHAR_IN_DELIM_NUM + 1];

   if((j = jgetc()) == EOF)
      terminate(1, "End of File found while resolving character reference");
   if(!ISDIGIT(j)){
      ADDCHAR(j);
      jungetc(j);
      syntxerr("Character number not found in resolving character reference");
      /* nothing to convert; j has already been pushed back once */
      return;
   }
   while(ISDIGIT(j)){
      /* keep consuming digits, but never write past charnum */
      if(ndigits < MAX_CHAR_IN_DELIM_NUM)
         charnum[ndigits++] = j;
      else
         toolong = 1;
      if((j = jgetc()) == EOF)
         terminate(1, "End of File found while resolving character reference");
   }
   jungetc(j);
   if(toolong){
      syntxerr("Character number too long in character reference");
      return;
   }
   charnum[ndigits] = '\0';
   /* the value is stored in a char; numbers that do not fit in */
   /* one are truncated by the assignment */
   *lptr++ = atoi(charnum);
   *lptr = '\0';
   *lenptr = *lenptr + 1;
   *litptr = lptr;
}

/* ============================================================ */
/* reslvreplpref() resolves parameter entity references within  */
/* replaceable parameter data. It inputs the reference name,    */
/* searches a table on the name, and adds then entity text to   */
/* the interpretted literal string.                             */
/* ============================================================ */
void reslvreplpref(litptr, lenptr)
char **litptr;
int *lenptr;
{
   REGISTER char *lptr = *litptr;
   char namearray[NAMELEN + 1], *nameptr = namearray;
   int c, len;
   char *resptr;
   int synkey;

   /* input name */
   if(INPNAME(&nameptr, NAMELEN - 1, noxlat) >= GOOD){
      nameptr = namearray;
      synkey = search(PARM_ENT_NAME, nameptr, &resptr);
      switch(synkey){
         /* if syntactic literal is MD then add appropriate delimeters */
         /* to resolved text */
      case KW_MD:
         *lptr++ = '<';
         *lptr++ = '!';
         strcat(lptr, resptr);
         len = strlen(resptr);
         lptr += len;
         *lptr++ = MDC;
         *lptr = '\0';
         len += 3;
         break;

         /* ***** add bracketed text keywords STARTTAG, ENDTAG, and MD ***** */

         /* if syntactic literal is 'ILLCHAR' then search unsuccessful */
      case ILLCHAR:
         syntxerr("Entity reference not found in table");
         break;
         /* if syntactic literal is NULL, then entity text consists of */
         /* parameter literal only */
      case NULL:
         strcat(lptr, resptr);
         len = strlen(resptr);
         lptr += len;

         /*           *lptr++ = EE;*/
         *lptr = '\0';

         break;
         /* any other syntactic literal found is ERROR */
      default:
         syntxerr("Unknown syntactic literal in parameter entity reference.");
         break;
      }
   }
   else
      syntxerr("Reference name not found while resolving parameter entity reference.");
   /* increment length of interpretted literal by length of entity text */
   *lenptr = *lenptr + len;
   if((c = jgetc()) == EOF)
      terminate(1, "End of File found while resolving parameter entity referecne");
   if(c != REFC)
      jungetc(c);
   *litptr = lptr;
}
/* ============================================================ */
int jgetc()
{
   extern int debug;
   REGISTER int temp;
   if(stkptr > 0)
      temp = mystack[--stkptr];
   else if (entfile != NULL)
      temp = getc(entfile);
   else
      temp = getc(docfile);
   if ((temp == CTRLZ) || (temp == EOF)) {
      if (entfile != NULL) {
         safefclose(entfile, entfilename, ENTITYFILE);
         entfile = NULL;
         temp = EE;
      }
      else
         temp = EOF;
   }
   if (debug & 1) {
      if ((temp > ' ') && (temp < 0x7f))
         printf("jgetc returns %c\n", temp);
      else
         printf("jgetc returns 0x%02x\n", temp);
   }
   if ((char) temp == EE){
      CLEARFLAG(IN_ENTITY);
      if (TESTFLAG(IN_DECL) && TESTFLAG(DECL_IS_IN_ENTITY))
         syntxerr("illegal parameter entity reference");
   }
   return(temp);
}
/* ============================================================ */
/* jungetc() pushes one character onto the push-back stack so   */
/* that the next jgetc() returns it. Bit 0 of debug traces the  */
/* character. A full stack is reported through terminate().     */
/* ============================================================ */
void jungetc(c)
REGISTER int c;
{
   extern int debug;

   if (debug & 1) {
      /* printable characters are shown as such, the rest in hex */
      if ((c <= ' ') || (c >= 0x7f))
         printf("jungetc returns 0x%02x\n", c);
      else
         printf("jungetc returns %c\n", c);
   }
   if(!(stkptr < STACKSIZE))
      terminate(1,"User Stack Over-Flow!");
   mystack[stkptr] = c;
   stkptr += 1;
}
/* ============================================================ */
/* stackinit() empties the push-back stack used by jgetc() and  */
/* jungetc() by resetting its element count; the stored         */
/* characters themselves are left untouched.                    */
/* ============================================================ */
void stackinit()
{
   stkptr = 0;
}

/* ============================================================ */
/* getkwindex() returns the defined string equivalent of an     */
/* inputted integer.                                            */
/* ============================================================ */
int getkwindex(s)
char *s;
{
   REGISTER int jj;
   char *j, *k = s;
   char chararray[NAMELEN + 1];
   static char *keywords[] = {
      "DOCTYPE", "ELEMENT", "ENTITY", "RNIDEFAULT",
      "ANY", "CDATA", "RCDATA", "SDATA", "PI", "EMPTY",
      "STARTTAG", "ENDTAG", "MS", "MD", "ATTLIST",
      "ID", "IDREF", "IDREFS", "NAME", "NAMES",
      "NMTOKEN", "NMTOKENS", "NOTATION", "NUMBER",
      "NUMBERS", "NUTOKEN", "NUTOKENS", "#REQUIRED",
      "#CURRENT", "#CONREF", "#IMPLIED", "#FIXED","",
      "","","SYSTEM", "PUBLIC", "NDATA"         };
#define MAXKEYWORDS ((sizeof(keywords))/(sizeof(char *)))   
   static int keytokens[] = 
   {
      KW_DOCTYPE, KW_ELEMENT, KW_ENTITY, KW_RNIDEFAULT,
      KW_ANY, KW_CDATA, KW_RCDATA, KW_SDATA, KW_PI, KW_EMPTY,
      KW_STARTTAG, KW_ENDTAG, KW_MS, KW_MD, KW_ATTLIST,
      KW_ID, KW_IDREF, KW_IDREFS, KW_NAME, KW_NAMES,
      KW_NMTOKEN, KW_NMTOKENS, KW_NOTATION, KW_NUMBER,
      KW_NUMBERS, KW_NUTOKEN, KW_NUTOKENS, KW_REQUIRED,
      KW_CURRENT, KW_CONREF, KW_IMPLIED, KW_FIXED, KW_GROUP,
      KW_LIT, KW_UNFIXED, KW_SYSTEM, KW_PUBLIC, KW_NDATA       };

   for (jj = 0; jj < MAXKEYWORDS; jj++) {
      if(strcmp(s, keywords[jj]) == 0)
         return(keytokens[jj]);
   }
   if (strcmp(s, "RE") == 0)
      return(RE);
   if (strcmp(s, "RS") == 0)
      return(RS);
   if (strcmp(s, "SPACE") == 0)
      return(SPACE);

   for(j = chararray; (*j = TOUPPER(*k)) != EOS; j++, k++);
   if(strcmp(chararray, "#DEFAULT") == 0)
      return(KW_RNIDEFAULT);
   return(GOOD);
}
