/* National Institute of Standards and Technology (NIST)
/* National Computer System Laboratory (NCSL)
/* Office Systems Engineering (OSE) Group
/* ********************************************************************
/*                            D I S C L A I M E R
/*                              (March 8, 1989)
/*  
/* There is no warranty for the NIST NCSL OSE SGML parser and/or the NIST
/* NCSL OSE SGML parser validation suite.  If the SGML parser and/or
/* validation suite is modified by someone else and passed on, NIST wants
/* the parser's recipients to know that what they have is not what NIST
/* distributed, so that any problems introduced by others will not
/* reflect on our reputation.
/* 
/* Policies
/* 
/* 1. Anyone may copy and distribute verbatim copies of the SGML source
/* code as received in any medium.
/* 
/* 2. Anyone may modify your copy or copies of SGML parser source code or
/* any portion of it, and copy and distribute such modifications provided
/* that all modifications are clearly associated with the entity that
/* performs the modifications.
/* 
/* NO WARRANTY
/* ===========
/* 
/* NIST PROVIDES ABSOLUTELY NO WARRANTY.  THE SGML PARSER AND VALIDATION
/* SUITE ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
/* EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
/* THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
/* WITH YOU.  SHOULD THE SGML PARSER OR VALIDATION SUITE PROVE DEFECTIVE,
/* YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
/* 
/* IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL NIST BE LIABLE FOR
/* DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR OTHER SPECIAL,
/* INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
/* INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
/* BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR A
/* FAILURE OF THE PROGRAM TO OPERATE WITH PROGRAMS NOT DISTRIBUTED BY
/* NIST) THE PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF
/* SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
*/

/************************************************************************/
/*   TITLE:          SGML PARSER                                        */
/*   SYSTEM:         DTD PROCESSOR                                      */
/*   SUBSYSTEM:                                                         */
/*   SOURCE FILE:    DTD.C                                              */
/*   AUTHOR:         Jim Heath                                          */
/*                                                                      */
/*   DATE CREATED:                                                      */
/*   LAST MODIFIED:                                                     */
/*                                                                      */
/*                  REVISIONS                                           */
/*   WHEN      WHO            WHY                                       */
/************************************************************************/
#include <stdio.h>
#include <setjmp.h>
#include <unistd.h>

#include "qntyset.h"
#include "dtd.h"
#include "dtdfncs.h"
#include "dtdglbl.h"
#include "dtddefs.h"

/* Capacity, in bytes, of the normalized declaration buffer `decl`. */
#define      MAXDECLSIZE           2048
/* Buffer in which dodecl() accumulates the text of the current declaration. */
static char decl[MAXDECLSIZE];
/* NOTE(review): DEMO is not referenced in the visible part of this file --
   confirm whether anything still depends on it. */
#define  DEMO   0
/* Sampled by processinput() before and after each dogetds() pass and tested
   against zero there.  Presumably the number of marked sections currently
   open -- TODO confirm against get_marked_section()/get_ms_closes(). */
int num_open_ms=0;

/* ============================================================ */
/*
 * main - entry point of the DTD processor.
 *
 * Clears filemask, lets doopts() digest the command line and dofiles()
 * open the work files, then arms the Xenv recovery point and hands control
 * to processinput().
 *
 * When setjmp(Xenv) returns non-zero (a longjmp back to this point --
 * presumably issued by the syntax error reporter, which is not visible
 * here), input is discarded up to and including the next MDC, the parse
 * stack is reset with stackinit(), and the recovery point is re-armed
 * before parsing resumes.
 *
 * argv[1] must name the document: dofiles() copies it unconditionally, so
 * its absence is rejected here instead of dereferencing a null pointer.
 */
int main(argc, argv)
int argc;
char *argv[];
{
#undef PROFILE
   char *targv[2];
   REGISTER int j;
   filemask = 0;
   FUNCTRACE("main");
#ifdef PROFILE
   argc = 2;
   targv[0] = "parse1";
   targv[1] = "testdoc";
   doopts(argc, targv);
   dofiles(targv);
#else 
   doopts(argc, argv);
   /* dofiles() reads argv[1]; refuse to continue when it is missing. */
   if (argc < 2)
      terminate(1, "no document file named on the PARSE1 command line");
   dofiles(argv);
#endif
SETENV:
   if (setjmp(Xenv) != 0) {
      /* Error recovery: skip the remainder of the offending declaration. */
      while(1) {
         if ((j = jgetc()) == EOF)
            terminate(1, "EOF while recovering for syntax error");
         if (j == MDC) {
            stackinit();
            goto SETENV;    /* re-arm the recovery point, then resume */
         }
      }
   }
   processinput();
   return 0;
}
/* ============================================================ */
/*
 * processinput - top-level scanning loop, written as a small state machine.
 *
 *   TRYSGML     dogetsgml() once at start-up, then GETDS
 *   GETDS       dogetds(); if marked sections are open, get_ms_closes();
 *               repeat GETDS while num_open_ms changed, else GETMDO
 *   GETMDO      reset the declaration buffer and flags, then inpMDO():
 *               GOOD -> GETKEYWORD, anything else -> TRYPIO
 *   GETKEYWORD  dokeyword(), then GETDS
 *   TRYPIO      dopio(), then GETDS
 *
 * The loop has no exit of its own; it ends only when a callee leaves the
 * program or jumps out.
 *
 * NOTE(review): elements/attributes/entities are never modified here, so
 * dogetds() always receives zeros -- confirm whether the counts kept by
 * dokeyword() were meant to reach this call.
 */
void processinput()
{
   enum {
      TRYPIO, GETDS, GETMDO, GETKEYWORD, TRYSGML
   } state = TRYSGML;
   int elements = 0;
   int attributes = 0;
   int entities = 0;
   int ms_before;

   for (;;) {
      switch (state) {
      case TRYSGML:
         /* look for and strip sgml declaration */
         dogetsgml();
         state = GETDS;
         break;

      case GETDS:
         ms_before = num_open_ms;
         CLEARFLAG(IN_DECL);
         /* arguments are for show count */
         dogetds(elements, attributes, entities);
         if (num_open_ms > 0) {
            get_ms_closes();
         }
         /* A change in the open count means another DS pass is needed. */
         state = (ms_before == num_open_ms) ? GETMDO : GETDS;
         break;

      case GETMDO:
         CLRDECL();
         SETFLAG(IN_DECL);
         /* Remember whether this declaration starts inside an entity. */
         if (TESTFLAG(IN_ENTITY)) {
            SETFLAG(DECL_IS_IN_ENTITY);
         }
         else {
            CLEARFLAG(DECL_IS_IN_ENTITY);
         }
         if ((inpMDO()) == GOOD) {
            state = GETKEYWORD;
         }
         else {
            state = TRYPIO;
         }
         break;

      case GETKEYWORD:
         dokeyword();
         state = GETDS;
         break;

      case TRYPIO:
         dopio();
         state = GETDS;
         break;
      }
   }
}
/* ============================================================ */
/* ============================================================ */
/*
 * doopts - echo the command line and apply the option arguments.
 *
 * argc/argv are the program arguments.  Every argument is printed; options
 * are then taken from argv[2] onward (argv[1] is the document name used by
 * dofiles()).  The option letter is the SECOND character of the argument;
 * the first character is not examined.
 *
 *   P/p<name>   copy <name> into entfilename
 *   T/t<path>   copy <path> into pathname; append "\" after a trailing
 *               ':' or ":\" after a trailing letter
 *   1<number>   set debug to atoi(<number>)
 *   F           set deletefiles to FALSE
 *   H           accepted, no effect
 *
 * Anything else, including an empty argument, ends the run through
 * terminate().
 *
 * NOTE(review): the strcpy() calls are unbounded; the sizes of entfilename
 * and pathname are not visible here, so no limit is enforced.
 */
void doopts(argc, argv)
int argc;
char *argv[];
{
   REGISTER int j;
   char *arg;
   int len;
   FUNCTRACE("doopts");
   printf("argc = %d\n", argc);
   for (j = 0; j < argc; j++)
      printf("argv[%d] = %s\n", j, argv[j]);
   for (j = 2; j < argc; j++) {
      arg = argv[j];
      /* An empty argument has no second character: treat it as an
         illegal option instead of reading past its terminator. */
      switch((*arg != '\0') ? *(arg + 1) : '\0') {
      case 'P':
      case 'p':
         strcpy(entfilename, (arg + 2));
         break;
      case 'T':
      case 't':
         strcpy(pathname, (arg + 2));
         len = (int) strlen(pathname);
         /* Only look at the last character when there is one; an empty
            path used to index pathname[-1]. */
         if (len > 0) {
            if (*(pathname + len - 1) == ':')
               strcat(pathname, "\\");
            else if (ISALPHA(*(pathname + len - 1)))
               strcat(pathname, ":\\");
         }
         break;
      case '1':
         debug = atoi((arg + 2));
         break;
      case 'F':
         deletefiles = FALSE;
         break;
      case 'H':
         /*         heading();*/
         break;
      default:
         terminate(1, "illegal option to PARSE1");
      }
   }
}
/* ============================================================ */
/*
 * bldworkname - store the work-file name "<pathname><leaf>" in dest.
 */
static void bldworkname(dest, leaf)
char *dest;
char *leaf;
{
   strcpy(dest, pathname);
   strcat(dest, leaf);
}

/*
 * dofiles - derive every work-file name and open the files used here.
 *
 * argv[1] supplies the name of the input document.  All other names are
 * pathname followed by a fixed leaf.  After the names are built,
 * unlinkall(TRUE) is called and the files are opened in a fixed order.
 * The names attrfname, dtdfname, posfname and xcptfname are prepared but
 * the corresponding files are not opened in this function.
 */
void dofiles(argv)
char *argv[];
{
   FUNCTRACE("dofiles");

   /* intermediate work files */
   bldworkname(treefname, "treefile.sgm");
   bldworkname(symbfname, "symbfile.sgm");
   bldworkname(attrfname, "attrfile.sgm");     /* attribute file */
   bldworkname(attrtname, "attrtemp.sgm");     /* attribute work file */
   bldworkname(dtdfname, "dtdfile1.sgm");      /* dtdfile for later use */
   bldworkname(preffname, "preffile.sgm");
   bldworkname(greffname, "greffile.sgm");
   bldworkname(posfname, "posfile.sgm");
   bldworkname(xcptfname, "except.sgm");
   bldworkname(exclfname, "exclusns.sgm");
   bldworkname(inclfname, "inclusns.sgm");
   bldworkname(cmfname, "cmfile.sgm");

   /* the input document is named by the first program argument */
   strcpy(docfname, argv[1]);

   unlinkall(TRUE);

   treefile = safecreat(treefname, TREEFILE);
   symbfile = safecreat(symbfname, SYMBFILE);
   attrtemp = safecreat(attrtname, ATTRTEMP);
   preffile = safecreat(preffname, PREFFILE);
   greffile = safecreat(greffname, GREFFILE);
   docfile = safefopen(docfname, "r", DOCFILE);
   cmfile = safefopen(cmfname, "wb", CMFILE);
   inclfile = safecreat(inclfname, INCLFILE);
   exclfile = safecreat(exclfname, EXCLFILE);
}
/* ============================================================ */
/*
 * dodecl - maintain the normalized declaration buffer `decl`.
 *
 * Exactly one action is performed per call, chosen in this order:
 *   clrflag == ON    clear the buffer and rewind the write position
 *   c != '\0'        append the single character c
 *   inptr != NULL    append the NUL-terminated string at inptr
 *   outptr != NULL   store the address of the buffer in *outptr
 *
 * The buffer always stays NUL-terminated, so at most MAXDECLSIZE - 1
 * characters fit.  Running out of room is reported through terminate().
 * (The ADDCHAR/ADDSTRING/GETDECLADDR/CLRDECL macros used throughout this
 * file presumably expand to calls of this function -- their definitions
 * are not visible here.)
 */
void dodecl(clrflag, c, inptr, outptr)
int clrflag;
char c;
REGISTER char *inptr;
char **outptr;
{
   static char *declptr = decl;
   /* Last slot of the buffer; reserved for the terminating NUL so that
      "*declptr = '\0'" after an append can never land outside decl. */
   char *limit = decl + MAXDECLSIZE - 1;

   if(clrflag == ON){
      memset(decl, '\0', MAXDECLSIZE);
      declptr = decl;
      return;
   }
   /* c is a character, not a pointer: compare with '\0', not NULL. */
   if(c != '\0'){
      if(declptr < limit){
         *declptr++ = c;
         *declptr = '\0';
         return;
      }
      else
         terminate(1, "MAXDECLSIZE exceeded in normalized declaration buffer");
   }
   if(inptr != NULL){
      while(*inptr != '\0'){
         if(declptr >= limit){
            terminate(1, "MAXDECLSIZE exceeded in normalized declaration buffer");
            return;   /* never write past the buffer, even if terminate() returns */
         }
         *declptr++ = *inptr++;
         *declptr = '\0';
      }
      return;
   }
   if(outptr != NULL){
      *outptr = decl;
      return;
   }
}
/* ============================================================ */
/*
 * writeposition - save the current read offset of the document file.
 *
 * Obtains the offset of docfile with ftell(), creates the position file
 * named by posfname and writes the offset to it as a raw `long`, then
 * closes the file.  A failing ftell() ends the run through terminate().
 */
void writeposition()
{
   long position;
   int  posfile;
   position = ftell(docfile);
   if (position == -1L)
      terminate(1, "failure on ftell");   /* the call that failed is ftell(), not lseek() */
   posfile = safecreat(posfname, POSFILE);
   safewrite(posfile, (char *) &position, sizeof(position));
   safeclose(posfile, "position file", POSFILE);
}
/* ============================================================ */
/*
 * showcounts - print the three declaration totals on standard output.
 *
 * elements, attributes and entities are printed one per line, preceded by
 * one blank line and followed by one blank line.
 */
static void showcounts(elements, attributes, entities)
int elements;
int attributes;
int entities;
{
   printf("\ntotal elements = %d\n", elements);

   printf("total attributes = %d\n", attributes);

   printf("total entities = %d\n\n", entities);
}
/* ============================================================ */
void dogetds(elements, attributes, entities)
int elements, attributes, entities;
{
   REGISTER int j;
   if (INPDS() == EOF)   /* inpsep: gets s's */
      terminate(1, "EOF found while looking for DS");
   if ((j = jgetc()) != DSC) {
      jungetc(j);
      return;
   }
   ADDCHAR(DSC);
   if (INPPS() == EOF)
      terminate(1, "EOF found following DSC");
   if ((j = jgetc()) == MDC) {
      finddocelt();
      writeposition();
      ADDCHAR(DSC);
      closeall();
      showcounts(elements, attributes, entities);
      if (strlen(rootelt) != 0) {
         printf("\n\nERROR - no element declaration for root element\n");
         errflag = 1;
      }
      if (errflag == 0)
         bldsymbtbl();
      else {
         unlinkall(TRUE);
         exit(1);
      }
   }
   else 
      if (j == DSC) {
         jungetc(']');
         jungetc(']');
      }
      else
         terminate(1, "expected MDC following DSC");
}
/* ============================================================ */
void dopio()
{
   REGISTER int j, count = 0;
   char *mydecl;
   if ((j = jgetc()) == EOF)
      terminate(1, "EOF found");
   if (j == '<') {
      ADDCHAR(j);
      if ((j = jgetc()) == EOF)
         terminate(1, "EOF within declaration");  /* not really a valid statement */
      ADDCHAR(j);
      if (j != '?')
         syntxerr("expected MDO or PIO");
   }
   else 
      syntxerr("expected MDO or PIO");
   do {    /* found valid PIO */
      if (count > PILEN)
         syntxerr("length of processing instruction exceeds PILEN");
      if ((j = jgetc()) == EOF)
         terminate(1, "EOF within declaration");
      ++count;
      ADDCHAR(j);
   }   while(j != MDC);
   GETDECLADDR(&mydecl);
   printf("%s\n", mydecl);
}
/* ============================================================ */
/*
 * finddocelt - examine what follows the prolog in docfile.
 *
 * Clears the declaration buffer, skips SPACE/RS/RE/TAB, and acts on the
 * first other character read directly from docfile:
 *   '<'    handed to finddocelt2()
 *   ERO    the following character is read; if it is a name start
 *          character the pair is reported with syntxerr(); the file is
 *          then repositioned on the ERO itself
 *   other  the file is repositioned on that character
 * I/O failures and EOF are reported through PERROR.
 */
void finddocelt()
{
   long mark;
   int ch;

   CLRDECL();
   for (;;) {
      if ((ch = getc(docfile)) == EOF)
         PERROR("unexpected EOF in finddocelt");

      if (ch == SPACE || ch == RS || ch == RE || ch == TAB)
         continue;

      if (ch == '<') {
         finddocelt2();
         return;
      }

      /* Offset of the character just read, so it can be re-read later. */
      mark = ftell(docfile);
      if (mark == -1L)
         PERROR("failure in ftell(docfile) in finddocelt");
      mark--;

      if (ch == ERO) {
         if ((ch = getc(docfile)) == EOF)
            PERROR("unexpected EOF in finddocelt");
         if (isnmstrt(ch))  {
            ADDCHAR(ERO);
            ADDCHAR(ch);
            syntxerr("illegal character found while searching other prolog");
         }
      }

      if (fseek(docfile, mark, SEEK_SET) == -1L)
         PERROR("failure in fseek(docfile) in finddocelt()");
      return;
   }
}
/* ============================================================ */
/*
 * finddocelt2 - classify the construct that starts at a '<' in docfile.
 *
 * The offset one character before the current position is remembered
 * (the '<' itself when called from finddocelt()), then the next character
 * is inspected:
 *   '?'    dopio2() collects a processing instruction
 *   '!'    docomment() collects a comment; when it returns FALSE the
 *          file is repositioned on the remembered offset
 *   other  the file is repositioned on the remembered offset
 * For '?' and '!' the declaration buffer is printed afterwards.
 */
void finddocelt2()
{
   long mark;
   int ch;
   char *text;

   mark = ftell(docfile);
   if (mark == -1L)
      PERROR("failure in ftell(docfile) in finddocelt2");
   mark--;

   ch = getc(docfile);
   if (ch == EOF)
      PERROR("unexpected EOF in finddocelt2");

   if (ch == '?') {
      dopio2();
   }
   else if (ch == '!') {
      if (docomment() == FALSE){
         if (fseek(docfile, mark, SEEK_SET) == -1L)
            PERROR("failure in fseek(docfile) in finddocelt2()");
      }
   }
   else {
      /* Not a PI or a declaration: rewind and leave without printing. */
      if (fseek(docfile, mark, SEEK_SET) == -1L)
         PERROR("failure in fseek(docfile) in gotodocelt2()");
      return;
   }

   GETDECLADDR(&text);
   printf("%s\n", text);
}
/* ============================================================ */
/*
 * dopio2 - collect a processing instruction read directly from docfile.
 *
 * "<?" is placed in the declaration buffer, then characters are copied
 * from docfile up to and including MDC.  Each character other than the
 * closing MDC is counted, and a count above PILEN is reported with
 * syntxerr().  EOF is reported through PERROR.
 */
void dopio2()
{
   int ch;
   int count = 0;

   ADDSTRING("<?");
   do {
      if ((ch = getc(docfile)) == EOF)
         PERROR("unexpected EOF while reading other prolog");
      ADDCHAR(ch);
      if (ch != MDC && ++count > PILEN)
         syntxerr("PI too long in other prolog");
   } while (ch != MDC);
}
/* ============================================================ */
/*
 * docomment - collect a comment declaration read directly from docfile.
 *
 * Called after "<!" has been consumed.  Returns FALSE, with nothing added
 * to the declaration buffer, as soon as one of the next two characters is
 * not '-' (the characters read so far stay consumed).  Otherwise "<!--"
 * and everything up to and including the closing MDC are added to the
 * buffer and TRUE is returned.
 *
 * After a comment's closing "--", MDC ends the declaration and another
 * "--" opens a further comment.  A single '-' followed by anything else
 * is reported with syntxerr(); every other character, white space
 * included, is copied and skipped without complaint.
 */
int docomment()
{
   int ch;
   int dashes;
   int k;

   /* The declaration is a comment only if "--" follows immediately. */
   for (k = 0; k < 2; k++) {
      if ((ch = getc(docfile)) == EOF)
         PERROR("unexpected EOF while reading other prolog");
      if (ch != '-')
         return(FALSE);
   }
   ADDSTRING("<!--");

   for (;;) {
      /* Inside a comment: copy until two consecutive dashes are seen. */
      dashes = 0;
      while (dashes < 2) {
         if ((ch = getc(docfile)) == EOF)
            PERROR("unexpected EOF while reading other prolog");
         ADDCHAR(ch);
         if (ch == '-')
            dashes++;
         else
            dashes = 0;
      }

      /* Between comments: wait for MDC or the start of another comment. */
      for (;;) {
         if ((ch = getc(docfile)) == EOF)
            PERROR("unexpected EOF while reading other prolog");
         ADDCHAR(ch);
         if (ch == MDC)
            return(TRUE);
         if (ch != '-')
            continue;
         if ((ch = getc(docfile)) == EOF)
            PERROR("unexpected EOF while reading other prolog");
         ADDCHAR(ch);
         if (ch == '-')
            break;            /* "--": back inside a comment */
         syntxerr("illegal declaration in other prolog");
      }
   }
}
/* ============================================================ */
/*
 * kw_check_entity_boundary - report a declaration whose IN_ENTITY state at
 * its end differs from the DECL_IS_IN_ENTITY state recorded at its start.
 */
static void kw_check_entity_boundary()
{
   if (TESTFLAG(IN_ENTITY) != TESTFLAG(DECL_IS_IN_ENTITY))
      syntxerr("illegal parameter entity reference");
}

/*
 * kw_require_doctype - report msg unless a DOCTYPE declaration was seen.
 */
static void kw_require_doctype(seen, msg)
int seen;
char *msg;
{
   if (seen == FALSE)
      syntxerr(msg);
}

/*
 * dokeyword - dispatch on the keyword that follows MDO.
 *
 * The keyword is read with INPNAME() and selects one of:
 *   DOCTYPE   read the document type name into rootelt and require DSO
 *   ENTITY, ELEMENT, ATTLIST, NOTATION
 *             require an earlier DOCTYPE, call the matching handler and
 *             check the entity boundary
 *   no keyword
 *             MDC ends an empty declaration; '-' is pushed back so that
 *             INPPS() can consume it and MDC must follow; DSO starts a
 *             marked section; anything else is a syntax error
 * The text collected for the declaration is printed before returning.
 *
 * NOTE(review): the element/attribute/entity/notation counters are plain
 * locals that are reset on every call and never read -- confirm whether
 * they were meant to feed the totals printed by showcounts().
 */
void dokeyword()
{
   static int founddoctype = FALSE;
   char namearray[NAMELEN + 1], *ptr, *mydecl;
   int j;
   int elements = 0, attributes = 0, entities = 0, notations = 0;

   ptr = namearray;
   j = INPNAME( &ptr, NAMELEN, TOUPPER);
   switch (j) {
   case KW_DOCTYPE:
      ADDSTRING("DOCTYPE");
      if (INPPS() < 1)
         terminate(1, "Error while processing DOCTYPE declaration");
      else
         ADDCHAR(SPACE);
      /* input the document name */
      ptr = rootelt;
      if (INPNAME( &ptr, NAMELEN, TOUPPER) < GOOD)
         terminate(1, "invalid docname");
      else {
         ADDSTRING(rootelt);
         INPPS();
         if ((j = jgetc()) != DSO)
            terminate(1, "Error while processing DOCTYPE declaration");
         ADDSTRING(" [");
      }
      founddoctype = TRUE;
      break;

   case KW_ENTITY:
      ADDSTRING("ENTITY");
      kw_require_doctype(founddoctype,
         "Entity declaration found before DOCTYPE declaration");
      doentity();
      entities++;
      kw_check_entity_boundary();
      break;

   case KW_ELEMENT:
      ADDSTRING("ELEMENT");
      kw_require_doctype(founddoctype,
         "ELEMENT declaration found before DOCTYPE declaration");
      elements += doelement();
      kw_check_entity_boundary();
      break;

   case KW_ATTLIST:
      ADDSTRING("ATTLIST");
      kw_require_doctype(founddoctype,
         "ATTLIST declaration found before DOCTYPE declaration");
      attributes += doattlist();
      kw_check_entity_boundary();
      break;

   case KW_NOTATION:
      ADDSTRING("NOTATION");
      kw_require_doctype(founddoctype,
         "NOTATION declaration found before DOCTYPE declaration");
      notations = donotation();
      kw_check_entity_boundary();
      break;

   default:
      j = jgetc();
      if (j == MDC || j == '-') {
         if (j == '-') {
            jungetc(j);
            j = INPPS();    /* check for comment */
            if ((j = jgetc()) != MDC) {
               ADDCHAR(j);
               syntxerr("illegal sequence following MDO");
            }
         }
         kw_check_entity_boundary();
         ADDCHAR(MDC);      /* start over; look for another decl */
      }
      else {
         ADDCHAR(j);        /* the offending character, or DSO, goes to output */
         if (j == DSO)
            get_marked_section();   /* process marked section */
         else
            syntxerr("illegal sequence following MDO");
      }
      break;
   }

   GETDECLADDR(&mydecl);
   printf("%s\n", mydecl);
}
