/* Texchk -- a LaTeX syntax and spelling checker.
   Written by JP Massar, Thinking Machines Corporation, Cambridge, MA
   This code is hereby released into the public domain, for better or worse.
*/

#include <stdio.h>
#include <ctype.h>

/* if your system doesn't have either string.h or strings.h you */
/* may have to declare the string functions yourself */
#ifdef BSD42
#include <strings.h>
#else
#include <string.h>
#endif

#ifdef TMC
#include <ctools.h>
#else
#include "ctools.h"
#endif

#include "texchk.h"
#include "cmds.h"
#include "texchars.h"

/* Command line option flags; both are set while main() parses its arguments. */
Bool Verbose_Mode = F;                  /* -v option */
Bool Check_Mode = F;                    /* -c option */

int Indent_Level = 0;                   /* for verbose output mode */

/* Stack of currently open constructs (\begin, '{', '[', math modes). */
/* Lex_TOS is the index of the top entry and is -1 when the stack is empty: */
/* lex_push pre-increments it and lex_pop reports underflow when it is < 0. */
Stack_Entry Lex_Stack[MAX_ENTRIES];     /* environment stack */
int Lex_TOS = -1;

FILE *fp;                               /* file being processed */
/* Set when a lookahead read (in get_keyword or after a '$') hits EOF, so */
/* that the next call to get_token returns 0 without reading again. */
Bool Already_At_Eof = F;

/* Current_Char is the index in Line_Buffer of the next character that */
/* get_a_char will return; Line_Length counts the characters in the buffer, */
/* including the newline that get_a_char appends. */
long Current_Line = 0;                  /* where we are in input text */
long Current_Char = 0;                  /* where we are in input text */
long Line_Length = 0;                   /* current line length */
char Line_Buffer[MAXLL];                /* buffer for input text */

/* Math mode bookkeeping: the depth counts math modes entered and not yet */
/* left; the flag is recomputed from the depth when a math mode is left. */
Bool In_Math_Mode = F;
int Math_Mode_Depth = 0;

/* Static buffer whose address is handed back by copy_keyword and, for */
/* single character commands, by get_token.  Each new keyword overwrites it. */
char Keyword_Buffer[MAX_KEYWORD_LENGTH];


new_file ()

/* reset the per-file position, math mode and indentation state before a */
/* new input file is processed. */

/* NOTE(review): Already_At_Eof and Lex_TOS are not reset here; confirm */
/* that they can never carry over from a previously processed file. */

{
  Current_Line = Current_Char = Line_Length = 0;
  Math_Mode_Depth = Indent_Level = 0;
  In_Math_Mode = F;
}
        
do_indent (level) int level;

/* write SPACES_PER_INDENT_LEVEL blanks to stderr for each of 'level' */
/* indentation levels.  Nothing is written when level is zero or negative. */

{
  int levels_left,spaces_left;
  for (levels_left = level; levels_left > 0; levels_left--) {
      spaces_left = SPACES_PER_INDENT_LEVEL;
      while (spaces_left-- > 0) putc(' ',stderr);
  }
}


lex_push (etype,keyword,linenum) envtype etype; char *keyword; long linenum;

/* add a new entry to the top of the environment stack.  The keyword */
/* pointer is stored as given (the string is not copied).  Terminates the */
/* program if the stack is already full. */

{
  Lex_TOS++;
  if (Lex_TOS > MAX_ENTRIES - 1) {
     fprintf(stderr,"Stack overflow...Process terminating.\n");
     texit();
  }
  Lex_Stack[Lex_TOS].linenum = linenum;
  Lex_Stack[Lex_TOS].keyword = keyword;
  Lex_Stack[Lex_TOS].etype = etype;
}


lex_pop (ptr_etype,ptr_keyword,ptr_linenum)

/* remove the top entry of the environment stack and hand its three */
/* components back through the pointer arguments.  Terminates the program */
/* if the stack is empty. */

  envtype *ptr_etype;
  char **ptr_keyword;
  long *ptr_linenum;

{
  if (0 > Lex_TOS) {
     fprintf(stderr,"Stack underflow...Process terminating\n");
     texit();
  }
  *ptr_linenum = Lex_Stack[Lex_TOS].linenum;
  *ptr_keyword = Lex_Stack[Lex_TOS].keyword;
  *ptr_etype = Lex_Stack[Lex_TOS].etype;
  Lex_TOS -= 1;
}


curstack (ptr_etype,ptr_keyword,ptr_linenum)

  /* peek at the top of the environment stack: report the components of */
  /* the current entry without removing it.  Calling this with an empty */
  /* stack is a fatal error. */

  envtype *ptr_etype;
  char **ptr_keyword;
  long *ptr_linenum;

{
  envtype top_etype;
  char *top_keyword;
  long top_linenum;

  if (Stack_Empty) {
     fprintf(stderr,"Fatal error, bad call to curstack\n");
     texit();
  }

  /* a peek is implemented as a pop followed by a push of the same entry */

  lex_pop(&top_etype,&top_keyword,&top_linenum);
  lex_push(top_etype,top_keyword,top_linenum);

  *ptr_etype = top_etype;
  *ptr_keyword = top_keyword;
  *ptr_linenum = top_linenum;
}


char *copy_keyword (starttoken,endtoken) int starttoken,endtoken;

/* copy the characters Line_Buffer[starttoken..endtoken] (both inclusive) */
/* into Keyword_Buffer, terminate them with a NUL and return the buffer. */
/* The result is overwritten by the next call.  A keyword that does not */
/* fit in Keyword_Buffer is reported and terminates the program. */

{
  int nchars;

  nchars = endtoken - starttoken + 1;
  if (nchars >= MAX_KEYWORD_LENGTH) {
     keyword_length_error();
     texit();
  }
  strncpy(Keyword_Buffer,&Line_Buffer[starttoken],nchars);
  Keyword_Buffer[nchars] = '\0';
  return(Keyword_Buffer);
}


do_pop (etype,keyword) envtype etype; char *keyword;

/* pop the current environment and verify that it is the opening construct */
/* that matches the closing construct (etype,keyword) just read.  A */
/* mismatch is reported through nest_error and terminates the program. */
/* Returns 0 when the entry matched. */

{

  envtype open_etype;
  char *open_keyword;
  long open_linenum;
  char *opener, *closer;       /* names of the expected pair, for messages */
  int mismatch;

  lex_pop(&open_etype,&open_keyword,&open_linenum);

  mismatch = 0;

  switch (etype) {

    case ESCAPE_END :
      opener = "\\begin";
      closer = "\\end";
      /* an \end must close a \begin that names the same environment */
      mismatch = (open_etype != ESCAPE_BEGIN)
                 || (0 != strcmp(open_keyword,keyword));
      break;

    case RIGHT_SQUARE_BRACKET :
      opener = "[";
      closer = "]";
      mismatch = (open_etype != LEFT_SQUARE_BRACKET);
      break;

    case RIGHT_CURLY_BRACKET :
      opener = "{";
      closer = "}";
      mismatch = (open_etype != LEFT_CURLY_BRACKET);
      break;

    case MATH :
      opener = "Begin Math Mode";
      closer = "End Math Mode";
      mismatch = (open_etype != etype);
      break;

    case DOUBLE_MATH :
      opener = "Begin Display Math Mode";
      closer = "End Display Math Mode";
      mismatch = (open_etype != etype);
      break;

  }

  if (!mismatch) return(0);

  nest_error(opener,closer,open_linenum,open_keyword);
  texit();

}


int get_a_char ()

/* return the next input character, or EOF at end of file.  Input is read */
/* one line at a time into Line_Buffer so that the current line number can */
/* be tracked; a newline is appended to every line read.  A line that is */
/* too long terminates the program, and a character that fails LGL_CHAR */
/* is reported through bad_char_error but still returned. */

{
  int nread,c;

  if (Current_Char >= Line_Length) {

     /* current line is used up, fetch the next one */

     nread = getline(fp,Line_Buffer,MAXLL-2);
     if (nread == AT_EOF) return(EOF);
     if (nread == TOO_MANY_CHARS) {
        line_too_long_error();
        texit();
     }
     else {
        /* MAXLL-2 above leaves room for the newline and the NUL */
        Line_Buffer[nread] = '\n';
        Line_Buffer[nread + 1] = '\0';
        Line_Length = nread + 1;
        Current_Char = 0;
        Current_Line++;
     }
  }

  /* mask to 8 bits so that the result is never negative */

  c = (int) (255 & Line_Buffer[Current_Char]);
  Current_Char++;
  if (!LGL_CHAR(c)) bad_char_error(c,T);
  return(c);
}
  

unget_a_char ()

/* back up one character in the current line so that the next call to */
/* get_a_char returns it again.  Backing up past the start of the line */
/* buffer is a fatal error. */

{
  if (0 == Current_Char) {
     fprintf(stderr,"Invalid unget...process terminating\n");
     texit();
  }
  Current_Char -= 1;
}


char *get_keyword ()

/* read a keyword, i.e. a run of contiguous alphabetic characters.  The */
/* first character of the keyword has already been consumed by the caller, */
/* so the token starts one position before Current_Char. */
/* The keyword returned is in a static buffer. */

{
  int first,last,c;

  first = last = Current_Char - 1;
  for (;;) {
    c = get_a_char();
    if (!isalpha(c)) break;
    last++;
  }

  /* give back the character that ended the keyword, unless it was EOF */

  if (c != EOF) unget_a_char();
  else Already_At_Eof = 1;

  return(copy_keyword(first,last));
}


char *get_begin_end_keyword ()

/* called after a \begin or \end construct is found. */
/* begin and end keywords are enclosed in {}. */
/* a warning is issued if there is whitespace immediately after the { or */
/* immediately before the }, or if a newline occurs before the }. */
/* returns a string constituting what is in between the {}s save for */
/* whitespace immediately after the { and immediately before the } */

/* It is a fatal error if the next character is not '{', if EOF is reached */
/* before the '}', or if nothing but whitespace lies between the {}s. */

/* keyword returned is in a static buffer. */

/* NOTE(review): get_a_char overwrites Line_Buffer and resets Current_Char */
/* when it moves on to a new line, so if the '}' is not on the same line as */
/* the '{' the indices computed here no longer refer to the text that was */
/* scanned.  Only a warning is issued for that case. */

{
  int ch;
  int starttoken,endtoken;

  ch = get_a_char();
  if (ch != LCB) {
     no_brace_after_begin_end_error();
     texit();
  }

  /* starttoken indexes the first character after '{'.  endtoken starts */
  /* one before it (an empty keyword) and advances for every character */
  /* read up to, but not including, the '}'. */

  starttoken = Current_Char;
  endtoken = starttoken - 1;
  while (RCB != (ch = get_a_char())) {
    if (ch == '\n')
       warning_close_brace();
    else if (ch == EOF) {
       eof_error();
       texit();
    }
    else
       endtoken++;
  }

  /* ignore whitespace after '{' and before '}' */

  if (ISWHITE(Line_Buffer[starttoken]) || ISWHITE(Line_Buffer[endtoken])) {
     warning_blanks_in_cb();
  }

  /* skip leading whitespace.  The keyword is blank only if this runs */
  /* past endtoken; a one character keyword (starttoken == endtoken) is */
  /* legitimate and must not be rejected. */

  while (starttoken <= endtoken && ISWHITE(Line_Buffer[starttoken]))
    starttoken++;
  if (starttoken > endtoken) {
     blank_begin_end_error();
     texit();
  }
  while (endtoken > starttoken && ISWHITE(Line_Buffer[endtoken]))
    endtoken--;
  return(copy_keyword(starttoken,endtoken));

}


get_token (action,etype,keyword) 

  /* fetch the next significant token from the input stream and classify */
  /* it: *action receives what the caller should do with it and *etype the */
  /* kind of construct.  The significant text of the token is returned in */
  /* *keyword, which points either to a string constant or to a static */
  /* buffer.  Insignificant characters and comments are skipped. */

  /* returns 0 on encountering EOF, otherwise returns 1. */

  Actions *action;
  envtype *etype;
  char **keyword;

{
  int c,is_begin,is_end;

  *keyword = 0;
  if (Already_At_Eof) return(0);

  for (;;) {

    c = get_a_char();
    if (c == EOF) return(0);

    switch (c) {

      case LSB :
        *etype = LEFT_SQUARE_BRACKET;
        *action = PUSH;
        *keyword = "[";
        return(1);

      case RSB :
        *etype = RIGHT_SQUARE_BRACKET;
        *action = POP;
        *keyword = "]";
        return(1);

      case LCB :
        *etype = LEFT_CURLY_BRACKET;
        *action = PUSH;
        *keyword = "{";
        return(1);

      case RCB :
        *etype = RIGHT_CURLY_BRACKET;
        *action = POP;
        *keyword = "}";
        return(1);

      case MATH_CHAR :

        /* look one character ahead: a second '$' means 'Display Math */
        /* Mode', anything else means a single '$' */

        c = get_a_char();
        if (c == MATH_CHAR) {
           *action = DOLLAR_DOLLAR;
           *etype = DOUBLE_MATH;
           *keyword = "$$";
           return(1);
        }

        /* remember EOF for the next call, otherwise give the character */
        /* back to the input */

        if (c == EOF) Already_At_Eof = 1;
        else unget_a_char();

        *action = DOLLAR;
        *etype = MATH;
        *keyword = "$";
        return(1);

      case ESCAPE :

        c = get_a_char();
        if (c == EOF) {
           eof_error();
           texit();
        }

        /* single character non-alphabetic commands */

        if (!isalpha(c)) {
           *action = CHECK_SINGLE;
           *etype = ESCAPE_SINGLE_CHAR;
           Keyword_Buffer[0] = c;
           Keyword_Buffer[1] = '\0';
           *keyword = Keyword_Buffer;
           return(1);
        }

        /* alphabetic commands; \begin and \end are treated specially */
        /* because the environment name in braces is the real keyword */

        *keyword = get_keyword();
        is_begin = (0 == strcmp(*keyword,BEGINSTRING));
        is_end = (0 == strcmp(*keyword,ENDSTRING));

        if (is_begin || is_end) {
           *etype = is_begin ? ESCAPE_BEGIN : ESCAPE_END;
           *action = is_begin ? PUSH : POP;
           *keyword = get_begin_end_keyword();
        }
        else {
           *action = CHECK;
           *etype = ESCAPE_ANY;
        }
        return(1);

      case COMMENT :

        /* discard everything up to and including the end of the line */

        do {
          c = get_a_char();
          if (c == EOF) return(0);
        } while (c != '\n');
        continue;

      default :

        /* not a significant character, keep reading */

        continue;

    }

  }

}

push_math_mode (key) char *key;

/* enter math mode: bump the math mode depth, set In_Math_Mode and push a */
/* MATH entry for 'key' (the token that opened math mode) onto the */
/* environment stack.  The key pointer is stored, not copied.  In verbose */
/* mode the event is also reported on stderr. */

{
  if (Verbose_Mode) {
     do_indent(Indent_Level++);
     /* Current_Line is a long, so it must be printed with %ld */
     fprintf (
         stderr,"Line %ld: Entering math mode using <%s>\n",Current_Line,key
       );
  }
  Math_Mode_Depth++;
  In_Math_Mode = T;
  lex_push(MATH,key,Current_Line);
}

pop_math_mode (key) char *key;

/* leave math mode: decrement the math mode depth, recompute In_Math_Mode */
/* from it and discard the top entry of the environment stack.  'key' is */
/* the closing token and is only used for the verbose report on stderr. */
/* The caller is expected to have verified that the top entry matches. */

{
  envtype etype;
  char *keyword;
  long linenum;
  if (Verbose_Mode) {
     do_indent(--Indent_Level);
     /* Current_Line is a long, so it must be printed with %ld */
     fprintf (
         stderr,"Line %ld: Leaving math mode using <%s>\n",Current_Line,key
       );
  }
  Math_Mode_Depth--;
  In_Math_Mode = (Math_Mode_Depth > 0);

  /* the popped components are not needed here */

  lex_pop(&etype,&keyword,&linenum);
}


math_mode_action (action,keyword) Actions action; char *keyword;

/* check for math mode tokens, and enter or leave math mode as appropriate. */
/* 'action' is the action computed by get_token and 'keyword' the text of */
/* the token.  Actions other than those handled below are ignored. */

{
  char *stack_keyword;
  long linenum;
  envtype etype;
  char *key, *matching_keyword;
  
  switch (action) {

    /* If there is a matching '$' or '$$' as the latest entry on the stack */
    /* we pop it because it is a matching token.  Otherwise, we push it, */
    /* even if we are already in math mode. */
        
    case (DOLLAR) :
    case (DOLLAR_DOLLAR) :
      key = (action == DOLLAR) ? "$" : "$$";
      if (!In_Math_Mode) {
         push_math_mode(key);
         break;
      }
      curstack(&etype,&stack_keyword,&linenum);
      if (0 != strcmp(key,stack_keyword)) {
         push_math_mode(key);
      }
      else {
         pop_math_mode(key);
      }
      break;
     
    /* just adjust Math Mode for PUSH and POP, because in process_file */
    /* we will do the actual pushing and popping of these environments. */
      
    case (PUSH) :
      if (is_math_environment(keyword)) {
         Math_Mode_Depth++;
         In_Math_Mode = T;
      }
      break;

    case (POP) :
      if (is_math_environment(keyword)) {
         Math_Mode_Depth--;
         /* we remain in math mode only while some math mode is still */
         /* open, exactly as in pop_math_mode */
         In_Math_Mode = (Math_Mode_Depth > 0);
      }
      break;
      
    /* look for \( and \[ commands which put us into math mode, and \) and */
    /* \] commands which pop us out of math mode.   Make sure if we are */
    /* popping that the proper pushed math mode command is the current */
    /* stack entry. */
      
    case (CHECK_SINGLE) :
      if (*keyword == '(' || *keyword == '[') {
         /* keyword lives in a static buffer, so push a copy of it */
         push_math_mode(anewstr(keyword));
      }
      else if (*keyword == ')' || *keyword == ']') {
         if (Stack_Empty) {
            stack_empty_error(MATH,keyword);
            texit();
         }
         curstack(&etype,&stack_keyword,&linenum);
         matching_keyword = (*keyword == ')') ? "(" : "[";
         if (0 != strcmp(matching_keyword,stack_keyword)) {
            nest_error(matching_keyword,keyword,linenum,stack_keyword);
            texit();
         }
         pop_math_mode(keyword);
      }
      break;

    /* no other action affects math mode */

    default :
      break;

  }

}


process_file () 

/* Get significant LaTeX forms from the input file.  For each one, depending */
/* on its nature perform a verification or manipulate the environment stack. */
/* When we are done the stack should be empty; if it is not, that is */
/* reported as an unexpected end of file and the program terminates. */

/* The file has already been opened using the global file descriptor 'fp' */

/* Returns 0 when the whole file has been processed. */

{
  Actions action;          
  envtype etype;
  char *keyword;
  int cmd_index,ch;
        
  while (0 != get_token(&action,&etype,&keyword)) {

    switch (action) {

      case (POP) :

        /* \end{keyword}, '}', ']' */
      
        if (Stack_Empty) {
           stack_empty_error(etype,keyword);
           texit();
        }
        
        math_mode_action(POP,keyword);
      
        /* NOTE(review): do_indent writes to stderr while this line goes */
        /* to stdout; confirm which stream verbose output is meant for. */

        if (Verbose_Mode && *keyword != '}' && *keyword != ']') {
           do_indent(--Indent_Level);
           /* Current_Line is a long, so it must be printed with %ld */
           printf("line %ld: \\end{%s}\n",Current_Line,keyword);
        }
        do_pop(etype,keyword);
        break;

      case (PUSH) :
      
        /* \begin{keyword}, '{', '[' */
      
        math_mode_action(PUSH,keyword);
      
        if (Verbose_Mode && *keyword != '{' && *keyword != '[') {
           do_indent(Indent_Level++);
           /* Current_Line is a long, so it must be printed with %ld */
           printf("line %ld: \\begin{%s}\n",Current_Line,keyword);
        }
        
        /* verbatim environments are handed to do_verbatim and are not */
        /* pushed; everything else is pushed with a private copy of the */
        /* keyword, because keyword may point into a static buffer. */

        if (0==strcmp("verbatim",keyword) || 0==strcmp("verbatim*",keyword)) {
           do_verbatim(keyword);
           break;
        }
        else {
           lex_push(etype,anewstr(keyword),Current_Line);
           break;
        }

      case (DOLLAR) :
        math_mode_action(DOLLAR,keyword);
        break;
        
      case (DOLLAR_DOLLAR) :
        math_mode_action(DOLLAR_DOLLAR,keyword);
        break;
      
      case (CHECK_SINGLE) :

        /* check for \(, \[, \), \] for math mode */
      
        math_mode_action(CHECK_SINGLE,keyword);
        
        if (Check_Mode) {

           /* An illegal character is reported and nothing more is done */
           /* with it.  Only a character that passed the legality test */
           /* and is nevertheless missing from the command table means */
           /* that the two tables disagree, which is fatal. */

           if (!LGL_SINGLE_COMMAND_CHAR(*keyword)) {
              single_char_command_error(*keyword);
           }
           else if (NOT_FOUND == (cmd_index = command_lookup(keyword))) {
              fprintf(stderr,"Fatal error:\n");
              fprintf(stderr,"Command Table and Legal Chars out of sync\n");
              texit();
           }
           else if (!In_Math_Mode && IS_MATH_MODE(cmd_index)) {
              math_keyword_error(keyword);
           }
              
        }
        break;
        
      case (CHECK) :
      
        /* \command token */
      
        if (0 == strcmp("verb",keyword)) {
           /* accept the optional '*' of \verb* before calling do_verb */
           if ('*' != (ch = get_a_char())) unget_a_char();
           do_verb();
           break;
        }
      
        if (Check_Mode) {
           if (NOT_FOUND == (cmd_index = command_lookup(keyword))) {
              keyword_error(keyword);
           }
           else if (!In_Math_Mode && IS_MATH_MODE(cmd_index)) {
              math_keyword_error(keyword);
           }
        }
        
        break;
        
      default :
        fprintf(stderr,"Invalid return from get_token...\n");
        texit();
        
    }

  }
    
  /* anything still on the stack was opened but never closed */

  if (!Stack_Empty) {
     eof_error();
     texit();
  }

  return(0);

}


texit ()

/* abnormal termination: close the file being processed and leave the */
/* program with exit status 1.  Does not return. */

{
  (void) fclose(fp);
  exit(1);
}


usage () 

/* complain on stderr about a bad command line argument, show the */
/* expected invocation and leave the program with exit status 1. */

{
  fputs("\nUnrecognized argument to texchk\n",stderr);
  fputs("Usage: texchk [ -v -c ] [ file1 file2 ... ]\n",stderr);
  exit(1);
}
  
  
main (argc,argv) int argc; char **argv;

/* texchk [ -v -c ] [ file1 file2 ... ] */
/* Arguments that start with '-' are options and must be exactly two */
/* characters long; every other argument names an input file.  When no */
/* file is named, standard input is checked instead. */

{
  int i,have_files;
  char *arg;

  init_legal_chars();

  /* first pass over the arguments: record the options and note whether */
  /* any input file was named.  Each option slot in argv is overwritten */
  /* with a null pointer so that the second pass skips it. */

  have_files = 0;
  for (i = 1; argv[i] != NULL; i++) {
    arg = argv[i];
    if (arg[0] != '-') {
       have_files = 1;
       continue;
    }
    if (strlen(arg) != 2) {
       usage();
    }
    switch (arg[1]) {
      case 'v' :
        Verbose_Mode = T;
        break;
      case 'c' :
        Check_Mode = T;
        break;
      default :
        usage();
        break;
    }
    argv[i] = NULL;
  }

  /* second pass: read and process each file, or stdin if there is none */

  if (have_files) {
     for (i = 1; i < argc; i++) {
         if (argv[i] == NULL) continue;
         printf("\nChecking file %s.\n\n",argv[i]);
         new_file();
         fp = (FILE *) efopen(argv[i],"r");
         process_file();
         fclose(fp);
     }
  }
  else {
     printf("\n");
     fp = stdin;
     process_file();
     printf("\nOK!\n\n");
  }

  exit(0);

}


