/*------------------------------------------------------------------
 * ctok : C language tokenizer
 *------------------------------------------------------------------
 * 10-01-91 Patrick J. Mueller
 *------------------------------------------------------------------*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "ctok.h"

/*------------------------------------------------------------------
 * is a character a valid character in a C identifier
 *
 * c is a character value as delivered by GetNextChar, or EOF.
 * Returns non-zero for letters, digits and underscore.
 *
 * Written as a function rather than a macro so that the argument is
 * evaluated exactly once and expression arguments bind correctly.
 * The value is narrowed to unsigned char before it reaches isalnum,
 * because the <ctype.h> classifiers are only defined for EOF and
 * values representable as unsigned char.
 *------------------------------------------------------------------*/
static int isCsymbol(
   int          c
   )
   {
   if (EOF == c)
      return 0;

   if ('_' == c)
      return 1;

   return isalnum((unsigned char) c) != 0;
   }

/*------------------------------------------------------------------
 * typedefs
 *
 * CTokInfo is the private state behind the handle returned by
 * CTokInit; one instance describes one input stream being tokenized.
 *------------------------------------------------------------------*/
typedef struct
   {
   /* non-zero once the read callback has reported no more data */
   int            eof;

   /* current chunk of input supplied by readFunc, its length, and  */
   /* the index of the next character to hand out                   */
   char          *buffer;
   long           bufferLen;
   long           bufferInd;

   /* offset of the most recently delivered character; starts at -1, */
   /* incremented by GetNextChar and decremented by UnGetNextChar    */
   long           fileOffs;

   /* line counter: starts at 1, follows newlines read and put back */
   long           line;

   /* one-character push-back slot and its "occupied" flag */
   int            unGetChar;
   int            unGetReady;

   /* starting offset and length of the token found by GetToken */
   long           tokOffs;
   long           tokLen;

   /* caller-supplied read callback and the opaque value passed to it */
   CTokRead       readFunc;
   void          *readInfo;

   /* text of the current identifier, or the single character of an  */
   /* operator; cleared at the start of every GetToken call          */
   char           ident[MAX_IDENT_LEN+1];
   } CTokInfo;

/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/

/*------------------------------------------------------------------
 * get next char from file
 *
 * Stores the next input character in *c, or EOF once the input is
 * exhausted.  Data bytes are delivered as unsigned char values, so
 * a byte such as 0xFF can never be mistaken for EOF and every value
 * handed out is safe to pass to the <ctype.h> classifiers.
 *
 * cti->fileOffs is advanced on every call, including calls made
 * after end of file; cti->line is bumped whenever a newline is
 * delivered.
 *------------------------------------------------------------------*/
void GetNextChar(
   int         *c,
   CTokInfo    *cti
   )
   {
   cti->fileOffs++;

   /*---------------------------------------------------------------
    * check for end of file
    *---------------------------------------------------------------*/
   if (cti->eof)
      {
      *c = EOF;
      return;
      }

   /*---------------------------------------------------------------
    * check for a char in the unget holder
    *---------------------------------------------------------------*/
   if (cti->unGetReady)
      {
      cti->unGetReady = 0;
      *c = cti->unGetChar;

      if ('\n' == *c)
         cti->line++;
      return;
      }

   /*---------------------------------------------------------------
    * see if we need to read another buffer
    *---------------------------------------------------------------*/
   if (cti->bufferInd >= cti->bufferLen)
      {
      cti->bufferLen = cti->readFunc(cti->readInfo,&(cti->buffer));
      cti->bufferInd = 0L;

      /*------------------------------------------------------------
       * a zero length means no more data; a negative length is
       * treated the same way so a failing reader cannot make us
       * index into a buffer that holds nothing
       * NOTE(review): CTokRead's error convention lives in ctok.h
       * and is not visible here - confirm
       *------------------------------------------------------------*/
      if (cti->bufferLen <= 0L)
         {
         cti->bufferLen = 0L;
         *c = EOF;
         cti->eof = 1;
         return;
         }
      }

   /*---------------------------------------------------------------
    * read character from buffer - go through unsigned char so that
    * bytes above 0x7F stay positive where plain char is signed
    *---------------------------------------------------------------*/
   *c = (unsigned char) cti->buffer[cti->bufferInd++];

   if ('\n' == *c)
      cti->line++;

   return;
   }

/*------------------------------------------------------------------
 * put back last char from file
 *
 * Parks c in the single push-back slot so that the next call to
 * GetNextChar hands it out again, and undoes the offset and line
 * bookkeeping that reading it performed.
 *------------------------------------------------------------------*/
void UnGetNextChar(
   int          c,
   CTokInfo    *cti
   )
   {
   /*---------------------------------------------------------------
    * a newline going back means we are on the previous line again
    *---------------------------------------------------------------*/
   if ('\n' == c)
      cti->line -= 1;

   /*---------------------------------------------------------------
    * fill the holder and step the offset back
    *---------------------------------------------------------------*/
   cti->unGetReady = 1;
   cti->unGetChar  = c;
   cti->fileOffs  -= 1;
   }

/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/

/*------------------------------------------------------------------
 * read a C character constant or string
 *
 * c is the opening quote (' or "), already consumed by the caller.
 * Input is consumed up to and including the matching quote, or up
 * to end of file if the literal is never closed.
 *------------------------------------------------------------------*/
static void ReadString(
   CTokInfo    *cti,
   int          c
   )
   {
   const int closer = c;
   int       ch;

   for (GetNextChar(&ch,cti); closer != ch; GetNextChar(&ch,cti))
      {
      /*------------------------------------------------------------
       * a backslash swallows the character after it, so an escaped
       * quote does not end the literal
       *------------------------------------------------------------*/
      if ('\\' == ch)
         GetNextChar(&ch,cti);

      /*------------------------------------------------------------
       * ran out of input before the closing quote
       *------------------------------------------------------------*/
      if (EOF == ch)
         return;
      }
   }

/*------------------------------------------------------------------
 * read a C comment
 *
 * Called after the opening slash-star has been consumed.  Input is
 * consumed through the closing star-slash, or to end of file when
 * the comment is never closed.
 *------------------------------------------------------------------*/
static void ReadComment(
   CTokInfo    *cti
   )
   {
   int ch;
   int afterStar;

   GetNextChar(&ch,cti);
   while (EOF != ch)
      {
      /*------------------------------------------------------------
       * remember whether the character in hand is a *, then look
       * at the one that follows it
       *------------------------------------------------------------*/
      afterStar = ('*' == ch);
      GetNextChar(&ch,cti);

      /*------------------------------------------------------------
       * a / directly after a * closes the comment
       *------------------------------------------------------------*/
      if (afterStar && ('/' == ch))
         return;
      }
   }

/*------------------------------------------------------------------
 * read a C++ style comment
 *
 * Called after the opening double slash has been consumed.  Eats
 * the rest of the line; the newline (or EOF) that stops the scan is
 * pushed back rather than consumed.
 *------------------------------------------------------------------*/
static void ReadCppComment(
   CTokInfo    *cti
   )
   {
   int ch;

   /*---------------------------------------------------------------
    * swallow characters until the line or the file runs out
    *---------------------------------------------------------------*/
   do
      GetNextChar(&ch,cti);
   while ((EOF != ch) && ('\n' != ch));

   /*---------------------------------------------------------------
    * the terminator is not part of the comment
    *---------------------------------------------------------------*/
   UnGetNextChar(ch,cti);
   }

/*------------------------------------------------------------------
 * read an identifier
 *
 * c is the identifier's first character, already consumed by the
 * caller.  The identifier text is collected into cti->ident;
 * characters beyond MAX_IDENT_LEN are consumed but not stored.  The
 * character that ends the identifier is pushed back.
 *------------------------------------------------------------------*/
static void ReadIdent(
   CTokInfo    *cti,
   int          c
   )
   {
   char *out   = cti->ident;
   char *limit = cti->ident + MAX_IDENT_LEN;

   /*---------------------------------------------------------------
    * the first character always goes in
    *---------------------------------------------------------------*/
   *out++ = (char) c;

   /*---------------------------------------------------------------
    * collect the rest, dropping whatever does not fit
    *---------------------------------------------------------------*/
   for (GetNextChar(&c,cti); isCsymbol(c); GetNextChar(&c,cti))
      {
      if (out < limit)
         *out++ = (char) c;
      }

   /*---------------------------------------------------------------
    * terminate the text and give back the character that stopped us
    *---------------------------------------------------------------*/
   *out = '\0';
   UnGetNextChar(c,cti);
   }

/*------------------------------------------------------------------
 * read a number
 *
 * c is the leading digit, already consumed by the caller.  The rest
 * of the number is consumed following the shape of a C preprocessing
 * number: letters, digits and '.' all continue it, and a '+' or '-'
 * continues it only directly after an exponent marker (e, E, p, P).
 * This keeps constants such as 1.5, 1e-5 and 0x1p+3 in one token.
 *
 * The character that ends the number is pushed back.
 *------------------------------------------------------------------*/
static void ReadNumber(
   CTokInfo    *cti,
   int          c
   )
   {
   int prev;

   for (;;)
      {
      /*------------------------------------------------------------
       * keep the previous character so a sign can be judged by
       * what precedes it
       *------------------------------------------------------------*/
      prev = c;
      GetNextChar(&c,cti);

      /*------------------------------------------------------------
       * digits, suffix/radix/exponent letters and the decimal point
       *------------------------------------------------------------*/
      if (isalnum(c) || ('.' == c))
         continue;

      /*------------------------------------------------------------
       * a sign is part of the number only right after an exponent
       * marker; anywhere else it is an operator
       *------------------------------------------------------------*/
      if ((('+' == c) || ('-' == c)) &&
          (('e' == prev) || ('E' == prev) ||
           ('p' == prev) || ('P' == prev)))
         continue;

      break;
      }

   /*---------------------------------------------------------------
    * put last character back
    *---------------------------------------------------------------*/
   UnGetNextChar(c,cti);
   }

/*------------------------------------------------------------------
 * read a preprocessor statement
 *
 * Called after the '#' has been consumed.  Eats the rest of the
 * directive, carrying on across lines that end in a backslash
 * (white space between the backslash and the newline is allowed).
 * The newline that ends the directive is pushed back; at end of
 * file nothing is pushed back.
 *------------------------------------------------------------------*/
static void ReadPreprocessor(
   CTokInfo    *cti
   )
   {
   int ch;

   GetNextChar(&ch,cti);
   for (;;)
      {
      /*------------------------------------------------------------
       * out of input - the directive ends here
       *------------------------------------------------------------*/
      if (EOF == ch)
         return;

      /*------------------------------------------------------------
       * an unescaped newline ends the directive
       *------------------------------------------------------------*/
      if ('\n' == ch)
         {
         UnGetNextChar(ch,cti);
         return;
         }

      if ('\\' == ch)
         {
         /*---------------------------------------------------------
          * step past the backslash and any blanks that trail it
          *---------------------------------------------------------*/
         do
            GetNextChar(&ch,cti);
         while (('\n' != ch) && isspace(ch));

         /*---------------------------------------------------------
          * backslash-newline: swallow the newline and carry on with
          * the next line; anything else gets looked at again by the
          * top of the loop
          *---------------------------------------------------------*/
         if ('\n' == ch)
            GetNextChar(&ch,cti);
         }
      else
         {
         /*---------------------------------------------------------
          * ordinary character, eat it
          *---------------------------------------------------------*/
         GetNextChar(&ch,cti);
         }
      }
   }

/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/

/*------------------------------------------------------------------
 * tokenizer
 *
 * Skips leading white space, then classifies and consumes the next
 * token.  On return cti->tokOffs and cti->tokLen describe where the
 * token sits in the input, and cti->ident holds the identifier text
 * or the single character of an operator; it is left empty for
 * every other token type.
 *
 * returns one of the TOKEN_* type codes
 *------------------------------------------------------------------*/

static int GetToken(
   CTokInfo    *cti
   )
   {
   int           c;
   int           type;
   long          offsStart;

   /*---------------------------------------------------------------
    * read next character
    *---------------------------------------------------------------*/
   GetNextChar(&c,cti);

   /*---------------------------------------------------------------
    * skip white space
    *---------------------------------------------------------------*/
   while (isspace(c))
      GetNextChar(&c,cti);

   /*---------------------------------------------------------------
    * save starting offset (same signed type as cti->fileOffs, so
    * the length arithmetic below never mixes signed and unsigned)
    *---------------------------------------------------------------*/
   offsStart = cti->fileOffs;

   /*---------------------------------------------------------------
    * empty identifier
    *---------------------------------------------------------------*/
   memset(cti->ident,'\0',sizeof(cti->ident));

   /*---------------------------------------------------------------
    * big switch on its value
    *---------------------------------------------------------------*/
   switch(c)
      {
      /*------------------------------------------------------------
       * check for end of file
       *------------------------------------------------------------*/
      case EOF:
         type = TOKEN_EOF;
         break;

      /*------------------------------------------------------------
       * for pound sign, read preprocessor directive
       *------------------------------------------------------------*/
      case '#':
         ReadPreprocessor(cti);
         type = TOKEN_PREPROC;
         break;

      /*------------------------------------------------------------
       * single or double quote
       *------------------------------------------------------------*/
      case '\'':
      case '"':
         ReadString(cti,c);
         type = TOKEN_STRING;
         break;

      /*------------------------------------------------------------
       * start of comment?
       *------------------------------------------------------------*/
      case '/':
         /*---------------------------------------------------------
          * get next char - if *, read to end of comment
          *---------------------------------------------------------*/
         GetNextChar(&c,cti);
         if ('*' == c)
            {
            ReadComment(cti);
            type = TOKEN_COMMENT;
            }

         /*---------------------------------------------------------
          * see if it's a C++ style comment
          *---------------------------------------------------------*/
         else if ('/' == c)
            {
            ReadCppComment(cti);
            type = TOKEN_COMMENT;
            }

         /*---------------------------------------------------------
          * otherwise it's just a plain / - give back the lookahead
          * and record the operator character like every other
          * operator does (c now holds the lookahead, not the /)
          *---------------------------------------------------------*/
         else
            {
            UnGetNextChar(c,cti);
            type = TOKEN_OPER;
            cti->ident[0] = '/';
            }

         break;

      /*------------------------------------------------------------
       * everything else - identifiers and punctuation
       *------------------------------------------------------------*/
      default:
         if (isCsymbol(c) && !isdigit(c))
            {
            ReadIdent(cti,c);
            type = TOKEN_IDENT;
            }

         else if (isdigit(c))
            {
            ReadNumber(cti,c);
            type = TOKEN_NUMBER;
            }

         /*---------------------------------------------------------
          * anything else is a one character operator
          *---------------------------------------------------------*/
         else
            {
            type = TOKEN_OPER;
            cti->ident[0] = (char) c;
            }

         break;
      }

   /*---------------------------------------------------------------
    * fileOffs is the offset of the last character consumed, so the
    * length is inclusive of both ends
    *---------------------------------------------------------------*/
   cti->tokOffs = offsStart;
   cti->tokLen  = cti->fileOffs - offsStart + 1;
   return(type);
   }

/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/


/*------------------------------------------------------------------
 * Initializer
 *
 * Creates the state for one tokenizing session.
 *
 * readFunc - callback used to fetch input
 * readInfo - opaque value handed to readFunc on every call
 *
 * returns a handle for CTokGet / CTokTerm, or NULL when the state
 * cannot be allocated
 *------------------------------------------------------------------*/
void *CTokInit(
   CTokRead  readFunc,
   void     *readInfo
   )
   {
   CTokInfo *cti = malloc(sizeof *cti);

   if (NULL != cti)
      {
      /*------------------------------------------------------------
       * where the input comes from
       *------------------------------------------------------------*/
      cti->readFunc    = readFunc;
      cti->readInfo    = readInfo;

      /*------------------------------------------------------------
       * nothing buffered and nothing pushed back yet
       *------------------------------------------------------------*/
      cti->eof         = 0;
      cti->buffer      = NULL;
      cti->bufferLen   = 0L;
      cti->bufferInd   = 0L;
      cti->unGetChar   = '\0';
      cti->unGetReady  = 0;

      /*------------------------------------------------------------
       * position: the offset is bumped before each read, so -1
       * makes the first character land on offset 0; lines count
       * from 1
       *------------------------------------------------------------*/
      cti->fileOffs    = -1L;
      cti->line        = 1;

      /*------------------------------------------------------------
       * no token seen so far
       *------------------------------------------------------------*/
      cti->tokOffs     = 0L;
      cti->tokLen      = 0L;
      memset(cti->ident,'\0',sizeof(cti->ident));
      }

   return cti;
   }

/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/

/*------------------------------------------------------------------
 * Terminator
 *
 * Gives back the storage behind a handle returned by CTokInit.  The
 * handle must not be used afterwards.  A NULL handle is harmless,
 * since free() ignores it.
 *------------------------------------------------------------------*/
void CTokTerm(
   void *handle
   )
   {
   free(handle);
   return;
   }

/*-/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\-*/
/*-\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/-*/

/*------------------------------------------------------------------
 * Tokenizer
 *
 * Scans the next token from the stream behind handle and describes
 * it in *token.
 *
 * token->ident points at storage inside the handle; it is rewritten
 * by the next call.  token->line is the line counter as it stands
 * once the token has been consumed.
 *------------------------------------------------------------------*/
void CTokGet(
   void     *handle,
   Token    *token
   )
   {
   CTokInfo *cti  = handle;
   int       kind = GetToken(cti);

   /*---------------------------------------------------------------
    * copy the scan results out to the caller
    *---------------------------------------------------------------*/
   token->type  = kind;
   token->line  = cti->line;
   token->ident = cti->ident;
   token->offs  = cti->tokOffs;
   token->len   = cti->tokLen;
   }
