/*
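**	lexer.c	- Hand-coded scanner (yylex) that separates HTML text from
**		  the tokens of script code embedded in "<! ... >" tags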
**
**
** Copyright (c) 1995-96  Hughes Technologies
**
** Permission to use, copy, and distribute for non-commercial purposes,
** is hereby granted without fee, providing that the above copyright
** notice appear in all copies and that both the copyright notice and this
** permission notice appear in supporting documentation.
**
** This software is provided "as is" without any expressed or implied warranty.
**
**
*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>

#include <common/portability.h>
#include "lite.h"
#include "y.tab.h"

/* Shorthand for the `register` storage class used on the scanner's locals. */
#define	REG		register


/*
** Macros for handling the scanner's internal pointers
**
**   yyGet()     Yields the character at tokPtr, advances tokPtr, counts
**               the character in yytoklen and bumps yylineno when the
**               character just consumed is a newline.
**   yyUnget()   Steps tokPtr back one character, decrements yytoklen and
**               decrements yylineno when the character now under tokPtr
**               is a newline.
**   yySkip()    Yields the character at tokPtr and advances both tokPtr
**               and tokStart (the character is not counted in yytoklen);
**               updates yylineno and copies it into origlineno.
**   yyRevert()  Rewinds tokPtr to tokStart, zeroes yytoklen and restores
**               yylineno from origlineno.
**   yyReturn(t) Moves tokStart up to tokPtr, records yylineno in
**               origlineno and returns t from the enclosing function.
**
** yyGet, yyUnget and yySkip expand to SEVERAL statements.  In
** "c = yyGet();" only the first statement is the assignment; the rest
** follow it.  They must therefore be used as complete statements inside
** a braced block, never as the unbraced body of an if/while or inside a
** larger expression.
**
** When YYLINE_DEBUG is defined, yyGet, yySkip and yyRevert additionally
** print a trace line with printf.
*/
#ifdef YYLINE_DEBUG
#define yyGet()         (*tokPtr++); yytoklen++;\
                        if (*(tokPtr-1) == '\n') yylineno++;\
                        printf("Get at line %d = '%c'\n",yylineno,*(tokPtr-1))
#define yyUnget()       tokPtr--; yytoklen--;\
                        if (*tokPtr == '\n') yylineno--
#define yySkip()        (*tokPtr++); tokStart++;\
                        if (*(tokPtr-1) == '\n') yylineno++;\
                        origlineno=yylineno;\
                        printf("Skip at line %d = '%c'\n",yylineno,*(tokPtr-1))
#define yyRevert()      {tokPtr=tokStart; yytoklen=0;yylineno=origlineno;\
                        printf("Reverting ...\n");}
#define yyReturn(t)     {tokStart=tokPtr; origlineno=yylineno;return(t);}

#else
#define yyGet()         (*tokPtr++); yytoklen++;\
                        if (*(tokPtr-1) == '\n') yylineno++
#define yyUnget()       tokPtr--; yytoklen--;\
                        if (*tokPtr == '\n') yylineno--
#define yySkip()        (*tokPtr++); tokStart++;\
                        if (*(tokPtr-1) == '\n') yylineno++;\
                        origlineno=yylineno
#define yyRevert()      {tokPtr=tokStart; yytoklen=0;yylineno=origlineno;}
#define yyReturn(t)     {tokStart=tokPtr; origlineno=yylineno;return(t);}
#endif



/*
** Macros for matching character classes.  These are in addition to
** those provided in <ctype.h>
**
** Both macros use their argument several times and do not parenthesise
** it, so pass a plain variable with no side effects.
*/
#ifdef	iswhite
# undef iswhite
#endif
/*
** True for the characters the scanner treats as white space between tokens.
** NOTE(review): the last character constant below shows up as empty in
** this copy of the file; it presumably held a raw control character.
** Confirm the intended character and spell it as an escape sequence.
*/
#define iswhite(c)	(c==' ' || c=='\t' || c=='\n' || c=='\r' || c == '')

#ifdef	iscompop
# undef iscompop
#endif
/*
** True for any character that may appear in a comparison or logical
** operator ( < > = ! ~ & | ).
*/
#define iscompop(c)	(c == '<'||c == '>'||c == '='||c == '!'||c == '~' ||c=='&'||c=='|')


/*
** Debugging macros.
*/

/* #define DEBUG_STATE */	/* Define this to watch the state transitions */

/*
** token(x) wraps every token code the scanner returns.  In a normal
** build it is the token code itself.  Under LEX_DEBUG it is a string
** literal cast to int, so the test harness can print a name.
** NOTE(review): the LEX_DEBUG form depends on the preprocessor replacing
** the parameter inside a string literal (pre-ANSI behaviour).  An ISO C
** preprocessor leaves "x" untouched, and the pointer-to-int cast loses
** bits where pointers are wider than int -- confirm before relying on it.
*/
#ifdef LEX_DEBUG
#  define token(x)	(int) "x"
#else
#  define token(x)	x
#endif /* LEX_DEBUG */

/*
** CASE(x) introduces a state in the scanner's switch.  With DEBUG_STATE
** defined it also prints the current character, the state number and the
** condState/condDepth counters (or a start-up message for state 0).
*/
#ifdef DEBUG_STATE
#  define CASE(x)	case x: if (x) printf("%c -> state %d  cond %d.%d\n",\
				c,x,condState,condDepth); \
				else printf("Scanner starting at state 0\n");
#else
#  define CASE(x)	case x:
#endif



/*
** Scanner state shared between calls to yylex().
*/
u_char	*yytext 	= NULL;	/* Text of the current token, or NULL */
u_int	yytoklen	= 0;	/* Characters consumed by yyGet() for this token */
int	yylineno 	= 1;	/* Current input line, counted from 1 */
static	u_char 		*tokPtr,	/* Read cursor into the input buffer */
			*tokStart;	/* First character of the current token */
static	int		state = 0,	/* Current state of the yylex() machine */
			condState = 0,	/* Bumped by "if"/"while"; while non-zero
					** findKeyword() reports ">" as GT, not
					** TAG_CLOSE */
			condDepth = 0,	/* "(" / ")" nesting counted by yylex()
					** while condState is non-zero */
			origlineno;	/* yylineno at the start of the current
					** token; restored by yyRevert() */


/*
** The semantic value handed to the parser.  The stand-alone LEX_DEBUG
** build has no parser, so it must provide the definition itself.
*/
#ifdef LEX_DEBUG
	YYSTYPE		yylval;
#else
	extern	YYSTYPE		yylval;
#endif



/*
** lexInitScanner - point the scanner at a new input buffer.
**
**	buf	Input text to scan.  yylex() treats a 0 byte as the end
**		of the input.
**
** Resets the line counters to 1 and puts the state machine back in its
** start state; the next call to yylex() begins reading at buf.
**
** NOTE(review): condState and condDepth are not reset here, so values
** left over from a previous scan carry into the new one -- confirm that
** this is intended.
*/
void lexInitScanner(buf)
	u_char	*buf;
{
	yylineno = 1;
	origlineno = yylineno;
	state = 0;
	tokStart = buf;
}



/*
** checkKeyword - test whether the first len characters of tok spell key.
**
**	tok	Start of the candidate token (need not be terminated)
**	key	NUL terminated keyword to compare against
**	len	Length of the candidate token
**
** Returns 0 when key is exactly len characters long and matches tok,
** -1 when the lengths differ, otherwise the non-zero strncmp() result.
*/
static int checkKeyword(tok, key, len)
	char	*tok,
		*key;
	int	len;
{
	if (strlen(key) == len)
	{
		return(strncmp(tok,key,len));
	}
	return(-1);
}



/*
** findKeyword - map a completed operator or word onto its token code.
**
**	tok	Start of the token text
**	len	Number of characters in the token
**
** Returns the token code of the matching operator or keyword, or 0 when
** nothing matches.
**
** Side effects:
**	">"		outside a condition (condState == 0) is the end of
**			the tag: TAG_CLOSE is returned and the scanner
**			state is reset to 0.  Inside a condition it is GT.
**	"if", "while"	increment condState.
*/
static int findKeyword(tok,len)
	char	*tok;
	int	len;
{
	int	found = 0;

	/*
	** Comparison ops
	*/
	if (checkKeyword(tok,">",len) == 0)
	{
		if (!condState)
		{
			state = 0;
			found = token(TAG_CLOSE);
		}
		else
		{
			found = token(GT);
		}
	}
	else if (checkKeyword(tok,"<",len) == 0)
		found = token(LT);
	else if (checkKeyword(tok,"==",len) == 0)
		found = token(EQ);
	else if (checkKeyword(tok,"!=",len) == 0)
		found = token(NE);
	else if (checkKeyword(tok,">=",len) == 0)
		found = token(GE);
	else if (checkKeyword(tok,"<=",len) == 0)
		found = token(LE);
	else if (checkKeyword(tok,"=~",len) == 0)
		found = token(RE);

	/*
	** Logical ops
	*/
	else if (checkKeyword(tok,"&&",len) == 0)
		found = token(LOGICAL_AND);
	else if (checkKeyword(tok,"||",len) == 0)
		found = token(LOGICAL_OR);

	/*
	** Language keywords
	*/
	else if (checkKeyword(tok,"if",len) == 0)
	{
		condState++;
		found = token(IF);
	}
	else if (checkKeyword(tok,"else",len) == 0)
		found = token(ELSE);
	else if (checkKeyword(tok,"while",len) == 0)
	{
		condState++;
		found = token(WHILE);
	}
	else if (checkKeyword(tok,"break",len) == 0)
		found = token(BREAK);
	else if (checkKeyword(tok,"continue",len) == 0)
		found = token(CONTINUE);
	else if (checkKeyword(tok,"load",len) == 0)
		found = token(LOAD);
	else if (checkKeyword(tok,"modload",len) == 0)
		found = token(MOD_LOAD);
	else if (checkKeyword(tok,"funct",len) == 0)
		found = token(FUNCT);
	else if (checkKeyword(tok,"return",len) == 0)
		found = token(RETURN);

	/*
	** found is still 0 here if nothing matched
	*/
	return(found);
}



/*
** tokenDup - make a NUL terminated heap copy of a token.
**
**	tok	Start of the token text (need not be terminated)
**	len	Number of characters to copy
**
** Returns a malloc()ed buffer of len+1 bytes holding the first len bytes
** of tok followed by a 0 byte.  The caller owns the buffer.  Returns NULL
** when tok is NULL, len is negative or the allocation fails.
*/
static u_char *tokenDup(tok,len)
	u_char	*tok;
	int	len;
{
	u_char	*copy;

	/* A negative length would turn into a huge size for the copy */
	if (tok == NULL || len < 0)
		return(NULL);

	copy = (u_char *)malloc((size_t)len + 1);
	if (copy == NULL)
		return(NULL);

	memcpy(copy, tok, (size_t)len);
	copy[len] = 0;
	return(copy);
}


/*
** readTextLiteral - consume the remainder of a double quoted string.
**
**	tok	Start of the token text to copy once the string is complete
**
** Reads characters with yyGet() up to and including the closing quote.
** A backslash causes the character after it to be consumed without
** being examined, so an escaped quote does not end the string.
**
** Returns a tokenDup() copy of yytoklen characters starting at tok, or
** NULL if a 0 byte is met before the closing quote.
*/
static u_char *readTextLiteral(tok)
	u_char	*tok;
{
	REG 	u_char c;

	for (;;)
	{
		c = yyGet();
		if (c == 0)
		{
			return(NULL);
		}
		if (c == '\\')
		{
			/* Swallow the escaped character, whatever it is */
			c = yyGet();
			if (c == 0)
			{
				return(NULL);
			}
			continue;
		}
		if (c == '"')
		{
			break;
		}
	}
	return(tokenDup(tok,yytoklen));
}


/*
** yylex - return the next token from the buffer given to lexInitScanner().
**
** The scanner is a state machine whose state persists between calls:
**
**	- In state 1 everything up to the next "<!" (or the end of the
**	  input) is returned as one HTML token.
**	- After "<!" the machine tokenises script code (state 2 onwards)
**	  until findKeyword() reports TAG_CLOSE and drops back to state 0.
**
** Return value and semantic value:
**	HTML, IDENT, NUM, REAL, VAR, TEXT
**			yytext and yylval point at a malloc()ed copy of
**			the token text made by tokenDup().
**	operators and keywords
**			the code chosen by findKeyword(); yytext is NULL.
**	any other character
**			the character itself is the token code; yytext and
**			yylval point at a static 2 byte buffer that is
**			overwritten by the next such token.
**	END_OF_INPUT	returned once when the 0 byte ending the input is
**			reached.
**	0		returned by every call after END_OF_INPUT.
**
** An unterminated comment is reported through yyerror("Bad Comment!");
** if yyerror() returns, the scanner then delivers END_OF_INPUT.
*/
int yylex()
{
	REG	u_char	c;
	REG	u_char	t;
	int	tokval;
	int	afterStar;
	static	u_char dummyBuf[2];


	/*
	** Handle the end of input.  We return an EOI token when we hit
	** the end and then return a 0 on the next call to yylex.  This
	** allows the parser to do the right thing with trailing garbage
	** in the expression.
	*/
	yytext = NULL;
	if (state == 1000)
	{
		return(0);
	}

	/*
	** Dive into the state machine
	*/
	origlineno = yylineno;
	while(1)
	{
		switch(state)
		{
			/* State 0 : Start of token parse */
			CASE(0)
				tokPtr = tokStart;
				yytext = NULL;
				yytoklen = 0;
				state = 1;
				break;

			/* State 1 : Normal HTML
			**
			** NOTE(review): when '<' is followed by something
			** other than '!', that second character is consumed
			** here and is not re-tested as a possible tag start,
			** so "<<!" does not open a tag.
			*/
			CASE(1)
				c = yyGet();
				if (c == '<')
				{
					c = yyGet();
					if (c == '!')
					{
						state=2;
						/* Back up over "<!" so it is
						** not part of the HTML text */
						yyUnget();
						yyUnget();
						if (yytoklen > 0)
						{
						    yytext=tokenDup(tokStart,
							yytoklen);
						    yylval = (YYSTYPE) yytext;
						    yySkip();
						    yySkip();
						    yyReturn(token(HTML));
						}
						/* No HTML pending: just step
						** over the "<!" */
						yySkip();
						yySkip();
						break;
					}
				}
				if (c == 0)
				{
					/* Flush the trailing HTML, then let
					** state 1001 deliver the EOI */
					state=1001;
					yytext = tokenDup(tokStart,
						yytoklen);
					yylval = (YYSTYPE) yytext;
					yyReturn(token(HTML));
				}
				break;

			/* State 2 : Start of w3-mSQL code */
			CASE(2)
				tokPtr = tokStart;
				yytext = NULL;
				yytoklen = 0;
				c = yyGet();
				while (iswhite(c))
				{
					c = yySkip();
				}
				if (c == '"')
				{
					state = 14;
					break;
				}
				if (c == '$')
				{
					state = 20;
					break;
				}
				if (isalpha(c))
				{
					state = 3;
					break;
				}
				if (isdigit(c))
				{
					state = 7;
					break;
				}
				if (c == '/')
				{
					state =21;
					break;
				}
				if (c == '.')
				{
					/* ".5" style real: peek at the
					** next character */
					t = yyGet();
					if ( isdigit(t) )
					{
						yyUnget();
						state = 9;
						break;
					}
					else
					{
						yyUnget();
					}
				}
				if (c == '-' || c == '+')
				{
					state = 11;
					break;
				}
				if (iscompop(c))
				{
					state = 12;
					break;
				}
				if (c == 0)
				{
					state = 1000;
					break;
				}
				state = 999;
				break;

			/* State 3 : Incomplete keyword or ident */
			CASE(3)
				c = yyGet();
				if (isalpha(c))
				{
					state = 3;
					break;
				}
				if (isdigit(c) || c == '_')
				{
					state = 5;
					break;
				}
				state = 4;
				break;


			/* State 4 : Complete keyword or ident */
			CASE(4)
				yyUnget();
				tokval = findKeyword(tokStart,yytoklen);
				if (tokval)
				{
					if (tokval != TAG_CLOSE)
						state = 2;
					yyReturn(tokval);
				}
				else
				{
					yytext = tokenDup(tokStart,yytoklen);
					yylval = (YYSTYPE) yytext;
					state = 2;
					yyReturn(token(IDENT));
				}
				break;


			/* State 5 : Incomplete ident */
			CASE(5)
				c = yyGet();
				if (isalnum(c) || c == '_')
				{
					state = 5;
					break;
				}
				state = 6;
				break;


			/* State 6: Complete ident */
			CASE(6)
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				state = 2;
				yyReturn(token(IDENT));


			/* State 7: Incomplete real or int number */
			CASE(7)
				c = yyGet();
				if (isdigit(c))
				{
					state = 7;
					break;
				}
				if (c == '.')
				{
					state = 9;
					break;
				}
				state = 8;
				break;


			/* State 8: Complete integer number */
			CASE(8)
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				state = 2;
				yyReturn(token(NUM));
				break;


			/* State 9: Incomplete real number */
			CASE(9)
				c = yyGet();

				if(c == 'e' || c == 'E')
				{
					state = 17;
					break;
				}
				if (isdigit(c))
				{
					state = 9;
					break;
				}
				state = 10;
				break;


			/* State 10: Complete real number */
			CASE(10)
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				state = 2;
				yyReturn(token(REAL));


			/* State 11: Incomplete signed number */
			CASE(11)
				c = yyGet();
				if (isdigit(c))
				{
					state = 7;
					break;
				}
				if (c == '.')
				{
					state = 9;
					break;
				}
				state = 999;
				break;


			/* State 12: Incomplete comparison operator */
			CASE(12)
				c = yyGet();
				if (iscompop(c))
				{
					state = 12;
					break;
				}
				state = 13;
				break;


			/* State 13: Complete comparison operator */
			CASE(13)
				yyUnget();
				tokval = findKeyword(tokStart,yytoklen);
				if (tokval > 0)
				{
					/* findKeyword() sets state to 0 for
					** TAG_CLOSE; leave that alone */
					if (state != 0)
						state = 2;
					yyReturn(tokval);
				}
				state = 999;
				break;


			/* State 14: Incomplete text string */
			CASE(14)
				yytext = readTextLiteral(tokStart);
				yylval = (YYSTYPE) yytext;
				if (yytext)
				{
					state = 15;
					break;
				}
				state = 999;
				break;



			/* State 15: Complete text string */
			CASE(15)
				state = 2;
				yyReturn(token(TEXT));
				break;



			/* State 17 : Exponent Sign in Scientific Notation
			**
			** NOTE(review): an exponent without an explicit
			** sign (e.g. 1.5e10) is treated as an unknown token
			** here -- confirm that this is intended.
			*/
			CASE(17)
				c = yyGet();
				if(c == '-' || c == '+')
				{
					state = 18;
					break;
				}
				state = 999;
				break;

			/* State 18 : Exponent Value-first digit in Scientific
			** Notation */
			CASE(18)
				c = yyGet();
				if (isdigit(c))
				{
					state = 19;
					break;
				}
				state = 999;	/* if no digit, then token
						** is unknown */
				break;

			/* State 19 : Exponent Value in Scientific Notation */
			CASE(19)
				c = yyGet();
				if (isdigit(c))
				{
					state = 19;
					break;
				}
				state = 10;	/* At least 1 exponent
						** digit was required */
				break;

			/* State 20 : Incomplete variable */
			CASE(20)
				c = yyGet();
				if (isalpha(c) || isdigit(c) || c=='_')
				{
					break;
				}
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				state = 2;
				yyReturn(token(VAR));
				break;


			/* State 21: Start of comment*/
			CASE(21)
				c = yyGet();
				if (c == '*')
				{
					state = 22;
					break;
				}
				state = 999;
				break;


			/* State 22: Inside comment
			**
			** afterStar remembers whether the previous comment
			** character was '*', so that a run of stars before
			** the closing '/' still ends the comment.
			*/
			CASE(22)
				afterStar = 0;
				while(1)
				{
					c = yySkip();
					if (c == 0)
					{
						/* Never read beyond the end
						** of the buffer: report the
						** error and finish the scan */
						yyerror("Bad Comment!");
						state = 1000;
						break;
					}
					if (afterStar && c == '/')
					{
						tokStart=tokPtr;
						origlineno=yylineno;
						state=2;
						break;
					}
					afterStar = (c == '*');
				}
				break;

			/* State 999 : Unknown token.  Revert to single char */
			CASE(999)
				yyRevert();
				c = yyGet();
				/* Track parenthesis nesting while inside an
				** if/while condition so that its end can be
				** recognised */
				if (c == '(')
				{
					if(condState)
						condDepth++;
				}
				if (c == ')')
				{
					if(condState)
					{
						condDepth--;
						if(condDepth == 0)
							condState--;
					}
				}
				*dummyBuf = c;
				*(dummyBuf+1) = 0;
				yytext = dummyBuf;
				yylval = (YYSTYPE) yytext;
				state = 2;
				yyReturn(token(yytext[0]));


			/* State 1000 : End Of Input
			**
			** We need both of these as there a 2 possible
			** valid locations for the EOI.  One can bail
			** out and one must return a token before
			** bailing.
			*/
			CASE(1000)
				yyReturn(token(END_OF_INPUT));
				break;

			CASE(1001)
				state = 1000;
				tokPtr = tokStart;
				yytext = NULL;
				yytoklen = 0;
				yyReturn(token(END_OF_INPUT));
				break;

		}
	}
}


#ifdef LEX_DEBUG

/*
** yyerror - error hook for the stand-alone LEX_DEBUG build.
**
**	str	Error message (ignored)
**
** Deliberately empty: it exists only so that the scanner links without
** the parser.  The message is discarded.
*/
void yyerror(str)
	char	*str;
{
}


/*
** main - stand-alone driver for the LEX_DEBUG build.
**
** Reads up to 4095 bytes from standard input, scans them and prints one
** line for every token until yylex() returns 0.
**
** Returns 0 on success, 1 if standard input could not be read.
**
** NOTE(review): the loop treats the int returned by yylex() as a pointer
** to a printable token name, which is what the LEX_DEBUG token() macro is
** meant to supply.  That conversion loses bits where pointers are wider
** than int -- confirm before using this driver on such a platform.
*/
int main()
{
	char	*p,
		tmpBuf[4 * 1024];
	int	numBytes;

	(void)bzero(tmpBuf,sizeof(tmpBuf));

	/*
	** Leave the last byte untouched so that the buffer always ends
	** in the 0 byte the scanner uses to detect the end of input.
	*/
	numBytes = read(fileno(stdin),tmpBuf,sizeof(tmpBuf) - 1);
	if (numBytes < 0)
	{
		perror("read");
		return(1);
	}

	lexInitScanner((u_char *)tmpBuf);
	while((p = (char *) yylex()) != NULL)
	{
                printf("Line %-3d : %-15.15s of length %u is \"%s\"\n",
                        yylineno, p, yytoklen,
                        yytext?yytext:(u_char *)"(null)");

	}
	return(0);
}

#endif
