/*
**	lite_lex.c	- Hand-written lexical scanner (yylex) for the Lite language
**
**
** Copyright (c) 1995-96  Hughes Technologies
**
** Permission to use, copy, and distribute for non-commercial purposes,
** is hereby granted without fee, providing that the above copyright
** notice appear in all copies and that both the copyright notice and this
** permission notice appear in supporting documentation.
**
** This software is provided "as is" without any expressed or implied warranty.
**
**
*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>

#include "lite.h"
#include "../lang-common/y.tab.h"

#define	REG		register


/*
** Macros for handling the scanner's internal pointers
**
**	yyGet()		Consume the next character as part of the current
**			token (yytoklen grows) and yield it.
**	yyUnget()	Push the most recently consumed character back.
**	yySkip()	Consume the next character without making it part
**			of the token (tokStart moves along with tokPtr)
**			and yield it.
**	yyRevert()	Abandon the partial token and rewind to its start.
**	yyReturn(t)	Mark the start of the next token and return t from
**			the enclosing function.
**
** All of them keep yylineno in step with the newlines passed over.
**
** yyGet() and yySkip() are single expressions and the others are
** do { } while (0) statements, so each use behaves as one unit even as
** the unbraced body of an if or else.  With YYLINE_DEBUG defined every
** Get, Skip and Revert is traced on stdout.
*/
#ifdef YYLINE_DEBUG
#define yyGet()		(yytoklen++, \
			 yylineno += (*tokPtr == '\n'), \
			 printf("Get at line %d = '%c'\n",yylineno,*tokPtr), \
			 *tokPtr++)
#define yyUnget()	do { \
				tokPtr--; yytoklen--; \
				if (*tokPtr == '\n') yylineno--; \
			} while (0)
#define yySkip()	(tokStart++, \
			 yylineno += (*tokPtr == '\n'), \
			 origlineno = yylineno, \
			 printf("Skip at line %d = '%c'\n",yylineno,*tokPtr), \
			 *tokPtr++)
#define yyRevert()	do { \
				tokPtr=tokStart; yytoklen=0; \
				yylineno=origlineno; \
				printf("Reverting ...\n"); \
			} while (0)
#define yyReturn(t)	do { \
				tokStart=tokPtr; origlineno=yylineno; \
				return(t); \
			} while (0)

#else
#define yyGet()		(yytoklen++, \
			 yylineno += (*tokPtr == '\n'), \
			 *tokPtr++)
#define yyUnget()	do { \
				tokPtr--; yytoklen--; \
				if (*tokPtr == '\n') yylineno--; \
			} while (0)
#define yySkip()	(tokStart++, \
			 yylineno += (*tokPtr == '\n'), \
			 origlineno = yylineno, \
			 *tokPtr++)
#define yyRevert()	do { \
				tokPtr=tokStart; yytoklen=0; \
				yylineno=origlineno; \
			} while (0)
#define yyReturn(t)	do { \
				tokStart=tokPtr; origlineno=yylineno; \
				return(t); \
			} while (0)
#endif



/*
** Macros for matching character classes.  These are in addition to
** those provided in <ctype.h>
**
**	iswhite(c)	true for a space, tab or newline
**	iscompop(c)	true for any character that can appear in a
**			comparison or logical operator: < > = ! ~ & |
**
** The argument is fully parenthesised so that any expression may be
** passed, but it is evaluated more than once: do not pass an
** expression with side effects.
*/
#ifdef	iswhite
# undef iswhite
#endif
#define iswhite(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')

#ifdef	iscompop
# undef iscompop
#endif
#define iscompop(c)	((c) == '<' || (c) == '>' || (c) == '=' || \
			 (c) == '!' || (c) == '~' || (c) == '&' || \
			 (c) == '|')


/*
** Debugging macros.
**
**	token(x)	Wraps every token code the scanner returns.  In a
**			normal build it is just x.  With LEX_DEBUG defined
**			it is a string literal cast to int, which the test
**			driver at the end of this file casts back to a
**			char * and prints.
**			NOTE(review): as written the literal is "x" for
**			every token; presumably this relied on a pre-ANSI
**			preprocessor substituting the argument inside the
**			string (ANSI C would need #x), and the cast to int
**			only round-trips where a pointer fits in an int -
**			confirm before relying on LEX_DEBUG output.
**
**	CASE(x)		Labels one state of the switch in yylex().  With
**			DEBUG_STATE defined it also prints the current
**			character, the state number and condState.condDepth
**			each time a non-zero state is entered.
*/

/* #define DEBUG_STATE */	/* Define this to watch the state transitions */

#ifdef LEX_DEBUG
#  define token(x)	(int) "x"
#else
#  define token(x)	x
#endif /* LEX_DEBUG */

#ifdef DEBUG_STATE
#  define CASE(x)	case x: if (x) printf("%c -> state %d  cond %d.%d\n",\
				c,x,condState,condDepth); \
				else printf("Scanner starting at state 0\n");
#else
#  define CASE(x)	case x:
#endif



/*
** Scanner state visible to the rest of the program
*/

/* Text of the token just returned; yylex() resets it to NULL on entry */
u_char	*yytext = NULL;

/* Characters consumed so far for the current token (see yyGet/yyUnget) */
u_int	yytoklen = 0;

/* Current input line, moved by the pointer macros as newlines go by */
int	yylineno = 1;


/*
** Scanner state private to this file
*/

/* Next character to be read */
static	u_char	*tokPtr;

/* First character of the token currently being assembled */
static	u_char	*tokStart;

/* Current state of the yylex() state machine; 2 is the start state */
static	int	state = 2;

/* Bumped by findKeyword() for each "if" / "while", dropped again in
** state 999 when the parenthesis depth tracked below returns to 0 */
static	int	condState = 0;

/* Parenthesis depth, counted only while condState is non-zero */
static	int	condDepth = 0;

/* Value of yylineno at the start of the current token (for yyRevert) */
static	int	origlineno;


/* The semantic value is defined here only for the stand-alone test
** build; otherwise it is defined elsewhere and just referenced */
#ifdef LEX_DEBUG
	YYSTYPE		yylval;
#else
	extern	YYSTYPE		yylval;
#endif



/*
** lexInitScanner - point the scanner at a new input buffer.
**
**	buf	NUL-terminated text to be scanned.
**
** If the buffer starts with "#!" the whole first line is skipped and
** line counting starts at 2, otherwise scanning starts at buf on line 1.
** A "#!" line that is not terminated by a newline leaves the scanner
** positioned on the terminating NUL (an empty input) rather than
** running off the end of the buffer.  The state machine is reset to its
** start state (2).
*/
void lexInitScanner(buf)
	u_char	*buf;
{
	u_char *cp;

	cp = buf;
	origlineno = yylineno = 1;
	if (*cp == '#' && *(cp+1) == '!')
	{
		/* Skip the interpreter line, but never walk past the NUL */
		while(*cp != 0 && *cp != '\n')
		{
			cp++;
		}
		if (*cp == '\n')
		{
			cp++;
			origlineno = yylineno = 2;
		}
	}
	tokStart = cp;
	state = 2;
}



/*
** checkKeyword - compare the first len characters of tok with key.
**
**	tok	start of the scanned token (need not be NUL-terminated)
**	key	NUL-terminated keyword or operator to test against
**	len	number of characters in the scanned token
**
** Returns 0 only when key is exactly len characters long and matches
** tok.  A length mismatch gives -1, otherwise the (non-zero) result of
** strncmp() is passed back.
*/
static int checkKeyword(tok, key, len)
	char	*tok,
		*key;
	int	len;
{
	int	sameLength;

	sameLength = (strlen(key) == len);
	return(sameLength ? strncmp(tok,key,len) : -1);
}



/*
** findKeyword - map a scanned token onto an operator or keyword token.
**
**	tok	start of the scanned token (need not be NUL-terminated)
**	len	number of characters in the scanned token
**
** Returns the matching token code, or 0 if the text is neither a
** comparison/logical operator nor a language keyword.  Recognising
** "if" or "while" also increments condState.
**
** Candidates are grouped by length so that only keywords that could
** possibly match are compared.
*/
static int findKeyword(tok,len)
	char	*tok;
	int	len;
{
	switch(len)
	{
		case 1:
			if (checkKeyword(tok,">",len) == 0)
				return(token(GT));
			if (checkKeyword(tok,"<",len) == 0)
				return(token(LT));
			break;

		case 2:
			if (checkKeyword(tok,"==",len) == 0)
				return(token(EQ));
			if (checkKeyword(tok,"!=",len) == 0)
				return(token(NE));
			if (checkKeyword(tok,">=",len) == 0)
				return(token(GE));
			if (checkKeyword(tok,"<=",len) == 0)
				return(token(LE));
			if (checkKeyword(tok,"=~",len) == 0)
				return(token(RE));
			if (checkKeyword(tok,"&&",len) == 0)
				return(token(LOGICAL_AND));
			if (checkKeyword(tok,"||",len) == 0)
				return(token(LOGICAL_OR));
			if (checkKeyword(tok,"if",len) == 0)
			{
				/* A condition follows */
				condState++;
				return(token(IF));
			}
			break;

		case 4:
			if (checkKeyword(tok,"else",len) == 0)
				return(token(ELSE));
			if (checkKeyword(tok,"load",len) == 0)
				return(token(LOAD));
			break;

		case 5:
			if (checkKeyword(tok,"while",len) == 0)
			{
				/* A condition follows */
				condState++;
				return(token(WHILE));
			}
			if (checkKeyword(tok,"break",len) == 0)
				return(token(BREAK));
			if (checkKeyword(tok,"funct",len) == 0)
				return(token(FUNCT));
			break;

		case 6:
			if (checkKeyword(tok,"return",len) == 0)
				return(token(RETURN));
			break;

		case 7:
			if (checkKeyword(tok,"modload",len) == 0)
				return(token(MOD_LOAD));
			break;

		case 8:
			if (checkKeyword(tok,"continue",len) == 0)
				return(token(CONTINUE));
			break;
	}

	/*
	** Nothing matched
	*/
	return(0);
}



/*
** tokenDup - make a NUL-terminated heap copy of a token.
**
**	tok	first character of the token
**	len	number of characters to copy
**
** Returns the newly allocated copy, or NULL if len is negative or the
** allocation fails.  Nothing in this file frees the copy; it is handed
** on through yytext / yylval.
*/
static u_char *tokenDup(tok,len)
	u_char	*tok;
	int	len;
{
	u_char	*new;

	if (len < 0)
		return(NULL);
	new = (u_char *)malloc((size_t)len + 1);
	if (new == NULL)
		return(NULL);
	(void)memcpy(new,tok,(size_t)len);
	*(new + len) = 0;
	return(new);
}


static u_char *readTextLiteral(tok)
	u_char	*tok;
{
	REG 	u_char c;
	int	bail;

	bail = 0;
	while(!bail)
	{
		c = yyGet();
		switch(c)
		{
			case 0:
				return(NULL);

			case '\\':
				c = yyGet();
				if (!c)
					return(NULL);
				break;
	
			case '"':
				bail=1;
				break;
		}
	}
	return(tokenDup(tok,yytoklen));
}


int yylex()
{
	REG	u_char	c;
	REG	u_char	t;
	int	tokval,
		signedVal = 0;
	static	u_char dummyBuf[2];
	static	int init;


	/*
	** Fudge it so that this looks like Lite code within and HTML
	** page (i.e. w3-msql stuff).  We do this so that we can share
	** the parser with w3-msql.  Keeping 2 copies of the parser in
	** synch would be a hastle and the added overhead of 2 calls
	** to yylex() aren't a problem.
	*/
	if (!init)
	{
		init=1;
		return(token(HTML));
	}

	/*
	** Handle the end of input.  We return an EOI token when we hit
	** the end and then return a 0 on the next call to yylex.  This
	** allows the parser to do the right thing with trailing garbage
	** in the expression.
	*/
	yytext = NULL;
	if (state == 1002)
	{
		return(0);
	}

	/*
	** Dive into the state machine
	*/
	origlineno = yylineno;
	while(1)
	{
		switch(state)
		{
			/* State 2 : Start of w3-mSQL code */
			CASE(2)
				tokPtr = tokStart;
				yytext = NULL;
				yytoklen = 0;
				c = yyGet();
				while (iswhite(c))
				{
					c = yySkip();
				}
				if (c == '"')
				{
					state = 14;
					break;
				}
				if (c == '$')
				{
					state = 20;
					break;
				}
				if (isalpha(c))
				{
					state = 3;
					break;
				}
				if (isdigit(c))
				{
					state = 7;
					break;
				}
				if (c == '/')
				{
					state =21;
					break;
				}
				if (c == '.')
				{
					t = yyGet();
					if ( isdigit(t) ) 
					{
						yyUnget();
						state = 9;
						break;
					} 
					else
						yyUnget();
				}
				if (c == '-' || c == '+')
				{
					state = 11;
					break;
				}
				if (iscompop(c))
				{
					state = 12;
					break;
				}
				if (c == 0)
				{
					state = 1000;
					break;
				}
				state = 999;
				break;

			/* State 3 : Incomplete keyword or ident */
			CASE(3)
				c = yyGet();
				if (isalpha(c))
				{
					state = 3;
					break;
				}
				if (isdigit(c) || c == '_')
				{
					state = 5;
					break;
				}
				state = 4;
				break;


			/* State 4 : Complete keyword or ident */
			CASE(4)
				yyUnget();
				tokval = findKeyword(tokStart,yytoklen);
				if (tokval)
				{
					if (tokval != TAG_CLOSE)
						state = 2;
					yyReturn(tokval);
				}
				else
				{
					yytext = tokenDup(tokStart,yytoklen);
					yylval = (YYSTYPE) yytext;
					state = 2;
					yyReturn(token(IDENT));
				}
				break;


			/* State 5 : Incomplete ident */
			CASE(5)
				c = yyGet();
				if (isalnum(c) || c == '_')
				{
					state = 5;
					break;
				}
				state = 6;
				break;


			/* State 6: Complete ident */
			CASE(6)
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				state = 2;
				yyReturn(token(IDENT));


			/* State 7: Incomplete real or int number */
			CASE(7)
				c = yyGet();
				if (isdigit(c))
				{
					state = 7;
					break;
				}
				if (c == '.')
				{
					state = 9;
					break;
				}
				state = 8;
				break;


			/* State 8: Complete integer number */
			CASE(8)
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				state = 2;
				if (signedVal)
				{
					yyReturn(token(SIGNED_NUM));
				}
				else
				{
					yyReturn(token(NUM));
				}
				break;


			/* State 9: Incomplete real number */
			CASE(9)
				c = yyGet();

                                if(c == 'e' || c == 'E')
                                {
                                        state = 17;
                                        break;
                                }
				if (isdigit(c))
				{
					state = 9;
					break;
				}
				state = 10;
				break;


			/* State 10: Complete real number */
			CASE(10)
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				state = 2;
				if (signedVal)
				{
					yyReturn(token(SIGNED_REAL));
				}
				else
				{
					yyReturn(token(REAL));
				}


			/* State 11: Incomplete signed number */
			CASE(11)
				c = yyGet();
				signedVal = 1;
				if (isdigit(c))
				{
					state = 7;
					break;
				}
				if (c == '.')
				{
					state = 9;
					break;
				}
				signedVal = 0;
				state = 999;
				break;


			/* State 12: Incomplete comparison operator */
			CASE(12)
				c = yyGet();
				if (iscompop(c))
				{
					state = 12;
					break;
				}
				state = 13;
				break;


			/* State 13: Complete comparison operator */
			CASE(13)
				yyUnget();
				tokval = findKeyword(tokStart,yytoklen);
				if (tokval > 0)
				{
					if (state != 0)
						state = 2;
					yyReturn(tokval);
				}
				state = 999;
				break;

	
			/* State 14: Incomplete text string */
			CASE(14)
				yytext = readTextLiteral(tokStart);
				yylval = (YYSTYPE) yytext;
				if (yytext)
				{
					state = 15;
					break;
				}
				state = 999;
				break;



			/* State 15: Complete text string */
			CASE(15)
				state = 2;
				yyReturn(token(TEXT));
				break;



                        /* State 17 : Exponent Sign in Scientific Notation */
                        CASE(17)
                                c = yyGet();
                                if(c == '-' || c == '+')
                                {
                                      state = 18;
                                      break;
                                }
                                state = 999;
                                break;

                        /* State 18 : Exponent Value-first digit in Scientific 
			** Notation */
                        CASE(18)
                                c = yyGet();
                                if (isdigit(c))
                                {
                                        state = 19;
                                        break;
                                }
                                state = 999;  	/* if no digit, then token 
						** is unknown */
                                break;

                        /* State 19 : Exponent Value in Scientific Notation */
                        CASE(19)
                                c = yyGet();
                                if (isdigit(c))
                                {
                                        state = 19;
                                        break;
                                }
                                state = 10;    	/* At least 1 exponent 
						** digit was required */
                                break;

			/* State 20 : Incomplete variable */
			CASE(20)
				c = yyGet();
				if (isalpha(c) || isdigit(c) || c=='_')
				{
					break;
				}
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				state = 2;
				yyReturn(token(VAR));
				break;


			/* State 21: Start of comment*/
			CASE(21)
				c = yyGet();
				if (c == '*')
				{
					state = 22;
					break;
				}
				state = 999;
				break;


			/* State 22: Inside comment*/
			CASE(22)
				while(1)
				{
					c = yySkip();
					if (c == 0)
					{
						yyerror("Bad Comment!");
					}
					if (c == '*')
					{
						c = yySkip();
						if (c == '/')
						{
							tokStart=tokPtr; 
							origlineno=yylineno;
							state=2;
							break;
						}
					}
				}
				break;

			/* State 999 : Unknown token.  Revert to single char */
			CASE(999)
				yyRevert();
				c = yyGet();
				if (c == '(')
				{
					if(condState)
						condDepth++;
				}
				if (c == ')')
				{
					if(condState)
					{
						condDepth--;
						if(condDepth == 0)
							condState--;
					}
				}
				*dummyBuf = c;
				*(dummyBuf+1) = 0;
				yytext = dummyBuf;
				yylval = (YYSTYPE) yytext;
				state = 2;
				yyReturn(token(yytext[0]));


			/* State 1000 : End Of Input 
			** We need to fudge a CLOSE_TAG token for the
			** parser too
			*/
			CASE(1000)
				state = 1001;
				yyReturn(token(TAG_CLOSE));
				break;

			CASE(1001)
				state = 1002;
				yyReturn(token(END_OF_INPUT));
				break;

		}
	}
}


#ifdef LEX_DEBUG

/*
** yyerror - error hook for the stand-alone LEX_DEBUG build.
**
**	str	message supplied by the scanner
**
** Writes the message to stderr so that scanner errors are visible when
** the test driver is run, then returns to the caller.
*/
void yyerror(str)
	char	*str;
{
	fprintf(stderr, "%s\n", str ? str : "(null)");
}

/*
** main - stand-alone test driver, built only when LEX_DEBUG is defined.
**
** Reads up to one buffer's worth of text from stdin, scans it and
** prints one line per token until yylex() returns 0.  One byte of the
** buffer is always kept back for the terminating NUL the scanner
** depends on, so longer input is truncated rather than left
** unterminated.
**
** NOTE(review): the value of yylex() is cast to a char * and printed
** with %s.  That relies on the LEX_DEBUG definition of token(), which
** casts a string literal to int, and so only works where a pointer
** survives the round trip through an int - confirm on the target
** platform.
*/
int main()
{
	char	*p,
		tmpBuf[4 * 1024];
	size_t	numRead;

	numRead = fread(tmpBuf, 1, sizeof(tmpBuf) - 1, stdin);
	tmpBuf[numRead] = 0;
	lexInitScanner((u_char *)tmpBuf);
	while((p = (char *) yylex()) != NULL)
	{
		printf("Line %-3d : %-15.15s of length %u is \"%s\"\n", 
			yylineno, p, yytoklen,
			yytext?yytext:(u_char *)"(null)");
	}
	return(0);
}

#endif
