/*
 * Bawk C actions compiler
 */
#include <stdio.h>
#include "bawk.h"

EXPR_NODE *act_compile( actbuf )
char	*actbuf;	/* buffer that receives the tokenized action */
{
	/*
	 * Compile one action statement.  The action text is tokenized
	 * from the current input into "actbuf" by stmt_lex(), the token
	 * read pointer is rewound to the start of that buffer, the first
	 * token is primed with getoken(), and the result of stmt_parse()
	 * is handed back to the caller.
	 */
	DBUG_ENTER("act_compile");

	/* stmt_lex() looks at Where to decide how to treat a '{' */
	Where = ACTION;
	stmt_lex( actbuf );

	/* start parsing from the first token just written */
	Actptr = actbuf;
	getoken();

	DBUG_RETURN(stmt_parse());
}

EXPR_NODE *pat_compile( actbuf )
char	*actbuf;	/* buffer that receives the tokenized pattern */
{
	/*
	 * Compile one pattern expression.  With Where set to PATTERN,
	 * stmt_lex() stops (and pushes the character back) at the '{'
	 * that opens the following action.  The tokens left in "actbuf"
	 * are then parsed from the beginning and the result of
	 * stmt_parse() is returned.
	 */
	DBUG_ENTER("pat_compile");

	/* stmt_lex() looks at Where to decide how to treat a '{' */
	Where = PATTERN;
	stmt_lex( actbuf );

	/* start parsing from the first token just written */
	Actptr = actbuf;
	getoken();

	DBUG_RETURN(stmt_parse());
}

void stmt_lex( actbuf )
register char	*actbuf;/* where tokenized actions are compiled into */
{
	/*
	 * Read and tokenize C actions from current input file into the
	 * action buffer.  Strip out comments and whitespace in the
	 * process.
	 *
	 * Lexing stops at end of input (getcharacter() returns -1) or
	 * when one of the following is seen:
	 *   - a '{' while compiling a pattern (pushed back for the caller),
	 *   - the '}' that brings the brace count back to zero,
	 *   - a ',' outside of all braces and parens (pushed back).
	 * A T_EOF token is always appended after the last token.
	 *
	 * Most tokens are a single byte.  The following carry an operand
	 * stored directly after the token byte:
	 *   T_FUNCTION  int returned by isfunction()
	 *   T_VARIABLE  pointer returned by findvar()/addvar()
	 *   T_CONSTANT  int value (number or character constant)
	 *   T_STRING    NUL terminated string written by str_compile()
	 *   T_REGEXP    output of re_compile(); its return value is used
	 *               as the number of bytes to skip
	 *
	 * Problems are reported through error().
	 * NOTE(review): the overflow test at the bottom of the loop is made
	 * against the global Workbuf, not "actbuf", and only after the
	 * token has been written - confirm callers always pass Workbuf and
	 * leave enough slack.
	 */
	register char *actptr,	/* actbuf pointer */
		*cp;		/* work pointer */
	char	buf[MAXLINELEN+1];/* string buffer */
	register int braces = 0,/* counts '{}' pairs - return when 0 */
		parens = 0,	/* counts '()' pairs */
		i,		/* temp */
		c,		/* current input character */
		finished = 0;
	int	toolong;	/* set when a token does not fit in buf */

	DBUG_ENTER("stmt_lex");
	actptr = actbuf;
	while ( !finished && ((c = getcharacter()) != -1) )
	{
	    switch(c) {
		case ' ':
		case '\t':
		case '\n':
			/*
			 * Skip over spaces, tabs and newlines
			 */
			break;
		case '#':
			/*
			 * Skip comments.  Comments start with a '#' and
			 * end at the next newline.
			 */
			while ( (c = getcharacter()) != -1 && c!='\n' )
				;
			break;
		case '{':
			if ( Where==PATTERN )
			{
				/*
				 * We're compiling a pattern. The '{' marks
				 * the beginning of an action statement.
				 * Push the character back and return.
				 */
				ungetcharacter( (char) '{' );
				finished = 1;
			}
			else
			{
				/*
				 * We must be compiling an action statement.
				 * '{'s mark beginning of action or compound
				 * statements.
				 */
				++braces;
				*actptr++ = T_LBRACE;
			}
			break;
		case '}':
			*actptr++ = T_RBRACE;
			finished = (! --braces );
			break;
		case '(':
			++parens;
			*actptr++ = T_LPAREN;
			break;
		case ')':
			if ( --parens < 0 )
				error( "mismatched '()'", ACT_ERROR );
			*actptr++ = T_RPAREN;
			break;
		case ',':
			if ( !braces && !parens )
			{
				/*
				 * found a comma outside of any braces or
				 * parens - this must be a regular
				 * expression separator.
				 */
				ungetcharacter( (char) ',' );
				finished = 1;
			} else
				*actptr++ = T_COMMA;
			break;
		case '/':
			*actptr++ = T_DIV;
			break;
		case '@':
			/*
			 * Regular expression: give the '@' back so that
			 * re_compile() sees it, then skip over whatever
			 * re_compile() wrote.
			 */
			*actptr++ = T_REGEXP;
			ungetcharacter( (char) c );
			actptr += re_compile( actptr );
			break;
		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
		case 'y': case 'z':
		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
		case 'Y': case 'Z':
		case '_':
			/*
			 * It's a symbol reference. Copy the symbol into
			 * string buffer.  Characters that would not fit
			 * are still consumed but are not stored, so buf
			 * can never be overrun.
			 */
			cp = buf;
			toolong = 0;
			do
			{
				if ( cp < buf + MAXLINELEN )
					*cp++ = c;
				else
					toolong = 1;
			}
			while ( (c=getcharacter()) != -1 &&
				(isalnum( c ) || (c == '_')));
			ungetcharacter( (char) c );
			*cp = 0;
			if ( toolong )
				error( "symbol name too long", ACT_ERROR );
			/*
			 * Check if a keyword, builtin function or variable.
			 */
			if ( (c = iskeyword( buf )) != 0 )
				*actptr++ = c;
			else if ( (i = isfunction( buf )) != 0 )
			{
				*actptr++ = T_FUNCTION;
				storeint( actptr, i );
				actptr += sizeof( i );
			}
			else
			{
				/*
				 * It's a symbol name.  Look it up, creating
				 * it if it is not known yet.
				 */
				*actptr++ = T_VARIABLE;
				if ( !(cp = (char *) findvar( buf )) )
					cp = (char *) addvar( buf );
				storeptr( actptr, cp );
				actptr += sizeof( cp );
			}
			break;
#ifdef QUOTE_STRING_HACK
		case '`':
#endif
		case '"':
			/*
			 * It's a string constant.  str_compile() returns
			 * NULL when the closing delimiter is missing; in
			 * that case drop the token again and stop instead
			 * of continuing with a null write pointer.
			 */
			*actptr++ = T_STRING;
			cp = str_compile( actptr, c );
			if ( cp )
				actptr = cp;
			else
			{
				--actptr;
				finished = 1;
			}
			break;
		case '\'':
			/*
			 * It's a character constant.  Only the first
			 * character that str_compile() produced is kept.
			 * buf is pre-cleared so that a failed
			 * str_compile() never leaves *buf unset.
			 * NOTE(review): str_compile() takes no length, so
			 * an over-long character constant can still
			 * overrun buf.
			 */
			*actptr++ = T_CONSTANT;
			buf[0] = 0;
			str_compile( buf, (char) '\'' );
			storeint( actptr, *buf );
			actptr += sizeof( i );
			break;
		case '0': case '1': case '2': case '3': case '4': case '5':
		case '6': case '7': case '8': case '9':
			/*
			 * It's a numeric constant.  Digits that would not
			 * fit in buf are consumed but not stored.
			 */
			*actptr++ = T_CONSTANT;
			cp = buf;
			toolong = 0;
			do
			{
				if ( cp < buf + MAXLINELEN )
					*cp++ = c;
				else
					toolong = 1;
			}
			while ( (c=getcharacter()) != -1 && isdigit(c) );
			ungetcharacter( (char) c );
			*cp = 0;
			if ( toolong )
				error( "numeric constant too long", ACT_ERROR );
			storeint( actptr, atoi( buf ) );
			actptr += sizeof( i );
			break;
		case '$':
			*actptr++ = T_DOLLAR;
			break;
		/*
		 * One or two character operators: peek at the next
		 * character and push it back when it is not part of
		 * the operator.
		 */
		case '=':
			if ( (c=getcharacter()) == '=' )
				*actptr++ = T_EQ;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_ASSIGN;
			}
			break;
		case '!':
			if ( (c=getcharacter()) == '=' )
				*actptr++ = T_NE;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_LNOT;
			}
			break;
		case '<':
			if ( (c=getcharacter()) == '<' )
				*actptr++ = T_SHL;
			else if ( c == '=' )
				*actptr++ = T_LE;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_LT;
			}
			break;
		case '>':
			if ( (c=getcharacter()) == '>' )
				*actptr++ = T_SHR;
			else if ( c == '=' )
				*actptr++ = T_GE;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_GT;
			}
			break;
		case '&':
			if ( (c=getcharacter()) == '&' )
				*actptr++ = T_LAND;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_AND;
			}
			break;
		case '|':
			if ( (c=getcharacter()) == '|' )
				*actptr++ = T_LOR;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_OR;
			}
			break;
		case '+':
			if ( (c=getcharacter()) == '+' )
				*actptr++ = T_INCR;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_ADD;
			}
			break;
		case '-':
			if ( (c=getcharacter()) == '-' )
				*actptr++ = T_DECR;
			else
			{
				ungetcharacter( (char) c );
				*actptr++ = T_SUB;
			}
			break;
		case '[':
			*actptr++ = T_LBRACKET;
			break;
		case ']':
			*actptr++ = T_RBRACKET;
			break;
		case ';':
			*actptr++ = T_SEMICOLON;
			break;
		case '*':
			*actptr++ = T_MUL;
			break;
		case '%':
			*actptr++ = T_MOD;
			break;
		case '^':
			*actptr++ = T_XOR;
			break;
		case '~':
			*actptr++ = T_NOT;
			break;
		default:
			/*
			 * Bad character in input line
			 */
			error( "lexical error", ACT_ERROR );
	    }
	    if ( actptr >= Workbuf + MAXWORKBUFLEN )
		error( "action too long", MEM_ERROR );
	}
	if ( braces || parens )
		error( "mismatched '{}' or '()'", ACT_ERROR );

	*actptr++ = T_EOF;

	DBUG_VOID_RETURN;
}

char *
str_compile( str, delim )
register char *str, delim;
{
	/*
	 * Compile a string from current input file into the given string
	 * buffer.  Stop when input character is the delimiter in "delim".
	 * Returns a pointer to the first character after the string
	 * (i.e. just past the terminating NUL that is written).
	 *
	 * Backslash escapes \b \n \t \f \r are translated, and a
	 * backslash followed by a digit 0-3 reads exactly two more
	 * characters and converts the three as an octal number.
	 *
	 * If the input ends before the delimiter is seen, error() is
	 * called with a "missing <delim> delimiter" message and NULL is
	 * returned.  No bound is applied to "str"; the caller must
	 * supply a buffer that is large enough.
	 */
	unsigned int tmpc;	/* can not be a register variable */
	register int c;
	register int n;		/* index into octal digit buffer */
	char buf[4];		/* three octal digits and a NUL */
	char errmsg[32];	/* room for "missing X delimiter" */

	DBUG_ENTER("str_compile");
	while ( (c = getcharacter()) != -1 && c != delim)
	{
		if ( c  == '\\' )
		{
			switch ( c = getcharacter() )
			{
			case -1: goto err;
			case 'b': c = '\b'; break;
			case 'n': c = '\n'; break;
			case 't': c = '\t'; break;
			case 'f': c = '\f'; break;
			case 'r': c = '\r'; break;
			case '0':
			case '1':
			case '2':
			case '3':
				buf[0] = c;
				for ( n=1; n<3; n++ )
				{
					/*
					 * Test for end of input while the
					 * value is still an int; once it is
					 * narrowed to char the comparison
					 * with -1 is not reliable.
					 */
					if ( (c = getcharacter()) == -1 )
						goto err;
					buf[n] = c;
				}
				buf[n] = 0;
				tmpc = 0;
				sscanf( buf, "%o", &tmpc );
				c = tmpc;
				break;
			case '\n':
				if ( getcharacter() == -1 )
					goto err;
				/* FALLTHROUGH */
			default:
				/*
				 * NOTE(review): the character that followed
				 * the backslash is dropped here and the next
				 * input character is stored in its place
				 * (after a newline one further character is
				 * skipped first).  Behaviour kept as found;
				 * confirm this is what is intended for
				 * escapes such as \\ or \".
				 */
				if ( (c = getcharacter()) == -1 )
					goto err;
			}
		}
		*str++ = c;
	}
	*str++ = 0;

	DBUG_RETURN(str);
err:
	/* the message needs its own buffer - it does not fit in buf[4] */
	sprintf( errmsg, "missing %c delimiter", delim );
	error( errmsg, 4 );
	DBUG_RETURN(NULL);
}

void storeint( ip, i )
char *ip;
int i;
{
	/*
	 * Write the bytes of the integer "i" into the byte buffer at
	 * "ip".  sizeof(int) bytes are transferred with movmem(), which
	 * is called here with the arguments (from, to, length).
	 */
	char *from = (char *) &i;

	DBUG_ENTER("storeint");
	movmem(from, ip, sizeof(int));
	DBUG_VOID_RETURN;
}

void storeptr( pp, p )
char *pp, *p;
{
	/*
	 * Write the pointer value "p" itself (not what it points to)
	 * into the byte buffer at "pp".  sizeof(char *) bytes are
	 * transferred with movmem(), which is called here with the
	 * arguments (from, to, length).
	 */
	char *from = (char *) &p;

	DBUG_ENTER("storeptr");
	movmem(from, pp, sizeof(char *));
	DBUG_VOID_RETURN;
}

int fetchint( ip )
char *ip;
{
	/*
	 * Read back an integer that was placed in a byte buffer by
	 * storeint().  sizeof(int) bytes starting at "ip" are moved
	 * into a local int, which is then returned.
	 */
	int value;

	DBUG_ENTER("fetchint");
	movmem(ip, (char *) &value, sizeof(int));
	DBUG_RETURN(value);
}

char *
fetchptr( pp )
char *pp;
{
	/*
	 * Read back a pointer that was placed in a byte buffer by
	 * storeptr().  sizeof(char *) bytes starting at "pp" are moved
	 * into a local pointer, which is then returned.
	 */
	char *value;

	DBUG_ENTER("fetchptr");
	movmem(pp, (char *) &value, sizeof(char *));
	DBUG_RETURN(value);
}

#ifndef DBUG_OFF
/*
 * Printable names of the token codes, used only for debug tracing:
 * getoken() indexes this table with the current token value in its
 * DBUG_PRINT call.  The table is compiled only when DBUG_OFF is not
 * defined.  Slot 0 holds a null pointer instead of a name.
 * NOTE(review): the order of the entries presumably has to match the
 * numeric token values defined elsewhere (bawk.h?) - keep the two in
 * step when adding or removing tokens.
 */
char *token_name[] = {
0,
"CHAR",
"BOL",
"EOL",
"ANY",
"CLASS",
"NCLASS",
"STAR",
"PLUS",
"MINUS",
"ALPHA",
"DIGIT",
"NALPHA",
"PUNCT",
"RANGE",
"ENDPAT",
"T_STRING",
"T_DOLLAR",
"T_REGEXP",
"T_REGEXP_ARG",
"T_CONSTANT",
"T_VARIABLE",
"T_FUNCTION",
"T_SEMICOLON",
"T_EOF",
"T_LBRACE",
"T_RBRACE",
"T_LPAREN",
"T_RPAREN",
"T_LBRACKET",
"T_RBRACKET",
"T_COMMA",
"T_ASSIGN",
"T_STAR",
"T_MUL",
"T_DIV",
"T_MOD",
"T_ADD",
"T_UMINUS",
"T_SUB",
"T_SHL",
"T_SHR",
"T_LT",
"T_LE",
"T_GT",
"T_GE",
"T_EQ",
"T_NE",
"T_NOT",
"T_ADDROF",
"T_AND",
"T_XOR",
"T_OR",
"T_LNOT",
"T_LAND",
"T_LOR",
"T_INCR",
"T_DECR",
"T_POSTINCR",
"T_POSTDECR",
"T_IF",
"T_ELSE",
"T_WHILE",
"T_BREAK",
"T_CHAR",
"T_INT",
"T_BEGIN",
"T_END",
"T_NF",
"T_NR",
"T_FS",
"T_RS",
"T_FILENAME",
"T_STATEMENT",
"T_DECLARE",
"T_ARRAY_DECLARE"
};
#endif

char getoken()
{
	/*
	 * Advance to the next token in the compiled action buffer.
	 * The token byte at Actptr is copied into Token and Actptr is
	 * moved past it and past any operand that follows:
	 *   T_STRING, T_REGEXP     Value.dptr points at the operand text
	 *                          inside the buffer; Actptr skips the
	 *                          text and its terminating NUL
	 *   T_VARIABLE             Value.dptr is the stored pointer
	 *   T_FUNCTION, T_CONSTANT Value.ival is the stored int
	 *   T_EOF                  Actptr is backed up so that T_EOF is
	 *                          returned again on the next call
	 * For T_EOF and every other token Value.dptr is cleared.
	 * Returns the new value of Token.
	 */
	DBUG_ENTER("getoken");

	Token = *Actptr++;
	switch ( Token )
	{
	case T_EOF:
		/* stay on the end marker */
		--Actptr;
		Value.dptr = 0;
		break;
	case T_CONSTANT:
	case T_FUNCTION:
		Value.ival = fetchint( Actptr );
		Actptr += sizeof( int );
		break;
	case T_VARIABLE:
		Value.dptr = fetchptr( Actptr );
		Actptr += sizeof( char * );
		break;
	case T_REGEXP:
	case T_STRING:
		Value.dptr = Actptr;
		Actptr += strlen( Actptr ) + 1;
		break;
	default:
		Value.dptr = 0;
		break;
	}

	DBUG_PRINT("getoken",
	   ("Token='%s' (%d), Value=%d",token_name[Token],Token,Value.ival));
	DBUG_RETURN(Token);
}
