/*
 * Bawk C actions compiler
 */
#include <stdio.h>
#include "bawk.h"

int
act_compile( actbuf )
char	*actbuf;	/* where tokenized actions are compiled into */
{
	/*
	 * Compile an action statement from the current input into actbuf.
	 * Where is set to ACTION so that stmt_compile counts '{' '}' pairs
	 * and treats '/' as the division operator.
	 * Returns whatever stmt_compile returns: the number of bytes
	 * stored in actbuf, including the trailing T_EOF.
	 */
	Where = ACTION;
	return stmt_compile( actbuf );
}

int
pat_compile( actbuf )
char	*actbuf;	/* where tokenized actions are compiled into */
{
	/*
	 * Compile a pattern from the current input into actbuf.
	 * Where is set to PATTERN so that stmt_compile stops at a '{' or
	 * at a comma outside of any parens, and treats '/' as the start
	 * of a regular expression.
	 * Returns whatever stmt_compile returns: the number of bytes
	 * stored in actbuf, including the trailing T_EOF.
	 */
	Where = PATTERN;
	return stmt_compile( actbuf );
}

stmt_compile( actbuf )
char	*actbuf;	/* where tokenized actions are compiled into */
{
	/*
	 * Read and tokenize C actions from current input file into the
	 * action buffer.  Strip out comments and whitespace in the
	 * process.
	 */
	char	*actptr,	/* actbuf pointer */
		*cp,		/* work pointer */
		buf[MAXLINELEN];/* string buffer */
	int	braces,		/* counts '{}' pairs - return when 0 */
		parens,		/* counts '()' pairs */
		i,		/* temp */
		c;		/* current input character */

	braces = parens = 0;
	actptr = actbuf;
	while ( (c = getcharacter()) != -1 )
	{
		/*
		 * Skip over spaces, tabs and newlines
		 */
		if ( c==' ' || c=='\t' || c=='\n' )
			continue;
		if ( c=='#' )
		{
			/*
			 * Skip comments.  Comments start with a '#' and
			 * end at the next newline.
			 */
			while ( (c = getcharacter()) != -1 && c!='\n' )
				;
			continue;
		}

		if ( c=='{' )
		{
			if ( Where==PATTERN )
			{
				/*
				 * We're compiling a pattern. The '{' marks
				 * the beginning of an action statement.
				 * Push the character back and return.
				 */
				ungetcharacter( '{' );
				break;
			}
			else
			{
				/*
				 * We must be compiling an action statement.
				 * '{'s mark beginning of action or compound
				 * statements.
				 */
				++braces;
				*actptr++ = T_LBRACE;
			}
		}
		else if ( c=='}' )
		{
			*actptr++ = T_RBRACE;
			if ( ! --braces )
				/*
				 * Found the end of the action string
				 */
				break;
		}
		else if ( c=='(' )
		{
			++parens;
			*actptr++ = T_LPAREN;
		}
		else if ( c==')' )
		{
			if ( --parens < 0 )
				error( "mismatched '()'", ACT_ERROR );
			*actptr++ = T_RPAREN;
		}
		else if ( c==',' && !braces && !parens && Where==PATTERN )
		{
			/*
			 * found a comma outside of any braces or parens-
			 * this must be a regular expression seperator.
			 */
			ungetcharacter( ',' );
			break;
		}

		/*
		 * Check if it's a regular expression:
		 */
		else if ( c=='/' )
		{
			/*
			 * A '/' inside a pattern string starts a regular
			 * expression.  Inside action strings, a '/' is
			 * the division operator.
			 */
			if ( Where == PATTERN )
				goto dopattern;
			else
				*actptr++ = T_DIV;
		}
		else if ( c=='@' )
		{
dopattern:
			/*
			 * Within action strings, only the '@' may be used to
			 * delimit regular expressions
			 */
			*actptr++ = T_REGEXP;
			ungetcharacter( c );
			actptr += re_compile( actptr );
		}

		/*
		 * symbol, string or constant:
		 */
		else if ( alpha( c ) )
		{
			/*
			 * It's a symbol reference. Copy the symbol into
			 * string buffer.
			 */
			cp = buf;
			do
				*cp++ = c;
			while ( (c=getcharacter()) != -1 && alphanum( c ) );
			ungetcharacter( c );
			*cp = 0;
			/*
			 * Check if a keyword, builtin function or variable.
			 */
			if ( c = iskeyword( buf ) )
				*actptr++ = c;
			else if ( i = isfunction( buf ) )
			{
				*actptr++ = T_FUNCTION;
				storeint( actptr, i );
				actptr += sizeof( i );
			}
			else
			{
				/*
				 * It's a symbol name.
				 */
				*actptr++ = T_VARIABLE;
				if ( !(cp = findvar( buf )) )
					cp = addvar( buf );
				storeptr( actptr, cp );
				actptr += sizeof( cp );
			}
		}

		else if ( c == '"' )
		{
			/*
			 * It's a string constant
			 */
			*actptr++ = T_STRING;
			actptr = str_compile( actptr, '"' );
		}
		else if ( c == '\'' )
		{
			/*
			 * It's a character constant
			 */
			*actptr++ = T_CONSTANT;
			str_compile( buf, '\'' );
			storeint( actptr, *buf );
			actptr += sizeof( i );
		}

		else if ( num( c ) )
		{
			/*
			 * It's a numeric constant
			 */
			*actptr++ = T_CONSTANT;
			cp = buf;
			do
				*cp++ = c;
			while ( (c=getcharacter()) != -1 && num(c) );
			ungetcharacter( c );
			*cp = 0;
			storeint( actptr, atoi( buf ) );
			actptr += sizeof( i );
		}

		/*
		 * unary operator:
		 */
		else if ( c == '$' )
			*actptr++ = T_DOLLAR;

		/*
		 * or binary operator:
		 */
		else if ( c == '=' )
		{
			if ( (c=getcharacter()) == '=' )
				*actptr++ = T_EQ;
			else
			{
				ungetcharacter( c );
				*actptr++ = T_ASSIGN;
			}
		}

		else if ( c == '!' )
		{
			if ( (c=getcharacter()) == '=' )
				*actptr++ = T_NE;
			else
			{
				ungetcharacter( c );
				*actptr++ = T_LNOT;
			}
		}

		else if ( c == '<' )
		{
			if ( (c=getcharacter()) == '<' )
				*actptr++ = T_SHL;
			else if ( c == '=' )
				*actptr++ = T_LE;
			else
			{
				ungetcharacter( c );
				*actptr++ = T_LT;
			}
		}

		else if ( c == '>' )
		{
			if ( (c=getcharacter()) == '>' )
				*actptr++ = T_SHR;
			else if ( c == '=' )
				*actptr++ = T_GE;
			else
			{
				ungetcharacter( c );
				*actptr++ = T_GT;
			}
		}

		else if ( c == '&' )
		{
			if ( (c=getcharacter()) == '&' )
				*actptr++ = T_LAND;
			else
			{
				ungetcharacter( c );
				*actptr++ = T_AND;
			}
		}

		else if ( c == '|' )
		{
			if ( (c=getcharacter()) == '|' )
				*actptr++ = T_LIOR;
			else
			{
				ungetcharacter( c );
				*actptr++ = T_IOR;
			}
		}
		else if ( c == '+' )
		{
			if ( (c=getcharacter()) == '+' )
				*actptr++ = T_INCR;
			else
			{
				ungetcharacter( c );
				*actptr++ = T_ADD;
			}
		}

		else if ( c == '-' )
		{
			if ( (c=getcharacter()) == '-' )
				*actptr++ = T_DECR;
			else
			{
				ungetcharacter( c );
				*actptr++ = T_SUB;
			}
		}

		/*
		 * punctuation
		 */
		else if ( instr( c, "[](),;*/%+-^~" ) )
			*actptr++ = c;

		else
		{
			/*
			 * Bad character in input line
			 */
			error( "lexical error", ACT_ERROR );
		}

		if ( actptr >= Workbuf + MAXWORKBUFLEN )
			error( "action too long", MEM_ERROR );
	}
	if ( braces || parens )
		error( "mismatched '{}' or '()'", ACT_ERROR );

	*actptr++ = T_EOF;

	return actptr - actbuf;
}

char *
str_compile( str, delim )
char *str, delim;
{
	/*
	 * Compile a string from current input file into the given string
	 * buffer.  Stop when input character is the delimiter in "delim".
	 * Returns a pointer to the first character after the string.
	 */
	int c;
	char buf[ MAXLINELEN ];

	while ( (c = getcharacter()) != -1 && c != delim)
	{
		if ( c  == '\\' )
		{
			switch ( c = getcharacter() )
			{
			case -1: goto err;
			case 'b': c = '\b'; break;
			case 'n': c = '\n'; break;
			case 't': c = '\t'; break;
			case 'f': c = '\f'; break;
			case 'r': c = '\r'; break;
			case '0':
			case '1':
			case '2':
			case '3':
				*buf = c;
				for ( c=1; c<3; ++c )
				{
					if ( (buf[c]=getcharacter()) == -1 )
						goto err;
				}
				buf[c] = 0;
				sscanf( buf, "%o", &c );
				break;
			case '\n':
				if ( getcharacter() == -1 )
					goto err;
			default:
				if ( (c = getcharacter()) == -1 )
					goto err;
			}
		}
		*str++ = c;
	}
	*str++ = 0;

	return str;
err:
	sprintf( buf, "missing %c delimiter", delim );
	error( buf, 4 );
}

int
storeint( ip, i )
int *ip, i;
{
	/*
	 * Store the integer "i" at address "ip" and return "i".
	 * Token buffers are byte-packed (an int directly follows a
	 * one-byte token), so "ip" may not be aligned for an int.
	 * Copy byte by byte instead of dereferencing it as an int.
	 */
	char *dst, *src;
	int n;

	dst = (char *) ip;
	src = (char *) &i;
	for ( n = 0; n < (int) sizeof( i ); ++n )
		dst[n] = src[n];
	return i;
}

int
storeptr( pp, p )
char **pp, *p;
{
	/*
	 * Store the pointer "p" at address "pp".
	 * Token buffers are byte-packed (a pointer directly follows a
	 * one-byte token), so "pp" may not be aligned for a pointer.
	 * Copy byte by byte instead of dereferencing it.
	 *
	 * The return value is "p" converted to int, as before; it is
	 * truncated wherever pointers are wider than int, so callers
	 * should not rely on it.
	 */
	char *dst, *src;
	int n;

	dst = (char *) pp;
	src = (char *) &p;
	for ( n = 0; n < (int) sizeof( p ); ++n )
		dst[n] = src[n];
	return (int) (long) p;
}

int
fetchint( ip )
int *ip;
{
	/*
	 * Return the integer stored at address "ip".
	 * Token buffers are byte-packed, so "ip" may not be aligned
	 * for an int.  Copy byte by byte instead of dereferencing it.
	 */
	int i, n;
	char *dst, *src;

	dst = (char *) &i;
	src = (char *) ip;
	for ( n = 0; n < (int) sizeof( i ); ++n )
		dst[n] = src[n];
	return i;
}

char *
fetchptr( pp )
char **pp;
{
	/*
	 * Return the pointer stored at address "pp".
	 * Token buffers are byte-packed, so "pp" may not be aligned
	 * for a pointer.  Copy byte by byte instead of dereferencing it.
	 */
	char *p, *dst, *src;
	int n;

	dst = (char *) &p;
	src = (char *) pp;
	for ( n = 0; n < (int) sizeof( p ); ++n )
		dst[n] = src[n];
	return p;
}

int
getoken()
{
	/*
	 * Fetch the next token from the compiled stream at Actptr into
	 * Token, decode its operand (if any) into Value, and advance
	 * Actptr past both.  Returns Token.
	 *
	 *   T_STRING, T_REGEXP    Value.dptr points at the 0-terminated
	 *                         bytes stored in the stream
	 *   T_VARIABLE            Value.dptr is the stored pointer
	 *   T_FUNCTION, T_CONSTANT Value.ival is the stored integer
	 *   anything else         Value.dptr is 0
	 */
	switch ( Token = *Actptr++ )
	{
	case T_STRING:
	case T_REGEXP:
		Value.dptr = Actptr;
		Actptr += strlen( Actptr ) + 1;
		break;
	case T_VARIABLE:
		Value.dptr = fetchptr( Actptr );
		Actptr += sizeof( char * );
		break;
	case T_FUNCTION:
	case T_CONSTANT:
		Value.ival = fetchint( Actptr );
		Actptr += sizeof( int );
		break;
	case T_EOF:
		/*
		 * Stay on the T_EOF byte so that further calls keep
		 * returning T_EOF instead of running off the stream.
		 */
		--Actptr;
		/* fall through */
	default:
		Value.dptr = 0;
		break;
	}

#ifdef DEBUG
	if ( Debug > 1 )
		printf( "Token='%c' (0x%x), Value=%d\n",
			Token,Token,Value.ival );
#endif

	return Token;
}

