/*
 * Bawk regular expression compiler/interpreter
 */
#include <stdio.h>
#include "bawk.h"
 
int re_compile( patbuf )
char	*patbuf;		/* where to put compiled pattern */
{
	/*
	 * Compile a regular expression from current input file
	 * into the given pattern buffer.
	 */
	register int c,		/* Current character         */
		o;		/* Temp                      */
        register char delim,	/* pattern delimiter         */
		*patptr,	/* destination string pntr   */
		*lp,		/* Last pattern pointer      */
		*spp;		/* Save beginning of pattern */
	char *cclass();		/* Compile class routine     */
 
	DBUG_ENTER("re_compile");
	lp = patptr = patbuf;
	delim = getcharacter();

	while ( (c = getcharacter()) != -1 && c != delim )
	{
		/*
		 * STAR, PLUS and MINUS are special.
		 */
		if (c == '*' || c == '+' || c == '-') {
			if (patptr == patbuf ||
				  (o=patptr[-1]) == BOL ||
				  o == EOL ||
				  o == STAR ||
				  o == PLUS ||
				  o == MINUS)
				error( "illegal occurrance op", RE_ERROR );
			*patptr++ = ENDPAT;
			*patptr++ = ENDPAT;
			spp = patptr;		/* Save pattern end     */
			while (--patptr > lp)	/* Move pattern down... */
				*patptr = patptr[-1];	/* one byte     */
			*patptr =   (c == '*') ? STAR :
				(c == '-') ? MINUS : PLUS;
			patptr = spp;		/* Restore pattern end  */
			continue;
		}
		/*
		 * All the rest.
		 */
		lp = patptr;			/* Remember start       */
		switch(c) {
 
		case '^':
			*patptr++ = BOL;
			break;
 
		case '$':
			*patptr++ = EOL;
			break;
 
		case '.':
			*patptr++ = ANY;
			break;
 
		case '[':
			patptr = cclass( patptr );
			break;
 
		case ':':
			if ( (c=getcharacter()) != -1 )
			{
				switch( tolower( c ) )
				{
 
				case 'a':
					*patptr++ = ALPHA;
					break;
 
				case 'd':
					*patptr++ = DIGIT;
					break;
 
				case 'n':
					*patptr++ = NALPHA;
					break;
 
				case ' ':
					*patptr++ = PUNCT;
					break;
 
				default:
					error( "unknown ':' type", RE_ERROR );
 
				}
			}
			else
				error( "no ':' type", RE_ERROR );
 			break;

		case '\\':
			c = getcharacter();
 
		default:
			*patptr++ = CHAR;
			*patptr++ = c;
		}
	}
	*patptr++ = ENDPAT;
	*patptr++ = 0;			/* Terminate string     */

	DBUG_EXECUTE("re_match",re_debug(patbuf,patptr););
	DBUG_RETURN(patptr - patbuf);
}

#ifndef DBUG_OFF
re_debug(patbuf, patptr)
register char *patbuf, *patptr;
{
	register char *lp;

	for ( lp=patbuf; lp<patptr; ++lp )
	{
		switch ( *lp )
		{
		case CHAR:	DBUG_PRINT("re_match",("char ")); break;
		case BOL:	DBUG_PRINT("re_match",("bol ")); break;
		case EOL:	DBUG_PRINT("re_match",("eol ")); break;
		case ANY:	DBUG_PRINT("re_match",("any ")); break;
		case CLASS:	DBUG_PRINT("re_match",("class(%d) ",*++lp)); break;
		case NCLASS:	DBUG_PRINT("re_match",("notclass(%d) ",*++lp)); break;
		case STAR:	DBUG_PRINT("re_match",("star ")); break;
		case PLUS:	DBUG_PRINT("re_match",("plus ")); break;
		case MINUS:	DBUG_PRINT("re_match",("minus ")); break;
		case ALPHA:	DBUG_PRINT("re_match",("alpha ")); break;
		case DIGIT:	DBUG_PRINT("re_match",("digit ")); break;
		case NALPHA:	DBUG_PRINT("re_match",("notalpha ")); break;
		case PUNCT:	DBUG_PRINT("re_match",("punct ")); break;
		case RANGE:	DBUG_PRINT("re_match",("range ")); break;
		case ENDPAT:	DBUG_PRINT("re_match",("endpat ")); break;
		default:	DBUG_PRINT("re_match",("<%c> ", *lp)); break;
		}
	}
}
#endif

char *
cclass( patbuf )
register char	*patbuf;	/* destination pattern buffer */
{
	/*
	 * Compile a class (within [])
	 */
	register char *patptr,	/* destination pattern pointer */
		*cp;		/* Pattern start     */
	register int c,		/* Current character */
		o;		/* Temp              */

	DBUG_ENTER("cclass");
	patptr = patbuf;

	if ( (c = getcharacter()) == -1 )
		error( "class terminates badly", RE_ERROR );
	else if ( c == '^')
	{
		/*
		 * Class exclusion, for example: [^abc]
		 * Swallow the "^" and set token type to class exclusion.
		 */
		o = NCLASS;
	}
	else
	{
		/*
		 * Normal class, for example: [abc]
		 * push back the character and set token type to class
		 */
		ungetcharacter( (char) c );
		o = CLASS;
	}
	*patptr++ = o;

	cp = patptr;	/* remember where byte count is */
	*patptr++ = 0;	/* and initialize byte count */
	while ( (c = getcharacter()) != -1 && c!=']' )
	{
		o = getcharacter();		/* peek at next char */
		if (c == '\\')			/* Store quoted chars */
		{
			if ( o == -1) /* Gotta get something */
				error( "class terminates badly", RE_ERROR );
			*patptr++ = o;
		}
		else if ( c=='-' && (patptr-cp)>1 && o!=']' && o != -1 )
		{
			c = patptr[-1];		/* Range start     */
			patptr[-1] = RANGE;	/* Range signal    */
			*patptr++ = c;		/* Re-store start  */
			*patptr++ = o;		/* Store end char  */
		}
		else
		{
			*patptr++ = c;		/* Store normal char */
			ungetcharacter( (char) o );
		}
	}
	if (c != ']')
		error( "unterminated class", RE_ERROR );
	if ( (c = (patptr - cp)) >= 256 )
		error( "class too large", RE_ERROR );
	if ( c == 0 )
		error( "empty class", RE_ERROR );
	*cp = c;		/* fill in byte count */

	DBUG_RETURN(patptr);
}
 
int match( line, pattern )
char	*line;		/* line to match */
register char *pattern;	/* pattern to match */
{
	/*
	 * Match the current line (in Linebuf[]), return 1 if it does.
	 */
	register char *l;	/* Line pointer       */
	char	*pmatch();
	register char *next;
	register int	matches;
 
	DBUG_ENTER("match");
	matches = 0;
	for (l = line; *l; l++)
	{
		if ( next = pmatch(line, l, pattern) )
		{
			l = next - 1;
			++matches;
			DBUG_PRINT("match",("match found"));
		}
	}

	DBUG_RETURN(matches);
}
 
char *
pmatch(linestart, line, pattern)
char	*linestart;	 /* start of line to match */
char	*line;		 /* (partial) line to match      */
char	*pattern;	 /* (partial) pattern to match   */
{
	register char *l;/* Current line pointer         */
	register char *p;/* Current pattern pointer      */
	register char c; /* Current character            */
	register char *e;/* End for STAR and PLUS match  */
	register int op; /* Pattern operation            */
	register int n;	 /* Class counter                */
	char	*are;	 /* Start of STAR match          */
 
	DBUG_ENTER("pmatch");
	l = line;

	DBUG_PRINT("pmatch",("line: (%s)", line));

	p = pattern;
	while ((op = *p++) != ENDPAT) {

	DBUG_PRINT("pmatch",("byte[%d] = 0%o, '%c', op = 0%o",l-line, *l, *l, op));

		switch(op) {
 
		case CHAR:
			if ( *l++ != *p++)
				DBUG_RETURN(0);
			break;
 
		case BOL:
			if (l != linestart)
				DBUG_RETURN(0);
			break;
 
		case EOL:
			if (*l != '\0')
				DBUG_RETURN(0);
			break;
 
		case ANY:
			if (*l++ == '\0')
				DBUG_RETURN(0);
			break;
 
		case DIGIT:
			if ((c = *l++) < '0' || (c > '9'))
				DBUG_RETURN(0);
			break;
 
		case ALPHA:
			c = *l++;
			c = tolower( c );
			if (c < 'a' || c > 'z')
				DBUG_RETURN(0);
			break;
 
		case NALPHA:
			c = *l++;
			c = tolower( c );
			if (c >= 'a' && c <= 'z')
				break;
			else if (c < '0' || c > '9')
				DBUG_RETURN(0);
			break;
 
		case PUNCT:
			c = *l++;
			if (c == 0 || c > ' ')
				DBUG_RETURN(0);
			break;
 
		case CLASS:
		case NCLASS:
			c = *l++;
			n = *p++ & 0377;
			do {
				if (*p == RANGE) {
					p += 3;
					n -= 2;
					if (c >= p[-2] && c <= p[-1])
						break;
				}
				else if (c == *p++)
					break;
			} while (--n > 1);
			if ((op == CLASS) == (n <= 1))
				DBUG_RETURN(0);
			if (op == CLASS)
				p += n - 2;
			break;
 
		case MINUS:
			e = pmatch(linestart,l,p);/* Look for a match    */
			while (*p++ != ENDPAT);	/* Skip over pattern   */
			if (e)			/* Got a match?        */
				l = e;		/* Yes, update string  */
			break;			/* Always succeeds     */
 
		case PLUS:			/* One or more ...     */
			if ((l = pmatch(linestart,l,p)) == 0)
				DBUG_RETURN(0);	/* Gotta have a match  */
		case STAR:			/* Zero or more ...    */
			are = l;		/* Remember line start */
			while (*l && (e = pmatch(linestart,l,p)))
				l = e;		/* Get longest match   */
			while (*p++ != ENDPAT);	/* Skip over pattern   */
			while (l >= are) {	/* Try to match rest   */
				if (e = pmatch(linestart,l,p))
					DBUG_RETURN(e);
				--l;		/* Nope, try earlier   */
			}
			DBUG_RETURN(0);		/* Nothing else worked */
 
		default:
			fprintf( stderr, "bad op code %d\n", op );
			error( "can't happen -- match", RE_ERROR );
		}
	}
	DBUG_RETURN(l);
}

