/************************************************************************
 * This program is Copyright (C) 1986 by Jonathan Payne.  JOVE is       *
 * provided to you without charge, and with no warranty.  You may give  *
 * away copies of JOVE, including sources, provided that this notice is *
 * included in all the files.                                           *
 ************************************************************************/
/*
 * Modified: Sep-88 by Tom Hageman [TRH]
 * fixed bug in re_dosub; some fine tuning & additions
 * Mar-89 [TRH] extend to fuller RE, remove alternate parameter.
 * Jan-91 [TRH] more optimization.
 * {{TODO: more robustness in RE compiler.}}
 */

/*
 * The regular expression string is compiled into a (finite nondeterministic)
 * automaton. The following opcodes are used:
 *
 * token 		opcode		arguments
 * -----		------		---------
 *  ^			AT_BOL
 *  $			AT_EOL
 *  \<			AT_BOW
 *  \>			AT_EOW
 *  \{alt1, ... altN>\}	ALT		|len1|alt| ... lenN|altN|EOP|
 *  \(			OPENP+#
 *  \)			CLOSEP+#
 *  \#			BACKREF		#
 *  .			ANY
 *  [...], [^...]	ONE_OF		bitmap
 * (endmarker)		EOP
 * (implements loops)	BACK		offset
 * (avoid enless loops)	MARK		<record current line pointer>
 * (normal characters)	NORMC		|len|literal string|
 * (case independent)	CINDC		|len|literal string|
 * (end w/ paren count)	NUMP		#
 * (implement negation)	NOT		|len|subexpr|
 *
 * Each "atomic" RE (except ^,$,\<,\>) can be followed by a *, +, or ? suffix
 * for (0 or more), (1 or more), (0 or one) matches. (or \! to negate the
 * match condition {{this is expeximental!}})
 * The "simple" REs have the corresponding code (STAR, PLUS, QUEST)
 * ORed in with their opcode; NORMC/CINDC modified in this way
 * have a single character argument instead of a counted string.
 * The complex cases "\{ ... \}[*+?]" and "\( ... \)[*+?]" are explicitly
 * compiled as follows:
 * 	\(<sub>\)[*+?]	->	\(<sub>[*+?]\)
 *
 *	<x>*		->	MARK\{<x>BACK(MARK),\}
 *	<x>+		->	MARK<x>\{BACK(MARK),\}
 *	<x>?		->	\{<x>,\}
 *
 * where BACK(x) means: jump backward to the opcode for <x> in the compiled
 * RE automaton.
 *
 * The \| (alternate) operator is compiled much the same way as the
 * curly brace pair, except that an alternate is ended with a NUMP opcode,
 * to record the actual number of \( \) parens in that alternate.
 *
 * The first RE_NFIRST characters in the buffer are reserved for start-
 * character optimization.
 */

#include "tune.h"	/* to allow definition of debug flags there. */

#ifdef DEBUG		/* General debug flag, turn on file-specific flag. */
#   ifndef RE_DEBUG
#	define RE_DEBUG	1
#   endif
#else
#   if RE_DEBUG		/* File-specific debug flag, turn on general flag. */
#	define DEBUG
#   endif
#endif

#include "jove.h"

RCS("$Id: re.c,v 14.32.0.10 1994/04/22 18:24:23 tom Exp tom $")

#include "ctype.h"
#define Extern public
#include "re.h"

#ifndef TINY
private int	REfirst __(( const char	*_(comp_p), char *_(firsts) ));
#endif

private void 	cset_range __(( char *_(base), int _(from), int _(to) )),
		cset_case_ignore __(( char *_(base) )),
		REreset __(( void ));

private	char	*do_comp __(( char *_(comp_base), int _(in_subalt),
			      char **_(parens) )),
		*ins_alt __(( char *_(at_p), char *_(to_p) ));

/* convert to upper case to achieve case equivalence -- MOVED TO re.h */

/* bit field manipulations	*/
#define BLKIND(c)	(_Uc(c) >> 3)
#define BITIND(c)	((c) & 07)
#define SETMEMBER(c,s)	((s)[BLKIND(c)] |= Bit(BITIND(c)))
#define ISMEMBER(c,s)	((s)[BLKIND(c)] & Bit(BITIND(c)))

char	compbuf[RE_SIZE],		/* global default compbuf */
	*rep_search ZERO,		/* replace search string */
	*rep_str ZERO;			/* contains replacement string */

int	REdirection;

DEF_INT( "case-ignore-search", CaseIgnore, V_BOOL ) ZERO;
DEF_INT( "wrap-search", WrapScan, V_BOOL ) ZERO;
DEF_INT( "match-regular-expressions", UseRE, V_BOOL ) ZERO;

/* The following are ORed in in main opcode of simple kinds. */
#define QUEST	1		/* ? match 0 or 1 of last RE. */
#define PLUS	2		/* + match 1 or more of last RE. */
#define STAR	3		/* * match any number of last RE. */
/* These values are chosen so that "x+?", "x+*", "x?*"
   all boil down to `x*', just as you would expect. */

/* Opcodes for compiled RE. */
#define EOP	0		/* end of pattern. */
	/* note: some optimizations are aware of this being zero */
#define AT_BOL	1		/* ^ */
#define AT_EOL	2		/* $ */
#define AT_BOW	3		/* \< */
#define AT_EOW	4		/* \> */
#define BACK	5		/* backward jumps */
#define NUMP	6		/* EOP with paren count */
#define NOT	7		/* \! (experimental) */
#define NOSTR	8		/* Codes < NOSTR can't be [*+?]'d. */

#define ANYC	NOSTR		/* . */
#define NORMC	(NOSTR+4)	/* normal character */
#define CINDC	(NOSTR+8)	/* case independent character */
#define ONE_OF	(NOSTR+12)	/* [xxx] and [^xxx] */
#define SIMPLE	(NOSTR+16)	/* codes < SIMPLE can have [*+?] ORed in */

#define OPENP	(SIMPLE-1)	/* \( numbered [1..NPAR] */
#define CLOSEP	(OPENP+NPAR)	/* \) numbered [1..NPAR] */
#define ALT	(CLOSEP+NPAR+1)	/* \{ */
#define MARK	(ALT+1)		/* target of backward jump. */
#define BACKREF	(ALT+2)		/* \# in [1..NPAR] */

#define NPAR	9		/* [1..9] */

				/* nice shorthands */
#define isOPENP(c)	((unsigned)((c)-OPENP-1)<NPAR)
#define isPAREN(c)	((unsigned)((c)-OPENP-1)<2*NPAR)
#define case_PAR(b) 	case b+1:case b+2:case b+3:case b+4:case b+5:\
				 case b+6:case b+7:case b+8:case b+9

#define CSETSIZE (Bit(BPC)/8)

#if (__CHAR_UNSIGNED__)
#   define MAXBYTE	256
#else
#   define MAXBYTE	128
#endif

private const char	*REptr;
private int	nparens;
private char	*cur_compb,
		*end_compb;

private	const char	TooLong[] = "Search string too long/complex.";

#ifndef JUMP_LONG
#   if (RE_SIZE > 0400)	/* Small RE buffers don't need long format jumps. */
#	define JUMP_LONG	0377
#   endif
#else
#   if (2* JUMP_LONG -1 != 2* 0 -1)
#	undef JUMP_LONG
#	define JUMP_LONG	0377
#   endif
#endif

#if JUMP_LONG
/* Calculate the number of bytes a jump (opcode + offset) will take,
   as a function of the distance to jump.  A short jump offset will fit in a
   single byte; a long format jump has itself tagged as such in the byte
   normally used for the short offset, and encodes the offset in two
   additional bytes. */
#   define JUMP_SIZE(len)	((len) >= JUMP_LONG ? 4 : 2)

private int	jump_store __(( char *_(at), int _(len), int _(backward) ));
private int
jump_store(at, len, backward)
register char	*at;
register int	len;
int		backward;
{
	ASSERT(len > 0);
	if (len >= JUMP_LONG) {
		if (!backward) {
			/* Shift tail to make room for long address. */
			register char	*d = at + len;

			while (--d > at)
				d[2] = d[0];
		}
		*at++ = JUMP_LONG;
		*at++ = (char)(len >> 8);
		backward += 2;
	}
	*at = (char) len;

	return backward;
}

/* Store a backward jump offset `length' in the position pointed to by `at'
   while advancing the pointer. */
#   define B_JUMP_STORE(at, len)	(at += jump_store(at, len, YES))

/* Calculate a forward jump offset from pointers `end', presumably pointing
   to the position to jump to, and `at', presumably pointing to the jump's
   offset location.  It is assumed that a single byte is already allocated
   to hold the jump's offset.  If necessary the stuff between `at' and `end'
   is shifted upward and the `end' pointer is updated to allocate space for
   a long jump offset. */
#   define F_JUMP_STORE(at, end)	(end += jump_store(at, end - at, NO))
#else
/* Short jumps only; pretty easy... */
#   define JUMP_SIZE(len)	2
#   define B_JUMP_STORE(at, len)	(*at=len,++at)
#   define F_JUMP_STORE(at, end)	(*at=end-at)
#endif

/* Size of MARK instruction. */
#define MARK_SIZE	3

/*
 * REcompile: compiles a regular expression pattern in an internal code.
 * if `mode' <= 0, use pattern as a literal string, else do full regular
 * expression parsing.  (this way we can temporarily switch in and out
 * of RE mode in macros, using the next trick:
 * "ESC -- (set match-RE) ... search ... ESC - (set match-RE)
 */
void
REcompile(pattern, re, into_buf)
register const char	*pattern;
register char		*into_buf;
{
	nparens = 0;
	REdirection = 0;	/* !BACKWARD;	- restore default. */

	/*
	 * Optimization: calculate the set of possible start characters
	 * for this RE.  The trivial case is a single character: this
	 * character is stored in compiled[0], while compiled[1] contains
	 * a copy of "case-ignore-search" at compile-time.  The more
	 * complex case involves a bitset of allowed characters: the
	 * offset to this is encoded in compiled[0] and compiled[1] and
	 * is marked by the RE_FSET flag set in compiled[1].
	 *  No first-character optimization is present when both
	 * compiled[0] == 0 and not (compiled[1] & RE_FSET).
	 */
#define RE_NFIRST	2
#define RE_FSET		(~0177)

	if (re) {
		char	*parens[NPAR+1];		/* paren stack */

		parens[0] = NULL;		/* mark bottom of stack */
		end_compb = into_buf + RE_SIZE - 7;	/* chicken factor */
		REptr = pattern;
		*into_buf++ = '\0';		/* clear FIRSTC and CASE_IGN */
		*into_buf++ = NO;
		cur_compb = into_buf;
#ifndef TINY
#   define	end_p	pattern
		end_p =
#endif
		do_comp(into_buf, NO, &parens[1]);
		REreset();

		/* post-process the compiled expression for start-characters */
	    {
		register char	*p = into_buf;
#ifndef TINY
		register int	anchored = 0;
#endif
		for (;;) {
			switch (*p++) {

				/* easy cases are handled here. */
			case CINDC:
				into_buf[-1]++;
				/* fall through ... */

			case NORMC:
				into_buf[-2] = p[1];
				break;

			case AT_BOL:
#ifndef TINY
				anchored++;
#endif
			case AT_BOW:
			case AT_EOW:
				continue;

			default:
				if (isPAREN(p[-1]))
					continue;
#ifndef TINY
				if (p[-1] == MARK) {
					p += 2;
					continue;
				}
				/*
				 * The hard case: if there is room after
				 * the actual RE, the bitset of possible
				 * start characters is placed there.  It
				 * is built in a temporary variable so as
				 * to not disturb some semi pre-compiled
				 * expressions that use a small RE buffer.
				 * Anchored match doesn't need it, either.
				 */
				if (anchored)
					break;
				if (end_p >
				    into_buf + RE_SIZE - RE_NFIRST - CSETSIZE)
					break;
			    {
				char	firstset[CSETSIZE];

				/* blank it out. */
				p = &firstset[CSETSIZE];
				do *--p = 0; while (p > firstset);

				if (REfirst(into_buf, p) <= 0)
					break;

				/* copy bitset to final location. */
				p = (char *) end_p + CSETSIZE;
				end_p = &firstset[CSETSIZE];
				do *--p = *--end_p; while (end_p > firstset);
			    }
				/* now `p' points to firstset in RE buffer. */
				if (True(CaseIgnore))
					cset_case_ignore(p);

				/* fall through... */

			case ONE_OF:
			case ONE_OF | PLUS:
			    {
				/* store offset to start bitset */
				register int	offset = p - into_buf;

				*--into_buf = RE_FSET | (offset >> 7);
				*--into_buf = offset & (Bit(7)-1);
			    }
#   undef end_p
#endif /* TINY */
				break;
			}
			break;
		}
	    }
	} else {					/* RE_LITERAL */
		register int	len = strlen(pattern);
#		define comp_p	into_buf		/* alias */

#if (RE_SIZE - RE_NFIRST - 1 >= MAXBYTE)
		if (len >= MAXBYTE)
			complain(TooLong);
#else
		if (len >= RE_SIZE - RE_NFIRST - 1)
			complain(TooLong);
#endif
#ifdef OLD_CASEIGNORE
		if (True(CaseIgnore)) {
			*comp_p++ = CEquiv(*pattern);
			*comp_p++ = YES;

			*comp_p++ = CINDC;
			*comp_p++ = len;
			while (*comp_p++ = CEquiv(*pattern++))
				;
		} else {
			*comp_p++ = *pattern;
			*comp_p++ = NO;

			*comp_p++ = NORMC;
			*comp_p++ = len;
			while (*comp_p++ = *pattern++)
				;
		}
#else
		/* The new way of ignoring case ignores case only if
		   the SEARCH PATTERN is lower case; uppercase characters
		   in the search pattern must match exactly. */

		*comp_p++ = *pattern;
		if (True(CaseIgnore)) {
			*comp_p++ = YES;
			*comp_p++ = CINDC;
		} else {
			*comp_p++ = NO;
			*comp_p++ = NORMC;
		}
		*comp_p++ = len;
		while (*comp_p++ = *pattern++)
			;
#endif
#		undef comp_p				/* unalias */
	}
}

/* compile the pattern into an internal code */

private char *
do_comp(comp_base, in_subalt, parens)
	char	*comp_base;
	char	**parens;
{
	register char	*comp_p = comp_base;	/* speed ... */
	register char	*last_p = NULL;		/* non-NULL if [*+?] allowed */
	register char	*chr_cnt = NULL;	/* non-NULL if collecting chars */
	register int	c;
	char		**parenp = parens;
	register int	suffix;

	/* static to communicate "return code" between incarnations */
	static int	more;

	more = NO;
	suffix = STAR;		/* loop invariant. */

	while (c = *REptr++) {
		if (comp_p > end_compb)
toolong:		complain(TooLong);

		/* handle [*+?] suffixes before `last_p' is updated. */

		switch (c) {
			/* PRE: (suffix == STAR) */
		case '?':			/* suffix = QUEST; */
			suffix += QUEST - PLUS;
		case '+':			/* suffix = PLUS; */
			suffix += PLUS - STAR;
		case '*':			/* suffix = STAR; */
			if (chr_cnt)
				last_p = chr_cnt - 1;

			/* Make sure we can handle it. */
			if (last_p == NULL || *last_p < NOSTR) {
#if NOTYET
				if (True(AllowSPQ))
					goto defchar;
#endif
				complain("Unexpected %c", c);
			}
#ifndef TINY
			/* Collect multiple [*?+]'s.
			   {this is an optimization which speeds up compilation
			    and tends to make the resulting RE smaller and
			    more efficient, especially for non-trivial cases.
			    On the other hand, multiple flags are rarely used
			    in practice, so why bother, really?} */

			for (;;) {
				switch (*REptr++) {
				case '?':
					suffix |= QUEST;
					continue;
				case '+':
					suffix |= PLUS;
				    /*-	continue; -*/
				case '*':
					suffix |= STAR;
					continue;
				}
				REptr--;
				break;
			}
#endif
			if (chr_cnt) {			/* NORMC, CINDC */
				c = *--comp_p;
				if (suffix == PLUS) {	/* compile x+ as xx*. */
					++comp_p;
					/* suffix = STAR; */
					suffix += STAR - PLUS;
				}
				else if (--*chr_cnt == 0) {
					comp_p = last_p;
				}
				*comp_p = *last_p;
#ifdef TINY
				last_p = comp_p;
#endif
				*comp_p++ |= suffix;
				*comp_p++ = c;
				chr_cnt = NULL;
			}
			else if (*last_p < SIMPLE) {	/* ANYC, ONE_OF */
				if (suffix == PLUS && *last_p == ANYC) {
					/*
					 * just copy previous atom, since "x+"
					 * is really just shorthand for "xx*"
					 */
					register char	*end_p = comp_p;

					while (last_p < end_p)
						*comp_p++ = *last_p++;
					suffix += STAR - PLUS;
				}
				*last_p |= suffix;
			}
			else {			/* OPENP, ALT, BACKREF */
				register int	closep = 0;

				/* Suffixes apply to expression inside \( \). */
				if (isOPENP(*last_p)) {
					++last_p;
					closep = *--comp_p;
				}

				switch (suffix) {

				case QUEST:
					/*
					 * Compile x? as:
					 *       ______
					 *      /      v
					 * ALT o x EOP o EOP
					 *              \_^
					 */
					*comp_p++ = EOP;
					comp_p = ins_alt(last_p, comp_p);
					break;

				case PLUS:
					/*
					 * Compile x+ as:
					 *              _______
					 *             /       v
					 * MARK x ALT o BACK o o EOP
					 *  ^_______________/   \_^
					 */
					*comp_p++ = ALT;
#if JUMP_LONG
					*comp_p = JUMP_SIZE(comp_p - last_p +
							    MARK_SIZE + 2) + 1,
					comp_p++;
#else
					*comp_p++ = 2 + 1;
#endif
					*comp_p++ = BACK;
					B_JUMP_STORE(comp_p,
						     comp_p - last_p +
						     MARK_SIZE);
					goto insert_mark;

			    /*-	case STAR: -*/
				default:
					/*
					 * Compile x* as:
					 *            _________
					 *           /         v
					 * MARK ALT o x BACK o o EOP
					 *  ^_______________/   \_^
					 */
					*comp_p++ = BACK;
					B_JUMP_STORE(comp_p,
						     comp_p - last_p +
						     MARK_SIZE +
						     JUMP_SIZE(comp_p - last_p +
							       MARK_SIZE + 1));
					comp_p = ins_alt(last_p, comp_p);
				insert_mark:
				    {
					char *save = comp_p;

					do {
						--comp_p;
						comp_p[MARK_SIZE] = comp_p[0];
					} while (comp_p > last_p);

					*last_p = MARK;
					comp_p = save + MARK_SIZE;
				    }
					break;
				}
				*comp_p++ = 1;
				*comp_p++ = EOP;

				if (closep) {		/* restore \( \) */
#ifdef TINY
					--last_p;
#endif
					*comp_p++ = closep;
				}
			}
			suffix = STAR;		/* restore loop invariant. */
			continue;
#ifdef NOT
		case '\\':
			if (*REptr == '!') {
				REptr++;
				*comp_p++ = EOP;
				comp_p = ins_alt(last_p, comp_p);
				*last_p = NOT;
				continue;
			}
#endif
		}

		last_p = comp_p;

		switch (c) {
		case '\\':
			switch (c = *REptr++) {
			case '\0':
				complain("Premature end of pattern.");

			case '{':
			    {
				char	*save = last_p;

				*comp_p++ = ALT;
				do {
					last_p = comp_p++;
					comp_p = do_comp(comp_p, YES, parenp);
					F_JUMP_STORE(last_p, comp_p);
				} while (more);
#if JUMP_LONG
				if (*last_p == 0)	/* borrow */
					--last_p[-1];
#endif
				--*last_p;	/* i.e. point to EOP == 0 */
				last_p = save;
				break;
			    }

			case '}':
				if (!in_subalt)
					complain("Unexpected \\}.");
				goto outahere;

			case '(':
				if (++nparens >= NPAR)
					complain("Too many \\('s; max is %d.", NPAR);
				*parenp++ = comp_p;
				*comp_p++ = OPENP + nparens;
				last_p = NULL;
				break;

			case ')':
				/* Check for unmatched \) and empty \(\)'s. */
				if (parenp == parens)
					complain("Unexpected \\).");
				last_p = *--parenp;
				*comp_p++ = *last_p + (CLOSEP - OPENP);
				break;

			case '|':
				/*
				 * [TRH] Emulate end of pattern,
				 * EXCEPT for first occurrence
				 */
				c = '\0';
				more++;
				if (comp_base != cur_compb)
					goto outahere;

				*comp_p++ = NUMP;
				*comp_p++ = nparens;
				comp_p = ins_alt(cur_compb, comp_p);
				do {
					nparens = 0;
					last_p = comp_p++;
					comp_p = do_comp(comp_p, NO, parenp);
					comp_p[-1] = NUMP; /* replace EOP */
					*comp_p++ = nparens;
					F_JUMP_STORE(last_p, comp_p);
				} while (more);
				*comp_p++ = 0;	/* end of alt list */

				goto outahere;	/* we are done */

			case_PAR('0'):
				/*
				 * [TRH] we can't allow forward ("..\4..")
				 * or pending ("\(..\1..\)..") sub-expressions.
				 */
			    {
				register char **pp  =  parenp;

				c -= '0';
				if (c <= nparens) {
					/* no forward ref; check pending */
					while(*--pp && c != **pp - OPENP)
						;
				}
				if (c > nparens || *pp)
					complain("Backref \\%d not allowed here", c);

				*comp_p++ = BACKREF;
				*comp_p++ = c;
				break;
			    }

			case '<':
				*comp_p++ = AT_BOW;
				break;

			case '>':
				*comp_p++ = AT_EOW;
				break;

			default:
				goto defchar;
			}
			break;

		case '.':
			*comp_p++ = ANYC;
			break;

		case '^':
			if (comp_p != comp_base)
				goto defchar;

			*comp_p++ = AT_BOL;
			break;

		case '$':
			if (*REptr && *REptr != '\\' &&
			    !(*REptr == ',' && in_subalt))
				goto defchar;

			*comp_p++ = AT_EOL;
			break;

		case '[':
		    {
			/*
			 * [TRH] compile [xxx] and [^xxx] to same code.
			 * we must negate the bitset in case of [^xxx]
			 * Accept ']' as normal char if first in set,
			 * and '-' if first or last in set.
			 * backslash '\' has no special meaning here.
			 */
			register char	*neg_p = NULL;
			register int	c2;

			comp_p += 1 + CSETSIZE;
			if (comp_p > end_compb)
				goto toolong;

		    	if ((c = *REptr++) == '^') {
		    		neg_p = comp_p;		/* remember... */
				c = *REptr++;
		    	}
			/* clear bitset */
			do *--comp_p = '\0'; while (comp_p > last_p);
		    	*comp_p++ = ONE_OF;

			do {
#if NO
				if (c == '\\')
					c = *REptr++;
#endif
			    	if (c == '\0')
			    		complain("Missing ].");

				SETMEMBER(c, comp_p);

				if (*REptr == '-' && (c2 = *++REptr)) {
					if (c2 == ']') {
						REptr--;
						continue;
					}
#if NO
					if (c2 == '\\')
						if ((c2 = *++REptr) == '\0')
							continue;
							/*  for error mesg. */
#endif
					REptr++;
					if (c < c2)
						cset_range(comp_p, c, c2);
				}
		    	} while ((c = *REptr++) != ']');

			if (True(CaseIgnore))
				cset_case_ignore(comp_p);

			if (neg_p == NULL) {
				comp_p += CSETSIZE;
			} else {
				/*
				 * [^xxx] so negate bitset.
				 * make sure '\0' is not in set after negation.
				 */
				SETMEMBER('\0', comp_p);
				while (comp_p < neg_p)
					*comp_p++ ^= ~0;
				/* ...and comp_p at right spot for free */
			}
		    	break;
		    }

		case ',':
			if (in_subalt) {
				more++;
				goto outahere;
			}
			/* fall through ... */

defchar:	default:
			if (chr_cnt == NULL) {
				if (True(CaseIgnore))
					*comp_p++ = CINDC;
				else
					*comp_p++ = NORMC;
				chr_cnt = comp_p;
				*comp_p++ = 0;
			}
#if (RE_SIZE >= MAXBYTE)
			if (*chr_cnt >= _UC_(MAXBYTE - 1))
				goto toolong;
#endif
			++*chr_cnt;
#ifdef OLD_CASEIGNORE
			if (True(CaseIgnore))
				*comp_p++ = CEquiv(c);
			else
#endif
				*comp_p++ = c;
			continue;
		}
		chr_cnt = NULL;
	}
	/* Kludge: remember # of parens for this pattern. */
	if (cur_compb == comp_base)
		*comp_p++ = NUMP, *comp_p++ = nparens;
	else
outahere:	*comp_p++ = EOP;

	/* End of pattern, let's do some error checking. */
	if (parenp != parens)
		complain("Missing \\).");
	if (in_subalt && c == 0)	/* End of pattern with \}. */
		complain("Missing \\}.");

	return comp_p;
}

private char *
ins_alt(at_p, to_p)
	char		*at_p;
	register char	*to_p;
{
	register char	*s = to_p;
#if JUMP_LONG
	int		len = to_p - at_p;
#endif
	register char	*d = to_p += JUMP_SIZE(len);

	while (s > at_p)		/* make room for new node */
		*--d = *--s;
	*s++ = ALT;
#if JUMP_LONG
	if ((len) >= JUMP_LONG) {
		*s++ = JUMP_LONG;
		*s++ = len >> 8;
	}
#endif
	*s = to_p - s;
	return to_p;
}

/*
 * union of bitset at `base' with bitset in range [from..to] (inclusive)
 */
private void
cset_range(base, from, to)
char		*base;
register int	from;
{
	register int	mask = -Bit(BITIND(from)); /* assumes 2's complement */
	register char	*p = base,
			*q = p + BLKIND(to);

	p += BLKIND(from);
	while (p < q) {
		*p++ |= mask;
		mask = ~0;
	}
	*p |= mask & (Bit(BITIND(to)+1) - 1);
}

/*
 * make bitset case-independent
 * {{depends heavily on ASCII character coding, eg. 'A'-'a' is divisible by 8}}
 */
private void
cset_case_ignore(base)
char	*base;
{
#if (('a' - 'A') % 8 == 0 && ('z' - 'a') == 25)	/* i.e. ASCII */
    {
	register char	*up = base,
			*lp = up + BLKIND('a'),
			*up_end = up + BLKIND('Z');

	up += BLKIND('A');
#   ifdef OLD_CASEIGNORE
	*lp   |= *up   & -Bit(BITIND('A'));
#   endif
	*up++ |= *lp++ & -Bit(BITIND('a'));	/* assumes 2's complement */
	do {
#   ifdef OLD_CASEIGNORE
		*lp   |= *up;
		*up++  = *lp++;
#   else
		*up++ |= *lp++;
#   endif
	} while (up < up_end);
#   ifdef OLD_CASEIGNORE
	*lp |= *up & (Bit(BITIND('Z')+1) - 1);
#   endif
	*up |= *lp & (Bit(BITIND('z')+1) - 1);
    }
# if (BPC == 8)
#   define CSET_LIMIT	0200	/* to resolve equivalences for char 128..255 */
# endif
#else
#   define CSET_LIMIT	0
#endif
#ifdef CSET_LIMIT
    {	/* this does not depend on ASCII character coding */
	register int	c = CSETSIZE * 8 - 1,
			m = 0;
	register char	*p = base + CSETSIZE;
	register int	ce;

	do {
		if ((m >>= 1) == 0) {
			--p;
			m = Bit(8-1);
		}
		if ((ce = CEquiv(c)) != c) {
#   ifdef OLD_CASEIGNORE
			register char	*pe = base + BLKIND(ce);
			register int	me = Bit(BITIND(ce));

			if (*p & m)
				*pe |= me;
			else if (*pe & me)
#   else
			if (ISMEMBER(ce, base))
#   endif
				*p |= m;
		}
	} while (--c >= CSET_LIMIT);
    }
#   undef CSET_LIMIT
#endif
}

#ifndef TINY
/* Build FIRST(comp_p) in cset `firstset' */
private int
REfirst(comp_p, firstset)
register const char	*comp_p;
register char		*firstset;
{
	for (;;) {
		switch (*comp_p++) {
		case NORMC:
		case CINDC:
		    {
			register int	c = comp_p[1];

			SETMEMBER(c, firstset);
		    }
			break;

		case NORMC | STAR:
		case NORMC | QUEST:
		case CINDC | STAR:
		case CINDC | QUEST:
		    {
			register int	c = *comp_p++;

			SETMEMBER(c, firstset);
		    }
			continue;

		case ONE_OF:
		case ONE_OF | PLUS:
		    {
			register const char	*end_p = comp_p + CSETSIZE;

			do *firstset++ |= *comp_p++; while (comp_p < end_p);
			firstset -= CSETSIZE;
		    }
			break;

		case ONE_OF | QUEST:
		case ONE_OF | STAR:
		    {
			register const char	*end_p = comp_p + CSETSIZE;

			do *firstset++ |= *comp_p++; while (comp_p < end_p);
			firstset -= CSETSIZE;
		    }
			continue;

		case AT_EOL:
			SETMEMBER('\0', firstset);
		case AT_BOL:
			break;

		case ALT:
		    {
			register int	n,
					maybe = NO;

			while (n = _UC_(*comp_p++)) {
#if JUMP_LONG
				if (n == JUMP_LONG) {
					n = (unsigned short)(*comp_p++ << 8),
					n |= _UC_(*comp_p++);
				}
#endif
				switch(REfirst(comp_p, firstset)) {
				case NO:
					return NO;
				case -YES:
					maybe++;
				}
				comp_p += n - 1;
			}
			if (maybe)
				continue;
		    }
			break;

		case NUMP:
		case EOP:
		case BACK:
			return -YES;	/* maybe... */

		case AT_BOW:
		case AT_EOW:
		case_PAR(OPENP):
		case_PAR(CLOSEP):
			continue;

		case MARK:
			comp_p += 2;
			continue;

		default:
			return NO;
		}
		break;
	}
	return YES;
}
#endif /* TINY */

private const char	*pstrtlst[NPAR+1],	/* index into REbuf */
			*pendlst[NPAR+1],
			*locrater,
			*REbolp;

/*	use pstrtlst[0], pendlst[0] to record complete match */

#define loc1	pstrtlst[0]
#define loc2	pendlst[0]

int	REbom,
	REeom;		/* beginning and end of match */

#ifdef SMALL
private int member __(( int _(c), const char *_(base) ));
private int
member(c, base)
register int	c;
const char	*base;
{
	return ISMEMBER(c, base);
}
#else /* !SMALL i.e., BIG; trade program size for speed. */
    /* !!this uses the (local) variable `c' to be (at least partially) safe!! */
#   define member(chr, base)	((c = (chr), ISMEMBER(c, base)))
#endif

#if RE_DEBUG
private int	level;

private void re_dump __(( const char *_(line_p), const char *_(comp_p) ));
private void
re_dump(line_p, comp_p)
register const char *line_p;
register const char *comp_p;
{
	char		textbuf[17], work[50];
	register int	i;

	pop_wind("*DEBUG*", 0, B_SCRATCH);
	ToLast();
	sprintf(work, "\n--- garbled RE: (level %d)\nline ...", level);
	ins_str(work, 0);
	ins_str(line_p, 0);
	ins_str("\npattern (compiled) :", 0);
	textbuf[0] = textbuf[16] = '\0';
	line_p = cur_compb;
	for (i = 0;  i < RE_SIZE;  i++) {
		if ((i & 15) == 0) {
			sprintf(work, "%s\n%3d %3x  ", textbuf, i, i);
			ins_str(work, 0);
		}
		textbuf[i&15] = (isctrl(*line_p)) ? '.' : *line_p;
		sprintf(work, "%02x ", _UC_(*line_p++));
		if (line_p == comp_p)
			work[2] = '*';
		ins_str(work, 0);
	}
	ins_str(textbuf, 0);
	level = 0;
}
#endif

private int REmatch __(( const char *_(line_base), char *_(comp_base) ));
private int
REmatch(line_base, comp_base)
const char	*line_base;
char		*comp_base;
{
	register const char	*line_p = line_base;
	register char		*comp_p = comp_base;
	register int		n;
#define first_p	line_base
#define c n /* for member() macro */
#if RE_DEBUG
	level++;
#	undef YES
#	define YES (--level, 1)
#	undef NO
#	define NO (--level, 0)
#endif
	for (;;) {
		switch (*comp_p++) {
		case NORMC:
			n = *comp_p++;
			do {
				if (*line_p++ != *comp_p++)
					return NO;
			} while (--n);
			continue;

		case CINDC:	/* case independent comparison */
			n = *comp_p++;
			do {
#ifdef OLD_CASEIGNORE
				if (CEquiv(*line_p++) != *comp_p++)
#else
				/* Optimized for most common (lower)case. */
				if (*line_p++ != *comp_p++ &&
				    CEquiv(line_p[-1]) != comp_p[-1])
#endif
					return NO;
			} while (--n);
			continue;

		case NUMP:
			nparens = *comp_p++;
			/* fall into... */

		case EOP:
			loc2 = line_p;
			REeom = line_p - REbolp;
			return YES;	/* Success! */

		case AT_BOL:
			if (line_p == REbolp &&
			    line_p != locrater)
				continue;
			return NO;

		case AT_EOL:
			if (*line_p == '\0')
				continue;
			return NO;

		case ANYC:
			if (*line_p++ != '\0')
				continue;
			return NO;

		case AT_BOW:
			if (isword(*line_p) &&
			    !(line_p > REbolp && isword(line_p[-1])) /* &&
			    line_p != locrater  -- NO! */)
				/* consider replacement of "\<[a-z]* *" with "";
				   test for locrater here would skip next
				   adjacent word, even though it should match
				 */
				continue;
			return NO;

		case AT_EOW:
			if (!isword(*line_p) &&
			    (line_p > REbolp && isword(line_p[-1])) /* &&
			    line_p != locrater  -- NO! (see above) */)
				continue;
			return NO;

		case ONE_OF:
			if (member(*line_p++, comp_p)) {
				comp_p += CSETSIZE;
				continue;
			}
			return NO;

		case_PAR(OPENP):
			n = comp_p[-1] - OPENP;
			pstrtlst[n] = line_p;
			pendlst[n] = line_p;
			continue;

		case_PAR(CLOSEP):
			n = comp_p[-1] - CLOSEP;
			pendlst[n] = line_p;
			continue;

		case BACKREF:
		    {
			register const char	*end_p;
			register char		*save_p;

			n = *comp_p++;
			save_p = comp_p;
			comp_p = (char *) pstrtlst[n];
			end_p = pendlst[n];

			while (comp_p < end_p)
				if (*line_p++ != *comp_p++)
					return NO;

			comp_p = save_p;
			continue;
		    }

		case ALT:
			/* This is a new, backtracking (kinda) version.
			   If rejects matching sub-expressions for which the
			   pattern following this alternative does not match.
			   The old code blindly accepted the first matching
			   sub-expr, no matter what the consequences.
			   It still returns the *first* matching sub-expr,
			   not necessarily the *longest*.  Also watch out
			   for simple `*', `+' and `?' at the end of a
			   sub-expr: these either match the longest possible
			   substring, or fail--no backtracking there.
			   (Yes this is a bug...) */

			while ((n = _UC_(*comp_p++))) {
#if JUMP_LONG
				if (n == JUMP_LONG) {
					n = (unsigned short)(*comp_p++ << 8),
					n |= _UC_(*comp_p++);
				}

#endif
				--n;

				/* Debugging/assertion goo: */
#if JUMP_LONG
#   define ALT_J_L_ASSERTION	(comp_p[n-3]==(char)JUMP_LONG && \
				 comp_p[n-4]==BACK)
#else
#   define ALT_J_L_ASSERTION	0
#endif
#define ALT_ASSERTION		(comp_p[n]==EOP || comp_p[n-1]==EOP ||	\
				 (comp_p[n-2]==NUMP &&			\
				  (unsigned)comp_p[n-1]<=NPAR) ||	\
				 comp_p[n-2]==BACK || ALT_J_L_ASSERTION)
				/* You rang...? */
#if RE_DEBUG
				if (!ALT_ASSERTION) {
					re_dump(line_p, comp_p);
					complain("BUG: garbled RE (ALT offset=%d)", n);
				}
#else
				ASSERT(ALT_ASSERTION);
#endif
				switch (REmatch(line_p, comp_p)) {
				case NO:
					comp_p += n;
					continue;

				case YES:
					break;

				default:
			    /*-	case YES+YES: -*/
					/* Means matched complete expression,
					   not just sub-expr.
					   (This is a kludge to make it work
					    in combination with BACK.) */ 
					return YES+YES;
				}

				comp_p += n;

				/* Now that we have a matching sub-expression,
				   see if the tail matches too. */
			    {
				register char	*save = comp_p;

				/* {{As a speed-optimization we can cache
				    a pointer to the t(r)ailing expression,
				    instead of walking through the list
				    each time.}} */
				while ((n = _UC_(*comp_p++))) {
#if JUMP_LONG
					if (n == JUMP_LONG) {
						n = ((unsigned short)
						     (*comp_p++ << 8)),
						n |= _UC_(*comp_p++);
					}
#endif
					--n;
#if RE_DEBUG
					if (!ALT_ASSERTION) {
						re_dump(line_p, comp_p);
						complain("BUG: garbled RE (ALT offset=%d)", n);
					}
#else
					ASSERT(ALT_ASSERTION);
#endif
					comp_p += n;
				}

				if (REmatch(loc2, comp_p))
					return YES+YES;

				comp_p = save;
			    }
		    	}
    			return NO;

		case BACK:
			/* Backward jump, to implement `+' and `*' closure
			   on complex subexpressions.
			   Note that jumping back to *before* the enclosing
			   ALT gives us (recursive) backtracking for free!
			   (I didn't notice it myself until recently...) */

			n = _UC_(*comp_p++);
#if JUMP_LONG
			if (n == JUMP_LONG) {
				n = (unsigned short)(comp_p[0] << 8) |
						    _UC_(comp_p[1]);
			}
#endif
#if RE_DEBUG
			if (!(n > 0 && comp_p[-n-1] != MARK)) {
				re_dump(line_p, comp_p - 1);
				complain("BUG: garbled RE (BACK offset=%d)", n);
			}
#endif
			comp_p -= n;
			ASSERT(n > 0);
			/* BACK should point to a MARK opcode, so that we can
			   check for an empty match and fail, instead of
			   running astray in an endless loop. */
			ASSERT(comp_p[-1] == MARK);
#if pdp11
/* This is a code-size optimization hack. ONLY valid if sizeof(char *) == 2. */
#   define MARKVAL(lp)	((unsigned short)(lp))
#else
#   define MARKVAL(lp)	((lp) - REbolp)
#endif
			if (MARKVAL(line_p) <=
			    (unsigned short)(comp_p[0] << 8) + _UC_(comp_p[1]))
				return NO;

			/* Take a shortcut and fall through... */
		case MARK:
			/* Record line pointer for BACK empty match check. */
			n = MARKVAL(line_p);
			*comp_p++ = (char)(n >> 8);
			*comp_p++ = (char)(n);
			continue;

#ifdef NOT
		case NOT:
			n = _UC_(*comp_p++);
#   if JUMP_LONG
			if (n == JUMP_LONG) {
				n = (unsigned short)(*comp_p++ << 8);
				n |= _UC_(*comp_p++);
			}
#   endif
			if (REmatch(line_p, comp_p))
				return NO;
			comp_p += --n;
			ASSERT(comp_p[-1]==EOP);
			continue;
#endif
		case ANYC | QUEST:
			if (*line_p)
		quest:		if (REmatch(line_p + 1, comp_p))
					return YES;
			continue;

		case NORMC | QUEST:
			if (*line_p == *comp_p++)
				goto quest;
			continue;

		case CINDC | QUEST:
#ifdef OLD_CASEIGNORE
			if (CEquiv(*line_p) == *comp_p++)
#else
			if (*line_p == *comp_p++ ||
			    CEquiv(*line_p) == comp_p[-1])
#endif
				goto quest;
			continue;

		case ONE_OF | QUEST:
			if (member(*line_p, (comp_p += CSETSIZE) - CSETSIZE))
				goto quest;
			continue;

		case ANYC | STAR:
			first_p = line_p;
			while (*line_p++) ;
			break;

		case NORMC | STAR:
			first_p = line_p;
			while (*line_p++ == *comp_p)
				;
			comp_p++;
			break;

		case CINDC | STAR:
			first_p = line_p;
#ifdef OLD_CASEIGNORE
			while (CEquiv(*line_p++) == *comp_p) ;
#else
			while (*line_p++ == *comp_p ||
			       CEquiv(line_p[-1]) == *comp_p) ;
#endif
			comp_p++;
			break;

		case ONE_OF | PLUS:
			if (!member(*line_p++, comp_p))
				return NO;

		case ONE_OF | STAR:
			first_p = line_p;
			while (member(*line_p++, comp_p))
				;
			comp_p += CSETSIZE;
			break;

		default:
#if RE_DEBUG
			re_dump(line_p, comp_p);
#endif
			complain("BUG: garbled RE (0%o) @%d.", _UC_(*--comp_p),
				(int)(comp_p - cur_compb));
		} /*switch */
	    {
		/*
		 * only get here when STAR (or PLUS)
		 * Do a little lookahead to avoid lots of recursive calls
		 */
#ifndef TINY
		register const char	*lookahead_p = comp_p;

		while (isPAREN(n = *lookahead_p) || n == AT_BOW || n == AT_EOW)
			lookahead_p++;

#   define	comp_p	lookahead_p
#endif
		while (--line_p > first_p) {
			switch (*comp_p) {	/* a little lookahead */
			case NORMC:
				++line_p;
				while (*--line_p != comp_p[2])
					if (line_p == first_p)
						return NO;
				break;

			case CINDC:
				++line_p;
#ifdef OLD_CASEIGNORE
				while (CEquiv(*--line_p) != comp_p[2])
#else
				while (*--line_p != comp_p[2] &&
				       CEquiv(*line_p) != comp_p[2])
#endif
					if (line_p == first_p)
						return NO;
				break;

			case ONE_OF:
			case ONE_OF | PLUS:
				++line_p;
				while (!member(*--line_p, comp_p + 1))
					if (line_p == first_p)
						return NO;
				break;
			}
#undef comp_p
			if (REmatch(line_p, comp_p))
				return YES;
		}
		continue;
	    }
	} /* for */
#if RE_DEBUG
#	undef YES
#	define YES	1
#	undef NO
#	define NO	0
#endif
#undef c
#undef first_p
	/* NOTREACHED. */
}

private void
REreset()
{
	register int		i = NPAR+1;
	register const char	**ps = pstrtlst,
				**pe = pendlst;

	do {
		if (*ps == NULL)
			break;
		*ps++ = NULL;
		*pe++ = NULL;
	} while (--i);
}

/* Index LINE at OFFSET, the compiled EXPR, with alternates ALTS. [*- If
   lbuf_okay is nonzero it's okay to use linebuf if LINE is the current
   line.  This should save lots of time in things like paren matching in
   LISP mode.  Saves all that copying from linebuf to REbuf.  substitute()
   is the guy who calls re_lindex with lbuf_okay as 0, since the substitution
   gets placed in linebuf ... doesn't work too well when the source and
   destination strings are the same.  I hate all these arguments! -*]

   [TRH] ALWAYS use linebuf for search if LINE is the current line.
   substitute problem is solved in re_dosub().
   BTW as far as I understand it, this lbuf_okay kludge is necessary
   because paren matching sets up a fake curline/curchar. Therefore we
   cannot be sure that lcontents() delivers the 'right' pointer.
   So actually if lbuf_okay is TRUE, linebuf is ignored completely.
    Maybe it is more elegant to pass an extra start-bufpos parameter
   to docompiled() (with NULL as default: curline/curchar).

   Oct 88 [TRH], well I reorganized it -- docompiled now has an extra
   parameter "(Bufpos *) start". No more cheating with currpos needed anymore,
   so lcontents() works fine and thus no need for an lsave().

   Mar 89 [TRH] get rid of alts parameter; this information is now compiled
   into RE.  Also change type of first parameter from (Line *) to (char *)
   to make it more general (and changed the name to reflect this change).
    Add another extra parameter "(Line *) end_lp" to docompiled() to get
   a still cleaner interface.

   Note that backward search stops at the first match from the end of the
   string, so for more complicated expressions it does not necessarily find
   the longest possible match (alas, alas...).

   [JP] This code is cumbersome, repetetive for reasons of efficiency.  Fast
   search is a must as far as I am concerned. */

int
re_sindex(str, offset, expr)
const char	*str;
char		*expr;
{
	register const char	*s = expr;
	register char		c,
				firstc = *s++;
	register int		case_ign = *s++;
#ifndef TINY
	register const char	*firstset = NULL;

	if (case_ign & RE_FSET) {
		firstset = s + (((case_ign & ~RE_FSET) << 7) + firstc);
		firstc = '\0';
	}
#endif
	if (pstrtlst[1])
		REreset();

	expr = (char *) s;

	REbolp = s = str;

	if (expr[0] == AT_BOL) {
		if (offset && REdirection >= 0)
			return NO;
#ifndef TINY
		if (firstc) {
			if (firstc != *s && (!case_ign || firstc != CEquiv(*s)))
				return NO;
		}
#endif
		if (REmatch(s, expr)) {
  found:		loc1 = s;
			REbom = s - REbolp;
			return YES;
		}
		return NO;
	}

	s += offset;

	if (REdirection < 0) {	/* BACKWARD */
		if (offset < 0)		/* flag from BACKWARD */
			s -= offset, s += strlen(s);
		do {
			if (firstc) {
				s++;
				if (!case_ign) {
					while (*--s != firstc)
						if (s == REbolp)
							return NO;
				} else {
#ifdef OLD_CASEIGNORE
					while (CEquiv(*--s) != firstc)
#else
					while (*--s != firstc &&
					       CEquiv(*s) != firstc)
#endif
						if (s == REbolp)
							return NO;
				}
			}
#ifndef TINY
			else if (firstset) {
				s++;
				while (!member(*--s, firstset))
					if (s == REbolp)
						break;
						/* always try at BOL since
						   sub-expr may be anchored. */
			}
#endif
			if (REmatch(s, expr)) {
				goto found;
			}
		} while (--s >= REbolp);

	} else {	/* FORWARD */
#ifndef TINY
		if (firstset && offset == 0)	/* always try at BOL since
						   sub-expr may be anchored. */
			goto f_match;
#endif
		do {
			if (firstc) {
				if (!case_ign) {
					do
						if ((c = *s++) == '\0')
							return NO;
					while (c != firstc);
				} else {
					do
						if ((c = *s++) == '\0')
							return NO;
#ifdef OLD_CASEIGNORE
					while (CEquiv(c) != firstc);
#else
					while (c != firstc &&
					       CEquiv(c) != firstc);
#endif
				}
				--s;
			}
#ifndef TINY
			else if (firstset) {
				while (!member(c = *s++, firstset))
					if (c == '\0')
						return NO;
				--s;
			}
	f_match:
#endif
			if (REmatch(s, expr)) {
				goto found;
			}
		} while (*s++);
	}
	return NO;
}

int	okay_wrap ZERO;		/* Implicit parameter to dosearch */

Bufpos *
dosearch(pattern, dir, re)
const char	*pattern;
{
	register Buffer	*cb = curbuf;
	register Line	*wrap_lp = NULL;

	if (b_empty(cb))
		return NULL;	/* Can't match!  There's no buffer. */

	REcompile(pattern, re, compbuf);

	if (okay_wrap && True(WrapScan))
		wrap_lp = cb->b_dot;

	return docompiled(dir, compbuf, (Bufpos *) 0, wrap_lp);
}

Bufpos *
docompiled(dir, expr, start, wrap_lp)
char		*expr;
Bufpos		*start;
Line		*wrap_lp;
{
	static Bufpos	ret;
	register Line	*lp;
	register int	offset;
    {
	register Bufpos	*sp;

	if ((sp = start) == NULL) {
		sp = &ret;
		DOTsave(sp);
	}
	lp = sp->p_line;
	offset = sp->p_char;
    }
	REdirection = dir;
    {
	register Line		*end_lp = wrap_lp;
	register const char	*cp = lcontents(lp);

	if (start)
		locrater = cp + offset;

	if (dir < 0) {	/* BACKWARD */

		if (!offset || !re_sindex(cp, offset - 1, expr)) {
			locrater = NULL;
			do {
				if (!(lp = lp->l_prev)) {
					if (!end_lp)
						return NULL;
					lp = curbuf->b_last;
				}
				if (lp == end_lp) {
					if (offset < 0)
						return NULL;
					end_lp = end_lp->l_prev;
					offset = -1;	/* exit flag */
				}
			} while (!re_sindex(lcontents(lp), -1, expr));
		}
		ret.p_char = REbom;

	} else { /* FORWARD */

		/* this avoids infinite loops at "$" ?? */
		if (!cp[offset] || !re_sindex(cp, offset, expr)) {
			locrater = NULL;
			do {
				if (!(lp = lp->l_next)) {
					if (!end_lp)
						return NULL;
					lp = curbuf->b_first;
				}
				if (lp == end_lp) {
					if (offset < 0)
						return NULL;
					end_lp = end_lp->l_next;
					offset = -1;	/* exit flag */
				}
			} while (!re_sindex(lcontents(lp), 0, expr));
		}
		ret.p_char = REeom;
	}
	locrater = NULL;
    }
	ret.p_line = lp;
	return &ret;
}

private char *insert __(( char *_(off), char *_(endp), int _(which) ));
private char *
insert(off, endp, which)
register char	*off;
char		*endp;
{
	register int		n;
	register const char	*pp;

	n = pendlst[which] - (pp = pstrtlst[which]);

	while (--n >= 0 && off < endp) {
		*off++ = *pp++;
	}
	return off;
}

/* Build the actual substitution string from the replacement string and
   the most recent match.
   [TRH] add "\0" (complete match) -- fix bug in "\N" recognition.
   Escape conventions:
	\0 .. \9	- sub-expression substitutions.
	\\0 .. \\9	- literal \0 .. \9
	\\\		- literal \ + \-escape [yes was still buggy...]
   Otherwise backslash is treated as a normal character.

   This routine is a replacement for re_dosub()
 */
void
REsubst(buf, rep_str)
char		*buf;
const char 	*rep_str;
{
	register char		*tp = buf;	/* destination */
	register const char	*rp = rep_str;	/* where the matched line is */
	register int		c;
#define endp buf /* alias! */
	endp += LBSIZE - 1;

	for (rp = rep_str; (c = *rp++); ) {
		if (tp > endp) {
			--tp;		/* truncate line */
			break;
		}
		if (c == '\\') {
			if (!(*rp++ == c && (isdigit(*rp) || *rp == c)) &&
			    (unsigned)(*--rp - '0') <= nparens) {
				tp = insert(tp, endp, *rp++ - '0');
				continue;
			}
			/* [TRH] don't eat backslash if no (valid) sub-expr */
		}
		if (c == '\r')
			c = '\n';	/* so we can have embedded newlines */
		*tp++ = c;
	}
	*tp = '\0';
#undef endp
}

void
putmatch(which, buf, size)
register char	*buf;
{
	register char	*endp = buf + size;

	if ((buf = insert(buf, endp, which)) >= endp)
		len_error(error);
	*buf = '\0';
}

void
RErecur()
{
	char		cbuf[sizeof compbuf];
	register char	*save_rep;
	register char	*save_search;
	register int	npars;

	npars = nparens;
	byte_copy(compbuf, cbuf, sizeof compbuf);
	save_rep = copystr(rep_str);
	save_search = copystr(rep_search);
	Recur();
	set_str(&rep_search, save_search);
	free(save_search);
	set_str(&rep_str, save_rep);
	free(save_rep);
	byte_copy(cbuf, compbuf, sizeof compbuf);
	nparens = npars;
}

/*
 * ask for RE string and compiles it into compbuf.
 * if the RE is invalid, the cursor is set near the offending
 * character and the error message is displayed (briefly).
 * [14-jun-89 TRH] don't catch errors if in .joverc.
 * [Jul-90 TRH] adapted to changed ask().
 */
#ifdef _STDARG_
#   include <stdarg.h>
#else
#   include <varargs.h>
#endif

private struct {
  const char	*def;
	int	use_re;
} re_ask_par;

private int	re_aux __(( int _(c) ));

char *
DEFVARG(re_ask, (const char *def, int use_re, const char *fmt, ...),
		(def, use_re, fmt, va_alist)
	const char	*def;
	const char	*fmt;)
{
	va_register va_list	ap;
	register const char	*o_def = re_ask_par.def;/* make it re-entrant */
	register char		*ans;
	VA_INIT_PROPAGATE

	re_ask_par.use_re = use_re;
	va_begin(ap, fmt);
	ans = do_ask("\r\n", re_aux, re_ask_par.def = def, VA_PROPAGATE(fmt, ap));
	va_end(ap);
	re_ask_par.def = o_def;
	return ans;
}

/* this is called from within ask() when the user types CR or LF */

private
re_aux(c)
{
	Savejmp			savejmp;
	char			saveline[MESG_SIZE],
				errmesg[MESG_SIZE];
	register char		*pattern = linebuf;
	register int		err;

	strcpy(saveline, mesgbuf);	/* may need it later, for error mesg */

	if ((err = push_env(savejmp)) == 0) {
		if (pattern[0] == '\0') {
			/* empty string - use default instead */
			if (re_ask_par.def == NULL)
				complain("No default");

			/* copy it to linebuf so it will be returned by do_ask */
			strcpy(pattern, re_ask_par.def);
		}
		REcompile(pattern, True(re_ask_par.use_re), compbuf);
	}
	pop_env(savejmp);

	if (err) {
		register int	at;

		if (InJoverc)			/* propagate abort */
			complain((char *) 0);

		if (pattern[0] == '\0' || (at = REptr - pattern - 1) < 0)
			at = 0;
		curchar = at;			/* set cursor near the error */

		s_mess("%s [%s]", saveline, strcpy(errmesg, mesgbuf));
	}
	return err;
}

/* Do we match PATTERN at OFFSET in BUF? */

private	int	look_re = YES;	/* this is a kludge */

LookingAt(pattern, buf, offset)
const char		*pattern;
register const char	*buf;
register int		offset;
{
	char		comp_buf[RE_SIZE];
	register char	*comp = comp_buf;

	REcompile(pattern, look_re, comp);
	REbolp = buf;
	REbom = offset;
	return REmatch(loc1 = buf + offset, comp + RE_NFIRST);
}

look_at(pattern)
const char	*pattern;
{
	register int	result;
	look_re = NO;
	result = LookingAt(pattern, linebuf, curchar);
	look_re++;
	return result;
}

/*======================================================================
 * $Log: re.c,v $
 * Revision 14.32.0.10  1994/04/22  18:24:23  tom
 * (re_ask): use `va_register va_list'.
 *
 * Revision 14.32.0.9  1994/02/01  20:35:19  tom
 * (do_comp, ALT_J_L_ASSERTION): fix datatype range errors.
 *
 * Revision 14.32.0.8  1993/11/01  18:05:42  tom
 * (searchstr): removed; (UseRE): made non-PRIVATE;
 * (re_ask): changed semantics of force_re argument -> use_re.
 *
 * Revision 14.32.0.4  1993/07/31  03:24:27  tom
 * (MARK): new opcode, introduced to avoid endless loops in `*' and `+'
 * closure of complex sub-expressions (always a target of BACK jump);
 * (REcompile, do_comp, REfirst): add MARK support;
 * (REmatch): [ALT] matching sub-expr is accepted only if tail matches too,
 * [BACK] fail if match is empty (to avoid endless loops),
 * [MARK] (new) record current line pointer (for empty match check in BACK).
 *
 * Revision 14.32  1993/05/14  17:40:26  tom
 * some (space) optimizations for dumber compilers; various changes for new
 * case-ignore scheme: preserve old way in OLD_CASEIGNORE conditionals.
 *
 * Revision 14.31  1993/02/17  00:55:04  tom
 * preserve rep_search in RErecur(); lotsa random optimizations.
 *
 * Revision 14.30  1993/01/26  17:53:27  tom
 * cleanup whitespace; some random optimizations; standardize DEBUG options;
 * add "\!" directive; allow long offsets to ALT/BACK.
 *
 * Revision 14.28  1992/10/27  12:26:42  tom
 * convert to "port{ansi,defs}.h" conventions; fix REdirection botch after
 * failing backward search--now reset it at each REcompile.
 *
 * Revision 14.26  1992/08/26  23:56:57  tom
 * PRIVATE-ized some Variable defs; add RCS directives.
 *
 */
