/* TRIPOS Pattern Matching algorithm with extensions */
/*
 * Original BCPL version by Martin Richards
 * This is a transliteration of the BCPL code with additions
 * by Pete Goodeve 87:8:10
 *
 * This version has:
 *                      simple repetition ("*") (alternative to "#?")
 *                      negation ("~" and "||")
 *                      slicing ("^")
 */

/*
 * Reference:
 *
 *          Martin Richards,
 *                "A Compact Function for Regular Expression
 *                            Pattern Matching"
 *          [Software--Practice and Experience, Vol 9, 527-534 (1979)]
 */

/* don't #define DEBUG 1 */
/* ...there are large hunks of optional trace code in this version */

#include <exec/types.h>

/* Code bits returned by Pattern Compiler: */

/*  this bit always set unless pattern is bad: */
#define CODE_OK 1
/*   the following bits will not be set if pattern is bad: */
/*  set if any single-wild-match characters present ("?"): */
#define CODE_ANY 2
/*  set if any multiple=-wild-match characters present ("*"): */
#define CODE_ALL 4
/*  set if any grouping characters present ("()"): */
#define CODE_GROUP 8
/*  set if pattern has alternations ("|"): */
#define CODE_ALT 16
/*  set if pattern has repeating segments ("#" or "*"): */
#define CODE_REP 32
/*  set if pattern has negation ("~" or "||"): */
#define CODE_NEG 64
/*  set if pattern is sliced ("^"): */
#define CODE_SLICE 128
/*  set if more than MAXMARK slices or if used within parentheses */
#define CODE_OVERSLICE CODE_SLICE | 256

/* Pattern Control Characters are defined here so you can change them
 * easily.
 * You may undefine PAT_ALL or PAT_SLICE to remove the sections of code
 * that provide these functions; the other definitions may be changed but
 * not removed.
 */

#define PAT_QUOTE '\''
#define PAT_LGROUP '('
#define PAT_RGROUP ')'
#define PAT_ALT '|'
#define PAT_REP '#'
#define PAT_ANY '?'
#define PAT_ALL '*'
#define PAT_NEG '~'
    /* -- note double PAT_ALT is also always a negation code */
#define PAT_NULL '%'
#define PAT_SLICE '^'

#define Endstreamch 0xFF
#define WORKSIZE 128


/*** Static Variables: ***/

UBYTE    *Work = NULL;
int      Wp = 0, Succflag = FALSE;
char     *Pat = 0;
UBYTE    *Aux = 0;
UBYTE    Ch = 0;
int      PatP = 0,  Patlen = 0,
         Errflag = FALSE,
         NegP = 0;
UBYTE    *NWork = NULL;
#ifdef PAT_SLICE -------------------------v
#define MAXMARK 4
#define MAXCUT 16
struct  MarkSet {UBYTE mk[MAXMARK];};
UBYTE   *Svect = NULL;
int     cutlimit;
struct MarkSet   MarkP, *MWork;
#endif -----------------------------------^

int S, StrLength;

#ifdef DEBUG ==============================V
int DEBmode = TRUE; /* set this false for no printout when DEBUG defined */
int ii; /* local variable -- defined here for convenience */
#endif ====================================^

/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/


/*** The Interpreter: ***/

#ifdef PAT_SLICE --------------------------v
SMatch(Pat, Aux, Str, Slice) UBYTE *Aux; char *Pat, *Str, *Slice;
#else ------------------------------------
Match(Pat, Aux, Str) UBYTE *Aux; char *Pat, *Str;
#endif ------------------------------------^
{
    UBYTE W[WORKSIZE];
    UBYTE NegW[WORKSIZE]; /* for negation */
#ifdef PAT_SLICE --------------------------v
    struct MarkSet MarkW[WORKSIZE]; /* for slice marking */
#endif ------------------------------------^
    register int N, I;
    register UBYTE P, Q;
    char K;

    StrLength = strlen(Str);
    S = 0;
    Pat -= 1; /* align into "BCPL" form (byte 0 never accessed) */
    Work = W;
    *Work = 0; /* remote possibility of first Put failing otherwise */
    NWork = NegW;
#ifdef PAT_SLICE ---------------------------v
    MWork = MarkW;
    for (I=0; I < MAXMARK; I++) {
        MWork->mk[I] = MarkP.mk[I] = 0;
    }
    if (Svect = Slice) /* allowed to be NULL */
        *Svect = 0;
    cutlimit = MAXCUT;
#endif -------------------------------------^
    Wp = 0;
    Succflag = FALSE;
    NegP = 0;
    Put(1);
    if (!(Aux[0]==0)) Put(Aux[0]);
  do {
    for (N=1; N <= Wp; N++) { /* first complete the closure: */
        P = Work[N];
        NegP = NWork[N] & 1; /* propagate low bit only */
#ifdef PAT_SLICE ---------------------------v
        MarkP = MWork[N];
#endif -------------------------------------^
        K = Pat[P]; Q = Aux[P];
        switch(K)
        {
         case PAT_REP:  Put(P+1);
#ifdef PAT_ALL -----------------------------v
         case PAT_ALL:
#endif -------------------------------------^
         case PAT_NULL: Put(Q);
         default:       break;
         case PAT_LGROUP:
         case PAT_ALT:
                        if (Q != 0) Put(Q);
                        if (Pat[P+1] == '|') { /* Negative alternate */
                            NegP = 1;
                            P++;
                        }
                        Put(P+1);
                        break;
         case PAT_NEG:
                        Put(Q);
                        NegP = 1;
                        if (!(NWork[N] & 2)) Put(P+1);
                        break;
#ifdef PAT_SLICE ----------------------------v
         case PAT_SLICE:
                        Putslice(P, S);
                        Put(Q);
#endif --------------------------------------^
        }
    }
#ifdef DEBUG ====================================V
    if (DEBmode) {
       printf("\nSucc=%d  Closure Vector (len=%d):\n", Succflag, Wp);
       for (ii=1; ii<=Wp; ii++) printf("%3d", Work[ii]);
       putchar('\n');
       for (ii=1; ii<=Wp; ii++) printf("%3d", NWork[ii]);
       puts("\n=====");
    }
#endif ==========================================^

    if (S >= StrLength)
        return (Succflag > 0);
    if (Wp == 0) return FALSE;

    Ch = Str[S++];
#ifdef DEBUG ====================================V
    if (DEBmode)
       printf("Matching character %c:\n",Ch);
#endif ==========================================^

    /* now deal with match items: */
    N = Wp;
    Wp = 0;
    Succflag = FALSE;

    for (I = 1; I <= N; I++) {
        P = Work[I];
        NegP = NWork[I];
#ifdef PAT_SLICE ------------------------------v
        MarkP = MWork[I];
#endif ----------------------------------------^
        K = Pat[P];
        switch(K) {
         case PAT_NEG:  NegP |= 2; Put(P);
#ifdef PAT_SLICE ------------------------------v
         case PAT_SLICE:
#endif ----------------------------------------^
         case PAT_REP:
         case PAT_ALT:
         case PAT_NULL:
         case PAT_LGROUP:
                    break; /* BCPL was 'loop' */
         case PAT_QUOTE: K = Pat[P+1];
         default:   if (Ch != K) break; /* 'loop' */
         case PAT_ANY:  /* successful match */
                    Put(Aux[P]);
                    break; /* 'loop' */
#ifdef PAT_ALL --------------------------------v
         case PAT_ALL:  Put(P); /* point back to self...*/
                    break; /* 'loop' */
#endif ----------------------------------------^
        }
    }
#ifdef DEBUG ======================================V
    if (DEBmode) {
       printf("\nSucc=%d Match Vector (len=%d):\n", Succflag, Wp);
       for (ii=1; ii<=Wp; ii++) printf("%3d", Work[ii]);
       putchar('\n');
       for (ii=1; ii<=Wp; ii++) printf("%3d", NWork[ii]);
       puts("\n=====");
    }
#endif ============================================^
  } while (TRUE);
} /*** end Match ***/


Put(N) int N;
{
    UBYTE *W, *Wlim, *NW;
#ifdef DEBUG ======================================V
    if (DEBmode)
#ifdef PAT_SLICE ------------------------------v
       printf("Put %d[%d, %d/%d]  ", N, NegP, MarkP.mk[0], MarkP.mk[1]);
#else ----------------------------------------
       printf("Put %d[%d]  ", N, NegP);
#endif ----------------------------------------^
#endif ============================================^
    if (N == 0) {
        Succflag |= NegP ? -1 : 1;
#ifdef PAT_SLICE ------------------------------v
        if (S == StrLength) RetSlice(); /* matched -- pass back slice info */
#endif ----------------------------------------^
    }
    else {
        W = Work;
        Wlim = W + Wp;
        NW = NWork;
        for(;W <= Wlim; W++, NW++)
             if ((*W == N)&&(*NW == NegP))
                 return;
        *W = N;
        *NW = NegP;
        Wp = W - Work;
#ifdef PAT_SLICE ------------------------------v
        MWork[Wp] = MarkP;
#endif ----------------------------------------^
    if (Wp >= WORKSIZE) exit(33);
    }
}

#ifdef PAT_SLICE -----------------------------------------------v
Putslice(P, S) int P, S;
{
    int i;
    UBYTE *MW;
#ifdef DEBUG ======================================V
    if (DEBmode)
       printf("Slice ^%d=%d  ", P, S);
#endif ============================================^
    for (i = 0, MW = (UBYTE *)MWork;
         i<MAXMARK && *MW && (P != *MW); i++, MW++)
        ; /* loop */
    if (i==MAXMARK) return;
    *MW = P;
    MarkP.mk[i] = S;
}



RetSlice() {
    int i;
    UBYTE *mwp = MWork->mk,
          *mpp = MarkP.mk,
          lastcut = 0;
    if (!Svect) return; /* ignore if NULL */
#ifdef DEBUG ======================================V
    if (DEBmode)
        printf("RetSlice.. Svect = %d\n",Svect);
#endif ============================================^
    for (i=MAXMARK; i && *mwp && cutlimit; i--, cutlimit--)
        if(*mpp >= lastcut) { /* avoid supefluous cuts */
            *Svect++ = *mwp++;
            lastcut = *Svect++ = *mpp++;
        }
    *Svect = 0;
}
#endif ---------------------------------------------------------^


/*********************************************************************/


/*** The Compiler: ***/

int retcode, grouplevel, slicecount;

Rch() {
    if (PatP >= Patlen)
        Ch = Endstreamch;
    else {
        Ch = Pat[++PatP];
    }
}

Nextitem() {
    if (Ch == PAT_QUOTE) Rch();
    Rch();
}

int Prim(NegMark) int NegMark;
{
    int A = PatP;
    UBYTE Op = Ch;
    if (NegMark) retcode |= CODE_NEG;
    Nextitem();
    switch(Op) {
     case PAT_ANY:
                retcode |= CODE_ANY;
                return A;
     case PAT_ALL:
                retcode |= CODE_ALL;
                return A;
     case Endstreamch:
     case PAT_RGROUP:
     case PAT_ALT:
                Errflag = TRUE;
     default:   return A;
     case PAT_REP:
                Setexits(Prim(NegMark), A);
                retcode |= CODE_REP;
                return A;
     case PAT_LGROUP:
                grouplevel++;
                A = Exp(A, (NegMark ? 1 : FALSE));
                grouplevel--;
                if (Ch != PAT_RGROUP) Errflag = TRUE;
                retcode |= CODE_GROUP;
                Nextitem();
                return A;
     case PAT_NEG:
                if (NegMark) Errflag = TRUE;
                NegMark = 1;
                retcode |= CODE_NEG;
                Join(A, Prim(NegMark));
                NegMark = FALSE;
                return A;
#ifdef PAT_SLICE ------------------------------v
     case PAT_SLICE:
                retcode |= (++slicecount > MAXMARK || grouplevel ) ?
                    CODE_OVERSLICE : CODE_SLICE;
                return A;
#endif ----------------------------------------^
    }
}

int Exp(AltP, NegMark) int AltP, NegMark;
{
    int Exits = 0, A;
    do {
        A = Prim(NegMark);
        if (Ch == PAT_ALT || Ch == PAT_RGROUP || Ch == Endstreamch)
            {   Exits = Join(Exits, A);
                if (Ch != PAT_ALT)
                    return Exits;
                retcode |= CODE_ALT;
                Aux[AltP] = PatP;
                AltP = PatP;
                Nextitem();
                if (Ch == PAT_ALT) { /* negation convention */
                    if (NegMark == 1) Errflag = TRUE;
                    NegMark = -1; /* not an error to meet oneself */
                    retcode |= CODE_NEG;
                    Nextitem();
                }
                else NegMark = FALSE;
            }
        else Setexits(A, PatP);
    } while (TRUE);
}


Setexits(List, Val) int List, Val;
{
    int A;
    while (List != 0) {
        A = Aux[List];
        Aux[List] = Val;
        List = A;
    }
}

int Join(A, B) int A, B;
{
    int T = A;
    if (A == 0) return B;
    while (Aux[A] != 0) A = Aux[A];
    Aux[A] = B;
    return T;
}

int CmplPat(Pattern, CmplPattern) char *Pattern, *CmplPattern;
{
    int I;
    Pat = Pattern - 1;
    Aux = CmplPattern;
    Patlen = strlen(Pattern); /** this is no longer a BSTR!!! **/
    PatP = 0;
    Errflag = FALSE;
    retcode = CODE_OK;
    grouplevel = slicecount = 0;
    for (I = 0; I <= Patlen; I++) Aux[I] = 0;
    Rch();
    Setexits(Exp(0,FALSE), 0);
    return Errflag ? 0 : retcode;
}

