/***************************************************************
 *
 * Boyer-Moore string search routine
 *
 * Author:    John Rex
 * References: (1) Boyer RS, Moore JS: "A fast string searching
 *                 algorithm"  CACM 20(10):762-777, 1977
 *             (2) plus others--see text of article
 *
 * Compilers: Microsoft C V5.1  - compile as is
 *            Turbo C V2.0      - compile as is
 *
 * Compile time preprocessor switches:
 *    DEBUG - if defined, include test driver
 *
 * Usage:
 *
 *   char *pattern, *text;  - search for pattern in text
 *   unsigned length;       - length of text (the routine does
 *                            NOT stop for '\0' bytes, thus
 *                            allowing it to search strings
 *                            stored sequentially in memory.
 *   char *start;           - pointer to match
 *
 *   char *Boyer_Moore(char *, char *, unsigned);
 *
 *   start = Boyer_Moore(pattern, text, strlen(text);
 *
 *   NULL is returned if the search fails.
 *
 *   Switches: if defined:
 *
 *      DEBUG will cause the search routine to dump its tables
 *            at various times--this is useful when trying to
 *            understand how upMatchJump is generated
 *
 *      DRIVER will cause a test drive to be compiled
 *
 * Source code may be used freely if source is acknowledged.
 * Object code may be used freely.
 **************************************************************/

#define DRIVER

#if defined(DEBUG)
#define SHOWCHAR for (uT=1; uT<=uPatLen; uT++)  \
                 printf(" %c ", pcPattern[uT-1])
#define SHOWJUMP for (uT=1;uT<=uPatLen;uT++)    \
                 printf("%2d ", upMatchJump[uT])
#define SHOWA    printf("  uA = %u  ", uA)
#define SHOWB    printf("  uB = %u", uB)
#define SHOWBACK for (uT=1;uT<=uPatLen;uT++)    \
                 printf("%2d ", upBackUp[uT])
#define NL       printf("\n")

unsigned uT;
#else
#define SHOWCHAR
#define SHOWJUMP
#define SHOWA
#define SHOWB
#define SHOWBACK
#define NL
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define AlphabetSize 256

char *Boyer_Moore(pcPattern, pcText, uTextLen)
char *pcPattern;        /* we search for this ... */
char *pcText;           /* ... in this text ...   */
unsigned uTextLen;      /* ... up to this length  */
{
             /* array of character mis-match offsets */
    unsigned uCharJump[AlphabetSize];
             /* array of offsets for partial matches */
    unsigned *upMatchJump;
             /* temporary array for upMatchJump calc */
    unsigned *upBackUp;
    unsigned u, uPatLen;
    unsigned uText, uPat, uA, uB;


    /* Setup and initialize arrays */
    uPatLen = strlen(pcPattern);
    upMatchJump = (unsigned *)
         malloc(2 * (sizeof(unsigned) * (uPatLen + 1)) );
    upBackUp = upMatchJump + uPatLen + 1;

    /* Heuristic #1 -- simple char mis-match jumps ... */
    memset(uCharJump, 0, AlphabetSize*sizeof(unsigned));
    for (u = 0 ; u < uPatLen; u++)
        uCharJump[((unsigned char) pcPattern[u])]
                     = uPatLen - u - 1;

    /* Heuristic #2 -- offsets from partial matches ... */
    for (u = 1; u <= uPatLen; u++)
        upMatchJump[u] = 2 * uPatLen - u;
                                /* largest possible jump */
    SHOWCHAR; NL;
    SHOWJUMP; NL;

    u = uPatLen;
    uA = uPatLen + 1;
    while (u > 0) {
        upBackUp[u] = uA;
        while( uA <= uPatLen &&
          pcPattern[u - 1] != pcPattern[uA - 1]) {
            if (upMatchJump[uA] > uPatLen - u)
                upMatchJump[uA] = uPatLen - u;
            uA = upBackUp[uA];
        }
        u--;
        uA--;
    }

    SHOWJUMP; SHOWA; SHOWBACK; NL;

    for (u = 1; u <= uA; u++)
        if (upMatchJump[u] > uPatLen + uA - u)
            upMatchJump[u] = uPatLen + uA - u;

    uB = upBackUp[uA];
    SHOWJUMP; SHOWB; NL;

    while (uA <= uPatLen) {
        while (uA <= uB) {
            if (upMatchJump[uA] > uB - uA + uPatLen)
                upMatchJump[uA] = uB - uA + uPatLen;
            uA++;
        }
        uB = upBackUp[uB];
    }
    SHOWJUMP; NL;

    /* now search */
    uPat = uPatLen;         /* tracks position in Pattern */
    uText = uPatLen - 1;    /* tracks position in Text */
    while (uText < uTextLen && uPat != 0) {
        if (pcText[uText] == pcPattern[uPat - 1]) { /* match? */
            uText--;    /* back up to next */
            uPat--;
        }
        else { /* a mismatch - slide pattern forward */
            uA = uCharJump[((unsigned char) pcText[uText])];
            uB = upMatchJump[uPat];
            uText += max(uA, uB);  /* select larger jump */
            uPat = uPatLen;
        }
    }

    /* return our findings */
    if (uPat == 0)
        return(pcText + (uText + 1)); /* have a match */
    else
        return (NULL); /* no match */
}

#pragma subtitle("Test Driver")

#ifdef DRIVER

#define PATTERN argv[1]
#define TEXT_SIZE 32000u

void main(argc, argv)
int argc;
char **argv;
{
    char *start, *p;
    char text[TEXT_SIZE];
    int i;
    unsigned length, count;
    FILE *infile;

    if (argc != 3) {
        puts("Usage is: bm pattern file\n");
        exit(0);
    }

    if ((infile = fopen(argv[2], "r")) == NULL) {
        puts("Can't open input file\n");
        exit(0);
    }

    /* read in whole file */
    length = fread(text, 1, TEXT_SIZE, infile);
    fclose(infile);
    p=text;
    count=0;
    while (count < length) {
        if (*p == '\n')
            *p = '\0';
        p++;
        count++;
    }

    /* now search repeatedly */
    start = Boyer_Moore(PATTERN, text, length);
    while (start != NULL) {
        for (p = start; ; p--) { /* find start of line */
            if (*p == '\0') {
                p++;
                break;
            }
            else if (p == text)
                break;
        }
        printf("Match:\n%s\n", p);
        for (i=start-p; i>0; i--)
            fputc(' ', stdout);
        printf("%s\n\n", PATTERN);
        start = Boyer_Moore(PATTERN, start+1,
                             length - (start-text) - 1);
    }
}
#endif
