/* bgrep --- grep using Boyer-Moore pattern matcher */

/*
 * All the wrapper code (argument and file handling, printing, etc.) by
 * Arnold Robbins, gatech!arnold
 *
 * Boyer-Moore pattern matching, coded by Roy Mongiovi, gatech!gitpyr!roy
 * and edited some by Arnold Robbins.
 *
 * No warranty of suitability for any purpose, either expressed
 * or implied, is made.
 */

#include <stdio.h>
#include <ctype.h>

#define TRUE	1
#define FALSE	0

#define MAXPATS	120		/* (arbitrary maximum number of patterns */
#define MAXLINE	(256 + 1)

/*
 * command line options
 */

int Allbut = FALSE;		/* print lines that don't match pattern */
int Exact = FALSE;		/* only print lines that match exactly */
int Countlines = FALSE;		/* only print a count of matching lines */
int Listnames = FALSE;		/* only list file names that match */
int Numberlines = FALSE;	/* print relative line number */
int Silent = FALSE;		/* don't print anything, just exit */
int Monocase = FALSE;		/* ignore case distinctions */

/*
 * variables
 */

long Curline = 0;		/* current file input line */
long Lines_matched = 0;		/* how many lines matched the pattern */

int Lotsafiles = FALSE;		/* are there more than one file? */
int Pat_length[MAXPATS];	/* length of pattern */
int Line_length = 0;		/* length of line */
int Couldnt_open_files = FALSE;	/* one or more files could not be opened */
int Exit_val = 0;		/* return code status */
int Curpat = 0;			/* current pattern comparing against */
int Numpats = 0;		/* total number of patterns */

char Inbuf[MAXLINE];		/* input buffer */
char Pattern[MAXPATS][MAXLINE];	/* pattern to be matched; init = NULs */
char *Program = NULL;		/* program name */

int Argc;			/* make argc and argv global */
char **Argv;

extern char *basename();	/* leaf file of a pathname */

main (argc, argv, envp)
int argc;
char **argv, **envp;
{
	register int i, j;
	char *index();

	Program = basename (argv[0]);
	Argc = argc;
	Argv = argv;

	parse_args ();		/* deal with command line */

	if (Pattern[0][0] == '\0')	/* not from -f or -e */
	{
		if (Argv[0] == NULL)	/* no string given */
			usage ();
		setpats (Argv[0]);
		Argc--;
		Argv++;
	}

	for (Curpat = 0; Curpat <= Numpats; Curpat++)
	{
		if (Monocase)
			mapdown (Pattern[Curpat]);

		Pat_length[Curpat] = strlen (Pattern[Curpat]);
		initialize ();		/* set up necessary tables */
	}

	Lotsafiles = (Argc > 1);	/* more than one file left */

	if (Argc == 0)		/* search stdin */
		process ("-");
	else
		for (i = 0; Argv[i] != NULL; i++)
			process (Argv[i]);
	
	if (! Silent && Countlines)
		printf ("%ld\n", Lines_matched);

	if (Lines_matched == 0)
		exit (1);
	else if (Couldnt_open_files)
		exit (2);
	else
		exit (0);
}

/* process --- start doing the work on each file */

process (infile)
register char *infile;
{
	FILE *fp;
	int c;
	int success;
	long prev_lines_matched = Lines_matched;	/* save count */

	Curline = 0;	/* reset for each file */

	if (infile[0] == '-' && infile[1] == '\0')
		fp = stdin;
	else if ((fp = fopen (infile, "r")) == NULL)
	{
		Couldnt_open_files = TRUE;
		perror (infile);
		return;
	}

	while (fgets (Inbuf, sizeof Inbuf, fp) != NULL)
	{
		Curline++;
		if (Monocase)
			mapdown (Inbuf);
		Line_length = strlen (Inbuf);

		/* first, throw away rest of a truncated input line */
		if (Inbuf[Line_length - 1] != '\n')
			while ((c = getc (fp)) != '\n')
				;
		else
			Inbuf[--Line_length] = '\0';
			/* newline is there, nuke it */

		for (Curpat = 0; Curpat <= Numpats; Curpat++)
		{
			if (success = match ())
				Lines_matched++;
			/* do any necessary output */
			if (! Silent && ! Countlines && ! Listnames &&
				((success != FALSE) ^ (Allbut != FALSE)))
				/* either success, or Allbut, but not both,
				   and not neither */
			{
				if (Lotsafiles)
					printf ("%s: ", infile);
				if (Numberlines)
					printf ("%ld: ", Curline);
				printf ("%s\n", Inbuf);
			}
		}
	}

	fclose (fp);
	if (! Silent && Listnames && prev_lines_matched < Lines_matched)
		printf ("%s\n", infile);
}

/* parse_args --- check out command line arguments */

parse_args ()
{
	register int i,j;

	if (Argc == 1)
		usage ();

	for (Argc--, Argv++; Argv[0] != NULL && Argv[0][0] == '-'; Argc--, Argv++)
	{
		int cheat = FALSE;

		for (j = 1; Argv[0][j] != '\0'; j++)
		{
			switch (Argv[0][j]) {
			case 'c':
				Countlines = TRUE;
				break;

			case 'e':
				strcpy (Pattern[0], Argv[1]);
				Pattern[0][sizeof Pattern[0] - 1] = '\0';
				cheat = TRUE;
				continue;

			case 'f':
				patfromfile (Argv[1]);
				cheat = TRUE;
				continue;

			case 'i':
			case 'y':
				Monocase = TRUE;
				break;

			case 'l':
				Listnames = TRUE;
				break;

			case 'n':
				Numberlines = TRUE;
				break;

			case 's':
				Silent = TRUE;
				break;

			case 'v':
				Allbut = TRUE;
				break;

			case 'x':
				Exact = TRUE;
				break;

			default:
				usage ();
				break;
			}
		}
		if (cheat)
		{
			cheat = FALSE;
			Argc--;
			Argv++;
			/* boy is this stuff a kludge! */
		}
	}

	/* check for argument conflicts */
	if (
		(Silent &&
			(Allbut || Exact || Countlines || Listnames ||
				Numberlines))
		||
		(Allbut && Exact)
		||
		(Countlines && Listnames)
	)
	{
		fprintf (stderr, "%s: argument conflict -- see the man page\n",
			Program);
		usage ();	/* will exit */
	}
}

/* mapdown --- remove case distinctions in a string */

mapdown (str)
register char *str;
{
	register int i;

	for (i = 0; str[i] != '\0'; i++)
		if (isupper (str[i]))
			str[i] = tolower (str[i]);
}

/* return basename part of a pathname, if '/'s are present */

char *basename (str)
register char *str;
{
	register int i = 0;
	register int j = 0;

	for (; str[i] != '\0'; i++)
		if (str[i] == '/')
			j = i;
	
	if (j != 0)
		return (& str[++j]);
	else
		return (str);
}

/* usage --- print usage message and die */

usage ()
{
	fprintf (stderr, "usage: %s [-vxclnisef] [string] [files]\n",
			Program);
	exit (2);
}

/* index --- do index by hand so it'll work on any Unix */

char *index (str, c)
char *str, c;
{
	for (; *str; str++)
		if (*str == c)
			return (str);
	
	return (NULL);
}

/* patfromfile --- retrieve the pattern from the named file */

patfromfile (infile)
char *infile;
{
	register int i, j;
	register FILE *fp;
	register int c;

	if ((fp = fopen (infile, "r")) == NULL ||
			(c = getc (fp)) == EOF)
	{
		perror (infile);	/* be like standard grep */
		exit (2);
	}
	else
		ungetc (c, fp);

	for (i = 0; fgets (Pattern[i], MAXLINE, fp) != NULL; i++)
	{
		if (i >= 120)
		{
			fprintf (stderr, "%s: Only %d strings allowed\n",
				Program, MAXPATS);
			exit (2);
		}
		j = strlen (Pattern[i]);
		if (Pattern[i][j - 1] == '\n')
			Pattern[i][--j] = '\0';
	}
	Numpats = i - 1;

	fclose (fp);
}

/* setpats --- set up the patterns from a string */

setpats (str)
register char *str;
{
	register int i, j;

	while (*str == '\n' || *str == '\r')
		str++;

	for (i = j = 0; *str; str++)
	{
		if (*str == '\n')
		{
			Pattern[i][j] = '\0';
			j = 0;
			i++;
		}
		else
			Pattern[i][j++] = *str;
	}
	Numpats = i;
}

/* begin magic stuff for Boyer-Moore pattern matching */

int D1[MAXPATS][128];
int D2[MAXPATS][128];

int F[MAXPATS][128];

initialize ()
{
	register int i, t;

	for (i = 0; i < 128; i++)
		D1[Curpat][i] = Pat_length[Curpat];
	
	for (i = 0; i < Pat_length[Curpat]; i++)
		D1[Curpat][Pattern[Curpat][i]] = Pat_length[Curpat] - i - 1;
	
	for (i = 0; i < Pat_length[Curpat]; i++)
		D2[Curpat][i] = (Pat_length[Curpat] << 1) - i - 1;
	
	for (i = (t = Pat_length[Curpat]) - 1; i >= 0; i--, t--)
		for (F[Curpat][i] = t; t < Pat_length[Curpat]
			&& Pattern[Curpat][i] != Pattern[Curpat][t];
							t = F[Curpat][t])
			if (Pat_length[Curpat] - i - 1 < D2[Curpat][t])
				D2[Curpat][t] = Pat_length[Curpat] - i - 1;

	for (i = 0; i <= t; i++)
		if (Pat_length[Curpat] + t - i < D2[Curpat][i])
			D2[Curpat][i] = Pat_length[Curpat] + t - i;
}

/* match --- do Boyer-Moore pattern search on input buffer */

match ()
{
	register int i, j;

	if (Exact && Pat_length[Curpat] != Line_length)
		return FALSE;

	i = Pat_length[Curpat] - 1;

	while (i < Line_length)
	{
		j = Pat_length[Curpat] - 1;
		while (j >= 0)
		{
			if (Inbuf[i] == Pattern[Curpat][j])
				i--, j--;
			else
				break;
		}

		if (j < 0)
		{
			/* found a match */
			return TRUE;
			/*
			 * note: if we were going to seach for further matches
			 * on the input line, we would do this:
			 *
			 * i += Pat_length[Curpat] + 1;
			 *
			 * which shifts right by Pat_length[Curpat] + 1 places
			 */
		}
		else
		{
			j = (D1[Curpat][Inbuf[i]] >= D2[Curpat][j]) ?
				D1[Curpat][Inbuf[i]]
			:
				D2[Curpat][j];
			i += j;
			/* shift right by j places */
		}
	}

	return FALSE;
}

#ifdef AMIGA
perror (msg)
char *msg;
{
	if (msg && *msg) {
		fprintf (stderr, "%s: ", msg);
	}
	fprintf (stderr, "<unknown error>\n");
}
#endif
