/*
  ts.c

  A text searching utility
  Way Cool Games, Michael Lindner, 1993
*/

#if 0
*kw* "%n, v%v, %d, %t"
  "TS.C, v21, 4-Apr-93, 22:54:54"

*kl*
  v1 TS.C 5-Feb-93,11:17:30 initial version
  v2 TS.C 5-Feb-93,11:26:10 debugging
  v3 TS.C 5-Feb-93,11:28:54 fixed setargv bug
  v4 TS.C 5-Feb-93,12:13:14 fixed those pesky buffer problems
  v5 TS.C 5-Feb-93,12:14:52 what gives with the history lines?
  v6 TS.C 5-Feb-93,12:17:34 figured out and fixed util.jnl bug
  v7 TS.C 5-Feb-93,12:26:56 cleaned up formatting and trimmed buffer line
  v     for display; also changed LEN_MAX to MAX_LEN
  v8 TS.C 5-Feb-93,12:34:40 final version; i'm happy but slow
  v9 TS.C 8-Feb-93,12:50:28 changed signoff results line
  v10 TS.C 8-Feb-93,12:51:34 see above
  v11 TS.C 9-Feb-93,18:19:26 added support for test modes; added timing mode
  v12 TS.C 9-Feb-93,21:57:34  implemented brute force and KMP searching methods
  v13 TS.C 9-Feb-93,23:17:02 >?
  v14 TS.C 9-Feb-93,23:22:22 implemented boyer-moore searching algorithm
  v15 TS.C 10-Feb-93,1:02:26 implemented all search algorithms operated by
  v     use of the -m? switch where ? specifies the mode
  v16 TS.C 10-Feb-93,1:39:22 implememented a slow Rabin-Karp algorithm
  v17 TS.C 10-Feb-93,2:03:48 rabin-karp long manipulation routines are wanting
  v18 TS.C 10-Feb-93,13:28:34 starting revision to use _dos_ i/o functions
  v19 TS.C 10-Feb-93,16:46:16 first working version using malloc'd buffering
  v20 TS.C 10-Feb-93,17:49:22 nasty bug
  v21 TS.C 4-Apr-93,22:54:54 enlarged max string buffer len
*kl*
#endif

#include <ctype.h>
#include <fcntl.h>
#include <io.h>
#include <malloc.h>
#include <process.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define NO_DEBUG
#define MAX_LEN 300
#define VERSION 1.2

#define SEARCH_STUPID 0
#define SEARCH_BRUTE  1
#define SEARCH_KNUTH  2
#define SEARCH_BOYER  3
#define SEARCH_RABIN  4

/* Reporting, usage text, per-file driver and display helper. */
void process_hit( char *buf );
void show_usage( void );
void search_file( void );
char *trim_left( char *buf );

/* Dispatches one line to the routine selected by search_mode. */
void search_buf( char *buf );

/* One routine per search method; each receives a NUL-terminated line and
   calls process_hit() when the search string is found in it. */
void search_buf_stupid( char *buf );
void search_buf_brute( char *buf );
void search_buf_knuth( char *buf );
void search_buf_boyer( char *buf );
void search_buf_rabin( char *buf );

/* Dispatch table indexed by the SEARCH_* constants (SEARCH_STUPID == 0
   through SEARCH_RABIN == 4); the order here must match those values. */
void (*search_funcs[])( char * ) = {
  search_buf_stupid,
  search_buf_brute,
  search_buf_knuth,
  search_buf_boyer,
  search_buf_rabin
};

/* Handle of the file currently open (result of _open() in main). */
int ih;
/* Selected SEARCH_* method; -1 until an -m switch or the default sets it. */
int search_mode = -1;
/* Read buffer for file contents; malloc'd (32767 bytes) in main. */
char *wbuf;
/* The search string and the name of the file being searched. */
char search[81], fname[60];
/* Flags set by the -q, -t and -l switches respectively. */
char nologo, timer, nolines;

/* Current line number, write position in the line buffer, and
   strlen( search ). */
int line_num, line_pos, slen;
/* Hits in the current file / hits in all files so far. */
unsigned num_hits_f, num_hits_t;
/* Files searched / files that produced at least one hit. */
unsigned num_file, num_hit_file;

/* clock() samples taken at the start and end of the run in timer mode. */
clock_t clk1, clk2;

/* for k-m-p method */
void init_next( void );
/* Failure table built by init_next(), which stores entries 0..slen.
   The search string lives in a char[81], so slen can be as large as 80
   and the table needs 81 entries; the previous size of 60 was overrun
   by patterns of 60 characters or more. */
int next[81];

/* for boyer method */
/* Maps a character to its slot in skip[]. */
int index( char c );
/* Fills skip[] from the current search string. */
void init_skip( void );
/* Per-character shift table, one entry per possible index() result. */
int skip[256];

/*
  show_usage: write the command-line help text to stdout, then end the
  program with exit status 1.  This function does not return.
*/
void show_usage( void )
{
  static const char usage_text[] =
    "usage: ts <options> \"<search string>\" <filespec>\n"
    " <search string> must appear between quotes if\n"
    "     it contains whitespace\n"
    " <filespec> may be a wild card specification\n\n"
    " <options> in the form of -o, /o or \\o follow\n"
    "   -q    quiet mode (no banner or totals)\n"
    "   -t    timer mode\n"
    "   -l    turns line numbering off\n"
    "   -?    this message (also -help)\n\n"
    "   -mx   search method where x is one of\n"
    "   -ms   the stupid method I dreamed up\n"
    "   -mf   a good brute-force algorithm\n"
    "   -mk   the Knuth-Morris_Pratt algorithm\n"
    "   -mb   the Boyer-Moore algorithm (default)\n"
    "   -mr   the Rabin-Karp algorithm\n";

  /* the trailing "\n" stands in for the newline puts() would append */
  printf( "%s\n", usage_text );
  exit( 1 );
}

/*
  main: parse the command line, then search every named file.

  Arguments are scanned left to right.  Anything starting with '-', '/'
  or '\' is a switch.  The first other argument that can be opened as a
  file ends the scan: it and every argument after it are treated as file
  names.  Any other non-switch argument becomes the search string.

  Exits with status 1 on a usage error, an allocation failure or a file
  that cannot be opened; returns 0 otherwise.
*/
int main( int argc, char **argv )
{
  int i;
  unsigned char filenames;

  if ( argc < 3 )
    show_usage();

  search[0] = '\0';
  num_hits_t = num_file = num_hit_file = 0;
  nologo = timer = nolines = filenames = 0;

  for ( i = 1; i < argc; i++ ) {
    switch( argv[i][0] ) {
    case '\\':
    case '/':
    case '-':
      switch( argv[i][1] ) {
      case '?':
      case 'h': /* help */
        show_usage(); /* does not return */
        break;

      case 'q':
        nologo = 1;
        break;

      case 't':
        timer = 1;
        break;

      case 'l':
        nolines = 1;
        break;

      case 'm':
        switch( argv[i][2] ) {
        case 's':
          search_mode = SEARCH_STUPID;
          break;

        case 'f':
          search_mode = SEARCH_BRUTE;
          break;

        case 'k':
          search_mode = SEARCH_KNUTH;
          break;

        case 'b':
          search_mode = SEARCH_BOYER;
          break;

        case 'r':
          search_mode = SEARCH_RABIN;
          break;

        default:
          printf( "ts: unknown search mode '%c'\n\n", argv[i][2] );
          show_usage();
        }
        break;

      default:
        /* unrecognised switches are ignored, as they always have been */
        break;
      }
      break;

    default:
      /* test open file; the mode argument of _open() is only meaningful
         together with _O_CREAT, so none is passed */
      if ( ( ih = _open( argv[i], _O_RDONLY ) ) != -1 ) {
        filenames = 1;
        _close( ih );
      }
      else {
        /* refuse a string that would overrun the fixed-size buffer */
        if ( strlen( argv[i] ) >= sizeof search ) {
          printf( "ts: search string too long\n" );
          exit( 1 );
        }
        strcpy( search, argv[i] );
      }
      break;
    }
    /* leave i on the first file name for the main loop below */
    if ( filenames )
      break;
  }

  if ( !search[0] ) {
    printf( "ts: no search string\n\n" );
    show_usage();
  }
  slen = strlen( search );

  if ( !filenames ) {
    printf( "ts: no files to search\n\n" );
    show_usage();
  }

  if ( search_mode == -1 )
    search_mode = SEARCH_BOYER;

  if ( !nologo )
    printf( "Ts v%2.1f - A text searching utility\n", VERSION );

  if ( search_mode == SEARCH_KNUTH )
    init_next();

  if ( search_mode == SEARCH_BOYER )
    init_skip();

  if ( timer )
    clk1 = clock();

  /* allocate 32k buffer */
  if ( ( wbuf = malloc( (size_t) 32767 ) ) == NULL ) {
    printf( "ts: unable to alloc 32k block\n" );
    exit( 1 );
  }

  /* main program loop: i still indexes the first file name */
  for ( ; i < argc; i++ ) {
    if ( strlen( argv[i] ) >= sizeof fname ) {
      printf( "ts: file name too long '%s'\n", argv[i] );
      exit( 1 );
    }
    strcpy( fname, argv[i] );
    if ( ( ih = _open( fname, _O_RDONLY | _O_BINARY ) ) == -1 ) {
      printf( "ts: unable to open '%s'\n", fname );
      exit( 1 );
    }
    search_file();
    _close( ih );
  }

  free( wbuf );
  wbuf = NULL;

  if ( !nologo ) {
    if ( num_hits_t )
      printf( "\n%u occurences of \"%s\" in %u files, %u files searched.\n",
        num_hits_t, search, num_hit_file, num_file );
    else
      printf( "no occurences\n" );
  }

  if ( timer ) {
    clk2 = clock();
    /* clock() counts in units of 1/CLOCKS_PER_SEC seconds */
    printf( "Elapsed Time: %2.1f secs\n",
      (double) ( clk2 - clk1 ) / CLOCKS_PER_SEC );
  }

  return 0;
}

/*
  search_file: read the file open on handle ih and pass each non-empty
  line to search_buf().

  The file is read through wbuf in chunks of at most 32767 bytes (the
  size main allocates), so files of any length are searched in full.
  A line ends at CR, CR LF or a bare LF.  Empty lines only advance
  line_num.  A final line without a terminator is searched too.

  Only the byte count returned by _read() decides where the data ends;
  no byte value is treated as an end-of-file marker.

  If a line reaches MAX_LEN characters a message is printed and the rest
  of the file is skipped.  A read error prints a message and exits with
  status 1.
*/
void search_file( void )
{
  char c, buf[MAX_LEN];
  char pending_lf;
  unsigned int buf_pos;
  int num_act_read;

  ++num_file;
  num_hits_f = 0;

  line_pos = 0;
  line_num = 1;
  buf[0] = '\0';
  pending_lf = 0;

  while ( ( num_act_read = _read( ih, wbuf, 32767U ) ) > 0 ) {
    for ( buf_pos = 0; buf_pos < (unsigned) num_act_read; buf_pos++ ) {
      c = wbuf[buf_pos];

      /* swallow the LF of a CR LF pair, even across a chunk boundary */
      if ( pending_lf ) {
        pending_lf = 0;
        if ( c == 0x0a )
          continue;
      }

      if ( c == 0x0d || c == 0x0a ) {
        pending_lf = (char) ( c == 0x0d );
        if ( line_pos > 0 ) {
          buf[line_pos] = '\0';
          /* search_buf() advances line_num and resets line_pos */
          search_buf( buf );
        }
        else
          ++line_num;
        continue;
      }

      buf[line_pos] = c;
      if ( ++line_pos >= MAX_LEN ) {
        printf( "ts: buf overflow - '%s', line %d\n", fname, line_num );
        return;
      }
    }
  }

  if ( num_act_read < 0 ) {
    printf( "ts: error reading '%s'\n", fname );
    exit( 1 );
  }

  /* last line had no terminator */
  if ( line_pos > 0 ) {
    buf[line_pos] = '\0';
    search_buf( buf );
  }
}


/* The various search algorithms employed */

/*
  search_buf: run the search routine selected by search_mode over one
  NUL-terminated line, then advance line_num and reset line_pos so the
  caller can start collecting the next line.
*/
void search_buf( char *buf )
{
  void (*scan)( char * ) = search_funcs[ search_mode ];

  scan( buf );
  line_pos = 0;
  line_num++;
}

/* My brain-dead brute-force method */

void search_buf_stupid( char *buf )
{
  int i, j, k, blen;

  blen = strlen( buf );
  i = 0;
  while ( i < blen - slen + 1 ) {
    j = 0;
    k = i;
    while ( j < slen ) {
      if ( buf[k] != search[j] )
        break;
      else {
        ++k;
        ++j;
        if ( j == slen )
          process_hit( buf );
      }
    }
    ++i;
  }
}

/* A more elegant brute_force algorithm */

/*
  search_buf_brute: brute-force search that stops at the first match and
  calls process_hit() at most once for the line.
*/
void search_buf_brute( char *buf )
{
  int text_len = strlen( buf );
  int t = 0;   /* 0-based position in the line */
  int p = 0;   /* 0-based position in the search string */

  while ( p < slen && t < text_len ) {
    if ( buf[t] != search[p] ) {
      /* restart one character after where this attempt began */
      t = t - p + 1;
      p = 0;
      continue;
    }
    t++;
    p++;
  }

  if ( p >= slen )
    process_hit( buf );
}

/* The Knuth-Morris-Pratt searching method */

/*
  init_next: build the next[] table used by search_buf_knuth() from the
  current search string.  Pattern positions are 1-based; the entry for
  position k is stored in next[k-1], and entries 0..slen are written.
*/
void init_next( void )
{
  int pos = 1;        /* 1-based pattern position being extended */
  int fallback = 0;   /* 1-based candidate restart position, 0 = none */

  next[0] = 0;

  while ( pos <= slen ) {
    if ( fallback != 0 && search[pos-1] != search[fallback-1] ) {
      /* candidate failed: fall back to its own restart position */
      fallback = next[fallback-1];
      continue;
    }

    ++pos;
    ++fallback;
    /* when both positions hold the same character, restarting there
       would fail in the same way, so reuse that position's entry */
    next[pos-1] = ( search[fallback-1] != search[pos-1] )
      ? fallback
      : next[fallback-1];
  }
}

void search_buf_knuth( char *buf )
{
  int i, j, blen;

  blen = strlen( buf );

  /* perform the search */
  i = j = 1;
  while ( !( ( j > slen ) || ( i > blen ) ) ) {
    if ( !j || ( buf[i-1] == search[j-1] ) ) {
      ++i;
      ++j;
    }
    else
      j = next[j-1];
  }
  if ( j > slen )
    process_hit( buf );
}

/* The Boyer-Moore searching method */

/*
  init_skip: fill skip[] for the current search string.  Every slot
  starts at slen; each slot selected by index() for a character of the
  search string is then set to that character's distance from the end of
  the string.  Later positions overwrite earlier ones, so the rightmost
  occurrence wins.
*/
void init_skip( void )
{
  int *slot;
  int pos;

  for ( slot = skip; slot < skip + 256; slot++ )
    *slot = slen;

  pos = 0;
  while ( pos < slen ) {
    skip[ index( search[pos] ) ] = slen - 1 - pos;
    ++pos;
  }
}

/*
  index: map a character to a table slot in the range 0..255.

  Space, tab and newline all map to slot 0.  Every other character maps
  to its value as an unsigned char, so bytes of 0x80 and above give
  128..255 even where plain char is signed.  The result is used as a
  subscript into skip[256], so it must never be negative.
*/
int index( char c )
{
  switch( c ) {
  case ' ':
  case '\t':
  case '\n':
    return( 0 );

  default:
    return( (int) (unsigned char) c );
  }
}

/*
  search_buf_boyer: search one line right-to-left using the skip[] table
  prepared by init_skip().  Stops at the first match and calls
  process_hit() at most once for the line.

  i and j are 1-based positions in the line and in the search string.
  On a mismatch the pattern is moved by the larger of
    - slen - j + 1, which slides the pattern one place to the right, and
    - skip[] of the mismatched text character, which lines that
      character up with its rightmost occurrence in the pattern.
  Both values are taken before i and j are reset.  The earlier code
  consulted skip[] after the reset, which made the comparison value
  always 1 and looked at the wrong text character.
*/
void search_buf_boyer( char *buf )
{
  int i, j, blen, shift, by_char;

  i = j = slen;
  blen = strlen( buf );

  while ( j >= 1 && i <= blen ) {
    if ( buf[i-1] == search[j-1] ) {
      --i;
      --j;
    }
    else {
      shift = slen - j + 1;
      by_char = skip[ index( buf[i-1] ) ];
      if ( by_char > shift )
        shift = by_char;
      i += shift;
      j = slen;
    }
  }

  /* j runs off the front of the pattern only after a full match */
  if ( j < 1 )
    process_hit( buf );
}

/* The Rabin-Karp searching method */

/* Alternative lmod() built on ldiv().  It is compiled out by the #if 0
   below and kept for reference only. */
#if 0
long lmod( long numer, long denom )
{
  ldiv_t lrec;

  lrec = ldiv( numer, denom );
  return( lrec.rem );
}
#endif

/*
  lmod: remainder of numer divided by denom.  The result pairs with the
  quotient of the / operator, i.e. ( numer / denom ) * denom + result
  equals numer.  denom must not be zero.
*/
long lmod( long numer, long denom )
{
  return( numer % denom );
}

/* Constants for the hash in search_buf_rabin(): every hash value is
   reduced modulo _q_, and _d_ is the multiplier applied to the running
   hash before the next character is added. */
#define _q_ 33554393L
#define _d_ 32L

/* rk_code: index() value of c forced into 0..255, so hash terms are
   never negative. */
static int rk_code( char c )
{
  return( (int) (unsigned char) index( c ) );
}

/* rk_mulmod: ( a * b ) mod _q_ for 0 <= a < _q_ and 0 <= b <= 255.
   b is split into two 4-bit halves so that no intermediate value
   exceeds 2^31 - 1, which keeps the arithmetic valid for a 32-bit long. */
static long rk_mulmod( long a, int b )
{
  long a16;

  a16 = lmod( a * 16L, _q_ );
  return( lmod( a16 * (long) ( b >> 4 ) + a * (long) ( b & 15 ), _q_ ) );
}

/*
  search_buf_rabin: Rabin-Karp search of one line.  A rolling hash of
  each slen-character window is compared with the hash of the search
  string.  Equal hashes are confirmed with a byte comparison before the
  line is reported, so a hash collision cannot produce a false hit.
  Stops at the first match and calls process_hit() at most once.

  Lines shorter than the search string are rejected up front, so the
  hash never reads past the end of the line.  All hash values are kept
  in the range 0.._q_-1.
*/
void search_buf_rabin( char *buf )
{
  int i, blen;
  long h1, h2, dm;

  blen = strlen( buf );
  if ( slen < 1 || blen < slen )
    return;

  /* dm = _d_ ^ ( slen - 1 ) mod _q_: weight of a window's first char */
  dm = 1L;
  for ( i = 1; i <= slen - 1; i++ )
    dm = lmod( ( _d_ * dm ), _q_ );

  /* hash the search string and the first window of the line */
  h1 = h2 = 0L;
  for ( i = 0; i < slen; i++ ) {
    h1 = lmod( ( h1 * _d_ + rk_code( search[i] ) ), _q_ );
    h2 = lmod( ( h2 * _d_ + rk_code( buf[i] ) ), _q_ );
  }

  /* i is the 0-based start of the window whose hash is h2 */
  for ( i = 0; ; i++ ) {
    if ( h1 == h2 && memcmp( buf + i, search, (size_t) slen ) == 0 ) {
      process_hit( buf );
      return;
    }
    if ( i >= blen - slen )
      return;

    /* drop buf[i] from the hash; adding _q_ keeps the value positive */
    h2 = lmod( ( h2 + _q_ - rk_mulmod( dm, rk_code( buf[i] ) ) ), _q_ );
    /* bring in buf[i+slen] */
    h2 = lmod( ( h2 * _d_ + rk_code( buf[i+slen] ) ), _q_ );
  }
}

/*
  process_hit: report a matching line.  The first hit in a file prints
  the file name and counts the file in num_hit_file.  The line is printed
  as is when nolines is set; otherwise it is printed with its line number
  and with leading blanks removed.  Both hit counters are incremented.
*/
void process_hit( char *buf )
{
  if ( num_hits_f == 0 ) {
    ++num_hit_file;
    printf( "\n%s\n", fname );
  }

  if ( !nolines )
    printf( "%4d: %s\n", line_num, trim_left( buf ) );
  else
    printf( "%s\n", buf );

  num_hits_t++;
  num_hits_f++;
}

/*
  trim_left: return the part of buf that follows its leading spaces and
  tabs.

  buf is a NUL-terminated string and is not modified.  The result points
  into buf itself (at its terminator if the string is all blanks), so it
  stays valid for as long as buf does and must not be freed.  The earlier
  version returned the address of a local array, which no longer existed
  by the time the caller used it.
*/
char *trim_left( char *buf )
{
  char *p;

  for ( p = buf; *p == ' ' || *p == '\t'; p++ )
    ;
  return( p );
}

/* End of ts.c */
