#include <stdio.h>
#include <string.h>
#include "utils.h"

#ifdef TESTING
#define MAXF 3
int 
main ()
{
  int i, numfields;
  char thisline[1000];
  char **words;

  printf ("For today, atmost %d fields/line.  FS = | only.\n", MAXF);
  words = (char **) malloc (MAXF * sizeof (char *));
  do
    {
      printf ("\n\nEnter a line: ");
      if (NULL == gets (thisline))
	break;
      else
	{
	  numfields = split (thisline, words, MAXF, "|");
	  printf ("\n%d fields.\n", numfields);
	  for (i = 0; i < (numfields > MAXF ? MAXF : numfields); i++)
	    printf ("%s\n", words[i]);
	}
    }
  while (1 == 1);

  return 0;
}

#endif /* TESTING */

/*
 * split - divide a string into fields, like awk split()
 */
int				/* number of fields, including overflow */
split (string, fields, nfields, sep)
     char *string;
     char *fields[];		/* list is not NULL-terminated */
     int nfields;		/* number of entries available in fields[] */
     char *sep;			/* "" white, "c" single char, "ab" [ab]+ */
{
  register char *p = string;
  register char c;		/* latest character */
  register char sepc = sep[0];
  register char sepc2;
  register int fn;
  register char **fp = fields;
  register char *sepp;
  register int trimtrail;

  /* white space */
  if (sepc == '\0')
    {
      while ((c = *p++) == ' ' || c == '\t')
	continue;
      p--;
      trimtrail = 1;
      sep = " \t";		/* note, code below knows this is 2 long */
      sepc = ' ';
    }
  else
    trimtrail = 0;
  sepc2 = sep[1];		/* now we can safely pick this up */

  /* catch empties */
  if (*p == '\0')
    return (0);

  /* single separator */
  if (sepc2 == '\0')
    {
      fn = nfields;
      for (;;)
	{
	  *fp++ = p;
	  fn--;
	  if (fn == 0)
	    break;
	  while ((c = *p++) != sepc)
	    if (c == '\0')
	      return (nfields - fn);
	  *(p - 1) = '\0';
	}
      /* we have overflowed the fields vector -- just count them */
      fn = nfields;
      for (;;)
	{
	  while ((c = *p++) != sepc)
	    if (c == '\0')
	      return (fn);
	  fn++;
	}
      /* not reached */
    }

  /* two separators */
  if (sep[2] == '\0')
    {
      fn = nfields;
      for (;;)
	{
	  *fp++ = p;
	  fn--;
	  while ((c = *p++) != sepc && c != sepc2)
	    if (c == '\0')
	      {
		if (trimtrail && **(fp - 1) == '\0')
		  fn++;
		return (nfields - fn);
	      }
	  if (fn == 0)
	    break;
	  *(p - 1) = '\0';
	  while ((c = *p++) == sepc || c == sepc2)
	    continue;
	  p--;
	}
      /* we have overflowed the fields vector -- just count them */
      fn = nfields;
      while (c != '\0')
	{
	  while ((c = *p++) == sepc || c == sepc2)
	    continue;
	  p--;
	  fn++;
	  while ((c = *p++) != '\0' && c != sepc && c != sepc2)
	    continue;
	}
      /* might have to trim trailing white space */
      if (trimtrail)
	{
	  p--;
	  while ((c = *--p) == sepc || c == sepc2)
	    continue;
	  p++;
	  if (*p != '\0')
	    {
	      if (fn == nfields + 1)
		*p = '\0';
	      fn--;
	    }
	}
      return (fn);
    }

  /* n separators */
  fn = 0;
  for (;;)
    {
      if (fn < nfields)
	*fp++ = p;
      fn++;
      for (;;)
	{
	  c = *p++;
	  if (c == '\0')
	    return (fn);
	  sepp = sep;
	  while ((sepc = *sepp++) != '\0' && sepc != c)
	    continue;
	  if (sepc != '\0')	/* it was a separator */
	    break;
	}
      if (fn < nfields)
	*(p - 1) = '\0';
      for (;;)
	{
	  c = *p++;
	  sepp = sep;
	  while ((sepc = *sepp++) != '\0' && sepc != c)
	    continue;
	  if (sepc == '\0')	/* it wasn't a separator */
	    break;
	}
      p--;
    }

  /* not reached */
}

#ifdef TEST_SPLIT


/*
 * test program
 * pgm		runs regression
 * pgm sep	splits stdin lines by sep
 * pgm str sep	splits str by sep
 * pgm str sep n	splits str by sep n times
 */
int
main (argc, argv)
     int argc;
     char *argv[];
{
  char buf[512];
  register int n;
#	define	MNF	10
  char *fields[MNF];

  if (argc > 4)
    for (n = atoi (argv[3]); n > 0; n--)
      {
	(void) strcpy (buf, argv[1]);
      }
  else if (argc > 3)
    for (n = atoi (argv[3]); n > 0; n--)
      {
	(void) strcpy (buf, argv[1]);
	(void) split (buf, fields, MNF, argv[2]);
      }
  else if (argc > 2)
    dosplit (argv[1], argv[2]);
  else if (argc > 1)
    while (fgets (buf, sizeof (buf), stdin) != NULL)
      {
	buf[strlen (buf) - 1] = '\0';	/* stomp newline */
	dosplit (buf, argv[1]);
      }
  else
    regress ();

  exit (0);
}

dosplit (string, seps)
     char *string;
     char *seps;
{
#	define	NF	5
  char *fields[NF];
  register int nf;

  nf = split (string, fields, NF, seps);
  print (nf, fields);
}

print (nf, fields)
     int nf;
     char *fields[];
{
  register int fn;

  printf ("%d:\t", nf);
  for (fn = 0; fn < nf; fn++)
    printf ("\"%s\"%s", fields[fn], (fn + 1 < nf) ? ", " : "\n");
}

#define	RNF	5		/* some table entries know this */
struct
  {
    char *str;
    char *seps;
    int nf;
    char *fi[RNF];
  }

tests[] =
{
  "", " ", 0,
  {
    ""
  }
  ,
  " ", " ", 2,
  {
    "", ""
  }
  ,
  "x", " ", 1,
  {
    "x"
  }
  ,
  "xy", " ", 1,
  {
    "xy"
  }
  ,
  "x y", " ", 2,
  {
    "x", "y"
  }
  ,
  "abc def  g ", " ", 5,
  {
    "abc", "def", "", "g", ""
  }
  ,
  "  a bcd", " ", 4,
  {
    "", "", "a", "bcd"
  }
  ,
  "a b c d e f", " ", 6,
  {
    "a", "b", "c", "d", "e f"
  }
  ,
  " a b c d ", " ", 6,
  {
    "", "a", "b", "c", "d "
  }
  ,

  "", " _", 0,
  {
    ""
  }
  ,
  " ", " _", 2,
  {
    "", ""
  }
  ,
  "x", " _", 1,
  {
    "x"
  }
  ,
  "x y", " _", 2,
  {
    "x", "y"
  }
  ,
  "ab _ cd", " _", 2,
  {
    "ab", "cd"
  }
  ,
  " a_b	 c ", " _", 5,
  {
    "", "a", "b", "c", ""
  }
  ,
  "a b c_d e f", " _", 6,
  {
    "a", "b", "c", "d", "e f"
  }
  ,
  " a b c d ", " _", 6,
  {
    "", "a", "b", "c", "d "
  }
  ,

  "", " _~", 0,
  {
    ""
  }
  ,
  " ", " _~", 2,
  {
    "", ""
  }
  ,
  "x", " _~", 1,
  {
    "x"
  }
  ,
  "x y", " _~", 2,
  {
    "x", "y"
  }
  ,
  "ab _~ cd", " _~", 2,
  {
    "ab", "cd"
  }
  ,
  " a_b	 c~", " _~", 5,
  {
    "", "a", "b", "c", ""
  }
  ,
  "a b_c d~e f", " _~", 6,
  {
    "a", "b", "c", "d", "e f"
  }
  ,
  "~a b c d ", " _~", 6,
  {
    "", "a", "b", "c", "d "
  }
  ,

  "", " _~-", 0,
  {
    ""
  }
  ,
  " ", " _~-", 2,
  {
    "", ""
  }
  ,
  "x", " _~-", 1,
  {
    "x"
  }
  ,
  "x y", " _~-", 2,
  {
    "x", "y"
  }
  ,
  "ab _~- cd", " _~-", 2,
  {
    "ab", "cd"
  }
  ,
  " a_b	 c~", " _~-", 5,
  {
    "", "a", "b", "c", ""
  }
  ,
  "a b_c-d~e f", " _~-", 6,
  {
    "a", "b", "c", "d", "e f"
  }
  ,
  "~a-b c d ", " _~-", 6,
  {
    "", "a", "b", "c", "d "
  }
  ,

  "", "	 ", 0,
  {
    ""
  }
  ,
  " ", "  ", 2,
  {
    "", ""
  }
  ,
  "x", "  ", 1,
  {
    "x"
  }
  ,
  "xy", "  ", 1,
  {
    "xy"
  }
  ,
  "x y", "  ", 2,
  {
    "x", "y"
  }
  ,
  "abc def  g ", "  ", 4,
  {
    "abc", "def", "g", ""
  }
  ,
  "  a bcd", "	", 3,
  {
    "", "a", "bcd"
  }
  ,
  "a b c d e f", "  ", 6,
  {
    "a", "b", "c", "d", "e f"
  }
  ,
  " a b c d ", "  ", 6,
  {
    "", "a", "b", "c", "d "
  }
  ,

  "", "", 0,
  {
    ""
  }
  ,
  " ", "", 0,
  {
    ""
  }
  ,
  "x", "", 1,
  {
    "x"
  }
  ,
  "xy", "", 1,
  {
    "xy"
  }
  ,
  "x y", "", 2,
  {
    "x", "y"
  }
  ,
  "abc def  g ", "", 3,
  {
    "abc", "def", "g"
  }
  ,
  "\t a bcd", "", 2,
  {
    "a", "bcd"
  }
  ,
  "  a \tb\t c ", "", 3,
  {
    "a", "b", "c"
  }
  ,
  "a b\tc d e f", "", 6,
  {
    "a", "b", "c", "d", "e f"
  }
  ,
  " a b c d e f ", "", 6,
  {
    "a", "b", "c", "d", "e f "
  }
  ,

  NULL, NULL, 0,
  {
    NULL
  }
  ,
};

regress ()
{
  char buf[512];
  register int n;
  char *fields[RNF + 1];
  register int nf;
  register int i;
  register int printit;
  register char *f;

  for (n = 0; tests[n].str != NULL; n++)
    {
      (void) strcpy (buf, tests[n].str);
      fields[RNF] = NULL;
      nf = split (buf, fields, RNF, tests[n].seps);
      printit = 0;
      if (nf != tests[n].nf)
	{
	  printf ("split `%s' by `%s' gave %d fields, not %d\n",
		  tests[n].str, tests[n].seps, nf, tests[n].nf);
	  printit = 1;
	}
      else if (fields[RNF] != NULL)
	{
	  printf ("split() went beyond array end\n");
	  printit = 1;
	}
      else
	{
	  for (i = 0; i < nf && i < RNF; i++)
	    {
	      f = fields[i];
	      if (f == NULL)
		f = "(NULL)";
	      if (strcmp (f, tests[n].fi[i]) != 0)
		{
		  printf ("split `%s' by `%s', field %d is `%s', not `%s'\n",
			  tests[n].str, tests[n].seps,
			  i, fields[i], tests[n].fi[i]);
		  printit = 1;
		}
	    }
	}
      if (printit)
	print (nf, fields);
    }
}

#endif
