Date: Wed, 23 Apr 86 20:03:28 EST
From: Edward_Vielmetti%UMich-MTS.Mailnet@MIT-MULTICS.ARPA
To: info-ibmpc@USC-ISIB.ARPA
Subject: Breakup.C

/*** BREAKUP.  Break up a (presumably large) file into smaller pieces,
/    about a set of breakpoints.  Invoked as:
/       BREAKUP  BigFile.Ext  -C1 A1  -C2 A2  -C3 A3   etc...
/    where the arguments (breakpoints) are:
/       -B  nnn    break after next nnn bytes
/       -L  nnn     "     "     "    "  lines
/       -S  str    break after next occurrence of "str"
/       -LB nnn    break after newline after next nnn bytes
/       -LS str    break after newline after next occurrence of "str"
/       -R         repeat last breakpoint until eof on BigFile
/
/    Written by Charles Roth, December 1983.  This program is in the public
/    domain. */

#include <stdio.h>
/*** Breakpoint type codes.  PARSE returns one of these for each breakpoint
/    on the command line, and MAIN tests the current one after every
/    character copied:
/       DASH_B   (-b  nnn)  break after next nnn bytes
/       DASH_S   (-s  str)  break after next occurrence of "str"
/       DASH_L   (-l  nnn)  break after next nnn lines
/       DASH_LB  (-lb nnn)  break at the newline after next nnn bytes
/       DASH_LS  (-ls str)  break at the newline after next occurrence of "str"
/       NONE                returned by PARSE when the argument list is used up */
#define  DASH_B  0
#define  DASH_S  1
#define  DASH_L  2
#define  DASH_LB 3
#define  DASH_LS 4
#define  NONE    5
/*** FUNCTION expands to nothing; it is written in front of each function
/    definition purely as a visual marker. */
#define  FUNCTION
/*** NOT is a readable spelling of the logical-negation operator. */
#define  NOT       !

FUNCTION  main (argc, argv)
   int  argc;   char *argv[];
{
   FILE *in, *out, *fopen();
   int   outnum, c, breaktest, next, ringlen, ringpos, i, r, strfound;
   long  breaknumb, count;
   char  fname[80], fext[80], outfile[80], breakstr[80], ringbuf[80];


   /*** Mark the end of the argument list so PARSE knows when to stop. */
   argv[argc] = NULL;

   if (argc <= 1) {
      printf ("%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
      "Usage:  BREAKUP  File.Ext  -C1 A1  -C2 A2  -C3 A3  etc...",
      "  where each -Cn An means break after...",
      "    -B   nnn    next nnn bytes",
      "    -L   nnn    next nnn lines",
      "    -S   str    next occurrence of \"str\"",
      "    -LB  nnn    end-of-line after next nnn bytes",
      "    -LS  str    end-of-line after next occurrence of \"str\"",
      "    -R          repeat last breakpoint indefinitely.");
      exit(1);
   }

   /*** Make sure the input file exists, and open it.*/
   if ( (in = fopen (argv[1], "r")) == NULL) {
      printf ("No such file %s\n", argv[1]);
      exit(1);
   }
   xstrbreak (argv[1], fname, fext, ".");

   /*** Prepare for main character-by-character loop.  NEXT means "close
   /    file and process next breakpoint."   STRFOUND is used by the -LS
   /    option to remember if the string was found somewhere in the line. */
   next   = 1;
   out    = NULL;
   outnum = 0;
   strfound = 0;

   while ( (c = getc (in)) >= 0) {

      if (next) {
         /*** Reset the counts, close the old file, open the new one. */
         next  = 0;
         count = 0;
         if (out != NULL)  {fclose (out);    printf  ("%s\n", outfile);}
         sprintf (outfile, "%s.%03d", fname, outnum++);
         out = fopen (outfile, "w");

         /*** Parse the next breakpoint and return its type. */
         breaktest = parse (argv, breakstr, &breaknumb);

         /*** Initialize the ring buffer for testing -s, -ls strings */
         if (breaktest==DASH_S  ||  breaktest==DASH_LS) {
            ringlen = strlen (breakstr);
            ringpos = 0;
         }
      }

      putc (c, out);

      /*** For each possible type of breakpoint, test the type and see if
      /    the appropriate condition has happened to break off a new piece. */
      if (breaktest==DASH_B   &&               ++count >= breaknumb)  next = 1;
      if (breaktest==DASH_L   &&  c=='\n'  &&  ++count >= breaknumb)  next = 1;
      if (breaktest==DASH_LB  &&  ++count >= breaknumb  &&  c=='\n')  next = 1;
      if (breaktest==DASH_S  ||  breaktest==DASH_LS) {
         /*** The "str" test is the most difficult.  Keep a ring buffer of
         /    the characters encountered so far, size equal to the size of
         /    the break string.  Each time around the main character loop,
         /    add the new char to the end of the ring buffer, and compare
         /    the ring buffer against the break string.  */
         ringbuf[ringpos] = c;
         ringpos = (ringpos + 1) % ringlen;
         for (i=0, r=ringpos;   breakstr[i];   ++i, r = (r+1) % ringlen)
            if (breakstr[i] != ringbuf[r])  break;
         if (breaktest==DASH_S)    next     = NOT breakstr[i];
         if (breaktest==DASH_LS)   strfound = NOT breakstr[i]  ||  strfound;
      }

      if (breaktest==DASH_LS  &&  c=='\n') {
         next     = strfound;
         strfound = 0;
      }

   }
   fclose (out);
   printf ("%s\n", outfile);
}


/*** PARSE breakpoint commands.  On each call, returns 'next' breakpoint
/    type, sets BREAKNUMB to 'nnn' part of -l, -b, -lb breakpoints, and
/    BREAKSTR to string part of -s, -ls breakpoint.
/
/    ARGV must be terminated by a NULL pointer.  The position in the
/    argument list is kept in a static variable between calls, starting
/    just after the file name in argv[1].  Option names are lower-cased in
/    place before being compared.
/
/    Returns NONE when the argument list is used up.  NONE is also returned,
/    after a message on stderr, when an option is not recognized, when its
/    argument is missing or (for -l, -b, -lb) is not a number, or when -r
/    has no earlier breakpoint to repeat.  In all of these cases the saved
/    position is left so that later calls return NONE again. */

/*** Size of the caller's BREAKSTR buffer (MAIN declares breakstr[80]). */
#define  BREAKSTR_SIZE  80

FUNCTION  parse (argv, breakstr, breaknumb)
   char  breakstr[], *argv[];
   long  *breaknumb;
{
   static int  a = 1;
   static char realstr[2] = {0, 0};
   char        octalstr[10];
   int         type, p;
   unsigned    octalval;

   if (argv[++a] == NULL)  {--a;   return(NONE);}

   xstrlower  (argv[a]);

   /*** -r repeats the previous breakpoint by stepping back onto its option.
   /    The earliest possible option is argv[2], so -r cannot legally
   /    appear before argv[4]. */
   if (xstreq (argv[a], "-r")) {
      if (a < 4) {
         fprintf (stderr, "-R has no previous breakpoint to repeat\n");
         --a;
         return(NONE);
      }
      a = a-2;
   }

   if      (xstreq (argv[a], "-l"))   type = DASH_L;
   else if (xstreq (argv[a], "-b"))   type = DASH_B;
   else if (xstreq (argv[a], "-s"))   type = DASH_S;
   else if (xstreq (argv[a], "-lb"))  type = DASH_LB;
   else if (xstreq (argv[a], "-ls"))  type = DASH_LS;
   else {
      fprintf (stderr, "Unknown breakpoint %s\n", argv[a]);
      --a;
      return(NONE);
   }

   /*** Every breakpoint type takes exactly one argument. */
   if (argv[a+1] == NULL) {
      fprintf (stderr, "Breakpoint %s needs an argument\n", argv[a]);
      --a;
      return(NONE);
   }

   ++a;
   if (type==DASH_S  ||  type==DASH_LS) {
      /*** Copy the string, truncating it to fit the caller's buffer. */
      for (p=0;   p < BREAKSTR_SIZE-1  &&  argv[a][p];   ++p)
         breakstr[p] = argv[a][p];
      breakstr[p] = '\0';

      /*** Convert the various \ escape sequences to their proper form.
      /    (XSTRALTER is defined elsewhere; it is presumed to replace one
      /    occurrence per call and return zero once none are left.) */
      while  (xstralter (breakstr, "\\\"", "\"")) ;
      while  (xstralter (breakstr, "\\n",  "\n")) ;
      while  (xstralter (breakstr, "\\\\", "\\")) ;

      /*** Convert \ddd octal strings to the actual characters.  SSCANF
      /    returns 0 when no digits follow the backslash and EOF when the
      /    backslash is the last character, so only a return of exactly 1
      /    means OCTALVAL was set. */
      for (p=0;  breakstr[p];  ++p) {
         if (breakstr[p] == '\\') {
            xstrsub (octalstr, breakstr, p, 4);
            if (sscanf (octalstr, "\\%3o", &octalval) != 1)  continue;
            realstr[0] = octalval;
            xstralter (breakstr, octalstr, realstr);
         }
      }
   }

   if (type==DASH_B  ||  type==DASH_LB  ||  type==DASH_L) {
      if (sscanf (argv[a], "%ld", breaknumb) != 1) {
         fprintf (stderr, "Breakpoint %s needs a number, not %s\n",
                  argv[a-1], argv[a]);
         /*** Step back to just before the option so that the next call
         /    meets the same bad breakpoint and returns NONE again. */
         a = a-2;
         return(NONE);
      }
   }

   return(type);
}
