/*
 *	Program name:  slice
 *
 *	Copyright (c) 1987 by Gary Puckering
 *
 *	This program may be freely used and/or modified, 
 *	with the following provisions:
 *	1. This notice and the above copyright notice must remain intact.
 *	2. Neither this program, nor any modification of it,
 *	   may be sold for profit without written consent of the author.
 *
 *	-----------------------------------------------------------------
 *
 *	This program allows you to cut a file into pieces, either at every
 *  n lines (like fsplit) or based on a pattern match.  Slices are sent
 *  to output files, which are named by providing a format string.  A
 *  file name may be a constant, or may contain substitution parameters,
 *  such as #f for the input file name or #1, #2, ... #9 for tokens
 *  1 through 9 on the line matching the pattern.
 *
 *	-----------------------------------------------------------------
 */

char version[] = "@(#)slice.c	2.4";

#include <stdio.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <sys/file.h>

#include "opts.h"				/* defines nextstr() etc */

bool exclude = FALSE;			/* -x: matched lines are not written to any output file */
bool split_after = FALSE;		/* -a: start the new slice after the matched line */
bool m_flag = FALSE;			/* -m was given; selects mboxformat when no format is supplied */

FILE *output = (FILE *) NULL;	/* stream of the current output file; NULL means "reject" */
FILE *rejectfd = (FILE *) NULL;	/* stream of the reject file, opened lazily by mkreject() */

int  n_format;					/* number of format strings still available in format[] */
int  filenumber = 0;			/* counter substituted for #n; pre-incremented on each use */
int  every_n_lines = 0;			/* -n: split every n lines (0 means split on pattern) */
int rejectcnt = 0;				/* count of lines written to the reject file */

char inbuffer[BUFLEN];			/* holds the input line currently being processed */
char *progname = "slice";		/* prefix for error messages; reset from argv[0] in main */
char *pattern = (char *) NULL;	/* regular expression used to split the file */
char **format;					/* points at the output-name format currently in use */
char *defaultfmt[] = {DFLTOUTNAME};	/* format used when none is given */
char *mboxformat[] = {MBOXFORMAT};	/* format used when none is given and -m was used */
char parmbuf[BUFLEN];			/* private copy of the matched line, tokenised in place */
char *parm[MAXPARM+1];			/* tokens of the matched line; point into parmbuf or at nullstring */
char nullstring[1] = {""};		/* shared empty string used for missing tokens */
char *infile = (char *) NULL;	/* -f: input file name (NULL means read stdin) */
char rejectfile[MAXFILENAMELEN+2] = {DFLTREJECT}; /* reject file name; overridden by -r */

/* forward declarations (old-style, no parameter information) */
FILE * openfile();
FILE * mkreject();
char * mkname();
char * rmpath();
char   getfmt();


/*
 * Entry point: parse the command line, choose the output-name formats and
 * split either stdin or the file named with -f.
 *
 * Options handled here:
 *   -a          split after the matched line
 *   -e expr     pattern to split on (a first non-flag argument also works)
 *   -m          mailbox mode: pattern "^From ", default format mboxformat
 *   -s          shell-archive mode: pattern "^#! * /bin/sh" (without the blank)
 *   -n lines    split every n lines instead of on a pattern
 *   -f file     read from file instead of stdin
 *   -r file     name of the reject file
 *   -i number   initial value for the #n counter
 *   -x          exclude matched lines from the output
 * Every argument left after the pattern is an output-name format.
 *
 * Always exits with status 0 on the normal path; usage() and the error
 * branches exit with their own status codes.
 */
int
main(argc, argv)
	int argc;
	char *argv[];
{
	char *buffer;		/* option value handed back by nextstr() */
	int i;
	int getnum();		/* does more checking than atoi */
	char *rmpath();    /* removes leading pathname from a filename */

	/* every token slot starts out as the empty string */
	for (i=0; i<=MAXPARM; i++) parm[i] = nullstring;

	/* remove a possible leading pathname
	 * (e.g. /usr/bin/slice is to report its errors as slice)
	 */
	progname = rmpath(argv[0]);

	while (--argc) {
	  if (**++argv == '-') {
		/* NB: *++*argv advances the argument string itself past the '-' */
		switch(*++*argv) {
			case 'a': {				/* split after pattern */
				split_after = TRUE;
				break;
			}
			case 'e': {				/* pattern (expression) */
				++argv; argc--;
				if (argc==0 || !**argv) {
					error("Pattern after -e missing or null\n");
					usage(1);		/* does not return */
				}
				pattern = *argv;
				break;
			}
			case 'm': {				/* mailbox pattern */
				pattern = "^From ";
				m_flag = TRUE;
				break;
			}
			case 's': {				/* shell pattern */
				/* '/' needs no escaping in a C string literal */
				pattern = "^#! */bin/sh";
				break;
			}
			case 'n': {				/* -n n_lines -- split every n lines */
				nextstr(buffer,argc,argv,usage(2));
				every_n_lines = getnum(buffer);
				if (every_n_lines <= 0) {
					error("-n: number must be at least 1\n");
					exit(EXIT_SYNTAX);
				}
				break;
			}
			case 'f': {				/* input file name */
				++argv; argc--;
				if (argc==0 || !**argv) {
					error("Filename after -f missing or null\n");
					usage(1);		/* does not return */
				}
				infile = *argv;
				break;
			}
			case 'r': {				/* reject file name */
				++argv; argc--;
				if (argc==0 || !**argv) {
					error("Filename after -r missing or null\n");
					usage(1);		/* does not return */
				}
				/* rejectfile is a fixed-size array: refuse names that do not fit */
				if (strlen(*argv) >= sizeof rejectfile) {
					error("Filename after -r is too long\n");
					usage(1);		/* does not return */
				}
				strcpy(rejectfile,*argv);
				break;
			}
			case 'i': {				/* -i initial_number */
				nextstr(buffer,argc,argv,usage(2));
				filenumber = getnum(buffer);
				if (filenumber < 0) {
					error("-i must be followed by a positive number\n");
					exit(EXIT_SYNTAX);
				}
				filenumber--;	/* mkname() pre-increments before use */
				break;
			}
			case 'x': {				/* exclude matched lines */
				exclude = TRUE;
				break;
			}
			default: {
				error("Unknown flag -%c\n", **argv);
				usage(1);		/* does not return */
			}
		}			/* end switch */
	  } else {
		if (!pattern) pattern = *argv;	/* first non-flag is pattern */
		else break;						/* the rest are formats */
	  }			/* end if */
	}		/* end while */

	if (!argc) {
		/* no formats on the command line: fall back to a built-in one */
		if (m_flag) {
			format = mboxformat;
		} else {
			format = defaultfmt;
		}
		n_format = 1;
	} else {
		/* argv now points at the first format, argc counts them */
		format = argv;
		n_format = argc;
	}

#ifdef DEBUG
	printf("argc=%d\n",argc);
	printf("format='%s'\n",*format);
	printf("pattern='%s'\n",pattern);
#endif

	if (!infile) split(stdin, DFLTNAME, pattern);
	else        fsplit(infile, pattern);

	exit(0);
}


/*
 * Open the named file for reading, hand it to split() and close it again.
 *
 * An empty or missing name is a usage error (usage(2) exits).  A file that
 * cannot be opened is reported and skipped; a failing fclose() ends the
 * program with EXIT_RUNERR.
 */

fsplit(name, pat)
     char *name;
     char *pat;
{
     FILE *stream;

     if (name == NULL || *name == '\0') {
	  error("Can't split a file with an empty name\n");
	  usage(2);
     }

     stream = fopen(name, "r");

     if (stream != NULL) {
	  (void) split(stream, name, pat);

	  /* a failed close means something has gone badly wrong */
	  if (fclose(stream) == EOF) {
	       error("Can't close %s -- giving up\n", name);
	       exit(EXIT_RUNERR);
	  }
     } else {
	  error("Can't open %s\n", name);
     }
}


/*
 * Split a file that's already been opened.
 *
 * Reads `input` line by line.  A split point is either every
 * every_n_lines-th line (-n) or a line matching `pattern`.  At each split
 * point the current output file is closed and mkname()/openfile() supply
 * the next one; all other lines are copied to the current output with
 * putbuff(), which diverts them to the reject file while no output is open.
 *
 * Option conflicts (-a with -x, -n with -a, -x or a pattern) and an empty
 * pattern are usage errors; usage(2) exits.
 *
 * Two regular-expression interfaces are supported: re_comp()/re_exec() by
 * default, regcmp()/regex() when USG is defined.
 *
 * Returns 1 when filenumber is -1 on completion, otherwise 0.
 *
 * NOTE(review): `output` is only pre-opened in -a mode, so lines read before
 * the first split point go to the reject file; in -n mode that is the first
 * n-1 lines.  Confirm this is the intended behaviour.
 */

int
split(input, name, pattern)
     FILE *input;		/* stream of the input file */
     char *name;		/* input filename, used for #f substitution */
     char *pattern;		/* pattern used to split file */
{

#ifndef USG
     extern char *re_comp();     /* compile string into automaton */
     extern int   re_exec();	/* try to match string */
     int reg_status = 0;	/* regular expression status */
     char *errmessage;
#define REMATCH 1
#define RENOMATCH 0
#define REFAULT -1
#define match(expr) ((expr) == REMATCH)
#else
     extern char *regcmp();	/* compile string into automaton */
     extern char *regex();	/* match string with automaton */
     char *reg_status = (char *) NULL;	/* regular expression status */
     char *rex = (char *) NULL;		/* compiled expression */
#define match(expr) ((expr) != NULL)
#endif

	char *fname;
	int line = 0;		/* lines seen since the last split (-n mode) */
	int boundary;		/* non-zero when the current line is a split point */

	rejectcnt = 0;

	if (split_after && exclude) {
		error("Can't specify both -a and -x\n");
		usage(2);
	}

	if (every_n_lines && exclude) {
		error("Can't specify both -n and -x\n");
		usage(2);
	}

	if (every_n_lines && split_after) {
		error("Can't specify both -n and -a\n");
		usage(2);
	}

	if (every_n_lines && pattern) {
		error("Can't specify both -n and pattern\n");
		usage(2);
	}

	if (!every_n_lines && (!pattern || !*pattern)) {
		error("Can't match an empty pattern\n");
		usage(2);
	}

#ifndef USG
	if (!every_n_lines && (errmessage = re_comp(pattern)) != NULL) {
		error("Error in pattern <%s>: %s\n", pattern, errmessage);
		exit(EXIT_RUNERR);
	}
	/* errmessage is NULL here */
#else
	if (!every_n_lines && (rex = regcmp(pattern,(char *)0)) == NULL) {
		error("Error in pattern <%s>\n", pattern);
		exit(EXIT_RUNERR);
	}
	/* rex is pointer to compiled expression.... */
#endif

	/* if split after mode, open file at start */
	if (split_after) {
		fname = mkname(name);
		output = openfile(fname);
	}

	/* fgets() already leaves room for the terminating NUL; the extra
	 * slack of 2 is kept from the original code */
	while (fgets(inbuffer, BUFLEN - 2, input) != NULL) {

		if (every_n_lines > 0) {
			boundary = (++line == every_n_lines);	/* nth line */
		} else {
#ifndef USG
			reg_status = re_exec(inbuffer);
			if (reg_status == REFAULT) {	/* the re_exec failed */
				error("Internal error trying to match <%s> to <%s>\n",pattern, inbuffer);
				exit(EXIT_INTERN);
			}
#else
			reg_status = regex(rex, inbuffer);
#endif
			boundary = match(reg_status);		/* matches pat */
		}

		if (boundary) {

			/* in -a mode the matched line still belongs to the old slice */
			if (split_after) putbuff(inbuffer,output);

			/* tokens of the matched line feed the #1..#9 and #$ substitutions */
			if (!every_n_lines) get_parms(inbuffer);

			/* close the current file, but never a shared stream */
			if (output && output != stderr &&
						  output != stdout &&
						  output != rejectfd) {
				if (fclose(output) == EOF) {
					error("Can't close output file\n");
					exit(EXIT_RUNERR);
				}
			}

			fname = mkname(name);

			if (*fname) {
				/* open a new file */
				output = openfile(fname);
			} else {
				/* no filename to open, so use reject file */
				error("Insufficient formats -- remainder rejected\n");
				output = (FILE *) NULL;
			}

			line = 0;  /* reset input line count */

			/* -x: the matched line is dropped;
			 * -a: the matched line has already been written above */
			if ((exclude || split_after) && match(reg_status)) continue;
		}

		putbuff(inbuffer, output);		/* now put line out */

	}  /* end while */

	if (rejectcnt && strcmp(rejectfile,"/dev/null")==0) {
		error("%d lines rejected to /dev/null\n",rejectcnt);
	}

	return (filenumber == -1);	/* exit status for main */
}


/*
 * Return the stream of the reject file, opening it on first use.
 *
 * The reject file name "stderr" selects the standard error stream; any
 * other name is passed to openfile().  Failure to open the reject file ends
 * the program with EXIT_RUNERR.
 */

FILE *
mkreject()
{
	if (!rejectfd) {
		/* first call: pick stderr or open the named file */
		rejectfd = (strcmp(rejectfile,"stderr") == 0)
				? stderr
				: openfile(rejectfile);

		if (!rejectfd) {
			error("Cannot open reject file %s\n",rejectfile);
			exit(EXIT_RUNERR);
		}
	}

	return (rejectfd);
}


/*
 * Open an output file (or the reject file).
 *
 *   "+name"  open name for append; refused when name equals the -f input file
 *   "+"      use stdout
 *   "@"      write to /dev/null
 *   other    open for write; a diagnostic is printed when an existing file
 *            (other than /dev/null) is overwritten
 *
 * Returns the open stream, or NULL after printing a diagnostic when the
 * file could not be opened.
 *
 * NOTE(review): only the append form checks against the input file name; the
 * plain write form would truncate the input file -- confirm whether that
 * case should be refused as well.
 */

FILE *
openfile(fname)
	char *fname;				/* file to be opened */
{
	FILE *fd = (FILE *) NULL;
	bool exists = FALSE;

	switch (fname[0]) {
		case '+': {
			fname++;		/* skip the '+' */
			/* check for output file = input file */
			if (infile && (strcmp(fname,infile)==0) ) {
				error("Output file %s same as input file -- slice rejected\n",fname);
				break;
			}
			if (fname[0] == '\0') {
				fd = stdout;
				break;
			}
			if ((fd = fopen(fname, "a")) == NULL) {
				/* report the name itself (was fname[1], a char, for %s) */
				error("Can't open output file %s for append -- slice rejected\n", fname);
			}
			break;
		}
		case '@': {
			if (fname[1] == '\0') {
				if ((fd = fopen("/dev/null", "w")) == NULL) {
					error("Can't open output file /dev/null -- slice rejected\n");
				}
				break;
			}
			/* a longer name that merely starts with '@' is an ordinary file */
		}
		/* FALLTHROUGH */
		default: {
			/* remember whether the file existed before fopen() creates it */
			if (access(fname,F_OK)==0) exists = TRUE;
			if ((fd = fopen(fname, "w")) == NULL) {
				error("Can't open output file %s for write -- slice rejected\n", fname);
				break;
			}
			if (exists && strcmp(fname,"/dev/null")!=0) {
				error("File %s overwritten\n",fname);
			}
		}
	}
	return (fd);
}


/* Make a new file name using a format string */

char *
mkname(name)
	char *name;		/* file name for #f substitution */
{
	static char fnambuf[MAXFILENAMELEN + 2]; /* +1 for null, +1 for overflow */
	static char lastname[MAXFILENAMELEN + 2];

	int i, j;
	char *p, *q, *fn;
	char fmtcode;
	char fmt[MAXFILENAMELEN];
	char tempbuf[MAXFILENAMELEN];

  do_format:
	for (p=(*format), q=fnambuf; *p; p++) {
		if (*p != PARMESCAPE) { 
			*q = *p; 
			q++;
		} else {
			*q = NULL;
			switch (*++p) {
				case PARMESCAPE: {
					*q = PARMESCAPE;
					q++;
					break;
				}
				case 'f': {
					fn = rmpath(name);
					strcat(q,fn);
					q += strlen(fn);
					break;
				}
				case 'n': {
					++filenumber;
					if (*(p+1) == '%') {
						p++;
						fmtcode = getfmt(fmt,p);
						p += strlen(fmt) - 1;
						sprintf(tempbuf,fmt,filenumber);
					} else {
						sprintf(tempbuf,"%d",filenumber);
					}
					strcat(q,tempbuf);
					q += strlen(tempbuf);
					break;
				}
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9': 
				case '$': {
					if (*p == '$') {
						i = lastparm();
					} else {
						i = (*p) - '1';
					}
					if (*(p+1) == '%') {
						p++;
						fmtcode = getfmt(fmt,p);
						p += strlen(fmt) - 1;
						if (fmtcode != 's') {
							if (fmtcode == 'm') {
								j = mtoi(parm[i]);
							} else {
								j = atoi(parm[i]);
							}
							sprintf(tempbuf,fmt,j);
						} else {
							sprintf(tempbuf,fmt,parm[i]);
						}
					} else {
						strcpy(tempbuf,parm[i]);
					}
					strcat(q,tempbuf);
					q += strlen(tempbuf);
					break;
				}
				default: {
					error("Invalid substitution #%c in format '%s'\n",*p,*format);
					exit(EXIT_RUNERR);
				}
			}	/* end switch */
		}	/* end if-else */
	}	/* end for */
	
	*q = NULL;

	/* if name is same, try next format */
	if (strcmp(fnambuf,lastname)==0) {
		if (n_format>1) {	/* must be a format left to try */
			format++;
			--n_format;
			filenumber=0;
			lastname[0] = NULL;
			goto do_format;
		} else {			/* we've run out of formats */
			fnambuf[0] = NULL;			
		}
	}

	if (fnambuf[0]) strcpy(lastname,fnambuf);

	return(fnambuf);

} /* end routine */



/* Get a printf-style format string from within an output format */

char
getfmt(fmt,p)		/* returns last character of string (format code) */
	char *fmt;		/* target of format string */
	char *p;		/* p should point to the '%' starting the format */
{
	char *q;
	char fmtcode;

	if (*p != '%') {
		error("Internal error -- getfmt called when not pointing at %");
		exit(EXIT_RUNERR);
	}
	q = strpbrk(p,"mduxhocsfeg");  /* 'm' is a special extension */
	
	if (!q) {
		error("Can't find end of format spec");
		exit(EXIT_RUNERR);
	}

	fmtcode = *q;
	strncpy(fmt,p,(q-p+1));

	switch (*q) {
		case 'h':
		case 'c':
		case 'f':
		case 'e':
		case 'g': {
			error("Format '%s' is not supported\n",fmt);
			exit(EXIT_RUNERR);
		}
		case 'm': {
			fmt[strlen(fmt) - 1] = 'd';	/* change to d */
		}
	}

	return(fmtcode);
}



/*
 * Write one input line to fd.
 * A NULL fd means no output file is open: the line is counted in rejectcnt
 * and written to the reject file obtained from mkreject().
 */

putbuff(buffer,fd)
	char *buffer;
	FILE *fd;
{
	if (!fd) {
		rejectcnt++;
		fd = mkreject();
	}
	fputs(buffer, fd);
}



/* getnum(s) returns the value of the unsigned decimal int in s.
 * Returns -1 when s is NULL or empty, contains anything other than decimal
 * digits (including a sign or trailing garbage), or does not fit in an int.
 */

int
getnum(s)
     char *s;
{
     register char *p;
     int value = 0;

     if (s == NULL || *s == '\0') {
	  return -1;
     }

     for (p = s; *p; p++) {
	  int digit;

	  /* <ctype.h> functions need an unsigned char value */
	  if (!isdigit((unsigned char) *p)) {
	       return -1;
	  }
	  digit = *p - '0';

	  /* refuse anything that would overflow value * 10 + digit */
	  if (value > (INT_MAX - digit) / 10) {
	       return -1;
	  }
	  value = value * 10 + digit;
     }
     return value;
}



/*
 * Remove the leading pathname from a filename.
 *
 * Returns a pointer to the part of fullname after its last '/'.  When there
 * is no '/', or nothing follows the last one, fullname itself is returned
 * (this includes a NULL argument).  The result points into fullname; nothing
 * is copied.
 */

char *
rmpath(fullname)
    char *fullname;
{
    char *slash;

    if (fullname == NULL) {
         return(fullname);
    }

    /* strrchr() never steps past the terminating NUL, unlike the earlier
     * hand-written scan, which did so on a trailing '/' */
    slash = strrchr(fullname, '/');
    if (slash != NULL && slash[1] != '\0') {
         return(slash + 1);
    }
    return(fullname);
}



/*
 * Get tokens (parameters) from a matched input line.
 *
 * The line is copied to parmbuf, a trailing newline is removed and the copy
 * is split at blanks.  parm[0..MAXPARM] then point at the tokens in order;
 * slots without a token point at nullstring, so no entry is ever NULL.
 * Only the space character separates tokens.
 */

get_parms(buffer)
	char *buffer;
{
	int  i;
	size_t len;
	char *token;

	/* bounded copy: never write past the end of parmbuf */
	len = strlen(buffer);
	if (len >= sizeof parmbuf) len = sizeof parmbuf - 1;
	memcpy(parmbuf, buffer, len);
	parmbuf[len] = '\0';

	/* len can be 0 (e.g. a line starting with a NUL byte), so test it first */
	if (len > 0 && parmbuf[len-1] == '\n') parmbuf[len-1] = '\0';

	token = strtok(parmbuf," ");

	for (i=0; i<=MAXPARM; i++) {
		if (token) {
			parm[i] = token;
			token = strtok((char *) NULL," ");
		} else {
			parm[i] = nullstring;
		}
	}
}



/*
 * Find the last non-empty parameter.
 * Scans parm[] from index MAXPARM down to 1 and returns the index of the
 * first entry that is non-NULL and not the empty string; returns 0 when
 * none of those entries qualifies.
 */

lastparm()
{
	int idx = MAXPARM;

	while (idx > 0) {
		if (parm[idx] && *parm[idx]) break;
		idx--;
	}
	return(idx);
}



/*
 * Convert a three character month name to an integer.
 *
 * monthname must match one of "Jan" .. "Dec" exactly (case-sensitive, no
 * extra characters).  Returns 1 for "Jan" through 12 for "Dec".  Any other
 * string is reported with error() and 0 is returned.
 */

int
mtoi(monthname)
	char *monthname;
{
	static char *months[] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
		};

	int i;

	for (i=0; i<12; i++) {
		if (strcmp(monthname,months[i])==0) return(i+1);
	}

	/* newline added so the message ends its line like every other diagnostic */
	error("Invalid month '%s' found -- zero used\n",monthname);
	return(0);
}


/*
 * Print a diagnostic on stderr, prefixed with the program name and ": ".
 *
 * fmt is a printf-style format; up to four arguments are passed through
 * unchanged to fprintf().  This is the pre-<stdarg.h> idiom and is kept
 * because callers rely on an implicit declaration of this function, which a
 * variadic definition would not be compatible with.
 *
 * The pass-through parameters are declared as pointers rather than left as
 * implicit ints, so that string arguments for %s are not truncated on
 * systems where a pointer is wider than an int.
 * NOTE(review): this is still not strictly conforming C (int arguments and
 * omitted arguments travel through pointer-typed slots).
 */
error(fmt, a1, a2, a3, a4)
     char *fmt;
     char *a1, *a2, *a3, *a4;
{
     fputs(progname, stderr);
     fputs(": ", stderr);
     fprintf(stderr, fmt, a1, a2, a3, a4);
}


/*
 * Print the command synopsis on stderr.
 * A non-zero status ends the program with that exit status; with status 0
 * the function returns to its caller.
 */
usage(status)
     int status;	/* exit if status != 0 */
{
     static char synopsis[] = "Usage: %s [-f filename] [-a|-x] [-i<n>] [-w] [-m|-s|-n<n>] [-r file] [-e expression | expression] [format...]\n";

     fprintf(stderr, synopsis, progname);

     if (status != 0) {
	  exit(status);
     }
}

