/*
	punc: print punctuation graphs for sentences
	by Gary Perlman & Tom Erickson
	A punctuation graph places a sentence on one line
	with words replaced by underscores and punctuation maintained verbatim.

	A sentence must end at the end of input lines.
	Lines beginning with . are ignored (nroff control lines).
*/
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
	Status is the int result type of punc() and the value main() passes
	to exit(); SUCCESS is 0 and FAILURE is 1.
	Boole is an int used as a truth value.  The option flags in this file
	are tested with "== TRUE", so they should only hold TRUE or FALSE.
*/
typedef	int       Status;      /* return/exit status of functions */
#define	SUCCESS   ((Status) 0)
#define	FAILURE   ((Status) 1)
typedef	int       Boole;       /* no Boolean type in C */
#define	TRUE      ((Boole) 1)
#define	FALSE     ((Boole) 0)

/*
	macros to identify the end of a sentence
	Finisher(c) is true for the terminators ! ? and .
	Maybefinisher(c) is true for a closing quote or parenthesis (' ) "),
	which getsentence() accepts as the end of a sentence only when the
	character just before it is a Finisher.
	Both macros evaluate their argument more than once, so the argument
	must not have side effects.
*/
#define	Maybefinisher(c) ((c)=='\'' || (c)==')' || (c)=='"')
#define	Finisher(c) ((c)=='!' || (c)=='?' || (c)=='.')

/*
	Characters that puncsentence() writes in place of each word.
	WORDCHAR is the default.  LONGWORD is used only when word lengths are
	being printed (Printlength) and the word has more than 10 characters.
	CAPCHAR is used only when words are being classified (Mapwords), for a
	capitalized word that matches none of the word lists.
*/
#define	WORDCHAR '_'  /* display words like this */
#define LONGWORD '*'  /* display long words like this */
#define	CAPCHAR  '^'  /* display capitalized words like this */

/*
	Class characters, used only when Mapwords is set.  For a capitalized
	word, PRONOUN, WHCHAR and THCHAR are passed through toupper(), so
	WHCHAR and THCHAR appear as 'W' and 'T'; '"' is not a letter, so a
	capitalized pronoun is displayed the same as a lower case one.
*/
#define ANDCHAR  '&'  /* display conjunctions like this */
#define ORCHAR   '|'  /* display disjunctions like this */
#define	NUMCHAR  '#'  /* display numbers like this */
#define	NOTCHAR  '~'  /* display negations like this */
#define	PRONOUN  '"'  /* display pronouns like this */
#define WHCHAR   'w'  /* who, what, where, when, why, ... */
#define THCHAR   't'  /* the, that, those, ... */

/*
	Word lists for the word classes above.  Each list is terminated by a
	NULL pointer, which is what check() uses to find the end.  Entries are
	lower case because puncsentence() lower-cases each word with lcase()
	before looking it up.  puncsentence() tries the lists in a fixed
	order (and/or, pronouns, th-words, wh-words, negations, numbers) and
	the first match wins.
*/
char	*Andwords[] = {"and", "but", NULL};
char	*Orwords[] = {"or", "nor", NULL};
char	*Notwords[] = {"no", "not", "never", NULL};
/* spelled-out numbers; all-digit words are recognized separately by isnum() */
char	*Numwords[] = {"one", "two", "three", "four", "five",
						"six", "seven", "eight", "nine", "ten",
						"eleven", "twelve", "thirteen", "fourteen",
						"fifteen", "sixteen", "seventeen", "eighteen",
						"nineteen", "twenty", "thirty", "forty",
						"fifty", "sixty", "seventy", "eighty", "ninety",
						"hundred", "thousand", "million", "billion",
						"trillion", "zero", NULL};
char	*Whwords[] = {"who", "what", "where", "when", "why", "which", "whence",
						"how", "while", NULL};
char	*Thwords[] = {"the", "these", "those", "a", "that", NULL};
char	*Pronouns[] = {"he", "she", "they", "them", "i", "me", "my",
						"his", "her", "their", "myself", "themselves", NULL};

/*
	Program state.  Linecount and Sentcount are reset by punc() at the
	start of each input stream; Linecount is advanced by getsentence() for
	every line it reads (including skipped lines), Sentcount by punc() for
	every sentence returned.  The remaining variables are set from the
	command line by initial(); the option letter is shown for each.
*/
int 	Linecount;          /* number of lines read */
int 	Sentcount;          /* number of sentences read */
Boole	Countsent = FALSE;  /* -s: should sentence numbers be printed? */
Boole	Countline = FALSE;  /* -l: should sentence line numbers be printed? */
Boole	Printsent = FALSE;  /* -p: should sentences be printed? */
Boole	Printlength = FALSE;/* -w: should words be printed as lengths? */
int 	Criterion = 0;      /* -c: only print punc lines at least this long */
char	*Argv0;             /* program name */
Boole	Mapwords = FALSE;   /* -m: should words be mapped to class type? */

/*
	getsentence: horribly simple, and not terribly robust sentence reader.

	Reads lines from ioptr and appends them to a static buffer until a line
	ends a sentence.  A line ends a sentence when its last character
	(ignoring the trailing newline, if there is one) is a Finisher, or is a
	Maybefinisher immediately preceded by a Finisher.  Lines that begin
	with '.' and lines that contain only white space are skipped.
	Linecount is incremented for every line read, skipped or not.

	Returns a pointer to the static buffer, which is overwritten by the
	next call, or NULL at end of input or when the sentence would not fit
	in BUFSIZ characters (a message is then written to stderr).  Text
	collected before end of input that never reached a sentence terminator
	is discarded.
*/
char *
getsentence (ioptr)
FILE	*ioptr;
	{
	static	char	sentence[BUFSIZ];
	char	line[BUFSIZ];
	char	*ptr;
	int 	len;      /* length of line as read, newline included */
	int 	end;      /* length of line without its trailing newline */
	int 	sentlen;  /* number of characters collected in sentence */

	*sentence = '\0';
	sentlen = 0;

	for (;;)
		{
		if (fgets (line, BUFSIZ, ioptr) == NULL)
			return (NULL);
		Linecount++;
		if (*line == '.')
			continue;
		len = (int) strlen (line);
		/* ctype functions need an unsigned char value, not a plain char */
		for (ptr = line; isspace ((unsigned char) *ptr); ptr++)
			continue;
		if (*ptr == '\0')
			continue;
		if (sentlen + len >= BUFSIZ)
			{
			fprintf (stderr, "punc: sentence too long near line %d\n", Linecount);
			return (NULL);
			}
		/* append at the known end instead of rescanning with strcat */
		memcpy (sentence + sentlen, line, (size_t) len + 1);
		sentlen += len;
		/*
			len >= 1 here because the line holds a non-space character.
			The last line of a file may lack a newline, and a line may be
			only one or two characters long, so locate the last real
			character explicitly and never index before line[0].
		*/
		end = (line[len-1] == '\n') ? len-1 : len;
		if (end >= 1 && Finisher (line[end-1]))
			break;
		if (end >= 2 && Maybefinisher (line[end-1]) && Finisher (line[end-2]))
			break;
		}
	return (sentence);
	}


/*
	puncsentence: build the punctuation graph of a sentence.

	Every run of letters and digits in sent becomes one character, every
	punctuation character is copied as is, and everything else (white
	space included) is dropped.  The character used for a word is:
		WORDCHAR by default;
		with Printlength, the word length as a digit ('1'-'9', '0' for
		ten) or LONGWORD for longer words;
		with Mapwords, the class character of the first word list that
		contains the lower-cased word, or NUMCHAR for an all-digit word,
		or CAPCHAR for an unclassified capitalized word, or else the
		character chosen above.

	Returns a pointer to a static buffer that is overwritten by the next
	call, or NULL when sent is NULL or empty or when the graph is shorter
	than Criterion.  Output is cut off rather than overrun if it would not
	fit in the buffer.
*/
char *
puncsentence (sent)
char	*sent;
	{
	static	char	sbuf[BUFSIZ];
	char	*sptr;
	char	*slimit = sbuf + sizeof (sbuf) - 1; /* last slot is kept for '\0' */
	int 	lcase (), check (), isnum ();       /* defined later in this file */

	if (sent == NULL || *sent == '\0')
		return (NULL);
	sptr = sbuf;

	/* each pass stores at most one character, so one bound test is enough */
	while (*sent && sptr < slimit)
		{
		while (isspace ((unsigned char) *sent))
			sent++;
		if (*sent == '\0')
			break;
		if (isalnum ((unsigned char) *sent)) /* copy word into buffer */
			{
			char	buf[100], *ptr = sent;
			int 	length;    /* true length of the word */
			int 	copylen;   /* how much of it fits in buf */
			int 	wordchar;
			while (isalnum ((unsigned char) *sent))
				sent++;
			length = (int) (sent - ptr);
			/* longer words are truncated in buf; none of the lists has one */
			copylen = length;
			if (copylen > (int) sizeof (buf) - 1)
				copylen = (int) sizeof (buf) - 1;
			memcpy (buf, ptr, (size_t) copylen);
			buf[copylen] = '\0';
			wordchar = WORDCHAR;

			if (Printlength == TRUE)
				{
				if (length > 10) /* two digit word length, too long! */
					wordchar = LONGWORD;
				else if (length == 10)
					wordchar = '0';
				else wordchar = '0' + length;
				}

			if (Mapwords == TRUE)
				{
				int	ucase = lcase (buf); /* lower-cases buf for the lookups */
				if (check (buf, Andwords))
					*sptr++ = ANDCHAR;
				else if (check (buf, Orwords))
					*sptr++ = ORCHAR;
				else if (check (buf, Pronouns))
					*sptr++ = ucase ? (toupper (PRONOUN)) : PRONOUN;
				else if (check (buf, Thwords))
					*sptr++ = ucase ? (toupper (THCHAR)) : THCHAR;
				else if (check (buf, Whwords))
					*sptr++ = ucase ? (toupper (WHCHAR)) : WHCHAR;
				else if (check (buf, Notwords))
					*sptr++ = NOTCHAR;
				else if (isnum (buf) || check (buf, Numwords))
					*sptr++ = NUMCHAR;
				else
					*sptr++ = ucase ? CAPCHAR : wordchar;
				}
			else *sptr++ = wordchar;
			}
		else if (ispunct ((unsigned char) *sent))
			*sptr++ = *sent++;
		else sent++;
		}
	*sptr = '\0';
	/* signed comparison of the graph length, which is sptr - sbuf */
	return ((sptr - sbuf >= Criterion) ? sbuf : NULL);
	}

/*
	isnum: returns 1 if s is a non-empty string made only of decimal
	digits, 0 otherwise (an empty string is not a number).
*/
int
isnum (s)
char	*s;
	{
	if (*s == '\0')
		return (0);
	for (; *s; s++)
		{
		/* ctype functions need an unsigned char value, not a plain char */
		if (!isdigit ((unsigned char) *s))
			return (0);
		}
	return (1);
	}

/*
	lcase: maps s to lower case in place.
	Returns 1 if the first character of s was upper case, 0 otherwise
	(0 for an empty string).
*/
int
lcase (s)
char	*s;
	{
	/* normalize to 0/1: isupper only promises zero or non-zero */
	int 	ucase = (isupper ((unsigned char) *s) != 0);

	for (; *s; s++)
		{
		/* ctype functions need an unsigned char value, not a plain char */
		unsigned char	c = (unsigned char) *s;
		if (isupper (c))
			*s = (char) tolower (c);
		}
	return (ucase);
	}

/*
	check: returns 1 if target is equal to one of the strings in list,
	0 otherwise.  list must be terminated by a NULL pointer.
*/
int
check (target, list)
char	*target;
char	**list;
	{
	for (; *list != NULL; list++)
		{
		/*
			The first-character test is only a cheap filter.  Compare the
			whole strings from the start so that nothing past the
			terminator is read when both strings are empty.
		*/
		if (target[0] == (*list)[0] && strcmp (target, *list) == 0)
			return (1);
		}
	return (0);
	}

/*
	initial: process the command line options.

	Records argv[0] in Argv0 and sets the option variables:
		-c criterion   Criterion (a decimal number; a negative value
		               has the same effect as 0 and is stored as 0)
		-l Countline   -m Mapwords   -p Printsent
		-s Countsent   -w Printlength
	On an unknown option or a criterion that is not a number, prints a
	usage message to stderr and exits with status 1.
	Returns optind, the index in argv of the first non-option argument.
*/
int
initial (argc, argv)
int 	argc;
char	**argv;
	{
	int 	C;
	int 	errflg = 0;
	extern	int getopt ();
	extern	int optind;
	extern	char *optarg;
	char	*optstring = "lmpswc:";
	char	*usage = "[-lmpsw] [-c criterion] [-] [files]";
	Argv0 = argv[0];
	while ((C = getopt (argc, argv, optstring)) != EOF)
		switch (C)
			{
			case 'c':
				{
				/* strtol, unlike atoi, lets malformed input be detected */
				char	*end;
				long	value = strtol (optarg, &end, 10);
				if (end == optarg || *end != '\0' || value > INT_MAX)
					{
					fprintf (stderr, "%s: bad criterion: %s\n", argv[0], optarg);
					errflg++;
					}
				else
					Criterion = (value < 0) ? 0 : (int) value;
				}
				break;
			case 'l': Countline = TRUE; break;
			case 'm': Mapwords = TRUE; break;
			case 'p': Printsent = TRUE; break;
			case 's': Countsent = TRUE; break;
			case 'w': Printlength = TRUE; break;
			default: errflg++; break;
			}
	if (errflg)
		{
		fprintf (stderr, "Usage: %s %s\n", argv[0], usage);
		exit (1);
		}
	return (optind);
	}

Status
punc (file, ioptr)
char	*file;
FILE	*ioptr;
	{
	char	*sptr;
	char	*pptr;
	int 	linecount;

	Sentcount = 0;
	Linecount = 0;
	linecount = 1;

	while (sptr = getsentence (ioptr))
		{
		Sentcount++;
		if (pptr = puncsentence (sptr))
			{
			if (Countsent == TRUE)
				printf ("%4d\t", Sentcount);
			if (Countline == TRUE)
				printf ("%4d\t", linecount);
			puts (pptr);
			if (Printsent == TRUE)
				fputs (sptr, stdout);
			}
		linecount = Linecount+1;  /* point to start of next sentence */
		}
	return (SUCCESS);
	}

/*FUNCTION main: loop through files in classic UNIX filter style */
/*
	Parses the options with initial(), then hands the remaining arguments
	and punc() to filter() and exits with the status filter() returns.
	NOTE(review): filter() is not defined in this file; presumably it
	opens each named file (or stdin) and calls punc (file, ioptr) on it --
	confirm against its source.
*/
int
main (argc, argv)
int 	argc;     /* argument count */
char	**argv;   /* argument vector */
	{
	Status 	punc ();      /* punc (file, ioptr) will filter files */
	Status	filter ();    /* external; declared so the call is not implicit */
	Status	status;       /* return status of filter () */
	int 	firstfile;    /* first file name index returned by initial */
	firstfile = initial (argc, argv);
	status = filter (argc, argv, firstfile, punc);
	exit (status);
	}
