/*
 * scpp.c - main processing for the selective C preprocessor, scpp.
 *
 * Copyright (c) 1985 by
 * Tektronix, Incorporated Beaverton, Oregon 97077
 * All rights reserved.
 *
 * Permission is hereby granted for personal, non-commercial
 * reproduction and use of this program, provided that this
 * notice and all copyright notices are included in any copy.
 */

#define VARS
# include <stdio.h>
# include "scpp.h"
# include "y.tab.h"

/*
 * actual[] - the array of actual parameters of the macro currently being
 *  interpreted.
 */

struct anactual {
	char *aa_val;	/*
			 * the value of this actual, held as a pointer to
			 * the null that terminates the value text (the
			 * same convention as amacro.am_val in scpp.h).
			 */
	char *aa_mem;	/*
			 * the start of the dynamically allocated buffer
			 * which holds the value; this is the pointer handed
			 * to free() once the macro has been expanded.
			 * Zero for a filler entry (a missing actual), which
			 * uses a static empty value and owns no memory.
			 */
};
#define ACTSIZ MAXPARMS
struct anactual actual[ACTSIZ];
struct anactual *actp;	/* the next available slot in actual[] */



main(argc, argv)
int argc;
char **argv;
{
	int tok;	/* current token's value	*/
	char *cp;
	char *ep;
	char **dp;	/* where within dirlist to put the next directory */
	struct amacro *np;
	char *name;	/* name of the current macro	*/
	char *val;	/* value of the current macro	*/
	char *defmagic = "defined";	/* name of the 'defined()' macro */
	struct amacro *magmac;	/* (temp) slot for the magic macro */

	/*
	 * init all the global structures
	 */

	nxtout = &pend[0];
	curfile = &filestk[-1];
	nxtin = &istk[ISTKSIZ];
	curif = &ifstk[-1];
	
	nxtfile = &catlist[0];
	dp = &dirlist[0];

	/*
	 * setup the keyword symbols and the special macro, 'defined()'.
	 */

	ikeywords();
	magmac = findmac(defmagic, defmagic + strlen(defmagic));
	if (magmac->am_name) {
		bomb("INTERNAL: 'defined()' macro slot in use");
	}
	magmac->am_name = defmagic;
	magmac->am_npar = 1;
	magmac->am_val = &magicval;

	while (++argv, --argc > 0) {
		cp = *argv;
		if (*cp == '-' && *(cp + 1) != '\0') {
			switch(*++cp) {
			case 'C':
				savcom = TRUE;
				break;
			case 'I':
				*dp++ = cp + 1;
				break;
			case 'M':
				/*
				 * for each name in the list of whitespace-
				 *  separated macro names,
				 * Setup a slot for that macro, but leave it
				 *  undefined.
				 */

				while (*cp) {
					while (*++cp == ' ' || *cp == '\t' ||
					    *cp == '\n')
						;
					if (*cp == '\0') {
						break;
					}
					for (name = cp; *cp != '\0' &&
					  *cp != ' ' && *cp != '\t' &&
					  *cp != '\n'; ++cp)
						;

					np = findmac(name, cp);
					if (np->am_name == (char *) 0) {
					    np->am_name = savtok(name, cp);
						np->am_npar = -1;
					}
					/* am_val is left as zero */
				}
				break;
			case 'D':
				for (name = ++cp; *cp != '\0' && *cp != '=';
				  ++cp)
					;
				if (name == cp) {
					warn("missing macro name in `%s'",
					  name - 2);
					break;
				}

				if (*cp == '\0') {
					/*
					 * macro name with no definition.
					 * Define the name with no parameters
					 *  and with a value of "1".
					 */

					defmac(name, cp, -1, "1");
				} else {
					/* macro + definition */

					for (*cp++ = '\0', val = cp;
					  *cp != '\0'; ++cp)
						;
					defmac(name, name + strlen(name),
					  -1, val);
				}
				break;
			default:
				bomb("unknown switch `%c'", *cp);
			}
		} else {
			*nxtfile++ = cp;
		}
	}

	if (nxtfile == &catlist[0]) {
		*nxtfile++ = "-";
	}
	*nxtfile = (char *) 0;
	nxtfile = &catlist[0];

	*dp++ = "/usr/include";
	*dp = (char *) 0;

	/*
	 * prime the input stack and go,
	 * interpreting preprocessor directives along the way.
	 */

	pushfile(*nxtfile++, PF_NOLOOK, PF_NOHIDE);
	do {
		tok = gintok();
		if (tok == POUNDLINE) {
			tok = doctrl(curtext);
		}
		outpend();	/* even the 0 token needs to be flushed.
				 * Otherwise, incomplete comments at the end
				 * of the file would be destroyed.
				 */
	} while (tok != 0);
	writepend();		/* flush trailing output	*/

	if (curif >= &ifstk[0]) {
		warnf("missing endif");
	}

	exit(sawerror ? 1 : 0);
}

/*
 * gintok() - get a token, interpreting macros.
 *
 *  Tokens are read with gtok().  Quoted strings are collected into a
 *  single token by gstrtok().  An IDENT which names a currently defined
 *  macro is replaced: its invocation is disposed of and its value (with
 *  the actual parameters substituted for the formals) is pushed back on
 *  the input, after which scanning continues.
 *
 *  Returns:
 *	0	at end of input;
 *	DEFMAC	when an invocation of the magic macro 'defined()' has been
 *		left untouched (curtext then points to its beginning);
 *	otherwise the value of the first token which is not a defined
 *	macro name.
 */

int
gintok()
{
	int tok;		/* the current token's value	*/
	struct amacro *mac;	/* the current macro		*/
	struct amacro *defsym;	/* the macro being checked for 'defined()' */
	char *mactext;		/*
				 * the start of the invocation of a macro
				 * which has parameters.
				 */
	char *start;		/* the start of the current parameter	*/
	int nest;		/*
				 * current nesting level of parentheses.
				 * used to avoid misinterpreting commas within
				 * nested parens as parameter separators.
				 */
	char *defident;		/*
				 * The IDENT parameter for the magic macro,
				 * 'defined()' (dynamically alloc'ed).
				 * While gintok() is interpreting the magic
				 * macro this is first set to &magicval as a
				 * "not yet seen" marker; the first IDENT met
				 * during parameter parsing replaces it.
				 * Zero for any other macro.
				 */
	int parmgripe;		/*
				 * "an error message about parameters of
				 * this macro has already been printed."
				 */
	int i;			/* an actual-parameter index	*/
	char *cp;		/* a temp pointer		*/

	/*
	 * special macro values (see scpp.h: struct amacro, field am_val):
	 *  noval == a null macro value;
	 *  oneval == a macro value of '1';
	 *  zeroval == a macro value of '0';
	 * Each pointer addresses the null at the *end* of its value.
	 */

	static char nv[2] = {'\0', '\0'};
	static char *noval = &nv[1];
	static char ov[3] = {'\0', '1', '\0'};
	static char *oneval = &ov[2];
	static char zv[3] = {'\0', '0', '\0'};
	static char *zeroval = &zv[2];


	tok = OTHER;
	while (tok != DEFMAC && (tok = gtok()) != 0) {
		if (tok == QUOTE || tok == DQUOTE) {
			tok = gstrtok(tok);
		}
		if (tok != IDENT) {
			return(tok);
		}

		if ((mac = findmac(curtext, nxtout))->am_name == (char *) 0 ||
		    mac->am_val == (char *) 0) {
			/* there is no macro by this name currently defined */

			return(tok);
		}

		/*
		 * tally this interpretation
		 */

		++ninterp;

		if (mac->am_npar < 0) {
			/*
			 * the macro has no formal parameters.
			 * pushback the replacement text and continue.
			 */

			(void) dispose(curtext);
			(void) pushmac(mac->am_val);
			continue;
		}

		/* this is a macro with formals */

		/*
		 * save the starting-point of the macro's text.
		 * Used for later disposal.  The text is not disposed
		 * here in case the macro is a 'defined()' of some non--M'ed
		 * macro.
		 */

		mactext = curtext;

		/*
		 * collect the comma-separated actual parameters of the macro,
		 * ignoring commas within pairs of parens or within strings.
		 */

		parmgripe = FALSE;
		actp = &actual[0];
		nest = 0;
		if (mac->am_val == &magicval) {
			defident = &magicval;
		} else {
			defident = (char *) 0;
		}

		if ((tok = nonwhite(gtok)) != LP) {
			warnf("missing parenthesis in macro");
			parmgripe = TRUE;

			/* pushback the erroneous token	*/
			untok();
		} else {
			do {
				/* collect one parameter */

				start = nxtout;
				while ((tok = gtok())) {
					if (tok == CM && nest == 0) {
						break;
					} else if (tok == RP) {
						if (nest > 0) {
							--nest;
						} else if (nest == 0) {
							break;
						}
					} else if (tok == LP) {
						++nest;
					} else if (tok == QUOTE ||
					  tok == DQUOTE) {
						tok = gstrtok(tok);
					} else if (tok == IDENT &&
					  defident == &magicval) {
						defident =
						  savtok(curtext, nxtout);
					}
				}

				/*
				 * Warn about too many parameters, otherwise,
				 * store the parameter in the format of
				 * a macro value: the copy starts one char
				 * early so that its first byte can become
				 * the leading null.
				 */

				if ((actp - &actual[0]) >= mac->am_npar) {
					if (!parmgripe) {
					  warnf("macro parameter mismatch");
					  parmgripe = TRUE;
					}
				} else {
					cp = savtok(start - 1, curtext);
					*cp = '\0';
					actp->aa_mem = cp;
					while (*++cp)
						;
					actp->aa_val = cp;
					++actp;
				}
			} while (tok == CM);
			if (tok != RP) {
				if (!parmgripe) {
				  warnf("missing parenthesis in macro");
				  parmgripe = TRUE;
				}
			}
		}

		/*
		 * If there are too few actual parameters, fill out the
		 * list with null values.
		 */

		while (actp - &actual[0] < mac->am_npar) {
			if (!parmgripe) {
				warnf("parameter mismatch");
				parmgripe = TRUE;
			}
			actp->aa_val = noval;
			actp->aa_mem = (char *) 0;
			++actp;
		}

		/*
		 * replace the macro invocation with the value of the macro,
		 *  replacing formal arguments with the corresponding actual.
		 */

		if ((cp = mac->am_val) == &magicval) {
			/*
			 * This is the magic macro, "defined(x)".
			 * Interpret only if the parameter is a -M'ed
			 *  macro and we are currently parsing a
			 *  #if expression.
			 * Lookup the parameter (if any);
			 * If the parameter is -M'ed, pushback a '1' or '0',
			 * depending on whether the macro is defined.
			 */

			if (defident == &magicval) {
				/*
				 * No identifier was found (e.g. "defined()"
				 * or a missing parenthesis).  defident is
				 * still the marker, not a saved name: it
				 * must be neither looked up nor freed.
				 * Leave the invocation of defined() untouched.
				 */

				curtext = mactext;
				tok = DEFMAC;
			} else {
				defsym = findmac(defident,
				  defident + strlen(defident));
				if (!defsym->am_name || !expparse) {
					/*
					 * Leave the invocation of defined()
					 * untouched.
					 */

					curtext = mactext;
					tok = DEFMAC;
				} else {
					(void) dispose(mactext);
					if (defsym->am_val) {
						(void) pushmac(oneval);
					} else {
						(void) pushmac(zeroval);
					}
				}
				free(defident);
			}
		} else {
			/*
			 * pushmac() stops at each ATTN; the byte before the
			 * ATTN is the (1-based) number of the formal to
			 * substitute.
			 */

			(void) dispose(mactext);
			while (*(cp = pushmac(cp)) == ATTN) {
				i = (int) (*--cp) - 1;
				if (i < 0 || i >= mac->am_npar) {
					warnf(
"INTERNAL: parameter number %d out of bounds", i);
				} else {
					(void) pushmac(actual[i].aa_val);
				}
			}
		}

		/*
		 * free the actual parameters.
		 */

		while (--actp >= &actual[0]) {
			if (actp->aa_mem) {
				free(actp->aa_mem);
			}
		}
	}
	return(tok);
}

/*
 * gtok() - get a token without interpreting macros or preprocessor directives.
 *  This is the low-level lexical analyzer.  It exists only because Lex's
 *  analyzer chokes on long comments.
 */

/*
 * Reads one raw token with xxlex(), first marking its start in curtext.
 * A whole comment (OPENC through CLOSEC) is folded into a single COMMENT
 * token.  Returns 0 at end of input; a comment cut short by end of input
 * is reported and also yields 0.
 */

int
gtok()
{
	int t;

	curtext = nxtout;

	t = xxlex();
	if (t != OPENC) {
		return(t);
	}

	/* inside a comment: swallow tokens up to the terminator */
	for (;;) {
		t = xxlex();
		if (t == CLOSEC) {
			break;
		}
		if (t == 0) {
			warnf("unterminated comment");
			return(0);
		}
	}
	return(COMMENT);
}

/*
 * gstrtok - get a string token.  Given the token which starts a string
 *  or character constant (I.E. QUOTE or DQUOTE), collect the string token
 *  as if it had been recognised by the lexical analyzer as a single token.
 */

/*
 * Returns STRING for a string opened by DQUOTE, CHARS otherwise, or 0 if
 * end of input is reached before the closing quote.  In every case
 * curtext is left pointing at the opening quote.
 */

int
gstrtok(tok)
int tok;		/* token which started the quoted string	*/
{
	char *begin;		/* start of the quoted text in pend[]	*/
	int t;			/* the token just read			*/
	int result;		/* the value to return			*/

	/*
	 * Macros are not interpreted inside the quotes.  A backslash
	 * protects the token after it, so \' and \" do not close the
	 * string.  A newline ends the string early and is pushed back
	 * for the caller to see.
	 */

	begin = curtext;
	result = (tok == DQUOTE) ? STRING : CHARS;

	for (;;) {
		t = gtok();
		if (t == tok) {
			/* the matching close quote */
			break;
		}
		if (t == 0) {
			/* unterminated quote */
			result = 0;
			break;
		}
		if (t == NL) {
			/* unterminated quote.  pushback the newline */
			untok();
			break;
		}
		if (t == BACKS && gtok() == 0) {
			/* input ended right after a backslash */
			result = 0;
			break;
		}
	}

	/* gtok() moved curtext; point it back at the opening quote */
	curtext = begin;
	return(result);
}

/*
 * findmac - find a macro
 *  given the bounds of what might be a macro name (possibly containing ATTN
 *   bytes), return a pointer to the symbol table slot
 *  corresponding to that name.
 */

/*
 * Returns the slot in sym[] whose name equals the given text, or, if
 * there is none, the empty slot (am_name == 0) where that name belongs.
 * Calls bombf() if the whole table has been probed without finding
 * either.
 */

struct amacro *
findmac(name, last)
char *name;	/* points to the beginning of the name.			*/
char *last;	/* points to the char beyond the end of the name	*/
{
	/*
	 * hash the first 8 chars of the name (less ATTN bytes) into an index;
	 * Use that index as a starting point for a linear search
	 *  for either the matching slot or an empty slot.
	 */

	int idx;
	char *cp;
	char *tp;
	int cnt;
	struct amacro *np, *start;


	/*
	 * The loop header supplies the single advance per char.  An ATTN
	 * takes one extra step so that its code byte is skipped as well;
	 * that way a name hashes the same with or without ATTN bytes.
	 */

	for (idx = 0, cp = name, cnt = 0; cp < last && cnt < 8; ++cp) {
		if (*cp == ATTN) {
			++cp;
		} else {
			idx += (int) *cp & 0xff;
			++cnt;
		}
	}
	start = np = &sym[idx % SYMSIZ];

	while (np->am_name) {
		/*
		 * compare the token at 'name' with the macro's name,
		 * skipping ATTN bytes and their associated codes.
		 */

		for (tp = name, cp = np->am_name; tp < last; ++tp) {
			if (*tp == ATTN) {
				++tp;
				continue;
			}
			if (*tp != *cp++) {
				break;
			}
		}

		/*
		 * The token is used up; the names match only if the
		 * macro's name is used up too.  Otherwise the token is
		 * merely a prefix of the macro's name.
		 */

		if (tp == last && *cp == '\0') {
			/* the names match */
			break;
		}

		/* try the next slot, wrapping at the end of the table */
		if (++np >= &sym[SYMSIZ]) {
			np = &sym[0];
		}
		if (np == start) {
			bombf("symbol table overflow");
		}
	}
	return(np);
}

/*
 * defmac - define a macro
 */

/*
 * Gives the named macro the parameter count 'npar' and a private copy of
 * the value 'val'.  Nothing is changed (a warning is printed instead)
 * when the name is a preprocessor keyword or the implicit magic macro.
 * Redefining a macro with the same parameter count and value is silently
 * accepted; a differing redefinition is warned about and replaces the
 * old value.
 */

defmac(name, end, npar, val)
char *name;		/* the start of the macro's name		*/
char *end;		/* points to one char beyond the end of the name */
int npar;		/* # of parameters (-1 == none)			*/
char *val;		/* the beginning of the value string		*/
{
	char *cp;
	struct amacro *np;
	struct akeyword *kp;
	char *malloc();


	np = findmac(name, end);
	if (np->am_name == (char *) 0) {
		/* first occurrence of this name: name the slot */

		np->am_name = savtok(name, end);
	} else {
		/*
		 * Preprocessor keywords may not be defined.
		 */

		kp = findkey(np);
		if (kp != (struct akeyword *) 0) {
			warnf("redeclaration of keyword \"%s\"", kp->ak_name);
			return;
		}

		if (np->am_val != (char *) 0) {
			/* the macro currently has a value */

			if (np->am_val == &magicval) {
				warnf("cannot redefine implicit macro");
				return;
			}

			/*
			 * am_val addresses the end of the value.  Back up
			 * to the null which introduces it; that is also
			 * the start of the allocated buffer.
			 */

			for (cp = np->am_val - 1; *cp != '\0'; --cp)
				;

			if (np->am_npar == npar && strcmp(cp + 1, val) == 0) {
				/* an identical redefinition */
				return;
			}

			warnf("redeclaration of \"%s\"", np->am_name);
			free(cp);
		}
	}

	/*
	 * Store the new definition.  The buffer holds a leading null, the
	 * value and the terminating null; am_val is set to the address of
	 * the terminating null.
	 */

	np->am_npar = npar;

	cp = malloc((unsigned) strlen(val) + 2);
	if (cp == (char *) 0) {
		bombf("out of memory");
	}
	*cp = '\0';
	strcpy(cp + 1, val);
	np->am_val = cp + 1 + strlen(val);
}

/*
 * savtok - given the limits of a token string,
 *  copy that string (less ATTN bytes) into a dynamically allocated buffer
 *  then return the buffer.
 */

/*
 * The returned buffer is null-terminated and belongs to the caller, who
 * releases it with free().  Each ATTN byte is dropped together with the
 * byte which follows it.  Calls bombf() if no memory is available.
 */

char *
savtok(s, e)
char *s;	/* first char of token			*/
char *e;	/* points beyond the last char of token	*/
{
	char *buf;	/* the copy of the token -- the value to return	*/
	char *dst;	/* where the next copied char goes		*/
	char *malloc();

	/* room for every char of the token plus the terminator */
	buf = malloc(e - s + 1);
	if (buf == (char *) 0) {
		bombf("out of memory");
	}

	dst = buf;
	while (s < e) {
		if (*s == ATTN) {
			s += 2;
		} else {
			*dst++ = *s++;
		}
	}
	*dst = '\0';

	return(buf);
}
