/*
 * Lexical Analysis Functions Takes input stream and returns "tokens".
 * Implements pre-processor, allowing #include's and #define's tokens are
 * character strings.
 */
#include <stdio.h>
#include <ctype.h>
#include <functions.h>
#include <exec/types.h>
#include "listmac.h"
#include "std.h"
#include "lex.h"


struct StringElt {
	struct Nod      node;
	char           *buf;
};

struct Token {
	struct Nod      node;
	char           *buf;
	short           type, can_be_macro;
	char            chr;
};

struct Macro {
	struct Nod      node;
	struct Lst      arguments;
	struct Lst      tokens;
	char           *name;
	short           active, nargs;
};

struct ActiveMacro {
	struct Nod      node;
	struct Macro   *mac;
	struct Token   *curtok;
};

struct LexFile {
	struct Nod      node;
	FILE           *fp;
	short           newln;
	short           nextchar;
};

struct IfBlock {
	struct Nod     *node;
	short           skip_else;
};


struct Token   *CopyToken ();
struct Macro   *FindMacro ();
char           *FindString ();


#define BUFLEN 100
static char     buf[BUFLEN];
static short    bufpos;
static struct Token
		rawtok, *lasttok, *retok;

static struct LexFile *toplf;
static struct Lst macros, activemacs, files, ifblocks;
static short    files_open = 0, init_done = 0;
static char	*includestr, *definestr, *ifdefstr,
		*ifndefstr, *elsestr, *endifstr;

static char    *libbase = "hd:include/", libnam[80], basnam[80];

#define NHASH 53
struct Lst      hashTab[NHASH];


/*
 * Open a file for lexing - could be an include.
 * Set newline flag to true.
 */
short OpenLexFile (name)
	char           *name;
{
	struct LexFile *lf;
	struct Macro   *mac;

	/*
	 * on first call, init environment vars 
	 */
	if (!init_done) {
		NewList (&files);
		NewList (&macros);
		NewList (&ifblocks);
		NewList (&activemacs);
		InitHash ();
		includestr = FindString ("include");
		definestr = FindString ("define");
		ifdefstr = FindString ("ifdef");
		ifndefstr = FindString ("ifndef");
		elsestr = FindString ("else");
		endifstr = FindString ("endif");
		init_done = 1;
	}

	/*
	 * if there are no files currently open, prepare for a new one 
	 */
	if (!files_open) {
		while (mac = (struct Macro *) RemHead (&macros))
			FreeMacro (mac);
		ASSERT (!TEST (HEAD (&ifblocks)));
		ASSERT (!TEST (HEAD (&activemacs)));
		retok = NULL;
	}
	lf = NEW (struct LexFile);
	if (!lf) return 0;

	/*
	 * printf ("opening \"%s\"\n", name); 
	 */
	lf->fp = fopen (name, "r");
	if (!lf->fp) {
		FREI (lf);
		return 0;
	}
	lf->newln = 1;
	lf->nextchar = getc (lf->fp);
	AddHead (&files, lf);
	toplf = lf;

	if (feof (lf->fp)) {
		CloseLexFile ();
		return 0;
	}
	files_open = 1;
	return 1;
}


/*
 * Close top file returning 1 for the last file now closed.
 */
short CloseLexFile ()
{
	if (!files_open) return 1;

	fclose (toplf->fp);
	Remove (toplf);
	FREI (toplf);
	toplf = HEAD (&files);
	if (TEST (toplf)) return 0;
	files_open = 0;
	toplf = 0;
	return 1;
}


/*
 * free stuff up 
 */
LexCleanup ()
{
	struct Macro   *mac, *nmac;

	while (files_open) CloseLexFile ();

	FreeHash ();
	for (mac = HEAD (&macros); nmac = NEXT (mac); mac = nmac)
		FreeMacro (mac);
	ASSERT (!TEST (HEAD (&activemacs)));

	/*
	 * initialization can be done again 
	 */
	init_done = 0;
}


FreeMacro (mac)
	struct Macro   *mac;
{
	struct Token   *tok;
	struct StringElt *se;

	while (tok = (struct Token *) RemHead (&mac->tokens))
		FREI (tok);
	while (se = (struct StringElt *) RemHead (&mac->arguments))
		FREI (se);
	FREI (mac);
}


/*
 * Get next raw character from file dealing with backslash escape chars.
 * This is kind of wrong since these really ought to be recognized only
 * inside strings.  Doing it this way deals with backslashes before
 * newlines.
 */
short NextC1 (fp)
	FILE           *fp;
{
	short           c, k, radix;

	c = getc (fp);
	if (c == '\\') {
		c = getc (fp);
		if (isdigit (c)) {
			radix = 10;
			if (c == '0') radix = 8;
			k = (c - '0');
			while (isdigit (c = getc (fp)))
				k = k * radix + (c - '0');
			ungetc (c, fp);
			c = k;
		} else {
			switch (c) {
			    case '\\':
				break;
			    case '"':
				c = '.';
				break;
			    case 'n':
				c = '\n';
				break;
			    case 'b':
				c = '\b';
				break;
			    case 't':
				c = '\t';
				break;
			    case '\n':
				c = getc (fp);
			}
		}
	}
	return c;
}


/*
 * Get next character from Top Lex File, updating nextchar.
 */
short NextC ()
{
	short           c;

	if (!toplf) return EOF;
	c = toplf->nextchar;
	toplf->nextchar = NextC1 (toplf->fp);
	return c;
}


/*
 * Move the top lex file to the next newline.
 */
AdvanceEOL ()
{
	if (!toplf) return;
	while (toplf->nextchar != '\n' && !feof (toplf->fp))
		NextC ();
}


/*
 * Read a raw set of characters updating newline state.  Sets the
 * fields in rawtok to the type and string pointer, if any. 
 */
RawToken ()
{
	short           c;
	short           nochar = 1, comment, nl;

	bufpos = 0;
	rawtok.can_be_macro = 1;

	/*
	 * get a non-blank char 
	 */
	while (nochar) {

		/*
		 * get a character testing for end-of-file if EOF, just
		 * return that fact right away 
		 */
		c = NextC ();
		if (c == EOF) {
			rawtok.type = RT_EOF;
			return;
		}

		/*
		 * test next char: if it's white space other than newline,
		 * just clear the newline flag and continue looking. 
		 */
		if (isspace (c) && c != '\n')
			toplf->newln = 0;
		else {

			/*
			 * otherwise, this is potentially interesting, but it
			 * might be the start of a comment - if so, scan til
			 * the end of the comment (no nested comments ala
			 * "C").  If not, exit the loop. 
			 */
			if (c == '/' && toplf->nextchar == '*') {
				comment = 1;
				while (comment) {
					c = NextC ();
					if (c == '*'
					  && toplf->nextchar == '/') {
						c = NextC ();
						comment = 0;
					}
				}

			} else nochar = 0;
		}
	}

	nl = toplf->newln;
	toplf->newln = 0;
	bufpos = 0;

	/*
	 * otherwise, read out the cases id symbol starting with char 
	 */
	if (nl && c == '#')
		rawtok.type = RT_ESC;

	else if (c == '\'') {
		rawtok.chr = NextC ();
		c = NextC ();

		/*
		 * Skip past weird character constants, like 'FOOL',
		 * making no attempt to deal with them correctly.
		 */
		comment = 0;
		while (c != '\'' && comment < 6) {
			c = NextC ();
			comment++;
		}
		if (comment >= 6) printf ("error in character constant\n");
		rawtok.type = RT_CHRC;

	} else if (isalpha (c) || c == '_') {
		buf[bufpos++] = c;
		while (isalnum (toplf->nextchar) || toplf->nextchar == '_')
			buf[bufpos++] = NextC ();
		buf[bufpos] = 0;
		rawtok.type = RT_ID;
		rawtok.buf = FindString (buf);

	} else if (c == '\n') {
		toplf->newln = 1;
		rawtok.type = RT_NEWLN;

	} else if (isdigit (c)) {
		buf[bufpos++] = c;
		while (isdigit (toplf->nextchar)) buf[bufpos++] = NextC ();
		buf[bufpos] = 0;
		rawtok.buf = FindString (buf);
		rawtok.type = RT_NUM;

	} else if (c == '"') {
		while ((c = NextC ()) != '"')
			if (bufpos < BUFLEN)
				buf[bufpos++] = c;

		if (bufpos + 1 == BUFLEN) printf ("string overflow\n");
		buf[bufpos] = 0;
		rawtok.buf = FindString (buf);
		rawtok.type = RT_STR;

	} else {
		rawtok.chr = c;
		rawtok.type = RT_CHR;
	}
}


/*
 * Main function -- returns string pointer for next token makes include files
 * and macros transparent reads from top open file and returns pointer to
 * string of chars could be a symbol, could be a number, could be a character 
 */
short NextToken (tbuf)
	char          **tbuf;
{
	struct Token   *tok;
	struct Macro   *mac;
	struct ActiveMacro *amac;
	short           i;
	char            c;

	while () {

		/*
		 * get raw token from file or active macro 
		 */
		amac = HEAD (&activemacs);
		if (retok) {
			tok = retok;
			retok = NULL;
		} else if (TEST (amac)) {
			tok = amac->curtok;
			if (!TEST (tok)) {
				amac->mac->active = 0;
				for (i = 0; i < amac->mac->nargs; i++)
					FreeMacro (RemHead (&macros));
				Remove (amac);
				FREI (amac);
				continue;
			}
			amac->curtok = NEXT (amac->curtok);
		} else {
			RawToken ();
			tok = &rawtok;
		}
		switch (tok->type) {
		    case RT_EOF:
			if (CloseLexFile ()) {
				lasttok = tok;
				return RT_EOF;
			}
			break;
		    case RT_ESC:
			RawToken ();
			if (rawtok.type != RT_ID) {
				printf ("real problem with lexer directive\n");
				AdvanceEOL ();
				break;
			}
			if (rawtok.buf == includestr)		Include ();
			else if (rawtok.buf == definestr)	Define ();
			else if (rawtok.buf == ifdefstr)	IfDef (1);
			else if (rawtok.buf == ifndefstr)	IfDef (0);
			else if (rawtok.buf == elsestr)		Else ();
			else if (rawtok.buf == endifstr)	EndIf ();
			else {
				printf ("unknown directive \"%s\"\n", rawtok.buf);
				AdvanceEOL ();
			}
			break;

		    case RT_ID:

			/*
			 * test for known macros (if this token can be one) 
			 */
			if (tok->can_be_macro) {
				mac = FindMacro (tok->buf);
				if (mac) {
					InstantiateMacro (mac);
					break;
				}
			}
		    case RT_STR:
		    case RT_NUM:
			*tbuf = tok->buf;
			lasttok = tok;
			return tok->type;
		    case RT_CHR:
		    case RT_CHRC:
			*tbuf = &tok->chr;
			lasttok = tok;
			return tok->type;
		}
	}
}


/*
 * The given macro has occured in the text, cause it to come into existence
 * with argument substitution. 
 */
InstantiateMacro (mac)
	struct Macro   *mac;
{
	struct ActiveMacro *amac;
	struct StringElt *arg;
	struct Macro   *smac;
	struct Token   *tok;
	char           *buf;
	short           vtok, level, endarglist;

	if (mac->active) {
		printf ("recursive macro ignored\n");
		return;
	}

	/*
	 * read arguments, if any, and make them macros in their own right
	 * arguments are read recursively using NextToken (right? I'm not
	 * sure...) 
	 */
	arg = HEAD (&mac->arguments);
	if (TEST (arg)) {

		/*
		 * get first open paren 
		 */
		vtok = NextToken (&buf);
		if (vtok != RT_CHR || *buf != '(')
			goto argfail;

		/*
		 * loop through args separated with commas 
		 */
		endarglist = 0;
		while (TEST (arg)) {
			smac = NEW (struct Macro);
			if (!smac) goto argfail;
			smac->name = arg->buf;
			smac->active = 0;
			smac->nargs = 0;
			NewList (&smac->arguments);
			NewList (&smac->tokens);

			/*
			 * scan out tokens allowing for nested commas 
			 */
			level = 0;
			while () {
				vtok = NextToken (&buf);
				if (vtok == RT_CHR) {
					if (*buf == '(' || *buf == '{')
						level++;
					if (*buf == '}')
						level--;
					if (*buf == ')') {
						if (level == 0) {
							endarglist = 1;
							break;
						}
						level--;
					}
					if (*buf == ',' && level == 0)
						break;
				}
				tok = CopyToken (lasttok);
				if (!tok) goto argfail;
				tok->can_be_macro = 0;
				AddTail (&smac->tokens, tok);
			}
			AddHead (&macros, smac);
			if (endarglist) break;
			arg = NEXT (arg);
		}
		if (vtok != RT_CHR || *buf != ')') goto argfail;
	}

	/*
	 * Macro does not become active in the global list until all
	 * arguments have been parsed and interpreted. 
	 */
	amac = NEW (struct ActiveMacro);
	if (!amac) return;		/* what the fuck do I do here? */
	amac->mac = mac;
	amac->curtok = HEAD (&mac->tokens);
	mac->active = 1;
	AddHead (&activemacs, amac);
	return;

argfail:
	printf ("bad problem with arguments\n");
}


/*
 * locate a specified macro in the available macros list 
 */
struct Macro * FindMacro (buf)
	char           *buf;
{
	struct Macro   *mac;

	for (mac = HEAD (&macros); TEST (mac); mac = NEXT (mac))
		if (mac->name == buf)
			return mac;

	return NULL;
}


/*
 * Do an "include" directive 
 */
Include ()
{
	short           pos, ok, global;
	char            c;

	/*
	 * get filename 
	 */
	RawToken ();
	ok = 0;
	global = 0;

	if (rawtok.type == RT_STR) {
		ok = 1;
		strcpy (basnam, rawtok.buf);
	} else if (rawtok.type == RT_CHR && rawtok.chr == '<') {
		ok = 1;
		pos = 0;
		while () {
			c = NextC ();
			if (c == '>') break;
			basnam[pos++] = c;
		}
		basnam[pos] = 0;
		global = 1;
	}
	AdvanceEOL ();
	if (!ok) {
		printf ("no file to include\n");
		return;
	}
	if (!global) if (OpenLexFile (basnam)) return;
	strcpy (libnam, libbase);
	strcat (libnam, basnam);
	if (OpenLexFile (libnam)) return;
	printf ("error open include file <%s>\n", libnam);
}


/*
 * Do the "define" directive.
 */
Define ()
{
	struct Macro   *mac = NULL;
	struct Token   *tok;
	struct StringElt *se;

	/*
	 * get identifier to define 
	 */
	RawToken ();
	if (rawtok.type != RT_ID) {
		printf ("error in define - no identifier\n");
		goto escape;
	}
	mac = NEW (struct Macro);
	if (!mac) goto escape;
	mac->name = rawtok.buf;
	mac->active = 0;
	mac->nargs = 0;
	NewList (&mac->arguments);
	NewList (&mac->tokens);

	/*
	 * Look for parenthized argument list.
	 */
	if (toplf->nextchar == '(') {
		RawToken ();
		while () {
			RawToken ();

			/*
			 * deal with special case of "#define MACRO()" 
			 */
			if (!mac->nargs && rawtok.type == RT_CHR
			  && rawtok.chr == ')')
				break;
			if (rawtok.type != RT_ID) {
				printf ("macro argument not an identifier\n");
				goto escape;
			}
			se = NEW (struct StringElt);
			if (!se) goto escape;
			se->buf = rawtok.buf;
			AddTail (&mac->arguments, se);
			mac->nargs++;
			RawToken ();
			if (rawtok.type != RT_CHR) {
				printf ("macro arg list delimiter not a character\n");
				goto escape;
			}
			if (rawtok.chr == ')') break;
			if (rawtok.chr != ',') {
				printf ("macro arg list delimiter not ',' or ')'\n");
				goto escape;
			}
		}
	}

	/*
	 * get the sequence of tokens which make up this definition 
	 */
	while () {
		RawToken ();
		if (rawtok.type == RT_NEWLN || rawtok.type == RT_EOF) break;
		tok = CopyToken (&rawtok);
		if (!tok) goto escape;
		tok->can_be_macro = 1;
		AddTail (&mac->tokens, tok);
	}

	AddHead (&macros, mac);
	return;

escape:
	if (mac) FreeMacro (mac);
	AdvanceEOL ();
}


/*
 * Skip over a defined-out section.  Returns 1 if it ends with #else, 0
 * if it ends with #endif, -1 for EOF.  Keeps track of nested if's
 * being skipped.
 */
short SkipRemovedSec ()
{
	short           level = 0;

	while () {
		RawToken ();
		if (rawtok.type == RT_EOF) return -1;
		if (rawtok.type == RT_ESC) {
			RawToken ();
			if (rawtok.type == RT_ID) {
				if (rawtok.buf == ifdefstr
				  || rawtok.buf == ifndefstr) level++;
				if (rawtok.buf == elsestr)
					if (!level) return 1;
				if (rawtok.buf == endifstr)
					if (!level--) return 0;
			}
		}
	}
}


/*
 * Does the "ifdef"/"ifndef" - "else" - "endif" directive.  Type is 1
 * for ifdef and 0 for ifndef.
 */
IfDef (type)
	short           type;
{
	struct Macro   *mac;
	struct IfBlock *ib;
	short           els;

	RawToken ();
	mac = FindMacro (rawtok.buf);

	/*
	 * If condition is true, add a node to say "skip the next #else
	 * section" if false, skip this section.  If section ends with an
	 * else, add a junk node to pop off for consistency at next #endif.
	 */
	if ((mac && type) || (!mac && !type)) {
		ib = NEW (struct IfBlock);
		if (!ib) return;
		ib->skip_else = 1;
		AddHead (&ifblocks, ib);
	} else {
		els = SkipRemovedSec ();
		if (els == 1) {
			ib = NEW (struct IfBlock);
			if (!ib) return;
			ib->skip_else = 0;
			AddHead (&ifblocks, ib);
		}
	}
}


Else ()
{
	struct IfBlock *ib;
	short           els;

	ib = HEAD (&ifblocks);
	ASSERT (TEST (ib));
	Remove (ib);
	if (!ib->skip_else) {
		printf ("strange \"else\" block found\n");
	} else {
		els = SkipRemovedSec ();
		if (els == 1 && ib->skip_else) {
			printf ("strange \"else\" block found\n");
		}
	}
	FREI (ib);
}


EndIf ()
{
	struct IfBlock *ib;

	ib = HEAD (&ifblocks);
	ASSERT (TEST (ib));
	Remove (ib);
	FREI (ib);
}


/*
 * Backs up one token 
 */
Backspace ()
{
	retok = lasttok;
}


struct Token * CopyToken (tok)
	struct Token   *tok;
{
	struct Token   *ntok;

	ntok = NEW (struct Token);
	if (ntok) *ntok = *tok;
	return ntok;
}



/* HASH TABLE STUFF */


InitHash ()
{
	short           i;

	for (i = 0; i < NHASH; i++)
		NewList (&hashTab[i]);
}


/*
 * Simple but (I hope) effective hash function.
 */
short HashVal (buf)
	char           *buf;
{
	unsigned long   hash;

	hash = 0;
	while (*buf) hash = hash * 3 + *buf++;
	return hash % NHASH;
}


/*
 * Finds (or creates) a stored string matching the given one return pointer
 * which acts as unique ident for this string.
 */
char * FindString (buf)
	char           *buf;
{
	short           hash;
	struct Lst     *hlist;
	struct StringElt *se;
	char           *sto;

	hash = HashVal (buf);
	hlist = &hashTab[hash];

	for (se = HEAD (hlist); TEST (se); se = NEXT (se)) {
		if (!strcmp (se->buf, buf))
			return se->buf;
	}
	se = NEW (struct StringElt);
	if (!se) return NULL;
	sto = NEW_N (char, strlen (buf) + 1);
	if (!sto) {
		FREI (se);
		return NULL;
	}
	AddTail (hlist, se);
	strcpy (sto, buf);
	se->buf = sto;
	return sto;
}


FreeHash ()
{
	short           i;
	struct StringElt *se, *nse;

	for (i = 0; i < NHASH; i++) {
		for (se = HEAD (&hashTab[i]); nse = NEXT (se); se = nse) {
			FREE_N (se->buf, char, strlen (se->buf) + 1);
			FREI (se);
		}
	}
}
