/* W A R T
 *
 * pre-process a lex-like file into a C program.
 *
 * Jeff Damens, Columbia University Center for Computing Activites, 11/84.
 * (Reorganized by Frank da Cruz into a single source module for ease
 * of distribution).
 * Copyright (C) 1984, Trustees of Columbia University.
 * May be copied and used except for explicitly commercial purposes.
 *
 * input format is:
 *  lines to be copied | %state <state names...>
 *  %%
 * <state> | <state,state,...> CHAR  { actions }
 * ...
 *  %%
 */


/* modified for CTOS C2.0 by Joel Dunn, UNC-CH, October 1986 */

#include <stdio.h>
#include <ctype.h>

/* token types */

#define SEP 1
#define LBRACK 2
#define RBRACK 3
#define WORD 4
#define COMMA 5

/* storage sizes */

#define MAXSTATES 50			/* max number of states */
#define MAXWORD 50			/* max # of chars/word */
#define SBYTES ((MAXSTATES+7)/8)	/* # of bytes for state bitmask */

/* name of wart function in generated program */

#ifndef FNAME
#define FNAME "wart"
#endif

/* data structure for state information */

#ifdef PROVX1
typedef unsigned short CHAR;
#else
typedef unsigned char CHAR;
#endif

struct trans { CHAR states[SBYTES];	/* included states */
    	       int anyst;		/* true if this good from any state */
    	       CHAR inchr;		/* input character */
	       int actno;		/* associated action */
	       struct trans *nxt; };	/* next transition */

typedef struct trans *Trans;

/* Variables and tables */

int lines,nstates,nacts;

char tokval[MAXWORD];

int tbl[MAXSTATES*128];



char *txt1 = "\n\
#define BEGIN state =\n\
\n\
int state = 0;\n\
\n";

char *fname = FNAME;		/* function name goes here */

/* rest of program... */

char *txt2 = "()\n\
{\n\
  int c,actno;\n\
  extern int tbl[];\n\
  while (1) {\n\
	c = input();\n\
	if ((actno = tbl[c + state*128]) != -1)\n\
	  switch(actno) {\n";

/* this program's output goes here, followed by final text... */

char *txt3 = "\n    }\n  }\n\}\n\n";

/*
 * turn on the bit associated with the given state
 *
 */
setstate(state,t)
int state;
Trans t;
{
  int idx,msk;
  idx = state/8;			/* byte associated with state */
  msk = 0x80 >> (state % 8);		/* bit mask for state */
  t->states[idx] |= msk;
}

/*
 * see if the state is involved in the transition
 *
 */

teststate(state,t)
int state;
Trans t;
{
  int idx,msk;
  idx = state/8;
  msk = 0x80 >> (state % 8);
  return(t->states[idx] & msk);
}


/*
 * read input from here...
 *
 */

Trans
rdinput(infp,outfp)
FILE *infp,*outfp;
{
  Trans x,rdrules();
  lines = 1;				/* line counter */
  nstates = 0;				/* no states */
  nacts = 0;				/* no actions yet */
  fprintf(outfp,"\n\
/* WARNING -- This C source program generated by Wart preprocessor. */\n");
  fprintf(outfp,"\
/* Do not edit this file; edit the Wart-format source file instead, */\n");
  fprintf(outfp,"\
/* and then run it through Wart to produce a new C source file.     */\n\n");
  initial(infp,outfp);			/* read state names, initial defs */
  prolog(outfp);			/* write out our initial code */
  x = rdrules(infp,outfp);		/* read rules */
  epilogue(outfp);			/* write out epilogue code */
  return(x);
}

/*
 * initial - read initial definitions and state names.  Returns
 * on EOF or %%.
 *
 */

initial(infp,outfp)
FILE *infp,*outfp;
{
  int c;
  char wordbuf[MAXWORD];
  while ((c = getc(infp)) != EOF) {
	if (c == '%') {
			rdword(infp,wordbuf);
			if (strcmp(wordbuf,"states") == 0)
			    rdstates(infp,outfp);
			else if (strcmp(wordbuf,"%") == 0) return;
			else fprintf(outfp,"%%%s",wordbuf);
		      }
	else putc(c,outfp);
	if (c == '\n') lines++;
     }
}

/*
 * boolean function to tell if the given character can be part of
 * a word.
 *
 */
isin(s,c) char *s; int c; {
   for (; *s != '\0'; s++)
      if (*s == c) return(1);
   return(0);
}
isword(c)
int c;
{
  static char special[] = ".%_-$@";	/* these are allowable */
  return(isalnum(c) || isin(special,c));
}

/*
 * read the next word into the given buffer.
 *
 */
rdword(fp,buf)
FILE *fp;
char *buf;
{
  int len = 0,c;
  while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = c;
  *buf++ = '\0';			/* tie off word */
  ungetc(c,fp);				/* put break char back */
}

/*
 * read state names, up to a newline.
 *
 */

rdstates(fp,ofp)
FILE *fp,*ofp;
{
  int c;
  char wordbuf[MAXWORD];
  while ((c = getc(fp)) != EOF && c != '\n')
  {
	if (isspace(c)) continue;	/* skip whitespace */
	ungetc(c,fp);			/* put char back */
	rdword(fp,wordbuf);		/* read the whole word */
	enter(wordbuf,++nstates);	/* put into symbol tbl */
	fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
  }
  lines++;
}
		
/*
 * allocate a new, empty transition node
 *
 */

Trans
newtrans()
{
  Trans new;
  int i;
  new = (Trans) malloc(sizeof (struct trans));
  for (i=0; i<SBYTES; i++) new->states[i] = 0;
  new->anyst = 0;
  new->nxt = NULL;
  return(new);
}

/*
 * read all the rules.
 *
 */

Trans
rdrules(fp,out)
FILE *fp,*out;
{
  Trans head,cur,prev;
  int curtok,i;
  head = cur = NULL;
  while ((curtok = gettoken(fp)) != SEP) 

	switch(curtok) {
		case LBRACK: if (cur == NULL) cur = newtrans();
		    	     else fatal("duplicate state list");
			     statelist(fp,cur);/* set states */
			     continue;	/* prepare to read char */

		case WORD:   if (strlen(tokval) != 1)
					fatal("multiple chars in state");
			     if (cur == NULL) {
				cur = newtrans();
				cur->anyst = 1;
				}
			     cur->actno = ++nacts;
			     cur->inchr = tokval[0];
			     if (head == NULL) head = cur;
			     else prev->nxt = cur;
			     prev = cur;
			     cur = NULL;
			     copyact(fp,out,nacts);
			     break; 
		 default: fatal("bad input format");
	     }
	
   return(head);
}

/*
 * read a list of (comma-separated) states, set them in the
 * given transition.
 *
 */
statelist(fp,t)
FILE *fp;
Trans t;
{
  int curtok,sval;
  curtok = COMMA;
  while (curtok != RBRACK) {
	if (curtok != COMMA) fatal("missing comma");
	if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
        if ((sval = lkup(tokval)) == -1) {
		fprintf(stderr,"state %s undefined\n",tokval);
		fatal("undefined state");
	   }
        setstate(sval,t);
	curtok = gettoken(fp);
   }
}

/*
 * copy an action from the input to the output file
 *
 */
copyact(inp,outp,actno)
FILE *inp,*outp;
int actno;
{
  int c,bcnt;
  fprintf(outp,"case %d:\n",actno);
  while ((c = getc(inp)) != '\n' && isspace(c));	/* skip whitespace */
  if (c == '{') {
     bcnt = 1;
     putc(c,outp);
     while (bcnt > 0 && (c = getc(inp)) != EOF) {
	if (c == '{') bcnt++;
	else if (c == '}') bcnt--;
	else if (c == '\n') lines++;
	putc(c,outp);
      }
     if (bcnt > 0) fatal("action doesn't end");
    }
   else {
	  while (c != '\n' && c != EOF) {
		putc(c,outp);
		c = getc(inp);
	    }
	  lines++;
	}
   fprintf(outp,"\nbreak;\n");
}

/*
 * find the action associated with a given character and state.
 * returns -1 if one can't be found.
 *
 */
faction(hd,state,chr)
Trans hd;
int state,chr;
{
  while (hd != NULL) {
    if (hd->anyst || teststate(state,hd))
      if (hd->inchr == '.' || hd->inchr == chr) return(hd->actno);
    hd = hd->nxt;
    }
  return(-1);
}


/*
 * empty the table...
 *
 */
emptytbl()
{
  int i;
  for (i=0; i<nstates*128; i++) tbl[i] = -1;
}

/*
 * add the specified action to the output for the given state and chr.
 *
 */

addaction(act,state,chr)
int act,state,chr;
{
 tbl[state*128 + chr] = act;
}

writetbl(fp)
FILE *fp;
{
  warray(fp,"tbl",tbl,128*(nstates+1));
}

/*
 * write an array to the output file, given its name and size.
 *
 */
warray(fp,nam,cont,siz)
FILE *fp;
char *nam;
int cont[],siz;
{
  int i;
  fprintf(fp,"int %s[] = {\n",nam);
  for (i = 0; i < siz; i++) {
	fprintf(fp,"%d, ",cont[i]);
	if ((i % 20) == 0) putc('\n',fp);
	}
  fprintf(fp,"};\n");
}

main(argc, argv)
  int argc;
  char **argv;
{
  Trans head;
  int state,c;
  FILE *infile,*outfile;

  if (argc > 1) {
  argv++;
    if ((infile = fopen(*argv,"r")) == NULL) {
    	fprintf(stderr,"Can't open %s\n",*argv);
	fatal("unreadable input file"); } }
  else infile = stdin;

  if (argc > 2) {
  argv++;
    if ((outfile = fopen(*argv,"w")) == NULL) {
    	fprintf(stderr,"Can't write to %s\n",*argv);
	fatal("bad output file"); } }
  else outfile = stdout;

  clrhash();				/* empty hash table */
  head = rdinput(infile,outfile);	/* read input file */
  emptytbl();				/* empty our tables */
  for (state = 0; state <= nstates; state++)
    for (c = 1; c < 128; c++)
     addaction(faction(head,state,c),state,c);	/* find actions, add to tbl */
  writetbl(outfile);
  copyrest(infile,outfile);
  printf("%d states, %d actions\n",nstates,nacts);
#ifdef undef
  for (state = 1; state <= nstates; state ++)
    for (c = 1; c < 128; c++)
       if (tbl[state*128 + c] != -1) printf("state %d, chr %d, act %d\n",
       	state,c,tbl[state*128 + c]);
#endif                 
  fclose(infile);
  fclose(outfile);
  exit(0);
}

/*
 * fatal error handler
 *
 */

fatal(msg)
char *msg;
{
  fprintf(stderr,"error in line %d: %s\n",lines,msg);
  exit(1);
}

prolog(outfp)
FILE *outfp;
{
  int c;
  while ((c = *txt1++) != '\0')  putc(c,outfp);
  while ((c = *fname++) != '\0') putc(c,outfp);
  while ((c = *txt2++) != '\0')  putc(c,outfp);
}

epilogue(outfp)
FILE *outfp;
{
  int c;
  while ((c = *txt3++) != '\0') putc(c,outfp);
}

copyrest(in,out)
FILE *in,*out;
{
  int c;
  while ((c = getc(in)) != EOF) putc(c,out);
}

/*
 * gettoken - returns token type of next token, sets tokval
 * to the string value of the token if appropriate.
 *
 */

gettoken(fp)
FILE *fp;
{
  int c;
  while (1) {				/* loop if reading comments... */
    do {
	  c = getc(fp);
	  if (c == '\n') lines++;
       } while (isspace(c));		/* skip whitespace */
    switch(c) {
	  case EOF: return(SEP);
	  case '%': if ((c = getc(fp)) == '%') return(SEP);
		    tokval[0] = '%';
		    tokval[1] = c;
		    rdword(fp,tokval+2);
		    return(WORD);
	  case '<': return(LBRACK);
	  case '>': return(RBRACK);
	  case ',': return(COMMA);
	  case '/': if ((c = getc(fp)) == '*') {
	    	      rdcmnt(fp);	/* skip over the comment */
		      continue; }	/* and keep looping */
		    else {
			ungetc(c);	/* put this back into input */
			c = '/'; }	/* put character back, fall thru */

	  default: if (isword(c)) {
			  ungetc(c,fp);
			  rdword(fp,tokval);
			  return(WORD);
		      	}
		   else fatal("Invalid character in input");
	     }
  }
}

/*
 * skip over a comment
 *
 */

rdcmnt(fp)
FILE *fp;
{
  int c,star,prcnt;
  prcnt = star = 0;			/* no star seen yet */
  while (!((c = getc(fp)) == '/' && star)) {
    if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
    prcnt = (c == '%');
    star = (c == '*');
    if (c == '\n') lines++; }
}


/*
 * symbol table management for wart
 *
 * entry points:
 *   clrhash - empty hash table.
 *   enter - enter a name into the symbol table
 *   lkup - find a name's value in the symbol table.
 *
 */

#define HASHSIZE 101			/* # of entries in hash table */

struct sym { char *name;		/* symbol name */
	     int val;			/* value */
	     struct sym *hnxt; }	/* next on collision chain */
    *htab[HASHSIZE];			/* the hash table */


/*
 * empty the hash table before using it...
 *
 */
clrhash()
{
  int i;
  for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
}

/*
 * compute the value of the hash for a symbol
 *
 */
hash(name)
char *name;
{
  int sum;
  for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
  sum %= HASHSIZE;			/* take sum mod hashsize */
  if (sum < 0) sum += HASHSIZE;		/* disallow negative hash value */
  return(sum);
}

/*
 * make a private copy of a string...
 *
 */
char *
copy(s)
char *s;
{
  char *new;
  new = (char *) malloc(strlen(s) + 1);
  strcpy(new,s);
  return(new);
}

/*
 * enter state name into the hash table
 *
 */
enter(name,svalue)
char *name;
int svalue;
{
  int h;
  struct sym *cur;
  if (lkup(name) != -1) {
	fprintf(stderr,"state %s appears twice...\n");
	exit(1); }
  h = hash(name);
  cur = (struct sym *)malloc(sizeof (struct sym));
  cur->name = copy(name);
  cur->val = svalue;
  cur->hnxt = htab[h];
  htab[h] = cur;
}

/*
 * find name in the symbol table, return its value.  Returns -1
 * if not found.
 *
 */
lkup(name)
char *name;
{
  struct sym *cur;
  for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
	if (strcmp(cur->name,name) == 0) return(cur->val);
  return(-1);
}
