/*
 * Date: March 17 1985			Author: Arch D. Robison
 *						Dept. of Computer Science
 *						University of Illinois
 *						Urbana-Champaign
 *
 *					USENET:	robison@uiucdcs
 *
 * Hash8 copies stdin to stdout, while replacing certain identifiers.
 * lint can be converted to accept long identifiers by hacking in hash8
 * between /lib/cpp and /usr/lib/lint/lint appropriately.
 *
 * There are three ways to call hash8:
 *
 *     hash8 encode table
 *         Map long identifiers and those beginning with Q 
 *	   into short identifiers Q%d
 *
 *     hash8 decode table
 *         Map short identifiers Q%d into their long equivalents
 *
 *     hash8 _decode table
 *         Map short identifiers _Q%d into their long equivalents
 * 	   This is used to decode the linker's error messages
 *
 * The 'table' argument is the file name for the identifier map.
 * The 'encode' calls will either create or expand the table.
 *
 * Typically, the encode option is used to preprocess input to the compiler
 * or lint, and the decode option is used to decode error messages from
 * the compiler.
 *
 * The constant HASHBITS may need to be changed.  It is the base two
 * log of the number of distinct long identifiers which may be found.
 * E.g. the value of 12 allows for 4096 long identifiers.
 *
 * Hash8 has not been thoroughly tested, though it can translate its own
 * source correctly.  Note that this source contains all sorts of quotes
 * within quotes.
 */
#include <stdio.h>
#include <ctype.h>

/*
 * Reserved words
 *
 * Words which we don't want modified, such as the key word "register",
 * or system functions longer than 7 characters.  ReadTab enters every
 * one of them in the hash table with class W_RESERVED before the table
 * file is read.
 *
 * Def_reserved is the built-in list.  Feel free to add any others, though
 * remember to clear your hash table files after recompiling.
 *
 * Reserved is the run-time list; main fills it from the -r command line
 * options, growing the array five slots at a time.
 */
char **Reserved = NULL;		/* words given with -r (NULL until the first one) */
int Res_max = 0;		/* number of slots allocated in Reserved */
int Res_count = 0;		/* number of slots of Reserved in use */
char *Def_reserved[] = {	/* words that are always reserved */
   "continue",
   "register",
   "unsigned"
};

/* Old-style declarations for the library routines whose result is a pointer */
extern char *malloc (), *strcpy ();

#define SIGCHARS 7		/* significant characters in identifier */
#define HASHBITS 12		/* hash table address size */
#define HASHLIMIT (1<<HASHBITS)	/* number of slots in the hash table */
#define HASHMASK (HASHLIMIT-1)	/* wraps a probe index; ~HASHMASK flags a bad index */
#define PAGESIZE 4096		/* Memory allocation pagesize */ 
#define MAXLINE 1024		/* Maximum length of a source line allowed */

/*
 * The class is stored as the first byte of each hash table string and as
 * the second field of each line of the table file.
 */
#define W_SHORTEN  0		/* Identifier classes */
#define W_NORMAL   1
#define W_RESERVED 2
/* True if N is one of the class codes above.  N is evaluated twice. */
#define W_Valid(N) ((N) >= 0 && (N) <= 2)

/*
 * HashTab
 *
 * The identifier map is a hash table.  The table uses open addressing
 * with linear probing for collision resolution.  Identifiers of class
 * W_SHORTEN are mapped into Qnnn, where nnn is the table address printed
 * in decimal (Translate writes it with %d and reads it back with atoi).
 *
 * The hash table is effectively declared:
 *
 *      char *HashTab[HASHLIMIT];
 *
 * though the memory allocation is done with malloc.  Each empty hash table
 * item is NULL.  Full entries point to a string whose first byte is the
 * class of the identifier and whose remaining bytes are the identifier
 * itself, NUL terminated:
 *
 *      W_NORMAL   - don't modify this identifier
 *      W_SHORTEN  - shorten this identifier
 *      W_RESERVED - reserved word (never written to the table file)
 */
char **HashTab;
int HashSize = 0;    /* Number of Insert calls so far; LookUp gives up at HASHLIMIT */
int NewTab;          /* Set by Insert, cleared by ReadTab after loading the file;  */
                     /* main rewrites the table file if it is set at the end       */

char *StrFree;	     /* Pointer to base of free string area                 */
int StrLeft = 0;     /* Number of characters left in free string area       */

/*
 * Insert
 * 
 * Store an identifier in a given slot of the hash table.  The slot is
 * overwritten without being examined; choosing a free slot is the
 * caller's business.  The string is copied into the free string area,
 * preceded by one byte holding its class.  NewTab is set and HashSize
 * is incremented.
 *
 * The free string area is replenished from malloc in chunks of PAGESIZE
 * bytes (or of exactly the size needed, should one string be larger than
 * that).  Whatever is left of the previous chunk is abandoned.
 *
 * In
 *      k = index into hash table
 *      S = identifier
 *	Class = class of identifier (W_NORMAL,W_SHORTEN,W_RESERVED)
 *
 * Exits with status 1 if no memory can be obtained.
 */
void Insert (k,S,Class)
   int k;
   char *S;
   int Class;
   {
      register int L;

      NewTab = 1;
      HashSize++;

      /* Class byte + identifier + terminating NUL */
      L = 2 + strlen (S);

      if (L > StrLeft) {
         StrLeft = L > PAGESIZE ? L : PAGESIZE;
         StrFree = malloc (StrLeft);
         if (StrFree == NULL) {
            fprintf (stderr,"hash8: out of memory\n");
            exit (1);
         }
      }

      HashTab[k] = StrFree;
      *StrFree = Class;
      strcpy (StrFree+1, S);

      /* Charge the space exactly once */
      StrFree += L;
      StrLeft -= L;
   }

/*
 * LookUp
 *
 * Look up an identifier in the identifier hash table.
 * If not found, then insert it in the table.
 *
 * The hashing uses open addressing with linear probing.
 * The algorithm is a blue-light special, a better hash function
 * (double hashing?) should be used.
 *
 * An identifier that arrives as W_NORMAL or W_RESERVED is first searched
 * for along the probe chain selected by its first SIGCHARS characters.
 * An exact match is returned at once.  If some other identifier with the
 * same SIGCHARS-character prefix is met, the class is changed to
 * W_SHORTEN.  An identifier of class W_SHORTEN (on entry, or by the time
 * the first search is over) is searched for along the probe chain
 * selected by all of its characters.
 *
 * The hash is accumulated in an unsigned so that long identifiers wrap
 * around instead of overflowing; only the low HASHBITS bits are used.
 * 
 * In
 *      S = identifier
 *      Class = identifier class (W_NORMAL,W_SHORTEN,W_RESERVED)
 * Out
 *      result = index into hash table 
 *
 * Exits with status 1 if the insertion fills the table.
 */
int LookUp (S,Class)
   char *S;
   int Class;
   {
      register unsigned h;
      register int j;
      register char *T;

      /*
       * Start from a defined state whatever the class is; an identifier
       * that is W_SHORTEN on entry skips the prefix pass altogether.
       */
      h = 0;
      T = S;

      if (Class != W_SHORTEN) {

         /*
          * Hash first seven characters of identifier.  Stop early at the
          * end of a shorter string rather than reading beyond it.
          */
         for (j=0; j<SIGCHARS && *T; j++) h = (h<<1) + h + *T++;

         /* 7-character search for identifier in table */
         for (j = h&HASHMASK; HashTab[j] != NULL; j = (j+1)&HASHMASK)
            if (!strncmp (HashTab[j]+1,S,SIGCHARS)) {
               if (!strcmp (HashTab[j]+1,S)) return j;
               Class = W_SHORTEN;
               break;
            }

         /* The following test and assignment cause identifiers to be
          * hashed even if they are the first long identifier.  This
          * protects from truncation by the compiler.  Otherwise, when
          * you run adb you have to know which long id came first.
          * Geoff Kuenning 11/8/86
          */
         if (Class == W_NORMAL  &&  strlen (S) > SIGCHARS)
            Class = W_SHORTEN;
      }

      if (Class == W_SHORTEN) {
         /* 
          * Hash the complete identifier (carrying on from the prefix
          * hash, if there is one) and look it up in the table.
          */
         while (*T) h = (h<<1) + h + *T++;

         /* all characters search for identifier in table */
         for (j = h&HASHMASK; HashTab[j] != NULL; j = (j+1)&HASHMASK)
            if (!strcmp (HashTab[j]+1,S)) return j;
      }

      /* Identifier was not found - insert it in the free slot just reached */
      Insert (j,S,Class);
      if (HashSize == HASHLIMIT) {
         fprintf (stderr,"hash8: table overflow\n");
         exit (1);
      }
      return j;
   }

#define C_CODE 0	/* Defines for translator states */
#define S_QUOTE 1
#define D_QUOTE 2
#define COMMENT 3

#define ENCODE 0	/* Mode values for translator */
#define DECODE 1
#define _DECODE 2

/*
 * Translate
 *
 * Translate input stream with identifier map.
 *
 * This should have been written with lex.
 */
Translate (Mode) 
   int Mode;
   {
      register char C, *P, *Q;
      char S[MAXLINE];
      int k, state=C_CODE, IsQ;

      while (NULL != fgets (S,MAXLINE,stdin)) 
         for (P=S; C= *P; )
	    switch (state) {
	       case COMMENT:
	          putchar (*P++);
                  if (C == '*' && *P == '/') state=C_CODE, putchar (*P++);
		  break;
	       case S_QUOTE:
	       case D_QUOTE:
		  putchar (*P++);
		  switch (C) {
		     case '\'': if (state == S_QUOTE) state = C_CODE; break;
		     case '"' : if (state == D_QUOTE) state = C_CODE; break;
		     case '\\': putchar (*P++); break;
		     default: break; 
		  }
	          break;
	       
               case C_CODE:
		  if (isalpha (C) || C=='_') {
	             /* Beginning of identifier */
      	             for (Q=P; C= *Q, isalnum(C)||C=='_'; Q++);
	             *Q = '\0';
   	             switch (Mode) {

			case ENCODE: /* We are encoding C source */
		           IsQ = *P=='Q' && isdigit (P[1]);
   	                   if (Q-P <= SIGCHARS && !IsQ) 
   	                      fputs (P,stdout);
		           else {
			      k = LookUp (P,IsQ ? W_SHORTEN : W_NORMAL);
			      if (*HashTab[k] != W_SHORTEN) fputs (P,stdout);
		              else printf ("Q%d",k);
		           }
			   break;

			case _DECODE: /* We are decoding linker messages */
		           if (*P != '_') {
			      fputs (P,stdout);
			      break;
			   }
			   putchar (*P++);
			   /* continue on down to case DECODE */

			case DECODE: /* We are decoding error message */
   	                   if (*P=='Q' && isdigit (P[1])) { 
			      k=atoi(P+1);
			      if (!(k &~HASHMASK) && HashTab[k]!=NULL) 
				 P = HashTab[k] + 1;
			   }
		           fputs (P,stdout);
			   break;
		     }
   	             *(P=Q) = C;
   	          } else if (isdigit (C)) {
	             /* Skip number to avoid changing long numbers */
	             while (isalnum(*P)) putchar (*P++);
	          } else {
		     putchar (*P++);
		     switch (C) {
		        default: break;
			case '\'': state = S_QUOTE; break;
                        case '"' : state = D_QUOTE; break;
                        case '/' : if (*P != '*') continue;
			           state=COMMENT;
		        case '\\': putchar (*P++); break;
		     }
	          }
	       }
   }

/*
 * ReadTab
 *
 * Read the hash table.
 *
 * The reserved words (Def_reserved, then Reserved) are entered first,
 * through LookUp.  Then each line of the table file, which holds a slot
 * number, a class and an identifier, is stored directly in its slot with
 * Insert; an entry whose slot is already in use replaces what was there.
 *
 * If the file cannot be opened, only the reserved words are loaded and
 * NewTab is left as Insert set it.  Otherwise NewTab is cleared once the
 * file has been read.
 *
 * In
 *      Name = name of hash table file
 *
 * Exits with status 1 on a malformed line, a slot number outside the
 * table, an unknown class, or an identifier too long for the buffer.
 */
ReadTab (Name)
   char *Name;
   {
      FILE *Table;
      char S[MAXLINE];
      char Format[32];
      int k,L,Class;

      /* First record all words we don't want mangled in hash table */      
      for (k = 0;  k < sizeof (Def_reserved) / sizeof (char *);  k++)
         LookUp (Def_reserved[k], W_RESERVED);
      for (k = 0;  k < Res_count;  k++) 
         LookUp (Reserved[k],W_RESERVED);

      if (NULL == (Table = fopen (Name,"r"))) return;

      /*
       * Limit %s to what S can hold.  The tail of an over-long identifier
       * is then taken for the start of the next entry and fails the
       * checks below, instead of running off the end of the buffer.
       */
      sprintf (Format,"%%d %%d %%%ds",MAXLINE-1);

      while (EOF != (L = fscanf (Table,Format,&k,&Class,S))) {
         if (L != 3 || k &~HASHMASK || !W_Valid (Class)) {
            fprintf (stderr,"hash8 table error\n");
            exit (1);
         }
         Insert (k,S,Class);
      }
      fclose (Table); 
      NewTab = 0;
   }

/*
 * WriteTab
 *
 * Write out the hash table
 *
 * One line is written for every slot in use, except those holding
 * reserved words: slot number, class and identifier, separated by tabs.
 * This is the format ReadTab expects.
 *
 * In
 *      Name = name of hash table file
 *
 * Exits with status 1 if the file cannot be opened, or if writing or
 * closing it fails.
 */
WriteTab (Name)
   char *Name; 
   { 
      FILE *Table;
      int i, Bad;

      if (NULL == (Table = fopen (Name,"w"))) {
         fprintf (stderr,"hash8: can't open hash table file '%s'\n",Name);
         exit (1);
      }

      for (i=0; i<HASHLIMIT; i++)
         if (HashTab[i] != NULL && *HashTab[i] != W_RESERVED) 
            fprintf (Table,"%d	%d	%s\n",i,*HashTab[i],HashTab[i]+1);

      /*
       * A table that is cut short would spoil every later decode, so
       * don't let a failed write go by unnoticed.  The file is closed in
       * either case.
       */
      Bad = ferror (Table);
      if (fclose (Table) == EOF) Bad = 1;
      if (Bad) {
         fprintf (stderr,"hash8: error writing hash table file '%s'\n",Name);
         exit (1);
      }
   }

/*
 * main
 *
 *     hash8 [-r reserved] ... (encode|decode|_decode) table
 *
 * Collect the -r words in Reserved, allocate and clear the hash table,
 * load the table file, translate stdin to stdout in the mode asked for,
 * and write the table file back if anything was inserted.
 *
 * Exits with status 1 on a usage error or when memory runs out;
 * returns 0 otherwise.
 */
main (argc,argv)
   int argc; char *argv[];
   {
      extern char *realloc ();	/* declared here: its result is a pointer */
      register int i;
      int Mode;

      /*
       * Set up the reserved-word list.  The word may be attached to the
       * option (-rword) or be the following argument (-r word).
       */
      while (argc > 3  &&  argv[1][0] == '-'  &&  argv[1][1] == 'r') {
         argv[1] += 2;
         if (argv[1][0] == '\0') {
            argc--;
            argv++;
         }
         if (Res_count == Res_max) {
            /* List is full (or not yet allocated): make room for 5 more */
            Res_max += 5;
            if (Reserved == NULL)
               Reserved = (char **) malloc (Res_max * sizeof (char *));
            else
               Reserved = (char **)
                realloc ((char *) Reserved, Res_max * sizeof (char *));
            if (Reserved == NULL) {
               fprintf (stderr,"hash8: out of memory\n");
               exit (1);
            }
         }
         Reserved[Res_count] = argv[1];
         Res_count++;
         argc--;
         argv++;
      }
      if (argc != 3) {
         fprintf (stderr,
           "usage: hash8 [-r reserved] ... (encode|[_]decode) table\n");
         exit (1);
      }

      /*
       * If either stdin or stdout is a tty, set both unbuffered, for use
       * in pipes.
       * Geoff Kuenning, 11/8/86
       */
      if (isatty (fileno (stdin))  ||  isatty (fileno (stdout))) {
         setbuf (stdin, NULL);
         setbuf (stdout, NULL);
      }

      /* Every slot, the first one included, has to start out empty */
      HashTab = (char **) malloc ((sizeof (char*)) * (HASHLIMIT));
      if (HashTab == NULL) {
         fprintf (stderr,"hash8: out of memory\n");
         exit (1);
      }
      for (i = 0; i < HASHLIMIT; i++) HashTab[i] = NULL;
     
      ReadTab(argv[2]); 
      
      if (!strcmp (argv[1],"encode")) Mode = ENCODE;
      else if (!strcmp (argv[1],"decode")) Mode = DECODE; 
      else if (!strcmp (argv[1],"_decode")) Mode = _DECODE; 
      else {
         fprintf (stderr,"hash8: second arg must be 'encode' or 'decode'\n");
         exit (1);
      }

      Translate (Mode);
      if (NewTab) WriteTab(argv[2]);
      return 0;
   }

