From bacchus.pa.dec.com!decwrl!apple!usc!cs.utexas.edu!uunet!allbery Sat Jun 16 11:53:40 PDT 1990
Article 1647 of comp.sources.misc:
Path: bacchus.pa.dec.com!decwrl!apple!usc!cs.utexas.edu!uunet!allbery
From: shankar@hpclscu.cup.hp.com (Shankar Unni)
Newsgroups: comp.sources.misc
Subject: v13i052: Skeleton Parser and Lexer for ANSI C
Message-ID: <93339@uunet.UU.NET>
Date: 15 Jun 90 23:02:07 GMT
Sender: allbery@uunet.UU.NET
Lines: 1310
Approved: allbery@uunet.UU.NET (Brandon S. Allbery - comp.sources.misc)

Posting-number: Volume 13, Issue 52
Submitted-by: shankar@hpclscu.cup.hp.com (Shankar Unni)
Archive-name: ansi-c_su/part01

The following shar file is a set of source files (and test cases) for a
small, portable parser and lexer for ANSI C. This stuff originally came to
me from Vick Khera (@CMU), and I have beefed it up to handle typedef's
properly, and do some rudimentary line-control.

If you have any enhancements, bug-fixes or requests, send them to me
(shankar%hpclscu@hpda.hp.com).


# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by Shankar Unni <shankar@hpclscu> on Wed Jun 13 19:35:25 1990
#
# This archive contains:
#	Makefile	README		TEST1.C		TEST2.C		
#	gram.y		main.c		misctypes.h	scan.l		
#	scanaux.c	scanaux.h	
#

# Clear LANG and put /bin and /usr/bin at the front of PATH before any
# extraction command runs.
LANG=""; export LANG
PATH=/bin:/usr/bin:$PATH; export PATH

# Extract Makefile.  The quoted '@EOF' delimiter stops the shell from
# expanding anything inside the here-document.
echo x - Makefile
cat >Makefile <<'@EOF'
# Makefile for the ANSI C skeleton parser ("ansi_c").
#
# gram.y and scan.l are compiled through make's built-in yacc/lex rules.
# YFLAGS = -d asks yacc to write y.tab.h, which scan.l includes.
YFLAGS	= -d
CFLAGS	= -g
LFLAGS	=

SRC	= gram.y scan.l main.c scanaux.c misctypes.h scanaux.h
OBJ	= main.o gram.o scan.o scanaux.o
TESTS   = TEST1.C TEST2.C
BIN	= ansi_c

$(BIN)	:	$(OBJ)
	$(CC) $(CFLAGS) $(OBJ) -o $(BIN)

scan.o	: y.tab.h

# Header dependencies: rebuild the objects whose sources include these.
main.o gram.o scan.o : scanaux.h
gram.o scanaux.o : misctypes.h

clean	:
	rm -f y.tab.h y.output *.o

# Run the parser over each test input.  "./" is required so the freshly
# built binary is found even when "." is not in PATH.
test: $(BIN) $(TESTS)
	for fn in $(TESTS); do echo " "; echo $$fn: ; ./$(BIN) < $$fn ; done

shar: CGRAM.SHAR

CGRAM.SHAR: README Makefile $(SRC) $(TESTS)
	shar -c README Makefile $(SRC) $(TESTS) > $@
@EOF

# Make the extracted Makefile readable by all, writable by owner and group.
chmod 664 Makefile

# Extract README.
echo x - README
cat >README <<'@EOF'
This grammar implements the latest ANSI C grammar. I'm not sure if there
are any omissions. This stuff came to me from outside HP (via Vick Khera
of CMU), and I have sort of fixed it up to conform to the latest draft of
the standard (Dec 88). I'm not sure if I missed out on anything..

I added all the typedef-handling code (scanaux.c). It can handle nested
re-declarations of typedefs and all that jazz.

Notes:

1. To make the parser, type "make". This should produce a program file
   called "ansi_c".

2. The scanner recognizes the cpp line specifiers of the form
   "# <number> [ <filename> ]", and sets internal variables accordingly.
   Any other line starting with "#" is ignored (this includes pragmas).
   If you need to do something with these, change the function line_number.

3. By default, the parse is silent. However, if the "ansi_c" skeleton is
   run with the "-L" option, then the input is echoed to the output. In
   general, if you want the input echoed, set the global variable "input_echo".

4. There are a couple of test cases to make sure that ansi_c compiled
   correctly. After making the parser, try "make test".

----
Shankar.
shankar%hpclscu@hpda.hp.com

P.S. If you do make any improvements to this, I'd appreciate a copy of the
changes.
@EOF

# Set permissions on README, then extract the first parser test input.
chmod 664 README

echo x - TEST1.C
cat >TEST1.C <<'@EOF'
extern int fum(const char *);
typedef int FOO;
int BAR;

FOO junk;

func1()
{
    char *FOO;
    typedef char *BAR;
    BAR junk;

    FOO = 0;
}

struct {
    int FOO;
    int BAR;
    struct {
	FOO junk;
    } junk;
} xxx;

struct {
    FOO BAR;
} yyy;

func2()
{
    FOO junk;

    BAR = 0;
}
@EOF

# Set permissions on TEST1.C, then extract the second parser test input.
chmod 664 TEST1.C

echo x - TEST2.C
cat >TEST2.C <<'@EOF'
struct foo {int a;};
typedef int foo;
struct foo bar;
foo fum;
int foo();
foo bar;
@EOF

# Set permissions on TEST2.C, then extract the yacc grammar.
chmod 664 TEST2.C

echo x - gram.y
cat >gram.y <<'@EOF'
%{
/* Declarations copied verbatim into the generated parser. */
#include "misctypes.h"
#include "scanaux.h"
/* Text and length of the current token; used by the direct_declarator action. */
extern char yytext[];
extern int yyleng;
%}
/* Identifiers, literals and multi-character operators returned by the scanner. */
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME

/* Storage-class, type and aggregate keywords. */
%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS

/* Statement keywords. */
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN

/* A parse succeeds when the whole input reduces to translation_unit. */
%start translation_unit
%%

/*
 * Expressions.  Operator precedence is encoded by the chain of
 * nonterminals below (one level per nonterminal) rather than by
 * %left/%right declarations; none of these rules carries an action.
 */

primary_expr
	: identifier
	| CONSTANT
	| STRING_LITERAL
	| '(' expr ')'
	;

/* Subscripting, calls, member access and postfix ++/--. */
postfix_expr
	: primary_expr
	| postfix_expr '[' expr ']'
	| postfix_expr '(' ')'
	| postfix_expr '(' argument_expr_list ')'
	| postfix_expr '.' identifier
	| postfix_expr PTR_OP identifier
	| postfix_expr INC_OP
	| postfix_expr DEC_OP
	;

argument_expr_list
	: assignment_expr
	| argument_expr_list ',' assignment_expr
	;

/* Prefix operators and both forms of sizeof. */
unary_expr
	: postfix_expr
	| INC_OP unary_expr
	| DEC_OP unary_expr
	| unary_operator cast_expr
	| SIZEOF unary_expr
	| SIZEOF '(' type_name ')'
	;

unary_operator
	: '&'
	| '*'
	| '+'
	| '-'
	| '~'
	| '!'
	;

cast_expr
	: unary_expr
	| '(' type_name ')' cast_expr
	;

/* Binary operators, tightest binding first; each level is left-recursive. */
multiplicative_expr
	: cast_expr
	| multiplicative_expr '*' cast_expr
	| multiplicative_expr '/' cast_expr
	| multiplicative_expr '%' cast_expr
	;

additive_expr
	: multiplicative_expr
	| additive_expr '+' multiplicative_expr
	| additive_expr '-' multiplicative_expr
	;

shift_expr
	: additive_expr
	| shift_expr LEFT_OP additive_expr
	| shift_expr RIGHT_OP additive_expr
	;

relational_expr
	: shift_expr
	| relational_expr '<' shift_expr
	| relational_expr '>' shift_expr
	| relational_expr LE_OP shift_expr
	| relational_expr GE_OP shift_expr
	;

equality_expr
	: relational_expr
	| equality_expr EQ_OP relational_expr
	| equality_expr NE_OP relational_expr
	;

and_expr
	: equality_expr
	| and_expr '&' equality_expr
	;

exclusive_or_expr
	: and_expr
	| exclusive_or_expr '^' and_expr
	;

inclusive_or_expr
	: exclusive_or_expr
	| inclusive_or_expr '|' exclusive_or_expr
	;

logical_and_expr
	: inclusive_or_expr
	| logical_and_expr AND_OP inclusive_or_expr
	;

logical_or_expr
	: logical_and_expr
	| logical_or_expr OR_OP logical_and_expr
	;

/* The ?: operator; the right-recursive last operand makes it right-associative. */
conditional_expr
	: logical_or_expr
	| logical_or_expr '?' expr ':' conditional_expr
	;

/* Assignment is right-recursive; the left side must be a unary_expr. */
assignment_expr
	: conditional_expr
	| unary_expr assignment_operator assignment_expr
	;

assignment_operator
	: '='
	| MUL_ASSIGN
	| DIV_ASSIGN
	| MOD_ASSIGN
	| ADD_ASSIGN
	| SUB_ASSIGN
	| LEFT_ASSIGN
	| RIGHT_ASSIGN
	| AND_ASSIGN
	| XOR_ASSIGN
	| OR_ASSIGN
	;

/* Comma expression. */
expr
	: assignment_expr
	| expr ',' assignment_expr
	;

/* Syntactically a conditional_expr; constancy is not checked by this grammar. */
constant_expr
	: conditional_expr
	;

/*
 * Declarations.  When a declaration is complete, the in_typedef flag is
 * cleared and typedef-name recognition is switched back on for whatever
 * follows.
 */
declaration
	: declaration_specifiers ';'
	  {reset_in_typedef(); set_typedef_recognition(); }
	| declaration_specifiers init_declarator_list ';'
	  {reset_in_typedef(); set_typedef_recognition(); }
	;

/* Any mix of storage classes, type specifiers and qualifiers, in any order. */
declaration_specifiers
	: storage_class_specifier
	| storage_class_specifier declaration_specifiers
	| type_specifier
	| type_specifier declaration_specifiers
	| type_qualifier
	| type_qualifier declaration_specifiers
	;

init_declarator_list
	: init_declarator
	| init_declarator_list ',' init_declarator
	;

init_declarator
	: declarator
	| declarator '=' initializer
	;

/* Seeing "typedef" sets in_typedef, which enter_tdname() consults for each declarator. */
storage_class_specifier
	: TYPEDEF { set_in_typedef(); }
	| EXTERN
	| STATIC
	| AUTO
	| REGISTER
	;

/*
 * The empty leading action switches typedef-name recognition off before
 * type_specifier2 is parsed.
 * NOTE(review): presumably so that an identifier following the type
 * specifier is scanned as a plain IDENTIFIER even if it names a typedef
 * (see TEST1.C) -- confirm against the scanner's lookahead behaviour.
 */
type_specifier
	: { reset_typedef_recognition(); } type_specifier2
	;

type_specifier2
	: VOID
	| CHAR
	| SHORT
	| INT
	| LONG
	| FLOAT
	| DOUBLE
	| SIGNED
	| UNSIGNED
	| struct_or_union_specifier
	| enum_specifier
	| typedef_name
	;

/* Tagged definition, anonymous definition, or a bare tag reference. */
struct_or_union_specifier
	: struct_or_union identifier struct_body
	| struct_or_union struct_body
	| struct_or_union identifier
	;

/*
 * Braced member list of a struct or union.
 *
 * Before the '{': save in_memberlist and in_typedef on the state stack,
 * mark that a member list is being parsed, clear in_typedef and turn
 * typedef-name recognition on for the member declarations.
 * After the '}': turn recognition off again and restore the two saved
 * flags (popped in the reverse order of the pushes).
 */
struct_body
	: { push_in_memberlist();
	    push_in_typedef();
	    set_in_memberlist();
	    reset_in_typedef();
	    set_typedef_recognition(); }
	  '{' struct_declaration_list '}'
	  { reset_typedef_recognition();
	    pop_in_typedef();
	    pop_in_memberlist(); }
	;

/* After "struct"/"union" the next name is a tag, so typedef recognition is turned off. */
struct_or_union
	: STRUCT { reset_typedef_recognition(); }
	| UNION { reset_typedef_recognition(); }
	;

struct_declaration_list
	: struct_declaration
	| struct_declaration_list struct_declaration
	;

/* Each member declaration starts with typedef recognition switched on. */
struct_declaration
	: { set_typedef_recognition(); }
	  struct_declaration2
	;

struct_declaration2
	: specifier_qualifier_list struct_declarator_list ';'
	;

/* Type specifiers and qualifiers only (no storage class). */
specifier_qualifier_list
	: type_specifier
	| type_specifier specifier_qualifier_list
	| type_qualifier
	| type_qualifier specifier_qualifier_list
	;

struct_declarator_list
	: struct_declarator
	| struct_declarator_list ',' struct_declarator
	;

/* Plain member, unnamed bit-field, or named bit-field. */
struct_declarator
	: declarator
	| ':' constant_expr
	| declarator ':' constant_expr
	;

enum_specifier
	: enum_head '{' enumerator_list '}'
	| enum_head identifier '{' enumerator_list '}'
	| enum_head identifier
	;

/* After "enum" the next name is a tag, so typedef recognition is turned off. */
enum_head
	: ENUM { reset_typedef_recognition(); }
	;

enumerator_list
	: enumerator
	| enumerator_list ',' enumerator
	;

enumerator
	: identifier
	| identifier '=' constant_expr
	;

type_qualifier
	: CONST
	| VOLATILE
	;

declarator
	: direct_declarator
	| pointer direct_declarator
	;

/*
 * The declared name is handed to enter_tdname(), which records it as a
 * typedef name or as an ordinary identifier depending on the current
 * in_typedef / in_memberlist state.
 */
direct_declarator
	: identifier { enter_tdname (yytext, yyleng); }
	| '(' declarator ')'
	| direct_declarator '[' ']'
	| direct_declarator '[' constant_expr ']'
	| direct_declarator '(' ')'
	| direct_declarator '(' parameter_type_list ')'
	| direct_declarator '(' identifier_list ')'
	;

pointer
	: '*'
	| '*' type_qualifier_list
	| '*' pointer
	| '*' type_qualifier_list pointer
	;

type_qualifier_list
	: type_qualifier
	| type_qualifier_list type_qualifier
	;

/* Parameter names of an old-style (non-prototype) function declarator. */
identifier_list
	: identifier
	| identifier_list ',' identifier
	;

/*
 * Prototype parameter list.
 *
 * Before the list: save in_typedef, turn typedef-name recognition on and
 * clear in_typedef for the duration of the parameters.
 * After the list: restore in_typedef and turn recognition off again.
 */
parameter_type_list
	: { push_in_typedef();
	    set_typedef_recognition();
	    reset_in_typedef(); }
	  parameter_type_list2
	  { pop_in_typedef();
	    reset_typedef_recognition(); }
	;

/* Parameter list proper, optionally ending in ", ...". */
parameter_type_list2
	: parameter_list
	| parameter_list ',' ELLIPSIS
	;

parameter_list
	: parameter_declaration
	| parameter_list ',' parameter_declaration
	;

/* Named parameter, type only, or type with an abstract declarator. */
parameter_declaration
	: declaration_specifiers declarator
	| declaration_specifiers
	| declaration_specifiers abstract_declarator
	;

/* Type name as used in casts and sizeof. */
type_name
	: specifier_qualifier_list
	| specifier_qualifier_list abstract_declarator
	;

/* Declarator without a name. */
abstract_declarator
	: pointer
	| direct_abstract_declarator
	| pointer direct_abstract_declarator
	;

direct_abstract_declarator
	: '(' abstract_declarator ')'
	| '[' ']'
	| '[' constant_expr ']'
	| direct_abstract_declarator '[' ']'
	| direct_abstract_declarator '[' constant_expr ']'
	| '(' ')'
	| '(' parameter_type_list ')'
	| direct_abstract_declarator '(' ')'
	| direct_abstract_declarator '(' parameter_type_list ')'
	;

/* TYPE_NAME is what the scanner's check_type() returns for a known typedef. */
typedef_name
	: TYPE_NAME
	;

/* Single expression or a braced list, with an optional trailing comma. */
initializer
	: assignment_expr
	| '{' initializer_list '}'
	| '{' initializer_list ',' '}'
	;

initializer_list
	: initializer
	| initializer_list ',' initializer
	;

/* Statements. */
statement
	: labeled_statement
	| compound_statement
	| expression_statement
	| selection_statement
	| iteration_statement
	| jump_statement
	;

labeled_statement
	: identifier ':' statement
	| CASE constant_expr ':' statement
	| DEFAULT ':' statement
	;

/* Block: optional declarations followed by optional statements. */
compound_statement
	: '{' cs_decl_list cs_stmt_list '}'
	;

/*
 * Declarations at the head of a block.  enter_TD_scope() opens a scope
 * before them and exit_TD_scope() closes it as soon as the declaration
 * list ends, i.e. before the block's statements are parsed.
 */
cs_decl_list
	: { enter_TD_scope(); }
	  declaration_list
	  { exit_TD_scope(); }
	|
	;

/* Typedef-name recognition is switched off before the statement list. */
cs_stmt_list
	: { reset_typedef_recognition(); }
	  statement_list
	|
	;

declaration_list
	: declaration
	| declaration_list declaration
	;

statement_list
	: statement
	| statement_list statement
	;

expression_statement
	: ';'
	| expr ';'
	;

/* The dangling-else ambiguity is left to yacc's default shift resolution. */
selection_statement
	: IF '(' expr ')' statement
	| IF '(' expr ')' statement ELSE statement
	| SWITCH '(' expr ')' statement
	;

/* The eight FOR alternatives enumerate every combination of omitted clauses. */
iteration_statement
	: WHILE '(' expr ')' statement
	| DO statement WHILE '(' expr ')' ';'
	| FOR '(' ';' ';' ')' statement
	| FOR '(' ';' ';' expr ')' statement
	| FOR '(' ';' expr ';' ')' statement
	| FOR '(' ';' expr ';' expr ')' statement
	| FOR '(' expr ';' ';' ')' statement
	| FOR '(' expr ';' ';' expr ')' statement
	| FOR '(' expr ';' expr ';' ')' statement
	| FOR '(' expr ';' expr ';' expr ')' statement
	;

jump_statement
	: GOTO identifier ';'
	| CONTINUE ';'
	| BREAK ';'
	| RETURN ';'
	| RETURN expr ';'
	;

/* Top level: a sequence of external declarations. */
translation_unit
	: external_declaration
	| translation_unit external_declaration
	;

/* Every external declaration starts with recognition on and in_typedef clear. */
external_declaration
	: { set_typedef_recognition(); reset_in_typedef(); }
	  external_declaration2
	;

external_declaration2
	: function_definition
	| declaration
	;

/* The first alternative has no declaration specifiers at all. */
function_definition
	: declarator function_body
	| declaration_specifiers declarator function_body
	;

/* The second alternative accepts old-style parameter declarations before the body. */
function_body
	: compound_statement
	| declaration_list  compound_statement
	;

/* Wrapper so that every use of a plain identifier goes through one nonterminal. */
identifier
	: IDENTIFIER
	;
%%

#include <stdio.h>

extern int yycolumn, yylineno;
extern unsigned char yyfilename[];

/*
 * yyerror - report a parse error on stdout.
 *
 * s is the message supplied by the generated parser.  Pending output is
 * flushed first; when input echoing is on, a '^' is printed on its own
 * line, right-aligned in a field yycolumn wide, so it appears under the
 * echoed input.  The message is prefixed with the current file name and
 * line number.  Always returns 0.
 */
int yyerror(s)
char *s;
{
	fflush(stdout);
	if (input_echo) {
	    printf("\n%*s\n", yycolumn, "^");
	}
	/* yyfilename is an unsigned char array; %s expects a char pointer */
	printf ("%s, line %d: %s\n", (char *) yyfilename, yylineno, s);
	return 0;
}
@EOF

# Set permissions on gram.y, then extract the driver program.
chmod 664 gram.y

echo x - main.c
cat >main.c <<'@EOF'
#include "scanaux.h"

/* Non-zero makes the scanner echo each character it reads; set by "-L". */
int input_echo = 0;

/*
 * Program entry: enable input echo when the first argument is "-L",
 * initialise the scanner support tables, then run the parser and use
 * its result as the exit status.
 */
main(argc, argv)
int argc;
char **argv;
{
	int yyparse();
	int echo_requested;

	echo_requested = (argc >= 2) && (strcmp (argv[1], "-L") == 0);
	if (echo_requested)
		input_echo = 1;

	init_scanner();

	return yyparse();
}
@EOF

# Set permissions on main.c, then extract the boolean-constant header.
chmod 664 main.c

echo x - misctypes.h
cat >misctypes.h <<'@EOF'
/* Boolean constants used by the typedef-tracking support code. */
#define TRUE 1
#define FALSE 0
@EOF

# Set permissions on misctypes.h, then extract the lex scanner.
chmod 664 misctypes.h

echo x - scan.l
cat >scan.l <<'@EOF'
D			[0-9]
L			[a-zA-Z_]
H			[a-fA-F0-9]
E			[Ee][+-]?{D}+
FS			(f|F|l|L)
IS			(u|U|l|L)*

%{
/*
 * Pattern names defined above:
 *   D  decimal digit            L  letter or underscore
 *   H  hexadecimal digit        E  exponent part of a floating constant
 *   FS floating suffix          IS integer suffix letters
 */
#include <stdio.h>
#include "scanaux.h"
#include "y.tab.h"

/* Replace lex's own input() with the function defined at the end of this file. */
#undef input
extern int input();

/* File name printed by yyerror(); overwritten by "# <number> <file>" lines. */
unsigned char yyfilename[256] = "stdin";
%}

%%

"#"			{ /* cpp line marker or other directive; the line is consumed */ line_number(); }
"/*"			{ /* skip to the end of the comment */ comment(); }

"auto"			{ return(AUTO); }
"break"			{ return(BREAK); }
"case"			{ return(CASE); }
"char"			{ return(CHAR); }
"const"			{ return(CONST); }
"continue"		{ return(CONTINUE); }
"default"		{ return(DEFAULT); }
"do"			{ return(DO); }
"double"		{ return(DOUBLE); }
"else"			{ return(ELSE); }
"enum"			{ return(ENUM); }
"extern"		{ return(EXTERN); }
"float"			{ return(FLOAT); }
"for"			{ return(FOR); }
"goto"			{ return(GOTO); }
"if"			{ return(IF); }
"int"			{ return(INT); }
"long"			{ return(LONG); }
"register"		{ return(REGISTER); }
"return"		{ return(RETURN); }
"short"			{ return(SHORT); }
"signed"		{ return(SIGNED); }
"sizeof"		{ return(SIZEOF); }
"static"		{ return(STATIC); }
"struct"		{ return(STRUCT); }
"switch"		{ return(SWITCH); }
"typedef"		{ return(TYPEDEF); }
"union"			{ return(UNION); }
"unsigned"		{ return(UNSIGNED); }
"void"			{ return(VOID); }
"volatile"		{ return(VOLATILE); }
"while"			{ return(WHILE); }

{L}({L}|{D})*		{ /* IDENTIFIER or TYPE_NAME, decided by the typedef table */ return(check_type()); }

0[xX]{H}+{IS}?		{ return(CONSTANT); }
0{D}+{IS}?		{ return(CONSTANT); }
{D}+{IS}?		{ return(CONSTANT); }
L?'(\\.|[^\\'])+'	{ /* character constant, optionally wide (L'x') */ return(CONSTANT); }

{D}+{E}{FS}?		{ return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?	{ return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?	{ return(CONSTANT); }

L?\"(\\.|[^\\"])*\"	{ /* string literal, optionally wide (L"...") */ return(STRING_LITERAL); }

"..."                   { return(ELLIPSIS); }
">>="			{ return(RIGHT_ASSIGN); }
"<<="			{ return(LEFT_ASSIGN); }
"+="			{ return(ADD_ASSIGN); }
"-="			{ return(SUB_ASSIGN); }
"*="			{ return(MUL_ASSIGN); }
"/="			{ return(DIV_ASSIGN); }
"%="			{ return(MOD_ASSIGN); }
"&="			{ return(AND_ASSIGN); }
"^="			{ return(XOR_ASSIGN); }
"|="			{ return(OR_ASSIGN); }
">>"			{ return(RIGHT_OP); }
"<<"			{ return(LEFT_OP); }
"++"			{ return(INC_OP); }
"--"			{ return(DEC_OP); }
"->"			{ return(PTR_OP); }
"&&"			{ return(AND_OP); }
"||"			{ return(OR_OP); }
"<="			{ return(LE_OP); }
">="			{ return(GE_OP); }
"=="			{ return(EQ_OP); }
"!="			{ return(NE_OP); }
";"			{ return(';'); }
"{"			{ return('{'); }
"}"			{ return('}'); }
","			{ return(','); }
":"			{ return(':'); }
"="			{ return('='); }
"("			{ return('('); }
")"			{ return(')'); }
"["			{ return('['); }
"]"			{ return(']'); }
"."			{ return('.'); }
"&"			{ return('&'); }
"!"			{ return('!'); }
"~"			{ return('~'); }
"-"			{ return('-'); }
"+"			{ return('+'); }
"*"			{ return('*'); }
"/"			{ return('/'); }
"%"			{ return('%'); }
"<"			{ return('<'); }
">"			{ return('>'); }
"^"			{ return('^'); }
"|"			{ return('|'); }
"?"			{ return('?'); }

[ \t\v\n\f]		{ /* white space separates tokens and is otherwise ignored */ }
.			{ /* ignore bad characters */ }

%%

/* Column of the most recently read character; maintained by input(). */
int yycolumn = 0;

/*
 * yywrap - called by the scanner at end of input.
 * Returning 1 tells lex there is no further input to switch to.
 */
int yywrap()
{
	return 1;
}

/*
 * input - replacement for lex's input(): return the next character,
 * taking pushed-back characters first, and keep yylineno / yycolumn up
 * to date.  Returns 0 at end of file.
 */
int input()
{
	if (yysptr > yysbuf) {
		/* retrieve pushed-back character */
		yytchar = *--yysptr;
	} else {
		yytchar = getc(yyin);
		if (yytchar == EOF) {
			/* end of input: nothing is echoed or counted */
			return 0;
		} else if (input_echo) {
			/* echo only characters freshly read from yyin */
			output(yytchar);
		}
	}

	/* count yycolumn and yylineno */
	/* NOTE(review): a pushed-back character is counted again when it is
	 * re-read, so yycolumn can run ahead of the echoed text -- confirm
	 * whether that matters for the '^' marker printed by yyerror(). */
	if (yytchar == '\n') {
		yylineno++;
		yycolumn = 0;
	} else if (yytchar == '\t') {
		/* advance to the next multiple of 8 */
		yycolumn += 8 - (yycolumn % 8);
	} else {
		yycolumn++;
	}

	return yytchar;
}

/*
 * comment - skip the body of a C comment.  The opening slash-star has
 * already been matched by the scanner rule.  Scanning stops after the
 * closing star-slash, or silently at end of input (input() returns 0).
 */
comment()
{
	char ch, following;
	int finished = 0;

	while (!finished) {
		/* advance to the next '*' (or to end of input) */
		do {
			ch = input();
		} while (ch != '*' && ch != 0);

		if (ch == 0) {
			finished = 1;
		} else {
			following = input();
			if (following == '/') {
				finished = 1;
			} else {
				/* not the terminator: re-examine this character */
				unput(following);
			}
		}
	}
}

#define READWHILE(cond)	while(cond) c = input();
/*
 * line_number - handle a line beginning with '#'.
 *
 * For "# <number> [ "<file>" ]" the scanner's line counter is set so that
 * the following line is numbered <number>, and the file name (including
 * its quotes) is copied into yyfilename.  Any other '#' line is ignored.
 * In every case the rest of the line is consumed.  Always returns 0.
 *
 * input() returns 0 at end of file, so every loop also stops on 0; the
 * file-name copy is bounded by the size of yyfilename and stops at an
 * unterminated name's end of line.
 */
int line_number()
{
	char c;
	/* skip spaces */
	c = input();
	READWHILE ((c == ' ' || c == '\t'));

	if (c >= '0' && c <= '9') {
		/* line number specification */
		int line_num = 0;
		while (c >= '0' && c <= '9') {
			line_num = line_num * 10 + c - '0';
			c = input();
		}
		/* the newline ending this directive bumps yylineno to line_num */
		if (line_num > 0)
		    yylineno = line_num - 1;
		READWHILE ((c == ' ' || c == '\t'));
		if (c == '"') {
			unsigned char *yf = yyfilename;
			/* leave room for the closing quote and the terminator */
			unsigned char *yf_limit =
				yyfilename + sizeof (yyfilename) - 2;

			do {
				if (yf < yf_limit)
					*yf++ = c;
				c = input();
			} while (c != '"' && c != '\n' && c != 0);
			*yf++ = '"';
			*yf = '\0';
		}
	}

	/* flush rest of line */
	READWHILE ((c != '\n' && c != 0));
	return 0;
}

/*
 * check_type - classify the identifier just matched: TYPE_NAME when
 * lookup_tdname() reports it as a typedef name, IDENTIFIER otherwise.
 */
int check_type()
{
    return lookup_tdname(yytext, yyleng) ? TYPE_NAME : IDENTIFIER;
}
@EOF

# Set permissions on scan.l, then extract the typedef-tracking support code.
chmod 664 scan.l

echo x - scanaux.c
cat >scanaux.c <<'@EOF'
#include <assert.h>
#include <stdio.h>
#include "misctypes.h"

/* Values stored per identifier in the typedef table (TDT). */
#define TYPEDEF_UNKNOWN	-1
#define TYPEDEF_FALSE	0
#define TYPEDEF_TRUE	1

/* Parser state flags, changed only through the set_/reset_/push_/pop_ functions below. */
static int in_typedef = FALSE;
static int typedef_recognition = TRUE;
static int in_memberlist = FALSE;

extern char *malloc();
extern char *realloc();
extern char *calloc();

/* TSS types */

/* Number of slots by which a state stack grows. */
#define TSS_INCR 16
static struct typedef_state_stack {
    int TOS;		/* index of the top element, -1 when empty */
    int MAX;		/* number of slots allocated in values */
    int *values;
} TSS, RDS;
/*
 * TSS is used to push values of state variables like in_typedef and
 *      typedef_recognition
 *
 * RDS is used to keep track of identifiers re-defined in inner scopes.
 *      A -1 entry marks the start of a scope (see enter_TD_scope).
 */

/* Typedef Table types */
/*
 * tab[b][o] holds the TYPEDEF_* value of the identifier stored at offset
 * o of char-pool block b; tab[b] stays null until first written.
 */
static struct TypedefTable {
    char **tab;
    int cur;
    int max;
} TDT;

/* ID Hash Tbl types */

#define IDHASH_INCR 32
#define HASHSIZE 509

/*
 * One hash chain.  entry[] is used as a variable-length array: buckets
 * are allocated with room for max_entry ints (see enterIDhash).  Each
 * entry is a char-pool index as returned by add_charpool.
 */
struct hashbucket {
    int nxt_entry;
    int max_entry;
    int entry[1];
};

static struct hashbucket *HTBL [HASHSIZE];	/* static storage: every bucket starts out null */

/* Char pool */

/*
 * Identifier text is stored NUL-terminated in fixed-size blocks.  A
 * string is named by the index block_number * CHARBLOCKSIZE + offset.
 */
#define CHARBLOCKSIZE 1024
struct charpool_block {
    int next_ch;		/* offset of the first free byte in chars */
    char chars[CHARBLOCKSIZE];
};

/* Number of block pointers by which the pool's pointer array grows. */
#define CHARPOOL_INCR 128
static struct charpool {
    struct charpool_block **char_pool;
    int maxind;
    int curind;
} CP;

/*
 * recalloc - replace an array by a larger, zero-filled copy.
 *
 * ptr          old array, or null if there is none
 * oldnumelems  number of elements currently in ptr
 * newnumelems  number of elements wanted
 * elemsize     size of one element in bytes
 *
 * Returns a calloc'ed array of newnumelems elements whose leading
 * elements are copied from ptr (never more than fit in the new array);
 * the remainder is zero.  ptr is freed.  Exits the program if memory
 * cannot be obtained.
 */
static char *recalloc (ptr, oldnumelems, newnumelems, elemsize)
char *ptr;
int oldnumelems;
int newnumelems;
int elemsize;
{
    int keep = (oldnumelems < newnumelems) ? oldnumelems : newnumelems;
    char *t = (char *) calloc (newnumelems, elemsize);

    if (! t) {
	fprintf (stderr, "calloc failed in recalloc\n");
	exit(1);
    }
    if (ptr) {
	if (keep > 0)
	    memcpy (t, ptr, keep * elemsize);
	free (ptr);
    }
    return t;
}

/*
 * init_scanner - allocate the empty state stacks (TSS, RDS), the first
 * character-pool block and the typedef table.  Must be called once
 * before parsing.  Exits the program if memory cannot be obtained.
 */
void init_scanner()
{
    TSS.TOS = -1;
    TSS.MAX = TSS_INCR;
    TSS.values = (int *)malloc (TSS_INCR * sizeof(int));

    RDS.TOS = -1;
    RDS.MAX = TSS_INCR;
    RDS.values = (int *)malloc (TSS_INCR * sizeof(int));

    CP.char_pool = (struct charpool_block **)
		    malloc (CHARPOOL_INCR * sizeof (struct charpool_block *));
    if (! TSS.values || ! RDS.values || ! CP.char_pool) {
	fprintf (stderr, "malloc failed in init_scanner\n");
	exit(1);
    }

    CP.char_pool[0] = (struct charpool_block *)
				      malloc (sizeof (struct charpool_block));
    if (! CP.char_pool[0]) {
	fprintf (stderr, "malloc failed in init_scanner\n");
	exit(1);
    }
    CP.char_pool[0]->next_ch = 0;
    CP.maxind = CHARPOOL_INCR;
    CP.curind = 0;

    /* calloc: a null row means "nothing recorded for this pool block yet" */
    TDT.tab = (char **) calloc (CHARPOOL_INCR, sizeof (char *));
    if (! TDT.tab) {
	fprintf (stderr, "calloc failed in init_scanner\n");
	exit(1);
    }
    TDT.cur = -1;
    TDT.max = CHARPOOL_INCR;
}

/*
 * push_TSS - push val on the state stack TSS, growing it by TSS_INCR
 * slots when it is full.  Exits the program if the stack cannot grow.
 */
static void push_TSS (val)
int val;
{
    /* valid slots are 0 .. MAX-1, so grow as soon as TOS reaches MAX */
    if (++TSS.TOS >= TSS.MAX) {
	TSS.MAX += TSS_INCR;
	TSS.values = (int *) realloc (TSS.values, TSS.MAX * sizeof(int));
	if (! TSS.values) {
	    fprintf (stderr, "realloc failed in push_TSS\n");
	    exit(1);
	}
    }
    TSS.values[TSS.TOS] = val;
}

/*
 * pop_TSS - remove and return the top value of the state stack TSS.
 * Popping an empty stack is fatal.
 */
static int pop_TSS ()
{
    int slot = TSS.TOS;

    if (slot < 0) {
	fprintf (stderr, "TSS underflow\n");
	exit(1);
    }
    TSS.TOS = slot - 1;
    return TSS.values[slot];
}

/*
 * push_RDS - push val on the redefinition stack RDS, growing it by
 * TSS_INCR slots when it is full.  Exits the program if the stack
 * cannot grow.
 */
static void push_RDS (val)
int val;
{
    /* valid slots are 0 .. MAX-1, so grow as soon as TOS reaches MAX */
    if (++RDS.TOS >= RDS.MAX) {
	RDS.MAX += TSS_INCR;
	RDS.values = (int *) realloc (RDS.values, RDS.MAX * sizeof(int));
	if (! RDS.values) {
	    fprintf (stderr, "realloc failed in push_RDS\n");
	    exit(1);
	}
    }
    RDS.values[RDS.TOS] = val;
}

/*
 * pop_RDS - remove and return the top value of the redefinition stack
 * RDS.  Popping an empty stack is fatal.
 */
static int pop_RDS ()
{
    int slot = RDS.TOS;

    if (slot < 0) {
	fprintf (stderr, "RDS underflow\n");
	exit(1);
    }
    RDS.TOS = slot - 1;
    return RDS.values[slot];
}

/*
 * IDhash - hash the first leng bytes of text into 0 .. HASHSIZE-1.
 *
 * The bytes are folded two at a time into a 16-bit value (first byte of
 * each pair in the high half) and the pairs are XOR-ed together.  When
 * leng is odd the missing second byte of the last pair counts as zero,
 * so no byte beyond text[leng-1] is read.  Unsigned arithmetic keeps
 * the result non-negative for every input.
 */
int IDhash (text, leng)
char *text;
int leng;
{
    unsigned int sum = 0;
    unsigned int temp;

    while (leng > 0) {
	temp = ((unsigned int) (unsigned char) *text++) << 8;
	if (leng > 1)
	    temp |= (unsigned int) (unsigned char) *text++;
	leng -= 2;
	sum ^= temp;
    }
    return (int) (sum % HASHSIZE);
}

/*
 * add_charpool - append a NUL-terminated copy of the first leng bytes
 * of text to the character pool.
 *
 * A new block is started when the string does not fit in the current
 * one; the array of block pointers grows by CHARPOOL_INCR when needed.
 * Returns the string's pool index, block * CHARBLOCKSIZE + offset.
 * Exits the program on a string too long for one block or on
 * allocation failure.
 */
static int add_charpool (text, leng)
char *text;
int leng;
{
    int start;
    struct charpool_block *tcpb;

    /* the string and its terminator must fit in a single block */
    if (leng < 0 || leng >= CHARBLOCKSIZE) {
	fprintf (stderr, "identifier too long in add_charpool\n");
	exit(1);
    }

    if ((CHARBLOCKSIZE - CP.char_pool[CP.curind]->next_ch) < (leng+1)) {
	/* valid slots are 0 .. maxind-1 */
	if (++CP.curind >= CP.maxind) {
	    CP.maxind += CHARPOOL_INCR;
	    CP.char_pool = (struct charpool_block **) realloc (CP.char_pool,
			      CP.maxind * sizeof (struct charpool_block *));
	    if (! CP.char_pool) {
		fprintf (stderr, "realloc failed in add_charpool\n");
		exit(1);
	    }
	}
	CP.char_pool[CP.curind] = (struct charpool_block *)
				      malloc (sizeof (struct charpool_block));
	if (! CP.char_pool[CP.curind]) {
	    fprintf (stderr, "malloc failed in add_charpool\n");
	    exit(1);
	}
	CP.char_pool[CP.curind]->next_ch = 0;
    }

    tcpb = CP.char_pool[CP.curind];
    start = CP.curind * CHARBLOCKSIZE + tcpb->next_ch;
    memcpy (tcpb->chars + tcpb->next_ch, text, leng);
    tcpb->next_ch += (leng + 1);
    tcpb->chars[tcpb->next_ch - 1] = '\0';
    return start;
}

/*
 * cpcmp - compare the leng bytes at text with the NUL-terminated pool
 * string whose index is index.
 *
 * Returns 0 only when the pool string is exactly those leng bytes;
 * a stored string that merely starts with text does not match.
 * Otherwise the result is negative or positive like memcmp.  The
 * comparison never reads past the stored string's terminator.
 */
static int cpcmp (text, leng, index)
char *text;
int leng;
int index;
{
    int high = index / CHARBLOCKSIZE;
    int low = index % CHARBLOCKSIZE;
    char *start = CP.char_pool[high]->chars + low;
    int i;

    for (i = 0; i < leng; i++) {
	unsigned char a = (unsigned char) text[i];
	unsigned char b = (unsigned char) start[i];

	if (a != b)
	    return (a < b) ? -1 : 1;
	if (b == '\0')
	    return 1;		/* stored string ended before leng bytes */
    }
    /* equal so far: a match only if the stored string ends here too */
    return (start[leng] == '\0') ? 0 : -1;
}

static int enterIDhash (text, leng)
char *text;
int leng;
{
    int i;
    int hval = IDhash (text, leng);
    struct hashbucket *htmp;

    /* search in hash tbl */
    if (!HTBL[hval]) {
	HTBL[hval] = (struct hashbucket *)
				     malloc ((IDHASH_INCR + 2) * sizeof(int));
	HTBL[hval]->nxt_entry = 0;
	HTBL[hval]->max_entry = IDHASH_INCR;
    }

    htmp = HTBL[hval];
    for (i = 0; i < htmp->nxt_entry; i++) {
	if (!cpcmp (text, leng, htmp->entry[i])) {
	    return htmp->entry[i];
	}
    }
    
    if (htmp->nxt_entry > htmp->max_entry) {
	htmp->max_entry += IDHASH_INCR;
	HTBL[hval] = (struct hashbucket *)
			realloc (htmp, (htmp->max_entry + 2) * sizeof(int));
	htmp = HTBL[hval];
    }
    htmp->entry[htmp->nxt_entry++] = add_charpool (text, leng);
}

/*
 * set_typedef - record val (a TYPEDEF_* value) for the identifier whose
 * pool index is index.
 *
 * The table has one row per character-pool block.  The row-pointer
 * array is enlarged when the block number lies beyond it, and a row is
 * allocated (filled with TYPEDEF_UNKNOWN) the first time it is written.
 * Blocks that never had a value recorded simply keep a null row.
 * Exits the program on allocation failure.
 */
static void set_typedef (index, val)
int index;
int val;
{
    int low, high;

    high = index / CHARBLOCKSIZE;
    low = index % CHARBLOCKSIZE;

    /* valid rows are 0 .. max-1 */
    if (high >= TDT.max) {
	int newmax = TDT.max;

	while (high >= newmax)
	    newmax += CHARPOOL_INCR;
	TDT.tab = (char **) recalloc ((char *) TDT.tab, TDT.max,
				    newmax, (int) sizeof (char *));
	if (! TDT.tab) {
	    fprintf (stderr, "recalloc failed in set_typedef\n");
	    exit(1);
	}
	TDT.max = newmax;
    }
    if (high > TDT.cur)
	TDT.cur = high;

    if (! TDT.tab[high]) {
	TDT.tab[high] = (char *) malloc (CHARBLOCKSIZE * sizeof(char));
	if (! TDT.tab[high]) {
	    fprintf (stderr, "malloc failed in set_typedef\n");
	    exit(1);
	}
	memset (TDT.tab[high], TYPEDEF_UNKNOWN, CHARBLOCKSIZE);
    }

    TDT.tab[high][low] = val;
}

/*
 * lookup_typedef - return the value recorded for the identifier whose
 * pool index is index, or TYPEDEF_UNKNOWN when nothing has been
 * recorded for it (including indexes outside the table).
 */
static int lookup_typedef (index)
int index;
{
    int low, high;

    if (index < 0)
	return TYPEDEF_UNKNOWN;

    high = index / CHARBLOCKSIZE;
    low = index % CHARBLOCKSIZE;

    /* the table may not have grown to cover this pool block yet */
    if (high >= TDT.max || ! TDT.tab[high])
	return TYPEDEF_UNKNOWN;
    return TDT.tab[high][low];
}

/*
 * lookup_tdname - tell the scanner whether an identifier is currently a
 * typedef name.  Always FALSE while typedef recognition is off; in that
 * case the identifier is not entered in the hash table either.
 */
int lookup_tdname(text, leng)
char *text;
int leng;
{
    if (! typedef_recognition)
	return FALSE;

    return (lookup_typedef (enterIDhash (text, leng)) == TYPEDEF_TRUE);
}

/*
 * enter_tdname - record a freshly declared name.
 *
 * Inside a typedef declaration the name becomes a typedef name; outside
 * one (and outside a struct/union member list) it becomes an ordinary
 * identifier.  Member names are entered in the hash table but their
 * typedef status is left alone.  When a scope is open (RDS not empty)
 * and the new status reverses the previous one, the identifier is
 * pushed on RDS so exit_TD_scope() can undo the change.
 */
void enter_tdname(text, leng)
char *text;
int leng;
{
    int id = enterIDhash (text, leng);
    int previous = lookup_typedef (id);
    int status, reversed;

    if (in_typedef) {
	status = TYPEDEF_TRUE;
	reversed = TYPEDEF_FALSE;
    } else if (in_memberlist) {
	return;
    } else {
	status = TYPEDEF_FALSE;
	reversed = TYPEDEF_TRUE;
    }

    if (RDS.TOS >= 0 && previous == reversed)
	push_RDS (id);
    set_typedef (id, status);
}

/*
 * enter_TD_scope - open a new scope: push the marker -1 on RDS so that
 * exit_TD_scope() knows where this scope's redefinitions end.
 */
void enter_TD_scope()
{
    push_RDS (-1);
}

/*
 * exit_TD_scope - close the innermost scope: pop identifiers off RDS up
 * to and including the -1 marker, flipping the recorded typedef status
 * of each one back.
 */
void exit_TD_scope()
{
    int id;

    for (;;) {
	id = pop_RDS();
	if (id == -1)
	    break;
	set_typedef (id, lookup_typedef(id) ? FALSE : TRUE);
    }
}

/*
 * Accessors for the in_memberlist flag.  enter_tdname() leaves typedef
 * status untouched while this flag is set and in_typedef is clear.
 */

/* Mark that a struct/union member list is being parsed. */
void set_in_memberlist()
{
    in_memberlist = TRUE;
}

/* Mark that no member list is being parsed. */
void reset_in_memberlist()
{
    in_memberlist = FALSE;
}

/* Save the current flag on the state stack TSS. */
void push_in_memberlist()
{
    push_TSS(in_memberlist);
}

/* Restore the flag from the top of TSS. */
void pop_in_memberlist()
{
    in_memberlist = pop_TSS();
}

/*
 * Accessors for the in_typedef flag.  While it is set, enter_tdname()
 * records declared names as typedef names.
 */

/* Mark that a typedef declaration is being parsed. */
void set_in_typedef()
{
    in_typedef = TRUE;
}

/* Mark that no typedef declaration is being parsed. */
void reset_in_typedef()
{
    in_typedef = FALSE;
}

/* Save the current flag on the state stack TSS. */
void push_in_typedef()
{
    push_TSS(in_typedef);
}

/* Restore the flag from the top of TSS. */
void pop_in_typedef()
{
    in_typedef = pop_TSS();
}

/*
 * Switches for typedef-name recognition.  While it is off,
 * lookup_tdname() answers FALSE for every identifier.
 */

/* Turn recognition on. */
void set_typedef_recognition()
{
    typedef_recognition = TRUE;
}

/* Turn recognition off. */
void reset_typedef_recognition()
{
    typedef_recognition = FALSE;
}
@EOF

# Set permissions on scanaux.c, then extract its header.
chmod 664 scanaux.c

echo x - scanaux.h
cat >scanaux.h <<'@EOF'
/*
 * scanaux.h - declarations for the typedef-tracking support code in
 * scanaux.c, shared by the grammar, the scanner and main().
 */

/* Allocate the support tables; call once before yyparse(). */
void init_scanner();

/*
 * NOTE: push_TSS and pop_TSS are defined static in scanaux.c, so these
 * two declarations cannot be used from other files; they are kept only
 * so existing includers see the same names as before.
 */
void push_TSS ();
int pop_TSS ();

/* Parser state switches used by the grammar actions. */
extern void set_in_memberlist(),
	    reset_in_memberlist(),
	    push_in_memberlist(),
	    pop_in_memberlist(),
	    set_in_typedef(),
	    reset_in_typedef(),
	    push_in_typedef(),
	    pop_in_typedef(),
	    set_typedef_recognition(),
	    reset_typedef_recognition();

/* Name entry and scope handling, also called from the grammar actions. */
extern void enter_tdname(),
	    enter_TD_scope(),
	    exit_TD_scope();

/* Non-zero when the given identifier text is currently a typedef name. */
extern int  lookup_tdname();

/* Non-zero when the scanner should echo its input (defined in main.c). */
extern int  input_echo;
@EOF

# Set permissions on scanaux.h.
chmod 664 scanaux.h

# Stop here so the shell never reads the signature text that follows.
exit 0

-----
Shankar Unni                                   E-Mail: 
Hewlett-Packard California Language Lab.     Internet: shankar@hpda.hp.com
Phone : (408) 447-5797                           UUCP: ...!hplabs!hpda!shankar

DISCLAIMER:
This response does not represent the official position of, or statement by,
the Hewlett-Packard Company.  The above data is provided for informational
purposes only.  It is supplied without warranty of any kind.


