Subject: v06i051:  Cross-reference for Yacc (yyref)
Newsgroups: mod.sources
Approved: rs@mirror.UUCP

Submitted by: Cathy Taylor
Mod.sources: Volume 6, Issue 51
Archive-name: yyref

[  I changed the name to yyref, added a Makefile, and broke up the
   mail message I received into a README and a manpage.  --r$  ]

The following source forms the xref program for syntactically correct
Yacc files.  All tokens and non-terminals in the grammar are located,
together with a list of the line numbers at which they occur.  The output
is a numbered listing of the Yacc grammar up to the second '%%' or EOF,
followed by the xref table.  Should a syntax error occur before normal
termination, the grammar listing ends and as much of the table as has
been constructed is printed.  All comments /* .. */ , %{ .. %} and
actions { .. } are ignored, although they appear in the listing.

#!/bin/sh
# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
# Contents:  Makefile README yyref.1 yyref-grammer yyref-lex
#	yyref-line.h
echo x - Makefile
sed 's/^XX//' > "Makefile" <<'@//E*O*F Makefile//'
XX# The source names below must match the archive members exactly
XX# (the grammar is unpacked as "yyref-grammer").
XXCFLAGS=
XXyyref:	yyref-lex yyref-grammer
XX	lex yyref-lex
XX	yacc -vd yyref-grammer
XX	cc $(CFLAGS) y.tab.c -o yyref
XXclean:
XX	rm -f lex.yy.c y.tab.? y.output yyref
@//E*O*F Makefile//
chmod u=rw,g=rw,o=rw Makefile
echo x - README
sed 's/^XX//' > "README" <<'@//E*O*F README//'
XXA COUPLE OF NOTES ON CONFIGURATION PARAMETERS:
XX(See the code section of yyref-grammer)
XX	Table data is stored in "table[]", which holds MAXIDENTS terminal/non-
XXterminal names, each of length < MAXCHARS.  For each name a total of MAXDEFS
XXdefining (lhs of rule) occurrences can be held, and a total of MAXOCCS rhs
XXoccurrences.  Each identifier which may be the start symbol is printed
XXfollowed by the message held in "start_maybe[]" - likewise all possible
XXtokens are accompanied by
XX"token_maybe[]". 
All defining occurrences are printed following
XX"declared_at_mark[]", and all other occurrences following "occurs_at_mark[]".
@//E*O*F README//
chmod u=rw,g=rw,o=rw README
echo x - yyref.1
sed 's/^XX//' > "yyref.1" <<'@//E*O*F yyref.1//'
XX.TH YYREF 1 LOCAL
XX.SH NAME
XXyyref \- generate cross\-reference for YACC input
XX.SH SYNOPSIS
XX.B yyref
XX[ < inputfile ]
XX.SH DESCRIPTION
XX.I Yyref
XXgenerates cross\-references for
XX.IR yacc (1)
XXinput files.
XXIt reads source from standard input, and upon EOF or seeing the second
XX.B %%
XXthat marks the end of the rules portion, generates a cross\-reference
XXlist of all parser tokens.
XX.PP
XXIn case of syntax errors in the input,
XX.I yyref
XXtries to generate as much information as it can.
XX.PP
XXThe output consists of a numbered listing of the header and rules part,
XXfollowed by the cross\-reference:
XX.RS
XX.nf
XX   1 :  %{
XX   2 :  # include
XX   3 :  %}
XX   4 :
XX   5 :
XX   6 :          /*******************************************************\e
XX   7 :          *                                                       *
XX   8 :          *       Date : Fri Jul 4 00:50:04 BST 1986              *
XX   9 :          *                                                       *
XX  10 :          \e*******************************************************/
XX  11 :
XX  12 :  %token  IDENTIFIER START_TOKEN NUMBER
XX  13 :
XX  14 :  %start  small
XX  15 :
XX  16 :  %%
XX  17 :
XX  18 :  small
XX  19 :          : start middle
XX  20 :                  {
XX  21 :                          printf("\en\enMiddle");
XX  22 :                          yyclearin;
XX  23 :                          return(0);
XX  24 :                  }
XX  25 :            end postamble
XX  26 :          ;
XX  27 :
XX  28 :  start
XX  29 :          : /* empty */
XX  30 :          | START_TOKEN
XX  31 :          ;
XX  32 :
XX  33 :  middle
XX  34 :          :
XX  35 :          ;
XX  36 :
XX  37 :  middle
XX  38 :          : MID_TOKEN
XX  39 :          | /* empty */
XX  40 :          ;
XX  41 :
XX  42 :  %%
XX\&'small' -
XX~~~~~~~         *18 , never occurs on rhs of rule - start rule? 
XX\&'start' -
XX~~~~~~~         *28 , 19
XX\&'middle' -
XX~~~~~~~         *33 , *37, 19
XX\&'end' -
XX~~~~~~~         is not declared - token??, 25
XX\&'postamble' -
XX~~~~~~~         is not declared - token??, 25
XX\&'START_TOKEN' -
XX~~~~~~~         is not declared - token??, 30
XX\&'MID_TOKEN' -
XX~~~~~~~         is not declared - token??, 38
XX        End of X-ref
XX        ~~~~~~~~~~~~
XX.fi
XX.RE
XX.SH BUGS
XXShould be able to understand the
XX.I %token
XXand similar directives.
@//E*O*F yyref.1//
chmod u=rw,g=rw,o=rw yyref.1
echo x - yyref-grammer
sed 's/^XX//' > "yyref-grammer" <<'@//E*O*F yyref-grammer//'
XX%{
XX# include <stdio.h>
XX# include <string.h>
XX# include "yyref-line.h"
XX
XX/*
XX* Action codes for yyaction().  They, and "line" from yyref-line.h, are
XX* declared here rather than after the second %% because the rule actions
XX* use them and some yacc implementations emit the parser before the
XX* user code section.
XX*/
XX#define ON_C_IDENT      000
XX#define ON_IDENT        001
XX
XX/* Old-style declarations of routines that are called before they are defined. */
XXint     yylex();
XXint     yyaction();
XXint     nline();
XXint     yyerror();
XX%}
XX /*******************************************************\
XX *                                                       *
XX *   X_reference program for YACC files                  *
XX *   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                  *
XX *                                                       *
XX *   Cathy Taylor,                                       *
XX *   c/o Department of Computing,                        *
XX *   University of Lancaster,                            *
XX *   Bailrigg, Lancaster, England.                       *
XX *   Date : Fri Jul 4 00:50:04 BST 1986                  *
XX *                                                       *
XX \*******************************************************/
XX /***********************************************\
XX *                                               *
XX *   Yacc Input Syntax                           *
XX *   ~~~~~~~~~~~~~~~~~                           *
XX *                                               *
XX *   Adapted from the document                   *
XX *   'YACC - Yet Another Compiler Compiler'      *
XX *   by                                          *
XX *   S. C. Johnson                               *
XX *                                               *
XX *   Date: Tue Jul 1 02:40:18 BST 1986           *
XX *                                               *
XX \***********************************************/
XX%token  IDENTIFIER CHARACTER NUMBER
XX%token  LEFT RIGHT NONASSOC TOKEN PREC TYPE START UNION
XX%token  PER PERCURL ACT
XX%token  COLON SEMICOLON COMMA OR LESS GREATER
XX%start  spec
XX%%
XXspec
XX        : defs PER rules tail
XX                {
XX                        /* Stop at the second %% (or EOF); main() then prints the table. */
XX                        printf("\n\n");
XX                        yyclearin;
XX                        return(0);
XX                }
XX        ;
XXtail
XX        : /* empty */
XX        | PER
XX        ;
XXdefs
XX        : /* empty */
XX        | def_bk
XX        ;
XXdef_bk
XX        : def
XX        | def_bk def
XX        ;
XXdef
XX        : START IDENTIFIER
XX        | UNION ACT     /* the scanner returns the { ... } body of %union as ACT */
XX        | PERCURL
XX        | rword tag nlist
XX        ;
XXrword
XX        : TOKEN
XX        | LEFT
XX        | RIGHT
XX        | NONASSOC
XX        | TYPE
XX        ;
XXtag
XX        : /* empty */
XX        | LESS IDENTIFIER GREATER
XX        ;
XXnlist
XX        : nmno
XX        | nlist opt_comma nmno
XX        ;
XXopt_comma
XX        : /* empty */
XX        | COMMA
XX        ;
XXnmno
XX        : IDENTIFIER opt_num
XX        | CHARACTER opt_num     /* quoted literals, as in  %left '+' '-'  */
XX        ;
XXopt_num
XX        : /* empty */
XX        | NUMBER
XX        ;
XXrules
XX        : rule
XX        | rules rule
XX        ;
XXrule
XX        : IDENTIFIER
XX                {
XX                        yyaction(ON_C_IDENT,line);
XX                }
XX          COLON body SEMICOLON
XX        ;
XXbody
XX        : body_block
XX        | body OR body_block
XX        ;
XXbody_block
XX        : /* empty */
XX        | body_block body_entity
XX        ;
XXbody_entity
XX        : opt_prec id_ent
XX        | ACT
XX        ;
XXid_ent
XX        : IDENTIFIER
XX                {
XX                        yyaction(ON_IDENT,line);
XX                }
XX        | CHARACTER
XX        ;
XXopt_prec
XX        : /* empty */
XX        | PREC
XX        ;
XX%%
XX# include <stdio.h>
XX# include "lex.yy.c"
XX# include "yyref-line.h"
XX#define MAXIDENTS       1000
XX#define MAXCHARS        100
XX#define MAXDEFS         20
XX#define MAXOCCS         1000
XX/*
XX* One cross-reference entry per name.  An entry whose ident is the empty
XX* string is unused; used entries are contiguous from table[0].
XX*/
XXstruct  IREC {
XX        char    ident[MAXCHARS];        /* name, NUL terminated */
XX        int     desc[MAXDEFS];          /* lines where the name starts a rule */
XX        int     nextdesc;               /* number of desc[] slots in use */
XX        int     occ[MAXOCCS];           /* lines where the name is used in a rule body */
XX        int     nextocc;                /* number of occ[] slots in use */
XX        } table[MAXIDENTS];
XX/*
XX* yyaction - record that the identifier currently held in yytext was seen
XX* on line ln.
XX*
XX*      action  ON_C_IDENT: the identifier starts a rule (defining occurrence)
XX*              ON_IDENT:   the identifier appears in a rule body
XX*      ln      line number to record
XX*
XX* Returns 0 on success.  Returns -1 for an unknown action, or when the
XX* identifier table or the per-identifier line list is full; in the latter
XX* cases a message is written to stderr and the occurrence is not recorded.
XX* Names longer than MAXCHARS - 1 characters are truncated to that length.
XX*/
XXint
XXyyaction (action,ln)
XXint     action;
XXint     ln;
XX{
XX        int     id;
XX
XX        /* Find the entry for yytext, or failing that the first unused slot. */
XX        id = 0;
XX        while ( id < MAXIDENTS
XX             && table[id].ident[0] != '\0'
XX             && strncmp(table[id].ident,yytext,MAXCHARS - 1) != 0 )
XX                id++;
XX        if ( id >= MAXIDENTS )
XX        {
XX                fprintf (stderr, "yyaction: more than %d identifiers - '%s' ignored\n",
XX                         MAXIDENTS, yytext);
XX                return (-1);
XX        }
XX        if ( table[id].ident[0] == '\0' )
XX        {
XX                /*******************************************************\
XX                *                                                       *
XX                *   New non-terminal to be stored.                      *
XX                *   No distinction is made here between tokens          *
XX                *   and (non) terminals.                                *
XX                *                                                       *
XX                \*******************************************************/
XX                strncpy(table[id].ident,yytext,MAXCHARS - 1);
XX                table[id].ident[MAXCHARS - 1] = '\0';
XX                table[id].nextdesc = 0;
XX                table[id].nextocc = 0;
XX        } /* fi */
XX        switch (action) {
XX        case ON_C_IDENT:
XX                /*******************************************************\
XX                *                                                       *
XX                *   Add to list of definition lines.                    *
XX                *                                                       *
XX                \*******************************************************/
XX                if ( table[id].nextdesc >= MAXDEFS )
XX                {
XX                        fprintf (stderr, "yyaction: more than %d definitions of '%s' - line %d not recorded\n",
XX                                 MAXDEFS, table[id].ident, ln);
XX                        return (-1);
XX                }
XX                table[id].desc[table[id].nextdesc++] = ln;
XX                break;
XX        case ON_IDENT:
XX                /*******************************************************\
XX                *                                                       *
XX                *   Add to list of occurrence lines.                    *
XX                *                                                       *
XX                \*******************************************************/
XX                if ( table[id].nextocc >= MAXOCCS )
XX                {
XX                        fprintf (stderr, "yyaction: more than %d occurrences of '%s' - line %d not recorded\n",
XX                                 MAXOCCS, table[id].ident, ln);
XX                        return (-1);
XX                }
XX                table[id].occ[table[id].nextocc++] = ln;
XX                break;
XX        default :
XX                fprintf (stdout, "yyaction: invalid action\n");
XX                return (-1);
XX        } /* hctiws */
XX        return (0);
XX} /* corp */
XX/*
XX* nline - print the line-number prefix that starts each listing line.
XX*/
XXint
XXnline(ln)
XXint     ln;
XX{
XX        printf("%4d :\t",ln);
XX        return (0);
XX}
XX/*
XX* Strings for output
XX*/
XX        char    declared_at_mark[] = "*";
XX        char    occurs_at_mark[] = "";
XX        char    token_maybe[] = "is not declared - token??";
XX        char    start_maybe[] = "never occurs on rhs of rule - start rule?";
XX
XX/*
XX* main - list the grammar read from standard input (the scanner echoes
XX* it as it goes), then print the cross-reference table built by yyaction().
XX*/
XXint
XXmain ()
XX{
XX        int     ind,id;
XX
XX        strcpy(table[0].ident,"");
XX        line = 0;
XX        nline(++line);
XX        yyparse ();
XX        /* Used entries are contiguous; the first empty name ends the table. */
XX        id = 0;
XX        while( id < MAXIDENTS && strcmp(table[id].ident,"") != 0 )
XX        {
XX                printf("\n'%s' -\n~~~~~~~\t\t",table[id].ident);
XX                if (table[id].nextdesc == 0 )
XX                        printf("%s",token_maybe);
XX                else
XX                {
XX                        printf("%s%d ",declared_at_mark,table[id].desc[0]);
XX                        for ( ind = 1; ind < table[id].nextdesc ; ind++)
XX                                printf(", %s%d",declared_at_mark,table[id].desc[ind]);
XX                }
XX                if (table[id].nextocc == 0)
XX                        printf(", %s",start_maybe);
XX                else
XX                {
XX                        for ( ind = 0; ind < table[id].nextocc ; ind++ )
XX                                printf(", %s%d",occurs_at_mark,table[id].occ[ind]);
XX                }
XX                id++;
XX        }
XX        printf("\n\n\tEnd of X-ref\n\t~~~~~~~~~~~~\n");
XX        return (0);
XX} /* niam */
XX/*
XX* yyerror - called by the parser with an error message; the message is
XX* printed on its own line within the listing.
XX*/
XXint
XXyyerror(mess)
XXchar    *mess;
XX{
XX        printf("\n\t%s\n",mess);
XX        return (0);
XX} /* corp */
@//E*O*F 
yyref-grammer//
chmod u=rw,g=rw,o=rw yyref-grammer
echo x - yyref-lex
sed 's/^XX//' > "yyref-lex" <<'@//E*O*F yyref-lex//'
XX /*******************************************************\
XX *                                                       *
XX *   X_ref for YACC - LEX file                           *
XX *   ~~~~~~~~~~~~~~~~~~~~~~~~~                           *
XX *   Date: Tue Jul 1 03:36:21 BST 1986                   *
XX *                                                       *
XX \*******************************************************/
XX%{
XX# include <stdio.h>
XX# include "yyref-line.h"
XX
XX/* Helpers defined after the rules; declared here because the actions call them. */
XXint     nline();
XXint     nextchar();
XXint     skip_past();
XXint     skip_quoted();
XXint     skip_action();
XX%}
XX        /* abbreviations */
XXdigit           [0-9]
XXu_case          [A-Z]
XXl_case          [a-z]
XXid_char         [A-Za-z0-9_]
XXletter          [A-Za-z]
XXwhite           [\t ]
XX%%
XX"/*"    {
XX                /* Comment: echoed into the listing, no token returned. */
XX                ECHO;
XX                skip_past('*', '/');
XX        }
XX"%{"    {
XX                /* Literal code block: echoed up to and including the closing %}. */
XX                ECHO;
XX                skip_past('%', '}');
XX                return(PERCURL);
XX        }
XX"{"     {
XX/*
XX* Although LEX can cope with the full definition,
XX* ( "{"[^\}]*"}" ) this may overrun the lex buffer (200 chars).
XX* Thus this routine.
XX*/
XX                ECHO;
XX                skip_action();
XX                return(ACT);
XX        }
XX{letter}{id_char}*      {
XX                ECHO;
XX                return(IDENTIFIER);
XX        }
XX"'"(\\.|[^'\\\n])+"'"   {
XX                /* Quoted literal; an escaped quote such as '\'' does not end it. */
XX                ECHO;
XX                return(CHARACTER);
XX        }
XX{white}+        {
XX                ECHO;
XX        }
XX{digit}+        {
XX                ECHO;
XX                return(NUMBER);
XX        }
XX"%"{white}*"left"       {
XX                ECHO;
XX                return(LEFT);
XX        }
XX"%"{white}*"right"      {
XX                ECHO;
XX                return(RIGHT);
XX        }
XX"%"{white}*"nonassoc"   {
XX                ECHO;
XX                return(NONASSOC);
XX        }
XX"%"{white}*"token"      {
XX                ECHO;
XX                return(TOKEN);
XX        }
XX"%"{white}*"prec"       {
XX                ECHO;
XX                return(PREC);
XX        }
XX"%"{white}*"type"       {
XX                ECHO;
XX                return(TYPE);
XX        }
XX"%"{white}*"start"      {
XX                ECHO;
XX                return(START);
XX        }
XX"%"{white}*"union"      {
XX                ECHO;
XX                return(UNION);
XX        }
XX"%%"    {
XX                ECHO;
XX                return(PER);
XX        }
XX":"     {
XX                ECHO;
XX                return(COLON);
XX        }
XX";"     {
XX                ECHO;
XX                return(SEMICOLON);
XX        }
XX","     {
XX                ECHO;
XX                return(COMMA);
XX        }
XX"|"     {
XX                ECHO;
XX                return(OR);
XX        }
XX"<"     {
XX                ECHO;
XX                return(LESS);
XX        }
XX">"     {
XX                ECHO;
XX                return(GREATER);
XX        }
XX"\n"    {
XX                /* Start the next listing line with its number. */
XX                ECHO;
XX                nline(++line);
XX        }
XX%%
XXint
XXyywrap()
XX{
XX        /* wrap-up procedure */
XX        return(1);
XX}
XX/*
XX* nextchar - read one character of input, echo it to the listing and
XX* start a new numbered line after each newline.
XX*
XX* Returns the character read.  A value <= 0 means end of input and is
XX* neither echoed nor counted.
XX* NOTE(review): the end-of-input value of input() differs between lex
XX* implementations (0 or EOF); both are covered by the <= 0 test - confirm
XX* against the lex in use.
XX*/
XXint
XXnextchar()
XX{
XX        int     ch;
XX
XX        ch = input();
XX        if (ch <= 0)
XX                return(ch);
XX        printf("%c",ch);
XX        if (ch == '\n')
XX                nline(++line);
XX        return(ch);
XX}
XX/*
XX* skip_past - consume input up to and including the first occurrence of
XX* the character "first" immediately followed by "second".
XX*
XX* Returns 0 when the pair was found, -1 if the input ended first.
XX*/
XXint
XXskip_past(first, second)
XXint     first;
XXint     second;
XX{
XX        int     ch;
XX
XX        ch = nextchar();
XX        for (;;)
XX        {
XX                while (ch > 0 && ch != first)
XX                        ch = nextchar();
XX                if (ch <= 0)
XX                        return(-1);
XX                /* ch is "first": the pair is complete only if "second" follows. */
XX                ch = nextchar();
XX                if (ch == second)
XX                        return(0);
XX        }
XX}
XX/*
XX* skip_quoted - consume the rest of a string or character constant whose
XX* opening quote has already been read.  A backslash hides the character
XX* after it, so an escaped quote does not end the constant.
XX*
XX* Returns 0 at the closing quote, -1 if the input ended first.
XX*/
XXint
XXskip_quoted(quote)
XXint     quote;
XX{
XX        int     ch;
XX
XX        for (;;)
XX        {
XX                ch = nextchar();
XX                if (ch <= 0)
XX                        return(-1);
XX                if (ch == '\\')
XX                {
XX                        if (nextchar() <= 0)
XX                                return(-1);
XX                }
XX                else if (ch == quote)
XX                        return(0);
XX        }
XX}
XX/*
XX* skip_action - consume a { ... } block whose opening brace has already
XX* been read.  Nested braces are counted; braces inside comments, strings
XX* and character constants are not.
XX*
XX* Returns 0 at the matching closing brace, -1 if the input ended first.
XX*/
XXint
XXskip_action()
XX{
XX        int     ch;
XX        int     prev;
XX        int     depth;
XX
XX        prev = 0;
XX        depth = 1;
XX        while (depth > 0)
XX        {
XX                ch = nextchar();
XX                if (ch <= 0)
XX                        return(-1);
XX                if (ch == '"' || ch == '\'')
XX                {
XX                        if (skip_quoted(ch) < 0)
XX                                return(-1);
XX                        ch = 0;
XX                }
XX                else if (prev == '/' && ch == '*')
XX                {
XX                        if (skip_past('*', '/') < 0)
XX                                return(-1);
XX                        /* Forget the '/' that closed the comment. */
XX                        ch = 0;
XX                }
XX                else if (ch == '{')
XX                        depth++;
XX                else if (ch == '}')
XX                        depth--;
XX                prev = ch;
XX        }
XX        return(0);
XX}
@//E*O*F yyref-lex//
chmod u=rw,g=rw,o=rw yyref-lex
echo x - yyref-line.h
sed 's/^XX//' > "yyref-line.h" <<'@//E*O*F yyref-line.h//'
XX        int     line;
@//E*O*F yyref-line.h//
chmod u=rw,g=rw,o=rw yyref-line.h
# NOTE: the counts below describe the members as originally posted; a member
# that has been edited since is reported as differing.
echo Inspecting for damage in transit...
temp=/tmp/sharin$$; dtemp=/tmp/sharout$$
trap "rm -f $temp $dtemp; exit" 0 1 2 3 15
cat > $temp <<\!!!
       9      21     157 Makefile
      10      87     601 README
      91     330    1806 yyref.1
     286     591    4433 yyref-grammer
     173     293    2323 yyref-lex
       2       2      12 yyref-line.h
     571    1324    9332 total
!!!
wc  Makefile README yyref.1 yyref-grammer yyref-lex yyref-line.h | sed 's=[^ ]*/==' | diff -b $temp - >$dtemp
if test -s $dtemp
then echo "Ouch [diff of wc output]:" ; cat $dtemp
else echo "No problems found."
fi
exit 0