{ Copyright (C) 1986 by QCAD Systems Incorporated, All Rights Reserved }

program TUPROG(input, output);

  { TUSKEL:

       A simple Compiler for Turbo }

  const
    VERSION_STRING= 'version 1.0';
    STACKSIZE = 60;   { maximum size of LR(1) parser stack }
    EOS = 0;          { marks end of line in LINE }
    EOFCH = 26;       { reader end-of-file character }
    EOLCH = 12;       { end of line character }
    CR = 13;          { carriage return }
    MAXSTRCHARS = 1000;  { maximum string space/procedure }
    MAXERRORS = 20;   { maximum errors before aborting }
    HASHSIZE = 167;    { hash table size -- prime number! }
    HLIMIT = 166;      { limit in hash table (hashsize minus one) }
    MAXTOKLEN = 30;   { length of a token or symbol }
    MERR_COUNT= 4;    {minimum tokens to scan before reporting errors
                        again}

{#C -- constants defined by the parser generator go here }
{ NB: every #letter value below is a placeholder that the parser
  generator replaces; this skeleton does not compile until expanded. }
    IDENT_TOKLEN = #C;  { maximum user identifier length }
    MAXRPLEN = #D;    { length of longest production right part }
    TERM_TOKS = #E;   { number of terminal tokens }
    NTERM_TOKS = #F;  { number of nonterminal tokens }
    ALL_TOKS = #G;    { term_toks + nterm_toks }
    IDENT_TOKX = #H;  { token number of <identifier> }
    INT_TOKX = #I;    { token number of <integer> }
    REAL_TOKX = #J;   { token number of <real> }
    STR_TOKX = #K;    { token number of <string> }
    STOP_TOKX = #L;   { token number of stopsign (end-of-file) }
    GOAL_TOKX = #M;   { token number of goal }
    EOL_TOKX = #N;    { token number of end-of-line }
    READSTATE = #O;   { first READ state }
    LOOKSTATE = #P;   { first LOOK state }
    MAXSTATE = #Q;    { largest state number }
    REDUCELEN = #R;   { number of productions }
    RLTOKENS = #S;    { bound of the read/look token tables }
    SSTOKENS = #T;    { bound of the stack-state (GOTO) tables }
    PRODTOKS = #U;    { bound of the production token list }
    TOKCHARS = #V;    { bound of the token spelling pool }
    START_STATE = #W;  { initial state }
    STK_STATE_1 = #X;  { state initially pushed on stack }
{#> -- end of constants }

  type
    INT = -32767..32767;   { portable 16-bit signed range }
    { Fixed-length string types used throughout the skeleton. }
    STRING1= string[1];
    STRING7 = string[7];
    STRING8 = string[8];
    STRING9 = string[9];
    STRING15 = string[15];
    STRING31 = string[31];
    STRING80 = string[80];
    LONGSTRING = string[255];
    TOKRANGE = 1..term_toks;  { range of the terminal (scanner) tokens }

   {$I tudecls}

  type
    SYMTABLE= array [0..hlimit] of symtabp;  { hashed symbol table buckets }
    STATE_STACK = array [0..stacksize] of int;
    { Types for parser tables.  NB:  These type names are used by
      the typed constant generation. }
    STATE_ARRAY = array [1..maxstate] of int;
    REDUCE_ARRAY = array [1..reducelen] of int;
    POP_ARRAY = array [1..reducelen] of byte;
    TOKEN_ARRAY = array [0..rltokens] of byte;
    TOSTATE_ARRAY = array [0..rltokens] of int;
    SS_ARRAY = array [0..sstokens] of int;
    PROD_ARRAY = array [1..prodtoks] of byte;
    TOKX_ARRAY = array [1..all_toks] of int;
    TOKCHAR_ARRAY = array [1..tokchars] of char;
    INSYM_ARRAY = array [1..lookstate] of int;

  const
{#<C -- put typed constants here, if they've been requested }
    { Static parser data structures (parser tables). }
{#IP}
{#>}
   {Flag constants}
{#F   }
    #N= #V;

  var
    { Dynamic parser data structures }
    STACK:  state_stack;  { the LR(1) state stack }
    SEM:  array [0..stacksize] of semrecp;  { semantics stack }
    TOS:  int;  { index of top of stack }

{#<~C -- the following are redundant if typed constants are used }
    { Static parser data structures (parser tables). }
    STATEX:  state_array;    { stack top index }
    MAP:  reduce_array;      { mapping from state to apply numbers }
    POPNO:  pop_array;       { reduce pop size }
    TOKNUM:  token_array;    { token list }
    TOSTATE:  tostate_array;  { read, look states }
    STK_STATE:  ss_array;    { GOTO table: stacked-state keys, and }
    STK_TOSTATE: ss_array;   { the matching successor states }
{#<D -- these are for parser stack dumps. }
    PRODX:  reduce_array;    { prod index into ... }
    PRODS:  prod_array;      { token number, index into ... }
    INSYM:  insym_array;
{#> -- end if for debugging. }
{#> -- end if for typed constants. }

{#<D -- debugging (these cannot be typed constants.) }
    { These guys are for printing tokens in parser stack dumps. }
    TOKX:  tokx_array;       { token index, index into ... }
    TOKCHAR:  tokchar_array;  { token characters }
{#> -- end if for debugging. }

    { Lexical and token data }
    LINE:  longstring;       { source line }
    LX:  int;                { index of next character in LINE }
    ERRPOS: int;             { current token position in LINE }
    PROMPT: string8;         { prompt string }
    PROMPTLEN: int;          { front-end length for error purposes }
    IS_CONSOLE: boolean;     { TRUE if input from console }
    CH:  char;               { next character from input file }
    TOKEN:  int;             { Next token in input list }
    LSEMP:  semrecp;         { current semantics assoc. with token }
    TOKENX:  int;            { index into TOKARY, LSEMPARY }
    TOKARY:  array [0..1] of int;  { token queue }
    LSEMPARY:  array [0..1] of semrecp;  { semantics queue, parallel to TOKARY }
    ERRSYM:  symbol;        { special symbol reserved for errors }
    { The next table can be omitted if real numbers are not used. }
    PWR10_2:  array [0..8] of real;  { Binary powers of ten. }

    { Symbol table data }
    SYMTAB: symtable;

    { String table space }
    STRTAB: packed array [0..maxstrchars] of char;
    STRX: integer;   {next available character slot in STRTAB}

    SFILE, RFILE: text;      { source, report files }

    ERR_COUNT,               { tokens left before errors report again }
    ERRORS: int;             { number of errors seen so far }
    DEBUG: int;              { >0 turns on some tracing }
    TRACE: boolean;          { set FALSE at startup; not used in this file }

    SFILENAME, RFILENAME: string80;  { file names }

  function NEW_SEM (SEMTYP: semtype): semrecp; forward;
  procedure IDEBUG; forward;

  {$I tuutils}

  {$I tufiles}

  {******************}
  procedure MORE(MSG: string80);
    { Display MSG and pause until the user acknowledges it by
      typing a character; the character itself is discarded. }
    var REPLY:  char;
  begin
    reply := resp(msg)
    end;

  {******************}
  procedure REPORT_ERR(MSG: string80);
    { Write MSG to the report file with a caret aligned under the
      offending token (using ERRPOS plus the listing prompt width).
      Nothing is printed while ERR_COUNT is nonzero, which
      suppresses cascades of messages after a recovery. }
  begin
    if err_count = 0 then
      begin
      if (errpos + promptlen) > 1 then
        write(rfile, ' ':(errpos + promptlen - 1));
      writeln(rfile, '^');  { caret marks the error column }
      writeln(rfile, msg)
      end
    end;

  {*******************}
  procedure ABORT(MSG: string80);
    { Report a fatal error and terminate the compiler at once. }
  begin
    report_err(concat('FATAL ERROR: ', msg));
    writeln('... aborting');
    halt
    end;

  {******************}
  procedure ERROR(MSG: string80);
    { Report a recoverable error and count it.  Reporting is
      skipped while ERR_COUNT is nonzero (error-recovery
      cool-down); exceeding MAXERRORS aborts the compilation. }
  begin
    if err_count = 0 then begin
      report_err(concat('ERROR: ', msg));
      errors := errors + 1;
      if errors <= maxerrors then begin
        { drop into the interactive debugger on console runs }
        if rfilename = default_rfile then idebug
        end
      else begin
        err_count := 0;
        abort('Error limit exceeded')
        end
      end
    end;

  {*******************}
  procedure WARN(MSG: string80);
    { Report a non-fatal diagnostic; the error count is not bumped. }
  begin
    report_err(concat('WARNING: ', msg))
    end;

  {$I tusyms}

  {********************}
  procedure SYMERROR(SYM: symbol; MSG: string80);
    { Report an error of the form "SYM: MSG", tying the message to
      a particular symbol spelling. }
  begin
    error(concat(sym, concat(': ', msg)));
    end;

{#<D -- debugging utilities. }
  {$I tudbug}

{#> -- end debugging stuff. }
{ LEXICAL ANALYZER }

  {*******************}
  procedure GETLINE;
    { read the next source line, when nextch exhausts
      the current one. }

    {.............}
    procedure GENEOF;
      { fake a one-character line holding only the eof mark }
    begin
      line := chr(eofch);
      lx := 1
    end;

    {............}
    procedure GRABLINE;
      { read one raw source line and echo it to the report file }
    begin
      readln(sfile, line);
      writeln(rfile, ' ; ', line);
      lx := 1
    end;

  begin { getline }
    if is_console then begin
      { prompt if from the console file }
      write(prompt);
      grabline;
      { a console user ends the session by typing the word EOF }
      if line = 'EOF' then geneof
      end
    else if eof(sfile) then
      geneof
    else
      grabline;
    { The appended eol character ensures that tokens are broken over
      line endings; they would otherwise be invisible to the scanner.
      eolch allows the string scanner to distinguish ends of lines. }
    line := concat(line, '  ');
    line[length(line)-1]:=chr(eolch);
  end;

  {*******************}
  procedure NEXTCH;
    { Advance CH to the next source character, refilling LINE via
      GETLINE whenever the buffer is exhausted.  The cursor stays
      on an end-of-file mark so CH keeps returning it forever. }
  begin
    if lx > length(line) then
      getline;
    ch := line[lx];
    if not (ch = chr(eofch)) then
      lx := succ(lx)  { never move past an eof mark }
  end;

  {********************}
  function PEEKCH: char;
    { Return the character after CH without consuming it; past the
      end of the buffer an end-of-line character is reported. }
  begin
    if lx <= length(line) then
      peekch := line[lx]
    else
      peekch := chr(eolch);
    end;

  {********************}
  procedure SKIPBLANKS;
    { Skip blanks and curly-brace comments preceding the next
      token; on return CH holds the first significant character. }
    var IN_COMMENT: boolean;

    {..................}
    function END_COMMENT: boolean;
      { TRUE at a closing brace, or (after an error message) at an
        end-of-file mark inside an unclosed comment. }
    begin
      if ch = chr(eofch) then begin
        error('unclosed comment at file end');
        end_comment := true;
        end
      else
        end_comment := (ch = '}');
      end;

  begin  { comments open on a left brace and close on close-brace }
    repeat
      while ch = ' ' do nextch;
      in_comment := (ch = '{');
      if in_comment then begin
        while not end_comment do nextch;
        nextch;  { step over the closing brace }
        end
    until not in_comment;
    end;

  {***********************}
  procedure GET_SYMBOL;
      {collects Pascal-style identifiers,
          stuffed into symbol table under IDENT tag}
    var SYM: symbol;
        STP: symtabp;
  begin
    sym:='';
    { Keep snarfing alphanumeric characters.  Up to the first
      maxtoklen of them will be put in the symbol spelling. }
    { NOTE(review): the guard is <=, so the spelling can actually
      reach maxtoklen+1 characters -- confirm type SYMBOL (declared
      in tudecls) is wide enough for that. }
    while ch in ['a'..'z', 'A'..'Z', '0'..'9', '_'] do begin
      if length(sym) <= maxtoklen then begin
        { grow by one blank, then overwrite it in place -- avoids
          building a one-character temporary string }
        sym:=concat(sym, ' ');
        sym[length(sym)] := upcase(ch);
        end;
      nextch;
      end;
    stp := makesym(symtab, sym, user, 0);  { the default level is 0 }
    if (stp^.symt=reserved) then
    token:=stp^.tokval  { a keyword: token number comes from the table }
    else begin
      { an ordinary identifier: build IDENT semantics for it }
      lsemp:=new_sem(ident);
      with lsemp^ do begin
        symp := stp;
        { NOTE(review): inside `with lsemp^' this assigns the
          record's TOKEN field if SEMREC declares one, otherwise
          the global TOKEN -- verify against tudecls. }
        token := ident_tokx;
        end
      end
    end;

  {************************}
  procedure GET_NUMBER;
    { Scan an unsigned decimal integer at CH, setting TOKEN and
      leaving the value in a fresh FIXED semantic record.
      NB: no overflow check is made while accumulating. }
    var ACCUM: integer;
  begin
    accum := 0;
    while ch in ['0'..'9'] do begin
      accum := accum*10 + (ord(ch) - ord('0'));
      nextch;
      end;
    token := int_tokx;
    lsemp := new_sem(fixed);
    lsemp^.numval := accum;
    end;

  {***********************}
  procedure GET_STRING;
    { Scan a quoted string literal.  The characters are copied into
      the string pool STRTAB (terminated by chr(0)); the STRNG
      semantic record carries the starting pool index.  A doubled
      quote mark stands for one literal quote character. }
    label 1, 99;

    {..................}
    procedure PUTCH(CH: char);
      { append one character to STRTAB; at the pool limit complain
        once, plant the terminator, and drop everything after }
    begin
      if strx<maxstrchars then begin
        strtab[strx]:=ch;
        strx:=strx+1;
        end
      else
      if strx=maxstrchars then begin
        error('too many string characters');
        strtab[maxstrchars]:=chr(0);
        strx:=strx+1;
        end
      end;

  begin
    lsemp:=new_sem(strng);
    lsemp^.stx:=strx;  { string starts at the current pool top }
    1:
    nextch;  {get over first quote mark}
    while ch<>'''' do begin
      if ch=chr(eolch) then begin
        error('string runs over end of line');
        goto 99;
        end;
      putch(ch);
      nextch;
      end;
    nextch;  {get over last quote mark}
    if ch='''' then begin  {repeated quote mark}
      putch(ch);
      goto 1;   {continue grabbing characters}
      end;
    99:
    putch(chr(0));  { terminate the pooled string }
    token:=str_tokx;
    end;
    
  {********************}
  procedure GET_TOKEN;
    { Pascal-style lexical analyzer -- sets TOKEN to token number }
    { Also sets LSEMP to the token's semantic record, or nil when
      the token carries no semantics.  The {#...} lines are parser
      generator directives; only one of the eol alternatives below
      survives expansion, which is why no semicolon separates them. }
  begin
    lsemp:= nil;  { default case }
    skipblanks;
    errpos:=lx-1;  { remember the column for error carets }
    case ch of
      'a'..'z', 'A'..'Z': get_symbol;
      '0'..'9': get_number;
      '''': get_string;
{#<D -- if debugging, invoke idebug on a bang (or other char). }
      '!':  begin
              idebug;
              nextch;
              get_token
            end;
{#>}
{#G   special symbol cases go here }
      ELSE  begin
        if ch=chr(eofch) then
          token := stop_tokx
        else if ch=chr(eolch) then begin
          nextch;
{#<E            end-of-line token dealt with here }
          token := eol_tokx  { accept an end-of-line token }
{#:}
          get_token  { go find another (significant) character }
{#>}
          end
        else begin
          error('invalid character');
          nextch;
          get_token;
          end
        end { case alternatives }
      end; { case }
    { count down the error-report cool-down, one token per call }
    if err_count>0 then err_count:=err_count-1;
    end { get_token };

  {*******************}
  procedure NEXT_TOKEN;
    { Make sure TOKEN/LSEMP describe the current input token.
      Slot 1 of the token queue caches the most recent scan, so
      the same token can be examined repeatedly until TOKENREAD
      consumes it. }
  begin
    if tokenx <= 1 then begin
      { queued token is still current -- reuse it }
      token := tokary[tokenx];
      lsemp := lsempary[tokenx];
      end
    else begin
      tokenx := 1;
      get_token;  { sets token and lsemp }
      tokary[1] := token;
      lsempary[1] := lsemp;
      end
    end;

  {*****************}
  procedure TOKENREAD;
    { Mark the current token as consumed; the next NEXT_TOKEN call
      will then scan ahead instead of reusing the queue. }
  begin
    tokenx := succ(tokenx);
    end;

  {$I tusems}

  { LR(1) PARSER procedures }

  {****************}
  function ERROR_RECOVERY(var MSTACK: state_stack;
                          var MSTACKX: int; MCSTATE: int): int;
    { Attempt to repair a syntax error.  For each stack cutback
      level JSTX (starting at the current top and working down),
      every token legal in that state is tried as an insertion by
      running a trial parse on a private copy of the stack; the
      first insertion that parses through the error token wins.
      If no insertion works at any level, the offending token is
      dropped and the whole search repeats.  At the stop token the
      parse is forced into the halt state.  Returns the state to
      resume in; MSTACKX is cut back to the matching level. }
    label 99, 100;
    var STACK: state_stack;  { local copy of stack }
        TOS,              { local stack pointer }
        CSTATE,              { local state }
        JSTX,                { temporary stack limit }
        RX, TL: int;         { index into TOKNUM table }

    {...............}
    procedure COPY_STACK;
      { refresh the trial stack from MSTACK up to level JSTX and
        set CSTATE to the state in control at that level }
      var STX: int;
    begin
      if (jstx<0) or (jstx>mstackx) then abort('ERROR RECOVERY BUG');
      for stx := 0 to jstx do
        stack[stx] := mstack[stx];
      tos := jstx;
      if jstx=mstackx then
        cstate := mcstate
      else
        cstate := mstack[jstx+1];
    end;

    {...............}
    procedure PUSHREAD(CSTATE: int);
      { adjusts the state stack }
    begin
      tos := tos+1;
      if tos>stacksize then
        abort('stack overflow');
      stack[tos] := cstate;
    end;

    {...............}
    function TRIAL_PARSE: boolean;
      { parses from current read state through the inserted and the
        error token; if successful, returns TRUE. }
      label 99;
      var RX: int;
    begin
      trial_parse := true;  { until proven otherwise }
      while cstate<>0 do begin
        if cstate < readstate then begin
          { a reduce state }
{#<D      dump if debugging enabled. }
          if debug > 3 then stk_dump('E*Reduce', stack,
                                     tos, cstate);
{#>       end conditional. }
          if popno[cstate]=0 then begin
            { empty production }
            pushread(stk_state[statex[cstate]]);
            cstate := stk_tostate[statex[cstate]];
          end
          else begin
            { non-empty production }
            tos := tos - popno[cstate] + 1;
            rx := statex[cstate];   { compute the GOTO state }
            cstate := stack[tos];
            while (stk_state[rx]<>cstate) and
                  (stk_state[rx]<>0) do rx := rx+1;
            cstate := stk_tostate[rx];
          end
        end
        else if cstate < lookstate then begin
          { a read state }
          next_token;  { need a token now }
{#<D      dump if debugging enabled. }
          if debug > 3 then stk_dump('E*Read', stack, tos, cstate);
{#>       end conditional. }
          rx := statex[cstate];
          while (toknum[rx]<>0) and
                (toknum[rx]<>token) do rx := rx+1;
          if toknum[rx]=0 then begin
            { failure }
            trial_parse := false;
            goto 99;
          end
          else begin
            { did read something }
            pushread(cstate);
            cstate := tostate[rx];
            tokenread;  { scan the token }
            { parsed past the error token: repair is good enough }
            if tokenx>1 then goto 99 { successful }
          end
        end
        else begin
          { lookahead state }
          next_token;  { need a token now }
{#<D      dump if debugging enabled. }
          if debug > 3 then stk_dump('E*Look', stack, tos, cstate);
{#>       end conditional. }
          rx := statex[cstate];
          while (toknum[rx]<>0) and
                (toknum[rx]<>token) do rx := rx+1;
          cstate := tostate[rx];
        end
      end;
    99:
    end;

    {.................}
    procedure INCR_ERRSYM;
      { Note that this procedure assumes ASCII. }
      { bump the generated name through ERR#AA, ERR#AB, ... }
    begin
      if errsym[6]='Z' then begin
        errsym[5] := succ(errsym[5]);
        errsym[6] := 'A';
      end
      else
        errsym[6] := succ(errsym[6]);
    end;

    {.................}
    procedure MAKE_DEFAULT(TOKX: int; var SEMP: semrecp);
      { creates a default token data structure }
      { NB: local SYM is declared but never used here. }
      var SYM: symbol; TSYMP: symtabp;
    begin
      case tokx of
        ident_tokx: begin
          { inserted identifier: enter a fresh ERR#xx symbol }
          tsymp := makesym(symtab, errsym, symerr, 0);
          semp:=new_sem(ident);
          with semp^ do begin
            symp:=tsymp;
            incr_errsym;
            end
          end;
        int_tokx: begin
          { inserted integer: default value 1 }
          semp:=new_sem(fixed);
          semp^.numval:=1;
          end;
        ELSE
          semp:=nil;
      end { case tokx }
    end;

  begin  { ERROR_RECOVERY }
    if debug > 3 then writeln(rfile, 'Going into ERROR RECOVERY');
    while true do begin
      jstx := mstackx;
      while jstx>=0 do begin
        copy_stack;
        rx := statex[cstate];
        while toknum[rx]<>0 do begin
          { scan through legal next tokens }
          if debug > 3 then writeln(rfile, '...starting trial parse');
          tokary[0] := toknum[rx];  { the insertion }
          tokenx := 0;
          if trial_parse then goto 99;  { it clicked! }
          rx := rx+1;
          if toknum[rx]<>0 then
            copy_stack;
        end;
        jstx := jstx-1;  { reduce stack }
      end;
      if token=stop_tokx then begin
        { empty stack, no more tokens }
        cstate := 0;  { halt state }
        tokenx := 2;
        jstx := 0;  { bottom of stack }
        goto 100;
      end;
{#<D}
      if debug > 3 then begin
        write(rfile, '...dropping token ');
        tl := wrtok(tokary[1]);
        writeln(rfile);
      end;
{#>}
      tokenx := 2;
      next_token;
{#<D}
      if debug > 3 then begin
        write(rfile, 'New token ');
        tl := wrtok(token);
        writeln(rfile);
      end
{#>}
    end;
  99:  { found a solution }
    copy_stack;
{#<D}
    if debug > 3 then begin
      write(rfile, 'insertion of ');
      tl := wrtok(tokary[0]);
      writeln(rfile, ' succeeded');
    end;
{#>}
    make_default(tokary[0], lsempary[0]);
    tokenx := 0;  { forces a `real' rescan of the insertion }
    if jstx<mstackx then
      cstate := stack[jstx+1]
    else
      cstate := mcstate;  { cstate returned }
  100:
    error_recovery := cstate;
    mstackx := jstx;
    if debug > 3 then writeln(rfile, 'Ending error recovery');
  end;

  {****************}
  procedure PARSER;
    { Carries out a complete parse, until
      the halt state is seen -- same as empty stack}
    { State numbering convention: states below READSTATE are
      reduce states, states below LOOKSTATE are read states, the
      rest are lookahead states; state 0 is the halt state. }
    var CSTATE, RX: int;
        TSEMP: semrecp;

    {...............}
    procedure PUSHREAD(CSTATE: int; SEMP: semrecp);
      { do the push part of a readstate. }
      { pushes the state and its semantics record in parallel }
    begin
      tos := tos+1;
      if tos>stacksize then
        abort('stack overflow');
      sem[tos] := semp;
      stack[tos] := cstate;
    end;

  begin
    cstate := start_state;
    tos := -1;
    pushread(stk_state_1, nil);
    while cstate<>0 do begin
      tsemp:=nil;
      if cstate < readstate then begin
        { a reduce state }
{#<D    dump if debugging enabled. }
        if debug > 0 then stk_dump('Reduce', stack, tos, cstate);
{#>     end conditional. }
        if map[cstate] <> 0 then
          { the semantics action }
          apply(map[cstate], tsemp);
        if popno[cstate]=0 then begin
          { empty production }
          pushread(stk_state[statex[cstate]], tsemp);
          cstate := stk_tostate[statex[cstate]];
        end
        else begin
          { non-empty production:
            semantics is preserved on a unit production A --> w,
            where |w| = 1, unless something is in TSEMP.  Note that
            if w is nonterminal, the production may be bypassed. }
          tos := tos - popno[cstate] + 1;
          if (tsemp<>nil) or (popno[cstate]<>1) then
            sem[tos] := tsemp;
          { compute the GOTO state }
          rx := statex[cstate];
          cstate := stack[tos];
          while (stk_state[rx]<>cstate) and (stk_state[rx]<>0) do
            rx := rx+1;
          cstate := stk_tostate[rx];
        end
      end
      else if cstate < lookstate then begin
        { a read state }
        next_token;  { need next token now }
{#<D    dump if debugging enabled. }
        if debug > 2 then stk_dump('Read', stack, tos, cstate);
{#>     end conditional. }
        rx := statex[cstate];
        while (toknum[rx]<>0) and (toknum[rx]<>token) do
          rx := rx+1;
        if toknum[rx]=0 then begin
          { no legal move on this token: repair and resume }
          error('syntax error');
          cstate := error_recovery(stack, tos, cstate);
          err_count:=merr_count;
        end
        else begin
          pushread(cstate, lsemp);
          cstate := tostate[rx];
          tokenread;  { token has been scanned }
        end
      end
      else begin
        { lookahead state }
        next_token;  { need another token now }
{#<D    dump if debugging enabled. }
        if debug > 2 then stk_dump('Look', stack, tos, cstate);
{#>     end conditional. }
        rx := statex[cstate];
        while (toknum[rx]<>0) and (toknum[rx]<>token) do
          rx := rx+1;
        cstate := tostate[rx];
      end
    end;
    end_sem;  { wrap up the semantics phase }
  end;

  { PARSE INITIALIZATION }

  {*****************}
  procedure INITTABLES;
    { One-time initialization of all scanner/parser data: the
      powers-of-ten table, the token queue, the symbol and
      semantics tables, the generated parser tables, and the
      line buffer (primed by the first NEXTCH). }
    var SX: int;
        CODE_SYMP: symtabp;

    {................}
    procedure PUTSYM(TSYM: symbol; TV: int);
      { enter reserved word TSYM with token value TV }
      var SYMP: symtabp;
    begin
      symp:=makesym(symtab, tsym, reserved, -1);
      symp^.tokval:=tv;
    end;

{#<D   also need to init debugging tables? }
    {................}
    procedure PUTTOK(PRINTVAL: string80;  TOKNUM, START: int);
      { this procedure is used to initialize the token tables.
        toknum is the number of the token to be initialized, and
        start is where it should start in the tokchar array. }
      var OFFSET:  int;
    begin
      tokx[toknum] := start;
      for offset := 0 to length(printval)-1 do
        tokchar[start+offset] := printval[offset+1];
      tokchar[start+length(printval)] := chr(0)  { spelling terminator }
    end;
{#>    end puttok insertion. }

    {................}
    procedure INIT_PARSER_TABLES;
      { initialize the parser tables }
    begin
{#IS    inline symbol table inits. }
{#<A    assignment style inits? }
{#IP      do the parser tables inline. }
{#>     end assignment inits. }
{#<D    debugging? }
{#IT      do the token tables inline. }
{#>     end debugging }
    end { init_parser_tables };

  begin { inittables }
    { Binary powers of ten for real-number conversion.
      NOTE(review): only elements 0..5 are filled although the
      array is declared [0..8] -- presumably 1E64 would overflow
      this real type; confirm no reader indexes past 5. }
    pwr10_2[0] := 1E1;  {10^(2^0)}
    pwr10_2[1] := 1E2;  {10^(2^1)}
    pwr10_2[2] := 1E4;
    pwr10_2[3] := 1E8;
    pwr10_2[4] := 1E16;
    pwr10_2[5] := 1E32;
    lsempary[0]:=nil;
    lsempary[1]:=nil;
    lsemp := nil;
    tokenx := 2;  { no token queue }
    for sx := 0 to hlimit do begin
      symtab[sx] := nil;  { initialize symbol tables }
      end;
    for sx := 0 to stacksize do begin
      sem[sx]:=nil;
    end;
    init_parser_tables;
    init_sem;
    errsym:='ERR#AA';  { first generated error-recovery symbol }
    strx:=0;           { string pool starts empty }
    line := '';  { fake a new line }
    lx := 1;
    errpos:=1;
    err_count:=0;
    nextch;  { fetch the first character, forcing a line read }
  end;

  {**********************}
  function OPENFILES: boolean;
    { Prompt for the source and report file names and open both.
      An empty source name selects console input with a `> '
      prompt; an empty report name selects the default report
      file.  Returns TRUE only when both opens succeed. }
  begin
    openfiles := false;
    write('Source file? ');
    readln(sfilename);
    prompt := '';
    promptlen := 3;  {length of ` ; '}
    is_console := (sfilename = '');
    if is_console then begin
      prompt := '> ';
      sfilename := default_sfile;
      promptlen := promptlen + 2;
      end;
    if openfile(sfile, sfilename, false) then begin
      write('Target file? ');
      readln(rfilename);
      if rfilename = '' then rfilename := default_rfile;
      openfiles := openfile(rfile, rfilename, true);
      end
    end;
    
begin
  { Compiler entry point: announce the version, open the files,
    then compile the whole source in a single call to PARSER. }
  writeln('Tiny Pascal Compiler, ', version_string);
  writeln;
  debug := 0;
  errors := 0;
  trace := false;
  if openfiles then begin
    inittables;
    parser;  { does it all }
    closefiles
    end
  end.


