{ Copyright (C) 1986 by QCAD Systems Incorporated, All Rights Reserved }

program TUPROG(input, output);

  { TUSKEL:

       A simple Compiler for Turbo }

  const
    VERSION_STRING= 'version 1.0';
    STACKSIZE = 60;   { maximum size of LR(1) parser stack }
    EOS = 0;          { marks end of line in LINE }
    EOFCH = 26;       { reader end-of-file character }
    EOLCH = 12;       { end of line character }
    CR = 13;          { carriage return }
    MAXSTRCHARS = 1000;  { maximum string space/procedure }
    MAXERRORS = 20;   { maximum errors before aborting }
    HASHSIZE = 167;    { hash table size -- prime number! }
    HLIMIT = 166;      { limit in hash table (hashsize minus one) }
    MAXTOKLEN = 30;   { length of a token or symbol }
    MERR_COUNT= 4;    {minimum tokens to scan before reporting errors
                        again}

{#C -- constants defined by the parser generator go here }
{ NB: every #letter value below is a placeholder that the parser
  generator replaces; this skeleton does not compile until expanded. }
    IDENT_TOKLEN = #C;  { maximum user identifier length }
    MAXRPLEN = #D;    { length of longest production right part }
    TERM_TOKS = #E;   { number of terminal tokens }
    NTERM_TOKS = #F;  { number of nonterminal tokens }
    ALL_TOKS = #G;    { term_toks + nterm_toks }
    IDENT_TOKX = #H;  { token number of <identifier> }
    INT_TOKX = #I;    { token number of <integer> }
    REAL_TOKX = #J;   { token number of <real> }
    STR_TOKX = #K;    { token number of <string> }
    STOP_TOKX = #L;   { token number of stopsign (end-of-file) }
    GOAL_TOKX = #M;   { token number of goal }
    EOL_TOKX = #N;    { token number of end-of-line }
    READSTATE = #O;   { first READ state }
    LOOKSTATE = #P;   { first LOOK state }
    MAXSTATE = #Q;    { largest state number }
    REDUCELEN = #R;   { number of productions }
    RLTOKENS = #S;    { bound of the read/look token tables }
    SSTOKENS = #T;    { bound of the stack-state (GOTO) tables }
    PRODTOKS = #U;    { bound of the production token list }
    TOKCHARS = #V;    { bound of the token spelling pool }
    START_STATE = #W;  { initial state }
    STK_STATE_1 = #X;  { state initially pushed on stack }
{#> -- end of constants }

  type
    INT = -32767..32767;   { portable 16-bit signed range }
    { Fixed-length string types used throughout the skeleton. }
    STRING1= string[1];
    STRING7 = string[7];
    STRING8 = string[8];
    STRING9 = string[9];
    STRING15 = string[15];
    STRING31 = string[31];
    STRING80 = string[80];
    LONGSTRING = string[255];
    TOKRANGE = 1..term_toks;  { range of the terminal (scanner) tokens }

   {$I tudecls}

  type
    SYMTABLE= array [0..hlimit] of symtabp;  { hashed symbol table buckets }
    STATE_STACK = array [0..stacksize] of int;
    { Types for parser tables.  NB:  These type names are used by
      the typed constant generation. }
    STATE_ARRAY = array [1..maxstate] of int;
    REDUCE_ARRAY = array [1..reducelen] of int;
    POP_ARRAY = array [1..reducelen] of byte;
    TOKEN_ARRAY = array [0..rltokens] of byte;
    TOSTATE_ARRAY = array [0..rltokens] of int;
    SS_ARRAY = array [0..sstokens] of int;
    PROD_ARRAY = array [1..prodtoks] of byte;
    TOKX_ARRAY = array [1..all_toks] of int;
    TOKCHAR_ARRAY = array [1..tokchars] of char;
    INSYM_ARRAY = array [1..lookstate] of int;

  const
{#<C -- put typed constants here, if they've been requested }
    { Static parser data structures (parser tables). }
{#IP}
{#>}
   {Flag constants}
{#F   }
    #N= #V;

  var
    { Dynamic parser data structures }
    STACK:  state_stack;  { the LR(1) state stack }
    SEM:  array [0..stacksize] of semrecp;  { semantics stack }
    TOS:  int;  { index of top of stack }

{#<~C -- the following are redundant if typed constants are used }
    { Static parser data structures (parser tables). }
    STATEX:  state_array;    { stack top index }
    MAP:  reduce_array;      { mapping from state to apply numbers }
    POPNO:  pop_array;       { reduce pop size }
    TOKNUM:  token_array;    { token list }
    TOSTATE:  tostate_array;  { read, look states }
    STK_STATE:  ss_array;    { GOTO table: stacked-state keys, and }
    STK_TOSTATE: ss_array;   { the matching successor states }
{#<D -- these are for parser stack dumps. }
    PRODX:  reduce_array;    { prod index into ... }
    PRODS:  prod_array;      { token number, index into ... }
    INSYM:  insym_array;
{#> -- end if for debugging. }
{#> -- end if for typed constants. }

{#<D -- debugging (these cannot be typed constants.) }
    { These guys are for printing tokens in parser stack dumps. }
    TOKX:  tokx_array;       { token index, index into ... }
    TOKCHAR:  tokchar_array;  { token characters }
{#> -- end if for debugging. }

    { Lexical and token data }
    LINE:  longstring;       { source line }
    LX:  int;                { index of next character in LINE }
    ERRPOS: int;             { current token position in LINE }
    PROMPT: string8;         { prompt string }
    PROMPTLEN: int;          { front-end length for error purposes }
    IS_CONSOLE: boolean;     { TRUE if input from console }
    CH:  char;               { next character from input file }
    TOKEN:  int;             { Next token in input list }
    LSEMP:  semrecp;         { current semantics assoc. with token }
    TOKENX:  int;            { index into TOKARY, LSEMPARY }
    TOKARY:  array [0..1] of int;  { token queue }
    LSEMPARY:  array [0..1] of semrecp;  { semantics queue, parallel to TOKARY }
    ERRSYM:  symbol;        { special symbol reserved for errors }
    { The next table can be omitted if real numbers are not used. }
    PWR10_2:  array [0..8] of real;  { Binary powers of ten. }

    { Symbol table data }
    SYMTAB: symtable;

    { String table space }
    STRTAB: packed array [0..maxstrchars] of char;
    STRX: integer;   {next available character slot in STRTAB}

    SFILE, RFILE: text;      { source, report files }

    ERR_COUNT,               { tokens left before errors report again }
    ERRORS: int;             { number of errors seen so far }
    DEBUG: int;              { >0 turns on some tracing }
    TRACE: boolean;          { set FALSE at startup; not used in this file }

    SFILENAME, RFILENAME: string80;  { file names }

  function NEW_SEM (SEMTYP: semtype): semrecp; forward;
  procedure IDEBUG; forward;

  {$I tuutils}

  {$I tufiles}

  {******************}
  procedure MORE(MSG: string80);
    { Display MSG and pause until the user acknowledges it by
      typing a character; the character itself is discarded. }
    var REPLY:  char;
  begin
    reply := resp(msg)
    end;

  {******************}
  procedure REPORT_ERR(MSG: string80);
    { Write MSG to the report file with a caret aligned under the
      offending token (using ERRPOS plus the listing prompt width).
      Nothing is printed while ERR_COUNT is nonzero, which
      suppresses cascades of messages after a recovery. }
  begin
    if err_count = 0 then
      begin
      if (errpos + promptlen) > 1 then
        write(rfile, ' ':(errpos + promptlen - 1));
      writeln(rfile, '^');  { caret marks the error column }
      writeln(rfile, msg)
      end
    end;

  {*******************}
  procedure ABORT(MSG: string80);
    { Report a fatal error and terminate the compiler at once. }
  begin
    report_err(concat('FATAL ERROR: ', msg));
    writeln('... aborting');
    halt
    end;

  {******************}
  procedure ERROR(MSG: string80);
    { Report a recoverable error and count it.  Reporting is
      skipped while ERR_COUNT is nonzero (error-recovery
      cool-down); exceeding MAXERRORS aborts the compilation. }
  begin
    if err_count = 0 then begin
      report_err(concat('ERROR: ', msg));
      errors := errors + 1;
      if errors <= maxerrors then begin
        { drop into the interactive debugger on console runs }
        if rfilename = default_rfile then idebug
        end
      else begin
        err_count := 0;
        abort('Error limit exceeded')
        end
      end
    end;

  {*******************}
  procedure WARN(MSG: string80);
    { Report a non-fatal diagnostic; the error count is not bumped. }
  begin
    report_err(concat('WARNING: ', msg))
    end;

  {$I tusyms}

  {********************}
  procedure SYMERROR(SYM: symbol; MSG: string80);
    { Report an error of the form "SYM: MSG", tying the message to
      a particular symbol spelling. }
  begin
    error(concat(sym, concat(': ', msg)));
    end;

{#<D -- debugging utilities. }
  {$I tudbug}

{#> -- end debugging stuff. }
{ LEXICAL ANALYZER }

  {*******************}
  procedure GETLINE;
    { read the next source line, when nextch exhausts
      the current one. }

    {.............}
    procedure GENEOF;
      { fake a one-character line holding only the eof mark }
    begin
      line := chr(eofch);
      lx := 1
    end;

    {............}
    procedure GRABLINE;
      { read one raw source line and echo it to the report file }
    begin
      readln(sfile, line);
      writeln(rfile, ' ; ', line);
      lx := 1
    end;

  begin { getline }
    if is_console then begin
      { prompt if from the console file }
      write(prompt);
      grabline;
      { a console user ends the session by typing the word EOF }
      if line = 'EOF' then geneof
      end
    else if eof(sfile) then
      geneof
    else
      grabline;
    { The appended eol character ensures that tokens are broken over
      line endings; they would otherwise be invisible to the scanner.
      eolch allows the string scanner to distinguish ends of lines. }
    line := concat(line, '  ');
    line[length(line)-1]:=chr(eolch);
  end;

  {*******************}
  procedure NEXTCH;
    { Advance CH to the next source character, refilling LINE via
      GETLINE whenever the buffer is exhausted.  The cursor stays
      on an end-of-file mark so CH keeps returning it forever. }
  begin
    if lx > length(line) then
      getline;
    ch := line[lx];
    if not (ch = chr(eofch)) then
      lx := succ(lx)  { never move past an eof mark }
  end;

  {********************}
  function PEEKCH: char;
    { Return the character after CH without consuming it; past the
      end of the buffer an end-of-line character is reported. }
  begin
    if lx <= length(line) then
      peekch := line[lx]
    else
      peekch := chr(eolch);
    end;

  {********************}
  procedure SKIPBLANKS;
    { Skip blanks and curly-brace comments preceding the next
      token; on return CH holds the first significant character. }
    var IN_COMMENT: boolean;

    {..................}
    function END_COMMENT: boolean;
      { TRUE at a closing brace, or (after an error message) at an
        end-of-file mark inside an unclosed comment. }
    begin
      if ch = chr(eofch) then begin
        error('unclosed comment at file end');
        end_comment := true;
        end
      else
        end_comment := (ch = '}');
      end;

  begin  { comments open on a left brace and close on close-brace }
    repeat
      while ch = ' ' do nextch;
      in_comment := (ch = '{');
      if in_comment then begin
        while not end_comment do nextch;
        nextch;  { step over the closing brace }
        end
    until not in_comment;
    end;

  {***********************}
  procedure GET_SYMBOL;
      {collects Pascal-style identifiers,
          stuffed into symbol table under IDENT tag}
    var SYM: symbol;
        STP: symtabp;
  begin
    sym:='';
    { Keep snarfing alphanumeric characters.  Up to the first
      maxtoklen of them will be put in the symbol spelling. }
    { NOTE(review): the guard is <=, so the spelling can actually
      reach maxtoklen+1 characters -- confirm type SYMBOL (declared
      in tudecls) is wide enough for that. }
    while ch in ['a'..'z', 'A'..'Z', '0'..'9', '_'] do begin
      if length(sym) <= maxtoklen then begin
        { grow by one blank, then overwrite it in place -- avoids
          building a one-character temporary string }
        sym:=concat(sym, ' ');
        sym[length(sym)] := upcase(ch);
        end;
      nextch;
      end;
    stp := makesym(symtab, sym, user, 0);  { the default level is 0 }
    if (stp^.symt=reserved) then
    token:=stp^.tokval  { a keyword: token number comes from the table }
    else begin
      { an ordinary identifier: build IDENT semantics for it }
      lsemp:=new_sem(ident);
      with lsemp^ do begin
        symp := stp;
        { NOTE(review): inside `with lsemp^' this assigns the
          record's TOKEN field if SEMREC declares one, otherwise
          the global TOKEN -- verify against tudecls. }
        token := ident_tokx;
        end
      end
    end;

  {************************}
  procedure GET_NUMBER;
    { Scan an unsigned decimal integer at CH, setting TOKEN and
      leaving the value in a fresh FIXED semantic record.
      NB: no overflow check is made while accumulating. }
    var ACCUM: integer;
  begin
    accum := 0;
    while ch in ['0'..'9'] do begin
      accum := accum*10 + (ord(ch) - ord('0'));
      nextch;
      end;
    token := int_tokx;
    lsemp := new_sem(fixed);
    lsemp^.numval := accum;
    end;

  {***********************}
  procedure GET_STRING;
    { Scan a quoted string literal.  The characters are copied into
      the string pool STRTAB (terminated by chr(0)); the STRNG
      semantic record carries the starting pool index.  A doubled
      quote mark stands for one literal quote character. }
    label 1, 99;

    {..................}
    procedure PUTCH(CH: char);
      { append one character to STRTAB; at the pool limit complain
        once, plant the terminator, and drop everything after }
    begin
      if strx<maxstrchars then begin
        strtab[strx]:=ch;
        strx:=strx+1;
        end
      else
      if strx=maxstrchars then begin
        error('too many string characters');
        strtab[maxstrchars]:=chr(0);
        strx:=strx+1;
        end
      end;

  begin
    lsemp:=new_sem(strng);
    lsemp^.stx:=strx;  { string starts at the current pool top }
    1:
    nextch;  {get over first quote mark}
    while ch<>'''' do begin
      if ch=chr(eolch) then begin
        error('string runs over end of line');
        goto 99;
        end;
      putch(ch);
      nextch;
      end;
    nextch;  {get over last quote mark}
    if ch='''' then begin  {repeated quote mark}
      putch(ch);
      goto 1;   {continue grabbing characters}
      end;
    99:
    putch(chr(0));  { terminate the pooled string }
    token:=str_tokx;
    end;
    
  {********************}
  procedure GET_TOKEN;
    { Pascal-style lexical analyzer -- sets TOKEN to token number }
    { Also sets LSEMP to the token's semantic record, or nil when
      the token carries no semantics.  The {#...} lines are parser
      generator directives; only one of the eol alternatives below
      survives expansion, which is why no semicolon separates them. }
  begin
    lsemp:= nil;  { default case }
    skipblanks;
    errpos:=lx-1;  { remember the column for error carets }
    case ch of
      'a'..'z', 'A'..'Z': get_symbol;
      '0'..'9': get_number;
      '''': get_string;
{#<D -- if debugging, invoke idebug on a bang (or other char). }
      '!':  begin
              idebug;
              nextch;
              get_token
            end;
{#>}
{#G   special symbol cases go here }
      ELSE  begin
        if ch=chr(eofch) then
          token := stop_tokx
        else if ch=chr(eolch) then begin
          nextch;
{#<E            end-of-line token dealt with here }
          token := eol_tokx  { accept an end-of-line token }
{#:}
          get_token  { go find another (significant) character }
{#>}
          end
        else begin
          error('invalid character');
          nextch;
          get_token;
          end
        end { case alternatives }
      end; { case }
    { count down the error-report cool-down, one token per call }
    if err_count>0 then err_count:=err_count-1;
    end { get_token };

  {*******************}
  procedure NEXT_TOKEN;
    { Make sure TOKEN/LSEMP describe the current input token.
      Slot 1 of the token queue caches the most recent scan, so
      the same token can be examined repeatedly until TOKENREAD
      consumes it. }
  begin
    if tokenx <= 1 then begin
      { queued token is still current -- reuse it }
      token := tokary[tokenx];
      lsemp := lsempary[tokenx];
      end
    else begin
      tokenx := 1;
      get_token;  { sets token and lsemp }
      tokary[1] := token;
      lsempary[1] := lsemp;
      end
    end;

  {*****************}
  procedure TOKENREAD;
    { Mark the current token as consumed; the next NEXT_TOKEN call
      will then scan ahead instead of reusing the queue. }
  begin
    tokenx := succ(tokenx);
    end;

  {$I tusems}

  { LR(1) PARSER procedures }

  {****************}
  function ERROR_RECOVERY(var MSTACK: state_stack;
                          var MSTACKX: int; MCSTATE: int): int;
    { Attempt to repair a syntax error.  For each stack cutback
      level JSTX (starting at the current top and working down),
      every token legal in that state is tried as an insertion by
      running a trial parse on a private copy of the stack; the
      first insertion that parses through the error token wins.
      If no insertion works at any level, the offending token is
      dropped and the whole search repeats.  At the stop token the
      parse is forced into the halt state.  Returns the state to
      resume in; MSTACKX is cut back to the matching level. }
    label 99, 100;
    var STACK: state_stack;  { local copy of stack }
        TOS,              { local stack pointer }
        CSTATE,              { local state }
        JSTX,                { temporary stack limit }
        RX, TL: int;         { index into TOKNUM table }

    {...............}
    procedure COPY_STACK;
      { refresh the trial stack from MSTACK up to level JSTX and
        set CSTATE to the state in control at that level }
      var STX: int;
    begin
      if (jstx<0) or (jstx>mstackx) then abort('ERROR RECOVERY BUG');
      for stx := 0 to jstx do
        stack[stx] := mstack[stx];
      tos := jstx;
      if jstx=mstackx then
        cstate := mcstate
      else
        cstate := mstack[jstx+1];
    end;

    {...............}
    procedure PUSHREAD(CSTATE: int);
      { adjusts the state stack }
    begin
      tos := tos+1;
      if tos>stacksize then
        abort('stack overflow');
      stack[tos] := cstate;
    end;

    {...............}
    function TRIAL_PARSE: boolean;
      { parses from current read state through the inserted and the
        error token; if successful, returns TRUE. }
      label 99;
      var RX: int;
    begin
      trial_parse := true;  { until proven otherwise }
      while cstate<>0 do begin
        if cstate < readstate then begin
          { a reduce state }
{#<D      dump if debugging enabled. }
          if debug > 3 then stk_dump('E*Reduce', stack,
                                     tos, cstate);
{#>       end conditional. }
          if popno[cstate]=0 then begin
            { empty production }
            pushread(stk_state[statex[cstate]]);
            cstate := stk_tostate[statex[cstate]];
          end
          else begin
            { non-empty production }
            tos := tos - popno[cstate] + 1;
            rx := statex[cstate];   { compute the GOTO state }
            cstate := stack[tos];
            while (stk_state[rx]<>cstate) and
                  (stk_state[rx]<>0) do rx := rx+1;
            cstate := stk_tostate[rx];
          end
        end
        else if cstate < lookstate then begin
          { a read state }
          next_token;  { need a token now }
{#<D      dump if debugging enabled. }
          if debug > 3 then stk_dump('E*Read', stack, tos, cstate);
{#>       end conditional. }
          rx := statex[cstate];
          while (toknum[rx]<>0) and
                (toknum[rx]<>token) do rx := rx+1;
          if toknum[rx]=0 then begin
            { failure }
            trial_parse := false;
            goto 99;
          end
          else begin
            { did read something }
            pushread(cstate);
            cstate := tostate[rx];
            tokenread;  { scan the token }
            { parsed past the error token: repair is good enough }
            if tokenx>1 then goto 99 { successful }
          end
        end
        else begin
          { lookahead state }
          next_token;  { need a token now }
{#<D      dump if debugging enabled. }
          if debug > 3 then stk_dump('E*Look', stack, tos, cstate);
{#>       end conditional. }
          rx := statex[cstate];
          while (toknum[rx]<>0) and
                (toknum[rx]<>token) do rx := rx+1;
          cstate := tostate[rx];
        end
      end;
    99:
    end;

    {.................}
    procedure INCR_ERRSYM;
      { Note that this procedure assumes ASCII. }
      { bump the generated name through ERR#AA, ERR#AB, ... }
    begin
      if errsym[6]='Z' then begin
        errsym[5] := succ(errsym[5]);
        errsym[6] := 'A';
      end
      else
        errsym[6] := succ(errsym[6]);
    end;

    {.................}
    procedure MAKE_DEFAULT(TOKX: int; var SEMP: semrecp);
      { creates a default token data structure }
      { NB: local SYM is declared but never used here. }
      var SYM: symbol; TSYMP: symtabp;
    begin
      case tokx of
        ident_tokx: begin
          { inserted identifier: enter a fresh ERR#xx symbol }
          tsymp := makesym(symtab, errsym, symerr, 0);
          semp:=new_sem(ident);
          with semp^ do begin
            symp:=tsymp;
            incr_errsym;
            end
          end;
        int_tokx: begin
          { inserted integer: default value 1 }
          semp:=new_sem(fixed);
          semp^.numval:=1;
          end;
        ELSE
          semp:=nil;
      end { case tokx }
    end;

  begin  { ERROR_RECOVERY }
    if debug > 3 then writeln(rfile, 'Going into ERROR RECOVERY');
    while true do begin
      jstx := mstackx;
      while jstx>=0 do begin
        copy_stack;
        rx := statex[cstate];
        while toknum[rx]<>0 do begin
          { scan through legal next tokens }
          if debug > 3 then writeln(rfile, '...starting trial parse');
          tokary[0] := toknum[rx];  { the insertion }
          tokenx := 0;
          if trial_parse then goto 99;  { it clicked! }
          rx := rx+1;
          if toknum[rx]<>0 then
            copy_stack;
        end;
        jstx := jstx-1;  { reduce stack }
      end;
      if token=stop_tokx then begin
        { empty stack, no more tokens }
        cstate := 0;  { halt state }
        tokenx := 2;
        jstx := 0;  { bottom of stack }
        goto 100;
      end;
{#<D}
      if debug > 3 then begin
        write(rfile, '...dropping token ');
        tl := wrtok(tokary[1]);
        writeln(rfile);
      end;
{#>}
      tokenx := 2;
      next_token;
{#<D}
      if debug > 3 then begin
        write(rfile, 'New token ');
        tl := wrtok(token);
        writeln(rfile);
      end
{#>}
    end;
  99:  { found a solution }
    copy_stack;
{#<D}
    if debug > 3 then begin
      write(rfile, 'insertion of ');
      tl := wrtok(tokary[0]);
      writeln(rfile, ' succeeded');
    end;
{#>}
    make_default(tokary[0], lsempary[0]);
    tokenx := 0;  { forces a `real' rescan of the insertion }
    if jstx<mstackx then
      cstate := stack[jstx+1]
    else
      cstate := mcstate;  { cstate returned }
  100:
    error_recovery := cstate;
    mstackx := jstx;
    if debug > 3 then writeln(rfile, 'Ending error recovery');
  end;

  {****************}
  procedure PARSER;
    { Carries out a complete parse, until
      the halt state is seen -- same as empty stack}
    { State numbering convention: states below READSTATE are
      reduce states, states below LOOKSTATE are read states, the
      rest are lookahead states; state 0 is the halt state. }
    var CSTATE, RX: int;
        TSEMP: semrecp;

    {...............}
    procedure PUSHREAD(CSTATE: int; SEMP: semrecp);
      { do the push part of a readstate. }
      { pushes the state and its semantics record in parallel }
    begin
      tos := tos+1;
      if tos>stacksize then
        abort('stack overflow');
      sem[tos] := semp;
      stack[tos] := cstate;
    end;

  begin
    cstate := start_state;
    tos := -1;
    pushread(stk_state_1, nil);
    while cstate<>0 do begin
      tsemp:=nil;
      if cstate < readstate then begin
        { a reduce state }
{#<D    dump if debugging enabled. }
        if debug > 0 then stk_dump('Reduce', stack, tos, cstate);
{#>     end conditional. }
        if map[cstate] <> 0 then
          { the semantics action }
          apply(map[cstate], tsemp);
        if popno[cstate]=0 then begin
          { empty production }
          pushread(stk_state[statex[cstate]], tsemp);
          cstate := stk_tostate[statex[cstate]];
        end
        else begin
          { non-empty production:
            semantics is preserved on a unit production A --> w,
            where |w| = 1, unless something is in TSEMP.  Note that
            if w is nonterminal, the production may be bypassed. }
          tos := tos - popno[cstate] + 1;
          if (tsemp<>nil) or (popno[cstate]<>1) then
            sem[tos] := tsemp;
          { compute the GOTO state }
          rx := statex[cstate];
          cstate := stack[tos];
          while (stk_state[rx]<>cstate) and (stk_state[rx]<>0) do
            rx := rx+1;
          cstate := stk_tostate[rx];
        end
      end
      else if cstate < lookstate then begin
        { a read state }
        next_token;  { need next token now }
{#<D    dump if debugging enabled. }
        if debug > 2 then stk_dump('Read', stack, tos, cstate);
{#>     end conditional. }
        rx := statex[cstate];
        while (toknum[rx]<>0) and (toknum[rx]<>token) do
          rx := rx+1;
        if toknum[rx]=0 then begin
          { no legal move on this token: repair and resume }
          error('syntax error');
          cstate := error_recovery(stack, tos, cstate);
          err_count:=merr_count;
        end
        else begin
          pushread(cstate, lsemp);
          cstate := tostate[rx];
          tokenread;  { token has been scanned }
        end
      end
      else begin
        { lookahead state }
        next_token;  { need another token now }
{#<D    dump if debugging enabled. }
        if debug > 2 then stk_dump('Look', stack, tos, cstate);
{#>     end conditional. }
        rx := statex[cstate];
        while (toknum[rx]<>0) and (toknum[rx]<>token) do
          rx := rx+1;
        cstate := tostate[rx];
      end
    end;
    end_sem;  { wrap up the semantics phase }
  end;

  { PARSE INITIALIZATION }

  {*****************}
  procedure INITTABLES;
    { One-time initialization of all scanner/parser data: the
      powers-of-ten table, the token queue, the symbol and
      semantics tables, the generated parser tables, and the
      line buffer (primed by the first NEXTCH). }
    var SX: int;
        CODE_SYMP: symtabp;

    {................}
    procedure PUTSYM(TSYM: symbol; TV: int);
      { enter reserved word TSYM with token value TV }
      var SYMP: symtabp;
    begin
      symp:=makesym(symtab, tsym, reserved, -1);
      symp^.tokval:=tv;
    end;

{#<D   also need to init debugging tables? }
    {................}
    procedure PUTTOK(PRINTVAL: string80;  TOKNUM, START: int);
      { this procedure is used to initialize the token tables.
        toknum is the number of the token to be initialized, and
        start is where it should start in the tokchar array. }
      var OFFSET:  int;
    begin
      tokx[toknum] := start;
      for offset := 0 to length(printval)-1 do
        tokchar[start+offset] := printval[offset+1];
      tokchar[start+length(printval)] := chr(0)  { spelling terminator }
    end;
{#>    end puttok insertion. }

    {................}
    procedure INIT_PARSER_TABLES;
      { initialize the parser tables }
    begin
{#IS    inline symbol table inits. }
{#<A    assignment style inits? }
{#IP      do the parser tables inline. }
{#>     end assignment inits. }
{#<D    debugging? }
{#IT      do the token tables inline. }
{#>     end debugging }
    end { init_parser_tables };

  begin { inittables }
    { Binary powers of ten for real-number conversion.
      NOTE(review): only elements 0..5 are filled although the
      array is declared [0..8] -- presumably 1E64 would overflow
      this real type; confirm no reader indexes past 5. }
    pwr10_2[0] := 1E1;  {10^(2^0)}
    pwr10_2[1] := 1E2;  {10^(2^1)}
    pwr10_2[2] := 1E4;
    pwr10_2[3] := 1E8;
    pwr10_2[4] := 1E16;
    pwr10_2[5] := 1E32;
    lsempary[0]:=nil;
    lsempary[1]:=nil;
    lsemp := nil;
    tokenx := 2;  { no token queue }
    for sx := 0 to hlimit do begin
      symtab[sx] := nil;  { initialize symbol tables }
      end;
    for sx := 0 to stacksize do begin
      sem[sx]:=nil;
    end;
    init_parser_tables;
    init_sem;
    errsym:='ERR#AA';  { first generated error-recovery symbol }
    strx:=0;           { string pool starts empty }
    line := '';  { fake a new line }
    lx := 1;
    errpos:=1;
    err_count:=0;
    nextch;  { fetch the first character, forcing a line read }
  end;

  {**********************}
  function OPENFILES: boolean;
    { Prompt for the source and report file names and open both.
      An empty source name selects console input with a `> '
      prompt; an empty report name selects the default report
      file.  Returns TRUE only when both opens succeed. }
  begin
    openfiles := false;
    write('Source file? ');
    readln(sfilename);
    prompt := '';
    promptlen := 3;  {length of ` ; '}
    is_console := (sfilename = '');
    if is_console then begin
      prompt := '> ';
      sfilename := default_sfile;
      promptlen := promptlen + 2;
      end;
    if openfile(sfile, sfilename, false) then begin
      write('Target file? ');
      readln(rfilename);
      if rfilename = '' then rfilename := default_rfile;
      openfiles := openfile(rfile, rfilename, true);
      end
    end;
    
begin
  { Compiler entry point: announce the version, open the files,
    then compile the whole source in a single call to PARSER. }
  writeln('Tiny Pascal Compiler, ', version_string);
  writeln;
  debug := 0;
  errors := 0;
  trace := false;
  if openfiles then begin
    inittables;
    parser;  { does it all }
    closefiles
    end
  end.


