(*
   gramtool.pas       Copyright (c) 1991  by Georg Post

      Toolbox for the ChekGram grammar check program.

  Purpose: check a given grammar for LL(1) properties and generate parsers.
           Code of 3 parsers in this file, for performance comparison:
           -  Big parsing table:     Speed=100
           -  Condensed table  :     Speed=65
           -  Recursive interpreter: Speed=40
  Input  : A grammar specification file, here Grammar5.txt for Turbo Pascal.
           The file has Backus-Naur-like productions mixed with semantic action
           markers "_<actionName>".
  Output : The elementary grammar rule set with additional nonterminals.
           LL(1) violations and misplaced semantic actions are reported.
           Parsing tables for nonrecursive parsers are built up, assuming
           that ambiguities be resolved in favour of the "earlier" rule.

 The table-driven LL(1) parser (procedure tableParser) is an "exercise" only.
 Even if it's  2.5 times faster than a recursive "rule interpreter",
 I do not, for the time being, use it in  PCPC.PAS: It is a memory hog
 requiring a huge (about 50 K ) sparse matrix  on the heap. A second variant
 uses the parsing table in a compacted form, with slower list searching.
 The tables nonTmTab and ruleTab below prepare such a cheap version (<10K).
 With dichotomic lookup, it is 30 % slower but saves 50 K of precious PC
 memory!
 tableParser interfaces with the world exactly like the PCPC basicParser:
 calling

    pascannr.Scanner        - source code lexical analysis
    pascannr.TreatSymbol    - symbol classification
    semanti5.SemanticAction - semantic data pile-up ...

*)

unit GramTool;

{$R+,S+}
interface
uses pcpcdata,
     pascannr,
     semanti6;

const nothing=9999; {null symbol for void production, terminates the others}
      nulAction=1000; {everything >1000 is a  semantic action }
      start='{';   {special markers in Gt the grammar table}
      termi='}';
      separ='|';
      repet='#';
      escape='_';
      nontrm='~';

      nIndirect=2000; { size of indirect parse tables}
      nbProduct=460; {max. number of productions}
      maxTerm=125;   {max. number of terminal symb}
      maxNont=230;   {max. number of nonterminals}
      anyId=12; {privileged symbol for any new identifier}
      maxId=15; {we have <=15 kinds of Ids}

type
    production=array[0..12] of integer; {index 0: the nonterminal symb.
      Then a sequence of numbers terminated by  Nothing }
    termSet= set of 0..255; {set of terminal symbols, 0 = nothing}
{
 Convention for integer entries in a production:
   Epsilon = Null    = 9999
   Actions = numbers > 1000
   Nonterminal symbols =      257...999
   Terminal reserved words =  128...255
   Terminal ASCII =             33..127
   Terminal unknown Id =        1..15
   Terminal known   Id =       17..31
}
    contiTp=array[1..nbProduct] of termSet;
        {continuation sets: index is a production }
     biTp=array[0..255] of integer;    {base index type}
     tabTp=array[1..nIndirect] of integer;  {condensed parseTab}

     parseTab = array[0..maxTerm, 0..maxNont] of integer; {50K ! }
     ppTab=^parseTab;
     termsymbIx= array[0..255] of byte; { column entry to parseTable }

var prodList : array[1..nbProduct] of production;
     {prodList = elementary productions, actions absorbed in prodList }
    prodLength:array[1..nbProduct] of byte; {length of each production}
    nameIndex: array[1..maxNont] of integer;
    nprod: integer;    {number of productions, <=nbProduct}
    nNonterm: integer; {number of non-terminal symbols}
    first, follow, aux: array[0..maxNont] of termSet;
      {For any nonterminal, set of First and Follow terminals}
    reCycle:array[0..255] of boolean; {symbol MAY be redefined as anyId}
    bigMemory:boolean; {make sparse parsing matrix, or else compressed lists }
var stat:scanStatus;
    sDat:symbolData; {used for symbol table insertion }
    globDebug: boolean;
    emergency: boolean;
    basic:     boolean;

procedure showAllProd(waitRet:boolean);
procedure checkActions; { for actions inside prodList}
procedure checkBraces; {in the grammar table gt}
procedure testUse; {are all terminals/nonterminals really useful ? }
procedure compFirst; {compute the First tables}
procedure compFollow; {the Follow items of nonterminals}
procedure testLL1( var conti:contiTp);
procedure suspectTriple;
procedure modifConti(var conti:contiTp); {identifier parsing trick}
procedure parsingTables( var conti:contiTp;  {entry data to this procedure}
  var nTerm,lastIndex: integer;
  var baseIndex:biTp; var nontmTab,ruleTab: tabTp;
  var parseTable: parseTab; var termIndex: termSymbIx);
procedure basicParser(var success: boolean; var p:integer;
  var stat: scanStatus;  mustskip,freshFile:boolean);
procedure tableParser(var correct: boolean;
  var baseIndex:biTp; var nontmTab,ruleTab: tabTp;
  var Pt:parseTab; var ti:termSymbIx;
  entry: integer; debug,turbo:boolean);
procedure productionList;

implementation

procedure display(var s:symbol);
{ACTION: output an identifier to screen
 CALLER: showSymbol
}
var i:integer; c:char;
begin i:=0;
  repeat i:=i+1;
    if i<=lenIde then c:=s[i] else c:=' ';
    if c>' ' then write(c);
  until c<=' ';
end;

procedure showSymbol(k: integer); {k<256 terminal, else nonterminal}
{ACTION: grammar symbol (nonterminal/terminal/action) to screen
 CALLER: showProd testLL1 parsingTables
}
var j: integer;
begin
  write(' ');
  if k<16 then begin
    write('?');display(idTypeList[k]);
  end else if k<32 then begin
    write('&');display(idTypeList[k-16]);
  end else if k<128 then begin
    write(chr(k))
  end else if k<256 then begin {reserved}
    with smb[k-127]  do begin
      for j:=ix to ix+len-1 do write(gt[j]);
    end;
  end else if k<nulAction then begin j:=k-256;
    if (j>0)and(j<=nbObject) then display(objectList[j]) else  write(k:3);
  end else if k<nothing then begin
    write('_'); display(actionList[k-nulAction]);
  end else write('$');  { $ marks the end of production}
end;

procedure showProd(var p:production);
{ACTION: a grammar production to screen
 CALLER: showAllProd checkActions suspectTriple testLL1

  Problem: we have Named Productions and UnNamed ones !
  Invent numbered names for those subproductions!
  The production expander has the code:
  prodList[iPlist,0]:=nonterm;
  nameList[nonterm-256]:=objectList[origin];
}
var i,j,k,s: integer;
begin  s:=p[0]-256;
  if s<=nbObject then write('    ',objectList[s]) else begin
    write(p[0]:3); write('.',objectList[nameIndex[s]]);
  end;
  write(':-'); i:=0;
  repeat i:=i+1; k:=p[i]; showSymbol(k);
  until k>=nothing;
end;

procedure showAllProd(waitRet:boolean);
{ACTION: list of the expanded productions, 20 lines per screen
 CALLER: checkGram.
}
var i:integer;
begin
  writeln('   --------  List of ',nProd,'  productions: -----');
  for i:=1 to nProd do begin
    showProd(prodList[i]);
    writeln;
    if waitRet and ((i mod 20) = 0) then readln;
  end;
end;

function couldVanish(var p:production; n:integer): boolean;
{ACTION: test if part 1..n of production p has vanishing nonterminals
 CALLER: checkActions
}
var yes:boolean; j,k: integer;
begin yes:=true; k:=0;
  repeat k:=k+1;
    if k<=n then begin j:=p[k];
      if j<256 then yes:=false
      else if j<nulAction then begin {nonterminal}
        if not (0 in first[j-256]) then yes:=false;
      end;
    end;
  until (k>n) or (not yes);
  couldVanish:=yes;
end;

procedure checkActions; { for actions inside prodList}
{ACTION:  flag early semantic actions: placed before a production is accepted
 CALLER:  chekGram.
}
var ix,i,p,q: integer;
    used: set of 1..dictSize;
begin
  used:=[];
  for ix:=1 to nprod do begin i:=1; p:=prodList[ix,i];
    while p<>nothing do begin
      if (p>nulAction) then begin {action triggered at point i}
        q:=p-nulaction; used:=used+[q];
        if couldVanish(prodList[ix],i-1) then begin
          write('Unsafe Action _',actionList[q],' in ');
          showProd(prodList[ix]); writeln;
        end;
      end;
      i:=i+1; p:=prodList[ix,i];
    end; {while}
  end;  {for ix}
  for ix:=1 to nbAction do begin
    if not (ix in used) then writeln('Unused Action _',actionList[ix]);
  end;
  writeln;
end;

procedure nextSmb(p:integer; var k,pk:integer);
{ACTION: advance k in production p to next symbol, skipping actions.
 CALLER: suspectTriple
 OUTPUT: pk= serial number of the symbok, =Nothing if end of list.
}
begin
  pk:=prodList[p,k];
  if pk<>nothing then begin
    repeat k:=succ(k);pk:=prodList[p,k] until (pk<nulAction)or(pk=nothing);
  end;
end;

procedure suspectTriple;
{ACTION: check the production list for buggy sequences
         for the moment, a triple M N t in a production is suspicious if the
         nonterminals M and N have 0 in their First set, t= terminal smb.
         Parser seems to have a bug for #M N t  ?
 CALLER: chekGram.
}
var p,i,j,k, pi,pj,pk: integer;
    suspect:boolean;
begin
  for p:=1 to nProd do begin
    i:=0; nextSmb(p,i,pi);
    j:=i; nextSmb(p,j,pj);
    k:=j; nextSmb(p,k,pk); {the first triplet}
    while pk<>nothing do begin
      suspect:=(pi>256)and(pj>256)and(pk<256); {sequence NNt}
      if suspect then suspect:=(0 in First[pi-256])and(0 in First[pj-256]);
      if suspect then begin
        showProd(prodList[p]); writeln(' at ',k);
      end;
      pi:=pj;pj:=pk; nextSmb(p,k,pk);
    end;
  end;
end;

procedure checkBraces;
{ACTION: check that all braces match in the grammar table Gt
 CALLER: chekGram.
}
var a,b,i,k,ix,nLevel: integer; c:char;    stop:boolean;
begin
  k:=1; while index[k]>0 do k:=succ(k);
  k:=k-1; {nb of explicit nonterminals}
  for i:=1 to k do begin
    a:=index[i];
    if i<k then b:=index[i+1]-1 else b:=GtSize-1; {range in gt}
    nLevel:=0; ix:=a; stop:=false;
    repeat c:=gt[ix];
      while c=escape do begin ix:=ix+2;c:=gt[ix] end;
      if c=start then nLevel:=nLevel+1
      else if c=termi then nLevel:=nLevel-1;
      ix:=succ(ix);
      if (nLevel=0)and(ix<=b) then begin
        writeln('Too many "}" in ',objectList[i]); stop:=true;
      end;
    until (ix>b) or stop;
    if nLevel>0 then writeln('Too many "{" in ',objectList[i]);
  end; {for i}
end;

procedure testUse;
{ACTION: production list test: are all terminals/nonterminals really useful ?
 CALLER: chekGram
}
var use,aux: array[0..255] of boolean;
    i,j,k,n,nN,parasitic: integer; ok,stable:boolean;
begin
  nN:=nNonterm-256;
  for i:=0 to nN do use[i]:=false;
  write('Test Use of Nonterminals ');
  repeat aux:=use; {for saturation check}
    write('*');
    for i:=1 to nProd do begin
      j:=prodList[i,0]-256; {terminal symbol} n:=0; ok:=true;
      repeat n:=succ(n); k:=prodList[i,n];
        if k<256 then {do nothing, ok =true}
        else if k<nulAction then begin
          k:=k-256;
          if k<0 then begin writeln('Index error k:TestUse = ,k');
          end;
          ok:=ok and use[k];
        end;
      until k=nothing;
      if ok then use[j]:=true;
    end;
    stable:=true;
    for i:=1 to nN do stable:=stable and (use[i]=aux[i]);
  until stable;
  writeln;
  parasitic:=0;
  for i:=1 to nN do begin
    if not use[i] then begin
      writeln('Symbol ',i, objectList[nameIndex[i]], ' not used ! ');
      parasitic:=parasitic+1;
    end;
  end;
  writeln(parasitic,'  parasitic nonterminal symbols.');
end;

procedure compFirst; 
{ACTION: compute the First tables (a set of Terminals per Nonterminal)
 CALLER: chekGram.
}
var Nn, i,j,k,n: integer;
    stop,goon,stable: boolean;
begin  Nn:=nNonterm-256;
  for i:=0 to Nn do first[i]:=[];
  for k:=1 to nProd do begin {initialize First with immediate symbols}
    i:=prodList[k,0]-256; j:=prodList[k,1];
    if j<256 then first[i]:=first[i]+[j]
    else if j=nothing then first[i]:=first[i]+[0];
  end;
  repeat {brute-force loop until we saturate all of them}
    write('*');
    for i:=1 to Nn do aux[i]:=first[i]; {keep trace of old status}
    for k:=1 to nProd do begin
      if (k mod 10)=0 then write('.');
      i:=prodList[k,0]-256; n:=0;  goon:=false;
      repeat
        repeat n:=n+1; j:=prodList[k,n];
        until (j<nulAction) or (j=nothing);
          {skip action symbols}
        stop:=(j<256) or (j=nothing);
        if not stop then begin
          j:=j-256; {nonterminal index}
          first[i]:=first[i] + ( first[j] - [0] );
          goon:=(0 in first[j]);  {j may be missing ! }
        end else begin
          if goon and (n>1) then begin
            if j<256 then first[i]:=first[i]+[j]
            else if j=nothing then first[i]:=first[i]+[0];
          end;
        end;
      until stop or (not goon);
    end; {for k}
    stable:=true;
    for i:=1 to Nn do stable:=stable and (first[i]=aux[i]);
    writeln;
  until stable;
end;

function mayVanish(var p: production; n: integer): boolean;
{ACTION: test if the tail n+1, n+2 ... of prod. p has vanishing nonterminals
 CALLER: compFollow
}
var yes: boolean; i,k:integer;
begin yes:=true; k:=n;
  repeat
    repeat k:=k+1; until (p[k]<nulAction) or (p[k]=nothing);
    if p[k]<256 then yes:=false  {terminal doesn't vanish}
    else if p[k]<>nothing then begin i:=p[k]-256;
      yes:= ( 0 in first[i]); {this one may disappear}
    end;
  until (not yes) or (p[k]=nothing);
  mayVanish:=yes  { and (k>(n+1)); if k=n+1 we have one nothing ! }
end;

procedure compFollow;
{ACTION: compute for each Nonterminal the Follow set of Terminals
 CALLER: chekGram
}
var Nn, i,j,k, n,m: integer;
    stable,stop: boolean;
begin  Nn:=nNonterm-256;
  for i:=1 to Nn do follow[i]:=[];
  for k:=1 to nProd do begin {initialize Follow with immediate symbols}
    n:=0;
    repeat n:=succ(n); i:=prodList[k,n]
    until (i<nulAction)or(i=nothing);
    repeat
      stop:=(prodList[k,n]=nothing);
      if not stop then begin
        i:=prodList[k,n];
        m:=n;
        repeat m:=succ(m); j:=prodList[k,m]
        until (j<nulAction)or(j=nothing);
        if (i>=256) and (j<>nothing) then begin {nonterminal + nontrivial}
          i:=i-256;
          if j<256 then follow[i]:=follow[i]+[j]
          else follow[i]:=follow[i]+ (first[j-256] - [0] );
        end;
        n:=m;
      end;
    until stop;
  end; {for k}
  repeat {loop until we saturate all of them}
    write('*');
    for i:=1 to Nn do aux[i]:=follow[i]; {keep trace of old status}
    for k:=1 to nProd do begin
      if (k mod 10)=0 then write('.');
      {look at all optional tails, to complete Follow}
      i:=prodList[k,0]-256; n:=0;
      repeat
        repeat n:=n+1; j:=prodList[k,n];
        until (j<nulAction)or(j=nothing);
        if (j>=256)and(j<>nothing) then begin j:=j-256; {nonterminal}
          if mayVanish(prodList[k],n) then
            follow[j]:=follow[j]+follow[i];
        end;
      until j=nothing;
    end; {for k}
    stable:=true;
    for i:=1 to Nn do stable:=stable and (follow[i]=aux[i]);
    writeln;
  until stable;
end;

procedure calcFirst(var p: production; var f:termSet);
{ACTION: compute f= First set of the right side of production p
         assume that First sets of all Nonterminals are known.
 CALLER: testLL1
}
var n,j: integer; ok:boolean;
begin f:=[]; n:=0; ok:=false;
  repeat n:=succ(n); j:=p[n];
    if j<256 then begin f:=f+[j]; {terminal: we're through} ok:=true
    end else if j<nulAction then begin
      j:=j-256; {nonterminal}
      f:=f+ ( first[j] - [0] );
      ok:=not (0 in first[j]);
    end else if j=nothing then f:=f+[0];
  until ok or (j=nothing);
end;

procedure screen(var t:text; var s:termSet);
var i,j,k: integer;
begin
  k:=0;j:=0;
  for i:= 255 downto 0 do begin
    if k=4 then begin write(t,chr(j+ord('0')));
      j:=0; k:=0;
    end else j:=2*j;
    if i in s then j:=j+1;
    k:=k+1;
  end;
  writeln(t,chr(j+ord('0')));
end;

procedure dbgSets(var conti:contiTp);
var i:integer; t:text;
begin
  assign(t,'bug.bug');
  rewrite(t);
  for i:=0 to nNonterm-256 do screen(t,first[i]);
  writeln(t);
  for i:=0 to nNonterm-256 do screen(t,follow[i]);
  writeln(t);
  for i:=1 to nProd do screen(t,conti[i]);
  writeln(t);
  close(t);
end;


procedure testLL1( var conti:contiTp);
{ACTION: compute continuation sets of all productions and check that they
         are disjoint, for any Nonterminal. Show ambiguities.
 CALLER: chekGram.
 OUTPUT: conti[p] is, for the production rule p, the set of all Terminals
         which may be valid first results of that rule.
}
var  t,common:termSet; h,i,j,k,m,n: integer;  ok:boolean;
begin
  for k:=1 to nProd do begin
    calcFirst(prodList[k], t);
    if not (0 in t) then conti[k]:=t
    else conti[k]:=(t - [0]) + follow[prodList[k,0]-256];
  end;
{  dbgSets(conti); debug log file ... }
  {LL(1) check: for any pair (j,k) with prodList[j,0]=prodList[k,0],
   void intersection required: conti[j] * conti[k] = [] .
   We need the right to eliminate the intersecting characters from the
   less important rule: the one with higher index ?
  }
  for j:=1 to nProd-1 do begin
    {write('.'); }
    n:=prodlist[j,0]; {the nonterminal}
    for k:=j+1 to nProd do begin
      m:=prodList[k,0];
      if n=m then begin {two productions starting from same nonterminal}
        common:=conti[j] * conti[k]; ok:=(common = []);
        if not ok then begin
          write ('Ambiguous rules:');
          for i:=1 to 255 do if i in common then begin
            showSymbol(i);
          end;
          writeln;
          showProd(prodList[j]); writeln;
          showProd(prodList[k]); writeln;
        end;
      end;
    end;
  end; {for j}
  writeln;
end;

procedure modifConti(var conti:contiTp);
{ACTION: revise continuation sets for my identifier parsing trick:
         replace all sorts of unknown Id's with unique symbol anyId.
         Ambiguities are resolved at parsing time: first rule seen is Ok.
 CALLER: chekGram.
}
var repl,known,unKnown: termSet;
    j,k: integer;
begin
  repl:=[anyId]; known:=[17..31]; unKnown:=[1..15];
  for k:=1 to nProd do if (conti[k]*unknown)<>[] then begin
    conti[k]:=(conti[k]-unKnown)+repl ;
  end;
end;

function PtLookup(Tsymb,Nsymb:integer;
    var baseIndex:biTp; var nontmTab,ruleTab: tabTp): integer;
{ACTION: dichotomic lookup in a condensed parse table. For medium-speed
         memory-efficient parser.
 CALLER: tableParser
 INPUT : baseIndex,nonTmTab,ruleTab: these 3 are the condensed parse table
 OUTPUT: PTlookup(t,n) should be = ParseTab[termIndex[t], n-256]
}
var i,n,x,y,z, Nx,Ny,Nz: integer; found,hope:boolean;
begin
  i:=baseIndex[Tsymb]; N:=nonTmTab[i];
  if N>0 then begin x:=i+1; y:=i+N;
    Nx:=NonTmTab[x];
    found:=(Nsymb=Nx); hope:=false;
    if not found then begin
      Ny:=NonTmTab[y]; found:=(Nsymb=Ny);
      if not found then hope:=(Nsymb>Nx)and(Nsymb<Ny) else z:=y;
    end else z:=x;
    while hope {invariant (Nx<Nsymb<Ny) and (x<y)} do begin {dichotomic search}
      z:=(x+y) div 2; {z ALWAYS <y but may be=x}
      hope:=(z>x);
      if hope then begin
        Nz:=NonTmTab[z]; found:=(Nsymb=Nz);
        if found then hope:=false else begin
          if Nsymb<Nz then begin y:=z;Ny:=Nz end
          else begin x:=z;Nx:=Nz; end;
        end;
      end;
    end;
  end else found:=false;
  if found then begin
    PtLookup:=ruleTab[z]; {the valid rule}
  end else begin
    PtLookup:=0;
  end;
end;

procedure sortTables(var nt,ru: tabTp; b,n:integer; var ambig:integer);
{ACTION: sort the interval nt[b+1] up to nt[b+n], permute ru as well.
         put double entries in nt out of useful range (ambiguities)
 CALLER: parsingTables
 OUTPUT: increment ambig= counter of ambiguous rules
}
var i,j,m,nj,limit,ilim: integer;
begin {pick the smallest any time}
  i:=1; m:=n; {upper limit may drop as double entries get out of the way}
  repeat
    limit:=nt[b+i]; ilim:=i; j:=i+1;
    while j<=m do begin
      nj:=nt[b+j];
      if nj<limit then begin
        limit:=nj; ilim:=j; j:=j+1;
      end else if nj=limit then begin { double entry to end}
        ambig:=ambig+1; {count ambiguous rules}
                     nt[b+j]:=nt[b+m]; nt[b+m]:=nj;
        nj:=ru[b+j]; ru[b+j]:=ru[b+m]; ru[b+m]:=nj;
        m:=m-1;
      end else j:=j+1;
    end;
    if ilim<>i then begin {swap the 2 }
      nj:=nt[b+ilim]; nt[b+ilim]:=nt[b+i]; nt[b+i]:=nj;
      nj:=ru[b+ilim]; ru[b+ilim]:=ru[b+i]; ru[b+i]:=nj;
    end;
    i:=i+1;
  until (i>=m);
  for i:=1 to m-1 do begin
    if nt[b+i+1]<=nt[b+i] then write('@#!'); {order bug}
  end;
  nt[b]:=m; {update number of valid entries}
end;

procedure parsingTables( var conti:contiTp;
  var nTerm,lastIndex: integer;
  var baseIndex:biTp; var nontmTab,ruleTab: tabTp;
  var parseTable: parseTab; var termIndex: termSymbIx);
{ACTION: build parsing tables from continuation sets.
 CALLER: chekgram.
 INPUT :  conti (continuation sets)
 OUTPUT:  baseIndex + nontmtab + ruleTab: compact parsing table
          parseTable + termIndex: huge "turbo" parsing table
          nTerm : number of terminal symbols
          lastIndex: valid size of nonTmTab

   Goal: prepare a table-driven parser, to be called after testLL1 only !
   We know the Continuation sets of rules r, i.e.
   conti[r] = set of all terminal leading symbols resulting from rule r.
   This is sometimes called the set of "Director Symbols" DS[r] of the rule r.
   We produce the tables nontmTab, ruleTab, and aux. table baseIndex.
   For each terminal symbol a , for each nonterminal N , the parsing table
   Pt[a,N] yields the  next rule to apply when we are at N in the
   current production, and at a in the source text.
   Pt[a,N] = r if and only if a is in the set conti[r] of rule r:N-->anything.
   At any moment of parsing, we have a stack of not-yet-used symbols.
   If we find a in the source, do the following:
     If Tos is terminal, it MUST be =a, else error. Pop and read on.
     If Tos is nonterminal N, Error if Pt[a,N] is "empty".
       Else pop N and push the rule sequence Pt[a,N], (first symbol on top).
       Do NOT read on but loop.
   Algorithm ends when stack is empty.
   However, since Pt is a space consuming sparse matrix, in addition we
   give each "a" only a linear List nonTmTab of possible nonterminals where
   a is valid, and ruleTab = the next rule to consider:  dichotomic search...
}
var i,j,k,b,n,nt, nAmbi, maxNt,biggestNt,smallestNt: integer;
begin
  for i:=0 to 255 do begin
    baseIndex[i]:=0; {no entry reported}
    termIndex[i]:=0;
    reCycle[i]:=false;
  end;
  for i:=17 to 31 do reCycle[i]:=true; {known identifiers}
  for i:=1 to lastSymbol do reCycle[i+127]:=(smb[i].flag>0); {standard symbol}
  if bigMemory then
    for i:=0 to maxTerm do for j:=0 to maxNont do parseTable[i,j]:=0;
  lastIndex:=0;  {nonTmTab[lastIndex]:=0; }
  {  loop order to respect the "first seen" priority of ambiguous productions.
     The later productions have lower priority.
  }
  for i:=1 to 255 do begin  write('.');
    lastIndex:=lastIndex+1; b:=lastIndex; baseIndex[i]:=b; n:=0;
      {nontmTab[b]:=n later on}
    for k:=1 to nProd do if i in conti[k] then begin
      n:=n+1; lastIndex:=lastIndex+1;
      nontmTab[lastIndex]:= prodList[k,0]; ruleTab[lastIndex]:= k;
    end;
    nonTmtab[b]:=n;
  end; {for i}
  maxNt:=0; nTerm:=0;  nAmbi:=0;
  biggestNt:=0; smallestNt:=9999;
  for i:=1 to 255 do begin    {outer loop: i = nonterminals}
    n:=nonTmTab[baseIndex[i]];
    if n>0 then begin
      nTerm:=nTerm+1;
      termIndex[i]:=nTerm;
    end;
    for k:=1 to n do begin    { inner loop: k = production rules }
      nt:=nonTmTab[baseIndex[i]+k];
      if nt>biggestNt then biggestNt:=nt;
      if nt<smallestNt then smallestNt:=nt;
      if bigMemory then begin   {make big parsing tables}
        if parseTable[nTerm, nt-256]=0 then begin {first (i,k) match is Ok}
          parseTable[nTerm, nt-256]:=ruleTab[baseIndex[i]+k];
        end else begin { write(chr(7)); } nAmbi:=nAmbi+1; end;
      end;
    end;
    if n>maxNt then maxNt:=n;
  end;
  if termIndex[anyId]=0 then begin {force that one into terminal symb table}
    writeln('anyId  entered');
    nTerm:=nTerm+1; termIndex[anyId]:=nTerm;
  end;
  writeln;
  writeln(nProd:3, ' productions');
  writeln((nNonterm-256):3, ' nonterminals');
  writeln(nTerm:3,' terminal symbols.');
  writeln(' LastIndex=',lastIndex,'. ');
  writeln(' longest nonterminal list: ',maxNt);
  { writeln(' Number of ambiguities: ',nAmbi); }
  writeln(' Smallest and biggest Nonterminal are: ',smallestNt,' ',biggestNt);
  maxNt:=biggestNt-smallestNt+1;
  writeln('Parsing matrix size is : ',nTerm,'*',maxNt,'=',nTerm*maxNt);
(*
  write(' <Ret>');   readln;
  for i:=1 to 255 do begin {loop over terminal symbols}
    b:=baseIndex[i]; n:=nonTmTab[b];
    if n>0 then begin showSymbol(i); write(' <-- ');
      for k:=1 to n do showSymbol(nonTmTab[b+k]); writeln;
    end;
    if (i mod 20)=0 then begin write('<Ret>'); readln end;
  end;
*)
  write('Sorting NonTmTab ');
  {kill double entries and sort all parts of nonTmTab}
  nAmbi:=0;
  for i:=1 to 255 do begin {loop over terminal symbols}
    b:=baseIndex[i]; n:=nonTmTab[b];
    if n>1 then begin write('.'); sortTables(nonTmTab,ruleTab, b,n,nAmbi); end;
  end;
  writeln(' Ambiguities: ',nAmbi);
end;

(*
procedure storeParsTable( nProd,nNont,nTerm: integer;
  var parseTable: parseTab;
  var termIndex: termSymbIx);
var f:text; i,j,k,x,pr: integer;
begin
  assign(f,'grammar4.ptb'); rewrite(f);
  writeln(f,nProd);
  writeln(f,nNonterm);
  writeln(f,nTerm);
  writeln(f);
  for i:=1 to nProd do begin
    write(f,prodList[i,0]); k:=0;
    repeat k:=k+1; x:=prodList[i,k];
      if x<>nothing then write(f,' ',x) else writeln(f);
    until x=nothing;
  end;
  writeln(f);
  for i:=0 to 255 do writeln(f,termIndex[i]);
  writeln(f);
  for i:=1 to nTerm do begin
    for j:=1 to nNont do begin pr:=parseTable[i,j];
      if pr>0 then write(f,' ',j,':',pr);
    end;
    writeln(f);
  end;
  close(f);
end;
*)

{---------  the recursive rule-interpreting parser ---------------}

procedure runto(c: char; var p:integer);
(*ACTION:  advances to the char c or to Termi symbol in Gt,
           whichever comes first at Level 0 (i.e. we skip nested {...} )
  CALLER:  basicParser
  INPUT :  p index in global table Gt
  OUTPUT:  p advanced so that Gt[p] = c OR the } symbol
*)
var lev:integer; q:char;
begin lev:=0; q:=Gt[p];  {here, q is NOT escape or nontrm ? }
  { while (q=escape)or(q=nontrm) do begin p:=p+2; q:=Gt[p] end; }
  while not((lev=0)and ((q=c)or(q=termi))) do begin
    if q=start then lev:=succ(lev)
    else if q=termi then lev:=pred(lev);
    p:=succ(p); q:=Gt[p];
    while (q=escape)or(q=nontrm) do begin p:=p+2; q:=Gt[p] end;  {skip}
  end; {while}
end;

procedure basicParser(var success: boolean; var p:integer;
  var stat: scanStatus;  mustskip,freshFile:boolean);
{ ACTION: recursive descent parser, driven by the grammar productions Gt.
          Global flag Emergency stops any of the 3 loops in tableParser and
          CheckElem. This program is Goto- and Exit-free!
  CALLER: itself, via Checkelem, and Translate.
  INPUT : p is the current start symbol in grammar table Gt
          stat = current source symbol data, to be compared with Gt[p].
          mustskip is TRUE if tableParser is called for a sublist, p must
          be advanced to terminator at exit.
          freshFile tells tableParser to fetch 1st code item.
  OUTPUT: Success is set if some leading item of the production is recognized.
}

procedure checkelem(var success:boolean; var p:integer;
  { var stat:scanStatus; } leader:boolean);
{ ACTION: check 1 entry of syntax sequence: terminals, nonterminals, sublists.
          preceding repetition symbol and trailing semantic actions are
          handled here. If tooManyErrors, recursions and repetitions are
          skipped and Success is forced True. Rep symbol always succeeds.
  CALLER: basicParser
  INPUT : like basicParser. leading=True for director nonterminal symbol.
  OUTPUT: success if Gt[p] is recognized. Emergency if syntax error.
}
var rep, match, goAhead, inPhrase: boolean;
  lp,rp: integer; {the local p for repeating, the recursion p}
  q,r: char;
  pid: pide;
  pt1,pt2: ptpel;
begin
  { here ,emergency always FALSE due to While loop in caller}
  rep:=(Gt[p]=repet); if rep then p:=succ(p);
  lp:=p; success:=rep; match:=false;
  inPhrase:= not (leader or rep); {the symbol MUST succeed or fail}
  repeat {loop over 0,1,2... items at lp if rep}
    p:=lp;
    q:=Gt[p]; goAhead:=false;
    if q=start then begin {sublist = implicit no-name nonterminal }
      rp:=p;
      basicParser(match, p,stat, true {mustskip}, false);
      if inPhrase and (not match) then begin
        SyntaxBug (rp, q,r, stat,sdat); emergency:=true;
      end;
    end else if q=nontrm then begin {explicit nonterminal}
      p:=succ(p); q:=Gt[p]; rp:=index[ord(q)];
        {if not tooManyErrors then }
      basicParser(match,rp,stat,false,false);   { recursion ! }
      if inPhrase and (not match) then begin
        SyntaxBug(0,nontrm,q, stat,sdat); emergency:=true;
      end;
    end else begin {terminal symbol}
      goAhead:=true;
      if (q='?')or(q='&') then begin p:=succ(p);r:=Gt[p] end;
      treatsymbol(match,emergency, q,r, leader or rep, stat,sDat);
        { at start of phrase, treatsymbol mismatch --> recursion backtrack }
        { if tooManyErrors then match:=true; force advancing ? }
    end;
    p:=succ(p);
    if not rep then success:=match; {a Handle is seen, else success:=true }
      { oksoFar:=match and (not tooManyErrors)  }
    while Gt[p]=escape do begin
      p:=succ(p);
      if match then begin
        semanticAction(Gt[p], stat, pid,pt1,pt2);
      end;
      p:=succ(p)
    end;
    if match and goAhead then begin
      scanner(stat, sDat);  { the source pointer is advanced}
      if stat.cc=#136 {BEGIN token } then write('-');
      { cc may be: % ? ' (token) (terminalSymbol) }
    end;
  until (not rep) or (not match) or (emergency);
end; {checkelem}

var  goodInput, firstAtom: boolean;
begin {basicParser: LL(1)  without backtracking}
 if freshFile then begin
   scanner(stat, sDat); {get first item}
   emergency:=False;  {reset emergency exit signal }
 end;
 {Here, we know both the next item from the input file (record Stat)
  and the grammar production set to match, at entry point p in table Gt   }
  repeat {loop over alternative clauses, i.e. the OR | symbols }
    p:=succ(p);
    firstAtom:=true; { the 1st atom of a list decides if we accept the whole}
    success:=true;
    while success and not ((Gt[p]=separ)or(Gt[p]=termi)) and (not emergency)
    do begin
        {inner loop over a sequence of (non)terminals, i.e. implicit AND}
      checkelem(goodInput, p, {stat,} firstAtom);
        {if checkelem succeeds, some input has been parsed.}
      if firstAtom then begin success:=goodInput; firstAtom:=false end;
    end; {while}
    if not success then runto(separ,p); {else we are there}
  until success or (Gt[p]=termi) or emergency;
  if success and mustskip then runto(termi,p);
end; {basicParser}

{---------  the non-recursive table-driven LL(1) parser -----------}

procedure advance(var nextChar: integer);
{ACTION: go on in the source text, return next terminal symbol nextChar
 CALLER: tableParser
 NOTES:
  1: unknown Id of current level must be equiv. to known id of lower level.
  2: advance must NOT be called directly after treatSymbol success but
        only after all semantic actions are exhausted.
}
var c:char;
begin
  scanner(stat,sDat);
  c:=stat.cc;
  if c=#136 {BEGIN token } then write('-');
  if c='?' then begin
    nextChar:=(ord(stat.pp^.class)+1);
    if nextChar<>anyId then nextChar:=nextChar+16; {signal known type}
    if globDebug then with stat.pp^ do begin
      write(' #',nextChar,':',rscope,':',name);
    end;
  end else nextChar:=ord(c);
end;

procedure complain(symb: integer; s:str40);
{ACTION: error message s about a Terminal Symb (char,string, IdentClass)
 CALLER: tableParser
}
var errNb: str40;  i,j,l: integer;
begin
  if symb<32 then begin
    errNb:=idTypeList[symb mod 16];
  end else if symb>127 then begin {multicharacter symbol}
    i:=symb-127; j:=smb[i].ix; l:=j-1+smb[i].len;
    errNb:='';
    for i:=j to l do errNb:=errNb+Gt[i];
  end else errNb:='"'+chr(symb)+'"';
  error('Symbol '+errNb+s);
end;

procedure tableParser(var correct: boolean;
  var baseIndex:biTp; var nontmTab,ruleTab: tabTp;
  var Pt:parseTab; var ti:termSymbIx;
  entry: integer; debug,turbo:boolean);
{ACTION: classical LL(1) parser for Turbo Pascal 4
 CALLER: chekGram.
 INPUT : Entry: is the root nonterminal symbol for a correct language file.
         The sets (Pt,Ti) and (baseIndex,nonTmTab,ruleTab) are 2 alternative
         parsing tables, big & fast ( turbo mode) or  small & slow
 OUTPUT: correct=TRUE if parsing successful
 NOTE:
  Uses global prodList, and tables Pt,Ti, for a guided tour through the
  source text (external procedure Scanner is called).
  This parser is a State Machine with a stack as the state data.
  Roughly, Pt triggers the state transitions as a function of the scanner
  token (the input "tape")  and the partial state info on top of stack.
}
var sStack: array[1..100] of integer; {symbol stack}
    k,p,tos,kmax, nextChar,nextRule,nextCompare: integer;
    pid: pide;
    errNb:str40;
    pt1,pt2: ptpel;
    {tos is top of stack}
    push, break, dontCare, ok, needInput: Boolean;
      {push=true if some stack fill action is required}
    q,r:char;

begin
  globDebug:=debug;
  break:=false;
  push:=false; tos:=1; sStack[tos]:=entry;
  advance(nextChar);
  needInput:=false; { if true, triggers scanner calls}
  repeat   {the big parser loop has 5 phases}
      { PHASE 1: check if stack needs some food }
    if push then begin {push new rule expansion on stack}
      kmax:=prodLength[nextRule];
      for k:=kmax downto 1 do begin p:=prodList[nextRule,k];
        tos:=succ(tos); sStack[tos]:=p;
      end;
      push:=false;
    end;
      { PHASE 2: check if semantic actions are pending }
    nextCompare:=sStack[tos];
    while nextCompare>nulAction do begin { is an action marker}
      semanticAction(chr(nextCompare-nulAction), stat, pid,pt1,pt2);
      tos:=pred(tos);
      if tos>0 then nextCompare:=sStack[tos] else nextCompare:=0;
    end;
      { PHASE 3: check if input from scanner is required}
    if needInput then begin advance(nextChar); needInput:=false end;
      { PHASE 4: check if nonterminal to be expanded }
    if nextCompare>256 then begin {nonterminal rule}
      if turbo then begin
        nextRule:=pt[ti[nextChar], nextCompare-256];
{  -- debugging:
        k:=PtLookup(nextChar,nextCompare, baseIndex,nonTmTab,ruleTab);
        if k<>nextRule then begin
          write('BUG: T=',nextChar,' N=',nextCompare,' R=',nextRule,'/',k);
          readln;
        end;
}
      end else nextRule:=
        PtLookup(nextChar,nextCompare, baseIndex,nonTmTab,ruleTab);
      break:=(nextRule=0);
      if break then begin {give redefinable Ids a chance , here}
        if reCycle[nextChar] then nextRule:=
          PtLookup(anyId,nextCompare, baseIndex,nonTmTab,ruleTab);
        break:=(nextRule=0);
      end;
      if not break then begin
        tos:=pred(tos); push:=true; {here Tos transient to 0 ! Bug? }
      end else complain(nextChar,' rejected!');
      { PHASE 5: (decisive) the input token faces a terminal symbol to match}
    end else if nextCompare>0 then begin
      {compare scanner input and Terminal Symbol, 0 marks stop}
      {look if nextCompare=nextChar , things match}
      if nextCompare<ord(' ') then begin
        r:=chr(nextCompare mod 16);
        if nextCompare<16 then q:='?' else  q:='&';
        if debug then write(' ',q,nextCompare);
      end else q:=chr(nextCompare);  {PCPC compatibility}
      treatSymbol(ok, dontCare, q,r, true, stat,sDat);
        { call as if leading symbol, won't do its own error message
          if q='?', the identifier type r enters the symbol table
        }
      if ok then begin  { input has been swallowed}
        tos:=pred(tos);
        needInput:=true; {delay advance(nextChar); }
      end else begin
        break:=true;
        complain(nextCompare,' expected!');
      end;
    end;
  until break or ((tos<=0)and (not push));
  correct:=not break;
end;

(*   superseded by modifConti .....
procedure modifParsTab(var pt:parseTab; var ti:TermSymbIx);
{ Two passes for table fill-in :
   If Pt[ u , N] =q >0 for any unknown Id u,
     set Pt [ anyId, N ] := q.  (collision testing required)
   If Pt[ anyId, N] =q>0 and Pt[ k , N]=0 for any Known Id k,
     set Pt [k, N] :=q;
  Any new Id is equivalent! any known Id may be unknown at other level.
}
var n,x,y,a,ta: integer;
begin
  for n:=1 to nNonterm-256 do begin {column loop}
    x:=0;
    for a:=1 to maxId do begin ta:=ti[a];
      y:=pt[ta,n];
      if y>0 then begin
        if x=0 then x:=y else begin
          write('Collision ',idTypeList[a]);
          if n<=nbObject then writeln(objectList[n]) else writeln(n:4);
        end;
      end;
    end;
    ta:=ti[anyId];
    if (ta>0)and(x>0) then pt[ta,n]:=x;
  end;
{  write('<Ret>'); readln;  }
end;
*)

{******************   input expansion ***********}

procedure defProdLength;
{ACTION: compute length of each prodList line
         done after grammar check, before use in parsing (speed-up trick)
 CALLER: productionList
}
var n,kmax: integer;
begin
  for n:=1 to nProd do begin
    kmax:=0;
    repeat kmax:=succ(kmax) until prodList[n,kmax]= nothing;
    { prodList[n,0]:=kmax-1; true length of production, strip null rule}
    prodLength[n]:=kmax-1;
  end;
end;

procedure getNext(var i: integer);
begin nprod:=succ(nprod); i:=nprod end;

procedure getNextNt(var i: integer);
begin nNonterm:=succ(nNonterm); i:=nNonterm end;

procedure rightRecursion(i,j, origin: integer);
{ACTION: replace a Repeat of symbol J with a new symbol I and 2 rules:
         I -> Null;   I -> J I
 CALLER: produce
 INPUT : origin points to the high-level nonterminal in Gt
}
var p:integer; {production number}
begin
  nameIndex[i-256]:=origin;
  getNext(p);
  prodList[p,0]:=i;
  prodList[p,1]:=nothing; {null production}
  getNext(p);
  prodList[p,0]:=i;
  prodList[p,1]:=j; prodList[p,2]:=i; prodList[p,3]:=nothing;
end;

procedure errorMess(m:str40);
begin
  writeln(m); halt;
end;

{Recursive procedure to expand the grammar:
 - At each opening brace, invent a new Nonterminal symbol, call the
   routine to write that symbol's expansion
 - At each vertical bar, terminate expansion, start new one for same symb
 - At each #, invent a new Nonterminal; give it the first expansion to nothing,
   the second expansion to "the symbol after # " + itself (tail recursion)
}

procedure produce (var ix:integer; nonterm,origin: integer);
{ACTION: expand a nested production at Gt[ix] into elementary prodList
 CALLER: productionList
 INPUT : nonterm = the nonterminal symbol to expand. Base index is 256 !
         origin  = backward index, to recover name of original nonterminal.
}
var n,k,iPlist,iNt,jNt: integer;  {iNt = index of nonterminal symbol}
    c:char;  oneAtom: boolean;
begin
  getNext(iPlist); (*iPlist = last index to prodList *)
  prodList[iPlist,0]:=nonterm;
  c:=gt[ix];
  nameIndex[nonterm-256]:=origin; {debug ! }
  n:=0; {count concatenated objects}
  { actList[iPlist,0]:=0; no actions reported}
  oneAtom:=(c<>start); {and(c<>sepa);  else, a true production}
  repeat
    if not oneAtom then begin ix:=succ(ix); c:=gt[ix]; end;
    { while c=escape do begin ix:=ix+2; c:=gt[ix] end; skip semantics}
    if c=separ then begin {start next production for same item}
      n:=succ(n); prodList[iPlist,n]:=nothing;
      getNext(iPlist); prodList[iPlist,0]:=nonterm; n:=0;
      { actList[iPlist,0]:=0; }
    end else if c=start then begin {recursion required ! }
      getNextNt(iNt);
      n:=succ(n); prodList[iPlist,n]:=iNt;  write('.');
      produce(ix,iNt, origin);
    end else if c=repet then begin {insert tail-recursive aux production}
      getNextNt(iNt);
      n:=succ(n); prodList[iPlist,n]:=iNt; { we insert I for # J }
      getNextNt(jNt); ix:=succ(ix);  write(';');
      produce(ix,jNt, origin); {do the expansions of J }
      {now insert I -> nothing and I -> J I }
      rightRecursion(iNt,jNt, origin);
    end else if c=termi then begin
      n:=succ(n); prodList[iPlist,n]:=nothing; {ix:=succ(ix); we are through}
    end else if c=escape then begin {action}
      ix:=succ(ix);
      n:=succ(n); prodList[iPlist,n]:=nulAction+ord(gt[ix]);
    end else if c='?' then begin {unknown Id of some type}
      ix:=succ(ix); c:=gt[ix]; k:=ord(c);
      n:=succ(n); prodList[iPlist,n]:=k;
    end else if c='&' then begin {known Id}
      ix:=succ(ix); c:=gt[ix]; k:=ord(c)+16;
      n:=succ(n); prodList[iPlist,n]:=k;
    end else if c=nontrm  then begin {named nonterminal}
      ix:=succ(ix); c:=gt[ix]; k:=256+ord(c);
      n:=succ(n); prodList[iPlist,n]:=k;
    end else if c>=' ' then begin {terminal symbol}
      k:=ord(c);
      n:=succ(n); prodList[iPlist,n]:=k;
    end else begin
      errorMess('Bad character: '+ c +'  in grammar table');
    end;
  until (c=termi) or oneAtom;
  if oneAtom then begin n:=succ(n); prodList[iPlist,n]:=nothing end;
end; {produce}

procedure productionList;
{ACTION: expand the grammar table Gt into elementary productions
 CALLER: chekGram
 INPUT : works on global tables only: Gt[Index[i]] = expansion of symbol i
}
var ix,i,k: integer;
begin
  nProd:=0; k:=1; while index[k]>0 do k:=succ(k); k:=k-1;
  nNonterm:=256+k; { last named nonterm symbol is here }
  for i:=1 to k do begin
    ix:=index[i]; {"produce" will destroy ix}
    write('*'); produce(ix, i+256, i);
  end;
  writeln;
  defProdLength;
  writeln('Number of productions: ',nProd);
  writeln('Number of nonterminals: ',nNonterm-256);
end;

end {unit gramTool}.
