{   Copyright (c) 1991 by Georg Post     }

unit pcpcdata;    {Global data declarations for almost all units of PCPC}

{$S+,R+}

interface
const lbuf=58000;  {58k source text buffer = max size of input PAS file}
      lenIde=15;   {significant length of identifiers}
      maxLine=80;
      dictSize=200;{max number of semantic actions}
      nonterms=50; {max number of nonterminal symbols}
      GtSize=2400; {size of the table Gt}

    maxInteg=32767;
    minInteg=-32767;
    maxWord=65535;
type
  action= {MUST BE SAME TEXT as in grammar file .
           These are CASE labels, and "forbidden" is the last one! }
(ifThen,     elseDo,     endIf,      doLoop,     doWhile,    endWhile,
 endRep,     caseSel,    blockEntry, blockBegin, blockEnd,   pointTo,
 subField,   arrayAddr,  getAddr,    pushVar,    pushConst,  assignmt,
 addit,      subtr,      multi,      divis,      modulo,     negate,
 andLog,     orLog,      notLog,     equal,      unEqual,    greater,
 less,       greaterEqu, lessEqu,    arrayDef,   midIndex,   lastIndex,
 arrayEnd,   recordDef,  newField,   fieldType,  recordEnd,  ptrDef,
 linkToPtr,  typeDef,    enumDef,    newConst,   enumEnd,    subranDef,
 subranEnd,  typeName,   typeEnd,    firstVar,   nextVar,    tpVarList,
 constIde,   assignCon,  stringCon,  plus,       minus,      constRef,
 intConst,   endConst,   nilSymbol,  paramDef,   paramTp,    paramEnd,
 procEnd,    functEnd,   functName,  functCall,  procCall,   pushParam,
 doCall,     forInit,    comparTo,   increment,  comparDown, decrement,
 caseFirst,  caseNext,   caseLast,   caseEnd,    caseOther,  caseTerm,
 withFirst,  withNext,   withReleas, withAddr,   assignDef,  parAddr,
 valpAddr,   typeBool,   typeByte,   typeChar,   typeInt,    typeWord,
 typeLong,   typeReal,   typeDoub,   typeText,   typePoin,   typeStr,
 maxiStr,    trueSymb,   falsSymb,   forwDecl,   terminator, repLoop,
 beginSymb,  endSymb,    parenthes,  shfLeft,    shfRight,   xorLog,
 setDef,     interval,   setList,    setMake,    inOper,     formData,
 fmtOne,     fmtTwo,     fileRef,    doRead,     doRdLn,     doWrite,
 doWriLn,    rdVar,      wrFmt,      numToStr,   fileAddr,   memAlloc,
 typeShort,  dotSymb,    typeNull,   typeSet,    tpSetEnd,   mainFile,
 unitFile,   implPart,   useUnit,    mainPrgr,   linkTname,  intLabel,
 intJump,    intLbDef,   symbLabel,  symbJump,   symbLbDef,  normUnit,
 optional,   typeFile,   tpFlEnd,    forDef,     typeCast,   valCast,
 relaxTp,    dummyTp,    isMissing,  doProg,     doUnit,     doIntf,
 typConst,   initUnit,   getPtr,     getSize,    gotoExit,   setcoInit,
 setcoOne,   setcoTwo,   setcoTerm,  tradCons,   tradOpen,   tradSepa,
 tradClos,   assignTC,   typeSize,   typeComp,   typeExte,   mkVariant,
 extDecla,   intfDecl,   kwMem,      kwMemw,     kwMeml,     memArray,
 kwPort,     kwPortw,    ioPort,     caseIntv,   ignore,     forbidden
);

    idClass=  {13 identifier classes:}
(constId,    typeId,     fieldId,    varId,      functId,    procId,
 varParId,   valParId,   fileId,     forwId,     unitId,     otherId,
 labelId
);
{  forward Identifiers : for  pointers declared before their base types}

  str8=string[8];
  str20=string[20];
  str40=string[40];
  str80=string[80];
  str255=string[255];

  symbol=packed array[1..lenIde] of char;
  dict= array[1..dictSize] of symbol;
  textbuff= packed array [1..lbuf] of char;
  ptextbuff=^textbuff;

  pide = ^ide; {identifier pointer}
  ptpel=^tpel; {pointer to Type Elements: for the Semantic unit}

  tpel= record     {29 bytes type info}
      cl: char;    {"class" marker of type element}
      l: longint;  {length,multiplier}
      m: longInt;  {for subranges >32000}
      hook,        {link for memory management}
      p,q: ptpel;  {variable meaning}
      tName: pide; {type Id for named type, Id of 1st field for records}
      ixName:pide; {index constant identifier for subrange type}
  end;

{the data in tpel type elements are:
    cl              p             q                l            m
 R  Record/Field field type    rest Record/Field   total size   variant #
 Y  arraY        1st Index     base or rest Array
 P  Pointer      pointed tp    Nil
 S  Subrange     base type     Nil
 E  Enumerated   Nil           Nil
 F  file         data type
 U  fUnction     result tp     1st param (A/L)   | l = sizeof parameter space
 D  proceDure    Nil           1st param         | m = nb of optional params
 A  vArparameter base type     next param          m=0: strict, m=1: weak type
 L  vaLparameter base type     next param          m=0: strict, m=1: weak type
 O  anyOrdinal   specific tp

                  lower case: standard predeclared types
 b  boolean
 c  char
 y  byte
 i  integer
 w  word
 l  longint
 z  single
 r  real
 d  double
 x  extended
 k  comp
 s  string
 t  text
 p  anyPointer
 e  set          base type
 h  short
 a  arrayOfChar
 *  wildcard for any type

Convention: "Weak types", allowed for System procedure declarations only:
  ::File:      F t
  ::Integer:   c b y h E i w l (ordinal)
  ::Char:      b c y h E       (1 byte )
  ::Word:      i w             (2 bytes)
  ::Real:      all float and integer types.
  ::Pointer:   any kind of pointer

For function results, weak type means that the result has same type as the
(weakly typed) first argument: Succ Pred Abs...
}

  ide=record      {data structure for identifiers = symbol table entries}
     name: symbol;
     class:idClass; {one of the 9 identifier markers}
     defLevel,    { scope level where the Id was defined}
     rScope,      { nested scope for record fields, will enter comparison}
     qualif,      { unit qualifier, compared for idLevel zero and One ? }
     reUse:  byte;{ flag global reuse of external global symbol}
     x: longInt;  { constant value }
     y: integer;  { see below }
     typof:ptpel; { every Id points to a valid type info}
     rg,lf,chain :pide; {right-left linking, binary tree}
  end;

{Meaning of y for different id classes:

  procId,functId:   y=0 normal,  y=1 undefined, y=2 forward,
                    y=3 external y=4 interface.
  const,type Id:        y = the scope of definition
  var,field,parameter:  y=serial number inside lists.
}

  rsymbol=record ix,len,flag:integer end;
    {index,length and "non-reserved" flag of a keyword stored in Gt}
    {truly reserved keywords are all uppercase in the source file  }

  textLine= array[1..maxLine] of char;

  scanStatus=record {data for last scanner response}
      pp,pput: pide;  {pp=pointer to entry in symb table. pput= its parent}
      ii: longInt; cc: char;
      redefKey: integer; {is >0 if the item is a redefinable keyword}
      rr:real; isFloat:boolean; {if isFloat, rr is the value}
      chain: textLine;
      chainLen: integer;  {string if any}
      oldAction,newAction: action; {the last 2 semantic actions}
  end;

  scopeData = record {global semantic data for symbol management }
        {nested procedure scopes, record field & unit scopes}
    actual,       {actual block scope level}
    currentUnit,  {current unit number, 1=system, 2=crt, 3=dos, ... ? }
    recIndex : integer;  { max index of recScope table }
    maxRecords,   { a "serial number" counter of record definitions}
    markId: byte; { current active record number, goes into identifiers}
    publicPart,   { is True while in the INTERFACE section of a Unit}
    fieldOnly: boolean; {Id search restriction to fields}
    pstart: array[0..20] of pide; {identifier list root, one per level}
    tstart: array[0..20] of ptpel; {private type list, linked with "hook"}
    recScope : array[0..20] of integer;
    variantNb: array[0..20] of byte; {in each record, variant serial # }
{       recScope is a stack (valid 0..recIndex) used in unit Semantic:
        when we enter a Record, recScope[recIndex]:=markId
        is the Id-scope number we put into new (field) identifiers.
}
   idLevel,idUnit :integer;
       {level & unit of last ID if it's in table: idLevel <= actual }
  end;

  withStackData=record
       nWith,wsp: integer; {nWith=the number of active With statements}
       typ: array[0..10] of ptpel; {types of With arguments}
       snb,nwi: array[0..10] of integer; {serial numbers, copy of nwith}
       {these are stack-like data, wsp is the stack pointer}
   end;

  symbolData=record {status of search in symbol table}
    db:word;
    ln:integer; {start,length of last parsed id or string, in src^}
    smaller,  { comparison result of preceding search move}
    small,    { aux variable for next search move}
    found: boolean;   {identifier found in the table}
  end;

var
  actionList, idTypeList: dict; {dictionaries for syntax file}
  className: array[1..31] of idClass;  {inverse of ord(*) }
  actionName:array[1..dictSize] of action;
  objectList: dict; {nonterminal objects for syntax file}
  nbAction,nbObject,nbType, lastSymbol: integer;
  nulch,tab,cr,lf, endsym:char; {nulch=file terminator}
  Gt: packed array[1..GtSize] of char; {grammar table}
  index: array[1 ..nonterms] of integer; {nonterminal symbols index}
  smb: array[1..dictSize] of rsymbol; {reserved symbols table}
  scope:scopeData;
  WithStack: withStackData;
  tooManyErrors: boolean;          { condition errCount>maxErr }
  errline ,errcount, warncount, fatals, maxerr: integer;
    { counters and maximum allowed, for syntax analysis}
    { errline counts the current line, even if no error}
  miniStak,maxiStak, stakLine: word; {extreme values of SPtr }

procedure readGram(name: str40; var entry,interfa:integer; var src:textBuff);

implementation

{****  Grammar table construction ********}

const escape='_';
var gram:text;

function lowercase(c:char):boolean;
begin lowercase:=('a'<=c)and(c<='z') end;

function uppercase(c:char):boolean;
begin uppercase:=('A'<=c)and(c<='Z') end;

function numeric(c:char):boolean;
begin numeric:=('0'<=c)and(c<='9') end;

function alfaNum(c:char):boolean;
begin alfaNum:=lowercase(c) or uppercase(c) or (c='_') or numeric(c) end;

(*
procedure showGt;
{ ACTION: debugging. ugly screen output of the content of Gt
  CALLER: readGram ?
  INPUT : global arrays Gt, actionList,idTypeList,objectList.
}
var i,j,k,x,y,mode:integer; c:char;
begin i:=1;  mode:=0;
  while gt[i]<>chr(0) do begin c:=gt[i]; i:=succ(i);
    if (i mod 100)=0 then readln;
    if mode=0 then begin
      if c=escape then mode:=1
      else if (c='?')or(c='&') then mode:=2
      else if c='~' then mode:=3;  {we have nonterminal}
      if (c>'~') then begin {terminal symbol}
        k:=ord(c)-127;x:=smb[k].ix; y:=smb[k].len;
        for j:=x to x+y-1 do write(gt[j]); {copy the special symbol}
      end else write(c);
    end else begin
      if mode=1 then write(actionList[ord(c)]);
      if mode=2 then write(idTypeList[ord(c)]);
      if mode=3 then write(objectList[ord(c)]);
      mode:=0;
    end;
  end;
  writeln;
end;
*)

function ixChar(s:symbol; var d:dict; max,offset:integer):char;
{ ACTION: search for symbol s in dictionary d.
  CALLER: readGram (3 places)
  OUTPUT: the index, as a char (offset allowed).
          returns zero if not found
}
var i:integer;
begin i:=0;
  repeat i:=i+1 until (d[i]=s)or(i>=max);
  if (d[i]<>s) then begin i:=0;
    writeln('ERROR: ',s,' not found.');
  end;
  ixChar:=chr(i+offset);
end;

procedure takeWord(k:integer; var s:symbol);
{ ACTION: copy a word from Gt to s
  CALLER: readGram
  INPUT : k = indirect address for entry in Gt (via global smb[k])
}
var x,l,j: integer;
begin
  x:=smb[k].ix; l:=smb[k].len; j:=1;
  repeat s[j]:=gt[x]; j:=succ(j); x:=succ(x) until (j>lenIde)or(j>l);
  for x:=j to lenIde do s[x]:=' ';
end;

(* Structure of the grammar table Gt:
  First:  the list of all keyword strings ! Then a mass of:
  metasymbols {|}# $ ' %  ~_ and terminal one-char symbols
  _n semantic actions n=1..255
  ?n &n class of new/old identifier  n=1..15
  ~n  reference to other nonterminal n=1..255.
  n (>#127)  reference to keyword or multi-char symbol
*)

procedure getgramsymb(var c:char);
{ ACTION: gets start of symbol in Gt, skips separators, marks EOF by !
  CALLER: readlist, readgramnumb, readgram
  OUTPUT: c= the first char of a valid symbol.
}
var d,e:char;
begin c:=' ';
  repeat
    if eof(gram) then c:='!'
    else if eoln(gram) then readln(gram)
    else begin read(gram,d);
      if d='!' then
        while not (eoln(gram) or eof(gram)) do read(gram,e);
      if not((d=' ')or(d=tab)or(d='!')) then c:=d;
    end; {else}
  until c<>' ';
  if c='~' then c:='!'; {end of section mark ! }
end;

procedure readGramId(var s:symbol; var c:char);
{ ACTION:  fetch an identifier s in file Gram
  CALLER:  readList, readGram
  INPUT :  c is the last char read from file
  OUTPUT:  c is the following char
}
var i,k:integer;
begin i:=1;
  while alfaNum(c) do begin
    if i<=lenIde then begin s[i]:=c; i:=succ(i); end;
    if eoln(gram) then begin
      readln(gram); c:=' ';
    end else read(gram,c);
  end;
  if c=' ' then getgramsymb(c) { skip white space };
  for k:=i to lenIde do s[k]:=' ';
end;

procedure readList(var tb:dict; var size:integer; var c:char);
{ ACTION: fetch a list of identifiers ( i1 , i2 , ..., ilast ) from file Gram
  CALLER: readGram
  INPUT : last char c read in Gram
  OUTPUT: following char c
}
var stop:boolean;
begin
  while c<>'(' do getgramsymb(c); getgramsymb(c);
  size:=0; {is past the ( of enumerate list}
  repeat
    size:=succ(size); readGramId(tb[size],c);
    {debug write(' ',size,':',tb[size]); }
    stop:=(c=')');
    getgramsymb(c);
  until stop;
end;

(*
procedure readgramnumb(var i:integer; var c:char);
begin i:=0;
  while numeric(c) do begin i:=10*i+ord(c)-ord('0'); getgramsymb(c) end;
end;
*)

procedure readTrailer(var source:text; var src:textbuff);
{ ACTION: transfer rest of file Source into buffer Src^. Newline stored as Lf.
  CALLER: readGram.
  INPUT : file Source, open for input. Global pointer Src (to 60 K buffer)
  OUTPUT: reset globals: texpo, errline, olderrl, texl.
}
var c:char; t:word;

procedure putSrc(c:char);
begin src[t]:=c; t:=succ(t) end;

begin
  t:=1;
  while not eof(source) do begin
    if eoln(source) then begin
      readln(source); putSrc(lf);
      {debug writeln; }
    end else begin
      read(source,c); if c<>nulch then putSrc(c);
      {debug write(c); }
    end;
  end; putSrc(nulch);
end;

procedure readGram(name: str40; var entry,interfa:integer; var src:textBuff);
{ ACTION:  fills tables Gt, Index, Smb from disk file "name" (grammar5.txt).
  CALLER:  main program init part (Pcpc.Translate)
  OUTPUT:  Index[entry] will be the entry point of the Pascal syntax structure
           Index[Interfa] the one for scanning of interface parts
}
var c,d:char; {last valid symbol}
    i,j,k,l :integer;
    ok:boolean;
    s,reserve,rword:symbol;

begin {first symbol tables, then syntax formulae, the rest: trailer}
  assign(gram,name);
  {$I-} reset(gram); {$I+}
  if ioResult<>0 then begin
    writeln; writeln('Cannot find grammar: ',name); halt;
  end;
  readList(actionList,nbAction,c);
  readList(objectList,nbObject,c);
  readList(idTypeList,nbType,  c);
  i:=0; repeat i:=succ(i) until objectList[i]='goodFile       ';
  entry:=i;
   { debug writeln('Entry object number: ',i ); }
  i:=0; repeat i:=succ(i) until objectList[i]='intrFace       ';
  interfa:=i;
  i:=0;j:=1; {j is position in gt = grammar table}
  while c<>'|' do begin {double | is end mark for symbols}
    i:=succ(i);
    with smb[i] do begin ix:=j;len:=0;
      repeat d:=c; gt[j]:=upcase(c); j:=succ(j);
        len:=succ(len);getgramsymb(c);
      until c='|';
      if d>='a' then flag:=1 else flag:=0; {1 = redefinable std word}
    end; {with}
    getgramsymb(c);
  end; {while}
  for k:=1 to i do with smb[k] do begin {truncate lengths}
    if len>lenIde then len:=lenIde;
  end;
  lastSymbol:=i;
    {debug writeln(i,' reserved symbols read.'); }
  i:=succ(i); smb[i].len:=0;smb[i].ix:=0; {end mark}
   { here, j= next free pos in gt. Start grammar section}
  getgramsymb(c);
  repeat
    if c='"' then begin {new nonterminal definition}
      getgramsymb(c); readGramId(s,c);
      {debug writeln; writeln('"',s);}
      c:=ixchar(s,objectList,nbObject,0);
      index[ord(c)]:=j;
      getgramsymb(c); {skip " }
    end else if c=escape then {copy compiler directive} begin
      gt[j]:=c;j:=succ(j);getgramsymb(c);
      readGramId(s,c);
      {debug write(' _',s);}
      gt[j]:=ixchar(s,actionList,nbAction,0); j:=succ(j);
    end else if (c='?')or(c='&') then begin {copy new/old id specifier}
      gt[j]:=c;j:=succ(j); getgramsymb(c); readGramId(s,c);
      gt[j]:=ixchar(s,idTypeList,nbType,0); j:=succ(j);
    end else if lowercase(c) then begin {reference to syntax object }
      readGramId(s,c);
      gt[j]:='~'; j:=succ(j);
      gt[j]:=ixchar(s,objectList,nbObject,0); j:=succ(j);
    end else if not uppercase(c) then begin {ordinary symbol?}
      d:=c;getgramsymb(c); i:=0; {check .. <= etc, quick & dirty }
      if(d='.')and(c='.') then i:=1
      else if (d=':')and(c='=') then i:=2
      else if (d='>')and(c='=') then i:=3
      else if (d='<')and(c='=') then i:=4
      else if (d='<')and(c='>') then i:=5;
      if i>0 then begin getgramsymb(c); d:=chr(127+i) end;
      gt[j]:=d; j:=succ(j)
    end else begin {reserved uppercase word, }
      k:=5; {first alpha entry comes after 5 two-char symbols }
      repeat k:=succ(k); l:=smb[k].ix until gt[l]=c;{fast preselect, 1st char}
      readGramId(rWord,c); {the reserved word}
      repeat takeWord(k,reserve) ; ok:=(reserve=rWord);
        if not ok then k:=succ(k);
      until ok;
      gt[j]:=chr(127+k); j:=succ(j);
    end; {upper case}
  until c='!'; {end of file}
  gt[j]:=chr(0); {end mark}
   {debug  writeln('Grammar table size = ',j); }
  readTrailer(gram,src);
  close(gram);
  {debug  showGt; }
end; {readgram}

end.