{                         P32 - 32bit Pascal Compiler                        }
{ -------------------------------------------------------------------------- }
{                       Johan Prins - jprins@knoware.nl                      }
{ ========================================================================== }
{ Scanner and token-recognition  ( a.k.a. lexical analyser)                  }
{                                                   (c) Copyright 1996-1998  }
{                                                                            }
{ -------------------------------------------------------------------------- }
{ $DEFINE P32IDE}

unit P32_scan;

interface

uses P32_err, P32_prep;

const
     P32_version = 'v0.4d';
     {$I date.inc}                              {...compilation date...}

type
   Str14      = string[14];

   Token      = {pascal tokens...for internal handling}
   (_program,             _var,            _const,
      _type_,           _begin,            _while,
     _downto,              _do,           _repeat,
      _until,             _asm,              _end,

     _period,           _comma,             _plus,
      _minus,             _mul,              _div,
        _mod,              _at,              _ptr,
        _shl,             _shr,            _slash,
     _lparen,          _rparen,        _separator,
     _assign,           _equal,          _greater,
       _less,         _less_eq,       _greater_eq,
     _not_eq,           _colon,               _in,

         _if,            _then,             _else,
        _for,              _to,              _set,
  _procedure,        _function,
   _lbracket,        _rbracket,          _period2,
         _of,             _and,               _or,
        _xor,             _nil,
        _not,           _array,           _record,
     _string,        _external,        _assembler,
   _register,        _win32api,
        _far,            _near,        _interrupt,
     _inline,            _file,           _system,
       _goto,            _case,             _with,

      __unit,  _implementation,        _interface,
       _uses,           _break,         _continue,
       _exit,          _label_,         _absolute,
    _unknown, _string_constant, _integer_constant,
                _char_constant,    _real_constant,
       _name);

const

   MaxToken  = high(token);

   TokenName : array[token] of Str14 =
   (
   'PROGRAM',            'VAR',
     'CONST',           'TYPE',
     'BEGIN',          'WHILE',          'DOWNTO',
        'DO',         'REPEAT',           'UNTIL',
       'ASM',            'END',
         '.',              ',',               '+',
         '-',              '*',             'DIV',
       'MOD',              '@',               '^',
       'SHL',            'SHR',               '/',
         '(',              ')',               ';',
        ':=',              '=',               '>',
         '<',             '<=',              '>=',
        '<>',              ':',              'IN',
        'IF',
      'THEN',           'ELSE',             'FOR',
        'TO',            'SET',
        'PROCEDURE',        'FUNCTION',
         '[',              ']',              '..',
        'OF',             'AND',              'OR',
       'XOR',             'NIL',
       'NOT',           'ARRAY',          'RECORD',
    'STRING',        'EXTERNAL',       'ASSEMBLER',
  'REGISTER',        'WIN32API',
       'FAR',            'NEAR',       'INTERRUPT',
    'INLINE',            'FILE',      '__SYSTEM__',
      'GOTO',            'CASE',            'WITH',
      'UNIT',  'IMPLEMENTATION',       'INTERFACE',
      'USES',           'BREAK',        'CONTINUE',
      'EXIT',           'LABEL',        'ABSOLUTE',
          '',               '',                '',
          '',               '',                '');

var

   Look,Ahead            : char;                   {last read char}
   source, dest, incl    : ^text;                  {file-pointers}
   ProcName              : string;                 {current procedure}
   ProgramName           : string;                 {program name }

   upcase_string         : string;
   current_string        : string;
   current_token         : token;
   current_number        : longint;
   current_float         : double;

   previous_token        : token;

function  Numb(i:longint):string;
{converts a number to a string}
procedure GetChar;
{gets a char from sourcefile}
procedure SkipSpace;
{removes unwanted spaces}
procedure GetToken;
{gets a string from a file and analyses it...returns a token/name/number}
function  ToUpper(S : string): string;
{converts a string to uppercase}
function  GetName: string;
{gets a variable name from the sourcefile}
function  GetNumber: longint;
{gets a number from the sourcefile}
function GetLabel: string;   {LB for goto and labels}
{gets a label from the source file}
function GetFloat: double;
{gets a number from the sourcefile}
procedure Match(x: Token);
{'Eats' the current token and processes the next token}
function MaybeLabel(x:Token):boolean;
{Returns true if x is a pascal label (identifier or integer constant) }

implementation

uses P32_symb;

const
   Cr  = #13;
   Lf  = #10;
   Tab = ^I;
   HexCode   = '0123456789ABCDEF';

function numb(i : longint): string;
{converts number to string}
var
   s : string;
begin
   str(i, s);
   numb:=s;
end;

procedure GetChar;
begin
   Look:=Ahead;
   if not eof(source^) then read(Source^, Ahead)
                       else Look := '.';
   if Look = #13 then begin
                        inc(LineCount);
                        {$IFDEF P32IDE}
                        ShowLineNum;
                        {$ENDIF}
                      end;
end;

procedure SkipSpace;
begin
   while (look in [Cr, Lf, Tab, ' '] ) and (not eof(source^)) do
     GetChar;
end;

Procedure EatComment;
var last: char;
Begin
  while ((last<>'*') or (look <> ')')) and
        ((last<>'*') or (look <> '/')) and
        (look <> '}') and
        not eof(source^) do
  begin
    last := look;
    GetChar;
    Case look of
      '{': EatComment;
      '(': Begin
             Getchar;
             IF look='*' then
               EatComment;
           end;
    end;
  end;
  GetChar;
end;

procedure GetToken;

label
   restart;
var
   tmp:string;
   code:integer;
   i    : token;
   j    : word;
   o    : longint;
   open, X   : boolean;
   tmpchar, last: char;

begin
RESTART:
        code:=0;
        previous_token := current_token;
        Current_String := '';
        Current_Token  := _Unknown;
        Current_Number := 0;
        SkipSpace;
        case Look of
        '{'  :    begin {normal comments}
                    getchar;
                    if look='$' then DoDirectives;
                    if NestedComments then
                      EatComment
                    else
                      begin
                        while (Look <> '}') and not eof(source^) do getchar;
                        getchar; {eat one extra}
                      end;
                    goto Restart;
                  end;
        '/'  :    begin
                    getchar;
                    current_token := _slash;
                    if CstyleComments then {C-style comment}
                      begin
                        case Look of
                        '/' : begin
                                o:=LineCount;
                                while (LineCount = o) and not eof(source^) do getchar;
                                goto Restart;
                              end;
                        '*' : begin
                                EatComment;
                                goto Restart;
                              end;
                        end;
                      end;
                  end;
        '-'  :    begin  {ADA-style, one line comment}
                    getchar;
                    if Look = '-' then
                      begin
                        o:=LineCount;
                        while (LineCount = o) and not eof(source^) do getchar;
                        goto Restart;
                      end
                    else
                      current_token := _minus;
                  end;
        '('  :    begin {old-style comments}
                    getchar;
                    if look = '*' then
                      begin
                        EatComment;
                        goto Restart;
                      end
                    else
                      current_token := _lparen;
                  end;
        '#',
        ''''    : begin {string constant}
                    current_string := '';
                    X := false;
                    open:=false;
                    repeat
                      case look of
                      cr    : abort('String exceeds line');
                      '#'   : begin
                                getchar;
                                tmp:='';
                                while (upcase(Look) in ['$','0'..'9', 'A'..'F'] ) do
                                begin
                                  tmp:=tmp+look;
                                  GetChar;
                                end;
                                val(tmp, current_number, code);
                                current_string:=current_string+chr(current_number);
                                open:=false;
                              end;
                      ''''  : begin
                                getchar;
                                repeat
                                  case look of
                                  cr    : abort('String exceeds line');
                                  ''''  : begin
                                            getchar;
                                            if look<>'''' then
                                              begin
                                                open:=false;
                                                break;
                                              end
                                            else begin
                                                   current_string := current_string + look;
                                                   getchar;
                                                   open:=true;
                                                 end;
                                          end;
                                  else begin
                                         current_string := current_string + look;
                                         getchar;
                                         open:=true;
                                       end;
                                  end;
                                until false;
                              end;
                      else
                      current_string := current_string + look;
                      getchar;
                      open:=true;
                      end;
                    until (not open) and not (look in ['#','''']);
                    current_token := _string_constant;
                    if length(current_string)=1 then current_token:=_char_constant;
                  end;
        '$'     : begin {hex-number}
                    current_string:=look;
                    getchar;
                    while (upcase(Look) in ['$','0'..'9', 'A'..'F'] ) do
                    begin
                      current_string:=current_string+look;
                      GetChar;
                    end;
                    val(current_string, current_number, code);
                    Current_Token := _integer_constant;
                 end;
    '-'        : begin
                   current_string:='-';
                   current_token:=_minus; {just a normal '-'}
                   GetChar;
                 end;
       '0'..'9': begin {number}
                  while Look in ['0'..'9'] do
                  begin
                    current_string:=current_string+look;
                    GetChar;
                  end;
                  if (Look='.') or (upcase(Look)='E') then
                    begin
                     if (Ahead='.') then begin {subrange, like: 1..100}
                                           val(current_string, current_Number, code);
                                           current_token := _integer_constant;
                                         end
                     else                begin
                                           if (upcase(Look) in ['.','0'..'9','E'{,'-'}]) then
                                           begin {real constant: 3. or 2.0}
                                             current_string:=current_string+look;
                                             GetChar;
                                             while (upcase(Look) in ['0'..'9']) do
                                               begin
                                                 current_string:=current_string+look;
                                                 GetChar;
                                              end;
                                             if (upcase(look) = 'E') then
                                               begin
                                                 current_string:=current_string+look;
                                                 GetChar;
                                                 while (upcase(Look) in ['0'..'9','-']) do
                                                   begin
                                                     current_string:=current_string+look;
                                                     GetChar;
                                                   end;
                                               end;
                                             if (look='h') or (look='h') then
                                               begin
                                                 {delete(current_string,length(current_string),1);}
                                                 GetChar; {eat the 'h'}
                                                 val('$'+numb(current_number)+current_string, current_number, code);
                                                 current_token := _integer_constant;
                                               end
                                             else
                                               begin
                                                 val(current_string, current_float, code);
                                                 current_token := _real_constant;
                                               end;
                                           end;
                                         end
                     end
                  else begin
                         val(current_string,Current_Number,code);
                         current_token := _integer_constant;
                       end;
                end;
    '_',
    'A'..'Z',
    'a'..'z'  : begin {identifier}
                  while Look in ['_', '0'..'9','A'..'Z','a'..'z' ] do
                  begin
                    Current_String := current_string + look;
                    GetChar;
                  end;
                    for i := low(i) to MaxToken do
                      if ToUpper(Current_String) = TokenName[i] then
                      begin
                        Current_Token := i;
                        {goto done}
                      end;
                  if Current_Token = _unknown then Current_Token := _name;
                end;
    else        Current_String := Look;
                GetChar;
                repeat
                  J := 0;
                  for i := low(i) to MaxToken do
                    if ToUpper(Current_string + Look) = TokenName[i] then J := word(i);
                    if J <> 0 then begin
                                     Current_String := Current_String + Look;
                                     GetChar;
                                   end;
                until J = 0;
                for i := low(i) to MaxToken do
                  if ToUpper(Current_String) = TokenName[i] then  J := word(i);
                  Current_Token := Token(j);
    end;
    upcase_string:=ToUpper(current_string);
if code<>0 then Error('Invalid number');
end;

function ToUpper(s:string): string;
var
  I: byte;

begin
  for I := 1 to Length(s) do if S[I] in ['a'..'z'] then dec(S[i], 32);
  ToUpper := S;
end;

function GetName: string;
begin
  if Current_Token = _Name then GetName := '_' + ToUpper (Current_String)
   else
    Expected ('identifier');
   GetToken;
 end;

function GetNumber: longint;
var code:integer;
begin
  current_string:='';
  if upcase(look) in ['A'..'F','H'] then
  begin
    while (upcase(Look) in ['$','0'..'9', 'A'..'F'] ) do
      begin
        current_string:=current_string+look;
        GetChar;
      end;
    if upcase(look)='H' then GetChar;
    val('$'+numb(current_number)+current_string, current_number, code);
  end;
  {Current_Token := _integer_constant;}
  GetNumber := Current_Number;
  GetToken;
end;

function GetFloat: double;
begin
  GetFloat:=Current_Float;
  GetToken;
end;

function GetLabel: string;   {LB for goto and labels}
var s:string;    {?reduce this to string[20]}
begin
  if Current_Token = _Name then GetLabel := GetName
   else
  if Current_Token = _integer_constant then
    begin {convert integer label to string}
    {str(current_number:0,s);}
    GetLabel := '_' + Numb(GetNumber);
    end
   else
    Expected ('identifier or integer constant');
{   GetToken;}
 end;

procedure Match(x: Token);
{'Eats' the current token and processes the next token}
begin
  if Current_Token <> X then
    begin
      if X <= MaxToken then Expected(TokenName[X])
                             else Abort('Unknown token, compiler error!');
    end
  else
    GetToken;
end;

function MaybeLabel(x:Token):boolean;
{Returns true if x is a pascal label (identifier or integer constant) }
{Standard pascal only allows an integer constant, BP extends this to  }
{allow identifiers as well, both followed by a colon. x=current_token }

Begin
  MaybeLabel := (x=_integer_constant) or (x=_name);
end;

begin
end.