(*[72457,2131]
TXTSEK.PAS                06-Oct-85 7075               184

    Keywords: MSDOS PCDOS TEXT INDEX RANDOM ACCESS FILES

    This program demonstrates a technique to index a sequential text file.
    After indexing, any line of the text file can be accessed randomly by line
    number. This may be useful for text sort programs, text-based databases,
    large file editors or anywhere that disk paging of text files is required.
    Written with PCDOS TP 3.01A, should run on MSDOS as is, CP/M with minor
    modifications. Capacity as it stands is for files of up to 9000 lines and
    2.3 Mbytes.
*)

PROGRAM textseek;

    {
    Demonstrate a technique for making a normal text file into a fully indexed
    file, where any line can be read randomly by line number. Each line must
    be terminated by at least a <LF>.

    Useful for text sort programs, large file editors, text databases, etc.

    Should run on any PCDOS or MSDOS Turbo 3.0 (developed on IBM).

    Written 10/5/85, Kim Kokkonen, TurboPower Software.
    408-378-3672, CompuServe 72457,2131.

    Released to the public domain.
    }

  CONST
    {maxlines*maxlength gives the maximum filesize, here about 2.3 megabytes}
    MaxLines = 9000;          {limited by 7*maxlines<=65536}
    MaxLength = 255;          {max length of a given line, limited to 255}
    BufSize = 4096;           {number of bytes per blockread}

  TYPE
    TextString = STRING[MaxLength]; {holds one text line, without its terminators}
    PathName = STRING[64];          {file name as typed by the user}

    {one index entry per text line, filled in by ReadNewLine}
    {the size notes in this file assume 7 bytes per entry}
    FilePointer = RECORD
                    SeekTo : Real;     {file position where the line starts}
                    LenToRead : Byte;  {count of characters stored for the line}
                  END;
    FileIndexArray = ARRAY[1..MaxLines] OF FilePointer;
    FileIndexPtr = ^FileIndexArray;
    TextBuffer = ARRAY[1..BufSize] OF Char; {receives one blockread of raw file data}
    TextBufferPtr = ^TextBuffer;

    {following record carries all information about the indexed text file}
    {requires 97 bytes in the segment where its var is located}
    {requires 7*maxlines+bufsize on the heap}
    IndexedFile =
    RECORD
      fil : FILE;             {untyped file is critical for this application}
      EndOfFile : Boolean;    {true when all of file read}
      LineNum : Integer;      {last line read in}
      FilePosition : Real;    {current byte position in file during readin}
      Buffer : TextBufferPtr; {pointer to buffer for this file}
      BufPos : Integer;       {position in current buffer}
      BytesRead : Integer;    {number read in last blockread}
      index : FileIndexPtr;   {pointer to file index}
    END;

  VAR
    fname : PathName;         {name of the text file entered at the prompt}
    F : IndexedFile;          {the one indexed file used by the demonstration}
    L : TextString;           {line most recently fetched for display}
    LineToSeek : Integer;     {line number entered by the user, 0 ends the demo}
    Success : Boolean;        {result of building the index}

  FUNCTION Cardinal(i : Integer) : Real;
      {-treat i as an unsigned 16 bit quantity, return it as a real 0<=r<=65535}
    BEGIN
      IF i >= 0 THEN
        Cardinal := i
      ELSE
        {negative values stand for the upper half of the unsigned range}
        Cardinal := 65536.0+i;
    END;                      {cardinal}

  PROCEDURE OpenFile(fname : PathName;
                     VAR F : IndexedFile;
                     VAR Success : Boolean);
      {-open an indexed textfile, return true if successful}
      {fname   : name of the file to open}
      {f       : receives the open file, a read buffer and an empty line index}
      {success : false if the file cannot be opened or the heap is too small;}
      {          in that case the file is closed again and nothing stays allocated}
    BEGIN
      WITH F DO BEGIN

        {open the physical file; i/o checking is off so a failure can be reported}
        Assign(fil, fname);
        {$I-} Reset(fil, 1) {$I+} ;
        Success := (IOResult = 0);
        IF NOT(Success) THEN Exit;
        EndOfFile := False;

        {allocate the file buffer}
        {maxavail is scaled by 16 here, i.e. it is taken as a count of 16 byte units}
        Success := 16.0*Cardinal(MaxAvail) > Cardinal(SizeOf(TextBuffer));
        IF NOT(Success) THEN BEGIN
          Close(fil);
          Exit;
        END;
        New(Buffer);
        BytesRead := 0;
        BufPos := 1;          {force blockread the first time}

        {allocate the lines array}
        Success := 16.0*Cardinal(MaxAvail) > Cardinal(SizeOf(FileIndexArray));
        IF NOT(Success) THEN BEGIN
          {the buffer was already allocated above, give it back before failing}
          Dispose(Buffer);
          Close(fil);
          Exit;
        END;
        New(index);
        LineNum := 0;
        FilePosition := 0.0;

      END;
    END;                      {openfile}

  PROCEDURE CloseFile(VAR F : IndexedFile);
      {-close the physical file, then return the index and the buffer to the heap}
    BEGIN
      Close(F.fil);
      Dispose(F.index);
      Dispose(F.Buffer);
    END;                      {closefile}

  PROCEDURE ReadNewLine(VAR F : IndexedFile; VAR L : TextString);
      {-read a text line and store information for later random access}
      {f : indexed file being scanned sequentially; its buffer, line count,}
      {    file position and index are updated}
      {l : receives the line without <CR>, <LF> or ^Z; at most maxlength}
      {    characters are kept, the rest of an overlong line is skipped}
      {a line ends at <LF>, at ^Z, or when a blockread returns no more data;}
      {the last two cases also set f.endoffile. an index entry is recorded}
      {for every call while fewer than maxlines lines have been stored, which}
      {includes the call that detects the end of the file}
    VAR
      EndOfLine : Boolean;
      lpos, terminators : Integer;
      dropped : Real;         {characters of an overlong line not stored in l}
      ch : Char;
    BEGIN
      WITH F DO BEGIN
        EndOfLine := False;
        lpos := 0;
        terminators := 1;     {accounts for the character that ends the line}
        dropped := 0.0;

        {look at characters until end of line found}
        REPEAT

          IF BufPos > BytesRead THEN BEGIN
            {get another buffer full}
            BlockRead(fil, Buffer^, BufSize, BytesRead);
            BufPos := 1;
          END;

          IF BytesRead = 0 THEN
            {nothing left in the file, behave as if a ^Z had been read}
            ch := ^Z
          ELSE BEGIN
            ch := Buffer^[BufPos];
            BufPos := Succ(BufPos);
          END;

          CASE ch OF
            ^M : terminators := Succ(terminators);
            ^J : EndOfLine := True;
            ^Z : BEGIN
                   EndOfLine := True;
                   EndOfFile := True;
                 END;
          ELSE
            IF lpos < MaxLength THEN BEGIN
              lpos := Succ(lpos);
              L[lpos] := ch;
            END ELSE
              {still occupies a byte in the file, so it must count towards}
              {the position of the next line; a real cannot overflow here}
              dropped := dropped+1.0;
          END;

        UNTIL EndOfLine;

        {finish up line}
        L[0] := Chr(lpos);

        {store info for later random access}
        IF LineNum < MaxLines THEN BEGIN
          LineNum := Succ(LineNum);
          WITH index^[LineNum] DO BEGIN
            SeekTo := FilePosition;
            LenToRead := lpos;
          END;
          {advance by everything consumed, including skipped characters,}
          {so that the offsets of the following lines stay correct}
          FilePosition := FilePosition+lpos+terminators+dropped;
        END;

      END;
    END;                      {readnewline}

  PROCEDURE ReadIndexedLine(VAR F : IndexedFile;
                            VAR Num : Integer;
                            VAR L : TextString);
      {-fetch line num of f into l using the stored index}
      {on success num is advanced by one, so repeated calls walk the file}
      {if num is not in 1..f.linenum-1, l is returned empty and num is set to 0}
    BEGIN
      IF (Num <= 0) OR (Num >= F.LineNum) THEN BEGIN
        {no such line}
        L := '';
        Num := 0;
      END ELSE BEGIN
        {position on the start of the line and read its characters directly}
        {into the string body, then set the length byte to match}
        LongSeek(F.fil, F.index^[Num].SeekTo);
        BlockRead(F.fil, L[1], F.index^[Num].LenToRead);
        L[0] := Chr(F.index^[Num].LenToRead);
        Num := Succ(Num);
      END;
    END;                      {readindexedline}

  PROCEDURE BuildFileIndex(fname : PathName;
                           VAR F : IndexedFile;
                           VAR Success : Boolean);
      {-open fname and scan it once so that every line gets an index entry}
      {success is false when the file could not be opened by openfile}
    VAR
      Scratch : TextString;   {line text is not needed here, only the index}
    BEGIN
      OpenFile(fname, F, Success);
      IF Success THEN
        {sequential pass over the whole file}
        WHILE NOT(F.EndOfFile) DO
          ReadNewLine(F, Scratch);
    END;                      {buildfileindex}

  BEGIN

    {get a file to read, and read it in}
    Write('Enter text file name to read: ');
    ReadLn(fname);
    WriteLn('Reading');
    BuildFileIndex(fname, F, Success);
    IF NOT(Success) THEN BEGIN
      WriteLn('could not build index...');
      Halt;
    END;

    {demonstrate random access}
    {readindexedline only accepts 0 < num < f.linenum, so the largest line}
    {number that can be fetched is f.linenum-1; that is the maximum shown}
    REPEAT
      Write('Enter any linenumber (0 to quit, max ', Pred(F.LineNum), '): ');
      ReadLn(LineToSeek);
      IF LineToSeek <> 0 THEN BEGIN
        IF (LineToSeek > 0) AND (LineToSeek < F.LineNum) THEN BEGIN
          {linetoseek comes back incremented, which is never 0 here}
          ReadIndexedLine(F, LineToSeek, L);
          WriteLn(L);
        END ELSE
          {checked here because readindexedline would zero linetoseek}
          {and thereby end the loop without telling the user why}
          WriteLn('line number out of range');
      END;
    UNTIL LineToSeek = 0;

    CloseFile(F);

  END.
