
{--------------------------------------------------------------}
{                           WordStat                           }
{                                                              }
{     Word Counter & Word Length Tabulator for TextFiles       }
{                                                              }
{                              by Jeff Duntemann               }
{                              and Hugh Kenner                 }
{                              Turbo Pascal V3.0               }
{                              Last update 3/20/86             }
{                                                              }
{    From the book, COMPLETE TURBO PASCAL, by Jeff Duntemann   }
{    Scott, Foresman & Co. (c) 1986,1987  ISBN 0-673-18600-8   }
{--------------------------------------------------------------}

PROGRAM WordStat;

CONST
  PrintWidth  = 68;
  Tab         = #9;


TYPE
  Array40     = ARRAY[0..40] OF Integer;
  String80    = String[80];

VAR
  I,J         : Integer;
  Scale       : Real;
  Ch          : Char;
  Opened      : Boolean;
  TestFile    : Text;
  FName       : String80;
  Counters    : Array40;
  Line        : String80;
  AWord       : String80;
  WordLength  : Integer;
  LineCount   : Integer;
  WhiteSpace  : SET OF Char;
  GoodChars   : SET OF Char;


PROCEDURE KillJunk(VAR AString : String80);

BEGIN
  WhiteSpace := [#8,#9,#10,#12,#13,#32];
  GoodChars  := ['A'..'Z','a'..'z','0'..'9'];
  REPEAT        { Clean up leading end of word }
    IF Length(AString) > 0 THEN
      IF (AString[1] IN WhiteSpace) OR (NOT(AString[1] IN GoodChars))
        THEN Delete(AString,1,1)
  UNTIL ((NOT (AString[1] IN WhiteSpace)) AND (AString[1] IN GoodChars))
    OR (Length(AString) <= 0);
  REPEAT        { Clean up trailing end of word }
    IF Length(AString) > 0 THEN
      IF (AString[Length(AString)] IN WhiteSpace)
        OR (NOT(AString[Length(AString)] IN GoodChars))
      THEN Delete(AString,Length(AString),1)
  UNTIL ((NOT(AString[Length(AString)] IN WhiteSpace)
    AND (AString[Length(AString)] IN GoodChars))
    OR  (Length(AString) <= 0))
END;  { KillJunk }



PROCEDURE Opener(    FileName : String80;
                 VAR TFile    : Text;
                 VAR OpenFlag : Boolean);

VAR
  I : Integer;

BEGIN
  Assign(TFile,FileName);       { Associate logical to physical }
  {$I-} Reset(TFile); {$I+}     { Open file for read    }
  I := IOResult;                { I <> 0 = File Not Found  }
  IF I = 0 THEN OpenFlag := True ELSE OpenFlag := False;
END;  { Opener }



FUNCTION Scaler(Counters : Array40) : Real;

VAR
  I,MaxCount : Integer;

BEGIN
  MaxCount := 0;           { Set initial count to 0 }
  FOR I := 1 TO 40 DO
    IF Counters[I] > MaxCount THEN MaxCount := Counters[I];
  IF MaxCount > PrintWidth THEN Scaler := PrintWidth / MaxCount
    ELSE Scaler := 1.0;    { Scale=1 if max < printer width}
END;  { Scaler }



PROCEDURE Grapher(Counters : Array40; Scale : Real);

VAR
  I,J : Integer;

BEGIN
  FOR I := 1 TO 40 DO
    BEGIN
      Write(Lst,'[',I:3,']: ');      { Show count }
      FOR J:=1 TO Round(Counters[I] * Scale) DO Write(Lst,'*');
      Writeln(Lst,'')                { Add (CR) at end of *'s}
    END
END;


BEGIN   { WordStat Main }

  FName := ParamStr(1);           { We must pick up command tail first, }
  KillJunk(FName);                {   before opening any files! }
  FOR I:=0 TO 40 DO Counters[I]:=0;          { Init Counters }
  LineCount := 0;

  Opener(FName,TestFile,Opened);  { Attempt to open input file  }
  IF NOT Opened THEN              { If we can't open it...      }
    BEGIN
      Writeln('>>>Input file ',FName,' is missing or damaged.');
      Writeln('   Please Check this file''s status and try again.');
    END
  ELSE                            { If you've got a file, run with it! }
    BEGIN
      WHILE NOT EOF(TestFile) DO  { While there's stuff in the file }
        BEGIN
          Readln(TestFile,Line);        { Read a Line }
          LineCount := LineCount + 1;   { Count the Line }
          Write('.');                   { Display a progress indicator }
          FOR I := 1 TO Length(Line) DO
            IF Line[I] = Tab THEN Line[I] := ' ';
          WHILE Length(Line) > 0 DO     { While there are words in the Line }
            BEGIN
              KillJunk(Line);           { Remove any non-text characters }
              IF POS(' ',Line) > 0 THEN
                AWord := Copy(Line,1,POS(' ',Line)) ELSE AWord := Line;
              KillJunk(AWord);          { Clean up the individual word }
              Counters[0] := Succ(Counters[0]);    { Count the word }
              WordLength := Length(AWord);
              IF WordLength > 40 THEN WordLength := 40;
              J := Counters[WordLength]; { Get counter for that Length }
              J := Succ(J);              { Increment it...     }
              Counters[WordLength] := J; { ...and put it back. }
              Delete(Line,1,Length(AWord));  { Remove the word from the Line }
            END
        END;
      Writeln;
      Close(TestFile);                { Close the input file }
      { The count itself is done.  Now to display it: }
      Scale := Scaler(Counters);      { Scale the Counters }
      Writeln(Lst,
      '>>Text file ',FName,
      ' has ',Counters[0],
      ' words in ',LineCount,' Lines.');
      Writeln(Lst,
      '  Word size histogram follows:');
      Grapher(Counters,Scale);        { Display Scaled histograms  }
      Writeln(Lst,Chr(12));           { Send a formfeed to printer }
    END
END.
