{$A+,B-,D-,E-,F+,G-,I+,L-,N-,O-,R-,S-,V+,X+}
Unit Dict;
Interface
{ DICT.PAS dictionary object and methods to create and use a superimposed
  code dictionary.  Copyright Edwin T. Floyd, 1990, 1991. }
Type
  Dictionary = Object
    DictArray : Pointer;  { Pointer to dictionary bit array }
    DictCount : LongInt;  { Number of key entries in this dictionary }
    DictSize : Word;      { Number of bytes in dictionary bit array }
    DictBits : Byte;      { Number of bits per key entry }

    Constructor Init(MaxKeys : Word; BitsPerKey : Byte);
    { Initialize dictionary, specify maximum keys and bits per key. }

    Constructor RestoreDictionary(FileName : String);
    { Restore dictionary saved on disk by SaveDictionary }

    { Note: Use either Init or RestoreDictionary, not both. }

    Destructor Done;
    { Release storage allocated to dictionary. }

    Function DictionarySize : Word;
    { Returns number of bytes that will be written by SaveDictionary. }

    Procedure SaveDictionary(FileName : String);
    { Save dictionary in a disk file. }

    Function InsertString(Var s : String) : Boolean;
    { Insert string in dictionary; returns TRUE if string is already there. }

    Function StringInDictionary(Var s : String) : Boolean;
    { Returns TRUE if string is in dictionary. }

    Function InsertBlock(Var Data; Len : Word) : Boolean;
    { Insert block in dictionary; returns TRUE if block is already there. }

    Function BlockInDictionary(Var Data; Len : Word) : Boolean;
    { Returns TRUE if block is in dictionary. }

    Function EstError : Real;
    { Returns estimated probability of error. }

    Function ActError : Real;
    { Returns actual probability of error (slow, counts bits). }

  End;

Function DictionaryBytes(MaxKeys : LongInt; BitsPerKey : Byte) : LongInt;
{ Returns the size in bytes of the optimal dictionary bit table for the
  indicated key and bit-per-key counts. }

Implementation

Const
  MagicNumber = $F501205E; { Used to validate dictionary save file }

Type
  SaveFileHeader = Record
  { Header for dictionary save file (all numbers are byte-reversed) }
    Magic : LongInt;       { Magic number for validity test }
    BitsCount : LongInt;   { Bits-per-key and entry count }
    Size : Word;           { Size of dictionary bit map in bytes }
  End;

Function CountBits(Var InBuf; InLen : Word) : LongInt;
External; {FAR}

Function SetBloomFilter(Var InBuf; InLen : Word;
  Var BitTable; BitTableLen, BitCount : Word) : Boolean;
External; {FAR}

Function TestBloomFilter(Var InBuf; InLen : Word;
  Var BitTable; BitTableLen, BitCount : Word) : Boolean;
External; {FAR}

{$L BLOOM.OBJ }

Function LongSwap(n : LongInt) : LongInt;
{ Reverse bytes in a LongInt. }
Inline(
  $5A/                   {      pop    dx}
  $58/                   {      pop    ax}
  $86/$C4/               {      xchg   ah,al}
  $86/$D6);              {      xchg   dh,dl}

Function DictionaryBytes(MaxKeys : LongInt; BitsPerKey : Byte) : LongInt;
Begin
  DictionaryBytes := Round(MaxKeys * BitsPerKey / (-Ln(0.5) * 8));
End;

Constructor Dictionary.Init(MaxKeys : Word; BitsPerKey : Byte);
Var
  DictBytes : LongInt;
Begin
  DictBytes := DictionaryBytes(MaxKeys, BitsPerKey);
  If DictBytes > $FFF0 Then Begin
    WriteLn(DictBytes, ' bytes optimal for dictionary, but ', $FFF0,
      ' is maximum size dictionary.  Using max size.');
    DictBytes := $FFF0;
  End Else If DictBytes > MaxAvail Then Begin
    WriteLn(DictBytes, ' bytes optimal for dictionary, but only ', MaxAvail,
      ' bytes are available.  Using ', MaxAvail);
    DictBytes := MaxAvail;
  End Else If DictBytes < 16 Then DictBytes := 16;
  DictSize := DictBytes;
  GetMem(DictArray, DictSize);
  FillChar(DictArray^, DictSize, 0);
  DictCount := 0;
  DictBits := BitsPerKey;
End;

Constructor Dictionary.RestoreDictionary(FileName : String);
Var
  Header : SaveFileHeader;
  DictBytes : LongInt;
  f : File;
  OldMode : Byte;
Begin
  OldMode := FileMode;
  FileMode := $40;
  Assign(f, FileName);
  Reset(f, 1);
  BlockRead(f, Header, SizeOf(Header));
  With Header Do Begin
    Magic := LongSwap(Magic);
    Size := Swap(Size);
    DictBytes := FileSize(f) - SizeOf(Header);
    If (Magic <> MagicNumber) Or (Size <> DictBytes) Or (Size < 16)
    Or (Size > $FFF0) Then Begin
      WriteLn('File ', FileName, ' is not a dictionary save file.');
      Halt(1);
    End;
    DictSize := Size;
    DictBits := BitsCount And $FF;
    DictCount := LongSwap(BitsCount And $FFFFFF00);
    GetMem(DictArray, DictSize);
    BlockRead(f, DictArray^, DictSize);
    Close(f);
    FileMode := OldMode;
  End;
End;

Destructor Dictionary.Done;
Begin
  FreeMem(DictArray, DictSize);
  DictArray := Nil;
  DictSize := 0;
  DictBits := 0;
  DictCount := 0;
End;

Function Dictionary.DictionarySize : Word;
Begin
  DictionarySize := DictSize + SizeOf(SaveFileHeader);
End;

Function Dictionary.InsertString(Var s : String) : Boolean;
Begin
  InsertString := InsertBlock(s[1], Length(s));
End;

Function Dictionary.StringInDictionary(Var s : String) : Boolean;
Begin
  StringInDictionary := BlockInDictionary(s[1], Length(s));
End;

Function Dictionary.InsertBlock(Var Data; Len : Word) : Boolean;
Begin
  If SetBloomFilter(Data, Len, DictArray^, DictSize, DictBits)
  Then InsertBlock := True Else Begin
    Inc(DictCount);
    InsertBlock := False;
  End;
End;

Function Dictionary.BlockInDictionary(Var Data; Len : Word) : Boolean;
Begin
  BlockInDictionary := TestBloomFilter(Data, Len, DictArray^, DictSize,
    DictBits);
End;

Procedure Dictionary.SaveDictionary(FileName : String);
Var
  Header : SaveFileHeader;
  f : File;
Begin
  Assign(f, FileName);
  ReWrite(f, 1);
  With Header Do Begin
    Magic := LongSwap(MagicNumber);
    Size := Swap(DictSize);
    BitsCount := LongSwap(DictCount) + DictBits;
  End;
  BlockWrite(f, Header, SizeOf(Header));
  BlockWrite(f, DictArray^, DictSize);
  Close(f);
End;

Function Dictionary.EstError : Real;
Begin
  EstError := Exp(Ln(1.0-Exp(-(DictCount*DictBits)/(DictSize*8.0)))*DictBits);
End;

Function Dictionary.ActError : Real;
Var
  AllBits, BitsOn : LongInt;
Begin
  AllBits := LongInt(DictSize) * 8;
  BitsOn := CountBits(DictArray^, DictSize);
  ActError := Exp(Ln(BitsOn / AllBits) * DictBits);
End;

End.
