'/////////////////////////////////////////////////////////////////////////////
'/                           SLENCODE.BAS v1.0                               /
'/        An Implemention of the LZSS Pattern Compression Algorithm          /
'/                         By Rich Geldreich 1992                            /
'/             Original C Program by Haruhiko Okumura 4/6/1989               /
'/                                                                           /
'/   Anyone may freely distribute and use this program, as long as proper    /
'/                        credit is given. Thanks!                           /
'/                                                                           /
'/               Any bugs, problems, questions, write/call:                  /
'/                    Rich Geldreich                                         /
'/                    410 Market St.                                         /
'/                    Gloucester City, NJ 08030                              /
'/                    (609)-742-8752 or (609)-456-0721                       /
'/                                                                           /
'/ The two programs SLDECODE.ASM and SLDECODE.BAS decompress files created   /
'/ with this program.                                                        /
'/////////////////////////////////////////////////////////////////////////////

'ToDo list: add more terse error checking, move code from main module to
'a callable subroutine, CRC-32 routine, add a statistical modeling scheme
'for greater compression(dynamic Huffman coding is a very likely choice-
'arithmitic coding in QB might not work too well).

'Warning: Do NOT press CTRL+Break and then continue this program while
'you are running it in the environment! At best, the resulting compressed
'file will be invalid. At worst, your machine will lock up.

'QuickBASIC 4.5 users: As this program is, it will not work correctly with
'QB 4.5. To make it QB 4.5 compatible, simply change all of the "SSEG" strings
'in this program to "VARSEG" strings with the search and replace function.
'Note that the SLDECODE.ASM routine should not require any changes for it to
'work properly.

DEFINT A-Z

CONST BufferSize = 4096 'Ring buffer's size
CONST MaxMatch = 74     'Maximum match length
CONST Threshold = 2     'Minimum match length
CONST Null = BufferSize

DECLARE SUB InitTree ()         'Initializes the multiple binary trees
DECLARE SUB InsertNode (R)      'Inserts string R to R+MaxMatch-1 into tree
DECLARE SUB DeleteNode (P)      'Deletes node P from tree
DECLARE SUB FillInputBuffer ()  'Fills input buffer from disk
DECLARE SUB PutData (Stuff, Codesize) 'Writes multibit codes to output buffer

'Last Buffersize characters from input file go into ring buffer for searching
DIM SHARED Ring.Buffer((BufferSize - 1) + (MaxMatch - 1))
DIM SHARED Dad(BufferSize)      'Father to each entry
DIM SHARED LeftSon(BufferSize)  'Left son of each entry
DIM SHARED RightSon(BufferSize + 1 + 255) 'Right son of each entry+root of each
                                          'binary tree.
'Note that node 4096 is a "null" node

'Maximum match length and position returned by InsertNode
DIM SHARED Match.Position, Match.Length

'Input & output buffer stuff
DIM SHARED InBuffer$, Iseg, IAddress, IEndAddress
DIM SHARED OutBuffer$, Oseg, OAddress, OStartAddress, OEndAddress

'PutData stuff
DIM SHARED Shift(7) AS LONG, Char&, CurrentBit

PRINT "SLENCODE.BAS v1.0 - LZSS Encoder in QuickBASIC 4.5"
PRINT "By Rich Geldreich 1992"
A$ = COMMAND$
IF A$ = "" THEN INPUT "File to compress"; A$
IF A$ = "" THEN END
IF INSTR(A$, "OUTPUT.SL1") THEN
    PRINT "Cannot compress output file."
    END
END IF

'Why use DATA/READ when we can just plop the values in ourselves?
 Shift(0) = 1
 Shift(1) = 2
 Shift(2) = 4
 Shift(3) = 8
 Shift(4) = 16
 Shift(5) = 32
 Shift(6) = 64
 Shift(7) = 128
 
'Initialize the input and output buffers
 InBuffer$ = SPACE$(4096)
 IAddress = 0: IEndAddress = 1

 OutBuffer$ = SPACE$(4096)
'Make sure that the offset used to address the OutBuffer$ is always an
'integer for speed.
 A& = SADD(OutBuffer$)
 A& = A& - 65536 * (A& < 0)
 Oseg& = SSEG(OutBuffer$) + (A& \ 16)
 IF Oseg& > 32767 THEN Oseg = Oseg& - 65536 ELSE Oseg = Oseg&
 OAddress = A& AND 15
 OStartAddress = OAddress
 OEndAddress = OAddress + 4096

'Input input file
OPEN A$ FOR BINARY AS #1
BytesLeft& = LOF(1)
IF BytesLeft& = 0 THEN
    PRINT A$; " not found or null."
    CLOSE #1
    KILL A$
    END
END IF
PRINT "Compressing "; A$; " -"; : xpos = POS(0)

'Open output file
OPEN "output.sl1" FOR BINARY AS #2
IF LOF(2) <> 0 THEN
    CLOSE #2
    KILL "output.sl1"
    OPEN "output.sl1" FOR BINARY AS #2
END IF

GOSUB UpdatePercent

'Put my little header
A$ = "RG": PUT #2, , A$
PUT #2, , BytesLeft&

'Initialize the ring buffer with space characters.
InitTree
S = 0: R = BufferSize - MaxMatch
FOR Work = 0 TO R - 1
    Ring.Buffer(Work) = 32
NEXT

'Attempt to get MaxMatch characters from the file, and put them in the ring
'buffer.
FOR LookAheadLength = 0 TO MaxMatch - 1
     IAddress = IAddress + 1
     IF IAddress = IEndAddress THEN FillInputBuffer
     IF BytesLeft& = 0 THEN EXIT FOR
    Ring.Buffer(R + LookAheadLength) = PEEK(IAddress)
    BytesLeft& = BytesLeft& - 1
NEXT

'Insert the characters into the tree.
FOR Work = 1 TO MaxMatch
    InsertNode R - Work
NEXT
InsertNode R
DO
    'Match.Length must always be less than the look ahead buffer's length
    IF Match.Length > LookAheadLength THEN Match.Length = LookAheadLength

    'Does the match length exceed the break even point?
    IF Match.Length <= Threshold THEN
        CodesOut& = CodesOut& + 1

        Match.Length = 1
        PutData Ring.Buffer(R) * 2, 9 'Send 1 character unencoded
    ELSE
        CodesOut& = CodesOut& + 1

        'Send a position and match length pair
        IF Match.Length <= 10 THEN 'do we send 3 or 6 bits for match length?
            PutData 1, 2 'send 1 0
            PutData (Match.Length - (Threshold + 1)), 3
        ELSE
            PutData 3, 2 'send 1 1
            PutData (Match.Length - 11), 6
        END IF

        'Send match position.
        PutData Match.Position, 12
    END IF

    'Get Match.Length chars from the input file and put them into the
    'ring buffer.
    Last.Match.Length = Match.Length
    FOR Work = 0 TO Last.Match.Length - 1
         'Check to see if any bytes left, and get a byte if there is.
         'Otherwise exit this loop.
         IF BytesLeft& = 0 THEN EXIT FOR
         IAddress = IAddress + 1
         IF IAddress = IEndAddress THEN FillInputBuffer
         C = PEEK(IAddress)
         BytesLeft& = BytesLeft& - 1

        DeleteNode S
        Ring.Buffer(S) = C
        
        'Copy the char to a ghost buffer at the end of the table in case
        'it's near the beginning. This simplifies the InsertNode
        'procedure, because the ring buffer comparison does not have to
        'increment the comparison positions modulo BufferSize.
        
        IF S < (MaxMatch - 1) THEN Ring.Buffer(S + BufferSize) = C

        S = (S + 1) AND (BufferSize - 1)
        R = (R + 1) AND (BufferSize - 1)
        InsertNode R
    NEXT

    'At end of file, but still characters left in the look ahead buffer.
    FOR Work = Work TO Last.Match.Length - 1
        DeleteNode S
        S = (S + 1) AND (BufferSize - 1)
        R = (R + 1) AND (BufferSize - 1)
        LookAheadLength = LookAheadLength - 1
        IF LookAheadLength > 0 THEN InsertNode R
    NEXT

    IF Tc = 0 THEN GOSUB UpdatePercent
    Tc = (Tc + 1) AND 255
        
LOOP WHILE LookAheadLength > 0

'Flush output buffer
PutData 0, 12
OutBuffer$ = LEFT$(OutBuffer$, OAddress - OStartAddress)

PUT #2, , OutBuffer$
PUT #2, 3, CodesOut&    'Store # of codes sent

'Report compression
LOCATE , 1
PRINT "Bytes in:"; LOF(1); "Bytes out:"; LOF(2);
PRINT "Compression:";
PRINT 100 - (LOF(2) * 100&) \ LOF(1); "%"; SPACE$(16)
CLOSE #1, #2
END

UpdatePercent:
 LOCATE , xpos
 PRINT (100& * (LOF(1) - BytesLeft&)) \ LOF(1); "% complete";
RETURN

'Deletes node P from binary tree
SUB DeleteNode (P)
    'not in tree yet?
    IF Dad(P) = Null THEN EXIT SUB

    L = LeftSon(P)
    R = RightSon(P)

    IF R = Null THEN        'no right son?
        Q = L               'use left son then
    ELSEIF L = Null THEN    'no left son?
        Q = R               'use right son then
    ELSE
        Q = L               'great, it has two sons! find a place for
                            'one of them...

        IF RightSon(Q) <> Null THEN
            'find a leaf branch
            DO
                Q = RightSon(Q)
            LOOP WHILE RightSon(Q) <> Null
            'RightSon(Q)=Null now

            'Make right son of Q's dad point tward the left son of Q

            D = Dad(Q)
            RightSon(D) = LeftSon(Q)
            Dad(LeftSon(Q)) = D

            'The left son of P is now the left son of Q
            L = LeftSon(P)
            LeftSon(Q) = L
            Dad(L) = Q
        END IF
        'The right son of P is now the right son of Q
        R = RightSon(P)
        RightSon(Q) = R
        Dad(R) = Q
        'now P has no children
    END IF
    
    'Delete node P- replace it with node Q
    Dad(Q) = Dad(P)
    IF RightSon(Dad(P)) = P THEN
        RightSon(Dad(P)) = Q        'which way was P's father pointing twards
    ELSE                            'P? Right or left?
        LeftSon(Dad(P)) = Q
    END IF
    Dad(P) = Null                   'P bites the dust here

END SUB

SUB FillInputBuffer
    GET #1, , InBuffer$
    A& = SADD(InBuffer$): A& = A& - 65536 * (A& < 0)
    Iseg& = SSEG(InBuffer$) + (A& \ 16)
    IF Iseg& > 32767 THEN Iseg = Iseg& - 65536 ELSE Iseg = Iseg&
    IAddress = A& AND 15
    IEndAddress = IAddress + 4096
    DEF SEG = Iseg
END SUB

SUB InitTree
    'Initialize RightSon(FirstChar)...RightSon(LastChar) to null
    '(These make up the roots to the multiple binary search trees.)
    FOR Work = BufferSize + 1 TO BufferSize + 256
        RightSon(Work) = Null
    NEXT
    'Clear ring buffer's father pointers
    FOR Work = 0 TO BufferSize - 1
        Dad(Work) = Null
    NEXT
END SUB

'Inserts string Ring.Buffer(R) into tree- finds maximum match length and
'position too. If match length is the maximum match length, then the old
'node will be deleted for the new one to avoid having two nodes of the
'same string in the tree.
SUB InsertNode (R)
    Test = 1
    P = (BufferSize + 1) + Ring.Buffer(R) 'Find first tree
    RightSon(R) = Null
    LeftSon(R) = Null
    Match.Length = 0
    DO
        IF Test >= 0 THEN 'which way did he go?
            IF RightSon(P) <> Null THEN
                P = RightSon(P)
            ELSE
                RightSon(P) = R
                Dad(R) = P
                EXIT SUB
            END IF
        ELSE
            IF LeftSon(P) <> Null THEN
                P = LeftSon(P)
            ELSE
                LeftSon(P) = R
                Dad(R) = P
                EXIT SUB
            END IF
        END IF

        'Do an alphabetical comparison on Ring.Buffer(R) & Ring.Buffer(P)
        FOR Work = 1 TO MaxMatch - 1 'Compare
            Test = Ring.Buffer(R + Work) - Ring.Buffer(P + Work)
            IF Test <> 0 THEN EXIT FOR 'Exit if not equal
        NEXT
        IF Work > Match.Length THEN 'higher than current match length?
            Match.Position = P      'save match position and match length
            Match.Length = Work
        END IF

    LOOP UNTIL Work >= MaxMatch
    
   'The following code is only executed when the string found is of the
   'maximum match size.
   'The old string is deleted so the binary tree doesn't have two copies
   'of the same string for efficiency.
   
   'delete node P for node R

    Dad(R) = Dad(P)
    LeftSon(R) = LeftSon(P)
    RightSon(R) = RightSon(P)
    Dad(LeftSon(P)) = R
    Dad(RightSon(P)) = R

   'make P's father point twards R

    IF RightSon(Dad(P)) = P THEN
        RightSon(Dad(P)) = R
    ELSE
        LeftSon(Dad(P)) = R
    END IF

   'delete P

    Dad(P) = Null

END SUB

'Sends a multibit code to the output file.
SUB PutData (Stuff, Codesize) STATIC

    DEF SEG = Oseg 'switch to output segment

    Char& = Char& + Stuff * Shift(CurrentBit)  'attach bits to bit buffer
    CurrentBit = CurrentBit + Codesize         '+Codesize more bits now
    

    DO WHILE CurrentBit > 7 'we have at least one byte?
         IF OAddress = OEndAddress THEN 'At end of output buffer?
             PUT #2, , OutBuffer$
             OAddress = OStartAddress
         END IF
         POKE OAddress, Char& AND 255   'save 8 bits
         OAddress = OAddress + 1

        Char& = Char& \ 256
        CurrentBit = CurrentBit - 8     '8 less bits now
    LOOP

    DEF SEG = Iseg 'switch to input segment

END SUB

