'Huffman encoder
'by Rich Geldreich May 29th, 1992
'This program is in the public domain.
DEFINT A-Z
DECLARE SUB InitTree ()
DECLARE SUB MakeSortTable ()
DECLARE SUB CombineTree ()
DECLARE SUB CleanUpTree ()
DECLARE SUB WriteTree ()

DECLARE SUB SortDistribution2 ()
DECLARE SUB SortDistribution ()
DECLARE SUB GetDistribution ()
DECLARE SUB RecurseTree (Node)

DECLARE SUB FillBuffer ()


CONST True = -1, False = 0
CONST Null = -2
CONST BufferLength = 10000

CLEAR , , 10000

DIM SHARED Father(512) AS LONG, LeftSon(512), RightSon(512)
DIM SHARED Index(512), RealIndex, Used(255) AS LONG
DIM SHARED Pointer(255), HighestEntry
DIM SHARED Code(255, 40), CodeLength(255)
DIM SHARED CurrentLength, CurrentCode(40)

DIM SHARED Buffer$, Address, EndAddress, Bits(8), CurrentByte, CurrentBit
DIM SHARED BufferSeg


LOCATE , , 1


Bits:
    DATA 1,2,4,8,16,32,64,128,256

'read the bit masks
RESTORE Bits
FOR A = 0 TO 8: READ Bits(A): NEXT

'initilize the tree
InitTree

'initlize the input buffer
Buffer$ = STRING$(BufferLength, 0)
EndAddress = 1: Address = 0

PRINT "Getting Distribution:";
'open input file
OPEN COMMAND$ FOR BINARY AS #1
'check to see if it exists
IF LOF(1) = 0 THEN
    CLOSE #1
    KILL COMMAND$
    PRINT
    PRINT COMMAND$; " not found"
    END
END IF
'read the input file and gather the distribution of each character
GetDistribution
'make a sorting table
MakeSortTable
'sort the table with the Shell Metzer sort
SortDistribution
'combine the tree until there is only one node at the "top"
CombineTree
'work down the tree finding codes which represent each character
TopOfTree = Pointer(0)
CurrentLength = 0
RecurseTree TopOfTree
'for debugging: prints the code for each character
'FOR A = 0 TO 255
'    IF Used(A) > 256 THEN
'        PRINT A;
'        FOR B = 0 TO CodeLength(A)
'            PRINT Code(A, B);
'        NEXT
'        PRINT
'    END IF
'NEXT
'END
'"cleans" the tree up so it can be sent as small as possible
CleanUpTree

CurrentByte = 0: CurrentBit = 0
RealIndex = RealIndex - 1
'open output file
OPEN "output.huf" FOR BINARY AS #2
'kill file if it already exists
IF LOF(2) <> 0 THEN
    CLOSE #2
    KILL "output.huf"
    OPEN "output.huf" FOR BINARY AS #2
END IF

'put the header
A& = LOF(1)
PUT #2, , A&            'number of bytes in original file
PUT #2, , RealIndex     'number of nodes in tree
Top = Index(TopOfTree)
PUT #2, , Top           'top of tree

WriteTree               'writes the tree to the output file

'compresses the input file
PRINT : PRINT "Encoding...": PRINT : PRINT
Ypos = CSRLIN - 2

SEEK #1, 1
EndAddress = 1: Address = 0
'initilize the output buffer
A$ = STRING$(5000, 0)
A& = SADD(A$)
A& = A& - 65536 * (A& < 0)
OBufferSeg = VARSEG(A$) + (A& \ 16)
OAddress = (A& MOD 16)
OEndAddress = OAddress + 5000
Ostart = OAddress
'start compressing
FOR A& = 1 TO LOF(1)
   
    'get a byte from the input file
    Address = Address + 1
    'if Address=EndBuffer then it's time to fill the input buffer
    IF Address = EndAddress THEN FillBuffer
    B = PEEK(Address)
    'send out all of the bits that represent the input character
    FOR C = 0 TO CodeLength(B)
        IF Code(B, C) THEN
            CurrentByte = CurrentByte * 2 OR 1      'send "1"
        ELSE
            CurrentByte = CurrentByte * 2           'send "0"
        END IF
        CurrentBit = CurrentBit + 1
        'if CurrentBit=8 then we have a complete byte
        IF CurrentBit = 8 THEN
            DEF SEG = OBufferSeg
            POKE OAddress, CurrentByte
            OAddress = OAddress + 1
            'if Oaddress=Oendaddress then it's time to flush the
            'output buffer
            IF OAddress = OEndAddress THEN
                PUT #2, , A$
                B& = SADD(A$)
                B& = B& - 65536 * (B& < 0)
                OBufferSeg = VARSEG(A$) + (B& \ 16)
                OAddress = (B& MOD 16)
                OEndAddress = OAddress + 5000
                Ostart = OAddress
            END IF
            CurrentByte = 0: CurrentBit = 0
            DEF SEG = BufferSeg
        END IF
    NEXT
    'see if it's time to update screen
    PrintCount = PrintCount + 1
    IF PrintCount = 1024 THEN
        PrintCount = 0
        LOCATE Ypos, 1
        PRINT "Bytes In:"; A&; (A& * 100&) \ LOF(1); "%  "
        B& = LOF(2) + OAddress - Ostart
        PRINT "Bytes Out:"; B&; "   "
        PRINT "Compression:"; 100 - (B& * 100&) \ A&; "% ";
    END IF
NEXT
'put whatever is left of the byte buffer into the output buffer
DO UNTIL CurrentBit = 8
    CurrentByte = CurrentByte * 2
    CurrentBit = CurrentBit + 1
LOOP

DEF SEG = OBufferSeg
POKE OAddress, CurrentByte
A$ = LEFT$(A$, OAddress + 1 - Ostart)
PUT #2, , A$
'report compression
LOCATE Ypos, 1
PRINT "Bytes In:"; LOF(1); SPACE$(16)
PRINT "Bytes Out:"; LOF(2); SPACE$(16)
PRINT "Overall Compression:"; 100 - (LOF(2) * 100&) \ LOF(1); "%"; SPACE$(16);
CLOSE

END

'"Cleans" up the tree so it can be sent.
SUB CleanUpTree
    RealIndex = 0
    FOR A = 0 TO 512
        B& = Father(A)
        IF B& <> Null THEN
            IF B& < 256 THEN
                IF Used(B&) > 256 THEN
                    Index(A) = RealIndex
                    RealIndex = RealIndex + 1
                END IF
            ELSEIF B& > 256 THEN
                Index(A) = RealIndex
                RealIndex = RealIndex + 1
            END IF
        END IF
    NEXT

    FOR A = 0 TO 512
        B& = Father(A)
        IF B& <> Null THEN
            IF B& < 256 THEN
                IF Used(B&) > 256 THEN
                    IF LeftSon(A) <> Null THEN
                        LeftSon(A) = Index(LeftSon(A))
                    END IF
                    IF RightSon(A) <> Null THEN
                        RightSon(A) = Index(RightSon(A))
                    END IF
                END IF
            ELSEIF B& > 256 THEN
                IF LeftSon(A) <> Null THEN
                    LeftSon(A) = Index(LeftSon(A))
                END IF
                IF RightSon(A) <> Null THEN
                    RightSon(A) = Index(RightSon(A))
                END IF
            END IF
        END IF
    NEXT
END SUB

'Combines the tree until there is only one node at the top.
SUB CombineTree
    
    Parents = HighestEntry + 1
    DO UNTIL Parents = 1
        'sort the current distribution
        SortDistribution2
        'find the lowest 2 entries
        Lowest = Pointer(HighestEntry)
        NextLowest = Pointer(HighestEntry - 1)
        'find new frequency
        NewFrequency& = Father(Lowest) + Father(NextLowest) - 256
        'combine the two nodes
        IF RightSon(Lowest) = Null AND RightSon(NextLowest) = Null THEN
            Father(NextLowest) = NewFrequency&
            RightSon(NextLowest) = LeftSon(Lowest)
            Father(Lowest) = Null
            Parents = Parents - 1
            HighestEntry = HighestEntry - 1
        ELSEIF RightSon(Lowest) = Null AND RightSon(NextLowest) <> Null THEN
            Father(Lowest) = NewFrequency&
            RightSon(Lowest) = NextLowest
            Pointer(HighestEntry - 1) = Pointer(HighestEntry)
            Parents = Parents - 1
            HighestEntry = HighestEntry - 1
        ELSEIF RightSon(Lowest) <> Null AND RightSon(NextLowest) = Null THEN
            Father(NextLowest) = NewFrequency&
            RightSon(NextLowest) = Lowest
            Parents = Parents - 1
            HighestEntry = HighestEntry - 1
        ELSEIF RightSon(Lowest) <> Null AND RightSon(NextLowest) <> Null THEN
            'search for new node
            FOR A = 512 TO 0 STEP -1
                IF Father(A) = Null THEN EXIT FOR
            NEXT
            Father(A) = NewFrequency&
            LeftSon(A) = Lowest
            RightSon(A) = NextLowest
      
            HighestEntry = HighestEntry - 1
            Pointer(HighestEntry) = A
            Parents = Parents - 1
        END IF
    'loop until there is only one node at the top
    LOOP

END SUB

'Fills the input buffer.
SUB FillBuffer
    GET #1, , Buffer$

    A& = SADD(Buffer$)
    A& = A& - 65536 * (A& < 0)
    BufferSeg = VARSEG(Buffer$) + (A& \ 16)
    Address = (A& MOD 16)
    EndAddress = Address + BufferLength
    DEF SEG = BufferSeg

END SUB

'Scans the input file for it's distribution.
SUB GetDistribution
       
    FOR A& = 1 TO LOF(1)
        Address = Address + 1
        IF Address = EndAddress THEN
            FillBuffer
            PRINT ".";
        END IF
        B = PEEK(Address) * 2
        Father(B) = Father(B) + 1
    NEXT
    B = 0
    FOR A = 0 TO 510 STEP 2
        Used(B) = Father(A): B = B + 1
    NEXT
END SUB

'Initilizes the tree.
SUB InitTree
    B = 0
    FOR A = 0 TO 510 STEP 2
  
        Father(A) = 256
        LeftSon(A) = A + 1
        RightSon(A) = Null
  
        Father(A + 1) = B
        LeftSon(A + 1) = Null
        RightSon(A + 1) = Null
  
        B = B + 1
    NEXT
END SUB

'Makes a sorting table.
SUB MakeSortTable
    HighestEntry = 0
    FOR A = 0 TO 510 STEP 2
        IF Father(A) > 256 THEN
            Pointer(HighestEntry) = A
            HighestEntry = HighestEntry + 1
        END IF
    NEXT
    HighestEntry = HighestEntry - 1
END SUB

'Recursize procedure to go down the tree and build up codes
'that represent each character.
SUB RecurseTree (Node)
    'are we at a character?
    IF Father(Node) < 256 THEN
        'yup! we CurrentCode() has this character's bit sequence
        Char = Father(Node)
        FOR A = 0 TO CurrentLength - 1
            Code(Char, A) = CurrentCode(A)
        NEXT
        CodeLength(Char) = CurrentLength - 1
    END IF
    'go to the left if there's something there
    IF LeftSon(Node) <> Null THEN
        CurrentCode(CurrentLength) = 1      'add "1" to the current code
        CurrentLength = CurrentLength + 1
        RecurseTree LeftSon(Node)           'go down
        CurrentLength = CurrentLength - 1   'take "1" from the current code
    END IF
    'go to the right if there's something there
    IF RightSon(Node) <> Null THEN
        CurrentCode(CurrentLength) = 0      'add "0" to the current code
        CurrentLength = CurrentLength + 1
        RecurseTree RightSon(Node)          'got down
        CurrentLength = CurrentLength - 1   'take "0" from the current code
    END IF
END SUB

'A REAL Shell sort follows. It is much faster than the well-known one.
'Sorts the nodes according to the sorting table.
SUB SortDistribution
    Offset = HighestEntry \ 2
    DO
        FOR I = 0 TO HighestEntry - Offset
            IF Father(Pointer(I)) < Father(Pointer(I + Offset)) THEN
                SWAP Pointer(I), Pointer(I + Offset)
                CompareLow = I - Offset
                CompareHigh = I
                DO WHILE CompareLow >= 0
                    IF Father(Pointer(CompareLow)) < Father(Pointer(CompareHigh)) THEN
                        SWAP Pointer(CompareLow), Pointer(CompareHigh)
                        CompareHigh = CompareLow
                        CompareLow = CompareLow - Offset
                    ELSE
                        EXIT DO
                    END IF
                LOOP
            END IF
        NEXT
        Offset = Offset \ 2
    LOOP WHILE Offset > 0
    

END SUB

'A simple bubble sort... used while combining the tree.
SUB SortDistribution2
    
    DO
        SwapFlag = False
        FOR A = HighestEntry - 1 TO 0 STEP -1
            IF Father(Pointer(A + 1)) > Father(Pointer(A)) THEN
                SWAP Pointer(A + 1), Pointer(A)
                SwapFlag = True
            END IF
        NEXT
    LOOP WHILE SwapFlag
    
END SUB

'Writes the tree to disk.
SUB WriteTree
    

    FOR A = 0 TO 512
        B& = Father(A)
        IF B& <> Null THEN
            IF B& < 256 THEN
                IF Used(B&) > 256 THEN
                    GOSUB SendOne
                    FOR C = 0 TO 7
                        IF (B& AND Bits(C)) > 0 THEN
                            GOSUB SendOne
                        ELSE
                            GOSUB SendZero
                        END IF
                    NEXT
                END IF
            ELSEIF B& > 256 THEN
                GOSUB SendZero
                IF LeftSon(A) <> Null THEN
                    GOSUB SendOne
                    Son = LeftSon(A)
               
                    FOR C = 0 TO 8
                        IF (Son AND Bits(C)) > 0 THEN
                            GOSUB SendOne
                        ELSE
                            GOSUB SendZero
                        END IF
                    NEXT
                ELSE
                    GOSUB SendZero
                END IF
                IF RightSon(A) <> Null THEN
                    GOSUB SendOne
                    Son = RightSon(A)
                   
                    FOR C = 0 TO 8
                        IF (Son AND Bits(C)) > 0 THEN
                            GOSUB SendOne
                        ELSE
                            GOSUB SendZero
                        END IF
                    NEXT
                ELSE
                    GOSUB SendZero
                END IF
            END IF
        END IF
    NEXT

    EXIT SUB

SendZero:
    CurrentByte = CurrentByte * 2
    CurrentBit = CurrentBit + 1
    IF CurrentBit = 8 THEN
        A$ = CHR$(CurrentByte)
        PUT #2, , A$
        CurrentByte = 0: CurrentBit = 0
    END IF
RETURN

SendOne:
   
    CurrentByte = CurrentByte * 2 OR 1
    CurrentBit = CurrentBit + 1
    IF CurrentBit = 8 THEN
        A$ = CHR$(CurrentByte)
        PUT #2, , A$
        CurrentByte = 0: CurrentBit = 0
    END IF
RETURN

END SUB

