|
@@ -0,0 +1,379 @@
|
|
|
|
+MODULE AdaptiveHuffman; (** AUTHOR GF; PUROSE "adaptive Huffman coding" *)
|
|
|
|
+
|
|
|
|
+(* adaptive Huffman compression, Vitter's algorithm *)
|
|
|
|
+
|
|
|
|
+IMPORT Streams;
|
|
|
|
+
|
|
|
|
+CONST
|
|
|
|
+ AlphabetSize* = 256; (* byte *)
|
|
|
|
+ BlockSize* = 8*1024;
|
|
|
|
+ ScaleSize = 2*1024;
|
|
|
|
+
|
|
|
|
+ Encode = 0; Decode = 1;
|
|
|
|
+
|
|
|
|
+TYPE
|
|
|
|
+ Index = INTEGER; (* 16-bit integer to keep the table small *)
|
|
|
|
+ Symbol = LONGINT;
|
|
|
|
+
|
|
|
|
+ Node = RECORD
|
|
|
|
+ weight: LONGINT; (* node weight *)
|
|
|
|
+ up: Index; (* next node up the tree *)
|
|
|
|
+ down: Index; (* pair of down nodes *)
|
|
|
|
+ pattern: Symbol (* node pattern *)
|
|
|
|
+ END;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ HuffmanCoder = OBJECT
|
|
|
|
+ VAR
|
|
|
|
+ mode: SHORTINT; (* encode, decode *)
|
|
|
|
+ in: Streams.Reader; (* input from archive *)
|
|
|
|
+ out: Streams.Writer; (* output to archive *)
|
|
|
|
+ curByte, curBit: LONGINT;
|
|
|
|
+
|
|
|
|
+ esc: Index; (* the current tree height *)
|
|
|
|
+ root: Index; (* the root of the tree *)
|
|
|
|
+ map: ARRAY AlphabetSize OF Index; (* mapping for symbols to nodes *)
|
|
|
|
+ table: ARRAY 2*AlphabetSize + 4 OF Node; (* the coding table *)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ PROCEDURE &New( m: SHORTINT; input: Streams.Reader; output: Streams.Writer );
|
|
|
|
+ BEGIN
|
|
|
|
+ ASSERT( m IN {Encode, Decode} );
|
|
|
|
+ mode := m;
|
|
|
|
+ IF mode = Encode THEN out := output ELSE in := input END;
|
|
|
|
+ END New;
|
|
|
|
+
|
|
|
|
+ PROCEDURE Initialize;
|
|
|
|
+ VAR i: Index;
|
|
|
|
+ BEGIN
|
|
|
|
+ root := 2*AlphabetSize + 4 - 1; curByte := 0;
|
|
|
|
+ IF mode = Encode THEN curByte := 0; curBit := 0 ELSE curBit := -1 END;
|
|
|
|
+ FOR i := 0 TO root DO
|
|
|
|
+ table[i].up := 0; table[i].down := 0; table[i].pattern := 0; table[i].weight := 0
|
|
|
|
+ END;
|
|
|
|
+ FOR i := 0 TO AlphabetSize - 1 DO map[i] := 0 END;
|
|
|
|
+ esc := root;
|
|
|
|
+ END Initialize;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ PROCEDURE Finish; (* flush last few bits *)
|
|
|
|
+ BEGIN
|
|
|
|
+ ASSERT( mode = Encode );
|
|
|
|
+ WHILE curBit # 0 DO PutBit( 0 ) END;
|
|
|
|
+ out.Update
|
|
|
|
+ END Finish;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ PROCEDURE GetBit( ): LONGINT;
|
|
|
|
+ VAR
|
|
|
|
+ bit: LONGINT; ch: CHAR;
|
|
|
|
+ BEGIN
|
|
|
|
+ IF curBit < 0 THEN
|
|
|
|
+ in.Char( ch ); curByte := ORD( ch ); curBit := 7
|
|
|
|
+ END;
|
|
|
|
+ bit := ASH( curByte, -curBit ) MOD 2;
|
|
|
|
+ DEC( curBit );
|
|
|
|
+ RETURN bit
|
|
|
|
+ END GetBit;
|
|
|
|
+
|
|
|
|
+ PROCEDURE PutBit( bit: LONGINT );
|
|
|
|
+ BEGIN
|
|
|
|
+ curByte := 2*curByte + bit;
|
|
|
|
+ INC( curBit );
|
|
|
|
+ IF curBit > 7 THEN
|
|
|
|
+ out.Char( CHR( curByte ) ); curByte := 0; curBit := 0
|
|
|
|
+ END
|
|
|
|
+ END PutBit;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ PROCEDURE GetPattern( ): Symbol;
|
|
|
|
+ VAR sym, i, bit: LONGINT;
|
|
|
|
+ BEGIN
|
|
|
|
+ sym := 0; bit := 1;
|
|
|
|
+ FOR i := 1 TO 8 DO
|
|
|
|
+ IF GetBit() = 1 THEN sym := sym + bit END;
|
|
|
|
+ bit := bit * 2;
|
|
|
|
+ END;
|
|
|
|
+ RETURN sym
|
|
|
|
+ END GetPattern;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ (* send the bits for an escaped symbol *)
|
|
|
|
+ PROCEDURE PutPattern( sym: Symbol );
|
|
|
|
+ VAR i: LONGINT;
|
|
|
|
+ BEGIN
|
|
|
|
+ FOR i := 1 TO 8 DO
|
|
|
|
+ PutBit( sym MOD 2 ); sym := sym DIV 2;
|
|
|
|
+ END
|
|
|
|
+ END PutPattern;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ (* split escape node to incorporate new symbol *)
|
|
|
|
+ PROCEDURE AddSymbol( sym: Symbol ): Index;
|
|
|
|
+ VAR pair, node: Index;
|
|
|
|
+ BEGIN
|
|
|
|
+ ASSERT( esc > 1 );
|
|
|
|
+ pair := esc; node := esc - 1; esc := esc - 2 ;
|
|
|
|
+
|
|
|
|
+ table[pair].down := node; table[pair].weight := 1;
|
|
|
|
+
|
|
|
|
+ table[node].up := pair;
|
|
|
|
+ table[node].down := 0; table[node].weight := 0; table[node].pattern := sym;
|
|
|
|
+
|
|
|
|
+ table[esc].up := pair;
|
|
|
|
+ table[esc].down := 0; table[esc].weight := 0;
|
|
|
|
+
|
|
|
|
+ map[sym] := node;
|
|
|
|
+
|
|
|
|
+ RETURN node;
|
|
|
|
+ END AddSymbol;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ (* swap leaf to group leader position return symbol's new node *)
|
|
|
|
+ PROCEDURE GroupLeader( node: Index ): Index;
|
|
|
|
+ VAR
|
|
|
|
+ leader: Index;
|
|
|
|
+ weight: LONGINT;
|
|
|
|
+ sym, prev: Symbol;
|
|
|
|
+ BEGIN
|
|
|
|
+ weight := table[node].weight; leader := node;
|
|
|
|
+ WHILE (leader < root) & (weight = table[leader + 1].weight) DO INC( leader ) END;
|
|
|
|
+ IF leader # node THEN
|
|
|
|
+ (* swap the leaf nodes *)
|
|
|
|
+ sym := table[node].pattern;
|
|
|
|
+ prev := table[leader].pattern;
|
|
|
|
+ table[leader].pattern := sym;
|
|
|
|
+ table[node].pattern := prev;
|
|
|
|
+ map[sym] := leader;
|
|
|
|
+ map[prev] := node;
|
|
|
|
+ END;
|
|
|
|
+ RETURN leader
|
|
|
|
+ END GroupLeader;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ (* slide internal node up over all leaves of equal weight;
|
|
|
|
+ or exchange leaf with next smaller weight internal node.
|
|
|
|
+ return node's new position *)
|
|
|
|
+ PROCEDURE SlideNode( node: Index ): Index;
|
|
|
|
+ VAR next: Index; swap: Node;
|
|
|
|
+ BEGIN
|
|
|
|
+ swap := table[node]; next := node + 1;
|
|
|
|
+ (* if we're sliding an internal node, find the highest possible leaf to exchange with *)
|
|
|
|
+ IF ODD( swap.weight ) THEN
|
|
|
|
+ WHILE swap.weight > table[next+1].weight DO INC( next ) END
|
|
|
|
+ END;
|
|
|
|
+ (* swap the two nodes *)
|
|
|
|
+ table[node] := table[next]; table[next] := swap;
|
|
|
|
+ table[next].up := table[node].up; table[node].up := swap.up;
|
|
|
|
+
|
|
|
|
+ (* repair the symbol map and tree structure *)
|
|
|
|
+ IF ODD( swap.weight ) THEN
|
|
|
|
+ table[swap.down].up := next;
|
|
|
|
+ table[swap.down - 1].up := next;
|
|
|
|
+ map[table[node].pattern] := node;
|
|
|
|
+ ELSE
|
|
|
|
+ table[table[node].down - 1].up := node;
|
|
|
|
+ table[table[node].down].up := node;
|
|
|
|
+ map[swap.pattern] := next;
|
|
|
|
+ END;
|
|
|
|
+ RETURN next;
|
|
|
|
+ END SlideNode;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ (* increment symbol weight and re balance the tree. *)
|
|
|
|
+ PROCEDURE IncWeight( node: Index );
|
|
|
|
+ VAR up: Index;
|
|
|
|
+ BEGIN
|
|
|
|
+ (* obviate swapping a parent with its child: increment the leaf and proceed directly to its parent.
|
|
|
|
+ otherwise, promote leaf to group leader position in the tree *)
|
|
|
|
+ IF table[node].up = node + 1 THEN
|
|
|
|
+ INC( table[node].weight, 2 ); INC( node );
|
|
|
|
+ ELSE
|
|
|
|
+ node := GroupLeader( node )
|
|
|
|
+ END;
|
|
|
|
+ (* increase the weight of each node and slide over any smaller weights ahead of it until reaching the root.
|
|
|
|
+ internal nodes work upwards from their initial positions; while symbol nodes slide over first,
|
|
|
|
+ then work up from their final positions. *)
|
|
|
|
+ INC( table[node].weight, 2 ); up := table[node].up;
|
|
|
|
+ WHILE up # 0 DO
|
|
|
|
+ WHILE table[node].weight > table[node + 1].weight DO node := SlideNode( node) END;
|
|
|
|
+ IF ODD( table[node].weight) THEN
|
|
|
|
+ node := up
|
|
|
|
+ ELSE
|
|
|
|
+ node := table[node].up
|
|
|
|
+ END;
|
|
|
|
+ INC( table[node].weight, 2 ); up := table[node].up
|
|
|
|
+ END;
|
|
|
|
+ END IncWeight;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ (* scale all weights and rebalance the tree,
|
|
|
|
+ zero weight nodes are removed from the tree by sliding them out the rank list *)
|
|
|
|
+ PROCEDURE Scale;
|
|
|
|
+ VAR node, prev: Index; weight: LONGINT;
|
|
|
|
+
|
|
|
|
+ PROCEDURE Weight( idx: Index ): LONGINT;
|
|
|
|
+ VAR w: LONGINT;
|
|
|
|
+ BEGIN
|
|
|
|
+ w := table[idx].weight;
|
|
|
|
+ IF ODD( w ) THEN RETURN w - 1 ELSE RETURN w END
|
|
|
|
+ END Weight;
|
|
|
|
+
|
|
|
|
+ BEGIN
|
|
|
|
+ node := esc + 1;
|
|
|
|
+ (* work up the tree from the escape node scaling weights by the value of bits *)
|
|
|
|
+ WHILE node <= root DO
|
|
|
|
+ (* recompute the weight of internal nodes; slide down and out any unused ones *)
|
|
|
|
+ weight := table[node].weight;
|
|
|
|
+ IF ODD( weight ) THEN
|
|
|
|
+ weight := Weight( table[node].down ) + Weight( table[node].down-1 ) + 1
|
|
|
|
+ ELSE
|
|
|
|
+ (* remove zero weight leaves by incrementing esc and removing them from the symbol map *)
|
|
|
|
+ weight := weight DIV 2;
|
|
|
|
+ IF ODD( weight ) THEN DEC( weight ) END;
|
|
|
|
+ IF weight = 0 THEN
|
|
|
|
+ map[table[node].pattern] := 0; INC( esc, 2 );
|
|
|
|
+ END
|
|
|
|
+ END;
|
|
|
|
+ (* slide the scaled node back down over any previous nodes with larger weights *)
|
|
|
|
+ table[node].weight := weight;
|
|
|
|
+ prev := node - 1;
|
|
|
|
+ WHILE weight < table[prev].weight DO IGNORE SlideNode( prev ); DEC( prev ) END;
|
|
|
|
+ INC( node )
|
|
|
|
+ END;
|
|
|
|
+ (* prepare a new escape node *)
|
|
|
|
+ table[esc].down := 0; table[esc].weight := 0;
|
|
|
|
+ END Scale;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ (* encode the next symbol *)
|
|
|
|
+ PROCEDURE EncodeByte( ch: CHAR );
|
|
|
|
+ VAR
|
|
|
|
+ code, n: LONGINT;
|
|
|
|
+ up, idx, node: Index; sym: Symbol;
|
|
|
|
+ BEGIN
|
|
|
|
+ ASSERT( mode = Encode );
|
|
|
|
+ (* for a new symbol, direct the receiver to the escape node *)
|
|
|
|
+ sym := ORD( ch );
|
|
|
|
+ node := map[sym]; idx := node;
|
|
|
|
+ IF idx = 0 THEN idx := esc END;
|
|
|
|
+ ASSERT( idx # 0 );
|
|
|
|
+ (* accumulate the code bits by working up the tree from the node to the root *)
|
|
|
|
+ code := 1; n := 0; up := table[idx].up;
|
|
|
|
+ WHILE up # 0 DO
|
|
|
|
+ code := code * 2; INC( n );
|
|
|
|
+ IF ODD( idx ) THEN INC( code ) END;
|
|
|
|
+ idx := up; up := table[idx].up
|
|
|
|
+ END;
|
|
|
|
+ (* send the code, root selector bit first *)
|
|
|
|
+ WHILE n > 0 DO
|
|
|
|
+ PutBit( code MOD 2 ); code := code DIV 2;
|
|
|
|
+ DEC( n )
|
|
|
|
+ END;
|
|
|
|
+ (* send pattern and incorporate it into the tree *)
|
|
|
|
+ IF node = 0 THEN
|
|
|
|
+ PutPattern( sym );
|
|
|
|
+ node := AddSymbol( sym );
|
|
|
|
+ END;
|
|
|
|
+ (* adjust and re-balance the tree *)
|
|
|
|
+ IncWeight( node );
|
|
|
|
+ END EncodeByte;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ (* decode the next symbol *)
|
|
|
|
+ PROCEDURE DecodeByte( ): CHAR;
|
|
|
|
+ VAR
|
|
|
|
+ node, down: Index;
|
|
|
|
+ sym: Symbol;
|
|
|
|
+ BEGIN
|
|
|
|
+ ASSERT( mode = Decode );
|
|
|
|
+ (* work down the tree from the root until reaching either a leaf or the escape node.
|
|
|
|
+ A one bit means go left, a zero means go right. *)
|
|
|
|
+ node := root; down := table[node].down;
|
|
|
|
+ WHILE down # 0 DO
|
|
|
|
+ IF GetBit( ) = 1 THEN
|
|
|
|
+ node := down-1
|
|
|
|
+ ELSE
|
|
|
|
+ node := down;
|
|
|
|
+ END;
|
|
|
|
+ down := table[node].down;
|
|
|
|
+ END;
|
|
|
|
+ (* sent to the escape node *)
|
|
|
|
+ IF node = esc THEN
|
|
|
|
+ sym := GetPattern( );
|
|
|
|
+ node := AddSymbol( sym );
|
|
|
|
+ ELSE
|
|
|
|
+ sym := table[node].pattern
|
|
|
|
+ END;
|
|
|
|
+ (* adjust and re-balance the tree *)
|
|
|
|
+ IncWeight( node );
|
|
|
|
+ RETURN CHR( sym );
|
|
|
|
+ END DecodeByte;
|
|
|
|
+
|
|
|
|
+ END HuffmanCoder;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+TYPE
|
|
|
|
+ Encoder* =OBJECT
|
|
|
|
+ VAR
|
|
|
|
+ huff: HuffmanCoder;
|
|
|
|
+ out: Streams.Writer;
|
|
|
|
+
|
|
|
|
+ PROCEDURE & New*( archive: Streams.Writer );
|
|
|
|
+ BEGIN
|
|
|
|
+ NEW( huff, Encode, NIL, archive );
|
|
|
|
+ out := archive
|
|
|
|
+ END New;
|
|
|
|
+
|
|
|
|
+ PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
|
|
|
|
+ VAR i: LONGINT;
|
|
|
|
+ BEGIN
|
|
|
|
+ out.RawNum( len );
|
|
|
|
+ huff.Initialize;
|
|
|
|
+ i := 0;
|
|
|
|
+ WHILE i < len DO
|
|
|
|
+ huff.EncodeByte( source[i] ); INC( i );
|
|
|
|
+ IF (i MOD ScaleSize = 0) & ((len-i) >= ScaleSize) THEN huff.Scale END
|
|
|
|
+ END;
|
|
|
|
+ huff.Finish
|
|
|
|
+ END CompressBlock;
|
|
|
|
+
|
|
|
|
+ END Encoder;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+TYPE
|
|
|
|
+ Decoder* =OBJECT
|
|
|
|
+ VAR
|
|
|
|
+ huff: HuffmanCoder;
|
|
|
|
+ input: Streams.Reader;
|
|
|
|
+
|
|
|
|
+ PROCEDURE & New*( archive: Streams.Reader );
|
|
|
|
+ BEGIN
|
|
|
|
+ NEW( huff, Decode, archive, NIL );
|
|
|
|
+ input := archive
|
|
|
|
+ END New;
|
|
|
|
+
|
|
|
|
+ PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR ): LONGINT;
|
|
|
|
+ VAR i, blockSize: LONGINT;
|
|
|
|
+ BEGIN
|
|
|
|
+ input.RawNum( blockSize );
|
|
|
|
+ IF blockSize = 0 THEN (* end of stream *) RETURN 0 END;
|
|
|
|
+ huff.Initialize;
|
|
|
|
+ i := 0;
|
|
|
|
+ WHILE i < blockSize DO
|
|
|
|
+ buf[i] := huff.DecodeByte( ); INC( i );
|
|
|
|
+ IF (i MOD ScaleSize = 0) & ((blockSize-i) >= ScaleSize) THEN huff.Scale END
|
|
|
|
+ END;
|
|
|
|
+ RETURN blockSize
|
|
|
|
+ END ExtractBlock;
|
|
|
|
+
|
|
|
|
+ END Decoder;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+END AdaptiveHuffman.
|
|
|
|
+
|
|
|
|
+
|