123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379 |
- MODULE AdaptiveHuffman; (** AUTHOR GF; PUROSE "adaptive Huffman coding" *)
- (* adaptive Huffman compression, Vitter's algorithm *)
- IMPORT Streams;
- CONST
- AlphabetSize* = 256; (* byte *)
- BlockSize* = 8*1024;
- ScaleSize = 2*1024;
-
- Encode = 0; Decode = 1;
- TYPE
- Index = INTEGER; (* 16-bit integer to keep the table small *)
- Symbol = LONGINT;
-
- Node = RECORD
- weight: LONGINT; (* node weight *)
- up: Index; (* next node up the tree *)
- down: Index; (* pair of down nodes *)
- pattern: Symbol (* node pattern *)
- END;
- HuffmanCoder = OBJECT
- VAR
- mode: SHORTINT; (* encode, decode *)
- in: Streams.Reader; (* input from archive *)
- out: Streams.Writer; (* output to archive *)
- curByte, curBit: LONGINT;
-
- esc: Index; (* the current tree height *)
- root: Index; (* the root of the tree *)
- map: ARRAY AlphabetSize OF Index; (* mapping for symbols to nodes *)
- table: ARRAY 2*AlphabetSize + 4 OF Node; (* the coding table *)
-
-
-
- PROCEDURE &New( m: SHORTINT; input: Streams.Reader; output: Streams.Writer );
- BEGIN
- ASSERT( m IN {Encode, Decode} );
- mode := m;
- IF mode = Encode THEN out := output ELSE in := input END;
- END New;
-
- PROCEDURE Initialize;
- VAR i: Index;
- BEGIN
- root := 2*AlphabetSize + 4 - 1; curByte := 0;
- IF mode = Encode THEN curByte := 0; curBit := 0 ELSE curBit := -1 END;
- FOR i := 0 TO root DO
- table[i].up := 0; table[i].down := 0; table[i].pattern := 0; table[i].weight := 0
- END;
- FOR i := 0 TO AlphabetSize - 1 DO map[i] := 0 END;
- esc := root;
- END Initialize;
-
-
- PROCEDURE Finish; (* flush last few bits *)
- BEGIN
- ASSERT( mode = Encode );
- WHILE curBit # 0 DO PutBit( 0 ) END;
- out.Update
- END Finish;
-
-
-
- PROCEDURE GetBit( ): LONGINT;
- VAR
- bit: LONGINT; ch: CHAR;
- BEGIN
- IF curBit < 0 THEN
- in.Char( ch ); curByte := ORD( ch ); curBit := 7
- END;
- bit := ASH( curByte, -curBit ) MOD 2;
- DEC( curBit );
- RETURN bit
- END GetBit;
-
- PROCEDURE PutBit( bit: LONGINT );
- BEGIN
- curByte := 2*curByte + bit;
- INC( curBit );
- IF curBit > 7 THEN
- out.Char( CHR( curByte ) ); curByte := 0; curBit := 0
- END
- END PutBit;
-
-
- PROCEDURE GetPattern( ): Symbol;
- VAR sym, i, bit: LONGINT;
- BEGIN
- sym := 0; bit := 1;
- FOR i := 1 TO 8 DO
- IF GetBit() = 1 THEN sym := sym + bit END;
- bit := bit * 2;
- END;
- RETURN sym
- END GetPattern;
-
-
- (* send the bits for an escaped symbol *)
- PROCEDURE PutPattern( sym: Symbol );
- VAR i: LONGINT;
- BEGIN
- FOR i := 1 TO 8 DO
- PutBit( sym MOD 2 ); sym := sym DIV 2;
- END
- END PutPattern;
-
- (* split escape node to incorporate new symbol *)
- PROCEDURE AddSymbol( sym: Symbol ): Index;
- VAR pair, node: Index;
- BEGIN
- ASSERT( esc > 1 );
- pair := esc; node := esc - 1; esc := esc - 2 ;
-
- table[pair].down := node; table[pair].weight := 1;
-
- table[node].up := pair;
- table[node].down := 0; table[node].weight := 0; table[node].pattern := sym;
-
- table[esc].up := pair;
- table[esc].down := 0; table[esc].weight := 0;
-
- map[sym] := node;
-
- RETURN node;
- END AddSymbol;
-
- (* swap leaf to group leader position return symbol's new node *)
- PROCEDURE GroupLeader( node: Index ): Index;
- VAR
- leader: Index;
- weight: LONGINT;
- sym, prev: Symbol;
- BEGIN
- weight := table[node].weight; leader := node;
- WHILE (leader < root) & (weight = table[leader + 1].weight) DO INC( leader ) END;
- IF leader # node THEN
- (* swap the leaf nodes *)
- sym := table[node].pattern;
- prev := table[leader].pattern;
- table[leader].pattern := sym;
- table[node].pattern := prev;
- map[sym] := leader;
- map[prev] := node;
- END;
- RETURN leader
- END GroupLeader;
-
- (* slide internal node up over all leaves of equal weight;
- or exchange leaf with next smaller weight internal node.
- return node's new position *)
- PROCEDURE SlideNode( node: Index ): Index;
- VAR next: Index; swap: Node;
- BEGIN
- swap := table[node]; next := node + 1;
- (* if we're sliding an internal node, find the highest possible leaf to exchange with *)
- IF ODD( swap.weight ) THEN
- WHILE swap.weight > table[next+1].weight DO INC( next ) END
- END;
- (* swap the two nodes *)
- table[node] := table[next]; table[next] := swap;
- table[next].up := table[node].up; table[node].up := swap.up;
- (* repair the symbol map and tree structure *)
- IF ODD( swap.weight ) THEN
- table[swap.down].up := next;
- table[swap.down - 1].up := next;
- map[table[node].pattern] := node;
- ELSE
- table[table[node].down - 1].up := node;
- table[table[node].down].up := node;
- map[swap.pattern] := next;
- END;
- RETURN next;
- END SlideNode;
- (* increment symbol weight and re balance the tree. *)
- PROCEDURE IncWeight( node: Index );
- VAR up: Index;
- BEGIN
- (* obviate swapping a parent with its child: increment the leaf and proceed directly to its parent.
- otherwise, promote leaf to group leader position in the tree *)
- IF table[node].up = node + 1 THEN
- INC( table[node].weight, 2 ); INC( node );
- ELSE
- node := GroupLeader( node )
- END;
- (* increase the weight of each node and slide over any smaller weights ahead of it until reaching the root.
- internal nodes work upwards from their initial positions; while symbol nodes slide over first,
- then work up from their final positions. *)
- INC( table[node].weight, 2 ); up := table[node].up;
- WHILE up # 0 DO
- WHILE table[node].weight > table[node + 1].weight DO node := SlideNode( node) END;
- IF ODD( table[node].weight) THEN
- node := up
- ELSE
- node := table[node].up
- END;
- INC( table[node].weight, 2 ); up := table[node].up
- END;
- END IncWeight;
-
-
- (* scale all weights and rebalance the tree,
- zero weight nodes are removed from the tree by sliding them out the rank list *)
- PROCEDURE Scale;
- VAR node, prev: Index; weight: LONGINT;
-
- PROCEDURE Weight( idx: Index ): LONGINT;
- VAR w: LONGINT;
- BEGIN
- w := table[idx].weight;
- IF ODD( w ) THEN RETURN w - 1 ELSE RETURN w END
- END Weight;
-
- BEGIN
- node := esc + 1;
- (* work up the tree from the escape node scaling weights by the value of bits *)
- WHILE node <= root DO
- (* recompute the weight of internal nodes; slide down and out any unused ones *)
- weight := table[node].weight;
- IF ODD( weight ) THEN
- weight := Weight( table[node].down ) + Weight( table[node].down-1 ) + 1
- ELSE
- (* remove zero weight leaves by incrementing esc and removing them from the symbol map *)
- weight := weight DIV 2;
- IF ODD( weight ) THEN DEC( weight ) END;
- IF weight = 0 THEN
- map[table[node].pattern] := 0; INC( esc, 2 );
- END
- END;
- (* slide the scaled node back down over any previous nodes with larger weights *)
- table[node].weight := weight;
- prev := node - 1;
- WHILE weight < table[prev].weight DO IGNORE SlideNode( prev ); DEC( prev ) END;
- INC( node )
- END;
- (* prepare a new escape node *)
- table[esc].down := 0; table[esc].weight := 0;
- END Scale;
-
-
- (* encode the next symbol *)
- PROCEDURE EncodeByte( ch: CHAR );
- VAR
- code, n: LONGINT;
- up, idx, node: Index; sym: Symbol;
- BEGIN
- ASSERT( mode = Encode );
- (* for a new symbol, direct the receiver to the escape node *)
- sym := ORD( ch );
- node := map[sym]; idx := node;
- IF idx = 0 THEN idx := esc END;
- ASSERT( idx # 0 );
- (* accumulate the code bits by working up the tree from the node to the root *)
- code := 1; n := 0; up := table[idx].up;
- WHILE up # 0 DO
- code := code * 2; INC( n );
- IF ODD( idx ) THEN INC( code ) END;
- idx := up; up := table[idx].up
- END;
- (* send the code, root selector bit first *)
- WHILE n > 0 DO
- PutBit( code MOD 2 ); code := code DIV 2;
- DEC( n )
- END;
- (* send pattern and incorporate it into the tree *)
- IF node = 0 THEN
- PutPattern( sym );
- node := AddSymbol( sym );
- END;
- (* adjust and re-balance the tree *)
- IncWeight( node );
- END EncodeByte;
- (* decode the next symbol *)
- PROCEDURE DecodeByte( ): CHAR;
- VAR
- node, down: Index;
- sym: Symbol;
- BEGIN
- ASSERT( mode = Decode );
- (* work down the tree from the root until reaching either a leaf or the escape node.
- A one bit means go left, a zero means go right. *)
- node := root; down := table[node].down;
- WHILE down # 0 DO
- IF GetBit( ) = 1 THEN
- node := down-1
- ELSE
- node := down;
- END;
- down := table[node].down;
- END;
- (* sent to the escape node *)
- IF node = esc THEN
- sym := GetPattern( );
- node := AddSymbol( sym );
- ELSE
- sym := table[node].pattern
- END;
- (* adjust and re-balance the tree *)
- IncWeight( node );
- RETURN CHR( sym );
- END DecodeByte;
-
- END HuffmanCoder;
- TYPE
- Encoder* =OBJECT
- VAR
- huff: HuffmanCoder;
- out: Streams.Writer;
-
- PROCEDURE & New*( archive: Streams.Writer );
- BEGIN
- NEW( huff, Encode, NIL, archive );
- out := archive
- END New;
-
- PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
- VAR i: LONGINT;
- BEGIN
- out.RawNum( len );
- huff.Initialize;
- i := 0;
- WHILE i < len DO
- huff.EncodeByte( source[i] ); INC( i );
- IF (i MOD ScaleSize = 0) & ((len-i) >= ScaleSize) THEN huff.Scale END
- END;
- huff.Finish
- END CompressBlock;
- END Encoder;
-
-
-
- TYPE
- Decoder* =OBJECT
- VAR
- huff: HuffmanCoder;
- input: Streams.Reader;
-
- PROCEDURE & New*( archive: Streams.Reader );
- BEGIN
- NEW( huff, Decode, archive, NIL );
- input := archive
- END New;
-
- PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR ): LONGINT;
- VAR i, blockSize: LONGINT;
- BEGIN
- input.RawNum( blockSize );
- IF blockSize = 0 THEN (* end of stream *) RETURN 0 END;
- huff.Initialize;
- i := 0;
- WHILE i < blockSize DO
- buf[i] := huff.DecodeByte( ); INC( i );
- IF (i MOD ScaleSize = 0) & ((blockSize-i) >= ScaleSize) THEN huff.Scale END
- END;
- RETURN blockSize
- END ExtractBlock;
- END Decoder;
-
-
- END AdaptiveHuffman.
|