123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387 |
- MODULE AdaptiveHuffman; (** AUTHOR GF; PUROSE "adaptive Huffman coding" *)
- (* adaptive Huffman compression, Vitter's FGK algorithm *)
- IMPORT Streams;
- CONST
- AlphabetSize = 256; (* byte *)
- BlockSize* = 8*1024;
- ScaleLimit = 4*1024;
- Encode = 0; Decode = 1;
- TYPE
- BitReader = OBJECT
- VAR
- in: Streams.Reader;
- curByte, curBit: LONGINT;
- PROCEDURE &New( r: Streams.Reader );
- BEGIN
- in := r; curBit := -1; curByte := 0
- END New;
- PROCEDURE Initialize;
- BEGIN
- curBit := -1; curByte := 0
- END Initialize;
- PROCEDURE Bit( ): LONGINT;
- VAR
- bit: LONGINT; ch: CHAR;
- BEGIN
- IF curBit < 0 THEN
- in.Char( ch ); curByte := ORD( ch ); curBit := 7
- END;
- bit := ASH( curByte, -curBit ) MOD 2;
- DEC( curBit );
- RETURN bit
- END Bit;
- END BitReader;
- TYPE
- BitWriter = OBJECT
- VAR
- out: Streams.Writer;
- curByte, curBit: LONGINT;
- PROCEDURE &New( w: Streams.Writer );
- BEGIN
- out := w; curBit := 0; curByte := 0
- END New;
- PROCEDURE Bit( bit: LONGINT );
- BEGIN
- curByte := 2*curByte + bit;
- INC( curBit );
- IF curBit > 7 THEN
- out.Char( CHR( curByte ) ); curByte := 0; curBit := 0
- END
- END Bit;
- PROCEDURE Finish; (* flush last few bits *)
- BEGIN
- WHILE curBit # 0 DO Bit( 0 ) END;
- out.Update
- END Finish;
- END BitWriter;
- TYPE
- HuffmanCoder = OBJECT
- TYPE
- Index = INTEGER; (* 16-bit integer to keep the table small *)
- Pattern = INTEGER;
- Node = RECORD
- weight: INTEGER; (* node weight *)
- pattern: Pattern; (* node pattern (if weight is even, leaf node) *)
- up: Index; (* next node up the tree *)
- down: Index (* pair of down nodes (if weight is odd, internal node) *)
- END;
- VAR
- mode: SHORTINT; (* Encode, Decode *)
- in: BitReader; (* input from archive *)
- out: BitWriter; (* output to archive *)
- esc: Index; (* the current escape node *)
- root: Index; (* the root of the tree *)
- map: ARRAY AlphabetSize OF Index; (* mapping of patterns to nodes *)
- table: ARRAY 2*AlphabetSize + 2 OF Node; (* the Huffman tree *)
- PROCEDURE &New( m: SHORTINT; input: Streams.Reader; output: Streams.Writer );
- BEGIN
- ASSERT( m IN {Encode, Decode} );
- mode := m;
- IF mode = Encode THEN NEW( out, output ) ELSE NEW( in, input ) END;
- END New;
- PROCEDURE Initialize;
- VAR i: Index;
- BEGIN
- root := 2*AlphabetSize + 2 - 1;
- FOR i := 0 TO root DO
- table[i].up := 0; table[i].down := 0; table[i].pattern := 0; table[i].weight := 0
- END;
- FOR i := 0 TO AlphabetSize - 1 DO map[i] := 0 END;
- esc := root;
- IF mode = Decode THEN in.Initialize END
- END Initialize;
- PROCEDURE Finish;
- BEGIN
- ASSERT( mode = Encode );
- out.Finish (* flush last few bits *)
- END Finish;
- PROCEDURE GetPattern( ): Pattern;
- VAR i: INTEGER; patt: Pattern;
- BEGIN
- patt := 0;
- FOR i := 0 TO 7 DO
- IF in.Bit() = 1 THEN patt := patt + INTEGER(ASH( 1, i )) END;
- END;
- RETURN patt
- END GetPattern;
- PROCEDURE PutPattern( patt: Pattern );
- VAR i: LONGINT;
- BEGIN
- FOR i := 0 TO 7 DO
- out.Bit( patt MOD 2 ); patt := patt DIV 2;
- END
- END PutPattern;
- (* split escape node to incorporate a new pattern *)
- PROCEDURE AddPattern( patt: Pattern ): Index;
- VAR pair, node: Index;
- BEGIN
- ASSERT( esc > 1 );
- pair := esc; node := esc - 1; esc := esc - 2 ;
- table[pair].down := node; table[pair].weight := 1;
- table[node].up := pair;
- table[node].down := 0; table[node].weight := 0; table[node].pattern := patt;
- table[esc].up := pair;
- table[esc].down := 0; table[esc].weight := 0;
- map[patt] := node;
- RETURN node;
- END AddPattern;
- (* swap leaf to group leader position, return pattern's new node *)
- PROCEDURE GroupLeader( node: Index ): Index;
- VAR
- leader: Index;
- weight: LONGINT;
- patt, prev: Pattern;
- BEGIN
- weight := table[node].weight; leader := node;
- WHILE (leader < root) & (weight = table[leader + 1].weight) DO INC( leader ) END;
- IF leader # node THEN
- (* swap the leaf nodes *)
- patt := table[node].pattern;
- prev := table[leader].pattern;
- table[leader].pattern := patt;
- table[node].pattern := prev;
- map[patt] := leader;
- map[prev] := node;
- END;
- RETURN leader
- END GroupLeader;
- (* slide internal node up over all leaves of equal weight
- or exchange leaf with next smaller weight internal node.
- return node's new position *)
- PROCEDURE SlideNode( node: Index ): Index;
- VAR next: Index; swap: Node;
- BEGIN
- swap := table[node]; next := node + 1;
- (* if we're sliding an internal node, find the highest possible leaf to exchange with *)
- IF ODD( swap.weight ) THEN
- WHILE swap.weight > table[next+1].weight DO INC( next ) END
- END;
- (* swap the two nodes *)
- table[node] := table[next]; table[next] := swap;
- table[next].up := table[node].up; table[node].up := swap.up;
- (* repair the pattern map and tree structure *)
- IF ODD( swap.weight ) THEN
- table[swap.down].up := next;
- table[swap.down - 1].up := next;
- map[table[node].pattern] := node;
- ELSE
- table[table[node].down - 1].up := node;
- table[table[node].down].up := node;
- map[swap.pattern] := next;
- END;
- RETURN next;
- END SlideNode;
- (* increment pattern weight and re balance the tree. *)
- PROCEDURE IncrementWeight( node: Index );
- VAR up: Index;
- BEGIN
- (* obviate swapping a parent with its child: increment the leaf and proceed directly to its parent.
- otherwise, promote leaf to group leader position in the tree *)
- IF table[node].up = node + 1 THEN
- INC( table[node].weight, 2 ); INC( node );
- ELSE
- node := GroupLeader( node )
- END;
- (* increase the weight of each node and slide over any smaller weights ahead of it until reaching the root.
- internal nodes work upwards from their initial positions; while pattern nodes slide over first,
- then work up from their final positions. *)
- INC( table[node].weight, 2 ); up := table[node].up;
- WHILE up # 0 DO
- WHILE table[node].weight > table[node + 1].weight DO node := SlideNode( node) END;
- IF ODD( table[node].weight) THEN
- node := up
- ELSE
- node := table[node].up
- END;
- INC( table[node].weight, 2 ); up := table[node].up
- END;
- END IncrementWeight;
- (* scale all weights and rebalance the tree,
- zero weight nodes are removed from the tree *)
- PROCEDURE Scale;
- VAR node, prev: Index; weight: INTEGER;
- PROCEDURE Weight( idx: Index ): INTEGER;
- VAR w: INTEGER;
- BEGIN
- w := table[idx].weight;
- IF ODD( w ) THEN RETURN w - 1 ELSE RETURN w END
- END Weight;
- BEGIN
- node := esc + 1;
- (* work up the tree from the escape node scaling weights *)
- WHILE node <= root DO
- weight := table[node].weight;
- IF ODD( weight ) THEN
- (* recompute the weight of internal nodes *)
- weight := Weight( table[node].down ) + Weight( table[node].down-1 ) + 1
- ELSE
- (* remove zero weight leaves and remove them from the pattern map *)
- weight := weight DIV 2;
- IF ODD( weight ) THEN DEC( weight ) END;
- IF weight = 0 THEN
- map[table[node].pattern] := 0; INC( esc, 2 );
- END
- END;
- (* slide the scaled node back down over any previous nodes with larger weights *)
- table[node].weight := weight;
- prev := node - 1;
- WHILE weight < table[prev].weight DO IGNORE SlideNode( prev ); DEC( prev ) END;
- INC( node )
- END;
- (* prepare a new escape node *)
- table[esc].down := 0; table[esc].weight := 0;
- END Scale;
- PROCEDURE EncodeByte( ch: CHAR );
- VAR
- code, bits: LONGINT;
- cur, node: Index; patt: Pattern;
- BEGIN
- patt := ORD( ch ); node := map[patt];
- (* accumulate the code bits by working up from the node to the root *)
- cur := node; code := 0; bits := 0;
- IF cur = 0 THEN cur := esc END;
- WHILE table[cur].up # 0 DO
- IF ODD( cur ) THEN code := code*2 + 1 ELSE code := code*2 END;
- INC( bits ); cur := table[cur].up
- END;
- (* send the code *)
- WHILE bits > 0 DO
- out.Bit( code MOD 2 ); code := code DIV 2;
- DEC( bits )
- END;
- IF node = 0 THEN
- (* send the new pattern and incorporate it into the tree *)
- PutPattern( patt ); node := AddPattern( patt );
- END;
- IncrementWeight( node );
- END EncodeByte;
- PROCEDURE ExtractByte( ): CHAR;
- VAR
- node, down: Index;
- patt: Pattern;
- BEGIN
- (* work down the tree from the root until reaching either a leaf or the escape node *)
- node := root; down := table[node].down;
- WHILE down # 0 DO
- IF in.Bit( ) = 1 THEN node := down - 1 ELSE node := down END;
- down := table[node].down;
- END;
- IF node = esc THEN
- (* add the new pattern to the tree *)
- patt := GetPattern( ); node := AddPattern( patt )
- ELSE
- patt := table[node].pattern
- END;
- IncrementWeight( node );
- RETURN CHR( patt );
- END ExtractByte;
- END HuffmanCoder;
- TYPE
- Encoder* =OBJECT
- VAR
- huff: HuffmanCoder;
- PROCEDURE & New*( archive: Streams.Writer );
- BEGIN
- NEW( huff, Encode, NIL, archive );
- END New;
- PROCEDURE CompressBlock*( CONST source: ARRAY OF CHAR; len: LONGINT );
- VAR i: LONGINT;
- BEGIN
- huff.Initialize;
- i := 0;
- WHILE i < len DO
- huff.EncodeByte( source[i] ); INC( i );
- IF (i MOD ScaleLimit = 0) & ((len-i) >= ScaleLimit) THEN huff.Scale END
- END;
- huff.Finish
- END CompressBlock;
- END Encoder;
- TYPE
- Decoder* =OBJECT
- VAR
- huff: HuffmanCoder;
- PROCEDURE & New*( archive: Streams.Reader );
- BEGIN
- NEW( huff, Decode, archive, NIL );
- END New;
- PROCEDURE ExtractBlock*( VAR buf: ARRAY OF CHAR; len: LONGINT );
- VAR i: LONGINT;
- BEGIN
- huff.Initialize;
- i := 0;
- WHILE i < len DO
- buf[i] := huff.ExtractByte( ); INC( i );
- IF (i MOD ScaleLimit = 0) & ((len - i) >= ScaleLimit) THEN huff.Scale END
- END;
- END ExtractBlock;
- END Decoder;
- END AdaptiveHuffman.
|